xref: /xnu-10063.141.1/bsd/net/if_bridge.c (revision d8b80295118ef25ac3a784134bcf95cd8e88109f)
1 /*
2  * Copyright (c) 2004-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*	$NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $	*/
30 /*
31  * Copyright 2001 Wasabi Systems, Inc.
32  * All rights reserved.
33  *
34  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed for the NetBSD Project by
47  *	Wasabi Systems, Inc.
48  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49  *    or promote products derived from this software without specific prior
50  *    written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
56  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62  * POSSIBILITY OF SUCH DAMAGE.
63  */
64 
65 /*
66  * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67  * All rights reserved.
68  *
69  * Redistribution and use in source and binary forms, with or without
70  * modification, are permitted provided that the following conditions
71  * are met:
72  * 1. Redistributions of source code must retain the above copyright
73  *    notice, this list of conditions and the following disclaimer.
74  * 2. Redistributions in binary form must reproduce the above copyright
75  *    notice, this list of conditions and the following disclaimer in the
76  *    documentation and/or other materials provided with the distribution.
77  *
78  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88  * POSSIBILITY OF SUCH DAMAGE.
89  *
90  * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91  */
92 
93 /*
94  * Network interface bridge support.
95  *
96  * TODO:
97  *
98  *	- Currently only supports Ethernet-like interfaces (Ethernet,
99  *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
100  *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
101  *	  consider heterogeneous bridges).
102  *
103  *	- GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104  */
105 
106 #include <sys/cdefs.h>
107 
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123 
124 #include <sys/kauth.h>
125 
126 #include <kern/thread_call.h>
127 
128 #include <libkern/libkern.h>
129 
130 #include <kern/zalloc.h>
131 
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141 #include <net/pfvar.h>
142 
143 #include <netinet/in.h> /* for struct arpcom */
144 #include <netinet/tcp.h> /* for struct tcphdr */
145 #include <netinet/in_systm.h>
146 #include <netinet/in_var.h>
147 #define _IP_VHL
148 #include <netinet/ip.h>
149 #include <netinet/ip_var.h>
150 #include <netinet/ip6.h>
151 #include <netinet6/ip6_var.h>
152 #ifdef DEV_CARP
153 #include <netinet/ip_carp.h>
154 #endif
155 #include <netinet/if_ether.h> /* for struct arpcom */
156 #include <net/bridgestp.h>
157 #include <net/if_bridgevar.h>
158 #include <net/if_llc.h>
159 #if NVLAN > 0
160 #include <net/if_vlan_var.h>
161 #endif /* NVLAN > 0 */
162 
163 #include <net/if_ether.h>
164 #include <net/dlil.h>
165 #include <net/kpi_interfacefilter.h>
166 
167 #include <net/route.h>
168 #include <dev/random/randomdev.h>
169 
170 #include <netinet/bootp.h>
171 #include <netinet/dhcp.h>
172 
173 #if SKYWALK
174 #include <skywalk/nexus/netif/nx_netif.h>
175 #endif /* SKYWALK */
176 
177 #include <net/sockaddr_utils.h>
178 
179 #include <os/log.h>
180 
181 /*
182  * if_bridge_debug, BR_DBGF_*
183  * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
184  *   to enable additional logs for the corresponding bridge function
185  * - "sysctl net.link.bridge.debug" controls the value of
186  *   'if_bridge_debug'
187  */
188 static uint32_t if_bridge_debug = 0;
189 #define BR_DBGF_LIFECYCLE       0x0001
190 #define BR_DBGF_INPUT           0x0002
191 #define BR_DBGF_OUTPUT          0x0004
192 #define BR_DBGF_RT_TABLE        0x0008
193 #define BR_DBGF_DELAYED_CALL    0x0010
194 #define BR_DBGF_IOCTL           0x0020
195 #define BR_DBGF_MBUF            0x0040
196 #define BR_DBGF_MCAST           0x0080
197 #define BR_DBGF_HOSTFILTER      0x0100
198 #define BR_DBGF_CHECKSUM        0x0200
199 #define BR_DBGF_MAC_NAT         0x0400
200 
201 /*
202  * if_bridge_log_level
203  * - 'if_bridge_log_level' ensures that by default important logs are
204  *   logged regardless of if_bridge_debug by comparing the log level
205  *   in BRIDGE_LOG to if_bridge_log_level
206  * - "sysctl net.link.bridge.log_level" controls the value of
207  *   'if_bridge_log_level'
208  * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
209  *   logs must use LOG_NOTICE to ensure they appear by default
210  */
211 static int if_bridge_log_level = LOG_NOTICE;
212 
/*
 * BRIDGE_DBGF_ENABLED
 * - evaluates to true when any bit of '__flag' is set in 'if_bridge_debug'
 * - '__flag' is parenthesized so that a combined mask, e.g.
 *   (BR_DBGF_INPUT | BR_DBGF_MCAST), is ANDed as a whole; without the
 *   parentheses '&' binds tighter than '|' and the test is always true
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)

/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - macros to generate the specified log conditionally based on
 *   the specified log level and debug flags
 * - the log is emitted when '__level' is at or below
 *   'if_bridge_log_level', or when any of the '__dbgf' bits is enabled
 * - BRIDGE_LOG prefixes the message with the calling function's name;
 *   BRIDGE_LOG_SIMPLE does not include the function name in the log
 * - '__string' must be a string literal (it is pasted onto "%s: ")
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, "%s: " __string, \
	                       __func__, ## __VA_ARGS__);       \
	        }                                                       \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                    \
	        if ((__level) <= if_bridge_log_level ||         \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                               \
	} while (0)
236 
237 #define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
238 #define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
239 #define BRIDGE_LOCK_ASSERT_HELD(_sc)            \
240 	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
241 #define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)         \
242 	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)
243 
/*
 * BRIDGE_LOCK_DEBUG selects how the bridge locking primitives are built:
 * - non-zero: each primitive maps to a function (bridge_lock() etc.)
 *   defined elsewhere in this file
 * - zero: each primitive is the inline macro defined in the #else branch
 */
244 #define BRIDGE_LOCK_DEBUG      1
245 #if BRIDGE_LOCK_DEBUG
246 
/* number of slots in the lock_lr[]/unlock_lr[] arrays of bridge_softc */
247 #define BR_LCKDBG_MAX                   4
248 
249 #define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
250 #define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
251 #define BRIDGE_LOCK2REF(_sc, _err)      _err = bridge_lock2ref(_sc)
252 #define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
253 #define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
254 #define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)
255 
256 #else /* !BRIDGE_LOCK_DEBUG */
257 
258 #define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
259 #define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
/*
 * BRIDGE_LOCK2REF
 * - must be entered with sc_mtx held; sc_mtx is released on exit
 * - sets _err to EBUSY when an exclusive request is pending
 *   (sc_iflist_xcnt > 0); otherwise takes a reference on the member
 *   list (sc_iflist_ref++) and sets _err to 0
 */
260 #define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
261 	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
262 	if ((_sc)->sc_iflist_xcnt > 0)                                  \
263 	        (_err) = EBUSY;                                         \
264 	else {                                                          \
265 	        (_sc)->sc_iflist_ref++;                                 \
266 	        (_err) = 0;                                             \
267 	}                                                               \
268 	_BRIDGE_UNLOCK(_sc);                                            \
269 } while (0)
/*
 * BRIDGE_UNREF
 * - takes sc_mtx, drops one member list reference, releases sc_mtx
 * - when the last reference is dropped while an exclusive request is
 *   pending, wakes any thread sleeping on sc_cv (see BRIDGE_XLOCK)
 */
270 #define BRIDGE_UNREF(_sc)               do {                            \
271 	_BRIDGE_LOCK(_sc);                                              \
272 	(_sc)->sc_iflist_ref--;                                         \
273 	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0))	{ \
274 	        _BRIDGE_UNLOCK(_sc);                                    \
275 	        wakeup(&(_sc)->sc_cv);                                  \
276 	} else                                                          \
277 	        _BRIDGE_UNLOCK(_sc);                                    \
278 } while (0)
/*
 * BRIDGE_XLOCK
 * - must be entered with sc_mtx held
 * - registers an exclusive request (sc_iflist_xcnt++) and then sleeps on
 *   sc_cv, passing sc_mtx to msleep(), until sc_iflist_ref reaches 0
 */
279 #define BRIDGE_XLOCK(_sc)               do {                            \
280 	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
281 	(_sc)->sc_iflist_xcnt++;                                        \
282 	while ((_sc)->sc_iflist_ref > 0)                                \
283 	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
284 	            "BRIDGE_XLOCK", NULL);                              \
285 } while (0)
/*
 * BRIDGE_XDROP
 * - must be entered with sc_mtx held
 * - withdraws one exclusive request (sc_iflist_xcnt--)
 */
286 #define BRIDGE_XDROP(_sc)               do {                            \
287 	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
288 	(_sc)->sc_iflist_xcnt--;                                        \
289 } while (0)
290 
291 #endif /* BRIDGE_LOCK_DEBUG */
292 
/*
 * BRIDGE_BPF_MTAP_INPUT
 * - hands 'm' to bridge_bpf_input() for the bridge interface when an
 *   input tap (sc_bpf_input) is installed on 'sc'
 * - expands to a single statement (do { } while (0)) so it is safe in an
 *   unbraced if/else, and parenthesizes its arguments
 * - compiles to an empty statement when BPF support is not configured
 */
#if NBPFILTER > 0
#define BRIDGE_BPF_MTAP_INPUT(sc, m)                                    \
	do {                                                            \
	        if ((sc)->sc_bpf_input != NULL) {                       \
	                bridge_bpf_input((sc)->sc_ifp, (m), __func__,   \
	                    __LINE__);                                  \
	        }                                                       \
	} while (0)
#else /* NBPFILTER */
#define BRIDGE_BPF_MTAP_INPUT(sc, m)    do { } while (0)
#endif /* NBPFILTER */
300 
301 /*
302  * Initial size of the route hash table.  Must be a power of two.
303  */
304 #ifndef BRIDGE_RTHASH_SIZE
305 #define BRIDGE_RTHASH_SIZE              16
306 #endif
307 
308 /*
309  * Maximum size of the routing hash table
310  */
311 #define BRIDGE_RTHASH_SIZE_MAX          2048
312 
313 #define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)
314 
315 /*
316  * Maximum number of addresses to cache.
317  */
318 #ifndef BRIDGE_RTABLE_MAX
319 #define BRIDGE_RTABLE_MAX               100
320 #endif
321 
322 
323 /*
324  * Timeout (in seconds) for entries learned dynamically.
325  */
326 #ifndef BRIDGE_RTABLE_TIMEOUT
327 #define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
328 #endif
329 
330 /*
331  * Number of seconds between walks of the route list.
332  */
333 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
334 #define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
335 #endif
336 
337 /*
338  * Number of MAC NAT entries
339  * - sized based on 16 clients (including MAC NAT interface)
340  *   each with 4 addresses
341  */
342 #ifndef BRIDGE_MAC_NAT_ENTRY_MAX
343 #define BRIDGE_MAC_NAT_ENTRY_MAX        64
344 #endif /* BRIDGE_MAC_NAT_ENTRY_MAX */
345 
346 /*
347  * List of capabilities to possibly mask on the member interface.
348  */
349 #define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
350 /*
351  * List of capabilities to disable on the member interface.
352  */
353 #define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
354 
355 /*
356  * Bridge interface list entry.
357  */
358 struct bridge_iflist {
359 	TAILQ_ENTRY(bridge_iflist) bif_next;   /* TAILQ linkage (bridge_softc keeps TAILQ heads of this type) */
360 	struct ifnet            *bif_ifp;       /* member if */
361 	struct bstp_port        bif_stp;        /* STP state */
362 	uint32_t                bif_ifflags;    /* member if flags (IFBIF_*) */
363 	int                     bif_savedcaps;  /* saved capabilities */
364 	uint32_t                bif_addrmax;    /* max # of addresses */
365 	uint32_t                bif_addrcnt;    /* cur. # of addresses */
366 	uint32_t                bif_addrexceeded; /* # of address violations */
367 
368 	interface_filter_t      bif_iff_ref;    /* interface filter reference */
369 	struct bridge_softc     *bif_sc;        /* presumably the owning bridge; verify against callers */
370 	uint32_t                bif_flags;      /* presumably BIFF_* bits; verify against callers */
371 
372 	/* host filter */
373 	struct in_addr          bif_hf_ipsrc;   /* host filter source IP */
374 	uint8_t                 bif_hf_hwsrc[ETHER_ADDR_LEN]; /* host filter source MAC */
375 
376 	struct ifbrmstats       bif_stats;      /* per-member statistics */
377 };
378 
379 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)380 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
381 {
382 	return (bif->bif_ifflags & flags) == flags;
383 }
384 
385 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)386 bif_has_checksum_offload(struct bridge_iflist * bif)
387 {
388 	return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
389 }
390 
391 /* fake errors to make the code clearer */
392 #define _EBADIP                 EJUSTRETURN
393 #define _EBADIPCHECKSUM         EJUSTRETURN
394 #define _EBADIPV6               EJUSTRETURN
395 #define _EBADUDP                EJUSTRETURN
396 #define _EBADTCP                EJUSTRETURN
397 #define _EBADUDPCHECKSUM        EJUSTRETURN
398 #define _EBADTCPCHECKSUM        EJUSTRETURN
399 
400 #define BIFF_PROMISC            0x01    /* promiscuous mode set */
401 #define BIFF_PROTO_ATTACHED     0x02    /* protocol attached */
402 #define BIFF_FILTER_ATTACHED    0x04    /* interface filter attached */
403 #define BIFF_MEDIA_ACTIVE       0x08    /* interface media active */
404 #define BIFF_HOST_FILTER        0x10    /* host filter enabled */
405 #define BIFF_HF_HWSRC           0x20    /* host filter source MAC is set */
406 #define BIFF_HF_IPSRC           0x40    /* host filter source IP is set */
407 #define BIFF_INPUT_BROADCAST    0x80    /* send broadcast packets in */
408 #define BIFF_IN_MEMBER_LIST     0x100   /* added to the member list */
409 #define BIFF_WIFI_INFRA         0x200   /* interface is Wi-Fi infra */
410 #define BIFF_ALL_MULTI          0x400   /* allmulti set */
411 #define BIFF_LRO_DISABLED       0x800   /* LRO was disabled */
412 #if SKYWALK
413 #define BIFF_FLOWSWITCH_ATTACHED 0x1000   /* we attached the flowswitch */
414 #define BIFF_NETAGENT_REMOVED    0x2000   /* we removed the netagent */
415 #endif /* SKYWALK */
416 
417 /*
418  * mac_nat_entry
419  * - translates between an IP address and MAC address on a specific
420  *   bridge interface member
421  */
422 struct mac_nat_entry {
423 	LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
424 	struct bridge_iflist    *mne_bif;       /* originating interface */
425 	unsigned long           mne_expire;     /* expiration time */
426 	union {
427 		struct in_addr  mneu_ip;        /* originating IPv4 address */
428 		struct in6_addr mneu_ip6;       /* originating IPv6 address */
429 	} mne_u;
430 	uint8_t                 mne_mac[ETHER_ADDR_LEN]; /* MAC address paired with the IP address */
431 	uint8_t                 mne_flags;      /* MNE_FLAGS_* */
432 	uint8_t                 mne_reserved;   /* not described here; presumably padding */
433 };
434 #define mne_ip  mne_u.mneu_ip
435 #define mne_ip6 mne_u.mneu_ip6
436 
437 #define MNE_FLAGS_IPV6          0x01    /* IPv6 address */
438 
439 LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
440 
441 /*
442  * mac_nat_record
443  * - used by bridge_mac_nat_output() to convey the translation that needs
444  *   to take place in bridge_mac_nat_translate
445  * - holds enough information so that the translation can be done later without
446  *   holding the bridge lock
447  */
448 struct mac_nat_record {
449 	uint16_t                mnr_ether_type; /* presumably selects which mnr_u arm is valid; verify */
450 	union {
451 		uint16_t        mnru_arp_offset; /* ARP case */
452 		struct {
453 			uint16_t mnruip_dhcp_flags;
454 			uint16_t mnruip_udp_csum;
455 			uint8_t  mnruip_header_len;
456 		} mnru_ip;                      /* IPv4 case */
457 		struct {
458 			uint16_t mnruip6_icmp6_len;
459 			uint16_t mnruip6_lladdr_offset;
460 			uint8_t mnruip6_icmp6_type;
461 			uint8_t mnruip6_header_len;
462 		} mnru_ip6;                     /* IPv6 case */
463 	} mnr_u;
464 };
465 
466 #define mnr_arp_offset  mnr_u.mnru_arp_offset
467 
468 #define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
469 #define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
470 #define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum
471 
472 #define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
473 #define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
474 #define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
475 #define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
476 
477 /*
478  * Bridge route node.
479  */
480 struct bridge_rtnode {
481 	LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
482 	LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
483 	struct bridge_iflist    *brt_dst;       /* destination if */
484 	unsigned long           brt_expire;     /* expiration time */
485 	uint8_t                 brt_flags;      /* address flags */
486 	uint8_t                 brt_addr[ETHER_ADDR_LEN]; /* Ethernet address */
487 	uint16_t                brt_vlan;       /* vlan id */
488 
489 };
490 #define brt_ifp                 brt_dst->bif_ifp
491 
492 /*
493  * Bridge delayed function call context
494  */
495 typedef void (*bridge_delayed_func_t)(struct bridge_softc *);
496 
497 struct bridge_delayed_call {
498 	struct bridge_softc     *bdc_sc;        /* bridge passed to bdc_func */
499 	bridge_delayed_func_t   bdc_func; /* Function to call */
500 	struct timespec         bdc_ts; /* Time to call */
501 	u_int32_t               bdc_flags;      /* BDCF_* flags */
502 	thread_call_t           bdc_thread_call; /* thread call handle */
503 };
504 
505 #define BDCF_OUTSTANDING        0x01    /* Delayed call has been scheduled */
506 #define BDCF_CANCELLING         0x02    /* May be waiting for call completion */
507 
508 /*
509  * Software state for each bridge.
510  */
511 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
512 
513 struct bridge_softc {
514 	struct ifnet            *sc_ifp;        /* make this an interface */
515 	u_int32_t               sc_flags;       /* presumably SCF_* bits; verify against callers */
516 	LIST_ENTRY(bridge_softc) sc_list;
517 	decl_lck_mtx_data(, sc_mtx);            /* mutex taken by the BRIDGE_LOCK macros */
518 	struct _bridge_rtnode_list *sc_rthash;  /* our forwarding table */
519 	struct _bridge_rtnode_list sc_rtlist;   /* list version of above */
520 	uint32_t                sc_rthash_key;  /* key for hash */
521 	uint32_t                sc_rthash_size; /* size of the hash table */
522 	struct bridge_delayed_call sc_aging_timer;
523 	struct bridge_delayed_call sc_resize_call;
524 	TAILQ_HEAD(, bridge_iflist) sc_spanlist;        /* span ports list */
525 	struct bstp_state       sc_stp;         /* STP state */
526 	bpf_packet_func         sc_bpf_input;
527 	bpf_packet_func         sc_bpf_output;
528 	void                    *sc_cv;         /* sleep/wakeup channel for BRIDGE_XLOCK/BRIDGE_UNREF */
529 	uint32_t                sc_brtmax;      /* max # of addresses */
530 	uint32_t                sc_brtcnt;      /* cur. # of addresses */
531 	uint32_t                sc_brttimeout;  /* rt timeout in seconds */
532 	uint32_t                sc_iflist_ref;  /* refcount for sc_iflist */
533 	uint32_t                sc_iflist_xcnt; /* # of pending exclusive requests on sc_iflist */
534 	TAILQ_HEAD(, bridge_iflist) sc_iflist;  /* member interface list */
535 	uint32_t                sc_brtexceeded; /* # of cache drops */
536 	uint32_t                sc_filter_flags; /* ipf and flags */
537 	struct ifnet            *sc_ifaddr;     /* member mac copied from */
538 	u_char                  sc_defaddr[6];  /* Default MAC address */
539 	char                    sc_if_xname[IFNAMSIZ];
540 
541 	struct bridge_iflist    *sc_mac_nat_bif; /* single MAC NAT interface */
542 	struct mac_nat_entry_list sc_mne_list;  /* MAC NAT IPv4 */
543 	struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
544 	uint32_t                sc_mne_max;      /* max # of entries */
545 	uint32_t                sc_mne_count;    /* cur. # of entries */
546 	uint32_t                sc_mne_allocation_failures;
547 #if BRIDGE_LOCK_DEBUG
548 	/*
549 	 * Locking and unlocking calling history
550 	 * (BR_LCKDBG_MAX slots each, with a next-slot index)
551 	 */
551 	void                    *lock_lr[BR_LCKDBG_MAX];
552 	int                     next_lock_lr;
553 	void                    *unlock_lr[BR_LCKDBG_MAX];
554 	int                     next_unlock_lr;
555 #endif /* BRIDGE_LOCK_DEBUG */
556 };
557 
558 #define SCF_DETACHING            0x01
559 #define SCF_RESIZING             0x02
560 #define SCF_MEDIA_ACTIVE         0x04
561 
/*
 * Checksum handling selector; bridge_enqueue() takes one of these as its
 * last argument.
 * NOTE(review): what each operation does is decided by code outside this
 * view - confirm before relying on the names alone.
 */
562 typedef enum {
563 	CHECKSUM_OPERATION_NONE = 0,
564 	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
565 	CHECKSUM_OPERATION_FINALIZE = 2,
566 	CHECKSUM_OPERATION_COMPUTE = 3,
567 } ChecksumOperation;
568 
/*
 * A single header pointer viewed as an IPv4 header, an IPv6 header, or an
 * untyped pointer; all three members share the same storage.
 */
569 union iphdr {
570 	struct ip *ip;
571 	struct ip6_hdr *ip6;
572 	void * ptr;
573 };
574 
/*
 * Description of an IP packet's header layout (IPv4 or IPv6, as indicated
 * by ip_is_ipv4).
 * NOTE(review): the code that fills this in is outside this view.
 */
575 typedef struct {
576 	u_int           ip_hlen;        /* IP header length */
577 	u_int           ip_pay_len;     /* length of payload (exclusive of ip_hlen) */
578 	u_int           ip_opt_len;     /* IPv6 options headers length */
579 	uint8_t         ip_proto;       /* IPPROTO_TCP, IPPROTO_UDP, etc. */
580 	bool            ip_is_ipv4;
581 	bool            ip_is_fragmented;
582 	union iphdr     ip_hdr;         /* pointer to IP header */
583 	void *          ip_proto_hdr;   /* ptr to protocol header (TCP) */
584 } ip_packet_info, *ip_packet_info_t;
585 
586 struct bridge_hostfilter_stats bridge_hostfilter_stats;
587 
588 static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
589 #if BRIDGE_LOCK_DEBUG
590 static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
591 #else
592 static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
593 #endif
594 static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
595 
596 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
597 
598 static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
599 static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
600 
601 static int      bridge_clone_create(struct if_clone *, uint32_t, void *);
602 static int      bridge_clone_destroy(struct ifnet *);
603 
604 static errno_t  bridge_ioctl(struct ifnet *, u_long, void *);
605 #if HAS_IF_CAP
606 static void     bridge_mutecaps(struct bridge_softc *);
607 static void     bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
608     int);
609 #endif
610 static errno_t bridge_set_tso(struct bridge_softc *);
611 static void     bridge_proto_attach_changed(struct ifnet *);
612 static int      bridge_init(struct ifnet *);
613 #if HAS_BRIDGE_DUMMYNET
614 static void     bridge_dummynet(struct mbuf *, struct ifnet *);
615 #endif
616 static void     bridge_ifstop(struct ifnet *, int);
617 static int      bridge_output(struct ifnet *, struct mbuf *);
618 static void     bridge_finalize_cksum(struct ifnet *, struct mbuf *);
619 static void     bridge_start(struct ifnet *);
620 static errno_t  bridge_input(struct ifnet *, mbuf_t *);
621 static errno_t  bridge_iff_input(void *, ifnet_t, protocol_family_t,
622     mbuf_t *, char **);
623 static errno_t  bridge_iff_output(void *, ifnet_t, protocol_family_t,
624     mbuf_t *);
625 static errno_t  bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
626     mbuf_t *m);
627 
628 static int      bridge_enqueue(ifnet_t, struct ifnet *,
629     struct ifnet *, struct mbuf *, ChecksumOperation);
630 static void     bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
631 
632 static void     bridge_forward(struct bridge_softc *, struct bridge_iflist *,
633     struct mbuf *);
634 
635 static void     bridge_aging_timer(struct bridge_softc *sc);
636 
637 static void     bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
638     struct mbuf *, int);
639 static void     bridge_span(struct bridge_softc *, struct mbuf *);
640 
641 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
642     uint16_t, struct bridge_iflist *, int, uint8_t);
643 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
644     uint16_t);
645 static void     bridge_rttrim(struct bridge_softc *);
646 static void     bridge_rtage(struct bridge_softc *);
647 static void     bridge_rtflush(struct bridge_softc *, int);
648 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
649     uint16_t);
650 
651 static int      bridge_rtable_init(struct bridge_softc *);
652 static void     bridge_rtable_fini(struct bridge_softc *);
653 
654 static void     bridge_rthash_resize(struct bridge_softc *);
655 
656 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
657 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
658     const uint8_t *, uint16_t);
659 static int      bridge_rtnode_hash(struct bridge_softc *,
660     struct bridge_rtnode *);
661 static int      bridge_rtnode_insert(struct bridge_softc *,
662     struct bridge_rtnode *);
663 static void     bridge_rtnode_destroy(struct bridge_softc *,
664     struct bridge_rtnode *);
665 #if BRIDGESTP
666 static void     bridge_rtable_expire(struct ifnet *, int);
667 static void     bridge_state_change(struct ifnet *, int);
668 #endif /* BRIDGESTP */
669 
670 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
671     const char *name);
672 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
673     struct ifnet *ifp);
674 static void     bridge_delete_member(struct bridge_softc *,
675     struct bridge_iflist *);
676 static void     bridge_delete_span(struct bridge_softc *,
677     struct bridge_iflist *);
678 
679 static int      bridge_ioctl_add(struct bridge_softc *, void *);
680 static int      bridge_ioctl_del(struct bridge_softc *, void *);
681 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
682 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
683 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
684 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
685 static int      bridge_ioctl_gifs32(struct bridge_softc *, void *);
686 static int      bridge_ioctl_gifs64(struct bridge_softc *, void *);
687 static int      bridge_ioctl_rts32(struct bridge_softc *, void *);
688 static int      bridge_ioctl_rts64(struct bridge_softc *, void *);
689 static int      bridge_ioctl_saddr32(struct bridge_softc *, void *);
690 static int      bridge_ioctl_saddr64(struct bridge_softc *, void *);
691 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
692 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
693 static int      bridge_ioctl_daddr32(struct bridge_softc *, void *);
694 static int      bridge_ioctl_daddr64(struct bridge_softc *, void *);
695 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
696 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
697 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
698 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
699 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
700 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
701 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
702 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
703 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
704 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
705 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
706 static int      bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
707 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
708 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
709 static int      bridge_ioctl_gbparam32(struct bridge_softc *, void *);
710 static int      bridge_ioctl_gbparam64(struct bridge_softc *, void *);
711 static int      bridge_ioctl_grte(struct bridge_softc *, void *);
712 static int      bridge_ioctl_gifsstp32(struct bridge_softc *, void *);
713 static int      bridge_ioctl_gifsstp64(struct bridge_softc *, void *);
714 static int      bridge_ioctl_sproto(struct bridge_softc *, void *);
715 static int      bridge_ioctl_stxhc(struct bridge_softc *, void *);
716 static int      bridge_ioctl_purge(struct bridge_softc *sc, void *);
717 static int      bridge_ioctl_gfilt(struct bridge_softc *, void *);
718 static int      bridge_ioctl_sfilt(struct bridge_softc *, void *);
719 static int      bridge_ioctl_ghostfilter(struct bridge_softc *, void *);
720 static int      bridge_ioctl_shostfilter(struct bridge_softc *, void *);
721 static int      bridge_ioctl_gmnelist32(struct bridge_softc *, void *);
722 static int      bridge_ioctl_gmnelist64(struct bridge_softc *, void *);
723 static int      bridge_ioctl_gifstats32(struct bridge_softc *, void *);
724 static int      bridge_ioctl_gifstats64(struct bridge_softc *, void *);
725 
726 static int bridge_pf(struct mbuf **, struct ifnet *, uint32_t sc_filter_flags, int input);
727 static int bridge_ip_checkbasic(struct mbuf **);
728 static int bridge_ip6_checkbasic(struct mbuf **);
729 
730 static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
731 static errno_t bridge_bpf_input(ifnet_t, struct mbuf *, const char *, int);
732 static errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
733 
734 static void bridge_detach(ifnet_t);
735 static void bridge_link_event(struct ifnet *, u_int32_t);
736 static void bridge_iflinkevent(struct ifnet *);
737 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
738 static int interface_media_active(struct ifnet *);
739 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
740 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
741 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
742 static int bridge_host_filter(struct bridge_iflist *, mbuf_t *);
743 
744 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
745     struct bridge_iflist *);
746 static void bridge_mac_nat_disable(struct bridge_softc *sc);
747 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
748 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
749 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
750     struct bridge_iflist *);
751 static ifnet_t bridge_mac_nat_input(struct bridge_softc *, mbuf_t *,
752     boolean_t *);
753 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
754     struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
755 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
756     const caddr_t);
757 static bool is_broadcast_ip_packet(mbuf_t *);
758 static bool in_addr_is_ours(const struct in_addr);
759 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
760 
761 #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
762 
763 static int
764 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
765     boolean_t is_tx);
766 
767 /* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */
768 #define VLANTAGOF(_m)   0
769 
770 u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
771 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
772 
773 static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
774 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
775 
#if BRIDGESTP
/*
 * Callbacks handed to the spanning tree code: one for state changes,
 * one for routing table ageing.
 */
static struct bstp_cb_ops bridge_ops = {
	.bcb_state      = bridge_state_change,
	.bcb_rtage      = bridge_rtable_expire,
};
#endif /* BRIDGESTP */
782 
783 SYSCTL_DECL(_net_link);
784 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
785     "Bridge");
786 
787 static int bridge_inherit_mac = 0;   /* share MAC with first bridge member */
788 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
789     CTLFLAG_RW | CTLFLAG_LOCKED,
790     &bridge_inherit_mac, 0,
791     "Inherit MAC address from the first bridge member");
792 
793 SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
794     CTLFLAG_RW | CTLFLAG_LOCKED,
795     &bridge_rtable_prune_period, 0,
796     "Interval between pruning of routing table");
797 
798 static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
799 SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
800     CTLFLAG_RW | CTLFLAG_LOCKED,
801     &bridge_rtable_hash_size_max, 0,
802     "Maximum size of the routing hash table");
803 
804 #if BRIDGE_DELAYED_CALLBACK_DEBUG
805 static int bridge_delayed_callback_delay = 0;
806 SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
807     CTLFLAG_RW | CTLFLAG_LOCKED,
808     &bridge_delayed_callback_delay, 0,
809     "Delay before calling delayed function");
810 #endif
811 
812 SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
813     hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
814     &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
815 
#if BRIDGESTP
/*
 * net.link.bridge.log_stp
 * - when non-zero, STP state changes are logged
 * - declared CTLFLAG_LOCKED like every other integer sysctl under
 *   net.link.bridge in this file
 */
static int log_stp   = 0;   /* log STP state changes */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &log_stp, 0, "Log STP state changes");
#endif /* BRIDGESTP */
821 
/*
 * One entry of a bridge control table: a handler together with the size
 * of its argument and the BC_F_* flags that apply to it.
 */
struct bridge_control {
	/* handler; receives the bridge softc and an argument pointer */
	int             (*bc_func)(struct bridge_softc *, void *);
	/* size of the argument structure (the tables use sizeof()) */
	unsigned int    bc_argsize;
	/* combination of BC_F_* values */
	unsigned int    bc_flags;
};
827 
828 #define VMNET_TAG               "com.apple.vmnet"
829 #define VMNET_LOCAL_TAG         VMNET_TAG ".local"
830 #define VMNET_BROADCAST_TAG     VMNET_TAG ".broadcast"
831 #define VMNET_MULTICAST_TAG     VMNET_TAG ".multicast"
832 
833 static u_int16_t vmnet_tag;
834 static u_int16_t vmnet_local_tag;
835 static u_int16_t vmnet_broadcast_tag;
836 static u_int16_t vmnet_multicast_tag;
837 
838 static u_int16_t
allocate_pf_tag(char * name)839 allocate_pf_tag(char * name)
840 {
841 	u_int16_t       tag;
842 
843 	tag = pf_tagname2tag_ext(name);
844 	BRIDGE_LOG(LOG_NOTICE, 0, "%s %d", name, tag);
845 	return tag;
846 }
847 
848 static void
allocate_vmnet_pf_tags(void)849 allocate_vmnet_pf_tags(void)
850 {
851 	/* allocate tags to use with PF */
852 	if (vmnet_tag == 0) {
853 		vmnet_tag = allocate_pf_tag(VMNET_TAG);
854 	}
855 	if (vmnet_local_tag == 0) {
856 		vmnet_local_tag = allocate_pf_tag(VMNET_LOCAL_TAG);
857 	}
858 	if (vmnet_broadcast_tag == 0) {
859 		vmnet_broadcast_tag = allocate_pf_tag(VMNET_BROADCAST_TAG);
860 	}
861 	if (vmnet_multicast_tag == 0) {
862 		vmnet_multicast_tag = allocate_pf_tag(VMNET_MULTICAST_TAG);
863 	}
864 }
865 
/* values for bridge_control.bc_flags */
#define BC_F_COPYIN             0x01    /* copy arguments in */
#define BC_F_COPYOUT            0x02    /* copy arguments out */
#define BC_F_SUSER              0x04    /* do super-user check */
869 
870 static const struct bridge_control bridge_control_table32[] = {
871 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
872 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
873 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
874 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
875 
876 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
877 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
878 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
879 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
880 
881 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
882 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
883 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
884 	  .bc_flags = BC_F_COPYOUT },
885 
886 	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
887 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
888 	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
889 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
890 
891 	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
892 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
893 
894 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
895 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
896 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
897 	  .bc_flags = BC_F_COPYOUT },
898 
899 	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
900 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
901 
902 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
903 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
904 
905 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
906 	  .bc_flags = BC_F_COPYOUT },
907 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
908 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
909 
910 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
911 	  .bc_flags = BC_F_COPYOUT },
912 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
913 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
914 
915 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
916 	  .bc_flags = BC_F_COPYOUT },
917 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
918 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
919 
920 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
921 	  .bc_flags = BC_F_COPYOUT },
922 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
923 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
924 
925 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
926 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
927 
928 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
929 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
930 
931 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
932 	  .bc_flags = BC_F_COPYOUT },
933 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
934 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
935 
936 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
937 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
938 
939 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
940 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
941 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
942 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
943 
944 	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
945 	  .bc_flags = BC_F_COPYOUT },
946 
947 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
948 	  .bc_flags = BC_F_COPYOUT },
949 
950 	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
951 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
952 
953 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
954 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
955 
956 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
957 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
958 
959 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
960 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
961 
962 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
963 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
964 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
965 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
966 
967 	{ .bc_func = bridge_ioctl_gmnelist32,
968 	  .bc_argsize = sizeof(struct ifbrmnelist32),
969 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
970 	{ .bc_func = bridge_ioctl_gifstats32,
971 	  .bc_argsize = sizeof(struct ifbrmreq32),
972 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
973 };
974 
975 static const struct bridge_control bridge_control_table64[] = {
976 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),           /* 0 */
977 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
978 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
979 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
980 
981 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
982 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
983 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
984 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
985 
986 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
987 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
988 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
989 	  .bc_flags = BC_F_COPYOUT },
990 
991 	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
992 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
993 	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
994 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
995 
996 	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
997 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
998 
999 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
1000 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1001 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
1002 	  .bc_flags = BC_F_COPYOUT },
1003 
1004 	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
1005 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1006 
1007 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
1008 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1009 
1010 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
1011 	  .bc_flags = BC_F_COPYOUT },
1012 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
1013 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1014 
1015 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
1016 	  .bc_flags = BC_F_COPYOUT },
1017 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
1018 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1019 
1020 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
1021 	  .bc_flags = BC_F_COPYOUT },
1022 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
1023 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1024 
1025 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
1026 	  .bc_flags = BC_F_COPYOUT },
1027 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
1028 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1029 
1030 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
1031 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1032 
1033 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
1034 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1035 
1036 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
1037 	  .bc_flags = BC_F_COPYOUT },
1038 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
1039 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1040 
1041 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
1042 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1043 
1044 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
1045 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1046 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
1047 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1048 
1049 	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
1050 	  .bc_flags = BC_F_COPYOUT },
1051 
1052 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
1053 	  .bc_flags = BC_F_COPYOUT },
1054 
1055 	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
1056 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1057 
1058 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
1059 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1060 
1061 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
1062 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1063 
1064 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
1065 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1066 
1067 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1068 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1069 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1070 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1071 
1072 	{ .bc_func = bridge_ioctl_gmnelist64,
1073 	  .bc_argsize = sizeof(struct ifbrmnelist64),
1074 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1075 	{ .bc_func = bridge_ioctl_gifstats64,
1076 	  .bc_argsize = sizeof(struct ifbrmreq64),
1077 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1078 };
1079 
1080 static const unsigned int bridge_control_table_size =
1081     sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1082 
1083 static LIST_HEAD(, bridge_softc) bridge_list =
1084     LIST_HEAD_INITIALIZER(bridge_list);
1085 
1086 #define BRIDGENAME      "bridge"
1087 #define BRIDGES_MAX     IF_MAXUNIT
1088 #define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)
1089 
1090 static struct if_clone bridge_cloner =
1091     IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
1092     0, BRIDGES_MAX);
1093 
1094 static int if_bridge_txstart = 0;
1095 SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
1096     &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");
1097 
1098 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
1099     &if_bridge_debug, 0, "Bridge debug flags");
1100 
1101 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
1102     CTLFLAG_RW | CTLFLAG_LOCKED,
1103     &if_bridge_log_level, 0, "Bridge log level");
1104 
1105 static int if_bridge_segmentation = 1;
1106 SYSCTL_INT(_net_link_bridge, OID_AUTO, segmentation,
1107     CTLFLAG_RW | CTLFLAG_LOCKED,
1108     &if_bridge_segmentation, 0, "Bridge interface enable segmentation");
1109 
1110 static int if_bridge_vmnet_pf_tagging = 1;
1111 SYSCTL_INT(_net_link_bridge, OID_AUTO, vmnet_pf_tagging,
1112     CTLFLAG_RW | CTLFLAG_LOCKED,
1113     &if_bridge_segmentation, 0, "Bridge interface enable vmnet PF tagging");
1114 
1115 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX            256
1116 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT        110
1117 #define BRIDGE_TSO_REDUCE_MSS_TX_MAX                    256
1118 #define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT                0
1119 
1120 static u_int if_bridge_tso_reduce_mss_forwarding
1121         = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
1122 static u_int if_bridge_tso_reduce_mss_tx
1123         = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1124 
1125 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1126 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1127 {
1128 	int     changed;
1129 	int     error;
1130 	u_int   new_value;
1131 
1132 	error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1133 	    &changed);
1134 	if (error == 0 && changed != 0) {
1135 		if (new_value > val_max) {
1136 			return EINVAL;
1137 		}
1138 		*val = new_value;
1139 	}
1140 	return error;
1141 }
1142 
1143 static int
1144 bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
1145 {
1146 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
1147     BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
1148 }
1149 
1150 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
1151     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1152     0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
1153     "Bridge tso reduce mss when forwarding");
1154 
1155 static int
1156 bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
1157 {
1158 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
1159     BRIDGE_TSO_REDUCE_MSS_TX_MAX);
1160 }
1161 
1162 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
1163     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1164     0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
1165     "Bridge tso reduce mss on transmit");
1166 
1167 #if DEBUG || DEVELOPMENT
1168 #define BRIDGE_FORCE_ONE        0x00000001
1169 #define BRIDGE_FORCE_TWO        0x00000002
1170 static u_int32_t if_bridge_force_errors = 0;
1171 SYSCTL_INT(_net_link_bridge, OID_AUTO, force_errors,
1172     CTLFLAG_RW | CTLFLAG_LOCKED,
1173     &if_bridge_force_errors, 0, "Bridge interface force errors");
1174 static inline bool
bridge_error_is_forced(u_int32_t flags)1175 bridge_error_is_forced(u_int32_t flags)
1176 {
1177 	return (if_bridge_force_errors & flags) != 0;
1178 }
1179 
/*
 * Store bridge_error_is_forced(__flags) in __is_forced and log a notice
 * when the result is true.
 */
#define BRIDGE_ERROR_GET_FORCED(__is_forced, __flags)                   \
	do {                                                            \
	        (__is_forced) = bridge_error_is_forced(__flags);        \
	        if (__is_forced) {                                      \
	                BRIDGE_LOG(LOG_NOTICE, 0, "0x%x forced", __flags); \
	        }                                                       \
	} while (0)
1187 
1188 /*
1189  * net.link.bridge.reduce_tso_mtu
1190  * - when non-zero, the bridge overrides the interface TSO MTU to a lower
1191  *   value (i.e. 16K) to enable testing the "use GSO instead" path
1192  */
1193 static int if_bridge_reduce_tso_mtu = 0;
1194 SYSCTL_INT(_net_link_bridge, OID_AUTO, reduce_tso_mtu,
1195     CTLFLAG_RW | CTLFLAG_LOCKED,
1196     &if_bridge_reduce_tso_mtu, 0, "Bridge interface reduce TSO MTU");
1197 
1198 #endif /* DEBUG || DEVELOPMENT */
1199 
1200 static void brlog_ether_header(struct ether_header *);
1201 static void brlog_mbuf_data(mbuf_t, size_t, size_t);
1202 static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
1203 static void brlog_mbuf(mbuf_t, const char *, const char *);
1204 static void brlog_link(struct bridge_softc * sc);
1205 
1206 #if BRIDGE_LOCK_DEBUG
1207 static void bridge_lock(struct bridge_softc *);
1208 static void bridge_unlock(struct bridge_softc *);
1209 static int bridge_lock2ref(struct bridge_softc *);
1210 static void bridge_unref(struct bridge_softc *);
1211 static void bridge_xlock(struct bridge_softc *);
1212 static void bridge_xdrop(struct bridge_softc *);
1213 
1214 static void
bridge_lock(struct bridge_softc * sc)1215 bridge_lock(struct bridge_softc *sc)
1216 {
1217 	void *lr_saved = __builtin_return_address(0);
1218 
1219 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1220 
1221 	_BRIDGE_LOCK(sc);
1222 
1223 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1224 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1225 }
1226 
1227 static void
bridge_unlock(struct bridge_softc * sc)1228 bridge_unlock(struct bridge_softc *sc)
1229 {
1230 	void *lr_saved = __builtin_return_address(0);
1231 
1232 	BRIDGE_LOCK_ASSERT_HELD(sc);
1233 
1234 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1235 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1236 
1237 	_BRIDGE_UNLOCK(sc);
1238 }
1239 
1240 static int
bridge_lock2ref(struct bridge_softc * sc)1241 bridge_lock2ref(struct bridge_softc *sc)
1242 {
1243 	int error = 0;
1244 	void *lr_saved = __builtin_return_address(0);
1245 
1246 	BRIDGE_LOCK_ASSERT_HELD(sc);
1247 
1248 	if (sc->sc_iflist_xcnt > 0) {
1249 		error = EBUSY;
1250 	} else {
1251 		sc->sc_iflist_ref++;
1252 	}
1253 
1254 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1255 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1256 
1257 	_BRIDGE_UNLOCK(sc);
1258 
1259 	return error;
1260 }
1261 
1262 static void
bridge_unref(struct bridge_softc * sc)1263 bridge_unref(struct bridge_softc *sc)
1264 {
1265 	void *lr_saved = __builtin_return_address(0);
1266 
1267 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1268 
1269 	_BRIDGE_LOCK(sc);
1270 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1271 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1272 
1273 	sc->sc_iflist_ref--;
1274 
1275 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1276 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1277 	if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1278 		_BRIDGE_UNLOCK(sc);
1279 		wakeup(&sc->sc_cv);
1280 	} else {
1281 		_BRIDGE_UNLOCK(sc);
1282 	}
1283 }
1284 
1285 static void
bridge_xlock(struct bridge_softc * sc)1286 bridge_xlock(struct bridge_softc *sc)
1287 {
1288 	void *lr_saved = __builtin_return_address(0);
1289 
1290 	BRIDGE_LOCK_ASSERT_HELD(sc);
1291 
1292 	sc->sc_iflist_xcnt++;
1293 	while (sc->sc_iflist_ref > 0) {
1294 		sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1295 		sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1296 
1297 		msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1298 
1299 		sc->lock_lr[sc->next_lock_lr] = lr_saved;
1300 		sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1301 	}
1302 }
1303 
1304 static void
bridge_xdrop(struct bridge_softc * sc)1305 bridge_xdrop(struct bridge_softc *sc)
1306 {
1307 	BRIDGE_LOCK_ASSERT_HELD(sc);
1308 
1309 	sc->sc_iflist_xcnt--;
1310 }
1311 
1312 #endif /* BRIDGE_LOCK_DEBUG */
1313 
1314 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1315 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1316 {
1317 	if (m) {
1318 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1319 		    "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1320 		    prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1321 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1322 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1323 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1324 		    suffix ? suffix : "");
1325 	} else {
1326 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1327 	}
1328 }
1329 
1330 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1331 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1332 {
1333 	if (m) {
1334 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1335 		    "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1336 		    "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1337 		    prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1338 		    mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1339 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
1340 		    (unsigned int)mbuf_maxlen(m),
1341 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1342 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1343 		    !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1344 		if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1345 			brlog_mbuf_pkthdr(m, "", suffix);
1346 		}
1347 	} else {
1348 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1349 	}
1350 }
1351 
1352 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1353 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1354 {
1355 	mbuf_t                  n;
1356 	size_t                  i, j;
1357 	size_t                  pktlen, mlen, maxlen;
1358 	unsigned char   *ptr;
1359 
1360 	pktlen = mbuf_pkthdr_len(m);
1361 
1362 	if (offset > pktlen) {
1363 		return;
1364 	}
1365 
1366 	maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1367 	n = m;
1368 	mlen = mbuf_len(n);
1369 	ptr = mbuf_data(n);
1370 	for (i = 0, j = 0; i < maxlen; i++, j++) {
1371 		if (j >= mlen) {
1372 			n = mbuf_next(n);
1373 			if (n == 0) {
1374 				break;
1375 			}
1376 			ptr = mbuf_data(n);
1377 			mlen = mbuf_len(n);
1378 			j = 0;
1379 		}
1380 		if (i >= offset) {
1381 			BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1382 			    "%02x%s", ptr[j], i % 2 ? " " : "");
1383 		}
1384 	}
1385 }
1386 
1387 static void
brlog_ether_header(struct ether_header * eh)1388 brlog_ether_header(struct ether_header *eh)
1389 {
1390 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1391 	    "%02x:%02x:%02x:%02x:%02x:%02x > "
1392 	    "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1393 	    eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1394 	    eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1395 	    eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1396 	    eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1397 	    ntohs(eh->ether_type));
1398 }
1399 
/*
 * ether_ntop:
 *
 *	Format the six bytes at `ap' as lower-case, colon-separated hex
 *	("xx:xx:xx:xx:xx:xx") into `buf', which holds `len' bytes.
 *	snprintf() truncates the text when `buf' is too small.
 *	Returns `buf'.
 */
static char *
ether_ntop(char *buf, size_t len, const u_char *ap)
{
	(void)snprintf(buf, len,
	    "%02x:%02x:%02x:%02x:%02x:%02x",
	    ap[0], ap[1], ap[2],
	    ap[3], ap[4], ap[5]);
	return buf;
}
1408 
1409 static void
brlog_link(struct bridge_softc * sc)1410 brlog_link(struct bridge_softc * sc)
1411 {
1412 	int i;
1413 	uint32_t sdl_buffer[(offsetof(struct sockaddr_dl, sdl_data) +
1414 	IFNAMSIZ + ETHER_ADDR_LEN)];
1415 	struct sockaddr_dl *sdl = SDL((uint8_t*)&sdl_buffer); /* SDL requires byte pointer */
1416 	const u_char * lladdr;
1417 	char lladdr_str[48];
1418 
1419 	memset(sdl, 0, sizeof(sdl_buffer));
1420 	sdl->sdl_family = AF_LINK;
1421 	sdl->sdl_nlen = strlen(sc->sc_if_xname);
1422 	sdl->sdl_alen = ETHER_ADDR_LEN;
1423 	sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1424 	memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1425 	memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1426 	lladdr_str[0] = '\0';
1427 	for (i = 0, lladdr = CONST_LLADDR(sdl);
1428 	    i < sdl->sdl_alen;
1429 	    i++, lladdr++) {
1430 		char    byte_str[4];
1431 
1432 		snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1433 		    *lladdr);
1434 		strlcat(lladdr_str, byte_str, sizeof(lladdr_str));
1435 	}
1436 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1437 	    "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1438 	    " slen %d addr %s", sc->sc_if_xname,
1439 	    sdl->sdl_len, sdl->sdl_index,
1440 	    sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1441 	    sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1442 }
1443 
1444 
1445 /*
1446  * bridgeattach:
1447  *
1448  *	Pseudo-device attach routine.
1449  */
1450 __private_extern__ int
bridgeattach(int n)1451 bridgeattach(int n)
1452 {
1453 #pragma unused(n)
1454 	int error;
1455 
1456 	LIST_INIT(&bridge_list);
1457 
1458 #if BRIDGESTP
1459 	bstp_sys_init();
1460 #endif /* BRIDGESTP */
1461 
1462 	error = if_clone_attach(&bridge_cloner);
1463 	if (error != 0) {
1464 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1465 	}
1466 	return error;
1467 }
1468 
1469 
1470 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1471 bridge_ifnet_set_attrs(struct ifnet * ifp)
1472 {
1473 	errno_t         error;
1474 
1475 	error = ifnet_set_mtu(ifp, ETHERMTU);
1476 	if (error != 0) {
1477 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1478 		goto done;
1479 	}
1480 	error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1481 	if (error != 0) {
1482 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1483 		goto done;
1484 	}
1485 	error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1486 	if (error != 0) {
1487 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1488 		goto done;
1489 	}
1490 	error = ifnet_set_flags(ifp,
1491 	    IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1492 	    0xffff);
1493 
1494 	if (error != 0) {
1495 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1496 		goto done;
1497 	}
1498 done:
1499 	return error;
1500 }
1501 
1502 /*
1503  * bridge_clone_create:
1504  *
1505  *	Create a new bridge instance.
1506  */
1507 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1508 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1509 {
1510 #pragma unused(params)
1511 	struct ifnet *ifp = NULL;
1512 	struct bridge_softc *sc = NULL;
1513 	struct bridge_softc *sc2 = NULL;
1514 	struct ifnet_init_eparams init_params;
1515 	errno_t error = 0;
1516 	uint8_t eth_hostid[ETHER_ADDR_LEN];
1517 	int fb, retry, has_hostid;
1518 
1519 	sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
1520 	lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1521 	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1522 	sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1523 	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1524 	sc->sc_filter_flags = 0;
1525 
1526 	TAILQ_INIT(&sc->sc_iflist);
1527 
1528 	/* use the interface name as the unique id for ifp recycle */
1529 	snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1530 	    ifc->ifc_name, unit);
1531 	bzero(&init_params, sizeof(init_params));
1532 	init_params.ver                 = IFNET_INIT_CURRENT_VERSION;
1533 	init_params.len                 = sizeof(init_params);
1534 	/* Initialize our routing table. */
1535 	error = bridge_rtable_init(sc);
1536 	if (error != 0) {
1537 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1538 		goto done;
1539 	}
1540 	TAILQ_INIT(&sc->sc_spanlist);
1541 	if (if_bridge_txstart) {
1542 		init_params.start = bridge_start;
1543 	} else {
1544 		init_params.flags = IFNET_INIT_LEGACY;
1545 		init_params.output = bridge_output;
1546 	}
1547 	init_params.set_bpf_tap = bridge_set_bpf_tap;
1548 	init_params.uniqueid            = sc->sc_if_xname;
1549 	init_params.uniqueid_len        = strlen(sc->sc_if_xname);
1550 	init_params.sndq_maxlen         = IFQ_MAXLEN;
1551 	init_params.name                = ifc->ifc_name;
1552 	init_params.unit                = unit;
1553 	init_params.family              = IFNET_FAMILY_ETHERNET;
1554 	init_params.type                = IFT_BRIDGE;
1555 	init_params.demux               = ether_demux;
1556 	init_params.add_proto           = ether_add_proto;
1557 	init_params.del_proto           = ether_del_proto;
1558 	init_params.check_multi         = ether_check_multi;
1559 	init_params.framer_extended     = ether_frameout_extended;
1560 	init_params.softc               = sc;
1561 	init_params.ioctl               = bridge_ioctl;
1562 	init_params.detach              = bridge_detach;
1563 	init_params.broadcast_addr      = etherbroadcastaddr;
1564 	init_params.broadcast_len       = ETHER_ADDR_LEN;
1565 
1566 	error = ifnet_allocate_extended(&init_params, &ifp);
1567 	if (error != 0) {
1568 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1569 		goto done;
1570 	}
1571 	LIST_INIT(&sc->sc_mne_list);
1572 	LIST_INIT(&sc->sc_mne_list_v6);
1573 	sc->sc_ifp = ifp;
1574 	error = bridge_ifnet_set_attrs(ifp);
1575 	if (error != 0) {
1576 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1577 		    error);
1578 		goto done;
1579 	}
1580 	/*
1581 	 * Generate an ethernet address with a locally administered address.
1582 	 *
1583 	 * Since we are using random ethernet addresses for the bridge, it is
1584 	 * possible that we might have address collisions, so make sure that
1585 	 * this hardware address isn't already in use on another bridge.
1586 	 * The first try uses the "hostid" and falls back to read_frandom();
1587 	 * for "hostid", we use the MAC address of the first-encountered
1588 	 * Ethernet-type interface that is currently configured.
1589 	 */
1590 	fb = 0;
1591 	has_hostid = (uuid_get_ethernet(&eth_hostid[0]) == 0);
1592 	for (retry = 1; retry != 0;) {
1593 		if (fb || has_hostid == 0) {
1594 			read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1595 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1596 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1597 		} else {
1598 			bcopy(&eth_hostid[0], &sc->sc_defaddr,
1599 			    ETHER_ADDR_LEN);
1600 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1601 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1602 			sc->sc_defaddr[3] =     /* stir it up a bit */
1603 			    ((sc->sc_defaddr[3] & 0x0f) << 4) |
1604 			    ((sc->sc_defaddr[3] & 0xf0) >> 4);
1605 			/*
1606 			 * Mix in the LSB as it's actually pretty significant,
1607 			 * see rdar://14076061
1608 			 */
1609 			sc->sc_defaddr[4] =
1610 			    (((sc->sc_defaddr[4] & 0x0f) << 4) |
1611 			    ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1612 			    sc->sc_defaddr[5];
1613 			sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1614 		}
1615 
1616 		fb = 1;
1617 		retry = 0;
1618 		lck_mtx_lock(&bridge_list_mtx);
1619 		LIST_FOREACH(sc2, &bridge_list, sc_list) {
1620 			if (_ether_cmp(sc->sc_defaddr,
1621 			    IF_LLADDR(sc2->sc_ifp)) == 0) {
1622 				retry = 1;
1623 			}
1624 		}
1625 		lck_mtx_unlock(&bridge_list_mtx);
1626 	}
1627 
1628 	sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1629 
1630 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1631 		brlog_link(sc);
1632 	}
1633 	error = ifnet_attach(ifp, NULL);
1634 	if (error != 0) {
1635 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1636 		goto done;
1637 	}
1638 
1639 	error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1640 	    IFT_ETHER);
1641 	if (error != 0) {
1642 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1643 		    error);
1644 		goto done;
1645 	}
1646 
1647 	ifnet_set_offload(ifp,
1648 	    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1649 	    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1650 	error = bridge_set_tso(sc);
1651 	if (error != 0) {
1652 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1653 		goto done;
1654 	}
1655 #if BRIDGESTP
1656 	bstp_attach(&sc->sc_stp, &bridge_ops);
1657 #endif /* BRIDGESTP */
1658 
1659 	lck_mtx_lock(&bridge_list_mtx);
1660 	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1661 	lck_mtx_unlock(&bridge_list_mtx);
1662 
1663 	/* attach as ethernet */
1664 	error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1665 	    NULL, NULL);
1666 
1667 done:
1668 	if (error != 0) {
1669 		BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1670 		/* TBD: Clean up: sc, sc_rthash etc */
1671 	}
1672 
1673 	return error;
1674 }
1675 
1676 /*
1677  * bridge_clone_destroy:
1678  *
1679  *	Destroy a bridge instance.
1680  */
1681 static int
bridge_clone_destroy(struct ifnet * ifp)1682 bridge_clone_destroy(struct ifnet *ifp)
1683 {
1684 	struct bridge_softc *sc = ifp->if_softc;
1685 	struct bridge_iflist *bif;
1686 	errno_t error;
1687 
1688 	BRIDGE_LOCK(sc);
1689 	if ((sc->sc_flags & SCF_DETACHING)) {
1690 		BRIDGE_UNLOCK(sc);
1691 		return 0;
1692 	}
1693 	sc->sc_flags |= SCF_DETACHING;
1694 
1695 	bridge_ifstop(ifp, 1);
1696 
1697 	bridge_cancel_delayed_call(&sc->sc_resize_call);
1698 
1699 	bridge_cleanup_delayed_call(&sc->sc_resize_call);
1700 	bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1701 
1702 	error = ifnet_set_flags(ifp, 0, IFF_UP);
1703 	if (error != 0) {
1704 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1705 	}
1706 
1707 	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1708 		bridge_delete_member(sc, bif);
1709 	}
1710 
1711 	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1712 		bridge_delete_span(sc, bif);
1713 	}
1714 	BRIDGE_UNLOCK(sc);
1715 
1716 	error = ifnet_detach(ifp);
1717 	if (error != 0) {
1718 		panic("%s (%d): ifnet_detach(%p) failed %d",
1719 		    __func__, __LINE__, ifp, error);
1720 	}
1721 	return 0;
1722 }
1723 
/*
 * DRVSPEC:
 *
 *	Shared body of the driver-specific ioctl cases in bridge_ioctl().
 *	It must be expanded in a scope that provides `cmd', `sc', `error',
 *	`ifd', `bc', `args' and `bridge_control_table'.
 *
 *	The request is looked up in the control table by ifd->ifd_cmd,
 *	checked for direction, privilege and argument size, copied in if
 *	the entry asks for it, dispatched with the bridge lock held and
 *	copied back out if the entry asks for it.  The first failure leaves
 *	its errno in `error' and abandons the remaining steps.
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) {                \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
	bc = &bridge_control_table[ifd->ifd_cmd];                       \
                                                                        \
	if ((cmd == SIOCGDRVSPEC &&                                     \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) ||                      \
	    (cmd == SIOCSDRVSPEC &&                                     \
	    (bc->bc_flags & BC_F_COPYOUT) != 0)) {                      \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	if ((bc->bc_flags & BC_F_SUSER) != 0) {                         \
	        error = kauth_authorize_generic(kauth_cred_get(),       \
	            KAUTH_GENERIC_ISSUSER);                             \
	        if (error != 0) {                                       \
	                break;                                          \
	        }                                                       \
	}                                                               \
                                                                        \
	if (ifd->ifd_len != bc->bc_argsize ||                           \
	    ifd->ifd_len > sizeof (args)) {                             \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	bzero(&args, sizeof (args));                                    \
	if ((bc->bc_flags & BC_F_COPYIN) != 0) {                        \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len);     \
	        if (error != 0) {                                       \
	                break;                                          \
	        }                                                       \
	}                                                               \
                                                                        \
	BRIDGE_LOCK(sc);                                                \
	error = (*bc->bc_func)(sc, &args);                              \
	BRIDGE_UNLOCK(sc);                                              \
	if (error != 0) {                                               \
	        break;                                                  \
	}                                                               \
                                                                        \
	if ((bc->bc_flags & BC_F_COPYOUT) != 0) {                       \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len);    \
	}                                                               \
} while (0)
1770 
1771 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1772 interface_needs_input_broadcast(struct ifnet * ifp)
1773 {
1774 	/*
1775 	 * Selectively enable input broadcast only when necessary.
1776 	 * The bridge interface itself attaches a fake protocol
1777 	 * so checking for at least two protocols means that the
1778 	 * interface is being used for something besides bridging
1779 	 * and needs to see broadcast packets from other members.
1780 	 */
1781 	return if_get_protolist(ifp, NULL, 0) >= 2;
1782 }
1783 
1784 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1785 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1786 {
1787 	boolean_t       old_input_broadcast;
1788 
1789 	old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1790 	if (input_broadcast) {
1791 		bif->bif_flags |= BIFF_INPUT_BROADCAST;
1792 	} else {
1793 		bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1794 	}
1795 	return old_input_broadcast != input_broadcast;
1796 }
1797 
1798 /*
1799  * bridge_ioctl:
1800  *
1801  *	Handle a control request from the operator.
1802  */
1803 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * data)1804 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1805 {
1806 	struct bridge_softc *sc = ifp->if_softc;
1807 	struct ifreq *ifr = (struct ifreq *)data;
1808 	struct bridge_iflist *bif;
1809 	int error = 0;
1810 
1811 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1812 
1813 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1814 	    "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1815 	    ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1816 	    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1817 	    (char)IOCGROUP(cmd), cmd & 0xff);
1818 
1819 	switch (cmd) {
1820 	case SIOCSIFADDR:
1821 	case SIOCAIFADDR:
1822 		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1823 		break;
1824 
1825 	case SIOCGIFMEDIA32:
1826 	case SIOCGIFMEDIA64: {
1827 		struct ifmediareq *ifmr = (struct ifmediareq *)data;
1828 		user_addr_t user_addr;
1829 
1830 		user_addr = (cmd == SIOCGIFMEDIA64) ?
1831 		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
1832 		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
1833 
1834 		ifmr->ifm_status = IFM_AVALID;
1835 		ifmr->ifm_mask = 0;
1836 		ifmr->ifm_count = 1;
1837 
1838 		BRIDGE_LOCK(sc);
1839 		if (!(sc->sc_flags & SCF_DETACHING) &&
1840 		    (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1841 			ifmr->ifm_status |= IFM_ACTIVE;
1842 			ifmr->ifm_active = ifmr->ifm_current =
1843 			    IFM_ETHER | IFM_AUTO;
1844 		} else {
1845 			ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1846 		}
1847 		BRIDGE_UNLOCK(sc);
1848 
1849 		if (user_addr != USER_ADDR_NULL) {
1850 			error = copyout(&ifmr->ifm_current, user_addr,
1851 			    sizeof(int));
1852 		}
1853 		break;
1854 	}
1855 
1856 	case SIOCADDMULTI:
1857 	case SIOCDELMULTI:
1858 		break;
1859 
1860 	case SIOCSDRVSPEC32:
1861 	case SIOCGDRVSPEC32: {
1862 		union {
1863 			struct ifbreq ifbreq;
1864 			struct ifbifconf32 ifbifconf;
1865 			struct ifbareq32 ifbareq;
1866 			struct ifbaconf32 ifbaconf;
1867 			struct ifbrparam ifbrparam;
1868 			struct ifbropreq32 ifbropreq;
1869 		} args;
1870 		struct ifdrv32 *ifd = (struct ifdrv32 *)data;
1871 		const struct bridge_control *bridge_control_table =
1872 		    bridge_control_table32, *bc;
1873 
1874 		DRVSPEC;
1875 
1876 		break;
1877 	}
1878 	case SIOCSDRVSPEC64:
1879 	case SIOCGDRVSPEC64: {
1880 		union {
1881 			struct ifbreq ifbreq;
1882 			struct ifbifconf64 ifbifconf;
1883 			struct ifbareq64 ifbareq;
1884 			struct ifbaconf64 ifbaconf;
1885 			struct ifbrparam ifbrparam;
1886 			struct ifbropreq64 ifbropreq;
1887 		} args;
1888 		struct ifdrv64 *ifd = (struct ifdrv64 *)data;
1889 		const struct bridge_control *bridge_control_table =
1890 		    bridge_control_table64, *bc;
1891 
1892 		DRVSPEC;
1893 
1894 		break;
1895 	}
1896 
1897 	case SIOCSIFFLAGS:
1898 		if (!(ifp->if_flags & IFF_UP) &&
1899 		    (ifp->if_flags & IFF_RUNNING)) {
1900 			/*
1901 			 * If interface is marked down and it is running,
1902 			 * then stop and disable it.
1903 			 */
1904 			BRIDGE_LOCK(sc);
1905 			bridge_ifstop(ifp, 1);
1906 			BRIDGE_UNLOCK(sc);
1907 		} else if ((ifp->if_flags & IFF_UP) &&
1908 		    !(ifp->if_flags & IFF_RUNNING)) {
1909 			/*
1910 			 * If interface is marked up and it is stopped, then
1911 			 * start it.
1912 			 */
1913 			BRIDGE_LOCK(sc);
1914 			error = bridge_init(ifp);
1915 			BRIDGE_UNLOCK(sc);
1916 		}
1917 		break;
1918 
1919 	case SIOCSIFLLADDR:
1920 		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
1921 		    ifr->ifr_addr.sa_len);
1922 		if (error != 0) {
1923 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1924 			    "%s SIOCSIFLLADDR error %d", ifp->if_xname,
1925 			    error);
1926 		}
1927 		break;
1928 
1929 	case SIOCSIFMTU:
1930 		if (ifr->ifr_mtu < 576) {
1931 			error = EINVAL;
1932 			break;
1933 		}
1934 		BRIDGE_LOCK(sc);
1935 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
1936 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1937 			BRIDGE_UNLOCK(sc);
1938 			break;
1939 		}
1940 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1941 			if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
1942 				BRIDGE_LOG(LOG_NOTICE, 0,
1943 				    "%s invalid MTU: %u(%s) != %d",
1944 				    sc->sc_ifp->if_xname,
1945 				    bif->bif_ifp->if_mtu,
1946 				    bif->bif_ifp->if_xname, ifr->ifr_mtu);
1947 				error = EINVAL;
1948 				break;
1949 			}
1950 		}
1951 		if (!error) {
1952 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1953 		}
1954 		BRIDGE_UNLOCK(sc);
1955 		break;
1956 
1957 	default:
1958 		error = ether_ioctl(ifp, cmd, data);
1959 		if (error != 0 && error != EOPNOTSUPP) {
1960 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1961 			    "ifp %s cmd 0x%08lx "
1962 			    "(%c%c [%lu] %c %lu) failed error: %d",
1963 			    ifp->if_xname, cmd,
1964 			    (cmd & IOC_IN) ? 'I' : ' ',
1965 			    (cmd & IOC_OUT) ? 'O' : ' ',
1966 			    IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
1967 			    cmd & 0xff, error);
1968 		}
1969 		break;
1970 	}
1971 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1972 
1973 	return error;
1974 }
1975 
1976 #if HAS_IF_CAP
1977 /*
1978  * bridge_mutecaps:
1979  *
1980  *	Clear or restore unwanted capabilities on the member interface
1981  */
1982 static void
bridge_mutecaps(struct bridge_softc * sc)1983 bridge_mutecaps(struct bridge_softc *sc)
1984 {
1985 	struct bridge_iflist *bif;
1986 	int enabled, mask;
1987 
1988 	/* Initial bitmask of capabilities to test */
1989 	mask = BRIDGE_IFCAPS_MASK;
1990 
1991 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1992 		/* Every member must support it or its disabled */
1993 		mask &= bif->bif_savedcaps;
1994 	}
1995 
1996 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1997 		enabled = bif->bif_ifp->if_capenable;
1998 		enabled &= ~BRIDGE_IFCAPS_STRIP;
1999 		/* strip off mask bits and enable them again if allowed */
2000 		enabled &= ~BRIDGE_IFCAPS_MASK;
2001 		enabled |= mask;
2002 
2003 		bridge_set_ifcap(sc, bif, enabled);
2004 	}
2005 }
2006 
2007 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)2008 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
2009 {
2010 	struct ifnet *ifp = bif->bif_ifp;
2011 	struct ifreq ifr;
2012 	int error;
2013 
2014 	bzero(&ifr, sizeof(ifr));
2015 	ifr.ifr_reqcap = set;
2016 
2017 	if (ifp->if_capenable != set) {
2018 		IFF_LOCKGIANT(ifp);
2019 		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2020 		IFF_UNLOCKGIANT(ifp);
2021 		if (error) {
2022 			BRIDGE_LOG(LOG_NOTICE, 0,
2023 			    "%s error setting interface capabilities on %s",
2024 			    sc->sc_ifp->if_xname, ifp->if_xname);
2025 		}
2026 	}
2027 }
2028 #endif /* HAS_IF_CAP */
2029 
2030 static errno_t
siocsifcap(struct ifnet * ifp,uint32_t cap_enable)2031 siocsifcap(struct ifnet * ifp, uint32_t cap_enable)
2032 {
2033 	struct ifreq    ifr;
2034 
2035 	bzero(&ifr, sizeof(ifr));
2036 	ifr.ifr_reqcap = cap_enable;
2037 	return ifnet_ioctl(ifp, 0, SIOCSIFCAP, &ifr);
2038 }
2039 
2040 static const char *
enable_disable_str(boolean_t enable)2041 enable_disable_str(boolean_t enable)
2042 {
2043 	return enable ? "enable" : "disable";
2044 }
2045 
2046 static boolean_t
bridge_set_lro(struct ifnet * ifp,boolean_t enable)2047 bridge_set_lro(struct ifnet * ifp, boolean_t enable)
2048 {
2049 	uint32_t        cap_enable;
2050 	uint32_t        cap_supported;
2051 	boolean_t       changed = FALSE;
2052 	boolean_t       lro_enabled;
2053 
2054 	cap_supported = ifnet_capabilities_supported(ifp);
2055 	if ((cap_supported & IFCAP_LRO) == 0) {
2056 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2057 		    "%s doesn't support LRO",
2058 		    ifp->if_xname);
2059 		goto done;
2060 	}
2061 	cap_enable = ifnet_capabilities_enabled(ifp);
2062 	lro_enabled = (cap_enable & IFCAP_LRO) != 0;
2063 	if (lro_enabled != enable) {
2064 		errno_t         error;
2065 
2066 		if (enable) {
2067 			cap_enable |= IFCAP_LRO;
2068 		} else {
2069 			cap_enable &= ~IFCAP_LRO;
2070 		}
2071 		error = siocsifcap(ifp, cap_enable);
2072 		if (error != 0) {
2073 			BRIDGE_LOG(LOG_NOTICE, 0,
2074 			    "%s %s failed (cap 0x%x) %d",
2075 			    ifp->if_xname,
2076 			    enable_disable_str(enable),
2077 			    cap_enable,
2078 			    error);
2079 		} else {
2080 			changed = TRUE;
2081 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2082 			    "%s %s success (cap 0x%x)",
2083 			    ifp->if_xname,
2084 			    enable_disable_str(enable),
2085 			    cap_enable);
2086 		}
2087 	}
2088 done:
2089 	return changed;
2090 }
2091 
2092 static errno_t
bridge_set_tso(struct bridge_softc * sc)2093 bridge_set_tso(struct bridge_softc *sc)
2094 {
2095 	struct bridge_iflist *bif;
2096 	u_int32_t tso_v4_mtu;
2097 	u_int32_t tso_v6_mtu;
2098 	ifnet_offload_t offload;
2099 	errno_t error = 0;
2100 
2101 	/* By default, support TSO */
2102 	offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2103 	tso_v4_mtu = IP_MAXPACKET;
2104 	tso_v6_mtu = IP_MAXPACKET;
2105 
2106 	/* Use the lowest common denominator of the members */
2107 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2108 		ifnet_t ifp = bif->bif_ifp;
2109 
2110 		if (ifp == NULL) {
2111 			continue;
2112 		}
2113 
2114 		if (offload & IFNET_TSO_IPV4) {
2115 			if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2116 				if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2117 					tso_v4_mtu = ifp->if_tso_v4_mtu;
2118 				}
2119 			} else {
2120 				offload &= ~IFNET_TSO_IPV4;
2121 				tso_v4_mtu = 0;
2122 			}
2123 		}
2124 		if (offload & IFNET_TSO_IPV6) {
2125 			if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2126 				if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2127 					tso_v6_mtu = ifp->if_tso_v6_mtu;
2128 				}
2129 			} else {
2130 				offload &= ~IFNET_TSO_IPV6;
2131 				tso_v6_mtu = 0;
2132 			}
2133 		}
2134 	}
2135 
2136 	if (offload != sc->sc_ifp->if_hwassist) {
2137 		error = ifnet_set_offload(sc->sc_ifp, offload);
2138 		if (error != 0) {
2139 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2140 			    "ifnet_set_offload(%s, 0x%x) failed %d",
2141 			    sc->sc_ifp->if_xname, offload, error);
2142 			goto done;
2143 		}
2144 		/*
2145 		 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2146 		 * as large as the interface MTU
2147 		 */
2148 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2149 			if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2150 				tso_v4_mtu = sc->sc_ifp->if_mtu;
2151 			}
2152 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2153 			    tso_v4_mtu);
2154 			if (error != 0) {
2155 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2156 				    "ifnet_set_tso_mtu(%s, "
2157 				    "AF_INET, %u) failed %d",
2158 				    sc->sc_ifp->if_xname,
2159 				    tso_v4_mtu, error);
2160 				goto done;
2161 			}
2162 		}
2163 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2164 			if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2165 				tso_v6_mtu = sc->sc_ifp->if_mtu;
2166 			}
2167 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2168 			    tso_v6_mtu);
2169 			if (error != 0) {
2170 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2171 				    "ifnet_set_tso_mtu(%s, "
2172 				    "AF_INET6, %u) failed %d",
2173 				    sc->sc_ifp->if_xname,
2174 				    tso_v6_mtu, error);
2175 				goto done;
2176 			}
2177 		}
2178 	}
2179 done:
2180 	return error;
2181 }
2182 
2183 /*
2184  * bridge_lookup_member:
2185  *
2186  *	Lookup a bridge member interface.
2187  */
2188 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,const char * name)2189 bridge_lookup_member(struct bridge_softc *sc, const char *name)
2190 {
2191 	struct bridge_iflist *bif;
2192 	struct ifnet *ifp;
2193 
2194 	BRIDGE_LOCK_ASSERT_HELD(sc);
2195 
2196 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2197 		ifp = bif->bif_ifp;
2198 		if (strcmp(ifp->if_xname, name) == 0) {
2199 			return bif;
2200 		}
2201 	}
2202 
2203 	return NULL;
2204 }
2205 
2206 /*
2207  * bridge_lookup_member_if:
2208  *
2209  *	Lookup a bridge member interface by ifnet*.
2210  */
2211 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2212 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2213 {
2214 	struct bridge_iflist *bif;
2215 
2216 	BRIDGE_LOCK_ASSERT_HELD(sc);
2217 
2218 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2219 		if (bif->bif_ifp == member_ifp) {
2220 			return bif;
2221 		}
2222 	}
2223 
2224 	return NULL;
2225 }
2226 
2227 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2228 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2229     mbuf_t *data, char **frame_ptr)
2230 {
2231 #pragma unused(protocol)
2232 	errno_t error = 0;
2233 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2234 	struct bridge_softc *sc = bif->bif_sc;
2235 	int included = 0;
2236 	size_t frmlen = 0;
2237 	mbuf_t m = *data;
2238 
2239 	if ((m->m_flags & M_PROTO1)) {
2240 		goto out;
2241 	}
2242 
2243 	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2244 	    *frame_ptr <= (char *)mbuf_data(m)) {
2245 		included = 1;
2246 		frmlen = (char *)mbuf_data(m) - *frame_ptr;
2247 	}
2248 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2249 	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2250 	    "frmlen %lu", sc->sc_ifp->if_xname,
2251 	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2252 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
2253 	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2254 	    included ? "inside" : "outside", frmlen);
2255 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2256 		brlog_mbuf(m, "bridge_iff_input[", "");
2257 		brlog_ether_header((struct ether_header *)
2258 		    (void *)*frame_ptr);
2259 		brlog_mbuf_data(m, 0, 20);
2260 	}
2261 	if (included == 0) {
2262 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2263 		goto out;
2264 	}
2265 
2266 	/* Move data pointer to start of frame to the link layer header */
2267 	(void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
2268 	    mbuf_len(m) + frmlen);
2269 	(void) mbuf_pkthdr_adjustlen(m, frmlen);
2270 
2271 	/* make sure we can access the ethernet header */
2272 	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2273 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2274 		    "short frame %lu < %lu",
2275 		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
2276 		goto out;
2277 	}
2278 	if (mbuf_len(m) < sizeof(struct ether_header)) {
2279 		error = mbuf_pullup(data, sizeof(struct ether_header));
2280 		if (error != 0) {
2281 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2282 			    "mbuf_pullup(%lu) failed %d",
2283 			    sizeof(struct ether_header),
2284 			    error);
2285 			error = EJUSTRETURN;
2286 			goto out;
2287 		}
2288 		if (m != *data) {
2289 			m = *data;
2290 			*frame_ptr = mbuf_data(m);
2291 		}
2292 	}
2293 
2294 	error = bridge_input(ifp, data);
2295 
2296 	/* Adjust packet back to original */
2297 	if (error == 0) {
2298 		/* bridge_input might have modified *data */
2299 		if (*data != m) {
2300 			m = *data;
2301 			*frame_ptr = mbuf_data(m);
2302 		}
2303 		(void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen,
2304 		    mbuf_len(m) - frmlen);
2305 		(void) mbuf_pkthdr_adjustlen(m, -frmlen);
2306 	}
2307 
2308 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2309 	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2310 		brlog_mbuf(m, "bridge_iff_input]", "");
2311 	}
2312 
2313 out:
2314 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2315 
2316 	return error;
2317 }
2318 
2319 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2320 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2321     mbuf_t *data)
2322 {
2323 #pragma unused(protocol)
2324 	errno_t error = 0;
2325 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2326 	struct bridge_softc *sc = bif->bif_sc;
2327 	mbuf_t m = *data;
2328 
2329 	if ((m->m_flags & M_PROTO1)) {
2330 		goto out;
2331 	}
2332 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2333 	    "%s from %s m 0x%llx data 0x%llx",
2334 	    sc->sc_ifp->if_xname, ifp->if_xname,
2335 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
2336 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
2337 
2338 	error = bridge_member_output(sc, ifp, data);
2339 	if (error != 0 && error != EJUSTRETURN) {
2340 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2341 		    "bridge_member_output failed error %d",
2342 		    error);
2343 	}
2344 out:
2345 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2346 
2347 	return error;
2348 }
2349 
2350 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2351 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2352     const struct kev_msg *event_msg)
2353 {
2354 #pragma unused(protocol)
2355 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2356 	struct bridge_softc *sc = bif->bif_sc;
2357 
2358 	if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2359 	    event_msg->kev_class == KEV_NETWORK_CLASS &&
2360 	    event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2361 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2362 		    "%s event_code %u - %s",
2363 		    ifp->if_xname, event_msg->event_code,
2364 		    dlil_kev_dl_code_str(event_msg->event_code));
2365 
2366 		switch (event_msg->event_code) {
2367 		case KEV_DL_LINK_OFF:
2368 		case KEV_DL_LINK_ON: {
2369 			bridge_iflinkevent(ifp);
2370 #if BRIDGESTP
2371 			bstp_linkstate(ifp, event_msg->event_code);
2372 #endif /* BRIDGESTP */
2373 			break;
2374 		}
2375 		case KEV_DL_SIFFLAGS: {
2376 			if ((ifp->if_flags & IFF_UP) == 0) {
2377 				break;
2378 			}
2379 			if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2380 				errno_t error;
2381 
2382 				error = ifnet_set_promiscuous(ifp, 1);
2383 				if (error != 0) {
2384 					BRIDGE_LOG(LOG_NOTICE, 0,
2385 					    "ifnet_set_promiscuous (%s)"
2386 					    " failed %d", ifp->if_xname,
2387 					    error);
2388 				} else {
2389 					bif->bif_flags |= BIFF_PROMISC;
2390 				}
2391 			}
2392 			if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2393 			    (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2394 				errno_t error;
2395 
2396 				error = if_allmulti(ifp, 1);
2397 				if (error != 0) {
2398 					BRIDGE_LOG(LOG_NOTICE, 0,
2399 					    "if_allmulti (%s)"
2400 					    " failed %d", ifp->if_xname,
2401 					    error);
2402 				} else {
2403 					bif->bif_flags |= BIFF_ALL_MULTI;
2404 #ifdef XNU_PLATFORM_AppleTVOS
2405 					ip6_forwarding = 1;
2406 #endif /* XNU_PLATFORM_AppleTVOS */
2407 				}
2408 			}
2409 			break;
2410 		}
2411 		case KEV_DL_IFCAP_CHANGED: {
2412 			BRIDGE_LOCK(sc);
2413 			bridge_set_tso(sc);
2414 			BRIDGE_UNLOCK(sc);
2415 			break;
2416 		}
2417 		case KEV_DL_PROTO_DETACHED:
2418 		case KEV_DL_PROTO_ATTACHED: {
2419 			bridge_proto_attach_changed(ifp);
2420 			break;
2421 		}
2422 		default:
2423 			break;
2424 		}
2425 	}
2426 }
2427 
2428 /*
2429  * bridge_iff_detached:
2430  *
2431  *      Called when our interface filter has been detached from a
2432  *      member interface.
2433  */
2434 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2435 bridge_iff_detached(void *cookie, ifnet_t ifp)
2436 {
2437 #pragma unused(cookie)
2438 	struct bridge_iflist *bif;
2439 	struct bridge_softc *sc = ifp->if_bridge;
2440 
2441 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2442 
2443 	/* Check if the interface is a bridge member */
2444 	if (sc != NULL) {
2445 		BRIDGE_LOCK(sc);
2446 		bif = bridge_lookup_member_if(sc, ifp);
2447 		if (bif != NULL) {
2448 			bridge_delete_member(sc, bif);
2449 		}
2450 		BRIDGE_UNLOCK(sc);
2451 		return;
2452 	}
2453 	/* Check if the interface is a span port */
2454 	lck_mtx_lock(&bridge_list_mtx);
2455 	LIST_FOREACH(sc, &bridge_list, sc_list) {
2456 		BRIDGE_LOCK(sc);
2457 		TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2458 		if (ifp == bif->bif_ifp) {
2459 			bridge_delete_span(sc, bif);
2460 			break;
2461 		}
2462 		BRIDGE_UNLOCK(sc);
2463 	}
2464 	lck_mtx_unlock(&bridge_list_mtx);
2465 }
2466 
2467 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2468 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2469     char *header)
2470 {
2471 #pragma unused(protocol, packet, header)
2472 	BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2473 	    ifp->if_xname);
2474 	return 0;
2475 }
2476 
2477 static int
bridge_attach_protocol(struct ifnet * ifp)2478 bridge_attach_protocol(struct ifnet *ifp)
2479 {
2480 	int     error;
2481 	struct ifnet_attach_proto_param reg;
2482 
2483 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2484 	bzero(&reg, sizeof(reg));
2485 	reg.input = bridge_proto_input;
2486 
2487 	error = ifnet_attach_protocol(ifp, PF_BRIDGE, &reg);
2488 	if (error) {
2489 		BRIDGE_LOG(LOG_NOTICE, 0,
2490 		    "ifnet_attach_protocol(%s) failed, %d",
2491 		    ifp->if_xname, error);
2492 	}
2493 
2494 	return error;
2495 }
2496 
2497 static int
bridge_detach_protocol(struct ifnet * ifp)2498 bridge_detach_protocol(struct ifnet *ifp)
2499 {
2500 	int     error;
2501 
2502 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2503 	error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2504 	if (error) {
2505 		BRIDGE_LOG(LOG_NOTICE, 0,
2506 		    "ifnet_detach_protocol(%s) failed, %d",
2507 		    ifp->if_xname, error);
2508 	}
2509 
2510 	return error;
2511 }
2512 
2513 /*
2514  * bridge_delete_member:
2515  *
2516  *	Delete the specified member interface.
2517  */
2518 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)2519 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2520 {
2521 #if SKYWALK
2522 	boolean_t add_netagent = FALSE;
2523 #endif /* SKYWALK */
2524 	uint32_t    bif_flags;
2525 	struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2526 	int lladdr_changed = 0, error;
2527 	uint8_t eaddr[ETHER_ADDR_LEN];
2528 	u_int32_t event_code = 0;
2529 
2530 	BRIDGE_LOCK_ASSERT_HELD(sc);
2531 	VERIFY(ifs != NULL);
2532 
2533 	/*
2534 	 * Remove the member from the list first so it cannot be found anymore
2535 	 * when we release the bridge lock below
2536 	 */
2537 	if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2538 		BRIDGE_XLOCK(sc);
2539 		TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2540 		BRIDGE_XDROP(sc);
2541 	}
2542 	if (sc->sc_mac_nat_bif != NULL) {
2543 		if (bif == sc->sc_mac_nat_bif) {
2544 			bridge_mac_nat_disable(sc);
2545 		} else {
2546 			bridge_mac_nat_flush_entries(sc, bif);
2547 		}
2548 	}
2549 #if BRIDGESTP
2550 	if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2551 		bstp_disable(&bif->bif_stp);
2552 	}
2553 #endif /* BRIDGESTP */
2554 
2555 	/*
2556 	 * If removing the interface that gave the bridge its mac address, set
2557 	 * the mac address of the bridge to the address of the next member, or
2558 	 * to its default address if no members are left.
2559 	 */
2560 	if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2561 		ifnet_release(sc->sc_ifaddr);
2562 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
2563 			bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
2564 			sc->sc_ifaddr = NULL;
2565 		} else {
2566 			struct ifnet *fif =
2567 			    TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2568 			bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
2569 			sc->sc_ifaddr = fif;
2570 			ifnet_reference(fif);   /* for sc_ifaddr */
2571 		}
2572 		lladdr_changed = 1;
2573 	}
2574 
2575 #if HAS_IF_CAP
2576 	bridge_mutecaps(sc);    /* recalculate now this interface is removed */
2577 #endif /* HAS_IF_CAP */
2578 
2579 	error = bridge_set_tso(sc);
2580 	if (error != 0) {
2581 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2582 	}
2583 
2584 	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
2585 
2586 	KASSERT(bif->bif_addrcnt == 0,
2587 	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2588 
2589 	/*
2590 	 * Update link status of the bridge based on its remaining members
2591 	 */
2592 	event_code = bridge_updatelinkstatus(sc);
2593 	bif_flags = bif->bif_flags;
2594 	BRIDGE_UNLOCK(sc);
2595 
2596 	/* only perform these steps if the interface is still attached */
2597 	if (ifnet_is_attached(ifs, 1)) {
2598 #if SKYWALK
2599 		add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;
2600 
2601 		if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2602 			ifnet_detach_flowswitch_nexus(ifs);
2603 		}
2604 #endif /* SKYWALK */
2605 		/* disable promiscuous mode */
2606 		if ((bif_flags & BIFF_PROMISC) != 0) {
2607 			(void) ifnet_set_promiscuous(ifs, 0);
2608 		}
2609 		/* disable all multi */
2610 		if ((bif_flags & BIFF_ALL_MULTI) != 0) {
2611 			(void)if_allmulti(ifs, 0);
2612 		}
2613 #if HAS_IF_CAP
2614 		/* re-enable any interface capabilities */
2615 		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2616 #endif
2617 		/* detach bridge "protocol" */
2618 		if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2619 			(void)bridge_detach_protocol(ifs);
2620 		}
2621 		/* detach interface filter */
2622 		if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2623 			iflt_detach(bif->bif_iff_ref);
2624 		}
2625 		/* re-enable LRO */
2626 		if ((bif_flags & BIFF_LRO_DISABLED) != 0) {
2627 			(void)bridge_set_lro(ifs, TRUE);
2628 		}
2629 		ifnet_decr_iorefcnt(ifs);
2630 	}
2631 
2632 	if (lladdr_changed &&
2633 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2634 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2635 	}
2636 
2637 	if (event_code != 0) {
2638 		bridge_link_event(bifp, event_code);
2639 	}
2640 
2641 #if BRIDGESTP
2642 	bstp_destroy(&bif->bif_stp);    /* prepare to free */
2643 #endif /* BRIDGESTP */
2644 
2645 	kfree_type(struct bridge_iflist, bif);
2646 	ifs->if_bridge = NULL;
2647 #if SKYWALK
2648 	if (add_netagent && ifnet_is_attached(ifs, 1)) {
2649 		(void)ifnet_add_netagent(ifs);
2650 		ifnet_decr_iorefcnt(ifs);
2651 	}
2652 #endif /* SKYWALK */
2653 
2654 	ifnet_release(ifs);
2655 
2656 	BRIDGE_LOCK(sc);
2657 }
2658 
2659 /*
2660  * bridge_delete_span:
2661  *
2662  *	Delete the specified span interface.
2663  */
2664 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2665 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2666 {
2667 	BRIDGE_LOCK_ASSERT_HELD(sc);
2668 
2669 	KASSERT(bif->bif_ifp->if_bridge == NULL,
2670 	    ("%s: not a span interface", __func__));
2671 
2672 	ifnet_release(bif->bif_ifp);
2673 
2674 	TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2675 	kfree_type(struct bridge_iflist, bif);
2676 }
2677 
2678 static int
bridge_ioctl_add(struct bridge_softc * sc,void * arg)2679 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
2680 {
2681 	struct ifbreq *req = arg;
2682 	struct bridge_iflist *bif = NULL;
2683 	struct ifnet *ifs, *bifp = sc->sc_ifp;
2684 	int error = 0, lladdr_changed = 0;
2685 	uint8_t eaddr[ETHER_ADDR_LEN];
2686 	struct iff_filter iff;
2687 	u_int32_t event_code = 0;
2688 	boolean_t input_broadcast;
2689 	int media_active;
2690 	boolean_t wifi_infra = FALSE;
2691 
2692 	ifs = ifunit(req->ifbr_ifsname);
2693 	if (ifs == NULL) {
2694 		return ENOENT;
2695 	}
2696 	if (ifs->if_ioctl == NULL) {    /* must be supported */
2697 		return EINVAL;
2698 	}
2699 
2700 	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
2701 		return EINVAL;
2702 	}
2703 
2704 	/* If it's in the span list, it can't be a member. */
2705 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2706 		if (ifs == bif->bif_ifp) {
2707 			return EBUSY;
2708 		}
2709 	}
2710 
2711 	if (ifs->if_bridge == sc) {
2712 		return EEXIST;
2713 	}
2714 
2715 	if (ifs->if_bridge != NULL) {
2716 		return EBUSY;
2717 	}
2718 
2719 	switch (ifs->if_type) {
2720 	case IFT_ETHER:
2721 		if (strcmp(ifs->if_name, "en") == 0 &&
2722 		    ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2723 		    (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2724 			/* XXX is there a better way to identify Wi-Fi STA? */
2725 			wifi_infra = TRUE;
2726 		}
2727 		break;
2728 	case IFT_L2VLAN:
2729 	case IFT_IEEE8023ADLAG:
2730 		break;
2731 	case IFT_GIF:
2732 	/* currently not supported */
2733 	/* FALLTHRU */
2734 	default:
2735 		return EINVAL;
2736 	}
2737 
2738 	/* fail to add the interface if the MTU doesn't match */
2739 	if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2740 		BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2741 		    sc->sc_ifp->if_xname,
2742 		    ifs->if_xname);
2743 		return EINVAL;
2744 	}
2745 
2746 	/* there's already an interface that's doing MAC NAT */
2747 	if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
2748 		return EBUSY;
2749 	}
2750 
2751 	/* prevent the interface from detaching while we add the member */
2752 	if (!ifnet_is_attached(ifs, 1)) {
2753 		return ENXIO;
2754 	}
2755 
2756 	/* allocate a new member */
2757 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2758 	bif->bif_ifp = ifs;
2759 	ifnet_reference(ifs);
2760 	bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2761 #if HAS_IF_CAP
2762 	bif->bif_savedcaps = ifs->if_capenable;
2763 #endif /* HAS_IF_CAP */
2764 	bif->bif_sc = sc;
2765 	if (wifi_infra) {
2766 		(void)bridge_mac_nat_enable(sc, bif);
2767 	}
2768 
2769 	if (IFNET_IS_VMNET(ifs)) {
2770 		allocate_vmnet_pf_tags();
2771 	}
2772 	/* Allow the first Ethernet member to define the MTU */
2773 	if (TAILQ_EMPTY(&sc->sc_iflist)) {
2774 		sc->sc_ifp->if_mtu = ifs->if_mtu;
2775 	}
2776 
2777 	/*
2778 	 * Assign the interface's MAC address to the bridge if it's the first
2779 	 * member and the MAC address of the bridge has not been changed from
2780 	 * the default (randomly) generated one.
2781 	 */
2782 	if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2783 	    _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2784 		bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2785 		sc->sc_ifaddr = ifs;
2786 		ifnet_reference(ifs);   /* for sc_ifaddr */
2787 		lladdr_changed = 1;
2788 	}
2789 
2790 	ifs->if_bridge = sc;
2791 #if BRIDGESTP
2792 	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2793 #endif /* BRIDGESTP */
2794 
2795 #if HAS_IF_CAP
2796 	/* Set interface capabilities to the intersection set of all members */
2797 	bridge_mutecaps(sc);
2798 #endif /* HAS_IF_CAP */
2799 
2800 
2801 	/*
2802 	 * Respect lock ordering with DLIL lock for the following operations
2803 	 */
2804 	BRIDGE_UNLOCK(sc);
2805 
2806 	/* enable promiscuous mode */
2807 	error = ifnet_set_promiscuous(ifs, 1);
2808 	switch (error) {
2809 	case 0:
2810 		bif->bif_flags |= BIFF_PROMISC;
2811 		break;
2812 	case ENETDOWN:
2813 	case EPWROFF:
2814 		BRIDGE_LOG(LOG_NOTICE, 0,
2815 		    "ifnet_set_promiscuous(%s) failed %d, ignoring",
2816 		    ifs->if_xname, error);
2817 		/* Ignore error when device is not up */
2818 		error = 0;
2819 		break;
2820 	default:
2821 		BRIDGE_LOG(LOG_NOTICE, 0,
2822 		    "ifnet_set_promiscuous(%s) failed %d",
2823 		    ifs->if_xname, error);
2824 		BRIDGE_LOCK(sc);
2825 		goto out;
2826 	}
2827 	if (wifi_infra) {
2828 		int this_error;
2829 
2830 		/* Wi-Fi doesn't really support promiscuous, set allmulti */
2831 		bif->bif_flags |= BIFF_WIFI_INFRA;
2832 		this_error = if_allmulti(ifs, 1);
2833 		if (this_error == 0) {
2834 			bif->bif_flags |= BIFF_ALL_MULTI;
2835 #ifdef XNU_PLATFORM_AppleTVOS
2836 			ip6_forwarding = 1;
2837 #endif /* XNU_PLATFORM_AppleTVOS */
2838 		} else {
2839 			BRIDGE_LOG(LOG_NOTICE, 0,
2840 			    "if_allmulti(%s) failed %d, ignoring",
2841 			    ifs->if_xname, this_error);
2842 		}
2843 	}
2844 #if SKYWALK
2845 	/* ensure that the flowswitch is present for native interface */
2846 	if (SKYWALK_NATIVE(ifs)) {
2847 		if (ifnet_attach_flowswitch_nexus(ifs)) {
2848 			bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
2849 		}
2850 	}
2851 	/* remove the netagent on the flowswitch (rdar://75050182) */
2852 	if (if_is_fsw_netagent_enabled()) {
2853 		(void)ifnet_remove_netagent(ifs);
2854 		bif->bif_flags |= BIFF_NETAGENT_REMOVED;
2855 	}
2856 #endif /* SKYWALK */
2857 
2858 	/*
2859 	 * install an interface filter
2860 	 */
2861 	memset(&iff, 0, sizeof(struct iff_filter));
2862 	iff.iff_cookie = bif;
2863 	iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
2864 	iff.iff_input = bridge_iff_input;
2865 	iff.iff_output = bridge_iff_output;
2866 	iff.iff_event = bridge_iff_event;
2867 	iff.iff_detached = bridge_iff_detached;
2868 	error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
2869 	    DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
2870 	if (error != 0) {
2871 		BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
2872 		BRIDGE_LOCK(sc);
2873 		goto out;
2874 	}
2875 	bif->bif_flags |= BIFF_FILTER_ATTACHED;
2876 
2877 	/*
2878 	 * install a dummy "bridge" protocol
2879 	 */
2880 	if ((error = bridge_attach_protocol(ifs)) != 0) {
2881 		if (error != 0) {
2882 			BRIDGE_LOG(LOG_NOTICE, 0,
2883 			    "bridge_attach_protocol failed %d", error);
2884 			BRIDGE_LOCK(sc);
2885 			goto out;
2886 		}
2887 	}
2888 	bif->bif_flags |= BIFF_PROTO_ATTACHED;
2889 
2890 	if (lladdr_changed &&
2891 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2892 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2893 	}
2894 
2895 	media_active = interface_media_active(ifs);
2896 
2897 	/* disable LRO */
2898 	if (bridge_set_lro(ifs, FALSE)) {
2899 		bif->bif_flags |= BIFF_LRO_DISABLED;
2900 	}
2901 
2902 	/*
2903 	 * No failures past this point. Add the member to the list.
2904 	 */
2905 	BRIDGE_LOCK(sc);
2906 	bif->bif_flags |= BIFF_IN_MEMBER_LIST;
2907 	BRIDGE_XLOCK(sc);
2908 	TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
2909 	BRIDGE_XDROP(sc);
2910 
2911 	/* cache the member link status */
2912 	if (media_active != 0) {
2913 		bif->bif_flags |= BIFF_MEDIA_ACTIVE;
2914 	} else {
2915 		bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
2916 	}
2917 
2918 	/* the new member may change the link status of the bridge interface */
2919 	event_code = bridge_updatelinkstatus(sc);
2920 
2921 	/* check whether we need input broadcast or not */
2922 	input_broadcast = interface_needs_input_broadcast(ifs);
2923 	bif_set_input_broadcast(bif, input_broadcast);
2924 	BRIDGE_UNLOCK(sc);
2925 
2926 	if (event_code != 0) {
2927 		bridge_link_event(bifp, event_code);
2928 	}
2929 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2930 	    "%s input broadcast %s", ifs->if_xname,
2931 	    input_broadcast ? "ENABLED" : "DISABLED");
2932 
2933 	BRIDGE_LOCK(sc);
2934 	bridge_set_tso(sc);
2935 
2936 out:
2937 	/* allow the interface to detach */
2938 	ifnet_decr_iorefcnt(ifs);
2939 
2940 	if (error != 0) {
2941 		if (bif != NULL) {
2942 			bridge_delete_member(sc, bif);
2943 		}
2944 	} else if (IFNET_IS_VMNET(ifs)) {
2945 		INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
2946 	}
2947 
2948 	return error;
2949 }
2950 
2951 static int
bridge_ioctl_del(struct bridge_softc * sc,void * arg)2952 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
2953 {
2954 	struct ifbreq *req = arg;
2955 	struct bridge_iflist *bif;
2956 
2957 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2958 	if (bif == NULL) {
2959 		return ENOENT;
2960 	}
2961 
2962 	bridge_delete_member(sc, bif);
2963 
2964 	return 0;
2965 }
2966 
/*
 * bridge_ioctl_purge:
 *
 *	No-op: neither argument is used and 0 is always returned.
 */
static int
bridge_ioctl_purge(struct bridge_softc *sc, void *arg)
{
#pragma unused(sc, arg)

	return 0;
}
2973 
2974 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * arg)2975 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
2976 {
2977 	struct ifbreq *req = arg;
2978 	struct bridge_iflist *bif;
2979 
2980 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2981 	if (bif == NULL) {
2982 		return ENOENT;
2983 	}
2984 
2985 	struct bstp_port *bp;
2986 
2987 	bp = &bif->bif_stp;
2988 	req->ifbr_state = bp->bp_state;
2989 	req->ifbr_priority = bp->bp_priority;
2990 	req->ifbr_path_cost = bp->bp_path_cost;
2991 	req->ifbr_proto = bp->bp_protover;
2992 	req->ifbr_role = bp->bp_role;
2993 	req->ifbr_stpflags = bp->bp_flags;
2994 	req->ifbr_ifsflags = bif->bif_ifflags;
2995 
2996 	/* Copy STP state options as flags */
2997 	if (bp->bp_operedge) {
2998 		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
2999 	}
3000 	if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
3001 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
3002 	}
3003 	if (bp->bp_ptp_link) {
3004 		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
3005 	}
3006 	if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
3007 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
3008 	}
3009 	if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
3010 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
3011 	}
3012 	if (bp->bp_flags & BSTP_PORT_ADMCOST) {
3013 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
3014 	}
3015 
3016 	req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
3017 	req->ifbr_addrcnt = bif->bif_addrcnt;
3018 	req->ifbr_addrmax = bif->bif_addrmax;
3019 	req->ifbr_addrexceeded = bif->bif_addrexceeded;
3020 
3021 	return 0;
3022 }
3023 
3024 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * arg)3025 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
3026 {
3027 	struct ifbreq *req = arg;
3028 	struct bridge_iflist *bif;
3029 #if BRIDGESTP
3030 	struct bstp_port *bp;
3031 	int error;
3032 #endif /* BRIDGESTP */
3033 
3034 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3035 	if (bif == NULL) {
3036 		return ENOENT;
3037 	}
3038 
3039 	if (req->ifbr_ifsflags & IFBIF_SPAN) {
3040 		/* SPAN is readonly */
3041 		return EINVAL;
3042 	}
3043 #define _EXCLUSIVE_FLAGS        (IFBIF_CHECKSUM_OFFLOAD | IFBIF_MAC_NAT)
3044 	if ((req->ifbr_ifsflags & _EXCLUSIVE_FLAGS) == _EXCLUSIVE_FLAGS) {
3045 		/* can't specify both MAC-NAT and checksum offload */
3046 		return EINVAL;
3047 	}
3048 	if ((req->ifbr_ifsflags & IFBIF_MAC_NAT) != 0) {
3049 		errno_t error;
3050 
3051 		error = bridge_mac_nat_enable(sc, bif);
3052 		if (error != 0) {
3053 			return error;
3054 		}
3055 	} else if (sc->sc_mac_nat_bif == bif) {
3056 		bridge_mac_nat_disable(sc);
3057 	}
3058 
3059 
3060 #if BRIDGESTP
3061 	if (req->ifbr_ifsflags & IFBIF_STP) {
3062 		if ((bif->bif_ifflags & IFBIF_STP) == 0) {
3063 			error = bstp_enable(&bif->bif_stp);
3064 			if (error) {
3065 				return error;
3066 			}
3067 		}
3068 	} else {
3069 		if ((bif->bif_ifflags & IFBIF_STP) != 0) {
3070 			bstp_disable(&bif->bif_stp);
3071 		}
3072 	}
3073 
3074 	/* Pass on STP flags */
3075 	bp = &bif->bif_stp;
3076 	bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
3077 	bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
3078 	bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
3079 	bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
3080 #else /* !BRIDGESTP */
3081 	if (req->ifbr_ifsflags & IFBIF_STP) {
3082 		return EOPNOTSUPP;
3083 	}
3084 #endif /* !BRIDGESTP */
3085 
3086 	/* Save the bits relating to the bridge */
3087 	bif->bif_ifflags = req->ifbr_ifsflags & IFBIFMASK;
3088 
3089 
3090 	return 0;
3091 }
3092 
3093 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * arg)3094 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
3095 {
3096 	struct ifbrparam *param = arg;
3097 
3098 	sc->sc_brtmax = param->ifbrp_csize;
3099 	bridge_rttrim(sc);
3100 	return 0;
3101 }
3102 
3103 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * arg)3104 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
3105 {
3106 	struct ifbrparam *param = arg;
3107 
3108 	param->ifbrp_csize = sc->sc_brtmax;
3109 
3110 	return 0;
3111 }
3112 
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of bridge_ioctl_gifs32()/bridge_ioctl_gifs64(). The
 *	expanding function must have `sc' (the bridge), `bifc' (the 32 or
 *	64 bit ifbifconf) and `int error' in scope.
 *
 *	When bifc->ifbic_len is 0 the macro stores the size needed for all
 *	member and span entries in ifbic_len and returns 0 from the
 *	expanding function. Otherwise it builds one struct ifbreq per
 *	member and span interface (as many as fit in ifbic_len), copies
 *	them out to ifbic_req and leaves the copyout() result in `error'.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout. Because the lists can change while the lock is dropped,
 *	the fill loops are bounded by the size that was actually allocated.
 *	If the allocation fails the macro returns ENOMEM from the expanding
 *	function, with the bridge lock held again.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif;                                      \
	struct ifbreq breq;                                             \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next)                    \
	        count++;                                                \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)                  \
	        count++;                                                \
                                                                        \
	buflen = sizeof (breq) * count;                                 \
	if (bifc->ifbic_len == 0) {                                     \
	        bifc->ifbic_len = buflen;                               \
	        return (0);                                             \
	}                                                               \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);        \
	BRIDGE_LOCK(sc);                                                \
	/* a NULL buffer is only acceptable when nothing was asked for */ \
	if (outbuf == NULL && buflen != 0)                              \
	        return (ENOMEM);                                        \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	len = min(bifc->ifbic_len, buflen);                             \
	bzero(&breq, sizeof (breq));                                    \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname);                      \
	        /* Fill in the ifbreq structure */                      \
	        error = bridge_ioctl_gifflags(sc, &breq);               \
	        if (error)                                              \
	                break;                                          \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {                \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        /* start clean: don't report the last member's state */ \
	        bzero(&breq, sizeof (breq));                            \
	        snprintf(breq.ifbr_ifsname,                             \
	                 sizeof (breq.ifbr_ifsname),                    \
	                 "%s", bif->bif_ifp->if_xname);                 \
	        breq.ifbr_ifsflags = bif->bif_ifflags;                  \
	        breq.ifbr_portno                                        \
	                = bif->bif_ifp->if_index & 0xfff;               \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifc->ifbic_len = sizeof (breq) * count;                        \
	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);      \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
} while (0)
3175 
3176 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * arg)3177 bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
3178 {
3179 	struct ifbifconf64 *bifc = arg;
3180 	int error = 0;
3181 
3182 	BRIDGE_IOCTL_GIFS;
3183 
3184 	return error;
3185 }
3186 
3187 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * arg)3188 bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
3189 {
3190 	struct ifbifconf32 *bifc = arg;
3191 	int error = 0;
3192 
3193 	BRIDGE_IOCTL_GIFS;
3194 
3195 	return error;
3196 }
3197 
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of bridge_ioctl_rts32()/bridge_ioctl_rts64(). The
 *	expanding function must have `sc' (the bridge), `bac' (the 32 or
 *	64 bit ifbaconf), `bareq' (the matching ifbareq) and `int error'
 *	in scope. The macro always returns from the expanding function.
 *
 *	When bac->ifbac_len is 0 it returns 0 right away. Otherwise it
 *	builds one `bareq' per entry of sc_rtlist (as many as fit in
 *	ifbac_len), copies them out to ifbac_req, stores the number of
 *	bytes produced in ifbac_len and returns `error'.
 *
 *	ifba_expire is the time left (brt_expire - net_uptime()) for a
 *	dynamic entry that has not expired yet, and 0 for every other
 *	entry. It is reset per entry so that an expired dynamic entry does
 *	not report the value computed for the entry before it.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout. Because the list can change while the lock is dropped,
 *	the fill loop is bounded by the size that was actually allocated.
 *	If the allocation fails the macro returns ENOMEM, with the bridge
 *	lock held again.
 */
#define BRIDGE_IOCTL_RTS do {                                               \
	struct bridge_rtnode *brt;                                          \
	char *buf;                                                          \
	char *outbuf = NULL;                                                \
	unsigned int count, buflen, len;                                    \
	unsigned long now;                                                  \
                                                                            \
	if (bac->ifbac_len == 0)                                            \
	        return (0);                                                 \
                                                                            \
	bzero(&bareq, sizeof (bareq));                                      \
	count = 0;                                                          \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)                         \
	        count++;                                                    \
	buflen = sizeof (bareq) * count;                                    \
                                                                            \
	BRIDGE_UNLOCK(sc);                                                  \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);            \
	BRIDGE_LOCK(sc);                                                    \
	/* a NULL buffer is only acceptable when the table was empty */     \
	if (outbuf == NULL && buflen != 0)                                  \
	        return (ENOMEM);                                            \
                                                                            \
	count = 0;                                                          \
	buf = outbuf;                                                       \
	len = min(bac->ifbac_len, buflen);                                  \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {                       \
	        if (len < sizeof (bareq))                                   \
	                goto out;                                           \
	        snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname),   \
	                 "%s", brt->brt_ifp->if_xname);                     \
	        memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
	        bareq.ifba_vlan = brt->brt_vlan;                            \
	        /* static, or dynamic but already expired: report 0 */      \
	        bareq.ifba_expire = 0;                                      \
	        if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {   \
	                now = (unsigned long) net_uptime();                 \
	                if (now < brt->brt_expire)                          \
	                        bareq.ifba_expire =                         \
	                            brt->brt_expire - now;                  \
	        }                                                           \
	        bareq.ifba_flags = brt->brt_flags;                          \
                                                                            \
	        memcpy(buf, &bareq, sizeof (bareq));                        \
	        count++;                                                    \
	        buf += sizeof (bareq);                                      \
	        len -= sizeof (bareq);                                      \
	}                                                                   \
out:                                                                        \
	bac->ifbac_len = sizeof (bareq) * count;                            \
	if (outbuf != NULL) {                                               \
	        BRIDGE_UNLOCK(sc);                                          \
	        error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);    \
	        kfree_data(outbuf, buflen);                                 \
	        BRIDGE_LOCK(sc);                                            \
	}                                                                   \
	return (error);                                                     \
} while (0)
3252 
3253 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * arg)3254 bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
3255 {
3256 	struct ifbaconf64 *bac = arg;
3257 	struct ifbareq64 bareq;
3258 	int error = 0;
3259 
3260 	BRIDGE_IOCTL_RTS;
3261 	return error;
3262 }
3263 
3264 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * arg)3265 bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
3266 {
3267 	struct ifbaconf32 *bac = arg;
3268 	struct ifbareq32 bareq;
3269 	int error = 0;
3270 
3271 	BRIDGE_IOCTL_RTS;
3272 	return error;
3273 }
3274 
3275 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * arg)3276 bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
3277 {
3278 	struct ifbareq32 *req = arg;
3279 	struct bridge_iflist *bif;
3280 	int error;
3281 
3282 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3283 	if (bif == NULL) {
3284 		return ENOENT;
3285 	}
3286 
3287 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3288 	    req->ifba_flags);
3289 
3290 	return error;
3291 }
3292 
3293 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * arg)3294 bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
3295 {
3296 	struct ifbareq64 *req = arg;
3297 	struct bridge_iflist *bif;
3298 	int error;
3299 
3300 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3301 	if (bif == NULL) {
3302 		return ENOENT;
3303 	}
3304 
3305 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3306 	    req->ifba_flags);
3307 
3308 	return error;
3309 }
3310 
3311 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * arg)3312 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
3313 {
3314 	struct ifbrparam *param = arg;
3315 
3316 	sc->sc_brttimeout = param->ifbrp_ctime;
3317 	return 0;
3318 }
3319 
3320 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * arg)3321 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
3322 {
3323 	struct ifbrparam *param = arg;
3324 
3325 	param->ifbrp_ctime = sc->sc_brttimeout;
3326 	return 0;
3327 }
3328 
3329 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * arg)3330 bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
3331 {
3332 	struct ifbareq32 *req = arg;
3333 
3334 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3335 }
3336 
3337 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * arg)3338 bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
3339 {
3340 	struct ifbareq64 *req = arg;
3341 
3342 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3343 }
3344 
3345 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * arg)3346 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
3347 {
3348 	struct ifbreq *req = arg;
3349 
3350 	bridge_rtflush(sc, req->ifbr_ifsflags);
3351 	return 0;
3352 }
3353 
3354 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * arg)3355 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
3356 {
3357 	struct ifbrparam *param = arg;
3358 	struct bstp_state *bs = &sc->sc_stp;
3359 
3360 	param->ifbrp_prio = bs->bs_bridge_priority;
3361 	return 0;
3362 }
3363 
3364 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * arg)3365 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
3366 {
3367 #if BRIDGESTP
3368 	struct ifbrparam *param = arg;
3369 
3370 	return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3371 #else /* !BRIDGESTP */
3372 #pragma unused(sc, arg)
3373 	return EOPNOTSUPP;
3374 #endif /* !BRIDGESTP */
3375 }
3376 
3377 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * arg)3378 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
3379 {
3380 	struct ifbrparam *param = arg;
3381 	struct bstp_state *bs = &sc->sc_stp;
3382 
3383 	param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3384 	return 0;
3385 }
3386 
3387 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * arg)3388 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
3389 {
3390 #if BRIDGESTP
3391 	struct ifbrparam *param = arg;
3392 
3393 	return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3394 #else /* !BRIDGESTP */
3395 #pragma unused(sc, arg)
3396 	return EOPNOTSUPP;
3397 #endif /* !BRIDGESTP */
3398 }
3399 
3400 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * arg)3401 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
3402 {
3403 	struct ifbrparam *param;
3404 	struct bstp_state *bs;
3405 
3406 	param = arg;
3407 	bs = &sc->sc_stp;
3408 	param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3409 	return 0;
3410 }
3411 
3412 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * arg)3413 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
3414 {
3415 #if BRIDGESTP
3416 	struct ifbrparam *param = arg;
3417 
3418 	return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3419 #else /* !BRIDGESTP */
3420 #pragma unused(sc, arg)
3421 	return EOPNOTSUPP;
3422 #endif /* !BRIDGESTP */
3423 }
3424 
3425 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * arg)3426 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
3427 {
3428 	struct ifbrparam *param;
3429 	struct bstp_state *bs;
3430 
3431 	param = arg;
3432 	bs = &sc->sc_stp;
3433 	param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3434 	return 0;
3435 }
3436 
3437 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * arg)3438 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
3439 {
3440 #if BRIDGESTP
3441 	struct ifbrparam *param = arg;
3442 
3443 	return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3444 #else /* !BRIDGESTP */
3445 #pragma unused(sc, arg)
3446 	return EOPNOTSUPP;
3447 #endif /* !BRIDGESTP */
3448 }
3449 
3450 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * arg)3451 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
3452 {
3453 #if BRIDGESTP
3454 	struct ifbreq *req = arg;
3455 	struct bridge_iflist *bif;
3456 
3457 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3458 	if (bif == NULL) {
3459 		return ENOENT;
3460 	}
3461 
3462 	return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3463 #else /* !BRIDGESTP */
3464 #pragma unused(sc, arg)
3465 	return EOPNOTSUPP;
3466 #endif /* !BRIDGESTP */
3467 }
3468 
3469 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * arg)3470 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
3471 {
3472 #if BRIDGESTP
3473 	struct ifbreq *req = arg;
3474 	struct bridge_iflist *bif;
3475 
3476 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3477 	if (bif == NULL) {
3478 		return ENOENT;
3479 	}
3480 
3481 	return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3482 #else /* !BRIDGESTP */
3483 #pragma unused(sc, arg)
3484 	return EOPNOTSUPP;
3485 #endif /* !BRIDGESTP */
3486 }
3487 
3488 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * arg)3489 bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
3490 {
3491 	struct ifbrparam *param = arg;
3492 
3493 	param->ifbrp_filter = sc->sc_filter_flags;
3494 
3495 	return 0;
3496 }
3497 
3498 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * arg)3499 bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
3500 {
3501 	struct ifbrparam *param = arg;
3502 
3503 	if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3504 		return EINVAL;
3505 	}
3506 
3507 	if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3508 		return EINVAL;
3509 	}
3510 
3511 	sc->sc_filter_flags = param->ifbrp_filter;
3512 
3513 	return 0;
3514 }
3515 
3516 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * arg)3517 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
3518 {
3519 	struct ifbreq *req = arg;
3520 	struct bridge_iflist *bif;
3521 
3522 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3523 	if (bif == NULL) {
3524 		return ENOENT;
3525 	}
3526 
3527 	bif->bif_addrmax = req->ifbr_addrmax;
3528 	return 0;
3529 }
3530 
3531 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * arg)3532 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
3533 {
3534 	struct ifbreq *req = arg;
3535 	struct bridge_iflist *bif = NULL;
3536 	struct ifnet *ifs;
3537 
3538 	ifs = ifunit(req->ifbr_ifsname);
3539 	if (ifs == NULL) {
3540 		return ENOENT;
3541 	}
3542 
3543 	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3544 		return EINVAL;
3545 	}
3546 
3547 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3548 	if (ifs == bif->bif_ifp) {
3549 		return EBUSY;
3550 	}
3551 
3552 	if (ifs->if_bridge != NULL) {
3553 		return EBUSY;
3554 	}
3555 
3556 	switch (ifs->if_type) {
3557 	case IFT_ETHER:
3558 	case IFT_L2VLAN:
3559 	case IFT_IEEE8023ADLAG:
3560 		break;
3561 	case IFT_GIF:
3562 	/* currently not supported */
3563 	/* FALLTHRU */
3564 	default:
3565 		return EINVAL;
3566 	}
3567 
3568 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3569 
3570 	bif->bif_ifp = ifs;
3571 	bif->bif_ifflags = IFBIF_SPAN;
3572 
3573 	ifnet_reference(bif->bif_ifp);
3574 
3575 	TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3576 
3577 	return 0;
3578 }
3579 
3580 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * arg)3581 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
3582 {
3583 	struct ifbreq *req = arg;
3584 	struct bridge_iflist *bif;
3585 	struct ifnet *ifs;
3586 
3587 	ifs = ifunit(req->ifbr_ifsname);
3588 	if (ifs == NULL) {
3589 		return ENOENT;
3590 	}
3591 
3592 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3593 	if (ifs == bif->bif_ifp) {
3594 		break;
3595 	}
3596 
3597 	if (bif == NULL) {
3598 		return ENOENT;
3599 	}
3600 
3601 	bridge_delete_span(sc, bif);
3602 
3603 	return 0;
3604 }
3605 
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of the 32-bit and 64-bit "get bridge parameters"
 *	ioctls.  Expects `sc' (the bridge softc) and `req' (the request
 *	structure to fill in) to be in scope where it is expanded.
 *
 *	The max age, hello time and forward delay are reported shifted
 *	right by 8 bits; the root port is reported as the if_index of the
 *	root port's interface, or 0 when there is no root port.
 */
#define BRIDGE_IOCTL_GBPARAM do {                                       \
	struct bstp_state *stp = &sc->sc_stp;                           \
	struct bstp_port *rp = stp->bs_root_port;                       \
                                                                        \
	req->ifbop_maxage = stp->bs_bridge_max_age >> 8;                \
	req->ifbop_hellotime = stp->bs_bridge_htime >> 8;               \
	req->ifbop_fwddelay = stp->bs_bridge_fdelay >> 8;               \
                                                                        \
	if (rp != NULL) {                                               \
	        req->ifbop_root_port = rp->bp_ifp->if_index;            \
	} else {                                                        \
	        req->ifbop_root_port = 0;                               \
	}                                                               \
                                                                        \
	req->ifbop_holdcount = stp->bs_txholdcount;                     \
	req->ifbop_priority = stp->bs_bridge_priority;                  \
	req->ifbop_protocol = stp->bs_protover;                         \
	req->ifbop_root_path_cost = stp->bs_root_pv.pv_cost;            \
	req->ifbop_bridgeid = stp->bs_bridge_pv.pv_dbridge_id;          \
	req->ifbop_designated_root = stp->bs_root_pv.pv_root_id;        \
	req->ifbop_designated_bridge = stp->bs_root_pv.pv_dbridge_id;   \
	req->ifbop_last_tc_time.tv_sec = stp->bs_last_tc_time.tv_sec;   \
	req->ifbop_last_tc_time.tv_usec = stp->bs_last_tc_time.tv_usec; \
} while (0)
3630 
3631 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * arg)3632 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *arg)
3633 {
3634 	struct ifbropreq32 *req = arg;
3635 
3636 	BRIDGE_IOCTL_GBPARAM;
3637 	return 0;
3638 }
3639 
3640 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * arg)3641 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *arg)
3642 {
3643 	struct ifbropreq64 *req = arg;
3644 
3645 	BRIDGE_IOCTL_GBPARAM;
3646 	return 0;
3647 }
3648 
3649 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * arg)3650 bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
3651 {
3652 	struct ifbrparam *param = arg;
3653 
3654 	param->ifbrp_cexceeded = sc->sc_brtexceeded;
3655 	return 0;
3656 }
3657 
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body of the 32-bit and 64-bit "get STP port list" ioctls.
 *	Expects `sc', `bifstp' and `error' to be in scope where it is
 *	expanded, and returns from the expanding function on every path.
 *
 *	If the caller's ifbpstp_len is 0, only the number of bytes needed
 *	for all IFBIF_STP members is stored in ifbpstp_len.  Otherwise one
 *	ifbpstpreq per IFBIF_STP member is staged in a temporary kernel
 *	buffer, as many as fit in min(ifbpstp_len, needed bytes), and the
 *	result is copied out to ifbpstp_req; ifbpstp_len is then set to the
 *	number of bytes copied.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout and is re-taken before returning.  The fill loop is bounded
 *	by the buffer length, not by the earlier member count, since the
 *	member list is re-walked after the lock was dropped.
 *
 *	Returns ENOMEM if the staging buffer cannot be allocated, otherwise
 *	the result of copyout() (or 0 for the size-only query).
 */
#define BRIDGE_IOCTL_GIFSSTP do {                                       \
	struct bridge_iflist *bif;                                      \
	struct bstp_port *bp;                                           \
	struct ifbpstpreq bpreq;                                        \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if ((bif->bif_ifflags & IFBIF_STP) != 0)                \
	                count++;                                        \
	}                                                               \
                                                                        \
	buflen = sizeof (bpreq) * count;                                \
	if (bifstp->ifbpstp_len == 0) {                                 \
	        bifstp->ifbpstp_len = buflen;                           \
	        return (0);                                             \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);        \
	BRIDGE_LOCK(sc);                                                \
	/* Z_WAITOK without Z_NOFAIL: do not write through a NULL buffer */ \
	if (outbuf == NULL && buflen != 0) {                            \
	        return (ENOMEM);                                        \
	}                                                               \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	len = min(bifstp->ifbpstp_len, buflen);                         \
	bzero(&bpreq, sizeof (bpreq));                                  \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (bpreq))                               \
	                break;                                          \
                                                                        \
	        if ((bif->bif_ifflags & IFBIF_STP) == 0)                \
	                continue;                                       \
                                                                        \
	        bp = &bif->bif_stp;                                     \
	        bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;     \
	        bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;      \
	        bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;        \
	        bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;     \
	        bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
	        bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;     \
                                                                        \
	        memcpy(buf, &bpreq, sizeof (bpreq));                    \
	        count++;                                                \
	        buf += sizeof (bpreq);                                  \
	        len -= sizeof (bpreq);                                  \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifstp->ifbpstp_len = sizeof (bpreq) * count;                   \
	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
	return (error);                                                 \
} while (0)
3713 
3714 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * arg)3715 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *arg)
3716 {
3717 	struct ifbpstpconf32 *bifstp = arg;
3718 	int error = 0;
3719 
3720 	BRIDGE_IOCTL_GIFSSTP;
3721 	return error;
3722 }
3723 
3724 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * arg)3725 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *arg)
3726 {
3727 	struct ifbpstpconf64 *bifstp = arg;
3728 	int error = 0;
3729 
3730 	BRIDGE_IOCTL_GIFSSTP;
3731 	return error;
3732 }
3733 
3734 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * arg)3735 bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
3736 {
3737 #if BRIDGESTP
3738 	struct ifbrparam *param = arg;
3739 
3740 	return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3741 #else /* !BRIDGESTP */
3742 #pragma unused(sc, arg)
3743 	return EOPNOTSUPP;
3744 #endif /* !BRIDGESTP */
3745 }
3746 
3747 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * arg)3748 bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
3749 {
3750 #if BRIDGESTP
3751 	struct ifbrparam *param = arg;
3752 
3753 	return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3754 #else /* !BRIDGESTP */
3755 #pragma unused(sc, arg)
3756 	return EOPNOTSUPP;
3757 #endif /* !BRIDGESTP */
3758 }
3759 
3760 
3761 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * arg)3762 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *arg)
3763 {
3764 	struct ifbrhostfilter *req = arg;
3765 	struct bridge_iflist *bif;
3766 
3767 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3768 	if (bif == NULL) {
3769 		return ENOENT;
3770 	}
3771 
3772 	bzero(req, sizeof(struct ifbrhostfilter));
3773 	if (bif->bif_flags & BIFF_HOST_FILTER) {
3774 		req->ifbrhf_flags |= IFBRHF_ENABLED;
3775 		bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3776 		    ETHER_ADDR_LEN);
3777 		req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3778 	}
3779 	return 0;
3780 }
3781 
3782 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * arg)3783 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *arg)
3784 {
3785 	struct ifbrhostfilter *req = arg;
3786 	struct bridge_iflist *bif;
3787 
3788 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3789 	if (bif == NULL) {
3790 		return ENOENT;
3791 	}
3792 
3793 	if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3794 		bif->bif_flags |= BIFF_HOST_FILTER;
3795 
3796 		if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3797 			bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3798 			    ETHER_ADDR_LEN);
3799 			if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3800 			    ETHER_ADDR_LEN) != 0) {
3801 				bif->bif_flags |= BIFF_HF_HWSRC;
3802 			} else {
3803 				bif->bif_flags &= ~BIFF_HF_HWSRC;
3804 			}
3805 		}
3806 		if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3807 			bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3808 			if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3809 				bif->bif_flags |= BIFF_HF_IPSRC;
3810 			} else {
3811 				bif->bif_flags &= ~BIFF_HF_IPSRC;
3812 			}
3813 		}
3814 	} else {
3815 		bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3816 		    BIFF_HF_IPSRC);
3817 		bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3818 		bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3819 	}
3820 
3821 	return 0;
3822 }
3823 
3824 static char *
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * buf,unsigned int * len_p)3825 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3826     unsigned int * count_p, char *buf, unsigned int *len_p)
3827 {
3828 	unsigned int            count = *count_p;
3829 	struct ifbrmne          ifbmne;
3830 	unsigned int            len = *len_p;
3831 	struct mac_nat_entry    *mne;
3832 	unsigned long           now;
3833 
3834 	bzero(&ifbmne, sizeof(ifbmne));
3835 	LIST_FOREACH(mne, list, mne_list) {
3836 		if (len < sizeof(ifbmne)) {
3837 			break;
3838 		}
3839 		snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3840 		    "%s", mne->mne_bif->bif_ifp->if_xname);
3841 		memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3842 		    sizeof(ifbmne.ifbmne_mac));
3843 		now = (unsigned long) net_uptime();
3844 		if (now < mne->mne_expire) {
3845 			ifbmne.ifbmne_expire = mne->mne_expire - now;
3846 		} else {
3847 			ifbmne.ifbmne_expire = 0;
3848 		}
3849 		if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3850 			ifbmne.ifbmne_af = AF_INET6;
3851 			ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3852 		} else {
3853 			ifbmne.ifbmne_af = AF_INET;
3854 			ifbmne.ifbmne_ip_addr = mne->mne_ip;
3855 		}
3856 		memcpy(buf, &ifbmne, sizeof(ifbmne));
3857 		count++;
3858 		buf += sizeof(ifbmne);
3859 		len -= sizeof(ifbmne);
3860 	}
3861 	*count_p = count;
3862 	*len_p = len;
3863 	return buf;
3864 }
3865 
3866 /*
3867  * bridge_ioctl_gmnelist()
3868  *   Perform the get mac_nat_entry list ioctl.
3869  *
3870  * Note:
3871  *   The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3872  *   field size/layout except for the last field ifbml_buf, the user-supplied
3873  *   buffer pointer. That is passed in separately via the 'user_addr'
3874  *   parameter from the respective 32-bit or 64-bit ioctl routine.
3875  */
3876 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3877 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3878     user_addr_t user_addr)
3879 {
3880 	unsigned int            count;
3881 	char                    *buf;
3882 	int                     error = 0;
3883 	char                    *outbuf = NULL;
3884 	struct mac_nat_entry    *mne;
3885 	unsigned int            buflen;
3886 	unsigned int            len;
3887 
3888 	mnl->ifbml_elsize = sizeof(struct ifbrmne);
3889 	count = 0;
3890 	LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3891 		count++;
3892 	}
3893 	LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3894 		count++;
3895 	}
3896 	buflen = sizeof(struct ifbrmne) * count;
3897 	if (buflen == 0 || mnl->ifbml_len == 0) {
3898 		mnl->ifbml_len = buflen;
3899 		return error;
3900 	}
3901 	BRIDGE_UNLOCK(sc);
3902 	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3903 	BRIDGE_LOCK(sc);
3904 	count = 0;
3905 	buf = outbuf;
3906 	len = min(mnl->ifbml_len, buflen);
3907 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3908 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3909 	mnl->ifbml_len = count * sizeof(struct ifbrmne);
3910 	BRIDGE_UNLOCK(sc);
3911 	error = copyout(outbuf, user_addr, mnl->ifbml_len);
3912 	kfree_data(outbuf, buflen);
3913 	BRIDGE_LOCK(sc);
3914 	return error;
3915 }
3916 
3917 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * arg)3918 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *arg)
3919 {
3920 	struct ifbrmnelist64 *mnl = arg;
3921 
3922 	return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
3923 }
3924 
3925 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * arg)3926 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *arg)
3927 {
3928 	struct ifbrmnelist32 *mnl = arg;
3929 
3930 	return bridge_ioctl_gmnelist(sc, arg,
3931 	           CAST_USER_ADDR_T(mnl->ifbml_buf));
3932 }
3933 
3934 /*
3935  * bridge_ioctl_gifstats()
3936  *   Return per-member stats.
3937  *
3938  * Note:
3939  *   The ifbrmreq32 and ifbrmreq64 structures have the same
3940  *   field size/layout except for the last field brmr_buf, the user-supplied
3941  *   buffer pointer. That is passed in separately via the 'user_addr'
3942  *   parameter from the respective 32-bit or 64-bit ioctl routine.
3943  */
3944 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)3945 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
3946     user_addr_t user_addr)
3947 {
3948 	struct bridge_iflist    *bif;
3949 	int                     error = 0;
3950 	unsigned int            buflen;
3951 
3952 	bif = bridge_lookup_member(sc, mreq->brmr_ifname);
3953 	if (bif == NULL) {
3954 		error = ENOENT;
3955 		goto done;
3956 	}
3957 
3958 	buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
3959 	if (buflen == 0 || mreq->brmr_len == 0) {
3960 		mreq->brmr_len = buflen;
3961 		goto done;
3962 	}
3963 	if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
3964 		error = ENOBUFS;
3965 		goto done;
3966 	}
3967 	mreq->brmr_len = buflen;
3968 	error = copyout(&bif->bif_stats, user_addr, buflen);
3969 done:
3970 	return error;
3971 }
3972 
3973 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * arg)3974 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *arg)
3975 {
3976 	struct ifbrmreq32 *mreq = arg;
3977 
3978 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3979 }
3980 
3981 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * arg)3982 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *arg)
3983 {
3984 	struct ifbrmreq64 *mreq = arg;
3985 
3986 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3987 }
3988 
3989 /*
3990  * bridge_proto_attach_changed
3991  *
3992  *	Called when protocol attachment on the interface changes.
3993  */
3994 static void
bridge_proto_attach_changed(struct ifnet * ifp)3995 bridge_proto_attach_changed(struct ifnet *ifp)
3996 {
3997 	boolean_t changed = FALSE;
3998 	struct bridge_iflist *bif;
3999 	boolean_t input_broadcast;
4000 	struct bridge_softc *sc = ifp->if_bridge;
4001 
4002 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4003 	if (sc == NULL) {
4004 		return;
4005 	}
4006 	input_broadcast = interface_needs_input_broadcast(ifp);
4007 	BRIDGE_LOCK(sc);
4008 	bif = bridge_lookup_member_if(sc, ifp);
4009 	if (bif != NULL) {
4010 		changed = bif_set_input_broadcast(bif, input_broadcast);
4011 	}
4012 	BRIDGE_UNLOCK(sc);
4013 	if (changed) {
4014 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
4015 		    "%s input broadcast %s", ifp->if_xname,
4016 		    input_broadcast ? "ENABLED" : "DISABLED");
4017 	}
4018 	return;
4019 }
4020 
4021 /*
4022  * interface_media_active:
4023  *
4024  *	Tells if an interface media is active.
4025  */
4026 static int
interface_media_active(struct ifnet * ifp)4027 interface_media_active(struct ifnet *ifp)
4028 {
4029 	struct ifmediareq   ifmr;
4030 	int status = 0;
4031 
4032 	bzero(&ifmr, sizeof(ifmr));
4033 	if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
4034 		if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
4035 			status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
4036 		}
4037 	}
4038 
4039 	return status;
4040 }
4041 
4042 /*
4043  * bridge_updatelinkstatus:
4044  *
4045  *      Update the media active status of the bridge based on the
4046  *	media active status of its member.
4047  *	If changed, return the corresponding onf/off link event.
4048  */
4049 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)4050 bridge_updatelinkstatus(struct bridge_softc *sc)
4051 {
4052 	struct bridge_iflist *bif;
4053 	int active_member = 0;
4054 	u_int32_t event_code = 0;
4055 
4056 	BRIDGE_LOCK_ASSERT_HELD(sc);
4057 
4058 	/*
4059 	 * Find out if we have an active interface
4060 	 */
4061 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
4062 		if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
4063 			active_member = 1;
4064 			break;
4065 		}
4066 	}
4067 
4068 	if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4069 		sc->sc_flags |= SCF_MEDIA_ACTIVE;
4070 		event_code = KEV_DL_LINK_ON;
4071 	} else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4072 		sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
4073 		event_code = KEV_DL_LINK_OFF;
4074 	}
4075 
4076 	return event_code;
4077 }
4078 
4079 /*
4080  * bridge_iflinkevent:
4081  */
4082 static void
bridge_iflinkevent(struct ifnet * ifp)4083 bridge_iflinkevent(struct ifnet *ifp)
4084 {
4085 	struct bridge_softc *sc = ifp->if_bridge;
4086 	struct bridge_iflist *bif;
4087 	u_int32_t event_code = 0;
4088 	int media_active;
4089 
4090 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4091 
4092 	/* Check if the interface is a bridge member */
4093 	if (sc == NULL) {
4094 		return;
4095 	}
4096 
4097 	media_active = interface_media_active(ifp);
4098 	BRIDGE_LOCK(sc);
4099 	bif = bridge_lookup_member_if(sc, ifp);
4100 	if (bif != NULL) {
4101 		if (media_active) {
4102 			bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4103 		} else {
4104 			bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4105 		}
4106 		if (sc->sc_mac_nat_bif != NULL) {
4107 			bridge_mac_nat_flush_entries(sc, bif);
4108 		}
4109 
4110 		event_code = bridge_updatelinkstatus(sc);
4111 	}
4112 	BRIDGE_UNLOCK(sc);
4113 
4114 	if (event_code != 0) {
4115 		bridge_link_event(sc->sc_ifp, event_code);
4116 	}
4117 }
4118 
4119 /*
4120  * bridge_delayed_callback:
4121  *
4122  *	Makes a delayed call
4123  */
4124 static void
bridge_delayed_callback(void * param,__unused void * param2)4125 bridge_delayed_callback(void *param, __unused void *param2)
4126 {
4127 	struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4128 	struct bridge_softc *sc = call->bdc_sc;
4129 
4130 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4131 	if (bridge_delayed_callback_delay > 0) {
4132 		struct timespec ts;
4133 
4134 		ts.tv_sec = bridge_delayed_callback_delay;
4135 		ts.tv_nsec = 0;
4136 
4137 		BRIDGE_LOG(LOG_NOTICE, 0,
4138 		    "sleeping for %d seconds",
4139 		    bridge_delayed_callback_delay);
4140 
4141 		msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4142 		    __func__, &ts);
4143 
4144 		BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4145 	}
4146 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4147 
4148 	BRIDGE_LOCK(sc);
4149 
4150 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4151 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4152 	    "%s call 0x%llx flags 0x%x",
4153 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4154 	    call->bdc_flags);
4155 }
4156 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4157 
4158 	if (call->bdc_flags & BDCF_CANCELLING) {
4159 		wakeup(call);
4160 	} else {
4161 		if ((sc->sc_flags & SCF_DETACHING) == 0) {
4162 			(*call->bdc_func)(sc);
4163 		}
4164 	}
4165 	call->bdc_flags &= ~BDCF_OUTSTANDING;
4166 	BRIDGE_UNLOCK(sc);
4167 }
4168 
4169 /*
4170  * bridge_schedule_delayed_call:
4171  *
4172  *	Schedule a function to be called on a separate thread
4173  *      The actual call may be scheduled to run at a given time or ASAP.
4174  */
4175 static void
4176 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4177 {
4178 	uint64_t deadline = 0;
4179 	struct bridge_softc *sc = call->bdc_sc;
4180 
4181 	BRIDGE_LOCK_ASSERT_HELD(sc);
4182 
4183 	if ((sc->sc_flags & SCF_DETACHING) ||
4184 	    (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4185 		return;
4186 	}
4187 
4188 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4189 		nanoseconds_to_absolutetime(
4190 			(uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4191 			call->bdc_ts.tv_nsec, &deadline);
4192 		clock_absolutetime_interval_to_deadline(deadline, &deadline);
4193 	}
4194 
4195 	call->bdc_flags = BDCF_OUTSTANDING;
4196 
4197 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4198 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4199 	    "%s call 0x%llx flags 0x%x",
4200 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4201 	    call->bdc_flags);
4202 }
4203 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4204 
4205 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4206 		thread_call_func_delayed(
4207 			(thread_call_func_t)bridge_delayed_callback,
4208 			call, deadline);
4209 	} else {
4210 		if (call->bdc_thread_call == NULL) {
4211 			call->bdc_thread_call = thread_call_allocate(
4212 				(thread_call_func_t)bridge_delayed_callback,
4213 				call);
4214 		}
4215 		thread_call_enter(call->bdc_thread_call);
4216 	}
4217 }
4218 
4219 /*
4220  * bridge_cancel_delayed_call:
4221  *
4222  *	Cancel a queued or running delayed call.
4223  *	If call is running, does not return until the call is done to
4224  *	prevent race condition with the brigde interface getting destroyed
4225  */
4226 static void
4227 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4228 {
4229 	boolean_t result;
4230 	struct bridge_softc *sc = call->bdc_sc;
4231 
4232 	/*
4233 	 * The call was never scheduled
4234 	 */
4235 	if (sc == NULL) {
4236 		return;
4237 	}
4238 
4239 	BRIDGE_LOCK_ASSERT_HELD(sc);
4240 
4241 	call->bdc_flags |= BDCF_CANCELLING;
4242 
4243 	while (call->bdc_flags & BDCF_OUTSTANDING) {
4244 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4245 		    "%s call 0x%llx flags 0x%x",
4246 		    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4247 		    call->bdc_flags);
4248 		result = thread_call_func_cancel(
4249 			(thread_call_func_t)bridge_delayed_callback, call, FALSE);
4250 
4251 		if (result) {
4252 			/*
4253 			 * We managed to dequeue the delayed call
4254 			 */
4255 			call->bdc_flags &= ~BDCF_OUTSTANDING;
4256 		} else {
4257 			/*
4258 			 * Wait for delayed call do be done running
4259 			 */
4260 			msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4261 		}
4262 	}
4263 	call->bdc_flags &= ~BDCF_CANCELLING;
4264 }
4265 
4266 /*
4267  * bridge_cleanup_delayed_call:
4268  *
4269  *	Dispose resource allocated for a delayed call
4270  *	Assume the delayed call is not queued or running .
4271  */
4272 static void
4273 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4274 {
4275 	boolean_t result;
4276 	struct bridge_softc *sc = call->bdc_sc;
4277 
4278 	/*
4279 	 * The call was never scheduled
4280 	 */
4281 	if (sc == NULL) {
4282 		return;
4283 	}
4284 
4285 	BRIDGE_LOCK_ASSERT_HELD(sc);
4286 
4287 	VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4288 	VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4289 
4290 	if (call->bdc_thread_call != NULL) {
4291 		result = thread_call_free(call->bdc_thread_call);
4292 		if (result == FALSE) {
4293 			panic("%s thread_call_free() failed for call %p",
4294 			    __func__, call);
4295 		}
4296 		call->bdc_thread_call = NULL;
4297 	}
4298 }
4299 
4300 /*
4301  * bridge_init:
4302  *
4303  *	Initialize a bridge interface.
4304  */
4305 static int
4306 bridge_init(struct ifnet *ifp)
4307 {
4308 	struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4309 	errno_t error;
4310 
4311 	BRIDGE_LOCK_ASSERT_HELD(sc);
4312 
4313 	if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4314 		return 0;
4315 	}
4316 
4317 	error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4318 
4319 	/*
4320 	 * Calling bridge_aging_timer() is OK as there are no entries to
4321 	 * age so we're just going to arm the timer
4322 	 */
4323 	bridge_aging_timer(sc);
4324 #if BRIDGESTP
4325 	if (error == 0) {
4326 		bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4327 	}
4328 #endif /* BRIDGESTP */
4329 	return error;
4330 }
4331 
4332 /*
4333  * bridge_ifstop:
4334  *
4335  *	Stop the bridge interface.
4336  */
4337 static void
4338 bridge_ifstop(struct ifnet *ifp, int disable)
4339 {
4340 #pragma unused(disable)
4341 	struct bridge_softc *sc = ifp->if_softc;
4342 
4343 	BRIDGE_LOCK_ASSERT_HELD(sc);
4344 
4345 	if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4346 		return;
4347 	}
4348 
4349 	bridge_cancel_delayed_call(&sc->sc_aging_timer);
4350 
4351 #if BRIDGESTP
4352 	bstp_stop(&sc->sc_stp);
4353 #endif /* BRIDGESTP */
4354 
4355 	bridge_rtflush(sc, IFBF_FLUSHDYN);
4356 	(void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4357 }
4358 
4359 /*
4360  * bridge_compute_cksum:
4361  *
4362  *	If the packet has checksum flags, compare the hardware checksum
4363  *	capabilities of the source and destination interfaces. If they
4364  *	are the same, there's nothing to do. If they are different,
4365  *	finalize the checksum so that it can be sent on the destination
4366  *	interface.
4367  */
4368 static void
4369 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4370 {
4371 	uint32_t csum_flags;
4372 	uint16_t dst_hw_csum;
4373 	uint32_t did_sw = 0;
4374 	struct ether_header *eh;
4375 	uint16_t src_hw_csum;
4376 
4377 	if (src_if == dst_if) {
4378 		return;
4379 	}
4380 	csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4381 	if (csum_flags == 0) {
4382 		/* no checksum offload */
4383 		return;
4384 	}
4385 
4386 	/*
4387 	 * if destination/source differ in checksum offload
4388 	 * capabilities, finalize/compute the checksum
4389 	 */
4390 	dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4391 	src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4392 	if (dst_hw_csum == src_hw_csum) {
4393 		return;
4394 	}
4395 	eh = mtod(m, struct ether_header *);
4396 	switch (ntohs(eh->ether_type)) {
4397 	case ETHERTYPE_IP:
4398 		did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4399 		break;
4400 	case ETHERTYPE_IPV6:
4401 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4402 		break;
4403 	}
4404 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4405 	    "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4406 	    src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4407 	    m->m_pkthdr.csum_flags);
4408 }
4409 
4410 static errno_t
4411 bridge_transmit(struct ifnet * ifp, struct mbuf *m)
4412 {
4413 	struct flowadv  adv = { .code = FADV_SUCCESS };
4414 	errno_t         error;
4415 
4416 	error = dlil_output(ifp, 0, m, NULL, NULL, 1, &adv);
4417 	if (error == 0) {
4418 		if (adv.code == FADV_FLOW_CONTROLLED) {
4419 			error = EQFULL;
4420 		} else if (adv.code == FADV_SUSPENDED) {
4421 			error = EQSUSPENDED;
4422 		}
4423 	}
4424 	return error;
4425 }
4426 
4427 static int
4428 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4429     bool *is_fragmented)
4430 {
4431 	int newoff;
4432 
4433 	*is_fragmented = false;
4434 	while (1) {
4435 		newoff = ip6_nexthdr(m, off, proto, nxtp);
4436 		if (newoff < 0) {
4437 			return off;
4438 		} else if (newoff < off) {
4439 			return -1;    /* invalid */
4440 		} else if (newoff == off) {
4441 			return newoff;
4442 		}
4443 		off = newoff;
4444 		proto = *nxtp;
4445 		if (proto == IPPROTO_FRAGMENT) {
4446 			*is_fragmented = true;
4447 		}
4448 	}
4449 }
4450 
4451 static int
4452 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4453     ip_packet_info_t info_p, struct bripstats * stats_p)
4454 {
4455 	int             error = 0;
4456 	u_int           hlen;
4457 	u_int           ip_hlen;
4458 	u_int           ip_pay_len;
4459 	struct mbuf *   m0 = *mp;
4460 	int             off;
4461 	int             opt_len = 0;
4462 	int             proto = 0;
4463 
4464 	bzero(info_p, sizeof(*info_p));
4465 	if (is_ipv4) {
4466 		struct ip *     ip;
4467 		u_int           ip_total_len;
4468 
4469 		/* IPv4 */
4470 		hlen = mac_hlen + sizeof(struct ip);
4471 		if (m0->m_pkthdr.len < hlen) {
4472 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4473 			    "Short IP packet %d < %d",
4474 			    m0->m_pkthdr.len, hlen);
4475 			error = _EBADIP;
4476 			stats_p->bips_bad_ip++;
4477 			goto done;
4478 		}
4479 		if (m0->m_len < hlen) {
4480 			*mp = m0 = m_pullup(m0, hlen);
4481 			if (m0 == NULL) {
4482 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4483 				    "m_pullup failed hlen %d",
4484 				    hlen);
4485 				error = ENOBUFS;
4486 				stats_p->bips_bad_ip++;
4487 				goto done;
4488 			}
4489 		}
4490 		ip = (struct ip *)(void *)(mtod(m0, uint8_t *) + mac_hlen);
4491 		if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4492 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4493 			    "bad IP version");
4494 			error = _EBADIP;
4495 			stats_p->bips_bad_ip++;
4496 			goto done;
4497 		}
4498 		ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4499 		if (ip_hlen < sizeof(struct ip)) {
4500 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4501 			    "bad IP header length %d < %d",
4502 			    ip_hlen,
4503 			    (int)sizeof(struct ip));
4504 			error = _EBADIP;
4505 			stats_p->bips_bad_ip++;
4506 			goto done;
4507 		}
4508 		hlen = mac_hlen + ip_hlen;
4509 		if (m0->m_len < hlen) {
4510 			*mp = m0 = m_pullup(m0, hlen);
4511 			if (m0 == NULL) {
4512 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4513 				    "m_pullup failed hlen %d",
4514 				    hlen);
4515 				error = ENOBUFS;
4516 				stats_p->bips_bad_ip++;
4517 				goto done;
4518 			}
4519 		}
4520 
4521 		ip_total_len = ntohs(ip->ip_len);
4522 		if (ip_total_len < ip_hlen) {
4523 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4524 			    "IP total len %d < header len %d",
4525 			    ip_total_len, ip_hlen);
4526 			error = _EBADIP;
4527 			stats_p->bips_bad_ip++;
4528 			goto done;
4529 		}
4530 		if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4531 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4532 			    "invalid IP payload length %d > %d",
4533 			    ip_total_len,
4534 			    (m0->m_pkthdr.len - mac_hlen));
4535 			error = _EBADIP;
4536 			stats_p->bips_bad_ip++;
4537 			goto done;
4538 		}
4539 		ip_pay_len = ip_total_len - ip_hlen;
4540 		info_p->ip_proto = ip->ip_p;
4541 		info_p->ip_hdr.ip = ip;
4542 #define FRAG_BITS       (IP_OFFMASK | IP_MF)
4543 		if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4544 			info_p->ip_is_fragmented = true;
4545 		}
4546 		stats_p->bips_ip++;
4547 	} else {
4548 		struct ip6_hdr *ip6;
4549 
4550 		/* IPv6 */
4551 		hlen = mac_hlen + sizeof(struct ip6_hdr);
4552 		if (m0->m_pkthdr.len < hlen) {
4553 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4554 			    "short IPv6 packet %d < %d",
4555 			    m0->m_pkthdr.len, hlen);
4556 			error = _EBADIPV6;
4557 			stats_p->bips_bad_ip6++;
4558 			goto done;
4559 		}
4560 		if (m0->m_len < hlen) {
4561 			*mp = m0 = m_pullup(m0, hlen);
4562 			if (m0 == NULL) {
4563 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4564 				    "m_pullup failed hlen %d",
4565 				    hlen);
4566 				error = ENOBUFS;
4567 				stats_p->bips_bad_ip6++;
4568 				goto done;
4569 			}
4570 		}
4571 		ip6 = (struct ip6_hdr *)(mtod(m0, uint8_t *) + mac_hlen);
4572 		if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4573 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4574 			    "bad IPv6 version");
4575 			error = _EBADIPV6;
4576 			stats_p->bips_bad_ip6++;
4577 			goto done;
4578 		}
4579 		off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4580 		    &info_p->ip_is_fragmented);
4581 		if (off < 0 || m0->m_pkthdr.len < off) {
4582 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4583 			    "ip6_lasthdr() returned %d",
4584 			    off);
4585 			error = _EBADIPV6;
4586 			stats_p->bips_bad_ip6++;
4587 			goto done;
4588 		}
4589 		ip_hlen = sizeof(*ip6);
4590 		opt_len = off - mac_hlen - ip_hlen;
4591 		if (opt_len < 0) {
4592 			error = _EBADIPV6;
4593 			stats_p->bips_bad_ip6++;
4594 			goto done;
4595 		}
4596 		info_p->ip_proto = proto;
4597 		info_p->ip_hdr.ip6 = ip6;
4598 		ip_pay_len = ntohs(ip6->ip6_plen);
4599 		if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4600 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4601 			    "invalid IPv6 payload length %d > %d",
4602 			    ip_pay_len,
4603 			    (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4604 			error = _EBADIPV6;
4605 			stats_p->bips_bad_ip6++;
4606 			goto done;
4607 		}
4608 		stats_p->bips_ip6++;
4609 	}
4610 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4611 	    "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4612 	    is_ipv4 ? '4' : '6',
4613 	    proto, ip_hlen, ip_pay_len, opt_len,
4614 	    m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4615 	info_p->ip_hlen = ip_hlen;
4616 	info_p->ip_pay_len = ip_pay_len;
4617 	info_p->ip_opt_len = opt_len;
4618 	info_p->ip_is_ipv4 = is_ipv4;
4619 done:
4620 	return error;
4621 }
4622 
4623 static int
4624 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4625     ip_packet_info_t info_p, struct bripstats * stats_p)
4626 {
4627 	int             error;
4628 	u_int           hlen;
4629 
4630 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4631 	if (error != 0) {
4632 		goto done;
4633 	}
4634 	if (info_p->ip_proto != IPPROTO_TCP) {
4635 		/* not a TCP frame, not an error, just a bad guess */
4636 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4637 		    "non-TCP (%d) IPv%c frame %d bytes",
4638 		    info_p->ip_proto, is_ipv4 ? '4' : '6',
4639 		    (*mp)->m_pkthdr.len);
4640 		goto done;
4641 	}
4642 	if (info_p->ip_is_fragmented) {
4643 		/* both TSO and IP fragmentation don't make sense */
4644 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4645 		    "fragmented TSO packet?");
4646 		stats_p->bips_bad_tcp++;
4647 		error = _EBADTCP;
4648 		goto done;
4649 	}
4650 	hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4651 	    info_p->ip_opt_len;
4652 	if ((*mp)->m_len < hlen) {
4653 		*mp = m_pullup(*mp, hlen);
4654 		if (*mp == NULL) {
4655 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4656 			    "m_pullup %d failed",
4657 			    hlen);
4658 			stats_p->bips_bad_tcp++;
4659 			error = _EBADTCP;
4660 			goto done;
4661 		}
4662 	}
4663 	info_p->ip_proto_hdr = ((caddr_t)info_p->ip_hdr.ptr) +
4664 	    info_p->ip_hlen + info_p->ip_opt_len;
4665 done:
4666 	return error;
4667 }
4668 
4669 static inline void
4670 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4671 {
4672 	if (proto == IPPROTO_TCP) {
4673 		stats_p->brcs_tcp_checksum++;
4674 	} else {
4675 		stats_p->brcs_udp_checksum++;
4676 	}
4677 	return;
4678 }
4679 
4680 static bool
4681 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4682 {
4683 	uint16_t        ether_type;
4684 	bool            is_ip = TRUE;
4685 
4686 	ether_type = ntohs(eh->ether_type);
4687 	switch (ether_type) {
4688 	case ETHERTYPE_IP:
4689 		*is_ipv4 = TRUE;
4690 		break;
4691 	case ETHERTYPE_IPV6:
4692 		*is_ipv4 = FALSE;
4693 		break;
4694 	default:
4695 		is_ip = FALSE;
4696 		break;
4697 	}
4698 	return is_ip;
4699 }
4700 
4701 static errno_t
4702 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4703 {
4704 	struct brcsumstats *csum_stats_p;
4705 	struct ether_header     *eh;
4706 	errno_t         error = 0;
4707 	ip_packet_info  info;
4708 	bool            is_ipv4;
4709 	struct mbuf *   m;
4710 	u_int           mac_hlen = sizeof(struct ether_header);
4711 	uint16_t        sum;
4712 	bool            valid;
4713 
4714 	eh = mtod(*mp, struct ether_header *);
4715 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4716 		goto done;
4717 	}
4718 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4719 	    &stats_p->brms_out_ip);
4720 	m = *mp;
4721 	if (error != 0) {
4722 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4723 		    "bridge_get_ip_proto failed %d",
4724 		    error);
4725 		goto done;
4726 	}
4727 	if (is_ipv4) {
4728 		if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4729 			/* hardware offloaded IP header checksum */
4730 			valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4731 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4732 			    "IP checksum HW %svalid",
4733 			    valid ? "" : "in");
4734 			if (!valid) {
4735 				stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum++;
4736 				error = _EBADIPCHECKSUM;
4737 				goto done;
4738 			}
4739 			stats_p->brms_out_cksum_good_hw.brcs_ip_checksum++;
4740 		} else {
4741 			/* verify */
4742 			sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4743 			valid = (sum == 0);
4744 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4745 			    "IP checksum SW %svalid",
4746 			    valid ? "" : "in");
4747 			if (!valid) {
4748 				stats_p->brms_out_cksum_bad.brcs_ip_checksum++;
4749 				error = _EBADIPCHECKSUM;
4750 				goto done;
4751 			}
4752 			stats_p->brms_out_cksum_good.brcs_ip_checksum++;
4753 		}
4754 	}
4755 	if (info.ip_is_fragmented) {
4756 		/* can't verify checksum on fragmented packets */
4757 		goto done;
4758 	}
4759 	switch (info.ip_proto) {
4760 	case IPPROTO_TCP:
4761 		stats_p->brms_out_ip.bips_tcp++;
4762 		break;
4763 	case IPPROTO_UDP:
4764 		stats_p->brms_out_ip.bips_udp++;
4765 		break;
4766 	default:
4767 		goto done;
4768 	}
4769 	/* check for hardware offloaded UDP/TCP checksum */
4770 #define HW_CSUM         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4771 	if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4772 		/* checksum verified by hardware */
4773 		valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4774 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4775 		    "IPv%c %s checksum HW 0x%x %svalid",
4776 		    is_ipv4 ? '4' : '6',
4777 		    (info.ip_proto == IPPROTO_TCP)
4778 		    ? "TCP" : "UDP",
4779 		    m->m_pkthdr.csum_data,
4780 		    valid ? "" : "in" );
4781 		if (!valid) {
4782 			/* bad checksum */
4783 			csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4784 			error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4785 			    : _EBADTCPCHECKSUM;
4786 		} else {
4787 			/* good checksum */
4788 			csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4789 		}
4790 		proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4791 		goto done;
4792 	}
4793 	m->m_data += mac_hlen;
4794 	m->m_len -= mac_hlen;
4795 	m->m_pkthdr.len -= mac_hlen;
4796 	if (is_ipv4) {
4797 		sum = inet_cksum(m, info.ip_proto,
4798 		    info.ip_hlen,
4799 		    info.ip_pay_len);
4800 	} else {
4801 		sum = inet6_cksum(m, info.ip_proto,
4802 		    info.ip_hlen + info.ip_opt_len,
4803 		    info.ip_pay_len - info.ip_opt_len);
4804 	}
4805 	valid = (sum == 0);
4806 	if (valid) {
4807 		csum_stats_p = &stats_p->brms_out_cksum_good;
4808 	} else {
4809 		csum_stats_p = &stats_p->brms_out_cksum_bad;
4810 		error = (info.ip_proto == IPPROTO_TCP)
4811 		    ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4812 	}
4813 	proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4814 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4815 	    "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4816 	    is_ipv4 ? '4' : '6',
4817 	    (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4818 	    valid ? "" : "in",
4819 	    sum, info.ip_hlen, info.ip_pay_len);
4820 	m->m_data -= mac_hlen;
4821 	m->m_len += mac_hlen;
4822 	m->m_pkthdr.len += mac_hlen;
4823 done:
4824 	return error;
4825 }
4826 
4827 static errno_t
4828 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
4829     struct ifbrmstats * stats_p)
4830 {
4831 	uint16_t *      csum_p;
4832 	errno_t         error = 0;
4833 	u_int           hlen;
4834 	struct mbuf *   m0 = *mp;
4835 	u_int           mac_hlen = sizeof(struct ether_header);
4836 	u_int           pkt_hdr_len;
4837 	struct tcphdr * tcp;
4838 	u_int           tcp_hlen;
4839 	struct udphdr * udp;
4840 
4841 	if (info_p->ip_is_ipv4) {
4842 		/* compute IP header checksum */
4843 		info_p->ip_hdr.ip->ip_sum = 0;
4844 		info_p->ip_hdr.ip->ip_sum = inet_cksum(m0, 0, mac_hlen,
4845 		    info_p->ip_hlen);
4846 		stats_p->brms_in_computed_cksum.brcs_ip_checksum++;
4847 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4848 		    "IPv4 checksum 0x%x",
4849 		    ntohs(info_p->ip_hdr.ip->ip_sum));
4850 	}
4851 	if (info_p->ip_is_fragmented) {
4852 		/* can't compute checksum on fragmented packets */
4853 		goto done;
4854 	}
4855 	pkt_hdr_len = m0->m_pkthdr.len;
4856 	switch (info_p->ip_proto) {
4857 	case IPPROTO_TCP:
4858 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
4859 		    + sizeof(struct tcphdr);
4860 		if (m0->m_len < hlen) {
4861 			*mp = m0 = m_pullup(m0, hlen);
4862 			if (m0 == NULL) {
4863 				stats_p->brms_in_ip.bips_bad_tcp++;
4864 				error = _EBADTCP;
4865 				goto done;
4866 			}
4867 		}
4868 		tcp = (struct tcphdr *)(void *)
4869 		    ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4870 		    + info_p->ip_opt_len);
4871 		tcp_hlen = tcp->th_off << 2;
4872 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
4873 		if (hlen > pkt_hdr_len) {
4874 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4875 			    "bad tcp header length %u",
4876 			    tcp_hlen);
4877 			stats_p->brms_in_ip.bips_bad_tcp++;
4878 			error = _EBADTCP;
4879 			goto done;
4880 		}
4881 		csum_p = &tcp->th_sum;
4882 		stats_p->brms_in_ip.bips_tcp++;
4883 		break;
4884 	case IPPROTO_UDP:
4885 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
4886 		if (m0->m_len < hlen) {
4887 			*mp = m0 = m_pullup(m0, hlen);
4888 			if (m0 == NULL) {
4889 				stats_p->brms_in_ip.bips_bad_udp++;
4890 				error = ENOBUFS;
4891 				goto done;
4892 			}
4893 		}
4894 		udp = (struct udphdr *)(void *)
4895 		    ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4896 		    + info_p->ip_opt_len);
4897 		csum_p = &udp->uh_sum;
4898 		stats_p->brms_in_ip.bips_udp++;
4899 		break;
4900 	default:
4901 		/* not TCP or UDP */
4902 		goto done;
4903 	}
4904 	*csum_p = 0;
4905 	m0->m_data += mac_hlen;
4906 	m0->m_len -= mac_hlen;
4907 	m0->m_pkthdr.len -= mac_hlen;
4908 	if (info_p->ip_is_ipv4) {
4909 		*csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
4910 		    info_p->ip_pay_len);
4911 	} else {
4912 		*csum_p = inet6_cksum(m0, info_p->ip_proto,
4913 		    info_p->ip_hlen + info_p->ip_opt_len,
4914 		    info_p->ip_pay_len - info_p->ip_opt_len);
4915 	}
4916 	if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
4917 		/* RFC 1122 4.1.3.4 */
4918 		*csum_p = 0xffff;
4919 	}
4920 	m0->m_data -= mac_hlen;
4921 	m0->m_len += mac_hlen;
4922 	m0->m_pkthdr.len += mac_hlen;
4923 	proto_csum_stats_increment(info_p->ip_proto,
4924 	    &stats_p->brms_in_computed_cksum);
4925 
4926 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4927 	    "IPv%c %s set checksum 0x%x",
4928 	    info_p->ip_is_ipv4 ? '4' : '6',
4929 	    (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4930 	    ntohs(*csum_p));
4931 done:
4932 	return error;
4933 }
4934 
4935 static errno_t
4936 bridge_send(struct ifnet *src_ifp,
4937     struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4938 {
4939 	switch (cksum_op) {
4940 	case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
4941 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4942 		break;
4943 	case CHECKSUM_OPERATION_FINALIZE:
4944 		/* the checksum might not be correct, finalize now */
4945 		bridge_finalize_cksum(dst_ifp, m);
4946 		break;
4947 	case CHECKSUM_OPERATION_COMPUTE:
4948 		bridge_compute_cksum(src_ifp, dst_ifp, m);
4949 		break;
4950 	default:
4951 		break;
4952 	}
4953 #if HAS_IF_CAP
4954 	/*
4955 	 * If underlying interface can not do VLAN tag insertion itself
4956 	 * then attach a packet tag that holds it.
4957 	 */
4958 	if ((m->m_flags & M_VLANTAG) &&
4959 	    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4960 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4961 		if (m == NULL) {
4962 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4963 			    "%s: unable to prepend VLAN header",
4964 			    dst_ifp->if_xname);
4965 			(void) ifnet_stat_increment_out(dst_ifp,
4966 			    0, 0, 1);
4967 			return 0;
4968 		}
4969 		m->m_flags &= ~M_VLANTAG;
4970 	}
4971 #endif /* HAS_IF_CAP */
4972 	return bridge_transmit(dst_ifp, m);
4973 }
4974 
4975 static errno_t
4976 bridge_send_tso(struct ifnet *dst_ifp, struct mbuf *m, bool is_ipv4)
4977 {
4978 	errno_t                 error;
4979 	u_int                   mac_hlen;
4980 
4981 	mac_hlen = sizeof(struct ether_header);
4982 
4983 #if HAS_IF_CAP
4984 	/*
4985 	 * If underlying interface can not do VLAN tag insertion itself
4986 	 * then attach a packet tag that holds it.
4987 	 */
4988 	if ((m->m_flags & M_VLANTAG) &&
4989 	    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4990 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4991 		if (m == NULL) {
4992 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4993 			    "%s: unable to prepend VLAN header",
4994 			    dst_ifp->if_xname);
4995 			(void) ifnet_stat_increment_out(dst_ifp,
4996 			    0, 0, 1);
4997 			error = ENOBUFS;
4998 			goto done;
4999 		}
5000 		m->m_flags &= ~M_VLANTAG;
5001 		mac_hlen += ETHER_VLAN_ENCAP_LEN;
5002 	}
5003 #endif /* HAS_IF_CAP */
5004 	error = gso_tcp(dst_ifp, &m, mac_hlen, is_ipv4, TRUE);
5005 	return error;
5006 }
5007 
5008 static uint32_t
5009 get_if_tso_mtu(struct ifnet * ifp, bool is_ipv4)
5010 {
5011 	uint32_t tso_mtu;
5012 
5013 	tso_mtu = is_ipv4 ? ifp->if_tso_v4_mtu : ifp->if_tso_v6_mtu;
5014 	if (tso_mtu == 0) {
5015 		tso_mtu = IP_MAXPACKET;
5016 	}
5017 
5018 #if DEBUG || DEVELOPMENT
5019 #define REDUCED_TSO_MTU         (16 * 1024)
5020 	if (if_bridge_reduce_tso_mtu != 0 && tso_mtu > REDUCED_TSO_MTU) {
5021 		tso_mtu = REDUCED_TSO_MTU;
5022 	}
5023 #endif /* DEBUG || DEVELOPMENT */
5024 	return tso_mtu;
5025 }
5026 
5027 /*
5028  * tso_hwassist:
5029  * - determine whether the destination interface supports TSO offload
5030  * - if the packet is already marked for offload and the hardware supports
5031  *   it, just allow the packet to continue on
5032  * - if not, parse the packet headers to verify that this is a large TCP
5033  *   packet requiring segmentation; if the hardware doesn't support it
5034  *   set need_sw_tso; otherwise, mark the packet for TSO offload
5035  */
5036 static int
5037 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
5038     bool * need_sw_tso, bool * is_large_tcp)
5039 {
5040 	int             error = 0;
5041 	u_int32_t       if_csum;
5042 	u_int32_t       if_tso;
5043 	u_int32_t       mbuf_tso;
5044 	bool            supports_cksum = false;
5045 
5046 	*need_sw_tso = false;
5047 	*is_large_tcp = false;
5048 	if (is_ipv4) {
5049 		/*
5050 		 * Enable both TCP and IP offload if the hardware supports it.
5051 		 * If the hardware doesn't support TCP offload, supports_cksum
5052 		 * will be false so we won't set either offload.
5053 		 */
5054 		if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
5055 		supports_cksum = (if_csum & CSUM_TCP) != 0;
5056 		if_tso = IFNET_TSO_IPV4;
5057 		mbuf_tso = CSUM_TSO_IPV4;
5058 	} else {
5059 		supports_cksum = (ifp->if_hwassist & CSUM_TCPIPV6) != 0;
5060 		if_csum = CSUM_TCPIPV6;
5061 		if_tso = IFNET_TSO_IPV6;
5062 		mbuf_tso = CSUM_TSO_IPV6;
5063 	}
5064 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5065 	    "%s: does%s support checksum 0x%x if_csum 0x%x",
5066 	    ifp->if_xname, supports_cksum ? "" : " not",
5067 	    ifp->if_hwassist, if_csum);
5068 	if ((ifp->if_hwassist & if_tso) != 0 &&
5069 	    ((*mp)->m_pkthdr.csum_flags & mbuf_tso) != 0) {
5070 		/* hardware TSO, mbuf already marked */
5071 	} else {
5072 		/* verify that this is a large TCP frame */
5073 		uint32_t                csum_flags;
5074 		ip_packet_info          info;
5075 		int                     mss;
5076 		uint32_t                pkt_mtu;
5077 		struct bripstats        stats;
5078 		struct tcphdr *         tcp;
5079 		uint32_t                tso_mtu;
5080 
5081 		error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
5082 		    &info, &stats);
5083 		if (error != 0) {
5084 			/* bad packet */
5085 			goto done;
5086 		}
5087 		if (info.ip_proto_hdr == NULL) {
5088 			/* not a TCP packet */
5089 			goto done;
5090 		}
5091 		pkt_mtu = info.ip_hlen + info.ip_pay_len + info.ip_opt_len;
5092 		if (pkt_mtu <= ifp->if_mtu) {
5093 			/* not actually a large packet */
5094 			goto done;
5095 		}
5096 		if ((ifp->if_hwassist & if_tso) == 0) {
5097 			/* hardware does not support TSO, enable sw tso */
5098 			*need_sw_tso = if_bridge_segmentation != 0;
5099 			goto done;
5100 		}
5101 		tso_mtu = get_if_tso_mtu(ifp, is_ipv4);
5102 		if (pkt_mtu > tso_mtu) {
5103 			/* hardware can't segment this, enable sw tso */
5104 			*need_sw_tso = if_bridge_segmentation != 0;
5105 			goto done;
5106 		}
5107 
5108 		/* use hardware TSO */
5109 		(*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
5110 		tcp = (struct tcphdr *)info.ip_proto_hdr;
5111 		mss = ifp->if_mtu - info.ip_hlen - info.ip_opt_len
5112 		    - (tcp->th_off << 2) - if_bridge_tso_reduce_mss_tx;
5113 		assert(mss > 0);
5114 		csum_flags = mbuf_tso;
5115 		if (supports_cksum) {
5116 			csum_flags |= if_csum;
5117 		}
5118 		(*mp)->m_pkthdr.tso_segsz = mss;
5119 		(*mp)->m_pkthdr.csum_flags |= csum_flags;
5120 		(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
5121 		*is_large_tcp = true;
5122 	}
5123 done:
5124 	return error;
5125 }
5126 
5127 /*
5128  * bridge_enqueue:
5129  *
5130  *	Enqueue a packet on a bridge member interface.
5131  *
5132  */
5133 static errno_t
5134 bridge_enqueue(ifnet_t bridge_ifp, struct ifnet *src_ifp,
5135     struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
5136 {
5137 	errno_t         error = 0;
5138 	int             len;
5139 
5140 	VERIFY(dst_ifp != NULL);
5141 
5142 	/*
5143 	 * We may be sending a fragment so traverse the mbuf
5144 	 *
5145 	 * NOTE: bridge_fragment() is called only when PFIL_HOOKS is enabled.
5146 	 */
5147 	for (struct mbuf *next_m = NULL; m != NULL; m = next_m) {
5148 		bool            need_sw_tso = false;
5149 		bool            is_ipv4 = false;
5150 		bool            is_large_pkt;
5151 		errno_t         _error = 0;
5152 
5153 		len = m->m_pkthdr.len;
5154 		m->m_flags |= M_PROTO1; /* set to avoid loops */
5155 		next_m = m->m_nextpkt;
5156 		m->m_nextpkt = NULL;
5157 		/*
5158 		 * Need to segment the packet if it is a large frame
5159 		 * and the destination interface does not support TSO.
5160 		 *
5161 		 * Note that with trailers, it's possible for a packet to
5162 		 * be large but not actually require segmentation.
5163 		 */
5164 		is_large_pkt = (len > (bridge_ifp->if_mtu + ETHER_HDR_LEN));
5165 		if (is_large_pkt) {
5166 			struct ether_header     *eh;
5167 			bool                    is_large_tcp = false;
5168 
5169 			eh = mtod(m, struct ether_header *);
5170 			if (ether_header_type_is_ip(eh, &is_ipv4)) {
5171 				_error = tso_hwassist(&m, is_ipv4,
5172 				    dst_ifp, sizeof(struct ether_header),
5173 				    &need_sw_tso, &is_large_tcp);
5174 				if (is_large_tcp) {
5175 					cksum_op = CHECKSUM_OPERATION_NONE;
5176 				}
5177 			} else {
5178 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5179 				    "large non IP packet");
5180 			}
5181 		}
5182 		if (_error != 0) {
5183 			if (m != NULL) {
5184 				m_freem(m);
5185 			}
5186 		} else if (need_sw_tso) {
5187 			_error = bridge_send_tso(dst_ifp, m, is_ipv4);
5188 		} else {
5189 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5190 			    "%s bridge_send(%s) len %d op %d",
5191 			    bridge_ifp->if_xname,
5192 			    dst_ifp->if_xname,
5193 			    len, cksum_op);
5194 			_error = bridge_send(src_ifp, dst_ifp, m, cksum_op);
5195 		}
5196 
5197 		/* Preserve first error value */
5198 		if (error == 0 && _error != 0) {
5199 			error = _error;
5200 		}
5201 		if (_error == 0) {
5202 			(void) ifnet_stat_increment_out(bridge_ifp, 1, len, 0);
5203 		} else {
5204 			(void) ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
5205 		}
5206 	}
5207 
5208 	return error;
5209 }
5210 
5211 #if HAS_BRIDGE_DUMMYNET
5212 /*
5213  * bridge_dummynet:
5214  *
5215  *	Receive a queued packet from dummynet and pass it on to the output
5216  *	interface.
5217  *
5218  *	The mbuf has the Ethernet header already attached.
5219  */
5220 static void
5221 bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
5222 {
5223 	struct bridge_softc *sc;
5224 
5225 	sc = ifp->if_bridge;
5226 
5227 	/*
5228 	 * The packet didn't originate from a member interface. This should only
5229 	 * ever happen if a member interface is removed while packets are
5230 	 * queued for it.
5231 	 */
5232 	if (sc == NULL) {
5233 		m_freem(m);
5234 		return;
5235 	}
5236 
5237 	if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
5238 		if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) {
5239 			return;
5240 		}
5241 		if (m == NULL) {
5242 			return;
5243 		}
5244 	}
5245 	(void) bridge_enqueue(sc->sc_ifp, NULL, ifp, m, CHECKSUM_OPERATION_NONE);
5246 }
5247 
5248 #endif /* HAS_BRIDGE_DUMMYNET */
5249 
5250 /*
5251  * bridge_member_output:
5252  *
5253  *	Send output from a bridge member interface.  This
5254  *	performs the bridging function for locally originated
5255  *	packets.
5256  *
5257  *	The mbuf has the Ethernet header already attached.
5258  */
5259 static errno_t
5260 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5261 {
5262 	ifnet_t bridge_ifp;
5263 	struct ether_header *eh;
5264 	struct ifnet *dst_if;
5265 	uint16_t vlan;
5266 	struct bridge_iflist *mac_nat_bif;
5267 	ifnet_t mac_nat_ifp;
5268 	mbuf_t m = *data;
5269 
5270 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5271 	    "ifp %s", ifp->if_xname);
5272 	if (m->m_len < ETHER_HDR_LEN) {
5273 		m = m_pullup(m, ETHER_HDR_LEN);
5274 		if (m == NULL) {
5275 			*data = NULL;
5276 			return EJUSTRETURN;
5277 		}
5278 	}
5279 
5280 	eh = mtod(m, struct ether_header *);
5281 	vlan = VLANTAGOF(m);
5282 
5283 	BRIDGE_LOCK(sc);
5284 	mac_nat_bif = sc->sc_mac_nat_bif;
5285 	mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5286 	if (mac_nat_ifp == ifp) {
5287 		/* record the IP address used by the MAC NAT interface */
5288 		(void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5289 		m = *data;
5290 		if (m == NULL) {
5291 			/* packet was deallocated */
5292 			BRIDGE_UNLOCK(sc);
5293 			return EJUSTRETURN;
5294 		}
5295 	}
5296 	bridge_ifp = sc->sc_ifp;
5297 
5298 	/*
5299 	 * APPLE MODIFICATION
5300 	 * If the packet is an 802.1X ethertype, then only send on the
5301 	 * original output interface.
5302 	 */
5303 	if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5304 		dst_if = ifp;
5305 		goto sendunicast;
5306 	}
5307 
5308 	/*
5309 	 * If bridge is down, but the original output interface is up,
5310 	 * go ahead and send out that interface.  Otherwise, the packet
5311 	 * is dropped below.
5312 	 */
5313 	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5314 		dst_if = ifp;
5315 		goto sendunicast;
5316 	}
5317 
5318 	/*
5319 	 * If the packet is a multicast, or we don't know a better way to
5320 	 * get there, send to all interfaces.
5321 	 */
5322 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5323 		dst_if = NULL;
5324 	} else {
5325 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
5326 	}
5327 	if (dst_if == NULL) {
5328 		struct bridge_iflist *bif;
5329 		struct mbuf *mc;
5330 		errno_t error;
5331 
5332 
5333 		bridge_span(sc, m);
5334 
5335 		BRIDGE_LOCK2REF(sc, error);
5336 		if (error != 0) {
5337 			m_freem(m);
5338 			return EJUSTRETURN;
5339 		}
5340 
5341 		/*
5342 		 * Duplicate and send the packet across all member interfaces
5343 		 * except the originating interface.
5344 		 */
5345 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5346 			dst_if = bif->bif_ifp;
5347 			if (dst_if == ifp) {
5348 				/* skip the originating interface */
5349 				continue;
5350 			}
5351 			/* skip interface with inactive link status */
5352 			if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5353 				continue;
5354 			}
5355 #if 0
5356 			if (dst_if->if_type == IFT_GIF) {
5357 				continue;
5358 			}
5359 #endif
5360 			/* skip interface that isn't running */
5361 			if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5362 				continue;
5363 			}
5364 			/*
5365 			 * If the interface is participating in spanning
5366 			 * tree, make sure the port is in a state that
5367 			 * allows forwarding.
5368 			 */
5369 			if ((bif->bif_ifflags & IFBIF_STP) &&
5370 			    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5371 				continue;
5372 			}
5373 			/*
5374 			 * If the destination is the MAC NAT interface,
5375 			 * skip sending the packet. The packet can't be sent
5376 			 * if the source MAC is incorrect.
5377 			 */
5378 			if (dst_if == mac_nat_ifp) {
5379 				continue;
5380 			}
5381 
5382 			/* make a deep copy to send on this member interface */
5383 			mc = m_dup(m, M_DONTWAIT);
5384 			if (mc == NULL) {
5385 				(void)ifnet_stat_increment_out(bridge_ifp,
5386 				    0, 0, 1);
5387 				continue;
5388 			}
5389 			(void)bridge_enqueue(bridge_ifp, ifp, dst_if,
5390 			    mc, CHECKSUM_OPERATION_COMPUTE);
5391 		}
5392 		BRIDGE_UNREF(sc);
5393 
5394 		if ((ifp->if_flags & IFF_RUNNING) == 0) {
5395 			m_freem(m);
5396 			return EJUSTRETURN;
5397 		}
5398 		/* allow packet to continue on the originating interface */
5399 		return 0;
5400 	}
5401 
5402 sendunicast:
5403 	/*
5404 	 * XXX Spanning tree consideration here?
5405 	 */
5406 
5407 	bridge_span(sc, m);
5408 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5409 		m_freem(m);
5410 		BRIDGE_UNLOCK(sc);
5411 		return EJUSTRETURN;
5412 	}
5413 
5414 	BRIDGE_UNLOCK(sc);
5415 	if (dst_if == ifp) {
5416 		/* allow packet to continue on the originating interface */
5417 		return 0;
5418 	}
5419 	if (dst_if != mac_nat_ifp) {
5420 		(void) bridge_enqueue(bridge_ifp, ifp, dst_if, m,
5421 		    CHECKSUM_OPERATION_COMPUTE);
5422 	} else {
5423 		/*
5424 		 * This is not the original output interface
5425 		 * and the destination is the MAC NAT interface.
5426 		 * Drop the packet because the packet can't be sent
5427 		 * if the source MAC is incorrect.
5428 		 */
5429 		m_freem(m);
5430 	}
5431 	return EJUSTRETURN;
5432 }
5433 
5434 /*
5435  * Output callback.
5436  *
5437  * This routine is called externally from above only when if_bridge_txstart
5438  * is disabled; otherwise it is called internally by bridge_start().
5439  */
5440 static int
5441 bridge_output(struct ifnet *ifp, struct mbuf *m)
5442 {
5443 	struct bridge_softc *sc = ifnet_softc(ifp);
5444 	struct ether_header *eh;
5445 	struct ifnet *dst_if = NULL;
5446 	int error = 0;
5447 
5448 	eh = mtod(m, struct ether_header *);
5449 
5450 	BRIDGE_LOCK(sc);
5451 
5452 	if (!(m->m_flags & (M_BCAST | M_MCAST))) {
5453 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0);
5454 	}
5455 
5456 	(void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5457 
5458 #if NBPFILTER > 0
5459 	if (sc->sc_bpf_output) {
5460 		bridge_bpf_output(ifp, m);
5461 	}
5462 #endif
5463 
5464 	if (dst_if == NULL) {
5465 		/* callee will unlock */
5466 		bridge_broadcast(sc, NULL, m, 0);
5467 	} else {
5468 		ifnet_t bridge_ifp;
5469 
5470 		bridge_ifp = sc->sc_ifp;
5471 		BRIDGE_UNLOCK(sc);
5472 
5473 		error = bridge_enqueue(bridge_ifp, NULL, dst_if, m,
5474 		    CHECKSUM_OPERATION_FINALIZE);
5475 	}
5476 
5477 	return error;
5478 }
5479 
5480 static void
5481 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5482 {
5483 	struct ether_header *eh;
5484 	bool is_ipv4;
5485 	uint32_t sw_csum, hwcap;
5486 	uint32_t did_sw;
5487 	uint32_t csum_flags;
5488 
5489 	eh = mtod(m, struct ether_header *);
5490 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5491 		return;
5492 	}
5493 
5494 	/* do in software what the hardware cannot */
5495 	hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5496 	csum_flags = m->m_pkthdr.csum_flags;
5497 	sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5498 	sw_csum &= IF_HWASSIST_CSUM_MASK;
5499 
5500 	if (is_ipv4) {
5501 		if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5502 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5503 			if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5504 				uint16_t start =
5505 				    sizeof(*eh) + sizeof(struct ip);
5506 				uint16_t ulpoff =
5507 				    m->m_pkthdr.csum_data & 0xffff;
5508 				m->m_pkthdr.csum_flags |=
5509 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5510 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5511 				m->m_pkthdr.csum_tx_start = start;
5512 			} else {
5513 				sw_csum |= (CSUM_DELAY_DATA &
5514 				    m->m_pkthdr.csum_flags);
5515 			}
5516 		}
5517 		did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5518 	} else {
5519 		if ((hwcap & CSUM_PARTIAL) &&
5520 		    !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5521 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5522 			if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5523 				uint16_t start =
5524 				    sizeof(*eh) + sizeof(struct ip6_hdr);
5525 				uint16_t ulpoff =
5526 				    m->m_pkthdr.csum_data & 0xffff;
5527 				m->m_pkthdr.csum_flags |=
5528 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5529 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5530 				m->m_pkthdr.csum_tx_start = start;
5531 			} else {
5532 				sw_csum |= (CSUM_DELAY_IPV6_DATA &
5533 				    m->m_pkthdr.csum_flags);
5534 			}
5535 		}
5536 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5537 	}
5538 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5539 	    "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5540 	    ifp->if_xname, csum_flags, hwcap, sw_csum,
5541 	    did_sw, m->m_pkthdr.csum_flags);
5542 }
5543 
/*
 * bridge_start:
 *
 *	Start output on a bridge: drain the interface send queue and pass
 *	each dequeued packet to bridge_output().
 *
 * This routine is invoked by the start worker thread; because we never call
 * it directly, there is no need to deploy any serialization mechanism other
 * than what's already used by the worker thread, i.e. this is already single
 * threaded.
 *
 * This routine is called only when if_bridge_txstart is enabled.
 */
static void
bridge_start(struct ifnet *ifp)
{
	struct mbuf *pkt;

	/* keep going until ifnet_dequeue() reports a non-zero result */
	while (ifnet_dequeue(ifp, &pkt) == 0) {
		(void) bridge_output(ifp, pkt);
	}
}
5569 
5570 /*
5571  * bridge_forward:
5572  *
5573  *	The forwarding function of the bridge.
5574  *
5575  *	NOTE: Releases the lock on return.
5576  */
5577 static void
5578 bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
5579     struct mbuf *m)
5580 {
5581 	struct bridge_iflist *dbif;
5582 	ifnet_t bridge_ifp;
5583 	struct ifnet *src_if, *dst_if;
5584 	struct ether_header *eh;
5585 	uint16_t vlan;
5586 	uint8_t *dst;
5587 	int error;
5588 	struct mac_nat_record mnr;
5589 	bool translate_mac = FALSE;
5590 	uint32_t sc_filter_flags = 0;
5591 
5592 	BRIDGE_LOCK_ASSERT_HELD(sc);
5593 
5594 	bridge_ifp = sc->sc_ifp;
5595 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5596 	    "%s m 0x%llx", bridge_ifp->if_xname,
5597 	    (uint64_t)VM_KERNEL_ADDRPERM(m));
5598 
5599 	src_if = m->m_pkthdr.rcvif;
5600 	if (src_if != sbif->bif_ifp) {
5601 		const char *    src_if_name;
5602 
5603 		src_if_name = (src_if != NULL) ? src_if->if_xname : "?";
5604 		BRIDGE_LOG(LOG_NOTICE, 0,
5605 		    "src_if %s != bif_ifp %s",
5606 		    src_if_name, sbif->bif_ifp->if_xname);
5607 		goto drop;
5608 	}
5609 
5610 	(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5611 	vlan = VLANTAGOF(m);
5612 
5613 
5614 	if ((sbif->bif_ifflags & IFBIF_STP) &&
5615 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5616 		goto drop;
5617 	}
5618 
5619 	eh = mtod(m, struct ether_header *);
5620 	dst = eh->ether_dhost;
5621 
5622 	/* If the interface is learning, record the address. */
5623 	if (sbif->bif_ifflags & IFBIF_LEARNING) {
5624 		error = bridge_rtupdate(sc, eh->ether_shost, vlan,
5625 		    sbif, 0, IFBAF_DYNAMIC);
5626 		/*
5627 		 * If the interface has addresses limits then deny any source
5628 		 * that is not in the cache.
5629 		 */
5630 		if (error && sbif->bif_addrmax) {
5631 			goto drop;
5632 		}
5633 	}
5634 
5635 	if ((sbif->bif_ifflags & IFBIF_STP) != 0 &&
5636 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
5637 		goto drop;
5638 	}
5639 
5640 	/*
5641 	 * At this point, the port either doesn't participate
5642 	 * in spanning tree or it is in the forwarding state.
5643 	 */
5644 
5645 	/*
5646 	 * If the packet is unicast, destined for someone on
5647 	 * "this" side of the bridge, drop it.
5648 	 */
5649 	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5650 		/* unicast */
5651 		dst_if = bridge_rtlookup(sc, dst, vlan);
5652 		if (src_if == dst_if) {
5653 			goto drop;
5654 		}
5655 	} else {
5656 		/* broadcast/multicast */
5657 
5658 		/*
5659 		 * Check if its a reserved multicast address, any address
5660 		 * listed in 802.1D section 7.12.6 may not be forwarded by the
5661 		 * bridge.
5662 		 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
5663 		 */
5664 		if (dst[0] == 0x01 && dst[1] == 0x80 &&
5665 		    dst[2] == 0xc2 && dst[3] == 0x00 &&
5666 		    dst[4] == 0x00 && dst[5] <= 0x0f) {
5667 			goto drop;
5668 		}
5669 
5670 
5671 		/* ...forward it to all interfaces. */
5672 		os_atomic_inc(&bridge_ifp->if_imcasts, relaxed);
5673 		dst_if = NULL;
5674 	}
5675 
5676 	/*
5677 	 * If we have a destination interface which is a member of our bridge,
5678 	 * OR this is a unicast packet, push it through the bpf(4) machinery.
5679 	 * For broadcast or multicast packets, don't bother because it will
5680 	 * be reinjected into ether_input. We do this before we pass the packets
5681 	 * through the pfil(9) framework, as it is possible that pfil(9) will
5682 	 * drop the packet, or possibly modify it, making it difficult to debug
5683 	 * firewall issues on the bridge.
5684 	 */
5685 #if NBPFILTER > 0
5686 	if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
5687 	    dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5688 		m->m_pkthdr.rcvif = bridge_ifp;
5689 		BRIDGE_BPF_MTAP_INPUT(sc, m);
5690 	}
5691 #endif /* NBPFILTER */
5692 
5693 	if (dst_if == NULL) {
5694 		/* bridge_broadcast will unlock */
5695 		bridge_broadcast(sc, sbif, m, 1);
5696 		return;
5697 	}
5698 
5699 	/*
5700 	 * Unicast.
5701 	 */
5702 	/*
5703 	 * At this point, we're dealing with a unicast frame
5704 	 * going to a different interface.
5705 	 */
5706 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5707 		goto drop;
5708 	}
5709 
5710 	dbif = bridge_lookup_member_if(sc, dst_if);
5711 	if (dbif == NULL) {
5712 		/* Not a member of the bridge (anymore?) */
5713 		goto drop;
5714 	}
5715 
5716 	/* Private segments can not talk to each other */
5717 	if (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) {
5718 		goto drop;
5719 	}
5720 
5721 	if ((dbif->bif_ifflags & IFBIF_STP) &&
5722 	    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5723 		goto drop;
5724 	}
5725 
5726 #if HAS_DHCPRA_MASK
5727 	/* APPLE MODIFICATION <rdar:6985737> */
5728 	if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
5729 		m = ip_xdhcpra_output(dst_if, m);
5730 		if (!m) {
5731 			++bridge_ifp.if_xdhcpra;
5732 			BRIDGE_UNLOCK(sc);
5733 			return;
5734 		}
5735 	}
5736 #endif /* HAS_DHCPRA_MASK */
5737 
5738 	if (dbif == sc->sc_mac_nat_bif) {
5739 		/* determine how to translate the packet */
5740 		translate_mac
5741 		        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
5742 		if (m == NULL) {
5743 			/* packet was deallocated */
5744 			BRIDGE_UNLOCK(sc);
5745 			return;
5746 		}
5747 	} else if (bif_has_checksum_offload(dbif) &&
5748 	    !bif_has_checksum_offload(sbif)) {
5749 		/*
5750 		 * If the destination interface has checksum offload enabled,
5751 		 * verify the checksum now, unless the source interface also has
5752 		 * checksum offload enabled. The checksum in that case has
5753 		 * already just been computed and verifying it is unnecessary.
5754 		 */
5755 		error = bridge_verify_checksum(&m, &dbif->bif_stats);
5756 		if (error != 0) {
5757 			BRIDGE_UNLOCK(sc);
5758 			if (m != NULL) {
5759 				m_freem(m);
5760 			}
5761 			return;
5762 		}
5763 	}
5764 
5765 	sc_filter_flags = sc->sc_filter_flags;
5766 
5767 	BRIDGE_UNLOCK(sc);
5768 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5769 		if (bridge_pf(&m, dst_if, sc_filter_flags, FALSE) != 0) {
5770 			return;
5771 		}
5772 		if (m == NULL) {
5773 			return;
5774 		}
5775 	}
5776 
5777 	/* if we need to, translate the MAC address */
5778 	if (translate_mac) {
5779 		bridge_mac_nat_translate(&m, &mnr, IF_LLADDR(dst_if));
5780 	}
5781 	/*
5782 	 * We're forwarding an inbound packet in which the checksum must
5783 	 * already have been computed and if required, verified.
5784 	 */
5785 	if (m != NULL) {
5786 		(void) bridge_enqueue(bridge_ifp, src_if, dst_if, m,
5787 		    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
5788 	}
5789 	return;
5790 
5791 drop:
5792 	BRIDGE_UNLOCK(sc);
5793 	m_freem(m);
5794 }
5795 
5796 static void
5797 inject_input_packet(ifnet_t ifp, mbuf_t m)
5798 {
5799 	mbuf_pkthdr_setrcvif(m, ifp);
5800 	mbuf_pkthdr_setheader(m, mbuf_data(m));
5801 	mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
5802 	    mbuf_len(m) - ETHER_HDR_LEN);
5803 	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
5804 	m->m_flags |= M_PROTO1; /* set to avoid loops */
5805 	dlil_input_packet_list(ifp, m);
5806 	return;
5807 }
5808 
5809 static bool
5810 in_addr_is_ours(struct in_addr ip)
5811 {
5812 	struct in_ifaddr *ia;
5813 	bool             ours = false;
5814 
5815 	lck_rw_lock_shared(&in_ifaddr_rwlock);
5816 	TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5817 		if (IA_SIN(ia)->sin_addr.s_addr == ip.s_addr) {
5818 			ours = true;
5819 			break;
5820 		}
5821 	}
5822 	lck_rw_done(&in_ifaddr_rwlock);
5823 	return ours;
5824 }
5825 
5826 static bool
5827 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5828 {
5829 	struct in6_ifaddr       *ia6;
5830 	bool                    ours = false;
5831 
5832 	if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5833 		struct in6_addr         dst_ip;
5834 
5835 		/* need to embed scope ID for comparison */
5836 		bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5837 		dst_ip.s6_addr16[1] = htons(ifscope);
5838 		ip6_p = &dst_ip;
5839 	}
5840 	lck_rw_lock_shared(&in6_ifaddr_rwlock);
5841 	TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5842 		if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5843 		    ia6->ia_addr.sin6_scope_id, ifscope)) {
5844 			ours = true;
5845 			break;
5846 		}
5847 	}
5848 	lck_rw_done(&in6_ifaddr_rwlock);
5849 	return ours;
5850 }
5851 
5852 static void
5853 bridge_interface_input(ifnet_t bridge_ifp, mbuf_t m,
5854     bpf_packet_func bpf_input_func)
5855 {
5856 	size_t                  byte_count;
5857 	struct ether_header     *eh;
5858 	errno_t                 error;
5859 	bool                    is_ipv4;
5860 	int                     len;
5861 	u_int                   mac_hlen;
5862 	int                     pkt_count;
5863 
5864 	/* segment large packets before sending them up */
5865 	if (if_bridge_segmentation == 0) {
5866 		goto done;
5867 	}
5868 	len = m->m_pkthdr.len;
5869 	if (len <= (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5870 		goto done;
5871 	}
5872 	eh = mtod(m, struct ether_header *);
5873 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5874 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5875 		    "large non IPv4/IPv6 packet");
5876 		goto done;
5877 	}
5878 
5879 	/*
5880 	 * We have a large IPv4/IPv6 TCP packet. Segment it if required.
5881 	 *
5882 	 * If gso_tcp() returns success (0), the packet(s) are
5883 	 * ready to be passed up. If the destination is a local IP address,
5884 	 * the packet will be passed up as a large, single packet.
5885 	 *
5886 	 * If gso_tcp() returns an error, the packet has already
5887 	 * been freed.
5888 	 */
5889 	mac_hlen = sizeof(*eh);
5890 	error = gso_tcp(bridge_ifp, &m, mac_hlen, is_ipv4, FALSE);
5891 	if (error != 0) {
5892 		return;
5893 	}
5894 
5895 done:
5896 	pkt_count = 0;
5897 	byte_count = 0;
5898 	for (mbuf_t scan = m; scan != NULL; scan = scan->m_nextpkt) {
5899 		/* Mark the packet as arriving on the bridge interface */
5900 		mbuf_pkthdr_setrcvif(scan, bridge_ifp);
5901 		mbuf_pkthdr_setheader(scan, mbuf_data(scan));
5902 		if (bpf_input_func != NULL) {
5903 			(*bpf_input_func)(bridge_ifp, scan);
5904 		}
5905 		mbuf_setdata(scan, (char *)mbuf_data(scan) + ETHER_HDR_LEN,
5906 		    mbuf_len(scan) - ETHER_HDR_LEN);
5907 		mbuf_pkthdr_adjustlen(scan, -ETHER_HDR_LEN);
5908 		byte_count += mbuf_pkthdr_len(scan);
5909 		pkt_count++;
5910 	}
5911 	(void)ifnet_stat_increment_in(bridge_ifp, pkt_count, byte_count, 0);
5912 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5913 	    "%s %d packet(s) %ld bytes",
5914 	    bridge_ifp->if_xname, pkt_count, byte_count);
5915 	dlil_input_packet_list(bridge_ifp, m);
5916 	return;
5917 }
5918 
5919 static bool
5920 is_our_ip(ip_packet_info_t info_p, uint32_t ifscope)
5921 {
5922 	bool    ours;
5923 
5924 	if (info_p->ip_is_ipv4) {
5925 		struct in_addr  dst_ip;
5926 
5927 		bcopy(&info_p->ip_hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
5928 		ours = in_addr_is_ours(dst_ip);
5929 	} else {
5930 		ours = in6_addr_is_ours(&info_p->ip_hdr.ip6->ip6_dst, ifscope);
5931 	}
5932 	return ours;
5933 }
5934 
5935 static inline errno_t
5936 bridge_vmnet_tag_input(ifnet_t bridge_ifp, ifnet_t ifp,
5937     const u_char * ether_dhost, mbuf_t *mp,
5938     bool is_broadcast, bool is_ip, bool is_ipv4,
5939     ip_packet_info * info_p, struct bripstats * stats_p,
5940     bool *info_initialized)
5941 {
5942 	errno_t         error = 0;
5943 	bool            is_local = false;
5944 	struct pf_mtag *pf_mtag;
5945 	u_int16_t       tag = vmnet_tag;
5946 
5947 	*info_initialized = false;
5948 	if (is_broadcast) {
5949 		if (_ether_cmp(ether_dhost, etherbroadcastaddr) == 0) {
5950 			tag = vmnet_broadcast_tag;
5951 		} else {
5952 			tag = vmnet_multicast_tag;
5953 		}
5954 	} else if (is_ip) {
5955 		unsigned int    mac_hlen = sizeof(struct ether_header);
5956 
5957 		bzero(stats_p, sizeof(*stats_p));
5958 		*info_initialized = true;
5959 		error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p,
5960 		    stats_p);
5961 		if (error != 0) {
5962 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_INPUT,
5963 			    "%s(%s) bridge_get_ip_proto failed %d",
5964 			    bridge_ifp->if_xname,
5965 			    ifp->if_xname, error);
5966 			if (*mp == NULL) {
5967 				return EJUSTRETURN;
5968 			}
5969 		} else {
5970 			is_local = is_our_ip(info_p, bridge_ifp->if_index);
5971 			if (is_local) {
5972 				tag = vmnet_local_tag;
5973 			}
5974 		}
5975 	}
5976 	pf_mtag = pf_get_mtag(*mp);
5977 	if (pf_mtag != NULL) {
5978 		pf_mtag->pftag_tag = tag;
5979 	}
5980 #if DEBUG || DEVELOPMENT
5981 	{
5982 		bool forced;
5983 
5984 		BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_ONE);
5985 		if (forced) {
5986 			m_freem(*mp);
5987 			*mp = NULL;
5988 			error = EJUSTRETURN;
5989 			goto done;
5990 		}
5991 		BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_TWO);
5992 		if (forced) {
5993 			error = _EBADIP;
5994 			goto done;
5995 		}
5996 	}
5997 done:
5998 #endif /* DEBUG || DEVELOPMENT */
5999 	return error;
6000 }
6001 
6002 static void
6003 bripstats_apply(struct bripstats *dst_p, const struct bripstats *src_p)
6004 {
6005 	dst_p->bips_ip += src_p->bips_ip;
6006 	dst_p->bips_ip6 += src_p->bips_ip6;
6007 	dst_p->bips_udp += src_p->bips_udp;
6008 	dst_p->bips_tcp += src_p->bips_tcp;
6009 
6010 	dst_p->bips_bad_ip += src_p->bips_bad_ip;
6011 	dst_p->bips_bad_ip6 += src_p->bips_bad_ip6;
6012 	dst_p->bips_bad_udp += src_p->bips_bad_udp;
6013 	dst_p->bips_bad_tcp += src_p->bips_bad_tcp;
6014 }
6015 
6016 static void
6017 bridge_bripstats_apply(ifnet_t ifp, const struct bripstats *stats_p)
6018 {
6019 	struct bridge_iflist *bif;
6020 	struct bridge_softc *sc = ifp->if_bridge;
6021 
6022 	BRIDGE_LOCK(sc);
6023 	bif = bridge_lookup_member_if(sc, ifp);
6024 	if (bif == NULL) {
6025 		goto done;
6026 	}
6027 	if (!bif_has_checksum_offload(bif)) {
6028 		goto done;
6029 	}
6030 	bripstats_apply(&bif->bif_stats.brms_in_ip, stats_p);
6031 
6032 done:
6033 	BRIDGE_UNLOCK(sc);
6034 	return;
6035 }
6036 
6037 /*
6038  * bridge_input:
6039  *
6040  *	Filter input from a member interface.  Queue the packet for
6041  *	bridging if it is not for us.
6042  */
6043 errno_t
6044 bridge_input(struct ifnet *ifp, mbuf_t *data)
6045 {
6046 	struct bridge_softc *sc = ifp->if_bridge;
6047 	struct bridge_iflist *bif, *bif2;
6048 	struct ether_header eh_in;
6049 	bool is_ip = false;
6050 	bool is_ipv4 = false;
6051 	ifnet_t bridge_ifp;
6052 	struct mbuf *mc, *mc2;
6053 	unsigned int mac_hlen = sizeof(struct ether_header);
6054 	uint16_t vlan;
6055 	errno_t error;
6056 	ip_packet_info info;
6057 	struct bripstats stats;
6058 	bool info_initialized = false;
6059 	errno_t ip_packet_error = 0;
6060 	bool is_broadcast;
6061 	bool is_ip_broadcast = false;
6062 	bool is_ifp_mac = false;
6063 	mbuf_t m = *data;
6064 	uint32_t sc_filter_flags = 0;
6065 
6066 	bridge_ifp = sc->sc_ifp;
6067 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6068 	    "%s from %s m 0x%llx data 0x%llx",
6069 	    bridge_ifp->if_xname, ifp->if_xname,
6070 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
6071 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
6072 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
6073 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6074 		    "%s not running passing along",
6075 		    bridge_ifp->if_xname);
6076 		return 0;
6077 	}
6078 
6079 	vlan = VLANTAGOF(m);
6080 
6081 #ifdef IFF_MONITOR
6082 	/*
6083 	 * Implement support for bridge monitoring. If this flag has been
6084 	 * set on this interface, discard the packet once we push it through
6085 	 * the bpf(4) machinery, but before we do, increment the byte and
6086 	 * packet counters associated with this interface.
6087 	 */
6088 	if ((bridge_ifp->if_flags & IFF_MONITOR) != 0) {
6089 		m->m_pkthdr.rcvif = bridge_ifp;
6090 		BRIDGE_BPF_MTAP_INPUT(sc, m);
6091 		(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
6092 		*data = NULL;
6093 		m_freem(m);
6094 		return EJUSTRETURN;
6095 	}
6096 #endif /* IFF_MONITOR */
6097 
6098 	is_broadcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;
6099 
6100 	/*
6101 	 * Need to clear the promiscuous flag otherwise it will be
6102 	 * dropped by DLIL after processing filters
6103 	 */
6104 	if ((mbuf_flags(m) & MBUF_PROMISC)) {
6105 		mbuf_setflags_mask(m, 0, MBUF_PROMISC);
6106 	}
6107 
6108 	/* copy the ethernet header */
6109 	eh_in = *(mtod(m, struct ether_header *));
6110 
6111 	is_ip = ether_header_type_is_ip(&eh_in, &is_ipv4);
6112 
6113 	if (if_bridge_vmnet_pf_tagging != 0 && IFNET_IS_VMNET(ifp)) {
6114 		/* tag packets coming from VMNET interfaces */
6115 		ip_packet_error = bridge_vmnet_tag_input(bridge_ifp, ifp,
6116 		    eh_in.ether_dhost, data, is_broadcast, is_ip, is_ipv4,
6117 		    &info, &stats, &info_initialized);
6118 		m = *data;
6119 		if (m == NULL) {
6120 			bridge_bripstats_apply(ifp, &stats);
6121 			return EJUSTRETURN;
6122 		}
6123 	}
6124 
6125 	sc_filter_flags = sc->sc_filter_flags;
6126 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6127 		error = bridge_pf(data, ifp, sc_filter_flags, TRUE);
6128 		m = *data;
6129 		if (error != 0 || m == NULL) {
6130 			return EJUSTRETURN;
6131 		}
6132 	}
6133 
6134 	BRIDGE_LOCK(sc);
6135 	bif = bridge_lookup_member_if(sc, ifp);
6136 	if (bif == NULL) {
6137 		BRIDGE_UNLOCK(sc);
6138 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6139 		    "%s bridge_lookup_member_if failed",
6140 		    bridge_ifp->if_xname);
6141 		return 0;
6142 	}
6143 	if (is_ip && bif_has_checksum_offload(bif)) {
6144 		if (info_initialized) {
6145 			bripstats_apply(&bif->bif_stats.brms_in_ip, &stats);
6146 		} else {
6147 			error = bridge_get_ip_proto(data, mac_hlen, is_ipv4,
6148 			    &info, &bif->bif_stats.brms_in_ip);
6149 			if (error != 0) {
6150 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
6151 				    "%s(%s) bridge_get_ip_proto failed %d",
6152 				    bridge_ifp->if_xname,
6153 				    bif->bif_ifp->if_xname, error);
6154 				ip_packet_error = error;
6155 			}
6156 		}
6157 		if (ip_packet_error == 0) {
6158 			/* need to compute IP/UDP/TCP/checksums */
6159 			error = bridge_offload_checksum(data, &info,
6160 			    &bif->bif_stats);
6161 			if (error != 0) {
6162 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
6163 				    "%s(%s) bridge_offload_checksum failed %d",
6164 				    bridge_ifp->if_xname,
6165 				    bif->bif_ifp->if_xname, error);
6166 				ip_packet_error = error;
6167 			}
6168 		}
6169 		if (ip_packet_error != 0) {
6170 			BRIDGE_UNLOCK(sc);
6171 			if (*data != NULL) {
6172 				m_freem(*data);
6173 				*data = NULL;
6174 			}
6175 			return EJUSTRETURN;
6176 		}
6177 		m = *data;
6178 	}
6179 
6180 	if (bif->bif_flags & BIFF_HOST_FILTER) {
6181 		error = bridge_host_filter(bif, data);
6182 		if (error != 0) {
6183 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6184 			    "%s bridge_host_filter failed",
6185 			    bif->bif_ifp->if_xname);
6186 			BRIDGE_UNLOCK(sc);
6187 			return EJUSTRETURN;
6188 		}
6189 		m = *data;
6190 	}
6191 
6192 	if (!is_broadcast &&
6193 	    _ether_cmp(eh_in.ether_dhost, IF_LLADDR(ifp)) == 0) {
6194 		/* the packet is unicast to the interface's MAC address */
6195 		if (is_ip && sc->sc_mac_nat_bif == bif) {
6196 			/* doing MAC-NAT, check if destination is IP broadcast */
6197 			is_ip_broadcast = is_broadcast_ip_packet(data);
6198 			if (*data == NULL) {
6199 				BRIDGE_UNLOCK(sc);
6200 				return EJUSTRETURN;
6201 			}
6202 			m = *data;
6203 		}
6204 		if (!is_ip_broadcast) {
6205 			is_ifp_mac = TRUE;
6206 		}
6207 	}
6208 
6209 	bridge_span(sc, m);
6210 
6211 	if (is_broadcast || is_ip_broadcast) {
6212 		if (is_broadcast && (m->m_flags & M_MCAST) != 0) {
6213 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6214 			    " multicast: "
6215 			    "%02x:%02x:%02x:%02x:%02x:%02x",
6216 			    eh_in.ether_dhost[0], eh_in.ether_dhost[1],
6217 			    eh_in.ether_dhost[2], eh_in.ether_dhost[3],
6218 			    eh_in.ether_dhost[4], eh_in.ether_dhost[5]);
6219 		}
6220 		/* Tap off 802.1D packets; they do not get forwarded. */
6221 		if (is_broadcast &&
6222 		    _ether_cmp(eh_in.ether_dhost, bstp_etheraddr) == 0) {
6223 #if BRIDGESTP
6224 			m = bstp_input(&bif->bif_stp, ifp, m);
6225 #else /* !BRIDGESTP */
6226 			m_freem(m);
6227 			m = NULL;
6228 #endif /* !BRIDGESTP */
6229 			if (m == NULL) {
6230 				BRIDGE_UNLOCK(sc);
6231 				return EJUSTRETURN;
6232 			}
6233 		}
6234 
6235 		if ((bif->bif_ifflags & IFBIF_STP) &&
6236 		    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6237 			BRIDGE_UNLOCK(sc);
6238 			return 0;
6239 		}
6240 
6241 		/*
6242 		 * Make a deep copy of the packet and enqueue the copy
6243 		 * for bridge processing.
6244 		 */
6245 		mc = m_dup(m, M_DONTWAIT);
6246 		if (mc == NULL) {
6247 			BRIDGE_UNLOCK(sc);
6248 			return 0;
6249 		}
6250 
6251 		/*
6252 		 * Perform the bridge forwarding function with the copy.
6253 		 *
6254 		 * Note that bridge_forward calls BRIDGE_UNLOCK
6255 		 */
6256 		if (is_ip_broadcast) {
6257 			struct ether_header *eh;
6258 
6259 			/* make the copy look like it is actually broadcast */
6260 			mc->m_flags |= M_BCAST;
6261 			eh = mtod(mc, struct ether_header *);
6262 			bcopy(etherbroadcastaddr, eh->ether_dhost,
6263 			    ETHER_ADDR_LEN);
6264 		}
6265 		bridge_forward(sc, bif, mc);
6266 
6267 		/*
6268 		 * Reinject the mbuf as arriving on the bridge so we have a
6269 		 * chance at claiming multicast packets. We can not loop back
6270 		 * here from ether_input as a bridge is never a member of a
6271 		 * bridge.
6272 		 */
6273 		VERIFY(bridge_ifp->if_bridge == NULL);
6274 		mc2 = m_dup(m, M_DONTWAIT);
6275 		if (mc2 != NULL) {
6276 			/* Keep the layer3 header aligned */
6277 			int i = min(mc2->m_pkthdr.len, max_protohdr);
6278 			mc2 = m_copyup(mc2, i, ETHER_ALIGN);
6279 		}
6280 		if (mc2 != NULL) {
6281 			/* mark packet as arriving on the bridge */
6282 			mc2->m_pkthdr.rcvif = bridge_ifp;
6283 			mc2->m_pkthdr.pkt_hdr = mbuf_data(mc2);
6284 			BRIDGE_BPF_MTAP_INPUT(sc, mc2);
6285 			(void) mbuf_setdata(mc2,
6286 			    (char *)mbuf_data(mc2) + ETHER_HDR_LEN,
6287 			    mbuf_len(mc2) - ETHER_HDR_LEN);
6288 			(void) mbuf_pkthdr_adjustlen(mc2, -ETHER_HDR_LEN);
6289 			(void) ifnet_stat_increment_in(bridge_ifp, 1,
6290 			    mbuf_pkthdr_len(mc2), 0);
6291 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6292 			    "%s mcast for us", bridge_ifp->if_xname);
6293 			dlil_input_packet_list(bridge_ifp, mc2);
6294 		}
6295 
6296 		/* Return the original packet for local processing. */
6297 		return 0;
6298 	}
6299 
6300 	if ((bif->bif_ifflags & IFBIF_STP) &&
6301 	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6302 		BRIDGE_UNLOCK(sc);
6303 		return 0;
6304 	}
6305 
6306 #ifdef DEV_CARP
6307 #define CARP_CHECK_WE_ARE_DST(iface) \
6308 	((iface)->if_carp &&\
6309 	        carp_forus((iface)->if_carp, eh_in.ether_dhost))
6310 #define CARP_CHECK_WE_ARE_SRC(iface) \
6311 	((iface)->if_carp &&\
6312 	        carp_forus((iface)->if_carp, eh_in.ether_shost))
6313 #else
6314 #define CARP_CHECK_WE_ARE_DST(iface) 0
6315 #define CARP_CHECK_WE_ARE_SRC(iface) 0
6316 #endif
6317 
6318 #define PFIL_HOOKED_INET6 PFIL_HOOKED(&inet6_pfil_hook)
6319 
6320 #define PFIL_PHYS(sc, ifp, m)
6321 
6322 #define GRAB_OUR_PACKETS(iface)                                         \
6323 	if ((iface)->if_type == IFT_GIF)                                \
6324 	        continue;                                               \
6325 	/* It is destined for us. */                                    \
6326 	if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_dhost) == 0 ||   \
6327 	    CARP_CHECK_WE_ARE_DST((iface))) {                           \
6328 	        if ((iface)->if_type == IFT_BRIDGE) {                   \
6329 	                BRIDGE_BPF_MTAP_INPUT(sc, m);                   \
6330 	/* Filter on the physical interface. */         \
6331 	                PFIL_PHYS(sc, iface, m);                        \
6332 	        } else {                                                \
6333 	                bpf_tap_in(iface, DLT_EN10MB, m, NULL, 0);      \
6334 	        }                                                       \
6335 	        if (bif->bif_ifflags & IFBIF_LEARNING) {                \
6336 	                error = bridge_rtupdate(sc, eh_in.ether_shost,  \
6337 	                    vlan, bif, 0, IFBAF_DYNAMIC);               \
6338 	                if (error && bif->bif_addrmax) {                \
6339 	                        BRIDGE_UNLOCK(sc);                      \
6340 	                        m_freem(m);                             \
6341 	                        return (EJUSTRETURN);                   \
6342 	                }                                               \
6343 	        }                                                       \
6344 	        BRIDGE_UNLOCK(sc);                                      \
6345 	        inject_input_packet(iface, m);                          \
6346 	        return (EJUSTRETURN);                                   \
6347 	}                                                               \
6348                                                                         \
6349 	/* We just received a packet that we sent out. */               \
6350 	if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_shost) == 0 ||   \
6351 	    CARP_CHECK_WE_ARE_SRC((iface))) {                           \
6352 	        BRIDGE_UNLOCK(sc);                                      \
6353 	        m_freem(m);                                             \
6354 	        return (EJUSTRETURN);                                   \
6355 	}
6356 
6357 	/*
6358 	 * Unicast.
6359 	 */
6360 
6361 	/* handle MAC-NAT if enabled */
6362 	if (is_ifp_mac && sc->sc_mac_nat_bif == bif) {
6363 		ifnet_t dst_if;
6364 		boolean_t is_input = FALSE;
6365 
6366 		dst_if = bridge_mac_nat_input(sc, data, &is_input);
6367 		m = *data;
6368 		if (dst_if == ifp) {
6369 			/* our input packet */
6370 		} else if (dst_if != NULL || m == NULL) {
6371 			BRIDGE_UNLOCK(sc);
6372 			if (dst_if != NULL) {
6373 				ASSERT(m != NULL);
6374 				if (is_input) {
6375 					inject_input_packet(dst_if, m);
6376 				} else {
6377 					(void)bridge_enqueue(bridge_ifp, NULL,
6378 					    dst_if, m,
6379 					    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
6380 				}
6381 			}
6382 			return EJUSTRETURN;
6383 		}
6384 	}
6385 
6386 	/*
6387 	 * If the packet is for the bridge, pass it up for local processing.
6388 	 */
6389 	if (_ether_cmp(eh_in.ether_dhost, IF_LLADDR(bridge_ifp)) == 0 ||
6390 	    CARP_CHECK_WE_ARE_DST(bridge_ifp)) {
6391 		bpf_packet_func     bpf_input_func = sc->sc_bpf_input;
6392 
6393 		/*
6394 		 * If the interface is learning, and the source
6395 		 * address is valid and not multicast, record
6396 		 * the address.
6397 		 */
6398 		if (bif->bif_ifflags & IFBIF_LEARNING) {
6399 			(void) bridge_rtupdate(sc, eh_in.ether_shost,
6400 			    vlan, bif, 0, IFBAF_DYNAMIC);
6401 		}
6402 		BRIDGE_UNLOCK(sc);
6403 
6404 		bridge_interface_input(bridge_ifp, m, bpf_input_func);
6405 		return EJUSTRETURN;
6406 	}
6407 
6408 	/*
6409 	 * if the destination of the packet is for the MAC address of
6410 	 * the member interface itself, then we don't need to forward
6411 	 * it -- just pass it back.  Note that it'll likely just be
6412 	 * dropped by the stack, but if something else is bound to
6413 	 * the interface directly (for example, the wireless stats
6414 	 * protocol -- although that actually uses BPF right now),
6415 	 * then it will consume the packet
6416 	 *
6417 	 * ALSO, note that we do this check AFTER checking for the
6418 	 * bridge's own MAC address, because the bridge may be
6419 	 * using the SAME MAC address as one of its interfaces
6420 	 */
6421 	if (is_ifp_mac) {
6422 
6423 #ifdef VERY_VERY_VERY_DIAGNOSTIC
6424 		BRIDGE_LOG(LOG_NOTICE, 0,
6425 		    "not forwarding packet bound for member interface");
6426 #endif
6427 
6428 		BRIDGE_UNLOCK(sc);
6429 		return 0;
6430 	}
6431 
6432 	/* Now check the remaining bridge members. */
6433 	TAILQ_FOREACH(bif2, &sc->sc_iflist, bif_next) {
6434 		if (bif2->bif_ifp != ifp) {
6435 			GRAB_OUR_PACKETS(bif2->bif_ifp);
6436 		}
6437 	}
6438 
6439 #undef CARP_CHECK_WE_ARE_DST
6440 #undef CARP_CHECK_WE_ARE_SRC
6441 #undef GRAB_OUR_PACKETS
6442 
6443 	/*
6444 	 * Perform the bridge forwarding function.
6445 	 *
6446 	 * Note that bridge_forward calls BRIDGE_UNLOCK
6447 	 */
6448 	bridge_forward(sc, bif, m);
6449 
6450 	return EJUSTRETURN;
6451 }
6452 
/*
 * bridge_broadcast:
 *
 *	Send a frame to all interfaces that are members of
 *	the bridge, except for the one on which the packet
 *	arrived.
 *
 *	sc	bridge the frame is flooded on
 *	sbif	member the frame arrived on, or NULL when the bridge
 *		interface itself is the sender
 *	m	frame to flood; ownership passes to this function, which
 *		either hands it to a member or frees it
 *	runfilt	when non-zero (and PF is enabled and the bridge has
 *		IFBF_FILT_MEMBER set) each outbound copy goes through
 *		bridge_pf()
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
    struct mbuf *m, int runfilt)
{
	ifnet_t bridge_ifp;
	struct bridge_iflist *dbif;
	struct ifnet * src_if;
	struct mbuf *mc;
	struct mbuf *mc_in;
	struct ifnet *dst_if;
	/* used: set once the original mbuf `m' has been handed to a member */
	int error = 0, used = 0;
	boolean_t bridge_if_out;
	ChecksumOperation cksum_op;
	struct mac_nat_record mnr;
	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
	boolean_t translate_mac = FALSE;
	uint32_t sc_filter_flags = 0;

	bridge_ifp = sc->sc_ifp;
	if (sbif != NULL) {
		bridge_if_out = FALSE;
		src_if = sbif->bif_ifp;
		cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
			/* get the translation record while holding the lock */
			translate_mac
			        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
			if (m == NULL) {
				/* packet was deallocated */
				BRIDGE_UNLOCK(sc);
				return;
			}
		}
	} else {
		/*
		 * sbif is NULL when the bridge interface calls
		 * bridge_broadcast().
		 */
		bridge_if_out = TRUE;
		cksum_op = CHECKSUM_OPERATION_FINALIZE;
		sbif = NULL;
		src_if = NULL;
	}

	/*
	 * NOTE(review): presumably this trades the bridge lock for a
	 * reference so the member list can be walked unlocked -- confirm
	 * against the BRIDGE_LOCK2REF definition.  On failure the frame is
	 * dropped here.
	 */
	BRIDGE_LOCK2REF(sc, error);
	if (error) {
		m_freem(m);
		return;
	}

	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
		dst_if = dbif->bif_ifp;
		if (dst_if == src_if) {
			/* skip the interface that the packet came in on */
			continue;
		}

		/* Private segments can not talk to each other */
		if (sbif != NULL &&
		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
			continue;
		}

		/* skip STP members that are currently discarding */
		if ((dbif->bif_ifflags & IFBIF_STP) &&
		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			continue;
		}

		/* unicast frames are only flooded to IFBIF_DISCOVER members */
		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
		    (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
			continue;
		}

		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
			continue;
		}

		if (!(dbif->bif_flags & BIFF_MEDIA_ACTIVE)) {
			continue;
		}

		/*
		 * The last member in the list receives the original mbuf;
		 * every other eligible member gets a duplicate.
		 */
		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
			mc = m;
			used = 1;
		} else {
			mc = m_dup(m, M_DONTWAIT);
			if (mc == NULL) {
				(void) ifnet_stat_increment_out(bridge_ifp,
				    0, 0, 1);
				continue;
			}
		}

		/*
		 * If broadcast input is enabled, do so only if this
		 * is an input packet.
		 */
		if (!bridge_if_out &&
		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
			mc_in = m_dup(mc, M_DONTWAIT);
			/* this could fail, but we continue anyways */
		} else {
			mc_in = NULL;
		}

		/* out */
		if (translate_mac && mac_nat_bif == dbif) {
			/* translate the packet without holding the lock */
			bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
		}

		sc_filter_flags = sc->sc_filter_flags;
		if (runfilt &&
		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
			/* only duplicates are realigned; the original is left as is */
			if (used == 0) {
				/* Keep the layer3 header aligned */
				int i = min(mc->m_pkthdr.len, max_protohdr);
				mc = m_copyup(mc, i, ETHER_ALIGN);
				if (mc == NULL) {
					(void) ifnet_stat_increment_out(
						sc->sc_ifp, 0, 0, 1);
					if (mc_in != NULL) {
						m_freem(mc_in);
						mc_in = NULL;
					}
					continue;
				}
			}
			/* a filter failure also discards the pending input copy */
			if (bridge_pf(&mc, dst_if, sc_filter_flags, FALSE) != 0) {
				if (mc_in != NULL) {
					m_freem(mc_in);
					mc_in = NULL;
				}
				continue;
			}
			if (mc == NULL) {
				if (mc_in != NULL) {
					m_freem(mc_in);
					mc_in = NULL;
				}
				continue;
			}
		}

		if (mc != NULL) {
			/* verify checksum if necessary */
			if (bif_has_checksum_offload(dbif) && sbif != NULL &&
			    !bif_has_checksum_offload(sbif)) {
				error = bridge_verify_checksum(&mc,
				    &dbif->bif_stats);
				if (error != 0) {
					if (mc != NULL) {
						m_freem(mc);
					}
					mc = NULL;
				}
			}
			if (mc != NULL) {
				(void) bridge_enqueue(bridge_ifp,
				    NULL, dst_if, mc, cksum_op);
			}
		}

		/* in */
		if (mc_in == NULL) {
			continue;
		}
		/*
		 * Tap the copy on the member, then strip the Ethernet header
		 * (remembering where it was) before injecting the copy as
		 * input on that member.
		 */
		bpf_tap_in(dst_if, DLT_EN10MB, mc_in, NULL, 0);
		mbuf_pkthdr_setrcvif(mc_in, dst_if);
		mbuf_pkthdr_setheader(mc_in, mbuf_data(mc_in));
		mbuf_setdata(mc_in, (char *)mbuf_data(mc_in) + ETHER_HDR_LEN,
		    mbuf_len(mc_in) - ETHER_HDR_LEN);
		mbuf_pkthdr_adjustlen(mc_in, -ETHER_HDR_LEN);
		mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
		dlil_input_packet_list(dst_if, mc_in);
	}
	/* the original was never handed to a member, so release it here */
	if (used == 0) {
		m_freem(m);
	}


	BRIDGE_UNREF(sc);
}
6645 
6646 /*
6647  * bridge_span:
6648  *
6649  *	Duplicate a packet out one or more interfaces that are in span mode,
6650  *	the original mbuf is unmodified.
6651  */
6652 static void
6653 bridge_span(struct bridge_softc *sc, struct mbuf *m)
6654 {
6655 	struct bridge_iflist *bif;
6656 	struct ifnet *dst_if;
6657 	struct mbuf *mc;
6658 
6659 	if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6660 		return;
6661 	}
6662 
6663 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6664 		dst_if = bif->bif_ifp;
6665 
6666 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6667 			continue;
6668 		}
6669 
6670 		mc = m_copypacket(m, M_DONTWAIT);
6671 		if (mc == NULL) {
6672 			(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6673 			continue;
6674 		}
6675 
6676 		(void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, mc,
6677 		    CHECKSUM_OPERATION_NONE);
6678 	}
6679 }
6680 
6681 
6682 /*
6683  * bridge_rtupdate:
6684  *
6685  *	Add a bridge routing entry.
6686  */
6687 static int
6688 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
6689     struct bridge_iflist *bif, int setflags, uint8_t flags)
6690 {
6691 	struct bridge_rtnode *brt;
6692 	int error;
6693 
6694 	BRIDGE_LOCK_ASSERT_HELD(sc);
6695 
6696 	/* Check the source address is valid and not multicast. */
6697 	if (ETHER_IS_MULTICAST(dst) ||
6698 	    (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6699 	    dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6700 		return EINVAL;
6701 	}
6702 
6703 
6704 	/* 802.1p frames map to vlan 1 */
6705 	if (vlan == 0) {
6706 		vlan = 1;
6707 	}
6708 
6709 	/*
6710 	 * A route for this destination might already exist.  If so,
6711 	 * update it, otherwise create a new one.
6712 	 */
6713 	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6714 		if (sc->sc_brtcnt >= sc->sc_brtmax) {
6715 			sc->sc_brtexceeded++;
6716 			return ENOSPC;
6717 		}
6718 		/* Check per interface address limits (if enabled) */
6719 		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6720 			bif->bif_addrexceeded++;
6721 			return ENOSPC;
6722 		}
6723 
6724 		/*
6725 		 * Allocate a new bridge forwarding node, and
6726 		 * initialize the expiration time and Ethernet
6727 		 * address.
6728 		 */
6729 		brt = zalloc_noblock(bridge_rtnode_pool);
6730 		if (brt == NULL) {
6731 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6732 			    "zalloc_nolock failed");
6733 			return ENOMEM;
6734 		}
6735 		bzero(brt, sizeof(struct bridge_rtnode));
6736 
6737 		if (bif->bif_ifflags & IFBIF_STICKY) {
6738 			brt->brt_flags = IFBAF_STICKY;
6739 		} else {
6740 			brt->brt_flags = IFBAF_DYNAMIC;
6741 		}
6742 
6743 		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6744 		brt->brt_vlan = vlan;
6745 
6746 
6747 		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6748 			zfree(bridge_rtnode_pool, brt);
6749 			return error;
6750 		}
6751 		brt->brt_dst = bif;
6752 		bif->bif_addrcnt++;
6753 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6754 		    "added %02x:%02x:%02x:%02x:%02x:%02x "
6755 		    "on %s count %u hashsize %u",
6756 		    dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6757 		    sc->sc_ifp->if_xname, sc->sc_brtcnt,
6758 		    sc->sc_rthash_size);
6759 	}
6760 
6761 	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6762 	    brt->brt_dst != bif) {
6763 		brt->brt_dst->bif_addrcnt--;
6764 		brt->brt_dst = bif;
6765 		brt->brt_dst->bif_addrcnt++;
6766 	}
6767 
6768 	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6769 		unsigned long now;
6770 
6771 		now = (unsigned long) net_uptime();
6772 		brt->brt_expire = now + sc->sc_brttimeout;
6773 	}
6774 	if (setflags) {
6775 		brt->brt_flags = flags;
6776 	}
6777 
6778 
6779 	return 0;
6780 }
6781 
6782 /*
6783  * bridge_rtlookup:
6784  *
6785  *	Lookup the destination interface for an address.
6786  */
6787 static struct ifnet *
6788 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6789 {
6790 	struct bridge_rtnode *brt;
6791 
6792 	BRIDGE_LOCK_ASSERT_HELD(sc);
6793 
6794 	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6795 		return NULL;
6796 	}
6797 
6798 	return brt->brt_ifp;
6799 }
6800 
6801 /*
6802  * bridge_rttrim:
6803  *
6804  *	Trim the routine table so that we have a number
6805  *	of routing entries less than or equal to the
6806  *	maximum number.
6807  */
6808 static void
6809 bridge_rttrim(struct bridge_softc *sc)
6810 {
6811 	struct bridge_rtnode *brt, *nbrt;
6812 
6813 	BRIDGE_LOCK_ASSERT_HELD(sc);
6814 
6815 	/* Make sure we actually need to do this. */
6816 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6817 		return;
6818 	}
6819 
6820 	/* Force an aging cycle; this might trim enough addresses. */
6821 	bridge_rtage(sc);
6822 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6823 		return;
6824 	}
6825 
6826 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6827 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6828 			bridge_rtnode_destroy(sc, brt);
6829 			if (sc->sc_brtcnt <= sc->sc_brtmax) {
6830 				return;
6831 			}
6832 		}
6833 	}
6834 }
6835 
6836 /*
6837  * bridge_aging_timer:
6838  *
6839  *	Aging periodic timer for the bridge routing table.
6840  */
6841 static void
6842 bridge_aging_timer(struct bridge_softc *sc)
6843 {
6844 	BRIDGE_LOCK_ASSERT_HELD(sc);
6845 
6846 	bridge_rtage(sc);
6847 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6848 	    (sc->sc_flags & SCF_DETACHING) == 0) {
6849 		sc->sc_aging_timer.bdc_sc = sc;
6850 		sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6851 		sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6852 		bridge_schedule_delayed_call(&sc->sc_aging_timer);
6853 	}
6854 }
6855 
6856 /*
6857  * bridge_rtage:
6858  *
6859  *	Perform an aging cycle.
6860  */
6861 static void
6862 bridge_rtage(struct bridge_softc *sc)
6863 {
6864 	struct bridge_rtnode *brt, *nbrt;
6865 	unsigned long now;
6866 
6867 	BRIDGE_LOCK_ASSERT_HELD(sc);
6868 
6869 	now = (unsigned long) net_uptime();
6870 
6871 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6872 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6873 			if (now >= brt->brt_expire) {
6874 				bridge_rtnode_destroy(sc, brt);
6875 			}
6876 		}
6877 	}
6878 	if (sc->sc_mac_nat_bif != NULL) {
6879 		bridge_mac_nat_age_entries(sc, now);
6880 	}
6881 }
6882 
6883 /*
6884  * bridge_rtflush:
6885  *
6886  *	Remove all dynamic addresses from the bridge.
6887  */
6888 static void
6889 bridge_rtflush(struct bridge_softc *sc, int full)
6890 {
6891 	struct bridge_rtnode *brt, *nbrt;
6892 
6893 	BRIDGE_LOCK_ASSERT_HELD(sc);
6894 
6895 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6896 		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6897 			bridge_rtnode_destroy(sc, brt);
6898 		}
6899 	}
6900 }
6901 
6902 /*
6903  * bridge_rtdaddr:
6904  *
6905  *	Remove an address from the table.
6906  */
6907 static int
6908 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6909 {
6910 	struct bridge_rtnode *brt;
6911 	int found = 0;
6912 
6913 	BRIDGE_LOCK_ASSERT_HELD(sc);
6914 
6915 	/*
6916 	 * If vlan is zero then we want to delete for all vlans so the lookup
6917 	 * may return more than one.
6918 	 */
6919 	while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6920 		bridge_rtnode_destroy(sc, brt);
6921 		found = 1;
6922 	}
6923 
6924 	return found ? 0 : ENOENT;
6925 }
6926 
6927 /*
6928  * bridge_rtdelete:
6929  *
6930  *	Delete routes to a specific member interface.
6931  */
6932 static void
6933 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6934 {
6935 	struct bridge_rtnode *brt, *nbrt;
6936 
6937 	BRIDGE_LOCK_ASSERT_HELD(sc);
6938 
6939 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6940 		if (brt->brt_ifp == ifp && (full ||
6941 		    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6942 			bridge_rtnode_destroy(sc, brt);
6943 		}
6944 	}
6945 }
6946 
6947 /*
6948  * bridge_rtable_init:
6949  *
6950  *	Initialize the route table for this bridge.
6951  */
6952 static int
6953 bridge_rtable_init(struct bridge_softc *sc)
6954 {
6955 	u_int32_t i;
6956 
6957 	sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6958 	    BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6959 	sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6960 
6961 	for (i = 0; i < sc->sc_rthash_size; i++) {
6962 		LIST_INIT(&sc->sc_rthash[i]);
6963 	}
6964 
6965 	sc->sc_rthash_key = RandomULong();
6966 
6967 	LIST_INIT(&sc->sc_rtlist);
6968 
6969 	return 0;
6970 }
6971 
6972 /*
6973  * bridge_rthash_delayed_resize:
6974  *
6975  *	Resize the routing table hash on a delayed thread call.
6976  */
6977 static void
6978 bridge_rthash_delayed_resize(struct bridge_softc *sc)
6979 {
6980 	u_int32_t new_rthash_size = 0;
6981 	u_int32_t old_rthash_size = 0;
6982 	struct _bridge_rtnode_list *new_rthash = NULL;
6983 	struct _bridge_rtnode_list *old_rthash = NULL;
6984 	u_int32_t i;
6985 	struct bridge_rtnode *brt;
6986 	int error = 0;
6987 
6988 	BRIDGE_LOCK_ASSERT_HELD(sc);
6989 
6990 	/*
6991 	 * Four entries per hash bucket is our ideal load factor
6992 	 */
6993 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6994 		goto out;
6995 	}
6996 
6997 	/*
6998 	 * Doubling the number of hash buckets may be too simplistic
6999 	 * especially when facing a spike of new entries
7000 	 */
7001 	new_rthash_size = sc->sc_rthash_size * 2;
7002 
7003 	sc->sc_flags |= SCF_RESIZING;
7004 	BRIDGE_UNLOCK(sc);
7005 
7006 	new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
7007 	    Z_WAITOK | Z_ZERO);
7008 
7009 	BRIDGE_LOCK(sc);
7010 	sc->sc_flags &= ~SCF_RESIZING;
7011 
7012 	if (new_rthash == NULL) {
7013 		error = ENOMEM;
7014 		goto out;
7015 	}
7016 	if ((sc->sc_flags & SCF_DETACHING)) {
7017 		error = ENODEV;
7018 		goto out;
7019 	}
7020 	/*
7021 	 * Fail safe from here on
7022 	 */
7023 	old_rthash = sc->sc_rthash;
7024 	old_rthash_size = sc->sc_rthash_size;
7025 	sc->sc_rthash = new_rthash;
7026 	sc->sc_rthash_size = new_rthash_size;
7027 
7028 	/*
7029 	 * Get a new key to force entries to be shuffled around to reduce
7030 	 * the likelihood they will land in the same buckets
7031 	 */
7032 	sc->sc_rthash_key = RandomULong();
7033 
7034 	for (i = 0; i < sc->sc_rthash_size; i++) {
7035 		LIST_INIT(&sc->sc_rthash[i]);
7036 	}
7037 
7038 	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7039 		LIST_REMOVE(brt, brt_hash);
7040 		(void) bridge_rtnode_hash(sc, brt);
7041 	}
7042 out:
7043 	if (error == 0) {
7044 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7045 		    "%s new size %u",
7046 		    sc->sc_ifp->if_xname, sc->sc_rthash_size);
7047 		kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
7048 	} else {
7049 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
7050 		    "%s failed %d", sc->sc_ifp->if_xname, error);
7051 		kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
7052 	}
7053 }
7054 
7055 /*
7056  * Resize the number of hash buckets based on the load factor
7057  * Currently only grow
7058  * Failing to resize the hash table is not fatal
7059  */
7060 static void
7061 bridge_rthash_resize(struct bridge_softc *sc)
7062 {
7063 	BRIDGE_LOCK_ASSERT_HELD(sc);
7064 
7065 	if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
7066 		return;
7067 	}
7068 
7069 	/*
7070 	 * Four entries per hash bucket is our ideal load factor
7071 	 */
7072 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
7073 		return;
7074 	}
7075 	/*
7076 	 * Hard limit on the size of the routing hash table
7077 	 */
7078 	if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
7079 		return;
7080 	}
7081 
7082 	sc->sc_resize_call.bdc_sc = sc;
7083 	sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
7084 	bridge_schedule_delayed_call(&sc->sc_resize_call);
7085 }
7086 
7087 /*
7088  * bridge_rtable_fini:
7089  *
7090  *	Deconstruct the route table for this bridge.
7091  */
7092 static void
7093 bridge_rtable_fini(struct bridge_softc *sc)
7094 {
7095 	KASSERT(sc->sc_brtcnt == 0,
7096 	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
7097 	kfree_type(struct _bridge_rtnode_list, sc->sc_rthash_size,
7098 	    sc->sc_rthash);
7099 	sc->sc_rthash = NULL;
7100 	sc->sc_rthash_size = 0;
7101 }
7102 
7103 /*
7104  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
7105  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
7106  */
7107 #define mix(a, b, c)                                                    \
7108 do {                                                                    \
7109 	a -= b; a -= c; a ^= (c >> 13);                                 \
7110 	b -= c; b -= a; b ^= (a << 8);                                  \
7111 	c -= a; c -= b; c ^= (b >> 13);                                 \
7112 	a -= b; a -= c; a ^= (c >> 12);                                 \
7113 	b -= c; b -= a; b ^= (a << 16);                                 \
7114 	c -= a; c -= b; c ^= (b >> 5);                                  \
7115 	a -= b; a -= c; a ^= (c >> 3);                                  \
7116 	b -= c; b -= a; b ^= (a << 10);                                 \
7117 	c -= a; c -= b; c ^= (b >> 15);                                 \
7118 } while ( /*CONSTCOND*/ 0)
7119 
7120 static __inline uint32_t
7121 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
7122 {
7123 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
7124 
7125 	b += addr[5] << 8;
7126 	b += addr[4];
7127 	a += addr[3] << 24;
7128 	a += addr[2] << 16;
7129 	a += addr[1] << 8;
7130 	a += addr[0];
7131 
7132 	mix(a, b, c);
7133 
7134 	return c & BRIDGE_RTHASH_MASK(sc);
7135 }
7136 
7137 #undef mix
7138 
/*
 * Compare two Ethernet addresses byte by byte.  Returns zero when they
 * are equal, otherwise the difference a[i] - b[i] of the first pair of
 * bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		if (a[idx] != b[idx]) {
			return (int)a[idx] - (int)b[idx];
		}
	}

	return 0;
}
7150 
7151 /*
7152  * bridge_rtnode_lookup:
7153  *
7154  *	Look up a bridge route node for the specified destination. Compare the
7155  *	vlan id or if zero then just return the first match.
7156  */
7157 static struct bridge_rtnode *
7158 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr,
7159     uint16_t vlan)
7160 {
7161 	struct bridge_rtnode *brt;
7162 	uint32_t hash;
7163 	int dir;
7164 
7165 	BRIDGE_LOCK_ASSERT_HELD(sc);
7166 
7167 	hash = bridge_rthash(sc, addr);
7168 	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7169 		dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7170 		if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7171 			return brt;
7172 		}
7173 		if (dir > 0) {
7174 			return NULL;
7175 		}
7176 	}
7177 
7178 	return NULL;
7179 }
7180 
7181 /*
7182  * bridge_rtnode_hash:
7183  *
7184  *	Insert the specified bridge node into the route hash table.
7185  *	This is used when adding a new node or to rehash when resizing
7186  *	the hash table
7187  */
7188 static int
7189 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7190 {
7191 	struct bridge_rtnode *lbrt;
7192 	uint32_t hash;
7193 	int dir;
7194 
7195 	BRIDGE_LOCK_ASSERT_HELD(sc);
7196 
7197 	hash = bridge_rthash(sc, brt->brt_addr);
7198 
7199 	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7200 	if (lbrt == NULL) {
7201 		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7202 		goto out;
7203 	}
7204 
7205 	do {
7206 		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7207 		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7208 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7209 			    "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7210 			    sc->sc_ifp->if_xname,
7211 			    brt->brt_addr[0], brt->brt_addr[1],
7212 			    brt->brt_addr[2], brt->brt_addr[3],
7213 			    brt->brt_addr[4], brt->brt_addr[5]);
7214 			return EEXIST;
7215 		}
7216 		if (dir > 0) {
7217 			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7218 			goto out;
7219 		}
7220 		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7221 			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7222 			goto out;
7223 		}
7224 		lbrt = LIST_NEXT(lbrt, brt_hash);
7225 	} while (lbrt != NULL);
7226 
7227 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7228 	    "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7229 	    sc->sc_ifp->if_xname,
7230 	    brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7231 	    brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7232 out:
7233 	return 0;
7234 }
7235 
7236 /*
7237  * bridge_rtnode_insert:
7238  *
7239  *	Insert the specified bridge node into the route table.  We
7240  *	assume the entry is not already in the table.
7241  */
7242 static int
7243 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7244 {
7245 	int error;
7246 
7247 	error = bridge_rtnode_hash(sc, brt);
7248 	if (error != 0) {
7249 		return error;
7250 	}
7251 
7252 	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7253 	sc->sc_brtcnt++;
7254 
7255 	bridge_rthash_resize(sc);
7256 
7257 	return 0;
7258 }
7259 
7260 /*
7261  * bridge_rtnode_destroy:
7262  *
7263  *	Destroy a bridge rtnode.
7264  */
7265 static void
7266 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7267 {
7268 	BRIDGE_LOCK_ASSERT_HELD(sc);
7269 
7270 	LIST_REMOVE(brt, brt_hash);
7271 
7272 	LIST_REMOVE(brt, brt_list);
7273 	sc->sc_brtcnt--;
7274 	brt->brt_dst->bif_addrcnt--;
7275 	zfree(bridge_rtnode_pool, brt);
7276 }
7277 
7278 #if BRIDGESTP
7279 /*
7280  * bridge_rtable_expire:
7281  *
7282  *	Set the expiry time for all routes on an interface.
7283  */
7284 static void
7285 bridge_rtable_expire(struct ifnet *ifp, int age)
7286 {
7287 	struct bridge_softc *sc = ifp->if_bridge;
7288 	struct bridge_rtnode *brt;
7289 
7290 	BRIDGE_LOCK(sc);
7291 
7292 	/*
7293 	 * If the age is zero then flush, otherwise set all the expiry times to
7294 	 * age for the interface
7295 	 */
7296 	if (age == 0) {
7297 		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7298 	} else {
7299 		unsigned long now;
7300 
7301 		now = (unsigned long) net_uptime();
7302 
7303 		LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7304 			/* Cap the expiry time to 'age' */
7305 			if (brt->brt_ifp == ifp &&
7306 			    brt->brt_expire > now + age &&
7307 			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7308 				brt->brt_expire = now + age;
7309 			}
7310 		}
7311 	}
7312 	BRIDGE_UNLOCK(sc);
7313 }
7314 
7315 /*
7316  * bridge_state_change:
7317  *
7318  *	Callback from the bridgestp code when a port changes states.
7319  */
7320 static void
7321 bridge_state_change(struct ifnet *ifp, int state)
7322 {
7323 	struct bridge_softc *sc = ifp->if_bridge;
7324 	static const char *stpstates[] = {
7325 		"disabled",
7326 		"listening",
7327 		"learning",
7328 		"forwarding",
7329 		"blocking",
7330 		"discarding"
7331 	};
7332 
7333 	if (log_stp) {
7334 		log(LOG_NOTICE, "%s: state changed to %s on %s",
7335 		    sc->sc_ifp->if_xname,
7336 		    stpstates[state], ifp->if_xname);
7337 	}
7338 }
7339 #endif /* BRIDGESTP */
7340 
7341 /*
7342  * bridge_set_bpf_tap:
7343  *
7344  *	Sets ups the BPF callbacks.
7345  */
7346 static errno_t
7347 bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
7348 {
7349 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7350 
7351 	/* TBD locking */
7352 	if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
7353 		return ENODEV;
7354 	}
7355 	switch (mode) {
7356 	case BPF_TAP_DISABLE:
7357 		sc->sc_bpf_input = sc->sc_bpf_output = NULL;
7358 		break;
7359 
7360 	case BPF_TAP_INPUT:
7361 		sc->sc_bpf_input = bpf_callback;
7362 		break;
7363 
7364 	case BPF_TAP_OUTPUT:
7365 		sc->sc_bpf_output = bpf_callback;
7366 		break;
7367 
7368 	case BPF_TAP_INPUT_OUTPUT:
7369 		sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
7370 		break;
7371 
7372 	default:
7373 		break;
7374 	}
7375 
7376 	return 0;
7377 }
7378 
/*
 * bridge_detach:
 *
 *	Callback when interface has been detached.  Releases everything
 *	still owned by the bridge softc and finally frees the softc
 *	itself, so `sc' must not be touched by anyone afterwards.
 */
static void
bridge_detach(ifnet_t ifp)
{
	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);

#if BRIDGESTP
	bstp_detach(&sc->sc_stp);
#endif /* BRIDGESTP */

	/* Tear down the routing table. */
	bridge_rtable_fini(sc);

	/* unlink the bridge from the bridge list, under the list mutex */
	lck_mtx_lock(&bridge_list_mtx);
	LIST_REMOVE(sc, sc_list);
	lck_mtx_unlock(&bridge_list_mtx);

	/* NOTE(review): presumably drops the reference taken when the ifnet was allocated -- confirm */
	ifnet_release(ifp);

	/* the softc mutex is destroyed before the memory holding it is freed */
	lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
	kfree_type(struct bridge_softc, sc);
}
7405 
7406 /*
7407  * bridge_bpf_input:
7408  *
7409  *	Invoke the input BPF callback if enabled
7410  */
7411 static errno_t
7412 bridge_bpf_input(ifnet_t ifp, struct mbuf *m, const char * func, int line)
7413 {
7414 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7415 	bpf_packet_func     input_func = sc->sc_bpf_input;
7416 
7417 	if (input_func != NULL) {
7418 		if (mbuf_pkthdr_rcvif(m) != ifp) {
7419 			BRIDGE_LOG(LOG_NOTICE, 0,
7420 			    "%s.%d: rcvif: 0x%llx != ifp 0x%llx", func, line,
7421 			    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
7422 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp));
7423 		}
7424 		(*input_func)(ifp, m);
7425 	}
7426 	return 0;
7427 }
7428 
7429 /*
7430  * bridge_bpf_output:
7431  *
7432  *	Invoke the output BPF callback if enabled
7433  */
7434 static errno_t
7435 bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
7436 {
7437 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7438 	bpf_packet_func     output_func = sc->sc_bpf_output;
7439 
7440 	if (output_func != NULL) {
7441 		(*output_func)(ifp, m);
7442 	}
7443 	return 0;
7444 }
7445 
7446 /*
7447  * bridge_link_event:
7448  *
7449  *	Report a data link event on an interface
7450  */
7451 static void
7452 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7453 {
7454 	struct event {
7455 		u_int32_t ifnet_family;
7456 		u_int32_t unit;
7457 		char if_name[IFNAMSIZ];
7458 	};
7459 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7460 	struct kern_event_msg *header = (struct kern_event_msg*)message;
7461 	struct event *data = (struct event *)(header + 1);
7462 
7463 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7464 	    "%s event_code %u - %s", ifp->if_xname,
7465 	    event_code, dlil_kev_dl_code_str(event_code));
7466 	header->total_size   = sizeof(message);
7467 	header->vendor_code  = KEV_VENDOR_APPLE;
7468 	header->kev_class    = KEV_NETWORK_CLASS;
7469 	header->kev_subclass = KEV_DL_SUBCLASS;
7470 	header->event_code   = event_code;
7471 	data->ifnet_family   = ifnet_family(ifp);
7472 	data->unit           = (u_int32_t)ifnet_unit(ifp);
7473 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7474 	ifnet_event(ifp, header);
7475 }
7476 
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host filter drop: bump the named counter in
 *	bridge_hostfilter_stats, log the calling function/line and the
 *	counter name, and set the caller's local `error' to EINVAL.
 *	The caller must have an `int error' in scope.  Wrapped in
 *	do { } while (0) so it behaves as a single statement.
 */
#define BRIDGE_HF_DROP(reason, func, line) do {                         \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	                   "%s.%d " #reason, func, line);               \
	        error = EINVAL;                                         \
} while (0)
7483 
7484 /*
7485  * Make sure this is a DHCP or Bootp request that match the host filter
7486  */
7487 static int
7488 bridge_dhcp_filter(struct bridge_iflist *bif, struct mbuf *m, size_t offset)
7489 {
7490 	int error = EINVAL;
7491 	struct dhcp dhcp;
7492 
7493 	/*
7494 	 * Note: We use the dhcp structure because bootp structure definition
7495 	 * is larger and some vendors do not pad the request
7496 	 */
7497 	error = mbuf_copydata(m, offset, sizeof(struct dhcp), &dhcp);
7498 	if (error != 0) {
7499 		BRIDGE_HF_DROP(brhf_dhcp_too_small, __func__, __LINE__);
7500 		goto done;
7501 	}
7502 	if (dhcp.dp_op != BOOTREQUEST) {
7503 		BRIDGE_HF_DROP(brhf_dhcp_bad_op, __func__, __LINE__);
7504 		goto done;
7505 	}
7506 	/*
7507 	 * The hardware address must be an exact match
7508 	 */
7509 	if (dhcp.dp_htype != ARPHRD_ETHER) {
7510 		BRIDGE_HF_DROP(brhf_dhcp_bad_htype, __func__, __LINE__);
7511 		goto done;
7512 	}
7513 	if (dhcp.dp_hlen != ETHER_ADDR_LEN) {
7514 		BRIDGE_HF_DROP(brhf_dhcp_bad_hlen, __func__, __LINE__);
7515 		goto done;
7516 	}
7517 	if (bcmp(dhcp.dp_chaddr, bif->bif_hf_hwsrc,
7518 	    ETHER_ADDR_LEN) != 0) {
7519 		BRIDGE_HF_DROP(brhf_dhcp_bad_chaddr, __func__, __LINE__);
7520 		goto done;
7521 	}
7522 	/*
7523 	 * Client address must match the host address or be not specified
7524 	 */
7525 	if (dhcp.dp_ciaddr.s_addr != bif->bif_hf_ipsrc.s_addr &&
7526 	    dhcp.dp_ciaddr.s_addr != INADDR_ANY) {
7527 		BRIDGE_HF_DROP(brhf_dhcp_bad_ciaddr, __func__, __LINE__);
7528 		goto done;
7529 	}
7530 	error = 0;
7531 done:
7532 	return error;
7533 }
7534 
7535 static int
7536 bridge_host_filter(struct bridge_iflist *bif, mbuf_t *data)
7537 {
7538 	int error = EINVAL;
7539 	struct ether_header *eh;
7540 	static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
7541 	mbuf_t m = *data;
7542 
7543 	eh = mtod(m, struct ether_header *);
7544 
7545 	/*
7546 	 * Restrict the source hardware address
7547 	 */
7548 	if ((bif->bif_flags & BIFF_HF_HWSRC) != 0 &&
7549 	    bcmp(eh->ether_shost, bif->bif_hf_hwsrc,
7550 	    ETHER_ADDR_LEN) != 0) {
7551 		BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr, __func__, __LINE__);
7552 		goto done;
7553 	}
7554 
7555 	/*
7556 	 * Restrict Ethernet protocols to ARP and IP/IPv6
7557 	 */
7558 	if (eh->ether_type == htons(ETHERTYPE_ARP)) {
7559 		struct ether_arp *ea;
7560 		size_t minlen = sizeof(struct ether_header) +
7561 		    sizeof(struct ether_arp);
7562 
7563 		/*
7564 		 * Make the Ethernet and ARP headers contiguous
7565 		 */
7566 		if (mbuf_pkthdr_len(m) < minlen) {
7567 			BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7568 			goto done;
7569 		}
7570 		if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7571 			BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7572 			    __func__, __LINE__);
7573 			goto done;
7574 		}
7575 		m = *data;
7576 
7577 		/*
7578 		 * Verify this is an ethernet/ip arp
7579 		 */
7580 		eh = mtod(m, struct ether_header *);
7581 		ea = (struct ether_arp *)(eh + 1);
7582 		if (ea->arp_hrd != htons(ARPHRD_ETHER)) {
7583 			BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7584 			    __func__, __LINE__);
7585 			goto done;
7586 		}
7587 		if (ea->arp_pro != htons(ETHERTYPE_IP)) {
7588 			BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7589 			    __func__, __LINE__);
7590 			goto done;
7591 		}
7592 		/*
7593 		 * Verify the address lengths are correct
7594 		 */
7595 		if (ea->arp_hln != ETHER_ADDR_LEN) {
7596 			BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7597 			goto done;
7598 		}
7599 		if (ea->arp_pln != sizeof(struct in_addr)) {
7600 			BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7601 			    __func__, __LINE__);
7602 			goto done;
7603 		}
7604 		/*
7605 		 * Allow only ARP request or ARP reply
7606 		 */
7607 		if (ea->arp_op != htons(ARPOP_REQUEST) &&
7608 		    ea->arp_op != htons(ARPOP_REPLY)) {
7609 			BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7610 			goto done;
7611 		}
7612 		if ((bif->bif_flags & BIFF_HF_HWSRC) != 0) {
7613 			/*
7614 			 * Verify source hardware address matches
7615 			 */
7616 			if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7617 			    ETHER_ADDR_LEN) != 0) {
7618 				BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7619 				goto done;
7620 			}
7621 		}
7622 		if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7623 			/*
7624 			 * Verify source protocol address:
7625 			 * May be null for an ARP probe
7626 			 */
7627 			if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7628 			    sizeof(struct in_addr)) != 0 &&
7629 			    bcmp(ea->arp_spa, &inaddr_any,
7630 			    sizeof(struct in_addr)) != 0) {
7631 				BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7632 				goto done;
7633 			}
7634 		}
7635 		bridge_hostfilter_stats.brhf_arp_ok += 1;
7636 		error = 0;
7637 	} else if (eh->ether_type == htons(ETHERTYPE_IP)) {
7638 		size_t minlen = sizeof(struct ether_header) + sizeof(struct ip);
7639 		struct ip iphdr;
7640 		size_t offset;
7641 
7642 		/*
7643 		 * Make the Ethernet and IP headers contiguous
7644 		 */
7645 		if (mbuf_pkthdr_len(m) < minlen) {
7646 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7647 			goto done;
7648 		}
7649 		offset = sizeof(struct ether_header);
7650 		error = mbuf_copydata(m, offset, sizeof(struct ip), &iphdr);
7651 		if (error != 0) {
7652 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7653 			goto done;
7654 		}
7655 		if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7656 			/*
7657 			 * Verify the source IP address
7658 			 */
7659 			if (iphdr.ip_p == IPPROTO_UDP) {
7660 				struct udphdr udp;
7661 
7662 				minlen += sizeof(struct udphdr);
7663 				if (mbuf_pkthdr_len(m) < minlen) {
7664 					BRIDGE_HF_DROP(brhf_ip_too_small,
7665 					    __func__, __LINE__);
7666 					goto done;
7667 				}
7668 
7669 				/*
7670 				 * Allow all zero addresses for DHCP requests
7671 				 */
7672 				if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr &&
7673 				    iphdr.ip_src.s_addr != INADDR_ANY) {
7674 					BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7675 					    __func__, __LINE__);
7676 					goto done;
7677 				}
7678 				offset = sizeof(struct ether_header) +
7679 				    (IP_VHL_HL(iphdr.ip_vhl) << 2);
7680 				error = mbuf_copydata(m, offset,
7681 				    sizeof(struct udphdr), &udp);
7682 				if (error != 0) {
7683 					BRIDGE_HF_DROP(brhf_ip_too_small,
7684 					    __func__, __LINE__);
7685 					goto done;
7686 				}
7687 				/*
7688 				 * Either it's a Bootp/DHCP packet that we like or
7689 				 * it's a UDP packet from the host IP as source address
7690 				 */
7691 				if (udp.uh_sport == htons(IPPORT_BOOTPC) &&
7692 				    udp.uh_dport == htons(IPPORT_BOOTPS)) {
7693 					minlen += sizeof(struct dhcp);
7694 					if (mbuf_pkthdr_len(m) < minlen) {
7695 						BRIDGE_HF_DROP(brhf_ip_too_small,
7696 						    __func__, __LINE__);
7697 						goto done;
7698 					}
7699 					offset += sizeof(struct udphdr);
7700 					error = bridge_dhcp_filter(bif, m, offset);
7701 					if (error != 0) {
7702 						goto done;
7703 					}
7704 				} else if (iphdr.ip_src.s_addr == INADDR_ANY) {
7705 					BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7706 					    __func__, __LINE__);
7707 					goto done;
7708 				}
7709 			} else if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr) {
7710 				assert(bif->bif_hf_ipsrc.s_addr != INADDR_ANY);
7711 				BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, __func__, __LINE__);
7712 				goto done;
7713 			}
7714 		}
7715 		/*
7716 		 * Allow only boring IP protocols
7717 		 */
7718 		if (iphdr.ip_p != IPPROTO_TCP &&
7719 		    iphdr.ip_p != IPPROTO_UDP &&
7720 		    iphdr.ip_p != IPPROTO_ICMP &&
7721 		    iphdr.ip_p != IPPROTO_IGMP) {
7722 			BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7723 			goto done;
7724 		}
7725 		bridge_hostfilter_stats.brhf_ip_ok += 1;
7726 		error = 0;
7727 	} else if (eh->ether_type == htons(ETHERTYPE_IPV6)) {
7728 		size_t minlen = sizeof(struct ether_header) + sizeof(struct ip6_hdr);
7729 		struct ip6_hdr ip6hdr;
7730 		size_t offset;
7731 
7732 		/*
7733 		 * Make the Ethernet and IP headers contiguous
7734 		 */
7735 		if (mbuf_pkthdr_len(m) < minlen) {
7736 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7737 			goto done;
7738 		}
7739 		offset = sizeof(struct ether_header);
7740 		error = mbuf_copydata(m, offset, sizeof(struct ip6_hdr), &ip6hdr);
7741 		if (error != 0) {
7742 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7743 			goto done;
7744 		}
7745 		/*
7746 		 * Allow only boring IPv6 protocols
7747 		 */
7748 		if (ip6hdr.ip6_nxt != IPPROTO_TCP &&
7749 		    ip6hdr.ip6_nxt != IPPROTO_UDP &&
7750 		    ip6hdr.ip6_nxt != IPPROTO_ICMPV6) {
7751 			BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7752 			goto done;
7753 		}
7754 		bridge_hostfilter_stats.brhf_ip_ok += 1;
7755 		error = 0;
7756 	} else {
7757 		BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
7758 		goto done;
7759 	}
7760 done:
7761 	if (error != 0) {
7762 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
7763 			if (m) {
7764 				brlog_mbuf_data(m, 0,
7765 				    sizeof(struct ether_header) +
7766 				    sizeof(struct ip));
7767 			}
7768 		}
7769 
7770 		if (m != NULL) {
7771 			m_freem(m);
7772 		}
7773 	}
7774 	return error;
7775 }
7776 
7777 /*
7778  * MAC NAT
7779  */
7780 
7781 static errno_t
7782 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7783 {
7784 	errno_t         error = 0;
7785 
7786 	BRIDGE_LOCK_ASSERT_HELD(sc);
7787 
7788 	if (IFNET_IS_VMNET(bif->bif_ifp)) {
7789 		error = EINVAL;
7790 		goto done;
7791 	}
7792 	if (sc->sc_mac_nat_bif != NULL) {
7793 		if (sc->sc_mac_nat_bif != bif) {
7794 			error = EBUSY;
7795 		}
7796 		goto done;
7797 	}
7798 	sc->sc_mac_nat_bif = bif;
7799 	bif->bif_ifflags |= IFBIF_MAC_NAT;
7800 	bridge_mac_nat_populate_entries(sc);
7801 
7802 done:
7803 	return error;
7804 }
7805 
7806 static void
7807 bridge_mac_nat_disable(struct bridge_softc *sc)
7808 {
7809 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7810 
7811 	assert(mac_nat_bif != NULL);
7812 	bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7813 	mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7814 	sc->sc_mac_nat_bif = NULL;
7815 	return;
7816 }
7817 
7818 static void
7819 mac_nat_entry_print2(struct mac_nat_entry *mne,
7820     char *ifname, const char *msg1, const char *msg2)
7821 {
7822 	int             af;
7823 	char            etopbuf[24];
7824 	char            ntopbuf[MAX_IPv6_STR_LEN];
7825 	const char      *space;
7826 
7827 	af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7828 	ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7829 	(void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7830 	if (msg2 == NULL) {
7831 		msg2 = "";
7832 		space = "";
7833 	} else {
7834 		space = " ";
7835 	}
7836 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7837 	    "%s %s%s%s %p (%s, %s, %s)",
7838 	    ifname, msg1, space, msg2, mne, mne->mne_bif->bif_ifp->if_xname,
7839 	    ntopbuf, etopbuf);
7840 }
7841 
7842 static void
7843 mac_nat_entry_print(struct mac_nat_entry *mne,
7844     char *ifname, const char *msg)
7845 {
7846 	mac_nat_entry_print2(mne, ifname, msg, NULL);
7847 }
7848 
7849 static struct mac_nat_entry *
7850 bridge_lookup_mac_nat_entry(struct bridge_softc *sc, int af, void * ip)
7851 {
7852 	struct mac_nat_entry    *mne;
7853 	struct mac_nat_entry    *ret_mne = NULL;
7854 
7855 	if (af == AF_INET) {
7856 		in_addr_t s_addr = ((struct in_addr *)ip)->s_addr;
7857 
7858 		LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7859 			if (mne->mne_ip.s_addr == s_addr) {
7860 				if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7861 					mac_nat_entry_print(mne, sc->sc_if_xname,
7862 					    "found");
7863 				}
7864 				ret_mne = mne;
7865 				break;
7866 			}
7867 		}
7868 	} else {
7869 		const struct in6_addr *ip6 = (const struct in6_addr *)ip;
7870 
7871 		LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7872 			if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7873 				if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7874 					mac_nat_entry_print(mne, sc->sc_if_xname,
7875 					    "found");
7876 				}
7877 				ret_mne = mne;
7878 				break;
7879 			}
7880 		}
7881 	}
7882 	return ret_mne;
7883 }
7884 
7885 static void
7886 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7887     struct mac_nat_entry *mne, const char *reason)
7888 {
7889 	LIST_REMOVE(mne, mne_list);
7890 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7891 		mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7892 	}
7893 	zfree(bridge_mne_pool, mne);
7894 	sc->sc_mne_count--;
7895 }
7896 
7897 static struct mac_nat_entry *
7898 bridge_create_mac_nat_entry(struct bridge_softc *sc,
7899     struct bridge_iflist *bif, int af, const void *ip, uint8_t *eaddr)
7900 {
7901 	struct mac_nat_entry_list *list;
7902 	struct mac_nat_entry *mne;
7903 
7904 	if (sc->sc_mne_count >= sc->sc_mne_max) {
7905 		sc->sc_mne_allocation_failures++;
7906 		return NULL;
7907 	}
7908 	mne = zalloc_noblock(bridge_mne_pool);
7909 	if (mne == NULL) {
7910 		sc->sc_mne_allocation_failures++;
7911 		return NULL;
7912 	}
7913 	sc->sc_mne_count++;
7914 	bzero(mne, sizeof(*mne));
7915 	bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7916 	mne->mne_bif = bif;
7917 	if (af == AF_INET) {
7918 		bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7919 		list = &sc->sc_mne_list;
7920 	} else {
7921 		bcopy(ip, &mne->mne_ip6, sizeof(mne->mne_ip6));
7922 		mne->mne_flags |= MNE_FLAGS_IPV6;
7923 		list = &sc->sc_mne_list_v6;
7924 	}
7925 	LIST_INSERT_HEAD(list, mne, mne_list);
7926 	mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7927 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7928 		mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7929 	}
7930 	return mne;
7931 }
7932 
7933 static struct mac_nat_entry *
7934 bridge_update_mac_nat_entry(struct bridge_softc *sc,
7935     struct bridge_iflist *bif, int af, void *ip, uint8_t *eaddr)
7936 {
7937 	struct mac_nat_entry *mne;
7938 
7939 	mne = bridge_lookup_mac_nat_entry(sc, af, ip);
7940 	if (mne != NULL) {
7941 		struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7942 
7943 		if (mne->mne_bif == mac_nat_bif) {
7944 			/* the MAC NAT interface takes precedence */
7945 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7946 				if (mne->mne_bif != bif) {
7947 					mac_nat_entry_print2(mne,
7948 					    sc->sc_if_xname, "reject",
7949 					    bif->bif_ifp->if_xname);
7950 				}
7951 			}
7952 		} else if (mne->mne_bif != bif) {
7953 			const char *old_if = mne->mne_bif->bif_ifp->if_xname;
7954 
7955 			mne->mne_bif = bif;
7956 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7957 				mac_nat_entry_print2(mne,
7958 				    sc->sc_if_xname, "replaced",
7959 				    old_if);
7960 			}
7961 			bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7962 		}
7963 		mne->mne_expire = (unsigned long)net_uptime() +
7964 		    sc->sc_brttimeout;
7965 	} else {
7966 		mne = bridge_create_mac_nat_entry(sc, bif, af, ip, eaddr);
7967 	}
7968 	return mne;
7969 }
7970 
7971 static void
7972 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7973     struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7974 {
7975 	struct mac_nat_entry *mne;
7976 	struct mac_nat_entry *tmne;
7977 
7978 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7979 		if (bif != NULL && mne->mne_bif != bif) {
7980 			continue;
7981 		}
7982 		bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7983 	}
7984 }
7985 
7986 /*
7987  * bridge_mac_nat_flush_entries:
7988  *
7989  * Flush MAC NAT entries for the specified member. Flush all entries if
7990  * the member is the one that requires MAC NAT, otherwise just flush the
7991  * ones for the specified member.
7992  */
7993 static void
7994 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7995 {
7996 	struct bridge_iflist *flush_bif;
7997 
7998 	flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7999 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
8000 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
8001 }
8002 
8003 static void
8004 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
8005 {
8006 	errno_t                 error;
8007 	ifnet_t                 ifp;
8008 	ifaddr_t                *list;
8009 	struct bridge_iflist    *mac_nat_bif = sc->sc_mac_nat_bif;
8010 
8011 	assert(mac_nat_bif != NULL);
8012 	ifp = mac_nat_bif->bif_ifp;
8013 	error = ifnet_get_address_list(ifp, &list);
8014 	if (error != 0) {
8015 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8016 		    "ifnet_get_address_list(%s) failed %d",
8017 		    ifp->if_xname, error);
8018 		return;
8019 	}
8020 	for (ifaddr_t *scan = list; *scan != NULL; scan++) {
8021 		sa_family_t     af;
8022 		void            *ip;
8023 
8024 		union {
8025 			struct sockaddr         sa;
8026 			struct sockaddr_in      sin;
8027 			struct sockaddr_in6     sin6;
8028 		} u;
8029 		af = ifaddr_address_family(*scan);
8030 		switch (af) {
8031 		case AF_INET:
8032 		case AF_INET6:
8033 			error = ifaddr_address(*scan, &u.sa, sizeof(u));
8034 			if (error != 0) {
8035 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8036 				    "ifaddr_address failed %d",
8037 				    error);
8038 				break;
8039 			}
8040 			if (af == AF_INET) {
8041 				ip = (void *)&u.sin.sin_addr;
8042 			} else {
8043 				if (IN6_IS_ADDR_LINKLOCAL(&u.sin6.sin6_addr)) {
8044 					/* remove scope ID */
8045 					u.sin6.sin6_addr.s6_addr16[1] = 0;
8046 				}
8047 				ip = (void *)&u.sin6.sin6_addr;
8048 			}
8049 			bridge_create_mac_nat_entry(sc, mac_nat_bif, af, ip,
8050 			    (uint8_t *)IF_LLADDR(ifp));
8051 			break;
8052 		default:
8053 			break;
8054 		}
8055 	}
8056 	ifnet_free_address_list(list);
8057 	return;
8058 }
8059 
8060 static void
8061 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
8062     struct mac_nat_entry_list *list, unsigned long now)
8063 {
8064 	struct mac_nat_entry *mne;
8065 	struct mac_nat_entry *tmne;
8066 
8067 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
8068 		if (now >= mne->mne_expire) {
8069 			bridge_destroy_mac_nat_entry(sc, mne, "aged out");
8070 		}
8071 	}
8072 }
8073 
8074 static void
8075 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
8076 {
8077 	if (sc->sc_mac_nat_bif == NULL) {
8078 		return;
8079 	}
8080 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
8081 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
8082 }
8083 
8084 static const char *
8085 get_in_out_string(boolean_t is_output)
8086 {
8087 	return is_output ? "OUT" : "IN";
8088 }
8089 
8090 /*
8091  * is_valid_arp_packet:
8092  *	Verify that this is a valid ARP packet.
8093  *
8094  *	Returns TRUE if the packet is valid, FALSE otherwise.
8095  */
8096 static boolean_t
8097 is_valid_arp_packet(mbuf_t *data, boolean_t is_output,
8098     struct ether_header **eh_p, struct ether_arp **ea_p)
8099 {
8100 	struct ether_arp *ea;
8101 	struct ether_header *eh;
8102 	size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
8103 	boolean_t is_valid = FALSE;
8104 	int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8105 
8106 	if (mbuf_pkthdr_len(*data) < minlen) {
8107 		BRIDGE_LOG(LOG_DEBUG, flags,
8108 		    "ARP %s short frame %lu < %lu",
8109 		    get_in_out_string(is_output),
8110 		    mbuf_pkthdr_len(*data), minlen);
8111 		goto done;
8112 	}
8113 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8114 		BRIDGE_LOG(LOG_DEBUG, flags,
8115 		    "ARP %s size %lu mbuf_pullup fail",
8116 		    get_in_out_string(is_output),
8117 		    minlen);
8118 		*data = NULL;
8119 		goto done;
8120 	}
8121 
8122 	/* validate ARP packet */
8123 	eh = mtod(*data, struct ether_header *);
8124 	ea = (struct ether_arp *)(eh + 1);
8125 	if (ntohs(ea->arp_hrd) != ARPHRD_ETHER) {
8126 		BRIDGE_LOG(LOG_DEBUG, flags,
8127 		    "ARP %s htype not ethernet",
8128 		    get_in_out_string(is_output));
8129 		goto done;
8130 	}
8131 	if (ea->arp_hln != ETHER_ADDR_LEN) {
8132 		BRIDGE_LOG(LOG_DEBUG, flags,
8133 		    "ARP %s hlen not ethernet",
8134 		    get_in_out_string(is_output));
8135 		goto done;
8136 	}
8137 	if (ntohs(ea->arp_pro) != ETHERTYPE_IP) {
8138 		BRIDGE_LOG(LOG_DEBUG, flags,
8139 		    "ARP %s ptype not IP",
8140 		    get_in_out_string(is_output));
8141 		goto done;
8142 	}
8143 	if (ea->arp_pln != sizeof(struct in_addr)) {
8144 		BRIDGE_LOG(LOG_DEBUG, flags,
8145 		    "ARP %s plen not IP",
8146 		    get_in_out_string(is_output));
8147 		goto done;
8148 	}
8149 	is_valid = TRUE;
8150 	*ea_p = ea;
8151 	*eh_p = eh;
8152 done:
8153 	return is_valid;
8154 }
8155 
8156 static struct mac_nat_entry *
8157 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
8158 {
8159 	struct ether_arp        *ea;
8160 	struct ether_header     *eh;
8161 	struct mac_nat_entry    *mne = NULL;
8162 	u_short                 op;
8163 	struct in_addr          tpa;
8164 
8165 	if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
8166 		goto done;
8167 	}
8168 	op = ntohs(ea->arp_op);
8169 	switch (op) {
8170 	case ARPOP_REQUEST:
8171 	case ARPOP_REPLY:
8172 		/* only care about REQUEST and REPLY */
8173 		break;
8174 	default:
8175 		goto done;
8176 	}
8177 
8178 	/* check the target IP address for a NAT entry */
8179 	bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
8180 	if (tpa.s_addr != 0) {
8181 		mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &tpa);
8182 	}
8183 	if (mne != NULL) {
8184 		if (op == ARPOP_REPLY) {
8185 			/* translate the MAC address */
8186 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8187 				char    mac_src[24];
8188 				char    mac_dst[24];
8189 
8190 				ether_ntop(mac_src, sizeof(mac_src),
8191 				    ea->arp_tha);
8192 				ether_ntop(mac_dst, sizeof(mac_dst),
8193 				    mne->mne_mac);
8194 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8195 				    "%s %s ARP %s -> %s",
8196 				    sc->sc_if_xname,
8197 				    mne->mne_bif->bif_ifp->if_xname,
8198 				    mac_src, mac_dst);
8199 			}
8200 			bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
8201 		}
8202 	} else {
8203 		/* handle conflicting ARP (sender matches mne) */
8204 		struct in_addr spa;
8205 
8206 		bcopy(ea->arp_spa, &spa, sizeof(spa));
8207 		if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
8208 			/* check the source IP for a NAT entry */
8209 			mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &spa);
8210 		}
8211 	}
8212 
8213 done:
8214 	return mne;
8215 }
8216 
8217 static boolean_t
8218 bridge_mac_nat_arp_output(struct bridge_softc *sc,
8219     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8220 {
8221 	struct ether_arp        *ea;
8222 	struct ether_header     *eh;
8223 	struct in_addr          ip;
8224 	struct mac_nat_entry    *mne = NULL;
8225 	u_short                 op;
8226 	boolean_t               translate = FALSE;
8227 
8228 	if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
8229 		goto done;
8230 	}
8231 	op = ntohs(ea->arp_op);
8232 	switch (op) {
8233 	case ARPOP_REQUEST:
8234 	case ARPOP_REPLY:
8235 		/* only care about REQUEST and REPLY */
8236 		break;
8237 	default:
8238 		goto done;
8239 	}
8240 
8241 	bcopy(ea->arp_spa, &ip, sizeof(ip));
8242 	if (ip.s_addr == 0) {
8243 		goto done;
8244 	}
8245 	/* XXX validate IP address: no multicast/broadcast */
8246 	mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip, ea->arp_sha);
8247 	if (mnr != NULL && mne != NULL) {
8248 		/* record the offset to do the replacement */
8249 		translate = TRUE;
8250 		mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
8251 	}
8252 
8253 done:
8254 	return translate;
8255 }
8256 
8257 #define ETHER_IPV4_HEADER_LEN   (sizeof(struct ether_header) +  \
8258 	                         + sizeof(struct ip))
8259 static struct ether_header *
8260 get_ether_ip_header(mbuf_t *data, boolean_t is_output)
8261 {
8262 	struct ether_header     *eh = NULL;
8263 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8264 	size_t          minlen = ETHER_IPV4_HEADER_LEN;
8265 
8266 	if (mbuf_pkthdr_len(*data) < minlen) {
8267 		BRIDGE_LOG(LOG_DEBUG, flags,
8268 		    "IP %s short frame %lu < %lu",
8269 		    get_in_out_string(is_output),
8270 		    mbuf_pkthdr_len(*data), minlen);
8271 		goto done;
8272 	}
8273 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8274 		BRIDGE_LOG(LOG_DEBUG, flags,
8275 		    "IP %s size %lu mbuf_pullup fail",
8276 		    get_in_out_string(is_output),
8277 		    minlen);
8278 		*data = NULL;
8279 		goto done;
8280 	}
8281 	eh = mtod(*data, struct ether_header *);
8282 done:
8283 	return eh;
8284 }
8285 
8286 static bool
8287 is_broadcast_ip_packet(mbuf_t *data)
8288 {
8289 	struct ether_header     *eh;
8290 	uint16_t                ether_type;
8291 	bool                    is_broadcast = FALSE;
8292 
8293 	eh = mtod(*data, struct ether_header *);
8294 	ether_type = ntohs(eh->ether_type);
8295 	switch (ether_type) {
8296 	case ETHERTYPE_IP:
8297 		eh = get_ether_ip_header(data, FALSE);
8298 		if (eh != NULL) {
8299 			struct in_addr  dst;
8300 			struct ip       *iphdr;
8301 
8302 			iphdr = (struct ip *)(void *)(eh + 1);
8303 			bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8304 			is_broadcast = (dst.s_addr == INADDR_BROADCAST);
8305 		}
8306 		break;
8307 	default:
8308 		break;
8309 	}
8310 	return is_broadcast;
8311 }
8312 
8313 static struct mac_nat_entry *
8314 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8315 {
8316 	struct in_addr          dst;
8317 	struct ether_header     *eh;
8318 	struct ip               *iphdr;
8319 	struct mac_nat_entry    *mne = NULL;
8320 
8321 	eh = get_ether_ip_header(data, FALSE);
8322 	if (eh == NULL) {
8323 		goto done;
8324 	}
8325 	iphdr = (struct ip *)(void *)(eh + 1);
8326 	bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8327 	/* XXX validate IP address */
8328 	if (dst.s_addr == 0) {
8329 		goto done;
8330 	}
8331 	mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &dst);
8332 done:
8333 	return mne;
8334 }
8335 
8336 static void
8337 bridge_mac_nat_udp_output(struct bridge_softc *sc,
8338     struct bridge_iflist *bif, mbuf_t m,
8339     uint8_t ip_header_len, struct mac_nat_record *mnr)
8340 {
8341 	uint16_t        dp_flags;
8342 	errno_t         error;
8343 	size_t          offset;
8344 	struct udphdr   udphdr;
8345 
8346 	/* copy the UDP header */
8347 	offset = sizeof(struct ether_header) + ip_header_len;
8348 	error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
8349 	if (error != 0) {
8350 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8351 		    "mbuf_copydata udphdr failed %d",
8352 		    error);
8353 		return;
8354 	}
8355 	if (ntohs(udphdr.uh_sport) != IPPORT_BOOTPC ||
8356 	    ntohs(udphdr.uh_dport) != IPPORT_BOOTPS) {
8357 		/* not a BOOTP/DHCP packet */
8358 		return;
8359 	}
8360 	/* check whether the broadcast bit is already set */
8361 	offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8362 	error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
8363 	if (error != 0) {
8364 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8365 		    "mbuf_copydata dp_flags failed %d",
8366 		    error);
8367 		return;
8368 	}
8369 	if ((ntohs(dp_flags) & DHCP_FLAGS_BROADCAST) != 0) {
8370 		/* it's already set, nothing to do */
8371 		return;
8372 	}
8373 	/* broadcast bit needs to be set */
8374 	mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8375 	mnr->mnr_ip_header_len = ip_header_len;
8376 	if (udphdr.uh_sum != 0) {
8377 		uint16_t        delta;
8378 
8379 		/* adjust checksum to take modified dp_flags into account */
8380 		delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8381 		mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8382 	}
8383 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8384 	    "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8385 	    sc->sc_if_xname,
8386 	    bif->bif_ifp->if_xname,
8387 	    ntohs(mnr->mnr_ip_dhcp_flags),
8388 	    ntohs(mnr->mnr_ip_udp_csum));
8389 	return;
8390 }
8391 
8392 static boolean_t
8393 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8394     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8395 {
8396 #pragma unused(mnr)
8397 	struct ether_header     *eh;
8398 	struct in_addr          ip;
8399 	struct ip               *iphdr;
8400 	uint8_t                 ip_header_len;
8401 	struct mac_nat_entry    *mne = NULL;
8402 	boolean_t               translate = FALSE;
8403 
8404 	eh = get_ether_ip_header(data, TRUE);
8405 	if (eh == NULL) {
8406 		goto done;
8407 	}
8408 	iphdr = (struct ip *)(void *)(eh + 1);
8409 	ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8410 	if (ip_header_len < sizeof(ip)) {
8411 		/* bogus IP header */
8412 		goto done;
8413 	}
8414 	bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8415 	/* XXX validate the source address */
8416 	if (ip.s_addr != 0) {
8417 		mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip,
8418 		    eh->ether_shost);
8419 	}
8420 	if (mnr != NULL) {
8421 		if (ip.s_addr == 0 && iphdr->ip_p == IPPROTO_UDP) {
8422 			/* handle DHCP must broadcast */
8423 			bridge_mac_nat_udp_output(sc, bif, *data,
8424 			    ip_header_len, mnr);
8425 		}
8426 		translate = TRUE;
8427 	}
8428 done:
8429 	return translate;
8430 }
8431 
8432 #define ETHER_IPV6_HEADER_LEN   (sizeof(struct ether_header) +  \
8433 	                         + sizeof(struct ip6_hdr))
8434 static struct ether_header *
8435 get_ether_ipv6_header(mbuf_t *data, size_t plen, boolean_t is_output)
8436 {
8437 	struct ether_header     *eh = NULL;
8438 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8439 	size_t          minlen = ETHER_IPV6_HEADER_LEN + plen;
8440 
8441 	if (mbuf_pkthdr_len(*data) < minlen) {
8442 		BRIDGE_LOG(LOG_DEBUG, flags,
8443 		    "IP %s short frame %lu < %lu",
8444 		    get_in_out_string(is_output),
8445 		    mbuf_pkthdr_len(*data), minlen);
8446 		goto done;
8447 	}
8448 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8449 		BRIDGE_LOG(LOG_DEBUG, flags,
8450 		    "IP %s size %lu mbuf_pullup fail",
8451 		    get_in_out_string(is_output),
8452 		    minlen);
8453 		*data = NULL;
8454 		goto done;
8455 	}
8456 	eh = mtod(*data, struct ether_header *);
8457 done:
8458 	return eh;
8459 }
8460 
8461 #include <netinet/icmp6.h>
8462 #include <netinet6/nd6.h>
8463 
8464 #define ETHER_ND_LLADDR_LEN     (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8465 
/*
 * bridge_mac_nat_icmpv6_output:
 *
 * Examine an outbound ICMPv6 packet and, if it is a Neighbor Discovery
 * message (neighbor solicit/advert, router solicit/advert) carrying a
 * link-layer address option, record in `mnr' where that address lives
 * so it can be rewritten later:
 *   mnr_ip6_lladdr_offset  offset of the link-layer address from the
 *                          start of the Ethernet header
 *   mnr_ip6_icmp6_len      ICMPv6 length (the IPv6 payload length)
 *   mnr_ip6_icmp6_type     ICMPv6 message type
 *   mnr_ip6_header_len     IPv6 header length
 *
 * For a neighbor solicit sent from the unspecified address (DAD
 * probe), *saddrp is overwritten with the solicited target address.
 *
 * The ICMPv6 header is taken to start right after the fixed IPv6
 * header (off = sizeof(*ip6h)); presumably the caller only passes
 * packets whose next header is ICMPv6 -- confirm against the caller.
 *
 * The packet may be pulled up, so *data can change; when a pullup
 * fails, get_ether_ipv6_header() sets *data to NULL and this function
 * returns without recording anything.  `mnr' is dereferenced without
 * a NULL check.
 */
static void
bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
    struct bridge_iflist *bif,
    mbuf_t *data, struct ip6_hdr *ip6h,
    struct in6_addr *saddrp,
    struct mac_nat_record *mnr)
{
	struct ether_header *eh;
	struct icmp6_hdr *icmp6;
	uint8_t         icmp6_type;
	uint32_t        icmp6len;
	int             lladdrlen = 0;
	char            *lladdr = NULL;
	unsigned int    off = sizeof(*ip6h);

	/* the whole IPv6 payload is treated as the ICMPv6 message */
	icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
	if (icmp6len < sizeof(*icmp6)) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "short IPv6 payload length %d < %lu",
		    icmp6len, sizeof(*icmp6));
		return;
	}

	/* pullup IP6 header + ICMPv6 header */
	eh = get_ether_ipv6_header(data, sizeof(*icmp6), TRUE);
	if (eh == NULL) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "failed to pullup icmp6 header");
		return;
	}
	/* the pullup may have moved the data: re-derive the pointers */
	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
	icmp6_type = icmp6->icmp6_type;
	switch (icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
	case ND_NEIGHBOR_ADVERT:
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT:
		break;
	default:
		/* not a Neighbor Discovery message of interest */
		return;
	}

	/* pullup IP6 header + payload */
	eh = get_ether_ipv6_header(data, icmp6len, TRUE);
	if (eh == NULL) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "failed to pullup icmp6 + payload");
		return;
	}
	/* again, re-derive the pointers after the second pullup */
	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
	switch (icmp6_type) {
	case ND_NEIGHBOR_SOLICIT: {
		struct nd_neighbor_solicit *nd_ns;
		union nd_opts ndopts;
		boolean_t is_dad_probe;
		struct in6_addr taddr;

		if (icmp6len < sizeof(*nd_ns)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_ns %d < %lu",
			    icmp6len, sizeof(*nd_ns));
			return;
		}

		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}
		/* parse options */
		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NS option");
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			/* the address follows the option header */
			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
			/* nd_opt_len is scaled by 8 to get bytes */
			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
		}
		/* an unspecified source address marks a DAD probe */
		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
		if (lladdr != NULL) {
			if (is_dad_probe) {
				/* source lladdr option together with an unspecified source */
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "bad ND6 DAD packet");
				return;
			}
			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
		}
		if (is_dad_probe) {
			/* node is trying to use taddr, create an mne for taddr */
			*saddrp = taddr;
		}
		break;
	}
	case ND_NEIGHBOR_ADVERT: {
		struct nd_neighbor_advert *nd_na;
		union nd_opts ndopts;
		struct in6_addr taddr;


		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;

		if (icmp6len < sizeof(*nd_na)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_na %d < %lu",
			    icmp6len, sizeof(*nd_na));
			return;
		}

		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}
		/* parse options */
		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NA option");
			return;
		}
		if (ndopts.nd_opts_tgt_lladdr == NULL) {
			/* no target link-layer address option, nothing to do */
			return;
		}
		/* the address follows the option header */
		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "target lladdrlen %d != %lu",
			    lladdrlen, ETHER_ND_LLADDR_LEN);
			return;
		}
		break;
	}
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT: {
		union nd_opts ndopts;
		uint32_t type_length;
		const char *description;

		/* RA and RS differ only in the size of the fixed part */
		if (icmp6_type == ND_ROUTER_ADVERT) {
			type_length = sizeof(struct nd_router_advert);
			description = "RA";
		} else {
			type_length = sizeof(struct nd_router_solicit);
			description = "RS";
		}
		if (icmp6len < type_length) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short ND6 %s %d < %d",
			    description, icmp6len, type_length);
			return;
		}
		/* parse options */
		nd6_option_init(((uint8_t *)icmp6) + type_length,
		    icmp6len - type_length, &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 %s option", description);
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
		}
		break;
	}
	default:
		break;
	}
	if (lladdr != NULL) {
		/* remember where the link-layer address sits in the frame */
		mnr->mnr_ip6_lladdr_offset = (uint16_t)
		    ((uintptr_t)lladdr - (uintptr_t)eh);
		mnr->mnr_ip6_icmp6_len = icmp6len;
		mnr->mnr_ip6_icmp6_type = icmp6_type;
		mnr->mnr_ip6_header_len = off;
		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
			const char *str;

			switch (mnr->mnr_ip6_icmp6_type) {
			case ND_ROUTER_ADVERT:
				str = "ROUTER ADVERT";
				break;
			case ND_ROUTER_SOLICIT:
				str = "ROUTER SOLICIT";
				break;
			case ND_NEIGHBOR_ADVERT:
				str = "NEIGHBOR ADVERT";
				break;
			case ND_NEIGHBOR_SOLICIT:
				str = "NEIGHBOR SOLICIT";
				break;
			default:
				str = "";
				break;
			}
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
			    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
			    mnr->mnr_ip6_header_len,
			    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
		}
	}
}
8690 
8691 static struct mac_nat_entry *
8692 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8693 {
8694 	struct in6_addr         dst;
8695 	struct ether_header     *eh;
8696 	struct ip6_hdr          *ip6h;
8697 	struct mac_nat_entry    *mne = NULL;
8698 
8699 	eh = get_ether_ipv6_header(data, 0, FALSE);
8700 	if (eh == NULL) {
8701 		goto done;
8702 	}
8703 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8704 	bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8705 	/* XXX validate IPv6 address */
8706 	if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8707 		goto done;
8708 	}
8709 	mne = bridge_lookup_mac_nat_entry(sc, AF_INET6, &dst);
8710 
8711 done:
8712 	return mne;
8713 }
8714 
8715 static boolean_t
8716 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8717     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8718 {
8719 	struct ether_header     *eh;
8720 	ether_addr_t            ether_shost;
8721 	struct ip6_hdr          *ip6h;
8722 	struct in6_addr         saddr;
8723 	boolean_t               translate;
8724 
8725 	translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8726 	eh = get_ether_ipv6_header(data, 0, TRUE);
8727 	if (eh == NULL) {
8728 		translate = FALSE;
8729 		goto done;
8730 	}
8731 	bcopy(eh->ether_shost, &ether_shost, sizeof(ether_shost));
8732 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8733 	bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8734 	if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8735 		bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, &saddr, mnr);
8736 	}
8737 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8738 		goto done;
8739 	}
8740 	(void)bridge_update_mac_nat_entry(sc, bif, AF_INET6, &saddr,
8741 	    ether_shost.octet);
8742 
8743 done:
8744 	return translate;
8745 }
8746 
8747 /*
8748  * bridge_mac_nat_input:
8749  * Process a packet arriving on the MAC NAT interface (sc_mac_nat_bif).
8750  * This interface is the "external" interface with respect to NAT.
8751  * The interface is only capable of receiving a single MAC address
8752  * (e.g. a Wi-Fi STA interface).
8753  *
8754  * When a packet arrives on the external interface, look up the destination
8755  * IP address in the mac_nat_entry table. If there is a match, *is_input
8756  * is set to TRUE if it's for the MAC NAT interface, otherwise *is_input
8757  * is set to FALSE and translate the MAC address if necessary.
8758  *
8759  * Returns:
8760  * The internal interface to direct the packet to, or NULL if the packet
8761  * should not be redirected.
8762  *
8763  * *data may be updated to point at a different mbuf chain, or set to NULL
8764  * if the chain was deallocated during processing.
8765  */
8766 static ifnet_t
8767 bridge_mac_nat_input(struct bridge_softc *sc, mbuf_t *data,
8768     boolean_t *is_input)
8769 {
8770 	ifnet_t                 dst_if = NULL;
8771 	struct ether_header     *eh;
8772 	uint16_t                ether_type;
8773 	boolean_t               is_unicast;
8774 	mbuf_t                  m = *data;
8775 	struct mac_nat_entry    *mne = NULL;
8776 
8777 	BRIDGE_LOCK_ASSERT_HELD(sc);
8778 	*is_input = FALSE;
8779 	assert(sc->sc_mac_nat_bif != NULL);
8780 	is_unicast = ((m->m_flags & (M_BCAST | M_MCAST)) == 0);
8781 	eh = mtod(m, struct ether_header *);
8782 	ether_type = ntohs(eh->ether_type);
8783 	switch (ether_type) {
8784 	case ETHERTYPE_ARP:
8785 		mne = bridge_mac_nat_arp_input(sc, data);
8786 		break;
8787 	case ETHERTYPE_IP:
8788 		if (is_unicast) {
8789 			mne = bridge_mac_nat_ip_input(sc, data);
8790 		}
8791 		break;
8792 	case ETHERTYPE_IPV6:
8793 		if (is_unicast) {
8794 			mne = bridge_mac_nat_ipv6_input(sc, data);
8795 		}
8796 		break;
8797 	default:
8798 		break;
8799 	}
8800 	if (mne != NULL) {
8801 		if (is_unicast) {
8802 			if (m != *data) {
8803 				/* it may have changed */
8804 				eh = mtod(*data, struct ether_header *);
8805 			}
8806 			bcopy(mne->mne_mac, eh->ether_dhost,
8807 			    sizeof(eh->ether_dhost));
8808 		}
8809 		dst_if = mne->mne_bif->bif_ifp;
8810 		*is_input = (mne->mne_bif == sc->sc_mac_nat_bif);
8811 	}
8812 	return dst_if;
8813 }
8814 
8815 /*
8816  * bridge_mac_nat_output:
8817  * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8818  * from the interface 'bif'.
8819  *
8820  * Create a mac_nat_entry containing the source IP address and MAC address
8821  * from the packet. Populate a mac_nat_record with information detailing
8822  * how to translate the packet. Translation takes place later when
8823  * the bridge lock is no longer held.
8824  *
8825  * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8826  * interface is generating an output packet. No translation is required in this
8827  * case, we just record the IP address used to prevent another bif from
8828  * claiming our IP address.
8829  *
8830  * Returns:
8831  * TRUE if the packet should be translated (*mnr updated as well),
8832  * FALSE otherwise.
8833  *
8834  * *data may be updated to point at a different mbuf chain or NULL if
8835  * the chain was deallocated during processing.
8836  */
8837 
8838 static boolean_t
8839 bridge_mac_nat_output(struct bridge_softc *sc,
8840     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8841 {
8842 	struct ether_header     *eh;
8843 	uint16_t                ether_type;
8844 	boolean_t               translate = FALSE;
8845 
8846 	BRIDGE_LOCK_ASSERT_HELD(sc);
8847 	assert(sc->sc_mac_nat_bif != NULL);
8848 
8849 	eh = mtod(*data, struct ether_header *);
8850 	ether_type = ntohs(eh->ether_type);
8851 	if (mnr != NULL) {
8852 		bzero(mnr, sizeof(*mnr));
8853 		mnr->mnr_ether_type = ether_type;
8854 	}
8855 	switch (ether_type) {
8856 	case ETHERTYPE_ARP:
8857 		translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8858 		break;
8859 	case ETHERTYPE_IP:
8860 		translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8861 		break;
8862 	case ETHERTYPE_IPV6:
8863 		translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8864 		break;
8865 	default:
8866 		break;
8867 	}
8868 	return translate;
8869 }
8870 
8871 static void
8872 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8873     const caddr_t eaddr)
8874 {
8875 	errno_t                 error;
8876 
8877 	if (mnr->mnr_arp_offset == 0) {
8878 		return;
8879 	}
8880 	/* replace the source hardware address */
8881 	error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8882 	    ETHER_ADDR_LEN, eaddr,
8883 	    MBUF_DONTWAIT);
8884 	if (error != 0) {
8885 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8886 		    "mbuf_copyback failed");
8887 		m_freem(*data);
8888 		*data = NULL;
8889 	}
8890 	return;
8891 }
8892 
8893 static void
8894 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8895 {
8896 	errno_t         error;
8897 	size_t          offset;
8898 
8899 	if (mnr->mnr_ip_header_len == 0) {
8900 		return;
8901 	}
8902 	/* update the UDP checksum */
8903 	offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8904 	error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8905 	    sizeof(mnr->mnr_ip_udp_csum),
8906 	    &mnr->mnr_ip_udp_csum,
8907 	    MBUF_DONTWAIT);
8908 	if (error != 0) {
8909 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8910 		    "mbuf_copyback uh_sum failed");
8911 		m_freem(*data);
8912 		*data = NULL;
8913 	}
8914 	/* update the DHCP must broadcast flag */
8915 	offset += sizeof(struct udphdr);
8916 	error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8917 	    sizeof(mnr->mnr_ip_dhcp_flags),
8918 	    &mnr->mnr_ip_dhcp_flags,
8919 	    MBUF_DONTWAIT);
8920 	if (error != 0) {
8921 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8922 		    "mbuf_copyback dp_flags failed");
8923 		m_freem(*data);
8924 		*data = NULL;
8925 	}
8926 }
8927 
8928 static void
8929 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8930     const caddr_t eaddr)
8931 {
8932 	uint16_t        cksum;
8933 	errno_t         error;
8934 	mbuf_t          m = *data;
8935 
8936 	if (mnr->mnr_ip6_header_len == 0) {
8937 		return;
8938 	}
8939 	switch (mnr->mnr_ip6_icmp6_type) {
8940 	case ND_ROUTER_ADVERT:
8941 	case ND_ROUTER_SOLICIT:
8942 	case ND_NEIGHBOR_SOLICIT:
8943 	case ND_NEIGHBOR_ADVERT:
8944 		if (mnr->mnr_ip6_lladdr_offset == 0) {
8945 			/* nothing to do */
8946 			return;
8947 		}
8948 		break;
8949 	default:
8950 		return;
8951 	}
8952 
8953 	/*
8954 	 * replace the lladdr
8955 	 */
8956 	error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8957 	    ETHER_ADDR_LEN, eaddr,
8958 	    MBUF_DONTWAIT);
8959 	if (error != 0) {
8960 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8961 		    "mbuf_copyback lladdr failed");
8962 		m_freem(m);
8963 		*data = NULL;
8964 		return;
8965 	}
8966 
8967 	/*
8968 	 * recompute the icmp6 checksum
8969 	 */
8970 
8971 	/* skip past the ethernet header */
8972 	mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
8973 	    mbuf_len(m) - ETHER_HDR_LEN);
8974 	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
8975 
8976 #define CKSUM_OFFSET_ICMP6      offsetof(struct icmp6_hdr, icmp6_cksum)
8977 	/* set the checksum to zero */
8978 	cksum = 0;
8979 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8980 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8981 	if (error != 0) {
8982 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8983 		    "mbuf_copyback cksum=0 failed");
8984 		m_freem(m);
8985 		*data = NULL;
8986 		return;
8987 	}
8988 	/* compute and set the new checksum */
8989 	cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8990 	    mnr->mnr_ip6_icmp6_len);
8991 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8992 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8993 	if (error != 0) {
8994 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8995 		    "mbuf_copyback cksum failed");
8996 		m_freem(m);
8997 		*data = NULL;
8998 		return;
8999 	}
9000 	/* restore the ethernet header */
9001 	mbuf_setdata(m, (char *)mbuf_data(m) - ETHER_HDR_LEN,
9002 	    mbuf_len(m) + ETHER_HDR_LEN);
9003 	mbuf_pkthdr_adjustlen(m, ETHER_HDR_LEN);
9004 	return;
9005 }
9006 
9007 static void
9008 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
9009     const caddr_t eaddr)
9010 {
9011 	struct ether_header     *eh;
9012 
9013 	/* replace the source ethernet address with the single MAC */
9014 	eh = mtod(*data, struct ether_header *);
9015 	bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
9016 	switch (mnr->mnr_ether_type) {
9017 	case ETHERTYPE_ARP:
9018 		bridge_mac_nat_arp_translate(data, mnr, eaddr);
9019 		break;
9020 
9021 	case ETHERTYPE_IP:
9022 		bridge_mac_nat_ip_translate(data, mnr);
9023 		break;
9024 
9025 	case ETHERTYPE_IPV6:
9026 		bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
9027 		break;
9028 
9029 	default:
9030 		break;
9031 	}
9032 	return;
9033 }
9034 
9035 /*
9036  * bridge packet filtering
9037  */
9038 
9039 /*
9040  * Perform basic checks on header size since
9041  * pfil assumes ip_input has already processed
9042  * it for it.  Cut-and-pasted from ip_input.c.
9043  * Given how simple the IPv6 version is,
9044  * does the IPv4 version really need to be
9045  * this complicated?
9046  *
9047  * XXX Should we update ipstat here, or not?
9048  * XXX Right now we update ipstat but not
9049  * XXX csum_counter.
9050  */
9051 static int
9052 bridge_ip_checkbasic(struct mbuf **mp)
9053 {
9054 	struct mbuf *m = *mp;
9055 	struct ip *ip;
9056 	int len, hlen;
9057 	u_short sum;
9058 
9059 	if (*mp == NULL) {
9060 		return -1;
9061 	}
9062 
9063 	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9064 		/* max_linkhdr is already rounded up to nearest 4-byte */
9065 		if ((m = m_copyup(m, sizeof(struct ip),
9066 		    max_linkhdr)) == NULL) {
9067 			/* XXXJRT new stat, please */
9068 			ipstat.ips_toosmall++;
9069 			goto bad;
9070 		}
9071 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
9072 		if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
9073 			ipstat.ips_toosmall++;
9074 			goto bad;
9075 		}
9076 	}
9077 	ip = mtod(m, struct ip *);
9078 	if (ip == NULL) {
9079 		goto bad;
9080 	}
9081 
9082 	if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
9083 		ipstat.ips_badvers++;
9084 		goto bad;
9085 	}
9086 	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9087 	if (hlen < (int)sizeof(struct ip)) {  /* minimum header length */
9088 		ipstat.ips_badhlen++;
9089 		goto bad;
9090 	}
9091 	if (hlen > m->m_len) {
9092 		if ((m = m_pullup(m, hlen)) == 0) {
9093 			ipstat.ips_badhlen++;
9094 			goto bad;
9095 		}
9096 		ip = mtod(m, struct ip *);
9097 		if (ip == NULL) {
9098 			goto bad;
9099 		}
9100 	}
9101 
9102 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
9103 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
9104 	} else {
9105 		if (hlen == sizeof(struct ip)) {
9106 			sum = in_cksum_hdr(ip);
9107 		} else {
9108 			sum = in_cksum(m, hlen);
9109 		}
9110 	}
9111 	if (sum) {
9112 		ipstat.ips_badsum++;
9113 		goto bad;
9114 	}
9115 
9116 	/* Retrieve the packet length. */
9117 	len = ntohs(ip->ip_len);
9118 
9119 	/*
9120 	 * Check for additional length bogosity
9121 	 */
9122 	if (len < hlen) {
9123 		ipstat.ips_badlen++;
9124 		goto bad;
9125 	}
9126 
9127 	/*
9128 	 * Check that the amount of data in the buffers
9129 	 * is as at least much as the IP header would have us expect.
9130 	 * Drop packet if shorter than we expect.
9131 	 */
9132 	if (m->m_pkthdr.len < len) {
9133 		ipstat.ips_tooshort++;
9134 		goto bad;
9135 	}
9136 
9137 	/* Checks out, proceed */
9138 	*mp = m;
9139 	return 0;
9140 
9141 bad:
9142 	*mp = m;
9143 	return -1;
9144 }
9145 
9146 /*
9147  * Same as above, but for IPv6.
9148  * Cut-and-pasted from ip6_input.c.
9149  * XXX Should we update ip6stat, or not?
9150  */
9151 static int
9152 bridge_ip6_checkbasic(struct mbuf **mp)
9153 {
9154 	struct mbuf *m = *mp;
9155 	struct ip6_hdr *ip6;
9156 
9157 	/*
9158 	 * If the IPv6 header is not aligned, slurp it up into a new
9159 	 * mbuf with space for link headers, in the event we forward
9160 	 * it.  Otherwise, if it is aligned, make sure the entire base
9161 	 * IPv6 header is in the first mbuf of the chain.
9162 	 */
9163 	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9164 		struct ifnet *inifp = m->m_pkthdr.rcvif;
9165 		/* max_linkhdr is already rounded up to nearest 4-byte */
9166 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
9167 		    max_linkhdr)) == NULL) {
9168 			/* XXXJRT new stat, please */
9169 			ip6stat.ip6s_toosmall++;
9170 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9171 			goto bad;
9172 		}
9173 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
9174 		struct ifnet *inifp = m->m_pkthdr.rcvif;
9175 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
9176 			ip6stat.ip6s_toosmall++;
9177 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9178 			goto bad;
9179 		}
9180 	}
9181 
9182 	ip6 = mtod(m, struct ip6_hdr *);
9183 
9184 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
9185 		ip6stat.ip6s_badvers++;
9186 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
9187 		goto bad;
9188 	}
9189 
9190 	/* Checks out, proceed */
9191 	*mp = m;
9192 	return 0;
9193 
9194 bad:
9195 	*mp = m;
9196 	return -1;
9197 }
9198 
9199 /*
9200  * the PF routines expect to be called from ip_input, so we
9201  * need to do and undo here some of the same processing.
9202  *
9203  * XXX : this is heavily inspired on bridge_pfil()
9204  */
9205 static int
9206 bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
9207     int input)
9208 {
9209 	/*
9210 	 * XXX : mpetit : heavily inspired by bridge_pfil()
9211 	 */
9212 
9213 	int snap, error, i, hlen;
9214 	struct ether_header *eh1, eh2;
9215 	struct ip *ip;
9216 	struct llc llc1;
9217 	u_int16_t ether_type;
9218 
9219 	snap = 0;
9220 	error = -1;     /* Default error if not error == 0 */
9221 
9222 	if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
9223 		return 0; /* filtering is disabled */
9224 	}
9225 	i = min((*mp)->m_pkthdr.len, max_protohdr);
9226 	if ((*mp)->m_len < i) {
9227 		*mp = m_pullup(*mp, i);
9228 		if (*mp == NULL) {
9229 			BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
9230 			return -1;
9231 		}
9232 	}
9233 
9234 	eh1 = mtod(*mp, struct ether_header *);
9235 	ether_type = ntohs(eh1->ether_type);
9236 
9237 	/*
9238 	 * Check for SNAP/LLC.
9239 	 */
9240 	if (ether_type < ETHERMTU) {
9241 		struct llc *llc2 = (struct llc *)(eh1 + 1);
9242 
9243 		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
9244 		    llc2->llc_dsap == LLC_SNAP_LSAP &&
9245 		    llc2->llc_ssap == LLC_SNAP_LSAP &&
9246 		    llc2->llc_control == LLC_UI) {
9247 			ether_type = htons(llc2->llc_un.type_snap.ether_type);
9248 			snap = 1;
9249 		}
9250 	}
9251 
9252 	/*
9253 	 * If we're trying to filter bridge traffic, don't look at anything
9254 	 * other than IP and ARP traffic.  If the filter doesn't understand
9255 	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
9256 	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
9257 	 * but of course we don't have an AppleTalk filter to begin with.
9258 	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
9259 	 * ARP traffic.)
9260 	 */
9261 	switch (ether_type) {
9262 	case ETHERTYPE_ARP:
9263 	case ETHERTYPE_REVARP:
9264 		return 0;         /* Automatically pass */
9265 
9266 	case ETHERTYPE_IP:
9267 	case ETHERTYPE_IPV6:
9268 		break;
9269 	default:
9270 		/*
9271 		 * Check to see if the user wants to pass non-ip
9272 		 * packets, these will not be checked by pf and
9273 		 * passed unconditionally so the default is to drop.
9274 		 */
9275 		if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
9276 			goto bad;
9277 		}
9278 		break;
9279 	}
9280 
9281 	/* Strip off the Ethernet header and keep a copy. */
9282 	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
9283 	m_adj(*mp, ETHER_HDR_LEN);
9284 
9285 	/* Strip off snap header, if present */
9286 	if (snap) {
9287 		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
9288 		m_adj(*mp, sizeof(struct llc));
9289 	}
9290 
9291 	/*
9292 	 * Check the IP header for alignment and errors
9293 	 */
9294 	switch (ether_type) {
9295 	case ETHERTYPE_IP:
9296 		error = bridge_ip_checkbasic(mp);
9297 		break;
9298 	case ETHERTYPE_IPV6:
9299 		error = bridge_ip6_checkbasic(mp);
9300 		break;
9301 	default:
9302 		error = 0;
9303 		break;
9304 	}
9305 	if (error) {
9306 		goto bad;
9307 	}
9308 
9309 	error = 0;
9310 
9311 	/*
9312 	 * Run the packet through pf rules
9313 	 */
9314 	switch (ether_type) {
9315 	case ETHERTYPE_IP:
9316 		/*
9317 		 * before calling the firewall, swap fields the same as
9318 		 * IP does. here we assume the header is contiguous
9319 		 */
9320 		ip = mtod(*mp, struct ip *);
9321 
9322 		ip->ip_len = ntohs(ip->ip_len);
9323 		ip->ip_off = ntohs(ip->ip_off);
9324 
9325 		if (ifp != NULL) {
9326 			error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
9327 		}
9328 
9329 		if (*mp == NULL || error != 0) { /* filter may consume */
9330 			break;
9331 		}
9332 
9333 		/* Recalculate the ip checksum and restore byte ordering */
9334 		ip = mtod(*mp, struct ip *);
9335 		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9336 		if (hlen < (int)sizeof(struct ip)) {
9337 			goto bad;
9338 		}
9339 		if (hlen > (*mp)->m_len) {
9340 			if ((*mp = m_pullup(*mp, hlen)) == 0) {
9341 				goto bad;
9342 			}
9343 			ip = mtod(*mp, struct ip *);
9344 			if (ip == NULL) {
9345 				goto bad;
9346 			}
9347 		}
9348 		ip->ip_len = htons(ip->ip_len);
9349 		ip->ip_off = htons(ip->ip_off);
9350 		ip->ip_sum = 0;
9351 		if (hlen == sizeof(struct ip)) {
9352 			ip->ip_sum = in_cksum_hdr(ip);
9353 		} else {
9354 			ip->ip_sum = in_cksum(*mp, hlen);
9355 		}
9356 		break;
9357 
9358 	case ETHERTYPE_IPV6:
9359 		if (ifp != NULL) {
9360 			error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
9361 		}
9362 
9363 		if (*mp == NULL || error != 0) { /* filter may consume */
9364 			break;
9365 		}
9366 		break;
9367 	default:
9368 		error = 0;
9369 		break;
9370 	}
9371 
9372 	if (*mp == NULL) {
9373 		return error;
9374 	}
9375 	if (error != 0) {
9376 		goto bad;
9377 	}
9378 
9379 	error = -1;
9380 
9381 	/*
9382 	 * Finally, put everything back the way it was and return
9383 	 */
9384 	if (snap) {
9385 		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
9386 		if (*mp == NULL) {
9387 			return error;
9388 		}
9389 		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
9390 	}
9391 
9392 	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
9393 	if (*mp == NULL) {
9394 		return error;
9395 	}
9396 	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
9397 
9398 	return 0;
9399 
9400 bad:
9401 	m_freem(*mp);
9402 	*mp = NULL;
9403 	return error;
9404 }
9405 
9406 /*
9407  * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
9408  * All rights reserved.
9409  *
9410  * Redistribution and use in source and binary forms, with or without
9411  * modification, are permitted provided that the following conditions
9412  * are met:
9413  *   1. Redistributions of source code must retain the above copyright
9414  *      notice, this list of conditions and the following disclaimer.
9415  *   2. Redistributions in binary form must reproduce the above copyright
9416  *      notice, this list of conditions and the following disclaimer in the
9417  *      documentation and/or other materials provided with the distribution.
9418  *
9419  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9420  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9421  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
9422  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
9423  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
9424  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
9425  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9426  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
9427  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
9428  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
9429  * SUCH DAMAGE.
9430  */
9431 
9432 /*
9433  * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
9434  *
9435  * Create a queue of packets/segments which fit the given mss + hdr_len.
9436  * m0 points to mbuf chain to be segmented.
9437  * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
9438  * into segments of length MSS bytes and then copy the first hdr_len bytes
9439  * from m0 at the top of each segment.
9440  * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
9441  * in each segment after the first hdr_len bytes
9442  *
9443  * Return the new queue with the segments on success, NULL on failure.
9444  * (the mbuf queue is freed in this case).
9445  * nsegs contains the number of segments generated.
9446  */
9447 
9448 static struct mbuf *
9449 m_seg(struct mbuf *m0, int hdr_len, int mss, int *nsegs,
9450     char * hdr2_buf, int hdr2_len)
9451 {
9452 	int off = 0, n, firstlen;
9453 	struct mbuf **mnext, *mseg;
9454 	int total_len = m0->m_pkthdr.len;
9455 
9456 	/*
9457 	 * Segmentation useless
9458 	 */
9459 	if (total_len <= hdr_len + mss) {
9460 		return m0;
9461 	}
9462 
9463 	if (hdr2_buf == NULL || hdr2_len <= 0) {
9464 		hdr2_buf = NULL;
9465 		hdr2_len = 0;
9466 	}
9467 
9468 	off = hdr_len + mss;
9469 	firstlen = mss; /* first segment stored in the original mbuf */
9470 
9471 	mnext = &(m0->m_nextpkt); /* pointer to next packet */
9472 
9473 	for (n = 1; off < total_len; off += mss, n++) {
9474 		struct mbuf *m;
9475 		/*
9476 		 * Copy the header from the original packet
9477 		 * and create a new mbuf chain
9478 		 */
9479 		if (MHLEN < hdr_len) {
9480 			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
9481 		} else {
9482 			m = m_gethdr(M_NOWAIT, MT_DATA);
9483 		}
9484 
9485 		if (m == NULL) {
9486 #ifdef GSO_DEBUG
9487 			D("MGETHDR error\n");
9488 #endif
9489 			goto err;
9490 		}
9491 
9492 		m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
9493 
9494 		m->m_len = hdr_len;
9495 		/*
9496 		 * if the optional header is present, copy it
9497 		 */
9498 		if (hdr2_buf != NULL) {
9499 			m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
9500 		}
9501 
9502 		m->m_flags |= (m0->m_flags & M_COPYFLAGS);
9503 		if (off + mss >= total_len) {           /* last segment */
9504 			mss = total_len - off;
9505 		}
9506 		/*
9507 		 * Copy the payload from original packet
9508 		 */
9509 		mseg = m_copym(m0, off, mss, M_NOWAIT);
9510 		if (mseg == NULL) {
9511 			m_freem(m);
9512 #ifdef GSO_DEBUG
9513 			D("m_copym error\n");
9514 #endif
9515 			goto err;
9516 		}
9517 		m_cat(m, mseg);
9518 
9519 		m->m_pkthdr.len = hdr_len + hdr2_len + mss;
9520 		m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
9521 		/*
9522 		 * Copy the checksum flags and data (in_cksum() need this)
9523 		 */
9524 		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
9525 		m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
9526 		m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
9527 
9528 		*mnext = m;
9529 		mnext = &(m->m_nextpkt);
9530 	}
9531 
9532 	/*
9533 	 * Update first segment.
9534 	 * If the optional header is present, is necessary
9535 	 * to insert it into the first segment.
9536 	 */
9537 	if (hdr2_buf == NULL) {
9538 		m_adj(m0, hdr_len + firstlen - total_len);
9539 		m0->m_pkthdr.len = hdr_len + firstlen;
9540 	} else {
9541 		mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
9542 		if (mseg == NULL) {
9543 #ifdef GSO_DEBUG
9544 			D("m_copym error\n");
9545 #endif
9546 			goto err;
9547 		}
9548 		m_adj(m0, hdr_len - total_len);
9549 		m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
9550 		m_cat(m0, mseg);
9551 		m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
9552 	}
9553 
9554 	if (nsegs != NULL) {
9555 		*nsegs = n;
9556 	}
9557 	return m0;
9558 err:
9559 	while (m0 != NULL) {
9560 		mseg = m0->m_nextpkt;
9561 		m0->m_nextpkt = NULL;
9562 		m_freem(m0);
9563 		m0 = mseg;
9564 	}
9565 	return NULL;
9566 }
9567 
9568 /*
9569  * Wrappers of IPv4 checksum functions
9570  */
9571 static inline void
9572 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
9573 {
9574 	m->m_data += mac_hlen;
9575 	m->m_len -= mac_hlen;
9576 	m->m_pkthdr.len -= mac_hlen;
9577 #if __FreeBSD_version < 1000000
9578 	ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
9579 #endif
9580 
9581 	in_delayed_cksum(m);
9582 
9583 #if __FreeBSD_version < 1000000
9584 	ip->ip_len = htons(ip->ip_len);
9585 #endif
9586 	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
9587 	m->m_len += mac_hlen;
9588 	m->m_pkthdr.len += mac_hlen;
9589 	m->m_data -= mac_hlen;
9590 }
9591 
9592 static inline void
9593 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
9594 {
9595 	m->m_data += mac_hlen;
9596 
9597 	ip->ip_sum = in_cksum(m, ip_hlen);
9598 
9599 	m->m_pkthdr.csum_flags &= ~CSUM_IP;
9600 	m->m_data -= mac_hlen;
9601 }
9602 
/*
 * Structure that contains the state during the TCP segmentation
 *
 * It is set up by gso_ip_tcp_init_state(), which selects the IPv4 or
 * IPv6 flavor of the two callbacks.
 */
struct gso_ip_tcp_state {
	/* refreshes hdr, tcp and pay_len for the given mbuf */
	void    (*update)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* rewrites the IP/TCP header fields and checksums of the given mbuf */
	void    (*internal)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* IP header of the current mbuf (accessed as hdr.ip or hdr.ip6) */
	union iphdr hdr;
	/* TCP header of the current mbuf */
	struct tcphdr *tcp;
	/* link-layer header length, as passed to gso_ip_tcp_init_state() */
	int mac_hlen;
	/* IP header length, as passed to gso_ip_tcp_init_state() */
	int ip_hlen;
	/* TCP header length in bytes (th_off << 2) */
	int tcp_hlen;
	/* mac_hlen + ip_hlen + tcp_hlen */
	int hlen;
	/* packet length minus hlen, set by the update callback */
	int pay_len;
	/* CSUM_* flags tested by the internal callbacks to decide which checksums to compute */
	int sw_csum;
	/* starts as the host-order th_seq, advanced by pay_len per mbuf */
	uint32_t tcp_seq;
	/* IPv4 only: host-order IP id, incremented for every mbuf */
	uint16_t ip_id;
	/* NOTE(review): not written by gso_ip_tcp_init_state(); presumably mirrors the is_tx argument of gso_ip_tcp() - confirm */
	boolean_t is_tx;
};
9623 
9624 /*
9625  * Update the pointers to TCP and IPv4 headers
9626  */
9627 static inline void
9628 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9629 {
9630 	state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) + state->mac_hlen);
9631 	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) + state->ip_hlen);
9632 	state->pay_len = m->m_pkthdr.len - state->hlen;
9633 }
9634 
9635 /*
9636  * Set properly the TCP and IPv4 headers
9637  */
9638 static inline void
9639 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9640 {
9641 	/*
9642 	 * Update IP header
9643 	 */
9644 	state->hdr.ip->ip_id = htons((state->ip_id)++);
9645 	state->hdr.ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
9646 	/*
9647 	 * TCP Checksum
9648 	 */
9649 	state->tcp->th_sum = 0;
9650 	state->tcp->th_sum = in_pseudo(state->hdr.ip->ip_src.s_addr,
9651 	    state->hdr.ip->ip_dst.s_addr,
9652 	    htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
9653 	/*
9654 	 * Checksum HW not supported (TCP)
9655 	 */
9656 	if (state->sw_csum & CSUM_DELAY_DATA) {
9657 		gso_ipv4_data_cksum(m, state->hdr.ip, state->mac_hlen);
9658 	}
9659 
9660 	state->tcp_seq += state->pay_len;
9661 	/*
9662 	 * IP Checksum
9663 	 */
9664 	state->hdr.ip->ip_sum = 0;
9665 	/*
9666 	 * Checksum HW not supported (IP)
9667 	 */
9668 	if (state->sw_csum & CSUM_IP) {
9669 		gso_ipv4_hdr_cksum(m, state->hdr.ip, state->mac_hlen, state->ip_hlen);
9670 	}
9671 }
9672 
9673 
9674 /*
9675  * Updates the pointers to TCP and IPv6 headers
9676  */
9677 static inline void
9678 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9679 {
9680 	state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + state->mac_hlen);
9681 	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) + state->ip_hlen);
9682 	state->pay_len = m->m_pkthdr.len - state->hlen;
9683 }
9684 
9685 /*
9686  * Sets properly the TCP and IPv6 headers
9687  */
9688 static inline void
9689 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9690 {
9691 	state->hdr.ip6->ip6_plen = htons(m->m_pkthdr.len -
9692 	    state->mac_hlen - state->ip_hlen);
9693 	/*
9694 	 * TCP Checksum
9695 	 */
9696 	state->tcp->th_sum = 0;
9697 	state->tcp->th_sum = in6_pseudo(&state->hdr.ip6->ip6_src,
9698 	    &state->hdr.ip6->ip6_dst,
9699 	    htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
9700 	/*
9701 	 * Checksum HW not supported (TCP)
9702 	 */
9703 	if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
9704 		(void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
9705 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
9706 	}
9707 	state->tcp_seq += state->pay_len;
9708 }
9709 
9710 /*
9711  * Init the state during the TCP segmentation
9712  */
9713 static void
9714 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
9715     bool is_ipv4, int mac_hlen, int ip_hlen,
9716     void * ip_hdr, struct tcphdr * tcp_hdr)
9717 {
9718 #pragma unused(ifp)
9719 
9720 	state->hdr.ptr = ip_hdr;
9721 	state->tcp = tcp_hdr;
9722 	if (is_ipv4) {
9723 		state->ip_id = ntohs(state->hdr.ip->ip_id);
9724 		state->update = gso_ipv4_tcp_update;
9725 		state->internal = gso_ipv4_tcp_internal;
9726 		state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
9727 	} else {
9728 		state->update = gso_ipv6_tcp_update;
9729 		state->internal = gso_ipv6_tcp_internal;
9730 		state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
9731 	}
9732 	state->mac_hlen = mac_hlen;
9733 	state->ip_hlen = ip_hlen;
9734 	state->tcp_hlen = state->tcp->th_off << 2;
9735 	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
9736 	state->tcp_seq = ntohl(state->tcp->th_seq);
9737 	//state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
9738 	return;
9739 }
9740 
9741 /*
9742  * GSO on TCP/IP (v4 or v6)
9743  *
9744  * If is_tx is TRUE, segmented packets are transmitted after they are
9745  * segmented.
9746  *
9747  * If is_tx is FALSE, the segmented packets are returned as a chain in *mp.
9748  */
9749 static int
9750 gso_ip_tcp(struct ifnet *ifp, struct mbuf **mp, struct gso_ip_tcp_state *state,
9751     boolean_t is_tx)
9752 {
9753 	struct mbuf *m, *m_tx;
9754 	int error = 0;
9755 	int mss = 0;
9756 	int nsegs = 0;
9757 	struct mbuf *m0 = *mp;
9758 #ifdef GSO_STATS
9759 	int total_len = m0->m_pkthdr.len;
9760 #endif /* GSO_STATS */
9761 
9762 #if 1
9763 	u_int reduce_mss;
9764 
9765 	reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
9766 	    : if_bridge_tso_reduce_mss_forwarding;
9767 	mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen - reduce_mss;
9768 	assert(mss > 0);
9769 #else
9770 	if (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) {/* TSO with GSO */
9771 		mss = ifp->if_hw_tsomax - state->ip_hlen - state->tcp_hlen;
9772 	} else {
9773 		mss = m0->m_pkthdr.tso_segsz;
9774 	}
9775 #endif
9776 
9777 	*mp = m0 = m_seg(m0, state->hlen, mss, &nsegs, 0, 0);
9778 	if (m0 == NULL) {
9779 		return ENOBUFS; /* XXX ok? */
9780 	}
9781 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
9782 	    "%s %s mss %d nsegs %d",
9783 	    ifp->if_xname,
9784 	    is_tx ? "TX" : "RX",
9785 	    mss, nsegs);
9786 	/*
9787 	 * XXX-ste: can this happen?
9788 	 */
9789 	if (m0->m_nextpkt == NULL) {
9790 #ifdef GSO_DEBUG
9791 		D("only 1 segment");
9792 #endif
9793 		if (is_tx) {
9794 			error = bridge_transmit(ifp, m0);
9795 		}
9796 		return error;
9797 	}
9798 #ifdef GSO_STATS
9799 	GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
9800 	GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
9801 	GSOSTAT_ADD(tcp.gsos_osegments, nsegs);
9802 #endif /* GSO_STATS */
9803 
9804 	/* first pkt */
9805 	m = m0;
9806 
9807 	state->update(state, m);
9808 
9809 	do {
9810 		state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
9811 
9812 		state->internal(state, m);
9813 		m_tx = m;
9814 		m = m->m_nextpkt;
9815 		if (is_tx) {
9816 			m_tx->m_nextpkt = NULL;
9817 			if ((error = bridge_transmit(ifp, m_tx)) != 0) {
9818 				/*
9819 				 * XXX: If a segment can not be sent, discard the following
9820 				 * segments and propagate the error to the upper levels.
9821 				 * In this way the TCP retransmits all the initial packet.
9822 				 */
9823 #ifdef GSO_DEBUG
9824 				D("if_transmit error\n");
9825 #endif
9826 				goto err;
9827 			}
9828 		}
9829 		state->update(state, m);
9830 
9831 		state->tcp->th_flags &= ~TH_CWR;
9832 		state->tcp->th_seq = htonl(state->tcp_seq);
9833 	} while (m->m_nextpkt);
9834 
9835 	/* last pkt */
9836 	state->internal(state, m);
9837 
9838 	if (is_tx) {
9839 		error = bridge_transmit(ifp, m);
9840 #ifdef GSO_DEBUG
9841 		if (error) {
9842 			D("last if_transmit error\n");
9843 			D("error - type = %d \n", error);
9844 		}
9845 #endif
9846 	}
9847 #ifdef GSO_STATS
9848 	if (!error) {
9849 		GSOSTAT_INC(tcp.gsos_segmented);
9850 		GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
9851 		GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
9852 		GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
9853 	}
9854 #endif /* GSO_STATS */
9855 	return error;
9856 
9857 err:
9858 #ifdef GSO_DEBUG
9859 	D("error - type = %d \n", error);
9860 #endif
9861 	while (m != NULL) {
9862 		m_tx = m->m_nextpkt;
9863 		m->m_nextpkt = NULL;
9864 		m_freem(m);
9865 		m = m_tx;
9866 	}
9867 	return error;
9868 }
9869 
9870 /*
9871  * GSO for TCP/IPv[46]
9872  */
9873 static int
9874 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
9875     boolean_t is_tx)
9876 {
9877 	int error;
9878 	ip_packet_info  info;
9879 	uint32_t csum_flags;
9880 	struct gso_ip_tcp_state state;
9881 	struct bripstats stats; /* XXX ignored */
9882 	struct tcphdr *tcp;
9883 
9884 	if (!is_tx && ipforwarding == 0) {
9885 		/* no need to segment if the packet will not be forwarded */
9886 		return 0;
9887 	}
9888 	error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4, &info, &stats);
9889 	if (error != 0) {
9890 		if (*mp != NULL) {
9891 			m_freem(*mp);
9892 			*mp = NULL;
9893 		}
9894 		return error;
9895 	}
9896 	if (info.ip_proto_hdr == NULL) {
9897 		/* not a TCP packet */
9898 		return 0;
9899 	}
9900 	tcp = (struct tcphdr *)(void *)info.ip_proto_hdr;
9901 	gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
9902 	    info.ip_hlen + info.ip_opt_len, info.ip_hdr.ptr, tcp);
9903 	if (is_ipv4) {
9904 		csum_flags = CSUM_DELAY_DATA; /* XXX */
9905 		if (!is_tx) {
9906 			/* if RX to our local IP address, don't segment */
9907 			struct in_addr  dst_ip;
9908 
9909 			bcopy(&state.hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
9910 			if (in_addr_is_ours(dst_ip)) {
9911 				return 0;
9912 			}
9913 		}
9914 	} else {
9915 		csum_flags = CSUM_DELAY_IPV6_DATA; /* XXX */
9916 		if (!is_tx) {
9917 			/* if RX to our local IP address, don't segment */
9918 			if (in6_addr_is_ours(&state.hdr.ip6->ip6_dst,
9919 			    ifp->if_index)) {
9920 				/* local IP address, no need to segment */
9921 				return 0;
9922 			}
9923 		}
9924 	}
9925 	(*mp)->m_pkthdr.csum_flags = csum_flags;
9926 	(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
9927 	return gso_ip_tcp(ifp, mp, &state, is_tx);
9928 }
9929