xref: /xnu-8019.80.24/bsd/net/if_bridge.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2004-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*	$NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $	*/
30 /*
31  * Copyright 2001 Wasabi Systems, Inc.
32  * All rights reserved.
33  *
34  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed for the NetBSD Project by
47  *	Wasabi Systems, Inc.
48  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49  *    or promote products derived from this software without specific prior
50  *    written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
56  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62  * POSSIBILITY OF SUCH DAMAGE.
63  */
64 
65 /*
66  * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67  * All rights reserved.
68  *
69  * Redistribution and use in source and binary forms, with or without
70  * modification, are permitted provided that the following conditions
71  * are met:
72  * 1. Redistributions of source code must retain the above copyright
73  *    notice, this list of conditions and the following disclaimer.
74  * 2. Redistributions in binary form must reproduce the above copyright
75  *    notice, this list of conditions and the following disclaimer in the
76  *    documentation and/or other materials provided with the distribution.
77  *
78  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88  * POSSIBILITY OF SUCH DAMAGE.
89  *
90  * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91  */
92 
93 /*
94  * Network interface bridge support.
95  *
96  * TODO:
97  *
98  *	- Currently only supports Ethernet-like interfaces (Ethernet,
99  *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
100  *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
 *	  consider heterogeneous bridges).
102  *
103  *	- GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104  */
105 
106 #include <sys/cdefs.h>
107 
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123 
124 #include <sys/kauth.h>
125 
126 #include <kern/thread_call.h>
127 
128 #include <libkern/libkern.h>
129 
130 #include <kern/zalloc.h>
131 
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141 #include <net/pfvar.h>
142 
143 #include <netinet/in.h> /* for struct arpcom */
144 #include <netinet/tcp.h> /* for struct tcphdr */
145 #include <netinet/in_systm.h>
146 #include <netinet/in_var.h>
147 #define _IP_VHL
148 #include <netinet/ip.h>
149 #include <netinet/ip_var.h>
150 #include <netinet/ip6.h>
151 #include <netinet6/ip6_var.h>
152 #ifdef DEV_CARP
153 #include <netinet/ip_carp.h>
154 #endif
155 #include <netinet/if_ether.h> /* for struct arpcom */
156 #include <net/bridgestp.h>
157 #include <net/if_bridgevar.h>
158 #include <net/if_llc.h>
159 #if NVLAN > 0
160 #include <net/if_vlan_var.h>
161 #endif /* NVLAN > 0 */
162 
163 #include <net/if_ether.h>
164 #include <net/dlil.h>
165 #include <net/kpi_interfacefilter.h>
166 
167 #include <net/route.h>
168 #include <dev/random/randomdev.h>
169 
170 #include <netinet/bootp.h>
171 #include <netinet/dhcp.h>
172 
173 #if SKYWALK
174 #include <skywalk/nexus/netif/nx_netif.h>
175 #endif /* SKYWALK */
176 
177 #include <os/log.h>
178 
179 /*
180  * if_bridge_debug, BR_DBGF_*
181  * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
182  *   to enable additional logs for the corresponding bridge function
183  * - "sysctl net.link.bridge.debug" controls the value of
184  *   'if_bridge_debug'
185  */
186 static uint32_t if_bridge_debug = 0;
187 #define BR_DBGF_LIFECYCLE       0x0001
188 #define BR_DBGF_INPUT           0x0002
189 #define BR_DBGF_OUTPUT          0x0004
190 #define BR_DBGF_RT_TABLE        0x0008
191 #define BR_DBGF_DELAYED_CALL    0x0010
192 #define BR_DBGF_IOCTL           0x0020
193 #define BR_DBGF_MBUF            0x0040
194 #define BR_DBGF_MCAST           0x0080
195 #define BR_DBGF_HOSTFILTER      0x0100
196 #define BR_DBGF_CHECKSUM        0x0200
197 #define BR_DBGF_MAC_NAT         0x0400
198 
199 /*
200  * if_bridge_log_level
201  * - 'if_bridge_log_level' ensures that by default important logs are
202  *   logged regardless of if_bridge_debug by comparing the log level
203  *   in BRIDGE_LOG to if_bridge_log_level
 * - "sysctl net.link.bridge.log_level" controls the value of
205  *   'if_bridge_log_level'
206  * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
207  *   logs must use LOG_NOTICE to ensure they appear by default
208  */
209 static int if_bridge_log_level = LOG_NOTICE;
210 
/*
 * BRIDGE_DBGF_ENABLED
 * - evaluates to true if any BR_DBGF_* bit in '__flag' is set in
 *   'if_bridge_debug'
 * - '__flag' is parenthesized so that a compound argument such as
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT) is masked as a whole instead of
 *   being split by operator precedence
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)
212 
213 /*
214  * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
215  * - macros to generate the specified log conditionally based on
216  *   the specified log level and debug flags
217  * - BRIDGE_LOG_SIMPLE does not include the function name in the log
218  */
/*
 * BRIDGE_LOG(__level, __dbgf, __string, ...)
 * - emits the log via os_log() when '__level' is at or below
 *   'if_bridge_log_level', or when any bit of '__dbgf' is enabled in
 *   the debug mask; the message is prefixed with the calling function name
 * - '__level' and '__dbgf' are parenthesized so that expression arguments
 *   (e.g. a conditional expression or an OR of flags) are evaluated as a
 *   unit
 * - '__string' must be a string literal (it is concatenated with "%s: ")
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED((__dbgf))) {                    \
	                os_log(OS_LOG_DEFAULT, "%s: " __string, \
	                       __func__, ## __VA_ARGS__);       \
	        }                                                       \
	} while (0)
/*
 * BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)
 * - same gating as BRIDGE_LOG, but the function name is not prepended
 */
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                    \
	        if ((__level) <= if_bridge_log_level ||         \
	            BRIDGE_DBGF_ENABLED((__dbgf))) {                    \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                               \
	} while (0)
234 
235 #define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
236 #define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
237 #define BRIDGE_LOCK_ASSERT_HELD(_sc)            \
238 	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
239 #define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)         \
240 	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)
241 
/*
 * BRIDGE_LOCK_DEBUG
 * - when non-zero, the BRIDGE_LOCK/UNLOCK/LOCK2REF/UNREF/XLOCK/XDROP macros
 *   are routed through out-of-line bridge_*() functions instead of the
 *   inline macro bodies in the #else branch
 */
#define BRIDGE_LOCK_DEBUG      1
#if BRIDGE_LOCK_DEBUG

/* number of entries in the lock_lr[]/unlock_lr[] arrays of struct bridge_softc */
#define BR_LCKDBG_MAX                   4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      _err = bridge_lock2ref(_sc)
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
/*
 * BRIDGE_LOCK2REF
 * - called with the bridge mutex held; always drops the mutex
 * - if an exclusive request is pending (sc_iflist_xcnt > 0), sets
 *   '_err' to EBUSY without taking a reference
 * - otherwise takes a reference (sc_iflist_ref++) and sets '_err' to 0
 */
#define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	if ((_sc)->sc_iflist_xcnt > 0)                                  \
	        (_err) = EBUSY;                                         \
	else {                                                          \
	        (_sc)->sc_iflist_ref++;                                 \
	        (_err) = 0;                                             \
	}                                                               \
	_BRIDGE_UNLOCK(_sc);                                            \
} while (0)
/*
 * BRIDGE_UNREF
 * - takes the bridge mutex, drops one reference, and releases the mutex
 * - if that was the last reference and an exclusive request is pending,
 *   wakes up the sleeper(s) on sc_cv
 */
#define BRIDGE_UNREF(_sc)               do {                            \
	_BRIDGE_LOCK(_sc);                                              \
	(_sc)->sc_iflist_ref--;                                         \
	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0))	{ \
	        _BRIDGE_UNLOCK(_sc);                                    \
	        wakeup(&(_sc)->sc_cv);                                  \
	} else                                                          \
	        _BRIDGE_UNLOCK(_sc);                                    \
} while (0)
/*
 * BRIDGE_XLOCK
 * - called with the bridge mutex held
 * - registers an exclusive request (sc_iflist_xcnt++), then sleeps on
 *   sc_cv (with sc_mtx passed to msleep) until sc_iflist_ref drops to zero
 */
#define BRIDGE_XLOCK(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt++;                                        \
	while ((_sc)->sc_iflist_ref > 0)                                \
	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
	            "BRIDGE_XLOCK", NULL);                              \
} while (0)
/*
 * BRIDGE_XDROP
 * - called with the bridge mutex held; withdraws one exclusive request
 */
#define BRIDGE_XDROP(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt--;                                        \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
290 
#if NBPFILTER > 0
/*
 * BRIDGE_BPF_MTAP_INPUT
 * - passes the packet 'm' to bridge_bpf_input() when the bridge 'sc' has
 *   an input BPF callback (sc_bpf_input) installed; the return value of
 *   bridge_bpf_input() is ignored
 * - wrapped in do { } while (0) so the macro expands to exactly one
 *   statement and cannot capture an 'else' that follows the call site
 * - arguments are parenthesized so expression arguments expand safely
 */
#define BRIDGE_BPF_MTAP_INPUT(sc, m)                                    \
	do {                                                            \
	        if ((sc)->sc_bpf_input != NULL) {                       \
	                bridge_bpf_input((sc)->sc_ifp, (m), __func__,   \
	                    __LINE__);                                  \
	        }                                                       \
	} while (0)
#else /* NBPFILTER */
/* BPF support not compiled in: the tap is a no-op */
#define BRIDGE_BPF_MTAP_INPUT(sc, m)
#endif /* NBPFILTER */
298 
299 /*
300  * Initial size of the route hash table.  Must be a power of two.
301  */
302 #ifndef BRIDGE_RTHASH_SIZE
303 #define BRIDGE_RTHASH_SIZE              16
304 #endif
305 
306 /*
307  * Maximum size of the routing hash table
308  */
309 #define BRIDGE_RTHASH_SIZE_MAX          2048
310 
311 #define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)
312 
313 /*
314  * Maximum number of addresses to cache.
315  */
316 #ifndef BRIDGE_RTABLE_MAX
317 #define BRIDGE_RTABLE_MAX               100
318 #endif
319 
320 
321 /*
322  * Timeout (in seconds) for entries learned dynamically.
323  */
324 #ifndef BRIDGE_RTABLE_TIMEOUT
325 #define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
326 #endif
327 
328 /*
329  * Number of seconds between walks of the route list.
330  */
331 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
332 #define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
333 #endif
334 
335 /*
336  * Number of MAC NAT entries
337  * - sized based on 16 clients (including MAC NAT interface)
338  *   each with 4 addresses
339  */
340 #ifndef BRIDGE_MAC_NAT_ENTRY_MAX
341 #define BRIDGE_MAC_NAT_ENTRY_MAX        64
342 #endif /* BRIDGE_MAC_NAT_ENTRY_MAX */
343 
344 /*
345  * List of capabilities to possibly mask on the member interface.
346  */
347 #define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
348 /*
349  * List of capabilities to disable on the member interface.
350  */
351 #define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
352 
353 /*
354  * Bridge interface list entry.
355  */
struct bridge_iflist {
	/* list linkage; struct bridge_softc keeps TAILQs of this type */
	TAILQ_ENTRY(bridge_iflist) bif_next;
	struct ifnet            *bif_ifp;       /* member if */
	struct bstp_port        bif_stp;        /* STP state */
	/* IFBIF_* bits, e.g. IFBIF_CHECKSUM_OFFLOAD; see bif_ifflags_are_set() */
	uint32_t                bif_ifflags;    /* member if flags */
	int                     bif_savedcaps;  /* saved capabilities */
	uint32_t                bif_addrmax;    /* max # of addresses */
	uint32_t                bif_addrcnt;    /* cur. # of addresses */
	uint32_t                bif_addrexceeded; /* # of address violations */

	/* NOTE(review): presumably the filter tracked by BIFF_FILTER_ATTACHED -- confirm */
	interface_filter_t      bif_iff_ref;
	/* NOTE(review): presumably the bridge this member belongs to -- confirm */
	struct bridge_softc     *bif_sc;
	/* NOTE(review): presumably BIFF_* bits -- confirm */
	uint32_t                bif_flags;

	/* host filter */
	struct in_addr          bif_hf_ipsrc;   /* source IPv4 address (cf. BIFF_HF_IPSRC) */
	uint8_t                 bif_hf_hwsrc[ETHER_ADDR_LEN]; /* source MAC (cf. BIFF_HF_HWSRC) */

	struct ifbrmstats       bif_stats;
};
376 
377 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)378 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
379 {
380 	return (bif->bif_ifflags & flags) == flags;
381 }
382 
383 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)384 bif_has_checksum_offload(struct bridge_iflist * bif)
385 {
386 	return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
387 }
388 
389 /* fake errors to make the code clearer */
390 #define _EBADIP                 EJUSTRETURN
391 #define _EBADIPCHECKSUM         EJUSTRETURN
392 #define _EBADIPV6               EJUSTRETURN
393 #define _EBADUDP                EJUSTRETURN
394 #define _EBADTCP                EJUSTRETURN
395 #define _EBADUDPCHECKSUM        EJUSTRETURN
396 #define _EBADTCPCHECKSUM        EJUSTRETURN
397 
398 #define BIFF_PROMISC            0x01    /* promiscuous mode set */
399 #define BIFF_PROTO_ATTACHED     0x02    /* protocol attached */
400 #define BIFF_FILTER_ATTACHED    0x04    /* interface filter attached */
401 #define BIFF_MEDIA_ACTIVE       0x08    /* interface media active */
402 #define BIFF_HOST_FILTER        0x10    /* host filter enabled */
403 #define BIFF_HF_HWSRC           0x20    /* host filter source MAC is set */
404 #define BIFF_HF_IPSRC           0x40    /* host filter source IP is set */
405 #define BIFF_INPUT_BROADCAST    0x80    /* send broadcast packets in */
406 #if SKYWALK
407 #define BIFF_FLOWSWITCH_ATTACHED 0x1000   /* we attached the flowswitch */
408 #define BIFF_NETAGENT_REMOVED    0x2000   /* we removed the netagent */
409 #endif /* SKYWALK */
410 
411 /*
412  * mac_nat_entry
413  * - translates between an IP address and MAC address on a specific
414  *   bridge interface member
415  */
struct mac_nat_entry {
	LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
	struct bridge_iflist    *mne_bif;       /* originating interface */
	unsigned long           mne_expire;     /* expiration time */
	/* which member is valid is indicated by MNE_FLAGS_IPV6 in mne_flags */
	union {
		struct in_addr  mneu_ip;        /* originating IPv4 address */
		struct in6_addr mneu_ip6;       /* originating IPv6 address */
	} mne_u;
	uint8_t                 mne_mac[ETHER_ADDR_LEN]; /* MAC address paired with the IP address */
	uint8_t                 mne_flags;      /* MNE_FLAGS_* */
	uint8_t                 mne_reserved;   /* NOTE(review): appears unused/padding -- confirm */
};
428 #define mne_ip  mne_u.mneu_ip
429 #define mne_ip6 mne_u.mneu_ip6
430 
431 #define MNE_FLAGS_IPV6          0x01    /* IPv6 address */
432 
433 LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
434 
435 /*
436  * mac_nat_record
437  * - used by bridge_mac_nat_output() to convey the translation that needs
438  *   to take place in bridge_mac_nat_translate
439  * - holds enough information so that the translation can be done later without
440  *   holding the bridge lock
441  */
/*
 * NOTE(review): the union member in use is presumably selected by
 * mnr_ether_type (ARP, IPv4 or IPv6) -- confirm against
 * bridge_mac_nat_output()/bridge_mac_nat_translate().
 */
struct mac_nat_record {
	uint16_t                mnr_ether_type;
	union {
		/* ARP packets */
		uint16_t        mnru_arp_offset;
		/* IPv4 packets */
		struct {
			uint16_t mnruip_dhcp_flags;
			uint16_t mnruip_udp_csum;
			uint8_t  mnruip_header_len;
		} mnru_ip;
		/* IPv6 packets */
		struct {
			uint16_t mnruip6_icmp6_len;
			uint16_t mnruip6_lladdr_offset;
			uint8_t mnruip6_icmp6_type;
			uint8_t mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};
459 
460 #define mnr_arp_offset  mnr_u.mnru_arp_offset
461 
462 #define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
463 #define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
464 #define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum
465 
466 #define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
467 #define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
468 #define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
469 #define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
470 
471 /*
472  * Bridge route node.
473  */
struct bridge_rtnode {
	LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
	LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
	struct bridge_iflist    *brt_dst;       /* destination if */
	unsigned long           brt_expire;     /* expiration time */
	uint8_t                 brt_flags;      /* address flags */
	uint8_t                 brt_addr[ETHER_ADDR_LEN]; /* Ethernet address of this entry */
	uint16_t                brt_vlan;       /* vlan id */

};
484 #define brt_ifp                 brt_dst->bif_ifp
485 
486 /*
487  * Bridge delayed function call context
488  */
489 typedef void (*bridge_delayed_func_t)(struct bridge_softc *);
490 
struct bridge_delayed_call {
	/* NOTE(review): presumably the argument handed to bdc_func -- confirm */
	struct bridge_softc     *bdc_sc;
	bridge_delayed_func_t   bdc_func; /* Function to call */
	struct timespec         bdc_ts; /* Time to call */
	u_int32_t               bdc_flags; /* BDCF_* */
	thread_call_t           bdc_thread_call;
};
498 
499 #define BDCF_OUTSTANDING        0x01    /* Delayed call has been scheduled */
500 #define BDCF_CANCELLING         0x02    /* May be waiting for call completion */
501 
502 /*
503  * Software state for each bridge.
504  */
505 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
506 
struct bridge_softc {
	struct ifnet            *sc_ifp;        /* make this an interface */
	u_int32_t               sc_flags;       /* NOTE(review): presumably SCF_* -- confirm */
	LIST_ENTRY(bridge_softc) sc_list;
	decl_lck_mtx_data(, sc_mtx);            /* mutex used by _BRIDGE_LOCK/_BRIDGE_UNLOCK */
	struct _bridge_rtnode_list *sc_rthash;  /* our forwarding table */
	struct _bridge_rtnode_list sc_rtlist;   /* list version of above */
	uint32_t                sc_rthash_key;  /* key for hash */
	uint32_t                sc_rthash_size; /* size of the hash table */
	struct bridge_delayed_call sc_aging_timer;
	struct bridge_delayed_call sc_resize_call;
	TAILQ_HEAD(, bridge_iflist) sc_spanlist;        /* span ports list */
	struct bstp_state       sc_stp;         /* STP state */
	bpf_packet_func         sc_bpf_input;   /* checked by BRIDGE_BPF_MTAP_INPUT */
	bpf_packet_func         sc_bpf_output;
	void                    *sc_cv;         /* sleep/wakeup channel, see BRIDGE_XLOCK/BRIDGE_UNREF */
	uint32_t                sc_brtmax;      /* max # of addresses */
	uint32_t                sc_brtcnt;      /* cur. # of addresses */
	uint32_t                sc_brttimeout;  /* rt timeout in seconds */
	uint32_t                sc_iflist_ref;  /* refcount for sc_iflist */
	uint32_t                sc_iflist_xcnt; /* # of exclusive requests on sc_iflist, see BRIDGE_XLOCK */
	TAILQ_HEAD(, bridge_iflist) sc_iflist;  /* member interface list */
	uint32_t                sc_brtexceeded; /* # of cache drops */
	uint32_t                sc_filter_flags; /* ipf and flags */
	struct ifnet            *sc_ifaddr;     /* member mac copied from */
	u_char                  sc_defaddr[6];  /* Default MAC address */
	char                    sc_if_xname[IFNAMSIZ];

	struct bridge_iflist    *sc_mac_nat_bif; /* single MAC NAT interface */
	struct mac_nat_entry_list sc_mne_list;  /* MAC NAT IPv4 */
	struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
	uint32_t                sc_mne_max;      /* max # of entries */
	uint32_t                sc_mne_count;    /* cur. # of entries */
	uint32_t                sc_mne_allocation_failures;
#if BRIDGE_LOCK_DEBUG
	/*
	 * Locking and unlocking calling history
	 * (each array holds BR_LCKDBG_MAX entries)
	 */
	void                    *lock_lr[BR_LCKDBG_MAX];
	int                     next_lock_lr;
	void                    *unlock_lr[BR_LCKDBG_MAX];
	int                     next_unlock_lr;
#endif /* BRIDGE_LOCK_DEBUG */
};
551 
552 #define SCF_DETACHING            0x01
553 #define SCF_RESIZING             0x02
554 #define SCF_MEDIA_ACTIVE         0x04
555 
/*
 * ChecksumOperation
 * - checksum handling mode; passed as the last argument of bridge_enqueue()
 * - NOTE(review): the precise effect of each value is determined by the
 *   code that consumes it -- confirm there before relying on the names
 */
typedef enum {
	CHECKSUM_OPERATION_NONE = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
	CHECKSUM_OPERATION_FINALIZE = 2,
	CHECKSUM_OPERATION_COMPUTE = 3,
} ChecksumOperation;
562 
/*
 * union iphdr
 * - a single IP header pointer that can be viewed as IPv4, IPv6, or untyped
 */
union iphdr {
	struct ip *ip;          /* IPv4 view */
	struct ip6_hdr *ip6;    /* IPv6 view */
	void * ptr;             /* untyped view */
};
568 
/*
 * ip_packet_info
 * - description of an IP packet: header/payload lengths, protocol, and
 *   pointers to the IP header (IPv4 or IPv6, via union iphdr) and to the
 *   protocol header
 */
typedef struct {
	u_int           ip_hlen;        /* IP header length */
	u_int           ip_pay_len;     /* length of payload (exclusive of ip_hlen) */
	u_int           ip_opt_len;     /* IPv6 options headers length */
	uint8_t         ip_proto;       /* IPPROTO_TCP, IPPROTO_UDP, etc. */
	bool            ip_is_fragmented; /* NOTE(review): presumably set when the packet is an IP fragment -- confirm */
	union iphdr     ip_hdr;         /* pointer to IP header */
	void *          ip_proto_hdr;   /* ptr to protocol header (TCP) */
} ip_packet_info, *ip_packet_info_t;
578 
579 struct bridge_hostfilter_stats bridge_hostfilter_stats;
580 
581 static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
582 #if BRIDGE_LOCK_DEBUG
583 static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
584 #else
585 static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
586 #endif
587 static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
588 
589 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
590 
591 static ZONE_DECLARE(bridge_rtnode_pool, "bridge_rtnode",
592     sizeof(struct bridge_rtnode), ZC_NONE);
593 static ZONE_DECLARE(bridge_mne_pool, "bridge_mac_nat_entry",
594     sizeof(struct mac_nat_entry), ZC_NONE);
595 
596 static int      bridge_clone_create(struct if_clone *, uint32_t, void *);
597 static int      bridge_clone_destroy(struct ifnet *);
598 
599 static errno_t  bridge_ioctl(struct ifnet *, u_long, void *);
600 #if HAS_IF_CAP
601 static void     bridge_mutecaps(struct bridge_softc *);
602 static void     bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
603     int);
604 #endif
605 static errno_t bridge_set_tso(struct bridge_softc *);
606 static void     bridge_proto_attach_changed(struct ifnet *);
607 static int      bridge_init(struct ifnet *);
608 #if HAS_BRIDGE_DUMMYNET
609 static void     bridge_dummynet(struct mbuf *, struct ifnet *);
610 #endif
611 static void     bridge_ifstop(struct ifnet *, int);
612 static int      bridge_output(struct ifnet *, struct mbuf *);
613 static void     bridge_finalize_cksum(struct ifnet *, struct mbuf *);
614 static void     bridge_start(struct ifnet *);
615 static errno_t  bridge_input(struct ifnet *, mbuf_t *);
616 static errno_t  bridge_iff_input(void *, ifnet_t, protocol_family_t,
617     mbuf_t *, char **);
618 static errno_t  bridge_iff_output(void *, ifnet_t, protocol_family_t,
619     mbuf_t *);
620 static errno_t  bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
621     mbuf_t *m);
622 
623 static int      bridge_enqueue(ifnet_t, struct ifnet *,
624     struct ifnet *, struct mbuf *, ChecksumOperation);
625 static void     bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
626 
627 static void     bridge_forward(struct bridge_softc *, struct bridge_iflist *,
628     struct mbuf *);
629 
630 static void     bridge_aging_timer(struct bridge_softc *sc);
631 
632 static void     bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
633     struct mbuf *, int);
634 static void     bridge_span(struct bridge_softc *, struct mbuf *);
635 
636 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
637     uint16_t, struct bridge_iflist *, int, uint8_t);
638 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
639     uint16_t);
640 static void     bridge_rttrim(struct bridge_softc *);
641 static void     bridge_rtage(struct bridge_softc *);
642 static void     bridge_rtflush(struct bridge_softc *, int);
643 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
644     uint16_t);
645 
646 static int      bridge_rtable_init(struct bridge_softc *);
647 static void     bridge_rtable_fini(struct bridge_softc *);
648 
649 static void     bridge_rthash_resize(struct bridge_softc *);
650 
651 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
652 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
653     const uint8_t *, uint16_t);
654 static int      bridge_rtnode_hash(struct bridge_softc *,
655     struct bridge_rtnode *);
656 static int      bridge_rtnode_insert(struct bridge_softc *,
657     struct bridge_rtnode *);
658 static void     bridge_rtnode_destroy(struct bridge_softc *,
659     struct bridge_rtnode *);
660 #if BRIDGESTP
661 static void     bridge_rtable_expire(struct ifnet *, int);
662 static void     bridge_state_change(struct ifnet *, int);
663 #endif /* BRIDGESTP */
664 
665 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
666     const char *name);
667 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
668     struct ifnet *ifp);
669 static void     bridge_delete_member(struct bridge_softc *,
670     struct bridge_iflist *, int);
671 static void     bridge_delete_span(struct bridge_softc *,
672     struct bridge_iflist *);
673 
674 static int      bridge_ioctl_add(struct bridge_softc *, void *);
675 static int      bridge_ioctl_del(struct bridge_softc *, void *);
676 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
677 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
678 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
679 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
680 static int      bridge_ioctl_gifs32(struct bridge_softc *, void *);
681 static int      bridge_ioctl_gifs64(struct bridge_softc *, void *);
682 static int      bridge_ioctl_rts32(struct bridge_softc *, void *);
683 static int      bridge_ioctl_rts64(struct bridge_softc *, void *);
684 static int      bridge_ioctl_saddr32(struct bridge_softc *, void *);
685 static int      bridge_ioctl_saddr64(struct bridge_softc *, void *);
686 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
687 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
688 static int      bridge_ioctl_daddr32(struct bridge_softc *, void *);
689 static int      bridge_ioctl_daddr64(struct bridge_softc *, void *);
690 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
691 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
692 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
693 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
694 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
695 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
696 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
697 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
698 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
699 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
700 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
701 static int      bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
702 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
703 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
704 static int      bridge_ioctl_gbparam32(struct bridge_softc *, void *);
705 static int      bridge_ioctl_gbparam64(struct bridge_softc *, void *);
706 static int      bridge_ioctl_grte(struct bridge_softc *, void *);
707 static int      bridge_ioctl_gifsstp32(struct bridge_softc *, void *);
708 static int      bridge_ioctl_gifsstp64(struct bridge_softc *, void *);
709 static int      bridge_ioctl_sproto(struct bridge_softc *, void *);
710 static int      bridge_ioctl_stxhc(struct bridge_softc *, void *);
711 static int      bridge_ioctl_purge(struct bridge_softc *sc, void *);
712 static int      bridge_ioctl_gfilt(struct bridge_softc *, void *);
713 static int      bridge_ioctl_sfilt(struct bridge_softc *, void *);
714 static int      bridge_ioctl_ghostfilter(struct bridge_softc *, void *);
715 static int      bridge_ioctl_shostfilter(struct bridge_softc *, void *);
716 static int      bridge_ioctl_gmnelist32(struct bridge_softc *, void *);
717 static int      bridge_ioctl_gmnelist64(struct bridge_softc *, void *);
718 static int      bridge_ioctl_gifstats32(struct bridge_softc *, void *);
719 static int      bridge_ioctl_gifstats64(struct bridge_softc *, void *);
720 
/* Packet filter (pf) support and the IPv4/IPv6 "checkbasic" helpers. */
static int bridge_pf(struct mbuf **, struct ifnet *, uint32_t sc_filter_flags, int input);
static int bridge_ip_checkbasic(struct mbuf **);
static int bridge_ip6_checkbasic(struct mbuf **);

/*
 * BPF support.  bridge_set_bpf_tap is installed as the interface's
 * set_bpf_tap callback by bridge_clone_create().
 */
static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
static errno_t bridge_bpf_input(ifnet_t, struct mbuf *, const char *, int);
static errno_t bridge_bpf_output(ifnet_t, struct mbuf *);

/*
 * Interface life cycle, link state and delayed-call helpers.
 * bridge_detach is installed as the interface's detach callback by
 * bridge_clone_create(); the delayed-call cancel/cleanup routines are used
 * by bridge_clone_destroy().
 */
static void bridge_detach(ifnet_t);
static void bridge_link_event(struct ifnet *, u_int32_t);
static void bridge_iflinkevent(struct ifnet *);
static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
static int interface_media_active(struct ifnet *);
static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
static int bridge_host_filter(struct bridge_iflist *, mbuf_t *);

/* MAC NAT support. */
static errno_t bridge_mac_nat_enable(struct bridge_softc *,
    struct bridge_iflist *);
static void bridge_mac_nat_disable(struct bridge_softc *sc);
static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
    struct bridge_iflist *);
static ifnet_t bridge_mac_nat_input(struct bridge_softc *, mbuf_t *,
    boolean_t *);
static boolean_t bridge_mac_nat_output(struct bridge_softc *,
    struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
    const caddr_t);
static boolean_t is_broadcast_ip_packet(mbuf_t *);
753 
/* Copy a whole packet: m_copym() from offset 0 for M_COPYALL bytes. */
#define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)

/* Forward declaration; the definition is not in this part of the file. */
static int
gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
    boolean_t is_tx);
759 
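/*
 * The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2).
 * NOTE(review): VLANTAGOF() below ignores its argument and always evaluates
 * to 0, not 1 -- confirm which value is intended.
 */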
761 #define VLANTAGOF(_m)   0
762 
/*
 * 01:80:c2:00:00:00 -- presumably the destination address used for
 * spanning tree (bstp) frames; not static, so visible to other files.
 */
u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };

/* The all-zeroes Ethernet address. */
static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
768 
#if BRIDGESTP
/*
 * Callbacks handed to bstp_attach() by bridge_clone_create(); only built
 * when BRIDGESTP is enabled.
 */
static struct bstp_cb_ops bridge_ops = {
	.bcb_state = bridge_state_change,
	.bcb_rtage = bridge_rtable_expire
};
#endif /* BRIDGESTP */
775 
/* Root of the net.link.bridge sysctl subtree. */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Bridge");

/* net.link.bridge.inherit_mac (read/write, default 0) */
static int bridge_inherit_mac = 0;   /* share MAC with first bridge member */
SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_inherit_mac, 0,
    "Inherit MAC address from the first bridge member");

/* net.link.bridge.rtable_prune_period (read/write); variable defined elsewhere */
SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_rtable_prune_period, 0,
    "Interval between pruning of routing table");

/* net.link.bridge.rtable_hash_size_max (read/write, default BRIDGE_RTHASH_SIZE_MAX) */
static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_rtable_hash_size_max, 0,
    "Maximum size of the routing hash table");

#if BRIDGE_DELAYED_CALLBACK_DEBUG
/* net.link.bridge.delayed_callback_delay; debug builds only */
static int bridge_delayed_callback_delay = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_delayed_callback_delay, 0,
    "Delay before calling delayed function");
#endif

/* net.link.bridge.hostfilterstats: read-only export of bridge_hostfilter_stats */
SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
    hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
808 
#if BRIDGESTP
/*
 * net.link.bridge.log_stp (read/write, default 0).
 * Registered with CTLFLAG_LOCKED like every other sysctl in this subtree.
 */
static int log_stp   = 0;   /* log STP state changes */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW | CTLFLAG_LOCKED,
    &log_stp, 0, "Log STP state changes");
#endif /* BRIDGESTP */
814 
/*
 * One entry of the DRVSPEC dispatch tables below: the handler for a bridge
 * sub-command, the size of the argument structure it takes, and BC_F_*
 * flags describing how the request must be processed.
 */
struct bridge_control {
	int             (*bc_func)(struct bridge_softc *, void *);  /* handler */
	unsigned int    bc_argsize;     /* sizeof the argument structure */
	unsigned int    bc_flags;       /* BC_F_* bits */
};

#define BC_F_COPYIN             0x01    /* copy arguments in */
#define BC_F_COPYOUT            0x02    /* copy arguments out */
#define BC_F_SUSER              0x04    /* do super-user check */
824 
825 static const struct bridge_control bridge_control_table32[] = {
826 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
827 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
828 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
829 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
830 
831 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
832 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
833 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
834 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
835 
836 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
837 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
838 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
839 	  .bc_flags = BC_F_COPYOUT },
840 
841 	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
842 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
843 	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
844 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
845 
846 	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
847 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
848 
849 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
850 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
851 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
852 	  .bc_flags = BC_F_COPYOUT },
853 
854 	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
855 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
856 
857 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
858 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
859 
860 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
861 	  .bc_flags = BC_F_COPYOUT },
862 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
863 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
864 
865 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
866 	  .bc_flags = BC_F_COPYOUT },
867 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
868 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
869 
870 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
871 	  .bc_flags = BC_F_COPYOUT },
872 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
873 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
874 
875 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
876 	  .bc_flags = BC_F_COPYOUT },
877 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
878 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
879 
880 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
881 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
882 
883 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
884 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
885 
886 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
887 	  .bc_flags = BC_F_COPYOUT },
888 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
889 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
890 
891 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
892 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
893 
894 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
895 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
896 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
897 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
898 
899 	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
900 	  .bc_flags = BC_F_COPYOUT },
901 
902 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
903 	  .bc_flags = BC_F_COPYOUT },
904 
905 	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
906 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
907 
908 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
909 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
910 
911 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
912 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
913 
914 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
915 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
916 
917 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
918 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
919 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
920 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
921 
922 	{ .bc_func = bridge_ioctl_gmnelist32,
923 	  .bc_argsize = sizeof(struct ifbrmnelist32),
924 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
925 	{ .bc_func = bridge_ioctl_gifstats32,
926 	  .bc_argsize = sizeof(struct ifbrmreq32),
927 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
928 };
929 
930 static const struct bridge_control bridge_control_table64[] = {
931 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),           /* 0 */
932 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
933 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
934 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
935 
936 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
937 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
938 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
939 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
940 
941 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
942 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
943 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
944 	  .bc_flags = BC_F_COPYOUT },
945 
946 	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
947 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
948 	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
949 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
950 
951 	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
952 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
953 
954 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
955 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
956 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
957 	  .bc_flags = BC_F_COPYOUT },
958 
959 	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
960 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
961 
962 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
963 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
964 
965 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
966 	  .bc_flags = BC_F_COPYOUT },
967 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
968 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
969 
970 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
971 	  .bc_flags = BC_F_COPYOUT },
972 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
973 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
974 
975 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
976 	  .bc_flags = BC_F_COPYOUT },
977 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
978 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
979 
980 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
981 	  .bc_flags = BC_F_COPYOUT },
982 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
983 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
984 
985 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
986 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
987 
988 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
989 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
990 
991 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
992 	  .bc_flags = BC_F_COPYOUT },
993 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
994 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
995 
996 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
997 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
998 
999 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
1000 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1001 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
1002 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1003 
1004 	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
1005 	  .bc_flags = BC_F_COPYOUT },
1006 
1007 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
1008 	  .bc_flags = BC_F_COPYOUT },
1009 
1010 	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
1011 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1012 
1013 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
1014 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1015 
1016 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
1017 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1018 
1019 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
1020 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1021 
1022 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1023 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1024 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1025 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1026 
1027 	{ .bc_func = bridge_ioctl_gmnelist64,
1028 	  .bc_argsize = sizeof(struct ifbrmnelist64),
1029 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1030 	{ .bc_func = bridge_ioctl_gifstats64,
1031 	  .bc_argsize = sizeof(struct ifbrmreq64),
1032 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1033 };
1034 
1035 static const unsigned int bridge_control_table_size =
1036     sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1037 
/*
 * List of bridge instances.  bridge_clone_create() walks it and inserts new
 * entries while holding bridge_list_mtx.
 */
static LIST_HEAD(, bridge_softc) bridge_list =
    LIST_HEAD_INITIALIZER(bridge_list);

#define BRIDGENAME      "bridge"        /* interface name given to the cloner */
#define BRIDGES_MAX     IF_MAXUNIT
#define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)

/*
 * Interface cloner for bridges: wires bridge_clone_create() and
 * bridge_clone_destroy() to the BRIDGENAME interface name and sizes the
 * softc allocation as sizeof(struct bridge_softc).
 */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
    0, BRIDGES_MAX, BRIDGE_ZONE_MAX_ELEM, sizeof(struct bridge_softc));
1048 
/*
 * net.link.bridge.txstart: when non-zero, bridge_clone_create() registers
 * bridge_start as the start callback; otherwise it uses the legacy
 * bridge_output path.  Only read at interface creation time in this part of
 * the file.
 */
static int if_bridge_txstart = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");

/* net.link.bridge.debug: debug flag bits (variable defined elsewhere) */
SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_debug, 0, "Bridge debug flags");

/* net.link.bridge.log_level (variable defined elsewhere) */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_log_level, 0, "Bridge log level");

/* net.link.bridge.segmentation (read/write, default 1) */
static int if_bridge_segmentation = 1;
SYSCTL_INT(_net_link_bridge, OID_AUTO, segmentation,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_segmentation, 0, "Bridge interface enable segmentation");

/* Logging helpers, defined below. */
static void brlog_ether_header(struct ether_header *);
static void brlog_mbuf_data(mbuf_t, size_t, size_t);
static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
static void brlog_mbuf(mbuf_t, const char *, const char *);
static void brlog_link(struct bridge_softc * sc);
1070 
1071 #if BRIDGE_LOCK_DEBUG
/*
 * Lock-debugging variants of the bridge locking primitives (only built with
 * BRIDGE_LOCK_DEBUG).  They wrap _BRIDGE_LOCK/_BRIDGE_UNLOCK and record the
 * caller's return address in the softc's lock_lr/unlock_lr rings.
 */
static void bridge_lock(struct bridge_softc *);
static void bridge_unlock(struct bridge_softc *);
static int bridge_lock2ref(struct bridge_softc *);
static void bridge_unref(struct bridge_softc *);
static void bridge_xlock(struct bridge_softc *);
static void bridge_xdrop(struct bridge_softc *);
1078 
1079 static void
bridge_lock(struct bridge_softc * sc)1080 bridge_lock(struct bridge_softc *sc)
1081 {
1082 	void *lr_saved = __builtin_return_address(0);
1083 
1084 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1085 
1086 	_BRIDGE_LOCK(sc);
1087 
1088 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1089 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1090 }
1091 
1092 static void
bridge_unlock(struct bridge_softc * sc)1093 bridge_unlock(struct bridge_softc *sc)
1094 {
1095 	void *lr_saved = __builtin_return_address(0);
1096 
1097 	BRIDGE_LOCK_ASSERT_HELD(sc);
1098 
1099 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1100 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1101 
1102 	_BRIDGE_UNLOCK(sc);
1103 }
1104 
1105 static int
bridge_lock2ref(struct bridge_softc * sc)1106 bridge_lock2ref(struct bridge_softc *sc)
1107 {
1108 	int error = 0;
1109 	void *lr_saved = __builtin_return_address(0);
1110 
1111 	BRIDGE_LOCK_ASSERT_HELD(sc);
1112 
1113 	if (sc->sc_iflist_xcnt > 0) {
1114 		error = EBUSY;
1115 	} else {
1116 		sc->sc_iflist_ref++;
1117 	}
1118 
1119 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1120 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1121 
1122 	_BRIDGE_UNLOCK(sc);
1123 
1124 	return error;
1125 }
1126 
1127 static void
bridge_unref(struct bridge_softc * sc)1128 bridge_unref(struct bridge_softc *sc)
1129 {
1130 	void *lr_saved = __builtin_return_address(0);
1131 
1132 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1133 
1134 	_BRIDGE_LOCK(sc);
1135 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1136 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1137 
1138 	sc->sc_iflist_ref--;
1139 
1140 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1141 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1142 	if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1143 		_BRIDGE_UNLOCK(sc);
1144 		wakeup(&sc->sc_cv);
1145 	} else {
1146 		_BRIDGE_UNLOCK(sc);
1147 	}
1148 }
1149 
1150 static void
bridge_xlock(struct bridge_softc * sc)1151 bridge_xlock(struct bridge_softc *sc)
1152 {
1153 	void *lr_saved = __builtin_return_address(0);
1154 
1155 	BRIDGE_LOCK_ASSERT_HELD(sc);
1156 
1157 	sc->sc_iflist_xcnt++;
1158 	while (sc->sc_iflist_ref > 0) {
1159 		sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1160 		sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1161 
1162 		msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1163 
1164 		sc->lock_lr[sc->next_lock_lr] = lr_saved;
1165 		sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1166 	}
1167 }
1168 
1169 static void
bridge_xdrop(struct bridge_softc * sc)1170 bridge_xdrop(struct bridge_softc *sc)
1171 {
1172 	BRIDGE_LOCK_ASSERT_HELD(sc);
1173 
1174 	sc->sc_iflist_xcnt--;
1175 }
1176 
1177 #endif /* BRIDGE_LOCK_DEBUG */
1178 
1179 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1180 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1181 {
1182 	if (m) {
1183 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1184 		    "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1185 		    prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1186 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1187 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1188 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1189 		    suffix ? suffix : "");
1190 	} else {
1191 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1192 	}
1193 }
1194 
1195 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1196 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1197 {
1198 	if (m) {
1199 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1200 		    "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1201 		    "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1202 		    prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1203 		    mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1204 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
1205 		    (unsigned int)mbuf_maxlen(m),
1206 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1207 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1208 		    !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1209 		if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1210 			brlog_mbuf_pkthdr(m, "", suffix);
1211 		}
1212 	} else {
1213 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1214 	}
1215 }
1216 
1217 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1218 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1219 {
1220 	mbuf_t                  n;
1221 	size_t                  i, j;
1222 	size_t                  pktlen, mlen, maxlen;
1223 	unsigned char   *ptr;
1224 
1225 	pktlen = mbuf_pkthdr_len(m);
1226 
1227 	if (offset > pktlen) {
1228 		return;
1229 	}
1230 
1231 	maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1232 	n = m;
1233 	mlen = mbuf_len(n);
1234 	ptr = mbuf_data(n);
1235 	for (i = 0, j = 0; i < maxlen; i++, j++) {
1236 		if (j >= mlen) {
1237 			n = mbuf_next(n);
1238 			if (n == 0) {
1239 				break;
1240 			}
1241 			ptr = mbuf_data(n);
1242 			mlen = mbuf_len(n);
1243 			j = 0;
1244 		}
1245 		if (i >= offset) {
1246 			BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1247 			    "%02x%s", ptr[j], i % 2 ? " " : "");
1248 		}
1249 	}
1250 }
1251 
1252 static void
brlog_ether_header(struct ether_header * eh)1253 brlog_ether_header(struct ether_header *eh)
1254 {
1255 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1256 	    "%02x:%02x:%02x:%02x:%02x:%02x > "
1257 	    "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1258 	    eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1259 	    eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1260 	    eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1261 	    eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1262 	    ntohs(eh->ether_type));
1263 }
1264 
/*
 * ether_ntop:
 *
 *	Format the six bytes at ap as a colon-separated, lower-case,
 *	zero-padded hex string ("xx:xx:xx:xx:xx:xx") into buf, writing at
 *	most len bytes including the terminating NUL (snprintf semantics, so
 *	the text is truncated if buf is too small).
 *
 *	Returns buf.
 */
static char *
ether_ntop(char *buf, size_t len, const u_char *ap)
{
	const u_char *octet = ap;

	snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
	    octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);
	return buf;
}
1273 
1274 static void
brlog_link(struct bridge_softc * sc)1275 brlog_link(struct bridge_softc * sc)
1276 {
1277 	int i;
1278 	uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) +
1279 	IFNAMSIZ + ETHER_ADDR_LEN];
1280 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer;
1281 	const u_char * lladdr;
1282 	char lladdr_str[48];
1283 
1284 	memset(sdl, 0, sizeof(sdl_buffer));
1285 	sdl->sdl_family = AF_LINK;
1286 	sdl->sdl_nlen = strlen(sc->sc_if_xname);
1287 	sdl->sdl_alen = ETHER_ADDR_LEN;
1288 	sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1289 	memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1290 	memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1291 	lladdr_str[0] = '\0';
1292 	for (i = 0, lladdr = CONST_LLADDR(sdl);
1293 	    i < sdl->sdl_alen;
1294 	    i++, lladdr++) {
1295 		char    byte_str[4];
1296 
1297 		snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1298 		    *lladdr);
1299 		strlcat(lladdr_str, byte_str, sizeof(lladdr_str));
1300 	}
1301 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1302 	    "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1303 	    " slen %d addr %s", sc->sc_if_xname,
1304 	    sdl->sdl_len, sdl->sdl_index,
1305 	    sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1306 	    sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1307 }
1308 
1309 
1310 /*
1311  * bridgeattach:
1312  *
1313  *	Pseudo-device attach routine.
1314  */
1315 __private_extern__ int
bridgeattach(int n)1316 bridgeattach(int n)
1317 {
1318 #pragma unused(n)
1319 	int error;
1320 
1321 	LIST_INIT(&bridge_list);
1322 
1323 #if BRIDGESTP
1324 	bstp_sys_init();
1325 #endif /* BRIDGESTP */
1326 
1327 	error = if_clone_attach(&bridge_cloner);
1328 	if (error != 0) {
1329 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1330 	}
1331 	return error;
1332 }
1333 
1334 
1335 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1336 bridge_ifnet_set_attrs(struct ifnet * ifp)
1337 {
1338 	errno_t         error;
1339 
1340 	error = ifnet_set_mtu(ifp, ETHERMTU);
1341 	if (error != 0) {
1342 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1343 		goto done;
1344 	}
1345 	error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1346 	if (error != 0) {
1347 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1348 		goto done;
1349 	}
1350 	error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1351 	if (error != 0) {
1352 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1353 		goto done;
1354 	}
1355 	error = ifnet_set_flags(ifp,
1356 	    IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1357 	    0xffff);
1358 
1359 	if (error != 0) {
1360 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1361 		goto done;
1362 	}
1363 done:
1364 	return error;
1365 }
1366 
1367 /*
1368  * bridge_clone_create:
1369  *
1370  *	Create a new bridge instance.
1371  */
1372 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1373 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1374 {
1375 #pragma unused(params)
1376 	struct ifnet *ifp = NULL;
1377 	struct bridge_softc *sc = NULL;
1378 	struct bridge_softc *sc2 = NULL;
1379 	struct ifnet_init_eparams init_params;
1380 	errno_t error = 0;
1381 	uint8_t eth_hostid[ETHER_ADDR_LEN];
1382 	int fb, retry, has_hostid;
1383 
1384 	sc =  if_clone_softc_allocate(&bridge_cloner);
1385 	if (sc == NULL) {
1386 		error = ENOMEM;
1387 		goto done;
1388 	}
1389 
1390 	lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1391 	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1392 	sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1393 	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1394 	sc->sc_filter_flags = 0;
1395 
1396 	TAILQ_INIT(&sc->sc_iflist);
1397 
1398 	/* use the interface name as the unique id for ifp recycle */
1399 	snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1400 	    ifc->ifc_name, unit);
1401 	bzero(&init_params, sizeof(init_params));
1402 	init_params.ver                 = IFNET_INIT_CURRENT_VERSION;
1403 	init_params.len                 = sizeof(init_params);
1404 	/* Initialize our routing table. */
1405 	error = bridge_rtable_init(sc);
1406 	if (error != 0) {
1407 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1408 		goto done;
1409 	}
1410 	TAILQ_INIT(&sc->sc_spanlist);
1411 	if (if_bridge_txstart) {
1412 		init_params.start = bridge_start;
1413 	} else {
1414 		init_params.flags = IFNET_INIT_LEGACY;
1415 		init_params.output = bridge_output;
1416 	}
1417 	init_params.set_bpf_tap = bridge_set_bpf_tap;
1418 	init_params.uniqueid            = sc->sc_if_xname;
1419 	init_params.uniqueid_len        = strlen(sc->sc_if_xname);
1420 	init_params.sndq_maxlen         = IFQ_MAXLEN;
1421 	init_params.name                = ifc->ifc_name;
1422 	init_params.unit                = unit;
1423 	init_params.family              = IFNET_FAMILY_ETHERNET;
1424 	init_params.type                = IFT_BRIDGE;
1425 	init_params.demux               = ether_demux;
1426 	init_params.add_proto           = ether_add_proto;
1427 	init_params.del_proto           = ether_del_proto;
1428 	init_params.check_multi         = ether_check_multi;
1429 	init_params.framer_extended     = ether_frameout_extended;
1430 	init_params.softc               = sc;
1431 	init_params.ioctl               = bridge_ioctl;
1432 	init_params.detach              = bridge_detach;
1433 	init_params.broadcast_addr      = etherbroadcastaddr;
1434 	init_params.broadcast_len       = ETHER_ADDR_LEN;
1435 
1436 	error = ifnet_allocate_extended(&init_params, &ifp);
1437 	if (error != 0) {
1438 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1439 		goto done;
1440 	}
1441 	LIST_INIT(&sc->sc_mne_list);
1442 	LIST_INIT(&sc->sc_mne_list_v6);
1443 	sc->sc_ifp = ifp;
1444 	error = bridge_ifnet_set_attrs(ifp);
1445 	if (error != 0) {
1446 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1447 		    error);
1448 		goto done;
1449 	}
1450 	/*
1451 	 * Generate an ethernet address with a locally administered address.
1452 	 *
1453 	 * Since we are using random ethernet addresses for the bridge, it is
1454 	 * possible that we might have address collisions, so make sure that
1455 	 * this hardware address isn't already in use on another bridge.
1456 	 * The first try uses the "hostid" and falls back to read_frandom();
1457 	 * for "hostid", we use the MAC address of the first-encountered
1458 	 * Ethernet-type interface that is currently configured.
1459 	 */
1460 	fb = 0;
1461 	has_hostid = (uuid_get_ethernet(&eth_hostid[0]) == 0);
1462 	for (retry = 1; retry != 0;) {
1463 		if (fb || has_hostid == 0) {
1464 			read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1465 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1466 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1467 		} else {
1468 			bcopy(&eth_hostid[0], &sc->sc_defaddr,
1469 			    ETHER_ADDR_LEN);
1470 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1471 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1472 			sc->sc_defaddr[3] =     /* stir it up a bit */
1473 			    ((sc->sc_defaddr[3] & 0x0f) << 4) |
1474 			    ((sc->sc_defaddr[3] & 0xf0) >> 4);
1475 			/*
1476 			 * Mix in the LSB as it's actually pretty significant,
1477 			 * see rdar://14076061
1478 			 */
1479 			sc->sc_defaddr[4] =
1480 			    (((sc->sc_defaddr[4] & 0x0f) << 4) |
1481 			    ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1482 			    sc->sc_defaddr[5];
1483 			sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1484 		}
1485 
1486 		fb = 1;
1487 		retry = 0;
1488 		lck_mtx_lock(&bridge_list_mtx);
1489 		LIST_FOREACH(sc2, &bridge_list, sc_list) {
1490 			if (memcmp(sc->sc_defaddr,
1491 			    IF_LLADDR(sc2->sc_ifp), ETHER_ADDR_LEN) == 0) {
1492 				retry = 1;
1493 			}
1494 		}
1495 		lck_mtx_unlock(&bridge_list_mtx);
1496 	}
1497 
1498 	sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1499 
1500 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1501 		brlog_link(sc);
1502 	}
1503 	error = ifnet_attach(ifp, NULL);
1504 	if (error != 0) {
1505 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1506 		goto done;
1507 	}
1508 
1509 	error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1510 	    IFT_ETHER);
1511 	if (error != 0) {
1512 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1513 		    error);
1514 		goto done;
1515 	}
1516 
1517 	ifnet_set_offload(ifp,
1518 	    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1519 	    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1520 	error = bridge_set_tso(sc);
1521 	if (error != 0) {
1522 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1523 		goto done;
1524 	}
1525 #if BRIDGESTP
1526 	bstp_attach(&sc->sc_stp, &bridge_ops);
1527 #endif /* BRIDGESTP */
1528 
1529 	lck_mtx_lock(&bridge_list_mtx);
1530 	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1531 	lck_mtx_unlock(&bridge_list_mtx);
1532 
1533 	/* attach as ethernet */
1534 	error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1535 	    NULL, NULL);
1536 
1537 done:
1538 	if (error != 0) {
1539 		BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1540 		/* TBD: Clean up: sc, sc_rthash etc */
1541 	}
1542 
1543 	return error;
1544 }
1545 
1546 /*
1547  * bridge_clone_destroy:
1548  *
1549  *	Destroy a bridge instance.
1550  */
1551 static int
bridge_clone_destroy(struct ifnet * ifp)1552 bridge_clone_destroy(struct ifnet *ifp)
1553 {
1554 	struct bridge_softc *sc = ifp->if_softc;
1555 	struct bridge_iflist *bif;
1556 	errno_t error;
1557 
1558 	BRIDGE_LOCK(sc);
1559 	if ((sc->sc_flags & SCF_DETACHING)) {
1560 		BRIDGE_UNLOCK(sc);
1561 		return 0;
1562 	}
1563 	sc->sc_flags |= SCF_DETACHING;
1564 
1565 	bridge_ifstop(ifp, 1);
1566 
1567 	bridge_cancel_delayed_call(&sc->sc_resize_call);
1568 
1569 	bridge_cleanup_delayed_call(&sc->sc_resize_call);
1570 	bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1571 
1572 	error = ifnet_set_flags(ifp, 0, IFF_UP);
1573 	if (error != 0) {
1574 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1575 	}
1576 
1577 	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1578 		bridge_delete_member(sc, bif, 0);
1579 	}
1580 
1581 	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1582 		bridge_delete_span(sc, bif);
1583 	}
1584 	BRIDGE_UNLOCK(sc);
1585 
1586 	error = ifnet_detach(ifp);
1587 	if (error != 0) {
1588 		panic("%s (%d): ifnet_detach(%p) failed %d",
1589 		    __func__, __LINE__, ifp, error);
1590 	}
1591 	return 0;
1592 }
1593 
/*
 * DRVSPEC:
 *
 *	Shared body of the SIOC[SG]DRVSPEC{32,64} ioctl cases.  The macro
 *	expects these names to exist in the expanding scope:
 *	    cmd, sc, error		- from the enclosing function
 *	    ifd				- the ifdrv32/ifdrv64 request
 *	    bridge_control_table, bc	- the control table and a cursor
 *	    args			- union large enough for any argument
 *
 *	Steps, in order (any failure sets `error' and leaves the
 *	do/while(0) via `break'):
 *	  1. reject ifd_cmd values outside the control table;
 *	  2. reject a "get" request for an entry without BC_F_COPYOUT and
 *	     a "set" request for an entry with BC_F_COPYOUT;
 *	  3. if the entry has BC_F_SUSER, require
 *	     kauth_authorize_generic(..., KAUTH_GENERIC_ISSUSER) to succeed;
 *	  4. require ifd_len to equal the entry's bc_argsize and to fit
 *	     in `args';
 *	  5. zero `args' and, for BC_F_COPYIN, copy the argument in;
 *	  6. call the handler with the bridge lock held;
 *	  7. for BC_F_COPYOUT, copy `args' back out to ifd_data.
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) {                \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
	bc = &bridge_control_table[ifd->ifd_cmd];                       \
                                                                        \
	if (cmd == SIOCGDRVSPEC &&                                      \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) {                       \
	        error = EINVAL;                                         \
	        break;                                                  \
	} else if (cmd == SIOCSDRVSPEC &&                               \
	    (bc->bc_flags & BC_F_COPYOUT) != 0) {                       \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	if (bc->bc_flags & BC_F_SUSER) {                                \
	        error = kauth_authorize_generic(kauth_cred_get(),       \
	            KAUTH_GENERIC_ISSUSER);                             \
	        if (error)                                              \
	                break;                                          \
	}                                                               \
                                                                        \
	if (ifd->ifd_len != bc->bc_argsize ||                           \
	    ifd->ifd_len > sizeof (args)) {                             \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	bzero(&args, sizeof (args));                                    \
	if (bc->bc_flags & BC_F_COPYIN) {                               \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len);     \
	        if (error)                                              \
	                break;                                          \
	}                                                               \
                                                                        \
	BRIDGE_LOCK(sc);                                                \
	error = (*bc->bc_func)(sc, &args);                              \
	BRIDGE_UNLOCK(sc);                                              \
	if (error)                                                      \
	        break;                                                  \
                                                                        \
	if (bc->bc_flags & BC_F_COPYOUT)                                \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len);    \
} while (0)
1640 
/*
 * bridge_ioctl:
 *
 *	Handle a control request from the operator.
 *
 *	ifp	the bridge interface; its if_softc is the bridge_softc
 *	cmd	the ioctl command
 *	data	command-specific argument (ifreq, ifmediareq or ifdrv)
 *
 *	The bridge lock must not be held on entry and is not held on
 *	return; cases that need it take and drop it themselves.
 *	Returns 0 on success or an errno value.  Commands not handled
 *	here are passed to ether_ioctl().
 */
static errno_t
bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct bridge_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct bridge_iflist *bif;
	int error = 0;

	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
	    "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
	    ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
	    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
	    (char)IOCGROUP(cmd), cmd & 0xff);

	switch (cmd) {
	case SIOCSIFADDR:
	case SIOCAIFADDR:
		/* assigning an address marks the bridge up */
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64: {
		struct ifmediareq *ifmr = (struct ifmediareq *)data;
		user_addr_t user_addr;

		/* the user list pointer has a different layout per ABI */
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);

		ifmr->ifm_status = IFM_AVALID;
		ifmr->ifm_mask = 0;
		ifmr->ifm_count = 1;

		/*
		 * Report the media as active only if the bridge is not being
		 * torn down and SCF_MEDIA_ACTIVE is set.
		 */
		BRIDGE_LOCK(sc);
		if (!(sc->sc_flags & SCF_DETACHING) &&
		    (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
			ifmr->ifm_status |= IFM_ACTIVE;
			ifmr->ifm_active = ifmr->ifm_current =
			    IFM_ETHER | IFM_AUTO;
		} else {
			ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
		}
		BRIDGE_UNLOCK(sc);

		/* the media list has exactly one entry: the current media */
		if (user_addr != USER_ADDR_NULL) {
			error = copyout(&ifmr->ifm_current, user_addr,
			    sizeof(int));
		}
		break;
	}

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* accepted, nothing to do */
		break;

	case SIOCSDRVSPEC32:
	case SIOCGDRVSPEC32: {
		/* argument storage for every 32-bit bridge control command */
		union {
			struct ifbreq ifbreq;
			struct ifbifconf32 ifbifconf;
			struct ifbareq32 ifbareq;
			struct ifbaconf32 ifbaconf;
			struct ifbrparam ifbrparam;
			struct ifbropreq32 ifbropreq;
		} args;
		struct ifdrv32 *ifd = (struct ifdrv32 *)data;
		const struct bridge_control *bridge_control_table =
		    bridge_control_table32, *bc;

		/* uses cmd, sc, error, ifd, bridge_control_table, bc, args */
		DRVSPEC;

		break;
	}
	case SIOCSDRVSPEC64:
	case SIOCGDRVSPEC64: {
		/* argument storage for every 64-bit bridge control command */
		union {
			struct ifbreq ifbreq;
			struct ifbifconf64 ifbifconf;
			struct ifbareq64 ifbareq;
			struct ifbaconf64 ifbaconf;
			struct ifbrparam ifbrparam;
			struct ifbropreq64 ifbropreq;
		} args;
		struct ifdrv64 *ifd = (struct ifdrv64 *)data;
		const struct bridge_control *bridge_control_table =
		    bridge_control_table64, *bc;

		/* uses cmd, sc, error, ifd, bridge_control_table, bc, args */
		DRVSPEC;

		break;
	}

	case SIOCSIFFLAGS:
		if (!(ifp->if_flags & IFF_UP) &&
		    (ifp->if_flags & IFF_RUNNING)) {
			/*
			 * If interface is marked down and it is running,
			 * then stop and disable it.
			 */
			BRIDGE_LOCK(sc);
			bridge_ifstop(ifp, 1);
			BRIDGE_UNLOCK(sc);
		} else if ((ifp->if_flags & IFF_UP) &&
		    !(ifp->if_flags & IFF_RUNNING)) {
			/*
			 * If interface is marked up and it is stopped, then
			 * start it.
			 */
			BRIDGE_LOCK(sc);
			error = bridge_init(ifp);
			BRIDGE_UNLOCK(sc);
		}
		break;

	case SIOCSIFLLADDR:
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		if (error != 0) {
			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
			    "%s SIOCSIFLLADDR error %d", ifp->if_xname,
			    error);
		}
		break;

	case SIOCSIFMTU:
		/* refuse MTUs below 576 */
		if (ifr->ifr_mtu < 576) {
			error = EINVAL;
			break;
		}
		BRIDGE_LOCK(sc);
		if (TAILQ_EMPTY(&sc->sc_iflist)) {
			/* no members: any MTU that passed the check is fine */
			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
			BRIDGE_UNLOCK(sc);
			break;
		}
		/* with members, the new MTU must equal every member's MTU */
		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
			if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
				BRIDGE_LOG(LOG_NOTICE, 0,
				    "%s invalid MTU: %u(%s) != %d",
				    sc->sc_ifp->if_xname,
				    bif->bif_ifp->if_mtu,
				    bif->bif_ifp->if_xname, ifr->ifr_mtu);
				error = EINVAL;
				break;
			}
		}
		if (!error) {
			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
		}
		BRIDGE_UNLOCK(sc);
		break;

	default:
		/* everything else goes to the generic Ethernet handler */
		error = ether_ioctl(ifp, cmd, data);
		if (error != 0 && error != EOPNOTSUPP) {
			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
			    "ifp %s cmd 0x%08lx "
			    "(%c%c [%lu] %c %lu) failed error: %d",
			    ifp->if_xname, cmd,
			    (cmd & IOC_IN) ? 'I' : ' ',
			    (cmd & IOC_OUT) ? 'O' : ' ',
			    IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
			    cmd & 0xff, error);
		}
		break;
	}
	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	return error;
}
1818 
1819 #if HAS_IF_CAP
1820 /*
1821  * bridge_mutecaps:
1822  *
1823  *	Clear or restore unwanted capabilities on the member interface
1824  */
1825 static void
bridge_mutecaps(struct bridge_softc * sc)1826 bridge_mutecaps(struct bridge_softc *sc)
1827 {
1828 	struct bridge_iflist *bif;
1829 	int enabled, mask;
1830 
1831 	/* Initial bitmask of capabilities to test */
1832 	mask = BRIDGE_IFCAPS_MASK;
1833 
1834 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1835 		/* Every member must support it or its disabled */
1836 		mask &= bif->bif_savedcaps;
1837 	}
1838 
1839 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1840 		enabled = bif->bif_ifp->if_capenable;
1841 		enabled &= ~BRIDGE_IFCAPS_STRIP;
1842 		/* strip off mask bits and enable them again if allowed */
1843 		enabled &= ~BRIDGE_IFCAPS_MASK;
1844 		enabled |= mask;
1845 
1846 		bridge_set_ifcap(sc, bif, enabled);
1847 	}
1848 }
1849 
1850 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)1851 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
1852 {
1853 	struct ifnet *ifp = bif->bif_ifp;
1854 	struct ifreq ifr;
1855 	int error;
1856 
1857 	bzero(&ifr, sizeof(ifr));
1858 	ifr.ifr_reqcap = set;
1859 
1860 	if (ifp->if_capenable != set) {
1861 		IFF_LOCKGIANT(ifp);
1862 		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
1863 		IFF_UNLOCKGIANT(ifp);
1864 		if (error) {
1865 			BRIDGE_LOG(LOG_NOTICE, 0,
1866 			    "%s error setting interface capabilities on %s",
1867 			    sc->sc_ifp->if_xname, ifp->if_xname);
1868 		}
1869 	}
1870 }
1871 #endif /* HAS_IF_CAP */
1872 
1873 static errno_t
bridge_set_tso(struct bridge_softc * sc)1874 bridge_set_tso(struct bridge_softc *sc)
1875 {
1876 	struct bridge_iflist *bif;
1877 	u_int32_t tso_v4_mtu;
1878 	u_int32_t tso_v6_mtu;
1879 	ifnet_offload_t offload;
1880 	errno_t error = 0;
1881 
1882 	/* By default, support TSO */
1883 	offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
1884 	tso_v4_mtu = IP_MAXPACKET;
1885 	tso_v6_mtu = IP_MAXPACKET;
1886 
1887 	/* Use the lowest common denominator of the members */
1888 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1889 		ifnet_t ifp = bif->bif_ifp;
1890 
1891 		if (ifp == NULL) {
1892 			continue;
1893 		}
1894 
1895 		if (offload & IFNET_TSO_IPV4) {
1896 			if (ifp->if_hwassist & IFNET_TSO_IPV4) {
1897 				if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
1898 					tso_v4_mtu = ifp->if_tso_v4_mtu;
1899 				}
1900 			} else {
1901 				offload &= ~IFNET_TSO_IPV4;
1902 				tso_v4_mtu = 0;
1903 			}
1904 		}
1905 		if (offload & IFNET_TSO_IPV6) {
1906 			if (ifp->if_hwassist & IFNET_TSO_IPV6) {
1907 				if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
1908 					tso_v6_mtu = ifp->if_tso_v6_mtu;
1909 				}
1910 			} else {
1911 				offload &= ~IFNET_TSO_IPV6;
1912 				tso_v6_mtu = 0;
1913 			}
1914 		}
1915 	}
1916 
1917 	if (offload != sc->sc_ifp->if_hwassist) {
1918 		error = ifnet_set_offload(sc->sc_ifp, offload);
1919 		if (error != 0) {
1920 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
1921 			    "ifnet_set_offload(%s, 0x%x) failed %d",
1922 			    sc->sc_ifp->if_xname, offload, error);
1923 			goto done;
1924 		}
1925 		/*
1926 		 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
1927 		 * as large as the interface MTU
1928 		 */
1929 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
1930 			if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
1931 				tso_v4_mtu = sc->sc_ifp->if_mtu;
1932 			}
1933 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
1934 			    tso_v4_mtu);
1935 			if (error != 0) {
1936 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
1937 				    "ifnet_set_tso_mtu(%s, "
1938 				    "AF_INET, %u) failed %d",
1939 				    sc->sc_ifp->if_xname,
1940 				    tso_v4_mtu, error);
1941 				goto done;
1942 			}
1943 		}
1944 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
1945 			if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
1946 				tso_v6_mtu = sc->sc_ifp->if_mtu;
1947 			}
1948 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
1949 			    tso_v6_mtu);
1950 			if (error != 0) {
1951 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
1952 				    "ifnet_set_tso_mtu(%s, "
1953 				    "AF_INET6, %u) failed %d",
1954 				    sc->sc_ifp->if_xname,
1955 				    tso_v6_mtu, error);
1956 				goto done;
1957 			}
1958 		}
1959 	}
1960 done:
1961 	return error;
1962 }
1963 
1964 /*
1965  * bridge_lookup_member:
1966  *
1967  *	Lookup a bridge member interface.
1968  */
1969 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,const char * name)1970 bridge_lookup_member(struct bridge_softc *sc, const char *name)
1971 {
1972 	struct bridge_iflist *bif;
1973 	struct ifnet *ifp;
1974 
1975 	BRIDGE_LOCK_ASSERT_HELD(sc);
1976 
1977 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1978 		ifp = bif->bif_ifp;
1979 		if (strcmp(ifp->if_xname, name) == 0) {
1980 			return bif;
1981 		}
1982 	}
1983 
1984 	return NULL;
1985 }
1986 
1987 /*
1988  * bridge_lookup_member_if:
1989  *
1990  *	Lookup a bridge member interface by ifnet*.
1991  */
1992 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)1993 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
1994 {
1995 	struct bridge_iflist *bif;
1996 
1997 	BRIDGE_LOCK_ASSERT_HELD(sc);
1998 
1999 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2000 		if (bif->bif_ifp == member_ifp) {
2001 			return bif;
2002 		}
2003 	}
2004 
2005 	return NULL;
2006 }
2007 
2008 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2009 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2010     mbuf_t *data, char **frame_ptr)
2011 {
2012 #pragma unused(protocol)
2013 	errno_t error = 0;
2014 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2015 	struct bridge_softc *sc = bif->bif_sc;
2016 	int included = 0;
2017 	size_t frmlen = 0;
2018 	mbuf_t m = *data;
2019 
2020 	if ((m->m_flags & M_PROTO1)) {
2021 		goto out;
2022 	}
2023 
2024 	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2025 	    *frame_ptr <= (char *)mbuf_data(m)) {
2026 		included = 1;
2027 		frmlen = (char *)mbuf_data(m) - *frame_ptr;
2028 	}
2029 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2030 	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2031 	    "frmlen %lu", sc->sc_ifp->if_xname,
2032 	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2033 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
2034 	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2035 	    included ? "inside" : "outside", frmlen);
2036 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2037 		brlog_mbuf(m, "bridge_iff_input[", "");
2038 		brlog_ether_header((struct ether_header *)
2039 		    (void *)*frame_ptr);
2040 		brlog_mbuf_data(m, 0, 20);
2041 	}
2042 	if (included == 0) {
2043 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2044 		goto out;
2045 	}
2046 
2047 	/* Move data pointer to start of frame to the link layer header */
2048 	(void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
2049 	    mbuf_len(m) + frmlen);
2050 	(void) mbuf_pkthdr_adjustlen(m, frmlen);
2051 
2052 	/* make sure we can access the ethernet header */
2053 	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2054 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2055 		    "short frame %lu < %lu",
2056 		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
2057 		goto out;
2058 	}
2059 	if (mbuf_len(m) < sizeof(struct ether_header)) {
2060 		error = mbuf_pullup(data, sizeof(struct ether_header));
2061 		if (error != 0) {
2062 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2063 			    "mbuf_pullup(%lu) failed %d",
2064 			    sizeof(struct ether_header),
2065 			    error);
2066 			error = EJUSTRETURN;
2067 			goto out;
2068 		}
2069 		if (m != *data) {
2070 			m = *data;
2071 			*frame_ptr = mbuf_data(m);
2072 		}
2073 	}
2074 
2075 	error = bridge_input(ifp, data);
2076 
2077 	/* Adjust packet back to original */
2078 	if (error == 0) {
2079 		/* bridge_input might have modified *data */
2080 		if (*data != m) {
2081 			m = *data;
2082 			*frame_ptr = mbuf_data(m);
2083 		}
2084 		(void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen,
2085 		    mbuf_len(m) - frmlen);
2086 		(void) mbuf_pkthdr_adjustlen(m, -frmlen);
2087 	}
2088 
2089 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2090 	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2091 		brlog_mbuf(m, "bridge_iff_input]", "");
2092 	}
2093 
2094 out:
2095 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2096 
2097 	return error;
2098 }
2099 
2100 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2101 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2102     mbuf_t *data)
2103 {
2104 #pragma unused(protocol)
2105 	errno_t error = 0;
2106 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2107 	struct bridge_softc *sc = bif->bif_sc;
2108 	mbuf_t m = *data;
2109 
2110 	if ((m->m_flags & M_PROTO1)) {
2111 		goto out;
2112 	}
2113 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2114 	    "%s from %s m 0x%llx data 0x%llx",
2115 	    sc->sc_ifp->if_xname, ifp->if_xname,
2116 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
2117 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
2118 
2119 	error = bridge_member_output(sc, ifp, data);
2120 	if (error != 0 && error != EJUSTRETURN) {
2121 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2122 		    "bridge_member_output failed error %d",
2123 		    error);
2124 	}
2125 out:
2126 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2127 
2128 	return error;
2129 }
2130 
2131 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2132 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2133     const struct kev_msg *event_msg)
2134 {
2135 #pragma unused(protocol)
2136 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2137 	struct bridge_softc *sc = bif->bif_sc;
2138 
2139 	if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2140 	    event_msg->kev_class == KEV_NETWORK_CLASS &&
2141 	    event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2142 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2143 		    "%s event_code %u - %s",
2144 		    ifp->if_xname, event_msg->event_code,
2145 		    dlil_kev_dl_code_str(event_msg->event_code));
2146 
2147 		switch (event_msg->event_code) {
2148 		case KEV_DL_LINK_OFF:
2149 		case KEV_DL_LINK_ON: {
2150 			bridge_iflinkevent(ifp);
2151 #if BRIDGESTP
2152 			bstp_linkstate(ifp, event_msg->event_code);
2153 #endif /* BRIDGESTP */
2154 			break;
2155 		}
2156 		case KEV_DL_SIFFLAGS: {
2157 			if ((bif->bif_flags & BIFF_PROMISC) == 0 &&
2158 			    (ifp->if_flags & IFF_UP)) {
2159 				errno_t error;
2160 
2161 				error = ifnet_set_promiscuous(ifp, 1);
2162 				if (error != 0) {
2163 					BRIDGE_LOG(LOG_NOTICE, 0,
2164 					    "ifnet_set_promiscuous (%s)"
2165 					    " failed %d", ifp->if_xname,
2166 					    error);
2167 				} else {
2168 					bif->bif_flags |= BIFF_PROMISC;
2169 				}
2170 			}
2171 			break;
2172 		}
2173 		case KEV_DL_IFCAP_CHANGED: {
2174 			BRIDGE_LOCK(sc);
2175 			bridge_set_tso(sc);
2176 			BRIDGE_UNLOCK(sc);
2177 			break;
2178 		}
2179 		case KEV_DL_PROTO_DETACHED:
2180 		case KEV_DL_PROTO_ATTACHED: {
2181 			bridge_proto_attach_changed(ifp);
2182 			break;
2183 		}
2184 		default:
2185 			break;
2186 		}
2187 	}
2188 }
2189 
2190 /*
2191  * bridge_iff_detached:
2192  *
2193  *      Called when our interface filter has been detached from a
2194  *      member interface.
2195  */
2196 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2197 bridge_iff_detached(void *cookie, ifnet_t ifp)
2198 {
2199 	struct bridge_iflist *bif;
2200 	struct bridge_softc *sc = ifp->if_bridge;
2201 
2202 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2203 
2204 	/* Check if the interface is a bridge member */
2205 	if (sc != NULL) {
2206 		BRIDGE_LOCK(sc);
2207 		bif = bridge_lookup_member_if(sc, ifp);
2208 		if (bif != NULL) {
2209 			bridge_delete_member(sc, bif, 1);
2210 		}
2211 		BRIDGE_UNLOCK(sc);
2212 		goto done;
2213 	}
2214 	/* Check if the interface is a span port */
2215 	lck_mtx_lock(&bridge_list_mtx);
2216 	LIST_FOREACH(sc, &bridge_list, sc_list) {
2217 		BRIDGE_LOCK(sc);
2218 		TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2219 		if (ifp == bif->bif_ifp) {
2220 			bridge_delete_span(sc, bif);
2221 			break;
2222 		}
2223 		BRIDGE_UNLOCK(sc);
2224 	}
2225 	lck_mtx_unlock(&bridge_list_mtx);
2226 
2227 done:
2228 	bif = (struct bridge_iflist *)cookie;
2229 	kfree_type(struct bridge_iflist, bif);
2230 }
2231 
2232 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2233 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2234     char *header)
2235 {
2236 #pragma unused(protocol, packet, header)
2237 	BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2238 	    ifp->if_xname);
2239 	return 0;
2240 }
2241 
2242 static int
bridge_attach_protocol(struct ifnet * ifp)2243 bridge_attach_protocol(struct ifnet *ifp)
2244 {
2245 	int     error;
2246 	struct ifnet_attach_proto_param reg;
2247 
2248 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2249 	bzero(&reg, sizeof(reg));
2250 	reg.input = bridge_proto_input;
2251 
2252 	error = ifnet_attach_protocol(ifp, PF_BRIDGE, &reg);
2253 	if (error) {
2254 		BRIDGE_LOG(LOG_NOTICE, 0,
2255 		    "ifnet_attach_protocol(%s) failed, %d",
2256 		    ifp->if_xname, error);
2257 	}
2258 
2259 	return error;
2260 }
2261 
2262 static int
bridge_detach_protocol(struct ifnet * ifp)2263 bridge_detach_protocol(struct ifnet *ifp)
2264 {
2265 	int     error;
2266 
2267 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2268 	error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2269 	if (error) {
2270 		BRIDGE_LOG(LOG_NOTICE, 0,
2271 		    "ifnet_detach_protocol(%s) failed, %d",
2272 		    ifp->if_xname, error);
2273 	}
2274 
2275 	return error;
2276 }
2277 
/*
 * bridge_delete_member:
 *
 *	Delete the specified member interface.
 *
 *	sc	the bridge; its lock must be held on entry
 *	bif	the member to remove
 *	gone	non-zero when the member interface itself is going away;
 *		in that case the interface is not taken out of promiscuous
 *		mode, its capabilities / Skywalk state are not restored,
 *		and the interface filter is not detached here
 *
 *	The bridge lock is dropped and re-taken several times inside this
 *	function and is held again on return.  `bif' is freed here only if
 *	no interface filter was attached; with a filter attached, the
 *	free is left to bridge_iff_detached().
 */
static void
bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
    int gone)
{
	struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
	int lladdr_changed = 0, error, filt_attached;
	uint8_t eaddr[ETHER_ADDR_LEN];
	u_int32_t event_code = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);
	VERIFY(ifs != NULL);

	/*
	 * Remove the member from the list first so it cannot be found anymore
	 * when we release the bridge lock below
	 */
	BRIDGE_XLOCK(sc);
	TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
	BRIDGE_XDROP(sc);

	/*
	 * If MAC NAT is in use: disable it when this member is the MAC NAT
	 * interface, otherwise just flush this member's MAC NAT entries.
	 */
	if (sc->sc_mac_nat_bif != NULL) {
		if (bif == sc->sc_mac_nat_bif) {
			bridge_mac_nat_disable(sc);
		} else {
			bridge_mac_nat_flush_entries(sc, bif);
		}
	}

	if (!gone) {
		switch (ifs->if_type) {
		case IFT_ETHER:
		case IFT_L2VLAN:
		case IFT_IEEE8023ADLAG:
			/*
			 * Take the interface out of promiscuous mode.
			 */
			if (bif->bif_flags & BIFF_PROMISC) {
				/*
				 * Unlock to prevent deadlock with
				 * bridge_iff_event() in case the driver
				 * generates an interface event
				 */
				BRIDGE_UNLOCK(sc);
				(void) ifnet_set_promiscuous(ifs, 0);
				BRIDGE_LOCK(sc);
			}
			break;

		case IFT_GIF:
		/* currently not supported */
		/* FALLTHRU */
		default:
			VERIFY(0);
			/* NOTREACHED */
		}

#if HAS_IF_CAP
		/* re-enable any interface capabilities */
		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
#endif
	}

	if (bif->bif_flags & BIFF_PROTO_ATTACHED) {
		/* Respect lock ordering with DLIL lock */
		BRIDGE_UNLOCK(sc);
		(void) bridge_detach_protocol(ifs);
		BRIDGE_LOCK(sc);
	}
#if BRIDGESTP
	if ((bif->bif_ifflags & IFBIF_STP) != 0) {
		bstp_disable(&bif->bif_stp);
	}
#endif /* BRIDGESTP */

	/*
	 * If removing the interface that gave the bridge its mac address, set
	 * the mac address of the bridge to the address of the next member, or
	 * to its default address if no members are left.
	 */
	if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
		ifnet_release(sc->sc_ifaddr);
		if (TAILQ_EMPTY(&sc->sc_iflist)) {
			bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
			sc->sc_ifaddr = NULL;
		} else {
			struct ifnet *fif =
			    TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
			bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
			sc->sc_ifaddr = fif;
			ifnet_reference(fif);   /* for sc_ifaddr */
		}
		/* the new address is applied below, after the lock is dropped */
		lladdr_changed = 1;
	}

#if HAS_IF_CAP
	bridge_mutecaps(sc);    /* recalculate now this interface is removed */
#endif /* HAS_IF_CAP */

	error = bridge_set_tso(sc);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
	}

	/* flush every forwarding entry that points at this member */
	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);

	KASSERT(bif->bif_addrcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));

	/* sample the flag while the lock is still held */
	filt_attached = bif->bif_flags & BIFF_FILTER_ATTACHED;

	/*
	 * Update link status of the bridge based on its remaining members
	 */
	event_code = bridge_updatelinkstatus(sc);

	BRIDGE_UNLOCK(sc);

#if SKYWALK
	if (!gone) {
		/* undo what was recorded in the member's flags */
		if ((bif->bif_flags & BIFF_NETAGENT_REMOVED) != 0) {
			ifnet_add_netagent(ifs);
			bif->bif_flags &= ~BIFF_NETAGENT_REMOVED;
		}
		if ((bif->bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
			ifnet_detach_flowswitch_nexus(ifs);
			bif->bif_flags &= ~BIFF_FLOWSWITCH_ATTACHED;
		}
	}
#endif /* SKYWALK */

	if (lladdr_changed &&
	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
	}

	if (event_code != 0) {
		bridge_link_event(bifp, event_code);
	}

#if BRIDGESTP
	bstp_destroy(&bif->bif_stp);    /* prepare to free */
#endif /* BRIDGESTP */

	if (filt_attached) {
		/* only detach if the interface is still present */
		if (!gone) {
			iflt_detach(bif->bif_iff_ref);
		}
	} else {
		/* filter wasn't attached, need to free now */
		kfree_type(struct bridge_iflist, bif);
	}

	/* drop the back pointer and the reference held on the member */
	ifs->if_bridge = NULL;
	ifnet_release(ifs);

	/* callers expect the bridge lock to be held on return */
	BRIDGE_LOCK(sc);
}
2441 
2442 /*
2443  * bridge_delete_span:
2444  *
2445  *	Delete the specified span interface.
2446  */
2447 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2448 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2449 {
2450 	BRIDGE_LOCK_ASSERT_HELD(sc);
2451 
2452 	KASSERT(bif->bif_ifp->if_bridge == NULL,
2453 	    ("%s: not a span interface", __func__));
2454 
2455 	ifnet_release(bif->bif_ifp);
2456 
2457 	TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2458 	kfree_type(struct bridge_iflist, bif);
2459 }
2460 
2461 static int
bridge_ioctl_add(struct bridge_softc * sc,void * arg)2462 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
2463 {
2464 	struct ifbreq *req = arg;
2465 	struct bridge_iflist *bif = NULL;
2466 	struct ifnet *ifs, *bifp = sc->sc_ifp;
2467 	int error = 0, lladdr_changed = 0;
2468 	uint8_t eaddr[ETHER_ADDR_LEN];
2469 	struct iff_filter iff;
2470 	u_int32_t event_code = 0;
2471 	boolean_t mac_nat = FALSE;
2472 
2473 	ifs = ifunit(req->ifbr_ifsname);
2474 	if (ifs == NULL) {
2475 		return ENOENT;
2476 	}
2477 	if (ifs->if_ioctl == NULL) {    /* must be supported */
2478 		return EINVAL;
2479 	}
2480 
2481 	if (IFNET_IS_INTCOPROC(ifs)) {
2482 		return EINVAL;
2483 	}
2484 
2485 	/* If it's in the span list, it can't be a member. */
2486 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2487 		if (ifs == bif->bif_ifp) {
2488 			return EBUSY;
2489 		}
2490 	}
2491 
2492 	if (ifs->if_bridge == sc) {
2493 		return EEXIST;
2494 	}
2495 
2496 	if (ifs->if_bridge != NULL) {
2497 		return EBUSY;
2498 	}
2499 
2500 	switch (ifs->if_type) {
2501 	case IFT_ETHER:
2502 		if (strcmp(ifs->if_name, "en") == 0 &&
2503 		    ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2504 		    (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2505 			/* XXX is there a better way to identify Wi-Fi STA? */
2506 			mac_nat = TRUE;
2507 		}
2508 		break;
2509 	case IFT_L2VLAN:
2510 	case IFT_IEEE8023ADLAG:
2511 		break;
2512 	case IFT_GIF:
2513 	/* currently not supported */
2514 	/* FALLTHRU */
2515 	default:
2516 		return EINVAL;
2517 	}
2518 
2519 	/* fail to add the interface if the MTU doesn't match */
2520 	if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2521 		BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2522 		    sc->sc_ifp->if_xname,
2523 		    ifs->if_xname);
2524 		return EINVAL;
2525 	}
2526 
2527 	/* there's already an interface that's doing MAC NAT */
2528 	if (mac_nat && sc->sc_mac_nat_bif != NULL) {
2529 		return EBUSY;
2530 	}
2531 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2532 	bif->bif_ifp = ifs;
2533 	ifnet_reference(ifs);
2534 	bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2535 #if HAS_IF_CAP
2536 	bif->bif_savedcaps = ifs->if_capenable;
2537 #endif /* HAS_IF_CAP */
2538 	bif->bif_sc = sc;
2539 	if (mac_nat) {
2540 		(void)bridge_mac_nat_enable(sc, bif);
2541 	}
2542 
2543 	/* Allow the first Ethernet member to define the MTU */
2544 	if (TAILQ_EMPTY(&sc->sc_iflist)) {
2545 		sc->sc_ifp->if_mtu = ifs->if_mtu;
2546 	}
2547 
2548 	/*
2549 	 * Assign the interface's MAC address to the bridge if it's the first
2550 	 * member and the MAC address of the bridge has not been changed from
2551 	 * the default (randomly) generated one.
2552 	 */
2553 	if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2554 	    !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr, ETHER_ADDR_LEN)) {
2555 		bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2556 		sc->sc_ifaddr = ifs;
2557 		ifnet_reference(ifs);   /* for sc_ifaddr */
2558 		lladdr_changed = 1;
2559 	}
2560 
2561 	ifs->if_bridge = sc;
2562 #if BRIDGESTP
2563 	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2564 #endif /* BRIDGESTP */
2565 
2566 	/*
2567 	 * XXX: XLOCK HERE!?!
2568 	 */
2569 	TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
2570 
2571 #if HAS_IF_CAP
2572 	/* Set interface capabilities to the intersection set of all members */
2573 	bridge_mutecaps(sc);
2574 #endif /* HAS_IF_CAP */
2575 
2576 	bridge_set_tso(sc);
2577 
2578 
2579 	/*
2580 	 * Place the interface into promiscuous mode.
2581 	 */
2582 	switch (ifs->if_type) {
2583 	case IFT_ETHER:
2584 	case IFT_L2VLAN:
2585 	case IFT_IEEE8023ADLAG:
2586 		error = ifnet_set_promiscuous(ifs, 1);
2587 		switch (error) {
2588 		case 0:
2589 			bif->bif_flags |= BIFF_PROMISC;
2590 			break;
2591 		case ENETDOWN:
2592 		case EPWROFF:
2593 			BRIDGE_LOG(LOG_NOTICE, 0,
2594 			    "ifnet_set_promiscuous(%s) failed %d, ignoring",
2595 			    ifs->if_xname, error);
2596 			/* Ignore error when device is not up */
2597 			error = 0;
2598 			break;
2599 		default:
2600 			BRIDGE_LOG(LOG_NOTICE, 0,
2601 			    "ifnet_set_promiscuous(%s) failed %d",
2602 			    ifs->if_xname, error);
2603 			goto out;
2604 		}
2605 		break;
2606 
2607 	default:
2608 		break;
2609 	}
2610 
2611 	/*
2612 	 * The new member may change the link status of the bridge interface
2613 	 */
2614 	if (interface_media_active(ifs)) {
2615 		bif->bif_flags |= BIFF_MEDIA_ACTIVE;
2616 	} else {
2617 		bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
2618 	}
2619 
2620 	event_code = bridge_updatelinkstatus(sc);
2621 
2622 	/*
2623 	 * Respect lock ordering with DLIL lock for the following operations
2624 	 */
2625 	BRIDGE_UNLOCK(sc);
2626 
2627 #if SKYWALK
2628 	/* ensure that the flowswitch is present for native interface */
2629 	if (SKYWALK_NATIVE(ifs)) {
2630 		if (ifnet_attach_flowswitch_nexus(ifs)) {
2631 			bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
2632 		}
2633 	}
2634 	/* remove the netagent on the flowswitch (rdar://75050182) */
2635 	if (ifnet_remove_netagent(ifs)) {
2636 		bif->bif_flags |= BIFF_NETAGENT_REMOVED;
2637 	}
2638 #endif /* SKYWALK */
2639 
2640 	/*
2641 	 * install an interface filter
2642 	 */
2643 	memset(&iff, 0, sizeof(struct iff_filter));
2644 	iff.iff_cookie = bif;
2645 	iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
2646 	iff.iff_input = bridge_iff_input;
2647 	iff.iff_output = bridge_iff_output;
2648 	iff.iff_event = bridge_iff_event;
2649 	iff.iff_detached = bridge_iff_detached;
2650 	error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
2651 	    DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
2652 	if (error != 0) {
2653 		BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
2654 		BRIDGE_LOCK(sc);
2655 		goto out;
2656 	}
2657 	BRIDGE_LOCK(sc);
2658 	bif->bif_flags |= BIFF_FILTER_ATTACHED;
2659 	BRIDGE_UNLOCK(sc);
2660 
2661 	/*
2662 	 * install a dummy "bridge" protocol
2663 	 */
2664 	if ((error = bridge_attach_protocol(ifs)) != 0) {
2665 		if (error != 0) {
2666 			BRIDGE_LOG(LOG_NOTICE, 0,
2667 			    "bridge_attach_protocol failed %d", error);
2668 			BRIDGE_LOCK(sc);
2669 			goto out;
2670 		}
2671 	}
2672 	BRIDGE_LOCK(sc);
2673 	bif->bif_flags |= BIFF_PROTO_ATTACHED;
2674 	BRIDGE_UNLOCK(sc);
2675 
2676 	if (lladdr_changed &&
2677 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2678 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2679 	}
2680 
2681 	if (event_code != 0) {
2682 		bridge_link_event(bifp, event_code);
2683 	}
2684 
2685 	BRIDGE_LOCK(sc);
2686 
2687 out:
2688 	if (error != 0) {
2689 		if (bif != NULL) {
2690 			bridge_delete_member(sc, bif, 0);
2691 		}
2692 	} else if (IFNET_IS_VMNET(ifs)) {
2693 		INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
2694 	}
2695 
2696 	return error;
2697 }
2698 
2699 static int
bridge_ioctl_del(struct bridge_softc * sc,void * arg)2700 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
2701 {
2702 	struct ifbreq *req = arg;
2703 	struct bridge_iflist *bif;
2704 
2705 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2706 	if (bif == NULL) {
2707 		return ENOENT;
2708 	}
2709 
2710 	bridge_delete_member(sc, bif, 0);
2711 
2712 	return 0;
2713 }
2714 
/*
 * bridge_ioctl_purge:
 *
 *	Placeholder handler: neither argument is used and the call always
 *	reports success.
 */
static int
bridge_ioctl_purge(struct bridge_softc *sc, void *arg)
{
#pragma unused(sc, arg)
	return 0;
}
2721 
2722 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * arg)2723 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
2724 {
2725 	struct ifbreq *req = arg;
2726 	struct bridge_iflist *bif;
2727 
2728 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2729 	if (bif == NULL) {
2730 		return ENOENT;
2731 	}
2732 
2733 	struct bstp_port *bp;
2734 
2735 	bp = &bif->bif_stp;
2736 	req->ifbr_state = bp->bp_state;
2737 	req->ifbr_priority = bp->bp_priority;
2738 	req->ifbr_path_cost = bp->bp_path_cost;
2739 	req->ifbr_proto = bp->bp_protover;
2740 	req->ifbr_role = bp->bp_role;
2741 	req->ifbr_stpflags = bp->bp_flags;
2742 	req->ifbr_ifsflags = bif->bif_ifflags;
2743 
2744 	/* Copy STP state options as flags */
2745 	if (bp->bp_operedge) {
2746 		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
2747 	}
2748 	if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
2749 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
2750 	}
2751 	if (bp->bp_ptp_link) {
2752 		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
2753 	}
2754 	if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
2755 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
2756 	}
2757 	if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
2758 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
2759 	}
2760 	if (bp->bp_flags & BSTP_PORT_ADMCOST) {
2761 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
2762 	}
2763 
2764 	req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
2765 	req->ifbr_addrcnt = bif->bif_addrcnt;
2766 	req->ifbr_addrmax = bif->bif_addrmax;
2767 	req->ifbr_addrexceeded = bif->bif_addrexceeded;
2768 
2769 	return 0;
2770 }
2771 
2772 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * arg)2773 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
2774 {
2775 	struct ifbreq *req = arg;
2776 	struct bridge_iflist *bif;
2777 #if BRIDGESTP
2778 	struct bstp_port *bp;
2779 	int error;
2780 #endif /* BRIDGESTP */
2781 
2782 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2783 	if (bif == NULL) {
2784 		return ENOENT;
2785 	}
2786 
2787 	if (req->ifbr_ifsflags & IFBIF_SPAN) {
2788 		/* SPAN is readonly */
2789 		return EINVAL;
2790 	}
2791 #define _EXCLUSIVE_FLAGS        (IFBIF_CHECKSUM_OFFLOAD | IFBIF_MAC_NAT)
2792 	if ((req->ifbr_ifsflags & _EXCLUSIVE_FLAGS) == _EXCLUSIVE_FLAGS) {
2793 		/* can't specify both MAC-NAT and checksum offload */
2794 		return EINVAL;
2795 	}
2796 	if ((req->ifbr_ifsflags & IFBIF_MAC_NAT) != 0) {
2797 		errno_t error;
2798 
2799 		error = bridge_mac_nat_enable(sc, bif);
2800 		if (error != 0) {
2801 			return error;
2802 		}
2803 	} else if (sc->sc_mac_nat_bif == bif) {
2804 		bridge_mac_nat_disable(sc);
2805 	}
2806 
2807 
2808 #if BRIDGESTP
2809 	if (req->ifbr_ifsflags & IFBIF_STP) {
2810 		if ((bif->bif_ifflags & IFBIF_STP) == 0) {
2811 			error = bstp_enable(&bif->bif_stp);
2812 			if (error) {
2813 				return error;
2814 			}
2815 		}
2816 	} else {
2817 		if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2818 			bstp_disable(&bif->bif_stp);
2819 		}
2820 	}
2821 
2822 	/* Pass on STP flags */
2823 	bp = &bif->bif_stp;
2824 	bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
2825 	bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
2826 	bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
2827 	bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
2828 #else /* !BRIDGESTP */
2829 	if (req->ifbr_ifsflags & IFBIF_STP) {
2830 		return EOPNOTSUPP;
2831 	}
2832 #endif /* !BRIDGESTP */
2833 
2834 	/* Save the bits relating to the bridge */
2835 	bif->bif_ifflags = req->ifbr_ifsflags & IFBIFMASK;
2836 
2837 
2838 	return 0;
2839 }
2840 
2841 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * arg)2842 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
2843 {
2844 	struct ifbrparam *param = arg;
2845 
2846 	sc->sc_brtmax = param->ifbrp_csize;
2847 	bridge_rttrim(sc);
2848 	return 0;
2849 }
2850 
2851 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * arg)2852 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
2853 {
2854 	struct ifbrparam *param = arg;
2855 
2856 	param->ifbrp_csize = sc->sc_brtmax;
2857 
2858 	return 0;
2859 }
2860 
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body for the 32- and 64-bit "get interface list" handlers.
 *	Expects `sc`, `bifc` and `error` to be in scope at the expansion
 *	site.
 *
 *	- Counts the entries on sc_iflist and sc_spanlist.
 *	- If the caller passed ifbic_len == 0, stores the required byte
 *	  count in ifbic_len and returns 0 from the enclosing function.
 *	- Otherwise allocates a zeroed staging buffer with the bridge lock
 *	  dropped, fills it with one struct ifbreq per member and per span
 *	  port (bounded by the smaller of the caller's length and the
 *	  buffer size), and copies it out with the lock dropped again.
 *	- The bridge lock is held on every exit path.
 *
 *	If the staging buffer cannot be allocated the enclosing function
 *	returns ENOMEM instead of writing through a NULL pointer.
 *
 *	NOTE(review): `breq` is zeroed once before the loops and reused, and
 *	the span loop only sets the name, ifbr_ifsflags and ifbr_portno, so
 *	span entries carry whatever other fields the last member left behind.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif;                                      \
	struct ifbreq breq;                                             \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next)                    \
	        count++;                                                \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)                  \
	        count++;                                                \
                                                                        \
	buflen = sizeof (breq) * count;                                 \
	if (bifc->ifbic_len == 0) {                                     \
	        bifc->ifbic_len = buflen;                               \
	        return (0);                                             \
	}                                                               \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);        \
	BRIDGE_LOCK(sc);                                                \
	/* a non-empty request with no buffer means allocation failed */ \
	if (outbuf == NULL && buflen != 0)                              \
	        return (ENOMEM);                                        \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	len = min(bifc->ifbic_len, buflen);                             \
	bzero(&breq, sizeof (breq));                                    \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname);                      \
	/* Fill in the ifbreq structure */                      \
	        error = bridge_ioctl_gifflags(sc, &breq);               \
	        if (error)                                              \
	                break;                                          \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {                \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname,                             \
	                 sizeof (breq.ifbr_ifsname),                    \
	                 "%s", bif->bif_ifp->if_xname);                 \
	        breq.ifbr_ifsflags = bif->bif_ifflags;                  \
	        breq.ifbr_portno                                        \
	                = bif->bif_ifp->if_index & 0xfff;               \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifc->ifbic_len = sizeof (breq) * count;                        \
	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);      \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
} while (0)
2923 
2924 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * arg)2925 bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
2926 {
2927 	struct ifbifconf64 *bifc = arg;
2928 	int error = 0;
2929 
2930 	BRIDGE_IOCTL_GIFS;
2931 
2932 	return error;
2933 }
2934 
2935 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * arg)2936 bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
2937 {
2938 	struct ifbifconf32 *bifc = arg;
2939 	int error = 0;
2940 
2941 	BRIDGE_IOCTL_GIFS;
2942 
2943 	return error;
2944 }
2945 
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body for the 32- and 64-bit "get address table" handlers.
 *	Expects `sc`, `bac`, `bareq` and `error` to be in scope at the
 *	expansion site, and always returns from the enclosing function.
 *
 *	- Returns 0 immediately when the caller's ifbac_len is 0.
 *	- Counts the entries on sc_rtlist, allocates a zeroed staging buffer
 *	  with the bridge lock dropped, then fills it with one `bareq` per
 *	  entry, bounded by the smaller of ifbac_len and the buffer size.
 *	- ifba_expire is the time remaining (brt_expire - net_uptime()) for
 *	  dynamic entries that have not yet expired, and 0 for every other
 *	  entry.  It is reset for each entry because `bareq` is reused, so
 *	  an already-expired dynamic entry must not inherit the value written
 *	  for the previous one.
 *	- Stores the number of bytes produced in ifbac_len and copies the
 *	  buffer out with the lock dropped.
 *	- The bridge lock is held on every exit path.
 *
 *	If the staging buffer cannot be allocated, ifbac_len is set to 0 and
 *	ENOMEM is returned rather than copying entries through a NULL
 *	pointer.
 */
#define BRIDGE_IOCTL_RTS do {                                               \
	struct bridge_rtnode *brt;                                          \
	char *buf;                                                          \
	char *outbuf = NULL;                                                \
	unsigned int count, buflen, len;                                    \
	unsigned long now;                                                  \
                                                                            \
	if (bac->ifbac_len == 0)                                            \
	        return (0);                                                 \
                                                                            \
	bzero(&bareq, sizeof (bareq));                                      \
	count = 0;                                                          \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)                         \
	        count++;                                                    \
	buflen = sizeof (bareq) * count;                                    \
                                                                            \
	BRIDGE_UNLOCK(sc);                                                  \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);            \
	BRIDGE_LOCK(sc);                                                    \
                                                                            \
	count = 0;                                                          \
	/* a non-empty table with no buffer means allocation failed */     \
	if (outbuf == NULL && buflen != 0) {                                \
	        error = ENOMEM;                                             \
	        goto out;                                                   \
	}                                                                   \
	buf = outbuf;                                                       \
	len = min(bac->ifbac_len, buflen);                                  \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {                       \
	        if (len < sizeof (bareq))                                   \
	                goto out;                                           \
	        snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname),   \
	                 "%s", brt->brt_ifp->if_xname);                     \
	        memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
	        bareq.ifba_vlan = brt->brt_vlan;                            \
	        /* default; overridden only for live dynamic entries */    \
	        bareq.ifba_expire = 0;                                      \
	        if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {   \
	                now = (unsigned long) net_uptime();                 \
	                if (now < brt->brt_expire)                          \
	                        bareq.ifba_expire =                         \
	                            brt->brt_expire - now;                  \
	        }                                                           \
	        bareq.ifba_flags = brt->brt_flags;                          \
                                                                            \
	        memcpy(buf, &bareq, sizeof (bareq));                        \
	        count++;                                                    \
	        buf += sizeof (bareq);                                      \
	        len -= sizeof (bareq);                                      \
	}                                                                   \
out:                                                                        \
	bac->ifbac_len = sizeof (bareq) * count;                            \
	if (outbuf != NULL) {                                               \
	        BRIDGE_UNLOCK(sc);                                          \
	        error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);    \
	        kfree_data(outbuf, buflen);                                 \
	        BRIDGE_LOCK(sc);                                            \
	}                                                                   \
	return (error);                                                     \
} while (0)
3000 
3001 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * arg)3002 bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
3003 {
3004 	struct ifbaconf64 *bac = arg;
3005 	struct ifbareq64 bareq;
3006 	int error = 0;
3007 
3008 	BRIDGE_IOCTL_RTS;
3009 	return error;
3010 }
3011 
3012 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * arg)3013 bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
3014 {
3015 	struct ifbaconf32 *bac = arg;
3016 	struct ifbareq32 bareq;
3017 	int error = 0;
3018 
3019 	BRIDGE_IOCTL_RTS;
3020 	return error;
3021 }
3022 
3023 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * arg)3024 bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
3025 {
3026 	struct ifbareq32 *req = arg;
3027 	struct bridge_iflist *bif;
3028 	int error;
3029 
3030 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3031 	if (bif == NULL) {
3032 		return ENOENT;
3033 	}
3034 
3035 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3036 	    req->ifba_flags);
3037 
3038 	return error;
3039 }
3040 
3041 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * arg)3042 bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
3043 {
3044 	struct ifbareq64 *req = arg;
3045 	struct bridge_iflist *bif;
3046 	int error;
3047 
3048 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3049 	if (bif == NULL) {
3050 		return ENOENT;
3051 	}
3052 
3053 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3054 	    req->ifba_flags);
3055 
3056 	return error;
3057 }
3058 
3059 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * arg)3060 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
3061 {
3062 	struct ifbrparam *param = arg;
3063 
3064 	sc->sc_brttimeout = param->ifbrp_ctime;
3065 	return 0;
3066 }
3067 
3068 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * arg)3069 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
3070 {
3071 	struct ifbrparam *param = arg;
3072 
3073 	param->ifbrp_ctime = sc->sc_brttimeout;
3074 	return 0;
3075 }
3076 
3077 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * arg)3078 bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
3079 {
3080 	struct ifbareq32 *req = arg;
3081 
3082 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3083 }
3084 
3085 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * arg)3086 bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
3087 {
3088 	struct ifbareq64 *req = arg;
3089 
3090 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3091 }
3092 
3093 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * arg)3094 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
3095 {
3096 	struct ifbreq *req = arg;
3097 
3098 	bridge_rtflush(sc, req->ifbr_ifsflags);
3099 	return 0;
3100 }
3101 
3102 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * arg)3103 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
3104 {
3105 	struct ifbrparam *param = arg;
3106 	struct bstp_state *bs = &sc->sc_stp;
3107 
3108 	param->ifbrp_prio = bs->bs_bridge_priority;
3109 	return 0;
3110 }
3111 
3112 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * arg)3113 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
3114 {
3115 #if BRIDGESTP
3116 	struct ifbrparam *param = arg;
3117 
3118 	return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3119 #else /* !BRIDGESTP */
3120 #pragma unused(sc, arg)
3121 	return EOPNOTSUPP;
3122 #endif /* !BRIDGESTP */
3123 }
3124 
3125 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * arg)3126 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
3127 {
3128 	struct ifbrparam *param = arg;
3129 	struct bstp_state *bs = &sc->sc_stp;
3130 
3131 	param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3132 	return 0;
3133 }
3134 
3135 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * arg)3136 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
3137 {
3138 #if BRIDGESTP
3139 	struct ifbrparam *param = arg;
3140 
3141 	return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3142 #else /* !BRIDGESTP */
3143 #pragma unused(sc, arg)
3144 	return EOPNOTSUPP;
3145 #endif /* !BRIDGESTP */
3146 }
3147 
3148 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * arg)3149 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
3150 {
3151 	struct ifbrparam *param;
3152 	struct bstp_state *bs;
3153 
3154 	param = arg;
3155 	bs = &sc->sc_stp;
3156 	param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3157 	return 0;
3158 }
3159 
3160 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * arg)3161 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
3162 {
3163 #if BRIDGESTP
3164 	struct ifbrparam *param = arg;
3165 
3166 	return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3167 #else /* !BRIDGESTP */
3168 #pragma unused(sc, arg)
3169 	return EOPNOTSUPP;
3170 #endif /* !BRIDGESTP */
3171 }
3172 
3173 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * arg)3174 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
3175 {
3176 	struct ifbrparam *param;
3177 	struct bstp_state *bs;
3178 
3179 	param = arg;
3180 	bs = &sc->sc_stp;
3181 	param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3182 	return 0;
3183 }
3184 
3185 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * arg)3186 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
3187 {
3188 #if BRIDGESTP
3189 	struct ifbrparam *param = arg;
3190 
3191 	return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3192 #else /* !BRIDGESTP */
3193 #pragma unused(sc, arg)
3194 	return EOPNOTSUPP;
3195 #endif /* !BRIDGESTP */
3196 }
3197 
3198 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * arg)3199 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
3200 {
3201 #if BRIDGESTP
3202 	struct ifbreq *req = arg;
3203 	struct bridge_iflist *bif;
3204 
3205 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3206 	if (bif == NULL) {
3207 		return ENOENT;
3208 	}
3209 
3210 	return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3211 #else /* !BRIDGESTP */
3212 #pragma unused(sc, arg)
3213 	return EOPNOTSUPP;
3214 #endif /* !BRIDGESTP */
3215 }
3216 
3217 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * arg)3218 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
3219 {
3220 #if BRIDGESTP
3221 	struct ifbreq *req = arg;
3222 	struct bridge_iflist *bif;
3223 
3224 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3225 	if (bif == NULL) {
3226 		return ENOENT;
3227 	}
3228 
3229 	return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3230 #else /* !BRIDGESTP */
3231 #pragma unused(sc, arg)
3232 	return EOPNOTSUPP;
3233 #endif /* !BRIDGESTP */
3234 }
3235 
3236 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * arg)3237 bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
3238 {
3239 	struct ifbrparam *param = arg;
3240 
3241 	param->ifbrp_filter = sc->sc_filter_flags;
3242 
3243 	return 0;
3244 }
3245 
3246 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * arg)3247 bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
3248 {
3249 	struct ifbrparam *param = arg;
3250 
3251 	if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3252 		return EINVAL;
3253 	}
3254 
3255 	if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3256 		return EINVAL;
3257 	}
3258 
3259 	sc->sc_filter_flags = param->ifbrp_filter;
3260 
3261 	return 0;
3262 }
3263 
3264 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * arg)3265 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
3266 {
3267 	struct ifbreq *req = arg;
3268 	struct bridge_iflist *bif;
3269 
3270 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3271 	if (bif == NULL) {
3272 		return ENOENT;
3273 	}
3274 
3275 	bif->bif_addrmax = req->ifbr_addrmax;
3276 	return 0;
3277 }
3278 
3279 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * arg)3280 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
3281 {
3282 	struct ifbreq *req = arg;
3283 	struct bridge_iflist *bif = NULL;
3284 	struct ifnet *ifs;
3285 
3286 	ifs = ifunit(req->ifbr_ifsname);
3287 	if (ifs == NULL) {
3288 		return ENOENT;
3289 	}
3290 
3291 	if (IFNET_IS_INTCOPROC(ifs)) {
3292 		return EINVAL;
3293 	}
3294 
3295 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3296 	if (ifs == bif->bif_ifp) {
3297 		return EBUSY;
3298 	}
3299 
3300 	if (ifs->if_bridge != NULL) {
3301 		return EBUSY;
3302 	}
3303 
3304 	switch (ifs->if_type) {
3305 	case IFT_ETHER:
3306 	case IFT_L2VLAN:
3307 	case IFT_IEEE8023ADLAG:
3308 		break;
3309 	case IFT_GIF:
3310 	/* currently not supported */
3311 	/* FALLTHRU */
3312 	default:
3313 		return EINVAL;
3314 	}
3315 
3316 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3317 
3318 	bif->bif_ifp = ifs;
3319 	bif->bif_ifflags = IFBIF_SPAN;
3320 
3321 	ifnet_reference(bif->bif_ifp);
3322 
3323 	TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3324 
3325 	return 0;
3326 }
3327 
3328 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * arg)3329 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
3330 {
3331 	struct ifbreq *req = arg;
3332 	struct bridge_iflist *bif;
3333 	struct ifnet *ifs;
3334 
3335 	ifs = ifunit(req->ifbr_ifsname);
3336 	if (ifs == NULL) {
3337 		return ENOENT;
3338 	}
3339 
3340 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3341 	if (ifs == bif->bif_ifp) {
3342 		break;
3343 	}
3344 
3345 	if (bif == NULL) {
3346 		return ENOENT;
3347 	}
3348 
3349 	bridge_delete_span(sc, bif);
3350 
3351 	return 0;
3352 }
3353 
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body for the 32- and 64-bit "get bridge STP parameters"
 *	handlers.  Expects `sc` and `req` to be in scope at the expansion
 *	site and copies fields of sc->sc_stp into *req.  The max age, hello
 *	time and forward delay are reported shifted right by 8 bits; the
 *	root port is reported as that port's interface index, or 0 when
 *	there is no root port.
 */
#define BRIDGE_IOCTL_GBPARAM do {                                       \
	struct bstp_state *bs = &sc->sc_stp;                            \
	struct bstp_port *root_port = bs->bs_root_port;                 \
                                                                        \
	/* timers */                                                    \
	req->ifbop_maxage = bs->bs_bridge_max_age >> 8;                 \
	req->ifbop_hellotime = bs->bs_bridge_htime >> 8;                \
	req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8;                \
                                                                        \
	/* root port, 0 when none */                                    \
	if (root_port != NULL)                                          \
	        req->ifbop_root_port = root_port->bp_ifp->if_index;     \
	else                                                            \
	        req->ifbop_root_port = 0;                               \
                                                                        \
	/* bridge-wide settings and priority vectors */                 \
	req->ifbop_holdcount = bs->bs_txholdcount;                      \
	req->ifbop_priority = bs->bs_bridge_priority;                   \
	req->ifbop_protocol = bs->bs_protover;                          \
	req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost;             \
	req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id;           \
	req->ifbop_designated_root = bs->bs_root_pv.pv_root_id;         \
	req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id;    \
                                                                        \
	/* time of the last topology change, copied field by field */   \
	req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec;    \
	req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec;  \
} while (0)
3378 
3379 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * arg)3380 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *arg)
3381 {
3382 	struct ifbropreq32 *req = arg;
3383 
3384 	BRIDGE_IOCTL_GBPARAM;
3385 	return 0;
3386 }
3387 
3388 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * arg)3389 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *arg)
3390 {
3391 	struct ifbropreq64 *req = arg;
3392 
3393 	BRIDGE_IOCTL_GBPARAM;
3394 	return 0;
3395 }
3396 
3397 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * arg)3398 bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
3399 {
3400 	struct ifbrparam *param = arg;
3401 
3402 	param->ifbrp_cexceeded = sc->sc_brtexceeded;
3403 	return 0;
3404 }
3405 
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body for the 32- and 64-bit "get per-port STP state"
 *	handlers.  Expects `sc`, `bifstp` and `error` to be in scope at the
 *	expansion site, and always returns from the enclosing function.
 *
 *	- Counts the members on sc_iflist that have IFBIF_STP set.
 *	- If the caller passed ifbpstp_len == 0, stores the required byte
 *	  count in ifbpstp_len and returns 0.
 *	- Otherwise allocates a zeroed staging buffer with the bridge lock
 *	  dropped, fills it with one struct ifbpstpreq per STP-enabled
 *	  member (bounded by the smaller of the caller's length and the
 *	  buffer size), and copies it out with the lock dropped again.
 *	- The bridge lock is held on every exit path.
 *
 *	If the staging buffer cannot be allocated ENOMEM is returned instead
 *	of writing through a NULL pointer.
 */
#define BRIDGE_IOCTL_GIFSSTP do {                                       \
	struct bridge_iflist *bif;                                      \
	struct bstp_port *bp;                                           \
	struct ifbpstpreq bpreq;                                        \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if ((bif->bif_ifflags & IFBIF_STP) != 0)                \
	                count++;                                        \
	}                                                               \
                                                                        \
	buflen = sizeof (bpreq) * count;                                \
	if (bifstp->ifbpstp_len == 0) {                                 \
	        bifstp->ifbpstp_len = buflen;                           \
	        return (0);                                             \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);        \
	BRIDGE_LOCK(sc);                                                \
	/* a non-empty request with no buffer means allocation failed */ \
	if (outbuf == NULL && buflen != 0)                              \
	        return (ENOMEM);                                        \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	len = min(bifstp->ifbpstp_len, buflen);                         \
	bzero(&bpreq, sizeof (bpreq));                                  \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (bpreq))                               \
	                break;                                          \
                                                                        \
	        if ((bif->bif_ifflags & IFBIF_STP) == 0)                \
	                continue;                                       \
                                                                        \
	        bp = &bif->bif_stp;                                     \
	        bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;     \
	        bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;      \
	        bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;        \
	        bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;     \
	        bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
	        bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;     \
                                                                        \
	        memcpy(buf, &bpreq, sizeof (bpreq));                    \
	        count++;                                                \
	        buf += sizeof (bpreq);                                  \
	        len -= sizeof (bpreq);                                  \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifstp->ifbpstp_len = sizeof (bpreq) * count;                   \
	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
	return (error);                                                 \
} while (0)
3461 
3462 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * arg)3463 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *arg)
3464 {
3465 	struct ifbpstpconf32 *bifstp = arg;
3466 	int error = 0;
3467 
3468 	BRIDGE_IOCTL_GIFSSTP;
3469 	return error;
3470 }
3471 
3472 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * arg)3473 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *arg)
3474 {
3475 	struct ifbpstpconf64 *bifstp = arg;
3476 	int error = 0;
3477 
3478 	BRIDGE_IOCTL_GIFSSTP;
3479 	return error;
3480 }
3481 
3482 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * arg)3483 bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
3484 {
3485 #if BRIDGESTP
3486 	struct ifbrparam *param = arg;
3487 
3488 	return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3489 #else /* !BRIDGESTP */
3490 #pragma unused(sc, arg)
3491 	return EOPNOTSUPP;
3492 #endif /* !BRIDGESTP */
3493 }
3494 
3495 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * arg)3496 bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
3497 {
3498 #if BRIDGESTP
3499 	struct ifbrparam *param = arg;
3500 
3501 	return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3502 #else /* !BRIDGESTP */
3503 #pragma unused(sc, arg)
3504 	return EOPNOTSUPP;
3505 #endif /* !BRIDGESTP */
3506 }
3507 
3508 
3509 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * arg)3510 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *arg)
3511 {
3512 	struct ifbrhostfilter *req = arg;
3513 	struct bridge_iflist *bif;
3514 
3515 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3516 	if (bif == NULL) {
3517 		return ENOENT;
3518 	}
3519 
3520 	bzero(req, sizeof(struct ifbrhostfilter));
3521 	if (bif->bif_flags & BIFF_HOST_FILTER) {
3522 		req->ifbrhf_flags |= IFBRHF_ENABLED;
3523 		bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3524 		    ETHER_ADDR_LEN);
3525 		req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3526 	}
3527 	return 0;
3528 }
3529 
3530 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * arg)3531 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *arg)
3532 {
3533 	struct ifbrhostfilter *req = arg;
3534 	struct bridge_iflist *bif;
3535 
3536 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3537 	if (bif == NULL) {
3538 		return ENOENT;
3539 	}
3540 
3541 	if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3542 		bif->bif_flags |= BIFF_HOST_FILTER;
3543 
3544 		if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3545 			bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3546 			    ETHER_ADDR_LEN);
3547 			if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3548 			    ETHER_ADDR_LEN) != 0) {
3549 				bif->bif_flags |= BIFF_HF_HWSRC;
3550 			} else {
3551 				bif->bif_flags &= ~BIFF_HF_HWSRC;
3552 			}
3553 		}
3554 		if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3555 			bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3556 			if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3557 				bif->bif_flags |= BIFF_HF_IPSRC;
3558 			} else {
3559 				bif->bif_flags &= ~BIFF_HF_IPSRC;
3560 			}
3561 		}
3562 	} else {
3563 		bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3564 		    BIFF_HF_IPSRC);
3565 		bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3566 		bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3567 	}
3568 
3569 	return 0;
3570 }
3571 
3572 static char *
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * buf,unsigned int * len_p)3573 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3574     unsigned int * count_p, char *buf, unsigned int *len_p)
3575 {
3576 	unsigned int            count = *count_p;
3577 	struct ifbrmne          ifbmne;
3578 	unsigned int            len = *len_p;
3579 	struct mac_nat_entry    *mne;
3580 	unsigned long           now;
3581 
3582 	bzero(&ifbmne, sizeof(ifbmne));
3583 	LIST_FOREACH(mne, list, mne_list) {
3584 		if (len < sizeof(ifbmne)) {
3585 			break;
3586 		}
3587 		snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3588 		    "%s", mne->mne_bif->bif_ifp->if_xname);
3589 		memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3590 		    sizeof(ifbmne.ifbmne_mac));
3591 		now = (unsigned long) net_uptime();
3592 		if (now < mne->mne_expire) {
3593 			ifbmne.ifbmne_expire = mne->mne_expire - now;
3594 		} else {
3595 			ifbmne.ifbmne_expire = 0;
3596 		}
3597 		if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3598 			ifbmne.ifbmne_af = AF_INET6;
3599 			ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3600 		} else {
3601 			ifbmne.ifbmne_af = AF_INET;
3602 			ifbmne.ifbmne_ip_addr = mne->mne_ip;
3603 		}
3604 		memcpy(buf, &ifbmne, sizeof(ifbmne));
3605 		count++;
3606 		buf += sizeof(ifbmne);
3607 		len -= sizeof(ifbmne);
3608 	}
3609 	*count_p = count;
3610 	*len_p = len;
3611 	return buf;
3612 }
3613 
3614 /*
3615  * bridge_ioctl_gmnelist()
3616  *   Perform the get mac_nat_entry list ioctl.
3617  *
3618  * Note:
3619  *   The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3620  *   field size/layout except for the last field ifbml_buf, the user-supplied
3621  *   buffer pointer. That is passed in separately via the 'user_addr'
3622  *   parameter from the respective 32-bit or 64-bit ioctl routine.
3623  */
3624 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3625 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3626     user_addr_t user_addr)
3627 {
3628 	unsigned int            count;
3629 	char                    *buf;
3630 	int                     error = 0;
3631 	char                    *outbuf = NULL;
3632 	struct mac_nat_entry    *mne;
3633 	unsigned int            buflen;
3634 	unsigned int            len;
3635 
3636 	mnl->ifbml_elsize = sizeof(struct ifbrmne);
3637 	count = 0;
3638 	LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3639 		count++;
3640 	}
3641 	LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3642 		count++;
3643 	}
3644 	buflen = sizeof(struct ifbrmne) * count;
3645 	if (buflen == 0 || mnl->ifbml_len == 0) {
3646 		mnl->ifbml_len = buflen;
3647 		return error;
3648 	}
3649 	BRIDGE_UNLOCK(sc);
3650 	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3651 	BRIDGE_LOCK(sc);
3652 	count = 0;
3653 	buf = outbuf;
3654 	len = min(mnl->ifbml_len, buflen);
3655 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3656 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3657 	mnl->ifbml_len = count * sizeof(struct ifbrmne);
3658 	BRIDGE_UNLOCK(sc);
3659 	error = copyout(outbuf, user_addr, mnl->ifbml_len);
3660 	kfree_data(outbuf, buflen);
3661 	BRIDGE_LOCK(sc);
3662 	return error;
3663 }
3664 
3665 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * arg)3666 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *arg)
3667 {
3668 	struct ifbrmnelist64 *mnl = arg;
3669 
3670 	return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
3671 }
3672 
3673 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * arg)3674 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *arg)
3675 {
3676 	struct ifbrmnelist32 *mnl = arg;
3677 
3678 	return bridge_ioctl_gmnelist(sc, arg,
3679 	           CAST_USER_ADDR_T(mnl->ifbml_buf));
3680 }
3681 
3682 /*
3683  * bridge_ioctl_gifstats()
3684  *   Return per-member stats.
3685  *
3686  * Note:
3687  *   The ifbrmreq32 and ifbrmreq64 structures have the same
3688  *   field size/layout except for the last field brmr_buf, the user-supplied
3689  *   buffer pointer. That is passed in separately via the 'user_addr'
3690  *   parameter from the respective 32-bit or 64-bit ioctl routine.
3691  */
3692 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)3693 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
3694     user_addr_t user_addr)
3695 {
3696 	struct bridge_iflist    *bif;
3697 	int                     error = 0;
3698 	unsigned int            buflen;
3699 
3700 	bif = bridge_lookup_member(sc, mreq->brmr_ifname);
3701 	if (bif == NULL) {
3702 		error = ENOENT;
3703 		goto done;
3704 	}
3705 
3706 	buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
3707 	if (buflen == 0 || mreq->brmr_len == 0) {
3708 		mreq->brmr_len = buflen;
3709 		goto done;
3710 	}
3711 	if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
3712 		error = ENOBUFS;
3713 		goto done;
3714 	}
3715 	mreq->brmr_len = buflen;
3716 	error = copyout(&bif->bif_stats, user_addr, buflen);
3717 done:
3718 	return error;
3719 }
3720 
3721 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * arg)3722 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *arg)
3723 {
3724 	struct ifbrmreq32 *mreq = arg;
3725 
3726 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3727 }
3728 
3729 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * arg)3730 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *arg)
3731 {
3732 	struct ifbrmreq64 *mreq = arg;
3733 
3734 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3735 }
3736 
3737 /*
3738  * bridge_proto_attach_changed
3739  *
3740  *	Called when protocol attachment on the interface changes.
3741  */
3742 static void
bridge_proto_attach_changed(struct ifnet * ifp)3743 bridge_proto_attach_changed(struct ifnet *ifp)
3744 {
3745 	boolean_t changed = FALSE;
3746 	struct bridge_iflist *bif;
3747 	boolean_t input_broadcast;
3748 	struct bridge_softc *sc = ifp->if_bridge;
3749 
3750 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3751 	if (sc == NULL) {
3752 		return;
3753 	}
3754 	/*
3755 	 * Selectively enable input broadcast only when necessary.
3756 	 * The bridge interface itself attaches a fake protocol
3757 	 * so checking for at least two protocols means that the
3758 	 * interface is being used for something besides bridging.
3759 	 */
3760 	input_broadcast = if_get_protolist(ifp, NULL, 0) >= 2;
3761 	BRIDGE_LOCK(sc);
3762 	bif = bridge_lookup_member_if(sc, ifp);
3763 	if (bif != NULL) {
3764 		if (input_broadcast) {
3765 			if ((bif->bif_flags & BIFF_INPUT_BROADCAST) == 0) {
3766 				bif->bif_flags |= BIFF_INPUT_BROADCAST;
3767 				changed = TRUE;
3768 			}
3769 		} else if ((bif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
3770 			changed = TRUE;
3771 			bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
3772 		}
3773 	}
3774 	BRIDGE_UNLOCK(sc);
3775 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
3776 	    "%s input broadcast %s", ifp->if_xname,
3777 	    input_broadcast ? "ENABLED" : "DISABLED");
3778 	return;
3779 }
3780 
3781 /*
3782  * interface_media_active:
3783  *
3784  *	Tells if an interface media is active.
3785  */
3786 static int
interface_media_active(struct ifnet * ifp)3787 interface_media_active(struct ifnet *ifp)
3788 {
3789 	struct ifmediareq   ifmr;
3790 	int status = 0;
3791 
3792 	bzero(&ifmr, sizeof(ifmr));
3793 	if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
3794 		if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
3795 			status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
3796 		}
3797 	}
3798 
3799 	return status;
3800 }
3801 
3802 /*
3803  * bridge_updatelinkstatus:
3804  *
3805  *      Update the media active status of the bridge based on the
3806  *	media active status of its member.
3807  *	If changed, return the corresponding onf/off link event.
3808  */
3809 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)3810 bridge_updatelinkstatus(struct bridge_softc *sc)
3811 {
3812 	struct bridge_iflist *bif;
3813 	int active_member = 0;
3814 	u_int32_t event_code = 0;
3815 
3816 	BRIDGE_LOCK_ASSERT_HELD(sc);
3817 
3818 	/*
3819 	 * Find out if we have an active interface
3820 	 */
3821 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
3822 		if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
3823 			active_member = 1;
3824 			break;
3825 		}
3826 	}
3827 
3828 	if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3829 		sc->sc_flags |= SCF_MEDIA_ACTIVE;
3830 		event_code = KEV_DL_LINK_ON;
3831 	} else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3832 		sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
3833 		event_code = KEV_DL_LINK_OFF;
3834 	}
3835 
3836 	return event_code;
3837 }
3838 
3839 /*
3840  * bridge_iflinkevent:
3841  */
3842 static void
bridge_iflinkevent(struct ifnet * ifp)3843 bridge_iflinkevent(struct ifnet *ifp)
3844 {
3845 	struct bridge_softc *sc = ifp->if_bridge;
3846 	struct bridge_iflist *bif;
3847 	u_int32_t event_code = 0;
3848 	int media_active;
3849 
3850 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3851 
3852 	/* Check if the interface is a bridge member */
3853 	if (sc == NULL) {
3854 		return;
3855 	}
3856 
3857 	media_active = interface_media_active(ifp);
3858 	BRIDGE_LOCK(sc);
3859 	bif = bridge_lookup_member_if(sc, ifp);
3860 	if (bif != NULL) {
3861 		if (media_active) {
3862 			bif->bif_flags |= BIFF_MEDIA_ACTIVE;
3863 		} else {
3864 			bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
3865 		}
3866 		if (sc->sc_mac_nat_bif != NULL) {
3867 			bridge_mac_nat_flush_entries(sc, bif);
3868 		}
3869 
3870 		event_code = bridge_updatelinkstatus(sc);
3871 	}
3872 	BRIDGE_UNLOCK(sc);
3873 
3874 	if (event_code != 0) {
3875 		bridge_link_event(sc->sc_ifp, event_code);
3876 	}
3877 }
3878 
3879 /*
3880  * bridge_delayed_callback:
3881  *
3882  *	Makes a delayed call
3883  */
3884 static void
bridge_delayed_callback(void * param)3885 bridge_delayed_callback(void *param)
3886 {
3887 	struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
3888 	struct bridge_softc *sc = call->bdc_sc;
3889 
3890 #if BRIDGE_DELAYED_CALLBACK_DEBUG
3891 	if (bridge_delayed_callback_delay > 0) {
3892 		struct timespec ts;
3893 
3894 		ts.tv_sec = bridge_delayed_callback_delay;
3895 		ts.tv_nsec = 0;
3896 
3897 		BRIDGE_LOG(LOG_NOTICE, 0,
3898 		    "sleeping for %d seconds",
3899 		    bridge_delayed_callback_delay);
3900 
3901 		msleep(&bridge_delayed_callback_delay, NULL, PZERO,
3902 		    __func__, &ts);
3903 
3904 		BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
3905 	}
3906 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
3907 
3908 	BRIDGE_LOCK(sc);
3909 
3910 #if BRIDGE_DELAYED_CALLBACK_DEBUG
3911 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
3912 	    "%s call 0x%llx flags 0x%x",
3913 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
3914 	    call->bdc_flags);
3915 }
3916 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
3917 
3918 	if (call->bdc_flags & BDCF_CANCELLING) {
3919 		wakeup(call);
3920 	} else {
3921 		if ((sc->sc_flags & SCF_DETACHING) == 0) {
3922 			(*call->bdc_func)(sc);
3923 		}
3924 	}
3925 	call->bdc_flags &= ~BDCF_OUTSTANDING;
3926 	BRIDGE_UNLOCK(sc);
3927 }
3928 
3929 /*
3930  * bridge_schedule_delayed_call:
3931  *
3932  *	Schedule a function to be called on a separate thread
3933  *      The actual call may be scheduled to run at a given time or ASAP.
3934  */
3935 static void
3936 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
3937 {
3938 	uint64_t deadline = 0;
3939 	struct bridge_softc *sc = call->bdc_sc;
3940 
3941 	BRIDGE_LOCK_ASSERT_HELD(sc);
3942 
3943 	if ((sc->sc_flags & SCF_DETACHING) ||
3944 	    (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
3945 		return;
3946 	}
3947 
3948 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
3949 		nanoseconds_to_absolutetime(
3950 			(uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
3951 			call->bdc_ts.tv_nsec, &deadline);
3952 		clock_absolutetime_interval_to_deadline(deadline, &deadline);
3953 	}
3954 
3955 	call->bdc_flags = BDCF_OUTSTANDING;
3956 
3957 #if BRIDGE_DELAYED_CALLBACK_DEBUG
3958 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
3959 	    "%s call 0x%llx flags 0x%x",
3960 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
3961 	    call->bdc_flags);
3962 }
3963 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
3964 
3965 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
3966 		thread_call_func_delayed(
3967 			(thread_call_func_t)bridge_delayed_callback,
3968 			call, deadline);
3969 	} else {
3970 		if (call->bdc_thread_call == NULL) {
3971 			call->bdc_thread_call = thread_call_allocate(
3972 				(thread_call_func_t)bridge_delayed_callback,
3973 				call);
3974 		}
3975 		thread_call_enter(call->bdc_thread_call);
3976 	}
3977 }
3978 
3979 /*
3980  * bridge_cancel_delayed_call:
3981  *
3982  *	Cancel a queued or running delayed call.
3983  *	If call is running, does not return until the call is done to
3984  *	prevent race condition with the brigde interface getting destroyed
3985  */
3986 static void
3987 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
3988 {
3989 	boolean_t result;
3990 	struct bridge_softc *sc = call->bdc_sc;
3991 
3992 	/*
3993 	 * The call was never scheduled
3994 	 */
3995 	if (sc == NULL) {
3996 		return;
3997 	}
3998 
3999 	BRIDGE_LOCK_ASSERT_HELD(sc);
4000 
4001 	call->bdc_flags |= BDCF_CANCELLING;
4002 
4003 	while (call->bdc_flags & BDCF_OUTSTANDING) {
4004 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4005 		    "%s call 0x%llx flags 0x%x",
4006 		    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4007 		    call->bdc_flags);
4008 		result = thread_call_func_cancel(
4009 			(thread_call_func_t)bridge_delayed_callback, call, FALSE);
4010 
4011 		if (result) {
4012 			/*
4013 			 * We managed to dequeue the delayed call
4014 			 */
4015 			call->bdc_flags &= ~BDCF_OUTSTANDING;
4016 		} else {
4017 			/*
4018 			 * Wait for delayed call do be done running
4019 			 */
4020 			msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4021 		}
4022 	}
4023 	call->bdc_flags &= ~BDCF_CANCELLING;
4024 }
4025 
4026 /*
4027  * bridge_cleanup_delayed_call:
4028  *
4029  *	Dispose resource allocated for a delayed call
4030  *	Assume the delayed call is not queued or running .
4031  */
4032 static void
4033 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4034 {
4035 	boolean_t result;
4036 	struct bridge_softc *sc = call->bdc_sc;
4037 
4038 	/*
4039 	 * The call was never scheduled
4040 	 */
4041 	if (sc == NULL) {
4042 		return;
4043 	}
4044 
4045 	BRIDGE_LOCK_ASSERT_HELD(sc);
4046 
4047 	VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4048 	VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4049 
4050 	if (call->bdc_thread_call != NULL) {
4051 		result = thread_call_free(call->bdc_thread_call);
4052 		if (result == FALSE) {
4053 			panic("%s thread_call_free() failed for call %p",
4054 			    __func__, call);
4055 		}
4056 		call->bdc_thread_call = NULL;
4057 	}
4058 }
4059 
4060 /*
4061  * bridge_init:
4062  *
4063  *	Initialize a bridge interface.
4064  */
4065 static int
4066 bridge_init(struct ifnet *ifp)
4067 {
4068 	struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4069 	errno_t error;
4070 
4071 	BRIDGE_LOCK_ASSERT_HELD(sc);
4072 
4073 	if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4074 		return 0;
4075 	}
4076 
4077 	error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4078 
4079 	/*
4080 	 * Calling bridge_aging_timer() is OK as there are no entries to
4081 	 * age so we're just going to arm the timer
4082 	 */
4083 	bridge_aging_timer(sc);
4084 #if BRIDGESTP
4085 	if (error == 0) {
4086 		bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4087 	}
4088 #endif /* BRIDGESTP */
4089 	return error;
4090 }
4091 
4092 /*
4093  * bridge_ifstop:
4094  *
4095  *	Stop the bridge interface.
4096  */
4097 static void
4098 bridge_ifstop(struct ifnet *ifp, int disable)
4099 {
4100 #pragma unused(disable)
4101 	struct bridge_softc *sc = ifp->if_softc;
4102 
4103 	BRIDGE_LOCK_ASSERT_HELD(sc);
4104 
4105 	if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4106 		return;
4107 	}
4108 
4109 	bridge_cancel_delayed_call(&sc->sc_aging_timer);
4110 
4111 #if BRIDGESTP
4112 	bstp_stop(&sc->sc_stp);
4113 #endif /* BRIDGESTP */
4114 
4115 	bridge_rtflush(sc, IFBF_FLUSHDYN);
4116 	(void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4117 }
4118 
4119 /*
4120  * bridge_compute_cksum:
4121  *
4122  *	If the packet has checksum flags, compare the hardware checksum
4123  *	capabilities of the source and destination interfaces. If they
4124  *	are the same, there's nothing to do. If they are different,
4125  *	finalize the checksum so that it can be sent on the destination
4126  *	interface.
4127  */
4128 static void
4129 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4130 {
4131 	uint32_t csum_flags;
4132 	uint16_t dst_hw_csum;
4133 	uint32_t did_sw = 0;
4134 	struct ether_header *eh;
4135 	uint16_t src_hw_csum;
4136 
4137 	if (src_if == dst_if) {
4138 		return;
4139 	}
4140 	csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4141 	if (csum_flags == 0) {
4142 		/* no checksum offload */
4143 		return;
4144 	}
4145 
4146 	/*
4147 	 * if destination/source differ in checksum offload
4148 	 * capabilities, finalize/compute the checksum
4149 	 */
4150 	dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4151 	src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4152 	if (dst_hw_csum == src_hw_csum) {
4153 		return;
4154 	}
4155 	eh = mtod(m, struct ether_header *);
4156 	switch (ntohs(eh->ether_type)) {
4157 	case ETHERTYPE_IP:
4158 		did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4159 		break;
4160 	case ETHERTYPE_IPV6:
4161 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4162 		break;
4163 	}
4164 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4165 	    "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4166 	    src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4167 	    m->m_pkthdr.csum_flags);
4168 }
4169 
4170 static errno_t
4171 bridge_transmit(struct ifnet * ifp, struct mbuf *m)
4172 {
4173 	struct flowadv  adv = { .code = FADV_SUCCESS };
4174 	errno_t         error;
4175 
4176 	error = dlil_output(ifp, 0, m, NULL, NULL, 1, &adv);
4177 	if (error == 0) {
4178 		if (adv.code == FADV_FLOW_CONTROLLED) {
4179 			error = EQFULL;
4180 		} else if (adv.code == FADV_SUSPENDED) {
4181 			error = EQSUSPENDED;
4182 		}
4183 	}
4184 	return error;
4185 }
4186 
4187 static u_int16_t
4188 get_ether_type(struct mbuf * m)
4189 {
4190 	struct ether_header     *eh;
4191 
4192 	eh = mtod(m, struct ether_header *);
4193 	return ntohs(eh->ether_type);
4194 }
4195 
4196 static int
4197 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4198     bool *is_fragmented)
4199 {
4200 	int newoff;
4201 
4202 	*is_fragmented = false;
4203 	while (1) {
4204 		newoff = ip6_nexthdr(m, off, proto, nxtp);
4205 		if (newoff < 0) {
4206 			return off;
4207 		} else if (newoff < off) {
4208 			return -1;    /* invalid */
4209 		} else if (newoff == off) {
4210 			return newoff;
4211 		}
4212 		off = newoff;
4213 		proto = *nxtp;
4214 		if (proto == IPPROTO_FRAGMENT) {
4215 			*is_fragmented = true;
4216 		}
4217 	}
4218 }
4219 
4220 static int
4221 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4222     ip_packet_info_t info_p, struct bripstats * stats_p)
4223 {
4224 	int             error = 0;
4225 	u_int           hlen;
4226 	u_int           ip_hlen;
4227 	u_int           ip_pay_len;
4228 	struct mbuf *   m0 = *mp;
4229 	int             off;
4230 	int             opt_len = 0;
4231 	int             proto = 0;
4232 
4233 	bzero(info_p, sizeof(*info_p));
4234 	if (is_ipv4) {
4235 		struct ip *     ip;
4236 		u_int           ip_total_len;
4237 
4238 		/* IPv4 */
4239 		hlen = mac_hlen + sizeof(struct ip);
4240 		if (m0->m_pkthdr.len < hlen) {
4241 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4242 			    "Short IP packet %d < %d",
4243 			    m0->m_pkthdr.len, hlen);
4244 			error = _EBADIP;
4245 			stats_p->bips_bad_ip++;
4246 			goto done;
4247 		}
4248 		if (m0->m_len < hlen) {
4249 			*mp = m0 = m_pullup(m0, hlen);
4250 			if (m0 == NULL) {
4251 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4252 				    "m_pullup failed hlen %d",
4253 				    hlen);
4254 				error = ENOBUFS;
4255 				stats_p->bips_bad_ip++;
4256 				goto done;
4257 			}
4258 		}
4259 		ip = (struct ip *)(void *)(mtod(m0, uint8_t *) + mac_hlen);
4260 		if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4261 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4262 			    "bad IP version");
4263 			error = _EBADIP;
4264 			stats_p->bips_bad_ip++;
4265 			goto done;
4266 		}
4267 		ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4268 		if (ip_hlen < sizeof(struct ip)) {
4269 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4270 			    "bad IP header length %d < %d",
4271 			    ip_hlen,
4272 			    (int)sizeof(struct ip));
4273 			error = _EBADIP;
4274 			stats_p->bips_bad_ip++;
4275 			goto done;
4276 		}
4277 		hlen = mac_hlen + ip_hlen;
4278 		if (m0->m_len < hlen) {
4279 			*mp = m0 = m_pullup(m0, hlen);
4280 			if (m0 == NULL) {
4281 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4282 				    "m_pullup failed hlen %d",
4283 				    hlen);
4284 				error = ENOBUFS;
4285 				stats_p->bips_bad_ip++;
4286 				goto done;
4287 			}
4288 		}
4289 
4290 		ip_total_len = ntohs(ip->ip_len);
4291 		if (ip_total_len < ip_hlen) {
4292 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4293 			    "IP total len %d < header len %d",
4294 			    ip_total_len, ip_hlen);
4295 			error = _EBADIP;
4296 			stats_p->bips_bad_ip++;
4297 			goto done;
4298 		}
4299 		if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4300 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4301 			    "invalid IP payload length %d > %d",
4302 			    ip_total_len,
4303 			    (m0->m_pkthdr.len - mac_hlen));
4304 			error = _EBADIP;
4305 			stats_p->bips_bad_ip++;
4306 			goto done;
4307 		}
4308 		ip_pay_len = ip_total_len - ip_hlen;
4309 		info_p->ip_proto = ip->ip_p;
4310 		info_p->ip_hdr.ip = ip;
4311 #define FRAG_BITS       (IP_OFFMASK | IP_MF)
4312 		if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4313 			info_p->ip_is_fragmented = true;
4314 		}
4315 		stats_p->bips_ip++;
4316 	} else {
4317 		struct ip6_hdr *ip6;
4318 
4319 		/* IPv6 */
4320 		hlen = mac_hlen + sizeof(struct ip6_hdr);
4321 		if (m0->m_pkthdr.len < hlen) {
4322 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4323 			    "short IPv6 packet %d < %d",
4324 			    m0->m_pkthdr.len, hlen);
4325 			error = _EBADIPV6;
4326 			stats_p->bips_bad_ip6++;
4327 			goto done;
4328 		}
4329 		if (m0->m_len < hlen) {
4330 			*mp = m0 = m_pullup(m0, hlen);
4331 			if (m0 == NULL) {
4332 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4333 				    "m_pullup failed hlen %d",
4334 				    hlen);
4335 				error = ENOBUFS;
4336 				stats_p->bips_bad_ip6++;
4337 				goto done;
4338 			}
4339 		}
4340 		ip6 = (struct ip6_hdr *)(mtod(m0, uint8_t *) + mac_hlen);
4341 		if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4342 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4343 			    "bad IPv6 version");
4344 			error = _EBADIPV6;
4345 			stats_p->bips_bad_ip6++;
4346 			goto done;
4347 		}
4348 		off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4349 		    &info_p->ip_is_fragmented);
4350 		if (off < 0 || m0->m_pkthdr.len < off) {
4351 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4352 			    "ip6_lasthdr() returned %d",
4353 			    off);
4354 			error = _EBADIPV6;
4355 			stats_p->bips_bad_ip6++;
4356 			goto done;
4357 		}
4358 		ip_hlen = sizeof(*ip6);
4359 		opt_len = off - mac_hlen - ip_hlen;
4360 		if (opt_len < 0) {
4361 			error = _EBADIPV6;
4362 			stats_p->bips_bad_ip6++;
4363 			goto done;
4364 		}
4365 		info_p->ip_proto = proto;
4366 		info_p->ip_hdr.ip6 = ip6;
4367 		ip_pay_len = ntohs(ip6->ip6_plen);
4368 		if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4369 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4370 			    "invalid IPv6 payload length %d > %d",
4371 			    ip_pay_len,
4372 			    (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4373 			error = _EBADIPV6;
4374 			stats_p->bips_bad_ip6++;
4375 			goto done;
4376 		}
4377 		stats_p->bips_ip6++;
4378 	}
4379 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4380 	    "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4381 	    is_ipv4 ? '4' : '6',
4382 	    proto, ip_hlen, ip_pay_len, opt_len,
4383 	    m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4384 	info_p->ip_hlen = ip_hlen;
4385 	info_p->ip_pay_len = ip_pay_len;
4386 	info_p->ip_opt_len = opt_len;
4387 
4388 done:
4389 	return error;
4390 }
4391 
4392 static int
4393 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4394     ip_packet_info_t info_p, struct bripstats * stats_p)
4395 {
4396 	int             error;
4397 	u_int           hlen;
4398 
4399 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4400 	if (error != 0) {
4401 		goto done;
4402 	}
4403 	if (info_p->ip_proto != IPPROTO_TCP) {
4404 		/* not a TCP frame, not an error, just a bad guess */
4405 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4406 		    "non-TCP (%d) IPv%c frame %d bytes",
4407 		    info_p->ip_proto, is_ipv4 ? '4' : '6',
4408 		    (*mp)->m_pkthdr.len);
4409 		goto done;
4410 	}
4411 	if (info_p->ip_is_fragmented) {
4412 		/* both TSO and IP fragmentation don't make sense */
4413 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4414 		    "fragmented TSO packet?");
4415 		stats_p->bips_bad_tcp++;
4416 		error = _EBADTCP;
4417 		goto done;
4418 	}
4419 	hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4420 	    info_p->ip_opt_len;
4421 	if ((*mp)->m_len < hlen) {
4422 		*mp = m_pullup(*mp, hlen);
4423 		if (*mp == NULL) {
4424 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4425 			    "m_pullup %d failed",
4426 			    hlen);
4427 			stats_p->bips_bad_tcp++;
4428 			error = _EBADTCP;
4429 			goto done;
4430 		}
4431 	}
4432 	info_p->ip_proto_hdr = ((caddr_t)info_p->ip_hdr.ptr) +
4433 	    info_p->ip_hlen + info_p->ip_opt_len;
4434 done:
4435 	return error;
4436 }
4437 
4438 static inline void
4439 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4440 {
4441 	if (proto == IPPROTO_TCP) {
4442 		stats_p->brcs_tcp_checksum++;
4443 	} else {
4444 		stats_p->brcs_udp_checksum++;
4445 	}
4446 	return;
4447 }
4448 
4449 static errno_t
4450 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4451 {
4452 	struct brcsumstats *csum_stats_p;
4453 	errno_t         error = 0;
4454 	u_int16_t       ether_type;
4455 	ip_packet_info  info;
4456 	bool            is_ipv4;
4457 	struct mbuf *   m;
4458 	u_int           mac_hlen = sizeof(struct ether_header);
4459 	uint16_t        sum;
4460 	bool            valid;
4461 
4462 	ether_type = get_ether_type(*mp);
4463 	switch (ether_type) {
4464 	case ETHERTYPE_IP:
4465 		is_ipv4 = true;
4466 		break;
4467 	case ETHERTYPE_IPV6:
4468 		is_ipv4 = false;
4469 		break;
4470 	default:
4471 		goto done;
4472 	}
4473 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4474 	    &stats_p->brms_out_ip);
4475 	if (error != 0) {
4476 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4477 		    "bridge_get_ip_proto failed %d",
4478 		    error);
4479 		goto done;
4480 	}
4481 	m = *mp;
4482 	if (is_ipv4) {
4483 		if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4484 			/* hardware offloaded IP header checksum */
4485 			valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4486 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4487 			    "IP checksum HW %svalid",
4488 			    valid ? "" : "in");
4489 			if (!valid) {
4490 				stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum++;
4491 				error = _EBADIPCHECKSUM;
4492 				goto done;
4493 			}
4494 			stats_p->brms_out_cksum_good_hw.brcs_ip_checksum++;
4495 		} else {
4496 			/* verify */
4497 			sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4498 			valid = (sum == 0);
4499 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4500 			    "IP checksum SW %svalid",
4501 			    valid ? "" : "in");
4502 			if (!valid) {
4503 				stats_p->brms_out_cksum_bad.brcs_ip_checksum++;
4504 				error = _EBADIPCHECKSUM;
4505 				goto done;
4506 			}
4507 			stats_p->brms_out_cksum_good.brcs_ip_checksum++;
4508 		}
4509 	}
4510 	if (info.ip_is_fragmented) {
4511 		/* can't verify checksum on fragmented packets */
4512 		goto done;
4513 	}
4514 	switch (info.ip_proto) {
4515 	case IPPROTO_TCP:
4516 		stats_p->brms_out_ip.bips_tcp++;
4517 		break;
4518 	case IPPROTO_UDP:
4519 		stats_p->brms_out_ip.bips_udp++;
4520 		break;
4521 	default:
4522 		goto done;
4523 	}
4524 	/* check for hardware offloaded UDP/TCP checksum */
4525 #define HW_CSUM         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4526 	if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4527 		/* checksum verified by hardware */
4528 		valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4529 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4530 		    "IPv%c %s checksum HW 0x%x %svalid",
4531 		    is_ipv4 ? '4' : '6',
4532 		    (info.ip_proto == IPPROTO_TCP)
4533 		    ? "TCP" : "UDP",
4534 		    m->m_pkthdr.csum_data,
4535 		    valid ? "" : "in" );
4536 		if (!valid) {
4537 			/* bad checksum */
4538 			csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4539 			error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4540 			    : _EBADTCPCHECKSUM;
4541 		} else {
4542 			/* good checksum */
4543 			csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4544 		}
4545 		proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4546 		goto done;
4547 	}
4548 	m->m_data += mac_hlen;
4549 	m->m_len -= mac_hlen;
4550 	m->m_pkthdr.len -= mac_hlen;
4551 	if (is_ipv4) {
4552 		sum = inet_cksum(m, info.ip_proto,
4553 		    info.ip_hlen,
4554 		    info.ip_pay_len);
4555 	} else {
4556 		sum = inet6_cksum(m, info.ip_proto,
4557 		    info.ip_hlen + info.ip_opt_len,
4558 		    info.ip_pay_len - info.ip_opt_len);
4559 	}
4560 	valid = (sum == 0);
4561 	if (valid) {
4562 		csum_stats_p = &stats_p->brms_out_cksum_good;
4563 	} else {
4564 		csum_stats_p = &stats_p->brms_out_cksum_bad;
4565 		error = (info.ip_proto == IPPROTO_TCP)
4566 		    ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4567 	}
4568 	proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4569 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4570 	    "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4571 	    is_ipv4 ? '4' : '6',
4572 	    (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4573 	    valid ? "" : "in",
4574 	    sum, info.ip_hlen, info.ip_pay_len);
4575 	m->m_data -= mac_hlen;
4576 	m->m_len += mac_hlen;
4577 	m->m_pkthdr.len += mac_hlen;
4578 done:
4579 	return error;
4580 }
4581 
4582 static errno_t
4583 bridge_offload_checksum(struct mbuf * * mp, struct ifbrmstats * stats_p)
4584 {
4585 	uint16_t *      csum_p;
4586 	errno_t         error = 0;
4587 	u_int16_t       ether_type;
4588 	u_int           hlen;
4589 	ip_packet_info  info;
4590 	bool            is_ipv4;
4591 	struct mbuf *   m0 = *mp;
4592 	u_int           mac_hlen = sizeof(struct ether_header);
4593 	u_int           pkt_hdr_len;
4594 	struct tcphdr * tcp;
4595 	u_int           tcp_hlen;
4596 	struct udphdr * udp;
4597 
4598 	ether_type = get_ether_type(m0);
4599 	switch (ether_type) {
4600 	case ETHERTYPE_IP:
4601 		is_ipv4 = true;
4602 		break;
4603 	case ETHERTYPE_IPV6:
4604 		is_ipv4 = false;
4605 		break;
4606 	default:
4607 		goto done;
4608 	}
4609 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4610 	    &stats_p->brms_in_ip);
4611 	if (error != 0) {
4612 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4613 		    "bridge_get_ip_proto failed %d",
4614 		    error);
4615 		goto done;
4616 	}
4617 	if (is_ipv4) {
4618 		/* compute IP header checksum */
4619 		info.ip_hdr.ip->ip_sum = 0;
4620 		info.ip_hdr.ip->ip_sum = inet_cksum(m0, 0, mac_hlen,
4621 		    info.ip_hlen);
4622 		stats_p->brms_in_computed_cksum.brcs_ip_checksum++;
4623 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4624 		    "IPv4 checksum 0x%x",
4625 		    ntohs(info.ip_hdr.ip->ip_sum));
4626 	}
4627 	if (info.ip_is_fragmented) {
4628 		/* can't compute checksum on fragmented packets */
4629 		goto done;
4630 	}
4631 	pkt_hdr_len = m0->m_pkthdr.len;
4632 	switch (info.ip_proto) {
4633 	case IPPROTO_TCP:
4634 		hlen = mac_hlen + info.ip_hlen + info.ip_opt_len
4635 		    + sizeof(struct tcphdr);
4636 		if (m0->m_len < hlen) {
4637 			*mp = m0 = m_pullup(m0, hlen);
4638 			if (m0 == NULL) {
4639 				stats_p->brms_in_ip.bips_bad_tcp++;
4640 				error = _EBADTCP;
4641 				goto done;
4642 			}
4643 		}
4644 		tcp = (struct tcphdr *)(void *)
4645 		    ((caddr_t)info.ip_hdr.ptr + info.ip_hlen
4646 		    + info.ip_opt_len);
4647 		tcp_hlen = tcp->th_off << 2;
4648 		hlen = mac_hlen + info.ip_hlen + info.ip_opt_len + tcp_hlen;
4649 		if (hlen > pkt_hdr_len) {
4650 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4651 			    "bad tcp header length %u",
4652 			    tcp_hlen);
4653 			stats_p->brms_in_ip.bips_bad_tcp++;
4654 			error = _EBADTCP;
4655 			goto done;
4656 		}
4657 		csum_p = &tcp->th_sum;
4658 		stats_p->brms_in_ip.bips_tcp++;
4659 		break;
4660 	case IPPROTO_UDP:
4661 		hlen = mac_hlen + info.ip_hlen + info.ip_opt_len + sizeof(*udp);
4662 		if (m0->m_len < hlen) {
4663 			*mp = m0 = m_pullup(m0, hlen);
4664 			if (m0 == NULL) {
4665 				stats_p->brms_in_ip.bips_bad_udp++;
4666 				error = ENOBUFS;
4667 				goto done;
4668 			}
4669 		}
4670 		udp = (struct udphdr *)(void *)
4671 		    ((caddr_t)info.ip_hdr.ptr + info.ip_hlen
4672 		    + info.ip_opt_len);
4673 		csum_p = &udp->uh_sum;
4674 		stats_p->brms_in_ip.bips_udp++;
4675 		break;
4676 	default:
4677 		/* not TCP or UDP */
4678 		goto done;
4679 	}
4680 	*csum_p = 0;
4681 	m0->m_data += mac_hlen;
4682 	m0->m_len -= mac_hlen;
4683 	m0->m_pkthdr.len -= mac_hlen;
4684 	if (is_ipv4) {
4685 		*csum_p = inet_cksum(m0, info.ip_proto, info.ip_hlen,
4686 		    info.ip_pay_len);
4687 	} else {
4688 		*csum_p = inet6_cksum(m0, info.ip_proto,
4689 		    info.ip_hlen + info.ip_opt_len,
4690 		    info.ip_pay_len - info.ip_opt_len);
4691 	}
4692 	if (info.ip_proto == IPPROTO_UDP && *csum_p == 0) {
4693 		/* RFC 1122 4.1.3.4 */
4694 		*csum_p = 0xffff;
4695 	}
4696 	m0->m_data -= mac_hlen;
4697 	m0->m_len += mac_hlen;
4698 	m0->m_pkthdr.len += mac_hlen;
4699 	proto_csum_stats_increment(info.ip_proto,
4700 	    &stats_p->brms_in_computed_cksum);
4701 
4702 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4703 	    "IPv%c %s set checksum 0x%x",
4704 	    is_ipv4 ? '4' : '6',
4705 	    (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4706 	    ntohs(*csum_p));
4707 done:
4708 	return error;
4709 }
4710 
4711 static errno_t
4712 bridge_send(struct ifnet *src_ifp,
4713     struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4714 {
4715 	switch (cksum_op) {
4716 	case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
4717 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4718 		break;
4719 	case CHECKSUM_OPERATION_FINALIZE:
4720 		/* the checksum might not be correct, finalize now */
4721 		bridge_finalize_cksum(dst_ifp, m);
4722 		break;
4723 	case CHECKSUM_OPERATION_COMPUTE:
4724 		bridge_compute_cksum(src_ifp, dst_ifp, m);
4725 		break;
4726 	default:
4727 		break;
4728 	}
4729 #if HAS_IF_CAP
4730 	/*
4731 	 * If underlying interface can not do VLAN tag insertion itself
4732 	 * then attach a packet tag that holds it.
4733 	 */
4734 	if ((m->m_flags & M_VLANTAG) &&
4735 	    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4736 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4737 		if (m == NULL) {
4738 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4739 			    "%s: unable to prepend VLAN header",
4740 			    dst_ifp->if_xname);
4741 			(void) ifnet_stat_increment_out(dst_ifp,
4742 			    0, 0, 1);
4743 			return 0;
4744 		}
4745 		m->m_flags &= ~M_VLANTAG;
4746 	}
4747 #endif /* HAS_IF_CAP */
4748 	return bridge_transmit(dst_ifp, m);
4749 }
4750 
4751 static errno_t
4752 bridge_send_tso(struct ifnet *dst_ifp, struct mbuf *m, bool is_ipv4)
4753 {
4754 	errno_t                 error;
4755 	u_int                   mac_hlen;
4756 
4757 	mac_hlen = sizeof(struct ether_header);
4758 
4759 #if HAS_IF_CAP
4760 	/*
4761 	 * If underlying interface can not do VLAN tag insertion itself
4762 	 * then attach a packet tag that holds it.
4763 	 */
4764 	if ((m->m_flags & M_VLANTAG) &&
4765 	    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4766 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4767 		if (m == NULL) {
4768 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4769 			    "%s: unable to prepend VLAN header",
4770 			    dst_ifp->if_xname);
4771 			(void) ifnet_stat_increment_out(dst_ifp,
4772 			    0, 0, 1);
4773 			error = ENOBUFS;
4774 			goto done;
4775 		}
4776 		m->m_flags &= ~M_VLANTAG;
4777 		mac_hlen += ETHER_VLAN_ENCAP_LEN;
4778 	}
4779 #endif /* HAS_IF_CAP */
4780 	error = gso_tcp(dst_ifp, &m, mac_hlen, is_ipv4, TRUE);
4781 	return error;
4782 }
4783 
4784 /*
4785  * tso_hwassist:
4786  * - determine whether the destination interface supports TSO offload
4787  * - if the packet is already marked for offload and the hardware supports
4788  *   it, just allow the packet to continue on
4789  * - if not, parse the packet headers to verify that this is a large TCP
4790  *   packet requiring segmentation; if the hardware doesn't support it
4791  *   set need_sw_tso; otherwise, mark the packet for TSO offload
4792  */
4793 static int
4794 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
4795     bool * need_sw_tso, bool * supports_cksum)
4796 {
4797 	int             error = 0;
4798 	u_int32_t       if_csum;
4799 	u_int32_t       if_tso;
4800 	u_int32_t       mbuf_tso;
4801 
4802 	if (is_ipv4) {
4803 		/*
4804 		 * Enable both TCP and IP offload if the hardware supports it.
4805 		 * If the hardware doesn't support TCP offload, *supports_cksum
4806 		 * will be false so we won't set either offload.
4807 		 */
4808 		if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
4809 		*supports_cksum = (if_csum & CSUM_TCP) != 0;
4810 		if_tso = IFNET_TSO_IPV4;
4811 		mbuf_tso = CSUM_TSO_IPV4;
4812 	} else {
4813 		*supports_cksum = (ifp->if_hwassist & CSUM_TCPIPV6) != 0;
4814 		if_csum = CSUM_TCPIPV6;
4815 		if_tso = IFNET_TSO_IPV6;
4816 		mbuf_tso = CSUM_TSO_IPV6;
4817 	}
4818 	*need_sw_tso = false;
4819 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4820 	    "%s: does%s support checksum 0x%x if_csum 0x%x",
4821 	    ifp->if_xname, *supports_cksum ? "" : " not",
4822 	    ifp->if_hwassist, if_csum);
4823 	if ((ifp->if_hwassist & if_tso) != 0 &&
4824 	    ((*mp)->m_pkthdr.csum_flags & mbuf_tso) != 0) {
4825 		/* hardware TSO, mbuf already marked */
4826 	} else {
4827 		/* verify that this is a large TCP frame */
4828 		uint32_t                csum_flags;
4829 		ip_packet_info          info;
4830 		u_int                   mss;
4831 		struct bripstats        stats;
4832 		struct tcphdr *         tcp;
4833 
4834 		error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
4835 		    &info, &stats);
4836 		if (error != 0) {
4837 			/* bad packet */
4838 			goto done;
4839 		}
4840 		if ((info.ip_hlen + info.ip_pay_len + info.ip_opt_len) <=
4841 		    ifp->if_mtu) {
4842 			/* not actually a large packet */
4843 			goto done;
4844 		}
4845 		if (info.ip_proto_hdr == NULL) {
4846 			/* not a TCP packet */
4847 			goto done;
4848 		}
4849 		if ((ifp->if_hwassist & if_tso) == 0) {
4850 			/* hardware does not support TSO, enable sw tso */
4851 			*need_sw_tso = if_bridge_segmentation != 0;
4852 			goto done;
4853 		}
4854 		/* use hardware TSO */
4855 		(*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
4856 		tcp = (struct tcphdr *)info.ip_proto_hdr;
4857 		mss = ifp->if_mtu - info.ip_hlen - info.ip_opt_len
4858 		    - (tcp->th_off << 2);
4859 		csum_flags = mbuf_tso;
4860 		if (*supports_cksum) {
4861 			csum_flags |= if_csum;
4862 		}
4863 		(*mp)->m_pkthdr.tso_segsz = mss;
4864 		(*mp)->m_pkthdr.csum_flags |= csum_flags;
4865 		(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
4866 	}
4867 done:
4868 	return error;
4869 }
4870 
4871 /*
4872  * bridge_enqueue:
4873  *
4874  *	Enqueue a packet on a bridge member interface.
4875  *
4876  */
4877 static errno_t
4878 bridge_enqueue(ifnet_t bridge_ifp, struct ifnet *src_ifp,
4879     struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4880 {
4881 	errno_t         error = 0;
4882 	int             len;
4883 
4884 	VERIFY(dst_ifp != NULL);
4885 
4886 	/*
4887 	 * We may be sending a fragment so traverse the mbuf
4888 	 *
4889 	 * NOTE: bridge_fragment() is called only when PFIL_HOOKS is enabled.
4890 	 */
4891 	for (struct mbuf *next_m = NULL; m != NULL; m = next_m) {
4892 		bool            need_sw_tso = false;
4893 		bool            is_large_pkt;
4894 		errno_t         _error = 0;
4895 		u_int16_t       ether_type = 0;
4896 
4897 		len = m->m_pkthdr.len;
4898 		m->m_flags |= M_PROTO1; /* set to avoid loops */
4899 		next_m = m->m_nextpkt;
4900 		m->m_nextpkt = NULL;
4901 		/*
4902 		 * Need to segment the packet if it is a large frame
4903 		 * and the destination interface does not support TSO.
4904 		 *
4905 		 * Note that with trailers, it's possible for a packet to
4906 		 * be large but not actually require segmentation.
4907 		 */
4908 		is_large_pkt = (len > (bridge_ifp->if_mtu + ETHER_HDR_LEN));
4909 		if (is_large_pkt) {
4910 			bool    hw_supports_cksum = false;
4911 
4912 			ether_type = get_ether_type(m);
4913 			switch (ether_type) {
4914 			case ETHERTYPE_IP:
4915 			case ETHERTYPE_IPV6:
4916 				_error = tso_hwassist(&m,
4917 				    (ether_type == ETHERTYPE_IP),
4918 				    dst_ifp, sizeof(struct ether_header),
4919 				    &need_sw_tso, &hw_supports_cksum);
4920 				if (_error == 0 && hw_supports_cksum) {
4921 					cksum_op = CHECKSUM_OPERATION_NONE;
4922 				}
4923 				break;
4924 			default:
4925 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4926 				    "large non IP packet");
4927 				break;
4928 			}
4929 		}
4930 		if (_error != 0) {
4931 			if (m != NULL) {
4932 				m_freem(m);
4933 			}
4934 		} else if (need_sw_tso) {
4935 			_error = bridge_send_tso(dst_ifp, m,
4936 			    (ether_type == ETHERTYPE_IP));
4937 		} else {
4938 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4939 			    "%s bridge_send(%s) len %d op %d",
4940 			    bridge_ifp->if_xname,
4941 			    dst_ifp->if_xname,
4942 			    len, cksum_op);
4943 			_error = bridge_send(src_ifp, dst_ifp, m, cksum_op);
4944 		}
4945 
4946 		/* Preserve first error value */
4947 		if (error == 0 && _error != 0) {
4948 			error = _error;
4949 		}
4950 		if (_error == 0) {
4951 			(void) ifnet_stat_increment_out(bridge_ifp, 1, len, 0);
4952 		} else {
4953 			(void) ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
4954 		}
4955 	}
4956 
4957 	return error;
4958 }
4959 
4960 #if HAS_BRIDGE_DUMMYNET
4961 /*
4962  * bridge_dummynet:
4963  *
4964  *	Receive a queued packet from dummynet and pass it on to the output
4965  *	interface.
4966  *
4967  *	The mbuf has the Ethernet header already attached.
4968  */
4969 static void
4970 bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
4971 {
4972 	struct bridge_softc *sc;
4973 
4974 	sc = ifp->if_bridge;
4975 
4976 	/*
4977 	 * The packet didn't originate from a member interface. This should only
4978 	 * ever happen if a member interface is removed while packets are
4979 	 * queued for it.
4980 	 */
4981 	if (sc == NULL) {
4982 		m_freem(m);
4983 		return;
4984 	}
4985 
4986 	if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
4987 		if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) {
4988 			return;
4989 		}
4990 		if (m == NULL) {
4991 			return;
4992 		}
4993 	}
4994 	(void) bridge_enqueue(sc->sc_ifp, NULL, ifp, m, CHECKSUM_OPERATION_NONE);
4995 }
4996 
4997 #endif /* HAS_BRIDGE_DUMMYNET */
4998 
4999 /*
5000  * bridge_member_output:
5001  *
5002  *	Send output from a bridge member interface.  This
5003  *	performs the bridging function for locally originated
5004  *	packets.
5005  *
5006  *	The mbuf has the Ethernet header already attached.
5007  */
5008 static errno_t
5009 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5010 {
5011 	ifnet_t bridge_ifp;
5012 	struct ether_header *eh;
5013 	struct ifnet *dst_if;
5014 	uint16_t vlan;
5015 	struct bridge_iflist *mac_nat_bif;
5016 	ifnet_t mac_nat_ifp;
5017 	mbuf_t m = *data;
5018 
5019 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5020 	    "ifp %s", ifp->if_xname);
5021 	if (m->m_len < ETHER_HDR_LEN) {
5022 		m = m_pullup(m, ETHER_HDR_LEN);
5023 		if (m == NULL) {
5024 			*data = NULL;
5025 			return EJUSTRETURN;
5026 		}
5027 	}
5028 
5029 	eh = mtod(m, struct ether_header *);
5030 	vlan = VLANTAGOF(m);
5031 
5032 	BRIDGE_LOCK(sc);
5033 	mac_nat_bif = sc->sc_mac_nat_bif;
5034 	mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5035 	if (mac_nat_ifp == ifp) {
5036 		/* record the IP address used by the MAC NAT interface */
5037 		(void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5038 		m = *data;
5039 		if (m == NULL) {
5040 			/* packet was deallocated */
5041 			BRIDGE_UNLOCK(sc);
5042 			return EJUSTRETURN;
5043 		}
5044 	}
5045 	bridge_ifp = sc->sc_ifp;
5046 
5047 	/*
5048 	 * APPLE MODIFICATION
5049 	 * If the packet is an 802.1X ethertype, then only send on the
5050 	 * original output interface.
5051 	 */
5052 	if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5053 		dst_if = ifp;
5054 		goto sendunicast;
5055 	}
5056 
5057 	/*
5058 	 * If bridge is down, but the original output interface is up,
5059 	 * go ahead and send out that interface.  Otherwise, the packet
5060 	 * is dropped below.
5061 	 */
5062 	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5063 		dst_if = ifp;
5064 		goto sendunicast;
5065 	}
5066 
5067 	/*
5068 	 * If the packet is a multicast, or we don't know a better way to
5069 	 * get there, send to all interfaces.
5070 	 */
5071 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5072 		dst_if = NULL;
5073 	} else {
5074 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
5075 	}
5076 	if (dst_if == NULL) {
5077 		struct bridge_iflist *bif;
5078 		struct mbuf *mc;
5079 		int used = 0;
5080 		errno_t error;
5081 
5082 
5083 		bridge_span(sc, m);
5084 
5085 		BRIDGE_LOCK2REF(sc, error);
5086 		if (error != 0) {
5087 			m_freem(m);
5088 			return EJUSTRETURN;
5089 		}
5090 
5091 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5092 			/* skip interface with inactive link status */
5093 			if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5094 				continue;
5095 			}
5096 			dst_if = bif->bif_ifp;
5097 
5098 #if 0
5099 			if (dst_if->if_type == IFT_GIF) {
5100 				continue;
5101 			}
5102 #endif
5103 			if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5104 				continue;
5105 			}
5106 			if (dst_if != ifp) {
5107 				/*
5108 				 * If this is not the original output interface,
5109 				 * and the interface is participating in spanning
5110 				 * tree, make sure the port is in a state that
5111 				 * allows forwarding.
5112 				 */
5113 				if ((bif->bif_ifflags & IFBIF_STP) &&
5114 				    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5115 					continue;
5116 				}
5117 				/*
5118 				 * If this is not the original output interface,
5119 				 * and the destination is the MAC NAT interface,
5120 				 * drop the packet. The packet can't be sent
5121 				 * if the source MAC is incorrect.
5122 				 */
5123 				if (dst_if == mac_nat_ifp) {
5124 					continue;
5125 				}
5126 			}
5127 			if (TAILQ_NEXT(bif, bif_next) == NULL) {
5128 				used = 1;
5129 				mc = m;
5130 			} else {
5131 				mc = m_dup(m, M_DONTWAIT);
5132 				if (mc == NULL) {
5133 					(void) ifnet_stat_increment_out(
5134 						bridge_ifp, 0, 0, 1);
5135 					continue;
5136 				}
5137 			}
5138 			(void) bridge_enqueue(bridge_ifp, ifp, dst_if,
5139 			    mc, CHECKSUM_OPERATION_COMPUTE);
5140 		}
5141 		if (used == 0) {
5142 			m_freem(m);
5143 		}
5144 		BRIDGE_UNREF(sc);
5145 		return EJUSTRETURN;
5146 	}
5147 
5148 sendunicast:
5149 	/*
5150 	 * XXX Spanning tree consideration here?
5151 	 */
5152 
5153 	bridge_span(sc, m);
5154 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5155 		m_freem(m);
5156 		BRIDGE_UNLOCK(sc);
5157 		return EJUSTRETURN;
5158 	}
5159 
5160 	BRIDGE_UNLOCK(sc);
5161 	if (dst_if == ifp) {
5162 		/* just let the packet continue on its way */
5163 		return 0;
5164 	}
5165 	if (dst_if != mac_nat_ifp) {
5166 		(void) bridge_enqueue(bridge_ifp, ifp, dst_if, m,
5167 		    CHECKSUM_OPERATION_COMPUTE);
5168 	} else {
5169 		/*
5170 		 * This is not the original output interface
5171 		 * and the destination is the MAC NAT interface.
5172 		 * Drop the packet because the packet can't be sent
5173 		 * if the source MAC is incorrect.
5174 		 */
5175 		m_freem(m);
5176 	}
5177 	return EJUSTRETURN;
5178 }
5179 
5180 /*
5181  * Output callback.
5182  *
5183  * This routine is called externally from above only when if_bridge_txstart
5184  * is disabled; otherwise it is called internally by bridge_start().
5185  */
5186 static int
5187 bridge_output(struct ifnet *ifp, struct mbuf *m)
5188 {
5189 	struct bridge_softc *sc = ifnet_softc(ifp);
5190 	struct ether_header *eh;
5191 	struct ifnet *dst_if = NULL;
5192 	int error = 0;
5193 
5194 	eh = mtod(m, struct ether_header *);
5195 
5196 	BRIDGE_LOCK(sc);
5197 
5198 	if (!(m->m_flags & (M_BCAST | M_MCAST))) {
5199 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0);
5200 	}
5201 
5202 	(void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5203 
5204 #if NBPFILTER > 0
5205 	if (sc->sc_bpf_output) {
5206 		bridge_bpf_output(ifp, m);
5207 	}
5208 #endif
5209 
5210 	if (dst_if == NULL) {
5211 		/* callee will unlock */
5212 		bridge_broadcast(sc, NULL, m, 0);
5213 	} else {
5214 		ifnet_t bridge_ifp;
5215 
5216 		bridge_ifp = sc->sc_ifp;
5217 		BRIDGE_UNLOCK(sc);
5218 
5219 		error = bridge_enqueue(bridge_ifp, NULL, dst_if, m,
5220 		    CHECKSUM_OPERATION_FINALIZE);
5221 	}
5222 
5223 	return error;
5224 }
5225 
5226 static void
5227 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5228 {
5229 	struct ether_header *eh = mtod(m, struct ether_header *);
5230 	uint16_t ether_type;
5231 	uint32_t sw_csum, hwcap;
5232 	uint32_t did_sw;
5233 	uint32_t csum_flags;
5234 
5235 	ether_type = ntohs(eh->ether_type);
5236 	switch (ether_type) {
5237 	case ETHERTYPE_IP:
5238 	case ETHERTYPE_IPV6:
5239 		break;
5240 	default:
5241 		return;
5242 	}
5243 
5244 	/* do in software what the hardware cannot */
5245 	hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5246 	csum_flags = m->m_pkthdr.csum_flags;
5247 	sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5248 	sw_csum &= IF_HWASSIST_CSUM_MASK;
5249 
5250 	switch (ether_type) {
5251 	case ETHERTYPE_IP:
5252 		if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5253 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5254 			if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5255 				uint16_t start =
5256 				    sizeof(*eh) + sizeof(struct ip);
5257 				uint16_t ulpoff =
5258 				    m->m_pkthdr.csum_data & 0xffff;
5259 				m->m_pkthdr.csum_flags |=
5260 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5261 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5262 				m->m_pkthdr.csum_tx_start = start;
5263 			} else {
5264 				sw_csum |= (CSUM_DELAY_DATA &
5265 				    m->m_pkthdr.csum_flags);
5266 			}
5267 		}
5268 		did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5269 		break;
5270 
5271 	case ETHERTYPE_IPV6:
5272 		if ((hwcap & CSUM_PARTIAL) &&
5273 		    !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5274 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5275 			if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5276 				uint16_t start =
5277 				    sizeof(*eh) + sizeof(struct ip6_hdr);
5278 				uint16_t ulpoff =
5279 				    m->m_pkthdr.csum_data & 0xffff;
5280 				m->m_pkthdr.csum_flags |=
5281 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5282 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5283 				m->m_pkthdr.csum_tx_start = start;
5284 			} else {
5285 				sw_csum |= (CSUM_DELAY_IPV6_DATA &
5286 				    m->m_pkthdr.csum_flags);
5287 			}
5288 		}
5289 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5290 		break;
5291 	}
5292 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5293 	    "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5294 	    ifp->if_xname, csum_flags, hwcap, sw_csum,
5295 	    did_sw, m->m_pkthdr.csum_flags);
5296 }
5297 
/*
 * bridge_start:
 *
 *	Start output on a bridge.
 *
 * This routine is invoked by the start worker thread; because we never call
 * it directly, there is no need to deploy any serialization mechanism other
 * than what's already used by the worker thread, i.e. this is already single
 * threaded.
 *
 * This routine is called only when if_bridge_txstart is enabled.
 */
static void
bridge_start(struct ifnet *ifp)
{
	struct mbuf *pkt;

	/* drain the queue until ifnet_dequeue() reports a non-zero result */
	while (ifnet_dequeue(ifp, &pkt) == 0) {
		(void) bridge_output(ifp, pkt);
	}
}
5323 
5324 /*
5325  * bridge_forward:
5326  *
5327  *	The forwarding function of the bridge.
5328  *
5329  *	NOTE: Releases the lock on return.
5330  */
5331 static void
5332 bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
5333     struct mbuf *m)
5334 {
5335 	struct bridge_iflist *dbif;
5336 	ifnet_t bridge_ifp;
5337 	struct ifnet *src_if, *dst_if;
5338 	struct ether_header *eh;
5339 	uint16_t vlan;
5340 	uint8_t *dst;
5341 	int error;
5342 	struct mac_nat_record mnr;
5343 	bool translate_mac = FALSE;
5344 	uint32_t sc_filter_flags = 0;
5345 
5346 	BRIDGE_LOCK_ASSERT_HELD(sc);
5347 
5348 	bridge_ifp = sc->sc_ifp;
5349 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5350 	    "%s m 0x%llx", bridge_ifp->if_xname,
5351 	    (uint64_t)VM_KERNEL_ADDRPERM(m));
5352 
5353 	src_if = m->m_pkthdr.rcvif;
5354 	if (src_if != sbif->bif_ifp) {
5355 		const char *    src_if_name;
5356 
5357 		src_if_name = (src_if != NULL) ? src_if->if_xname : "?";
5358 		BRIDGE_LOG(LOG_NOTICE, 0,
5359 		    "src_if %s != bif_ifp %s",
5360 		    src_if_name, sbif->bif_ifp->if_xname);
5361 		goto drop;
5362 	}
5363 
5364 	(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5365 	vlan = VLANTAGOF(m);
5366 
5367 
5368 	if ((sbif->bif_ifflags & IFBIF_STP) &&
5369 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5370 		goto drop;
5371 	}
5372 
5373 	eh = mtod(m, struct ether_header *);
5374 	dst = eh->ether_dhost;
5375 
5376 	/* If the interface is learning, record the address. */
5377 	if (sbif->bif_ifflags & IFBIF_LEARNING) {
5378 		error = bridge_rtupdate(sc, eh->ether_shost, vlan,
5379 		    sbif, 0, IFBAF_DYNAMIC);
5380 		/*
5381 		 * If the interface has addresses limits then deny any source
5382 		 * that is not in the cache.
5383 		 */
5384 		if (error && sbif->bif_addrmax) {
5385 			goto drop;
5386 		}
5387 	}
5388 
5389 	if ((sbif->bif_ifflags & IFBIF_STP) != 0 &&
5390 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
5391 		goto drop;
5392 	}
5393 
5394 	/*
5395 	 * At this point, the port either doesn't participate
5396 	 * in spanning tree or it is in the forwarding state.
5397 	 */
5398 
5399 	/*
5400 	 * If the packet is unicast, destined for someone on
5401 	 * "this" side of the bridge, drop it.
5402 	 */
5403 	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5404 		/* unicast */
5405 		dst_if = bridge_rtlookup(sc, dst, vlan);
5406 		if (src_if == dst_if) {
5407 			goto drop;
5408 		}
5409 	} else {
5410 		/* broadcast/multicast */
5411 
5412 		/*
5413 		 * Check if its a reserved multicast address, any address
5414 		 * listed in 802.1D section 7.12.6 may not be forwarded by the
5415 		 * bridge.
5416 		 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
5417 		 */
5418 		if (dst[0] == 0x01 && dst[1] == 0x80 &&
5419 		    dst[2] == 0xc2 && dst[3] == 0x00 &&
5420 		    dst[4] == 0x00 && dst[5] <= 0x0f) {
5421 			goto drop;
5422 		}
5423 
5424 
5425 		/* ...forward it to all interfaces. */
5426 		atomic_add_64(&bridge_ifp->if_imcasts, 1);
5427 		dst_if = NULL;
5428 	}
5429 
5430 	/*
5431 	 * If we have a destination interface which is a member of our bridge,
5432 	 * OR this is a unicast packet, push it through the bpf(4) machinery.
5433 	 * For broadcast or multicast packets, don't bother because it will
5434 	 * be reinjected into ether_input. We do this before we pass the packets
5435 	 * through the pfil(9) framework, as it is possible that pfil(9) will
5436 	 * drop the packet, or possibly modify it, making it difficult to debug
5437 	 * firewall issues on the bridge.
5438 	 */
5439 #if NBPFILTER > 0
5440 	if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
5441 	    dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5442 		m->m_pkthdr.rcvif = bridge_ifp;
5443 		BRIDGE_BPF_MTAP_INPUT(sc, m);
5444 	}
5445 #endif /* NBPFILTER */
5446 
5447 	if (dst_if == NULL) {
5448 		/* bridge_broadcast will unlock */
5449 		bridge_broadcast(sc, sbif, m, 1);
5450 		return;
5451 	}
5452 
5453 	/*
5454 	 * Unicast.
5455 	 */
5456 	/*
5457 	 * At this point, we're dealing with a unicast frame
5458 	 * going to a different interface.
5459 	 */
5460 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5461 		goto drop;
5462 	}
5463 
5464 	dbif = bridge_lookup_member_if(sc, dst_if);
5465 	if (dbif == NULL) {
5466 		/* Not a member of the bridge (anymore?) */
5467 		goto drop;
5468 	}
5469 
5470 	/* Private segments can not talk to each other */
5471 	if (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) {
5472 		goto drop;
5473 	}
5474 
5475 	if ((dbif->bif_ifflags & IFBIF_STP) &&
5476 	    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5477 		goto drop;
5478 	}
5479 
5480 #if HAS_DHCPRA_MASK
5481 	/* APPLE MODIFICATION <rdar:6985737> */
5482 	if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
5483 		m = ip_xdhcpra_output(dst_if, m);
5484 		if (!m) {
5485 			++bridge_ifp.if_xdhcpra;
5486 			BRIDGE_UNLOCK(sc);
5487 			return;
5488 		}
5489 	}
5490 #endif /* HAS_DHCPRA_MASK */
5491 
5492 	if (dbif == sc->sc_mac_nat_bif) {
5493 		/* determine how to translate the packet */
5494 		translate_mac
5495 		        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
5496 		if (m == NULL) {
5497 			/* packet was deallocated */
5498 			BRIDGE_UNLOCK(sc);
5499 			return;
5500 		}
5501 	} else if (bif_has_checksum_offload(dbif) &&
5502 	    !bif_has_checksum_offload(sbif)) {
5503 		/*
5504 		 * If the destination interface has checksum offload enabled,
5505 		 * verify the checksum now, unless the source interface also has
5506 		 * checksum offload enabled. The checksum in that case has
5507 		 * already just been computed and verifying it is unnecessary.
5508 		 */
5509 		error = bridge_verify_checksum(&m, &dbif->bif_stats);
5510 		if (error != 0) {
5511 			BRIDGE_UNLOCK(sc);
5512 			if (m != NULL) {
5513 				m_freem(m);
5514 			}
5515 			return;
5516 		}
5517 	}
5518 
5519 	sc_filter_flags = sc->sc_filter_flags;
5520 
5521 	BRIDGE_UNLOCK(sc);
5522 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5523 		if (bridge_pf(&m, dst_if, sc_filter_flags, FALSE) != 0) {
5524 			return;
5525 		}
5526 		if (m == NULL) {
5527 			return;
5528 		}
5529 	}
5530 
5531 	/* if we need to, translate the MAC address */
5532 	if (translate_mac) {
5533 		bridge_mac_nat_translate(&m, &mnr, IF_LLADDR(dst_if));
5534 	}
5535 	/*
5536 	 * We're forwarding an inbound packet in which the checksum must
5537 	 * already have been computed and if required, verified.
5538 	 */
5539 	if (m != NULL) {
5540 		(void) bridge_enqueue(bridge_ifp, src_if, dst_if, m,
5541 		    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
5542 	}
5543 	return;
5544 
5545 drop:
5546 	BRIDGE_UNLOCK(sc);
5547 	m_freem(m);
5548 }
5549 
5550 static void
5551 inject_input_packet(ifnet_t ifp, mbuf_t m)
5552 {
5553 	mbuf_pkthdr_setrcvif(m, ifp);
5554 	mbuf_pkthdr_setheader(m, mbuf_data(m));
5555 	mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
5556 	    mbuf_len(m) - ETHER_HDR_LEN);
5557 	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
5558 	m->m_flags |= M_PROTO1; /* set to avoid loops */
5559 	dlil_input_packet_list(ifp, m);
5560 	return;
5561 }
5562 
5563 static boolean_t
5564 in_addr_is_ours(struct in_addr ip)
5565 {
5566 	struct in_ifaddr *ia;
5567 	boolean_t       ours = FALSE;
5568 
5569 	lck_rw_lock_shared(&in_ifaddr_rwlock);
5570 	TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5571 		if (IA_SIN(ia)->sin_addr.s_addr == ip.s_addr) {
5572 			ours = TRUE;
5573 			break;
5574 		}
5575 	}
5576 	lck_rw_done(&in_ifaddr_rwlock);
5577 	return ours;
5578 }
5579 
5580 static boolean_t
5581 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5582 {
5583 	struct in6_ifaddr *ia6;
5584 	boolean_t       ours = FALSE;
5585 
5586 	lck_rw_lock_shared(&in6_ifaddr_rwlock);
5587 	TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5588 		if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p, ia6->ia_addr.sin6_scope_id, ifscope)) {
5589 			ours = TRUE;
5590 			break;
5591 		}
5592 	}
5593 	lck_rw_done(&in6_ifaddr_rwlock);
5594 	return ours;
5595 }
5596 
5597 static void
5598 bridge_interface_input(ifnet_t bridge_ifp, mbuf_t m,
5599     bpf_packet_func bpf_input_func)
5600 {
5601 	size_t                  byte_count;
5602 	struct ether_header     *eh;
5603 	uint16_t                ether_type;
5604 	errno_t                 error;
5605 	boolean_t               is_ipv4;
5606 	int                     len;
5607 	u_int                   mac_hlen;
5608 	int                     pkt_count;
5609 
5610 	/* segment large packets before sending them up */
5611 	if (if_bridge_segmentation == 0) {
5612 		goto done;
5613 	}
5614 	len = m->m_pkthdr.len;
5615 	if (len <= (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5616 		goto done;
5617 	}
5618 	eh = mtod(m, struct ether_header *);
5619 	ether_type = ntohs(eh->ether_type);
5620 	switch (ether_type) {
5621 	case ETHERTYPE_IP:
5622 		is_ipv4 = TRUE;
5623 		break;
5624 	case ETHERTYPE_IPV6:
5625 		is_ipv4 = FALSE;
5626 		break;
5627 	default:
5628 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5629 		    "large non IPv4/IPv6 packet");
5630 		goto done;
5631 	}
5632 
5633 	/*
5634 	 * We have a large IPv4/IPv6 TCP packet. Segment it if required.
5635 	 *
5636 	 * If gso_tcp() returns success (0), the packet(s) are
5637 	 * ready to be passed up. If the destination is a local IP address,
5638 	 * the packet will be passed up as a large, single packet.
5639 	 *
5640 	 * If gso_tcp() returns an error, the packet has already
5641 	 * been freed.
5642 	 */
5643 	mac_hlen = sizeof(*eh);
5644 	error = gso_tcp(bridge_ifp, &m, mac_hlen, is_ipv4, FALSE);
5645 	if (error != 0) {
5646 		return;
5647 	}
5648 
5649 done:
5650 	pkt_count = 0;
5651 	byte_count = 0;
5652 	for (mbuf_t scan = m; scan != NULL; scan = scan->m_nextpkt) {
5653 		/* Mark the packet as arriving on the bridge interface */
5654 		mbuf_pkthdr_setrcvif(scan, bridge_ifp);
5655 		mbuf_pkthdr_setheader(scan, mbuf_data(scan));
5656 		if (bpf_input_func != NULL) {
5657 			(*bpf_input_func)(bridge_ifp, scan);
5658 		}
5659 		mbuf_setdata(scan, (char *)mbuf_data(scan) + ETHER_HDR_LEN,
5660 		    mbuf_len(scan) - ETHER_HDR_LEN);
5661 		mbuf_pkthdr_adjustlen(scan, -ETHER_HDR_LEN);
5662 		byte_count += mbuf_pkthdr_len(scan);
5663 		pkt_count++;
5664 	}
5665 	(void)ifnet_stat_increment_in(bridge_ifp, pkt_count, byte_count, 0);
5666 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5667 	    "%s %d packet(s) %ld bytes",
5668 	    bridge_ifp->if_xname, pkt_count, byte_count);
5669 	dlil_input_packet_list(bridge_ifp, m);
5670 	return;
5671 }
5672 
5673 /*
5674  * bridge_input:
5675  *
5676  *	Filter input from a member interface.  Queue the packet for
5677  *	bridging if it is not for us.
5678  */
5679 errno_t
5680 bridge_input(struct ifnet *ifp, mbuf_t *data)
5681 {
5682 	struct bridge_softc *sc = ifp->if_bridge;
5683 	struct bridge_iflist *bif, *bif2;
5684 	ifnet_t bridge_ifp;
5685 	struct ether_header *eh;
5686 	struct mbuf *mc, *mc2;
5687 	uint16_t vlan;
5688 	errno_t error;
5689 	boolean_t is_broadcast;
5690 	boolean_t is_ip_broadcast = FALSE;
5691 	boolean_t is_ifp_mac = FALSE;
5692 	mbuf_t m = *data;
5693 	uint32_t sc_filter_flags = 0;
5694 
5695 	bridge_ifp = sc->sc_ifp;
5696 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5697 	    "%s from %s m 0x%llx data 0x%llx",
5698 	    bridge_ifp->if_xname, ifp->if_xname,
5699 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
5700 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
5701 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
5702 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5703 		    "%s not running passing along",
5704 		    bridge_ifp->if_xname);
5705 		return 0;
5706 	}
5707 
5708 	vlan = VLANTAGOF(m);
5709 
5710 #ifdef IFF_MONITOR
5711 	/*
5712 	 * Implement support for bridge monitoring. If this flag has been
5713 	 * set on this interface, discard the packet once we push it through
5714 	 * the bpf(4) machinery, but before we do, increment the byte and
5715 	 * packet counters associated with this interface.
5716 	 */
5717 	if ((bridge_ifp->if_flags & IFF_MONITOR) != 0) {
5718 		m->m_pkthdr.rcvif = bridge_ifp;
5719 		BRIDGE_BPF_MTAP_INPUT(sc, m);
5720 		(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5721 		m_freem(m);
5722 		return EJUSTRETURN;
5723 	}
5724 #endif /* IFF_MONITOR */
5725 
5726 	/*
5727 	 * Need to clear the promiscuous flags otherwise it will be
5728 	 * dropped by DLIL after processing filters
5729 	 */
5730 	if ((mbuf_flags(m) & MBUF_PROMISC)) {
5731 		mbuf_setflags_mask(m, 0, MBUF_PROMISC);
5732 	}
5733 
5734 	sc_filter_flags = sc->sc_filter_flags;
5735 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5736 		error = bridge_pf(&m, ifp, sc_filter_flags, TRUE);
5737 		if (error != 0) {
5738 			return EJUSTRETURN;
5739 		}
5740 		if (m == NULL) {
5741 			return EJUSTRETURN;
5742 		}
5743 		/*
5744 		 * bridge_pf could have modified the pointer on success in order
5745 		 * to do its processing. Updated data such that we don't use a
5746 		 * stale pointer.
5747 		 */
5748 		*data = m;
5749 	}
5750 
5751 	BRIDGE_LOCK(sc);
5752 	bif = bridge_lookup_member_if(sc, ifp);
5753 	if (bif == NULL) {
5754 		BRIDGE_UNLOCK(sc);
5755 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5756 		    "%s bridge_lookup_member_if failed",
5757 		    bridge_ifp->if_xname);
5758 		return 0;
5759 	}
5760 	if (bif_has_checksum_offload(bif)) {
5761 		/* need to compute IP/UDP/TCP/checksums */
5762 		error = bridge_offload_checksum(data, &bif->bif_stats);
5763 		if (error != 0) {
5764 			BRIDGE_UNLOCK(sc);
5765 			if (*data != NULL) {
5766 				m_freem(*data);
5767 				*data = NULL;
5768 			}
5769 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
5770 			    "%s(%s) bridge_offload_checksum rdbgffailed %d",
5771 			    bridge_ifp->if_xname,
5772 			    bif->bif_ifp->if_xname, error);
5773 			return EJUSTRETURN;
5774 		}
5775 		m = *data;
5776 	}
5777 
5778 	if (bif->bif_flags & BIFF_HOST_FILTER) {
5779 		error = bridge_host_filter(bif, data);
5780 		if (error != 0) {
5781 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5782 			    "%s bridge_host_filter failed",
5783 			    bif->bif_ifp->if_xname);
5784 			BRIDGE_UNLOCK(sc);
5785 			return EJUSTRETURN;
5786 		}
5787 		m = *data;
5788 	}
5789 
5790 	is_broadcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;
5791 	eh = mtod(m, struct ether_header *);
5792 	if (!is_broadcast &&
5793 	    memcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) {
5794 		if (sc->sc_mac_nat_bif == bif) {
5795 			/* doing MAC-NAT, check if destination is broadcast */
5796 			is_ip_broadcast = is_broadcast_ip_packet(data);
5797 			if (*data == NULL) {
5798 				BRIDGE_UNLOCK(sc);
5799 				return EJUSTRETURN;
5800 			}
5801 			m = *data;
5802 		}
5803 		if (!is_ip_broadcast) {
5804 			is_ifp_mac = TRUE;
5805 		}
5806 	}
5807 
5808 	bridge_span(sc, m);
5809 
5810 	if (is_broadcast || is_ip_broadcast) {
5811 		if (is_broadcast && (m->m_flags & M_MCAST) != 0) {
5812 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
5813 			    " multicast: "
5814 			    "%02x:%02x:%02x:%02x:%02x:%02x",
5815 			    eh->ether_dhost[0], eh->ether_dhost[1],
5816 			    eh->ether_dhost[2], eh->ether_dhost[3],
5817 			    eh->ether_dhost[4], eh->ether_dhost[5]);
5818 		}
5819 		/* Tap off 802.1D packets; they do not get forwarded. */
5820 		if (is_broadcast && memcmp(eh->ether_dhost, bstp_etheraddr,
5821 		    ETHER_ADDR_LEN) == 0) {
5822 #if BRIDGESTP
5823 			m = bstp_input(&bif->bif_stp, ifp, m);
5824 #else /* !BRIDGESTP */
5825 			m_freem(m);
5826 			m = NULL;
5827 #endif /* !BRIDGESTP */
5828 			if (m == NULL) {
5829 				BRIDGE_UNLOCK(sc);
5830 				return EJUSTRETURN;
5831 			}
5832 		}
5833 
5834 		if ((bif->bif_ifflags & IFBIF_STP) &&
5835 		    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5836 			BRIDGE_UNLOCK(sc);
5837 			return 0;
5838 		}
5839 
5840 		/*
5841 		 * Make a deep copy of the packet and enqueue the copy
5842 		 * for bridge processing.
5843 		 */
5844 		mc = m_dup(m, M_DONTWAIT);
5845 		if (mc == NULL) {
5846 			BRIDGE_UNLOCK(sc);
5847 			return 0;
5848 		}
5849 
5850 		/*
5851 		 * Perform the bridge forwarding function with the copy.
5852 		 *
5853 		 * Note that bridge_forward calls BRIDGE_UNLOCK
5854 		 */
5855 		if (is_ip_broadcast) {
5856 			/* make the copy look like it is actually broadcast */
5857 			mc->m_flags |= M_BCAST;
5858 			eh = mtod(mc, struct ether_header *);
5859 			bcopy(etherbroadcastaddr, eh->ether_dhost,
5860 			    ETHER_ADDR_LEN);
5861 		}
5862 		bridge_forward(sc, bif, mc);
5863 
5864 		/*
5865 		 * Reinject the mbuf as arriving on the bridge so we have a
5866 		 * chance at claiming multicast packets. We can not loop back
5867 		 * here from ether_input as a bridge is never a member of a
5868 		 * bridge.
5869 		 */
5870 		VERIFY(bridge_ifp->if_bridge == NULL);
5871 		mc2 = m_dup(m, M_DONTWAIT);
5872 		if (mc2 != NULL) {
5873 			/* Keep the layer3 header aligned */
5874 			int i = min(mc2->m_pkthdr.len, max_protohdr);
5875 			mc2 = m_copyup(mc2, i, ETHER_ALIGN);
5876 		}
5877 		if (mc2 != NULL) {
5878 			/* mark packet as arriving on the bridge */
5879 			mc2->m_pkthdr.rcvif = bridge_ifp;
5880 			mc2->m_pkthdr.pkt_hdr = mbuf_data(mc2);
5881 			BRIDGE_BPF_MTAP_INPUT(sc, mc2);
5882 			(void) mbuf_setdata(mc2,
5883 			    (char *)mbuf_data(mc2) + ETHER_HDR_LEN,
5884 			    mbuf_len(mc2) - ETHER_HDR_LEN);
5885 			(void) mbuf_pkthdr_adjustlen(mc2, -ETHER_HDR_LEN);
5886 			(void) ifnet_stat_increment_in(bridge_ifp, 1,
5887 			    mbuf_pkthdr_len(mc2), 0);
5888 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
5889 			    "%s mcast for us", bridge_ifp->if_xname);
5890 			dlil_input_packet_list(bridge_ifp, mc2);
5891 		}
5892 
5893 		/* Return the original packet for local processing. */
5894 		return 0;
5895 	}
5896 
5897 	if ((bif->bif_ifflags & IFBIF_STP) &&
5898 	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5899 		BRIDGE_UNLOCK(sc);
5900 		return 0;
5901 	}
5902 
5903 #ifdef DEV_CARP
5904 #define CARP_CHECK_WE_ARE_DST(iface) \
5905 	((iface)->if_carp &&\
5906 	        carp_forus((iface)->if_carp, eh->ether_dhost))
5907 #define CARP_CHECK_WE_ARE_SRC(iface) \
5908 	((iface)->if_carp &&\
5909 	        carp_forus((iface)->if_carp, eh->ether_shost))
5910 #else
5911 #define CARP_CHECK_WE_ARE_DST(iface) 0
5912 #define CARP_CHECK_WE_ARE_SRC(iface) 0
5913 #endif
5914 
5915 #define PFIL_HOOKED_INET6 PFIL_HOOKED(&inet6_pfil_hook)
5916 
5917 #define PFIL_PHYS(sc, ifp, m)
5918 
5919 #define GRAB_OUR_PACKETS(iface)                                         \
5920 	if ((iface)->if_type == IFT_GIF)                                \
5921 	        continue;                                               \
5922 	/* It is destined for us. */                                    \
5923 	if (memcmp(IF_LLADDR((iface)), eh->ether_dhost,                 \
5924 	    ETHER_ADDR_LEN) == 0 || CARP_CHECK_WE_ARE_DST((iface))) {   \
5925 	        if ((iface)->if_type == IFT_BRIDGE) {                   \
5926 	                BRIDGE_BPF_MTAP_INPUT(sc, m);                   \
5927 	/* Filter on the physical interface. */         \
5928 	                PFIL_PHYS(sc, iface, m);                        \
5929 	        } else {                                                \
5930 	                bpf_tap_in(iface, DLT_EN10MB, m, NULL, 0);      \
5931 	        }                                                       \
5932 	        if (bif->bif_ifflags & IFBIF_LEARNING) {                \
5933 	                error = bridge_rtupdate(sc, eh->ether_shost,    \
5934 	                    vlan, bif, 0, IFBAF_DYNAMIC);               \
5935 	                if (error && bif->bif_addrmax) {                \
5936 	                        BRIDGE_UNLOCK(sc);                      \
5937 	                        m_freem(m);                             \
5938 	                        return (EJUSTRETURN);                   \
5939 	                }                                               \
5940 	        }                                                       \
5941 	        BRIDGE_UNLOCK(sc);                                      \
5942 	        inject_input_packet(iface, m);                          \
5943 	        return (EJUSTRETURN);                                   \
5944 	}                                                               \
5945                                                                         \
5946 	/* We just received a packet that we sent out. */               \
5947 	if (memcmp(IF_LLADDR((iface)), eh->ether_shost,                 \
5948 	    ETHER_ADDR_LEN) == 0 || CARP_CHECK_WE_ARE_SRC((iface))) {   \
5949 	        BRIDGE_UNLOCK(sc);                                      \
5950 	        m_freem(m);                                             \
5951 	        return (EJUSTRETURN);                                   \
5952 	}
5953 
5954 	/*
5955 	 * Unicast.
5956 	 */
5957 
5958 	/* handle MAC-NAT if enabled */
5959 	if (is_ifp_mac && sc->sc_mac_nat_bif == bif) {
5960 		ifnet_t dst_if;
5961 		boolean_t is_input = FALSE;
5962 
5963 		dst_if = bridge_mac_nat_input(sc, data, &is_input);
5964 		m = *data;
5965 		if (dst_if == ifp) {
5966 			/* our input packet */
5967 		} else if (dst_if != NULL || m == NULL) {
5968 			BRIDGE_UNLOCK(sc);
5969 			if (dst_if != NULL) {
5970 				ASSERT(m != NULL);
5971 				if (is_input) {
5972 					inject_input_packet(dst_if, m);
5973 				} else {
5974 					(void)bridge_enqueue(bridge_ifp, NULL,
5975 					    dst_if, m,
5976 					    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
5977 				}
5978 			}
5979 			return EJUSTRETURN;
5980 		}
5981 	}
5982 
5983 	/*
5984 	 * If the packet is for the bridge, pass it up for local processing.
5985 	 */
5986 	if (memcmp(eh->ether_dhost, IF_LLADDR(bridge_ifp),
5987 	    ETHER_ADDR_LEN) == 0 || CARP_CHECK_WE_ARE_DST(bridge_ifp)) {
5988 		bpf_packet_func     bpf_input_func = sc->sc_bpf_input;
5989 
5990 		/*
5991 		 * If the interface is learning, and the source
5992 		 * address is valid and not multicast, record
5993 		 * the address.
5994 		 */
5995 		if (bif->bif_ifflags & IFBIF_LEARNING) {
5996 			(void) bridge_rtupdate(sc, eh->ether_shost,
5997 			    vlan, bif, 0, IFBAF_DYNAMIC);
5998 		}
5999 		BRIDGE_UNLOCK(sc);
6000 
6001 		bridge_interface_input(bridge_ifp, m, bpf_input_func);
6002 		return EJUSTRETURN;
6003 	}
6004 
6005 	/*
6006 	 * if the destination of the packet is for the MAC address of
6007 	 * the member interface itself, then we don't need to forward
6008 	 * it -- just pass it back.  Note that it'll likely just be
6009 	 * dropped by the stack, but if something else is bound to
6010 	 * the interface directly (for example, the wireless stats
6011 	 * protocol -- although that actually uses BPF right now),
6012 	 * then it will consume the packet
6013 	 *
6014 	 * ALSO, note that we do this check AFTER checking for the
6015 	 * bridge's own MAC address, because the bridge may be
6016 	 * using the SAME MAC address as one of its interfaces
6017 	 */
6018 	if (is_ifp_mac) {
6019 
6020 #ifdef VERY_VERY_VERY_DIAGNOSTIC
6021 		BRIDGE_LOG(LOG_NOTICE, 0,
6022 		    "not forwarding packet bound for member interface");
6023 #endif
6024 
6025 		BRIDGE_UNLOCK(sc);
6026 		return 0;
6027 	}
6028 
6029 	/* Now check the remaining bridge members. */
6030 	TAILQ_FOREACH(bif2, &sc->sc_iflist, bif_next) {
6031 		if (bif2->bif_ifp != ifp) {
6032 			GRAB_OUR_PACKETS(bif2->bif_ifp);
6033 		}
6034 	}
6035 
6036 #undef CARP_CHECK_WE_ARE_DST
6037 #undef CARP_CHECK_WE_ARE_SRC
6038 #undef GRAB_OUR_PACKETS
6039 
6040 	/*
6041 	 * Perform the bridge forwarding function.
6042 	 *
6043 	 * Note that bridge_forward calls BRIDGE_UNLOCK
6044 	 */
6045 	bridge_forward(sc, bif, m);
6046 
6047 	return EJUSTRETURN;
6048 }
6049 
6050 /*
6051  * bridge_broadcast:
6052  *
6053  *	Send a frame to all interfaces that are members of
6054  *	the bridge, except for the one on which the packet
6055  *	arrived.
6056  *
6057  *	NOTE: Releases the lock on return.
6058  */
6059 static void
6060 bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
6061     struct mbuf *m, int runfilt)
6062 {
6063 	ifnet_t bridge_ifp;
6064 	struct bridge_iflist *dbif;
6065 	struct ifnet * src_if;
6066 	struct mbuf *mc;
6067 	struct mbuf *mc_in;
6068 	struct ifnet *dst_if;
6069 	int error = 0, used = 0;
6070 	boolean_t bridge_if_out;
6071 	ChecksumOperation cksum_op;
6072 	struct mac_nat_record mnr;
6073 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
6074 	boolean_t translate_mac = FALSE;
6075 	uint32_t sc_filter_flags = 0;
6076 
6077 	bridge_ifp = sc->sc_ifp;
6078 	if (sbif != NULL) {
6079 		bridge_if_out = FALSE;
6080 		src_if = sbif->bif_ifp;
6081 		cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6082 		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6083 			/* get the translation record while holding the lock */
6084 			translate_mac
6085 			        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
6086 			if (m == NULL) {
6087 				/* packet was deallocated */
6088 				BRIDGE_UNLOCK(sc);
6089 				return;
6090 			}
6091 		}
6092 	} else {
6093 		/*
6094 		 * sbif is NULL when the bridge interface calls
6095 		 * bridge_broadcast().
6096 		 */
6097 		bridge_if_out = TRUE;
6098 		cksum_op = CHECKSUM_OPERATION_FINALIZE;
6099 		sbif = NULL;
6100 		src_if = NULL;
6101 	}
6102 
6103 	BRIDGE_LOCK2REF(sc, error);
6104 	if (error) {
6105 		m_freem(m);
6106 		return;
6107 	}
6108 
6109 	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6110 		dst_if = dbif->bif_ifp;
6111 		if (dst_if == src_if) {
6112 			/* skip the interface that the packet came in on */
6113 			continue;
6114 		}
6115 
6116 		/* Private segments can not talk to each other */
6117 		if (sbif != NULL &&
6118 		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6119 			continue;
6120 		}
6121 
6122 		if ((dbif->bif_ifflags & IFBIF_STP) &&
6123 		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6124 			continue;
6125 		}
6126 
6127 		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6128 		    (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
6129 			continue;
6130 		}
6131 
6132 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6133 			continue;
6134 		}
6135 
6136 		if (!(dbif->bif_flags & BIFF_MEDIA_ACTIVE)) {
6137 			continue;
6138 		}
6139 
6140 		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6141 			mc = m;
6142 			used = 1;
6143 		} else {
6144 			mc = m_dup(m, M_DONTWAIT);
6145 			if (mc == NULL) {
6146 				(void) ifnet_stat_increment_out(bridge_ifp,
6147 				    0, 0, 1);
6148 				continue;
6149 			}
6150 		}
6151 
6152 		/*
6153 		 * If broadcast input is enabled, do so only if this
6154 		 * is an input packet.
6155 		 */
6156 		if (!bridge_if_out &&
6157 		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6158 			mc_in = m_dup(mc, M_DONTWAIT);
6159 			/* this could fail, but we continue anyways */
6160 		} else {
6161 			mc_in = NULL;
6162 		}
6163 
6164 		/* out */
6165 		if (translate_mac && mac_nat_bif == dbif) {
6166 			/* translate the packet without holding the lock */
6167 			bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
6168 		}
6169 
6170 		sc_filter_flags = sc->sc_filter_flags;
6171 		if (runfilt &&
6172 		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6173 			if (used == 0) {
6174 				/* Keep the layer3 header aligned */
6175 				int i = min(mc->m_pkthdr.len, max_protohdr);
6176 				mc = m_copyup(mc, i, ETHER_ALIGN);
6177 				if (mc == NULL) {
6178 					(void) ifnet_stat_increment_out(
6179 						sc->sc_ifp, 0, 0, 1);
6180 					if (mc_in != NULL) {
6181 						m_freem(mc_in);
6182 						mc_in = NULL;
6183 					}
6184 					continue;
6185 				}
6186 			}
6187 			if (bridge_pf(&mc, dst_if, sc_filter_flags, FALSE) != 0) {
6188 				if (mc_in != NULL) {
6189 					m_freem(mc_in);
6190 					mc_in = NULL;
6191 				}
6192 				continue;
6193 			}
6194 			if (mc == NULL) {
6195 				if (mc_in != NULL) {
6196 					m_freem(mc_in);
6197 					mc_in = NULL;
6198 				}
6199 				continue;
6200 			}
6201 		}
6202 
6203 		if (mc != NULL) {
6204 			/* verify checksum if necessary */
6205 			if (bif_has_checksum_offload(dbif) && sbif != NULL &&
6206 			    !bif_has_checksum_offload(sbif)) {
6207 				error = bridge_verify_checksum(&mc,
6208 				    &dbif->bif_stats);
6209 				if (error != 0) {
6210 					if (mc != NULL) {
6211 						m_freem(mc);
6212 					}
6213 					mc = NULL;
6214 				}
6215 			}
6216 			if (mc != NULL) {
6217 				(void) bridge_enqueue(bridge_ifp,
6218 				    NULL, dst_if, mc, cksum_op);
6219 			}
6220 		}
6221 
6222 		/* in */
6223 		if (mc_in == NULL) {
6224 			continue;
6225 		}
6226 		bpf_tap_in(dst_if, DLT_EN10MB, mc_in, NULL, 0);
6227 		mbuf_pkthdr_setrcvif(mc_in, dst_if);
6228 		mbuf_pkthdr_setheader(mc_in, mbuf_data(mc_in));
6229 		mbuf_setdata(mc_in, (char *)mbuf_data(mc_in) + ETHER_HDR_LEN,
6230 		    mbuf_len(mc_in) - ETHER_HDR_LEN);
6231 		mbuf_pkthdr_adjustlen(mc_in, -ETHER_HDR_LEN);
6232 		mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
6233 		dlil_input_packet_list(dst_if, mc_in);
6234 	}
6235 	if (used == 0) {
6236 		m_freem(m);
6237 	}
6238 
6239 
6240 	BRIDGE_UNREF(sc);
6241 }
6242 
6243 /*
6244  * bridge_span:
6245  *
6246  *	Duplicate a packet out one or more interfaces that are in span mode,
6247  *	the original mbuf is unmodified.
6248  */
6249 static void
6250 bridge_span(struct bridge_softc *sc, struct mbuf *m)
6251 {
6252 	struct bridge_iflist *bif;
6253 	struct ifnet *dst_if;
6254 	struct mbuf *mc;
6255 
6256 	if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6257 		return;
6258 	}
6259 
6260 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6261 		dst_if = bif->bif_ifp;
6262 
6263 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6264 			continue;
6265 		}
6266 
6267 		mc = m_copypacket(m, M_DONTWAIT);
6268 		if (mc == NULL) {
6269 			(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6270 			continue;
6271 		}
6272 
6273 		(void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, mc,
6274 		    CHECKSUM_OPERATION_NONE);
6275 	}
6276 }
6277 
6278 
6279 /*
6280  * bridge_rtupdate:
6281  *
6282  *	Add a bridge routing entry.
6283  */
6284 static int
6285 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
6286     struct bridge_iflist *bif, int setflags, uint8_t flags)
6287 {
6288 	struct bridge_rtnode *brt;
6289 	int error;
6290 
6291 	BRIDGE_LOCK_ASSERT_HELD(sc);
6292 
6293 	/* Check the source address is valid and not multicast. */
6294 	if (ETHER_IS_MULTICAST(dst) ||
6295 	    (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6296 	    dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6297 		return EINVAL;
6298 	}
6299 
6300 
6301 	/* 802.1p frames map to vlan 1 */
6302 	if (vlan == 0) {
6303 		vlan = 1;
6304 	}
6305 
6306 	/*
6307 	 * A route for this destination might already exist.  If so,
6308 	 * update it, otherwise create a new one.
6309 	 */
6310 	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6311 		if (sc->sc_brtcnt >= sc->sc_brtmax) {
6312 			sc->sc_brtexceeded++;
6313 			return ENOSPC;
6314 		}
6315 		/* Check per interface address limits (if enabled) */
6316 		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6317 			bif->bif_addrexceeded++;
6318 			return ENOSPC;
6319 		}
6320 
6321 		/*
6322 		 * Allocate a new bridge forwarding node, and
6323 		 * initialize the expiration time and Ethernet
6324 		 * address.
6325 		 */
6326 		brt = zalloc_noblock(bridge_rtnode_pool);
6327 		if (brt == NULL) {
6328 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6329 			    "zalloc_nolock failed");
6330 			return ENOMEM;
6331 		}
6332 		bzero(brt, sizeof(struct bridge_rtnode));
6333 
6334 		if (bif->bif_ifflags & IFBIF_STICKY) {
6335 			brt->brt_flags = IFBAF_STICKY;
6336 		} else {
6337 			brt->brt_flags = IFBAF_DYNAMIC;
6338 		}
6339 
6340 		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6341 		brt->brt_vlan = vlan;
6342 
6343 
6344 		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6345 			zfree(bridge_rtnode_pool, brt);
6346 			return error;
6347 		}
6348 		brt->brt_dst = bif;
6349 		bif->bif_addrcnt++;
6350 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6351 		    "added %02x:%02x:%02x:%02x:%02x:%02x "
6352 		    "on %s count %u hashsize %u",
6353 		    dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6354 		    sc->sc_ifp->if_xname, sc->sc_brtcnt,
6355 		    sc->sc_rthash_size);
6356 	}
6357 
6358 	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6359 	    brt->brt_dst != bif) {
6360 		brt->brt_dst->bif_addrcnt--;
6361 		brt->brt_dst = bif;
6362 		brt->brt_dst->bif_addrcnt++;
6363 	}
6364 
6365 	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6366 		unsigned long now;
6367 
6368 		now = (unsigned long) net_uptime();
6369 		brt->brt_expire = now + sc->sc_brttimeout;
6370 	}
6371 	if (setflags) {
6372 		brt->brt_flags = flags;
6373 	}
6374 
6375 
6376 	return 0;
6377 }
6378 
6379 /*
6380  * bridge_rtlookup:
6381  *
6382  *	Lookup the destination interface for an address.
6383  */
6384 static struct ifnet *
6385 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6386 {
6387 	struct bridge_rtnode *brt;
6388 
6389 	BRIDGE_LOCK_ASSERT_HELD(sc);
6390 
6391 	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6392 		return NULL;
6393 	}
6394 
6395 	return brt->brt_ifp;
6396 }
6397 
6398 /*
6399  * bridge_rttrim:
6400  *
6401  *	Trim the routine table so that we have a number
6402  *	of routing entries less than or equal to the
6403  *	maximum number.
6404  */
6405 static void
6406 bridge_rttrim(struct bridge_softc *sc)
6407 {
6408 	struct bridge_rtnode *brt, *nbrt;
6409 
6410 	BRIDGE_LOCK_ASSERT_HELD(sc);
6411 
6412 	/* Make sure we actually need to do this. */
6413 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6414 		return;
6415 	}
6416 
6417 	/* Force an aging cycle; this might trim enough addresses. */
6418 	bridge_rtage(sc);
6419 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6420 		return;
6421 	}
6422 
6423 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6424 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6425 			bridge_rtnode_destroy(sc, brt);
6426 			if (sc->sc_brtcnt <= sc->sc_brtmax) {
6427 				return;
6428 			}
6429 		}
6430 	}
6431 }
6432 
6433 /*
6434  * bridge_aging_timer:
6435  *
6436  *	Aging periodic timer for the bridge routing table.
6437  */
6438 static void
6439 bridge_aging_timer(struct bridge_softc *sc)
6440 {
6441 	BRIDGE_LOCK_ASSERT_HELD(sc);
6442 
6443 	bridge_rtage(sc);
6444 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6445 	    (sc->sc_flags & SCF_DETACHING) == 0) {
6446 		sc->sc_aging_timer.bdc_sc = sc;
6447 		sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6448 		sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6449 		bridge_schedule_delayed_call(&sc->sc_aging_timer);
6450 	}
6451 }
6452 
6453 /*
6454  * bridge_rtage:
6455  *
6456  *	Perform an aging cycle.
6457  */
6458 static void
6459 bridge_rtage(struct bridge_softc *sc)
6460 {
6461 	struct bridge_rtnode *brt, *nbrt;
6462 	unsigned long now;
6463 
6464 	BRIDGE_LOCK_ASSERT_HELD(sc);
6465 
6466 	now = (unsigned long) net_uptime();
6467 
6468 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6469 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6470 			if (now >= brt->brt_expire) {
6471 				bridge_rtnode_destroy(sc, brt);
6472 			}
6473 		}
6474 	}
6475 	if (sc->sc_mac_nat_bif != NULL) {
6476 		bridge_mac_nat_age_entries(sc, now);
6477 	}
6478 }
6479 
6480 /*
6481  * bridge_rtflush:
6482  *
6483  *	Remove all dynamic addresses from the bridge.
6484  */
6485 static void
6486 bridge_rtflush(struct bridge_softc *sc, int full)
6487 {
6488 	struct bridge_rtnode *brt, *nbrt;
6489 
6490 	BRIDGE_LOCK_ASSERT_HELD(sc);
6491 
6492 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6493 		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6494 			bridge_rtnode_destroy(sc, brt);
6495 		}
6496 	}
6497 }
6498 
6499 /*
6500  * bridge_rtdaddr:
6501  *
6502  *	Remove an address from the table.
6503  */
6504 static int
6505 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6506 {
6507 	struct bridge_rtnode *brt;
6508 	int found = 0;
6509 
6510 	BRIDGE_LOCK_ASSERT_HELD(sc);
6511 
6512 	/*
6513 	 * If vlan is zero then we want to delete for all vlans so the lookup
6514 	 * may return more than one.
6515 	 */
6516 	while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6517 		bridge_rtnode_destroy(sc, brt);
6518 		found = 1;
6519 	}
6520 
6521 	return found ? 0 : ENOENT;
6522 }
6523 
6524 /*
6525  * bridge_rtdelete:
6526  *
6527  *	Delete routes to a specific member interface.
6528  */
6529 static void
6530 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6531 {
6532 	struct bridge_rtnode *brt, *nbrt;
6533 
6534 	BRIDGE_LOCK_ASSERT_HELD(sc);
6535 
6536 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6537 		if (brt->brt_ifp == ifp && (full ||
6538 		    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6539 			bridge_rtnode_destroy(sc, brt);
6540 		}
6541 	}
6542 }
6543 
6544 /*
6545  * bridge_rtable_init:
6546  *
6547  *	Initialize the route table for this bridge.
6548  */
6549 static int
6550 bridge_rtable_init(struct bridge_softc *sc)
6551 {
6552 	u_int32_t i;
6553 
6554 	sc->sc_rthash = _MALLOC(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
6555 	    M_DEVBUF, M_WAITOK | M_ZERO);
6556 	if (sc->sc_rthash == NULL) {
6557 		BRIDGE_LOG(LOG_NOTICE, 0, "no memory");
6558 		return ENOMEM;
6559 	}
6560 	sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6561 
6562 	for (i = 0; i < sc->sc_rthash_size; i++) {
6563 		LIST_INIT(&sc->sc_rthash[i]);
6564 	}
6565 
6566 	sc->sc_rthash_key = RandomULong();
6567 
6568 	LIST_INIT(&sc->sc_rtlist);
6569 
6570 	return 0;
6571 }
6572 
6573 /*
6574  * bridge_rthash_delayed_resize:
6575  *
6576  *	Resize the routing table hash on a delayed thread call.
6577  */
6578 static void
6579 bridge_rthash_delayed_resize(struct bridge_softc *sc)
6580 {
6581 	u_int32_t new_rthash_size;
6582 	struct _bridge_rtnode_list *new_rthash = NULL;
6583 	struct _bridge_rtnode_list *old_rthash = NULL;
6584 	u_int32_t i;
6585 	struct bridge_rtnode *brt;
6586 	int error = 0;
6587 
6588 	BRIDGE_LOCK_ASSERT_HELD(sc);
6589 
6590 	/*
6591 	 * Four entries per hash bucket is our ideal load factor
6592 	 */
6593 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6594 		goto out;
6595 	}
6596 
6597 	/*
6598 	 * Doubling the number of hash buckets may be too simplistic
6599 	 * especially when facing a spike of new entries
6600 	 */
6601 	new_rthash_size = sc->sc_rthash_size * 2;
6602 
6603 	sc->sc_flags |= SCF_RESIZING;
6604 	BRIDGE_UNLOCK(sc);
6605 
6606 	new_rthash = _MALLOC(sizeof(*sc->sc_rthash) * new_rthash_size,
6607 	    M_DEVBUF, M_WAITOK | M_ZERO);
6608 
6609 	BRIDGE_LOCK(sc);
6610 	sc->sc_flags &= ~SCF_RESIZING;
6611 
6612 	if (new_rthash == NULL) {
6613 		error = ENOMEM;
6614 		goto out;
6615 	}
6616 	if ((sc->sc_flags & SCF_DETACHING)) {
6617 		error = ENODEV;
6618 		goto out;
6619 	}
6620 	/*
6621 	 * Fail safe from here on
6622 	 */
6623 	old_rthash = sc->sc_rthash;
6624 	sc->sc_rthash = new_rthash;
6625 	sc->sc_rthash_size = new_rthash_size;
6626 
6627 	/*
6628 	 * Get a new key to force entries to be shuffled around to reduce
6629 	 * the likelihood they will land in the same buckets
6630 	 */
6631 	sc->sc_rthash_key = RandomULong();
6632 
6633 	for (i = 0; i < sc->sc_rthash_size; i++) {
6634 		LIST_INIT(&sc->sc_rthash[i]);
6635 	}
6636 
6637 	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
6638 		LIST_REMOVE(brt, brt_hash);
6639 		(void) bridge_rtnode_hash(sc, brt);
6640 	}
6641 out:
6642 	if (error == 0) {
6643 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6644 		    "%s new size %u",
6645 		    sc->sc_ifp->if_xname, sc->sc_rthash_size);
6646 		if (old_rthash) {
6647 			_FREE(old_rthash, M_DEVBUF);
6648 		}
6649 	} else {
6650 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
6651 		    "%s failed %d", sc->sc_ifp->if_xname, error);
6652 		if (new_rthash != NULL) {
6653 			_FREE(new_rthash, M_DEVBUF);
6654 		}
6655 	}
6656 }
6657 
6658 /*
6659  * Resize the number of hash buckets based on the load factor
6660  * Currently only grow
6661  * Failing to resize the hash table is not fatal
6662  */
6663 static void
6664 bridge_rthash_resize(struct bridge_softc *sc)
6665 {
6666 	BRIDGE_LOCK_ASSERT_HELD(sc);
6667 
6668 	if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
6669 		return;
6670 	}
6671 
6672 	/*
6673 	 * Four entries per hash bucket is our ideal load factor
6674 	 */
6675 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6676 		return;
6677 	}
6678 	/*
6679 	 * Hard limit on the size of the routing hash table
6680 	 */
6681 	if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
6682 		return;
6683 	}
6684 
6685 	sc->sc_resize_call.bdc_sc = sc;
6686 	sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
6687 	bridge_schedule_delayed_call(&sc->sc_resize_call);
6688 }
6689 
6690 /*
6691  * bridge_rtable_fini:
6692  *
6693  *	Deconstruct the route table for this bridge.
6694  */
6695 static void
6696 bridge_rtable_fini(struct bridge_softc *sc)
6697 {
6698 	KASSERT(sc->sc_brtcnt == 0,
6699 	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
6700 	if (sc->sc_rthash) {
6701 		_FREE(sc->sc_rthash, M_DEVBUF);
6702 		sc->sc_rthash = NULL;
6703 	}
6704 }
6705 
6706 /*
6707  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
6708  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
6709  */
6710 #define mix(a, b, c)                                                    \
6711 do {                                                                    \
6712 	a -= b; a -= c; a ^= (c >> 13);                                 \
6713 	b -= c; b -= a; b ^= (a << 8);                                  \
6714 	c -= a; c -= b; c ^= (b >> 13);                                 \
6715 	a -= b; a -= c; a ^= (c >> 12);                                 \
6716 	b -= c; b -= a; b ^= (a << 16);                                 \
6717 	c -= a; c -= b; c ^= (b >> 5);                                  \
6718 	a -= b; a -= c; a ^= (c >> 3);                                  \
6719 	b -= c; b -= a; b ^= (a << 10);                                 \
6720 	c -= a; c -= b; c ^= (b >> 15);                                 \
6721 } while ( /*CONSTCOND*/ 0)
6722 
6723 static __inline uint32_t
6724 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
6725 {
6726 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
6727 
6728 	b += addr[5] << 8;
6729 	b += addr[4];
6730 	a += addr[3] << 24;
6731 	a += addr[2] << 16;
6732 	a += addr[1] << 8;
6733 	a += addr[0];
6734 
6735 	mix(a, b, c);
6736 
6737 	return c & BRIDGE_RTHASH_MASK(sc);
6738 }
6739 
6740 #undef mix
6741 
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.  Returns zero when
 *	the ETHER_ADDR_LEN bytes are identical, otherwise the difference
 *	(a[i] - b[i]) of the first pair of bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int delta = (int)a[idx] - (int)b[idx];

		if (delta != 0) {
			return delta;
		}
	}

	return 0;
}
6753 
6754 /*
6755  * bridge_rtnode_lookup:
6756  *
6757  *	Look up a bridge route node for the specified destination. Compare the
6758  *	vlan id or if zero then just return the first match.
6759  */
6760 static struct bridge_rtnode *
6761 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr,
6762     uint16_t vlan)
6763 {
6764 	struct bridge_rtnode *brt;
6765 	uint32_t hash;
6766 	int dir;
6767 
6768 	BRIDGE_LOCK_ASSERT_HELD(sc);
6769 
6770 	hash = bridge_rthash(sc, addr);
6771 	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
6772 		dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
6773 		if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
6774 			return brt;
6775 		}
6776 		if (dir > 0) {
6777 			return NULL;
6778 		}
6779 	}
6780 
6781 	return NULL;
6782 }
6783 
6784 /*
6785  * bridge_rtnode_hash:
6786  *
6787  *	Insert the specified bridge node into the route hash table.
6788  *	This is used when adding a new node or to rehash when resizing
6789  *	the hash table
6790  */
6791 static int
6792 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
6793 {
6794 	struct bridge_rtnode *lbrt;
6795 	uint32_t hash;
6796 	int dir;
6797 
6798 	BRIDGE_LOCK_ASSERT_HELD(sc);
6799 
6800 	hash = bridge_rthash(sc, brt->brt_addr);
6801 
6802 	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
6803 	if (lbrt == NULL) {
6804 		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
6805 		goto out;
6806 	}
6807 
6808 	do {
6809 		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
6810 		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
6811 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6812 			    "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
6813 			    sc->sc_ifp->if_xname,
6814 			    brt->brt_addr[0], brt->brt_addr[1],
6815 			    brt->brt_addr[2], brt->brt_addr[3],
6816 			    brt->brt_addr[4], brt->brt_addr[5]);
6817 			return EEXIST;
6818 		}
6819 		if (dir > 0) {
6820 			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
6821 			goto out;
6822 		}
6823 		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
6824 			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
6825 			goto out;
6826 		}
6827 		lbrt = LIST_NEXT(lbrt, brt_hash);
6828 	} while (lbrt != NULL);
6829 
6830 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6831 	    "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
6832 	    sc->sc_ifp->if_xname,
6833 	    brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
6834 	    brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
6835 out:
6836 	return 0;
6837 }
6838 
6839 /*
6840  * bridge_rtnode_insert:
6841  *
6842  *	Insert the specified bridge node into the route table.  We
6843  *	assume the entry is not already in the table.
6844  */
6845 static int
6846 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
6847 {
6848 	int error;
6849 
6850 	error = bridge_rtnode_hash(sc, brt);
6851 	if (error != 0) {
6852 		return error;
6853 	}
6854 
6855 	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
6856 	sc->sc_brtcnt++;
6857 
6858 	bridge_rthash_resize(sc);
6859 
6860 	return 0;
6861 }
6862 
6863 /*
6864  * bridge_rtnode_destroy:
6865  *
6866  *	Destroy a bridge rtnode.
6867  */
6868 static void
6869 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
6870 {
6871 	BRIDGE_LOCK_ASSERT_HELD(sc);
6872 
6873 	LIST_REMOVE(brt, brt_hash);
6874 
6875 	LIST_REMOVE(brt, brt_list);
6876 	sc->sc_brtcnt--;
6877 	brt->brt_dst->bif_addrcnt--;
6878 	zfree(bridge_rtnode_pool, brt);
6879 }
6880 
6881 #if BRIDGESTP
6882 /*
6883  * bridge_rtable_expire:
6884  *
6885  *	Set the expiry time for all routes on an interface.
6886  */
6887 static void
6888 bridge_rtable_expire(struct ifnet *ifp, int age)
6889 {
6890 	struct bridge_softc *sc = ifp->if_bridge;
6891 	struct bridge_rtnode *brt;
6892 
6893 	BRIDGE_LOCK(sc);
6894 
6895 	/*
6896 	 * If the age is zero then flush, otherwise set all the expiry times to
6897 	 * age for the interface
6898 	 */
6899 	if (age == 0) {
6900 		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
6901 	} else {
6902 		unsigned long now;
6903 
6904 		now = (unsigned long) net_uptime();
6905 
6906 		LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
6907 			/* Cap the expiry time to 'age' */
6908 			if (brt->brt_ifp == ifp &&
6909 			    brt->brt_expire > now + age &&
6910 			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6911 				brt->brt_expire = now + age;
6912 			}
6913 		}
6914 	}
6915 	BRIDGE_UNLOCK(sc);
6916 }
6917 
6918 /*
6919  * bridge_state_change:
6920  *
6921  *	Callback from the bridgestp code when a port changes states.
6922  */
6923 static void
6924 bridge_state_change(struct ifnet *ifp, int state)
6925 {
6926 	struct bridge_softc *sc = ifp->if_bridge;
6927 	static const char *stpstates[] = {
6928 		"disabled",
6929 		"listening",
6930 		"learning",
6931 		"forwarding",
6932 		"blocking",
6933 		"discarding"
6934 	};
6935 
6936 	if (log_stp) {
6937 		log(LOG_NOTICE, "%s: state changed to %s on %s",
6938 		    sc->sc_ifp->if_xname,
6939 		    stpstates[state], ifp->if_xname);
6940 	}
6941 }
6942 #endif /* BRIDGESTP */
6943 
6944 /*
6945  * bridge_set_bpf_tap:
6946  *
6947  *	Sets ups the BPF callbacks.
6948  */
6949 static errno_t
6950 bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
6951 {
6952 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
6953 
6954 	/* TBD locking */
6955 	if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
6956 		return ENODEV;
6957 	}
6958 	switch (mode) {
6959 	case BPF_TAP_DISABLE:
6960 		sc->sc_bpf_input = sc->sc_bpf_output = NULL;
6961 		break;
6962 
6963 	case BPF_TAP_INPUT:
6964 		sc->sc_bpf_input = bpf_callback;
6965 		break;
6966 
6967 	case BPF_TAP_OUTPUT:
6968 		sc->sc_bpf_output = bpf_callback;
6969 		break;
6970 
6971 	case BPF_TAP_INPUT_OUTPUT:
6972 		sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
6973 		break;
6974 
6975 	default:
6976 		break;
6977 	}
6978 
6979 	return 0;
6980 }
6981 
6982 /*
6983  * bridge_detach:
6984  *
6985  *	Callback when interface has been detached.
6986  */
6987 static void
6988 bridge_detach(ifnet_t ifp)
6989 {
6990 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
6991 
6992 #if BRIDGESTP
6993 	bstp_detach(&sc->sc_stp);
6994 #endif /* BRIDGESTP */
6995 
6996 	/* Tear down the routing table. */
6997 	bridge_rtable_fini(sc);
6998 
6999 	lck_mtx_lock(&bridge_list_mtx);
7000 	LIST_REMOVE(sc, sc_list);
7001 	lck_mtx_unlock(&bridge_list_mtx);
7002 
7003 	ifnet_release(ifp);
7004 
7005 	lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
7006 	if_clone_softc_deallocate(&bridge_cloner, sc);
7007 }
7008 
7009 /*
7010  * bridge_bpf_input:
7011  *
7012  *	Invoke the input BPF callback if enabled
7013  */
7014 static errno_t
7015 bridge_bpf_input(ifnet_t ifp, struct mbuf *m, const char * func, int line)
7016 {
7017 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7018 	bpf_packet_func     input_func = sc->sc_bpf_input;
7019 
7020 	if (input_func != NULL) {
7021 		if (mbuf_pkthdr_rcvif(m) != ifp) {
7022 			BRIDGE_LOG(LOG_NOTICE, 0,
7023 			    "%s.%d: rcvif: 0x%llx != ifp 0x%llx", func, line,
7024 			    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
7025 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp));
7026 		}
7027 		(*input_func)(ifp, m);
7028 	}
7029 	return 0;
7030 }
7031 
7032 /*
7033  * bridge_bpf_output:
7034  *
7035  *	Invoke the output BPF callback if enabled
7036  */
7037 static errno_t
7038 bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
7039 {
7040 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7041 	bpf_packet_func     output_func = sc->sc_bpf_output;
7042 
7043 	if (output_func != NULL) {
7044 		(*output_func)(ifp, m);
7045 	}
7046 	return 0;
7047 }
7048 
7049 /*
7050  * bridge_link_event:
7051  *
7052  *	Report a data link event on an interface
7053  */
7054 static void
7055 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7056 {
7057 	struct event {
7058 		u_int32_t ifnet_family;
7059 		u_int32_t unit;
7060 		char if_name[IFNAMSIZ];
7061 	};
7062 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7063 	struct kern_event_msg *header = (struct kern_event_msg*)message;
7064 	struct event *data = (struct event *)(header + 1);
7065 
7066 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7067 	    "%s event_code %u - %s", ifp->if_xname,
7068 	    event_code, dlil_kev_dl_code_str(event_code));
7069 	header->total_size   = sizeof(message);
7070 	header->vendor_code  = KEV_VENDOR_APPLE;
7071 	header->kev_class    = KEV_NETWORK_CLASS;
7072 	header->kev_subclass = KEV_DL_SUBCLASS;
7073 	header->event_code   = event_code;
7074 	data->ifnet_family   = ifnet_family(ifp);
7075 	data->unit           = (u_int32_t)ifnet_unit(ifp);
7076 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7077 	ifnet_event(ifp, header);
7078 }
7079 
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host filter rejection: bump the bridge_hostfilter_stats
 *	counter named by `reason', log the caller's function, line and the
 *	reason at debug level, and set the local variable `error' of the
 *	enclosing function to EINVAL.
 *
 *	The enclosing function must declare `error'.  The macro expands to
 *	a single statement and must be followed by a semicolon.
 */
#define BRIDGE_HF_DROP(reason, func, line) do {                         \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	                   "%s.%d " #reason, func, line);               \
	        error = EINVAL;                                         \
} while ( /*CONSTCOND*/ 0)
7086 
7087 /*
7088  * Make sure this is a DHCP or Bootp request that match the host filter
7089  */
7090 static int
7091 bridge_dhcp_filter(struct bridge_iflist *bif, struct mbuf *m, size_t offset)
7092 {
7093 	int error = EINVAL;
7094 	struct dhcp dhcp;
7095 
7096 	/*
7097 	 * Note: We use the dhcp structure because bootp structure definition
7098 	 * is larger and some vendors do not pad the request
7099 	 */
7100 	error = mbuf_copydata(m, offset, sizeof(struct dhcp), &dhcp);
7101 	if (error != 0) {
7102 		BRIDGE_HF_DROP(brhf_dhcp_too_small, __func__, __LINE__);
7103 		goto done;
7104 	}
7105 	if (dhcp.dp_op != BOOTREQUEST) {
7106 		BRIDGE_HF_DROP(brhf_dhcp_bad_op, __func__, __LINE__);
7107 		goto done;
7108 	}
7109 	/*
7110 	 * The hardware address must be an exact match
7111 	 */
7112 	if (dhcp.dp_htype != ARPHRD_ETHER) {
7113 		BRIDGE_HF_DROP(brhf_dhcp_bad_htype, __func__, __LINE__);
7114 		goto done;
7115 	}
7116 	if (dhcp.dp_hlen != ETHER_ADDR_LEN) {
7117 		BRIDGE_HF_DROP(brhf_dhcp_bad_hlen, __func__, __LINE__);
7118 		goto done;
7119 	}
7120 	if (bcmp(dhcp.dp_chaddr, bif->bif_hf_hwsrc,
7121 	    ETHER_ADDR_LEN) != 0) {
7122 		BRIDGE_HF_DROP(brhf_dhcp_bad_chaddr, __func__, __LINE__);
7123 		goto done;
7124 	}
7125 	/*
7126 	 * Client address must match the host address or be not specified
7127 	 */
7128 	if (dhcp.dp_ciaddr.s_addr != bif->bif_hf_ipsrc.s_addr &&
7129 	    dhcp.dp_ciaddr.s_addr != INADDR_ANY) {
7130 		BRIDGE_HF_DROP(brhf_dhcp_bad_ciaddr, __func__, __LINE__);
7131 		goto done;
7132 	}
7133 	error = 0;
7134 done:
7135 	return error;
7136 }
7137 
7138 static int
7139 bridge_host_filter(struct bridge_iflist *bif, mbuf_t *data)
7140 {
7141 	int error = EINVAL;
7142 	struct ether_header *eh;
7143 	static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
7144 	mbuf_t m = *data;
7145 
7146 	eh = mtod(m, struct ether_header *);
7147 
7148 	/*
7149 	 * Restrict the source hardware address
7150 	 */
7151 	if ((bif->bif_flags & BIFF_HF_HWSRC) == 0 ||
7152 	    bcmp(eh->ether_shost, bif->bif_hf_hwsrc,
7153 	    ETHER_ADDR_LEN) != 0) {
7154 		BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr, __func__, __LINE__);
7155 		goto done;
7156 	}
7157 
7158 	/*
7159 	 * Restrict Ethernet protocols to ARP and IP
7160 	 */
7161 	if (eh->ether_type == htons(ETHERTYPE_ARP)) {
7162 		struct ether_arp *ea;
7163 		size_t minlen = sizeof(struct ether_header) +
7164 		    sizeof(struct ether_arp);
7165 
7166 		/*
7167 		 * Make the Ethernet and ARP headers contiguous
7168 		 */
7169 		if (mbuf_pkthdr_len(m) < minlen) {
7170 			BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7171 			goto done;
7172 		}
7173 		if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7174 			BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7175 			    __func__, __LINE__);
7176 			goto done;
7177 		}
7178 		m = *data;
7179 
7180 		/*
7181 		 * Verify this is an ethernet/ip arp
7182 		 */
7183 		eh = mtod(m, struct ether_header *);
7184 		ea = (struct ether_arp *)(eh + 1);
7185 		if (ea->arp_hrd != htons(ARPHRD_ETHER)) {
7186 			BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7187 			    __func__, __LINE__);
7188 			goto done;
7189 		}
7190 		if (ea->arp_pro != htons(ETHERTYPE_IP)) {
7191 			BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7192 			    __func__, __LINE__);
7193 			goto done;
7194 		}
7195 		/*
7196 		 * Verify the address lengths are correct
7197 		 */
7198 		if (ea->arp_hln != ETHER_ADDR_LEN) {
7199 			BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7200 			goto done;
7201 		}
7202 		if (ea->arp_pln != sizeof(struct in_addr)) {
7203 			BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7204 			    __func__, __LINE__);
7205 			goto done;
7206 		}
7207 
7208 		/*
7209 		 * Allow only ARP request or ARP reply
7210 		 */
7211 		if (ea->arp_op != htons(ARPOP_REQUEST) &&
7212 		    ea->arp_op != htons(ARPOP_REPLY)) {
7213 			BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7214 			goto done;
7215 		}
7216 		/*
7217 		 * Verify source hardware address matches
7218 		 */
7219 		if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7220 		    ETHER_ADDR_LEN) != 0) {
7221 			BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7222 			goto done;
7223 		}
7224 		/*
7225 		 * Verify source protocol address:
7226 		 * May be null for an ARP probe
7227 		 */
7228 		if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7229 		    sizeof(struct in_addr)) != 0 &&
7230 		    bcmp(ea->arp_spa, &inaddr_any,
7231 		    sizeof(struct in_addr)) != 0) {
7232 			BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7233 			goto done;
7234 		}
7235 		bridge_hostfilter_stats.brhf_arp_ok += 1;
7236 		error = 0;
7237 	} else if (eh->ether_type == htons(ETHERTYPE_IP)) {
7238 		size_t minlen = sizeof(struct ether_header) + sizeof(struct ip);
7239 		struct ip iphdr;
7240 		size_t offset;
7241 
7242 		/*
7243 		 * Make the Ethernet and IP headers contiguous
7244 		 */
7245 		if (mbuf_pkthdr_len(m) < minlen) {
7246 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7247 			goto done;
7248 		}
7249 		offset = sizeof(struct ether_header);
7250 		error = mbuf_copydata(m, offset, sizeof(struct ip), &iphdr);
7251 		if (error != 0) {
7252 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7253 			goto done;
7254 		}
7255 		/*
7256 		 * Verify the source IP address
7257 		 */
7258 		if (iphdr.ip_p == IPPROTO_UDP) {
7259 			struct udphdr udp;
7260 
7261 			minlen += sizeof(struct udphdr);
7262 			if (mbuf_pkthdr_len(m) < minlen) {
7263 				BRIDGE_HF_DROP(brhf_ip_too_small,
7264 				    __func__, __LINE__);
7265 				goto done;
7266 			}
7267 
7268 			/*
7269 			 * Allow all zero addresses for DHCP requests
7270 			 */
7271 			if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr &&
7272 			    iphdr.ip_src.s_addr != INADDR_ANY) {
7273 				BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7274 				    __func__, __LINE__);
7275 				goto done;
7276 			}
7277 			offset = sizeof(struct ether_header) +
7278 			    (IP_VHL_HL(iphdr.ip_vhl) << 2);
7279 			error = mbuf_copydata(m, offset,
7280 			    sizeof(struct udphdr), &udp);
7281 			if (error != 0) {
7282 				BRIDGE_HF_DROP(brhf_ip_too_small,
7283 				    __func__, __LINE__);
7284 				goto done;
7285 			}
7286 			/*
7287 			 * Either it's a Bootp/DHCP packet that we like or
7288 			 * it's a UDP packet from the host IP as source address
7289 			 */
7290 			if (udp.uh_sport == htons(IPPORT_BOOTPC) &&
7291 			    udp.uh_dport == htons(IPPORT_BOOTPS)) {
7292 				minlen += sizeof(struct dhcp);
7293 				if (mbuf_pkthdr_len(m) < minlen) {
7294 					BRIDGE_HF_DROP(brhf_ip_too_small,
7295 					    __func__, __LINE__);
7296 					goto done;
7297 				}
7298 				offset += sizeof(struct udphdr);
7299 				error = bridge_dhcp_filter(bif, m, offset);
7300 				if (error != 0) {
7301 					goto done;
7302 				}
7303 			} else if (iphdr.ip_src.s_addr == INADDR_ANY) {
7304 				BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7305 				    __func__, __LINE__);
7306 				goto done;
7307 			}
7308 		} else if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr ||
7309 		    bif->bif_hf_ipsrc.s_addr == INADDR_ANY) {
7310 			BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, __func__, __LINE__);
7311 			goto done;
7312 		}
7313 		/*
7314 		 * Allow only boring IP protocols
7315 		 */
7316 		if (iphdr.ip_p != IPPROTO_TCP &&
7317 		    iphdr.ip_p != IPPROTO_UDP &&
7318 		    iphdr.ip_p != IPPROTO_ICMP &&
7319 		    iphdr.ip_p != IPPROTO_ESP &&
7320 		    iphdr.ip_p != IPPROTO_AH &&
7321 		    iphdr.ip_p != IPPROTO_GRE) {
7322 			BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7323 			goto done;
7324 		}
7325 		bridge_hostfilter_stats.brhf_ip_ok += 1;
7326 		error = 0;
7327 	} else {
7328 		BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
7329 		goto done;
7330 	}
7331 done:
7332 	if (error != 0) {
7333 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
7334 			if (m) {
7335 				brlog_mbuf_data(m, 0,
7336 				    sizeof(struct ether_header) +
7337 				    sizeof(struct ip));
7338 			}
7339 		}
7340 
7341 		if (m != NULL) {
7342 			m_freem(m);
7343 		}
7344 	}
7345 	return error;
7346 }
7347 
7348 /*
7349  * MAC NAT
7350  */
7351 
7352 static errno_t
7353 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7354 {
7355 	errno_t         error = 0;
7356 
7357 	BRIDGE_LOCK_ASSERT_HELD(sc);
7358 
7359 	if (IFNET_IS_VMNET(bif->bif_ifp)) {
7360 		error = EINVAL;
7361 		goto done;
7362 	}
7363 	if (sc->sc_mac_nat_bif != NULL) {
7364 		if (sc->sc_mac_nat_bif != bif) {
7365 			error = EBUSY;
7366 		}
7367 		goto done;
7368 	}
7369 	sc->sc_mac_nat_bif = bif;
7370 	bif->bif_ifflags |= IFBIF_MAC_NAT;
7371 	bridge_mac_nat_populate_entries(sc);
7372 
7373 done:
7374 	return error;
7375 }
7376 
7377 static void
7378 bridge_mac_nat_disable(struct bridge_softc *sc)
7379 {
7380 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7381 
7382 	assert(mac_nat_bif != NULL);
7383 	bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7384 	mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7385 	sc->sc_mac_nat_bif = NULL;
7386 	return;
7387 }
7388 
7389 static void
7390 mac_nat_entry_print2(struct mac_nat_entry *mne,
7391     char *ifname, const char *msg1, const char *msg2)
7392 {
7393 	int             af;
7394 	char            etopbuf[24];
7395 	char            ntopbuf[MAX_IPv6_STR_LEN];
7396 	const char      *space;
7397 
7398 	af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7399 	ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7400 	(void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7401 	if (msg2 == NULL) {
7402 		msg2 = "";
7403 		space = "";
7404 	} else {
7405 		space = " ";
7406 	}
7407 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7408 	    "%s %s%s%s %p (%s, %s, %s)",
7409 	    ifname, msg1, space, msg2, mne, mne->mne_bif->bif_ifp->if_xname,
7410 	    ntopbuf, etopbuf);
7411 }
7412 
/*
 * mac_nat_entry_print:
 *
 *	Log a MAC NAT entry with a single message; shorthand for
 *	mac_nat_entry_print2() without a second message.
 */
static void
mac_nat_entry_print(struct mac_nat_entry *mne,
    char *ifname, const char *msg)
{
	const char *no_msg2 = NULL;

	mac_nat_entry_print2(mne, ifname, msg, no_msg2);
}
7419 
7420 static struct mac_nat_entry *
7421 bridge_lookup_mac_nat_entry(struct bridge_softc *sc, int af, void * ip)
7422 {
7423 	struct mac_nat_entry    *mne;
7424 	struct mac_nat_entry    *ret_mne = NULL;
7425 
7426 	if (af == AF_INET) {
7427 		in_addr_t s_addr = ((struct in_addr *)ip)->s_addr;
7428 
7429 		LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7430 			if (mne->mne_ip.s_addr == s_addr) {
7431 				if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7432 					mac_nat_entry_print(mne, sc->sc_if_xname,
7433 					    "found");
7434 				}
7435 				ret_mne = mne;
7436 				break;
7437 			}
7438 		}
7439 	} else {
7440 		const struct in6_addr *ip6 = (const struct in6_addr *)ip;
7441 
7442 		LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7443 			if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7444 				if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7445 					mac_nat_entry_print(mne, sc->sc_if_xname,
7446 					    "found");
7447 				}
7448 				ret_mne = mne;
7449 				break;
7450 			}
7451 		}
7452 	}
7453 	return ret_mne;
7454 }
7455 
7456 static void
7457 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7458     struct mac_nat_entry *mne, const char *reason)
7459 {
7460 	LIST_REMOVE(mne, mne_list);
7461 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7462 		mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7463 	}
7464 	zfree(bridge_mne_pool, mne);
7465 	sc->sc_mne_count--;
7466 }
7467 
7468 static struct mac_nat_entry *
7469 bridge_create_mac_nat_entry(struct bridge_softc *sc,
7470     struct bridge_iflist *bif, int af, const void *ip, uint8_t *eaddr)
7471 {
7472 	struct mac_nat_entry_list *list;
7473 	struct mac_nat_entry *mne;
7474 
7475 	if (sc->sc_mne_count >= sc->sc_mne_max) {
7476 		sc->sc_mne_allocation_failures++;
7477 		return NULL;
7478 	}
7479 	mne = zalloc_noblock(bridge_mne_pool);
7480 	if (mne == NULL) {
7481 		sc->sc_mne_allocation_failures++;
7482 		return NULL;
7483 	}
7484 	sc->sc_mne_count++;
7485 	bzero(mne, sizeof(*mne));
7486 	bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7487 	mne->mne_bif = bif;
7488 	if (af == AF_INET) {
7489 		bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7490 		list = &sc->sc_mne_list;
7491 	} else {
7492 		bcopy(ip, &mne->mne_ip6, sizeof(mne->mne_ip6));
7493 		mne->mne_flags |= MNE_FLAGS_IPV6;
7494 		list = &sc->sc_mne_list_v6;
7495 	}
7496 	LIST_INSERT_HEAD(list, mne, mne_list);
7497 	mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7498 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7499 		mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7500 	}
7501 	return mne;
7502 }
7503 
7504 static struct mac_nat_entry *
7505 bridge_update_mac_nat_entry(struct bridge_softc *sc,
7506     struct bridge_iflist *bif, int af, void *ip, uint8_t *eaddr)
7507 {
7508 	struct mac_nat_entry *mne;
7509 
7510 	mne = bridge_lookup_mac_nat_entry(sc, af, ip);
7511 	if (mne != NULL) {
7512 		struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7513 
7514 		if (mne->mne_bif == mac_nat_bif) {
7515 			/* the MAC NAT interface takes precedence */
7516 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7517 				if (mne->mne_bif != bif) {
7518 					mac_nat_entry_print2(mne,
7519 					    sc->sc_if_xname, "reject",
7520 					    bif->bif_ifp->if_xname);
7521 				}
7522 			}
7523 		} else if (mne->mne_bif != bif) {
7524 			const char *old_if = mne->mne_bif->bif_ifp->if_xname;
7525 
7526 			mne->mne_bif = bif;
7527 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7528 				mac_nat_entry_print2(mne,
7529 				    sc->sc_if_xname, "replaced",
7530 				    old_if);
7531 			}
7532 			bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7533 		}
7534 		mne->mne_expire = (unsigned long)net_uptime() +
7535 		    sc->sc_brttimeout;
7536 	} else {
7537 		mne = bridge_create_mac_nat_entry(sc, bif, af, ip, eaddr);
7538 	}
7539 	return mne;
7540 }
7541 
7542 static void
7543 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7544     struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7545 {
7546 	struct mac_nat_entry *mne;
7547 	struct mac_nat_entry *tmne;
7548 
7549 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7550 		if (bif != NULL && mne->mne_bif != bif) {
7551 			continue;
7552 		}
7553 		bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7554 	}
7555 }
7556 
7557 /*
7558  * bridge_mac_nat_flush_entries:
7559  *
7560  * Flush MAC NAT entries for the specified member. Flush all entries if
7561  * the member is the one that requires MAC NAT, otherwise just flush the
7562  * ones for the specified member.
7563  */
7564 static void
7565 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7566 {
7567 	struct bridge_iflist *flush_bif;
7568 
7569 	flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7570 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7571 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7572 }
7573 
7574 static void
7575 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7576 {
7577 	errno_t                 error;
7578 	ifnet_t                 ifp;
7579 	ifaddr_t                *list;
7580 	struct bridge_iflist    *mac_nat_bif = sc->sc_mac_nat_bif;
7581 
7582 	assert(mac_nat_bif != NULL);
7583 	ifp = mac_nat_bif->bif_ifp;
7584 	error = ifnet_get_address_list(ifp, &list);
7585 	if (error != 0) {
7586 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7587 		    "ifnet_get_address_list(%s) failed %d",
7588 		    ifp->if_xname, error);
7589 		return;
7590 	}
7591 	for (ifaddr_t *scan = list; *scan != NULL; scan++) {
7592 		sa_family_t     af;
7593 		void            *ip;
7594 
7595 		union {
7596 			struct sockaddr         sa;
7597 			struct sockaddr_in      sin;
7598 			struct sockaddr_in6     sin6;
7599 		} u;
7600 		af = ifaddr_address_family(*scan);
7601 		switch (af) {
7602 		case AF_INET:
7603 		case AF_INET6:
7604 			error = ifaddr_address(*scan, &u.sa, sizeof(u));
7605 			if (error != 0) {
7606 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7607 				    "ifaddr_address failed %d",
7608 				    error);
7609 				break;
7610 			}
7611 			if (af == AF_INET) {
7612 				ip = (void *)&u.sin.sin_addr;
7613 			} else {
7614 				if (IN6_IS_ADDR_LINKLOCAL(&u.sin6.sin6_addr)) {
7615 					/* remove scope ID */
7616 					u.sin6.sin6_addr.s6_addr16[1] = 0;
7617 				}
7618 				ip = (void *)&u.sin6.sin6_addr;
7619 			}
7620 			bridge_create_mac_nat_entry(sc, mac_nat_bif, af, ip,
7621 			    (uint8_t *)IF_LLADDR(ifp));
7622 			break;
7623 		default:
7624 			break;
7625 		}
7626 	}
7627 	ifnet_free_address_list(list);
7628 	return;
7629 }
7630 
7631 static void
7632 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7633     struct mac_nat_entry_list *list, unsigned long now)
7634 {
7635 	struct mac_nat_entry *mne;
7636 	struct mac_nat_entry *tmne;
7637 
7638 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7639 		if (now >= mne->mne_expire) {
7640 			bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7641 		}
7642 	}
7643 }
7644 
7645 static void
7646 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7647 {
7648 	if (sc->sc_mac_nat_bif == NULL) {
7649 		return;
7650 	}
7651 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7652 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7653 }
7654 
7655 static const char *
7656 get_in_out_string(boolean_t is_output)
7657 {
7658 	return is_output ? "OUT" : "IN";
7659 }
7660 
7661 /*
7662  * is_valid_arp_packet:
7663  *	Verify that this is a valid ARP packet.
7664  *
7665  *	Returns TRUE if the packet is valid, FALSE otherwise.
7666  */
7667 static boolean_t
7668 is_valid_arp_packet(mbuf_t *data, boolean_t is_output,
7669     struct ether_header **eh_p, struct ether_arp **ea_p)
7670 {
7671 	struct ether_arp *ea;
7672 	struct ether_header *eh;
7673 	size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7674 	boolean_t is_valid = FALSE;
7675 	int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7676 
7677 	if (mbuf_pkthdr_len(*data) < minlen) {
7678 		BRIDGE_LOG(LOG_DEBUG, flags,
7679 		    "ARP %s short frame %lu < %lu",
7680 		    get_in_out_string(is_output),
7681 		    mbuf_pkthdr_len(*data), minlen);
7682 		goto done;
7683 	}
7684 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7685 		BRIDGE_LOG(LOG_DEBUG, flags,
7686 		    "ARP %s size %lu mbuf_pullup fail",
7687 		    get_in_out_string(is_output),
7688 		    minlen);
7689 		*data = NULL;
7690 		goto done;
7691 	}
7692 
7693 	/* validate ARP packet */
7694 	eh = mtod(*data, struct ether_header *);
7695 	ea = (struct ether_arp *)(eh + 1);
7696 	if (ntohs(ea->arp_hrd) != ARPHRD_ETHER) {
7697 		BRIDGE_LOG(LOG_DEBUG, flags,
7698 		    "ARP %s htype not ethernet",
7699 		    get_in_out_string(is_output));
7700 		goto done;
7701 	}
7702 	if (ea->arp_hln != ETHER_ADDR_LEN) {
7703 		BRIDGE_LOG(LOG_DEBUG, flags,
7704 		    "ARP %s hlen not ethernet",
7705 		    get_in_out_string(is_output));
7706 		goto done;
7707 	}
7708 	if (ntohs(ea->arp_pro) != ETHERTYPE_IP) {
7709 		BRIDGE_LOG(LOG_DEBUG, flags,
7710 		    "ARP %s ptype not IP",
7711 		    get_in_out_string(is_output));
7712 		goto done;
7713 	}
7714 	if (ea->arp_pln != sizeof(struct in_addr)) {
7715 		BRIDGE_LOG(LOG_DEBUG, flags,
7716 		    "ARP %s plen not IP",
7717 		    get_in_out_string(is_output));
7718 		goto done;
7719 	}
7720 	is_valid = TRUE;
7721 	*ea_p = ea;
7722 	*eh_p = eh;
7723 done:
7724 	return is_valid;
7725 }
7726 
7727 static struct mac_nat_entry *
7728 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7729 {
7730 	struct ether_arp        *ea;
7731 	struct ether_header     *eh;
7732 	struct mac_nat_entry    *mne = NULL;
7733 	u_short                 op;
7734 	struct in_addr          tpa;
7735 
7736 	if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7737 		goto done;
7738 	}
7739 	op = ntohs(ea->arp_op);
7740 	switch (op) {
7741 	case ARPOP_REQUEST:
7742 	case ARPOP_REPLY:
7743 		/* only care about REQUEST and REPLY */
7744 		break;
7745 	default:
7746 		goto done;
7747 	}
7748 
7749 	/* check the target IP address for a NAT entry */
7750 	bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7751 	if (tpa.s_addr != 0) {
7752 		mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &tpa);
7753 	}
7754 	if (mne != NULL) {
7755 		if (op == ARPOP_REPLY) {
7756 			/* translate the MAC address */
7757 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7758 				char    mac_src[24];
7759 				char    mac_dst[24];
7760 
7761 				ether_ntop(mac_src, sizeof(mac_src),
7762 				    ea->arp_tha);
7763 				ether_ntop(mac_dst, sizeof(mac_dst),
7764 				    mne->mne_mac);
7765 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7766 				    "%s %s ARP %s -> %s",
7767 				    sc->sc_if_xname,
7768 				    mne->mne_bif->bif_ifp->if_xname,
7769 				    mac_src, mac_dst);
7770 			}
7771 			bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
7772 		}
7773 	} else {
7774 		/* handle conflicting ARP (sender matches mne) */
7775 		struct in_addr spa;
7776 
7777 		bcopy(ea->arp_spa, &spa, sizeof(spa));
7778 		if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
7779 			/* check the source IP for a NAT entry */
7780 			mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &spa);
7781 		}
7782 	}
7783 
7784 done:
7785 	return mne;
7786 }
7787 
7788 static boolean_t
7789 bridge_mac_nat_arp_output(struct bridge_softc *sc,
7790     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7791 {
7792 	struct ether_arp        *ea;
7793 	struct ether_header     *eh;
7794 	struct in_addr          ip;
7795 	struct mac_nat_entry    *mne = NULL;
7796 	u_short                 op;
7797 	boolean_t               translate = FALSE;
7798 
7799 	if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
7800 		goto done;
7801 	}
7802 	op = ntohs(ea->arp_op);
7803 	switch (op) {
7804 	case ARPOP_REQUEST:
7805 	case ARPOP_REPLY:
7806 		/* only care about REQUEST and REPLY */
7807 		break;
7808 	default:
7809 		goto done;
7810 	}
7811 
7812 	bcopy(ea->arp_spa, &ip, sizeof(ip));
7813 	if (ip.s_addr == 0) {
7814 		goto done;
7815 	}
7816 	/* XXX validate IP address: no multicast/broadcast */
7817 	mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip, ea->arp_sha);
7818 	if (mnr != NULL && mne != NULL) {
7819 		/* record the offset to do the replacement */
7820 		translate = TRUE;
7821 		mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
7822 	}
7823 
7824 done:
7825 	return translate;
7826 }
7827 
7828 #define ETHER_IPV4_HEADER_LEN   (sizeof(struct ether_header) +  \
7829 	                         + sizeof(struct ip))
7830 static struct ether_header *
7831 get_ether_ip_header(mbuf_t *data, boolean_t is_output)
7832 {
7833 	struct ether_header     *eh = NULL;
7834 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7835 	size_t          minlen = ETHER_IPV4_HEADER_LEN;
7836 
7837 	if (mbuf_pkthdr_len(*data) < minlen) {
7838 		BRIDGE_LOG(LOG_DEBUG, flags,
7839 		    "IP %s short frame %lu < %lu",
7840 		    get_in_out_string(is_output),
7841 		    mbuf_pkthdr_len(*data), minlen);
7842 		goto done;
7843 	}
7844 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7845 		BRIDGE_LOG(LOG_DEBUG, flags,
7846 		    "IP %s size %lu mbuf_pullup fail",
7847 		    get_in_out_string(is_output),
7848 		    minlen);
7849 		*data = NULL;
7850 		goto done;
7851 	}
7852 	eh = mtod(*data, struct ether_header *);
7853 done:
7854 	return eh;
7855 }
7856 
7857 static boolean_t
7858 is_broadcast_ip_packet(mbuf_t *data)
7859 {
7860 	struct ether_header     *eh;
7861 	uint16_t                ether_type;
7862 	boolean_t               is_broadcast = FALSE;
7863 
7864 	eh = mtod(*data, struct ether_header *);
7865 	ether_type = ntohs(eh->ether_type);
7866 	switch (ether_type) {
7867 	case ETHERTYPE_IP:
7868 		eh = get_ether_ip_header(data, FALSE);
7869 		if (eh != NULL) {
7870 			struct in_addr  dst;
7871 			struct ip       *iphdr;
7872 
7873 			iphdr = (struct ip *)(void *)(eh + 1);
7874 			bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
7875 			is_broadcast = (dst.s_addr == INADDR_BROADCAST);
7876 		}
7877 		break;
7878 	default:
7879 		break;
7880 	}
7881 	return is_broadcast;
7882 }
7883 
7884 static struct mac_nat_entry *
7885 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
7886 {
7887 	struct in_addr          dst;
7888 	struct ether_header     *eh;
7889 	struct ip               *iphdr;
7890 	struct mac_nat_entry    *mne = NULL;
7891 
7892 	eh = get_ether_ip_header(data, FALSE);
7893 	if (eh == NULL) {
7894 		goto done;
7895 	}
7896 	iphdr = (struct ip *)(void *)(eh + 1);
7897 	bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
7898 	/* XXX validate IP address */
7899 	if (dst.s_addr == 0) {
7900 		goto done;
7901 	}
7902 	mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &dst);
7903 done:
7904 	return mne;
7905 }
7906 
7907 static void
7908 bridge_mac_nat_udp_output(struct bridge_softc *sc,
7909     struct bridge_iflist *bif, mbuf_t m,
7910     uint8_t ip_header_len, struct mac_nat_record *mnr)
7911 {
7912 	uint16_t        dp_flags;
7913 	errno_t         error;
7914 	size_t          offset;
7915 	struct udphdr   udphdr;
7916 
7917 	/* copy the UDP header */
7918 	offset = sizeof(struct ether_header) + ip_header_len;
7919 	error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
7920 	if (error != 0) {
7921 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7922 		    "mbuf_copydata udphdr failed %d",
7923 		    error);
7924 		return;
7925 	}
7926 	if (ntohs(udphdr.uh_sport) != IPPORT_BOOTPC ||
7927 	    ntohs(udphdr.uh_dport) != IPPORT_BOOTPS) {
7928 		/* not a BOOTP/DHCP packet */
7929 		return;
7930 	}
7931 	/* check whether the broadcast bit is already set */
7932 	offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
7933 	error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
7934 	if (error != 0) {
7935 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7936 		    "mbuf_copydata dp_flags failed %d",
7937 		    error);
7938 		return;
7939 	}
7940 	if ((ntohs(dp_flags) & DHCP_FLAGS_BROADCAST) != 0) {
7941 		/* it's already set, nothing to do */
7942 		return;
7943 	}
7944 	/* broadcast bit needs to be set */
7945 	mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
7946 	mnr->mnr_ip_header_len = ip_header_len;
7947 	if (udphdr.uh_sum != 0) {
7948 		uint16_t        delta;
7949 
7950 		/* adjust checksum to take modified dp_flags into account */
7951 		delta = dp_flags - mnr->mnr_ip_dhcp_flags;
7952 		mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
7953 	}
7954 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7955 	    "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
7956 	    sc->sc_if_xname,
7957 	    bif->bif_ifp->if_xname,
7958 	    ntohs(mnr->mnr_ip_dhcp_flags),
7959 	    ntohs(mnr->mnr_ip_udp_csum));
7960 	return;
7961 }
7962 
7963 static boolean_t
7964 bridge_mac_nat_ip_output(struct bridge_softc *sc,
7965     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7966 {
7967 #pragma unused(mnr)
7968 	struct ether_header     *eh;
7969 	struct in_addr          ip;
7970 	struct ip               *iphdr;
7971 	uint8_t                 ip_header_len;
7972 	struct mac_nat_entry    *mne = NULL;
7973 	boolean_t               translate = FALSE;
7974 
7975 	eh = get_ether_ip_header(data, TRUE);
7976 	if (eh == NULL) {
7977 		goto done;
7978 	}
7979 	iphdr = (struct ip *)(void *)(eh + 1);
7980 	ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
7981 	if (ip_header_len < sizeof(ip)) {
7982 		/* bogus IP header */
7983 		goto done;
7984 	}
7985 	bcopy(&iphdr->ip_src, &ip, sizeof(ip));
7986 	/* XXX validate the source address */
7987 	if (ip.s_addr != 0) {
7988 		mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip,
7989 		    eh->ether_shost);
7990 	}
7991 	if (mnr != NULL) {
7992 		if (iphdr->ip_p == IPPROTO_UDP) {
7993 			/* handle DHCP must broadcast */
7994 			bridge_mac_nat_udp_output(sc, bif, *data,
7995 			    ip_header_len, mnr);
7996 		}
7997 		translate = TRUE;
7998 	}
7999 done:
8000 	return translate;
8001 }
8002 
8003 #define ETHER_IPV6_HEADER_LEN   (sizeof(struct ether_header) +  \
8004 	                         + sizeof(struct ip6_hdr))
8005 static struct ether_header *
8006 get_ether_ipv6_header(mbuf_t *data, boolean_t is_output)
8007 {
8008 	struct ether_header     *eh = NULL;
8009 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8010 	size_t          minlen = ETHER_IPV6_HEADER_LEN;
8011 
8012 	if (mbuf_pkthdr_len(*data) < minlen) {
8013 		BRIDGE_LOG(LOG_DEBUG, flags,
8014 		    "IP %s short frame %lu < %lu",
8015 		    get_in_out_string(is_output),
8016 		    mbuf_pkthdr_len(*data), minlen);
8017 		goto done;
8018 	}
8019 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8020 		BRIDGE_LOG(LOG_DEBUG, flags,
8021 		    "IP %s size %lu mbuf_pullup fail",
8022 		    get_in_out_string(is_output),
8023 		    minlen);
8024 		*data = NULL;
8025 		goto done;
8026 	}
8027 	eh = mtod(*data, struct ether_header *);
8028 done:
8029 	return eh;
8030 }
8031 
8032 #include <netinet/icmp6.h>
8033 #include <netinet6/nd6.h>
8034 
8035 #define ETHER_ND_LLADDR_LEN     (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8036 
8037 static void
8038 bridge_mac_nat_icmpv6_output(struct bridge_softc *sc, struct bridge_iflist *bif,
8039     mbuf_t *data, struct ether_header *eh,
8040     struct ip6_hdr *ip6h, struct in6_addr *saddrp, struct mac_nat_record *mnr)
8041 {
8042 	struct icmp6_hdr *icmp6;
8043 	unsigned int    icmp6len;
8044 	int             lladdrlen = 0;
8045 	char            *lladdr = NULL;
8046 	mbuf_t          m = *data;
8047 	unsigned int    off = sizeof(*ip6h);
8048 
8049 	icmp6len = m->m_pkthdr.len - sizeof(*eh) - off;
8050 	if (icmp6len < sizeof(*icmp6)) {
8051 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8052 		    "short packet %d < %lu",
8053 		    icmp6len, sizeof(*icmp6));
8054 		return;
8055 	}
8056 	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
8057 	switch (icmp6->icmp6_type) {
8058 	case ND_NEIGHBOR_SOLICIT: {
8059 		struct nd_neighbor_solicit *nd_ns;
8060 		union nd_opts ndopts;
8061 		boolean_t is_dad_probe;
8062 		struct in6_addr taddr;
8063 
8064 		if (icmp6len < sizeof(*nd_ns)) {
8065 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8066 			    "short nd_ns %d < %lu",
8067 			    icmp6len, sizeof(*nd_ns));
8068 			return;
8069 		}
8070 
8071 		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
8072 		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
8073 		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8074 		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8075 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8076 			    "invalid target ignored");
8077 			return;
8078 		}
8079 		/* parse options */
8080 		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
8081 		if (nd6_options(&ndopts) < 0) {
8082 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8083 			    "invalid ND6 NS option");
8084 			return;
8085 		}
8086 		if (ndopts.nd_opts_src_lladdr != NULL) {
8087 			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
8088 			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
8089 		}
8090 		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
8091 		if (lladdr != NULL) {
8092 			if (is_dad_probe) {
8093 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8094 				    "bad ND6 DAD packet");
8095 				return;
8096 			}
8097 			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8098 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8099 				    "source lladdrlen %d != %lu",
8100 				    lladdrlen, ETHER_ND_LLADDR_LEN);
8101 				return;
8102 			}
8103 			mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr -
8104 			    (uintptr_t)eh);
8105 			mnr->mnr_ip6_icmp6_len = icmp6len;
8106 			mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
8107 			mnr->mnr_ip6_header_len = off;
8108 		}
8109 		if (is_dad_probe) {
8110 			/* node is trying use taddr, create an mne using taddr */
8111 			*saddrp = taddr;
8112 		}
8113 		break;
8114 	}
8115 	case ND_NEIGHBOR_ADVERT: {
8116 		struct nd_neighbor_advert *nd_na;
8117 		union nd_opts ndopts;
8118 		struct in6_addr taddr;
8119 
8120 
8121 		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
8122 
8123 		if (icmp6len < sizeof(*nd_na)) {
8124 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8125 			    "short nd_na %d < %lu",
8126 			    icmp6len, sizeof(*nd_na));
8127 			return;
8128 		}
8129 
8130 		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
8131 		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8132 		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8133 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8134 			    "invalid target ignored");
8135 			return;
8136 		}
8137 		/* parse options */
8138 		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
8139 		if (nd6_options(&ndopts) < 0) {
8140 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8141 			    "invalid ND6 NA option");
8142 			return;
8143 		}
8144 		if (ndopts.nd_opts_tgt_lladdr == NULL) {
8145 			/* target linklayer, nothing to do */
8146 			return;
8147 		}
8148 		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
8149 		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
8150 		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8151 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8152 			    "target lladdrlen %d != %lu",
8153 			    lladdrlen, ETHER_ND_LLADDR_LEN);
8154 			return;
8155 		}
8156 		mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr - (uintptr_t)eh);
8157 		mnr->mnr_ip6_icmp6_len = icmp6len;
8158 		mnr->mnr_ip6_header_len = off;
8159 		mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
8160 		break;
8161 	}
8162 	case ND_ROUTER_SOLICIT: {
8163 		struct nd_router_solicit *nd_rs;
8164 		union nd_opts ndopts;
8165 
8166 		if (icmp6len < sizeof(*nd_rs)) {
8167 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8168 			    "short nd_rs %d < %lu",
8169 			    icmp6len, sizeof(*nd_rs));
8170 			return;
8171 		}
8172 		nd_rs = (struct nd_router_solicit *)(void *)icmp6;
8173 
8174 		/* parse options */
8175 		nd6_option_init(nd_rs + 1, icmp6len - sizeof(*nd_rs), &ndopts);
8176 		if (nd6_options(&ndopts) < 0) {
8177 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8178 			    "invalid ND6 RS option");
8179 			return;
8180 		}
8181 		if (ndopts.nd_opts_src_lladdr != NULL) {
8182 			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
8183 			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
8184 		}
8185 		if (lladdr != NULL) {
8186 			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8187 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8188 				    "source lladdrlen %d != %lu",
8189 				    lladdrlen, ETHER_ND_LLADDR_LEN);
8190 				return;
8191 			}
8192 			mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr -
8193 			    (uintptr_t)eh);
8194 			mnr->mnr_ip6_icmp6_len = icmp6len;
8195 			mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
8196 			mnr->mnr_ip6_header_len = off;
8197 		}
8198 		break;
8199 	}
8200 	default:
8201 		break;
8202 	}
8203 	if (mnr->mnr_ip6_lladdr_offset != 0 &&
8204 	    BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8205 		const char *str;
8206 
8207 		switch (mnr->mnr_ip6_icmp6_type) {
8208 		case ND_ROUTER_SOLICIT:
8209 			str = "ROUTER SOLICIT";
8210 			break;
8211 		case ND_NEIGHBOR_ADVERT:
8212 			str = "NEIGHBOR ADVERT";
8213 			break;
8214 		case ND_NEIGHBOR_SOLICIT:
8215 			str = "NEIGHBOR SOLICIT";
8216 			break;
8217 		default:
8218 			str = "";
8219 			break;
8220 		}
8221 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8222 		    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
8223 		    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
8224 		    mnr->mnr_ip6_header_len,
8225 		    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
8226 	}
8227 }
8228 
8229 static struct mac_nat_entry *
8230 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8231 {
8232 	struct in6_addr         dst;
8233 	struct ether_header     *eh;
8234 	struct ip6_hdr          *ip6h;
8235 	struct mac_nat_entry    *mne = NULL;
8236 
8237 	eh = get_ether_ipv6_header(data, FALSE);
8238 	if (eh == NULL) {
8239 		goto done;
8240 	}
8241 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8242 	bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8243 	/* XXX validate IPv6 address */
8244 	if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8245 		goto done;
8246 	}
8247 	mne = bridge_lookup_mac_nat_entry(sc, AF_INET6, &dst);
8248 
8249 done:
8250 	return mne;
8251 }
8252 
8253 static boolean_t
8254 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8255     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8256 {
8257 	struct ether_header     *eh;
8258 	struct ip6_hdr          *ip6h;
8259 	struct in6_addr         saddr;
8260 	boolean_t               translate;
8261 
8262 	translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8263 	eh = get_ether_ipv6_header(data, TRUE);
8264 	if (eh == NULL) {
8265 		translate = FALSE;
8266 		goto done;
8267 	}
8268 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8269 	bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8270 	if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8271 		bridge_mac_nat_icmpv6_output(sc, bif, data,
8272 		    eh, ip6h, &saddr, mnr);
8273 	}
8274 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8275 		goto done;
8276 	}
8277 	(void)bridge_update_mac_nat_entry(sc, bif, AF_INET6, &saddr,
8278 	    eh->ether_shost);
8279 
8280 done:
8281 	return translate;
8282 }
8283 
8284 /*
8285  * bridge_mac_nat_input:
8286  * Process a packet arriving on the MAC NAT interface (sc_mac_nat_bif).
8287  * This interface is the "external" interface with respect to NAT.
8288  * The interface is only capable of receiving a single MAC address
8289  * (e.g. a Wi-Fi STA interface).
8290  *
8291  * When a packet arrives on the external interface, look up the destination
8292  * IP address in the mac_nat_entry table. If there is a match, *is_input
8293  * is set to TRUE if it's for the MAC NAT interface, otherwise *is_input
8294  * is set to FALSE and translate the MAC address if necessary.
8295  *
8296  * Returns:
8297  * The internal interface to direct the packet to, or NULL if the packet
8298  * should not be redirected.
8299  *
8300  * *data may be updated to point at a different mbuf chain, or set to NULL
8301  * if the chain was deallocated during processing.
8302  */
8303 static ifnet_t
8304 bridge_mac_nat_input(struct bridge_softc *sc, mbuf_t *data,
8305     boolean_t *is_input)
8306 {
8307 	ifnet_t                 dst_if = NULL;
8308 	struct ether_header     *eh;
8309 	uint16_t                ether_type;
8310 	boolean_t               is_unicast;
8311 	mbuf_t                  m = *data;
8312 	struct mac_nat_entry    *mne = NULL;
8313 
8314 	BRIDGE_LOCK_ASSERT_HELD(sc);
8315 	*is_input = FALSE;
8316 	assert(sc->sc_mac_nat_bif != NULL);
8317 	is_unicast = ((m->m_flags & (M_BCAST | M_MCAST)) == 0);
8318 	eh = mtod(m, struct ether_header *);
8319 	ether_type = ntohs(eh->ether_type);
8320 	switch (ether_type) {
8321 	case ETHERTYPE_ARP:
8322 		mne = bridge_mac_nat_arp_input(sc, data);
8323 		break;
8324 	case ETHERTYPE_IP:
8325 		if (is_unicast) {
8326 			mne = bridge_mac_nat_ip_input(sc, data);
8327 		}
8328 		break;
8329 	case ETHERTYPE_IPV6:
8330 		if (is_unicast) {
8331 			mne = bridge_mac_nat_ipv6_input(sc, data);
8332 		}
8333 		break;
8334 	default:
8335 		break;
8336 	}
8337 	if (mne != NULL) {
8338 		if (is_unicast) {
8339 			if (m != *data) {
8340 				/* it may have changed */
8341 				eh = mtod(*data, struct ether_header *);
8342 			}
8343 			bcopy(mne->mne_mac, eh->ether_dhost,
8344 			    sizeof(eh->ether_dhost));
8345 		}
8346 		dst_if = mne->mne_bif->bif_ifp;
8347 		*is_input = (mne->mne_bif == sc->sc_mac_nat_bif);
8348 	}
8349 	return dst_if;
8350 }
8351 
8352 /*
8353  * bridge_mac_nat_output:
8354  * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8355  * from the interface 'bif'.
8356  *
8357  * Create a mac_nat_entry containing the source IP address and MAC address
8358  * from the packet. Populate a mac_nat_record with information detailing
8359  * how to translate the packet. Translation takes place later when
8360  * the bridge lock is no longer held.
8361  *
8362  * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8363  * interface is generating an output packet. No translation is required in this
8364  * case, we just record the IP address used to prevent another bif from
8365  * claiming our IP address.
8366  *
8367  * Returns:
8368  * TRUE if the packet should be translated (*mnr updated as well),
8369  * FALSE otherwise.
8370  *
8371  * *data may be updated to point at a different mbuf chain or NULL if
8372  * the chain was deallocated during processing.
8373  */
8374 
8375 static boolean_t
8376 bridge_mac_nat_output(struct bridge_softc *sc,
8377     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8378 {
8379 	struct ether_header     *eh;
8380 	uint16_t                ether_type;
8381 	boolean_t               translate = FALSE;
8382 
8383 	BRIDGE_LOCK_ASSERT_HELD(sc);
8384 	assert(sc->sc_mac_nat_bif != NULL);
8385 
8386 	eh = mtod(*data, struct ether_header *);
8387 	ether_type = ntohs(eh->ether_type);
8388 	if (mnr != NULL) {
8389 		bzero(mnr, sizeof(*mnr));
8390 		mnr->mnr_ether_type = ether_type;
8391 	}
8392 	switch (ether_type) {
8393 	case ETHERTYPE_ARP:
8394 		translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8395 		break;
8396 	case ETHERTYPE_IP:
8397 		translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8398 		break;
8399 	case ETHERTYPE_IPV6:
8400 		translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8401 		break;
8402 	default:
8403 		break;
8404 	}
8405 	return translate;
8406 }
8407 
8408 static void
8409 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8410     const caddr_t eaddr)
8411 {
8412 	errno_t                 error;
8413 
8414 	if (mnr->mnr_arp_offset == 0) {
8415 		return;
8416 	}
8417 	/* replace the source hardware address */
8418 	error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8419 	    ETHER_ADDR_LEN, eaddr,
8420 	    MBUF_DONTWAIT);
8421 	if (error != 0) {
8422 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8423 		    "mbuf_copyback failed");
8424 		m_freem(*data);
8425 		*data = NULL;
8426 	}
8427 	return;
8428 }
8429 
8430 static void
8431 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8432 {
8433 	errno_t         error;
8434 	size_t          offset;
8435 
8436 	if (mnr->mnr_ip_header_len == 0) {
8437 		return;
8438 	}
8439 	/* update the UDP checksum */
8440 	offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8441 	error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8442 	    sizeof(mnr->mnr_ip_udp_csum),
8443 	    &mnr->mnr_ip_udp_csum,
8444 	    MBUF_DONTWAIT);
8445 	if (error != 0) {
8446 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8447 		    "mbuf_copyback uh_sum failed");
8448 		m_freem(*data);
8449 		*data = NULL;
8450 	}
8451 	/* update the DHCP must broadcast flag */
8452 	offset += sizeof(struct udphdr);
8453 	error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8454 	    sizeof(mnr->mnr_ip_dhcp_flags),
8455 	    &mnr->mnr_ip_dhcp_flags,
8456 	    MBUF_DONTWAIT);
8457 	if (error != 0) {
8458 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8459 		    "mbuf_copyback dp_flags failed");
8460 		m_freem(*data);
8461 		*data = NULL;
8462 	}
8463 }
8464 
8465 static void
8466 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8467     const caddr_t eaddr)
8468 {
8469 	uint16_t        cksum;
8470 	errno_t         error;
8471 	mbuf_t          m = *data;
8472 
8473 	if (mnr->mnr_ip6_header_len == 0) {
8474 		return;
8475 	}
8476 	switch (mnr->mnr_ip6_icmp6_type) {
8477 	case ND_ROUTER_SOLICIT:
8478 	case ND_NEIGHBOR_SOLICIT:
8479 	case ND_NEIGHBOR_ADVERT:
8480 		if (mnr->mnr_ip6_lladdr_offset == 0) {
8481 			/* nothing to do */
8482 			return;
8483 		}
8484 		break;
8485 	default:
8486 		return;
8487 	}
8488 
8489 	/*
8490 	 * replace the lladdr
8491 	 */
8492 	error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8493 	    ETHER_ADDR_LEN, eaddr,
8494 	    MBUF_DONTWAIT);
8495 	if (error != 0) {
8496 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8497 		    "mbuf_copyback lladdr failed");
8498 		m_freem(m);
8499 		*data = NULL;
8500 		return;
8501 	}
8502 
8503 	/*
8504 	 * recompute the icmp6 checksum
8505 	 */
8506 
8507 	/* skip past the ethernet header */
8508 	mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
8509 	    mbuf_len(m) - ETHER_HDR_LEN);
8510 	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
8511 
8512 #define CKSUM_OFFSET_ICMP6      offsetof(struct icmp6_hdr, icmp6_cksum)
8513 	/* set the checksum to zero */
8514 	cksum = 0;
8515 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8516 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8517 	if (error != 0) {
8518 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8519 		    "mbuf_copyback cksum=0 failed");
8520 		m_freem(m);
8521 		*data = NULL;
8522 		return;
8523 	}
8524 	/* compute and set the new checksum */
8525 	cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8526 	    mnr->mnr_ip6_icmp6_len);
8527 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8528 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8529 	if (error != 0) {
8530 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8531 		    "mbuf_copyback cksum failed");
8532 		m_freem(m);
8533 		*data = NULL;
8534 		return;
8535 	}
8536 	/* restore the ethernet header */
8537 	mbuf_setdata(m, (char *)mbuf_data(m) - ETHER_HDR_LEN,
8538 	    mbuf_len(m) + ETHER_HDR_LEN);
8539 	mbuf_pkthdr_adjustlen(m, ETHER_HDR_LEN);
8540 	return;
8541 }
8542 
8543 static void
8544 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8545     const caddr_t eaddr)
8546 {
8547 	struct ether_header     *eh;
8548 
8549 	/* replace the source ethernet address with the single MAC */
8550 	eh = mtod(*data, struct ether_header *);
8551 	bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8552 	switch (mnr->mnr_ether_type) {
8553 	case ETHERTYPE_ARP:
8554 		bridge_mac_nat_arp_translate(data, mnr, eaddr);
8555 		break;
8556 
8557 	case ETHERTYPE_IP:
8558 		bridge_mac_nat_ip_translate(data, mnr);
8559 		break;
8560 
8561 	case ETHERTYPE_IPV6:
8562 		bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8563 		break;
8564 
8565 	default:
8566 		break;
8567 	}
8568 	return;
8569 }
8570 
8571 /*
8572  * bridge packet filtering
8573  */
8574 
8575 /*
8576  * Perform basic checks on header size since
8577  * pfil assumes ip_input has already processed
8578  * it for it.  Cut-and-pasted from ip_input.c.
8579  * Given how simple the IPv6 version is,
8580  * does the IPv4 version really need to be
8581  * this complicated?
8582  *
8583  * XXX Should we update ipstat here, or not?
8584  * XXX Right now we update ipstat but not
8585  * XXX csum_counter.
8586  */
8587 static int
8588 bridge_ip_checkbasic(struct mbuf **mp)
8589 {
8590 	struct mbuf *m = *mp;
8591 	struct ip *ip;
8592 	int len, hlen;
8593 	u_short sum;
8594 
8595 	if (*mp == NULL) {
8596 		return -1;
8597 	}
8598 
8599 	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8600 		/* max_linkhdr is already rounded up to nearest 4-byte */
8601 		if ((m = m_copyup(m, sizeof(struct ip),
8602 		    max_linkhdr)) == NULL) {
8603 			/* XXXJRT new stat, please */
8604 			ipstat.ips_toosmall++;
8605 			goto bad;
8606 		}
8607 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8608 		if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8609 			ipstat.ips_toosmall++;
8610 			goto bad;
8611 		}
8612 	}
8613 	ip = mtod(m, struct ip *);
8614 	if (ip == NULL) {
8615 		goto bad;
8616 	}
8617 
8618 	if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
8619 		ipstat.ips_badvers++;
8620 		goto bad;
8621 	}
8622 	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8623 	if (hlen < (int)sizeof(struct ip)) {  /* minimum header length */
8624 		ipstat.ips_badhlen++;
8625 		goto bad;
8626 	}
8627 	if (hlen > m->m_len) {
8628 		if ((m = m_pullup(m, hlen)) == 0) {
8629 			ipstat.ips_badhlen++;
8630 			goto bad;
8631 		}
8632 		ip = mtod(m, struct ip *);
8633 		if (ip == NULL) {
8634 			goto bad;
8635 		}
8636 	}
8637 
8638 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
8639 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
8640 	} else {
8641 		if (hlen == sizeof(struct ip)) {
8642 			sum = in_cksum_hdr(ip);
8643 		} else {
8644 			sum = in_cksum(m, hlen);
8645 		}
8646 	}
8647 	if (sum) {
8648 		ipstat.ips_badsum++;
8649 		goto bad;
8650 	}
8651 
8652 	/* Retrieve the packet length. */
8653 	len = ntohs(ip->ip_len);
8654 
8655 	/*
8656 	 * Check for additional length bogosity
8657 	 */
8658 	if (len < hlen) {
8659 		ipstat.ips_badlen++;
8660 		goto bad;
8661 	}
8662 
8663 	/*
8664 	 * Check that the amount of data in the buffers
8665 	 * is as at least much as the IP header would have us expect.
8666 	 * Drop packet if shorter than we expect.
8667 	 */
8668 	if (m->m_pkthdr.len < len) {
8669 		ipstat.ips_tooshort++;
8670 		goto bad;
8671 	}
8672 
8673 	/* Checks out, proceed */
8674 	*mp = m;
8675 	return 0;
8676 
8677 bad:
8678 	*mp = m;
8679 	return -1;
8680 }
8681 
8682 /*
8683  * Same as above, but for IPv6.
8684  * Cut-and-pasted from ip6_input.c.
8685  * XXX Should we update ip6stat, or not?
8686  */
8687 static int
8688 bridge_ip6_checkbasic(struct mbuf **mp)
8689 {
8690 	struct mbuf *m = *mp;
8691 	struct ip6_hdr *ip6;
8692 
8693 	/*
8694 	 * If the IPv6 header is not aligned, slurp it up into a new
8695 	 * mbuf with space for link headers, in the event we forward
8696 	 * it.  Otherwise, if it is aligned, make sure the entire base
8697 	 * IPv6 header is in the first mbuf of the chain.
8698 	 */
8699 	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8700 		struct ifnet *inifp = m->m_pkthdr.rcvif;
8701 		/* max_linkhdr is already rounded up to nearest 4-byte */
8702 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
8703 		    max_linkhdr)) == NULL) {
8704 			/* XXXJRT new stat, please */
8705 			ip6stat.ip6s_toosmall++;
8706 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8707 			goto bad;
8708 		}
8709 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
8710 		struct ifnet *inifp = m->m_pkthdr.rcvif;
8711 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
8712 			ip6stat.ip6s_toosmall++;
8713 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8714 			goto bad;
8715 		}
8716 	}
8717 
8718 	ip6 = mtod(m, struct ip6_hdr *);
8719 
8720 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
8721 		ip6stat.ip6s_badvers++;
8722 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
8723 		goto bad;
8724 	}
8725 
8726 	/* Checks out, proceed */
8727 	*mp = m;
8728 	return 0;
8729 
8730 bad:
8731 	*mp = m;
8732 	return -1;
8733 }
8734 
8735 /*
8736  * the PF routines expect to be called from ip_input, so we
8737  * need to do and undo here some of the same processing.
8738  *
8739  * XXX : this is heavily inspired on bridge_pfil()
8740  */
8741 static int
8742 bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
8743     int input)
8744 {
8745 	/*
8746 	 * XXX : mpetit : heavily inspired by bridge_pfil()
8747 	 */
8748 
8749 	int snap, error, i, hlen;
8750 	struct ether_header *eh1, eh2;
8751 	struct ip *ip;
8752 	struct llc llc1;
8753 	u_int16_t ether_type;
8754 
8755 	snap = 0;
8756 	error = -1;     /* Default error if not error == 0 */
8757 
8758 	if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
8759 		return 0; /* filtering is disabled */
8760 	}
8761 	i = min((*mp)->m_pkthdr.len, max_protohdr);
8762 	if ((*mp)->m_len < i) {
8763 		*mp = m_pullup(*mp, i);
8764 		if (*mp == NULL) {
8765 			BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
8766 			return -1;
8767 		}
8768 	}
8769 
8770 	eh1 = mtod(*mp, struct ether_header *);
8771 	ether_type = ntohs(eh1->ether_type);
8772 
8773 	/*
8774 	 * Check for SNAP/LLC.
8775 	 */
8776 	if (ether_type < ETHERMTU) {
8777 		struct llc *llc2 = (struct llc *)(eh1 + 1);
8778 
8779 		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
8780 		    llc2->llc_dsap == LLC_SNAP_LSAP &&
8781 		    llc2->llc_ssap == LLC_SNAP_LSAP &&
8782 		    llc2->llc_control == LLC_UI) {
8783 			ether_type = htons(llc2->llc_un.type_snap.ether_type);
8784 			snap = 1;
8785 		}
8786 	}
8787 
8788 	/*
8789 	 * If we're trying to filter bridge traffic, don't look at anything
8790 	 * other than IP and ARP traffic.  If the filter doesn't understand
8791 	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
8792 	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
8793 	 * but of course we don't have an AppleTalk filter to begin with.
8794 	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
8795 	 * ARP traffic.)
8796 	 */
8797 	switch (ether_type) {
8798 	case ETHERTYPE_ARP:
8799 	case ETHERTYPE_REVARP:
8800 		return 0;         /* Automatically pass */
8801 
8802 	case ETHERTYPE_IP:
8803 	case ETHERTYPE_IPV6:
8804 		break;
8805 	default:
8806 		/*
8807 		 * Check to see if the user wants to pass non-ip
8808 		 * packets, these will not be checked by pf and
8809 		 * passed unconditionally so the default is to drop.
8810 		 */
8811 		if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
8812 			goto bad;
8813 		}
8814 		break;
8815 	}
8816 
8817 	/* Strip off the Ethernet header and keep a copy. */
8818 	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
8819 	m_adj(*mp, ETHER_HDR_LEN);
8820 
8821 	/* Strip off snap header, if present */
8822 	if (snap) {
8823 		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
8824 		m_adj(*mp, sizeof(struct llc));
8825 	}
8826 
8827 	/*
8828 	 * Check the IP header for alignment and errors
8829 	 */
8830 	switch (ether_type) {
8831 	case ETHERTYPE_IP:
8832 		error = bridge_ip_checkbasic(mp);
8833 		break;
8834 	case ETHERTYPE_IPV6:
8835 		error = bridge_ip6_checkbasic(mp);
8836 		break;
8837 	default:
8838 		error = 0;
8839 		break;
8840 	}
8841 	if (error) {
8842 		goto bad;
8843 	}
8844 
8845 	error = 0;
8846 
8847 	/*
8848 	 * Run the packet through pf rules
8849 	 */
8850 	switch (ether_type) {
8851 	case ETHERTYPE_IP:
8852 		/*
8853 		 * before calling the firewall, swap fields the same as
8854 		 * IP does. here we assume the header is contiguous
8855 		 */
8856 		ip = mtod(*mp, struct ip *);
8857 
8858 		ip->ip_len = ntohs(ip->ip_len);
8859 		ip->ip_off = ntohs(ip->ip_off);
8860 
8861 		if (ifp != NULL) {
8862 			error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
8863 		}
8864 
8865 		if (*mp == NULL || error != 0) { /* filter may consume */
8866 			break;
8867 		}
8868 
8869 		/* Recalculate the ip checksum and restore byte ordering */
8870 		ip = mtod(*mp, struct ip *);
8871 		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8872 		if (hlen < (int)sizeof(struct ip)) {
8873 			goto bad;
8874 		}
8875 		if (hlen > (*mp)->m_len) {
8876 			if ((*mp = m_pullup(*mp, hlen)) == 0) {
8877 				goto bad;
8878 			}
8879 			ip = mtod(*mp, struct ip *);
8880 			if (ip == NULL) {
8881 				goto bad;
8882 			}
8883 		}
8884 		ip->ip_len = htons(ip->ip_len);
8885 		ip->ip_off = htons(ip->ip_off);
8886 		ip->ip_sum = 0;
8887 		if (hlen == sizeof(struct ip)) {
8888 			ip->ip_sum = in_cksum_hdr(ip);
8889 		} else {
8890 			ip->ip_sum = in_cksum(*mp, hlen);
8891 		}
8892 		break;
8893 
8894 	case ETHERTYPE_IPV6:
8895 		if (ifp != NULL) {
8896 			error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
8897 		}
8898 
8899 		if (*mp == NULL || error != 0) { /* filter may consume */
8900 			break;
8901 		}
8902 		break;
8903 	default:
8904 		error = 0;
8905 		break;
8906 	}
8907 
8908 	if (*mp == NULL) {
8909 		return error;
8910 	}
8911 	if (error != 0) {
8912 		goto bad;
8913 	}
8914 
8915 	error = -1;
8916 
8917 	/*
8918 	 * Finally, put everything back the way it was and return
8919 	 */
8920 	if (snap) {
8921 		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
8922 		if (*mp == NULL) {
8923 			return error;
8924 		}
8925 		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
8926 	}
8927 
8928 	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
8929 	if (*mp == NULL) {
8930 		return error;
8931 	}
8932 	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
8933 
8934 	return 0;
8935 
8936 bad:
8937 	m_freem(*mp);
8938 	*mp = NULL;
8939 	return error;
8940 }
8941 
8942 /*
8943  * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
8944  * All rights reserved.
8945  *
8946  * Redistribution and use in source and binary forms, with or without
8947  * modification, are permitted provided that the following conditions
8948  * are met:
8949  *   1. Redistributions of source code must retain the above copyright
8950  *      notice, this list of conditions and the following disclaimer.
8951  *   2. Redistributions in binary form must reproduce the above copyright
8952  *      notice, this list of conditions and the following disclaimer in the
8953  *      documentation and/or other materials provided with the distribution.
8954  *
8955  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
8956  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
8957  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
8958  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
8959  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
8960  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
8961  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
8962  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
8963  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
8964  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
8965  * SUCH DAMAGE.
8966  */
8967 
8968 /*
8969  * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
8970  *
8971  * Create a queue of packets/segments which fit the given mss + hdr_len.
8972  * m0 points to mbuf chain to be segmented.
8973  * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
8974  * into segments of length MSS bytes and then copy the first hdr_len bytes
8975  * from m0 at the top of each segment.
8976  * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
8977  * in each segment after the first hdr_len bytes
8978  *
8979  * Return the new queue with the segments on success, NULL on failure.
8980  * (the mbuf queue is freed in this case).
8981  * nsegs contains the number of segments generated.
8982  */
8983 
8984 static struct mbuf *
8985 m_seg(struct mbuf *m0, int hdr_len, int mss, int *nsegs,
8986     char * hdr2_buf, int hdr2_len)
8987 {
8988 	int off = 0, n, firstlen;
8989 	struct mbuf **mnext, *mseg;
8990 	int total_len = m0->m_pkthdr.len;
8991 
8992 	/*
8993 	 * Segmentation useless
8994 	 */
8995 	if (total_len <= hdr_len + mss) {
8996 		return m0;
8997 	}
8998 
8999 	if (hdr2_buf == NULL || hdr2_len <= 0) {
9000 		hdr2_buf = NULL;
9001 		hdr2_len = 0;
9002 	}
9003 
9004 	off = hdr_len + mss;
9005 	firstlen = mss; /* first segment stored in the original mbuf */
9006 
9007 	mnext = &(m0->m_nextpkt); /* pointer to next packet */
9008 
9009 	for (n = 1; off < total_len; off += mss, n++) {
9010 		struct mbuf *m;
9011 		/*
9012 		 * Copy the header from the original packet
9013 		 * and create a new mbuf chain
9014 		 */
9015 		if (MHLEN < hdr_len) {
9016 			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
9017 		} else {
9018 			m = m_gethdr(M_NOWAIT, MT_DATA);
9019 		}
9020 
9021 		if (m == NULL) {
9022 #ifdef GSO_DEBUG
9023 			D("MGETHDR error\n");
9024 #endif
9025 			goto err;
9026 		}
9027 
9028 		m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
9029 
9030 		m->m_len = hdr_len;
9031 		/*
9032 		 * if the optional header is present, copy it
9033 		 */
9034 		if (hdr2_buf != NULL) {
9035 			m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
9036 		}
9037 
9038 		m->m_flags |= (m0->m_flags & M_COPYFLAGS);
9039 		if (off + mss >= total_len) {           /* last segment */
9040 			mss = total_len - off;
9041 		}
9042 		/*
9043 		 * Copy the payload from original packet
9044 		 */
9045 		mseg = m_copym(m0, off, mss, M_NOWAIT);
9046 		if (mseg == NULL) {
9047 			m_freem(m);
9048 #ifdef GSO_DEBUG
9049 			D("m_copym error\n");
9050 #endif
9051 			goto err;
9052 		}
9053 		m_cat(m, mseg);
9054 
9055 		m->m_pkthdr.len = hdr_len + hdr2_len + mss;
9056 		m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
9057 		/*
9058 		 * Copy the checksum flags and data (in_cksum() need this)
9059 		 */
9060 		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
9061 		m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
9062 		m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
9063 
9064 		*mnext = m;
9065 		mnext = &(m->m_nextpkt);
9066 	}
9067 
9068 	/*
9069 	 * Update first segment.
9070 	 * If the optional header is present, is necessary
9071 	 * to insert it into the first segment.
9072 	 */
9073 	if (hdr2_buf == NULL) {
9074 		m_adj(m0, hdr_len + firstlen - total_len);
9075 		m0->m_pkthdr.len = hdr_len + firstlen;
9076 	} else {
9077 		mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
9078 		if (mseg == NULL) {
9079 #ifdef GSO_DEBUG
9080 			D("m_copym error\n");
9081 #endif
9082 			goto err;
9083 		}
9084 		m_adj(m0, hdr_len - total_len);
9085 		m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
9086 		m_cat(m0, mseg);
9087 		m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
9088 	}
9089 
9090 	if (nsegs != NULL) {
9091 		*nsegs = n;
9092 	}
9093 	return m0;
9094 err:
9095 	while (m0 != NULL) {
9096 		mseg = m0->m_nextpkt;
9097 		m0->m_nextpkt = NULL;
9098 		m_freem(m0);
9099 		m0 = mseg;
9100 	}
9101 	return NULL;
9102 }
9103 
9104 /*
9105  * Wrappers of IPv4 checksum functions
9106  */
9107 static inline void
9108 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
9109 {
9110 	m->m_data += mac_hlen;
9111 	m->m_len -= mac_hlen;
9112 	m->m_pkthdr.len -= mac_hlen;
9113 #if __FreeBSD_version < 1000000
9114 	ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
9115 #endif
9116 
9117 	in_delayed_cksum(m);
9118 
9119 #if __FreeBSD_version < 1000000
9120 	ip->ip_len = htons(ip->ip_len);
9121 #endif
9122 	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
9123 	m->m_len += mac_hlen;
9124 	m->m_pkthdr.len += mac_hlen;
9125 	m->m_data -= mac_hlen;
9126 }
9127 
9128 static inline void
9129 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
9130 {
9131 	m->m_data += mac_hlen;
9132 
9133 	ip->ip_sum = in_cksum(m, ip_hlen);
9134 
9135 	m->m_pkthdr.csum_flags &= ~CSUM_IP;
9136 	m->m_data -= mac_hlen;
9137 }
9138 
9139 /*
9140  * Structure that contains the state during the TCP segmentation
9141  */
9142 struct gso_ip_tcp_state {
9143 	void    (*update)
9144 	(struct gso_ip_tcp_state*, struct mbuf*);
9145 	void    (*internal)
9146 	(struct gso_ip_tcp_state*, struct mbuf*);
9147 	union iphdr hdr;
9148 	struct tcphdr *tcp;
9149 	int mac_hlen;
9150 	int ip_hlen;
9151 	int tcp_hlen;
9152 	int hlen;
9153 	int pay_len;
9154 	int sw_csum;
9155 	uint32_t tcp_seq;
9156 	uint16_t ip_id;
9157 	boolean_t is_tx;
9158 };
9159 
9160 /*
9161  * Update the pointers to TCP and IPv4 headers
9162  */
9163 static inline void
9164 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9165 {
9166 	state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) + state->mac_hlen);
9167 	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) + state->ip_hlen);
9168 	state->pay_len = m->m_pkthdr.len - state->hlen;
9169 }
9170 
9171 /*
9172  * Set properly the TCP and IPv4 headers
9173  */
9174 static inline void
9175 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9176 {
9177 	/*
9178 	 * Update IP header
9179 	 */
9180 	state->hdr.ip->ip_id = htons((state->ip_id)++);
9181 	state->hdr.ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
9182 	/*
9183 	 * TCP Checksum
9184 	 */
9185 	state->tcp->th_sum = 0;
9186 	state->tcp->th_sum = in_pseudo(state->hdr.ip->ip_src.s_addr,
9187 	    state->hdr.ip->ip_dst.s_addr,
9188 	    htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
9189 	/*
9190 	 * Checksum HW not supported (TCP)
9191 	 */
9192 	if (state->sw_csum & CSUM_DELAY_DATA) {
9193 		gso_ipv4_data_cksum(m, state->hdr.ip, state->mac_hlen);
9194 	}
9195 
9196 	state->tcp_seq += state->pay_len;
9197 	/*
9198 	 * IP Checksum
9199 	 */
9200 	state->hdr.ip->ip_sum = 0;
9201 	/*
9202 	 * Checksum HW not supported (IP)
9203 	 */
9204 	if (state->sw_csum & CSUM_IP) {
9205 		gso_ipv4_hdr_cksum(m, state->hdr.ip, state->mac_hlen, state->ip_hlen);
9206 	}
9207 }
9208 
9209 
9210 /*
9211  * Updates the pointers to TCP and IPv6 headers
9212  */
9213 static inline void
9214 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9215 {
9216 	state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + state->mac_hlen);
9217 	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) + state->ip_hlen);
9218 	state->pay_len = m->m_pkthdr.len - state->hlen;
9219 }
9220 
9221 /*
9222  * Sets properly the TCP and IPv6 headers
9223  */
9224 static inline void
9225 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9226 {
9227 	state->hdr.ip6->ip6_plen = htons(m->m_pkthdr.len -
9228 	    state->mac_hlen - state->ip_hlen);
9229 	/*
9230 	 * TCP Checksum
9231 	 */
9232 	state->tcp->th_sum = 0;
9233 	state->tcp->th_sum = in6_pseudo(&state->hdr.ip6->ip6_src,
9234 	    &state->hdr.ip6->ip6_dst,
9235 	    htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
9236 	/*
9237 	 * Checksum HW not supported (TCP)
9238 	 */
9239 	if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
9240 		(void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
9241 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
9242 	}
9243 	state->tcp_seq += state->pay_len;
9244 }
9245 
9246 /*
9247  * Init the state during the TCP segmentation
9248  */
9249 static void
9250 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
9251     bool is_ipv4, int mac_hlen, int ip_hlen,
9252     void * ip_hdr, struct tcphdr * tcp_hdr)
9253 {
9254 #pragma unused(ifp)
9255 
9256 	state->hdr.ptr = ip_hdr;
9257 	state->tcp = tcp_hdr;
9258 	if (is_ipv4) {
9259 		state->ip_id = ntohs(state->hdr.ip->ip_id);
9260 		state->update = gso_ipv4_tcp_update;
9261 		state->internal = gso_ipv4_tcp_internal;
9262 		state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
9263 	} else {
9264 		state->update = gso_ipv6_tcp_update;
9265 		state->internal = gso_ipv6_tcp_internal;
9266 		state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
9267 	}
9268 	state->mac_hlen = mac_hlen;
9269 	state->ip_hlen = ip_hlen;
9270 	state->tcp_hlen = state->tcp->th_off << 2;
9271 	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
9272 	state->tcp_seq = ntohl(state->tcp->th_seq);
9273 	//state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
9274 	return;
9275 }
9276 
9277 /*
9278  * GSO on TCP/IP (v4 or v6)
9279  *
9280  * If is_tx is TRUE, segmented packets are transmitted after they are
9281  * segmented.
9282  *
9283  * If is_tx is FALSE, the segmented packets are returned as a chain in *mp.
9284  */
9285 static int
9286 gso_ip_tcp(struct ifnet *ifp, struct mbuf **mp, struct gso_ip_tcp_state *state,
9287     boolean_t is_tx)
9288 {
9289 	struct mbuf *m, *m_tx;
9290 	int error = 0;
9291 	int mss = 0;
9292 	int nsegs = 0;
9293 	struct mbuf *m0 = *mp;
9294 #ifdef GSO_STATS
9295 	int total_len = m0->m_pkthdr.len;
9296 #endif /* GSO_STATS */
9297 
9298 #if 1
9299 	mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen;
9300 #else
9301 	if (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) {/* TSO with GSO */
9302 		mss = ifp->if_hw_tsomax - state->ip_hlen - state->tcp_hlen;
9303 	} else {
9304 		mss = m0->m_pkthdr.tso_segsz;
9305 	}
9306 #endif
9307 
9308 	*mp = m0 = m_seg(m0, state->hlen, mss, &nsegs, 0, 0);
9309 	if (m0 == NULL) {
9310 		return ENOBUFS; /* XXX ok? */
9311 	}
9312 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
9313 	    "%s %s mss %d nsegs %d",
9314 	    ifp->if_xname,
9315 	    is_tx ? "TX" : "RX",
9316 	    mss, nsegs);
9317 	/*
9318 	 * XXX-ste: can this happen?
9319 	 */
9320 	if (m0->m_nextpkt == NULL) {
9321 #ifdef GSO_DEBUG
9322 		D("only 1 segment");
9323 #endif
9324 		if (is_tx) {
9325 			error = bridge_transmit(ifp, m0);
9326 		}
9327 		return error;
9328 	}
9329 #ifdef GSO_STATS
9330 	GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
9331 	GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
9332 	GSOSTAT_ADD(tcp.gsos_osegments, nsegs);
9333 #endif /* GSO_STATS */
9334 
9335 	/* first pkt */
9336 	m = m0;
9337 
9338 	state->update(state, m);
9339 
9340 	do {
9341 		state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
9342 
9343 		state->internal(state, m);
9344 		m_tx = m;
9345 		m = m->m_nextpkt;
9346 		if (is_tx) {
9347 			m_tx->m_nextpkt = NULL;
9348 			if ((error = bridge_transmit(ifp, m_tx)) != 0) {
9349 				/*
9350 				 * XXX: If a segment can not be sent, discard the following
9351 				 * segments and propagate the error to the upper levels.
9352 				 * In this way the TCP retransmits all the initial packet.
9353 				 */
9354 #ifdef GSO_DEBUG
9355 				D("if_transmit error\n");
9356 #endif
9357 				goto err;
9358 			}
9359 		}
9360 		state->update(state, m);
9361 
9362 		state->tcp->th_flags &= ~TH_CWR;
9363 		state->tcp->th_seq = htonl(state->tcp_seq);
9364 	} while (m->m_nextpkt);
9365 
9366 	/* last pkt */
9367 	state->internal(state, m);
9368 
9369 	if (is_tx) {
9370 		error = bridge_transmit(ifp, m);
9371 #ifdef GSO_DEBUG
9372 		if (error) {
9373 			D("last if_transmit error\n");
9374 			D("error - type = %d \n", error);
9375 		}
9376 #endif
9377 	}
9378 #ifdef GSO_STATS
9379 	if (!error) {
9380 		GSOSTAT_INC(tcp.gsos_segmented);
9381 		GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
9382 		GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
9383 		GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
9384 	}
9385 #endif /* GSO_STATS */
9386 	return error;
9387 
9388 err:
9389 #ifdef GSO_DEBUG
9390 	D("error - type = %d \n", error);
9391 #endif
9392 	while (m != NULL) {
9393 		m_tx = m->m_nextpkt;
9394 		m->m_nextpkt = NULL;
9395 		m_freem(m);
9396 		m = m_tx;
9397 	}
9398 	return error;
9399 }
9400 
9401 /*
9402  * GSO for TCP/IPv[46]
9403  */
9404 static int
9405 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
9406     boolean_t is_tx)
9407 {
9408 	int error;
9409 	ip_packet_info  info;
9410 	uint32_t csum_flags;
9411 	struct gso_ip_tcp_state state;
9412 	struct bripstats stats; /* XXX ignored */
9413 	struct tcphdr *tcp;
9414 
9415 	if (!is_tx && ipforwarding == 0) {
9416 		/* no need to segment if the packet will not be forwarded */
9417 		return 0;
9418 	}
9419 	error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4, &info, &stats);
9420 	if (error != 0) {
9421 		if (*mp != NULL) {
9422 			m_freem(*mp);
9423 			*mp = NULL;
9424 		}
9425 		return error;
9426 	}
9427 	if (info.ip_proto_hdr == NULL) {
9428 		/* not a TCP packet */
9429 		return 0;
9430 	}
9431 	tcp = (struct tcphdr *)(void *)info.ip_proto_hdr;
9432 	gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
9433 	    info.ip_hlen, info.ip_hdr.ptr, tcp);
9434 	if (is_ipv4) {
9435 		csum_flags = CSUM_DELAY_DATA; /* XXX */
9436 		if (!is_tx) {
9437 			/* if RX to our local IP address, don't segment */
9438 			struct in_addr  dst_ip;
9439 
9440 			bcopy(&state.hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
9441 			if (in_addr_is_ours(dst_ip)) {
9442 				return 0;
9443 			}
9444 		}
9445 	} else {
9446 		csum_flags = CSUM_DELAY_IPV6_DATA; /* XXX */
9447 		if (!is_tx) {
9448 			/* if RX to our local IP address, don't segment */
9449 			struct in6_addr dst_ip6;
9450 
9451 			bcopy(&state.hdr.ip6->ip6_dst, &dst_ip6,
9452 			    sizeof(dst_ip6));
9453 			if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(&dst_ip6)) {
9454 				dst_ip6.s6_addr16[1] = htons(ifp->if_index);
9455 			}
9456 			if (in6_addr_is_ours(&dst_ip6, ifp->if_index)) {
9457 				/* local IP address, no need to segment */
9458 				return 0;
9459 			}
9460 		}
9461 	}
9462 	(*mp)->m_pkthdr.csum_flags = csum_flags;
9463 	(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
9464 	return gso_ip_tcp(ifp, mp, &state, is_tx);
9465 }
9466