1 /*
2 * Copyright (c) 2004-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */
30 /*
31 * Copyright 2001 Wasabi Systems, Inc.
32 * All rights reserved.
33 *
34 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed for the NetBSD Project by
47 * Wasabi Systems, Inc.
48 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49 * or promote products derived from this software without specific prior
50 * written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
56 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62 * POSSIBILITY OF SUCH DAMAGE.
63 */
64
65 /*
66 * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67 * All rights reserved.
68 *
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
71 * are met:
72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88 * POSSIBILITY OF SUCH DAMAGE.
89 *
90 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91 */
92
93 /*
94 * Network interface bridge support.
95 *
96 * TODO:
97 *
98 * - Currently only supports Ethernet-like interfaces (Ethernet,
99 * 802.11, VLANs on Ethernet, etc.) Figure out a nice way
100 * to bridge other types of interfaces (FDDI-FDDI, and maybe
101 * consider heterogeneous bridges).
102 *
103 * - GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104 */
105
106 #include <sys/cdefs.h>
107
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123
124 #include <sys/kauth.h>
125
126 #include <kern/thread_call.h>
127
128 #include <libkern/libkern.h>
129
130 #include <kern/zalloc.h>
131
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141 #include <net/pfvar.h>
142
143 #include <netinet/in.h> /* for struct arpcom */
144 #include <netinet/tcp.h> /* for struct tcphdr */
145 #include <netinet/in_systm.h>
146 #include <netinet/in_var.h>
147 #define _IP_VHL
148 #include <netinet/ip.h>
149 #include <netinet/ip_var.h>
150 #include <netinet/ip6.h>
151 #include <netinet6/ip6_var.h>
152 #ifdef DEV_CARP
153 #include <netinet/ip_carp.h>
154 #endif
155 #include <netinet/if_ether.h> /* for struct arpcom */
156 #include <net/bridgestp.h>
157 #include <net/if_bridgevar.h>
158 #include <net/if_llc.h>
159 #if NVLAN > 0
160 #include <net/if_vlan_var.h>
161 #endif /* NVLAN > 0 */
162
163 #include <net/if_ether.h>
164 #include <net/dlil.h>
165 #include <net/kpi_interfacefilter.h>
166
167 #include <net/route.h>
168 #include <dev/random/randomdev.h>
169
170 #include <netinet/bootp.h>
171 #include <netinet/dhcp.h>
172
173 #if SKYWALK
174 #include <skywalk/nexus/netif/nx_netif.h>
175 #endif /* SKYWALK */
176
177 #include <os/log.h>
178
179 /*
180 * if_bridge_debug, BR_DBGF_*
181 * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
182 * to enable additional logs for the corresponding bridge function
183 * - "sysctl net.link.bridge.debug" controls the value of
184 * 'if_bridge_debug'
185 */
186 static uint32_t if_bridge_debug = 0;
187 #define BR_DBGF_LIFECYCLE 0x0001
188 #define BR_DBGF_INPUT 0x0002
189 #define BR_DBGF_OUTPUT 0x0004
190 #define BR_DBGF_RT_TABLE 0x0008
191 #define BR_DBGF_DELAYED_CALL 0x0010
192 #define BR_DBGF_IOCTL 0x0020
193 #define BR_DBGF_MBUF 0x0040
194 #define BR_DBGF_MCAST 0x0080
195 #define BR_DBGF_HOSTFILTER 0x0100
196 #define BR_DBGF_CHECKSUM 0x0200
197 #define BR_DBGF_MAC_NAT 0x0400
198
199 /*
200 * if_bridge_log_level
201 * - 'if_bridge_log_level' ensures that by default important logs are
202 * logged regardless of if_bridge_debug by comparing the log level
203 * in BRIDGE_LOG to if_bridge_log_level
204 * - "sysctl net.link.bridge.log_level" controls the value of
205 * 'if_bridge_log_level'
206 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
207 * logs must use LOG_NOTICE to ensure they appear by default
208 */
209 static int if_bridge_log_level = LOG_NOTICE;
210
/*
 * BRIDGE_DBGF_ENABLED
 * - evaluates to true if any of the BR_DBGF_* bits in '__flag' is set in
 *   'if_bridge_debug'
 * - '__flag' is parenthesized so that a compound mask such as
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT) is tested as a whole instead of being
 *   split by operator precedence
 */
#define BRIDGE_DBGF_ENABLED(__flag) ((if_bridge_debug & (__flag)) != 0)

/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - macros to generate the specified log conditionally: the message is
 *   emitted when '__level' is at or below 'if_bridge_log_level', or when
 *   one of the '__dbgf' debug flags is enabled
 * - BRIDGE_LOG prefixes the message with the calling function's name;
 *   BRIDGE_LOG_SIMPLE does not
 * - '__level' is parenthesized so that an expression argument (e.g. a
 *   conditional expression) is compared as a single value
 * - '__string' must be a string literal (it is concatenated in BRIDGE_LOG)
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...) \
	do { \
		if ((__level) <= if_bridge_log_level || \
		    BRIDGE_DBGF_ENABLED(__dbgf)) { \
			os_log(OS_LOG_DEFAULT, "%s: " __string, \
			    __func__, ## __VA_ARGS__); \
		} \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...) \
	do { \
		if ((__level) <= if_bridge_log_level || \
		    BRIDGE_DBGF_ENABLED(__dbgf)) { \
			os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
		} \
	} while (0)
234
/*
 * Raw bridge mutex operations on sc_mtx, plus ownership assertions.
 */
#define _BRIDGE_LOCK(_sc)       lck_mtx_lock(&(_sc)->sc_mtx)
#define _BRIDGE_UNLOCK(_sc)     lck_mtx_unlock(&(_sc)->sc_mtx)
#define BRIDGE_LOCK_ASSERT_HELD(_sc) \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc) \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)

/*
 * BRIDGE_LOCK_DEBUG
 * - when non-zero the locking macros are routed through the bridge_*()
 *   functions; otherwise they expand inline to the code below
 */
#define BRIDGE_LOCK_DEBUG 1
#if BRIDGE_LOCK_DEBUG

/* number of entries in the lock_lr[] / unlock_lr[] history arrays */
#define BR_LCKDBG_MAX           4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      (_err) = bridge_lock2ref(_sc)
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)

/*
 * BRIDGE_LOCK2REF
 * - called with the mutex held; always releases it
 * - takes a reference on the member list and sets _err to 0, unless an
 *   exclusive request is pending, in which case _err is EBUSY
 */
#define BRIDGE_LOCK2REF(_sc, _err)      do { \
	BRIDGE_LOCK_ASSERT_HELD(_sc); \
	if ((_sc)->sc_iflist_xcnt == 0) { \
		(_sc)->sc_iflist_ref++; \
		(_err) = 0; \
	} else { \
		(_err) = EBUSY; \
	} \
	_BRIDGE_UNLOCK(_sc); \
} while (0)

/*
 * BRIDGE_UNREF
 * - drops a member list reference under the mutex
 * - if that was the last reference and an exclusive request is pending,
 *   wakes the sleeper on sc_cv after releasing the mutex
 */
#define BRIDGE_UNREF(_sc)               do { \
	_BRIDGE_LOCK(_sc); \
	(_sc)->sc_iflist_ref--; \
	if ((_sc)->sc_iflist_xcnt != 0 && (_sc)->sc_iflist_ref == 0) { \
		_BRIDGE_UNLOCK(_sc); \
		wakeup(&(_sc)->sc_cv); \
	} else { \
		_BRIDGE_UNLOCK(_sc); \
	} \
} while (0)

/*
 * BRIDGE_XLOCK
 * - called with the mutex held
 * - registers an exclusive request, then sleeps on sc_cv until every
 *   member list reference has been dropped
 */
#define BRIDGE_XLOCK(_sc)               do { \
	BRIDGE_LOCK_ASSERT_HELD(_sc); \
	(_sc)->sc_iflist_xcnt++; \
	while ((_sc)->sc_iflist_ref != 0) { \
		msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO, \
		    "BRIDGE_XLOCK", NULL); \
	} \
} while (0)

/*
 * BRIDGE_XDROP
 * - called with the mutex held; withdraws one exclusive request
 */
#define BRIDGE_XDROP(_sc)               do { \
	BRIDGE_LOCK_ASSERT_HELD(_sc); \
	(_sc)->sc_iflist_xcnt--; \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
290
/*
 * BRIDGE_BPF_MTAP_INPUT
 * - passes packet 'm' to bridge_bpf_input() when the bridge 'sc' has an
 *   input tap installed (sc_bpf_input != NULL)
 * - both variants expand to exactly one statement, so the macro can be
 *   used as the body of an unbraced if/else without capturing the else
 */
#if NBPFILTER > 0
#define BRIDGE_BPF_MTAP_INPUT(sc, m) \
	do { \
		if ((sc)->sc_bpf_input != NULL) { \
			bridge_bpf_input((sc)->sc_ifp, (m), \
			    __func__, __LINE__); \
		} \
	} while (0)
#else /* NBPFILTER */
#define BRIDGE_BPF_MTAP_INPUT(sc, m)    do { } while (0)
#endif /* NBPFILTER */
298
299 /*
300 * Initial size of the route hash table. Must be a power of two.
301 */
302 #ifndef BRIDGE_RTHASH_SIZE
303 #define BRIDGE_RTHASH_SIZE 16
304 #endif
305
306 /*
307 * Maximum size of the routing hash table
308 */
309 #define BRIDGE_RTHASH_SIZE_MAX 2048
310
311 #define BRIDGE_RTHASH_MASK(sc) ((sc)->sc_rthash_size - 1)
312
313 /*
314 * Maximum number of addresses to cache.
315 */
316 #ifndef BRIDGE_RTABLE_MAX
317 #define BRIDGE_RTABLE_MAX 100
318 #endif
319
320
321 /*
322 * Timeout (in seconds) for entries learned dynamically.
323 */
324 #ifndef BRIDGE_RTABLE_TIMEOUT
325 #define BRIDGE_RTABLE_TIMEOUT (20 * 60) /* same as ARP */
326 #endif
327
328 /*
329 * Number of seconds between walks of the route list.
330 */
331 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
332 #define BRIDGE_RTABLE_PRUNE_PERIOD (5 * 60)
333 #endif
334
335 /*
336 * Number of MAC NAT entries
337 * - sized based on 16 clients (including MAC NAT interface)
338 * each with 4 addresses
339 */
340 #ifndef BRIDGE_MAC_NAT_ENTRY_MAX
341 #define BRIDGE_MAC_NAT_ENTRY_MAX 64
342 #endif /* BRIDGE_MAC_NAT_ENTRY_MAX */
343
344 /*
345 * List of capabilities to possibly mask on the member interface.
346 */
347 #define BRIDGE_IFCAPS_MASK (IFCAP_TSO | IFCAP_TXCSUM)
348 /*
349 * List of capabilities to disable on the member interface.
350 */
351 #define BRIDGE_IFCAPS_STRIP IFCAP_LRO
352
353 /*
354 * Bridge interface list entry.
355 */
356 struct bridge_iflist {
357 TAILQ_ENTRY(bridge_iflist) bif_next;
358 struct ifnet *bif_ifp; /* member if */
359 struct bstp_port bif_stp; /* STP state */
360 uint32_t bif_ifflags; /* member if flags */
361 int bif_savedcaps; /* saved capabilities */
362 uint32_t bif_addrmax; /* max # of addresses */
363 uint32_t bif_addrcnt; /* cur. # of addresses */
364 uint32_t bif_addrexceeded; /* # of address violations */
365
366 interface_filter_t bif_iff_ref;
367 struct bridge_softc *bif_sc;
368 uint32_t bif_flags;
369
370 /* host filter */
371 struct in_addr bif_hf_ipsrc;
372 uint8_t bif_hf_hwsrc[ETHER_ADDR_LEN];
373
374 struct ifbrmstats bif_stats;
375 };
376
377 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)378 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
379 {
380 return (bif->bif_ifflags & flags) == flags;
381 }
382
383 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)384 bif_has_checksum_offload(struct bridge_iflist * bif)
385 {
386 return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
387 }
388
389 /* fake errors to make the code clearer */
390 #define _EBADIP EJUSTRETURN
391 #define _EBADIPCHECKSUM EJUSTRETURN
392 #define _EBADIPV6 EJUSTRETURN
393 #define _EBADUDP EJUSTRETURN
394 #define _EBADTCP EJUSTRETURN
395 #define _EBADUDPCHECKSUM EJUSTRETURN
396 #define _EBADTCPCHECKSUM EJUSTRETURN
397
398 #define BIFF_PROMISC 0x01 /* promiscuous mode set */
399 #define BIFF_PROTO_ATTACHED 0x02 /* protocol attached */
400 #define BIFF_FILTER_ATTACHED 0x04 /* interface filter attached */
401 #define BIFF_MEDIA_ACTIVE 0x08 /* interface media active */
402 #define BIFF_HOST_FILTER 0x10 /* host filter enabled */
403 #define BIFF_HF_HWSRC 0x20 /* host filter source MAC is set */
404 #define BIFF_HF_IPSRC 0x40 /* host filter source IP is set */
405 #define BIFF_INPUT_BROADCAST 0x80 /* send broadcast packets in */
406 #if SKYWALK
407 #define BIFF_FLOWSWITCH_ATTACHED 0x1000 /* we attached the flowswitch */
408 #define BIFF_NETAGENT_REMOVED 0x2000 /* we removed the netagent */
409 #endif /* SKYWALK */
410
411 /*
412 * mac_nat_entry
413 * - translates between an IP address and MAC address on a specific
414 * bridge interface member
415 */
416 struct mac_nat_entry {
417 LIST_ENTRY(mac_nat_entry) mne_list; /* list linkage */
418 struct bridge_iflist *mne_bif; /* originating interface */
419 unsigned long mne_expire; /* expiration time */
420 union {
421 struct in_addr mneu_ip; /* originating IPv4 address */
422 struct in6_addr mneu_ip6; /* originating IPv6 address */
423 } mne_u;
424 uint8_t mne_mac[ETHER_ADDR_LEN];
425 uint8_t mne_flags;
426 uint8_t mne_reserved;
427 };
428 #define mne_ip mne_u.mneu_ip
429 #define mne_ip6 mne_u.mneu_ip6
430
431 #define MNE_FLAGS_IPV6 0x01 /* IPv6 address */
432
433 LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
434
/*
 * mac_nat_record
 * - filled in by bridge_mac_nat_output() to describe the translation that
 *   bridge_mac_nat_translate() has to perform
 * - self-contained, so the translation can be carried out later without
 *   holding the bridge lock
 */
struct mac_nat_record {
	uint16_t        mnr_ether_type;
	union {
		/* ARP */
		uint16_t        mnru_arp_offset;

		/* IP */
		struct {
			uint16_t        mnruip_dhcp_flags;
			uint16_t        mnruip_udp_csum;
			uint8_t         mnruip_header_len;
		} mnru_ip;

		/* IPv6 */
		struct {
			uint16_t        mnruip6_icmp6_len;
			uint16_t        mnruip6_lladdr_offset;
			uint8_t         mnruip6_icmp6_type;
			uint8_t         mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};

/* shorthand accessors for the mnr_u variants */
#define mnr_arp_offset          mnr_u.mnru_arp_offset

#define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
#define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum
#define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len

#define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
#define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
#define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
#define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
470
471 /*
472 * Bridge route node.
473 */
474 struct bridge_rtnode {
475 LIST_ENTRY(bridge_rtnode) brt_hash; /* hash table linkage */
476 LIST_ENTRY(bridge_rtnode) brt_list; /* list linkage */
477 struct bridge_iflist *brt_dst; /* destination if */
478 unsigned long brt_expire; /* expiration time */
479 uint8_t brt_flags; /* address flags */
480 uint8_t brt_addr[ETHER_ADDR_LEN];
481 uint16_t brt_vlan; /* vlan id */
482
483 };
484 #define brt_ifp brt_dst->bif_ifp
485
486 /*
487 * Bridge delayed function call context
488 */
489 typedef void (*bridge_delayed_func_t)(struct bridge_softc *);
490
491 struct bridge_delayed_call {
492 struct bridge_softc *bdc_sc;
493 bridge_delayed_func_t bdc_func; /* Function to call */
494 struct timespec bdc_ts; /* Time to call */
495 u_int32_t bdc_flags;
496 thread_call_t bdc_thread_call;
497 };
498
499 #define BDCF_OUTSTANDING 0x01 /* Delayed call has been scheduled */
500 #define BDCF_CANCELLING 0x02 /* May be waiting for call completion */
501
502 /*
503 * Software state for each bridge.
504 */
505 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
506
507 struct bridge_softc {
508 struct ifnet *sc_ifp; /* make this an interface */
509 u_int32_t sc_flags;
510 LIST_ENTRY(bridge_softc) sc_list;
511 decl_lck_mtx_data(, sc_mtx);
512 struct _bridge_rtnode_list *sc_rthash; /* our forwarding table */
513 struct _bridge_rtnode_list sc_rtlist; /* list version of above */
514 uint32_t sc_rthash_key; /* key for hash */
515 uint32_t sc_rthash_size; /* size of the hash table */
516 struct bridge_delayed_call sc_aging_timer;
517 struct bridge_delayed_call sc_resize_call;
518 TAILQ_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */
519 struct bstp_state sc_stp; /* STP state */
520 bpf_packet_func sc_bpf_input;
521 bpf_packet_func sc_bpf_output;
522 void *sc_cv;
523 uint32_t sc_brtmax; /* max # of addresses */
524 uint32_t sc_brtcnt; /* cur. # of addresses */
525 uint32_t sc_brttimeout; /* rt timeout in seconds */
526 uint32_t sc_iflist_ref; /* refcount for sc_iflist */
527 uint32_t sc_iflist_xcnt; /* refcount for sc_iflist */
528 TAILQ_HEAD(, bridge_iflist) sc_iflist; /* member interface list */
529 uint32_t sc_brtexceeded; /* # of cache drops */
530 uint32_t sc_filter_flags; /* ipf and flags */
531 struct ifnet *sc_ifaddr; /* member mac copied from */
532 u_char sc_defaddr[6]; /* Default MAC address */
533 char sc_if_xname[IFNAMSIZ];
534
535 struct bridge_iflist *sc_mac_nat_bif; /* single MAC NAT interface */
536 struct mac_nat_entry_list sc_mne_list; /* MAC NAT IPv4 */
537 struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
538 uint32_t sc_mne_max; /* max # of entries */
539 uint32_t sc_mne_count; /* cur. # of entries */
540 uint32_t sc_mne_allocation_failures;
541 #if BRIDGE_LOCK_DEBUG
542 /*
543 * Locking and unlocking calling history
544 */
545 void *lock_lr[BR_LCKDBG_MAX];
546 int next_lock_lr;
547 void *unlock_lr[BR_LCKDBG_MAX];
548 int next_unlock_lr;
549 #endif /* BRIDGE_LOCK_DEBUG */
550 };
551
552 #define SCF_DETACHING 0x01
553 #define SCF_RESIZING 0x02
554 #define SCF_MEDIA_ACTIVE 0x04
555
/*
 * ChecksumOperation
 * - selects the checksum handling requested for a packet (passed to
 *   bridge_enqueue()); values are consecutive, starting at zero
 */
typedef enum {
	CHECKSUM_OPERATION_NONE = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD,       /* 1 */
	CHECKSUM_OPERATION_FINALIZE,            /* 2 */
	CHECKSUM_OPERATION_COMPUTE,             /* 3 */
} ChecksumOperation;
562
/*
 * union iphdr
 * - a single IP header pointer, viewable as an IPv4 header, an IPv6
 *   header, or an untyped pointer
 */
union iphdr {
	struct ip       *ip;            /* IPv4 view */
	struct ip6_hdr  *ip6;           /* IPv6 view */
	void            *ptr;           /* untyped view */
};
568
569 typedef struct {
570 u_int ip_hlen; /* IP header length */
571 u_int ip_pay_len; /* length of payload (exclusive of ip_hlen) */
572 u_int ip_opt_len; /* IPv6 options headers length */
573 uint8_t ip_proto; /* IPPROTO_TCP, IPPROTO_UDP, etc. */
574 bool ip_is_fragmented;
575 union iphdr ip_hdr; /* pointer to IP header */
576 void * ip_proto_hdr; /* ptr to protocol header (TCP) */
577 } ip_packet_info, *ip_packet_info_t;
578
579 struct bridge_hostfilter_stats bridge_hostfilter_stats;
580
581 static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
582 #if BRIDGE_LOCK_DEBUG
583 static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
584 #else
585 static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
586 #endif
587 static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
588
589 static int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
590
591 static ZONE_DECLARE(bridge_rtnode_pool, "bridge_rtnode",
592 sizeof(struct bridge_rtnode), ZC_NONE);
593 static ZONE_DECLARE(bridge_mne_pool, "bridge_mac_nat_entry",
594 sizeof(struct mac_nat_entry), ZC_NONE);
595
596 static int bridge_clone_create(struct if_clone *, uint32_t, void *);
597 static int bridge_clone_destroy(struct ifnet *);
598
599 static errno_t bridge_ioctl(struct ifnet *, u_long, void *);
600 #if HAS_IF_CAP
601 static void bridge_mutecaps(struct bridge_softc *);
602 static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
603 int);
604 #endif
605 static errno_t bridge_set_tso(struct bridge_softc *);
606 static void bridge_proto_attach_changed(struct ifnet *);
607 static int bridge_init(struct ifnet *);
608 #if HAS_BRIDGE_DUMMYNET
609 static void bridge_dummynet(struct mbuf *, struct ifnet *);
610 #endif
611 static void bridge_ifstop(struct ifnet *, int);
612 static int bridge_output(struct ifnet *, struct mbuf *);
613 static void bridge_finalize_cksum(struct ifnet *, struct mbuf *);
614 static void bridge_start(struct ifnet *);
615 static errno_t bridge_input(struct ifnet *, mbuf_t *);
616 static errno_t bridge_iff_input(void *, ifnet_t, protocol_family_t,
617 mbuf_t *, char **);
618 static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t,
619 mbuf_t *);
620 static errno_t bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
621 mbuf_t *m);
622
623 static int bridge_enqueue(ifnet_t, struct ifnet *,
624 struct ifnet *, struct mbuf *, ChecksumOperation);
625 static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
626
627 static void bridge_forward(struct bridge_softc *, struct bridge_iflist *,
628 struct mbuf *);
629
630 static void bridge_aging_timer(struct bridge_softc *sc);
631
632 static void bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
633 struct mbuf *, int);
634 static void bridge_span(struct bridge_softc *, struct mbuf *);
635
636 static int bridge_rtupdate(struct bridge_softc *, const uint8_t *,
637 uint16_t, struct bridge_iflist *, int, uint8_t);
638 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
639 uint16_t);
640 static void bridge_rttrim(struct bridge_softc *);
641 static void bridge_rtage(struct bridge_softc *);
642 static void bridge_rtflush(struct bridge_softc *, int);
643 static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
644 uint16_t);
645
646 static int bridge_rtable_init(struct bridge_softc *);
647 static void bridge_rtable_fini(struct bridge_softc *);
648
649 static void bridge_rthash_resize(struct bridge_softc *);
650
651 static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
652 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
653 const uint8_t *, uint16_t);
654 static int bridge_rtnode_hash(struct bridge_softc *,
655 struct bridge_rtnode *);
656 static int bridge_rtnode_insert(struct bridge_softc *,
657 struct bridge_rtnode *);
658 static void bridge_rtnode_destroy(struct bridge_softc *,
659 struct bridge_rtnode *);
660 #if BRIDGESTP
661 static void bridge_rtable_expire(struct ifnet *, int);
662 static void bridge_state_change(struct ifnet *, int);
663 #endif /* BRIDGESTP */
664
665 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
666 const char *name);
667 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
668 struct ifnet *ifp);
669 static void bridge_delete_member(struct bridge_softc *,
670 struct bridge_iflist *, int);
671 static void bridge_delete_span(struct bridge_softc *,
672 struct bridge_iflist *);
673
674 static int bridge_ioctl_add(struct bridge_softc *, void *);
675 static int bridge_ioctl_del(struct bridge_softc *, void *);
676 static int bridge_ioctl_gifflags(struct bridge_softc *, void *);
677 static int bridge_ioctl_sifflags(struct bridge_softc *, void *);
678 static int bridge_ioctl_scache(struct bridge_softc *, void *);
679 static int bridge_ioctl_gcache(struct bridge_softc *, void *);
680 static int bridge_ioctl_gifs32(struct bridge_softc *, void *);
681 static int bridge_ioctl_gifs64(struct bridge_softc *, void *);
682 static int bridge_ioctl_rts32(struct bridge_softc *, void *);
683 static int bridge_ioctl_rts64(struct bridge_softc *, void *);
684 static int bridge_ioctl_saddr32(struct bridge_softc *, void *);
685 static int bridge_ioctl_saddr64(struct bridge_softc *, void *);
686 static int bridge_ioctl_sto(struct bridge_softc *, void *);
687 static int bridge_ioctl_gto(struct bridge_softc *, void *);
688 static int bridge_ioctl_daddr32(struct bridge_softc *, void *);
689 static int bridge_ioctl_daddr64(struct bridge_softc *, void *);
690 static int bridge_ioctl_flush(struct bridge_softc *, void *);
691 static int bridge_ioctl_gpri(struct bridge_softc *, void *);
692 static int bridge_ioctl_spri(struct bridge_softc *, void *);
693 static int bridge_ioctl_ght(struct bridge_softc *, void *);
694 static int bridge_ioctl_sht(struct bridge_softc *, void *);
695 static int bridge_ioctl_gfd(struct bridge_softc *, void *);
696 static int bridge_ioctl_sfd(struct bridge_softc *, void *);
697 static int bridge_ioctl_gma(struct bridge_softc *, void *);
698 static int bridge_ioctl_sma(struct bridge_softc *, void *);
699 static int bridge_ioctl_sifprio(struct bridge_softc *, void *);
700 static int bridge_ioctl_sifcost(struct bridge_softc *, void *);
701 static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
702 static int bridge_ioctl_addspan(struct bridge_softc *, void *);
703 static int bridge_ioctl_delspan(struct bridge_softc *, void *);
704 static int bridge_ioctl_gbparam32(struct bridge_softc *, void *);
705 static int bridge_ioctl_gbparam64(struct bridge_softc *, void *);
706 static int bridge_ioctl_grte(struct bridge_softc *, void *);
707 static int bridge_ioctl_gifsstp32(struct bridge_softc *, void *);
708 static int bridge_ioctl_gifsstp64(struct bridge_softc *, void *);
709 static int bridge_ioctl_sproto(struct bridge_softc *, void *);
710 static int bridge_ioctl_stxhc(struct bridge_softc *, void *);
711 static int bridge_ioctl_purge(struct bridge_softc *sc, void *);
712 static int bridge_ioctl_gfilt(struct bridge_softc *, void *);
713 static int bridge_ioctl_sfilt(struct bridge_softc *, void *);
714 static int bridge_ioctl_ghostfilter(struct bridge_softc *, void *);
715 static int bridge_ioctl_shostfilter(struct bridge_softc *, void *);
716 static int bridge_ioctl_gmnelist32(struct bridge_softc *, void *);
717 static int bridge_ioctl_gmnelist64(struct bridge_softc *, void *);
718 static int bridge_ioctl_gifstats32(struct bridge_softc *, void *);
719 static int bridge_ioctl_gifstats64(struct bridge_softc *, void *);
720
721 static int bridge_pf(struct mbuf **, struct ifnet *, uint32_t sc_filter_flags, int input);
722 static int bridge_ip_checkbasic(struct mbuf **);
723 static int bridge_ip6_checkbasic(struct mbuf **);
724
725 static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
726 static errno_t bridge_bpf_input(ifnet_t, struct mbuf *, const char *, int);
727 static errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
728
729 static void bridge_detach(ifnet_t);
730 static void bridge_link_event(struct ifnet *, u_int32_t);
731 static void bridge_iflinkevent(struct ifnet *);
732 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
733 static int interface_media_active(struct ifnet *);
734 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
735 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
736 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
737 static int bridge_host_filter(struct bridge_iflist *, mbuf_t *);
738
739 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
740 struct bridge_iflist *);
741 static void bridge_mac_nat_disable(struct bridge_softc *sc);
742 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
743 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
744 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
745 struct bridge_iflist *);
746 static ifnet_t bridge_mac_nat_input(struct bridge_softc *, mbuf_t *,
747 boolean_t *);
748 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
749 struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
750 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
751 const caddr_t);
752 static boolean_t is_broadcast_ip_packet(mbuf_t *);
753
754 #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
755
756 static int
757 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
758 boolean_t is_tx);
759
760 /* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */
761 #define VLANTAGOF(_m) 0
762
763 u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
764 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
765
766 static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
767 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
768
769 #if BRIDGESTP
770 static struct bstp_cb_ops bridge_ops = {
771 .bcb_state = bridge_state_change,
772 .bcb_rtage = bridge_rtable_expire
773 };
774 #endif /* BRIDGESTP */
775
776 SYSCTL_DECL(_net_link);
777 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
778 "Bridge");
779
780 static int bridge_inherit_mac = 0; /* share MAC with first bridge member */
781 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
782 CTLFLAG_RW | CTLFLAG_LOCKED,
783 &bridge_inherit_mac, 0,
784 "Inherit MAC address from the first bridge member");
785
786 SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
787 CTLFLAG_RW | CTLFLAG_LOCKED,
788 &bridge_rtable_prune_period, 0,
789 "Interval between pruning of routing table");
790
791 static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
792 SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
793 CTLFLAG_RW | CTLFLAG_LOCKED,
794 &bridge_rtable_hash_size_max, 0,
795 "Maximum size of the routing hash table");
796
797 #if BRIDGE_DELAYED_CALLBACK_DEBUG
798 static int bridge_delayed_callback_delay = 0;
799 SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
800 CTLFLAG_RW | CTLFLAG_LOCKED,
801 &bridge_delayed_callback_delay, 0,
802 "Delay before calling delayed function");
803 #endif
804
805 SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
806 hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
807 &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
808
809 #if BRIDGESTP
810 static int log_stp = 0; /* log STP state changes */
811 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
812 &log_stp, 0, "Log STP state changes");
813 #endif /* BRIDGESTP */
814
/*
 * bridge_control
 * - one entry of a bridge control table: the handler, the size of its
 *   argument structure, and BC_F_* flags
 */
struct bridge_control {
	int             (*bc_func)(struct bridge_softc *, void *);
	unsigned        bc_argsize;
	unsigned        bc_flags;
};

/* bc_flags bits */
#define BC_F_COPYIN             0x01    /* copy arguments in */
#define BC_F_COPYOUT            0x02    /* copy arguments out */
#define BC_F_SUSER              0x04    /* do super-user check */
824
/*
 * Dispatch table used by bridge_ioctl() for SIOCSDRVSPEC32/SIOCGDRVSPEC32
 * requests.  The position of an entry is the command number (ifd_cmd), so
 * entries must stay in step with bridge_control_table64; only the handlers
 * and argument structures with a 32/64 suffix differ between the two.
 */
static const struct bridge_control bridge_control_table32[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist32,
	  .bc_argsize = sizeof(struct ifbrmnelist32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats32,
	  .bc_argsize = sizeof(struct ifbrmreq32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
929
/*
 * Dispatch table used by bridge_ioctl() for SIOCSDRVSPEC64/SIOCGDRVSPEC64
 * requests.  The position of an entry is the command number (ifd_cmd), so
 * entries must stay in step with bridge_control_table32; only the handlers
 * and argument structures with a 32/64 suffix differ between the two.
 */
static const struct bridge_control bridge_control_table64[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist64,
	  .bc_argsize = sizeof(struct ifbrmnelist64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats64,
	  .bc_argsize = sizeof(struct ifbrmreq64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
1034
1035 static const unsigned int bridge_control_table_size =
1036 sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1037
/*
 * List of bridge instances.  bridge_clone_create() scans and inserts into
 * it while holding bridge_list_mtx.
 */
static LIST_HEAD(, bridge_softc) bridge_list =
    LIST_HEAD_INITIALIZER(bridge_list);

#define BRIDGENAME      "bridge"        /* interface name prefix */
#define BRIDGES_MAX     IF_MAXUNIT
#define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)

/* Cloner that ties the "bridge" name to the create/destroy routines. */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
    0, BRIDGES_MAX, BRIDGE_ZONE_MAX_ELEM, sizeof(struct bridge_softc));

/*
 * When non-zero, bridge_clone_create() registers bridge_start as the start
 * routine; otherwise it sets IFNET_INIT_LEGACY and uses bridge_output.
 */
static int if_bridge_txstart = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");

SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_debug, 0, "Bridge debug flags");

SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_log_level, 0, "Bridge log level");

static int if_bridge_segmentation = 1;
SYSCTL_INT(_net_link_bridge, OID_AUTO, segmentation,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_segmentation, 0, "Bridge interface enable segmentation");

/* Logging helpers defined later in this file. */
static void brlog_ether_header(struct ether_header *);
static void brlog_mbuf_data(mbuf_t, size_t, size_t);
static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
static void brlog_mbuf(mbuf_t, const char *, const char *);
static void brlog_link(struct bridge_softc * sc);
1070
1071 #if BRIDGE_LOCK_DEBUG
/*
 * Lock-debugging variants of the bridge locking primitives; each one
 * records the caller's return address in a per-softc history ring.
 */
static void bridge_lock(struct bridge_softc *);
static void bridge_unlock(struct bridge_softc *);
static int bridge_lock2ref(struct bridge_softc *);
static void bridge_unref(struct bridge_softc *);
static void bridge_xlock(struct bridge_softc *);
static void bridge_xdrop(struct bridge_softc *);
1078
1079 static void
bridge_lock(struct bridge_softc * sc)1080 bridge_lock(struct bridge_softc *sc)
1081 {
1082 void *lr_saved = __builtin_return_address(0);
1083
1084 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1085
1086 _BRIDGE_LOCK(sc);
1087
1088 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1089 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1090 }
1091
1092 static void
bridge_unlock(struct bridge_softc * sc)1093 bridge_unlock(struct bridge_softc *sc)
1094 {
1095 void *lr_saved = __builtin_return_address(0);
1096
1097 BRIDGE_LOCK_ASSERT_HELD(sc);
1098
1099 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1100 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1101
1102 _BRIDGE_UNLOCK(sc);
1103 }
1104
1105 static int
bridge_lock2ref(struct bridge_softc * sc)1106 bridge_lock2ref(struct bridge_softc *sc)
1107 {
1108 int error = 0;
1109 void *lr_saved = __builtin_return_address(0);
1110
1111 BRIDGE_LOCK_ASSERT_HELD(sc);
1112
1113 if (sc->sc_iflist_xcnt > 0) {
1114 error = EBUSY;
1115 } else {
1116 sc->sc_iflist_ref++;
1117 }
1118
1119 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1120 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1121
1122 _BRIDGE_UNLOCK(sc);
1123
1124 return error;
1125 }
1126
1127 static void
bridge_unref(struct bridge_softc * sc)1128 bridge_unref(struct bridge_softc *sc)
1129 {
1130 void *lr_saved = __builtin_return_address(0);
1131
1132 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1133
1134 _BRIDGE_LOCK(sc);
1135 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1136 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1137
1138 sc->sc_iflist_ref--;
1139
1140 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1141 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1142 if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1143 _BRIDGE_UNLOCK(sc);
1144 wakeup(&sc->sc_cv);
1145 } else {
1146 _BRIDGE_UNLOCK(sc);
1147 }
1148 }
1149
1150 static void
bridge_xlock(struct bridge_softc * sc)1151 bridge_xlock(struct bridge_softc *sc)
1152 {
1153 void *lr_saved = __builtin_return_address(0);
1154
1155 BRIDGE_LOCK_ASSERT_HELD(sc);
1156
1157 sc->sc_iflist_xcnt++;
1158 while (sc->sc_iflist_ref > 0) {
1159 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1160 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1161
1162 msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1163
1164 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1165 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1166 }
1167 }
1168
1169 static void
bridge_xdrop(struct bridge_softc * sc)1170 bridge_xdrop(struct bridge_softc *sc)
1171 {
1172 BRIDGE_LOCK_ASSERT_HELD(sc);
1173
1174 sc->sc_iflist_xcnt--;
1175 }
1176
1177 #endif /* BRIDGE_LOCK_DEBUG */
1178
1179 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1180 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1181 {
1182 if (m) {
1183 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1184 "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1185 prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1186 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1187 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1188 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1189 suffix ? suffix : "");
1190 } else {
1191 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1192 }
1193 }
1194
1195 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1196 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1197 {
1198 if (m) {
1199 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1200 "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1201 "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1202 prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1203 mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1204 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
1205 (unsigned int)mbuf_maxlen(m),
1206 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1207 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1208 !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1209 if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1210 brlog_mbuf_pkthdr(m, "", suffix);
1211 }
1212 } else {
1213 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1214 }
1215 }
1216
1217 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1218 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1219 {
1220 mbuf_t n;
1221 size_t i, j;
1222 size_t pktlen, mlen, maxlen;
1223 unsigned char *ptr;
1224
1225 pktlen = mbuf_pkthdr_len(m);
1226
1227 if (offset > pktlen) {
1228 return;
1229 }
1230
1231 maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1232 n = m;
1233 mlen = mbuf_len(n);
1234 ptr = mbuf_data(n);
1235 for (i = 0, j = 0; i < maxlen; i++, j++) {
1236 if (j >= mlen) {
1237 n = mbuf_next(n);
1238 if (n == 0) {
1239 break;
1240 }
1241 ptr = mbuf_data(n);
1242 mlen = mbuf_len(n);
1243 j = 0;
1244 }
1245 if (i >= offset) {
1246 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1247 "%02x%s", ptr[j], i % 2 ? " " : "");
1248 }
1249 }
1250 }
1251
1252 static void
brlog_ether_header(struct ether_header * eh)1253 brlog_ether_header(struct ether_header *eh)
1254 {
1255 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1256 "%02x:%02x:%02x:%02x:%02x:%02x > "
1257 "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1258 eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1259 eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1260 eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1261 eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1262 ntohs(eh->ether_type));
1263 }
1264
/*
 * ether_ntop:
 *
 *	Format the six-byte address at ap as colon-separated, zero-padded
 *	lowercase hex into buf (at most len bytes, as bounded by snprintf)
 *	and return buf.
 */
static char *
ether_ntop(char *buf, size_t len, const u_char *ap)
{
	const u_char *octet = ap;

	(void)snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
	    octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);

	return buf;
}
1273
1274 static void
brlog_link(struct bridge_softc * sc)1275 brlog_link(struct bridge_softc * sc)
1276 {
1277 int i;
1278 uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) +
1279 IFNAMSIZ + ETHER_ADDR_LEN];
1280 struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer;
1281 const u_char * lladdr;
1282 char lladdr_str[48];
1283
1284 memset(sdl, 0, sizeof(sdl_buffer));
1285 sdl->sdl_family = AF_LINK;
1286 sdl->sdl_nlen = strlen(sc->sc_if_xname);
1287 sdl->sdl_alen = ETHER_ADDR_LEN;
1288 sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1289 memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1290 memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1291 lladdr_str[0] = '\0';
1292 for (i = 0, lladdr = CONST_LLADDR(sdl);
1293 i < sdl->sdl_alen;
1294 i++, lladdr++) {
1295 char byte_str[4];
1296
1297 snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1298 *lladdr);
1299 strlcat(lladdr_str, byte_str, sizeof(lladdr_str));
1300 }
1301 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1302 "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1303 " slen %d addr %s", sc->sc_if_xname,
1304 sdl->sdl_len, sdl->sdl_index,
1305 sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1306 sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1307 }
1308
1309
1310 /*
1311 * bridgeattach:
1312 *
1313 * Pseudo-device attach routine.
1314 */
1315 __private_extern__ int
bridgeattach(int n)1316 bridgeattach(int n)
1317 {
1318 #pragma unused(n)
1319 int error;
1320
1321 LIST_INIT(&bridge_list);
1322
1323 #if BRIDGESTP
1324 bstp_sys_init();
1325 #endif /* BRIDGESTP */
1326
1327 error = if_clone_attach(&bridge_cloner);
1328 if (error != 0) {
1329 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1330 }
1331 return error;
1332 }
1333
1334
1335 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1336 bridge_ifnet_set_attrs(struct ifnet * ifp)
1337 {
1338 errno_t error;
1339
1340 error = ifnet_set_mtu(ifp, ETHERMTU);
1341 if (error != 0) {
1342 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1343 goto done;
1344 }
1345 error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1346 if (error != 0) {
1347 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1348 goto done;
1349 }
1350 error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1351 if (error != 0) {
1352 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1353 goto done;
1354 }
1355 error = ifnet_set_flags(ifp,
1356 IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1357 0xffff);
1358
1359 if (error != 0) {
1360 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1361 goto done;
1362 }
1363 done:
1364 return error;
1365 }
1366
1367 /*
1368 * bridge_clone_create:
1369 *
1370 * Create a new bridge instance.
1371 */
1372 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1373 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1374 {
1375 #pragma unused(params)
1376 struct ifnet *ifp = NULL;
1377 struct bridge_softc *sc = NULL;
1378 struct bridge_softc *sc2 = NULL;
1379 struct ifnet_init_eparams init_params;
1380 errno_t error = 0;
1381 uint8_t eth_hostid[ETHER_ADDR_LEN];
1382 int fb, retry, has_hostid;
1383
1384 sc = if_clone_softc_allocate(&bridge_cloner);
1385 if (sc == NULL) {
1386 error = ENOMEM;
1387 goto done;
1388 }
1389
1390 lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1391 sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1392 sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1393 sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1394 sc->sc_filter_flags = 0;
1395
1396 TAILQ_INIT(&sc->sc_iflist);
1397
1398 /* use the interface name as the unique id for ifp recycle */
1399 snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1400 ifc->ifc_name, unit);
1401 bzero(&init_params, sizeof(init_params));
1402 init_params.ver = IFNET_INIT_CURRENT_VERSION;
1403 init_params.len = sizeof(init_params);
1404 /* Initialize our routing table. */
1405 error = bridge_rtable_init(sc);
1406 if (error != 0) {
1407 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1408 goto done;
1409 }
1410 TAILQ_INIT(&sc->sc_spanlist);
1411 if (if_bridge_txstart) {
1412 init_params.start = bridge_start;
1413 } else {
1414 init_params.flags = IFNET_INIT_LEGACY;
1415 init_params.output = bridge_output;
1416 }
1417 init_params.set_bpf_tap = bridge_set_bpf_tap;
1418 init_params.uniqueid = sc->sc_if_xname;
1419 init_params.uniqueid_len = strlen(sc->sc_if_xname);
1420 init_params.sndq_maxlen = IFQ_MAXLEN;
1421 init_params.name = ifc->ifc_name;
1422 init_params.unit = unit;
1423 init_params.family = IFNET_FAMILY_ETHERNET;
1424 init_params.type = IFT_BRIDGE;
1425 init_params.demux = ether_demux;
1426 init_params.add_proto = ether_add_proto;
1427 init_params.del_proto = ether_del_proto;
1428 init_params.check_multi = ether_check_multi;
1429 init_params.framer_extended = ether_frameout_extended;
1430 init_params.softc = sc;
1431 init_params.ioctl = bridge_ioctl;
1432 init_params.detach = bridge_detach;
1433 init_params.broadcast_addr = etherbroadcastaddr;
1434 init_params.broadcast_len = ETHER_ADDR_LEN;
1435
1436 error = ifnet_allocate_extended(&init_params, &ifp);
1437 if (error != 0) {
1438 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1439 goto done;
1440 }
1441 LIST_INIT(&sc->sc_mne_list);
1442 LIST_INIT(&sc->sc_mne_list_v6);
1443 sc->sc_ifp = ifp;
1444 error = bridge_ifnet_set_attrs(ifp);
1445 if (error != 0) {
1446 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1447 error);
1448 goto done;
1449 }
1450 /*
1451 * Generate an ethernet address with a locally administered address.
1452 *
1453 * Since we are using random ethernet addresses for the bridge, it is
1454 * possible that we might have address collisions, so make sure that
1455 * this hardware address isn't already in use on another bridge.
1456 * The first try uses the "hostid" and falls back to read_frandom();
1457 * for "hostid", we use the MAC address of the first-encountered
1458 * Ethernet-type interface that is currently configured.
1459 */
1460 fb = 0;
1461 has_hostid = (uuid_get_ethernet(ð_hostid[0]) == 0);
1462 for (retry = 1; retry != 0;) {
1463 if (fb || has_hostid == 0) {
1464 read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1465 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1466 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1467 } else {
1468 bcopy(ð_hostid[0], &sc->sc_defaddr,
1469 ETHER_ADDR_LEN);
1470 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1471 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1472 sc->sc_defaddr[3] = /* stir it up a bit */
1473 ((sc->sc_defaddr[3] & 0x0f) << 4) |
1474 ((sc->sc_defaddr[3] & 0xf0) >> 4);
1475 /*
1476 * Mix in the LSB as it's actually pretty significant,
1477 * see rdar://14076061
1478 */
1479 sc->sc_defaddr[4] =
1480 (((sc->sc_defaddr[4] & 0x0f) << 4) |
1481 ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1482 sc->sc_defaddr[5];
1483 sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1484 }
1485
1486 fb = 1;
1487 retry = 0;
1488 lck_mtx_lock(&bridge_list_mtx);
1489 LIST_FOREACH(sc2, &bridge_list, sc_list) {
1490 if (memcmp(sc->sc_defaddr,
1491 IF_LLADDR(sc2->sc_ifp), ETHER_ADDR_LEN) == 0) {
1492 retry = 1;
1493 }
1494 }
1495 lck_mtx_unlock(&bridge_list_mtx);
1496 }
1497
1498 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1499
1500 if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1501 brlog_link(sc);
1502 }
1503 error = ifnet_attach(ifp, NULL);
1504 if (error != 0) {
1505 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1506 goto done;
1507 }
1508
1509 error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1510 IFT_ETHER);
1511 if (error != 0) {
1512 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1513 error);
1514 goto done;
1515 }
1516
1517 ifnet_set_offload(ifp,
1518 IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1519 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1520 error = bridge_set_tso(sc);
1521 if (error != 0) {
1522 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1523 goto done;
1524 }
1525 #if BRIDGESTP
1526 bstp_attach(&sc->sc_stp, &bridge_ops);
1527 #endif /* BRIDGESTP */
1528
1529 lck_mtx_lock(&bridge_list_mtx);
1530 LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1531 lck_mtx_unlock(&bridge_list_mtx);
1532
1533 /* attach as ethernet */
1534 error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1535 NULL, NULL);
1536
1537 done:
1538 if (error != 0) {
1539 BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1540 /* TBD: Clean up: sc, sc_rthash etc */
1541 }
1542
1543 return error;
1544 }
1545
1546 /*
1547 * bridge_clone_destroy:
1548 *
1549 * Destroy a bridge instance.
1550 */
1551 static int
bridge_clone_destroy(struct ifnet * ifp)1552 bridge_clone_destroy(struct ifnet *ifp)
1553 {
1554 struct bridge_softc *sc = ifp->if_softc;
1555 struct bridge_iflist *bif;
1556 errno_t error;
1557
1558 BRIDGE_LOCK(sc);
1559 if ((sc->sc_flags & SCF_DETACHING)) {
1560 BRIDGE_UNLOCK(sc);
1561 return 0;
1562 }
1563 sc->sc_flags |= SCF_DETACHING;
1564
1565 bridge_ifstop(ifp, 1);
1566
1567 bridge_cancel_delayed_call(&sc->sc_resize_call);
1568
1569 bridge_cleanup_delayed_call(&sc->sc_resize_call);
1570 bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1571
1572 error = ifnet_set_flags(ifp, 0, IFF_UP);
1573 if (error != 0) {
1574 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1575 }
1576
1577 while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1578 bridge_delete_member(sc, bif, 0);
1579 }
1580
1581 while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1582 bridge_delete_span(sc, bif);
1583 }
1584 BRIDGE_UNLOCK(sc);
1585
1586 error = ifnet_detach(ifp);
1587 if (error != 0) {
1588 panic("%s (%d): ifnet_detach(%p) failed %d",
1589 __func__, __LINE__, ifp, error);
1590 }
1591 return 0;
1592 }
1593
/*
 * DRVSPEC:
 *
 *	Shared body of the driver-specific ioctl cases in bridge_ioctl().
 *	It is a macro rather than a function because the 32-bit and 64-bit
 *	cases declare differently typed locals of the same name.  The
 *	expansion site must provide: ifd (request), bc, bridge_control_table,
 *	args (argument union), cmd, error and sc.
 *
 *	Steps, each of which sets error and leaves the do/while on failure:
 *	  1. reject a command index outside the table (EINVAL);
 *	  2. check the request direction against BC_F_COPYOUT (EINVAL);
 *	  3. run the super-user check when BC_F_SUSER is set;
 *	  4. require ifd_len to equal bc_argsize and fit in args (EINVAL);
 *	  5. zero args and copy the argument in when BC_F_COPYIN is set;
 *	  6. call the handler with the bridge lock held;
 *	  7. copy the argument back out when BC_F_COPYOUT is set.
 *
 *	The "break" statements only exit the macro's own do/while (0); the
 *	caller still needs its own break to leave the switch.
 *
 *	NOTE(review): step 2 compares cmd against SIOCGDRVSPEC/SIOCSDRVSPEC,
 *	while bridge_ioctl() dispatches on the ...32/...64 variants --
 *	confirm the check takes effect for both ABIs.
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) {                \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
	bc = &bridge_control_table[ifd->ifd_cmd];                       \
                                                                        \
	if (cmd == SIOCGDRVSPEC &&                                      \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) {                       \
	        error = EINVAL;                                         \
	        break;                                                  \
	} else if (cmd == SIOCSDRVSPEC &&                               \
	    (bc->bc_flags & BC_F_COPYOUT) != 0) {                       \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	if (bc->bc_flags & BC_F_SUSER) {                                \
	        error = kauth_authorize_generic(kauth_cred_get(),       \
	            KAUTH_GENERIC_ISSUSER);                             \
	        if (error)                                              \
	                break;                                          \
	}                                                               \
                                                                        \
	if (ifd->ifd_len != bc->bc_argsize ||                           \
	    ifd->ifd_len > sizeof (args)) {                             \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	bzero(&args, sizeof (args));                                    \
	if (bc->bc_flags & BC_F_COPYIN) {                               \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len);     \
	        if (error)                                              \
	                break;                                          \
	}                                                               \
                                                                        \
	BRIDGE_LOCK(sc);                                                \
	error = (*bc->bc_func)(sc, &args);                              \
	BRIDGE_UNLOCK(sc);                                              \
	if (error)                                                      \
	        break;                                                  \
                                                                        \
	if (bc->bc_flags & BC_F_COPYOUT)                                \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len);    \
} while (0)
1640
/*
 * bridge_ioctl:
 *
 *	Handle a control request from the operator.
 *
 *	ifp	the bridge interface
 *	cmd	ioctl command
 *	data	command argument; interpreted per command (struct ifreq,
 *		struct ifmediareq, struct ifdrv32/ifdrv64)
 *
 *	Must be entered, and returns, without the bridge lock held; cases
 *	that touch softc state take and drop the lock themselves.  Commands
 *	not handled here are passed on to ether_ioctl().  Returns 0 or an
 *	errno value.
 */
static errno_t
bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct bridge_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct bridge_iflist *bif;
	int error = 0;

	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
	    "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
	    ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
	    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
	    (char)IOCGROUP(cmd), cmd & 0xff);

	switch (cmd) {
	case SIOCSIFADDR:
	case SIOCAIFADDR:
		/* assigning an address just marks the interface up */
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64: {
		struct ifmediareq *ifmr = (struct ifmediareq *)data;
		user_addr_t user_addr;

		/* the user list pointer sits in an ABI-specific layout */
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);

		ifmr->ifm_status = IFM_AVALID;
		ifmr->ifm_mask = 0;
		ifmr->ifm_count = 1;

		/* report active media only while attached and media-active */
		BRIDGE_LOCK(sc);
		if (!(sc->sc_flags & SCF_DETACHING) &&
		    (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
			ifmr->ifm_status |= IFM_ACTIVE;
			ifmr->ifm_active = ifmr->ifm_current =
			    IFM_ETHER | IFM_AUTO;
		} else {
			ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
		}
		BRIDGE_UNLOCK(sc);

		/* the one-entry media list is the current media word */
		if (user_addr != USER_ADDR_NULL) {
			error = copyout(&ifmr->ifm_current, user_addr,
			    sizeof(int));
		}
		break;
	}

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* accepted without any action */
		break;

	case SIOCSDRVSPEC32:
	case SIOCGDRVSPEC32: {
		/* argument buffer; DRVSPEC rejects anything larger than this */
		union {
			struct ifbreq ifbreq;
			struct ifbifconf32 ifbifconf;
			struct ifbareq32 ifbareq;
			struct ifbaconf32 ifbaconf;
			struct ifbrparam ifbrparam;
			struct ifbropreq32 ifbropreq;
		} args;
		struct ifdrv32 *ifd = (struct ifdrv32 *)data;
		const struct bridge_control *bridge_control_table =
		    bridge_control_table32, *bc;

		/* uses ifd, bc, bridge_control_table, args, cmd, error, sc */
		DRVSPEC;

		break;
	}
	case SIOCSDRVSPEC64:
	case SIOCGDRVSPEC64: {
		/* argument buffer; DRVSPEC rejects anything larger than this */
		union {
			struct ifbreq ifbreq;
			struct ifbifconf64 ifbifconf;
			struct ifbareq64 ifbareq;
			struct ifbaconf64 ifbaconf;
			struct ifbrparam ifbrparam;
			struct ifbropreq64 ifbropreq;
		} args;
		struct ifdrv64 *ifd = (struct ifdrv64 *)data;
		const struct bridge_control *bridge_control_table =
		    bridge_control_table64, *bc;

		/* uses ifd, bc, bridge_control_table, args, cmd, error, sc */
		DRVSPEC;

		break;
	}

	case SIOCSIFFLAGS:
		if (!(ifp->if_flags & IFF_UP) &&
		    (ifp->if_flags & IFF_RUNNING)) {
			/*
			 * If interface is marked down and it is running,
			 * then stop and disable it.
			 */
			BRIDGE_LOCK(sc);
			bridge_ifstop(ifp, 1);
			BRIDGE_UNLOCK(sc);
		} else if ((ifp->if_flags & IFF_UP) &&
		    !(ifp->if_flags & IFF_RUNNING)) {
			/*
			 * If interface is marked up and it is stopped, then
			 * start it.
			 */
			BRIDGE_LOCK(sc);
			error = bridge_init(ifp);
			BRIDGE_UNLOCK(sc);
		}
		break;

	case SIOCSIFLLADDR:
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		if (error != 0) {
			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
			    "%s SIOCSIFLLADDR error %d", ifp->if_xname,
			    error);
		}
		break;

	case SIOCSIFMTU:
		/* MTUs below 576 are refused */
		if (ifr->ifr_mtu < 576) {
			error = EINVAL;
			break;
		}
		BRIDGE_LOCK(sc);
		/* with no members, any acceptable MTU can be set directly */
		if (TAILQ_EMPTY(&sc->sc_iflist)) {
			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
			BRIDGE_UNLOCK(sc);
			break;
		}
		/* otherwise every member must already use the requested MTU */
		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
			if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
				BRIDGE_LOG(LOG_NOTICE, 0,
				    "%s invalid MTU: %u(%s) != %d",
				    sc->sc_ifp->if_xname,
				    bif->bif_ifp->if_mtu,
				    bif->bif_ifp->if_xname, ifr->ifr_mtu);
				error = EINVAL;
				break;
			}
		}
		if (!error) {
			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
		}
		BRIDGE_UNLOCK(sc);
		break;

	default:
		/* everything else is handed to the generic Ethernet handler */
		error = ether_ioctl(ifp, cmd, data);
		if (error != 0 && error != EOPNOTSUPP) {
			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
			    "ifp %s cmd 0x%08lx "
			    "(%c%c [%lu] %c %lu) failed error: %d",
			    ifp->if_xname, cmd,
			    (cmd & IOC_IN) ? 'I' : ' ',
			    (cmd & IOC_OUT) ? 'O' : ' ',
			    IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
			    cmd & 0xff, error);
		}
		break;
	}
	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	return error;
}
1818
1819 #if HAS_IF_CAP
1820 /*
1821 * bridge_mutecaps:
1822 *
1823 * Clear or restore unwanted capabilities on the member interface
1824 */
1825 static void
bridge_mutecaps(struct bridge_softc * sc)1826 bridge_mutecaps(struct bridge_softc *sc)
1827 {
1828 struct bridge_iflist *bif;
1829 int enabled, mask;
1830
1831 /* Initial bitmask of capabilities to test */
1832 mask = BRIDGE_IFCAPS_MASK;
1833
1834 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1835 /* Every member must support it or its disabled */
1836 mask &= bif->bif_savedcaps;
1837 }
1838
1839 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1840 enabled = bif->bif_ifp->if_capenable;
1841 enabled &= ~BRIDGE_IFCAPS_STRIP;
1842 /* strip off mask bits and enable them again if allowed */
1843 enabled &= ~BRIDGE_IFCAPS_MASK;
1844 enabled |= mask;
1845
1846 bridge_set_ifcap(sc, bif, enabled);
1847 }
1848 }
1849
1850 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)1851 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
1852 {
1853 struct ifnet *ifp = bif->bif_ifp;
1854 struct ifreq ifr;
1855 int error;
1856
1857 bzero(&ifr, sizeof(ifr));
1858 ifr.ifr_reqcap = set;
1859
1860 if (ifp->if_capenable != set) {
1861 IFF_LOCKGIANT(ifp);
1862 error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
1863 IFF_UNLOCKGIANT(ifp);
1864 if (error) {
1865 BRIDGE_LOG(LOG_NOTICE, 0,
1866 "%s error setting interface capabilities on %s",
1867 sc->sc_ifp->if_xname, ifp->if_xname);
1868 }
1869 }
1870 }
1871 #endif /* HAS_IF_CAP */
1872
1873 static errno_t
bridge_set_tso(struct bridge_softc * sc)1874 bridge_set_tso(struct bridge_softc *sc)
1875 {
1876 struct bridge_iflist *bif;
1877 u_int32_t tso_v4_mtu;
1878 u_int32_t tso_v6_mtu;
1879 ifnet_offload_t offload;
1880 errno_t error = 0;
1881
1882 /* By default, support TSO */
1883 offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
1884 tso_v4_mtu = IP_MAXPACKET;
1885 tso_v6_mtu = IP_MAXPACKET;
1886
1887 /* Use the lowest common denominator of the members */
1888 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1889 ifnet_t ifp = bif->bif_ifp;
1890
1891 if (ifp == NULL) {
1892 continue;
1893 }
1894
1895 if (offload & IFNET_TSO_IPV4) {
1896 if (ifp->if_hwassist & IFNET_TSO_IPV4) {
1897 if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
1898 tso_v4_mtu = ifp->if_tso_v4_mtu;
1899 }
1900 } else {
1901 offload &= ~IFNET_TSO_IPV4;
1902 tso_v4_mtu = 0;
1903 }
1904 }
1905 if (offload & IFNET_TSO_IPV6) {
1906 if (ifp->if_hwassist & IFNET_TSO_IPV6) {
1907 if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
1908 tso_v6_mtu = ifp->if_tso_v6_mtu;
1909 }
1910 } else {
1911 offload &= ~IFNET_TSO_IPV6;
1912 tso_v6_mtu = 0;
1913 }
1914 }
1915 }
1916
1917 if (offload != sc->sc_ifp->if_hwassist) {
1918 error = ifnet_set_offload(sc->sc_ifp, offload);
1919 if (error != 0) {
1920 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
1921 "ifnet_set_offload(%s, 0x%x) failed %d",
1922 sc->sc_ifp->if_xname, offload, error);
1923 goto done;
1924 }
1925 /*
1926 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
1927 * as large as the interface MTU
1928 */
1929 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
1930 if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
1931 tso_v4_mtu = sc->sc_ifp->if_mtu;
1932 }
1933 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
1934 tso_v4_mtu);
1935 if (error != 0) {
1936 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
1937 "ifnet_set_tso_mtu(%s, "
1938 "AF_INET, %u) failed %d",
1939 sc->sc_ifp->if_xname,
1940 tso_v4_mtu, error);
1941 goto done;
1942 }
1943 }
1944 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
1945 if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
1946 tso_v6_mtu = sc->sc_ifp->if_mtu;
1947 }
1948 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
1949 tso_v6_mtu);
1950 if (error != 0) {
1951 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
1952 "ifnet_set_tso_mtu(%s, "
1953 "AF_INET6, %u) failed %d",
1954 sc->sc_ifp->if_xname,
1955 tso_v6_mtu, error);
1956 goto done;
1957 }
1958 }
1959 }
1960 done:
1961 return error;
1962 }
1963
1964 /*
1965 * bridge_lookup_member:
1966 *
1967 * Lookup a bridge member interface.
1968 */
1969 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,const char * name)1970 bridge_lookup_member(struct bridge_softc *sc, const char *name)
1971 {
1972 struct bridge_iflist *bif;
1973 struct ifnet *ifp;
1974
1975 BRIDGE_LOCK_ASSERT_HELD(sc);
1976
1977 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1978 ifp = bif->bif_ifp;
1979 if (strcmp(ifp->if_xname, name) == 0) {
1980 return bif;
1981 }
1982 }
1983
1984 return NULL;
1985 }
1986
1987 /*
1988 * bridge_lookup_member_if:
1989 *
1990 * Lookup a bridge member interface by ifnet*.
1991 */
1992 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)1993 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
1994 {
1995 struct bridge_iflist *bif;
1996
1997 BRIDGE_LOCK_ASSERT_HELD(sc);
1998
1999 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2000 if (bif->bif_ifp == member_ifp) {
2001 return bif;
2002 }
2003 }
2004
2005 return NULL;
2006 }
2007
/*
 * bridge_iff_input:
 *
 *	Input callback of the interface filter attached to a member
 *	interface. `cookie' is the member's bridge_iflist, `*data' the
 *	received mbuf and `*frame_ptr' the start of its link-layer header.
 *
 *	The mbuf data pointer is temporarily moved back to the start of the
 *	frame so that bridge_input() sees the Ethernet header, and is moved
 *	forward again when bridge_input() returns 0.
 *
 *	Returns 0 when the packet is left alone (M_PROTO1 set, frame pointer
 *	outside the mbuf, frame too short), EJUSTRETURN when mbuf_pullup()
 *	fails, otherwise the value returned by bridge_input().
 */
static errno_t
bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(protocol)
	errno_t error = 0;
	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
	struct bridge_softc *sc = bif->bif_sc;
	int included = 0;
	size_t frmlen = 0;
	mbuf_t m = *data;

	/*
	 * Packets flagged M_PROTO1 bypass the bridge.
	 * NOTE(review): presumably the flag marks packets the bridge has
	 * already handled -- confirm against where M_PROTO1 is set.
	 */
	if ((m->m_flags & M_PROTO1)) {
		goto out;
	}

	/*
	 * The frame header is usable only if it lies in this mbuf, between
	 * the start of its storage and the current data pointer.
	 */
	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
	    *frame_ptr <= (char *)mbuf_data(m)) {
		included = 1;
		frmlen = (char *)mbuf_data(m) - *frame_ptr;
	}
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
	    "frmlen %lu", sc->sc_ifp->if_xname,
	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
	    included ? "inside" : "outside", frmlen);
	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
		brlog_mbuf(m, "bridge_iff_input[", "");
		brlog_ether_header((struct ether_header *)
		    (void *)*frame_ptr);
		brlog_mbuf_data(m, 0, 20);
	}
	if (included == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
		goto out;
	}

	/* Move data pointer to start of frame to the link layer header */
	(void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
	    mbuf_len(m) + frmlen);
	(void) mbuf_pkthdr_adjustlen(m, frmlen);

	/*
	 * make sure we can access the ethernet header
	 *
	 * NOTE(review): on this early exit the data pointer is left at the
	 * start of the frame (it is not moved forward again) while 0 is
	 * returned -- confirm callers tolerate that.
	 */
	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "short frame %lu < %lu",
		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
		goto out;
	}
	if (mbuf_len(m) < sizeof(struct ether_header)) {
		error = mbuf_pullup(data, sizeof(struct ether_header));
		if (error != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
			    "mbuf_pullup(%lu) failed %d",
			    sizeof(struct ether_header),
			    error);
			/* report the pullup failure to the caller as EJUSTRETURN */
			error = EJUSTRETURN;
			goto out;
		}
		/* the pullup may have replaced the head mbuf */
		if (m != *data) {
			m = *data;
			*frame_ptr = mbuf_data(m);
		}
	}

	error = bridge_input(ifp, data);

	/* Adjust packet back to original */
	if (error == 0) {
		/* bridge_input might have modified *data */
		if (*data != m) {
			m = *data;
			*frame_ptr = mbuf_data(m);
		}
		(void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen,
		    mbuf_len(m) - frmlen);
		(void) mbuf_pkthdr_adjustlen(m, -frmlen);
	}

	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
		brlog_mbuf(m, "bridge_iff_input]", "");
	}

out:
	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	return error;
}
2099
2100 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2101 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2102 mbuf_t *data)
2103 {
2104 #pragma unused(protocol)
2105 errno_t error = 0;
2106 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2107 struct bridge_softc *sc = bif->bif_sc;
2108 mbuf_t m = *data;
2109
2110 if ((m->m_flags & M_PROTO1)) {
2111 goto out;
2112 }
2113 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2114 "%s from %s m 0x%llx data 0x%llx",
2115 sc->sc_ifp->if_xname, ifp->if_xname,
2116 (uint64_t)VM_KERNEL_ADDRPERM(m),
2117 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
2118
2119 error = bridge_member_output(sc, ifp, data);
2120 if (error != 0 && error != EJUSTRETURN) {
2121 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2122 "bridge_member_output failed error %d",
2123 error);
2124 }
2125 out:
2126 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2127
2128 return error;
2129 }
2130
2131 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2132 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2133 const struct kev_msg *event_msg)
2134 {
2135 #pragma unused(protocol)
2136 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2137 struct bridge_softc *sc = bif->bif_sc;
2138
2139 if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2140 event_msg->kev_class == KEV_NETWORK_CLASS &&
2141 event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2142 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2143 "%s event_code %u - %s",
2144 ifp->if_xname, event_msg->event_code,
2145 dlil_kev_dl_code_str(event_msg->event_code));
2146
2147 switch (event_msg->event_code) {
2148 case KEV_DL_LINK_OFF:
2149 case KEV_DL_LINK_ON: {
2150 bridge_iflinkevent(ifp);
2151 #if BRIDGESTP
2152 bstp_linkstate(ifp, event_msg->event_code);
2153 #endif /* BRIDGESTP */
2154 break;
2155 }
2156 case KEV_DL_SIFFLAGS: {
2157 if ((bif->bif_flags & BIFF_PROMISC) == 0 &&
2158 (ifp->if_flags & IFF_UP)) {
2159 errno_t error;
2160
2161 error = ifnet_set_promiscuous(ifp, 1);
2162 if (error != 0) {
2163 BRIDGE_LOG(LOG_NOTICE, 0,
2164 "ifnet_set_promiscuous (%s)"
2165 " failed %d", ifp->if_xname,
2166 error);
2167 } else {
2168 bif->bif_flags |= BIFF_PROMISC;
2169 }
2170 }
2171 break;
2172 }
2173 case KEV_DL_IFCAP_CHANGED: {
2174 BRIDGE_LOCK(sc);
2175 bridge_set_tso(sc);
2176 BRIDGE_UNLOCK(sc);
2177 break;
2178 }
2179 case KEV_DL_PROTO_DETACHED:
2180 case KEV_DL_PROTO_ATTACHED: {
2181 bridge_proto_attach_changed(ifp);
2182 break;
2183 }
2184 default:
2185 break;
2186 }
2187 }
2188 }
2189
2190 /*
2191 * bridge_iff_detached:
2192 *
2193 * Called when our interface filter has been detached from a
2194 * member interface.
2195 */
2196 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2197 bridge_iff_detached(void *cookie, ifnet_t ifp)
2198 {
2199 struct bridge_iflist *bif;
2200 struct bridge_softc *sc = ifp->if_bridge;
2201
2202 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2203
2204 /* Check if the interface is a bridge member */
2205 if (sc != NULL) {
2206 BRIDGE_LOCK(sc);
2207 bif = bridge_lookup_member_if(sc, ifp);
2208 if (bif != NULL) {
2209 bridge_delete_member(sc, bif, 1);
2210 }
2211 BRIDGE_UNLOCK(sc);
2212 goto done;
2213 }
2214 /* Check if the interface is a span port */
2215 lck_mtx_lock(&bridge_list_mtx);
2216 LIST_FOREACH(sc, &bridge_list, sc_list) {
2217 BRIDGE_LOCK(sc);
2218 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2219 if (ifp == bif->bif_ifp) {
2220 bridge_delete_span(sc, bif);
2221 break;
2222 }
2223 BRIDGE_UNLOCK(sc);
2224 }
2225 lck_mtx_unlock(&bridge_list_mtx);
2226
2227 done:
2228 bif = (struct bridge_iflist *)cookie;
2229 kfree_type(struct bridge_iflist, bif);
2230 }
2231
2232 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2233 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2234 char *header)
2235 {
2236 #pragma unused(protocol, packet, header)
2237 BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2238 ifp->if_xname);
2239 return 0;
2240 }
2241
2242 static int
bridge_attach_protocol(struct ifnet * ifp)2243 bridge_attach_protocol(struct ifnet *ifp)
2244 {
2245 int error;
2246 struct ifnet_attach_proto_param reg;
2247
2248 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2249 bzero(®, sizeof(reg));
2250 reg.input = bridge_proto_input;
2251
2252 error = ifnet_attach_protocol(ifp, PF_BRIDGE, ®);
2253 if (error) {
2254 BRIDGE_LOG(LOG_NOTICE, 0,
2255 "ifnet_attach_protocol(%s) failed, %d",
2256 ifp->if_xname, error);
2257 }
2258
2259 return error;
2260 }
2261
2262 static int
bridge_detach_protocol(struct ifnet * ifp)2263 bridge_detach_protocol(struct ifnet *ifp)
2264 {
2265 int error;
2266
2267 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2268 error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2269 if (error) {
2270 BRIDGE_LOG(LOG_NOTICE, 0,
2271 "ifnet_detach_protocol(%s) failed, %d",
2272 ifp->if_xname, error);
2273 }
2274
2275 return error;
2276 }
2277
/*
 * bridge_delete_member:
 *
 *	Delete the specified member interface.
 *
 *	`gone' is non-zero when the call comes from the filter-detached
 *	callback; in that case the per-interface teardown (promiscuous mode,
 *	capabilities, Skywalk state, filter detach) is skipped.
 *
 *	Must be called with the bridge lock held. The lock is dropped and
 *	re-taken several times inside, and is held again on return.
 */
static void
bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
    int gone)
{
	struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
	int lladdr_changed = 0, error, filt_attached;
	uint8_t eaddr[ETHER_ADDR_LEN];
	u_int32_t event_code = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);
	VERIFY(ifs != NULL);

	/*
	 * Remove the member from the list first so it cannot be found anymore
	 * when we release the bridge lock below
	 */
	BRIDGE_XLOCK(sc);
	TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
	BRIDGE_XDROP(sc);

	/*
	 * If MAC NAT is active, either turn it off (this member was the
	 * MAC NAT interface) or flush the entries that refer to this member.
	 */
	if (sc->sc_mac_nat_bif != NULL) {
		if (bif == sc->sc_mac_nat_bif) {
			bridge_mac_nat_disable(sc);
		} else {
			bridge_mac_nat_flush_entries(sc, bif);
		}
	}

	if (!gone) {
		switch (ifs->if_type) {
		case IFT_ETHER:
		case IFT_L2VLAN:
		case IFT_IEEE8023ADLAG:
			/*
			 * Take the interface out of promiscuous mode.
			 */
			if (bif->bif_flags & BIFF_PROMISC) {
				/*
				 * Unlock to prevent deadlock with
				 * bridge_iff_event() in case the driver
				 * generates an interface event
				 */
				BRIDGE_UNLOCK(sc);
				(void) ifnet_set_promiscuous(ifs, 0);
				BRIDGE_LOCK(sc);
			}
			break;

		case IFT_GIF:
		/* currently not supported */
		/* FALLTHRU */
		default:
			VERIFY(0);
			/* NOTREACHED */
		}

#if HAS_IF_CAP
		/* re-enable any interface capabilities */
		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
#endif
	}

	if (bif->bif_flags & BIFF_PROTO_ATTACHED) {
		/* Respect lock ordering with DLIL lock */
		BRIDGE_UNLOCK(sc);
		(void) bridge_detach_protocol(ifs);
		BRIDGE_LOCK(sc);
	}
#if BRIDGESTP
	if ((bif->bif_ifflags & IFBIF_STP) != 0) {
		bstp_disable(&bif->bif_stp);
	}
#endif /* BRIDGESTP */

	/*
	 * If removing the interface that gave the bridge its mac address, set
	 * the mac address of the bridge to the address of the next member, or
	 * to its default address if no members are left.
	 */
	if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
		ifnet_release(sc->sc_ifaddr);
		if (TAILQ_EMPTY(&sc->sc_iflist)) {
			bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
			sc->sc_ifaddr = NULL;
		} else {
			struct ifnet *fif =
			    TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
			bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
			sc->sc_ifaddr = fif;
			ifnet_reference(fif);   /* for sc_ifaddr */
		}
		/* the address itself is applied later, after the lock is dropped */
		lladdr_changed = 1;
	}

#if HAS_IF_CAP
	bridge_mutecaps(sc);    /* recalculate now this interface is removed */
#endif /* HAS_IF_CAP */

	error = bridge_set_tso(sc);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
	}

	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);

	KASSERT(bif->bif_addrcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));

	/* sample the flag before the lock is dropped */
	filt_attached = bif->bif_flags & BIFF_FILTER_ATTACHED;

	/*
	 * Update link status of the bridge based on its remaining members
	 */
	event_code = bridge_updatelinkstatus(sc);

	BRIDGE_UNLOCK(sc);

#if SKYWALK
	/* undo what was recorded in bif_flags for netagent / flowswitch */
	if (!gone) {
		if ((bif->bif_flags & BIFF_NETAGENT_REMOVED) != 0) {
			ifnet_add_netagent(ifs);
			bif->bif_flags &= ~BIFF_NETAGENT_REMOVED;
		}
		if ((bif->bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
			ifnet_detach_flowswitch_nexus(ifs);
			bif->bif_flags &= ~BIFF_FLOWSWITCH_ATTACHED;
		}
	}
#endif /* SKYWALK */

	if (lladdr_changed &&
	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
	}

	if (event_code != 0) {
		bridge_link_event(bifp, event_code);
	}

#if BRIDGESTP
	bstp_destroy(&bif->bif_stp);    /* prepare to free */
#endif /* BRIDGESTP */

	/*
	 * When a filter is attached, `bif' is the filter cookie and is not
	 * freed here.
	 */
	if (filt_attached) {
		/* only detach if the interface is still present */
		if (!gone) {
			iflt_detach(bif->bif_iff_ref);
		}
	} else {
		/* filter wasn't attached, need to free now */
		kfree_type(struct bridge_iflist, bif);
	}

	ifs->if_bridge = NULL;
	ifnet_release(ifs);

	/* return with the bridge lock held, as on entry */
	BRIDGE_LOCK(sc);
}
2441
2442 /*
2443 * bridge_delete_span:
2444 *
2445 * Delete the specified span interface.
2446 */
2447 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2448 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2449 {
2450 BRIDGE_LOCK_ASSERT_HELD(sc);
2451
2452 KASSERT(bif->bif_ifp->if_bridge == NULL,
2453 ("%s: not a span interface", __func__));
2454
2455 ifnet_release(bif->bif_ifp);
2456
2457 TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2458 kfree_type(struct bridge_iflist, bif);
2459 }
2460
2461 static int
bridge_ioctl_add(struct bridge_softc * sc,void * arg)2462 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
2463 {
2464 struct ifbreq *req = arg;
2465 struct bridge_iflist *bif = NULL;
2466 struct ifnet *ifs, *bifp = sc->sc_ifp;
2467 int error = 0, lladdr_changed = 0;
2468 uint8_t eaddr[ETHER_ADDR_LEN];
2469 struct iff_filter iff;
2470 u_int32_t event_code = 0;
2471 boolean_t mac_nat = FALSE;
2472
2473 ifs = ifunit(req->ifbr_ifsname);
2474 if (ifs == NULL) {
2475 return ENOENT;
2476 }
2477 if (ifs->if_ioctl == NULL) { /* must be supported */
2478 return EINVAL;
2479 }
2480
2481 if (IFNET_IS_INTCOPROC(ifs)) {
2482 return EINVAL;
2483 }
2484
2485 /* If it's in the span list, it can't be a member. */
2486 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2487 if (ifs == bif->bif_ifp) {
2488 return EBUSY;
2489 }
2490 }
2491
2492 if (ifs->if_bridge == sc) {
2493 return EEXIST;
2494 }
2495
2496 if (ifs->if_bridge != NULL) {
2497 return EBUSY;
2498 }
2499
2500 switch (ifs->if_type) {
2501 case IFT_ETHER:
2502 if (strcmp(ifs->if_name, "en") == 0 &&
2503 ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2504 (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2505 /* XXX is there a better way to identify Wi-Fi STA? */
2506 mac_nat = TRUE;
2507 }
2508 break;
2509 case IFT_L2VLAN:
2510 case IFT_IEEE8023ADLAG:
2511 break;
2512 case IFT_GIF:
2513 /* currently not supported */
2514 /* FALLTHRU */
2515 default:
2516 return EINVAL;
2517 }
2518
2519 /* fail to add the interface if the MTU doesn't match */
2520 if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2521 BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2522 sc->sc_ifp->if_xname,
2523 ifs->if_xname);
2524 return EINVAL;
2525 }
2526
2527 /* there's already an interface that's doing MAC NAT */
2528 if (mac_nat && sc->sc_mac_nat_bif != NULL) {
2529 return EBUSY;
2530 }
2531 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2532 bif->bif_ifp = ifs;
2533 ifnet_reference(ifs);
2534 bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2535 #if HAS_IF_CAP
2536 bif->bif_savedcaps = ifs->if_capenable;
2537 #endif /* HAS_IF_CAP */
2538 bif->bif_sc = sc;
2539 if (mac_nat) {
2540 (void)bridge_mac_nat_enable(sc, bif);
2541 }
2542
2543 /* Allow the first Ethernet member to define the MTU */
2544 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2545 sc->sc_ifp->if_mtu = ifs->if_mtu;
2546 }
2547
2548 /*
2549 * Assign the interface's MAC address to the bridge if it's the first
2550 * member and the MAC address of the bridge has not been changed from
2551 * the default (randomly) generated one.
2552 */
2553 if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2554 !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr, ETHER_ADDR_LEN)) {
2555 bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2556 sc->sc_ifaddr = ifs;
2557 ifnet_reference(ifs); /* for sc_ifaddr */
2558 lladdr_changed = 1;
2559 }
2560
2561 ifs->if_bridge = sc;
2562 #if BRIDGESTP
2563 bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2564 #endif /* BRIDGESTP */
2565
2566 /*
2567 * XXX: XLOCK HERE!?!
2568 */
2569 TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
2570
2571 #if HAS_IF_CAP
2572 /* Set interface capabilities to the intersection set of all members */
2573 bridge_mutecaps(sc);
2574 #endif /* HAS_IF_CAP */
2575
2576 bridge_set_tso(sc);
2577
2578
2579 /*
2580 * Place the interface into promiscuous mode.
2581 */
2582 switch (ifs->if_type) {
2583 case IFT_ETHER:
2584 case IFT_L2VLAN:
2585 case IFT_IEEE8023ADLAG:
2586 error = ifnet_set_promiscuous(ifs, 1);
2587 switch (error) {
2588 case 0:
2589 bif->bif_flags |= BIFF_PROMISC;
2590 break;
2591 case ENETDOWN:
2592 case EPWROFF:
2593 BRIDGE_LOG(LOG_NOTICE, 0,
2594 "ifnet_set_promiscuous(%s) failed %d, ignoring",
2595 ifs->if_xname, error);
2596 /* Ignore error when device is not up */
2597 error = 0;
2598 break;
2599 default:
2600 BRIDGE_LOG(LOG_NOTICE, 0,
2601 "ifnet_set_promiscuous(%s) failed %d",
2602 ifs->if_xname, error);
2603 goto out;
2604 }
2605 break;
2606
2607 default:
2608 break;
2609 }
2610
2611 /*
2612 * The new member may change the link status of the bridge interface
2613 */
2614 if (interface_media_active(ifs)) {
2615 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
2616 } else {
2617 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
2618 }
2619
2620 event_code = bridge_updatelinkstatus(sc);
2621
2622 /*
2623 * Respect lock ordering with DLIL lock for the following operations
2624 */
2625 BRIDGE_UNLOCK(sc);
2626
2627 #if SKYWALK
2628 /* ensure that the flowswitch is present for native interface */
2629 if (SKYWALK_NATIVE(ifs)) {
2630 if (ifnet_attach_flowswitch_nexus(ifs)) {
2631 bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
2632 }
2633 }
2634 /* remove the netagent on the flowswitch (rdar://75050182) */
2635 if (ifnet_remove_netagent(ifs)) {
2636 bif->bif_flags |= BIFF_NETAGENT_REMOVED;
2637 }
2638 #endif /* SKYWALK */
2639
2640 /*
2641 * install an interface filter
2642 */
2643 memset(&iff, 0, sizeof(struct iff_filter));
2644 iff.iff_cookie = bif;
2645 iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
2646 iff.iff_input = bridge_iff_input;
2647 iff.iff_output = bridge_iff_output;
2648 iff.iff_event = bridge_iff_event;
2649 iff.iff_detached = bridge_iff_detached;
2650 error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
2651 DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
2652 if (error != 0) {
2653 BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
2654 BRIDGE_LOCK(sc);
2655 goto out;
2656 }
2657 BRIDGE_LOCK(sc);
2658 bif->bif_flags |= BIFF_FILTER_ATTACHED;
2659 BRIDGE_UNLOCK(sc);
2660
2661 /*
2662 * install a dummy "bridge" protocol
2663 */
2664 if ((error = bridge_attach_protocol(ifs)) != 0) {
2665 if (error != 0) {
2666 BRIDGE_LOG(LOG_NOTICE, 0,
2667 "bridge_attach_protocol failed %d", error);
2668 BRIDGE_LOCK(sc);
2669 goto out;
2670 }
2671 }
2672 BRIDGE_LOCK(sc);
2673 bif->bif_flags |= BIFF_PROTO_ATTACHED;
2674 BRIDGE_UNLOCK(sc);
2675
2676 if (lladdr_changed &&
2677 (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2678 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2679 }
2680
2681 if (event_code != 0) {
2682 bridge_link_event(bifp, event_code);
2683 }
2684
2685 BRIDGE_LOCK(sc);
2686
2687 out:
2688 if (error != 0) {
2689 if (bif != NULL) {
2690 bridge_delete_member(sc, bif, 0);
2691 }
2692 } else if (IFNET_IS_VMNET(ifs)) {
2693 INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
2694 }
2695
2696 return error;
2697 }
2698
2699 static int
bridge_ioctl_del(struct bridge_softc * sc,void * arg)2700 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
2701 {
2702 struct ifbreq *req = arg;
2703 struct bridge_iflist *bif;
2704
2705 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2706 if (bif == NULL) {
2707 return ENOENT;
2708 }
2709
2710 bridge_delete_member(sc, bif, 0);
2711
2712 return 0;
2713 }
2714
/*
 * bridge_ioctl_purge:
 *
 *	No-op handler: ignores both arguments and always returns 0.
 */
static int
bridge_ioctl_purge(struct bridge_softc *sc, void *arg)
{
#pragma unused(sc, arg)
	/* nothing to purge */
	return 0;
}
2721
2722 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * arg)2723 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
2724 {
2725 struct ifbreq *req = arg;
2726 struct bridge_iflist *bif;
2727
2728 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2729 if (bif == NULL) {
2730 return ENOENT;
2731 }
2732
2733 struct bstp_port *bp;
2734
2735 bp = &bif->bif_stp;
2736 req->ifbr_state = bp->bp_state;
2737 req->ifbr_priority = bp->bp_priority;
2738 req->ifbr_path_cost = bp->bp_path_cost;
2739 req->ifbr_proto = bp->bp_protover;
2740 req->ifbr_role = bp->bp_role;
2741 req->ifbr_stpflags = bp->bp_flags;
2742 req->ifbr_ifsflags = bif->bif_ifflags;
2743
2744 /* Copy STP state options as flags */
2745 if (bp->bp_operedge) {
2746 req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
2747 }
2748 if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
2749 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
2750 }
2751 if (bp->bp_ptp_link) {
2752 req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
2753 }
2754 if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
2755 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
2756 }
2757 if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
2758 req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
2759 }
2760 if (bp->bp_flags & BSTP_PORT_ADMCOST) {
2761 req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
2762 }
2763
2764 req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
2765 req->ifbr_addrcnt = bif->bif_addrcnt;
2766 req->ifbr_addrmax = bif->bif_addrmax;
2767 req->ifbr_addrexceeded = bif->bif_addrexceeded;
2768
2769 return 0;
2770 }
2771
2772 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * arg)2773 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
2774 {
2775 struct ifbreq *req = arg;
2776 struct bridge_iflist *bif;
2777 #if BRIDGESTP
2778 struct bstp_port *bp;
2779 int error;
2780 #endif /* BRIDGESTP */
2781
2782 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2783 if (bif == NULL) {
2784 return ENOENT;
2785 }
2786
2787 if (req->ifbr_ifsflags & IFBIF_SPAN) {
2788 /* SPAN is readonly */
2789 return EINVAL;
2790 }
2791 #define _EXCLUSIVE_FLAGS (IFBIF_CHECKSUM_OFFLOAD | IFBIF_MAC_NAT)
2792 if ((req->ifbr_ifsflags & _EXCLUSIVE_FLAGS) == _EXCLUSIVE_FLAGS) {
2793 /* can't specify both MAC-NAT and checksum offload */
2794 return EINVAL;
2795 }
2796 if ((req->ifbr_ifsflags & IFBIF_MAC_NAT) != 0) {
2797 errno_t error;
2798
2799 error = bridge_mac_nat_enable(sc, bif);
2800 if (error != 0) {
2801 return error;
2802 }
2803 } else if (sc->sc_mac_nat_bif == bif) {
2804 bridge_mac_nat_disable(sc);
2805 }
2806
2807
2808 #if BRIDGESTP
2809 if (req->ifbr_ifsflags & IFBIF_STP) {
2810 if ((bif->bif_ifflags & IFBIF_STP) == 0) {
2811 error = bstp_enable(&bif->bif_stp);
2812 if (error) {
2813 return error;
2814 }
2815 }
2816 } else {
2817 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2818 bstp_disable(&bif->bif_stp);
2819 }
2820 }
2821
2822 /* Pass on STP flags */
2823 bp = &bif->bif_stp;
2824 bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
2825 bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
2826 bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
2827 bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
2828 #else /* !BRIDGESTP */
2829 if (req->ifbr_ifsflags & IFBIF_STP) {
2830 return EOPNOTSUPP;
2831 }
2832 #endif /* !BRIDGESTP */
2833
2834 /* Save the bits relating to the bridge */
2835 bif->bif_ifflags = req->ifbr_ifsflags & IFBIFMASK;
2836
2837
2838 return 0;
2839 }
2840
2841 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * arg)2842 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
2843 {
2844 struct ifbrparam *param = arg;
2845
2846 sc->sc_brtmax = param->ifbrp_csize;
2847 bridge_rttrim(sc);
2848 return 0;
2849 }
2850
2851 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * arg)2852 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
2853 {
2854 struct ifbrparam *param = arg;
2855
2856 param->ifbrp_csize = sc->sc_brtmax;
2857
2858 return 0;
2859 }
2860
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of bridge_ioctl_gifs32() / bridge_ioctl_gifs64().
 *	Expects `sc' (bridge), `bifc' (the 32- or 64-bit ifbifconf request)
 *	and `error' (int) in the expanding function's scope.
 *
 *	When bifc->ifbic_len is 0, the space needed for all member and span
 *	entries is stored in ifbic_len and the function returns 0.
 *	Otherwise one struct ifbreq per member and span interface is
 *	copied out to bifc->ifbic_req, as many as fit in ifbic_len, and
 *	ifbic_len is set to the number of bytes produced. `error' receives
 *	the copyout() result.
 *
 *	The bridge lock is dropped around the allocation and the copyout.
 *	If the staging buffer cannot be allocated the function returns
 *	ENOMEM instead of writing through a NULL pointer.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif; \
	struct ifbreq breq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
	\
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) \
	        count++; \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) \
	        count++; \
	\
	buflen = sizeof (breq) * count; \
	if (bifc->ifbic_len == 0) { \
	        bifc->ifbic_len = buflen; \
	        return (0); \
	} \
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* a non-empty staging buffer is required below */ \
	if (outbuf == NULL && buflen != 0) { \
	        return (ENOMEM); \
	} \
	\
	count = 0; \
	buf = outbuf; \
	/* never write more than was allocated or than the caller asked for */ \
	len = min(bifc->ifbic_len, buflen); \
	bzero(&breq, sizeof (breq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if (len < sizeof (breq)) \
	                break; \
	\
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname); \
	        /* Fill in the ifbreq structure */ \
	        error = bridge_ioctl_gifflags(sc, &breq); \
	        if (error) \
	                break; \
	        memcpy(buf, &breq, sizeof (breq)); \
	        count++; \
	        buf += sizeof (breq); \
	        len -= sizeof (breq); \
	} \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) { \
	        if (len < sizeof (breq)) \
	                break; \
	\
	        snprintf(breq.ifbr_ifsname, \
	            sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname); \
	        breq.ifbr_ifsflags = bif->bif_ifflags; \
	        breq.ifbr_portno \
	            = bif->bif_ifp->if_index & 0xfff; \
	        memcpy(buf, &breq, sizeof (breq)); \
	        count++; \
	        buf += sizeof (breq); \
	        len -= sizeof (breq); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	bifc->ifbic_len = sizeof (breq) * count; \
	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len); \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
} while (0)
2923
2924 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * arg)2925 bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
2926 {
2927 struct ifbifconf64 *bifc = arg;
2928 int error = 0;
2929
2930 BRIDGE_IOCTL_GIFS;
2931
2932 return error;
2933 }
2934
2935 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * arg)2936 bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
2937 {
2938 struct ifbifconf32 *bifc = arg;
2939 int error = 0;
2940
2941 BRIDGE_IOCTL_GIFS;
2942
2943 return error;
2944 }
2945
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of bridge_ioctl_rts32() and bridge_ioctl_rts64().
 *	Copies one ifbareq record per entry of sc->sc_rtlist into the user
 *	buffer described by `bac', up to the smaller of the user-supplied
 *	length and the space needed for the entries counted up front.
 *
 *	Expects `sc', `bac', `bareq' and `error' to exist in the expanding
 *	function and returns from that function on every path.  On return
 *	bac->ifbac_len holds the number of bytes produced.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout; it is held again whenever the macro returns.
 */
#define BRIDGE_IOCTL_RTS do { \
	struct bridge_rtnode *brt; \
	char *buf; \
	char *outbuf = NULL; \
	unsigned int count, buflen, len; \
	unsigned long now; \
 \
	if (bac->ifbac_len == 0) \
		return (0); \
 \
	bzero(&bareq, sizeof (bareq)); \
	count = 0; \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) \
		count++; \
	buflen = sizeof (bareq) * count; \
 \
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* a non-empty request that could not be allocated must not */ \
	/* fall through to the memcpy() below */ \
	if (outbuf == NULL && buflen != 0) \
		return (ENOMEM); \
 \
	count = 0; \
	buf = outbuf; \
	len = min(bac->ifbac_len, buflen); \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
		if (len < sizeof (bareq)) \
			goto out; \
		snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname), \
		    "%s", brt->brt_ifp->if_xname); \
		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
		bareq.ifba_vlan = brt->brt_vlan; \
		/* bareq is reused for every entry: start from 0 so an */ \
		/* already-expired dynamic entry does not inherit the */ \
		/* previous record's remaining time */ \
		bareq.ifba_expire = 0; \
		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
			now = (unsigned long) net_uptime(); \
			if (now < brt->brt_expire) \
				bareq.ifba_expire = \
				    brt->brt_expire - now; \
		} \
		bareq.ifba_flags = brt->brt_flags; \
 \
		memcpy(buf, &bareq, sizeof (bareq)); \
		count++; \
		buf += sizeof (bareq); \
		len -= sizeof (bareq); \
	} \
out: \
	bac->ifbac_len = sizeof (bareq) * count; \
	if (outbuf != NULL) { \
		BRIDGE_UNLOCK(sc); \
		error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); \
		kfree_data(outbuf, buflen); \
		BRIDGE_LOCK(sc); \
	} \
	return (error); \
} while (0)
3000
3001 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * arg)3002 bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
3003 {
3004 struct ifbaconf64 *bac = arg;
3005 struct ifbareq64 bareq;
3006 int error = 0;
3007
3008 BRIDGE_IOCTL_RTS;
3009 return error;
3010 }
3011
3012 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * arg)3013 bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
3014 {
3015 struct ifbaconf32 *bac = arg;
3016 struct ifbareq32 bareq;
3017 int error = 0;
3018
3019 BRIDGE_IOCTL_RTS;
3020 return error;
3021 }
3022
3023 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * arg)3024 bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
3025 {
3026 struct ifbareq32 *req = arg;
3027 struct bridge_iflist *bif;
3028 int error;
3029
3030 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3031 if (bif == NULL) {
3032 return ENOENT;
3033 }
3034
3035 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3036 req->ifba_flags);
3037
3038 return error;
3039 }
3040
3041 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * arg)3042 bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
3043 {
3044 struct ifbareq64 *req = arg;
3045 struct bridge_iflist *bif;
3046 int error;
3047
3048 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3049 if (bif == NULL) {
3050 return ENOENT;
3051 }
3052
3053 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3054 req->ifba_flags);
3055
3056 return error;
3057 }
3058
3059 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * arg)3060 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
3061 {
3062 struct ifbrparam *param = arg;
3063
3064 sc->sc_brttimeout = param->ifbrp_ctime;
3065 return 0;
3066 }
3067
3068 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * arg)3069 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
3070 {
3071 struct ifbrparam *param = arg;
3072
3073 param->ifbrp_ctime = sc->sc_brttimeout;
3074 return 0;
3075 }
3076
3077 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * arg)3078 bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
3079 {
3080 struct ifbareq32 *req = arg;
3081
3082 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3083 }
3084
3085 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * arg)3086 bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
3087 {
3088 struct ifbareq64 *req = arg;
3089
3090 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3091 }
3092
3093 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * arg)3094 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
3095 {
3096 struct ifbreq *req = arg;
3097
3098 bridge_rtflush(sc, req->ifbr_ifsflags);
3099 return 0;
3100 }
3101
3102 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * arg)3103 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
3104 {
3105 struct ifbrparam *param = arg;
3106 struct bstp_state *bs = &sc->sc_stp;
3107
3108 param->ifbrp_prio = bs->bs_bridge_priority;
3109 return 0;
3110 }
3111
3112 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * arg)3113 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
3114 {
3115 #if BRIDGESTP
3116 struct ifbrparam *param = arg;
3117
3118 return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3119 #else /* !BRIDGESTP */
3120 #pragma unused(sc, arg)
3121 return EOPNOTSUPP;
3122 #endif /* !BRIDGESTP */
3123 }
3124
3125 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * arg)3126 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
3127 {
3128 struct ifbrparam *param = arg;
3129 struct bstp_state *bs = &sc->sc_stp;
3130
3131 param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3132 return 0;
3133 }
3134
3135 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * arg)3136 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
3137 {
3138 #if BRIDGESTP
3139 struct ifbrparam *param = arg;
3140
3141 return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3142 #else /* !BRIDGESTP */
3143 #pragma unused(sc, arg)
3144 return EOPNOTSUPP;
3145 #endif /* !BRIDGESTP */
3146 }
3147
3148 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * arg)3149 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
3150 {
3151 struct ifbrparam *param;
3152 struct bstp_state *bs;
3153
3154 param = arg;
3155 bs = &sc->sc_stp;
3156 param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3157 return 0;
3158 }
3159
3160 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * arg)3161 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
3162 {
3163 #if BRIDGESTP
3164 struct ifbrparam *param = arg;
3165
3166 return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3167 #else /* !BRIDGESTP */
3168 #pragma unused(sc, arg)
3169 return EOPNOTSUPP;
3170 #endif /* !BRIDGESTP */
3171 }
3172
3173 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * arg)3174 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
3175 {
3176 struct ifbrparam *param;
3177 struct bstp_state *bs;
3178
3179 param = arg;
3180 bs = &sc->sc_stp;
3181 param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3182 return 0;
3183 }
3184
3185 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * arg)3186 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
3187 {
3188 #if BRIDGESTP
3189 struct ifbrparam *param = arg;
3190
3191 return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3192 #else /* !BRIDGESTP */
3193 #pragma unused(sc, arg)
3194 return EOPNOTSUPP;
3195 #endif /* !BRIDGESTP */
3196 }
3197
3198 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * arg)3199 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
3200 {
3201 #if BRIDGESTP
3202 struct ifbreq *req = arg;
3203 struct bridge_iflist *bif;
3204
3205 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3206 if (bif == NULL) {
3207 return ENOENT;
3208 }
3209
3210 return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3211 #else /* !BRIDGESTP */
3212 #pragma unused(sc, arg)
3213 return EOPNOTSUPP;
3214 #endif /* !BRIDGESTP */
3215 }
3216
3217 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * arg)3218 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
3219 {
3220 #if BRIDGESTP
3221 struct ifbreq *req = arg;
3222 struct bridge_iflist *bif;
3223
3224 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3225 if (bif == NULL) {
3226 return ENOENT;
3227 }
3228
3229 return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3230 #else /* !BRIDGESTP */
3231 #pragma unused(sc, arg)
3232 return EOPNOTSUPP;
3233 #endif /* !BRIDGESTP */
3234 }
3235
3236 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * arg)3237 bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
3238 {
3239 struct ifbrparam *param = arg;
3240
3241 param->ifbrp_filter = sc->sc_filter_flags;
3242
3243 return 0;
3244 }
3245
3246 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * arg)3247 bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
3248 {
3249 struct ifbrparam *param = arg;
3250
3251 if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3252 return EINVAL;
3253 }
3254
3255 if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3256 return EINVAL;
3257 }
3258
3259 sc->sc_filter_flags = param->ifbrp_filter;
3260
3261 return 0;
3262 }
3263
3264 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * arg)3265 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
3266 {
3267 struct ifbreq *req = arg;
3268 struct bridge_iflist *bif;
3269
3270 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3271 if (bif == NULL) {
3272 return ENOENT;
3273 }
3274
3275 bif->bif_addrmax = req->ifbr_addrmax;
3276 return 0;
3277 }
3278
3279 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * arg)3280 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
3281 {
3282 struct ifbreq *req = arg;
3283 struct bridge_iflist *bif = NULL;
3284 struct ifnet *ifs;
3285
3286 ifs = ifunit(req->ifbr_ifsname);
3287 if (ifs == NULL) {
3288 return ENOENT;
3289 }
3290
3291 if (IFNET_IS_INTCOPROC(ifs)) {
3292 return EINVAL;
3293 }
3294
3295 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3296 if (ifs == bif->bif_ifp) {
3297 return EBUSY;
3298 }
3299
3300 if (ifs->if_bridge != NULL) {
3301 return EBUSY;
3302 }
3303
3304 switch (ifs->if_type) {
3305 case IFT_ETHER:
3306 case IFT_L2VLAN:
3307 case IFT_IEEE8023ADLAG:
3308 break;
3309 case IFT_GIF:
3310 /* currently not supported */
3311 /* FALLTHRU */
3312 default:
3313 return EINVAL;
3314 }
3315
3316 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3317
3318 bif->bif_ifp = ifs;
3319 bif->bif_ifflags = IFBIF_SPAN;
3320
3321 ifnet_reference(bif->bif_ifp);
3322
3323 TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3324
3325 return 0;
3326 }
3327
3328 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * arg)3329 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
3330 {
3331 struct ifbreq *req = arg;
3332 struct bridge_iflist *bif;
3333 struct ifnet *ifs;
3334
3335 ifs = ifunit(req->ifbr_ifsname);
3336 if (ifs == NULL) {
3337 return ENOENT;
3338 }
3339
3340 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3341 if (ifs == bif->bif_ifp) {
3342 break;
3343 }
3344
3345 if (bif == NULL) {
3346 return ENOENT;
3347 }
3348
3349 bridge_delete_span(sc, bif);
3350
3351 return 0;
3352 }
3353
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of bridge_ioctl_gbparam32()/bridge_ioctl_gbparam64().
 *	Fills the ifbropreq structure pointed to by `req' from the bridge's
 *	sc_stp state.  Expects `sc' and `req' in the expanding scope.
 *	The three timer values are reported shifted right by 8 bits, and
 *	the root port is reported as its if_index, or 0 when there is none.
 */
#define BRIDGE_IOCTL_GBPARAM do { \
	struct bstp_state *bs = &sc->sc_stp; \
	struct bstp_port *rootp = bs->bs_root_port; \
 \
	/* timers */ \
	req->ifbop_maxage = bs->bs_bridge_max_age >> 8; \
	req->ifbop_hellotime = bs->bs_bridge_htime >> 8; \
	req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8; \
 \
	/* root port */ \
	if (rootp != NULL) \
		req->ifbop_root_port = rootp->bp_ifp->if_index; \
	else \
		req->ifbop_root_port = 0; \
 \
	/* remaining bridge-wide parameters */ \
	req->ifbop_holdcount = bs->bs_txholdcount; \
	req->ifbop_priority = bs->bs_bridge_priority; \
	req->ifbop_protocol = bs->bs_protover; \
	req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost; \
	req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id; \
	req->ifbop_designated_root = bs->bs_root_pv.pv_root_id; \
	req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id; \
	req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec; \
	req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec; \
} while (0)
3378
3379 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * arg)3380 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *arg)
3381 {
3382 struct ifbropreq32 *req = arg;
3383
3384 BRIDGE_IOCTL_GBPARAM;
3385 return 0;
3386 }
3387
3388 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * arg)3389 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *arg)
3390 {
3391 struct ifbropreq64 *req = arg;
3392
3393 BRIDGE_IOCTL_GBPARAM;
3394 return 0;
3395 }
3396
3397 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * arg)3398 bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
3399 {
3400 struct ifbrparam *param = arg;
3401
3402 param->ifbrp_cexceeded = sc->sc_brtexceeded;
3403 return 0;
3404 }
3405
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body of bridge_ioctl_gifsstp32()/bridge_ioctl_gifsstp64().
 *	Copies one ifbpstpreq record per member of sc->sc_iflist that has
 *	IFBIF_STP set into the user buffer described by `bifstp'.
 *
 *	Expects `sc', `bifstp' and `error' in the expanding function and
 *	returns from that function on every path.  When the caller passes
 *	a zero length, only the required length is reported back.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout; it is held again whenever the macro returns.
 */
#define BRIDGE_IOCTL_GIFSSTP do { \
	struct bridge_iflist *bif; \
	struct bstp_port *bp; \
	struct ifbpstpreq bpreq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
 \
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if ((bif->bif_ifflags & IFBIF_STP) != 0) \
			count++; \
	} \
 \
	buflen = sizeof (bpreq) * count; \
	if (bifstp->ifbpstp_len == 0) { \
		bifstp->ifbpstp_len = buflen; \
		return (0); \
	} \
 \
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* a non-empty request that could not be allocated must not */ \
	/* fall through to the memcpy() below */ \
	if (outbuf == NULL && buflen != 0) \
		return (ENOMEM); \
 \
	count = 0; \
	buf = outbuf; \
	len = min(bifstp->ifbpstp_len, buflen); \
	bzero(&bpreq, sizeof (bpreq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if (len < sizeof (bpreq)) \
			break; \
 \
		if ((bif->bif_ifflags & IFBIF_STP) == 0) \
			continue; \
 \
		bp = &bif->bif_stp; \
		bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; \
		bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; \
		bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; \
		bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; \
		bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
		bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; \
 \
		memcpy(buf, &bpreq, sizeof (bpreq)); \
		count++; \
		buf += sizeof (bpreq); \
		len -= sizeof (bpreq); \
	} \
 \
	BRIDGE_UNLOCK(sc); \
	bifstp->ifbpstp_len = sizeof (bpreq) * count; \
	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
	return (error); \
} while (0)
3461
3462 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * arg)3463 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *arg)
3464 {
3465 struct ifbpstpconf32 *bifstp = arg;
3466 int error = 0;
3467
3468 BRIDGE_IOCTL_GIFSSTP;
3469 return error;
3470 }
3471
3472 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * arg)3473 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *arg)
3474 {
3475 struct ifbpstpconf64 *bifstp = arg;
3476 int error = 0;
3477
3478 BRIDGE_IOCTL_GIFSSTP;
3479 return error;
3480 }
3481
3482 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * arg)3483 bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
3484 {
3485 #if BRIDGESTP
3486 struct ifbrparam *param = arg;
3487
3488 return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3489 #else /* !BRIDGESTP */
3490 #pragma unused(sc, arg)
3491 return EOPNOTSUPP;
3492 #endif /* !BRIDGESTP */
3493 }
3494
3495 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * arg)3496 bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
3497 {
3498 #if BRIDGESTP
3499 struct ifbrparam *param = arg;
3500
3501 return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3502 #else /* !BRIDGESTP */
3503 #pragma unused(sc, arg)
3504 return EOPNOTSUPP;
3505 #endif /* !BRIDGESTP */
3506 }
3507
3508
3509 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * arg)3510 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *arg)
3511 {
3512 struct ifbrhostfilter *req = arg;
3513 struct bridge_iflist *bif;
3514
3515 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3516 if (bif == NULL) {
3517 return ENOENT;
3518 }
3519
3520 bzero(req, sizeof(struct ifbrhostfilter));
3521 if (bif->bif_flags & BIFF_HOST_FILTER) {
3522 req->ifbrhf_flags |= IFBRHF_ENABLED;
3523 bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3524 ETHER_ADDR_LEN);
3525 req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3526 }
3527 return 0;
3528 }
3529
3530 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * arg)3531 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *arg)
3532 {
3533 struct ifbrhostfilter *req = arg;
3534 struct bridge_iflist *bif;
3535
3536 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3537 if (bif == NULL) {
3538 return ENOENT;
3539 }
3540
3541 if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3542 bif->bif_flags |= BIFF_HOST_FILTER;
3543
3544 if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3545 bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3546 ETHER_ADDR_LEN);
3547 if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3548 ETHER_ADDR_LEN) != 0) {
3549 bif->bif_flags |= BIFF_HF_HWSRC;
3550 } else {
3551 bif->bif_flags &= ~BIFF_HF_HWSRC;
3552 }
3553 }
3554 if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3555 bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3556 if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3557 bif->bif_flags |= BIFF_HF_IPSRC;
3558 } else {
3559 bif->bif_flags &= ~BIFF_HF_IPSRC;
3560 }
3561 }
3562 } else {
3563 bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3564 BIFF_HF_IPSRC);
3565 bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3566 bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3567 }
3568
3569 return 0;
3570 }
3571
3572 static char *
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * buf,unsigned int * len_p)3573 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3574 unsigned int * count_p, char *buf, unsigned int *len_p)
3575 {
3576 unsigned int count = *count_p;
3577 struct ifbrmne ifbmne;
3578 unsigned int len = *len_p;
3579 struct mac_nat_entry *mne;
3580 unsigned long now;
3581
3582 bzero(&ifbmne, sizeof(ifbmne));
3583 LIST_FOREACH(mne, list, mne_list) {
3584 if (len < sizeof(ifbmne)) {
3585 break;
3586 }
3587 snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3588 "%s", mne->mne_bif->bif_ifp->if_xname);
3589 memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3590 sizeof(ifbmne.ifbmne_mac));
3591 now = (unsigned long) net_uptime();
3592 if (now < mne->mne_expire) {
3593 ifbmne.ifbmne_expire = mne->mne_expire - now;
3594 } else {
3595 ifbmne.ifbmne_expire = 0;
3596 }
3597 if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3598 ifbmne.ifbmne_af = AF_INET6;
3599 ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3600 } else {
3601 ifbmne.ifbmne_af = AF_INET;
3602 ifbmne.ifbmne_ip_addr = mne->mne_ip;
3603 }
3604 memcpy(buf, &ifbmne, sizeof(ifbmne));
3605 count++;
3606 buf += sizeof(ifbmne);
3607 len -= sizeof(ifbmne);
3608 }
3609 *count_p = count;
3610 *len_p = len;
3611 return buf;
3612 }
3613
3614 /*
3615 * bridge_ioctl_gmnelist()
3616 * Perform the get mac_nat_entry list ioctl.
3617 *
3618 * Note:
3619 * The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3620 * field size/layout except for the last field ifbml_buf, the user-supplied
3621 * buffer pointer. That is passed in separately via the 'user_addr'
3622 * parameter from the respective 32-bit or 64-bit ioctl routine.
3623 */
3624 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3625 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3626 user_addr_t user_addr)
3627 {
3628 unsigned int count;
3629 char *buf;
3630 int error = 0;
3631 char *outbuf = NULL;
3632 struct mac_nat_entry *mne;
3633 unsigned int buflen;
3634 unsigned int len;
3635
3636 mnl->ifbml_elsize = sizeof(struct ifbrmne);
3637 count = 0;
3638 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3639 count++;
3640 }
3641 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3642 count++;
3643 }
3644 buflen = sizeof(struct ifbrmne) * count;
3645 if (buflen == 0 || mnl->ifbml_len == 0) {
3646 mnl->ifbml_len = buflen;
3647 return error;
3648 }
3649 BRIDGE_UNLOCK(sc);
3650 outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3651 BRIDGE_LOCK(sc);
3652 count = 0;
3653 buf = outbuf;
3654 len = min(mnl->ifbml_len, buflen);
3655 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3656 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3657 mnl->ifbml_len = count * sizeof(struct ifbrmne);
3658 BRIDGE_UNLOCK(sc);
3659 error = copyout(outbuf, user_addr, mnl->ifbml_len);
3660 kfree_data(outbuf, buflen);
3661 BRIDGE_LOCK(sc);
3662 return error;
3663 }
3664
3665 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * arg)3666 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *arg)
3667 {
3668 struct ifbrmnelist64 *mnl = arg;
3669
3670 return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
3671 }
3672
3673 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * arg)3674 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *arg)
3675 {
3676 struct ifbrmnelist32 *mnl = arg;
3677
3678 return bridge_ioctl_gmnelist(sc, arg,
3679 CAST_USER_ADDR_T(mnl->ifbml_buf));
3680 }
3681
3682 /*
3683 * bridge_ioctl_gifstats()
3684 * Return per-member stats.
3685 *
3686 * Note:
3687 * The ifbrmreq32 and ifbrmreq64 structures have the same
3688 * field size/layout except for the last field brmr_buf, the user-supplied
3689 * buffer pointer. That is passed in separately via the 'user_addr'
3690 * parameter from the respective 32-bit or 64-bit ioctl routine.
3691 */
3692 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)3693 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
3694 user_addr_t user_addr)
3695 {
3696 struct bridge_iflist *bif;
3697 int error = 0;
3698 unsigned int buflen;
3699
3700 bif = bridge_lookup_member(sc, mreq->brmr_ifname);
3701 if (bif == NULL) {
3702 error = ENOENT;
3703 goto done;
3704 }
3705
3706 buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
3707 if (buflen == 0 || mreq->brmr_len == 0) {
3708 mreq->brmr_len = buflen;
3709 goto done;
3710 }
3711 if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
3712 error = ENOBUFS;
3713 goto done;
3714 }
3715 mreq->brmr_len = buflen;
3716 error = copyout(&bif->bif_stats, user_addr, buflen);
3717 done:
3718 return error;
3719 }
3720
3721 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * arg)3722 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *arg)
3723 {
3724 struct ifbrmreq32 *mreq = arg;
3725
3726 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3727 }
3728
3729 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * arg)3730 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *arg)
3731 {
3732 struct ifbrmreq64 *mreq = arg;
3733
3734 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3735 }
3736
3737 /*
3738 * bridge_proto_attach_changed
3739 *
3740 * Called when protocol attachment on the interface changes.
3741 */
3742 static void
bridge_proto_attach_changed(struct ifnet * ifp)3743 bridge_proto_attach_changed(struct ifnet *ifp)
3744 {
3745 boolean_t changed = FALSE;
3746 struct bridge_iflist *bif;
3747 boolean_t input_broadcast;
3748 struct bridge_softc *sc = ifp->if_bridge;
3749
3750 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3751 if (sc == NULL) {
3752 return;
3753 }
3754 /*
3755 * Selectively enable input broadcast only when necessary.
3756 * The bridge interface itself attaches a fake protocol
3757 * so checking for at least two protocols means that the
3758 * interface is being used for something besides bridging.
3759 */
3760 input_broadcast = if_get_protolist(ifp, NULL, 0) >= 2;
3761 BRIDGE_LOCK(sc);
3762 bif = bridge_lookup_member_if(sc, ifp);
3763 if (bif != NULL) {
3764 if (input_broadcast) {
3765 if ((bif->bif_flags & BIFF_INPUT_BROADCAST) == 0) {
3766 bif->bif_flags |= BIFF_INPUT_BROADCAST;
3767 changed = TRUE;
3768 }
3769 } else if ((bif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
3770 changed = TRUE;
3771 bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
3772 }
3773 }
3774 BRIDGE_UNLOCK(sc);
3775 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
3776 "%s input broadcast %s", ifp->if_xname,
3777 input_broadcast ? "ENABLED" : "DISABLED");
3778 return;
3779 }
3780
3781 /*
3782 * interface_media_active:
3783 *
3784 * Tells if an interface media is active.
3785 */
3786 static int
interface_media_active(struct ifnet * ifp)3787 interface_media_active(struct ifnet *ifp)
3788 {
3789 struct ifmediareq ifmr;
3790 int status = 0;
3791
3792 bzero(&ifmr, sizeof(ifmr));
3793 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
3794 if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
3795 status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
3796 }
3797 }
3798
3799 return status;
3800 }
3801
3802 /*
3803 * bridge_updatelinkstatus:
3804 *
3805 * Update the media active status of the bridge based on the
3806 * media active status of its member.
3807 * If changed, return the corresponding onf/off link event.
3808 */
3809 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)3810 bridge_updatelinkstatus(struct bridge_softc *sc)
3811 {
3812 struct bridge_iflist *bif;
3813 int active_member = 0;
3814 u_int32_t event_code = 0;
3815
3816 BRIDGE_LOCK_ASSERT_HELD(sc);
3817
3818 /*
3819 * Find out if we have an active interface
3820 */
3821 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
3822 if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
3823 active_member = 1;
3824 break;
3825 }
3826 }
3827
3828 if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3829 sc->sc_flags |= SCF_MEDIA_ACTIVE;
3830 event_code = KEV_DL_LINK_ON;
3831 } else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3832 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
3833 event_code = KEV_DL_LINK_OFF;
3834 }
3835
3836 return event_code;
3837 }
3838
3839 /*
3840 * bridge_iflinkevent:
3841 */
3842 static void
bridge_iflinkevent(struct ifnet * ifp)3843 bridge_iflinkevent(struct ifnet *ifp)
3844 {
3845 struct bridge_softc *sc = ifp->if_bridge;
3846 struct bridge_iflist *bif;
3847 u_int32_t event_code = 0;
3848 int media_active;
3849
3850 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3851
3852 /* Check if the interface is a bridge member */
3853 if (sc == NULL) {
3854 return;
3855 }
3856
3857 media_active = interface_media_active(ifp);
3858 BRIDGE_LOCK(sc);
3859 bif = bridge_lookup_member_if(sc, ifp);
3860 if (bif != NULL) {
3861 if (media_active) {
3862 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
3863 } else {
3864 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
3865 }
3866 if (sc->sc_mac_nat_bif != NULL) {
3867 bridge_mac_nat_flush_entries(sc, bif);
3868 }
3869
3870 event_code = bridge_updatelinkstatus(sc);
3871 }
3872 BRIDGE_UNLOCK(sc);
3873
3874 if (event_code != 0) {
3875 bridge_link_event(sc->sc_ifp, event_code);
3876 }
3877 }
3878
3879 /*
3880 * bridge_delayed_callback:
3881 *
3882 * Makes a delayed call
3883 */
3884 static void
bridge_delayed_callback(void * param)3885 bridge_delayed_callback(void *param)
3886 {
3887 struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
3888 struct bridge_softc *sc = call->bdc_sc;
3889
3890 #if BRIDGE_DELAYED_CALLBACK_DEBUG
3891 if (bridge_delayed_callback_delay > 0) {
3892 struct timespec ts;
3893
3894 ts.tv_sec = bridge_delayed_callback_delay;
3895 ts.tv_nsec = 0;
3896
3897 BRIDGE_LOG(LOG_NOTICE, 0,
3898 "sleeping for %d seconds",
3899 bridge_delayed_callback_delay);
3900
3901 msleep(&bridge_delayed_callback_delay, NULL, PZERO,
3902 __func__, &ts);
3903
3904 BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
3905 }
3906 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
3907
3908 BRIDGE_LOCK(sc);
3909
3910 #if BRIDGE_DELAYED_CALLBACK_DEBUG
3911 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
3912 "%s call 0x%llx flags 0x%x",
3913 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
3914 call->bdc_flags);
3915 }
3916 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
3917
3918 if (call->bdc_flags & BDCF_CANCELLING) {
3919 wakeup(call);
3920 } else {
3921 if ((sc->sc_flags & SCF_DETACHING) == 0) {
3922 (*call->bdc_func)(sc);
3923 }
3924 }
3925 call->bdc_flags &= ~BDCF_OUTSTANDING;
3926 BRIDGE_UNLOCK(sc);
3927 }
3928
3929 /*
3930 * bridge_schedule_delayed_call:
3931 *
3932 * Schedule a function to be called on a separate thread
3933 * The actual call may be scheduled to run at a given time or ASAP.
3934 */
3935 static void
3936 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
3937 {
3938 uint64_t deadline = 0;
3939 struct bridge_softc *sc = call->bdc_sc;
3940
3941 BRIDGE_LOCK_ASSERT_HELD(sc);
3942
3943 if ((sc->sc_flags & SCF_DETACHING) ||
3944 (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
3945 return;
3946 }
3947
3948 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
3949 nanoseconds_to_absolutetime(
3950 (uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
3951 call->bdc_ts.tv_nsec, &deadline);
3952 clock_absolutetime_interval_to_deadline(deadline, &deadline);
3953 }
3954
3955 call->bdc_flags = BDCF_OUTSTANDING;
3956
3957 #if BRIDGE_DELAYED_CALLBACK_DEBUG
3958 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
3959 "%s call 0x%llx flags 0x%x",
3960 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
3961 call->bdc_flags);
3962 }
3963 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
3964
3965 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
3966 thread_call_func_delayed(
3967 (thread_call_func_t)bridge_delayed_callback,
3968 call, deadline);
3969 } else {
3970 if (call->bdc_thread_call == NULL) {
3971 call->bdc_thread_call = thread_call_allocate(
3972 (thread_call_func_t)bridge_delayed_callback,
3973 call);
3974 }
3975 thread_call_enter(call->bdc_thread_call);
3976 }
3977 }
3978
3979 /*
3980 * bridge_cancel_delayed_call:
3981 *
3982 * Cancel a queued or running delayed call.
3983 * If call is running, does not return until the call is done to
3984 * prevent race condition with the brigde interface getting destroyed
3985 */
3986 static void
3987 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
3988 {
3989 boolean_t result;
3990 struct bridge_softc *sc = call->bdc_sc;
3991
3992 /*
3993 * The call was never scheduled
3994 */
3995 if (sc == NULL) {
3996 return;
3997 }
3998
3999 BRIDGE_LOCK_ASSERT_HELD(sc);
4000
4001 call->bdc_flags |= BDCF_CANCELLING;
4002
4003 while (call->bdc_flags & BDCF_OUTSTANDING) {
4004 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4005 "%s call 0x%llx flags 0x%x",
4006 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4007 call->bdc_flags);
4008 result = thread_call_func_cancel(
4009 (thread_call_func_t)bridge_delayed_callback, call, FALSE);
4010
4011 if (result) {
4012 /*
4013 * We managed to dequeue the delayed call
4014 */
4015 call->bdc_flags &= ~BDCF_OUTSTANDING;
4016 } else {
4017 /*
4018 * Wait for delayed call do be done running
4019 */
4020 msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4021 }
4022 }
4023 call->bdc_flags &= ~BDCF_CANCELLING;
4024 }
4025
4026 /*
4027 * bridge_cleanup_delayed_call:
4028 *
4029 * Dispose resource allocated for a delayed call
4030 * Assume the delayed call is not queued or running .
4031 */
4032 static void
4033 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4034 {
4035 boolean_t result;
4036 struct bridge_softc *sc = call->bdc_sc;
4037
4038 /*
4039 * The call was never scheduled
4040 */
4041 if (sc == NULL) {
4042 return;
4043 }
4044
4045 BRIDGE_LOCK_ASSERT_HELD(sc);
4046
4047 VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4048 VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4049
4050 if (call->bdc_thread_call != NULL) {
4051 result = thread_call_free(call->bdc_thread_call);
4052 if (result == FALSE) {
4053 panic("%s thread_call_free() failed for call %p",
4054 __func__, call);
4055 }
4056 call->bdc_thread_call = NULL;
4057 }
4058 }
4059
4060 /*
4061 * bridge_init:
4062 *
4063 * Initialize a bridge interface.
4064 */
4065 static int
4066 bridge_init(struct ifnet *ifp)
4067 {
4068 struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4069 errno_t error;
4070
4071 BRIDGE_LOCK_ASSERT_HELD(sc);
4072
4073 if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4074 return 0;
4075 }
4076
4077 error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4078
4079 /*
4080 * Calling bridge_aging_timer() is OK as there are no entries to
4081 * age so we're just going to arm the timer
4082 */
4083 bridge_aging_timer(sc);
4084 #if BRIDGESTP
4085 if (error == 0) {
4086 bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4087 }
4088 #endif /* BRIDGESTP */
4089 return error;
4090 }
4091
4092 /*
4093 * bridge_ifstop:
4094 *
4095 * Stop the bridge interface.
4096 */
4097 static void
4098 bridge_ifstop(struct ifnet *ifp, int disable)
4099 {
4100 #pragma unused(disable)
4101 struct bridge_softc *sc = ifp->if_softc;
4102
4103 BRIDGE_LOCK_ASSERT_HELD(sc);
4104
4105 if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4106 return;
4107 }
4108
4109 bridge_cancel_delayed_call(&sc->sc_aging_timer);
4110
4111 #if BRIDGESTP
4112 bstp_stop(&sc->sc_stp);
4113 #endif /* BRIDGESTP */
4114
4115 bridge_rtflush(sc, IFBF_FLUSHDYN);
4116 (void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4117 }
4118
4119 /*
4120 * bridge_compute_cksum:
4121 *
4122 * If the packet has checksum flags, compare the hardware checksum
4123 * capabilities of the source and destination interfaces. If they
4124 * are the same, there's nothing to do. If they are different,
4125 * finalize the checksum so that it can be sent on the destination
4126 * interface.
4127 */
4128 static void
4129 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4130 {
4131 uint32_t csum_flags;
4132 uint16_t dst_hw_csum;
4133 uint32_t did_sw = 0;
4134 struct ether_header *eh;
4135 uint16_t src_hw_csum;
4136
4137 if (src_if == dst_if) {
4138 return;
4139 }
4140 csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4141 if (csum_flags == 0) {
4142 /* no checksum offload */
4143 return;
4144 }
4145
4146 /*
4147 * if destination/source differ in checksum offload
4148 * capabilities, finalize/compute the checksum
4149 */
4150 dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4151 src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4152 if (dst_hw_csum == src_hw_csum) {
4153 return;
4154 }
4155 eh = mtod(m, struct ether_header *);
4156 switch (ntohs(eh->ether_type)) {
4157 case ETHERTYPE_IP:
4158 did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4159 break;
4160 case ETHERTYPE_IPV6:
4161 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4162 break;
4163 }
4164 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4165 "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4166 src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4167 m->m_pkthdr.csum_flags);
4168 }
4169
4170 static errno_t
4171 bridge_transmit(struct ifnet * ifp, struct mbuf *m)
4172 {
4173 struct flowadv adv = { .code = FADV_SUCCESS };
4174 errno_t error;
4175
4176 error = dlil_output(ifp, 0, m, NULL, NULL, 1, &adv);
4177 if (error == 0) {
4178 if (adv.code == FADV_FLOW_CONTROLLED) {
4179 error = EQFULL;
4180 } else if (adv.code == FADV_SUSPENDED) {
4181 error = EQSUSPENDED;
4182 }
4183 }
4184 return error;
4185 }
4186
4187 static u_int16_t
4188 get_ether_type(struct mbuf * m)
4189 {
4190 struct ether_header *eh;
4191
4192 eh = mtod(m, struct ether_header *);
4193 return ntohs(eh->ether_type);
4194 }
4195
4196 static int
4197 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4198 bool *is_fragmented)
4199 {
4200 int newoff;
4201
4202 *is_fragmented = false;
4203 while (1) {
4204 newoff = ip6_nexthdr(m, off, proto, nxtp);
4205 if (newoff < 0) {
4206 return off;
4207 } else if (newoff < off) {
4208 return -1; /* invalid */
4209 } else if (newoff == off) {
4210 return newoff;
4211 }
4212 off = newoff;
4213 proto = *nxtp;
4214 if (proto == IPPROTO_FRAGMENT) {
4215 *is_fragmented = true;
4216 }
4217 }
4218 }
4219
4220 static int
4221 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4222 ip_packet_info_t info_p, struct bripstats * stats_p)
4223 {
4224 int error = 0;
4225 u_int hlen;
4226 u_int ip_hlen;
4227 u_int ip_pay_len;
4228 struct mbuf * m0 = *mp;
4229 int off;
4230 int opt_len = 0;
4231 int proto = 0;
4232
4233 bzero(info_p, sizeof(*info_p));
4234 if (is_ipv4) {
4235 struct ip * ip;
4236 u_int ip_total_len;
4237
4238 /* IPv4 */
4239 hlen = mac_hlen + sizeof(struct ip);
4240 if (m0->m_pkthdr.len < hlen) {
4241 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4242 "Short IP packet %d < %d",
4243 m0->m_pkthdr.len, hlen);
4244 error = _EBADIP;
4245 stats_p->bips_bad_ip++;
4246 goto done;
4247 }
4248 if (m0->m_len < hlen) {
4249 *mp = m0 = m_pullup(m0, hlen);
4250 if (m0 == NULL) {
4251 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4252 "m_pullup failed hlen %d",
4253 hlen);
4254 error = ENOBUFS;
4255 stats_p->bips_bad_ip++;
4256 goto done;
4257 }
4258 }
4259 ip = (struct ip *)(void *)(mtod(m0, uint8_t *) + mac_hlen);
4260 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4261 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4262 "bad IP version");
4263 error = _EBADIP;
4264 stats_p->bips_bad_ip++;
4265 goto done;
4266 }
4267 ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4268 if (ip_hlen < sizeof(struct ip)) {
4269 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4270 "bad IP header length %d < %d",
4271 ip_hlen,
4272 (int)sizeof(struct ip));
4273 error = _EBADIP;
4274 stats_p->bips_bad_ip++;
4275 goto done;
4276 }
4277 hlen = mac_hlen + ip_hlen;
4278 if (m0->m_len < hlen) {
4279 *mp = m0 = m_pullup(m0, hlen);
4280 if (m0 == NULL) {
4281 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4282 "m_pullup failed hlen %d",
4283 hlen);
4284 error = ENOBUFS;
4285 stats_p->bips_bad_ip++;
4286 goto done;
4287 }
4288 }
4289
4290 ip_total_len = ntohs(ip->ip_len);
4291 if (ip_total_len < ip_hlen) {
4292 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4293 "IP total len %d < header len %d",
4294 ip_total_len, ip_hlen);
4295 error = _EBADIP;
4296 stats_p->bips_bad_ip++;
4297 goto done;
4298 }
4299 if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4300 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4301 "invalid IP payload length %d > %d",
4302 ip_total_len,
4303 (m0->m_pkthdr.len - mac_hlen));
4304 error = _EBADIP;
4305 stats_p->bips_bad_ip++;
4306 goto done;
4307 }
4308 ip_pay_len = ip_total_len - ip_hlen;
4309 info_p->ip_proto = ip->ip_p;
4310 info_p->ip_hdr.ip = ip;
4311 #define FRAG_BITS (IP_OFFMASK | IP_MF)
4312 if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4313 info_p->ip_is_fragmented = true;
4314 }
4315 stats_p->bips_ip++;
4316 } else {
4317 struct ip6_hdr *ip6;
4318
4319 /* IPv6 */
4320 hlen = mac_hlen + sizeof(struct ip6_hdr);
4321 if (m0->m_pkthdr.len < hlen) {
4322 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4323 "short IPv6 packet %d < %d",
4324 m0->m_pkthdr.len, hlen);
4325 error = _EBADIPV6;
4326 stats_p->bips_bad_ip6++;
4327 goto done;
4328 }
4329 if (m0->m_len < hlen) {
4330 *mp = m0 = m_pullup(m0, hlen);
4331 if (m0 == NULL) {
4332 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4333 "m_pullup failed hlen %d",
4334 hlen);
4335 error = ENOBUFS;
4336 stats_p->bips_bad_ip6++;
4337 goto done;
4338 }
4339 }
4340 ip6 = (struct ip6_hdr *)(mtod(m0, uint8_t *) + mac_hlen);
4341 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4342 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4343 "bad IPv6 version");
4344 error = _EBADIPV6;
4345 stats_p->bips_bad_ip6++;
4346 goto done;
4347 }
4348 off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4349 &info_p->ip_is_fragmented);
4350 if (off < 0 || m0->m_pkthdr.len < off) {
4351 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4352 "ip6_lasthdr() returned %d",
4353 off);
4354 error = _EBADIPV6;
4355 stats_p->bips_bad_ip6++;
4356 goto done;
4357 }
4358 ip_hlen = sizeof(*ip6);
4359 opt_len = off - mac_hlen - ip_hlen;
4360 if (opt_len < 0) {
4361 error = _EBADIPV6;
4362 stats_p->bips_bad_ip6++;
4363 goto done;
4364 }
4365 info_p->ip_proto = proto;
4366 info_p->ip_hdr.ip6 = ip6;
4367 ip_pay_len = ntohs(ip6->ip6_plen);
4368 if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4369 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4370 "invalid IPv6 payload length %d > %d",
4371 ip_pay_len,
4372 (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4373 error = _EBADIPV6;
4374 stats_p->bips_bad_ip6++;
4375 goto done;
4376 }
4377 stats_p->bips_ip6++;
4378 }
4379 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4380 "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4381 is_ipv4 ? '4' : '6',
4382 proto, ip_hlen, ip_pay_len, opt_len,
4383 m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4384 info_p->ip_hlen = ip_hlen;
4385 info_p->ip_pay_len = ip_pay_len;
4386 info_p->ip_opt_len = opt_len;
4387
4388 done:
4389 return error;
4390 }
4391
4392 static int
4393 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4394 ip_packet_info_t info_p, struct bripstats * stats_p)
4395 {
4396 int error;
4397 u_int hlen;
4398
4399 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4400 if (error != 0) {
4401 goto done;
4402 }
4403 if (info_p->ip_proto != IPPROTO_TCP) {
4404 /* not a TCP frame, not an error, just a bad guess */
4405 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4406 "non-TCP (%d) IPv%c frame %d bytes",
4407 info_p->ip_proto, is_ipv4 ? '4' : '6',
4408 (*mp)->m_pkthdr.len);
4409 goto done;
4410 }
4411 if (info_p->ip_is_fragmented) {
4412 /* both TSO and IP fragmentation don't make sense */
4413 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4414 "fragmented TSO packet?");
4415 stats_p->bips_bad_tcp++;
4416 error = _EBADTCP;
4417 goto done;
4418 }
4419 hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4420 info_p->ip_opt_len;
4421 if ((*mp)->m_len < hlen) {
4422 *mp = m_pullup(*mp, hlen);
4423 if (*mp == NULL) {
4424 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4425 "m_pullup %d failed",
4426 hlen);
4427 stats_p->bips_bad_tcp++;
4428 error = _EBADTCP;
4429 goto done;
4430 }
4431 }
4432 info_p->ip_proto_hdr = ((caddr_t)info_p->ip_hdr.ptr) +
4433 info_p->ip_hlen + info_p->ip_opt_len;
4434 done:
4435 return error;
4436 }
4437
4438 static inline void
4439 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4440 {
4441 if (proto == IPPROTO_TCP) {
4442 stats_p->brcs_tcp_checksum++;
4443 } else {
4444 stats_p->brcs_udp_checksum++;
4445 }
4446 return;
4447 }
4448
4449 static errno_t
4450 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4451 {
4452 struct brcsumstats *csum_stats_p;
4453 errno_t error = 0;
4454 u_int16_t ether_type;
4455 ip_packet_info info;
4456 bool is_ipv4;
4457 struct mbuf * m;
4458 u_int mac_hlen = sizeof(struct ether_header);
4459 uint16_t sum;
4460 bool valid;
4461
4462 ether_type = get_ether_type(*mp);
4463 switch (ether_type) {
4464 case ETHERTYPE_IP:
4465 is_ipv4 = true;
4466 break;
4467 case ETHERTYPE_IPV6:
4468 is_ipv4 = false;
4469 break;
4470 default:
4471 goto done;
4472 }
4473 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4474 &stats_p->brms_out_ip);
4475 if (error != 0) {
4476 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4477 "bridge_get_ip_proto failed %d",
4478 error);
4479 goto done;
4480 }
4481 m = *mp;
4482 if (is_ipv4) {
4483 if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4484 /* hardware offloaded IP header checksum */
4485 valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4486 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4487 "IP checksum HW %svalid",
4488 valid ? "" : "in");
4489 if (!valid) {
4490 stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum++;
4491 error = _EBADIPCHECKSUM;
4492 goto done;
4493 }
4494 stats_p->brms_out_cksum_good_hw.brcs_ip_checksum++;
4495 } else {
4496 /* verify */
4497 sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4498 valid = (sum == 0);
4499 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4500 "IP checksum SW %svalid",
4501 valid ? "" : "in");
4502 if (!valid) {
4503 stats_p->brms_out_cksum_bad.brcs_ip_checksum++;
4504 error = _EBADIPCHECKSUM;
4505 goto done;
4506 }
4507 stats_p->brms_out_cksum_good.brcs_ip_checksum++;
4508 }
4509 }
4510 if (info.ip_is_fragmented) {
4511 /* can't verify checksum on fragmented packets */
4512 goto done;
4513 }
4514 switch (info.ip_proto) {
4515 case IPPROTO_TCP:
4516 stats_p->brms_out_ip.bips_tcp++;
4517 break;
4518 case IPPROTO_UDP:
4519 stats_p->brms_out_ip.bips_udp++;
4520 break;
4521 default:
4522 goto done;
4523 }
4524 /* check for hardware offloaded UDP/TCP checksum */
4525 #define HW_CSUM (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4526 if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4527 /* checksum verified by hardware */
4528 valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4529 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4530 "IPv%c %s checksum HW 0x%x %svalid",
4531 is_ipv4 ? '4' : '6',
4532 (info.ip_proto == IPPROTO_TCP)
4533 ? "TCP" : "UDP",
4534 m->m_pkthdr.csum_data,
4535 valid ? "" : "in" );
4536 if (!valid) {
4537 /* bad checksum */
4538 csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4539 error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4540 : _EBADTCPCHECKSUM;
4541 } else {
4542 /* good checksum */
4543 csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4544 }
4545 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4546 goto done;
4547 }
4548 m->m_data += mac_hlen;
4549 m->m_len -= mac_hlen;
4550 m->m_pkthdr.len -= mac_hlen;
4551 if (is_ipv4) {
4552 sum = inet_cksum(m, info.ip_proto,
4553 info.ip_hlen,
4554 info.ip_pay_len);
4555 } else {
4556 sum = inet6_cksum(m, info.ip_proto,
4557 info.ip_hlen + info.ip_opt_len,
4558 info.ip_pay_len - info.ip_opt_len);
4559 }
4560 valid = (sum == 0);
4561 if (valid) {
4562 csum_stats_p = &stats_p->brms_out_cksum_good;
4563 } else {
4564 csum_stats_p = &stats_p->brms_out_cksum_bad;
4565 error = (info.ip_proto == IPPROTO_TCP)
4566 ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4567 }
4568 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4569 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4570 "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4571 is_ipv4 ? '4' : '6',
4572 (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4573 valid ? "" : "in",
4574 sum, info.ip_hlen, info.ip_pay_len);
4575 m->m_data -= mac_hlen;
4576 m->m_len += mac_hlen;
4577 m->m_pkthdr.len += mac_hlen;
4578 done:
4579 return error;
4580 }
4581
4582 static errno_t
4583 bridge_offload_checksum(struct mbuf * * mp, struct ifbrmstats * stats_p)
4584 {
4585 uint16_t * csum_p;
4586 errno_t error = 0;
4587 u_int16_t ether_type;
4588 u_int hlen;
4589 ip_packet_info info;
4590 bool is_ipv4;
4591 struct mbuf * m0 = *mp;
4592 u_int mac_hlen = sizeof(struct ether_header);
4593 u_int pkt_hdr_len;
4594 struct tcphdr * tcp;
4595 u_int tcp_hlen;
4596 struct udphdr * udp;
4597
4598 ether_type = get_ether_type(m0);
4599 switch (ether_type) {
4600 case ETHERTYPE_IP:
4601 is_ipv4 = true;
4602 break;
4603 case ETHERTYPE_IPV6:
4604 is_ipv4 = false;
4605 break;
4606 default:
4607 goto done;
4608 }
4609 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4610 &stats_p->brms_in_ip);
4611 if (error != 0) {
4612 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4613 "bridge_get_ip_proto failed %d",
4614 error);
4615 goto done;
4616 }
4617 if (is_ipv4) {
4618 /* compute IP header checksum */
4619 info.ip_hdr.ip->ip_sum = 0;
4620 info.ip_hdr.ip->ip_sum = inet_cksum(m0, 0, mac_hlen,
4621 info.ip_hlen);
4622 stats_p->brms_in_computed_cksum.brcs_ip_checksum++;
4623 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4624 "IPv4 checksum 0x%x",
4625 ntohs(info.ip_hdr.ip->ip_sum));
4626 }
4627 if (info.ip_is_fragmented) {
4628 /* can't compute checksum on fragmented packets */
4629 goto done;
4630 }
4631 pkt_hdr_len = m0->m_pkthdr.len;
4632 switch (info.ip_proto) {
4633 case IPPROTO_TCP:
4634 hlen = mac_hlen + info.ip_hlen + info.ip_opt_len
4635 + sizeof(struct tcphdr);
4636 if (m0->m_len < hlen) {
4637 *mp = m0 = m_pullup(m0, hlen);
4638 if (m0 == NULL) {
4639 stats_p->brms_in_ip.bips_bad_tcp++;
4640 error = _EBADTCP;
4641 goto done;
4642 }
4643 }
4644 tcp = (struct tcphdr *)(void *)
4645 ((caddr_t)info.ip_hdr.ptr + info.ip_hlen
4646 + info.ip_opt_len);
4647 tcp_hlen = tcp->th_off << 2;
4648 hlen = mac_hlen + info.ip_hlen + info.ip_opt_len + tcp_hlen;
4649 if (hlen > pkt_hdr_len) {
4650 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4651 "bad tcp header length %u",
4652 tcp_hlen);
4653 stats_p->brms_in_ip.bips_bad_tcp++;
4654 error = _EBADTCP;
4655 goto done;
4656 }
4657 csum_p = &tcp->th_sum;
4658 stats_p->brms_in_ip.bips_tcp++;
4659 break;
4660 case IPPROTO_UDP:
4661 hlen = mac_hlen + info.ip_hlen + info.ip_opt_len + sizeof(*udp);
4662 if (m0->m_len < hlen) {
4663 *mp = m0 = m_pullup(m0, hlen);
4664 if (m0 == NULL) {
4665 stats_p->brms_in_ip.bips_bad_udp++;
4666 error = ENOBUFS;
4667 goto done;
4668 }
4669 }
4670 udp = (struct udphdr *)(void *)
4671 ((caddr_t)info.ip_hdr.ptr + info.ip_hlen
4672 + info.ip_opt_len);
4673 csum_p = &udp->uh_sum;
4674 stats_p->brms_in_ip.bips_udp++;
4675 break;
4676 default:
4677 /* not TCP or UDP */
4678 goto done;
4679 }
4680 *csum_p = 0;
4681 m0->m_data += mac_hlen;
4682 m0->m_len -= mac_hlen;
4683 m0->m_pkthdr.len -= mac_hlen;
4684 if (is_ipv4) {
4685 *csum_p = inet_cksum(m0, info.ip_proto, info.ip_hlen,
4686 info.ip_pay_len);
4687 } else {
4688 *csum_p = inet6_cksum(m0, info.ip_proto,
4689 info.ip_hlen + info.ip_opt_len,
4690 info.ip_pay_len - info.ip_opt_len);
4691 }
4692 if (info.ip_proto == IPPROTO_UDP && *csum_p == 0) {
4693 /* RFC 1122 4.1.3.4 */
4694 *csum_p = 0xffff;
4695 }
4696 m0->m_data -= mac_hlen;
4697 m0->m_len += mac_hlen;
4698 m0->m_pkthdr.len += mac_hlen;
4699 proto_csum_stats_increment(info.ip_proto,
4700 &stats_p->brms_in_computed_cksum);
4701
4702 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4703 "IPv%c %s set checksum 0x%x",
4704 is_ipv4 ? '4' : '6',
4705 (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4706 ntohs(*csum_p));
4707 done:
4708 return error;
4709 }
4710
4711 static errno_t
4712 bridge_send(struct ifnet *src_ifp,
4713 struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4714 {
4715 switch (cksum_op) {
4716 case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
4717 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4718 break;
4719 case CHECKSUM_OPERATION_FINALIZE:
4720 /* the checksum might not be correct, finalize now */
4721 bridge_finalize_cksum(dst_ifp, m);
4722 break;
4723 case CHECKSUM_OPERATION_COMPUTE:
4724 bridge_compute_cksum(src_ifp, dst_ifp, m);
4725 break;
4726 default:
4727 break;
4728 }
4729 #if HAS_IF_CAP
4730 /*
4731 * If underlying interface can not do VLAN tag insertion itself
4732 * then attach a packet tag that holds it.
4733 */
4734 if ((m->m_flags & M_VLANTAG) &&
4735 (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4736 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4737 if (m == NULL) {
4738 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4739 "%s: unable to prepend VLAN header",
4740 dst_ifp->if_xname);
4741 (void) ifnet_stat_increment_out(dst_ifp,
4742 0, 0, 1);
4743 return 0;
4744 }
4745 m->m_flags &= ~M_VLANTAG;
4746 }
4747 #endif /* HAS_IF_CAP */
4748 return bridge_transmit(dst_ifp, m);
4749 }
4750
4751 static errno_t
4752 bridge_send_tso(struct ifnet *dst_ifp, struct mbuf *m, bool is_ipv4)
4753 {
4754 errno_t error;
4755 u_int mac_hlen;
4756
4757 mac_hlen = sizeof(struct ether_header);
4758
4759 #if HAS_IF_CAP
4760 /*
4761 * If underlying interface can not do VLAN tag insertion itself
4762 * then attach a packet tag that holds it.
4763 */
4764 if ((m->m_flags & M_VLANTAG) &&
4765 (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4766 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4767 if (m == NULL) {
4768 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4769 "%s: unable to prepend VLAN header",
4770 dst_ifp->if_xname);
4771 (void) ifnet_stat_increment_out(dst_ifp,
4772 0, 0, 1);
4773 error = ENOBUFS;
4774 goto done;
4775 }
4776 m->m_flags &= ~M_VLANTAG;
4777 mac_hlen += ETHER_VLAN_ENCAP_LEN;
4778 }
4779 #endif /* HAS_IF_CAP */
4780 error = gso_tcp(dst_ifp, &m, mac_hlen, is_ipv4, TRUE);
4781 return error;
4782 }
4783
4784 /*
4785 * tso_hwassist:
4786 * - determine whether the destination interface supports TSO offload
4787 * - if the packet is already marked for offload and the hardware supports
4788 * it, just allow the packet to continue on
4789 * - if not, parse the packet headers to verify that this is a large TCP
4790 * packet requiring segmentation; if the hardware doesn't support it
4791 * set need_sw_tso; otherwise, mark the packet for TSO offload
4792 */
4793 static int
4794 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
4795 bool * need_sw_tso, bool * supports_cksum)
4796 {
4797 int error = 0;
4798 u_int32_t if_csum;
4799 u_int32_t if_tso;
4800 u_int32_t mbuf_tso;
4801
4802 if (is_ipv4) {
4803 /*
4804 * Enable both TCP and IP offload if the hardware supports it.
4805 * If the hardware doesn't support TCP offload, *supports_cksum
4806 * will be false so we won't set either offload.
4807 */
4808 if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
4809 *supports_cksum = (if_csum & CSUM_TCP) != 0;
4810 if_tso = IFNET_TSO_IPV4;
4811 mbuf_tso = CSUM_TSO_IPV4;
4812 } else {
4813 *supports_cksum = (ifp->if_hwassist & CSUM_TCPIPV6) != 0;
4814 if_csum = CSUM_TCPIPV6;
4815 if_tso = IFNET_TSO_IPV6;
4816 mbuf_tso = CSUM_TSO_IPV6;
4817 }
4818 *need_sw_tso = false;
4819 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4820 "%s: does%s support checksum 0x%x if_csum 0x%x",
4821 ifp->if_xname, *supports_cksum ? "" : " not",
4822 ifp->if_hwassist, if_csum);
4823 if ((ifp->if_hwassist & if_tso) != 0 &&
4824 ((*mp)->m_pkthdr.csum_flags & mbuf_tso) != 0) {
4825 /* hardware TSO, mbuf already marked */
4826 } else {
4827 /* verify that this is a large TCP frame */
4828 uint32_t csum_flags;
4829 ip_packet_info info;
4830 u_int mss;
4831 struct bripstats stats;
4832 struct tcphdr * tcp;
4833
4834 error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
4835 &info, &stats);
4836 if (error != 0) {
4837 /* bad packet */
4838 goto done;
4839 }
4840 if ((info.ip_hlen + info.ip_pay_len + info.ip_opt_len) <=
4841 ifp->if_mtu) {
4842 /* not actually a large packet */
4843 goto done;
4844 }
4845 if (info.ip_proto_hdr == NULL) {
4846 /* not a TCP packet */
4847 goto done;
4848 }
4849 if ((ifp->if_hwassist & if_tso) == 0) {
4850 /* hardware does not support TSO, enable sw tso */
4851 *need_sw_tso = if_bridge_segmentation != 0;
4852 goto done;
4853 }
4854 /* use hardware TSO */
4855 (*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
4856 tcp = (struct tcphdr *)info.ip_proto_hdr;
4857 mss = ifp->if_mtu - info.ip_hlen - info.ip_opt_len
4858 - (tcp->th_off << 2);
4859 csum_flags = mbuf_tso;
4860 if (*supports_cksum) {
4861 csum_flags |= if_csum;
4862 }
4863 (*mp)->m_pkthdr.tso_segsz = mss;
4864 (*mp)->m_pkthdr.csum_flags |= csum_flags;
4865 (*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
4866 }
4867 done:
4868 return error;
4869 }
4870
4871 /*
4872 * bridge_enqueue:
4873 *
4874 * Enqueue a packet on a bridge member interface.
4875 *
4876 */
4877 static errno_t
4878 bridge_enqueue(ifnet_t bridge_ifp, struct ifnet *src_ifp,
4879 struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4880 {
4881 errno_t error = 0;
4882 int len;
4883
4884 VERIFY(dst_ifp != NULL);
4885
4886 /*
4887 * We may be sending a fragment so traverse the mbuf
4888 *
4889 * NOTE: bridge_fragment() is called only when PFIL_HOOKS is enabled.
4890 */
4891 for (struct mbuf *next_m = NULL; m != NULL; m = next_m) {
4892 bool need_sw_tso = false;
4893 bool is_large_pkt;
4894 errno_t _error = 0;
4895 u_int16_t ether_type = 0;
4896
4897 len = m->m_pkthdr.len;
4898 m->m_flags |= M_PROTO1; /* set to avoid loops */
4899 next_m = m->m_nextpkt;
4900 m->m_nextpkt = NULL;
4901 /*
4902 * Need to segment the packet if it is a large frame
4903 * and the destination interface does not support TSO.
4904 *
4905 * Note that with trailers, it's possible for a packet to
4906 * be large but not actually require segmentation.
4907 */
4908 is_large_pkt = (len > (bridge_ifp->if_mtu + ETHER_HDR_LEN));
4909 if (is_large_pkt) {
4910 bool hw_supports_cksum = false;
4911
4912 ether_type = get_ether_type(m);
4913 switch (ether_type) {
4914 case ETHERTYPE_IP:
4915 case ETHERTYPE_IPV6:
4916 _error = tso_hwassist(&m,
4917 (ether_type == ETHERTYPE_IP),
4918 dst_ifp, sizeof(struct ether_header),
4919 &need_sw_tso, &hw_supports_cksum);
4920 if (_error == 0 && hw_supports_cksum) {
4921 cksum_op = CHECKSUM_OPERATION_NONE;
4922 }
4923 break;
4924 default:
4925 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4926 "large non IP packet");
4927 break;
4928 }
4929 }
4930 if (_error != 0) {
4931 if (m != NULL) {
4932 m_freem(m);
4933 }
4934 } else if (need_sw_tso) {
4935 _error = bridge_send_tso(dst_ifp, m,
4936 (ether_type == ETHERTYPE_IP));
4937 } else {
4938 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4939 "%s bridge_send(%s) len %d op %d",
4940 bridge_ifp->if_xname,
4941 dst_ifp->if_xname,
4942 len, cksum_op);
4943 _error = bridge_send(src_ifp, dst_ifp, m, cksum_op);
4944 }
4945
4946 /* Preserve first error value */
4947 if (error == 0 && _error != 0) {
4948 error = _error;
4949 }
4950 if (_error == 0) {
4951 (void) ifnet_stat_increment_out(bridge_ifp, 1, len, 0);
4952 } else {
4953 (void) ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
4954 }
4955 }
4956
4957 return error;
4958 }
4959
4960 #if HAS_BRIDGE_DUMMYNET
4961 /*
4962 * bridge_dummynet:
4963 *
4964 * Receive a queued packet from dummynet and pass it on to the output
4965 * interface.
4966 *
4967 * The mbuf has the Ethernet header already attached.
4968 */
4969 static void
4970 bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
4971 {
4972 struct bridge_softc *sc;
4973
4974 sc = ifp->if_bridge;
4975
4976 /*
4977 * The packet didn't originate from a member interface. This should only
4978 * ever happen if a member interface is removed while packets are
4979 * queued for it.
4980 */
4981 if (sc == NULL) {
4982 m_freem(m);
4983 return;
4984 }
4985
4986 if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
4987 if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) {
4988 return;
4989 }
4990 if (m == NULL) {
4991 return;
4992 }
4993 }
4994 (void) bridge_enqueue(sc->sc_ifp, NULL, ifp, m, CHECKSUM_OPERATION_NONE);
4995 }
4996
4997 #endif /* HAS_BRIDGE_DUMMYNET */
4998
4999 /*
5000 * bridge_member_output:
5001 *
5002 * Send output from a bridge member interface. This
5003 * performs the bridging function for locally originated
5004 * packets.
5005 *
5006 * The mbuf has the Ethernet header already attached.
5007 */
5008 static errno_t
5009 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5010 {
5011 ifnet_t bridge_ifp;
5012 struct ether_header *eh;
5013 struct ifnet *dst_if;
5014 uint16_t vlan;
5015 struct bridge_iflist *mac_nat_bif;
5016 ifnet_t mac_nat_ifp;
5017 mbuf_t m = *data;
5018
5019 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5020 "ifp %s", ifp->if_xname);
5021 if (m->m_len < ETHER_HDR_LEN) {
5022 m = m_pullup(m, ETHER_HDR_LEN);
5023 if (m == NULL) {
5024 *data = NULL;
5025 return EJUSTRETURN;
5026 }
5027 }
5028
5029 eh = mtod(m, struct ether_header *);
5030 vlan = VLANTAGOF(m);
5031
5032 BRIDGE_LOCK(sc);
5033 mac_nat_bif = sc->sc_mac_nat_bif;
5034 mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5035 if (mac_nat_ifp == ifp) {
5036 /* record the IP address used by the MAC NAT interface */
5037 (void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5038 m = *data;
5039 if (m == NULL) {
5040 /* packet was deallocated */
5041 BRIDGE_UNLOCK(sc);
5042 return EJUSTRETURN;
5043 }
5044 }
5045 bridge_ifp = sc->sc_ifp;
5046
5047 /*
5048 * APPLE MODIFICATION
5049 * If the packet is an 802.1X ethertype, then only send on the
5050 * original output interface.
5051 */
5052 if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5053 dst_if = ifp;
5054 goto sendunicast;
5055 }
5056
5057 /*
5058 * If bridge is down, but the original output interface is up,
5059 * go ahead and send out that interface. Otherwise, the packet
5060 * is dropped below.
5061 */
5062 if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5063 dst_if = ifp;
5064 goto sendunicast;
5065 }
5066
5067 /*
5068 * If the packet is a multicast, or we don't know a better way to
5069 * get there, send to all interfaces.
5070 */
5071 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5072 dst_if = NULL;
5073 } else {
5074 dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
5075 }
5076 if (dst_if == NULL) {
5077 struct bridge_iflist *bif;
5078 struct mbuf *mc;
5079 int used = 0;
5080 errno_t error;
5081
5082
5083 bridge_span(sc, m);
5084
5085 BRIDGE_LOCK2REF(sc, error);
5086 if (error != 0) {
5087 m_freem(m);
5088 return EJUSTRETURN;
5089 }
5090
5091 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5092 /* skip interface with inactive link status */
5093 if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5094 continue;
5095 }
5096 dst_if = bif->bif_ifp;
5097
5098 #if 0
5099 if (dst_if->if_type == IFT_GIF) {
5100 continue;
5101 }
5102 #endif
5103 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5104 continue;
5105 }
5106 if (dst_if != ifp) {
5107 /*
5108 * If this is not the original output interface,
5109 * and the interface is participating in spanning
5110 * tree, make sure the port is in a state that
5111 * allows forwarding.
5112 */
5113 if ((bif->bif_ifflags & IFBIF_STP) &&
5114 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5115 continue;
5116 }
5117 /*
5118 * If this is not the original output interface,
5119 * and the destination is the MAC NAT interface,
5120 * drop the packet. The packet can't be sent
5121 * if the source MAC is incorrect.
5122 */
5123 if (dst_if == mac_nat_ifp) {
5124 continue;
5125 }
5126 }
5127 if (TAILQ_NEXT(bif, bif_next) == NULL) {
5128 used = 1;
5129 mc = m;
5130 } else {
5131 mc = m_dup(m, M_DONTWAIT);
5132 if (mc == NULL) {
5133 (void) ifnet_stat_increment_out(
5134 bridge_ifp, 0, 0, 1);
5135 continue;
5136 }
5137 }
5138 (void) bridge_enqueue(bridge_ifp, ifp, dst_if,
5139 mc, CHECKSUM_OPERATION_COMPUTE);
5140 }
5141 if (used == 0) {
5142 m_freem(m);
5143 }
5144 BRIDGE_UNREF(sc);
5145 return EJUSTRETURN;
5146 }
5147
5148 sendunicast:
5149 /*
5150 * XXX Spanning tree consideration here?
5151 */
5152
5153 bridge_span(sc, m);
5154 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5155 m_freem(m);
5156 BRIDGE_UNLOCK(sc);
5157 return EJUSTRETURN;
5158 }
5159
5160 BRIDGE_UNLOCK(sc);
5161 if (dst_if == ifp) {
5162 /* just let the packet continue on its way */
5163 return 0;
5164 }
5165 if (dst_if != mac_nat_ifp) {
5166 (void) bridge_enqueue(bridge_ifp, ifp, dst_if, m,
5167 CHECKSUM_OPERATION_COMPUTE);
5168 } else {
5169 /*
5170 * This is not the original output interface
5171 * and the destination is the MAC NAT interface.
5172 * Drop the packet because the packet can't be sent
5173 * if the source MAC is incorrect.
5174 */
5175 m_freem(m);
5176 }
5177 return EJUSTRETURN;
5178 }
5179
5180 /*
5181 * Output callback.
5182 *
5183 * This routine is called externally from above only when if_bridge_txstart
5184 * is disabled; otherwise it is called internally by bridge_start().
5185 */
5186 static int
5187 bridge_output(struct ifnet *ifp, struct mbuf *m)
5188 {
5189 struct bridge_softc *sc = ifnet_softc(ifp);
5190 struct ether_header *eh;
5191 struct ifnet *dst_if = NULL;
5192 int error = 0;
5193
5194 eh = mtod(m, struct ether_header *);
5195
5196 BRIDGE_LOCK(sc);
5197
5198 if (!(m->m_flags & (M_BCAST | M_MCAST))) {
5199 dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0);
5200 }
5201
5202 (void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5203
5204 #if NBPFILTER > 0
5205 if (sc->sc_bpf_output) {
5206 bridge_bpf_output(ifp, m);
5207 }
5208 #endif
5209
5210 if (dst_if == NULL) {
5211 /* callee will unlock */
5212 bridge_broadcast(sc, NULL, m, 0);
5213 } else {
5214 ifnet_t bridge_ifp;
5215
5216 bridge_ifp = sc->sc_ifp;
5217 BRIDGE_UNLOCK(sc);
5218
5219 error = bridge_enqueue(bridge_ifp, NULL, dst_if, m,
5220 CHECKSUM_OPERATION_FINALIZE);
5221 }
5222
5223 return error;
5224 }
5225
5226 static void
5227 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5228 {
5229 struct ether_header *eh = mtod(m, struct ether_header *);
5230 uint16_t ether_type;
5231 uint32_t sw_csum, hwcap;
5232 uint32_t did_sw;
5233 uint32_t csum_flags;
5234
5235 ether_type = ntohs(eh->ether_type);
5236 switch (ether_type) {
5237 case ETHERTYPE_IP:
5238 case ETHERTYPE_IPV6:
5239 break;
5240 default:
5241 return;
5242 }
5243
5244 /* do in software what the hardware cannot */
5245 hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5246 csum_flags = m->m_pkthdr.csum_flags;
5247 sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5248 sw_csum &= IF_HWASSIST_CSUM_MASK;
5249
5250 switch (ether_type) {
5251 case ETHERTYPE_IP:
5252 if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5253 (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5254 if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5255 uint16_t start =
5256 sizeof(*eh) + sizeof(struct ip);
5257 uint16_t ulpoff =
5258 m->m_pkthdr.csum_data & 0xffff;
5259 m->m_pkthdr.csum_flags |=
5260 (CSUM_DATA_VALID | CSUM_PARTIAL);
5261 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5262 m->m_pkthdr.csum_tx_start = start;
5263 } else {
5264 sw_csum |= (CSUM_DELAY_DATA &
5265 m->m_pkthdr.csum_flags);
5266 }
5267 }
5268 did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5269 break;
5270
5271 case ETHERTYPE_IPV6:
5272 if ((hwcap & CSUM_PARTIAL) &&
5273 !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5274 (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5275 if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5276 uint16_t start =
5277 sizeof(*eh) + sizeof(struct ip6_hdr);
5278 uint16_t ulpoff =
5279 m->m_pkthdr.csum_data & 0xffff;
5280 m->m_pkthdr.csum_flags |=
5281 (CSUM_DATA_VALID | CSUM_PARTIAL);
5282 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5283 m->m_pkthdr.csum_tx_start = start;
5284 } else {
5285 sw_csum |= (CSUM_DELAY_IPV6_DATA &
5286 m->m_pkthdr.csum_flags);
5287 }
5288 }
5289 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5290 break;
5291 }
5292 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5293 "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5294 ifp->if_xname, csum_flags, hwcap, sw_csum,
5295 did_sw, m->m_pkthdr.csum_flags);
5296 }
5297
/*
 * bridge_start:
 *
 *	Start output on a bridge: drain the interface's send queue, passing
 *	each dequeued packet to bridge_output().
 *
 *	This routine is invoked by the start worker thread; because we never
 *	call it directly, there is no need to deploy any serialization
 *	mechanism other than what's already used by the worker thread, i.e.
 *	this is already single threaded.
 *
 *	This routine is called only when if_bridge_txstart is enabled.
 */
static void
bridge_start(struct ifnet *ifp)
{
	struct mbuf *pkt;

	/* a non-zero ifnet_dequeue() result ends the drain */
	while (ifnet_dequeue(ifp, &pkt) == 0) {
		(void) bridge_output(ifp, pkt);
	}
}
5323
5324 /*
5325 * bridge_forward:
5326 *
5327 * The forwarding function of the bridge.
5328 *
5329 * NOTE: Releases the lock on return.
5330 */
5331 static void
5332 bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
5333 struct mbuf *m)
5334 {
5335 struct bridge_iflist *dbif;
5336 ifnet_t bridge_ifp;
5337 struct ifnet *src_if, *dst_if;
5338 struct ether_header *eh;
5339 uint16_t vlan;
5340 uint8_t *dst;
5341 int error;
5342 struct mac_nat_record mnr;
5343 bool translate_mac = FALSE;
5344 uint32_t sc_filter_flags = 0;
5345
5346 BRIDGE_LOCK_ASSERT_HELD(sc);
5347
5348 bridge_ifp = sc->sc_ifp;
5349 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5350 "%s m 0x%llx", bridge_ifp->if_xname,
5351 (uint64_t)VM_KERNEL_ADDRPERM(m));
5352
5353 src_if = m->m_pkthdr.rcvif;
5354 if (src_if != sbif->bif_ifp) {
5355 const char * src_if_name;
5356
5357 src_if_name = (src_if != NULL) ? src_if->if_xname : "?";
5358 BRIDGE_LOG(LOG_NOTICE, 0,
5359 "src_if %s != bif_ifp %s",
5360 src_if_name, sbif->bif_ifp->if_xname);
5361 goto drop;
5362 }
5363
5364 (void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5365 vlan = VLANTAGOF(m);
5366
5367
5368 if ((sbif->bif_ifflags & IFBIF_STP) &&
5369 sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5370 goto drop;
5371 }
5372
5373 eh = mtod(m, struct ether_header *);
5374 dst = eh->ether_dhost;
5375
5376 /* If the interface is learning, record the address. */
5377 if (sbif->bif_ifflags & IFBIF_LEARNING) {
5378 error = bridge_rtupdate(sc, eh->ether_shost, vlan,
5379 sbif, 0, IFBAF_DYNAMIC);
5380 /*
5381 * If the interface has addresses limits then deny any source
5382 * that is not in the cache.
5383 */
5384 if (error && sbif->bif_addrmax) {
5385 goto drop;
5386 }
5387 }
5388
5389 if ((sbif->bif_ifflags & IFBIF_STP) != 0 &&
5390 sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
5391 goto drop;
5392 }
5393
5394 /*
5395 * At this point, the port either doesn't participate
5396 * in spanning tree or it is in the forwarding state.
5397 */
5398
5399 /*
5400 * If the packet is unicast, destined for someone on
5401 * "this" side of the bridge, drop it.
5402 */
5403 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5404 /* unicast */
5405 dst_if = bridge_rtlookup(sc, dst, vlan);
5406 if (src_if == dst_if) {
5407 goto drop;
5408 }
5409 } else {
5410 /* broadcast/multicast */
5411
5412 /*
5413 * Check if its a reserved multicast address, any address
5414 * listed in 802.1D section 7.12.6 may not be forwarded by the
5415 * bridge.
5416 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
5417 */
5418 if (dst[0] == 0x01 && dst[1] == 0x80 &&
5419 dst[2] == 0xc2 && dst[3] == 0x00 &&
5420 dst[4] == 0x00 && dst[5] <= 0x0f) {
5421 goto drop;
5422 }
5423
5424
5425 /* ...forward it to all interfaces. */
5426 atomic_add_64(&bridge_ifp->if_imcasts, 1);
5427 dst_if = NULL;
5428 }
5429
5430 /*
5431 * If we have a destination interface which is a member of our bridge,
5432 * OR this is a unicast packet, push it through the bpf(4) machinery.
5433 * For broadcast or multicast packets, don't bother because it will
5434 * be reinjected into ether_input. We do this before we pass the packets
5435 * through the pfil(9) framework, as it is possible that pfil(9) will
5436 * drop the packet, or possibly modify it, making it difficult to debug
5437 * firewall issues on the bridge.
5438 */
5439 #if NBPFILTER > 0
5440 if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
5441 dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5442 m->m_pkthdr.rcvif = bridge_ifp;
5443 BRIDGE_BPF_MTAP_INPUT(sc, m);
5444 }
5445 #endif /* NBPFILTER */
5446
5447 if (dst_if == NULL) {
5448 /* bridge_broadcast will unlock */
5449 bridge_broadcast(sc, sbif, m, 1);
5450 return;
5451 }
5452
5453 /*
5454 * Unicast.
5455 */
5456 /*
5457 * At this point, we're dealing with a unicast frame
5458 * going to a different interface.
5459 */
5460 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5461 goto drop;
5462 }
5463
5464 dbif = bridge_lookup_member_if(sc, dst_if);
5465 if (dbif == NULL) {
5466 /* Not a member of the bridge (anymore?) */
5467 goto drop;
5468 }
5469
5470 /* Private segments can not talk to each other */
5471 if (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) {
5472 goto drop;
5473 }
5474
5475 if ((dbif->bif_ifflags & IFBIF_STP) &&
5476 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5477 goto drop;
5478 }
5479
5480 #if HAS_DHCPRA_MASK
5481 /* APPLE MODIFICATION <rdar:6985737> */
5482 if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
5483 m = ip_xdhcpra_output(dst_if, m);
5484 if (!m) {
5485 ++bridge_ifp.if_xdhcpra;
5486 BRIDGE_UNLOCK(sc);
5487 return;
5488 }
5489 }
5490 #endif /* HAS_DHCPRA_MASK */
5491
5492 if (dbif == sc->sc_mac_nat_bif) {
5493 /* determine how to translate the packet */
5494 translate_mac
5495 = bridge_mac_nat_output(sc, sbif, &m, &mnr);
5496 if (m == NULL) {
5497 /* packet was deallocated */
5498 BRIDGE_UNLOCK(sc);
5499 return;
5500 }
5501 } else if (bif_has_checksum_offload(dbif) &&
5502 !bif_has_checksum_offload(sbif)) {
5503 /*
5504 * If the destination interface has checksum offload enabled,
5505 * verify the checksum now, unless the source interface also has
5506 * checksum offload enabled. The checksum in that case has
5507 * already just been computed and verifying it is unnecessary.
5508 */
5509 error = bridge_verify_checksum(&m, &dbif->bif_stats);
5510 if (error != 0) {
5511 BRIDGE_UNLOCK(sc);
5512 if (m != NULL) {
5513 m_freem(m);
5514 }
5515 return;
5516 }
5517 }
5518
5519 sc_filter_flags = sc->sc_filter_flags;
5520
5521 BRIDGE_UNLOCK(sc);
5522 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5523 if (bridge_pf(&m, dst_if, sc_filter_flags, FALSE) != 0) {
5524 return;
5525 }
5526 if (m == NULL) {
5527 return;
5528 }
5529 }
5530
5531 /* if we need to, translate the MAC address */
5532 if (translate_mac) {
5533 bridge_mac_nat_translate(&m, &mnr, IF_LLADDR(dst_if));
5534 }
5535 /*
5536 * We're forwarding an inbound packet in which the checksum must
5537 * already have been computed and if required, verified.
5538 */
5539 if (m != NULL) {
5540 (void) bridge_enqueue(bridge_ifp, src_if, dst_if, m,
5541 CHECKSUM_OPERATION_CLEAR_OFFLOAD);
5542 }
5543 return;
5544
5545 drop:
5546 BRIDGE_UNLOCK(sc);
5547 m_freem(m);
5548 }
5549
5550 static void
5551 inject_input_packet(ifnet_t ifp, mbuf_t m)
5552 {
5553 mbuf_pkthdr_setrcvif(m, ifp);
5554 mbuf_pkthdr_setheader(m, mbuf_data(m));
5555 mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
5556 mbuf_len(m) - ETHER_HDR_LEN);
5557 mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
5558 m->m_flags |= M_PROTO1; /* set to avoid loops */
5559 dlil_input_packet_list(ifp, m);
5560 return;
5561 }
5562
5563 static boolean_t
5564 in_addr_is_ours(struct in_addr ip)
5565 {
5566 struct in_ifaddr *ia;
5567 boolean_t ours = FALSE;
5568
5569 lck_rw_lock_shared(&in_ifaddr_rwlock);
5570 TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5571 if (IA_SIN(ia)->sin_addr.s_addr == ip.s_addr) {
5572 ours = TRUE;
5573 break;
5574 }
5575 }
5576 lck_rw_done(&in_ifaddr_rwlock);
5577 return ours;
5578 }
5579
5580 static boolean_t
5581 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5582 {
5583 struct in6_ifaddr *ia6;
5584 boolean_t ours = FALSE;
5585
5586 lck_rw_lock_shared(&in6_ifaddr_rwlock);
5587 TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5588 if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p, ia6->ia_addr.sin6_scope_id, ifscope)) {
5589 ours = TRUE;
5590 break;
5591 }
5592 }
5593 lck_rw_done(&in6_ifaddr_rwlock);
5594 return ours;
5595 }
5596
5597 static void
5598 bridge_interface_input(ifnet_t bridge_ifp, mbuf_t m,
5599 bpf_packet_func bpf_input_func)
5600 {
5601 size_t byte_count;
5602 struct ether_header *eh;
5603 uint16_t ether_type;
5604 errno_t error;
5605 boolean_t is_ipv4;
5606 int len;
5607 u_int mac_hlen;
5608 int pkt_count;
5609
5610 /* segment large packets before sending them up */
5611 if (if_bridge_segmentation == 0) {
5612 goto done;
5613 }
5614 len = m->m_pkthdr.len;
5615 if (len <= (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5616 goto done;
5617 }
5618 eh = mtod(m, struct ether_header *);
5619 ether_type = ntohs(eh->ether_type);
5620 switch (ether_type) {
5621 case ETHERTYPE_IP:
5622 is_ipv4 = TRUE;
5623 break;
5624 case ETHERTYPE_IPV6:
5625 is_ipv4 = FALSE;
5626 break;
5627 default:
5628 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5629 "large non IPv4/IPv6 packet");
5630 goto done;
5631 }
5632
5633 /*
5634 * We have a large IPv4/IPv6 TCP packet. Segment it if required.
5635 *
5636 * If gso_tcp() returns success (0), the packet(s) are
5637 * ready to be passed up. If the destination is a local IP address,
5638 * the packet will be passed up as a large, single packet.
5639 *
5640 * If gso_tcp() returns an error, the packet has already
5641 * been freed.
5642 */
5643 mac_hlen = sizeof(*eh);
5644 error = gso_tcp(bridge_ifp, &m, mac_hlen, is_ipv4, FALSE);
5645 if (error != 0) {
5646 return;
5647 }
5648
5649 done:
5650 pkt_count = 0;
5651 byte_count = 0;
5652 for (mbuf_t scan = m; scan != NULL; scan = scan->m_nextpkt) {
5653 /* Mark the packet as arriving on the bridge interface */
5654 mbuf_pkthdr_setrcvif(scan, bridge_ifp);
5655 mbuf_pkthdr_setheader(scan, mbuf_data(scan));
5656 if (bpf_input_func != NULL) {
5657 (*bpf_input_func)(bridge_ifp, scan);
5658 }
5659 mbuf_setdata(scan, (char *)mbuf_data(scan) + ETHER_HDR_LEN,
5660 mbuf_len(scan) - ETHER_HDR_LEN);
5661 mbuf_pkthdr_adjustlen(scan, -ETHER_HDR_LEN);
5662 byte_count += mbuf_pkthdr_len(scan);
5663 pkt_count++;
5664 }
5665 (void)ifnet_stat_increment_in(bridge_ifp, pkt_count, byte_count, 0);
5666 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5667 "%s %d packet(s) %ld bytes",
5668 bridge_ifp->if_xname, pkt_count, byte_count);
5669 dlil_input_packet_list(bridge_ifp, m);
5670 return;
5671 }
5672
5673 /*
5674 * bridge_input:
5675 *
5676 * Filter input from a member interface. Queue the packet for
5677 * bridging if it is not for us.
5678 */
5679 errno_t
5680 bridge_input(struct ifnet *ifp, mbuf_t *data)
5681 {
5682 struct bridge_softc *sc = ifp->if_bridge;
5683 struct bridge_iflist *bif, *bif2;
5684 ifnet_t bridge_ifp;
5685 struct ether_header *eh;
5686 struct mbuf *mc, *mc2;
5687 uint16_t vlan;
5688 errno_t error;
5689 boolean_t is_broadcast;
5690 boolean_t is_ip_broadcast = FALSE;
5691 boolean_t is_ifp_mac = FALSE;
5692 mbuf_t m = *data;
5693 uint32_t sc_filter_flags = 0;
5694
5695 bridge_ifp = sc->sc_ifp;
5696 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5697 "%s from %s m 0x%llx data 0x%llx",
5698 bridge_ifp->if_xname, ifp->if_xname,
5699 (uint64_t)VM_KERNEL_ADDRPERM(m),
5700 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
5701 if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
5702 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5703 "%s not running passing along",
5704 bridge_ifp->if_xname);
5705 return 0;
5706 }
5707
5708 vlan = VLANTAGOF(m);
5709
5710 #ifdef IFF_MONITOR
5711 /*
5712 * Implement support for bridge monitoring. If this flag has been
5713 * set on this interface, discard the packet once we push it through
5714 * the bpf(4) machinery, but before we do, increment the byte and
5715 * packet counters associated with this interface.
5716 */
5717 if ((bridge_ifp->if_flags & IFF_MONITOR) != 0) {
5718 m->m_pkthdr.rcvif = bridge_ifp;
5719 BRIDGE_BPF_MTAP_INPUT(sc, m);
5720 (void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5721 m_freem(m);
5722 return EJUSTRETURN;
5723 }
5724 #endif /* IFF_MONITOR */
5725
5726 /*
5727 * Need to clear the promiscuous flags otherwise it will be
5728 * dropped by DLIL after processing filters
5729 */
5730 if ((mbuf_flags(m) & MBUF_PROMISC)) {
5731 mbuf_setflags_mask(m, 0, MBUF_PROMISC);
5732 }
5733
5734 sc_filter_flags = sc->sc_filter_flags;
5735 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5736 error = bridge_pf(&m, ifp, sc_filter_flags, TRUE);
5737 if (error != 0) {
5738 return EJUSTRETURN;
5739 }
5740 if (m == NULL) {
5741 return EJUSTRETURN;
5742 }
5743 /*
5744 * bridge_pf could have modified the pointer on success in order
5745 * to do its processing. Updated data such that we don't use a
5746 * stale pointer.
5747 */
5748 *data = m;
5749 }
5750
5751 BRIDGE_LOCK(sc);
5752 bif = bridge_lookup_member_if(sc, ifp);
5753 if (bif == NULL) {
5754 BRIDGE_UNLOCK(sc);
5755 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5756 "%s bridge_lookup_member_if failed",
5757 bridge_ifp->if_xname);
5758 return 0;
5759 }
5760 if (bif_has_checksum_offload(bif)) {
5761 /* need to compute IP/UDP/TCP/checksums */
5762 error = bridge_offload_checksum(data, &bif->bif_stats);
5763 if (error != 0) {
5764 BRIDGE_UNLOCK(sc);
5765 if (*data != NULL) {
5766 m_freem(*data);
5767 *data = NULL;
5768 }
5769 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
5770 "%s(%s) bridge_offload_checksum rdbgffailed %d",
5771 bridge_ifp->if_xname,
5772 bif->bif_ifp->if_xname, error);
5773 return EJUSTRETURN;
5774 }
5775 m = *data;
5776 }
5777
5778 if (bif->bif_flags & BIFF_HOST_FILTER) {
5779 error = bridge_host_filter(bif, data);
5780 if (error != 0) {
5781 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5782 "%s bridge_host_filter failed",
5783 bif->bif_ifp->if_xname);
5784 BRIDGE_UNLOCK(sc);
5785 return EJUSTRETURN;
5786 }
5787 m = *data;
5788 }
5789
5790 is_broadcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;
5791 eh = mtod(m, struct ether_header *);
5792 if (!is_broadcast &&
5793 memcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) {
5794 if (sc->sc_mac_nat_bif == bif) {
5795 /* doing MAC-NAT, check if destination is broadcast */
5796 is_ip_broadcast = is_broadcast_ip_packet(data);
5797 if (*data == NULL) {
5798 BRIDGE_UNLOCK(sc);
5799 return EJUSTRETURN;
5800 }
5801 m = *data;
5802 }
5803 if (!is_ip_broadcast) {
5804 is_ifp_mac = TRUE;
5805 }
5806 }
5807
5808 bridge_span(sc, m);
5809
5810 if (is_broadcast || is_ip_broadcast) {
5811 if (is_broadcast && (m->m_flags & M_MCAST) != 0) {
5812 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
5813 " multicast: "
5814 "%02x:%02x:%02x:%02x:%02x:%02x",
5815 eh->ether_dhost[0], eh->ether_dhost[1],
5816 eh->ether_dhost[2], eh->ether_dhost[3],
5817 eh->ether_dhost[4], eh->ether_dhost[5]);
5818 }
5819 /* Tap off 802.1D packets; they do not get forwarded. */
5820 if (is_broadcast && memcmp(eh->ether_dhost, bstp_etheraddr,
5821 ETHER_ADDR_LEN) == 0) {
5822 #if BRIDGESTP
5823 m = bstp_input(&bif->bif_stp, ifp, m);
5824 #else /* !BRIDGESTP */
5825 m_freem(m);
5826 m = NULL;
5827 #endif /* !BRIDGESTP */
5828 if (m == NULL) {
5829 BRIDGE_UNLOCK(sc);
5830 return EJUSTRETURN;
5831 }
5832 }
5833
5834 if ((bif->bif_ifflags & IFBIF_STP) &&
5835 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5836 BRIDGE_UNLOCK(sc);
5837 return 0;
5838 }
5839
5840 /*
5841 * Make a deep copy of the packet and enqueue the copy
5842 * for bridge processing.
5843 */
5844 mc = m_dup(m, M_DONTWAIT);
5845 if (mc == NULL) {
5846 BRIDGE_UNLOCK(sc);
5847 return 0;
5848 }
5849
5850 /*
5851 * Perform the bridge forwarding function with the copy.
5852 *
5853 * Note that bridge_forward calls BRIDGE_UNLOCK
5854 */
5855 if (is_ip_broadcast) {
5856 /* make the copy look like it is actually broadcast */
5857 mc->m_flags |= M_BCAST;
5858 eh = mtod(mc, struct ether_header *);
5859 bcopy(etherbroadcastaddr, eh->ether_dhost,
5860 ETHER_ADDR_LEN);
5861 }
5862 bridge_forward(sc, bif, mc);
5863
5864 /*
5865 * Reinject the mbuf as arriving on the bridge so we have a
5866 * chance at claiming multicast packets. We can not loop back
5867 * here from ether_input as a bridge is never a member of a
5868 * bridge.
5869 */
5870 VERIFY(bridge_ifp->if_bridge == NULL);
5871 mc2 = m_dup(m, M_DONTWAIT);
5872 if (mc2 != NULL) {
5873 /* Keep the layer3 header aligned */
5874 int i = min(mc2->m_pkthdr.len, max_protohdr);
5875 mc2 = m_copyup(mc2, i, ETHER_ALIGN);
5876 }
5877 if (mc2 != NULL) {
5878 /* mark packet as arriving on the bridge */
5879 mc2->m_pkthdr.rcvif = bridge_ifp;
5880 mc2->m_pkthdr.pkt_hdr = mbuf_data(mc2);
5881 BRIDGE_BPF_MTAP_INPUT(sc, mc2);
5882 (void) mbuf_setdata(mc2,
5883 (char *)mbuf_data(mc2) + ETHER_HDR_LEN,
5884 mbuf_len(mc2) - ETHER_HDR_LEN);
5885 (void) mbuf_pkthdr_adjustlen(mc2, -ETHER_HDR_LEN);
5886 (void) ifnet_stat_increment_in(bridge_ifp, 1,
5887 mbuf_pkthdr_len(mc2), 0);
5888 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
5889 "%s mcast for us", bridge_ifp->if_xname);
5890 dlil_input_packet_list(bridge_ifp, mc2);
5891 }
5892
5893 /* Return the original packet for local processing. */
5894 return 0;
5895 }
5896
5897 if ((bif->bif_ifflags & IFBIF_STP) &&
5898 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5899 BRIDGE_UNLOCK(sc);
5900 return 0;
5901 }
5902
5903 #ifdef DEV_CARP
5904 #define CARP_CHECK_WE_ARE_DST(iface) \
5905 ((iface)->if_carp &&\
5906 carp_forus((iface)->if_carp, eh->ether_dhost))
5907 #define CARP_CHECK_WE_ARE_SRC(iface) \
5908 ((iface)->if_carp &&\
5909 carp_forus((iface)->if_carp, eh->ether_shost))
5910 #else
5911 #define CARP_CHECK_WE_ARE_DST(iface) 0
5912 #define CARP_CHECK_WE_ARE_SRC(iface) 0
5913 #endif
5914
5915 #define PFIL_HOOKED_INET6 PFIL_HOOKED(&inet6_pfil_hook)
5916
5917 #define PFIL_PHYS(sc, ifp, m)
5918
5919 #define GRAB_OUR_PACKETS(iface) \
5920 if ((iface)->if_type == IFT_GIF) \
5921 continue; \
5922 /* It is destined for us. */ \
5923 if (memcmp(IF_LLADDR((iface)), eh->ether_dhost, \
5924 ETHER_ADDR_LEN) == 0 || CARP_CHECK_WE_ARE_DST((iface))) { \
5925 if ((iface)->if_type == IFT_BRIDGE) { \
5926 BRIDGE_BPF_MTAP_INPUT(sc, m); \
5927 /* Filter on the physical interface. */ \
5928 PFIL_PHYS(sc, iface, m); \
5929 } else { \
5930 bpf_tap_in(iface, DLT_EN10MB, m, NULL, 0); \
5931 } \
5932 if (bif->bif_ifflags & IFBIF_LEARNING) { \
5933 error = bridge_rtupdate(sc, eh->ether_shost, \
5934 vlan, bif, 0, IFBAF_DYNAMIC); \
5935 if (error && bif->bif_addrmax) { \
5936 BRIDGE_UNLOCK(sc); \
5937 m_freem(m); \
5938 return (EJUSTRETURN); \
5939 } \
5940 } \
5941 BRIDGE_UNLOCK(sc); \
5942 inject_input_packet(iface, m); \
5943 return (EJUSTRETURN); \
5944 } \
5945 \
5946 /* We just received a packet that we sent out. */ \
5947 if (memcmp(IF_LLADDR((iface)), eh->ether_shost, \
5948 ETHER_ADDR_LEN) == 0 || CARP_CHECK_WE_ARE_SRC((iface))) { \
5949 BRIDGE_UNLOCK(sc); \
5950 m_freem(m); \
5951 return (EJUSTRETURN); \
5952 }
5953
5954 /*
5955 * Unicast.
5956 */
5957
5958 /* handle MAC-NAT if enabled */
5959 if (is_ifp_mac && sc->sc_mac_nat_bif == bif) {
5960 ifnet_t dst_if;
5961 boolean_t is_input = FALSE;
5962
5963 dst_if = bridge_mac_nat_input(sc, data, &is_input);
5964 m = *data;
5965 if (dst_if == ifp) {
5966 /* our input packet */
5967 } else if (dst_if != NULL || m == NULL) {
5968 BRIDGE_UNLOCK(sc);
5969 if (dst_if != NULL) {
5970 ASSERT(m != NULL);
5971 if (is_input) {
5972 inject_input_packet(dst_if, m);
5973 } else {
5974 (void)bridge_enqueue(bridge_ifp, NULL,
5975 dst_if, m,
5976 CHECKSUM_OPERATION_CLEAR_OFFLOAD);
5977 }
5978 }
5979 return EJUSTRETURN;
5980 }
5981 }
5982
5983 /*
5984 * If the packet is for the bridge, pass it up for local processing.
5985 */
5986 if (memcmp(eh->ether_dhost, IF_LLADDR(bridge_ifp),
5987 ETHER_ADDR_LEN) == 0 || CARP_CHECK_WE_ARE_DST(bridge_ifp)) {
5988 bpf_packet_func bpf_input_func = sc->sc_bpf_input;
5989
5990 /*
5991 * If the interface is learning, and the source
5992 * address is valid and not multicast, record
5993 * the address.
5994 */
5995 if (bif->bif_ifflags & IFBIF_LEARNING) {
5996 (void) bridge_rtupdate(sc, eh->ether_shost,
5997 vlan, bif, 0, IFBAF_DYNAMIC);
5998 }
5999 BRIDGE_UNLOCK(sc);
6000
6001 bridge_interface_input(bridge_ifp, m, bpf_input_func);
6002 return EJUSTRETURN;
6003 }
6004
6005 /*
6006 * if the destination of the packet is for the MAC address of
6007 * the member interface itself, then we don't need to forward
6008 * it -- just pass it back. Note that it'll likely just be
6009 * dropped by the stack, but if something else is bound to
6010 * the interface directly (for example, the wireless stats
6011 * protocol -- although that actually uses BPF right now),
6012 * then it will consume the packet
6013 *
6014 * ALSO, note that we do this check AFTER checking for the
6015 * bridge's own MAC address, because the bridge may be
6016 * using the SAME MAC address as one of its interfaces
6017 */
6018 if (is_ifp_mac) {
6019
6020 #ifdef VERY_VERY_VERY_DIAGNOSTIC
6021 BRIDGE_LOG(LOG_NOTICE, 0,
6022 "not forwarding packet bound for member interface");
6023 #endif
6024
6025 BRIDGE_UNLOCK(sc);
6026 return 0;
6027 }
6028
6029 /* Now check the remaining bridge members. */
6030 TAILQ_FOREACH(bif2, &sc->sc_iflist, bif_next) {
6031 if (bif2->bif_ifp != ifp) {
6032 GRAB_OUR_PACKETS(bif2->bif_ifp);
6033 }
6034 }
6035
6036 #undef CARP_CHECK_WE_ARE_DST
6037 #undef CARP_CHECK_WE_ARE_SRC
6038 #undef GRAB_OUR_PACKETS
6039
6040 /*
6041 * Perform the bridge forwarding function.
6042 *
6043 * Note that bridge_forward calls BRIDGE_UNLOCK
6044 */
6045 bridge_forward(sc, bif, m);
6046
6047 return EJUSTRETURN;
6048 }
6049
6050 /*
6051 * bridge_broadcast:
6052 *
6053 * Send a frame to all interfaces that are members of
6054 * the bridge, except for the one on which the packet
6055 * arrived.
6056 *
6057 * NOTE: Releases the lock on return.
6058 */
6059 static void
6060 bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
6061 struct mbuf *m, int runfilt)
6062 {
6063 ifnet_t bridge_ifp;
6064 struct bridge_iflist *dbif;
6065 struct ifnet * src_if;
6066 struct mbuf *mc;
6067 struct mbuf *mc_in;
6068 struct ifnet *dst_if;
6069 int error = 0, used = 0;
6070 boolean_t bridge_if_out;
6071 ChecksumOperation cksum_op;
6072 struct mac_nat_record mnr;
6073 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
6074 boolean_t translate_mac = FALSE;
6075 uint32_t sc_filter_flags = 0;
6076
6077 bridge_ifp = sc->sc_ifp;
6078 if (sbif != NULL) {
6079 bridge_if_out = FALSE;
6080 src_if = sbif->bif_ifp;
6081 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6082 if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6083 /* get the translation record while holding the lock */
6084 translate_mac
6085 = bridge_mac_nat_output(sc, sbif, &m, &mnr);
6086 if (m == NULL) {
6087 /* packet was deallocated */
6088 BRIDGE_UNLOCK(sc);
6089 return;
6090 }
6091 }
6092 } else {
6093 /*
6094 * sbif is NULL when the bridge interface calls
6095 * bridge_broadcast().
6096 */
6097 bridge_if_out = TRUE;
6098 cksum_op = CHECKSUM_OPERATION_FINALIZE;
6099 sbif = NULL;
6100 src_if = NULL;
6101 }
6102
6103 BRIDGE_LOCK2REF(sc, error);
6104 if (error) {
6105 m_freem(m);
6106 return;
6107 }
6108
6109 TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6110 dst_if = dbif->bif_ifp;
6111 if (dst_if == src_if) {
6112 /* skip the interface that the packet came in on */
6113 continue;
6114 }
6115
6116 /* Private segments can not talk to each other */
6117 if (sbif != NULL &&
6118 (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6119 continue;
6120 }
6121
6122 if ((dbif->bif_ifflags & IFBIF_STP) &&
6123 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6124 continue;
6125 }
6126
6127 if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6128 (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
6129 continue;
6130 }
6131
6132 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6133 continue;
6134 }
6135
6136 if (!(dbif->bif_flags & BIFF_MEDIA_ACTIVE)) {
6137 continue;
6138 }
6139
6140 if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6141 mc = m;
6142 used = 1;
6143 } else {
6144 mc = m_dup(m, M_DONTWAIT);
6145 if (mc == NULL) {
6146 (void) ifnet_stat_increment_out(bridge_ifp,
6147 0, 0, 1);
6148 continue;
6149 }
6150 }
6151
6152 /*
6153 * If broadcast input is enabled, do so only if this
6154 * is an input packet.
6155 */
6156 if (!bridge_if_out &&
6157 (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6158 mc_in = m_dup(mc, M_DONTWAIT);
6159 /* this could fail, but we continue anyways */
6160 } else {
6161 mc_in = NULL;
6162 }
6163
6164 /* out */
6165 if (translate_mac && mac_nat_bif == dbif) {
6166 /* translate the packet without holding the lock */
6167 bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
6168 }
6169
6170 sc_filter_flags = sc->sc_filter_flags;
6171 if (runfilt &&
6172 PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6173 if (used == 0) {
6174 /* Keep the layer3 header aligned */
6175 int i = min(mc->m_pkthdr.len, max_protohdr);
6176 mc = m_copyup(mc, i, ETHER_ALIGN);
6177 if (mc == NULL) {
6178 (void) ifnet_stat_increment_out(
6179 sc->sc_ifp, 0, 0, 1);
6180 if (mc_in != NULL) {
6181 m_freem(mc_in);
6182 mc_in = NULL;
6183 }
6184 continue;
6185 }
6186 }
6187 if (bridge_pf(&mc, dst_if, sc_filter_flags, FALSE) != 0) {
6188 if (mc_in != NULL) {
6189 m_freem(mc_in);
6190 mc_in = NULL;
6191 }
6192 continue;
6193 }
6194 if (mc == NULL) {
6195 if (mc_in != NULL) {
6196 m_freem(mc_in);
6197 mc_in = NULL;
6198 }
6199 continue;
6200 }
6201 }
6202
6203 if (mc != NULL) {
6204 /* verify checksum if necessary */
6205 if (bif_has_checksum_offload(dbif) && sbif != NULL &&
6206 !bif_has_checksum_offload(sbif)) {
6207 error = bridge_verify_checksum(&mc,
6208 &dbif->bif_stats);
6209 if (error != 0) {
6210 if (mc != NULL) {
6211 m_freem(mc);
6212 }
6213 mc = NULL;
6214 }
6215 }
6216 if (mc != NULL) {
6217 (void) bridge_enqueue(bridge_ifp,
6218 NULL, dst_if, mc, cksum_op);
6219 }
6220 }
6221
6222 /* in */
6223 if (mc_in == NULL) {
6224 continue;
6225 }
6226 bpf_tap_in(dst_if, DLT_EN10MB, mc_in, NULL, 0);
6227 mbuf_pkthdr_setrcvif(mc_in, dst_if);
6228 mbuf_pkthdr_setheader(mc_in, mbuf_data(mc_in));
6229 mbuf_setdata(mc_in, (char *)mbuf_data(mc_in) + ETHER_HDR_LEN,
6230 mbuf_len(mc_in) - ETHER_HDR_LEN);
6231 mbuf_pkthdr_adjustlen(mc_in, -ETHER_HDR_LEN);
6232 mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
6233 dlil_input_packet_list(dst_if, mc_in);
6234 }
6235 if (used == 0) {
6236 m_freem(m);
6237 }
6238
6239
6240 BRIDGE_UNREF(sc);
6241 }
6242
6243 /*
6244 * bridge_span:
6245 *
6246 * Duplicate a packet out one or more interfaces that are in span mode,
6247 * the original mbuf is unmodified.
6248 */
6249 static void
6250 bridge_span(struct bridge_softc *sc, struct mbuf *m)
6251 {
6252 struct bridge_iflist *bif;
6253 struct ifnet *dst_if;
6254 struct mbuf *mc;
6255
6256 if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6257 return;
6258 }
6259
6260 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6261 dst_if = bif->bif_ifp;
6262
6263 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6264 continue;
6265 }
6266
6267 mc = m_copypacket(m, M_DONTWAIT);
6268 if (mc == NULL) {
6269 (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6270 continue;
6271 }
6272
6273 (void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, mc,
6274 CHECKSUM_OPERATION_NONE);
6275 }
6276 }
6277
6278
6279 /*
6280 * bridge_rtupdate:
6281 *
6282 * Add a bridge routing entry.
6283 */
6284 static int
6285 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
6286 struct bridge_iflist *bif, int setflags, uint8_t flags)
6287 {
6288 struct bridge_rtnode *brt;
6289 int error;
6290
6291 BRIDGE_LOCK_ASSERT_HELD(sc);
6292
6293 /* Check the source address is valid and not multicast. */
6294 if (ETHER_IS_MULTICAST(dst) ||
6295 (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6296 dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6297 return EINVAL;
6298 }
6299
6300
6301 /* 802.1p frames map to vlan 1 */
6302 if (vlan == 0) {
6303 vlan = 1;
6304 }
6305
6306 /*
6307 * A route for this destination might already exist. If so,
6308 * update it, otherwise create a new one.
6309 */
6310 if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6311 if (sc->sc_brtcnt >= sc->sc_brtmax) {
6312 sc->sc_brtexceeded++;
6313 return ENOSPC;
6314 }
6315 /* Check per interface address limits (if enabled) */
6316 if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6317 bif->bif_addrexceeded++;
6318 return ENOSPC;
6319 }
6320
6321 /*
6322 * Allocate a new bridge forwarding node, and
6323 * initialize the expiration time and Ethernet
6324 * address.
6325 */
6326 brt = zalloc_noblock(bridge_rtnode_pool);
6327 if (brt == NULL) {
6328 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6329 "zalloc_nolock failed");
6330 return ENOMEM;
6331 }
6332 bzero(brt, sizeof(struct bridge_rtnode));
6333
6334 if (bif->bif_ifflags & IFBIF_STICKY) {
6335 brt->brt_flags = IFBAF_STICKY;
6336 } else {
6337 brt->brt_flags = IFBAF_DYNAMIC;
6338 }
6339
6340 memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6341 brt->brt_vlan = vlan;
6342
6343
6344 if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6345 zfree(bridge_rtnode_pool, brt);
6346 return error;
6347 }
6348 brt->brt_dst = bif;
6349 bif->bif_addrcnt++;
6350 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6351 "added %02x:%02x:%02x:%02x:%02x:%02x "
6352 "on %s count %u hashsize %u",
6353 dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6354 sc->sc_ifp->if_xname, sc->sc_brtcnt,
6355 sc->sc_rthash_size);
6356 }
6357
6358 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6359 brt->brt_dst != bif) {
6360 brt->brt_dst->bif_addrcnt--;
6361 brt->brt_dst = bif;
6362 brt->brt_dst->bif_addrcnt++;
6363 }
6364
6365 if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6366 unsigned long now;
6367
6368 now = (unsigned long) net_uptime();
6369 brt->brt_expire = now + sc->sc_brttimeout;
6370 }
6371 if (setflags) {
6372 brt->brt_flags = flags;
6373 }
6374
6375
6376 return 0;
6377 }
6378
6379 /*
6380 * bridge_rtlookup:
6381 *
6382 * Lookup the destination interface for an address.
6383 */
6384 static struct ifnet *
6385 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6386 {
6387 struct bridge_rtnode *brt;
6388
6389 BRIDGE_LOCK_ASSERT_HELD(sc);
6390
6391 if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6392 return NULL;
6393 }
6394
6395 return brt->brt_ifp;
6396 }
6397
6398 /*
6399 * bridge_rttrim:
6400 *
6401 * Trim the routine table so that we have a number
6402 * of routing entries less than or equal to the
6403 * maximum number.
6404 */
6405 static void
6406 bridge_rttrim(struct bridge_softc *sc)
6407 {
6408 struct bridge_rtnode *brt, *nbrt;
6409
6410 BRIDGE_LOCK_ASSERT_HELD(sc);
6411
6412 /* Make sure we actually need to do this. */
6413 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6414 return;
6415 }
6416
6417 /* Force an aging cycle; this might trim enough addresses. */
6418 bridge_rtage(sc);
6419 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6420 return;
6421 }
6422
6423 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6424 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6425 bridge_rtnode_destroy(sc, brt);
6426 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6427 return;
6428 }
6429 }
6430 }
6431 }
6432
6433 /*
6434 * bridge_aging_timer:
6435 *
6436 * Aging periodic timer for the bridge routing table.
6437 */
6438 static void
6439 bridge_aging_timer(struct bridge_softc *sc)
6440 {
6441 BRIDGE_LOCK_ASSERT_HELD(sc);
6442
6443 bridge_rtage(sc);
6444 if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6445 (sc->sc_flags & SCF_DETACHING) == 0) {
6446 sc->sc_aging_timer.bdc_sc = sc;
6447 sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6448 sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6449 bridge_schedule_delayed_call(&sc->sc_aging_timer);
6450 }
6451 }
6452
6453 /*
6454 * bridge_rtage:
6455 *
6456 * Perform an aging cycle.
6457 */
6458 static void
6459 bridge_rtage(struct bridge_softc *sc)
6460 {
6461 struct bridge_rtnode *brt, *nbrt;
6462 unsigned long now;
6463
6464 BRIDGE_LOCK_ASSERT_HELD(sc);
6465
6466 now = (unsigned long) net_uptime();
6467
6468 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6469 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6470 if (now >= brt->brt_expire) {
6471 bridge_rtnode_destroy(sc, brt);
6472 }
6473 }
6474 }
6475 if (sc->sc_mac_nat_bif != NULL) {
6476 bridge_mac_nat_age_entries(sc, now);
6477 }
6478 }
6479
6480 /*
6481 * bridge_rtflush:
6482 *
6483 * Remove all dynamic addresses from the bridge.
6484 */
6485 static void
6486 bridge_rtflush(struct bridge_softc *sc, int full)
6487 {
6488 struct bridge_rtnode *brt, *nbrt;
6489
6490 BRIDGE_LOCK_ASSERT_HELD(sc);
6491
6492 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6493 if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6494 bridge_rtnode_destroy(sc, brt);
6495 }
6496 }
6497 }
6498
6499 /*
6500 * bridge_rtdaddr:
6501 *
6502 * Remove an address from the table.
6503 */
6504 static int
6505 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6506 {
6507 struct bridge_rtnode *brt;
6508 int found = 0;
6509
6510 BRIDGE_LOCK_ASSERT_HELD(sc);
6511
6512 /*
6513 * If vlan is zero then we want to delete for all vlans so the lookup
6514 * may return more than one.
6515 */
6516 while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6517 bridge_rtnode_destroy(sc, brt);
6518 found = 1;
6519 }
6520
6521 return found ? 0 : ENOENT;
6522 }
6523
6524 /*
6525 * bridge_rtdelete:
6526 *
6527 * Delete routes to a specific member interface.
6528 */
6529 static void
6530 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6531 {
6532 struct bridge_rtnode *brt, *nbrt;
6533
6534 BRIDGE_LOCK_ASSERT_HELD(sc);
6535
6536 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6537 if (brt->brt_ifp == ifp && (full ||
6538 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6539 bridge_rtnode_destroy(sc, brt);
6540 }
6541 }
6542 }
6543
6544 /*
6545 * bridge_rtable_init:
6546 *
6547 * Initialize the route table for this bridge.
6548 */
6549 static int
6550 bridge_rtable_init(struct bridge_softc *sc)
6551 {
6552 u_int32_t i;
6553
6554 sc->sc_rthash = _MALLOC(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
6555 M_DEVBUF, M_WAITOK | M_ZERO);
6556 if (sc->sc_rthash == NULL) {
6557 BRIDGE_LOG(LOG_NOTICE, 0, "no memory");
6558 return ENOMEM;
6559 }
6560 sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6561
6562 for (i = 0; i < sc->sc_rthash_size; i++) {
6563 LIST_INIT(&sc->sc_rthash[i]);
6564 }
6565
6566 sc->sc_rthash_key = RandomULong();
6567
6568 LIST_INIT(&sc->sc_rtlist);
6569
6570 return 0;
6571 }
6572
6573 /*
6574 * bridge_rthash_delayed_resize:
6575 *
6576 * Resize the routing table hash on a delayed thread call.
6577 */
6578 static void
6579 bridge_rthash_delayed_resize(struct bridge_softc *sc)
6580 {
6581 u_int32_t new_rthash_size;
6582 struct _bridge_rtnode_list *new_rthash = NULL;
6583 struct _bridge_rtnode_list *old_rthash = NULL;
6584 u_int32_t i;
6585 struct bridge_rtnode *brt;
6586 int error = 0;
6587
6588 BRIDGE_LOCK_ASSERT_HELD(sc);
6589
6590 /*
6591 * Four entries per hash bucket is our ideal load factor
6592 */
6593 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6594 goto out;
6595 }
6596
6597 /*
6598 * Doubling the number of hash buckets may be too simplistic
6599 * especially when facing a spike of new entries
6600 */
6601 new_rthash_size = sc->sc_rthash_size * 2;
6602
6603 sc->sc_flags |= SCF_RESIZING;
6604 BRIDGE_UNLOCK(sc);
6605
6606 new_rthash = _MALLOC(sizeof(*sc->sc_rthash) * new_rthash_size,
6607 M_DEVBUF, M_WAITOK | M_ZERO);
6608
6609 BRIDGE_LOCK(sc);
6610 sc->sc_flags &= ~SCF_RESIZING;
6611
6612 if (new_rthash == NULL) {
6613 error = ENOMEM;
6614 goto out;
6615 }
6616 if ((sc->sc_flags & SCF_DETACHING)) {
6617 error = ENODEV;
6618 goto out;
6619 }
6620 /*
6621 * Fail safe from here on
6622 */
6623 old_rthash = sc->sc_rthash;
6624 sc->sc_rthash = new_rthash;
6625 sc->sc_rthash_size = new_rthash_size;
6626
6627 /*
6628 * Get a new key to force entries to be shuffled around to reduce
6629 * the likelihood they will land in the same buckets
6630 */
6631 sc->sc_rthash_key = RandomULong();
6632
6633 for (i = 0; i < sc->sc_rthash_size; i++) {
6634 LIST_INIT(&sc->sc_rthash[i]);
6635 }
6636
6637 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
6638 LIST_REMOVE(brt, brt_hash);
6639 (void) bridge_rtnode_hash(sc, brt);
6640 }
6641 out:
6642 if (error == 0) {
6643 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6644 "%s new size %u",
6645 sc->sc_ifp->if_xname, sc->sc_rthash_size);
6646 if (old_rthash) {
6647 _FREE(old_rthash, M_DEVBUF);
6648 }
6649 } else {
6650 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
6651 "%s failed %d", sc->sc_ifp->if_xname, error);
6652 if (new_rthash != NULL) {
6653 _FREE(new_rthash, M_DEVBUF);
6654 }
6655 }
6656 }
6657
6658 /*
6659 * Resize the number of hash buckets based on the load factor
6660 * Currently only grow
6661 * Failing to resize the hash table is not fatal
6662 */
6663 static void
6664 bridge_rthash_resize(struct bridge_softc *sc)
6665 {
6666 BRIDGE_LOCK_ASSERT_HELD(sc);
6667
6668 if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
6669 return;
6670 }
6671
6672 /*
6673 * Four entries per hash bucket is our ideal load factor
6674 */
6675 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6676 return;
6677 }
6678 /*
6679 * Hard limit on the size of the routing hash table
6680 */
6681 if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
6682 return;
6683 }
6684
6685 sc->sc_resize_call.bdc_sc = sc;
6686 sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
6687 bridge_schedule_delayed_call(&sc->sc_resize_call);
6688 }
6689
6690 /*
6691 * bridge_rtable_fini:
6692 *
6693 * Deconstruct the route table for this bridge.
6694 */
6695 static void
6696 bridge_rtable_fini(struct bridge_softc *sc)
6697 {
6698 KASSERT(sc->sc_brtcnt == 0,
6699 ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
6700 if (sc->sc_rthash) {
6701 _FREE(sc->sc_rthash, M_DEVBUF);
6702 sc->sc_rthash = NULL;
6703 }
6704 }
6705
6706 /*
6707 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
6708 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
6709 */
6710 #define mix(a, b, c) \
6711 do { \
6712 a -= b; a -= c; a ^= (c >> 13); \
6713 b -= c; b -= a; b ^= (a << 8); \
6714 c -= a; c -= b; c ^= (b >> 13); \
6715 a -= b; a -= c; a ^= (c >> 12); \
6716 b -= c; b -= a; b ^= (a << 16); \
6717 c -= a; c -= b; c ^= (b >> 5); \
6718 a -= b; a -= c; a ^= (c >> 3); \
6719 b -= c; b -= a; b ^= (a << 10); \
6720 c -= a; c -= b; c ^= (b >> 15); \
6721 } while ( /*CONSTCOND*/ 0)
6722
6723 static __inline uint32_t
6724 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
6725 {
6726 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
6727
6728 b += addr[5] << 8;
6729 b += addr[4];
6730 a += addr[3] << 24;
6731 a += addr[2] << 16;
6732 a += addr[1] << 8;
6733 a += addr[0];
6734
6735 mix(a, b, c);
6736
6737 return c & BRIDGE_RTHASH_MASK(sc);
6738 }
6739
6740 #undef mix
6741
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.  Returns zero when
 *	they are equal, otherwise the difference (a[i] - b[i]) of the
 *	first pair of bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	for (int idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int delta = (int)a[idx] - (int)b[idx];

		if (delta != 0) {
			return delta;
		}
	}

	return 0;
}
6753
6754 /*
6755 * bridge_rtnode_lookup:
6756 *
6757 * Look up a bridge route node for the specified destination. Compare the
6758 * vlan id or if zero then just return the first match.
6759 */
6760 static struct bridge_rtnode *
6761 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr,
6762 uint16_t vlan)
6763 {
6764 struct bridge_rtnode *brt;
6765 uint32_t hash;
6766 int dir;
6767
6768 BRIDGE_LOCK_ASSERT_HELD(sc);
6769
6770 hash = bridge_rthash(sc, addr);
6771 LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
6772 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
6773 if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
6774 return brt;
6775 }
6776 if (dir > 0) {
6777 return NULL;
6778 }
6779 }
6780
6781 return NULL;
6782 }
6783
6784 /*
6785 * bridge_rtnode_hash:
6786 *
6787 * Insert the specified bridge node into the route hash table.
6788 * This is used when adding a new node or to rehash when resizing
6789 * the hash table
6790 */
6791 static int
6792 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
6793 {
6794 struct bridge_rtnode *lbrt;
6795 uint32_t hash;
6796 int dir;
6797
6798 BRIDGE_LOCK_ASSERT_HELD(sc);
6799
6800 hash = bridge_rthash(sc, brt->brt_addr);
6801
6802 lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
6803 if (lbrt == NULL) {
6804 LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
6805 goto out;
6806 }
6807
6808 do {
6809 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
6810 if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
6811 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6812 "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
6813 sc->sc_ifp->if_xname,
6814 brt->brt_addr[0], brt->brt_addr[1],
6815 brt->brt_addr[2], brt->brt_addr[3],
6816 brt->brt_addr[4], brt->brt_addr[5]);
6817 return EEXIST;
6818 }
6819 if (dir > 0) {
6820 LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
6821 goto out;
6822 }
6823 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
6824 LIST_INSERT_AFTER(lbrt, brt, brt_hash);
6825 goto out;
6826 }
6827 lbrt = LIST_NEXT(lbrt, brt_hash);
6828 } while (lbrt != NULL);
6829
6830 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6831 "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
6832 sc->sc_ifp->if_xname,
6833 brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
6834 brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
6835 out:
6836 return 0;
6837 }
6838
6839 /*
6840 * bridge_rtnode_insert:
6841 *
6842 * Insert the specified bridge node into the route table. We
6843 * assume the entry is not already in the table.
6844 */
6845 static int
6846 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
6847 {
6848 int error;
6849
6850 error = bridge_rtnode_hash(sc, brt);
6851 if (error != 0) {
6852 return error;
6853 }
6854
6855 LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
6856 sc->sc_brtcnt++;
6857
6858 bridge_rthash_resize(sc);
6859
6860 return 0;
6861 }
6862
6863 /*
6864 * bridge_rtnode_destroy:
6865 *
6866 * Destroy a bridge rtnode.
6867 */
6868 static void
6869 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
6870 {
6871 BRIDGE_LOCK_ASSERT_HELD(sc);
6872
6873 LIST_REMOVE(brt, brt_hash);
6874
6875 LIST_REMOVE(brt, brt_list);
6876 sc->sc_brtcnt--;
6877 brt->brt_dst->bif_addrcnt--;
6878 zfree(bridge_rtnode_pool, brt);
6879 }
6880
6881 #if BRIDGESTP
6882 /*
6883 * bridge_rtable_expire:
6884 *
6885 * Set the expiry time for all routes on an interface.
6886 */
6887 static void
6888 bridge_rtable_expire(struct ifnet *ifp, int age)
6889 {
6890 struct bridge_softc *sc = ifp->if_bridge;
6891 struct bridge_rtnode *brt;
6892
6893 BRIDGE_LOCK(sc);
6894
6895 /*
6896 * If the age is zero then flush, otherwise set all the expiry times to
6897 * age for the interface
6898 */
6899 if (age == 0) {
6900 bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
6901 } else {
6902 unsigned long now;
6903
6904 now = (unsigned long) net_uptime();
6905
6906 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
6907 /* Cap the expiry time to 'age' */
6908 if (brt->brt_ifp == ifp &&
6909 brt->brt_expire > now + age &&
6910 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6911 brt->brt_expire = now + age;
6912 }
6913 }
6914 }
6915 BRIDGE_UNLOCK(sc);
6916 }
6917
6918 /*
6919 * bridge_state_change:
6920 *
6921 * Callback from the bridgestp code when a port changes states.
6922 */
6923 static void
6924 bridge_state_change(struct ifnet *ifp, int state)
6925 {
6926 struct bridge_softc *sc = ifp->if_bridge;
6927 static const char *stpstates[] = {
6928 "disabled",
6929 "listening",
6930 "learning",
6931 "forwarding",
6932 "blocking",
6933 "discarding"
6934 };
6935
6936 if (log_stp) {
6937 log(LOG_NOTICE, "%s: state changed to %s on %s",
6938 sc->sc_ifp->if_xname,
6939 stpstates[state], ifp->if_xname);
6940 }
6941 }
6942 #endif /* BRIDGESTP */
6943
6944 /*
6945 * bridge_set_bpf_tap:
6946 *
6947 * Sets ups the BPF callbacks.
6948 */
6949 static errno_t
6950 bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
6951 {
6952 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
6953
6954 /* TBD locking */
6955 if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
6956 return ENODEV;
6957 }
6958 switch (mode) {
6959 case BPF_TAP_DISABLE:
6960 sc->sc_bpf_input = sc->sc_bpf_output = NULL;
6961 break;
6962
6963 case BPF_TAP_INPUT:
6964 sc->sc_bpf_input = bpf_callback;
6965 break;
6966
6967 case BPF_TAP_OUTPUT:
6968 sc->sc_bpf_output = bpf_callback;
6969 break;
6970
6971 case BPF_TAP_INPUT_OUTPUT:
6972 sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
6973 break;
6974
6975 default:
6976 break;
6977 }
6978
6979 return 0;
6980 }
6981
6982 /*
6983 * bridge_detach:
6984 *
6985 * Callback when interface has been detached.
6986 */
6987 static void
6988 bridge_detach(ifnet_t ifp)
6989 {
6990 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
6991
6992 #if BRIDGESTP
6993 bstp_detach(&sc->sc_stp);
6994 #endif /* BRIDGESTP */
6995
6996 /* Tear down the routing table. */
6997 bridge_rtable_fini(sc);
6998
6999 lck_mtx_lock(&bridge_list_mtx);
7000 LIST_REMOVE(sc, sc_list);
7001 lck_mtx_unlock(&bridge_list_mtx);
7002
7003 ifnet_release(ifp);
7004
7005 lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
7006 if_clone_softc_deallocate(&bridge_cloner, sc);
7007 }
7008
7009 /*
7010 * bridge_bpf_input:
7011 *
7012 * Invoke the input BPF callback if enabled
7013 */
7014 static errno_t
7015 bridge_bpf_input(ifnet_t ifp, struct mbuf *m, const char * func, int line)
7016 {
7017 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7018 bpf_packet_func input_func = sc->sc_bpf_input;
7019
7020 if (input_func != NULL) {
7021 if (mbuf_pkthdr_rcvif(m) != ifp) {
7022 BRIDGE_LOG(LOG_NOTICE, 0,
7023 "%s.%d: rcvif: 0x%llx != ifp 0x%llx", func, line,
7024 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
7025 (uint64_t)VM_KERNEL_ADDRPERM(ifp));
7026 }
7027 (*input_func)(ifp, m);
7028 }
7029 return 0;
7030 }
7031
7032 /*
7033 * bridge_bpf_output:
7034 *
7035 * Invoke the output BPF callback if enabled
7036 */
7037 static errno_t
7038 bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
7039 {
7040 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7041 bpf_packet_func output_func = sc->sc_bpf_output;
7042
7043 if (output_func != NULL) {
7044 (*output_func)(ifp, m);
7045 }
7046 return 0;
7047 }
7048
7049 /*
7050 * bridge_link_event:
7051 *
7052 * Report a data link event on an interface
7053 */
7054 static void
7055 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7056 {
7057 struct event {
7058 u_int32_t ifnet_family;
7059 u_int32_t unit;
7060 char if_name[IFNAMSIZ];
7061 };
7062 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7063 struct kern_event_msg *header = (struct kern_event_msg*)message;
7064 struct event *data = (struct event *)(header + 1);
7065
7066 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7067 "%s event_code %u - %s", ifp->if_xname,
7068 event_code, dlil_kev_dl_code_str(event_code));
7069 header->total_size = sizeof(message);
7070 header->vendor_code = KEV_VENDOR_APPLE;
7071 header->kev_class = KEV_NETWORK_CLASS;
7072 header->kev_subclass = KEV_DL_SUBCLASS;
7073 header->event_code = event_code;
7074 data->ifnet_family = ifnet_family(ifp);
7075 data->unit = (u_int32_t)ifnet_unit(ifp);
7076 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7077 ifnet_event(ifp, header);
7078 }
7079
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host filter drop: bump the bridge_hostfilter_stats
 *	counter named by 'reason', log the calling function, line and
 *	reason, and set the enclosing function's 'error' variable to
 *	EINVAL.  Wrapped in do { } while (0) so that it behaves as a
 *	single statement; the invocation must be followed by a semicolon.
 */
#define BRIDGE_HF_DROP(reason, func, line) do {                         \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	                   "%s.%d" #reason, func, line);                \
	        error = EINVAL;                                         \
} while (0)
7086
7087 /*
7088 * Make sure this is a DHCP or Bootp request that match the host filter
7089 */
7090 static int
7091 bridge_dhcp_filter(struct bridge_iflist *bif, struct mbuf *m, size_t offset)
7092 {
7093 int error = EINVAL;
7094 struct dhcp dhcp;
7095
7096 /*
7097 * Note: We use the dhcp structure because bootp structure definition
7098 * is larger and some vendors do not pad the request
7099 */
7100 error = mbuf_copydata(m, offset, sizeof(struct dhcp), &dhcp);
7101 if (error != 0) {
7102 BRIDGE_HF_DROP(brhf_dhcp_too_small, __func__, __LINE__);
7103 goto done;
7104 }
7105 if (dhcp.dp_op != BOOTREQUEST) {
7106 BRIDGE_HF_DROP(brhf_dhcp_bad_op, __func__, __LINE__);
7107 goto done;
7108 }
7109 /*
7110 * The hardware address must be an exact match
7111 */
7112 if (dhcp.dp_htype != ARPHRD_ETHER) {
7113 BRIDGE_HF_DROP(brhf_dhcp_bad_htype, __func__, __LINE__);
7114 goto done;
7115 }
7116 if (dhcp.dp_hlen != ETHER_ADDR_LEN) {
7117 BRIDGE_HF_DROP(brhf_dhcp_bad_hlen, __func__, __LINE__);
7118 goto done;
7119 }
7120 if (bcmp(dhcp.dp_chaddr, bif->bif_hf_hwsrc,
7121 ETHER_ADDR_LEN) != 0) {
7122 BRIDGE_HF_DROP(brhf_dhcp_bad_chaddr, __func__, __LINE__);
7123 goto done;
7124 }
7125 /*
7126 * Client address must match the host address or be not specified
7127 */
7128 if (dhcp.dp_ciaddr.s_addr != bif->bif_hf_ipsrc.s_addr &&
7129 dhcp.dp_ciaddr.s_addr != INADDR_ANY) {
7130 BRIDGE_HF_DROP(brhf_dhcp_bad_ciaddr, __func__, __LINE__);
7131 goto done;
7132 }
7133 error = 0;
7134 done:
7135 return error;
7136 }
7137
7138 static int
7139 bridge_host_filter(struct bridge_iflist *bif, mbuf_t *data)
7140 {
7141 int error = EINVAL;
7142 struct ether_header *eh;
7143 static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
7144 mbuf_t m = *data;
7145
7146 eh = mtod(m, struct ether_header *);
7147
7148 /*
7149 * Restrict the source hardware address
7150 */
7151 if ((bif->bif_flags & BIFF_HF_HWSRC) == 0 ||
7152 bcmp(eh->ether_shost, bif->bif_hf_hwsrc,
7153 ETHER_ADDR_LEN) != 0) {
7154 BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr, __func__, __LINE__);
7155 goto done;
7156 }
7157
7158 /*
7159 * Restrict Ethernet protocols to ARP and IP
7160 */
7161 if (eh->ether_type == htons(ETHERTYPE_ARP)) {
7162 struct ether_arp *ea;
7163 size_t minlen = sizeof(struct ether_header) +
7164 sizeof(struct ether_arp);
7165
7166 /*
7167 * Make the Ethernet and ARP headers contiguous
7168 */
7169 if (mbuf_pkthdr_len(m) < minlen) {
7170 BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7171 goto done;
7172 }
7173 if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7174 BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7175 __func__, __LINE__);
7176 goto done;
7177 }
7178 m = *data;
7179
7180 /*
7181 * Verify this is an ethernet/ip arp
7182 */
7183 eh = mtod(m, struct ether_header *);
7184 ea = (struct ether_arp *)(eh + 1);
7185 if (ea->arp_hrd != htons(ARPHRD_ETHER)) {
7186 BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7187 __func__, __LINE__);
7188 goto done;
7189 }
7190 if (ea->arp_pro != htons(ETHERTYPE_IP)) {
7191 BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7192 __func__, __LINE__);
7193 goto done;
7194 }
7195 /*
7196 * Verify the address lengths are correct
7197 */
7198 if (ea->arp_hln != ETHER_ADDR_LEN) {
7199 BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7200 goto done;
7201 }
7202 if (ea->arp_pln != sizeof(struct in_addr)) {
7203 BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7204 __func__, __LINE__);
7205 goto done;
7206 }
7207
7208 /*
7209 * Allow only ARP request or ARP reply
7210 */
7211 if (ea->arp_op != htons(ARPOP_REQUEST) &&
7212 ea->arp_op != htons(ARPOP_REPLY)) {
7213 BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7214 goto done;
7215 }
7216 /*
7217 * Verify source hardware address matches
7218 */
7219 if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7220 ETHER_ADDR_LEN) != 0) {
7221 BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7222 goto done;
7223 }
7224 /*
7225 * Verify source protocol address:
7226 * May be null for an ARP probe
7227 */
7228 if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7229 sizeof(struct in_addr)) != 0 &&
7230 bcmp(ea->arp_spa, &inaddr_any,
7231 sizeof(struct in_addr)) != 0) {
7232 BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7233 goto done;
7234 }
7235 bridge_hostfilter_stats.brhf_arp_ok += 1;
7236 error = 0;
7237 } else if (eh->ether_type == htons(ETHERTYPE_IP)) {
7238 size_t minlen = sizeof(struct ether_header) + sizeof(struct ip);
7239 struct ip iphdr;
7240 size_t offset;
7241
7242 /*
7243 * Make the Ethernet and IP headers contiguous
7244 */
7245 if (mbuf_pkthdr_len(m) < minlen) {
7246 BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7247 goto done;
7248 }
7249 offset = sizeof(struct ether_header);
7250 error = mbuf_copydata(m, offset, sizeof(struct ip), &iphdr);
7251 if (error != 0) {
7252 BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7253 goto done;
7254 }
7255 /*
7256 * Verify the source IP address
7257 */
7258 if (iphdr.ip_p == IPPROTO_UDP) {
7259 struct udphdr udp;
7260
7261 minlen += sizeof(struct udphdr);
7262 if (mbuf_pkthdr_len(m) < minlen) {
7263 BRIDGE_HF_DROP(brhf_ip_too_small,
7264 __func__, __LINE__);
7265 goto done;
7266 }
7267
7268 /*
7269 * Allow all zero addresses for DHCP requests
7270 */
7271 if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr &&
7272 iphdr.ip_src.s_addr != INADDR_ANY) {
7273 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7274 __func__, __LINE__);
7275 goto done;
7276 }
7277 offset = sizeof(struct ether_header) +
7278 (IP_VHL_HL(iphdr.ip_vhl) << 2);
7279 error = mbuf_copydata(m, offset,
7280 sizeof(struct udphdr), &udp);
7281 if (error != 0) {
7282 BRIDGE_HF_DROP(brhf_ip_too_small,
7283 __func__, __LINE__);
7284 goto done;
7285 }
7286 /*
7287 * Either it's a Bootp/DHCP packet that we like or
7288 * it's a UDP packet from the host IP as source address
7289 */
7290 if (udp.uh_sport == htons(IPPORT_BOOTPC) &&
7291 udp.uh_dport == htons(IPPORT_BOOTPS)) {
7292 minlen += sizeof(struct dhcp);
7293 if (mbuf_pkthdr_len(m) < minlen) {
7294 BRIDGE_HF_DROP(brhf_ip_too_small,
7295 __func__, __LINE__);
7296 goto done;
7297 }
7298 offset += sizeof(struct udphdr);
7299 error = bridge_dhcp_filter(bif, m, offset);
7300 if (error != 0) {
7301 goto done;
7302 }
7303 } else if (iphdr.ip_src.s_addr == INADDR_ANY) {
7304 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7305 __func__, __LINE__);
7306 goto done;
7307 }
7308 } else if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr ||
7309 bif->bif_hf_ipsrc.s_addr == INADDR_ANY) {
7310 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, __func__, __LINE__);
7311 goto done;
7312 }
7313 /*
7314 * Allow only boring IP protocols
7315 */
7316 if (iphdr.ip_p != IPPROTO_TCP &&
7317 iphdr.ip_p != IPPROTO_UDP &&
7318 iphdr.ip_p != IPPROTO_ICMP &&
7319 iphdr.ip_p != IPPROTO_ESP &&
7320 iphdr.ip_p != IPPROTO_AH &&
7321 iphdr.ip_p != IPPROTO_GRE) {
7322 BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7323 goto done;
7324 }
7325 bridge_hostfilter_stats.brhf_ip_ok += 1;
7326 error = 0;
7327 } else {
7328 BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
7329 goto done;
7330 }
7331 done:
7332 if (error != 0) {
7333 if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
7334 if (m) {
7335 brlog_mbuf_data(m, 0,
7336 sizeof(struct ether_header) +
7337 sizeof(struct ip));
7338 }
7339 }
7340
7341 if (m != NULL) {
7342 m_freem(m);
7343 }
7344 }
7345 return error;
7346 }
7347
7348 /*
7349 * MAC NAT
7350 */
7351
7352 static errno_t
7353 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7354 {
7355 errno_t error = 0;
7356
7357 BRIDGE_LOCK_ASSERT_HELD(sc);
7358
7359 if (IFNET_IS_VMNET(bif->bif_ifp)) {
7360 error = EINVAL;
7361 goto done;
7362 }
7363 if (sc->sc_mac_nat_bif != NULL) {
7364 if (sc->sc_mac_nat_bif != bif) {
7365 error = EBUSY;
7366 }
7367 goto done;
7368 }
7369 sc->sc_mac_nat_bif = bif;
7370 bif->bif_ifflags |= IFBIF_MAC_NAT;
7371 bridge_mac_nat_populate_entries(sc);
7372
7373 done:
7374 return error;
7375 }
7376
7377 static void
7378 bridge_mac_nat_disable(struct bridge_softc *sc)
7379 {
7380 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7381
7382 assert(mac_nat_bif != NULL);
7383 bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7384 mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7385 sc->sc_mac_nat_bif = NULL;
7386 return;
7387 }
7388
7389 static void
7390 mac_nat_entry_print2(struct mac_nat_entry *mne,
7391 char *ifname, const char *msg1, const char *msg2)
7392 {
7393 int af;
7394 char etopbuf[24];
7395 char ntopbuf[MAX_IPv6_STR_LEN];
7396 const char *space;
7397
7398 af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7399 ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7400 (void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7401 if (msg2 == NULL) {
7402 msg2 = "";
7403 space = "";
7404 } else {
7405 space = " ";
7406 }
7407 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7408 "%s %s%s%s %p (%s, %s, %s)",
7409 ifname, msg1, space, msg2, mne, mne->mne_bif->bif_ifp->if_xname,
7410 ntopbuf, etopbuf);
7411 }
7412
7413 static void
7414 mac_nat_entry_print(struct mac_nat_entry *mne,
7415 char *ifname, const char *msg)
7416 {
7417 mac_nat_entry_print2(mne, ifname, msg, NULL);
7418 }
7419
7420 static struct mac_nat_entry *
7421 bridge_lookup_mac_nat_entry(struct bridge_softc *sc, int af, void * ip)
7422 {
7423 struct mac_nat_entry *mne;
7424 struct mac_nat_entry *ret_mne = NULL;
7425
7426 if (af == AF_INET) {
7427 in_addr_t s_addr = ((struct in_addr *)ip)->s_addr;
7428
7429 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7430 if (mne->mne_ip.s_addr == s_addr) {
7431 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7432 mac_nat_entry_print(mne, sc->sc_if_xname,
7433 "found");
7434 }
7435 ret_mne = mne;
7436 break;
7437 }
7438 }
7439 } else {
7440 const struct in6_addr *ip6 = (const struct in6_addr *)ip;
7441
7442 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7443 if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7444 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7445 mac_nat_entry_print(mne, sc->sc_if_xname,
7446 "found");
7447 }
7448 ret_mne = mne;
7449 break;
7450 }
7451 }
7452 }
7453 return ret_mne;
7454 }
7455
7456 static void
7457 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7458 struct mac_nat_entry *mne, const char *reason)
7459 {
7460 LIST_REMOVE(mne, mne_list);
7461 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7462 mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7463 }
7464 zfree(bridge_mne_pool, mne);
7465 sc->sc_mne_count--;
7466 }
7467
7468 static struct mac_nat_entry *
7469 bridge_create_mac_nat_entry(struct bridge_softc *sc,
7470 struct bridge_iflist *bif, int af, const void *ip, uint8_t *eaddr)
7471 {
7472 struct mac_nat_entry_list *list;
7473 struct mac_nat_entry *mne;
7474
7475 if (sc->sc_mne_count >= sc->sc_mne_max) {
7476 sc->sc_mne_allocation_failures++;
7477 return NULL;
7478 }
7479 mne = zalloc_noblock(bridge_mne_pool);
7480 if (mne == NULL) {
7481 sc->sc_mne_allocation_failures++;
7482 return NULL;
7483 }
7484 sc->sc_mne_count++;
7485 bzero(mne, sizeof(*mne));
7486 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7487 mne->mne_bif = bif;
7488 if (af == AF_INET) {
7489 bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7490 list = &sc->sc_mne_list;
7491 } else {
7492 bcopy(ip, &mne->mne_ip6, sizeof(mne->mne_ip6));
7493 mne->mne_flags |= MNE_FLAGS_IPV6;
7494 list = &sc->sc_mne_list_v6;
7495 }
7496 LIST_INSERT_HEAD(list, mne, mne_list);
7497 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7498 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7499 mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7500 }
7501 return mne;
7502 }
7503
7504 static struct mac_nat_entry *
7505 bridge_update_mac_nat_entry(struct bridge_softc *sc,
7506 struct bridge_iflist *bif, int af, void *ip, uint8_t *eaddr)
7507 {
7508 struct mac_nat_entry *mne;
7509
7510 mne = bridge_lookup_mac_nat_entry(sc, af, ip);
7511 if (mne != NULL) {
7512 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7513
7514 if (mne->mne_bif == mac_nat_bif) {
7515 /* the MAC NAT interface takes precedence */
7516 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7517 if (mne->mne_bif != bif) {
7518 mac_nat_entry_print2(mne,
7519 sc->sc_if_xname, "reject",
7520 bif->bif_ifp->if_xname);
7521 }
7522 }
7523 } else if (mne->mne_bif != bif) {
7524 const char *old_if = mne->mne_bif->bif_ifp->if_xname;
7525
7526 mne->mne_bif = bif;
7527 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7528 mac_nat_entry_print2(mne,
7529 sc->sc_if_xname, "replaced",
7530 old_if);
7531 }
7532 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7533 }
7534 mne->mne_expire = (unsigned long)net_uptime() +
7535 sc->sc_brttimeout;
7536 } else {
7537 mne = bridge_create_mac_nat_entry(sc, bif, af, ip, eaddr);
7538 }
7539 return mne;
7540 }
7541
7542 static void
7543 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7544 struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7545 {
7546 struct mac_nat_entry *mne;
7547 struct mac_nat_entry *tmne;
7548
7549 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7550 if (bif != NULL && mne->mne_bif != bif) {
7551 continue;
7552 }
7553 bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7554 }
7555 }
7556
7557 /*
7558 * bridge_mac_nat_flush_entries:
7559 *
7560 * Flush MAC NAT entries for the specified member. Flush all entries if
7561 * the member is the one that requires MAC NAT, otherwise just flush the
7562 * ones for the specified member.
7563 */
7564 static void
7565 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7566 {
7567 struct bridge_iflist *flush_bif;
7568
7569 flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7570 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7571 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7572 }
7573
7574 static void
7575 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7576 {
7577 errno_t error;
7578 ifnet_t ifp;
7579 ifaddr_t *list;
7580 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7581
7582 assert(mac_nat_bif != NULL);
7583 ifp = mac_nat_bif->bif_ifp;
7584 error = ifnet_get_address_list(ifp, &list);
7585 if (error != 0) {
7586 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7587 "ifnet_get_address_list(%s) failed %d",
7588 ifp->if_xname, error);
7589 return;
7590 }
7591 for (ifaddr_t *scan = list; *scan != NULL; scan++) {
7592 sa_family_t af;
7593 void *ip;
7594
7595 union {
7596 struct sockaddr sa;
7597 struct sockaddr_in sin;
7598 struct sockaddr_in6 sin6;
7599 } u;
7600 af = ifaddr_address_family(*scan);
7601 switch (af) {
7602 case AF_INET:
7603 case AF_INET6:
7604 error = ifaddr_address(*scan, &u.sa, sizeof(u));
7605 if (error != 0) {
7606 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7607 "ifaddr_address failed %d",
7608 error);
7609 break;
7610 }
7611 if (af == AF_INET) {
7612 ip = (void *)&u.sin.sin_addr;
7613 } else {
7614 if (IN6_IS_ADDR_LINKLOCAL(&u.sin6.sin6_addr)) {
7615 /* remove scope ID */
7616 u.sin6.sin6_addr.s6_addr16[1] = 0;
7617 }
7618 ip = (void *)&u.sin6.sin6_addr;
7619 }
7620 bridge_create_mac_nat_entry(sc, mac_nat_bif, af, ip,
7621 (uint8_t *)IF_LLADDR(ifp));
7622 break;
7623 default:
7624 break;
7625 }
7626 }
7627 ifnet_free_address_list(list);
7628 return;
7629 }
7630
7631 static void
7632 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7633 struct mac_nat_entry_list *list, unsigned long now)
7634 {
7635 struct mac_nat_entry *mne;
7636 struct mac_nat_entry *tmne;
7637
7638 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7639 if (now >= mne->mne_expire) {
7640 bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7641 }
7642 }
7643 }
7644
7645 static void
7646 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7647 {
7648 if (sc->sc_mac_nat_bif == NULL) {
7649 return;
7650 }
7651 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7652 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7653 }
7654
7655 static const char *
7656 get_in_out_string(boolean_t is_output)
7657 {
7658 return is_output ? "OUT" : "IN";
7659 }
7660
7661 /*
7662 * is_valid_arp_packet:
7663 * Verify that this is a valid ARP packet.
7664 *
7665 * Returns TRUE if the packet is valid, FALSE otherwise.
7666 */
7667 static boolean_t
7668 is_valid_arp_packet(mbuf_t *data, boolean_t is_output,
7669 struct ether_header **eh_p, struct ether_arp **ea_p)
7670 {
7671 struct ether_arp *ea;
7672 struct ether_header *eh;
7673 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7674 boolean_t is_valid = FALSE;
7675 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7676
7677 if (mbuf_pkthdr_len(*data) < minlen) {
7678 BRIDGE_LOG(LOG_DEBUG, flags,
7679 "ARP %s short frame %lu < %lu",
7680 get_in_out_string(is_output),
7681 mbuf_pkthdr_len(*data), minlen);
7682 goto done;
7683 }
7684 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7685 BRIDGE_LOG(LOG_DEBUG, flags,
7686 "ARP %s size %lu mbuf_pullup fail",
7687 get_in_out_string(is_output),
7688 minlen);
7689 *data = NULL;
7690 goto done;
7691 }
7692
7693 /* validate ARP packet */
7694 eh = mtod(*data, struct ether_header *);
7695 ea = (struct ether_arp *)(eh + 1);
7696 if (ntohs(ea->arp_hrd) != ARPHRD_ETHER) {
7697 BRIDGE_LOG(LOG_DEBUG, flags,
7698 "ARP %s htype not ethernet",
7699 get_in_out_string(is_output));
7700 goto done;
7701 }
7702 if (ea->arp_hln != ETHER_ADDR_LEN) {
7703 BRIDGE_LOG(LOG_DEBUG, flags,
7704 "ARP %s hlen not ethernet",
7705 get_in_out_string(is_output));
7706 goto done;
7707 }
7708 if (ntohs(ea->arp_pro) != ETHERTYPE_IP) {
7709 BRIDGE_LOG(LOG_DEBUG, flags,
7710 "ARP %s ptype not IP",
7711 get_in_out_string(is_output));
7712 goto done;
7713 }
7714 if (ea->arp_pln != sizeof(struct in_addr)) {
7715 BRIDGE_LOG(LOG_DEBUG, flags,
7716 "ARP %s plen not IP",
7717 get_in_out_string(is_output));
7718 goto done;
7719 }
7720 is_valid = TRUE;
7721 *ea_p = ea;
7722 *eh_p = eh;
7723 done:
7724 return is_valid;
7725 }
7726
7727 static struct mac_nat_entry *
7728 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7729 {
7730 struct ether_arp *ea;
7731 struct ether_header *eh;
7732 struct mac_nat_entry *mne = NULL;
7733 u_short op;
7734 struct in_addr tpa;
7735
7736 if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7737 goto done;
7738 }
7739 op = ntohs(ea->arp_op);
7740 switch (op) {
7741 case ARPOP_REQUEST:
7742 case ARPOP_REPLY:
7743 /* only care about REQUEST and REPLY */
7744 break;
7745 default:
7746 goto done;
7747 }
7748
7749 /* check the target IP address for a NAT entry */
7750 bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7751 if (tpa.s_addr != 0) {
7752 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &tpa);
7753 }
7754 if (mne != NULL) {
7755 if (op == ARPOP_REPLY) {
7756 /* translate the MAC address */
7757 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7758 char mac_src[24];
7759 char mac_dst[24];
7760
7761 ether_ntop(mac_src, sizeof(mac_src),
7762 ea->arp_tha);
7763 ether_ntop(mac_dst, sizeof(mac_dst),
7764 mne->mne_mac);
7765 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7766 "%s %s ARP %s -> %s",
7767 sc->sc_if_xname,
7768 mne->mne_bif->bif_ifp->if_xname,
7769 mac_src, mac_dst);
7770 }
7771 bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
7772 }
7773 } else {
7774 /* handle conflicting ARP (sender matches mne) */
7775 struct in_addr spa;
7776
7777 bcopy(ea->arp_spa, &spa, sizeof(spa));
7778 if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
7779 /* check the source IP for a NAT entry */
7780 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &spa);
7781 }
7782 }
7783
7784 done:
7785 return mne;
7786 }
7787
7788 static boolean_t
7789 bridge_mac_nat_arp_output(struct bridge_softc *sc,
7790 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7791 {
7792 struct ether_arp *ea;
7793 struct ether_header *eh;
7794 struct in_addr ip;
7795 struct mac_nat_entry *mne = NULL;
7796 u_short op;
7797 boolean_t translate = FALSE;
7798
7799 if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
7800 goto done;
7801 }
7802 op = ntohs(ea->arp_op);
7803 switch (op) {
7804 case ARPOP_REQUEST:
7805 case ARPOP_REPLY:
7806 /* only care about REQUEST and REPLY */
7807 break;
7808 default:
7809 goto done;
7810 }
7811
7812 bcopy(ea->arp_spa, &ip, sizeof(ip));
7813 if (ip.s_addr == 0) {
7814 goto done;
7815 }
7816 /* XXX validate IP address: no multicast/broadcast */
7817 mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip, ea->arp_sha);
7818 if (mnr != NULL && mne != NULL) {
7819 /* record the offset to do the replacement */
7820 translate = TRUE;
7821 mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
7822 }
7823
7824 done:
7825 return translate;
7826 }
7827
7828 #define ETHER_IPV4_HEADER_LEN (sizeof(struct ether_header) + \
7829 + sizeof(struct ip))
7830 static struct ether_header *
7831 get_ether_ip_header(mbuf_t *data, boolean_t is_output)
7832 {
7833 struct ether_header *eh = NULL;
7834 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7835 size_t minlen = ETHER_IPV4_HEADER_LEN;
7836
7837 if (mbuf_pkthdr_len(*data) < minlen) {
7838 BRIDGE_LOG(LOG_DEBUG, flags,
7839 "IP %s short frame %lu < %lu",
7840 get_in_out_string(is_output),
7841 mbuf_pkthdr_len(*data), minlen);
7842 goto done;
7843 }
7844 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7845 BRIDGE_LOG(LOG_DEBUG, flags,
7846 "IP %s size %lu mbuf_pullup fail",
7847 get_in_out_string(is_output),
7848 minlen);
7849 *data = NULL;
7850 goto done;
7851 }
7852 eh = mtod(*data, struct ether_header *);
7853 done:
7854 return eh;
7855 }
7856
7857 static boolean_t
7858 is_broadcast_ip_packet(mbuf_t *data)
7859 {
7860 struct ether_header *eh;
7861 uint16_t ether_type;
7862 boolean_t is_broadcast = FALSE;
7863
7864 eh = mtod(*data, struct ether_header *);
7865 ether_type = ntohs(eh->ether_type);
7866 switch (ether_type) {
7867 case ETHERTYPE_IP:
7868 eh = get_ether_ip_header(data, FALSE);
7869 if (eh != NULL) {
7870 struct in_addr dst;
7871 struct ip *iphdr;
7872
7873 iphdr = (struct ip *)(void *)(eh + 1);
7874 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
7875 is_broadcast = (dst.s_addr == INADDR_BROADCAST);
7876 }
7877 break;
7878 default:
7879 break;
7880 }
7881 return is_broadcast;
7882 }
7883
7884 static struct mac_nat_entry *
7885 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
7886 {
7887 struct in_addr dst;
7888 struct ether_header *eh;
7889 struct ip *iphdr;
7890 struct mac_nat_entry *mne = NULL;
7891
7892 eh = get_ether_ip_header(data, FALSE);
7893 if (eh == NULL) {
7894 goto done;
7895 }
7896 iphdr = (struct ip *)(void *)(eh + 1);
7897 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
7898 /* XXX validate IP address */
7899 if (dst.s_addr == 0) {
7900 goto done;
7901 }
7902 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &dst);
7903 done:
7904 return mne;
7905 }
7906
7907 static void
7908 bridge_mac_nat_udp_output(struct bridge_softc *sc,
7909 struct bridge_iflist *bif, mbuf_t m,
7910 uint8_t ip_header_len, struct mac_nat_record *mnr)
7911 {
7912 uint16_t dp_flags;
7913 errno_t error;
7914 size_t offset;
7915 struct udphdr udphdr;
7916
7917 /* copy the UDP header */
7918 offset = sizeof(struct ether_header) + ip_header_len;
7919 error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
7920 if (error != 0) {
7921 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7922 "mbuf_copydata udphdr failed %d",
7923 error);
7924 return;
7925 }
7926 if (ntohs(udphdr.uh_sport) != IPPORT_BOOTPC ||
7927 ntohs(udphdr.uh_dport) != IPPORT_BOOTPS) {
7928 /* not a BOOTP/DHCP packet */
7929 return;
7930 }
7931 /* check whether the broadcast bit is already set */
7932 offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
7933 error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
7934 if (error != 0) {
7935 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7936 "mbuf_copydata dp_flags failed %d",
7937 error);
7938 return;
7939 }
7940 if ((ntohs(dp_flags) & DHCP_FLAGS_BROADCAST) != 0) {
7941 /* it's already set, nothing to do */
7942 return;
7943 }
7944 /* broadcast bit needs to be set */
7945 mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
7946 mnr->mnr_ip_header_len = ip_header_len;
7947 if (udphdr.uh_sum != 0) {
7948 uint16_t delta;
7949
7950 /* adjust checksum to take modified dp_flags into account */
7951 delta = dp_flags - mnr->mnr_ip_dhcp_flags;
7952 mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
7953 }
7954 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7955 "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
7956 sc->sc_if_xname,
7957 bif->bif_ifp->if_xname,
7958 ntohs(mnr->mnr_ip_dhcp_flags),
7959 ntohs(mnr->mnr_ip_udp_csum));
7960 return;
7961 }
7962
7963 static boolean_t
7964 bridge_mac_nat_ip_output(struct bridge_softc *sc,
7965 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7966 {
7967 #pragma unused(mnr)
7968 struct ether_header *eh;
7969 struct in_addr ip;
7970 struct ip *iphdr;
7971 uint8_t ip_header_len;
7972 struct mac_nat_entry *mne = NULL;
7973 boolean_t translate = FALSE;
7974
7975 eh = get_ether_ip_header(data, TRUE);
7976 if (eh == NULL) {
7977 goto done;
7978 }
7979 iphdr = (struct ip *)(void *)(eh + 1);
7980 ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
7981 if (ip_header_len < sizeof(ip)) {
7982 /* bogus IP header */
7983 goto done;
7984 }
7985 bcopy(&iphdr->ip_src, &ip, sizeof(ip));
7986 /* XXX validate the source address */
7987 if (ip.s_addr != 0) {
7988 mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip,
7989 eh->ether_shost);
7990 }
7991 if (mnr != NULL) {
7992 if (iphdr->ip_p == IPPROTO_UDP) {
7993 /* handle DHCP must broadcast */
7994 bridge_mac_nat_udp_output(sc, bif, *data,
7995 ip_header_len, mnr);
7996 }
7997 translate = TRUE;
7998 }
7999 done:
8000 return translate;
8001 }
8002
8003 #define ETHER_IPV6_HEADER_LEN (sizeof(struct ether_header) + \
8004 + sizeof(struct ip6_hdr))
8005 static struct ether_header *
8006 get_ether_ipv6_header(mbuf_t *data, boolean_t is_output)
8007 {
8008 struct ether_header *eh = NULL;
8009 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8010 size_t minlen = ETHER_IPV6_HEADER_LEN;
8011
8012 if (mbuf_pkthdr_len(*data) < minlen) {
8013 BRIDGE_LOG(LOG_DEBUG, flags,
8014 "IP %s short frame %lu < %lu",
8015 get_in_out_string(is_output),
8016 mbuf_pkthdr_len(*data), minlen);
8017 goto done;
8018 }
8019 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8020 BRIDGE_LOG(LOG_DEBUG, flags,
8021 "IP %s size %lu mbuf_pullup fail",
8022 get_in_out_string(is_output),
8023 minlen);
8024 *data = NULL;
8025 goto done;
8026 }
8027 eh = mtod(*data, struct ether_header *);
8028 done:
8029 return eh;
8030 }
8031
8032 #include <netinet/icmp6.h>
8033 #include <netinet6/nd6.h>
8034
8035 #define ETHER_ND_LLADDR_LEN (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8036
8037 static void
8038 bridge_mac_nat_icmpv6_output(struct bridge_softc *sc, struct bridge_iflist *bif,
8039 mbuf_t *data, struct ether_header *eh,
8040 struct ip6_hdr *ip6h, struct in6_addr *saddrp, struct mac_nat_record *mnr)
8041 {
8042 struct icmp6_hdr *icmp6;
8043 unsigned int icmp6len;
8044 int lladdrlen = 0;
8045 char *lladdr = NULL;
8046 mbuf_t m = *data;
8047 unsigned int off = sizeof(*ip6h);
8048
8049 icmp6len = m->m_pkthdr.len - sizeof(*eh) - off;
8050 if (icmp6len < sizeof(*icmp6)) {
8051 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8052 "short packet %d < %lu",
8053 icmp6len, sizeof(*icmp6));
8054 return;
8055 }
8056 icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
8057 switch (icmp6->icmp6_type) {
8058 case ND_NEIGHBOR_SOLICIT: {
8059 struct nd_neighbor_solicit *nd_ns;
8060 union nd_opts ndopts;
8061 boolean_t is_dad_probe;
8062 struct in6_addr taddr;
8063
8064 if (icmp6len < sizeof(*nd_ns)) {
8065 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8066 "short nd_ns %d < %lu",
8067 icmp6len, sizeof(*nd_ns));
8068 return;
8069 }
8070
8071 nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
8072 bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
8073 if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8074 IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8075 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8076 "invalid target ignored");
8077 return;
8078 }
8079 /* parse options */
8080 nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
8081 if (nd6_options(&ndopts) < 0) {
8082 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8083 "invalid ND6 NS option");
8084 return;
8085 }
8086 if (ndopts.nd_opts_src_lladdr != NULL) {
8087 lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
8088 lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
8089 }
8090 is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
8091 if (lladdr != NULL) {
8092 if (is_dad_probe) {
8093 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8094 "bad ND6 DAD packet");
8095 return;
8096 }
8097 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8098 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8099 "source lladdrlen %d != %lu",
8100 lladdrlen, ETHER_ND_LLADDR_LEN);
8101 return;
8102 }
8103 mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr -
8104 (uintptr_t)eh);
8105 mnr->mnr_ip6_icmp6_len = icmp6len;
8106 mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
8107 mnr->mnr_ip6_header_len = off;
8108 }
8109 if (is_dad_probe) {
8110 /* node is trying use taddr, create an mne using taddr */
8111 *saddrp = taddr;
8112 }
8113 break;
8114 }
8115 case ND_NEIGHBOR_ADVERT: {
8116 struct nd_neighbor_advert *nd_na;
8117 union nd_opts ndopts;
8118 struct in6_addr taddr;
8119
8120
8121 nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
8122
8123 if (icmp6len < sizeof(*nd_na)) {
8124 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8125 "short nd_na %d < %lu",
8126 icmp6len, sizeof(*nd_na));
8127 return;
8128 }
8129
8130 bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
8131 if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8132 IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8133 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8134 "invalid target ignored");
8135 return;
8136 }
8137 /* parse options */
8138 nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
8139 if (nd6_options(&ndopts) < 0) {
8140 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8141 "invalid ND6 NA option");
8142 return;
8143 }
8144 if (ndopts.nd_opts_tgt_lladdr == NULL) {
8145 /* target linklayer, nothing to do */
8146 return;
8147 }
8148 lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
8149 lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
8150 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8151 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8152 "target lladdrlen %d != %lu",
8153 lladdrlen, ETHER_ND_LLADDR_LEN);
8154 return;
8155 }
8156 mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr - (uintptr_t)eh);
8157 mnr->mnr_ip6_icmp6_len = icmp6len;
8158 mnr->mnr_ip6_header_len = off;
8159 mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
8160 break;
8161 }
8162 case ND_ROUTER_SOLICIT: {
8163 struct nd_router_solicit *nd_rs;
8164 union nd_opts ndopts;
8165
8166 if (icmp6len < sizeof(*nd_rs)) {
8167 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8168 "short nd_rs %d < %lu",
8169 icmp6len, sizeof(*nd_rs));
8170 return;
8171 }
8172 nd_rs = (struct nd_router_solicit *)(void *)icmp6;
8173
8174 /* parse options */
8175 nd6_option_init(nd_rs + 1, icmp6len - sizeof(*nd_rs), &ndopts);
8176 if (nd6_options(&ndopts) < 0) {
8177 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8178 "invalid ND6 RS option");
8179 return;
8180 }
8181 if (ndopts.nd_opts_src_lladdr != NULL) {
8182 lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
8183 lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
8184 }
8185 if (lladdr != NULL) {
8186 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8187 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8188 "source lladdrlen %d != %lu",
8189 lladdrlen, ETHER_ND_LLADDR_LEN);
8190 return;
8191 }
8192 mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr -
8193 (uintptr_t)eh);
8194 mnr->mnr_ip6_icmp6_len = icmp6len;
8195 mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
8196 mnr->mnr_ip6_header_len = off;
8197 }
8198 break;
8199 }
8200 default:
8201 break;
8202 }
8203 if (mnr->mnr_ip6_lladdr_offset != 0 &&
8204 BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8205 const char *str;
8206
8207 switch (mnr->mnr_ip6_icmp6_type) {
8208 case ND_ROUTER_SOLICIT:
8209 str = "ROUTER SOLICIT";
8210 break;
8211 case ND_NEIGHBOR_ADVERT:
8212 str = "NEIGHBOR ADVERT";
8213 break;
8214 case ND_NEIGHBOR_SOLICIT:
8215 str = "NEIGHBOR SOLICIT";
8216 break;
8217 default:
8218 str = "";
8219 break;
8220 }
8221 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8222 "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
8223 sc->sc_if_xname, bif->bif_ifp->if_xname, str,
8224 mnr->mnr_ip6_header_len,
8225 mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
8226 }
8227 }
8228
8229 static struct mac_nat_entry *
8230 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8231 {
8232 struct in6_addr dst;
8233 struct ether_header *eh;
8234 struct ip6_hdr *ip6h;
8235 struct mac_nat_entry *mne = NULL;
8236
8237 eh = get_ether_ipv6_header(data, FALSE);
8238 if (eh == NULL) {
8239 goto done;
8240 }
8241 ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8242 bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8243 /* XXX validate IPv6 address */
8244 if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8245 goto done;
8246 }
8247 mne = bridge_lookup_mac_nat_entry(sc, AF_INET6, &dst);
8248
8249 done:
8250 return mne;
8251 }
8252
8253 static boolean_t
8254 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8255 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8256 {
8257 struct ether_header *eh;
8258 struct ip6_hdr *ip6h;
8259 struct in6_addr saddr;
8260 boolean_t translate;
8261
8262 translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8263 eh = get_ether_ipv6_header(data, TRUE);
8264 if (eh == NULL) {
8265 translate = FALSE;
8266 goto done;
8267 }
8268 ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8269 bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8270 if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8271 bridge_mac_nat_icmpv6_output(sc, bif, data,
8272 eh, ip6h, &saddr, mnr);
8273 }
8274 if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8275 goto done;
8276 }
8277 (void)bridge_update_mac_nat_entry(sc, bif, AF_INET6, &saddr,
8278 eh->ether_shost);
8279
8280 done:
8281 return translate;
8282 }
8283
8284 /*
8285 * bridge_mac_nat_input:
8286 * Process a packet arriving on the MAC NAT interface (sc_mac_nat_bif).
8287 * This interface is the "external" interface with respect to NAT.
8288 * The interface is only capable of receiving a single MAC address
8289 * (e.g. a Wi-Fi STA interface).
8290 *
8291 * When a packet arrives on the external interface, look up the destination
8292 * IP address in the mac_nat_entry table. If there is a match, *is_input
8293 * is set to TRUE if it's for the MAC NAT interface, otherwise *is_input
8294 * is set to FALSE and translate the MAC address if necessary.
8295 *
8296 * Returns:
8297 * The internal interface to direct the packet to, or NULL if the packet
8298 * should not be redirected.
8299 *
8300 * *data may be updated to point at a different mbuf chain, or set to NULL
8301 * if the chain was deallocated during processing.
8302 */
8303 static ifnet_t
8304 bridge_mac_nat_input(struct bridge_softc *sc, mbuf_t *data,
8305 boolean_t *is_input)
8306 {
8307 ifnet_t dst_if = NULL;
8308 struct ether_header *eh;
8309 uint16_t ether_type;
8310 boolean_t is_unicast;
8311 mbuf_t m = *data;
8312 struct mac_nat_entry *mne = NULL;
8313
8314 BRIDGE_LOCK_ASSERT_HELD(sc);
8315 *is_input = FALSE;
8316 assert(sc->sc_mac_nat_bif != NULL);
8317 is_unicast = ((m->m_flags & (M_BCAST | M_MCAST)) == 0);
8318 eh = mtod(m, struct ether_header *);
8319 ether_type = ntohs(eh->ether_type);
8320 switch (ether_type) {
8321 case ETHERTYPE_ARP:
8322 mne = bridge_mac_nat_arp_input(sc, data);
8323 break;
8324 case ETHERTYPE_IP:
8325 if (is_unicast) {
8326 mne = bridge_mac_nat_ip_input(sc, data);
8327 }
8328 break;
8329 case ETHERTYPE_IPV6:
8330 if (is_unicast) {
8331 mne = bridge_mac_nat_ipv6_input(sc, data);
8332 }
8333 break;
8334 default:
8335 break;
8336 }
8337 if (mne != NULL) {
8338 if (is_unicast) {
8339 if (m != *data) {
8340 /* it may have changed */
8341 eh = mtod(*data, struct ether_header *);
8342 }
8343 bcopy(mne->mne_mac, eh->ether_dhost,
8344 sizeof(eh->ether_dhost));
8345 }
8346 dst_if = mne->mne_bif->bif_ifp;
8347 *is_input = (mne->mne_bif == sc->sc_mac_nat_bif);
8348 }
8349 return dst_if;
8350 }
8351
8352 /*
8353 * bridge_mac_nat_output:
8354 * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8355 * from the interface 'bif'.
8356 *
8357 * Create a mac_nat_entry containing the source IP address and MAC address
8358 * from the packet. Populate a mac_nat_record with information detailing
8359 * how to translate the packet. Translation takes place later when
8360 * the bridge lock is no longer held.
8361 *
8362 * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8363 * interface is generating an output packet. No translation is required in this
8364 * case, we just record the IP address used to prevent another bif from
8365 * claiming our IP address.
8366 *
8367 * Returns:
8368 * TRUE if the packet should be translated (*mnr updated as well),
8369 * FALSE otherwise.
8370 *
8371 * *data may be updated to point at a different mbuf chain or NULL if
8372 * the chain was deallocated during processing.
8373 */
8374
8375 static boolean_t
8376 bridge_mac_nat_output(struct bridge_softc *sc,
8377 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8378 {
8379 struct ether_header *eh;
8380 uint16_t ether_type;
8381 boolean_t translate = FALSE;
8382
8383 BRIDGE_LOCK_ASSERT_HELD(sc);
8384 assert(sc->sc_mac_nat_bif != NULL);
8385
8386 eh = mtod(*data, struct ether_header *);
8387 ether_type = ntohs(eh->ether_type);
8388 if (mnr != NULL) {
8389 bzero(mnr, sizeof(*mnr));
8390 mnr->mnr_ether_type = ether_type;
8391 }
8392 switch (ether_type) {
8393 case ETHERTYPE_ARP:
8394 translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8395 break;
8396 case ETHERTYPE_IP:
8397 translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8398 break;
8399 case ETHERTYPE_IPV6:
8400 translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8401 break;
8402 default:
8403 break;
8404 }
8405 return translate;
8406 }
8407
8408 static void
8409 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8410 const caddr_t eaddr)
8411 {
8412 errno_t error;
8413
8414 if (mnr->mnr_arp_offset == 0) {
8415 return;
8416 }
8417 /* replace the source hardware address */
8418 error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8419 ETHER_ADDR_LEN, eaddr,
8420 MBUF_DONTWAIT);
8421 if (error != 0) {
8422 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8423 "mbuf_copyback failed");
8424 m_freem(*data);
8425 *data = NULL;
8426 }
8427 return;
8428 }
8429
8430 static void
8431 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8432 {
8433 errno_t error;
8434 size_t offset;
8435
8436 if (mnr->mnr_ip_header_len == 0) {
8437 return;
8438 }
8439 /* update the UDP checksum */
8440 offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8441 error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8442 sizeof(mnr->mnr_ip_udp_csum),
8443 &mnr->mnr_ip_udp_csum,
8444 MBUF_DONTWAIT);
8445 if (error != 0) {
8446 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8447 "mbuf_copyback uh_sum failed");
8448 m_freem(*data);
8449 *data = NULL;
8450 }
8451 /* update the DHCP must broadcast flag */
8452 offset += sizeof(struct udphdr);
8453 error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8454 sizeof(mnr->mnr_ip_dhcp_flags),
8455 &mnr->mnr_ip_dhcp_flags,
8456 MBUF_DONTWAIT);
8457 if (error != 0) {
8458 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8459 "mbuf_copyback dp_flags failed");
8460 m_freem(*data);
8461 *data = NULL;
8462 }
8463 }
8464
8465 static void
8466 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8467 const caddr_t eaddr)
8468 {
8469 uint16_t cksum;
8470 errno_t error;
8471 mbuf_t m = *data;
8472
8473 if (mnr->mnr_ip6_header_len == 0) {
8474 return;
8475 }
8476 switch (mnr->mnr_ip6_icmp6_type) {
8477 case ND_ROUTER_SOLICIT:
8478 case ND_NEIGHBOR_SOLICIT:
8479 case ND_NEIGHBOR_ADVERT:
8480 if (mnr->mnr_ip6_lladdr_offset == 0) {
8481 /* nothing to do */
8482 return;
8483 }
8484 break;
8485 default:
8486 return;
8487 }
8488
8489 /*
8490 * replace the lladdr
8491 */
8492 error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8493 ETHER_ADDR_LEN, eaddr,
8494 MBUF_DONTWAIT);
8495 if (error != 0) {
8496 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8497 "mbuf_copyback lladdr failed");
8498 m_freem(m);
8499 *data = NULL;
8500 return;
8501 }
8502
8503 /*
8504 * recompute the icmp6 checksum
8505 */
8506
8507 /* skip past the ethernet header */
8508 mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
8509 mbuf_len(m) - ETHER_HDR_LEN);
8510 mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
8511
8512 #define CKSUM_OFFSET_ICMP6 offsetof(struct icmp6_hdr, icmp6_cksum)
8513 /* set the checksum to zero */
8514 cksum = 0;
8515 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8516 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8517 if (error != 0) {
8518 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8519 "mbuf_copyback cksum=0 failed");
8520 m_freem(m);
8521 *data = NULL;
8522 return;
8523 }
8524 /* compute and set the new checksum */
8525 cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8526 mnr->mnr_ip6_icmp6_len);
8527 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8528 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8529 if (error != 0) {
8530 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8531 "mbuf_copyback cksum failed");
8532 m_freem(m);
8533 *data = NULL;
8534 return;
8535 }
8536 /* restore the ethernet header */
8537 mbuf_setdata(m, (char *)mbuf_data(m) - ETHER_HDR_LEN,
8538 mbuf_len(m) + ETHER_HDR_LEN);
8539 mbuf_pkthdr_adjustlen(m, ETHER_HDR_LEN);
8540 return;
8541 }
8542
8543 static void
8544 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8545 const caddr_t eaddr)
8546 {
8547 struct ether_header *eh;
8548
8549 /* replace the source ethernet address with the single MAC */
8550 eh = mtod(*data, struct ether_header *);
8551 bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8552 switch (mnr->mnr_ether_type) {
8553 case ETHERTYPE_ARP:
8554 bridge_mac_nat_arp_translate(data, mnr, eaddr);
8555 break;
8556
8557 case ETHERTYPE_IP:
8558 bridge_mac_nat_ip_translate(data, mnr);
8559 break;
8560
8561 case ETHERTYPE_IPV6:
8562 bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8563 break;
8564
8565 default:
8566 break;
8567 }
8568 return;
8569 }
8570
8571 /*
8572 * bridge packet filtering
8573 */
8574
8575 /*
8576 * Perform basic checks on header size since
8577 * pfil assumes ip_input has already processed
8578 * it for it. Cut-and-pasted from ip_input.c.
8579 * Given how simple the IPv6 version is,
8580 * does the IPv4 version really need to be
8581 * this complicated?
8582 *
8583 * XXX Should we update ipstat here, or not?
8584 * XXX Right now we update ipstat but not
8585 * XXX csum_counter.
8586 */
8587 static int
8588 bridge_ip_checkbasic(struct mbuf **mp)
8589 {
8590 struct mbuf *m = *mp;
8591 struct ip *ip;
8592 int len, hlen;
8593 u_short sum;
8594
8595 if (*mp == NULL) {
8596 return -1;
8597 }
8598
8599 if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8600 /* max_linkhdr is already rounded up to nearest 4-byte */
8601 if ((m = m_copyup(m, sizeof(struct ip),
8602 max_linkhdr)) == NULL) {
8603 /* XXXJRT new stat, please */
8604 ipstat.ips_toosmall++;
8605 goto bad;
8606 }
8607 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8608 if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8609 ipstat.ips_toosmall++;
8610 goto bad;
8611 }
8612 }
8613 ip = mtod(m, struct ip *);
8614 if (ip == NULL) {
8615 goto bad;
8616 }
8617
8618 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
8619 ipstat.ips_badvers++;
8620 goto bad;
8621 }
8622 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8623 if (hlen < (int)sizeof(struct ip)) { /* minimum header length */
8624 ipstat.ips_badhlen++;
8625 goto bad;
8626 }
8627 if (hlen > m->m_len) {
8628 if ((m = m_pullup(m, hlen)) == 0) {
8629 ipstat.ips_badhlen++;
8630 goto bad;
8631 }
8632 ip = mtod(m, struct ip *);
8633 if (ip == NULL) {
8634 goto bad;
8635 }
8636 }
8637
8638 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
8639 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
8640 } else {
8641 if (hlen == sizeof(struct ip)) {
8642 sum = in_cksum_hdr(ip);
8643 } else {
8644 sum = in_cksum(m, hlen);
8645 }
8646 }
8647 if (sum) {
8648 ipstat.ips_badsum++;
8649 goto bad;
8650 }
8651
8652 /* Retrieve the packet length. */
8653 len = ntohs(ip->ip_len);
8654
8655 /*
8656 * Check for additional length bogosity
8657 */
8658 if (len < hlen) {
8659 ipstat.ips_badlen++;
8660 goto bad;
8661 }
8662
8663 /*
8664 * Check that the amount of data in the buffers
8665 * is as at least much as the IP header would have us expect.
8666 * Drop packet if shorter than we expect.
8667 */
8668 if (m->m_pkthdr.len < len) {
8669 ipstat.ips_tooshort++;
8670 goto bad;
8671 }
8672
8673 /* Checks out, proceed */
8674 *mp = m;
8675 return 0;
8676
8677 bad:
8678 *mp = m;
8679 return -1;
8680 }
8681
8682 /*
8683 * Same as above, but for IPv6.
8684 * Cut-and-pasted from ip6_input.c.
8685 * XXX Should we update ip6stat, or not?
8686 */
8687 static int
8688 bridge_ip6_checkbasic(struct mbuf **mp)
8689 {
8690 struct mbuf *m = *mp;
8691 struct ip6_hdr *ip6;
8692
8693 /*
8694 * If the IPv6 header is not aligned, slurp it up into a new
8695 * mbuf with space for link headers, in the event we forward
8696 * it. Otherwise, if it is aligned, make sure the entire base
8697 * IPv6 header is in the first mbuf of the chain.
8698 */
8699 if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8700 struct ifnet *inifp = m->m_pkthdr.rcvif;
8701 /* max_linkhdr is already rounded up to nearest 4-byte */
8702 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
8703 max_linkhdr)) == NULL) {
8704 /* XXXJRT new stat, please */
8705 ip6stat.ip6s_toosmall++;
8706 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8707 goto bad;
8708 }
8709 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
8710 struct ifnet *inifp = m->m_pkthdr.rcvif;
8711 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
8712 ip6stat.ip6s_toosmall++;
8713 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8714 goto bad;
8715 }
8716 }
8717
8718 ip6 = mtod(m, struct ip6_hdr *);
8719
8720 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
8721 ip6stat.ip6s_badvers++;
8722 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
8723 goto bad;
8724 }
8725
8726 /* Checks out, proceed */
8727 *mp = m;
8728 return 0;
8729
8730 bad:
8731 *mp = m;
8732 return -1;
8733 }
8734
8735 /*
8736 * the PF routines expect to be called from ip_input, so we
8737 * need to do and undo here some of the same processing.
8738 *
8739 * XXX : this is heavily inspired on bridge_pfil()
8740 */
8741 static int
8742 bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
8743 int input)
8744 {
8745 /*
8746 * XXX : mpetit : heavily inspired by bridge_pfil()
8747 */
8748
8749 int snap, error, i, hlen;
8750 struct ether_header *eh1, eh2;
8751 struct ip *ip;
8752 struct llc llc1;
8753 u_int16_t ether_type;
8754
8755 snap = 0;
8756 error = -1; /* Default error if not error == 0 */
8757
8758 if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
8759 return 0; /* filtering is disabled */
8760 }
8761 i = min((*mp)->m_pkthdr.len, max_protohdr);
8762 if ((*mp)->m_len < i) {
8763 *mp = m_pullup(*mp, i);
8764 if (*mp == NULL) {
8765 BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
8766 return -1;
8767 }
8768 }
8769
8770 eh1 = mtod(*mp, struct ether_header *);
8771 ether_type = ntohs(eh1->ether_type);
8772
8773 /*
8774 * Check for SNAP/LLC.
8775 */
8776 if (ether_type < ETHERMTU) {
8777 struct llc *llc2 = (struct llc *)(eh1 + 1);
8778
8779 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
8780 llc2->llc_dsap == LLC_SNAP_LSAP &&
8781 llc2->llc_ssap == LLC_SNAP_LSAP &&
8782 llc2->llc_control == LLC_UI) {
8783 ether_type = htons(llc2->llc_un.type_snap.ether_type);
8784 snap = 1;
8785 }
8786 }
8787
8788 /*
8789 * If we're trying to filter bridge traffic, don't look at anything
8790 * other than IP and ARP traffic. If the filter doesn't understand
8791 * IPv6, don't allow IPv6 through the bridge either. This is lame
8792 * since if we really wanted, say, an AppleTalk filter, we are hosed,
8793 * but of course we don't have an AppleTalk filter to begin with.
8794 * (Note that since pfil doesn't understand ARP it will pass *ALL*
8795 * ARP traffic.)
8796 */
8797 switch (ether_type) {
8798 case ETHERTYPE_ARP:
8799 case ETHERTYPE_REVARP:
8800 return 0; /* Automatically pass */
8801
8802 case ETHERTYPE_IP:
8803 case ETHERTYPE_IPV6:
8804 break;
8805 default:
8806 /*
8807 * Check to see if the user wants to pass non-ip
8808 * packets, these will not be checked by pf and
8809 * passed unconditionally so the default is to drop.
8810 */
8811 if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
8812 goto bad;
8813 }
8814 break;
8815 }
8816
8817 /* Strip off the Ethernet header and keep a copy. */
8818 m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
8819 m_adj(*mp, ETHER_HDR_LEN);
8820
8821 /* Strip off snap header, if present */
8822 if (snap) {
8823 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
8824 m_adj(*mp, sizeof(struct llc));
8825 }
8826
8827 /*
8828 * Check the IP header for alignment and errors
8829 */
8830 switch (ether_type) {
8831 case ETHERTYPE_IP:
8832 error = bridge_ip_checkbasic(mp);
8833 break;
8834 case ETHERTYPE_IPV6:
8835 error = bridge_ip6_checkbasic(mp);
8836 break;
8837 default:
8838 error = 0;
8839 break;
8840 }
8841 if (error) {
8842 goto bad;
8843 }
8844
8845 error = 0;
8846
8847 /*
8848 * Run the packet through pf rules
8849 */
8850 switch (ether_type) {
8851 case ETHERTYPE_IP:
8852 /*
8853 * before calling the firewall, swap fields the same as
8854 * IP does. here we assume the header is contiguous
8855 */
8856 ip = mtod(*mp, struct ip *);
8857
8858 ip->ip_len = ntohs(ip->ip_len);
8859 ip->ip_off = ntohs(ip->ip_off);
8860
8861 if (ifp != NULL) {
8862 error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
8863 }
8864
8865 if (*mp == NULL || error != 0) { /* filter may consume */
8866 break;
8867 }
8868
8869 /* Recalculate the ip checksum and restore byte ordering */
8870 ip = mtod(*mp, struct ip *);
8871 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8872 if (hlen < (int)sizeof(struct ip)) {
8873 goto bad;
8874 }
8875 if (hlen > (*mp)->m_len) {
8876 if ((*mp = m_pullup(*mp, hlen)) == 0) {
8877 goto bad;
8878 }
8879 ip = mtod(*mp, struct ip *);
8880 if (ip == NULL) {
8881 goto bad;
8882 }
8883 }
8884 ip->ip_len = htons(ip->ip_len);
8885 ip->ip_off = htons(ip->ip_off);
8886 ip->ip_sum = 0;
8887 if (hlen == sizeof(struct ip)) {
8888 ip->ip_sum = in_cksum_hdr(ip);
8889 } else {
8890 ip->ip_sum = in_cksum(*mp, hlen);
8891 }
8892 break;
8893
8894 case ETHERTYPE_IPV6:
8895 if (ifp != NULL) {
8896 error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
8897 }
8898
8899 if (*mp == NULL || error != 0) { /* filter may consume */
8900 break;
8901 }
8902 break;
8903 default:
8904 error = 0;
8905 break;
8906 }
8907
8908 if (*mp == NULL) {
8909 return error;
8910 }
8911 if (error != 0) {
8912 goto bad;
8913 }
8914
8915 error = -1;
8916
8917 /*
8918 * Finally, put everything back the way it was and return
8919 */
8920 if (snap) {
8921 M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
8922 if (*mp == NULL) {
8923 return error;
8924 }
8925 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
8926 }
8927
8928 M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
8929 if (*mp == NULL) {
8930 return error;
8931 }
8932 bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
8933
8934 return 0;
8935
8936 bad:
8937 m_freem(*mp);
8938 *mp = NULL;
8939 return error;
8940 }
8941
8942 /*
8943 * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
8944 * All rights reserved.
8945 *
8946 * Redistribution and use in source and binary forms, with or without
8947 * modification, are permitted provided that the following conditions
8948 * are met:
8949 * 1. Redistributions of source code must retain the above copyright
8950 * notice, this list of conditions and the following disclaimer.
8951 * 2. Redistributions in binary form must reproduce the above copyright
8952 * notice, this list of conditions and the following disclaimer in the
8953 * documentation and/or other materials provided with the distribution.
8954 *
8955 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
8956 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
8957 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
8958 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
8959 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
8960 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
8961 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
8962 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
8963 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
8964 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
8965 * SUCH DAMAGE.
8966 */
8967
8968 /*
8969 * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
8970 *
8971 * Create a queue of packets/segments which fit the given mss + hdr_len.
8972 * m0 points to mbuf chain to be segmented.
8973 * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
8974 * into segments of length MSS bytes and then copy the first hdr_len bytes
8975 * from m0 at the top of each segment.
8976 * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
8977 * in each segment after the first hdr_len bytes
8978 *
8979 * Return the new queue with the segments on success, NULL on failure.
8980 * (the mbuf queue is freed in this case).
8981 * nsegs contains the number of segments generated.
8982 */
8983
8984 static struct mbuf *
8985 m_seg(struct mbuf *m0, int hdr_len, int mss, int *nsegs,
8986 char * hdr2_buf, int hdr2_len)
8987 {
8988 int off = 0, n, firstlen;
8989 struct mbuf **mnext, *mseg;
8990 int total_len = m0->m_pkthdr.len;
8991
8992 /*
8993 * Segmentation useless
8994 */
8995 if (total_len <= hdr_len + mss) {
8996 return m0;
8997 }
8998
8999 if (hdr2_buf == NULL || hdr2_len <= 0) {
9000 hdr2_buf = NULL;
9001 hdr2_len = 0;
9002 }
9003
9004 off = hdr_len + mss;
9005 firstlen = mss; /* first segment stored in the original mbuf */
9006
9007 mnext = &(m0->m_nextpkt); /* pointer to next packet */
9008
9009 for (n = 1; off < total_len; off += mss, n++) {
9010 struct mbuf *m;
9011 /*
9012 * Copy the header from the original packet
9013 * and create a new mbuf chain
9014 */
9015 if (MHLEN < hdr_len) {
9016 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
9017 } else {
9018 m = m_gethdr(M_NOWAIT, MT_DATA);
9019 }
9020
9021 if (m == NULL) {
9022 #ifdef GSO_DEBUG
9023 D("MGETHDR error\n");
9024 #endif
9025 goto err;
9026 }
9027
9028 m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
9029
9030 m->m_len = hdr_len;
9031 /*
9032 * if the optional header is present, copy it
9033 */
9034 if (hdr2_buf != NULL) {
9035 m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
9036 }
9037
9038 m->m_flags |= (m0->m_flags & M_COPYFLAGS);
9039 if (off + mss >= total_len) { /* last segment */
9040 mss = total_len - off;
9041 }
9042 /*
9043 * Copy the payload from original packet
9044 */
9045 mseg = m_copym(m0, off, mss, M_NOWAIT);
9046 if (mseg == NULL) {
9047 m_freem(m);
9048 #ifdef GSO_DEBUG
9049 D("m_copym error\n");
9050 #endif
9051 goto err;
9052 }
9053 m_cat(m, mseg);
9054
9055 m->m_pkthdr.len = hdr_len + hdr2_len + mss;
9056 m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
9057 /*
9058 * Copy the checksum flags and data (in_cksum() need this)
9059 */
9060 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
9061 m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
9062 m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
9063
9064 *mnext = m;
9065 mnext = &(m->m_nextpkt);
9066 }
9067
9068 /*
9069 * Update first segment.
9070 * If the optional header is present, is necessary
9071 * to insert it into the first segment.
9072 */
9073 if (hdr2_buf == NULL) {
9074 m_adj(m0, hdr_len + firstlen - total_len);
9075 m0->m_pkthdr.len = hdr_len + firstlen;
9076 } else {
9077 mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
9078 if (mseg == NULL) {
9079 #ifdef GSO_DEBUG
9080 D("m_copym error\n");
9081 #endif
9082 goto err;
9083 }
9084 m_adj(m0, hdr_len - total_len);
9085 m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
9086 m_cat(m0, mseg);
9087 m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
9088 }
9089
9090 if (nsegs != NULL) {
9091 *nsegs = n;
9092 }
9093 return m0;
9094 err:
9095 while (m0 != NULL) {
9096 mseg = m0->m_nextpkt;
9097 m0->m_nextpkt = NULL;
9098 m_freem(m0);
9099 m0 = mseg;
9100 }
9101 return NULL;
9102 }
9103
9104 /*
9105 * Wrappers of IPv4 checksum functions
9106 */
9107 static inline void
9108 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
9109 {
9110 m->m_data += mac_hlen;
9111 m->m_len -= mac_hlen;
9112 m->m_pkthdr.len -= mac_hlen;
9113 #if __FreeBSD_version < 1000000
9114 ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
9115 #endif
9116
9117 in_delayed_cksum(m);
9118
9119 #if __FreeBSD_version < 1000000
9120 ip->ip_len = htons(ip->ip_len);
9121 #endif
9122 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
9123 m->m_len += mac_hlen;
9124 m->m_pkthdr.len += mac_hlen;
9125 m->m_data -= mac_hlen;
9126 }
9127
9128 static inline void
9129 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
9130 {
9131 m->m_data += mac_hlen;
9132
9133 ip->ip_sum = in_cksum(m, ip_hlen);
9134
9135 m->m_pkthdr.csum_flags &= ~CSUM_IP;
9136 m->m_data -= mac_hlen;
9137 }
9138
9139 /*
9140 * Structure that contains the state during the TCP segmentation
9141 */
9142 struct gso_ip_tcp_state {
9143 void (*update)
9144 (struct gso_ip_tcp_state*, struct mbuf*);
9145 void (*internal)
9146 (struct gso_ip_tcp_state*, struct mbuf*);
9147 union iphdr hdr;
9148 struct tcphdr *tcp;
9149 int mac_hlen;
9150 int ip_hlen;
9151 int tcp_hlen;
9152 int hlen;
9153 int pay_len;
9154 int sw_csum;
9155 uint32_t tcp_seq;
9156 uint16_t ip_id;
9157 boolean_t is_tx;
9158 };
9159
9160 /*
9161 * Update the pointers to TCP and IPv4 headers
9162 */
9163 static inline void
9164 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9165 {
9166 state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) + state->mac_hlen);
9167 state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) + state->ip_hlen);
9168 state->pay_len = m->m_pkthdr.len - state->hlen;
9169 }
9170
9171 /*
9172 * Set properly the TCP and IPv4 headers
9173 */
9174 static inline void
9175 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9176 {
9177 /*
9178 * Update IP header
9179 */
9180 state->hdr.ip->ip_id = htons((state->ip_id)++);
9181 state->hdr.ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
9182 /*
9183 * TCP Checksum
9184 */
9185 state->tcp->th_sum = 0;
9186 state->tcp->th_sum = in_pseudo(state->hdr.ip->ip_src.s_addr,
9187 state->hdr.ip->ip_dst.s_addr,
9188 htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
9189 /*
9190 * Checksum HW not supported (TCP)
9191 */
9192 if (state->sw_csum & CSUM_DELAY_DATA) {
9193 gso_ipv4_data_cksum(m, state->hdr.ip, state->mac_hlen);
9194 }
9195
9196 state->tcp_seq += state->pay_len;
9197 /*
9198 * IP Checksum
9199 */
9200 state->hdr.ip->ip_sum = 0;
9201 /*
9202 * Checksum HW not supported (IP)
9203 */
9204 if (state->sw_csum & CSUM_IP) {
9205 gso_ipv4_hdr_cksum(m, state->hdr.ip, state->mac_hlen, state->ip_hlen);
9206 }
9207 }
9208
9209
9210 /*
9211 * Updates the pointers to TCP and IPv6 headers
9212 */
9213 static inline void
9214 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9215 {
9216 state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + state->mac_hlen);
9217 state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) + state->ip_hlen);
9218 state->pay_len = m->m_pkthdr.len - state->hlen;
9219 }
9220
9221 /*
9222 * Sets properly the TCP and IPv6 headers
9223 */
9224 static inline void
9225 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9226 {
9227 state->hdr.ip6->ip6_plen = htons(m->m_pkthdr.len -
9228 state->mac_hlen - state->ip_hlen);
9229 /*
9230 * TCP Checksum
9231 */
9232 state->tcp->th_sum = 0;
9233 state->tcp->th_sum = in6_pseudo(&state->hdr.ip6->ip6_src,
9234 &state->hdr.ip6->ip6_dst,
9235 htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
9236 /*
9237 * Checksum HW not supported (TCP)
9238 */
9239 if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
9240 (void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
9241 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
9242 }
9243 state->tcp_seq += state->pay_len;
9244 }
9245
9246 /*
9247 * Init the state during the TCP segmentation
9248 */
9249 static void
9250 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
9251 bool is_ipv4, int mac_hlen, int ip_hlen,
9252 void * ip_hdr, struct tcphdr * tcp_hdr)
9253 {
9254 #pragma unused(ifp)
9255
9256 state->hdr.ptr = ip_hdr;
9257 state->tcp = tcp_hdr;
9258 if (is_ipv4) {
9259 state->ip_id = ntohs(state->hdr.ip->ip_id);
9260 state->update = gso_ipv4_tcp_update;
9261 state->internal = gso_ipv4_tcp_internal;
9262 state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
9263 } else {
9264 state->update = gso_ipv6_tcp_update;
9265 state->internal = gso_ipv6_tcp_internal;
9266 state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
9267 }
9268 state->mac_hlen = mac_hlen;
9269 state->ip_hlen = ip_hlen;
9270 state->tcp_hlen = state->tcp->th_off << 2;
9271 state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
9272 state->tcp_seq = ntohl(state->tcp->th_seq);
9273 //state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
9274 return;
9275 }
9276
9277 /*
9278 * GSO on TCP/IP (v4 or v6)
9279 *
9280 * If is_tx is TRUE, segmented packets are transmitted after they are
9281 * segmented.
9282 *
9283 * If is_tx is FALSE, the segmented packets are returned as a chain in *mp.
9284 */
9285 static int
9286 gso_ip_tcp(struct ifnet *ifp, struct mbuf **mp, struct gso_ip_tcp_state *state,
9287 boolean_t is_tx)
9288 {
9289 struct mbuf *m, *m_tx;
9290 int error = 0;
9291 int mss = 0;
9292 int nsegs = 0;
9293 struct mbuf *m0 = *mp;
9294 #ifdef GSO_STATS
9295 int total_len = m0->m_pkthdr.len;
9296 #endif /* GSO_STATS */
9297
9298 #if 1
9299 mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen;
9300 #else
9301 if (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) {/* TSO with GSO */
9302 mss = ifp->if_hw_tsomax - state->ip_hlen - state->tcp_hlen;
9303 } else {
9304 mss = m0->m_pkthdr.tso_segsz;
9305 }
9306 #endif
9307
9308 *mp = m0 = m_seg(m0, state->hlen, mss, &nsegs, 0, 0);
9309 if (m0 == NULL) {
9310 return ENOBUFS; /* XXX ok? */
9311 }
9312 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
9313 "%s %s mss %d nsegs %d",
9314 ifp->if_xname,
9315 is_tx ? "TX" : "RX",
9316 mss, nsegs);
9317 /*
9318 * XXX-ste: can this happen?
9319 */
9320 if (m0->m_nextpkt == NULL) {
9321 #ifdef GSO_DEBUG
9322 D("only 1 segment");
9323 #endif
9324 if (is_tx) {
9325 error = bridge_transmit(ifp, m0);
9326 }
9327 return error;
9328 }
9329 #ifdef GSO_STATS
9330 GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
9331 GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
9332 GSOSTAT_ADD(tcp.gsos_osegments, nsegs);
9333 #endif /* GSO_STATS */
9334
9335 /* first pkt */
9336 m = m0;
9337
9338 state->update(state, m);
9339
9340 do {
9341 state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
9342
9343 state->internal(state, m);
9344 m_tx = m;
9345 m = m->m_nextpkt;
9346 if (is_tx) {
9347 m_tx->m_nextpkt = NULL;
9348 if ((error = bridge_transmit(ifp, m_tx)) != 0) {
9349 /*
9350 * XXX: If a segment can not be sent, discard the following
9351 * segments and propagate the error to the upper levels.
9352 * In this way the TCP retransmits all the initial packet.
9353 */
9354 #ifdef GSO_DEBUG
9355 D("if_transmit error\n");
9356 #endif
9357 goto err;
9358 }
9359 }
9360 state->update(state, m);
9361
9362 state->tcp->th_flags &= ~TH_CWR;
9363 state->tcp->th_seq = htonl(state->tcp_seq);
9364 } while (m->m_nextpkt);
9365
9366 /* last pkt */
9367 state->internal(state, m);
9368
9369 if (is_tx) {
9370 error = bridge_transmit(ifp, m);
9371 #ifdef GSO_DEBUG
9372 if (error) {
9373 D("last if_transmit error\n");
9374 D("error - type = %d \n", error);
9375 }
9376 #endif
9377 }
9378 #ifdef GSO_STATS
9379 if (!error) {
9380 GSOSTAT_INC(tcp.gsos_segmented);
9381 GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
9382 GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
9383 GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
9384 }
9385 #endif /* GSO_STATS */
9386 return error;
9387
9388 err:
9389 #ifdef GSO_DEBUG
9390 D("error - type = %d \n", error);
9391 #endif
9392 while (m != NULL) {
9393 m_tx = m->m_nextpkt;
9394 m->m_nextpkt = NULL;
9395 m_freem(m);
9396 m = m_tx;
9397 }
9398 return error;
9399 }
9400
9401 /*
9402 * GSO for TCP/IPv[46]
9403 */
9404 static int
9405 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
9406 boolean_t is_tx)
9407 {
9408 int error;
9409 ip_packet_info info;
9410 uint32_t csum_flags;
9411 struct gso_ip_tcp_state state;
9412 struct bripstats stats; /* XXX ignored */
9413 struct tcphdr *tcp;
9414
9415 if (!is_tx && ipforwarding == 0) {
9416 /* no need to segment if the packet will not be forwarded */
9417 return 0;
9418 }
9419 error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4, &info, &stats);
9420 if (error != 0) {
9421 if (*mp != NULL) {
9422 m_freem(*mp);
9423 *mp = NULL;
9424 }
9425 return error;
9426 }
9427 if (info.ip_proto_hdr == NULL) {
9428 /* not a TCP packet */
9429 return 0;
9430 }
9431 tcp = (struct tcphdr *)(void *)info.ip_proto_hdr;
9432 gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
9433 info.ip_hlen, info.ip_hdr.ptr, tcp);
9434 if (is_ipv4) {
9435 csum_flags = CSUM_DELAY_DATA; /* XXX */
9436 if (!is_tx) {
9437 /* if RX to our local IP address, don't segment */
9438 struct in_addr dst_ip;
9439
9440 bcopy(&state.hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
9441 if (in_addr_is_ours(dst_ip)) {
9442 return 0;
9443 }
9444 }
9445 } else {
9446 csum_flags = CSUM_DELAY_IPV6_DATA; /* XXX */
9447 if (!is_tx) {
9448 /* if RX to our local IP address, don't segment */
9449 struct in6_addr dst_ip6;
9450
9451 bcopy(&state.hdr.ip6->ip6_dst, &dst_ip6,
9452 sizeof(dst_ip6));
9453 if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(&dst_ip6)) {
9454 dst_ip6.s6_addr16[1] = htons(ifp->if_index);
9455 }
9456 if (in6_addr_is_ours(&dst_ip6, ifp->if_index)) {
9457 /* local IP address, no need to segment */
9458 return 0;
9459 }
9460 }
9461 }
9462 (*mp)->m_pkthdr.csum_flags = csum_flags;
9463 (*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
9464 return gso_ip_tcp(ifp, mp, &state, is_tx);
9465 }
9466