1 /*
2 * Copyright (c) 2004-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */
30 /*
31 * Copyright 2001 Wasabi Systems, Inc.
32 * All rights reserved.
33 *
34 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed for the NetBSD Project by
47 * Wasabi Systems, Inc.
48 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49 * or promote products derived from this software without specific prior
50 * written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
56 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62 * POSSIBILITY OF SUCH DAMAGE.
63 */
64
65 /*
66 * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67 * All rights reserved.
68 *
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
71 * are met:
72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88 * POSSIBILITY OF SUCH DAMAGE.
89 *
90 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91 */
92
93 /*
94 * Network interface bridge support.
95 *
96 * TODO:
97 *
98 * - Currently only supports Ethernet-like interfaces (Ethernet,
99 * 802.11, VLANs on Ethernet, etc.) Figure out a nice way
100 * to bridge other types of interfaces (FDDI-FDDI, and maybe
 * consider heterogeneous bridges).
102 *
103 * - GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104 */
105
106 #include <sys/cdefs.h>
107
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123
124 #include <sys/kauth.h>
125
126 #include <kern/thread_call.h>
127
128 #include <libkern/libkern.h>
129
130 #include <kern/zalloc.h>
131
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141 #include <net/pfvar.h>
142
143 #include <netinet/in.h> /* for struct arpcom */
144 #include <netinet/tcp.h> /* for struct tcphdr */
145 #include <netinet/in_systm.h>
146 #include <netinet/in_var.h>
147 #define _IP_VHL
148 #include <netinet/ip.h>
149 #include <netinet/ip_var.h>
150 #include <netinet/ip6.h>
151 #include <netinet6/ip6_var.h>
152 #ifdef DEV_CARP
153 #include <netinet/ip_carp.h>
154 #endif
155 #include <netinet/if_ether.h> /* for struct arpcom */
156 #include <net/bridgestp.h>
157 #include <net/if_bridgevar.h>
158 #include <net/if_llc.h>
159 #if NVLAN > 0
160 #include <net/if_vlan_var.h>
161 #endif /* NVLAN > 0 */
162
163 #include <net/if_ether.h>
164 #include <net/dlil.h>
165 #include <net/kpi_interfacefilter.h>
166
167 #include <net/route.h>
168 #include <dev/random/randomdev.h>
169
170 #include <netinet/bootp.h>
171 #include <netinet/dhcp.h>
172
173 #if SKYWALK
174 #include <skywalk/nexus/netif/nx_netif.h>
175 #endif /* SKYWALK */
176
177 #include <os/log.h>
178
179 /*
180 * if_bridge_debug, BR_DBGF_*
181 * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
182 * to enable additional logs for the corresponding bridge function
183 * - "sysctl net.link.bridge.debug" controls the value of
184 * 'if_bridge_debug'
185 */
186 static uint32_t if_bridge_debug = 0;
187 #define BR_DBGF_LIFECYCLE 0x0001
188 #define BR_DBGF_INPUT 0x0002
189 #define BR_DBGF_OUTPUT 0x0004
190 #define BR_DBGF_RT_TABLE 0x0008
191 #define BR_DBGF_DELAYED_CALL 0x0010
192 #define BR_DBGF_IOCTL 0x0020
193 #define BR_DBGF_MBUF 0x0040
194 #define BR_DBGF_MCAST 0x0080
195 #define BR_DBGF_HOSTFILTER 0x0100
196 #define BR_DBGF_CHECKSUM 0x0200
197 #define BR_DBGF_MAC_NAT 0x0400
198
199 /*
200 * if_bridge_log_level
201 * - 'if_bridge_log_level' ensures that by default important logs are
202 * logged regardless of if_bridge_debug by comparing the log level
203 * in BRIDGE_LOG to if_bridge_log_level
 * - "sysctl net.link.bridge.log_level" controls the value of
205 * 'if_bridge_log_level'
206 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
207 * logs must use LOG_NOTICE to ensure they appear by default
208 */
209 static int if_bridge_log_level = LOG_NOTICE;
210
211 #define BRIDGE_DBGF_ENABLED(__flag) ((if_bridge_debug & __flag) != 0)
212
213 /*
214 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
215 * - macros to generate the specified log conditionally based on
216 * the specified log level and debug flags
217 * - BRIDGE_LOG_SIMPLE does not include the function name in the log
218 */
219 #define BRIDGE_LOG(__level, __dbgf, __string, ...) \
220 do { \
221 if (__level <= if_bridge_log_level || \
222 BRIDGE_DBGF_ENABLED(__dbgf)) { \
223 os_log(OS_LOG_DEFAULT, "%s: " __string, \
224 __func__, ## __VA_ARGS__); \
225 } \
226 } while (0)
227 #define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...) \
228 do { \
229 if (__level <= if_bridge_log_level || \
230 BRIDGE_DBGF_ENABLED(__dbgf)) { \
231 os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
232 } \
233 } while (0)
234
235 #define _BRIDGE_LOCK(_sc) lck_mtx_lock(&(_sc)->sc_mtx)
236 #define _BRIDGE_UNLOCK(_sc) lck_mtx_unlock(&(_sc)->sc_mtx)
237 #define BRIDGE_LOCK_ASSERT_HELD(_sc) \
238 LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
239 #define BRIDGE_LOCK_ASSERT_NOTHELD(_sc) \
240 LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)
241
242 #define BRIDGE_LOCK_DEBUG 1
243 #if BRIDGE_LOCK_DEBUG
244
245 #define BR_LCKDBG_MAX 4
246
247 #define BRIDGE_LOCK(_sc) bridge_lock(_sc)
248 #define BRIDGE_UNLOCK(_sc) bridge_unlock(_sc)
249 #define BRIDGE_LOCK2REF(_sc, _err) _err = bridge_lock2ref(_sc)
250 #define BRIDGE_UNREF(_sc) bridge_unref(_sc)
251 #define BRIDGE_XLOCK(_sc) bridge_xlock(_sc)
252 #define BRIDGE_XDROP(_sc) bridge_xdrop(_sc)
253
254 #else /* !BRIDGE_LOCK_DEBUG */
255
256 #define BRIDGE_LOCK(_sc) _BRIDGE_LOCK(_sc)
257 #define BRIDGE_UNLOCK(_sc) _BRIDGE_UNLOCK(_sc)
258 #define BRIDGE_LOCK2REF(_sc, _err) do { \
259 BRIDGE_LOCK_ASSERT_HELD(_sc); \
260 if ((_sc)->sc_iflist_xcnt > 0) \
261 (_err) = EBUSY; \
262 else { \
263 (_sc)->sc_iflist_ref++; \
264 (_err) = 0; \
265 } \
266 _BRIDGE_UNLOCK(_sc); \
267 } while (0)
268 #define BRIDGE_UNREF(_sc) do { \
269 _BRIDGE_LOCK(_sc); \
270 (_sc)->sc_iflist_ref--; \
271 if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \
272 _BRIDGE_UNLOCK(_sc); \
273 wakeup(&(_sc)->sc_cv); \
274 } else \
275 _BRIDGE_UNLOCK(_sc); \
276 } while (0)
277 #define BRIDGE_XLOCK(_sc) do { \
278 BRIDGE_LOCK_ASSERT_HELD(_sc); \
279 (_sc)->sc_iflist_xcnt++; \
280 while ((_sc)->sc_iflist_ref > 0) \
281 msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO, \
282 "BRIDGE_XLOCK", NULL); \
283 } while (0)
284 #define BRIDGE_XDROP(_sc) do { \
285 BRIDGE_LOCK_ASSERT_HELD(_sc); \
286 (_sc)->sc_iflist_xcnt--; \
287 } while (0)
288
289 #endif /* BRIDGE_LOCK_DEBUG */
290
291 #if NBPFILTER > 0
292 #define BRIDGE_BPF_MTAP_INPUT(sc, m) \
293 if (sc->sc_bpf_input != NULL) \
294 bridge_bpf_input(sc->sc_ifp, m, __func__, __LINE__)
295 #else /* NBPFILTER */
296 #define BRIDGE_BPF_MTAP_INPUT(ifp, m)
297 #endif /* NBPFILTER */
298
299 /*
300 * Initial size of the route hash table. Must be a power of two.
301 */
302 #ifndef BRIDGE_RTHASH_SIZE
303 #define BRIDGE_RTHASH_SIZE 16
304 #endif
305
306 /*
307 * Maximum size of the routing hash table
308 */
309 #define BRIDGE_RTHASH_SIZE_MAX 2048
310
311 #define BRIDGE_RTHASH_MASK(sc) ((sc)->sc_rthash_size - 1)
312
313 /*
314 * Maximum number of addresses to cache.
315 */
316 #ifndef BRIDGE_RTABLE_MAX
317 #define BRIDGE_RTABLE_MAX 100
318 #endif
319
320
321 /*
322 * Timeout (in seconds) for entries learned dynamically.
323 */
324 #ifndef BRIDGE_RTABLE_TIMEOUT
325 #define BRIDGE_RTABLE_TIMEOUT (20 * 60) /* same as ARP */
326 #endif
327
328 /*
329 * Number of seconds between walks of the route list.
330 */
331 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
332 #define BRIDGE_RTABLE_PRUNE_PERIOD (5 * 60)
333 #endif
334
335 /*
336 * Number of MAC NAT entries
337 * - sized based on 16 clients (including MAC NAT interface)
338 * each with 4 addresses
339 */
340 #ifndef BRIDGE_MAC_NAT_ENTRY_MAX
341 #define BRIDGE_MAC_NAT_ENTRY_MAX 64
342 #endif /* BRIDGE_MAC_NAT_ENTRY_MAX */
343
344 /*
345 * List of capabilities to possibly mask on the member interface.
346 */
347 #define BRIDGE_IFCAPS_MASK (IFCAP_TSO | IFCAP_TXCSUM)
348 /*
349 * List of capabilities to disable on the member interface.
350 */
351 #define BRIDGE_IFCAPS_STRIP IFCAP_LRO
352
353 /*
354 * Bridge interface list entry.
355 */
356 struct bridge_iflist {
357 TAILQ_ENTRY(bridge_iflist) bif_next;
358 struct ifnet *bif_ifp; /* member if */
359 struct bstp_port bif_stp; /* STP state */
360 uint32_t bif_ifflags; /* member if flags */
361 int bif_savedcaps; /* saved capabilities */
362 uint32_t bif_addrmax; /* max # of addresses */
363 uint32_t bif_addrcnt; /* cur. # of addresses */
364 uint32_t bif_addrexceeded; /* # of address violations */
365
366 interface_filter_t bif_iff_ref;
367 struct bridge_softc *bif_sc;
368 uint32_t bif_flags;
369
370 /* host filter */
371 struct in_addr bif_hf_ipsrc;
372 uint8_t bif_hf_hwsrc[ETHER_ADDR_LEN];
373
374 struct ifbrmstats bif_stats;
375 };
376
377 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)378 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
379 {
380 return (bif->bif_ifflags & flags) == flags;
381 }
382
383 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)384 bif_has_checksum_offload(struct bridge_iflist * bif)
385 {
386 return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
387 }
388
389 /* fake errors to make the code clearer */
390 #define _EBADIP EJUSTRETURN
391 #define _EBADIPCHECKSUM EJUSTRETURN
392 #define _EBADIPV6 EJUSTRETURN
393 #define _EBADUDP EJUSTRETURN
394 #define _EBADTCP EJUSTRETURN
395 #define _EBADUDPCHECKSUM EJUSTRETURN
396 #define _EBADTCPCHECKSUM EJUSTRETURN
397
398 #define BIFF_PROMISC 0x01 /* promiscuous mode set */
399 #define BIFF_PROTO_ATTACHED 0x02 /* protocol attached */
400 #define BIFF_FILTER_ATTACHED 0x04 /* interface filter attached */
401 #define BIFF_MEDIA_ACTIVE 0x08 /* interface media active */
402 #define BIFF_HOST_FILTER 0x10 /* host filter enabled */
403 #define BIFF_HF_HWSRC 0x20 /* host filter source MAC is set */
404 #define BIFF_HF_IPSRC 0x40 /* host filter source IP is set */
405 #define BIFF_INPUT_BROADCAST 0x80 /* send broadcast packets in */
406 #define BIFF_IN_MEMBER_LIST 0x100 /* added to the member list */
407 #if SKYWALK
408 #define BIFF_FLOWSWITCH_ATTACHED 0x1000 /* we attached the flowswitch */
409 #define BIFF_NETAGENT_REMOVED 0x2000 /* we removed the netagent */
410 #endif /* SKYWALK */
411
412 /*
413 * mac_nat_entry
414 * - translates between an IP address and MAC address on a specific
415 * bridge interface member
416 */
417 struct mac_nat_entry {
418 LIST_ENTRY(mac_nat_entry) mne_list; /* list linkage */
419 struct bridge_iflist *mne_bif; /* originating interface */
420 unsigned long mne_expire; /* expiration time */
421 union {
422 struct in_addr mneu_ip; /* originating IPv4 address */
423 struct in6_addr mneu_ip6; /* originating IPv6 address */
424 } mne_u;
425 uint8_t mne_mac[ETHER_ADDR_LEN];
426 uint8_t mne_flags;
427 uint8_t mne_reserved;
428 };
429 #define mne_ip mne_u.mneu_ip
430 #define mne_ip6 mne_u.mneu_ip6
431
432 #define MNE_FLAGS_IPV6 0x01 /* IPv6 address */
433
434 LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
435
436 /*
437 * mac_nat_record
438 * - used by bridge_mac_nat_output() to convey the translation that needs
439 * to take place in bridge_mac_nat_translate
440 * - holds enough information so that the translation can be done later without
441 * holding the bridge lock
442 */
443 struct mac_nat_record {
444 uint16_t mnr_ether_type;
445 union {
446 uint16_t mnru_arp_offset;
447 struct {
448 uint16_t mnruip_dhcp_flags;
449 uint16_t mnruip_udp_csum;
450 uint8_t mnruip_header_len;
451 } mnru_ip;
452 struct {
453 uint16_t mnruip6_icmp6_len;
454 uint16_t mnruip6_lladdr_offset;
455 uint8_t mnruip6_icmp6_type;
456 uint8_t mnruip6_header_len;
457 } mnru_ip6;
458 } mnr_u;
459 };
460
461 #define mnr_arp_offset mnr_u.mnru_arp_offset
462
463 #define mnr_ip_header_len mnr_u.mnru_ip.mnruip_header_len
464 #define mnr_ip_dhcp_flags mnr_u.mnru_ip.mnruip_dhcp_flags
465 #define mnr_ip_udp_csum mnr_u.mnru_ip.mnruip_udp_csum
466
467 #define mnr_ip6_icmp6_len mnr_u.mnru_ip6.mnruip6_icmp6_len
468 #define mnr_ip6_icmp6_type mnr_u.mnru_ip6.mnruip6_icmp6_type
469 #define mnr_ip6_header_len mnr_u.mnru_ip6.mnruip6_header_len
470 #define mnr_ip6_lladdr_offset mnr_u.mnru_ip6.mnruip6_lladdr_offset
471
472 /*
473 * Bridge route node.
474 */
475 struct bridge_rtnode {
476 LIST_ENTRY(bridge_rtnode) brt_hash; /* hash table linkage */
477 LIST_ENTRY(bridge_rtnode) brt_list; /* list linkage */
478 struct bridge_iflist *brt_dst; /* destination if */
479 unsigned long brt_expire; /* expiration time */
480 uint8_t brt_flags; /* address flags */
481 uint8_t brt_addr[ETHER_ADDR_LEN];
482 uint16_t brt_vlan; /* vlan id */
483
484 };
485 #define brt_ifp brt_dst->bif_ifp
486
487 /*
488 * Bridge delayed function call context
489 */
490 typedef void (*bridge_delayed_func_t)(struct bridge_softc *);
491
492 struct bridge_delayed_call {
493 struct bridge_softc *bdc_sc;
494 bridge_delayed_func_t bdc_func; /* Function to call */
495 struct timespec bdc_ts; /* Time to call */
496 u_int32_t bdc_flags;
497 thread_call_t bdc_thread_call;
498 };
499
500 #define BDCF_OUTSTANDING 0x01 /* Delayed call has been scheduled */
501 #define BDCF_CANCELLING 0x02 /* May be waiting for call completion */
502
503 /*
504 * Software state for each bridge.
505 */
506 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
507
508 struct bridge_softc {
509 struct ifnet *sc_ifp; /* make this an interface */
510 u_int32_t sc_flags;
511 LIST_ENTRY(bridge_softc) sc_list;
512 decl_lck_mtx_data(, sc_mtx);
513 struct _bridge_rtnode_list *sc_rthash; /* our forwarding table */
514 struct _bridge_rtnode_list sc_rtlist; /* list version of above */
515 uint32_t sc_rthash_key; /* key for hash */
516 uint32_t sc_rthash_size; /* size of the hash table */
517 struct bridge_delayed_call sc_aging_timer;
518 struct bridge_delayed_call sc_resize_call;
519 TAILQ_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */
520 struct bstp_state sc_stp; /* STP state */
521 bpf_packet_func sc_bpf_input;
522 bpf_packet_func sc_bpf_output;
523 void *sc_cv;
524 uint32_t sc_brtmax; /* max # of addresses */
525 uint32_t sc_brtcnt; /* cur. # of addresses */
526 uint32_t sc_brttimeout; /* rt timeout in seconds */
527 uint32_t sc_iflist_ref; /* refcount for sc_iflist */
528 uint32_t sc_iflist_xcnt; /* refcount for sc_iflist */
529 TAILQ_HEAD(, bridge_iflist) sc_iflist; /* member interface list */
530 uint32_t sc_brtexceeded; /* # of cache drops */
531 uint32_t sc_filter_flags; /* ipf and flags */
532 struct ifnet *sc_ifaddr; /* member mac copied from */
533 u_char sc_defaddr[6]; /* Default MAC address */
534 char sc_if_xname[IFNAMSIZ];
535
536 struct bridge_iflist *sc_mac_nat_bif; /* single MAC NAT interface */
537 struct mac_nat_entry_list sc_mne_list; /* MAC NAT IPv4 */
538 struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
539 uint32_t sc_mne_max; /* max # of entries */
540 uint32_t sc_mne_count; /* cur. # of entries */
541 uint32_t sc_mne_allocation_failures;
542 #if BRIDGE_LOCK_DEBUG
543 /*
544 * Locking and unlocking calling history
545 */
546 void *lock_lr[BR_LCKDBG_MAX];
547 int next_lock_lr;
548 void *unlock_lr[BR_LCKDBG_MAX];
549 int next_unlock_lr;
550 #endif /* BRIDGE_LOCK_DEBUG */
551 };
552
553 #define SCF_DETACHING 0x01
554 #define SCF_RESIZING 0x02
555 #define SCF_MEDIA_ACTIVE 0x04
556
/*
 * ChecksumOperation
 * - selects how checksums are handled when the bridge enqueues a
 *   packet on a member interface (passed to bridge_enqueue)
 * - NOTE(review): names suggest none / clear-offload-metadata /
 *   finalize-offloaded / compute-in-software — confirm against
 *   bridge_finalize_cksum and bridge_enqueue
 */
typedef enum {
	CHECKSUM_OPERATION_NONE = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
	CHECKSUM_OPERATION_FINALIZE = 2,
	CHECKSUM_OPERATION_COMPUTE = 3,
} ChecksumOperation;
563
/* overlay for a packet's IP header, IPv4 or IPv6 */
union iphdr {
	struct ip *ip; /* IPv4 view */
	struct ip6_hdr *ip6; /* IPv6 view */
	void * ptr; /* untyped view */
};
569
/*
 * ip_packet_info
 * - parsed summary of a packet's IP (and transport) headers
 */
typedef struct {
	u_int ip_hlen; /* IP header length */
	u_int ip_pay_len; /* length of payload (exclusive of ip_hlen) */
	u_int ip_opt_len; /* IPv6 options headers length */
	uint8_t ip_proto; /* IPPROTO_TCP, IPPROTO_UDP, etc. */
	bool ip_is_ipv4; /* true for IPv4, false for IPv6 */
	bool ip_is_fragmented; /* packet is an IP fragment */
	union iphdr ip_hdr; /* pointer to IP header */
	void * ip_proto_hdr; /* ptr to protocol header (TCP) */
} ip_packet_info, *ip_packet_info_t;
580
581 struct bridge_hostfilter_stats bridge_hostfilter_stats;
582
583 static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
584 #if BRIDGE_LOCK_DEBUG
585 static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
586 #else
587 static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
588 #endif
589 static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
590
591 static int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
592
593 static ZONE_DEFINE(bridge_rtnode_pool, "bridge_rtnode",
594 sizeof(struct bridge_rtnode), ZC_NONE);
595 static ZONE_DEFINE(bridge_mne_pool, "bridge_mac_nat_entry",
596 sizeof(struct mac_nat_entry), ZC_NONE);
597
598 static int bridge_clone_create(struct if_clone *, uint32_t, void *);
599 static int bridge_clone_destroy(struct ifnet *);
600
601 static errno_t bridge_ioctl(struct ifnet *, u_long, void *);
602 #if HAS_IF_CAP
603 static void bridge_mutecaps(struct bridge_softc *);
604 static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
605 int);
606 #endif
607 static errno_t bridge_set_tso(struct bridge_softc *);
608 static void bridge_proto_attach_changed(struct ifnet *);
609 static int bridge_init(struct ifnet *);
610 #if HAS_BRIDGE_DUMMYNET
611 static void bridge_dummynet(struct mbuf *, struct ifnet *);
612 #endif
613 static void bridge_ifstop(struct ifnet *, int);
614 static int bridge_output(struct ifnet *, struct mbuf *);
615 static void bridge_finalize_cksum(struct ifnet *, struct mbuf *);
616 static void bridge_start(struct ifnet *);
617 static errno_t bridge_input(struct ifnet *, mbuf_t *);
618 static errno_t bridge_iff_input(void *, ifnet_t, protocol_family_t,
619 mbuf_t *, char **);
620 static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t,
621 mbuf_t *);
622 static errno_t bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
623 mbuf_t *m);
624
625 static int bridge_enqueue(ifnet_t, struct ifnet *,
626 struct ifnet *, struct mbuf *, ChecksumOperation);
627 static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
628
629 static void bridge_forward(struct bridge_softc *, struct bridge_iflist *,
630 struct mbuf *);
631
632 static void bridge_aging_timer(struct bridge_softc *sc);
633
634 static void bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
635 struct mbuf *, int);
636 static void bridge_span(struct bridge_softc *, struct mbuf *);
637
638 static int bridge_rtupdate(struct bridge_softc *, const uint8_t *,
639 uint16_t, struct bridge_iflist *, int, uint8_t);
640 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
641 uint16_t);
642 static void bridge_rttrim(struct bridge_softc *);
643 static void bridge_rtage(struct bridge_softc *);
644 static void bridge_rtflush(struct bridge_softc *, int);
645 static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
646 uint16_t);
647
648 static int bridge_rtable_init(struct bridge_softc *);
649 static void bridge_rtable_fini(struct bridge_softc *);
650
651 static void bridge_rthash_resize(struct bridge_softc *);
652
653 static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
654 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
655 const uint8_t *, uint16_t);
656 static int bridge_rtnode_hash(struct bridge_softc *,
657 struct bridge_rtnode *);
658 static int bridge_rtnode_insert(struct bridge_softc *,
659 struct bridge_rtnode *);
660 static void bridge_rtnode_destroy(struct bridge_softc *,
661 struct bridge_rtnode *);
662 #if BRIDGESTP
663 static void bridge_rtable_expire(struct ifnet *, int);
664 static void bridge_state_change(struct ifnet *, int);
665 #endif /* BRIDGESTP */
666
667 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
668 const char *name);
669 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
670 struct ifnet *ifp);
671 static void bridge_delete_member(struct bridge_softc *,
672 struct bridge_iflist *);
673 static void bridge_delete_span(struct bridge_softc *,
674 struct bridge_iflist *);
675
676 static int bridge_ioctl_add(struct bridge_softc *, void *);
677 static int bridge_ioctl_del(struct bridge_softc *, void *);
678 static int bridge_ioctl_gifflags(struct bridge_softc *, void *);
679 static int bridge_ioctl_sifflags(struct bridge_softc *, void *);
680 static int bridge_ioctl_scache(struct bridge_softc *, void *);
681 static int bridge_ioctl_gcache(struct bridge_softc *, void *);
682 static int bridge_ioctl_gifs32(struct bridge_softc *, void *);
683 static int bridge_ioctl_gifs64(struct bridge_softc *, void *);
684 static int bridge_ioctl_rts32(struct bridge_softc *, void *);
685 static int bridge_ioctl_rts64(struct bridge_softc *, void *);
686 static int bridge_ioctl_saddr32(struct bridge_softc *, void *);
687 static int bridge_ioctl_saddr64(struct bridge_softc *, void *);
688 static int bridge_ioctl_sto(struct bridge_softc *, void *);
689 static int bridge_ioctl_gto(struct bridge_softc *, void *);
690 static int bridge_ioctl_daddr32(struct bridge_softc *, void *);
691 static int bridge_ioctl_daddr64(struct bridge_softc *, void *);
692 static int bridge_ioctl_flush(struct bridge_softc *, void *);
693 static int bridge_ioctl_gpri(struct bridge_softc *, void *);
694 static int bridge_ioctl_spri(struct bridge_softc *, void *);
695 static int bridge_ioctl_ght(struct bridge_softc *, void *);
696 static int bridge_ioctl_sht(struct bridge_softc *, void *);
697 static int bridge_ioctl_gfd(struct bridge_softc *, void *);
698 static int bridge_ioctl_sfd(struct bridge_softc *, void *);
699 static int bridge_ioctl_gma(struct bridge_softc *, void *);
700 static int bridge_ioctl_sma(struct bridge_softc *, void *);
701 static int bridge_ioctl_sifprio(struct bridge_softc *, void *);
702 static int bridge_ioctl_sifcost(struct bridge_softc *, void *);
703 static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
704 static int bridge_ioctl_addspan(struct bridge_softc *, void *);
705 static int bridge_ioctl_delspan(struct bridge_softc *, void *);
706 static int bridge_ioctl_gbparam32(struct bridge_softc *, void *);
707 static int bridge_ioctl_gbparam64(struct bridge_softc *, void *);
708 static int bridge_ioctl_grte(struct bridge_softc *, void *);
709 static int bridge_ioctl_gifsstp32(struct bridge_softc *, void *);
710 static int bridge_ioctl_gifsstp64(struct bridge_softc *, void *);
711 static int bridge_ioctl_sproto(struct bridge_softc *, void *);
712 static int bridge_ioctl_stxhc(struct bridge_softc *, void *);
713 static int bridge_ioctl_purge(struct bridge_softc *sc, void *);
714 static int bridge_ioctl_gfilt(struct bridge_softc *, void *);
715 static int bridge_ioctl_sfilt(struct bridge_softc *, void *);
716 static int bridge_ioctl_ghostfilter(struct bridge_softc *, void *);
717 static int bridge_ioctl_shostfilter(struct bridge_softc *, void *);
718 static int bridge_ioctl_gmnelist32(struct bridge_softc *, void *);
719 static int bridge_ioctl_gmnelist64(struct bridge_softc *, void *);
720 static int bridge_ioctl_gifstats32(struct bridge_softc *, void *);
721 static int bridge_ioctl_gifstats64(struct bridge_softc *, void *);
722
723 static int bridge_pf(struct mbuf **, struct ifnet *, uint32_t sc_filter_flags, int input);
724 static int bridge_ip_checkbasic(struct mbuf **);
725 static int bridge_ip6_checkbasic(struct mbuf **);
726
727 static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
728 static errno_t bridge_bpf_input(ifnet_t, struct mbuf *, const char *, int);
729 static errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
730
731 static void bridge_detach(ifnet_t);
732 static void bridge_link_event(struct ifnet *, u_int32_t);
733 static void bridge_iflinkevent(struct ifnet *);
734 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
735 static int interface_media_active(struct ifnet *);
736 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
737 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
738 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
739 static int bridge_host_filter(struct bridge_iflist *, mbuf_t *);
740
741 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
742 struct bridge_iflist *);
743 static void bridge_mac_nat_disable(struct bridge_softc *sc);
744 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
745 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
746 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
747 struct bridge_iflist *);
748 static ifnet_t bridge_mac_nat_input(struct bridge_softc *, mbuf_t *,
749 boolean_t *);
750 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
751 struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
752 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
753 const caddr_t);
754 static bool is_broadcast_ip_packet(mbuf_t *);
755 static bool in_addr_is_ours(const struct in_addr);
756 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
757
758 #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
759
760 static int
761 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
762 boolean_t is_tx);
763
764 /* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */
765 #define VLANTAGOF(_m) 0
766
/* 01:80:C2:00:00:00 — reserved link-local group address used by STP */
u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };

/* all-zeroes Ethernet address */
static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
772
#if BRIDGESTP
/* callbacks invoked by the STP machinery (net/bridgestp) */
static struct bstp_cb_ops bridge_ops = {
	.bcb_state = bridge_state_change,
	.bcb_rtage = bridge_rtable_expire
};
#endif /* BRIDGESTP */
779
780 SYSCTL_DECL(_net_link);
781 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
782 "Bridge");
783
784 static int bridge_inherit_mac = 0; /* share MAC with first bridge member */
785 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
786 CTLFLAG_RW | CTLFLAG_LOCKED,
787 &bridge_inherit_mac, 0,
788 "Inherit MAC address from the first bridge member");
789
790 SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
791 CTLFLAG_RW | CTLFLAG_LOCKED,
792 &bridge_rtable_prune_period, 0,
793 "Interval between pruning of routing table");
794
795 static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
796 SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
797 CTLFLAG_RW | CTLFLAG_LOCKED,
798 &bridge_rtable_hash_size_max, 0,
799 "Maximum size of the routing hash table");
800
801 #if BRIDGE_DELAYED_CALLBACK_DEBUG
802 static int bridge_delayed_callback_delay = 0;
803 SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
804 CTLFLAG_RW | CTLFLAG_LOCKED,
805 &bridge_delayed_callback_delay, 0,
806 "Delay before calling delayed function");
807 #endif
808
809 SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
810 hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
811 &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
812
813 #if BRIDGESTP
814 static int log_stp = 0; /* log STP state changes */
815 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
816 &log_stp, 0, "Log STP state changes");
817 #endif /* BRIDGESTP */
818
819 struct bridge_control {
820 int (*bc_func)(struct bridge_softc *, void *);
821 unsigned int bc_argsize;
822 unsigned int bc_flags;
823 };
824
/* PF tag names used to classify vmnet bridge traffic */
#define VMNET_TAG               "com.apple.vmnet"
#define VMNET_LOCAL_TAG         VMNET_TAG ".local"
#define VMNET_BROADCAST_TAG     VMNET_TAG ".broadcast"
#define VMNET_MULTICAST_TAG     VMNET_TAG ".multicast"

/* PF tag values; 0 means "not yet allocated" (see allocate_vmnet_pf_tags) */
static u_int16_t vmnet_tag;
static u_int16_t vmnet_local_tag;
static u_int16_t vmnet_broadcast_tag;
static u_int16_t vmnet_multicast_tag;
834
835 static u_int16_t
allocate_pf_tag(char * name)836 allocate_pf_tag(char * name)
837 {
838 u_int16_t tag;
839
840 tag = pf_tagname2tag_ext(name);
841 BRIDGE_LOG(LOG_NOTICE, 0, "%s %d", name, tag);
842 return tag;
843 }
844
845 static void
allocate_vmnet_pf_tags(void)846 allocate_vmnet_pf_tags(void)
847 {
848 /* allocate tags to use with PF */
849 if (vmnet_tag == 0) {
850 vmnet_tag = allocate_pf_tag(VMNET_TAG);
851 }
852 if (vmnet_local_tag == 0) {
853 vmnet_local_tag = allocate_pf_tag(VMNET_LOCAL_TAG);
854 }
855 if (vmnet_broadcast_tag == 0) {
856 vmnet_broadcast_tag = allocate_pf_tag(VMNET_BROADCAST_TAG);
857 }
858 if (vmnet_multicast_tag == 0) {
859 vmnet_multicast_tag = allocate_pf_tag(VMNET_MULTICAST_TAG);
860 }
861 }
862
/* bc_flags values for struct bridge_control entries */
#define BC_F_COPYIN             0x01    /* copy arguments in */
#define BC_F_COPYOUT            0x02    /* copy arguments out */
#define BC_F_SUSER              0x04    /* do super-user check */
866
/*
 * Bridge ioctl dispatch table for 32-bit callers.  Entries are indexed
 * by ifd_cmd from SIOC[SG]DRVSPEC32 (see the DRVSPEC macro below); the
 * entry order is part of the user/kernel ABI and must stay in lockstep
 * with bridge_control_table64.
 */
static const struct bridge_control bridge_control_table32[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq), /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam), /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam), /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32), /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist32,
	  .bc_argsize = sizeof(struct ifbrmnelist32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats32,
	  .bc_argsize = sizeof(struct ifbrmreq32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
971
/*
 * Bridge ioctl dispatch table for 64-bit callers.  Entries are indexed
 * by ifd_cmd from SIOC[SG]DRVSPEC64; the entry order is part of the
 * user/kernel ABI and must stay in lockstep with bridge_control_table32.
 */
static const struct bridge_control bridge_control_table64[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq), /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam), /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam), /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64), /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist64,
	  .bc_argsize = sizeof(struct ifbrmnelist64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats64,
	  .bc_argsize = sizeof(struct ifbrmreq64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
1076
1077 static const unsigned int bridge_control_table_size =
1078 sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1079
/* list of all bridge instances, protected by bridge_list_mtx */
static LIST_HEAD(, bridge_softc) bridge_list =
    LIST_HEAD_INITIALIZER(bridge_list);

#define BRIDGENAME              "bridge"
#define BRIDGES_MAX             IF_MAXUNIT
#define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)

/* interface cloner: handles "ifconfig bridgeN create/destroy" */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
    0, BRIDGES_MAX, BRIDGE_ZONE_MAX_ELEM, sizeof(struct bridge_softc));
1090
/* when set, new bridge interfaces use the TXSTART (start callback) model */
static int if_bridge_txstart = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");

SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_debug, 0, "Bridge debug flags");

SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_log_level, 0, "Bridge log level");

/* when set, the bridge performs software segmentation of large packets */
static int if_bridge_segmentation = 1;
SYSCTL_INT(_net_link_bridge, OID_AUTO, segmentation,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_segmentation, 0, "Bridge interface enable segmentation");
1106
1107 static int if_bridge_vmnet_pf_tagging = 1;
1108 SYSCTL_INT(_net_link_bridge, OID_AUTO, vmnet_pf_tagging,
1109 CTLFLAG_RW | CTLFLAG_LOCKED,
1110 &if_bridge_segmentation, 0, "Bridge interface enable vmnet PF tagging");
1111
1112 #if DEBUG || DEVELOPMENT
1113 #define BRIDGE_FORCE_ONE 0x00000001
1114 #define BRIDGE_FORCE_TWO 0x00000002
1115 static u_int32_t if_bridge_force_errors = 0;
1116 SYSCTL_INT(_net_link_bridge, OID_AUTO, force_errors,
1117 CTLFLAG_RW | CTLFLAG_LOCKED,
1118 &if_bridge_force_errors, 0, "Bridge interface force errors");
1119 static inline bool
bridge_error_is_forced(u_int32_t flags)1120 bridge_error_is_forced(u_int32_t flags)
1121 {
1122 return (if_bridge_force_errors & flags) != 0;
1123 }
1124
/*
 * BRIDGE_ERROR_GET_FORCED:
 *
 * Evaluate whether the given fault-injection flags are forced, store the
 * result in __is_forced, and log when they are.  Multi-statement macro,
 * hence the do/while(0) wrapper.
 */
#define BRIDGE_ERROR_GET_FORCED(__is_forced, __flags)                   \
	do {                                                            \
	        __is_forced = bridge_error_is_forced(__flags);          \
	        if (__is_forced) {                                      \
	                BRIDGE_LOG(LOG_NOTICE, 0, "0x%x forced", __flags); \
	        }                                                       \
	} while (0)
#endif /* DEBUG || DEVELOPMENT */
1133
1134
/* debug-logging helpers, defined below */
static void brlog_ether_header(struct ether_header *);
static void brlog_mbuf_data(mbuf_t, size_t, size_t);
static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
static void brlog_mbuf(mbuf_t, const char *, const char *);
static void brlog_link(struct bridge_softc * sc);
1140
#if BRIDGE_LOCK_DEBUG
/* debug wrappers around the bridge lock that record caller addresses */
static void bridge_lock(struct bridge_softc *);
static void bridge_unlock(struct bridge_softc *);
static int bridge_lock2ref(struct bridge_softc *);
static void bridge_unref(struct bridge_softc *);
static void bridge_xlock(struct bridge_softc *);
static void bridge_xdrop(struct bridge_softc *);
1148
/*
 * bridge_lock:
 *
 * Acquire the bridge mutex and record the caller's return address in the
 * sc->lock_lr ring buffer (SO_LCKDBG_MAX entries) for lock debugging.
 */
static void
bridge_lock(struct bridge_softc *sc)
{
	void *lr_saved = __builtin_return_address(0);

	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	_BRIDGE_LOCK(sc);

	/* remember who last took the lock */
	sc->lock_lr[sc->next_lock_lr] = lr_saved;
	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
}
1161
/*
 * bridge_unlock:
 *
 * Record the caller's return address in the sc->unlock_lr ring buffer,
 * then release the bridge mutex.
 */
static void
bridge_unlock(struct bridge_softc *sc)
{
	void *lr_saved = __builtin_return_address(0);

	BRIDGE_LOCK_ASSERT_HELD(sc);

	/* remember who last released the lock, before dropping it */
	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;

	_BRIDGE_UNLOCK(sc);
}
1174
/*
 * bridge_lock2ref:
 *
 * Convert a held bridge lock into an interface-list reference: bump
 * sc_iflist_ref and drop the mutex.  Fails with EBUSY when an exclusive
 * waiter exists (sc_iflist_xcnt > 0); the lock is dropped either way.
 * Pair a successful call with bridge_unref().
 */
static int
bridge_lock2ref(struct bridge_softc *sc)
{
	int error = 0;
	void *lr_saved = __builtin_return_address(0);

	BRIDGE_LOCK_ASSERT_HELD(sc);

	if (sc->sc_iflist_xcnt > 0) {
		/* an exclusive waiter (bridge_xlock) gets priority */
		error = EBUSY;
	} else {
		sc->sc_iflist_ref++;
	}

	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;

	_BRIDGE_UNLOCK(sc);

	return error;
}
1196
/*
 * bridge_unref:
 *
 * Release an interface-list reference taken with bridge_lock2ref().
 * When the last reference goes away and an exclusive waiter exists
 * (sc_iflist_xcnt > 0), wake the waiter sleeping on sc_cv.
 */
static void
bridge_unref(struct bridge_softc *sc)
{
	void *lr_saved = __builtin_return_address(0);

	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	_BRIDGE_LOCK(sc);
	sc->lock_lr[sc->next_lock_lr] = lr_saved;
	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;

	sc->sc_iflist_ref--;

	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
	if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
		/* drop the lock before waking the exclusive waiter */
		_BRIDGE_UNLOCK(sc);
		wakeup(&sc->sc_cv);
	} else {
		_BRIDGE_UNLOCK(sc);
	}
}
1219
/*
 * bridge_xlock:
 *
 * With the bridge lock held, raise the exclusive-waiter count and block
 * until every outstanding interface-list reference is released.  msleep
 * drops and reacquires sc_mtx, so the lr ring buffers are updated around
 * each sleep.  Pair with bridge_xdrop().
 */
static void
bridge_xlock(struct bridge_softc *sc)
{
	void *lr_saved = __builtin_return_address(0);

	BRIDGE_LOCK_ASSERT_HELD(sc);

	sc->sc_iflist_xcnt++;
	while (sc->sc_iflist_ref > 0) {
		sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
		sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;

		msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);

		sc->lock_lr[sc->next_lock_lr] = lr_saved;
		sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
	}
}
1238
/*
 * bridge_xdrop:
 *
 * Release the exclusive claim taken by bridge_xlock().
 */
static void
bridge_xdrop(struct bridge_softc *sc)
{
	BRIDGE_LOCK_ASSERT_HELD(sc);

	sc->sc_iflist_xcnt--;
}

#endif /* BRIDGE_LOCK_DEBUG */
1248
1249 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1250 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1251 {
1252 if (m) {
1253 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1254 "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1255 prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1256 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1257 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1258 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1259 suffix ? suffix : "");
1260 } else {
1261 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1262 }
1263 }
1264
1265 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1266 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1267 {
1268 if (m) {
1269 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1270 "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1271 "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1272 prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1273 mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1274 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
1275 (unsigned int)mbuf_maxlen(m),
1276 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1277 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1278 !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1279 if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1280 brlog_mbuf_pkthdr(m, "", suffix);
1281 }
1282 } else {
1283 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1284 }
1285 }
1286
/*
 * brlog_mbuf_data:
 *
 * Hex-dump up to `len' bytes of the packet starting at byte `offset',
 * walking the mbuf chain.  Bytes are logged two per group.
 *
 * NOTE(review): the loop bound is maxlen = min(len, pktlen - offset)
 * but `i' only starts printing once i >= offset, so when offset > 0
 * fewer than `len' bytes are dumped (bound arguably should be
 * offset + maxlen).  Same structure as the historical BSD code —
 * confirm before changing.
 */
static void
brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
{
	mbuf_t n;
	size_t i, j;
	size_t pktlen, mlen, maxlen;
	unsigned char *ptr;

	pktlen = mbuf_pkthdr_len(m);

	if (offset > pktlen) {
		return;
	}

	maxlen = (pktlen - offset > len) ? len : pktlen - offset;
	n = m;
	mlen = mbuf_len(n);
	ptr = mbuf_data(n);
	for (i = 0, j = 0; i < maxlen; i++, j++) {
		/* j indexes within the current mbuf; advance on exhaustion */
		if (j >= mlen) {
			n = mbuf_next(n);
			if (n == 0) {
				break;
			}
			ptr = mbuf_data(n);
			mlen = mbuf_len(n);
			j = 0;
		}
		if (i >= offset) {
			BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
			    "%02x%s", ptr[j], i % 2 ? " " : "");
		}
	}
}
1321
/*
 * brlog_ether_header:
 *
 * Log an Ethernet header as "src > dst ethertype" (tcpdump-like).
 */
static void
brlog_ether_header(struct ether_header *eh)
{
	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
	    "%02x:%02x:%02x:%02x:%02x:%02x > "
	    "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
	    eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
	    eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
	    eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
	    eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
	    ntohs(eh->ether_type));
}
1334
/*
 * ether_ntop:
 *
 * Format the 6-byte Ethernet address `ap' as "xx:xx:xx:xx:xx:xx" into
 * `buf' (snprintf semantics: always NUL-terminated if len > 0, silently
 * truncated if len is too small).  Returns `buf' for call-site chaining.
 */
static char *
ether_ntop(char *buf, size_t len, const u_char *ap)
{
	snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
	    ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);

	return buf;
}
1343
/*
 * brlog_link:
 *
 * Build a throwaway sockaddr_dl describing the bridge's name and default
 * MAC address, then log its fields and the address in colon notation.
 *
 * NOTE(review): sdl_buffer is declared as an array of uint32_t but the
 * dimension is a byte count, so it is ~4x larger than needed (harmless).
 * Also sdl_len is set to offsetof(..., sdl_data) without nlen/alen —
 * fine for logging only; confirm before reusing this pattern.
 */
static void
brlog_link(struct bridge_softc * sc)
{
	int i;
	uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) +
	    IFNAMSIZ + ETHER_ADDR_LEN];
	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer;
	const u_char * lladdr;
	char lladdr_str[48];

	memset(sdl, 0, sizeof(sdl_buffer));
	sdl->sdl_family = AF_LINK;
	sdl->sdl_nlen = strlen(sc->sc_if_xname);
	sdl->sdl_alen = ETHER_ADDR_LEN;
	sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
	memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
	memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
	/* render the link-layer address one colon-separated byte at a time */
	lladdr_str[0] = '\0';
	for (i = 0, lladdr = CONST_LLADDR(sdl);
	    i < sdl->sdl_alen;
	    i++, lladdr++) {
		char byte_str[4];

		snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
		    *lladdr);
		strlcat(lladdr_str, byte_str, sizeof(lladdr_str));
	}
	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
	    "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
	    " slen %d addr %s", sc->sc_if_xname,
	    sdl->sdl_len, sdl->sdl_index,
	    sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
	    sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
}
1378
1379
1380 /*
1381 * bridgeattach:
1382 *
1383 * Pseudo-device attach routine.
1384 */
1385 __private_extern__ int
bridgeattach(int n)1386 bridgeattach(int n)
1387 {
1388 #pragma unused(n)
1389 int error;
1390
1391 LIST_INIT(&bridge_list);
1392
1393 #if BRIDGESTP
1394 bstp_sys_init();
1395 #endif /* BRIDGESTP */
1396
1397 error = if_clone_attach(&bridge_cloner);
1398 if (error != 0) {
1399 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1400 }
1401 return error;
1402 }
1403
1404
1405 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1406 bridge_ifnet_set_attrs(struct ifnet * ifp)
1407 {
1408 errno_t error;
1409
1410 error = ifnet_set_mtu(ifp, ETHERMTU);
1411 if (error != 0) {
1412 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1413 goto done;
1414 }
1415 error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1416 if (error != 0) {
1417 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1418 goto done;
1419 }
1420 error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1421 if (error != 0) {
1422 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1423 goto done;
1424 }
1425 error = ifnet_set_flags(ifp,
1426 IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1427 0xffff);
1428
1429 if (error != 0) {
1430 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1431 goto done;
1432 }
1433 done:
1434 return error;
1435 }
1436
1437 /*
1438 * bridge_clone_create:
1439 *
1440 * Create a new bridge instance.
1441 */
1442 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1443 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1444 {
1445 #pragma unused(params)
1446 struct ifnet *ifp = NULL;
1447 struct bridge_softc *sc = NULL;
1448 struct bridge_softc *sc2 = NULL;
1449 struct ifnet_init_eparams init_params;
1450 errno_t error = 0;
1451 uint8_t eth_hostid[ETHER_ADDR_LEN];
1452 int fb, retry, has_hostid;
1453
1454 sc = if_clone_softc_allocate(&bridge_cloner);
1455 if (sc == NULL) {
1456 error = ENOMEM;
1457 goto done;
1458 }
1459
1460 lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1461 sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1462 sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1463 sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1464 sc->sc_filter_flags = 0;
1465
1466 TAILQ_INIT(&sc->sc_iflist);
1467
1468 /* use the interface name as the unique id for ifp recycle */
1469 snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1470 ifc->ifc_name, unit);
1471 bzero(&init_params, sizeof(init_params));
1472 init_params.ver = IFNET_INIT_CURRENT_VERSION;
1473 init_params.len = sizeof(init_params);
1474 /* Initialize our routing table. */
1475 error = bridge_rtable_init(sc);
1476 if (error != 0) {
1477 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1478 goto done;
1479 }
1480 TAILQ_INIT(&sc->sc_spanlist);
1481 if (if_bridge_txstart) {
1482 init_params.start = bridge_start;
1483 } else {
1484 init_params.flags = IFNET_INIT_LEGACY;
1485 init_params.output = bridge_output;
1486 }
1487 init_params.set_bpf_tap = bridge_set_bpf_tap;
1488 init_params.uniqueid = sc->sc_if_xname;
1489 init_params.uniqueid_len = strlen(sc->sc_if_xname);
1490 init_params.sndq_maxlen = IFQ_MAXLEN;
1491 init_params.name = ifc->ifc_name;
1492 init_params.unit = unit;
1493 init_params.family = IFNET_FAMILY_ETHERNET;
1494 init_params.type = IFT_BRIDGE;
1495 init_params.demux = ether_demux;
1496 init_params.add_proto = ether_add_proto;
1497 init_params.del_proto = ether_del_proto;
1498 init_params.check_multi = ether_check_multi;
1499 init_params.framer_extended = ether_frameout_extended;
1500 init_params.softc = sc;
1501 init_params.ioctl = bridge_ioctl;
1502 init_params.detach = bridge_detach;
1503 init_params.broadcast_addr = etherbroadcastaddr;
1504 init_params.broadcast_len = ETHER_ADDR_LEN;
1505
1506 error = ifnet_allocate_extended(&init_params, &ifp);
1507 if (error != 0) {
1508 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1509 goto done;
1510 }
1511 LIST_INIT(&sc->sc_mne_list);
1512 LIST_INIT(&sc->sc_mne_list_v6);
1513 sc->sc_ifp = ifp;
1514 error = bridge_ifnet_set_attrs(ifp);
1515 if (error != 0) {
1516 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1517 error);
1518 goto done;
1519 }
1520 /*
1521 * Generate an ethernet address with a locally administered address.
1522 *
1523 * Since we are using random ethernet addresses for the bridge, it is
1524 * possible that we might have address collisions, so make sure that
1525 * this hardware address isn't already in use on another bridge.
1526 * The first try uses the "hostid" and falls back to read_frandom();
1527 * for "hostid", we use the MAC address of the first-encountered
1528 * Ethernet-type interface that is currently configured.
1529 */
1530 fb = 0;
1531 has_hostid = (uuid_get_ethernet(ð_hostid[0]) == 0);
1532 for (retry = 1; retry != 0;) {
1533 if (fb || has_hostid == 0) {
1534 read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1535 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1536 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1537 } else {
1538 bcopy(ð_hostid[0], &sc->sc_defaddr,
1539 ETHER_ADDR_LEN);
1540 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1541 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1542 sc->sc_defaddr[3] = /* stir it up a bit */
1543 ((sc->sc_defaddr[3] & 0x0f) << 4) |
1544 ((sc->sc_defaddr[3] & 0xf0) >> 4);
1545 /*
1546 * Mix in the LSB as it's actually pretty significant,
1547 * see rdar://14076061
1548 */
1549 sc->sc_defaddr[4] =
1550 (((sc->sc_defaddr[4] & 0x0f) << 4) |
1551 ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1552 sc->sc_defaddr[5];
1553 sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1554 }
1555
1556 fb = 1;
1557 retry = 0;
1558 lck_mtx_lock(&bridge_list_mtx);
1559 LIST_FOREACH(sc2, &bridge_list, sc_list) {
1560 if (_ether_cmp(sc->sc_defaddr,
1561 IF_LLADDR(sc2->sc_ifp)) == 0) {
1562 retry = 1;
1563 }
1564 }
1565 lck_mtx_unlock(&bridge_list_mtx);
1566 }
1567
1568 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1569
1570 if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1571 brlog_link(sc);
1572 }
1573 error = ifnet_attach(ifp, NULL);
1574 if (error != 0) {
1575 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1576 goto done;
1577 }
1578
1579 error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1580 IFT_ETHER);
1581 if (error != 0) {
1582 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1583 error);
1584 goto done;
1585 }
1586
1587 ifnet_set_offload(ifp,
1588 IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1589 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1590 error = bridge_set_tso(sc);
1591 if (error != 0) {
1592 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1593 goto done;
1594 }
1595 #if BRIDGESTP
1596 bstp_attach(&sc->sc_stp, &bridge_ops);
1597 #endif /* BRIDGESTP */
1598
1599 lck_mtx_lock(&bridge_list_mtx);
1600 LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1601 lck_mtx_unlock(&bridge_list_mtx);
1602
1603 /* attach as ethernet */
1604 error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1605 NULL, NULL);
1606
1607 done:
1608 if (error != 0) {
1609 BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1610 /* TBD: Clean up: sc, sc_rthash etc */
1611 }
1612
1613 return error;
1614 }
1615
/*
 * bridge_clone_destroy:
 *
 * Destroy a bridge instance: mark it detaching (idempotently), stop it,
 * cancel pending delayed calls, remove every member and span port, then
 * detach the ifnet.  Final teardown completes in bridge_detach() when
 * the ifnet detach callback fires.
 */
static int
bridge_clone_destroy(struct ifnet *ifp)
{
	struct bridge_softc *sc = ifp->if_softc;
	struct bridge_iflist *bif;
	errno_t error;

	BRIDGE_LOCK(sc);
	/* only one destroy may proceed; later calls are no-ops */
	if ((sc->sc_flags & SCF_DETACHING)) {
		BRIDGE_UNLOCK(sc);
		return 0;
	}
	sc->sc_flags |= SCF_DETACHING;

	bridge_ifstop(ifp, 1);

	bridge_cancel_delayed_call(&sc->sc_resize_call);

	bridge_cleanup_delayed_call(&sc->sc_resize_call);
	bridge_cleanup_delayed_call(&sc->sc_aging_timer);

	error = ifnet_set_flags(ifp, 0, IFF_UP);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
	}

	/* detach all member interfaces */
	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
		bridge_delete_member(sc, bif);
	}

	/* and all span ports */
	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
		bridge_delete_span(sc, bif);
	}
	BRIDGE_UNLOCK(sc);

	error = ifnet_detach(ifp);
	if (error != 0) {
		panic("%s (%d): ifnet_detach(%p) failed %d",
		    __func__, __LINE__, ifp, error);
	}
	return 0;
}
1663
/*
 * DRVSPEC:
 *
 * Shared body for the SIOC[SG]DRVSPEC{32,64} ioctl cases.  Expects the
 * following in scope at the expansion site: `ifd' (the ifdrv request),
 * `bridge_control_table' / `bc' (dispatch table for the caller's ABI),
 * `args' (a union large enough for every argument struct), `cmd', `sc'
 * and `error'.  It validates the command index, direction (get must be
 * COPYOUT, set must not), privilege, and exact argument size; copies the
 * argument in, invokes the handler under the bridge lock, and copies the
 * result back out when requested.
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) { \
	        error = EINVAL; \
	        break; \
	} \
	bc = &bridge_control_table[ifd->ifd_cmd]; \
 \
	if (cmd == SIOCGDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) { \
	        error = EINVAL; \
	        break; \
	} else if (cmd == SIOCSDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) != 0) { \
	        error = EINVAL; \
	        break; \
	} \
 \
	if (bc->bc_flags & BC_F_SUSER) { \
	        error = kauth_authorize_generic(kauth_cred_get(), \
	            KAUTH_GENERIC_ISSUSER); \
	        if (error) \
	                break; \
	} \
 \
	if (ifd->ifd_len != bc->bc_argsize || \
	    ifd->ifd_len > sizeof (args)) { \
	        error = EINVAL; \
	        break; \
	} \
 \
	bzero(&args, sizeof (args)); \
	if (bc->bc_flags & BC_F_COPYIN) { \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len); \
	        if (error) \
	                break; \
	} \
 \
	BRIDGE_LOCK(sc); \
	error = (*bc->bc_func)(sc, &args); \
	BRIDGE_UNLOCK(sc); \
	if (error) \
	        break; \
 \
	if (bc->bc_flags & BC_F_COPYOUT) \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len); \
} while (0)
1710
1711 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1712 interface_needs_input_broadcast(struct ifnet * ifp)
1713 {
1714 /*
1715 * Selectively enable input broadcast only when necessary.
1716 * The bridge interface itself attaches a fake protocol
1717 * so checking for at least two protocols means that the
1718 * interface is being used for something besides bridging
1719 * and needs to see broadcast packets from other members.
1720 */
1721 return if_get_protolist(ifp, NULL, 0) >= 2;
1722 }
1723
1724 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1725 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1726 {
1727 boolean_t old_input_broadcast;
1728
1729 old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1730 if (input_broadcast) {
1731 bif->bif_flags |= BIFF_INPUT_BROADCAST;
1732 } else {
1733 bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1734 }
1735 return old_input_broadcast != input_broadcast;
1736 }
1737
1738 /*
1739 * bridge_ioctl:
1740 *
1741 * Handle a control request from the operator.
1742 */
1743 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * data)1744 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1745 {
1746 struct bridge_softc *sc = ifp->if_softc;
1747 struct ifreq *ifr = (struct ifreq *)data;
1748 struct bridge_iflist *bif;
1749 int error = 0;
1750
1751 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1752
1753 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1754 "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1755 ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1756 (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1757 (char)IOCGROUP(cmd), cmd & 0xff);
1758
1759 switch (cmd) {
1760 case SIOCSIFADDR:
1761 case SIOCAIFADDR:
1762 ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1763 break;
1764
1765 case SIOCGIFMEDIA32:
1766 case SIOCGIFMEDIA64: {
1767 struct ifmediareq *ifmr = (struct ifmediareq *)data;
1768 user_addr_t user_addr;
1769
1770 user_addr = (cmd == SIOCGIFMEDIA64) ?
1771 ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
1772 CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
1773
1774 ifmr->ifm_status = IFM_AVALID;
1775 ifmr->ifm_mask = 0;
1776 ifmr->ifm_count = 1;
1777
1778 BRIDGE_LOCK(sc);
1779 if (!(sc->sc_flags & SCF_DETACHING) &&
1780 (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1781 ifmr->ifm_status |= IFM_ACTIVE;
1782 ifmr->ifm_active = ifmr->ifm_current =
1783 IFM_ETHER | IFM_AUTO;
1784 } else {
1785 ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1786 }
1787 BRIDGE_UNLOCK(sc);
1788
1789 if (user_addr != USER_ADDR_NULL) {
1790 error = copyout(&ifmr->ifm_current, user_addr,
1791 sizeof(int));
1792 }
1793 break;
1794 }
1795
1796 case SIOCADDMULTI:
1797 case SIOCDELMULTI:
1798 break;
1799
1800 case SIOCSDRVSPEC32:
1801 case SIOCGDRVSPEC32: {
1802 union {
1803 struct ifbreq ifbreq;
1804 struct ifbifconf32 ifbifconf;
1805 struct ifbareq32 ifbareq;
1806 struct ifbaconf32 ifbaconf;
1807 struct ifbrparam ifbrparam;
1808 struct ifbropreq32 ifbropreq;
1809 } args;
1810 struct ifdrv32 *ifd = (struct ifdrv32 *)data;
1811 const struct bridge_control *bridge_control_table =
1812 bridge_control_table32, *bc;
1813
1814 DRVSPEC;
1815
1816 break;
1817 }
1818 case SIOCSDRVSPEC64:
1819 case SIOCGDRVSPEC64: {
1820 union {
1821 struct ifbreq ifbreq;
1822 struct ifbifconf64 ifbifconf;
1823 struct ifbareq64 ifbareq;
1824 struct ifbaconf64 ifbaconf;
1825 struct ifbrparam ifbrparam;
1826 struct ifbropreq64 ifbropreq;
1827 } args;
1828 struct ifdrv64 *ifd = (struct ifdrv64 *)data;
1829 const struct bridge_control *bridge_control_table =
1830 bridge_control_table64, *bc;
1831
1832 DRVSPEC;
1833
1834 break;
1835 }
1836
1837 case SIOCSIFFLAGS:
1838 if (!(ifp->if_flags & IFF_UP) &&
1839 (ifp->if_flags & IFF_RUNNING)) {
1840 /*
1841 * If interface is marked down and it is running,
1842 * then stop and disable it.
1843 */
1844 BRIDGE_LOCK(sc);
1845 bridge_ifstop(ifp, 1);
1846 BRIDGE_UNLOCK(sc);
1847 } else if ((ifp->if_flags & IFF_UP) &&
1848 !(ifp->if_flags & IFF_RUNNING)) {
1849 /*
1850 * If interface is marked up and it is stopped, then
1851 * start it.
1852 */
1853 BRIDGE_LOCK(sc);
1854 error = bridge_init(ifp);
1855 BRIDGE_UNLOCK(sc);
1856 }
1857 break;
1858
1859 case SIOCSIFLLADDR:
1860 error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
1861 ifr->ifr_addr.sa_len);
1862 if (error != 0) {
1863 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1864 "%s SIOCSIFLLADDR error %d", ifp->if_xname,
1865 error);
1866 }
1867 break;
1868
1869 case SIOCSIFMTU:
1870 if (ifr->ifr_mtu < 576) {
1871 error = EINVAL;
1872 break;
1873 }
1874 BRIDGE_LOCK(sc);
1875 if (TAILQ_EMPTY(&sc->sc_iflist)) {
1876 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1877 BRIDGE_UNLOCK(sc);
1878 break;
1879 }
1880 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1881 if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
1882 BRIDGE_LOG(LOG_NOTICE, 0,
1883 "%s invalid MTU: %u(%s) != %d",
1884 sc->sc_ifp->if_xname,
1885 bif->bif_ifp->if_mtu,
1886 bif->bif_ifp->if_xname, ifr->ifr_mtu);
1887 error = EINVAL;
1888 break;
1889 }
1890 }
1891 if (!error) {
1892 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1893 }
1894 BRIDGE_UNLOCK(sc);
1895 break;
1896
1897 default:
1898 error = ether_ioctl(ifp, cmd, data);
1899 if (error != 0 && error != EOPNOTSUPP) {
1900 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1901 "ifp %s cmd 0x%08lx "
1902 "(%c%c [%lu] %c %lu) failed error: %d",
1903 ifp->if_xname, cmd,
1904 (cmd & IOC_IN) ? 'I' : ' ',
1905 (cmd & IOC_OUT) ? 'O' : ' ',
1906 IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
1907 cmd & 0xff, error);
1908 }
1909 break;
1910 }
1911 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1912
1913 return error;
1914 }
1915
1916 #if HAS_IF_CAP
1917 /*
1918 * bridge_mutecaps:
1919 *
1920 * Clear or restore unwanted capabilities on the member interface
1921 */
1922 static void
bridge_mutecaps(struct bridge_softc * sc)1923 bridge_mutecaps(struct bridge_softc *sc)
1924 {
1925 struct bridge_iflist *bif;
1926 int enabled, mask;
1927
1928 /* Initial bitmask of capabilities to test */
1929 mask = BRIDGE_IFCAPS_MASK;
1930
1931 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1932 /* Every member must support it or its disabled */
1933 mask &= bif->bif_savedcaps;
1934 }
1935
1936 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1937 enabled = bif->bif_ifp->if_capenable;
1938 enabled &= ~BRIDGE_IFCAPS_STRIP;
1939 /* strip off mask bits and enable them again if allowed */
1940 enabled &= ~BRIDGE_IFCAPS_MASK;
1941 enabled |= mask;
1942
1943 bridge_set_ifcap(sc, bif, enabled);
1944 }
1945 }
1946
1947 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)1948 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
1949 {
1950 struct ifnet *ifp = bif->bif_ifp;
1951 struct ifreq ifr;
1952 int error;
1953
1954 bzero(&ifr, sizeof(ifr));
1955 ifr.ifr_reqcap = set;
1956
1957 if (ifp->if_capenable != set) {
1958 IFF_LOCKGIANT(ifp);
1959 error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
1960 IFF_UNLOCKGIANT(ifp);
1961 if (error) {
1962 BRIDGE_LOG(LOG_NOTICE, 0,
1963 "%s error setting interface capabilities on %s",
1964 sc->sc_ifp->if_xname, ifp->if_xname);
1965 }
1966 }
1967 }
1968 #endif /* HAS_IF_CAP */
1969
/*
 * bridge_set_tso:
 *
 * Recompute the bridge interface's TSO capabilities and TSO MTUs as
 * the lowest common denominator of all member interfaces and push the
 * result down to the bridge ifnet.
 *
 * Called with the bridge lock held (serializes against member list
 * changes).  Returns 0 on success or the error from the ifnet KPI
 * call that failed.
 */
static errno_t
bridge_set_tso(struct bridge_softc *sc)
{
	struct bridge_iflist *bif;
	u_int32_t tso_v4_mtu;
	u_int32_t tso_v6_mtu;
	ifnet_offload_t offload;
	errno_t error = 0;

	/* By default, support TSO */
	offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
	tso_v4_mtu = IP_MAXPACKET;
	tso_v6_mtu = IP_MAXPACKET;

	/* Use the lowest common denominator of the members */
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
		ifnet_t ifp = bif->bif_ifp;

		if (ifp == NULL) {
			continue;
		}

		if (offload & IFNET_TSO_IPV4) {
			if (ifp->if_hwassist & IFNET_TSO_IPV4) {
				/* clamp to the smallest member TSO MTU */
				if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
					tso_v4_mtu = ifp->if_tso_v4_mtu;
				}
			} else {
				/* one member without TSOv4 disables it */
				offload &= ~IFNET_TSO_IPV4;
				tso_v4_mtu = 0;
			}
		}
		if (offload & IFNET_TSO_IPV6) {
			if (ifp->if_hwassist & IFNET_TSO_IPV6) {
				/* clamp to the smallest member TSO MTU */
				if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
					tso_v6_mtu = ifp->if_tso_v6_mtu;
				}
			} else {
				/* one member without TSOv6 disables it */
				offload &= ~IFNET_TSO_IPV6;
				tso_v6_mtu = 0;
			}
		}
	}

	/* only touch the ifnet when something actually changed */
	if (offload != sc->sc_ifp->if_hwassist) {
		error = ifnet_set_offload(sc->sc_ifp, offload);
		if (error != 0) {
			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
			    "ifnet_set_offload(%s, 0x%x) failed %d",
			    sc->sc_ifp->if_xname, offload, error);
			goto done;
		}
		/*
		 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
		 * as large as the interface MTU
		 */
		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
			if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
				tso_v4_mtu = sc->sc_ifp->if_mtu;
			}
			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
			    tso_v4_mtu);
			if (error != 0) {
				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
				    "ifnet_set_tso_mtu(%s, "
				    "AF_INET, %u) failed %d",
				    sc->sc_ifp->if_xname,
				    tso_v4_mtu, error);
				goto done;
			}
		}
		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
			if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
				tso_v6_mtu = sc->sc_ifp->if_mtu;
			}
			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
			    tso_v6_mtu);
			if (error != 0) {
				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
				    "ifnet_set_tso_mtu(%s, "
				    "AF_INET6, %u) failed %d",
				    sc->sc_ifp->if_xname,
				    tso_v6_mtu, error);
				goto done;
			}
		}
	}
done:
	return error;
}
2060
2061 /*
2062 * bridge_lookup_member:
2063 *
2064 * Lookup a bridge member interface.
2065 */
2066 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,const char * name)2067 bridge_lookup_member(struct bridge_softc *sc, const char *name)
2068 {
2069 struct bridge_iflist *bif;
2070 struct ifnet *ifp;
2071
2072 BRIDGE_LOCK_ASSERT_HELD(sc);
2073
2074 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2075 ifp = bif->bif_ifp;
2076 if (strcmp(ifp->if_xname, name) == 0) {
2077 return bif;
2078 }
2079 }
2080
2081 return NULL;
2082 }
2083
2084 /*
2085 * bridge_lookup_member_if:
2086 *
2087 * Lookup a bridge member interface by ifnet*.
2088 */
2089 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2090 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2091 {
2092 struct bridge_iflist *bif;
2093
2094 BRIDGE_LOCK_ASSERT_HELD(sc);
2095
2096 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2097 if (bif->bif_ifp == member_ifp) {
2098 return bif;
2099 }
2100 }
2101
2102 return NULL;
2103 }
2104
/*
 * bridge_iff_input:
 *
 * Interface filter input callback installed on each bridge member.
 * Temporarily extends the mbuf's data area to re-expose the frame
 * (link-layer) header, hands the packet to bridge_input(), and then
 * restores the mbuf so the normal input path sees it unchanged when
 * the bridge did not consume it.
 *
 * Returns 0 to let input processing continue, or EJUSTRETURN when the
 * packet should not be processed further.
 */
static errno_t
bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(protocol)
	errno_t error = 0;
	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
	struct bridge_softc *sc = bif->bif_sc;
	int included = 0;
	size_t frmlen = 0;
	mbuf_t m = *data;

	/* M_PROTO1 marks packets the bridge has already handled */
	if ((m->m_flags & M_PROTO1)) {
		goto out;
	}

	/* determine whether the frame header lies inside the mbuf data */
	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
	    *frame_ptr <= (char *)mbuf_data(m)) {
		included = 1;
		frmlen = (char *)mbuf_data(m) - *frame_ptr;
	}
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
	    "frmlen %lu", sc->sc_ifp->if_xname,
	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
	    included ? "inside" : "outside", frmlen);
	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
		brlog_mbuf(m, "bridge_iff_input[", "");
		brlog_ether_header((struct ether_header *)
		    (void *)*frame_ptr);
		brlog_mbuf_data(m, 0, 20);
	}
	if (included == 0) {
		/* can't safely rewind to the header; pass it through */
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
		goto out;
	}

	/* Move data pointer to start of frame to the link layer header */
	(void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
	    mbuf_len(m) + frmlen);
	(void) mbuf_pkthdr_adjustlen(m, frmlen);

	/* make sure we can access the ethernet header */
	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
		/*
		 * NOTE(review): this bails out with the data pointer still
		 * rewound over the frame header — confirm that is intended
		 * for short frames.
		 */
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "short frame %lu < %lu",
		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
		goto out;
	}
	if (mbuf_len(m) < sizeof(struct ether_header)) {
		/* header spans mbufs: make it contiguous */
		error = mbuf_pullup(data, sizeof(struct ether_header));
		if (error != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
			    "mbuf_pullup(%lu) failed %d",
			    sizeof(struct ether_header),
			    error);
			/* presumably the chain was freed; stop processing */
			error = EJUSTRETURN;
			goto out;
		}
		if (m != *data) {
			/* pullup may allocate a new head mbuf; re-track it */
			m = *data;
			*frame_ptr = mbuf_data(m);
		}
	}

	error = bridge_input(ifp, data);

	/* Adjust packet back to original */
	if (error == 0) {
		/* bridge_input might have modified *data */
		if (*data != m) {
			m = *data;
			*frame_ptr = mbuf_data(m);
		}
		(void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen,
		    mbuf_len(m) - frmlen);
		(void) mbuf_pkthdr_adjustlen(m, -frmlen);
	}

	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
		brlog_mbuf(m, "bridge_iff_input]", "");
	}

out:
	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	return error;
}
2196
2197 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2198 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2199 mbuf_t *data)
2200 {
2201 #pragma unused(protocol)
2202 errno_t error = 0;
2203 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2204 struct bridge_softc *sc = bif->bif_sc;
2205 mbuf_t m = *data;
2206
2207 if ((m->m_flags & M_PROTO1)) {
2208 goto out;
2209 }
2210 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2211 "%s from %s m 0x%llx data 0x%llx",
2212 sc->sc_ifp->if_xname, ifp->if_xname,
2213 (uint64_t)VM_KERNEL_ADDRPERM(m),
2214 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
2215
2216 error = bridge_member_output(sc, ifp, data);
2217 if (error != 0 && error != EJUSTRETURN) {
2218 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2219 "bridge_member_output failed error %d",
2220 error);
2221 }
2222 out:
2223 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2224
2225 return error;
2226 }
2227
2228 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2229 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2230 const struct kev_msg *event_msg)
2231 {
2232 #pragma unused(protocol)
2233 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2234 struct bridge_softc *sc = bif->bif_sc;
2235
2236 if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2237 event_msg->kev_class == KEV_NETWORK_CLASS &&
2238 event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2239 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2240 "%s event_code %u - %s",
2241 ifp->if_xname, event_msg->event_code,
2242 dlil_kev_dl_code_str(event_msg->event_code));
2243
2244 switch (event_msg->event_code) {
2245 case KEV_DL_LINK_OFF:
2246 case KEV_DL_LINK_ON: {
2247 bridge_iflinkevent(ifp);
2248 #if BRIDGESTP
2249 bstp_linkstate(ifp, event_msg->event_code);
2250 #endif /* BRIDGESTP */
2251 break;
2252 }
2253 case KEV_DL_SIFFLAGS: {
2254 if ((bif->bif_flags & BIFF_PROMISC) == 0 &&
2255 (ifp->if_flags & IFF_UP)) {
2256 errno_t error;
2257
2258 error = ifnet_set_promiscuous(ifp, 1);
2259 if (error != 0) {
2260 BRIDGE_LOG(LOG_NOTICE, 0,
2261 "ifnet_set_promiscuous (%s)"
2262 " failed %d", ifp->if_xname,
2263 error);
2264 } else {
2265 bif->bif_flags |= BIFF_PROMISC;
2266 }
2267 }
2268 break;
2269 }
2270 case KEV_DL_IFCAP_CHANGED: {
2271 BRIDGE_LOCK(sc);
2272 bridge_set_tso(sc);
2273 BRIDGE_UNLOCK(sc);
2274 break;
2275 }
2276 case KEV_DL_PROTO_DETACHED:
2277 case KEV_DL_PROTO_ATTACHED: {
2278 bridge_proto_attach_changed(ifp);
2279 break;
2280 }
2281 default:
2282 break;
2283 }
2284 }
2285 }
2286
2287 /*
2288 * bridge_iff_detached:
2289 *
2290 * Called when our interface filter has been detached from a
2291 * member interface.
2292 */
2293 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2294 bridge_iff_detached(void *cookie, ifnet_t ifp)
2295 {
2296 #pragma unused(cookie)
2297 struct bridge_iflist *bif;
2298 struct bridge_softc *sc = ifp->if_bridge;
2299
2300 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2301
2302 /* Check if the interface is a bridge member */
2303 if (sc != NULL) {
2304 BRIDGE_LOCK(sc);
2305 bif = bridge_lookup_member_if(sc, ifp);
2306 if (bif != NULL) {
2307 bridge_delete_member(sc, bif);
2308 }
2309 BRIDGE_UNLOCK(sc);
2310 return;
2311 }
2312 /* Check if the interface is a span port */
2313 lck_mtx_lock(&bridge_list_mtx);
2314 LIST_FOREACH(sc, &bridge_list, sc_list) {
2315 BRIDGE_LOCK(sc);
2316 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2317 if (ifp == bif->bif_ifp) {
2318 bridge_delete_span(sc, bif);
2319 break;
2320 }
2321 BRIDGE_UNLOCK(sc);
2322 }
2323 lck_mtx_unlock(&bridge_list_mtx);
2324 }
2325
/*
 * bridge_proto_input:
 *
 * Input handler for the dummy "bridge" protocol attached to each
 * member; it exists only to mark the member and should never receive
 * traffic.  Logs the anomaly and returns 0.
 */
static errno_t
bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
    char *header)
{
#pragma unused(protocol, packet, header)
	BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
	    ifp->if_xname);
	return 0;
}
2335
2336 static int
bridge_attach_protocol(struct ifnet * ifp)2337 bridge_attach_protocol(struct ifnet *ifp)
2338 {
2339 int error;
2340 struct ifnet_attach_proto_param reg;
2341
2342 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2343 bzero(®, sizeof(reg));
2344 reg.input = bridge_proto_input;
2345
2346 error = ifnet_attach_protocol(ifp, PF_BRIDGE, ®);
2347 if (error) {
2348 BRIDGE_LOG(LOG_NOTICE, 0,
2349 "ifnet_attach_protocol(%s) failed, %d",
2350 ifp->if_xname, error);
2351 }
2352
2353 return error;
2354 }
2355
2356 static int
bridge_detach_protocol(struct ifnet * ifp)2357 bridge_detach_protocol(struct ifnet *ifp)
2358 {
2359 int error;
2360
2361 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2362 error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2363 if (error) {
2364 BRIDGE_LOG(LOG_NOTICE, 0,
2365 "ifnet_detach_protocol(%s) failed, %d",
2366 ifp->if_xname, error);
2367 }
2368
2369 return error;
2370 }
2371
2372 /*
2373 * bridge_delete_member:
2374 *
2375 * Delete the specified member interface.
2376 */
2377 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)2378 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2379 {
2380 uint32_t bif_flags;
2381 struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2382 int lladdr_changed = 0, error;
2383 uint8_t eaddr[ETHER_ADDR_LEN];
2384 u_int32_t event_code = 0;
2385
2386 BRIDGE_LOCK_ASSERT_HELD(sc);
2387 VERIFY(ifs != NULL);
2388
2389 /*
2390 * Remove the member from the list first so it cannot be found anymore
2391 * when we release the bridge lock below
2392 */
2393 if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2394 BRIDGE_XLOCK(sc);
2395 TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2396 BRIDGE_XDROP(sc);
2397 }
2398 if (sc->sc_mac_nat_bif != NULL) {
2399 if (bif == sc->sc_mac_nat_bif) {
2400 bridge_mac_nat_disable(sc);
2401 } else {
2402 bridge_mac_nat_flush_entries(sc, bif);
2403 }
2404 }
2405 #if BRIDGESTP
2406 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2407 bstp_disable(&bif->bif_stp);
2408 }
2409 #endif /* BRIDGESTP */
2410
2411 /*
2412 * If removing the interface that gave the bridge its mac address, set
2413 * the mac address of the bridge to the address of the next member, or
2414 * to its default address if no members are left.
2415 */
2416 if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2417 ifnet_release(sc->sc_ifaddr);
2418 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2419 bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
2420 sc->sc_ifaddr = NULL;
2421 } else {
2422 struct ifnet *fif =
2423 TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2424 bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
2425 sc->sc_ifaddr = fif;
2426 ifnet_reference(fif); /* for sc_ifaddr */
2427 }
2428 lladdr_changed = 1;
2429 }
2430
2431 #if HAS_IF_CAP
2432 bridge_mutecaps(sc); /* recalculate now this interface is removed */
2433 #endif /* HAS_IF_CAP */
2434
2435 error = bridge_set_tso(sc);
2436 if (error != 0) {
2437 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2438 }
2439
2440 bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
2441
2442 KASSERT(bif->bif_addrcnt == 0,
2443 ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2444
2445 /*
2446 * Update link status of the bridge based on its remaining members
2447 */
2448 event_code = bridge_updatelinkstatus(sc);
2449 bif_flags = bif->bif_flags;
2450 BRIDGE_UNLOCK(sc);
2451
2452 /* only perform these steps if the interface is still attached */
2453 if (ifnet_is_attached(ifs, 1)) {
2454 #if SKYWALK
2455 if ((bif_flags & BIFF_NETAGENT_REMOVED) != 0) {
2456 ifnet_add_netagent(ifs);
2457 }
2458 if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2459 ifnet_detach_flowswitch_nexus(ifs);
2460 }
2461 #endif /* SKYWALK */
2462 /* disable promiscuous mode */
2463 if ((bif_flags & BIFF_PROMISC) != 0) {
2464 (void) ifnet_set_promiscuous(ifs, 0);
2465 }
2466 #if HAS_IF_CAP
2467 /* re-enable any interface capabilities */
2468 bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2469 #endif
2470 /* detach bridge "protocol" */
2471 if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2472 (void)bridge_detach_protocol(ifs);
2473 }
2474 /* detach interface filter */
2475 if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2476 iflt_detach(bif->bif_iff_ref);
2477 }
2478 ifnet_decr_iorefcnt(ifs);
2479 }
2480
2481 if (lladdr_changed &&
2482 (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2483 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2484 }
2485
2486 if (event_code != 0) {
2487 bridge_link_event(bifp, event_code);
2488 }
2489
2490 #if BRIDGESTP
2491 bstp_destroy(&bif->bif_stp); /* prepare to free */
2492 #endif /* BRIDGESTP */
2493
2494 kfree_type(struct bridge_iflist, bif);
2495 ifs->if_bridge = NULL;
2496 ifnet_release(ifs);
2497
2498 BRIDGE_LOCK(sc);
2499 }
2500
2501 /*
2502 * bridge_delete_span:
2503 *
2504 * Delete the specified span interface.
2505 */
2506 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2507 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2508 {
2509 BRIDGE_LOCK_ASSERT_HELD(sc);
2510
2511 KASSERT(bif->bif_ifp->if_bridge == NULL,
2512 ("%s: not a span interface", __func__));
2513
2514 ifnet_release(bif->bif_ifp);
2515
2516 TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2517 kfree_type(struct bridge_iflist, bif);
2518 }
2519
2520 static int
bridge_ioctl_add(struct bridge_softc * sc,void * arg)2521 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
2522 {
2523 struct ifbreq *req = arg;
2524 struct bridge_iflist *bif = NULL;
2525 struct ifnet *ifs, *bifp = sc->sc_ifp;
2526 int error = 0, lladdr_changed = 0;
2527 uint8_t eaddr[ETHER_ADDR_LEN];
2528 struct iff_filter iff;
2529 u_int32_t event_code = 0;
2530 boolean_t mac_nat = FALSE;
2531 boolean_t input_broadcast;
2532
2533 ifs = ifunit(req->ifbr_ifsname);
2534 if (ifs == NULL) {
2535 return ENOENT;
2536 }
2537 if (ifs->if_ioctl == NULL) { /* must be supported */
2538 return EINVAL;
2539 }
2540
2541 if (IFNET_IS_INTCOPROC(ifs)) {
2542 return EINVAL;
2543 }
2544
2545 /* If it's in the span list, it can't be a member. */
2546 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2547 if (ifs == bif->bif_ifp) {
2548 return EBUSY;
2549 }
2550 }
2551
2552 if (ifs->if_bridge == sc) {
2553 return EEXIST;
2554 }
2555
2556 if (ifs->if_bridge != NULL) {
2557 return EBUSY;
2558 }
2559
2560 switch (ifs->if_type) {
2561 case IFT_ETHER:
2562 if (strcmp(ifs->if_name, "en") == 0 &&
2563 ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2564 (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2565 /* XXX is there a better way to identify Wi-Fi STA? */
2566 mac_nat = TRUE;
2567 }
2568 break;
2569 case IFT_L2VLAN:
2570 case IFT_IEEE8023ADLAG:
2571 break;
2572 case IFT_GIF:
2573 /* currently not supported */
2574 /* FALLTHRU */
2575 default:
2576 return EINVAL;
2577 }
2578
2579 /* fail to add the interface if the MTU doesn't match */
2580 if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2581 BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2582 sc->sc_ifp->if_xname,
2583 ifs->if_xname);
2584 return EINVAL;
2585 }
2586
2587 /* there's already an interface that's doing MAC NAT */
2588 if (mac_nat && sc->sc_mac_nat_bif != NULL) {
2589 return EBUSY;
2590 }
2591
2592 /* prevent the interface from detaching while we add the member */
2593 if (!ifnet_is_attached(ifs, 1)) {
2594 return ENXIO;
2595 }
2596
2597 /* allocate a new member */
2598 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2599 bif->bif_ifp = ifs;
2600 ifnet_reference(ifs);
2601 bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2602 #if HAS_IF_CAP
2603 bif->bif_savedcaps = ifs->if_capenable;
2604 #endif /* HAS_IF_CAP */
2605 bif->bif_sc = sc;
2606 if (mac_nat) {
2607 (void)bridge_mac_nat_enable(sc, bif);
2608 }
2609
2610 if (IFNET_IS_VMNET(ifs)) {
2611 allocate_vmnet_pf_tags();
2612 }
2613 /* Allow the first Ethernet member to define the MTU */
2614 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2615 sc->sc_ifp->if_mtu = ifs->if_mtu;
2616 }
2617
2618 /*
2619 * Assign the interface's MAC address to the bridge if it's the first
2620 * member and the MAC address of the bridge has not been changed from
2621 * the default (randomly) generated one.
2622 */
2623 if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2624 _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2625 bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2626 sc->sc_ifaddr = ifs;
2627 ifnet_reference(ifs); /* for sc_ifaddr */
2628 lladdr_changed = 1;
2629 }
2630
2631 ifs->if_bridge = sc;
2632 #if BRIDGESTP
2633 bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2634 #endif /* BRIDGESTP */
2635
2636 #if HAS_IF_CAP
2637 /* Set interface capabilities to the intersection set of all members */
2638 bridge_mutecaps(sc);
2639 #endif /* HAS_IF_CAP */
2640
2641
2642 /*
2643 * Respect lock ordering with DLIL lock for the following operations
2644 */
2645 BRIDGE_UNLOCK(sc);
2646
2647 /* enable promiscuous mode */
2648 error = ifnet_set_promiscuous(ifs, 1);
2649 switch (error) {
2650 case 0:
2651 bif->bif_flags |= BIFF_PROMISC;
2652 break;
2653 case ENETDOWN:
2654 case EPWROFF:
2655 BRIDGE_LOG(LOG_NOTICE, 0,
2656 "ifnet_set_promiscuous(%s) failed %d, ignoring",
2657 ifs->if_xname, error);
2658 /* Ignore error when device is not up */
2659 error = 0;
2660 break;
2661 default:
2662 BRIDGE_LOG(LOG_NOTICE, 0,
2663 "ifnet_set_promiscuous(%s) failed %d",
2664 ifs->if_xname, error);
2665 BRIDGE_LOCK(sc);
2666 goto out;
2667 }
2668
2669 #if SKYWALK
2670 /* ensure that the flowswitch is present for native interface */
2671 if (SKYWALK_NATIVE(ifs)) {
2672 if (ifnet_attach_flowswitch_nexus(ifs)) {
2673 bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
2674 }
2675 }
2676 /* remove the netagent on the flowswitch (rdar://75050182) */
2677 if (ifnet_remove_netagent(ifs)) {
2678 bif->bif_flags |= BIFF_NETAGENT_REMOVED;
2679 }
2680 #endif /* SKYWALK */
2681
2682 /*
2683 * install an interface filter
2684 */
2685 memset(&iff, 0, sizeof(struct iff_filter));
2686 iff.iff_cookie = bif;
2687 iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
2688 iff.iff_input = bridge_iff_input;
2689 iff.iff_output = bridge_iff_output;
2690 iff.iff_event = bridge_iff_event;
2691 iff.iff_detached = bridge_iff_detached;
2692 error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
2693 DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
2694 if (error != 0) {
2695 BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
2696 BRIDGE_LOCK(sc);
2697 goto out;
2698 }
2699 bif->bif_flags |= BIFF_FILTER_ATTACHED;
2700
2701 /*
2702 * install a dummy "bridge" protocol
2703 */
2704 if ((error = bridge_attach_protocol(ifs)) != 0) {
2705 if (error != 0) {
2706 BRIDGE_LOG(LOG_NOTICE, 0,
2707 "bridge_attach_protocol failed %d", error);
2708 BRIDGE_LOCK(sc);
2709 goto out;
2710 }
2711 }
2712 bif->bif_flags |= BIFF_PROTO_ATTACHED;
2713
2714 if (lladdr_changed &&
2715 (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2716 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2717 }
2718
2719 /*
2720 * No failures past this point. Add the member to the list.
2721 */
2722 BRIDGE_LOCK(sc);
2723 bif->bif_flags |= BIFF_IN_MEMBER_LIST;
2724 BRIDGE_XLOCK(sc);
2725 TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
2726 BRIDGE_XDROP(sc);
2727
2728 /* cache the member link status */
2729 if (interface_media_active(ifs)) {
2730 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
2731 } else {
2732 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
2733 }
2734
2735 /* the new member may change the link status of the bridge interface */
2736 event_code = bridge_updatelinkstatus(sc);
2737
2738 /* check whether we need input broadcast or not */
2739 input_broadcast = interface_needs_input_broadcast(ifs);
2740 bif_set_input_broadcast(bif, input_broadcast);
2741 BRIDGE_UNLOCK(sc);
2742
2743 if (event_code != 0) {
2744 bridge_link_event(bifp, event_code);
2745 }
2746 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2747 "%s input broadcast %s", ifs->if_xname,
2748 input_broadcast ? "ENABLED" : "DISABLED");
2749
2750 BRIDGE_LOCK(sc);
2751 bridge_set_tso(sc);
2752
2753 out:
2754 /* allow the interface to detach */
2755 ifnet_decr_iorefcnt(ifs);
2756
2757 if (error != 0) {
2758 if (bif != NULL) {
2759 bridge_delete_member(sc, bif);
2760 }
2761 } else if (IFNET_IS_VMNET(ifs)) {
2762 INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
2763 }
2764
2765 return error;
2766 }
2767
2768 static int
bridge_ioctl_del(struct bridge_softc * sc,void * arg)2769 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
2770 {
2771 struct ifbreq *req = arg;
2772 struct bridge_iflist *bif;
2773
2774 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2775 if (bif == NULL) {
2776 return ENOENT;
2777 }
2778
2779 bridge_delete_member(sc, bif);
2780
2781 return 0;
2782 }
2783
/*
 * bridge_ioctl_purge:
 *
 * Purge request handler; intentionally a no-op that reports success.
 */
static int
bridge_ioctl_purge(struct bridge_softc *sc, void *arg)
{
#pragma unused(sc, arg)
	return 0;
}
2790
2791 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * arg)2792 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
2793 {
2794 struct ifbreq *req = arg;
2795 struct bridge_iflist *bif;
2796
2797 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2798 if (bif == NULL) {
2799 return ENOENT;
2800 }
2801
2802 struct bstp_port *bp;
2803
2804 bp = &bif->bif_stp;
2805 req->ifbr_state = bp->bp_state;
2806 req->ifbr_priority = bp->bp_priority;
2807 req->ifbr_path_cost = bp->bp_path_cost;
2808 req->ifbr_proto = bp->bp_protover;
2809 req->ifbr_role = bp->bp_role;
2810 req->ifbr_stpflags = bp->bp_flags;
2811 req->ifbr_ifsflags = bif->bif_ifflags;
2812
2813 /* Copy STP state options as flags */
2814 if (bp->bp_operedge) {
2815 req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
2816 }
2817 if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
2818 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
2819 }
2820 if (bp->bp_ptp_link) {
2821 req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
2822 }
2823 if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
2824 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
2825 }
2826 if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
2827 req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
2828 }
2829 if (bp->bp_flags & BSTP_PORT_ADMCOST) {
2830 req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
2831 }
2832
2833 req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
2834 req->ifbr_addrcnt = bif->bif_addrcnt;
2835 req->ifbr_addrmax = bif->bif_addrmax;
2836 req->ifbr_addrexceeded = bif->bif_addrexceeded;
2837
2838 return 0;
2839 }
2840
/*
 * bridge_ioctl_sifflags:
 *
 * Set the bridge-specific flags of a member interface (MAC NAT,
 * checksum offload, STP and STP port options).
 * Called with the bridge lock held.
 */
static int
bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
{
	struct ifbreq *req = arg;
	struct bridge_iflist *bif;
#if BRIDGESTP
	struct bstp_port *bp;
	int error;
#endif /* BRIDGESTP */

	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
	if (bif == NULL) {
		return ENOENT;
	}

	if (req->ifbr_ifsflags & IFBIF_SPAN) {
		/* SPAN is readonly */
		return EINVAL;
	}
#define _EXCLUSIVE_FLAGS (IFBIF_CHECKSUM_OFFLOAD | IFBIF_MAC_NAT)
	if ((req->ifbr_ifsflags & _EXCLUSIVE_FLAGS) == _EXCLUSIVE_FLAGS) {
		/* can't specify both MAC-NAT and checksum offload */
		return EINVAL;
	}
	/* enable or disable MAC NAT on this member to match the request */
	if ((req->ifbr_ifsflags & IFBIF_MAC_NAT) != 0) {
		errno_t error;

		error = bridge_mac_nat_enable(sc, bif);
		if (error != 0) {
			return error;
		}
	} else if (sc->sc_mac_nat_bif == bif) {
		bridge_mac_nat_disable(sc);
	}


#if BRIDGESTP
	/* enable/disable spanning tree on the port to match the request */
	if (req->ifbr_ifsflags & IFBIF_STP) {
		if ((bif->bif_ifflags & IFBIF_STP) == 0) {
			error = bstp_enable(&bif->bif_stp);
			if (error) {
				return error;
			}
		}
	} else {
		if ((bif->bif_ifflags & IFBIF_STP) != 0) {
			bstp_disable(&bif->bif_stp);
		}
	}

	/* Pass on STP flags */
	bp = &bif->bif_stp;
	bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
	bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
	bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
	bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
#else /* !BRIDGESTP */
	if (req->ifbr_ifsflags & IFBIF_STP) {
		return EOPNOTSUPP;
	}
#endif /* !BRIDGESTP */

	/* Save the bits relating to the bridge */
	bif->bif_ifflags = req->ifbr_ifsflags & IFBIFMASK;


	return 0;
}
2909
2910 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * arg)2911 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
2912 {
2913 struct ifbrparam *param = arg;
2914
2915 sc->sc_brtmax = param->ifbrp_csize;
2916 bridge_rttrim(sc);
2917 return 0;
2918 }
2919
2920 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * arg)2921 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
2922 {
2923 struct ifbrparam *param = arg;
2924
2925 param->ifbrp_csize = sc->sc_brtmax;
2926
2927 return 0;
2928 }
2929
/*
 * BRIDGE_IOCTL_GIFS:
 *	Shared body for bridge_ioctl_gifs32/64.  Expects 'sc', 'bifc', and
 *	'error' to be in scope in the expanding function; only the type of
 *	'bifc' (struct ifbifconf32 vs ifbifconf64) differs between callers.
 *
 *	Counts members and span ports; if the caller passed ifbic_len == 0
 *	this is a size probe and only the required length is returned.
 *	Otherwise a kernel buffer is filled with one ifbreq per port and
 *	copied out.  The bridge lock is dropped around kalloc_data() and
 *	copyout(), so the port lists may change while unlocked; the copy
 *	is bounded by the buffer length computed beforehand.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif; \
	struct ifbreq breq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
 \
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) \
	count++; \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) \
	count++; \
 \
	buflen = sizeof (breq) * count; \
	if (bifc->ifbic_len == 0) { \
	        bifc->ifbic_len = buflen; \
	        return (0); \
	} \
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
 \
	count = 0; \
	buf = outbuf; \
	len = min(bifc->ifbic_len, buflen); \
	bzero(&breq, sizeof (breq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if (len < sizeof (breq)) \
	                break; \
 \
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname); \
	        /* Fill in the ifbreq structure */ \
	        error = bridge_ioctl_gifflags(sc, &breq); \
	        if (error) \
	                break; \
	        memcpy(buf, &breq, sizeof (breq)); \
	        count++; \
	        buf += sizeof (breq); \
	        len -= sizeof (breq); \
	} \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) { \
	        if (len < sizeof (breq)) \
	                break; \
 \
	        snprintf(breq.ifbr_ifsname, \
	            sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname); \
	        breq.ifbr_ifsflags = bif->bif_ifflags; \
	        breq.ifbr_portno \
	            = bif->bif_ifp->if_index & 0xfff; \
	        memcpy(buf, &breq, sizeof (breq)); \
	        count++; \
	        buf += sizeof (breq); \
	        len -= sizeof (breq); \
	} \
 \
	BRIDGE_UNLOCK(sc); \
	bifc->ifbic_len = sizeof (breq) * count; \
	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len); \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
} while (0)
2992
/*
 * bridge_ioctl_gifs64:
 *	Return the member/span interface list for a 64-bit caller.
 *	All of the work is done by the BRIDGE_IOCTL_GIFS macro.
 */
static int
bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
{
	struct ifbifconf64 *bifc = arg;
	int error = 0;

	BRIDGE_IOCTL_GIFS;

	return error;
}
3003
/*
 * bridge_ioctl_gifs32:
 *	Return the member/span interface list for a 32-bit caller.
 *	All of the work is done by the BRIDGE_IOCTL_GIFS macro.
 */
static int
bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
{
	struct ifbifconf32 *bifc = arg;
	int error = 0;

	BRIDGE_IOCTL_GIFS;

	return error;
}
3014
/*
 * BRIDGE_IOCTL_RTS:
 *	Shared body for bridge_ioctl_rts32/64.  Expects 'sc', 'bac', 'bareq'
 *	and 'error' in the expanding function's scope; only the types of
 *	'bac'/'bareq' (32- vs 64-bit variants) differ between callers.
 *
 *	Dumps the bridge forwarding (routing) table: one ifbareq per entry,
 *	with the remaining lifetime computed for dynamic entries.  The
 *	bridge lock is dropped around kalloc_data() and copyout().
 */
#define BRIDGE_IOCTL_RTS do { \
	struct bridge_rtnode *brt; \
	char *buf; \
	char *outbuf = NULL; \
	unsigned int count, buflen, len; \
	unsigned long now; \
 \
	if (bac->ifbac_len == 0) \
	        return (0); \
 \
	bzero(&bareq, sizeof (bareq)); \
	count = 0; \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) \
	count++; \
	buflen = sizeof (bareq) * count; \
 \
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
 \
	count = 0; \
	buf = outbuf; \
	len = min(bac->ifbac_len, buflen); \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
	        if (len < sizeof (bareq)) \
	                goto out; \
	        snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname), \
	            "%s", brt->brt_ifp->if_xname); \
	        memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
	        bareq.ifba_vlan = brt->brt_vlan; \
	        if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
	                now = (unsigned long) net_uptime(); \
	                if (now < brt->brt_expire) \
	                        bareq.ifba_expire = \
	                            brt->brt_expire - now; \
	        } else \
	                bareq.ifba_expire = 0; \
	        bareq.ifba_flags = brt->brt_flags; \
 \
	        memcpy(buf, &bareq, sizeof (bareq)); \
	        count++; \
	        buf += sizeof (bareq); \
	        len -= sizeof (bareq); \
	} \
out: \
	bac->ifbac_len = sizeof (bareq) * count; \
	if (outbuf != NULL) { \
	        BRIDGE_UNLOCK(sc); \
	        error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); \
	        kfree_data(outbuf, buflen); \
	        BRIDGE_LOCK(sc); \
	} \
	return (error); \
} while (0)
3069
/*
 * bridge_ioctl_rts64:
 *	Dump the forwarding table for a 64-bit caller.
 *	All of the work is done by the BRIDGE_IOCTL_RTS macro.
 */
static int
bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
{
	struct ifbaconf64 *bac = arg;
	struct ifbareq64 bareq;
	int error = 0;

	BRIDGE_IOCTL_RTS;
	return error;
}
3080
/*
 * bridge_ioctl_rts32:
 *	Dump the forwarding table for a 32-bit caller.
 *	All of the work is done by the BRIDGE_IOCTL_RTS macro.
 */
static int
bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
{
	struct ifbaconf32 *bac = arg;
	struct ifbareq32 bareq;
	int error = 0;

	BRIDGE_IOCTL_RTS;
	return error;
}
3091
3092 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * arg)3093 bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
3094 {
3095 struct ifbareq32 *req = arg;
3096 struct bridge_iflist *bif;
3097 int error;
3098
3099 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3100 if (bif == NULL) {
3101 return ENOENT;
3102 }
3103
3104 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3105 req->ifba_flags);
3106
3107 return error;
3108 }
3109
3110 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * arg)3111 bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
3112 {
3113 struct ifbareq64 *req = arg;
3114 struct bridge_iflist *bif;
3115 int error;
3116
3117 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3118 if (bif == NULL) {
3119 return ENOENT;
3120 }
3121
3122 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3123 req->ifba_flags);
3124
3125 return error;
3126 }
3127
3128 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * arg)3129 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
3130 {
3131 struct ifbrparam *param = arg;
3132
3133 sc->sc_brttimeout = param->ifbrp_ctime;
3134 return 0;
3135 }
3136
3137 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * arg)3138 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
3139 {
3140 struct ifbrparam *param = arg;
3141
3142 param->ifbrp_ctime = sc->sc_brttimeout;
3143 return 0;
3144 }
3145
3146 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * arg)3147 bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
3148 {
3149 struct ifbareq32 *req = arg;
3150
3151 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3152 }
3153
3154 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * arg)3155 bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
3156 {
3157 struct ifbareq64 *req = arg;
3158
3159 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3160 }
3161
3162 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * arg)3163 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
3164 {
3165 struct ifbreq *req = arg;
3166
3167 bridge_rtflush(sc, req->ifbr_ifsflags);
3168 return 0;
3169 }
3170
3171 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * arg)3172 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
3173 {
3174 struct ifbrparam *param = arg;
3175 struct bstp_state *bs = &sc->sc_stp;
3176
3177 param->ifbrp_prio = bs->bs_bridge_priority;
3178 return 0;
3179 }
3180
/*
 * bridge_ioctl_spri:
 *	Set the spanning-tree bridge priority.
 *	Only functional on BRIDGESTP builds; otherwise EOPNOTSUPP.
 */
static int
bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
{
#if BRIDGESTP
	struct ifbrparam *param = arg;

	return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
#else /* !BRIDGESTP */
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#endif /* !BRIDGESTP */
}
3193
3194 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * arg)3195 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
3196 {
3197 struct ifbrparam *param = arg;
3198 struct bstp_state *bs = &sc->sc_stp;
3199
3200 param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3201 return 0;
3202 }
3203
/*
 * bridge_ioctl_sht:
 *	Set the spanning-tree hello time.
 *	Only functional on BRIDGESTP builds; otherwise EOPNOTSUPP.
 */
static int
bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
{
#if BRIDGESTP
	struct ifbrparam *param = arg;

	return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
#else /* !BRIDGESTP */
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#endif /* !BRIDGESTP */
}
3216
3217 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * arg)3218 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
3219 {
3220 struct ifbrparam *param;
3221 struct bstp_state *bs;
3222
3223 param = arg;
3224 bs = &sc->sc_stp;
3225 param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3226 return 0;
3227 }
3228
/*
 * bridge_ioctl_sfd:
 *	Set the spanning-tree forward delay.
 *	Only functional on BRIDGESTP builds; otherwise EOPNOTSUPP.
 */
static int
bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
{
#if BRIDGESTP
	struct ifbrparam *param = arg;

	return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
#else /* !BRIDGESTP */
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#endif /* !BRIDGESTP */
}
3241
3242 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * arg)3243 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
3244 {
3245 struct ifbrparam *param;
3246 struct bstp_state *bs;
3247
3248 param = arg;
3249 bs = &sc->sc_stp;
3250 param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3251 return 0;
3252 }
3253
/*
 * bridge_ioctl_sma:
 *	Set the spanning-tree max age.
 *	Only functional on BRIDGESTP builds; otherwise EOPNOTSUPP.
 */
static int
bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
{
#if BRIDGESTP
	struct ifbrparam *param = arg;

	return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
#else /* !BRIDGESTP */
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#endif /* !BRIDGESTP */
}
3266
/*
 * bridge_ioctl_sifprio:
 *	Set the spanning-tree port priority for the named member.
 *	Only functional on BRIDGESTP builds; otherwise EOPNOTSUPP.
 */
static int
bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
{
#if BRIDGESTP
	struct ifbreq *req = arg;
	struct bridge_iflist *bif;

	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
	if (bif == NULL) {
		return ENOENT;
	}

	return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
#else /* !BRIDGESTP */
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#endif /* !BRIDGESTP */
}
3285
/*
 * bridge_ioctl_sifcost:
 *	Set the spanning-tree path cost for the named member.
 *	Only functional on BRIDGESTP builds; otherwise EOPNOTSUPP.
 */
static int
bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
{
#if BRIDGESTP
	struct ifbreq *req = arg;
	struct bridge_iflist *bif;

	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
	if (bif == NULL) {
		return ENOENT;
	}

	return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
#else /* !BRIDGESTP */
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#endif /* !BRIDGESTP */
}
3304
3305 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * arg)3306 bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
3307 {
3308 struct ifbrparam *param = arg;
3309
3310 param->ifbrp_filter = sc->sc_filter_flags;
3311
3312 return 0;
3313 }
3314
3315 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * arg)3316 bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
3317 {
3318 struct ifbrparam *param = arg;
3319
3320 if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3321 return EINVAL;
3322 }
3323
3324 if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3325 return EINVAL;
3326 }
3327
3328 sc->sc_filter_flags = param->ifbrp_filter;
3329
3330 return 0;
3331 }
3332
3333 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * arg)3334 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
3335 {
3336 struct ifbreq *req = arg;
3337 struct bridge_iflist *bif;
3338
3339 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3340 if (bif == NULL) {
3341 return ENOENT;
3342 }
3343
3344 bif->bif_addrmax = req->ifbr_addrmax;
3345 return 0;
3346 }
3347
3348 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * arg)3349 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
3350 {
3351 struct ifbreq *req = arg;
3352 struct bridge_iflist *bif = NULL;
3353 struct ifnet *ifs;
3354
3355 ifs = ifunit(req->ifbr_ifsname);
3356 if (ifs == NULL) {
3357 return ENOENT;
3358 }
3359
3360 if (IFNET_IS_INTCOPROC(ifs)) {
3361 return EINVAL;
3362 }
3363
3364 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3365 if (ifs == bif->bif_ifp) {
3366 return EBUSY;
3367 }
3368
3369 if (ifs->if_bridge != NULL) {
3370 return EBUSY;
3371 }
3372
3373 switch (ifs->if_type) {
3374 case IFT_ETHER:
3375 case IFT_L2VLAN:
3376 case IFT_IEEE8023ADLAG:
3377 break;
3378 case IFT_GIF:
3379 /* currently not supported */
3380 /* FALLTHRU */
3381 default:
3382 return EINVAL;
3383 }
3384
3385 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3386
3387 bif->bif_ifp = ifs;
3388 bif->bif_ifflags = IFBIF_SPAN;
3389
3390 ifnet_reference(bif->bif_ifp);
3391
3392 TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3393
3394 return 0;
3395 }
3396
3397 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * arg)3398 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
3399 {
3400 struct ifbreq *req = arg;
3401 struct bridge_iflist *bif;
3402 struct ifnet *ifs;
3403
3404 ifs = ifunit(req->ifbr_ifsname);
3405 if (ifs == NULL) {
3406 return ENOENT;
3407 }
3408
3409 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3410 if (ifs == bif->bif_ifp) {
3411 break;
3412 }
3413
3414 if (bif == NULL) {
3415 return ENOENT;
3416 }
3417
3418 bridge_delete_span(sc, bif);
3419
3420 return 0;
3421 }
3422
/*
 * BRIDGE_IOCTL_GBPARAM:
 *	Shared body for bridge_ioctl_gbparam32/64.  Expects 'sc' and 'req'
 *	in the expanding function's scope; only the type of 'req'
 *	(struct ifbropreq32 vs ifbropreq64) differs between callers.
 *	Copies the bridge-wide STP operational parameters out of sc_stp;
 *	the timer values are stored in 1/256 s units and shifted down.
 */
#define BRIDGE_IOCTL_GBPARAM do { \
	struct bstp_state *bs = &sc->sc_stp; \
	struct bstp_port *root_port; \
 \
	req->ifbop_maxage = bs->bs_bridge_max_age >> 8; \
	req->ifbop_hellotime = bs->bs_bridge_htime >> 8; \
	req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8; \
 \
	root_port = bs->bs_root_port; \
	if (root_port == NULL) \
	        req->ifbop_root_port = 0; \
	else \
	        req->ifbop_root_port = root_port->bp_ifp->if_index; \
 \
	req->ifbop_holdcount = bs->bs_txholdcount; \
	req->ifbop_priority = bs->bs_bridge_priority; \
	req->ifbop_protocol = bs->bs_protover; \
	req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost; \
	req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id; \
	req->ifbop_designated_root = bs->bs_root_pv.pv_root_id; \
	req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id; \
	req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec; \
	req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec; \
} while (0)
3447
/*
 * bridge_ioctl_gbparam32:
 *	Return bridge STP operational parameters (32-bit request).
 */
static int
bridge_ioctl_gbparam32(struct bridge_softc *sc, void *arg)
{
	struct ifbropreq32 *req = arg;

	BRIDGE_IOCTL_GBPARAM;
	return 0;
}
3456
/*
 * bridge_ioctl_gbparam64:
 *	Return bridge STP operational parameters (64-bit request).
 */
static int
bridge_ioctl_gbparam64(struct bridge_softc *sc, void *arg)
{
	struct ifbropreq64 *req = arg;

	BRIDGE_IOCTL_GBPARAM;
	return 0;
}
3465
3466 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * arg)3467 bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
3468 {
3469 struct ifbrparam *param = arg;
3470
3471 param->ifbrp_cexceeded = sc->sc_brtexceeded;
3472 return 0;
3473 }
3474
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *	Shared body for bridge_ioctl_gifsstp32/64.  Expects 'sc', 'bifstp'
 *	and 'error' in the expanding function's scope; only the type of
 *	'bifstp' (struct ifbpstpconf32 vs ifbpstpconf64) differs.
 *	Reports per-port STP state for members with IFBIF_STP set; an
 *	ifbpstp_len of 0 is a size probe.  The bridge lock is dropped
 *	around kalloc_data() and copyout().
 */
#define BRIDGE_IOCTL_GIFSSTP do { \
	struct bridge_iflist *bif; \
	struct bstp_port *bp; \
	struct ifbpstpreq bpreq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
 \
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if ((bif->bif_ifflags & IFBIF_STP) != 0) \
	                count++; \
	} \
 \
	buflen = sizeof (bpreq) * count; \
	if (bifstp->ifbpstp_len == 0) { \
	        bifstp->ifbpstp_len = buflen; \
	        return (0); \
	} \
 \
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
 \
	count = 0; \
	buf = outbuf; \
	len = min(bifstp->ifbpstp_len, buflen); \
	bzero(&bpreq, sizeof (bpreq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if (len < sizeof (bpreq)) \
	                break; \
 \
	        if ((bif->bif_ifflags & IFBIF_STP) == 0) \
	                continue; \
 \
	        bp = &bif->bif_stp; \
	        bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; \
	        bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; \
	        bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; \
	        bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; \
	        bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
	        bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; \
 \
	        memcpy(buf, &bpreq, sizeof (bpreq)); \
	        count++; \
	        buf += sizeof (bpreq); \
	        len -= sizeof (bpreq); \
	} \
 \
	BRIDGE_UNLOCK(sc); \
	bifstp->ifbpstp_len = sizeof (bpreq) * count; \
	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
	return (error); \
} while (0)
3530
/*
 * bridge_ioctl_gifsstp32:
 *	Return per-port STP state for a 32-bit caller.
 *	All of the work is done by the BRIDGE_IOCTL_GIFSSTP macro.
 */
static int
bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *arg)
{
	struct ifbpstpconf32 *bifstp = arg;
	int error = 0;

	BRIDGE_IOCTL_GIFSSTP;
	return error;
}
3540
/*
 * bridge_ioctl_gifsstp64:
 *	Return per-port STP state for a 64-bit caller.
 *	All of the work is done by the BRIDGE_IOCTL_GIFSSTP macro.
 */
static int
bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *arg)
{
	struct ifbpstpconf64 *bifstp = arg;
	int error = 0;

	BRIDGE_IOCTL_GIFSSTP;
	return error;
}
3550
/*
 * bridge_ioctl_sproto:
 *	Set the spanning-tree protocol version.
 *	Only functional on BRIDGESTP builds; otherwise EOPNOTSUPP.
 */
static int
bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
{
#if BRIDGESTP
	struct ifbrparam *param = arg;

	return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
#else /* !BRIDGESTP */
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#endif /* !BRIDGESTP */
}
3563
/*
 * bridge_ioctl_stxhc:
 *	Set the spanning-tree transmit hold count.
 *	Only functional on BRIDGESTP builds; otherwise EOPNOTSUPP.
 */
static int
bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
{
#if BRIDGESTP
	struct ifbrparam *param = arg;

	return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
#else /* !BRIDGESTP */
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#endif /* !BRIDGESTP */
}
3576
3577
3578 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * arg)3579 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *arg)
3580 {
3581 struct ifbrhostfilter *req = arg;
3582 struct bridge_iflist *bif;
3583
3584 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3585 if (bif == NULL) {
3586 return ENOENT;
3587 }
3588
3589 bzero(req, sizeof(struct ifbrhostfilter));
3590 if (bif->bif_flags & BIFF_HOST_FILTER) {
3591 req->ifbrhf_flags |= IFBRHF_ENABLED;
3592 bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3593 ETHER_ADDR_LEN);
3594 req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3595 }
3596 return 0;
3597 }
3598
3599 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * arg)3600 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *arg)
3601 {
3602 struct ifbrhostfilter *req = arg;
3603 struct bridge_iflist *bif;
3604
3605 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3606 if (bif == NULL) {
3607 return ENOENT;
3608 }
3609
3610 if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3611 bif->bif_flags |= BIFF_HOST_FILTER;
3612
3613 if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3614 bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3615 ETHER_ADDR_LEN);
3616 if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3617 ETHER_ADDR_LEN) != 0) {
3618 bif->bif_flags |= BIFF_HF_HWSRC;
3619 } else {
3620 bif->bif_flags &= ~BIFF_HF_HWSRC;
3621 }
3622 }
3623 if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3624 bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3625 if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3626 bif->bif_flags |= BIFF_HF_IPSRC;
3627 } else {
3628 bif->bif_flags &= ~BIFF_HF_IPSRC;
3629 }
3630 }
3631 } else {
3632 bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3633 BIFF_HF_IPSRC);
3634 bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3635 bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3636 }
3637
3638 return 0;
3639 }
3640
3641 static char *
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * buf,unsigned int * len_p)3642 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3643 unsigned int * count_p, char *buf, unsigned int *len_p)
3644 {
3645 unsigned int count = *count_p;
3646 struct ifbrmne ifbmne;
3647 unsigned int len = *len_p;
3648 struct mac_nat_entry *mne;
3649 unsigned long now;
3650
3651 bzero(&ifbmne, sizeof(ifbmne));
3652 LIST_FOREACH(mne, list, mne_list) {
3653 if (len < sizeof(ifbmne)) {
3654 break;
3655 }
3656 snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3657 "%s", mne->mne_bif->bif_ifp->if_xname);
3658 memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3659 sizeof(ifbmne.ifbmne_mac));
3660 now = (unsigned long) net_uptime();
3661 if (now < mne->mne_expire) {
3662 ifbmne.ifbmne_expire = mne->mne_expire - now;
3663 } else {
3664 ifbmne.ifbmne_expire = 0;
3665 }
3666 if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3667 ifbmne.ifbmne_af = AF_INET6;
3668 ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3669 } else {
3670 ifbmne.ifbmne_af = AF_INET;
3671 ifbmne.ifbmne_ip_addr = mne->mne_ip;
3672 }
3673 memcpy(buf, &ifbmne, sizeof(ifbmne));
3674 count++;
3675 buf += sizeof(ifbmne);
3676 len -= sizeof(ifbmne);
3677 }
3678 *count_p = count;
3679 *len_p = len;
3680 return buf;
3681 }
3682
3683 /*
3684 * bridge_ioctl_gmnelist()
3685 * Perform the get mac_nat_entry list ioctl.
3686 *
3687 * Note:
3688 * The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3689 * field size/layout except for the last field ifbml_buf, the user-supplied
3690 * buffer pointer. That is passed in separately via the 'user_addr'
3691 * parameter from the respective 32-bit or 64-bit ioctl routine.
3692 */
3693 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3694 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3695 user_addr_t user_addr)
3696 {
3697 unsigned int count;
3698 char *buf;
3699 int error = 0;
3700 char *outbuf = NULL;
3701 struct mac_nat_entry *mne;
3702 unsigned int buflen;
3703 unsigned int len;
3704
3705 mnl->ifbml_elsize = sizeof(struct ifbrmne);
3706 count = 0;
3707 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3708 count++;
3709 }
3710 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3711 count++;
3712 }
3713 buflen = sizeof(struct ifbrmne) * count;
3714 if (buflen == 0 || mnl->ifbml_len == 0) {
3715 mnl->ifbml_len = buflen;
3716 return error;
3717 }
3718 BRIDGE_UNLOCK(sc);
3719 outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3720 BRIDGE_LOCK(sc);
3721 count = 0;
3722 buf = outbuf;
3723 len = min(mnl->ifbml_len, buflen);
3724 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3725 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3726 mnl->ifbml_len = count * sizeof(struct ifbrmne);
3727 BRIDGE_UNLOCK(sc);
3728 error = copyout(outbuf, user_addr, mnl->ifbml_len);
3729 kfree_data(outbuf, buflen);
3730 BRIDGE_LOCK(sc);
3731 return error;
3732 }
3733
/*
 * bridge_ioctl_gmnelist64:
 *	64-bit wrapper: the 32/64-bit structs share a layout except for
 *	ifbml_buf, so 'arg' is passed through and the user buffer pointer
 *	is extracted here.
 */
static int
bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *arg)
{
	struct ifbrmnelist64 *mnl = arg;

	return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
}
3741
/*
 * bridge_ioctl_gmnelist32:
 *	32-bit wrapper: the 32/64-bit structs share a layout except for
 *	ifbml_buf, so 'arg' is passed through and the 32-bit user buffer
 *	pointer is widened here.
 */
static int
bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *arg)
{
	struct ifbrmnelist32 *mnl = arg;

	return bridge_ioctl_gmnelist(sc, arg,
	           CAST_USER_ADDR_T(mnl->ifbml_buf));
}
3750
3751 /*
3752 * bridge_ioctl_gifstats()
3753 * Return per-member stats.
3754 *
3755 * Note:
3756 * The ifbrmreq32 and ifbrmreq64 structures have the same
3757 * field size/layout except for the last field brmr_buf, the user-supplied
3758 * buffer pointer. That is passed in separately via the 'user_addr'
3759 * parameter from the respective 32-bit or 64-bit ioctl routine.
3760 */
3761 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)3762 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
3763 user_addr_t user_addr)
3764 {
3765 struct bridge_iflist *bif;
3766 int error = 0;
3767 unsigned int buflen;
3768
3769 bif = bridge_lookup_member(sc, mreq->brmr_ifname);
3770 if (bif == NULL) {
3771 error = ENOENT;
3772 goto done;
3773 }
3774
3775 buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
3776 if (buflen == 0 || mreq->brmr_len == 0) {
3777 mreq->brmr_len = buflen;
3778 goto done;
3779 }
3780 if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
3781 error = ENOBUFS;
3782 goto done;
3783 }
3784 mreq->brmr_len = buflen;
3785 error = copyout(&bif->bif_stats, user_addr, buflen);
3786 done:
3787 return error;
3788 }
3789
/*
 * bridge_ioctl_gifstats32:
 *	32-bit wrapper for bridge_ioctl_gifstats(); extracts the
 *	user buffer pointer from the 32-bit request.
 */
static int
bridge_ioctl_gifstats32(struct bridge_softc *sc, void *arg)
{
	struct ifbrmreq32 *mreq = arg;

	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
}
3797
/*
 * bridge_ioctl_gifstats64:
 *	64-bit wrapper for bridge_ioctl_gifstats(); extracts the
 *	user buffer pointer from the 64-bit request.
 */
static int
bridge_ioctl_gifstats64(struct bridge_softc *sc, void *arg)
{
	struct ifbrmreq64 *mreq = arg;

	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
}
3805
3806 /*
3807 * bridge_proto_attach_changed
3808 *
3809 * Called when protocol attachment on the interface changes.
3810 */
3811 static void
bridge_proto_attach_changed(struct ifnet * ifp)3812 bridge_proto_attach_changed(struct ifnet *ifp)
3813 {
3814 boolean_t changed = FALSE;
3815 struct bridge_iflist *bif;
3816 boolean_t input_broadcast;
3817 struct bridge_softc *sc = ifp->if_bridge;
3818
3819 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3820 if (sc == NULL) {
3821 return;
3822 }
3823 input_broadcast = interface_needs_input_broadcast(ifp);
3824 BRIDGE_LOCK(sc);
3825 bif = bridge_lookup_member_if(sc, ifp);
3826 if (bif != NULL) {
3827 changed = bif_set_input_broadcast(bif, input_broadcast);
3828 }
3829 BRIDGE_UNLOCK(sc);
3830 if (changed) {
3831 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
3832 "%s input broadcast %s", ifp->if_xname,
3833 input_broadcast ? "ENABLED" : "DISABLED");
3834 }
3835 return;
3836 }
3837
3838 /*
3839 * interface_media_active:
3840 *
3841 * Tells if an interface media is active.
3842 */
3843 static int
interface_media_active(struct ifnet * ifp)3844 interface_media_active(struct ifnet *ifp)
3845 {
3846 struct ifmediareq ifmr;
3847 int status = 0;
3848
3849 bzero(&ifmr, sizeof(ifmr));
3850 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
3851 if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
3852 status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
3853 }
3854 }
3855
3856 return status;
3857 }
3858
3859 /*
3860 * bridge_updatelinkstatus:
3861 *
3862 * Update the media active status of the bridge based on the
3863 * media active status of its member.
3864 * If changed, return the corresponding onf/off link event.
3865 */
3866 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)3867 bridge_updatelinkstatus(struct bridge_softc *sc)
3868 {
3869 struct bridge_iflist *bif;
3870 int active_member = 0;
3871 u_int32_t event_code = 0;
3872
3873 BRIDGE_LOCK_ASSERT_HELD(sc);
3874
3875 /*
3876 * Find out if we have an active interface
3877 */
3878 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
3879 if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
3880 active_member = 1;
3881 break;
3882 }
3883 }
3884
3885 if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3886 sc->sc_flags |= SCF_MEDIA_ACTIVE;
3887 event_code = KEV_DL_LINK_ON;
3888 } else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3889 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
3890 event_code = KEV_DL_LINK_OFF;
3891 }
3892
3893 return event_code;
3894 }
3895
3896 /*
3897 * bridge_iflinkevent:
3898 */
3899 static void
bridge_iflinkevent(struct ifnet * ifp)3900 bridge_iflinkevent(struct ifnet *ifp)
3901 {
3902 struct bridge_softc *sc = ifp->if_bridge;
3903 struct bridge_iflist *bif;
3904 u_int32_t event_code = 0;
3905 int media_active;
3906
3907 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3908
3909 /* Check if the interface is a bridge member */
3910 if (sc == NULL) {
3911 return;
3912 }
3913
3914 media_active = interface_media_active(ifp);
3915 BRIDGE_LOCK(sc);
3916 bif = bridge_lookup_member_if(sc, ifp);
3917 if (bif != NULL) {
3918 if (media_active) {
3919 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
3920 } else {
3921 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
3922 }
3923 if (sc->sc_mac_nat_bif != NULL) {
3924 bridge_mac_nat_flush_entries(sc, bif);
3925 }
3926
3927 event_code = bridge_updatelinkstatus(sc);
3928 }
3929 BRIDGE_UNLOCK(sc);
3930
3931 if (event_code != 0) {
3932 bridge_link_event(sc->sc_ifp, event_code);
3933 }
3934 }
3935
3936 /*
3937 * bridge_delayed_callback:
3938 *
3939 * Makes a delayed call
3940 */
3941 static void
bridge_delayed_callback(void * param)3942 bridge_delayed_callback(void *param)
3943 {
3944 struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
3945 struct bridge_softc *sc = call->bdc_sc;
3946
3947 #if BRIDGE_DELAYED_CALLBACK_DEBUG
3948 if (bridge_delayed_callback_delay > 0) {
3949 struct timespec ts;
3950
3951 ts.tv_sec = bridge_delayed_callback_delay;
3952 ts.tv_nsec = 0;
3953
3954 BRIDGE_LOG(LOG_NOTICE, 0,
3955 "sleeping for %d seconds",
3956 bridge_delayed_callback_delay);
3957
3958 msleep(&bridge_delayed_callback_delay, NULL, PZERO,
3959 __func__, &ts);
3960
3961 BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
3962 }
3963 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
3964
3965 BRIDGE_LOCK(sc);
3966
3967 #if BRIDGE_DELAYED_CALLBACK_DEBUG
3968 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
3969 "%s call 0x%llx flags 0x%x",
3970 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
3971 call->bdc_flags);
3972 }
3973 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
3974
3975 if (call->bdc_flags & BDCF_CANCELLING) {
3976 wakeup(call);
3977 } else {
3978 if ((sc->sc_flags & SCF_DETACHING) == 0) {
3979 (*call->bdc_func)(sc);
3980 }
3981 }
3982 call->bdc_flags &= ~BDCF_OUTSTANDING;
3983 BRIDGE_UNLOCK(sc);
3984 }
3985
3986 /*
3987 * bridge_schedule_delayed_call:
3988 *
3989 * Schedule a function to be called on a separate thread
3990 * The actual call may be scheduled to run at a given time or ASAP.
3991 */
3992 static void
3993 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
3994 {
3995 uint64_t deadline = 0;
3996 struct bridge_softc *sc = call->bdc_sc;
3997
3998 BRIDGE_LOCK_ASSERT_HELD(sc);
3999
4000 if ((sc->sc_flags & SCF_DETACHING) ||
4001 (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4002 return;
4003 }
4004
4005 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4006 nanoseconds_to_absolutetime(
4007 (uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4008 call->bdc_ts.tv_nsec, &deadline);
4009 clock_absolutetime_interval_to_deadline(deadline, &deadline);
4010 }
4011
4012 call->bdc_flags = BDCF_OUTSTANDING;
4013
4014 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4015 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4016 "%s call 0x%llx flags 0x%x",
4017 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4018 call->bdc_flags);
4019 }
4020 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4021
4022 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4023 thread_call_func_delayed(
4024 (thread_call_func_t)bridge_delayed_callback,
4025 call, deadline);
4026 } else {
4027 if (call->bdc_thread_call == NULL) {
4028 call->bdc_thread_call = thread_call_allocate(
4029 (thread_call_func_t)bridge_delayed_callback,
4030 call);
4031 }
4032 thread_call_enter(call->bdc_thread_call);
4033 }
4034 }
4035
4036 /*
4037 * bridge_cancel_delayed_call:
4038 *
4039 * Cancel a queued or running delayed call.
4040 * If call is running, does not return until the call is done to
 * prevent race condition with the bridge interface getting destroyed
4042 */
static void
bridge_cancel_delayed_call(struct bridge_delayed_call *call)
{
	boolean_t result;
	struct bridge_softc *sc = call->bdc_sc;

	/*
	 * The call was never scheduled
	 */
	if (sc == NULL) {
		return;
	}

	BRIDGE_LOCK_ASSERT_HELD(sc);

	/* block any re-scheduling of this call while we drain it */
	call->bdc_flags |= BDCF_CANCELLING;

	while (call->bdc_flags & BDCF_OUTSTANDING) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
		    "%s call 0x%llx flags 0x%x",
		    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
		    call->bdc_flags);
		result = thread_call_func_cancel(
			(thread_call_func_t)bridge_delayed_callback, call, FALSE);

		if (result) {
			/*
			 * We managed to dequeue the delayed call
			 */
			call->bdc_flags &= ~BDCF_OUTSTANDING;
		} else {
			/*
			 * Wait for delayed call to be done running;
			 * the callback wakeup()s on 'call' when it sees
			 * BDCF_CANCELLING, and msleep() atomically drops
			 * and re-acquires sc_mtx around the wait.
			 */
			msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
		}
	}
	call->bdc_flags &= ~BDCF_CANCELLING;
}
4082
4083 /*
4084 * bridge_cleanup_delayed_call:
4085 *
 * Dispose of the resources allocated for a delayed call.
 * Assumes the delayed call is not queued or running.
4088 */
4089 static void
4090 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4091 {
4092 boolean_t result;
4093 struct bridge_softc *sc = call->bdc_sc;
4094
4095 /*
4096 * The call was never scheduled
4097 */
4098 if (sc == NULL) {
4099 return;
4100 }
4101
4102 BRIDGE_LOCK_ASSERT_HELD(sc);
4103
4104 VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4105 VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4106
4107 if (call->bdc_thread_call != NULL) {
4108 result = thread_call_free(call->bdc_thread_call);
4109 if (result == FALSE) {
4110 panic("%s thread_call_free() failed for call %p",
4111 __func__, call);
4112 }
4113 call->bdc_thread_call = NULL;
4114 }
4115 }
4116
4117 /*
4118 * bridge_init:
4119 *
4120 * Initialize a bridge interface.
4121 */
4122 static int
4123 bridge_init(struct ifnet *ifp)
4124 {
4125 struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4126 errno_t error;
4127
4128 BRIDGE_LOCK_ASSERT_HELD(sc);
4129
4130 if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4131 return 0;
4132 }
4133
4134 error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4135
4136 /*
4137 * Calling bridge_aging_timer() is OK as there are no entries to
4138 * age so we're just going to arm the timer
4139 */
4140 bridge_aging_timer(sc);
4141 #if BRIDGESTP
4142 if (error == 0) {
4143 bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4144 }
4145 #endif /* BRIDGESTP */
4146 return error;
4147 }
4148
4149 /*
4150 * bridge_ifstop:
4151 *
4152 * Stop the bridge interface.
4153 */
static void
bridge_ifstop(struct ifnet *ifp, int disable)
{
#pragma unused(disable)
	struct bridge_softc *sc = ifp->if_softc;

	BRIDGE_LOCK_ASSERT_HELD(sc);

	/* nothing to do if the interface is not marked running */
	if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
		return;
	}

	/* stop aging the forwarding table */
	bridge_cancel_delayed_call(&sc->sc_aging_timer);

#if BRIDGESTP
	bstp_stop(&sc->sc_stp);
#endif /* BRIDGESTP */

	/* flush dynamically-learned entries, then clear IFF_RUNNING */
	bridge_rtflush(sc, IFBF_FLUSHDYN);
	(void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
}
4175
4176 /*
4177 * bridge_compute_cksum:
4178 *
4179 * If the packet has checksum flags, compare the hardware checksum
4180 * capabilities of the source and destination interfaces. If they
4181 * are the same, there's nothing to do. If they are different,
4182 * finalize the checksum so that it can be sent on the destination
4183 * interface.
4184 */
4185 static void
4186 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4187 {
4188 uint32_t csum_flags;
4189 uint16_t dst_hw_csum;
4190 uint32_t did_sw = 0;
4191 struct ether_header *eh;
4192 uint16_t src_hw_csum;
4193
4194 if (src_if == dst_if) {
4195 return;
4196 }
4197 csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4198 if (csum_flags == 0) {
4199 /* no checksum offload */
4200 return;
4201 }
4202
4203 /*
4204 * if destination/source differ in checksum offload
4205 * capabilities, finalize/compute the checksum
4206 */
4207 dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4208 src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4209 if (dst_hw_csum == src_hw_csum) {
4210 return;
4211 }
4212 eh = mtod(m, struct ether_header *);
4213 switch (ntohs(eh->ether_type)) {
4214 case ETHERTYPE_IP:
4215 did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4216 break;
4217 case ETHERTYPE_IPV6:
4218 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4219 break;
4220 }
4221 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4222 "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4223 src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4224 m->m_pkthdr.csum_flags);
4225 }
4226
4227 static errno_t
4228 bridge_transmit(struct ifnet * ifp, struct mbuf *m)
4229 {
4230 struct flowadv adv = { .code = FADV_SUCCESS };
4231 errno_t error;
4232
4233 error = dlil_output(ifp, 0, m, NULL, NULL, 1, &adv);
4234 if (error == 0) {
4235 if (adv.code == FADV_FLOW_CONTROLLED) {
4236 error = EQFULL;
4237 } else if (adv.code == FADV_SUSPENDED) {
4238 error = EQSUSPENDED;
4239 }
4240 }
4241 return error;
4242 }
4243
4244 static int
4245 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4246 bool *is_fragmented)
4247 {
4248 int newoff;
4249
4250 *is_fragmented = false;
4251 while (1) {
4252 newoff = ip6_nexthdr(m, off, proto, nxtp);
4253 if (newoff < 0) {
4254 return off;
4255 } else if (newoff < off) {
4256 return -1; /* invalid */
4257 } else if (newoff == off) {
4258 return newoff;
4259 }
4260 off = newoff;
4261 proto = *nxtp;
4262 if (proto == IPPROTO_FRAGMENT) {
4263 *is_fragmented = true;
4264 }
4265 }
4266 }
4267
4268 static int
4269 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4270 ip_packet_info_t info_p, struct bripstats * stats_p)
4271 {
4272 int error = 0;
4273 u_int hlen;
4274 u_int ip_hlen;
4275 u_int ip_pay_len;
4276 struct mbuf * m0 = *mp;
4277 int off;
4278 int opt_len = 0;
4279 int proto = 0;
4280
4281 bzero(info_p, sizeof(*info_p));
4282 if (is_ipv4) {
4283 struct ip * ip;
4284 u_int ip_total_len;
4285
4286 /* IPv4 */
4287 hlen = mac_hlen + sizeof(struct ip);
4288 if (m0->m_pkthdr.len < hlen) {
4289 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4290 "Short IP packet %d < %d",
4291 m0->m_pkthdr.len, hlen);
4292 error = _EBADIP;
4293 stats_p->bips_bad_ip++;
4294 goto done;
4295 }
4296 if (m0->m_len < hlen) {
4297 *mp = m0 = m_pullup(m0, hlen);
4298 if (m0 == NULL) {
4299 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4300 "m_pullup failed hlen %d",
4301 hlen);
4302 error = ENOBUFS;
4303 stats_p->bips_bad_ip++;
4304 goto done;
4305 }
4306 }
4307 ip = (struct ip *)(void *)(mtod(m0, uint8_t *) + mac_hlen);
4308 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4309 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4310 "bad IP version");
4311 error = _EBADIP;
4312 stats_p->bips_bad_ip++;
4313 goto done;
4314 }
4315 ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4316 if (ip_hlen < sizeof(struct ip)) {
4317 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4318 "bad IP header length %d < %d",
4319 ip_hlen,
4320 (int)sizeof(struct ip));
4321 error = _EBADIP;
4322 stats_p->bips_bad_ip++;
4323 goto done;
4324 }
4325 hlen = mac_hlen + ip_hlen;
4326 if (m0->m_len < hlen) {
4327 *mp = m0 = m_pullup(m0, hlen);
4328 if (m0 == NULL) {
4329 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4330 "m_pullup failed hlen %d",
4331 hlen);
4332 error = ENOBUFS;
4333 stats_p->bips_bad_ip++;
4334 goto done;
4335 }
4336 }
4337
4338 ip_total_len = ntohs(ip->ip_len);
4339 if (ip_total_len < ip_hlen) {
4340 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4341 "IP total len %d < header len %d",
4342 ip_total_len, ip_hlen);
4343 error = _EBADIP;
4344 stats_p->bips_bad_ip++;
4345 goto done;
4346 }
4347 if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4348 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4349 "invalid IP payload length %d > %d",
4350 ip_total_len,
4351 (m0->m_pkthdr.len - mac_hlen));
4352 error = _EBADIP;
4353 stats_p->bips_bad_ip++;
4354 goto done;
4355 }
4356 ip_pay_len = ip_total_len - ip_hlen;
4357 info_p->ip_proto = ip->ip_p;
4358 info_p->ip_hdr.ip = ip;
4359 #define FRAG_BITS (IP_OFFMASK | IP_MF)
4360 if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4361 info_p->ip_is_fragmented = true;
4362 }
4363 stats_p->bips_ip++;
4364 } else {
4365 struct ip6_hdr *ip6;
4366
4367 /* IPv6 */
4368 hlen = mac_hlen + sizeof(struct ip6_hdr);
4369 if (m0->m_pkthdr.len < hlen) {
4370 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4371 "short IPv6 packet %d < %d",
4372 m0->m_pkthdr.len, hlen);
4373 error = _EBADIPV6;
4374 stats_p->bips_bad_ip6++;
4375 goto done;
4376 }
4377 if (m0->m_len < hlen) {
4378 *mp = m0 = m_pullup(m0, hlen);
4379 if (m0 == NULL) {
4380 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4381 "m_pullup failed hlen %d",
4382 hlen);
4383 error = ENOBUFS;
4384 stats_p->bips_bad_ip6++;
4385 goto done;
4386 }
4387 }
4388 ip6 = (struct ip6_hdr *)(mtod(m0, uint8_t *) + mac_hlen);
4389 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4390 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4391 "bad IPv6 version");
4392 error = _EBADIPV6;
4393 stats_p->bips_bad_ip6++;
4394 goto done;
4395 }
4396 off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4397 &info_p->ip_is_fragmented);
4398 if (off < 0 || m0->m_pkthdr.len < off) {
4399 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4400 "ip6_lasthdr() returned %d",
4401 off);
4402 error = _EBADIPV6;
4403 stats_p->bips_bad_ip6++;
4404 goto done;
4405 }
4406 ip_hlen = sizeof(*ip6);
4407 opt_len = off - mac_hlen - ip_hlen;
4408 if (opt_len < 0) {
4409 error = _EBADIPV6;
4410 stats_p->bips_bad_ip6++;
4411 goto done;
4412 }
4413 info_p->ip_proto = proto;
4414 info_p->ip_hdr.ip6 = ip6;
4415 ip_pay_len = ntohs(ip6->ip6_plen);
4416 if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4417 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4418 "invalid IPv6 payload length %d > %d",
4419 ip_pay_len,
4420 (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4421 error = _EBADIPV6;
4422 stats_p->bips_bad_ip6++;
4423 goto done;
4424 }
4425 stats_p->bips_ip6++;
4426 }
4427 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4428 "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4429 is_ipv4 ? '4' : '6',
4430 proto, ip_hlen, ip_pay_len, opt_len,
4431 m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4432 info_p->ip_hlen = ip_hlen;
4433 info_p->ip_pay_len = ip_pay_len;
4434 info_p->ip_opt_len = opt_len;
4435 info_p->ip_is_ipv4 = is_ipv4;
4436 done:
4437 return error;
4438 }
4439
4440 static int
4441 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4442 ip_packet_info_t info_p, struct bripstats * stats_p)
4443 {
4444 int error;
4445 u_int hlen;
4446
4447 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4448 if (error != 0) {
4449 goto done;
4450 }
4451 if (info_p->ip_proto != IPPROTO_TCP) {
4452 /* not a TCP frame, not an error, just a bad guess */
4453 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4454 "non-TCP (%d) IPv%c frame %d bytes",
4455 info_p->ip_proto, is_ipv4 ? '4' : '6',
4456 (*mp)->m_pkthdr.len);
4457 goto done;
4458 }
4459 if (info_p->ip_is_fragmented) {
4460 /* both TSO and IP fragmentation don't make sense */
4461 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4462 "fragmented TSO packet?");
4463 stats_p->bips_bad_tcp++;
4464 error = _EBADTCP;
4465 goto done;
4466 }
4467 hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4468 info_p->ip_opt_len;
4469 if ((*mp)->m_len < hlen) {
4470 *mp = m_pullup(*mp, hlen);
4471 if (*mp == NULL) {
4472 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4473 "m_pullup %d failed",
4474 hlen);
4475 stats_p->bips_bad_tcp++;
4476 error = _EBADTCP;
4477 goto done;
4478 }
4479 }
4480 info_p->ip_proto_hdr = ((caddr_t)info_p->ip_hdr.ptr) +
4481 info_p->ip_hlen + info_p->ip_opt_len;
4482 done:
4483 return error;
4484 }
4485
4486 static inline void
4487 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4488 {
4489 if (proto == IPPROTO_TCP) {
4490 stats_p->brcs_tcp_checksum++;
4491 } else {
4492 stats_p->brcs_udp_checksum++;
4493 }
4494 return;
4495 }
4496
4497 static bool
4498 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4499 {
4500 uint16_t ether_type;
4501 bool is_ip = TRUE;
4502
4503 ether_type = ntohs(eh->ether_type);
4504 switch (ether_type) {
4505 case ETHERTYPE_IP:
4506 *is_ipv4 = TRUE;
4507 break;
4508 case ETHERTYPE_IPV6:
4509 *is_ipv4 = FALSE;
4510 break;
4511 default:
4512 is_ip = FALSE;
4513 break;
4514 }
4515 return is_ip;
4516 }
4517
4518 static errno_t
4519 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4520 {
4521 struct brcsumstats *csum_stats_p;
4522 struct ether_header *eh;
4523 errno_t error = 0;
4524 ip_packet_info info;
4525 bool is_ipv4;
4526 struct mbuf * m;
4527 u_int mac_hlen = sizeof(struct ether_header);
4528 uint16_t sum;
4529 bool valid;
4530
4531 eh = mtod(*mp, struct ether_header *);
4532 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4533 goto done;
4534 }
4535 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4536 &stats_p->brms_out_ip);
4537 m = *mp;
4538 if (error != 0) {
4539 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4540 "bridge_get_ip_proto failed %d",
4541 error);
4542 goto done;
4543 }
4544 if (is_ipv4) {
4545 if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4546 /* hardware offloaded IP header checksum */
4547 valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4548 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4549 "IP checksum HW %svalid",
4550 valid ? "" : "in");
4551 if (!valid) {
4552 stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum++;
4553 error = _EBADIPCHECKSUM;
4554 goto done;
4555 }
4556 stats_p->brms_out_cksum_good_hw.brcs_ip_checksum++;
4557 } else {
4558 /* verify */
4559 sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4560 valid = (sum == 0);
4561 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4562 "IP checksum SW %svalid",
4563 valid ? "" : "in");
4564 if (!valid) {
4565 stats_p->brms_out_cksum_bad.brcs_ip_checksum++;
4566 error = _EBADIPCHECKSUM;
4567 goto done;
4568 }
4569 stats_p->brms_out_cksum_good.brcs_ip_checksum++;
4570 }
4571 }
4572 if (info.ip_is_fragmented) {
4573 /* can't verify checksum on fragmented packets */
4574 goto done;
4575 }
4576 switch (info.ip_proto) {
4577 case IPPROTO_TCP:
4578 stats_p->brms_out_ip.bips_tcp++;
4579 break;
4580 case IPPROTO_UDP:
4581 stats_p->brms_out_ip.bips_udp++;
4582 break;
4583 default:
4584 goto done;
4585 }
4586 /* check for hardware offloaded UDP/TCP checksum */
4587 #define HW_CSUM (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4588 if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4589 /* checksum verified by hardware */
4590 valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4591 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4592 "IPv%c %s checksum HW 0x%x %svalid",
4593 is_ipv4 ? '4' : '6',
4594 (info.ip_proto == IPPROTO_TCP)
4595 ? "TCP" : "UDP",
4596 m->m_pkthdr.csum_data,
4597 valid ? "" : "in" );
4598 if (!valid) {
4599 /* bad checksum */
4600 csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4601 error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4602 : _EBADTCPCHECKSUM;
4603 } else {
4604 /* good checksum */
4605 csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4606 }
4607 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4608 goto done;
4609 }
4610 m->m_data += mac_hlen;
4611 m->m_len -= mac_hlen;
4612 m->m_pkthdr.len -= mac_hlen;
4613 if (is_ipv4) {
4614 sum = inet_cksum(m, info.ip_proto,
4615 info.ip_hlen,
4616 info.ip_pay_len);
4617 } else {
4618 sum = inet6_cksum(m, info.ip_proto,
4619 info.ip_hlen + info.ip_opt_len,
4620 info.ip_pay_len - info.ip_opt_len);
4621 }
4622 valid = (sum == 0);
4623 if (valid) {
4624 csum_stats_p = &stats_p->brms_out_cksum_good;
4625 } else {
4626 csum_stats_p = &stats_p->brms_out_cksum_bad;
4627 error = (info.ip_proto == IPPROTO_TCP)
4628 ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4629 }
4630 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4631 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4632 "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4633 is_ipv4 ? '4' : '6',
4634 (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4635 valid ? "" : "in",
4636 sum, info.ip_hlen, info.ip_pay_len);
4637 m->m_data -= mac_hlen;
4638 m->m_len += mac_hlen;
4639 m->m_pkthdr.len += mac_hlen;
4640 done:
4641 return error;
4642 }
4643
4644 static errno_t
4645 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
4646 struct ifbrmstats * stats_p)
4647 {
4648 uint16_t * csum_p;
4649 errno_t error = 0;
4650 u_int hlen;
4651 struct mbuf * m0 = *mp;
4652 u_int mac_hlen = sizeof(struct ether_header);
4653 u_int pkt_hdr_len;
4654 struct tcphdr * tcp;
4655 u_int tcp_hlen;
4656 struct udphdr * udp;
4657
4658 if (info_p->ip_is_ipv4) {
4659 /* compute IP header checksum */
4660 info_p->ip_hdr.ip->ip_sum = 0;
4661 info_p->ip_hdr.ip->ip_sum = inet_cksum(m0, 0, mac_hlen,
4662 info_p->ip_hlen);
4663 stats_p->brms_in_computed_cksum.brcs_ip_checksum++;
4664 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4665 "IPv4 checksum 0x%x",
4666 ntohs(info_p->ip_hdr.ip->ip_sum));
4667 }
4668 if (info_p->ip_is_fragmented) {
4669 /* can't compute checksum on fragmented packets */
4670 goto done;
4671 }
4672 pkt_hdr_len = m0->m_pkthdr.len;
4673 switch (info_p->ip_proto) {
4674 case IPPROTO_TCP:
4675 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
4676 + sizeof(struct tcphdr);
4677 if (m0->m_len < hlen) {
4678 *mp = m0 = m_pullup(m0, hlen);
4679 if (m0 == NULL) {
4680 stats_p->brms_in_ip.bips_bad_tcp++;
4681 error = _EBADTCP;
4682 goto done;
4683 }
4684 }
4685 tcp = (struct tcphdr *)(void *)
4686 ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4687 + info_p->ip_opt_len);
4688 tcp_hlen = tcp->th_off << 2;
4689 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
4690 if (hlen > pkt_hdr_len) {
4691 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4692 "bad tcp header length %u",
4693 tcp_hlen);
4694 stats_p->brms_in_ip.bips_bad_tcp++;
4695 error = _EBADTCP;
4696 goto done;
4697 }
4698 csum_p = &tcp->th_sum;
4699 stats_p->brms_in_ip.bips_tcp++;
4700 break;
4701 case IPPROTO_UDP:
4702 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
4703 if (m0->m_len < hlen) {
4704 *mp = m0 = m_pullup(m0, hlen);
4705 if (m0 == NULL) {
4706 stats_p->brms_in_ip.bips_bad_udp++;
4707 error = ENOBUFS;
4708 goto done;
4709 }
4710 }
4711 udp = (struct udphdr *)(void *)
4712 ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4713 + info_p->ip_opt_len);
4714 csum_p = &udp->uh_sum;
4715 stats_p->brms_in_ip.bips_udp++;
4716 break;
4717 default:
4718 /* not TCP or UDP */
4719 goto done;
4720 }
4721 *csum_p = 0;
4722 m0->m_data += mac_hlen;
4723 m0->m_len -= mac_hlen;
4724 m0->m_pkthdr.len -= mac_hlen;
4725 if (info_p->ip_is_ipv4) {
4726 *csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
4727 info_p->ip_pay_len);
4728 } else {
4729 *csum_p = inet6_cksum(m0, info_p->ip_proto,
4730 info_p->ip_hlen + info_p->ip_opt_len,
4731 info_p->ip_pay_len - info_p->ip_opt_len);
4732 }
4733 if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
4734 /* RFC 1122 4.1.3.4 */
4735 *csum_p = 0xffff;
4736 }
4737 m0->m_data -= mac_hlen;
4738 m0->m_len += mac_hlen;
4739 m0->m_pkthdr.len += mac_hlen;
4740 proto_csum_stats_increment(info_p->ip_proto,
4741 &stats_p->brms_in_computed_cksum);
4742
4743 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4744 "IPv%c %s set checksum 0x%x",
4745 info_p->ip_is_ipv4 ? '4' : '6',
4746 (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4747 ntohs(*csum_p));
4748 done:
4749 return error;
4750 }
4751
4752 static errno_t
4753 bridge_send(struct ifnet *src_ifp,
4754 struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4755 {
4756 switch (cksum_op) {
4757 case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
4758 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4759 break;
4760 case CHECKSUM_OPERATION_FINALIZE:
4761 /* the checksum might not be correct, finalize now */
4762 bridge_finalize_cksum(dst_ifp, m);
4763 break;
4764 case CHECKSUM_OPERATION_COMPUTE:
4765 bridge_compute_cksum(src_ifp, dst_ifp, m);
4766 break;
4767 default:
4768 break;
4769 }
4770 #if HAS_IF_CAP
4771 /*
4772 * If underlying interface can not do VLAN tag insertion itself
4773 * then attach a packet tag that holds it.
4774 */
4775 if ((m->m_flags & M_VLANTAG) &&
4776 (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4777 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4778 if (m == NULL) {
4779 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4780 "%s: unable to prepend VLAN header",
4781 dst_ifp->if_xname);
4782 (void) ifnet_stat_increment_out(dst_ifp,
4783 0, 0, 1);
4784 return 0;
4785 }
4786 m->m_flags &= ~M_VLANTAG;
4787 }
4788 #endif /* HAS_IF_CAP */
4789 return bridge_transmit(dst_ifp, m);
4790 }
4791
4792 static errno_t
4793 bridge_send_tso(struct ifnet *dst_ifp, struct mbuf *m, bool is_ipv4)
4794 {
4795 errno_t error;
4796 u_int mac_hlen;
4797
4798 mac_hlen = sizeof(struct ether_header);
4799
4800 #if HAS_IF_CAP
4801 /*
4802 * If underlying interface can not do VLAN tag insertion itself
4803 * then attach a packet tag that holds it.
4804 */
4805 if ((m->m_flags & M_VLANTAG) &&
4806 (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4807 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4808 if (m == NULL) {
4809 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4810 "%s: unable to prepend VLAN header",
4811 dst_ifp->if_xname);
4812 (void) ifnet_stat_increment_out(dst_ifp,
4813 0, 0, 1);
4814 error = ENOBUFS;
4815 goto done;
4816 }
4817 m->m_flags &= ~M_VLANTAG;
4818 mac_hlen += ETHER_VLAN_ENCAP_LEN;
4819 }
4820 #endif /* HAS_IF_CAP */
4821 error = gso_tcp(dst_ifp, &m, mac_hlen, is_ipv4, TRUE);
4822 return error;
4823 }
4824
4825 /*
4826 * tso_hwassist:
4827 * - determine whether the destination interface supports TSO offload
4828 * - if the packet is already marked for offload and the hardware supports
4829 * it, just allow the packet to continue on
4830 * - if not, parse the packet headers to verify that this is a large TCP
4831 * packet requiring segmentation; if the hardware doesn't support it
4832 * set need_sw_tso; otherwise, mark the packet for TSO offload
4833 */
static int
tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
    bool * need_sw_tso, bool * supports_cksum)
{
	int error = 0;
	u_int32_t if_csum;
	u_int32_t if_tso;
	u_int32_t mbuf_tso;

	if (is_ipv4) {
		/*
		 * Enable both TCP and IP offload if the hardware supports it.
		 * If the hardware doesn't support TCP offload, *supports_cksum
		 * will be false so we won't set either offload.
		 */
		if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
		*supports_cksum = (if_csum & CSUM_TCP) != 0;
		if_tso = IFNET_TSO_IPV4;
		mbuf_tso = CSUM_TSO_IPV4;
	} else {
		/* IPv6: only the TCP checksum offload capability matters */
		*supports_cksum = (ifp->if_hwassist & CSUM_TCPIPV6) != 0;
		if_csum = CSUM_TCPIPV6;
		if_tso = IFNET_TSO_IPV6;
		mbuf_tso = CSUM_TSO_IPV6;
	}
	*need_sw_tso = false;
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
	    "%s: does%s support checksum 0x%x if_csum 0x%x",
	    ifp->if_xname, *supports_cksum ? "" : " not",
	    ifp->if_hwassist, if_csum);
	if ((ifp->if_hwassist & if_tso) != 0 &&
	    ((*mp)->m_pkthdr.csum_flags & mbuf_tso) != 0) {
		/* hardware TSO, mbuf already marked */
	} else {
		/* verify that this is a large TCP frame */
		uint32_t csum_flags;
		ip_packet_info info;
		u_int mss;
		struct bripstats stats;
		struct tcphdr * tcp;

		error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
		    &info, &stats);
		if (error != 0) {
			/* bad packet */
			goto done;
		}
		if ((info.ip_hlen + info.ip_pay_len + info.ip_opt_len) <=
		    ifp->if_mtu) {
			/* not actually a large packet */
			goto done;
		}
		if (info.ip_proto_hdr == NULL) {
			/* not a TCP packet */
			goto done;
		}
		if ((ifp->if_hwassist & if_tso) == 0) {
			/* hardware does not support TSO, enable sw tso */
			*need_sw_tso = if_bridge_segmentation != 0;
			goto done;
		}
		/* use hardware TSO: mark the mbuf for offload */
		(*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
		tcp = (struct tcphdr *)info.ip_proto_hdr;
		/* MSS = MTU less IP header, IP options and TCP header */
		mss = ifp->if_mtu - info.ip_hlen - info.ip_opt_len
		    - (tcp->th_off << 2);
		csum_flags = mbuf_tso;
		if (*supports_cksum) {
			csum_flags |= if_csum;
		}
		(*mp)->m_pkthdr.tso_segsz = mss;
		(*mp)->m_pkthdr.csum_flags |= csum_flags;
		(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
	}
done:
	return error;
}
4911
4912 /*
4913 * bridge_enqueue:
4914 *
4915 * Enqueue a packet on a bridge member interface.
4916 *
4917 */
static errno_t
bridge_enqueue(ifnet_t bridge_ifp, struct ifnet *src_ifp,
    struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
{
	errno_t error = 0;
	int len;

	VERIFY(dst_ifp != NULL);

	/*
	 * We may be sending a fragment so traverse the mbuf
	 *
	 * NOTE: bridge_fragment() is called only when PFIL_HOOKS is enabled.
	 */
	for (struct mbuf *next_m = NULL; m != NULL; m = next_m) {
		bool need_sw_tso = false;
		bool is_ipv4 = false;
		bool is_large_pkt;
		errno_t _error = 0;

		len = m->m_pkthdr.len;
		m->m_flags |= M_PROTO1; /* set to avoid loops */
		/* detach this packet from the chain before sending it */
		next_m = m->m_nextpkt;
		m->m_nextpkt = NULL;
		/*
		 * Need to segment the packet if it is a large frame
		 * and the destination interface does not support TSO.
		 *
		 * Note that with trailers, it's possible for a packet to
		 * be large but not actually require segmentation.
		 */
		is_large_pkt = (len > (bridge_ifp->if_mtu + ETHER_HDR_LEN));
		if (is_large_pkt) {
			struct ether_header *eh;
			bool hw_supports_cksum = false;

			eh = mtod(m, struct ether_header *);
			if (ether_header_type_is_ip(eh, &is_ipv4)) {
				_error = tso_hwassist(&m, is_ipv4,
				    dst_ifp, sizeof(struct ether_header),
				    &need_sw_tso, &hw_supports_cksum);
				/*
				 * hardware also handles the checksum, so
				 * no further checksum operation is needed
				 */
				if (_error == 0 && hw_supports_cksum) {
					cksum_op = CHECKSUM_OPERATION_NONE;
				}
			} else {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
				    "large non IP packet");
			}
		}
		if (_error != 0) {
			/* tso_hwassist() may have set m to NULL on failure */
			if (m != NULL) {
				m_freem(m);
			}
		} else if (need_sw_tso) {
			_error = bridge_send_tso(dst_ifp, m, is_ipv4);
		} else {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
			    "%s bridge_send(%s) len %d op %d",
			    bridge_ifp->if_xname,
			    dst_ifp->if_xname,
			    len, cksum_op);
			_error = bridge_send(src_ifp, dst_ifp, m, cksum_op);
		}

		/* Preserve first error value */
		if (error == 0 && _error != 0) {
			error = _error;
		}
		if (_error == 0) {
			(void) ifnet_stat_increment_out(bridge_ifp, 1, len, 0);
		} else {
			(void) ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
		}
	}

	return error;
}
4995
4996 #if HAS_BRIDGE_DUMMYNET
4997 /*
4998 * bridge_dummynet:
4999 *
5000 * Receive a queued packet from dummynet and pass it on to the output
5001 * interface.
5002 *
5003 * The mbuf has the Ethernet header already attached.
5004 */
5005 static void
5006 bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
5007 {
5008 struct bridge_softc *sc;
5009
5010 sc = ifp->if_bridge;
5011
5012 /*
5013 * The packet didn't originate from a member interface. This should only
5014 * ever happen if a member interface is removed while packets are
5015 * queued for it.
5016 */
5017 if (sc == NULL) {
5018 m_freem(m);
5019 return;
5020 }
5021
5022 if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
5023 if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) {
5024 return;
5025 }
5026 if (m == NULL) {
5027 return;
5028 }
5029 }
5030 (void) bridge_enqueue(sc->sc_ifp, NULL, ifp, m, CHECKSUM_OPERATION_NONE);
5031 }
5032
5033 #endif /* HAS_BRIDGE_DUMMYNET */
5034
5035 /*
5036 * bridge_member_output:
5037 *
5038 * Send output from a bridge member interface. This
5039 * performs the bridging function for locally originated
5040 * packets.
5041 *
5042 * The mbuf has the Ethernet header already attached.
5043 */
5044 static errno_t
5045 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5046 {
5047 ifnet_t bridge_ifp;
5048 struct ether_header *eh;
5049 struct ifnet *dst_if;
5050 uint16_t vlan;
5051 struct bridge_iflist *mac_nat_bif;
5052 ifnet_t mac_nat_ifp;
5053 mbuf_t m = *data;
5054
5055 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5056 "ifp %s", ifp->if_xname);
5057 if (m->m_len < ETHER_HDR_LEN) {
5058 m = m_pullup(m, ETHER_HDR_LEN);
5059 if (m == NULL) {
5060 *data = NULL;
5061 return EJUSTRETURN;
5062 }
5063 }
5064
5065 eh = mtod(m, struct ether_header *);
5066 vlan = VLANTAGOF(m);
5067
5068 BRIDGE_LOCK(sc);
5069 mac_nat_bif = sc->sc_mac_nat_bif;
5070 mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5071 if (mac_nat_ifp == ifp) {
5072 /* record the IP address used by the MAC NAT interface */
5073 (void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5074 m = *data;
5075 if (m == NULL) {
5076 /* packet was deallocated */
5077 BRIDGE_UNLOCK(sc);
5078 return EJUSTRETURN;
5079 }
5080 }
5081 bridge_ifp = sc->sc_ifp;
5082
5083 /*
5084 * APPLE MODIFICATION
5085 * If the packet is an 802.1X ethertype, then only send on the
5086 * original output interface.
5087 */
5088 if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5089 dst_if = ifp;
5090 goto sendunicast;
5091 }
5092
5093 /*
5094 * If bridge is down, but the original output interface is up,
5095 * go ahead and send out that interface. Otherwise, the packet
5096 * is dropped below.
5097 */
5098 if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5099 dst_if = ifp;
5100 goto sendunicast;
5101 }
5102
5103 /*
5104 * If the packet is a multicast, or we don't know a better way to
5105 * get there, send to all interfaces.
5106 */
5107 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5108 dst_if = NULL;
5109 } else {
5110 dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
5111 }
5112 if (dst_if == NULL) {
5113 struct bridge_iflist *bif;
5114 struct mbuf *mc;
5115 int used = 0;
5116 errno_t error;
5117
5118
5119 bridge_span(sc, m);
5120
5121 BRIDGE_LOCK2REF(sc, error);
5122 if (error != 0) {
5123 m_freem(m);
5124 return EJUSTRETURN;
5125 }
5126
5127 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5128 /* skip interface with inactive link status */
5129 if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5130 continue;
5131 }
5132 dst_if = bif->bif_ifp;
5133
5134 #if 0
5135 if (dst_if->if_type == IFT_GIF) {
5136 continue;
5137 }
5138 #endif
5139 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5140 continue;
5141 }
5142 if (dst_if != ifp) {
5143 /*
5144 * If this is not the original output interface,
5145 * and the interface is participating in spanning
5146 * tree, make sure the port is in a state that
5147 * allows forwarding.
5148 */
5149 if ((bif->bif_ifflags & IFBIF_STP) &&
5150 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5151 continue;
5152 }
5153 /*
5154 * If this is not the original output interface,
5155 * and the destination is the MAC NAT interface,
5156 * drop the packet. The packet can't be sent
5157 * if the source MAC is incorrect.
5158 */
5159 if (dst_if == mac_nat_ifp) {
5160 continue;
5161 }
5162 }
5163 if (TAILQ_NEXT(bif, bif_next) == NULL) {
5164 used = 1;
5165 mc = m;
5166 } else {
5167 mc = m_dup(m, M_DONTWAIT);
5168 if (mc == NULL) {
5169 (void) ifnet_stat_increment_out(
5170 bridge_ifp, 0, 0, 1);
5171 continue;
5172 }
5173 }
5174 (void) bridge_enqueue(bridge_ifp, ifp, dst_if,
5175 mc, CHECKSUM_OPERATION_COMPUTE);
5176 }
5177 if (used == 0) {
5178 m_freem(m);
5179 }
5180 BRIDGE_UNREF(sc);
5181 return EJUSTRETURN;
5182 }
5183
5184 sendunicast:
5185 /*
5186 * XXX Spanning tree consideration here?
5187 */
5188
5189 bridge_span(sc, m);
5190 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5191 m_freem(m);
5192 BRIDGE_UNLOCK(sc);
5193 return EJUSTRETURN;
5194 }
5195
5196 BRIDGE_UNLOCK(sc);
5197 if (dst_if == ifp) {
5198 /* just let the packet continue on its way */
5199 return 0;
5200 }
5201 if (dst_if != mac_nat_ifp) {
5202 (void) bridge_enqueue(bridge_ifp, ifp, dst_if, m,
5203 CHECKSUM_OPERATION_COMPUTE);
5204 } else {
5205 /*
5206 * This is not the original output interface
5207 * and the destination is the MAC NAT interface.
5208 * Drop the packet because the packet can't be sent
5209 * if the source MAC is incorrect.
5210 */
5211 m_freem(m);
5212 }
5213 return EJUSTRETURN;
5214 }
5215
5216 /*
5217 * Output callback.
5218 *
5219 * This routine is called externally from above only when if_bridge_txstart
5220 * is disabled; otherwise it is called internally by bridge_start().
5221 */
static int
bridge_output(struct ifnet *ifp, struct mbuf *m)
{
	struct bridge_softc *sc = ifnet_softc(ifp);
	struct ether_header *eh;
	struct ifnet *dst_if = NULL;
	int error = 0;

	eh = mtod(m, struct ether_header *);

	BRIDGE_LOCK(sc);

	/*
	 * For a unicast frame, look up the member interface that owns the
	 * destination MAC.  Broadcast/multicast frames always flood, so no
	 * lookup is needed (dst_if stays NULL).
	 */
	if (!(m->m_flags & (M_BCAST | M_MCAST))) {
		dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0);
	}

	(void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);

#if NBPFILTER > 0
	/* tap outbound packets on the bridge interface itself */
	if (sc->sc_bpf_output) {
		bridge_bpf_output(ifp, m);
	}
#endif

	if (dst_if == NULL) {
		/*
		 * Unknown destination (or multicast): flood to all members.
		 * callee will unlock
		 */
		bridge_broadcast(sc, NULL, m, 0);
	} else {
		ifnet_t bridge_ifp;

		/*
		 * Known destination: drop the lock and enqueue directly on
		 * the single destination member.
		 */
		bridge_ifp = sc->sc_ifp;
		BRIDGE_UNLOCK(sc);

		error = bridge_enqueue(bridge_ifp, NULL, dst_if, m,
		    CHECKSUM_OPERATION_FINALIZE);
	}

	return error;
}
5261
/*
 * bridge_finalize_cksum:
 *
 * Finalize any outstanding IP/TCP/UDP checksums on an IPv4/IPv6 packet
 * before it leaves via `ifp`.  Whatever checksum work the interface's
 * hardware cannot do (per if_hwassist) is done here in software; work
 * the hardware can do is left pending, converted to CSUM_PARTIAL when
 * supported.  Non-IP packets are left untouched.
 */
static void
bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
{
	struct ether_header *eh;
	bool is_ipv4;
	uint32_t sw_csum, hwcap;
	uint32_t did_sw;
	uint32_t csum_flags;

	eh = mtod(m, struct ether_header *);
	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
		/* only IPv4/IPv6 packets carry checksums to finalize */
		return;
	}

	/* do in software what the hardware cannot */
	hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
	csum_flags = m->m_pkthdr.csum_flags;
	sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
	sw_csum &= IF_HWASSIST_CSUM_MASK;

	if (is_ipv4) {
		if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
		    (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
			if (m->m_pkthdr.csum_flags & CSUM_TCP) {
				/*
				 * Hardware supports partial checksum: set up
				 * the partial-checksum start/stuff offsets so
				 * the driver can finish the TCP checksum.
				 * NOTE(review): assumes no IPv4 options
				 * (header is exactly sizeof(struct ip)) —
				 * matches the fixed `start` computation.
				 */
				uint16_t start =
				    sizeof(*eh) + sizeof(struct ip);
				uint16_t ulpoff =
				    m->m_pkthdr.csum_data & 0xffff;
				m->m_pkthdr.csum_flags |=
				    (CSUM_DATA_VALID | CSUM_PARTIAL);
				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
				m->m_pkthdr.csum_tx_start = start;
			} else {
				/* non-TCP: fall back to software checksum */
				sw_csum |= (CSUM_DELAY_DATA &
				    m->m_pkthdr.csum_flags);
			}
		}
		did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
	} else {
		if ((hwcap & CSUM_PARTIAL) &&
		    !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
		    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
			if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
				/*
				 * Same partial-checksum setup for TCP/IPv6.
				 * NOTE(review): assumes no IPv6 extension
				 * headers before the TCP header.
				 */
				uint16_t start =
				    sizeof(*eh) + sizeof(struct ip6_hdr);
				uint16_t ulpoff =
				    m->m_pkthdr.csum_data & 0xffff;
				m->m_pkthdr.csum_flags |=
				    (CSUM_DATA_VALID | CSUM_PARTIAL);
				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
				m->m_pkthdr.csum_tx_start = start;
			} else {
				sw_csum |= (CSUM_DELAY_IPV6_DATA &
				    m->m_pkthdr.csum_flags);
			}
		}
		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
	}
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
	    "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
	    ifp->if_xname, csum_flags, hwcap, sw_csum,
	    did_sw, m->m_pkthdr.csum_flags);
}
5325
5326 /*
5327 * bridge_start:
5328 *
5329 * Start output on a bridge.
5330 *
5331 * This routine is invoked by the start worker thread; because we never call
5332 * it directly, there is no need do deploy any serialization mechanism other
5333 * than what's already used by the worker thread, i.e. this is already single
5334 * threaded.
5335 *
5336 * This routine is called only when if_bridge_txstart is enabled.
5337 */
static void
bridge_start(struct ifnet *ifp)
{
	struct mbuf *m;

	/*
	 * Drain the bridge interface's output queue, pushing each
	 * dequeued packet through the normal output path.  Stops as
	 * soon as the queue is empty (ifnet_dequeue fails).
	 */
	while (ifnet_dequeue(ifp, &m) == 0) {
		(void) bridge_output(ifp, m);
	}
}
5351
5352 /*
5353 * bridge_forward:
5354 *
5355 * The forwarding function of the bridge.
5356 *
5357 * NOTE: Releases the lock on return.
5358 */
5359 static void
5360 bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
5361 struct mbuf *m)
5362 {
5363 struct bridge_iflist *dbif;
5364 ifnet_t bridge_ifp;
5365 struct ifnet *src_if, *dst_if;
5366 struct ether_header *eh;
5367 uint16_t vlan;
5368 uint8_t *dst;
5369 int error;
5370 struct mac_nat_record mnr;
5371 bool translate_mac = FALSE;
5372 uint32_t sc_filter_flags = 0;
5373
5374 BRIDGE_LOCK_ASSERT_HELD(sc);
5375
5376 bridge_ifp = sc->sc_ifp;
5377 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5378 "%s m 0x%llx", bridge_ifp->if_xname,
5379 (uint64_t)VM_KERNEL_ADDRPERM(m));
5380
5381 src_if = m->m_pkthdr.rcvif;
5382 if (src_if != sbif->bif_ifp) {
5383 const char * src_if_name;
5384
5385 src_if_name = (src_if != NULL) ? src_if->if_xname : "?";
5386 BRIDGE_LOG(LOG_NOTICE, 0,
5387 "src_if %s != bif_ifp %s",
5388 src_if_name, sbif->bif_ifp->if_xname);
5389 goto drop;
5390 }
5391
5392 (void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5393 vlan = VLANTAGOF(m);
5394
5395
5396 if ((sbif->bif_ifflags & IFBIF_STP) &&
5397 sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5398 goto drop;
5399 }
5400
5401 eh = mtod(m, struct ether_header *);
5402 dst = eh->ether_dhost;
5403
5404 /* If the interface is learning, record the address. */
5405 if (sbif->bif_ifflags & IFBIF_LEARNING) {
5406 error = bridge_rtupdate(sc, eh->ether_shost, vlan,
5407 sbif, 0, IFBAF_DYNAMIC);
5408 /*
5409 * If the interface has addresses limits then deny any source
5410 * that is not in the cache.
5411 */
5412 if (error && sbif->bif_addrmax) {
5413 goto drop;
5414 }
5415 }
5416
5417 if ((sbif->bif_ifflags & IFBIF_STP) != 0 &&
5418 sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
5419 goto drop;
5420 }
5421
5422 /*
5423 * At this point, the port either doesn't participate
5424 * in spanning tree or it is in the forwarding state.
5425 */
5426
5427 /*
5428 * If the packet is unicast, destined for someone on
5429 * "this" side of the bridge, drop it.
5430 */
5431 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5432 /* unicast */
5433 dst_if = bridge_rtlookup(sc, dst, vlan);
5434 if (src_if == dst_if) {
5435 goto drop;
5436 }
5437 } else {
5438 /* broadcast/multicast */
5439
5440 /*
5441 * Check if its a reserved multicast address, any address
5442 * listed in 802.1D section 7.12.6 may not be forwarded by the
5443 * bridge.
5444 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
5445 */
5446 if (dst[0] == 0x01 && dst[1] == 0x80 &&
5447 dst[2] == 0xc2 && dst[3] == 0x00 &&
5448 dst[4] == 0x00 && dst[5] <= 0x0f) {
5449 goto drop;
5450 }
5451
5452
5453 /* ...forward it to all interfaces. */
5454 atomic_add_64(&bridge_ifp->if_imcasts, 1);
5455 dst_if = NULL;
5456 }
5457
5458 /*
5459 * If we have a destination interface which is a member of our bridge,
5460 * OR this is a unicast packet, push it through the bpf(4) machinery.
5461 * For broadcast or multicast packets, don't bother because it will
5462 * be reinjected into ether_input. We do this before we pass the packets
5463 * through the pfil(9) framework, as it is possible that pfil(9) will
5464 * drop the packet, or possibly modify it, making it difficult to debug
5465 * firewall issues on the bridge.
5466 */
5467 #if NBPFILTER > 0
5468 if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
5469 dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5470 m->m_pkthdr.rcvif = bridge_ifp;
5471 BRIDGE_BPF_MTAP_INPUT(sc, m);
5472 }
5473 #endif /* NBPFILTER */
5474
5475 if (dst_if == NULL) {
5476 /* bridge_broadcast will unlock */
5477 bridge_broadcast(sc, sbif, m, 1);
5478 return;
5479 }
5480
5481 /*
5482 * Unicast.
5483 */
5484 /*
5485 * At this point, we're dealing with a unicast frame
5486 * going to a different interface.
5487 */
5488 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5489 goto drop;
5490 }
5491
5492 dbif = bridge_lookup_member_if(sc, dst_if);
5493 if (dbif == NULL) {
5494 /* Not a member of the bridge (anymore?) */
5495 goto drop;
5496 }
5497
5498 /* Private segments can not talk to each other */
5499 if (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) {
5500 goto drop;
5501 }
5502
5503 if ((dbif->bif_ifflags & IFBIF_STP) &&
5504 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5505 goto drop;
5506 }
5507
5508 #if HAS_DHCPRA_MASK
5509 /* APPLE MODIFICATION <rdar:6985737> */
5510 if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
5511 m = ip_xdhcpra_output(dst_if, m);
5512 if (!m) {
5513 ++bridge_ifp.if_xdhcpra;
5514 BRIDGE_UNLOCK(sc);
5515 return;
5516 }
5517 }
5518 #endif /* HAS_DHCPRA_MASK */
5519
5520 if (dbif == sc->sc_mac_nat_bif) {
5521 /* determine how to translate the packet */
5522 translate_mac
5523 = bridge_mac_nat_output(sc, sbif, &m, &mnr);
5524 if (m == NULL) {
5525 /* packet was deallocated */
5526 BRIDGE_UNLOCK(sc);
5527 return;
5528 }
5529 } else if (bif_has_checksum_offload(dbif) &&
5530 !bif_has_checksum_offload(sbif)) {
5531 /*
5532 * If the destination interface has checksum offload enabled,
5533 * verify the checksum now, unless the source interface also has
5534 * checksum offload enabled. The checksum in that case has
5535 * already just been computed and verifying it is unnecessary.
5536 */
5537 error = bridge_verify_checksum(&m, &dbif->bif_stats);
5538 if (error != 0) {
5539 BRIDGE_UNLOCK(sc);
5540 if (m != NULL) {
5541 m_freem(m);
5542 }
5543 return;
5544 }
5545 }
5546
5547 sc_filter_flags = sc->sc_filter_flags;
5548
5549 BRIDGE_UNLOCK(sc);
5550 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5551 if (bridge_pf(&m, dst_if, sc_filter_flags, FALSE) != 0) {
5552 return;
5553 }
5554 if (m == NULL) {
5555 return;
5556 }
5557 }
5558
5559 /* if we need to, translate the MAC address */
5560 if (translate_mac) {
5561 bridge_mac_nat_translate(&m, &mnr, IF_LLADDR(dst_if));
5562 }
5563 /*
5564 * We're forwarding an inbound packet in which the checksum must
5565 * already have been computed and if required, verified.
5566 */
5567 if (m != NULL) {
5568 (void) bridge_enqueue(bridge_ifp, src_if, dst_if, m,
5569 CHECKSUM_OPERATION_CLEAR_OFFLOAD);
5570 }
5571 return;
5572
5573 drop:
5574 BRIDGE_UNLOCK(sc);
5575 m_freem(m);
5576 }
5577
/*
 * inject_input_packet:
 *
 * Hand a packet to `ifp`'s input path as if it had been received on that
 * interface: set the receive interface, record the frame header, strip the
 * Ethernet header from the data/length, and pass it to DLIL.  M_PROTO1 is
 * set so the bridge does not process the packet again.
 */
static void
inject_input_packet(ifnet_t ifp, mbuf_t m)
{
	mbuf_pkthdr_setrcvif(m, ifp);
	/* remember where the Ethernet header starts before advancing */
	mbuf_pkthdr_setheader(m, mbuf_data(m));
	mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
	    mbuf_len(m) - ETHER_HDR_LEN);
	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
	m->m_flags |= M_PROTO1; /* set to avoid loops */
	dlil_input_packet_list(ifp, m);
	return;
}
5590
5591 static bool
5592 in_addr_is_ours(struct in_addr ip)
5593 {
5594 struct in_ifaddr *ia;
5595 bool ours = false;
5596
5597 lck_rw_lock_shared(&in_ifaddr_rwlock);
5598 TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5599 if (IA_SIN(ia)->sin_addr.s_addr == ip.s_addr) {
5600 ours = true;
5601 break;
5602 }
5603 }
5604 lck_rw_done(&in_ifaddr_rwlock);
5605 return ours;
5606 }
5607
/*
 * in6_addr_is_ours:
 *
 * Return true if `*ip6_p` matches an IPv6 address assigned to a local
 * interface.  `ifscope` is the interface scope used both for embedding
 * into link-local addresses (when embedded scope is in use) and for the
 * scoped comparison.
 */
static bool
in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
{
	struct in6_ifaddr *ia6;
	bool ours = false;

	if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
		struct in6_addr dst_ip;

		/* need to embed scope ID for comparison */
		bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
		dst_ip.s6_addr16[1] = htons(ifscope);
		/* note: dst_ip stays in scope for the loop below */
		ip6_p = &dst_ip;
	}
	lck_rw_lock_shared(&in6_ifaddr_rwlock);
	TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
		if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
		    ia6->ia_addr.sin6_scope_id, ifscope)) {
			ours = true;
			break;
		}
	}
	lck_rw_done(&in6_ifaddr_rwlock);
	return ours;
}
5633
5634 static void
5635 bridge_interface_input(ifnet_t bridge_ifp, mbuf_t m,
5636 bpf_packet_func bpf_input_func)
5637 {
5638 size_t byte_count;
5639 struct ether_header *eh;
5640 errno_t error;
5641 bool is_ipv4;
5642 int len;
5643 u_int mac_hlen;
5644 int pkt_count;
5645
5646 /* segment large packets before sending them up */
5647 if (if_bridge_segmentation == 0) {
5648 goto done;
5649 }
5650 len = m->m_pkthdr.len;
5651 if (len <= (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5652 goto done;
5653 }
5654 eh = mtod(m, struct ether_header *);
5655 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5656 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5657 "large non IPv4/IPv6 packet");
5658 goto done;
5659 }
5660
5661 /*
5662 * We have a large IPv4/IPv6 TCP packet. Segment it if required.
5663 *
5664 * If gso_tcp() returns success (0), the packet(s) are
5665 * ready to be passed up. If the destination is a local IP address,
5666 * the packet will be passed up as a large, single packet.
5667 *
5668 * If gso_tcp() returns an error, the packet has already
5669 * been freed.
5670 */
5671 mac_hlen = sizeof(*eh);
5672 error = gso_tcp(bridge_ifp, &m, mac_hlen, is_ipv4, FALSE);
5673 if (error != 0) {
5674 return;
5675 }
5676
5677 done:
5678 pkt_count = 0;
5679 byte_count = 0;
5680 for (mbuf_t scan = m; scan != NULL; scan = scan->m_nextpkt) {
5681 /* Mark the packet as arriving on the bridge interface */
5682 mbuf_pkthdr_setrcvif(scan, bridge_ifp);
5683 mbuf_pkthdr_setheader(scan, mbuf_data(scan));
5684 if (bpf_input_func != NULL) {
5685 (*bpf_input_func)(bridge_ifp, scan);
5686 }
5687 mbuf_setdata(scan, (char *)mbuf_data(scan) + ETHER_HDR_LEN,
5688 mbuf_len(scan) - ETHER_HDR_LEN);
5689 mbuf_pkthdr_adjustlen(scan, -ETHER_HDR_LEN);
5690 byte_count += mbuf_pkthdr_len(scan);
5691 pkt_count++;
5692 }
5693 (void)ifnet_stat_increment_in(bridge_ifp, pkt_count, byte_count, 0);
5694 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5695 "%s %d packet(s) %ld bytes",
5696 bridge_ifp->if_xname, pkt_count, byte_count);
5697 dlil_input_packet_list(bridge_ifp, m);
5698 return;
5699 }
5700
5701 static bool
5702 is_our_ip(ip_packet_info_t info_p, uint32_t ifscope)
5703 {
5704 bool ours;
5705
5706 if (info_p->ip_is_ipv4) {
5707 struct in_addr dst_ip;
5708
5709 bcopy(&info_p->ip_hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
5710 ours = in_addr_is_ours(dst_ip);
5711 } else {
5712 ours = in6_addr_is_ours(&info_p->ip_hdr.ip6->ip6_dst, ifscope);
5713 }
5714 return ours;
5715 }
5716
/*
 * bridge_vmnet_tag_input:
 *
 * Attach a pf tag to a packet received from a VMNET member interface so
 * that pf rules can classify it: broadcast, multicast, local (destined to
 * one of our IP addresses), or the default vmnet tag.  As a side effect,
 * for IP packets this parses the headers into *info_p / *stats_p and sets
 * *info_initialized so the caller can reuse the parse result.
 *
 * Returns 0 on success, EJUSTRETURN when the packet was consumed/freed
 * (caller must not touch *mp), or an error from header parsing.
 */
static inline errno_t
bridge_vmnet_tag_input(ifnet_t bridge_ifp, ifnet_t ifp,
    const u_char * ether_dhost, mbuf_t *mp,
    bool is_broadcast, bool is_ip, bool is_ipv4,
    ip_packet_info * info_p, struct bripstats * stats_p,
    bool *info_initialized)
{
	errno_t error = 0;
	bool is_local = false;
	struct pf_mtag *pf_mtag;
	u_int16_t tag = vmnet_tag;

	*info_initialized = false;
	if (is_broadcast) {
		/* link-layer broadcast vs. multicast */
		if (_ether_cmp(ether_dhost, etherbroadcastaddr) == 0) {
			tag = vmnet_broadcast_tag;
		} else {
			tag = vmnet_multicast_tag;
		}
	} else if (is_ip) {
		unsigned int mac_hlen = sizeof(struct ether_header);

		bzero(stats_p, sizeof(*stats_p));
		*info_initialized = true;
		error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p,
		    stats_p);
		if (error != 0) {
			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_INPUT,
			    "%s(%s) bridge_get_ip_proto failed %d",
			    bridge_ifp->if_xname,
			    ifp->if_xname, error);
			if (*mp == NULL) {
				/* packet was freed by the parser */
				return EJUSTRETURN;
			}
			/* parse failed but packet survives: default tag */
		} else {
			is_local = is_our_ip(info_p, bridge_ifp->if_index);
			if (is_local) {
				tag = vmnet_local_tag;
			}
		}
	}
	pf_mtag = pf_get_mtag(*mp);
	if (pf_mtag != NULL) {
		pf_mtag->pftag_tag = tag;
	}
#if DEBUG || DEVELOPMENT
	/*
	 * Test hooks: force the two error paths so that callers'
	 * handling can be exercised.  The `done` label and its gotos
	 * only exist in DEBUG/DEVELOPMENT builds.
	 */
	{
		bool forced;

		BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_ONE);
		if (forced) {
			m_freem(*mp);
			*mp = NULL;
			error = EJUSTRETURN;
			goto done;
		}
		BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_TWO);
		if (forced) {
			error = _EBADIP;
			goto done;
		}
	}
done:
#endif /* DEBUG || DEVELOPMENT */
	return error;
}
5783
5784 static void
5785 bripstats_apply(struct bripstats *dst_p, const struct bripstats *src_p)
5786 {
5787 dst_p->bips_ip += src_p->bips_ip;
5788 dst_p->bips_ip6 += src_p->bips_ip6;
5789 dst_p->bips_udp += src_p->bips_udp;
5790 dst_p->bips_tcp += src_p->bips_tcp;
5791
5792 dst_p->bips_bad_ip += src_p->bips_bad_ip;
5793 dst_p->bips_bad_ip6 += src_p->bips_bad_ip6;
5794 dst_p->bips_bad_udp += src_p->bips_bad_udp;
5795 dst_p->bips_bad_tcp += src_p->bips_bad_tcp;
5796 }
5797
5798 static void
5799 bridge_bripstats_apply(ifnet_t ifp, const struct bripstats *stats_p)
5800 {
5801 struct bridge_iflist *bif;
5802 struct bridge_softc *sc = ifp->if_bridge;
5803
5804 BRIDGE_LOCK(sc);
5805 bif = bridge_lookup_member_if(sc, ifp);
5806 if (bif == NULL) {
5807 goto done;
5808 }
5809 if (!bif_has_checksum_offload(bif)) {
5810 goto done;
5811 }
5812 bripstats_apply(&bif->bif_stats.brms_in_ip, stats_p);
5813
5814 done:
5815 BRIDGE_UNLOCK(sc);
5816 return;
5817 }
5818
5819 /*
5820 * bridge_input:
5821 *
5822 * Filter input from a member interface. Queue the packet for
5823 * bridging if it is not for us.
5824 */
errno_t
bridge_input(struct ifnet *ifp, mbuf_t *data)
{
	struct bridge_softc *sc = ifp->if_bridge;
	struct bridge_iflist *bif, *bif2;
	struct ether_header eh_in;
	bool is_ip = false;
	bool is_ipv4 = false;
	ifnet_t bridge_ifp;
	struct mbuf *mc, *mc2;
	unsigned int mac_hlen = sizeof(struct ether_header);
	uint16_t vlan;
	errno_t error;
	ip_packet_info info;
	struct bripstats stats;
	bool info_initialized = false;
	errno_t ip_packet_error = 0;
	bool is_broadcast;
	bool is_ip_broadcast = false;
	bool is_ifp_mac = false;
	mbuf_t m = *data;
	uint32_t sc_filter_flags = 0;

	bridge_ifp = sc->sc_ifp;
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
	    "%s from %s m 0x%llx data 0x%llx",
	    bridge_ifp->if_xname, ifp->if_xname,
	    (uint64_t)VM_KERNEL_ADDRPERM(m),
	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
	/* bridge not running: return 0 so the packet continues normally */
	if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s not running passing along",
		    bridge_ifp->if_xname);
		return 0;
	}

	vlan = VLANTAGOF(m);

#ifdef IFF_MONITOR
	/*
	 * Implement support for bridge monitoring. If this flag has been
	 * set on this interface, discard the packet once we push it through
	 * the bpf(4) machinery, but before we do, increment the byte and
	 * packet counters associated with this interface.
	 */
	if ((bridge_ifp->if_flags & IFF_MONITOR) != 0) {
		m->m_pkthdr.rcvif = bridge_ifp;
		BRIDGE_BPF_MTAP_INPUT(sc, m);
		(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
		*data = NULL;
		m_freem(m);
		return EJUSTRETURN;
	}
#endif /* IFF_MONITOR */

	is_broadcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;

	/*
	 * Need to clear the promiscuous flag otherwise it will be
	 * dropped by DLIL after processing filters
	 */
	if ((mbuf_flags(m) & MBUF_PROMISC)) {
		mbuf_setflags_mask(m, 0, MBUF_PROMISC);
	}

	/*
	 * copy the ethernet header -- later processing may modify or
	 * consume the mbuf, so keep a stable copy of the addresses
	 */
	eh_in = *(mtod(m, struct ether_header *));

	is_ip = ether_header_type_is_ip(&eh_in, &is_ipv4);

	if (if_bridge_vmnet_pf_tagging != 0 && IFNET_IS_VMNET(ifp)) {
		/* tag packets coming from VMNET interfaces */
		ip_packet_error = bridge_vmnet_tag_input(bridge_ifp, ifp,
		    eh_in.ether_dhost, data, is_broadcast, is_ip, is_ipv4,
		    &info, &stats, &info_initialized);
		m = *data;
		if (m == NULL) {
			bridge_bripstats_apply(ifp, &stats);
			return EJUSTRETURN;
		}
	}

	/* run pf on the inbound packet if member filtering is enabled */
	sc_filter_flags = sc->sc_filter_flags;
	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
		error = bridge_pf(data, ifp, sc_filter_flags, TRUE);
		m = *data;
		if (error != 0 || m == NULL) {
			return EJUSTRETURN;
		}
	}

	BRIDGE_LOCK(sc);
	bif = bridge_lookup_member_if(sc, ifp);
	if (bif == NULL) {
		/* not (or no longer) a member: let the packet continue */
		BRIDGE_UNLOCK(sc);
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s bridge_lookup_member_if failed",
		    bridge_ifp->if_xname);
		return 0;
	}
	if (is_ip && bif_has_checksum_offload(bif)) {
		/*
		 * Member has checksum offload: parse headers (unless the
		 * VMNET path already did) and compute the checksums in
		 * software on behalf of the member.
		 */
		if (info_initialized) {
			bripstats_apply(&bif->bif_stats.brms_in_ip, &stats);
		} else {
			error = bridge_get_ip_proto(data, mac_hlen, is_ipv4,
			    &info, &bif->bif_stats.brms_in_ip);
			if (error != 0) {
				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
				    "%s(%s) bridge_get_ip_proto failed %d",
				    bridge_ifp->if_xname,
				    bif->bif_ifp->if_xname, error);
				ip_packet_error = error;
			}
		}
		if (ip_packet_error == 0) {
			/* need to compute IP/UDP/TCP/checksums */
			error = bridge_offload_checksum(data, &info,
			    &bif->bif_stats);
			if (error != 0) {
				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
				    "%s(%s) bridge_offload_checksum failed %d",
				    bridge_ifp->if_xname,
				    bif->bif_ifp->if_xname, error);
				ip_packet_error = error;
			}
		}
		if (ip_packet_error != 0) {
			/* bad IP packet: drop it here */
			BRIDGE_UNLOCK(sc);
			if (*data != NULL) {
				m_freem(*data);
				*data = NULL;
			}
			return EJUSTRETURN;
		}
		m = *data;
	}

	if (bif->bif_flags & BIFF_HOST_FILTER) {
		error = bridge_host_filter(bif, data);
		if (error != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
			    "%s bridge_host_filter failed",
			    bif->bif_ifp->if_xname);
			BRIDGE_UNLOCK(sc);
			return EJUSTRETURN;
		}
		m = *data;
	}

	if (!is_broadcast &&
	    _ether_cmp(eh_in.ether_dhost, IF_LLADDR(ifp)) == 0) {
		/* the packet is unicast to the interface's MAC address */
		if (is_ip && sc->sc_mac_nat_bif == bif) {
			/* doing MAC-NAT, check if destination is IP broadcast */
			is_ip_broadcast = is_broadcast_ip_packet(data);
			if (*data == NULL) {
				BRIDGE_UNLOCK(sc);
				return EJUSTRETURN;
			}
			m = *data;
		}
		if (!is_ip_broadcast) {
			is_ifp_mac = TRUE;
		}
	}

	bridge_span(sc, m);

	if (is_broadcast || is_ip_broadcast) {
		/* broadcast/multicast path: forward a copy, keep original */
		if (is_broadcast && (m->m_flags & M_MCAST) != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
			    " multicast: "
			    "%02x:%02x:%02x:%02x:%02x:%02x",
			    eh_in.ether_dhost[0], eh_in.ether_dhost[1],
			    eh_in.ether_dhost[2], eh_in.ether_dhost[3],
			    eh_in.ether_dhost[4], eh_in.ether_dhost[5]);
		}
		/* Tap off 802.1D packets; they do not get forwarded. */
		if (is_broadcast &&
		    _ether_cmp(eh_in.ether_dhost, bstp_etheraddr) == 0) {
#if BRIDGESTP
			m = bstp_input(&bif->bif_stp, ifp, m);
#else /* !BRIDGESTP */
			m_freem(m);
			m = NULL;
#endif /* !BRIDGESTP */
			if (m == NULL) {
				BRIDGE_UNLOCK(sc);
				return EJUSTRETURN;
			}
		}

		if ((bif->bif_ifflags & IFBIF_STP) &&
		    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			BRIDGE_UNLOCK(sc);
			return 0;
		}

		/*
		 * Make a deep copy of the packet and enqueue the copy
		 * for bridge processing.
		 */
		mc = m_dup(m, M_DONTWAIT);
		if (mc == NULL) {
			BRIDGE_UNLOCK(sc);
			return 0;
		}

		/*
		 * Perform the bridge forwarding function with the copy.
		 *
		 * Note that bridge_forward calls BRIDGE_UNLOCK
		 */
		if (is_ip_broadcast) {
			struct ether_header *eh;

			/* make the copy look like it is actually broadcast */
			mc->m_flags |= M_BCAST;
			eh = mtod(mc, struct ether_header *);
			bcopy(etherbroadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN);
		}
		bridge_forward(sc, bif, mc);

		/*
		 * Reinject the mbuf as arriving on the bridge so we have a
		 * chance at claiming multicast packets. We can not loop back
		 * here from ether_input as a bridge is never a member of a
		 * bridge.
		 */
		VERIFY(bridge_ifp->if_bridge == NULL);
		mc2 = m_dup(m, M_DONTWAIT);
		if (mc2 != NULL) {
			/* Keep the layer3 header aligned */
			int i = min(mc2->m_pkthdr.len, max_protohdr);
			mc2 = m_copyup(mc2, i, ETHER_ALIGN);
		}
		if (mc2 != NULL) {
			/* mark packet as arriving on the bridge */
			mc2->m_pkthdr.rcvif = bridge_ifp;
			mc2->m_pkthdr.pkt_hdr = mbuf_data(mc2);
			BRIDGE_BPF_MTAP_INPUT(sc, mc2);
			(void) mbuf_setdata(mc2,
			    (char *)mbuf_data(mc2) + ETHER_HDR_LEN,
			    mbuf_len(mc2) - ETHER_HDR_LEN);
			(void) mbuf_pkthdr_adjustlen(mc2, -ETHER_HDR_LEN);
			(void) ifnet_stat_increment_in(bridge_ifp, 1,
			    mbuf_pkthdr_len(mc2), 0);
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
			    "%s mcast for us", bridge_ifp->if_xname);
			dlil_input_packet_list(bridge_ifp, mc2);
		}

		/* Return the original packet for local processing. */
		return 0;
	}

	if ((bif->bif_ifflags & IFBIF_STP) &&
	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
		BRIDGE_UNLOCK(sc);
		return 0;
	}

#ifdef DEV_CARP
#define CARP_CHECK_WE_ARE_DST(iface) \
	((iface)->if_carp &&\
	        carp_forus((iface)->if_carp, eh_in.ether_dhost))
#define CARP_CHECK_WE_ARE_SRC(iface) \
	((iface)->if_carp &&\
	        carp_forus((iface)->if_carp, eh_in.ether_shost))
#else
#define CARP_CHECK_WE_ARE_DST(iface) 0
#define CARP_CHECK_WE_ARE_SRC(iface) 0
#endif

#define PFIL_HOOKED_INET6 PFIL_HOOKED(&inet6_pfil_hook)

#define PFIL_PHYS(sc, ifp, m)

/*
 * GRAB_OUR_PACKETS: multi-exit macro; may BRIDGE_UNLOCK and return.
 * Claims packets destined to (or sourced from) the given member's MAC.
 */
#define GRAB_OUR_PACKETS(iface) \
	if ((iface)->if_type == IFT_GIF) \
	        continue; \
	/* It is destined for us. */ \
	if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_dhost) == 0 || \
	    CARP_CHECK_WE_ARE_DST((iface))) { \
	        if ((iface)->if_type == IFT_BRIDGE) { \
	                BRIDGE_BPF_MTAP_INPUT(sc, m); \
	                /* Filter on the physical interface. */ \
	                PFIL_PHYS(sc, iface, m); \
	        } else { \
	                bpf_tap_in(iface, DLT_EN10MB, m, NULL, 0); \
	        } \
	        if (bif->bif_ifflags & IFBIF_LEARNING) { \
	                error = bridge_rtupdate(sc, eh_in.ether_shost, \
	                    vlan, bif, 0, IFBAF_DYNAMIC); \
	                if (error && bif->bif_addrmax) { \
	                        BRIDGE_UNLOCK(sc); \
	                        m_freem(m); \
	                        return (EJUSTRETURN); \
	                } \
	        } \
	        BRIDGE_UNLOCK(sc); \
	        inject_input_packet(iface, m); \
	        return (EJUSTRETURN); \
	} \
	\
	/* We just received a packet that we sent out. */ \
	if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_shost) == 0 || \
	    CARP_CHECK_WE_ARE_SRC((iface))) { \
	        BRIDGE_UNLOCK(sc); \
	        m_freem(m); \
	        return (EJUSTRETURN); \
	}

	/*
	 * Unicast.
	 */

	/* handle MAC-NAT if enabled */
	if (is_ifp_mac && sc->sc_mac_nat_bif == bif) {
		ifnet_t dst_if;
		boolean_t is_input = FALSE;

		dst_if = bridge_mac_nat_input(sc, data, &is_input);
		m = *data;
		if (dst_if == ifp) {
			/* our input packet */
		} else if (dst_if != NULL || m == NULL) {
			BRIDGE_UNLOCK(sc);
			if (dst_if != NULL) {
				ASSERT(m != NULL);
				if (is_input) {
					inject_input_packet(dst_if, m);
				} else {
					(void)bridge_enqueue(bridge_ifp, NULL,
					    dst_if, m,
					    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
				}
			}
			return EJUSTRETURN;
		}
	}

	/*
	 * If the packet is for the bridge, pass it up for local processing.
	 */
	if (_ether_cmp(eh_in.ether_dhost, IF_LLADDR(bridge_ifp)) == 0 ||
	    CARP_CHECK_WE_ARE_DST(bridge_ifp)) {
		bpf_packet_func bpf_input_func = sc->sc_bpf_input;

		/*
		 * If the interface is learning, and the source
		 * address is valid and not multicast, record
		 * the address.
		 */
		if (bif->bif_ifflags & IFBIF_LEARNING) {
			(void) bridge_rtupdate(sc, eh_in.ether_shost,
			    vlan, bif, 0, IFBAF_DYNAMIC);
		}
		BRIDGE_UNLOCK(sc);

		bridge_interface_input(bridge_ifp, m, bpf_input_func);
		return EJUSTRETURN;
	}

	/*
	 * if the destination of the packet is for the MAC address of
	 * the member interface itself, then we don't need to forward
	 * it -- just pass it back. Note that it'll likely just be
	 * dropped by the stack, but if something else is bound to
	 * the interface directly (for example, the wireless stats
	 * protocol -- although that actually uses BPF right now),
	 * then it will consume the packet
	 *
	 * ALSO, note that we do this check AFTER checking for the
	 * bridge's own MAC address, because the bridge may be
	 * using the SAME MAC address as one of its interfaces
	 */
	if (is_ifp_mac) {

#ifdef VERY_VERY_VERY_DIAGNOSTIC
		BRIDGE_LOG(LOG_NOTICE, 0,
		    "not forwarding packet bound for member interface");
#endif

		BRIDGE_UNLOCK(sc);
		return 0;
	}

	/* Now check the remaining bridge members. */
	TAILQ_FOREACH(bif2, &sc->sc_iflist, bif_next) {
		if (bif2->bif_ifp != ifp) {
			GRAB_OUR_PACKETS(bif2->bif_ifp);
		}
	}

#undef CARP_CHECK_WE_ARE_DST
#undef CARP_CHECK_WE_ARE_SRC
#undef GRAB_OUR_PACKETS

	/*
	 * Perform the bridge forwarding function.
	 *
	 * Note that bridge_forward calls BRIDGE_UNLOCK
	 */
	bridge_forward(sc, bif, m);

	return EJUSTRETURN;
}
6234
6235 /*
6236 * bridge_broadcast:
6237 *
6238 * Send a frame to all interfaces that are members of
6239 * the bridge, except for the one on which the packet
6240 * arrived.
6241 *
6242 * NOTE: Releases the lock on return.
6243 */
6244 static void
6245 bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
6246 struct mbuf *m, int runfilt)
6247 {
6248 ifnet_t bridge_ifp;
6249 struct bridge_iflist *dbif;
6250 struct ifnet * src_if;
6251 struct mbuf *mc;
6252 struct mbuf *mc_in;
6253 struct ifnet *dst_if;
6254 int error = 0, used = 0;
6255 boolean_t bridge_if_out;
6256 ChecksumOperation cksum_op;
6257 struct mac_nat_record mnr;
6258 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
6259 boolean_t translate_mac = FALSE;
6260 uint32_t sc_filter_flags = 0;
6261
6262 bridge_ifp = sc->sc_ifp;
6263 if (sbif != NULL) {
6264 bridge_if_out = FALSE;
6265 src_if = sbif->bif_ifp;
6266 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6267 if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6268 /* get the translation record while holding the lock */
6269 translate_mac
6270 = bridge_mac_nat_output(sc, sbif, &m, &mnr);
6271 if (m == NULL) {
6272 /* packet was deallocated */
6273 BRIDGE_UNLOCK(sc);
6274 return;
6275 }
6276 }
6277 } else {
6278 /*
6279 * sbif is NULL when the bridge interface calls
6280 * bridge_broadcast().
6281 */
6282 bridge_if_out = TRUE;
6283 cksum_op = CHECKSUM_OPERATION_FINALIZE;
6284 sbif = NULL;
6285 src_if = NULL;
6286 }
6287
6288 BRIDGE_LOCK2REF(sc, error);
6289 if (error) {
6290 m_freem(m);
6291 return;
6292 }
6293
6294 TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6295 dst_if = dbif->bif_ifp;
6296 if (dst_if == src_if) {
6297 /* skip the interface that the packet came in on */
6298 continue;
6299 }
6300
6301 /* Private segments can not talk to each other */
6302 if (sbif != NULL &&
6303 (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6304 continue;
6305 }
6306
6307 if ((dbif->bif_ifflags & IFBIF_STP) &&
6308 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6309 continue;
6310 }
6311
6312 if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6313 (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
6314 continue;
6315 }
6316
6317 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6318 continue;
6319 }
6320
6321 if (!(dbif->bif_flags & BIFF_MEDIA_ACTIVE)) {
6322 continue;
6323 }
6324
6325 if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6326 mc = m;
6327 used = 1;
6328 } else {
6329 mc = m_dup(m, M_DONTWAIT);
6330 if (mc == NULL) {
6331 (void) ifnet_stat_increment_out(bridge_ifp,
6332 0, 0, 1);
6333 continue;
6334 }
6335 }
6336
6337 /*
6338 * If broadcast input is enabled, do so only if this
6339 * is an input packet.
6340 */
6341 if (!bridge_if_out &&
6342 (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6343 mc_in = m_dup(mc, M_DONTWAIT);
6344 /* this could fail, but we continue anyways */
6345 } else {
6346 mc_in = NULL;
6347 }
6348
6349 /* out */
6350 if (translate_mac && mac_nat_bif == dbif) {
6351 /* translate the packet without holding the lock */
6352 bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
6353 }
6354
6355 sc_filter_flags = sc->sc_filter_flags;
6356 if (runfilt &&
6357 PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6358 if (used == 0) {
6359 /* Keep the layer3 header aligned */
6360 int i = min(mc->m_pkthdr.len, max_protohdr);
6361 mc = m_copyup(mc, i, ETHER_ALIGN);
6362 if (mc == NULL) {
6363 (void) ifnet_stat_increment_out(
6364 sc->sc_ifp, 0, 0, 1);
6365 if (mc_in != NULL) {
6366 m_freem(mc_in);
6367 mc_in = NULL;
6368 }
6369 continue;
6370 }
6371 }
6372 if (bridge_pf(&mc, dst_if, sc_filter_flags, FALSE) != 0) {
6373 if (mc_in != NULL) {
6374 m_freem(mc_in);
6375 mc_in = NULL;
6376 }
6377 continue;
6378 }
6379 if (mc == NULL) {
6380 if (mc_in != NULL) {
6381 m_freem(mc_in);
6382 mc_in = NULL;
6383 }
6384 continue;
6385 }
6386 }
6387
6388 if (mc != NULL) {
6389 /* verify checksum if necessary */
6390 if (bif_has_checksum_offload(dbif) && sbif != NULL &&
6391 !bif_has_checksum_offload(sbif)) {
6392 error = bridge_verify_checksum(&mc,
6393 &dbif->bif_stats);
6394 if (error != 0) {
6395 if (mc != NULL) {
6396 m_freem(mc);
6397 }
6398 mc = NULL;
6399 }
6400 }
6401 if (mc != NULL) {
6402 (void) bridge_enqueue(bridge_ifp,
6403 NULL, dst_if, mc, cksum_op);
6404 }
6405 }
6406
6407 /* in */
6408 if (mc_in == NULL) {
6409 continue;
6410 }
6411 bpf_tap_in(dst_if, DLT_EN10MB, mc_in, NULL, 0);
6412 mbuf_pkthdr_setrcvif(mc_in, dst_if);
6413 mbuf_pkthdr_setheader(mc_in, mbuf_data(mc_in));
6414 mbuf_setdata(mc_in, (char *)mbuf_data(mc_in) + ETHER_HDR_LEN,
6415 mbuf_len(mc_in) - ETHER_HDR_LEN);
6416 mbuf_pkthdr_adjustlen(mc_in, -ETHER_HDR_LEN);
6417 mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
6418 dlil_input_packet_list(dst_if, mc_in);
6419 }
6420 if (used == 0) {
6421 m_freem(m);
6422 }
6423
6424
6425 BRIDGE_UNREF(sc);
6426 }
6427
6428 /*
6429 * bridge_span:
6430 *
6431 * Duplicate a packet out one or more interfaces that are in span mode,
6432 * the original mbuf is unmodified.
6433 */
6434 static void
6435 bridge_span(struct bridge_softc *sc, struct mbuf *m)
6436 {
6437 struct bridge_iflist *bif;
6438 struct ifnet *dst_if;
6439 struct mbuf *mc;
6440
6441 if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6442 return;
6443 }
6444
6445 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6446 dst_if = bif->bif_ifp;
6447
6448 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6449 continue;
6450 }
6451
6452 mc = m_copypacket(m, M_DONTWAIT);
6453 if (mc == NULL) {
6454 (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6455 continue;
6456 }
6457
6458 (void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, mc,
6459 CHECKSUM_OPERATION_NONE);
6460 }
6461 }
6462
6463
6464 /*
6465 * bridge_rtupdate:
6466 *
6467 * Add a bridge routing entry.
6468 */
6469 static int
6470 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
6471 struct bridge_iflist *bif, int setflags, uint8_t flags)
6472 {
6473 struct bridge_rtnode *brt;
6474 int error;
6475
6476 BRIDGE_LOCK_ASSERT_HELD(sc);
6477
6478 /* Check the source address is valid and not multicast. */
6479 if (ETHER_IS_MULTICAST(dst) ||
6480 (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6481 dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6482 return EINVAL;
6483 }
6484
6485
6486 /* 802.1p frames map to vlan 1 */
6487 if (vlan == 0) {
6488 vlan = 1;
6489 }
6490
6491 /*
6492 * A route for this destination might already exist. If so,
6493 * update it, otherwise create a new one.
6494 */
6495 if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6496 if (sc->sc_brtcnt >= sc->sc_brtmax) {
6497 sc->sc_brtexceeded++;
6498 return ENOSPC;
6499 }
6500 /* Check per interface address limits (if enabled) */
6501 if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6502 bif->bif_addrexceeded++;
6503 return ENOSPC;
6504 }
6505
6506 /*
6507 * Allocate a new bridge forwarding node, and
6508 * initialize the expiration time and Ethernet
6509 * address.
6510 */
6511 brt = zalloc_noblock(bridge_rtnode_pool);
6512 if (brt == NULL) {
6513 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6514 "zalloc_nolock failed");
6515 return ENOMEM;
6516 }
6517 bzero(brt, sizeof(struct bridge_rtnode));
6518
6519 if (bif->bif_ifflags & IFBIF_STICKY) {
6520 brt->brt_flags = IFBAF_STICKY;
6521 } else {
6522 brt->brt_flags = IFBAF_DYNAMIC;
6523 }
6524
6525 memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6526 brt->brt_vlan = vlan;
6527
6528
6529 if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6530 zfree(bridge_rtnode_pool, brt);
6531 return error;
6532 }
6533 brt->brt_dst = bif;
6534 bif->bif_addrcnt++;
6535 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6536 "added %02x:%02x:%02x:%02x:%02x:%02x "
6537 "on %s count %u hashsize %u",
6538 dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6539 sc->sc_ifp->if_xname, sc->sc_brtcnt,
6540 sc->sc_rthash_size);
6541 }
6542
6543 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6544 brt->brt_dst != bif) {
6545 brt->brt_dst->bif_addrcnt--;
6546 brt->brt_dst = bif;
6547 brt->brt_dst->bif_addrcnt++;
6548 }
6549
6550 if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6551 unsigned long now;
6552
6553 now = (unsigned long) net_uptime();
6554 brt->brt_expire = now + sc->sc_brttimeout;
6555 }
6556 if (setflags) {
6557 brt->brt_flags = flags;
6558 }
6559
6560
6561 return 0;
6562 }
6563
6564 /*
6565 * bridge_rtlookup:
6566 *
6567 * Lookup the destination interface for an address.
6568 */
6569 static struct ifnet *
6570 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6571 {
6572 struct bridge_rtnode *brt;
6573
6574 BRIDGE_LOCK_ASSERT_HELD(sc);
6575
6576 if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6577 return NULL;
6578 }
6579
6580 return brt->brt_ifp;
6581 }
6582
6583 /*
6584 * bridge_rttrim:
6585 *
6586 * Trim the routine table so that we have a number
6587 * of routing entries less than or equal to the
6588 * maximum number.
6589 */
6590 static void
6591 bridge_rttrim(struct bridge_softc *sc)
6592 {
6593 struct bridge_rtnode *brt, *nbrt;
6594
6595 BRIDGE_LOCK_ASSERT_HELD(sc);
6596
6597 /* Make sure we actually need to do this. */
6598 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6599 return;
6600 }
6601
6602 /* Force an aging cycle; this might trim enough addresses. */
6603 bridge_rtage(sc);
6604 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6605 return;
6606 }
6607
6608 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6609 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6610 bridge_rtnode_destroy(sc, brt);
6611 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6612 return;
6613 }
6614 }
6615 }
6616 }
6617
6618 /*
6619 * bridge_aging_timer:
6620 *
6621 * Aging periodic timer for the bridge routing table.
6622 */
6623 static void
6624 bridge_aging_timer(struct bridge_softc *sc)
6625 {
6626 BRIDGE_LOCK_ASSERT_HELD(sc);
6627
6628 bridge_rtage(sc);
6629 if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6630 (sc->sc_flags & SCF_DETACHING) == 0) {
6631 sc->sc_aging_timer.bdc_sc = sc;
6632 sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6633 sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6634 bridge_schedule_delayed_call(&sc->sc_aging_timer);
6635 }
6636 }
6637
6638 /*
6639 * bridge_rtage:
6640 *
6641 * Perform an aging cycle.
6642 */
6643 static void
6644 bridge_rtage(struct bridge_softc *sc)
6645 {
6646 struct bridge_rtnode *brt, *nbrt;
6647 unsigned long now;
6648
6649 BRIDGE_LOCK_ASSERT_HELD(sc);
6650
6651 now = (unsigned long) net_uptime();
6652
6653 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6654 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6655 if (now >= brt->brt_expire) {
6656 bridge_rtnode_destroy(sc, brt);
6657 }
6658 }
6659 }
6660 if (sc->sc_mac_nat_bif != NULL) {
6661 bridge_mac_nat_age_entries(sc, now);
6662 }
6663 }
6664
6665 /*
6666 * bridge_rtflush:
6667 *
6668 * Remove all dynamic addresses from the bridge.
6669 */
6670 static void
6671 bridge_rtflush(struct bridge_softc *sc, int full)
6672 {
6673 struct bridge_rtnode *brt, *nbrt;
6674
6675 BRIDGE_LOCK_ASSERT_HELD(sc);
6676
6677 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6678 if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6679 bridge_rtnode_destroy(sc, brt);
6680 }
6681 }
6682 }
6683
6684 /*
6685 * bridge_rtdaddr:
6686 *
6687 * Remove an address from the table.
6688 */
6689 static int
6690 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6691 {
6692 struct bridge_rtnode *brt;
6693 int found = 0;
6694
6695 BRIDGE_LOCK_ASSERT_HELD(sc);
6696
6697 /*
6698 * If vlan is zero then we want to delete for all vlans so the lookup
6699 * may return more than one.
6700 */
6701 while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6702 bridge_rtnode_destroy(sc, brt);
6703 found = 1;
6704 }
6705
6706 return found ? 0 : ENOENT;
6707 }
6708
6709 /*
6710 * bridge_rtdelete:
6711 *
6712 * Delete routes to a specific member interface.
6713 */
6714 static void
6715 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6716 {
6717 struct bridge_rtnode *brt, *nbrt;
6718
6719 BRIDGE_LOCK_ASSERT_HELD(sc);
6720
6721 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6722 if (brt->brt_ifp == ifp && (full ||
6723 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6724 bridge_rtnode_destroy(sc, brt);
6725 }
6726 }
6727 }
6728
6729 /*
6730 * bridge_rtable_init:
6731 *
6732 * Initialize the route table for this bridge.
6733 */
6734 static int
6735 bridge_rtable_init(struct bridge_softc *sc)
6736 {
6737 u_int32_t i;
6738
6739 sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6740 BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6741 sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6742
6743 for (i = 0; i < sc->sc_rthash_size; i++) {
6744 LIST_INIT(&sc->sc_rthash[i]);
6745 }
6746
6747 sc->sc_rthash_key = RandomULong();
6748
6749 LIST_INIT(&sc->sc_rtlist);
6750
6751 return 0;
6752 }
6753
6754 /*
6755 * bridge_rthash_delayed_resize:
6756 *
6757 * Resize the routing table hash on a delayed thread call.
6758 */
6759 static void
6760 bridge_rthash_delayed_resize(struct bridge_softc *sc)
6761 {
6762 u_int32_t new_rthash_size = 0;
6763 u_int32_t old_rthash_size = 0;
6764 struct _bridge_rtnode_list *new_rthash = NULL;
6765 struct _bridge_rtnode_list *old_rthash = NULL;
6766 u_int32_t i;
6767 struct bridge_rtnode *brt;
6768 int error = 0;
6769
6770 BRIDGE_LOCK_ASSERT_HELD(sc);
6771
6772 /*
6773 * Four entries per hash bucket is our ideal load factor
6774 */
6775 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6776 goto out;
6777 }
6778
6779 /*
6780 * Doubling the number of hash buckets may be too simplistic
6781 * especially when facing a spike of new entries
6782 */
6783 new_rthash_size = sc->sc_rthash_size * 2;
6784
6785 sc->sc_flags |= SCF_RESIZING;
6786 BRIDGE_UNLOCK(sc);
6787
6788 new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
6789 Z_WAITOK | Z_ZERO);
6790
6791 BRIDGE_LOCK(sc);
6792 sc->sc_flags &= ~SCF_RESIZING;
6793
6794 if (new_rthash == NULL) {
6795 error = ENOMEM;
6796 goto out;
6797 }
6798 if ((sc->sc_flags & SCF_DETACHING)) {
6799 error = ENODEV;
6800 goto out;
6801 }
6802 /*
6803 * Fail safe from here on
6804 */
6805 old_rthash = sc->sc_rthash;
6806 old_rthash_size = sc->sc_rthash_size;
6807 sc->sc_rthash = new_rthash;
6808 sc->sc_rthash_size = new_rthash_size;
6809
6810 /*
6811 * Get a new key to force entries to be shuffled around to reduce
6812 * the likelihood they will land in the same buckets
6813 */
6814 sc->sc_rthash_key = RandomULong();
6815
6816 for (i = 0; i < sc->sc_rthash_size; i++) {
6817 LIST_INIT(&sc->sc_rthash[i]);
6818 }
6819
6820 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
6821 LIST_REMOVE(brt, brt_hash);
6822 (void) bridge_rtnode_hash(sc, brt);
6823 }
6824 out:
6825 if (error == 0) {
6826 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6827 "%s new size %u",
6828 sc->sc_ifp->if_xname, sc->sc_rthash_size);
6829 kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
6830 } else {
6831 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
6832 "%s failed %d", sc->sc_ifp->if_xname, error);
6833 kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
6834 }
6835 }
6836
6837 /*
6838 * Resize the number of hash buckets based on the load factor
6839 * Currently only grow
6840 * Failing to resize the hash table is not fatal
6841 */
6842 static void
6843 bridge_rthash_resize(struct bridge_softc *sc)
6844 {
6845 BRIDGE_LOCK_ASSERT_HELD(sc);
6846
6847 if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
6848 return;
6849 }
6850
6851 /*
6852 * Four entries per hash bucket is our ideal load factor
6853 */
6854 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6855 return;
6856 }
6857 /*
6858 * Hard limit on the size of the routing hash table
6859 */
6860 if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
6861 return;
6862 }
6863
6864 sc->sc_resize_call.bdc_sc = sc;
6865 sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
6866 bridge_schedule_delayed_call(&sc->sc_resize_call);
6867 }
6868
6869 /*
6870 * bridge_rtable_fini:
6871 *
6872 * Deconstruct the route table for this bridge.
6873 */
6874 static void
6875 bridge_rtable_fini(struct bridge_softc *sc)
6876 {
6877 KASSERT(sc->sc_brtcnt == 0,
6878 ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
6879 kfree_type(struct _bridge_rtnode_list, sc->sc_rthash_size,
6880 sc->sc_rthash);
6881 sc->sc_rthash = NULL;
6882 sc->sc_rthash_size = 0;
6883 }
6884
6885 /*
6886 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
6887 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
6888 */
6889 #define mix(a, b, c) \
6890 do { \
6891 a -= b; a -= c; a ^= (c >> 13); \
6892 b -= c; b -= a; b ^= (a << 8); \
6893 c -= a; c -= b; c ^= (b >> 13); \
6894 a -= b; a -= c; a ^= (c >> 12); \
6895 b -= c; b -= a; b ^= (a << 16); \
6896 c -= a; c -= b; c ^= (b >> 5); \
6897 a -= b; a -= c; a ^= (c >> 3); \
6898 b -= c; b -= a; b ^= (a << 10); \
6899 c -= a; c -= b; c ^= (b >> 15); \
6900 } while ( /*CONSTCOND*/ 0)
6901
6902 static __inline uint32_t
6903 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
6904 {
6905 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
6906
6907 b += addr[5] << 8;
6908 b += addr[4];
6909 a += addr[3] << 24;
6910 a += addr[2] << 16;
6911 a += addr[1] << 8;
6912 a += addr[0];
6913
6914 mix(a, b, c);
6915
6916 return c & BRIDGE_RTHASH_MASK(sc);
6917 }
6918
6919 #undef mix
6920
/*
 * bridge_rtnode_addr_cmp:
 *	memcmp-style comparison of two Ethernet addresses; returns the
 *	difference of the first differing byte pair (0 when equal).
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int i;

	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		int diff = ((int)a[i]) - ((int)b[i]);

		if (diff != 0) {
			return diff;
		}
	}

	return 0;
}
6932
6933 /*
6934 * bridge_rtnode_lookup:
6935 *
6936 * Look up a bridge route node for the specified destination. Compare the
6937 * vlan id or if zero then just return the first match.
6938 */
6939 static struct bridge_rtnode *
6940 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr,
6941 uint16_t vlan)
6942 {
6943 struct bridge_rtnode *brt;
6944 uint32_t hash;
6945 int dir;
6946
6947 BRIDGE_LOCK_ASSERT_HELD(sc);
6948
6949 hash = bridge_rthash(sc, addr);
6950 LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
6951 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
6952 if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
6953 return brt;
6954 }
6955 if (dir > 0) {
6956 return NULL;
6957 }
6958 }
6959
6960 return NULL;
6961 }
6962
6963 /*
6964 * bridge_rtnode_hash:
6965 *
6966 * Insert the specified bridge node into the route hash table.
6967 * This is used when adding a new node or to rehash when resizing
6968 * the hash table
6969 */
6970 static int
6971 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
6972 {
6973 struct bridge_rtnode *lbrt;
6974 uint32_t hash;
6975 int dir;
6976
6977 BRIDGE_LOCK_ASSERT_HELD(sc);
6978
6979 hash = bridge_rthash(sc, brt->brt_addr);
6980
6981 lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
6982 if (lbrt == NULL) {
6983 LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
6984 goto out;
6985 }
6986
6987 do {
6988 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
6989 if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
6990 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6991 "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
6992 sc->sc_ifp->if_xname,
6993 brt->brt_addr[0], brt->brt_addr[1],
6994 brt->brt_addr[2], brt->brt_addr[3],
6995 brt->brt_addr[4], brt->brt_addr[5]);
6996 return EEXIST;
6997 }
6998 if (dir > 0) {
6999 LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7000 goto out;
7001 }
7002 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7003 LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7004 goto out;
7005 }
7006 lbrt = LIST_NEXT(lbrt, brt_hash);
7007 } while (lbrt != NULL);
7008
7009 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7010 "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7011 sc->sc_ifp->if_xname,
7012 brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7013 brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7014 out:
7015 return 0;
7016 }
7017
7018 /*
7019 * bridge_rtnode_insert:
7020 *
7021 * Insert the specified bridge node into the route table. We
7022 * assume the entry is not already in the table.
7023 */
7024 static int
7025 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7026 {
7027 int error;
7028
7029 error = bridge_rtnode_hash(sc, brt);
7030 if (error != 0) {
7031 return error;
7032 }
7033
7034 LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7035 sc->sc_brtcnt++;
7036
7037 bridge_rthash_resize(sc);
7038
7039 return 0;
7040 }
7041
7042 /*
7043 * bridge_rtnode_destroy:
7044 *
7045 * Destroy a bridge rtnode.
7046 */
7047 static void
7048 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7049 {
7050 BRIDGE_LOCK_ASSERT_HELD(sc);
7051
7052 LIST_REMOVE(brt, brt_hash);
7053
7054 LIST_REMOVE(brt, brt_list);
7055 sc->sc_brtcnt--;
7056 brt->brt_dst->bif_addrcnt--;
7057 zfree(bridge_rtnode_pool, brt);
7058 }
7059
7060 #if BRIDGESTP
7061 /*
7062 * bridge_rtable_expire:
7063 *
7064 * Set the expiry time for all routes on an interface.
7065 */
7066 static void
7067 bridge_rtable_expire(struct ifnet *ifp, int age)
7068 {
7069 struct bridge_softc *sc = ifp->if_bridge;
7070 struct bridge_rtnode *brt;
7071
7072 BRIDGE_LOCK(sc);
7073
7074 /*
7075 * If the age is zero then flush, otherwise set all the expiry times to
7076 * age for the interface
7077 */
7078 if (age == 0) {
7079 bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7080 } else {
7081 unsigned long now;
7082
7083 now = (unsigned long) net_uptime();
7084
7085 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7086 /* Cap the expiry time to 'age' */
7087 if (brt->brt_ifp == ifp &&
7088 brt->brt_expire > now + age &&
7089 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7090 brt->brt_expire = now + age;
7091 }
7092 }
7093 }
7094 BRIDGE_UNLOCK(sc);
7095 }
7096
7097 /*
7098 * bridge_state_change:
7099 *
7100 * Callback from the bridgestp code when a port changes states.
7101 */
7102 static void
7103 bridge_state_change(struct ifnet *ifp, int state)
7104 {
7105 struct bridge_softc *sc = ifp->if_bridge;
7106 static const char *stpstates[] = {
7107 "disabled",
7108 "listening",
7109 "learning",
7110 "forwarding",
7111 "blocking",
7112 "discarding"
7113 };
7114
7115 if (log_stp) {
7116 log(LOG_NOTICE, "%s: state changed to %s on %s",
7117 sc->sc_ifp->if_xname,
7118 stpstates[state], ifp->if_xname);
7119 }
7120 }
7121 #endif /* BRIDGESTP */
7122
7123 /*
7124 * bridge_set_bpf_tap:
7125 *
7126 * Sets ups the BPF callbacks.
7127 */
7128 static errno_t
7129 bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
7130 {
7131 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7132
7133 /* TBD locking */
7134 if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
7135 return ENODEV;
7136 }
7137 switch (mode) {
7138 case BPF_TAP_DISABLE:
7139 sc->sc_bpf_input = sc->sc_bpf_output = NULL;
7140 break;
7141
7142 case BPF_TAP_INPUT:
7143 sc->sc_bpf_input = bpf_callback;
7144 break;
7145
7146 case BPF_TAP_OUTPUT:
7147 sc->sc_bpf_output = bpf_callback;
7148 break;
7149
7150 case BPF_TAP_INPUT_OUTPUT:
7151 sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
7152 break;
7153
7154 default:
7155 break;
7156 }
7157
7158 return 0;
7159 }
7160
7161 /*
7162 * bridge_detach:
7163 *
7164 * Callback when interface has been detached.
7165 */
7166 static void
7167 bridge_detach(ifnet_t ifp)
7168 {
7169 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7170
7171 #if BRIDGESTP
7172 bstp_detach(&sc->sc_stp);
7173 #endif /* BRIDGESTP */
7174
7175 /* Tear down the routing table. */
7176 bridge_rtable_fini(sc);
7177
7178 lck_mtx_lock(&bridge_list_mtx);
7179 LIST_REMOVE(sc, sc_list);
7180 lck_mtx_unlock(&bridge_list_mtx);
7181
7182 ifnet_release(ifp);
7183
7184 lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
7185 if_clone_softc_deallocate(&bridge_cloner, sc);
7186 }
7187
7188 /*
7189 * bridge_bpf_input:
7190 *
7191 * Invoke the input BPF callback if enabled
7192 */
7193 static errno_t
7194 bridge_bpf_input(ifnet_t ifp, struct mbuf *m, const char * func, int line)
7195 {
7196 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7197 bpf_packet_func input_func = sc->sc_bpf_input;
7198
7199 if (input_func != NULL) {
7200 if (mbuf_pkthdr_rcvif(m) != ifp) {
7201 BRIDGE_LOG(LOG_NOTICE, 0,
7202 "%s.%d: rcvif: 0x%llx != ifp 0x%llx", func, line,
7203 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
7204 (uint64_t)VM_KERNEL_ADDRPERM(ifp));
7205 }
7206 (*input_func)(ifp, m);
7207 }
7208 return 0;
7209 }
7210
7211 /*
7212 * bridge_bpf_output:
7213 *
7214 * Invoke the output BPF callback if enabled
7215 */
7216 static errno_t
7217 bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
7218 {
7219 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7220 bpf_packet_func output_func = sc->sc_bpf_output;
7221
7222 if (output_func != NULL) {
7223 (*output_func)(ifp, m);
7224 }
7225 return 0;
7226 }
7227
7228 /*
7229 * bridge_link_event:
7230 *
7231 * Report a data link event on an interface
7232 */
7233 static void
7234 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7235 {
7236 struct event {
7237 u_int32_t ifnet_family;
7238 u_int32_t unit;
7239 char if_name[IFNAMSIZ];
7240 };
7241 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7242 struct kern_event_msg *header = (struct kern_event_msg*)message;
7243 struct event *data = (struct event *)(header + 1);
7244
7245 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7246 "%s event_code %u - %s", ifp->if_xname,
7247 event_code, dlil_kev_dl_code_str(event_code));
7248 header->total_size = sizeof(message);
7249 header->vendor_code = KEV_VENDOR_APPLE;
7250 header->kev_class = KEV_NETWORK_CLASS;
7251 header->kev_subclass = KEV_DL_SUBCLASS;
7252 header->event_code = event_code;
7253 data->ifnet_family = ifnet_family(ifp);
7254 data->unit = (u_int32_t)ifnet_unit(ifp);
7255 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7256 ifnet_event(ifp, header);
7257 }
7258
/*
 * BRIDGE_HF_DROP:
 *	Record a host-filter drop: bump the per-reason statistic, log
 *	the reason with the caller's function and line, and set the
 *	local 'error' to EINVAL so the caller rejects the packet.
 *	Wrapped in do/while(0) so the macro expands as a single
 *	statement and is safe in unbraced if/else bodies.
 */
#define BRIDGE_HF_DROP(reason, func, line) do { \
	bridge_hostfilter_stats.reason++; \
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER, \
	    "%s.%d " #reason, func, line); \
	error = EINVAL; \
} while (0)
7265
7266 /*
7267 * Make sure this is a DHCP or Bootp request that match the host filter
7268 */
7269 static int
7270 bridge_dhcp_filter(struct bridge_iflist *bif, struct mbuf *m, size_t offset)
7271 {
7272 int error = EINVAL;
7273 struct dhcp dhcp;
7274
7275 /*
7276 * Note: We use the dhcp structure because bootp structure definition
7277 * is larger and some vendors do not pad the request
7278 */
7279 error = mbuf_copydata(m, offset, sizeof(struct dhcp), &dhcp);
7280 if (error != 0) {
7281 BRIDGE_HF_DROP(brhf_dhcp_too_small, __func__, __LINE__);
7282 goto done;
7283 }
7284 if (dhcp.dp_op != BOOTREQUEST) {
7285 BRIDGE_HF_DROP(brhf_dhcp_bad_op, __func__, __LINE__);
7286 goto done;
7287 }
7288 /*
7289 * The hardware address must be an exact match
7290 */
7291 if (dhcp.dp_htype != ARPHRD_ETHER) {
7292 BRIDGE_HF_DROP(brhf_dhcp_bad_htype, __func__, __LINE__);
7293 goto done;
7294 }
7295 if (dhcp.dp_hlen != ETHER_ADDR_LEN) {
7296 BRIDGE_HF_DROP(brhf_dhcp_bad_hlen, __func__, __LINE__);
7297 goto done;
7298 }
7299 if (bcmp(dhcp.dp_chaddr, bif->bif_hf_hwsrc,
7300 ETHER_ADDR_LEN) != 0) {
7301 BRIDGE_HF_DROP(brhf_dhcp_bad_chaddr, __func__, __LINE__);
7302 goto done;
7303 }
7304 /*
7305 * Client address must match the host address or be not specified
7306 */
7307 if (dhcp.dp_ciaddr.s_addr != bif->bif_hf_ipsrc.s_addr &&
7308 dhcp.dp_ciaddr.s_addr != INADDR_ANY) {
7309 BRIDGE_HF_DROP(brhf_dhcp_bad_ciaddr, __func__, __LINE__);
7310 goto done;
7311 }
7312 error = 0;
7313 done:
7314 return error;
7315 }
7316
/*
 * bridge_host_filter:
 *	Enforce the per-member host filter on an inbound frame: the
 *	source MAC must match the member's configured hardware address
 *	and only ARP and a restricted set of IPv4 traffic (matching the
 *	configured source IP, plus DHCP/Bootp client requests) is
 *	allowed through.
 *
 *	Returns 0 when the packet passes.  On any failure the mbuf is
 *	freed and EINVAL is returned (BRIDGE_HF_DROP also records the
 *	specific drop reason in bridge_hostfilter_stats).  '*data' may
 *	be replaced when the headers have to be pulled up.
 */
static int
bridge_host_filter(struct bridge_iflist *bif, mbuf_t *data)
{
	int error = EINVAL;
	struct ether_header *eh;
	static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
	mbuf_t m = *data;

	eh = mtod(m, struct ether_header *);

	/*
	 * Restrict the source hardware address
	 */
	if ((bif->bif_flags & BIFF_HF_HWSRC) == 0 ||
	    bcmp(eh->ether_shost, bif->bif_hf_hwsrc,
	    ETHER_ADDR_LEN) != 0) {
		BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr, __func__, __LINE__);
		goto done;
	}

	/*
	 * Restrict Ethernet protocols to ARP and IP
	 */
	if (eh->ether_type == htons(ETHERTYPE_ARP)) {
		struct ether_arp *ea;
		size_t minlen = sizeof(struct ether_header) +
		    sizeof(struct ether_arp);

		/*
		 * Make the Ethernet and ARP headers contiguous
		 */
		if (mbuf_pkthdr_len(m) < minlen) {
			BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
			goto done;
		}
		if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
			BRIDGE_HF_DROP(brhf_arp_pullup_failed,
			    __func__, __LINE__);
			goto done;
		}
		/* pullup may have replaced the mbuf chain */
		m = *data;

		/*
		 * Verify this is an ethernet/ip arp
		 */
		eh = mtod(m, struct ether_header *);
		ea = (struct ether_arp *)(eh + 1);
		if (ea->arp_hrd != htons(ARPHRD_ETHER)) {
			BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
			    __func__, __LINE__);
			goto done;
		}
		if (ea->arp_pro != htons(ETHERTYPE_IP)) {
			BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
			    __func__, __LINE__);
			goto done;
		}
		/*
		 * Verify the address lengths are correct
		 */
		if (ea->arp_hln != ETHER_ADDR_LEN) {
			BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
			goto done;
		}
		if (ea->arp_pln != sizeof(struct in_addr)) {
			BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
			    __func__, __LINE__);
			goto done;
		}

		/*
		 * Allow only ARP request or ARP reply
		 */
		if (ea->arp_op != htons(ARPOP_REQUEST) &&
		    ea->arp_op != htons(ARPOP_REPLY)) {
			BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
			goto done;
		}
		/*
		 * Verify source hardware address matches
		 */
		if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
		    ETHER_ADDR_LEN) != 0) {
			BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
			goto done;
		}
		/*
		 * Verify source protocol address:
		 * May be null for an ARP probe
		 */
		if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
		    sizeof(struct in_addr)) != 0 &&
		    bcmp(ea->arp_spa, &inaddr_any,
		    sizeof(struct in_addr)) != 0) {
			BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
			goto done;
		}
		bridge_hostfilter_stats.brhf_arp_ok += 1;
		error = 0;
	} else if (eh->ether_type == htons(ETHERTYPE_IP)) {
		size_t minlen = sizeof(struct ether_header) + sizeof(struct ip);
		struct ip iphdr;
		size_t offset;

		/*
		 * Make the Ethernet and IP headers contiguous
		 */
		if (mbuf_pkthdr_len(m) < minlen) {
			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
			goto done;
		}
		offset = sizeof(struct ether_header);
		error = mbuf_copydata(m, offset, sizeof(struct ip), &iphdr);
		if (error != 0) {
			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
			goto done;
		}
		/*
		 * Verify the source IP address
		 */
		if (iphdr.ip_p == IPPROTO_UDP) {
			struct udphdr udp;

			minlen += sizeof(struct udphdr);
			if (mbuf_pkthdr_len(m) < minlen) {
				BRIDGE_HF_DROP(brhf_ip_too_small,
				    __func__, __LINE__);
				goto done;
			}

			/*
			 * Allow all zero addresses for DHCP requests
			 */
			if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr &&
			    iphdr.ip_src.s_addr != INADDR_ANY) {
				BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
				    __func__, __LINE__);
				goto done;
			}
			/* skip the variable-length IP header (IHL) */
			offset = sizeof(struct ether_header) +
			    (IP_VHL_HL(iphdr.ip_vhl) << 2);
			error = mbuf_copydata(m, offset,
			    sizeof(struct udphdr), &udp);
			if (error != 0) {
				BRIDGE_HF_DROP(brhf_ip_too_small,
				    __func__, __LINE__);
				goto done;
			}
			/*
			 * Either it's a Bootp/DHCP packet that we like or
			 * it's a UDP packet from the host IP as source address
			 */
			if (udp.uh_sport == htons(IPPORT_BOOTPC) &&
			    udp.uh_dport == htons(IPPORT_BOOTPS)) {
				minlen += sizeof(struct dhcp);
				if (mbuf_pkthdr_len(m) < minlen) {
					BRIDGE_HF_DROP(brhf_ip_too_small,
					    __func__, __LINE__);
					goto done;
				}
				offset += sizeof(struct udphdr);
				error = bridge_dhcp_filter(bif, m, offset);
				if (error != 0) {
					goto done;
				}
			} else if (iphdr.ip_src.s_addr == INADDR_ANY) {
				BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
				    __func__, __LINE__);
				goto done;
			}
		} else if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr ||
		    bif->bif_hf_ipsrc.s_addr == INADDR_ANY) {
			BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, __func__, __LINE__);
			goto done;
		}
		/*
		 * Allow only boring IP protocols
		 */
		if (iphdr.ip_p != IPPROTO_TCP &&
		    iphdr.ip_p != IPPROTO_UDP &&
		    iphdr.ip_p != IPPROTO_ICMP &&
		    iphdr.ip_p != IPPROTO_ESP &&
		    iphdr.ip_p != IPPROTO_AH &&
		    iphdr.ip_p != IPPROTO_GRE) {
			BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
			goto done;
		}
		bridge_hostfilter_stats.brhf_ip_ok += 1;
		error = 0;
	} else {
		BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
		goto done;
	}
done:
	/* On failure, optionally dump the headers, then free the mbuf. */
	if (error != 0) {
		if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
			if (m) {
				brlog_mbuf_data(m, 0,
				    sizeof(struct ether_header) +
				    sizeof(struct ip));
			}
		}

		if (m != NULL) {
			m_freem(m);
		}
	}
	return error;
}
7526
7527 /*
7528 * MAC NAT
7529 */
7530
7531 static errno_t
7532 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7533 {
7534 errno_t error = 0;
7535
7536 BRIDGE_LOCK_ASSERT_HELD(sc);
7537
7538 if (IFNET_IS_VMNET(bif->bif_ifp)) {
7539 error = EINVAL;
7540 goto done;
7541 }
7542 if (sc->sc_mac_nat_bif != NULL) {
7543 if (sc->sc_mac_nat_bif != bif) {
7544 error = EBUSY;
7545 }
7546 goto done;
7547 }
7548 sc->sc_mac_nat_bif = bif;
7549 bif->bif_ifflags |= IFBIF_MAC_NAT;
7550 bridge_mac_nat_populate_entries(sc);
7551
7552 done:
7553 return error;
7554 }
7555
7556 static void
7557 bridge_mac_nat_disable(struct bridge_softc *sc)
7558 {
7559 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7560
7561 assert(mac_nat_bif != NULL);
7562 bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7563 mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7564 sc->sc_mac_nat_bif = NULL;
7565 return;
7566 }
7567
7568 static void
7569 mac_nat_entry_print2(struct mac_nat_entry *mne,
7570 char *ifname, const char *msg1, const char *msg2)
7571 {
7572 int af;
7573 char etopbuf[24];
7574 char ntopbuf[MAX_IPv6_STR_LEN];
7575 const char *space;
7576
7577 af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7578 ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7579 (void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7580 if (msg2 == NULL) {
7581 msg2 = "";
7582 space = "";
7583 } else {
7584 space = " ";
7585 }
7586 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7587 "%s %s%s%s %p (%s, %s, %s)",
7588 ifname, msg1, space, msg2, mne, mne->mne_bif->bif_ifp->if_xname,
7589 ntopbuf, etopbuf);
7590 }
7591
7592 static void
7593 mac_nat_entry_print(struct mac_nat_entry *mne,
7594 char *ifname, const char *msg)
7595 {
7596 mac_nat_entry_print2(mne, ifname, msg, NULL);
7597 }
7598
7599 static struct mac_nat_entry *
7600 bridge_lookup_mac_nat_entry(struct bridge_softc *sc, int af, void * ip)
7601 {
7602 struct mac_nat_entry *mne;
7603 struct mac_nat_entry *ret_mne = NULL;
7604
7605 if (af == AF_INET) {
7606 in_addr_t s_addr = ((struct in_addr *)ip)->s_addr;
7607
7608 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7609 if (mne->mne_ip.s_addr == s_addr) {
7610 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7611 mac_nat_entry_print(mne, sc->sc_if_xname,
7612 "found");
7613 }
7614 ret_mne = mne;
7615 break;
7616 }
7617 }
7618 } else {
7619 const struct in6_addr *ip6 = (const struct in6_addr *)ip;
7620
7621 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7622 if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7623 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7624 mac_nat_entry_print(mne, sc->sc_if_xname,
7625 "found");
7626 }
7627 ret_mne = mne;
7628 break;
7629 }
7630 }
7631 }
7632 return ret_mne;
7633 }
7634
7635 static void
7636 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7637 struct mac_nat_entry *mne, const char *reason)
7638 {
7639 LIST_REMOVE(mne, mne_list);
7640 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7641 mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7642 }
7643 zfree(bridge_mne_pool, mne);
7644 sc->sc_mne_count--;
7645 }
7646
7647 static struct mac_nat_entry *
7648 bridge_create_mac_nat_entry(struct bridge_softc *sc,
7649 struct bridge_iflist *bif, int af, const void *ip, uint8_t *eaddr)
7650 {
7651 struct mac_nat_entry_list *list;
7652 struct mac_nat_entry *mne;
7653
7654 if (sc->sc_mne_count >= sc->sc_mne_max) {
7655 sc->sc_mne_allocation_failures++;
7656 return NULL;
7657 }
7658 mne = zalloc_noblock(bridge_mne_pool);
7659 if (mne == NULL) {
7660 sc->sc_mne_allocation_failures++;
7661 return NULL;
7662 }
7663 sc->sc_mne_count++;
7664 bzero(mne, sizeof(*mne));
7665 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7666 mne->mne_bif = bif;
7667 if (af == AF_INET) {
7668 bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7669 list = &sc->sc_mne_list;
7670 } else {
7671 bcopy(ip, &mne->mne_ip6, sizeof(mne->mne_ip6));
7672 mne->mne_flags |= MNE_FLAGS_IPV6;
7673 list = &sc->sc_mne_list_v6;
7674 }
7675 LIST_INSERT_HEAD(list, mne, mne_list);
7676 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7677 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7678 mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7679 }
7680 return mne;
7681 }
7682
/*
 * bridge_update_mac_nat_entry:
 *   Look up the entry for (af, ip) and refresh it for traffic seen from
 *   member 'bif' with source MAC 'eaddr'; create a new entry when none
 *   exists.  An entry owned by the MAC NAT interface itself is never
 *   stolen by another member (the local stack's addresses win).
 *   Returns the entry, or NULL if creation failed.
 */
static struct mac_nat_entry *
bridge_update_mac_nat_entry(struct bridge_softc *sc,
    struct bridge_iflist *bif, int af, void *ip, uint8_t *eaddr)
{
	struct mac_nat_entry *mne;

	mne = bridge_lookup_mac_nat_entry(sc, af, ip);
	if (mne != NULL) {
		struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;

		if (mne->mne_bif == mac_nat_bif) {
			/* the MAC NAT interface takes precedence */
			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
				if (mne->mne_bif != bif) {
					/* another member tried to claim it */
					mac_nat_entry_print2(mne,
					    sc->sc_if_xname, "reject",
					    bif->bif_ifp->if_xname);
				}
			}
		} else if (mne->mne_bif != bif) {
			/* the address moved to a different member */
			const char *old_if = mne->mne_bif->bif_ifp->if_xname;

			mne->mne_bif = bif;
			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
				mac_nat_entry_print2(mne,
				    sc->sc_if_xname, "replaced",
				    old_if);
			}
			bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
		}
		/* any hit refreshes the expiration timer */
		mne->mne_expire = (unsigned long)net_uptime() +
		    sc->sc_brttimeout;
	} else {
		mne = bridge_create_mac_nat_entry(sc, bif, af, ip, eaddr);
	}
	return mne;
}
7720
7721 static void
7722 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7723 struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7724 {
7725 struct mac_nat_entry *mne;
7726 struct mac_nat_entry *tmne;
7727
7728 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7729 if (bif != NULL && mne->mne_bif != bif) {
7730 continue;
7731 }
7732 bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7733 }
7734 }
7735
7736 /*
7737 * bridge_mac_nat_flush_entries:
7738 *
7739 * Flush MAC NAT entries for the specified member. Flush all entries if
7740 * the member is the one that requires MAC NAT, otherwise just flush the
7741 * ones for the specified member.
7742 */
7743 static void
7744 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7745 {
7746 struct bridge_iflist *flush_bif;
7747
7748 flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7749 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7750 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7751 }
7752
/*
 * bridge_mac_nat_populate_entries:
 *   Pre-populate the MAC NAT table with the IPv4/IPv6 addresses already
 *   assigned to the MAC NAT interface so the local stack's addresses
 *   are claimed (mapped to the interface's own MAC) before any other
 *   member can take them.  Called with the bridge lock held.
 */
static void
bridge_mac_nat_populate_entries(struct bridge_softc *sc)
{
	errno_t error;
	ifnet_t ifp;
	ifaddr_t *list;
	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;

	assert(mac_nat_bif != NULL);
	ifp = mac_nat_bif->bif_ifp;
	/* snapshot of the interface's addresses (NULL-terminated array) */
	error = ifnet_get_address_list(ifp, &list);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "ifnet_get_address_list(%s) failed %d",
		    ifp->if_xname, error);
		return;
	}
	for (ifaddr_t *scan = list; *scan != NULL; scan++) {
		sa_family_t af;
		void *ip;

		/* buffer large enough for either sockaddr_in or sockaddr_in6 */
		union {
			struct sockaddr sa;
			struct sockaddr_in sin;
			struct sockaddr_in6 sin6;
		} u;
		af = ifaddr_address_family(*scan);
		switch (af) {
		case AF_INET:
		case AF_INET6:
			error = ifaddr_address(*scan, &u.sa, sizeof(u));
			if (error != 0) {
				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
				    "ifaddr_address failed %d",
				    error);
				break;
			}
			if (af == AF_INET) {
				ip = (void *)&u.sin.sin_addr;
			} else {
				if (IN6_IS_ADDR_LINKLOCAL(&u.sin6.sin6_addr)) {
					/* remove scope ID (embedded in addr word 1) */
					u.sin6.sin6_addr.s6_addr16[1] = 0;
				}
				ip = (void *)&u.sin6.sin6_addr;
			}
			/* local addresses all map to the interface's own MAC */
			bridge_create_mac_nat_entry(sc, mac_nat_bif, af, ip,
			    (uint8_t *)IF_LLADDR(ifp));
			break;
		default:
			break;
		}
	}
	ifnet_free_address_list(list);
	return;
}
7809
7810 static void
7811 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7812 struct mac_nat_entry_list *list, unsigned long now)
7813 {
7814 struct mac_nat_entry *mne;
7815 struct mac_nat_entry *tmne;
7816
7817 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7818 if (now >= mne->mne_expire) {
7819 bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7820 }
7821 }
7822 }
7823
7824 static void
7825 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7826 {
7827 if (sc->sc_mac_nat_bif == NULL) {
7828 return;
7829 }
7830 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7831 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7832 }
7833
7834 static const char *
7835 get_in_out_string(boolean_t is_output)
7836 {
7837 return is_output ? "OUT" : "IN";
7838 }
7839
7840 /*
7841 * is_valid_arp_packet:
7842 * Verify that this is a valid ARP packet.
7843 *
7844 * Returns TRUE if the packet is valid, FALSE otherwise.
7845 */
7846 static boolean_t
7847 is_valid_arp_packet(mbuf_t *data, boolean_t is_output,
7848 struct ether_header **eh_p, struct ether_arp **ea_p)
7849 {
7850 struct ether_arp *ea;
7851 struct ether_header *eh;
7852 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7853 boolean_t is_valid = FALSE;
7854 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7855
7856 if (mbuf_pkthdr_len(*data) < minlen) {
7857 BRIDGE_LOG(LOG_DEBUG, flags,
7858 "ARP %s short frame %lu < %lu",
7859 get_in_out_string(is_output),
7860 mbuf_pkthdr_len(*data), minlen);
7861 goto done;
7862 }
7863 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7864 BRIDGE_LOG(LOG_DEBUG, flags,
7865 "ARP %s size %lu mbuf_pullup fail",
7866 get_in_out_string(is_output),
7867 minlen);
7868 *data = NULL;
7869 goto done;
7870 }
7871
7872 /* validate ARP packet */
7873 eh = mtod(*data, struct ether_header *);
7874 ea = (struct ether_arp *)(eh + 1);
7875 if (ntohs(ea->arp_hrd) != ARPHRD_ETHER) {
7876 BRIDGE_LOG(LOG_DEBUG, flags,
7877 "ARP %s htype not ethernet",
7878 get_in_out_string(is_output));
7879 goto done;
7880 }
7881 if (ea->arp_hln != ETHER_ADDR_LEN) {
7882 BRIDGE_LOG(LOG_DEBUG, flags,
7883 "ARP %s hlen not ethernet",
7884 get_in_out_string(is_output));
7885 goto done;
7886 }
7887 if (ntohs(ea->arp_pro) != ETHERTYPE_IP) {
7888 BRIDGE_LOG(LOG_DEBUG, flags,
7889 "ARP %s ptype not IP",
7890 get_in_out_string(is_output));
7891 goto done;
7892 }
7893 if (ea->arp_pln != sizeof(struct in_addr)) {
7894 BRIDGE_LOG(LOG_DEBUG, flags,
7895 "ARP %s plen not IP",
7896 get_in_out_string(is_output));
7897 goto done;
7898 }
7899 is_valid = TRUE;
7900 *ea_p = ea;
7901 *eh_p = eh;
7902 done:
7903 return is_valid;
7904 }
7905
7906 static struct mac_nat_entry *
7907 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7908 {
7909 struct ether_arp *ea;
7910 struct ether_header *eh;
7911 struct mac_nat_entry *mne = NULL;
7912 u_short op;
7913 struct in_addr tpa;
7914
7915 if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7916 goto done;
7917 }
7918 op = ntohs(ea->arp_op);
7919 switch (op) {
7920 case ARPOP_REQUEST:
7921 case ARPOP_REPLY:
7922 /* only care about REQUEST and REPLY */
7923 break;
7924 default:
7925 goto done;
7926 }
7927
7928 /* check the target IP address for a NAT entry */
7929 bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7930 if (tpa.s_addr != 0) {
7931 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &tpa);
7932 }
7933 if (mne != NULL) {
7934 if (op == ARPOP_REPLY) {
7935 /* translate the MAC address */
7936 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7937 char mac_src[24];
7938 char mac_dst[24];
7939
7940 ether_ntop(mac_src, sizeof(mac_src),
7941 ea->arp_tha);
7942 ether_ntop(mac_dst, sizeof(mac_dst),
7943 mne->mne_mac);
7944 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7945 "%s %s ARP %s -> %s",
7946 sc->sc_if_xname,
7947 mne->mne_bif->bif_ifp->if_xname,
7948 mac_src, mac_dst);
7949 }
7950 bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
7951 }
7952 } else {
7953 /* handle conflicting ARP (sender matches mne) */
7954 struct in_addr spa;
7955
7956 bcopy(ea->arp_spa, &spa, sizeof(spa));
7957 if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
7958 /* check the source IP for a NAT entry */
7959 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &spa);
7960 }
7961 }
7962
7963 done:
7964 return mne;
7965 }
7966
7967 static boolean_t
7968 bridge_mac_nat_arp_output(struct bridge_softc *sc,
7969 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7970 {
7971 struct ether_arp *ea;
7972 struct ether_header *eh;
7973 struct in_addr ip;
7974 struct mac_nat_entry *mne = NULL;
7975 u_short op;
7976 boolean_t translate = FALSE;
7977
7978 if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
7979 goto done;
7980 }
7981 op = ntohs(ea->arp_op);
7982 switch (op) {
7983 case ARPOP_REQUEST:
7984 case ARPOP_REPLY:
7985 /* only care about REQUEST and REPLY */
7986 break;
7987 default:
7988 goto done;
7989 }
7990
7991 bcopy(ea->arp_spa, &ip, sizeof(ip));
7992 if (ip.s_addr == 0) {
7993 goto done;
7994 }
7995 /* XXX validate IP address: no multicast/broadcast */
7996 mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip, ea->arp_sha);
7997 if (mnr != NULL && mne != NULL) {
7998 /* record the offset to do the replacement */
7999 translate = TRUE;
8000 mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
8001 }
8002
8003 done:
8004 return translate;
8005 }
8006
8007 #define ETHER_IPV4_HEADER_LEN (sizeof(struct ether_header) + \
8008 + sizeof(struct ip))
8009 static struct ether_header *
8010 get_ether_ip_header(mbuf_t *data, boolean_t is_output)
8011 {
8012 struct ether_header *eh = NULL;
8013 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8014 size_t minlen = ETHER_IPV4_HEADER_LEN;
8015
8016 if (mbuf_pkthdr_len(*data) < minlen) {
8017 BRIDGE_LOG(LOG_DEBUG, flags,
8018 "IP %s short frame %lu < %lu",
8019 get_in_out_string(is_output),
8020 mbuf_pkthdr_len(*data), minlen);
8021 goto done;
8022 }
8023 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8024 BRIDGE_LOG(LOG_DEBUG, flags,
8025 "IP %s size %lu mbuf_pullup fail",
8026 get_in_out_string(is_output),
8027 minlen);
8028 *data = NULL;
8029 goto done;
8030 }
8031 eh = mtod(*data, struct ether_header *);
8032 done:
8033 return eh;
8034 }
8035
8036 static bool
8037 is_broadcast_ip_packet(mbuf_t *data)
8038 {
8039 struct ether_header *eh;
8040 uint16_t ether_type;
8041 bool is_broadcast = FALSE;
8042
8043 eh = mtod(*data, struct ether_header *);
8044 ether_type = ntohs(eh->ether_type);
8045 switch (ether_type) {
8046 case ETHERTYPE_IP:
8047 eh = get_ether_ip_header(data, FALSE);
8048 if (eh != NULL) {
8049 struct in_addr dst;
8050 struct ip *iphdr;
8051
8052 iphdr = (struct ip *)(void *)(eh + 1);
8053 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8054 is_broadcast = (dst.s_addr == INADDR_BROADCAST);
8055 }
8056 break;
8057 default:
8058 break;
8059 }
8060 return is_broadcast;
8061 }
8062
8063 static struct mac_nat_entry *
8064 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8065 {
8066 struct in_addr dst;
8067 struct ether_header *eh;
8068 struct ip *iphdr;
8069 struct mac_nat_entry *mne = NULL;
8070
8071 eh = get_ether_ip_header(data, FALSE);
8072 if (eh == NULL) {
8073 goto done;
8074 }
8075 iphdr = (struct ip *)(void *)(eh + 1);
8076 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8077 /* XXX validate IP address */
8078 if (dst.s_addr == 0) {
8079 goto done;
8080 }
8081 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &dst);
8082 done:
8083 return mne;
8084 }
8085
8086 static void
8087 bridge_mac_nat_udp_output(struct bridge_softc *sc,
8088 struct bridge_iflist *bif, mbuf_t m,
8089 uint8_t ip_header_len, struct mac_nat_record *mnr)
8090 {
8091 uint16_t dp_flags;
8092 errno_t error;
8093 size_t offset;
8094 struct udphdr udphdr;
8095
8096 /* copy the UDP header */
8097 offset = sizeof(struct ether_header) + ip_header_len;
8098 error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
8099 if (error != 0) {
8100 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8101 "mbuf_copydata udphdr failed %d",
8102 error);
8103 return;
8104 }
8105 if (ntohs(udphdr.uh_sport) != IPPORT_BOOTPC ||
8106 ntohs(udphdr.uh_dport) != IPPORT_BOOTPS) {
8107 /* not a BOOTP/DHCP packet */
8108 return;
8109 }
8110 /* check whether the broadcast bit is already set */
8111 offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8112 error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
8113 if (error != 0) {
8114 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8115 "mbuf_copydata dp_flags failed %d",
8116 error);
8117 return;
8118 }
8119 if ((ntohs(dp_flags) & DHCP_FLAGS_BROADCAST) != 0) {
8120 /* it's already set, nothing to do */
8121 return;
8122 }
8123 /* broadcast bit needs to be set */
8124 mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8125 mnr->mnr_ip_header_len = ip_header_len;
8126 if (udphdr.uh_sum != 0) {
8127 uint16_t delta;
8128
8129 /* adjust checksum to take modified dp_flags into account */
8130 delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8131 mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8132 }
8133 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8134 "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8135 sc->sc_if_xname,
8136 bif->bif_ifp->if_xname,
8137 ntohs(mnr->mnr_ip_dhcp_flags),
8138 ntohs(mnr->mnr_ip_udp_csum));
8139 return;
8140 }
8141
8142 static boolean_t
8143 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8144 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8145 {
8146 #pragma unused(mnr)
8147 struct ether_header *eh;
8148 struct in_addr ip;
8149 struct ip *iphdr;
8150 uint8_t ip_header_len;
8151 struct mac_nat_entry *mne = NULL;
8152 boolean_t translate = FALSE;
8153
8154 eh = get_ether_ip_header(data, TRUE);
8155 if (eh == NULL) {
8156 goto done;
8157 }
8158 iphdr = (struct ip *)(void *)(eh + 1);
8159 ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8160 if (ip_header_len < sizeof(ip)) {
8161 /* bogus IP header */
8162 goto done;
8163 }
8164 bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8165 /* XXX validate the source address */
8166 if (ip.s_addr != 0) {
8167 mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip,
8168 eh->ether_shost);
8169 }
8170 if (mnr != NULL) {
8171 if (iphdr->ip_p == IPPROTO_UDP) {
8172 /* handle DHCP must broadcast */
8173 bridge_mac_nat_udp_output(sc, bif, *data,
8174 ip_header_len, mnr);
8175 }
8176 translate = TRUE;
8177 }
8178 done:
8179 return translate;
8180 }
8181
8182 #define ETHER_IPV6_HEADER_LEN (sizeof(struct ether_header) + \
8183 + sizeof(struct ip6_hdr))
8184 static struct ether_header *
8185 get_ether_ipv6_header(mbuf_t *data, boolean_t is_output)
8186 {
8187 struct ether_header *eh = NULL;
8188 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8189 size_t minlen = ETHER_IPV6_HEADER_LEN;
8190
8191 if (mbuf_pkthdr_len(*data) < minlen) {
8192 BRIDGE_LOG(LOG_DEBUG, flags,
8193 "IP %s short frame %lu < %lu",
8194 get_in_out_string(is_output),
8195 mbuf_pkthdr_len(*data), minlen);
8196 goto done;
8197 }
8198 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8199 BRIDGE_LOG(LOG_DEBUG, flags,
8200 "IP %s size %lu mbuf_pullup fail",
8201 get_in_out_string(is_output),
8202 minlen);
8203 *data = NULL;
8204 goto done;
8205 }
8206 eh = mtod(*data, struct ether_header *);
8207 done:
8208 return eh;
8209 }
8210
8211 #include <netinet/icmp6.h>
8212 #include <netinet6/nd6.h>
8213
8214 #define ETHER_ND_LLADDR_LEN (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8215
/*
 * bridge_mac_nat_icmpv6_output:
 *   Examine an outbound ICMPv6 packet for Neighbor Discovery messages
 *   (Neighbor Solicit, Neighbor Advert, Router Solicit) that embed a
 *   link-layer address option.  When a valid option is present, record
 *   its offset and the ICMPv6 geometry in 'mnr' so the embedded MAC can
 *   be rewritten (and the checksum recomputed) after the bridge lock is
 *   dropped.  For an NS that is a DAD probe (unspecified source),
 *   *saddrp is replaced with the probed target address so the caller
 *   creates the mac_nat_entry for it.
 *
 *   NOTE(review): assumes the ethernet/IPv6 headers are contiguous
 *   (caller pulled them up) and that no IPv6 extension headers precede
 *   the ICMPv6 header ('off' is fixed at sizeof(struct ip6_hdr)) —
 *   confirm against the caller, bridge_mac_nat_ipv6_output().
 */
static void
bridge_mac_nat_icmpv6_output(struct bridge_softc *sc, struct bridge_iflist *bif,
    mbuf_t *data, struct ether_header *eh,
    struct ip6_hdr *ip6h, struct in6_addr *saddrp, struct mac_nat_record *mnr)
{
	struct icmp6_hdr *icmp6;
	unsigned int icmp6len;
	int lladdrlen = 0;
	char *lladdr = NULL;
	mbuf_t m = *data;
	unsigned int off = sizeof(*ip6h);

	/* ICMPv6 length = frame length minus ethernet and IPv6 headers */
	icmp6len = m->m_pkthdr.len - sizeof(*eh) - off;
	if (icmp6len < sizeof(*icmp6)) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "short packet %d < %lu",
		    icmp6len, sizeof(*icmp6));
		return;
	}
	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
	switch (icmp6->icmp6_type) {
	case ND_NEIGHBOR_SOLICIT: {
		struct nd_neighbor_solicit *nd_ns;
		union nd_opts ndopts;
		boolean_t is_dad_probe;
		struct in6_addr taddr;

		if (icmp6len < sizeof(*nd_ns)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_ns %d < %lu",
			    icmp6len, sizeof(*nd_ns));
			return;
		}

		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
		/* bcopy: the target may be misaligned within the frame */
		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}
		/* parse options */
		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NS option");
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			/* option length is in units of 8 bytes */
			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
		}
		/* DAD probes use the unspecified address as source */
		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
		if (lladdr != NULL) {
			if (is_dad_probe) {
				/* RFC 4861: DAD NS must not carry a source lladdr */
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "bad ND6 DAD packet");
				return;
			}
			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
			/* record where/how to rewrite the source lladdr */
			mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr -
			    (uintptr_t)eh);
			mnr->mnr_ip6_icmp6_len = icmp6len;
			mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
			mnr->mnr_ip6_header_len = off;
		}
		if (is_dad_probe) {
			/* node is trying use taddr, create an mne using taddr */
			*saddrp = taddr;
		}
		break;
	}
	case ND_NEIGHBOR_ADVERT: {
		struct nd_neighbor_advert *nd_na;
		union nd_opts ndopts;
		struct in6_addr taddr;


		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;

		if (icmp6len < sizeof(*nd_na)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_na %d < %lu",
			    icmp6len, sizeof(*nd_na));
			return;
		}

		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}
		/* parse options */
		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NA option");
			return;
		}
		if (ndopts.nd_opts_tgt_lladdr == NULL) {
			/* no target lladdr option, nothing to rewrite */
			return;
		}
		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "target lladdrlen %d != %lu",
			    lladdrlen, ETHER_ND_LLADDR_LEN);
			return;
		}
		/* record where/how to rewrite the target lladdr */
		mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr - (uintptr_t)eh);
		mnr->mnr_ip6_icmp6_len = icmp6len;
		mnr->mnr_ip6_header_len = off;
		mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
		break;
	}
	case ND_ROUTER_SOLICIT: {
		struct nd_router_solicit *nd_rs;
		union nd_opts ndopts;

		if (icmp6len < sizeof(*nd_rs)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_rs %d < %lu",
			    icmp6len, sizeof(*nd_rs));
			return;
		}
		nd_rs = (struct nd_router_solicit *)(void *)icmp6;

		/* parse options */
		nd6_option_init(nd_rs + 1, icmp6len - sizeof(*nd_rs), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 RS option");
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
		}
		if (lladdr != NULL) {
			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
			/* record where/how to rewrite the source lladdr */
			mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr -
			    (uintptr_t)eh);
			mnr->mnr_ip6_icmp6_len = icmp6len;
			mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
			mnr->mnr_ip6_header_len = off;
		}
		break;
	}
	default:
		break;
	}
	/* debug trace of whatever rewrite was recorded above */
	if (mnr->mnr_ip6_lladdr_offset != 0 &&
	    BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
		const char *str;

		switch (mnr->mnr_ip6_icmp6_type) {
		case ND_ROUTER_SOLICIT:
			str = "ROUTER SOLICIT";
			break;
		case ND_NEIGHBOR_ADVERT:
			str = "NEIGHBOR ADVERT";
			break;
		case ND_NEIGHBOR_SOLICIT:
			str = "NEIGHBOR SOLICIT";
			break;
		default:
			str = "";
			break;
		}
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
		    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
		    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
		    mnr->mnr_ip6_header_len,
		    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
	}
}
8407
8408 static struct mac_nat_entry *
8409 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8410 {
8411 struct in6_addr dst;
8412 struct ether_header *eh;
8413 struct ip6_hdr *ip6h;
8414 struct mac_nat_entry *mne = NULL;
8415
8416 eh = get_ether_ipv6_header(data, FALSE);
8417 if (eh == NULL) {
8418 goto done;
8419 }
8420 ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8421 bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8422 /* XXX validate IPv6 address */
8423 if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8424 goto done;
8425 }
8426 mne = bridge_lookup_mac_nat_entry(sc, AF_INET6, &dst);
8427
8428 done:
8429 return mne;
8430 }
8431
8432 static boolean_t
8433 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8434 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8435 {
8436 struct ether_header *eh;
8437 struct ip6_hdr *ip6h;
8438 struct in6_addr saddr;
8439 boolean_t translate;
8440
8441 translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8442 eh = get_ether_ipv6_header(data, TRUE);
8443 if (eh == NULL) {
8444 translate = FALSE;
8445 goto done;
8446 }
8447 ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8448 bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8449 if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8450 bridge_mac_nat_icmpv6_output(sc, bif, data,
8451 eh, ip6h, &saddr, mnr);
8452 }
8453 if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8454 goto done;
8455 }
8456 (void)bridge_update_mac_nat_entry(sc, bif, AF_INET6, &saddr,
8457 eh->ether_shost);
8458
8459 done:
8460 return translate;
8461 }
8462
8463 /*
8464 * bridge_mac_nat_input:
8465 * Process a packet arriving on the MAC NAT interface (sc_mac_nat_bif).
8466 * This interface is the "external" interface with respect to NAT.
8467 * The interface is only capable of receiving a single MAC address
8468 * (e.g. a Wi-Fi STA interface).
8469 *
8470 * When a packet arrives on the external interface, look up the destination
8471 * IP address in the mac_nat_entry table. If there is a match, *is_input
8472 * is set to TRUE if it's for the MAC NAT interface, otherwise *is_input
8473 * is set to FALSE and translate the MAC address if necessary.
8474 *
8475 * Returns:
8476 * The internal interface to direct the packet to, or NULL if the packet
8477 * should not be redirected.
8478 *
8479 * *data may be updated to point at a different mbuf chain, or set to NULL
8480 * if the chain was deallocated during processing.
8481 */
8482 static ifnet_t
8483 bridge_mac_nat_input(struct bridge_softc *sc, mbuf_t *data,
8484 boolean_t *is_input)
8485 {
8486 ifnet_t dst_if = NULL;
8487 struct ether_header *eh;
8488 uint16_t ether_type;
8489 boolean_t is_unicast;
8490 mbuf_t m = *data;
8491 struct mac_nat_entry *mne = NULL;
8492
8493 BRIDGE_LOCK_ASSERT_HELD(sc);
8494 *is_input = FALSE;
8495 assert(sc->sc_mac_nat_bif != NULL);
8496 is_unicast = ((m->m_flags & (M_BCAST | M_MCAST)) == 0);
8497 eh = mtod(m, struct ether_header *);
8498 ether_type = ntohs(eh->ether_type);
8499 switch (ether_type) {
8500 case ETHERTYPE_ARP:
8501 mne = bridge_mac_nat_arp_input(sc, data);
8502 break;
8503 case ETHERTYPE_IP:
8504 if (is_unicast) {
8505 mne = bridge_mac_nat_ip_input(sc, data);
8506 }
8507 break;
8508 case ETHERTYPE_IPV6:
8509 if (is_unicast) {
8510 mne = bridge_mac_nat_ipv6_input(sc, data);
8511 }
8512 break;
8513 default:
8514 break;
8515 }
8516 if (mne != NULL) {
8517 if (is_unicast) {
8518 if (m != *data) {
8519 /* it may have changed */
8520 eh = mtod(*data, struct ether_header *);
8521 }
8522 bcopy(mne->mne_mac, eh->ether_dhost,
8523 sizeof(eh->ether_dhost));
8524 }
8525 dst_if = mne->mne_bif->bif_ifp;
8526 *is_input = (mne->mne_bif == sc->sc_mac_nat_bif);
8527 }
8528 return dst_if;
8529 }
8530
8531 /*
8532 * bridge_mac_nat_output:
8533 * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8534 * from the interface 'bif'.
8535 *
8536 * Create a mac_nat_entry containing the source IP address and MAC address
8537 * from the packet. Populate a mac_nat_record with information detailing
8538 * how to translate the packet. Translation takes place later when
8539 * the bridge lock is no longer held.
8540 *
8541 * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8542 * interface is generating an output packet. No translation is required in this
8543 * case, we just record the IP address used to prevent another bif from
8544 * claiming our IP address.
8545 *
8546 * Returns:
8547 * TRUE if the packet should be translated (*mnr updated as well),
8548 * FALSE otherwise.
8549 *
8550 * *data may be updated to point at a different mbuf chain or NULL if
8551 * the chain was deallocated during processing.
8552 */
8553
8554 static boolean_t
8555 bridge_mac_nat_output(struct bridge_softc *sc,
8556 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8557 {
8558 struct ether_header *eh;
8559 uint16_t ether_type;
8560 boolean_t translate = FALSE;
8561
8562 BRIDGE_LOCK_ASSERT_HELD(sc);
8563 assert(sc->sc_mac_nat_bif != NULL);
8564
8565 eh = mtod(*data, struct ether_header *);
8566 ether_type = ntohs(eh->ether_type);
8567 if (mnr != NULL) {
8568 bzero(mnr, sizeof(*mnr));
8569 mnr->mnr_ether_type = ether_type;
8570 }
8571 switch (ether_type) {
8572 case ETHERTYPE_ARP:
8573 translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8574 break;
8575 case ETHERTYPE_IP:
8576 translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8577 break;
8578 case ETHERTYPE_IPV6:
8579 translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8580 break;
8581 default:
8582 break;
8583 }
8584 return translate;
8585 }
8586
8587 static void
8588 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8589 const caddr_t eaddr)
8590 {
8591 errno_t error;
8592
8593 if (mnr->mnr_arp_offset == 0) {
8594 return;
8595 }
8596 /* replace the source hardware address */
8597 error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8598 ETHER_ADDR_LEN, eaddr,
8599 MBUF_DONTWAIT);
8600 if (error != 0) {
8601 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8602 "mbuf_copyback failed");
8603 m_freem(*data);
8604 *data = NULL;
8605 }
8606 return;
8607 }
8608
8609 static void
8610 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8611 {
8612 errno_t error;
8613 size_t offset;
8614
8615 if (mnr->mnr_ip_header_len == 0) {
8616 return;
8617 }
8618 /* update the UDP checksum */
8619 offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8620 error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8621 sizeof(mnr->mnr_ip_udp_csum),
8622 &mnr->mnr_ip_udp_csum,
8623 MBUF_DONTWAIT);
8624 if (error != 0) {
8625 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8626 "mbuf_copyback uh_sum failed");
8627 m_freem(*data);
8628 *data = NULL;
8629 }
8630 /* update the DHCP must broadcast flag */
8631 offset += sizeof(struct udphdr);
8632 error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8633 sizeof(mnr->mnr_ip_dhcp_flags),
8634 &mnr->mnr_ip_dhcp_flags,
8635 MBUF_DONTWAIT);
8636 if (error != 0) {
8637 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8638 "mbuf_copyback dp_flags failed");
8639 m_freem(*data);
8640 *data = NULL;
8641 }
8642 }
8643
/*
 * bridge_mac_nat_ipv6_translate:
 * Rewrite the link-layer address option inside an outbound ICMPv6
 * neighbor-discovery packet (RS/NS/NA) with `eaddr`, then recompute the
 * ICMPv6 checksum over the new contents.  On any failure the mbuf is
 * freed and *data is set to NULL.
 */
static void
bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
    const caddr_t eaddr)
{
	uint16_t cksum;
	errno_t error;
	mbuf_t m = *data;

	/* only packets whose IPv6 header was recorded need translation */
	if (mnr->mnr_ip6_header_len == 0) {
		return;
	}
	switch (mnr->mnr_ip6_icmp6_type) {
	case ND_ROUTER_SOLICIT:
	case ND_NEIGHBOR_SOLICIT:
	case ND_NEIGHBOR_ADVERT:
		if (mnr->mnr_ip6_lladdr_offset == 0) {
			/* nothing to do */
			return;
		}
		break;
	default:
		/* other ICMPv6 types carry no lladdr option to rewrite */
		return;
	}

	/*
	 * replace the lladdr
	 */
	error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
	    ETHER_ADDR_LEN, eaddr,
	    MBUF_DONTWAIT);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "mbuf_copyback lladdr failed");
		m_freem(m);
		*data = NULL;
		return;
	}

	/*
	 * recompute the icmp6 checksum
	 */

	/* skip past the ethernet header so in6_cksum() sees the IPv6 header */
	mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
	    mbuf_len(m) - ETHER_HDR_LEN);
	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);

#define CKSUM_OFFSET_ICMP6 offsetof(struct icmp6_hdr, icmp6_cksum)
	/* set the checksum to zero (it must not contribute to the new sum) */
	cksum = 0;
	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "mbuf_copyback cksum=0 failed");
		m_freem(m);
		*data = NULL;
		return;
	}
	/* compute and set the new checksum */
	cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
	    mnr->mnr_ip6_icmp6_len);
	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "mbuf_copyback cksum failed");
		m_freem(m);
		*data = NULL;
		return;
	}
	/* restore the ethernet header (undo the setdata/adjustlen above) */
	mbuf_setdata(m, (char *)mbuf_data(m) - ETHER_HDR_LEN,
	    mbuf_len(m) + ETHER_HDR_LEN);
	mbuf_pkthdr_adjustlen(m, ETHER_HDR_LEN);
	return;
}
8721
8722 static void
8723 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8724 const caddr_t eaddr)
8725 {
8726 struct ether_header *eh;
8727
8728 /* replace the source ethernet address with the single MAC */
8729 eh = mtod(*data, struct ether_header *);
8730 bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8731 switch (mnr->mnr_ether_type) {
8732 case ETHERTYPE_ARP:
8733 bridge_mac_nat_arp_translate(data, mnr, eaddr);
8734 break;
8735
8736 case ETHERTYPE_IP:
8737 bridge_mac_nat_ip_translate(data, mnr);
8738 break;
8739
8740 case ETHERTYPE_IPV6:
8741 bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8742 break;
8743
8744 default:
8745 break;
8746 }
8747 return;
8748 }
8749
8750 /*
8751 * bridge packet filtering
8752 */
8753
8754 /*
8755 * Perform basic checks on header size since
8756 * pfil assumes ip_input has already processed
8757 * it for it. Cut-and-pasted from ip_input.c.
8758 * Given how simple the IPv6 version is,
8759 * does the IPv4 version really need to be
8760 * this complicated?
8761 *
8762 * XXX Should we update ipstat here, or not?
8763 * XXX Right now we update ipstat but not
8764 * XXX csum_counter.
8765 */
static int
bridge_ip_checkbasic(struct mbuf **mp)
{
	struct mbuf *m = *mp;
	struct ip *ip;
	int len, hlen;
	u_short sum;

	if (*mp == NULL) {
		return -1;
	}

	/* ensure the IP header is both aligned and contiguous in the first mbuf */
	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
		/* max_linkhdr is already rounded up to nearest 4-byte */
		if ((m = m_copyup(m, sizeof(struct ip),
		    max_linkhdr)) == NULL) {
			/* XXXJRT new stat, please */
			ipstat.ips_toosmall++;
			goto bad;
		}
	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
		if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
			ipstat.ips_toosmall++;
			goto bad;
		}
	}
	ip = mtod(m, struct ip *);
	if (ip == NULL) {
		goto bad;
	}

	/* version must be IPv4 */
	if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
		ipstat.ips_badvers++;
		goto bad;
	}
	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
	if (hlen < (int)sizeof(struct ip)) { /* minimum header length */
		ipstat.ips_badhlen++;
		goto bad;
	}
	/* options present: pull the whole header (with options) contiguous */
	if (hlen > m->m_len) {
		if ((m = m_pullup(m, hlen)) == 0) {
			ipstat.ips_badhlen++;
			goto bad;
		}
		ip = mtod(m, struct ip *);
		if (ip == NULL) {
			goto bad;
		}
	}

	/* verify the header checksum, honoring any hardware-verified result */
	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
	} else {
		if (hlen == sizeof(struct ip)) {
			sum = in_cksum_hdr(ip);
		} else {
			sum = in_cksum(m, hlen);
		}
	}
	if (sum) {
		ipstat.ips_badsum++;
		goto bad;
	}

	/* Retrieve the packet length. */
	len = ntohs(ip->ip_len);

	/*
	 * Check for additional length bogosity
	 */
	if (len < hlen) {
		ipstat.ips_badlen++;
		goto bad;
	}

	/*
	 * Check that the amount of data in the buffers
	 * is as at least much as the IP header would have us expect.
	 * Drop packet if shorter than we expect.
	 */
	if (m->m_pkthdr.len < len) {
		ipstat.ips_tooshort++;
		goto bad;
	}

	/* Checks out, proceed */
	*mp = m;
	return 0;

bad:
	/* note: m may be NULL here if a pullup/copyup failed and freed it */
	*mp = m;
	return -1;
}
8860
8861 /*
8862 * Same as above, but for IPv6.
8863 * Cut-and-pasted from ip6_input.c.
8864 * XXX Should we update ip6stat, or not?
8865 */
8866 static int
8867 bridge_ip6_checkbasic(struct mbuf **mp)
8868 {
8869 struct mbuf *m = *mp;
8870 struct ip6_hdr *ip6;
8871
8872 /*
8873 * If the IPv6 header is not aligned, slurp it up into a new
8874 * mbuf with space for link headers, in the event we forward
8875 * it. Otherwise, if it is aligned, make sure the entire base
8876 * IPv6 header is in the first mbuf of the chain.
8877 */
8878 if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8879 struct ifnet *inifp = m->m_pkthdr.rcvif;
8880 /* max_linkhdr is already rounded up to nearest 4-byte */
8881 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
8882 max_linkhdr)) == NULL) {
8883 /* XXXJRT new stat, please */
8884 ip6stat.ip6s_toosmall++;
8885 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8886 goto bad;
8887 }
8888 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
8889 struct ifnet *inifp = m->m_pkthdr.rcvif;
8890 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
8891 ip6stat.ip6s_toosmall++;
8892 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8893 goto bad;
8894 }
8895 }
8896
8897 ip6 = mtod(m, struct ip6_hdr *);
8898
8899 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
8900 ip6stat.ip6s_badvers++;
8901 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
8902 goto bad;
8903 }
8904
8905 /* Checks out, proceed */
8906 *mp = m;
8907 return 0;
8908
8909 bad:
8910 *mp = m;
8911 return -1;
8912 }
8913
8914 /*
8915 * the PF routines expect to be called from ip_input, so we
8916 * need to do and undo here some of the same processing.
8917 *
8918 * XXX : this is heavily inspired on bridge_pfil()
8919 */
static int
bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
    int input)
{
	/*
	 * XXX : mpetit : heavily inspired by bridge_pfil()
	 *
	 * Run a bridged packet through the pf firewall as if it were being
	 * processed by ip_input()/ip6_input(): strip the link-layer (and any
	 * SNAP) header, basic-check the IP header, call pf_af_hook(), then
	 * rebuild the frame.  Returns 0 to pass, nonzero to drop (in which
	 * case *mp is NULL).
	 */

	int snap, error, i, hlen;
	struct ether_header *eh1, eh2;
	struct ip *ip;
	struct llc llc1;
	u_int16_t ether_type;

	snap = 0;
	error = -1; /* Default error if not error == 0 */

	if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
		return 0; /* filtering is disabled */
	}
	/* make the leading protocol headers contiguous for direct access */
	i = min((*mp)->m_pkthdr.len, max_protohdr);
	if ((*mp)->m_len < i) {
		*mp = m_pullup(*mp, i);
		if (*mp == NULL) {
			BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
			return -1;
		}
	}

	eh1 = mtod(*mp, struct ether_header *);
	ether_type = ntohs(eh1->ether_type);

	/*
	 * Check for SNAP/LLC.
	 */
	if (ether_type < ETHERMTU) {
		struct llc *llc2 = (struct llc *)(eh1 + 1);

		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
		    llc2->llc_dsap == LLC_SNAP_LSAP &&
		    llc2->llc_ssap == LLC_SNAP_LSAP &&
		    llc2->llc_control == LLC_UI) {
			/* use the encapsulated ethertype from the SNAP header */
			ether_type = htons(llc2->llc_un.type_snap.ether_type);
			snap = 1;
		}
	}

	/*
	 * If we're trying to filter bridge traffic, don't look at anything
	 * other than IP and ARP traffic. If the filter doesn't understand
	 * IPv6, don't allow IPv6 through the bridge either. This is lame
	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
	 * but of course we don't have an AppleTalk filter to begin with.
	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
	 * ARP traffic.)
	 */
	switch (ether_type) {
	case ETHERTYPE_ARP:
	case ETHERTYPE_REVARP:
		return 0; /* Automatically pass */

	case ETHERTYPE_IP:
	case ETHERTYPE_IPV6:
		break;
	default:
		/*
		 * Check to see if the user wants to pass non-ip
		 * packets, these will not be checked by pf and
		 * passed unconditionally so the default is to drop.
		 */
		if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
			goto bad;
		}
		break;
	}

	/* Strip off the Ethernet header and keep a copy. */
	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
	m_adj(*mp, ETHER_HDR_LEN);

	/* Strip off snap header, if present */
	if (snap) {
		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
		m_adj(*mp, sizeof(struct llc));
	}

	/*
	 * Check the IP header for alignment and errors
	 */
	switch (ether_type) {
	case ETHERTYPE_IP:
		error = bridge_ip_checkbasic(mp);
		break;
	case ETHERTYPE_IPV6:
		error = bridge_ip6_checkbasic(mp);
		break;
	default:
		error = 0;
		break;
	}
	if (error) {
		goto bad;
	}

	error = 0;

	/*
	 * Run the packet through pf rules
	 */
	switch (ether_type) {
	case ETHERTYPE_IP:
		/*
		 * before calling the firewall, swap fields the same as
		 * IP does. here we assume the header is contiguous
		 */
		ip = mtod(*mp, struct ip *);

		ip->ip_len = ntohs(ip->ip_len);
		ip->ip_off = ntohs(ip->ip_off);

		if (ifp != NULL) {
			error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
		}

		if (*mp == NULL || error != 0) { /* filter may consume */
			break;
		}

		/* Recalculate the ip checksum and restore byte ordering */
		ip = mtod(*mp, struct ip *);
		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
		if (hlen < (int)sizeof(struct ip)) {
			goto bad;
		}
		/* pf may have replaced the mbuf; re-pull the header if needed */
		if (hlen > (*mp)->m_len) {
			if ((*mp = m_pullup(*mp, hlen)) == 0) {
				goto bad;
			}
			ip = mtod(*mp, struct ip *);
			if (ip == NULL) {
				goto bad;
			}
		}
		ip->ip_len = htons(ip->ip_len);
		ip->ip_off = htons(ip->ip_off);
		ip->ip_sum = 0;
		if (hlen == sizeof(struct ip)) {
			ip->ip_sum = in_cksum_hdr(ip);
		} else {
			ip->ip_sum = in_cksum(*mp, hlen);
		}
		break;

	case ETHERTYPE_IPV6:
		if (ifp != NULL) {
			error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
		}

		if (*mp == NULL || error != 0) { /* filter may consume */
			break;
		}
		break;
	default:
		error = 0;
		break;
	}

	if (*mp == NULL) {
		return error;
	}
	if (error != 0) {
		goto bad;
	}

	error = -1;

	/*
	 * Finally, put everything back the way it was and return
	 */
	if (snap) {
		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
		if (*mp == NULL) {
			/* M_PREPEND frees the chain on failure */
			return error;
		}
		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
	}

	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
	if (*mp == NULL) {
		return error;
	}
	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);

	return 0;

bad:
	m_freem(*mp);
	*mp = NULL;
	return error;
}
9120
9121 /*
9122 * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
9123 * All rights reserved.
9124 *
9125 * Redistribution and use in source and binary forms, with or without
9126 * modification, are permitted provided that the following conditions
9127 * are met:
9128 * 1. Redistributions of source code must retain the above copyright
9129 * notice, this list of conditions and the following disclaimer.
9130 * 2. Redistributions in binary form must reproduce the above copyright
9131 * notice, this list of conditions and the following disclaimer in the
9132 * documentation and/or other materials provided with the distribution.
9133 *
9134 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9135 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9136 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
9137 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
9138 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
9139 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
9140 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9141 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
9142 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
9143 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
9144 * SUCH DAMAGE.
9145 */
9146
9147 /*
9148 * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
9149 *
9150 * Create a queue of packets/segments which fit the given mss + hdr_len.
9151 * m0 points to mbuf chain to be segmented.
 * This function splits the payload (m0->m_pkthdr.len - hdr_len)
9153 * into segments of length MSS bytes and then copy the first hdr_len bytes
9154 * from m0 at the top of each segment.
9155 * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
9156 * in each segment after the first hdr_len bytes
9157 *
9158 * Return the new queue with the segments on success, NULL on failure.
9159 * (the mbuf queue is freed in this case).
9160 * nsegs contains the number of segments generated.
9161 */
9162
static struct mbuf *
m_seg(struct mbuf *m0, int hdr_len, int mss, int *nsegs,
    char * hdr2_buf, int hdr2_len)
{
	int off = 0, n, firstlen;
	struct mbuf **mnext, *mseg;
	int total_len = m0->m_pkthdr.len;

	/*
	 * Segmentation useless
	 */
	if (total_len <= hdr_len + mss) {
		return m0;
	}

	/* normalize: treat a missing/empty optional header as absent */
	if (hdr2_buf == NULL || hdr2_len <= 0) {
		hdr2_buf = NULL;
		hdr2_len = 0;
	}

	off = hdr_len + mss;
	firstlen = mss; /* first segment stored in the original mbuf */

	mnext = &(m0->m_nextpkt); /* pointer to next packet */

	/* build segments 2..n; the first segment stays in m0 (fixed up below) */
	for (n = 1; off < total_len; off += mss, n++) {
		struct mbuf *m;
		/*
		 * Copy the header from the original packet
		 * and create a new mbuf chain
		 */
		if (MHLEN < hdr_len) {
			/* header doesn't fit in a plain mbuf; use a cluster */
			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		} else {
			m = m_gethdr(M_NOWAIT, MT_DATA);
		}

		if (m == NULL) {
#ifdef GSO_DEBUG
			D("MGETHDR error\n");
#endif
			goto err;
		}

		m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));

		m->m_len = hdr_len;
		/*
		 * if the optional header is present, copy it
		 */
		if (hdr2_buf != NULL) {
			m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
		}

		m->m_flags |= (m0->m_flags & M_COPYFLAGS);
		if (off + mss >= total_len) { /* last segment */
			mss = total_len - off;
		}
		/*
		 * Copy the payload from original packet
		 */
		mseg = m_copym(m0, off, mss, M_NOWAIT);
		if (mseg == NULL) {
			m_freem(m);
#ifdef GSO_DEBUG
			D("m_copym error\n");
#endif
			goto err;
		}
		m_cat(m, mseg);

		m->m_pkthdr.len = hdr_len + hdr2_len + mss;
		m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		/*
		 * Copy the checksum flags and data (in_cksum() need this)
		 */
		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
		m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
		m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;

		/* append to the packet chain */
		*mnext = m;
		mnext = &(m->m_nextpkt);
	}

	/*
	 * Update first segment.
	 * If the optional header is present, is necessary
	 * to insert it into the first segment.
	 */
	if (hdr2_buf == NULL) {
		/* negative m_adj() trims from the tail of the chain */
		m_adj(m0, hdr_len + firstlen - total_len);
		m0->m_pkthdr.len = hdr_len + firstlen;
	} else {
		/* save the first payload, splice hdr2 after hdr, re-append */
		mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
		if (mseg == NULL) {
#ifdef GSO_DEBUG
			D("m_copym error\n");
#endif
			goto err;
		}
		m_adj(m0, hdr_len - total_len);
		m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
		m_cat(m0, mseg);
		m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
	}

	if (nsegs != NULL) {
		*nsegs = n;
	}
	return m0;
err:
	/* free the entire partially-built packet chain */
	while (m0 != NULL) {
		mseg = m0->m_nextpkt;
		m0->m_nextpkt = NULL;
		m_freem(m0);
		m0 = mseg;
	}
	return NULL;
}
9282
9283 /*
9284 * Wrappers of IPv4 checksum functions
9285 */
static inline void
gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
{
	/*
	 * Finalize the delayed transport (TCP) checksum in software.
	 * Temporarily hide the ethernet header so the mbuf starts at the IP
	 * header, as in_delayed_cksum() expects; restore it afterwards.
	 */
	m->m_data += mac_hlen;
	m->m_len -= mac_hlen;
	m->m_pkthdr.len -= mac_hlen;
#if __FreeBSD_version < 1000000
	/*
	 * NOTE(review): __FreeBSD_version is presumably not defined in this
	 * build (an undefined macro evaluates to 0 in #if), so this swap IS
	 * compiled in — confirm in_delayed_cksum() here expects ip_len in
	 * host byte order.
	 */
	ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
#endif

	in_delayed_cksum(m);

#if __FreeBSD_version < 1000000
	ip->ip_len = htons(ip->ip_len);
#endif
	/* checksum now stored in the packet; clear the delayed-data flag */
	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
	/* restore the ethernet header */
	m->m_len += mac_hlen;
	m->m_pkthdr.len += mac_hlen;
	m->m_data -= mac_hlen;
}
9306
9307 static inline void
9308 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
9309 {
9310 m->m_data += mac_hlen;
9311
9312 ip->ip_sum = in_cksum(m, ip_hlen);
9313
9314 m->m_pkthdr.csum_flags &= ~CSUM_IP;
9315 m->m_data -= mac_hlen;
9316 }
9317
9318 /*
9319 * Structure that contains the state during the TCP segmentation
9320 */
struct gso_ip_tcp_state {
	/* refresh the header pointers/payload length for the current segment */
	void (*update)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* finalize the IP/TCP headers and checksums of the current segment */
	void (*internal)
	(struct gso_ip_tcp_state*, struct mbuf*);
	union iphdr hdr;        /* IP header (v4 or v6) of current segment */
	struct tcphdr *tcp;     /* TCP header of current segment */
	int mac_hlen;           /* link-layer (ethernet) header length */
	int ip_hlen;            /* IP header length */
	int tcp_hlen;           /* TCP header length (th_off << 2) */
	int hlen;               /* mac_hlen + ip_hlen + tcp_hlen */
	int pay_len;            /* TCP payload length of current segment */
	int sw_csum;            /* checksum work to be done in software */
	uint32_t tcp_seq;       /* running TCP sequence number (host order) */
	uint16_t ip_id;         /* running IPv4 identification (host order) */
	boolean_t is_tx;        /* NOTE(review): not set by gso_ip_tcp_init_state — confirm who initializes it */
};
9338
9339 /*
9340 * Update the pointers to TCP and IPv4 headers
9341 */
9342 static inline void
9343 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9344 {
9345 state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) + state->mac_hlen);
9346 state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) + state->ip_hlen);
9347 state->pay_len = m->m_pkthdr.len - state->hlen;
9348 }
9349
9350 /*
9351 * Set properly the TCP and IPv4 headers
9352 */
static inline void
gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
{
	/*
	 * Update IP header: fresh identification and per-segment total length.
	 */
	state->hdr.ip->ip_id = htons((state->ip_id)++);
	state->hdr.ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
	/*
	 * TCP Checksum: seed th_sum with the pseudo-header sum; the payload
	 * portion is folded in below (software) or by offload hardware.
	 */
	state->tcp->th_sum = 0;
	state->tcp->th_sum = in_pseudo(state->hdr.ip->ip_src.s_addr,
	    state->hdr.ip->ip_dst.s_addr,
	    htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
	/*
	 * Checksum HW not supported (TCP)
	 */
	if (state->sw_csum & CSUM_DELAY_DATA) {
		gso_ipv4_data_cksum(m, state->hdr.ip, state->mac_hlen);
	}

	/* advance the running sequence number past this segment's payload */
	state->tcp_seq += state->pay_len;
	/*
	 * IP Checksum
	 */
	state->hdr.ip->ip_sum = 0;
	/*
	 * Checksum HW not supported (IP)
	 */
	if (state->sw_csum & CSUM_IP) {
		gso_ipv4_hdr_cksum(m, state->hdr.ip, state->mac_hlen, state->ip_hlen);
	}
}
9387
9388
9389 /*
9390 * Updates the pointers to TCP and IPv6 headers
9391 */
9392 static inline void
9393 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9394 {
9395 state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + state->mac_hlen);
9396 state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) + state->ip_hlen);
9397 state->pay_len = m->m_pkthdr.len - state->hlen;
9398 }
9399
9400 /*
9401 * Sets properly the TCP and IPv6 headers
9402 */
static inline void
gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
{
	/* per-segment IPv6 payload length (everything after the IPv6 header) */
	state->hdr.ip6->ip6_plen = htons(m->m_pkthdr.len -
	    state->mac_hlen - state->ip_hlen);
	/*
	 * TCP Checksum: seed th_sum with the pseudo-header sum; the payload
	 * portion is folded in below (software) or by offload hardware.
	 */
	state->tcp->th_sum = 0;
	state->tcp->th_sum = in6_pseudo(&state->hdr.ip6->ip6_src,
	    &state->hdr.ip6->ip6_dst,
	    htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
	/*
	 * Checksum HW not supported (TCP)
	 */
	if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
		(void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
	}
	/* advance the running sequence number past this segment's payload */
	state->tcp_seq += state->pay_len;
}
9424
9425 /*
9426 * Init the state during the TCP segmentation
9427 */
9428 static void
9429 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
9430 bool is_ipv4, int mac_hlen, int ip_hlen,
9431 void * ip_hdr, struct tcphdr * tcp_hdr)
9432 {
9433 #pragma unused(ifp)
9434
9435 state->hdr.ptr = ip_hdr;
9436 state->tcp = tcp_hdr;
9437 if (is_ipv4) {
9438 state->ip_id = ntohs(state->hdr.ip->ip_id);
9439 state->update = gso_ipv4_tcp_update;
9440 state->internal = gso_ipv4_tcp_internal;
9441 state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
9442 } else {
9443 state->update = gso_ipv6_tcp_update;
9444 state->internal = gso_ipv6_tcp_internal;
9445 state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
9446 }
9447 state->mac_hlen = mac_hlen;
9448 state->ip_hlen = ip_hlen;
9449 state->tcp_hlen = state->tcp->th_off << 2;
9450 state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
9451 state->tcp_seq = ntohl(state->tcp->th_seq);
9452 //state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
9453 return;
9454 }
9455
9456 /*
9457 * GSO on TCP/IP (v4 or v6)
9458 *
9459 * If is_tx is TRUE, segmented packets are transmitted after they are
9460 * segmented.
9461 *
9462 * If is_tx is FALSE, the segmented packets are returned as a chain in *mp.
9463 */
static int
gso_ip_tcp(struct ifnet *ifp, struct mbuf **mp, struct gso_ip_tcp_state *state,
    boolean_t is_tx)
{
	struct mbuf *m, *m_tx;
	int error = 0;
	int mss = 0;
	int nsegs = 0;
	struct mbuf *m0 = *mp;
#ifdef GSO_STATS
	int total_len = m0->m_pkthdr.len;
#endif /* GSO_STATS */

#if 1
	/* derive the per-segment payload size from the interface MTU */
	mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen;
#else
	if (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) {/* TSO with GSO */
		mss = ifp->if_hw_tsomax - state->ip_hlen - state->tcp_hlen;
	} else {
		mss = m0->m_pkthdr.tso_segsz;
	}
#endif

	/* split into an m_nextpkt chain of <= mss-sized segments */
	*mp = m0 = m_seg(m0, state->hlen, mss, &nsegs, 0, 0);
	if (m0 == NULL) {
		return ENOBUFS; /* XXX ok? */
	}
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
	    "%s %s mss %d nsegs %d",
	    ifp->if_xname,
	    is_tx ? "TX" : "RX",
	    mss, nsegs);
	/*
	 * XXX-ste: can this happen?
	 */
	if (m0->m_nextpkt == NULL) {
#ifdef GSO_DEBUG
		D("only 1 segment");
#endif
		if (is_tx) {
			error = bridge_transmit(ifp, m0);
		}
		return error;
	}
#ifdef GSO_STATS
	GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
	GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
	GSOSTAT_ADD(tcp.gsos_osegments, nsegs);
#endif /* GSO_STATS */

	/* first pkt */
	m = m0;

	state->update(state, m);

	/*
	 * Walk every segment except the last: clear FIN/PUSH (only the final
	 * segment may carry them), finalize headers/checksums, optionally
	 * transmit, then prepare the next segment's headers.
	 */
	do {
		state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);

		state->internal(state, m);
		m_tx = m;
		m = m->m_nextpkt;
		if (is_tx) {
			m_tx->m_nextpkt = NULL;
			if ((error = bridge_transmit(ifp, m_tx)) != 0) {
				/*
				 * XXX: If a segment can not be sent, discard the following
				 * segments and propagate the error to the upper levels.
				 * In this way the TCP retransmits all the initial packet.
				 */
#ifdef GSO_DEBUG
				D("if_transmit error\n");
#endif
				goto err;
			}
		}
		state->update(state, m);

		/* only the first segment keeps CWR; fix up the sequence number */
		state->tcp->th_flags &= ~TH_CWR;
		state->tcp->th_seq = htonl(state->tcp_seq);
	} while (m->m_nextpkt);

	/* last pkt */
	state->internal(state, m);

	if (is_tx) {
		error = bridge_transmit(ifp, m);
#ifdef GSO_DEBUG
		if (error) {
			D("last if_transmit error\n");
			D("error - type = %d \n", error);
		}
#endif
	}
#ifdef GSO_STATS
	if (!error) {
		GSOSTAT_INC(tcp.gsos_segmented);
		GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
		GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
		GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
	}
#endif /* GSO_STATS */
	return error;

err:
#ifdef GSO_DEBUG
	D("error - type = %d \n", error);
#endif
	/* free the remaining (untransmitted) segments */
	while (m != NULL) {
		m_tx = m->m_nextpkt;
		m->m_nextpkt = NULL;
		m_freem(m);
		m = m_tx;
	}
	return error;
}
9579
9580 /*
9581 * GSO for TCP/IPv[46]
9582 */
9583 static int
9584 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
9585 boolean_t is_tx)
9586 {
9587 int error;
9588 ip_packet_info info;
9589 uint32_t csum_flags;
9590 struct gso_ip_tcp_state state;
9591 struct bripstats stats; /* XXX ignored */
9592 struct tcphdr *tcp;
9593
9594 if (!is_tx && ipforwarding == 0) {
9595 /* no need to segment if the packet will not be forwarded */
9596 return 0;
9597 }
9598 error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4, &info, &stats);
9599 if (error != 0) {
9600 if (*mp != NULL) {
9601 m_freem(*mp);
9602 *mp = NULL;
9603 }
9604 return error;
9605 }
9606 if (info.ip_proto_hdr == NULL) {
9607 /* not a TCP packet */
9608 return 0;
9609 }
9610 tcp = (struct tcphdr *)(void *)info.ip_proto_hdr;
9611 gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
9612 info.ip_hlen, info.ip_hdr.ptr, tcp);
9613 if (is_ipv4) {
9614 csum_flags = CSUM_DELAY_DATA; /* XXX */
9615 if (!is_tx) {
9616 /* if RX to our local IP address, don't segment */
9617 struct in_addr dst_ip;
9618
9619 bcopy(&state.hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
9620 if (in_addr_is_ours(dst_ip)) {
9621 return 0;
9622 }
9623 }
9624 } else {
9625 csum_flags = CSUM_DELAY_IPV6_DATA; /* XXX */
9626 if (!is_tx) {
9627 /* if RX to our local IP address, don't segment */
9628 if (in6_addr_is_ours(&state.hdr.ip6->ip6_dst,
9629 ifp->if_index)) {
9630 /* local IP address, no need to segment */
9631 return 0;
9632 }
9633 }
9634 }
9635 (*mp)->m_pkthdr.csum_flags = csum_flags;
9636 (*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
9637 return gso_ip_tcp(ifp, mp, &state, is_tx);
9638 }
9639