1 /*
2 * Copyright (c) 2004-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */
30 /*
31 * Copyright 2001 Wasabi Systems, Inc.
32 * All rights reserved.
33 *
34 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed for the NetBSD Project by
47 * Wasabi Systems, Inc.
48 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49 * or promote products derived from this software without specific prior
50 * written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
56 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62 * POSSIBILITY OF SUCH DAMAGE.
63 */
64
65 /*
66 * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67 * All rights reserved.
68 *
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
71 * are met:
72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88 * POSSIBILITY OF SUCH DAMAGE.
89 *
90 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91 */
92
93 /*
94 * Network interface bridge support.
95 *
96 * TODO:
97 *
98 * - Currently only supports Ethernet-like interfaces (Ethernet,
99 * 802.11, VLANs on Ethernet, etc.) Figure out a nice way
100 * to bridge other types of interfaces (FDDI-FDDI, and maybe
101 * consider heterogenous bridges).
102 *
103 * - GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104 */
105
106 #include <sys/cdefs.h>
107
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123
124 #include <sys/kauth.h>
125
126 #include <kern/thread_call.h>
127
128 #include <libkern/libkern.h>
129
130 #include <kern/zalloc.h>
131
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141 #include <net/pfvar.h>
142
143 #include <netinet/in.h> /* for struct arpcom */
144 #include <netinet/tcp.h> /* for struct tcphdr */
145 #include <netinet/in_systm.h>
146 #include <netinet/in_var.h>
147 #define _IP_VHL
148 #include <netinet/ip.h>
149 #include <netinet/ip_var.h>
150 #include <netinet/ip6.h>
151 #include <netinet6/ip6_var.h>
152 #ifdef DEV_CARP
153 #include <netinet/ip_carp.h>
154 #endif
155 #include <netinet/if_ether.h> /* for struct arpcom */
156 #include <net/bridgestp.h>
157 #include <net/if_bridgevar.h>
158 #include <net/if_llc.h>
159 #if NVLAN > 0
160 #include <net/if_vlan_var.h>
161 #endif /* NVLAN > 0 */
162
163 #include <net/if_ether.h>
164 #include <net/dlil.h>
165 #include <net/kpi_interfacefilter.h>
166
167 #include <net/route.h>
168 #include <dev/random/randomdev.h>
169
170 #include <netinet/bootp.h>
171 #include <netinet/dhcp.h>
172
173 #if SKYWALK
174 #include <skywalk/nexus/netif/nx_netif.h>
175 #endif /* SKYWALK */
176
177 #include <os/log.h>
178
179 /*
180 * if_bridge_debug, BR_DBGF_*
181 * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
182 * to enable additional logs for the corresponding bridge function
183 * - "sysctl net.link.bridge.debug" controls the value of
184 * 'if_bridge_debug'
185 */
186 static uint32_t if_bridge_debug = 0;
187 #define BR_DBGF_LIFECYCLE 0x0001
188 #define BR_DBGF_INPUT 0x0002
189 #define BR_DBGF_OUTPUT 0x0004
190 #define BR_DBGF_RT_TABLE 0x0008
191 #define BR_DBGF_DELAYED_CALL 0x0010
192 #define BR_DBGF_IOCTL 0x0020
193 #define BR_DBGF_MBUF 0x0040
194 #define BR_DBGF_MCAST 0x0080
195 #define BR_DBGF_HOSTFILTER 0x0100
196 #define BR_DBGF_CHECKSUM 0x0200
197 #define BR_DBGF_MAC_NAT 0x0400
198
/*
 * if_bridge_log_level
 * - 'if_bridge_log_level' ensures that by default important logs are
 *   logged regardless of if_bridge_debug by comparing the log level
 *   in BRIDGE_LOG to if_bridge_log_level
 * - "sysctl net.link.bridge.log_level" controls the value of
 *   'if_bridge_log_level'
 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
 *   logs must use LOG_NOTICE to ensure they appear by default
 */
209 static int if_bridge_log_level = LOG_NOTICE;
210
/*
 * BRIDGE_DBGF_ENABLED
 * - evaluates to true if any bit of the BR_DBGF_* mask '__flag' is set
 *   in if_bridge_debug
 * - the argument is parenthesized so that a compound mask, e.g.
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT), is tested as a whole instead of
 *   being split by operator precedence
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)

/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - macros to generate the specified log conditionally based on
 *   the specified log level and debug flags
 * - the log is emitted if '__level' is less than or equal to
 *   if_bridge_log_level, or if any of the '__dbgf' flags is enabled
 * - '__string' must be a string literal: BRIDGE_LOG prepends "%s: " to it
 *   by literal concatenation and passes __func__ as the first argument
 * - BRIDGE_LOG_SIMPLE does not include the function name in the log
 * - '__level' and '__dbgf' are parenthesized so that arbitrary expressions
 *   (conditional, bitwise-or, ...) can be passed safely
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, "%s: " __string,         \
	                       __func__, ## __VA_ARGS__);       \
	        }                                                       \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                    \
	        if ((__level) <= if_bridge_log_level ||         \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                               \
	} while (0)
234
235 #define _BRIDGE_LOCK(_sc) lck_mtx_lock(&(_sc)->sc_mtx)
236 #define _BRIDGE_UNLOCK(_sc) lck_mtx_unlock(&(_sc)->sc_mtx)
237 #define BRIDGE_LOCK_ASSERT_HELD(_sc) \
238 LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
239 #define BRIDGE_LOCK_ASSERT_NOTHELD(_sc) \
240 LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)
241
/*
 * Bridge locking macros
 *
 * BRIDGE_LOCK / BRIDGE_UNLOCK
 * - acquire / release the per-bridge mutex (sc_mtx)
 * BRIDGE_LOCK2REF(_sc, _err)
 * - called with the bridge lock held; trades the lock for a reference on
 *   the member list: sets '_err' to EBUSY if an exclusive request is
 *   pending (sc_iflist_xcnt > 0), otherwise increments sc_iflist_ref and
 *   sets '_err' to 0; the lock is released in both cases
 * BRIDGE_UNREF
 * - drops a reference taken by BRIDGE_LOCK2REF and wakes up a thread
 *   sleeping in BRIDGE_XLOCK once the last reference is gone
 * BRIDGE_XLOCK
 * - called with the bridge lock held; registers an exclusive request and
 *   sleeps until all outstanding references have been dropped
 * BRIDGE_XDROP
 * - called with the bridge lock held; withdraws the exclusive request
 *
 * When BRIDGE_LOCK_DEBUG is non-zero the macros forward to the
 * bridge_lock()/bridge_unlock()/... functions (defined elsewhere in this
 * file; presumably they also record the caller history kept in the softc).
 * The descriptions above are taken from the inline (non-debug) variants.
 */
#define BRIDGE_LOCK_DEBUG 1
#if BRIDGE_LOCK_DEBUG

#define BR_LCKDBG_MAX                   4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
/*
 * Fully parenthesized so the assignment cannot be re-associated by the
 * surrounding expression and '_err' may be any lvalue expression.
 */
#define BRIDGE_LOCK2REF(_sc, _err)      ((_err) = bridge_lock2ref(_sc))
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	if ((_sc)->sc_iflist_xcnt > 0)                                  \
	        (_err) = EBUSY;                                         \
	else {                                                          \
	        (_sc)->sc_iflist_ref++;                                 \
	        (_err) = 0;                                             \
	}                                                               \
	_BRIDGE_UNLOCK(_sc);                                            \
} while (0)
#define BRIDGE_UNREF(_sc)               do {                            \
	_BRIDGE_LOCK(_sc);                                              \
	(_sc)->sc_iflist_ref--;                                         \
	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) {       \
	        _BRIDGE_UNLOCK(_sc);                                    \
	        wakeup(&(_sc)->sc_cv);                                  \
	} else                                                          \
	        _BRIDGE_UNLOCK(_sc);                                    \
} while (0)
#define BRIDGE_XLOCK(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt++;                                        \
	while ((_sc)->sc_iflist_ref > 0)                                \
	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
	            "BRIDGE_XLOCK", NULL);                              \
} while (0)
#define BRIDGE_XDROP(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt--;                                        \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
290
/*
 * BRIDGE_BPF_MTAP_INPUT
 * - hands the packet 'm' to bridge_bpf_input() when the bridge 'sc' has an
 *   input BPF callback installed (sc_bpf_input != NULL)
 * - expands to a single statement (do { } while (0)) so it can be used as
 *   the body of an unbraced if/else without capturing the following else
 * - 'sc' is evaluated twice; do not pass an expression with side effects
 * - compiles to an empty statement when BPF support is not configured
 */
#if NBPFILTER > 0
#define BRIDGE_BPF_MTAP_INPUT(sc, m)                                    \
	do {                                                            \
	        if ((sc)->sc_bpf_input != NULL) {                       \
	                bridge_bpf_input((sc)->sc_ifp, (m),             \
	                    __func__, __LINE__);                        \
	        }                                                       \
	} while (0)
#else /* NBPFILTER */
#define BRIDGE_BPF_MTAP_INPUT(sc, m)    do { } while (0)
#endif /* NBPFILTER */
298
299 /*
300 * Initial size of the route hash table. Must be a power of two.
301 */
302 #ifndef BRIDGE_RTHASH_SIZE
303 #define BRIDGE_RTHASH_SIZE 16
304 #endif
305
306 /*
307 * Maximum size of the routing hash table
308 */
309 #define BRIDGE_RTHASH_SIZE_MAX 2048
310
311 #define BRIDGE_RTHASH_MASK(sc) ((sc)->sc_rthash_size - 1)
312
313 /*
314 * Maximum number of addresses to cache.
315 */
316 #ifndef BRIDGE_RTABLE_MAX
317 #define BRIDGE_RTABLE_MAX 100
318 #endif
319
320
321 /*
322 * Timeout (in seconds) for entries learned dynamically.
323 */
324 #ifndef BRIDGE_RTABLE_TIMEOUT
325 #define BRIDGE_RTABLE_TIMEOUT (20 * 60) /* same as ARP */
326 #endif
327
328 /*
329 * Number of seconds between walks of the route list.
330 */
331 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
332 #define BRIDGE_RTABLE_PRUNE_PERIOD (5 * 60)
333 #endif
334
335 /*
336 * Number of MAC NAT entries
337 * - sized based on 16 clients (including MAC NAT interface)
338 * each with 4 addresses
339 */
340 #ifndef BRIDGE_MAC_NAT_ENTRY_MAX
341 #define BRIDGE_MAC_NAT_ENTRY_MAX 64
342 #endif /* BRIDGE_MAC_NAT_ENTRY_MAX */
343
344 /*
345 * List of capabilities to possibly mask on the member interface.
346 */
347 #define BRIDGE_IFCAPS_MASK (IFCAP_TSO | IFCAP_TXCSUM)
348 /*
349 * List of capabilities to disable on the member interface.
350 */
351 #define BRIDGE_IFCAPS_STRIP IFCAP_LRO
352
353 /*
354 * Bridge interface list entry.
355 */
356 struct bridge_iflist {
357 TAILQ_ENTRY(bridge_iflist) bif_next;
358 struct ifnet *bif_ifp; /* member if */
359 struct bstp_port bif_stp; /* STP state */
360 uint32_t bif_ifflags; /* member if flags */
361 int bif_savedcaps; /* saved capabilities */
362 uint32_t bif_addrmax; /* max # of addresses */
363 uint32_t bif_addrcnt; /* cur. # of addresses */
364 uint32_t bif_addrexceeded; /* # of address violations */
365
366 interface_filter_t bif_iff_ref;
367 struct bridge_softc *bif_sc;
368 uint32_t bif_flags;
369
370 /* host filter */
371 struct in_addr bif_hf_ipsrc;
372 uint8_t bif_hf_hwsrc[ETHER_ADDR_LEN];
373
374 struct ifbrmstats bif_stats;
375 };
376
377 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)378 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
379 {
380 return (bif->bif_ifflags & flags) == flags;
381 }
382
383 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)384 bif_has_checksum_offload(struct bridge_iflist * bif)
385 {
386 return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
387 }
388
389 /* fake errors to make the code clearer */
390 #define _EBADIP EJUSTRETURN
391 #define _EBADIPCHECKSUM EJUSTRETURN
392 #define _EBADIPV6 EJUSTRETURN
393 #define _EBADUDP EJUSTRETURN
394 #define _EBADTCP EJUSTRETURN
395 #define _EBADUDPCHECKSUM EJUSTRETURN
396 #define _EBADTCPCHECKSUM EJUSTRETURN
397
398 #define BIFF_PROMISC 0x01 /* promiscuous mode set */
399 #define BIFF_PROTO_ATTACHED 0x02 /* protocol attached */
400 #define BIFF_FILTER_ATTACHED 0x04 /* interface filter attached */
401 #define BIFF_MEDIA_ACTIVE 0x08 /* interface media active */
402 #define BIFF_HOST_FILTER 0x10 /* host filter enabled */
403 #define BIFF_HF_HWSRC 0x20 /* host filter source MAC is set */
404 #define BIFF_HF_IPSRC 0x40 /* host filter source IP is set */
405 #define BIFF_INPUT_BROADCAST 0x80 /* send broadcast packets in */
406 #define BIFF_IN_MEMBER_LIST 0x100 /* added to the member list */
407 #if SKYWALK
408 #define BIFF_FLOWSWITCH_ATTACHED 0x1000 /* we attached the flowswitch */
409 #define BIFF_NETAGENT_REMOVED 0x2000 /* we removed the netagent */
410 #endif /* SKYWALK */
411
412 /*
413 * mac_nat_entry
414 * - translates between an IP address and MAC address on a specific
415 * bridge interface member
416 */
417 struct mac_nat_entry {
418 LIST_ENTRY(mac_nat_entry) mne_list; /* list linkage */
419 struct bridge_iflist *mne_bif; /* originating interface */
420 unsigned long mne_expire; /* expiration time */
421 union {
422 struct in_addr mneu_ip; /* originating IPv4 address */
423 struct in6_addr mneu_ip6; /* originating IPv6 address */
424 } mne_u;
425 uint8_t mne_mac[ETHER_ADDR_LEN];
426 uint8_t mne_flags;
427 uint8_t mne_reserved;
428 };
429 #define mne_ip mne_u.mneu_ip
430 #define mne_ip6 mne_u.mneu_ip6
431
432 #define MNE_FLAGS_IPV6 0x01 /* IPv6 address */
433
434 LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
435
436 /*
437 * mac_nat_record
438 * - used by bridge_mac_nat_output() to convey the translation that needs
439 * to take place in bridge_mac_nat_translate
440 * - holds enough information so that the translation can be done later without
441 * holding the bridge lock
442 */
443 struct mac_nat_record {
444 uint16_t mnr_ether_type;
445 union {
446 uint16_t mnru_arp_offset;
447 struct {
448 uint16_t mnruip_dhcp_flags;
449 uint16_t mnruip_udp_csum;
450 uint8_t mnruip_header_len;
451 } mnru_ip;
452 struct {
453 uint16_t mnruip6_icmp6_len;
454 uint16_t mnruip6_lladdr_offset;
455 uint8_t mnruip6_icmp6_type;
456 uint8_t mnruip6_header_len;
457 } mnru_ip6;
458 } mnr_u;
459 };
460
461 #define mnr_arp_offset mnr_u.mnru_arp_offset
462
463 #define mnr_ip_header_len mnr_u.mnru_ip.mnruip_header_len
464 #define mnr_ip_dhcp_flags mnr_u.mnru_ip.mnruip_dhcp_flags
465 #define mnr_ip_udp_csum mnr_u.mnru_ip.mnruip_udp_csum
466
467 #define mnr_ip6_icmp6_len mnr_u.mnru_ip6.mnruip6_icmp6_len
468 #define mnr_ip6_icmp6_type mnr_u.mnru_ip6.mnruip6_icmp6_type
469 #define mnr_ip6_header_len mnr_u.mnru_ip6.mnruip6_header_len
470 #define mnr_ip6_lladdr_offset mnr_u.mnru_ip6.mnruip6_lladdr_offset
471
472 /*
473 * Bridge route node.
474 */
475 struct bridge_rtnode {
476 LIST_ENTRY(bridge_rtnode) brt_hash; /* hash table linkage */
477 LIST_ENTRY(bridge_rtnode) brt_list; /* list linkage */
478 struct bridge_iflist *brt_dst; /* destination if */
479 unsigned long brt_expire; /* expiration time */
480 uint8_t brt_flags; /* address flags */
481 uint8_t brt_addr[ETHER_ADDR_LEN];
482 uint16_t brt_vlan; /* vlan id */
483
484 };
485 #define brt_ifp brt_dst->bif_ifp
486
487 /*
488 * Bridge delayed function call context
489 */
490 typedef void (*bridge_delayed_func_t)(struct bridge_softc *);
491
492 struct bridge_delayed_call {
493 struct bridge_softc *bdc_sc;
494 bridge_delayed_func_t bdc_func; /* Function to call */
495 struct timespec bdc_ts; /* Time to call */
496 u_int32_t bdc_flags;
497 thread_call_t bdc_thread_call;
498 };
499
500 #define BDCF_OUTSTANDING 0x01 /* Delayed call has been scheduled */
501 #define BDCF_CANCELLING 0x02 /* May be waiting for call completion */
502
503 /*
504 * Software state for each bridge.
505 */
506 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
507
508 struct bridge_softc {
509 struct ifnet *sc_ifp; /* make this an interface */
510 u_int32_t sc_flags;
511 LIST_ENTRY(bridge_softc) sc_list;
512 decl_lck_mtx_data(, sc_mtx);
513 struct _bridge_rtnode_list *sc_rthash; /* our forwarding table */
514 struct _bridge_rtnode_list sc_rtlist; /* list version of above */
515 uint32_t sc_rthash_key; /* key for hash */
516 uint32_t sc_rthash_size; /* size of the hash table */
517 struct bridge_delayed_call sc_aging_timer;
518 struct bridge_delayed_call sc_resize_call;
519 TAILQ_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */
520 struct bstp_state sc_stp; /* STP state */
521 bpf_packet_func sc_bpf_input;
522 bpf_packet_func sc_bpf_output;
523 void *sc_cv;
524 uint32_t sc_brtmax; /* max # of addresses */
525 uint32_t sc_brtcnt; /* cur. # of addresses */
526 uint32_t sc_brttimeout; /* rt timeout in seconds */
527 uint32_t sc_iflist_ref; /* refcount for sc_iflist */
528 uint32_t sc_iflist_xcnt; /* refcount for sc_iflist */
529 TAILQ_HEAD(, bridge_iflist) sc_iflist; /* member interface list */
530 uint32_t sc_brtexceeded; /* # of cache drops */
531 uint32_t sc_filter_flags; /* ipf and flags */
532 struct ifnet *sc_ifaddr; /* member mac copied from */
533 u_char sc_defaddr[6]; /* Default MAC address */
534 char sc_if_xname[IFNAMSIZ];
535
536 struct bridge_iflist *sc_mac_nat_bif; /* single MAC NAT interface */
537 struct mac_nat_entry_list sc_mne_list; /* MAC NAT IPv4 */
538 struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
539 uint32_t sc_mne_max; /* max # of entries */
540 uint32_t sc_mne_count; /* cur. # of entries */
541 uint32_t sc_mne_allocation_failures;
542 #if BRIDGE_LOCK_DEBUG
543 /*
544 * Locking and unlocking calling history
545 */
546 void *lock_lr[BR_LCKDBG_MAX];
547 int next_lock_lr;
548 void *unlock_lr[BR_LCKDBG_MAX];
549 int next_unlock_lr;
550 #endif /* BRIDGE_LOCK_DEBUG */
551 };
552
553 #define SCF_DETACHING 0x01
554 #define SCF_RESIZING 0x02
555 #define SCF_MEDIA_ACTIVE 0x04
556
557 typedef enum {
558 CHECKSUM_OPERATION_NONE = 0,
559 CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
560 CHECKSUM_OPERATION_FINALIZE = 2,
561 CHECKSUM_OPERATION_COMPUTE = 3,
562 } ChecksumOperation;
563
564 union iphdr {
565 struct ip *ip;
566 struct ip6_hdr *ip6;
567 void * ptr;
568 };
569
570 typedef struct {
571 u_int ip_hlen; /* IP header length */
572 u_int ip_pay_len; /* length of payload (exclusive of ip_hlen) */
573 u_int ip_opt_len; /* IPv6 options headers length */
574 uint8_t ip_proto; /* IPPROTO_TCP, IPPROTO_UDP, etc. */
575 bool ip_is_ipv4;
576 bool ip_is_fragmented;
577 union iphdr ip_hdr; /* pointer to IP header */
578 void * ip_proto_hdr; /* ptr to protocol header (TCP) */
579 } ip_packet_info, *ip_packet_info_t;
580
581 struct bridge_hostfilter_stats bridge_hostfilter_stats;
582
583 static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
584 #if BRIDGE_LOCK_DEBUG
585 static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
586 #else
587 static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
588 #endif
589 static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
590
591 static int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
592
593 static ZONE_DEFINE(bridge_rtnode_pool, "bridge_rtnode",
594 sizeof(struct bridge_rtnode), ZC_NONE);
595 static ZONE_DEFINE(bridge_mne_pool, "bridge_mac_nat_entry",
596 sizeof(struct mac_nat_entry), ZC_NONE);
597
598 static int bridge_clone_create(struct if_clone *, uint32_t, void *);
599 static int bridge_clone_destroy(struct ifnet *);
600
601 static errno_t bridge_ioctl(struct ifnet *, u_long, void *);
602 #if HAS_IF_CAP
603 static void bridge_mutecaps(struct bridge_softc *);
604 static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
605 int);
606 #endif
607 static errno_t bridge_set_tso(struct bridge_softc *);
608 static void bridge_proto_attach_changed(struct ifnet *);
609 static int bridge_init(struct ifnet *);
610 #if HAS_BRIDGE_DUMMYNET
611 static void bridge_dummynet(struct mbuf *, struct ifnet *);
612 #endif
613 static void bridge_ifstop(struct ifnet *, int);
614 static int bridge_output(struct ifnet *, struct mbuf *);
615 static void bridge_finalize_cksum(struct ifnet *, struct mbuf *);
616 static void bridge_start(struct ifnet *);
617 static errno_t bridge_input(struct ifnet *, mbuf_t *);
618 static errno_t bridge_iff_input(void *, ifnet_t, protocol_family_t,
619 mbuf_t *, char **);
620 static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t,
621 mbuf_t *);
622 static errno_t bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
623 mbuf_t *m);
624
625 static int bridge_enqueue(ifnet_t, struct ifnet *,
626 struct ifnet *, struct mbuf *, ChecksumOperation);
627 static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
628
629 static void bridge_forward(struct bridge_softc *, struct bridge_iflist *,
630 struct mbuf *);
631
632 static void bridge_aging_timer(struct bridge_softc *sc);
633
634 static void bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
635 struct mbuf *, int);
636 static void bridge_span(struct bridge_softc *, struct mbuf *);
637
638 static int bridge_rtupdate(struct bridge_softc *, const uint8_t *,
639 uint16_t, struct bridge_iflist *, int, uint8_t);
640 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
641 uint16_t);
642 static void bridge_rttrim(struct bridge_softc *);
643 static void bridge_rtage(struct bridge_softc *);
644 static void bridge_rtflush(struct bridge_softc *, int);
645 static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
646 uint16_t);
647
648 static int bridge_rtable_init(struct bridge_softc *);
649 static void bridge_rtable_fini(struct bridge_softc *);
650
651 static void bridge_rthash_resize(struct bridge_softc *);
652
653 static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
654 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
655 const uint8_t *, uint16_t);
656 static int bridge_rtnode_hash(struct bridge_softc *,
657 struct bridge_rtnode *);
658 static int bridge_rtnode_insert(struct bridge_softc *,
659 struct bridge_rtnode *);
660 static void bridge_rtnode_destroy(struct bridge_softc *,
661 struct bridge_rtnode *);
662 #if BRIDGESTP
663 static void bridge_rtable_expire(struct ifnet *, int);
664 static void bridge_state_change(struct ifnet *, int);
665 #endif /* BRIDGESTP */
666
667 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
668 const char *name);
669 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
670 struct ifnet *ifp);
671 static void bridge_delete_member(struct bridge_softc *,
672 struct bridge_iflist *);
673 static void bridge_delete_span(struct bridge_softc *,
674 struct bridge_iflist *);
675
676 static int bridge_ioctl_add(struct bridge_softc *, void *);
677 static int bridge_ioctl_del(struct bridge_softc *, void *);
678 static int bridge_ioctl_gifflags(struct bridge_softc *, void *);
679 static int bridge_ioctl_sifflags(struct bridge_softc *, void *);
680 static int bridge_ioctl_scache(struct bridge_softc *, void *);
681 static int bridge_ioctl_gcache(struct bridge_softc *, void *);
682 static int bridge_ioctl_gifs32(struct bridge_softc *, void *);
683 static int bridge_ioctl_gifs64(struct bridge_softc *, void *);
684 static int bridge_ioctl_rts32(struct bridge_softc *, void *);
685 static int bridge_ioctl_rts64(struct bridge_softc *, void *);
686 static int bridge_ioctl_saddr32(struct bridge_softc *, void *);
687 static int bridge_ioctl_saddr64(struct bridge_softc *, void *);
688 static int bridge_ioctl_sto(struct bridge_softc *, void *);
689 static int bridge_ioctl_gto(struct bridge_softc *, void *);
690 static int bridge_ioctl_daddr32(struct bridge_softc *, void *);
691 static int bridge_ioctl_daddr64(struct bridge_softc *, void *);
692 static int bridge_ioctl_flush(struct bridge_softc *, void *);
693 static int bridge_ioctl_gpri(struct bridge_softc *, void *);
694 static int bridge_ioctl_spri(struct bridge_softc *, void *);
695 static int bridge_ioctl_ght(struct bridge_softc *, void *);
696 static int bridge_ioctl_sht(struct bridge_softc *, void *);
697 static int bridge_ioctl_gfd(struct bridge_softc *, void *);
698 static int bridge_ioctl_sfd(struct bridge_softc *, void *);
699 static int bridge_ioctl_gma(struct bridge_softc *, void *);
700 static int bridge_ioctl_sma(struct bridge_softc *, void *);
701 static int bridge_ioctl_sifprio(struct bridge_softc *, void *);
702 static int bridge_ioctl_sifcost(struct bridge_softc *, void *);
703 static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
704 static int bridge_ioctl_addspan(struct bridge_softc *, void *);
705 static int bridge_ioctl_delspan(struct bridge_softc *, void *);
706 static int bridge_ioctl_gbparam32(struct bridge_softc *, void *);
707 static int bridge_ioctl_gbparam64(struct bridge_softc *, void *);
708 static int bridge_ioctl_grte(struct bridge_softc *, void *);
709 static int bridge_ioctl_gifsstp32(struct bridge_softc *, void *);
710 static int bridge_ioctl_gifsstp64(struct bridge_softc *, void *);
711 static int bridge_ioctl_sproto(struct bridge_softc *, void *);
712 static int bridge_ioctl_stxhc(struct bridge_softc *, void *);
713 static int bridge_ioctl_purge(struct bridge_softc *sc, void *);
714 static int bridge_ioctl_gfilt(struct bridge_softc *, void *);
715 static int bridge_ioctl_sfilt(struct bridge_softc *, void *);
716 static int bridge_ioctl_ghostfilter(struct bridge_softc *, void *);
717 static int bridge_ioctl_shostfilter(struct bridge_softc *, void *);
718 static int bridge_ioctl_gmnelist32(struct bridge_softc *, void *);
719 static int bridge_ioctl_gmnelist64(struct bridge_softc *, void *);
720 static int bridge_ioctl_gifstats32(struct bridge_softc *, void *);
721 static int bridge_ioctl_gifstats64(struct bridge_softc *, void *);
722
723 static int bridge_pf(struct mbuf **, struct ifnet *, uint32_t sc_filter_flags, int input);
724 static int bridge_ip_checkbasic(struct mbuf **);
725 static int bridge_ip6_checkbasic(struct mbuf **);
726
727 static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
728 static errno_t bridge_bpf_input(ifnet_t, struct mbuf *, const char *, int);
729 static errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
730
731 static void bridge_detach(ifnet_t);
732 static void bridge_link_event(struct ifnet *, u_int32_t);
733 static void bridge_iflinkevent(struct ifnet *);
734 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
735 static int interface_media_active(struct ifnet *);
736 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
737 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
738 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
739 static int bridge_host_filter(struct bridge_iflist *, mbuf_t *);
740
741 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
742 struct bridge_iflist *);
743 static void bridge_mac_nat_disable(struct bridge_softc *sc);
744 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
745 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
746 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
747 struct bridge_iflist *);
748 static ifnet_t bridge_mac_nat_input(struct bridge_softc *, mbuf_t *,
749 boolean_t *);
750 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
751 struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
752 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
753 const caddr_t);
754 static bool is_broadcast_ip_packet(mbuf_t *);
755 static bool in_addr_is_ours(const struct in_addr);
756 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
757
758 #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
759
760 static int
761 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
762 boolean_t is_tx);
763
764 /* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */
765 #define VLANTAGOF(_m) 0
766
767 u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
768 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
769
770 static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
771 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
772
773 #if BRIDGESTP
774 static struct bstp_cb_ops bridge_ops = {
775 .bcb_state = bridge_state_change,
776 .bcb_rtage = bridge_rtable_expire
777 };
778 #endif /* BRIDGESTP */
779
780 SYSCTL_DECL(_net_link);
781 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
782 "Bridge");
783
784 static int bridge_inherit_mac = 0; /* share MAC with first bridge member */
785 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
786 CTLFLAG_RW | CTLFLAG_LOCKED,
787 &bridge_inherit_mac, 0,
788 "Inherit MAC address from the first bridge member");
789
790 SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
791 CTLFLAG_RW | CTLFLAG_LOCKED,
792 &bridge_rtable_prune_period, 0,
793 "Interval between pruning of routing table");
794
795 static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
796 SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
797 CTLFLAG_RW | CTLFLAG_LOCKED,
798 &bridge_rtable_hash_size_max, 0,
799 "Maximum size of the routing hash table");
800
801 #if BRIDGE_DELAYED_CALLBACK_DEBUG
802 static int bridge_delayed_callback_delay = 0;
803 SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
804 CTLFLAG_RW | CTLFLAG_LOCKED,
805 &bridge_delayed_callback_delay, 0,
806 "Delay before calling delayed function");
807 #endif
808
809 SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
810 hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
811 &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
812
#if BRIDGESTP
/*
 * net.link.bridge.log_stp
 * - non-zero enables logging of STP state changes
 * - registered with CTLFLAG_LOCKED like the other net.link.bridge
 *   integer sysctls in this file
 */
static int log_stp = 0;   /* log STP state changes */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &log_stp, 0, "Log STP state changes");
#endif /* BRIDGESTP */
818
819 struct bridge_control {
820 int (*bc_func)(struct bridge_softc *, void *);
821 unsigned int bc_argsize;
822 unsigned int bc_flags;
823 };
824
/* Names of the tags that allocate_vmnet_pf_tags() requests from PF */
#define VMNET_TAG               "com.apple.vmnet"
#define VMNET_LOCAL_TAG         VMNET_TAG ".local"
#define VMNET_BROADCAST_TAG     VMNET_TAG ".broadcast"
#define VMNET_MULTICAST_TAG     VMNET_TAG ".multicast"

/*
 * Cached tag values, one per name above.  A value of 0 is treated by
 * allocate_vmnet_pf_tags() as "not allocated yet".
 */
static u_int16_t vmnet_tag;
static u_int16_t vmnet_local_tag;
static u_int16_t vmnet_broadcast_tag;
static u_int16_t vmnet_multicast_tag;
834
835 static u_int16_t
allocate_pf_tag(char * name)836 allocate_pf_tag(char * name)
837 {
838 u_int16_t tag;
839
840 tag = pf_tagname2tag_ext(name);
841 BRIDGE_LOG(LOG_NOTICE, 0, "%s %d", name, tag);
842 return tag;
843 }
844
845 static void
allocate_vmnet_pf_tags(void)846 allocate_vmnet_pf_tags(void)
847 {
848 /* allocate tags to use with PF */
849 if (vmnet_tag == 0) {
850 vmnet_tag = allocate_pf_tag(VMNET_TAG);
851 }
852 if (vmnet_local_tag == 0) {
853 vmnet_local_tag = allocate_pf_tag(VMNET_LOCAL_TAG);
854 }
855 if (vmnet_broadcast_tag == 0) {
856 vmnet_broadcast_tag = allocate_pf_tag(VMNET_BROADCAST_TAG);
857 }
858 if (vmnet_multicast_tag == 0) {
859 vmnet_multicast_tag = allocate_pf_tag(VMNET_MULTICAST_TAG);
860 }
861 }
862
#define BC_F_COPYIN             0x01    /* copy arguments in */
#define BC_F_COPYOUT            0x02    /* copy arguments out */
#define BC_F_SUSER              0x04    /* do super-user check */

/*
 * Driver-specific ioctl dispatch table whose pointer-carrying requests
 * use the *32 argument layouts (ifbifconf32, ifbaconf32, ifbareq32, ...).
 * The position of an entry is the command number (see the index markers
 * on entries 0, 10, 20 and 30), so entries must not be reordered and the
 * table must stay in step with bridge_control_table64.
 * NOTE(review): presumably selected for 32-bit callers -- the selection
 * logic is outside this section.
 */
static const struct bridge_control bridge_control_table32[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist32,
	  .bc_argsize = sizeof(struct ifbrmnelist32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats32,
	  .bc_argsize = sizeof(struct ifbrmreq32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
971
/*
 * Counterpart of bridge_control_table32 that uses the *64 argument
 * layouts (ifbifconf64, ifbaconf64, ifbareq64, ...).  The position of an
 * entry is the command number, and bridge_control_table_size is derived
 * from the 32-bit table only, so both tables must keep the same number
 * of entries in the same order.
 * NOTE(review): presumably selected for 64-bit callers -- the selection
 * logic is outside this section.
 */
static const struct bridge_control bridge_control_table64[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),           /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist64,
	  .bc_argsize = sizeof(struct ifbrmnelist64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats64,
	  .bc_argsize = sizeof(struct ifbrmreq64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
1076
1077 static const unsigned int bridge_control_table_size =
1078 sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1079
/*
 * List of bridge instances.  bridge_clone_create() walks it and inserts
 * into it while holding bridge_list_mtx.
 */
static LIST_HEAD(, bridge_softc) bridge_list =
    LIST_HEAD_INITIALIZER(bridge_list);

#define BRIDGENAME      "bridge"        /* interface name prefix used by the cloner */
#define BRIDGES_MAX     IF_MAXUNIT
#define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)

/* Cloner registered by bridgeattach(); it also supplies the softc allocations */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
    0, BRIDGES_MAX, BRIDGE_ZONE_MAX_ELEM, sizeof(struct bridge_softc));
1090
/*
 * When non-zero, bridge_clone_create() registers bridge_start as the
 * start callback; otherwise the interface is created with
 * IFNET_INIT_LEGACY and bridge_output.
 */
static int if_bridge_txstart = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");

/* if_bridge_debug and if_bridge_log_level are defined outside this section */
SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_debug, 0, "Bridge debug flags");

SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_log_level, 0, "Bridge log level");

/* Segmentation is enabled by default */
static int if_bridge_segmentation = 1;
SYSCTL_INT(_net_link_bridge, OID_AUTO, segmentation,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_segmentation, 0, "Bridge interface enable segmentation");
1106
1107 static int if_bridge_vmnet_pf_tagging = 1;
1108 SYSCTL_INT(_net_link_bridge, OID_AUTO, vmnet_pf_tagging,
1109 CTLFLAG_RW | CTLFLAG_LOCKED,
1110 &if_bridge_segmentation, 0, "Bridge interface enable vmnet PF tagging");
1111
/*
 * Upper bounds and defaults for the two "tso reduce mss" knobs.  The
 * *_MAX values are the limits enforced by the sysctl handlers below.
 */
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX            256
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT        110
#define BRIDGE_TSO_REDUCE_MSS_TX_MAX                    256
#define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT                0

/* Current values; only changed through the sysctl handlers below */
static u_int if_bridge_tso_reduce_mss_forwarding
        = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
static u_int if_bridge_tso_reduce_mss_tx
        = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1121
1122 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1123 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1124 {
1125 int changed;
1126 int error;
1127 u_int new_value;
1128
1129 error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1130 &changed);
1131 if (error == 0 && changed != 0) {
1132 if (new_value > val_max) {
1133 return EINVAL;
1134 }
1135 *val = new_value;
1136 }
1137 return error;
1138 }
1139
1140 static int
1141 bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
1142 {
1143 return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
1144 BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
1145 }
1146
1147 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
1148 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1149 0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
1150 "Bridge tso reduce mss when forwarding");
1151
1152 static int
1153 bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
1154 {
1155 return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
1156 BRIDGE_TSO_REDUCE_MSS_TX_MAX);
1157 }
1158
1159 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
1160 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1161 0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
1162 "Bridge tso reduce mss on transmit");
1163
1164
1165 #if DEBUG || DEVELOPMENT
/* Bits that can be set in if_bridge_force_errors (DEBUG/DEVELOPMENT only) */
#define BRIDGE_FORCE_ONE        0x00000001
#define BRIDGE_FORCE_TWO        0x00000002
/*
 * Mask of forced errors, tested by bridge_error_is_forced().
 * NOTE(review): the variable is u_int32_t but is exported with
 * SYSCTL_INT -- confirm the signed view is acceptable.
 */
static u_int32_t if_bridge_force_errors = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, force_errors,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_force_errors, 0, "Bridge interface force errors");
1172 static inline bool
bridge_error_is_forced(u_int32_t flags)1173 bridge_error_is_forced(u_int32_t flags)
1174 {
1175 return (if_bridge_force_errors & flags) != 0;
1176 }
1177
/*
 * Store into __is_forced whether any bit of __flags is being forced and,
 * when it is, log the flag value.  __flags is expanded more than once,
 * so pass an expression without side effects.
 */
#define BRIDGE_ERROR_GET_FORCED(__is_forced, __flags)                   \
	do {                                                            \
	        __is_forced = bridge_error_is_forced(__flags);          \
	        if (__is_forced) {                                      \
	                BRIDGE_LOG(LOG_NOTICE, 0, "0x%x forced", __flags); \
	        }                                                       \
	} while (0)
1185 #endif /* DEBUG || DEVELOPMENT */
1186
1187
1188 static void brlog_ether_header(struct ether_header *);
1189 static void brlog_mbuf_data(mbuf_t, size_t, size_t);
1190 static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
1191 static void brlog_mbuf(mbuf_t, const char *, const char *);
1192 static void brlog_link(struct bridge_softc * sc);
1193
1194 #if BRIDGE_LOCK_DEBUG
1195 static void bridge_lock(struct bridge_softc *);
1196 static void bridge_unlock(struct bridge_softc *);
1197 static int bridge_lock2ref(struct bridge_softc *);
1198 static void bridge_unref(struct bridge_softc *);
1199 static void bridge_xlock(struct bridge_softc *);
1200 static void bridge_xdrop(struct bridge_softc *);
1201
1202 static void
bridge_lock(struct bridge_softc * sc)1203 bridge_lock(struct bridge_softc *sc)
1204 {
1205 void *lr_saved = __builtin_return_address(0);
1206
1207 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1208
1209 _BRIDGE_LOCK(sc);
1210
1211 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1212 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1213 }
1214
1215 static void
bridge_unlock(struct bridge_softc * sc)1216 bridge_unlock(struct bridge_softc *sc)
1217 {
1218 void *lr_saved = __builtin_return_address(0);
1219
1220 BRIDGE_LOCK_ASSERT_HELD(sc);
1221
1222 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1223 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1224
1225 _BRIDGE_UNLOCK(sc);
1226 }
1227
1228 static int
bridge_lock2ref(struct bridge_softc * sc)1229 bridge_lock2ref(struct bridge_softc *sc)
1230 {
1231 int error = 0;
1232 void *lr_saved = __builtin_return_address(0);
1233
1234 BRIDGE_LOCK_ASSERT_HELD(sc);
1235
1236 if (sc->sc_iflist_xcnt > 0) {
1237 error = EBUSY;
1238 } else {
1239 sc->sc_iflist_ref++;
1240 }
1241
1242 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1243 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1244
1245 _BRIDGE_UNLOCK(sc);
1246
1247 return error;
1248 }
1249
1250 static void
bridge_unref(struct bridge_softc * sc)1251 bridge_unref(struct bridge_softc *sc)
1252 {
1253 void *lr_saved = __builtin_return_address(0);
1254
1255 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1256
1257 _BRIDGE_LOCK(sc);
1258 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1259 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1260
1261 sc->sc_iflist_ref--;
1262
1263 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1264 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1265 if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1266 _BRIDGE_UNLOCK(sc);
1267 wakeup(&sc->sc_cv);
1268 } else {
1269 _BRIDGE_UNLOCK(sc);
1270 }
1271 }
1272
1273 static void
bridge_xlock(struct bridge_softc * sc)1274 bridge_xlock(struct bridge_softc *sc)
1275 {
1276 void *lr_saved = __builtin_return_address(0);
1277
1278 BRIDGE_LOCK_ASSERT_HELD(sc);
1279
1280 sc->sc_iflist_xcnt++;
1281 while (sc->sc_iflist_ref > 0) {
1282 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1283 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1284
1285 msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1286
1287 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1288 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1289 }
1290 }
1291
1292 static void
bridge_xdrop(struct bridge_softc * sc)1293 bridge_xdrop(struct bridge_softc *sc)
1294 {
1295 BRIDGE_LOCK_ASSERT_HELD(sc);
1296
1297 sc->sc_iflist_xcnt--;
1298 }
1299
1300 #endif /* BRIDGE_LOCK_DEBUG */
1301
1302 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1303 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1304 {
1305 if (m) {
1306 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1307 "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1308 prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1309 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1310 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1311 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1312 suffix ? suffix : "");
1313 } else {
1314 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1315 }
1316 }
1317
1318 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1319 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1320 {
1321 if (m) {
1322 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1323 "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1324 "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1325 prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1326 mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1327 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
1328 (unsigned int)mbuf_maxlen(m),
1329 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1330 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1331 !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1332 if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1333 brlog_mbuf_pkthdr(m, "", suffix);
1334 }
1335 } else {
1336 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1337 }
1338 }
1339
1340 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1341 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1342 {
1343 mbuf_t n;
1344 size_t i, j;
1345 size_t pktlen, mlen, maxlen;
1346 unsigned char *ptr;
1347
1348 pktlen = mbuf_pkthdr_len(m);
1349
1350 if (offset > pktlen) {
1351 return;
1352 }
1353
1354 maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1355 n = m;
1356 mlen = mbuf_len(n);
1357 ptr = mbuf_data(n);
1358 for (i = 0, j = 0; i < maxlen; i++, j++) {
1359 if (j >= mlen) {
1360 n = mbuf_next(n);
1361 if (n == 0) {
1362 break;
1363 }
1364 ptr = mbuf_data(n);
1365 mlen = mbuf_len(n);
1366 j = 0;
1367 }
1368 if (i >= offset) {
1369 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1370 "%02x%s", ptr[j], i % 2 ? " " : "");
1371 }
1372 }
1373 }
1374
1375 static void
brlog_ether_header(struct ether_header * eh)1376 brlog_ether_header(struct ether_header *eh)
1377 {
1378 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1379 "%02x:%02x:%02x:%02x:%02x:%02x > "
1380 "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1381 eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1382 eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1383 eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1384 eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1385 ntohs(eh->ether_type));
1386 }
1387
/*
 * ether_ntop:
 *
 *	Format the six bytes at `ap' as a colon-separated, zero-padded,
 *	lower-case hex string into `buf' (at most `len' bytes including
 *	the terminator, as enforced by snprintf).  Returns `buf' so the
 *	call can be used directly as a printf argument.
 */
static char *
ether_ntop(char *buf, size_t len, const u_char *ap)
{
	(void)snprintf(buf, len,
	    "%02x:%02x:%02x:%02x:%02x:%02x",
	    ap[0], ap[1], ap[2],
	    ap[3], ap[4], ap[5]);
	return buf;
}
1396
1397 static void
brlog_link(struct bridge_softc * sc)1398 brlog_link(struct bridge_softc * sc)
1399 {
1400 int i;
1401 uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) +
1402 IFNAMSIZ + ETHER_ADDR_LEN];
1403 struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer;
1404 const u_char * lladdr;
1405 char lladdr_str[48];
1406
1407 memset(sdl, 0, sizeof(sdl_buffer));
1408 sdl->sdl_family = AF_LINK;
1409 sdl->sdl_nlen = strlen(sc->sc_if_xname);
1410 sdl->sdl_alen = ETHER_ADDR_LEN;
1411 sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1412 memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1413 memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1414 lladdr_str[0] = '\0';
1415 for (i = 0, lladdr = CONST_LLADDR(sdl);
1416 i < sdl->sdl_alen;
1417 i++, lladdr++) {
1418 char byte_str[4];
1419
1420 snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1421 *lladdr);
1422 strlcat(lladdr_str, byte_str, sizeof(lladdr_str));
1423 }
1424 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1425 "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1426 " slen %d addr %s", sc->sc_if_xname,
1427 sdl->sdl_len, sdl->sdl_index,
1428 sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1429 sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1430 }
1431
1432
1433 /*
1434 * bridgeattach:
1435 *
1436 * Pseudo-device attach routine.
1437 */
1438 __private_extern__ int
bridgeattach(int n)1439 bridgeattach(int n)
1440 {
1441 #pragma unused(n)
1442 int error;
1443
1444 LIST_INIT(&bridge_list);
1445
1446 #if BRIDGESTP
1447 bstp_sys_init();
1448 #endif /* BRIDGESTP */
1449
1450 error = if_clone_attach(&bridge_cloner);
1451 if (error != 0) {
1452 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1453 }
1454 return error;
1455 }
1456
1457
1458 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1459 bridge_ifnet_set_attrs(struct ifnet * ifp)
1460 {
1461 errno_t error;
1462
1463 error = ifnet_set_mtu(ifp, ETHERMTU);
1464 if (error != 0) {
1465 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1466 goto done;
1467 }
1468 error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1469 if (error != 0) {
1470 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1471 goto done;
1472 }
1473 error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1474 if (error != 0) {
1475 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1476 goto done;
1477 }
1478 error = ifnet_set_flags(ifp,
1479 IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1480 0xffff);
1481
1482 if (error != 0) {
1483 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1484 goto done;
1485 }
1486 done:
1487 return error;
1488 }
1489
1490 /*
1491 * bridge_clone_create:
1492 *
1493 * Create a new bridge instance.
1494 */
1495 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1496 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1497 {
1498 #pragma unused(params)
1499 struct ifnet *ifp = NULL;
1500 struct bridge_softc *sc = NULL;
1501 struct bridge_softc *sc2 = NULL;
1502 struct ifnet_init_eparams init_params;
1503 errno_t error = 0;
1504 uint8_t eth_hostid[ETHER_ADDR_LEN];
1505 int fb, retry, has_hostid;
1506
1507 sc = if_clone_softc_allocate(&bridge_cloner);
1508 if (sc == NULL) {
1509 error = ENOMEM;
1510 goto done;
1511 }
1512
1513 lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1514 sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1515 sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1516 sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1517 sc->sc_filter_flags = 0;
1518
1519 TAILQ_INIT(&sc->sc_iflist);
1520
1521 /* use the interface name as the unique id for ifp recycle */
1522 snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1523 ifc->ifc_name, unit);
1524 bzero(&init_params, sizeof(init_params));
1525 init_params.ver = IFNET_INIT_CURRENT_VERSION;
1526 init_params.len = sizeof(init_params);
1527 /* Initialize our routing table. */
1528 error = bridge_rtable_init(sc);
1529 if (error != 0) {
1530 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1531 goto done;
1532 }
1533 TAILQ_INIT(&sc->sc_spanlist);
1534 if (if_bridge_txstart) {
1535 init_params.start = bridge_start;
1536 } else {
1537 init_params.flags = IFNET_INIT_LEGACY;
1538 init_params.output = bridge_output;
1539 }
1540 init_params.set_bpf_tap = bridge_set_bpf_tap;
1541 init_params.uniqueid = sc->sc_if_xname;
1542 init_params.uniqueid_len = strlen(sc->sc_if_xname);
1543 init_params.sndq_maxlen = IFQ_MAXLEN;
1544 init_params.name = ifc->ifc_name;
1545 init_params.unit = unit;
1546 init_params.family = IFNET_FAMILY_ETHERNET;
1547 init_params.type = IFT_BRIDGE;
1548 init_params.demux = ether_demux;
1549 init_params.add_proto = ether_add_proto;
1550 init_params.del_proto = ether_del_proto;
1551 init_params.check_multi = ether_check_multi;
1552 init_params.framer_extended = ether_frameout_extended;
1553 init_params.softc = sc;
1554 init_params.ioctl = bridge_ioctl;
1555 init_params.detach = bridge_detach;
1556 init_params.broadcast_addr = etherbroadcastaddr;
1557 init_params.broadcast_len = ETHER_ADDR_LEN;
1558
1559 error = ifnet_allocate_extended(&init_params, &ifp);
1560 if (error != 0) {
1561 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1562 goto done;
1563 }
1564 LIST_INIT(&sc->sc_mne_list);
1565 LIST_INIT(&sc->sc_mne_list_v6);
1566 sc->sc_ifp = ifp;
1567 error = bridge_ifnet_set_attrs(ifp);
1568 if (error != 0) {
1569 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1570 error);
1571 goto done;
1572 }
1573 /*
1574 * Generate an ethernet address with a locally administered address.
1575 *
1576 * Since we are using random ethernet addresses for the bridge, it is
1577 * possible that we might have address collisions, so make sure that
1578 * this hardware address isn't already in use on another bridge.
1579 * The first try uses the "hostid" and falls back to read_frandom();
1580 * for "hostid", we use the MAC address of the first-encountered
1581 * Ethernet-type interface that is currently configured.
1582 */
1583 fb = 0;
1584 has_hostid = (uuid_get_ethernet(ð_hostid[0]) == 0);
1585 for (retry = 1; retry != 0;) {
1586 if (fb || has_hostid == 0) {
1587 read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1588 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1589 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1590 } else {
1591 bcopy(ð_hostid[0], &sc->sc_defaddr,
1592 ETHER_ADDR_LEN);
1593 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1594 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1595 sc->sc_defaddr[3] = /* stir it up a bit */
1596 ((sc->sc_defaddr[3] & 0x0f) << 4) |
1597 ((sc->sc_defaddr[3] & 0xf0) >> 4);
1598 /*
1599 * Mix in the LSB as it's actually pretty significant,
1600 * see rdar://14076061
1601 */
1602 sc->sc_defaddr[4] =
1603 (((sc->sc_defaddr[4] & 0x0f) << 4) |
1604 ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1605 sc->sc_defaddr[5];
1606 sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1607 }
1608
1609 fb = 1;
1610 retry = 0;
1611 lck_mtx_lock(&bridge_list_mtx);
1612 LIST_FOREACH(sc2, &bridge_list, sc_list) {
1613 if (_ether_cmp(sc->sc_defaddr,
1614 IF_LLADDR(sc2->sc_ifp)) == 0) {
1615 retry = 1;
1616 }
1617 }
1618 lck_mtx_unlock(&bridge_list_mtx);
1619 }
1620
1621 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1622
1623 if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1624 brlog_link(sc);
1625 }
1626 error = ifnet_attach(ifp, NULL);
1627 if (error != 0) {
1628 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1629 goto done;
1630 }
1631
1632 error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1633 IFT_ETHER);
1634 if (error != 0) {
1635 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1636 error);
1637 goto done;
1638 }
1639
1640 ifnet_set_offload(ifp,
1641 IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1642 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1643 error = bridge_set_tso(sc);
1644 if (error != 0) {
1645 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1646 goto done;
1647 }
1648 #if BRIDGESTP
1649 bstp_attach(&sc->sc_stp, &bridge_ops);
1650 #endif /* BRIDGESTP */
1651
1652 lck_mtx_lock(&bridge_list_mtx);
1653 LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1654 lck_mtx_unlock(&bridge_list_mtx);
1655
1656 /* attach as ethernet */
1657 error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1658 NULL, NULL);
1659
1660 done:
1661 if (error != 0) {
1662 BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1663 /* TBD: Clean up: sc, sc_rthash etc */
1664 }
1665
1666 return error;
1667 }
1668
/*
 * bridge_clone_destroy:
 *
 *	Destroy a bridge instance.
 *
 *	Under the bridge lock: marks the softc SCF_DETACHING (returning
 *	immediately if it already is), stops the interface, cancels and
 *	cleans up the delayed calls, clears IFF_UP, and removes every
 *	member and span interface.  The lock is then dropped and the ifnet
 *	is detached.
 *
 *	Always returns 0; a failing ifnet_detach() panics.
 */
static int
bridge_clone_destroy(struct ifnet *ifp)
{
	struct bridge_softc *sc = ifp->if_softc;
	struct bridge_iflist *bif;
	errno_t error;

	BRIDGE_LOCK(sc);
	/* a destroy is already in progress; nothing more to do */
	if ((sc->sc_flags & SCF_DETACHING)) {
		BRIDGE_UNLOCK(sc);
		return 0;
	}
	sc->sc_flags |= SCF_DETACHING;

	bridge_ifstop(ifp, 1);

	bridge_cancel_delayed_call(&sc->sc_resize_call);

	bridge_cleanup_delayed_call(&sc->sc_resize_call);
	bridge_cleanup_delayed_call(&sc->sc_aging_timer);

	/* failure to clear IFF_UP is logged but does not stop the teardown */
	error = ifnet_set_flags(ifp, 0, IFF_UP);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
	}

	/* each delete call is expected to unlink the entry it is given */
	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
		bridge_delete_member(sc, bif);
	}

	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
		bridge_delete_span(sc, bif);
	}
	BRIDGE_UNLOCK(sc);

	error = ifnet_detach(ifp);
	if (error != 0) {
		panic("%s (%d): ifnet_detach(%p) failed %d",
		    __func__, __LINE__, ifp, error);
	}
	return 0;
}
1716
/*
 * DRVSPEC:
 *
 *	Body of the driver-specific ioctl dispatch.  The expansion site
 *	must provide `ifd' (the request), `bridge_control_table' (the table
 *	to index), `bc', `args' (argument buffer), `cmd', `sc' and `error'.
 *
 *	In order it: rejects command numbers outside the table; requires
 *	BC_F_COPYOUT for SIOCGDRVSPEC and forbids it for SIOCSDRVSPEC;
 *	performs the super-user check for BC_F_SUSER entries; requires
 *	ifd_len to equal the entry's argument size and to fit in `args';
 *	zeroes `args' and copies the arguments in for BC_F_COPYIN entries;
 *	calls the handler with the bridge lock held; and copies `args' back
 *	out for BC_F_COPYOUT entries.  The first failure leaves its code in
 *	`error' and skips the remaining steps.
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) {                \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
	bc = &bridge_control_table[ifd->ifd_cmd];                       \
                                                                        \
	if (cmd == SIOCGDRVSPEC &&                                      \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) {                       \
	        error = EINVAL;                                         \
	        break;                                                  \
	} else if (cmd == SIOCSDRVSPEC &&                               \
	    (bc->bc_flags & BC_F_COPYOUT) != 0) {                       \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	if (bc->bc_flags & BC_F_SUSER) {                                \
	        error = kauth_authorize_generic(kauth_cred_get(),       \
	            KAUTH_GENERIC_ISSUSER);                             \
	        if (error)                                              \
	                break;                                          \
	}                                                               \
                                                                        \
	if (ifd->ifd_len != bc->bc_argsize ||                           \
	    ifd->ifd_len > sizeof (args)) {                             \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	bzero(&args, sizeof (args));                                    \
	if (bc->bc_flags & BC_F_COPYIN) {                               \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len);     \
	        if (error)                                              \
	                break;                                          \
	}                                                               \
                                                                        \
	BRIDGE_LOCK(sc);                                                \
	error = (*bc->bc_func)(sc, &args);                              \
	BRIDGE_UNLOCK(sc);                                              \
	if (error)                                                      \
	        break;                                                  \
                                                                        \
	if (bc->bc_flags & BC_F_COPYOUT)                                \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len);    \
} while (0)
1763
1764 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1765 interface_needs_input_broadcast(struct ifnet * ifp)
1766 {
1767 /*
1768 * Selectively enable input broadcast only when necessary.
1769 * The bridge interface itself attaches a fake protocol
1770 * so checking for at least two protocols means that the
1771 * interface is being used for something besides bridging
1772 * and needs to see broadcast packets from other members.
1773 */
1774 return if_get_protolist(ifp, NULL, 0) >= 2;
1775 }
1776
1777 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1778 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1779 {
1780 boolean_t old_input_broadcast;
1781
1782 old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1783 if (input_broadcast) {
1784 bif->bif_flags |= BIFF_INPUT_BROADCAST;
1785 } else {
1786 bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1787 }
1788 return old_input_broadcast != input_broadcast;
1789 }
1790
1791 /*
1792 * bridge_ioctl:
1793 *
1794 * Handle a control request from the operator.
1795 */
1796 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * data)1797 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1798 {
1799 struct bridge_softc *sc = ifp->if_softc;
1800 struct ifreq *ifr = (struct ifreq *)data;
1801 struct bridge_iflist *bif;
1802 int error = 0;
1803
1804 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1805
1806 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1807 "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1808 ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1809 (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1810 (char)IOCGROUP(cmd), cmd & 0xff);
1811
1812 switch (cmd) {
1813 case SIOCSIFADDR:
1814 case SIOCAIFADDR:
1815 ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1816 break;
1817
1818 case SIOCGIFMEDIA32:
1819 case SIOCGIFMEDIA64: {
1820 struct ifmediareq *ifmr = (struct ifmediareq *)data;
1821 user_addr_t user_addr;
1822
1823 user_addr = (cmd == SIOCGIFMEDIA64) ?
1824 ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
1825 CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
1826
1827 ifmr->ifm_status = IFM_AVALID;
1828 ifmr->ifm_mask = 0;
1829 ifmr->ifm_count = 1;
1830
1831 BRIDGE_LOCK(sc);
1832 if (!(sc->sc_flags & SCF_DETACHING) &&
1833 (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1834 ifmr->ifm_status |= IFM_ACTIVE;
1835 ifmr->ifm_active = ifmr->ifm_current =
1836 IFM_ETHER | IFM_AUTO;
1837 } else {
1838 ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1839 }
1840 BRIDGE_UNLOCK(sc);
1841
1842 if (user_addr != USER_ADDR_NULL) {
1843 error = copyout(&ifmr->ifm_current, user_addr,
1844 sizeof(int));
1845 }
1846 break;
1847 }
1848
1849 case SIOCADDMULTI:
1850 case SIOCDELMULTI:
1851 break;
1852
1853 case SIOCSDRVSPEC32:
1854 case SIOCGDRVSPEC32: {
1855 union {
1856 struct ifbreq ifbreq;
1857 struct ifbifconf32 ifbifconf;
1858 struct ifbareq32 ifbareq;
1859 struct ifbaconf32 ifbaconf;
1860 struct ifbrparam ifbrparam;
1861 struct ifbropreq32 ifbropreq;
1862 } args;
1863 struct ifdrv32 *ifd = (struct ifdrv32 *)data;
1864 const struct bridge_control *bridge_control_table =
1865 bridge_control_table32, *bc;
1866
1867 DRVSPEC;
1868
1869 break;
1870 }
1871 case SIOCSDRVSPEC64:
1872 case SIOCGDRVSPEC64: {
1873 union {
1874 struct ifbreq ifbreq;
1875 struct ifbifconf64 ifbifconf;
1876 struct ifbareq64 ifbareq;
1877 struct ifbaconf64 ifbaconf;
1878 struct ifbrparam ifbrparam;
1879 struct ifbropreq64 ifbropreq;
1880 } args;
1881 struct ifdrv64 *ifd = (struct ifdrv64 *)data;
1882 const struct bridge_control *bridge_control_table =
1883 bridge_control_table64, *bc;
1884
1885 DRVSPEC;
1886
1887 break;
1888 }
1889
1890 case SIOCSIFFLAGS:
1891 if (!(ifp->if_flags & IFF_UP) &&
1892 (ifp->if_flags & IFF_RUNNING)) {
1893 /*
1894 * If interface is marked down and it is running,
1895 * then stop and disable it.
1896 */
1897 BRIDGE_LOCK(sc);
1898 bridge_ifstop(ifp, 1);
1899 BRIDGE_UNLOCK(sc);
1900 } else if ((ifp->if_flags & IFF_UP) &&
1901 !(ifp->if_flags & IFF_RUNNING)) {
1902 /*
1903 * If interface is marked up and it is stopped, then
1904 * start it.
1905 */
1906 BRIDGE_LOCK(sc);
1907 error = bridge_init(ifp);
1908 BRIDGE_UNLOCK(sc);
1909 }
1910 break;
1911
1912 case SIOCSIFLLADDR:
1913 error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
1914 ifr->ifr_addr.sa_len);
1915 if (error != 0) {
1916 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1917 "%s SIOCSIFLLADDR error %d", ifp->if_xname,
1918 error);
1919 }
1920 break;
1921
1922 case SIOCSIFMTU:
1923 if (ifr->ifr_mtu < 576) {
1924 error = EINVAL;
1925 break;
1926 }
1927 BRIDGE_LOCK(sc);
1928 if (TAILQ_EMPTY(&sc->sc_iflist)) {
1929 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1930 BRIDGE_UNLOCK(sc);
1931 break;
1932 }
1933 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1934 if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
1935 BRIDGE_LOG(LOG_NOTICE, 0,
1936 "%s invalid MTU: %u(%s) != %d",
1937 sc->sc_ifp->if_xname,
1938 bif->bif_ifp->if_mtu,
1939 bif->bif_ifp->if_xname, ifr->ifr_mtu);
1940 error = EINVAL;
1941 break;
1942 }
1943 }
1944 if (!error) {
1945 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1946 }
1947 BRIDGE_UNLOCK(sc);
1948 break;
1949
1950 default:
1951 error = ether_ioctl(ifp, cmd, data);
1952 if (error != 0 && error != EOPNOTSUPP) {
1953 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1954 "ifp %s cmd 0x%08lx "
1955 "(%c%c [%lu] %c %lu) failed error: %d",
1956 ifp->if_xname, cmd,
1957 (cmd & IOC_IN) ? 'I' : ' ',
1958 (cmd & IOC_OUT) ? 'O' : ' ',
1959 IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
1960 cmd & 0xff, error);
1961 }
1962 break;
1963 }
1964 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1965
1966 return error;
1967 }
1968
1969 #if HAS_IF_CAP
1970 /*
1971 * bridge_mutecaps:
1972 *
1973 * Clear or restore unwanted capabilities on the member interface
1974 */
1975 static void
bridge_mutecaps(struct bridge_softc * sc)1976 bridge_mutecaps(struct bridge_softc *sc)
1977 {
1978 struct bridge_iflist *bif;
1979 int enabled, mask;
1980
1981 /* Initial bitmask of capabilities to test */
1982 mask = BRIDGE_IFCAPS_MASK;
1983
1984 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1985 /* Every member must support it or its disabled */
1986 mask &= bif->bif_savedcaps;
1987 }
1988
1989 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1990 enabled = bif->bif_ifp->if_capenable;
1991 enabled &= ~BRIDGE_IFCAPS_STRIP;
1992 /* strip off mask bits and enable them again if allowed */
1993 enabled &= ~BRIDGE_IFCAPS_MASK;
1994 enabled |= mask;
1995
1996 bridge_set_ifcap(sc, bif, enabled);
1997 }
1998 }
1999
2000 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)2001 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
2002 {
2003 struct ifnet *ifp = bif->bif_ifp;
2004 struct ifreq ifr;
2005 int error;
2006
2007 bzero(&ifr, sizeof(ifr));
2008 ifr.ifr_reqcap = set;
2009
2010 if (ifp->if_capenable != set) {
2011 IFF_LOCKGIANT(ifp);
2012 error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2013 IFF_UNLOCKGIANT(ifp);
2014 if (error) {
2015 BRIDGE_LOG(LOG_NOTICE, 0,
2016 "%s error setting interface capabilities on %s",
2017 sc->sc_ifp->if_xname, ifp->if_xname);
2018 }
2019 }
2020 }
2021 #endif /* HAS_IF_CAP */
2022
2023 static errno_t
bridge_set_tso(struct bridge_softc * sc)2024 bridge_set_tso(struct bridge_softc *sc)
2025 {
2026 struct bridge_iflist *bif;
2027 u_int32_t tso_v4_mtu;
2028 u_int32_t tso_v6_mtu;
2029 ifnet_offload_t offload;
2030 errno_t error = 0;
2031
2032 /* By default, support TSO */
2033 offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2034 tso_v4_mtu = IP_MAXPACKET;
2035 tso_v6_mtu = IP_MAXPACKET;
2036
2037 /* Use the lowest common denominator of the members */
2038 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2039 ifnet_t ifp = bif->bif_ifp;
2040
2041 if (ifp == NULL) {
2042 continue;
2043 }
2044
2045 if (offload & IFNET_TSO_IPV4) {
2046 if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2047 if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2048 tso_v4_mtu = ifp->if_tso_v4_mtu;
2049 }
2050 } else {
2051 offload &= ~IFNET_TSO_IPV4;
2052 tso_v4_mtu = 0;
2053 }
2054 }
2055 if (offload & IFNET_TSO_IPV6) {
2056 if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2057 if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2058 tso_v6_mtu = ifp->if_tso_v6_mtu;
2059 }
2060 } else {
2061 offload &= ~IFNET_TSO_IPV6;
2062 tso_v6_mtu = 0;
2063 }
2064 }
2065 }
2066
2067 if (offload != sc->sc_ifp->if_hwassist) {
2068 error = ifnet_set_offload(sc->sc_ifp, offload);
2069 if (error != 0) {
2070 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2071 "ifnet_set_offload(%s, 0x%x) failed %d",
2072 sc->sc_ifp->if_xname, offload, error);
2073 goto done;
2074 }
2075 /*
2076 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2077 * as large as the interface MTU
2078 */
2079 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2080 if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2081 tso_v4_mtu = sc->sc_ifp->if_mtu;
2082 }
2083 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2084 tso_v4_mtu);
2085 if (error != 0) {
2086 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2087 "ifnet_set_tso_mtu(%s, "
2088 "AF_INET, %u) failed %d",
2089 sc->sc_ifp->if_xname,
2090 tso_v4_mtu, error);
2091 goto done;
2092 }
2093 }
2094 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2095 if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2096 tso_v6_mtu = sc->sc_ifp->if_mtu;
2097 }
2098 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2099 tso_v6_mtu);
2100 if (error != 0) {
2101 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2102 "ifnet_set_tso_mtu(%s, "
2103 "AF_INET6, %u) failed %d",
2104 sc->sc_ifp->if_xname,
2105 tso_v6_mtu, error);
2106 goto done;
2107 }
2108 }
2109 }
2110 done:
2111 return error;
2112 }
2113
2114 /*
2115 * bridge_lookup_member:
2116 *
2117 * Lookup a bridge member interface.
2118 */
2119 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,const char * name)2120 bridge_lookup_member(struct bridge_softc *sc, const char *name)
2121 {
2122 struct bridge_iflist *bif;
2123 struct ifnet *ifp;
2124
2125 BRIDGE_LOCK_ASSERT_HELD(sc);
2126
2127 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2128 ifp = bif->bif_ifp;
2129 if (strcmp(ifp->if_xname, name) == 0) {
2130 return bif;
2131 }
2132 }
2133
2134 return NULL;
2135 }
2136
2137 /*
2138 * bridge_lookup_member_if:
2139 *
2140 * Lookup a bridge member interface by ifnet*.
2141 */
2142 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2143 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2144 {
2145 struct bridge_iflist *bif;
2146
2147 BRIDGE_LOCK_ASSERT_HELD(sc);
2148
2149 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2150 if (bif->bif_ifp == member_ifp) {
2151 return bif;
2152 }
2153 }
2154
2155 return NULL;
2156 }
2157
/*
 * bridge_iff_input:
 *
 *	Interface filter input callback (installed as iff_input by
 *	bridge_ioctl_add()).  `cookie' is the bridge_iflist of the member,
 *	`*data' the received mbuf and `*frame_ptr' the start of the link
 *	layer header.
 *
 *	The mbuf is temporarily rewound so that it starts at the link
 *	layer header, passed to bridge_input(), and moved forward again if
 *	bridge_input() returned 0.  `*data' and `*frame_ptr' are updated
 *	when the mbuf is replaced.
 *
 *	Returns 0, EJUSTRETURN when the header could not be pulled up, or
 *	whatever bridge_input() returned.
 */
static errno_t
bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(protocol)
	errno_t error = 0;
	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
	struct bridge_softc *sc = bif->bif_sc;
	int included = 0;
	size_t frmlen = 0;
	mbuf_t m = *data;

	/*
	 * Packets flagged M_PROTO1 are passed through untouched.
	 * NOTE(review): presumably the flag marks packets the bridge has
	 * already handled -- confirm against where it is set.
	 */
	if ((m->m_flags & M_PROTO1)) {
		goto out;
	}

	/*
	 * The frame header is usable only if it lies inside this mbuf's
	 * buffer, at or before the current data pointer; frmlen is the
	 * distance between the two.
	 */
	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
	    *frame_ptr <= (char *)mbuf_data(m)) {
		included = 1;
		frmlen = (char *)mbuf_data(m) - *frame_ptr;
	}
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
	    "frmlen %lu", sc->sc_ifp->if_xname,
	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
	    included ? "inside" : "outside", frmlen);
	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
		brlog_mbuf(m, "bridge_iff_input[", "");
		brlog_ether_header((struct ether_header *)
		    (void *)*frame_ptr);
		brlog_mbuf_data(m, 0, 20);
	}
	/* header is not in this mbuf: leave the packet alone (error is 0) */
	if (included == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
		goto out;
	}

	/* Move data pointer to start of frame to the link layer header */
	(void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
	    mbuf_len(m) + frmlen);
	(void) mbuf_pkthdr_adjustlen(m, frmlen);

	/*
	 * make sure we can access the ethernet header
	 *
	 * NOTE(review): this path returns 0 without undoing the
	 * mbuf_setdata()/mbuf_pkthdr_adjustlen() adjustment made just
	 * above -- confirm that is intended.
	 */
	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "short frame %lu < %lu",
		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
		goto out;
	}
	/* the header must be contiguous in the first mbuf */
	if (mbuf_len(m) < sizeof(struct ether_header)) {
		error = mbuf_pullup(data, sizeof(struct ether_header));
		if (error != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
			    "mbuf_pullup(%lu) failed %d",
			    sizeof(struct ether_header),
			    error);
			/* the pullup error is reported as EJUSTRETURN */
			error = EJUSTRETURN;
			goto out;
		}
		/* pullup may hand back a different mbuf */
		if (m != *data) {
			m = *data;
			*frame_ptr = mbuf_data(m);
		}
	}

	error = bridge_input(ifp, data);

	/* Adjust packet back to original */
	if (error == 0) {
		/* bridge_input might have modified *data */
		if (*data != m) {
			m = *data;
			*frame_ptr = mbuf_data(m);
		}
		(void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen,
		    mbuf_len(m) - frmlen);
		(void) mbuf_pkthdr_adjustlen(m, -frmlen);
	}

	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
		brlog_mbuf(m, "bridge_iff_input]", "");
	}

out:
	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	return error;
}
2249
2250 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2251 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2252 mbuf_t *data)
2253 {
2254 #pragma unused(protocol)
2255 errno_t error = 0;
2256 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2257 struct bridge_softc *sc = bif->bif_sc;
2258 mbuf_t m = *data;
2259
2260 if ((m->m_flags & M_PROTO1)) {
2261 goto out;
2262 }
2263 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2264 "%s from %s m 0x%llx data 0x%llx",
2265 sc->sc_ifp->if_xname, ifp->if_xname,
2266 (uint64_t)VM_KERNEL_ADDRPERM(m),
2267 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
2268
2269 error = bridge_member_output(sc, ifp, data);
2270 if (error != 0 && error != EJUSTRETURN) {
2271 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2272 "bridge_member_output failed error %d",
2273 error);
2274 }
2275 out:
2276 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2277
2278 return error;
2279 }
2280
2281 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2282 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2283 const struct kev_msg *event_msg)
2284 {
2285 #pragma unused(protocol)
2286 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2287 struct bridge_softc *sc = bif->bif_sc;
2288
2289 if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2290 event_msg->kev_class == KEV_NETWORK_CLASS &&
2291 event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2292 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2293 "%s event_code %u - %s",
2294 ifp->if_xname, event_msg->event_code,
2295 dlil_kev_dl_code_str(event_msg->event_code));
2296
2297 switch (event_msg->event_code) {
2298 case KEV_DL_LINK_OFF:
2299 case KEV_DL_LINK_ON: {
2300 bridge_iflinkevent(ifp);
2301 #if BRIDGESTP
2302 bstp_linkstate(ifp, event_msg->event_code);
2303 #endif /* BRIDGESTP */
2304 break;
2305 }
2306 case KEV_DL_SIFFLAGS: {
2307 if ((bif->bif_flags & BIFF_PROMISC) == 0 &&
2308 (ifp->if_flags & IFF_UP)) {
2309 errno_t error;
2310
2311 error = ifnet_set_promiscuous(ifp, 1);
2312 if (error != 0) {
2313 BRIDGE_LOG(LOG_NOTICE, 0,
2314 "ifnet_set_promiscuous (%s)"
2315 " failed %d", ifp->if_xname,
2316 error);
2317 } else {
2318 bif->bif_flags |= BIFF_PROMISC;
2319 }
2320 }
2321 break;
2322 }
2323 case KEV_DL_IFCAP_CHANGED: {
2324 BRIDGE_LOCK(sc);
2325 bridge_set_tso(sc);
2326 BRIDGE_UNLOCK(sc);
2327 break;
2328 }
2329 case KEV_DL_PROTO_DETACHED:
2330 case KEV_DL_PROTO_ATTACHED: {
2331 bridge_proto_attach_changed(ifp);
2332 break;
2333 }
2334 default:
2335 break;
2336 }
2337 }
2338 }
2339
2340 /*
2341 * bridge_iff_detached:
2342 *
2343 * Called when our interface filter has been detached from a
2344 * member interface.
2345 */
2346 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2347 bridge_iff_detached(void *cookie, ifnet_t ifp)
2348 {
2349 #pragma unused(cookie)
2350 struct bridge_iflist *bif;
2351 struct bridge_softc *sc = ifp->if_bridge;
2352
2353 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2354
2355 /* Check if the interface is a bridge member */
2356 if (sc != NULL) {
2357 BRIDGE_LOCK(sc);
2358 bif = bridge_lookup_member_if(sc, ifp);
2359 if (bif != NULL) {
2360 bridge_delete_member(sc, bif);
2361 }
2362 BRIDGE_UNLOCK(sc);
2363 return;
2364 }
2365 /* Check if the interface is a span port */
2366 lck_mtx_lock(&bridge_list_mtx);
2367 LIST_FOREACH(sc, &bridge_list, sc_list) {
2368 BRIDGE_LOCK(sc);
2369 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2370 if (ifp == bif->bif_ifp) {
2371 bridge_delete_span(sc, bif);
2372 break;
2373 }
2374 BRIDGE_UNLOCK(sc);
2375 }
2376 lck_mtx_unlock(&bridge_list_mtx);
2377 }
2378
2379 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2380 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2381 char *header)
2382 {
2383 #pragma unused(protocol, packet, header)
2384 BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2385 ifp->if_xname);
2386 return 0;
2387 }
2388
2389 static int
bridge_attach_protocol(struct ifnet * ifp)2390 bridge_attach_protocol(struct ifnet *ifp)
2391 {
2392 int error;
2393 struct ifnet_attach_proto_param reg;
2394
2395 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2396 bzero(®, sizeof(reg));
2397 reg.input = bridge_proto_input;
2398
2399 error = ifnet_attach_protocol(ifp, PF_BRIDGE, ®);
2400 if (error) {
2401 BRIDGE_LOG(LOG_NOTICE, 0,
2402 "ifnet_attach_protocol(%s) failed, %d",
2403 ifp->if_xname, error);
2404 }
2405
2406 return error;
2407 }
2408
2409 static int
bridge_detach_protocol(struct ifnet * ifp)2410 bridge_detach_protocol(struct ifnet *ifp)
2411 {
2412 int error;
2413
2414 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2415 error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2416 if (error) {
2417 BRIDGE_LOG(LOG_NOTICE, 0,
2418 "ifnet_detach_protocol(%s) failed, %d",
2419 ifp->if_xname, error);
2420 }
2421
2422 return error;
2423 }
2424
/*
 * bridge_delete_member:
 *
 *	Delete the specified member interface.
 *
 *	Must be called with the bridge lock held.  The lock is dropped
 *	while the member interface is being restored (promiscuous mode,
 *	protocol, interface filter, ...) and is taken again before
 *	returning, so the caller gets the lock back but must not assume
 *	bridge state is unchanged across the call.
 *
 *	`bif' is freed here and must not be used by the caller afterwards.
 *	Members that never made it onto sc_iflist (BIFF_IN_MEMBER_LIST
 *	clear) are handled as well; only the steps recorded in bif_flags
 *	are undone.
 */
static void
bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
{
	uint32_t bif_flags;
	struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
	int lladdr_changed = 0, error;
	uint8_t eaddr[ETHER_ADDR_LEN];
	u_int32_t event_code = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);
	VERIFY(ifs != NULL);

	/*
	 * Remove the member from the list first so it cannot be found anymore
	 * when we release the bridge lock below
	 */
	if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
		BRIDGE_XLOCK(sc);
		TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
		BRIDGE_XDROP(sc);
	}
	/*
	 * If MAC NAT is active, either turn it off (this member is the
	 * MAC NAT interface) or flush the entries that belong to this member.
	 */
	if (sc->sc_mac_nat_bif != NULL) {
		if (bif == sc->sc_mac_nat_bif) {
			bridge_mac_nat_disable(sc);
		} else {
			bridge_mac_nat_flush_entries(sc, bif);
		}
	}
#if BRIDGESTP
	if ((bif->bif_ifflags & IFBIF_STP) != 0) {
		bstp_disable(&bif->bif_stp);
	}
#endif /* BRIDGESTP */

	/*
	 * If removing the interface that gave the bridge its mac address, set
	 * the mac address of the bridge to the address of the next member, or
	 * to its default address if no members are left.
	 *
	 * The new address is only staged in eaddr here; it is applied with
	 * ifnet_set_lladdr() after the bridge lock has been dropped.
	 */
	if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
		ifnet_release(sc->sc_ifaddr);
		if (TAILQ_EMPTY(&sc->sc_iflist)) {
			bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
			sc->sc_ifaddr = NULL;
		} else {
			struct ifnet *fif =
			    TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
			bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
			sc->sc_ifaddr = fif;
			ifnet_reference(fif);   /* for sc_ifaddr */
		}
		lladdr_changed = 1;
	}

#if HAS_IF_CAP
	bridge_mutecaps(sc);    /* recalculate now this interface is removed */
#endif /* HAS_IF_CAP */

	/* TSO settings depend on the remaining members */
	error = bridge_set_tso(sc);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
	}

	/* drop every forwarding entry that points at this interface */
	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);

	KASSERT(bif->bif_addrcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));

	/*
	 * Update link status of the bridge based on its remaining members
	 */
	event_code = bridge_updatelinkstatus(sc);
	/* snapshot the flags that are consulted after the lock is dropped */
	bif_flags = bif->bif_flags;
	BRIDGE_UNLOCK(sc);

	/*
	 * only perform these steps if the interface is still attached
	 *
	 * A successful ifnet_is_attached(ifs, 1) is paired with the
	 * ifnet_decr_iorefcnt() at the end of this block.
	 */
	if (ifnet_is_attached(ifs, 1)) {
#if SKYWALK
		if ((bif_flags & BIFF_NETAGENT_REMOVED) != 0) {
			ifnet_add_netagent(ifs);
		}
		if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
			ifnet_detach_flowswitch_nexus(ifs);
		}
#endif /* SKYWALK */
		/* disable promiscuous mode */
		if ((bif_flags & BIFF_PROMISC) != 0) {
			(void) ifnet_set_promiscuous(ifs, 0);
		}
#if HAS_IF_CAP
		/* re-enable any interface capabilities */
		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
#endif
		/* detach bridge "protocol" */
		if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
			(void)bridge_detach_protocol(ifs);
		}
		/* detach interface filter */
		if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
			iflt_detach(bif->bif_iff_ref);
		}
		ifnet_decr_iorefcnt(ifs);
	}

	/* apply the MAC address staged above; failure is only logged */
	if (lladdr_changed &&
	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
	}

	/* a non-zero event code is posted on the bridge interface */
	if (event_code != 0) {
		bridge_link_event(bifp, event_code);
	}

#if BRIDGESTP
	bstp_destroy(&bif->bif_stp);    /* prepare to free */
#endif /* BRIDGESTP */

	/* free the member entry and drop the reference it held on ifs */
	kfree_type(struct bridge_iflist, bif);
	ifs->if_bridge = NULL;
	ifnet_release(ifs);

	/* hand the lock back to the caller */
	BRIDGE_LOCK(sc);
}
2553
2554 /*
2555 * bridge_delete_span:
2556 *
2557 * Delete the specified span interface.
2558 */
2559 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2560 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2561 {
2562 BRIDGE_LOCK_ASSERT_HELD(sc);
2563
2564 KASSERT(bif->bif_ifp->if_bridge == NULL,
2565 ("%s: not a span interface", __func__));
2566
2567 ifnet_release(bif->bif_ifp);
2568
2569 TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2570 kfree_type(struct bridge_iflist, bif);
2571 }
2572
/*
 * bridge_ioctl_add:
 *
 *	Add the interface named in the ifbreq (`arg') as a member of the
 *	bridge.
 *
 *	The bridge lock is dropped for the steps that call into the
 *	interface layer and is held again on every path that reaches the
 *	`out' label.  NOTE(review): presumably entered with the bridge
 *	lock held, as the DRVSPEC macro does for bc_func -- confirm.
 *
 *	Returns 0 on success, or:
 *	  ENOENT  no interface with that name;
 *	  EINVAL  interface has no ioctl routine, is an internal
 *	          coprocessor interface, has an unsupported if_type, or
 *	          its MTU differs from the bridge MTU while the bridge
 *	          already has members;
 *	  EBUSY   interface is a span port of this bridge, is already a
 *	          member of another bridge, or would need MAC NAT while
 *	          another member already provides it;
 *	  EEXIST  interface is already a member of this bridge;
 *	  ENXIO   interface is not attached;
 *	  or the error of the step that failed while attaching.
 *	On failure after the member entry was allocated, the entry is torn
 *	down again through bridge_delete_member().
 */
static int
bridge_ioctl_add(struct bridge_softc *sc, void *arg)
{
	struct ifbreq *req = arg;
	struct bridge_iflist *bif = NULL;
	struct ifnet *ifs, *bifp = sc->sc_ifp;
	int error = 0, lladdr_changed = 0;
	uint8_t eaddr[ETHER_ADDR_LEN];
	struct iff_filter iff;
	u_int32_t event_code = 0;
	boolean_t mac_nat = FALSE;
	boolean_t input_broadcast;

	ifs = ifunit(req->ifbr_ifsname);
	if (ifs == NULL) {
		return ENOENT;
	}
	if (ifs->if_ioctl == NULL) {    /* must be supported */
		return EINVAL;
	}

	if (IFNET_IS_INTCOPROC(ifs)) {
		return EINVAL;
	}

	/* If it's in the span list, it can't be a member. */
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
		if (ifs == bif->bif_ifp) {
			return EBUSY;
		}
	}

	/* already a member of this bridge */
	if (ifs->if_bridge == sc) {
		return EEXIST;
	}

	/* already a member of some other bridge */
	if (ifs->if_bridge != NULL) {
		return EBUSY;
	}

	/* only Ethernet-like interface types are accepted */
	switch (ifs->if_type) {
	case IFT_ETHER:
		if (strcmp(ifs->if_name, "en") == 0 &&
		    ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
		    (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
			/* XXX is there a better way to identify Wi-Fi STA? */
			mac_nat = TRUE;
		}
		break;
	case IFT_L2VLAN:
	case IFT_IEEE8023ADLAG:
		break;
	case IFT_GIF:
	/* currently not supported */
	/* FALLTHRU */
	default:
		return EINVAL;
	}

	/* fail to add the interface if the MTU doesn't match */
	if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
		BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
		    sc->sc_ifp->if_xname,
		    ifs->if_xname);
		return EINVAL;
	}

	/* there's already an interface that's doing MAC NAT */
	if (mac_nat && sc->sc_mac_nat_bif != NULL) {
		return EBUSY;
	}

	/*
	 * prevent the interface from detaching while we add the member
	 *
	 * From here on every exit goes through `out', which drops this
	 * I/O reference again.
	 */
	if (!ifnet_is_attached(ifs, 1)) {
		return ENXIO;
	}

	/* allocate a new member */
	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	bif->bif_ifp = ifs;
	ifnet_reference(ifs);
	bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
#if HAS_IF_CAP
	bif->bif_savedcaps = ifs->if_capenable;
#endif /* HAS_IF_CAP */
	bif->bif_sc = sc;
	if (mac_nat) {
		(void)bridge_mac_nat_enable(sc, bif);
	}

	if (IFNET_IS_VMNET(ifs)) {
		allocate_vmnet_pf_tags();
	}
	/* Allow the first Ethernet member to define the MTU */
	if (TAILQ_EMPTY(&sc->sc_iflist)) {
		sc->sc_ifp->if_mtu = ifs->if_mtu;
	}

	/*
	 * Assign the interface's MAC address to the bridge if it's the first
	 * member and the MAC address of the bridge has not been changed from
	 * the default (randomly) generated one.
	 *
	 * The address is only staged in eaddr here and applied further down,
	 * once the bridge lock has been dropped.
	 */
	if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
	    _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
		bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
		sc->sc_ifaddr = ifs;
		ifnet_reference(ifs);   /* for sc_ifaddr */
		lladdr_changed = 1;
	}

	ifs->if_bridge = sc;
#if BRIDGESTP
	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
#endif /* BRIDGESTP */

#if HAS_IF_CAP
	/* Set interface capabilities to the intersection set of all members */
	bridge_mutecaps(sc);
#endif /* HAS_IF_CAP */


	/*
	 * Respect lock ordering with DLIL lock for the following operations
	 */
	BRIDGE_UNLOCK(sc);

	/* enable promiscuous mode */
	error = ifnet_set_promiscuous(ifs, 1);
	switch (error) {
	case 0:
		bif->bif_flags |= BIFF_PROMISC;
		break;
	case ENETDOWN:
	case EPWROFF:
		BRIDGE_LOG(LOG_NOTICE, 0,
		    "ifnet_set_promiscuous(%s) failed %d, ignoring",
		    ifs->if_xname, error);
		/* Ignore error when device is not up */
		error = 0;
		break;
	default:
		BRIDGE_LOG(LOG_NOTICE, 0,
		    "ifnet_set_promiscuous(%s) failed %d",
		    ifs->if_xname, error);
		/* `out' expects the bridge lock to be held */
		BRIDGE_LOCK(sc);
		goto out;
	}

#if SKYWALK
	/* ensure that the flowswitch is present for native interface */
	if (SKYWALK_NATIVE(ifs)) {
		if (ifnet_attach_flowswitch_nexus(ifs)) {
			bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
		}
	}
	/* remove the netagent on the flowswitch (rdar://75050182) */
	if (ifnet_remove_netagent(ifs)) {
		bif->bif_flags |= BIFF_NETAGENT_REMOVED;
	}
#endif /* SKYWALK */

	/*
	 * install an interface filter
	 *
	 * The member entry is the filter cookie, which is how the
	 * bridge_iff_* callbacks find their bridge_iflist.
	 */
	memset(&iff, 0, sizeof(struct iff_filter));
	iff.iff_cookie = bif;
	iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
	iff.iff_input = bridge_iff_input;
	iff.iff_output = bridge_iff_output;
	iff.iff_event = bridge_iff_event;
	iff.iff_detached = bridge_iff_detached;
	error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
	    DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
		BRIDGE_LOCK(sc);
		goto out;
	}
	bif->bif_flags |= BIFF_FILTER_ATTACHED;

	/*
	 * install a dummy "bridge" protocol
	 */
	if ((error = bridge_attach_protocol(ifs)) != 0) {
		if (error != 0) {
			BRIDGE_LOG(LOG_NOTICE, 0,
			    "bridge_attach_protocol failed %d", error);
			BRIDGE_LOCK(sc);
			goto out;
		}
	}
	bif->bif_flags |= BIFF_PROTO_ATTACHED;

	/*
	 * Apply the MAC address staged above.
	 *
	 * NOTE(review): a failure here leaves `error' non-zero; nothing
	 * below resets it, so the member is still inserted and then removed
	 * again at `out' and the error is returned -- confirm that this is
	 * intended given the "No failures past this point" remark below.
	 */
	if (lladdr_changed &&
	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
	}

	/*
	 * No failures past this point. Add the member to the list.
	 */
	BRIDGE_LOCK(sc);
	bif->bif_flags |= BIFF_IN_MEMBER_LIST;
	BRIDGE_XLOCK(sc);
	TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
	BRIDGE_XDROP(sc);

	/* cache the member link status */
	if (interface_media_active(ifs)) {
		bif->bif_flags |= BIFF_MEDIA_ACTIVE;
	} else {
		bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
	}

	/* the new member may change the link status of the bridge interface */
	event_code = bridge_updatelinkstatus(sc);

	/* check whether we need input broadcast or not */
	input_broadcast = interface_needs_input_broadcast(ifs);
	bif_set_input_broadcast(bif, input_broadcast);
	/* the link event is posted without the bridge lock held */
	BRIDGE_UNLOCK(sc);

	if (event_code != 0) {
		bridge_link_event(bifp, event_code);
	}
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
	    "%s input broadcast %s", ifs->if_xname,
	    input_broadcast ? "ENABLED" : "DISABLED");

	BRIDGE_LOCK(sc);
	/* the result of the TSO recomputation is not checked here */
	bridge_set_tso(sc);

out:
	/* allow the interface to detach */
	ifnet_decr_iorefcnt(ifs);

	if (error != 0) {
		/* undo whatever was set up; this also frees bif */
		if (bif != NULL) {
			bridge_delete_member(sc, bif);
		}
	} else if (IFNET_IS_VMNET(ifs)) {
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
	}

	return error;
}
2820
2821 static int
bridge_ioctl_del(struct bridge_softc * sc,void * arg)2822 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
2823 {
2824 struct ifbreq *req = arg;
2825 struct bridge_iflist *bif;
2826
2827 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2828 if (bif == NULL) {
2829 return ENOENT;
2830 }
2831
2832 bridge_delete_member(sc, bif);
2833
2834 return 0;
2835 }
2836
/*
 * bridge_ioctl_purge:
 *
 *	Accepted for compatibility of the control interface; performs no
 *	work and always reports success.
 */
static int
bridge_ioctl_purge(struct bridge_softc *sc, void *arg)
{
#pragma unused(sc, arg)
	const int result = 0;

	return result;
}
2843
2844 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * arg)2845 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
2846 {
2847 struct ifbreq *req = arg;
2848 struct bridge_iflist *bif;
2849
2850 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2851 if (bif == NULL) {
2852 return ENOENT;
2853 }
2854
2855 struct bstp_port *bp;
2856
2857 bp = &bif->bif_stp;
2858 req->ifbr_state = bp->bp_state;
2859 req->ifbr_priority = bp->bp_priority;
2860 req->ifbr_path_cost = bp->bp_path_cost;
2861 req->ifbr_proto = bp->bp_protover;
2862 req->ifbr_role = bp->bp_role;
2863 req->ifbr_stpflags = bp->bp_flags;
2864 req->ifbr_ifsflags = bif->bif_ifflags;
2865
2866 /* Copy STP state options as flags */
2867 if (bp->bp_operedge) {
2868 req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
2869 }
2870 if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
2871 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
2872 }
2873 if (bp->bp_ptp_link) {
2874 req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
2875 }
2876 if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
2877 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
2878 }
2879 if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
2880 req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
2881 }
2882 if (bp->bp_flags & BSTP_PORT_ADMCOST) {
2883 req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
2884 }
2885
2886 req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
2887 req->ifbr_addrcnt = bif->bif_addrcnt;
2888 req->ifbr_addrmax = bif->bif_addrmax;
2889 req->ifbr_addrexceeded = bif->bif_addrexceeded;
2890
2891 return 0;
2892 }
2893
2894 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * arg)2895 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
2896 {
2897 struct ifbreq *req = arg;
2898 struct bridge_iflist *bif;
2899 #if BRIDGESTP
2900 struct bstp_port *bp;
2901 int error;
2902 #endif /* BRIDGESTP */
2903
2904 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2905 if (bif == NULL) {
2906 return ENOENT;
2907 }
2908
2909 if (req->ifbr_ifsflags & IFBIF_SPAN) {
2910 /* SPAN is readonly */
2911 return EINVAL;
2912 }
2913 #define _EXCLUSIVE_FLAGS (IFBIF_CHECKSUM_OFFLOAD | IFBIF_MAC_NAT)
2914 if ((req->ifbr_ifsflags & _EXCLUSIVE_FLAGS) == _EXCLUSIVE_FLAGS) {
2915 /* can't specify both MAC-NAT and checksum offload */
2916 return EINVAL;
2917 }
2918 if ((req->ifbr_ifsflags & IFBIF_MAC_NAT) != 0) {
2919 errno_t error;
2920
2921 error = bridge_mac_nat_enable(sc, bif);
2922 if (error != 0) {
2923 return error;
2924 }
2925 } else if (sc->sc_mac_nat_bif == bif) {
2926 bridge_mac_nat_disable(sc);
2927 }
2928
2929
2930 #if BRIDGESTP
2931 if (req->ifbr_ifsflags & IFBIF_STP) {
2932 if ((bif->bif_ifflags & IFBIF_STP) == 0) {
2933 error = bstp_enable(&bif->bif_stp);
2934 if (error) {
2935 return error;
2936 }
2937 }
2938 } else {
2939 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2940 bstp_disable(&bif->bif_stp);
2941 }
2942 }
2943
2944 /* Pass on STP flags */
2945 bp = &bif->bif_stp;
2946 bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
2947 bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
2948 bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
2949 bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
2950 #else /* !BRIDGESTP */
2951 if (req->ifbr_ifsflags & IFBIF_STP) {
2952 return EOPNOTSUPP;
2953 }
2954 #endif /* !BRIDGESTP */
2955
2956 /* Save the bits relating to the bridge */
2957 bif->bif_ifflags = req->ifbr_ifsflags & IFBIFMASK;
2958
2959
2960 return 0;
2961 }
2962
2963 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * arg)2964 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
2965 {
2966 struct ifbrparam *param = arg;
2967
2968 sc->sc_brtmax = param->ifbrp_csize;
2969 bridge_rttrim(sc);
2970 return 0;
2971 }
2972
2973 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * arg)2974 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
2975 {
2976 struct ifbrparam *param = arg;
2977
2978 param->ifbrp_csize = sc->sc_brtmax;
2979
2980 return 0;
2981 }
2982
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of bridge_ioctl_gifs32() and bridge_ioctl_gifs64().
 *	Expects `sc', `bifc' and `error' to be in scope in the expanding
 *	function.
 *
 *	Copies one struct ifbreq per member interface, followed by one per
 *	span interface, to the user buffer bifc->ifbic_req.  When
 *	bifc->ifbic_len is zero, only the required length is stored in
 *	bifc->ifbic_len and the expanding function returns 0.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout.  The number of entries written is bounded by the size
 *	computed before the lock was dropped, so a list that grew in the
 *	meantime cannot overrun the buffer.
 *
 *	Returns ENOMEM from the expanding function if the temporary buffer
 *	cannot be allocated.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif; \
	struct ifbreq breq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
 \
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) \
		count++; \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) \
		count++; \
 \
	buflen = sizeof (breq) * count; \
	if (bifc->ifbic_len == 0) { \
		bifc->ifbic_len = buflen; \
		return (0); \
	} \
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* Don't memcpy() into a failed allocation */ \
	if (outbuf == NULL && buflen != 0) \
		return (ENOMEM); \
 \
	count = 0; \
	buf = outbuf; \
	len = min(bifc->ifbic_len, buflen); \
	bzero(&breq, sizeof (breq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if (len < sizeof (breq)) \
			break; \
 \
		snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
		    "%s", bif->bif_ifp->if_xname); \
		/* Fill in the ifbreq structure */ \
		error = bridge_ioctl_gifflags(sc, &breq); \
		if (error) \
			break; \
		memcpy(buf, &breq, sizeof (breq)); \
		count++; \
		buf += sizeof (breq); \
		len -= sizeof (breq); \
	} \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) { \
		if (len < sizeof (breq)) \
			break; \
 \
		snprintf(breq.ifbr_ifsname, \
		    sizeof (breq.ifbr_ifsname), \
		    "%s", bif->bif_ifp->if_xname); \
		breq.ifbr_ifsflags = bif->bif_ifflags; \
		breq.ifbr_portno \
		    = bif->bif_ifp->if_index & 0xfff; \
		memcpy(buf, &breq, sizeof (breq)); \
		count++; \
		buf += sizeof (breq); \
		len -= sizeof (breq); \
	} \
 \
	BRIDGE_UNLOCK(sc); \
	bifc->ifbic_len = sizeof (breq) * count; \
	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len); \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
} while (0)
3045
3046 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * arg)3047 bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
3048 {
3049 struct ifbifconf64 *bifc = arg;
3050 int error = 0;
3051
3052 BRIDGE_IOCTL_GIFS;
3053
3054 return error;
3055 }
3056
3057 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * arg)3058 bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
3059 {
3060 struct ifbifconf32 *bifc = arg;
3061 int error = 0;
3062
3063 BRIDGE_IOCTL_GIFS;
3064
3065 return error;
3066 }
3067
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of bridge_ioctl_rts32() and bridge_ioctl_rts64().
 *	Expects `sc', `bac', `bareq' and `error' to be in scope in the
 *	expanding function, and always returns from it.
 *
 *	Copies one address request per entry on sc->sc_rtlist to the user
 *	buffer bac->ifbac_req and stores the number of bytes produced in
 *	bac->ifbac_len.  Returns 0 immediately when bac->ifbac_len is zero.
 *
 *	ifba_expire is the remaining lifetime for a dynamic entry that has
 *	not yet expired, and 0 for every other entry.
 *
 *	The bridge lock is dropped around the allocation and the copyout.
 *	The number of entries written is bounded by the size computed
 *	before the lock was dropped.  Returns ENOMEM if the temporary
 *	buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_RTS do { \
	struct bridge_rtnode *brt; \
	char *buf; \
	char *outbuf = NULL; \
	unsigned int count, buflen, len; \
	unsigned long now; \
 \
	if (bac->ifbac_len == 0) \
		return (0); \
 \
	bzero(&bareq, sizeof (bareq)); \
	count = 0; \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) \
		count++; \
	buflen = sizeof (bareq) * count; \
 \
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
 \
	count = 0; \
	/* Don't memcpy() into a failed allocation */ \
	if (outbuf == NULL && buflen != 0) { \
		error = ENOMEM; \
		goto out; \
	} \
	buf = outbuf; \
	len = min(bac->ifbac_len, buflen); \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
		if (len < sizeof (bareq)) \
			goto out; \
		snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname), \
		    "%s", brt->brt_ifp->if_xname); \
		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
		bareq.ifba_vlan = brt->brt_vlan; \
		/* \
		 * bareq is reused for every entry: reset the expiry so \
		 * an already-expired dynamic entry does not report the \
		 * value left over from the previous entry. \
		 */ \
		bareq.ifba_expire = 0; \
		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
			now = (unsigned long) net_uptime(); \
			if (now < brt->brt_expire) \
				bareq.ifba_expire = \
				    brt->brt_expire - now; \
		} \
		bareq.ifba_flags = brt->brt_flags; \
 \
		memcpy(buf, &bareq, sizeof (bareq)); \
		count++; \
		buf += sizeof (bareq); \
		len -= sizeof (bareq); \
	} \
out: \
	bac->ifbac_len = sizeof (bareq) * count; \
	if (outbuf != NULL) { \
		BRIDGE_UNLOCK(sc); \
		error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); \
		kfree_data(outbuf, buflen); \
		BRIDGE_LOCK(sc); \
	} \
	return (error); \
} while (0)
3122
3123 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * arg)3124 bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
3125 {
3126 struct ifbaconf64 *bac = arg;
3127 struct ifbareq64 bareq;
3128 int error = 0;
3129
3130 BRIDGE_IOCTL_RTS;
3131 return error;
3132 }
3133
3134 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * arg)3135 bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
3136 {
3137 struct ifbaconf32 *bac = arg;
3138 struct ifbareq32 bareq;
3139 int error = 0;
3140
3141 BRIDGE_IOCTL_RTS;
3142 return error;
3143 }
3144
3145 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * arg)3146 bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
3147 {
3148 struct ifbareq32 *req = arg;
3149 struct bridge_iflist *bif;
3150 int error;
3151
3152 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3153 if (bif == NULL) {
3154 return ENOENT;
3155 }
3156
3157 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3158 req->ifba_flags);
3159
3160 return error;
3161 }
3162
3163 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * arg)3164 bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
3165 {
3166 struct ifbareq64 *req = arg;
3167 struct bridge_iflist *bif;
3168 int error;
3169
3170 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3171 if (bif == NULL) {
3172 return ENOENT;
3173 }
3174
3175 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3176 req->ifba_flags);
3177
3178 return error;
3179 }
3180
3181 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * arg)3182 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
3183 {
3184 struct ifbrparam *param = arg;
3185
3186 sc->sc_brttimeout = param->ifbrp_ctime;
3187 return 0;
3188 }
3189
3190 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * arg)3191 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
3192 {
3193 struct ifbrparam *param = arg;
3194
3195 param->ifbrp_ctime = sc->sc_brttimeout;
3196 return 0;
3197 }
3198
3199 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * arg)3200 bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
3201 {
3202 struct ifbareq32 *req = arg;
3203
3204 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3205 }
3206
3207 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * arg)3208 bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
3209 {
3210 struct ifbareq64 *req = arg;
3211
3212 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3213 }
3214
3215 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * arg)3216 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
3217 {
3218 struct ifbreq *req = arg;
3219
3220 bridge_rtflush(sc, req->ifbr_ifsflags);
3221 return 0;
3222 }
3223
3224 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * arg)3225 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
3226 {
3227 struct ifbrparam *param = arg;
3228 struct bstp_state *bs = &sc->sc_stp;
3229
3230 param->ifbrp_prio = bs->bs_bridge_priority;
3231 return 0;
3232 }
3233
3234 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * arg)3235 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
3236 {
3237 #if BRIDGESTP
3238 struct ifbrparam *param = arg;
3239
3240 return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3241 #else /* !BRIDGESTP */
3242 #pragma unused(sc, arg)
3243 return EOPNOTSUPP;
3244 #endif /* !BRIDGESTP */
3245 }
3246
3247 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * arg)3248 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
3249 {
3250 struct ifbrparam *param = arg;
3251 struct bstp_state *bs = &sc->sc_stp;
3252
3253 param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3254 return 0;
3255 }
3256
3257 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * arg)3258 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
3259 {
3260 #if BRIDGESTP
3261 struct ifbrparam *param = arg;
3262
3263 return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3264 #else /* !BRIDGESTP */
3265 #pragma unused(sc, arg)
3266 return EOPNOTSUPP;
3267 #endif /* !BRIDGESTP */
3268 }
3269
3270 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * arg)3271 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
3272 {
3273 struct ifbrparam *param;
3274 struct bstp_state *bs;
3275
3276 param = arg;
3277 bs = &sc->sc_stp;
3278 param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3279 return 0;
3280 }
3281
3282 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * arg)3283 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
3284 {
3285 #if BRIDGESTP
3286 struct ifbrparam *param = arg;
3287
3288 return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3289 #else /* !BRIDGESTP */
3290 #pragma unused(sc, arg)
3291 return EOPNOTSUPP;
3292 #endif /* !BRIDGESTP */
3293 }
3294
3295 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * arg)3296 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
3297 {
3298 struct ifbrparam *param;
3299 struct bstp_state *bs;
3300
3301 param = arg;
3302 bs = &sc->sc_stp;
3303 param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3304 return 0;
3305 }
3306
3307 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * arg)3308 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
3309 {
3310 #if BRIDGESTP
3311 struct ifbrparam *param = arg;
3312
3313 return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3314 #else /* !BRIDGESTP */
3315 #pragma unused(sc, arg)
3316 return EOPNOTSUPP;
3317 #endif /* !BRIDGESTP */
3318 }
3319
3320 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * arg)3321 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
3322 {
3323 #if BRIDGESTP
3324 struct ifbreq *req = arg;
3325 struct bridge_iflist *bif;
3326
3327 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3328 if (bif == NULL) {
3329 return ENOENT;
3330 }
3331
3332 return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3333 #else /* !BRIDGESTP */
3334 #pragma unused(sc, arg)
3335 return EOPNOTSUPP;
3336 #endif /* !BRIDGESTP */
3337 }
3338
3339 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * arg)3340 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
3341 {
3342 #if BRIDGESTP
3343 struct ifbreq *req = arg;
3344 struct bridge_iflist *bif;
3345
3346 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3347 if (bif == NULL) {
3348 return ENOENT;
3349 }
3350
3351 return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3352 #else /* !BRIDGESTP */
3353 #pragma unused(sc, arg)
3354 return EOPNOTSUPP;
3355 #endif /* !BRIDGESTP */
3356 }
3357
3358 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * arg)3359 bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
3360 {
3361 struct ifbrparam *param = arg;
3362
3363 param->ifbrp_filter = sc->sc_filter_flags;
3364
3365 return 0;
3366 }
3367
3368 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * arg)3369 bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
3370 {
3371 struct ifbrparam *param = arg;
3372
3373 if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3374 return EINVAL;
3375 }
3376
3377 if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3378 return EINVAL;
3379 }
3380
3381 sc->sc_filter_flags = param->ifbrp_filter;
3382
3383 return 0;
3384 }
3385
3386 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * arg)3387 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
3388 {
3389 struct ifbreq *req = arg;
3390 struct bridge_iflist *bif;
3391
3392 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3393 if (bif == NULL) {
3394 return ENOENT;
3395 }
3396
3397 bif->bif_addrmax = req->ifbr_addrmax;
3398 return 0;
3399 }
3400
3401 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * arg)3402 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
3403 {
3404 struct ifbreq *req = arg;
3405 struct bridge_iflist *bif = NULL;
3406 struct ifnet *ifs;
3407
3408 ifs = ifunit(req->ifbr_ifsname);
3409 if (ifs == NULL) {
3410 return ENOENT;
3411 }
3412
3413 if (IFNET_IS_INTCOPROC(ifs)) {
3414 return EINVAL;
3415 }
3416
3417 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3418 if (ifs == bif->bif_ifp) {
3419 return EBUSY;
3420 }
3421
3422 if (ifs->if_bridge != NULL) {
3423 return EBUSY;
3424 }
3425
3426 switch (ifs->if_type) {
3427 case IFT_ETHER:
3428 case IFT_L2VLAN:
3429 case IFT_IEEE8023ADLAG:
3430 break;
3431 case IFT_GIF:
3432 /* currently not supported */
3433 /* FALLTHRU */
3434 default:
3435 return EINVAL;
3436 }
3437
3438 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3439
3440 bif->bif_ifp = ifs;
3441 bif->bif_ifflags = IFBIF_SPAN;
3442
3443 ifnet_reference(bif->bif_ifp);
3444
3445 TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3446
3447 return 0;
3448 }
3449
3450 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * arg)3451 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
3452 {
3453 struct ifbreq *req = arg;
3454 struct bridge_iflist *bif;
3455 struct ifnet *ifs;
3456
3457 ifs = ifunit(req->ifbr_ifsname);
3458 if (ifs == NULL) {
3459 return ENOENT;
3460 }
3461
3462 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3463 if (ifs == bif->bif_ifp) {
3464 break;
3465 }
3466
3467 if (bif == NULL) {
3468 return ENOENT;
3469 }
3470
3471 bridge_delete_span(sc, bif);
3472
3473 return 0;
3474 }
3475
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of bridge_ioctl_gbparam32() and
 *	bridge_ioctl_gbparam64().  Expects `sc' and `req' to be in scope.
 *
 *	Copies the spanning tree parameters held in sc->sc_stp into the
 *	request.  The three timer values are shifted right by 8 bits.
 *	ifbop_root_port is the interface index of the root port, or 0 when
 *	there is none.
 */
#define BRIDGE_IOCTL_GBPARAM do { \
	struct bstp_state *stp = &sc->sc_stp; \
	struct bstp_port *rp = stp->bs_root_port; \
 \
	req->ifbop_maxage = stp->bs_bridge_max_age >> 8; \
	req->ifbop_hellotime = stp->bs_bridge_htime >> 8; \
	req->ifbop_fwddelay = stp->bs_bridge_fdelay >> 8; \
 \
	if (rp != NULL) \
		req->ifbop_root_port = rp->bp_ifp->if_index; \
	else \
		req->ifbop_root_port = 0; \
 \
	req->ifbop_holdcount = stp->bs_txholdcount; \
	req->ifbop_priority = stp->bs_bridge_priority; \
	req->ifbop_protocol = stp->bs_protover; \
	req->ifbop_root_path_cost = stp->bs_root_pv.pv_cost; \
	req->ifbop_bridgeid = stp->bs_bridge_pv.pv_dbridge_id; \
	req->ifbop_designated_root = stp->bs_root_pv.pv_root_id; \
	req->ifbop_designated_bridge = stp->bs_root_pv.pv_dbridge_id; \
	req->ifbop_last_tc_time.tv_sec = stp->bs_last_tc_time.tv_sec; \
	req->ifbop_last_tc_time.tv_usec = stp->bs_last_tc_time.tv_usec; \
} while (0)
3500
3501 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * arg)3502 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *arg)
3503 {
3504 struct ifbropreq32 *req = arg;
3505
3506 BRIDGE_IOCTL_GBPARAM;
3507 return 0;
3508 }
3509
3510 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * arg)3511 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *arg)
3512 {
3513 struct ifbropreq64 *req = arg;
3514
3515 BRIDGE_IOCTL_GBPARAM;
3516 return 0;
3517 }
3518
3519 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * arg)3520 bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
3521 {
3522 struct ifbrparam *param = arg;
3523
3524 param->ifbrp_cexceeded = sc->sc_brtexceeded;
3525 return 0;
3526 }
3527
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body of bridge_ioctl_gifsstp32() and
 *	bridge_ioctl_gifsstp64().  Expects `sc', `bifstp' and `error' to be
 *	in scope in the expanding function, and always returns from it.
 *
 *	Copies one struct ifbpstpreq for every member that has IFBIF_STP
 *	set to the user buffer bifstp->ifbpstp_req.  When
 *	bifstp->ifbpstp_len is zero, only the required length is stored in
 *	it and 0 is returned.
 *
 *	The bridge lock is dropped around the allocation and the copyout.
 *	The number of entries written is bounded by the size computed
 *	before the lock was dropped.  Returns ENOMEM if the temporary
 *	buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_GIFSSTP do { \
	struct bridge_iflist *bif; \
	struct bstp_port *bp; \
	struct ifbpstpreq bpreq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
 \
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if ((bif->bif_ifflags & IFBIF_STP) != 0) \
			count++; \
	} \
 \
	buflen = sizeof (bpreq) * count; \
	if (bifstp->ifbpstp_len == 0) { \
		bifstp->ifbpstp_len = buflen; \
		return (0); \
	} \
 \
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* Don't memcpy() into a failed allocation */ \
	if (outbuf == NULL && buflen != 0) \
		return (ENOMEM); \
 \
	count = 0; \
	buf = outbuf; \
	len = min(bifstp->ifbpstp_len, buflen); \
	bzero(&bpreq, sizeof (bpreq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if (len < sizeof (bpreq)) \
			break; \
 \
		if ((bif->bif_ifflags & IFBIF_STP) == 0) \
			continue; \
 \
		bp = &bif->bif_stp; \
		bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; \
		bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; \
		bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; \
		bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; \
		bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
		bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; \
 \
		memcpy(buf, &bpreq, sizeof (bpreq)); \
		count++; \
		buf += sizeof (bpreq); \
		len -= sizeof (bpreq); \
	} \
 \
	BRIDGE_UNLOCK(sc); \
	bifstp->ifbpstp_len = sizeof (bpreq) * count; \
	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
	return (error); \
} while (0)
3583
3584 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * arg)3585 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *arg)
3586 {
3587 struct ifbpstpconf32 *bifstp = arg;
3588 int error = 0;
3589
3590 BRIDGE_IOCTL_GIFSSTP;
3591 return error;
3592 }
3593
3594 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * arg)3595 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *arg)
3596 {
3597 struct ifbpstpconf64 *bifstp = arg;
3598 int error = 0;
3599
3600 BRIDGE_IOCTL_GIFSSTP;
3601 return error;
3602 }
3603
3604 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * arg)3605 bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
3606 {
3607 #if BRIDGESTP
3608 struct ifbrparam *param = arg;
3609
3610 return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3611 #else /* !BRIDGESTP */
3612 #pragma unused(sc, arg)
3613 return EOPNOTSUPP;
3614 #endif /* !BRIDGESTP */
3615 }
3616
3617 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * arg)3618 bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
3619 {
3620 #if BRIDGESTP
3621 struct ifbrparam *param = arg;
3622
3623 return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3624 #else /* !BRIDGESTP */
3625 #pragma unused(sc, arg)
3626 return EOPNOTSUPP;
3627 #endif /* !BRIDGESTP */
3628 }
3629
3630
3631 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * arg)3632 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *arg)
3633 {
3634 struct ifbrhostfilter *req = arg;
3635 struct bridge_iflist *bif;
3636
3637 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3638 if (bif == NULL) {
3639 return ENOENT;
3640 }
3641
3642 bzero(req, sizeof(struct ifbrhostfilter));
3643 if (bif->bif_flags & BIFF_HOST_FILTER) {
3644 req->ifbrhf_flags |= IFBRHF_ENABLED;
3645 bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3646 ETHER_ADDR_LEN);
3647 req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3648 }
3649 return 0;
3650 }
3651
3652 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * arg)3653 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *arg)
3654 {
3655 struct ifbrhostfilter *req = arg;
3656 struct bridge_iflist *bif;
3657
3658 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3659 if (bif == NULL) {
3660 return ENOENT;
3661 }
3662
3663 if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3664 bif->bif_flags |= BIFF_HOST_FILTER;
3665
3666 if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3667 bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3668 ETHER_ADDR_LEN);
3669 if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3670 ETHER_ADDR_LEN) != 0) {
3671 bif->bif_flags |= BIFF_HF_HWSRC;
3672 } else {
3673 bif->bif_flags &= ~BIFF_HF_HWSRC;
3674 }
3675 }
3676 if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3677 bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3678 if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3679 bif->bif_flags |= BIFF_HF_IPSRC;
3680 } else {
3681 bif->bif_flags &= ~BIFF_HF_IPSRC;
3682 }
3683 }
3684 } else {
3685 bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3686 BIFF_HF_IPSRC);
3687 bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3688 bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3689 }
3690
3691 return 0;
3692 }
3693
3694 static char *
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * buf,unsigned int * len_p)3695 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3696 unsigned int * count_p, char *buf, unsigned int *len_p)
3697 {
3698 unsigned int count = *count_p;
3699 struct ifbrmne ifbmne;
3700 unsigned int len = *len_p;
3701 struct mac_nat_entry *mne;
3702 unsigned long now;
3703
3704 bzero(&ifbmne, sizeof(ifbmne));
3705 LIST_FOREACH(mne, list, mne_list) {
3706 if (len < sizeof(ifbmne)) {
3707 break;
3708 }
3709 snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3710 "%s", mne->mne_bif->bif_ifp->if_xname);
3711 memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3712 sizeof(ifbmne.ifbmne_mac));
3713 now = (unsigned long) net_uptime();
3714 if (now < mne->mne_expire) {
3715 ifbmne.ifbmne_expire = mne->mne_expire - now;
3716 } else {
3717 ifbmne.ifbmne_expire = 0;
3718 }
3719 if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3720 ifbmne.ifbmne_af = AF_INET6;
3721 ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3722 } else {
3723 ifbmne.ifbmne_af = AF_INET;
3724 ifbmne.ifbmne_ip_addr = mne->mne_ip;
3725 }
3726 memcpy(buf, &ifbmne, sizeof(ifbmne));
3727 count++;
3728 buf += sizeof(ifbmne);
3729 len -= sizeof(ifbmne);
3730 }
3731 *count_p = count;
3732 *len_p = len;
3733 return buf;
3734 }
3735
3736 /*
3737 * bridge_ioctl_gmnelist()
3738 * Perform the get mac_nat_entry list ioctl.
3739 *
3740 * Note:
3741 * The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3742 * field size/layout except for the last field ifbml_buf, the user-supplied
3743 * buffer pointer. That is passed in separately via the 'user_addr'
3744 * parameter from the respective 32-bit or 64-bit ioctl routine.
3745 */
3746 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3747 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3748 user_addr_t user_addr)
3749 {
3750 unsigned int count;
3751 char *buf;
3752 int error = 0;
3753 char *outbuf = NULL;
3754 struct mac_nat_entry *mne;
3755 unsigned int buflen;
3756 unsigned int len;
3757
3758 mnl->ifbml_elsize = sizeof(struct ifbrmne);
3759 count = 0;
3760 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3761 count++;
3762 }
3763 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3764 count++;
3765 }
3766 buflen = sizeof(struct ifbrmne) * count;
3767 if (buflen == 0 || mnl->ifbml_len == 0) {
3768 mnl->ifbml_len = buflen;
3769 return error;
3770 }
3771 BRIDGE_UNLOCK(sc);
3772 outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3773 BRIDGE_LOCK(sc);
3774 count = 0;
3775 buf = outbuf;
3776 len = min(mnl->ifbml_len, buflen);
3777 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3778 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3779 mnl->ifbml_len = count * sizeof(struct ifbrmne);
3780 BRIDGE_UNLOCK(sc);
3781 error = copyout(outbuf, user_addr, mnl->ifbml_len);
3782 kfree_data(outbuf, buflen);
3783 BRIDGE_LOCK(sc);
3784 return error;
3785 }
3786
3787 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * arg)3788 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *arg)
3789 {
3790 struct ifbrmnelist64 *mnl = arg;
3791
3792 return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
3793 }
3794
3795 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * arg)3796 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *arg)
3797 {
3798 struct ifbrmnelist32 *mnl = arg;
3799
3800 return bridge_ioctl_gmnelist(sc, arg,
3801 CAST_USER_ADDR_T(mnl->ifbml_buf));
3802 }
3803
3804 /*
3805 * bridge_ioctl_gifstats()
3806 * Return per-member stats.
3807 *
3808 * Note:
3809 * The ifbrmreq32 and ifbrmreq64 structures have the same
3810 * field size/layout except for the last field brmr_buf, the user-supplied
3811 * buffer pointer. That is passed in separately via the 'user_addr'
3812 * parameter from the respective 32-bit or 64-bit ioctl routine.
3813 */
3814 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)3815 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
3816 user_addr_t user_addr)
3817 {
3818 struct bridge_iflist *bif;
3819 int error = 0;
3820 unsigned int buflen;
3821
3822 bif = bridge_lookup_member(sc, mreq->brmr_ifname);
3823 if (bif == NULL) {
3824 error = ENOENT;
3825 goto done;
3826 }
3827
3828 buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
3829 if (buflen == 0 || mreq->brmr_len == 0) {
3830 mreq->brmr_len = buflen;
3831 goto done;
3832 }
3833 if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
3834 error = ENOBUFS;
3835 goto done;
3836 }
3837 mreq->brmr_len = buflen;
3838 error = copyout(&bif->bif_stats, user_addr, buflen);
3839 done:
3840 return error;
3841 }
3842
3843 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * arg)3844 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *arg)
3845 {
3846 struct ifbrmreq32 *mreq = arg;
3847
3848 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3849 }
3850
3851 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * arg)3852 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *arg)
3853 {
3854 struct ifbrmreq64 *mreq = arg;
3855
3856 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3857 }
3858
3859 /*
3860 * bridge_proto_attach_changed
3861 *
3862 * Called when protocol attachment on the interface changes.
3863 */
3864 static void
bridge_proto_attach_changed(struct ifnet * ifp)3865 bridge_proto_attach_changed(struct ifnet *ifp)
3866 {
3867 boolean_t changed = FALSE;
3868 struct bridge_iflist *bif;
3869 boolean_t input_broadcast;
3870 struct bridge_softc *sc = ifp->if_bridge;
3871
3872 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3873 if (sc == NULL) {
3874 return;
3875 }
3876 input_broadcast = interface_needs_input_broadcast(ifp);
3877 BRIDGE_LOCK(sc);
3878 bif = bridge_lookup_member_if(sc, ifp);
3879 if (bif != NULL) {
3880 changed = bif_set_input_broadcast(bif, input_broadcast);
3881 }
3882 BRIDGE_UNLOCK(sc);
3883 if (changed) {
3884 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
3885 "%s input broadcast %s", ifp->if_xname,
3886 input_broadcast ? "ENABLED" : "DISABLED");
3887 }
3888 return;
3889 }
3890
3891 /*
3892 * interface_media_active:
3893 *
3894 * Tells if an interface media is active.
3895 */
3896 static int
interface_media_active(struct ifnet * ifp)3897 interface_media_active(struct ifnet *ifp)
3898 {
3899 struct ifmediareq ifmr;
3900 int status = 0;
3901
3902 bzero(&ifmr, sizeof(ifmr));
3903 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
3904 if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
3905 status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
3906 }
3907 }
3908
3909 return status;
3910 }
3911
3912 /*
3913 * bridge_updatelinkstatus:
3914 *
3915 * Update the media active status of the bridge based on the
3916 * media active status of its member.
3917 * If changed, return the corresponding onf/off link event.
3918 */
3919 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)3920 bridge_updatelinkstatus(struct bridge_softc *sc)
3921 {
3922 struct bridge_iflist *bif;
3923 int active_member = 0;
3924 u_int32_t event_code = 0;
3925
3926 BRIDGE_LOCK_ASSERT_HELD(sc);
3927
3928 /*
3929 * Find out if we have an active interface
3930 */
3931 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
3932 if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
3933 active_member = 1;
3934 break;
3935 }
3936 }
3937
3938 if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3939 sc->sc_flags |= SCF_MEDIA_ACTIVE;
3940 event_code = KEV_DL_LINK_ON;
3941 } else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3942 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
3943 event_code = KEV_DL_LINK_OFF;
3944 }
3945
3946 return event_code;
3947 }
3948
3949 /*
3950 * bridge_iflinkevent:
3951 */
3952 static void
bridge_iflinkevent(struct ifnet * ifp)3953 bridge_iflinkevent(struct ifnet *ifp)
3954 {
3955 struct bridge_softc *sc = ifp->if_bridge;
3956 struct bridge_iflist *bif;
3957 u_int32_t event_code = 0;
3958 int media_active;
3959
3960 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3961
3962 /* Check if the interface is a bridge member */
3963 if (sc == NULL) {
3964 return;
3965 }
3966
3967 media_active = interface_media_active(ifp);
3968 BRIDGE_LOCK(sc);
3969 bif = bridge_lookup_member_if(sc, ifp);
3970 if (bif != NULL) {
3971 if (media_active) {
3972 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
3973 } else {
3974 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
3975 }
3976 if (sc->sc_mac_nat_bif != NULL) {
3977 bridge_mac_nat_flush_entries(sc, bif);
3978 }
3979
3980 event_code = bridge_updatelinkstatus(sc);
3981 }
3982 BRIDGE_UNLOCK(sc);
3983
3984 if (event_code != 0) {
3985 bridge_link_event(sc->sc_ifp, event_code);
3986 }
3987 }
3988
3989 /*
3990 * bridge_delayed_callback:
3991 *
3992 * Makes a delayed call
3993 */
3994 static void
bridge_delayed_callback(void * param,__unused void * param2)3995 bridge_delayed_callback(void *param, __unused void *param2)
3996 {
3997 struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
3998 struct bridge_softc *sc = call->bdc_sc;
3999
4000 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4001 if (bridge_delayed_callback_delay > 0) {
4002 struct timespec ts;
4003
4004 ts.tv_sec = bridge_delayed_callback_delay;
4005 ts.tv_nsec = 0;
4006
4007 BRIDGE_LOG(LOG_NOTICE, 0,
4008 "sleeping for %d seconds",
4009 bridge_delayed_callback_delay);
4010
4011 msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4012 __func__, &ts);
4013
4014 BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4015 }
4016 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4017
4018 BRIDGE_LOCK(sc);
4019
4020 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4021 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4022 "%s call 0x%llx flags 0x%x",
4023 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4024 call->bdc_flags);
4025 }
4026 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4027
4028 if (call->bdc_flags & BDCF_CANCELLING) {
4029 wakeup(call);
4030 } else {
4031 if ((sc->sc_flags & SCF_DETACHING) == 0) {
4032 (*call->bdc_func)(sc);
4033 }
4034 }
4035 call->bdc_flags &= ~BDCF_OUTSTANDING;
4036 BRIDGE_UNLOCK(sc);
4037 }
4038
4039 /*
4040 * bridge_schedule_delayed_call:
4041 *
4042 * Schedule a function to be called on a separate thread
4043 * The actual call may be scheduled to run at a given time or ASAP.
4044 */
4045 static void
4046 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4047 {
4048 uint64_t deadline = 0;
4049 struct bridge_softc *sc = call->bdc_sc;
4050
4051 BRIDGE_LOCK_ASSERT_HELD(sc);
4052
4053 if ((sc->sc_flags & SCF_DETACHING) ||
4054 (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4055 return;
4056 }
4057
4058 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4059 nanoseconds_to_absolutetime(
4060 (uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4061 call->bdc_ts.tv_nsec, &deadline);
4062 clock_absolutetime_interval_to_deadline(deadline, &deadline);
4063 }
4064
4065 call->bdc_flags = BDCF_OUTSTANDING;
4066
4067 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4068 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4069 "%s call 0x%llx flags 0x%x",
4070 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4071 call->bdc_flags);
4072 }
4073 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4074
4075 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4076 thread_call_func_delayed(
4077 (thread_call_func_t)bridge_delayed_callback,
4078 call, deadline);
4079 } else {
4080 if (call->bdc_thread_call == NULL) {
4081 call->bdc_thread_call = thread_call_allocate(
4082 (thread_call_func_t)bridge_delayed_callback,
4083 call);
4084 }
4085 thread_call_enter(call->bdc_thread_call);
4086 }
4087 }
4088
4089 /*
4090 * bridge_cancel_delayed_call:
4091 *
4092 * Cancel a queued or running delayed call.
4093 * If call is running, does not return until the call is done to
4094 * prevent race condition with the brigde interface getting destroyed
4095 */
4096 static void
4097 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4098 {
4099 boolean_t result;
4100 struct bridge_softc *sc = call->bdc_sc;
4101
4102 /*
4103 * The call was never scheduled
4104 */
4105 if (sc == NULL) {
4106 return;
4107 }
4108
4109 BRIDGE_LOCK_ASSERT_HELD(sc);
4110
4111 call->bdc_flags |= BDCF_CANCELLING;
4112
4113 while (call->bdc_flags & BDCF_OUTSTANDING) {
4114 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4115 "%s call 0x%llx flags 0x%x",
4116 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4117 call->bdc_flags);
4118 result = thread_call_func_cancel(
4119 (thread_call_func_t)bridge_delayed_callback, call, FALSE);
4120
4121 if (result) {
4122 /*
4123 * We managed to dequeue the delayed call
4124 */
4125 call->bdc_flags &= ~BDCF_OUTSTANDING;
4126 } else {
4127 /*
4128 * Wait for delayed call do be done running
4129 */
4130 msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4131 }
4132 }
4133 call->bdc_flags &= ~BDCF_CANCELLING;
4134 }
4135
4136 /*
4137 * bridge_cleanup_delayed_call:
4138 *
4139 * Dispose resource allocated for a delayed call
4140 * Assume the delayed call is not queued or running .
4141 */
4142 static void
4143 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4144 {
4145 boolean_t result;
4146 struct bridge_softc *sc = call->bdc_sc;
4147
4148 /*
4149 * The call was never scheduled
4150 */
4151 if (sc == NULL) {
4152 return;
4153 }
4154
4155 BRIDGE_LOCK_ASSERT_HELD(sc);
4156
4157 VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4158 VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4159
4160 if (call->bdc_thread_call != NULL) {
4161 result = thread_call_free(call->bdc_thread_call);
4162 if (result == FALSE) {
4163 panic("%s thread_call_free() failed for call %p",
4164 __func__, call);
4165 }
4166 call->bdc_thread_call = NULL;
4167 }
4168 }
4169
4170 /*
4171 * bridge_init:
4172 *
4173 * Initialize a bridge interface.
4174 */
4175 static int
4176 bridge_init(struct ifnet *ifp)
4177 {
4178 struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4179 errno_t error;
4180
4181 BRIDGE_LOCK_ASSERT_HELD(sc);
4182
4183 if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4184 return 0;
4185 }
4186
4187 error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4188
4189 /*
4190 * Calling bridge_aging_timer() is OK as there are no entries to
4191 * age so we're just going to arm the timer
4192 */
4193 bridge_aging_timer(sc);
4194 #if BRIDGESTP
4195 if (error == 0) {
4196 bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4197 }
4198 #endif /* BRIDGESTP */
4199 return error;
4200 }
4201
4202 /*
4203 * bridge_ifstop:
4204 *
4205 * Stop the bridge interface.
4206 */
4207 static void
4208 bridge_ifstop(struct ifnet *ifp, int disable)
4209 {
4210 #pragma unused(disable)
4211 struct bridge_softc *sc = ifp->if_softc;
4212
4213 BRIDGE_LOCK_ASSERT_HELD(sc);
4214
4215 if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4216 return;
4217 }
4218
4219 bridge_cancel_delayed_call(&sc->sc_aging_timer);
4220
4221 #if BRIDGESTP
4222 bstp_stop(&sc->sc_stp);
4223 #endif /* BRIDGESTP */
4224
4225 bridge_rtflush(sc, IFBF_FLUSHDYN);
4226 (void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4227 }
4228
4229 /*
4230 * bridge_compute_cksum:
4231 *
4232 * If the packet has checksum flags, compare the hardware checksum
4233 * capabilities of the source and destination interfaces. If they
4234 * are the same, there's nothing to do. If they are different,
4235 * finalize the checksum so that it can be sent on the destination
4236 * interface.
4237 */
4238 static void
4239 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4240 {
4241 uint32_t csum_flags;
4242 uint16_t dst_hw_csum;
4243 uint32_t did_sw = 0;
4244 struct ether_header *eh;
4245 uint16_t src_hw_csum;
4246
4247 if (src_if == dst_if) {
4248 return;
4249 }
4250 csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4251 if (csum_flags == 0) {
4252 /* no checksum offload */
4253 return;
4254 }
4255
4256 /*
4257 * if destination/source differ in checksum offload
4258 * capabilities, finalize/compute the checksum
4259 */
4260 dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4261 src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4262 if (dst_hw_csum == src_hw_csum) {
4263 return;
4264 }
4265 eh = mtod(m, struct ether_header *);
4266 switch (ntohs(eh->ether_type)) {
4267 case ETHERTYPE_IP:
4268 did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4269 break;
4270 case ETHERTYPE_IPV6:
4271 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4272 break;
4273 }
4274 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4275 "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4276 src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4277 m->m_pkthdr.csum_flags);
4278 }
4279
4280 static errno_t
4281 bridge_transmit(struct ifnet * ifp, struct mbuf *m)
4282 {
4283 struct flowadv adv = { .code = FADV_SUCCESS };
4284 errno_t error;
4285
4286 error = dlil_output(ifp, 0, m, NULL, NULL, 1, &adv);
4287 if (error == 0) {
4288 if (adv.code == FADV_FLOW_CONTROLLED) {
4289 error = EQFULL;
4290 } else if (adv.code == FADV_SUSPENDED) {
4291 error = EQSUSPENDED;
4292 }
4293 }
4294 return error;
4295 }
4296
4297 static int
4298 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4299 bool *is_fragmented)
4300 {
4301 int newoff;
4302
4303 *is_fragmented = false;
4304 while (1) {
4305 newoff = ip6_nexthdr(m, off, proto, nxtp);
4306 if (newoff < 0) {
4307 return off;
4308 } else if (newoff < off) {
4309 return -1; /* invalid */
4310 } else if (newoff == off) {
4311 return newoff;
4312 }
4313 off = newoff;
4314 proto = *nxtp;
4315 if (proto == IPPROTO_FRAGMENT) {
4316 *is_fragmented = true;
4317 }
4318 }
4319 }
4320
4321 static int
4322 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4323 ip_packet_info_t info_p, struct bripstats * stats_p)
4324 {
4325 int error = 0;
4326 u_int hlen;
4327 u_int ip_hlen;
4328 u_int ip_pay_len;
4329 struct mbuf * m0 = *mp;
4330 int off;
4331 int opt_len = 0;
4332 int proto = 0;
4333
4334 bzero(info_p, sizeof(*info_p));
4335 if (is_ipv4) {
4336 struct ip * ip;
4337 u_int ip_total_len;
4338
4339 /* IPv4 */
4340 hlen = mac_hlen + sizeof(struct ip);
4341 if (m0->m_pkthdr.len < hlen) {
4342 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4343 "Short IP packet %d < %d",
4344 m0->m_pkthdr.len, hlen);
4345 error = _EBADIP;
4346 stats_p->bips_bad_ip++;
4347 goto done;
4348 }
4349 if (m0->m_len < hlen) {
4350 *mp = m0 = m_pullup(m0, hlen);
4351 if (m0 == NULL) {
4352 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4353 "m_pullup failed hlen %d",
4354 hlen);
4355 error = ENOBUFS;
4356 stats_p->bips_bad_ip++;
4357 goto done;
4358 }
4359 }
4360 ip = (struct ip *)(void *)(mtod(m0, uint8_t *) + mac_hlen);
4361 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4362 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4363 "bad IP version");
4364 error = _EBADIP;
4365 stats_p->bips_bad_ip++;
4366 goto done;
4367 }
4368 ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4369 if (ip_hlen < sizeof(struct ip)) {
4370 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4371 "bad IP header length %d < %d",
4372 ip_hlen,
4373 (int)sizeof(struct ip));
4374 error = _EBADIP;
4375 stats_p->bips_bad_ip++;
4376 goto done;
4377 }
4378 hlen = mac_hlen + ip_hlen;
4379 if (m0->m_len < hlen) {
4380 *mp = m0 = m_pullup(m0, hlen);
4381 if (m0 == NULL) {
4382 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4383 "m_pullup failed hlen %d",
4384 hlen);
4385 error = ENOBUFS;
4386 stats_p->bips_bad_ip++;
4387 goto done;
4388 }
4389 }
4390
4391 ip_total_len = ntohs(ip->ip_len);
4392 if (ip_total_len < ip_hlen) {
4393 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4394 "IP total len %d < header len %d",
4395 ip_total_len, ip_hlen);
4396 error = _EBADIP;
4397 stats_p->bips_bad_ip++;
4398 goto done;
4399 }
4400 if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4401 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4402 "invalid IP payload length %d > %d",
4403 ip_total_len,
4404 (m0->m_pkthdr.len - mac_hlen));
4405 error = _EBADIP;
4406 stats_p->bips_bad_ip++;
4407 goto done;
4408 }
4409 ip_pay_len = ip_total_len - ip_hlen;
4410 info_p->ip_proto = ip->ip_p;
4411 info_p->ip_hdr.ip = ip;
4412 #define FRAG_BITS (IP_OFFMASK | IP_MF)
4413 if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4414 info_p->ip_is_fragmented = true;
4415 }
4416 stats_p->bips_ip++;
4417 } else {
4418 struct ip6_hdr *ip6;
4419
4420 /* IPv6 */
4421 hlen = mac_hlen + sizeof(struct ip6_hdr);
4422 if (m0->m_pkthdr.len < hlen) {
4423 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4424 "short IPv6 packet %d < %d",
4425 m0->m_pkthdr.len, hlen);
4426 error = _EBADIPV6;
4427 stats_p->bips_bad_ip6++;
4428 goto done;
4429 }
4430 if (m0->m_len < hlen) {
4431 *mp = m0 = m_pullup(m0, hlen);
4432 if (m0 == NULL) {
4433 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4434 "m_pullup failed hlen %d",
4435 hlen);
4436 error = ENOBUFS;
4437 stats_p->bips_bad_ip6++;
4438 goto done;
4439 }
4440 }
4441 ip6 = (struct ip6_hdr *)(mtod(m0, uint8_t *) + mac_hlen);
4442 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4443 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4444 "bad IPv6 version");
4445 error = _EBADIPV6;
4446 stats_p->bips_bad_ip6++;
4447 goto done;
4448 }
4449 off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4450 &info_p->ip_is_fragmented);
4451 if (off < 0 || m0->m_pkthdr.len < off) {
4452 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4453 "ip6_lasthdr() returned %d",
4454 off);
4455 error = _EBADIPV6;
4456 stats_p->bips_bad_ip6++;
4457 goto done;
4458 }
4459 ip_hlen = sizeof(*ip6);
4460 opt_len = off - mac_hlen - ip_hlen;
4461 if (opt_len < 0) {
4462 error = _EBADIPV6;
4463 stats_p->bips_bad_ip6++;
4464 goto done;
4465 }
4466 info_p->ip_proto = proto;
4467 info_p->ip_hdr.ip6 = ip6;
4468 ip_pay_len = ntohs(ip6->ip6_plen);
4469 if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4470 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4471 "invalid IPv6 payload length %d > %d",
4472 ip_pay_len,
4473 (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4474 error = _EBADIPV6;
4475 stats_p->bips_bad_ip6++;
4476 goto done;
4477 }
4478 stats_p->bips_ip6++;
4479 }
4480 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4481 "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4482 is_ipv4 ? '4' : '6',
4483 proto, ip_hlen, ip_pay_len, opt_len,
4484 m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4485 info_p->ip_hlen = ip_hlen;
4486 info_p->ip_pay_len = ip_pay_len;
4487 info_p->ip_opt_len = opt_len;
4488 info_p->ip_is_ipv4 = is_ipv4;
4489 done:
4490 return error;
4491 }
4492
4493 static int
4494 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4495 ip_packet_info_t info_p, struct bripstats * stats_p)
4496 {
4497 int error;
4498 u_int hlen;
4499
4500 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4501 if (error != 0) {
4502 goto done;
4503 }
4504 if (info_p->ip_proto != IPPROTO_TCP) {
4505 /* not a TCP frame, not an error, just a bad guess */
4506 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4507 "non-TCP (%d) IPv%c frame %d bytes",
4508 info_p->ip_proto, is_ipv4 ? '4' : '6',
4509 (*mp)->m_pkthdr.len);
4510 goto done;
4511 }
4512 if (info_p->ip_is_fragmented) {
4513 /* both TSO and IP fragmentation don't make sense */
4514 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4515 "fragmented TSO packet?");
4516 stats_p->bips_bad_tcp++;
4517 error = _EBADTCP;
4518 goto done;
4519 }
4520 hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4521 info_p->ip_opt_len;
4522 if ((*mp)->m_len < hlen) {
4523 *mp = m_pullup(*mp, hlen);
4524 if (*mp == NULL) {
4525 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4526 "m_pullup %d failed",
4527 hlen);
4528 stats_p->bips_bad_tcp++;
4529 error = _EBADTCP;
4530 goto done;
4531 }
4532 }
4533 info_p->ip_proto_hdr = ((caddr_t)info_p->ip_hdr.ptr) +
4534 info_p->ip_hlen + info_p->ip_opt_len;
4535 done:
4536 return error;
4537 }
4538
4539 static inline void
4540 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4541 {
4542 if (proto == IPPROTO_TCP) {
4543 stats_p->brcs_tcp_checksum++;
4544 } else {
4545 stats_p->brcs_udp_checksum++;
4546 }
4547 return;
4548 }
4549
4550 static bool
4551 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4552 {
4553 uint16_t ether_type;
4554 bool is_ip = TRUE;
4555
4556 ether_type = ntohs(eh->ether_type);
4557 switch (ether_type) {
4558 case ETHERTYPE_IP:
4559 *is_ipv4 = TRUE;
4560 break;
4561 case ETHERTYPE_IPV6:
4562 *is_ipv4 = FALSE;
4563 break;
4564 default:
4565 is_ip = FALSE;
4566 break;
4567 }
4568 return is_ip;
4569 }
4570
4571 static errno_t
4572 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4573 {
4574 struct brcsumstats *csum_stats_p;
4575 struct ether_header *eh;
4576 errno_t error = 0;
4577 ip_packet_info info;
4578 bool is_ipv4;
4579 struct mbuf * m;
4580 u_int mac_hlen = sizeof(struct ether_header);
4581 uint16_t sum;
4582 bool valid;
4583
4584 eh = mtod(*mp, struct ether_header *);
4585 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4586 goto done;
4587 }
4588 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4589 &stats_p->brms_out_ip);
4590 m = *mp;
4591 if (error != 0) {
4592 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4593 "bridge_get_ip_proto failed %d",
4594 error);
4595 goto done;
4596 }
4597 if (is_ipv4) {
4598 if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4599 /* hardware offloaded IP header checksum */
4600 valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4601 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4602 "IP checksum HW %svalid",
4603 valid ? "" : "in");
4604 if (!valid) {
4605 stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum++;
4606 error = _EBADIPCHECKSUM;
4607 goto done;
4608 }
4609 stats_p->brms_out_cksum_good_hw.brcs_ip_checksum++;
4610 } else {
4611 /* verify */
4612 sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4613 valid = (sum == 0);
4614 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4615 "IP checksum SW %svalid",
4616 valid ? "" : "in");
4617 if (!valid) {
4618 stats_p->brms_out_cksum_bad.brcs_ip_checksum++;
4619 error = _EBADIPCHECKSUM;
4620 goto done;
4621 }
4622 stats_p->brms_out_cksum_good.brcs_ip_checksum++;
4623 }
4624 }
4625 if (info.ip_is_fragmented) {
4626 /* can't verify checksum on fragmented packets */
4627 goto done;
4628 }
4629 switch (info.ip_proto) {
4630 case IPPROTO_TCP:
4631 stats_p->brms_out_ip.bips_tcp++;
4632 break;
4633 case IPPROTO_UDP:
4634 stats_p->brms_out_ip.bips_udp++;
4635 break;
4636 default:
4637 goto done;
4638 }
4639 /* check for hardware offloaded UDP/TCP checksum */
4640 #define HW_CSUM (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4641 if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4642 /* checksum verified by hardware */
4643 valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4644 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4645 "IPv%c %s checksum HW 0x%x %svalid",
4646 is_ipv4 ? '4' : '6',
4647 (info.ip_proto == IPPROTO_TCP)
4648 ? "TCP" : "UDP",
4649 m->m_pkthdr.csum_data,
4650 valid ? "" : "in" );
4651 if (!valid) {
4652 /* bad checksum */
4653 csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4654 error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4655 : _EBADTCPCHECKSUM;
4656 } else {
4657 /* good checksum */
4658 csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4659 }
4660 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4661 goto done;
4662 }
4663 m->m_data += mac_hlen;
4664 m->m_len -= mac_hlen;
4665 m->m_pkthdr.len -= mac_hlen;
4666 if (is_ipv4) {
4667 sum = inet_cksum(m, info.ip_proto,
4668 info.ip_hlen,
4669 info.ip_pay_len);
4670 } else {
4671 sum = inet6_cksum(m, info.ip_proto,
4672 info.ip_hlen + info.ip_opt_len,
4673 info.ip_pay_len - info.ip_opt_len);
4674 }
4675 valid = (sum == 0);
4676 if (valid) {
4677 csum_stats_p = &stats_p->brms_out_cksum_good;
4678 } else {
4679 csum_stats_p = &stats_p->brms_out_cksum_bad;
4680 error = (info.ip_proto == IPPROTO_TCP)
4681 ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4682 }
4683 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4684 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4685 "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4686 is_ipv4 ? '4' : '6',
4687 (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4688 valid ? "" : "in",
4689 sum, info.ip_hlen, info.ip_pay_len);
4690 m->m_data -= mac_hlen;
4691 m->m_len += mac_hlen;
4692 m->m_pkthdr.len += mac_hlen;
4693 done:
4694 return error;
4695 }
4696
4697 static errno_t
4698 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
4699 struct ifbrmstats * stats_p)
4700 {
4701 uint16_t * csum_p;
4702 errno_t error = 0;
4703 u_int hlen;
4704 struct mbuf * m0 = *mp;
4705 u_int mac_hlen = sizeof(struct ether_header);
4706 u_int pkt_hdr_len;
4707 struct tcphdr * tcp;
4708 u_int tcp_hlen;
4709 struct udphdr * udp;
4710
4711 if (info_p->ip_is_ipv4) {
4712 /* compute IP header checksum */
4713 info_p->ip_hdr.ip->ip_sum = 0;
4714 info_p->ip_hdr.ip->ip_sum = inet_cksum(m0, 0, mac_hlen,
4715 info_p->ip_hlen);
4716 stats_p->brms_in_computed_cksum.brcs_ip_checksum++;
4717 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4718 "IPv4 checksum 0x%x",
4719 ntohs(info_p->ip_hdr.ip->ip_sum));
4720 }
4721 if (info_p->ip_is_fragmented) {
4722 /* can't compute checksum on fragmented packets */
4723 goto done;
4724 }
4725 pkt_hdr_len = m0->m_pkthdr.len;
4726 switch (info_p->ip_proto) {
4727 case IPPROTO_TCP:
4728 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
4729 + sizeof(struct tcphdr);
4730 if (m0->m_len < hlen) {
4731 *mp = m0 = m_pullup(m0, hlen);
4732 if (m0 == NULL) {
4733 stats_p->brms_in_ip.bips_bad_tcp++;
4734 error = _EBADTCP;
4735 goto done;
4736 }
4737 }
4738 tcp = (struct tcphdr *)(void *)
4739 ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4740 + info_p->ip_opt_len);
4741 tcp_hlen = tcp->th_off << 2;
4742 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
4743 if (hlen > pkt_hdr_len) {
4744 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4745 "bad tcp header length %u",
4746 tcp_hlen);
4747 stats_p->brms_in_ip.bips_bad_tcp++;
4748 error = _EBADTCP;
4749 goto done;
4750 }
4751 csum_p = &tcp->th_sum;
4752 stats_p->brms_in_ip.bips_tcp++;
4753 break;
4754 case IPPROTO_UDP:
4755 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
4756 if (m0->m_len < hlen) {
4757 *mp = m0 = m_pullup(m0, hlen);
4758 if (m0 == NULL) {
4759 stats_p->brms_in_ip.bips_bad_udp++;
4760 error = ENOBUFS;
4761 goto done;
4762 }
4763 }
4764 udp = (struct udphdr *)(void *)
4765 ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4766 + info_p->ip_opt_len);
4767 csum_p = &udp->uh_sum;
4768 stats_p->brms_in_ip.bips_udp++;
4769 break;
4770 default:
4771 /* not TCP or UDP */
4772 goto done;
4773 }
4774 *csum_p = 0;
4775 m0->m_data += mac_hlen;
4776 m0->m_len -= mac_hlen;
4777 m0->m_pkthdr.len -= mac_hlen;
4778 if (info_p->ip_is_ipv4) {
4779 *csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
4780 info_p->ip_pay_len);
4781 } else {
4782 *csum_p = inet6_cksum(m0, info_p->ip_proto,
4783 info_p->ip_hlen + info_p->ip_opt_len,
4784 info_p->ip_pay_len - info_p->ip_opt_len);
4785 }
4786 if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
4787 /* RFC 1122 4.1.3.4 */
4788 *csum_p = 0xffff;
4789 }
4790 m0->m_data -= mac_hlen;
4791 m0->m_len += mac_hlen;
4792 m0->m_pkthdr.len += mac_hlen;
4793 proto_csum_stats_increment(info_p->ip_proto,
4794 &stats_p->brms_in_computed_cksum);
4795
4796 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4797 "IPv%c %s set checksum 0x%x",
4798 info_p->ip_is_ipv4 ? '4' : '6',
4799 (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4800 ntohs(*csum_p));
4801 done:
4802 return error;
4803 }
4804
4805 static errno_t
4806 bridge_send(struct ifnet *src_ifp,
4807 struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4808 {
4809 switch (cksum_op) {
4810 case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
4811 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4812 break;
4813 case CHECKSUM_OPERATION_FINALIZE:
4814 /* the checksum might not be correct, finalize now */
4815 bridge_finalize_cksum(dst_ifp, m);
4816 break;
4817 case CHECKSUM_OPERATION_COMPUTE:
4818 bridge_compute_cksum(src_ifp, dst_ifp, m);
4819 break;
4820 default:
4821 break;
4822 }
4823 #if HAS_IF_CAP
4824 /*
4825 * If underlying interface can not do VLAN tag insertion itself
4826 * then attach a packet tag that holds it.
4827 */
4828 if ((m->m_flags & M_VLANTAG) &&
4829 (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4830 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4831 if (m == NULL) {
4832 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4833 "%s: unable to prepend VLAN header",
4834 dst_ifp->if_xname);
4835 (void) ifnet_stat_increment_out(dst_ifp,
4836 0, 0, 1);
4837 return 0;
4838 }
4839 m->m_flags &= ~M_VLANTAG;
4840 }
4841 #endif /* HAS_IF_CAP */
4842 return bridge_transmit(dst_ifp, m);
4843 }
4844
4845 static errno_t
4846 bridge_send_tso(struct ifnet *dst_ifp, struct mbuf *m, bool is_ipv4)
4847 {
4848 errno_t error;
4849 u_int mac_hlen;
4850
4851 mac_hlen = sizeof(struct ether_header);
4852
4853 #if HAS_IF_CAP
4854 /*
4855 * If underlying interface can not do VLAN tag insertion itself
4856 * then attach a packet tag that holds it.
4857 */
4858 if ((m->m_flags & M_VLANTAG) &&
4859 (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4860 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4861 if (m == NULL) {
4862 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4863 "%s: unable to prepend VLAN header",
4864 dst_ifp->if_xname);
4865 (void) ifnet_stat_increment_out(dst_ifp,
4866 0, 0, 1);
4867 error = ENOBUFS;
4868 goto done;
4869 }
4870 m->m_flags &= ~M_VLANTAG;
4871 mac_hlen += ETHER_VLAN_ENCAP_LEN;
4872 }
4873 #endif /* HAS_IF_CAP */
4874 error = gso_tcp(dst_ifp, &m, mac_hlen, is_ipv4, TRUE);
4875 return error;
4876 }
4877
4878 /*
4879 * tso_hwassist:
4880 * - determine whether the destination interface supports TSO offload
4881 * - if the packet is already marked for offload and the hardware supports
4882 * it, just allow the packet to continue on
4883 * - if not, parse the packet headers to verify that this is a large TCP
4884 * packet requiring segmentation; if the hardware doesn't support it
4885 * set need_sw_tso; otherwise, mark the packet for TSO offload
4886 */
4887 static int
4888 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
4889 bool * need_sw_tso, bool * is_large_tcp)
4890 {
4891 int error = 0;
4892 u_int32_t if_csum;
4893 u_int32_t if_tso;
4894 u_int32_t mbuf_tso;
4895 bool supports_cksum = false;
4896
4897 *need_sw_tso = false;
4898 *is_large_tcp = false;
4899 if (is_ipv4) {
4900 /*
4901 * Enable both TCP and IP offload if the hardware supports it.
4902 * If the hardware doesn't support TCP offload, supports_cksum
4903 * will be false so we won't set either offload.
4904 */
4905 if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
4906 supports_cksum = (if_csum & CSUM_TCP) != 0;
4907 if_tso = IFNET_TSO_IPV4;
4908 mbuf_tso = CSUM_TSO_IPV4;
4909 } else {
4910 supports_cksum = (ifp->if_hwassist & CSUM_TCPIPV6) != 0;
4911 if_csum = CSUM_TCPIPV6;
4912 if_tso = IFNET_TSO_IPV6;
4913 mbuf_tso = CSUM_TSO_IPV6;
4914 }
4915 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4916 "%s: does%s support checksum 0x%x if_csum 0x%x",
4917 ifp->if_xname, supports_cksum ? "" : " not",
4918 ifp->if_hwassist, if_csum);
4919 if ((ifp->if_hwassist & if_tso) != 0 &&
4920 ((*mp)->m_pkthdr.csum_flags & mbuf_tso) != 0) {
4921 /* hardware TSO, mbuf already marked */
4922 } else {
4923 /* verify that this is a large TCP frame */
4924 uint32_t csum_flags;
4925 ip_packet_info info;
4926 int mss;
4927 struct bripstats stats;
4928 struct tcphdr * tcp;
4929
4930 error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
4931 &info, &stats);
4932 if (error != 0) {
4933 /* bad packet */
4934 goto done;
4935 }
4936 if ((info.ip_hlen + info.ip_pay_len + info.ip_opt_len) <=
4937 ifp->if_mtu) {
4938 /* not actually a large packet */
4939 goto done;
4940 }
4941 if (info.ip_proto_hdr == NULL) {
4942 /* not a TCP packet */
4943 goto done;
4944 }
4945 if ((ifp->if_hwassist & if_tso) == 0) {
4946 /* hardware does not support TSO, enable sw tso */
4947 *need_sw_tso = if_bridge_segmentation != 0;
4948 goto done;
4949 }
4950 /* use hardware TSO */
4951 (*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
4952 tcp = (struct tcphdr *)info.ip_proto_hdr;
4953 mss = ifp->if_mtu - info.ip_hlen - info.ip_opt_len
4954 - (tcp->th_off << 2) - if_bridge_tso_reduce_mss_tx;
4955 assert(mss > 0);
4956 csum_flags = mbuf_tso;
4957 if (supports_cksum) {
4958 csum_flags |= if_csum;
4959 }
4960 (*mp)->m_pkthdr.tso_segsz = mss;
4961 (*mp)->m_pkthdr.csum_flags |= csum_flags;
4962 (*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
4963 *is_large_tcp = true;
4964 }
4965 done:
4966 return error;
4967 }
4968
4969 /*
4970 * bridge_enqueue:
4971 *
4972 * Enqueue a packet on a bridge member interface.
4973 *
4974 */
4975 static errno_t
4976 bridge_enqueue(ifnet_t bridge_ifp, struct ifnet *src_ifp,
4977 struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4978 {
4979 errno_t error = 0;
4980 int len;
4981
4982 VERIFY(dst_ifp != NULL);
4983
4984 /*
4985 * We may be sending a fragment so traverse the mbuf
4986 *
4987 * NOTE: bridge_fragment() is called only when PFIL_HOOKS is enabled.
4988 */
4989 for (struct mbuf *next_m = NULL; m != NULL; m = next_m) {
4990 bool need_sw_tso = false;
4991 bool is_ipv4 = false;
4992 bool is_large_pkt;
4993 errno_t _error = 0;
4994
4995 len = m->m_pkthdr.len;
4996 m->m_flags |= M_PROTO1; /* set to avoid loops */
4997 next_m = m->m_nextpkt;
4998 m->m_nextpkt = NULL;
4999 /*
5000 * Need to segment the packet if it is a large frame
5001 * and the destination interface does not support TSO.
5002 *
5003 * Note that with trailers, it's possible for a packet to
5004 * be large but not actually require segmentation.
5005 */
5006 is_large_pkt = (len > (bridge_ifp->if_mtu + ETHER_HDR_LEN));
5007 if (is_large_pkt) {
5008 struct ether_header *eh;
5009 bool is_large_tcp = false;
5010
5011 eh = mtod(m, struct ether_header *);
5012 if (ether_header_type_is_ip(eh, &is_ipv4)) {
5013 _error = tso_hwassist(&m, is_ipv4,
5014 dst_ifp, sizeof(struct ether_header),
5015 &need_sw_tso, &is_large_tcp);
5016 if (is_large_tcp) {
5017 cksum_op = CHECKSUM_OPERATION_NONE;
5018 }
5019 } else {
5020 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5021 "large non IP packet");
5022 }
5023 }
5024 if (_error != 0) {
5025 if (m != NULL) {
5026 m_freem(m);
5027 }
5028 } else if (need_sw_tso) {
5029 _error = bridge_send_tso(dst_ifp, m, is_ipv4);
5030 } else {
5031 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5032 "%s bridge_send(%s) len %d op %d",
5033 bridge_ifp->if_xname,
5034 dst_ifp->if_xname,
5035 len, cksum_op);
5036 _error = bridge_send(src_ifp, dst_ifp, m, cksum_op);
5037 }
5038
5039 /* Preserve first error value */
5040 if (error == 0 && _error != 0) {
5041 error = _error;
5042 }
5043 if (_error == 0) {
5044 (void) ifnet_stat_increment_out(bridge_ifp, 1, len, 0);
5045 } else {
5046 (void) ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
5047 }
5048 }
5049
5050 return error;
5051 }
5052
5053 #if HAS_BRIDGE_DUMMYNET
5054 /*
5055 * bridge_dummynet:
5056 *
5057 * Receive a queued packet from dummynet and pass it on to the output
5058 * interface.
5059 *
5060 * The mbuf has the Ethernet header already attached.
5061 */
5062 static void
5063 bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
5064 {
5065 struct bridge_softc *sc;
5066
5067 sc = ifp->if_bridge;
5068
5069 /*
5070 * The packet didn't originate from a member interface. This should only
5071 * ever happen if a member interface is removed while packets are
5072 * queued for it.
5073 */
5074 if (sc == NULL) {
5075 m_freem(m);
5076 return;
5077 }
5078
5079 if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
5080 if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) {
5081 return;
5082 }
5083 if (m == NULL) {
5084 return;
5085 }
5086 }
5087 (void) bridge_enqueue(sc->sc_ifp, NULL, ifp, m, CHECKSUM_OPERATION_NONE);
5088 }
5089
5090 #endif /* HAS_BRIDGE_DUMMYNET */
5091
5092 /*
5093 * bridge_member_output:
5094 *
5095 * Send output from a bridge member interface. This
5096 * performs the bridging function for locally originated
5097 * packets.
5098 *
5099 * The mbuf has the Ethernet header already attached.
5100 */
5101 static errno_t
5102 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5103 {
5104 ifnet_t bridge_ifp;
5105 struct ether_header *eh;
5106 struct ifnet *dst_if;
5107 uint16_t vlan;
5108 struct bridge_iflist *mac_nat_bif;
5109 ifnet_t mac_nat_ifp;
5110 mbuf_t m = *data;
5111
5112 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5113 "ifp %s", ifp->if_xname);
5114 if (m->m_len < ETHER_HDR_LEN) {
5115 m = m_pullup(m, ETHER_HDR_LEN);
5116 if (m == NULL) {
5117 *data = NULL;
5118 return EJUSTRETURN;
5119 }
5120 }
5121
5122 eh = mtod(m, struct ether_header *);
5123 vlan = VLANTAGOF(m);
5124
5125 BRIDGE_LOCK(sc);
5126 mac_nat_bif = sc->sc_mac_nat_bif;
5127 mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5128 if (mac_nat_ifp == ifp) {
5129 /* record the IP address used by the MAC NAT interface */
5130 (void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5131 m = *data;
5132 if (m == NULL) {
5133 /* packet was deallocated */
5134 BRIDGE_UNLOCK(sc);
5135 return EJUSTRETURN;
5136 }
5137 }
5138 bridge_ifp = sc->sc_ifp;
5139
5140 /*
5141 * APPLE MODIFICATION
5142 * If the packet is an 802.1X ethertype, then only send on the
5143 * original output interface.
5144 */
5145 if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5146 dst_if = ifp;
5147 goto sendunicast;
5148 }
5149
5150 /*
5151 * If bridge is down, but the original output interface is up,
5152 * go ahead and send out that interface. Otherwise, the packet
5153 * is dropped below.
5154 */
5155 if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5156 dst_if = ifp;
5157 goto sendunicast;
5158 }
5159
5160 /*
5161 * If the packet is a multicast, or we don't know a better way to
5162 * get there, send to all interfaces.
5163 */
5164 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5165 dst_if = NULL;
5166 } else {
5167 dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
5168 }
5169 if (dst_if == NULL) {
5170 struct bridge_iflist *bif;
5171 struct mbuf *mc;
5172 errno_t error;
5173
5174
5175 bridge_span(sc, m);
5176
5177 BRIDGE_LOCK2REF(sc, error);
5178 if (error != 0) {
5179 m_freem(m);
5180 return EJUSTRETURN;
5181 }
5182
5183 /*
5184 * Duplicate and send the packet across all member interfaces
5185 * except the originating interface.
5186 */
5187 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5188 dst_if = bif->bif_ifp;
5189 if (dst_if == ifp) {
5190 /* skip the originating interface */
5191 continue;
5192 }
5193 /* skip interface with inactive link status */
5194 if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5195 continue;
5196 }
5197 #if 0
5198 if (dst_if->if_type == IFT_GIF) {
5199 continue;
5200 }
5201 #endif
5202 /* skip interface that isn't running */
5203 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5204 continue;
5205 }
5206 /*
5207 * If the interface is participating in spanning
5208 * tree, make sure the port is in a state that
5209 * allows forwarding.
5210 */
5211 if ((bif->bif_ifflags & IFBIF_STP) &&
5212 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5213 continue;
5214 }
5215 /*
5216 * If the destination is the MAC NAT interface,
5217 * skip sending the packet. The packet can't be sent
5218 * if the source MAC is incorrect.
5219 */
5220 if (dst_if == mac_nat_ifp) {
5221 continue;
5222 }
5223
5224 /* make a deep copy to send on this member interface */
5225 mc = m_dup(m, M_DONTWAIT);
5226 if (mc == NULL) {
5227 (void)ifnet_stat_increment_out(bridge_ifp,
5228 0, 0, 1);
5229 continue;
5230 }
5231 (void)bridge_enqueue(bridge_ifp, ifp, dst_if,
5232 mc, CHECKSUM_OPERATION_COMPUTE);
5233 }
5234 BRIDGE_UNREF(sc);
5235
5236 if ((ifp->if_flags & IFF_RUNNING) == 0) {
5237 m_freem(m);
5238 return EJUSTRETURN;
5239 }
5240 /* allow packet to continue on the originating interface */
5241 return 0;
5242 }
5243
5244 sendunicast:
5245 /*
5246 * XXX Spanning tree consideration here?
5247 */
5248
5249 bridge_span(sc, m);
5250 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5251 m_freem(m);
5252 BRIDGE_UNLOCK(sc);
5253 return EJUSTRETURN;
5254 }
5255
5256 BRIDGE_UNLOCK(sc);
5257 if (dst_if == ifp) {
5258 /* allow packet to continue on the originating interface */
5259 return 0;
5260 }
5261 if (dst_if != mac_nat_ifp) {
5262 (void) bridge_enqueue(bridge_ifp, ifp, dst_if, m,
5263 CHECKSUM_OPERATION_COMPUTE);
5264 } else {
5265 /*
5266 * This is not the original output interface
5267 * and the destination is the MAC NAT interface.
5268 * Drop the packet because the packet can't be sent
5269 * if the source MAC is incorrect.
5270 */
5271 m_freem(m);
5272 }
5273 return EJUSTRETURN;
5274 }
5275
5276 /*
5277 * Output callback.
5278 *
5279 * This routine is called externally from above only when if_bridge_txstart
5280 * is disabled; otherwise it is called internally by bridge_start().
5281 */
5282 static int
5283 bridge_output(struct ifnet *ifp, struct mbuf *m)
5284 {
5285 struct bridge_softc *sc = ifnet_softc(ifp);
5286 struct ether_header *eh;
5287 struct ifnet *dst_if = NULL;
5288 int error = 0;
5289
5290 eh = mtod(m, struct ether_header *);
5291
5292 BRIDGE_LOCK(sc);
5293
5294 if (!(m->m_flags & (M_BCAST | M_MCAST))) {
5295 dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0);
5296 }
5297
5298 (void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5299
5300 #if NBPFILTER > 0
5301 if (sc->sc_bpf_output) {
5302 bridge_bpf_output(ifp, m);
5303 }
5304 #endif
5305
5306 if (dst_if == NULL) {
5307 /* callee will unlock */
5308 bridge_broadcast(sc, NULL, m, 0);
5309 } else {
5310 ifnet_t bridge_ifp;
5311
5312 bridge_ifp = sc->sc_ifp;
5313 BRIDGE_UNLOCK(sc);
5314
5315 error = bridge_enqueue(bridge_ifp, NULL, dst_if, m,
5316 CHECKSUM_OPERATION_FINALIZE);
5317 }
5318
5319 return error;
5320 }
5321
5322 static void
5323 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5324 {
5325 struct ether_header *eh;
5326 bool is_ipv4;
5327 uint32_t sw_csum, hwcap;
5328 uint32_t did_sw;
5329 uint32_t csum_flags;
5330
5331 eh = mtod(m, struct ether_header *);
5332 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5333 return;
5334 }
5335
5336 /* do in software what the hardware cannot */
5337 hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5338 csum_flags = m->m_pkthdr.csum_flags;
5339 sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5340 sw_csum &= IF_HWASSIST_CSUM_MASK;
5341
5342 if (is_ipv4) {
5343 if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5344 (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5345 if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5346 uint16_t start =
5347 sizeof(*eh) + sizeof(struct ip);
5348 uint16_t ulpoff =
5349 m->m_pkthdr.csum_data & 0xffff;
5350 m->m_pkthdr.csum_flags |=
5351 (CSUM_DATA_VALID | CSUM_PARTIAL);
5352 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5353 m->m_pkthdr.csum_tx_start = start;
5354 } else {
5355 sw_csum |= (CSUM_DELAY_DATA &
5356 m->m_pkthdr.csum_flags);
5357 }
5358 }
5359 did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5360 } else {
5361 if ((hwcap & CSUM_PARTIAL) &&
5362 !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5363 (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5364 if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5365 uint16_t start =
5366 sizeof(*eh) + sizeof(struct ip6_hdr);
5367 uint16_t ulpoff =
5368 m->m_pkthdr.csum_data & 0xffff;
5369 m->m_pkthdr.csum_flags |=
5370 (CSUM_DATA_VALID | CSUM_PARTIAL);
5371 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5372 m->m_pkthdr.csum_tx_start = start;
5373 } else {
5374 sw_csum |= (CSUM_DELAY_IPV6_DATA &
5375 m->m_pkthdr.csum_flags);
5376 }
5377 }
5378 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5379 }
5380 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5381 "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5382 ifp->if_xname, csum_flags, hwcap, sw_csum,
5383 did_sw, m->m_pkthdr.csum_flags);
5384 }
5385
/*
 * bridge_start:
 *
 *	Start output on a bridge.
 *
 *	This routine is invoked by the start worker thread; because we never
 *	call it directly, there is no need to deploy any serialization
 *	mechanism other than what's already used by the worker thread, i.e.
 *	this is already single threaded.
 *
 *	This routine is called only when if_bridge_txstart is enabled.
 *
 *	Packets are dequeued from `ifp` and passed to bridge_output() one at
 *	a time until ifnet_dequeue() reports a non-zero status.
 */
static void
bridge_start(struct ifnet *ifp)
{
	struct mbuf *pkt;

	while (ifnet_dequeue(ifp, &pkt) == 0) {
		(void) bridge_output(ifp, pkt);
	}
}
5411
5412 /*
5413 * bridge_forward:
5414 *
5415 * The forwarding function of the bridge.
5416 *
5417 * NOTE: Releases the lock on return.
5418 */
5419 static void
5420 bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
5421 struct mbuf *m)
5422 {
5423 struct bridge_iflist *dbif;
5424 ifnet_t bridge_ifp;
5425 struct ifnet *src_if, *dst_if;
5426 struct ether_header *eh;
5427 uint16_t vlan;
5428 uint8_t *dst;
5429 int error;
5430 struct mac_nat_record mnr;
5431 bool translate_mac = FALSE;
5432 uint32_t sc_filter_flags = 0;
5433
5434 BRIDGE_LOCK_ASSERT_HELD(sc);
5435
5436 bridge_ifp = sc->sc_ifp;
5437 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5438 "%s m 0x%llx", bridge_ifp->if_xname,
5439 (uint64_t)VM_KERNEL_ADDRPERM(m));
5440
5441 src_if = m->m_pkthdr.rcvif;
5442 if (src_if != sbif->bif_ifp) {
5443 const char * src_if_name;
5444
5445 src_if_name = (src_if != NULL) ? src_if->if_xname : "?";
5446 BRIDGE_LOG(LOG_NOTICE, 0,
5447 "src_if %s != bif_ifp %s",
5448 src_if_name, sbif->bif_ifp->if_xname);
5449 goto drop;
5450 }
5451
5452 (void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5453 vlan = VLANTAGOF(m);
5454
5455
5456 if ((sbif->bif_ifflags & IFBIF_STP) &&
5457 sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5458 goto drop;
5459 }
5460
5461 eh = mtod(m, struct ether_header *);
5462 dst = eh->ether_dhost;
5463
5464 /* If the interface is learning, record the address. */
5465 if (sbif->bif_ifflags & IFBIF_LEARNING) {
5466 error = bridge_rtupdate(sc, eh->ether_shost, vlan,
5467 sbif, 0, IFBAF_DYNAMIC);
5468 /*
5469 * If the interface has addresses limits then deny any source
5470 * that is not in the cache.
5471 */
5472 if (error && sbif->bif_addrmax) {
5473 goto drop;
5474 }
5475 }
5476
5477 if ((sbif->bif_ifflags & IFBIF_STP) != 0 &&
5478 sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
5479 goto drop;
5480 }
5481
5482 /*
5483 * At this point, the port either doesn't participate
5484 * in spanning tree or it is in the forwarding state.
5485 */
5486
5487 /*
5488 * If the packet is unicast, destined for someone on
5489 * "this" side of the bridge, drop it.
5490 */
5491 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5492 /* unicast */
5493 dst_if = bridge_rtlookup(sc, dst, vlan);
5494 if (src_if == dst_if) {
5495 goto drop;
5496 }
5497 } else {
5498 /* broadcast/multicast */
5499
5500 /*
5501 * Check if its a reserved multicast address, any address
5502 * listed in 802.1D section 7.12.6 may not be forwarded by the
5503 * bridge.
5504 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
5505 */
5506 if (dst[0] == 0x01 && dst[1] == 0x80 &&
5507 dst[2] == 0xc2 && dst[3] == 0x00 &&
5508 dst[4] == 0x00 && dst[5] <= 0x0f) {
5509 goto drop;
5510 }
5511
5512
5513 /* ...forward it to all interfaces. */
5514 atomic_add_64(&bridge_ifp->if_imcasts, 1);
5515 dst_if = NULL;
5516 }
5517
5518 /*
5519 * If we have a destination interface which is a member of our bridge,
5520 * OR this is a unicast packet, push it through the bpf(4) machinery.
5521 * For broadcast or multicast packets, don't bother because it will
5522 * be reinjected into ether_input. We do this before we pass the packets
5523 * through the pfil(9) framework, as it is possible that pfil(9) will
5524 * drop the packet, or possibly modify it, making it difficult to debug
5525 * firewall issues on the bridge.
5526 */
5527 #if NBPFILTER > 0
5528 if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
5529 dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5530 m->m_pkthdr.rcvif = bridge_ifp;
5531 BRIDGE_BPF_MTAP_INPUT(sc, m);
5532 }
5533 #endif /* NBPFILTER */
5534
5535 if (dst_if == NULL) {
5536 /* bridge_broadcast will unlock */
5537 bridge_broadcast(sc, sbif, m, 1);
5538 return;
5539 }
5540
5541 /*
5542 * Unicast.
5543 */
5544 /*
5545 * At this point, we're dealing with a unicast frame
5546 * going to a different interface.
5547 */
5548 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5549 goto drop;
5550 }
5551
5552 dbif = bridge_lookup_member_if(sc, dst_if);
5553 if (dbif == NULL) {
5554 /* Not a member of the bridge (anymore?) */
5555 goto drop;
5556 }
5557
5558 /* Private segments can not talk to each other */
5559 if (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) {
5560 goto drop;
5561 }
5562
5563 if ((dbif->bif_ifflags & IFBIF_STP) &&
5564 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5565 goto drop;
5566 }
5567
5568 #if HAS_DHCPRA_MASK
5569 /* APPLE MODIFICATION <rdar:6985737> */
5570 if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
5571 m = ip_xdhcpra_output(dst_if, m);
5572 if (!m) {
5573 ++bridge_ifp.if_xdhcpra;
5574 BRIDGE_UNLOCK(sc);
5575 return;
5576 }
5577 }
5578 #endif /* HAS_DHCPRA_MASK */
5579
5580 if (dbif == sc->sc_mac_nat_bif) {
5581 /* determine how to translate the packet */
5582 translate_mac
5583 = bridge_mac_nat_output(sc, sbif, &m, &mnr);
5584 if (m == NULL) {
5585 /* packet was deallocated */
5586 BRIDGE_UNLOCK(sc);
5587 return;
5588 }
5589 } else if (bif_has_checksum_offload(dbif) &&
5590 !bif_has_checksum_offload(sbif)) {
5591 /*
5592 * If the destination interface has checksum offload enabled,
5593 * verify the checksum now, unless the source interface also has
5594 * checksum offload enabled. The checksum in that case has
5595 * already just been computed and verifying it is unnecessary.
5596 */
5597 error = bridge_verify_checksum(&m, &dbif->bif_stats);
5598 if (error != 0) {
5599 BRIDGE_UNLOCK(sc);
5600 if (m != NULL) {
5601 m_freem(m);
5602 }
5603 return;
5604 }
5605 }
5606
5607 sc_filter_flags = sc->sc_filter_flags;
5608
5609 BRIDGE_UNLOCK(sc);
5610 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5611 if (bridge_pf(&m, dst_if, sc_filter_flags, FALSE) != 0) {
5612 return;
5613 }
5614 if (m == NULL) {
5615 return;
5616 }
5617 }
5618
5619 /* if we need to, translate the MAC address */
5620 if (translate_mac) {
5621 bridge_mac_nat_translate(&m, &mnr, IF_LLADDR(dst_if));
5622 }
5623 /*
5624 * We're forwarding an inbound packet in which the checksum must
5625 * already have been computed and if required, verified.
5626 */
5627 if (m != NULL) {
5628 (void) bridge_enqueue(bridge_ifp, src_if, dst_if, m,
5629 CHECKSUM_OPERATION_CLEAR_OFFLOAD);
5630 }
5631 return;
5632
5633 drop:
5634 BRIDGE_UNLOCK(sc);
5635 m_freem(m);
5636 }
5637
5638 static void
5639 inject_input_packet(ifnet_t ifp, mbuf_t m)
5640 {
5641 mbuf_pkthdr_setrcvif(m, ifp);
5642 mbuf_pkthdr_setheader(m, mbuf_data(m));
5643 mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
5644 mbuf_len(m) - ETHER_HDR_LEN);
5645 mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
5646 m->m_flags |= M_PROTO1; /* set to avoid loops */
5647 dlil_input_packet_list(ifp, m);
5648 return;
5649 }
5650
5651 static bool
5652 in_addr_is_ours(struct in_addr ip)
5653 {
5654 struct in_ifaddr *ia;
5655 bool ours = false;
5656
5657 lck_rw_lock_shared(&in_ifaddr_rwlock);
5658 TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5659 if (IA_SIN(ia)->sin_addr.s_addr == ip.s_addr) {
5660 ours = true;
5661 break;
5662 }
5663 }
5664 lck_rw_done(&in_ifaddr_rwlock);
5665 return ours;
5666 }
5667
5668 static bool
5669 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5670 {
5671 struct in6_ifaddr *ia6;
5672 bool ours = false;
5673
5674 if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5675 struct in6_addr dst_ip;
5676
5677 /* need to embed scope ID for comparison */
5678 bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5679 dst_ip.s6_addr16[1] = htons(ifscope);
5680 ip6_p = &dst_ip;
5681 }
5682 lck_rw_lock_shared(&in6_ifaddr_rwlock);
5683 TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5684 if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5685 ia6->ia_addr.sin6_scope_id, ifscope)) {
5686 ours = true;
5687 break;
5688 }
5689 }
5690 lck_rw_done(&in6_ifaddr_rwlock);
5691 return ours;
5692 }
5693
5694 static void
5695 bridge_interface_input(ifnet_t bridge_ifp, mbuf_t m,
5696 bpf_packet_func bpf_input_func)
5697 {
5698 size_t byte_count;
5699 struct ether_header *eh;
5700 errno_t error;
5701 bool is_ipv4;
5702 int len;
5703 u_int mac_hlen;
5704 int pkt_count;
5705
5706 /* segment large packets before sending them up */
5707 if (if_bridge_segmentation == 0) {
5708 goto done;
5709 }
5710 len = m->m_pkthdr.len;
5711 if (len <= (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5712 goto done;
5713 }
5714 eh = mtod(m, struct ether_header *);
5715 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5716 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5717 "large non IPv4/IPv6 packet");
5718 goto done;
5719 }
5720
5721 /*
5722 * We have a large IPv4/IPv6 TCP packet. Segment it if required.
5723 *
5724 * If gso_tcp() returns success (0), the packet(s) are
5725 * ready to be passed up. If the destination is a local IP address,
5726 * the packet will be passed up as a large, single packet.
5727 *
5728 * If gso_tcp() returns an error, the packet has already
5729 * been freed.
5730 */
5731 mac_hlen = sizeof(*eh);
5732 error = gso_tcp(bridge_ifp, &m, mac_hlen, is_ipv4, FALSE);
5733 if (error != 0) {
5734 return;
5735 }
5736
5737 done:
5738 pkt_count = 0;
5739 byte_count = 0;
5740 for (mbuf_t scan = m; scan != NULL; scan = scan->m_nextpkt) {
5741 /* Mark the packet as arriving on the bridge interface */
5742 mbuf_pkthdr_setrcvif(scan, bridge_ifp);
5743 mbuf_pkthdr_setheader(scan, mbuf_data(scan));
5744 if (bpf_input_func != NULL) {
5745 (*bpf_input_func)(bridge_ifp, scan);
5746 }
5747 mbuf_setdata(scan, (char *)mbuf_data(scan) + ETHER_HDR_LEN,
5748 mbuf_len(scan) - ETHER_HDR_LEN);
5749 mbuf_pkthdr_adjustlen(scan, -ETHER_HDR_LEN);
5750 byte_count += mbuf_pkthdr_len(scan);
5751 pkt_count++;
5752 }
5753 (void)ifnet_stat_increment_in(bridge_ifp, pkt_count, byte_count, 0);
5754 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5755 "%s %d packet(s) %ld bytes",
5756 bridge_ifp->if_xname, pkt_count, byte_count);
5757 dlil_input_packet_list(bridge_ifp, m);
5758 return;
5759 }
5760
5761 static bool
5762 is_our_ip(ip_packet_info_t info_p, uint32_t ifscope)
5763 {
5764 bool ours;
5765
5766 if (info_p->ip_is_ipv4) {
5767 struct in_addr dst_ip;
5768
5769 bcopy(&info_p->ip_hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
5770 ours = in_addr_is_ours(dst_ip);
5771 } else {
5772 ours = in6_addr_is_ours(&info_p->ip_hdr.ip6->ip6_dst, ifscope);
5773 }
5774 return ours;
5775 }
5776
5777 static inline errno_t
5778 bridge_vmnet_tag_input(ifnet_t bridge_ifp, ifnet_t ifp,
5779 const u_char * ether_dhost, mbuf_t *mp,
5780 bool is_broadcast, bool is_ip, bool is_ipv4,
5781 ip_packet_info * info_p, struct bripstats * stats_p,
5782 bool *info_initialized)
5783 {
5784 errno_t error = 0;
5785 bool is_local = false;
5786 struct pf_mtag *pf_mtag;
5787 u_int16_t tag = vmnet_tag;
5788
5789 *info_initialized = false;
5790 if (is_broadcast) {
5791 if (_ether_cmp(ether_dhost, etherbroadcastaddr) == 0) {
5792 tag = vmnet_broadcast_tag;
5793 } else {
5794 tag = vmnet_multicast_tag;
5795 }
5796 } else if (is_ip) {
5797 unsigned int mac_hlen = sizeof(struct ether_header);
5798
5799 bzero(stats_p, sizeof(*stats_p));
5800 *info_initialized = true;
5801 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p,
5802 stats_p);
5803 if (error != 0) {
5804 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_INPUT,
5805 "%s(%s) bridge_get_ip_proto failed %d",
5806 bridge_ifp->if_xname,
5807 ifp->if_xname, error);
5808 if (*mp == NULL) {
5809 return EJUSTRETURN;
5810 }
5811 } else {
5812 is_local = is_our_ip(info_p, bridge_ifp->if_index);
5813 if (is_local) {
5814 tag = vmnet_local_tag;
5815 }
5816 }
5817 }
5818 pf_mtag = pf_get_mtag(*mp);
5819 if (pf_mtag != NULL) {
5820 pf_mtag->pftag_tag = tag;
5821 }
5822 #if DEBUG || DEVELOPMENT
5823 {
5824 bool forced;
5825
5826 BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_ONE);
5827 if (forced) {
5828 m_freem(*mp);
5829 *mp = NULL;
5830 error = EJUSTRETURN;
5831 goto done;
5832 }
5833 BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_TWO);
5834 if (forced) {
5835 error = _EBADIP;
5836 goto done;
5837 }
5838 }
5839 done:
5840 #endif /* DEBUG || DEVELOPMENT */
5841 return error;
5842 }
5843
5844 static void
5845 bripstats_apply(struct bripstats *dst_p, const struct bripstats *src_p)
5846 {
5847 dst_p->bips_ip += src_p->bips_ip;
5848 dst_p->bips_ip6 += src_p->bips_ip6;
5849 dst_p->bips_udp += src_p->bips_udp;
5850 dst_p->bips_tcp += src_p->bips_tcp;
5851
5852 dst_p->bips_bad_ip += src_p->bips_bad_ip;
5853 dst_p->bips_bad_ip6 += src_p->bips_bad_ip6;
5854 dst_p->bips_bad_udp += src_p->bips_bad_udp;
5855 dst_p->bips_bad_tcp += src_p->bips_bad_tcp;
5856 }
5857
5858 static void
5859 bridge_bripstats_apply(ifnet_t ifp, const struct bripstats *stats_p)
5860 {
5861 struct bridge_iflist *bif;
5862 struct bridge_softc *sc = ifp->if_bridge;
5863
5864 BRIDGE_LOCK(sc);
5865 bif = bridge_lookup_member_if(sc, ifp);
5866 if (bif == NULL) {
5867 goto done;
5868 }
5869 if (!bif_has_checksum_offload(bif)) {
5870 goto done;
5871 }
5872 bripstats_apply(&bif->bif_stats.brms_in_ip, stats_p);
5873
5874 done:
5875 BRIDGE_UNLOCK(sc);
5876 return;
5877 }
5878
5879 /*
5880 * bridge_input:
5881 *
5882 * Filter input from a member interface. Queue the packet for
5883 * bridging if it is not for us.
5884 */
5885 errno_t
5886 bridge_input(struct ifnet *ifp, mbuf_t *data)
5887 {
5888 struct bridge_softc *sc = ifp->if_bridge;
5889 struct bridge_iflist *bif, *bif2;
5890 struct ether_header eh_in;
5891 bool is_ip = false;
5892 bool is_ipv4 = false;
5893 ifnet_t bridge_ifp;
5894 struct mbuf *mc, *mc2;
5895 unsigned int mac_hlen = sizeof(struct ether_header);
5896 uint16_t vlan;
5897 errno_t error;
5898 ip_packet_info info;
5899 struct bripstats stats;
5900 bool info_initialized = false;
5901 errno_t ip_packet_error = 0;
5902 bool is_broadcast;
5903 bool is_ip_broadcast = false;
5904 bool is_ifp_mac = false;
5905 mbuf_t m = *data;
5906 uint32_t sc_filter_flags = 0;
5907
5908 bridge_ifp = sc->sc_ifp;
5909 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5910 "%s from %s m 0x%llx data 0x%llx",
5911 bridge_ifp->if_xname, ifp->if_xname,
5912 (uint64_t)VM_KERNEL_ADDRPERM(m),
5913 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
5914 if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
5915 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5916 "%s not running passing along",
5917 bridge_ifp->if_xname);
5918 return 0;
5919 }
5920
5921 vlan = VLANTAGOF(m);
5922
5923 #ifdef IFF_MONITOR
5924 /*
5925 * Implement support for bridge monitoring. If this flag has been
5926 * set on this interface, discard the packet once we push it through
5927 * the bpf(4) machinery, but before we do, increment the byte and
5928 * packet counters associated with this interface.
5929 */
5930 if ((bridge_ifp->if_flags & IFF_MONITOR) != 0) {
5931 m->m_pkthdr.rcvif = bridge_ifp;
5932 BRIDGE_BPF_MTAP_INPUT(sc, m);
5933 (void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5934 *data = NULL;
5935 m_freem(m);
5936 return EJUSTRETURN;
5937 }
5938 #endif /* IFF_MONITOR */
5939
5940 is_broadcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;
5941
5942 /*
5943 * Need to clear the promiscuous flag otherwise it will be
5944 * dropped by DLIL after processing filters
5945 */
5946 if ((mbuf_flags(m) & MBUF_PROMISC)) {
5947 mbuf_setflags_mask(m, 0, MBUF_PROMISC);
5948 }
5949
5950 /* copy the ethernet header */
5951 eh_in = *(mtod(m, struct ether_header *));
5952
5953 is_ip = ether_header_type_is_ip(&eh_in, &is_ipv4);
5954
5955 if (if_bridge_vmnet_pf_tagging != 0 && IFNET_IS_VMNET(ifp)) {
5956 /* tag packets coming from VMNET interfaces */
5957 ip_packet_error = bridge_vmnet_tag_input(bridge_ifp, ifp,
5958 eh_in.ether_dhost, data, is_broadcast, is_ip, is_ipv4,
5959 &info, &stats, &info_initialized);
5960 m = *data;
5961 if (m == NULL) {
5962 bridge_bripstats_apply(ifp, &stats);
5963 return EJUSTRETURN;
5964 }
5965 }
5966
5967 sc_filter_flags = sc->sc_filter_flags;
5968 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5969 error = bridge_pf(data, ifp, sc_filter_flags, TRUE);
5970 m = *data;
5971 if (error != 0 || m == NULL) {
5972 return EJUSTRETURN;
5973 }
5974 }
5975
5976 BRIDGE_LOCK(sc);
5977 bif = bridge_lookup_member_if(sc, ifp);
5978 if (bif == NULL) {
5979 BRIDGE_UNLOCK(sc);
5980 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5981 "%s bridge_lookup_member_if failed",
5982 bridge_ifp->if_xname);
5983 return 0;
5984 }
5985 if (is_ip && bif_has_checksum_offload(bif)) {
5986 if (info_initialized) {
5987 bripstats_apply(&bif->bif_stats.brms_in_ip, &stats);
5988 } else {
5989 error = bridge_get_ip_proto(data, mac_hlen, is_ipv4,
5990 &info, &bif->bif_stats.brms_in_ip);
5991 if (error != 0) {
5992 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
5993 "%s(%s) bridge_get_ip_proto failed %d",
5994 bridge_ifp->if_xname,
5995 bif->bif_ifp->if_xname, error);
5996 ip_packet_error = error;
5997 }
5998 }
5999 if (ip_packet_error == 0) {
6000 /* need to compute IP/UDP/TCP/checksums */
6001 error = bridge_offload_checksum(data, &info,
6002 &bif->bif_stats);
6003 if (error != 0) {
6004 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
6005 "%s(%s) bridge_offload_checksum failed %d",
6006 bridge_ifp->if_xname,
6007 bif->bif_ifp->if_xname, error);
6008 ip_packet_error = error;
6009 }
6010 }
6011 if (ip_packet_error != 0) {
6012 BRIDGE_UNLOCK(sc);
6013 if (*data != NULL) {
6014 m_freem(*data);
6015 *data = NULL;
6016 }
6017 return EJUSTRETURN;
6018 }
6019 m = *data;
6020 }
6021
6022 if (bif->bif_flags & BIFF_HOST_FILTER) {
6023 error = bridge_host_filter(bif, data);
6024 if (error != 0) {
6025 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6026 "%s bridge_host_filter failed",
6027 bif->bif_ifp->if_xname);
6028 BRIDGE_UNLOCK(sc);
6029 return EJUSTRETURN;
6030 }
6031 m = *data;
6032 }
6033
6034 if (!is_broadcast &&
6035 _ether_cmp(eh_in.ether_dhost, IF_LLADDR(ifp)) == 0) {
6036 /* the packet is unicast to the interface's MAC address */
6037 if (is_ip && sc->sc_mac_nat_bif == bif) {
6038 /* doing MAC-NAT, check if destination is IP broadcast */
6039 is_ip_broadcast = is_broadcast_ip_packet(data);
6040 if (*data == NULL) {
6041 BRIDGE_UNLOCK(sc);
6042 return EJUSTRETURN;
6043 }
6044 m = *data;
6045 }
6046 if (!is_ip_broadcast) {
6047 is_ifp_mac = TRUE;
6048 }
6049 }
6050
6051 bridge_span(sc, m);
6052
6053 if (is_broadcast || is_ip_broadcast) {
6054 if (is_broadcast && (m->m_flags & M_MCAST) != 0) {
6055 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6056 " multicast: "
6057 "%02x:%02x:%02x:%02x:%02x:%02x",
6058 eh_in.ether_dhost[0], eh_in.ether_dhost[1],
6059 eh_in.ether_dhost[2], eh_in.ether_dhost[3],
6060 eh_in.ether_dhost[4], eh_in.ether_dhost[5]);
6061 }
6062 /* Tap off 802.1D packets; they do not get forwarded. */
6063 if (is_broadcast &&
6064 _ether_cmp(eh_in.ether_dhost, bstp_etheraddr) == 0) {
6065 #if BRIDGESTP
6066 m = bstp_input(&bif->bif_stp, ifp, m);
6067 #else /* !BRIDGESTP */
6068 m_freem(m);
6069 m = NULL;
6070 #endif /* !BRIDGESTP */
6071 if (m == NULL) {
6072 BRIDGE_UNLOCK(sc);
6073 return EJUSTRETURN;
6074 }
6075 }
6076
6077 if ((bif->bif_ifflags & IFBIF_STP) &&
6078 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6079 BRIDGE_UNLOCK(sc);
6080 return 0;
6081 }
6082
6083 /*
6084 * Make a deep copy of the packet and enqueue the copy
6085 * for bridge processing.
6086 */
6087 mc = m_dup(m, M_DONTWAIT);
6088 if (mc == NULL) {
6089 BRIDGE_UNLOCK(sc);
6090 return 0;
6091 }
6092
6093 /*
6094 * Perform the bridge forwarding function with the copy.
6095 *
6096 * Note that bridge_forward calls BRIDGE_UNLOCK
6097 */
6098 if (is_ip_broadcast) {
6099 struct ether_header *eh;
6100
6101 /* make the copy look like it is actually broadcast */
6102 mc->m_flags |= M_BCAST;
6103 eh = mtod(mc, struct ether_header *);
6104 bcopy(etherbroadcastaddr, eh->ether_dhost,
6105 ETHER_ADDR_LEN);
6106 }
6107 bridge_forward(sc, bif, mc);
6108
6109 /*
6110 * Reinject the mbuf as arriving on the bridge so we have a
6111 * chance at claiming multicast packets. We can not loop back
6112 * here from ether_input as a bridge is never a member of a
6113 * bridge.
6114 */
6115 VERIFY(bridge_ifp->if_bridge == NULL);
6116 mc2 = m_dup(m, M_DONTWAIT);
6117 if (mc2 != NULL) {
6118 /* Keep the layer3 header aligned */
6119 int i = min(mc2->m_pkthdr.len, max_protohdr);
6120 mc2 = m_copyup(mc2, i, ETHER_ALIGN);
6121 }
6122 if (mc2 != NULL) {
6123 /* mark packet as arriving on the bridge */
6124 mc2->m_pkthdr.rcvif = bridge_ifp;
6125 mc2->m_pkthdr.pkt_hdr = mbuf_data(mc2);
6126 BRIDGE_BPF_MTAP_INPUT(sc, mc2);
6127 (void) mbuf_setdata(mc2,
6128 (char *)mbuf_data(mc2) + ETHER_HDR_LEN,
6129 mbuf_len(mc2) - ETHER_HDR_LEN);
6130 (void) mbuf_pkthdr_adjustlen(mc2, -ETHER_HDR_LEN);
6131 (void) ifnet_stat_increment_in(bridge_ifp, 1,
6132 mbuf_pkthdr_len(mc2), 0);
6133 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6134 "%s mcast for us", bridge_ifp->if_xname);
6135 dlil_input_packet_list(bridge_ifp, mc2);
6136 }
6137
6138 /* Return the original packet for local processing. */
6139 return 0;
6140 }
6141
6142 if ((bif->bif_ifflags & IFBIF_STP) &&
6143 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6144 BRIDGE_UNLOCK(sc);
6145 return 0;
6146 }
6147
6148 #ifdef DEV_CARP
6149 #define CARP_CHECK_WE_ARE_DST(iface) \
6150 ((iface)->if_carp &&\
6151 carp_forus((iface)->if_carp, eh_in.ether_dhost))
6152 #define CARP_CHECK_WE_ARE_SRC(iface) \
6153 ((iface)->if_carp &&\
6154 carp_forus((iface)->if_carp, eh_in.ether_shost))
6155 #else
6156 #define CARP_CHECK_WE_ARE_DST(iface) 0
6157 #define CARP_CHECK_WE_ARE_SRC(iface) 0
6158 #endif
6159
6160 #define PFIL_HOOKED_INET6 PFIL_HOOKED(&inet6_pfil_hook)
6161
6162 #define PFIL_PHYS(sc, ifp, m)
6163
6164 #define GRAB_OUR_PACKETS(iface) \
6165 if ((iface)->if_type == IFT_GIF) \
6166 continue; \
6167 /* It is destined for us. */ \
6168 if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_dhost) == 0 || \
6169 CARP_CHECK_WE_ARE_DST((iface))) { \
6170 if ((iface)->if_type == IFT_BRIDGE) { \
6171 BRIDGE_BPF_MTAP_INPUT(sc, m); \
6172 /* Filter on the physical interface. */ \
6173 PFIL_PHYS(sc, iface, m); \
6174 } else { \
6175 bpf_tap_in(iface, DLT_EN10MB, m, NULL, 0); \
6176 } \
6177 if (bif->bif_ifflags & IFBIF_LEARNING) { \
6178 error = bridge_rtupdate(sc, eh_in.ether_shost, \
6179 vlan, bif, 0, IFBAF_DYNAMIC); \
6180 if (error && bif->bif_addrmax) { \
6181 BRIDGE_UNLOCK(sc); \
6182 m_freem(m); \
6183 return (EJUSTRETURN); \
6184 } \
6185 } \
6186 BRIDGE_UNLOCK(sc); \
6187 inject_input_packet(iface, m); \
6188 return (EJUSTRETURN); \
6189 } \
6190 \
6191 /* We just received a packet that we sent out. */ \
6192 if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_shost) == 0 || \
6193 CARP_CHECK_WE_ARE_SRC((iface))) { \
6194 BRIDGE_UNLOCK(sc); \
6195 m_freem(m); \
6196 return (EJUSTRETURN); \
6197 }
6198
6199 /*
6200 * Unicast.
6201 */
6202
6203 /* handle MAC-NAT if enabled */
6204 if (is_ifp_mac && sc->sc_mac_nat_bif == bif) {
6205 ifnet_t dst_if;
6206 boolean_t is_input = FALSE;
6207
6208 dst_if = bridge_mac_nat_input(sc, data, &is_input);
6209 m = *data;
6210 if (dst_if == ifp) {
6211 /* our input packet */
6212 } else if (dst_if != NULL || m == NULL) {
6213 BRIDGE_UNLOCK(sc);
6214 if (dst_if != NULL) {
6215 ASSERT(m != NULL);
6216 if (is_input) {
6217 inject_input_packet(dst_if, m);
6218 } else {
6219 (void)bridge_enqueue(bridge_ifp, NULL,
6220 dst_if, m,
6221 CHECKSUM_OPERATION_CLEAR_OFFLOAD);
6222 }
6223 }
6224 return EJUSTRETURN;
6225 }
6226 }
6227
6228 /*
6229 * If the packet is for the bridge, pass it up for local processing.
6230 */
6231 if (_ether_cmp(eh_in.ether_dhost, IF_LLADDR(bridge_ifp)) == 0 ||
6232 CARP_CHECK_WE_ARE_DST(bridge_ifp)) {
6233 bpf_packet_func bpf_input_func = sc->sc_bpf_input;
6234
6235 /*
6236 * If the interface is learning, and the source
6237 * address is valid and not multicast, record
6238 * the address.
6239 */
6240 if (bif->bif_ifflags & IFBIF_LEARNING) {
6241 (void) bridge_rtupdate(sc, eh_in.ether_shost,
6242 vlan, bif, 0, IFBAF_DYNAMIC);
6243 }
6244 BRIDGE_UNLOCK(sc);
6245
6246 bridge_interface_input(bridge_ifp, m, bpf_input_func);
6247 return EJUSTRETURN;
6248 }
6249
6250 /*
6251 * if the destination of the packet is for the MAC address of
6252 * the member interface itself, then we don't need to forward
6253 * it -- just pass it back. Note that it'll likely just be
6254 * dropped by the stack, but if something else is bound to
6255 * the interface directly (for example, the wireless stats
6256 * protocol -- although that actually uses BPF right now),
6257 * then it will consume the packet
6258 *
6259 * ALSO, note that we do this check AFTER checking for the
6260 * bridge's own MAC address, because the bridge may be
6261 * using the SAME MAC address as one of its interfaces
6262 */
6263 if (is_ifp_mac) {
6264
6265 #ifdef VERY_VERY_VERY_DIAGNOSTIC
6266 BRIDGE_LOG(LOG_NOTICE, 0,
6267 "not forwarding packet bound for member interface");
6268 #endif
6269
6270 BRIDGE_UNLOCK(sc);
6271 return 0;
6272 }
6273
6274 /* Now check the remaining bridge members. */
6275 TAILQ_FOREACH(bif2, &sc->sc_iflist, bif_next) {
6276 if (bif2->bif_ifp != ifp) {
6277 GRAB_OUR_PACKETS(bif2->bif_ifp);
6278 }
6279 }
6280
6281 #undef CARP_CHECK_WE_ARE_DST
6282 #undef CARP_CHECK_WE_ARE_SRC
6283 #undef GRAB_OUR_PACKETS
6284
6285 /*
6286 * Perform the bridge forwarding function.
6287 *
6288 * Note that bridge_forward calls BRIDGE_UNLOCK
6289 */
6290 bridge_forward(sc, bif, m);
6291
6292 return EJUSTRETURN;
6293 }
6294
6295 /*
6296 * bridge_broadcast:
6297 *
6298 * Send a frame to all interfaces that are members of
6299 * the bridge, except for the one on which the packet
6300 * arrived.
6301 *
6302 * NOTE: Releases the lock on return.
6303 */
6304 static void
6305 bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
6306 struct mbuf *m, int runfilt)
6307 {
6308 ifnet_t bridge_ifp;
6309 struct bridge_iflist *dbif;
6310 struct ifnet * src_if;
6311 struct mbuf *mc;
6312 struct mbuf *mc_in;
6313 struct ifnet *dst_if;
6314 int error = 0, used = 0;
6315 boolean_t bridge_if_out;
6316 ChecksumOperation cksum_op;
6317 struct mac_nat_record mnr;
6318 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
6319 boolean_t translate_mac = FALSE;
6320 uint32_t sc_filter_flags = 0;
6321
6322 bridge_ifp = sc->sc_ifp;
6323 if (sbif != NULL) {
6324 bridge_if_out = FALSE;
6325 src_if = sbif->bif_ifp;
6326 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6327 if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6328 /* get the translation record while holding the lock */
6329 translate_mac
6330 = bridge_mac_nat_output(sc, sbif, &m, &mnr);
6331 if (m == NULL) {
6332 /* packet was deallocated */
6333 BRIDGE_UNLOCK(sc);
6334 return;
6335 }
6336 }
6337 } else {
6338 /*
6339 * sbif is NULL when the bridge interface calls
6340 * bridge_broadcast().
6341 */
6342 bridge_if_out = TRUE;
6343 cksum_op = CHECKSUM_OPERATION_FINALIZE;
6344 sbif = NULL;
6345 src_if = NULL;
6346 }
6347
6348 BRIDGE_LOCK2REF(sc, error);
6349 if (error) {
6350 m_freem(m);
6351 return;
6352 }
6353
6354 TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6355 dst_if = dbif->bif_ifp;
6356 if (dst_if == src_if) {
6357 /* skip the interface that the packet came in on */
6358 continue;
6359 }
6360
6361 /* Private segments can not talk to each other */
6362 if (sbif != NULL &&
6363 (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6364 continue;
6365 }
6366
6367 if ((dbif->bif_ifflags & IFBIF_STP) &&
6368 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6369 continue;
6370 }
6371
6372 if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6373 (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
6374 continue;
6375 }
6376
6377 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6378 continue;
6379 }
6380
6381 if (!(dbif->bif_flags & BIFF_MEDIA_ACTIVE)) {
6382 continue;
6383 }
6384
6385 if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6386 mc = m;
6387 used = 1;
6388 } else {
6389 mc = m_dup(m, M_DONTWAIT);
6390 if (mc == NULL) {
6391 (void) ifnet_stat_increment_out(bridge_ifp,
6392 0, 0, 1);
6393 continue;
6394 }
6395 }
6396
6397 /*
6398 * If broadcast input is enabled, do so only if this
6399 * is an input packet.
6400 */
6401 if (!bridge_if_out &&
6402 (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6403 mc_in = m_dup(mc, M_DONTWAIT);
6404 /* this could fail, but we continue anyways */
6405 } else {
6406 mc_in = NULL;
6407 }
6408
6409 /* out */
6410 if (translate_mac && mac_nat_bif == dbif) {
6411 /* translate the packet without holding the lock */
6412 bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
6413 }
6414
6415 sc_filter_flags = sc->sc_filter_flags;
6416 if (runfilt &&
6417 PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6418 if (used == 0) {
6419 /* Keep the layer3 header aligned */
6420 int i = min(mc->m_pkthdr.len, max_protohdr);
6421 mc = m_copyup(mc, i, ETHER_ALIGN);
6422 if (mc == NULL) {
6423 (void) ifnet_stat_increment_out(
6424 sc->sc_ifp, 0, 0, 1);
6425 if (mc_in != NULL) {
6426 m_freem(mc_in);
6427 mc_in = NULL;
6428 }
6429 continue;
6430 }
6431 }
6432 if (bridge_pf(&mc, dst_if, sc_filter_flags, FALSE) != 0) {
6433 if (mc_in != NULL) {
6434 m_freem(mc_in);
6435 mc_in = NULL;
6436 }
6437 continue;
6438 }
6439 if (mc == NULL) {
6440 if (mc_in != NULL) {
6441 m_freem(mc_in);
6442 mc_in = NULL;
6443 }
6444 continue;
6445 }
6446 }
6447
6448 if (mc != NULL) {
6449 /* verify checksum if necessary */
6450 if (bif_has_checksum_offload(dbif) && sbif != NULL &&
6451 !bif_has_checksum_offload(sbif)) {
6452 error = bridge_verify_checksum(&mc,
6453 &dbif->bif_stats);
6454 if (error != 0) {
6455 if (mc != NULL) {
6456 m_freem(mc);
6457 }
6458 mc = NULL;
6459 }
6460 }
6461 if (mc != NULL) {
6462 (void) bridge_enqueue(bridge_ifp,
6463 NULL, dst_if, mc, cksum_op);
6464 }
6465 }
6466
6467 /* in */
6468 if (mc_in == NULL) {
6469 continue;
6470 }
6471 bpf_tap_in(dst_if, DLT_EN10MB, mc_in, NULL, 0);
6472 mbuf_pkthdr_setrcvif(mc_in, dst_if);
6473 mbuf_pkthdr_setheader(mc_in, mbuf_data(mc_in));
6474 mbuf_setdata(mc_in, (char *)mbuf_data(mc_in) + ETHER_HDR_LEN,
6475 mbuf_len(mc_in) - ETHER_HDR_LEN);
6476 mbuf_pkthdr_adjustlen(mc_in, -ETHER_HDR_LEN);
6477 mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
6478 dlil_input_packet_list(dst_if, mc_in);
6479 }
6480 if (used == 0) {
6481 m_freem(m);
6482 }
6483
6484
6485 BRIDGE_UNREF(sc);
6486 }
6487
6488 /*
6489 * bridge_span:
6490 *
6491 * Duplicate a packet out one or more interfaces that are in span mode,
6492 * the original mbuf is unmodified.
6493 */
6494 static void
6495 bridge_span(struct bridge_softc *sc, struct mbuf *m)
6496 {
6497 struct bridge_iflist *bif;
6498 struct ifnet *dst_if;
6499 struct mbuf *mc;
6500
6501 if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6502 return;
6503 }
6504
6505 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6506 dst_if = bif->bif_ifp;
6507
6508 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6509 continue;
6510 }
6511
6512 mc = m_copypacket(m, M_DONTWAIT);
6513 if (mc == NULL) {
6514 (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6515 continue;
6516 }
6517
6518 (void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, mc,
6519 CHECKSUM_OPERATION_NONE);
6520 }
6521 }
6522
6523
6524 /*
6525 * bridge_rtupdate:
6526 *
6527 * Add a bridge routing entry.
6528 */
6529 static int
6530 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
6531 struct bridge_iflist *bif, int setflags, uint8_t flags)
6532 {
6533 struct bridge_rtnode *brt;
6534 int error;
6535
6536 BRIDGE_LOCK_ASSERT_HELD(sc);
6537
6538 /* Check the source address is valid and not multicast. */
6539 if (ETHER_IS_MULTICAST(dst) ||
6540 (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6541 dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6542 return EINVAL;
6543 }
6544
6545
6546 /* 802.1p frames map to vlan 1 */
6547 if (vlan == 0) {
6548 vlan = 1;
6549 }
6550
6551 /*
6552 * A route for this destination might already exist. If so,
6553 * update it, otherwise create a new one.
6554 */
6555 if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6556 if (sc->sc_brtcnt >= sc->sc_brtmax) {
6557 sc->sc_brtexceeded++;
6558 return ENOSPC;
6559 }
6560 /* Check per interface address limits (if enabled) */
6561 if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6562 bif->bif_addrexceeded++;
6563 return ENOSPC;
6564 }
6565
6566 /*
6567 * Allocate a new bridge forwarding node, and
6568 * initialize the expiration time and Ethernet
6569 * address.
6570 */
6571 brt = zalloc_noblock(bridge_rtnode_pool);
6572 if (brt == NULL) {
6573 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6574 "zalloc_nolock failed");
6575 return ENOMEM;
6576 }
6577 bzero(brt, sizeof(struct bridge_rtnode));
6578
6579 if (bif->bif_ifflags & IFBIF_STICKY) {
6580 brt->brt_flags = IFBAF_STICKY;
6581 } else {
6582 brt->brt_flags = IFBAF_DYNAMIC;
6583 }
6584
6585 memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6586 brt->brt_vlan = vlan;
6587
6588
6589 if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6590 zfree(bridge_rtnode_pool, brt);
6591 return error;
6592 }
6593 brt->brt_dst = bif;
6594 bif->bif_addrcnt++;
6595 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6596 "added %02x:%02x:%02x:%02x:%02x:%02x "
6597 "on %s count %u hashsize %u",
6598 dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6599 sc->sc_ifp->if_xname, sc->sc_brtcnt,
6600 sc->sc_rthash_size);
6601 }
6602
6603 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6604 brt->brt_dst != bif) {
6605 brt->brt_dst->bif_addrcnt--;
6606 brt->brt_dst = bif;
6607 brt->brt_dst->bif_addrcnt++;
6608 }
6609
6610 if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6611 unsigned long now;
6612
6613 now = (unsigned long) net_uptime();
6614 brt->brt_expire = now + sc->sc_brttimeout;
6615 }
6616 if (setflags) {
6617 brt->brt_flags = flags;
6618 }
6619
6620
6621 return 0;
6622 }
6623
6624 /*
6625 * bridge_rtlookup:
6626 *
6627 * Lookup the destination interface for an address.
6628 */
6629 static struct ifnet *
6630 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6631 {
6632 struct bridge_rtnode *brt;
6633
6634 BRIDGE_LOCK_ASSERT_HELD(sc);
6635
6636 if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6637 return NULL;
6638 }
6639
6640 return brt->brt_ifp;
6641 }
6642
6643 /*
6644 * bridge_rttrim:
6645 *
6646 * Trim the routine table so that we have a number
6647 * of routing entries less than or equal to the
6648 * maximum number.
6649 */
6650 static void
6651 bridge_rttrim(struct bridge_softc *sc)
6652 {
6653 struct bridge_rtnode *brt, *nbrt;
6654
6655 BRIDGE_LOCK_ASSERT_HELD(sc);
6656
6657 /* Make sure we actually need to do this. */
6658 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6659 return;
6660 }
6661
6662 /* Force an aging cycle; this might trim enough addresses. */
6663 bridge_rtage(sc);
6664 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6665 return;
6666 }
6667
6668 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6669 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6670 bridge_rtnode_destroy(sc, brt);
6671 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6672 return;
6673 }
6674 }
6675 }
6676 }
6677
6678 /*
6679 * bridge_aging_timer:
6680 *
6681 * Aging periodic timer for the bridge routing table.
6682 */
6683 static void
6684 bridge_aging_timer(struct bridge_softc *sc)
6685 {
6686 BRIDGE_LOCK_ASSERT_HELD(sc);
6687
6688 bridge_rtage(sc);
6689 if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6690 (sc->sc_flags & SCF_DETACHING) == 0) {
6691 sc->sc_aging_timer.bdc_sc = sc;
6692 sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6693 sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6694 bridge_schedule_delayed_call(&sc->sc_aging_timer);
6695 }
6696 }
6697
6698 /*
6699 * bridge_rtage:
6700 *
6701 * Perform an aging cycle.
6702 */
6703 static void
6704 bridge_rtage(struct bridge_softc *sc)
6705 {
6706 struct bridge_rtnode *brt, *nbrt;
6707 unsigned long now;
6708
6709 BRIDGE_LOCK_ASSERT_HELD(sc);
6710
6711 now = (unsigned long) net_uptime();
6712
6713 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6714 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6715 if (now >= brt->brt_expire) {
6716 bridge_rtnode_destroy(sc, brt);
6717 }
6718 }
6719 }
6720 if (sc->sc_mac_nat_bif != NULL) {
6721 bridge_mac_nat_age_entries(sc, now);
6722 }
6723 }
6724
6725 /*
6726 * bridge_rtflush:
6727 *
6728 * Remove all dynamic addresses from the bridge.
6729 */
6730 static void
6731 bridge_rtflush(struct bridge_softc *sc, int full)
6732 {
6733 struct bridge_rtnode *brt, *nbrt;
6734
6735 BRIDGE_LOCK_ASSERT_HELD(sc);
6736
6737 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6738 if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6739 bridge_rtnode_destroy(sc, brt);
6740 }
6741 }
6742 }
6743
6744 /*
6745 * bridge_rtdaddr:
6746 *
6747 * Remove an address from the table.
6748 */
6749 static int
6750 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6751 {
6752 struct bridge_rtnode *brt;
6753 int found = 0;
6754
6755 BRIDGE_LOCK_ASSERT_HELD(sc);
6756
6757 /*
6758 * If vlan is zero then we want to delete for all vlans so the lookup
6759 * may return more than one.
6760 */
6761 while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6762 bridge_rtnode_destroy(sc, brt);
6763 found = 1;
6764 }
6765
6766 return found ? 0 : ENOENT;
6767 }
6768
6769 /*
6770 * bridge_rtdelete:
6771 *
6772 * Delete routes to a specific member interface.
6773 */
6774 static void
6775 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6776 {
6777 struct bridge_rtnode *brt, *nbrt;
6778
6779 BRIDGE_LOCK_ASSERT_HELD(sc);
6780
6781 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6782 if (brt->brt_ifp == ifp && (full ||
6783 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6784 bridge_rtnode_destroy(sc, brt);
6785 }
6786 }
6787 }
6788
6789 /*
6790 * bridge_rtable_init:
6791 *
6792 * Initialize the route table for this bridge.
6793 */
6794 static int
6795 bridge_rtable_init(struct bridge_softc *sc)
6796 {
6797 u_int32_t i;
6798
6799 sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6800 BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6801 sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6802
6803 for (i = 0; i < sc->sc_rthash_size; i++) {
6804 LIST_INIT(&sc->sc_rthash[i]);
6805 }
6806
6807 sc->sc_rthash_key = RandomULong();
6808
6809 LIST_INIT(&sc->sc_rtlist);
6810
6811 return 0;
6812 }
6813
6814 /*
6815 * bridge_rthash_delayed_resize:
6816 *
6817 * Resize the routing table hash on a delayed thread call.
6818 */
6819 static void
6820 bridge_rthash_delayed_resize(struct bridge_softc *sc)
6821 {
6822 u_int32_t new_rthash_size = 0;
6823 u_int32_t old_rthash_size = 0;
6824 struct _bridge_rtnode_list *new_rthash = NULL;
6825 struct _bridge_rtnode_list *old_rthash = NULL;
6826 u_int32_t i;
6827 struct bridge_rtnode *brt;
6828 int error = 0;
6829
6830 BRIDGE_LOCK_ASSERT_HELD(sc);
6831
6832 /*
6833 * Four entries per hash bucket is our ideal load factor
6834 */
6835 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6836 goto out;
6837 }
6838
6839 /*
6840 * Doubling the number of hash buckets may be too simplistic
6841 * especially when facing a spike of new entries
6842 */
6843 new_rthash_size = sc->sc_rthash_size * 2;
6844
6845 sc->sc_flags |= SCF_RESIZING;
6846 BRIDGE_UNLOCK(sc);
6847
6848 new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
6849 Z_WAITOK | Z_ZERO);
6850
6851 BRIDGE_LOCK(sc);
6852 sc->sc_flags &= ~SCF_RESIZING;
6853
6854 if (new_rthash == NULL) {
6855 error = ENOMEM;
6856 goto out;
6857 }
6858 if ((sc->sc_flags & SCF_DETACHING)) {
6859 error = ENODEV;
6860 goto out;
6861 }
6862 /*
6863 * Fail safe from here on
6864 */
6865 old_rthash = sc->sc_rthash;
6866 old_rthash_size = sc->sc_rthash_size;
6867 sc->sc_rthash = new_rthash;
6868 sc->sc_rthash_size = new_rthash_size;
6869
6870 /*
6871 * Get a new key to force entries to be shuffled around to reduce
6872 * the likelihood they will land in the same buckets
6873 */
6874 sc->sc_rthash_key = RandomULong();
6875
6876 for (i = 0; i < sc->sc_rthash_size; i++) {
6877 LIST_INIT(&sc->sc_rthash[i]);
6878 }
6879
6880 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
6881 LIST_REMOVE(brt, brt_hash);
6882 (void) bridge_rtnode_hash(sc, brt);
6883 }
6884 out:
6885 if (error == 0) {
6886 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6887 "%s new size %u",
6888 sc->sc_ifp->if_xname, sc->sc_rthash_size);
6889 kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
6890 } else {
6891 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
6892 "%s failed %d", sc->sc_ifp->if_xname, error);
6893 kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
6894 }
6895 }
6896
6897 /*
6898 * Resize the number of hash buckets based on the load factor
6899 * Currently only grow
6900 * Failing to resize the hash table is not fatal
6901 */
6902 static void
6903 bridge_rthash_resize(struct bridge_softc *sc)
6904 {
6905 BRIDGE_LOCK_ASSERT_HELD(sc);
6906
6907 if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
6908 return;
6909 }
6910
6911 /*
6912 * Four entries per hash bucket is our ideal load factor
6913 */
6914 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6915 return;
6916 }
6917 /*
6918 * Hard limit on the size of the routing hash table
6919 */
6920 if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
6921 return;
6922 }
6923
6924 sc->sc_resize_call.bdc_sc = sc;
6925 sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
6926 bridge_schedule_delayed_call(&sc->sc_resize_call);
6927 }
6928
6929 /*
6930 * bridge_rtable_fini:
6931 *
6932 * Deconstruct the route table for this bridge.
6933 */
6934 static void
6935 bridge_rtable_fini(struct bridge_softc *sc)
6936 {
6937 KASSERT(sc->sc_brtcnt == 0,
6938 ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
6939 kfree_type(struct _bridge_rtnode_list, sc->sc_rthash_size,
6940 sc->sc_rthash);
6941 sc->sc_rthash = NULL;
6942 sc->sc_rthash_size = 0;
6943 }
6944
6945 /*
6946 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
6947 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
6948 */
6949 #define mix(a, b, c) \
6950 do { \
6951 a -= b; a -= c; a ^= (c >> 13); \
6952 b -= c; b -= a; b ^= (a << 8); \
6953 c -= a; c -= b; c ^= (b >> 13); \
6954 a -= b; a -= c; a ^= (c >> 12); \
6955 b -= c; b -= a; b ^= (a << 16); \
6956 c -= a; c -= b; c ^= (b >> 5); \
6957 a -= b; a -= c; a ^= (c >> 3); \
6958 b -= c; b -= a; b ^= (a << 10); \
6959 c -= a; c -= b; c ^= (b >> 15); \
6960 } while ( /*CONSTCOND*/ 0)
6961
6962 static __inline uint32_t
6963 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
6964 {
6965 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
6966
6967 b += addr[5] << 8;
6968 b += addr[4];
6969 a += addr[3] << 24;
6970 a += addr[2] << 16;
6971 a += addr[1] << 8;
6972 a += addr[0];
6973
6974 mix(a, b, c);
6975
6976 return c & BRIDGE_RTHASH_MASK(sc);
6977 }
6978
6979 #undef mix
6980
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.  Returns 0 when they
 *	are equal, otherwise the difference (a[i] - b[i]) at the first
 *	byte where they differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int delta = (int)a[idx] - (int)b[idx];

		if (delta != 0) {
			return delta;
		}
	}

	return 0;
}
6992
6993 /*
6994 * bridge_rtnode_lookup:
6995 *
6996 * Look up a bridge route node for the specified destination. Compare the
6997 * vlan id or if zero then just return the first match.
6998 */
6999 static struct bridge_rtnode *
7000 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr,
7001 uint16_t vlan)
7002 {
7003 struct bridge_rtnode *brt;
7004 uint32_t hash;
7005 int dir;
7006
7007 BRIDGE_LOCK_ASSERT_HELD(sc);
7008
7009 hash = bridge_rthash(sc, addr);
7010 LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7011 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7012 if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7013 return brt;
7014 }
7015 if (dir > 0) {
7016 return NULL;
7017 }
7018 }
7019
7020 return NULL;
7021 }
7022
7023 /*
7024 * bridge_rtnode_hash:
7025 *
7026 * Insert the specified bridge node into the route hash table.
7027 * This is used when adding a new node or to rehash when resizing
7028 * the hash table
7029 */
7030 static int
7031 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7032 {
7033 struct bridge_rtnode *lbrt;
7034 uint32_t hash;
7035 int dir;
7036
7037 BRIDGE_LOCK_ASSERT_HELD(sc);
7038
7039 hash = bridge_rthash(sc, brt->brt_addr);
7040
7041 lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7042 if (lbrt == NULL) {
7043 LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7044 goto out;
7045 }
7046
7047 do {
7048 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7049 if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7050 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7051 "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7052 sc->sc_ifp->if_xname,
7053 brt->brt_addr[0], brt->brt_addr[1],
7054 brt->brt_addr[2], brt->brt_addr[3],
7055 brt->brt_addr[4], brt->brt_addr[5]);
7056 return EEXIST;
7057 }
7058 if (dir > 0) {
7059 LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7060 goto out;
7061 }
7062 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7063 LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7064 goto out;
7065 }
7066 lbrt = LIST_NEXT(lbrt, brt_hash);
7067 } while (lbrt != NULL);
7068
7069 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7070 "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7071 sc->sc_ifp->if_xname,
7072 brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7073 brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7074 out:
7075 return 0;
7076 }
7077
7078 /*
7079 * bridge_rtnode_insert:
7080 *
7081 * Insert the specified bridge node into the route table. We
7082 * assume the entry is not already in the table.
7083 */
7084 static int
7085 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7086 {
7087 int error;
7088
7089 error = bridge_rtnode_hash(sc, brt);
7090 if (error != 0) {
7091 return error;
7092 }
7093
7094 LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7095 sc->sc_brtcnt++;
7096
7097 bridge_rthash_resize(sc);
7098
7099 return 0;
7100 }
7101
7102 /*
7103 * bridge_rtnode_destroy:
7104 *
7105 * Destroy a bridge rtnode.
7106 */
7107 static void
7108 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7109 {
7110 BRIDGE_LOCK_ASSERT_HELD(sc);
7111
7112 LIST_REMOVE(brt, brt_hash);
7113
7114 LIST_REMOVE(brt, brt_list);
7115 sc->sc_brtcnt--;
7116 brt->brt_dst->bif_addrcnt--;
7117 zfree(bridge_rtnode_pool, brt);
7118 }
7119
7120 #if BRIDGESTP
7121 /*
7122 * bridge_rtable_expire:
7123 *
7124 * Set the expiry time for all routes on an interface.
7125 */
7126 static void
7127 bridge_rtable_expire(struct ifnet *ifp, int age)
7128 {
7129 struct bridge_softc *sc = ifp->if_bridge;
7130 struct bridge_rtnode *brt;
7131
7132 BRIDGE_LOCK(sc);
7133
7134 /*
7135 * If the age is zero then flush, otherwise set all the expiry times to
7136 * age for the interface
7137 */
7138 if (age == 0) {
7139 bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7140 } else {
7141 unsigned long now;
7142
7143 now = (unsigned long) net_uptime();
7144
7145 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7146 /* Cap the expiry time to 'age' */
7147 if (brt->brt_ifp == ifp &&
7148 brt->brt_expire > now + age &&
7149 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7150 brt->brt_expire = now + age;
7151 }
7152 }
7153 }
7154 BRIDGE_UNLOCK(sc);
7155 }
7156
7157 /*
7158 * bridge_state_change:
7159 *
7160 * Callback from the bridgestp code when a port changes states.
7161 */
7162 static void
7163 bridge_state_change(struct ifnet *ifp, int state)
7164 {
7165 struct bridge_softc *sc = ifp->if_bridge;
7166 static const char *stpstates[] = {
7167 "disabled",
7168 "listening",
7169 "learning",
7170 "forwarding",
7171 "blocking",
7172 "discarding"
7173 };
7174
7175 if (log_stp) {
7176 log(LOG_NOTICE, "%s: state changed to %s on %s",
7177 sc->sc_ifp->if_xname,
7178 stpstates[state], ifp->if_xname);
7179 }
7180 }
7181 #endif /* BRIDGESTP */
7182
7183 /*
7184 * bridge_set_bpf_tap:
7185 *
7186 * Sets ups the BPF callbacks.
7187 */
7188 static errno_t
7189 bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
7190 {
7191 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7192
7193 /* TBD locking */
7194 if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
7195 return ENODEV;
7196 }
7197 switch (mode) {
7198 case BPF_TAP_DISABLE:
7199 sc->sc_bpf_input = sc->sc_bpf_output = NULL;
7200 break;
7201
7202 case BPF_TAP_INPUT:
7203 sc->sc_bpf_input = bpf_callback;
7204 break;
7205
7206 case BPF_TAP_OUTPUT:
7207 sc->sc_bpf_output = bpf_callback;
7208 break;
7209
7210 case BPF_TAP_INPUT_OUTPUT:
7211 sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
7212 break;
7213
7214 default:
7215 break;
7216 }
7217
7218 return 0;
7219 }
7220
7221 /*
7222 * bridge_detach:
7223 *
7224 * Callback when interface has been detached.
7225 */
7226 static void
7227 bridge_detach(ifnet_t ifp)
7228 {
7229 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7230
7231 #if BRIDGESTP
7232 bstp_detach(&sc->sc_stp);
7233 #endif /* BRIDGESTP */
7234
7235 /* Tear down the routing table. */
7236 bridge_rtable_fini(sc);
7237
7238 lck_mtx_lock(&bridge_list_mtx);
7239 LIST_REMOVE(sc, sc_list);
7240 lck_mtx_unlock(&bridge_list_mtx);
7241
7242 ifnet_release(ifp);
7243
7244 lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
7245 if_clone_softc_deallocate(&bridge_cloner, sc);
7246 }
7247
7248 /*
7249 * bridge_bpf_input:
7250 *
7251 * Invoke the input BPF callback if enabled
7252 */
7253 static errno_t
7254 bridge_bpf_input(ifnet_t ifp, struct mbuf *m, const char * func, int line)
7255 {
7256 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7257 bpf_packet_func input_func = sc->sc_bpf_input;
7258
7259 if (input_func != NULL) {
7260 if (mbuf_pkthdr_rcvif(m) != ifp) {
7261 BRIDGE_LOG(LOG_NOTICE, 0,
7262 "%s.%d: rcvif: 0x%llx != ifp 0x%llx", func, line,
7263 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
7264 (uint64_t)VM_KERNEL_ADDRPERM(ifp));
7265 }
7266 (*input_func)(ifp, m);
7267 }
7268 return 0;
7269 }
7270
7271 /*
7272 * bridge_bpf_output:
7273 *
7274 * Invoke the output BPF callback if enabled
7275 */
7276 static errno_t
7277 bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
7278 {
7279 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7280 bpf_packet_func output_func = sc->sc_bpf_output;
7281
7282 if (output_func != NULL) {
7283 (*output_func)(ifp, m);
7284 }
7285 return 0;
7286 }
7287
7288 /*
7289 * bridge_link_event:
7290 *
7291 * Report a data link event on an interface
7292 */
7293 static void
7294 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7295 {
7296 struct event {
7297 u_int32_t ifnet_family;
7298 u_int32_t unit;
7299 char if_name[IFNAMSIZ];
7300 };
7301 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7302 struct kern_event_msg *header = (struct kern_event_msg*)message;
7303 struct event *data = (struct event *)(header + 1);
7304
7305 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7306 "%s event_code %u - %s", ifp->if_xname,
7307 event_code, dlil_kev_dl_code_str(event_code));
7308 header->total_size = sizeof(message);
7309 header->vendor_code = KEV_VENDOR_APPLE;
7310 header->kev_class = KEV_NETWORK_CLASS;
7311 header->kev_subclass = KEV_DL_SUBCLASS;
7312 header->event_code = event_code;
7313 data->ifnet_family = ifnet_family(ifp);
7314 data->unit = (u_int32_t)ifnet_unit(ifp);
7315 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7316 ifnet_event(ifp, header);
7317 }
7318
/*
 * BRIDGE_HF_DROP:
 *
 *	Account a host-filter drop: bump the named counter in
 *	bridge_hostfilter_stats, log the reason with the caller's
 *	function and line, and set the caller's local `error' to EINVAL.
 *	Wrapped in do { } while (0) so that it behaves as a single
 *	statement (e.g. as the body of an unbraced if/else).
 */
#define BRIDGE_HF_DROP(reason, func, line)                              \
do {                                                                    \
	bridge_hostfilter_stats.reason++;                               \
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,                       \
	    "%s.%d" #reason, func, line);                               \
	error = EINVAL;                                                 \
} while ( /*CONSTCOND*/ 0)
7325
7326 /*
7327 * Make sure this is a DHCP or Bootp request that match the host filter
7328 */
7329 static int
7330 bridge_dhcp_filter(struct bridge_iflist *bif, struct mbuf *m, size_t offset)
7331 {
7332 int error = EINVAL;
7333 struct dhcp dhcp;
7334
7335 /*
7336 * Note: We use the dhcp structure because bootp structure definition
7337 * is larger and some vendors do not pad the request
7338 */
7339 error = mbuf_copydata(m, offset, sizeof(struct dhcp), &dhcp);
7340 if (error != 0) {
7341 BRIDGE_HF_DROP(brhf_dhcp_too_small, __func__, __LINE__);
7342 goto done;
7343 }
7344 if (dhcp.dp_op != BOOTREQUEST) {
7345 BRIDGE_HF_DROP(brhf_dhcp_bad_op, __func__, __LINE__);
7346 goto done;
7347 }
7348 /*
7349 * The hardware address must be an exact match
7350 */
7351 if (dhcp.dp_htype != ARPHRD_ETHER) {
7352 BRIDGE_HF_DROP(brhf_dhcp_bad_htype, __func__, __LINE__);
7353 goto done;
7354 }
7355 if (dhcp.dp_hlen != ETHER_ADDR_LEN) {
7356 BRIDGE_HF_DROP(brhf_dhcp_bad_hlen, __func__, __LINE__);
7357 goto done;
7358 }
7359 if (bcmp(dhcp.dp_chaddr, bif->bif_hf_hwsrc,
7360 ETHER_ADDR_LEN) != 0) {
7361 BRIDGE_HF_DROP(brhf_dhcp_bad_chaddr, __func__, __LINE__);
7362 goto done;
7363 }
7364 /*
7365 * Client address must match the host address or be not specified
7366 */
7367 if (dhcp.dp_ciaddr.s_addr != bif->bif_hf_ipsrc.s_addr &&
7368 dhcp.dp_ciaddr.s_addr != INADDR_ANY) {
7369 BRIDGE_HF_DROP(brhf_dhcp_bad_ciaddr, __func__, __LINE__);
7370 goto done;
7371 }
7372 error = 0;
7373 done:
7374 return error;
7375 }
7376
7377 static int
7378 bridge_host_filter(struct bridge_iflist *bif, mbuf_t *data)
7379 {
7380 int error = EINVAL;
7381 struct ether_header *eh;
7382 static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
7383 mbuf_t m = *data;
7384
7385 eh = mtod(m, struct ether_header *);
7386
7387 /*
7388 * Restrict the source hardware address
7389 */
7390 if ((bif->bif_flags & BIFF_HF_HWSRC) == 0 ||
7391 bcmp(eh->ether_shost, bif->bif_hf_hwsrc,
7392 ETHER_ADDR_LEN) != 0) {
7393 BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr, __func__, __LINE__);
7394 goto done;
7395 }
7396
7397 /*
7398 * Restrict Ethernet protocols to ARP and IP
7399 */
7400 if (eh->ether_type == htons(ETHERTYPE_ARP)) {
7401 struct ether_arp *ea;
7402 size_t minlen = sizeof(struct ether_header) +
7403 sizeof(struct ether_arp);
7404
7405 /*
7406 * Make the Ethernet and ARP headers contiguous
7407 */
7408 if (mbuf_pkthdr_len(m) < minlen) {
7409 BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7410 goto done;
7411 }
7412 if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7413 BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7414 __func__, __LINE__);
7415 goto done;
7416 }
7417 m = *data;
7418
7419 /*
7420 * Verify this is an ethernet/ip arp
7421 */
7422 eh = mtod(m, struct ether_header *);
7423 ea = (struct ether_arp *)(eh + 1);
7424 if (ea->arp_hrd != htons(ARPHRD_ETHER)) {
7425 BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7426 __func__, __LINE__);
7427 goto done;
7428 }
7429 if (ea->arp_pro != htons(ETHERTYPE_IP)) {
7430 BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7431 __func__, __LINE__);
7432 goto done;
7433 }
7434 /*
7435 * Verify the address lengths are correct
7436 */
7437 if (ea->arp_hln != ETHER_ADDR_LEN) {
7438 BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7439 goto done;
7440 }
7441 if (ea->arp_pln != sizeof(struct in_addr)) {
7442 BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7443 __func__, __LINE__);
7444 goto done;
7445 }
7446
7447 /*
7448 * Allow only ARP request or ARP reply
7449 */
7450 if (ea->arp_op != htons(ARPOP_REQUEST) &&
7451 ea->arp_op != htons(ARPOP_REPLY)) {
7452 BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7453 goto done;
7454 }
7455 /*
7456 * Verify source hardware address matches
7457 */
7458 if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7459 ETHER_ADDR_LEN) != 0) {
7460 BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7461 goto done;
7462 }
7463 /*
7464 * Verify source protocol address:
7465 * May be null for an ARP probe
7466 */
7467 if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7468 sizeof(struct in_addr)) != 0 &&
7469 bcmp(ea->arp_spa, &inaddr_any,
7470 sizeof(struct in_addr)) != 0) {
7471 BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7472 goto done;
7473 }
7474 bridge_hostfilter_stats.brhf_arp_ok += 1;
7475 error = 0;
7476 } else if (eh->ether_type == htons(ETHERTYPE_IP)) {
7477 size_t minlen = sizeof(struct ether_header) + sizeof(struct ip);
7478 struct ip iphdr;
7479 size_t offset;
7480
7481 /*
7482 * Make the Ethernet and IP headers contiguous
7483 */
7484 if (mbuf_pkthdr_len(m) < minlen) {
7485 BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7486 goto done;
7487 }
7488 offset = sizeof(struct ether_header);
7489 error = mbuf_copydata(m, offset, sizeof(struct ip), &iphdr);
7490 if (error != 0) {
7491 BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7492 goto done;
7493 }
7494 /*
7495 * Verify the source IP address
7496 */
7497 if (iphdr.ip_p == IPPROTO_UDP) {
7498 struct udphdr udp;
7499
7500 minlen += sizeof(struct udphdr);
7501 if (mbuf_pkthdr_len(m) < minlen) {
7502 BRIDGE_HF_DROP(brhf_ip_too_small,
7503 __func__, __LINE__);
7504 goto done;
7505 }
7506
7507 /*
7508 * Allow all zero addresses for DHCP requests
7509 */
7510 if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr &&
7511 iphdr.ip_src.s_addr != INADDR_ANY) {
7512 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7513 __func__, __LINE__);
7514 goto done;
7515 }
7516 offset = sizeof(struct ether_header) +
7517 (IP_VHL_HL(iphdr.ip_vhl) << 2);
7518 error = mbuf_copydata(m, offset,
7519 sizeof(struct udphdr), &udp);
7520 if (error != 0) {
7521 BRIDGE_HF_DROP(brhf_ip_too_small,
7522 __func__, __LINE__);
7523 goto done;
7524 }
7525 /*
7526 * Either it's a Bootp/DHCP packet that we like or
7527 * it's a UDP packet from the host IP as source address
7528 */
7529 if (udp.uh_sport == htons(IPPORT_BOOTPC) &&
7530 udp.uh_dport == htons(IPPORT_BOOTPS)) {
7531 minlen += sizeof(struct dhcp);
7532 if (mbuf_pkthdr_len(m) < minlen) {
7533 BRIDGE_HF_DROP(brhf_ip_too_small,
7534 __func__, __LINE__);
7535 goto done;
7536 }
7537 offset += sizeof(struct udphdr);
7538 error = bridge_dhcp_filter(bif, m, offset);
7539 if (error != 0) {
7540 goto done;
7541 }
7542 } else if (iphdr.ip_src.s_addr == INADDR_ANY) {
7543 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7544 __func__, __LINE__);
7545 goto done;
7546 }
7547 } else if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr ||
7548 bif->bif_hf_ipsrc.s_addr == INADDR_ANY) {
7549 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, __func__, __LINE__);
7550 goto done;
7551 }
7552 /*
7553 * Allow only boring IP protocols
7554 */
7555 if (iphdr.ip_p != IPPROTO_TCP &&
7556 iphdr.ip_p != IPPROTO_UDP &&
7557 iphdr.ip_p != IPPROTO_ICMP &&
7558 iphdr.ip_p != IPPROTO_ESP &&
7559 iphdr.ip_p != IPPROTO_AH &&
7560 iphdr.ip_p != IPPROTO_GRE) {
7561 BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7562 goto done;
7563 }
7564 bridge_hostfilter_stats.brhf_ip_ok += 1;
7565 error = 0;
7566 } else {
7567 BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
7568 goto done;
7569 }
7570 done:
7571 if (error != 0) {
7572 if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
7573 if (m) {
7574 brlog_mbuf_data(m, 0,
7575 sizeof(struct ether_header) +
7576 sizeof(struct ip));
7577 }
7578 }
7579
7580 if (m != NULL) {
7581 m_freem(m);
7582 }
7583 }
7584 return error;
7585 }
7586
7587 /*
7588 * MAC NAT
7589 */
7590
7591 static errno_t
7592 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7593 {
7594 errno_t error = 0;
7595
7596 BRIDGE_LOCK_ASSERT_HELD(sc);
7597
7598 if (IFNET_IS_VMNET(bif->bif_ifp)) {
7599 error = EINVAL;
7600 goto done;
7601 }
7602 if (sc->sc_mac_nat_bif != NULL) {
7603 if (sc->sc_mac_nat_bif != bif) {
7604 error = EBUSY;
7605 }
7606 goto done;
7607 }
7608 sc->sc_mac_nat_bif = bif;
7609 bif->bif_ifflags |= IFBIF_MAC_NAT;
7610 bridge_mac_nat_populate_entries(sc);
7611
7612 done:
7613 return error;
7614 }
7615
7616 static void
7617 bridge_mac_nat_disable(struct bridge_softc *sc)
7618 {
7619 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7620
7621 assert(mac_nat_bif != NULL);
7622 bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7623 mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7624 sc->sc_mac_nat_bif = NULL;
7625 return;
7626 }
7627
7628 static void
7629 mac_nat_entry_print2(struct mac_nat_entry *mne,
7630 char *ifname, const char *msg1, const char *msg2)
7631 {
7632 int af;
7633 char etopbuf[24];
7634 char ntopbuf[MAX_IPv6_STR_LEN];
7635 const char *space;
7636
7637 af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7638 ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7639 (void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7640 if (msg2 == NULL) {
7641 msg2 = "";
7642 space = "";
7643 } else {
7644 space = " ";
7645 }
7646 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7647 "%s %s%s%s %p (%s, %s, %s)",
7648 ifname, msg1, space, msg2, mne, mne->mne_bif->bif_ifp->if_xname,
7649 ntopbuf, etopbuf);
7650 }
7651
7652 static void
7653 mac_nat_entry_print(struct mac_nat_entry *mne,
7654 char *ifname, const char *msg)
7655 {
7656 mac_nat_entry_print2(mne, ifname, msg, NULL);
7657 }
7658
7659 static struct mac_nat_entry *
7660 bridge_lookup_mac_nat_entry(struct bridge_softc *sc, int af, void * ip)
7661 {
7662 struct mac_nat_entry *mne;
7663 struct mac_nat_entry *ret_mne = NULL;
7664
7665 if (af == AF_INET) {
7666 in_addr_t s_addr = ((struct in_addr *)ip)->s_addr;
7667
7668 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7669 if (mne->mne_ip.s_addr == s_addr) {
7670 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7671 mac_nat_entry_print(mne, sc->sc_if_xname,
7672 "found");
7673 }
7674 ret_mne = mne;
7675 break;
7676 }
7677 }
7678 } else {
7679 const struct in6_addr *ip6 = (const struct in6_addr *)ip;
7680
7681 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7682 if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7683 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7684 mac_nat_entry_print(mne, sc->sc_if_xname,
7685 "found");
7686 }
7687 ret_mne = mne;
7688 break;
7689 }
7690 }
7691 }
7692 return ret_mne;
7693 }
7694
7695 static void
7696 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7697 struct mac_nat_entry *mne, const char *reason)
7698 {
7699 LIST_REMOVE(mne, mne_list);
7700 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7701 mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7702 }
7703 zfree(bridge_mne_pool, mne);
7704 sc->sc_mne_count--;
7705 }
7706
7707 static struct mac_nat_entry *
7708 bridge_create_mac_nat_entry(struct bridge_softc *sc,
7709 struct bridge_iflist *bif, int af, const void *ip, uint8_t *eaddr)
7710 {
7711 struct mac_nat_entry_list *list;
7712 struct mac_nat_entry *mne;
7713
7714 if (sc->sc_mne_count >= sc->sc_mne_max) {
7715 sc->sc_mne_allocation_failures++;
7716 return NULL;
7717 }
7718 mne = zalloc_noblock(bridge_mne_pool);
7719 if (mne == NULL) {
7720 sc->sc_mne_allocation_failures++;
7721 return NULL;
7722 }
7723 sc->sc_mne_count++;
7724 bzero(mne, sizeof(*mne));
7725 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7726 mne->mne_bif = bif;
7727 if (af == AF_INET) {
7728 bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7729 list = &sc->sc_mne_list;
7730 } else {
7731 bcopy(ip, &mne->mne_ip6, sizeof(mne->mne_ip6));
7732 mne->mne_flags |= MNE_FLAGS_IPV6;
7733 list = &sc->sc_mne_list_v6;
7734 }
7735 LIST_INSERT_HEAD(list, mne, mne_list);
7736 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7737 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7738 mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7739 }
7740 return mne;
7741 }
7742
7743 static struct mac_nat_entry *
7744 bridge_update_mac_nat_entry(struct bridge_softc *sc,
7745 struct bridge_iflist *bif, int af, void *ip, uint8_t *eaddr)
7746 {
7747 struct mac_nat_entry *mne;
7748
7749 mne = bridge_lookup_mac_nat_entry(sc, af, ip);
7750 if (mne != NULL) {
7751 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7752
7753 if (mne->mne_bif == mac_nat_bif) {
7754 /* the MAC NAT interface takes precedence */
7755 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7756 if (mne->mne_bif != bif) {
7757 mac_nat_entry_print2(mne,
7758 sc->sc_if_xname, "reject",
7759 bif->bif_ifp->if_xname);
7760 }
7761 }
7762 } else if (mne->mne_bif != bif) {
7763 const char *old_if = mne->mne_bif->bif_ifp->if_xname;
7764
7765 mne->mne_bif = bif;
7766 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7767 mac_nat_entry_print2(mne,
7768 sc->sc_if_xname, "replaced",
7769 old_if);
7770 }
7771 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7772 }
7773 mne->mne_expire = (unsigned long)net_uptime() +
7774 sc->sc_brttimeout;
7775 } else {
7776 mne = bridge_create_mac_nat_entry(sc, bif, af, ip, eaddr);
7777 }
7778 return mne;
7779 }
7780
7781 static void
7782 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7783 struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7784 {
7785 struct mac_nat_entry *mne;
7786 struct mac_nat_entry *tmne;
7787
7788 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7789 if (bif != NULL && mne->mne_bif != bif) {
7790 continue;
7791 }
7792 bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7793 }
7794 }
7795
7796 /*
7797 * bridge_mac_nat_flush_entries:
7798 *
7799 * Flush MAC NAT entries for the specified member. Flush all entries if
7800 * the member is the one that requires MAC NAT, otherwise just flush the
7801 * ones for the specified member.
7802 */
7803 static void
7804 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7805 {
7806 struct bridge_iflist *flush_bif;
7807
7808 flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7809 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7810 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7811 }
7812
7813 static void
7814 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7815 {
7816 errno_t error;
7817 ifnet_t ifp;
7818 ifaddr_t *list;
7819 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7820
7821 assert(mac_nat_bif != NULL);
7822 ifp = mac_nat_bif->bif_ifp;
7823 error = ifnet_get_address_list(ifp, &list);
7824 if (error != 0) {
7825 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7826 "ifnet_get_address_list(%s) failed %d",
7827 ifp->if_xname, error);
7828 return;
7829 }
7830 for (ifaddr_t *scan = list; *scan != NULL; scan++) {
7831 sa_family_t af;
7832 void *ip;
7833
7834 union {
7835 struct sockaddr sa;
7836 struct sockaddr_in sin;
7837 struct sockaddr_in6 sin6;
7838 } u;
7839 af = ifaddr_address_family(*scan);
7840 switch (af) {
7841 case AF_INET:
7842 case AF_INET6:
7843 error = ifaddr_address(*scan, &u.sa, sizeof(u));
7844 if (error != 0) {
7845 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7846 "ifaddr_address failed %d",
7847 error);
7848 break;
7849 }
7850 if (af == AF_INET) {
7851 ip = (void *)&u.sin.sin_addr;
7852 } else {
7853 if (IN6_IS_ADDR_LINKLOCAL(&u.sin6.sin6_addr)) {
7854 /* remove scope ID */
7855 u.sin6.sin6_addr.s6_addr16[1] = 0;
7856 }
7857 ip = (void *)&u.sin6.sin6_addr;
7858 }
7859 bridge_create_mac_nat_entry(sc, mac_nat_bif, af, ip,
7860 (uint8_t *)IF_LLADDR(ifp));
7861 break;
7862 default:
7863 break;
7864 }
7865 }
7866 ifnet_free_address_list(list);
7867 return;
7868 }
7869
7870 static void
7871 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7872 struct mac_nat_entry_list *list, unsigned long now)
7873 {
7874 struct mac_nat_entry *mne;
7875 struct mac_nat_entry *tmne;
7876
7877 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7878 if (now >= mne->mne_expire) {
7879 bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7880 }
7881 }
7882 }
7883
7884 static void
7885 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7886 {
7887 if (sc->sc_mac_nat_bif == NULL) {
7888 return;
7889 }
7890 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7891 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7892 }
7893
7894 static const char *
7895 get_in_out_string(boolean_t is_output)
7896 {
7897 return is_output ? "OUT" : "IN";
7898 }
7899
7900 /*
7901 * is_valid_arp_packet:
7902 * Verify that this is a valid ARP packet.
7903 *
7904 * Returns TRUE if the packet is valid, FALSE otherwise.
7905 */
7906 static boolean_t
7907 is_valid_arp_packet(mbuf_t *data, boolean_t is_output,
7908 struct ether_header **eh_p, struct ether_arp **ea_p)
7909 {
7910 struct ether_arp *ea;
7911 struct ether_header *eh;
7912 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7913 boolean_t is_valid = FALSE;
7914 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7915
7916 if (mbuf_pkthdr_len(*data) < minlen) {
7917 BRIDGE_LOG(LOG_DEBUG, flags,
7918 "ARP %s short frame %lu < %lu",
7919 get_in_out_string(is_output),
7920 mbuf_pkthdr_len(*data), minlen);
7921 goto done;
7922 }
7923 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7924 BRIDGE_LOG(LOG_DEBUG, flags,
7925 "ARP %s size %lu mbuf_pullup fail",
7926 get_in_out_string(is_output),
7927 minlen);
7928 *data = NULL;
7929 goto done;
7930 }
7931
7932 /* validate ARP packet */
7933 eh = mtod(*data, struct ether_header *);
7934 ea = (struct ether_arp *)(eh + 1);
7935 if (ntohs(ea->arp_hrd) != ARPHRD_ETHER) {
7936 BRIDGE_LOG(LOG_DEBUG, flags,
7937 "ARP %s htype not ethernet",
7938 get_in_out_string(is_output));
7939 goto done;
7940 }
7941 if (ea->arp_hln != ETHER_ADDR_LEN) {
7942 BRIDGE_LOG(LOG_DEBUG, flags,
7943 "ARP %s hlen not ethernet",
7944 get_in_out_string(is_output));
7945 goto done;
7946 }
7947 if (ntohs(ea->arp_pro) != ETHERTYPE_IP) {
7948 BRIDGE_LOG(LOG_DEBUG, flags,
7949 "ARP %s ptype not IP",
7950 get_in_out_string(is_output));
7951 goto done;
7952 }
7953 if (ea->arp_pln != sizeof(struct in_addr)) {
7954 BRIDGE_LOG(LOG_DEBUG, flags,
7955 "ARP %s plen not IP",
7956 get_in_out_string(is_output));
7957 goto done;
7958 }
7959 is_valid = TRUE;
7960 *ea_p = ea;
7961 *eh_p = eh;
7962 done:
7963 return is_valid;
7964 }
7965
7966 static struct mac_nat_entry *
7967 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7968 {
7969 struct ether_arp *ea;
7970 struct ether_header *eh;
7971 struct mac_nat_entry *mne = NULL;
7972 u_short op;
7973 struct in_addr tpa;
7974
7975 if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7976 goto done;
7977 }
7978 op = ntohs(ea->arp_op);
7979 switch (op) {
7980 case ARPOP_REQUEST:
7981 case ARPOP_REPLY:
7982 /* only care about REQUEST and REPLY */
7983 break;
7984 default:
7985 goto done;
7986 }
7987
7988 /* check the target IP address for a NAT entry */
7989 bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7990 if (tpa.s_addr != 0) {
7991 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &tpa);
7992 }
7993 if (mne != NULL) {
7994 if (op == ARPOP_REPLY) {
7995 /* translate the MAC address */
7996 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7997 char mac_src[24];
7998 char mac_dst[24];
7999
8000 ether_ntop(mac_src, sizeof(mac_src),
8001 ea->arp_tha);
8002 ether_ntop(mac_dst, sizeof(mac_dst),
8003 mne->mne_mac);
8004 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8005 "%s %s ARP %s -> %s",
8006 sc->sc_if_xname,
8007 mne->mne_bif->bif_ifp->if_xname,
8008 mac_src, mac_dst);
8009 }
8010 bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
8011 }
8012 } else {
8013 /* handle conflicting ARP (sender matches mne) */
8014 struct in_addr spa;
8015
8016 bcopy(ea->arp_spa, &spa, sizeof(spa));
8017 if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
8018 /* check the source IP for a NAT entry */
8019 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &spa);
8020 }
8021 }
8022
8023 done:
8024 return mne;
8025 }
8026
8027 static boolean_t
8028 bridge_mac_nat_arp_output(struct bridge_softc *sc,
8029 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8030 {
8031 struct ether_arp *ea;
8032 struct ether_header *eh;
8033 struct in_addr ip;
8034 struct mac_nat_entry *mne = NULL;
8035 u_short op;
8036 boolean_t translate = FALSE;
8037
8038 if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
8039 goto done;
8040 }
8041 op = ntohs(ea->arp_op);
8042 switch (op) {
8043 case ARPOP_REQUEST:
8044 case ARPOP_REPLY:
8045 /* only care about REQUEST and REPLY */
8046 break;
8047 default:
8048 goto done;
8049 }
8050
8051 bcopy(ea->arp_spa, &ip, sizeof(ip));
8052 if (ip.s_addr == 0) {
8053 goto done;
8054 }
8055 /* XXX validate IP address: no multicast/broadcast */
8056 mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip, ea->arp_sha);
8057 if (mnr != NULL && mne != NULL) {
8058 /* record the offset to do the replacement */
8059 translate = TRUE;
8060 mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
8061 }
8062
8063 done:
8064 return translate;
8065 }
8066
8067 #define ETHER_IPV4_HEADER_LEN (sizeof(struct ether_header) + \
8068 + sizeof(struct ip))
8069 static struct ether_header *
8070 get_ether_ip_header(mbuf_t *data, boolean_t is_output)
8071 {
8072 struct ether_header *eh = NULL;
8073 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8074 size_t minlen = ETHER_IPV4_HEADER_LEN;
8075
8076 if (mbuf_pkthdr_len(*data) < minlen) {
8077 BRIDGE_LOG(LOG_DEBUG, flags,
8078 "IP %s short frame %lu < %lu",
8079 get_in_out_string(is_output),
8080 mbuf_pkthdr_len(*data), minlen);
8081 goto done;
8082 }
8083 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8084 BRIDGE_LOG(LOG_DEBUG, flags,
8085 "IP %s size %lu mbuf_pullup fail",
8086 get_in_out_string(is_output),
8087 minlen);
8088 *data = NULL;
8089 goto done;
8090 }
8091 eh = mtod(*data, struct ether_header *);
8092 done:
8093 return eh;
8094 }
8095
8096 static bool
8097 is_broadcast_ip_packet(mbuf_t *data)
8098 {
8099 struct ether_header *eh;
8100 uint16_t ether_type;
8101 bool is_broadcast = FALSE;
8102
8103 eh = mtod(*data, struct ether_header *);
8104 ether_type = ntohs(eh->ether_type);
8105 switch (ether_type) {
8106 case ETHERTYPE_IP:
8107 eh = get_ether_ip_header(data, FALSE);
8108 if (eh != NULL) {
8109 struct in_addr dst;
8110 struct ip *iphdr;
8111
8112 iphdr = (struct ip *)(void *)(eh + 1);
8113 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8114 is_broadcast = (dst.s_addr == INADDR_BROADCAST);
8115 }
8116 break;
8117 default:
8118 break;
8119 }
8120 return is_broadcast;
8121 }
8122
8123 static struct mac_nat_entry *
8124 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8125 {
8126 struct in_addr dst;
8127 struct ether_header *eh;
8128 struct ip *iphdr;
8129 struct mac_nat_entry *mne = NULL;
8130
8131 eh = get_ether_ip_header(data, FALSE);
8132 if (eh == NULL) {
8133 goto done;
8134 }
8135 iphdr = (struct ip *)(void *)(eh + 1);
8136 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8137 /* XXX validate IP address */
8138 if (dst.s_addr == 0) {
8139 goto done;
8140 }
8141 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &dst);
8142 done:
8143 return mne;
8144 }
8145
8146 static void
8147 bridge_mac_nat_udp_output(struct bridge_softc *sc,
8148 struct bridge_iflist *bif, mbuf_t m,
8149 uint8_t ip_header_len, struct mac_nat_record *mnr)
8150 {
8151 uint16_t dp_flags;
8152 errno_t error;
8153 size_t offset;
8154 struct udphdr udphdr;
8155
8156 /* copy the UDP header */
8157 offset = sizeof(struct ether_header) + ip_header_len;
8158 error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
8159 if (error != 0) {
8160 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8161 "mbuf_copydata udphdr failed %d",
8162 error);
8163 return;
8164 }
8165 if (ntohs(udphdr.uh_sport) != IPPORT_BOOTPC ||
8166 ntohs(udphdr.uh_dport) != IPPORT_BOOTPS) {
8167 /* not a BOOTP/DHCP packet */
8168 return;
8169 }
8170 /* check whether the broadcast bit is already set */
8171 offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8172 error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
8173 if (error != 0) {
8174 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8175 "mbuf_copydata dp_flags failed %d",
8176 error);
8177 return;
8178 }
8179 if ((ntohs(dp_flags) & DHCP_FLAGS_BROADCAST) != 0) {
8180 /* it's already set, nothing to do */
8181 return;
8182 }
8183 /* broadcast bit needs to be set */
8184 mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8185 mnr->mnr_ip_header_len = ip_header_len;
8186 if (udphdr.uh_sum != 0) {
8187 uint16_t delta;
8188
8189 /* adjust checksum to take modified dp_flags into account */
8190 delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8191 mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8192 }
8193 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8194 "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8195 sc->sc_if_xname,
8196 bif->bif_ifp->if_xname,
8197 ntohs(mnr->mnr_ip_dhcp_flags),
8198 ntohs(mnr->mnr_ip_udp_csum));
8199 return;
8200 }
8201
8202 static boolean_t
8203 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8204 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8205 {
8206 #pragma unused(mnr)
8207 struct ether_header *eh;
8208 struct in_addr ip;
8209 struct ip *iphdr;
8210 uint8_t ip_header_len;
8211 struct mac_nat_entry *mne = NULL;
8212 boolean_t translate = FALSE;
8213
8214 eh = get_ether_ip_header(data, TRUE);
8215 if (eh == NULL) {
8216 goto done;
8217 }
8218 iphdr = (struct ip *)(void *)(eh + 1);
8219 ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8220 if (ip_header_len < sizeof(ip)) {
8221 /* bogus IP header */
8222 goto done;
8223 }
8224 bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8225 /* XXX validate the source address */
8226 if (ip.s_addr != 0) {
8227 mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip,
8228 eh->ether_shost);
8229 }
8230 if (mnr != NULL) {
8231 if (iphdr->ip_p == IPPROTO_UDP) {
8232 /* handle DHCP must broadcast */
8233 bridge_mac_nat_udp_output(sc, bif, *data,
8234 ip_header_len, mnr);
8235 }
8236 translate = TRUE;
8237 }
8238 done:
8239 return translate;
8240 }
8241
8242 #define ETHER_IPV6_HEADER_LEN (sizeof(struct ether_header) + \
8243 + sizeof(struct ip6_hdr))
8244 static struct ether_header *
8245 get_ether_ipv6_header(mbuf_t *data, boolean_t is_output)
8246 {
8247 struct ether_header *eh = NULL;
8248 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8249 size_t minlen = ETHER_IPV6_HEADER_LEN;
8250
8251 if (mbuf_pkthdr_len(*data) < minlen) {
8252 BRIDGE_LOG(LOG_DEBUG, flags,
8253 "IP %s short frame %lu < %lu",
8254 get_in_out_string(is_output),
8255 mbuf_pkthdr_len(*data), minlen);
8256 goto done;
8257 }
8258 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8259 BRIDGE_LOG(LOG_DEBUG, flags,
8260 "IP %s size %lu mbuf_pullup fail",
8261 get_in_out_string(is_output),
8262 minlen);
8263 *data = NULL;
8264 goto done;
8265 }
8266 eh = mtod(*data, struct ether_header *);
8267 done:
8268 return eh;
8269 }
8270
8271 #include <netinet/icmp6.h>
8272 #include <netinet6/nd6.h>
8273
/*
 * Size of a neighbor discovery link-layer address option that carries an
 * Ethernet address: the option header plus ETHER_ADDR_LEN bytes.
 */
#define ETHER_ND_LLADDR_LEN (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))

/*
 * bridge_mac_nat_icmpv6_output:
 *	Inspect an outbound ICMPv6 packet and, for neighbor solicitation,
 *	neighbor advertisement and router solicitation messages that carry
 *	a link-layer address option, record in 'mnr' where that address
 *	lives so it can be rewritten later:
 *	  mnr_ip6_lladdr_offset	offset of the address from 'eh'
 *	  mnr_ip6_icmp6_len	length of the ICMPv6 message
 *	  mnr_ip6_icmp6_type	ICMPv6 message type
 *	  mnr_ip6_header_len	sizeof(*ip6h)
 *	Other ICMPv6 types are ignored.  The packet is not modified.
 *
 *	'eh' and 'ip6h' point at the packet's Ethernet and IPv6 headers.
 *	'*saddrp' is the packet's source address on entry; for a duplicate
 *	address detection probe (neighbor solicitation from the unspecified
 *	address) it is overwritten with the solicitation's target address.
 *
 *	NOTE(review): the ICMPv6 header is assumed to follow the fixed IPv6
 *	header directly (off == sizeof(*ip6h)); the caller is expected to
 *	have checked ip6_nxt.  icmp6len is derived from the packet header
 *	length, not from the length of the first mbuf -- confirm that the
 *	ICMPv6 payload is contiguous with the headers before relying on the
 *	pointer arithmetic below.
 */
static void
bridge_mac_nat_icmpv6_output(struct bridge_softc *sc, struct bridge_iflist *bif,
    mbuf_t *data, struct ether_header *eh,
    struct ip6_hdr *ip6h, struct in6_addr *saddrp, struct mac_nat_record *mnr)
{
	struct icmp6_hdr *icmp6;
	unsigned int icmp6len;
	int lladdrlen = 0;
	char *lladdr = NULL;
	mbuf_t m = *data;
	unsigned int off = sizeof(*ip6h);

	/* everything after the Ethernet and IPv6 headers is the ICMPv6 message */
	icmp6len = m->m_pkthdr.len - sizeof(*eh) - off;
	if (icmp6len < sizeof(*icmp6)) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "short packet %d < %lu",
		    icmp6len, sizeof(*icmp6));
		return;
	}
	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
	switch (icmp6->icmp6_type) {
	case ND_NEIGHBOR_SOLICIT: {
		struct nd_neighbor_solicit *nd_ns;
		union nd_opts ndopts;
		boolean_t is_dad_probe;
		struct in6_addr taddr;

		if (icmp6len < sizeof(*nd_ns)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_ns %d < %lu",
			    icmp6len, sizeof(*nd_ns));
			return;
		}

		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}
		/* parse options */
		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NS option");
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			/* the address follows the option header; nd_opt_len is in 8-byte units */
			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
		}
		/* a solicitation sent from the unspecified address is a DAD probe */
		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
		if (lladdr != NULL) {
			if (is_dad_probe) {
				/* a DAD probe must not carry a source link-layer option */
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "bad ND6 DAD packet");
				return;
			}
			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
			/* remember where the source link-layer address is */
			mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr -
			    (uintptr_t)eh);
			mnr->mnr_ip6_icmp6_len = icmp6len;
			mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
			mnr->mnr_ip6_header_len = off;
		}
		if (is_dad_probe) {
			/*
			 * node is trying to use taddr: hand taddr back through
			 * saddrp so that the caller can create an mne using it
			 */
			*saddrp = taddr;
		}
		break;
	}
	case ND_NEIGHBOR_ADVERT: {
		struct nd_neighbor_advert *nd_na;
		union nd_opts ndopts;
		struct in6_addr taddr;


		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;

		if (icmp6len < sizeof(*nd_na)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_na %d < %lu",
			    icmp6len, sizeof(*nd_na));
			return;
		}

		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}
		/* parse options */
		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NA option");
			return;
		}
		if (ndopts.nd_opts_tgt_lladdr == NULL) {
			/* no target link-layer address option, nothing to do */
			return;
		}
		/* the address follows the option header; nd_opt_len is in 8-byte units */
		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "target lladdrlen %d != %lu",
			    lladdrlen, ETHER_ND_LLADDR_LEN);
			return;
		}
		/* remember where the target link-layer address is */
		mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr - (uintptr_t)eh);
		mnr->mnr_ip6_icmp6_len = icmp6len;
		mnr->mnr_ip6_header_len = off;
		mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
		break;
	}
	case ND_ROUTER_SOLICIT: {
		struct nd_router_solicit *nd_rs;
		union nd_opts ndopts;

		if (icmp6len < sizeof(*nd_rs)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_rs %d < %lu",
			    icmp6len, sizeof(*nd_rs));
			return;
		}
		nd_rs = (struct nd_router_solicit *)(void *)icmp6;

		/* parse options */
		nd6_option_init(nd_rs + 1, icmp6len - sizeof(*nd_rs), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 RS option");
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			/* the address follows the option header; nd_opt_len is in 8-byte units */
			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
		}
		if (lladdr != NULL) {
			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
			/* remember where the source link-layer address is */
			mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr -
			    (uintptr_t)eh);
			mnr->mnr_ip6_icmp6_len = icmp6len;
			mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
			mnr->mnr_ip6_header_len = off;
		}
		break;
	}
	default:
		break;
	}
	/* debug-log what was recorded, if a link-layer address offset is set */
	if (mnr->mnr_ip6_lladdr_offset != 0 &&
	    BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
		const char *str;

		switch (mnr->mnr_ip6_icmp6_type) {
		case ND_ROUTER_SOLICIT:
			str = "ROUTER SOLICIT";
			break;
		case ND_NEIGHBOR_ADVERT:
			str = "NEIGHBOR ADVERT";
			break;
		case ND_NEIGHBOR_SOLICIT:
			str = "NEIGHBOR SOLICIT";
			break;
		default:
			str = "";
			break;
		}
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
		    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
		    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
		    mnr->mnr_ip6_header_len,
		    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
	}
}
8467
8468 static struct mac_nat_entry *
8469 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8470 {
8471 struct in6_addr dst;
8472 struct ether_header *eh;
8473 struct ip6_hdr *ip6h;
8474 struct mac_nat_entry *mne = NULL;
8475
8476 eh = get_ether_ipv6_header(data, FALSE);
8477 if (eh == NULL) {
8478 goto done;
8479 }
8480 ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8481 bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8482 /* XXX validate IPv6 address */
8483 if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8484 goto done;
8485 }
8486 mne = bridge_lookup_mac_nat_entry(sc, AF_INET6, &dst);
8487
8488 done:
8489 return mne;
8490 }
8491
8492 static boolean_t
8493 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8494 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8495 {
8496 struct ether_header *eh;
8497 struct ip6_hdr *ip6h;
8498 struct in6_addr saddr;
8499 boolean_t translate;
8500
8501 translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8502 eh = get_ether_ipv6_header(data, TRUE);
8503 if (eh == NULL) {
8504 translate = FALSE;
8505 goto done;
8506 }
8507 ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8508 bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8509 if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8510 bridge_mac_nat_icmpv6_output(sc, bif, data,
8511 eh, ip6h, &saddr, mnr);
8512 }
8513 if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8514 goto done;
8515 }
8516 (void)bridge_update_mac_nat_entry(sc, bif, AF_INET6, &saddr,
8517 eh->ether_shost);
8518
8519 done:
8520 return translate;
8521 }
8522
8523 /*
8524 * bridge_mac_nat_input:
8525 * Process a packet arriving on the MAC NAT interface (sc_mac_nat_bif).
8526 * This interface is the "external" interface with respect to NAT.
8527 * The interface is only capable of receiving a single MAC address
8528 * (e.g. a Wi-Fi STA interface).
8529 *
8530 * When a packet arrives on the external interface, look up the destination
8531 * IP address in the mac_nat_entry table. If there is a match, *is_input
8532 * is set to TRUE if it's for the MAC NAT interface, otherwise *is_input
8533 * is set to FALSE and translate the MAC address if necessary.
8534 *
8535 * Returns:
8536 * The internal interface to direct the packet to, or NULL if the packet
8537 * should not be redirected.
8538 *
8539 * *data may be updated to point at a different mbuf chain, or set to NULL
8540 * if the chain was deallocated during processing.
8541 */
8542 static ifnet_t
8543 bridge_mac_nat_input(struct bridge_softc *sc, mbuf_t *data,
8544 boolean_t *is_input)
8545 {
8546 ifnet_t dst_if = NULL;
8547 struct ether_header *eh;
8548 uint16_t ether_type;
8549 boolean_t is_unicast;
8550 mbuf_t m = *data;
8551 struct mac_nat_entry *mne = NULL;
8552
8553 BRIDGE_LOCK_ASSERT_HELD(sc);
8554 *is_input = FALSE;
8555 assert(sc->sc_mac_nat_bif != NULL);
8556 is_unicast = ((m->m_flags & (M_BCAST | M_MCAST)) == 0);
8557 eh = mtod(m, struct ether_header *);
8558 ether_type = ntohs(eh->ether_type);
8559 switch (ether_type) {
8560 case ETHERTYPE_ARP:
8561 mne = bridge_mac_nat_arp_input(sc, data);
8562 break;
8563 case ETHERTYPE_IP:
8564 if (is_unicast) {
8565 mne = bridge_mac_nat_ip_input(sc, data);
8566 }
8567 break;
8568 case ETHERTYPE_IPV6:
8569 if (is_unicast) {
8570 mne = bridge_mac_nat_ipv6_input(sc, data);
8571 }
8572 break;
8573 default:
8574 break;
8575 }
8576 if (mne != NULL) {
8577 if (is_unicast) {
8578 if (m != *data) {
8579 /* it may have changed */
8580 eh = mtod(*data, struct ether_header *);
8581 }
8582 bcopy(mne->mne_mac, eh->ether_dhost,
8583 sizeof(eh->ether_dhost));
8584 }
8585 dst_if = mne->mne_bif->bif_ifp;
8586 *is_input = (mne->mne_bif == sc->sc_mac_nat_bif);
8587 }
8588 return dst_if;
8589 }
8590
8591 /*
8592 * bridge_mac_nat_output:
8593 * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8594 * from the interface 'bif'.
8595 *
8596 * Create a mac_nat_entry containing the source IP address and MAC address
8597 * from the packet. Populate a mac_nat_record with information detailing
8598 * how to translate the packet. Translation takes place later when
8599 * the bridge lock is no longer held.
8600 *
8601 * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8602 * interface is generating an output packet. No translation is required in this
8603 * case, we just record the IP address used to prevent another bif from
8604 * claiming our IP address.
8605 *
8606 * Returns:
8607 * TRUE if the packet should be translated (*mnr updated as well),
8608 * FALSE otherwise.
8609 *
8610 * *data may be updated to point at a different mbuf chain or NULL if
8611 * the chain was deallocated during processing.
8612 */
8613
8614 static boolean_t
8615 bridge_mac_nat_output(struct bridge_softc *sc,
8616 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8617 {
8618 struct ether_header *eh;
8619 uint16_t ether_type;
8620 boolean_t translate = FALSE;
8621
8622 BRIDGE_LOCK_ASSERT_HELD(sc);
8623 assert(sc->sc_mac_nat_bif != NULL);
8624
8625 eh = mtod(*data, struct ether_header *);
8626 ether_type = ntohs(eh->ether_type);
8627 if (mnr != NULL) {
8628 bzero(mnr, sizeof(*mnr));
8629 mnr->mnr_ether_type = ether_type;
8630 }
8631 switch (ether_type) {
8632 case ETHERTYPE_ARP:
8633 translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8634 break;
8635 case ETHERTYPE_IP:
8636 translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8637 break;
8638 case ETHERTYPE_IPV6:
8639 translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8640 break;
8641 default:
8642 break;
8643 }
8644 return translate;
8645 }
8646
8647 static void
8648 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8649 const caddr_t eaddr)
8650 {
8651 errno_t error;
8652
8653 if (mnr->mnr_arp_offset == 0) {
8654 return;
8655 }
8656 /* replace the source hardware address */
8657 error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8658 ETHER_ADDR_LEN, eaddr,
8659 MBUF_DONTWAIT);
8660 if (error != 0) {
8661 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8662 "mbuf_copyback failed");
8663 m_freem(*data);
8664 *data = NULL;
8665 }
8666 return;
8667 }
8668
8669 static void
8670 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8671 {
8672 errno_t error;
8673 size_t offset;
8674
8675 if (mnr->mnr_ip_header_len == 0) {
8676 return;
8677 }
8678 /* update the UDP checksum */
8679 offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8680 error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8681 sizeof(mnr->mnr_ip_udp_csum),
8682 &mnr->mnr_ip_udp_csum,
8683 MBUF_DONTWAIT);
8684 if (error != 0) {
8685 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8686 "mbuf_copyback uh_sum failed");
8687 m_freem(*data);
8688 *data = NULL;
8689 }
8690 /* update the DHCP must broadcast flag */
8691 offset += sizeof(struct udphdr);
8692 error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8693 sizeof(mnr->mnr_ip_dhcp_flags),
8694 &mnr->mnr_ip_dhcp_flags,
8695 MBUF_DONTWAIT);
8696 if (error != 0) {
8697 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8698 "mbuf_copyback dp_flags failed");
8699 m_freem(*data);
8700 *data = NULL;
8701 }
8702 }
8703
8704 static void
8705 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8706 const caddr_t eaddr)
8707 {
8708 uint16_t cksum;
8709 errno_t error;
8710 mbuf_t m = *data;
8711
8712 if (mnr->mnr_ip6_header_len == 0) {
8713 return;
8714 }
8715 switch (mnr->mnr_ip6_icmp6_type) {
8716 case ND_ROUTER_SOLICIT:
8717 case ND_NEIGHBOR_SOLICIT:
8718 case ND_NEIGHBOR_ADVERT:
8719 if (mnr->mnr_ip6_lladdr_offset == 0) {
8720 /* nothing to do */
8721 return;
8722 }
8723 break;
8724 default:
8725 return;
8726 }
8727
8728 /*
8729 * replace the lladdr
8730 */
8731 error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8732 ETHER_ADDR_LEN, eaddr,
8733 MBUF_DONTWAIT);
8734 if (error != 0) {
8735 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8736 "mbuf_copyback lladdr failed");
8737 m_freem(m);
8738 *data = NULL;
8739 return;
8740 }
8741
8742 /*
8743 * recompute the icmp6 checksum
8744 */
8745
8746 /* skip past the ethernet header */
8747 mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
8748 mbuf_len(m) - ETHER_HDR_LEN);
8749 mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
8750
8751 #define CKSUM_OFFSET_ICMP6 offsetof(struct icmp6_hdr, icmp6_cksum)
8752 /* set the checksum to zero */
8753 cksum = 0;
8754 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8755 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8756 if (error != 0) {
8757 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8758 "mbuf_copyback cksum=0 failed");
8759 m_freem(m);
8760 *data = NULL;
8761 return;
8762 }
8763 /* compute and set the new checksum */
8764 cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8765 mnr->mnr_ip6_icmp6_len);
8766 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8767 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8768 if (error != 0) {
8769 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8770 "mbuf_copyback cksum failed");
8771 m_freem(m);
8772 *data = NULL;
8773 return;
8774 }
8775 /* restore the ethernet header */
8776 mbuf_setdata(m, (char *)mbuf_data(m) - ETHER_HDR_LEN,
8777 mbuf_len(m) + ETHER_HDR_LEN);
8778 mbuf_pkthdr_adjustlen(m, ETHER_HDR_LEN);
8779 return;
8780 }
8781
8782 static void
8783 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8784 const caddr_t eaddr)
8785 {
8786 struct ether_header *eh;
8787
8788 /* replace the source ethernet address with the single MAC */
8789 eh = mtod(*data, struct ether_header *);
8790 bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8791 switch (mnr->mnr_ether_type) {
8792 case ETHERTYPE_ARP:
8793 bridge_mac_nat_arp_translate(data, mnr, eaddr);
8794 break;
8795
8796 case ETHERTYPE_IP:
8797 bridge_mac_nat_ip_translate(data, mnr);
8798 break;
8799
8800 case ETHERTYPE_IPV6:
8801 bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8802 break;
8803
8804 default:
8805 break;
8806 }
8807 return;
8808 }
8809
8810 /*
8811 * bridge packet filtering
8812 */
8813
8814 /*
8815 * Perform basic checks on header size since
8816 * pfil assumes ip_input has already processed
8817 * it for it. Cut-and-pasted from ip_input.c.
8818 * Given how simple the IPv6 version is,
8819 * does the IPv4 version really need to be
8820 * this complicated?
8821 *
8822 * XXX Should we update ipstat here, or not?
8823 * XXX Right now we update ipstat but not
8824 * XXX csum_counter.
8825 */
8826 static int
8827 bridge_ip_checkbasic(struct mbuf **mp)
8828 {
8829 struct mbuf *m = *mp;
8830 struct ip *ip;
8831 int len, hlen;
8832 u_short sum;
8833
8834 if (*mp == NULL) {
8835 return -1;
8836 }
8837
8838 if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8839 /* max_linkhdr is already rounded up to nearest 4-byte */
8840 if ((m = m_copyup(m, sizeof(struct ip),
8841 max_linkhdr)) == NULL) {
8842 /* XXXJRT new stat, please */
8843 ipstat.ips_toosmall++;
8844 goto bad;
8845 }
8846 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8847 if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8848 ipstat.ips_toosmall++;
8849 goto bad;
8850 }
8851 }
8852 ip = mtod(m, struct ip *);
8853 if (ip == NULL) {
8854 goto bad;
8855 }
8856
8857 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
8858 ipstat.ips_badvers++;
8859 goto bad;
8860 }
8861 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8862 if (hlen < (int)sizeof(struct ip)) { /* minimum header length */
8863 ipstat.ips_badhlen++;
8864 goto bad;
8865 }
8866 if (hlen > m->m_len) {
8867 if ((m = m_pullup(m, hlen)) == 0) {
8868 ipstat.ips_badhlen++;
8869 goto bad;
8870 }
8871 ip = mtod(m, struct ip *);
8872 if (ip == NULL) {
8873 goto bad;
8874 }
8875 }
8876
8877 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
8878 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
8879 } else {
8880 if (hlen == sizeof(struct ip)) {
8881 sum = in_cksum_hdr(ip);
8882 } else {
8883 sum = in_cksum(m, hlen);
8884 }
8885 }
8886 if (sum) {
8887 ipstat.ips_badsum++;
8888 goto bad;
8889 }
8890
8891 /* Retrieve the packet length. */
8892 len = ntohs(ip->ip_len);
8893
8894 /*
8895 * Check for additional length bogosity
8896 */
8897 if (len < hlen) {
8898 ipstat.ips_badlen++;
8899 goto bad;
8900 }
8901
8902 /*
8903 * Check that the amount of data in the buffers
8904 * is as at least much as the IP header would have us expect.
8905 * Drop packet if shorter than we expect.
8906 */
8907 if (m->m_pkthdr.len < len) {
8908 ipstat.ips_tooshort++;
8909 goto bad;
8910 }
8911
8912 /* Checks out, proceed */
8913 *mp = m;
8914 return 0;
8915
8916 bad:
8917 *mp = m;
8918 return -1;
8919 }
8920
8921 /*
8922 * Same as above, but for IPv6.
8923 * Cut-and-pasted from ip6_input.c.
8924 * XXX Should we update ip6stat, or not?
8925 */
8926 static int
8927 bridge_ip6_checkbasic(struct mbuf **mp)
8928 {
8929 struct mbuf *m = *mp;
8930 struct ip6_hdr *ip6;
8931
8932 /*
8933 * If the IPv6 header is not aligned, slurp it up into a new
8934 * mbuf with space for link headers, in the event we forward
8935 * it. Otherwise, if it is aligned, make sure the entire base
8936 * IPv6 header is in the first mbuf of the chain.
8937 */
8938 if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8939 struct ifnet *inifp = m->m_pkthdr.rcvif;
8940 /* max_linkhdr is already rounded up to nearest 4-byte */
8941 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
8942 max_linkhdr)) == NULL) {
8943 /* XXXJRT new stat, please */
8944 ip6stat.ip6s_toosmall++;
8945 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8946 goto bad;
8947 }
8948 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
8949 struct ifnet *inifp = m->m_pkthdr.rcvif;
8950 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
8951 ip6stat.ip6s_toosmall++;
8952 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8953 goto bad;
8954 }
8955 }
8956
8957 ip6 = mtod(m, struct ip6_hdr *);
8958
8959 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
8960 ip6stat.ip6s_badvers++;
8961 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
8962 goto bad;
8963 }
8964
8965 /* Checks out, proceed */
8966 *mp = m;
8967 return 0;
8968
8969 bad:
8970 *mp = m;
8971 return -1;
8972 }
8973
8974 /*
8975 * the PF routines expect to be called from ip_input, so we
8976 * need to do and undo here some of the same processing.
8977 *
8978 * XXX : this is heavily inspired on bridge_pfil()
8979 */
8980 static int
8981 bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
8982 int input)
8983 {
8984 /*
8985 * XXX : mpetit : heavily inspired by bridge_pfil()
8986 */
8987
8988 int snap, error, i, hlen;
8989 struct ether_header *eh1, eh2;
8990 struct ip *ip;
8991 struct llc llc1;
8992 u_int16_t ether_type;
8993
8994 snap = 0;
8995 error = -1; /* Default error if not error == 0 */
8996
8997 if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
8998 return 0; /* filtering is disabled */
8999 }
9000 i = min((*mp)->m_pkthdr.len, max_protohdr);
9001 if ((*mp)->m_len < i) {
9002 *mp = m_pullup(*mp, i);
9003 if (*mp == NULL) {
9004 BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
9005 return -1;
9006 }
9007 }
9008
9009 eh1 = mtod(*mp, struct ether_header *);
9010 ether_type = ntohs(eh1->ether_type);
9011
9012 /*
9013 * Check for SNAP/LLC.
9014 */
9015 if (ether_type < ETHERMTU) {
9016 struct llc *llc2 = (struct llc *)(eh1 + 1);
9017
9018 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
9019 llc2->llc_dsap == LLC_SNAP_LSAP &&
9020 llc2->llc_ssap == LLC_SNAP_LSAP &&
9021 llc2->llc_control == LLC_UI) {
9022 ether_type = htons(llc2->llc_un.type_snap.ether_type);
9023 snap = 1;
9024 }
9025 }
9026
9027 /*
9028 * If we're trying to filter bridge traffic, don't look at anything
9029 * other than IP and ARP traffic. If the filter doesn't understand
9030 * IPv6, don't allow IPv6 through the bridge either. This is lame
9031 * since if we really wanted, say, an AppleTalk filter, we are hosed,
9032 * but of course we don't have an AppleTalk filter to begin with.
9033 * (Note that since pfil doesn't understand ARP it will pass *ALL*
9034 * ARP traffic.)
9035 */
9036 switch (ether_type) {
9037 case ETHERTYPE_ARP:
9038 case ETHERTYPE_REVARP:
9039 return 0; /* Automatically pass */
9040
9041 case ETHERTYPE_IP:
9042 case ETHERTYPE_IPV6:
9043 break;
9044 default:
9045 /*
9046 * Check to see if the user wants to pass non-ip
9047 * packets, these will not be checked by pf and
9048 * passed unconditionally so the default is to drop.
9049 */
9050 if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
9051 goto bad;
9052 }
9053 break;
9054 }
9055
9056 /* Strip off the Ethernet header and keep a copy. */
9057 m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
9058 m_adj(*mp, ETHER_HDR_LEN);
9059
9060 /* Strip off snap header, if present */
9061 if (snap) {
9062 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
9063 m_adj(*mp, sizeof(struct llc));
9064 }
9065
9066 /*
9067 * Check the IP header for alignment and errors
9068 */
9069 switch (ether_type) {
9070 case ETHERTYPE_IP:
9071 error = bridge_ip_checkbasic(mp);
9072 break;
9073 case ETHERTYPE_IPV6:
9074 error = bridge_ip6_checkbasic(mp);
9075 break;
9076 default:
9077 error = 0;
9078 break;
9079 }
9080 if (error) {
9081 goto bad;
9082 }
9083
9084 error = 0;
9085
9086 /*
9087 * Run the packet through pf rules
9088 */
9089 switch (ether_type) {
9090 case ETHERTYPE_IP:
9091 /*
9092 * before calling the firewall, swap fields the same as
9093 * IP does. here we assume the header is contiguous
9094 */
9095 ip = mtod(*mp, struct ip *);
9096
9097 ip->ip_len = ntohs(ip->ip_len);
9098 ip->ip_off = ntohs(ip->ip_off);
9099
9100 if (ifp != NULL) {
9101 error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
9102 }
9103
9104 if (*mp == NULL || error != 0) { /* filter may consume */
9105 break;
9106 }
9107
9108 /* Recalculate the ip checksum and restore byte ordering */
9109 ip = mtod(*mp, struct ip *);
9110 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9111 if (hlen < (int)sizeof(struct ip)) {
9112 goto bad;
9113 }
9114 if (hlen > (*mp)->m_len) {
9115 if ((*mp = m_pullup(*mp, hlen)) == 0) {
9116 goto bad;
9117 }
9118 ip = mtod(*mp, struct ip *);
9119 if (ip == NULL) {
9120 goto bad;
9121 }
9122 }
9123 ip->ip_len = htons(ip->ip_len);
9124 ip->ip_off = htons(ip->ip_off);
9125 ip->ip_sum = 0;
9126 if (hlen == sizeof(struct ip)) {
9127 ip->ip_sum = in_cksum_hdr(ip);
9128 } else {
9129 ip->ip_sum = in_cksum(*mp, hlen);
9130 }
9131 break;
9132
9133 case ETHERTYPE_IPV6:
9134 if (ifp != NULL) {
9135 error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
9136 }
9137
9138 if (*mp == NULL || error != 0) { /* filter may consume */
9139 break;
9140 }
9141 break;
9142 default:
9143 error = 0;
9144 break;
9145 }
9146
9147 if (*mp == NULL) {
9148 return error;
9149 }
9150 if (error != 0) {
9151 goto bad;
9152 }
9153
9154 error = -1;
9155
9156 /*
9157 * Finally, put everything back the way it was and return
9158 */
9159 if (snap) {
9160 M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
9161 if (*mp == NULL) {
9162 return error;
9163 }
9164 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
9165 }
9166
9167 M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
9168 if (*mp == NULL) {
9169 return error;
9170 }
9171 bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
9172
9173 return 0;
9174
9175 bad:
9176 m_freem(*mp);
9177 *mp = NULL;
9178 return error;
9179 }
9180
9181 /*
9182 * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
9183 * All rights reserved.
9184 *
9185 * Redistribution and use in source and binary forms, with or without
9186 * modification, are permitted provided that the following conditions
9187 * are met:
9188 * 1. Redistributions of source code must retain the above copyright
9189 * notice, this list of conditions and the following disclaimer.
9190 * 2. Redistributions in binary form must reproduce the above copyright
9191 * notice, this list of conditions and the following disclaimer in the
9192 * documentation and/or other materials provided with the distribution.
9193 *
9194 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9195 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9196 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
9197 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
9198 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
9199 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
9200 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9201 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
9202 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
9203 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
9204 * SUCH DAMAGE.
9205 */
9206
9207 /*
9208 * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
9209 *
9210 * Create a queue of packets/segments which fit the given mss + hdr_len.
9211 * m0 points to mbuf chain to be segmented.
9212 * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
9213 * into segments of length MSS bytes and then copy the first hdr_len bytes
9214 * from m0 at the top of each segment.
9215 * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
9216 * in each segment after the first hdr_len bytes
9217 *
9218 * Return the new queue with the segments on success, NULL on failure.
9219 * (the mbuf queue is freed in this case).
9220 * nsegs contains the number of segments generated.
9221 */
9222
9223 static struct mbuf *
9224 m_seg(struct mbuf *m0, int hdr_len, int mss, int *nsegs,
9225 char * hdr2_buf, int hdr2_len)
9226 {
9227 int off = 0, n, firstlen;
9228 struct mbuf **mnext, *mseg;
9229 int total_len = m0->m_pkthdr.len;
9230
9231 /*
9232 * Segmentation useless
9233 */
9234 if (total_len <= hdr_len + mss) {
9235 return m0;
9236 }
9237
9238 if (hdr2_buf == NULL || hdr2_len <= 0) {
9239 hdr2_buf = NULL;
9240 hdr2_len = 0;
9241 }
9242
9243 off = hdr_len + mss;
9244 firstlen = mss; /* first segment stored in the original mbuf */
9245
9246 mnext = &(m0->m_nextpkt); /* pointer to next packet */
9247
9248 for (n = 1; off < total_len; off += mss, n++) {
9249 struct mbuf *m;
9250 /*
9251 * Copy the header from the original packet
9252 * and create a new mbuf chain
9253 */
9254 if (MHLEN < hdr_len) {
9255 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
9256 } else {
9257 m = m_gethdr(M_NOWAIT, MT_DATA);
9258 }
9259
9260 if (m == NULL) {
9261 #ifdef GSO_DEBUG
9262 D("MGETHDR error\n");
9263 #endif
9264 goto err;
9265 }
9266
9267 m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
9268
9269 m->m_len = hdr_len;
9270 /*
9271 * if the optional header is present, copy it
9272 */
9273 if (hdr2_buf != NULL) {
9274 m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
9275 }
9276
9277 m->m_flags |= (m0->m_flags & M_COPYFLAGS);
9278 if (off + mss >= total_len) { /* last segment */
9279 mss = total_len - off;
9280 }
9281 /*
9282 * Copy the payload from original packet
9283 */
9284 mseg = m_copym(m0, off, mss, M_NOWAIT);
9285 if (mseg == NULL) {
9286 m_freem(m);
9287 #ifdef GSO_DEBUG
9288 D("m_copym error\n");
9289 #endif
9290 goto err;
9291 }
9292 m_cat(m, mseg);
9293
9294 m->m_pkthdr.len = hdr_len + hdr2_len + mss;
9295 m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
9296 /*
9297 * Copy the checksum flags and data (in_cksum() need this)
9298 */
9299 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
9300 m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
9301 m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
9302
9303 *mnext = m;
9304 mnext = &(m->m_nextpkt);
9305 }
9306
9307 /*
9308 * Update first segment.
9309 * If the optional header is present, is necessary
9310 * to insert it into the first segment.
9311 */
9312 if (hdr2_buf == NULL) {
9313 m_adj(m0, hdr_len + firstlen - total_len);
9314 m0->m_pkthdr.len = hdr_len + firstlen;
9315 } else {
9316 mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
9317 if (mseg == NULL) {
9318 #ifdef GSO_DEBUG
9319 D("m_copym error\n");
9320 #endif
9321 goto err;
9322 }
9323 m_adj(m0, hdr_len - total_len);
9324 m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
9325 m_cat(m0, mseg);
9326 m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
9327 }
9328
9329 if (nsegs != NULL) {
9330 *nsegs = n;
9331 }
9332 return m0;
9333 err:
9334 while (m0 != NULL) {
9335 mseg = m0->m_nextpkt;
9336 m0->m_nextpkt = NULL;
9337 m_freem(m0);
9338 m0 = mseg;
9339 }
9340 return NULL;
9341 }
9342
9343 /*
9344 * Wrappers of IPv4 checksum functions
9345 */
9346 static inline void
9347 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
9348 {
9349 m->m_data += mac_hlen;
9350 m->m_len -= mac_hlen;
9351 m->m_pkthdr.len -= mac_hlen;
9352 #if __FreeBSD_version < 1000000
9353 ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
9354 #endif
9355
9356 in_delayed_cksum(m);
9357
9358 #if __FreeBSD_version < 1000000
9359 ip->ip_len = htons(ip->ip_len);
9360 #endif
9361 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
9362 m->m_len += mac_hlen;
9363 m->m_pkthdr.len += mac_hlen;
9364 m->m_data -= mac_hlen;
9365 }
9366
9367 static inline void
9368 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
9369 {
9370 m->m_data += mac_hlen;
9371
9372 ip->ip_sum = in_cksum(m, ip_hlen);
9373
9374 m->m_pkthdr.csum_flags &= ~CSUM_IP;
9375 m->m_data -= mac_hlen;
9376 }
9377
9378 /*
9379 * Structure that contains the state during the TCP segmentation
9380 */
9381 struct gso_ip_tcp_state {
9382 void (*update)
9383 (struct gso_ip_tcp_state*, struct mbuf*);
9384 void (*internal)
9385 (struct gso_ip_tcp_state*, struct mbuf*);
9386 union iphdr hdr;
9387 struct tcphdr *tcp;
9388 int mac_hlen;
9389 int ip_hlen;
9390 int tcp_hlen;
9391 int hlen;
9392 int pay_len;
9393 int sw_csum;
9394 uint32_t tcp_seq;
9395 uint16_t ip_id;
9396 boolean_t is_tx;
9397 };
9398
9399 /*
9400 * Update the pointers to TCP and IPv4 headers
9401 */
9402 static inline void
9403 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9404 {
9405 state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) + state->mac_hlen);
9406 state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) + state->ip_hlen);
9407 state->pay_len = m->m_pkthdr.len - state->hlen;
9408 }
9409
9410 /*
9411 * Set properly the TCP and IPv4 headers
9412 */
9413 static inline void
9414 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9415 {
9416 /*
9417 * Update IP header
9418 */
9419 state->hdr.ip->ip_id = htons((state->ip_id)++);
9420 state->hdr.ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
9421 /*
9422 * TCP Checksum
9423 */
9424 state->tcp->th_sum = 0;
9425 state->tcp->th_sum = in_pseudo(state->hdr.ip->ip_src.s_addr,
9426 state->hdr.ip->ip_dst.s_addr,
9427 htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
9428 /*
9429 * Checksum HW not supported (TCP)
9430 */
9431 if (state->sw_csum & CSUM_DELAY_DATA) {
9432 gso_ipv4_data_cksum(m, state->hdr.ip, state->mac_hlen);
9433 }
9434
9435 state->tcp_seq += state->pay_len;
9436 /*
9437 * IP Checksum
9438 */
9439 state->hdr.ip->ip_sum = 0;
9440 /*
9441 * Checksum HW not supported (IP)
9442 */
9443 if (state->sw_csum & CSUM_IP) {
9444 gso_ipv4_hdr_cksum(m, state->hdr.ip, state->mac_hlen, state->ip_hlen);
9445 }
9446 }
9447
9448
9449 /*
9450 * Updates the pointers to TCP and IPv6 headers
9451 */
9452 static inline void
9453 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9454 {
9455 state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + state->mac_hlen);
9456 state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) + state->ip_hlen);
9457 state->pay_len = m->m_pkthdr.len - state->hlen;
9458 }
9459
9460 /*
9461 * Sets properly the TCP and IPv6 headers
9462 */
9463 static inline void
9464 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9465 {
9466 state->hdr.ip6->ip6_plen = htons(m->m_pkthdr.len -
9467 state->mac_hlen - state->ip_hlen);
9468 /*
9469 * TCP Checksum
9470 */
9471 state->tcp->th_sum = 0;
9472 state->tcp->th_sum = in6_pseudo(&state->hdr.ip6->ip6_src,
9473 &state->hdr.ip6->ip6_dst,
9474 htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
9475 /*
9476 * Checksum HW not supported (TCP)
9477 */
9478 if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
9479 (void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
9480 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
9481 }
9482 state->tcp_seq += state->pay_len;
9483 }
9484
9485 /*
9486 * Init the state during the TCP segmentation
9487 */
9488 static void
9489 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
9490 bool is_ipv4, int mac_hlen, int ip_hlen,
9491 void * ip_hdr, struct tcphdr * tcp_hdr)
9492 {
9493 #pragma unused(ifp)
9494
9495 state->hdr.ptr = ip_hdr;
9496 state->tcp = tcp_hdr;
9497 if (is_ipv4) {
9498 state->ip_id = ntohs(state->hdr.ip->ip_id);
9499 state->update = gso_ipv4_tcp_update;
9500 state->internal = gso_ipv4_tcp_internal;
9501 state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
9502 } else {
9503 state->update = gso_ipv6_tcp_update;
9504 state->internal = gso_ipv6_tcp_internal;
9505 state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
9506 }
9507 state->mac_hlen = mac_hlen;
9508 state->ip_hlen = ip_hlen;
9509 state->tcp_hlen = state->tcp->th_off << 2;
9510 state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
9511 state->tcp_seq = ntohl(state->tcp->th_seq);
9512 //state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
9513 return;
9514 }
9515
9516 /*
9517 * GSO on TCP/IP (v4 or v6)
9518 *
9519 * If is_tx is TRUE, segmented packets are transmitted after they are
9520 * segmented.
9521 *
9522 * If is_tx is FALSE, the segmented packets are returned as a chain in *mp.
9523 */
9524 static int
9525 gso_ip_tcp(struct ifnet *ifp, struct mbuf **mp, struct gso_ip_tcp_state *state,
9526 boolean_t is_tx)
9527 {
9528 struct mbuf *m, *m_tx;
9529 int error = 0;
9530 int mss = 0;
9531 int nsegs = 0;
9532 struct mbuf *m0 = *mp;
9533 #ifdef GSO_STATS
9534 int total_len = m0->m_pkthdr.len;
9535 #endif /* GSO_STATS */
9536
9537 #if 1
9538 u_int reduce_mss;
9539
9540 reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
9541 : if_bridge_tso_reduce_mss_forwarding;
9542 mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen - reduce_mss;
9543 assert(mss > 0);
9544 #else
9545 if (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) {/* TSO with GSO */
9546 mss = ifp->if_hw_tsomax - state->ip_hlen - state->tcp_hlen;
9547 } else {
9548 mss = m0->m_pkthdr.tso_segsz;
9549 }
9550 #endif
9551
9552 *mp = m0 = m_seg(m0, state->hlen, mss, &nsegs, 0, 0);
9553 if (m0 == NULL) {
9554 return ENOBUFS; /* XXX ok? */
9555 }
9556 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
9557 "%s %s mss %d nsegs %d",
9558 ifp->if_xname,
9559 is_tx ? "TX" : "RX",
9560 mss, nsegs);
9561 /*
9562 * XXX-ste: can this happen?
9563 */
9564 if (m0->m_nextpkt == NULL) {
9565 #ifdef GSO_DEBUG
9566 D("only 1 segment");
9567 #endif
9568 if (is_tx) {
9569 error = bridge_transmit(ifp, m0);
9570 }
9571 return error;
9572 }
9573 #ifdef GSO_STATS
9574 GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
9575 GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
9576 GSOSTAT_ADD(tcp.gsos_osegments, nsegs);
9577 #endif /* GSO_STATS */
9578
9579 /* first pkt */
9580 m = m0;
9581
9582 state->update(state, m);
9583
9584 do {
9585 state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
9586
9587 state->internal(state, m);
9588 m_tx = m;
9589 m = m->m_nextpkt;
9590 if (is_tx) {
9591 m_tx->m_nextpkt = NULL;
9592 if ((error = bridge_transmit(ifp, m_tx)) != 0) {
9593 /*
9594 * XXX: If a segment can not be sent, discard the following
9595 * segments and propagate the error to the upper levels.
9596 * In this way the TCP retransmits all the initial packet.
9597 */
9598 #ifdef GSO_DEBUG
9599 D("if_transmit error\n");
9600 #endif
9601 goto err;
9602 }
9603 }
9604 state->update(state, m);
9605
9606 state->tcp->th_flags &= ~TH_CWR;
9607 state->tcp->th_seq = htonl(state->tcp_seq);
9608 } while (m->m_nextpkt);
9609
9610 /* last pkt */
9611 state->internal(state, m);
9612
9613 if (is_tx) {
9614 error = bridge_transmit(ifp, m);
9615 #ifdef GSO_DEBUG
9616 if (error) {
9617 D("last if_transmit error\n");
9618 D("error - type = %d \n", error);
9619 }
9620 #endif
9621 }
9622 #ifdef GSO_STATS
9623 if (!error) {
9624 GSOSTAT_INC(tcp.gsos_segmented);
9625 GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
9626 GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
9627 GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
9628 }
9629 #endif /* GSO_STATS */
9630 return error;
9631
9632 err:
9633 #ifdef GSO_DEBUG
9634 D("error - type = %d \n", error);
9635 #endif
9636 while (m != NULL) {
9637 m_tx = m->m_nextpkt;
9638 m->m_nextpkt = NULL;
9639 m_freem(m);
9640 m = m_tx;
9641 }
9642 return error;
9643 }
9644
9645 /*
9646 * GSO for TCP/IPv[46]
9647 */
9648 static int
9649 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
9650 boolean_t is_tx)
9651 {
9652 int error;
9653 ip_packet_info info;
9654 uint32_t csum_flags;
9655 struct gso_ip_tcp_state state;
9656 struct bripstats stats; /* XXX ignored */
9657 struct tcphdr *tcp;
9658
9659 if (!is_tx && ipforwarding == 0) {
9660 /* no need to segment if the packet will not be forwarded */
9661 return 0;
9662 }
9663 error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4, &info, &stats);
9664 if (error != 0) {
9665 if (*mp != NULL) {
9666 m_freem(*mp);
9667 *mp = NULL;
9668 }
9669 return error;
9670 }
9671 if (info.ip_proto_hdr == NULL) {
9672 /* not a TCP packet */
9673 return 0;
9674 }
9675 tcp = (struct tcphdr *)(void *)info.ip_proto_hdr;
9676 gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
9677 info.ip_hlen, info.ip_hdr.ptr, tcp);
9678 if (is_ipv4) {
9679 csum_flags = CSUM_DELAY_DATA; /* XXX */
9680 if (!is_tx) {
9681 /* if RX to our local IP address, don't segment */
9682 struct in_addr dst_ip;
9683
9684 bcopy(&state.hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
9685 if (in_addr_is_ours(dst_ip)) {
9686 return 0;
9687 }
9688 }
9689 } else {
9690 csum_flags = CSUM_DELAY_IPV6_DATA; /* XXX */
9691 if (!is_tx) {
9692 /* if RX to our local IP address, don't segment */
9693 if (in6_addr_is_ours(&state.hdr.ip6->ip6_dst,
9694 ifp->if_index)) {
9695 /* local IP address, no need to segment */
9696 return 0;
9697 }
9698 }
9699 }
9700 (*mp)->m_pkthdr.csum_flags = csum_flags;
9701 (*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
9702 return gso_ip_tcp(ifp, mp, &state, is_tx);
9703 }
9704