1 /*
2 * Copyright (c) 2004-2023 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */
30 /*
31 * Copyright 2001 Wasabi Systems, Inc.
32 * All rights reserved.
33 *
34 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed for the NetBSD Project by
47 * Wasabi Systems, Inc.
48 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49 * or promote products derived from this software without specific prior
50 * written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
56 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62 * POSSIBILITY OF SUCH DAMAGE.
63 */
64
65 /*
66 * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67 * All rights reserved.
68 *
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
71 * are met:
72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88 * POSSIBILITY OF SUCH DAMAGE.
89 *
90 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91 */
92
93 /*
94 * Network interface bridge support.
95 *
96 * TODO:
97 *
98 * - Currently only supports Ethernet-like interfaces (Ethernet,
99 * 802.11, VLANs on Ethernet, etc.) Figure out a nice way
100 * to bridge other types of interfaces (FDDI-FDDI, and maybe
 *   consider heterogeneous bridges).
102 *
103 * - GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104 */
105
106 #include <sys/cdefs.h>
107
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123
124 #include <sys/kauth.h>
125
126 #include <kern/thread_call.h>
127
128 #include <libkern/libkern.h>
129
130 #include <kern/zalloc.h>
131
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141 #include <net/pfvar.h>
142
143 #include <netinet/in.h> /* for struct arpcom */
144 #include <netinet/tcp.h> /* for struct tcphdr */
145 #include <netinet/in_systm.h>
146 #include <netinet/in_var.h>
147 #define _IP_VHL
148 #include <netinet/ip.h>
149 #include <netinet/ip_var.h>
150 #include <netinet/ip6.h>
151 #include <netinet6/ip6_var.h>
152 #ifdef DEV_CARP
153 #include <netinet/ip_carp.h>
154 #endif
155 #include <netinet/if_ether.h> /* for struct arpcom */
156 #include <net/bridgestp.h>
157 #include <net/if_bridgevar.h>
158 #include <net/if_llc.h>
159 #if NVLAN > 0
160 #include <net/if_vlan_var.h>
161 #endif /* NVLAN > 0 */
162
163 #include <net/if_ether.h>
164 #include <net/dlil.h>
165 #include <net/kpi_interfacefilter.h>
166
167 #include <net/route.h>
168 #include <dev/random/randomdev.h>
169
170 #include <netinet/bootp.h>
171 #include <netinet/dhcp.h>
172
173 #if SKYWALK
174 #include <skywalk/nexus/netif/nx_netif.h>
175 #endif /* SKYWALK */
176
177 #include <os/log.h>
178
179 /*
180 * if_bridge_debug, BR_DBGF_*
181 * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
182 * to enable additional logs for the corresponding bridge function
183 * - "sysctl net.link.bridge.debug" controls the value of
184 * 'if_bridge_debug'
185 */
static uint32_t if_bridge_debug = 0;
#define BR_DBGF_LIFECYCLE       0x0001
#define BR_DBGF_INPUT           0x0002
#define BR_DBGF_OUTPUT          0x0004
#define BR_DBGF_RT_TABLE        0x0008
#define BR_DBGF_DELAYED_CALL    0x0010
#define BR_DBGF_IOCTL           0x0020
#define BR_DBGF_MBUF            0x0040
#define BR_DBGF_MCAST           0x0080
#define BR_DBGF_HOSTFILTER      0x0100
#define BR_DBGF_CHECKSUM        0x0200
#define BR_DBGF_MAC_NAT         0x0400

/*
 * if_bridge_log_level
 * - 'if_bridge_log_level' ensures that by default important logs are
 *   logged regardless of if_bridge_debug by comparing the log level
 *   in BRIDGE_LOG to if_bridge_log_level
 * - "sysctl net.link.bridge.log_level" controls the value of
 *   'if_bridge_log_level'
 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
 *   logs must use LOG_NOTICE to ensure they appear by default
 */
static int if_bridge_log_level = LOG_NOTICE;

/*
 * BRIDGE_DBGF_ENABLED
 * - evaluates to true when at least one bit of '__flag' is set in
 *   'if_bridge_debug'
 * - '__flag' is parenthesized so that a compound argument such as
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT) is masked as a whole; without the
 *   parentheses '&' would bind to the first operand only and the test
 *   would always be true
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)
212
/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - macros to generate the specified log conditionally based on
 *   the specified log level and debug flags
 * - the log is emitted when '__level' is less than or equal to
 *   'if_bridge_log_level', or when BRIDGE_DBGF_ENABLED(__dbgf) is true
 * - BRIDGE_LOG prefixes the message with the name of the calling
 *   function; BRIDGE_LOG_SIMPLE does not include the function name
 * - '__string' must be a string literal: BRIDGE_LOG concatenates it
 *   with the "%s: " prefix at compile time
 * - '__level' is parenthesized so that an expression argument is
 *   compared as a whole
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, "%s: " __string,         \
	                       __func__, ## __VA_ARGS__);               \
	        }                                                       \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                               \
	} while (0)
234
/*
 * Direct operations on the per-bridge mutex 'sc_mtx':
 * - _BRIDGE_LOCK / _BRIDGE_UNLOCK take and release the mutex
 * - BRIDGE_LOCK_ASSERT_HELD / BRIDGE_LOCK_ASSERT_NOTHELD assert that the
 *   mutex is owned / not owned
 */
#define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
#define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
#define BRIDGE_LOCK_ASSERT_HELD(_sc)            \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)         \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)
241
/*
 * BRIDGE_LOCK_DEBUG
 * - when non-zero, the BRIDGE_* locking macros expand to calls to the
 *   bridge_lock(), bridge_unlock(), bridge_lock2ref(), bridge_unref(),
 *   bridge_xlock() and bridge_xdrop() functions
 * - when zero, they expand to the inline versions in the #else branch
 */
#define BRIDGE_LOCK_DEBUG               1
#if BRIDGE_LOCK_DEBUG

/* number of entries in the lock_lr[] and unlock_lr[] arrays of the softc */
#define BR_LCKDBG_MAX                   4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      _err = bridge_lock2ref(_sc)
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
/*
 * BRIDGE_LOCK2REF
 * - must be entered with the bridge mutex held; always releases it
 * - if 'sc_iflist_xcnt' is non-zero, sets '_err' to EBUSY without taking
 *   a reference; otherwise increments 'sc_iflist_ref' and sets '_err'
 *   to 0
 */
#define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	if ((_sc)->sc_iflist_xcnt > 0)                                  \
	        (_err) = EBUSY;                                         \
	else {                                                          \
	        (_sc)->sc_iflist_ref++;                                 \
	        (_err) = 0;                                             \
	}                                                               \
	_BRIDGE_UNLOCK(_sc);                                            \
} while (0)
/*
 * BRIDGE_UNREF
 * - takes the bridge mutex, decrements 'sc_iflist_ref' and releases the
 *   mutex
 * - when the count reaches zero while 'sc_iflist_xcnt' is non-zero,
 *   issues a wakeup on 'sc_cv' (the channel BRIDGE_XLOCK sleeps on)
 */
#define BRIDGE_UNREF(_sc)               do {                            \
	_BRIDGE_LOCK(_sc);                                              \
	(_sc)->sc_iflist_ref--;                                         \
	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \
	        _BRIDGE_UNLOCK(_sc);                                    \
	        wakeup(&(_sc)->sc_cv);                                  \
	} else                                                          \
	        _BRIDGE_UNLOCK(_sc);                                    \
} while (0)
/*
 * BRIDGE_XLOCK
 * - must be entered with the bridge mutex held
 * - increments 'sc_iflist_xcnt', then sleeps on 'sc_cv' (passing
 *   'sc_mtx' to msleep) for as long as 'sc_iflist_ref' is non-zero
 */
#define BRIDGE_XLOCK(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt++;                                        \
	while ((_sc)->sc_iflist_ref > 0)                                \
	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
	            "BRIDGE_XLOCK", NULL);                              \
} while (0)
/*
 * BRIDGE_XDROP
 * - must be entered with the bridge mutex held
 * - decrements 'sc_iflist_xcnt', undoing BRIDGE_XLOCK
 */
#define BRIDGE_XDROP(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt--;                                        \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
290
/*
 * BRIDGE_BPF_MTAP_INPUT
 * - when 'sc_bpf_input' is non-NULL, passes the packet 'm' to
 *   bridge_bpf_input() along with the calling function name and line
 * - expands to nothing when NBPFILTER is not greater than zero
 * - wrapped in do { } while (0) so that the expansion is a single
 *   statement; a bare 'if' would capture a following 'else' when the
 *   macro is used inside an unbraced if/else
 * - 'sc' is evaluated twice; do not pass an expression with side effects
 */
#if NBPFILTER > 0
#define BRIDGE_BPF_MTAP_INPUT(sc, m)                                    \
	do {                                                            \
	        if ((sc)->sc_bpf_input != NULL) {                       \
	                bridge_bpf_input((sc)->sc_ifp, (m),             \
	                    __func__, __LINE__);                        \
	        }                                                       \
	} while (0)
#else /* NBPFILTER */
#define BRIDGE_BPF_MTAP_INPUT(sc, m)
#endif /* NBPFILTER */
298
/*
 * Initial size of the route hash table. Must be a power of two
 * (BRIDGE_RTHASH_MASK below computes the mask as size - 1).
 */
#ifndef BRIDGE_RTHASH_SIZE
#define BRIDGE_RTHASH_SIZE              16
#endif

/*
 * Maximum size of the routing hash table
 * (default value of the 'rtable_hash_size_max' sysctl)
 */
#define BRIDGE_RTHASH_SIZE_MAX          2048

/*
 * Mask derived from the current hash table size ('sc_rthash_size' - 1).
 */
#define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)

/*
 * Maximum number of addresses to cache.
 */
#ifndef BRIDGE_RTABLE_MAX
#define BRIDGE_RTABLE_MAX               100
#endif


/*
 * Timeout (in seconds) for entries learned dynamically.
 */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
#endif

/*
 * Number of seconds between walks of the route list
 * (default value of the 'rtable_prune_period' sysctl).
 */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
#endif

/*
 * Number of MAC NAT entries
 * - sized based on 16 clients (including MAC NAT interface)
 *   each with 4 addresses
 */
#ifndef BRIDGE_MAC_NAT_ENTRY_MAX
#define BRIDGE_MAC_NAT_ENTRY_MAX        64
#endif /* BRIDGE_MAC_NAT_ENTRY_MAX */

/*
 * List of capabilities to possibly mask on the member interface.
 */
#define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
/*
 * List of capabilities to disable on the member interface.
 */
#define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
352
353 /*
354 * Bridge interface list entry.
355 */
struct bridge_iflist {
	/* linkage for the softc's TAILQ lists of bridge_iflist entries */
	TAILQ_ENTRY(bridge_iflist) bif_next;
	struct ifnet            *bif_ifp;       /* member if */
	struct bstp_port        bif_stp;        /* STP state */
	/* member if flags; tested against IFBIF_* values */
	uint32_t                bif_ifflags;    /* member if flags */
	int                     bif_savedcaps;  /* saved capabilities */
	uint32_t                bif_addrmax;    /* max # of addresses */
	uint32_t                bif_addrcnt;    /* cur. # of addresses */
	uint32_t                bif_addrexceeded; /* # of address violations */

	/* presumably the interface filter attached to bif_ifp — confirm */
	interface_filter_t      bif_iff_ref;
	/* presumably the bridge this member belongs to — confirm */
	struct bridge_softc     *bif_sc;
	/* presumably holds the BIFF_* bits — confirm */
	uint32_t                bif_flags;

	/* host filter */
	struct in_addr          bif_hf_ipsrc;
	uint8_t                 bif_hf_hwsrc[ETHER_ADDR_LEN];

	struct ifbrmstats       bif_stats;
};
376
377 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)378 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
379 {
380 return (bif->bif_ifflags & flags) == flags;
381 }
382
383 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)384 bif_has_checksum_offload(struct bridge_iflist * bif)
385 {
386 return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
387 }
388
/*
 * fake errors to make the code clearer
 * - every name below is an alias for EJUSTRETURN, so the individual
 *   names only document the reason at the point of use; they cannot be
 *   told apart at run time
 */
#define _EBADIP                 EJUSTRETURN
#define _EBADIPCHECKSUM         EJUSTRETURN
#define _EBADIPV6               EJUSTRETURN
#define _EBADUDP                EJUSTRETURN
#define _EBADTCP                EJUSTRETURN
#define _EBADUDPCHECKSUM        EJUSTRETURN
#define _EBADTCPCHECKSUM        EJUSTRETURN
397
/*
 * BIFF_* per-member state bits
 * - presumably stored in 'bif_flags' of struct bridge_iflist — confirm
 * - the last two bits are only defined when SKYWALK is enabled
 */
#define BIFF_PROMISC            0x01    /* promiscuous mode set */
#define BIFF_PROTO_ATTACHED     0x02    /* protocol attached */
#define BIFF_FILTER_ATTACHED    0x04    /* interface filter attached */
#define BIFF_MEDIA_ACTIVE       0x08    /* interface media active */
#define BIFF_HOST_FILTER        0x10    /* host filter enabled */
#define BIFF_HF_HWSRC           0x20    /* host filter source MAC is set */
#define BIFF_HF_IPSRC           0x40    /* host filter source IP is set */
#define BIFF_INPUT_BROADCAST    0x80    /* send broadcast packets in */
#define BIFF_IN_MEMBER_LIST     0x100   /* added to the member list */
#define BIFF_WIFI_INFRA         0x200   /* interface is Wi-Fi infra */
#define BIFF_ALL_MULTI          0x400   /* allmulti set */
#define BIFF_LRO_DISABLED       0x800   /* LRO was disabled */
#if SKYWALK
#define BIFF_FLOWSWITCH_ATTACHED 0x1000 /* we attached the flowswitch */
#define BIFF_NETAGENT_REMOVED   0x2000  /* we removed the netagent */
#endif /* SKYWALK */
414
415 /*
416 * mac_nat_entry
417 * - translates between an IP address and MAC address on a specific
418 * bridge interface member
419 */
struct mac_nat_entry {
	LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
	struct bridge_iflist    *mne_bif;       /* originating interface */
	unsigned long           mne_expire;     /* expiration time */
	/* only one of the two addresses is stored; see MNE_FLAGS_IPV6 */
	union {
		struct in_addr  mneu_ip;        /* originating IPv4 address */
		struct in6_addr mneu_ip6;       /* originating IPv6 address */
	} mne_u;
	uint8_t                 mne_mac[ETHER_ADDR_LEN]; /* MAC address */
	uint8_t                 mne_flags;      /* presumably MNE_FLAGS_* — confirm */
	uint8_t                 mne_reserved;
};
/* shorthand accessors for the members of the mne_u union */
#define mne_ip  mne_u.mneu_ip
#define mne_ip6 mne_u.mneu_ip6

#define MNE_FLAGS_IPV6          0x01    /* IPv6 address */

/* list head type for mac_nat_entry lists linked through mne_list */
LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
438
439 /*
440 * mac_nat_record
441 * - used by bridge_mac_nat_output() to convey the translation that needs
442 * to take place in bridge_mac_nat_translate
443 * - holds enough information so that the translation can be done later without
444 * holding the bridge lock
445 */
struct mac_nat_record {
	/* presumably selects which member of mnr_u is meaningful — confirm */
	uint16_t                mnr_ether_type;
	union {
		/* ARP */
		uint16_t                mnru_arp_offset;
		/* IPv4 */
		struct {
			uint16_t        mnruip_dhcp_flags;
			uint16_t        mnruip_udp_csum;
			uint8_t         mnruip_header_len;
		} mnru_ip;
		/* IPv6 */
		struct {
			uint16_t        mnruip6_icmp6_len;
			uint16_t        mnruip6_lladdr_offset;
			uint8_t         mnruip6_icmp6_type;
			uint8_t         mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};

/* shorthand accessors for the members of the mnr_u union */
#define mnr_arp_offset  mnr_u.mnru_arp_offset

#define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
#define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
#define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum

#define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
#define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
#define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
#define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
474
475 /*
476 * Bridge route node.
477 */
struct bridge_rtnode {
	LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
	LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
	struct bridge_iflist    *brt_dst;       /* destination if */
	unsigned long           brt_expire;     /* expiration time */
	uint8_t                 brt_flags;      /* address flags */
	uint8_t                 brt_addr[ETHER_ADDR_LEN]; /* MAC address */
	uint16_t                brt_vlan;       /* vlan id */

};
/*
 * Shorthand for the ifnet of the destination member; expands to a
 * dereference of brt_dst, so brt_dst must be non-NULL where it is used.
 */
#define brt_ifp                 brt_dst->bif_ifp
489
490 /*
491 * Bridge delayed function call context
492 */
/* signature of a function that can be run as a delayed call */
typedef void (*bridge_delayed_func_t)(struct bridge_softc *);

struct bridge_delayed_call {
	struct bridge_softc     *bdc_sc;        /* argument type taken by bdc_func */
	bridge_delayed_func_t   bdc_func;       /* Function to call */
	struct timespec         bdc_ts;         /* Time to call */
	u_int32_t               bdc_flags;      /* presumably BDCF_* — confirm */
	thread_call_t           bdc_thread_call;
};

#define BDCF_OUTSTANDING        0x01    /* Delayed call has been scheduled */
#define BDCF_CANCELLING         0x02    /* May be waiting for call completion */
505
506 /*
507 * Software state for each bridge.
508 */
/* list head type used for both the hash buckets and the flat route list */
LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);

struct bridge_softc {
	struct ifnet            *sc_ifp;        /* make this an interface */
	u_int32_t               sc_flags;       /* presumably SCF_* — confirm */
	LIST_ENTRY(bridge_softc) sc_list;       /* linkage in a list of bridges */
	decl_lck_mtx_data(, sc_mtx);            /* mutex used by _BRIDGE_LOCK() */
	struct _bridge_rtnode_list *sc_rthash;  /* our forwarding table */
	struct _bridge_rtnode_list sc_rtlist;   /* list version of above */
	uint32_t                sc_rthash_key;  /* key for hash */
	uint32_t                sc_rthash_size; /* size of the hash table */
	struct bridge_delayed_call sc_aging_timer;
	struct bridge_delayed_call sc_resize_call;
	TAILQ_HEAD(, bridge_iflist) sc_spanlist;        /* span ports list */
	struct bstp_state       sc_stp;         /* STP state */
	bpf_packet_func         sc_bpf_input;
	bpf_packet_func         sc_bpf_output;
	/* channel slept on by BRIDGE_XLOCK and woken by BRIDGE_UNREF */
	void                    *sc_cv;
	uint32_t                sc_brtmax;      /* max # of addresses */
	uint32_t                sc_brtcnt;      /* cur. # of addresses */
	uint32_t                sc_brttimeout;  /* rt timeout in seconds */
	uint32_t                sc_iflist_ref;  /* refcount for sc_iflist */
	/* exclusive-access count for sc_iflist (BRIDGE_XLOCK/BRIDGE_XDROP) */
	uint32_t                sc_iflist_xcnt;
	TAILQ_HEAD(, bridge_iflist) sc_iflist;  /* member interface list */
	uint32_t                sc_brtexceeded; /* # of cache drops */
	uint32_t                sc_filter_flags; /* ipf and flags */
	struct ifnet            *sc_ifaddr;     /* member mac copied from */
	u_char                  sc_defaddr[6];  /* Default MAC address */
	char                    sc_if_xname[IFNAMSIZ];

	struct bridge_iflist    *sc_mac_nat_bif; /* single MAC NAT interface */
	struct mac_nat_entry_list sc_mne_list;  /* MAC NAT IPv4 */
	struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
	uint32_t                sc_mne_max;     /* max # of entries */
	uint32_t                sc_mne_count;   /* cur. # of entries */
	uint32_t                sc_mne_allocation_failures;
#if BRIDGE_LOCK_DEBUG
	/*
	 * Locking and unlocking calling history
	 * - each array holds BR_LCKDBG_MAX entries; next_*_lr is presumably
	 *   the index of the next slot to fill — confirm in bridge_lock()
	 */
	void                    *lock_lr[BR_LCKDBG_MAX];
	int                     next_lock_lr;
	void                    *unlock_lr[BR_LCKDBG_MAX];
	int                     next_unlock_lr;
#endif /* BRIDGE_LOCK_DEBUG */
};

/* SCF_* bits; presumably stored in sc_flags — confirm */
#define SCF_DETACHING           0x01
#define SCF_RESIZING            0x02
#define SCF_MEDIA_ACTIVE        0x04
559
/*
 * ChecksumOperation
 * - selects a checksum-related action; taken as the last argument of
 *   bridge_enqueue()
 * - NOTE(review): the meaning of each value is implemented by the
 *   consumers and is not documented here — confirm before relying on
 *   the names alone
 */
typedef enum {
	CHECKSUM_OPERATION_NONE = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
	CHECKSUM_OPERATION_FINALIZE = 2,
	CHECKSUM_OPERATION_COMPUTE = 3,
} ChecksumOperation;
566
/*
 * union iphdr
 * - a single pointer viewed as an IPv4 header, an IPv6 header, or an
 *   untyped pointer
 */
union iphdr {
	struct ip       *ip;
	struct ip6_hdr  *ip6;
	void *          ptr;
};
572
/*
 * ip_packet_info
 * - summary of an IP packet: header/payload lengths, protocol, and
 *   pointers to the IP and protocol headers
 * - 'ip_is_ipv4' presumably tells which member of 'ip_hdr' to use —
 *   confirm against the code that fills this in
 */
typedef struct {
	u_int           ip_hlen;        /* IP header length */
	u_int           ip_pay_len;     /* length of payload (exclusive of ip_hlen) */
	u_int           ip_opt_len;     /* IPv6 options headers length */
	uint8_t         ip_proto;       /* IPPROTO_TCP, IPPROTO_UDP, etc. */
	bool            ip_is_ipv4;
	bool            ip_is_fragmented;
	union iphdr     ip_hdr;         /* pointer to IP header */
	void *          ip_proto_hdr;   /* ptr to protocol header (TCP) */
} ip_packet_info, *ip_packet_info_t;
583
584 struct bridge_hostfilter_stats bridge_hostfilter_stats;
585
/* lock group, attributes and mutex declared for the bridge code */
static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
/*
 * NOTE(review): LCK_ATTR_DEBUG is requested only when BRIDGE_LOCK_DEBUG
 * is 0, which reads as inverted — confirm that this is intentional.
 */
#if BRIDGE_LOCK_DEBUG
static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
#else
static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
#endif
/* presumably protects the global list of bridges — confirm */
static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
593
594 static int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
595
596 static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
597 static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
598
599 static int bridge_clone_create(struct if_clone *, uint32_t, void *);
600 static int bridge_clone_destroy(struct ifnet *);
601
602 static errno_t bridge_ioctl(struct ifnet *, u_long, void *);
603 #if HAS_IF_CAP
604 static void bridge_mutecaps(struct bridge_softc *);
605 static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
606 int);
607 #endif
608 static errno_t bridge_set_tso(struct bridge_softc *);
609 static void bridge_proto_attach_changed(struct ifnet *);
610 static int bridge_init(struct ifnet *);
611 #if HAS_BRIDGE_DUMMYNET
612 static void bridge_dummynet(struct mbuf *, struct ifnet *);
613 #endif
614 static void bridge_ifstop(struct ifnet *, int);
615 static int bridge_output(struct ifnet *, struct mbuf *);
616 static void bridge_finalize_cksum(struct ifnet *, struct mbuf *);
617 static void bridge_start(struct ifnet *);
618 static errno_t bridge_input(struct ifnet *, mbuf_t *);
619 static errno_t bridge_iff_input(void *, ifnet_t, protocol_family_t,
620 mbuf_t *, char **);
621 static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t,
622 mbuf_t *);
623 static errno_t bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
624 mbuf_t *m);
625
626 static int bridge_enqueue(ifnet_t, struct ifnet *,
627 struct ifnet *, struct mbuf *, ChecksumOperation);
628 static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
629
630 static void bridge_forward(struct bridge_softc *, struct bridge_iflist *,
631 struct mbuf *);
632
633 static void bridge_aging_timer(struct bridge_softc *sc);
634
635 static void bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
636 struct mbuf *, int);
637 static void bridge_span(struct bridge_softc *, struct mbuf *);
638
639 static int bridge_rtupdate(struct bridge_softc *, const uint8_t *,
640 uint16_t, struct bridge_iflist *, int, uint8_t);
641 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
642 uint16_t);
643 static void bridge_rttrim(struct bridge_softc *);
644 static void bridge_rtage(struct bridge_softc *);
645 static void bridge_rtflush(struct bridge_softc *, int);
646 static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
647 uint16_t);
648
649 static int bridge_rtable_init(struct bridge_softc *);
650 static void bridge_rtable_fini(struct bridge_softc *);
651
652 static void bridge_rthash_resize(struct bridge_softc *);
653
654 static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
655 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
656 const uint8_t *, uint16_t);
657 static int bridge_rtnode_hash(struct bridge_softc *,
658 struct bridge_rtnode *);
659 static int bridge_rtnode_insert(struct bridge_softc *,
660 struct bridge_rtnode *);
661 static void bridge_rtnode_destroy(struct bridge_softc *,
662 struct bridge_rtnode *);
663 #if BRIDGESTP
664 static void bridge_rtable_expire(struct ifnet *, int);
665 static void bridge_state_change(struct ifnet *, int);
666 #endif /* BRIDGESTP */
667
668 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
669 const char *name);
670 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
671 struct ifnet *ifp);
672 static void bridge_delete_member(struct bridge_softc *,
673 struct bridge_iflist *);
674 static void bridge_delete_span(struct bridge_softc *,
675 struct bridge_iflist *);
676
677 static int bridge_ioctl_add(struct bridge_softc *, void *);
678 static int bridge_ioctl_del(struct bridge_softc *, void *);
679 static int bridge_ioctl_gifflags(struct bridge_softc *, void *);
680 static int bridge_ioctl_sifflags(struct bridge_softc *, void *);
681 static int bridge_ioctl_scache(struct bridge_softc *, void *);
682 static int bridge_ioctl_gcache(struct bridge_softc *, void *);
683 static int bridge_ioctl_gifs32(struct bridge_softc *, void *);
684 static int bridge_ioctl_gifs64(struct bridge_softc *, void *);
685 static int bridge_ioctl_rts32(struct bridge_softc *, void *);
686 static int bridge_ioctl_rts64(struct bridge_softc *, void *);
687 static int bridge_ioctl_saddr32(struct bridge_softc *, void *);
688 static int bridge_ioctl_saddr64(struct bridge_softc *, void *);
689 static int bridge_ioctl_sto(struct bridge_softc *, void *);
690 static int bridge_ioctl_gto(struct bridge_softc *, void *);
691 static int bridge_ioctl_daddr32(struct bridge_softc *, void *);
692 static int bridge_ioctl_daddr64(struct bridge_softc *, void *);
693 static int bridge_ioctl_flush(struct bridge_softc *, void *);
694 static int bridge_ioctl_gpri(struct bridge_softc *, void *);
695 static int bridge_ioctl_spri(struct bridge_softc *, void *);
696 static int bridge_ioctl_ght(struct bridge_softc *, void *);
697 static int bridge_ioctl_sht(struct bridge_softc *, void *);
698 static int bridge_ioctl_gfd(struct bridge_softc *, void *);
699 static int bridge_ioctl_sfd(struct bridge_softc *, void *);
700 static int bridge_ioctl_gma(struct bridge_softc *, void *);
701 static int bridge_ioctl_sma(struct bridge_softc *, void *);
702 static int bridge_ioctl_sifprio(struct bridge_softc *, void *);
703 static int bridge_ioctl_sifcost(struct bridge_softc *, void *);
704 static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
705 static int bridge_ioctl_addspan(struct bridge_softc *, void *);
706 static int bridge_ioctl_delspan(struct bridge_softc *, void *);
707 static int bridge_ioctl_gbparam32(struct bridge_softc *, void *);
708 static int bridge_ioctl_gbparam64(struct bridge_softc *, void *);
709 static int bridge_ioctl_grte(struct bridge_softc *, void *);
710 static int bridge_ioctl_gifsstp32(struct bridge_softc *, void *);
711 static int bridge_ioctl_gifsstp64(struct bridge_softc *, void *);
712 static int bridge_ioctl_sproto(struct bridge_softc *, void *);
713 static int bridge_ioctl_stxhc(struct bridge_softc *, void *);
714 static int bridge_ioctl_purge(struct bridge_softc *sc, void *);
715 static int bridge_ioctl_gfilt(struct bridge_softc *, void *);
716 static int bridge_ioctl_sfilt(struct bridge_softc *, void *);
717 static int bridge_ioctl_ghostfilter(struct bridge_softc *, void *);
718 static int bridge_ioctl_shostfilter(struct bridge_softc *, void *);
719 static int bridge_ioctl_gmnelist32(struct bridge_softc *, void *);
720 static int bridge_ioctl_gmnelist64(struct bridge_softc *, void *);
721 static int bridge_ioctl_gifstats32(struct bridge_softc *, void *);
722 static int bridge_ioctl_gifstats64(struct bridge_softc *, void *);
723
724 static int bridge_pf(struct mbuf **, struct ifnet *, uint32_t sc_filter_flags, int input);
725 static int bridge_ip_checkbasic(struct mbuf **);
726 static int bridge_ip6_checkbasic(struct mbuf **);
727
728 static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
729 static errno_t bridge_bpf_input(ifnet_t, struct mbuf *, const char *, int);
730 static errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
731
732 static void bridge_detach(ifnet_t);
733 static void bridge_link_event(struct ifnet *, u_int32_t);
734 static void bridge_iflinkevent(struct ifnet *);
735 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
736 static int interface_media_active(struct ifnet *);
737 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
738 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
739 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
740 static int bridge_host_filter(struct bridge_iflist *, mbuf_t *);
741
742 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
743 struct bridge_iflist *);
744 static void bridge_mac_nat_disable(struct bridge_softc *sc);
745 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
746 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
747 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
748 struct bridge_iflist *);
749 static ifnet_t bridge_mac_nat_input(struct bridge_softc *, mbuf_t *,
750 boolean_t *);
751 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
752 struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
753 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
754 const caddr_t);
755 static bool is_broadcast_ip_packet(mbuf_t *);
756 static bool in_addr_is_ours(const struct in_addr);
757 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
758
759 #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
760
761 static int
762 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
763 boolean_t is_tx);
764
/*
 * The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2)
 *
 * NOTE(review): VLANTAGOF() ignores its argument and always evaluates to
 * 0, not 1 — confirm whether the statement above still applies.
 */
#define VLANTAGOF(_m)   0
767
768 u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
769 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
770
771 static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
772 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
773
774 #if BRIDGESTP
775 static struct bstp_cb_ops bridge_ops = {
776 .bcb_state = bridge_state_change,
777 .bcb_rtage = bridge_rtable_expire
778 };
779 #endif /* BRIDGESTP */
780
781 SYSCTL_DECL(_net_link);
782 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
783 "Bridge");
784
785 static int bridge_inherit_mac = 0; /* share MAC with first bridge member */
786 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
787 CTLFLAG_RW | CTLFLAG_LOCKED,
788 &bridge_inherit_mac, 0,
789 "Inherit MAC address from the first bridge member");
790
791 SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
792 CTLFLAG_RW | CTLFLAG_LOCKED,
793 &bridge_rtable_prune_period, 0,
794 "Interval between pruning of routing table");
795
796 static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
797 SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
798 CTLFLAG_RW | CTLFLAG_LOCKED,
799 &bridge_rtable_hash_size_max, 0,
800 "Maximum size of the routing hash table");
801
802 #if BRIDGE_DELAYED_CALLBACK_DEBUG
803 static int bridge_delayed_callback_delay = 0;
804 SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
805 CTLFLAG_RW | CTLFLAG_LOCKED,
806 &bridge_delayed_callback_delay, 0,
807 "Delay before calling delayed function");
808 #endif
809
810 SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
811 hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
812 &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
813
#if BRIDGESTP
/*
 * net.link.bridge.log_stp
 * - declared with CTLFLAG_LOCKED like the other net.link.bridge
 *   integer sysctls in this file
 */
static int log_stp   = 0;   /* log STP state changes */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &log_stp, 0, "Log STP state changes");
#endif /* BRIDGESTP */
819
/*
 * bridge_control
 * - one entry of a bridge ioctl command table
 */
struct bridge_control {
	/* handler for the command */
	int             (*bc_func)(struct bridge_softc *, void *);
	/* size of the argument structure, e.g. sizeof(struct ifbreq) */
	unsigned int    bc_argsize;
	/* combination of BC_F_* flags */
	unsigned int    bc_flags;
};
825
/* PF tag names used for vmnet traffic; the variants append a suffix to VMNET_TAG. */
#define VMNET_TAG               "com.apple.vmnet"
#define VMNET_LOCAL_TAG         VMNET_TAG ".local"
#define VMNET_BROADCAST_TAG     VMNET_TAG ".broadcast"
#define VMNET_MULTICAST_TAG     VMNET_TAG ".multicast"

/*
 * Tag values for the names above. They start at 0 and are filled in by
 * allocate_vmnet_pf_tags(), which treats 0 as "not yet allocated".
 */
static u_int16_t        vmnet_tag;
static u_int16_t        vmnet_local_tag;
static u_int16_t        vmnet_broadcast_tag;
static u_int16_t        vmnet_multicast_tag;
835
836 static u_int16_t
allocate_pf_tag(char * name)837 allocate_pf_tag(char * name)
838 {
839 u_int16_t tag;
840
841 tag = pf_tagname2tag_ext(name);
842 BRIDGE_LOG(LOG_NOTICE, 0, "%s %d", name, tag);
843 return tag;
844 }
845
846 static void
allocate_vmnet_pf_tags(void)847 allocate_vmnet_pf_tags(void)
848 {
849 /* allocate tags to use with PF */
850 if (vmnet_tag == 0) {
851 vmnet_tag = allocate_pf_tag(VMNET_TAG);
852 }
853 if (vmnet_local_tag == 0) {
854 vmnet_local_tag = allocate_pf_tag(VMNET_LOCAL_TAG);
855 }
856 if (vmnet_broadcast_tag == 0) {
857 vmnet_broadcast_tag = allocate_pf_tag(VMNET_BROADCAST_TAG);
858 }
859 if (vmnet_multicast_tag == 0) {
860 vmnet_multicast_tag = allocate_pf_tag(VMNET_MULTICAST_TAG);
861 }
862 }
863
/* Flag bits for bridge_control.bc_flags; they may be OR'ed together. */
#define BC_F_COPYIN             0x01    /* copy arguments in */
#define BC_F_COPYOUT            0x02    /* copy arguments out */
#define BC_F_SUSER              0x04    /* do super-user check */
867
/*
 * Bridge control dispatch table, variant using the *32 argument layouts
 * (ifbifconf32, ifbaconf32, ifbareq32, ...).
 *
 * The position of an entry is its command number; the inline markers
 * note entries 0, 10, 20 and 30. Entry order must stay in step with
 * bridge_control_table64, which carries the same commands with the
 * *64 layouts.
 * NOTE(review): presumably selected for 32-bit user processes -- the
 * selection logic is outside this part of the file; confirm there.
 */
static const struct bridge_control bridge_control_table32[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist32,
	  .bc_argsize = sizeof(struct ifbrmnelist32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats32,
	  .bc_argsize = sizeof(struct ifbrmreq32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
972
/*
 * Bridge control dispatch table, variant using the *64 argument layouts
 * (ifbifconf64, ifbaconf64, ifbareq64, ...).
 *
 * The position of an entry is its command number; the inline markers
 * note entries 0, 10, 20 and 30. Entry order must stay in step with
 * bridge_control_table32, whose length is what
 * bridge_control_table_size is computed from.
 * NOTE(review): presumably selected for 64-bit user processes -- the
 * selection logic is outside this part of the file; confirm there.
 */
static const struct bridge_control bridge_control_table64[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist64,
	  .bc_argsize = sizeof(struct ifbrmnelist64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats64,
	  .bc_argsize = sizeof(struct ifbrmreq64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
1077
1078 static const unsigned int bridge_control_table_size =
1079 sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1080
/* List of bridge instances; accessed under bridge_list_mtx by the clone-create path. */
static LIST_HEAD(, bridge_softc) bridge_list =
    LIST_HEAD_INITIALIZER(bridge_list);

#define BRIDGENAME      "bridge"                /* interface name prefix used by the cloner */
#define BRIDGES_MAX     IF_MAXUNIT              /* upper limit handed to the cloner */
#define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)

/* Interface cloner that creates and destroys "bridge" interfaces. */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
    0, BRIDGES_MAX);
1091
/*
 * When non-zero, bridge_clone_create() installs bridge_start as the
 * start callback; otherwise it uses the legacy output model with
 * bridge_output.
 */
static int if_bridge_txstart = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");

/* Read/write access to if_bridge_debug (defined elsewhere). */
SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_debug, 0, "Bridge debug flags");

/* Read/write access to if_bridge_log_level (defined elsewhere). */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_log_level, 0, "Bridge log level");

/* Segmentation toggle; enabled (1) by default. */
static int if_bridge_segmentation = 1;
SYSCTL_INT(_net_link_bridge, OID_AUTO, segmentation,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_segmentation, 0, "Bridge interface enable segmentation");
1107
1108 static int if_bridge_vmnet_pf_tagging = 1;
1109 SYSCTL_INT(_net_link_bridge, OID_AUTO, vmnet_pf_tagging,
1110 CTLFLAG_RW | CTLFLAG_LOCKED,
1111 &if_bridge_segmentation, 0, "Bridge interface enable vmnet PF tagging");
1112
/*
 * Limits and defaults for the two "tso reduce mss" tunables. The *_MAX
 * values are the largest settings the sysctl handlers accept.
 */
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX            256
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT        110
#define BRIDGE_TSO_REDUCE_MSS_TX_MAX                    256
#define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT                0

/* Current values; changed only through the sysctl handlers. */
static u_int if_bridge_tso_reduce_mss_forwarding
        = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
static u_int if_bridge_tso_reduce_mss_tx
        = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1122
1123 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1124 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1125 {
1126 int changed;
1127 int error;
1128 u_int new_value;
1129
1130 error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1131 &changed);
1132 if (error == 0 && changed != 0) {
1133 if (new_value > val_max) {
1134 return EINVAL;
1135 }
1136 *val = new_value;
1137 }
1138 return error;
1139 }
1140
1141 static int
1142 bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
1143 {
1144 return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
1145 BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
1146 }
1147
1148 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
1149 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1150 0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
1151 "Bridge tso reduce mss when forwarding");
1152
1153 static int
1154 bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
1155 {
1156 return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
1157 BRIDGE_TSO_REDUCE_MSS_TX_MAX);
1158 }
1159
1160 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
1161 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1162 0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
1163 "Bridge tso reduce mss on transmit");
1164
1165
1166 #if DEBUG || DEVELOPMENT
/*
 * Error-injection support (DEBUG/DEVELOPMENT builds only).
 * if_bridge_force_errors is a bit mask of BRIDGE_FORCE_* values that
 * can be set through the force_errors sysctl.
 */
#define BRIDGE_FORCE_ONE        0x00000001
#define BRIDGE_FORCE_TWO        0x00000002
static u_int32_t if_bridge_force_errors = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, force_errors,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_force_errors, 0, "Bridge interface force errors");
1173 static inline bool
bridge_error_is_forced(u_int32_t flags)1174 bridge_error_is_forced(u_int32_t flags)
1175 {
1176 return (if_bridge_force_errors & flags) != 0;
1177 }
1178
/*
 * Store into __is_forced whether any bit of __flags is being forced
 * (see bridge_error_is_forced()), and log a notice when it is.
 * __is_forced must be an assignable lvalue; __flags is expanded twice.
 */
#define BRIDGE_ERROR_GET_FORCED(__is_forced, __flags)                   \
	do {                                                            \
	        __is_forced = bridge_error_is_forced(__flags);          \
	        if (__is_forced) {                                      \
	                BRIDGE_LOG(LOG_NOTICE, 0, "0x%x forced", __flags); \
	        }                                                       \
	} while (0)
1186 #endif /* DEBUG || DEVELOPMENT */
1187
1188
/* Forward declarations for the logging helpers defined further down in this file. */
static void brlog_ether_header(struct ether_header *);
static void brlog_mbuf_data(mbuf_t, size_t, size_t);
static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
static void brlog_mbuf(mbuf_t, const char *, const char *);
static void brlog_link(struct bridge_softc * sc);
1194
1195 #if BRIDGE_LOCK_DEBUG
/*
 * Forward declarations for the lock-debug variants of the bridge
 * locking primitives; these record the caller's return address in the
 * softc on each lock/unlock.
 */
static void bridge_lock(struct bridge_softc *);
static void bridge_unlock(struct bridge_softc *);
static int bridge_lock2ref(struct bridge_softc *);
static void bridge_unref(struct bridge_softc *);
static void bridge_xlock(struct bridge_softc *);
static void bridge_xdrop(struct bridge_softc *);
1202
1203 static void
bridge_lock(struct bridge_softc * sc)1204 bridge_lock(struct bridge_softc *sc)
1205 {
1206 void *lr_saved = __builtin_return_address(0);
1207
1208 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1209
1210 _BRIDGE_LOCK(sc);
1211
1212 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1213 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1214 }
1215
1216 static void
bridge_unlock(struct bridge_softc * sc)1217 bridge_unlock(struct bridge_softc *sc)
1218 {
1219 void *lr_saved = __builtin_return_address(0);
1220
1221 BRIDGE_LOCK_ASSERT_HELD(sc);
1222
1223 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1224 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1225
1226 _BRIDGE_UNLOCK(sc);
1227 }
1228
1229 static int
bridge_lock2ref(struct bridge_softc * sc)1230 bridge_lock2ref(struct bridge_softc *sc)
1231 {
1232 int error = 0;
1233 void *lr_saved = __builtin_return_address(0);
1234
1235 BRIDGE_LOCK_ASSERT_HELD(sc);
1236
1237 if (sc->sc_iflist_xcnt > 0) {
1238 error = EBUSY;
1239 } else {
1240 sc->sc_iflist_ref++;
1241 }
1242
1243 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1244 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1245
1246 _BRIDGE_UNLOCK(sc);
1247
1248 return error;
1249 }
1250
1251 static void
bridge_unref(struct bridge_softc * sc)1252 bridge_unref(struct bridge_softc *sc)
1253 {
1254 void *lr_saved = __builtin_return_address(0);
1255
1256 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1257
1258 _BRIDGE_LOCK(sc);
1259 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1260 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1261
1262 sc->sc_iflist_ref--;
1263
1264 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1265 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1266 if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1267 _BRIDGE_UNLOCK(sc);
1268 wakeup(&sc->sc_cv);
1269 } else {
1270 _BRIDGE_UNLOCK(sc);
1271 }
1272 }
1273
1274 static void
bridge_xlock(struct bridge_softc * sc)1275 bridge_xlock(struct bridge_softc *sc)
1276 {
1277 void *lr_saved = __builtin_return_address(0);
1278
1279 BRIDGE_LOCK_ASSERT_HELD(sc);
1280
1281 sc->sc_iflist_xcnt++;
1282 while (sc->sc_iflist_ref > 0) {
1283 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1284 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1285
1286 msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1287
1288 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1289 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1290 }
1291 }
1292
1293 static void
bridge_xdrop(struct bridge_softc * sc)1294 bridge_xdrop(struct bridge_softc *sc)
1295 {
1296 BRIDGE_LOCK_ASSERT_HELD(sc);
1297
1298 sc->sc_iflist_xcnt--;
1299 }
1300
1301 #endif /* BRIDGE_LOCK_DEBUG */
1302
1303 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1304 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1305 {
1306 if (m) {
1307 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1308 "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1309 prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1310 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1311 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1312 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1313 suffix ? suffix : "");
1314 } else {
1315 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1316 }
1317 }
1318
1319 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1320 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1321 {
1322 if (m) {
1323 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1324 "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1325 "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1326 prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1327 mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1328 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
1329 (unsigned int)mbuf_maxlen(m),
1330 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1331 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1332 !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1333 if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1334 brlog_mbuf_pkthdr(m, "", suffix);
1335 }
1336 } else {
1337 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1338 }
1339 }
1340
1341 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1342 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1343 {
1344 mbuf_t n;
1345 size_t i, j;
1346 size_t pktlen, mlen, maxlen;
1347 unsigned char *ptr;
1348
1349 pktlen = mbuf_pkthdr_len(m);
1350
1351 if (offset > pktlen) {
1352 return;
1353 }
1354
1355 maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1356 n = m;
1357 mlen = mbuf_len(n);
1358 ptr = mbuf_data(n);
1359 for (i = 0, j = 0; i < maxlen; i++, j++) {
1360 if (j >= mlen) {
1361 n = mbuf_next(n);
1362 if (n == 0) {
1363 break;
1364 }
1365 ptr = mbuf_data(n);
1366 mlen = mbuf_len(n);
1367 j = 0;
1368 }
1369 if (i >= offset) {
1370 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1371 "%02x%s", ptr[j], i % 2 ? " " : "");
1372 }
1373 }
1374 }
1375
1376 static void
brlog_ether_header(struct ether_header * eh)1377 brlog_ether_header(struct ether_header *eh)
1378 {
1379 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1380 "%02x:%02x:%02x:%02x:%02x:%02x > "
1381 "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1382 eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1383 eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1384 eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1385 eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1386 ntohs(eh->ether_type));
1387 }
1388
/*
 * Format the six bytes at `ap` as a lower-case, colon-separated
 * Ethernet address ("xx:xx:xx:xx:xx:xx") into buf.
 *
 * buf: destination buffer
 * len: size of buf in bytes; output is truncated to fit
 * ap:  the six address bytes
 *
 * Returns buf.
 */
static char *
ether_ntop(char *buf, size_t len, const u_char *ap)
{
	unsigned int    octet[6];
	unsigned int    k;

	for (k = 0; k < 6; k++) {
		octet[k] = ap[k];
	}
	(void)snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
	    octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);

	return buf;
}
1397
1398 static void
brlog_link(struct bridge_softc * sc)1399 brlog_link(struct bridge_softc * sc)
1400 {
1401 int i;
1402 uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) +
1403 IFNAMSIZ + ETHER_ADDR_LEN];
1404 struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer;
1405 const u_char * lladdr;
1406 char lladdr_str[48];
1407
1408 memset(sdl, 0, sizeof(sdl_buffer));
1409 sdl->sdl_family = AF_LINK;
1410 sdl->sdl_nlen = strlen(sc->sc_if_xname);
1411 sdl->sdl_alen = ETHER_ADDR_LEN;
1412 sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1413 memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1414 memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1415 lladdr_str[0] = '\0';
1416 for (i = 0, lladdr = CONST_LLADDR(sdl);
1417 i < sdl->sdl_alen;
1418 i++, lladdr++) {
1419 char byte_str[4];
1420
1421 snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1422 *lladdr);
1423 strlcat(lladdr_str, byte_str, sizeof(lladdr_str));
1424 }
1425 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1426 "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1427 " slen %d addr %s", sc->sc_if_xname,
1428 sdl->sdl_len, sdl->sdl_index,
1429 sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1430 sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1431 }
1432
1433
1434 /*
1435 * bridgeattach:
1436 *
1437 * Pseudo-device attach routine.
1438 */
1439 __private_extern__ int
bridgeattach(int n)1440 bridgeattach(int n)
1441 {
1442 #pragma unused(n)
1443 int error;
1444
1445 LIST_INIT(&bridge_list);
1446
1447 #if BRIDGESTP
1448 bstp_sys_init();
1449 #endif /* BRIDGESTP */
1450
1451 error = if_clone_attach(&bridge_cloner);
1452 if (error != 0) {
1453 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1454 }
1455 return error;
1456 }
1457
1458
1459 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1460 bridge_ifnet_set_attrs(struct ifnet * ifp)
1461 {
1462 errno_t error;
1463
1464 error = ifnet_set_mtu(ifp, ETHERMTU);
1465 if (error != 0) {
1466 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1467 goto done;
1468 }
1469 error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1470 if (error != 0) {
1471 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1472 goto done;
1473 }
1474 error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1475 if (error != 0) {
1476 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1477 goto done;
1478 }
1479 error = ifnet_set_flags(ifp,
1480 IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1481 0xffff);
1482
1483 if (error != 0) {
1484 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1485 goto done;
1486 }
1487 done:
1488 return error;
1489 }
1490
1491 /*
1492 * bridge_clone_create:
1493 *
1494 * Create a new bridge instance.
1495 */
1496 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1497 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1498 {
1499 #pragma unused(params)
1500 struct ifnet *ifp = NULL;
1501 struct bridge_softc *sc = NULL;
1502 struct bridge_softc *sc2 = NULL;
1503 struct ifnet_init_eparams init_params;
1504 errno_t error = 0;
1505 uint8_t eth_hostid[ETHER_ADDR_LEN];
1506 int fb, retry, has_hostid;
1507
1508 sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
1509 lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1510 sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1511 sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1512 sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1513 sc->sc_filter_flags = 0;
1514
1515 TAILQ_INIT(&sc->sc_iflist);
1516
1517 /* use the interface name as the unique id for ifp recycle */
1518 snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1519 ifc->ifc_name, unit);
1520 bzero(&init_params, sizeof(init_params));
1521 init_params.ver = IFNET_INIT_CURRENT_VERSION;
1522 init_params.len = sizeof(init_params);
1523 /* Initialize our routing table. */
1524 error = bridge_rtable_init(sc);
1525 if (error != 0) {
1526 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1527 goto done;
1528 }
1529 TAILQ_INIT(&sc->sc_spanlist);
1530 if (if_bridge_txstart) {
1531 init_params.start = bridge_start;
1532 } else {
1533 init_params.flags = IFNET_INIT_LEGACY;
1534 init_params.output = bridge_output;
1535 }
1536 init_params.set_bpf_tap = bridge_set_bpf_tap;
1537 init_params.uniqueid = sc->sc_if_xname;
1538 init_params.uniqueid_len = strlen(sc->sc_if_xname);
1539 init_params.sndq_maxlen = IFQ_MAXLEN;
1540 init_params.name = ifc->ifc_name;
1541 init_params.unit = unit;
1542 init_params.family = IFNET_FAMILY_ETHERNET;
1543 init_params.type = IFT_BRIDGE;
1544 init_params.demux = ether_demux;
1545 init_params.add_proto = ether_add_proto;
1546 init_params.del_proto = ether_del_proto;
1547 init_params.check_multi = ether_check_multi;
1548 init_params.framer_extended = ether_frameout_extended;
1549 init_params.softc = sc;
1550 init_params.ioctl = bridge_ioctl;
1551 init_params.detach = bridge_detach;
1552 init_params.broadcast_addr = etherbroadcastaddr;
1553 init_params.broadcast_len = ETHER_ADDR_LEN;
1554
1555 error = ifnet_allocate_extended(&init_params, &ifp);
1556 if (error != 0) {
1557 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1558 goto done;
1559 }
1560 LIST_INIT(&sc->sc_mne_list);
1561 LIST_INIT(&sc->sc_mne_list_v6);
1562 sc->sc_ifp = ifp;
1563 error = bridge_ifnet_set_attrs(ifp);
1564 if (error != 0) {
1565 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1566 error);
1567 goto done;
1568 }
1569 /*
1570 * Generate an ethernet address with a locally administered address.
1571 *
1572 * Since we are using random ethernet addresses for the bridge, it is
1573 * possible that we might have address collisions, so make sure that
1574 * this hardware address isn't already in use on another bridge.
1575 * The first try uses the "hostid" and falls back to read_frandom();
1576 * for "hostid", we use the MAC address of the first-encountered
1577 * Ethernet-type interface that is currently configured.
1578 */
1579 fb = 0;
1580 has_hostid = (uuid_get_ethernet(ð_hostid[0]) == 0);
1581 for (retry = 1; retry != 0;) {
1582 if (fb || has_hostid == 0) {
1583 read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1584 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1585 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1586 } else {
1587 bcopy(ð_hostid[0], &sc->sc_defaddr,
1588 ETHER_ADDR_LEN);
1589 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1590 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1591 sc->sc_defaddr[3] = /* stir it up a bit */
1592 ((sc->sc_defaddr[3] & 0x0f) << 4) |
1593 ((sc->sc_defaddr[3] & 0xf0) >> 4);
1594 /*
1595 * Mix in the LSB as it's actually pretty significant,
1596 * see rdar://14076061
1597 */
1598 sc->sc_defaddr[4] =
1599 (((sc->sc_defaddr[4] & 0x0f) << 4) |
1600 ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1601 sc->sc_defaddr[5];
1602 sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1603 }
1604
1605 fb = 1;
1606 retry = 0;
1607 lck_mtx_lock(&bridge_list_mtx);
1608 LIST_FOREACH(sc2, &bridge_list, sc_list) {
1609 if (_ether_cmp(sc->sc_defaddr,
1610 IF_LLADDR(sc2->sc_ifp)) == 0) {
1611 retry = 1;
1612 }
1613 }
1614 lck_mtx_unlock(&bridge_list_mtx);
1615 }
1616
1617 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1618
1619 if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1620 brlog_link(sc);
1621 }
1622 error = ifnet_attach(ifp, NULL);
1623 if (error != 0) {
1624 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1625 goto done;
1626 }
1627
1628 error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1629 IFT_ETHER);
1630 if (error != 0) {
1631 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1632 error);
1633 goto done;
1634 }
1635
1636 ifnet_set_offload(ifp,
1637 IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1638 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1639 error = bridge_set_tso(sc);
1640 if (error != 0) {
1641 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1642 goto done;
1643 }
1644 #if BRIDGESTP
1645 bstp_attach(&sc->sc_stp, &bridge_ops);
1646 #endif /* BRIDGESTP */
1647
1648 lck_mtx_lock(&bridge_list_mtx);
1649 LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1650 lck_mtx_unlock(&bridge_list_mtx);
1651
1652 /* attach as ethernet */
1653 error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1654 NULL, NULL);
1655
1656 done:
1657 if (error != 0) {
1658 BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1659 /* TBD: Clean up: sc, sc_rthash etc */
1660 }
1661
1662 return error;
1663 }
1664
/*
 * bridge_clone_destroy:
 *
 *	Destroy a bridge instance.
 *
 *	Marks the softc as detaching, stops the interface, tears down the
 *	delayed calls, clears IFF_UP, removes every member and span
 *	interface, and finally detaches the ifnet.
 *
 *	Always returns 0; a failing ifnet_detach() panics.
 */
static int
bridge_clone_destroy(struct ifnet *ifp)
{
	struct bridge_softc *sc = ifp->if_softc;
	struct bridge_iflist *bif;
	errno_t error;

	BRIDGE_LOCK(sc);
	/* already being torn down: nothing more to do */
	if ((sc->sc_flags & SCF_DETACHING)) {
		BRIDGE_UNLOCK(sc);
		return 0;
	}
	sc->sc_flags |= SCF_DETACHING;

	bridge_ifstop(ifp, 1);

	/*
	 * NOTE(review): only sc_resize_call is cancelled here, while both
	 * delayed calls are cleaned up below -- presumably bridge_ifstop()
	 * has already cancelled sc_aging_timer; confirm.
	 */
	bridge_cancel_delayed_call(&sc->sc_resize_call);

	bridge_cleanup_delayed_call(&sc->sc_resize_call);
	bridge_cleanup_delayed_call(&sc->sc_aging_timer);

	/* clear IFF_UP; a failure is logged but does not stop the teardown */
	error = ifnet_set_flags(ifp, 0, IFF_UP);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
	}

	/* drain the member list, always taking the current head */
	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
		bridge_delete_member(sc, bif);
	}

	/* same for the span list */
	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
		bridge_delete_span(sc, bif);
	}
	BRIDGE_UNLOCK(sc);

	/* the detach is issued with the bridge lock dropped */
	error = ifnet_detach(ifp);
	if (error != 0) {
		panic("%s (%d): ifnet_detach(%p) failed %d",
		    __func__, __LINE__, ifp, error);
	}
	return 0;
}
1712
/*
 * DRVSPEC: body of the SIOC[GS]DRVSPEC handling.
 *
 * Expands in a scope that provides `ifd` (the request), `cmd`, `bc`,
 * `args` (argument buffer), `error`, `sc` and `bridge_control_table`.
 * Sets `error` and leaves the do/while on the first failure.
 *
 * Steps, in order:
 *   1. reject command numbers not below bridge_control_table_size;
 *   2. SIOCGDRVSPEC requires an entry with BC_F_COPYOUT, SIOCSDRVSPEC
 *      requires one without it;
 *   3. BC_F_SUSER entries require kauth_authorize_generic() to pass;
 *   4. the supplied length must equal bc_argsize and fit in `args`;
 *   5. `args` is zeroed, then filled by copyin() for BC_F_COPYIN;
 *   6. the handler runs with the bridge lock held;
 *   7. on success, `args` is copied back out for BC_F_COPYOUT.
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) {                \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
	bc = &bridge_control_table[ifd->ifd_cmd];                       \
                                                                        \
	if (cmd == SIOCGDRVSPEC &&                                      \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) {                       \
	        error = EINVAL;                                         \
	        break;                                                  \
	} else if (cmd == SIOCSDRVSPEC &&                               \
	    (bc->bc_flags & BC_F_COPYOUT) != 0) {                       \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	if (bc->bc_flags & BC_F_SUSER) {                                \
	        error = kauth_authorize_generic(kauth_cred_get(),       \
	            KAUTH_GENERIC_ISSUSER);                             \
	        if (error)                                              \
	                break;                                          \
	}                                                               \
                                                                        \
	if (ifd->ifd_len != bc->bc_argsize ||                           \
	    ifd->ifd_len > sizeof (args)) {                             \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	bzero(&args, sizeof (args));                                    \
	if (bc->bc_flags & BC_F_COPYIN) {                               \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len);     \
	        if (error)                                              \
	                break;                                          \
	}                                                               \
                                                                        \
	BRIDGE_LOCK(sc);                                                \
	error = (*bc->bc_func)(sc, &args);                              \
	BRIDGE_UNLOCK(sc);                                              \
	if (error)                                                      \
	        break;                                                  \
                                                                        \
	if (bc->bc_flags & BC_F_COPYOUT)                                \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len);    \
} while (0)
1759
1760 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1761 interface_needs_input_broadcast(struct ifnet * ifp)
1762 {
1763 /*
1764 * Selectively enable input broadcast only when necessary.
1765 * The bridge interface itself attaches a fake protocol
1766 * so checking for at least two protocols means that the
1767 * interface is being used for something besides bridging
1768 * and needs to see broadcast packets from other members.
1769 */
1770 return if_get_protolist(ifp, NULL, 0) >= 2;
1771 }
1772
1773 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1774 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1775 {
1776 boolean_t old_input_broadcast;
1777
1778 old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1779 if (input_broadcast) {
1780 bif->bif_flags |= BIFF_INPUT_BROADCAST;
1781 } else {
1782 bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1783 }
1784 return old_input_broadcast != input_broadcast;
1785 }
1786
1787 /*
1788 * bridge_ioctl:
1789 *
1790 * Handle a control request from the operator.
1791 */
1792 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * data)1793 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1794 {
1795 struct bridge_softc *sc = ifp->if_softc;
1796 struct ifreq *ifr = (struct ifreq *)data;
1797 struct bridge_iflist *bif;
1798 int error = 0;
1799
1800 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1801
1802 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1803 "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1804 ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1805 (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1806 (char)IOCGROUP(cmd), cmd & 0xff);
1807
1808 switch (cmd) {
1809 case SIOCSIFADDR:
1810 case SIOCAIFADDR:
1811 ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1812 break;
1813
1814 case SIOCGIFMEDIA32:
1815 case SIOCGIFMEDIA64: {
1816 struct ifmediareq *ifmr = (struct ifmediareq *)data;
1817 user_addr_t user_addr;
1818
1819 user_addr = (cmd == SIOCGIFMEDIA64) ?
1820 ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
1821 CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
1822
1823 ifmr->ifm_status = IFM_AVALID;
1824 ifmr->ifm_mask = 0;
1825 ifmr->ifm_count = 1;
1826
1827 BRIDGE_LOCK(sc);
1828 if (!(sc->sc_flags & SCF_DETACHING) &&
1829 (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1830 ifmr->ifm_status |= IFM_ACTIVE;
1831 ifmr->ifm_active = ifmr->ifm_current =
1832 IFM_ETHER | IFM_AUTO;
1833 } else {
1834 ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1835 }
1836 BRIDGE_UNLOCK(sc);
1837
1838 if (user_addr != USER_ADDR_NULL) {
1839 error = copyout(&ifmr->ifm_current, user_addr,
1840 sizeof(int));
1841 }
1842 break;
1843 }
1844
1845 case SIOCADDMULTI:
1846 case SIOCDELMULTI:
1847 break;
1848
1849 case SIOCSDRVSPEC32:
1850 case SIOCGDRVSPEC32: {
1851 union {
1852 struct ifbreq ifbreq;
1853 struct ifbifconf32 ifbifconf;
1854 struct ifbareq32 ifbareq;
1855 struct ifbaconf32 ifbaconf;
1856 struct ifbrparam ifbrparam;
1857 struct ifbropreq32 ifbropreq;
1858 } args;
1859 struct ifdrv32 *ifd = (struct ifdrv32 *)data;
1860 const struct bridge_control *bridge_control_table =
1861 bridge_control_table32, *bc;
1862
1863 DRVSPEC;
1864
1865 break;
1866 }
1867 case SIOCSDRVSPEC64:
1868 case SIOCGDRVSPEC64: {
1869 union {
1870 struct ifbreq ifbreq;
1871 struct ifbifconf64 ifbifconf;
1872 struct ifbareq64 ifbareq;
1873 struct ifbaconf64 ifbaconf;
1874 struct ifbrparam ifbrparam;
1875 struct ifbropreq64 ifbropreq;
1876 } args;
1877 struct ifdrv64 *ifd = (struct ifdrv64 *)data;
1878 const struct bridge_control *bridge_control_table =
1879 bridge_control_table64, *bc;
1880
1881 DRVSPEC;
1882
1883 break;
1884 }
1885
1886 case SIOCSIFFLAGS:
1887 if (!(ifp->if_flags & IFF_UP) &&
1888 (ifp->if_flags & IFF_RUNNING)) {
1889 /*
1890 * If interface is marked down and it is running,
1891 * then stop and disable it.
1892 */
1893 BRIDGE_LOCK(sc);
1894 bridge_ifstop(ifp, 1);
1895 BRIDGE_UNLOCK(sc);
1896 } else if ((ifp->if_flags & IFF_UP) &&
1897 !(ifp->if_flags & IFF_RUNNING)) {
1898 /*
1899 * If interface is marked up and it is stopped, then
1900 * start it.
1901 */
1902 BRIDGE_LOCK(sc);
1903 error = bridge_init(ifp);
1904 BRIDGE_UNLOCK(sc);
1905 }
1906 break;
1907
1908 case SIOCSIFLLADDR:
1909 error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
1910 ifr->ifr_addr.sa_len);
1911 if (error != 0) {
1912 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1913 "%s SIOCSIFLLADDR error %d", ifp->if_xname,
1914 error);
1915 }
1916 break;
1917
1918 case SIOCSIFMTU:
1919 if (ifr->ifr_mtu < 576) {
1920 error = EINVAL;
1921 break;
1922 }
1923 BRIDGE_LOCK(sc);
1924 if (TAILQ_EMPTY(&sc->sc_iflist)) {
1925 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1926 BRIDGE_UNLOCK(sc);
1927 break;
1928 }
1929 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1930 if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
1931 BRIDGE_LOG(LOG_NOTICE, 0,
1932 "%s invalid MTU: %u(%s) != %d",
1933 sc->sc_ifp->if_xname,
1934 bif->bif_ifp->if_mtu,
1935 bif->bif_ifp->if_xname, ifr->ifr_mtu);
1936 error = EINVAL;
1937 break;
1938 }
1939 }
1940 if (!error) {
1941 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1942 }
1943 BRIDGE_UNLOCK(sc);
1944 break;
1945
1946 default:
1947 error = ether_ioctl(ifp, cmd, data);
1948 if (error != 0 && error != EOPNOTSUPP) {
1949 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1950 "ifp %s cmd 0x%08lx "
1951 "(%c%c [%lu] %c %lu) failed error: %d",
1952 ifp->if_xname, cmd,
1953 (cmd & IOC_IN) ? 'I' : ' ',
1954 (cmd & IOC_OUT) ? 'O' : ' ',
1955 IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
1956 cmd & 0xff, error);
1957 }
1958 break;
1959 }
1960 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1961
1962 return error;
1963 }
1964
1965 #if HAS_IF_CAP
1966 /*
1967 * bridge_mutecaps:
1968 *
1969 * Clear or restore unwanted capabilities on the member interface
1970 */
1971 static void
bridge_mutecaps(struct bridge_softc * sc)1972 bridge_mutecaps(struct bridge_softc *sc)
1973 {
1974 struct bridge_iflist *bif;
1975 int enabled, mask;
1976
1977 /* Initial bitmask of capabilities to test */
1978 mask = BRIDGE_IFCAPS_MASK;
1979
1980 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1981 /* Every member must support it or its disabled */
1982 mask &= bif->bif_savedcaps;
1983 }
1984
1985 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1986 enabled = bif->bif_ifp->if_capenable;
1987 enabled &= ~BRIDGE_IFCAPS_STRIP;
1988 /* strip off mask bits and enable them again if allowed */
1989 enabled &= ~BRIDGE_IFCAPS_MASK;
1990 enabled |= mask;
1991
1992 bridge_set_ifcap(sc, bif, enabled);
1993 }
1994 }
1995
1996 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)1997 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
1998 {
1999 struct ifnet *ifp = bif->bif_ifp;
2000 struct ifreq ifr;
2001 int error;
2002
2003 bzero(&ifr, sizeof(ifr));
2004 ifr.ifr_reqcap = set;
2005
2006 if (ifp->if_capenable != set) {
2007 IFF_LOCKGIANT(ifp);
2008 error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2009 IFF_UNLOCKGIANT(ifp);
2010 if (error) {
2011 BRIDGE_LOG(LOG_NOTICE, 0,
2012 "%s error setting interface capabilities on %s",
2013 sc->sc_ifp->if_xname, ifp->if_xname);
2014 }
2015 }
2016 }
2017 #endif /* HAS_IF_CAP */
2018
2019 static errno_t
siocsifcap(struct ifnet * ifp,uint32_t cap_enable)2020 siocsifcap(struct ifnet * ifp, uint32_t cap_enable)
2021 {
2022 struct ifreq ifr;
2023
2024 bzero(&ifr, sizeof(ifr));
2025 ifr.ifr_reqcap = cap_enable;
2026 return ifnet_ioctl(ifp, 0, SIOCSIFCAP, &ifr);
2027 }
2028
2029 static const char *
enable_disable_str(boolean_t enable)2030 enable_disable_str(boolean_t enable)
2031 {
2032 return enable ? "enable" : "disable";
2033 }
2034
2035 static boolean_t
bridge_set_lro(struct ifnet * ifp,boolean_t enable)2036 bridge_set_lro(struct ifnet * ifp, boolean_t enable)
2037 {
2038 uint32_t cap_enable;
2039 uint32_t cap_supported;
2040 boolean_t changed = FALSE;
2041 boolean_t lro_enabled;
2042
2043 cap_supported = ifnet_capabilities_supported(ifp);
2044 if ((cap_supported & IFCAP_LRO) == 0) {
2045 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2046 "%s doesn't support LRO",
2047 ifp->if_xname);
2048 goto done;
2049 }
2050 cap_enable = ifnet_capabilities_enabled(ifp);
2051 lro_enabled = (cap_enable & IFCAP_LRO) != 0;
2052 if (lro_enabled != enable) {
2053 errno_t error;
2054
2055 if (enable) {
2056 cap_enable |= IFCAP_LRO;
2057 } else {
2058 cap_enable &= ~IFCAP_LRO;
2059 }
2060 error = siocsifcap(ifp, cap_enable);
2061 if (error != 0) {
2062 BRIDGE_LOG(LOG_NOTICE, 0,
2063 "%s %s failed (cap 0x%x) %d",
2064 ifp->if_xname,
2065 enable_disable_str(enable),
2066 cap_enable,
2067 error);
2068 } else {
2069 changed = TRUE;
2070 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2071 "%s %s success (cap 0x%x)",
2072 ifp->if_xname,
2073 enable_disable_str(enable),
2074 cap_enable);
2075 }
2076 }
2077 done:
2078 return changed;
2079 }
2080
2081 static errno_t
bridge_set_tso(struct bridge_softc * sc)2082 bridge_set_tso(struct bridge_softc *sc)
2083 {
2084 struct bridge_iflist *bif;
2085 u_int32_t tso_v4_mtu;
2086 u_int32_t tso_v6_mtu;
2087 ifnet_offload_t offload;
2088 errno_t error = 0;
2089
2090 /* By default, support TSO */
2091 offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2092 tso_v4_mtu = IP_MAXPACKET;
2093 tso_v6_mtu = IP_MAXPACKET;
2094
2095 /* Use the lowest common denominator of the members */
2096 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2097 ifnet_t ifp = bif->bif_ifp;
2098
2099 if (ifp == NULL) {
2100 continue;
2101 }
2102
2103 if (offload & IFNET_TSO_IPV4) {
2104 if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2105 if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2106 tso_v4_mtu = ifp->if_tso_v4_mtu;
2107 }
2108 } else {
2109 offload &= ~IFNET_TSO_IPV4;
2110 tso_v4_mtu = 0;
2111 }
2112 }
2113 if (offload & IFNET_TSO_IPV6) {
2114 if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2115 if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2116 tso_v6_mtu = ifp->if_tso_v6_mtu;
2117 }
2118 } else {
2119 offload &= ~IFNET_TSO_IPV6;
2120 tso_v6_mtu = 0;
2121 }
2122 }
2123 }
2124
2125 if (offload != sc->sc_ifp->if_hwassist) {
2126 error = ifnet_set_offload(sc->sc_ifp, offload);
2127 if (error != 0) {
2128 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2129 "ifnet_set_offload(%s, 0x%x) failed %d",
2130 sc->sc_ifp->if_xname, offload, error);
2131 goto done;
2132 }
2133 /*
2134 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2135 * as large as the interface MTU
2136 */
2137 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2138 if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2139 tso_v4_mtu = sc->sc_ifp->if_mtu;
2140 }
2141 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2142 tso_v4_mtu);
2143 if (error != 0) {
2144 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2145 "ifnet_set_tso_mtu(%s, "
2146 "AF_INET, %u) failed %d",
2147 sc->sc_ifp->if_xname,
2148 tso_v4_mtu, error);
2149 goto done;
2150 }
2151 }
2152 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2153 if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2154 tso_v6_mtu = sc->sc_ifp->if_mtu;
2155 }
2156 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2157 tso_v6_mtu);
2158 if (error != 0) {
2159 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2160 "ifnet_set_tso_mtu(%s, "
2161 "AF_INET6, %u) failed %d",
2162 sc->sc_ifp->if_xname,
2163 tso_v6_mtu, error);
2164 goto done;
2165 }
2166 }
2167 }
2168 done:
2169 return error;
2170 }
2171
2172 /*
2173 * bridge_lookup_member:
2174 *
2175 * Lookup a bridge member interface.
2176 */
2177 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,const char * name)2178 bridge_lookup_member(struct bridge_softc *sc, const char *name)
2179 {
2180 struct bridge_iflist *bif;
2181 struct ifnet *ifp;
2182
2183 BRIDGE_LOCK_ASSERT_HELD(sc);
2184
2185 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2186 ifp = bif->bif_ifp;
2187 if (strcmp(ifp->if_xname, name) == 0) {
2188 return bif;
2189 }
2190 }
2191
2192 return NULL;
2193 }
2194
2195 /*
2196 * bridge_lookup_member_if:
2197 *
2198 * Lookup a bridge member interface by ifnet*.
2199 */
2200 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2201 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2202 {
2203 struct bridge_iflist *bif;
2204
2205 BRIDGE_LOCK_ASSERT_HELD(sc);
2206
2207 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2208 if (bif->bif_ifp == member_ifp) {
2209 return bif;
2210 }
2211 }
2212
2213 return NULL;
2214 }
2215
2216 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2217 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2218 mbuf_t *data, char **frame_ptr)
2219 {
2220 #pragma unused(protocol)
2221 errno_t error = 0;
2222 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2223 struct bridge_softc *sc = bif->bif_sc;
2224 int included = 0;
2225 size_t frmlen = 0;
2226 mbuf_t m = *data;
2227
2228 if ((m->m_flags & M_PROTO1)) {
2229 goto out;
2230 }
2231
2232 if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2233 *frame_ptr <= (char *)mbuf_data(m)) {
2234 included = 1;
2235 frmlen = (char *)mbuf_data(m) - *frame_ptr;
2236 }
2237 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2238 "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2239 "frmlen %lu", sc->sc_ifp->if_xname,
2240 ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2241 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
2242 (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2243 included ? "inside" : "outside", frmlen);
2244 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2245 brlog_mbuf(m, "bridge_iff_input[", "");
2246 brlog_ether_header((struct ether_header *)
2247 (void *)*frame_ptr);
2248 brlog_mbuf_data(m, 0, 20);
2249 }
2250 if (included == 0) {
2251 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2252 goto out;
2253 }
2254
2255 /* Move data pointer to start of frame to the link layer header */
2256 (void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
2257 mbuf_len(m) + frmlen);
2258 (void) mbuf_pkthdr_adjustlen(m, frmlen);
2259
2260 /* make sure we can access the ethernet header */
2261 if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2262 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2263 "short frame %lu < %lu",
2264 mbuf_pkthdr_len(m), sizeof(struct ether_header));
2265 goto out;
2266 }
2267 if (mbuf_len(m) < sizeof(struct ether_header)) {
2268 error = mbuf_pullup(data, sizeof(struct ether_header));
2269 if (error != 0) {
2270 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2271 "mbuf_pullup(%lu) failed %d",
2272 sizeof(struct ether_header),
2273 error);
2274 error = EJUSTRETURN;
2275 goto out;
2276 }
2277 if (m != *data) {
2278 m = *data;
2279 *frame_ptr = mbuf_data(m);
2280 }
2281 }
2282
2283 error = bridge_input(ifp, data);
2284
2285 /* Adjust packet back to original */
2286 if (error == 0) {
2287 /* bridge_input might have modified *data */
2288 if (*data != m) {
2289 m = *data;
2290 *frame_ptr = mbuf_data(m);
2291 }
2292 (void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen,
2293 mbuf_len(m) - frmlen);
2294 (void) mbuf_pkthdr_adjustlen(m, -frmlen);
2295 }
2296
2297 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2298 BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2299 brlog_mbuf(m, "bridge_iff_input]", "");
2300 }
2301
2302 out:
2303 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2304
2305 return error;
2306 }
2307
2308 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2309 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2310 mbuf_t *data)
2311 {
2312 #pragma unused(protocol)
2313 errno_t error = 0;
2314 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2315 struct bridge_softc *sc = bif->bif_sc;
2316 mbuf_t m = *data;
2317
2318 if ((m->m_flags & M_PROTO1)) {
2319 goto out;
2320 }
2321 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2322 "%s from %s m 0x%llx data 0x%llx",
2323 sc->sc_ifp->if_xname, ifp->if_xname,
2324 (uint64_t)VM_KERNEL_ADDRPERM(m),
2325 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
2326
2327 error = bridge_member_output(sc, ifp, data);
2328 if (error != 0 && error != EJUSTRETURN) {
2329 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2330 "bridge_member_output failed error %d",
2331 error);
2332 }
2333 out:
2334 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2335
2336 return error;
2337 }
2338
2339 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2340 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2341 const struct kev_msg *event_msg)
2342 {
2343 #pragma unused(protocol)
2344 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2345 struct bridge_softc *sc = bif->bif_sc;
2346
2347 if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2348 event_msg->kev_class == KEV_NETWORK_CLASS &&
2349 event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2350 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2351 "%s event_code %u - %s",
2352 ifp->if_xname, event_msg->event_code,
2353 dlil_kev_dl_code_str(event_msg->event_code));
2354
2355 switch (event_msg->event_code) {
2356 case KEV_DL_LINK_OFF:
2357 case KEV_DL_LINK_ON: {
2358 bridge_iflinkevent(ifp);
2359 #if BRIDGESTP
2360 bstp_linkstate(ifp, event_msg->event_code);
2361 #endif /* BRIDGESTP */
2362 break;
2363 }
2364 case KEV_DL_SIFFLAGS: {
2365 if ((ifp->if_flags & IFF_UP) == 0) {
2366 break;
2367 }
2368 if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2369 errno_t error;
2370
2371 error = ifnet_set_promiscuous(ifp, 1);
2372 if (error != 0) {
2373 BRIDGE_LOG(LOG_NOTICE, 0,
2374 "ifnet_set_promiscuous (%s)"
2375 " failed %d", ifp->if_xname,
2376 error);
2377 } else {
2378 bif->bif_flags |= BIFF_PROMISC;
2379 }
2380 }
2381 if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2382 (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2383 errno_t error;
2384
2385 error = if_allmulti(ifp, 1);
2386 if (error != 0) {
2387 BRIDGE_LOG(LOG_NOTICE, 0,
2388 "if_allmulti (%s)"
2389 " failed %d", ifp->if_xname,
2390 error);
2391 } else {
2392 bif->bif_flags |= BIFF_ALL_MULTI;
2393 #ifdef XNU_PLATFORM_AppleTVOS
2394 ip6_forwarding = 1;
2395 #endif /* XNU_PLATFORM_AppleTVOS */
2396 }
2397 }
2398 break;
2399 }
2400 case KEV_DL_IFCAP_CHANGED: {
2401 BRIDGE_LOCK(sc);
2402 bridge_set_tso(sc);
2403 BRIDGE_UNLOCK(sc);
2404 break;
2405 }
2406 case KEV_DL_PROTO_DETACHED:
2407 case KEV_DL_PROTO_ATTACHED: {
2408 bridge_proto_attach_changed(ifp);
2409 break;
2410 }
2411 default:
2412 break;
2413 }
2414 }
2415 }
2416
2417 /*
2418 * bridge_iff_detached:
2419 *
2420 * Called when our interface filter has been detached from a
2421 * member interface.
2422 */
2423 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2424 bridge_iff_detached(void *cookie, ifnet_t ifp)
2425 {
2426 #pragma unused(cookie)
2427 struct bridge_iflist *bif;
2428 struct bridge_softc *sc = ifp->if_bridge;
2429
2430 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2431
2432 /* Check if the interface is a bridge member */
2433 if (sc != NULL) {
2434 BRIDGE_LOCK(sc);
2435 bif = bridge_lookup_member_if(sc, ifp);
2436 if (bif != NULL) {
2437 bridge_delete_member(sc, bif);
2438 }
2439 BRIDGE_UNLOCK(sc);
2440 return;
2441 }
2442 /* Check if the interface is a span port */
2443 lck_mtx_lock(&bridge_list_mtx);
2444 LIST_FOREACH(sc, &bridge_list, sc_list) {
2445 BRIDGE_LOCK(sc);
2446 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2447 if (ifp == bif->bif_ifp) {
2448 bridge_delete_span(sc, bif);
2449 break;
2450 }
2451 BRIDGE_UNLOCK(sc);
2452 }
2453 lck_mtx_unlock(&bridge_list_mtx);
2454 }
2455
2456 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2457 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2458 char *header)
2459 {
2460 #pragma unused(protocol, packet, header)
2461 BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2462 ifp->if_xname);
2463 return 0;
2464 }
2465
2466 static int
bridge_attach_protocol(struct ifnet * ifp)2467 bridge_attach_protocol(struct ifnet *ifp)
2468 {
2469 int error;
2470 struct ifnet_attach_proto_param reg;
2471
2472 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2473 bzero(®, sizeof(reg));
2474 reg.input = bridge_proto_input;
2475
2476 error = ifnet_attach_protocol(ifp, PF_BRIDGE, ®);
2477 if (error) {
2478 BRIDGE_LOG(LOG_NOTICE, 0,
2479 "ifnet_attach_protocol(%s) failed, %d",
2480 ifp->if_xname, error);
2481 }
2482
2483 return error;
2484 }
2485
2486 static int
bridge_detach_protocol(struct ifnet * ifp)2487 bridge_detach_protocol(struct ifnet *ifp)
2488 {
2489 int error;
2490
2491 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2492 error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2493 if (error) {
2494 BRIDGE_LOG(LOG_NOTICE, 0,
2495 "ifnet_detach_protocol(%s) failed, %d",
2496 ifp->if_xname, error);
2497 }
2498
2499 return error;
2500 }
2501
2502 /*
2503 * bridge_delete_member:
2504 *
2505 * Delete the specified member interface.
2506 */
2507 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)2508 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2509 {
2510 #if SKYWALK
2511 boolean_t add_netagent = FALSE;
2512 #endif /* SKYWALK */
2513 uint32_t bif_flags;
2514 struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2515 int lladdr_changed = 0, error;
2516 uint8_t eaddr[ETHER_ADDR_LEN];
2517 u_int32_t event_code = 0;
2518
2519 BRIDGE_LOCK_ASSERT_HELD(sc);
2520 VERIFY(ifs != NULL);
2521
2522 /*
2523 * Remove the member from the list first so it cannot be found anymore
2524 * when we release the bridge lock below
2525 */
2526 if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2527 BRIDGE_XLOCK(sc);
2528 TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2529 BRIDGE_XDROP(sc);
2530 }
2531 if (sc->sc_mac_nat_bif != NULL) {
2532 if (bif == sc->sc_mac_nat_bif) {
2533 bridge_mac_nat_disable(sc);
2534 } else {
2535 bridge_mac_nat_flush_entries(sc, bif);
2536 }
2537 }
2538 #if BRIDGESTP
2539 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2540 bstp_disable(&bif->bif_stp);
2541 }
2542 #endif /* BRIDGESTP */
2543
2544 /*
2545 * If removing the interface that gave the bridge its mac address, set
2546 * the mac address of the bridge to the address of the next member, or
2547 * to its default address if no members are left.
2548 */
2549 if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2550 ifnet_release(sc->sc_ifaddr);
2551 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2552 bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
2553 sc->sc_ifaddr = NULL;
2554 } else {
2555 struct ifnet *fif =
2556 TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2557 bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
2558 sc->sc_ifaddr = fif;
2559 ifnet_reference(fif); /* for sc_ifaddr */
2560 }
2561 lladdr_changed = 1;
2562 }
2563
2564 #if HAS_IF_CAP
2565 bridge_mutecaps(sc); /* recalculate now this interface is removed */
2566 #endif /* HAS_IF_CAP */
2567
2568 error = bridge_set_tso(sc);
2569 if (error != 0) {
2570 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2571 }
2572
2573 bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
2574
2575 KASSERT(bif->bif_addrcnt == 0,
2576 ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2577
2578 /*
2579 * Update link status of the bridge based on its remaining members
2580 */
2581 event_code = bridge_updatelinkstatus(sc);
2582 bif_flags = bif->bif_flags;
2583 BRIDGE_UNLOCK(sc);
2584
2585 /* only perform these steps if the interface is still attached */
2586 if (ifnet_is_attached(ifs, 1)) {
2587 #if SKYWALK
2588 add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;
2589
2590 if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2591 ifnet_detach_flowswitch_nexus(ifs);
2592 }
2593 #endif /* SKYWALK */
2594 /* disable promiscuous mode */
2595 if ((bif_flags & BIFF_PROMISC) != 0) {
2596 (void) ifnet_set_promiscuous(ifs, 0);
2597 }
2598 /* disable all multi */
2599 if ((bif_flags & BIFF_ALL_MULTI) != 0) {
2600 (void)if_allmulti(ifs, 0);
2601 }
2602 #if HAS_IF_CAP
2603 /* re-enable any interface capabilities */
2604 bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2605 #endif
2606 /* detach bridge "protocol" */
2607 if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2608 (void)bridge_detach_protocol(ifs);
2609 }
2610 /* detach interface filter */
2611 if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2612 iflt_detach(bif->bif_iff_ref);
2613 }
2614 /* re-enable LRO */
2615 if ((bif_flags & BIFF_LRO_DISABLED) != 0) {
2616 (void)bridge_set_lro(ifs, TRUE);
2617 }
2618 ifnet_decr_iorefcnt(ifs);
2619 }
2620
2621 if (lladdr_changed &&
2622 (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2623 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2624 }
2625
2626 if (event_code != 0) {
2627 bridge_link_event(bifp, event_code);
2628 }
2629
2630 #if BRIDGESTP
2631 bstp_destroy(&bif->bif_stp); /* prepare to free */
2632 #endif /* BRIDGESTP */
2633
2634 kfree_type(struct bridge_iflist, bif);
2635 ifs->if_bridge = NULL;
2636 #if SKYWALK
2637 if (add_netagent && ifnet_is_attached(ifs, 1)) {
2638 (void)ifnet_add_netagent(ifs);
2639 ifnet_decr_iorefcnt(ifs);
2640 }
2641 #endif /* SKYWALK */
2642
2643 ifnet_release(ifs);
2644
2645 BRIDGE_LOCK(sc);
2646 }
2647
2648 /*
2649 * bridge_delete_span:
2650 *
2651 * Delete the specified span interface.
2652 */
2653 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2654 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2655 {
2656 BRIDGE_LOCK_ASSERT_HELD(sc);
2657
2658 KASSERT(bif->bif_ifp->if_bridge == NULL,
2659 ("%s: not a span interface", __func__));
2660
2661 ifnet_release(bif->bif_ifp);
2662
2663 TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2664 kfree_type(struct bridge_iflist, bif);
2665 }
2666
2667 static int
bridge_ioctl_add(struct bridge_softc * sc,void * arg)2668 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
2669 {
2670 struct ifbreq *req = arg;
2671 struct bridge_iflist *bif = NULL;
2672 struct ifnet *ifs, *bifp = sc->sc_ifp;
2673 int error = 0, lladdr_changed = 0;
2674 uint8_t eaddr[ETHER_ADDR_LEN];
2675 struct iff_filter iff;
2676 u_int32_t event_code = 0;
2677 boolean_t input_broadcast;
2678 int media_active;
2679 boolean_t wifi_infra = FALSE;
2680
2681 ifs = ifunit(req->ifbr_ifsname);
2682 if (ifs == NULL) {
2683 return ENOENT;
2684 }
2685 if (ifs->if_ioctl == NULL) { /* must be supported */
2686 return EINVAL;
2687 }
2688
2689 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
2690 return EINVAL;
2691 }
2692
2693 /* If it's in the span list, it can't be a member. */
2694 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2695 if (ifs == bif->bif_ifp) {
2696 return EBUSY;
2697 }
2698 }
2699
2700 if (ifs->if_bridge == sc) {
2701 return EEXIST;
2702 }
2703
2704 if (ifs->if_bridge != NULL) {
2705 return EBUSY;
2706 }
2707
2708 switch (ifs->if_type) {
2709 case IFT_ETHER:
2710 if (strcmp(ifs->if_name, "en") == 0 &&
2711 ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2712 (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2713 /* XXX is there a better way to identify Wi-Fi STA? */
2714 wifi_infra = TRUE;
2715 }
2716 break;
2717 case IFT_L2VLAN:
2718 case IFT_IEEE8023ADLAG:
2719 break;
2720 case IFT_GIF:
2721 /* currently not supported */
2722 /* FALLTHRU */
2723 default:
2724 return EINVAL;
2725 }
2726
2727 /* fail to add the interface if the MTU doesn't match */
2728 if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2729 BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2730 sc->sc_ifp->if_xname,
2731 ifs->if_xname);
2732 return EINVAL;
2733 }
2734
2735 /* there's already an interface that's doing MAC NAT */
2736 if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
2737 return EBUSY;
2738 }
2739
2740 /* prevent the interface from detaching while we add the member */
2741 if (!ifnet_is_attached(ifs, 1)) {
2742 return ENXIO;
2743 }
2744
2745 /* allocate a new member */
2746 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2747 bif->bif_ifp = ifs;
2748 ifnet_reference(ifs);
2749 bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2750 #if HAS_IF_CAP
2751 bif->bif_savedcaps = ifs->if_capenable;
2752 #endif /* HAS_IF_CAP */
2753 bif->bif_sc = sc;
2754 if (wifi_infra) {
2755 (void)bridge_mac_nat_enable(sc, bif);
2756 }
2757
2758 if (IFNET_IS_VMNET(ifs)) {
2759 allocate_vmnet_pf_tags();
2760 }
2761 /* Allow the first Ethernet member to define the MTU */
2762 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2763 sc->sc_ifp->if_mtu = ifs->if_mtu;
2764 }
2765
2766 /*
2767 * Assign the interface's MAC address to the bridge if it's the first
2768 * member and the MAC address of the bridge has not been changed from
2769 * the default (randomly) generated one.
2770 */
2771 if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2772 _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2773 bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2774 sc->sc_ifaddr = ifs;
2775 ifnet_reference(ifs); /* for sc_ifaddr */
2776 lladdr_changed = 1;
2777 }
2778
2779 ifs->if_bridge = sc;
2780 #if BRIDGESTP
2781 bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2782 #endif /* BRIDGESTP */
2783
2784 #if HAS_IF_CAP
2785 /* Set interface capabilities to the intersection set of all members */
2786 bridge_mutecaps(sc);
2787 #endif /* HAS_IF_CAP */
2788
2789
2790 /*
2791 * Respect lock ordering with DLIL lock for the following operations
2792 */
2793 BRIDGE_UNLOCK(sc);
2794
2795 /* enable promiscuous mode */
2796 error = ifnet_set_promiscuous(ifs, 1);
2797 switch (error) {
2798 case 0:
2799 bif->bif_flags |= BIFF_PROMISC;
2800 break;
2801 case ENETDOWN:
2802 case EPWROFF:
2803 BRIDGE_LOG(LOG_NOTICE, 0,
2804 "ifnet_set_promiscuous(%s) failed %d, ignoring",
2805 ifs->if_xname, error);
2806 /* Ignore error when device is not up */
2807 error = 0;
2808 break;
2809 default:
2810 BRIDGE_LOG(LOG_NOTICE, 0,
2811 "ifnet_set_promiscuous(%s) failed %d",
2812 ifs->if_xname, error);
2813 BRIDGE_LOCK(sc);
2814 goto out;
2815 }
2816 if (wifi_infra) {
2817 int this_error;
2818
2819 /* Wi-Fi doesn't really support promiscuous, set allmulti */
2820 bif->bif_flags |= BIFF_WIFI_INFRA;
2821 this_error = if_allmulti(ifs, 1);
2822 if (this_error == 0) {
2823 bif->bif_flags |= BIFF_ALL_MULTI;
2824 #ifdef XNU_PLATFORM_AppleTVOS
2825 ip6_forwarding = 1;
2826 #endif /* XNU_PLATFORM_AppleTVOS */
2827 } else {
2828 BRIDGE_LOG(LOG_NOTICE, 0,
2829 "if_allmulti(%s) failed %d, ignoring",
2830 ifs->if_xname, this_error);
2831 }
2832 }
2833 #if SKYWALK
2834 /* ensure that the flowswitch is present for native interface */
2835 if (SKYWALK_NATIVE(ifs)) {
2836 if (ifnet_attach_flowswitch_nexus(ifs)) {
2837 bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
2838 }
2839 }
2840 /* remove the netagent on the flowswitch (rdar://75050182) */
2841 if (if_is_fsw_netagent_enabled()) {
2842 (void)ifnet_remove_netagent(ifs);
2843 bif->bif_flags |= BIFF_NETAGENT_REMOVED;
2844 }
2845 #endif /* SKYWALK */
2846
2847 /*
2848 * install an interface filter
2849 */
2850 memset(&iff, 0, sizeof(struct iff_filter));
2851 iff.iff_cookie = bif;
2852 iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
2853 iff.iff_input = bridge_iff_input;
2854 iff.iff_output = bridge_iff_output;
2855 iff.iff_event = bridge_iff_event;
2856 iff.iff_detached = bridge_iff_detached;
2857 error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
2858 DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
2859 if (error != 0) {
2860 BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
2861 BRIDGE_LOCK(sc);
2862 goto out;
2863 }
2864 bif->bif_flags |= BIFF_FILTER_ATTACHED;
2865
2866 /*
2867 * install a dummy "bridge" protocol
2868 */
2869 if ((error = bridge_attach_protocol(ifs)) != 0) {
2870 if (error != 0) {
2871 BRIDGE_LOG(LOG_NOTICE, 0,
2872 "bridge_attach_protocol failed %d", error);
2873 BRIDGE_LOCK(sc);
2874 goto out;
2875 }
2876 }
2877 bif->bif_flags |= BIFF_PROTO_ATTACHED;
2878
2879 if (lladdr_changed &&
2880 (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2881 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2882 }
2883
2884 media_active = interface_media_active(ifs);
2885
2886 /* disable LRO */
2887 if (bridge_set_lro(ifs, FALSE)) {
2888 bif->bif_flags |= BIFF_LRO_DISABLED;
2889 }
2890
2891 /*
2892 * No failures past this point. Add the member to the list.
2893 */
2894 BRIDGE_LOCK(sc);
2895 bif->bif_flags |= BIFF_IN_MEMBER_LIST;
2896 BRIDGE_XLOCK(sc);
2897 TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
2898 BRIDGE_XDROP(sc);
2899
2900 /* cache the member link status */
2901 if (media_active != 0) {
2902 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
2903 } else {
2904 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
2905 }
2906
2907 /* the new member may change the link status of the bridge interface */
2908 event_code = bridge_updatelinkstatus(sc);
2909
2910 /* check whether we need input broadcast or not */
2911 input_broadcast = interface_needs_input_broadcast(ifs);
2912 bif_set_input_broadcast(bif, input_broadcast);
2913 BRIDGE_UNLOCK(sc);
2914
2915 if (event_code != 0) {
2916 bridge_link_event(bifp, event_code);
2917 }
2918 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2919 "%s input broadcast %s", ifs->if_xname,
2920 input_broadcast ? "ENABLED" : "DISABLED");
2921
2922 BRIDGE_LOCK(sc);
2923 bridge_set_tso(sc);
2924
2925 out:
2926 /* allow the interface to detach */
2927 ifnet_decr_iorefcnt(ifs);
2928
2929 if (error != 0) {
2930 if (bif != NULL) {
2931 bridge_delete_member(sc, bif);
2932 }
2933 } else if (IFNET_IS_VMNET(ifs)) {
2934 INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
2935 }
2936
2937 return error;
2938 }
2939
2940 static int
bridge_ioctl_del(struct bridge_softc * sc,void * arg)2941 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
2942 {
2943 struct ifbreq *req = arg;
2944 struct bridge_iflist *bif;
2945
2946 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2947 if (bif == NULL) {
2948 return ENOENT;
2949 }
2950
2951 bridge_delete_member(sc, bif);
2952
2953 return 0;
2954 }
2955
/*
 * bridge_ioctl_purge:
 *
 *	Does nothing: neither argument is used and 0 is always returned.
 */
static int
bridge_ioctl_purge(struct bridge_softc *sc, void *arg)
{
#pragma unused(sc)
#pragma unused(arg)
	return 0;
}
2962
2963 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * arg)2964 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
2965 {
2966 struct ifbreq *req = arg;
2967 struct bridge_iflist *bif;
2968
2969 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2970 if (bif == NULL) {
2971 return ENOENT;
2972 }
2973
2974 struct bstp_port *bp;
2975
2976 bp = &bif->bif_stp;
2977 req->ifbr_state = bp->bp_state;
2978 req->ifbr_priority = bp->bp_priority;
2979 req->ifbr_path_cost = bp->bp_path_cost;
2980 req->ifbr_proto = bp->bp_protover;
2981 req->ifbr_role = bp->bp_role;
2982 req->ifbr_stpflags = bp->bp_flags;
2983 req->ifbr_ifsflags = bif->bif_ifflags;
2984
2985 /* Copy STP state options as flags */
2986 if (bp->bp_operedge) {
2987 req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
2988 }
2989 if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
2990 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
2991 }
2992 if (bp->bp_ptp_link) {
2993 req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
2994 }
2995 if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
2996 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
2997 }
2998 if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
2999 req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
3000 }
3001 if (bp->bp_flags & BSTP_PORT_ADMCOST) {
3002 req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
3003 }
3004
3005 req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
3006 req->ifbr_addrcnt = bif->bif_addrcnt;
3007 req->ifbr_addrmax = bif->bif_addrmax;
3008 req->ifbr_addrexceeded = bif->bif_addrexceeded;
3009
3010 return 0;
3011 }
3012
3013 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * arg)3014 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
3015 {
3016 struct ifbreq *req = arg;
3017 struct bridge_iflist *bif;
3018 #if BRIDGESTP
3019 struct bstp_port *bp;
3020 int error;
3021 #endif /* BRIDGESTP */
3022
3023 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3024 if (bif == NULL) {
3025 return ENOENT;
3026 }
3027
3028 if (req->ifbr_ifsflags & IFBIF_SPAN) {
3029 /* SPAN is readonly */
3030 return EINVAL;
3031 }
3032 #define _EXCLUSIVE_FLAGS (IFBIF_CHECKSUM_OFFLOAD | IFBIF_MAC_NAT)
3033 if ((req->ifbr_ifsflags & _EXCLUSIVE_FLAGS) == _EXCLUSIVE_FLAGS) {
3034 /* can't specify both MAC-NAT and checksum offload */
3035 return EINVAL;
3036 }
3037 if ((req->ifbr_ifsflags & IFBIF_MAC_NAT) != 0) {
3038 errno_t error;
3039
3040 error = bridge_mac_nat_enable(sc, bif);
3041 if (error != 0) {
3042 return error;
3043 }
3044 } else if (sc->sc_mac_nat_bif == bif) {
3045 bridge_mac_nat_disable(sc);
3046 }
3047
3048
3049 #if BRIDGESTP
3050 if (req->ifbr_ifsflags & IFBIF_STP) {
3051 if ((bif->bif_ifflags & IFBIF_STP) == 0) {
3052 error = bstp_enable(&bif->bif_stp);
3053 if (error) {
3054 return error;
3055 }
3056 }
3057 } else {
3058 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
3059 bstp_disable(&bif->bif_stp);
3060 }
3061 }
3062
3063 /* Pass on STP flags */
3064 bp = &bif->bif_stp;
3065 bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
3066 bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
3067 bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
3068 bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
3069 #else /* !BRIDGESTP */
3070 if (req->ifbr_ifsflags & IFBIF_STP) {
3071 return EOPNOTSUPP;
3072 }
3073 #endif /* !BRIDGESTP */
3074
3075 /* Save the bits relating to the bridge */
3076 bif->bif_ifflags = req->ifbr_ifsflags & IFBIFMASK;
3077
3078
3079 return 0;
3080 }
3081
3082 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * arg)3083 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
3084 {
3085 struct ifbrparam *param = arg;
3086
3087 sc->sc_brtmax = param->ifbrp_csize;
3088 bridge_rttrim(sc);
3089 return 0;
3090 }
3091
3092 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * arg)3093 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
3094 {
3095 struct ifbrparam *param = arg;
3096
3097 param->ifbrp_csize = sc->sc_brtmax;
3098
3099 return 0;
3100 }
3101
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of bridge_ioctl_gifs32()/bridge_ioctl_gifs64().  Expects
 *	`sc', `bifc' and `error' to be in scope in the expanding function.
 *
 *	If bifc->ifbic_len is 0, stores the size needed for every member and
 *	span interface in ifbic_len and returns 0 from the expanding
 *	function.  Otherwise builds an array of struct ifbreq in a temporary
 *	buffer, copies it out to bifc->ifbic_req and sets ifbic_len to the
 *	number of bytes produced; the result is left in `error'.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout, so the lists may differ between the counting pass and the
 *	fill pass; `len' bounds the fill pass to the allocated size.
 *
 *	A failed allocation (NULL for a non-zero size) ends the macro with
 *	error = ENOMEM instead of writing through a NULL buffer.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif; \
	struct ifbreq breq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
	\
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) \
	        count++; \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) \
	        count++; \
	\
	buflen = sizeof (breq) * count; \
	if (bifc->ifbic_len == 0) { \
	        bifc->ifbic_len = buflen; \
	        return (0); \
	} \
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	if (outbuf == NULL && buflen != 0) { \
	        error = ENOMEM; \
	        break; \
	} \
	\
	count = 0; \
	buf = outbuf; \
	len = min(bifc->ifbic_len, buflen); \
	bzero(&breq, sizeof (breq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if (len < sizeof (breq)) \
	                break; \
	\
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname); \
	        /* Fill in the ifbreq structure */ \
	        error = bridge_ioctl_gifflags(sc, &breq); \
	        if (error) \
	                break; \
	        memcpy(buf, &breq, sizeof (breq)); \
	        count++; \
	        buf += sizeof (breq); \
	        len -= sizeof (breq); \
	} \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) { \
	        if (len < sizeof (breq)) \
	                break; \
	\
	        snprintf(breq.ifbr_ifsname, \
	            sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname); \
	        breq.ifbr_ifsflags = bif->bif_ifflags; \
	        breq.ifbr_portno \
	                = bif->bif_ifp->if_index & 0xfff; \
	        memcpy(buf, &breq, sizeof (breq)); \
	        count++; \
	        buf += sizeof (breq); \
	        len -= sizeof (breq); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	bifc->ifbic_len = sizeof (breq) * count; \
	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len); \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
} while (0)
3164
3165 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * arg)3166 bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
3167 {
3168 struct ifbifconf64 *bifc = arg;
3169 int error = 0;
3170
3171 BRIDGE_IOCTL_GIFS;
3172
3173 return error;
3174 }
3175
3176 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * arg)3177 bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
3178 {
3179 struct ifbifconf32 *bifc = arg;
3180 int error = 0;
3181
3182 BRIDGE_IOCTL_GIFS;
3183
3184 return error;
3185 }
3186
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of bridge_ioctl_rts32()/bridge_ioctl_rts64().  Expects
 *	`sc', `bac', `bareq' and `error' to be in scope in the expanding
 *	function, and returns from that function on every path.
 *
 *	Returns 0 immediately if bac->ifbac_len is 0.  Otherwise walks
 *	sc_rtlist, writes one `bareq' per entry into a temporary buffer
 *	(bounded by both ifbac_len and the allocated size), copies the
 *	buffer out to bac->ifbac_req and stores the byte count produced in
 *	ifbac_len.
 *
 *	The bridge lock is dropped around the allocation and the copyout.
 *
 *	ifba_expire is reset for every entry, so a dynamic entry whose
 *	expiry time has already passed reports 0 rather than the value left
 *	over from the previous entry.  A failed allocation (NULL for a
 *	non-zero size) returns ENOMEM instead of writing through NULL.
 */
#define BRIDGE_IOCTL_RTS do { \
	struct bridge_rtnode *brt; \
	char *buf; \
	char *outbuf = NULL; \
	unsigned int count, buflen, len; \
	unsigned long now; \
	\
	if (bac->ifbac_len == 0) \
	        return (0); \
	\
	bzero(&bareq, sizeof (bareq)); \
	count = 0; \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) \
	        count++; \
	buflen = sizeof (bareq) * count; \
	\
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	if (outbuf == NULL && buflen != 0) \
	        return (ENOMEM); \
	\
	count = 0; \
	buf = outbuf; \
	len = min(bac->ifbac_len, buflen); \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
	        if (len < sizeof (bareq)) \
	                goto out; \
	        snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname), \
	            "%s", brt->brt_ifp->if_xname); \
	        memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
	        bareq.ifba_vlan = brt->brt_vlan; \
	        /* only dynamic entries that have not expired report a time */ \
	        bareq.ifba_expire = 0; \
	        if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
	                now = (unsigned long) net_uptime(); \
	                if (now < brt->brt_expire) \
	                        bareq.ifba_expire = \
	                            brt->brt_expire - now; \
	        } \
	        bareq.ifba_flags = brt->brt_flags; \
	\
	        memcpy(buf, &bareq, sizeof (bareq)); \
	        count++; \
	        buf += sizeof (bareq); \
	        len -= sizeof (bareq); \
	} \
out: \
	bac->ifbac_len = sizeof (bareq) * count; \
	if (outbuf != NULL) { \
	        BRIDGE_UNLOCK(sc); \
	        error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); \
	        kfree_data(outbuf, buflen); \
	        BRIDGE_LOCK(sc); \
	} \
	return (error); \
} while (0)
3241
3242 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * arg)3243 bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
3244 {
3245 struct ifbaconf64 *bac = arg;
3246 struct ifbareq64 bareq;
3247 int error = 0;
3248
3249 BRIDGE_IOCTL_RTS;
3250 return error;
3251 }
3252
3253 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * arg)3254 bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
3255 {
3256 struct ifbaconf32 *bac = arg;
3257 struct ifbareq32 bareq;
3258 int error = 0;
3259
3260 BRIDGE_IOCTL_RTS;
3261 return error;
3262 }
3263
3264 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * arg)3265 bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
3266 {
3267 struct ifbareq32 *req = arg;
3268 struct bridge_iflist *bif;
3269 int error;
3270
3271 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3272 if (bif == NULL) {
3273 return ENOENT;
3274 }
3275
3276 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3277 req->ifba_flags);
3278
3279 return error;
3280 }
3281
3282 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * arg)3283 bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
3284 {
3285 struct ifbareq64 *req = arg;
3286 struct bridge_iflist *bif;
3287 int error;
3288
3289 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3290 if (bif == NULL) {
3291 return ENOENT;
3292 }
3293
3294 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3295 req->ifba_flags);
3296
3297 return error;
3298 }
3299
3300 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * arg)3301 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
3302 {
3303 struct ifbrparam *param = arg;
3304
3305 sc->sc_brttimeout = param->ifbrp_ctime;
3306 return 0;
3307 }
3308
3309 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * arg)3310 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
3311 {
3312 struct ifbrparam *param = arg;
3313
3314 param->ifbrp_ctime = sc->sc_brttimeout;
3315 return 0;
3316 }
3317
3318 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * arg)3319 bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
3320 {
3321 struct ifbareq32 *req = arg;
3322
3323 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3324 }
3325
3326 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * arg)3327 bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
3328 {
3329 struct ifbareq64 *req = arg;
3330
3331 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3332 }
3333
3334 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * arg)3335 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
3336 {
3337 struct ifbreq *req = arg;
3338
3339 bridge_rtflush(sc, req->ifbr_ifsflags);
3340 return 0;
3341 }
3342
3343 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * arg)3344 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
3345 {
3346 struct ifbrparam *param = arg;
3347 struct bstp_state *bs = &sc->sc_stp;
3348
3349 param->ifbrp_prio = bs->bs_bridge_priority;
3350 return 0;
3351 }
3352
3353 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * arg)3354 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
3355 {
3356 #if BRIDGESTP
3357 struct ifbrparam *param = arg;
3358
3359 return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3360 #else /* !BRIDGESTP */
3361 #pragma unused(sc, arg)
3362 return EOPNOTSUPP;
3363 #endif /* !BRIDGESTP */
3364 }
3365
3366 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * arg)3367 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
3368 {
3369 struct ifbrparam *param = arg;
3370 struct bstp_state *bs = &sc->sc_stp;
3371
3372 param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3373 return 0;
3374 }
3375
3376 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * arg)3377 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
3378 {
3379 #if BRIDGESTP
3380 struct ifbrparam *param = arg;
3381
3382 return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3383 #else /* !BRIDGESTP */
3384 #pragma unused(sc, arg)
3385 return EOPNOTSUPP;
3386 #endif /* !BRIDGESTP */
3387 }
3388
3389 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * arg)3390 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
3391 {
3392 struct ifbrparam *param;
3393 struct bstp_state *bs;
3394
3395 param = arg;
3396 bs = &sc->sc_stp;
3397 param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3398 return 0;
3399 }
3400
3401 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * arg)3402 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
3403 {
3404 #if BRIDGESTP
3405 struct ifbrparam *param = arg;
3406
3407 return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3408 #else /* !BRIDGESTP */
3409 #pragma unused(sc, arg)
3410 return EOPNOTSUPP;
3411 #endif /* !BRIDGESTP */
3412 }
3413
3414 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * arg)3415 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
3416 {
3417 struct ifbrparam *param;
3418 struct bstp_state *bs;
3419
3420 param = arg;
3421 bs = &sc->sc_stp;
3422 param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3423 return 0;
3424 }
3425
3426 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * arg)3427 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
3428 {
3429 #if BRIDGESTP
3430 struct ifbrparam *param = arg;
3431
3432 return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3433 #else /* !BRIDGESTP */
3434 #pragma unused(sc, arg)
3435 return EOPNOTSUPP;
3436 #endif /* !BRIDGESTP */
3437 }
3438
3439 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * arg)3440 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
3441 {
3442 #if BRIDGESTP
3443 struct ifbreq *req = arg;
3444 struct bridge_iflist *bif;
3445
3446 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3447 if (bif == NULL) {
3448 return ENOENT;
3449 }
3450
3451 return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3452 #else /* !BRIDGESTP */
3453 #pragma unused(sc, arg)
3454 return EOPNOTSUPP;
3455 #endif /* !BRIDGESTP */
3456 }
3457
3458 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * arg)3459 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
3460 {
3461 #if BRIDGESTP
3462 struct ifbreq *req = arg;
3463 struct bridge_iflist *bif;
3464
3465 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3466 if (bif == NULL) {
3467 return ENOENT;
3468 }
3469
3470 return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3471 #else /* !BRIDGESTP */
3472 #pragma unused(sc, arg)
3473 return EOPNOTSUPP;
3474 #endif /* !BRIDGESTP */
3475 }
3476
3477 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * arg)3478 bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
3479 {
3480 struct ifbrparam *param = arg;
3481
3482 param->ifbrp_filter = sc->sc_filter_flags;
3483
3484 return 0;
3485 }
3486
3487 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * arg)3488 bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
3489 {
3490 struct ifbrparam *param = arg;
3491
3492 if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3493 return EINVAL;
3494 }
3495
3496 if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3497 return EINVAL;
3498 }
3499
3500 sc->sc_filter_flags = param->ifbrp_filter;
3501
3502 return 0;
3503 }
3504
3505 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * arg)3506 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
3507 {
3508 struct ifbreq *req = arg;
3509 struct bridge_iflist *bif;
3510
3511 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3512 if (bif == NULL) {
3513 return ENOENT;
3514 }
3515
3516 bif->bif_addrmax = req->ifbr_addrmax;
3517 return 0;
3518 }
3519
3520 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * arg)3521 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
3522 {
3523 struct ifbreq *req = arg;
3524 struct bridge_iflist *bif = NULL;
3525 struct ifnet *ifs;
3526
3527 ifs = ifunit(req->ifbr_ifsname);
3528 if (ifs == NULL) {
3529 return ENOENT;
3530 }
3531
3532 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3533 return EINVAL;
3534 }
3535
3536 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3537 if (ifs == bif->bif_ifp) {
3538 return EBUSY;
3539 }
3540
3541 if (ifs->if_bridge != NULL) {
3542 return EBUSY;
3543 }
3544
3545 switch (ifs->if_type) {
3546 case IFT_ETHER:
3547 case IFT_L2VLAN:
3548 case IFT_IEEE8023ADLAG:
3549 break;
3550 case IFT_GIF:
3551 /* currently not supported */
3552 /* FALLTHRU */
3553 default:
3554 return EINVAL;
3555 }
3556
3557 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3558
3559 bif->bif_ifp = ifs;
3560 bif->bif_ifflags = IFBIF_SPAN;
3561
3562 ifnet_reference(bif->bif_ifp);
3563
3564 TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3565
3566 return 0;
3567 }
3568
3569 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * arg)3570 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
3571 {
3572 struct ifbreq *req = arg;
3573 struct bridge_iflist *bif;
3574 struct ifnet *ifs;
3575
3576 ifs = ifunit(req->ifbr_ifsname);
3577 if (ifs == NULL) {
3578 return ENOENT;
3579 }
3580
3581 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3582 if (ifs == bif->bif_ifp) {
3583 break;
3584 }
3585
3586 if (bif == NULL) {
3587 return ENOENT;
3588 }
3589
3590 bridge_delete_span(sc, bif);
3591
3592 return 0;
3593 }
3594
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of bridge_ioctl_gbparam32()/bridge_ioctl_gbparam64().
 *	Expects `sc' and `req' to be in scope in the expanding function and
 *	copies the values held in sc->sc_stp into `req'.
 */
#define BRIDGE_IOCTL_GBPARAM do { \
	struct bstp_state *stp = &sc->sc_stp; \
	struct bstp_port *rp = stp->bs_root_port; \
	\
	/* the three timers are reported shifted right by 8 */ \
	req->ifbop_maxage = stp->bs_bridge_max_age >> 8; \
	req->ifbop_hellotime = stp->bs_bridge_htime >> 8; \
	req->ifbop_fwddelay = stp->bs_bridge_fdelay >> 8; \
	\
	/* 0 is reported when there is no root port */ \
	if (rp != NULL) \
	        req->ifbop_root_port = rp->bp_ifp->if_index; \
	else \
	        req->ifbop_root_port = 0; \
	\
	req->ifbop_holdcount = stp->bs_txholdcount; \
	req->ifbop_priority = stp->bs_bridge_priority; \
	req->ifbop_protocol = stp->bs_protover; \
	\
	req->ifbop_bridgeid = stp->bs_bridge_pv.pv_dbridge_id; \
	req->ifbop_root_path_cost = stp->bs_root_pv.pv_cost; \
	req->ifbop_designated_root = stp->bs_root_pv.pv_root_id; \
	req->ifbop_designated_bridge = stp->bs_root_pv.pv_dbridge_id; \
	\
	req->ifbop_last_tc_time.tv_sec = stp->bs_last_tc_time.tv_sec; \
	req->ifbop_last_tc_time.tv_usec = stp->bs_last_tc_time.tv_usec; \
} while (0)
3619
3620 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * arg)3621 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *arg)
3622 {
3623 struct ifbropreq32 *req = arg;
3624
3625 BRIDGE_IOCTL_GBPARAM;
3626 return 0;
3627 }
3628
3629 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * arg)3630 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *arg)
3631 {
3632 struct ifbropreq64 *req = arg;
3633
3634 BRIDGE_IOCTL_GBPARAM;
3635 return 0;
3636 }
3637
3638 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * arg)3639 bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
3640 {
3641 struct ifbrparam *param = arg;
3642
3643 param->ifbrp_cexceeded = sc->sc_brtexceeded;
3644 return 0;
3645 }
3646
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body of bridge_ioctl_gifsstp32()/bridge_ioctl_gifsstp64().
 *	Expects `sc', `bifstp' and `error' to be in scope in the expanding
 *	function, and returns from that function on every path.
 *
 *	If bifstp->ifbpstp_len is 0, stores the size needed for all members
 *	with IFBIF_STP set in ifbpstp_len and returns 0.  Otherwise builds
 *	one struct ifbpstpreq per such member in a temporary buffer (bounded
 *	by both ifbpstp_len and the allocated size), copies it out to
 *	bifstp->ifbpstp_req and stores the byte count produced in
 *	ifbpstp_len.
 *
 *	The bridge lock is dropped around the allocation and the copyout.
 *	A failed allocation (NULL for a non-zero size) returns ENOMEM
 *	instead of writing through a NULL buffer.
 */
#define BRIDGE_IOCTL_GIFSSTP do { \
	struct bridge_iflist *bif; \
	struct bstp_port *bp; \
	struct ifbpstpreq bpreq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
	\
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if ((bif->bif_ifflags & IFBIF_STP) != 0) \
	                count++; \
	} \
	\
	buflen = sizeof (bpreq) * count; \
	if (bifstp->ifbpstp_len == 0) { \
	        bifstp->ifbpstp_len = buflen; \
	        return (0); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	if (outbuf == NULL && buflen != 0) \
	        return (ENOMEM); \
	\
	count = 0; \
	buf = outbuf; \
	len = min(bifstp->ifbpstp_len, buflen); \
	bzero(&bpreq, sizeof (bpreq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if (len < sizeof (bpreq)) \
	                break; \
	\
	        if ((bif->bif_ifflags & IFBIF_STP) == 0) \
	                continue; \
	\
	        bp = &bif->bif_stp; \
	        bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; \
	        bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; \
	        bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; \
	        bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; \
	        bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
	        bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; \
	\
	        memcpy(buf, &bpreq, sizeof (bpreq)); \
	        count++; \
	        buf += sizeof (bpreq); \
	        len -= sizeof (bpreq); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	bifstp->ifbpstp_len = sizeof (bpreq) * count; \
	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
	return (error); \
} while (0)
3702
3703 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * arg)3704 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *arg)
3705 {
3706 struct ifbpstpconf32 *bifstp = arg;
3707 int error = 0;
3708
3709 BRIDGE_IOCTL_GIFSSTP;
3710 return error;
3711 }
3712
3713 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * arg)3714 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *arg)
3715 {
3716 struct ifbpstpconf64 *bifstp = arg;
3717 int error = 0;
3718
3719 BRIDGE_IOCTL_GIFSSTP;
3720 return error;
3721 }
3722
3723 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * arg)3724 bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
3725 {
3726 #if BRIDGESTP
3727 struct ifbrparam *param = arg;
3728
3729 return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3730 #else /* !BRIDGESTP */
3731 #pragma unused(sc, arg)
3732 return EOPNOTSUPP;
3733 #endif /* !BRIDGESTP */
3734 }
3735
3736 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * arg)3737 bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
3738 {
3739 #if BRIDGESTP
3740 struct ifbrparam *param = arg;
3741
3742 return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3743 #else /* !BRIDGESTP */
3744 #pragma unused(sc, arg)
3745 return EOPNOTSUPP;
3746 #endif /* !BRIDGESTP */
3747 }
3748
3749
3750 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * arg)3751 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *arg)
3752 {
3753 struct ifbrhostfilter *req = arg;
3754 struct bridge_iflist *bif;
3755
3756 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3757 if (bif == NULL) {
3758 return ENOENT;
3759 }
3760
3761 bzero(req, sizeof(struct ifbrhostfilter));
3762 if (bif->bif_flags & BIFF_HOST_FILTER) {
3763 req->ifbrhf_flags |= IFBRHF_ENABLED;
3764 bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3765 ETHER_ADDR_LEN);
3766 req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3767 }
3768 return 0;
3769 }
3770
3771 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * arg)3772 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *arg)
3773 {
3774 struct ifbrhostfilter *req = arg;
3775 struct bridge_iflist *bif;
3776
3777 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3778 if (bif == NULL) {
3779 return ENOENT;
3780 }
3781
3782 if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3783 bif->bif_flags |= BIFF_HOST_FILTER;
3784
3785 if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3786 bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3787 ETHER_ADDR_LEN);
3788 if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3789 ETHER_ADDR_LEN) != 0) {
3790 bif->bif_flags |= BIFF_HF_HWSRC;
3791 } else {
3792 bif->bif_flags &= ~BIFF_HF_HWSRC;
3793 }
3794 }
3795 if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3796 bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3797 if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3798 bif->bif_flags |= BIFF_HF_IPSRC;
3799 } else {
3800 bif->bif_flags &= ~BIFF_HF_IPSRC;
3801 }
3802 }
3803 } else {
3804 bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3805 BIFF_HF_IPSRC);
3806 bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3807 bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3808 }
3809
3810 return 0;
3811 }
3812
3813 static char *
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * buf,unsigned int * len_p)3814 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3815 unsigned int * count_p, char *buf, unsigned int *len_p)
3816 {
3817 unsigned int count = *count_p;
3818 struct ifbrmne ifbmne;
3819 unsigned int len = *len_p;
3820 struct mac_nat_entry *mne;
3821 unsigned long now;
3822
3823 bzero(&ifbmne, sizeof(ifbmne));
3824 LIST_FOREACH(mne, list, mne_list) {
3825 if (len < sizeof(ifbmne)) {
3826 break;
3827 }
3828 snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3829 "%s", mne->mne_bif->bif_ifp->if_xname);
3830 memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3831 sizeof(ifbmne.ifbmne_mac));
3832 now = (unsigned long) net_uptime();
3833 if (now < mne->mne_expire) {
3834 ifbmne.ifbmne_expire = mne->mne_expire - now;
3835 } else {
3836 ifbmne.ifbmne_expire = 0;
3837 }
3838 if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3839 ifbmne.ifbmne_af = AF_INET6;
3840 ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3841 } else {
3842 ifbmne.ifbmne_af = AF_INET;
3843 ifbmne.ifbmne_ip_addr = mne->mne_ip;
3844 }
3845 memcpy(buf, &ifbmne, sizeof(ifbmne));
3846 count++;
3847 buf += sizeof(ifbmne);
3848 len -= sizeof(ifbmne);
3849 }
3850 *count_p = count;
3851 *len_p = len;
3852 return buf;
3853 }
3854
3855 /*
3856 * bridge_ioctl_gmnelist()
3857 * Perform the get mac_nat_entry list ioctl.
3858 *
3859 * Note:
3860 * The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3861 * field size/layout except for the last field ifbml_buf, the user-supplied
3862 * buffer pointer. That is passed in separately via the 'user_addr'
3863 * parameter from the respective 32-bit or 64-bit ioctl routine.
3864 */
3865 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3866 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3867 user_addr_t user_addr)
3868 {
3869 unsigned int count;
3870 char *buf;
3871 int error = 0;
3872 char *outbuf = NULL;
3873 struct mac_nat_entry *mne;
3874 unsigned int buflen;
3875 unsigned int len;
3876
3877 mnl->ifbml_elsize = sizeof(struct ifbrmne);
3878 count = 0;
3879 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3880 count++;
3881 }
3882 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3883 count++;
3884 }
3885 buflen = sizeof(struct ifbrmne) * count;
3886 if (buflen == 0 || mnl->ifbml_len == 0) {
3887 mnl->ifbml_len = buflen;
3888 return error;
3889 }
3890 BRIDGE_UNLOCK(sc);
3891 outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3892 BRIDGE_LOCK(sc);
3893 count = 0;
3894 buf = outbuf;
3895 len = min(mnl->ifbml_len, buflen);
3896 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3897 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3898 mnl->ifbml_len = count * sizeof(struct ifbrmne);
3899 BRIDGE_UNLOCK(sc);
3900 error = copyout(outbuf, user_addr, mnl->ifbml_len);
3901 kfree_data(outbuf, buflen);
3902 BRIDGE_LOCK(sc);
3903 return error;
3904 }
3905
3906 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * arg)3907 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *arg)
3908 {
3909 struct ifbrmnelist64 *mnl = arg;
3910
3911 return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
3912 }
3913
3914 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * arg)3915 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *arg)
3916 {
3917 struct ifbrmnelist32 *mnl = arg;
3918
3919 return bridge_ioctl_gmnelist(sc, arg,
3920 CAST_USER_ADDR_T(mnl->ifbml_buf));
3921 }
3922
3923 /*
3924 * bridge_ioctl_gifstats()
3925 * Return per-member stats.
3926 *
3927 * Note:
3928 * The ifbrmreq32 and ifbrmreq64 structures have the same
3929 * field size/layout except for the last field brmr_buf, the user-supplied
3930 * buffer pointer. That is passed in separately via the 'user_addr'
3931 * parameter from the respective 32-bit or 64-bit ioctl routine.
3932 */
3933 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)3934 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
3935 user_addr_t user_addr)
3936 {
3937 struct bridge_iflist *bif;
3938 int error = 0;
3939 unsigned int buflen;
3940
3941 bif = bridge_lookup_member(sc, mreq->brmr_ifname);
3942 if (bif == NULL) {
3943 error = ENOENT;
3944 goto done;
3945 }
3946
3947 buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
3948 if (buflen == 0 || mreq->brmr_len == 0) {
3949 mreq->brmr_len = buflen;
3950 goto done;
3951 }
3952 if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
3953 error = ENOBUFS;
3954 goto done;
3955 }
3956 mreq->brmr_len = buflen;
3957 error = copyout(&bif->bif_stats, user_addr, buflen);
3958 done:
3959 return error;
3960 }
3961
3962 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * arg)3963 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *arg)
3964 {
3965 struct ifbrmreq32 *mreq = arg;
3966
3967 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3968 }
3969
3970 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * arg)3971 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *arg)
3972 {
3973 struct ifbrmreq64 *mreq = arg;
3974
3975 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3976 }
3977
3978 /*
3979 * bridge_proto_attach_changed
3980 *
3981 * Called when protocol attachment on the interface changes.
3982 */
3983 static void
bridge_proto_attach_changed(struct ifnet * ifp)3984 bridge_proto_attach_changed(struct ifnet *ifp)
3985 {
3986 boolean_t changed = FALSE;
3987 struct bridge_iflist *bif;
3988 boolean_t input_broadcast;
3989 struct bridge_softc *sc = ifp->if_bridge;
3990
3991 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3992 if (sc == NULL) {
3993 return;
3994 }
3995 input_broadcast = interface_needs_input_broadcast(ifp);
3996 BRIDGE_LOCK(sc);
3997 bif = bridge_lookup_member_if(sc, ifp);
3998 if (bif != NULL) {
3999 changed = bif_set_input_broadcast(bif, input_broadcast);
4000 }
4001 BRIDGE_UNLOCK(sc);
4002 if (changed) {
4003 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
4004 "%s input broadcast %s", ifp->if_xname,
4005 input_broadcast ? "ENABLED" : "DISABLED");
4006 }
4007 return;
4008 }
4009
4010 /*
4011 * interface_media_active:
4012 *
4013 * Tells if an interface media is active.
4014 */
4015 static int
interface_media_active(struct ifnet * ifp)4016 interface_media_active(struct ifnet *ifp)
4017 {
4018 struct ifmediareq ifmr;
4019 int status = 0;
4020
4021 bzero(&ifmr, sizeof(ifmr));
4022 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
4023 if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
4024 status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
4025 }
4026 }
4027
4028 return status;
4029 }
4030
4031 /*
4032 * bridge_updatelinkstatus:
4033 *
4034 * Update the media active status of the bridge based on the
4035 * media active status of its member.
4036 * If changed, return the corresponding onf/off link event.
4037 */
4038 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)4039 bridge_updatelinkstatus(struct bridge_softc *sc)
4040 {
4041 struct bridge_iflist *bif;
4042 int active_member = 0;
4043 u_int32_t event_code = 0;
4044
4045 BRIDGE_LOCK_ASSERT_HELD(sc);
4046
4047 /*
4048 * Find out if we have an active interface
4049 */
4050 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
4051 if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
4052 active_member = 1;
4053 break;
4054 }
4055 }
4056
4057 if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4058 sc->sc_flags |= SCF_MEDIA_ACTIVE;
4059 event_code = KEV_DL_LINK_ON;
4060 } else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4061 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
4062 event_code = KEV_DL_LINK_OFF;
4063 }
4064
4065 return event_code;
4066 }
4067
4068 /*
4069 * bridge_iflinkevent:
4070 */
4071 static void
bridge_iflinkevent(struct ifnet * ifp)4072 bridge_iflinkevent(struct ifnet *ifp)
4073 {
4074 struct bridge_softc *sc = ifp->if_bridge;
4075 struct bridge_iflist *bif;
4076 u_int32_t event_code = 0;
4077 int media_active;
4078
4079 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4080
4081 /* Check if the interface is a bridge member */
4082 if (sc == NULL) {
4083 return;
4084 }
4085
4086 media_active = interface_media_active(ifp);
4087 BRIDGE_LOCK(sc);
4088 bif = bridge_lookup_member_if(sc, ifp);
4089 if (bif != NULL) {
4090 if (media_active) {
4091 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4092 } else {
4093 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4094 }
4095 if (sc->sc_mac_nat_bif != NULL) {
4096 bridge_mac_nat_flush_entries(sc, bif);
4097 }
4098
4099 event_code = bridge_updatelinkstatus(sc);
4100 }
4101 BRIDGE_UNLOCK(sc);
4102
4103 if (event_code != 0) {
4104 bridge_link_event(sc->sc_ifp, event_code);
4105 }
4106 }
4107
4108 /*
4109 * bridge_delayed_callback:
4110 *
4111 * Makes a delayed call
4112 */
4113 static void
bridge_delayed_callback(void * param,__unused void * param2)4114 bridge_delayed_callback(void *param, __unused void *param2)
4115 {
4116 struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4117 struct bridge_softc *sc = call->bdc_sc;
4118
4119 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4120 if (bridge_delayed_callback_delay > 0) {
4121 struct timespec ts;
4122
4123 ts.tv_sec = bridge_delayed_callback_delay;
4124 ts.tv_nsec = 0;
4125
4126 BRIDGE_LOG(LOG_NOTICE, 0,
4127 "sleeping for %d seconds",
4128 bridge_delayed_callback_delay);
4129
4130 msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4131 __func__, &ts);
4132
4133 BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4134 }
4135 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4136
4137 BRIDGE_LOCK(sc);
4138
4139 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4140 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4141 "%s call 0x%llx flags 0x%x",
4142 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4143 call->bdc_flags);
4144 }
4145 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4146
4147 if (call->bdc_flags & BDCF_CANCELLING) {
4148 wakeup(call);
4149 } else {
4150 if ((sc->sc_flags & SCF_DETACHING) == 0) {
4151 (*call->bdc_func)(sc);
4152 }
4153 }
4154 call->bdc_flags &= ~BDCF_OUTSTANDING;
4155 BRIDGE_UNLOCK(sc);
4156 }
4157
4158 /*
4159 * bridge_schedule_delayed_call:
4160 *
4161 * Schedule a function to be called on a separate thread
4162 * The actual call may be scheduled to run at a given time or ASAP.
4163 */
4164 static void
4165 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4166 {
4167 uint64_t deadline = 0;
4168 struct bridge_softc *sc = call->bdc_sc;
4169
4170 BRIDGE_LOCK_ASSERT_HELD(sc);
4171
4172 if ((sc->sc_flags & SCF_DETACHING) ||
4173 (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4174 return;
4175 }
4176
4177 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4178 nanoseconds_to_absolutetime(
4179 (uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4180 call->bdc_ts.tv_nsec, &deadline);
4181 clock_absolutetime_interval_to_deadline(deadline, &deadline);
4182 }
4183
4184 call->bdc_flags = BDCF_OUTSTANDING;
4185
4186 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4187 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4188 "%s call 0x%llx flags 0x%x",
4189 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4190 call->bdc_flags);
4191 }
4192 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4193
4194 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4195 thread_call_func_delayed(
4196 (thread_call_func_t)bridge_delayed_callback,
4197 call, deadline);
4198 } else {
4199 if (call->bdc_thread_call == NULL) {
4200 call->bdc_thread_call = thread_call_allocate(
4201 (thread_call_func_t)bridge_delayed_callback,
4202 call);
4203 }
4204 thread_call_enter(call->bdc_thread_call);
4205 }
4206 }
4207
4208 /*
4209 * bridge_cancel_delayed_call:
4210 *
4211 * Cancel a queued or running delayed call.
4212 * If call is running, does not return until the call is done to
4213 * prevent race condition with the brigde interface getting destroyed
4214 */
4215 static void
4216 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4217 {
4218 boolean_t result;
4219 struct bridge_softc *sc = call->bdc_sc;
4220
4221 /*
4222 * The call was never scheduled
4223 */
4224 if (sc == NULL) {
4225 return;
4226 }
4227
4228 BRIDGE_LOCK_ASSERT_HELD(sc);
4229
4230 call->bdc_flags |= BDCF_CANCELLING;
4231
4232 while (call->bdc_flags & BDCF_OUTSTANDING) {
4233 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4234 "%s call 0x%llx flags 0x%x",
4235 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4236 call->bdc_flags);
4237 result = thread_call_func_cancel(
4238 (thread_call_func_t)bridge_delayed_callback, call, FALSE);
4239
4240 if (result) {
4241 /*
4242 * We managed to dequeue the delayed call
4243 */
4244 call->bdc_flags &= ~BDCF_OUTSTANDING;
4245 } else {
4246 /*
4247 * Wait for delayed call do be done running
4248 */
4249 msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4250 }
4251 }
4252 call->bdc_flags &= ~BDCF_CANCELLING;
4253 }
4254
4255 /*
4256 * bridge_cleanup_delayed_call:
4257 *
4258 * Dispose resource allocated for a delayed call
4259 * Assume the delayed call is not queued or running .
4260 */
4261 static void
4262 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4263 {
4264 boolean_t result;
4265 struct bridge_softc *sc = call->bdc_sc;
4266
4267 /*
4268 * The call was never scheduled
4269 */
4270 if (sc == NULL) {
4271 return;
4272 }
4273
4274 BRIDGE_LOCK_ASSERT_HELD(sc);
4275
4276 VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4277 VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4278
4279 if (call->bdc_thread_call != NULL) {
4280 result = thread_call_free(call->bdc_thread_call);
4281 if (result == FALSE) {
4282 panic("%s thread_call_free() failed for call %p",
4283 __func__, call);
4284 }
4285 call->bdc_thread_call = NULL;
4286 }
4287 }
4288
4289 /*
4290 * bridge_init:
4291 *
4292 * Initialize a bridge interface.
4293 */
4294 static int
4295 bridge_init(struct ifnet *ifp)
4296 {
4297 struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4298 errno_t error;
4299
4300 BRIDGE_LOCK_ASSERT_HELD(sc);
4301
4302 if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4303 return 0;
4304 }
4305
4306 error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4307
4308 /*
4309 * Calling bridge_aging_timer() is OK as there are no entries to
4310 * age so we're just going to arm the timer
4311 */
4312 bridge_aging_timer(sc);
4313 #if BRIDGESTP
4314 if (error == 0) {
4315 bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4316 }
4317 #endif /* BRIDGESTP */
4318 return error;
4319 }
4320
4321 /*
4322 * bridge_ifstop:
4323 *
4324 * Stop the bridge interface.
4325 */
4326 static void
4327 bridge_ifstop(struct ifnet *ifp, int disable)
4328 {
4329 #pragma unused(disable)
4330 struct bridge_softc *sc = ifp->if_softc;
4331
4332 BRIDGE_LOCK_ASSERT_HELD(sc);
4333
4334 if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4335 return;
4336 }
4337
4338 bridge_cancel_delayed_call(&sc->sc_aging_timer);
4339
4340 #if BRIDGESTP
4341 bstp_stop(&sc->sc_stp);
4342 #endif /* BRIDGESTP */
4343
4344 bridge_rtflush(sc, IFBF_FLUSHDYN);
4345 (void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4346 }
4347
4348 /*
4349 * bridge_compute_cksum:
4350 *
4351 * If the packet has checksum flags, compare the hardware checksum
4352 * capabilities of the source and destination interfaces. If they
4353 * are the same, there's nothing to do. If they are different,
4354 * finalize the checksum so that it can be sent on the destination
4355 * interface.
4356 */
4357 static void
4358 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4359 {
4360 uint32_t csum_flags;
4361 uint16_t dst_hw_csum;
4362 uint32_t did_sw = 0;
4363 struct ether_header *eh;
4364 uint16_t src_hw_csum;
4365
4366 if (src_if == dst_if) {
4367 return;
4368 }
4369 csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4370 if (csum_flags == 0) {
4371 /* no checksum offload */
4372 return;
4373 }
4374
4375 /*
4376 * if destination/source differ in checksum offload
4377 * capabilities, finalize/compute the checksum
4378 */
4379 dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4380 src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4381 if (dst_hw_csum == src_hw_csum) {
4382 return;
4383 }
4384 eh = mtod(m, struct ether_header *);
4385 switch (ntohs(eh->ether_type)) {
4386 case ETHERTYPE_IP:
4387 did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4388 break;
4389 case ETHERTYPE_IPV6:
4390 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4391 break;
4392 }
4393 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4394 "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4395 src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4396 m->m_pkthdr.csum_flags);
4397 }
4398
4399 static errno_t
4400 bridge_transmit(struct ifnet * ifp, struct mbuf *m)
4401 {
4402 struct flowadv adv = { .code = FADV_SUCCESS };
4403 errno_t error;
4404
4405 error = dlil_output(ifp, 0, m, NULL, NULL, 1, &adv);
4406 if (error == 0) {
4407 if (adv.code == FADV_FLOW_CONTROLLED) {
4408 error = EQFULL;
4409 } else if (adv.code == FADV_SUSPENDED) {
4410 error = EQSUSPENDED;
4411 }
4412 }
4413 return error;
4414 }
4415
4416 static int
4417 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4418 bool *is_fragmented)
4419 {
4420 int newoff;
4421
4422 *is_fragmented = false;
4423 while (1) {
4424 newoff = ip6_nexthdr(m, off, proto, nxtp);
4425 if (newoff < 0) {
4426 return off;
4427 } else if (newoff < off) {
4428 return -1; /* invalid */
4429 } else if (newoff == off) {
4430 return newoff;
4431 }
4432 off = newoff;
4433 proto = *nxtp;
4434 if (proto == IPPROTO_FRAGMENT) {
4435 *is_fragmented = true;
4436 }
4437 }
4438 }
4439
4440 static int
4441 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4442 ip_packet_info_t info_p, struct bripstats * stats_p)
4443 {
4444 int error = 0;
4445 u_int hlen;
4446 u_int ip_hlen;
4447 u_int ip_pay_len;
4448 struct mbuf * m0 = *mp;
4449 int off;
4450 int opt_len = 0;
4451 int proto = 0;
4452
4453 bzero(info_p, sizeof(*info_p));
4454 if (is_ipv4) {
4455 struct ip * ip;
4456 u_int ip_total_len;
4457
4458 /* IPv4 */
4459 hlen = mac_hlen + sizeof(struct ip);
4460 if (m0->m_pkthdr.len < hlen) {
4461 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4462 "Short IP packet %d < %d",
4463 m0->m_pkthdr.len, hlen);
4464 error = _EBADIP;
4465 stats_p->bips_bad_ip++;
4466 goto done;
4467 }
4468 if (m0->m_len < hlen) {
4469 *mp = m0 = m_pullup(m0, hlen);
4470 if (m0 == NULL) {
4471 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4472 "m_pullup failed hlen %d",
4473 hlen);
4474 error = ENOBUFS;
4475 stats_p->bips_bad_ip++;
4476 goto done;
4477 }
4478 }
4479 ip = (struct ip *)(void *)(mtod(m0, uint8_t *) + mac_hlen);
4480 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4481 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4482 "bad IP version");
4483 error = _EBADIP;
4484 stats_p->bips_bad_ip++;
4485 goto done;
4486 }
4487 ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4488 if (ip_hlen < sizeof(struct ip)) {
4489 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4490 "bad IP header length %d < %d",
4491 ip_hlen,
4492 (int)sizeof(struct ip));
4493 error = _EBADIP;
4494 stats_p->bips_bad_ip++;
4495 goto done;
4496 }
4497 hlen = mac_hlen + ip_hlen;
4498 if (m0->m_len < hlen) {
4499 *mp = m0 = m_pullup(m0, hlen);
4500 if (m0 == NULL) {
4501 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4502 "m_pullup failed hlen %d",
4503 hlen);
4504 error = ENOBUFS;
4505 stats_p->bips_bad_ip++;
4506 goto done;
4507 }
4508 }
4509
4510 ip_total_len = ntohs(ip->ip_len);
4511 if (ip_total_len < ip_hlen) {
4512 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4513 "IP total len %d < header len %d",
4514 ip_total_len, ip_hlen);
4515 error = _EBADIP;
4516 stats_p->bips_bad_ip++;
4517 goto done;
4518 }
4519 if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4520 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4521 "invalid IP payload length %d > %d",
4522 ip_total_len,
4523 (m0->m_pkthdr.len - mac_hlen));
4524 error = _EBADIP;
4525 stats_p->bips_bad_ip++;
4526 goto done;
4527 }
4528 ip_pay_len = ip_total_len - ip_hlen;
4529 info_p->ip_proto = ip->ip_p;
4530 info_p->ip_hdr.ip = ip;
4531 #define FRAG_BITS (IP_OFFMASK | IP_MF)
4532 if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4533 info_p->ip_is_fragmented = true;
4534 }
4535 stats_p->bips_ip++;
4536 } else {
4537 struct ip6_hdr *ip6;
4538
4539 /* IPv6 */
4540 hlen = mac_hlen + sizeof(struct ip6_hdr);
4541 if (m0->m_pkthdr.len < hlen) {
4542 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4543 "short IPv6 packet %d < %d",
4544 m0->m_pkthdr.len, hlen);
4545 error = _EBADIPV6;
4546 stats_p->bips_bad_ip6++;
4547 goto done;
4548 }
4549 if (m0->m_len < hlen) {
4550 *mp = m0 = m_pullup(m0, hlen);
4551 if (m0 == NULL) {
4552 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4553 "m_pullup failed hlen %d",
4554 hlen);
4555 error = ENOBUFS;
4556 stats_p->bips_bad_ip6++;
4557 goto done;
4558 }
4559 }
4560 ip6 = (struct ip6_hdr *)(mtod(m0, uint8_t *) + mac_hlen);
4561 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4562 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4563 "bad IPv6 version");
4564 error = _EBADIPV6;
4565 stats_p->bips_bad_ip6++;
4566 goto done;
4567 }
4568 off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4569 &info_p->ip_is_fragmented);
4570 if (off < 0 || m0->m_pkthdr.len < off) {
4571 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4572 "ip6_lasthdr() returned %d",
4573 off);
4574 error = _EBADIPV6;
4575 stats_p->bips_bad_ip6++;
4576 goto done;
4577 }
4578 ip_hlen = sizeof(*ip6);
4579 opt_len = off - mac_hlen - ip_hlen;
4580 if (opt_len < 0) {
4581 error = _EBADIPV6;
4582 stats_p->bips_bad_ip6++;
4583 goto done;
4584 }
4585 info_p->ip_proto = proto;
4586 info_p->ip_hdr.ip6 = ip6;
4587 ip_pay_len = ntohs(ip6->ip6_plen);
4588 if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4589 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4590 "invalid IPv6 payload length %d > %d",
4591 ip_pay_len,
4592 (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4593 error = _EBADIPV6;
4594 stats_p->bips_bad_ip6++;
4595 goto done;
4596 }
4597 stats_p->bips_ip6++;
4598 }
4599 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4600 "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4601 is_ipv4 ? '4' : '6',
4602 proto, ip_hlen, ip_pay_len, opt_len,
4603 m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4604 info_p->ip_hlen = ip_hlen;
4605 info_p->ip_pay_len = ip_pay_len;
4606 info_p->ip_opt_len = opt_len;
4607 info_p->ip_is_ipv4 = is_ipv4;
4608 done:
4609 return error;
4610 }
4611
4612 static int
4613 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4614 ip_packet_info_t info_p, struct bripstats * stats_p)
4615 {
4616 int error;
4617 u_int hlen;
4618
4619 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4620 if (error != 0) {
4621 goto done;
4622 }
4623 if (info_p->ip_proto != IPPROTO_TCP) {
4624 /* not a TCP frame, not an error, just a bad guess */
4625 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4626 "non-TCP (%d) IPv%c frame %d bytes",
4627 info_p->ip_proto, is_ipv4 ? '4' : '6',
4628 (*mp)->m_pkthdr.len);
4629 goto done;
4630 }
4631 if (info_p->ip_is_fragmented) {
4632 /* both TSO and IP fragmentation don't make sense */
4633 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4634 "fragmented TSO packet?");
4635 stats_p->bips_bad_tcp++;
4636 error = _EBADTCP;
4637 goto done;
4638 }
4639 hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4640 info_p->ip_opt_len;
4641 if ((*mp)->m_len < hlen) {
4642 *mp = m_pullup(*mp, hlen);
4643 if (*mp == NULL) {
4644 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4645 "m_pullup %d failed",
4646 hlen);
4647 stats_p->bips_bad_tcp++;
4648 error = _EBADTCP;
4649 goto done;
4650 }
4651 }
4652 info_p->ip_proto_hdr = ((caddr_t)info_p->ip_hdr.ptr) +
4653 info_p->ip_hlen + info_p->ip_opt_len;
4654 done:
4655 return error;
4656 }
4657
4658 static inline void
4659 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4660 {
4661 if (proto == IPPROTO_TCP) {
4662 stats_p->brcs_tcp_checksum++;
4663 } else {
4664 stats_p->brcs_udp_checksum++;
4665 }
4666 return;
4667 }
4668
4669 static bool
4670 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4671 {
4672 uint16_t ether_type;
4673 bool is_ip = TRUE;
4674
4675 ether_type = ntohs(eh->ether_type);
4676 switch (ether_type) {
4677 case ETHERTYPE_IP:
4678 *is_ipv4 = TRUE;
4679 break;
4680 case ETHERTYPE_IPV6:
4681 *is_ipv4 = FALSE;
4682 break;
4683 default:
4684 is_ip = FALSE;
4685 break;
4686 }
4687 return is_ip;
4688 }
4689
4690 static errno_t
4691 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4692 {
4693 struct brcsumstats *csum_stats_p;
4694 struct ether_header *eh;
4695 errno_t error = 0;
4696 ip_packet_info info;
4697 bool is_ipv4;
4698 struct mbuf * m;
4699 u_int mac_hlen = sizeof(struct ether_header);
4700 uint16_t sum;
4701 bool valid;
4702
4703 eh = mtod(*mp, struct ether_header *);
4704 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4705 goto done;
4706 }
4707 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4708 &stats_p->brms_out_ip);
4709 m = *mp;
4710 if (error != 0) {
4711 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4712 "bridge_get_ip_proto failed %d",
4713 error);
4714 goto done;
4715 }
4716 if (is_ipv4) {
4717 if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4718 /* hardware offloaded IP header checksum */
4719 valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4720 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4721 "IP checksum HW %svalid",
4722 valid ? "" : "in");
4723 if (!valid) {
4724 stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum++;
4725 error = _EBADIPCHECKSUM;
4726 goto done;
4727 }
4728 stats_p->brms_out_cksum_good_hw.brcs_ip_checksum++;
4729 } else {
4730 /* verify */
4731 sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4732 valid = (sum == 0);
4733 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4734 "IP checksum SW %svalid",
4735 valid ? "" : "in");
4736 if (!valid) {
4737 stats_p->brms_out_cksum_bad.brcs_ip_checksum++;
4738 error = _EBADIPCHECKSUM;
4739 goto done;
4740 }
4741 stats_p->brms_out_cksum_good.brcs_ip_checksum++;
4742 }
4743 }
4744 if (info.ip_is_fragmented) {
4745 /* can't verify checksum on fragmented packets */
4746 goto done;
4747 }
4748 switch (info.ip_proto) {
4749 case IPPROTO_TCP:
4750 stats_p->brms_out_ip.bips_tcp++;
4751 break;
4752 case IPPROTO_UDP:
4753 stats_p->brms_out_ip.bips_udp++;
4754 break;
4755 default:
4756 goto done;
4757 }
4758 /* check for hardware offloaded UDP/TCP checksum */
4759 #define HW_CSUM (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4760 if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4761 /* checksum verified by hardware */
4762 valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4763 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4764 "IPv%c %s checksum HW 0x%x %svalid",
4765 is_ipv4 ? '4' : '6',
4766 (info.ip_proto == IPPROTO_TCP)
4767 ? "TCP" : "UDP",
4768 m->m_pkthdr.csum_data,
4769 valid ? "" : "in" );
4770 if (!valid) {
4771 /* bad checksum */
4772 csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4773 error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4774 : _EBADTCPCHECKSUM;
4775 } else {
4776 /* good checksum */
4777 csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4778 }
4779 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4780 goto done;
4781 }
4782 m->m_data += mac_hlen;
4783 m->m_len -= mac_hlen;
4784 m->m_pkthdr.len -= mac_hlen;
4785 if (is_ipv4) {
4786 sum = inet_cksum(m, info.ip_proto,
4787 info.ip_hlen,
4788 info.ip_pay_len);
4789 } else {
4790 sum = inet6_cksum(m, info.ip_proto,
4791 info.ip_hlen + info.ip_opt_len,
4792 info.ip_pay_len - info.ip_opt_len);
4793 }
4794 valid = (sum == 0);
4795 if (valid) {
4796 csum_stats_p = &stats_p->brms_out_cksum_good;
4797 } else {
4798 csum_stats_p = &stats_p->brms_out_cksum_bad;
4799 error = (info.ip_proto == IPPROTO_TCP)
4800 ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4801 }
4802 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4803 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4804 "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4805 is_ipv4 ? '4' : '6',
4806 (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4807 valid ? "" : "in",
4808 sum, info.ip_hlen, info.ip_pay_len);
4809 m->m_data -= mac_hlen;
4810 m->m_len += mac_hlen;
4811 m->m_pkthdr.len += mac_hlen;
4812 done:
4813 return error;
4814 }
4815
4816 static errno_t
4817 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
4818 struct ifbrmstats * stats_p)
4819 {
4820 uint16_t * csum_p;
4821 errno_t error = 0;
4822 u_int hlen;
4823 struct mbuf * m0 = *mp;
4824 u_int mac_hlen = sizeof(struct ether_header);
4825 u_int pkt_hdr_len;
4826 struct tcphdr * tcp;
4827 u_int tcp_hlen;
4828 struct udphdr * udp;
4829
4830 if (info_p->ip_is_ipv4) {
4831 /* compute IP header checksum */
4832 info_p->ip_hdr.ip->ip_sum = 0;
4833 info_p->ip_hdr.ip->ip_sum = inet_cksum(m0, 0, mac_hlen,
4834 info_p->ip_hlen);
4835 stats_p->brms_in_computed_cksum.brcs_ip_checksum++;
4836 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4837 "IPv4 checksum 0x%x",
4838 ntohs(info_p->ip_hdr.ip->ip_sum));
4839 }
4840 if (info_p->ip_is_fragmented) {
4841 /* can't compute checksum on fragmented packets */
4842 goto done;
4843 }
4844 pkt_hdr_len = m0->m_pkthdr.len;
4845 switch (info_p->ip_proto) {
4846 case IPPROTO_TCP:
4847 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
4848 + sizeof(struct tcphdr);
4849 if (m0->m_len < hlen) {
4850 *mp = m0 = m_pullup(m0, hlen);
4851 if (m0 == NULL) {
4852 stats_p->brms_in_ip.bips_bad_tcp++;
4853 error = _EBADTCP;
4854 goto done;
4855 }
4856 }
4857 tcp = (struct tcphdr *)(void *)
4858 ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4859 + info_p->ip_opt_len);
4860 tcp_hlen = tcp->th_off << 2;
4861 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
4862 if (hlen > pkt_hdr_len) {
4863 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4864 "bad tcp header length %u",
4865 tcp_hlen);
4866 stats_p->brms_in_ip.bips_bad_tcp++;
4867 error = _EBADTCP;
4868 goto done;
4869 }
4870 csum_p = &tcp->th_sum;
4871 stats_p->brms_in_ip.bips_tcp++;
4872 break;
4873 case IPPROTO_UDP:
4874 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
4875 if (m0->m_len < hlen) {
4876 *mp = m0 = m_pullup(m0, hlen);
4877 if (m0 == NULL) {
4878 stats_p->brms_in_ip.bips_bad_udp++;
4879 error = ENOBUFS;
4880 goto done;
4881 }
4882 }
4883 udp = (struct udphdr *)(void *)
4884 ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4885 + info_p->ip_opt_len);
4886 csum_p = &udp->uh_sum;
4887 stats_p->brms_in_ip.bips_udp++;
4888 break;
4889 default:
4890 /* not TCP or UDP */
4891 goto done;
4892 }
4893 *csum_p = 0;
4894 m0->m_data += mac_hlen;
4895 m0->m_len -= mac_hlen;
4896 m0->m_pkthdr.len -= mac_hlen;
4897 if (info_p->ip_is_ipv4) {
4898 *csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
4899 info_p->ip_pay_len);
4900 } else {
4901 *csum_p = inet6_cksum(m0, info_p->ip_proto,
4902 info_p->ip_hlen + info_p->ip_opt_len,
4903 info_p->ip_pay_len - info_p->ip_opt_len);
4904 }
4905 if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
4906 /* RFC 1122 4.1.3.4 */
4907 *csum_p = 0xffff;
4908 }
4909 m0->m_data -= mac_hlen;
4910 m0->m_len += mac_hlen;
4911 m0->m_pkthdr.len += mac_hlen;
4912 proto_csum_stats_increment(info_p->ip_proto,
4913 &stats_p->brms_in_computed_cksum);
4914
4915 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4916 "IPv%c %s set checksum 0x%x",
4917 info_p->ip_is_ipv4 ? '4' : '6',
4918 (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4919 ntohs(*csum_p));
4920 done:
4921 return error;
4922 }
4923
4924 static errno_t
4925 bridge_send(struct ifnet *src_ifp,
4926 struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4927 {
4928 switch (cksum_op) {
4929 case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
4930 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4931 break;
4932 case CHECKSUM_OPERATION_FINALIZE:
4933 /* the checksum might not be correct, finalize now */
4934 bridge_finalize_cksum(dst_ifp, m);
4935 break;
4936 case CHECKSUM_OPERATION_COMPUTE:
4937 bridge_compute_cksum(src_ifp, dst_ifp, m);
4938 break;
4939 default:
4940 break;
4941 }
4942 #if HAS_IF_CAP
4943 /*
4944 * If underlying interface can not do VLAN tag insertion itself
4945 * then attach a packet tag that holds it.
4946 */
4947 if ((m->m_flags & M_VLANTAG) &&
4948 (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4949 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4950 if (m == NULL) {
4951 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4952 "%s: unable to prepend VLAN header",
4953 dst_ifp->if_xname);
4954 (void) ifnet_stat_increment_out(dst_ifp,
4955 0, 0, 1);
4956 return 0;
4957 }
4958 m->m_flags &= ~M_VLANTAG;
4959 }
4960 #endif /* HAS_IF_CAP */
4961 return bridge_transmit(dst_ifp, m);
4962 }
4963
4964 static errno_t
4965 bridge_send_tso(struct ifnet *dst_ifp, struct mbuf *m, bool is_ipv4)
4966 {
4967 errno_t error;
4968 u_int mac_hlen;
4969
4970 mac_hlen = sizeof(struct ether_header);
4971
4972 #if HAS_IF_CAP
4973 /*
4974 * If underlying interface can not do VLAN tag insertion itself
4975 * then attach a packet tag that holds it.
4976 */
4977 if ((m->m_flags & M_VLANTAG) &&
4978 (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4979 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4980 if (m == NULL) {
4981 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4982 "%s: unable to prepend VLAN header",
4983 dst_ifp->if_xname);
4984 (void) ifnet_stat_increment_out(dst_ifp,
4985 0, 0, 1);
4986 error = ENOBUFS;
4987 goto done;
4988 }
4989 m->m_flags &= ~M_VLANTAG;
4990 mac_hlen += ETHER_VLAN_ENCAP_LEN;
4991 }
4992 #endif /* HAS_IF_CAP */
4993 error = gso_tcp(dst_ifp, &m, mac_hlen, is_ipv4, TRUE);
4994 return error;
4995 }
4996
4997 /*
4998 * tso_hwassist:
4999 * - determine whether the destination interface supports TSO offload
5000 * - if the packet is already marked for offload and the hardware supports
5001 * it, just allow the packet to continue on
5002 * - if not, parse the packet headers to verify that this is a large TCP
5003 * packet requiring segmentation; if the hardware doesn't support it
5004 * set need_sw_tso; otherwise, mark the packet for TSO offload
5005 */
5006 static int
5007 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
5008 bool * need_sw_tso, bool * is_large_tcp)
5009 {
5010 int error = 0;
5011 u_int32_t if_csum;
5012 u_int32_t if_tso;
5013 u_int32_t mbuf_tso;
5014 bool supports_cksum = false;
5015
5016 *need_sw_tso = false;
5017 *is_large_tcp = false;
5018 if (is_ipv4) {
5019 /*
5020 * Enable both TCP and IP offload if the hardware supports it.
5021 * If the hardware doesn't support TCP offload, supports_cksum
5022 * will be false so we won't set either offload.
5023 */
5024 if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
5025 supports_cksum = (if_csum & CSUM_TCP) != 0;
5026 if_tso = IFNET_TSO_IPV4;
5027 mbuf_tso = CSUM_TSO_IPV4;
5028 } else {
5029 supports_cksum = (ifp->if_hwassist & CSUM_TCPIPV6) != 0;
5030 if_csum = CSUM_TCPIPV6;
5031 if_tso = IFNET_TSO_IPV6;
5032 mbuf_tso = CSUM_TSO_IPV6;
5033 }
5034 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5035 "%s: does%s support checksum 0x%x if_csum 0x%x",
5036 ifp->if_xname, supports_cksum ? "" : " not",
5037 ifp->if_hwassist, if_csum);
5038 if ((ifp->if_hwassist & if_tso) != 0 &&
5039 ((*mp)->m_pkthdr.csum_flags & mbuf_tso) != 0) {
5040 /* hardware TSO, mbuf already marked */
5041 } else {
5042 /* verify that this is a large TCP frame */
5043 uint32_t csum_flags;
5044 ip_packet_info info;
5045 int mss;
5046 struct bripstats stats;
5047 struct tcphdr * tcp;
5048
5049 error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
5050 &info, &stats);
5051 if (error != 0) {
5052 /* bad packet */
5053 goto done;
5054 }
5055 if ((info.ip_hlen + info.ip_pay_len + info.ip_opt_len) <=
5056 ifp->if_mtu) {
5057 /* not actually a large packet */
5058 goto done;
5059 }
5060 if (info.ip_proto_hdr == NULL) {
5061 /* not a TCP packet */
5062 goto done;
5063 }
5064 if ((ifp->if_hwassist & if_tso) == 0) {
5065 /* hardware does not support TSO, enable sw tso */
5066 *need_sw_tso = if_bridge_segmentation != 0;
5067 goto done;
5068 }
5069 /* use hardware TSO */
5070 (*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
5071 tcp = (struct tcphdr *)info.ip_proto_hdr;
5072 mss = ifp->if_mtu - info.ip_hlen - info.ip_opt_len
5073 - (tcp->th_off << 2) - if_bridge_tso_reduce_mss_tx;
5074 assert(mss > 0);
5075 csum_flags = mbuf_tso;
5076 if (supports_cksum) {
5077 csum_flags |= if_csum;
5078 }
5079 (*mp)->m_pkthdr.tso_segsz = mss;
5080 (*mp)->m_pkthdr.csum_flags |= csum_flags;
5081 (*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
5082 *is_large_tcp = true;
5083 }
5084 done:
5085 return error;
5086 }
5087
5088 /*
5089 * bridge_enqueue:
5090 *
5091 * Enqueue a packet on a bridge member interface.
5092 *
5093 */
5094 static errno_t
5095 bridge_enqueue(ifnet_t bridge_ifp, struct ifnet *src_ifp,
5096 struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
5097 {
5098 errno_t error = 0;
5099 int len;
5100
5101 VERIFY(dst_ifp != NULL);
5102
5103 /*
5104 * We may be sending a fragment so traverse the mbuf
5105 *
5106 * NOTE: bridge_fragment() is called only when PFIL_HOOKS is enabled.
5107 */
5108 for (struct mbuf *next_m = NULL; m != NULL; m = next_m) {
5109 bool need_sw_tso = false;
5110 bool is_ipv4 = false;
5111 bool is_large_pkt;
5112 errno_t _error = 0;
5113
5114 len = m->m_pkthdr.len;
5115 m->m_flags |= M_PROTO1; /* set to avoid loops */
5116 next_m = m->m_nextpkt;
5117 m->m_nextpkt = NULL;
5118 /*
5119 * Need to segment the packet if it is a large frame
5120 * and the destination interface does not support TSO.
5121 *
5122 * Note that with trailers, it's possible for a packet to
5123 * be large but not actually require segmentation.
5124 */
5125 is_large_pkt = (len > (bridge_ifp->if_mtu + ETHER_HDR_LEN));
5126 if (is_large_pkt) {
5127 struct ether_header *eh;
5128 bool is_large_tcp = false;
5129
5130 eh = mtod(m, struct ether_header *);
5131 if (ether_header_type_is_ip(eh, &is_ipv4)) {
5132 _error = tso_hwassist(&m, is_ipv4,
5133 dst_ifp, sizeof(struct ether_header),
5134 &need_sw_tso, &is_large_tcp);
5135 if (is_large_tcp) {
5136 cksum_op = CHECKSUM_OPERATION_NONE;
5137 }
5138 } else {
5139 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5140 "large non IP packet");
5141 }
5142 }
5143 if (_error != 0) {
5144 if (m != NULL) {
5145 m_freem(m);
5146 }
5147 } else if (need_sw_tso) {
5148 _error = bridge_send_tso(dst_ifp, m, is_ipv4);
5149 } else {
5150 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5151 "%s bridge_send(%s) len %d op %d",
5152 bridge_ifp->if_xname,
5153 dst_ifp->if_xname,
5154 len, cksum_op);
5155 _error = bridge_send(src_ifp, dst_ifp, m, cksum_op);
5156 }
5157
5158 /* Preserve first error value */
5159 if (error == 0 && _error != 0) {
5160 error = _error;
5161 }
5162 if (_error == 0) {
5163 (void) ifnet_stat_increment_out(bridge_ifp, 1, len, 0);
5164 } else {
5165 (void) ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
5166 }
5167 }
5168
5169 return error;
5170 }
5171
5172 #if HAS_BRIDGE_DUMMYNET
5173 /*
5174 * bridge_dummynet:
5175 *
5176 * Receive a queued packet from dummynet and pass it on to the output
5177 * interface.
5178 *
5179 * The mbuf has the Ethernet header already attached.
5180 */
5181 static void
5182 bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
5183 {
5184 struct bridge_softc *sc;
5185
5186 sc = ifp->if_bridge;
5187
5188 /*
5189 * The packet didn't originate from a member interface. This should only
5190 * ever happen if a member interface is removed while packets are
5191 * queued for it.
5192 */
5193 if (sc == NULL) {
5194 m_freem(m);
5195 return;
5196 }
5197
5198 if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
5199 if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) {
5200 return;
5201 }
5202 if (m == NULL) {
5203 return;
5204 }
5205 }
5206 (void) bridge_enqueue(sc->sc_ifp, NULL, ifp, m, CHECKSUM_OPERATION_NONE);
5207 }
5208
5209 #endif /* HAS_BRIDGE_DUMMYNET */
5210
5211 /*
5212 * bridge_member_output:
5213 *
5214 * Send output from a bridge member interface. This
5215 * performs the bridging function for locally originated
5216 * packets.
5217 *
5218 * The mbuf has the Ethernet header already attached.
5219 */
5220 static errno_t
5221 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5222 {
5223 ifnet_t bridge_ifp;
5224 struct ether_header *eh;
5225 struct ifnet *dst_if;
5226 uint16_t vlan;
5227 struct bridge_iflist *mac_nat_bif;
5228 ifnet_t mac_nat_ifp;
5229 mbuf_t m = *data;
5230
5231 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5232 "ifp %s", ifp->if_xname);
5233 if (m->m_len < ETHER_HDR_LEN) {
5234 m = m_pullup(m, ETHER_HDR_LEN);
5235 if (m == NULL) {
5236 *data = NULL;
5237 return EJUSTRETURN;
5238 }
5239 }
5240
5241 eh = mtod(m, struct ether_header *);
5242 vlan = VLANTAGOF(m);
5243
5244 BRIDGE_LOCK(sc);
5245 mac_nat_bif = sc->sc_mac_nat_bif;
5246 mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5247 if (mac_nat_ifp == ifp) {
5248 /* record the IP address used by the MAC NAT interface */
5249 (void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5250 m = *data;
5251 if (m == NULL) {
5252 /* packet was deallocated */
5253 BRIDGE_UNLOCK(sc);
5254 return EJUSTRETURN;
5255 }
5256 }
5257 bridge_ifp = sc->sc_ifp;
5258
5259 /*
5260 * APPLE MODIFICATION
5261 * If the packet is an 802.1X ethertype, then only send on the
5262 * original output interface.
5263 */
5264 if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5265 dst_if = ifp;
5266 goto sendunicast;
5267 }
5268
5269 /*
5270 * If bridge is down, but the original output interface is up,
5271 * go ahead and send out that interface. Otherwise, the packet
5272 * is dropped below.
5273 */
5274 if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5275 dst_if = ifp;
5276 goto sendunicast;
5277 }
5278
5279 /*
5280 * If the packet is a multicast, or we don't know a better way to
5281 * get there, send to all interfaces.
5282 */
5283 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5284 dst_if = NULL;
5285 } else {
5286 dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
5287 }
5288 if (dst_if == NULL) {
5289 struct bridge_iflist *bif;
5290 struct mbuf *mc;
5291 errno_t error;
5292
5293
5294 bridge_span(sc, m);
5295
5296 BRIDGE_LOCK2REF(sc, error);
5297 if (error != 0) {
5298 m_freem(m);
5299 return EJUSTRETURN;
5300 }
5301
5302 /*
5303 * Duplicate and send the packet across all member interfaces
5304 * except the originating interface.
5305 */
5306 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5307 dst_if = bif->bif_ifp;
5308 if (dst_if == ifp) {
5309 /* skip the originating interface */
5310 continue;
5311 }
5312 /* skip interface with inactive link status */
5313 if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5314 continue;
5315 }
5316 #if 0
5317 if (dst_if->if_type == IFT_GIF) {
5318 continue;
5319 }
5320 #endif
5321 /* skip interface that isn't running */
5322 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5323 continue;
5324 }
5325 /*
5326 * If the interface is participating in spanning
5327 * tree, make sure the port is in a state that
5328 * allows forwarding.
5329 */
5330 if ((bif->bif_ifflags & IFBIF_STP) &&
5331 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5332 continue;
5333 }
5334 /*
5335 * If the destination is the MAC NAT interface,
5336 * skip sending the packet. The packet can't be sent
5337 * if the source MAC is incorrect.
5338 */
5339 if (dst_if == mac_nat_ifp) {
5340 continue;
5341 }
5342
5343 /* make a deep copy to send on this member interface */
5344 mc = m_dup(m, M_DONTWAIT);
5345 if (mc == NULL) {
5346 (void)ifnet_stat_increment_out(bridge_ifp,
5347 0, 0, 1);
5348 continue;
5349 }
5350 (void)bridge_enqueue(bridge_ifp, ifp, dst_if,
5351 mc, CHECKSUM_OPERATION_COMPUTE);
5352 }
5353 BRIDGE_UNREF(sc);
5354
5355 if ((ifp->if_flags & IFF_RUNNING) == 0) {
5356 m_freem(m);
5357 return EJUSTRETURN;
5358 }
5359 /* allow packet to continue on the originating interface */
5360 return 0;
5361 }
5362
5363 sendunicast:
5364 /*
5365 * XXX Spanning tree consideration here?
5366 */
5367
5368 bridge_span(sc, m);
5369 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5370 m_freem(m);
5371 BRIDGE_UNLOCK(sc);
5372 return EJUSTRETURN;
5373 }
5374
5375 BRIDGE_UNLOCK(sc);
5376 if (dst_if == ifp) {
5377 /* allow packet to continue on the originating interface */
5378 return 0;
5379 }
5380 if (dst_if != mac_nat_ifp) {
5381 (void) bridge_enqueue(bridge_ifp, ifp, dst_if, m,
5382 CHECKSUM_OPERATION_COMPUTE);
5383 } else {
5384 /*
5385 * This is not the original output interface
5386 * and the destination is the MAC NAT interface.
5387 * Drop the packet because the packet can't be sent
5388 * if the source MAC is incorrect.
5389 */
5390 m_freem(m);
5391 }
5392 return EJUSTRETURN;
5393 }
5394
5395 /*
5396 * Output callback.
5397 *
5398 * This routine is called externally from above only when if_bridge_txstart
5399 * is disabled; otherwise it is called internally by bridge_start().
5400 */
5401 static int
5402 bridge_output(struct ifnet *ifp, struct mbuf *m)
5403 {
5404 struct bridge_softc *sc = ifnet_softc(ifp);
5405 struct ether_header *eh;
5406 struct ifnet *dst_if = NULL;
5407 int error = 0;
5408
5409 eh = mtod(m, struct ether_header *);
5410
5411 BRIDGE_LOCK(sc);
5412
5413 if (!(m->m_flags & (M_BCAST | M_MCAST))) {
5414 dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0);
5415 }
5416
5417 (void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5418
5419 #if NBPFILTER > 0
5420 if (sc->sc_bpf_output) {
5421 bridge_bpf_output(ifp, m);
5422 }
5423 #endif
5424
5425 if (dst_if == NULL) {
5426 /* callee will unlock */
5427 bridge_broadcast(sc, NULL, m, 0);
5428 } else {
5429 ifnet_t bridge_ifp;
5430
5431 bridge_ifp = sc->sc_ifp;
5432 BRIDGE_UNLOCK(sc);
5433
5434 error = bridge_enqueue(bridge_ifp, NULL, dst_if, m,
5435 CHECKSUM_OPERATION_FINALIZE);
5436 }
5437
5438 return error;
5439 }
5440
5441 static void
5442 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5443 {
5444 struct ether_header *eh;
5445 bool is_ipv4;
5446 uint32_t sw_csum, hwcap;
5447 uint32_t did_sw;
5448 uint32_t csum_flags;
5449
5450 eh = mtod(m, struct ether_header *);
5451 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5452 return;
5453 }
5454
5455 /* do in software what the hardware cannot */
5456 hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5457 csum_flags = m->m_pkthdr.csum_flags;
5458 sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5459 sw_csum &= IF_HWASSIST_CSUM_MASK;
5460
5461 if (is_ipv4) {
5462 if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5463 (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5464 if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5465 uint16_t start =
5466 sizeof(*eh) + sizeof(struct ip);
5467 uint16_t ulpoff =
5468 m->m_pkthdr.csum_data & 0xffff;
5469 m->m_pkthdr.csum_flags |=
5470 (CSUM_DATA_VALID | CSUM_PARTIAL);
5471 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5472 m->m_pkthdr.csum_tx_start = start;
5473 } else {
5474 sw_csum |= (CSUM_DELAY_DATA &
5475 m->m_pkthdr.csum_flags);
5476 }
5477 }
5478 did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5479 } else {
5480 if ((hwcap & CSUM_PARTIAL) &&
5481 !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5482 (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5483 if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5484 uint16_t start =
5485 sizeof(*eh) + sizeof(struct ip6_hdr);
5486 uint16_t ulpoff =
5487 m->m_pkthdr.csum_data & 0xffff;
5488 m->m_pkthdr.csum_flags |=
5489 (CSUM_DATA_VALID | CSUM_PARTIAL);
5490 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5491 m->m_pkthdr.csum_tx_start = start;
5492 } else {
5493 sw_csum |= (CSUM_DELAY_IPV6_DATA &
5494 m->m_pkthdr.csum_flags);
5495 }
5496 }
5497 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5498 }
5499 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5500 "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5501 ifp->if_xname, csum_flags, hwcap, sw_csum,
5502 did_sw, m->m_pkthdr.csum_flags);
5503 }
5504
/*
 * bridge_start:
 *
 *	Start output on a bridge.
 *
 *	This routine is invoked by the start worker thread; because we never
 *	call it directly, there is no need to deploy any serialization
 *	mechanism other than what's already used by the worker thread, i.e.
 *	this is already single threaded.
 *
 *	This routine is called only when if_bridge_txstart is enabled.
 *
 *	Packets are dequeued from ifp one at a time and passed to
 *	bridge_output() until ifnet_dequeue() reports a non-zero status.
 */
static void
bridge_start(struct ifnet *ifp)
{
	struct mbuf *pkt;

	while (ifnet_dequeue(ifp, &pkt) == 0) {
		(void) bridge_output(ifp, pkt);
	}
}
5530
5531 /*
5532 * bridge_forward:
5533 *
5534 * The forwarding function of the bridge.
5535 *
5536 * NOTE: Releases the lock on return.
5537 */
5538 static void
5539 bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
5540 struct mbuf *m)
5541 {
5542 struct bridge_iflist *dbif;
5543 ifnet_t bridge_ifp;
5544 struct ifnet *src_if, *dst_if;
5545 struct ether_header *eh;
5546 uint16_t vlan;
5547 uint8_t *dst;
5548 int error;
5549 struct mac_nat_record mnr;
5550 bool translate_mac = FALSE;
5551 uint32_t sc_filter_flags = 0;
5552
5553 BRIDGE_LOCK_ASSERT_HELD(sc);
5554
5555 bridge_ifp = sc->sc_ifp;
5556 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5557 "%s m 0x%llx", bridge_ifp->if_xname,
5558 (uint64_t)VM_KERNEL_ADDRPERM(m));
5559
5560 src_if = m->m_pkthdr.rcvif;
5561 if (src_if != sbif->bif_ifp) {
5562 const char * src_if_name;
5563
5564 src_if_name = (src_if != NULL) ? src_if->if_xname : "?";
5565 BRIDGE_LOG(LOG_NOTICE, 0,
5566 "src_if %s != bif_ifp %s",
5567 src_if_name, sbif->bif_ifp->if_xname);
5568 goto drop;
5569 }
5570
5571 (void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5572 vlan = VLANTAGOF(m);
5573
5574
5575 if ((sbif->bif_ifflags & IFBIF_STP) &&
5576 sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5577 goto drop;
5578 }
5579
5580 eh = mtod(m, struct ether_header *);
5581 dst = eh->ether_dhost;
5582
5583 /* If the interface is learning, record the address. */
5584 if (sbif->bif_ifflags & IFBIF_LEARNING) {
5585 error = bridge_rtupdate(sc, eh->ether_shost, vlan,
5586 sbif, 0, IFBAF_DYNAMIC);
5587 /*
5588 * If the interface has addresses limits then deny any source
5589 * that is not in the cache.
5590 */
5591 if (error && sbif->bif_addrmax) {
5592 goto drop;
5593 }
5594 }
5595
5596 if ((sbif->bif_ifflags & IFBIF_STP) != 0 &&
5597 sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
5598 goto drop;
5599 }
5600
5601 /*
5602 * At this point, the port either doesn't participate
5603 * in spanning tree or it is in the forwarding state.
5604 */
5605
5606 /*
5607 * If the packet is unicast, destined for someone on
5608 * "this" side of the bridge, drop it.
5609 */
5610 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5611 /* unicast */
5612 dst_if = bridge_rtlookup(sc, dst, vlan);
5613 if (src_if == dst_if) {
5614 goto drop;
5615 }
5616 } else {
5617 /* broadcast/multicast */
5618
5619 /*
5620 * Check if its a reserved multicast address, any address
5621 * listed in 802.1D section 7.12.6 may not be forwarded by the
5622 * bridge.
5623 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
5624 */
5625 if (dst[0] == 0x01 && dst[1] == 0x80 &&
5626 dst[2] == 0xc2 && dst[3] == 0x00 &&
5627 dst[4] == 0x00 && dst[5] <= 0x0f) {
5628 goto drop;
5629 }
5630
5631
5632 /* ...forward it to all interfaces. */
5633 os_atomic_inc(&bridge_ifp->if_imcasts, relaxed);
5634 dst_if = NULL;
5635 }
5636
5637 /*
5638 * If we have a destination interface which is a member of our bridge,
5639 * OR this is a unicast packet, push it through the bpf(4) machinery.
5640 * For broadcast or multicast packets, don't bother because it will
5641 * be reinjected into ether_input. We do this before we pass the packets
5642 * through the pfil(9) framework, as it is possible that pfil(9) will
5643 * drop the packet, or possibly modify it, making it difficult to debug
5644 * firewall issues on the bridge.
5645 */
5646 #if NBPFILTER > 0
5647 if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
5648 dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5649 m->m_pkthdr.rcvif = bridge_ifp;
5650 BRIDGE_BPF_MTAP_INPUT(sc, m);
5651 }
5652 #endif /* NBPFILTER */
5653
5654 if (dst_if == NULL) {
5655 /* bridge_broadcast will unlock */
5656 bridge_broadcast(sc, sbif, m, 1);
5657 return;
5658 }
5659
5660 /*
5661 * Unicast.
5662 */
5663 /*
5664 * At this point, we're dealing with a unicast frame
5665 * going to a different interface.
5666 */
5667 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5668 goto drop;
5669 }
5670
5671 dbif = bridge_lookup_member_if(sc, dst_if);
5672 if (dbif == NULL) {
5673 /* Not a member of the bridge (anymore?) */
5674 goto drop;
5675 }
5676
5677 /* Private segments can not talk to each other */
5678 if (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) {
5679 goto drop;
5680 }
5681
5682 if ((dbif->bif_ifflags & IFBIF_STP) &&
5683 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5684 goto drop;
5685 }
5686
5687 #if HAS_DHCPRA_MASK
5688 /* APPLE MODIFICATION <rdar:6985737> */
5689 if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
5690 m = ip_xdhcpra_output(dst_if, m);
5691 if (!m) {
5692 ++bridge_ifp.if_xdhcpra;
5693 BRIDGE_UNLOCK(sc);
5694 return;
5695 }
5696 }
5697 #endif /* HAS_DHCPRA_MASK */
5698
5699 if (dbif == sc->sc_mac_nat_bif) {
5700 /* determine how to translate the packet */
5701 translate_mac
5702 = bridge_mac_nat_output(sc, sbif, &m, &mnr);
5703 if (m == NULL) {
5704 /* packet was deallocated */
5705 BRIDGE_UNLOCK(sc);
5706 return;
5707 }
5708 } else if (bif_has_checksum_offload(dbif) &&
5709 !bif_has_checksum_offload(sbif)) {
5710 /*
5711 * If the destination interface has checksum offload enabled,
5712 * verify the checksum now, unless the source interface also has
5713 * checksum offload enabled. The checksum in that case has
5714 * already just been computed and verifying it is unnecessary.
5715 */
5716 error = bridge_verify_checksum(&m, &dbif->bif_stats);
5717 if (error != 0) {
5718 BRIDGE_UNLOCK(sc);
5719 if (m != NULL) {
5720 m_freem(m);
5721 }
5722 return;
5723 }
5724 }
5725
5726 sc_filter_flags = sc->sc_filter_flags;
5727
5728 BRIDGE_UNLOCK(sc);
5729 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5730 if (bridge_pf(&m, dst_if, sc_filter_flags, FALSE) != 0) {
5731 return;
5732 }
5733 if (m == NULL) {
5734 return;
5735 }
5736 }
5737
5738 /* if we need to, translate the MAC address */
5739 if (translate_mac) {
5740 bridge_mac_nat_translate(&m, &mnr, IF_LLADDR(dst_if));
5741 }
5742 /*
5743 * We're forwarding an inbound packet in which the checksum must
5744 * already have been computed and if required, verified.
5745 */
5746 if (m != NULL) {
5747 (void) bridge_enqueue(bridge_ifp, src_if, dst_if, m,
5748 CHECKSUM_OPERATION_CLEAR_OFFLOAD);
5749 }
5750 return;
5751
5752 drop:
5753 BRIDGE_UNLOCK(sc);
5754 m_freem(m);
5755 }
5756
5757 static void
5758 inject_input_packet(ifnet_t ifp, mbuf_t m)
5759 {
5760 mbuf_pkthdr_setrcvif(m, ifp);
5761 mbuf_pkthdr_setheader(m, mbuf_data(m));
5762 mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
5763 mbuf_len(m) - ETHER_HDR_LEN);
5764 mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
5765 m->m_flags |= M_PROTO1; /* set to avoid loops */
5766 dlil_input_packet_list(ifp, m);
5767 return;
5768 }
5769
5770 static bool
5771 in_addr_is_ours(struct in_addr ip)
5772 {
5773 struct in_ifaddr *ia;
5774 bool ours = false;
5775
5776 lck_rw_lock_shared(&in_ifaddr_rwlock);
5777 TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5778 if (IA_SIN(ia)->sin_addr.s_addr == ip.s_addr) {
5779 ours = true;
5780 break;
5781 }
5782 }
5783 lck_rw_done(&in_ifaddr_rwlock);
5784 return ours;
5785 }
5786
5787 static bool
5788 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5789 {
5790 struct in6_ifaddr *ia6;
5791 bool ours = false;
5792
5793 if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5794 struct in6_addr dst_ip;
5795
5796 /* need to embed scope ID for comparison */
5797 bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5798 dst_ip.s6_addr16[1] = htons(ifscope);
5799 ip6_p = &dst_ip;
5800 }
5801 lck_rw_lock_shared(&in6_ifaddr_rwlock);
5802 TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5803 if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5804 ia6->ia_addr.sin6_scope_id, ifscope)) {
5805 ours = true;
5806 break;
5807 }
5808 }
5809 lck_rw_done(&in6_ifaddr_rwlock);
5810 return ours;
5811 }
5812
5813 static void
5814 bridge_interface_input(ifnet_t bridge_ifp, mbuf_t m,
5815 bpf_packet_func bpf_input_func)
5816 {
5817 size_t byte_count;
5818 struct ether_header *eh;
5819 errno_t error;
5820 bool is_ipv4;
5821 int len;
5822 u_int mac_hlen;
5823 int pkt_count;
5824
5825 /* segment large packets before sending them up */
5826 if (if_bridge_segmentation == 0) {
5827 goto done;
5828 }
5829 len = m->m_pkthdr.len;
5830 if (len <= (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5831 goto done;
5832 }
5833 eh = mtod(m, struct ether_header *);
5834 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5835 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5836 "large non IPv4/IPv6 packet");
5837 goto done;
5838 }
5839
5840 /*
5841 * We have a large IPv4/IPv6 TCP packet. Segment it if required.
5842 *
5843 * If gso_tcp() returns success (0), the packet(s) are
5844 * ready to be passed up. If the destination is a local IP address,
5845 * the packet will be passed up as a large, single packet.
5846 *
5847 * If gso_tcp() returns an error, the packet has already
5848 * been freed.
5849 */
5850 mac_hlen = sizeof(*eh);
5851 error = gso_tcp(bridge_ifp, &m, mac_hlen, is_ipv4, FALSE);
5852 if (error != 0) {
5853 return;
5854 }
5855
5856 done:
5857 pkt_count = 0;
5858 byte_count = 0;
5859 for (mbuf_t scan = m; scan != NULL; scan = scan->m_nextpkt) {
5860 /* Mark the packet as arriving on the bridge interface */
5861 mbuf_pkthdr_setrcvif(scan, bridge_ifp);
5862 mbuf_pkthdr_setheader(scan, mbuf_data(scan));
5863 if (bpf_input_func != NULL) {
5864 (*bpf_input_func)(bridge_ifp, scan);
5865 }
5866 mbuf_setdata(scan, (char *)mbuf_data(scan) + ETHER_HDR_LEN,
5867 mbuf_len(scan) - ETHER_HDR_LEN);
5868 mbuf_pkthdr_adjustlen(scan, -ETHER_HDR_LEN);
5869 byte_count += mbuf_pkthdr_len(scan);
5870 pkt_count++;
5871 }
5872 (void)ifnet_stat_increment_in(bridge_ifp, pkt_count, byte_count, 0);
5873 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5874 "%s %d packet(s) %ld bytes",
5875 bridge_ifp->if_xname, pkt_count, byte_count);
5876 dlil_input_packet_list(bridge_ifp, m);
5877 return;
5878 }
5879
5880 static bool
5881 is_our_ip(ip_packet_info_t info_p, uint32_t ifscope)
5882 {
5883 bool ours;
5884
5885 if (info_p->ip_is_ipv4) {
5886 struct in_addr dst_ip;
5887
5888 bcopy(&info_p->ip_hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
5889 ours = in_addr_is_ours(dst_ip);
5890 } else {
5891 ours = in6_addr_is_ours(&info_p->ip_hdr.ip6->ip6_dst, ifscope);
5892 }
5893 return ours;
5894 }
5895
5896 static inline errno_t
5897 bridge_vmnet_tag_input(ifnet_t bridge_ifp, ifnet_t ifp,
5898 const u_char * ether_dhost, mbuf_t *mp,
5899 bool is_broadcast, bool is_ip, bool is_ipv4,
5900 ip_packet_info * info_p, struct bripstats * stats_p,
5901 bool *info_initialized)
5902 {
5903 errno_t error = 0;
5904 bool is_local = false;
5905 struct pf_mtag *pf_mtag;
5906 u_int16_t tag = vmnet_tag;
5907
5908 *info_initialized = false;
5909 if (is_broadcast) {
5910 if (_ether_cmp(ether_dhost, etherbroadcastaddr) == 0) {
5911 tag = vmnet_broadcast_tag;
5912 } else {
5913 tag = vmnet_multicast_tag;
5914 }
5915 } else if (is_ip) {
5916 unsigned int mac_hlen = sizeof(struct ether_header);
5917
5918 bzero(stats_p, sizeof(*stats_p));
5919 *info_initialized = true;
5920 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p,
5921 stats_p);
5922 if (error != 0) {
5923 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_INPUT,
5924 "%s(%s) bridge_get_ip_proto failed %d",
5925 bridge_ifp->if_xname,
5926 ifp->if_xname, error);
5927 if (*mp == NULL) {
5928 return EJUSTRETURN;
5929 }
5930 } else {
5931 is_local = is_our_ip(info_p, bridge_ifp->if_index);
5932 if (is_local) {
5933 tag = vmnet_local_tag;
5934 }
5935 }
5936 }
5937 pf_mtag = pf_get_mtag(*mp);
5938 if (pf_mtag != NULL) {
5939 pf_mtag->pftag_tag = tag;
5940 }
5941 #if DEBUG || DEVELOPMENT
5942 {
5943 bool forced;
5944
5945 BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_ONE);
5946 if (forced) {
5947 m_freem(*mp);
5948 *mp = NULL;
5949 error = EJUSTRETURN;
5950 goto done;
5951 }
5952 BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_TWO);
5953 if (forced) {
5954 error = _EBADIP;
5955 goto done;
5956 }
5957 }
5958 done:
5959 #endif /* DEBUG || DEVELOPMENT */
5960 return error;
5961 }
5962
5963 static void
5964 bripstats_apply(struct bripstats *dst_p, const struct bripstats *src_p)
5965 {
5966 dst_p->bips_ip += src_p->bips_ip;
5967 dst_p->bips_ip6 += src_p->bips_ip6;
5968 dst_p->bips_udp += src_p->bips_udp;
5969 dst_p->bips_tcp += src_p->bips_tcp;
5970
5971 dst_p->bips_bad_ip += src_p->bips_bad_ip;
5972 dst_p->bips_bad_ip6 += src_p->bips_bad_ip6;
5973 dst_p->bips_bad_udp += src_p->bips_bad_udp;
5974 dst_p->bips_bad_tcp += src_p->bips_bad_tcp;
5975 }
5976
5977 static void
5978 bridge_bripstats_apply(ifnet_t ifp, const struct bripstats *stats_p)
5979 {
5980 struct bridge_iflist *bif;
5981 struct bridge_softc *sc = ifp->if_bridge;
5982
5983 BRIDGE_LOCK(sc);
5984 bif = bridge_lookup_member_if(sc, ifp);
5985 if (bif == NULL) {
5986 goto done;
5987 }
5988 if (!bif_has_checksum_offload(bif)) {
5989 goto done;
5990 }
5991 bripstats_apply(&bif->bif_stats.brms_in_ip, stats_p);
5992
5993 done:
5994 BRIDGE_UNLOCK(sc);
5995 return;
5996 }
5997
5998 /*
5999 * bridge_input:
6000 *
6001 * Filter input from a member interface. Queue the packet for
6002 * bridging if it is not for us.
6003 */
6004 errno_t
6005 bridge_input(struct ifnet *ifp, mbuf_t *data)
6006 {
6007 struct bridge_softc *sc = ifp->if_bridge;
6008 struct bridge_iflist *bif, *bif2;
6009 struct ether_header eh_in;
6010 bool is_ip = false;
6011 bool is_ipv4 = false;
6012 ifnet_t bridge_ifp;
6013 struct mbuf *mc, *mc2;
6014 unsigned int mac_hlen = sizeof(struct ether_header);
6015 uint16_t vlan;
6016 errno_t error;
6017 ip_packet_info info;
6018 struct bripstats stats;
6019 bool info_initialized = false;
6020 errno_t ip_packet_error = 0;
6021 bool is_broadcast;
6022 bool is_ip_broadcast = false;
6023 bool is_ifp_mac = false;
6024 mbuf_t m = *data;
6025 uint32_t sc_filter_flags = 0;
6026
6027 bridge_ifp = sc->sc_ifp;
6028 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6029 "%s from %s m 0x%llx data 0x%llx",
6030 bridge_ifp->if_xname, ifp->if_xname,
6031 (uint64_t)VM_KERNEL_ADDRPERM(m),
6032 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
6033 if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
6034 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6035 "%s not running passing along",
6036 bridge_ifp->if_xname);
6037 return 0;
6038 }
6039
6040 vlan = VLANTAGOF(m);
6041
6042 #ifdef IFF_MONITOR
6043 /*
6044 * Implement support for bridge monitoring. If this flag has been
6045 * set on this interface, discard the packet once we push it through
6046 * the bpf(4) machinery, but before we do, increment the byte and
6047 * packet counters associated with this interface.
6048 */
6049 if ((bridge_ifp->if_flags & IFF_MONITOR) != 0) {
6050 m->m_pkthdr.rcvif = bridge_ifp;
6051 BRIDGE_BPF_MTAP_INPUT(sc, m);
6052 (void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
6053 *data = NULL;
6054 m_freem(m);
6055 return EJUSTRETURN;
6056 }
6057 #endif /* IFF_MONITOR */
6058
6059 is_broadcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;
6060
6061 /*
6062 * Need to clear the promiscuous flag otherwise it will be
6063 * dropped by DLIL after processing filters
6064 */
6065 if ((mbuf_flags(m) & MBUF_PROMISC)) {
6066 mbuf_setflags_mask(m, 0, MBUF_PROMISC);
6067 }
6068
6069 /* copy the ethernet header */
6070 eh_in = *(mtod(m, struct ether_header *));
6071
6072 is_ip = ether_header_type_is_ip(&eh_in, &is_ipv4);
6073
6074 if (if_bridge_vmnet_pf_tagging != 0 && IFNET_IS_VMNET(ifp)) {
6075 /* tag packets coming from VMNET interfaces */
6076 ip_packet_error = bridge_vmnet_tag_input(bridge_ifp, ifp,
6077 eh_in.ether_dhost, data, is_broadcast, is_ip, is_ipv4,
6078 &info, &stats, &info_initialized);
6079 m = *data;
6080 if (m == NULL) {
6081 bridge_bripstats_apply(ifp, &stats);
6082 return EJUSTRETURN;
6083 }
6084 }
6085
6086 sc_filter_flags = sc->sc_filter_flags;
6087 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6088 error = bridge_pf(data, ifp, sc_filter_flags, TRUE);
6089 m = *data;
6090 if (error != 0 || m == NULL) {
6091 return EJUSTRETURN;
6092 }
6093 }
6094
6095 BRIDGE_LOCK(sc);
6096 bif = bridge_lookup_member_if(sc, ifp);
6097 if (bif == NULL) {
6098 BRIDGE_UNLOCK(sc);
6099 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6100 "%s bridge_lookup_member_if failed",
6101 bridge_ifp->if_xname);
6102 return 0;
6103 }
6104 if (is_ip && bif_has_checksum_offload(bif)) {
6105 if (info_initialized) {
6106 bripstats_apply(&bif->bif_stats.brms_in_ip, &stats);
6107 } else {
6108 error = bridge_get_ip_proto(data, mac_hlen, is_ipv4,
6109 &info, &bif->bif_stats.brms_in_ip);
6110 if (error != 0) {
6111 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
6112 "%s(%s) bridge_get_ip_proto failed %d",
6113 bridge_ifp->if_xname,
6114 bif->bif_ifp->if_xname, error);
6115 ip_packet_error = error;
6116 }
6117 }
6118 if (ip_packet_error == 0) {
6119 /* need to compute IP/UDP/TCP/checksums */
6120 error = bridge_offload_checksum(data, &info,
6121 &bif->bif_stats);
6122 if (error != 0) {
6123 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
6124 "%s(%s) bridge_offload_checksum failed %d",
6125 bridge_ifp->if_xname,
6126 bif->bif_ifp->if_xname, error);
6127 ip_packet_error = error;
6128 }
6129 }
6130 if (ip_packet_error != 0) {
6131 BRIDGE_UNLOCK(sc);
6132 if (*data != NULL) {
6133 m_freem(*data);
6134 *data = NULL;
6135 }
6136 return EJUSTRETURN;
6137 }
6138 m = *data;
6139 }
6140
6141 if (bif->bif_flags & BIFF_HOST_FILTER) {
6142 error = bridge_host_filter(bif, data);
6143 if (error != 0) {
6144 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6145 "%s bridge_host_filter failed",
6146 bif->bif_ifp->if_xname);
6147 BRIDGE_UNLOCK(sc);
6148 return EJUSTRETURN;
6149 }
6150 m = *data;
6151 }
6152
6153 if (!is_broadcast &&
6154 _ether_cmp(eh_in.ether_dhost, IF_LLADDR(ifp)) == 0) {
6155 /* the packet is unicast to the interface's MAC address */
6156 if (is_ip && sc->sc_mac_nat_bif == bif) {
6157 /* doing MAC-NAT, check if destination is IP broadcast */
6158 is_ip_broadcast = is_broadcast_ip_packet(data);
6159 if (*data == NULL) {
6160 BRIDGE_UNLOCK(sc);
6161 return EJUSTRETURN;
6162 }
6163 m = *data;
6164 }
6165 if (!is_ip_broadcast) {
6166 is_ifp_mac = TRUE;
6167 }
6168 }
6169
6170 bridge_span(sc, m);
6171
6172 if (is_broadcast || is_ip_broadcast) {
6173 if (is_broadcast && (m->m_flags & M_MCAST) != 0) {
6174 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6175 " multicast: "
6176 "%02x:%02x:%02x:%02x:%02x:%02x",
6177 eh_in.ether_dhost[0], eh_in.ether_dhost[1],
6178 eh_in.ether_dhost[2], eh_in.ether_dhost[3],
6179 eh_in.ether_dhost[4], eh_in.ether_dhost[5]);
6180 }
6181 /* Tap off 802.1D packets; they do not get forwarded. */
6182 if (is_broadcast &&
6183 _ether_cmp(eh_in.ether_dhost, bstp_etheraddr) == 0) {
6184 #if BRIDGESTP
6185 m = bstp_input(&bif->bif_stp, ifp, m);
6186 #else /* !BRIDGESTP */
6187 m_freem(m);
6188 m = NULL;
6189 #endif /* !BRIDGESTP */
6190 if (m == NULL) {
6191 BRIDGE_UNLOCK(sc);
6192 return EJUSTRETURN;
6193 }
6194 }
6195
6196 if ((bif->bif_ifflags & IFBIF_STP) &&
6197 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6198 BRIDGE_UNLOCK(sc);
6199 return 0;
6200 }
6201
6202 /*
6203 * Make a deep copy of the packet and enqueue the copy
6204 * for bridge processing.
6205 */
6206 mc = m_dup(m, M_DONTWAIT);
6207 if (mc == NULL) {
6208 BRIDGE_UNLOCK(sc);
6209 return 0;
6210 }
6211
6212 /*
6213 * Perform the bridge forwarding function with the copy.
6214 *
6215 * Note that bridge_forward calls BRIDGE_UNLOCK
6216 */
6217 if (is_ip_broadcast) {
6218 struct ether_header *eh;
6219
6220 /* make the copy look like it is actually broadcast */
6221 mc->m_flags |= M_BCAST;
6222 eh = mtod(mc, struct ether_header *);
6223 bcopy(etherbroadcastaddr, eh->ether_dhost,
6224 ETHER_ADDR_LEN);
6225 }
6226 bridge_forward(sc, bif, mc);
6227
6228 /*
6229 * Reinject the mbuf as arriving on the bridge so we have a
6230 * chance at claiming multicast packets. We can not loop back
6231 * here from ether_input as a bridge is never a member of a
6232 * bridge.
6233 */
6234 VERIFY(bridge_ifp->if_bridge == NULL);
6235 mc2 = m_dup(m, M_DONTWAIT);
6236 if (mc2 != NULL) {
6237 /* Keep the layer3 header aligned */
6238 int i = min(mc2->m_pkthdr.len, max_protohdr);
6239 mc2 = m_copyup(mc2, i, ETHER_ALIGN);
6240 }
6241 if (mc2 != NULL) {
6242 /* mark packet as arriving on the bridge */
6243 mc2->m_pkthdr.rcvif = bridge_ifp;
6244 mc2->m_pkthdr.pkt_hdr = mbuf_data(mc2);
6245 BRIDGE_BPF_MTAP_INPUT(sc, mc2);
6246 (void) mbuf_setdata(mc2,
6247 (char *)mbuf_data(mc2) + ETHER_HDR_LEN,
6248 mbuf_len(mc2) - ETHER_HDR_LEN);
6249 (void) mbuf_pkthdr_adjustlen(mc2, -ETHER_HDR_LEN);
6250 (void) ifnet_stat_increment_in(bridge_ifp, 1,
6251 mbuf_pkthdr_len(mc2), 0);
6252 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6253 "%s mcast for us", bridge_ifp->if_xname);
6254 dlil_input_packet_list(bridge_ifp, mc2);
6255 }
6256
6257 /* Return the original packet for local processing. */
6258 return 0;
6259 }
6260
6261 if ((bif->bif_ifflags & IFBIF_STP) &&
6262 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6263 BRIDGE_UNLOCK(sc);
6264 return 0;
6265 }
6266
6267 #ifdef DEV_CARP
6268 #define CARP_CHECK_WE_ARE_DST(iface) \
6269 ((iface)->if_carp &&\
6270 carp_forus((iface)->if_carp, eh_in.ether_dhost))
6271 #define CARP_CHECK_WE_ARE_SRC(iface) \
6272 ((iface)->if_carp &&\
6273 carp_forus((iface)->if_carp, eh_in.ether_shost))
6274 #else
6275 #define CARP_CHECK_WE_ARE_DST(iface) 0
6276 #define CARP_CHECK_WE_ARE_SRC(iface) 0
6277 #endif
6278
6279 #define PFIL_HOOKED_INET6 PFIL_HOOKED(&inet6_pfil_hook)
6280
6281 #define PFIL_PHYS(sc, ifp, m)
6282
6283 #define GRAB_OUR_PACKETS(iface) \
6284 if ((iface)->if_type == IFT_GIF) \
6285 continue; \
6286 /* It is destined for us. */ \
6287 if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_dhost) == 0 || \
6288 CARP_CHECK_WE_ARE_DST((iface))) { \
6289 if ((iface)->if_type == IFT_BRIDGE) { \
6290 BRIDGE_BPF_MTAP_INPUT(sc, m); \
6291 /* Filter on the physical interface. */ \
6292 PFIL_PHYS(sc, iface, m); \
6293 } else { \
6294 bpf_tap_in(iface, DLT_EN10MB, m, NULL, 0); \
6295 } \
6296 if (bif->bif_ifflags & IFBIF_LEARNING) { \
6297 error = bridge_rtupdate(sc, eh_in.ether_shost, \
6298 vlan, bif, 0, IFBAF_DYNAMIC); \
6299 if (error && bif->bif_addrmax) { \
6300 BRIDGE_UNLOCK(sc); \
6301 m_freem(m); \
6302 return (EJUSTRETURN); \
6303 } \
6304 } \
6305 BRIDGE_UNLOCK(sc); \
6306 inject_input_packet(iface, m); \
6307 return (EJUSTRETURN); \
6308 } \
6309 \
6310 /* We just received a packet that we sent out. */ \
6311 if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_shost) == 0 || \
6312 CARP_CHECK_WE_ARE_SRC((iface))) { \
6313 BRIDGE_UNLOCK(sc); \
6314 m_freem(m); \
6315 return (EJUSTRETURN); \
6316 }
6317
6318 /*
6319 * Unicast.
6320 */
6321
6322 /* handle MAC-NAT if enabled */
6323 if (is_ifp_mac && sc->sc_mac_nat_bif == bif) {
6324 ifnet_t dst_if;
6325 boolean_t is_input = FALSE;
6326
6327 dst_if = bridge_mac_nat_input(sc, data, &is_input);
6328 m = *data;
6329 if (dst_if == ifp) {
6330 /* our input packet */
6331 } else if (dst_if != NULL || m == NULL) {
6332 BRIDGE_UNLOCK(sc);
6333 if (dst_if != NULL) {
6334 ASSERT(m != NULL);
6335 if (is_input) {
6336 inject_input_packet(dst_if, m);
6337 } else {
6338 (void)bridge_enqueue(bridge_ifp, NULL,
6339 dst_if, m,
6340 CHECKSUM_OPERATION_CLEAR_OFFLOAD);
6341 }
6342 }
6343 return EJUSTRETURN;
6344 }
6345 }
6346
6347 /*
6348 * If the packet is for the bridge, pass it up for local processing.
6349 */
6350 if (_ether_cmp(eh_in.ether_dhost, IF_LLADDR(bridge_ifp)) == 0 ||
6351 CARP_CHECK_WE_ARE_DST(bridge_ifp)) {
6352 bpf_packet_func bpf_input_func = sc->sc_bpf_input;
6353
6354 /*
6355 * If the interface is learning, and the source
6356 * address is valid and not multicast, record
6357 * the address.
6358 */
6359 if (bif->bif_ifflags & IFBIF_LEARNING) {
6360 (void) bridge_rtupdate(sc, eh_in.ether_shost,
6361 vlan, bif, 0, IFBAF_DYNAMIC);
6362 }
6363 BRIDGE_UNLOCK(sc);
6364
6365 bridge_interface_input(bridge_ifp, m, bpf_input_func);
6366 return EJUSTRETURN;
6367 }
6368
6369 /*
6370 * if the destination of the packet is for the MAC address of
6371 * the member interface itself, then we don't need to forward
6372 * it -- just pass it back. Note that it'll likely just be
6373 * dropped by the stack, but if something else is bound to
6374 * the interface directly (for example, the wireless stats
6375 * protocol -- although that actually uses BPF right now),
6376 * then it will consume the packet
6377 *
6378 * ALSO, note that we do this check AFTER checking for the
6379 * bridge's own MAC address, because the bridge may be
6380 * using the SAME MAC address as one of its interfaces
6381 */
6382 if (is_ifp_mac) {
6383
6384 #ifdef VERY_VERY_VERY_DIAGNOSTIC
6385 BRIDGE_LOG(LOG_NOTICE, 0,
6386 "not forwarding packet bound for member interface");
6387 #endif
6388
6389 BRIDGE_UNLOCK(sc);
6390 return 0;
6391 }
6392
6393 /* Now check the remaining bridge members. */
6394 TAILQ_FOREACH(bif2, &sc->sc_iflist, bif_next) {
6395 if (bif2->bif_ifp != ifp) {
6396 GRAB_OUR_PACKETS(bif2->bif_ifp);
6397 }
6398 }
6399
6400 #undef CARP_CHECK_WE_ARE_DST
6401 #undef CARP_CHECK_WE_ARE_SRC
6402 #undef GRAB_OUR_PACKETS
6403
6404 /*
6405 * Perform the bridge forwarding function.
6406 *
6407 * Note that bridge_forward calls BRIDGE_UNLOCK
6408 */
6409 bridge_forward(sc, bif, m);
6410
6411 return EJUSTRETURN;
6412 }
6413
6414 /*
6415 * bridge_broadcast:
6416 *
6417 * Send a frame to all interfaces that are members of
6418 * the bridge, except for the one on which the packet
6419 * arrived.
6420 *
6421 * NOTE: Releases the lock on return.
6422 */
6423 static void
6424 bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
6425 struct mbuf *m, int runfilt)
6426 {
6427 ifnet_t bridge_ifp;
6428 struct bridge_iflist *dbif;
6429 struct ifnet * src_if;
6430 struct mbuf *mc;
6431 struct mbuf *mc_in;
6432 struct ifnet *dst_if;
6433 int error = 0, used = 0;
6434 boolean_t bridge_if_out;
6435 ChecksumOperation cksum_op;
6436 struct mac_nat_record mnr;
6437 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
6438 boolean_t translate_mac = FALSE;
6439 uint32_t sc_filter_flags = 0;
6440
6441 bridge_ifp = sc->sc_ifp;
6442 if (sbif != NULL) {
6443 bridge_if_out = FALSE;
6444 src_if = sbif->bif_ifp;
6445 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6446 if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6447 /* get the translation record while holding the lock */
6448 translate_mac
6449 = bridge_mac_nat_output(sc, sbif, &m, &mnr);
6450 if (m == NULL) {
6451 /* packet was deallocated */
6452 BRIDGE_UNLOCK(sc);
6453 return;
6454 }
6455 }
6456 } else {
6457 /*
6458 * sbif is NULL when the bridge interface calls
6459 * bridge_broadcast().
6460 */
6461 bridge_if_out = TRUE;
6462 cksum_op = CHECKSUM_OPERATION_FINALIZE;
6463 sbif = NULL;
6464 src_if = NULL;
6465 }
6466
6467 BRIDGE_LOCK2REF(sc, error);
6468 if (error) {
6469 m_freem(m);
6470 return;
6471 }
6472
6473 TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6474 dst_if = dbif->bif_ifp;
6475 if (dst_if == src_if) {
6476 /* skip the interface that the packet came in on */
6477 continue;
6478 }
6479
6480 /* Private segments can not talk to each other */
6481 if (sbif != NULL &&
6482 (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6483 continue;
6484 }
6485
6486 if ((dbif->bif_ifflags & IFBIF_STP) &&
6487 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6488 continue;
6489 }
6490
6491 if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6492 (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
6493 continue;
6494 }
6495
6496 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6497 continue;
6498 }
6499
6500 if (!(dbif->bif_flags & BIFF_MEDIA_ACTIVE)) {
6501 continue;
6502 }
6503
6504 if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6505 mc = m;
6506 used = 1;
6507 } else {
6508 mc = m_dup(m, M_DONTWAIT);
6509 if (mc == NULL) {
6510 (void) ifnet_stat_increment_out(bridge_ifp,
6511 0, 0, 1);
6512 continue;
6513 }
6514 }
6515
6516 /*
6517 * If broadcast input is enabled, do so only if this
6518 * is an input packet.
6519 */
6520 if (!bridge_if_out &&
6521 (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6522 mc_in = m_dup(mc, M_DONTWAIT);
6523 /* this could fail, but we continue anyways */
6524 } else {
6525 mc_in = NULL;
6526 }
6527
6528 /* out */
6529 if (translate_mac && mac_nat_bif == dbif) {
6530 /* translate the packet without holding the lock */
6531 bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
6532 }
6533
6534 sc_filter_flags = sc->sc_filter_flags;
6535 if (runfilt &&
6536 PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6537 if (used == 0) {
6538 /* Keep the layer3 header aligned */
6539 int i = min(mc->m_pkthdr.len, max_protohdr);
6540 mc = m_copyup(mc, i, ETHER_ALIGN);
6541 if (mc == NULL) {
6542 (void) ifnet_stat_increment_out(
6543 sc->sc_ifp, 0, 0, 1);
6544 if (mc_in != NULL) {
6545 m_freem(mc_in);
6546 mc_in = NULL;
6547 }
6548 continue;
6549 }
6550 }
6551 if (bridge_pf(&mc, dst_if, sc_filter_flags, FALSE) != 0) {
6552 if (mc_in != NULL) {
6553 m_freem(mc_in);
6554 mc_in = NULL;
6555 }
6556 continue;
6557 }
6558 if (mc == NULL) {
6559 if (mc_in != NULL) {
6560 m_freem(mc_in);
6561 mc_in = NULL;
6562 }
6563 continue;
6564 }
6565 }
6566
6567 if (mc != NULL) {
6568 /* verify checksum if necessary */
6569 if (bif_has_checksum_offload(dbif) && sbif != NULL &&
6570 !bif_has_checksum_offload(sbif)) {
6571 error = bridge_verify_checksum(&mc,
6572 &dbif->bif_stats);
6573 if (error != 0) {
6574 if (mc != NULL) {
6575 m_freem(mc);
6576 }
6577 mc = NULL;
6578 }
6579 }
6580 if (mc != NULL) {
6581 (void) bridge_enqueue(bridge_ifp,
6582 NULL, dst_if, mc, cksum_op);
6583 }
6584 }
6585
6586 /* in */
6587 if (mc_in == NULL) {
6588 continue;
6589 }
6590 bpf_tap_in(dst_if, DLT_EN10MB, mc_in, NULL, 0);
6591 mbuf_pkthdr_setrcvif(mc_in, dst_if);
6592 mbuf_pkthdr_setheader(mc_in, mbuf_data(mc_in));
6593 mbuf_setdata(mc_in, (char *)mbuf_data(mc_in) + ETHER_HDR_LEN,
6594 mbuf_len(mc_in) - ETHER_HDR_LEN);
6595 mbuf_pkthdr_adjustlen(mc_in, -ETHER_HDR_LEN);
6596 mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
6597 dlil_input_packet_list(dst_if, mc_in);
6598 }
6599 if (used == 0) {
6600 m_freem(m);
6601 }
6602
6603
6604 BRIDGE_UNREF(sc);
6605 }
6606
6607 /*
6608 * bridge_span:
6609 *
6610 * Duplicate a packet out one or more interfaces that are in span mode,
6611 * the original mbuf is unmodified.
6612 */
6613 static void
6614 bridge_span(struct bridge_softc *sc, struct mbuf *m)
6615 {
6616 struct bridge_iflist *bif;
6617 struct ifnet *dst_if;
6618 struct mbuf *mc;
6619
6620 if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6621 return;
6622 }
6623
6624 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6625 dst_if = bif->bif_ifp;
6626
6627 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6628 continue;
6629 }
6630
6631 mc = m_copypacket(m, M_DONTWAIT);
6632 if (mc == NULL) {
6633 (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6634 continue;
6635 }
6636
6637 (void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, mc,
6638 CHECKSUM_OPERATION_NONE);
6639 }
6640 }
6641
6642
6643 /*
6644 * bridge_rtupdate:
6645 *
6646 * Add a bridge routing entry.
6647 */
6648 static int
6649 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
6650 struct bridge_iflist *bif, int setflags, uint8_t flags)
6651 {
6652 struct bridge_rtnode *brt;
6653 int error;
6654
6655 BRIDGE_LOCK_ASSERT_HELD(sc);
6656
6657 /* Check the source address is valid and not multicast. */
6658 if (ETHER_IS_MULTICAST(dst) ||
6659 (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6660 dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6661 return EINVAL;
6662 }
6663
6664
6665 /* 802.1p frames map to vlan 1 */
6666 if (vlan == 0) {
6667 vlan = 1;
6668 }
6669
6670 /*
6671 * A route for this destination might already exist. If so,
6672 * update it, otherwise create a new one.
6673 */
6674 if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6675 if (sc->sc_brtcnt >= sc->sc_brtmax) {
6676 sc->sc_brtexceeded++;
6677 return ENOSPC;
6678 }
6679 /* Check per interface address limits (if enabled) */
6680 if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6681 bif->bif_addrexceeded++;
6682 return ENOSPC;
6683 }
6684
6685 /*
6686 * Allocate a new bridge forwarding node, and
6687 * initialize the expiration time and Ethernet
6688 * address.
6689 */
6690 brt = zalloc_noblock(bridge_rtnode_pool);
6691 if (brt == NULL) {
6692 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6693 "zalloc_nolock failed");
6694 return ENOMEM;
6695 }
6696 bzero(brt, sizeof(struct bridge_rtnode));
6697
6698 if (bif->bif_ifflags & IFBIF_STICKY) {
6699 brt->brt_flags = IFBAF_STICKY;
6700 } else {
6701 brt->brt_flags = IFBAF_DYNAMIC;
6702 }
6703
6704 memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6705 brt->brt_vlan = vlan;
6706
6707
6708 if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6709 zfree(bridge_rtnode_pool, brt);
6710 return error;
6711 }
6712 brt->brt_dst = bif;
6713 bif->bif_addrcnt++;
6714 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6715 "added %02x:%02x:%02x:%02x:%02x:%02x "
6716 "on %s count %u hashsize %u",
6717 dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6718 sc->sc_ifp->if_xname, sc->sc_brtcnt,
6719 sc->sc_rthash_size);
6720 }
6721
6722 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6723 brt->brt_dst != bif) {
6724 brt->brt_dst->bif_addrcnt--;
6725 brt->brt_dst = bif;
6726 brt->brt_dst->bif_addrcnt++;
6727 }
6728
6729 if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6730 unsigned long now;
6731
6732 now = (unsigned long) net_uptime();
6733 brt->brt_expire = now + sc->sc_brttimeout;
6734 }
6735 if (setflags) {
6736 brt->brt_flags = flags;
6737 }
6738
6739
6740 return 0;
6741 }
6742
6743 /*
6744 * bridge_rtlookup:
6745 *
6746 * Lookup the destination interface for an address.
6747 */
6748 static struct ifnet *
6749 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6750 {
6751 struct bridge_rtnode *brt;
6752
6753 BRIDGE_LOCK_ASSERT_HELD(sc);
6754
6755 if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6756 return NULL;
6757 }
6758
6759 return brt->brt_ifp;
6760 }
6761
6762 /*
6763 * bridge_rttrim:
6764 *
6765 * Trim the routine table so that we have a number
6766 * of routing entries less than or equal to the
6767 * maximum number.
6768 */
6769 static void
6770 bridge_rttrim(struct bridge_softc *sc)
6771 {
6772 struct bridge_rtnode *brt, *nbrt;
6773
6774 BRIDGE_LOCK_ASSERT_HELD(sc);
6775
6776 /* Make sure we actually need to do this. */
6777 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6778 return;
6779 }
6780
6781 /* Force an aging cycle; this might trim enough addresses. */
6782 bridge_rtage(sc);
6783 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6784 return;
6785 }
6786
6787 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6788 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6789 bridge_rtnode_destroy(sc, brt);
6790 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6791 return;
6792 }
6793 }
6794 }
6795 }
6796
6797 /*
6798 * bridge_aging_timer:
6799 *
6800 * Aging periodic timer for the bridge routing table.
6801 */
6802 static void
6803 bridge_aging_timer(struct bridge_softc *sc)
6804 {
6805 BRIDGE_LOCK_ASSERT_HELD(sc);
6806
6807 bridge_rtage(sc);
6808 if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6809 (sc->sc_flags & SCF_DETACHING) == 0) {
6810 sc->sc_aging_timer.bdc_sc = sc;
6811 sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6812 sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6813 bridge_schedule_delayed_call(&sc->sc_aging_timer);
6814 }
6815 }
6816
6817 /*
6818 * bridge_rtage:
6819 *
6820 * Perform an aging cycle.
6821 */
6822 static void
6823 bridge_rtage(struct bridge_softc *sc)
6824 {
6825 struct bridge_rtnode *brt, *nbrt;
6826 unsigned long now;
6827
6828 BRIDGE_LOCK_ASSERT_HELD(sc);
6829
6830 now = (unsigned long) net_uptime();
6831
6832 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6833 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6834 if (now >= brt->brt_expire) {
6835 bridge_rtnode_destroy(sc, brt);
6836 }
6837 }
6838 }
6839 if (sc->sc_mac_nat_bif != NULL) {
6840 bridge_mac_nat_age_entries(sc, now);
6841 }
6842 }
6843
6844 /*
6845 * bridge_rtflush:
6846 *
6847 * Remove all dynamic addresses from the bridge.
6848 */
6849 static void
6850 bridge_rtflush(struct bridge_softc *sc, int full)
6851 {
6852 struct bridge_rtnode *brt, *nbrt;
6853
6854 BRIDGE_LOCK_ASSERT_HELD(sc);
6855
6856 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6857 if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6858 bridge_rtnode_destroy(sc, brt);
6859 }
6860 }
6861 }
6862
6863 /*
6864 * bridge_rtdaddr:
6865 *
6866 * Remove an address from the table.
6867 */
6868 static int
6869 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6870 {
6871 struct bridge_rtnode *brt;
6872 int found = 0;
6873
6874 BRIDGE_LOCK_ASSERT_HELD(sc);
6875
6876 /*
6877 * If vlan is zero then we want to delete for all vlans so the lookup
6878 * may return more than one.
6879 */
6880 while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6881 bridge_rtnode_destroy(sc, brt);
6882 found = 1;
6883 }
6884
6885 return found ? 0 : ENOENT;
6886 }
6887
6888 /*
6889 * bridge_rtdelete:
6890 *
6891 * Delete routes to a specific member interface.
6892 */
6893 static void
6894 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6895 {
6896 struct bridge_rtnode *brt, *nbrt;
6897
6898 BRIDGE_LOCK_ASSERT_HELD(sc);
6899
6900 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6901 if (brt->brt_ifp == ifp && (full ||
6902 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6903 bridge_rtnode_destroy(sc, brt);
6904 }
6905 }
6906 }
6907
6908 /*
6909 * bridge_rtable_init:
6910 *
6911 * Initialize the route table for this bridge.
6912 */
6913 static int
6914 bridge_rtable_init(struct bridge_softc *sc)
6915 {
6916 u_int32_t i;
6917
6918 sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6919 BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6920 sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6921
6922 for (i = 0; i < sc->sc_rthash_size; i++) {
6923 LIST_INIT(&sc->sc_rthash[i]);
6924 }
6925
6926 sc->sc_rthash_key = RandomULong();
6927
6928 LIST_INIT(&sc->sc_rtlist);
6929
6930 return 0;
6931 }
6932
6933 /*
6934 * bridge_rthash_delayed_resize:
6935 *
6936 * Resize the routing table hash on a delayed thread call.
6937 */
6938 static void
6939 bridge_rthash_delayed_resize(struct bridge_softc *sc)
6940 {
6941 u_int32_t new_rthash_size = 0;
6942 u_int32_t old_rthash_size = 0;
6943 struct _bridge_rtnode_list *new_rthash = NULL;
6944 struct _bridge_rtnode_list *old_rthash = NULL;
6945 u_int32_t i;
6946 struct bridge_rtnode *brt;
6947 int error = 0;
6948
6949 BRIDGE_LOCK_ASSERT_HELD(sc);
6950
6951 /*
6952 * Four entries per hash bucket is our ideal load factor
6953 */
6954 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6955 goto out;
6956 }
6957
6958 /*
6959 * Doubling the number of hash buckets may be too simplistic
6960 * especially when facing a spike of new entries
6961 */
6962 new_rthash_size = sc->sc_rthash_size * 2;
6963
6964 sc->sc_flags |= SCF_RESIZING;
6965 BRIDGE_UNLOCK(sc);
6966
6967 new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
6968 Z_WAITOK | Z_ZERO);
6969
6970 BRIDGE_LOCK(sc);
6971 sc->sc_flags &= ~SCF_RESIZING;
6972
6973 if (new_rthash == NULL) {
6974 error = ENOMEM;
6975 goto out;
6976 }
6977 if ((sc->sc_flags & SCF_DETACHING)) {
6978 error = ENODEV;
6979 goto out;
6980 }
6981 /*
6982 * Fail safe from here on
6983 */
6984 old_rthash = sc->sc_rthash;
6985 old_rthash_size = sc->sc_rthash_size;
6986 sc->sc_rthash = new_rthash;
6987 sc->sc_rthash_size = new_rthash_size;
6988
6989 /*
6990 * Get a new key to force entries to be shuffled around to reduce
6991 * the likelihood they will land in the same buckets
6992 */
6993 sc->sc_rthash_key = RandomULong();
6994
6995 for (i = 0; i < sc->sc_rthash_size; i++) {
6996 LIST_INIT(&sc->sc_rthash[i]);
6997 }
6998
6999 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7000 LIST_REMOVE(brt, brt_hash);
7001 (void) bridge_rtnode_hash(sc, brt);
7002 }
7003 out:
7004 if (error == 0) {
7005 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7006 "%s new size %u",
7007 sc->sc_ifp->if_xname, sc->sc_rthash_size);
7008 kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
7009 } else {
7010 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
7011 "%s failed %d", sc->sc_ifp->if_xname, error);
7012 kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
7013 }
7014 }
7015
7016 /*
7017 * Resize the number of hash buckets based on the load factor
7018 * Currently only grow
7019 * Failing to resize the hash table is not fatal
7020 */
7021 static void
7022 bridge_rthash_resize(struct bridge_softc *sc)
7023 {
7024 BRIDGE_LOCK_ASSERT_HELD(sc);
7025
7026 if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
7027 return;
7028 }
7029
7030 /*
7031 * Four entries per hash bucket is our ideal load factor
7032 */
7033 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
7034 return;
7035 }
7036 /*
7037 * Hard limit on the size of the routing hash table
7038 */
7039 if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
7040 return;
7041 }
7042
7043 sc->sc_resize_call.bdc_sc = sc;
7044 sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
7045 bridge_schedule_delayed_call(&sc->sc_resize_call);
7046 }
7047
7048 /*
7049 * bridge_rtable_fini:
7050 *
7051 * Deconstruct the route table for this bridge.
7052 */
7053 static void
7054 bridge_rtable_fini(struct bridge_softc *sc)
7055 {
7056 KASSERT(sc->sc_brtcnt == 0,
7057 ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
7058 kfree_type(struct _bridge_rtnode_list, sc->sc_rthash_size,
7059 sc->sc_rthash);
7060 sc->sc_rthash = NULL;
7061 sc->sc_rthash_size = 0;
7062 }
7063
7064 /*
7065 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
7066 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
7067 */
7068 #define mix(a, b, c) \
7069 do { \
7070 a -= b; a -= c; a ^= (c >> 13); \
7071 b -= c; b -= a; b ^= (a << 8); \
7072 c -= a; c -= b; c ^= (b >> 13); \
7073 a -= b; a -= c; a ^= (c >> 12); \
7074 b -= c; b -= a; b ^= (a << 16); \
7075 c -= a; c -= b; c ^= (b >> 5); \
7076 a -= b; a -= c; a ^= (c >> 3); \
7077 b -= c; b -= a; b ^= (a << 10); \
7078 c -= a; c -= b; c ^= (b >> 15); \
7079 } while ( /*CONSTCOND*/ 0)
7080
7081 static __inline uint32_t
7082 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
7083 {
7084 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
7085
7086 b += addr[5] << 8;
7087 b += addr[4];
7088 a += addr[3] << 24;
7089 a += addr[2] << 16;
7090 a += addr[1] << 8;
7091 a += addr[0];
7092
7093 mix(a, b, c);
7094
7095 return c & BRIDGE_RTHASH_MASK(sc);
7096 }
7097
7098 #undef mix
7099
7100 static int
7101 bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
7102 {
7103 int i, d;
7104
7105 for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) {
7106 d = ((int)a[i]) - ((int)b[i]);
7107 }
7108
7109 return d;
7110 }
7111
7112 /*
7113 * bridge_rtnode_lookup:
7114 *
7115 * Look up a bridge route node for the specified destination. Compare the
7116 * vlan id or if zero then just return the first match.
7117 */
7118 static struct bridge_rtnode *
7119 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr,
7120 uint16_t vlan)
7121 {
7122 struct bridge_rtnode *brt;
7123 uint32_t hash;
7124 int dir;
7125
7126 BRIDGE_LOCK_ASSERT_HELD(sc);
7127
7128 hash = bridge_rthash(sc, addr);
7129 LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7130 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7131 if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7132 return brt;
7133 }
7134 if (dir > 0) {
7135 return NULL;
7136 }
7137 }
7138
7139 return NULL;
7140 }
7141
7142 /*
7143 * bridge_rtnode_hash:
7144 *
7145 * Insert the specified bridge node into the route hash table.
7146 * This is used when adding a new node or to rehash when resizing
7147 * the hash table
7148 */
7149 static int
7150 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7151 {
7152 struct bridge_rtnode *lbrt;
7153 uint32_t hash;
7154 int dir;
7155
7156 BRIDGE_LOCK_ASSERT_HELD(sc);
7157
7158 hash = bridge_rthash(sc, brt->brt_addr);
7159
7160 lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7161 if (lbrt == NULL) {
7162 LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7163 goto out;
7164 }
7165
7166 do {
7167 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7168 if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7169 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7170 "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7171 sc->sc_ifp->if_xname,
7172 brt->brt_addr[0], brt->brt_addr[1],
7173 brt->brt_addr[2], brt->brt_addr[3],
7174 brt->brt_addr[4], brt->brt_addr[5]);
7175 return EEXIST;
7176 }
7177 if (dir > 0) {
7178 LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7179 goto out;
7180 }
7181 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7182 LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7183 goto out;
7184 }
7185 lbrt = LIST_NEXT(lbrt, brt_hash);
7186 } while (lbrt != NULL);
7187
7188 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7189 "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7190 sc->sc_ifp->if_xname,
7191 brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7192 brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7193 out:
7194 return 0;
7195 }
7196
7197 /*
7198 * bridge_rtnode_insert:
7199 *
7200 * Insert the specified bridge node into the route table. We
7201 * assume the entry is not already in the table.
7202 */
7203 static int
7204 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7205 {
7206 int error;
7207
7208 error = bridge_rtnode_hash(sc, brt);
7209 if (error != 0) {
7210 return error;
7211 }
7212
7213 LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7214 sc->sc_brtcnt++;
7215
7216 bridge_rthash_resize(sc);
7217
7218 return 0;
7219 }
7220
7221 /*
7222 * bridge_rtnode_destroy:
7223 *
7224 * Destroy a bridge rtnode.
7225 */
7226 static void
7227 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7228 {
7229 BRIDGE_LOCK_ASSERT_HELD(sc);
7230
7231 LIST_REMOVE(brt, brt_hash);
7232
7233 LIST_REMOVE(brt, brt_list);
7234 sc->sc_brtcnt--;
7235 brt->brt_dst->bif_addrcnt--;
7236 zfree(bridge_rtnode_pool, brt);
7237 }
7238
7239 #if BRIDGESTP
7240 /*
7241 * bridge_rtable_expire:
7242 *
7243 * Set the expiry time for all routes on an interface.
7244 */
7245 static void
7246 bridge_rtable_expire(struct ifnet *ifp, int age)
7247 {
7248 struct bridge_softc *sc = ifp->if_bridge;
7249 struct bridge_rtnode *brt;
7250
7251 BRIDGE_LOCK(sc);
7252
7253 /*
7254 * If the age is zero then flush, otherwise set all the expiry times to
7255 * age for the interface
7256 */
7257 if (age == 0) {
7258 bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7259 } else {
7260 unsigned long now;
7261
7262 now = (unsigned long) net_uptime();
7263
7264 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7265 /* Cap the expiry time to 'age' */
7266 if (brt->brt_ifp == ifp &&
7267 brt->brt_expire > now + age &&
7268 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7269 brt->brt_expire = now + age;
7270 }
7271 }
7272 }
7273 BRIDGE_UNLOCK(sc);
7274 }
7275
7276 /*
7277 * bridge_state_change:
7278 *
7279 * Callback from the bridgestp code when a port changes states.
7280 */
7281 static void
7282 bridge_state_change(struct ifnet *ifp, int state)
7283 {
7284 struct bridge_softc *sc = ifp->if_bridge;
7285 static const char *stpstates[] = {
7286 "disabled",
7287 "listening",
7288 "learning",
7289 "forwarding",
7290 "blocking",
7291 "discarding"
7292 };
7293
7294 if (log_stp) {
7295 log(LOG_NOTICE, "%s: state changed to %s on %s",
7296 sc->sc_ifp->if_xname,
7297 stpstates[state], ifp->if_xname);
7298 }
7299 }
7300 #endif /* BRIDGESTP */
7301
7302 /*
7303 * bridge_set_bpf_tap:
7304 *
7305 * Sets ups the BPF callbacks.
7306 */
7307 static errno_t
7308 bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
7309 {
7310 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7311
7312 /* TBD locking */
7313 if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
7314 return ENODEV;
7315 }
7316 switch (mode) {
7317 case BPF_TAP_DISABLE:
7318 sc->sc_bpf_input = sc->sc_bpf_output = NULL;
7319 break;
7320
7321 case BPF_TAP_INPUT:
7322 sc->sc_bpf_input = bpf_callback;
7323 break;
7324
7325 case BPF_TAP_OUTPUT:
7326 sc->sc_bpf_output = bpf_callback;
7327 break;
7328
7329 case BPF_TAP_INPUT_OUTPUT:
7330 sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
7331 break;
7332
7333 default:
7334 break;
7335 }
7336
7337 return 0;
7338 }
7339
7340 /*
7341 * bridge_detach:
7342 *
7343 * Callback when interface has been detached.
7344 */
7345 static void
7346 bridge_detach(ifnet_t ifp)
7347 {
7348 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7349
7350 #if BRIDGESTP
7351 bstp_detach(&sc->sc_stp);
7352 #endif /* BRIDGESTP */
7353
7354 /* Tear down the routing table. */
7355 bridge_rtable_fini(sc);
7356
7357 lck_mtx_lock(&bridge_list_mtx);
7358 LIST_REMOVE(sc, sc_list);
7359 lck_mtx_unlock(&bridge_list_mtx);
7360
7361 ifnet_release(ifp);
7362
7363 lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
7364 kfree_type(struct bridge_softc, sc);
7365 }
7366
7367 /*
7368 * bridge_bpf_input:
7369 *
7370 * Invoke the input BPF callback if enabled
7371 */
7372 static errno_t
7373 bridge_bpf_input(ifnet_t ifp, struct mbuf *m, const char * func, int line)
7374 {
7375 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7376 bpf_packet_func input_func = sc->sc_bpf_input;
7377
7378 if (input_func != NULL) {
7379 if (mbuf_pkthdr_rcvif(m) != ifp) {
7380 BRIDGE_LOG(LOG_NOTICE, 0,
7381 "%s.%d: rcvif: 0x%llx != ifp 0x%llx", func, line,
7382 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
7383 (uint64_t)VM_KERNEL_ADDRPERM(ifp));
7384 }
7385 (*input_func)(ifp, m);
7386 }
7387 return 0;
7388 }
7389
7390 /*
7391 * bridge_bpf_output:
7392 *
7393 * Invoke the output BPF callback if enabled
7394 */
7395 static errno_t
7396 bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
7397 {
7398 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7399 bpf_packet_func output_func = sc->sc_bpf_output;
7400
7401 if (output_func != NULL) {
7402 (*output_func)(ifp, m);
7403 }
7404 return 0;
7405 }
7406
7407 /*
7408 * bridge_link_event:
7409 *
7410 * Report a data link event on an interface
7411 */
7412 static void
7413 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7414 {
7415 struct event {
7416 u_int32_t ifnet_family;
7417 u_int32_t unit;
7418 char if_name[IFNAMSIZ];
7419 };
7420 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7421 struct kern_event_msg *header = (struct kern_event_msg*)message;
7422 struct event *data = (struct event *)(header + 1);
7423
7424 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7425 "%s event_code %u - %s", ifp->if_xname,
7426 event_code, dlil_kev_dl_code_str(event_code));
7427 header->total_size = sizeof(message);
7428 header->vendor_code = KEV_VENDOR_APPLE;
7429 header->kev_class = KEV_NETWORK_CLASS;
7430 header->kev_subclass = KEV_DL_SUBCLASS;
7431 header->event_code = event_code;
7432 data->ifnet_family = ifnet_family(ifp);
7433 data->unit = (u_int32_t)ifnet_unit(ifp);
7434 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7435 ifnet_event(ifp, header);
7436 }
7437
/*
 * BRIDGE_HF_DROP:
 *
 *	Account for a packet rejected by the host filter: bump the
 *	bridge_hostfilter_stats counter named by `reason', log the
 *	function name, line number and counter name, and set `error' to
 *	EINVAL.
 *
 *	The expansion is a bare braced block that assigns to a variable
 *	called `error', which must be in scope wherever the macro is
 *	used.  The format keeps a space between the line number and the
 *	counter name so the two do not run together in the log.
 */
#define BRIDGE_HF_DROP(reason, func, line) {                            \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	            "%s.%d " #reason, func, line);                      \
	        error = EINVAL;                                         \
	}
7444
7445 /*
7446 * Make sure this is a DHCP or Bootp request that match the host filter
7447 */
7448 static int
7449 bridge_dhcp_filter(struct bridge_iflist *bif, struct mbuf *m, size_t offset)
7450 {
7451 int error = EINVAL;
7452 struct dhcp dhcp;
7453
7454 /*
7455 * Note: We use the dhcp structure because bootp structure definition
7456 * is larger and some vendors do not pad the request
7457 */
7458 error = mbuf_copydata(m, offset, sizeof(struct dhcp), &dhcp);
7459 if (error != 0) {
7460 BRIDGE_HF_DROP(brhf_dhcp_too_small, __func__, __LINE__);
7461 goto done;
7462 }
7463 if (dhcp.dp_op != BOOTREQUEST) {
7464 BRIDGE_HF_DROP(brhf_dhcp_bad_op, __func__, __LINE__);
7465 goto done;
7466 }
7467 /*
7468 * The hardware address must be an exact match
7469 */
7470 if (dhcp.dp_htype != ARPHRD_ETHER) {
7471 BRIDGE_HF_DROP(brhf_dhcp_bad_htype, __func__, __LINE__);
7472 goto done;
7473 }
7474 if (dhcp.dp_hlen != ETHER_ADDR_LEN) {
7475 BRIDGE_HF_DROP(brhf_dhcp_bad_hlen, __func__, __LINE__);
7476 goto done;
7477 }
7478 if (bcmp(dhcp.dp_chaddr, bif->bif_hf_hwsrc,
7479 ETHER_ADDR_LEN) != 0) {
7480 BRIDGE_HF_DROP(brhf_dhcp_bad_chaddr, __func__, __LINE__);
7481 goto done;
7482 }
7483 /*
7484 * Client address must match the host address or be not specified
7485 */
7486 if (dhcp.dp_ciaddr.s_addr != bif->bif_hf_ipsrc.s_addr &&
7487 dhcp.dp_ciaddr.s_addr != INADDR_ANY) {
7488 BRIDGE_HF_DROP(brhf_dhcp_bad_ciaddr, __func__, __LINE__);
7489 goto done;
7490 }
7491 error = 0;
7492 done:
7493 return error;
7494 }
7495
7496 static int
7497 bridge_host_filter(struct bridge_iflist *bif, mbuf_t *data)
7498 {
7499 int error = EINVAL;
7500 struct ether_header *eh;
7501 static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
7502 mbuf_t m = *data;
7503
7504 eh = mtod(m, struct ether_header *);
7505
7506 /*
7507 * Restrict the source hardware address
7508 */
7509 if ((bif->bif_flags & BIFF_HF_HWSRC) != 0 &&
7510 bcmp(eh->ether_shost, bif->bif_hf_hwsrc,
7511 ETHER_ADDR_LEN) != 0) {
7512 BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr, __func__, __LINE__);
7513 goto done;
7514 }
7515
7516 /*
7517 * Restrict Ethernet protocols to ARP and IP/IPv6
7518 */
7519 if (eh->ether_type == htons(ETHERTYPE_ARP)) {
7520 struct ether_arp *ea;
7521 size_t minlen = sizeof(struct ether_header) +
7522 sizeof(struct ether_arp);
7523
7524 /*
7525 * Make the Ethernet and ARP headers contiguous
7526 */
7527 if (mbuf_pkthdr_len(m) < minlen) {
7528 BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7529 goto done;
7530 }
7531 if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7532 BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7533 __func__, __LINE__);
7534 goto done;
7535 }
7536 m = *data;
7537
7538 /*
7539 * Verify this is an ethernet/ip arp
7540 */
7541 eh = mtod(m, struct ether_header *);
7542 ea = (struct ether_arp *)(eh + 1);
7543 if (ea->arp_hrd != htons(ARPHRD_ETHER)) {
7544 BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7545 __func__, __LINE__);
7546 goto done;
7547 }
7548 if (ea->arp_pro != htons(ETHERTYPE_IP)) {
7549 BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7550 __func__, __LINE__);
7551 goto done;
7552 }
7553 /*
7554 * Verify the address lengths are correct
7555 */
7556 if (ea->arp_hln != ETHER_ADDR_LEN) {
7557 BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7558 goto done;
7559 }
7560 if (ea->arp_pln != sizeof(struct in_addr)) {
7561 BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7562 __func__, __LINE__);
7563 goto done;
7564 }
7565 /*
7566 * Allow only ARP request or ARP reply
7567 */
7568 if (ea->arp_op != htons(ARPOP_REQUEST) &&
7569 ea->arp_op != htons(ARPOP_REPLY)) {
7570 BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7571 goto done;
7572 }
7573 if ((bif->bif_flags & BIFF_HF_HWSRC) != 0) {
7574 /*
7575 * Verify source hardware address matches
7576 */
7577 if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7578 ETHER_ADDR_LEN) != 0) {
7579 BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7580 goto done;
7581 }
7582 }
7583 if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7584 /*
7585 * Verify source protocol address:
7586 * May be null for an ARP probe
7587 */
7588 if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7589 sizeof(struct in_addr)) != 0 &&
7590 bcmp(ea->arp_spa, &inaddr_any,
7591 sizeof(struct in_addr)) != 0) {
7592 BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7593 goto done;
7594 }
7595 }
7596 bridge_hostfilter_stats.brhf_arp_ok += 1;
7597 error = 0;
7598 } else if (eh->ether_type == htons(ETHERTYPE_IP)) {
7599 size_t minlen = sizeof(struct ether_header) + sizeof(struct ip);
7600 struct ip iphdr;
7601 size_t offset;
7602
7603 /*
7604 * Make the Ethernet and IP headers contiguous
7605 */
7606 if (mbuf_pkthdr_len(m) < minlen) {
7607 BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7608 goto done;
7609 }
7610 offset = sizeof(struct ether_header);
7611 error = mbuf_copydata(m, offset, sizeof(struct ip), &iphdr);
7612 if (error != 0) {
7613 BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7614 goto done;
7615 }
7616 if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7617 /*
7618 * Verify the source IP address
7619 */
7620 if (iphdr.ip_p == IPPROTO_UDP) {
7621 struct udphdr udp;
7622
7623 minlen += sizeof(struct udphdr);
7624 if (mbuf_pkthdr_len(m) < minlen) {
7625 BRIDGE_HF_DROP(brhf_ip_too_small,
7626 __func__, __LINE__);
7627 goto done;
7628 }
7629
7630 /*
7631 * Allow all zero addresses for DHCP requests
7632 */
7633 if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr &&
7634 iphdr.ip_src.s_addr != INADDR_ANY) {
7635 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7636 __func__, __LINE__);
7637 goto done;
7638 }
7639 offset = sizeof(struct ether_header) +
7640 (IP_VHL_HL(iphdr.ip_vhl) << 2);
7641 error = mbuf_copydata(m, offset,
7642 sizeof(struct udphdr), &udp);
7643 if (error != 0) {
7644 BRIDGE_HF_DROP(brhf_ip_too_small,
7645 __func__, __LINE__);
7646 goto done;
7647 }
7648 /*
7649 * Either it's a Bootp/DHCP packet that we like or
7650 * it's a UDP packet from the host IP as source address
7651 */
7652 if (udp.uh_sport == htons(IPPORT_BOOTPC) &&
7653 udp.uh_dport == htons(IPPORT_BOOTPS)) {
7654 minlen += sizeof(struct dhcp);
7655 if (mbuf_pkthdr_len(m) < minlen) {
7656 BRIDGE_HF_DROP(brhf_ip_too_small,
7657 __func__, __LINE__);
7658 goto done;
7659 }
7660 offset += sizeof(struct udphdr);
7661 error = bridge_dhcp_filter(bif, m, offset);
7662 if (error != 0) {
7663 goto done;
7664 }
7665 } else if (iphdr.ip_src.s_addr == INADDR_ANY) {
7666 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7667 __func__, __LINE__);
7668 goto done;
7669 }
7670 } else if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr) {
7671 assert(bif->bif_hf_ipsrc.s_addr != INADDR_ANY);
7672 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, __func__, __LINE__);
7673 goto done;
7674 }
7675 }
7676 /*
7677 * Allow only boring IP protocols
7678 */
7679 if (iphdr.ip_p != IPPROTO_TCP &&
7680 iphdr.ip_p != IPPROTO_UDP &&
7681 iphdr.ip_p != IPPROTO_ICMP &&
7682 iphdr.ip_p != IPPROTO_IGMP) {
7683 BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7684 goto done;
7685 }
7686 bridge_hostfilter_stats.brhf_ip_ok += 1;
7687 error = 0;
7688 } else if (eh->ether_type == htons(ETHERTYPE_IPV6)) {
7689 size_t minlen = sizeof(struct ether_header) + sizeof(struct ip6_hdr);
7690 struct ip6_hdr ip6hdr;
7691 size_t offset;
7692
7693 /*
7694 * Make the Ethernet and IP headers contiguous
7695 */
7696 if (mbuf_pkthdr_len(m) < minlen) {
7697 BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7698 goto done;
7699 }
7700 offset = sizeof(struct ether_header);
7701 error = mbuf_copydata(m, offset, sizeof(struct ip6_hdr), &ip6hdr);
7702 if (error != 0) {
7703 BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7704 goto done;
7705 }
7706 /*
7707 * Allow only boring IPv6 protocols
7708 */
7709 if (ip6hdr.ip6_nxt != IPPROTO_TCP &&
7710 ip6hdr.ip6_nxt != IPPROTO_UDP &&
7711 ip6hdr.ip6_nxt != IPPROTO_ICMPV6) {
7712 BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7713 goto done;
7714 }
7715 bridge_hostfilter_stats.brhf_ip_ok += 1;
7716 error = 0;
7717 } else {
7718 BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
7719 goto done;
7720 }
7721 done:
7722 if (error != 0) {
7723 if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
7724 if (m) {
7725 brlog_mbuf_data(m, 0,
7726 sizeof(struct ether_header) +
7727 sizeof(struct ip));
7728 }
7729 }
7730
7731 if (m != NULL) {
7732 m_freem(m);
7733 }
7734 }
7735 return error;
7736 }
7737
7738 /*
7739 * MAC NAT
7740 */
7741
7742 static errno_t
7743 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7744 {
7745 errno_t error = 0;
7746
7747 BRIDGE_LOCK_ASSERT_HELD(sc);
7748
7749 if (IFNET_IS_VMNET(bif->bif_ifp)) {
7750 error = EINVAL;
7751 goto done;
7752 }
7753 if (sc->sc_mac_nat_bif != NULL) {
7754 if (sc->sc_mac_nat_bif != bif) {
7755 error = EBUSY;
7756 }
7757 goto done;
7758 }
7759 sc->sc_mac_nat_bif = bif;
7760 bif->bif_ifflags |= IFBIF_MAC_NAT;
7761 bridge_mac_nat_populate_entries(sc);
7762
7763 done:
7764 return error;
7765 }
7766
7767 static void
7768 bridge_mac_nat_disable(struct bridge_softc *sc)
7769 {
7770 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7771
7772 assert(mac_nat_bif != NULL);
7773 bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7774 mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7775 sc->sc_mac_nat_bif = NULL;
7776 return;
7777 }
7778
7779 static void
7780 mac_nat_entry_print2(struct mac_nat_entry *mne,
7781 char *ifname, const char *msg1, const char *msg2)
7782 {
7783 int af;
7784 char etopbuf[24];
7785 char ntopbuf[MAX_IPv6_STR_LEN];
7786 const char *space;
7787
7788 af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7789 ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7790 (void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7791 if (msg2 == NULL) {
7792 msg2 = "";
7793 space = "";
7794 } else {
7795 space = " ";
7796 }
7797 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7798 "%s %s%s%s %p (%s, %s, %s)",
7799 ifname, msg1, space, msg2, mne, mne->mne_bif->bif_ifp->if_xname,
7800 ntopbuf, etopbuf);
7801 }
7802
/*
 * mac_nat_entry_print:
 *	Log a MAC NAT entry with a single message; shorthand for
 *	mac_nat_entry_print2() with no second message.
 */
static void
mac_nat_entry_print(struct mac_nat_entry *mne,
    char *ifname, const char *msg)
{
	mac_nat_entry_print2(mne, ifname, msg, NULL);
}
7809
7810 static struct mac_nat_entry *
7811 bridge_lookup_mac_nat_entry(struct bridge_softc *sc, int af, void * ip)
7812 {
7813 struct mac_nat_entry *mne;
7814 struct mac_nat_entry *ret_mne = NULL;
7815
7816 if (af == AF_INET) {
7817 in_addr_t s_addr = ((struct in_addr *)ip)->s_addr;
7818
7819 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7820 if (mne->mne_ip.s_addr == s_addr) {
7821 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7822 mac_nat_entry_print(mne, sc->sc_if_xname,
7823 "found");
7824 }
7825 ret_mne = mne;
7826 break;
7827 }
7828 }
7829 } else {
7830 const struct in6_addr *ip6 = (const struct in6_addr *)ip;
7831
7832 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7833 if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7834 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7835 mac_nat_entry_print(mne, sc->sc_if_xname,
7836 "found");
7837 }
7838 ret_mne = mne;
7839 break;
7840 }
7841 }
7842 }
7843 return ret_mne;
7844 }
7845
7846 static void
7847 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7848 struct mac_nat_entry *mne, const char *reason)
7849 {
7850 LIST_REMOVE(mne, mne_list);
7851 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7852 mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7853 }
7854 zfree(bridge_mne_pool, mne);
7855 sc->sc_mne_count--;
7856 }
7857
7858 static struct mac_nat_entry *
7859 bridge_create_mac_nat_entry(struct bridge_softc *sc,
7860 struct bridge_iflist *bif, int af, const void *ip, uint8_t *eaddr)
7861 {
7862 struct mac_nat_entry_list *list;
7863 struct mac_nat_entry *mne;
7864
7865 if (sc->sc_mne_count >= sc->sc_mne_max) {
7866 sc->sc_mne_allocation_failures++;
7867 return NULL;
7868 }
7869 mne = zalloc_noblock(bridge_mne_pool);
7870 if (mne == NULL) {
7871 sc->sc_mne_allocation_failures++;
7872 return NULL;
7873 }
7874 sc->sc_mne_count++;
7875 bzero(mne, sizeof(*mne));
7876 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7877 mne->mne_bif = bif;
7878 if (af == AF_INET) {
7879 bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7880 list = &sc->sc_mne_list;
7881 } else {
7882 bcopy(ip, &mne->mne_ip6, sizeof(mne->mne_ip6));
7883 mne->mne_flags |= MNE_FLAGS_IPV6;
7884 list = &sc->sc_mne_list_v6;
7885 }
7886 LIST_INSERT_HEAD(list, mne, mne_list);
7887 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7888 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7889 mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7890 }
7891 return mne;
7892 }
7893
7894 static struct mac_nat_entry *
7895 bridge_update_mac_nat_entry(struct bridge_softc *sc,
7896 struct bridge_iflist *bif, int af, void *ip, uint8_t *eaddr)
7897 {
7898 struct mac_nat_entry *mne;
7899
7900 mne = bridge_lookup_mac_nat_entry(sc, af, ip);
7901 if (mne != NULL) {
7902 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7903
7904 if (mne->mne_bif == mac_nat_bif) {
7905 /* the MAC NAT interface takes precedence */
7906 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7907 if (mne->mne_bif != bif) {
7908 mac_nat_entry_print2(mne,
7909 sc->sc_if_xname, "reject",
7910 bif->bif_ifp->if_xname);
7911 }
7912 }
7913 } else if (mne->mne_bif != bif) {
7914 const char *old_if = mne->mne_bif->bif_ifp->if_xname;
7915
7916 mne->mne_bif = bif;
7917 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7918 mac_nat_entry_print2(mne,
7919 sc->sc_if_xname, "replaced",
7920 old_if);
7921 }
7922 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7923 }
7924 mne->mne_expire = (unsigned long)net_uptime() +
7925 sc->sc_brttimeout;
7926 } else {
7927 mne = bridge_create_mac_nat_entry(sc, bif, af, ip, eaddr);
7928 }
7929 return mne;
7930 }
7931
7932 static void
7933 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7934 struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7935 {
7936 struct mac_nat_entry *mne;
7937 struct mac_nat_entry *tmne;
7938
7939 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7940 if (bif != NULL && mne->mne_bif != bif) {
7941 continue;
7942 }
7943 bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7944 }
7945 }
7946
7947 /*
7948 * bridge_mac_nat_flush_entries:
7949 *
7950 * Flush MAC NAT entries for the specified member. Flush all entries if
7951 * the member is the one that requires MAC NAT, otherwise just flush the
7952 * ones for the specified member.
7953 */
7954 static void
7955 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7956 {
7957 struct bridge_iflist *flush_bif;
7958
7959 flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7960 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7961 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7962 }
7963
7964 static void
7965 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7966 {
7967 errno_t error;
7968 ifnet_t ifp;
7969 ifaddr_t *list;
7970 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7971
7972 assert(mac_nat_bif != NULL);
7973 ifp = mac_nat_bif->bif_ifp;
7974 error = ifnet_get_address_list(ifp, &list);
7975 if (error != 0) {
7976 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7977 "ifnet_get_address_list(%s) failed %d",
7978 ifp->if_xname, error);
7979 return;
7980 }
7981 for (ifaddr_t *scan = list; *scan != NULL; scan++) {
7982 sa_family_t af;
7983 void *ip;
7984
7985 union {
7986 struct sockaddr sa;
7987 struct sockaddr_in sin;
7988 struct sockaddr_in6 sin6;
7989 } u;
7990 af = ifaddr_address_family(*scan);
7991 switch (af) {
7992 case AF_INET:
7993 case AF_INET6:
7994 error = ifaddr_address(*scan, &u.sa, sizeof(u));
7995 if (error != 0) {
7996 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7997 "ifaddr_address failed %d",
7998 error);
7999 break;
8000 }
8001 if (af == AF_INET) {
8002 ip = (void *)&u.sin.sin_addr;
8003 } else {
8004 if (IN6_IS_ADDR_LINKLOCAL(&u.sin6.sin6_addr)) {
8005 /* remove scope ID */
8006 u.sin6.sin6_addr.s6_addr16[1] = 0;
8007 }
8008 ip = (void *)&u.sin6.sin6_addr;
8009 }
8010 bridge_create_mac_nat_entry(sc, mac_nat_bif, af, ip,
8011 (uint8_t *)IF_LLADDR(ifp));
8012 break;
8013 default:
8014 break;
8015 }
8016 }
8017 ifnet_free_address_list(list);
8018 return;
8019 }
8020
8021 static void
8022 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
8023 struct mac_nat_entry_list *list, unsigned long now)
8024 {
8025 struct mac_nat_entry *mne;
8026 struct mac_nat_entry *tmne;
8027
8028 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
8029 if (now >= mne->mne_expire) {
8030 bridge_destroy_mac_nat_entry(sc, mne, "aged out");
8031 }
8032 }
8033 }
8034
8035 static void
8036 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
8037 {
8038 if (sc->sc_mac_nat_bif == NULL) {
8039 return;
8040 }
8041 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
8042 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
8043 }
8044
8045 static const char *
8046 get_in_out_string(boolean_t is_output)
8047 {
8048 return is_output ? "OUT" : "IN";
8049 }
8050
8051 /*
8052 * is_valid_arp_packet:
8053 * Verify that this is a valid ARP packet.
8054 *
8055 * Returns TRUE if the packet is valid, FALSE otherwise.
8056 */
8057 static boolean_t
8058 is_valid_arp_packet(mbuf_t *data, boolean_t is_output,
8059 struct ether_header **eh_p, struct ether_arp **ea_p)
8060 {
8061 struct ether_arp *ea;
8062 struct ether_header *eh;
8063 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
8064 boolean_t is_valid = FALSE;
8065 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8066
8067 if (mbuf_pkthdr_len(*data) < minlen) {
8068 BRIDGE_LOG(LOG_DEBUG, flags,
8069 "ARP %s short frame %lu < %lu",
8070 get_in_out_string(is_output),
8071 mbuf_pkthdr_len(*data), minlen);
8072 goto done;
8073 }
8074 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8075 BRIDGE_LOG(LOG_DEBUG, flags,
8076 "ARP %s size %lu mbuf_pullup fail",
8077 get_in_out_string(is_output),
8078 minlen);
8079 *data = NULL;
8080 goto done;
8081 }
8082
8083 /* validate ARP packet */
8084 eh = mtod(*data, struct ether_header *);
8085 ea = (struct ether_arp *)(eh + 1);
8086 if (ntohs(ea->arp_hrd) != ARPHRD_ETHER) {
8087 BRIDGE_LOG(LOG_DEBUG, flags,
8088 "ARP %s htype not ethernet",
8089 get_in_out_string(is_output));
8090 goto done;
8091 }
8092 if (ea->arp_hln != ETHER_ADDR_LEN) {
8093 BRIDGE_LOG(LOG_DEBUG, flags,
8094 "ARP %s hlen not ethernet",
8095 get_in_out_string(is_output));
8096 goto done;
8097 }
8098 if (ntohs(ea->arp_pro) != ETHERTYPE_IP) {
8099 BRIDGE_LOG(LOG_DEBUG, flags,
8100 "ARP %s ptype not IP",
8101 get_in_out_string(is_output));
8102 goto done;
8103 }
8104 if (ea->arp_pln != sizeof(struct in_addr)) {
8105 BRIDGE_LOG(LOG_DEBUG, flags,
8106 "ARP %s plen not IP",
8107 get_in_out_string(is_output));
8108 goto done;
8109 }
8110 is_valid = TRUE;
8111 *ea_p = ea;
8112 *eh_p = eh;
8113 done:
8114 return is_valid;
8115 }
8116
8117 static struct mac_nat_entry *
8118 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
8119 {
8120 struct ether_arp *ea;
8121 struct ether_header *eh;
8122 struct mac_nat_entry *mne = NULL;
8123 u_short op;
8124 struct in_addr tpa;
8125
8126 if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
8127 goto done;
8128 }
8129 op = ntohs(ea->arp_op);
8130 switch (op) {
8131 case ARPOP_REQUEST:
8132 case ARPOP_REPLY:
8133 /* only care about REQUEST and REPLY */
8134 break;
8135 default:
8136 goto done;
8137 }
8138
8139 /* check the target IP address for a NAT entry */
8140 bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
8141 if (tpa.s_addr != 0) {
8142 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &tpa);
8143 }
8144 if (mne != NULL) {
8145 if (op == ARPOP_REPLY) {
8146 /* translate the MAC address */
8147 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8148 char mac_src[24];
8149 char mac_dst[24];
8150
8151 ether_ntop(mac_src, sizeof(mac_src),
8152 ea->arp_tha);
8153 ether_ntop(mac_dst, sizeof(mac_dst),
8154 mne->mne_mac);
8155 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8156 "%s %s ARP %s -> %s",
8157 sc->sc_if_xname,
8158 mne->mne_bif->bif_ifp->if_xname,
8159 mac_src, mac_dst);
8160 }
8161 bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
8162 }
8163 } else {
8164 /* handle conflicting ARP (sender matches mne) */
8165 struct in_addr spa;
8166
8167 bcopy(ea->arp_spa, &spa, sizeof(spa));
8168 if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
8169 /* check the source IP for a NAT entry */
8170 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &spa);
8171 }
8172 }
8173
8174 done:
8175 return mne;
8176 }
8177
8178 static boolean_t
8179 bridge_mac_nat_arp_output(struct bridge_softc *sc,
8180 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8181 {
8182 struct ether_arp *ea;
8183 struct ether_header *eh;
8184 struct in_addr ip;
8185 struct mac_nat_entry *mne = NULL;
8186 u_short op;
8187 boolean_t translate = FALSE;
8188
8189 if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
8190 goto done;
8191 }
8192 op = ntohs(ea->arp_op);
8193 switch (op) {
8194 case ARPOP_REQUEST:
8195 case ARPOP_REPLY:
8196 /* only care about REQUEST and REPLY */
8197 break;
8198 default:
8199 goto done;
8200 }
8201
8202 bcopy(ea->arp_spa, &ip, sizeof(ip));
8203 if (ip.s_addr == 0) {
8204 goto done;
8205 }
8206 /* XXX validate IP address: no multicast/broadcast */
8207 mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip, ea->arp_sha);
8208 if (mnr != NULL && mne != NULL) {
8209 /* record the offset to do the replacement */
8210 translate = TRUE;
8211 mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
8212 }
8213
8214 done:
8215 return translate;
8216 }
8217
8218 #define ETHER_IPV4_HEADER_LEN (sizeof(struct ether_header) + \
8219 + sizeof(struct ip))
8220 static struct ether_header *
8221 get_ether_ip_header(mbuf_t *data, boolean_t is_output)
8222 {
8223 struct ether_header *eh = NULL;
8224 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8225 size_t minlen = ETHER_IPV4_HEADER_LEN;
8226
8227 if (mbuf_pkthdr_len(*data) < minlen) {
8228 BRIDGE_LOG(LOG_DEBUG, flags,
8229 "IP %s short frame %lu < %lu",
8230 get_in_out_string(is_output),
8231 mbuf_pkthdr_len(*data), minlen);
8232 goto done;
8233 }
8234 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8235 BRIDGE_LOG(LOG_DEBUG, flags,
8236 "IP %s size %lu mbuf_pullup fail",
8237 get_in_out_string(is_output),
8238 minlen);
8239 *data = NULL;
8240 goto done;
8241 }
8242 eh = mtod(*data, struct ether_header *);
8243 done:
8244 return eh;
8245 }
8246
8247 static bool
8248 is_broadcast_ip_packet(mbuf_t *data)
8249 {
8250 struct ether_header *eh;
8251 uint16_t ether_type;
8252 bool is_broadcast = FALSE;
8253
8254 eh = mtod(*data, struct ether_header *);
8255 ether_type = ntohs(eh->ether_type);
8256 switch (ether_type) {
8257 case ETHERTYPE_IP:
8258 eh = get_ether_ip_header(data, FALSE);
8259 if (eh != NULL) {
8260 struct in_addr dst;
8261 struct ip *iphdr;
8262
8263 iphdr = (struct ip *)(void *)(eh + 1);
8264 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8265 is_broadcast = (dst.s_addr == INADDR_BROADCAST);
8266 }
8267 break;
8268 default:
8269 break;
8270 }
8271 return is_broadcast;
8272 }
8273
8274 static struct mac_nat_entry *
8275 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8276 {
8277 struct in_addr dst;
8278 struct ether_header *eh;
8279 struct ip *iphdr;
8280 struct mac_nat_entry *mne = NULL;
8281
8282 eh = get_ether_ip_header(data, FALSE);
8283 if (eh == NULL) {
8284 goto done;
8285 }
8286 iphdr = (struct ip *)(void *)(eh + 1);
8287 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8288 /* XXX validate IP address */
8289 if (dst.s_addr == 0) {
8290 goto done;
8291 }
8292 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &dst);
8293 done:
8294 return mne;
8295 }
8296
8297 static void
8298 bridge_mac_nat_udp_output(struct bridge_softc *sc,
8299 struct bridge_iflist *bif, mbuf_t m,
8300 uint8_t ip_header_len, struct mac_nat_record *mnr)
8301 {
8302 uint16_t dp_flags;
8303 errno_t error;
8304 size_t offset;
8305 struct udphdr udphdr;
8306
8307 /* copy the UDP header */
8308 offset = sizeof(struct ether_header) + ip_header_len;
8309 error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
8310 if (error != 0) {
8311 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8312 "mbuf_copydata udphdr failed %d",
8313 error);
8314 return;
8315 }
8316 if (ntohs(udphdr.uh_sport) != IPPORT_BOOTPC ||
8317 ntohs(udphdr.uh_dport) != IPPORT_BOOTPS) {
8318 /* not a BOOTP/DHCP packet */
8319 return;
8320 }
8321 /* check whether the broadcast bit is already set */
8322 offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8323 error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
8324 if (error != 0) {
8325 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8326 "mbuf_copydata dp_flags failed %d",
8327 error);
8328 return;
8329 }
8330 if ((ntohs(dp_flags) & DHCP_FLAGS_BROADCAST) != 0) {
8331 /* it's already set, nothing to do */
8332 return;
8333 }
8334 /* broadcast bit needs to be set */
8335 mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8336 mnr->mnr_ip_header_len = ip_header_len;
8337 if (udphdr.uh_sum != 0) {
8338 uint16_t delta;
8339
8340 /* adjust checksum to take modified dp_flags into account */
8341 delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8342 mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8343 }
8344 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8345 "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8346 sc->sc_if_xname,
8347 bif->bif_ifp->if_xname,
8348 ntohs(mnr->mnr_ip_dhcp_flags),
8349 ntohs(mnr->mnr_ip_udp_csum));
8350 return;
8351 }
8352
8353 static boolean_t
8354 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8355 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8356 {
8357 #pragma unused(mnr)
8358 struct ether_header *eh;
8359 struct in_addr ip;
8360 struct ip *iphdr;
8361 uint8_t ip_header_len;
8362 struct mac_nat_entry *mne = NULL;
8363 boolean_t translate = FALSE;
8364
8365 eh = get_ether_ip_header(data, TRUE);
8366 if (eh == NULL) {
8367 goto done;
8368 }
8369 iphdr = (struct ip *)(void *)(eh + 1);
8370 ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8371 if (ip_header_len < sizeof(ip)) {
8372 /* bogus IP header */
8373 goto done;
8374 }
8375 bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8376 /* XXX validate the source address */
8377 if (ip.s_addr != 0) {
8378 mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip,
8379 eh->ether_shost);
8380 }
8381 if (mnr != NULL) {
8382 if (ip.s_addr == 0 && iphdr->ip_p == IPPROTO_UDP) {
8383 /* handle DHCP must broadcast */
8384 bridge_mac_nat_udp_output(sc, bif, *data,
8385 ip_header_len, mnr);
8386 }
8387 translate = TRUE;
8388 }
8389 done:
8390 return translate;
8391 }
8392
8393 #define ETHER_IPV6_HEADER_LEN (sizeof(struct ether_header) + \
8394 + sizeof(struct ip6_hdr))
8395 static struct ether_header *
8396 get_ether_ipv6_header(mbuf_t *data, size_t plen, boolean_t is_output)
8397 {
8398 struct ether_header *eh = NULL;
8399 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8400 size_t minlen = ETHER_IPV6_HEADER_LEN + plen;
8401
8402 if (mbuf_pkthdr_len(*data) < minlen) {
8403 BRIDGE_LOG(LOG_DEBUG, flags,
8404 "IP %s short frame %lu < %lu",
8405 get_in_out_string(is_output),
8406 mbuf_pkthdr_len(*data), minlen);
8407 goto done;
8408 }
8409 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8410 BRIDGE_LOG(LOG_DEBUG, flags,
8411 "IP %s size %lu mbuf_pullup fail",
8412 get_in_out_string(is_output),
8413 minlen);
8414 *data = NULL;
8415 goto done;
8416 }
8417 eh = mtod(*data, struct ether_header *);
8418 done:
8419 return eh;
8420 }
8421
8422 #include <netinet/icmp6.h>
8423 #include <netinet6/nd6.h>
8424
8425 #define ETHER_ND_LLADDR_LEN (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8426
/*
 * bridge_mac_nat_icmpv6_output:
 *	Outbound ICMPv6 processing for MAC NAT.
 *
 *	Inspects Neighbor Solicit/Advert and Router Solicit/Advert messages
 *	for a link-layer address option (source option for NS/RS/RA, target
 *	option for NA).  When one is present and has the expected Ethernet
 *	length, its offset from the Ethernet header is recorded in `mnr'
 *	along with the ICMPv6 length, ICMPv6 type and IPv6 header length.
 *	The packet contents are not modified here.
 *
 *	`ip6h' points at the IPv6 header on entry; it is re-derived after
 *	each pullup.  `saddrp' is the caller's copy of the source address:
 *	for a Neighbor Solicit whose source is unspecified (DAD probe) it is
 *	overwritten with the solicited target address.
 *
 *	*data may be replaced, or set to NULL, by get_ether_ipv6_header().
 *	The function returns early, leaving `mnr' untouched, on any
 *	malformed or uninteresting packet.
 */
static void
bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
    struct bridge_iflist *bif,
    mbuf_t *data, struct ip6_hdr *ip6h,
    struct in6_addr *saddrp,
    struct mac_nat_record *mnr)
{
	struct ether_header *eh;
	struct icmp6_hdr *icmp6;
	uint8_t         icmp6_type;
	uint32_t        icmp6len;
	int             lladdrlen = 0;
	char            *lladdr = NULL;
	/* the ICMPv6 header is taken to follow the fixed IPv6 header directly */
	unsigned int    off = sizeof(*ip6h);

	/* the IPv6 payload length is used as the ICMPv6 message length */
	icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
	if (icmp6len < sizeof(*icmp6)) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "short IPv6 payload length %d < %lu",
		    icmp6len, sizeof(*icmp6));
		return;
	}

	/* pullup IP6 header + ICMPv6 header */
	eh = get_ether_ipv6_header(data, sizeof(*icmp6), TRUE);
	if (eh == NULL) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "failed to pullup icmp6 header");
		return;
	}
	/* the pullup may have moved the data: recompute the header pointers */
	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
	icmp6_type = icmp6->icmp6_type;
	switch (icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
	case ND_NEIGHBOR_ADVERT:
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT:
		break;
	default:
		/* not a neighbor discovery message we care about */
		return;
	}

	/* pullup IP6 header + payload */
	eh = get_ether_ipv6_header(data, icmp6len, TRUE);
	if (eh == NULL) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "failed to pullup icmp6 + payload");
		return;
	}
	/* again, recompute the header pointers after the pullup */
	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
	switch (icmp6_type) {
	case ND_NEIGHBOR_SOLICIT: {
		struct nd_neighbor_solicit *nd_ns;
		union nd_opts ndopts;
		boolean_t is_dad_probe;
		struct in6_addr taddr;

		if (icmp6len < sizeof(*nd_ns)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_ns %d < %lu",
			    icmp6len, sizeof(*nd_ns));
			return;
		}

		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}
		/* parse options */
		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NS option");
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			/* address bytes follow the option header; length is nd_opt_len * 8 */
			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
		}
		/* an unspecified source address marks a DAD probe */
		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
		if (lladdr != NULL) {
			if (is_dad_probe) {
				/* a DAD probe is rejected if it carries a source lladdr option */
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "bad ND6 DAD packet");
				return;
			}
			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
		}
		if (is_dad_probe) {
			/* node is trying use taddr, create an mne for taddr */
			*saddrp = taddr;
		}
		break;
	}
	case ND_NEIGHBOR_ADVERT: {
		struct nd_neighbor_advert *nd_na;
		union nd_opts ndopts;
		struct in6_addr taddr;


		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;

		if (icmp6len < sizeof(*nd_na)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_na %d < %lu",
			    icmp6len, sizeof(*nd_na));
			return;
		}

		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}
		/* parse options */
		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NA option");
			return;
		}
		if (ndopts.nd_opts_tgt_lladdr == NULL) {
			/* no target link-layer address option, nothing to do */
			return;
		}
		/* address bytes follow the option header; length is nd_opt_len * 8 */
		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "target lladdrlen %d != %lu",
			    lladdrlen, ETHER_ND_LLADDR_LEN);
			return;
		}
		break;
	}
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT: {
		union nd_opts ndopts;
		uint32_t type_length;
		const char *description;

		/* RA and RS differ only in the size of their fixed part */
		if (icmp6_type == ND_ROUTER_ADVERT) {
			type_length = sizeof(struct nd_router_advert);
			description = "RA";
		} else {
			type_length = sizeof(struct nd_router_solicit);
			description = "RS";
		}
		if (icmp6len < type_length) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short ND6 %s %d < %d",
			    description, icmp6len, type_length);
			return;
		}
		/* parse options */
		nd6_option_init(((uint8_t *)icmp6) + type_length,
		    icmp6len - type_length, &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 %s option", description);
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			/* address bytes follow the option header; length is nd_opt_len * 8 */
			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
		}
		break;
	}
	default:
		break;
	}
	if (lladdr != NULL) {
		/*
		 * Record where the link-layer address sits relative to the
		 * Ethernet header, plus the lengths and type found above.
		 */
		mnr->mnr_ip6_lladdr_offset = (uint16_t)
		    ((uintptr_t)lladdr - (uintptr_t)eh);
		mnr->mnr_ip6_icmp6_len = icmp6len;
		mnr->mnr_ip6_icmp6_type = icmp6_type;
		mnr->mnr_ip6_header_len = off;
		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
			const char *str;

			switch (mnr->mnr_ip6_icmp6_type) {
			case ND_ROUTER_ADVERT:
				str = "ROUTER ADVERT";
				break;
			case ND_ROUTER_SOLICIT:
				str = "ROUTER SOLICIT";
				break;
			case ND_NEIGHBOR_ADVERT:
				str = "NEIGHBOR ADVERT";
				break;
			case ND_NEIGHBOR_SOLICIT:
				str = "NEIGHBOR SOLICIT";
				break;
			default:
				str = "";
				break;
			}
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
			    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
			    mnr->mnr_ip6_header_len,
			    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
		}
	}
}
8651
8652 static struct mac_nat_entry *
8653 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8654 {
8655 struct in6_addr dst;
8656 struct ether_header *eh;
8657 struct ip6_hdr *ip6h;
8658 struct mac_nat_entry *mne = NULL;
8659
8660 eh = get_ether_ipv6_header(data, 0, FALSE);
8661 if (eh == NULL) {
8662 goto done;
8663 }
8664 ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8665 bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8666 /* XXX validate IPv6 address */
8667 if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8668 goto done;
8669 }
8670 mne = bridge_lookup_mac_nat_entry(sc, AF_INET6, &dst);
8671
8672 done:
8673 return mne;
8674 }
8675
8676 static boolean_t
8677 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8678 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8679 {
8680 struct ether_header *eh;
8681 ether_addr_t ether_shost;
8682 struct ip6_hdr *ip6h;
8683 struct in6_addr saddr;
8684 boolean_t translate;
8685
8686 translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8687 eh = get_ether_ipv6_header(data, 0, TRUE);
8688 if (eh == NULL) {
8689 translate = FALSE;
8690 goto done;
8691 }
8692 bcopy(eh->ether_shost, ðer_shost, sizeof(ether_shost));
8693 ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8694 bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8695 if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8696 bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, &saddr, mnr);
8697 }
8698 if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8699 goto done;
8700 }
8701 (void)bridge_update_mac_nat_entry(sc, bif, AF_INET6, &saddr,
8702 ether_shost.octet);
8703
8704 done:
8705 return translate;
8706 }
8707
8708 /*
8709 * bridge_mac_nat_input:
8710 * Process a packet arriving on the MAC NAT interface (sc_mac_nat_bif).
8711 * This interface is the "external" interface with respect to NAT.
8712 * The interface is only capable of receiving a single MAC address
8713 * (e.g. a Wi-Fi STA interface).
8714 *
8715 * When a packet arrives on the external interface, look up the destination
8716 * IP address in the mac_nat_entry table. If there is a match, *is_input
8717 * is set to TRUE if it's for the MAC NAT interface, otherwise *is_input
8718 * is set to FALSE and translate the MAC address if necessary.
8719 *
8720 * Returns:
8721 * The internal interface to direct the packet to, or NULL if the packet
8722 * should not be redirected.
8723 *
8724 * *data may be updated to point at a different mbuf chain, or set to NULL
8725 * if the chain was deallocated during processing.
8726 */
8727 static ifnet_t
8728 bridge_mac_nat_input(struct bridge_softc *sc, mbuf_t *data,
8729 boolean_t *is_input)
8730 {
8731 ifnet_t dst_if = NULL;
8732 struct ether_header *eh;
8733 uint16_t ether_type;
8734 boolean_t is_unicast;
8735 mbuf_t m = *data;
8736 struct mac_nat_entry *mne = NULL;
8737
8738 BRIDGE_LOCK_ASSERT_HELD(sc);
8739 *is_input = FALSE;
8740 assert(sc->sc_mac_nat_bif != NULL);
8741 is_unicast = ((m->m_flags & (M_BCAST | M_MCAST)) == 0);
8742 eh = mtod(m, struct ether_header *);
8743 ether_type = ntohs(eh->ether_type);
8744 switch (ether_type) {
8745 case ETHERTYPE_ARP:
8746 mne = bridge_mac_nat_arp_input(sc, data);
8747 break;
8748 case ETHERTYPE_IP:
8749 if (is_unicast) {
8750 mne = bridge_mac_nat_ip_input(sc, data);
8751 }
8752 break;
8753 case ETHERTYPE_IPV6:
8754 if (is_unicast) {
8755 mne = bridge_mac_nat_ipv6_input(sc, data);
8756 }
8757 break;
8758 default:
8759 break;
8760 }
8761 if (mne != NULL) {
8762 if (is_unicast) {
8763 if (m != *data) {
8764 /* it may have changed */
8765 eh = mtod(*data, struct ether_header *);
8766 }
8767 bcopy(mne->mne_mac, eh->ether_dhost,
8768 sizeof(eh->ether_dhost));
8769 }
8770 dst_if = mne->mne_bif->bif_ifp;
8771 *is_input = (mne->mne_bif == sc->sc_mac_nat_bif);
8772 }
8773 return dst_if;
8774 }
8775
8776 /*
8777 * bridge_mac_nat_output:
8778 * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8779 * from the interface 'bif'.
8780 *
8781 * Create a mac_nat_entry containing the source IP address and MAC address
8782 * from the packet. Populate a mac_nat_record with information detailing
8783 * how to translate the packet. Translation takes place later when
8784 * the bridge lock is no longer held.
8785 *
8786 * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8787 * interface is generating an output packet. No translation is required in this
8788 * case, we just record the IP address used to prevent another bif from
8789 * claiming our IP address.
8790 *
8791 * Returns:
8792 * TRUE if the packet should be translated (*mnr updated as well),
8793 * FALSE otherwise.
8794 *
8795 * *data may be updated to point at a different mbuf chain or NULL if
8796 * the chain was deallocated during processing.
8797 */
8798
8799 static boolean_t
8800 bridge_mac_nat_output(struct bridge_softc *sc,
8801 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8802 {
8803 struct ether_header *eh;
8804 uint16_t ether_type;
8805 boolean_t translate = FALSE;
8806
8807 BRIDGE_LOCK_ASSERT_HELD(sc);
8808 assert(sc->sc_mac_nat_bif != NULL);
8809
8810 eh = mtod(*data, struct ether_header *);
8811 ether_type = ntohs(eh->ether_type);
8812 if (mnr != NULL) {
8813 bzero(mnr, sizeof(*mnr));
8814 mnr->mnr_ether_type = ether_type;
8815 }
8816 switch (ether_type) {
8817 case ETHERTYPE_ARP:
8818 translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8819 break;
8820 case ETHERTYPE_IP:
8821 translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8822 break;
8823 case ETHERTYPE_IPV6:
8824 translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8825 break;
8826 default:
8827 break;
8828 }
8829 return translate;
8830 }
8831
8832 static void
8833 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8834 const caddr_t eaddr)
8835 {
8836 errno_t error;
8837
8838 if (mnr->mnr_arp_offset == 0) {
8839 return;
8840 }
8841 /* replace the source hardware address */
8842 error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8843 ETHER_ADDR_LEN, eaddr,
8844 MBUF_DONTWAIT);
8845 if (error != 0) {
8846 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8847 "mbuf_copyback failed");
8848 m_freem(*data);
8849 *data = NULL;
8850 }
8851 return;
8852 }
8853
8854 static void
8855 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8856 {
8857 errno_t error;
8858 size_t offset;
8859
8860 if (mnr->mnr_ip_header_len == 0) {
8861 return;
8862 }
8863 /* update the UDP checksum */
8864 offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8865 error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8866 sizeof(mnr->mnr_ip_udp_csum),
8867 &mnr->mnr_ip_udp_csum,
8868 MBUF_DONTWAIT);
8869 if (error != 0) {
8870 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8871 "mbuf_copyback uh_sum failed");
8872 m_freem(*data);
8873 *data = NULL;
8874 }
8875 /* update the DHCP must broadcast flag */
8876 offset += sizeof(struct udphdr);
8877 error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8878 sizeof(mnr->mnr_ip_dhcp_flags),
8879 &mnr->mnr_ip_dhcp_flags,
8880 MBUF_DONTWAIT);
8881 if (error != 0) {
8882 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8883 "mbuf_copyback dp_flags failed");
8884 m_freem(*data);
8885 *data = NULL;
8886 }
8887 }
8888
8889 static void
8890 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8891 const caddr_t eaddr)
8892 {
8893 uint16_t cksum;
8894 errno_t error;
8895 mbuf_t m = *data;
8896
8897 if (mnr->mnr_ip6_header_len == 0) {
8898 return;
8899 }
8900 switch (mnr->mnr_ip6_icmp6_type) {
8901 case ND_ROUTER_ADVERT:
8902 case ND_ROUTER_SOLICIT:
8903 case ND_NEIGHBOR_SOLICIT:
8904 case ND_NEIGHBOR_ADVERT:
8905 if (mnr->mnr_ip6_lladdr_offset == 0) {
8906 /* nothing to do */
8907 return;
8908 }
8909 break;
8910 default:
8911 return;
8912 }
8913
8914 /*
8915 * replace the lladdr
8916 */
8917 error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8918 ETHER_ADDR_LEN, eaddr,
8919 MBUF_DONTWAIT);
8920 if (error != 0) {
8921 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8922 "mbuf_copyback lladdr failed");
8923 m_freem(m);
8924 *data = NULL;
8925 return;
8926 }
8927
8928 /*
8929 * recompute the icmp6 checksum
8930 */
8931
8932 /* skip past the ethernet header */
8933 mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
8934 mbuf_len(m) - ETHER_HDR_LEN);
8935 mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
8936
8937 #define CKSUM_OFFSET_ICMP6 offsetof(struct icmp6_hdr, icmp6_cksum)
8938 /* set the checksum to zero */
8939 cksum = 0;
8940 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8941 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8942 if (error != 0) {
8943 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8944 "mbuf_copyback cksum=0 failed");
8945 m_freem(m);
8946 *data = NULL;
8947 return;
8948 }
8949 /* compute and set the new checksum */
8950 cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8951 mnr->mnr_ip6_icmp6_len);
8952 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8953 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8954 if (error != 0) {
8955 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8956 "mbuf_copyback cksum failed");
8957 m_freem(m);
8958 *data = NULL;
8959 return;
8960 }
8961 /* restore the ethernet header */
8962 mbuf_setdata(m, (char *)mbuf_data(m) - ETHER_HDR_LEN,
8963 mbuf_len(m) + ETHER_HDR_LEN);
8964 mbuf_pkthdr_adjustlen(m, ETHER_HDR_LEN);
8965 return;
8966 }
8967
8968 static void
8969 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8970 const caddr_t eaddr)
8971 {
8972 struct ether_header *eh;
8973
8974 /* replace the source ethernet address with the single MAC */
8975 eh = mtod(*data, struct ether_header *);
8976 bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8977 switch (mnr->mnr_ether_type) {
8978 case ETHERTYPE_ARP:
8979 bridge_mac_nat_arp_translate(data, mnr, eaddr);
8980 break;
8981
8982 case ETHERTYPE_IP:
8983 bridge_mac_nat_ip_translate(data, mnr);
8984 break;
8985
8986 case ETHERTYPE_IPV6:
8987 bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8988 break;
8989
8990 default:
8991 break;
8992 }
8993 return;
8994 }
8995
8996 /*
8997 * bridge packet filtering
8998 */
8999
9000 /*
9001 * Perform basic checks on header size since
9002 * pfil assumes ip_input has already processed
9003 * it for it. Cut-and-pasted from ip_input.c.
9004 * Given how simple the IPv6 version is,
9005 * does the IPv4 version really need to be
9006 * this complicated?
9007 *
9008 * XXX Should we update ipstat here, or not?
9009 * XXX Right now we update ipstat but not
9010 * XXX csum_counter.
9011 */
9012 static int
9013 bridge_ip_checkbasic(struct mbuf **mp)
9014 {
9015 struct mbuf *m = *mp;
9016 struct ip *ip;
9017 int len, hlen;
9018 u_short sum;
9019
9020 if (*mp == NULL) {
9021 return -1;
9022 }
9023
9024 if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9025 /* max_linkhdr is already rounded up to nearest 4-byte */
9026 if ((m = m_copyup(m, sizeof(struct ip),
9027 max_linkhdr)) == NULL) {
9028 /* XXXJRT new stat, please */
9029 ipstat.ips_toosmall++;
9030 goto bad;
9031 }
9032 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
9033 if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
9034 ipstat.ips_toosmall++;
9035 goto bad;
9036 }
9037 }
9038 ip = mtod(m, struct ip *);
9039 if (ip == NULL) {
9040 goto bad;
9041 }
9042
9043 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
9044 ipstat.ips_badvers++;
9045 goto bad;
9046 }
9047 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9048 if (hlen < (int)sizeof(struct ip)) { /* minimum header length */
9049 ipstat.ips_badhlen++;
9050 goto bad;
9051 }
9052 if (hlen > m->m_len) {
9053 if ((m = m_pullup(m, hlen)) == 0) {
9054 ipstat.ips_badhlen++;
9055 goto bad;
9056 }
9057 ip = mtod(m, struct ip *);
9058 if (ip == NULL) {
9059 goto bad;
9060 }
9061 }
9062
9063 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
9064 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
9065 } else {
9066 if (hlen == sizeof(struct ip)) {
9067 sum = in_cksum_hdr(ip);
9068 } else {
9069 sum = in_cksum(m, hlen);
9070 }
9071 }
9072 if (sum) {
9073 ipstat.ips_badsum++;
9074 goto bad;
9075 }
9076
9077 /* Retrieve the packet length. */
9078 len = ntohs(ip->ip_len);
9079
9080 /*
9081 * Check for additional length bogosity
9082 */
9083 if (len < hlen) {
9084 ipstat.ips_badlen++;
9085 goto bad;
9086 }
9087
9088 /*
9089 * Check that the amount of data in the buffers
9090 * is as at least much as the IP header would have us expect.
9091 * Drop packet if shorter than we expect.
9092 */
9093 if (m->m_pkthdr.len < len) {
9094 ipstat.ips_tooshort++;
9095 goto bad;
9096 }
9097
9098 /* Checks out, proceed */
9099 *mp = m;
9100 return 0;
9101
9102 bad:
9103 *mp = m;
9104 return -1;
9105 }
9106
9107 /*
9108 * Same as above, but for IPv6.
9109 * Cut-and-pasted from ip6_input.c.
9110 * XXX Should we update ip6stat, or not?
9111 */
9112 static int
9113 bridge_ip6_checkbasic(struct mbuf **mp)
9114 {
9115 struct mbuf *m = *mp;
9116 struct ip6_hdr *ip6;
9117
9118 /*
9119 * If the IPv6 header is not aligned, slurp it up into a new
9120 * mbuf with space for link headers, in the event we forward
9121 * it. Otherwise, if it is aligned, make sure the entire base
9122 * IPv6 header is in the first mbuf of the chain.
9123 */
9124 if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9125 struct ifnet *inifp = m->m_pkthdr.rcvif;
9126 /* max_linkhdr is already rounded up to nearest 4-byte */
9127 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
9128 max_linkhdr)) == NULL) {
9129 /* XXXJRT new stat, please */
9130 ip6stat.ip6s_toosmall++;
9131 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9132 goto bad;
9133 }
9134 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
9135 struct ifnet *inifp = m->m_pkthdr.rcvif;
9136 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
9137 ip6stat.ip6s_toosmall++;
9138 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9139 goto bad;
9140 }
9141 }
9142
9143 ip6 = mtod(m, struct ip6_hdr *);
9144
9145 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
9146 ip6stat.ip6s_badvers++;
9147 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
9148 goto bad;
9149 }
9150
9151 /* Checks out, proceed */
9152 *mp = m;
9153 return 0;
9154
9155 bad:
9156 *mp = m;
9157 return -1;
9158 }
9159
9160 /*
9161 * the PF routines expect to be called from ip_input, so we
9162 * need to do and undo here some of the same processing.
9163 *
9164 * XXX : this is heavily inspired on bridge_pfil()
9165 */
9166 static int
9167 bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
9168 int input)
9169 {
9170 /*
9171 * XXX : mpetit : heavily inspired by bridge_pfil()
9172 */
9173
9174 int snap, error, i, hlen;
9175 struct ether_header *eh1, eh2;
9176 struct ip *ip;
9177 struct llc llc1;
9178 u_int16_t ether_type;
9179
9180 snap = 0;
9181 error = -1; /* Default error if not error == 0 */
9182
9183 if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
9184 return 0; /* filtering is disabled */
9185 }
9186 i = min((*mp)->m_pkthdr.len, max_protohdr);
9187 if ((*mp)->m_len < i) {
9188 *mp = m_pullup(*mp, i);
9189 if (*mp == NULL) {
9190 BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
9191 return -1;
9192 }
9193 }
9194
9195 eh1 = mtod(*mp, struct ether_header *);
9196 ether_type = ntohs(eh1->ether_type);
9197
9198 /*
9199 * Check for SNAP/LLC.
9200 */
9201 if (ether_type < ETHERMTU) {
9202 struct llc *llc2 = (struct llc *)(eh1 + 1);
9203
9204 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
9205 llc2->llc_dsap == LLC_SNAP_LSAP &&
9206 llc2->llc_ssap == LLC_SNAP_LSAP &&
9207 llc2->llc_control == LLC_UI) {
9208 ether_type = htons(llc2->llc_un.type_snap.ether_type);
9209 snap = 1;
9210 }
9211 }
9212
9213 /*
9214 * If we're trying to filter bridge traffic, don't look at anything
9215 * other than IP and ARP traffic. If the filter doesn't understand
9216 * IPv6, don't allow IPv6 through the bridge either. This is lame
9217 * since if we really wanted, say, an AppleTalk filter, we are hosed,
9218 * but of course we don't have an AppleTalk filter to begin with.
9219 * (Note that since pfil doesn't understand ARP it will pass *ALL*
9220 * ARP traffic.)
9221 */
9222 switch (ether_type) {
9223 case ETHERTYPE_ARP:
9224 case ETHERTYPE_REVARP:
9225 return 0; /* Automatically pass */
9226
9227 case ETHERTYPE_IP:
9228 case ETHERTYPE_IPV6:
9229 break;
9230 default:
9231 /*
9232 * Check to see if the user wants to pass non-ip
9233 * packets, these will not be checked by pf and
9234 * passed unconditionally so the default is to drop.
9235 */
9236 if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
9237 goto bad;
9238 }
9239 break;
9240 }
9241
9242 /* Strip off the Ethernet header and keep a copy. */
9243 m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
9244 m_adj(*mp, ETHER_HDR_LEN);
9245
9246 /* Strip off snap header, if present */
9247 if (snap) {
9248 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
9249 m_adj(*mp, sizeof(struct llc));
9250 }
9251
9252 /*
9253 * Check the IP header for alignment and errors
9254 */
9255 switch (ether_type) {
9256 case ETHERTYPE_IP:
9257 error = bridge_ip_checkbasic(mp);
9258 break;
9259 case ETHERTYPE_IPV6:
9260 error = bridge_ip6_checkbasic(mp);
9261 break;
9262 default:
9263 error = 0;
9264 break;
9265 }
9266 if (error) {
9267 goto bad;
9268 }
9269
9270 error = 0;
9271
9272 /*
9273 * Run the packet through pf rules
9274 */
9275 switch (ether_type) {
9276 case ETHERTYPE_IP:
9277 /*
9278 * before calling the firewall, swap fields the same as
9279 * IP does. here we assume the header is contiguous
9280 */
9281 ip = mtod(*mp, struct ip *);
9282
9283 ip->ip_len = ntohs(ip->ip_len);
9284 ip->ip_off = ntohs(ip->ip_off);
9285
9286 if (ifp != NULL) {
9287 error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
9288 }
9289
9290 if (*mp == NULL || error != 0) { /* filter may consume */
9291 break;
9292 }
9293
9294 /* Recalculate the ip checksum and restore byte ordering */
9295 ip = mtod(*mp, struct ip *);
9296 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9297 if (hlen < (int)sizeof(struct ip)) {
9298 goto bad;
9299 }
9300 if (hlen > (*mp)->m_len) {
9301 if ((*mp = m_pullup(*mp, hlen)) == 0) {
9302 goto bad;
9303 }
9304 ip = mtod(*mp, struct ip *);
9305 if (ip == NULL) {
9306 goto bad;
9307 }
9308 }
9309 ip->ip_len = htons(ip->ip_len);
9310 ip->ip_off = htons(ip->ip_off);
9311 ip->ip_sum = 0;
9312 if (hlen == sizeof(struct ip)) {
9313 ip->ip_sum = in_cksum_hdr(ip);
9314 } else {
9315 ip->ip_sum = in_cksum(*mp, hlen);
9316 }
9317 break;
9318
9319 case ETHERTYPE_IPV6:
9320 if (ifp != NULL) {
9321 error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
9322 }
9323
9324 if (*mp == NULL || error != 0) { /* filter may consume */
9325 break;
9326 }
9327 break;
9328 default:
9329 error = 0;
9330 break;
9331 }
9332
9333 if (*mp == NULL) {
9334 return error;
9335 }
9336 if (error != 0) {
9337 goto bad;
9338 }
9339
9340 error = -1;
9341
9342 /*
9343 * Finally, put everything back the way it was and return
9344 */
9345 if (snap) {
9346 M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
9347 if (*mp == NULL) {
9348 return error;
9349 }
9350 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
9351 }
9352
9353 M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
9354 if (*mp == NULL) {
9355 return error;
9356 }
9357 bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
9358
9359 return 0;
9360
9361 bad:
9362 m_freem(*mp);
9363 *mp = NULL;
9364 return error;
9365 }
9366
9367 /*
9368 * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
9369 * All rights reserved.
9370 *
9371 * Redistribution and use in source and binary forms, with or without
9372 * modification, are permitted provided that the following conditions
9373 * are met:
9374 * 1. Redistributions of source code must retain the above copyright
9375 * notice, this list of conditions and the following disclaimer.
9376 * 2. Redistributions in binary form must reproduce the above copyright
9377 * notice, this list of conditions and the following disclaimer in the
9378 * documentation and/or other materials provided with the distribution.
9379 *
9380 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9381 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9382 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
9383 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
9384 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
9385 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
9386 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9387 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
9388 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
9389 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
9390 * SUCH DAMAGE.
9391 */
9392
9393 /*
9394 * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
9395 *
9396 * Create a queue of packets/segments which fit the given mss + hdr_len.
9397 * m0 points to mbuf chain to be segmented.
9398 * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
9399 * into segments of length MSS bytes and then copy the first hdr_len bytes
9400 * from m0 at the top of each segment.
9401 * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
9402 * in each segment after the first hdr_len bytes
9403 *
9404 * Return the new queue with the segments on success, NULL on failure.
9405 * (the mbuf queue is freed in this case).
9406 * nsegs contains the number of segments generated.
9407 */
9408
9409 static struct mbuf *
9410 m_seg(struct mbuf *m0, int hdr_len, int mss, int *nsegs,
9411 char * hdr2_buf, int hdr2_len)
9412 {
9413 int off = 0, n, firstlen;
9414 struct mbuf **mnext, *mseg;
9415 int total_len = m0->m_pkthdr.len;
9416
9417 /*
9418 * Segmentation useless
9419 */
9420 if (total_len <= hdr_len + mss) {
9421 return m0;
9422 }
9423
9424 if (hdr2_buf == NULL || hdr2_len <= 0) {
9425 hdr2_buf = NULL;
9426 hdr2_len = 0;
9427 }
9428
9429 off = hdr_len + mss;
9430 firstlen = mss; /* first segment stored in the original mbuf */
9431
9432 mnext = &(m0->m_nextpkt); /* pointer to next packet */
9433
9434 for (n = 1; off < total_len; off += mss, n++) {
9435 struct mbuf *m;
9436 /*
9437 * Copy the header from the original packet
9438 * and create a new mbuf chain
9439 */
9440 if (MHLEN < hdr_len) {
9441 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
9442 } else {
9443 m = m_gethdr(M_NOWAIT, MT_DATA);
9444 }
9445
9446 if (m == NULL) {
9447 #ifdef GSO_DEBUG
9448 D("MGETHDR error\n");
9449 #endif
9450 goto err;
9451 }
9452
9453 m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
9454
9455 m->m_len = hdr_len;
9456 /*
9457 * if the optional header is present, copy it
9458 */
9459 if (hdr2_buf != NULL) {
9460 m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
9461 }
9462
9463 m->m_flags |= (m0->m_flags & M_COPYFLAGS);
9464 if (off + mss >= total_len) { /* last segment */
9465 mss = total_len - off;
9466 }
9467 /*
9468 * Copy the payload from original packet
9469 */
9470 mseg = m_copym(m0, off, mss, M_NOWAIT);
9471 if (mseg == NULL) {
9472 m_freem(m);
9473 #ifdef GSO_DEBUG
9474 D("m_copym error\n");
9475 #endif
9476 goto err;
9477 }
9478 m_cat(m, mseg);
9479
9480 m->m_pkthdr.len = hdr_len + hdr2_len + mss;
9481 m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
9482 /*
9483 * Copy the checksum flags and data (in_cksum() need this)
9484 */
9485 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
9486 m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
9487 m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
9488
9489 *mnext = m;
9490 mnext = &(m->m_nextpkt);
9491 }
9492
9493 /*
9494 * Update first segment.
9495 * If the optional header is present, is necessary
9496 * to insert it into the first segment.
9497 */
9498 if (hdr2_buf == NULL) {
9499 m_adj(m0, hdr_len + firstlen - total_len);
9500 m0->m_pkthdr.len = hdr_len + firstlen;
9501 } else {
9502 mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
9503 if (mseg == NULL) {
9504 #ifdef GSO_DEBUG
9505 D("m_copym error\n");
9506 #endif
9507 goto err;
9508 }
9509 m_adj(m0, hdr_len - total_len);
9510 m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
9511 m_cat(m0, mseg);
9512 m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
9513 }
9514
9515 if (nsegs != NULL) {
9516 *nsegs = n;
9517 }
9518 return m0;
9519 err:
9520 while (m0 != NULL) {
9521 mseg = m0->m_nextpkt;
9522 m0->m_nextpkt = NULL;
9523 m_freem(m0);
9524 m0 = mseg;
9525 }
9526 return NULL;
9527 }
9528
9529 /*
9530 * Wrappers of IPv4 checksum functions
9531 */
9532 static inline void
9533 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
9534 {
9535 m->m_data += mac_hlen;
9536 m->m_len -= mac_hlen;
9537 m->m_pkthdr.len -= mac_hlen;
9538 #if __FreeBSD_version < 1000000
9539 ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
9540 #endif
9541
9542 in_delayed_cksum(m);
9543
9544 #if __FreeBSD_version < 1000000
9545 ip->ip_len = htons(ip->ip_len);
9546 #endif
9547 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
9548 m->m_len += mac_hlen;
9549 m->m_pkthdr.len += mac_hlen;
9550 m->m_data -= mac_hlen;
9551 }
9552
9553 static inline void
9554 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
9555 {
9556 m->m_data += mac_hlen;
9557
9558 ip->ip_sum = in_cksum(m, ip_hlen);
9559
9560 m->m_pkthdr.csum_flags &= ~CSUM_IP;
9561 m->m_data -= mac_hlen;
9562 }
9563
9564 /*
9565 * Structure that contains the state during the TCP segmentation
9566 */
9567 struct gso_ip_tcp_state {
9568 void (*update)
9569 (struct gso_ip_tcp_state*, struct mbuf*);
9570 void (*internal)
9571 (struct gso_ip_tcp_state*, struct mbuf*);
9572 union iphdr hdr;
9573 struct tcphdr *tcp;
9574 int mac_hlen;
9575 int ip_hlen;
9576 int tcp_hlen;
9577 int hlen;
9578 int pay_len;
9579 int sw_csum;
9580 uint32_t tcp_seq;
9581 uint16_t ip_id;
9582 boolean_t is_tx;
9583 };
9584
9585 /*
9586 * Update the pointers to TCP and IPv4 headers
9587 */
9588 static inline void
9589 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9590 {
9591 state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) + state->mac_hlen);
9592 state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) + state->ip_hlen);
9593 state->pay_len = m->m_pkthdr.len - state->hlen;
9594 }
9595
9596 /*
9597 * Set properly the TCP and IPv4 headers
9598 */
9599 static inline void
9600 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9601 {
9602 /*
9603 * Update IP header
9604 */
9605 state->hdr.ip->ip_id = htons((state->ip_id)++);
9606 state->hdr.ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
9607 /*
9608 * TCP Checksum
9609 */
9610 state->tcp->th_sum = 0;
9611 state->tcp->th_sum = in_pseudo(state->hdr.ip->ip_src.s_addr,
9612 state->hdr.ip->ip_dst.s_addr,
9613 htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
9614 /*
9615 * Checksum HW not supported (TCP)
9616 */
9617 if (state->sw_csum & CSUM_DELAY_DATA) {
9618 gso_ipv4_data_cksum(m, state->hdr.ip, state->mac_hlen);
9619 }
9620
9621 state->tcp_seq += state->pay_len;
9622 /*
9623 * IP Checksum
9624 */
9625 state->hdr.ip->ip_sum = 0;
9626 /*
9627 * Checksum HW not supported (IP)
9628 */
9629 if (state->sw_csum & CSUM_IP) {
9630 gso_ipv4_hdr_cksum(m, state->hdr.ip, state->mac_hlen, state->ip_hlen);
9631 }
9632 }
9633
9634
9635 /*
9636 * Updates the pointers to TCP and IPv6 headers
9637 */
9638 static inline void
9639 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9640 {
9641 state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + state->mac_hlen);
9642 state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) + state->ip_hlen);
9643 state->pay_len = m->m_pkthdr.len - state->hlen;
9644 }
9645
9646 /*
9647 * Sets properly the TCP and IPv6 headers
9648 */
9649 static inline void
9650 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9651 {
9652 state->hdr.ip6->ip6_plen = htons(m->m_pkthdr.len -
9653 state->mac_hlen - state->ip_hlen);
9654 /*
9655 * TCP Checksum
9656 */
9657 state->tcp->th_sum = 0;
9658 state->tcp->th_sum = in6_pseudo(&state->hdr.ip6->ip6_src,
9659 &state->hdr.ip6->ip6_dst,
9660 htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
9661 /*
9662 * Checksum HW not supported (TCP)
9663 */
9664 if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
9665 (void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
9666 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
9667 }
9668 state->tcp_seq += state->pay_len;
9669 }
9670
9671 /*
9672 * Init the state during the TCP segmentation
9673 */
9674 static void
9675 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
9676 bool is_ipv4, int mac_hlen, int ip_hlen,
9677 void * ip_hdr, struct tcphdr * tcp_hdr)
9678 {
9679 #pragma unused(ifp)
9680
9681 state->hdr.ptr = ip_hdr;
9682 state->tcp = tcp_hdr;
9683 if (is_ipv4) {
9684 state->ip_id = ntohs(state->hdr.ip->ip_id);
9685 state->update = gso_ipv4_tcp_update;
9686 state->internal = gso_ipv4_tcp_internal;
9687 state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
9688 } else {
9689 state->update = gso_ipv6_tcp_update;
9690 state->internal = gso_ipv6_tcp_internal;
9691 state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
9692 }
9693 state->mac_hlen = mac_hlen;
9694 state->ip_hlen = ip_hlen;
9695 state->tcp_hlen = state->tcp->th_off << 2;
9696 state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
9697 state->tcp_seq = ntohl(state->tcp->th_seq);
9698 //state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
9699 return;
9700 }
9701
9702 /*
9703 * GSO on TCP/IP (v4 or v6)
9704 *
9705 * If is_tx is TRUE, segmented packets are transmitted after they are
9706 * segmented.
9707 *
9708 * If is_tx is FALSE, the segmented packets are returned as a chain in *mp.
9709 */
9710 static int
9711 gso_ip_tcp(struct ifnet *ifp, struct mbuf **mp, struct gso_ip_tcp_state *state,
9712 boolean_t is_tx)
9713 {
9714 struct mbuf *m, *m_tx;
9715 int error = 0;
9716 int mss = 0;
9717 int nsegs = 0;
9718 struct mbuf *m0 = *mp;
9719 #ifdef GSO_STATS
9720 int total_len = m0->m_pkthdr.len;
9721 #endif /* GSO_STATS */
9722
9723 #if 1
9724 u_int reduce_mss;
9725
9726 reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
9727 : if_bridge_tso_reduce_mss_forwarding;
9728 mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen - reduce_mss;
9729 assert(mss > 0);
9730 #else
9731 if (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) {/* TSO with GSO */
9732 mss = ifp->if_hw_tsomax - state->ip_hlen - state->tcp_hlen;
9733 } else {
9734 mss = m0->m_pkthdr.tso_segsz;
9735 }
9736 #endif
9737
9738 *mp = m0 = m_seg(m0, state->hlen, mss, &nsegs, 0, 0);
9739 if (m0 == NULL) {
9740 return ENOBUFS; /* XXX ok? */
9741 }
9742 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
9743 "%s %s mss %d nsegs %d",
9744 ifp->if_xname,
9745 is_tx ? "TX" : "RX",
9746 mss, nsegs);
9747 /*
9748 * XXX-ste: can this happen?
9749 */
9750 if (m0->m_nextpkt == NULL) {
9751 #ifdef GSO_DEBUG
9752 D("only 1 segment");
9753 #endif
9754 if (is_tx) {
9755 error = bridge_transmit(ifp, m0);
9756 }
9757 return error;
9758 }
9759 #ifdef GSO_STATS
9760 GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
9761 GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
9762 GSOSTAT_ADD(tcp.gsos_osegments, nsegs);
9763 #endif /* GSO_STATS */
9764
9765 /* first pkt */
9766 m = m0;
9767
9768 state->update(state, m);
9769
9770 do {
9771 state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
9772
9773 state->internal(state, m);
9774 m_tx = m;
9775 m = m->m_nextpkt;
9776 if (is_tx) {
9777 m_tx->m_nextpkt = NULL;
9778 if ((error = bridge_transmit(ifp, m_tx)) != 0) {
9779 /*
9780 * XXX: If a segment can not be sent, discard the following
9781 * segments and propagate the error to the upper levels.
9782 * In this way the TCP retransmits all the initial packet.
9783 */
9784 #ifdef GSO_DEBUG
9785 D("if_transmit error\n");
9786 #endif
9787 goto err;
9788 }
9789 }
9790 state->update(state, m);
9791
9792 state->tcp->th_flags &= ~TH_CWR;
9793 state->tcp->th_seq = htonl(state->tcp_seq);
9794 } while (m->m_nextpkt);
9795
9796 /* last pkt */
9797 state->internal(state, m);
9798
9799 if (is_tx) {
9800 error = bridge_transmit(ifp, m);
9801 #ifdef GSO_DEBUG
9802 if (error) {
9803 D("last if_transmit error\n");
9804 D("error - type = %d \n", error);
9805 }
9806 #endif
9807 }
9808 #ifdef GSO_STATS
9809 if (!error) {
9810 GSOSTAT_INC(tcp.gsos_segmented);
9811 GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
9812 GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
9813 GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
9814 }
9815 #endif /* GSO_STATS */
9816 return error;
9817
9818 err:
9819 #ifdef GSO_DEBUG
9820 D("error - type = %d \n", error);
9821 #endif
9822 while (m != NULL) {
9823 m_tx = m->m_nextpkt;
9824 m->m_nextpkt = NULL;
9825 m_freem(m);
9826 m = m_tx;
9827 }
9828 return error;
9829 }
9830
9831 /*
9832 * GSO for TCP/IPv[46]
9833 */
9834 static int
9835 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
9836 boolean_t is_tx)
9837 {
9838 int error;
9839 ip_packet_info info;
9840 uint32_t csum_flags;
9841 struct gso_ip_tcp_state state;
9842 struct bripstats stats; /* XXX ignored */
9843 struct tcphdr *tcp;
9844
9845 if (!is_tx && ipforwarding == 0) {
9846 /* no need to segment if the packet will not be forwarded */
9847 return 0;
9848 }
9849 error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4, &info, &stats);
9850 if (error != 0) {
9851 if (*mp != NULL) {
9852 m_freem(*mp);
9853 *mp = NULL;
9854 }
9855 return error;
9856 }
9857 if (info.ip_proto_hdr == NULL) {
9858 /* not a TCP packet */
9859 return 0;
9860 }
9861 tcp = (struct tcphdr *)(void *)info.ip_proto_hdr;
9862 gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
9863 info.ip_hlen, info.ip_hdr.ptr, tcp);
9864 if (is_ipv4) {
9865 csum_flags = CSUM_DELAY_DATA; /* XXX */
9866 if (!is_tx) {
9867 /* if RX to our local IP address, don't segment */
9868 struct in_addr dst_ip;
9869
9870 bcopy(&state.hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
9871 if (in_addr_is_ours(dst_ip)) {
9872 return 0;
9873 }
9874 }
9875 } else {
9876 csum_flags = CSUM_DELAY_IPV6_DATA; /* XXX */
9877 if (!is_tx) {
9878 /* if RX to our local IP address, don't segment */
9879 if (in6_addr_is_ours(&state.hdr.ip6->ip6_dst,
9880 ifp->if_index)) {
9881 /* local IP address, no need to segment */
9882 return 0;
9883 }
9884 }
9885 }
9886 (*mp)->m_pkthdr.csum_flags = csum_flags;
9887 (*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
9888 return gso_ip_tcp(ifp, mp, &state, is_tx);
9889 }
9890