1 /*
2 * Copyright (c) 2004-2025 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */
30 /*
31 * Copyright 2001 Wasabi Systems, Inc.
32 * All rights reserved.
33 *
34 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed for the NetBSD Project by
47 * Wasabi Systems, Inc.
48 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49 * or promote products derived from this software without specific prior
50 * written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
56 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62 * POSSIBILITY OF SUCH DAMAGE.
63 */
64
65 /*
66 * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67 * All rights reserved.
68 *
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
71 * are met:
72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88 * POSSIBILITY OF SUCH DAMAGE.
89 *
90 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91 */
92
/*
 * Network interface bridge support.
 *
 * TODO:
 *
 *	- Currently only supports Ethernet-like interfaces (Ethernet,
 *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
 *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
 *	  consider heterogeneous bridges).
 *
 *	- GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
 */
105
106 #include <sys/cdefs.h>
107
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123
124 #include <sys/kauth.h>
125
126 #include <kern/thread_call.h>
127
128 #include <libkern/libkern.h>
129
130 #include <kern/uipc_domain.h>
131 #include <kern/zalloc.h>
132
133 #if NBPFILTER > 0
134 #include <net/bpf.h>
135 #endif
136 #include <net/if.h>
137 #include <net/if_dl.h>
138 #include <net/if_types.h>
139 #include <net/if_var.h>
140 #include <net/if_media.h>
141 #include <net/net_api_stats.h>
142
143 #include <netinet/in.h> /* for struct arpcom */
144 #include <netinet/tcp.h> /* for struct tcphdr */
145 #include <netinet/in_systm.h>
146 #include <netinet/in_var.h>
147 #define _IP_VHL
148 #include <netinet/ip.h>
149 #include <netinet/ip_var.h>
150 #include <netinet/ip6.h>
151 #include <netinet6/ip6_var.h>
152 #include <netinet/if_ether.h> /* for struct arpcom */
153 #include <net/bridgestp.h>
154 #include <net/if_bridgevar.h>
155 #include <net/if_llc.h>
156 #if NVLAN > 0
157 #include <net/if_vlan_var.h>
158 #endif /* NVLAN > 0 */
159
160 #include <net/if_ether.h>
161 #include <net/dlil.h>
162 #include <net/kpi_interfacefilter.h>
163 #include <net/pfvar.h>
164
165 #include <net/route.h>
166 #include <net/droptap.h>
167 #include <dev/random/randomdev.h>
168
169 #include <netinet/bootp.h>
170 #include <netinet/dhcp.h>
171
172 #if SKYWALK
173 #include <skywalk/nexus/netif/nx_netif.h>
174 #endif /* SKYWALK */
175
176 #include <net/sockaddr_utils.h>
177 #include <net/mblist.h>
178
179 #include <os/log.h>
180
/* Union of the CSUM_TSO_IPV4 and CSUM_TSO_IPV6 flag bits. */
#define _TSO_CSUM       (CSUM_TSO_IPV4 | CSUM_TSO_IPV6)

/* File-local struct in_addr holding INADDR_ANY. */
static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };


/*
 * mbuf flag tests
 * - __M_FLAGS_ARE_SET() is true if any bit of 'flags' is set in
 *   (m)->m_flags; the IS_* macros apply it to M_BCAST and/or M_MCAST
 */
#define __M_FLAGS_ARE_SET(m, flags)     (((m)->m_flags & (flags)) != 0)
#define IS_BCAST(m)             __M_FLAGS_ARE_SET(m, M_BCAST)
#define IS_MCAST(m)             __M_FLAGS_ARE_SET(m, M_MCAST)
#define IS_BCAST_MCAST(m)       __M_FLAGS_ARE_SET(m, M_BCAST | M_MCAST)

/*
 * Constants passed through htons(), for comparison against 16-bit
 * fields that are kept in network byte order.
 */
#define HTONS_ETHERTYPE_ARP             htons(ETHERTYPE_ARP)
#define HTONS_ETHERTYPE_IP              htons(ETHERTYPE_IP)
#define HTONS_ETHERTYPE_IPV6            htons(ETHERTYPE_IPV6)
#define HTONS_ARPHRD_ETHER              htons(ARPHRD_ETHER)
#define HTONS_ARPOP_REQUEST             htons(ARPOP_REQUEST)
#define HTONS_ARPOP_REPLY               htons(ARPOP_REPLY)
#define HTONS_IPPORT_BOOTPC             htons(IPPORT_BOOTPC)
#define HTONS_IPPORT_BOOTPS             htons(IPPORT_BOOTPS)
#define HTONS_DHCP_FLAGS_BROADCAST      htons(DHCP_FLAGS_BROADCAST)
200
/*
 * if_bridge_debug, BR_DBGF_*
 * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
 *   to enable additional logs for the corresponding bridge function
 * - "sysctl net.link.bridge.debug" controls the value of
 *   'if_bridge_debug'
 */
static uint32_t if_bridge_debug = 0;
#define BR_DBGF_LIFECYCLE       0x0001
#define BR_DBGF_INPUT           0x0002
#define BR_DBGF_OUTPUT          0x0004
#define BR_DBGF_RT_TABLE        0x0008
#define BR_DBGF_DELAYED_CALL    0x0010
#define BR_DBGF_IOCTL           0x0020
#define BR_DBGF_MBUF            0x0040
#define BR_DBGF_MCAST           0x0080
#define BR_DBGF_HOSTFILTER      0x0100
#define BR_DBGF_CHECKSUM        0x0200
#define BR_DBGF_MAC_NAT         0x0400
#define BR_DBGF_INPUT_LIST      0x0800

/*
 * if_bridge_log_level
 * - 'if_bridge_log_level' ensures that by default important logs are
 *   logged regardless of if_bridge_debug by comparing the log level
 *   in BRIDGE_LOG to if_bridge_log_level
 * - "sysctl net.link.bridge.log_level" controls the value of
 *   'if_bridge_log_level'
 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
 *   logs must use LOG_NOTICE to ensure they appear by default
 */
static int if_bridge_log_level = LOG_NOTICE;

/*
 * BRIDGE_DBGF_ENABLED
 * - true if any of the BR_DBGF_* bits in '__flag' is set in
 *   'if_bridge_debug'
 * - '__flag' is parenthesized so that an argument such as
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT) is masked as a whole; without the
 *   parentheses '&' binds tighter than '|' and the test is always true
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)

/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - macros to generate the specified log conditionally based on
 *   the specified log level and debug flags
 * - the log is emitted if '__level' is at or below 'if_bridge_log_level'
 *   or if any of the '__dbgf' debug flags is enabled
 * - BRIDGE_LOG_SIMPLE does not include the function name in the log
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                    \
	        if ((__level) <= if_bridge_log_level ||         \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {              \
	                os_log(OS_LOG_DEFAULT, "%s: " __string, \
	                       __func__, ## __VA_ARGS__);       \
	        }                                               \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                       \
	} while (0)
257
/*
 * Raw bridge mutex operations and ownership assertions on sc_mtx.
 */
#define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
#define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
#define BRIDGE_LOCK_ASSERT_HELD(_sc)            \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)         \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)

/*
 * BRIDGE_LOCK_DEBUG
 * - when non-zero, the locking macros below map to bridge_*() functions
 *   instead of being expanded inline
 */
#define BRIDGE_LOCK_DEBUG 1
#if BRIDGE_LOCK_DEBUG

/* number of slots in the lock_lr[] / unlock_lr[] history arrays */
#define BR_LCKDBG_MAX                   4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
/* '_err' is parenthesized so any lvalue expression can be passed safely */
#define BRIDGE_LOCK2REF(_sc, _err)      ((_err) = bridge_lock2ref(_sc))
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
/*
 * BRIDGE_LOCK2REF
 * - called with the bridge lock held; always drops the lock
 * - sets '_err' to EBUSY if sc_iflist_xcnt is non-zero, otherwise
 *   increments sc_iflist_ref and sets '_err' to 0
 */
#define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	if ((_sc)->sc_iflist_xcnt > 0) {                                \
	        (_err) = EBUSY;                                         \
	} else {                                                        \
	        (_sc)->sc_iflist_ref++;                                 \
	        (_err) = 0;                                             \
	}                                                               \
	_BRIDGE_UNLOCK(_sc);                                            \
} while (0)
/*
 * BRIDGE_UNREF
 * - takes the lock, decrements sc_iflist_ref, drops the lock
 * - if that was the last reference and sc_iflist_xcnt is non-zero,
 *   issues a wakeup on sc_cv after the lock is dropped
 */
#define BRIDGE_UNREF(_sc)               do {                            \
	_BRIDGE_LOCK(_sc);                                              \
	(_sc)->sc_iflist_ref--;                                         \
	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \
	        _BRIDGE_UNLOCK(_sc);                                    \
	        wakeup(&(_sc)->sc_cv);                                  \
	} else {                                                        \
	        _BRIDGE_UNLOCK(_sc);                                    \
	}                                                               \
} while (0)
/*
 * BRIDGE_XLOCK
 * - called with the bridge lock held
 * - increments sc_iflist_xcnt, then sleeps on sc_cv until
 *   sc_iflist_ref drops to zero
 */
#define BRIDGE_XLOCK(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt++;                                        \
	while ((_sc)->sc_iflist_ref > 0) {                              \
	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
	            "BRIDGE_XLOCK", NULL);                              \
	}                                                               \
} while (0)
/*
 * BRIDGE_XDROP
 * - called with the bridge lock held; decrements sc_iflist_xcnt
 */
#define BRIDGE_XDROP(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt--;                                        \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
313
/*
 * BRIDGE_BPF_TAP_IN, BRIDGE_BPF_TAP_OUT
 * - hand mbuf 'm' to bpf_tap_in()/bpf_tap_out() with DLT_EN10MB, but only
 *   when (ifp)->if_bpf is non-NULL
 * - macro arguments are parenthesized so that expression arguments
 *   expand correctly; note 'ifp' is evaluated twice
 */
#define BRIDGE_BPF_TAP_IN(ifp, m)                                       \
	do {                                                            \
	        if ((ifp)->if_bpf != NULL) {                            \
	                bpf_tap_in((ifp), DLT_EN10MB, (m), NULL, 0);    \
	        }                                                       \
	} while (0)

#define BRIDGE_BPF_TAP_OUT(ifp, m)                                      \
	do {                                                            \
	        if ((ifp)->if_bpf != NULL) {                            \
	                bpf_tap_out((ifp), DLT_EN10MB, (m), NULL, 0);   \
	        }                                                       \
	} while (0)
327
328
/*
 * Initial size of the route hash table.  Must be a power of two,
 * because BRIDGE_RTHASH_MASK() is computed as (size - 1).
 */
#ifndef BRIDGE_RTHASH_SIZE
#define BRIDGE_RTHASH_SIZE              16
#endif

/*
 * Maximum size of the routing hash table
 */
#define BRIDGE_RTHASH_SIZE_MAX          2048

/* Bit mask derived from the current hash table size of 'sc'. */
#define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)

/*
 * Maximum number of addresses to cache.
 */
#ifndef BRIDGE_RTABLE_MAX
#define BRIDGE_RTABLE_MAX               100
#endif

/*
 * Timeout (in seconds) for entries learned dynamically.
 */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
#endif

/*
 * Number of seconds between walks of the route list.
 */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
#endif

/*
 * Number of MAC NAT entries
 * - sized based on 16 clients (including MAC NAT interface)
 *   each with 4 addresses
 */
#ifndef BRIDGE_MAC_NAT_ENTRY_MAX
#define BRIDGE_MAC_NAT_ENTRY_MAX        64
#endif /* BRIDGE_MAC_NAT_ENTRY_MAX */

/*
 * List of capabilities to possibly mask on the member interface.
 */
#define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
/*
 * List of capabilities to disable on the member interface.
 */
#define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
381
382 /*
383 * Bridge interface list entry.
384 */
385 struct bridge_iflist {
386 TAILQ_ENTRY(bridge_iflist) bif_next;
387 struct ifnet *bif_ifp; /* member if */
388 struct bstp_port bif_stp; /* STP state */
389 uint32_t bif_ifflags; /* member if flags */
390 int bif_savedcaps; /* saved capabilities */
391 uint32_t bif_addrmax; /* max # of addresses */
392 uint32_t bif_addrcnt; /* cur. # of addresses */
393 uint32_t bif_addrexceeded; /* # of address violations */
394
395 interface_filter_t bif_iff_ref;
396 struct bridge_softc *bif_sc;
397 uint32_t bif_flags;
398
399 /* host filter */
400 struct in_addr bif_hf_ipsrc;
401 uint8_t bif_hf_hwsrc[ETHER_ADDR_LEN];
402
403 struct ifbrmstats bif_stats;
404 };
405
406 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)407 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
408 {
409 return (bif->bif_ifflags & flags) != 0;
410 }
411
412 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)413 bif_has_checksum_offload(struct bridge_iflist * bif)
414 {
415 return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
416 }
417
418 static inline bool
bif_has_mac_nat(struct bridge_iflist * bif)419 bif_has_mac_nat(struct bridge_iflist * bif)
420 {
421 return bif_ifflags_are_set(bif, IFBIF_MAC_NAT);
422 }
423
424 static inline bool
bif_uses_virtio(struct bridge_iflist * bif)425 bif_uses_virtio(struct bridge_iflist * bif)
426 {
427 return bif_ifflags_are_set(bif, IFBIF_USES_VIRTIO);
428 }
429
/*
 * fake errors to make the code clearer
 * - every name below is an alias for the same value, EJUSTRETURN
 */
#define _EBADIP                 EJUSTRETURN
#define _EBADIPCHECKSUM         EJUSTRETURN
#define _EBADIPV6               EJUSTRETURN
#define _EBADUDP                EJUSTRETURN
#define _EBADTCP                EJUSTRETURN
#define _EBADUDPCHECKSUM        EJUSTRETURN
#define _EBADTCPCHECKSUM        EJUSTRETURN

/*
 * BIFF_* bit flags (each value is a distinct single bit)
 */
#define BIFF_PROMISC            0x01    /* promiscuous mode set */
#define BIFF_PROTO_ATTACHED     0x02    /* protocol attached */
#define BIFF_FILTER_ATTACHED    0x04    /* interface filter attached */
#define BIFF_MEDIA_ACTIVE       0x08    /* interface media active */
#define BIFF_HOST_FILTER        0x10    /* host filter enabled */
#define BIFF_HF_HWSRC           0x20    /* host filter source MAC is set */
#define BIFF_HF_IPSRC           0x40    /* host filter source IP is set */
#define BIFF_INPUT_BROADCAST    0x80    /* send broadcast packets in */
#define BIFF_IN_MEMBER_LIST     0x100   /* added to the member list */
#define BIFF_WIFI_INFRA         0x200   /* interface is Wi-Fi infra */
#define BIFF_ALL_MULTI          0x400   /* allmulti set */
#define BIFF_LRO_DISABLED       0x800   /* LRO was disabled */
#if SKYWALK
#define BIFF_FLOWSWITCH_ATTACHED 0x1000 /* we attached the flowswitch */
#define BIFF_NETAGENT_REMOVED   0x2000  /* we removed the netagent */
#endif /* SKYWALK */
455
456 /*
457 * mac_nat_entry
458 * - translates between an IP address and MAC address on a specific
459 * bridge interface member
460 */
461 struct mac_nat_entry {
462 LIST_ENTRY(mac_nat_entry) mne_list; /* list linkage */
463 struct bridge_iflist *mne_bif; /* originating interface */
464 unsigned long mne_expire; /* expiration time */
465 union {
466 struct in_addr mneu_ip; /* originating IPv4 address */
467 struct in6_addr mneu_ip6; /* originating IPv6 address */
468 } mne_u;
469 uint8_t mne_mac[ETHER_ADDR_LEN];
470 uint8_t mne_flags;
471 uint8_t mne_reserved;
472 };
473 #define mne_ip mne_u.mneu_ip
474 #define mne_ip6 mne_u.mneu_ip6
475
476 #define MNE_FLAGS_IPV6 0x01 /* IPv6 address */
477
478 LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
479
/*
 * mac_nat_record
 * - used by bridge_mac_nat_output() to convey the translation that needs
 *   to take place in bridge_mac_nat_translate
 * - holds enough information so that the translation can be done later
 *   when the destination interface is the MAC-NAT interface
 * - mnr_u is a union with one variant each for ARP, IP and IPv6;
 *   the mnr_* macros below are shorthand for its members
 */
struct mac_nat_record {
	uint16_t                mnr_ether_type;
	union {
		uint16_t        mnru_arp_offset;
		struct {
			uint16_t mnruip_dhcp_flags;
			uint16_t mnruip_udp_csum;
			uint8_t  mnruip_header_len;
		} mnru_ip;
		struct {
			uint16_t mnruip6_icmp6_len;
			uint16_t mnruip6_lladdr_offset;
			uint8_t  mnruip6_icmp6_type;
			uint8_t  mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};

/* ARP variant */
#define mnr_arp_offset          mnr_u.mnru_arp_offset

/* IP variant */
#define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
#define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
#define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum

/* IPv6 variant */
#define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
#define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
#define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
#define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
515
516 /*
517 * Bridge route node.
518 */
519 struct bridge_rtnode {
520 LIST_ENTRY(bridge_rtnode) brt_hash; /* hash table linkage */
521 LIST_ENTRY(bridge_rtnode) brt_list; /* list linkage */
522 struct bridge_iflist *brt_dst; /* destination if */
523 unsigned long brt_expire; /* expiration time */
524 uint8_t brt_flags; /* address flags */
525 uint8_t brt_addr[ETHER_ADDR_LEN];
526 uint16_t brt_vlan; /* vlan id */
527 };
528
529 #define brt_ifp brt_dst->bif_ifp
530
531 /*
532 * Bridge delayed function call context
533 */
534 typedef void (*bridge_delayed_func_t)(struct bridge_softc *);
535
536 struct bridge_delayed_call {
537 struct bridge_softc *bdc_sc;
538 bridge_delayed_func_t bdc_func; /* Function to call */
539 struct timespec bdc_ts; /* Time to call */
540 u_int32_t bdc_flags;
541 thread_call_t bdc_thread_call;
542 };
543
544 #define BDCF_OUTSTANDING 0x01 /* Delayed call has been scheduled */
545 #define BDCF_CANCELLING 0x02 /* May be waiting for call completion */
546
547 /*
548 * Software state for each bridge.
549 */
550 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
551
552 struct bridge_softc {
553 struct ifnet *sc_ifp; /* make this an interface */
554 uint32_t sc_flags;
555 LIST_ENTRY(bridge_softc) sc_list;
556 decl_lck_mtx_data(, sc_mtx);
557 struct _bridge_rtnode_list * __counted_by(sc_rthash_size) sc_rthash; /* our forwarding table */
558 struct _bridge_rtnode_list sc_rtlist; /* list version of above */
559 uint32_t sc_rthash_key; /* key for hash */
560 uint32_t sc_rthash_size; /* size of the hash table */
561 struct bridge_delayed_call sc_aging_timer;
562 struct bridge_delayed_call sc_resize_call;
563 TAILQ_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */
564 struct bstp_state sc_stp; /* STP state */
565 void *sc_cv;
566 uint32_t sc_brtmax; /* max # of addresses */
567 uint32_t sc_brtcnt; /* cur. # of addresses */
568 uint32_t sc_brttimeout; /* rt timeout in seconds */
569 uint32_t sc_iflist_ref; /* refcount for sc_iflist */
570 uint32_t sc_iflist_xcnt; /* refcount for sc_iflist */
571 TAILQ_HEAD(, bridge_iflist) sc_iflist; /* member interface list */
572 uint32_t sc_brtexceeded; /* # of cache drops */
573 uint32_t sc_filter_flags; /* ipf and flags */
574 struct ifnet *sc_ifaddr; /* member mac copied from */
575 u_char sc_defaddr[6]; /* Default MAC address */
576 char sc_if_xname[IFNAMSIZ];
577
578 struct bridge_iflist *sc_mac_nat_bif; /* single MAC NAT interface */
579 struct mac_nat_entry_list sc_mne_list; /* MAC NAT IPv4 */
580 struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
581 uint32_t sc_mne_max; /* max # of entries */
582 uint32_t sc_mne_count; /* cur. # of entries */
583 uint32_t sc_mne_allocation_failures;
584 #if BRIDGE_LOCK_DEBUG
585 /*
586 * Locking and unlocking calling history
587 */
588 void *lock_lr[BR_LCKDBG_MAX];
589 int next_lock_lr;
590 void *unlock_lr[BR_LCKDBG_MAX];
591 int next_unlock_lr;
592 #endif /* BRIDGE_LOCK_DEBUG */
593 };
594
595 #define SCF_DETACHING 0x01
596 #define SCF_RESIZING 0x02
597 #define SCF_MEDIA_ACTIVE 0x04
598 #define SCF_PROTO_ATTACHED 0x08
599
/*
 * ChecksumOperation
 * - selects the checksum handling requested for a packet; passed as an
 *   argument to bridge_enqueue()
 */
typedef enum {
	CHECKSUM_OPERATION_NONE = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
	CHECKSUM_OPERATION_FINALIZE = 2,
	CHECKSUM_OPERATION_COMPUTE = 3,
} ChecksumOperation;
606
607 typedef struct {
608 u_int ip_hlen; /* IP header length */
609 u_int ip_pay_len; /* length of payload (exclusive of ip_hlen) */
610 u_int ip_m0_len; /* bytes available at ip_hdr (without jumping mbufs) */
611 u_int ip_opt_len; /* IPv6 options headers length */
612 uint8_t ip_proto; /* IPPROTO_TCP, IPPROTO_UDP, etc. */
613 bool ip_is_ipv4;
614 bool ip_is_fragmented;
615 uint8_t *__sized_by(ip_m0_len) ip_hdr; /* pointer to IP header */
616 uint8_t *__indexable ip_proto_hdr; /* ptr to protocol header (TCP) */
617 } ip_packet_info, *ip_packet_info_t;
618
619 struct bridge_hostfilter_stats bridge_hostfilter_stats;
620
621 typedef uint8_t ether_type_flag_t;
622
623 typedef enum {
624 pkt_direction_RX,
625 pkt_direction_TX
626 } pkt_direction_t;
627
628 static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
629 #if BRIDGE_LOCK_DEBUG
630 static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
631 #else
632 static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
633 #endif
634 static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
635
636 static int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
637
638 static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
639 static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
640
641 static int bridge_clone_create(struct if_clone *, uint32_t, void *);
642 static int bridge_clone_destroy(struct ifnet *);
643
644 static errno_t bridge_ioctl(struct ifnet *, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)));
645 #if HAS_IF_CAP
646 static void bridge_mutecaps(struct bridge_softc *);
647 static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
648 int);
649 #endif
650 static errno_t bridge_set_tso(struct bridge_softc *);
651 static void bridge_proto_attach_changed(struct ifnet *);
652 static int bridge_init(struct ifnet *);
653 static void bridge_ifstop(struct ifnet *, int);
654 static int bridge_output(struct ifnet *, struct mbuf *);
655 static void bridge_finalize_cksum(struct ifnet *, struct mbuf *);
656 static void bridge_start(struct ifnet *);
657 static mblist bridge_input_list(struct bridge_softc *, ifnet_t,
658 struct ether_header *, mblist, bool);
659 static errno_t bridge_iff_input(void *, ifnet_t, protocol_family_t,
660 mbuf_t *, char **);
661 static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t,
662 mbuf_t *);
663 static errno_t bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
664 mbuf_t *m);
665 static int bridge_enqueue(ifnet_t, ifnet_t, ifnet_t,
666 ether_type_flag_t, mbuf_t, ChecksumOperation, pkt_direction_t);
667 static mbuf_t bridge_checksum_offload_list(ifnet_t, struct bridge_iflist *,
668 mbuf_t, bool);
669 static mbuf_t bridge_filter_checksum(ifnet_t, struct bridge_iflist * bif,
670 mbuf_t m, bool, bool, bool);
671 static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
672
673 static void bridge_aging_timer(struct bridge_softc *sc);
674
675 static void bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
676 ether_type_flag_t, mbuf_t);
677 static void bridge_broadcast_list(struct bridge_softc *,
678 struct bridge_iflist *, ether_type_flag_t, mbuf_t, pkt_direction_t);
679
680 static void bridge_span(struct bridge_softc *, ether_type_flag_t, struct mbuf *);
681
682 static int bridge_rtupdate(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
683 uint16_t, struct bridge_iflist *, int, uint8_t);
684 static struct bridge_iflist * bridge_rtlookup_bif(struct bridge_softc *,
685 const uint8_t[ETHER_ADDR_LEN], uint16_t);
686 static void bridge_rttrim(struct bridge_softc *);
687 static void bridge_rtage(struct bridge_softc *);
688 static void bridge_rtflush(struct bridge_softc *, int);
689 static int bridge_rtdaddr(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
690 uint16_t);
691
692 static int bridge_rtable_init(struct bridge_softc *);
693 static void bridge_rtable_fini(struct bridge_softc *);
694
695 static void bridge_rthash_resize(struct bridge_softc *);
696
697 static int bridge_rtnode_addr_cmp(const uint8_t[ETHER_ADDR_LEN], const uint8_t[ETHER_ADDR_LEN]);
698 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
699 const uint8_t[ETHER_ADDR_LEN], uint16_t);
700 static int bridge_rtnode_hash(struct bridge_softc *,
701 struct bridge_rtnode *);
702 static int bridge_rtnode_insert(struct bridge_softc *,
703 struct bridge_rtnode *);
704 static void bridge_rtnode_destroy(struct bridge_softc *,
705 struct bridge_rtnode *);
706 #if BRIDGESTP
707 static void bridge_rtable_expire(struct ifnet *, int);
708 static void bridge_state_change(struct ifnet *, int);
709 #endif /* BRIDGESTP */
710
711 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
712 char * __sized_by(IFNAMSIZ) name);
713 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
714 struct ifnet *ifp);
715 static void bridge_delete_member(struct bridge_softc *,
716 struct bridge_iflist *);
717 static void bridge_delete_span(struct bridge_softc *,
718 struct bridge_iflist *);
719
720 static int bridge_ioctl_add(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
721 static int bridge_ioctl_del(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
722 static int bridge_ioctl_gifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
723 static int bridge_ioctl_sifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
724 static int bridge_ioctl_scache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
725 static int bridge_ioctl_gcache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
726 static int bridge_ioctl_gifs32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
727 static int bridge_ioctl_gifs64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
728 static int bridge_ioctl_rts32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
729 static int bridge_ioctl_rts64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
730 static int bridge_ioctl_saddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
731 static int bridge_ioctl_saddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
732 static int bridge_ioctl_sto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
733 static int bridge_ioctl_gto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
734 static int bridge_ioctl_daddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
735 static int bridge_ioctl_daddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
736 static int bridge_ioctl_flush(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
737 static int bridge_ioctl_gpri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
738 static int bridge_ioctl_spri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
739 static int bridge_ioctl_ght(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
740 static int bridge_ioctl_sht(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
741 static int bridge_ioctl_gfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
742 static int bridge_ioctl_sfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
743 static int bridge_ioctl_gma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
744 static int bridge_ioctl_sma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
745 static int bridge_ioctl_sifprio(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
746 static int bridge_ioctl_sifcost(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
747 static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
748 static int bridge_ioctl_addspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
749 static int bridge_ioctl_delspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
750 static int bridge_ioctl_gbparam32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
751 static int bridge_ioctl_gbparam64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
752 static int bridge_ioctl_grte(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
753 static int bridge_ioctl_gifsstp32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
754 static int bridge_ioctl_gifsstp64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
755 static int bridge_ioctl_sproto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
756 static int bridge_ioctl_stxhc(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
757 static int bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len);
758 static int bridge_ioctl_gfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
759 static int bridge_ioctl_sfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
760 static int bridge_ioctl_ghostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
761 static int bridge_ioctl_shostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
762 static int bridge_ioctl_gmnelist32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
763 static int bridge_ioctl_gmnelist64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
764 static int bridge_ioctl_gifstats32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
765 static int bridge_ioctl_gifstats64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
766
767 static int bridge_pf(struct mbuf **, struct ifnet *,
768 uint32_t sc_filter_flags, bool input);
769 static int bridge_ip_checkbasic(struct mbuf **);
770 static int bridge_ip6_checkbasic(struct mbuf **);
771
772 static void bridge_detach(ifnet_t);
773 static void bridge_link_event(struct ifnet *, u_int32_t);
774 static void bridge_iflinkevent(struct ifnet *);
775 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
776 static int interface_media_active(struct ifnet *);
777 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
778 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
779 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
780
781 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
782 struct bridge_iflist *);
783 static void bridge_mac_nat_disable(struct bridge_softc *sc);
784 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
785 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
786 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
787 struct bridge_iflist *);
788 static mbuf_t bridge_mac_nat_input(struct bridge_softc *, ifnet_t, mbuf_t,
789 ifnet_t * dst_if);
790 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
791 struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
792 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
793 const char[ETHER_ADDR_LEN]);
794
795 static mblist bridge_mac_nat_input_list(struct bridge_softc *sc,
796 ifnet_t external_ifp, mbuf_t m, mbuf_t * forward_head);
797 static mbuf_t bridge_mac_nat_translate_list(struct bridge_softc * sc,
798 struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
799 static mbuf_t bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
800 struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
801
802 static mbuf_t bridge_pf_list_out(mbuf_t m, ifnet_t ifp,
803 uint32_t sc_filter_flags);
804
805 static inline ifnet_t
bridge_rtlookup(struct bridge_softc * sc,const uint8_t addr[ETHER_ADDR_LEN],uint16_t vlan)806 bridge_rtlookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
807 uint16_t vlan)
808 {
809 struct bridge_iflist * bif;
810 ifnet_t ifp = NULL;
811
812 bif = bridge_rtlookup_bif(sc, addr, vlan);
813 if (bif != NULL) {
814 ifp = bif->bif_ifp;
815 }
816 return ifp;
817 }
818
819 static bool in_addr_is_ours(const struct in_addr);
820 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
821
822 #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
823
824 static mblist
825 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx);
826
827 static mblist
828 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
829 u_int mac_hlen, bool is_ipv4, bool is_tx);
830
831 static inline mblist
gso_tcp_transmit(ifnet_t ifp,mbuf_t m,u_int mac_hlen,bool is_ipv4)832 gso_tcp_transmit(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4)
833 {
834 return gso_tcp(ifp, m, mac_hlen, is_ipv4, true);
835 }
836
837 /* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */
838 #define VLANTAGOF(_m) 0
839
840 #define BSTP_ETHERADDR_RANGE_FIRST 0x00
841 #define BSTP_ETHERADDR_RANGE_LAST 0x0f
842
843 u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
844 { 0x01, 0x80, 0xc2, 0x00, 0x00, BSTP_ETHERADDR_RANGE_FIRST };
845
846
847 static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
848 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
849
850 #if BRIDGESTP
851 static struct bstp_cb_ops bridge_ops = {
852 .bcb_state = bridge_state_change,
853 .bcb_rtage = bridge_rtable_expire
854 };
855 #endif /* BRIDGESTP */
856
857 SYSCTL_DECL(_net_link);
858 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
859 "Bridge");
860
861 static int bridge_inherit_mac = 0; /* share MAC with first bridge member */
862 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
863 CTLFLAG_RW | CTLFLAG_LOCKED,
864 &bridge_inherit_mac, 0,
865 "Inherit MAC address from the first bridge member");
866
867 SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
868 CTLFLAG_RW | CTLFLAG_LOCKED,
869 &bridge_rtable_prune_period, 0,
870 "Interval between pruning of routing table");
871
872 static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
873 SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
874 CTLFLAG_RW | CTLFLAG_LOCKED,
875 &bridge_rtable_hash_size_max, 0,
876 "Maximum size of the routing hash table");
877
878 #if BRIDGE_DELAYED_CALLBACK_DEBUG
879 static int bridge_delayed_callback_delay = 0;
880 SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
881 CTLFLAG_RW | CTLFLAG_LOCKED,
882 &bridge_delayed_callback_delay, 0,
883 "Delay before calling delayed function");
884 #endif
885
886 SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
887 hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
888 &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
889
890 #if BRIDGESTP
891 static int log_stp = 0; /* log STP state changes */
892 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
893 &log_stp, 0, "Log STP state changes");
894 #endif /* BRIDGESTP */
895
896 struct bridge_control {
897 int (*bc_func)(struct bridge_softc *, void *__sized_by(arg_len) args, size_t arg_len);
898 unsigned int bc_argsize;
899 unsigned int bc_flags;
900 };
901
902 #define BC_F_COPYIN 0x01 /* copy arguments in */
903 #define BC_F_COPYOUT 0x02 /* copy arguments out */
904 #define BC_F_SUSER 0x04 /* do super-user check */
905
906 static const struct bridge_control bridge_control_table32[] = {
907 { .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq), /* 0 */
908 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
909 { .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
910 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
911
912 { .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
913 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
914 { .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
915 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
916
917 { .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
918 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
919 { .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
920 .bc_flags = BC_F_COPYOUT },
921
922 { .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
923 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
924 { .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
925 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
926
927 { .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
928 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
929
930 { .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
931 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
932 { .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam), /* 10 */
933 .bc_flags = BC_F_COPYOUT },
934
935 { .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
936 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
937
938 { .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
939 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
940
941 { .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
942 .bc_flags = BC_F_COPYOUT },
943 { .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
944 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
945
946 { .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
947 .bc_flags = BC_F_COPYOUT },
948 { .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
949 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
950
951 { .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
952 .bc_flags = BC_F_COPYOUT },
953 { .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
954 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
955
956 { .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
957 .bc_flags = BC_F_COPYOUT },
958 { .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam), /* 20 */
959 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
960
961 { .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
962 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
963
964 { .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
965 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
966
967 { .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
968 .bc_flags = BC_F_COPYOUT },
969 { .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
970 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
971
972 { .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
973 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
974
975 { .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
976 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
977 { .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
978 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
979
980 { .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
981 .bc_flags = BC_F_COPYOUT },
982
983 { .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
984 .bc_flags = BC_F_COPYOUT },
985
986 { .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32), /* 30 */
987 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
988
989 { .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
990 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
991
992 { .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
993 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
994
995 { .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
996 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
997
998 { .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
999 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1000 { .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1001 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1002
1003 { .bc_func = bridge_ioctl_gmnelist32,
1004 .bc_argsize = sizeof(struct ifbrmnelist32),
1005 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1006 { .bc_func = bridge_ioctl_gifstats32,
1007 .bc_argsize = sizeof(struct ifbrmreq32),
1008 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1009 };
1010
1011 static const struct bridge_control bridge_control_table64[] = {
1012 { .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq), /* 0 */
1013 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1014 { .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
1015 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1016
1017 { .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
1018 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1019 { .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
1020 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1021
1022 { .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
1023 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1024 { .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
1025 .bc_flags = BC_F_COPYOUT },
1026
1027 { .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
1028 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1029 { .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
1030 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1031
1032 { .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
1033 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1034
1035 { .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
1036 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1037 { .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam), /* 10 */
1038 .bc_flags = BC_F_COPYOUT },
1039
1040 { .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
1041 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1042
1043 { .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
1044 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1045
1046 { .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
1047 .bc_flags = BC_F_COPYOUT },
1048 { .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
1049 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1050
1051 { .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
1052 .bc_flags = BC_F_COPYOUT },
1053 { .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
1054 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1055
1056 { .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
1057 .bc_flags = BC_F_COPYOUT },
1058 { .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
1059 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1060
1061 { .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
1062 .bc_flags = BC_F_COPYOUT },
1063 { .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam), /* 20 */
1064 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1065
1066 { .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
1067 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1068
1069 { .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
1070 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1071
1072 { .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
1073 .bc_flags = BC_F_COPYOUT },
1074 { .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
1075 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1076
1077 { .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
1078 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1079
1080 { .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
1081 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1082 { .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
1083 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1084
1085 { .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
1086 .bc_flags = BC_F_COPYOUT },
1087
1088 { .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
1089 .bc_flags = BC_F_COPYOUT },
1090
1091 { .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64), /* 30 */
1092 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1093
1094 { .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
1095 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1096
1097 { .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
1098 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1099
1100 { .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
1101 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1102
1103 { .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1104 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1105 { .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1106 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1107
1108 { .bc_func = bridge_ioctl_gmnelist64,
1109 .bc_argsize = sizeof(struct ifbrmnelist64),
1110 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1111 { .bc_func = bridge_ioctl_gifstats64,
1112 .bc_argsize = sizeof(struct ifbrmreq64),
1113 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1114 };
1115
1116 static const unsigned int bridge_control_table_size =
1117 sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1118
1119 static LIST_HEAD(, bridge_softc) bridge_list =
1120 LIST_HEAD_INITIALIZER(bridge_list);
1121
1122 #define BRIDGENAME "bridge"
1123 #define BRIDGES_MAX IF_MAXUNIT
1124 #define BRIDGE_ZONE_MAX_ELEM MIN(IFNETS_MAX, BRIDGES_MAX)
1125
1126 static struct if_clone bridge_cloner =
1127 IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
1128 0, BRIDGES_MAX);
1129
1130 static int if_bridge_txstart = 0;
1131 SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
1132 &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");
1133
1134 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
1135 &if_bridge_debug, 0, "Bridge debug flags");
1136
1137 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
1138 CTLFLAG_RW | CTLFLAG_LOCKED,
1139 &if_bridge_log_level, 0, "Bridge log level");
1140
1141 static int if_bridge_output_skip_filters = 1;
1142 SYSCTL_INT(_net_link_bridge, OID_AUTO, output_skip_filters,
1143 CTLFLAG_RW | CTLFLAG_LOCKED,
1144 &if_bridge_output_skip_filters, 0, "Bridge skip output filters");
1145
1146 int bridge_enable_early_input = 1; /* DLIL early input */
1147 SYSCTL_INT(_net_link_bridge, OID_AUTO, enable_early_input,
1148 CTLFLAG_RW | CTLFLAG_LOCKED,
1149 &bridge_enable_early_input, 0,
1150 "Bridge enable early input");
1151
1152 int bridge_allow_lro_num_seg = 1; /* allow LRO_NUM_SEG to keep LRO enabled */
1153 SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_lro_num_seg,
1154 CTLFLAG_RW | CTLFLAG_LOCKED,
1155 &bridge_allow_lro_num_seg, 0,
1156 "Bridge allow LRO_NUM_SEG to keep LRO enabled");
1157
1158 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX 256
1159 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT 110
1160 #define BRIDGE_TSO_REDUCE_MSS_TX_MAX 256
1161 #define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT 0
1162
1163 static u_int if_bridge_tso_reduce_mss_forwarding
1164 = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
1165 static u_int if_bridge_tso_reduce_mss_tx
1166 = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1167
1168 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1169 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1170 {
1171 int changed;
1172 int error;
1173 u_int new_value;
1174
1175 error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1176 &changed);
1177 if (error == 0 && changed != 0) {
1178 if (new_value > val_max) {
1179 return EINVAL;
1180 }
1181 *val = new_value;
1182 }
1183 return error;
1184 }
1185
1186 static int
1187 bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
1188 {
1189 return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
1190 BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
1191 }
1192
1193 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
1194 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1195 0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
1196 "Bridge tso reduce mss when forwarding");
1197
1198 static int
1199 bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
1200 {
1201 return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
1202 BRIDGE_TSO_REDUCE_MSS_TX_MAX);
1203 }
1204
1205 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
1206 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1207 0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
1208 "Bridge tso reduce mss on transmit");
1209
#if DEBUG || DEVELOPMENT
/*
 * net.link.bridge.reduce_tso_mtu
 * - a non-zero setting makes the bridge override the interface TSO MTU with
 *   a lower value (i.e. 16K), so that the "use GSO instead" path can be
 *   tested
 */
static int if_bridge_reduce_tso_mtu = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, reduce_tso_mtu,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bridge_reduce_tso_mtu, 0,
    "Bridge interface reduce TSO MTU");

#endif /* DEBUG || DEVELOPMENT */
1222
1223 static void brlog_ether_header(struct ether_header *);
1224 static void brlog_mbuf_data(mbuf_t, size_t, size_t);
1225 static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
1226 static void brlog_mbuf(mbuf_t, const char *, const char *);
1227 static void brlog_link(struct bridge_softc * sc);
1228
1229 #if BRIDGE_LOCK_DEBUG
/* lock-debug variants of the bridge lock primitives */
static void bridge_lock(struct bridge_softc *sc);
static void bridge_unlock(struct bridge_softc *sc);
static int bridge_lock2ref(struct bridge_softc *sc);
static void bridge_unref(struct bridge_softc *sc);
static void bridge_xlock(struct bridge_softc *sc);
static void bridge_xdrop(struct bridge_softc *sc);

/* declare local `v' holding the return address of the current function */
#define DECL_RETURN_ADDR(v)                                             \
	void * __single v = __unsafe_forge_single(void *, __builtin_return_address(0))
1238
1239 static void
bridge_lock(struct bridge_softc * sc)1240 bridge_lock(struct bridge_softc *sc)
1241 {
1242 DECL_RETURN_ADDR(lr_saved);
1243
1244 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1245
1246 _BRIDGE_LOCK(sc);
1247
1248 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1249 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1250 }
1251
1252 static void
bridge_unlock(struct bridge_softc * sc)1253 bridge_unlock(struct bridge_softc *sc)
1254 {
1255 DECL_RETURN_ADDR(lr_saved);
1256
1257 BRIDGE_LOCK_ASSERT_HELD(sc);
1258
1259 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1260 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1261
1262 _BRIDGE_UNLOCK(sc);
1263 }
1264
1265 static int
bridge_lock2ref(struct bridge_softc * sc)1266 bridge_lock2ref(struct bridge_softc *sc)
1267 {
1268 int error = 0;
1269 DECL_RETURN_ADDR(lr_saved);
1270
1271 BRIDGE_LOCK_ASSERT_HELD(sc);
1272
1273 if (sc->sc_iflist_xcnt > 0) {
1274 error = EBUSY;
1275 } else {
1276 sc->sc_iflist_ref++;
1277 }
1278
1279 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1280 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1281
1282 _BRIDGE_UNLOCK(sc);
1283
1284 return error;
1285 }
1286
1287 static void
bridge_unref(struct bridge_softc * sc)1288 bridge_unref(struct bridge_softc *sc)
1289 {
1290 DECL_RETURN_ADDR(lr_saved);
1291
1292 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1293
1294 _BRIDGE_LOCK(sc);
1295 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1296 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1297
1298 sc->sc_iflist_ref--;
1299
1300 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1301 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1302 if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1303 _BRIDGE_UNLOCK(sc);
1304 wakeup(&sc->sc_cv);
1305 } else {
1306 _BRIDGE_UNLOCK(sc);
1307 }
1308 }
1309
1310 static void
bridge_xlock(struct bridge_softc * sc)1311 bridge_xlock(struct bridge_softc *sc)
1312 {
1313 DECL_RETURN_ADDR(lr_saved);
1314
1315 BRIDGE_LOCK_ASSERT_HELD(sc);
1316
1317 sc->sc_iflist_xcnt++;
1318 while (sc->sc_iflist_ref > 0) {
1319 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1320 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1321
1322 msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1323
1324 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1325 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1326 }
1327 }
1328
1329 #undef DECL_RETURN_ADDR
1330
1331 static void
bridge_xdrop(struct bridge_softc * sc)1332 bridge_xdrop(struct bridge_softc *sc)
1333 {
1334 BRIDGE_LOCK_ASSERT_HELD(sc);
1335
1336 sc->sc_iflist_xcnt--;
1337 }
1338
1339 #endif /* BRIDGE_LOCK_DEBUG */
1340
1341 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1342 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1343 {
1344 if (m) {
1345 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1346 "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1347 prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1348 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1349 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1350 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1351 suffix ? suffix : "");
1352 } else {
1353 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1354 }
1355 }
1356
1357 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1358 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1359 {
1360 if (m) {
1361 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1362 "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1363 "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1364 prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1365 mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1366 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
1367 (unsigned int)mbuf_maxlen(m),
1368 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1369 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1370 !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1371 if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1372 brlog_mbuf_pkthdr(m, "", suffix);
1373 }
1374 } else {
1375 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1376 }
1377 }
1378
1379 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1380 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1381 {
1382 mbuf_t n;
1383 size_t i, j;
1384 size_t pktlen, mlen, maxlen;
1385 unsigned char *ptr;
1386
1387 pktlen = mbuf_pkthdr_len(m);
1388
1389 if (offset > pktlen) {
1390 return;
1391 }
1392
1393 maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1394 n = m;
1395 mlen = mbuf_len(n);
1396 ptr = mtod(n, unsigned char *);
1397 for (i = 0, j = 0; i < maxlen; i++, j++) {
1398 if (j >= mlen) {
1399 n = mbuf_next(n);
1400 if (n == 0) {
1401 break;
1402 }
1403 ptr = mtod(n, unsigned char *);
1404 mlen = mbuf_len(n);
1405 j = 0;
1406 }
1407 if (i >= offset) {
1408 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1409 "%02x%s", ptr[j], i % 2 ? " " : "");
1410 }
1411 }
1412 }
1413
1414 static void
brlog_ether_header(struct ether_header * eh)1415 brlog_ether_header(struct ether_header *eh)
1416 {
1417 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1418 "%02x:%02x:%02x:%02x:%02x:%02x > "
1419 "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1420 eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1421 eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1422 eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1423 eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1424 ntohs(eh->ether_type));
1425 }
1426
1427 static char *
ether_ntop(char * __sized_by (len)buf,size_t len,const u_char ap[ETHER_ADDR_LEN])1428 ether_ntop(char * __sized_by(len) buf, size_t len, const u_char ap[ETHER_ADDR_LEN])
1429 {
1430 snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
1431 ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);
1432
1433 return buf;
1434 }
1435
1436 static void
brlog_link(struct bridge_softc * sc)1437 brlog_link(struct bridge_softc * sc)
1438 {
1439 int i;
1440 uint32_t sdl_buffer[(offsetof(struct sockaddr_dl, sdl_data) +
1441 IFNAMSIZ + ETHER_ADDR_LEN)];
1442 struct sockaddr_dl *sdl = SDL((uint8_t*)&sdl_buffer); /* SDL requires byte pointer */
1443 const u_char * lladdr;
1444 char lladdr_str[48];
1445
1446 memset(sdl_buffer, 0, sizeof(sdl_buffer));
1447 sdl->sdl_family = AF_LINK;
1448 sdl->sdl_nlen = strbuflen(sc->sc_if_xname);
1449 sdl->sdl_alen = ETHER_ADDR_LEN;
1450 sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1451 memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1452 memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1453 lladdr_str[0] = '\0';
1454 for (i = 0, lladdr = CONST_LLADDR(sdl);
1455 i < sdl->sdl_alen;
1456 i++, lladdr++) {
1457 char byte_str[4];
1458
1459 snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1460 *lladdr);
1461 strbufcat(lladdr_str, byte_str);
1462 }
1463 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1464 "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1465 " slen %d addr %s", sc->sc_if_xname,
1466 sdl->sdl_len, sdl->sdl_index,
1467 sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1468 sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1469 }
1470
1471 static int
_mbuf_get_tso_mss(mbuf_t m)1472 _mbuf_get_tso_mss(mbuf_t m)
1473 {
1474 int mss = 0;
1475
1476 if ((m->m_pkthdr.csum_flags & _TSO_CSUM) != 0) {
1477 mss = m->m_pkthdr.tso_segsz;
1478 }
1479 return mss;
1480 }
1481
1482 /*
1483 * bridgeattach:
1484 *
1485 * Pseudo-device attach routine.
1486 */
1487 __private_extern__ int
bridgeattach(int n)1488 bridgeattach(int n)
1489 {
1490 #pragma unused(n)
1491 int error;
1492
1493 LIST_INIT(&bridge_list);
1494
1495 #if BRIDGESTP
1496 bstp_sys_init();
1497 #endif /* BRIDGESTP */
1498
1499 error = if_clone_attach(&bridge_cloner);
1500 if (error != 0) {
1501 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1502 }
1503 return error;
1504 }
1505
1506 static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m,int len)1507 _mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
1508 {
1509 mbuf_setdata(m, mtodo(m, len), mbuf_len(m) - len);
1510 mbuf_pkthdr_adjustlen(m, -len);
1511 }
1512
1513 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1514 bridge_ifnet_set_attrs(struct ifnet * ifp)
1515 {
1516 errno_t error;
1517
1518 error = ifnet_set_mtu(ifp, ETHERMTU);
1519 if (error != 0) {
1520 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1521 goto done;
1522 }
1523 error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1524 if (error != 0) {
1525 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1526 goto done;
1527 }
1528 error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1529 if (error != 0) {
1530 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1531 goto done;
1532 }
1533 error = ifnet_set_flags(ifp,
1534 IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1535 0xffff);
1536
1537 if (error != 0) {
1538 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1539 goto done;
1540 }
1541 done:
1542 return error;
1543 }
1544
1545 static void
bridge_interface_proto_attach_changed(ifnet_t ifp)1546 bridge_interface_proto_attach_changed(ifnet_t ifp)
1547 {
1548 uint32_t proto_count;
1549 struct bridge_softc * __single sc = ifp->if_softc;
1550
1551 proto_count = if_get_protolist(ifp, NULL, 0);
1552 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1553 "%s: proto count %d", ifp->if_xname, proto_count);
1554
1555 if (sc == NULL) {
1556 return;
1557 }
1558 BRIDGE_LOCK(sc);
1559 if ((sc->sc_flags & SCF_DETACHING) != 0) {
1560 BRIDGE_UNLOCK(sc);
1561 return;
1562 }
1563 if (proto_count >= 2) {
1564 /* an upper layer protocol is attached */
1565 sc->sc_flags |= SCF_PROTO_ATTACHED;
1566 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1567 "%s: setting SCF_PROTO_ATTACHED", ifp->if_xname);
1568 } else {
1569 /* an upper layer protocol was detached */
1570 sc->sc_flags &= ~SCF_PROTO_ATTACHED;
1571 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1572 "%s: clearing SCF_PROTO_ATTACHED", ifp->if_xname);
1573 }
1574 BRIDGE_UNLOCK(sc);
1575 }
1576
1577 static void
bridge_interface_event(struct ifnet * ifp,__unused protocol_family_t protocol,const struct kev_msg * event)1578 bridge_interface_event(struct ifnet * ifp,
1579 __unused protocol_family_t protocol, const struct kev_msg * event)
1580 {
1581 int event_code;
1582
1583 if (event->vendor_code != KEV_VENDOR_APPLE
1584 || event->kev_class != KEV_NETWORK_CLASS
1585 || event->kev_subclass != KEV_DL_SUBCLASS) {
1586 return;
1587 }
1588 event_code = event->event_code;
1589 switch (event_code) {
1590 case KEV_DL_PROTO_DETACHED:
1591 case KEV_DL_PROTO_ATTACHED:
1592 bridge_interface_proto_attach_changed(ifp);
1593 break;
1594 default:
1595 break;
1596 }
1597 return;
1598 }
1599
1600 /*
1601 * Function: bridge_interface_attach_protocol
1602 * Purpose:
1603 * Attach a protocol to the bridge to get events on the interface,
1604 * in particular, whether protocols are attached/detached.
1605 */
1606 static int
bridge_interface_attach_protocol(ifnet_t ifp)1607 bridge_interface_attach_protocol(ifnet_t ifp)
1608 {
1609 int error;
1610 struct ifnet_attach_proto_param_v2 reg;
1611
1612 bzero(®, sizeof(reg));
1613 reg.event = bridge_interface_event;
1614
1615 error = ifnet_attach_protocol_v2(ifp, PF_BRIDGE, ®);
1616 if (error != 0) {
1617 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
1618 "%s: ifnet_attach_protocol failed, %d",
1619 ifp->if_xname, error);
1620 }
1621 return error;
1622 }
1623
1624 static void
bridge_interface_detach_protocol(ifnet_t ifp)1625 bridge_interface_detach_protocol(ifnet_t ifp)
1626 {
1627 (void)ifnet_detach_protocol(ifp, PF_BRIDGE);
1628 }
1629
1630 /*
1631 * bridge_clone_create:
1632 *
1633 * Create a new bridge instance.
1634 */
1635 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1636 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1637 {
1638 #pragma unused(params)
1639 ifnet_ref_t ifp = NULL;
1640 struct bridge_softc *sc = NULL;
1641 struct bridge_softc *sc2 = NULL;
1642 struct ifnet_init_eparams init_params;
1643 errno_t error = 0;
1644 uint8_t eth_hostid[ETHER_ADDR_LEN];
1645 int fb, retry, has_hostid;
1646
1647 sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
1648 lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1649 sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1650 sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1651 sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1652 sc->sc_filter_flags = 0;
1653
1654 TAILQ_INIT(&sc->sc_iflist);
1655
1656 /* use the interface name as the unique id for ifp recycle */
1657 snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1658 ifc->ifc_name, unit);
1659 bzero(&init_params, sizeof(init_params));
1660 init_params.ver = IFNET_INIT_CURRENT_VERSION;
1661 init_params.len = sizeof(init_params);
1662 /* Initialize our routing table. */
1663 error = bridge_rtable_init(sc);
1664 if (error != 0) {
1665 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1666 goto done;
1667 }
1668 TAILQ_INIT(&sc->sc_spanlist);
1669 if (if_bridge_txstart) {
1670 init_params.start = bridge_start;
1671 } else {
1672 init_params.flags = IFNET_INIT_LEGACY;
1673 init_params.output = bridge_output;
1674 }
1675 init_params.uniqueid_len = strbuflen(sc->sc_if_xname);
1676 init_params.uniqueid = sc->sc_if_xname;
1677 init_params.sndq_maxlen = IFQ_MAXLEN;
1678 init_params.name = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
1679 init_params.unit = unit;
1680 init_params.family = IFNET_FAMILY_ETHERNET;
1681 init_params.type = IFT_BRIDGE;
1682 init_params.demux = ether_demux;
1683 init_params.add_proto = ether_add_proto;
1684 init_params.del_proto = ether_del_proto;
1685 init_params.check_multi = ether_check_multi;
1686 init_params.framer_extended = ether_frameout_extended;
1687 init_params.softc = sc;
1688 init_params.ioctl = bridge_ioctl;
1689 init_params.detach = bridge_detach;
1690 init_params.broadcast_addr = etherbroadcastaddr;
1691 init_params.broadcast_len = ETHER_ADDR_LEN;
1692
1693 error = ifnet_allocate_extended(&init_params, &ifp);
1694 if (error != 0) {
1695 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1696 goto done;
1697 }
1698 LIST_INIT(&sc->sc_mne_list);
1699 LIST_INIT(&sc->sc_mne_list_v6);
1700 sc->sc_ifp = ifp;
1701 error = bridge_ifnet_set_attrs(ifp);
1702 if (error != 0) {
1703 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1704 error);
1705 goto done;
1706 }
1707 /*
1708 * Generate an ethernet address with a locally administered address.
1709 *
1710 * Since we are using random ethernet addresses for the bridge, it is
1711 * possible that we might have address collisions, so make sure that
1712 * this hardware address isn't already in use on another bridge.
1713 * The first try uses the "hostid" and falls back to read_frandom();
1714 * for "hostid", we use the MAC address of the first-encountered
1715 * Ethernet-type interface that is currently configured.
1716 */
1717 fb = 0;
1718 has_hostid = (uuid_get_ethernet(ð_hostid[0]) == 0);
1719 for (retry = 1; retry != 0;) {
1720 if (fb || has_hostid == 0) {
1721 read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1722 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1723 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1724 } else {
1725 bcopy(ð_hostid[0], &sc->sc_defaddr,
1726 ETHER_ADDR_LEN);
1727 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1728 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1729 sc->sc_defaddr[3] = /* stir it up a bit */
1730 ((sc->sc_defaddr[3] & 0x0f) << 4) |
1731 ((sc->sc_defaddr[3] & 0xf0) >> 4);
1732 /*
1733 * Mix in the LSB as it's actually pretty significant,
1734 * see rdar://14076061
1735 */
1736 sc->sc_defaddr[4] =
1737 (((sc->sc_defaddr[4] & 0x0f) << 4) |
1738 ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1739 sc->sc_defaddr[5];
1740 sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1741 }
1742
1743 fb = 1;
1744 retry = 0;
1745 lck_mtx_lock(&bridge_list_mtx);
1746 LIST_FOREACH(sc2, &bridge_list, sc_list) {
1747 if (_ether_cmp(sc->sc_defaddr,
1748 IF_LLADDR(sc2->sc_ifp)) == 0) {
1749 retry = 1;
1750 }
1751 }
1752 lck_mtx_unlock(&bridge_list_mtx);
1753 }
1754
1755 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1756
1757 if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1758 brlog_link(sc);
1759 }
1760 error = ifnet_attach(ifp, NULL);
1761 if (error != 0) {
1762 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1763 goto done;
1764 }
1765 (void)bridge_interface_attach_protocol(ifp);
1766
1767 error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1768 IFT_ETHER);
1769 if (error != 0) {
1770 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1771 error);
1772 goto done;
1773 }
1774
1775 ifnet_set_offload(ifp,
1776 IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1777 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1778 error = bridge_set_tso(sc);
1779 if (error != 0) {
1780 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1781 goto done;
1782 }
1783 #if BRIDGESTP
1784 bstp_attach(&sc->sc_stp, &bridge_ops);
1785 #endif /* BRIDGESTP */
1786
1787 lck_mtx_lock(&bridge_list_mtx);
1788 LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1789 lck_mtx_unlock(&bridge_list_mtx);
1790
1791 /* attach as ethernet */
1792 error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1793 NULL, NULL);
1794
1795 done:
1796 if (error != 0) {
1797 if (ifp != NULL) {
1798 bridge_interface_detach_protocol(ifp);
1799 }
1800 BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1801 /* TBD: Clean up: sc, sc_rthash etc */
1802 }
1803
1804 return error;
1805 }
1806
1807 /*
1808 * bridge_clone_destroy:
1809 *
1810 * Destroy a bridge instance.
1811 */
1812 static int
bridge_clone_destroy(struct ifnet * ifp)1813 bridge_clone_destroy(struct ifnet *ifp)
1814 {
1815 struct bridge_softc * __single sc = ifp->if_softc;
1816 struct bridge_iflist *bif;
1817 errno_t error;
1818
1819 bridge_interface_detach_protocol(ifp);
1820
1821 BRIDGE_LOCK(sc);
1822 if ((sc->sc_flags & SCF_DETACHING)) {
1823 BRIDGE_UNLOCK(sc);
1824 return 0;
1825 }
1826 sc->sc_flags |= SCF_DETACHING;
1827
1828 bridge_ifstop(ifp, 1);
1829
1830 bridge_cancel_delayed_call(&sc->sc_resize_call);
1831
1832 bridge_cleanup_delayed_call(&sc->sc_resize_call);
1833 bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1834
1835 error = ifnet_set_flags(ifp, 0, IFF_UP);
1836 if (error != 0) {
1837 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1838 }
1839
1840 while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1841 bridge_delete_member(sc, bif);
1842 }
1843
1844 while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1845 bridge_delete_span(sc, bif);
1846 }
1847 BRIDGE_UNLOCK(sc);
1848
1849 error = ifnet_detach(ifp);
1850 if (error != 0) {
1851 panic("%s (%d): ifnet_detach(%p) failed %d",
1852 __func__, __LINE__, ifp, error);
1853 }
1854 return 0;
1855 }
1856
/*
 * DRVSPEC:
 *
 *	Shared body of the driver-specific ioctl cases.  It expands against
 *	the enclosing scope's `cmd', `sc', `error', `args', `ifd', `bc' and
 *	`bridge_control_table', validates the request, copies the argument
 *	in, runs the handler with the bridge lock held, and copies the
 *	result out.  On any failure `error' is set and the remaining steps
 *	are skipped.
 *
 *	NOTE(review): the direction check compares `cmd' against the
 *	un-suffixed SIOCGDRVSPEC/SIOCSDRVSPEC values -- confirm these match
 *	the 32/64-bit command values used where the macro is expanded.
 */
#define DRVSPEC do { \
	/* the command index must fall inside the control table */ \
	if (ifd->ifd_cmd >= bridge_control_table_size) { \
	        error = EINVAL; \
	        break; \
	} \
	bc = &bridge_control_table[ifd->ifd_cmd]; \
 \
	/* "get" needs a copy-out handler, "set" must not have one */ \
	if ((cmd == SIOCGDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) || \
	    (cmd == SIOCSDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) != 0)) { \
	        error = EINVAL; \
	        break; \
	} \
 \
	/* privileged commands require super-user credentials */ \
	if ((bc->bc_flags & BC_F_SUSER) != 0) { \
	        error = kauth_authorize_generic(kauth_cred_get(), \
	            KAUTH_GENERIC_ISSUSER); \
	        if (error != 0) { \
	                break; \
	        } \
	} \
 \
	/* the argument must be exactly the size the handler expects */ \
	if (ifd->ifd_len != bc->bc_argsize || \
	    ifd->ifd_len > sizeof (args)) { \
	        error = EINVAL; \
	        break; \
	} \
 \
	bzero(&args, sizeof (args)); \
	if ((bc->bc_flags & BC_F_COPYIN) != 0) { \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len); \
	        if (error != 0) { \
	                break; \
	        } \
	} \
 \
	BRIDGE_LOCK(sc); \
	error = (*bc->bc_func)(sc, &args, sizeof(args)); \
	BRIDGE_UNLOCK(sc); \
	if (error != 0) { \
	        break; \
	} \
 \
	if ((bc->bc_flags & BC_F_COPYOUT) != 0) { \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len); \
	} \
} while (0)
1903
1904 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1905 interface_needs_input_broadcast(struct ifnet * ifp)
1906 {
1907 /*
1908 * Selectively enable input broadcast only when necessary.
1909 * The bridge interface itself attaches a fake protocol
1910 * so checking for at least two protocols means that the
1911 * interface is being used for something besides bridging
1912 * and needs to see broadcast packets from other members.
1913 */
1914 return if_get_protolist(ifp, NULL, 0) >= 2;
1915 }
1916
1917 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1918 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1919 {
1920 boolean_t old_input_broadcast;
1921
1922 old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1923 if (input_broadcast) {
1924 bif->bif_flags |= BIFF_INPUT_BROADCAST;
1925 } else {
1926 bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1927 }
1928 return old_input_broadcast != input_broadcast;
1929 }
1930
1931 /*
1932 * bridge_ioctl:
1933 *
1934 * Handle a control request from the operator.
1935 */
1936 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * __sized_by (IOCPARM_LEN (cmd))data)1937 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)) data)
1938 {
1939 struct bridge_softc * __single sc = ifp->if_softc;
1940 struct ifreq *ifr = (struct ifreq *)data;
1941 struct bridge_iflist *bif;
1942 int error = 0;
1943
1944 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1945
1946 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1947 "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1948 ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1949 (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1950 (char)IOCGROUP(cmd), cmd & 0xff);
1951
1952 switch (cmd) {
1953 case SIOCSIFADDR:
1954 case SIOCAIFADDR:
1955 ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1956 break;
1957
1958 case SIOCGIFMEDIA32:
1959 case SIOCGIFMEDIA64: {
1960 // cast to 32bit version to work within bounds with 32bit userspace
1961 struct ifmediareq32 *ifmr = (struct ifmediareq32 *)data;
1962 user_addr_t user_addr;
1963
1964 user_addr = (cmd == SIOCGIFMEDIA64) ?
1965 ((struct ifmediareq64 *)data)->ifmu_ulist :
1966 CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
1967
1968 ifmr->ifm_status = IFM_AVALID;
1969 ifmr->ifm_mask = 0;
1970 ifmr->ifm_count = 1;
1971
1972 BRIDGE_LOCK(sc);
1973 if (!(sc->sc_flags & SCF_DETACHING) &&
1974 (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1975 ifmr->ifm_status |= IFM_ACTIVE;
1976 ifmr->ifm_active = ifmr->ifm_current =
1977 IFM_ETHER | IFM_AUTO;
1978 } else {
1979 ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1980 }
1981 BRIDGE_UNLOCK(sc);
1982
1983 if (user_addr != USER_ADDR_NULL) {
1984 error = copyout(&ifmr->ifm_current, user_addr,
1985 sizeof(int));
1986 }
1987 break;
1988 }
1989
1990 case SIOCADDMULTI:
1991 case SIOCDELMULTI:
1992 break;
1993
1994 case SIOCSDRVSPEC32:
1995 case SIOCGDRVSPEC32: {
1996 union {
1997 struct ifbreq ifbreq;
1998 struct ifbifconf32 ifbifconf;
1999 struct ifbareq32 ifbareq;
2000 struct ifbaconf32 ifbaconf;
2001 struct ifbrparam ifbrparam;
2002 struct ifbropreq32 ifbropreq;
2003 } args;
2004 struct ifdrv32 *ifd = (struct ifdrv32 *)data;
2005 const struct bridge_control *bridge_control_table =
2006 bridge_control_table32, *bc;
2007
2008 DRVSPEC;
2009
2010 break;
2011 }
2012 case SIOCSDRVSPEC64:
2013 case SIOCGDRVSPEC64: {
2014 union {
2015 struct ifbreq ifbreq;
2016 struct ifbifconf64 ifbifconf;
2017 struct ifbareq64 ifbareq;
2018 struct ifbaconf64 ifbaconf;
2019 struct ifbrparam ifbrparam;
2020 struct ifbropreq64 ifbropreq;
2021 } args;
2022 struct ifdrv64 *ifd = (struct ifdrv64 *)data;
2023 const struct bridge_control *bridge_control_table =
2024 bridge_control_table64, *bc;
2025
2026 DRVSPEC;
2027
2028 break;
2029 }
2030
2031 case SIOCSIFFLAGS:
2032 if (!(ifp->if_flags & IFF_UP) &&
2033 (ifp->if_flags & IFF_RUNNING)) {
2034 /*
2035 * If interface is marked down and it is running,
2036 * then stop and disable it.
2037 */
2038 BRIDGE_LOCK(sc);
2039 bridge_ifstop(ifp, 1);
2040 BRIDGE_UNLOCK(sc);
2041 } else if ((ifp->if_flags & IFF_UP) &&
2042 !(ifp->if_flags & IFF_RUNNING)) {
2043 /*
2044 * If interface is marked up and it is stopped, then
2045 * start it.
2046 */
2047 BRIDGE_LOCK(sc);
2048 error = bridge_init(ifp);
2049 BRIDGE_UNLOCK(sc);
2050 }
2051 break;
2052
2053 case SIOCSIFLLADDR:
2054 error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
2055 ifr->ifr_addr.sa_len);
2056 if (error != 0) {
2057 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
2058 "%s SIOCSIFLLADDR error %d", ifp->if_xname,
2059 error);
2060 }
2061 break;
2062
2063 case SIOCSIFMTU:
2064 if (ifr->ifr_mtu < 576) {
2065 error = EINVAL;
2066 break;
2067 }
2068 BRIDGE_LOCK(sc);
2069 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2070 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
2071 BRIDGE_UNLOCK(sc);
2072 break;
2073 }
2074 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2075 if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
2076 BRIDGE_LOG(LOG_NOTICE, 0,
2077 "%s invalid MTU: %u(%s) != %d",
2078 sc->sc_ifp->if_xname,
2079 bif->bif_ifp->if_mtu,
2080 bif->bif_ifp->if_xname, ifr->ifr_mtu);
2081 error = EINVAL;
2082 break;
2083 }
2084 }
2085 if (!error) {
2086 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
2087 }
2088 BRIDGE_UNLOCK(sc);
2089 break;
2090
2091 default:
2092 error = ether_ioctl(ifp, cmd, data);
2093 if (error != 0 && error != EOPNOTSUPP) {
2094 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
2095 "ifp %s cmd 0x%08lx "
2096 "(%c%c [%lu] %c %lu) failed error: %d",
2097 ifp->if_xname, cmd,
2098 (cmd & IOC_IN) ? 'I' : ' ',
2099 (cmd & IOC_OUT) ? 'O' : ' ',
2100 IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
2101 cmd & 0xff, error);
2102 }
2103 break;
2104 }
2105 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2106
2107 return error;
2108 }
2109
2110 #if HAS_IF_CAP
2111 /*
2112 * bridge_mutecaps:
2113 *
2114 * Clear or restore unwanted capabilities on the member interface
2115 */
2116 static void
bridge_mutecaps(struct bridge_softc * sc)2117 bridge_mutecaps(struct bridge_softc *sc)
2118 {
2119 struct bridge_iflist *bif;
2120 int enabled, mask;
2121
2122 /* Initial bitmask of capabilities to test */
2123 mask = BRIDGE_IFCAPS_MASK;
2124
2125 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2126 /* Every member must support it or its disabled */
2127 mask &= bif->bif_savedcaps;
2128 }
2129
2130 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2131 enabled = bif->bif_ifp->if_capenable;
2132 enabled &= ~BRIDGE_IFCAPS_STRIP;
2133 /* strip off mask bits and enable them again if allowed */
2134 enabled &= ~BRIDGE_IFCAPS_MASK;
2135 enabled |= mask;
2136
2137 bridge_set_ifcap(sc, bif, enabled);
2138 }
2139 }
2140
2141 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)2142 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
2143 {
2144 struct ifnet *ifp = bif->bif_ifp;
2145 struct ifreq ifr;
2146 int error;
2147
2148 bzero(&ifr, sizeof(ifr));
2149 ifr.ifr_reqcap = set;
2150
2151 if (ifp->if_capenable != set) {
2152 IFF_LOCKGIANT(ifp);
2153 error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2154 IFF_UNLOCKGIANT(ifp);
2155 if (error) {
2156 BRIDGE_LOG(LOG_NOTICE, 0,
2157 "%s error setting interface capabilities on %s",
2158 sc->sc_ifp->if_xname, ifp->if_xname);
2159 }
2160 }
2161 }
2162 #endif /* HAS_IF_CAP */
2163
2164 static errno_t
siocsifcap(struct ifnet * ifp,uint32_t cap_enable)2165 siocsifcap(struct ifnet * ifp, uint32_t cap_enable)
2166 {
2167 struct ifreq ifr;
2168
2169 bzero(&ifr, sizeof(ifr));
2170 ifr.ifr_reqcap = cap_enable;
2171 return ifnet_ioctl(ifp, 0, SIOCSIFCAP, &ifr);
2172 }
2173
2174 static const char *
enable_disable_str(boolean_t enable)2175 enable_disable_str(boolean_t enable)
2176 {
2177 return (const char * __null_terminated)(enable ? "enable" : "disable");
2178 }
2179
2180 static boolean_t
bridge_set_lro(struct ifnet * ifp,boolean_t enable)2181 bridge_set_lro(struct ifnet * ifp, boolean_t enable)
2182 {
2183 uint32_t cap_enable;
2184 uint32_t cap_supported;
2185 boolean_t changed = FALSE;
2186 boolean_t lro_enabled;
2187
2188 cap_supported = ifnet_capabilities_supported(ifp);
2189 if ((cap_supported & IFCAP_LRO) == 0) {
2190 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2191 "%s doesn't support LRO",
2192 ifp->if_xname);
2193 goto done;
2194 }
2195 if (bridge_allow_lro_num_seg != 0 &&
2196 (cap_supported & IFCAP_LRO_NUM_SEG) != 0) {
2197 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2198 "%s supports LRO_NUM_SEG, leaving LRO enabled",
2199 ifp->if_xname);
2200 goto done;
2201 }
2202 cap_enable = ifnet_capabilities_enabled(ifp);
2203 lro_enabled = (cap_enable & IFCAP_LRO) != 0;
2204 if (lro_enabled != enable) {
2205 errno_t error;
2206
2207 if (enable) {
2208 cap_enable |= IFCAP_LRO;
2209 } else {
2210 cap_enable &= ~IFCAP_LRO;
2211 }
2212 error = siocsifcap(ifp, cap_enable);
2213 if (error != 0) {
2214 BRIDGE_LOG(LOG_NOTICE, 0,
2215 "%s %s failed (cap 0x%x) %d",
2216 ifp->if_xname,
2217 enable_disable_str(enable),
2218 cap_enable,
2219 error);
2220 } else {
2221 changed = TRUE;
2222 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2223 "%s %s success (cap 0x%x)",
2224 ifp->if_xname,
2225 enable_disable_str(enable),
2226 cap_enable);
2227 }
2228 }
2229 done:
2230 return changed;
2231 }
2232
2233 static errno_t
bridge_set_tso(struct bridge_softc * sc)2234 bridge_set_tso(struct bridge_softc *sc)
2235 {
2236 struct bridge_iflist *bif;
2237 u_int32_t tso_v4_mtu;
2238 u_int32_t tso_v6_mtu;
2239 ifnet_offload_t offload;
2240 errno_t error = 0;
2241
2242 /* By default, support TSO */
2243 offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2244 tso_v4_mtu = IP_MAXPACKET;
2245 tso_v6_mtu = IP_MAXPACKET;
2246
2247 /* Use the lowest common denominator of the members */
2248 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2249 ifnet_t ifp = bif->bif_ifp;
2250
2251 if (ifp == NULL) {
2252 continue;
2253 }
2254
2255 if (offload & IFNET_TSO_IPV4) {
2256 if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2257 if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2258 tso_v4_mtu = ifp->if_tso_v4_mtu;
2259 }
2260 } else {
2261 offload &= ~IFNET_TSO_IPV4;
2262 tso_v4_mtu = 0;
2263 }
2264 }
2265 if (offload & IFNET_TSO_IPV6) {
2266 if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2267 if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2268 tso_v6_mtu = ifp->if_tso_v6_mtu;
2269 }
2270 } else {
2271 offload &= ~IFNET_TSO_IPV6;
2272 tso_v6_mtu = 0;
2273 }
2274 }
2275 }
2276
2277 if (offload != sc->sc_ifp->if_hwassist) {
2278 error = ifnet_set_offload(sc->sc_ifp, offload);
2279 if (error != 0) {
2280 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2281 "ifnet_set_offload(%s, 0x%x) failed %d",
2282 sc->sc_ifp->if_xname, offload, error);
2283 goto done;
2284 }
2285 /*
2286 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2287 * as large as the interface MTU
2288 */
2289 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2290 if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2291 tso_v4_mtu = sc->sc_ifp->if_mtu;
2292 }
2293 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2294 tso_v4_mtu);
2295 if (error != 0) {
2296 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2297 "ifnet_set_tso_mtu(%s, "
2298 "AF_INET, %u) failed %d",
2299 sc->sc_ifp->if_xname,
2300 tso_v4_mtu, error);
2301 goto done;
2302 }
2303 }
2304 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2305 if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2306 tso_v6_mtu = sc->sc_ifp->if_mtu;
2307 }
2308 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2309 tso_v6_mtu);
2310 if (error != 0) {
2311 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2312 "ifnet_set_tso_mtu(%s, "
2313 "AF_INET6, %u) failed %d",
2314 sc->sc_ifp->if_xname,
2315 tso_v6_mtu, error);
2316 goto done;
2317 }
2318 }
2319 }
2320 done:
2321 return error;
2322 }
2323
2324 static const char *
sanitize_ifname(char * __sized_by (IFNAMSIZ)ifname)2325 sanitize_ifname(char * __sized_by(IFNAMSIZ) ifname)
2326 {
2327 ifname[IFNAMSIZ - 1] = '\0';
2328 return __unsafe_null_terminated_from_indexable(ifname, &ifname[IFNAMSIZ - 1]);
2329 }
2330
2331 /*
2332 * bridge_lookup_member:
2333 *
2334 * Lookup a bridge member interface.
2335 */
2336 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,char * __sized_by (IFNAMSIZ)name_unsanitized)2337 bridge_lookup_member(struct bridge_softc *sc, char * __sized_by(IFNAMSIZ) name_unsanitized)
2338 {
2339 struct bridge_iflist *bif;
2340 struct ifnet *ifp;
2341 const char * __null_terminated name = sanitize_ifname(name_unsanitized);
2342
2343 BRIDGE_LOCK_ASSERT_HELD(sc);
2344
2345 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2346 ifp = bif->bif_ifp;
2347 if (strcmp(ifp->if_xname, name) == 0) {
2348 return bif;
2349 }
2350 }
2351
2352 return NULL;
2353 }
2354
2355 /*
2356 * bridge_lookup_member_if:
2357 *
2358 * Lookup a bridge member interface by ifnet*.
2359 */
2360 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2361 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2362 {
2363 struct bridge_iflist *bif;
2364
2365 BRIDGE_LOCK_ASSERT_HELD(sc);
2366
2367 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2368 if (bif->bif_ifp == member_ifp) {
2369 return bif;
2370 }
2371 }
2372
2373 return NULL;
2374 }
2375
2376 static inline bool
get_and_clear_promisc(mbuf_t m)2377 get_and_clear_promisc(mbuf_t m)
2378 {
2379 bool is_promisc;
2380
2381 /*
2382 * Need to clear the promiscuous flag otherwise the packet will be
2383 * dropped by DLIL after processing filters
2384 */
2385 is_promisc = (mbuf_flags(m) & MBUF_PROMISC) != 0;
2386 if (is_promisc) {
2387 mbuf_setflags_mask(m, 0, MBUF_PROMISC);
2388 }
2389 return is_promisc;
2390 }
2391
2392 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2393 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2394 mbuf_t *data, char **frame_ptr)
2395 {
2396 #pragma unused(protocol)
2397 errno_t error = 0;
2398 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2399 struct bridge_softc *sc = bif->bif_sc;
2400 int included = 0;
2401 struct ether_header * eh_p;
2402 size_t frmlen = 0;
2403 bool is_promisc;
2404 mblist list;
2405 mbuf_t m = *data;
2406 uint32_t sc_filter_flags;
2407
2408 if ((m->m_flags & M_PROTO1)) {
2409 goto out;
2410 }
2411
2412 if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2413 *frame_ptr <= mtod(m, char *)) {
2414 included = 1;
2415 frmlen = mtod(m, char *) - *frame_ptr;
2416 }
2417 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2418 "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2419 "frmlen %lu", sc->sc_ifp->if_xname,
2420 ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2421 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
2422 (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2423 included ? "inside" : "outside", frmlen);
2424 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2425 brlog_mbuf(m, "bridge_iff_input[", "");
2426 brlog_ether_header((struct ether_header *)
2427 (void *)*frame_ptr);
2428 brlog_mbuf_data(m, 0, 20);
2429 }
2430 if (included == 0) {
2431 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2432 goto out;
2433 }
2434
2435 /* Move data pointer to start of frame to the link layer header */
2436 _mbuf_adjust_pkthdr_and_data(m, -frmlen);
2437
2438 /* make sure we can access the ethernet header */
2439 if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2440 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2441 "short frame %lu < %lu",
2442 mbuf_pkthdr_len(m), sizeof(struct ether_header));
2443 goto out;
2444 }
2445 if (mbuf_len(m) < sizeof(struct ether_header)) {
2446 error = mbuf_pullup(data, sizeof(struct ether_header));
2447 if (error != 0) {
2448 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2449 "mbuf_pullup(%lu) failed %d",
2450 sizeof(struct ether_header),
2451 error);
2452 error = EJUSTRETURN;
2453 goto out;
2454 }
2455 if (m != *data) {
2456 m = *data;
2457 *frame_ptr = mtod(m, char *);
2458 }
2459 }
2460 sc_filter_flags = sc->sc_filter_flags;
2461 if ((sc_filter_flags & IFBF_FILT_MEMBER) != 0 && PF_IS_ENABLED) {
2462 error = bridge_pf(data, ifp, sc_filter_flags, true);
2463 m = *data;
2464 if (error != 0 || m == NULL) {
2465 return EJUSTRETURN;
2466 }
2467 }
2468 mblist_init(&list);
2469 mblist_append(&list, m);
2470 is_promisc = get_and_clear_promisc(m);
2471 eh_p = __unsafe_forge_single(struct ether_header *, *frame_ptr);
2472 list = bridge_input_list(sc, ifp, eh_p, list, is_promisc);
2473 m = *data = list.head;
2474 if (m == NULL) {
2475 error = EJUSTRETURN;
2476 }
2477 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2478 BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2479 brlog_mbuf(m, "bridge_iff_input]", "");
2480 }
2481
2482 out:
2483 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2484
2485 return error;
2486 }
2487
2488 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2489 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2490 mbuf_t *data)
2491 {
2492 #pragma unused(protocol)
2493 errno_t error = 0;
2494 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2495 struct bridge_softc *sc = bif->bif_sc;
2496 mbuf_t m = *data;
2497
2498 if ((m->m_flags & M_PROTO1)) {
2499 goto out;
2500 }
2501 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2502 "%s from %s m 0x%llx data 0x%llx",
2503 sc->sc_ifp->if_xname, ifp->if_xname,
2504 (uint64_t)VM_KERNEL_ADDRPERM(m),
2505 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)));
2506
2507 error = bridge_member_output(sc, ifp, data);
2508 if (error != 0 && error != EJUSTRETURN) {
2509 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2510 "bridge_member_output failed error %d",
2511 error);
2512 }
2513 out:
2514 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2515
2516 return error;
2517 }
2518
2519 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2520 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2521 const struct kev_msg *event_msg)
2522 {
2523 #pragma unused(protocol)
2524 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2525 struct bridge_softc *sc = bif->bif_sc;
2526
2527 if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2528 event_msg->kev_class == KEV_NETWORK_CLASS &&
2529 event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2530 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2531 "%s event_code %u - %s",
2532 ifp->if_xname, event_msg->event_code,
2533 dlil_kev_dl_code_str(event_msg->event_code));
2534
2535 switch (event_msg->event_code) {
2536 case KEV_DL_LINK_OFF:
2537 case KEV_DL_LINK_ON: {
2538 bridge_iflinkevent(ifp);
2539 #if BRIDGESTP
2540 bstp_linkstate(ifp, event_msg->event_code);
2541 #endif /* BRIDGESTP */
2542 break;
2543 }
2544 case KEV_DL_SIFFLAGS: {
2545 if ((ifp->if_flags & IFF_UP) == 0) {
2546 break;
2547 }
2548 if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2549 errno_t error;
2550
2551 error = ifnet_set_promiscuous(ifp, 1);
2552 if (error != 0) {
2553 BRIDGE_LOG(LOG_NOTICE, 0,
2554 "ifnet_set_promiscuous (%s)"
2555 " failed %d", ifp->if_xname,
2556 error);
2557 } else {
2558 bif->bif_flags |= BIFF_PROMISC;
2559 }
2560 }
2561 if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2562 (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2563 errno_t error;
2564
2565 error = if_allmulti(ifp, 1);
2566 if (error != 0) {
2567 BRIDGE_LOG(LOG_NOTICE, 0,
2568 "if_allmulti (%s)"
2569 " failed %d", ifp->if_xname,
2570 error);
2571 } else {
2572 bif->bif_flags |= BIFF_ALL_MULTI;
2573 #ifdef XNU_PLATFORM_AppleTVOS
2574 ip6_forwarding = 1;
2575 #endif /* XNU_PLATFORM_AppleTVOS */
2576 }
2577 }
2578 break;
2579 }
2580 case KEV_DL_IFCAP_CHANGED: {
2581 BRIDGE_LOCK(sc);
2582 bridge_set_tso(sc);
2583 BRIDGE_UNLOCK(sc);
2584 break;
2585 }
2586 case KEV_DL_PROTO_DETACHED:
2587 case KEV_DL_PROTO_ATTACHED: {
2588 bridge_proto_attach_changed(ifp);
2589 break;
2590 }
2591 default:
2592 break;
2593 }
2594 }
2595 }
2596
2597 /*
2598 * bridge_iff_detached:
2599 *
2600 * Called when our interface filter has been detached from a
2601 * member interface.
2602 */
2603 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2604 bridge_iff_detached(void *cookie, ifnet_t ifp)
2605 {
2606 #pragma unused(cookie)
2607 struct bridge_iflist *bif;
2608 struct bridge_softc * __single sc = ifp->if_bridge;
2609
2610 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2611
2612 /* Check if the interface is a bridge member */
2613 if (sc != NULL) {
2614 BRIDGE_LOCK(sc);
2615 bif = bridge_lookup_member_if(sc, ifp);
2616 if (bif != NULL) {
2617 bridge_delete_member(sc, bif);
2618 }
2619 BRIDGE_UNLOCK(sc);
2620 return;
2621 }
2622 /* Check if the interface is a span port */
2623 lck_mtx_lock(&bridge_list_mtx);
2624 LIST_FOREACH(sc, &bridge_list, sc_list) {
2625 BRIDGE_LOCK(sc);
2626 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2627 if (ifp == bif->bif_ifp) {
2628 bridge_delete_span(sc, bif);
2629 break;
2630 }
2631 BRIDGE_UNLOCK(sc);
2632 }
2633 lck_mtx_unlock(&bridge_list_mtx);
2634 }
2635
2636 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2637 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2638 char *header)
2639 {
2640 #pragma unused(protocol, packet, header)
2641 BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2642 ifp->if_xname);
2643 return 0;
2644 }
2645
2646 static int
bridge_attach_protocol(struct ifnet * ifp)2647 bridge_attach_protocol(struct ifnet *ifp)
2648 {
2649 int error;
2650 struct ifnet_attach_proto_param reg;
2651
2652 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2653 bzero(®, sizeof(reg));
2654 reg.input = bridge_proto_input;
2655
2656 error = ifnet_attach_protocol(ifp, PF_BRIDGE, ®);
2657 if (error) {
2658 BRIDGE_LOG(LOG_NOTICE, 0,
2659 "ifnet_attach_protocol(%s) failed, %d",
2660 ifp->if_xname, error);
2661 }
2662
2663 return error;
2664 }
2665
2666 static int
bridge_detach_protocol(struct ifnet * ifp)2667 bridge_detach_protocol(struct ifnet *ifp)
2668 {
2669 int error;
2670
2671 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2672 error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2673 if (error) {
2674 BRIDGE_LOG(LOG_NOTICE, 0,
2675 "ifnet_detach_protocol(%s) failed, %d",
2676 ifp->if_xname, error);
2677 }
2678
2679 return error;
2680 }
2681
2682 /*
2683 * bridge_delete_member:
2684 *
2685 * Delete the specified member interface.
2686 */
2687 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)2688 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2689 {
2690 #if SKYWALK
2691 boolean_t add_netagent = FALSE;
2692 #endif /* SKYWALK */
2693 uint32_t bif_flags;
2694 struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2695 int lladdr_changed = 0, error;
2696 uint8_t eaddr[ETHER_ADDR_LEN];
2697 u_int32_t event_code = 0;
2698
2699 BRIDGE_LOCK_ASSERT_HELD(sc);
2700 VERIFY(ifs != NULL);
2701
2702 /*
2703 * Remove the member from the list first so it cannot be found anymore
2704 * when we release the bridge lock below
2705 */
2706 if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2707 bif->bif_flags &= ~BIFF_IN_MEMBER_LIST;
2708 BRIDGE_XLOCK(sc);
2709 TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2710 BRIDGE_XDROP(sc);
2711 }
2712 if (sc->sc_mac_nat_bif != NULL) {
2713 if (bif == sc->sc_mac_nat_bif) {
2714 bridge_mac_nat_disable(sc);
2715 } else {
2716 bridge_mac_nat_flush_entries(sc, bif);
2717 }
2718 }
2719 #if BRIDGESTP
2720 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2721 bstp_disable(&bif->bif_stp);
2722 }
2723 #endif /* BRIDGESTP */
2724
2725 /*
2726 * If removing the interface that gave the bridge its mac address, set
2727 * the mac address of the bridge to the address of the next member, or
2728 * to its default address if no members are left.
2729 */
2730 if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2731 ifnet_release(sc->sc_ifaddr);
2732 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2733 bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
2734 sc->sc_ifaddr = NULL;
2735 } else {
2736 struct ifnet *fif =
2737 TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2738 bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
2739 sc->sc_ifaddr = fif;
2740 ifnet_reference(fif); /* for sc_ifaddr */
2741 }
2742 lladdr_changed = 1;
2743 }
2744
2745 #if HAS_IF_CAP
2746 bridge_mutecaps(sc); /* recalculate now this interface is removed */
2747 #endif /* HAS_IF_CAP */
2748
2749 error = bridge_set_tso(sc);
2750 if (error != 0) {
2751 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2752 }
2753
2754 bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
2755
2756 KASSERT(bif->bif_addrcnt == 0,
2757 ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2758
2759 /*
2760 * Update link status of the bridge based on its remaining members
2761 */
2762 event_code = bridge_updatelinkstatus(sc);
2763 bif_flags = bif->bif_flags;
2764 BRIDGE_UNLOCK(sc);
2765
2766 /* only perform these steps if the interface is still attached */
2767 if (ifnet_get_ioref(ifs)) {
2768 #if SKYWALK
2769 add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;
2770
2771 if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2772 ifnet_detach_flowswitch_nexus(ifs);
2773 }
2774 #endif /* SKYWALK */
2775 /* disable promiscuous mode */
2776 if ((bif_flags & BIFF_PROMISC) != 0) {
2777 (void) ifnet_set_promiscuous(ifs, 0);
2778 }
2779 /* disable all multi */
2780 if ((bif_flags & BIFF_ALL_MULTI) != 0) {
2781 (void)if_allmulti(ifs, 0);
2782 }
2783 #if HAS_IF_CAP
2784 /* re-enable any interface capabilities */
2785 bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2786 #endif
2787 /* detach bridge "protocol" */
2788 if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2789 (void)bridge_detach_protocol(ifs);
2790 }
2791 /* detach interface filter */
2792 if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2793 iflt_detach(bif->bif_iff_ref);
2794 }
2795 /* re-enable LRO */
2796 if ((bif_flags & BIFF_LRO_DISABLED) != 0) {
2797 (void)bridge_set_lro(ifs, TRUE);
2798 }
2799 ifnet_decr_iorefcnt(ifs);
2800 }
2801
2802 if (lladdr_changed &&
2803 (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2804 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2805 }
2806
2807 if (event_code != 0) {
2808 bridge_link_event(bifp, event_code);
2809 }
2810
2811 #if BRIDGESTP
2812 bstp_destroy(&bif->bif_stp); /* prepare to free */
2813 #endif /* BRIDGESTP */
2814
2815 kfree_type(struct bridge_iflist, bif);
2816 ifs->if_bridge = NULL;
2817 #if SKYWALK
2818 if (add_netagent && ifnet_get_ioref(ifs)) {
2819 (void)ifnet_add_netagent(ifs);
2820 ifnet_decr_iorefcnt(ifs);
2821 }
2822 #endif /* SKYWALK */
2823
2824 ifnet_release(ifs);
2825
2826 BRIDGE_LOCK(sc);
2827 }
2828
2829 /*
2830 * bridge_delete_span:
2831 *
2832 * Delete the specified span interface.
2833 */
2834 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2835 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2836 {
2837 BRIDGE_LOCK_ASSERT_HELD(sc);
2838
2839 KASSERT(bif->bif_ifp->if_bridge == NULL,
2840 ("%s: not a span interface", __func__));
2841
2842 ifnet_release(bif->bif_ifp);
2843
2844 TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2845 kfree_type(struct bridge_iflist, bif);
2846 }
2847
2848 static int
bridge_ioctl_add(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)2849 bridge_ioctl_add(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
2850 {
2851 struct ifbreq * __single req = arg;
2852 struct bridge_iflist *bif = NULL;
2853 struct ifnet *ifs, *bifp = sc->sc_ifp;
2854 int error = 0, lladdr_changed = 0;
2855 uint8_t eaddr[ETHER_ADDR_LEN];
2856 struct iff_filter iff;
2857 u_int32_t event_code = 0;
2858 boolean_t input_broadcast;
2859 int media_active;
2860 boolean_t wifi_infra = FALSE;
2861
2862 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
2863 if (ifs == NULL) {
2864 return ENOENT;
2865 }
2866 if (ifs->if_ioctl == NULL) { /* must be supported */
2867 return EINVAL;
2868 }
2869
2870 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
2871 return EINVAL;
2872 }
2873
2874 /* If it's in the span list, it can't be a member. */
2875 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2876 if (ifs == bif->bif_ifp) {
2877 return EBUSY;
2878 }
2879 }
2880
2881 if (ifs->if_bridge == sc) {
2882 return EEXIST;
2883 }
2884
2885 if (ifs->if_bridge != NULL) {
2886 return EBUSY;
2887 }
2888
2889 switch (ifs->if_type) {
2890 case IFT_ETHER:
2891 if (strcmp(ifs->if_name, "en") == 0 &&
2892 ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2893 (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2894 /* XXX is there a better way to identify Wi-Fi STA? */
2895 wifi_infra = TRUE;
2896 }
2897 break;
2898 case IFT_L2VLAN:
2899 case IFT_IEEE8023ADLAG:
2900 break;
2901 default:
2902 return EINVAL;
2903 }
2904
2905 /* fail to add the interface if the MTU doesn't match */
2906 if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2907 BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2908 sc->sc_ifp->if_xname,
2909 ifs->if_xname);
2910 return EINVAL;
2911 }
2912
2913 if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
2914 /* there's already an interface that's doing MAC NAT */
2915 return EBUSY;
2916 }
2917
2918 /* prevent the interface from detaching while we add the member */
2919 if (!ifnet_get_ioref(ifs)) {
2920 return ENXIO;
2921 }
2922
2923 /* allocate a new member */
2924 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2925 bif->bif_ifp = ifs;
2926 ifnet_reference(ifs);
2927 bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2928 #if HAS_IF_CAP
2929 bif->bif_savedcaps = ifs->if_capenable;
2930 #endif /* HAS_IF_CAP */
2931 bif->bif_sc = sc;
2932 if (wifi_infra) {
2933 (void)bridge_mac_nat_enable(sc, bif);
2934 }
2935
2936 /* Allow the first Ethernet member to define the MTU */
2937 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2938 sc->sc_ifp->if_mtu = ifs->if_mtu;
2939 }
2940
2941 /*
2942 * Assign the interface's MAC address to the bridge if it's the first
2943 * member and the MAC address of the bridge has not been changed from
2944 * the default (randomly) generated one.
2945 */
2946 if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2947 _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2948 bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2949 sc->sc_ifaddr = ifs;
2950 ifnet_reference(ifs); /* for sc_ifaddr */
2951 lladdr_changed = 1;
2952 }
2953
2954 ifs->if_bridge = sc;
2955 #if BRIDGESTP
2956 bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2957 #endif /* BRIDGESTP */
2958
2959 #if HAS_IF_CAP
2960 /* Set interface capabilities to the intersection set of all members */
2961 bridge_mutecaps(sc);
2962 #endif /* HAS_IF_CAP */
2963
2964 /*
2965 * Respect lock ordering with DLIL lock for the following operations
2966 */
2967 BRIDGE_UNLOCK(sc);
2968
2969 /* enable promiscuous mode */
2970 error = ifnet_set_promiscuous(ifs, 1);
2971 switch (error) {
2972 case 0:
2973 bif->bif_flags |= BIFF_PROMISC;
2974 break;
2975 case ENETDOWN:
2976 case EPWROFF:
2977 BRIDGE_LOG(LOG_NOTICE, 0,
2978 "ifnet_set_promiscuous(%s) failed %d, ignoring",
2979 ifs->if_xname, error);
2980 /* Ignore error when device is not up */
2981 error = 0;
2982 break;
2983 default:
2984 BRIDGE_LOG(LOG_NOTICE, 0,
2985 "ifnet_set_promiscuous(%s) failed %d",
2986 ifs->if_xname, error);
2987 BRIDGE_LOCK(sc);
2988 goto out;
2989 }
2990 if (wifi_infra) {
2991 int this_error;
2992
2993 /* Wi-Fi doesn't really support promiscuous, set allmulti */
2994 bif->bif_flags |= BIFF_WIFI_INFRA;
2995 this_error = if_allmulti(ifs, 1);
2996 if (this_error == 0) {
2997 bif->bif_flags |= BIFF_ALL_MULTI;
2998 #ifdef XNU_PLATFORM_AppleTVOS
2999 ip6_forwarding = 1;
3000 #endif /* XNU_PLATFORM_AppleTVOS */
3001 } else {
3002 BRIDGE_LOG(LOG_NOTICE, 0,
3003 "if_allmulti(%s) failed %d, ignoring",
3004 ifs->if_xname, this_error);
3005 }
3006 }
3007 #if SKYWALK
3008 /* ensure that the flowswitch is present for native interface */
3009 if (SKYWALK_NATIVE(ifs)) {
3010 if (ifnet_attach_flowswitch_nexus(ifs)) {
3011 bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
3012 }
3013 }
3014 /* remove the netagent on the flowswitch (rdar://75050182) */
3015 if (if_is_fsw_netagent_enabled()) {
3016 (void)ifnet_remove_netagent(ifs);
3017 bif->bif_flags |= BIFF_NETAGENT_REMOVED;
3018 }
3019 #endif /* SKYWALK */
3020
3021 /*
3022 * install an interface filter
3023 */
3024 memset(&iff, 0, sizeof(struct iff_filter));
3025 iff.iff_cookie = bif;
3026 iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
3027 iff.iff_input = bridge_iff_input;
3028 iff.iff_output = bridge_iff_output;
3029 iff.iff_event = bridge_iff_event;
3030 iff.iff_detached = bridge_iff_detached;
3031 error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
3032 DLIL_IFF_TSO | DLIL_IFF_INTERNAL | DLIL_IFF_BRIDGE);
3033 if (error != 0) {
3034 BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
3035 BRIDGE_LOCK(sc);
3036 goto out;
3037 }
3038 bif->bif_flags |= BIFF_FILTER_ATTACHED;
3039
3040 /*
3041 * install a dummy "bridge" protocol
3042 */
3043 if ((error = bridge_attach_protocol(ifs)) != 0) {
3044 if (error != 0) {
3045 BRIDGE_LOG(LOG_NOTICE, 0,
3046 "bridge_attach_protocol failed %d", error);
3047 BRIDGE_LOCK(sc);
3048 goto out;
3049 }
3050 }
3051 bif->bif_flags |= BIFF_PROTO_ATTACHED;
3052
3053 if (lladdr_changed &&
3054 (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
3055 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
3056 }
3057
3058 media_active = interface_media_active(ifs);
3059
3060 /* disable LRO if needed */
3061 if (bridge_set_lro(ifs, FALSE)) {
3062 bif->bif_flags |= BIFF_LRO_DISABLED;
3063 }
3064
3065 /*
3066 * No failures past this point. Add the member to the list.
3067 */
3068 BRIDGE_LOCK(sc);
3069 bif->bif_flags |= BIFF_IN_MEMBER_LIST;
3070 BRIDGE_XLOCK(sc);
3071 TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
3072 BRIDGE_XDROP(sc);
3073
3074 /* cache the member link status */
3075 if (media_active != 0) {
3076 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
3077 } else {
3078 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
3079 }
3080
3081 /* the new member may change the link status of the bridge interface */
3082 event_code = bridge_updatelinkstatus(sc);
3083
3084 /* check whether we need input broadcast or not */
3085 input_broadcast = interface_needs_input_broadcast(ifs);
3086 bif_set_input_broadcast(bif, input_broadcast);
3087 BRIDGE_UNLOCK(sc);
3088
3089 if (event_code != 0) {
3090 bridge_link_event(bifp, event_code);
3091 }
3092 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
3093 "%s input broadcast %s", ifs->if_xname,
3094 input_broadcast ? "ENABLED" : "DISABLED");
3095
3096 BRIDGE_LOCK(sc);
3097 bridge_set_tso(sc);
3098
3099 out:
3100 /* allow the interface to detach */
3101 ifnet_decr_iorefcnt(ifs);
3102
3103 if (error != 0) {
3104 if (bif != NULL) {
3105 bridge_delete_member(sc, bif);
3106 }
3107 } else if (IFNET_IS_VMNET(ifs)) {
3108 INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
3109 }
3110
3111 return error;
3112 }
3113
3114 static int
bridge_ioctl_del(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3115 bridge_ioctl_del(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3116 {
3117 struct ifbreq * __single req = arg;
3118 struct bridge_iflist *bif;
3119
3120 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3121 if (bif == NULL) {
3122 return ENOENT;
3123 }
3124
3125 bridge_delete_member(sc, bif);
3126
3127 return 0;
3128 }
3129
3130 static int
bridge_ioctl_purge(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3131 bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3132 {
3133 #pragma unused(sc, arg, arg_len)
3134 return 0;
3135 }
3136
3137 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3138 bridge_ioctl_gifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3139 {
3140 struct ifbreq * __single req = arg;
3141 struct bridge_iflist *bif;
3142
3143 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3144 if (bif == NULL) {
3145 return ENOENT;
3146 }
3147
3148 struct bstp_port *bp;
3149
3150 bp = &bif->bif_stp;
3151 req->ifbr_state = bp->bp_state;
3152 req->ifbr_priority = bp->bp_priority;
3153 req->ifbr_path_cost = bp->bp_path_cost;
3154 req->ifbr_proto = bp->bp_protover;
3155 req->ifbr_role = bp->bp_role;
3156 req->ifbr_stpflags = bp->bp_flags;
3157 req->ifbr_ifsflags = bif->bif_ifflags;
3158
3159 /* Copy STP state options as flags */
3160 if (bp->bp_operedge) {
3161 req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
3162 }
3163 if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
3164 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
3165 }
3166 if (bp->bp_ptp_link) {
3167 req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
3168 }
3169 if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
3170 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
3171 }
3172 if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
3173 req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
3174 }
3175 if (bp->bp_flags & BSTP_PORT_ADMCOST) {
3176 req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
3177 }
3178
3179 req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
3180 req->ifbr_addrcnt = bif->bif_addrcnt;
3181 req->ifbr_addrmax = bif->bif_addrmax;
3182 req->ifbr_addrexceeded = bif->bif_addrexceeded;
3183
3184 return 0;
3185 }
3186
3187 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3188 bridge_ioctl_sifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3189 {
3190 struct ifbreq * __single req = arg;
3191 struct bridge_iflist *bif;
3192 #if BRIDGESTP
3193 struct bstp_port *bp;
3194 #endif /* BRIDGESTP */
3195 errno_t error;
3196 uint32_t ifsflags;
3197
3198 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3199 if (bif == NULL) {
3200 return ENOENT;
3201 }
3202
3203 ifsflags = req->ifbr_ifsflags;
3204 if (ifsflags & IFBIF_SPAN) {
3205 /* SPAN is readonly */
3206 return EINVAL;
3207 }
3208 #define CHECKSUM_VIRTIO (IFBIF_CHECKSUM_OFFLOAD | IFBIF_USES_VIRTIO)
3209 if ((ifsflags & CHECKSUM_VIRTIO) == CHECKSUM_VIRTIO) {
3210 /* can't specify checksum and virtio */
3211 return EINVAL;
3212 }
3213 if ((ifsflags & IFBIF_MAC_NAT) != 0 &&
3214 ((ifsflags & CHECKSUM_VIRTIO) != 0 ||
3215 (bif->bif_flags & BIFF_HOST_FILTER) != 0)) {
3216 /* MAC-NAT can't be used with checksum, host filter, or virtio */
3217 return EINVAL;
3218 }
3219 if ((ifsflags & IFBIF_MAC_NAT) != 0) {
3220 error = bridge_mac_nat_enable(sc, bif);
3221 if (error != 0) {
3222 return error;
3223 }
3224 } else if (sc->sc_mac_nat_bif == bif) {
3225 bridge_mac_nat_disable(sc);
3226 }
3227
3228 #if BRIDGESTP
3229 if (ifsflags & IFBIF_STP) {
3230 if ((bif->bif_ifflags & IFBIF_STP) == 0) {
3231 error = bstp_enable(&bif->bif_stp);
3232 if (error) {
3233 return error;
3234 }
3235 }
3236 } else {
3237 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
3238 bstp_disable(&bif->bif_stp);
3239 }
3240 }
3241
3242 /* Pass on STP flags */
3243 bp = &bif->bif_stp;
3244 bstp_set_edge(bp, ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
3245 bstp_set_autoedge(bp, ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
3246 bstp_set_ptp(bp, ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
3247 bstp_set_autoptp(bp, ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
3248 #else /* !BRIDGESTP */
3249 if (ifsflags & IFBIF_STP) {
3250 return EOPNOTSUPP;
3251 }
3252 #endif /* !BRIDGESTP */
3253
3254 /* Save the bits relating to the bridge */
3255 bif->bif_ifflags = ifsflags & IFBIFMASK;
3256
3257 return 0;
3258 }
3259
3260 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3261 bridge_ioctl_scache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3262 {
3263 struct ifbrparam * __single param = arg;
3264
3265 sc->sc_brtmax = param->ifbrp_csize;
3266 bridge_rttrim(sc);
3267 return 0;
3268 }
3269
3270 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3271 bridge_ioctl_gcache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3272 {
3273 struct ifbrparam * __single param = arg;
3274
3275 param->ifbrp_csize = sc->sc_brtmax;
3276
3277 return 0;
3278 }
3279
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of bridge_ioctl_gifs32() and bridge_ioctl_gifs64().
 *	Expects `sc', `bifc' and `error' to be in scope where it is expanded.
 *
 *	When bifc->ifbic_len is 0 it only stores the buffer size needed for
 *	all member and span interfaces in bifc->ifbic_len and returns 0 from
 *	the enclosing function.  Otherwise it fills a temporary buffer with
 *	one struct ifbreq per member and per span interface (as many as fit
 *	in the caller's length), copies it out to bifc->ifbic_req and sets
 *	bifc->ifbic_len to the number of bytes produced.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout, so the lists may change in between; `len' is capped at the
 *	allocated size so the fill loops never write past the buffer.
 *	Returns ENOMEM from the enclosing function if the buffer cannot be
 *	allocated.  The first error seen is the one left in `error'.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif; \
	struct ifbreq breq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
	\
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) \
		count++; \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) \
		count++; \
	\
	buflen = sizeof (breq) * count; \
	if (bifc->ifbic_len == 0) { \
		bifc->ifbic_len = buflen; \
		return (0); \
	} \
	BRIDGE_UNLOCK(sc); \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* Z_NOFAIL is not passed: never write through a NULL buffer */ \
	if (outbuf == NULL && buflen != 0) \
		return (ENOMEM); \
	\
	count = 0; \
	buf = outbuf; \
	len = min(bifc->ifbic_len, buflen); \
	bzero(&breq, sizeof (breq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if (len < sizeof (breq)) \
			break; \
		\
		snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
		    "%s", bif->bif_ifp->if_xname); \
		/* Fill in the ifbreq structure */ \
		error = bridge_ioctl_gifflags(sc, &breq, sizeof(breq)); \
		if (error) \
			break; \
		memcpy(buf, &breq, sizeof (breq)); \
		count++; \
		buf += sizeof (breq); \
		len -= sizeof (breq); \
	} \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) { \
		/* stop on an earlier error so that it is not lost */ \
		if (error != 0 || len < sizeof (breq)) \
			break; \
		\
		/* do not leak the previous entry's fields into this one */ \
		bzero(&breq, sizeof (breq)); \
		snprintf(breq.ifbr_ifsname, \
		    sizeof (breq.ifbr_ifsname), \
		    "%s", bif->bif_ifp->if_xname); \
		breq.ifbr_ifsflags = bif->bif_ifflags; \
		breq.ifbr_portno \
		    = bif->bif_ifp->if_index & 0xfff; \
		memcpy(buf, &breq, sizeof (breq)); \
		count++; \
		buf += sizeof (breq); \
		len -= sizeof (breq); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	bifc->ifbic_len = sizeof (breq) * count; \
	/* keep an error from the fill loop instead of overwriting it */ \
	if (error == 0 && bifc->ifbic_len > 0) { \
		error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);\
	} \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
} while (0)
3344
3345 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3346 bridge_ioctl_gifs64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3347 {
3348 struct ifbifconf64 * __single bifc = arg;
3349 int error = 0;
3350
3351 BRIDGE_IOCTL_GIFS;
3352
3353 return error;
3354 }
3355
3356 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3357 bridge_ioctl_gifs32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3358 {
3359 struct ifbifconf32 * __single bifc = arg;
3360 int error = 0;
3361
3362 BRIDGE_IOCTL_GIFS;
3363
3364 return error;
3365 }
3366
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of bridge_ioctl_rts32() and bridge_ioctl_rts64().
 *	Expects `sc', `bac', `bareq' and `error' to be in scope where it is
 *	expanded, and always returns from the enclosing function.
 *
 *	Returns 0 immediately when bac->ifbac_len is 0.  Otherwise it fills
 *	a temporary buffer with one `bareq' record per entry of sc_rtlist (as
 *	many as fit in the caller's length), copies it out to bac->ifbac_req
 *	and sets bac->ifbac_len to the number of bytes produced.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout, so the list may change in between; `len' is capped at the
 *	allocated size so the fill loop never writes past the buffer.
 *	Returns ENOMEM if the buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_RTS do { \
	struct bridge_rtnode *brt; \
	char *buf; \
	char *outbuf = NULL; \
	unsigned int count, buflen, len; \
	unsigned long now; \
	\
	if (bac->ifbac_len == 0) \
		return (0); \
	\
	bzero(&bareq, sizeof (bareq)); \
	count = 0; \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) \
		count++; \
	buflen = sizeof (bareq) * count; \
	\
	BRIDGE_UNLOCK(sc); \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	\
	count = 0; \
	/* Z_NOFAIL is not passed: never write through a NULL buffer */ \
	if (outbuf == NULL && buflen != 0) { \
		error = ENOMEM; \
		goto out; \
	} \
	buf = outbuf; \
	len = min(bac->ifbac_len, buflen); \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
		if (len < sizeof (bareq)) \
			goto out; \
		snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname), \
		    "%s", brt->brt_ifp->if_xname); \
		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
		bareq.ifba_vlan = brt->brt_vlan; \
		/* \
		 * Reset for every entry: `bareq' is reused, so a dynamic \
		 * entry that has already expired must not report the \
		 * previous entry's remaining time. \
		 */ \
		bareq.ifba_expire = 0; \
		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
			now = (unsigned long) net_uptime(); \
			if (now < brt->brt_expire) \
				bareq.ifba_expire = \
				    brt->brt_expire - now; \
		} \
		bareq.ifba_flags = brt->brt_flags; \
		\
		memcpy(buf, &bareq, sizeof (bareq)); \
		count++; \
		buf += sizeof (bareq); \
		len -= sizeof (bareq); \
	} \
out: \
	bac->ifbac_len = sizeof (bareq) * count; \
	if (outbuf != NULL) { \
		BRIDGE_UNLOCK(sc); \
		if (bac->ifbac_len > 0) { \
			error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);\
		} \
		kfree_data(outbuf, buflen); \
		BRIDGE_LOCK(sc); \
	} \
	return (error); \
} while (0)
3423
3424 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3425 bridge_ioctl_rts64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3426 {
3427 struct ifbaconf64 * __single bac = arg;
3428 struct ifbareq64 bareq;
3429 int error = 0;
3430
3431 BRIDGE_IOCTL_RTS;
3432 return error;
3433 }
3434
3435 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3436 bridge_ioctl_rts32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3437 {
3438 struct ifbaconf32 * __single bac = arg;
3439 struct ifbareq32 bareq;
3440 int error = 0;
3441
3442 BRIDGE_IOCTL_RTS;
3443 return error;
3444 }
3445
3446 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3447 bridge_ioctl_saddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3448 {
3449 struct ifbareq32 * __single req = arg;
3450 struct bridge_iflist *bif;
3451 int error;
3452
3453 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3454 if (bif == NULL) {
3455 return ENOENT;
3456 }
3457
3458 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3459 req->ifba_flags);
3460
3461 return error;
3462 }
3463
3464 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3465 bridge_ioctl_saddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3466 {
3467 struct ifbareq64 * __single req = arg;
3468 struct bridge_iflist *bif;
3469 int error;
3470
3471 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3472 if (bif == NULL) {
3473 return ENOENT;
3474 }
3475
3476 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3477 req->ifba_flags);
3478
3479 return error;
3480 }
3481
3482 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3483 bridge_ioctl_sto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3484 {
3485 struct ifbrparam * __single param = arg;
3486
3487 sc->sc_brttimeout = param->ifbrp_ctime;
3488 return 0;
3489 }
3490
3491 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3492 bridge_ioctl_gto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3493 {
3494 struct ifbrparam * __single param = arg;
3495
3496 param->ifbrp_ctime = sc->sc_brttimeout;
3497 return 0;
3498 }
3499
3500 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3501 bridge_ioctl_daddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3502 {
3503 struct ifbareq32 * __single req = arg;
3504
3505 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3506 }
3507
3508 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3509 bridge_ioctl_daddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3510 {
3511 struct ifbareq64 * __single req = arg;
3512
3513 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3514 }
3515
3516 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3517 bridge_ioctl_flush(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3518 {
3519 struct ifbreq * __single req = arg;
3520
3521 bridge_rtflush(sc, req->ifbr_ifsflags);
3522 return 0;
3523 }
3524
3525 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3526 bridge_ioctl_gpri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3527 {
3528 struct ifbrparam * __single param = arg;
3529 struct bstp_state *bs = &sc->sc_stp;
3530
3531 param->ifbrp_prio = bs->bs_bridge_priority;
3532 return 0;
3533 }
3534
3535 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3536 bridge_ioctl_spri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3537 {
3538 #if BRIDGESTP
3539 struct ifbrparam *param = arg;
3540
3541 return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3542 #else /* !BRIDGESTP */
3543 #pragma unused(sc, arg)
3544 return EOPNOTSUPP;
3545 #endif /* !BRIDGESTP */
3546 }
3547
3548 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3549 bridge_ioctl_ght(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3550 {
3551 struct ifbrparam * __single param = arg;
3552 struct bstp_state *bs = &sc->sc_stp;
3553
3554 param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3555 return 0;
3556 }
3557
3558 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3559 bridge_ioctl_sht(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3560 {
3561 #if BRIDGESTP
3562 struct ifbrparam *param = arg;
3563
3564 return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3565 #else /* !BRIDGESTP */
3566 #pragma unused(sc, arg)
3567 return EOPNOTSUPP;
3568 #endif /* !BRIDGESTP */
3569 }
3570
3571 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3572 bridge_ioctl_gfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3573 {
3574 struct ifbrparam * __single param;
3575 struct bstp_state *bs;
3576
3577 param = arg;
3578 bs = &sc->sc_stp;
3579 param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3580 return 0;
3581 }
3582
3583 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3584 bridge_ioctl_sfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3585 {
3586 #if BRIDGESTP
3587 struct ifbrparam *param = arg;
3588
3589 return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3590 #else /* !BRIDGESTP */
3591 #pragma unused(sc, arg)
3592 return EOPNOTSUPP;
3593 #endif /* !BRIDGESTP */
3594 }
3595
3596 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3597 bridge_ioctl_gma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3598 {
3599 struct ifbrparam * __single param;
3600 struct bstp_state *bs;
3601
3602 param = arg;
3603 bs = &sc->sc_stp;
3604 param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3605 return 0;
3606 }
3607
3608 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3609 bridge_ioctl_sma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3610 {
3611 #if BRIDGESTP
3612 struct ifbrparam *param = arg;
3613
3614 return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3615 #else /* !BRIDGESTP */
3616 #pragma unused(sc, arg)
3617 return EOPNOTSUPP;
3618 #endif /* !BRIDGESTP */
3619 }
3620
3621 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3622 bridge_ioctl_sifprio(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3623 {
3624 #if BRIDGESTP
3625 struct ifbreq *req = arg;
3626 struct bridge_iflist *bif;
3627
3628 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3629 if (bif == NULL) {
3630 return ENOENT;
3631 }
3632
3633 return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3634 #else /* !BRIDGESTP */
3635 #pragma unused(sc, arg)
3636 return EOPNOTSUPP;
3637 #endif /* !BRIDGESTP */
3638 }
3639
3640 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3641 bridge_ioctl_sifcost(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3642 {
3643 #if BRIDGESTP
3644 struct ifbreq *req = arg;
3645 struct bridge_iflist *bif;
3646
3647 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3648 if (bif == NULL) {
3649 return ENOENT;
3650 }
3651
3652 return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3653 #else /* !BRIDGESTP */
3654 #pragma unused(sc, arg)
3655 return EOPNOTSUPP;
3656 #endif /* !BRIDGESTP */
3657 }
3658
3659 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3660 bridge_ioctl_gfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3661 {
3662 struct ifbrparam * __single param = arg;
3663
3664 param->ifbrp_filter = sc->sc_filter_flags;
3665
3666 return 0;
3667 }
3668
3669 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3670 bridge_ioctl_sfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3671 {
3672 struct ifbrparam * __single param = arg;
3673
3674 if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3675 return EINVAL;
3676 }
3677
3678 if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3679 return EINVAL;
3680 }
3681
3682 sc->sc_filter_flags = param->ifbrp_filter;
3683
3684 return 0;
3685 }
3686
3687 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3688 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3689 {
3690 struct ifbreq * __single req = arg;
3691 struct bridge_iflist *bif;
3692
3693 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3694 if (bif == NULL) {
3695 return ENOENT;
3696 }
3697
3698 bif->bif_addrmax = req->ifbr_addrmax;
3699 return 0;
3700 }
3701
3702 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3703 bridge_ioctl_addspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3704 {
3705 struct ifbreq * __single req = arg;
3706 struct bridge_iflist *bif = NULL;
3707 struct ifnet *ifs;
3708
3709 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3710 if (ifs == NULL) {
3711 return ENOENT;
3712 }
3713
3714 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3715 return EINVAL;
3716 }
3717
3718 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3719 if (ifs == bif->bif_ifp) {
3720 return EBUSY;
3721 }
3722
3723 if (ifs->if_bridge != NULL) {
3724 return EBUSY;
3725 }
3726
3727 switch (ifs->if_type) {
3728 case IFT_ETHER:
3729 case IFT_L2VLAN:
3730 case IFT_IEEE8023ADLAG:
3731 break;
3732 default:
3733 return EINVAL;
3734 }
3735
3736 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3737
3738 bif->bif_ifp = ifs;
3739 bif->bif_ifflags = IFBIF_SPAN;
3740
3741 ifnet_reference(bif->bif_ifp);
3742
3743 TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3744
3745 return 0;
3746 }
3747
3748 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3749 bridge_ioctl_delspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3750 {
3751 struct ifbreq * __single req = arg;
3752 struct bridge_iflist *bif;
3753 struct ifnet *ifs;
3754
3755 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3756 if (ifs == NULL) {
3757 return ENOENT;
3758 }
3759
3760 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3761 if (ifs == bif->bif_ifp) {
3762 break;
3763 }
3764
3765 if (bif == NULL) {
3766 return ENOENT;
3767 }
3768
3769 bridge_delete_span(sc, bif);
3770
3771 return 0;
3772 }
3773
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of bridge_ioctl_gbparam32() and bridge_ioctl_gbparam64().
 *	Expects `sc' and `req' to be in scope where it is expanded and copies
 *	fields of the bridge's STP state (sc->sc_stp) into the request.  The
 *	max age, hello time and forward delay are stored shifted right by 8
 *	bits; the root port is reported as 0 when there is none.
 */
#define BRIDGE_IOCTL_GBPARAM do { \
	struct bstp_state *bs = &sc->sc_stp; \
	struct bstp_port *root_port = bs->bs_root_port; \
	\
	req->ifbop_maxage = bs->bs_bridge_max_age >> 8; \
	req->ifbop_hellotime = bs->bs_bridge_htime >> 8; \
	req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8; \
	\
	if (root_port != NULL) { \
		req->ifbop_root_port = root_port->bp_ifp->if_index; \
	} else { \
		req->ifbop_root_port = 0; \
	} \
	\
	req->ifbop_holdcount = bs->bs_txholdcount; \
	req->ifbop_priority = bs->bs_bridge_priority; \
	req->ifbop_protocol = bs->bs_protover; \
	\
	req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost; \
	req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id; \
	req->ifbop_designated_root = bs->bs_root_pv.pv_root_id; \
	req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id; \
	\
	req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec; \
	req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec; \
} while (0)
3798
3799 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3800 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3801 {
3802 struct ifbropreq32 * __single req = arg;
3803
3804 BRIDGE_IOCTL_GBPARAM;
3805 return 0;
3806 }
3807
3808 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3809 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3810 {
3811 struct ifbropreq64 * __single req = arg;
3812
3813 BRIDGE_IOCTL_GBPARAM;
3814 return 0;
3815 }
3816
3817 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3818 bridge_ioctl_grte(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3819 {
3820 struct ifbrparam * __single param = arg;
3821
3822 param->ifbrp_cexceeded = sc->sc_brtexceeded;
3823 return 0;
3824 }
3825
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body of the bridge_ioctl_gifsstp32/64 handlers.  Expects
 *	`sc`, `bifstp` and `error` to be in scope at the expansion site and
 *	returns from the enclosing function.
 *
 *	When the caller passes a zero ifbpstp_len, only the buffer size
 *	needed for all IFBIF_STP members is reported.  Otherwise one
 *	struct ifbpstpreq per IFBIF_STP member is staged in a temporary
 *	kernel buffer and copied out to ifbpstp_req, and ifbpstp_len is
 *	updated to the number of bytes produced.
 *
 *	The bridge lock is dropped around the allocation and around
 *	copyout(); it is held again whenever the macro returns.  The member
 *	list is walked a second time after the lock is re-taken, and the
 *	fill loop is bounded by min(user length, allocated length).
 *
 *	Returns ENOMEM when the staging buffer cannot be allocated,
 *	otherwise the copyout() result (0 when nothing was copied).
 */
#define BRIDGE_IOCTL_GIFSSTP do {					\
	struct bridge_iflist *bif;					\
	struct bstp_port *bp;						\
	struct ifbpstpreq bpreq;					\
	char *buf, *outbuf;						\
	unsigned int count, buflen, len;				\
									\
	count = 0;							\
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {			\
		if ((bif->bif_ifflags & IFBIF_STP) != 0) {		\
			count++;					\
		}							\
	}								\
									\
	buflen = sizeof (bpreq) * count;				\
	if (bifstp->ifbpstp_len == 0) {					\
		bifstp->ifbpstp_len = buflen;				\
		return (0);						\
	}								\
									\
	BRIDGE_UNLOCK(sc);						\
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);		\
	BRIDGE_LOCK(sc);						\
	/* a zero-sized request is never written to, so only fail */	\
	/* when a real buffer was needed and could not be obtained */	\
	if (outbuf == NULL && buflen != 0) {				\
		return (ENOMEM);					\
	}								\
									\
	count = 0;							\
	buf = outbuf;							\
	len = min(bifstp->ifbpstp_len, buflen);				\
	bzero(&bpreq, sizeof (bpreq));					\
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {			\
		if (len < sizeof (bpreq)) {				\
			break;						\
		}							\
									\
		if ((bif->bif_ifflags & IFBIF_STP) == 0) {		\
			continue;					\
		}							\
									\
		bp = &bif->bif_stp;					\
		bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;	\
		bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;	\
		bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;	\
		bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;	\
		bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
		bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;	\
									\
		memcpy(buf, &bpreq, sizeof (bpreq));			\
		count++;						\
		buf += sizeof (bpreq);					\
		len -= sizeof (bpreq);					\
	}								\
									\
	BRIDGE_UNLOCK(sc);						\
	bifstp->ifbpstp_len = sizeof (bpreq) * count;			\
	if (bifstp->ifbpstp_len > 0) {					\
		error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len);\
	}								\
	BRIDGE_LOCK(sc);						\
	kfree_data(outbuf, buflen);					\
	return (error);							\
} while (0)
3883
3884 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3885 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3886 {
3887 struct ifbpstpconf32 * __single bifstp = arg;
3888 int error = 0;
3889
3890 BRIDGE_IOCTL_GIFSSTP;
3891 return error;
3892 }
3893
3894 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3895 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3896 {
3897 struct ifbpstpconf64 * __single bifstp = arg;
3898 int error = 0;
3899
3900 BRIDGE_IOCTL_GIFSSTP;
3901 return error;
3902 }
3903
3904 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3905 bridge_ioctl_sproto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3906 {
3907 #if BRIDGESTP
3908 struct ifbrparam *param = arg;
3909
3910 return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3911 #else /* !BRIDGESTP */
3912 #pragma unused(sc, arg)
3913 return EOPNOTSUPP;
3914 #endif /* !BRIDGESTP */
3915 }
3916
3917 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3918 bridge_ioctl_stxhc(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3919 {
3920 #if BRIDGESTP
3921 struct ifbrparam *param = arg;
3922
3923 return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3924 #else /* !BRIDGESTP */
3925 #pragma unused(sc, arg)
3926 return EOPNOTSUPP;
3927 #endif /* !BRIDGESTP */
3928 }
3929
3930
3931 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3932 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3933 {
3934 struct ifbrhostfilter * __single req = arg;
3935 struct bridge_iflist *bif;
3936
3937 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3938 if (bif == NULL) {
3939 return ENOENT;
3940 }
3941
3942 bzero(req, sizeof(struct ifbrhostfilter));
3943 if (bif->bif_flags & BIFF_HOST_FILTER) {
3944 req->ifbrhf_flags |= IFBRHF_ENABLED;
3945 bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3946 ETHER_ADDR_LEN);
3947 req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3948 }
3949 return 0;
3950 }
3951
3952 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3953 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3954 {
3955 struct ifbrhostfilter * __single req = arg;
3956 struct bridge_iflist *bif;
3957
3958 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3959 if (bif == NULL) {
3960 return ENOENT;
3961 }
3962 if (bif_has_mac_nat(bif)) {
3963 /* no host filter with MAC-NAT */
3964 return EINVAL;
3965 }
3966 if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3967 bif->bif_flags |= BIFF_HOST_FILTER;
3968
3969 if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3970 bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3971 ETHER_ADDR_LEN);
3972 if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3973 ETHER_ADDR_LEN) != 0) {
3974 bif->bif_flags |= BIFF_HF_HWSRC;
3975 } else {
3976 bif->bif_flags &= ~BIFF_HF_HWSRC;
3977 }
3978 }
3979 if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3980 bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3981 if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3982 bif->bif_flags |= BIFF_HF_IPSRC;
3983 } else {
3984 bif->bif_flags &= ~BIFF_HF_IPSRC;
3985 }
3986 }
3987 } else {
3988 bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3989 BIFF_HF_IPSRC);
3990 bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3991 bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3992 }
3993
3994 return 0;
3995 }
3996
3997 static char *__indexable
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * __indexable buf,unsigned int * len_p)3998 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3999 unsigned int * count_p, char *__indexable buf,
4000 unsigned int * len_p)
4001 {
4002 unsigned int count = *count_p;
4003 struct ifbrmne ifbmne;
4004 unsigned int len = *len_p;
4005 struct mac_nat_entry *mne;
4006 unsigned long now;
4007
4008 bzero(&ifbmne, sizeof(ifbmne));
4009 LIST_FOREACH(mne, list, mne_list) {
4010 if (len < sizeof(ifbmne)) {
4011 break;
4012 }
4013 snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
4014 "%s", mne->mne_bif->bif_ifp->if_xname);
4015 memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
4016 sizeof(ifbmne.ifbmne_mac));
4017 now = (unsigned long) net_uptime();
4018 if (now < mne->mne_expire) {
4019 ifbmne.ifbmne_expire = mne->mne_expire - now;
4020 } else {
4021 ifbmne.ifbmne_expire = 0;
4022 }
4023 if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
4024 ifbmne.ifbmne_af = AF_INET6;
4025 ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
4026 } else {
4027 ifbmne.ifbmne_af = AF_INET;
4028 ifbmne.ifbmne_ip_addr = mne->mne_ip;
4029 }
4030 memcpy(buf, &ifbmne, sizeof(ifbmne));
4031 count++;
4032 buf += sizeof(ifbmne);
4033 len -= sizeof(ifbmne);
4034 }
4035 *count_p = count;
4036 *len_p = len;
4037 return buf;
4038 }
4039
4040 /*
4041 * bridge_ioctl_gmnelist()
4042 * Perform the get mac_nat_entry list ioctl.
4043 *
4044 * Note:
4045 * The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
4046 * field size/layout except for the last field ifbml_buf, the user-supplied
4047 * buffer pointer. That is passed in separately via the 'user_addr'
4048 * parameter from the respective 32-bit or 64-bit ioctl routine.
4049 */
4050 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)4051 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
4052 user_addr_t user_addr)
4053 {
4054 unsigned int count;
4055 char *buf;
4056 int error = 0;
4057 char *outbuf = NULL;
4058 struct mac_nat_entry *mne;
4059 unsigned int buflen;
4060 unsigned int len;
4061
4062 mnl->ifbml_elsize = sizeof(struct ifbrmne);
4063 count = 0;
4064 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
4065 count++;
4066 }
4067 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
4068 count++;
4069 }
4070 buflen = sizeof(struct ifbrmne) * count;
4071 if (buflen == 0 || mnl->ifbml_len == 0) {
4072 mnl->ifbml_len = buflen;
4073 return error;
4074 }
4075 BRIDGE_UNLOCK(sc);
4076 outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);
4077 BRIDGE_LOCK(sc);
4078 count = 0;
4079 buf = outbuf;
4080 len = min(mnl->ifbml_len, buflen);
4081 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
4082 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
4083 mnl->ifbml_len = count * sizeof(struct ifbrmne);
4084 BRIDGE_UNLOCK(sc);
4085 if (mnl->ifbml_len > 0) {
4086 error = copyout(outbuf, user_addr, mnl->ifbml_len);
4087 }
4088 kfree_data(outbuf, buflen);
4089 BRIDGE_LOCK(sc);
4090 return error;
4091 }
4092
4093 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4094 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4095 {
4096 struct ifbrmnelist64 * __single mnl = arg;
4097
4098 return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
4099 }
4100
4101 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4102 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4103 {
4104 struct ifbrmnelist32 * __single mnl = arg;
4105
4106 return bridge_ioctl_gmnelist(sc, arg,
4107 CAST_USER_ADDR_T(mnl->ifbml_buf));
4108 }
4109
4110 /*
4111 * bridge_ioctl_gifstats()
4112 * Return per-member stats.
4113 *
4114 * Note:
4115 * The ifbrmreq32 and ifbrmreq64 structures have the same
4116 * field size/layout except for the last field brmr_buf, the user-supplied
4117 * buffer pointer. That is passed in separately via the 'user_addr'
4118 * parameter from the respective 32-bit or 64-bit ioctl routine.
4119 */
4120 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)4121 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
4122 user_addr_t user_addr)
4123 {
4124 struct bridge_iflist *bif;
4125 int error = 0;
4126 unsigned int buflen;
4127
4128 bif = bridge_lookup_member(sc, mreq->brmr_ifname);
4129 if (bif == NULL) {
4130 error = ENOENT;
4131 goto done;
4132 }
4133
4134 buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
4135 if (buflen == 0 || mreq->brmr_len == 0) {
4136 mreq->brmr_len = buflen;
4137 goto done;
4138 }
4139 if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
4140 error = ENOBUFS;
4141 goto done;
4142 }
4143 mreq->brmr_len = buflen;
4144 error = copyout(&bif->bif_stats, user_addr, buflen);
4145 done:
4146 return error;
4147 }
4148
4149 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4150 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4151 {
4152 struct ifbrmreq32 * __single mreq = arg;
4153
4154 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4155 }
4156
4157 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4158 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4159 {
4160 struct ifbrmreq64 * __single mreq = arg;
4161
4162 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4163 }
4164
4165 /*
4166 * bridge_proto_attach_changed
4167 *
4168 * Called when protocol attachment on the interface changes.
4169 */
4170 static void
bridge_proto_attach_changed(struct ifnet * ifp)4171 bridge_proto_attach_changed(struct ifnet *ifp)
4172 {
4173 boolean_t changed = FALSE;
4174 struct bridge_iflist *bif;
4175 boolean_t input_broadcast;
4176 struct bridge_softc * __single sc = ifp->if_bridge;
4177
4178 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4179 if (sc == NULL) {
4180 return;
4181 }
4182 input_broadcast = interface_needs_input_broadcast(ifp);
4183 BRIDGE_LOCK(sc);
4184 bif = bridge_lookup_member_if(sc, ifp);
4185 if (bif != NULL) {
4186 changed = bif_set_input_broadcast(bif, input_broadcast);
4187 }
4188 BRIDGE_UNLOCK(sc);
4189 if (changed) {
4190 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
4191 "%s input broadcast %s", ifp->if_xname,
4192 input_broadcast ? "ENABLED" : "DISABLED");
4193 }
4194 return;
4195 }
4196
4197 /*
4198 * interface_media_active:
4199 *
4200 * Tells if an interface media is active.
4201 */
4202 static int
interface_media_active(struct ifnet * ifp)4203 interface_media_active(struct ifnet *ifp)
4204 {
4205 struct ifmediareq ifmr;
4206 int status = 0;
4207
4208 bzero(&ifmr, sizeof(ifmr));
4209 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
4210 if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
4211 status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
4212 }
4213 }
4214
4215 return status;
4216 }
4217
4218 /*
4219 * bridge_updatelinkstatus:
4220 *
4221 * Update the media active status of the bridge based on the
4222 * media active status of its member.
4223 * If changed, return the corresponding onf/off link event.
4224 */
4225 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)4226 bridge_updatelinkstatus(struct bridge_softc *sc)
4227 {
4228 struct bridge_iflist *bif;
4229 int active_member = 0;
4230 u_int32_t event_code = 0;
4231
4232 BRIDGE_LOCK_ASSERT_HELD(sc);
4233
4234 /*
4235 * Find out if we have an active interface
4236 */
4237 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
4238 if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
4239 active_member = 1;
4240 break;
4241 }
4242 }
4243
4244 if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4245 sc->sc_flags |= SCF_MEDIA_ACTIVE;
4246 event_code = KEV_DL_LINK_ON;
4247 } else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4248 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
4249 event_code = KEV_DL_LINK_OFF;
4250 }
4251
4252 return event_code;
4253 }
4254
4255 /*
4256 * bridge_iflinkevent:
4257 */
4258 static void
bridge_iflinkevent(struct ifnet * ifp)4259 bridge_iflinkevent(struct ifnet *ifp)
4260 {
4261 struct bridge_softc * __single sc = ifp->if_bridge;
4262 struct bridge_iflist *bif;
4263 u_int32_t event_code = 0;
4264 int media_active;
4265
4266 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4267
4268 /* Check if the interface is a bridge member */
4269 if (sc == NULL) {
4270 return;
4271 }
4272
4273 media_active = interface_media_active(ifp);
4274 BRIDGE_LOCK(sc);
4275 bif = bridge_lookup_member_if(sc, ifp);
4276 if (bif != NULL) {
4277 if (media_active) {
4278 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4279 } else {
4280 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4281 }
4282 if (sc->sc_mac_nat_bif != NULL) {
4283 bridge_mac_nat_flush_entries(sc, bif);
4284 }
4285
4286 event_code = bridge_updatelinkstatus(sc);
4287 }
4288 BRIDGE_UNLOCK(sc);
4289
4290 if (event_code != 0) {
4291 bridge_link_event(sc->sc_ifp, event_code);
4292 }
4293 }
4294
4295 /*
4296 * bridge_delayed_callback:
4297 *
4298 * Makes a delayed call
4299 */
4300 static void
bridge_delayed_callback(void * param,__unused void * param2)4301 bridge_delayed_callback(void *param, __unused void *param2)
4302 {
4303 struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4304 struct bridge_softc *sc = call->bdc_sc;
4305
4306 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4307 if (bridge_delayed_callback_delay > 0) {
4308 struct timespec ts;
4309
4310 ts.tv_sec = bridge_delayed_callback_delay;
4311 ts.tv_nsec = 0;
4312
4313 BRIDGE_LOG(LOG_NOTICE, 0,
4314 "sleeping for %d seconds",
4315 bridge_delayed_callback_delay);
4316
4317 msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4318 __func__, &ts);
4319
4320 BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4321 }
4322 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4323
4324 BRIDGE_LOCK(sc);
4325
4326 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4327 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4328 "%s call 0x%llx flags 0x%x",
4329 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4330 call->bdc_flags);
4331 }
4332 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4333
4334 if (call->bdc_flags & BDCF_CANCELLING) {
4335 wakeup(call);
4336 } else {
4337 if ((sc->sc_flags & SCF_DETACHING) == 0) {
4338 (*call->bdc_func)(sc);
4339 }
4340 }
4341 call->bdc_flags &= ~BDCF_OUTSTANDING;
4342 BRIDGE_UNLOCK(sc);
4343 }
4344
4345 /*
4346 * bridge_schedule_delayed_call:
4347 *
4348 * Schedule a function to be called on a separate thread
4349 * The actual call may be scheduled to run at a given time or ASAP.
4350 */
4351 static void
4352 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4353 {
4354 uint64_t deadline = 0;
4355 struct bridge_softc *sc = call->bdc_sc;
4356
4357 BRIDGE_LOCK_ASSERT_HELD(sc);
4358
4359 if ((sc->sc_flags & SCF_DETACHING) ||
4360 (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4361 return;
4362 }
4363
4364 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4365 nanoseconds_to_absolutetime(
4366 (uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4367 call->bdc_ts.tv_nsec, &deadline);
4368 clock_absolutetime_interval_to_deadline(deadline, &deadline);
4369 }
4370
4371 call->bdc_flags = BDCF_OUTSTANDING;
4372
4373 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4374 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4375 "%s call 0x%llx flags 0x%x",
4376 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4377 call->bdc_flags);
4378 }
4379 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4380
4381 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4382 thread_call_func_delayed(
4383 (thread_call_func_t)bridge_delayed_callback,
4384 call, deadline);
4385 } else {
4386 if (call->bdc_thread_call == NULL) {
4387 call->bdc_thread_call = thread_call_allocate(
4388 (thread_call_func_t)bridge_delayed_callback,
4389 call);
4390 }
4391 thread_call_enter(call->bdc_thread_call);
4392 }
4393 }
4394
4395 /*
4396 * bridge_cancel_delayed_call:
4397 *
4398 * Cancel a queued or running delayed call.
4399 * If call is running, does not return until the call is done to
4400 * prevent race condition with the brigde interface getting destroyed
4401 */
4402 static void
4403 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4404 {
4405 boolean_t result;
4406 struct bridge_softc *sc = call->bdc_sc;
4407
4408 /*
4409 * The call was never scheduled
4410 */
4411 if (sc == NULL) {
4412 return;
4413 }
4414
4415 BRIDGE_LOCK_ASSERT_HELD(sc);
4416
4417 call->bdc_flags |= BDCF_CANCELLING;
4418
4419 while (call->bdc_flags & BDCF_OUTSTANDING) {
4420 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4421 "%s call 0x%llx flags 0x%x",
4422 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4423 call->bdc_flags);
4424 result = thread_call_func_cancel(
4425 (thread_call_func_t)bridge_delayed_callback, call, FALSE);
4426
4427 if (result) {
4428 /*
4429 * We managed to dequeue the delayed call
4430 */
4431 call->bdc_flags &= ~BDCF_OUTSTANDING;
4432 } else {
4433 /*
4434 * Wait for delayed call do be done running
4435 */
4436 msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4437 }
4438 }
4439 call->bdc_flags &= ~BDCF_CANCELLING;
4440 }
4441
4442 /*
4443 * bridge_cleanup_delayed_call:
4444 *
4445 * Dispose resource allocated for a delayed call
4446 * Assume the delayed call is not queued or running .
4447 */
4448 static void
4449 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4450 {
4451 boolean_t result;
4452 struct bridge_softc *sc = call->bdc_sc;
4453
4454 /*
4455 * The call was never scheduled
4456 */
4457 if (sc == NULL) {
4458 return;
4459 }
4460
4461 BRIDGE_LOCK_ASSERT_HELD(sc);
4462
4463 VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4464 VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4465
4466 if (call->bdc_thread_call != NULL) {
4467 result = thread_call_free(call->bdc_thread_call);
4468 if (result == FALSE) {
4469 panic("%s thread_call_free() failed for call %p",
4470 __func__, call);
4471 }
4472 call->bdc_thread_call = NULL;
4473 }
4474 }
4475
4476 /*
4477 * bridge_init:
4478 *
4479 * Initialize a bridge interface.
4480 */
4481 static int
4482 bridge_init(struct ifnet *ifp)
4483 {
4484 struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4485 errno_t error;
4486
4487 BRIDGE_LOCK_ASSERT_HELD(sc);
4488
4489 if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4490 return 0;
4491 }
4492
4493 error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4494
4495 /*
4496 * Calling bridge_aging_timer() is OK as there are no entries to
4497 * age so we're just going to arm the timer
4498 */
4499 bridge_aging_timer(sc);
4500 #if BRIDGESTP
4501 if (error == 0) {
4502 bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4503 }
4504 #endif /* BRIDGESTP */
4505 return error;
4506 }
4507
4508 /*
4509 * bridge_ifstop:
4510 *
4511 * Stop the bridge interface.
4512 */
4513 static void
4514 bridge_ifstop(struct ifnet *ifp, int disable)
4515 {
4516 #pragma unused(disable)
4517 struct bridge_softc * __single sc = ifp->if_softc;
4518
4519 BRIDGE_LOCK_ASSERT_HELD(sc);
4520
4521 if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4522 return;
4523 }
4524
4525 bridge_cancel_delayed_call(&sc->sc_aging_timer);
4526
4527 #if BRIDGESTP
4528 bstp_stop(&sc->sc_stp);
4529 #endif /* BRIDGESTP */
4530
4531 bridge_rtflush(sc, IFBF_FLUSHDYN);
4532 (void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4533 }
4534
4535 static const uint32_t checksum_request_flags = (MBUF_CSUM_REQ_TCP |
4536 MBUF_CSUM_REQ_UDP | MBUF_CSUM_REQ_TCPIPV6 | MBUF_CSUM_REQ_UDPIPV6);
4537
4538 static const mbuf_csum_performed_flags_t checksum_performed_all_good =
4539 (MBUF_CSUM_DID_IP | MBUF_CSUM_IP_GOOD
4540 | MBUF_CSUM_DID_DATA | MBUF_CSUM_PSEUDO_HDR);
4541
4542 /*
4543 * bridge_compute_cksum:
4544 *
4545 * If the packet has checksum flags, compare the hardware checksum
4546 * capabilities of the source and destination interfaces. If they
4547 * are the same, there's nothing to do. If they are different,
4548 * finalize the checksum so that it can be sent on the destination
4549 * interface.
4550 */
4551 static void
4552 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4553 {
4554 uint32_t csum_flags;
4555 uint16_t dst_hw_csum;
4556 uint32_t did_sw = 0;
4557 struct ether_header *eh;
4558 uint16_t src_hw_csum;
4559
4560 if (src_if == dst_if) {
4561 return;
4562 }
4563 csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4564 if (csum_flags == 0) {
4565 /* no checksum offload */
4566 return;
4567 }
4568
4569 /*
4570 * if destination/source differ in checksum offload
4571 * capabilities, finalize/compute the checksum
4572 */
4573 dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4574 src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4575 if (dst_hw_csum == src_hw_csum) {
4576 return;
4577 }
4578 eh = mtod(m, struct ether_header *);
4579 switch (eh->ether_type) {
4580 case HTONS_ETHERTYPE_IP:
4581 did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4582 break;
4583 case HTONS_ETHERTYPE_IPV6:
4584 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4585 break;
4586 }
4587 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4588 "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4589 src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4590 m->m_pkthdr.csum_flags);
4591 }
4592
4593 static inline errno_t
4594 bridge_transmit(ifnet_t ifp, mbuf_t m)
4595 {
4596 struct flowadv adv = { .code = FADV_SUCCESS };
4597 errno_t error;
4598 int flags = DLIL_OUTPUT_FLAGS_RAW;
4599
4600 flags = (if_bridge_output_skip_filters != 0)
4601 ? (DLIL_OUTPUT_FLAGS_RAW | DLIL_OUTPUT_FLAGS_SKIP_IF_FILTERS)
4602 : DLIL_OUTPUT_FLAGS_RAW;
4603 error = dlil_output(ifp, 0, m, NULL, NULL, flags, &adv);
4604 if (error == 0) {
4605 if (adv.code == FADV_FLOW_CONTROLLED) {
4606 error = EQFULL;
4607 } else if (adv.code == FADV_SUSPENDED) {
4608 error = EQSUSPENDED;
4609 }
4610 }
4611 return error;
4612 }
4613
4614 static int
4615 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4616 bool *is_fragmented)
4617 {
4618 int newoff;
4619
4620 *is_fragmented = false;
4621 while (1) {
4622 newoff = ip6_nexthdr(m, off, proto, nxtp);
4623 if (newoff < 0) {
4624 return off;
4625 } else if (newoff < off) {
4626 return -1; /* invalid */
4627 } else if (newoff == off) {
4628 return newoff;
4629 }
4630 off = newoff;
4631 proto = *nxtp;
4632 if (proto == IPPROTO_FRAGMENT) {
4633 *is_fragmented = true;
4634 }
4635 }
4636 }
4637
/* relaxed atomic increment of the lvalue `s` */
#define __ATOMIC_INC(s) os_atomic_inc(&(s), relaxed)
4639
4640 static int
4641 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4642 ip_packet_info_t info_p, struct bripstats * stats_p)
4643 {
4644 int error = 0;
4645 u_int hlen;
4646 u_int ip_hlen;
4647 u_int ip_pay_len;
4648 struct mbuf * m0 = *mp;
4649 int off;
4650 int opt_len = 0;
4651 int proto = 0;
4652
4653 bzero(info_p, sizeof(*info_p));
4654 if (is_ipv4) {
4655 struct ip * ip;
4656 u_int ip_total_len;
4657
4658 /* IPv4 */
4659 hlen = mac_hlen + sizeof(struct ip);
4660 if (m0->m_pkthdr.len < hlen) {
4661 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4662 "Short IP packet %d < %d",
4663 m0->m_pkthdr.len, hlen);
4664 error = _EBADIP;
4665 __ATOMIC_INC(stats_p->bips_bad_ip);
4666 goto done;
4667 }
4668 if (m0->m_len < hlen) {
4669 *mp = m0 = m_pullup(m0, hlen);
4670 if (m0 == NULL) {
4671 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4672 "m_pullup failed hlen %d",
4673 hlen);
4674 error = ENOBUFS;
4675 __ATOMIC_INC(stats_p->bips_bad_ip);
4676 goto done;
4677 }
4678 }
4679 ip = (struct ip *)mtodo(m0, mac_hlen);
4680 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4681 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4682 "bad IP version");
4683 error = _EBADIP;
4684 __ATOMIC_INC(stats_p->bips_bad_ip);
4685 goto done;
4686 }
4687 ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4688 if (ip_hlen < sizeof(struct ip)) {
4689 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4690 "bad IP header length %d < %d",
4691 ip_hlen,
4692 (int)sizeof(struct ip));
4693 error = _EBADIP;
4694 __ATOMIC_INC(stats_p->bips_bad_ip);
4695 goto done;
4696 }
4697 hlen = mac_hlen + ip_hlen;
4698 if (m0->m_len < hlen) {
4699 *mp = m0 = m_pullup(m0, hlen);
4700 if (m0 == NULL) {
4701 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4702 "m_pullup failed hlen %d",
4703 hlen);
4704 error = ENOBUFS;
4705 __ATOMIC_INC(stats_p->bips_bad_ip);
4706 goto done;
4707 }
4708 ip = (struct ip *)mtodo(m0, mac_hlen);
4709 }
4710
4711 ip_total_len = ntohs(ip->ip_len);
4712 if (ip_total_len < ip_hlen) {
4713 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4714 "IP total len %d < header len %d",
4715 ip_total_len, ip_hlen);
4716 error = _EBADIP;
4717 __ATOMIC_INC(stats_p->bips_bad_ip);
4718 goto done;
4719 }
4720 if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4721 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4722 "invalid IP payload length %d > %d",
4723 ip_total_len,
4724 (m0->m_pkthdr.len - mac_hlen));
4725 error = _EBADIP;
4726 __ATOMIC_INC(stats_p->bips_bad_ip);
4727 goto done;
4728 }
4729 ip_pay_len = ip_total_len - ip_hlen;
4730 info_p->ip_proto = ip->ip_p;
4731 info_p->ip_hdr = mtodo(m0, mac_hlen);
4732 info_p->ip_m0_len = m0->m_len - mac_hlen;
4733 info_p->ip_hlen = ip_hlen;
4734 #define FRAG_BITS (IP_OFFMASK | IP_MF)
4735 if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4736 info_p->ip_is_fragmented = true;
4737 }
4738 __ATOMIC_INC(stats_p->bips_ip);
4739 } else {
4740 struct ip6_hdr *ip6;
4741
4742 /* IPv6 */
4743 hlen = mac_hlen + sizeof(struct ip6_hdr);
4744 if (m0->m_pkthdr.len < hlen) {
4745 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4746 "short IPv6 packet %d < %d",
4747 m0->m_pkthdr.len, hlen);
4748 error = _EBADIPV6;
4749 __ATOMIC_INC(stats_p->bips_bad_ip6);
4750 goto done;
4751 }
4752 if (m0->m_len < hlen) {
4753 *mp = m0 = m_pullup(m0, hlen);
4754 if (m0 == NULL) {
4755 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4756 "m_pullup failed hlen %d",
4757 hlen);
4758 error = ENOBUFS;
4759 __ATOMIC_INC(stats_p->bips_bad_ip6);
4760 goto done;
4761 }
4762 }
4763 ip6 = (struct ip6_hdr *)(mtodo(m0, mac_hlen));
4764 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4765 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4766 "bad IPv6 version");
4767 error = _EBADIPV6;
4768 __ATOMIC_INC(stats_p->bips_bad_ip6);
4769 goto done;
4770 }
4771 off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4772 &info_p->ip_is_fragmented);
4773 if (off < 0 || m0->m_pkthdr.len < off) {
4774 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4775 "ip6_lasthdr() returned %d",
4776 off);
4777 error = _EBADIPV6;
4778 __ATOMIC_INC(stats_p->bips_bad_ip6);
4779 goto done;
4780 }
4781 ip_hlen = sizeof(*ip6);
4782 opt_len = off - mac_hlen - ip_hlen;
4783 if (opt_len < 0) {
4784 error = _EBADIPV6;
4785 __ATOMIC_INC(stats_p->bips_bad_ip6);
4786 goto done;
4787 }
4788 ip_pay_len = ntohs(ip6->ip6_plen);
4789 if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4790 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4791 "invalid IPv6 payload length %d > %d",
4792 ip_pay_len,
4793 (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4794 error = _EBADIPV6;
4795 __ATOMIC_INC(stats_p->bips_bad_ip6);
4796 goto done;
4797 }
4798 info_p->ip_proto = proto;
4799 info_p->ip_hdr = mtodo(m0, mac_hlen);
4800 info_p->ip_m0_len = m0->m_len - mac_hlen;
4801 info_p->ip_hlen = ip_hlen;
4802 __ATOMIC_INC(stats_p->bips_ip6);
4803 }
4804 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4805 "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4806 is_ipv4 ? '4' : '6',
4807 proto, ip_hlen, ip_pay_len, opt_len,
4808 m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4809 info_p->ip_pay_len = ip_pay_len;
4810 info_p->ip_opt_len = opt_len;
4811 info_p->ip_is_ipv4 = is_ipv4;
4812 done:
4813 return error;
4814 }
4815
4816 static int
4817 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4818 ip_packet_info_t info_p, struct bripstats * stats_p)
4819 {
4820 int error;
4821 u_int hlen;
4822
4823 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4824 if (error != 0) {
4825 goto done;
4826 }
4827 if (info_p->ip_proto != IPPROTO_TCP) {
4828 /* not a TCP frame, not an error, just a bad guess */
4829 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4830 "non-TCP (%d) IPv%c frame %d bytes",
4831 info_p->ip_proto, is_ipv4 ? '4' : '6',
4832 (*mp)->m_pkthdr.len);
4833 goto done;
4834 }
4835 if (info_p->ip_is_fragmented) {
4836 /* both TSO and IP fragmentation don't make sense */
4837 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4838 "fragmented TSO packet?");
4839 __ATOMIC_INC(stats_p->bips_bad_tcp);
4840 error = _EBADTCP;
4841 goto done;
4842 }
4843 hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4844 info_p->ip_opt_len;
4845 if ((*mp)->m_len < hlen) {
4846 *mp = m_pullup(*mp, hlen);
4847 if (*mp == NULL) {
4848 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4849 "m_pullup %d failed",
4850 hlen);
4851 __ATOMIC_INC(stats_p->bips_bad_tcp);
4852 error = _EBADTCP;
4853 goto done;
4854 }
4855 }
4856 info_p->ip_proto_hdr = info_p->ip_hdr + info_p->ip_hlen +
4857 info_p->ip_opt_len;
4858 done:
4859 return error;
4860 }
4861
4862 static inline void
4863 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4864 {
4865 if (proto == IPPROTO_TCP) {
4866 __ATOMIC_INC(stats_p->brcs_tcp_checksum);
4867 } else {
4868 __ATOMIC_INC(stats_p->brcs_udp_checksum);
4869 }
4870 return;
4871 }
4872
/*
 * Bit flags classifying a frame's ethertype (see ether_type_flag_get()).
 * The single-protocol flags occupy distinct bits so they can be combined
 * into the _IP and _IP_ARP masks below.
 */
#define ETHER_TYPE_FLAG_NONE 0x00       /* none of the ethertypes below */
#define ETHER_TYPE_FLAG_IPV4 0x01
#define ETHER_TYPE_FLAG_IPV6 0x02
#define ETHER_TYPE_FLAG_ARP 0x04
/* mask: IPv4 or IPv6 */
#define ETHER_TYPE_FLAG_IP (ETHER_TYPE_FLAG_IPV4 | ETHER_TYPE_FLAG_IPV6)
/* mask: IPv4, IPv6 or ARP */
#define ETHER_TYPE_FLAG_IP_ARP (ETHER_TYPE_FLAG_IP | ETHER_TYPE_FLAG_ARP)
4879
4880 static inline bool
4881 ether_type_flag_is_ip(ether_type_flag_t flag)
4882 {
4883 return (flag & ETHER_TYPE_FLAG_IP) != 0;
4884 }
4885
4886 static inline ether_type_flag_t
4887 ether_type_flag_get(uint16_t ether_type)
4888 {
4889 ether_type_flag_t flag = ETHER_TYPE_FLAG_NONE;
4890
4891 switch (ether_type) {
4892 case HTONS_ETHERTYPE_IP:
4893 flag = ETHER_TYPE_FLAG_IPV4;
4894 break;
4895 case HTONS_ETHERTYPE_IPV6:
4896 flag = ETHER_TYPE_FLAG_IPV6;
4897 break;
4898 case HTONS_ETHERTYPE_ARP:
4899 flag = ETHER_TYPE_FLAG_ARP;
4900 break;
4901 default:
4902 break;
4903 }
4904 return flag;
4905 }
4906
4907 static bool
4908 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4909 {
4910 uint16_t ether_type;
4911 bool is_ip = TRUE;
4912
4913 ether_type = ntohs(eh->ether_type);
4914 switch (ether_type) {
4915 case ETHERTYPE_IP:
4916 *is_ipv4 = TRUE;
4917 break;
4918 case ETHERTYPE_IPV6:
4919 *is_ipv4 = FALSE;
4920 break;
4921 default:
4922 is_ip = FALSE;
4923 break;
4924 }
4925 return is_ip;
4926 }
4927
4928 static errno_t
4929 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4930 {
4931 struct brcsumstats *csum_stats_p;
4932 struct ether_header *eh;
4933 errno_t error = 0;
4934 ip_packet_info info;
4935 bool is_ipv4;
4936 struct mbuf * m;
4937 u_int mac_hlen = sizeof(struct ether_header);
4938 uint16_t sum;
4939 bool valid;
4940
4941 eh = mtod(*mp, struct ether_header *);
4942 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4943 goto done;
4944 }
4945 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4946 &stats_p->brms_out_ip);
4947 m = *mp;
4948 if (error != 0) {
4949 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4950 "bridge_get_ip_proto failed %d",
4951 error);
4952 goto done;
4953 }
4954 if (is_ipv4) {
4955 if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4956 /* hardware offloaded IP header checksum */
4957 valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4958 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4959 "IP checksum HW %svalid",
4960 valid ? "" : "in");
4961 if (!valid) {
4962 __ATOMIC_INC(stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum);
4963 error = _EBADIPCHECKSUM;
4964 goto done;
4965 }
4966 __ATOMIC_INC(stats_p->brms_out_cksum_good_hw.brcs_ip_checksum);
4967 } else {
4968 /* verify */
4969 sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4970 valid = (sum == 0);
4971 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4972 "IP checksum SW %svalid",
4973 valid ? "" : "in");
4974 if (!valid) {
4975 __ATOMIC_INC(stats_p->brms_out_cksum_bad.brcs_ip_checksum);
4976 error = _EBADIPCHECKSUM;
4977 goto done;
4978 }
4979 __ATOMIC_INC(stats_p->brms_out_cksum_good.brcs_ip_checksum);
4980 }
4981 }
4982 if (info.ip_is_fragmented) {
4983 /* can't verify checksum on fragmented packets */
4984 goto done;
4985 }
4986 switch (info.ip_proto) {
4987 case IPPROTO_TCP:
4988 __ATOMIC_INC(stats_p->brms_out_ip.bips_tcp);
4989 break;
4990 case IPPROTO_UDP:
4991 __ATOMIC_INC(stats_p->brms_out_ip.bips_udp);
4992 break;
4993 default:
4994 goto done;
4995 }
4996 /* check for hardware offloaded UDP/TCP checksum */
4997 #define HW_CSUM (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4998 if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4999 /* checksum verified by hardware */
5000 valid = (m->m_pkthdr.csum_rx_val == 0xffff);
5001 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5002 "IPv%c %s checksum HW 0x%x %svalid",
5003 is_ipv4 ? '4' : '6',
5004 (info.ip_proto == IPPROTO_TCP)
5005 ? "TCP" : "UDP",
5006 m->m_pkthdr.csum_data,
5007 valid ? "" : "in" );
5008 if (!valid) {
5009 /* bad checksum */
5010 csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
5011 error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
5012 : _EBADTCPCHECKSUM;
5013 } else {
5014 /* good checksum */
5015 csum_stats_p = &stats_p->brms_out_cksum_good_hw;
5016 }
5017 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
5018 goto done;
5019 }
5020 /* adjust frame to skip mac-layer header */
5021 _mbuf_adjust_pkthdr_and_data(m, mac_hlen);
5022 if (is_ipv4) {
5023 sum = inet_cksum(m, info.ip_proto,
5024 info.ip_hlen,
5025 info.ip_pay_len);
5026 } else {
5027 sum = inet6_cksum(m, info.ip_proto,
5028 info.ip_hlen + info.ip_opt_len,
5029 info.ip_pay_len - info.ip_opt_len);
5030 }
5031 valid = (sum == 0);
5032 if (valid) {
5033 csum_stats_p = &stats_p->brms_out_cksum_good;
5034 } else {
5035 csum_stats_p = &stats_p->brms_out_cksum_bad;
5036 error = (info.ip_proto == IPPROTO_TCP)
5037 ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
5038 }
5039 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
5040 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5041 "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
5042 is_ipv4 ? '4' : '6',
5043 (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
5044 valid ? "" : "in",
5045 sum, info.ip_hlen, info.ip_pay_len);
5046 /* adjust frame back to start of mac-layer header */
5047 _mbuf_adjust_pkthdr_and_data(m, -mac_hlen);
5048
5049 done:
5050 return error;
5051 }
5052
5053 static mbuf_t
5054 bridge_verify_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * dbif,
5055 mbuf_t in_list, bool is_ipv4)
5056 {
5057 mbuf_t next_packet;
5058 mblist ret;
5059
5060 mblist_init(&ret);
5061 for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
5062 errno_t error;
5063
5064 /* take packet out of the list */
5065 next_packet = scan->m_nextpkt;
5066 scan->m_nextpkt = NULL;
5067
5068 if (scan->m_pkthdr.rx_seg_cnt > 1) {
5069 /* LRO packet, compute checksum on large packet */
5070 scan = bridge_filter_checksum(bridge_ifp, dbif, scan,
5071 is_ipv4, false, true);
5072 } else {
5073 /* verify checksum */
5074 error = bridge_verify_checksum(&scan, &dbif->bif_stats);
5075 if (error != 0) {
5076 if (scan != NULL) {
5077 m_drop(scan, DROPTAP_FLAG_DIR_IN,
5078 DROP_REASON_BRIDGE_CHECKSUM, NULL, 0);
5079 scan = NULL;
5080 }
5081 }
5082 }
5083
5084 /* add it back to the list */
5085 if (scan != NULL) {
5086 mblist_append(&ret, scan);
5087 }
5088 }
5089 return ret.head;
5090 }
5091
5092
5093 static errno_t
5094 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
5095 struct ifbrmstats * stats_p)
5096 {
5097 uint16_t * csum_p;
5098 errno_t error = 0;
5099 u_int hlen;
5100 struct mbuf * m0 = *mp;
5101 u_int mac_hlen = sizeof(struct ether_header);
5102 u_int pkt_hdr_len;
5103 struct tcphdr * tcp;
5104 u_int tcp_hlen;
5105 struct udphdr * udp;
5106
5107 if (info_p->ip_is_ipv4) {
5108 /* compute IP header checksum */
5109 struct ip *ip = (struct ip *)info_p->ip_hdr;
5110 ip->ip_sum = 0;
5111 ip->ip_sum = inet_cksum(m0, 0, mac_hlen, info_p->ip_hlen);
5112 __ATOMIC_INC(stats_p->brms_in_computed_cksum.brcs_ip_checksum);
5113 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5114 "IPv4 checksum 0x%x",
5115 ntohs(ip->ip_sum));
5116 }
5117 if (info_p->ip_is_fragmented) {
5118 /* can't compute checksum on fragmented packets */
5119 goto done;
5120 }
5121 pkt_hdr_len = m0->m_pkthdr.len;
5122 switch (info_p->ip_proto) {
5123 case IPPROTO_TCP:
5124 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
5125 + sizeof(struct tcphdr);
5126 if (m0->m_len < hlen) {
5127 *mp = m0 = m_pullup(m0, hlen);
5128 if (m0 == NULL) {
5129 __ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5130 error = _EBADTCP;
5131 goto done;
5132 }
5133 }
5134 tcp = (struct tcphdr *)(info_p->ip_hdr + info_p->ip_hlen
5135 + info_p->ip_opt_len);
5136 tcp_hlen = tcp->th_off << 2;
5137 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
5138 if (hlen > pkt_hdr_len) {
5139 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5140 "bad tcp header length %u",
5141 tcp_hlen);
5142 __ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5143 error = _EBADTCP;
5144 goto done;
5145 }
5146 csum_p = &tcp->th_sum;
5147 __ATOMIC_INC(stats_p->brms_in_ip.bips_tcp);
5148 break;
5149 case IPPROTO_UDP:
5150 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
5151 if (m0->m_len < hlen) {
5152 *mp = m0 = m_pullup(m0, hlen);
5153 if (m0 == NULL) {
5154 __ATOMIC_INC(stats_p->brms_in_ip.bips_bad_udp);
5155 error = ENOBUFS;
5156 goto done;
5157 }
5158 }
5159 udp = (struct udphdr *)(info_p->ip_hdr + info_p->ip_hlen
5160 + info_p->ip_opt_len);
5161 csum_p = &udp->uh_sum;
5162 __ATOMIC_INC(stats_p->brms_in_ip.bips_udp);
5163 break;
5164 default:
5165 /* not TCP or UDP */
5166 goto done;
5167 }
5168 *csum_p = 0;
5169 /* adjust frame to skip mac-layer header */
5170 _mbuf_adjust_pkthdr_and_data(m0, mac_hlen);
5171 if (info_p->ip_is_ipv4) {
5172 *csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
5173 info_p->ip_pay_len);
5174 } else {
5175 *csum_p = inet6_cksum(m0, info_p->ip_proto,
5176 info_p->ip_hlen + info_p->ip_opt_len,
5177 info_p->ip_pay_len - info_p->ip_opt_len);
5178 }
5179 if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
5180 /* RFC 1122 4.1.3.4 */
5181 *csum_p = 0xffff;
5182 }
5183 /* adjust frame back to start of mac-layer header */
5184 _mbuf_adjust_pkthdr_and_data(m0, -mac_hlen);
5185 proto_csum_stats_increment(info_p->ip_proto,
5186 &stats_p->brms_in_computed_cksum);
5187
5188 /* indicate that the checksum is good */
5189 mbuf_set_csum_performed(m0, checksum_performed_all_good, 0xffff);
5190
5191 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5192 "IPv%c %s set checksum 0x%x",
5193 info_p->ip_is_ipv4 ? '4' : '6',
5194 (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
5195 ntohs(*csum_p));
5196 done:
5197 return error;
5198 }
5199
5200 static inline void
5201 bridge_handle_checksum_op(ifnet_t src_ifp, ifnet_t dst_ifp,
5202 mbuf_t m, ChecksumOperation cksum_op)
5203 {
5204 switch (cksum_op) {
5205 case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
5206 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5207 break;
5208 case CHECKSUM_OPERATION_FINALIZE:
5209 /* the checksum might not be correct, finalize now */
5210 VERIFY(dst_ifp != NULL);
5211 bridge_finalize_cksum(dst_ifp, m);
5212 break;
5213 case CHECKSUM_OPERATION_COMPUTE:
5214 VERIFY(dst_ifp != NULL && src_ifp != NULL);
5215 bridge_compute_cksum(src_ifp, dst_ifp, m);
5216 break;
5217 default:
5218 break;
5219 }
5220 return;
5221 }
5222
5223 static uint32_t
5224 get_if_tso_mtu(struct ifnet * ifp, bool is_ipv4)
5225 {
5226 uint32_t tso_mtu;
5227
5228 tso_mtu = is_ipv4 ? ifp->if_tso_v4_mtu : ifp->if_tso_v6_mtu;
5229 if (tso_mtu == 0) {
5230 tso_mtu = IP_MAXPACKET;
5231 }
5232
5233 #if DEBUG || DEVELOPMENT
5234 #define REDUCED_TSO_MTU (16 * 1024)
5235 if (if_bridge_reduce_tso_mtu != 0 && tso_mtu > REDUCED_TSO_MTU) {
5236 tso_mtu = REDUCED_TSO_MTU;
5237 }
5238 #endif /* DEBUG || DEVELOPMENT */
5239 return tso_mtu;
5240 }
5241
/*
 * tso_hwassist:
 * - determine whether the destination interface supports TSO offload
 * - if the packet is already marked for offload and the hardware supports
 *   it, just allow the packet to continue on
 * - if not, parse the packet headers to verify that this is a large TCP
 *   packet requiring segmentation; if the hardware doesn't support it
 *   set *need_gso; otherwise, mark the packet for TSO offload
 *
 * Parameters:
 *   mp            packet; may be replaced (or set to NULL on failure) by
 *                 bridge_get_tcp_header()
 *   is_ipv4       true for IPv4, false for IPv6
 *   ifp           destination interface whose capabilities are consulted
 *   mac_hlen      length of the MAC header preceding the IP header
 *   mss_p         in: MSS the packet is already marked with, 0 if none;
 *                 out: the MSS in effect when the function returns
 *   need_gso      out: true if the packet must be segmented in software
 *   is_large_tcp  out: true if the packet was treated as a large TCP packet
 *
 * Returns 0, or the error from bridge_get_tcp_header().
 */
static int
tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
    int * mss_p, bool * need_gso, bool * is_large_tcp)
{
	uint32_t csum_flags;
	int error = 0;
	ip_packet_info info;
	u_int32_t if_csum;
	u_int32_t if_tso;
	u_int32_t mbuf_tso;
	int mss = *mss_p;
	uint8_t seg_cnt = 0;
	bool supports_cksum = false;
	uint32_t pkt_mtu;
	/* scratch counters for the header parser; never read in this function */
	struct bripstats stats;

	*need_gso = false;
	*is_large_tcp = false;
	if (is_ipv4) {
		/*
		 * Enable both TCP and IP offload if the hardware supports it.
		 * If the hardware doesn't support TCP offload, supports_cksum
		 * will be false so we won't set either offload.
		 */
		if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
		supports_cksum = (if_csum & CSUM_TCP) != 0;
		if_tso = IFNET_TSO_IPV4;
		mbuf_tso = CSUM_TSO_IPV4;
	} else {
		if_csum = (ifp->if_hwassist & CSUM_TCPIPV6);
		supports_cksum = (if_csum & CSUM_TCPIPV6) != 0;
		if_tso = IFNET_TSO_IPV6;
		mbuf_tso = CSUM_TSO_IPV6;
	}
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
	    "%s: does%s support checksum 0x%x if_csum 0x%x",
	    ifp->if_xname, supports_cksum ? "" : " not",
	    ifp->if_hwassist, if_csum);

	/* verify that this is a large TCP frame */
	error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
	    &info, &stats);
	if (error != 0) {
		/* bad packet */
		goto done;
	}
	if (info.ip_proto_hdr == NULL) {
		/* not a TCP packet */
		goto done;
	}
	/* size of the IP packet, i.e. the frame without its MAC header */
	pkt_mtu = info.ip_hlen + info.ip_pay_len + info.ip_opt_len;
	if (mss == 0) {
		/* check for LRO */
		seg_cnt = (*mp)->m_pkthdr.rx_seg_cnt;
		if (seg_cnt == 1 || (seg_cnt == 0 && pkt_mtu <= ifp->if_mtu)) {
			/* not actually a large packet */
			goto done;
		}
	}
	if (mss == 0) {
		uint32_t hdr_len;
		struct tcphdr * tcp;

		tcp = (struct tcphdr *)info.ip_proto_hdr;
		/* IP header (+ options) plus TCP header; th_off is in 32-bit words */
		hdr_len = info.ip_hlen + info.ip_opt_len + (tcp->th_off << 2);

		/* packet isn't marked, mark it now */
		if (seg_cnt != 0) {
			uint32_t len;

			/* approximate the MSS using the LRO seg cnt */
			len = mbuf_pkthdr_len(*mp) - hdr_len - ETHER_HDR_LEN;
			mss = len / seg_cnt;
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
			    "%s: mss %d = len %d / seg cnt %d",
			    ifp->if_xname, mss, len, seg_cnt);
			if (mss <= 0) {
				/* unexpected value */
				mss = 0;
				goto done;
			}
		} else {
			/* no LRO hint: derive the MSS from the interface MTU */
			mss = ifp->if_mtu - hdr_len
			    - if_bridge_tso_reduce_mss_tx;
			assert(mss > 0);
		}
		/* request TSO, plus checksum offload if the interface has it */
		csum_flags = mbuf_tso;
		if (supports_cksum) {
			csum_flags |= if_csum;
		}
		(*mp)->m_pkthdr.tso_segsz = mss;
		(*mp)->m_pkthdr.csum_flags |= csum_flags;
		(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
	}
	*is_large_tcp = true;
	(*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
	if ((ifp->if_hwassist & if_tso) == 0) {
		/* need gso if no hardware support */
		*need_gso = true;
	} else {
		uint32_t tso_mtu = 0;

		tso_mtu = get_if_tso_mtu(ifp, is_ipv4);
		if (pkt_mtu > tso_mtu) {
			/* need gso if tso_mtu too small */
			*need_gso = true;
		}
	}
done:
	/* always report the MSS back, including on the early-exit paths */
	*mss_p = mss;
	return error;
}
5363
/*
 * bridge_enqueue:
 *
 *	Enqueue a packet list on a bridge member interface.
 *
 *	Each packet of in_list is detached from the list, flagged with
 *	M_PROTO1, and checked for TSO/LRO/oversize handling via
 *	tso_hwassist().  Packets that fail that check are dropped, packets
 *	that need software segmentation are replaced by their segments, and
 *	the rest get the requested checksum operation applied.  The
 *	resulting list is handed to bridge_transmit() on dst_if and the
 *	output statistics of bridge_ifp are updated.
 *
 *	Returns the result of bridge_transmit(), or 0 if nothing was
 *	transmitted.
 */
static int
bridge_enqueue(ifnet_t bridge_ifp, ifnet_t src_if, ifnet_t dst_if,
    ether_type_flag_t etypef, mbuf_t in_list, ChecksumOperation orig_cksum_op,
    pkt_direction_t direction)
{
	int enqueue_error = 0;
	mbuf_t next_packet;
	uint32_t out_errors = 0;
	mblist out_list;

	VERIFY(dst_if != NULL);

	mblist_init(&out_list);
	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
		bool check_gso = false;
		/* per-packet copy: may be downgraded for large TCP packets */
		ChecksumOperation cksum_op = orig_cksum_op;
		errno_t error = 0;
		bool is_ipv4 = false;
		int len;
		int mss = 0;
		bool need_gso = false;

		scan->m_flags |= M_PROTO1; /* set to avoid loops */
		/* take the packet out of the list */
		next_packet = scan->m_nextpkt;
		scan->m_nextpkt = NULL;
		len = mbuf_pkthdr_len(scan);
		is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
		mss = _mbuf_get_tso_mss(scan);
		if (mss != 0) {
			/* packet is marked for segmentation */
			check_gso = true;
		} else if (direction == pkt_direction_RX &&
		    scan->m_pkthdr.rx_seg_cnt != 0) {
			/* LRO packet */
			check_gso = true;
		} else if (ether_type_flag_is_ip(etypef) &&
		    len > (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
			/*
			 * Need to segment the packet if it is a large frame
			 * and the destination interface does not support TSO.
			 *
			 * Note that with trailers, it's possible for a packet to
			 * be large but not actually require segmentation.
			 */
			check_gso = true;
		}
		if (check_gso) {
			bool is_large_tcp = false;

			error = tso_hwassist(&scan, is_ipv4,
			    dst_if, sizeof(struct ether_header), &mss,
			    &need_gso, &is_large_tcp);
			/* keep the offload flags on a large TCP packet */
			if (is_large_tcp &&
			    cksum_op == CHECKSUM_OPERATION_CLEAR_OFFLOAD) {
				cksum_op = CHECKSUM_OPERATION_NONE;
			}
		}
		if (error != 0) {
			/* scan may already be NULL if tso_hwassist() lost the mbuf */
			if (scan != NULL) {
				m_drop(scan,
				    direction == pkt_direction_RX ? DROPTAP_FLAG_DIR_IN : DROPTAP_FLAG_DIR_OUT,
				    DROP_REASON_BRIDGE_HWASSIST, NULL, 0);
				scan = NULL;
			}
			out_errors++;
		} else if (need_gso) {
			int mac_hlen = sizeof(struct ether_header);
			mblist segs;

			/* segment packets, add to list */
			segs = gso_tcp_transmit(dst_if, scan, mac_hlen,
			    is_ipv4);
			if (segs.head != NULL) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
				    "%s (%s) append gso #segs %u bytes %u",
				    bridge_ifp->if_xname,
				    dst_if->if_xname,
				    segs.count, segs.bytes);
				mblist_append_list(&out_list, segs);
			} else {
				/* no segments came back: count it as an error */
				out_errors++;
			}
		} else {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
			    "%s (%s) append %d bytes mss %d op %d",
			    bridge_ifp->if_xname,
			    dst_if->if_xname,
			    len, mss, cksum_op);
			bridge_handle_checksum_op(src_if, dst_if,
			    scan, cksum_op);
			mblist_append(&out_list, scan);
		}
	}
	if (out_list.head != NULL) {
		enqueue_error = bridge_transmit(dst_if, out_list.head);
		if (enqueue_error != 0) {
			out_errors++;
		}
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
		    "%s (%s) bridge_transmit packets %u bytes %u error %d",
		    bridge_ifp->if_xname,
		    dst_if->if_xname,
		    out_list.count, out_list.bytes, enqueue_error);
	}
	/* account for what was queued and what failed on the bridge interface */
	if (out_list.count != 0 || out_errors != 0) {
		ifnet_stat_increment_out(bridge_ifp, out_list.count,
		    out_list.bytes, out_errors);
	}
	return enqueue_error;
}
5480
5481 /*
5482 * bridge_member_output:
5483 *
5484 * Send output from a bridge member interface. This
5485 * performs the bridging function for locally originated
5486 * packets.
5487 *
5488 * The mbuf has the Ethernet header already attached.
5489 */
5490 static errno_t
5491 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5492 {
5493 struct bridge_iflist * bif = NULL;
5494 ifnet_t bridge_ifp;
5495 struct ether_header *eh;
5496 ether_type_flag_t etypef;
5497 struct ifnet *dst_if = NULL;
5498 uint16_t vlan;
5499 struct bridge_iflist *mac_nat_bif;
5500 ifnet_t mac_nat_ifp;
5501 mbuf_t m = *data;
5502
5503 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5504 "ifp %s", ifp->if_xname);
5505 if (m->m_len < ETHER_HDR_LEN) {
5506 m = m_pullup(m, ETHER_HDR_LEN);
5507 if (m == NULL) {
5508 *data = NULL;
5509 return EJUSTRETURN;
5510 }
5511 }
5512
5513 BRIDGE_LOCK(sc);
5514 mac_nat_bif = sc->sc_mac_nat_bif;
5515 mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5516 if (mac_nat_ifp == ifp) {
5517 /* record the IP address used by the MAC NAT interface */
5518 (void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5519 m = *data;
5520 if (m == NULL) {
5521 /* packet was deallocated */
5522 BRIDGE_UNLOCK(sc);
5523 return EJUSTRETURN;
5524 }
5525 }
5526 bridge_ifp = sc->sc_ifp;
5527 eh = mtod(m, struct ether_header *);
5528 vlan = VLANTAGOF(m);
5529 etypef = ether_type_flag_get(eh->ether_type);
5530
5531 /*
5532 * APPLE MODIFICATION
5533 * If the packet is an 802.1X ethertype, then only send on the
5534 * original output interface.
5535 */
5536 if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5537 dst_if = ifp;
5538 goto sendunicast;
5539 }
5540
5541 /*
5542 * If bridge is down, but the original output interface is up,
5543 * go ahead and send out that interface. Otherwise, the packet
5544 * is dropped below.
5545 */
5546 if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5547 dst_if = ifp;
5548 goto sendunicast;
5549 }
5550
5551 /*
5552 * If the packet is a multicast, or we don't know a better way to
5553 * get there, send to all interfaces.
5554 */
5555 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5556 dst_if = NULL;
5557 } else {
5558 bif = bridge_rtlookup_bif(sc, eh->ether_dhost, vlan);
5559 if (bif != NULL) {
5560 dst_if = bif->bif_ifp;
5561 }
5562 }
5563 if (dst_if == NULL) {
5564 struct mbuf *mc;
5565 errno_t error;
5566
5567
5568 bridge_span(sc, etypef, m);
5569
5570 BRIDGE_LOCK2REF(sc, error);
5571 if (error != 0) {
5572 m_drop(m, DROPTAP_FLAG_DIR_OUT,
5573 DROP_REASON_BRIDGE_NOREF, NULL, 0);
5574 return EJUSTRETURN;
5575 }
5576
5577 /*
5578 * Duplicate and send the packet across all member interfaces
5579 * except the originating interface.
5580 */
5581 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5582 dst_if = bif->bif_ifp;
5583 if (dst_if == ifp) {
5584 /* skip the originating interface */
5585 continue;
5586 }
5587 /* skip interface with inactive link status */
5588 if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5589 continue;
5590 }
5591
5592 /* skip interface that isn't running */
5593 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5594 continue;
5595 }
5596 /*
5597 * If the interface is participating in spanning
5598 * tree, make sure the port is in a state that
5599 * allows forwarding.
5600 */
5601 if ((bif->bif_ifflags & IFBIF_STP) &&
5602 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5603 continue;
5604 }
5605 /*
5606 * If the destination is the MAC NAT interface,
5607 * skip sending the packet. The packet can't be sent
5608 * if the source MAC is incorrect.
5609 */
5610 if (dst_if == mac_nat_ifp) {
5611 continue;
5612 }
5613
5614 /* make a deep copy to send on this member interface */
5615 mc = m_dup(m, M_DONTWAIT);
5616 if (mc == NULL) {
5617 (void)ifnet_stat_increment_out(bridge_ifp,
5618 0, 0, 1);
5619 continue;
5620 }
5621 (void)bridge_enqueue(bridge_ifp, ifp, dst_if, etypef,
5622 mc, CHECKSUM_OPERATION_COMPUTE, pkt_direction_TX);
5623 }
5624 BRIDGE_UNREF(sc);
5625
5626 if ((ifp->if_flags & IFF_RUNNING) == 0) {
5627 m_drop(m, DROPTAP_FLAG_DIR_OUT,
5628 DROP_REASON_BRIDGE_NOT_RUNNING, NULL, 0);
5629 return EJUSTRETURN;
5630 }
5631 /* allow packet to continue on the originating interface */
5632 return 0;
5633 }
5634
5635 sendunicast:
5636 /*
5637 * XXX Spanning tree consideration here?
5638 */
5639
5640 bridge_span(sc, etypef, m);
5641 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5642 m_drop(m, DROPTAP_FLAG_DIR_OUT,
5643 DROP_REASON_BRIDGE_NOT_RUNNING, NULL, 0);
5644 BRIDGE_UNLOCK(sc);
5645 return EJUSTRETURN;
5646 }
5647
5648 BRIDGE_UNLOCK(sc);
5649 if (dst_if == ifp) {
5650 /* allow packet to continue on the originating interface */
5651 return 0;
5652 }
5653 if (dst_if != mac_nat_ifp) {
5654 (void) bridge_enqueue(bridge_ifp, ifp, dst_if, etypef, m,
5655 CHECKSUM_OPERATION_COMPUTE, pkt_direction_TX);
5656 } else {
5657 /*
5658 * This is not the original output interface
5659 * and the destination is the MAC NAT interface.
5660 * Drop the packet because the packet can't be sent
5661 * if the source MAC is incorrect.
5662 */
5663 m_drop(m, DROPTAP_FLAG_DIR_OUT,
5664 DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
5665 }
5666 return EJUSTRETURN;
5667 }
5668
5669 /*
5670 * Output callback.
5671 *
5672 * This routine is called externally from above only when if_bridge_txstart
5673 * is disabled; otherwise it is called internally by bridge_start().
5674 */
5675 static int
5676 bridge_output(struct ifnet *ifp, struct mbuf *m)
5677 {
5678 struct bridge_iflist *bif;
5679 struct bridge_softc * __single sc = ifnet_softc(ifp);
5680 struct ether_header *eh;
5681 ether_type_flag_t etypef;
5682 struct ifnet *dst_if = NULL;
5683 int error = 0;
5684
5685 eh = mtod(m, struct ether_header *);
5686 etypef = ether_type_flag_get(eh->ether_type);
5687 BRIDGE_LOCK(sc);
5688
5689 if (!IS_BCAST_MCAST(m)) {
5690 bif = bridge_rtlookup_bif(sc, eh->ether_dhost, 0);
5691 if (bif != NULL) {
5692 dst_if = bif->bif_ifp;
5693 }
5694 }
5695
5696 (void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5697
5698 BRIDGE_BPF_TAP_OUT(ifp, m);
5699
5700 if (dst_if == NULL) {
5701 /* callee will unlock */
5702 bridge_broadcast(sc, NULL, etypef, m);
5703 } else {
5704 ifnet_t bridge_ifp;
5705
5706 bridge_ifp = sc->sc_ifp;
5707 BRIDGE_UNLOCK(sc);
5708
5709 error = bridge_enqueue(bridge_ifp, NULL, dst_if, etypef, m,
5710 CHECKSUM_OPERATION_FINALIZE, pkt_direction_TX);
5711 }
5712
5713 return error;
5714 }
5715
/*
 * bridge_finalize_cksum:
 *
 * For an IPv4 or IPv6 frame m about to be sent on ifp, work out which of
 * the checksum operations requested in the packet header are not covered
 * by the interface's if_hwassist and pass those to in_finalize_cksum() /
 * in6_finalize_cksum().  Non-IP frames are left untouched.
 *
 * When the interface advertises CSUM_PARTIAL and the delayed-data checksum
 * is not already scheduled for software, a TCP packet is set up for
 * partial checksum offload (start/stuff offsets); any other packet has the
 * delayed-data checksum added to the software set.
 */
static void
bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
{
	struct ether_header *eh;
	bool is_ipv4;
	uint32_t sw_csum, hwcap;
	uint32_t did_sw;
	uint32_t csum_flags;

	eh = mtod(m, struct ether_header *);
	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
		return;
	}

	/* do in software what the hardware cannot */
	hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
	/* snapshot of the flags before any change, used for logging below */
	csum_flags = m->m_pkthdr.csum_flags;
	sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
	sw_csum &= IF_HWASSIST_CSUM_MASK;

	if (is_ipv4) {
		if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
		    (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
			if (m->m_pkthdr.csum_flags & CSUM_TCP) {
				/* checksum starts after a fixed-size IPv4 header */
				uint16_t start =
				    sizeof(*eh) + sizeof(struct ip);
				uint16_t ulpoff =
				    m->m_pkthdr.csum_data & 0xffff;
				m->m_pkthdr.csum_flags |=
				    (CSUM_DATA_VALID | CSUM_PARTIAL);
				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
				m->m_pkthdr.csum_tx_start = start;
			} else {
				sw_csum |= (CSUM_DELAY_DATA &
				    m->m_pkthdr.csum_flags);
			}
		}
		did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
	} else {
		if ((hwcap & CSUM_PARTIAL) &&
		    !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
		    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
			if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
				/* checksum starts after the fixed IPv6 header */
				uint16_t start =
				    sizeof(*eh) + sizeof(struct ip6_hdr);
				uint16_t ulpoff =
				    m->m_pkthdr.csum_data & 0xffff;
				m->m_pkthdr.csum_flags |=
				    (CSUM_DATA_VALID | CSUM_PARTIAL);
				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
				m->m_pkthdr.csum_tx_start = start;
			} else {
				sw_csum |= (CSUM_DELAY_IPV6_DATA &
				    m->m_pkthdr.csum_flags);
			}
		}
		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
	}
	/* did_sw is only reported here; it does not affect the packet */
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
	    "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
	    ifp->if_xname, csum_flags, hwcap, sw_csum,
	    did_sw, m->m_pkthdr.csum_flags);
}
5779
5780 /*
5781 * bridge_start:
5782 *
5783 * Start output on a bridge.
5784 *
5785 * This routine is invoked by the start worker thread; because we never call
5786 * it directly, there is no need do deploy any serialization mechanism other
5787 * than what's already used by the worker thread, i.e. this is already single
5788 * threaded.
5789 *
5790 * This routine is called only when if_bridge_txstart is enabled.
5791 */
5792 static void
5793 bridge_start(struct ifnet *ifp)
5794 {
5795 mbuf_ref_t m;
5796
5797 for (;;) {
5798 if (ifnet_dequeue(ifp, &m) != 0) {
5799 break;
5800 }
5801
5802 (void) bridge_output(ifp, m);
5803 }
5804 }
5805
5806 static void
5807 prepare_input_packet(ifnet_t ifp, mbuf_t m)
5808 {
5809 mbuf_pkthdr_setrcvif(m, ifp);
5810 mbuf_pkthdr_setheader(m, mtod(m, void *));
5811 /* adjust frame to skip mac-layer header */
5812 _mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
5813 }
5814
5815 static void
5816 mark_tso_checksum_ok(mbuf_t m)
5817 {
5818 if (_mbuf_get_tso_mss(m) != 0 ||
5819 (m->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
5820 mbuf_set_csum_performed(m, checksum_performed_all_good, 0xffff);
5821 }
5822 }
5823
5824 static void
5825 inject_input_packet_list(ifnet_t ifp, mbuf_t in_list, bool m_proto1)
5826 {
5827 for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5828 /* mark the packets as arriving on the interface */
5829 BRIDGE_BPF_TAP_IN(ifp, scan);
5830 if (m_proto1) {
5831 scan->m_flags |= M_PROTO1; /* set to avoid loops */
5832 }
5833 prepare_input_packet(ifp, scan);
5834 mark_tso_checksum_ok(scan);
5835 }
5836 dlil_input_packet_list(ifp, in_list);
5837 return;
5838 }
5839
5840 static void
5841 adjust_input_packet_list(mbuf_t in_list)
5842 {
5843 for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5844 mbuf_pkthdr_setheader(scan, mtod(scan, void *));
5845 _mbuf_adjust_pkthdr_and_data(scan, ETHER_HDR_LEN);
5846 }
5847 }
5848
5849 static bool
5850 in_addr_is_ours(struct in_addr ip)
5851 {
5852 struct in_ifaddr *ia;
5853 bool ours = false;
5854
5855 lck_rw_lock_shared(&in_ifaddr_rwlock);
5856 TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5857 if (ia->ia_addr.sin_addr.s_addr == ip.s_addr) {
5858 ours = true;
5859 break;
5860 }
5861 }
5862 lck_rw_done(&in_ifaddr_rwlock);
5863 return ours;
5864 }
5865
5866 static bool
5867 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5868 {
5869 struct in6_addr dst_ip;
5870 struct in6_ifaddr *ia6;
5871 bool ours = false;
5872
5873 if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5874 /* need to embed scope ID for comparison */
5875 bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5876 dst_ip.s6_addr16[1] = htons(ifscope);
5877 ip6_p = &dst_ip;
5878 }
5879 lck_rw_lock_shared(&in6_ifaddr_rwlock);
5880 TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5881 if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5882 ia6->ia_addr.sin6_scope_id, ifscope)) {
5883 ours = true;
5884 break;
5885 }
5886 }
5887 lck_rw_done(&in6_ifaddr_rwlock);
5888 return ours;
5889 }
5890
5891 static bool
5892 ip_packet_info_dst_is_our_ip(ip_packet_info_t info_p, int index)
5893 {
5894 /* if the destination is our IP address, don't segment */
5895 bool our_ip = false;
5896
5897 if (info_p->ip_is_ipv4) {
5898 struct ip * hdr;
5899 struct in_addr dst_ip;
5900
5901 hdr = (struct ip *)(info_p->ip_hdr);
5902 bcopy(&hdr->ip_dst, &dst_ip, sizeof(dst_ip));
5903 our_ip = in_addr_is_ours(dst_ip);
5904 } else {
5905 struct ip6_hdr * hdr;
5906
5907 hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5908 our_ip = in6_addr_is_ours(&hdr->ip6_dst, index);
5909 }
5910 return our_ip;
5911 }
5912
/*
 * Holds either an IPv4 or an IPv6 address.  The union does not record
 * which member is valid; the functions that use it take a separate
 * is_ipv4 argument for that.
 */
typedef union {
	struct in_addr ip;      /* IPv4 address */
	struct in6_addr ip6;    /* IPv6 address */
} ip_addr, *ip_addr_t;
5917
5918 static void
5919 ip_packet_info_copy_dst_ip_addr(ip_packet_info_t info_p, ip_addr_t ipaddr)
5920 {
5921 if (info_p->ip_is_ipv4) {
5922 struct ip * hdr;
5923
5924 hdr = (struct ip *)(info_p->ip_hdr);
5925 bcopy(&hdr->ip_dst, &ipaddr->ip, sizeof(ipaddr->ip));
5926 } else {
5927 struct ip6_hdr * hdr;
5928
5929 hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5930 bcopy(&hdr->ip6_dst, &ipaddr->ip6, sizeof(ipaddr->ip6));
5931 }
5932 }
5933
5934 static bool
5935 ip_addr_are_equal(ip_addr_t addr1, ip_addr_t addr2, bool is_ipv4)
5936 {
5937 bool equal;
5938
5939 if (is_ipv4) {
5940 equal = addr1->ip.s_addr == addr2->ip.s_addr;
5941 } else {
5942 equal = IN6_ARE_ADDR_EQUAL(&addr1->ip6, &addr2->ip6);
5943 }
5944 return equal;
5945 }
5946
5947 static bool
5948 ip_addr_is_ours(ip_addr_t ipaddr, int index, bool is_ipv4)
5949 {
5950 bool our_ip;
5951
5952 if (is_ipv4) {
5953 our_ip = in_addr_is_ours(ipaddr->ip);
5954 } else {
5955 our_ip = in6_addr_is_ours(&ipaddr->ip6, index);
5956 }
5957 return our_ip;
5958 }
5959
/*
 * bridge_interface_input_list:
 *
 * Hand a list of packets to the bridge interface's own input path.
 *
 * If IP forwarding for the packet family selected by `etypef` is disabled
 * (or `etypef` is neither IPv4 nor IPv6), the list is injected unchanged.
 * Otherwise each packet is inspected individually:
 *  - packets whose destination IP address is ours are passed up as is;
 *  - other packets that carry a TCP header and look large (TSO MSS set
 *    when `bif_uses_virtio`, otherwise rx_seg_cnt > 1 or longer than the
 *    bridge MTU plus the Ethernet header) go through gso_tcp_with_info();
 *  - when `bif_uses_virtio` is set and checksum offload was requested,
 *    bridge_offload_checksum() is applied.
 *
 * Packets that fail any step are counted in `in_errors`, which is reported
 * through ifnet_stat_increment_in() on `bridge_ifp`.
 */
static void
bridge_interface_input_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
    mblist list, bool bif_uses_virtio)
{
	uint32_t in_errors = 0;
	bool is_ipv4;
	mblist in_list;
	ip_addr last_ip;
	bool last_ip_ours = false;
	bool last_ip_valid = false;
	u_int mac_hlen;
	bool may_forward = false;
	mbuf_t next_packet;

	/*
	 * For any other etypef value `may_forward` stays false, so the
	 * function takes the pass-through path below and `is_ipv4` is
	 * never read.
	 */
	switch (etypef) {
	case ETHER_TYPE_FLAG_IPV4:
		is_ipv4 = true;
		may_forward = (ipforwarding != 0);
		break;
	case ETHER_TYPE_FLAG_IPV6:
		is_ipv4 = false;
		may_forward = (ip6_forwarding != 0);
		break;
	}
	if (!may_forward) {
		/* not forwarding: deliver the caller's list untouched */
		in_list = list;
		goto done;
	}

	mblist_init(&in_list);
	mac_hlen = sizeof(struct ether_header);
	bzero(&last_ip, sizeof(last_ip));
	for (mbuf_ref_t scan = list.head; scan != NULL; scan = next_packet) {
		int error;
		ip_packet_info info;
		bool ip_ours;
		/*
		 * NOTE(review): `stats` is a scratch, uninitialized local; the
		 * counters the helpers update in it are discarded -- confirm
		 * that is intended.
		 */
		struct ifbrmstats stats; /* XXX should really be accounted */
		ip_addr this_ip;

		/* take it out of the list */
		next_packet = scan->m_nextpkt;
		scan->m_nextpkt = NULL;

		/* check for TCP packet and get IP header */
		error = bridge_get_tcp_header(&scan, mac_hlen, is_ipv4,
		    &info, &stats.brms_in_ip);
		if (error != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
			    "%s bridge_get_tcp_header failed %d",
			    bridge_ifp->if_xname, error);
			/* the helper takes &scan and may already have cleared it */
			if (scan != NULL) {
				m_freem(scan);
				scan = NULL;
			}
			in_errors++;
			continue;
		}
		/*
		 * Look up whether the destination is ours, reusing the answer
		 * from the previous packet when the destination is unchanged.
		 */
		ip_packet_info_copy_dst_ip_addr(&info, &this_ip);
		if (last_ip_valid &&
		    ip_addr_are_equal(&last_ip, &this_ip, is_ipv4)) {
			/* use cached result */
			ip_ours = last_ip_ours;
		} else {
			ip_ours = ip_addr_is_ours(&this_ip,
			    bridge_ifp->if_index,
			    is_ipv4);
			/* cache the result */
			last_ip_valid = true;
			last_ip_ours = ip_ours;
			last_ip = this_ip;
		}

		/* if the packet is destined to us, just send it up */
		if (ip_ours) {
			mblist_append(&in_list, scan);
			continue;
		}
		/*
		 * If this is a TCP packet that's marked for TSO or LRO, or
		 * we think it's a large packet, segment it.
		 */
		if (info.ip_proto_hdr != NULL &&
		    ((bif_uses_virtio && _mbuf_get_tso_mss(scan) != 0) ||
		    (!bif_uses_virtio &&
		    (scan->m_pkthdr.rx_seg_cnt > 1 ||
		    (mbuf_pkthdr_len(scan) >
		    (bridge_ifp->if_mtu + ETHER_HDR_LEN)))))) {
			mblist seg;

			seg = gso_tcp_with_info(bridge_ifp, scan, &info,
			    mac_hlen, is_ipv4, false);
			if (seg.head == NULL) {
				/*
				 * NOTE(review): `scan` is not freed here; presumably
				 * gso_tcp_with_info() consumes it on failure -- confirm.
				 */
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
				    "gso_tcp returned no packets");
				in_errors++;
				continue;
			}
			if (seg.count > 1) {
				/* packet was segmented+checksummed */
				mblist_append_list(&in_list, seg);
				continue;
			}
			/* there's just one packet, no segmentation */
			scan = seg.head;
		}
		/* need checksum if it's marked for checksum offload */
		if (bif_uses_virtio &&
		    (scan->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
			error = bridge_offload_checksum(&scan, &info, &stats);
			if (error != 0) {
				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
				    "%s bridge_offload_checksum failed %d",
				    bridge_ifp->if_xname, error);
				/* the helper takes &scan and may already have cleared it */
				if (scan != NULL) {
					m_freem(scan);
					scan = NULL;
				}
				in_errors++;
				continue;
			}
		}
		mblist_append(&in_list, scan);
	}

done:
	if (in_list.head != NULL) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s packets %d bytes %d",
		    bridge_ifp->if_xname,
		    in_list.count, in_list.bytes);
		/* Mark the packets as arriving on the bridge interface */
		inject_input_packet_list(bridge_ifp, in_list.head, false);
		ifnet_stat_increment_in(bridge_ifp, in_list.count,
		    in_list.bytes, in_errors);
	} else if (in_errors != 0) {
		/* nothing to deliver, but still report the failures */
		ifnet_stat_increment_in(bridge_ifp, 0, 0, in_errors);
	}
	return;
}
6099
/*
 * bridge_broadcast:
 *
 *	Send a frame to all interfaces that are members of
 *	the bridge, except for the one on which the packet
 *	arrived.
 *
 *	`sbif` is the member the frame arrived on, or NULL when the
 *	bridge interface itself is the sender. `m` is always consumed:
 *	it is either handed to the last member in the list or freed.
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
    ether_type_flag_t etypef, mbuf_t m)
{
	ifnet_t bridge_ifp;
	struct bridge_iflist *dbif;
	struct ifnet * src_if;
	mbuf_ref_t mc;
	struct mbuf *mc_in;
	int error = 0, used = 0;
	ChecksumOperation cksum_op;
	struct mac_nat_record mnr;
	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
	boolean_t translate_mac = FALSE;
	uint32_t sc_filter_flags;
	bool is_bcast_mcast;

	bridge_ifp = sc->sc_ifp;
	if (sbif != NULL) {
		src_if = sbif->bif_ifp;
		cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
			/* get the translation record */
			translate_mac
			        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
			if (m == NULL) {
				/* packet was deallocated */
				BRIDGE_UNLOCK(sc);
				return;
			}
		}
	} else {
		/*
		 * sbif is NULL when the bridge interface calls
		 * bridge_broadcast().
		 */
		cksum_op = CHECKSUM_OPERATION_FINALIZE;
		src_if = NULL;
	}

	/*
	 * NOTE(review): BRIDGE_LOCK2REF presumably trades the lock for a
	 * reference on the member list (matched by BRIDGE_UNREF below) and
	 * reports failure through `error` -- confirm against the macro.
	 */
	BRIDGE_LOCK2REF(sc, error);
	if (error) {
		m_freem(m);
		return;
	}
	/* sample these once; they are used for every member below */
	is_bcast_mcast = IS_BCAST_MCAST(m);
	sc_filter_flags = sc->sc_filter_flags;
	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
		ifnet_t dst_if;

		dst_if = dbif->bif_ifp;
		if (dst_if == src_if) {
			/* skip the interface that the packet came in on */
			continue;
		}

		/* Private segments can not talk to each other */
		if (sbif != NULL &&
		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
			continue;
		}

		/* skip members that spanning tree has put in discarding state */
		if ((dbif->bif_ifflags & IFBIF_STP) &&
		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			continue;
		}

		/* unicast frames only go to members with IFBIF_DISCOVER set */
		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
		    !is_bcast_mcast) {
			continue;
		}

		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
			continue;
		}

		if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
			continue;
		}

		/*
		 * The last member in the list gets the original mbuf; every
		 * other member gets a duplicate.
		 */
		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
			mc = m;
			used = 1;
		} else {
			mc = m_dup(m, M_DONTWAIT);
			if (mc == NULL) {
				(void) ifnet_stat_increment_out(bridge_ifp,
				    0, 0, 1);
				continue;
			}
		}

		/*
		 * If broadcast input is enabled, do so only if this
		 * is an input packet.
		 */
		if (sbif != NULL && is_bcast_mcast &&
		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
			mc_in = m_dup(mc, M_DONTWAIT);
			/* this could fail, but we continue anyways */
		} else {
			mc_in = NULL;
		}

		/* out */
		if (translate_mac && mac_nat_bif == dbif) {
			/* translate the packet */
			bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
		}

		/* run the member packet filter on frames received from a member */
		if (mc != NULL && sbif != NULL &&
		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
			if (used == 0) {
				/* Keep the layer3 header aligned */
				int i = min(mc->m_pkthdr.len, max_protohdr);
				mc = m_copyup(mc, i, ETHER_ALIGN);
				if (mc == NULL) {
					(void) ifnet_stat_increment_out(
						sc->sc_ifp, 0, 0, 1);
					if (mc_in != NULL) {
						m_freem(mc_in);
						mc_in = NULL;
					}
					continue;
				}
			}
			/* whenever the output copy is lost, drop the input copy too */
			if (bridge_pf(&mc, dst_if, sc_filter_flags, false) != 0) {
				if (mc_in != NULL) {
					m_freem(mc_in);
					mc_in = NULL;
				}
				continue;
			}
			if (mc == NULL) {
				if (mc_in != NULL) {
					m_freem(mc_in);
					mc_in = NULL;
				}
				continue;
			}
		}

		if (mc != NULL) {
			/* verify checksum if necessary */
			if (bif_has_checksum_offload(dbif) && sbif != NULL &&
			    !bif_has_checksum_offload(sbif)) {
				error = bridge_verify_checksum(&mc,
				    &dbif->bif_stats);
				if (error != 0) {
					if (mc != NULL) {
						m_freem(mc);
					}
					mc = NULL;
				}
			}
			if (mc != NULL) {
				(void) bridge_enqueue(bridge_ifp,
				    NULL, dst_if, etypef, mc, cksum_op,
				    pkt_direction_TX);
			}
		}

		/* in */
		if (mc_in == NULL) {
			continue;
		}
		BRIDGE_BPF_TAP_IN(dst_if, mc_in);
		prepare_input_packet(dst_if, mc_in);
		mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
		dlil_input_packet_list(dst_if, mc_in);
	}
	/* the original was never handed to a member: release it here */
	if (used == 0) {
		m_freem(m);
	}


	BRIDGE_UNREF(sc);
}
6287
6288 static mbuf_t
6289 copy_packet_list(mbuf_t m)
6290 {
6291 mblist ret;
6292 mbuf_t next_packet;
6293
6294 mblist_init(&ret);
6295 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
6296 mbuf_t copy_m;
6297
6298 /* take it out of the list */
6299 next_packet = scan->m_nextpkt;
6300 scan->m_nextpkt = NULL;
6301
6302 /* create a copy and add it to the new list */
6303 copy_m = m_dup(scan, M_DONTWAIT);
6304 if (copy_m != NULL) {
6305 mblist_append(&ret, copy_m);
6306 }
6307
6308 /* put it back in the original list */
6309 scan->m_nextpkt = next_packet;
6310 }
6311 return ret.head;
6312 }
6313
/*
 * bridge_broadcast_list:
 *
 * Broadcast a list of packets to all members except `sbif`.
 * Consumes `m` before returning.
 *
 * `sbif` is the member the packets arrived on, or NULL when the bridge
 * interface itself is the sender. `direction` is passed through to
 * bridge_enqueue(). Whether the list is broadcast/multicast is decided
 * from IS_BCAST_MCAST(m), i.e. from the head of the list only.
 *
 * NOTE: Releases the lock on return.
 */
static void
bridge_broadcast_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
    ether_type_flag_t etypef, mbuf_t m, pkt_direction_t direction)
{
	ifnet_t bridge_ifp;
	bool bridge_needs_input;
	struct bridge_iflist * dbif;
	bool is_bcast_mcast;
	errno_t error = 0;
	ChecksumOperation cksum_op;
	struct bridge_iflist * mac_nat_bif = sc->sc_mac_nat_bif;
	ifnet_t mac_nat_if = NULL;
	bool need_mac_nat = false;
	mbuf_t out_mac_nat = NULL;
	ifnet_t src_if;
	uint32_t sc_filter_flags;
	bool used = false;

	bridge_ifp = sc->sc_ifp;
	if (sbif != NULL) {
		src_if = sbif->bif_ifp;

		if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
			bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);

			/* compute checksum on packets marked with offload */
			m = bridge_checksum_offload_list(bridge_ifp, sbif,
			    m, is_ipv4);
			if (m == NULL) {
				/* nothing left to send; still drop the lock */
				BRIDGE_UNLOCK(sc);
				goto done;
			}
			cksum_op = CHECKSUM_OPERATION_NONE;
		} else {
			cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
		}

		/*
		 * If MAC-NAT is enabled and we'll be sending the packets
		 * over it, verify that it is up and active before
		 * deciding to make a translated copy.
		 */
		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
			mac_nat_if = mac_nat_bif->bif_ifp;
			if ((mac_nat_if->if_flags & IFF_RUNNING) != 0 &&
			    (mac_nat_bif->bif_flags & BIFF_MEDIA_ACTIVE) != 0) {
				need_mac_nat = true;
			}
		}
	} else {
		/*
		 * sbif is NULL when the bridge interface calls
		 * bridge_broadcast_list() (TBD).
		 */
		cksum_op = CHECKSUM_OPERATION_FINALIZE;
		src_if = NULL;
	}

	/*
	 * Create a translated copy for packets destined to MAC-NAT interface.
	 */
	if (need_mac_nat) {
		out_mac_nat
		        = bridge_mac_nat_copy_and_translate_list(sc, sbif,
		    mac_nat_if, m);
	}
	/* sample softc state before BRIDGE_LOCK2REF */
	sc_filter_flags = sc->sc_filter_flags;
	bridge_needs_input = (sc->sc_flags & SCF_PROTO_ATTACHED) != 0;
	/*
	 * NOTE(review): BRIDGE_LOCK2REF presumably trades the lock for a
	 * reference on the member list (matched by BRIDGE_UNREF below) and
	 * reports failure through `error` -- confirm against the macro.
	 */
	BRIDGE_LOCK2REF(sc, error);
	if (error) {
		goto done;
	}
	is_bcast_mcast = IS_BCAST_MCAST(m);

	/* make a copy for the bridge interface */
	if (sbif != NULL && is_bcast_mcast && bridge_needs_input) {
		mbuf_t in_list;

		in_list = copy_packet_list(m);
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
		    "%s mcast for us in_m %p",
		    bridge_ifp->if_xname, in_list);
		if (in_list != NULL) {
			inject_input_packet_list(bridge_ifp, in_list, false);
		}
	}

	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
		ifnet_t dst_if;
		mbuf_t in_m = NULL;
		mbuf_t out_m = NULL;

		dst_if = dbif->bif_ifp;
		if (dst_if == src_if) {
			/* skip the interface that the packet came in on */
			continue;
		}

		/* Private segments can not talk to each other */
		if (sbif != NULL &&
		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
			continue;
		}

		/* skip members that spanning tree has put in discarding state */
		if ((dbif->bif_ifflags & IFBIF_STP) &&
		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			continue;
		}

		/* unicast packets only go to members with IFBIF_DISCOVER set */
		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
		    !is_bcast_mcast) {
			continue;
		}

		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
			continue;
		}

		if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
			continue;
		}
		/*
		 * Pick the list to send out this member.
		 * NOTE(review): when `sbif` is NULL no translated copy is
		 * made above, so a MAC-NAT member gets nothing and an output
		 * error is counted -- confirm this is the intended "TBD"
		 * behavior.
		 */
		if (dbif == mac_nat_bif) {
			/* translated copy was created above, use that */
			out_m = out_mac_nat;
			out_mac_nat = NULL;
		} else if (TAILQ_NEXT(dbif, bif_next) == NULL) {
			/* consume `m` */
			out_m = m;
			used = true;
		} else {
			/* needs a copy */
			out_m = copy_packet_list(m);
		}

		if (out_m == NULL) {
			ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
			continue;
		}
		/*
		 * If broadcast input is enabled, do so only if this
		 * is an input packet.
		 */
		if (sbif != NULL && is_bcast_mcast &&
		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
			in_m = copy_packet_list(m);
			/* this could fail, but we continue anyways */
		} else {
			in_m = NULL;
		}

		/* run the member packet filter on packets received from a member */
		if (sbif != NULL &&
		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
			out_m = bridge_pf_list_out(out_m, dst_if,
			    sc_filter_flags);
		}
		if (out_m != NULL) {
			/* verify checksum if necessary */
			if (sbif != NULL &&
			    ether_type_flag_is_ip(etypef) &&
			    bif_has_checksum_offload(dbif) &&
			    !bif_has_checksum_offload(sbif)) {
				bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);

				out_m = bridge_verify_checksum_list(bridge_ifp,
				    dbif, out_m, is_ipv4);
			}
			if (out_m != NULL) {
				bridge_enqueue(bridge_ifp, src_if, dst_if,
				    etypef, out_m, cksum_op, direction);
			}
		}

		/* in */
		if (in_m != NULL) {
			inject_input_packet_list(dst_if, in_m, true);
		}
	}

	BRIDGE_UNREF(sc);

done:
	/* release whatever was not handed to a member */
	if (out_mac_nat != NULL) {
		m_freem_list(out_mac_nat);
	}
	if (!used) {
		m_freem_list(m);
	}
	return;
}
6511
6512 #define NEEDED_CSUM_IPV4 (IF_HWASSIST_CSUM_UDP | IF_HWASSIST_CSUM_TCP)
6513 #define NEEDED_CSUM_IPV6 (IF_HWASSIST_CSUM_UDPIPV6 | IF_HWASSIST_CSUM_TCPIPV6)
6514
6515 static bool
6516 interface_supports_hw_checksum(ifnet_t ifp, bool is_ipv4)
6517 {
6518 uint32_t hwcap = IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
6519 uint32_t needed = is_ipv4 ? NEEDED_CSUM_IPV4 : NEEDED_CSUM_IPV6;
6520 bool supports;
6521
6522 supports = (hwcap & needed) == needed;
6523 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM, "%s: does %ssupport checksum",
6524 ifp->if_xname, supports ? "" : "not ");
6525 return supports;
6526 }
6527
6528 static void
6529 bridge_forward_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
6530 ifnet_t dst_if, ether_type_flag_t etypef, mbuf_t m)
6531 {
6532 bool checksum_ok = false;
6533 ChecksumOperation cksum_op;
6534 ifnet_t bridge_ifp = NULL;
6535 struct bridge_iflist * dbif;
6536 uint32_t sc_filter_flags;
6537 ifnet_t src_if;
6538 drop_reason_t drop_reason = DROP_REASON_BRIDGE_UNSPECIFIED;
6539
6540 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6541 drop_reason = DROP_REASON_BRIDGE_NOT_RUNNING;
6542 goto drop;
6543 }
6544 dbif = bridge_lookup_member_if(sc, dst_if);
6545 if (dbif == NULL) {
6546 /* Not a member of the bridge (anymore?) */
6547 drop_reason = DROP_REASON_BRIDGE_NOT_A_MEMBER;
6548 goto drop;
6549 }
6550
6551 /* Private segments can not talk to each other */
6552 if ((sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) != 0) {
6553 drop_reason = DROP_REASON_BRIDGE_PRIVATE_SEGMENT;
6554 goto drop;
6555 }
6556 bridge_ifp = sc->sc_ifp;
6557 src_if = sbif->bif_ifp;
6558 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6559 if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
6560 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6561
6562 if (dbif == sc->sc_mac_nat_bif ||
6563 (IFNET_IS_VMNET(dst_if) && !bif_uses_virtio(dbif)) ||
6564 !interface_supports_hw_checksum(dst_if, is_ipv4)) {
6565 /* compute checksums now if necessary */
6566 m = bridge_checksum_offload_list(bridge_ifp, sbif,
6567 m, is_ipv4);
6568 checksum_ok = true;
6569 } else {
6570 cksum_op = CHECKSUM_OPERATION_NONE;
6571 }
6572 }
6573
6574 if (dbif == sc->sc_mac_nat_bif) {
6575 /* translate the packets before forwarding them */
6576 if ((etypef & ETHER_TYPE_FLAG_IP_ARP) != 0) {
6577 m = bridge_mac_nat_translate_list(sc, sbif, dst_if, m);
6578 }
6579 } else if (!checksum_ok && ether_type_flag_is_ip(etypef) &&
6580 bif_has_checksum_offload(dbif) && !bif_has_checksum_offload(sbif)) {
6581 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6582
6583 /*
6584 * If the destination interface has checksum offload enabled,
6585 * verify the checksum now, unless the source interface also has
6586 * checksum offload enabled. The checksum in that case has
6587 * already just been computed and verifying it is unnecessary.
6588 */
6589 m = bridge_verify_checksum_list(bridge_ifp, dbif, m, is_ipv4);
6590 }
6591 sc_filter_flags = sc->sc_filter_flags;
6592 BRIDGE_UNLOCK(sc);
6593 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6594 m = bridge_pf_list_out(m, dst_if, sc_filter_flags);
6595 }
6596
6597 /*
6598 * We're forwarding inbound packets for which the checksums must
6599 * already have been computed and if required, verified, or
6600 * packets from a virtio-enabled interface for which we rely
6601 * on the packet containing appropriate offload flags.
6602 */
6603 if (m != NULL) {
6604 bridge_enqueue(bridge_ifp, src_if, dst_if, etypef, m,
6605 cksum_op, pkt_direction_RX);
6606 }
6607 return;
6608
6609 drop:
6610 BRIDGE_UNLOCK(sc);
6611 m_drop_list(m, bridge_ifp, DROPTAP_FLAG_DIR_IN, drop_reason, NULL, 0);
6612 return;
6613 }
6614
6615 /*
6616 * bridge_span:
6617 *
6618 * Duplicate a packet out one or more interfaces that are in span mode,
6619 * the original mbuf is unmodified.
6620 */
6621 static void
6622 bridge_span(struct bridge_softc *sc, ether_type_flag_t etypef, struct mbuf *m)
6623 {
6624 struct bridge_iflist *bif;
6625 struct ifnet *dst_if;
6626 struct mbuf *mc;
6627
6628 if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6629 return;
6630 }
6631
6632 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6633 dst_if = bif->bif_ifp;
6634
6635 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6636 continue;
6637 }
6638
6639 mc = m_copypacket(m, M_DONTWAIT);
6640 if (mc == NULL) {
6641 (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6642 continue;
6643 }
6644
6645 (void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, etypef, mc,
6646 CHECKSUM_OPERATION_NONE, pkt_direction_TX);
6647 }
6648 }
6649
6650 /*
6651 * bridge_rtupdate:
6652 *
6653 * Add a bridge routing entry.
6654 */
6655 static int
6656 bridge_rtupdate(struct bridge_softc *sc, const uint8_t dst[ETHER_ADDR_LEN], uint16_t vlan,
6657 struct bridge_iflist *bif, int setflags, uint8_t flags)
6658 {
6659 struct bridge_rtnode *brt;
6660 int error;
6661
6662 BRIDGE_LOCK_ASSERT_HELD(sc);
6663
6664 /* Check the source address is valid and not multicast. */
6665 if (ETHER_IS_MULTICAST(dst) ||
6666 (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6667 dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6668 return EINVAL;
6669 }
6670
6671 /* 802.1p frames map to vlan 1 */
6672 if (vlan == 0) {
6673 vlan = 1;
6674 }
6675
6676 /*
6677 * A route for this destination might already exist. If so,
6678 * update it, otherwise create a new one.
6679 */
6680 if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6681 if (sc->sc_brtcnt >= sc->sc_brtmax) {
6682 sc->sc_brtexceeded++;
6683 return ENOSPC;
6684 }
6685 /* Check per interface address limits (if enabled) */
6686 if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6687 bif->bif_addrexceeded++;
6688 return ENOSPC;
6689 }
6690
6691 /*
6692 * Allocate a new bridge forwarding node, and
6693 * initialize the expiration time and Ethernet
6694 * address.
6695 */
6696 brt = zalloc_noblock(bridge_rtnode_pool);
6697 if (brt == NULL) {
6698 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6699 "zalloc_nolock failed");
6700 return ENOMEM;
6701 }
6702 bzero(brt, sizeof(struct bridge_rtnode));
6703
6704 if (bif->bif_ifflags & IFBIF_STICKY) {
6705 brt->brt_flags = IFBAF_STICKY;
6706 } else {
6707 brt->brt_flags = IFBAF_DYNAMIC;
6708 }
6709
6710 memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6711 brt->brt_vlan = vlan;
6712
6713 if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6714 zfree(bridge_rtnode_pool, brt);
6715 return error;
6716 }
6717 brt->brt_dst = bif;
6718 bif->bif_addrcnt++;
6719 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6720 "added %02x:%02x:%02x:%02x:%02x:%02x "
6721 "on %s count %u hashsize %u",
6722 dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6723 sc->sc_ifp->if_xname, sc->sc_brtcnt,
6724 sc->sc_rthash_size);
6725 }
6726
6727 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6728 brt->brt_dst != bif) {
6729 brt->brt_dst->bif_addrcnt--;
6730 brt->brt_dst = bif;
6731 brt->brt_dst->bif_addrcnt++;
6732 }
6733
6734 if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6735 unsigned long now;
6736
6737 now = (unsigned long) net_uptime();
6738 brt->brt_expire = now + sc->sc_brttimeout;
6739 }
6740 if (setflags) {
6741 brt->brt_flags = flags;
6742 }
6743
6744 return 0;
6745 }
6746
6747 /*
6748 * bridge_rtlookup:
6749 *
6750 * Lookup the destination interface for an address.
6751 */
6752 static struct bridge_iflist *
6753 bridge_rtlookup_bif(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
6754 uint16_t vlan)
6755 {
6756 struct bridge_rtnode *brt;
6757
6758 BRIDGE_LOCK_ASSERT_HELD(sc);
6759
6760 if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6761 return NULL;
6762 }
6763
6764 return brt->brt_dst;
6765 }
6766
6767 /*
6768 * bridge_rttrim:
6769 *
6770 * Trim the routine table so that we have a number
6771 * of routing entries less than or equal to the
6772 * maximum number.
6773 */
6774 static void
6775 bridge_rttrim(struct bridge_softc *sc)
6776 {
6777 struct bridge_rtnode *brt, *nbrt;
6778
6779 BRIDGE_LOCK_ASSERT_HELD(sc);
6780
6781 /* Make sure we actually need to do this. */
6782 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6783 return;
6784 }
6785
6786 /* Force an aging cycle; this might trim enough addresses. */
6787 bridge_rtage(sc);
6788 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6789 return;
6790 }
6791
6792 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6793 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6794 bridge_rtnode_destroy(sc, brt);
6795 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6796 return;
6797 }
6798 }
6799 }
6800 }
6801
6802 /*
6803 * bridge_aging_timer:
6804 *
6805 * Aging periodic timer for the bridge routing table.
6806 */
6807 static void
6808 bridge_aging_timer(struct bridge_softc *sc)
6809 {
6810 BRIDGE_LOCK_ASSERT_HELD(sc);
6811
6812 bridge_rtage(sc);
6813 if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6814 (sc->sc_flags & SCF_DETACHING) == 0) {
6815 sc->sc_aging_timer.bdc_sc = sc;
6816 sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6817 sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6818 bridge_schedule_delayed_call(&sc->sc_aging_timer);
6819 }
6820 }
6821
6822 /*
6823 * bridge_rtage:
6824 *
6825 * Perform an aging cycle.
6826 */
6827 static void
6828 bridge_rtage(struct bridge_softc *sc)
6829 {
6830 struct bridge_rtnode *brt, *nbrt;
6831 unsigned long now;
6832
6833 BRIDGE_LOCK_ASSERT_HELD(sc);
6834
6835 now = (unsigned long) net_uptime();
6836
6837 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6838 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6839 if (now >= brt->brt_expire) {
6840 bridge_rtnode_destroy(sc, brt);
6841 }
6842 }
6843 }
6844 if (sc->sc_mac_nat_bif != NULL) {
6845 bridge_mac_nat_age_entries(sc, now);
6846 }
6847 }
6848
6849 /*
6850 * bridge_rtflush:
6851 *
6852 * Remove all dynamic addresses from the bridge.
6853 */
6854 static void
6855 bridge_rtflush(struct bridge_softc *sc, int full)
6856 {
6857 struct bridge_rtnode *brt, *nbrt;
6858
6859 BRIDGE_LOCK_ASSERT_HELD(sc);
6860
6861 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6862 if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6863 bridge_rtnode_destroy(sc, brt);
6864 }
6865 }
6866 }
6867
6868 /*
6869 * bridge_rtdaddr:
6870 *
6871 * Remove an address from the table.
6872 */
6873 static int
6874 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN], uint16_t vlan)
6875 {
6876 struct bridge_rtnode *brt;
6877 int found = 0;
6878
6879 BRIDGE_LOCK_ASSERT_HELD(sc);
6880
6881 /*
6882 * If vlan is zero then we want to delete for all vlans so the lookup
6883 * may return more than one.
6884 */
6885 while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6886 bridge_rtnode_destroy(sc, brt);
6887 found = 1;
6888 }
6889
6890 return found ? 0 : ENOENT;
6891 }
6892
6893 /*
6894 * bridge_rtdelete:
6895 *
6896 * Delete routes to a specific member interface.
6897 */
6898 static void
6899 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6900 {
6901 struct bridge_rtnode *brt, *nbrt;
6902
6903 BRIDGE_LOCK_ASSERT_HELD(sc);
6904
6905 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6906 if (brt->brt_ifp == ifp && (full ||
6907 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6908 bridge_rtnode_destroy(sc, brt);
6909 }
6910 }
6911 }
6912
6913 /*
6914 * bridge_rtable_init:
6915 *
6916 * Initialize the route table for this bridge.
6917 */
6918 static int
6919 bridge_rtable_init(struct bridge_softc *sc)
6920 {
6921 u_int32_t i;
6922
6923 sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6924 BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6925 sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6926
6927 for (i = 0; i < sc->sc_rthash_size; i++) {
6928 LIST_INIT(&sc->sc_rthash[i]);
6929 }
6930
6931 sc->sc_rthash_key = RandomULong();
6932
6933 LIST_INIT(&sc->sc_rtlist);
6934
6935 return 0;
6936 }
6937
/*
 * bridge_rthash_delayed_resize:
 *
 *	Resize the routing table hash on a delayed thread call.
 *
 *	Doubles the number of hash buckets when the table holds at least
 *	four entries per bucket, then rehashes every route into the new
 *	buckets under a fresh hash key.
 *
 *	Entered and left with the bridge lock held, but the lock is dropped
 *	around the allocation of the new bucket array. SCF_RESIZING is set
 *	for that window.
 */
static void
bridge_rthash_delayed_resize(struct bridge_softc *sc)
{
	u_int32_t new_rthash_size = 0;
	u_int32_t old_rthash_size = 0;
	struct _bridge_rtnode_list *new_rthash = NULL;
	struct _bridge_rtnode_list *old_rthash = NULL;
	u_int32_t i;
	struct bridge_rtnode *brt;
	int error = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);

	/*
	 * Four entries per hash bucket is our ideal load factor
	 */
	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
		goto out;
	}

	/*
	 * Doubling the number of hash buckets may be too simplistic
	 * especially when facing a spike of new entries
	 */
	new_rthash_size = sc->sc_rthash_size * 2;

	/* flag the resize as in progress while the lock is dropped */
	sc->sc_flags |= SCF_RESIZING;
	BRIDGE_UNLOCK(sc);

	new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
	    Z_WAITOK | Z_ZERO);

	BRIDGE_LOCK(sc);
	sc->sc_flags &= ~SCF_RESIZING;

	if (new_rthash == NULL) {
		error = ENOMEM;
		goto out;
	}
	/* the bridge may have started detaching while the lock was dropped */
	if ((sc->sc_flags & SCF_DETACHING)) {
		error = ENODEV;
		goto out;
	}
	/*
	 * Fail safe from here on
	 */
	old_rthash = sc->sc_rthash;
	old_rthash_size = sc->sc_rthash_size;
	sc->sc_rthash = new_rthash;
	sc->sc_rthash_size = new_rthash_size;

	/*
	 * Get a new key to force entries to be shuffled around to reduce
	 * the likelihood they will land in the same buckets
	 */
	sc->sc_rthash_key = RandomULong();

	for (i = 0; i < sc->sc_rthash_size; i++) {
		LIST_INIT(&sc->sc_rthash[i]);
	}

	/* move every route from its old bucket into its new one */
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
		LIST_REMOVE(brt, brt_hash);
		(void) bridge_rtnode_hash(sc, brt);
	}
out:
	if (error == 0) {
		/*
		 * This branch is also taken when the load-factor check above
		 * bailed out early: the message then reports the unchanged
		 * size and kfree_type() is handed a NULL pointer with size 0.
		 * NOTE(review): presumably kfree_type() tolerates that --
		 * confirm.
		 */
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
		    "%s new size %u",
		    sc->sc_ifp->if_xname, sc->sc_rthash_size);
		kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
	} else {
		/* the resize was abandoned: release the unused new array */
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
		    "%s failed %d", sc->sc_ifp->if_xname, error);
		kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
	}
}
7020
7021 /*
7022 * Resize the number of hash buckets based on the load factor
7023 * Currently only grow
7024 * Failing to resize the hash table is not fatal
7025 */
7026 static void
7027 bridge_rthash_resize(struct bridge_softc *sc)
7028 {
7029 BRIDGE_LOCK_ASSERT_HELD(sc);
7030
7031 if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
7032 return;
7033 }
7034
7035 /*
7036 * Four entries per hash bucket is our ideal load factor
7037 */
7038 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
7039 return;
7040 }
7041 /*
7042 * Hard limit on the size of the routing hash table
7043 */
7044 if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
7045 return;
7046 }
7047
7048 sc->sc_resize_call.bdc_sc = sc;
7049 sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
7050 bridge_schedule_delayed_call(&sc->sc_resize_call);
7051 }
7052
/*
 * bridge_rtable_fini:
 *
 *	Deconstruct the route table for this bridge: release the hash
 *	bucket array and reset the softc's pointer and size. The table is
 *	expected to be empty already, which the assertion checks.
 */
static void
bridge_rtable_fini(struct bridge_softc *sc)
{
	/* every route must have been removed before the buckets go away */
	KASSERT(sc->sc_brtcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
	kfree_type_counted_by(struct _bridge_rtnode_list, sc->sc_rthash_size,
	    sc->sc_rthash);
	/* leave no dangling pointer or stale size behind */
	sc->sc_rthash = NULL;
	sc->sc_rthash_size = 0;
}
7068
7069 /*
7070 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
7071 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
7072 */
7073 #define mix(a, b, c) \
7074 do { \
7075 a -= b; a -= c; a ^= (c >> 13); \
7076 b -= c; b -= a; b ^= (a << 8); \
7077 c -= a; c -= b; c ^= (b >> 13); \
7078 a -= b; a -= c; a ^= (c >> 12); \
7079 b -= c; b -= a; b ^= (a << 16); \
7080 c -= a; c -= b; c ^= (b >> 5); \
7081 a -= b; a -= c; a ^= (c >> 3); \
7082 b -= c; b -= a; b ^= (a << 10); \
7083 c -= a; c -= b; c ^= (b >> 15); \
7084 } while ( /*CONSTCOND*/ 0)
7085
7086 static __inline uint32_t
7087 bridge_rthash(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN])
7088 {
7089 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
7090
7091 b += addr[5] << 8;
7092 b += addr[4];
7093 a += addr[3] << 24;
7094 a += addr[2] << 16;
7095 a += addr[1] << 8;
7096 a += addr[0];
7097
7098 mix(a, b, c);
7099
7100 return c & BRIDGE_RTHASH_MASK(sc);
7101 }
7102
7103 #undef mix
7104
/*
 * bridge_rtnode_addr_cmp:
 *
 * Compare two Ethernet addresses byte by byte. Returns 0 when they are
 * equal, otherwise the difference (a[i] - b[i]) of the first pair of
 * bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t a[ETHER_ADDR_LEN], const uint8_t b[ETHER_ADDR_LEN])
{
	for (int idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int delta = (int)a[idx] - (int)b[idx];

		if (delta != 0) {
			return delta;
		}
	}
	return 0;
}
7116
7117 /*
7118 * bridge_rtnode_lookup:
7119 *
7120 * Look up a bridge route node for the specified destination. Compare the
7121 * vlan id or if zero then just return the first match.
7122 */
7123 static struct bridge_rtnode *
7124 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
7125 uint16_t vlan)
7126 {
7127 struct bridge_rtnode *brt;
7128 uint32_t hash;
7129 int dir;
7130
7131 BRIDGE_LOCK_ASSERT_HELD(sc);
7132
7133 hash = bridge_rthash(sc, addr);
7134 LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7135 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7136 if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7137 return brt;
7138 }
7139 if (dir > 0) {
7140 return NULL;
7141 }
7142 }
7143
7144 return NULL;
7145 }
7146
7147 /*
7148 * bridge_rtnode_hash:
7149 *
7150 * Insert the specified bridge node into the route hash table.
7151 * This is used when adding a new node or to rehash when resizing
7152 * the hash table
7153 */
7154 static int
7155 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7156 {
7157 struct bridge_rtnode *lbrt;
7158 uint32_t hash;
7159 int dir;
7160
7161 BRIDGE_LOCK_ASSERT_HELD(sc);
7162
7163 hash = bridge_rthash(sc, brt->brt_addr);
7164
7165 lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7166 if (lbrt == NULL) {
7167 LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7168 goto out;
7169 }
7170
7171 do {
7172 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7173 if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7174 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7175 "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7176 sc->sc_ifp->if_xname,
7177 brt->brt_addr[0], brt->brt_addr[1],
7178 brt->brt_addr[2], brt->brt_addr[3],
7179 brt->brt_addr[4], brt->brt_addr[5]);
7180 return EEXIST;
7181 }
7182 if (dir > 0) {
7183 LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7184 goto out;
7185 }
7186 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7187 LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7188 goto out;
7189 }
7190 lbrt = LIST_NEXT(lbrt, brt_hash);
7191 } while (lbrt != NULL);
7192
7193 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7194 "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7195 sc->sc_ifp->if_xname,
7196 brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7197 brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7198 out:
7199 return 0;
7200 }
7201
7202 /*
7203 * bridge_rtnode_insert:
7204 *
7205 * Insert the specified bridge node into the route table. We
7206 * assume the entry is not already in the table.
7207 */
7208 static int
7209 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7210 {
7211 int error;
7212
7213 error = bridge_rtnode_hash(sc, brt);
7214 if (error != 0) {
7215 return error;
7216 }
7217
7218 LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7219 sc->sc_brtcnt++;
7220
7221 bridge_rthash_resize(sc);
7222
7223 return 0;
7224 }
7225
7226 /*
7227 * bridge_rtnode_destroy:
7228 *
7229 * Destroy a bridge rtnode.
7230 */
7231 static void
7232 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7233 {
7234 BRIDGE_LOCK_ASSERT_HELD(sc);
7235
7236 LIST_REMOVE(brt, brt_hash);
7237
7238 LIST_REMOVE(brt, brt_list);
7239 sc->sc_brtcnt--;
7240 brt->brt_dst->bif_addrcnt--;
7241 zfree(bridge_rtnode_pool, brt);
7242 }
7243
7244 #if BRIDGESTP
7245 /*
7246 * bridge_rtable_expire:
7247 *
7248 * Set the expiry time for all routes on an interface.
7249 */
7250 static void
7251 bridge_rtable_expire(struct ifnet *ifp, int age)
7252 {
7253 struct bridge_softc *sc = ifp->if_bridge;
7254 struct bridge_rtnode *brt;
7255
7256 BRIDGE_LOCK(sc);
7257
7258 /*
7259 * If the age is zero then flush, otherwise set all the expiry times to
7260 * age for the interface
7261 */
7262 if (age == 0) {
7263 bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7264 } else {
7265 unsigned long now;
7266
7267 now = (unsigned long) net_uptime();
7268
7269 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7270 /* Cap the expiry time to 'age' */
7271 if (brt->brt_ifp == ifp &&
7272 brt->brt_expire > now + age &&
7273 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7274 brt->brt_expire = now + age;
7275 }
7276 }
7277 }
7278 BRIDGE_UNLOCK(sc);
7279 }
7280
7281 /*
7282 * bridge_state_change:
7283 *
7284 * Callback from the bridgestp code when a port changes states.
7285 */
7286 static void
7287 bridge_state_change(struct ifnet *ifp, int state)
7288 {
7289 struct bridge_softc *sc = ifp->if_bridge;
7290 static const char *stpstates[] = {
7291 "disabled",
7292 "listening",
7293 "learning",
7294 "forwarding",
7295 "blocking",
7296 "discarding"
7297 };
7298
7299 if (log_stp) {
7300 log(LOG_NOTICE, "%s: state changed to %s on %s",
7301 sc->sc_ifp->if_xname,
7302 stpstates[state], ifp->if_xname);
7303 }
7304 }
7305 #endif /* BRIDGESTP */
7306
7307 /*
7308 * bridge_detach:
7309 *
7310 * Callback when interface has been detached.
7311 */
7312 static void
7313 bridge_detach(ifnet_t ifp)
7314 {
7315 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7316
7317 #if BRIDGESTP
7318 bstp_detach(&sc->sc_stp);
7319 #endif /* BRIDGESTP */
7320
7321 /* Tear down the routing table. */
7322 bridge_rtable_fini(sc);
7323
7324 lck_mtx_lock(&bridge_list_mtx);
7325 LIST_REMOVE(sc, sc_list);
7326 lck_mtx_unlock(&bridge_list_mtx);
7327
7328 ifnet_release(ifp);
7329
7330 lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
7331 kfree_type(struct bridge_softc, sc);
7332 }
7333
7334 /*
7335 * bridge_link_event:
7336 *
7337 * Report a data link event on an interface
7338 */
7339 static void
7340 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7341 {
7342 struct event {
7343 u_int32_t ifnet_family;
7344 u_int32_t unit;
7345 char if_name[IFNAMSIZ];
7346 };
7347 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7348 struct kern_event_msg *header = (struct kern_event_msg*)message;
7349 struct event *data = (struct event *)(message + KEV_MSG_HEADER_SIZE);
7350
7351 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7352 "%s event_code %u - %s", ifp->if_xname,
7353 event_code, dlil_kev_dl_code_str(event_code));
7354 header->total_size = sizeof(message);
7355 header->vendor_code = KEV_VENDOR_APPLE;
7356 header->kev_class = KEV_NETWORK_CLASS;
7357 header->kev_subclass = KEV_DL_SUBCLASS;
7358 header->event_code = event_code;
7359 data->ifnet_family = ifnet_family(ifp);
7360 data->unit = (u_int32_t)ifnet_unit(ifp);
7361 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7362 ifnet_event(ifp, header);
7363 }
7364
/*
 * BRIDGE_HF_DROP:
 *
 *	Account for a packet rejected by the host filter: bump the named
 *	counter in bridge_hostfilter_stats, log the reason together with the
 *	supplied function name and line number, and set the invoking
 *	function's local `error' variable to EINVAL.
 *
 *	The body is wrapped in do { } while (0) so that an invocation followed
 *	by a semicolon is exactly one statement, including inside an unbraced
 *	if/else.
 */
#define BRIDGE_HF_DROP(reason, func, line) do {                         \
	bridge_hostfilter_stats.reason++;                               \
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,                       \
	    "%s.%d" #reason, func, line);                               \
	error = EINVAL;                                                 \
} while ( /*CONSTCOND*/ 0)
7371
7372 static int
7373 bridge_host_filter_arp(struct bridge_iflist *bif, mbuf_t *data)
7374 {
7375 struct ether_arp *ea;
7376 struct ether_header *eh;
7377 int error = EINVAL;
7378 mbuf_t m = *data;
7379 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7380
7381 /*
7382 * Make the Ethernet and ARP headers contiguous
7383 */
7384 if (mbuf_pkthdr_len(m) < minlen) {
7385 BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7386 goto done;
7387 }
7388 if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7389 BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7390 __func__, __LINE__);
7391 goto done;
7392 }
7393 m = *data;
7394
7395 /*
7396 * Restrict Ethernet protocols to ARP and IP/IPv6
7397 */
7398 eh = mtod(m, struct ether_header *);
7399 ea = (struct ether_arp *)(eh + 1);
7400 if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7401 BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7402 __func__, __LINE__);
7403 goto done;
7404 }
7405 if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7406 BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7407 __func__, __LINE__);
7408 goto done;
7409 }
7410 /*
7411 * Verify the address lengths are correct
7412 */
7413 if (ea->arp_hln != ETHER_ADDR_LEN) {
7414 BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7415 goto done;
7416 }
7417 if (ea->arp_pln != sizeof(struct in_addr)) {
7418 BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7419 __func__, __LINE__);
7420 goto done;
7421 }
7422 /*
7423 * Allow only ARP request or ARP reply
7424 */
7425 if (ea->arp_op != HTONS_ARPOP_REQUEST &&
7426 ea->arp_op != HTONS_ARPOP_REPLY) {
7427 BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7428 goto done;
7429 }
7430 if ((bif->bif_flags & BIFF_HF_HWSRC) != 0) {
7431 /*
7432 * Verify source hardware address matches
7433 */
7434 if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7435 ETHER_ADDR_LEN) != 0) {
7436 BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7437 goto done;
7438 }
7439 }
7440 if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7441 /*
7442 * Verify source protocol address:
7443 * May be null for an ARP probe
7444 */
7445 if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7446 sizeof(struct in_addr)) != 0 &&
7447 bcmp(ea->arp_spa, &inaddr_any,
7448 sizeof(struct in_addr)) != 0) {
7449 BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7450 goto done;
7451 }
7452 }
7453 bridge_hostfilter_stats.brhf_arp_ok += 1;
7454 error = 0;
7455 done:
7456 return error;
7457 }
7458
7459 /*
7460 * MAC NAT
7461 */
7462
7463 static errno_t
7464 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7465 {
7466 errno_t error = 0;
7467
7468 BRIDGE_LOCK_ASSERT_HELD(sc);
7469
7470 if (IFNET_IS_VMNET(bif->bif_ifp)) {
7471 error = EINVAL;
7472 goto done;
7473 }
7474 if (sc->sc_mac_nat_bif != NULL) {
7475 if (sc->sc_mac_nat_bif != bif) {
7476 error = EBUSY;
7477 }
7478 goto done;
7479 }
7480 sc->sc_mac_nat_bif = bif;
7481 bif->bif_ifflags |= IFBIF_MAC_NAT;
7482 bridge_mac_nat_populate_entries(sc);
7483
7484 done:
7485 return error;
7486 }
7487
7488 static void
7489 bridge_mac_nat_disable(struct bridge_softc *sc)
7490 {
7491 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7492
7493 assert(mac_nat_bif != NULL);
7494 bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7495 mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7496 sc->sc_mac_nat_bif = NULL;
7497 return;
7498 }
7499
7500 static void
7501 mac_nat_entry_print2(struct mac_nat_entry *mne,
7502 const char ifname[IFNAMSIZ], const char *msg1, const char *msg2)
7503 {
7504 int af;
7505 char etopbuf[24];
7506 char ntopbuf[MAX_IPv6_STR_LEN];
7507 const char *space;
7508
7509 af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7510 ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7511 (void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7512 if (msg2 == NULL) {
7513 msg2 = "";
7514 space = "";
7515 } else {
7516 space = " ";
7517 }
7518 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7519 "%.*s %s%s%s %p (%s, %s, %s)", IFNAMSIZ, ifname, msg1, space, msg2, mne,
7520 mne->mne_bif->bif_ifp->if_xname, ntopbuf, etopbuf);
7521 }
7522
7523 static void
7524 mac_nat_entry_print(struct mac_nat_entry *mne,
7525 const char ifname[IFNAMSIZ], const char *msg)
7526 {
7527 mac_nat_entry_print2(mne, ifname, msg, NULL);
7528 }
7529
7530 static struct mac_nat_entry *
7531 bridge_lookup_mac_nat_entry_ipv4(const struct bridge_softc *sc, const struct in_addr *ip)
7532 {
7533 struct mac_nat_entry *mne;
7534 struct mac_nat_entry *ret_mne = NULL;
7535
7536 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7537 if (mne->mne_ip.s_addr == ip->s_addr) {
7538 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7539 mac_nat_entry_print(mne, sc->sc_if_xname,
7540 "found");
7541 }
7542 ret_mne = mne;
7543 break;
7544 }
7545 }
7546
7547 return ret_mne;
7548 }
7549
7550 static struct mac_nat_entry *
7551 bridge_lookup_mac_nat_entry_ipv6(const struct bridge_softc *sc, const struct in6_addr *ip6)
7552 {
7553 struct mac_nat_entry *mne;
7554 struct mac_nat_entry *ret_mne = NULL;
7555
7556 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7557 if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7558 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7559 mac_nat_entry_print(mne, sc->sc_if_xname,
7560 "found");
7561 }
7562 ret_mne = mne;
7563 break;
7564 }
7565 }
7566
7567 return ret_mne;
7568 }
7569
7570 static void
7571 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7572 struct mac_nat_entry *mne, const char *reason)
7573 {
7574 LIST_REMOVE(mne, mne_list);
7575 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7576 mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7577 }
7578 zfree(bridge_mne_pool, mne);
7579 sc->sc_mne_count--;
7580 }
7581
7582 static struct mac_nat_entry *
7583 bridge_create_mac_nat_entry_common(struct bridge_softc *sc,
7584 struct bridge_iflist *bif, const char eaddr[ETHER_ADDR_LEN])
7585 {
7586 struct mac_nat_entry *mne;
7587
7588 if (sc->sc_mne_count >= sc->sc_mne_max) {
7589 sc->sc_mne_allocation_failures++;
7590 return NULL;
7591 }
7592
7593 mne = zalloc_noblock(bridge_mne_pool);
7594 if (mne == NULL) {
7595 sc->sc_mne_allocation_failures++;
7596 return NULL;
7597 }
7598
7599 sc->sc_mne_count++;
7600 bzero(mne, sizeof(*mne));
7601 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7602
7603 mne->mne_bif = bif;
7604 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7605
7606 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7607 mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7608 }
7609
7610 return mne;
7611 }
7612
7613 static struct mac_nat_entry *
7614 bridge_create_mac_nat_entry_ipv4(struct bridge_softc *sc,
7615 struct bridge_iflist *bif, const struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7616 {
7617 struct mac_nat_entry *mne;
7618
7619 mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7620 if (mne == NULL) {
7621 return NULL;
7622 }
7623
7624 bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7625 LIST_INSERT_HEAD(&sc->sc_mne_list, mne, mne_list);
7626
7627 return mne;
7628 }
7629
7630 static struct mac_nat_entry *
7631 bridge_create_mac_nat_entry_ipv6(struct bridge_softc *sc,
7632 struct bridge_iflist *bif, const struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7633 {
7634 struct mac_nat_entry *mne;
7635
7636 mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7637 if (mne == NULL) {
7638 return NULL;
7639 }
7640
7641 bcopy(ip6, &mne->mne_ip6, sizeof(mne->mne_ip6));
7642 mne->mne_flags |= MNE_FLAGS_IPV6;
7643 LIST_INSERT_HEAD(&sc->sc_mne_list_v6, mne, mne_list);
7644
7645 return mne;
7646 }
7647
7648 static struct mac_nat_entry *
7649 bridge_update_mac_nat_entry_common(struct bridge_softc *sc, struct bridge_iflist *bif,
7650 struct mac_nat_entry *mne, const char eaddr[ETHER_ADDR_LEN])
7651 {
7652 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7653
7654 if (mne->mne_bif == mac_nat_bif) {
7655 /* the MAC NAT interface takes precedence */
7656 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7657 if (mne->mne_bif != bif) {
7658 mac_nat_entry_print2(mne,
7659 sc->sc_if_xname, "reject",
7660 bif->bif_ifp->if_xname);
7661 }
7662 }
7663 } else if (mne->mne_bif != bif) {
7664 const char *__null_terminated old_if = mne->mne_bif->bif_ifp->if_xname;
7665
7666 mne->mne_bif = bif;
7667 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7668 mac_nat_entry_print2(mne,
7669 sc->sc_if_xname, "replaced",
7670 old_if);
7671 }
7672 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7673 }
7674
7675 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7676
7677 return mne;
7678 }
7679
7680 static struct mac_nat_entry *
7681 bridge_update_mac_nat_entry_ipv4(struct bridge_softc *sc,
7682 struct bridge_iflist *bif, struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7683 {
7684 struct mac_nat_entry *mne;
7685
7686 mne = bridge_lookup_mac_nat_entry_ipv4(sc, ip);
7687 if (mne != NULL) {
7688 return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7689 }
7690
7691 mne = bridge_create_mac_nat_entry_ipv4(sc, bif, ip, eaddr);
7692 return mne;
7693 }
7694
7695 static struct mac_nat_entry *
7696 bridge_update_mac_nat_entry_ipv6(struct bridge_softc *sc,
7697 struct bridge_iflist *bif, struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7698 {
7699 struct mac_nat_entry *mne;
7700
7701 mne = bridge_lookup_mac_nat_entry_ipv6(sc, ip6);
7702 if (mne != NULL) {
7703 return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7704 }
7705
7706 mne = bridge_create_mac_nat_entry_ipv6(sc, bif, ip6, eaddr);
7707 return mne;
7708 }
7709
7710 static void
7711 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7712 struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7713 {
7714 struct mac_nat_entry *mne;
7715 struct mac_nat_entry *tmne;
7716
7717 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7718 if (bif != NULL && mne->mne_bif != bif) {
7719 continue;
7720 }
7721 bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7722 }
7723 }
7724
7725 /*
7726 * bridge_mac_nat_flush_entries:
7727 *
7728 * Flush MAC NAT entries for the specified member. Flush all entries if
7729 * the member is the one that requires MAC NAT, otherwise just flush the
7730 * ones for the specified member.
7731 */
7732 static void
7733 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7734 {
7735 struct bridge_iflist *flush_bif;
7736
7737 flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7738 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7739 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7740 }
7741
7742 static void
7743 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7744 {
7745 errno_t error;
7746 ifnet_t ifp;
7747 uint16_t addresses_count = 0;
7748 ifaddr_t * __counted_by(addresses_count) list;
7749 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7750
7751 assert(mac_nat_bif != NULL);
7752 ifp = mac_nat_bif->bif_ifp;
7753 error = ifnet_get_address_list_family_with_count(ifp, &list, &addresses_count, 0);
7754 if (error != 0) {
7755 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7756 "ifnet_get_address_list(%s) failed %d",
7757 ifp->if_xname, error);
7758 return;
7759 }
7760
7761 for (uint16_t i = 0; i < addresses_count; ++i) {
7762 sa_family_t af;
7763
7764 af = ifaddr_address_family(list[i]);
7765 switch (af) {
7766 case AF_INET: {
7767 struct sockaddr_in sin;
7768
7769 error = ifaddr_address(list[i], (struct sockaddr *)&sin, sizeof(sin));
7770 if (error != 0) {
7771 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7772 "ifaddr_address failed %d",
7773 error);
7774 break;
7775 }
7776
7777 bridge_create_mac_nat_entry_ipv4(sc, mac_nat_bif, &sin.sin_addr, IF_LLADDR(ifp));
7778 break;
7779 }
7780
7781 case AF_INET6: {
7782 struct sockaddr_in6 sin6;
7783
7784 error = ifaddr_address(list[i], (struct sockaddr *)&sin6, sizeof(sin6));
7785 if (error != 0) {
7786 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7787 "ifaddr_address failed %d",
7788 error);
7789 break;
7790 }
7791
7792 if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) {
7793 /* remove scope ID */
7794 sin6.sin6_addr.s6_addr16[1] = 0;
7795 }
7796
7797 bridge_create_mac_nat_entry_ipv6(sc, mac_nat_bif, &sin6.sin6_addr, IF_LLADDR(ifp));
7798 break;
7799 }
7800
7801 default:
7802 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7803 "ifaddr_address_family unknown %d",
7804 af);
7805 break;
7806 }
7807 }
7808
7809 ifnet_address_list_free_counted_by(list, addresses_count);
7810 return;
7811 }
7812
7813 static void
7814 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7815 struct mac_nat_entry_list *list, unsigned long now)
7816 {
7817 struct mac_nat_entry *mne;
7818 struct mac_nat_entry *tmne;
7819
7820 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7821 if (now >= mne->mne_expire) {
7822 bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7823 }
7824 }
7825 }
7826
7827 static void
7828 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7829 {
7830 if (sc->sc_mac_nat_bif == NULL) {
7831 return;
7832 }
7833 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7834 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7835 }
7836
7837 static const char *
7838 get_in_out_string(boolean_t is_output)
7839 {
7840 return (const char * __null_terminated)(is_output ? "OUT" : "IN");
7841 }
7842
7843 /*
7844 * is_valid_arp_packet:
7845 * Verify that this is a valid ARP packet.
7846 *
7847 * Returns TRUE if the packet is valid, FALSE otherwise.
7848 */
7849 static boolean_t
7850 is_valid_arp_packet(mbuf_t *data, bool is_output,
7851 struct ether_header **eh_p, struct ether_arp **ea_p)
7852 {
7853 struct ether_arp *ea;
7854 struct ether_header *eh;
7855 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7856 boolean_t is_valid = FALSE;
7857 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7858
7859 if (mbuf_pkthdr_len(*data) < minlen) {
7860 BRIDGE_LOG(LOG_DEBUG, flags,
7861 "ARP %s short frame %lu < %lu",
7862 get_in_out_string(is_output),
7863 mbuf_pkthdr_len(*data), minlen);
7864 goto done;
7865 }
7866 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7867 BRIDGE_LOG(LOG_DEBUG, flags,
7868 "ARP %s size %lu mbuf_pullup fail",
7869 get_in_out_string(is_output),
7870 minlen);
7871 *data = NULL;
7872 goto done;
7873 }
7874
7875 /* validate ARP packet */
7876 eh = mtod(*data, struct ether_header *);
7877 ea = (struct ether_arp *)(eh + 1);
7878 if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7879 BRIDGE_LOG(LOG_DEBUG, flags,
7880 "ARP %s htype not ethernet",
7881 get_in_out_string(is_output));
7882 goto done;
7883 }
7884 if (ea->arp_hln != ETHER_ADDR_LEN) {
7885 BRIDGE_LOG(LOG_DEBUG, flags,
7886 "ARP %s hlen not ethernet",
7887 get_in_out_string(is_output));
7888 goto done;
7889 }
7890 if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7891 BRIDGE_LOG(LOG_DEBUG, flags,
7892 "ARP %s ptype not IP",
7893 get_in_out_string(is_output));
7894 goto done;
7895 }
7896 if (ea->arp_pln != sizeof(struct in_addr)) {
7897 BRIDGE_LOG(LOG_DEBUG, flags,
7898 "ARP %s plen not IP",
7899 get_in_out_string(is_output));
7900 goto done;
7901 }
7902 is_valid = TRUE;
7903 *ea_p = ea;
7904 *eh_p = eh;
7905 done:
7906 return is_valid;
7907 }
7908
7909 static struct mac_nat_entry *
7910 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7911 {
7912 struct ether_arp * __single ea;
7913 struct ether_header * __single eh;
7914 struct mac_nat_entry *mne = NULL;
7915 u_short op;
7916 struct in_addr tpa;
7917
7918 if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7919 goto done;
7920 }
7921 op = ea->arp_op;
7922 switch (op) {
7923 case HTONS_ARPOP_REQUEST:
7924 case HTONS_ARPOP_REPLY:
7925 /* only care about REQUEST and REPLY */
7926 break;
7927 default:
7928 goto done;
7929 }
7930
7931 /* check the target IP address for a NAT entry */
7932 bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7933 if (tpa.s_addr != 0) {
7934 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &tpa);
7935 }
7936 if (mne != NULL) {
7937 if (op == HTONS_ARPOP_REPLY) {
7938 /* translate the MAC address */
7939 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7940 char mac_src[24];
7941 char mac_dst[24];
7942
7943 ether_ntop(mac_src, sizeof(mac_src),
7944 ea->arp_tha);
7945 ether_ntop(mac_dst, sizeof(mac_dst),
7946 mne->mne_mac);
7947 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7948 "%s %s ARP %s -> %s",
7949 sc->sc_if_xname,
7950 mne->mne_bif->bif_ifp->if_xname,
7951 mac_src, mac_dst);
7952 }
7953 bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
7954 }
7955 } else {
7956 /* handle conflicting ARP (sender matches mne) */
7957 struct in_addr spa;
7958
7959 bcopy(ea->arp_spa, &spa, sizeof(spa));
7960 if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
7961 /* check the source IP for a NAT entry */
7962 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &spa);
7963 }
7964 }
7965
7966 done:
7967 return mne;
7968 }
7969
7970 static boolean_t
7971 bridge_mac_nat_arp_output(struct bridge_softc *sc,
7972 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7973 {
7974 struct ether_arp * __single ea;
7975 struct ether_header * __single eh;
7976 struct in_addr ip;
7977 struct mac_nat_entry *mne = NULL;
7978 u_short op;
7979 boolean_t translate = FALSE;
7980
7981 if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
7982 goto done;
7983 }
7984 op = ea->arp_op;
7985 switch (op) {
7986 case HTONS_ARPOP_REQUEST:
7987 case HTONS_ARPOP_REPLY:
7988 /* only care about REQUEST and REPLY */
7989 break;
7990 default:
7991 goto done;
7992 }
7993
7994 bcopy(ea->arp_spa, &ip, sizeof(ip));
7995 if (ip.s_addr == 0) {
7996 goto done;
7997 }
7998 /* XXX validate IP address: no multicast/broadcast */
7999 mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
8000 (const char *)ea->arp_sha);
8001 if (mnr != NULL && mne != NULL) {
8002 /* record the offset to do the replacement */
8003 translate = TRUE;
8004 mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
8005 }
8006
8007 done:
8008 return translate;
8009 }
8010
8011 #define ETHER_IPV4_HEADER_LEN (sizeof(struct ether_header) + \
8012 + sizeof(struct ip))
8013 static uint8_t * __indexable
8014 get_ether_ip_header_ptr(mbuf_t *data, boolean_t is_output)
8015 {
8016 uint8_t *header = NULL;
8017 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8018 size_t minlen = ETHER_IPV4_HEADER_LEN;
8019
8020 if (mbuf_pkthdr_len(*data) < minlen) {
8021 BRIDGE_LOG(LOG_DEBUG, flags,
8022 "IP %s short frame %lu < %lu",
8023 get_in_out_string(is_output),
8024 mbuf_pkthdr_len(*data), minlen);
8025 goto done;
8026 }
8027 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8028 BRIDGE_LOG(LOG_DEBUG, flags,
8029 "IP %s size %lu mbuf_pullup fail",
8030 get_in_out_string(is_output),
8031 minlen);
8032 *data = NULL;
8033 goto done;
8034 }
8035 header = mtod(*data, uint8_t *);
8036 done:
8037 return header;
8038 }
8039
8040 static struct mac_nat_entry *
8041 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8042 {
8043 struct in_addr dst;
8044 uint8_t *header;
8045 struct ip *iphdr;
8046 struct mac_nat_entry *mne = NULL;
8047
8048 header = get_ether_ip_header_ptr(data, FALSE);
8049 if (header == NULL) {
8050 goto done;
8051 }
8052 iphdr = (struct ip *)(void *)(header + sizeof(struct ether_header));
8053 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8054 /* XXX validate IP address */
8055 if (dst.s_addr == 0) {
8056 goto done;
8057 }
8058 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &dst);
8059 done:
8060 return mne;
8061 }
8062
8063 static void
8064 bridge_mac_nat_udp_output(struct bridge_softc *sc,
8065 struct bridge_iflist *bif, mbuf_t m,
8066 uint8_t ip_header_len, struct mac_nat_record *mnr)
8067 {
8068 uint16_t dp_flags;
8069 errno_t error;
8070 size_t offset;
8071 struct udphdr udphdr;
8072
8073 /* copy the UDP header */
8074 offset = sizeof(struct ether_header) + ip_header_len;
8075 error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
8076 if (error != 0) {
8077 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8078 "mbuf_copydata udphdr failed %d",
8079 error);
8080 return;
8081 }
8082 if (udphdr.uh_sport != HTONS_IPPORT_BOOTPC ||
8083 udphdr.uh_dport != HTONS_IPPORT_BOOTPS) {
8084 /* not a BOOTP/DHCP packet */
8085 return;
8086 }
8087 /* check whether the broadcast bit is already set */
8088 offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8089 error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
8090 if (error != 0) {
8091 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8092 "mbuf_copydata dp_flags failed %d",
8093 error);
8094 return;
8095 }
8096 if ((dp_flags & HTONS_DHCP_FLAGS_BROADCAST) != 0) {
8097 /* it's already set, nothing to do */
8098 return;
8099 }
8100 /* broadcast bit needs to be set */
8101 mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8102 mnr->mnr_ip_header_len = ip_header_len;
8103 if (udphdr.uh_sum != 0) {
8104 uint16_t delta;
8105
8106 /* adjust checksum to take modified dp_flags into account */
8107 delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8108 mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8109 }
8110 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8111 "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8112 sc->sc_if_xname,
8113 bif->bif_ifp->if_xname,
8114 ntohs(mnr->mnr_ip_dhcp_flags),
8115 ntohs(mnr->mnr_ip_udp_csum));
8116 return;
8117 }
8118
8119 static boolean_t
8120 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8121 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8122 {
8123 #pragma unused(mnr)
8124 uint8_t *header;
8125 struct ether_header *eh;
8126 struct in_addr ip;
8127 struct ip *iphdr;
8128 uint8_t ip_header_len;
8129 struct mac_nat_entry *mne = NULL;
8130 boolean_t translate = FALSE;
8131
8132 header = get_ether_ip_header_ptr(data, TRUE);
8133 if (header == NULL) {
8134 goto done;
8135 }
8136
8137 eh = (struct ether_header *)header;
8138 iphdr = (struct ip *)(header + sizeof(*eh));
8139 ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8140 if (ip_header_len < sizeof(ip)) {
8141 /* bogus IP header */
8142 goto done;
8143 }
8144 bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8145 /* XXX validate the source address */
8146 if (ip.s_addr != 0) {
8147 mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
8148 (const char *)eh->ether_shost);
8149 }
8150 if (mnr != NULL) {
8151 if (ip.s_addr == 0 && iphdr->ip_p == IPPROTO_UDP) {
8152 /* handle DHCP must broadcast */
8153 bridge_mac_nat_udp_output(sc, bif, *data,
8154 ip_header_len, mnr);
8155 }
8156 translate = TRUE;
8157 }
8158 done:
8159 return translate;
8160 }
8161
8162 #define ETHER_IPV6_HEADER_LEN (sizeof(struct ether_header) + \
8163 + sizeof(struct ip6_hdr))
8164 static uint8_t * __indexable
8165 get_ether_ipv6_header_ptr(mbuf_t *data, size_t plen, boolean_t is_output)
8166 {
8167 uint8_t *header = NULL;
8168 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8169 size_t minlen = ETHER_IPV6_HEADER_LEN + plen;
8170
8171 if (mbuf_pkthdr_len(*data) < minlen) {
8172 BRIDGE_LOG(LOG_DEBUG, flags,
8173 "IP %s short frame %lu < %lu",
8174 get_in_out_string(is_output),
8175 mbuf_pkthdr_len(*data), minlen);
8176 goto done;
8177 }
8178 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8179 BRIDGE_LOG(LOG_DEBUG, flags,
8180 "IP %s size %lu mbuf_pullup fail",
8181 get_in_out_string(is_output),
8182 minlen);
8183 *data = NULL;
8184 goto done;
8185 }
8186 header = mtod(*data, uint8_t *);
8187 done:
8188 return header;
8189 }
8190
8191 #include <netinet/icmp6.h>
8192 #include <netinet6/nd6.h>
8193
8194 #define ETHER_ND_LLADDR_LEN (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8195
8196 static void
8197 bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
8198 struct bridge_iflist *bif,
8199 mbuf_t *data, struct ip6_hdr *ip6h,
8200 struct in6_addr *saddrp,
8201 struct mac_nat_record *mnr)
8202 {
8203 uint8_t *header;
8204 struct ether_header *eh;
8205 struct icmp6_hdr *icmp6;
8206 uint8_t icmp6_type;
8207 uint32_t icmp6len;
8208 int lladdrlen = 0;
8209 char *lladdr = NULL;
8210 unsigned int off = sizeof(*ip6h);
8211
8212 icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
8213 if (icmp6len < sizeof(*icmp6)) {
8214 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8215 "short IPv6 payload length %d < %lu",
8216 icmp6len, sizeof(*icmp6));
8217 return;
8218 }
8219
8220 /* pullup IP6 header + ICMPv6 header */
8221 header = get_ether_ipv6_header_ptr(data, sizeof(*icmp6), TRUE);
8222 if (header == NULL) {
8223 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8224 "failed to pullup icmp6 header");
8225 return;
8226 }
8227 eh = (struct ether_header *)header;
8228 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8229 icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);
8230 icmp6_type = icmp6->icmp6_type;
8231 switch (icmp6_type) {
8232 case ND_NEIGHBOR_SOLICIT:
8233 case ND_NEIGHBOR_ADVERT:
8234 case ND_ROUTER_ADVERT:
8235 case ND_ROUTER_SOLICIT:
8236 break;
8237 default:
8238 return;
8239 }
8240
8241 /* pullup IP6 header + payload */
8242 header = get_ether_ipv6_header_ptr(data, icmp6len, TRUE);
8243 if (header == NULL) {
8244 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8245 "failed to pullup icmp6 + payload");
8246 return;
8247 }
8248 eh = (struct ether_header *)header;
8249 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8250 icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);
8251
8252 switch (icmp6_type) {
8253 case ND_NEIGHBOR_SOLICIT: {
8254 struct nd_neighbor_solicit *nd_ns;
8255 union nd_opts ndopts;
8256 boolean_t is_dad_probe;
8257 struct in6_addr taddr;
8258
8259 if (icmp6len < sizeof(*nd_ns)) {
8260 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8261 "short nd_ns %d < %lu",
8262 icmp6len, sizeof(*nd_ns));
8263 return;
8264 }
8265
8266 nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
8267 bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
8268 if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8269 IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8270 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8271 "invalid target ignored");
8272 return;
8273 }
8274
8275 /* parse options */
8276 nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
8277 if (nd6_options(&ndopts) < 0) {
8278 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8279 "invalid ND6 NS option");
8280 return;
8281 }
8282 if (ndopts.nd_opts_src_lladdr != NULL) {
8283 ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len,
8284 lladdr, lladdrlen);
8285 }
8286 is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
8287 if (lladdr != NULL) {
8288 if (is_dad_probe) {
8289 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8290 "bad ND6 DAD packet");
8291 return;
8292 }
8293 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8294 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8295 "source lladdrlen %d != %lu",
8296 lladdrlen, ETHER_ND_LLADDR_LEN);
8297 return;
8298 }
8299 }
8300 if (is_dad_probe) {
8301 /* node is trying use taddr, create an mne for taddr */
8302 *saddrp = taddr;
8303 }
8304 break;
8305 }
8306 case ND_NEIGHBOR_ADVERT: {
8307 struct nd_neighbor_advert *nd_na;
8308 union nd_opts ndopts;
8309 struct in6_addr taddr;
8310
8311
8312 nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
8313
8314 if (icmp6len < sizeof(*nd_na)) {
8315 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8316 "short nd_na %d < %lu",
8317 icmp6len, sizeof(*nd_na));
8318 return;
8319 }
8320
8321 bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
8322 if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8323 IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8324 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8325 "invalid target ignored");
8326 return;
8327 }
8328
8329 /* parse options */
8330 nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
8331 if (nd6_options(&ndopts) < 0) {
8332 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8333 "invalid ND6 NA option");
8334 return;
8335 }
8336 if (ndopts.nd_opts_tgt_lladdr == NULL) {
8337 /* target linklayer, nothing to do */
8338 return;
8339 }
8340
8341 ND_OPT_LLADDR(ndopts.nd_opts_tgt_lladdr, nd_opt_len, lladdr, lladdrlen);
8342 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8343 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8344 "target lladdrlen %d != %lu",
8345 lladdrlen, ETHER_ND_LLADDR_LEN);
8346 return;
8347 }
8348 break;
8349 }
8350 case ND_ROUTER_ADVERT:
8351 case ND_ROUTER_SOLICIT: {
8352 union nd_opts ndopts;
8353 uint32_t type_length;
8354 const char *description;
8355
8356 if (icmp6_type == ND_ROUTER_ADVERT) {
8357 type_length = sizeof(struct nd_router_advert);
8358 description = "RA";
8359 } else {
8360 type_length = sizeof(struct nd_router_solicit);
8361 description = "RS";
8362 }
8363 if (icmp6len < type_length) {
8364 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8365 "short ND6 %s %d < %d",
8366 description, icmp6len, type_length);
8367 return;
8368 }
8369
8370 /* parse options */
8371 nd6_option_init(((uint8_t *)icmp6) + type_length,
8372 icmp6len - type_length, &ndopts);
8373 if (nd6_options(&ndopts) < 0) {
8374 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8375 "invalid ND6 %s option", description);
8376 return;
8377 }
8378 if (ndopts.nd_opts_src_lladdr != NULL) {
8379 ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len, lladdr, lladdrlen);
8380
8381 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8382 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8383 "source lladdrlen %d != %lu",
8384 lladdrlen, ETHER_ND_LLADDR_LEN);
8385 return;
8386 }
8387 }
8388 break;
8389 }
8390 default:
8391 break;
8392 }
8393
8394 if (lladdr != NULL) {
8395 mnr->mnr_ip6_lladdr_offset = (uint16_t)
8396 ((uintptr_t)lladdr - (uintptr_t)eh);
8397 mnr->mnr_ip6_icmp6_len = icmp6len;
8398 mnr->mnr_ip6_icmp6_type = icmp6_type;
8399 mnr->mnr_ip6_header_len = off;
8400 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8401 const char *str;
8402
8403 switch (mnr->mnr_ip6_icmp6_type) {
8404 case ND_ROUTER_ADVERT:
8405 str = "ROUTER ADVERT";
8406 break;
8407 case ND_ROUTER_SOLICIT:
8408 str = "ROUTER SOLICIT";
8409 break;
8410 case ND_NEIGHBOR_ADVERT:
8411 str = "NEIGHBOR ADVERT";
8412 break;
8413 case ND_NEIGHBOR_SOLICIT:
8414 str = "NEIGHBOR SOLICIT";
8415 break;
8416 default:
8417 str = "";
8418 break;
8419 }
8420 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8421 "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
8422 sc->sc_if_xname, bif->bif_ifp->if_xname, str,
8423 mnr->mnr_ip6_header_len,
8424 mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
8425 }
8426 }
8427 }
8428
8429 static struct mac_nat_entry *
8430 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8431 {
8432 struct in6_addr dst;
8433 uint8_t *header;
8434 struct ether_header *eh;
8435 struct ip6_hdr *ip6h;
8436 struct mac_nat_entry *mne = NULL;
8437
8438 header = get_ether_ipv6_header_ptr(data, 0, FALSE);
8439 if (header == NULL) {
8440 goto done;
8441 }
8442 eh = (struct ether_header *)header;
8443 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8444 bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8445 /* XXX validate IPv6 address */
8446 if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8447 goto done;
8448 }
8449 mne = bridge_lookup_mac_nat_entry_ipv6(sc, &dst);
8450
8451 done:
8452 return mne;
8453 }
8454
8455 static boolean_t
8456 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8457 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8458 {
8459 uint8_t *header;
8460 struct ether_header *eh;
8461 ether_addr_t ether_shost;
8462 struct ip6_hdr *ip6h;
8463 struct in6_addr saddr;
8464 boolean_t translate;
8465
8466 translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8467 header = get_ether_ipv6_header_ptr(data, 0, TRUE);
8468 if (header == NULL) {
8469 translate = FALSE;
8470 goto done;
8471 }
8472 eh = (struct ether_header *)header;
8473 bcopy(eh->ether_shost, ðer_shost, sizeof(ether_shost));
8474 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8475 bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8476 if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8477 bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, &saddr, mnr);
8478 }
8479 if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8480 goto done;
8481 }
8482 (void)bridge_update_mac_nat_entry_ipv6(sc, bif, &saddr,
8483 (const char *)ether_shost.octet);
8484
8485 done:
8486 return translate;
8487 }
8488
8489 /*
8490 * Function: bridge_mac_nat_input:
8491 *
8492 * Purpose:
8493 * Process a unicast packet arriving on the external interface `external_ifp`.
8494 *
8495 * If the packet is ARP, IPv4, or IPv6, lookup the address from the packet in
8496 * the mac_nat_entry table. If an entry is found, and the interface is
8497 * not `external_ifp`, replace the destination MAC address in the
8498 * ethernet header with the corresponding internal MAC address, and return
8499 * the interface via `*dst_if`.
8500 *
8501 * Returns:
8502 * NULL if the packet was deallocated during processing.
8503 *
8504 * Otherwise, returns non-NULL packet that should:
8505 * 1) if `*dst_if` is NULL, continue on as an input packet
8506 * over `external_ifp`, OR
8507 * 2) if `*dst_if` is not NULL, be delivered as an output packet
8508 * over `*dst_if`.
8509 */
8510 static mbuf_t
8511 bridge_mac_nat_input(struct bridge_softc *sc, ifnet_t external_ifp,
8512 mbuf_t m, ifnet_t * dst_if)
8513 {
8514 struct ether_header *eh;
8515 mbuf_t m0 = m;
8516 struct mac_nat_entry *mne = NULL;
8517
8518 BRIDGE_LOCK_ASSERT_HELD(sc);
8519 *dst_if = NULL;
8520 eh = mtod(m, struct ether_header *);
8521 switch (eh->ether_type) {
8522 case HTONS_ETHERTYPE_ARP:
8523 mne = bridge_mac_nat_arp_input(sc, &m);
8524 break;
8525 case HTONS_ETHERTYPE_IP:
8526 mne = bridge_mac_nat_ip_input(sc, &m);
8527 break;
8528 case HTONS_ETHERTYPE_IPV6:
8529 mne = bridge_mac_nat_ipv6_input(sc, &m);
8530 break;
8531 default:
8532 break;
8533 }
8534 if (m != NULL & mne != NULL) {
8535 *dst_if = mne->mne_bif->bif_ifp;
8536 if (*dst_if == external_ifp) {
8537 /* receive packet for ifp */
8538 *dst_if = NULL;
8539 } else {
8540 /* replace the destination MAC with internal one */
8541 if (m != m0) {
8542 /* it may have changed */
8543 eh = mtod(m, struct ether_header *);
8544 }
8545 bcopy(mne->mne_mac, eh->ether_dhost,
8546 sizeof(eh->ether_dhost));
8547 }
8548 }
8549 return m;
8550 }
8551
8552
8553 static mblist
8554 bridge_mac_nat_input_list(struct bridge_softc *sc, ifnet_t external_ifp,
8555 mbuf_t m, mbuf_t * forward_head)
8556 {
8557 mblist forward;
8558 mbuf_t next_packet;
8559 mblist ret;
8560
8561 mblist_init(&ret);
8562 mblist_init(&forward);
8563 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8564 ifnet_ref_t dst_if;
8565
8566 /* take packet out of the list */
8567 next_packet = scan->m_nextpkt;
8568 scan->m_nextpkt = NULL;
8569
8570 scan = bridge_mac_nat_input(sc, external_ifp, scan, &dst_if);
8571 if (scan != NULL) {
8572 if (dst_if != NULL) {
8573 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8574 "%s MAC-NAT input translate to %s",
8575 sc->sc_if_xname, dst_if->if_xname);
8576 /* use rcvif to store the egress interface */
8577 mbuf_pkthdr_setrcvif(scan, dst_if);
8578 /* add it to the forwarding list */
8579 mblist_append(&forward, scan);
8580 } else {
8581 /* add it to the "continue on as input" list */
8582 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8583 "%s MAC-NAT input for %s",
8584 sc->sc_if_xname,
8585 external_ifp->if_xname);
8586 mblist_append(&ret, scan);
8587 }
8588 }
8589 }
8590 *forward_head = forward.head;
8591 return ret;
8592 }
8593
8594 /*
8595 * bridge_mac_nat_translate_list:
8596 * Process a list of packets destined to the MAC-NAT interface `dst_if`
8597 * from the bridge member `sbif`.
8598 *
8599 * For each packet in the list, update the MAC-NAT record, and if
8600 * translation is required, translate it.
8601 *
8602 * Returns the list of packets that should be delivered to the MAC-NAT
8603 * interface.
8604 */
8605 static mbuf_t
8606 bridge_mac_nat_translate_list(struct bridge_softc * sc,
8607 struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8608 {
8609 mbuf_t next_packet;
8610 mblist ret;
8611
8612 mblist_init(&ret);
8613 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
8614 struct mac_nat_record mnr;
8615 bool translate_mac;
8616
8617 /* take packet out of the list */
8618 next_packet = scan->m_nextpkt;
8619 scan->m_nextpkt = NULL;
8620 translate_mac = bridge_mac_nat_output(sc, sbif, &scan, &mnr);
8621 if (scan != NULL) {
8622 if (translate_mac) {
8623 bridge_mac_nat_translate(&scan, &mnr,
8624 IF_LLADDR(dst_if));
8625 }
8626 if (scan != NULL) {
8627 /* add it back to the list */
8628 mblist_append(&ret, scan);
8629 }
8630 }
8631 }
8632 return ret.head;
8633 }
8634
8635 /*
8636 * bridge_mac_nat_copy_and_translate_list:
8637 * Same as bridge_mac_nat_translate_list() except that a copy of the
8638 * packet list is returned instead.
8639 *
8640 * The packet list `m` is left unaltered.
8641 */
8642 static mbuf_t
8643 bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
8644 struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8645 {
8646 mbuf_t next_packet;
8647 mblist ret;
8648
8649 mblist_init(&ret);
8650 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8651 mbuf_ref_t mc = NULL;
8652 struct mac_nat_record mnr;
8653 bool translate_mac;
8654
8655 /* take packet out of the list, make a copy, put it back */
8656 next_packet = scan->m_nextpkt;
8657 scan->m_nextpkt = NULL;
8658 mc = m_dup(scan, M_DONTWAIT);
8659 scan->m_nextpkt = next_packet;
8660 if (mc == NULL) {
8661 continue;
8662 }
8663 translate_mac = bridge_mac_nat_output(sc, sbif, &mc, &mnr);
8664 if (mc != NULL) {
8665 if (translate_mac) {
8666 bridge_mac_nat_translate(&mc, &mnr,
8667 IF_LLADDR(dst_if));
8668 }
8669 if (mc != NULL) {
8670 /* add it to the new list */
8671 mblist_append(&ret, mc);
8672 }
8673 }
8674 }
8675 return ret.head;
8676 }
8677
8678 static void
8679 bridge_mac_nat_forward_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
8680 mbuf_t m)
8681 {
8682 int count = 0;
8683 ifnet_t dst_if;
8684 mblist list;
8685 int n_lists = 0;
8686 mbuf_t next_packet;
8687
8688 mblist_init(&list);
8689 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8690 ifnet_t this_if;
8691
8692 next_packet = scan->m_nextpkt;
8693 this_if = mbuf_pkthdr_rcvif(scan);
8694 mbuf_pkthdr_setrcvif(scan, NULL);
8695 if (list.head == NULL) {
8696 /* start a new list */
8697 list.head = list.tail = scan;
8698 count = 1;
8699 dst_if = this_if;
8700 } else if (dst_if != this_if) {
8701 /* send up the previous chain */
8702 if (list.tail != NULL) {
8703 /* terminate the list */
8704 list.tail->m_nextpkt = NULL;
8705 }
8706 n_lists++;
8707 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8708 "(%s): sublist %u pkts %u",
8709 dst_if->if_xname, n_lists, count);
8710 bridge_enqueue(bridge_ifp, NULL,
8711 dst_if, etypef, list.head,
8712 CHECKSUM_OPERATION_CLEAR_OFFLOAD, pkt_direction_RX);
8713
8714 /* start new list */
8715 list.head = list.tail = scan;
8716 count = 1;
8717 dst_if = this_if;
8718 } else {
8719 count++;
8720 list.tail = scan;
8721 }
8722 if (next_packet == NULL) {
8723 /* last list */
8724 n_lists++;
8725 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8726 "(%s): sublist %u pkts %u",
8727 dst_if->if_xname, n_lists, count);
8728 bridge_enqueue(bridge_ifp, NULL,
8729 dst_if, etypef, list.head,
8730 CHECKSUM_OPERATION_CLEAR_OFFLOAD, pkt_direction_RX);
8731 }
8732 }
8733 return;
8734 }
8735
8736 /*
8737 * bridge_mac_nat_output:
8738 * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8739 * from the interface 'bif'.
8740 *
8741 * Create a mac_nat_entry containing the source IP address and MAC address
8742 * from the packet. Populate a mac_nat_record with information detailing
8743 * how to translate the packet. Translation takes place later by calling
8744 * `bridge_mac_nat_translate()`.
8745 *
8746 * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8747 * interface is generating an output packet. No translation is required in this
8748 * case, we just record the IP address used to prevent another bif from
8749 * claiming our IP address.
8750 *
8751 * Returns:
8752 * TRUE if the packet should be translated (*mnr updated as well),
8753 * FALSE otherwise.
8754 *
8755 * *data may be updated to point at a different mbuf chain or NULL if
8756 * the chain was deallocated during processing.
8757 */
8758
8759 static boolean_t
8760 bridge_mac_nat_output(struct bridge_softc *sc,
8761 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8762 {
8763 struct ether_header *eh;
8764 boolean_t translate = FALSE;
8765
8766 BRIDGE_LOCK_ASSERT_HELD(sc);
8767 assert(sc->sc_mac_nat_bif != NULL);
8768
8769 eh = mtod(*data, struct ether_header *);
8770 if (mnr != NULL) {
8771 bzero(mnr, sizeof(*mnr));
8772 mnr->mnr_ether_type = eh->ether_type;
8773 }
8774 switch (eh->ether_type) {
8775 case HTONS_ETHERTYPE_ARP:
8776 translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8777 break;
8778 case HTONS_ETHERTYPE_IP:
8779 translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8780 break;
8781 case HTONS_ETHERTYPE_IPV6:
8782 translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8783 break;
8784 default:
8785 break;
8786 }
8787 return translate;
8788 }
8789
8790 static void
8791 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8792 const char eaddr[ETHER_ADDR_LEN])
8793 {
8794 errno_t error;
8795
8796 if (mnr->mnr_arp_offset == 0) {
8797 return;
8798 }
8799 /* replace the source hardware address */
8800 error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8801 ETHER_ADDR_LEN, eaddr,
8802 MBUF_DONTWAIT);
8803 if (error != 0) {
8804 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8805 "mbuf_copyback failed");
8806 m_drop(*data, DROPTAP_FLAG_DIR_IN,
8807 DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
8808 *data = NULL;
8809 }
8810 return;
8811 }
8812
8813 static void
8814 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8815 {
8816 errno_t error;
8817 size_t offset;
8818
8819 if (mnr->mnr_ip_header_len == 0) {
8820 return;
8821 }
8822 /* update the UDP checksum */
8823 offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8824 error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8825 sizeof(mnr->mnr_ip_udp_csum),
8826 &mnr->mnr_ip_udp_csum,
8827 MBUF_DONTWAIT);
8828 if (error != 0) {
8829 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8830 "mbuf_copyback uh_sum failed");
8831 m_drop(*data, DROPTAP_FLAG_DIR_IN,
8832 DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
8833 *data = NULL;
8834 }
8835 /* update the DHCP must broadcast flag */
8836 offset += sizeof(struct udphdr);
8837 error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8838 sizeof(mnr->mnr_ip_dhcp_flags),
8839 &mnr->mnr_ip_dhcp_flags,
8840 MBUF_DONTWAIT);
8841 if (error != 0) {
8842 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8843 "mbuf_copyback dp_flags failed");
8844 m_drop(*data, DROPTAP_FLAG_DIR_IN,
8845 DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
8846 *data = NULL;
8847 }
8848 }
8849
8850 static void
8851 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8852 const char eaddr[ETHER_ADDR_LEN])
8853 {
8854 uint16_t cksum;
8855 errno_t error;
8856 mbuf_t m = *data;
8857
8858 if (mnr->mnr_ip6_header_len == 0) {
8859 return;
8860 }
8861 switch (mnr->mnr_ip6_icmp6_type) {
8862 case ND_ROUTER_ADVERT:
8863 case ND_ROUTER_SOLICIT:
8864 case ND_NEIGHBOR_SOLICIT:
8865 case ND_NEIGHBOR_ADVERT:
8866 if (mnr->mnr_ip6_lladdr_offset == 0) {
8867 /* nothing to do */
8868 return;
8869 }
8870 break;
8871 default:
8872 return;
8873 }
8874
8875 /*
8876 * replace the lladdr
8877 */
8878 error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8879 ETHER_ADDR_LEN, eaddr,
8880 MBUF_DONTWAIT);
8881 if (error != 0) {
8882 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8883 "mbuf_copyback lladdr failed");
8884 m_drop(m, DROPTAP_FLAG_DIR_IN,
8885 DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
8886 *data = NULL;
8887 return;
8888 }
8889
8890 /*
8891 * recompute the icmp6 checksum
8892 */
8893
8894 /* skip past the ethernet header */
8895 _mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
8896
8897 #define CKSUM_OFFSET_ICMP6 offsetof(struct icmp6_hdr, icmp6_cksum)
8898 /* set the checksum to zero */
8899 cksum = 0;
8900 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8901 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8902 if (error != 0) {
8903 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8904 "mbuf_copyback cksum=0 failed");
8905 m_drop(m, DROPTAP_FLAG_DIR_IN,
8906 DROP_REASON_BRIDGE_CHECKSUM, NULL, 0);
8907 *data = NULL;
8908 return;
8909 }
8910 /* compute and set the new checksum */
8911 cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8912 mnr->mnr_ip6_icmp6_len);
8913 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8914 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8915 if (error != 0) {
8916 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8917 "mbuf_copyback cksum failed");
8918 m_drop(m, DROPTAP_FLAG_DIR_IN,
8919 DROP_REASON_BRIDGE_CHECKSUM, NULL, 0);
8920 *data = NULL;
8921 return;
8922 }
8923 /* restore the ethernet header */
8924 _mbuf_adjust_pkthdr_and_data(m, -ETHER_HDR_LEN);
8925 return;
8926 }
8927
8928 static void
8929 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8930 const char eaddr[ETHER_ADDR_LEN])
8931 {
8932 struct ether_header *eh;
8933
8934 /* replace the source ethernet address with the single MAC */
8935 eh = mtod(*data, struct ether_header *);
8936 bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8937 switch (mnr->mnr_ether_type) {
8938 case HTONS_ETHERTYPE_ARP:
8939 bridge_mac_nat_arp_translate(data, mnr, eaddr);
8940 break;
8941
8942 case HTONS_ETHERTYPE_IP:
8943 bridge_mac_nat_ip_translate(data, mnr);
8944 break;
8945
8946 case HTONS_ETHERTYPE_IPV6:
8947 bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8948 break;
8949
8950 default:
8951 break;
8952 }
8953 return;
8954 }
8955
8956 /*
8957 * bridge packet filtering
8958 */
8959
8960 /*
8961 * Perform basic checks on header size since
8962 * pfil assumes ip_input has already processed
8963 * it for it. Cut-and-pasted from ip_input.c.
8964 * Given how simple the IPv6 version is,
8965 * does the IPv4 version really need to be
8966 * this complicated?
8967 *
8968 * XXX Should we update ipstat here, or not?
8969 * XXX Right now we update ipstat but not
8970 * XXX csum_counter.
8971 */
8972 static int
8973 bridge_ip_checkbasic(struct mbuf **mp)
8974 {
8975 struct mbuf *m = *mp;
8976 struct ip *ip;
8977 int len, hlen;
8978 u_short sum;
8979
8980 if (*mp == NULL) {
8981 return -1;
8982 }
8983
8984 if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8985 /* max_linkhdr is already rounded up to nearest 4-byte */
8986 if ((m = m_copyup(m, sizeof(struct ip),
8987 max_linkhdr)) == NULL) {
8988 /* XXXJRT new stat, please */
8989 ipstat.ips_toosmall++;
8990 goto bad;
8991 }
8992 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8993 if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8994 ipstat.ips_toosmall++;
8995 goto bad;
8996 }
8997 }
8998 ip = mtod(m, struct ip *);
8999 if (ip == NULL) {
9000 goto bad;
9001 }
9002
9003 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
9004 ipstat.ips_badvers++;
9005 goto bad;
9006 }
9007 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9008 if (hlen < (int)sizeof(struct ip)) { /* minimum header length */
9009 ipstat.ips_badhlen++;
9010 goto bad;
9011 }
9012 if (hlen > m->m_len) {
9013 if ((m = m_pullup(m, hlen)) == 0) {
9014 ipstat.ips_badhlen++;
9015 goto bad;
9016 }
9017 ip = mtod(m, struct ip *);
9018 if (ip == NULL) {
9019 goto bad;
9020 }
9021 }
9022
9023 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
9024 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
9025 } else {
9026 if (hlen == sizeof(struct ip)) {
9027 sum = in_cksum_hdr(ip);
9028 } else {
9029 sum = in_cksum(m, hlen);
9030 }
9031 }
9032 if (sum) {
9033 ipstat.ips_badsum++;
9034 goto bad;
9035 }
9036
9037 /* Retrieve the packet length. */
9038 len = ntohs(ip->ip_len);
9039
9040 /*
9041 * Check for additional length bogosity
9042 */
9043 if (len < hlen) {
9044 ipstat.ips_badlen++;
9045 goto bad;
9046 }
9047
9048 /*
9049 * Check that the amount of data in the buffers
9050 * is as at least much as the IP header would have us expect.
9051 * Drop packet if shorter than we expect.
9052 */
9053 if (m->m_pkthdr.len < len) {
9054 ipstat.ips_tooshort++;
9055 goto bad;
9056 }
9057
9058 /* Checks out, proceed */
9059 *mp = m;
9060 return 0;
9061
9062 bad:
9063 *mp = m;
9064 return -1;
9065 }
9066
9067 /*
9068 * Same as above, but for IPv6.
9069 * Cut-and-pasted from ip6_input.c.
9070 * XXX Should we update ip6stat, or not?
9071 */
9072 static int
9073 bridge_ip6_checkbasic(struct mbuf **mp)
9074 {
9075 struct mbuf *m = *mp;
9076 struct ip6_hdr *ip6;
9077
9078 /*
9079 * If the IPv6 header is not aligned, slurp it up into a new
9080 * mbuf with space for link headers, in the event we forward
9081 * it. Otherwise, if it is aligned, make sure the entire base
9082 * IPv6 header is in the first mbuf of the chain.
9083 */
9084 if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9085 struct ifnet *inifp = m->m_pkthdr.rcvif;
9086 /* max_linkhdr is already rounded up to nearest 4-byte */
9087 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
9088 max_linkhdr)) == NULL) {
9089 /* XXXJRT new stat, please */
9090 ip6stat.ip6s_toosmall++;
9091 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9092 goto bad;
9093 }
9094 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
9095 struct ifnet *inifp = m->m_pkthdr.rcvif;
9096 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
9097 ip6stat.ip6s_toosmall++;
9098 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9099 goto bad;
9100 }
9101 }
9102
9103 ip6 = mtod(m, struct ip6_hdr *);
9104
9105 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
9106 ip6stat.ip6s_badvers++;
9107 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
9108 goto bad;
9109 }
9110
9111 /* Checks out, proceed */
9112 *mp = m;
9113 return 0;
9114
9115 bad:
9116 *mp = m;
9117 return -1;
9118 }
9119
9120 /*
9121 * the PF routines expect to be called from ip_input, so we
9122 * need to do and undo here some of the same processing.
9123 *
9124 * XXX : this is heavily inspired on bridge_pfil()
9125 */
9126 static int
9127 bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
9128 bool input)
9129 {
9130 /*
9131 * XXX : mpetit : heavily inspired by bridge_pfil()
9132 */
9133
9134 int snap, error, i, hlen;
9135 struct ether_header *eh1, eh2;
9136 struct ip *ip;
9137 struct llc llc1;
9138 u_int16_t ether_type;
9139
9140 snap = 0;
9141 error = -1; /* Default error if not error == 0 */
9142
9143 if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
9144 return 0; /* filtering is disabled */
9145 }
9146 i = min((*mp)->m_pkthdr.len, max_protohdr);
9147 if ((*mp)->m_len < i) {
9148 *mp = m_pullup(*mp, i);
9149 if (*mp == NULL) {
9150 BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
9151 return -1;
9152 }
9153 }
9154
9155 eh1 = mtod(*mp, struct ether_header *);
9156 ether_type = ntohs(eh1->ether_type);
9157
9158 /*
9159 * Check for SNAP/LLC.
9160 */
9161 if (ether_type < ETHERMTU) {
9162 struct llc *llc2 = (struct llc *)(eh1 + 1);
9163
9164 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
9165 llc2->llc_dsap == LLC_SNAP_LSAP &&
9166 llc2->llc_ssap == LLC_SNAP_LSAP &&
9167 llc2->llc_control == LLC_UI) {
9168 ether_type = htons(llc2->llc_un.type_snap.ether_type);
9169 snap = 1;
9170 }
9171 }
9172
9173 /*
9174 * If we're trying to filter bridge traffic, don't look at anything
9175 * other than IP and ARP traffic. If the filter doesn't understand
9176 * IPv6, don't allow IPv6 through the bridge either. This is lame
9177 * since if we really wanted, say, an AppleTalk filter, we are hosed,
9178 * but of course we don't have an AppleTalk filter to begin with.
9179 * (Note that since pfil doesn't understand ARP it will pass *ALL*
9180 * ARP traffic.)
9181 */
9182 switch (ether_type) {
9183 case ETHERTYPE_ARP:
9184 case ETHERTYPE_REVARP:
9185 return 0; /* Automatically pass */
9186
9187 case ETHERTYPE_IP:
9188 case ETHERTYPE_IPV6:
9189 break;
9190 default:
9191 /*
9192 * Check to see if the user wants to pass non-ip
9193 * packets, these will not be checked by pf and
9194 * passed unconditionally so the default is to drop.
9195 */
9196 if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
9197 goto bad;
9198 }
9199 break;
9200 }
9201
9202 /* Strip off the Ethernet header and keep a copy. */
9203 m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
9204 m_adj(*mp, ETHER_HDR_LEN);
9205
9206 /* Strip off snap header, if present */
9207 if (snap) {
9208 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
9209 m_adj(*mp, sizeof(struct llc));
9210 }
9211
9212 /*
9213 * Check the IP header for alignment and errors
9214 */
9215 switch (ether_type) {
9216 case ETHERTYPE_IP:
9217 error = bridge_ip_checkbasic(mp);
9218 break;
9219 case ETHERTYPE_IPV6:
9220 error = bridge_ip6_checkbasic(mp);
9221 break;
9222 default:
9223 error = 0;
9224 break;
9225 }
9226 if (error) {
9227 goto bad;
9228 }
9229
9230 error = 0;
9231
9232 /*
9233 * Run the packet through pf rules
9234 */
9235 switch (ether_type) {
9236 case ETHERTYPE_IP:
9237 /*
9238 * before calling the firewall, swap fields the same as
9239 * IP does. here we assume the header is contiguous
9240 */
9241 ip = mtod(*mp, struct ip *);
9242
9243 ip->ip_len = ntohs(ip->ip_len);
9244 ip->ip_off = ntohs(ip->ip_off);
9245
9246 if (ifp != NULL) {
9247 error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
9248 }
9249
9250 if (*mp == NULL || error != 0) { /* filter may consume */
9251 break;
9252 }
9253
9254 /* Recalculate the ip checksum and restore byte ordering */
9255 ip = mtod(*mp, struct ip *);
9256 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9257 if (hlen < (int)sizeof(struct ip)) {
9258 goto bad;
9259 }
9260 if (hlen > (*mp)->m_len) {
9261 if ((*mp = m_pullup(*mp, hlen)) == 0) {
9262 goto bad;
9263 }
9264 ip = mtod(*mp, struct ip *);
9265 if (ip == NULL) {
9266 goto bad;
9267 }
9268 }
9269 ip->ip_len = htons(ip->ip_len);
9270 ip->ip_off = htons(ip->ip_off);
9271 ip->ip_sum = 0;
9272 if (hlen == sizeof(struct ip)) {
9273 ip->ip_sum = in_cksum_hdr(ip);
9274 } else {
9275 ip->ip_sum = in_cksum(*mp, hlen);
9276 }
9277 break;
9278
9279 case ETHERTYPE_IPV6:
9280 if (ifp != NULL) {
9281 error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
9282 }
9283
9284 if (*mp == NULL || error != 0) { /* filter may consume */
9285 break;
9286 }
9287 break;
9288 default:
9289 error = 0;
9290 break;
9291 }
9292
9293 if (*mp == NULL) {
9294 return error;
9295 }
9296 if (error != 0) {
9297 goto bad;
9298 }
9299
9300 error = -1;
9301
9302 /*
9303 * Finally, put everything back the way it was and return
9304 */
9305 if (snap) {
9306 M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
9307 if (*mp == NULL) {
9308 return error;
9309 }
9310 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
9311 }
9312
9313 M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
9314 if (*mp == NULL) {
9315 return error;
9316 }
9317 bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
9318
9319 return 0;
9320
9321 bad:
9322 m_drop(*mp, DROPTAP_FLAG_DIR_IN, DROP_REASON_BRIDGE_PF, NULL, 0);
9323 *mp = NULL;
9324 return error;
9325 }
9326
9327 #if BRIDGESTP
9328 static void
9329 bridge_bstp_input_list(struct bstp_port *bp, struct mbuf *head)
9330 {
9331 mbuf_t next_packet = NULL;
9332
9333 for (mbuf_t scan = head; scan != NULL; scan = next_packet) {
9334 next_packet = scan->m_nextpkt;
9335 scan->m_nextpkt = NULL;
9336 bstp_input(bp, scan);
9337 }
9338 }
9339 #endif /* BRIDGESTP */
9340
9341 static mblist
9342 bridge_filter_arp_list(struct bridge_iflist * bif, mbuf_t m)
9343 {
9344 mbuf_t next_packet = NULL;
9345 mblist ret;
9346
9347 mblist_init(&ret);
9348 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9349 errno_t error;
9350
9351 /* take packet out of the list */
9352 next_packet = scan->m_nextpkt;
9353 scan->m_nextpkt = NULL;
9354 /* filter the ARP packet */
9355 error = bridge_host_filter_arp(bif, &scan);
9356 if (error != 0 && scan != NULL) {
9357 if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9358 brlog_mbuf_data(scan, 0,
9359 sizeof(struct ether_header) +
9360 sizeof(struct ip));
9361 }
9362 m_drop(scan, DROPTAP_FLAG_DIR_IN,
9363 DROP_REASON_BRIDGE_HOST_FILTER, NULL, 0);
9364 scan = NULL;
9365 }
9366 if (scan != NULL) {
9367 /* add it to the list */
9368 mblist_append(&ret, scan);
9369 }
9370 }
9371 return ret;
9372 }
9373
9374 static mbuf_t
9375 bridge_filter_checksum(ifnet_t bridge_ifp, struct bridge_iflist * bif, mbuf_t m,
9376 bool is_ipv4, bool host_filter, bool checksum)
9377 {
9378 uint32_t dbgf = 0;
9379 errno_t error;
9380 ip_packet_info info;
9381 u_int mac_hlen = sizeof(struct ether_header);
9382 drop_reason_t drop_reason = DROP_REASON_BRIDGE_UNSPECIFIED;
9383
9384 if (host_filter) {
9385 dbgf |= BR_DBGF_HOSTFILTER;
9386 }
9387 if (checksum) {
9388 dbgf |= BR_DBGF_CHECKSUM;
9389 }
9390 /* get the IP protocol header */
9391 error = bridge_get_ip_proto(&m, mac_hlen, is_ipv4, &info,
9392 &bif->bif_stats.brms_in_ip);
9393 if (error != 0) {
9394 BRIDGE_LOG(LOG_NOTICE, dbgf,
9395 "%s(%s) bridge_get_ip_proto failed %d",
9396 bridge_ifp->if_xname,
9397 bif->bif_ifp->if_xname, error);
9398 drop_reason = DROP_REASON_BRIDGE_NO_PROTO;
9399 goto drop;
9400 }
9401 if (host_filter) {
9402 bool drop = true;
9403
9404 /* restrict IP protocols */
9405 switch (info.ip_proto) {
9406 case IPPROTO_ICMP:
9407 case IPPROTO_IGMP:
9408 drop = !is_ipv4;
9409 break;
9410 case IPPROTO_TCP:
9411 case IPPROTO_UDP:
9412 drop = false;
9413 break;
9414 case IPPROTO_ICMPV6:
9415 drop = is_ipv4;
9416 break;
9417 default:
9418 break;
9419 }
9420 if (drop) {
9421 BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
9422 drop_reason = DROP_REASON_BRIDGE_BAD_PROTO;
9423 goto drop;
9424 }
9425 bridge_hostfilter_stats.brhf_ip_ok += 1;
9426 }
9427 if (checksum) {
9428 /* need to compute IP/UDP/TCP/checksums */
9429 error = bridge_offload_checksum(&m, &info, &bif->bif_stats);
9430 if (error != 0) {
9431 BRIDGE_LOG(LOG_NOTICE, dbgf,
9432 "%s(%s) bridge_offload_checksum failed %d",
9433 bridge_ifp->if_xname,
9434 bif->bif_ifp->if_xname, error);
9435 drop_reason = DROP_REASON_BRIDGE_CHECKSUM;
9436 goto drop;
9437 }
9438 }
9439 return m;
9440
9441 drop:
9442 /* toss the packet */
9443 if (m != NULL) {
9444 if (host_filter &&
9445 BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9446 brlog_mbuf_data(m, 0,
9447 sizeof(struct ether_header) +
9448 sizeof(struct ip));
9449 }
9450 m_drop(m, DROPTAP_FLAG_DIR_IN, drop_reason, NULL, 0);
9451 m = NULL;
9452 }
9453 return NULL;
9454 }
9455
9456 static mblist
9457 bridge_filter_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9458 mbuf_t in_list, ether_type_flag_t etypef, bool host_filter, bool checksum)
9459 {
9460 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
9461 mbuf_t next_packet = NULL;
9462 mblist ret;
9463
9464 mblist_init(&ret);
9465 for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9466 /* take packet out of the list */
9467 next_packet = scan->m_nextpkt;
9468 scan->m_nextpkt = NULL;
9469 scan = bridge_filter_checksum(bridge_ifp, bif,
9470 scan, is_ipv4, host_filter, checksum);
9471 if (scan != NULL) {
9472 /* add packet to the list */
9473 mblist_append(&ret, scan);
9474 }
9475 }
9476 return ret;
9477 }
9478
9479 static mbuf_t
9480 bridge_checksum_offload_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9481 mbuf_t m, bool is_ipv4)
9482 {
9483 mblist ret;
9484 mbuf_t next_packet;
9485
9486 mblist_init(&ret);
9487 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
9488 uint32_t csum_flags;
9489
9490 /* take it out of the list */
9491 next_packet = scan->m_nextpkt;
9492 scan->m_nextpkt = NULL;
9493
9494 csum_flags = scan->m_pkthdr.csum_flags;
9495 if ((csum_flags & checksum_request_flags) != 0) {
9496 /* compute the checksum now */
9497 scan = bridge_filter_checksum(bridge_ifp, bif, scan,
9498 is_ipv4, false, true);
9499 if (scan != NULL) {
9500 /* clear offload now */
9501 scan->m_pkthdr.csum_flags &= csum_flags;
9502 }
9503 }
9504 if (scan != NULL) {
9505 mblist_append(&ret, scan);
9506 }
9507 }
9508 return ret.head;
9509 }
9510
9511 static mbuf_t
9512 copy_broadcast_packet(mbuf_t m)
9513 {
9514 mbuf_t mc;
9515
9516 /* make a copy of the packet */
9517 mc = m_dup(m, M_DONTWAIT);
9518 if (mc != NULL) {
9519 struct ether_header *eh;
9520
9521 /* make copy look like it is broadcast */
9522 mc->m_flags |= M_BCAST;
9523 eh = mtod(mc, struct ether_header *);
9524 bcopy(etherbroadcastaddr, eh->ether_dhost, ETHER_ADDR_LEN);
9525 }
9526 return mc;
9527 }
9528
9529 static mblist
9530 bridge_find_broadcast_ipv4(mbuf_t in_list, mbuf_t * ip_bcast_head)
9531 {
9532 mblist ip_bcast;
9533 mbuf_t next_packet = NULL;
9534 mblist ret;
9535
9536 mblist_init(&ret);
9537 mblist_init(&ip_bcast);
9538 for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
9539 mbuf_t bcast_pkt = NULL;
9540 uint8_t *header;
9541
9542 /* take packet out of the list */
9543 next_packet = scan->m_nextpkt;
9544 scan->m_nextpkt = NULL;
9545
9546 header = get_ether_ip_header_ptr(&scan, FALSE);
9547 if (header != NULL) {
9548 struct in_addr dst;
9549 struct ip *iphdr;
9550
9551 iphdr = (struct ip *)(header + sizeof(struct ether_header));
9552 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
9553 if (dst.s_addr == INADDR_BROADCAST) {
9554 bcast_pkt = copy_broadcast_packet(scan);
9555 }
9556 }
9557 if (bcast_pkt != NULL) {
9558 /* add packet to broadcast list */
9559 mblist_append(&ip_bcast, bcast_pkt);
9560 }
9561 if (scan != NULL) {
9562 /* add packet back into the list */
9563 mblist_append(&ret, scan);
9564 }
9565 }
9566 *ip_bcast_head = ip_bcast.head;
9567 return ret;
9568 }
9569
9570 static ifnet_t
9571 bridge_find_member(struct bridge_softc * sc, uint8_t * lladdr,
9572 struct bridge_iflist * sbif)
9573 {
9574 struct bridge_iflist * bif;
9575
9576 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
9577 if (bif == sbif) {
9578 /* skip the input member */
9579 continue;
9580 }
9581 if (_ether_cmp(IF_LLADDR(bif->bif_ifp), lladdr) == 0) {
9582 return bif->bif_ifp;
9583 }
9584 }
9585 return NULL;
9586 }
9587
9588
/*
 * Function: bridge_input_list
 *
 * Purpose:
 *   Process a list of input packets, received on member interface `ifp`,
 *   through the bridge.
 *   The caller ensures that all of the packets in `list` have the same
 *   ethernet header; a copy of that header is passed in `eh_in_p`.
 *   `is_promisc` is supplied by the caller; together with the
 *   broadcast/multicast check it determines whether the packets are
 *   treated as unicast to `ifp` itself (`is_ifp_mac`).
 *
 * Locking:
 *   Takes the bridge lock to look up the member and releases it on every
 *   path before returning; bridge_broadcast_list() and
 *   bridge_forward_list() release the lock themselves.
 *
 * Returns:
 *   The list of packets that were not consumed/freed and should continue
 *   up the stack on `ifp`.
 *
 *   A list with a NULL head if all of the packets were consumed.
 */
static mblist
bridge_input_list(struct bridge_softc * sc, ifnet_t ifp,
    struct ether_header * eh_in_p, mblist list, bool is_promisc)
{
	struct bridge_iflist * bif;
	ifnet_t bridge_ifp;
	bool bridge_needs_input;
	bool checksum_offload;
	uint8_t * dhost;
#if BRIDGESTP
	bool discarding = false;
#endif /* BRIDGESTP */
	ifnet_t dst_if = NULL;
	errno_t error;
	ether_type_flag_t etypef;
	bool host_filter;
	bool host_filter_drop = false;
	mbuf_ref_t ip_bcast = NULL;
	bool is_bridge_mac = false;
	bool is_broadcast;
	bool is_ifp_mac;
	ifnet_t member_input = NULL;
	uint8_t * shost;
	bool uses_virtio = false;
	uint16_t vlan;

	if (ifp->if_bridge == NULL) {
		/* no longer part of bridge */
		goto done;
	}
	bridge_ifp = sc->sc_ifp;
	is_broadcast = IS_BCAST_MCAST(list.head);
	/* neither broadcast/multicast nor promiscuous: unicast to `ifp` */
	is_ifp_mac = (!is_broadcast && !is_promisc);
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
	    "%s from %s count %d head 0x%llx.0x%llx tail 0x%llx.0x%llx",
	    bridge_ifp->if_xname, ifp->if_xname, list.count,
	    (uint64_t)VM_KERNEL_ADDRPERM(list.head),
	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.head, void *)),
	    (uint64_t)VM_KERNEL_ADDRPERM(list.tail),
	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.tail, void *)));

	/* assume we'll return all packets */
	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s not running passing along",
		    bridge_ifp->if_xname);
		goto done;
	}

	/*
	 * NOTE(review): no `m` is declared in this scope; this only compiles
	 * if VLANTAGOF() does not expand its argument -- confirm.
	 */
	vlan = VLANTAGOF(m);

	/* lookup the bridge member */
	BRIDGE_LOCK(sc);
	bif = bridge_lookup_member_if(sc, ifp);
	if (bif == NULL) {
		BRIDGE_UNLOCK(sc);
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s bridge_lookup_member_if failed",
		    bridge_ifp->if_xname);
		goto done;
	}

	uses_virtio = bif_uses_virtio(bif);

	/*
	 * host filter drops packets that:
	 * - are not ARP, IPv4, or IPv6
	 * - have incorrect source MAC address
	 */
	host_filter = (bif->bif_flags & BIFF_HOST_FILTER) != 0;
	etypef = ether_type_flag_get(eh_in_p->ether_type);
	if (host_filter
	    && (etypef & ETHER_TYPE_FLAG_IP_ARP) == 0) {
		/* ether type not one of ARP, IPv4, or IPv6 */
		BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
		host_filter_drop = true;
	} else if ((bif->bif_flags & BIFF_HF_HWSRC) != 0 &&
	    bcmp(eh_in_p->ether_shost, bif->bif_hf_hwsrc, ETHER_ADDR_LEN)
	    != 0) {
		/* only allow the single source MAC address */
		BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr,
		    __func__, __LINE__);
		host_filter_drop = true;
	}
	if (host_filter_drop) {
		/* the whole list shares the header, so the whole list is dropped */
		BRIDGE_UNLOCK(sc);
		m_drop_list(list.head, bridge_ifp, DROPTAP_FLAG_DIR_IN,
		    DROP_REASON_BRIDGE_HOST_FILTER, NULL, 0);
		list.head = list.tail = NULL;
		goto done;
	}

#if BRIDGESTP
	discarding = (bif->bif_ifflags & IFBIF_STP) != 0 &&
	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING;
#endif /* BRIDGESTP */

	dhost = eh_in_p->ether_dhost;
	shost = eh_in_p->ether_shost;
	/*
	 * Reserved multicast address listed in 802.1D section 7.12.6
	 * must not be forwarded by the bridge.
	 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
	 */
	if (is_broadcast) {
		if (IS_MCAST(list.head)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
			    " multicast: "
			    "%02x:%02x:%02x:%02x:%02x:%02x",
			    dhost[0], dhost[1],
			    dhost[2], dhost[3],
			    dhost[4], dhost[5]);
		}
		/* compare the first five bytes, the last byte selects the range */
		if (bcmp(dhost, bstp_etheraddr, (ETHER_ADDR_LEN - 1)) == 0) {
			if (dhost[5] == BSTP_ETHERADDR_RANGE_FIRST) {
				/* multicast for spanning tree */
#if BRIDGESTP
				bridge_bstp_input_list(&bif->bif_stp, list.head);
#else /* BRIDGESTP */
				m_freem_list(list.head);
#endif /* BRIDGESTP */
				/* either way the packets are gone */
				list.head = list.tail = NULL;
				BRIDGE_UNLOCK(sc);
				goto done;
			}
			if (dhost[5] <= BSTP_ETHERADDR_RANGE_LAST) {
				/* allow packet to continue up the stack */
				BRIDGE_UNLOCK(sc);
				goto done;
			}
		}
		/* broadcast to all members */
		os_atomic_add(&bridge_ifp->if_imcasts, list.count, relaxed);
	}

#if BRIDGESTP
	if (discarding) {
		/* not bridged; the packets are returned to the caller */
		BRIDGE_UNLOCK(sc);
		goto done;
	}
#endif /* BRIDGESTP */

	/* If the interface is learning, record the address. */
	if ((bif->bif_ifflags & IFBIF_LEARNING) != 0) {
		error = bridge_rtupdate(sc, shost, vlan, bif, 0, IFBAF_DYNAMIC);
		/*
		 * If the interface has addresses limits then deny any source
		 * that is not in the cache.
		 */
		if (error != 0 && bif->bif_addrmax) {
			BRIDGE_UNLOCK(sc);
			goto done;
		}
	}
#if BRIDGESTP
	if ((bif->bif_ifflags & IFBIF_STP) != 0 &&
	    bif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
		/* address was recorded above, but nothing is forwarded */
		BRIDGE_UNLOCK(sc);
		goto done;
	}
#endif /* BRIDGESTP */

	/*
	 * If the packet is not IP, let the host filter drop ARP packets.
	 * Otherwise, if the host filter is enabled or we need to compute
	 * checksums, do that.
	 * Otherwise, if MAC-NAT is enabled and this is an IPv4 packet,
	 * check for IPv4 broadcast packets. Accumulate those in a separate
	 * list `ip_bcast`.
	 */
	checksum_offload = bif_has_checksum_offload(bif);
	if (!ether_type_flag_is_ip(etypef)) {
		/* host filter process ARP */
		if (host_filter) {
			/* host filter check earlier means this must be ARP */
			VERIFY(etypef == ETHER_TYPE_FLAG_ARP);
			list = bridge_filter_arp_list(bif, list.head);
			if (list.head == NULL) {
				VERIFY(list.tail == NULL);
				BRIDGE_UNLOCK(sc);
				goto done;
			}
		}
	} else if (host_filter || checksum_offload) {
		/* host filter and/or checksum */
		list = bridge_filter_checksum_list(bridge_ifp, bif,
		    list.head, etypef, host_filter, checksum_offload);
		if (list.head == NULL) {
			VERIFY(list.tail == NULL);
			BRIDGE_UNLOCK(sc);
			goto done;
		}
	} else if (is_ifp_mac && bif == sc->sc_mac_nat_bif &&
	    etypef == ETHER_TYPE_FLAG_IPV4) {
		/* look for broadcast IPv4 packet */
		list = bridge_find_broadcast_ipv4(list.head, &ip_bcast);
		if (list.head == NULL && ip_bcast == NULL) {
			/* all packets were consumed */
			BRIDGE_UNLOCK(sc);
			goto done;
		}
	}

	/*
	 * If the bridge has ULP attached, and the destination MAC
	 * matches the bridge interface, claim the packets for the bridge
	 * interface.
	 */
	bridge_needs_input = (sc->sc_flags & SCF_PROTO_ATTACHED) != 0;
	if (bridge_needs_input &&
	    !is_broadcast && _ether_cmp(dhost, IF_LLADDR(bridge_ifp)) == 0) {
		is_bridge_mac = true;
	}
	if (is_ifp_mac) {
		/* unicast to the interface */
		if (sc->sc_mac_nat_bif == bif) {
			mbuf_ref_t forward = NULL;

			if (list.head != NULL) {
				/* handle MAC-NAT if enabled */
				list = bridge_mac_nat_input_list(sc, ifp,
				    list.head, &forward);
			}
			if (ip_bcast != NULL) {
				/* forward to all members except this one */
				/* bridge_broadcast_list unlocks */
				bridge_broadcast_list(sc, bif, etypef,
				    ip_bcast, pkt_direction_RX);
			} else {
				BRIDGE_UNLOCK(sc);
			}
			/* the lock has been released on both branches above */
			if (forward != NULL) {
				bridge_mac_nat_forward_list(bridge_ifp, etypef,
				    forward);
			}
		} else {
			BRIDGE_UNLOCK(sc);
		}
		/* unicast packets for this interface do not get forwarded */
		goto done;
	}
	if (is_bridge_mac || list.head == NULL) {
		/* `done` delivers to the bridge interface when is_bridge_mac */
		BRIDGE_UNLOCK(sc);
		goto done;
	}
	if (!is_broadcast) {
		/* find where to send the packet */
		dst_if = bridge_rtlookup(sc, dhost, vlan);
		if (ifp == dst_if) {
			/* nothing to forward */
			BRIDGE_UNLOCK(sc);
			goto done;
		}
		if (dst_if == NULL) {
			/* if a member is the dhost, deliver as input */
			member_input = bridge_find_member(sc, dhost, bif);
			if (member_input != NULL) {
				/* grab packets destined to member */
				BRIDGE_UNLOCK(sc);
				goto done;
			}
			/* if a member is shost, there's a loop, drop it */
			if (bridge_find_member(sc, shost, bif) != NULL) {
				BRIDGE_UNLOCK(sc);
				m_drop_list(list.head, bridge_ifp, DROPTAP_FLAG_DIR_IN,
				    DROP_REASON_BRIDGE_LOOP, NULL, 0);
				list.head = list.tail = NULL;
				goto done;
			}
		}
	}
	if (dst_if == NULL) {
		mbuf_t m;

		/*
		 * Broadcast/multicast or unknown unicast destination: send the
		 * result of copy_packet_list() to the other members; `list`
		 * itself is left intact and is returned via `done`.
		 */
		m = copy_packet_list(list.head);
		if (m != NULL) {
			/* bridge_broadcast_list unlocks */
			bridge_broadcast_list(sc, bif, etypef, m,
			    pkt_direction_RX);
		} else {
			BRIDGE_UNLOCK(sc);
		}
	} else {
		/* bridge_forward_list() consumes list and unlocks */
		bridge_forward_list(sc, bif, dst_if, etypef, list.head);
		list.head = list.tail = NULL;
	}

done:
	/* the bridge lock is not held on any path that reaches here */
	if (list.head != NULL) {
		if (member_input != NULL) {
			/* member gets the packets */
			inject_input_packet_list(member_input, list.head, true);
			list.head = list.tail = NULL;
		} else if (is_bridge_mac) {
			/* bridge consumes all the unicast packets */
			bridge_interface_input_list(bridge_ifp, etypef, list,
			    uses_virtio);
			list.head = list.tail = NULL;
		} else {
			/* packets go back to the caller */
			adjust_input_packet_list(list.head);
		}
	}
	return list;
}
9908
9909 static inline void
9910 update_mbuf_flags(struct ifnet * ifp, mbuf_t m, struct ether_header * eh)
9911 {
9912 /* duplicate some of the work done in ether_demux */
9913 if ((eh->ether_dhost[0] & 1) == 0) {
9914 if (_ether_cmp(eh->ether_dhost, IF_LLADDR(ifp)) != 0) {
9915 m->m_flags |= M_PROMISC;
9916 }
9917 } else {
9918 /* Check for broadcast */
9919 if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0) {
9920 m->m_flags |= M_BCAST;
9921 } else {
9922 m->m_flags |= M_MCAST;
9923 }
9924 }
9925 if (m->m_flags & M_HASFCS) {
9926 /*
9927 * If the M_HASFCS is set by the driver we want to make sure
9928 * that we strip off the trailing FCS data before handing it
9929 * up the stack.
9930 */
9931 m_adj(m, -ETHER_CRC_LEN);
9932 m->m_flags &= ~M_HASFCS;
9933 }
9934 return;
9935 }
9936
9937 static mbuf_t
9938 bridge_pf_list_out(mbuf_t m, ifnet_t ifp, uint32_t sc_filter_flags)
9939 {
9940 mbuf_t next_packet = NULL;
9941 mblist ret;
9942
9943 mblist_init(&ret);
9944 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9945 next_packet = scan->m_nextpkt;
9946
9947 /* remove packet from list, and pass through PF */
9948 scan->m_nextpkt = NULL;
9949 bridge_pf(&scan, ifp, sc_filter_flags, false);
9950 if (scan != NULL) {
9951 /* add packet back to the list */
9952 mblist_append(&ret, scan);
9953 }
9954 }
9955 return ret.head;
9956 }
9957
9958 static inline bool
9959 bridge_check_frame_header(struct bridge_softc * sc, ifnet_t ifp, mbuf_t m)
9960 {
9961 bool included = false;
9962 char * __single header;
9963 size_t header_length = 0;
9964
9965 header = m->m_pkthdr.pkt_hdr;
9966 if (header >= (char *)mbuf_datastart(m) &&
9967 header <= mtod(m, char *)) {
9968 header_length = mtod(m, char *) - header;
9969 if (header_length >= ETHER_HDR_LEN) {
9970 included = true;
9971 }
9972 }
9973 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9974 "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
9975 "header length %lu", sc->sc_ifp->if_xname,
9976 ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
9977 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
9978 (uint64_t)VM_KERNEL_ADDRPERM(header),
9979 included ? "inside" : "outside", header_length);
9980 if (!included) {
9981 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9982 "%s: frame_header outside mbuf", ifp->if_xname);
9983 }
9984 return included;
9985 }
9986
9987
9988 mbuf_t
9989 bridge_early_input(struct ifnet *ifp, mbuf_t in_list, u_int32_t cnt)
9990 {
9991 struct ether_header eh;
9992 mblist list;
9993 volatile bool list_is_promisc;
9994 int n_lists = 0;
9995 bool need_pf;
9996 mbuf_t next_packet = NULL;
9997 mblist ret;
9998 struct bridge_softc * __single sc = ifp->if_bridge;
9999 uint32_t sc_filter_flags;
10000
10001 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
10002 "(%s): count %u", ifp->if_xname, cnt);
10003
10004 sc_filter_flags = sc->sc_filter_flags;
10005 need_pf = (sc_filter_flags & IFBF_FILT_MEMBER) != 0 && PF_IS_ENABLED;
10006
10007 /* form sublists with the same ethernet header */
10008 mblist_init(&list);
10009 mblist_init(&ret);
10010 for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
10011 struct ether_header * eh_p;
10012 volatile bool is_promisc;
10013 mblist resid;
10014
10015 /* take it out of the list */
10016 next_packet = scan->m_nextpkt;
10017 scan->m_nextpkt = NULL;
10018
10019 /* don't loop the packet */
10020 if ((scan->m_flags & M_PROTO1) != 0) {
10021 mblist_append(&ret, scan);
10022 continue;
10023 }
10024 /* Check if this mbuf looks valid */
10025 MBUF_INPUT_CHECK(scan, ifp);
10026
10027 /* if the frame header isn't in the first mbuf, ignore */
10028 if (!bridge_check_frame_header(sc, ifp, scan)) {
10029 mblist_append(&ret, scan);
10030 continue;
10031 }
10032 /* set start back to include ether header */
10033 _mbuf_adjust_pkthdr_and_data(scan, -ETHER_HDR_LEN);
10034 eh_p = mtod(scan, struct ether_header *);
10035 update_mbuf_flags(ifp, scan, eh_p);
10036
10037 /* pass through PF if required */
10038 if (need_pf) {
10039 bridge_pf(&scan, ifp, sc_filter_flags, true);
10040 if (scan == NULL) {
10041 continue;
10042 }
10043 /* `eh_p` could have changed */
10044 eh_p = mtod(scan, struct ether_header *);
10045 }
10046
10047 is_promisc = get_and_clear_promisc(scan);
10048 if (list.head == NULL) {
10049 /* start a new list */
10050 mblist_append(&list, scan);
10051 bcopy(eh_p, &eh, sizeof(eh));
10052 list_is_promisc = is_promisc;
10053 } else if (bcmp(eh_p, &eh, sizeof(eh)) != 0) {
10054 n_lists++;
10055 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
10056 "(%s): sublist %u pkts %u",
10057 ifp->if_xname, n_lists, list.count);
10058 if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
10059 brlog_ether_header(&eh);
10060 }
10061 resid = bridge_input_list(sc, ifp, &eh, list,
10062 list_is_promisc);
10063 if (resid.head != NULL) {
10064 /* add to the packets to be returned */
10065 mblist_append_list(&ret, resid);
10066 }
10067 /* start new list */
10068 mblist_init(&list);
10069 mblist_append(&list, scan);
10070 list_is_promisc = is_promisc;
10071 bcopy(eh_p, &eh, sizeof(eh));
10072 } else {
10073 mblist_append(&list, scan);
10074 VERIFY(is_promisc == list_is_promisc);
10075 }
10076 if (next_packet == NULL) {
10077 /* last list */
10078 n_lists++;
10079 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
10080 "(%s): sublist %u pkts %u",
10081 ifp->if_xname, n_lists, list.count);
10082 if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
10083 brlog_ether_header(&eh);
10084 }
10085 resid = bridge_input_list(sc, ifp, &eh, list,
10086 list_is_promisc);
10087 if (resid.head != NULL) {
10088 /* add to the packets to be returned */
10089 mblist_append_list(&ret, resid);
10090 }
10091 }
10092 }
10093 return ret.head;
10094 }
10095
10096 /*
10097 * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
10098 * All rights reserved.
10099 *
10100 * Redistribution and use in source and binary forms, with or without
10101 * modification, are permitted provided that the following conditions
10102 * are met:
10103 * 1. Redistributions of source code must retain the above copyright
10104 * notice, this list of conditions and the following disclaimer.
10105 * 2. Redistributions in binary form must reproduce the above copyright
10106 * notice, this list of conditions and the following disclaimer in the
10107 * documentation and/or other materials provided with the distribution.
10108 *
10109 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
10110 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
10111 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
10112 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
10113 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
10114 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
10115 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
10116 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
10117 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
10118 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
10119 * SUCH DAMAGE.
10120 */
10121
10122 /*
10123 * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
10124 *
10125 * Create a queue of packets/segments which fit the given mss + hdr_len.
10126 * m0 points to mbuf chain to be segmented.
10127 * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
10128 * into segments of length MSS bytes and then copy the first hdr_len bytes
10129 * from m0 at the top of each segment.
10130 * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
10131 * in each segment after the first hdr_len bytes
10132 *
10133 * Return the new queue with the segments on success, NULL on failure.
10134 * (the mbuf queue is freed in this case).
10135 */
10136
10137 static mblist
10138 m_seg(struct mbuf *m0, int hdr_len, int mss, char * hdr2_buf __sized_by_or_null(hdr2_len), int hdr2_len)
10139 {
10140 int off = 0, n, firstlen;
10141 struct mbuf *mseg;
10142 int total_len = m0->m_pkthdr.len;
10143 mblist ret;
10144
10145 mblist_init(&ret);
10146 mblist_append(&ret, m0);
10147
10148 /*
10149 * Segmentation useless
10150 */
10151 if (total_len <= hdr_len + mss) {
10152 n = 1;
10153 goto done;
10154 }
10155 if (hdr2_buf == NULL || hdr2_len <= 0) {
10156 hdr2_buf = NULL;
10157 hdr2_len = 0;
10158 }
10159
10160 off = hdr_len + mss;
10161 firstlen = mss; /* first segment stored in the original mbuf */
10162 ret.bytes = off;
10163 for (n = 1; off < total_len; off += mss, n++) {
10164 struct mbuf *m;
10165 /*
10166 * Copy the header from the original packet
10167 * and create a new mbuf chain
10168 */
10169 if (MHLEN < hdr_len) {
10170 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
10171 } else {
10172 m = m_gethdr(M_NOWAIT, MT_DATA);
10173 }
10174
10175 if (m == NULL) {
10176 #ifdef GSO_DEBUG
10177 D("MGETHDR error\n");
10178 #endif
10179 goto err;
10180 }
10181
10182 m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
10183
10184 m->m_len = hdr_len;
10185 /*
10186 * if the optional header is present, copy it
10187 */
10188 if (hdr2_buf != NULL) {
10189 m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
10190 }
10191
10192 m->m_flags |= (m0->m_flags & M_COPYFLAGS);
10193 if (off + mss >= total_len) { /* last segment */
10194 mss = total_len - off;
10195 }
10196 /*
10197 * Copy the payload from original packet
10198 */
10199 mseg = m_copym(m0, off, mss, M_NOWAIT);
10200 if (mseg == NULL) {
10201 m_freem(m);
10202 #ifdef GSO_DEBUG
10203 D("m_copym error\n");
10204 #endif
10205 goto err;
10206 }
10207 m_cat(m, mseg);
10208
10209 m->m_pkthdr.len = hdr_len + hdr2_len + mss;
10210 m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
10211 /*
10212 * Copy the checksum flags and data (in_cksum() need this)
10213 */
10214 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
10215 m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
10216 m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
10217
10218 mblist_append(&ret, m);
10219 }
10220
10221 /*
10222 * Update first segment.
10223 * If the optional header is present, is necessary
10224 * to insert it into the first segment.
10225 */
10226 if (hdr2_buf == NULL) {
10227 m_adj(m0, hdr_len + firstlen - total_len);
10228 m0->m_pkthdr.len = hdr_len + firstlen;
10229 } else {
10230 mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
10231 if (mseg == NULL) {
10232 #ifdef GSO_DEBUG
10233 D("m_copym error\n");
10234 #endif
10235 goto err;
10236 }
10237 m_adj(m0, hdr_len - total_len);
10238 m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
10239 m_cat(m0, mseg);
10240 m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
10241 }
10242
10243 done:
10244 return ret;
10245
10246 err:
10247 if (ret.head != NULL) {
10248 m_freem_list(ret.head);
10249 mblist_init(&ret);
10250 }
10251 return ret;
10252 }
10253
10254 /*
10255 * Wrappers of IPv4 checksum functions
10256 */
10257 static inline void
10258 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
10259 {
10260 m->m_data += mac_hlen;
10261 m->m_len -= mac_hlen;
10262 m->m_pkthdr.len -= mac_hlen;
10263 #if __FreeBSD_version < 1000000
10264 ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
10265 #endif
10266
10267 in_delayed_cksum(m);
10268
10269 #if __FreeBSD_version < 1000000
10270 ip->ip_len = htons(ip->ip_len);
10271 #endif
10272 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
10273 m->m_len += mac_hlen;
10274 m->m_pkthdr.len += mac_hlen;
10275 m->m_data -= mac_hlen;
10276 }
10277
10278 static inline void
10279 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
10280 {
10281 m->m_data += mac_hlen;
10282
10283 ip->ip_sum = in_cksum(m, ip_hlen);
10284
10285 m->m_pkthdr.csum_flags &= ~CSUM_IP;
10286 m->m_data -= mac_hlen;
10287 }
10288
10289 /*
10290 * Structure that contains the state during the TCP segmentation
10291 */
10292 struct gso_ip_tcp_state {
10293 void (*update)
10294 (struct gso_ip_tcp_state*, struct mbuf*);
10295 void (*internal)
10296 (struct gso_ip_tcp_state*, struct mbuf*);
10297 u_int ip_m0_len;
10298 uint8_t * __counted_by(ip_m0_len) hdr;
10299 struct tcphdr *tcp;
10300 int mac_hlen;
10301 int ip_hlen;
10302 int tcp_hlen;
10303 int hlen;
10304 int pay_len;
10305 int sw_csum;
10306 uint32_t tcp_seq;
10307 uint16_t ip_id;
10308 boolean_t is_tx;
10309 };
10310
10311 /*
10312 * Update the pointers to TCP and IPv4 headers
10313 */
10314 static inline void
10315 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10316 {
10317 state->hdr = mtodo(m, state->mac_hlen);
10318 state->ip_m0_len = m->m_len - state->mac_hlen;
10319 state->ip_hlen = state->ip_hlen;
10320 state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10321 state->pay_len = m->m_pkthdr.len - state->hlen;
10322 }
10323
10324 /*
10325 * Set properly the TCP and IPv4 headers
10326 */
10327 static inline void
10328 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10329 {
10330 struct ip *ip;
10331 /*
10332 * Update IP header
10333 */
10334 ip = (struct ip *)state->hdr;
10335 ip->ip_id = htons((state->ip_id)++);
10336 ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
10337 /*
10338 * TCP Checksum
10339 */
10340 state->tcp->th_sum = 0;
10341 state->tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
10342 htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
10343 /*
10344 * Checksum HW not supported (TCP)
10345 */
10346 if (state->sw_csum & CSUM_DELAY_DATA) {
10347 gso_ipv4_data_cksum(m, ip, state->mac_hlen);
10348 }
10349
10350 state->tcp_seq += state->pay_len;
10351 /*
10352 * IP Checksum
10353 */
10354 ip->ip_sum = 0;
10355 /*
10356 * Checksum HW not supported (IP)
10357 */
10358 if (state->sw_csum & CSUM_IP) {
10359 gso_ipv4_hdr_cksum(m, ip, state->mac_hlen, state->ip_hlen);
10360 }
10361 }
10362
10363
10364 /*
10365 * Updates the pointers to TCP and IPv6 headers
10366 */
10367 static inline void
10368 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10369 {
10370 state->hdr = mtodo(m, state->mac_hlen);
10371 state->ip_m0_len = m->m_len - state->mac_hlen;
10372 state->ip_hlen = state->ip_hlen;
10373 state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10374 state->pay_len = m->m_pkthdr.len - state->hlen;
10375 }
10376
10377 /*
10378 * Sets properly the TCP and IPv6 headers
10379 */
10380 static inline void
10381 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10382 {
10383 struct ip6_hdr *ip6;
10384
10385 ip6 = (struct ip6_hdr *)state->hdr;
10386 ip6->ip6_plen = htons(m->m_pkthdr.len - state->mac_hlen - state->ip_hlen);
10387 /*
10388 * TCP Checksum
10389 */
10390 state->tcp->th_sum = 0;
10391 state->tcp->th_sum = in6_pseudo(&ip6->ip6_src, &ip6->ip6_dst,
10392 htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
10393 /*
10394 * Checksum HW not supported (TCP)
10395 */
10396 if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
10397 (void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
10398 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
10399 }
10400 state->tcp_seq += state->pay_len;
10401 }
10402
10403 /*
10404 * Init the state during the TCP segmentation
10405 */
10406 static void
10407 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
10408 bool is_ipv4, int mac_hlen, int ip_hlen,
10409 uint8_t *__counted_by(ip_m0_len) ip_hdr, u_int ip_m0_len,
10410 struct tcphdr * tcp_hdr)
10411 {
10412 #pragma unused(ifp)
10413
10414 state->hdr = ip_hdr;
10415 state->ip_m0_len = ip_m0_len;
10416 state->ip_hlen = ip_hlen;
10417 state->tcp = tcp_hdr;
10418 if (is_ipv4) {
10419 state->ip_id = ntohs(((struct ip *)state->hdr)->ip_id);
10420 state->update = gso_ipv4_tcp_update;
10421 state->internal = gso_ipv4_tcp_internal;
10422 state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
10423 } else {
10424 state->update = gso_ipv6_tcp_update;
10425 state->internal = gso_ipv6_tcp_internal;
10426 state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
10427 }
10428 state->mac_hlen = mac_hlen;
10429 state->tcp_hlen = state->tcp->th_off << 2;
10430 state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
10431 state->tcp_seq = ntohl(state->tcp->th_seq);
10432 //state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
10433 return;
10434 }
10435
10436 /*
10437 * GSO on TCP/IP (v4 or v6)
10438 *
10439 * Segment the given mbuf and return the list of packets.
10440 *
10441 */
10442 static mblist
10443 gso_ip_tcp(ifnet_t ifp, mbuf_t m0, struct gso_ip_tcp_state *state, bool is_tx)
10444 {
10445 struct mbuf *m;
10446 int orig_mss;
10447 int mss = 0;
10448 #ifdef GSO_STATS
10449 int total_len = m0->m_pkthdr.len;
10450 #endif /* GSO_STATS */
10451 mblist seg;
10452 bool tso_with_gso = false;
10453
10454 orig_mss = mss = _mbuf_get_tso_mss(m0);
10455 if (mss == 0 && !is_tx) {
10456 uint8_t seg_cnt = m0->m_pkthdr.rx_seg_cnt;
10457
10458 if (seg_cnt != 0) {
10459 uint32_t hdr_len;
10460 uint32_t len;
10461
10462 /* approximate the MSS using LRO seg cnt */
10463 hdr_len = state->ip_hlen + state->tcp_hlen;
10464 len = mbuf_pkthdr_len(m0) - hdr_len - ETHER_HDR_LEN;
10465 mss = len / seg_cnt;
10466 m0->m_pkthdr.rx_seg_cnt = 0;
10467 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10468 "%s: mss %d = len %d / seg cnt %d",
10469 ifp->if_xname, mss, len, seg_cnt);
10470 }
10471 }
10472 if (mss == 0) {
10473 /* hack: we don't have the actual MSS */
10474 u_int reduce_mss;
10475
10476 reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
10477 : if_bridge_tso_reduce_mss_forwarding;
10478 mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen -
10479 reduce_mss;
10480 assert(mss > 0);
10481 } else if (is_tx) {
10482 bool is_ipv4;
10483 bool do_tso = true;
10484
10485 if (TSO_IPV4_OK(ifp, m0)) {
10486 is_ipv4 = true;
10487 } else if (TSO_IPV6_OK(ifp, m0)) {
10488 is_ipv4 = false;
10489 } else {
10490 do_tso = false;
10491 }
10492 if (do_tso) { /* TSO with GSO */
10493 uint32_t if_tso_max;
10494
10495 if_tso_max = get_if_tso_mtu(ifp, is_ipv4);
10496 mss = if_tso_max - state->ip_hlen - state->tcp_hlen
10497 - ETHER_HDR_LEN;
10498 tso_with_gso = true;
10499 }
10500 }
10501 if (!tso_with_gso) {
10502 /* clear TSO flags */
10503 m0->m_pkthdr.csum_flags &= ~_TSO_CSUM;
10504 }
10505 seg = m_seg(m0, state->hlen, mss, 0, 0);
10506 if (seg.head == NULL || seg.head->m_nextpkt == NULL) {
10507 return seg;
10508 }
10509 if (tso_with_gso) {
10510 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10511 "%s TX gso size %d mss %d nsegs %d",
10512 ifp->if_xname,
10513 mss, orig_mss, seg.count);
10514 } else {
10515 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10516 "%s %s mss %d nsegs %d",
10517 ifp->if_xname,
10518 is_tx ? "TX" : "RX",
10519 mss, seg.count);
10520 }
10521 #ifdef GSO_STATS
10522 GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
10523 GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
10524 GSOSTAT_ADD(tcp.gsos_osegments, seg.count);
10525 #endif /* GSO_STATS */
10526
10527 /* first pkt */
10528 VERIFY(seg.head == m0);
10529 m = m0;
10530
10531 state->update(state, m);
10532
10533 do {
10534 state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
10535
10536 state->internal(state, m);
10537 m = m->m_nextpkt;
10538 state->update(state, m);
10539 state->tcp->th_flags &= ~TH_CWR;
10540 state->tcp->th_seq = htonl(state->tcp_seq);
10541 } while (m->m_nextpkt);
10542
10543 /* last pkt */
10544 state->internal(state, m);
10545
10546 #ifdef GSO_STATS
10547 if (!error) {
10548 GSOSTAT_INC(tcp.gsos_segmented);
10549 GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
10550 GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
10551 GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
10552 }
10553 #endif /* GSO_STATS */
10554 return seg;
10555 }
10556
10557 /*
10558 * GSO for TCP/IPv[46]
10559 */
10560 static mblist
10561 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
10562 u_int mac_hlen, bool is_ipv4, bool is_tx)
10563 {
10564 uint32_t csum_flags;
10565 struct gso_ip_tcp_state state;
10566 struct tcphdr *tcp;
10567
10568 assert(info_p->ip_proto_hdr != NULL);
10569 tcp = (struct tcphdr *)(void *)info_p->ip_proto_hdr;
10570 gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
10571 info_p->ip_hlen + info_p->ip_opt_len,
10572 info_p->ip_hdr, info_p->ip_m0_len, tcp);
10573 csum_flags = is_ipv4 ? CSUM_DELAY_DATA : CSUM_DELAY_IPV6_DATA; /* XXX */
10574 m->m_pkthdr.csum_flags |= csum_flags;
10575 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
10576 return gso_ip_tcp(ifp, m, &state, is_tx);
10577 }
10578
10579 static mblist
10580 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx)
10581 {
10582 int error;
10583 ip_packet_info info;
10584 struct bripstats stats; /* XXX ignored */
10585 mblist ret;
10586
10587 error = bridge_get_tcp_header(&m, mac_hlen, is_ipv4, &info, &stats);
10588 if (error != 0) {
10589 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10590 "%s bridge_get_tcp_header failed %d (%s)",
10591 ifp->if_xname, error,
10592 is_tx ? "TX" : "RX");
10593 if (m != NULL) {
10594 m_drop(m, DROPTAP_FLAG_DIR_IN,
10595 DROP_REASON_BRIDGE_CHECKSUM, NULL, 0);
10596 m = NULL;
10597 }
10598 goto no_segment;
10599 }
10600 if (info.ip_proto_hdr == NULL) {
10601 /* not actually a TCP packet, no segmentation */
10602 goto no_segment;
10603 }
10604 if (!is_tx && ip_packet_info_dst_is_our_ip(&info, ifp->if_index)) {
10605 goto no_segment;
10606 }
10607 return gso_tcp_with_info(ifp, m, &info, mac_hlen, is_ipv4, is_tx);
10608
10609 no_segment:
10610 mblist_init(&ret);
10611 if (m != NULL) {
10612 mblist_append(&ret, m);
10613 }
10614 return ret;
10615 }
10616