1 /*
2 * Copyright (c) 2004-2025 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */
30 /*
31 * Copyright 2001 Wasabi Systems, Inc.
32 * All rights reserved.
33 *
34 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed for the NetBSD Project by
47 * Wasabi Systems, Inc.
48 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49 * or promote products derived from this software without specific prior
50 * written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
56 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62 * POSSIBILITY OF SUCH DAMAGE.
63 */
64
65 /*
66 * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67 * All rights reserved.
68 *
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
71 * are met:
72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88 * POSSIBILITY OF SUCH DAMAGE.
89 *
90 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91 */
92
93 /*
94 * Network interface bridge support.
95 *
96 * TODO:
97 *
98 * - Currently only supports Ethernet-like interfaces (Ethernet,
99 * 802.11, VLANs on Ethernet, etc.) Figure out a nice way
100 * to bridge other types of interfaces (FDDI-FDDI, and maybe
 *   consider heterogeneous bridges).
102 *
103 * - GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104 */
105
106 #include <sys/cdefs.h>
107
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123
124 #include <sys/kauth.h>
125
126 #include <kern/thread_call.h>
127
128 #include <libkern/libkern.h>
129
130 #include <kern/zalloc.h>
131
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141
142 #include <netinet/in.h> /* for struct arpcom */
143 #include <netinet/tcp.h> /* for struct tcphdr */
144 #include <netinet/in_systm.h>
145 #include <netinet/in_var.h>
146 #define _IP_VHL
147 #include <netinet/ip.h>
148 #include <netinet/ip_var.h>
149 #include <netinet/ip6.h>
150 #include <netinet6/ip6_var.h>
151 #include <netinet/if_ether.h> /* for struct arpcom */
152 #include <net/bridgestp.h>
153 #include <net/if_bridgevar.h>
154 #include <net/if_llc.h>
155 #if NVLAN > 0
156 #include <net/if_vlan_var.h>
157 #endif /* NVLAN > 0 */
158
159 #include <net/if_ether.h>
160 #include <net/dlil.h>
161 #include <net/kpi_interfacefilter.h>
162 #include <net/pfvar.h>
163
164 #include <net/route.h>
165 #include <dev/random/randomdev.h>
166
167 #include <netinet/bootp.h>
168 #include <netinet/dhcp.h>
169
170 #if SKYWALK
171 #include <skywalk/nexus/netif/nx_netif.h>
172 #endif /* SKYWALK */
173
174 #include <net/sockaddr_utils.h>
175 #include <net/mblist.h>
176
177 #include <os/log.h>
178
/* File-local IPv4 address constant initialised to INADDR_ANY. */
static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
180
181
/*
 * mbuf flag tests
 * - __M_FLAGS_ARE_SET evaluates to true when at least one of the bits in
 *   'flags' is set in (m)->m_flags
 * - IS_BCAST / IS_MCAST / IS_BCAST_MCAST test M_BCAST, M_MCAST, or either
 */
#define __M_FLAGS_ARE_SET(m, flags) (((m)->m_flags & (flags)) != 0)
#define IS_BCAST(m) __M_FLAGS_ARE_SET(m, M_BCAST)
#define IS_MCAST(m) __M_FLAGS_ARE_SET(m, M_MCAST)
#define IS_BCAST_MCAST(m) __M_FLAGS_ARE_SET(m, M_BCAST | M_MCAST)

/*
 * Shorthand for protocol constants passed through htons()
 * (i.e. their 16-bit network byte order form).
 */
#define HTONS_ETHERTYPE_ARP htons(ETHERTYPE_ARP)
#define HTONS_ETHERTYPE_IP htons(ETHERTYPE_IP)
#define HTONS_ETHERTYPE_IPV6 htons(ETHERTYPE_IPV6)
#define HTONS_ARPHRD_ETHER htons(ARPHRD_ETHER)
#define HTONS_ARPOP_REQUEST htons(ARPOP_REQUEST)
#define HTONS_ARPOP_REPLY htons(ARPOP_REPLY)
#define HTONS_IPPORT_BOOTPC htons(IPPORT_BOOTPC)
#define HTONS_IPPORT_BOOTPS htons(IPPORT_BOOTPS)
#define HTONS_DHCP_FLAGS_BROADCAST htons(DHCP_FLAGS_BROADCAST)
196
/*
 * if_bridge_debug, BR_DBGF_*
 * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
 *   to enable additional logs for the corresponding bridge function
 * - "sysctl net.link.bridge.debug" controls the value of
 *   'if_bridge_debug'
 */
static uint32_t if_bridge_debug = 0;
#define BR_DBGF_LIFECYCLE       0x0001
#define BR_DBGF_INPUT           0x0002
#define BR_DBGF_OUTPUT          0x0004
#define BR_DBGF_RT_TABLE        0x0008
#define BR_DBGF_DELAYED_CALL    0x0010
#define BR_DBGF_IOCTL           0x0020
#define BR_DBGF_MBUF            0x0040
#define BR_DBGF_MCAST           0x0080
#define BR_DBGF_HOSTFILTER      0x0100
#define BR_DBGF_CHECKSUM        0x0200
#define BR_DBGF_MAC_NAT         0x0400
#define BR_DBGF_INPUT_LIST      0x0800

/*
 * if_bridge_log_level
 * - 'if_bridge_log_level' ensures that by default important logs are
 *   logged regardless of if_bridge_debug by comparing the log level
 *   in BRIDGE_LOG to if_bridge_log_level
 * - "sysctl net.link.bridge.log_level" controls the value of
 *   'if_bridge_log_level'
 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
 *   logs must use LOG_NOTICE to ensure they appear by default
 */
static int if_bridge_log_level = LOG_NOTICE;

/*
 * BRIDGE_DBGF_ENABLED
 * - true when at least one of the BR_DBGF_* bits in '__flag' is set in
 *   'if_bridge_debug'
 * - '__flag' is parenthesized so that a compound argument such as
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT) is masked as a whole; without the
 *   parentheses '&' would bind tighter than '|' and the test would
 *   always be true
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)

/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - macros to generate the specified log conditionally based on
 *   the specified log level and debug flags
 * - the log is emitted when '__level' is at or below if_bridge_log_level,
 *   or when any of the '__dbgf' bits is enabled in if_bridge_debug
 * - '__string' must be a string literal (BRIDGE_LOG concatenates it with
 *   the "%s: " prefix used for the function name)
 * - BRIDGE_LOG_SIMPLE does not include the function name in the log
 * - '__level' is parenthesized so that expression arguments compare
 *   as a unit
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, "%s: " __string,         \
	                       __func__, ## __VA_ARGS__);               \
	        }                                                       \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                    \
	        if ((__level) <= if_bridge_log_level ||         \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {              \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                               \
	} while (0)
253
/*
 * Raw bridge mutex operations and ownership assertions; all of them act on
 * the sc_mtx member of the bridge softc.
 */
#define _BRIDGE_LOCK(_sc) lck_mtx_lock(&(_sc)->sc_mtx)
#define _BRIDGE_UNLOCK(_sc) lck_mtx_unlock(&(_sc)->sc_mtx)
#define BRIDGE_LOCK_ASSERT_HELD(_sc) \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc) \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)

/*
 * BRIDGE_LOCK_DEBUG
 * - when non-zero, the BRIDGE_* locking macros map to the bridge_lock(),
 *   bridge_unlock(), bridge_lock2ref(), bridge_unref(), bridge_xlock() and
 *   bridge_xdrop() functions (defined elsewhere, not visible in this part
 *   of the file)
 * - when zero, the macros expand to the inline sequences in the #else
 *   branch below
 * - it is hard-wired to 1 here, so the #else branch is currently unused
 */
#define BRIDGE_LOCK_DEBUG 1
#if BRIDGE_LOCK_DEBUG

/* number of slots in the lock_lr[] / unlock_lr[] arrays of bridge_softc */
#define BR_LCKDBG_MAX 4

#define BRIDGE_LOCK(_sc) bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc) bridge_unlock(_sc)
#define BRIDGE_LOCK2REF(_sc, _err) _err = bridge_lock2ref(_sc)
#define BRIDGE_UNREF(_sc) bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc) bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc) bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc) _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc) _BRIDGE_UNLOCK(_sc)
/*
 * BRIDGE_LOCK2REF
 * - must be entered with the bridge mutex held
 * - sets _err to EBUSY when sc_iflist_xcnt is non-zero; otherwise
 *   increments sc_iflist_ref and sets _err to 0
 * - the mutex is released in both cases
 */
#define BRIDGE_LOCK2REF(_sc, _err) do { \
	BRIDGE_LOCK_ASSERT_HELD(_sc); \
	if ((_sc)->sc_iflist_xcnt > 0) \
	        (_err) = EBUSY; \
	else { \
	        (_sc)->sc_iflist_ref++; \
	        (_err) = 0; \
	} \
	_BRIDGE_UNLOCK(_sc); \
} while (0)
/*
 * BRIDGE_UNREF
 * - takes the mutex, decrements sc_iflist_ref, releases the mutex
 * - when sc_iflist_xcnt is non-zero and the reference count reached zero,
 *   wakeup() is called on &sc_cv after the mutex is released
 */
#define BRIDGE_UNREF(_sc) do { \
	_BRIDGE_LOCK(_sc); \
	(_sc)->sc_iflist_ref--; \
	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \
	        _BRIDGE_UNLOCK(_sc); \
	        wakeup(&(_sc)->sc_cv); \
	} else \
	        _BRIDGE_UNLOCK(_sc); \
} while (0)
/*
 * BRIDGE_XLOCK
 * - must be entered with the bridge mutex held
 * - increments sc_iflist_xcnt, then msleep()s on &sc_cv (with sc_mtx as
 *   the interlock) for as long as sc_iflist_ref is non-zero
 */
#define BRIDGE_XLOCK(_sc) do { \
	BRIDGE_LOCK_ASSERT_HELD(_sc); \
	(_sc)->sc_iflist_xcnt++; \
	while ((_sc)->sc_iflist_ref > 0) \
	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO, \
	            "BRIDGE_XLOCK", NULL); \
} while (0)
/*
 * BRIDGE_XDROP
 * - must be entered with the bridge mutex held
 * - decrements sc_iflist_xcnt (undoes the increment in BRIDGE_XLOCK)
 */
#define BRIDGE_XDROP(_sc) do { \
	BRIDGE_LOCK_ASSERT_HELD(_sc); \
	(_sc)->sc_iflist_xcnt--; \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
309
/*
 * BRIDGE_BPF_TAP_IN, BRIDGE_BPF_TAP_OUT
 * - hand the packet 'm' to bpf_tap_in()/bpf_tap_out() with DLT_EN10MB,
 *   but only when the interface's if_bpf pointer is non-NULL
 * - the macro parameters are parenthesized so that expression arguments
 *   (e.g. a dereference or a conditional) bind correctly before '->'
 * - 'ifp' is evaluated twice; do not pass an expression with side effects
 */
#define BRIDGE_BPF_TAP_IN(ifp, m)                                       \
	do {                                                            \
	        if ((ifp)->if_bpf != NULL) {                            \
	                bpf_tap_in((ifp), DLT_EN10MB, (m), NULL, 0);    \
	        }                                                       \
	} while(0)

#define BRIDGE_BPF_TAP_OUT(ifp, m)                                      \
	do {                                                            \
	        if ((ifp)->if_bpf != NULL) {                            \
	                bpf_tap_out((ifp), DLT_EN10MB, (m), NULL, 0);   \
	        }                                                       \
	} while(0)
323
324
/*
 * Initial size of the route hash table. Must be a power of two.
 */
#ifndef BRIDGE_RTHASH_SIZE
#define BRIDGE_RTHASH_SIZE 16
#endif

/*
 * Maximum size of the routing hash table
 */
#define BRIDGE_RTHASH_SIZE_MAX 2048

/*
 * Bit mask derived from the current hash table size (size - 1); this is
 * only a valid index mask because the size is required to be a power of two.
 */
#define BRIDGE_RTHASH_MASK(sc) ((sc)->sc_rthash_size - 1)

/*
 * Maximum number of addresses to cache.
 */
#ifndef BRIDGE_RTABLE_MAX
#define BRIDGE_RTABLE_MAX 100
#endif

/*
 * Timeout (in seconds) for entries learned dynamically.
 */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define BRIDGE_RTABLE_TIMEOUT (20 * 60) /* same as ARP */
#endif

/*
 * Number of seconds between walks of the route list.
 */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define BRIDGE_RTABLE_PRUNE_PERIOD (5 * 60)
#endif

/*
 * Number of MAC NAT entries
 * - sized based on 16 clients (including MAC NAT interface)
 *   each with 4 addresses
 */
#ifndef BRIDGE_MAC_NAT_ENTRY_MAX
#define BRIDGE_MAC_NAT_ENTRY_MAX 64
#endif /* BRIDGE_MAC_NAT_ENTRY_MAX */

/*
 * List of capabilities to possibly mask on the member interface.
 */
#define BRIDGE_IFCAPS_MASK (IFCAP_TSO | IFCAP_TXCSUM)
/*
 * List of capabilities to disable on the member interface.
 */
#define BRIDGE_IFCAPS_STRIP IFCAP_LRO
377
/*
 * Bridge interface list entry.
 * - one entry per interface linked on a bridge's sc_iflist (members) or
 *   sc_spanlist (span ports); both lists are TAILQs of this type
 */
struct bridge_iflist {
	TAILQ_ENTRY(bridge_iflist) bif_next; /* list linkage */
	struct ifnet *bif_ifp; /* member if */
	struct bstp_port bif_stp; /* STP state */
	uint32_t bif_ifflags; /* member if flags (IFBIF_*) */
	int bif_savedcaps; /* saved capabilities */
	uint32_t bif_addrmax; /* max # of addresses */
	uint32_t bif_addrcnt; /* cur. # of addresses */
	uint32_t bif_addrexceeded; /* # of address violations */

	interface_filter_t bif_iff_ref; /* interface filter reference */
	struct bridge_softc *bif_sc; /* presumably the owning bridge - confirm */
	uint32_t bif_flags; /* presumably BIFF_* state bits - confirm */

	/* host filter */
	struct in_addr bif_hf_ipsrc; /* host filter source IP */
	uint8_t bif_hf_hwsrc[ETHER_ADDR_LEN]; /* host filter source MAC */

	struct ifbrmstats bif_stats; /* per-member statistics */
};
401
402 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)403 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
404 {
405 return (bif->bif_ifflags & flags) != 0;
406 }
407
408 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)409 bif_has_checksum_offload(struct bridge_iflist * bif)
410 {
411 return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
412 }
413
414 static inline bool
bif_has_mac_nat(struct bridge_iflist * bif)415 bif_has_mac_nat(struct bridge_iflist * bif)
416 {
417 return bif_ifflags_are_set(bif, IFBIF_MAC_NAT);
418 }
419
420 static inline bool
bif_uses_virtio(struct bridge_iflist * bif)421 bif_uses_virtio(struct bridge_iflist * bif)
422 {
423 return bif_ifflags_are_set(bif, IFBIF_USES_VIRTIO);
424 }
425
/*
 * fake errors to make the code clearer
 * - every name below is an alias for EJUSTRETURN; the distinct names only
 *   document which validation failed at the point of use
 */
#define _EBADIP EJUSTRETURN
#define _EBADIPCHECKSUM EJUSTRETURN
#define _EBADIPV6 EJUSTRETURN
#define _EBADUDP EJUSTRETURN
#define _EBADTCP EJUSTRETURN
#define _EBADUDPCHECKSUM EJUSTRETURN
#define _EBADTCPCHECKSUM EJUSTRETURN
434
/*
 * BIFF_* bits
 * - single-bit state flags; presumably stored in bif_flags of
 *   struct bridge_iflist (confirm against users)
 * - the last two are only defined when SKYWALK is enabled
 */
#define BIFF_PROMISC 0x01 /* promiscuous mode set */
#define BIFF_PROTO_ATTACHED 0x02 /* protocol attached */
#define BIFF_FILTER_ATTACHED 0x04 /* interface filter attached */
#define BIFF_MEDIA_ACTIVE 0x08 /* interface media active */
#define BIFF_HOST_FILTER 0x10 /* host filter enabled */
#define BIFF_HF_HWSRC 0x20 /* host filter source MAC is set */
#define BIFF_HF_IPSRC 0x40 /* host filter source IP is set */
#define BIFF_INPUT_BROADCAST 0x80 /* send broadcast packets in */
#define BIFF_IN_MEMBER_LIST 0x100 /* added to the member list */
#define BIFF_WIFI_INFRA 0x200 /* interface is Wi-Fi infra */
#define BIFF_ALL_MULTI 0x400 /* allmulti set */
#define BIFF_LRO_DISABLED 0x800 /* LRO was disabled */
#if SKYWALK
#define BIFF_FLOWSWITCH_ATTACHED 0x1000 /* we attached the flowswitch */
#define BIFF_NETAGENT_REMOVED 0x2000 /* we removed the netagent */
#endif /* SKYWALK */
451
/*
 * mac_nat_entry
 * - translates between an IP address and MAC address on a specific
 *   bridge interface member
 * - the address is stored in the mne_u union; MNE_FLAGS_IPV6 in mne_flags
 *   marks an entry as holding an IPv6 address
 */
struct mac_nat_entry {
	LIST_ENTRY(mac_nat_entry) mne_list; /* list linkage */
	struct bridge_iflist *mne_bif; /* originating interface */
	unsigned long mne_expire; /* expiration time */
	union {
		struct in_addr mneu_ip; /* originating IPv4 address */
		struct in6_addr mneu_ip6; /* originating IPv6 address */
	} mne_u;
	uint8_t mne_mac[ETHER_ADDR_LEN]; /* MAC address of the entry */
	uint8_t mne_flags; /* MNE_FLAGS_* */
	uint8_t mne_reserved;
};
/* shorthand accessors for the union members */
#define mne_ip mne_u.mneu_ip
#define mne_ip6 mne_u.mneu_ip6

#define MNE_FLAGS_IPV6 0x01 /* IPv6 address */

/* list head type for mac_nat_entry lists */
LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
475
/*
 * mac_nat_record
 * - used by bridge_mac_nat_output() to convey the translation that needs
 *   to take place in bridge_mac_nat_translate
 * - holds enough information so that the translation can be done later
 *   when the destination interface is the MAC-NAT interface
 * - mnr_u is a union with one variant each for ARP, IPv4 and IPv6;
 *   presumably mnr_ether_type selects the valid variant (confirm against
 *   bridge_mac_nat_translate)
 */
struct mac_nat_record {
	uint16_t mnr_ether_type;
	union {
		uint16_t mnru_arp_offset;
		struct {
			uint16_t mnruip_dhcp_flags;
			uint16_t mnruip_udp_csum;
			uint8_t mnruip_header_len;
		} mnru_ip;
		struct {
			uint16_t mnruip6_icmp6_len;
			uint16_t mnruip6_lladdr_offset;
			uint8_t mnruip6_icmp6_type;
			uint8_t mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};

/* shorthand accessor: ARP variant */
#define mnr_arp_offset mnr_u.mnru_arp_offset

/* shorthand accessors: IPv4 variant */
#define mnr_ip_header_len mnr_u.mnru_ip.mnruip_header_len
#define mnr_ip_dhcp_flags mnr_u.mnru_ip.mnruip_dhcp_flags
#define mnr_ip_udp_csum mnr_u.mnru_ip.mnruip_udp_csum

/* shorthand accessors: IPv6 variant */
#define mnr_ip6_icmp6_len mnr_u.mnru_ip6.mnruip6_icmp6_len
#define mnr_ip6_icmp6_type mnr_u.mnru_ip6.mnruip6_icmp6_type
#define mnr_ip6_header_len mnr_u.mnru_ip6.mnruip6_header_len
#define mnr_ip6_lladdr_offset mnr_u.mnru_ip6.mnruip6_lladdr_offset
511
/*
 * Bridge route node.
 * - carries two linkages: brt_hash (hash table) and brt_list (flat list)
 */
struct bridge_rtnode {
	LIST_ENTRY(bridge_rtnode) brt_hash; /* hash table linkage */
	LIST_ENTRY(bridge_rtnode) brt_list; /* list linkage */
	struct bridge_iflist *brt_dst; /* destination if */
	unsigned long brt_expire; /* expiration time */
	uint8_t brt_flags; /* address flags */
	uint8_t brt_addr[ETHER_ADDR_LEN]; /* Ethernet address */
	uint16_t brt_vlan; /* vlan id */
};

/* shorthand: the ifnet of the destination member (dereferences brt_dst) */
#define brt_ifp brt_dst->bif_ifp
526
/*
 * Bridge delayed function call context
 * - bridge_delayed_func_t is the callback type; it receives the bridge softc
 */
typedef void (*bridge_delayed_func_t)(struct bridge_softc *);

struct bridge_delayed_call {
	struct bridge_softc *bdc_sc; /* bridge passed to bdc_func */
	bridge_delayed_func_t bdc_func; /* Function to call */
	struct timespec bdc_ts; /* Time to call */
	u_int32_t bdc_flags; /* BDCF_* */
	thread_call_t bdc_thread_call; /* underlying thread call */
};

#define BDCF_OUTSTANDING 0x01 /* Delayed call has been scheduled */
#define BDCF_CANCELLING 0x02 /* May be waiting for call completion */
542
/*
 * Software state for each bridge.
 */
LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);

struct bridge_softc {
	struct ifnet *sc_ifp; /* make this an interface */
	uint32_t sc_flags; /* presumably SCF_* bits - confirm */
	LIST_ENTRY(bridge_softc) sc_list; /* linkage in a list of bridges */
	decl_lck_mtx_data(, sc_mtx); /* mutex used by the BRIDGE_LOCK macros */
	struct _bridge_rtnode_list * __counted_by(sc_rthash_size) sc_rthash; /* our forwarding table */
	struct _bridge_rtnode_list sc_rtlist; /* list version of above */
	uint32_t sc_rthash_key; /* key for hash */
	uint32_t sc_rthash_size; /* size of the hash table */
	struct bridge_delayed_call sc_aging_timer; /* delayed call context */
	struct bridge_delayed_call sc_resize_call; /* delayed call context */
	TAILQ_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */
	struct bstp_state sc_stp; /* STP state */
	void *sc_cv; /* sleep/wakeup channel for BRIDGE_XLOCK / BRIDGE_UNREF */
	uint32_t sc_brtmax; /* max # of addresses */
	uint32_t sc_brtcnt; /* cur. # of addresses */
	uint32_t sc_brttimeout; /* rt timeout in seconds */
	uint32_t sc_iflist_ref; /* refcount for sc_iflist */
	uint32_t sc_iflist_xcnt; /* # of exclusive (BRIDGE_XLOCK) holders of sc_iflist */
	TAILQ_HEAD(, bridge_iflist) sc_iflist; /* member interface list */
	uint32_t sc_brtexceeded; /* # of cache drops */
	uint32_t sc_filter_flags; /* ipf and flags */
	struct ifnet *sc_ifaddr; /* member mac copied from */
	u_char sc_defaddr[6]; /* Default MAC address */
	char sc_if_xname[IFNAMSIZ]; /* interface name buffer */

	struct bridge_iflist *sc_mac_nat_bif; /* single MAC NAT interface */
	struct mac_nat_entry_list sc_mne_list; /* MAC NAT IPv4 */
	struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
	uint32_t sc_mne_max; /* max # of entries */
	uint32_t sc_mne_count; /* cur. # of entries */
	uint32_t sc_mne_allocation_failures; /* # of failed entry allocations */
#if BRIDGE_LOCK_DEBUG
	/*
	 * Locking and unlocking calling history
	 * - fixed-size arrays of BR_LCKDBG_MAX slots plus a next-slot index
	 *   for each; presumably filled with caller return addresses by the
	 *   bridge_lock()/bridge_unlock() functions (not visible here)
	 */
	void *lock_lr[BR_LCKDBG_MAX];
	int next_lock_lr;
	void *unlock_lr[BR_LCKDBG_MAX];
	int next_unlock_lr;
#endif /* BRIDGE_LOCK_DEBUG */
};

/* SCF_* bits; names indicate bridge-wide state (presumably for sc_flags) */
#define SCF_DETACHING 0x01
#define SCF_RESIZING 0x02
#define SCF_MEDIA_ACTIVE 0x04
#define SCF_ADDRESS_ASSIGNED 0x08
595
/*
 * ChecksumOperation
 * - selector passed to bridge_enqueue() (see its prototype) naming the
 *   checksum handling to apply; the exact semantics of each value are
 *   defined by the code that consumes it (not visible here)
 */
typedef enum {
	CHECKSUM_OPERATION_NONE = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
	CHECKSUM_OPERATION_FINALIZE = 2,
	CHECKSUM_OPERATION_COMPUTE = 3,
} ChecksumOperation;
602
/*
 * ip_packet_info
 * - summary of an IP packet's header layout
 * - ip_hdr is bounds-annotated with ip_m0_len (__sized_by), so the two
 *   fields must be kept consistent when either is updated
 */
typedef struct {
	u_int ip_hlen; /* IP header length */
	u_int ip_pay_len; /* length of payload (exclusive of ip_hlen) */
	u_int ip_m0_len; /* bytes available at ip_hdr (without jumping mbufs) */
	u_int ip_opt_len; /* IPv6 options headers length */
	uint8_t ip_proto; /* IPPROTO_TCP, IPPROTO_UDP, etc. */
	bool ip_is_ipv4; /* true for IPv4 */
	bool ip_is_fragmented; /* packet is a fragment */
	uint8_t *__sized_by(ip_m0_len) ip_hdr; /* pointer to IP header */
	uint8_t *__indexable ip_proto_hdr; /* ptr to protocol header (TCP) */
} ip_packet_info, *ip_packet_info_t;
614
/* host filter statistics (non-static, so visible outside this file) */
struct bridge_hostfilter_stats bridge_hostfilter_stats;

/* 8-bit flag type passed alongside packets (see bridge_enqueue et al.) */
typedef uint8_t ether_type_flag_t;

/*
 * Lock group / attributes for bridge locks, and the mutex
 * 'bridge_list_mtx' declared with them.
 *
 * NOTE(review): LCK_ATTR_DEBUG is requested only when BRIDGE_LOCK_DEBUG
 * is 0; with BRIDGE_LOCK_DEBUG set the attribute flags are 0. Confirm this
 * polarity is intended.
 */
static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
#if BRIDGE_LOCK_DEBUG
static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
#else
static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
#endif
static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);

/* route table prune period; initialised from BRIDGE_RTABLE_PRUNE_PERIOD */
static int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;

/* typed allocation pools for route nodes and MAC NAT entries */
static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
631
/*
 * Forward declarations of file-local (static) functions.
 * The definitions are not part of this section of the file.
 */

/* interface clone create/destroy */
static int bridge_clone_create(struct if_clone *, uint32_t, void *);
static int bridge_clone_destroy(struct ifnet *);

/* the data buffer is bounds-annotated with the length encoded in 'cmd' */
static errno_t bridge_ioctl(struct ifnet *, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)));
#if HAS_IF_CAP
static void bridge_mutecaps(struct bridge_softc *);
static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
    int);
#endif
static errno_t bridge_set_tso(struct bridge_softc *);
static void bridge_proto_attach_changed(struct ifnet *);
static int bridge_init(struct ifnet *);
static void bridge_ifstop(struct ifnet *, int);

/* packet input/output paths */
static int bridge_output(struct ifnet *, struct mbuf *);
static void bridge_finalize_cksum(struct ifnet *, struct mbuf *);
static void bridge_start(struct ifnet *);
static mblist bridge_input_list(struct bridge_softc *, ifnet_t,
    struct ether_header *, mblist, bool);
static errno_t bridge_iff_input(void *, ifnet_t, protocol_family_t,
    mbuf_t *, char **);
static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t,
    mbuf_t *);
static errno_t bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
    mbuf_t *m);
static int bridge_enqueue(ifnet_t, ifnet_t, ifnet_t,
    ether_type_flag_t, mbuf_t, ChecksumOperation);
static mbuf_t bridge_checksum_offload_list(ifnet_t, struct bridge_iflist *,
    mbuf_t, bool);
static mbuf_t bridge_filter_checksum(ifnet_t, struct bridge_iflist * bif,
    mbuf_t m, bool, bool, bool);
static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);

/* matches bridge_delayed_func_t */
static void bridge_aging_timer(struct bridge_softc *sc);

static void bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
    ether_type_flag_t, mbuf_t);
static void bridge_span(struct bridge_softc *, ether_type_flag_t, struct mbuf *);

/* route table (bridge_rt*) operations */
static int bridge_rtupdate(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
    uint16_t, struct bridge_iflist *, int, uint8_t);
static struct bridge_iflist * bridge_rtlookup_bif(struct bridge_softc *,
    const uint8_t[ETHER_ADDR_LEN], uint16_t);
static void bridge_rttrim(struct bridge_softc *);
static void bridge_rtage(struct bridge_softc *);
static void bridge_rtflush(struct bridge_softc *, int);
static int bridge_rtdaddr(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
    uint16_t);

static int bridge_rtable_init(struct bridge_softc *);
static void bridge_rtable_fini(struct bridge_softc *);

static void bridge_rthash_resize(struct bridge_softc *);

/* route node (bridge_rtnode_*) helpers */
static int bridge_rtnode_addr_cmp(const uint8_t[ETHER_ADDR_LEN], const uint8_t[ETHER_ADDR_LEN]);
static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
    const uint8_t[ETHER_ADDR_LEN], uint16_t);
static int bridge_rtnode_hash(struct bridge_softc *,
    struct bridge_rtnode *);
static int bridge_rtnode_insert(struct bridge_softc *,
    struct bridge_rtnode *);
static void bridge_rtnode_destroy(struct bridge_softc *,
    struct bridge_rtnode *);
/* only declared when spanning tree support (BRIDGESTP) is compiled in */
#if BRIDGESTP
static void bridge_rtable_expire(struct ifnet *, int);
static void bridge_state_change(struct ifnet *, int);
#endif /* BRIDGESTP */
698
/*
 * Member lookup by name (buffer of IFNAMSIZ bytes) or by ifnet, and
 * member / span entry removal.
 */
static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
    char * __sized_by(IFNAMSIZ) name);
static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
    struct ifnet *ifp);
static void bridge_delete_member(struct bridge_softc *,
    struct bridge_iflist *);
static void bridge_delete_span(struct bridge_softc *,
    struct bridge_iflist *);

/*
 * bridge_ioctl_* handlers
 * - all share one signature: the bridge softc, an argument buffer and
 *   its length; 'arg' is bounds-annotated with 'arg_len'
 * - several come in 32/64 suffixed pairs (presumably for 32-bit and
 *   64-bit callers; confirm against the dispatch code)
 */
static int bridge_ioctl_add(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_del(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_scache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gcache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gifs32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gifs64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_rts32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_rts64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_saddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_saddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_daddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_daddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_flush(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gpri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_spri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_ght(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sht(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sifprio(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sifcost(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_addspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_delspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gbparam32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gbparam64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_grte(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gifsstp32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gifsstp64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sproto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_stxhc(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_ghostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_shostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gmnelist32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gmnelist64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gifstats32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gifstats64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
754
/*
 * Packet filter hook and basic IPv4/IPv6 checks; each takes a pointer to
 * the mbuf pointer (struct mbuf **).
 */
static int bridge_pf(struct mbuf **, struct ifnet *,
    uint32_t sc_filter_flags, bool input);
static int bridge_ip_checkbasic(struct mbuf **);
static int bridge_ip6_checkbasic(struct mbuf **);

/* detach and link status handling */
static void bridge_detach(ifnet_t);
static void bridge_link_event(struct ifnet *, u_int32_t);
static void bridge_iflinkevent(struct ifnet *);
static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
static int interface_media_active(struct ifnet *);

/* delayed call management; operate on a struct bridge_delayed_call */
static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);

/* MAC NAT: enable/disable, entry table maintenance, per-packet translation */
static errno_t bridge_mac_nat_enable(struct bridge_softc *,
    struct bridge_iflist *);
static void bridge_mac_nat_disable(struct bridge_softc *sc);
static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
    struct bridge_iflist *);
static mbuf_t bridge_mac_nat_input(struct bridge_softc *, ifnet_t, mbuf_t,
    ifnet_t * dst_if);
static boolean_t bridge_mac_nat_output(struct bridge_softc *,
    struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
    const char[ETHER_ADDR_LEN]);

/* MAC NAT: packet-list variants */
static mblist bridge_mac_nat_input_list(struct bridge_softc *sc,
    ifnet_t external_ifp, mbuf_t m, mbuf_t * forward_head);
static mbuf_t bridge_mac_nat_translate_list(struct bridge_softc * sc,
    struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
static mbuf_t bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
    struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);

/* packet-list variant of bridge_pf() (same trailing parameters) */
static mbuf_t bridge_pf_list(mbuf_t m, ifnet_t ifp,
    uint32_t sc_filter_flags, bool input);
792
793 static inline ifnet_t
bridge_rtlookup(struct bridge_softc * sc,const uint8_t addr[ETHER_ADDR_LEN],uint16_t vlan)794 bridge_rtlookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
795 uint16_t vlan)
796 {
797 struct bridge_iflist * bif;
798 ifnet_t ifp = NULL;
799
800 bif = bridge_rtlookup_bif(sc, addr, vlan);
801 if (bif != NULL) {
802 ifp = bif->bif_ifp;
803 }
804 return ifp;
805 }
806
/* Address ownership predicates for IPv4 and IPv6 addresses. */
static bool in_addr_is_ours(const struct in_addr);
static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);

/* Copy a whole packet: m_copym() from offset 0 for M_COPYALL bytes. */
#define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)

/* GSO for TCP: returns an mblist; is_tx selects transmit handling. */
static mblist
gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx);

/* Same as gso_tcp() but additionally takes an ip_packet_info_t. */
static mblist
gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
    u_int mac_hlen, bool is_ipv4, bool is_tx);
818
/*
 * gso_tcp_transmit:
 *
 *	Convenience wrapper: calls gso_tcp() with is_tx set to true and
 *	returns its result unchanged.
 */
static inline mblist
gso_tcp_transmit(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4)
{
	return gso_tcp(ifp, m, mac_hlen, is_ipv4, true);
}
824
/*
 * The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2)
 *
 * NOTE(review): the macro below ignores its argument and always evaluates
 * to 0, not 1 as the sentence above suggests -- confirm which is intended.
 */
#define VLANTAGOF(_m)   0

/* First and last values of the final address octet used with bstp_etheraddr. */
#define BSTP_ETHERADDR_RANGE_FIRST      0x00
#define BSTP_ETHERADDR_RANGE_LAST       0x0f

/* 01:80:c2:00:00:<BSTP_ETHERADDR_RANGE_FIRST>; not static, so visible outside this file. */
u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
{ 0x01, 0x80, 0xc2, 0x00, 0x00, BSTP_ETHERADDR_RANGE_FIRST };


/* All-zero Ethernet address. */
static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
837
#if BRIDGESTP
/*
 * Callbacks handed to the spanning tree code via bstp_attach():
 * bcb_state -> bridge_state_change, bcb_rtage -> bridge_rtable_expire.
 */
static struct bstp_cb_ops bridge_ops = {
	.bcb_state = bridge_state_change,
	.bcb_rtage = bridge_rtable_expire
};
#endif /* BRIDGESTP */
844
/* Root of the bridge sysctl tree: net.link.bridge */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Bridge");

/* net.link.bridge.inherit_mac (default 0) */
static int bridge_inherit_mac = 0;      /* share MAC with first bridge member */
SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_inherit_mac, 0,
    "Inherit MAC address from the first bridge member");

/* net.link.bridge.rtable_prune_period; the backing variable is declared elsewhere */
SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_rtable_prune_period, 0,
    "Interval between pruning of routing table");

/* net.link.bridge.rtable_hash_size_max (default BRIDGE_RTHASH_SIZE_MAX) */
static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_rtable_hash_size_max, 0,
    "Maximum size of the routing hash table");

#if BRIDGE_DELAYED_CALLBACK_DEBUG
/* net.link.bridge.delayed_callback_delay; only built with BRIDGE_DELAYED_CALLBACK_DEBUG */
static int bridge_delayed_callback_delay = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_delayed_callback_delay, 0,
    "Delay before calling delayed function");
#endif

/* net.link.bridge.hostfilterstats: read-only export of bridge_hostfilter_stats */
SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
    hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &bridge_hostfilter_stats, bridge_hostfilter_stats, "");

#if BRIDGESTP
/* net.link.bridge.log_stp; note this one is registered without CTLFLAG_LOCKED */
static int log_stp = 0;   /* log STP state changes */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
    &log_stp, 0, "Log STP state changes");
#endif /* BRIDGESTP */
883
/*
 * One entry of the ioctl dispatch tables (bridge_control_table32/64).
 */
struct bridge_control {
	/* handler; receives the softc and a sized argument buffer */
	int (*bc_func)(struct bridge_softc *, void *__sized_by(arg_len) args, size_t arg_len);
	/* size of the argument structure (the tables use sizeof(struct ...)) */
	unsigned int bc_argsize;
	/* combination of the BC_F_* flags below */
	unsigned int bc_flags;
};

#define BC_F_COPYIN             0x01    /* copy arguments in */
#define BC_F_COPYOUT            0x02    /* copy arguments out */
#define BC_F_SUSER              0x04    /* do super-user check */
893
/*
 * ioctl dispatch table using the "32" argument layouts (ifbifconf32,
 * ifbaconf32, ifbareq32, ...); presumably selected for 32-bit callers --
 * confirm at the dispatch site.  The numeric markers on some entries give
 * the entry's index in the table.  Entries are parallel to
 * bridge_control_table64, and bridge_control_table_size is derived from
 * this table.
 */
static const struct bridge_control bridge_control_table32[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq), /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam), /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam), /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32), /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist32,
	  .bc_argsize = sizeof(struct ifbrmnelist32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats32,
	  .bc_argsize = sizeof(struct ifbrmreq32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
998
/*
 * ioctl dispatch table using the "64" argument layouts (ifbifconf64,
 * ifbaconf64, ifbareq64, ...); presumably selected for 64-bit callers --
 * confirm at the dispatch site.  Entries are parallel to
 * bridge_control_table32; only the handlers and argument sizes that depend
 * on the layout differ.  The numeric markers give the entry's index.
 */
static const struct bridge_control bridge_control_table64[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq), /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam), /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam), /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64), /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist64,
	  .bc_argsize = sizeof(struct ifbrmnelist64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats64,
	  .bc_argsize = sizeof(struct ifbrmreq64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
1103
/*
 * Number of entries in the ioctl dispatch table.
 * NOTE(review): computed from bridge_control_table32 only; presumably
 * bridge_control_table64 is expected to have the same number of entries --
 * confirm at the use site.
 */
static const unsigned int bridge_control_table_size =
    sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);

/* List of bridge softc instances, linked through sc_list. */
static LIST_HEAD(, bridge_softc) bridge_list =
    LIST_HEAD_INITIALIZER(bridge_list);

#define BRIDGENAME      "bridge"
#define BRIDGES_MAX     IF_MAXUNIT
#define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)

/* Interface cloner for "bridge" interfaces, with units 0..BRIDGES_MAX. */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
    0, BRIDGES_MAX);

/*
 * net.link.bridge.txstart: read by bridge_clone_create() to choose between
 * the start callback (bridge_start) and the legacy output callback
 * (bridge_output).
 */
static int if_bridge_txstart = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");

/* net.link.bridge.debug; the backing variable is declared elsewhere */
SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_debug, 0, "Bridge debug flags");

/* net.link.bridge.log_level; the backing variable is declared elsewhere */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_log_level, 0, "Bridge log level");

/* net.link.bridge.output_skip_filters (default 1) */
static int if_bridge_output_skip_filters = 1;
SYSCTL_INT(_net_link_bridge, OID_AUTO, output_skip_filters,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_output_skip_filters, 0, "Bridge skip output filters");

/* net.link.bridge.enable_early_input (default 1); not static */
int bridge_enable_early_input = 1; /* DLIL early input */
SYSCTL_INT(_net_link_bridge, OID_AUTO, enable_early_input,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_enable_early_input, 0,
    "Bridge enable early input");

/* net.link.bridge.allow_lro_num_seg (default 1); not static */
int bridge_allow_lro_num_seg = 1; /* allow LRO_NUM_SEG to keep LRO enabled */
SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_lro_num_seg,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_allow_lro_num_seg, 0,
    "Bridge allow LRO_NUM_SEG to keep LRO enabled");

/*
 * Upper bounds and defaults for the two "tso reduce mss" tunables; the
 * bounds are enforced by the sysctl handlers through bridge_tso_reduce_mss().
 */
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX            256
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT        110
#define BRIDGE_TSO_REDUCE_MSS_TX_MAX                    256
#define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT                0

static u_int if_bridge_tso_reduce_mss_forwarding
        = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
static u_int if_bridge_tso_reduce_mss_tx
        = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1155
1156 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1157 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1158 {
1159 int changed;
1160 int error;
1161 u_int new_value;
1162
1163 error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1164 &changed);
1165 if (error == 0 && changed != 0) {
1166 if (new_value > val_max) {
1167 return EINVAL;
1168 }
1169 *val = new_value;
1170 }
1171 return error;
1172 }
1173
/*
 * Handler for net.link.bridge.tso_reduce_mss_forwarding: reads/updates
 * if_bridge_tso_reduce_mss_forwarding, rejecting values above
 * BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX (see bridge_tso_reduce_mss()).
 */
static int
bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
{
	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
	           BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
}

SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
    "Bridge tso reduce mss when forwarding");

/*
 * Handler for net.link.bridge.tso_reduce_mss_tx: reads/updates
 * if_bridge_tso_reduce_mss_tx, rejecting values above
 * BRIDGE_TSO_REDUCE_MSS_TX_MAX (see bridge_tso_reduce_mss()).
 */
static int
bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
{
	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
	           BRIDGE_TSO_REDUCE_MSS_TX_MAX);
}

SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
    "Bridge tso reduce mss on transmit");
1197
#if DEBUG || DEVELOPMENT
/*
 * net.link.bridge.reduce_tso_mtu
 * - when non-zero, the bridge overrides the interface TSO MTU to a lower
 *   value (i.e. 16K) to enable testing the "use GSO instead" path
 * - only present in DEBUG or DEVELOPMENT builds
 */
static int if_bridge_reduce_tso_mtu = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, reduce_tso_mtu,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_reduce_tso_mtu, 0, "Bridge interface reduce TSO MTU");

#endif /* DEBUG || DEVELOPMENT */

/* Logging helpers; all of them emit through BRIDGE_LOG_SIMPLE at LOG_NOTICE. */
static void brlog_ether_header(struct ether_header *);
static void brlog_mbuf_data(mbuf_t, size_t, size_t);
static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
static void brlog_mbuf(mbuf_t, const char *, const char *);
static void brlog_link(struct bridge_softc * sc);
1216
#if BRIDGE_LOCK_DEBUG
/*
 * Lock-debugging variants of the bridge lock primitives.  In addition to
 * taking/dropping the lock they record the caller's return address in the
 * softc's lock_lr[] / unlock_lr[] history arrays.
 */
static void bridge_lock(struct bridge_softc *);
static void bridge_unlock(struct bridge_softc *);
static int bridge_lock2ref(struct bridge_softc *);
static void bridge_unref(struct bridge_softc *);
static void bridge_xlock(struct bridge_softc *);
static void bridge_xdrop(struct bridge_softc *);

/* Declare local `v' holding the return address of the enclosing function. */
#define DECL_RETURN_ADDR(v) void * __single v = __unsafe_forge_single(void *, __builtin_return_address(0))
1226
/*
 * bridge_lock:
 *
 *	Acquire the bridge lock (which must not already be held by the
 *	caller) and record the caller's return address in the lock_lr[]
 *	history.
 */
static void
bridge_lock(struct bridge_softc *sc)
{
	DECL_RETURN_ADDR(lr_saved);

	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	_BRIDGE_LOCK(sc);

	/* history is updated only after the lock is held */
	sc->lock_lr[sc->next_lock_lr] = lr_saved;
	/* advance the slot index, wrapping at SO_LCKDBG_MAX */
	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
}
1239
/*
 * bridge_unlock:
 *
 *	Record the caller's return address in the unlock_lr[] history and
 *	release the bridge lock, which must be held on entry.
 */
static void
bridge_unlock(struct bridge_softc *sc)
{
	DECL_RETURN_ADDR(lr_saved);

	BRIDGE_LOCK_ASSERT_HELD(sc);

	/* history is updated while the lock is still held */
	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
	/* advance the slot index, wrapping at SO_LCKDBG_MAX */
	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;

	_BRIDGE_UNLOCK(sc);
}
1252
1253 static int
bridge_lock2ref(struct bridge_softc * sc)1254 bridge_lock2ref(struct bridge_softc *sc)
1255 {
1256 int error = 0;
1257 DECL_RETURN_ADDR(lr_saved);
1258
1259 BRIDGE_LOCK_ASSERT_HELD(sc);
1260
1261 if (sc->sc_iflist_xcnt > 0) {
1262 error = EBUSY;
1263 } else {
1264 sc->sc_iflist_ref++;
1265 }
1266
1267 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1268 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1269
1270 _BRIDGE_UNLOCK(sc);
1271
1272 return error;
1273 }
1274
1275 static void
bridge_unref(struct bridge_softc * sc)1276 bridge_unref(struct bridge_softc *sc)
1277 {
1278 DECL_RETURN_ADDR(lr_saved);
1279
1280 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1281
1282 _BRIDGE_LOCK(sc);
1283 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1284 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1285
1286 sc->sc_iflist_ref--;
1287
1288 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1289 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1290 if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1291 _BRIDGE_UNLOCK(sc);
1292 wakeup(&sc->sc_cv);
1293 } else {
1294 _BRIDGE_UNLOCK(sc);
1295 }
1296 }
1297
/*
 * bridge_xlock:
 *
 *	Request exclusive access to the member list.  Called with the
 *	bridge lock held.  Bumps sc_iflist_xcnt and then sleeps on sc_cv
 *	(paired with the wakeup() in bridge_unref()) for as long as
 *	sc_iflist_ref is still positive.
 */
static void
bridge_xlock(struct bridge_softc *sc)
{
	DECL_RETURN_ADDR(lr_saved);

	BRIDGE_LOCK_ASSERT_HELD(sc);

	sc->sc_iflist_xcnt++;
	while (sc->sc_iflist_ref > 0) {
		/*
		 * msleep() is passed sc_mtx; the history entries around it
		 * treat the sleep as an unlock followed by a re-lock.
		 */
		sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
		sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;

		msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);

		sc->lock_lr[sc->next_lock_lr] = lr_saved;
		sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
	}
}

#undef DECL_RETURN_ADDR
1318
/*
 * bridge_xdrop:
 *
 *	Undo bridge_xlock(): decrement sc_iflist_xcnt.  The bridge lock
 *	must be held; it is neither dropped nor is any history recorded.
 */
static void
bridge_xdrop(struct bridge_softc *sc)
{
	BRIDGE_LOCK_ASSERT_HELD(sc);

	sc->sc_iflist_xcnt--;
}
1326
1327 #endif /* BRIDGE_LOCK_DEBUG */
1328
1329 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1330 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1331 {
1332 if (m) {
1333 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1334 "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1335 prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1336 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1337 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1338 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1339 suffix ? suffix : "");
1340 } else {
1341 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1342 }
1343 }
1344
1345 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1346 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1347 {
1348 if (m) {
1349 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1350 "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1351 "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1352 prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1353 mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1354 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
1355 (unsigned int)mbuf_maxlen(m),
1356 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1357 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1358 !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1359 if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1360 brlog_mbuf_pkthdr(m, "", suffix);
1361 }
1362 } else {
1363 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1364 }
1365 }
1366
1367 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1368 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1369 {
1370 mbuf_t n;
1371 size_t i, j;
1372 size_t pktlen, mlen, maxlen;
1373 unsigned char *ptr;
1374
1375 pktlen = mbuf_pkthdr_len(m);
1376
1377 if (offset > pktlen) {
1378 return;
1379 }
1380
1381 maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1382 n = m;
1383 mlen = mbuf_len(n);
1384 ptr = mtod(n, unsigned char *);
1385 for (i = 0, j = 0; i < maxlen; i++, j++) {
1386 if (j >= mlen) {
1387 n = mbuf_next(n);
1388 if (n == 0) {
1389 break;
1390 }
1391 ptr = mtod(n, unsigned char *);
1392 mlen = mbuf_len(n);
1393 j = 0;
1394 }
1395 if (i >= offset) {
1396 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1397 "%02x%s", ptr[j], i % 2 ? " " : "");
1398 }
1399 }
1400 }
1401
1402 static void
brlog_ether_header(struct ether_header * eh)1403 brlog_ether_header(struct ether_header *eh)
1404 {
1405 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1406 "%02x:%02x:%02x:%02x:%02x:%02x > "
1407 "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1408 eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1409 eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1410 eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1411 eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1412 ntohs(eh->ether_type));
1413 }
1414
/*
 * ether_ntop:
 *
 *	Format the 6-byte Ethernet address `ap' into `buf' (capacity `len'
 *	bytes) as lower-case, zero-padded, colon-separated hex
 *	("xx:xx:xx:xx:xx:xx") using snprintf(), and return `buf'.
 *	The snprintf() result is not checked, so a buffer too small for
 *	the full string receives whatever snprintf() could fit.
 */
static char *
ether_ntop(char * __sized_by(len) buf, size_t len, const u_char ap[ETHER_ADDR_LEN])
{
	snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
	    ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);

	return buf;
}
1423
/*
 * brlog_link:
 *
 *	Build a temporary AF_LINK sockaddr_dl on the stack from the bridge's
 *	interface name (sc_if_xname) and default MAC address (sc_defaddr)
 *	and log its fields at LOG_NOTICE.  Nothing is stored in `sc'.
 */
static void
brlog_link(struct bridge_softc * sc)
{
	int i;
	/*
	 * The element count is a byte count but the element type is
	 * uint32_t, so the buffer is larger than the bytes actually used.
	 */
	uint32_t sdl_buffer[(offsetof(struct sockaddr_dl, sdl_data) +
	IFNAMSIZ + ETHER_ADDR_LEN)];
	struct sockaddr_dl *sdl = SDL((uint8_t*)&sdl_buffer); /* SDL requires byte pointer */
	const u_char * lladdr;
	char lladdr_str[48];

	memset(sdl_buffer, 0, sizeof(sdl_buffer));
	sdl->sdl_family = AF_LINK;
	sdl->sdl_nlen = strbuflen(sc->sc_if_xname);
	sdl->sdl_alen = ETHER_ADDR_LEN;
	/* sdl_len covers the fixed part only, not the name/address bytes */
	sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
	memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
	memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
	lladdr_str[0] = '\0';
	/* render the link-layer address as colon-separated hex */
	for (i = 0, lladdr = CONST_LLADDR(sdl);
	    i < sdl->sdl_alen;
	    i++, lladdr++) {
		/* room for ':' + two hex digits + NUL */
		char    byte_str[4];

		/* "%x" is not zero-padded: bytes below 0x10 print as one digit */
		snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
		    *lladdr);
		strbufcat(lladdr_str, byte_str);
	}
	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
	    "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
	    " slen %d addr %s", sc->sc_if_xname,
	    sdl->sdl_len, sdl->sdl_index,
	    sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
	    sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
}
1458
1459 static int
_mbuf_get_tso_mss(mbuf_t m)1460 _mbuf_get_tso_mss(mbuf_t m)
1461 {
1462 int mss = 0;
1463
1464 #define _TSO_CSUM (CSUM_TSO_IPV4 | CSUM_TSO_IPV6)
1465 if ((m->m_pkthdr.csum_flags & _TSO_CSUM) != 0) {
1466 mss = m->m_pkthdr.tso_segsz;
1467 }
1468 return mss;
1469 }
1470
1471 /*
1472 * bridgeattach:
1473 *
1474 * Pseudo-device attach routine.
1475 */
1476 __private_extern__ int
bridgeattach(int n)1477 bridgeattach(int n)
1478 {
1479 #pragma unused(n)
1480 int error;
1481
1482 LIST_INIT(&bridge_list);
1483
1484 #if BRIDGESTP
1485 bstp_sys_init();
1486 #endif /* BRIDGESTP */
1487
1488 error = if_clone_attach(&bridge_cloner);
1489 if (error != 0) {
1490 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1491 }
1492 return error;
1493 }
1494
1495 static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m,int len)1496 _mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
1497 {
1498 mbuf_setdata(m, mtodo(m, len), mbuf_len(m) - len);
1499 mbuf_pkthdr_adjustlen(m, -len);
1500 }
1501
1502 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1503 bridge_ifnet_set_attrs(struct ifnet * ifp)
1504 {
1505 errno_t error;
1506
1507 error = ifnet_set_mtu(ifp, ETHERMTU);
1508 if (error != 0) {
1509 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1510 goto done;
1511 }
1512 error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1513 if (error != 0) {
1514 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1515 goto done;
1516 }
1517 error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1518 if (error != 0) {
1519 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1520 goto done;
1521 }
1522 error = ifnet_set_flags(ifp,
1523 IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1524 0xffff);
1525
1526 if (error != 0) {
1527 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1528 goto done;
1529 }
1530 done:
1531 return error;
1532 }
1533
1534 /*
1535 * bridge_clone_create:
1536 *
1537 * Create a new bridge instance.
1538 */
1539 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1540 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1541 {
1542 #pragma unused(params)
1543 ifnet_ref_t ifp = NULL;
1544 struct bridge_softc *sc = NULL;
1545 struct bridge_softc *sc2 = NULL;
1546 struct ifnet_init_eparams init_params;
1547 errno_t error = 0;
1548 uint8_t eth_hostid[ETHER_ADDR_LEN];
1549 int fb, retry, has_hostid;
1550
1551 sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
1552 lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1553 sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1554 sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1555 sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1556 sc->sc_filter_flags = 0;
1557
1558 TAILQ_INIT(&sc->sc_iflist);
1559
1560 /* use the interface name as the unique id for ifp recycle */
1561 snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1562 ifc->ifc_name, unit);
1563 bzero(&init_params, sizeof(init_params));
1564 init_params.ver = IFNET_INIT_CURRENT_VERSION;
1565 init_params.len = sizeof(init_params);
1566 /* Initialize our routing table. */
1567 error = bridge_rtable_init(sc);
1568 if (error != 0) {
1569 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1570 goto done;
1571 }
1572 TAILQ_INIT(&sc->sc_spanlist);
1573 if (if_bridge_txstart) {
1574 init_params.start = bridge_start;
1575 } else {
1576 init_params.flags = IFNET_INIT_LEGACY;
1577 init_params.output = bridge_output;
1578 }
1579 init_params.uniqueid_len = strbuflen(sc->sc_if_xname);
1580 init_params.uniqueid = sc->sc_if_xname;
1581 init_params.sndq_maxlen = IFQ_MAXLEN;
1582 init_params.name = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
1583 init_params.unit = unit;
1584 init_params.family = IFNET_FAMILY_ETHERNET;
1585 init_params.type = IFT_BRIDGE;
1586 init_params.demux = ether_demux;
1587 init_params.add_proto = ether_add_proto;
1588 init_params.del_proto = ether_del_proto;
1589 init_params.check_multi = ether_check_multi;
1590 init_params.framer_extended = ether_frameout_extended;
1591 init_params.softc = sc;
1592 init_params.ioctl = bridge_ioctl;
1593 init_params.detach = bridge_detach;
1594 init_params.broadcast_addr = etherbroadcastaddr;
1595 init_params.broadcast_len = ETHER_ADDR_LEN;
1596
1597 error = ifnet_allocate_extended(&init_params, &ifp);
1598 if (error != 0) {
1599 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1600 goto done;
1601 }
1602 LIST_INIT(&sc->sc_mne_list);
1603 LIST_INIT(&sc->sc_mne_list_v6);
1604 sc->sc_ifp = ifp;
1605 error = bridge_ifnet_set_attrs(ifp);
1606 if (error != 0) {
1607 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1608 error);
1609 goto done;
1610 }
1611 /*
1612 * Generate an ethernet address with a locally administered address.
1613 *
1614 * Since we are using random ethernet addresses for the bridge, it is
1615 * possible that we might have address collisions, so make sure that
1616 * this hardware address isn't already in use on another bridge.
1617 * The first try uses the "hostid" and falls back to read_frandom();
1618 * for "hostid", we use the MAC address of the first-encountered
1619 * Ethernet-type interface that is currently configured.
1620 */
1621 fb = 0;
1622 has_hostid = (uuid_get_ethernet(ð_hostid[0]) == 0);
1623 for (retry = 1; retry != 0;) {
1624 if (fb || has_hostid == 0) {
1625 read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1626 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1627 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1628 } else {
1629 bcopy(ð_hostid[0], &sc->sc_defaddr,
1630 ETHER_ADDR_LEN);
1631 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1632 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1633 sc->sc_defaddr[3] = /* stir it up a bit */
1634 ((sc->sc_defaddr[3] & 0x0f) << 4) |
1635 ((sc->sc_defaddr[3] & 0xf0) >> 4);
1636 /*
1637 * Mix in the LSB as it's actually pretty significant,
1638 * see rdar://14076061
1639 */
1640 sc->sc_defaddr[4] =
1641 (((sc->sc_defaddr[4] & 0x0f) << 4) |
1642 ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1643 sc->sc_defaddr[5];
1644 sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1645 }
1646
1647 fb = 1;
1648 retry = 0;
1649 lck_mtx_lock(&bridge_list_mtx);
1650 LIST_FOREACH(sc2, &bridge_list, sc_list) {
1651 if (_ether_cmp(sc->sc_defaddr,
1652 IF_LLADDR(sc2->sc_ifp)) == 0) {
1653 retry = 1;
1654 }
1655 }
1656 lck_mtx_unlock(&bridge_list_mtx);
1657 }
1658
1659 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1660
1661 if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1662 brlog_link(sc);
1663 }
1664 error = ifnet_attach(ifp, NULL);
1665 if (error != 0) {
1666 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1667 goto done;
1668 }
1669
1670 error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1671 IFT_ETHER);
1672 if (error != 0) {
1673 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1674 error);
1675 goto done;
1676 }
1677
1678 ifnet_set_offload(ifp,
1679 IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1680 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1681 error = bridge_set_tso(sc);
1682 if (error != 0) {
1683 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1684 goto done;
1685 }
1686 #if BRIDGESTP
1687 bstp_attach(&sc->sc_stp, &bridge_ops);
1688 #endif /* BRIDGESTP */
1689
1690 lck_mtx_lock(&bridge_list_mtx);
1691 LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1692 lck_mtx_unlock(&bridge_list_mtx);
1693
1694 /* attach as ethernet */
1695 error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1696 NULL, NULL);
1697
1698 done:
1699 if (error != 0) {
1700 BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1701 /* TBD: Clean up: sc, sc_rthash etc */
1702 }
1703
1704 return error;
1705 }
1706
1707 /*
1708 * bridge_clone_destroy:
1709 *
1710 * Destroy a bridge instance.
1711 */
1712 static int
bridge_clone_destroy(struct ifnet * ifp)1713 bridge_clone_destroy(struct ifnet *ifp)
1714 {
1715 struct bridge_softc * __single sc = ifp->if_softc;
1716 struct bridge_iflist *bif;
1717 errno_t error;
1718
1719 BRIDGE_LOCK(sc);
1720 if ((sc->sc_flags & SCF_DETACHING)) {
1721 BRIDGE_UNLOCK(sc);
1722 return 0;
1723 }
1724 sc->sc_flags |= SCF_DETACHING;
1725
1726 bridge_ifstop(ifp, 1);
1727
1728 bridge_cancel_delayed_call(&sc->sc_resize_call);
1729
1730 bridge_cleanup_delayed_call(&sc->sc_resize_call);
1731 bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1732
1733 error = ifnet_set_flags(ifp, 0, IFF_UP);
1734 if (error != 0) {
1735 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1736 }
1737
1738 while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1739 bridge_delete_member(sc, bif);
1740 }
1741
1742 while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1743 bridge_delete_span(sc, bif);
1744 }
1745 BRIDGE_UNLOCK(sc);
1746
1747 error = ifnet_detach(ifp);
1748 if (error != 0) {
1749 panic("%s (%d): ifnet_detach(%p) failed %d",
1750 __func__, __LINE__, ifp, error);
1751 }
1752 return 0;
1753 }
1754
/*
 * DRVSPEC:
 *
 *	Common body for the driver-specific ioctl cases.  The macro expands
 *	in a scope that must provide `cmd', `sc', `error', `ifd', `args',
 *	`bc' and `bridge_control_table'; it also reads
 *	bridge_control_table_size.
 *
 *	In order, it: bounds-checks ifd->ifd_cmd against the table size,
 *	rejects a get/set direction that disagrees with BC_F_COPYOUT,
 *	performs the superuser check for BC_F_SUSER entries, verifies that
 *	ifd->ifd_len equals the entry's bc_argsize and fits in `args',
 *	zeroes `args', copies the argument in for BC_F_COPYIN entries, runs
 *	the handler with the bridge lock held, and finally copies the result
 *	out for BC_F_COPYOUT entries.  Any failure leaves the code in
 *	`error' and skips the remaining steps.
 */
#define DRVSPEC do {                                                    \
	if (ifd->ifd_cmd >= bridge_control_table_size) {                \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
	bc = &bridge_control_table[ifd->ifd_cmd];                       \
                                                                        \
	if (cmd == SIOCGDRVSPEC &&                                      \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) {                       \
	        error = EINVAL;                                         \
	        break;                                                  \
	} else if (cmd == SIOCSDRVSPEC &&                               \
	    (bc->bc_flags & BC_F_COPYOUT) != 0) {                       \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	if (bc->bc_flags & BC_F_SUSER) {                                \
	        error = kauth_authorize_generic(kauth_cred_get(),       \
	            KAUTH_GENERIC_ISSUSER);                             \
	        if (error) {                                            \
	                break;                                          \
	        }                                                       \
	}                                                               \
                                                                        \
	if (ifd->ifd_len != bc->bc_argsize ||                           \
	    ifd->ifd_len > sizeof (args)) {                             \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	bzero(&args, sizeof (args));                                    \
	if (bc->bc_flags & BC_F_COPYIN) {                               \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len);     \
	        if (error) {                                            \
	                break;                                          \
	        }                                                       \
	}                                                               \
                                                                        \
	BRIDGE_LOCK(sc);                                                \
	error = (*bc->bc_func)(sc, &args, sizeof(args));                \
	BRIDGE_UNLOCK(sc);                                              \
	if (error) {                                                    \
	        break;                                                  \
	}                                                               \
                                                                        \
	if (bc->bc_flags & BC_F_COPYOUT) {                              \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len);    \
	}                                                               \
} while (0)
1801
1802 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1803 interface_needs_input_broadcast(struct ifnet * ifp)
1804 {
1805 /*
1806 * Selectively enable input broadcast only when necessary.
1807 * The bridge interface itself attaches a fake protocol
1808 * so checking for at least two protocols means that the
1809 * interface is being used for something besides bridging
1810 * and needs to see broadcast packets from other members.
1811 */
1812 return if_get_protolist(ifp, NULL, 0) >= 2;
1813 }
1814
1815 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1816 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1817 {
1818 boolean_t old_input_broadcast;
1819
1820 old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1821 if (input_broadcast) {
1822 bif->bif_flags |= BIFF_INPUT_BROADCAST;
1823 } else {
1824 bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1825 }
1826 return old_input_broadcast != input_broadcast;
1827 }
1828
1829 /*
1830 * bridge_ioctl:
1831 *
1832 * Handle a control request from the operator.
1833 */
1834 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * __sized_by (IOCPARM_LEN (cmd))data)1835 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)) data)
1836 {
1837 struct bridge_softc * __single sc = ifp->if_softc;
1838 struct ifreq *ifr = (struct ifreq *)data;
1839 struct bridge_iflist *bif;
1840 int error = 0;
1841
1842 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1843
1844 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1845 "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1846 ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1847 (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1848 (char)IOCGROUP(cmd), cmd & 0xff);
1849
1850 switch (cmd) {
1851 case SIOCAIFADDR_IN6_32:
1852 case SIOCAIFADDR_IN6_64:
1853 case SIOCSIFADDR:
1854 case SIOCAIFADDR:
1855 ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1856 BRIDGE_LOCK(sc);
1857 sc->sc_flags |= SCF_ADDRESS_ASSIGNED;
1858 BRIDGE_UNLOCK(sc);
1859 BRIDGE_LOG(LOG_NOTICE, 0,
1860 "ifp %s has address", ifp->if_xname);
1861 break;
1862
1863 case SIOCGIFMEDIA32:
1864 case SIOCGIFMEDIA64: {
1865 // cast to 32bit version to work within bounds with 32bit userspace
1866 struct ifmediareq32 *ifmr = (struct ifmediareq32 *)data;
1867 user_addr_t user_addr;
1868
1869 user_addr = (cmd == SIOCGIFMEDIA64) ?
1870 ((struct ifmediareq64 *)data)->ifmu_ulist :
1871 CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
1872
1873 ifmr->ifm_status = IFM_AVALID;
1874 ifmr->ifm_mask = 0;
1875 ifmr->ifm_count = 1;
1876
1877 BRIDGE_LOCK(sc);
1878 if (!(sc->sc_flags & SCF_DETACHING) &&
1879 (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1880 ifmr->ifm_status |= IFM_ACTIVE;
1881 ifmr->ifm_active = ifmr->ifm_current =
1882 IFM_ETHER | IFM_AUTO;
1883 } else {
1884 ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1885 }
1886 BRIDGE_UNLOCK(sc);
1887
1888 if (user_addr != USER_ADDR_NULL) {
1889 error = copyout(&ifmr->ifm_current, user_addr,
1890 sizeof(int));
1891 }
1892 break;
1893 }
1894
1895 case SIOCADDMULTI:
1896 case SIOCDELMULTI:
1897 break;
1898
1899 case SIOCSDRVSPEC32:
1900 case SIOCGDRVSPEC32: {
1901 union {
1902 struct ifbreq ifbreq;
1903 struct ifbifconf32 ifbifconf;
1904 struct ifbareq32 ifbareq;
1905 struct ifbaconf32 ifbaconf;
1906 struct ifbrparam ifbrparam;
1907 struct ifbropreq32 ifbropreq;
1908 } args;
1909 struct ifdrv32 *ifd = (struct ifdrv32 *)data;
1910 const struct bridge_control *bridge_control_table =
1911 bridge_control_table32, *bc;
1912
1913 DRVSPEC;
1914
1915 break;
1916 }
1917 case SIOCSDRVSPEC64:
1918 case SIOCGDRVSPEC64: {
1919 union {
1920 struct ifbreq ifbreq;
1921 struct ifbifconf64 ifbifconf;
1922 struct ifbareq64 ifbareq;
1923 struct ifbaconf64 ifbaconf;
1924 struct ifbrparam ifbrparam;
1925 struct ifbropreq64 ifbropreq;
1926 } args;
1927 struct ifdrv64 *ifd = (struct ifdrv64 *)data;
1928 const struct bridge_control *bridge_control_table =
1929 bridge_control_table64, *bc;
1930
1931 DRVSPEC;
1932
1933 break;
1934 }
1935
1936 case SIOCSIFFLAGS:
1937 if (!(ifp->if_flags & IFF_UP) &&
1938 (ifp->if_flags & IFF_RUNNING)) {
1939 /*
1940 * If interface is marked down and it is running,
1941 * then stop and disable it.
1942 */
1943 BRIDGE_LOCK(sc);
1944 bridge_ifstop(ifp, 1);
1945 BRIDGE_UNLOCK(sc);
1946 } else if ((ifp->if_flags & IFF_UP) &&
1947 !(ifp->if_flags & IFF_RUNNING)) {
1948 /*
1949 * If interface is marked up and it is stopped, then
1950 * start it.
1951 */
1952 BRIDGE_LOCK(sc);
1953 error = bridge_init(ifp);
1954 BRIDGE_UNLOCK(sc);
1955 }
1956 break;
1957
1958 case SIOCSIFLLADDR:
1959 error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
1960 ifr->ifr_addr.sa_len);
1961 if (error != 0) {
1962 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1963 "%s SIOCSIFLLADDR error %d", ifp->if_xname,
1964 error);
1965 }
1966 break;
1967
1968 case SIOCSIFMTU:
1969 if (ifr->ifr_mtu < 576) {
1970 error = EINVAL;
1971 break;
1972 }
1973 BRIDGE_LOCK(sc);
1974 if (TAILQ_EMPTY(&sc->sc_iflist)) {
1975 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1976 BRIDGE_UNLOCK(sc);
1977 break;
1978 }
1979 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1980 if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
1981 BRIDGE_LOG(LOG_NOTICE, 0,
1982 "%s invalid MTU: %u(%s) != %d",
1983 sc->sc_ifp->if_xname,
1984 bif->bif_ifp->if_mtu,
1985 bif->bif_ifp->if_xname, ifr->ifr_mtu);
1986 error = EINVAL;
1987 break;
1988 }
1989 }
1990 if (!error) {
1991 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1992 }
1993 BRIDGE_UNLOCK(sc);
1994 break;
1995
1996 default:
1997 error = ether_ioctl(ifp, cmd, data);
1998 if (error != 0 && error != EOPNOTSUPP) {
1999 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
2000 "ifp %s cmd 0x%08lx "
2001 "(%c%c [%lu] %c %lu) failed error: %d",
2002 ifp->if_xname, cmd,
2003 (cmd & IOC_IN) ? 'I' : ' ',
2004 (cmd & IOC_OUT) ? 'O' : ' ',
2005 IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
2006 cmd & 0xff, error);
2007 }
2008 break;
2009 }
2010 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2011
2012 return error;
2013 }
2014
2015 #if HAS_IF_CAP
2016 /*
2017 * bridge_mutecaps:
2018 *
2019 * Clear or restore unwanted capabilities on the member interface
2020 */
2021 static void
bridge_mutecaps(struct bridge_softc * sc)2022 bridge_mutecaps(struct bridge_softc *sc)
2023 {
2024 struct bridge_iflist *bif;
2025 int enabled, mask;
2026
2027 /* Initial bitmask of capabilities to test */
2028 mask = BRIDGE_IFCAPS_MASK;
2029
2030 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2031 /* Every member must support it or its disabled */
2032 mask &= bif->bif_savedcaps;
2033 }
2034
2035 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2036 enabled = bif->bif_ifp->if_capenable;
2037 enabled &= ~BRIDGE_IFCAPS_STRIP;
2038 /* strip off mask bits and enable them again if allowed */
2039 enabled &= ~BRIDGE_IFCAPS_MASK;
2040 enabled |= mask;
2041
2042 bridge_set_ifcap(sc, bif, enabled);
2043 }
2044 }
2045
2046 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)2047 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
2048 {
2049 struct ifnet *ifp = bif->bif_ifp;
2050 struct ifreq ifr;
2051 int error;
2052
2053 bzero(&ifr, sizeof(ifr));
2054 ifr.ifr_reqcap = set;
2055
2056 if (ifp->if_capenable != set) {
2057 IFF_LOCKGIANT(ifp);
2058 error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2059 IFF_UNLOCKGIANT(ifp);
2060 if (error) {
2061 BRIDGE_LOG(LOG_NOTICE, 0,
2062 "%s error setting interface capabilities on %s",
2063 sc->sc_ifp->if_xname, ifp->if_xname);
2064 }
2065 }
2066 }
2067 #endif /* HAS_IF_CAP */
2068
2069 static errno_t
siocsifcap(struct ifnet * ifp,uint32_t cap_enable)2070 siocsifcap(struct ifnet * ifp, uint32_t cap_enable)
2071 {
2072 struct ifreq ifr;
2073
2074 bzero(&ifr, sizeof(ifr));
2075 ifr.ifr_reqcap = cap_enable;
2076 return ifnet_ioctl(ifp, 0, SIOCSIFCAP, &ifr);
2077 }
2078
2079 static const char *
enable_disable_str(boolean_t enable)2080 enable_disable_str(boolean_t enable)
2081 {
2082 return (const char * __null_terminated)(enable ? "enable" : "disable");
2083 }
2084
2085 static boolean_t
bridge_set_lro(struct ifnet * ifp,boolean_t enable)2086 bridge_set_lro(struct ifnet * ifp, boolean_t enable)
2087 {
2088 uint32_t cap_enable;
2089 uint32_t cap_supported;
2090 boolean_t changed = FALSE;
2091 boolean_t lro_enabled;
2092
2093 cap_supported = ifnet_capabilities_supported(ifp);
2094 if ((cap_supported & IFCAP_LRO) == 0) {
2095 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2096 "%s doesn't support LRO",
2097 ifp->if_xname);
2098 goto done;
2099 }
2100 if (bridge_allow_lro_num_seg != 0 &&
2101 (cap_supported & IFCAP_LRO_NUM_SEG) != 0) {
2102 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2103 "%s supports LRO_NUM_SEG, leaving LRO enabled",
2104 ifp->if_xname);
2105 goto done;
2106 }
2107 cap_enable = ifnet_capabilities_enabled(ifp);
2108 lro_enabled = (cap_enable & IFCAP_LRO) != 0;
2109 if (lro_enabled != enable) {
2110 errno_t error;
2111
2112 if (enable) {
2113 cap_enable |= IFCAP_LRO;
2114 } else {
2115 cap_enable &= ~IFCAP_LRO;
2116 }
2117 error = siocsifcap(ifp, cap_enable);
2118 if (error != 0) {
2119 BRIDGE_LOG(LOG_NOTICE, 0,
2120 "%s %s failed (cap 0x%x) %d",
2121 ifp->if_xname,
2122 enable_disable_str(enable),
2123 cap_enable,
2124 error);
2125 } else {
2126 changed = TRUE;
2127 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2128 "%s %s success (cap 0x%x)",
2129 ifp->if_xname,
2130 enable_disable_str(enable),
2131 cap_enable);
2132 }
2133 }
2134 done:
2135 return changed;
2136 }
2137
2138 static errno_t
bridge_set_tso(struct bridge_softc * sc)2139 bridge_set_tso(struct bridge_softc *sc)
2140 {
2141 struct bridge_iflist *bif;
2142 u_int32_t tso_v4_mtu;
2143 u_int32_t tso_v6_mtu;
2144 ifnet_offload_t offload;
2145 errno_t error = 0;
2146
2147 /* By default, support TSO */
2148 offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2149 tso_v4_mtu = IP_MAXPACKET;
2150 tso_v6_mtu = IP_MAXPACKET;
2151
2152 /* Use the lowest common denominator of the members */
2153 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2154 ifnet_t ifp = bif->bif_ifp;
2155
2156 if (ifp == NULL) {
2157 continue;
2158 }
2159
2160 if (offload & IFNET_TSO_IPV4) {
2161 if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2162 if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2163 tso_v4_mtu = ifp->if_tso_v4_mtu;
2164 }
2165 } else {
2166 offload &= ~IFNET_TSO_IPV4;
2167 tso_v4_mtu = 0;
2168 }
2169 }
2170 if (offload & IFNET_TSO_IPV6) {
2171 if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2172 if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2173 tso_v6_mtu = ifp->if_tso_v6_mtu;
2174 }
2175 } else {
2176 offload &= ~IFNET_TSO_IPV6;
2177 tso_v6_mtu = 0;
2178 }
2179 }
2180 }
2181
2182 if (offload != sc->sc_ifp->if_hwassist) {
2183 error = ifnet_set_offload(sc->sc_ifp, offload);
2184 if (error != 0) {
2185 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2186 "ifnet_set_offload(%s, 0x%x) failed %d",
2187 sc->sc_ifp->if_xname, offload, error);
2188 goto done;
2189 }
2190 /*
2191 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2192 * as large as the interface MTU
2193 */
2194 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2195 if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2196 tso_v4_mtu = sc->sc_ifp->if_mtu;
2197 }
2198 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2199 tso_v4_mtu);
2200 if (error != 0) {
2201 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2202 "ifnet_set_tso_mtu(%s, "
2203 "AF_INET, %u) failed %d",
2204 sc->sc_ifp->if_xname,
2205 tso_v4_mtu, error);
2206 goto done;
2207 }
2208 }
2209 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2210 if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2211 tso_v6_mtu = sc->sc_ifp->if_mtu;
2212 }
2213 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2214 tso_v6_mtu);
2215 if (error != 0) {
2216 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2217 "ifnet_set_tso_mtu(%s, "
2218 "AF_INET6, %u) failed %d",
2219 sc->sc_ifp->if_xname,
2220 tso_v6_mtu, error);
2221 goto done;
2222 }
2223 }
2224 }
2225 done:
2226 return error;
2227 }
2228
2229 static const char *
sanitize_ifname(char * __sized_by (IFNAMSIZ)ifname)2230 sanitize_ifname(char * __sized_by(IFNAMSIZ) ifname)
2231 {
2232 ifname[IFNAMSIZ - 1] = '\0';
2233 return __unsafe_null_terminated_from_indexable(ifname, &ifname[IFNAMSIZ - 1]);
2234 }
2235
2236 /*
2237 * bridge_lookup_member:
2238 *
2239 * Lookup a bridge member interface.
2240 */
2241 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,char * __sized_by (IFNAMSIZ)name_unsanitized)2242 bridge_lookup_member(struct bridge_softc *sc, char * __sized_by(IFNAMSIZ) name_unsanitized)
2243 {
2244 struct bridge_iflist *bif;
2245 struct ifnet *ifp;
2246 const char * __null_terminated name = sanitize_ifname(name_unsanitized);
2247
2248 BRIDGE_LOCK_ASSERT_HELD(sc);
2249
2250 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2251 ifp = bif->bif_ifp;
2252 if (strcmp(ifp->if_xname, name) == 0) {
2253 return bif;
2254 }
2255 }
2256
2257 return NULL;
2258 }
2259
2260 /*
2261 * bridge_lookup_member_if:
2262 *
2263 * Lookup a bridge member interface by ifnet*.
2264 */
2265 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2266 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2267 {
2268 struct bridge_iflist *bif;
2269
2270 BRIDGE_LOCK_ASSERT_HELD(sc);
2271
2272 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2273 if (bif->bif_ifp == member_ifp) {
2274 return bif;
2275 }
2276 }
2277
2278 return NULL;
2279 }
2280
2281 static inline bool
get_and_clear_promisc(mbuf_t m)2282 get_and_clear_promisc(mbuf_t m)
2283 {
2284 bool is_promisc;
2285
2286 /*
2287 * Need to clear the promiscuous flag otherwise the packet will be
2288 * dropped by DLIL after processing filters
2289 */
2290 is_promisc = (mbuf_flags(m) & MBUF_PROMISC) != 0;
2291 if (is_promisc) {
2292 mbuf_setflags_mask(m, 0, MBUF_PROMISC);
2293 }
2294 return is_promisc;
2295 }
2296
2297 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2298 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2299 mbuf_t *data, char **frame_ptr)
2300 {
2301 #pragma unused(protocol)
2302 errno_t error = 0;
2303 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2304 struct bridge_softc *sc = bif->bif_sc;
2305 int included = 0;
2306 struct ether_header * eh_p;
2307 size_t frmlen = 0;
2308 bool is_promisc;
2309 mblist list;
2310 mbuf_t m = *data;
2311
2312 if ((m->m_flags & M_PROTO1)) {
2313 goto out;
2314 }
2315
2316 if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2317 *frame_ptr <= mtod(m, char *)) {
2318 included = 1;
2319 frmlen = mtod(m, char *) - *frame_ptr;
2320 }
2321 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2322 "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2323 "frmlen %lu", sc->sc_ifp->if_xname,
2324 ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2325 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
2326 (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2327 included ? "inside" : "outside", frmlen);
2328 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2329 brlog_mbuf(m, "bridge_iff_input[", "");
2330 brlog_ether_header((struct ether_header *)
2331 (void *)*frame_ptr);
2332 brlog_mbuf_data(m, 0, 20);
2333 }
2334 if (included == 0) {
2335 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2336 goto out;
2337 }
2338
2339 /* Move data pointer to start of frame to the link layer header */
2340 _mbuf_adjust_pkthdr_and_data(m, -frmlen);
2341
2342 /* make sure we can access the ethernet header */
2343 if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2344 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2345 "short frame %lu < %lu",
2346 mbuf_pkthdr_len(m), sizeof(struct ether_header));
2347 goto out;
2348 }
2349 if (mbuf_len(m) < sizeof(struct ether_header)) {
2350 error = mbuf_pullup(data, sizeof(struct ether_header));
2351 if (error != 0) {
2352 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2353 "mbuf_pullup(%lu) failed %d",
2354 sizeof(struct ether_header),
2355 error);
2356 error = EJUSTRETURN;
2357 goto out;
2358 }
2359 if (m != *data) {
2360 m = *data;
2361 *frame_ptr = mtod(m, char *);
2362 }
2363 }
2364 mblist_init(&list);
2365 mblist_append(&list, m);
2366 is_promisc = get_and_clear_promisc(m);
2367 eh_p = __unsafe_forge_single(struct ether_header *, *frame_ptr);
2368 list = bridge_input_list(sc, ifp, eh_p, list, is_promisc);
2369 m = *data = list.head;
2370 if (m == NULL) {
2371 error = EJUSTRETURN;
2372 }
2373 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2374 BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2375 brlog_mbuf(m, "bridge_iff_input]", "");
2376 }
2377
2378 out:
2379 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2380
2381 return error;
2382 }
2383
2384 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2385 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2386 mbuf_t *data)
2387 {
2388 #pragma unused(protocol)
2389 errno_t error = 0;
2390 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2391 struct bridge_softc *sc = bif->bif_sc;
2392 mbuf_t m = *data;
2393
2394 if ((m->m_flags & M_PROTO1)) {
2395 goto out;
2396 }
2397 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2398 "%s from %s m 0x%llx data 0x%llx",
2399 sc->sc_ifp->if_xname, ifp->if_xname,
2400 (uint64_t)VM_KERNEL_ADDRPERM(m),
2401 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)));
2402
2403 error = bridge_member_output(sc, ifp, data);
2404 if (error != 0 && error != EJUSTRETURN) {
2405 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2406 "bridge_member_output failed error %d",
2407 error);
2408 }
2409 out:
2410 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2411
2412 return error;
2413 }
2414
2415 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2416 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2417 const struct kev_msg *event_msg)
2418 {
2419 #pragma unused(protocol)
2420 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2421 struct bridge_softc *sc = bif->bif_sc;
2422
2423 if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2424 event_msg->kev_class == KEV_NETWORK_CLASS &&
2425 event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2426 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2427 "%s event_code %u - %s",
2428 ifp->if_xname, event_msg->event_code,
2429 dlil_kev_dl_code_str(event_msg->event_code));
2430
2431 switch (event_msg->event_code) {
2432 case KEV_DL_LINK_OFF:
2433 case KEV_DL_LINK_ON: {
2434 bridge_iflinkevent(ifp);
2435 #if BRIDGESTP
2436 bstp_linkstate(ifp, event_msg->event_code);
2437 #endif /* BRIDGESTP */
2438 break;
2439 }
2440 case KEV_DL_SIFFLAGS: {
2441 if ((ifp->if_flags & IFF_UP) == 0) {
2442 break;
2443 }
2444 if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2445 errno_t error;
2446
2447 error = ifnet_set_promiscuous(ifp, 1);
2448 if (error != 0) {
2449 BRIDGE_LOG(LOG_NOTICE, 0,
2450 "ifnet_set_promiscuous (%s)"
2451 " failed %d", ifp->if_xname,
2452 error);
2453 } else {
2454 bif->bif_flags |= BIFF_PROMISC;
2455 }
2456 }
2457 if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2458 (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2459 errno_t error;
2460
2461 error = if_allmulti(ifp, 1);
2462 if (error != 0) {
2463 BRIDGE_LOG(LOG_NOTICE, 0,
2464 "if_allmulti (%s)"
2465 " failed %d", ifp->if_xname,
2466 error);
2467 } else {
2468 bif->bif_flags |= BIFF_ALL_MULTI;
2469 #ifdef XNU_PLATFORM_AppleTVOS
2470 ip6_forwarding = 1;
2471 #endif /* XNU_PLATFORM_AppleTVOS */
2472 }
2473 }
2474 break;
2475 }
2476 case KEV_DL_IFCAP_CHANGED: {
2477 BRIDGE_LOCK(sc);
2478 bridge_set_tso(sc);
2479 BRIDGE_UNLOCK(sc);
2480 break;
2481 }
2482 case KEV_DL_PROTO_DETACHED:
2483 case KEV_DL_PROTO_ATTACHED: {
2484 bridge_proto_attach_changed(ifp);
2485 break;
2486 }
2487 default:
2488 break;
2489 }
2490 }
2491 }
2492
2493 /*
2494 * bridge_iff_detached:
2495 *
2496 * Called when our interface filter has been detached from a
2497 * member interface.
2498 */
2499 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2500 bridge_iff_detached(void *cookie, ifnet_t ifp)
2501 {
2502 #pragma unused(cookie)
2503 struct bridge_iflist *bif;
2504 struct bridge_softc * __single sc = ifp->if_bridge;
2505
2506 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2507
2508 /* Check if the interface is a bridge member */
2509 if (sc != NULL) {
2510 BRIDGE_LOCK(sc);
2511 bif = bridge_lookup_member_if(sc, ifp);
2512 if (bif != NULL) {
2513 bridge_delete_member(sc, bif);
2514 }
2515 BRIDGE_UNLOCK(sc);
2516 return;
2517 }
2518 /* Check if the interface is a span port */
2519 lck_mtx_lock(&bridge_list_mtx);
2520 LIST_FOREACH(sc, &bridge_list, sc_list) {
2521 BRIDGE_LOCK(sc);
2522 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2523 if (ifp == bif->bif_ifp) {
2524 bridge_delete_span(sc, bif);
2525 break;
2526 }
2527 BRIDGE_UNLOCK(sc);
2528 }
2529 lck_mtx_unlock(&bridge_list_mtx);
2530 }
2531
2532 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2533 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2534 char *header)
2535 {
2536 #pragma unused(protocol, packet, header)
2537 BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2538 ifp->if_xname);
2539 return 0;
2540 }
2541
2542 static int
bridge_attach_protocol(struct ifnet * ifp)2543 bridge_attach_protocol(struct ifnet *ifp)
2544 {
2545 int error;
2546 struct ifnet_attach_proto_param reg;
2547
2548 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2549 bzero(®, sizeof(reg));
2550 reg.input = bridge_proto_input;
2551
2552 error = ifnet_attach_protocol(ifp, PF_BRIDGE, ®);
2553 if (error) {
2554 BRIDGE_LOG(LOG_NOTICE, 0,
2555 "ifnet_attach_protocol(%s) failed, %d",
2556 ifp->if_xname, error);
2557 }
2558
2559 return error;
2560 }
2561
2562 static int
bridge_detach_protocol(struct ifnet * ifp)2563 bridge_detach_protocol(struct ifnet *ifp)
2564 {
2565 int error;
2566
2567 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2568 error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2569 if (error) {
2570 BRIDGE_LOG(LOG_NOTICE, 0,
2571 "ifnet_detach_protocol(%s) failed, %d",
2572 ifp->if_xname, error);
2573 }
2574
2575 return error;
2576 }
2577
2578 /*
2579 * bridge_delete_member:
2580 *
2581 * Delete the specified member interface.
2582 */
2583 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)2584 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2585 {
2586 #if SKYWALK
2587 boolean_t add_netagent = FALSE;
2588 #endif /* SKYWALK */
2589 uint32_t bif_flags;
2590 struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2591 int lladdr_changed = 0, error;
2592 uint8_t eaddr[ETHER_ADDR_LEN];
2593 u_int32_t event_code = 0;
2594
2595 BRIDGE_LOCK_ASSERT_HELD(sc);
2596 VERIFY(ifs != NULL);
2597
2598 /*
2599 * Remove the member from the list first so it cannot be found anymore
2600 * when we release the bridge lock below
2601 */
2602 if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2603 bif->bif_flags &= ~BIFF_IN_MEMBER_LIST;
2604 BRIDGE_XLOCK(sc);
2605 TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2606 BRIDGE_XDROP(sc);
2607 }
2608 if (sc->sc_mac_nat_bif != NULL) {
2609 if (bif == sc->sc_mac_nat_bif) {
2610 bridge_mac_nat_disable(sc);
2611 } else {
2612 bridge_mac_nat_flush_entries(sc, bif);
2613 }
2614 }
2615 #if BRIDGESTP
2616 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2617 bstp_disable(&bif->bif_stp);
2618 }
2619 #endif /* BRIDGESTP */
2620
2621 /*
2622 * If removing the interface that gave the bridge its mac address, set
2623 * the mac address of the bridge to the address of the next member, or
2624 * to its default address if no members are left.
2625 */
2626 if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2627 ifnet_release(sc->sc_ifaddr);
2628 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2629 bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
2630 sc->sc_ifaddr = NULL;
2631 } else {
2632 struct ifnet *fif =
2633 TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2634 bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
2635 sc->sc_ifaddr = fif;
2636 ifnet_reference(fif); /* for sc_ifaddr */
2637 }
2638 lladdr_changed = 1;
2639 }
2640
2641 #if HAS_IF_CAP
2642 bridge_mutecaps(sc); /* recalculate now this interface is removed */
2643 #endif /* HAS_IF_CAP */
2644
2645 error = bridge_set_tso(sc);
2646 if (error != 0) {
2647 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2648 }
2649
2650 bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
2651
2652 KASSERT(bif->bif_addrcnt == 0,
2653 ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2654
2655 /*
2656 * Update link status of the bridge based on its remaining members
2657 */
2658 event_code = bridge_updatelinkstatus(sc);
2659 bif_flags = bif->bif_flags;
2660 BRIDGE_UNLOCK(sc);
2661
2662 /* only perform these steps if the interface is still attached */
2663 if (ifnet_is_attached(ifs, 1)) {
2664 #if SKYWALK
2665 add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;
2666
2667 if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2668 ifnet_detach_flowswitch_nexus(ifs);
2669 }
2670 #endif /* SKYWALK */
2671 /* disable promiscuous mode */
2672 if ((bif_flags & BIFF_PROMISC) != 0) {
2673 (void) ifnet_set_promiscuous(ifs, 0);
2674 }
2675 /* disable all multi */
2676 if ((bif_flags & BIFF_ALL_MULTI) != 0) {
2677 (void)if_allmulti(ifs, 0);
2678 }
2679 #if HAS_IF_CAP
2680 /* re-enable any interface capabilities */
2681 bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2682 #endif
2683 /* detach bridge "protocol" */
2684 if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2685 (void)bridge_detach_protocol(ifs);
2686 }
2687 /* detach interface filter */
2688 if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2689 iflt_detach(bif->bif_iff_ref);
2690 }
2691 /* re-enable LRO */
2692 if ((bif_flags & BIFF_LRO_DISABLED) != 0) {
2693 (void)bridge_set_lro(ifs, TRUE);
2694 }
2695 ifnet_decr_iorefcnt(ifs);
2696 }
2697
2698 if (lladdr_changed &&
2699 (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2700 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2701 }
2702
2703 if (event_code != 0) {
2704 bridge_link_event(bifp, event_code);
2705 }
2706
2707 #if BRIDGESTP
2708 bstp_destroy(&bif->bif_stp); /* prepare to free */
2709 #endif /* BRIDGESTP */
2710
2711 kfree_type(struct bridge_iflist, bif);
2712 ifs->if_bridge = NULL;
2713 #if SKYWALK
2714 if (add_netagent && ifnet_is_attached(ifs, 1)) {
2715 (void)ifnet_add_netagent(ifs);
2716 ifnet_decr_iorefcnt(ifs);
2717 }
2718 #endif /* SKYWALK */
2719
2720 ifnet_release(ifs);
2721
2722 BRIDGE_LOCK(sc);
2723 }
2724
2725 /*
2726 * bridge_delete_span:
2727 *
2728 * Delete the specified span interface.
2729 */
2730 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2731 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2732 {
2733 BRIDGE_LOCK_ASSERT_HELD(sc);
2734
2735 KASSERT(bif->bif_ifp->if_bridge == NULL,
2736 ("%s: not a span interface", __func__));
2737
2738 ifnet_release(bif->bif_ifp);
2739
2740 TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2741 kfree_type(struct bridge_iflist, bif);
2742 }
2743
2744 static int
bridge_ioctl_add(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)2745 bridge_ioctl_add(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
2746 {
2747 struct ifbreq * __single req = arg;
2748 struct bridge_iflist *bif = NULL;
2749 struct ifnet *ifs, *bifp = sc->sc_ifp;
2750 int error = 0, lladdr_changed = 0;
2751 uint8_t eaddr[ETHER_ADDR_LEN];
2752 struct iff_filter iff;
2753 u_int32_t event_code = 0;
2754 boolean_t input_broadcast;
2755 int media_active;
2756 boolean_t wifi_infra = FALSE;
2757
2758 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
2759 if (ifs == NULL) {
2760 return ENOENT;
2761 }
2762 if (ifs->if_ioctl == NULL) { /* must be supported */
2763 return EINVAL;
2764 }
2765
2766 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
2767 return EINVAL;
2768 }
2769
2770 /* If it's in the span list, it can't be a member. */
2771 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2772 if (ifs == bif->bif_ifp) {
2773 return EBUSY;
2774 }
2775 }
2776
2777 if (ifs->if_bridge == sc) {
2778 return EEXIST;
2779 }
2780
2781 if (ifs->if_bridge != NULL) {
2782 return EBUSY;
2783 }
2784
2785 switch (ifs->if_type) {
2786 case IFT_ETHER:
2787 if (strcmp(ifs->if_name, "en") == 0 &&
2788 ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2789 (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2790 /* XXX is there a better way to identify Wi-Fi STA? */
2791 wifi_infra = TRUE;
2792 }
2793 break;
2794 case IFT_L2VLAN:
2795 case IFT_IEEE8023ADLAG:
2796 break;
2797 default:
2798 return EINVAL;
2799 }
2800
2801 /* fail to add the interface if the MTU doesn't match */
2802 if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2803 BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2804 sc->sc_ifp->if_xname,
2805 ifs->if_xname);
2806 return EINVAL;
2807 }
2808
2809 if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
2810 /* there's already an interface that's doing MAC NAT */
2811 return EBUSY;
2812 }
2813
2814 /* prevent the interface from detaching while we add the member */
2815 if (!ifnet_is_attached(ifs, 1)) {
2816 return ENXIO;
2817 }
2818
2819 /* allocate a new member */
2820 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2821 bif->bif_ifp = ifs;
2822 ifnet_reference(ifs);
2823 bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2824 #if HAS_IF_CAP
2825 bif->bif_savedcaps = ifs->if_capenable;
2826 #endif /* HAS_IF_CAP */
2827 bif->bif_sc = sc;
2828 if (wifi_infra) {
2829 (void)bridge_mac_nat_enable(sc, bif);
2830 }
2831
2832 /* Allow the first Ethernet member to define the MTU */
2833 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2834 sc->sc_ifp->if_mtu = ifs->if_mtu;
2835 }
2836
2837 /*
2838 * Assign the interface's MAC address to the bridge if it's the first
2839 * member and the MAC address of the bridge has not been changed from
2840 * the default (randomly) generated one.
2841 */
2842 if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2843 _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2844 bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2845 sc->sc_ifaddr = ifs;
2846 ifnet_reference(ifs); /* for sc_ifaddr */
2847 lladdr_changed = 1;
2848 }
2849
2850 ifs->if_bridge = sc;
2851 #if BRIDGESTP
2852 bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2853 #endif /* BRIDGESTP */
2854
2855 #if HAS_IF_CAP
2856 /* Set interface capabilities to the intersection set of all members */
2857 bridge_mutecaps(sc);
2858 #endif /* HAS_IF_CAP */
2859
2860 /*
2861 * Respect lock ordering with DLIL lock for the following operations
2862 */
2863 BRIDGE_UNLOCK(sc);
2864
2865 /* enable promiscuous mode */
2866 error = ifnet_set_promiscuous(ifs, 1);
2867 switch (error) {
2868 case 0:
2869 bif->bif_flags |= BIFF_PROMISC;
2870 break;
2871 case ENETDOWN:
2872 case EPWROFF:
2873 BRIDGE_LOG(LOG_NOTICE, 0,
2874 "ifnet_set_promiscuous(%s) failed %d, ignoring",
2875 ifs->if_xname, error);
2876 /* Ignore error when device is not up */
2877 error = 0;
2878 break;
2879 default:
2880 BRIDGE_LOG(LOG_NOTICE, 0,
2881 "ifnet_set_promiscuous(%s) failed %d",
2882 ifs->if_xname, error);
2883 BRIDGE_LOCK(sc);
2884 goto out;
2885 }
2886 if (wifi_infra) {
2887 int this_error;
2888
2889 /* Wi-Fi doesn't really support promiscuous, set allmulti */
2890 bif->bif_flags |= BIFF_WIFI_INFRA;
2891 this_error = if_allmulti(ifs, 1);
2892 if (this_error == 0) {
2893 bif->bif_flags |= BIFF_ALL_MULTI;
2894 #ifdef XNU_PLATFORM_AppleTVOS
2895 ip6_forwarding = 1;
2896 #endif /* XNU_PLATFORM_AppleTVOS */
2897 } else {
2898 BRIDGE_LOG(LOG_NOTICE, 0,
2899 "if_allmulti(%s) failed %d, ignoring",
2900 ifs->if_xname, this_error);
2901 }
2902 }
2903 #if SKYWALK
2904 /* ensure that the flowswitch is present for native interface */
2905 if (SKYWALK_NATIVE(ifs)) {
2906 if (ifnet_attach_flowswitch_nexus(ifs)) {
2907 bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
2908 }
2909 }
2910 /* remove the netagent on the flowswitch (rdar://75050182) */
2911 if (if_is_fsw_netagent_enabled()) {
2912 (void)ifnet_remove_netagent(ifs);
2913 bif->bif_flags |= BIFF_NETAGENT_REMOVED;
2914 }
2915 #endif /* SKYWALK */
2916
2917 /*
2918 * install an interface filter
2919 */
2920 memset(&iff, 0, sizeof(struct iff_filter));
2921 iff.iff_cookie = bif;
2922 iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
2923 iff.iff_input = bridge_iff_input;
2924 iff.iff_output = bridge_iff_output;
2925 iff.iff_event = bridge_iff_event;
2926 iff.iff_detached = bridge_iff_detached;
2927 error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
2928 DLIL_IFF_TSO | DLIL_IFF_INTERNAL | DLIL_IFF_BRIDGE);
2929 if (error != 0) {
2930 BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
2931 BRIDGE_LOCK(sc);
2932 goto out;
2933 }
2934 bif->bif_flags |= BIFF_FILTER_ATTACHED;
2935
2936 /*
2937 * install a dummy "bridge" protocol
2938 */
2939 if ((error = bridge_attach_protocol(ifs)) != 0) {
2940 if (error != 0) {
2941 BRIDGE_LOG(LOG_NOTICE, 0,
2942 "bridge_attach_protocol failed %d", error);
2943 BRIDGE_LOCK(sc);
2944 goto out;
2945 }
2946 }
2947 bif->bif_flags |= BIFF_PROTO_ATTACHED;
2948
2949 if (lladdr_changed &&
2950 (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2951 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2952 }
2953
2954 media_active = interface_media_active(ifs);
2955
2956 /* disable LRO if needed */
2957 if (bridge_set_lro(ifs, FALSE)) {
2958 bif->bif_flags |= BIFF_LRO_DISABLED;
2959 }
2960
2961 /*
2962 * No failures past this point. Add the member to the list.
2963 */
2964 BRIDGE_LOCK(sc);
2965 bif->bif_flags |= BIFF_IN_MEMBER_LIST;
2966 BRIDGE_XLOCK(sc);
2967 TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
2968 BRIDGE_XDROP(sc);
2969
2970 /* cache the member link status */
2971 if (media_active != 0) {
2972 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
2973 } else {
2974 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
2975 }
2976
2977 /* the new member may change the link status of the bridge interface */
2978 event_code = bridge_updatelinkstatus(sc);
2979
2980 /* check whether we need input broadcast or not */
2981 input_broadcast = interface_needs_input_broadcast(ifs);
2982 bif_set_input_broadcast(bif, input_broadcast);
2983 BRIDGE_UNLOCK(sc);
2984
2985 if (event_code != 0) {
2986 bridge_link_event(bifp, event_code);
2987 }
2988 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2989 "%s input broadcast %s", ifs->if_xname,
2990 input_broadcast ? "ENABLED" : "DISABLED");
2991
2992 BRIDGE_LOCK(sc);
2993 bridge_set_tso(sc);
2994
2995 out:
2996 /* allow the interface to detach */
2997 ifnet_decr_iorefcnt(ifs);
2998
2999 if (error != 0) {
3000 if (bif != NULL) {
3001 bridge_delete_member(sc, bif);
3002 }
3003 } else if (IFNET_IS_VMNET(ifs)) {
3004 INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
3005 }
3006
3007 return error;
3008 }
3009
3010 static int
bridge_ioctl_del(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3011 bridge_ioctl_del(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3012 {
3013 struct ifbreq * __single req = arg;
3014 struct bridge_iflist *bif;
3015
3016 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3017 if (bif == NULL) {
3018 return ENOENT;
3019 }
3020
3021 bridge_delete_member(sc, bif);
3022
3023 return 0;
3024 }
3025
3026 static int
bridge_ioctl_purge(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3027 bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3028 {
3029 #pragma unused(sc, arg, arg_len)
3030 return 0;
3031 }
3032
3033 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3034 bridge_ioctl_gifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3035 {
3036 struct ifbreq * __single req = arg;
3037 struct bridge_iflist *bif;
3038
3039 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3040 if (bif == NULL) {
3041 return ENOENT;
3042 }
3043
3044 struct bstp_port *bp;
3045
3046 bp = &bif->bif_stp;
3047 req->ifbr_state = bp->bp_state;
3048 req->ifbr_priority = bp->bp_priority;
3049 req->ifbr_path_cost = bp->bp_path_cost;
3050 req->ifbr_proto = bp->bp_protover;
3051 req->ifbr_role = bp->bp_role;
3052 req->ifbr_stpflags = bp->bp_flags;
3053 req->ifbr_ifsflags = bif->bif_ifflags;
3054
3055 /* Copy STP state options as flags */
3056 if (bp->bp_operedge) {
3057 req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
3058 }
3059 if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
3060 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
3061 }
3062 if (bp->bp_ptp_link) {
3063 req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
3064 }
3065 if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
3066 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
3067 }
3068 if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
3069 req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
3070 }
3071 if (bp->bp_flags & BSTP_PORT_ADMCOST) {
3072 req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
3073 }
3074
3075 req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
3076 req->ifbr_addrcnt = bif->bif_addrcnt;
3077 req->ifbr_addrmax = bif->bif_addrmax;
3078 req->ifbr_addrexceeded = bif->bif_addrexceeded;
3079
3080 return 0;
3081 }
3082
3083 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3084 bridge_ioctl_sifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3085 {
3086 struct ifbreq * __single req = arg;
3087 struct bridge_iflist *bif;
3088 #if BRIDGESTP
3089 struct bstp_port *bp;
3090 #endif /* BRIDGESTP */
3091 errno_t error;
3092 uint32_t ifsflags;
3093
3094 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3095 if (bif == NULL) {
3096 return ENOENT;
3097 }
3098
3099 ifsflags = req->ifbr_ifsflags;
3100 if (ifsflags & IFBIF_SPAN) {
3101 /* SPAN is readonly */
3102 return EINVAL;
3103 }
3104 #define CHECKSUM_VIRTIO (IFBIF_CHECKSUM_OFFLOAD | IFBIF_USES_VIRTIO)
3105 if ((ifsflags & CHECKSUM_VIRTIO) == CHECKSUM_VIRTIO) {
3106 /* can't specify checksum and virtio */
3107 return EINVAL;
3108 }
3109 if ((ifsflags & IFBIF_MAC_NAT) != 0 &&
3110 ((ifsflags & CHECKSUM_VIRTIO) != 0 ||
3111 (bif->bif_flags & BIFF_HOST_FILTER) != 0)) {
3112 /* MAC-NAT can't be used with checksum, host filter, or virtio */
3113 return EINVAL;
3114 }
3115 if ((ifsflags & IFBIF_MAC_NAT) != 0) {
3116 error = bridge_mac_nat_enable(sc, bif);
3117 if (error != 0) {
3118 return error;
3119 }
3120 } else if (sc->sc_mac_nat_bif == bif) {
3121 bridge_mac_nat_disable(sc);
3122 }
3123
3124 #if BRIDGESTP
3125 if (ifsflags & IFBIF_STP) {
3126 if ((bif->bif_ifflags & IFBIF_STP) == 0) {
3127 error = bstp_enable(&bif->bif_stp);
3128 if (error) {
3129 return error;
3130 }
3131 }
3132 } else {
3133 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
3134 bstp_disable(&bif->bif_stp);
3135 }
3136 }
3137
3138 /* Pass on STP flags */
3139 bp = &bif->bif_stp;
3140 bstp_set_edge(bp, ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
3141 bstp_set_autoedge(bp, ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
3142 bstp_set_ptp(bp, ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
3143 bstp_set_autoptp(bp, ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
3144 #else /* !BRIDGESTP */
3145 if (ifsflags & IFBIF_STP) {
3146 return EOPNOTSUPP;
3147 }
3148 #endif /* !BRIDGESTP */
3149
3150 /* Save the bits relating to the bridge */
3151 bif->bif_ifflags = ifsflags & IFBIFMASK;
3152
3153 return 0;
3154 }
3155
3156 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3157 bridge_ioctl_scache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3158 {
3159 struct ifbrparam * __single param = arg;
3160
3161 sc->sc_brtmax = param->ifbrp_csize;
3162 bridge_rttrim(sc);
3163 return 0;
3164 }
3165
3166 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3167 bridge_ioctl_gcache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3168 {
3169 struct ifbrparam * __single param = arg;
3170
3171 param->ifbrp_csize = sc->sc_brtmax;
3172
3173 return 0;
3174 }
3175
3176 #define BRIDGE_IOCTL_GIFS do { \
3177 struct bridge_iflist *bif; \
3178 struct ifbreq breq; \
3179 char *buf, *outbuf; \
3180 unsigned int count, buflen, len; \
3181 \
3182 count = 0; \
3183 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) \
3184 count++; \
3185 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) \
3186 count++; \
3187 \
3188 buflen = sizeof (breq) * count; \
3189 if (bifc->ifbic_len == 0) { \
3190 bifc->ifbic_len = buflen; \
3191 return (0); \
3192 } \
3193 BRIDGE_UNLOCK(sc); \
3194 outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
3195 BRIDGE_LOCK(sc); \
3196 \
3197 count = 0; \
3198 buf = outbuf; \
3199 len = min(bifc->ifbic_len, buflen); \
3200 bzero(&breq, sizeof (breq)); \
3201 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
3202 if (len < sizeof (breq)) \
3203 break; \
3204 \
3205 snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
3206 "%s", bif->bif_ifp->if_xname); \
3207 /* Fill in the ifbreq structure */ \
3208 error = bridge_ioctl_gifflags(sc, &breq, sizeof(breq)); \
3209 if (error) \
3210 break; \
3211 memcpy(buf, &breq, sizeof (breq)); \
3212 count++; \
3213 buf += sizeof (breq); \
3214 len -= sizeof (breq); \
3215 } \
3216 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) { \
3217 if (len < sizeof (breq)) \
3218 break; \
3219 \
3220 snprintf(breq.ifbr_ifsname, \
3221 sizeof (breq.ifbr_ifsname), \
3222 "%s", bif->bif_ifp->if_xname); \
3223 breq.ifbr_ifsflags = bif->bif_ifflags; \
3224 breq.ifbr_portno \
3225 = bif->bif_ifp->if_index & 0xfff; \
3226 memcpy(buf, &breq, sizeof (breq)); \
3227 count++; \
3228 buf += sizeof (breq); \
3229 len -= sizeof (breq); \
3230 } \
3231 \
3232 BRIDGE_UNLOCK(sc); \
3233 bifc->ifbic_len = sizeof (breq) * count; \
3234 if (bifc->ifbic_len > 0) { \
3235 error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);\
3236 } \
3237 BRIDGE_LOCK(sc); \
3238 kfree_data(outbuf, buflen); \
3239 } while (0)
3240
3241 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3242 bridge_ioctl_gifs64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3243 {
3244 struct ifbifconf64 * __single bifc = arg;
3245 int error = 0;
3246
3247 BRIDGE_IOCTL_GIFS;
3248
3249 return error;
3250 }
3251
3252 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3253 bridge_ioctl_gifs32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3254 {
3255 struct ifbifconf32 * __single bifc = arg;
3256 int error = 0;
3257
3258 BRIDGE_IOCTL_GIFS;
3259
3260 return error;
3261 }
3262
3263 #define BRIDGE_IOCTL_RTS do { \
3264 struct bridge_rtnode *brt; \
3265 char *buf; \
3266 char *outbuf = NULL; \
3267 unsigned int count, buflen, len; \
3268 unsigned long now; \
3269 \
3270 if (bac->ifbac_len == 0) \
3271 return (0); \
3272 \
3273 bzero(&bareq, sizeof (bareq)); \
3274 count = 0; \
3275 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) \
3276 count++; \
3277 buflen = sizeof (bareq) * count; \
3278 \
3279 BRIDGE_UNLOCK(sc); \
3280 outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
3281 BRIDGE_LOCK(sc); \
3282 \
3283 count = 0; \
3284 buf = outbuf; \
3285 len = min(bac->ifbac_len, buflen); \
3286 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
3287 if (len < sizeof (bareq)) \
3288 goto out; \
3289 snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname), \
3290 "%s", brt->brt_ifp->if_xname); \
3291 memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
3292 bareq.ifba_vlan = brt->brt_vlan; \
3293 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
3294 now = (unsigned long) net_uptime(); \
3295 if (now < brt->brt_expire) \
3296 bareq.ifba_expire = \
3297 brt->brt_expire - now; \
3298 } else \
3299 bareq.ifba_expire = 0; \
3300 bareq.ifba_flags = brt->brt_flags; \
3301 \
3302 memcpy(buf, &bareq, sizeof (bareq)); \
3303 count++; \
3304 buf += sizeof (bareq); \
3305 len -= sizeof (bareq); \
3306 } \
3307 out: \
3308 bac->ifbac_len = sizeof (bareq) * count; \
3309 if (outbuf != NULL) { \
3310 BRIDGE_UNLOCK(sc); \
3311 if (bac->ifbac_len > 0) { \
3312 error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);\
3313 } \
3314 kfree_data(outbuf, buflen); \
3315 BRIDGE_LOCK(sc); \
3316 } \
3317 return (error); \
3318 } while (0)
3319
3320 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3321 bridge_ioctl_rts64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3322 {
3323 struct ifbaconf64 * __single bac = arg;
3324 struct ifbareq64 bareq;
3325 int error = 0;
3326
3327 BRIDGE_IOCTL_RTS;
3328 return error;
3329 }
3330
3331 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3332 bridge_ioctl_rts32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3333 {
3334 struct ifbaconf32 * __single bac = arg;
3335 struct ifbareq32 bareq;
3336 int error = 0;
3337
3338 BRIDGE_IOCTL_RTS;
3339 return error;
3340 }
3341
3342 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3343 bridge_ioctl_saddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3344 {
3345 struct ifbareq32 * __single req = arg;
3346 struct bridge_iflist *bif;
3347 int error;
3348
3349 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3350 if (bif == NULL) {
3351 return ENOENT;
3352 }
3353
3354 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3355 req->ifba_flags);
3356
3357 return error;
3358 }
3359
3360 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3361 bridge_ioctl_saddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3362 {
3363 struct ifbareq64 * __single req = arg;
3364 struct bridge_iflist *bif;
3365 int error;
3366
3367 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3368 if (bif == NULL) {
3369 return ENOENT;
3370 }
3371
3372 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3373 req->ifba_flags);
3374
3375 return error;
3376 }
3377
3378 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3379 bridge_ioctl_sto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3380 {
3381 struct ifbrparam * __single param = arg;
3382
3383 sc->sc_brttimeout = param->ifbrp_ctime;
3384 return 0;
3385 }
3386
3387 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3388 bridge_ioctl_gto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3389 {
3390 struct ifbrparam * __single param = arg;
3391
3392 param->ifbrp_ctime = sc->sc_brttimeout;
3393 return 0;
3394 }
3395
3396 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3397 bridge_ioctl_daddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3398 {
3399 struct ifbareq32 * __single req = arg;
3400
3401 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3402 }
3403
3404 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3405 bridge_ioctl_daddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3406 {
3407 struct ifbareq64 * __single req = arg;
3408
3409 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3410 }
3411
3412 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3413 bridge_ioctl_flush(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3414 {
3415 struct ifbreq * __single req = arg;
3416
3417 bridge_rtflush(sc, req->ifbr_ifsflags);
3418 return 0;
3419 }
3420
3421 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3422 bridge_ioctl_gpri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3423 {
3424 struct ifbrparam * __single param = arg;
3425 struct bstp_state *bs = &sc->sc_stp;
3426
3427 param->ifbrp_prio = bs->bs_bridge_priority;
3428 return 0;
3429 }
3430
3431 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3432 bridge_ioctl_spri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3433 {
3434 #if BRIDGESTP
3435 struct ifbrparam *param = arg;
3436
3437 return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3438 #else /* !BRIDGESTP */
3439 #pragma unused(sc, arg)
3440 return EOPNOTSUPP;
3441 #endif /* !BRIDGESTP */
3442 }
3443
3444 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3445 bridge_ioctl_ght(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3446 {
3447 struct ifbrparam * __single param = arg;
3448 struct bstp_state *bs = &sc->sc_stp;
3449
3450 param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3451 return 0;
3452 }
3453
3454 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3455 bridge_ioctl_sht(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3456 {
3457 #if BRIDGESTP
3458 struct ifbrparam *param = arg;
3459
3460 return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3461 #else /* !BRIDGESTP */
3462 #pragma unused(sc, arg)
3463 return EOPNOTSUPP;
3464 #endif /* !BRIDGESTP */
3465 }
3466
3467 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3468 bridge_ioctl_gfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3469 {
3470 struct ifbrparam * __single param;
3471 struct bstp_state *bs;
3472
3473 param = arg;
3474 bs = &sc->sc_stp;
3475 param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3476 return 0;
3477 }
3478
3479 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3480 bridge_ioctl_sfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3481 {
3482 #if BRIDGESTP
3483 struct ifbrparam *param = arg;
3484
3485 return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3486 #else /* !BRIDGESTP */
3487 #pragma unused(sc, arg)
3488 return EOPNOTSUPP;
3489 #endif /* !BRIDGESTP */
3490 }
3491
3492 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3493 bridge_ioctl_gma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3494 {
3495 struct ifbrparam * __single param;
3496 struct bstp_state *bs;
3497
3498 param = arg;
3499 bs = &sc->sc_stp;
3500 param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3501 return 0;
3502 }
3503
3504 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3505 bridge_ioctl_sma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3506 {
3507 #if BRIDGESTP
3508 struct ifbrparam *param = arg;
3509
3510 return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3511 #else /* !BRIDGESTP */
3512 #pragma unused(sc, arg)
3513 return EOPNOTSUPP;
3514 #endif /* !BRIDGESTP */
3515 }
3516
3517 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3518 bridge_ioctl_sifprio(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3519 {
3520 #if BRIDGESTP
3521 struct ifbreq *req = arg;
3522 struct bridge_iflist *bif;
3523
3524 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3525 if (bif == NULL) {
3526 return ENOENT;
3527 }
3528
3529 return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3530 #else /* !BRIDGESTP */
3531 #pragma unused(sc, arg)
3532 return EOPNOTSUPP;
3533 #endif /* !BRIDGESTP */
3534 }
3535
3536 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3537 bridge_ioctl_sifcost(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3538 {
3539 #if BRIDGESTP
3540 struct ifbreq *req = arg;
3541 struct bridge_iflist *bif;
3542
3543 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3544 if (bif == NULL) {
3545 return ENOENT;
3546 }
3547
3548 return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3549 #else /* !BRIDGESTP */
3550 #pragma unused(sc, arg)
3551 return EOPNOTSUPP;
3552 #endif /* !BRIDGESTP */
3553 }
3554
3555 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3556 bridge_ioctl_gfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3557 {
3558 struct ifbrparam * __single param = arg;
3559
3560 param->ifbrp_filter = sc->sc_filter_flags;
3561
3562 return 0;
3563 }
3564
3565 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3566 bridge_ioctl_sfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3567 {
3568 struct ifbrparam * __single param = arg;
3569
3570 if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3571 return EINVAL;
3572 }
3573
3574 if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3575 return EINVAL;
3576 }
3577
3578 sc->sc_filter_flags = param->ifbrp_filter;
3579
3580 return 0;
3581 }
3582
3583 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3584 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3585 {
3586 struct ifbreq * __single req = arg;
3587 struct bridge_iflist *bif;
3588
3589 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3590 if (bif == NULL) {
3591 return ENOENT;
3592 }
3593
3594 bif->bif_addrmax = req->ifbr_addrmax;
3595 return 0;
3596 }
3597
3598 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3599 bridge_ioctl_addspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3600 {
3601 struct ifbreq * __single req = arg;
3602 struct bridge_iflist *bif = NULL;
3603 struct ifnet *ifs;
3604
3605 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3606 if (ifs == NULL) {
3607 return ENOENT;
3608 }
3609
3610 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3611 return EINVAL;
3612 }
3613
3614 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3615 if (ifs == bif->bif_ifp) {
3616 return EBUSY;
3617 }
3618
3619 if (ifs->if_bridge != NULL) {
3620 return EBUSY;
3621 }
3622
3623 switch (ifs->if_type) {
3624 case IFT_ETHER:
3625 case IFT_L2VLAN:
3626 case IFT_IEEE8023ADLAG:
3627 break;
3628 default:
3629 return EINVAL;
3630 }
3631
3632 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3633
3634 bif->bif_ifp = ifs;
3635 bif->bif_ifflags = IFBIF_SPAN;
3636
3637 ifnet_reference(bif->bif_ifp);
3638
3639 TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3640
3641 return 0;
3642 }
3643
3644 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3645 bridge_ioctl_delspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3646 {
3647 struct ifbreq * __single req = arg;
3648 struct bridge_iflist *bif;
3649 struct ifnet *ifs;
3650
3651 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3652 if (ifs == NULL) {
3653 return ENOENT;
3654 }
3655
3656 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3657 if (ifs == bif->bif_ifp) {
3658 break;
3659 }
3660
3661 if (bif == NULL) {
3662 return ENOENT;
3663 }
3664
3665 bridge_delete_span(sc, bif);
3666
3667 return 0;
3668 }
3669
3670 #define BRIDGE_IOCTL_GBPARAM do { \
3671 struct bstp_state *bs = &sc->sc_stp; \
3672 struct bstp_port *root_port; \
3673 \
3674 req->ifbop_maxage = bs->bs_bridge_max_age >> 8; \
3675 req->ifbop_hellotime = bs->bs_bridge_htime >> 8; \
3676 req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8; \
3677 \
3678 root_port = bs->bs_root_port; \
3679 if (root_port == NULL) \
3680 req->ifbop_root_port = 0; \
3681 else \
3682 req->ifbop_root_port = root_port->bp_ifp->if_index; \
3683 \
3684 req->ifbop_holdcount = bs->bs_txholdcount; \
3685 req->ifbop_priority = bs->bs_bridge_priority; \
3686 req->ifbop_protocol = bs->bs_protover; \
3687 req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost; \
3688 req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id; \
3689 req->ifbop_designated_root = bs->bs_root_pv.pv_root_id; \
3690 req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id; \
3691 req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec; \
3692 req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec; \
3693 } while (0)
3694
3695 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3696 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3697 {
3698 struct ifbropreq32 * __single req = arg;
3699
3700 BRIDGE_IOCTL_GBPARAM;
3701 return 0;
3702 }
3703
3704 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3705 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3706 {
3707 struct ifbropreq64 * __single req = arg;
3708
3709 BRIDGE_IOCTL_GBPARAM;
3710 return 0;
3711 }
3712
3713 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3714 bridge_ioctl_grte(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3715 {
3716 struct ifbrparam * __single param = arg;
3717
3718 param->ifbrp_cexceeded = sc->sc_brtexceeded;
3719 return 0;
3720 }
3721
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Common body of bridge_ioctl_gifsstp32() and bridge_ioctl_gifsstp64().
 *	The expanding function must have `sc`, `bifstp` (the 32- or 64-bit
 *	ifbpstpconf request) and an `int error` in scope; the macro returns
 *	from that function on every path.
 *
 *	When the request's ifbpstp_len is 0, only the number of bytes needed
 *	for all IFBIF_STP members is stored back in ifbpstp_len.  Otherwise
 *	one ifbpstpreq per IFBIF_STP member is staged in a kernel buffer,
 *	copied out to ifbpstp_req, and ifbpstp_len is set to the number of
 *	bytes produced.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout and is re-taken before returning.  Returns ENOMEM if the
 *	staging buffer cannot be allocated, else the copyout() result.
 */
#define BRIDGE_IOCTL_GIFSSTP do { \
	struct bridge_iflist *bif; \
	struct bstp_port *bp; \
	struct ifbpstpreq bpreq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
 \
	/* first pass: count the STP-enabled members to size the reply */ \
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if ((bif->bif_ifflags & IFBIF_STP) != 0) \
			count++; \
	} \
 \
	buflen = sizeof (bpreq) * count; \
	if (bifstp->ifbpstp_len == 0) { \
		/* size query only */ \
		bifstp->ifbpstp_len = buflen; \
		return (0); \
	} \
 \
	BRIDGE_UNLOCK(sc); \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	if (outbuf == NULL && buflen != 0) { \
		/* never let the copy loop write through a NULL buffer */ \
		return (ENOMEM); \
	} \
 \
	/* \
	 * second pass: the member list may have changed while the lock \
	 * was dropped, so `len` (bounded by buflen) limits the writes \
	 */ \
	count = 0; \
	buf = outbuf; \
	len = min(bifstp->ifbpstp_len, buflen); \
	bzero(&bpreq, sizeof (bpreq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if (len < sizeof (bpreq)) \
			break; \
 \
		if ((bif->bif_ifflags & IFBIF_STP) == 0) \
			continue; \
 \
		bp = &bif->bif_stp; \
		bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; \
		bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; \
		bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; \
		bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; \
		bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
		bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; \
 \
		memcpy(buf, &bpreq, sizeof (bpreq)); \
		count++; \
		buf += sizeof (bpreq); \
		len -= sizeof (bpreq); \
	} \
 \
	BRIDGE_UNLOCK(sc); \
	bifstp->ifbpstp_len = sizeof (bpreq) * count; \
	if (bifstp->ifbpstp_len > 0) { \
		error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
	} \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
	return (error); \
} while (0)
3779
3780 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3781 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3782 {
3783 struct ifbpstpconf32 * __single bifstp = arg;
3784 int error = 0;
3785
3786 BRIDGE_IOCTL_GIFSSTP;
3787 return error;
3788 }
3789
3790 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3791 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3792 {
3793 struct ifbpstpconf64 * __single bifstp = arg;
3794 int error = 0;
3795
3796 BRIDGE_IOCTL_GIFSSTP;
3797 return error;
3798 }
3799
3800 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3801 bridge_ioctl_sproto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3802 {
3803 #if BRIDGESTP
3804 struct ifbrparam *param = arg;
3805
3806 return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3807 #else /* !BRIDGESTP */
3808 #pragma unused(sc, arg)
3809 return EOPNOTSUPP;
3810 #endif /* !BRIDGESTP */
3811 }
3812
3813 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3814 bridge_ioctl_stxhc(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3815 {
3816 #if BRIDGESTP
3817 struct ifbrparam *param = arg;
3818
3819 return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3820 #else /* !BRIDGESTP */
3821 #pragma unused(sc, arg)
3822 return EOPNOTSUPP;
3823 #endif /* !BRIDGESTP */
3824 }
3825
3826
3827 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3828 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3829 {
3830 struct ifbrhostfilter * __single req = arg;
3831 struct bridge_iflist *bif;
3832
3833 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3834 if (bif == NULL) {
3835 return ENOENT;
3836 }
3837
3838 bzero(req, sizeof(struct ifbrhostfilter));
3839 if (bif->bif_flags & BIFF_HOST_FILTER) {
3840 req->ifbrhf_flags |= IFBRHF_ENABLED;
3841 bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3842 ETHER_ADDR_LEN);
3843 req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3844 }
3845 return 0;
3846 }
3847
3848 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3849 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3850 {
3851 struct ifbrhostfilter * __single req = arg;
3852 struct bridge_iflist *bif;
3853
3854 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3855 if (bif == NULL) {
3856 return ENOENT;
3857 }
3858 if (bif_has_mac_nat(bif)) {
3859 /* no host filter with MAC-NAT */
3860 return EINVAL;
3861 }
3862 if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3863 bif->bif_flags |= BIFF_HOST_FILTER;
3864
3865 if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3866 bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3867 ETHER_ADDR_LEN);
3868 if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3869 ETHER_ADDR_LEN) != 0) {
3870 bif->bif_flags |= BIFF_HF_HWSRC;
3871 } else {
3872 bif->bif_flags &= ~BIFF_HF_HWSRC;
3873 }
3874 }
3875 if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3876 bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3877 if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3878 bif->bif_flags |= BIFF_HF_IPSRC;
3879 } else {
3880 bif->bif_flags &= ~BIFF_HF_IPSRC;
3881 }
3882 }
3883 } else {
3884 bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3885 BIFF_HF_IPSRC);
3886 bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3887 bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3888 }
3889
3890 return 0;
3891 }
3892
3893 static char *__indexable
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * __indexable buf,unsigned int * len_p)3894 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3895 unsigned int * count_p, char *__indexable buf,
3896 unsigned int * len_p)
3897 {
3898 unsigned int count = *count_p;
3899 struct ifbrmne ifbmne;
3900 unsigned int len = *len_p;
3901 struct mac_nat_entry *mne;
3902 unsigned long now;
3903
3904 bzero(&ifbmne, sizeof(ifbmne));
3905 LIST_FOREACH(mne, list, mne_list) {
3906 if (len < sizeof(ifbmne)) {
3907 break;
3908 }
3909 snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3910 "%s", mne->mne_bif->bif_ifp->if_xname);
3911 memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3912 sizeof(ifbmne.ifbmne_mac));
3913 now = (unsigned long) net_uptime();
3914 if (now < mne->mne_expire) {
3915 ifbmne.ifbmne_expire = mne->mne_expire - now;
3916 } else {
3917 ifbmne.ifbmne_expire = 0;
3918 }
3919 if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3920 ifbmne.ifbmne_af = AF_INET6;
3921 ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3922 } else {
3923 ifbmne.ifbmne_af = AF_INET;
3924 ifbmne.ifbmne_ip_addr = mne->mne_ip;
3925 }
3926 memcpy(buf, &ifbmne, sizeof(ifbmne));
3927 count++;
3928 buf += sizeof(ifbmne);
3929 len -= sizeof(ifbmne);
3930 }
3931 *count_p = count;
3932 *len_p = len;
3933 return buf;
3934 }
3935
3936 /*
3937 * bridge_ioctl_gmnelist()
3938 * Perform the get mac_nat_entry list ioctl.
3939 *
3940 * Note:
3941 * The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3942 * field size/layout except for the last field ifbml_buf, the user-supplied
3943 * buffer pointer. That is passed in separately via the 'user_addr'
3944 * parameter from the respective 32-bit or 64-bit ioctl routine.
3945 */
3946 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3947 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3948 user_addr_t user_addr)
3949 {
3950 unsigned int count;
3951 char *buf;
3952 int error = 0;
3953 char *outbuf = NULL;
3954 struct mac_nat_entry *mne;
3955 unsigned int buflen;
3956 unsigned int len;
3957
3958 mnl->ifbml_elsize = sizeof(struct ifbrmne);
3959 count = 0;
3960 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3961 count++;
3962 }
3963 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3964 count++;
3965 }
3966 buflen = sizeof(struct ifbrmne) * count;
3967 if (buflen == 0 || mnl->ifbml_len == 0) {
3968 mnl->ifbml_len = buflen;
3969 return error;
3970 }
3971 BRIDGE_UNLOCK(sc);
3972 outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3973 BRIDGE_LOCK(sc);
3974 count = 0;
3975 buf = outbuf;
3976 len = min(mnl->ifbml_len, buflen);
3977 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3978 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3979 mnl->ifbml_len = count * sizeof(struct ifbrmne);
3980 BRIDGE_UNLOCK(sc);
3981 if (mnl->ifbml_len > 0) {
3982 error = copyout(outbuf, user_addr, mnl->ifbml_len);
3983 }
3984 kfree_data(outbuf, buflen);
3985 BRIDGE_LOCK(sc);
3986 return error;
3987 }
3988
3989 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3990 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3991 {
3992 struct ifbrmnelist64 * __single mnl = arg;
3993
3994 return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
3995 }
3996
3997 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3998 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3999 {
4000 struct ifbrmnelist32 * __single mnl = arg;
4001
4002 return bridge_ioctl_gmnelist(sc, arg,
4003 CAST_USER_ADDR_T(mnl->ifbml_buf));
4004 }
4005
4006 /*
4007 * bridge_ioctl_gifstats()
4008 * Return per-member stats.
4009 *
4010 * Note:
4011 * The ifbrmreq32 and ifbrmreq64 structures have the same
4012 * field size/layout except for the last field brmr_buf, the user-supplied
4013 * buffer pointer. That is passed in separately via the 'user_addr'
4014 * parameter from the respective 32-bit or 64-bit ioctl routine.
4015 */
4016 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)4017 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
4018 user_addr_t user_addr)
4019 {
4020 struct bridge_iflist *bif;
4021 int error = 0;
4022 unsigned int buflen;
4023
4024 bif = bridge_lookup_member(sc, mreq->brmr_ifname);
4025 if (bif == NULL) {
4026 error = ENOENT;
4027 goto done;
4028 }
4029
4030 buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
4031 if (buflen == 0 || mreq->brmr_len == 0) {
4032 mreq->brmr_len = buflen;
4033 goto done;
4034 }
4035 if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
4036 error = ENOBUFS;
4037 goto done;
4038 }
4039 mreq->brmr_len = buflen;
4040 error = copyout(&bif->bif_stats, user_addr, buflen);
4041 done:
4042 return error;
4043 }
4044
4045 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4046 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4047 {
4048 struct ifbrmreq32 * __single mreq = arg;
4049
4050 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4051 }
4052
4053 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4054 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4055 {
4056 struct ifbrmreq64 * __single mreq = arg;
4057
4058 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4059 }
4060
4061 /*
4062 * bridge_proto_attach_changed
4063 *
4064 * Called when protocol attachment on the interface changes.
4065 */
4066 static void
bridge_proto_attach_changed(struct ifnet * ifp)4067 bridge_proto_attach_changed(struct ifnet *ifp)
4068 {
4069 boolean_t changed = FALSE;
4070 struct bridge_iflist *bif;
4071 boolean_t input_broadcast;
4072 struct bridge_softc * __single sc = ifp->if_bridge;
4073
4074 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4075 if (sc == NULL) {
4076 return;
4077 }
4078 input_broadcast = interface_needs_input_broadcast(ifp);
4079 BRIDGE_LOCK(sc);
4080 bif = bridge_lookup_member_if(sc, ifp);
4081 if (bif != NULL) {
4082 changed = bif_set_input_broadcast(bif, input_broadcast);
4083 }
4084 BRIDGE_UNLOCK(sc);
4085 if (changed) {
4086 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
4087 "%s input broadcast %s", ifp->if_xname,
4088 input_broadcast ? "ENABLED" : "DISABLED");
4089 }
4090 return;
4091 }
4092
4093 /*
4094 * interface_media_active:
4095 *
4096 * Tells if an interface media is active.
4097 */
4098 static int
interface_media_active(struct ifnet * ifp)4099 interface_media_active(struct ifnet *ifp)
4100 {
4101 struct ifmediareq ifmr;
4102 int status = 0;
4103
4104 bzero(&ifmr, sizeof(ifmr));
4105 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
4106 if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
4107 status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
4108 }
4109 }
4110
4111 return status;
4112 }
4113
4114 /*
4115 * bridge_updatelinkstatus:
4116 *
4117 * Update the media active status of the bridge based on the
4118 * media active status of its member.
4119 * If changed, return the corresponding onf/off link event.
4120 */
4121 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)4122 bridge_updatelinkstatus(struct bridge_softc *sc)
4123 {
4124 struct bridge_iflist *bif;
4125 int active_member = 0;
4126 u_int32_t event_code = 0;
4127
4128 BRIDGE_LOCK_ASSERT_HELD(sc);
4129
4130 /*
4131 * Find out if we have an active interface
4132 */
4133 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
4134 if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
4135 active_member = 1;
4136 break;
4137 }
4138 }
4139
4140 if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4141 sc->sc_flags |= SCF_MEDIA_ACTIVE;
4142 event_code = KEV_DL_LINK_ON;
4143 } else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4144 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
4145 event_code = KEV_DL_LINK_OFF;
4146 }
4147
4148 return event_code;
4149 }
4150
4151 /*
4152 * bridge_iflinkevent:
4153 */
4154 static void
bridge_iflinkevent(struct ifnet * ifp)4155 bridge_iflinkevent(struct ifnet *ifp)
4156 {
4157 struct bridge_softc * __single sc = ifp->if_bridge;
4158 struct bridge_iflist *bif;
4159 u_int32_t event_code = 0;
4160 int media_active;
4161
4162 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4163
4164 /* Check if the interface is a bridge member */
4165 if (sc == NULL) {
4166 return;
4167 }
4168
4169 media_active = interface_media_active(ifp);
4170 BRIDGE_LOCK(sc);
4171 bif = bridge_lookup_member_if(sc, ifp);
4172 if (bif != NULL) {
4173 if (media_active) {
4174 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4175 } else {
4176 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4177 }
4178 if (sc->sc_mac_nat_bif != NULL) {
4179 bridge_mac_nat_flush_entries(sc, bif);
4180 }
4181
4182 event_code = bridge_updatelinkstatus(sc);
4183 }
4184 BRIDGE_UNLOCK(sc);
4185
4186 if (event_code != 0) {
4187 bridge_link_event(sc->sc_ifp, event_code);
4188 }
4189 }
4190
4191 /*
4192 * bridge_delayed_callback:
4193 *
4194 * Makes a delayed call
4195 */
4196 static void
bridge_delayed_callback(void * param,__unused void * param2)4197 bridge_delayed_callback(void *param, __unused void *param2)
4198 {
4199 struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4200 struct bridge_softc *sc = call->bdc_sc;
4201
4202 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4203 if (bridge_delayed_callback_delay > 0) {
4204 struct timespec ts;
4205
4206 ts.tv_sec = bridge_delayed_callback_delay;
4207 ts.tv_nsec = 0;
4208
4209 BRIDGE_LOG(LOG_NOTICE, 0,
4210 "sleeping for %d seconds",
4211 bridge_delayed_callback_delay);
4212
4213 msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4214 __func__, &ts);
4215
4216 BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4217 }
4218 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4219
4220 BRIDGE_LOCK(sc);
4221
4222 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4223 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4224 "%s call 0x%llx flags 0x%x",
4225 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4226 call->bdc_flags);
4227 }
4228 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4229
4230 if (call->bdc_flags & BDCF_CANCELLING) {
4231 wakeup(call);
4232 } else {
4233 if ((sc->sc_flags & SCF_DETACHING) == 0) {
4234 (*call->bdc_func)(sc);
4235 }
4236 }
4237 call->bdc_flags &= ~BDCF_OUTSTANDING;
4238 BRIDGE_UNLOCK(sc);
4239 }
4240
4241 /*
4242 * bridge_schedule_delayed_call:
4243 *
4244 * Schedule a function to be called on a separate thread
4245 * The actual call may be scheduled to run at a given time or ASAP.
4246 */
4247 static void
4248 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4249 {
4250 uint64_t deadline = 0;
4251 struct bridge_softc *sc = call->bdc_sc;
4252
4253 BRIDGE_LOCK_ASSERT_HELD(sc);
4254
4255 if ((sc->sc_flags & SCF_DETACHING) ||
4256 (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4257 return;
4258 }
4259
4260 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4261 nanoseconds_to_absolutetime(
4262 (uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4263 call->bdc_ts.tv_nsec, &deadline);
4264 clock_absolutetime_interval_to_deadline(deadline, &deadline);
4265 }
4266
4267 call->bdc_flags = BDCF_OUTSTANDING;
4268
4269 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4270 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4271 "%s call 0x%llx flags 0x%x",
4272 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4273 call->bdc_flags);
4274 }
4275 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4276
4277 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4278 thread_call_func_delayed(
4279 (thread_call_func_t)bridge_delayed_callback,
4280 call, deadline);
4281 } else {
4282 if (call->bdc_thread_call == NULL) {
4283 call->bdc_thread_call = thread_call_allocate(
4284 (thread_call_func_t)bridge_delayed_callback,
4285 call);
4286 }
4287 thread_call_enter(call->bdc_thread_call);
4288 }
4289 }
4290
4291 /*
4292 * bridge_cancel_delayed_call:
4293 *
4294 * Cancel a queued or running delayed call.
4295 * If call is running, does not return until the call is done to
4296 * prevent race condition with the brigde interface getting destroyed
4297 */
4298 static void
4299 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4300 {
4301 boolean_t result;
4302 struct bridge_softc *sc = call->bdc_sc;
4303
4304 /*
4305 * The call was never scheduled
4306 */
4307 if (sc == NULL) {
4308 return;
4309 }
4310
4311 BRIDGE_LOCK_ASSERT_HELD(sc);
4312
4313 call->bdc_flags |= BDCF_CANCELLING;
4314
4315 while (call->bdc_flags & BDCF_OUTSTANDING) {
4316 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4317 "%s call 0x%llx flags 0x%x",
4318 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4319 call->bdc_flags);
4320 result = thread_call_func_cancel(
4321 (thread_call_func_t)bridge_delayed_callback, call, FALSE);
4322
4323 if (result) {
4324 /*
4325 * We managed to dequeue the delayed call
4326 */
4327 call->bdc_flags &= ~BDCF_OUTSTANDING;
4328 } else {
4329 /*
4330 * Wait for delayed call do be done running
4331 */
4332 msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4333 }
4334 }
4335 call->bdc_flags &= ~BDCF_CANCELLING;
4336 }
4337
4338 /*
4339 * bridge_cleanup_delayed_call:
4340 *
4341 * Dispose resource allocated for a delayed call
4342 * Assume the delayed call is not queued or running .
4343 */
4344 static void
4345 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4346 {
4347 boolean_t result;
4348 struct bridge_softc *sc = call->bdc_sc;
4349
4350 /*
4351 * The call was never scheduled
4352 */
4353 if (sc == NULL) {
4354 return;
4355 }
4356
4357 BRIDGE_LOCK_ASSERT_HELD(sc);
4358
4359 VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4360 VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4361
4362 if (call->bdc_thread_call != NULL) {
4363 result = thread_call_free(call->bdc_thread_call);
4364 if (result == FALSE) {
4365 panic("%s thread_call_free() failed for call %p",
4366 __func__, call);
4367 }
4368 call->bdc_thread_call = NULL;
4369 }
4370 }
4371
4372 /*
4373 * bridge_init:
4374 *
4375 * Initialize a bridge interface.
4376 */
4377 static int
4378 bridge_init(struct ifnet *ifp)
4379 {
4380 struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4381 errno_t error;
4382
4383 BRIDGE_LOCK_ASSERT_HELD(sc);
4384
4385 if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4386 return 0;
4387 }
4388
4389 error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4390
4391 /*
4392 * Calling bridge_aging_timer() is OK as there are no entries to
4393 * age so we're just going to arm the timer
4394 */
4395 bridge_aging_timer(sc);
4396 #if BRIDGESTP
4397 if (error == 0) {
4398 bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4399 }
4400 #endif /* BRIDGESTP */
4401 return error;
4402 }
4403
4404 /*
4405 * bridge_ifstop:
4406 *
4407 * Stop the bridge interface.
4408 */
4409 static void
4410 bridge_ifstop(struct ifnet *ifp, int disable)
4411 {
4412 #pragma unused(disable)
4413 struct bridge_softc * __single sc = ifp->if_softc;
4414
4415 BRIDGE_LOCK_ASSERT_HELD(sc);
4416
4417 if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4418 return;
4419 }
4420
4421 bridge_cancel_delayed_call(&sc->sc_aging_timer);
4422
4423 #if BRIDGESTP
4424 bstp_stop(&sc->sc_stp);
4425 #endif /* BRIDGESTP */
4426
4427 bridge_rtflush(sc, IFBF_FLUSHDYN);
4428 (void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4429 }
4430
4431 static const uint32_t checksum_request_flags = (MBUF_CSUM_REQ_TCP |
4432 MBUF_CSUM_REQ_UDP | MBUF_CSUM_REQ_TCPIPV6 | MBUF_CSUM_REQ_UDPIPV6);
4433
4434 static const mbuf_csum_performed_flags_t checksum_performed_all_good =
4435 (MBUF_CSUM_DID_IP | MBUF_CSUM_IP_GOOD
4436 | MBUF_CSUM_DID_DATA | MBUF_CSUM_PSEUDO_HDR);
4437
4438 /*
4439 * bridge_compute_cksum:
4440 *
4441 * If the packet has checksum flags, compare the hardware checksum
4442 * capabilities of the source and destination interfaces. If they
4443 * are the same, there's nothing to do. If they are different,
4444 * finalize the checksum so that it can be sent on the destination
4445 * interface.
4446 */
4447 static void
4448 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4449 {
4450 uint32_t csum_flags;
4451 uint16_t dst_hw_csum;
4452 uint32_t did_sw = 0;
4453 struct ether_header *eh;
4454 uint16_t src_hw_csum;
4455
4456 if (src_if == dst_if) {
4457 return;
4458 }
4459 csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4460 if (csum_flags == 0) {
4461 /* no checksum offload */
4462 return;
4463 }
4464
4465 /*
4466 * if destination/source differ in checksum offload
4467 * capabilities, finalize/compute the checksum
4468 */
4469 dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4470 src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4471 if (dst_hw_csum == src_hw_csum) {
4472 return;
4473 }
4474 eh = mtod(m, struct ether_header *);
4475 switch (eh->ether_type) {
4476 case HTONS_ETHERTYPE_IP:
4477 did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4478 break;
4479 case HTONS_ETHERTYPE_IPV6:
4480 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4481 break;
4482 }
4483 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4484 "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4485 src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4486 m->m_pkthdr.csum_flags);
4487 }
4488
4489 static inline errno_t
4490 bridge_transmit(ifnet_t ifp, mbuf_t m)
4491 {
4492 struct flowadv adv = { .code = FADV_SUCCESS };
4493 errno_t error;
4494 int flags = DLIL_OUTPUT_FLAGS_RAW;
4495
4496 flags = (if_bridge_output_skip_filters != 0)
4497 ? (DLIL_OUTPUT_FLAGS_RAW | DLIL_OUTPUT_FLAGS_SKIP_IF_FILTERS)
4498 : DLIL_OUTPUT_FLAGS_RAW;
4499 error = dlil_output(ifp, 0, m, NULL, NULL, flags, &adv);
4500 if (error == 0) {
4501 if (adv.code == FADV_FLOW_CONTROLLED) {
4502 error = EQFULL;
4503 } else if (adv.code == FADV_SUSPENDED) {
4504 error = EQSUSPENDED;
4505 }
4506 }
4507 return error;
4508 }
4509
4510 static int
4511 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4512 bool *is_fragmented)
4513 {
4514 int newoff;
4515
4516 *is_fragmented = false;
4517 while (1) {
4518 newoff = ip6_nexthdr(m, off, proto, nxtp);
4519 if (newoff < 0) {
4520 return off;
4521 } else if (newoff < off) {
4522 return -1; /* invalid */
4523 } else if (newoff == off) {
4524 return newoff;
4525 }
4526 off = newoff;
4527 proto = *nxtp;
4528 if (proto == IPPROTO_FRAGMENT) {
4529 *is_fragmented = true;
4530 }
4531 }
4532 }
4533
/*
 * Relaxed atomic increment of the lvalue `s`.  The argument is
 * parenthesized so that any lvalue expression can be passed safely.
 */
#define __ATOMIC_INC(s) os_atomic_inc(&(s), relaxed)
4535
4536 static int
4537 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4538 ip_packet_info_t info_p, struct bripstats * stats_p)
4539 {
4540 int error = 0;
4541 u_int hlen;
4542 u_int ip_hlen;
4543 u_int ip_pay_len;
4544 struct mbuf * m0 = *mp;
4545 int off;
4546 int opt_len = 0;
4547 int proto = 0;
4548
4549 bzero(info_p, sizeof(*info_p));
4550 if (is_ipv4) {
4551 struct ip * ip;
4552 u_int ip_total_len;
4553
4554 /* IPv4 */
4555 hlen = mac_hlen + sizeof(struct ip);
4556 if (m0->m_pkthdr.len < hlen) {
4557 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4558 "Short IP packet %d < %d",
4559 m0->m_pkthdr.len, hlen);
4560 error = _EBADIP;
4561 __ATOMIC_INC(stats_p->bips_bad_ip);
4562 goto done;
4563 }
4564 if (m0->m_len < hlen) {
4565 *mp = m0 = m_pullup(m0, hlen);
4566 if (m0 == NULL) {
4567 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4568 "m_pullup failed hlen %d",
4569 hlen);
4570 error = ENOBUFS;
4571 __ATOMIC_INC(stats_p->bips_bad_ip);
4572 goto done;
4573 }
4574 }
4575 ip = (struct ip *)mtodo(m0, mac_hlen);
4576 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4577 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4578 "bad IP version");
4579 error = _EBADIP;
4580 __ATOMIC_INC(stats_p->bips_bad_ip);
4581 goto done;
4582 }
4583 ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4584 if (ip_hlen < sizeof(struct ip)) {
4585 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4586 "bad IP header length %d < %d",
4587 ip_hlen,
4588 (int)sizeof(struct ip));
4589 error = _EBADIP;
4590 __ATOMIC_INC(stats_p->bips_bad_ip);
4591 goto done;
4592 }
4593 hlen = mac_hlen + ip_hlen;
4594 if (m0->m_len < hlen) {
4595 *mp = m0 = m_pullup(m0, hlen);
4596 if (m0 == NULL) {
4597 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4598 "m_pullup failed hlen %d",
4599 hlen);
4600 error = ENOBUFS;
4601 __ATOMIC_INC(stats_p->bips_bad_ip);
4602 goto done;
4603 }
4604 ip = (struct ip *)mtodo(m0, mac_hlen);
4605 }
4606
4607 ip_total_len = ntohs(ip->ip_len);
4608 if (ip_total_len < ip_hlen) {
4609 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4610 "IP total len %d < header len %d",
4611 ip_total_len, ip_hlen);
4612 error = _EBADIP;
4613 __ATOMIC_INC(stats_p->bips_bad_ip);
4614 goto done;
4615 }
4616 if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4617 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4618 "invalid IP payload length %d > %d",
4619 ip_total_len,
4620 (m0->m_pkthdr.len - mac_hlen));
4621 error = _EBADIP;
4622 __ATOMIC_INC(stats_p->bips_bad_ip);
4623 goto done;
4624 }
4625 ip_pay_len = ip_total_len - ip_hlen;
4626 info_p->ip_proto = ip->ip_p;
4627 info_p->ip_hdr = mtodo(m0, mac_hlen);
4628 info_p->ip_m0_len = m0->m_len - mac_hlen;
4629 info_p->ip_hlen = ip_hlen;
4630 #define FRAG_BITS (IP_OFFMASK | IP_MF)
4631 if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4632 info_p->ip_is_fragmented = true;
4633 }
4634 __ATOMIC_INC(stats_p->bips_ip);
4635 } else {
4636 struct ip6_hdr *ip6;
4637
4638 /* IPv6 */
4639 hlen = mac_hlen + sizeof(struct ip6_hdr);
4640 if (m0->m_pkthdr.len < hlen) {
4641 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4642 "short IPv6 packet %d < %d",
4643 m0->m_pkthdr.len, hlen);
4644 error = _EBADIPV6;
4645 __ATOMIC_INC(stats_p->bips_bad_ip6);
4646 goto done;
4647 }
4648 if (m0->m_len < hlen) {
4649 *mp = m0 = m_pullup(m0, hlen);
4650 if (m0 == NULL) {
4651 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4652 "m_pullup failed hlen %d",
4653 hlen);
4654 error = ENOBUFS;
4655 __ATOMIC_INC(stats_p->bips_bad_ip6);
4656 goto done;
4657 }
4658 }
4659 ip6 = (struct ip6_hdr *)(mtodo(m0, mac_hlen));
4660 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4661 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4662 "bad IPv6 version");
4663 error = _EBADIPV6;
4664 __ATOMIC_INC(stats_p->bips_bad_ip6);
4665 goto done;
4666 }
4667 off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4668 &info_p->ip_is_fragmented);
4669 if (off < 0 || m0->m_pkthdr.len < off) {
4670 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4671 "ip6_lasthdr() returned %d",
4672 off);
4673 error = _EBADIPV6;
4674 __ATOMIC_INC(stats_p->bips_bad_ip6);
4675 goto done;
4676 }
4677 ip_hlen = sizeof(*ip6);
4678 opt_len = off - mac_hlen - ip_hlen;
4679 if (opt_len < 0) {
4680 error = _EBADIPV6;
4681 __ATOMIC_INC(stats_p->bips_bad_ip6);
4682 goto done;
4683 }
4684 ip_pay_len = ntohs(ip6->ip6_plen);
4685 if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4686 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4687 "invalid IPv6 payload length %d > %d",
4688 ip_pay_len,
4689 (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4690 error = _EBADIPV6;
4691 __ATOMIC_INC(stats_p->bips_bad_ip6);
4692 goto done;
4693 }
4694 info_p->ip_proto = proto;
4695 info_p->ip_hdr = mtodo(m0, mac_hlen);
4696 info_p->ip_m0_len = m0->m_len - mac_hlen;
4697 info_p->ip_hlen = ip_hlen;
4698 __ATOMIC_INC(stats_p->bips_ip6);
4699 }
4700 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4701 "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4702 is_ipv4 ? '4' : '6',
4703 proto, ip_hlen, ip_pay_len, opt_len,
4704 m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4705 info_p->ip_pay_len = ip_pay_len;
4706 info_p->ip_opt_len = opt_len;
4707 info_p->ip_is_ipv4 = is_ipv4;
4708 done:
4709 return error;
4710 }
4711
4712 static int
4713 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4714 ip_packet_info_t info_p, struct bripstats * stats_p)
4715 {
4716 int error;
4717 u_int hlen;
4718
4719 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4720 if (error != 0) {
4721 goto done;
4722 }
4723 if (info_p->ip_proto != IPPROTO_TCP) {
4724 /* not a TCP frame, not an error, just a bad guess */
4725 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4726 "non-TCP (%d) IPv%c frame %d bytes",
4727 info_p->ip_proto, is_ipv4 ? '4' : '6',
4728 (*mp)->m_pkthdr.len);
4729 goto done;
4730 }
4731 if (info_p->ip_is_fragmented) {
4732 /* both TSO and IP fragmentation don't make sense */
4733 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4734 "fragmented TSO packet?");
4735 __ATOMIC_INC(stats_p->bips_bad_tcp);
4736 error = _EBADTCP;
4737 goto done;
4738 }
4739 hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4740 info_p->ip_opt_len;
4741 if ((*mp)->m_len < hlen) {
4742 *mp = m_pullup(*mp, hlen);
4743 if (*mp == NULL) {
4744 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4745 "m_pullup %d failed",
4746 hlen);
4747 __ATOMIC_INC(stats_p->bips_bad_tcp);
4748 error = _EBADTCP;
4749 goto done;
4750 }
4751 }
4752 info_p->ip_proto_hdr = info_p->ip_hdr + info_p->ip_hlen +
4753 info_p->ip_opt_len;
4754 done:
4755 return error;
4756 }
4757
4758 static inline void
4759 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4760 {
4761 if (proto == IPPROTO_TCP) {
4762 __ATOMIC_INC(stats_p->brcs_tcp_checksum);
4763 } else {
4764 __ATOMIC_INC(stats_p->brcs_udp_checksum);
4765 }
4766 return;
4767 }
4768
/*
 * Bit flags produced by ether_type_flag_get(); one bit per ethertype of
 * interest, plus convenience masks combining them.
 */
#define ETHER_TYPE_FLAG_NONE    0x00    /* none of the types below */
#define ETHER_TYPE_FLAG_IPV4    0x01
#define ETHER_TYPE_FLAG_IPV6    0x02
#define ETHER_TYPE_FLAG_ARP     0x04
/* either IP version */
#define ETHER_TYPE_FLAG_IP      (ETHER_TYPE_FLAG_IPV4 | ETHER_TYPE_FLAG_IPV6)
/* either IP version, or ARP */
#define ETHER_TYPE_FLAG_IP_ARP  (ETHER_TYPE_FLAG_IP | ETHER_TYPE_FLAG_ARP)
4775
4776 static inline bool
4777 ether_type_flag_is_ip(ether_type_flag_t flag)
4778 {
4779 return (flag & ETHER_TYPE_FLAG_IP) != 0;
4780 }
4781
4782 static inline ether_type_flag_t
4783 ether_type_flag_get(uint16_t ether_type)
4784 {
4785 ether_type_flag_t flag = ETHER_TYPE_FLAG_NONE;
4786
4787 switch (ether_type) {
4788 case HTONS_ETHERTYPE_IP:
4789 flag = ETHER_TYPE_FLAG_IPV4;
4790 break;
4791 case HTONS_ETHERTYPE_IPV6:
4792 flag = ETHER_TYPE_FLAG_IPV6;
4793 break;
4794 case HTONS_ETHERTYPE_ARP:
4795 flag = ETHER_TYPE_FLAG_ARP;
4796 break;
4797 default:
4798 break;
4799 }
4800 return flag;
4801 }
4802
4803 static bool
4804 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4805 {
4806 uint16_t ether_type;
4807 bool is_ip = TRUE;
4808
4809 ether_type = ntohs(eh->ether_type);
4810 switch (ether_type) {
4811 case ETHERTYPE_IP:
4812 *is_ipv4 = TRUE;
4813 break;
4814 case ETHERTYPE_IPV6:
4815 *is_ipv4 = FALSE;
4816 break;
4817 default:
4818 is_ip = FALSE;
4819 break;
4820 }
4821 return is_ip;
4822 }
4823
4824 static errno_t
4825 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4826 {
4827 struct brcsumstats *csum_stats_p;
4828 struct ether_header *eh;
4829 errno_t error = 0;
4830 ip_packet_info info;
4831 bool is_ipv4;
4832 struct mbuf * m;
4833 u_int mac_hlen = sizeof(struct ether_header);
4834 uint16_t sum;
4835 bool valid;
4836
4837 eh = mtod(*mp, struct ether_header *);
4838 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4839 goto done;
4840 }
4841 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4842 &stats_p->brms_out_ip);
4843 m = *mp;
4844 if (error != 0) {
4845 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4846 "bridge_get_ip_proto failed %d",
4847 error);
4848 goto done;
4849 }
4850 if (is_ipv4) {
4851 if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4852 /* hardware offloaded IP header checksum */
4853 valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4854 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4855 "IP checksum HW %svalid",
4856 valid ? "" : "in");
4857 if (!valid) {
4858 __ATOMIC_INC(stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum);
4859 error = _EBADIPCHECKSUM;
4860 goto done;
4861 }
4862 __ATOMIC_INC(stats_p->brms_out_cksum_good_hw.brcs_ip_checksum);
4863 } else {
4864 /* verify */
4865 sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4866 valid = (sum == 0);
4867 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4868 "IP checksum SW %svalid",
4869 valid ? "" : "in");
4870 if (!valid) {
4871 __ATOMIC_INC(stats_p->brms_out_cksum_bad.brcs_ip_checksum);
4872 error = _EBADIPCHECKSUM;
4873 goto done;
4874 }
4875 __ATOMIC_INC(stats_p->brms_out_cksum_good.brcs_ip_checksum);
4876 }
4877 }
4878 if (info.ip_is_fragmented) {
4879 /* can't verify checksum on fragmented packets */
4880 goto done;
4881 }
4882 switch (info.ip_proto) {
4883 case IPPROTO_TCP:
4884 __ATOMIC_INC(stats_p->brms_out_ip.bips_tcp);
4885 break;
4886 case IPPROTO_UDP:
4887 __ATOMIC_INC(stats_p->brms_out_ip.bips_udp);
4888 break;
4889 default:
4890 goto done;
4891 }
4892 /* check for hardware offloaded UDP/TCP checksum */
4893 #define HW_CSUM (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4894 if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4895 /* checksum verified by hardware */
4896 valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4897 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4898 "IPv%c %s checksum HW 0x%x %svalid",
4899 is_ipv4 ? '4' : '6',
4900 (info.ip_proto == IPPROTO_TCP)
4901 ? "TCP" : "UDP",
4902 m->m_pkthdr.csum_data,
4903 valid ? "" : "in" );
4904 if (!valid) {
4905 /* bad checksum */
4906 csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4907 error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4908 : _EBADTCPCHECKSUM;
4909 } else {
4910 /* good checksum */
4911 csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4912 }
4913 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4914 goto done;
4915 }
4916 /* adjust frame to skip mac-layer header */
4917 _mbuf_adjust_pkthdr_and_data(m, mac_hlen);
4918 if (is_ipv4) {
4919 sum = inet_cksum(m, info.ip_proto,
4920 info.ip_hlen,
4921 info.ip_pay_len);
4922 } else {
4923 sum = inet6_cksum(m, info.ip_proto,
4924 info.ip_hlen + info.ip_opt_len,
4925 info.ip_pay_len - info.ip_opt_len);
4926 }
4927 valid = (sum == 0);
4928 if (valid) {
4929 csum_stats_p = &stats_p->brms_out_cksum_good;
4930 } else {
4931 csum_stats_p = &stats_p->brms_out_cksum_bad;
4932 error = (info.ip_proto == IPPROTO_TCP)
4933 ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4934 }
4935 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4936 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4937 "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4938 is_ipv4 ? '4' : '6',
4939 (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4940 valid ? "" : "in",
4941 sum, info.ip_hlen, info.ip_pay_len);
4942 /* adjust frame back to start of mac-layer header */
4943 _mbuf_adjust_pkthdr_and_data(m, -mac_hlen);
4944
4945 done:
4946 return error;
4947 }
4948
4949 static mbuf_t
4950 bridge_verify_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * dbif,
4951 mbuf_t in_list, bool is_ipv4)
4952 {
4953 mbuf_t next_packet;
4954 mblist ret;
4955
4956 mblist_init(&ret);
4957 for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
4958 errno_t error;
4959
4960 /* take packet out of the list */
4961 next_packet = scan->m_nextpkt;
4962 scan->m_nextpkt = NULL;
4963
4964 if (scan->m_pkthdr.rx_seg_cnt > 1) {
4965 /* LRO packet, compute checksum on large packet */
4966 scan = bridge_filter_checksum(bridge_ifp, dbif, scan,
4967 is_ipv4, false, true);
4968 } else {
4969 /* verify checksum */
4970 error = bridge_verify_checksum(&scan, &dbif->bif_stats);
4971 if (error != 0) {
4972 if (scan != NULL) {
4973 m_freem(scan);
4974 scan = NULL;
4975 }
4976 }
4977 }
4978
4979 /* add it back to the list */
4980 if (scan != NULL) {
4981 mblist_append(&ret, scan);
4982 }
4983 }
4984 return ret.head;
4985 }
4986
4987
/*
 * bridge_offload_checksum:
 *
 * Compute checksums in software for the Ethernet frame in *mp, described
 * by info_p, and store them in the packet:
 *  - for IPv4, the IP header checksum is always recomputed;
 *  - for unfragmented TCP/UDP, the transport checksum is recomputed and
 *    the mbuf is marked via mbuf_set_csum_performed() as fully checked.
 * Fragments and other protocols are left alone after the IP header step.
 * The "in" counters in stats_p are updated along the way.
 *
 * Returns 0 on success, _EBADTCP if the TCP header cannot be made
 * contiguous or its length exceeds the packet, or ENOBUFS if the UDP
 * header cannot be made contiguous. When m_pullup() fails, *mp is set to
 * NULL.
 */
static errno_t
bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
    struct ifbrmstats * stats_p)
{
	uint16_t * csum_p;
	errno_t error = 0;
	u_int hlen;
	struct mbuf * m0 = *mp;
	u_int mac_hlen = sizeof(struct ether_header);
	u_int pkt_hdr_len;
	struct tcphdr * tcp;
	u_int tcp_hlen;
	struct udphdr * udp;

	if (info_p->ip_is_ipv4) {
		/* compute IP header checksum */
		struct ip *ip = (struct ip *)info_p->ip_hdr;
		/* the field must be zero while the sum is computed */
		ip->ip_sum = 0;
		ip->ip_sum = inet_cksum(m0, 0, mac_hlen, info_p->ip_hlen);
		__ATOMIC_INC(stats_p->brms_in_computed_cksum.brcs_ip_checksum);
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
		    "IPv4 checksum 0x%x",
		    ntohs(ip->ip_sum));
	}
	if (info_p->ip_is_fragmented) {
		/* can't compute checksum on fragmented packets */
		goto done;
	}
	pkt_hdr_len = m0->m_pkthdr.len;
	switch (info_p->ip_proto) {
	case IPPROTO_TCP:
		/* make sure the fixed part of the TCP header is contiguous */
		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
		    + sizeof(struct tcphdr);
		if (m0->m_len < hlen) {
			/*
			 * NOTE(review): info_p->ip_hdr was captured before this
			 * m_pullup(); if m_pullup() returns a different mbuf the
			 * pointer may no longer refer to the packet data —
			 * confirm against m_pullup() semantics.
			 */
			*mp = m0 = m_pullup(m0, hlen);
			if (m0 == NULL) {
				__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
				error = _EBADTCP;
				goto done;
			}
		}
		tcp = (struct tcphdr *)(info_p->ip_hdr + info_p->ip_hlen
		    + info_p->ip_opt_len);
		/* th_off counts 32-bit words */
		tcp_hlen = tcp->th_off << 2;
		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
		if (hlen > pkt_hdr_len) {
			/* claimed TCP header extends past the end of the packet */
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
			    "bad tcp header length %u",
			    tcp_hlen);
			__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
			error = _EBADTCP;
			goto done;
		}
		csum_p = &tcp->th_sum;
		__ATOMIC_INC(stats_p->brms_in_ip.bips_tcp);
		break;
	case IPPROTO_UDP:
		/* make sure the UDP header is contiguous */
		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
		if (m0->m_len < hlen) {
			/* NOTE(review): same ip_hdr concern as the TCP case above */
			*mp = m0 = m_pullup(m0, hlen);
			if (m0 == NULL) {
				__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_udp);
				error = ENOBUFS;
				goto done;
			}
		}
		udp = (struct udphdr *)(info_p->ip_hdr + info_p->ip_hlen
		    + info_p->ip_opt_len);
		csum_p = &udp->uh_sum;
		__ATOMIC_INC(stats_p->brms_in_ip.bips_udp);
		break;
	default:
		/* not TCP or UDP */
		goto done;
	}
	/* the checksum field must be zero while the sum is computed */
	*csum_p = 0;
	/* adjust frame to skip mac-layer header */
	_mbuf_adjust_pkthdr_and_data(m0, mac_hlen);
	if (info_p->ip_is_ipv4) {
		*csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
		    info_p->ip_pay_len);
	} else {
		/* for IPv6, skip ip_opt_len bytes between IP header and payload */
		*csum_p = inet6_cksum(m0, info_p->ip_proto,
		    info_p->ip_hlen + info_p->ip_opt_len,
		    info_p->ip_pay_len - info_p->ip_opt_len);
	}
	if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
		/* RFC 1122 4.1.3.4 */
		*csum_p = 0xffff;
	}
	/* adjust frame back to start of mac-layer header */
	_mbuf_adjust_pkthdr_and_data(m0, -mac_hlen);
	proto_csum_stats_increment(info_p->ip_proto,
	    &stats_p->brms_in_computed_cksum);

	/* indicate that the checksum is good */
	mbuf_set_csum_performed(m0, checksum_performed_all_good, 0xffff);

	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
	    "IPv%c %s set checksum 0x%x",
	    info_p->ip_is_ipv4 ? '4' : '6',
	    (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
	    ntohs(*csum_p));
done:
	return error;
}
5094
5095 static inline void
5096 bridge_handle_checksum_op(ifnet_t src_ifp, ifnet_t dst_ifp,
5097 mbuf_t m, ChecksumOperation cksum_op)
5098 {
5099 switch (cksum_op) {
5100 case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
5101 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5102 break;
5103 case CHECKSUM_OPERATION_FINALIZE:
5104 /* the checksum might not be correct, finalize now */
5105 VERIFY(dst_ifp != NULL);
5106 bridge_finalize_cksum(dst_ifp, m);
5107 break;
5108 case CHECKSUM_OPERATION_COMPUTE:
5109 VERIFY(dst_ifp != NULL && src_ifp != NULL);
5110 bridge_compute_cksum(src_ifp, dst_ifp, m);
5111 break;
5112 default:
5113 break;
5114 }
5115 return;
5116 }
5117
5118 static uint32_t
5119 get_if_tso_mtu(struct ifnet * ifp, bool is_ipv4)
5120 {
5121 uint32_t tso_mtu;
5122
5123 tso_mtu = is_ipv4 ? ifp->if_tso_v4_mtu : ifp->if_tso_v6_mtu;
5124 if (tso_mtu == 0) {
5125 tso_mtu = IP_MAXPACKET;
5126 }
5127
5128 #if DEBUG || DEVELOPMENT
5129 #define REDUCED_TSO_MTU (16 * 1024)
5130 if (if_bridge_reduce_tso_mtu != 0 && tso_mtu > REDUCED_TSO_MTU) {
5131 tso_mtu = REDUCED_TSO_MTU;
5132 }
5133 #endif /* DEBUG || DEVELOPMENT */
5134 return tso_mtu;
5135 }
5136
/*
 * tso_hwassist:
 * - determine whether the destination interface supports TSO offload
 * - if the packet is already marked for offload (*mss_p != 0) and the
 *   hardware supports it, just allow the packet to continue on
 * - if not, parse the packet headers to verify that this is a large TCP
 *   packet requiring segmentation and mark the packet for TSO offload
 * - set *need_gso if the interface lacks TSO for this address family or
 *   its TSO MTU is smaller than the packet
 *
 * Outputs:
 * - *need_gso and *is_large_tcp are cleared on entry and only set once the
 *   packet has been identified as a large TCP packet
 * - *mss_p is written on every return path with the (possibly newly
 *   chosen) MSS
 *
 * Returns 0, or the error from bridge_get_tcp_header().
 */
static int
tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
    int * mss_p, bool * need_gso, bool * is_large_tcp)
{
	uint32_t csum_flags;
	int error = 0;
	ip_packet_info info;
	u_int32_t if_csum;
	u_int32_t if_tso;
	u_int32_t mbuf_tso;
	int mss = *mss_p;
	uint8_t seg_cnt = 0;
	bool supports_cksum = false;
	uint32_t pkt_mtu;
	struct bripstats stats; /* scratch counters; not read in this function */

	*need_gso = false;
	*is_large_tcp = false;
	if (is_ipv4) {
		/*
		 * Enable both TCP and IP offload if the hardware supports it.
		 * If the hardware doesn't support TCP offload, supports_cksum
		 * will be false so we won't set either offload.
		 */
		if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
		supports_cksum = (if_csum & CSUM_TCP) != 0;
		if_tso = IFNET_TSO_IPV4;
		mbuf_tso = CSUM_TSO_IPV4;
	} else {
		if_csum = (ifp->if_hwassist & CSUM_TCPIPV6);
		supports_cksum = (if_csum & CSUM_TCPIPV6) != 0;
		if_tso = IFNET_TSO_IPV6;
		mbuf_tso = CSUM_TSO_IPV6;
	}
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
	    "%s: does%s support checksum 0x%x if_csum 0x%x",
	    ifp->if_xname, supports_cksum ? "" : " not",
	    ifp->if_hwassist, if_csum);

	/* verify that this is a large TCP frame */
	error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
	    &info, &stats);
	if (error != 0) {
		/* bad packet */
		goto done;
	}
	if (info.ip_proto_hdr == NULL) {
		/* not a TCP packet */
		goto done;
	}
	pkt_mtu = info.ip_hlen + info.ip_pay_len + info.ip_opt_len;
	if (mss == 0) {
		/* check for LRO */
		seg_cnt = (*mp)->m_pkthdr.rx_seg_cnt;
		if (seg_cnt == 1 || (seg_cnt == 0 && pkt_mtu <= ifp->if_mtu)) {
			/* not actually a large packet */
			goto done;
		}
	}
	*is_large_tcp = true;
	(*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
	if (mss == 0) {
		uint32_t hdr_len;
		struct tcphdr * tcp;

		tcp = (struct tcphdr *)info.ip_proto_hdr;
		/* IP header + options/extensions + TCP header (th_off is in words) */
		hdr_len = info.ip_hlen + info.ip_opt_len + (tcp->th_off << 2);

		/* packet isn't marked, mark it now */
		if (seg_cnt != 0) {
			uint32_t len;

			/* approximate the MSS using the LRO seg cnt */
			len = mbuf_pkthdr_len(*mp) - hdr_len - ETHER_HDR_LEN;
			mss = len / seg_cnt;
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
			    "%s: mss %d = len %d / seg cnt %d",
			    ifp->if_xname, mss, len, seg_cnt);
		} else {
			/* derive the MSS from the interface MTU */
			mss = ifp->if_mtu - hdr_len
			    - if_bridge_tso_reduce_mss_tx;
		}
		assert(mss > 0);
		csum_flags = mbuf_tso;
		if (supports_cksum) {
			csum_flags |= if_csum;
		}
		(*mp)->m_pkthdr.tso_segsz = mss;
		(*mp)->m_pkthdr.csum_flags |= csum_flags;
		(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
	}
	if ((ifp->if_hwassist & if_tso) == 0) {
		/* need gso if no hardware support */
		*need_gso = true;
	} else {
		uint32_t tso_mtu = 0;

		tso_mtu = get_if_tso_mtu(ifp, is_ipv4);
		if (pkt_mtu > tso_mtu) {
			/* need gso if tso_mtu too small */
			*need_gso = true;
		}
	}
done:
	*mss_p = mss;
	return error;
}
5253
5254 /*
5255 * bridge_enqueue:
5256 *
5257 * Enqueue a packet list on a bridge member interface.
5258 *
5259 */
5260 static int
5261 bridge_enqueue(ifnet_t bridge_ifp, ifnet_t src_if, ifnet_t dst_if,
5262 ether_type_flag_t etypef, mbuf_t in_list, ChecksumOperation orig_cksum_op)
5263 {
5264 int enqueue_error = 0;
5265 mbuf_t next_packet;
5266 uint32_t out_errors = 0;
5267 mblist out_list;
5268
5269 VERIFY(dst_if != NULL);
5270
5271 mblist_init(&out_list);
5272 for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
5273 bool check_gso = false;
5274 ChecksumOperation cksum_op = orig_cksum_op;
5275 errno_t error = 0;
5276 bool is_ipv4 = false;
5277 int len;
5278 int mss = 0;
5279 bool need_gso = false;
5280
5281 scan->m_flags |= M_PROTO1; /* set to avoid loops */
5282 next_packet = scan->m_nextpkt;
5283 scan->m_nextpkt = NULL;
5284 len = mbuf_pkthdr_len(scan);
5285 is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
5286 mss = _mbuf_get_tso_mss(scan);
5287 if (mss != 0) {
5288 /* packet is marked for segmentation */
5289 check_gso = true;
5290 } else if (scan->m_pkthdr.rx_seg_cnt != 0) {
5291 /* LRO packet */
5292 check_gso = true;
5293 } else if (ether_type_flag_is_ip(etypef) &&
5294 len > (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5295 /*
5296 * Need to segment the packet if it is a large frame
5297 * and the destination interface does not support TSO.
5298 *
5299 * Note that with trailers, it's possible for a packet to
5300 * be large but not actually require segmentation.
5301 */
5302 check_gso = true;
5303 }
5304 if (check_gso) {
5305 bool is_large_tcp = false;
5306
5307 error = tso_hwassist(&scan, is_ipv4,
5308 dst_if, sizeof(struct ether_header), &mss,
5309 &need_gso, &is_large_tcp);
5310 if (is_large_tcp &&
5311 cksum_op == CHECKSUM_OPERATION_CLEAR_OFFLOAD) {
5312 cksum_op = CHECKSUM_OPERATION_NONE;
5313 }
5314 }
5315 if (error != 0) {
5316 if (scan != NULL) {
5317 m_freem(scan);
5318 scan = NULL;
5319 }
5320 out_errors++;
5321 } else if (need_gso) {
5322 int mac_hlen = sizeof(struct ether_header);
5323 mblist segs;
5324
5325 /* segment packets, add to list */
5326 segs = gso_tcp_transmit(dst_if, scan, mac_hlen,
5327 is_ipv4);
5328 if (segs.head != NULL) {
5329 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5330 "%s (%s) append gso #segs %u bytes %u",
5331 bridge_ifp->if_xname,
5332 dst_if->if_xname,
5333 segs.count, segs.bytes);
5334 mblist_append_list(&out_list, segs);
5335 } else {
5336 out_errors++;
5337 }
5338 } else {
5339 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5340 "%s (%s) append %d bytes mss %d op %d",
5341 bridge_ifp->if_xname,
5342 dst_if->if_xname,
5343 len, mss, cksum_op);
5344 bridge_handle_checksum_op(src_if, dst_if,
5345 scan, cksum_op);
5346 mblist_append(&out_list, scan);
5347 }
5348 }
5349 if (out_list.head != NULL) {
5350 enqueue_error = bridge_transmit(dst_if, out_list.head);
5351 if (enqueue_error != 0) {
5352 out_errors++;
5353 }
5354 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5355 "%s (%s) bridge_transmit packets %u bytes %u error %d",
5356 bridge_ifp->if_xname,
5357 dst_if->if_xname,
5358 out_list.count, out_list.bytes, enqueue_error);
5359 }
5360 if (out_list.count != 0 || out_errors != 0) {
5361 ifnet_stat_increment_out(bridge_ifp, out_list.count,
5362 out_list.bytes, out_errors);
5363 }
5364 return enqueue_error;
5365 }
5366
5367 /*
5368 * bridge_member_output:
5369 *
5370 * Send output from a bridge member interface. This
5371 * performs the bridging function for locally originated
5372 * packets.
5373 *
5374 * The mbuf has the Ethernet header already attached.
5375 */
5376 static errno_t
5377 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5378 {
5379 struct bridge_iflist * bif = NULL;
5380 ifnet_t bridge_ifp;
5381 struct ether_header *eh;
5382 ether_type_flag_t etypef;
5383 struct ifnet *dst_if = NULL;
5384 uint16_t vlan;
5385 struct bridge_iflist *mac_nat_bif;
5386 ifnet_t mac_nat_ifp;
5387 mbuf_t m = *data;
5388
5389 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5390 "ifp %s", ifp->if_xname);
5391 if (m->m_len < ETHER_HDR_LEN) {
5392 m = m_pullup(m, ETHER_HDR_LEN);
5393 if (m == NULL) {
5394 *data = NULL;
5395 return EJUSTRETURN;
5396 }
5397 }
5398
5399 eh = mtod(m, struct ether_header *);
5400 vlan = VLANTAGOF(m);
5401 etypef = ether_type_flag_get(eh->ether_type);
5402
5403 BRIDGE_LOCK(sc);
5404 mac_nat_bif = sc->sc_mac_nat_bif;
5405 mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5406 if (mac_nat_ifp == ifp) {
5407 /* record the IP address used by the MAC NAT interface */
5408 (void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5409 m = *data;
5410 if (m == NULL) {
5411 /* packet was deallocated */
5412 BRIDGE_UNLOCK(sc);
5413 return EJUSTRETURN;
5414 }
5415 }
5416 bridge_ifp = sc->sc_ifp;
5417
5418 /*
5419 * APPLE MODIFICATION
5420 * If the packet is an 802.1X ethertype, then only send on the
5421 * original output interface.
5422 */
5423 if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5424 dst_if = ifp;
5425 goto sendunicast;
5426 }
5427
5428 /*
5429 * If bridge is down, but the original output interface is up,
5430 * go ahead and send out that interface. Otherwise, the packet
5431 * is dropped below.
5432 */
5433 if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5434 dst_if = ifp;
5435 goto sendunicast;
5436 }
5437
5438 /*
5439 * If the packet is a multicast, or we don't know a better way to
5440 * get there, send to all interfaces.
5441 */
5442 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5443 dst_if = NULL;
5444 } else {
5445 bif = bridge_rtlookup_bif(sc, eh->ether_dhost, vlan);
5446 if (bif != NULL) {
5447 dst_if = bif->bif_ifp;
5448 }
5449 }
5450 if (dst_if == NULL) {
5451 struct mbuf *mc;
5452 errno_t error;
5453
5454
5455 bridge_span(sc, etypef, m);
5456
5457 BRIDGE_LOCK2REF(sc, error);
5458 if (error != 0) {
5459 m_freem(m);
5460 return EJUSTRETURN;
5461 }
5462
5463 /*
5464 * Duplicate and send the packet across all member interfaces
5465 * except the originating interface.
5466 */
5467 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5468 dst_if = bif->bif_ifp;
5469 if (dst_if == ifp) {
5470 /* skip the originating interface */
5471 continue;
5472 }
5473 /* skip interface with inactive link status */
5474 if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5475 continue;
5476 }
5477
5478 /* skip interface that isn't running */
5479 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5480 continue;
5481 }
5482 /*
5483 * If the interface is participating in spanning
5484 * tree, make sure the port is in a state that
5485 * allows forwarding.
5486 */
5487 if ((bif->bif_ifflags & IFBIF_STP) &&
5488 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5489 continue;
5490 }
5491 /*
5492 * If the destination is the MAC NAT interface,
5493 * skip sending the packet. The packet can't be sent
5494 * if the source MAC is incorrect.
5495 */
5496 if (dst_if == mac_nat_ifp) {
5497 continue;
5498 }
5499
5500 /* make a deep copy to send on this member interface */
5501 mc = m_dup(m, M_DONTWAIT);
5502 if (mc == NULL) {
5503 (void)ifnet_stat_increment_out(bridge_ifp,
5504 0, 0, 1);
5505 continue;
5506 }
5507 (void)bridge_enqueue(bridge_ifp, ifp, dst_if, etypef,
5508 mc, CHECKSUM_OPERATION_COMPUTE);
5509 }
5510 BRIDGE_UNREF(sc);
5511
5512 if ((ifp->if_flags & IFF_RUNNING) == 0) {
5513 m_freem(m);
5514 return EJUSTRETURN;
5515 }
5516 /* allow packet to continue on the originating interface */
5517 return 0;
5518 }
5519
5520 sendunicast:
5521 /*
5522 * XXX Spanning tree consideration here?
5523 */
5524
5525 bridge_span(sc, etypef, m);
5526 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5527 m_freem(m);
5528 BRIDGE_UNLOCK(sc);
5529 return EJUSTRETURN;
5530 }
5531
5532 BRIDGE_UNLOCK(sc);
5533 if (dst_if == ifp) {
5534 /* allow packet to continue on the originating interface */
5535 return 0;
5536 }
5537 if (dst_if != mac_nat_ifp) {
5538 (void) bridge_enqueue(bridge_ifp, ifp, dst_if, etypef, m,
5539 CHECKSUM_OPERATION_COMPUTE);
5540 } else {
5541 /*
5542 * This is not the original output interface
5543 * and the destination is the MAC NAT interface.
5544 * Drop the packet because the packet can't be sent
5545 * if the source MAC is incorrect.
5546 */
5547 m_freem(m);
5548 }
5549 return EJUSTRETURN;
5550 }
5551
5552 /*
5553 * Output callback.
5554 *
5555 * This routine is called externally from above only when if_bridge_txstart
5556 * is disabled; otherwise it is called internally by bridge_start().
5557 */
5558 static int
5559 bridge_output(struct ifnet *ifp, struct mbuf *m)
5560 {
5561 struct bridge_iflist *bif;
5562 struct bridge_softc * __single sc = ifnet_softc(ifp);
5563 struct ether_header *eh;
5564 ether_type_flag_t etypef;
5565 struct ifnet *dst_if = NULL;
5566 int error = 0;
5567
5568 eh = mtod(m, struct ether_header *);
5569 etypef = ether_type_flag_get(eh->ether_type);
5570 BRIDGE_LOCK(sc);
5571
5572 if (!IS_BCAST_MCAST(m)) {
5573 bif = bridge_rtlookup_bif(sc, eh->ether_dhost, 0);
5574 if (bif != NULL) {
5575 dst_if = bif->bif_ifp;
5576 }
5577 }
5578
5579 (void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5580
5581 BRIDGE_BPF_TAP_OUT(ifp, m);
5582
5583 if (dst_if == NULL) {
5584 /* callee will unlock */
5585 bridge_broadcast(sc, NULL, etypef, m);
5586 } else {
5587 ifnet_t bridge_ifp;
5588
5589 bridge_ifp = sc->sc_ifp;
5590 BRIDGE_UNLOCK(sc);
5591
5592 error = bridge_enqueue(bridge_ifp, NULL, dst_if, etypef, m,
5593 CHECKSUM_OPERATION_FINALIZE);
5594 }
5595
5596 return error;
5597 }
5598
5599 static void
5600 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5601 {
5602 struct ether_header *eh;
5603 bool is_ipv4;
5604 uint32_t sw_csum, hwcap;
5605 uint32_t did_sw;
5606 uint32_t csum_flags;
5607
5608 eh = mtod(m, struct ether_header *);
5609 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5610 return;
5611 }
5612
5613 /* do in software what the hardware cannot */
5614 hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5615 csum_flags = m->m_pkthdr.csum_flags;
5616 sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5617 sw_csum &= IF_HWASSIST_CSUM_MASK;
5618
5619 if (is_ipv4) {
5620 if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5621 (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5622 if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5623 uint16_t start =
5624 sizeof(*eh) + sizeof(struct ip);
5625 uint16_t ulpoff =
5626 m->m_pkthdr.csum_data & 0xffff;
5627 m->m_pkthdr.csum_flags |=
5628 (CSUM_DATA_VALID | CSUM_PARTIAL);
5629 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5630 m->m_pkthdr.csum_tx_start = start;
5631 } else {
5632 sw_csum |= (CSUM_DELAY_DATA &
5633 m->m_pkthdr.csum_flags);
5634 }
5635 }
5636 did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5637 } else {
5638 if ((hwcap & CSUM_PARTIAL) &&
5639 !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5640 (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5641 if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5642 uint16_t start =
5643 sizeof(*eh) + sizeof(struct ip6_hdr);
5644 uint16_t ulpoff =
5645 m->m_pkthdr.csum_data & 0xffff;
5646 m->m_pkthdr.csum_flags |=
5647 (CSUM_DATA_VALID | CSUM_PARTIAL);
5648 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5649 m->m_pkthdr.csum_tx_start = start;
5650 } else {
5651 sw_csum |= (CSUM_DELAY_IPV6_DATA &
5652 m->m_pkthdr.csum_flags);
5653 }
5654 }
5655 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5656 }
5657 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5658 "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5659 ifp->if_xname, csum_flags, hwcap, sw_csum,
5660 did_sw, m->m_pkthdr.csum_flags);
5661 }
5662
5663 /*
5664 * bridge_start:
5665 *
5666 * Start output on a bridge.
5667 *
5668 * This routine is invoked by the start worker thread; because we never call
5669 * it directly, there is no need do deploy any serialization mechanism other
5670 * than what's already used by the worker thread, i.e. this is already single
5671 * threaded.
5672 *
5673 * This routine is called only when if_bridge_txstart is enabled.
5674 */
5675 static void
5676 bridge_start(struct ifnet *ifp)
5677 {
5678 mbuf_ref_t m;
5679
5680 for (;;) {
5681 if (ifnet_dequeue(ifp, &m) != 0) {
5682 break;
5683 }
5684
5685 (void) bridge_output(ifp, m);
5686 }
5687 }
5688
5689 static void
5690 prepare_input_packet(ifnet_t ifp, mbuf_t m)
5691 {
5692 mbuf_pkthdr_setrcvif(m, ifp);
5693 mbuf_pkthdr_setheader(m, mtod(m, void *));
5694 /* adjust frame to skip mac-layer header */
5695 _mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
5696 }
5697
5698 static void
5699 mark_tso_checksum_ok(mbuf_t m)
5700 {
5701 if (_mbuf_get_tso_mss(m) != 0 ||
5702 (m->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
5703 mbuf_set_csum_performed(m, checksum_performed_all_good, 0xffff);
5704 }
5705 }
5706
5707 static void
5708 inject_input_packet_list(ifnet_t ifp, mbuf_t in_list, bool m_proto1)
5709 {
5710 for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5711 /* mark the packets as arriving on the interface */
5712 BRIDGE_BPF_TAP_IN(ifp, scan);
5713 if (m_proto1) {
5714 scan->m_flags |= M_PROTO1; /* set to avoid loops */
5715 }
5716 prepare_input_packet(ifp, scan);
5717 mark_tso_checksum_ok(scan);
5718 }
5719 dlil_input_packet_list(ifp, in_list);
5720 return;
5721 }
5722
5723 static void
5724 adjust_input_packet_list(mbuf_t in_list)
5725 {
5726 for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5727 mbuf_pkthdr_setheader(scan, mtod(scan, void *));
5728 _mbuf_adjust_pkthdr_and_data(scan, ETHER_HDR_LEN);
5729 }
5730 }
5731
5732 static bool
5733 in_addr_is_ours(struct in_addr ip)
5734 {
5735 struct in_ifaddr *ia;
5736 bool ours = false;
5737
5738 lck_rw_lock_shared(&in_ifaddr_rwlock);
5739 TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5740 if (ia->ia_addr.sin_addr.s_addr == ip.s_addr) {
5741 ours = true;
5742 break;
5743 }
5744 }
5745 lck_rw_done(&in_ifaddr_rwlock);
5746 return ours;
5747 }
5748
5749 static bool
5750 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5751 {
5752 struct in6_addr dst_ip;
5753 struct in6_ifaddr *ia6;
5754 bool ours = false;
5755
5756 if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5757 /* need to embed scope ID for comparison */
5758 bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5759 dst_ip.s6_addr16[1] = htons(ifscope);
5760 ip6_p = &dst_ip;
5761 }
5762 lck_rw_lock_shared(&in6_ifaddr_rwlock);
5763 TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5764 if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5765 ia6->ia_addr.sin6_scope_id, ifscope)) {
5766 ours = true;
5767 break;
5768 }
5769 }
5770 lck_rw_done(&in6_ifaddr_rwlock);
5771 return ours;
5772 }
5773
5774 static bool
5775 ip_packet_info_dst_is_our_ip(ip_packet_info_t info_p, int index)
5776 {
5777 /* if the destination is our IP address, don't segment */
5778 bool our_ip = false;
5779
5780 if (info_p->ip_is_ipv4) {
5781 struct ip * hdr;
5782 struct in_addr dst_ip;
5783
5784 hdr = (struct ip *)(info_p->ip_hdr);
5785 bcopy(&hdr->ip_dst, &dst_ip, sizeof(dst_ip));
5786 our_ip = in_addr_is_ours(dst_ip);
5787 } else {
5788 struct ip6_hdr * hdr;
5789
5790 hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5791 our_ip = in6_addr_is_ours(&hdr->ip6_dst, index);
5792 }
5793 return our_ip;
5794 }
5795
/*
 * ip_addr:
 *
 * Storage for either an IPv4 or an IPv6 address. The union does not record
 * which member is valid; code using it passes a separate is_ipv4 flag.
 */
typedef union {
	struct in_addr ip;      /* IPv4 address */
	struct in6_addr ip6;    /* IPv6 address */
} ip_addr, *ip_addr_t;
5800
5801 static void
5802 ip_packet_info_copy_dst_ip_addr(ip_packet_info_t info_p, ip_addr_t ipaddr)
5803 {
5804 if (info_p->ip_is_ipv4) {
5805 struct ip * hdr;
5806
5807 hdr = (struct ip *)(info_p->ip_hdr);
5808 bcopy(&hdr->ip_dst, &ipaddr->ip, sizeof(ipaddr->ip));
5809 } else {
5810 struct ip6_hdr * hdr;
5811
5812 hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5813 bcopy(&hdr->ip6_dst, &ipaddr->ip6, sizeof(ipaddr->ip6));
5814 }
5815 }
5816
5817 static bool
5818 ip_addr_are_equal(ip_addr_t addr1, ip_addr_t addr2, bool is_ipv4)
5819 {
5820 bool equal;
5821
5822 if (is_ipv4) {
5823 equal = addr1->ip.s_addr == addr2->ip.s_addr;
5824 } else {
5825 equal = IN6_ARE_ADDR_EQUAL(&addr1->ip6, &addr2->ip6);
5826 }
5827 return equal;
5828 }
5829
5830 static bool
5831 ip_addr_is_ours(ip_addr_t ipaddr, int index, bool is_ipv4)
5832 {
5833 bool our_ip;
5834
5835 if (is_ipv4) {
5836 our_ip = in_addr_is_ours(ipaddr->ip);
5837 } else {
5838 our_ip = in6_addr_is_ours(&ipaddr->ip6, index);
5839 }
5840 return our_ip;
5841 }
5842
5843 static void
5844 bridge_interface_input_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
5845 mblist list, bool bif_uses_virtio)
5846 {
5847 uint32_t in_errors = 0;
5848 bool is_ipv4;
5849 mblist in_list;
5850 ip_addr last_ip;
5851 bool last_ip_ours = false;
5852 bool last_ip_valid = false;
5853 u_int mac_hlen;
5854 bool may_forward = false;
5855 mbuf_t next_packet;
5856
5857 switch (etypef) {
5858 case ETHER_TYPE_FLAG_IPV4:
5859 is_ipv4 = true;
5860 may_forward = (ipforwarding != 0);
5861 break;
5862 case ETHER_TYPE_FLAG_IPV6:
5863 is_ipv4 = false;
5864 may_forward = (ip6_forwarding != 0);
5865 break;
5866 }
5867 if (!may_forward) {
5868 in_list = list;
5869 goto done;
5870 }
5871
5872 mblist_init(&in_list);
5873 mac_hlen = sizeof(struct ether_header);
5874 bzero(&last_ip, sizeof(last_ip));
5875 for (mbuf_ref_t scan = list.head; scan != NULL; scan = next_packet) {
5876 int error;
5877 ip_packet_info info;
5878 bool ip_ours;
5879 struct ifbrmstats stats; /* XXX should really be accounted */
5880 ip_addr this_ip;
5881
5882 /* take it out of the list */
5883 next_packet = scan->m_nextpkt;
5884 scan->m_nextpkt = NULL;
5885
5886 /* check for TCP packet and get IP header */
5887 error = bridge_get_tcp_header(&scan, mac_hlen, is_ipv4,
5888 &info, &stats.brms_in_ip);
5889 if (error != 0) {
5890 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5891 "%s bridge_get_tcp_header failed %d",
5892 bridge_ifp->if_xname, error);
5893 if (scan != NULL) {
5894 m_freem(scan);
5895 scan = NULL;
5896 }
5897 in_errors++;
5898 continue;
5899 }
5900 ip_packet_info_copy_dst_ip_addr(&info, &this_ip);
5901 if (last_ip_valid &&
5902 ip_addr_are_equal(&last_ip, &this_ip, is_ipv4)) {
5903 /* use cached result */
5904 ip_ours = last_ip_ours;
5905 } else {
5906 ip_ours = ip_addr_is_ours(&this_ip,
5907 bridge_ifp->if_index,
5908 is_ipv4);
5909 /* cache the result */
5910 last_ip_valid = true;
5911 last_ip_ours = ip_ours;
5912 last_ip = this_ip;
5913 }
5914
5915 /* if the packet is destined to us, just send it up */
5916 if (ip_ours) {
5917 mblist_append(&in_list, scan);
5918 continue;
5919 }
5920 /*
5921 * If this is a TCP packet that's marked for TSO or LRO, or
5922 * we think it's a large packet, segment it.
5923 */
5924 if (info.ip_proto_hdr != NULL &&
5925 (_mbuf_get_tso_mss(scan) != 0 ||
5926 scan->m_pkthdr.rx_seg_cnt > 1 ||
5927 (!bif_uses_virtio &&
5928 (mbuf_pkthdr_len(scan) >
5929 (bridge_ifp->if_mtu + ETHER_HDR_LEN))))) {
5930 mblist seg;
5931
5932 seg = gso_tcp_with_info(bridge_ifp, scan, &info,
5933 mac_hlen, is_ipv4, false);
5934 if (seg.head == NULL) {
5935 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5936 "gso_tcp returned no packets");
5937 in_errors++;
5938 continue;
5939 }
5940 if (seg.count > 1) {
5941 /* packet was segmented+checksummed */
5942 mblist_append_list(&in_list, seg);
5943 continue;
5944 }
5945 /* there's just one packet, no segmentation */
5946 scan = seg.head;
5947 }
5948 /* need checksum if it's marked for checksum offload */
5949 if (bif_uses_virtio &&
5950 (scan->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
5951 error = bridge_offload_checksum(&scan, &info, &stats);
5952 if (error != 0) {
5953 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
5954 "%s bridge_offload_checksum failed %d",
5955 bridge_ifp->if_xname, error);
5956 if (scan != NULL) {
5957 m_freem(scan);
5958 scan = NULL;
5959 }
5960 in_errors++;
5961 continue;
5962 }
5963 }
5964 mblist_append(&in_list, scan);
5965 }
5966
5967 done:
5968 if (in_list.head != NULL) {
5969 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5970 "%s packets %d bytes %d",
5971 bridge_ifp->if_xname,
5972 in_list.count, in_list.bytes);
5973 /* Mark the packets as arriving on the bridge interface */
5974 inject_input_packet_list(bridge_ifp, in_list.head, false);
5975 ifnet_stat_increment_in(bridge_ifp, in_list.count,
5976 in_list.bytes, in_errors);
5977 } else if (in_errors != 0) {
5978 ifnet_stat_increment_in(bridge_ifp, 0, 0, in_errors);
5979 }
5980 return;
5981 }
5982
5983 /*
5984 * bridge_broadcast:
5985 *
5986 * Send a frame to all interfaces that are members of
5987 * the bridge, except for the one on which the packet
5988 * arrived.
5989 *
5990 * NOTE: Releases the lock on return.
5991 */
5992 static void
5993 bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
5994 ether_type_flag_t etypef, mbuf_t m)
5995 {
5996 ifnet_t bridge_ifp;
5997 struct bridge_iflist *dbif;
5998 struct ifnet * src_if;
5999 mbuf_ref_t mc;
6000 struct mbuf *mc_in;
6001 int error = 0, used = 0;
6002 ChecksumOperation cksum_op;
6003 struct mac_nat_record mnr;
6004 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
6005 boolean_t translate_mac = FALSE;
6006 uint32_t sc_filter_flags;
6007 bool is_bcast_mcast;
6008
6009 bridge_ifp = sc->sc_ifp;
6010 if (sbif != NULL) {
6011 src_if = sbif->bif_ifp;
6012 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6013 if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6014 /* get the translation record */
6015 translate_mac
6016 = bridge_mac_nat_output(sc, sbif, &m, &mnr);
6017 if (m == NULL) {
6018 /* packet was deallocated */
6019 BRIDGE_UNLOCK(sc);
6020 return;
6021 }
6022 }
6023 } else {
6024 /*
6025 * sbif is NULL when the bridge interface calls
6026 * bridge_broadcast().
6027 */
6028 cksum_op = CHECKSUM_OPERATION_FINALIZE;
6029 src_if = NULL;
6030 }
6031
6032 BRIDGE_LOCK2REF(sc, error);
6033 if (error) {
6034 m_freem(m);
6035 return;
6036 }
6037 is_bcast_mcast = IS_BCAST_MCAST(m);
6038 sc_filter_flags = sc->sc_filter_flags;
6039 TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6040 ifnet_t dst_if;
6041
6042 dst_if = dbif->bif_ifp;
6043 if (dst_if == src_if) {
6044 /* skip the interface that the packet came in on */
6045 continue;
6046 }
6047
6048 /* Private segments can not talk to each other */
6049 if (sbif != NULL &&
6050 (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6051 continue;
6052 }
6053
6054 if ((dbif->bif_ifflags & IFBIF_STP) &&
6055 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6056 continue;
6057 }
6058
6059 if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6060 !is_bcast_mcast) {
6061 continue;
6062 }
6063
6064 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6065 continue;
6066 }
6067
6068 if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
6069 continue;
6070 }
6071
6072 if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6073 mc = m;
6074 used = 1;
6075 } else {
6076 mc = m_dup(m, M_DONTWAIT);
6077 if (mc == NULL) {
6078 (void) ifnet_stat_increment_out(bridge_ifp,
6079 0, 0, 1);
6080 continue;
6081 }
6082 }
6083
6084 /*
6085 * If broadcast input is enabled, do so only if this
6086 * is an input packet.
6087 */
6088 if (sbif != NULL && is_bcast_mcast &&
6089 (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6090 mc_in = m_dup(mc, M_DONTWAIT);
6091 /* this could fail, but we continue anyways */
6092 } else {
6093 mc_in = NULL;
6094 }
6095
6096 /* out */
6097 if (translate_mac && mac_nat_bif == dbif) {
6098 /* translate the packet */
6099 bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
6100 }
6101
6102 if (mc != NULL && sbif != NULL &&
6103 PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6104 if (used == 0) {
6105 /* Keep the layer3 header aligned */
6106 int i = min(mc->m_pkthdr.len, max_protohdr);
6107 mc = m_copyup(mc, i, ETHER_ALIGN);
6108 if (mc == NULL) {
6109 (void) ifnet_stat_increment_out(
6110 sc->sc_ifp, 0, 0, 1);
6111 if (mc_in != NULL) {
6112 m_freem(mc_in);
6113 mc_in = NULL;
6114 }
6115 continue;
6116 }
6117 }
6118 if (bridge_pf(&mc, dst_if, sc_filter_flags, false) != 0) {
6119 if (mc_in != NULL) {
6120 m_freem(mc_in);
6121 mc_in = NULL;
6122 }
6123 continue;
6124 }
6125 if (mc == NULL) {
6126 if (mc_in != NULL) {
6127 m_freem(mc_in);
6128 mc_in = NULL;
6129 }
6130 continue;
6131 }
6132 }
6133
6134 if (mc != NULL) {
6135 /* verify checksum if necessary */
6136 if (bif_has_checksum_offload(dbif) && sbif != NULL &&
6137 !bif_has_checksum_offload(sbif)) {
6138 error = bridge_verify_checksum(&mc,
6139 &dbif->bif_stats);
6140 if (error != 0) {
6141 if (mc != NULL) {
6142 m_freem(mc);
6143 }
6144 mc = NULL;
6145 }
6146 }
6147 if (mc != NULL) {
6148 (void) bridge_enqueue(bridge_ifp,
6149 NULL, dst_if, etypef, mc, cksum_op);
6150 }
6151 }
6152
6153 /* in */
6154 if (mc_in == NULL) {
6155 continue;
6156 }
6157 BRIDGE_BPF_TAP_IN(dst_if, mc_in);
6158 prepare_input_packet(dst_if, mc_in);
6159 mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
6160 dlil_input_packet_list(dst_if, mc_in);
6161 }
6162 if (used == 0) {
6163 m_freem(m);
6164 }
6165
6166
6167 BRIDGE_UNREF(sc);
6168 }
6169
6170 static mbuf_t
6171 copy_packet_list(mbuf_t m)
6172 {
6173 mblist ret;
6174 mbuf_t next_packet;
6175
6176 mblist_init(&ret);
6177 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
6178 mbuf_t copy_m;
6179
6180 /* take it out of the list */
6181 next_packet = scan->m_nextpkt;
6182 scan->m_nextpkt = NULL;
6183
6184 /* create a copy and add it to the new list */
6185 copy_m = m_dup(scan, M_DONTWAIT);
6186 if (copy_m != NULL) {
6187 mblist_append(&ret, copy_m);
6188 }
6189
6190 /* put it back in the original list */
6191 scan->m_nextpkt = next_packet;
6192 }
6193 return ret.head;
6194 }
6195
6196 /*
6197 * bridge_broadcast_list:
6198 *
6199 * Broadcast a list of packets to all members except `sbif`.
6200 * Consumes `m` before returning.
6201 *
6202 * NOTE: Releases the lock on return.
6203 */
6204 static void
6205 bridge_broadcast_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
6206 ether_type_flag_t etypef, mbuf_t m)
6207 {
6208 bool bridge_has_address;
6209 ifnet_t bridge_ifp;
6210 struct bridge_iflist * dbif;
6211 bool is_bcast_mcast;
6212 errno_t error = 0;
6213 ChecksumOperation cksum_op;
6214 struct bridge_iflist * mac_nat_bif = sc->sc_mac_nat_bif;
6215 ifnet_t mac_nat_if = NULL;
6216 bool need_mac_nat = false;
6217 mbuf_t out_mac_nat = NULL;
6218 ifnet_t src_if;
6219 uint32_t sc_filter_flags;
6220 bool used = false;
6221
6222 bridge_ifp = sc->sc_ifp;
6223 if (sbif != NULL) {
6224 src_if = sbif->bif_ifp;
6225
6226 if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
6227 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6228
6229 /* compute checksum on packets marked with offload */
6230 m = bridge_checksum_offload_list(bridge_ifp, sbif,
6231 m, is_ipv4);
6232 if (m == NULL) {
6233 BRIDGE_UNLOCK(sc);
6234 goto done;
6235 }
6236 cksum_op = CHECKSUM_OPERATION_NONE;
6237 } else {
6238 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6239 }
6240
6241 /*
6242 * If MAC-NAT is enabled and we'll be sending the packets
6243 * over it, verify that it is up and active before
6244 * deciding to make a translated copy.
6245 */
6246 if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6247 mac_nat_if = mac_nat_bif->bif_ifp;
6248 if ((mac_nat_if->if_flags & IFF_RUNNING) != 0 &&
6249 (mac_nat_bif->bif_flags & BIFF_MEDIA_ACTIVE) != 0) {
6250 need_mac_nat = true;
6251 }
6252 }
6253 } else {
6254 /*
6255 * sbif is NULL when the bridge interface calls
6256 * bridge_broadcast().
6257 */
6258 cksum_op = CHECKSUM_OPERATION_FINALIZE;
6259 src_if = NULL;
6260 }
6261
6262 /*
6263 * Create a translated copy for packets destined to MAC-NAT interface.
6264 */
6265 if (need_mac_nat) {
6266 out_mac_nat
6267 = bridge_mac_nat_copy_and_translate_list(sc, sbif,
6268 mac_nat_if, m);
6269 }
6270 sc_filter_flags = sc->sc_filter_flags;
6271 bridge_has_address = (sc->sc_flags & SCF_ADDRESS_ASSIGNED) != 0;
6272 BRIDGE_LOCK2REF(sc, error);
6273 if (error) {
6274 goto done;
6275 }
6276 is_bcast_mcast = IS_BCAST_MCAST(m);
6277
6278 /* make a copy for the bridge interface */
6279 if (is_bcast_mcast && bridge_has_address) {
6280 mbuf_t in_list;
6281
6282 in_list = copy_packet_list(m);
6283 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6284 "%s mcast for us in_m %p",
6285 bridge_ifp->if_xname, in_list);
6286 if (in_list != NULL) {
6287 inject_input_packet_list(bridge_ifp, in_list, false);
6288 }
6289 }
6290
6291 TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6292 ifnet_t dst_if;
6293 mbuf_t in_m = NULL;
6294 mbuf_t out_m = NULL;
6295
6296 dst_if = dbif->bif_ifp;
6297 if (dst_if == src_if) {
6298 /* skip the interface that the packet came in on */
6299 continue;
6300 }
6301
6302 /* Private segments can not talk to each other */
6303 if (sbif != NULL &&
6304 (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6305 continue;
6306 }
6307
6308 if ((dbif->bif_ifflags & IFBIF_STP) &&
6309 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6310 continue;
6311 }
6312
6313 if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6314 !is_bcast_mcast) {
6315 continue;
6316 }
6317
6318 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6319 continue;
6320 }
6321
6322 if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
6323 continue;
6324 }
6325 if (dbif == mac_nat_bif) {
6326 /* translated copy was created above, use that */
6327 out_m = out_mac_nat;
6328 out_mac_nat = NULL;
6329 } else if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6330 /* consume `m` */
6331 out_m = m;
6332 used = true;
6333 } else {
6334 /* needs a copy */
6335 out_m = copy_packet_list(m);
6336 }
6337
6338 if (out_m == NULL) {
6339 ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
6340 continue;
6341 }
6342 /*
6343 * If broadcast input is enabled, do so only if this
6344 * is an input packet.
6345 */
6346 if (sbif != NULL && is_bcast_mcast &&
6347 (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6348 in_m = copy_packet_list(m);
6349 /* this could fail, but we continue anyways */
6350 } else {
6351 in_m = NULL;
6352 }
6353
6354 if (sbif != NULL &&
6355 PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6356 out_m = bridge_pf_list(out_m, dst_if,
6357 sc_filter_flags, false);
6358 }
6359 if (out_m != NULL) {
6360 /* verify checksum if necessary */
6361 if (sbif != NULL &&
6362 ether_type_flag_is_ip(etypef) &&
6363 bif_has_checksum_offload(dbif) &&
6364 !bif_has_checksum_offload(sbif)) {
6365 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6366
6367 out_m = bridge_verify_checksum_list(bridge_ifp,
6368 dbif, out_m, is_ipv4);
6369 }
6370 if (out_m != NULL) {
6371 bridge_enqueue(bridge_ifp, src_if, dst_if,
6372 etypef, out_m, cksum_op);
6373 }
6374 }
6375
6376 /* in */
6377 if (in_m != NULL) {
6378 inject_input_packet_list(dst_if, in_m, true);
6379 }
6380 }
6381
6382 BRIDGE_UNREF(sc);
6383
6384 done:
6385 if (out_mac_nat != NULL) {
6386 m_freem_list(out_mac_nat);
6387 }
6388 if (!used) {
6389 m_freem_list(m);
6390 }
6391 return;
6392 }
6393
6394 #define NEEDED_CSUM_IPV4 (IF_HWASSIST_CSUM_UDP | IF_HWASSIST_CSUM_TCP)
6395 #define NEEDED_CSUM_IPV6 (IF_HWASSIST_CSUM_UDPIPV6 | IF_HWASSIST_CSUM_TCPIPV6)
6396
6397 static bool
6398 interface_supports_hw_checksum(ifnet_t ifp, bool is_ipv4)
6399 {
6400 uint32_t hwcap = IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
6401 uint32_t needed = is_ipv4 ? NEEDED_CSUM_IPV4 : NEEDED_CSUM_IPV6;
6402 bool supports;
6403
6404 supports = (hwcap & needed) == needed;
6405 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM, "%s: does %ssupport checksum",
6406 ifp->if_xname, supports ? "" : "not ");
6407 return supports;
6408 }
6409
6410 static void
6411 bridge_forward_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
6412 ifnet_t dst_if, ether_type_flag_t etypef, mbuf_t m)
6413 {
6414 bool checksum_ok = false;
6415 ChecksumOperation cksum_op;
6416 ifnet_t bridge_ifp;
6417 struct bridge_iflist * dbif;
6418 uint32_t sc_filter_flags;
6419 ifnet_t src_if;
6420
6421 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6422 goto drop;
6423 }
6424 dbif = bridge_lookup_member_if(sc, dst_if);
6425 if (dbif == NULL) {
6426 /* Not a member of the bridge (anymore?) */
6427 goto drop;
6428 }
6429
6430 /* Private segments can not talk to each other */
6431 if ((sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) != 0) {
6432 goto drop;
6433 }
6434 bridge_ifp = sc->sc_ifp;
6435 src_if = sbif->bif_ifp;
6436 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6437 if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
6438 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6439
6440 if (dbif == sc->sc_mac_nat_bif ||
6441 (IFNET_IS_VMNET(dst_if) && !bif_uses_virtio(dbif)) ||
6442 !interface_supports_hw_checksum(dst_if, is_ipv4)) {
6443 /* compute checksums now if necessary */
6444 m = bridge_checksum_offload_list(bridge_ifp, sbif,
6445 m, is_ipv4);
6446 checksum_ok = true;
6447 } else {
6448 cksum_op = CHECKSUM_OPERATION_NONE;
6449 }
6450 }
6451
6452 if (dbif == sc->sc_mac_nat_bif) {
6453 /* translate the packets before forwarding them */
6454 if ((etypef & ETHER_TYPE_FLAG_IP_ARP) != 0) {
6455 m = bridge_mac_nat_translate_list(sc, sbif, dst_if, m);
6456 }
6457 } else if (!checksum_ok && ether_type_flag_is_ip(etypef) &&
6458 bif_has_checksum_offload(dbif) && !bif_has_checksum_offload(sbif)) {
6459 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6460
6461 /*
6462 * If the destination interface has checksum offload enabled,
6463 * verify the checksum now, unless the source interface also has
6464 * checksum offload enabled. The checksum in that case has
6465 * already just been computed and verifying it is unnecessary.
6466 */
6467 m = bridge_verify_checksum_list(bridge_ifp, dbif, m, is_ipv4);
6468 }
6469 sc_filter_flags = sc->sc_filter_flags;
6470 BRIDGE_UNLOCK(sc);
6471 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6472 m = bridge_pf_list(m, dst_if, sc_filter_flags, false);
6473 }
6474
6475 /*
6476 * We're forwarding inbound packets for which the checksums must
6477 * already have been computed and if required, verified, or
6478 * packets from a virtio-enabled interface for which we rely
6479 * on the packet containing appropriate offload flags.
6480 */
6481 if (m != NULL) {
6482 bridge_enqueue(bridge_ifp, src_if, dst_if, etypef, m,
6483 cksum_op);
6484 }
6485 return;
6486
6487 drop:
6488 BRIDGE_UNLOCK(sc);
6489 m_freem_list(m);
6490 return;
6491 }
6492
6493 /*
6494 * bridge_span:
6495 *
6496 * Duplicate a packet out one or more interfaces that are in span mode,
6497 * the original mbuf is unmodified.
6498 */
6499 static void
6500 bridge_span(struct bridge_softc *sc, ether_type_flag_t etypef, struct mbuf *m)
6501 {
6502 struct bridge_iflist *bif;
6503 struct ifnet *dst_if;
6504 struct mbuf *mc;
6505
6506 if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6507 return;
6508 }
6509
6510 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6511 dst_if = bif->bif_ifp;
6512
6513 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6514 continue;
6515 }
6516
6517 mc = m_copypacket(m, M_DONTWAIT);
6518 if (mc == NULL) {
6519 (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6520 continue;
6521 }
6522
6523 (void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, etypef, mc,
6524 CHECKSUM_OPERATION_NONE);
6525 }
6526 }
6527
6528 /*
6529 * bridge_rtupdate:
6530 *
6531 * Add a bridge routing entry.
6532 */
6533 static int
6534 bridge_rtupdate(struct bridge_softc *sc, const uint8_t dst[ETHER_ADDR_LEN], uint16_t vlan,
6535 struct bridge_iflist *bif, int setflags, uint8_t flags)
6536 {
6537 struct bridge_rtnode *brt;
6538 int error;
6539
6540 BRIDGE_LOCK_ASSERT_HELD(sc);
6541
6542 /* Check the source address is valid and not multicast. */
6543 if (ETHER_IS_MULTICAST(dst) ||
6544 (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6545 dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6546 return EINVAL;
6547 }
6548
6549 /* 802.1p frames map to vlan 1 */
6550 if (vlan == 0) {
6551 vlan = 1;
6552 }
6553
6554 /*
6555 * A route for this destination might already exist. If so,
6556 * update it, otherwise create a new one.
6557 */
6558 if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6559 if (sc->sc_brtcnt >= sc->sc_brtmax) {
6560 sc->sc_brtexceeded++;
6561 return ENOSPC;
6562 }
6563 /* Check per interface address limits (if enabled) */
6564 if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6565 bif->bif_addrexceeded++;
6566 return ENOSPC;
6567 }
6568
6569 /*
6570 * Allocate a new bridge forwarding node, and
6571 * initialize the expiration time and Ethernet
6572 * address.
6573 */
6574 brt = zalloc_noblock(bridge_rtnode_pool);
6575 if (brt == NULL) {
6576 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6577 "zalloc_nolock failed");
6578 return ENOMEM;
6579 }
6580 bzero(brt, sizeof(struct bridge_rtnode));
6581
6582 if (bif->bif_ifflags & IFBIF_STICKY) {
6583 brt->brt_flags = IFBAF_STICKY;
6584 } else {
6585 brt->brt_flags = IFBAF_DYNAMIC;
6586 }
6587
6588 memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6589 brt->brt_vlan = vlan;
6590
6591 if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6592 zfree(bridge_rtnode_pool, brt);
6593 return error;
6594 }
6595 brt->brt_dst = bif;
6596 bif->bif_addrcnt++;
6597 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6598 "added %02x:%02x:%02x:%02x:%02x:%02x "
6599 "on %s count %u hashsize %u",
6600 dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6601 sc->sc_ifp->if_xname, sc->sc_brtcnt,
6602 sc->sc_rthash_size);
6603 }
6604
6605 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6606 brt->brt_dst != bif) {
6607 brt->brt_dst->bif_addrcnt--;
6608 brt->brt_dst = bif;
6609 brt->brt_dst->bif_addrcnt++;
6610 }
6611
6612 if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6613 unsigned long now;
6614
6615 now = (unsigned long) net_uptime();
6616 brt->brt_expire = now + sc->sc_brttimeout;
6617 }
6618 if (setflags) {
6619 brt->brt_flags = flags;
6620 }
6621
6622 return 0;
6623 }
6624
6625 /*
6626 * bridge_rtlookup:
6627 *
6628 * Lookup the destination interface for an address.
6629 */
6630 static struct bridge_iflist *
6631 bridge_rtlookup_bif(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
6632 uint16_t vlan)
6633 {
6634 struct bridge_rtnode *brt;
6635
6636 BRIDGE_LOCK_ASSERT_HELD(sc);
6637
6638 if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6639 return NULL;
6640 }
6641
6642 return brt->brt_dst;
6643 }
6644
6645 /*
6646 * bridge_rttrim:
6647 *
6648 * Trim the routine table so that we have a number
6649 * of routing entries less than or equal to the
6650 * maximum number.
6651 */
6652 static void
6653 bridge_rttrim(struct bridge_softc *sc)
6654 {
6655 struct bridge_rtnode *brt, *nbrt;
6656
6657 BRIDGE_LOCK_ASSERT_HELD(sc);
6658
6659 /* Make sure we actually need to do this. */
6660 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6661 return;
6662 }
6663
6664 /* Force an aging cycle; this might trim enough addresses. */
6665 bridge_rtage(sc);
6666 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6667 return;
6668 }
6669
6670 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6671 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6672 bridge_rtnode_destroy(sc, brt);
6673 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6674 return;
6675 }
6676 }
6677 }
6678 }
6679
6680 /*
6681 * bridge_aging_timer:
6682 *
6683 * Aging periodic timer for the bridge routing table.
6684 */
6685 static void
6686 bridge_aging_timer(struct bridge_softc *sc)
6687 {
6688 BRIDGE_LOCK_ASSERT_HELD(sc);
6689
6690 bridge_rtage(sc);
6691 if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6692 (sc->sc_flags & SCF_DETACHING) == 0) {
6693 sc->sc_aging_timer.bdc_sc = sc;
6694 sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6695 sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6696 bridge_schedule_delayed_call(&sc->sc_aging_timer);
6697 }
6698 }
6699
6700 /*
6701 * bridge_rtage:
6702 *
6703 * Perform an aging cycle.
6704 */
6705 static void
6706 bridge_rtage(struct bridge_softc *sc)
6707 {
6708 struct bridge_rtnode *brt, *nbrt;
6709 unsigned long now;
6710
6711 BRIDGE_LOCK_ASSERT_HELD(sc);
6712
6713 now = (unsigned long) net_uptime();
6714
6715 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6716 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6717 if (now >= brt->brt_expire) {
6718 bridge_rtnode_destroy(sc, brt);
6719 }
6720 }
6721 }
6722 if (sc->sc_mac_nat_bif != NULL) {
6723 bridge_mac_nat_age_entries(sc, now);
6724 }
6725 }
6726
6727 /*
6728 * bridge_rtflush:
6729 *
6730 * Remove all dynamic addresses from the bridge.
6731 */
6732 static void
6733 bridge_rtflush(struct bridge_softc *sc, int full)
6734 {
6735 struct bridge_rtnode *brt, *nbrt;
6736
6737 BRIDGE_LOCK_ASSERT_HELD(sc);
6738
6739 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6740 if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6741 bridge_rtnode_destroy(sc, brt);
6742 }
6743 }
6744 }
6745
6746 /*
6747 * bridge_rtdaddr:
6748 *
6749 * Remove an address from the table.
6750 */
6751 static int
6752 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN], uint16_t vlan)
6753 {
6754 struct bridge_rtnode *brt;
6755 int found = 0;
6756
6757 BRIDGE_LOCK_ASSERT_HELD(sc);
6758
6759 /*
6760 * If vlan is zero then we want to delete for all vlans so the lookup
6761 * may return more than one.
6762 */
6763 while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6764 bridge_rtnode_destroy(sc, brt);
6765 found = 1;
6766 }
6767
6768 return found ? 0 : ENOENT;
6769 }
6770
6771 /*
6772 * bridge_rtdelete:
6773 *
6774 * Delete routes to a specific member interface.
6775 */
6776 static void
6777 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6778 {
6779 struct bridge_rtnode *brt, *nbrt;
6780
6781 BRIDGE_LOCK_ASSERT_HELD(sc);
6782
6783 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6784 if (brt->brt_ifp == ifp && (full ||
6785 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6786 bridge_rtnode_destroy(sc, brt);
6787 }
6788 }
6789 }
6790
6791 /*
6792 * bridge_rtable_init:
6793 *
6794 * Initialize the route table for this bridge.
6795 */
6796 static int
6797 bridge_rtable_init(struct bridge_softc *sc)
6798 {
6799 u_int32_t i;
6800
6801 sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6802 BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6803 sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6804
6805 for (i = 0; i < sc->sc_rthash_size; i++) {
6806 LIST_INIT(&sc->sc_rthash[i]);
6807 }
6808
6809 sc->sc_rthash_key = RandomULong();
6810
6811 LIST_INIT(&sc->sc_rtlist);
6812
6813 return 0;
6814 }
6815
6816 /*
6817 * bridge_rthash_delayed_resize:
6818 *
6819 * Resize the routing table hash on a delayed thread call.
6820 */
6821 static void
6822 bridge_rthash_delayed_resize(struct bridge_softc *sc)
6823 {
6824 u_int32_t new_rthash_size = 0;
6825 u_int32_t old_rthash_size = 0;
6826 struct _bridge_rtnode_list *new_rthash = NULL;
6827 struct _bridge_rtnode_list *old_rthash = NULL;
6828 u_int32_t i;
6829 struct bridge_rtnode *brt;
6830 int error = 0;
6831
6832 BRIDGE_LOCK_ASSERT_HELD(sc);
6833
6834 /*
6835 * Four entries per hash bucket is our ideal load factor
6836 */
6837 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6838 goto out;
6839 }
6840
6841 /*
6842 * Doubling the number of hash buckets may be too simplistic
6843 * especially when facing a spike of new entries
6844 */
6845 new_rthash_size = sc->sc_rthash_size * 2;
6846
6847 sc->sc_flags |= SCF_RESIZING;
6848 BRIDGE_UNLOCK(sc);
6849
6850 new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
6851 Z_WAITOK | Z_ZERO);
6852
6853 BRIDGE_LOCK(sc);
6854 sc->sc_flags &= ~SCF_RESIZING;
6855
6856 if (new_rthash == NULL) {
6857 error = ENOMEM;
6858 goto out;
6859 }
6860 if ((sc->sc_flags & SCF_DETACHING)) {
6861 error = ENODEV;
6862 goto out;
6863 }
6864 /*
6865 * Fail safe from here on
6866 */
6867 old_rthash = sc->sc_rthash;
6868 old_rthash_size = sc->sc_rthash_size;
6869 sc->sc_rthash = new_rthash;
6870 sc->sc_rthash_size = new_rthash_size;
6871
6872 /*
6873 * Get a new key to force entries to be shuffled around to reduce
6874 * the likelihood they will land in the same buckets
6875 */
6876 sc->sc_rthash_key = RandomULong();
6877
6878 for (i = 0; i < sc->sc_rthash_size; i++) {
6879 LIST_INIT(&sc->sc_rthash[i]);
6880 }
6881
6882 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
6883 LIST_REMOVE(brt, brt_hash);
6884 (void) bridge_rtnode_hash(sc, brt);
6885 }
6886 out:
6887 if (error == 0) {
6888 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6889 "%s new size %u",
6890 sc->sc_ifp->if_xname, sc->sc_rthash_size);
6891 kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
6892 } else {
6893 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
6894 "%s failed %d", sc->sc_ifp->if_xname, error);
6895 kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
6896 }
6897 }
6898
6899 /*
6900 * Resize the number of hash buckets based on the load factor
6901 * Currently only grow
6902 * Failing to resize the hash table is not fatal
6903 */
6904 static void
6905 bridge_rthash_resize(struct bridge_softc *sc)
6906 {
6907 BRIDGE_LOCK_ASSERT_HELD(sc);
6908
6909 if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
6910 return;
6911 }
6912
6913 /*
6914 * Four entries per hash bucket is our ideal load factor
6915 */
6916 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6917 return;
6918 }
6919 /*
6920 * Hard limit on the size of the routing hash table
6921 */
6922 if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
6923 return;
6924 }
6925
6926 sc->sc_resize_call.bdc_sc = sc;
6927 sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
6928 bridge_schedule_delayed_call(&sc->sc_resize_call);
6929 }
6930
6931 /*
6932 * bridge_rtable_fini:
6933 *
6934 * Deconstruct the route table for this bridge.
6935 */
6936 static void
6937 bridge_rtable_fini(struct bridge_softc *sc)
6938 {
6939 KASSERT(sc->sc_brtcnt == 0,
6940 ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
6941 kfree_type_counted_by(struct _bridge_rtnode_list, sc->sc_rthash_size,
6942 sc->sc_rthash);
6943 sc->sc_rthash = NULL;
6944 sc->sc_rthash_size = 0;
6945 }
6946
6947 /*
6948 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
6949 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
6950 */
6951 #define mix(a, b, c) \
6952 do { \
6953 a -= b; a -= c; a ^= (c >> 13); \
6954 b -= c; b -= a; b ^= (a << 8); \
6955 c -= a; c -= b; c ^= (b >> 13); \
6956 a -= b; a -= c; a ^= (c >> 12); \
6957 b -= c; b -= a; b ^= (a << 16); \
6958 c -= a; c -= b; c ^= (b >> 5); \
6959 a -= b; a -= c; a ^= (c >> 3); \
6960 b -= c; b -= a; b ^= (a << 10); \
6961 c -= a; c -= b; c ^= (b >> 15); \
6962 } while ( /*CONSTCOND*/ 0)
6963
6964 static __inline uint32_t
6965 bridge_rthash(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN])
6966 {
6967 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
6968
6969 b += addr[5] << 8;
6970 b += addr[4];
6971 a += addr[3] << 24;
6972 a += addr[2] << 16;
6973 a += addr[1] << 8;
6974 a += addr[0];
6975
6976 mix(a, b, c);
6977
6978 return c & BRIDGE_RTHASH_MASK(sc);
6979 }
6980
6981 #undef mix
6982
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.  Returns zero when
 *	they are equal, otherwise the difference a[i] - b[i] of the
 *	first pair of bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t a[ETHER_ADDR_LEN], const uint8_t b[ETHER_ADDR_LEN])
{
	for (int idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int delta = (int)a[idx] - (int)b[idx];

		if (delta != 0) {
			return delta;
		}
	}
	return 0;
}
6994
6995 /*
6996 * bridge_rtnode_lookup:
6997 *
6998 * Look up a bridge route node for the specified destination. Compare the
6999 * vlan id or if zero then just return the first match.
7000 */
7001 static struct bridge_rtnode *
7002 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
7003 uint16_t vlan)
7004 {
7005 struct bridge_rtnode *brt;
7006 uint32_t hash;
7007 int dir;
7008
7009 BRIDGE_LOCK_ASSERT_HELD(sc);
7010
7011 hash = bridge_rthash(sc, addr);
7012 LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7013 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7014 if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7015 return brt;
7016 }
7017 if (dir > 0) {
7018 return NULL;
7019 }
7020 }
7021
7022 return NULL;
7023 }
7024
7025 /*
7026 * bridge_rtnode_hash:
7027 *
7028 * Insert the specified bridge node into the route hash table.
7029 * This is used when adding a new node or to rehash when resizing
7030 * the hash table
7031 */
7032 static int
7033 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7034 {
7035 struct bridge_rtnode *lbrt;
7036 uint32_t hash;
7037 int dir;
7038
7039 BRIDGE_LOCK_ASSERT_HELD(sc);
7040
7041 hash = bridge_rthash(sc, brt->brt_addr);
7042
7043 lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7044 if (lbrt == NULL) {
7045 LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7046 goto out;
7047 }
7048
7049 do {
7050 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7051 if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7052 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7053 "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7054 sc->sc_ifp->if_xname,
7055 brt->brt_addr[0], brt->brt_addr[1],
7056 brt->brt_addr[2], brt->brt_addr[3],
7057 brt->brt_addr[4], brt->brt_addr[5]);
7058 return EEXIST;
7059 }
7060 if (dir > 0) {
7061 LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7062 goto out;
7063 }
7064 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7065 LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7066 goto out;
7067 }
7068 lbrt = LIST_NEXT(lbrt, brt_hash);
7069 } while (lbrt != NULL);
7070
7071 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7072 "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7073 sc->sc_ifp->if_xname,
7074 brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7075 brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7076 out:
7077 return 0;
7078 }
7079
7080 /*
7081 * bridge_rtnode_insert:
7082 *
7083 * Insert the specified bridge node into the route table. We
7084 * assume the entry is not already in the table.
7085 */
7086 static int
7087 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7088 {
7089 int error;
7090
7091 error = bridge_rtnode_hash(sc, brt);
7092 if (error != 0) {
7093 return error;
7094 }
7095
7096 LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7097 sc->sc_brtcnt++;
7098
7099 bridge_rthash_resize(sc);
7100
7101 return 0;
7102 }
7103
7104 /*
7105 * bridge_rtnode_destroy:
7106 *
7107 * Destroy a bridge rtnode.
7108 */
7109 static void
7110 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7111 {
7112 BRIDGE_LOCK_ASSERT_HELD(sc);
7113
7114 LIST_REMOVE(brt, brt_hash);
7115
7116 LIST_REMOVE(brt, brt_list);
7117 sc->sc_brtcnt--;
7118 brt->brt_dst->bif_addrcnt--;
7119 zfree(bridge_rtnode_pool, brt);
7120 }
7121
7122 #if BRIDGESTP
7123 /*
7124 * bridge_rtable_expire:
7125 *
7126 * Set the expiry time for all routes on an interface.
7127 */
7128 static void
7129 bridge_rtable_expire(struct ifnet *ifp, int age)
7130 {
7131 struct bridge_softc *sc = ifp->if_bridge;
7132 struct bridge_rtnode *brt;
7133
7134 BRIDGE_LOCK(sc);
7135
7136 /*
7137 * If the age is zero then flush, otherwise set all the expiry times to
7138 * age for the interface
7139 */
7140 if (age == 0) {
7141 bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7142 } else {
7143 unsigned long now;
7144
7145 now = (unsigned long) net_uptime();
7146
7147 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7148 /* Cap the expiry time to 'age' */
7149 if (brt->brt_ifp == ifp &&
7150 brt->brt_expire > now + age &&
7151 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7152 brt->brt_expire = now + age;
7153 }
7154 }
7155 }
7156 BRIDGE_UNLOCK(sc);
7157 }
7158
7159 /*
7160 * bridge_state_change:
7161 *
7162 * Callback from the bridgestp code when a port changes states.
7163 */
7164 static void
7165 bridge_state_change(struct ifnet *ifp, int state)
7166 {
7167 struct bridge_softc *sc = ifp->if_bridge;
7168 static const char *stpstates[] = {
7169 "disabled",
7170 "listening",
7171 "learning",
7172 "forwarding",
7173 "blocking",
7174 "discarding"
7175 };
7176
7177 if (log_stp) {
7178 log(LOG_NOTICE, "%s: state changed to %s on %s",
7179 sc->sc_ifp->if_xname,
7180 stpstates[state], ifp->if_xname);
7181 }
7182 }
7183 #endif /* BRIDGESTP */
7184
7185 /*
7186 * bridge_detach:
7187 *
7188 * Callback when interface has been detached.
7189 */
7190 static void
7191 bridge_detach(ifnet_t ifp)
7192 {
7193 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7194
7195 #if BRIDGESTP
7196 bstp_detach(&sc->sc_stp);
7197 #endif /* BRIDGESTP */
7198
7199 /* Tear down the routing table. */
7200 bridge_rtable_fini(sc);
7201
7202 lck_mtx_lock(&bridge_list_mtx);
7203 LIST_REMOVE(sc, sc_list);
7204 lck_mtx_unlock(&bridge_list_mtx);
7205
7206 ifnet_release(ifp);
7207
7208 lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
7209 kfree_type(struct bridge_softc, sc);
7210 }
7211
7212 /*
7213 * bridge_link_event:
7214 *
7215 * Report a data link event on an interface
7216 */
7217 static void
7218 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7219 {
7220 struct event {
7221 u_int32_t ifnet_family;
7222 u_int32_t unit;
7223 char if_name[IFNAMSIZ];
7224 };
7225 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7226 struct kern_event_msg *header = (struct kern_event_msg*)message;
7227 struct event *data = (struct event *)(message + KEV_MSG_HEADER_SIZE);
7228
7229 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7230 "%s event_code %u - %s", ifp->if_xname,
7231 event_code, dlil_kev_dl_code_str(event_code));
7232 header->total_size = sizeof(message);
7233 header->vendor_code = KEV_VENDOR_APPLE;
7234 header->kev_class = KEV_NETWORK_CLASS;
7235 header->kev_subclass = KEV_DL_SUBCLASS;
7236 header->event_code = event_code;
7237 data->ifnet_family = ifnet_family(ifp);
7238 data->unit = (u_int32_t)ifnet_unit(ifp);
7239 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7240 ifnet_event(ifp, header);
7241 }
7242
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host-filter drop.  Increments the `reason` counter in
 *	bridge_hostfilter_stats, emits a host-filter debug log line built
 *	from `func`, `line` and the stringified `reason`, and stores EINVAL
 *	into a variable named `error`, which must exist in the scope where
 *	the macro is expanded.
 *
 *	The expansion is a bare brace block, not do { } while (0), so it
 *	does not behave like a single statement in an unbraced if/else.
 *
 *	NOTE(review): the format string is "%s.%d" immediately followed by
 *	the reason name, so the line number and the reason are printed
 *	with no separator -- confirm whether that is intended.
 */
#define BRIDGE_HF_DROP(reason, func, line) {                            \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	                   "%s.%d" #reason, func, line);                \
	        error = EINVAL;                                         \
	}
7249
7250 static int
7251 bridge_host_filter_arp(struct bridge_iflist *bif, mbuf_t *data)
7252 {
7253 struct ether_arp *ea;
7254 struct ether_header *eh;
7255 int error = EINVAL;
7256 mbuf_t m = *data;
7257 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7258
7259 /*
7260 * Make the Ethernet and ARP headers contiguous
7261 */
7262 if (mbuf_pkthdr_len(m) < minlen) {
7263 BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7264 goto done;
7265 }
7266 if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7267 BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7268 __func__, __LINE__);
7269 goto done;
7270 }
7271 m = *data;
7272
7273 /*
7274 * Restrict Ethernet protocols to ARP and IP/IPv6
7275 */
7276 eh = mtod(m, struct ether_header *);
7277 ea = (struct ether_arp *)(eh + 1);
7278 if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7279 BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7280 __func__, __LINE__);
7281 goto done;
7282 }
7283 if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7284 BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7285 __func__, __LINE__);
7286 goto done;
7287 }
7288 /*
7289 * Verify the address lengths are correct
7290 */
7291 if (ea->arp_hln != ETHER_ADDR_LEN) {
7292 BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7293 goto done;
7294 }
7295 if (ea->arp_pln != sizeof(struct in_addr)) {
7296 BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7297 __func__, __LINE__);
7298 goto done;
7299 }
7300 /*
7301 * Allow only ARP request or ARP reply
7302 */
7303 if (ea->arp_op != HTONS_ARPOP_REQUEST &&
7304 ea->arp_op != HTONS_ARPOP_REPLY) {
7305 BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7306 goto done;
7307 }
7308 if ((bif->bif_flags & BIFF_HF_HWSRC) != 0) {
7309 /*
7310 * Verify source hardware address matches
7311 */
7312 if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7313 ETHER_ADDR_LEN) != 0) {
7314 BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7315 goto done;
7316 }
7317 }
7318 if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7319 /*
7320 * Verify source protocol address:
7321 * May be null for an ARP probe
7322 */
7323 if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7324 sizeof(struct in_addr)) != 0 &&
7325 bcmp(ea->arp_spa, &inaddr_any,
7326 sizeof(struct in_addr)) != 0) {
7327 BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7328 goto done;
7329 }
7330 }
7331 bridge_hostfilter_stats.brhf_arp_ok += 1;
7332 error = 0;
7333 done:
7334 return error;
7335 }
7336
7337 /*
7338 * MAC NAT
7339 */
7340
7341 static errno_t
7342 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7343 {
7344 errno_t error = 0;
7345
7346 BRIDGE_LOCK_ASSERT_HELD(sc);
7347
7348 if (IFNET_IS_VMNET(bif->bif_ifp)) {
7349 error = EINVAL;
7350 goto done;
7351 }
7352 if (sc->sc_mac_nat_bif != NULL) {
7353 if (sc->sc_mac_nat_bif != bif) {
7354 error = EBUSY;
7355 }
7356 goto done;
7357 }
7358 sc->sc_mac_nat_bif = bif;
7359 bif->bif_ifflags |= IFBIF_MAC_NAT;
7360 bridge_mac_nat_populate_entries(sc);
7361
7362 done:
7363 return error;
7364 }
7365
7366 static void
7367 bridge_mac_nat_disable(struct bridge_softc *sc)
7368 {
7369 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7370
7371 assert(mac_nat_bif != NULL);
7372 bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7373 mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7374 sc->sc_mac_nat_bif = NULL;
7375 return;
7376 }
7377
7378 static void
7379 mac_nat_entry_print2(struct mac_nat_entry *mne,
7380 const char ifname[IFNAMSIZ], const char *msg1, const char *msg2)
7381 {
7382 int af;
7383 char etopbuf[24];
7384 char ntopbuf[MAX_IPv6_STR_LEN];
7385 const char *space;
7386
7387 af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7388 ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7389 (void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7390 if (msg2 == NULL) {
7391 msg2 = "";
7392 space = "";
7393 } else {
7394 space = " ";
7395 }
7396 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7397 "%.*s %s%s%s %p (%s, %s, %s)", IFNAMSIZ, ifname, msg1, space, msg2, mne,
7398 mne->mne_bif->bif_ifp->if_xname, ntopbuf, etopbuf);
7399 }
7400
7401 static void
7402 mac_nat_entry_print(struct mac_nat_entry *mne,
7403 const char ifname[IFNAMSIZ], const char *msg)
7404 {
7405 mac_nat_entry_print2(mne, ifname, msg, NULL);
7406 }
7407
7408 static struct mac_nat_entry *
7409 bridge_lookup_mac_nat_entry_ipv4(const struct bridge_softc *sc, const struct in_addr *ip)
7410 {
7411 struct mac_nat_entry *mne;
7412 struct mac_nat_entry *ret_mne = NULL;
7413
7414 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7415 if (mne->mne_ip.s_addr == ip->s_addr) {
7416 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7417 mac_nat_entry_print(mne, sc->sc_if_xname,
7418 "found");
7419 }
7420 ret_mne = mne;
7421 break;
7422 }
7423 }
7424
7425 return ret_mne;
7426 }
7427
7428 static struct mac_nat_entry *
7429 bridge_lookup_mac_nat_entry_ipv6(const struct bridge_softc *sc, const struct in6_addr *ip6)
7430 {
7431 struct mac_nat_entry *mne;
7432 struct mac_nat_entry *ret_mne = NULL;
7433
7434 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7435 if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7436 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7437 mac_nat_entry_print(mne, sc->sc_if_xname,
7438 "found");
7439 }
7440 ret_mne = mne;
7441 break;
7442 }
7443 }
7444
7445 return ret_mne;
7446 }
7447
7448 static void
7449 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7450 struct mac_nat_entry *mne, const char *reason)
7451 {
7452 LIST_REMOVE(mne, mne_list);
7453 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7454 mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7455 }
7456 zfree(bridge_mne_pool, mne);
7457 sc->sc_mne_count--;
7458 }
7459
7460 static struct mac_nat_entry *
7461 bridge_create_mac_nat_entry_common(struct bridge_softc *sc,
7462 struct bridge_iflist *bif, const char eaddr[ETHER_ADDR_LEN])
7463 {
7464 struct mac_nat_entry *mne;
7465
7466 if (sc->sc_mne_count >= sc->sc_mne_max) {
7467 sc->sc_mne_allocation_failures++;
7468 return NULL;
7469 }
7470
7471 mne = zalloc_noblock(bridge_mne_pool);
7472 if (mne == NULL) {
7473 sc->sc_mne_allocation_failures++;
7474 return NULL;
7475 }
7476
7477 sc->sc_mne_count++;
7478 bzero(mne, sizeof(*mne));
7479 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7480
7481 mne->mne_bif = bif;
7482 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7483
7484 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7485 mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7486 }
7487
7488 return mne;
7489 }
7490
7491 static struct mac_nat_entry *
7492 bridge_create_mac_nat_entry_ipv4(struct bridge_softc *sc,
7493 struct bridge_iflist *bif, const struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7494 {
7495 struct mac_nat_entry *mne;
7496
7497 mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7498 if (mne == NULL) {
7499 return NULL;
7500 }
7501
7502 bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7503 LIST_INSERT_HEAD(&sc->sc_mne_list, mne, mne_list);
7504
7505 return mne;
7506 }
7507
7508 static struct mac_nat_entry *
7509 bridge_create_mac_nat_entry_ipv6(struct bridge_softc *sc,
7510 struct bridge_iflist *bif, const struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7511 {
7512 struct mac_nat_entry *mne;
7513
7514 mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7515 if (mne == NULL) {
7516 return NULL;
7517 }
7518
7519 bcopy(ip6, &mne->mne_ip6, sizeof(mne->mne_ip6));
7520 mne->mne_flags |= MNE_FLAGS_IPV6;
7521 LIST_INSERT_HEAD(&sc->sc_mne_list_v6, mne, mne_list);
7522
7523 return mne;
7524 }
7525
7526 static struct mac_nat_entry *
7527 bridge_update_mac_nat_entry_common(struct bridge_softc *sc, struct bridge_iflist *bif,
7528 struct mac_nat_entry *mne, const char eaddr[ETHER_ADDR_LEN])
7529 {
7530 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7531
7532 if (mne->mne_bif == mac_nat_bif) {
7533 /* the MAC NAT interface takes precedence */
7534 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7535 if (mne->mne_bif != bif) {
7536 mac_nat_entry_print2(mne,
7537 sc->sc_if_xname, "reject",
7538 bif->bif_ifp->if_xname);
7539 }
7540 }
7541 } else if (mne->mne_bif != bif) {
7542 const char *__null_terminated old_if = mne->mne_bif->bif_ifp->if_xname;
7543
7544 mne->mne_bif = bif;
7545 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7546 mac_nat_entry_print2(mne,
7547 sc->sc_if_xname, "replaced",
7548 old_if);
7549 }
7550 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7551 }
7552
7553 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7554
7555 return mne;
7556 }
7557
7558 static struct mac_nat_entry *
7559 bridge_update_mac_nat_entry_ipv4(struct bridge_softc *sc,
7560 struct bridge_iflist *bif, struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7561 {
7562 struct mac_nat_entry *mne;
7563
7564 mne = bridge_lookup_mac_nat_entry_ipv4(sc, ip);
7565 if (mne != NULL) {
7566 return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7567 }
7568
7569 mne = bridge_create_mac_nat_entry_ipv4(sc, bif, ip, eaddr);
7570 return mne;
7571 }
7572
7573 static struct mac_nat_entry *
7574 bridge_update_mac_nat_entry_ipv6(struct bridge_softc *sc,
7575 struct bridge_iflist *bif, struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7576 {
7577 struct mac_nat_entry *mne;
7578
7579 mne = bridge_lookup_mac_nat_entry_ipv6(sc, ip6);
7580 if (mne != NULL) {
7581 return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7582 }
7583
7584 mne = bridge_create_mac_nat_entry_ipv6(sc, bif, ip6, eaddr);
7585 return mne;
7586 }
7587
7588 static void
7589 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7590 struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7591 {
7592 struct mac_nat_entry *mne;
7593 struct mac_nat_entry *tmne;
7594
7595 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7596 if (bif != NULL && mne->mne_bif != bif) {
7597 continue;
7598 }
7599 bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7600 }
7601 }
7602
7603 /*
7604 * bridge_mac_nat_flush_entries:
7605 *
7606 * Flush MAC NAT entries for the specified member. Flush all entries if
7607 * the member is the one that requires MAC NAT, otherwise just flush the
7608 * ones for the specified member.
7609 */
7610 static void
7611 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7612 {
7613 struct bridge_iflist *flush_bif;
7614
7615 flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7616 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7617 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7618 }
7619
7620 static void
7621 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7622 {
7623 errno_t error;
7624 ifnet_t ifp;
7625 uint16_t addresses_count = 0;
7626 ifaddr_t * __counted_by(addresses_count) list;
7627 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7628
7629 assert(mac_nat_bif != NULL);
7630 ifp = mac_nat_bif->bif_ifp;
7631 error = ifnet_get_address_list_family_with_count(ifp, &list, &addresses_count, 0);
7632 if (error != 0) {
7633 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7634 "ifnet_get_address_list(%s) failed %d",
7635 ifp->if_xname, error);
7636 return;
7637 }
7638
7639 for (uint16_t i = 0; i < addresses_count; ++i) {
7640 sa_family_t af;
7641
7642 af = ifaddr_address_family(list[i]);
7643 switch (af) {
7644 case AF_INET: {
7645 struct sockaddr_in sin;
7646
7647 error = ifaddr_address(list[i], (struct sockaddr *)&sin, sizeof(sin));
7648 if (error != 0) {
7649 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7650 "ifaddr_address failed %d",
7651 error);
7652 break;
7653 }
7654
7655 bridge_create_mac_nat_entry_ipv4(sc, mac_nat_bif, &sin.sin_addr, IF_LLADDR(ifp));
7656 break;
7657 }
7658
7659 case AF_INET6: {
7660 struct sockaddr_in6 sin6;
7661
7662 error = ifaddr_address(list[i], (struct sockaddr *)&sin6, sizeof(sin6));
7663 if (error != 0) {
7664 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7665 "ifaddr_address failed %d",
7666 error);
7667 break;
7668 }
7669
7670 if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) {
7671 /* remove scope ID */
7672 sin6.sin6_addr.s6_addr16[1] = 0;
7673 }
7674
7675 bridge_create_mac_nat_entry_ipv6(sc, mac_nat_bif, &sin6.sin6_addr, IF_LLADDR(ifp));
7676 break;
7677 }
7678
7679 default:
7680 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7681 "ifaddr_address_family unknown %d",
7682 af);
7683 break;
7684 }
7685 }
7686
7687 ifnet_address_list_free_counted_by(list, addresses_count);
7688 return;
7689 }
7690
7691 static void
7692 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7693 struct mac_nat_entry_list *list, unsigned long now)
7694 {
7695 struct mac_nat_entry *mne;
7696 struct mac_nat_entry *tmne;
7697
7698 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7699 if (now >= mne->mne_expire) {
7700 bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7701 }
7702 }
7703 }
7704
7705 static void
7706 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7707 {
7708 if (sc->sc_mac_nat_bif == NULL) {
7709 return;
7710 }
7711 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7712 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7713 }
7714
7715 static const char *
7716 get_in_out_string(boolean_t is_output)
7717 {
7718 return (const char * __null_terminated)(is_output ? "OUT" : "IN");
7719 }
7720
7721 /*
7722 * is_valid_arp_packet:
7723 * Verify that this is a valid ARP packet.
7724 *
7725 * Returns TRUE if the packet is valid, FALSE otherwise.
7726 */
7727 static boolean_t
7728 is_valid_arp_packet(mbuf_t *data, bool is_output,
7729 struct ether_header **eh_p, struct ether_arp **ea_p)
7730 {
7731 struct ether_arp *ea;
7732 struct ether_header *eh;
7733 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7734 boolean_t is_valid = FALSE;
7735 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7736
7737 if (mbuf_pkthdr_len(*data) < minlen) {
7738 BRIDGE_LOG(LOG_DEBUG, flags,
7739 "ARP %s short frame %lu < %lu",
7740 get_in_out_string(is_output),
7741 mbuf_pkthdr_len(*data), minlen);
7742 goto done;
7743 }
7744 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7745 BRIDGE_LOG(LOG_DEBUG, flags,
7746 "ARP %s size %lu mbuf_pullup fail",
7747 get_in_out_string(is_output),
7748 minlen);
7749 *data = NULL;
7750 goto done;
7751 }
7752
7753 /* validate ARP packet */
7754 eh = mtod(*data, struct ether_header *);
7755 ea = (struct ether_arp *)(eh + 1);
7756 if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7757 BRIDGE_LOG(LOG_DEBUG, flags,
7758 "ARP %s htype not ethernet",
7759 get_in_out_string(is_output));
7760 goto done;
7761 }
7762 if (ea->arp_hln != ETHER_ADDR_LEN) {
7763 BRIDGE_LOG(LOG_DEBUG, flags,
7764 "ARP %s hlen not ethernet",
7765 get_in_out_string(is_output));
7766 goto done;
7767 }
7768 if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7769 BRIDGE_LOG(LOG_DEBUG, flags,
7770 "ARP %s ptype not IP",
7771 get_in_out_string(is_output));
7772 goto done;
7773 }
7774 if (ea->arp_pln != sizeof(struct in_addr)) {
7775 BRIDGE_LOG(LOG_DEBUG, flags,
7776 "ARP %s plen not IP",
7777 get_in_out_string(is_output));
7778 goto done;
7779 }
7780 is_valid = TRUE;
7781 *ea_p = ea;
7782 *eh_p = eh;
7783 done:
7784 return is_valid;
7785 }
7786
7787 static struct mac_nat_entry *
7788 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7789 {
7790 struct ether_arp * __single ea;
7791 struct ether_header * __single eh;
7792 struct mac_nat_entry *mne = NULL;
7793 u_short op;
7794 struct in_addr tpa;
7795
7796 if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7797 goto done;
7798 }
7799 op = ea->arp_op;
7800 switch (op) {
7801 case HTONS_ARPOP_REQUEST:
7802 case HTONS_ARPOP_REPLY:
7803 /* only care about REQUEST and REPLY */
7804 break;
7805 default:
7806 goto done;
7807 }
7808
7809 /* check the target IP address for a NAT entry */
7810 bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7811 if (tpa.s_addr != 0) {
7812 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &tpa);
7813 }
7814 if (mne != NULL) {
7815 if (op == HTONS_ARPOP_REPLY) {
7816 /* translate the MAC address */
7817 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7818 char mac_src[24];
7819 char mac_dst[24];
7820
7821 ether_ntop(mac_src, sizeof(mac_src),
7822 ea->arp_tha);
7823 ether_ntop(mac_dst, sizeof(mac_dst),
7824 mne->mne_mac);
7825 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7826 "%s %s ARP %s -> %s",
7827 sc->sc_if_xname,
7828 mne->mne_bif->bif_ifp->if_xname,
7829 mac_src, mac_dst);
7830 }
7831 bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
7832 }
7833 } else {
7834 /* handle conflicting ARP (sender matches mne) */
7835 struct in_addr spa;
7836
7837 bcopy(ea->arp_spa, &spa, sizeof(spa));
7838 if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
7839 /* check the source IP for a NAT entry */
7840 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &spa);
7841 }
7842 }
7843
7844 done:
7845 return mne;
7846 }
7847
7848 static boolean_t
7849 bridge_mac_nat_arp_output(struct bridge_softc *sc,
7850 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7851 {
7852 struct ether_arp * __single ea;
7853 struct ether_header * __single eh;
7854 struct in_addr ip;
7855 struct mac_nat_entry *mne = NULL;
7856 u_short op;
7857 boolean_t translate = FALSE;
7858
7859 if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
7860 goto done;
7861 }
7862 op = ea->arp_op;
7863 switch (op) {
7864 case HTONS_ARPOP_REQUEST:
7865 case HTONS_ARPOP_REPLY:
7866 /* only care about REQUEST and REPLY */
7867 break;
7868 default:
7869 goto done;
7870 }
7871
7872 bcopy(ea->arp_spa, &ip, sizeof(ip));
7873 if (ip.s_addr == 0) {
7874 goto done;
7875 }
7876 /* XXX validate IP address: no multicast/broadcast */
7877 mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
7878 (const char *)ea->arp_sha);
7879 if (mnr != NULL && mne != NULL) {
7880 /* record the offset to do the replacement */
7881 translate = TRUE;
7882 mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
7883 }
7884
7885 done:
7886 return translate;
7887 }
7888
7889 #define ETHER_IPV4_HEADER_LEN (sizeof(struct ether_header) + \
7890 + sizeof(struct ip))
7891 static uint8_t * __indexable
7892 get_ether_ip_header_ptr(mbuf_t *data, boolean_t is_output)
7893 {
7894 uint8_t *header = NULL;
7895 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7896 size_t minlen = ETHER_IPV4_HEADER_LEN;
7897
7898 if (mbuf_pkthdr_len(*data) < minlen) {
7899 BRIDGE_LOG(LOG_DEBUG, flags,
7900 "IP %s short frame %lu < %lu",
7901 get_in_out_string(is_output),
7902 mbuf_pkthdr_len(*data), minlen);
7903 goto done;
7904 }
7905 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7906 BRIDGE_LOG(LOG_DEBUG, flags,
7907 "IP %s size %lu mbuf_pullup fail",
7908 get_in_out_string(is_output),
7909 minlen);
7910 *data = NULL;
7911 goto done;
7912 }
7913 header = mtod(*data, uint8_t *);
7914 done:
7915 return header;
7916 }
7917
7918 static struct mac_nat_entry *
7919 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
7920 {
7921 struct in_addr dst;
7922 uint8_t *header;
7923 struct ip *iphdr;
7924 struct mac_nat_entry *mne = NULL;
7925
7926 header = get_ether_ip_header_ptr(data, FALSE);
7927 if (header == NULL) {
7928 goto done;
7929 }
7930 iphdr = (struct ip *)(void *)(header + sizeof(struct ether_header));
7931 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
7932 /* XXX validate IP address */
7933 if (dst.s_addr == 0) {
7934 goto done;
7935 }
7936 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &dst);
7937 done:
7938 return mne;
7939 }
7940
7941 static void
7942 bridge_mac_nat_udp_output(struct bridge_softc *sc,
7943 struct bridge_iflist *bif, mbuf_t m,
7944 uint8_t ip_header_len, struct mac_nat_record *mnr)
7945 {
7946 uint16_t dp_flags;
7947 errno_t error;
7948 size_t offset;
7949 struct udphdr udphdr;
7950
7951 /* copy the UDP header */
7952 offset = sizeof(struct ether_header) + ip_header_len;
7953 error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
7954 if (error != 0) {
7955 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7956 "mbuf_copydata udphdr failed %d",
7957 error);
7958 return;
7959 }
7960 if (udphdr.uh_sport != HTONS_IPPORT_BOOTPC ||
7961 udphdr.uh_dport != HTONS_IPPORT_BOOTPS) {
7962 /* not a BOOTP/DHCP packet */
7963 return;
7964 }
7965 /* check whether the broadcast bit is already set */
7966 offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
7967 error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
7968 if (error != 0) {
7969 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7970 "mbuf_copydata dp_flags failed %d",
7971 error);
7972 return;
7973 }
7974 if ((dp_flags & HTONS_DHCP_FLAGS_BROADCAST) != 0) {
7975 /* it's already set, nothing to do */
7976 return;
7977 }
7978 /* broadcast bit needs to be set */
7979 mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
7980 mnr->mnr_ip_header_len = ip_header_len;
7981 if (udphdr.uh_sum != 0) {
7982 uint16_t delta;
7983
7984 /* adjust checksum to take modified dp_flags into account */
7985 delta = dp_flags - mnr->mnr_ip_dhcp_flags;
7986 mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
7987 }
7988 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7989 "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
7990 sc->sc_if_xname,
7991 bif->bif_ifp->if_xname,
7992 ntohs(mnr->mnr_ip_dhcp_flags),
7993 ntohs(mnr->mnr_ip_udp_csum));
7994 return;
7995 }
7996
7997 static boolean_t
7998 bridge_mac_nat_ip_output(struct bridge_softc *sc,
7999 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8000 {
8001 #pragma unused(mnr)
8002 uint8_t *header;
8003 struct ether_header *eh;
8004 struct in_addr ip;
8005 struct ip *iphdr;
8006 uint8_t ip_header_len;
8007 struct mac_nat_entry *mne = NULL;
8008 boolean_t translate = FALSE;
8009
8010 header = get_ether_ip_header_ptr(data, TRUE);
8011 if (header == NULL) {
8012 goto done;
8013 }
8014
8015 eh = (struct ether_header *)header;
8016 iphdr = (struct ip *)(header + sizeof(*eh));
8017 ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8018 if (ip_header_len < sizeof(ip)) {
8019 /* bogus IP header */
8020 goto done;
8021 }
8022 bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8023 /* XXX validate the source address */
8024 if (ip.s_addr != 0) {
8025 mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
8026 (const char *)eh->ether_shost);
8027 }
8028 if (mnr != NULL) {
8029 if (ip.s_addr == 0 && iphdr->ip_p == IPPROTO_UDP) {
8030 /* handle DHCP must broadcast */
8031 bridge_mac_nat_udp_output(sc, bif, *data,
8032 ip_header_len, mnr);
8033 }
8034 translate = TRUE;
8035 }
8036 done:
8037 return translate;
8038 }
8039
8040 #define ETHER_IPV6_HEADER_LEN (sizeof(struct ether_header) + \
8041 + sizeof(struct ip6_hdr))
8042 static uint8_t * __indexable
8043 get_ether_ipv6_header_ptr(mbuf_t *data, size_t plen, boolean_t is_output)
8044 {
8045 uint8_t *header = NULL;
8046 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8047 size_t minlen = ETHER_IPV6_HEADER_LEN + plen;
8048
8049 if (mbuf_pkthdr_len(*data) < minlen) {
8050 BRIDGE_LOG(LOG_DEBUG, flags,
8051 "IP %s short frame %lu < %lu",
8052 get_in_out_string(is_output),
8053 mbuf_pkthdr_len(*data), minlen);
8054 goto done;
8055 }
8056 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8057 BRIDGE_LOG(LOG_DEBUG, flags,
8058 "IP %s size %lu mbuf_pullup fail",
8059 get_in_out_string(is_output),
8060 minlen);
8061 *data = NULL;
8062 goto done;
8063 }
8064 header = mtod(*data, uint8_t *);
8065 done:
8066 return header;
8067 }
8068
8069 #include <netinet/icmp6.h>
8070 #include <netinet6/nd6.h>
8071
/*
 * Length compared against the `lladdrlen` produced by ND_OPT_LLADDR()
 * below: an ND option header plus one Ethernet address.
 */
#define ETHER_ND_LLADDR_LEN     (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))

/*
 * bridge_mac_nat_icmpv6_output:
 *
 *	Inspect an ICMPv6 packet from member `bif` that is being processed
 *	as output and, for the four Neighbor Discovery types handled here
 *	(NS, NA, RS, RA), locate the link-layer address option so that the
 *	address inside it can be rewritten later.
 *
 *	Parameters:
 *	  data    in/out packet; may be replaced by the pullups, and is
 *	          set to NULL by get_ether_ipv6_header_ptr() if a pullup
 *	          fails
 *	  ip6h    IPv6 header of the packet on entry; only ip6_plen is
 *	          read through it before it is re-derived after the pullup
 *	  saddrp  in/out source address; for a DAD probe (unspecified
 *	          source) it is overwritten with the solicited target
 *	  mnr     record that receives the result
 *
 *	When a link-layer option of the expected length is found, `mnr`
 *	receives its offset from the start of the Ethernet header, the
 *	ICMPv6 length and type, and the IPv6 header length.  Otherwise
 *	`mnr` is left untouched.
 *
 *	The ICMPv6 header is taken to start right after the fixed IPv6
 *	header (`off` is sizeof(struct ip6_hdr)), and the IPv6 payload
 *	length is used as the ICMPv6 message length.
 */
static void
bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
    struct bridge_iflist *bif,
    mbuf_t *data, struct ip6_hdr *ip6h,
    struct in6_addr *saddrp,
    struct mac_nat_record *mnr)
{
	uint8_t *header;
	struct ether_header *eh;
	struct icmp6_hdr *icmp6;
	uint8_t icmp6_type;
	uint32_t icmp6len;
	int lladdrlen = 0;
	char *lladdr = NULL;
	unsigned int off = sizeof(*ip6h);

	icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
	if (icmp6len < sizeof(*icmp6)) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "short IPv6 payload length %d < %lu",
		    icmp6len, sizeof(*icmp6));
		return;
	}

	/* pullup IP6 header + ICMPv6 header */
	header = get_ether_ipv6_header_ptr(data, sizeof(*icmp6), TRUE);
	if (header == NULL) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "failed to pullup icmp6 header");
		return;
	}
	/* the pullup may have moved the data: re-derive every pointer */
	eh = (struct ether_header *)header;
	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
	icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);
	icmp6_type = icmp6->icmp6_type;
	/* only the four ND message types below are of interest */
	switch (icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
	case ND_NEIGHBOR_ADVERT:
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT:
		break;
	default:
		return;
	}

	/* pullup IP6 header + payload */
	header = get_ether_ipv6_header_ptr(data, icmp6len, TRUE);
	if (header == NULL) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "failed to pullup icmp6 + payload");
		return;
	}
	/* second pullup: pointers must be re-derived once more */
	eh = (struct ether_header *)header;
	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
	icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);

	switch (icmp6_type) {
	case ND_NEIGHBOR_SOLICIT: {
		struct nd_neighbor_solicit *nd_ns;
		union nd_opts ndopts;
		boolean_t is_dad_probe;
		struct in6_addr taddr;

		if (icmp6len < sizeof(*nd_ns)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_ns %d < %lu",
			    icmp6len, sizeof(*nd_ns));
			return;
		}

		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}

		/* parse options */
		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NS option");
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			/*
			 * presumably yields the address bytes and length of
			 * the source link-layer option -- see ND_OPT_LLADDR
			 */
			ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len,
			    lladdr, lladdrlen);
		}
		/* an unspecified source address marks a DAD probe */
		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
		if (lladdr != NULL) {
			/* a DAD probe carrying a source lladdr is rejected */
			if (is_dad_probe) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "bad ND6 DAD packet");
				return;
			}
			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
		}
		if (is_dad_probe) {
			/* node is trying to use taddr, create an mne for taddr */
			*saddrp = taddr;
		}
		break;
	}
	case ND_NEIGHBOR_ADVERT: {
		struct nd_neighbor_advert *nd_na;
		union nd_opts ndopts;
		struct in6_addr taddr;


		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;

		if (icmp6len < sizeof(*nd_na)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_na %d < %lu",
			    icmp6len, sizeof(*nd_na));
			return;
		}

		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}

		/* parse options */
		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NA option");
			return;
		}
		if (ndopts.nd_opts_tgt_lladdr == NULL) {
			/* no target linklayer option, nothing to do */
			return;
		}

		ND_OPT_LLADDR(ndopts.nd_opts_tgt_lladdr, nd_opt_len, lladdr, lladdrlen);
		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "target lladdrlen %d != %lu",
			    lladdrlen, ETHER_ND_LLADDR_LEN);
			return;
		}
		break;
	}
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT: {
		union nd_opts ndopts;
		uint32_t type_length;
		const char *description;

		/* RA and RS differ only in the size of their fixed part */
		if (icmp6_type == ND_ROUTER_ADVERT) {
			type_length = sizeof(struct nd_router_advert);
			description = "RA";
		} else {
			type_length = sizeof(struct nd_router_solicit);
			description = "RS";
		}
		if (icmp6len < type_length) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short ND6 %s %d < %d",
			    description, icmp6len, type_length);
			return;
		}

		/* parse options */
		nd6_option_init(((uint8_t *)icmp6) + type_length,
		    icmp6len - type_length, &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 %s option", description);
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len, lladdr, lladdrlen);

			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
		}
		break;
	}
	default:
		break;
	}

	/* a link-layer option was found: record where it is */
	if (lladdr != NULL) {
		/* offset is relative to the start of the Ethernet header */
		mnr->mnr_ip6_lladdr_offset = (uint16_t)
		    ((uintptr_t)lladdr - (uintptr_t)eh);
		mnr->mnr_ip6_icmp6_len = icmp6len;
		mnr->mnr_ip6_icmp6_type = icmp6_type;
		mnr->mnr_ip6_header_len = off;
		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
			const char *str;

			switch (mnr->mnr_ip6_icmp6_type) {
			case ND_ROUTER_ADVERT:
				str = "ROUTER ADVERT";
				break;
			case ND_ROUTER_SOLICIT:
				str = "ROUTER SOLICIT";
				break;
			case ND_NEIGHBOR_ADVERT:
				str = "NEIGHBOR ADVERT";
				break;
			case ND_NEIGHBOR_SOLICIT:
				str = "NEIGHBOR SOLICIT";
				break;
			default:
				str = "";
				break;
			}
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
			    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
			    mnr->mnr_ip6_header_len,
			    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
		}
	}
}
8306
8307 static struct mac_nat_entry *
8308 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8309 {
8310 struct in6_addr dst;
8311 uint8_t *header;
8312 struct ether_header *eh;
8313 struct ip6_hdr *ip6h;
8314 struct mac_nat_entry *mne = NULL;
8315
8316 header = get_ether_ipv6_header_ptr(data, 0, FALSE);
8317 if (header == NULL) {
8318 goto done;
8319 }
8320 eh = (struct ether_header *)header;
8321 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8322 bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8323 /* XXX validate IPv6 address */
8324 if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8325 goto done;
8326 }
8327 mne = bridge_lookup_mac_nat_entry_ipv6(sc, &dst);
8328
8329 done:
8330 return mne;
8331 }
8332
8333 static boolean_t
8334 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8335 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8336 {
8337 uint8_t *header;
8338 struct ether_header *eh;
8339 ether_addr_t ether_shost;
8340 struct ip6_hdr *ip6h;
8341 struct in6_addr saddr;
8342 boolean_t translate;
8343
8344 translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8345 header = get_ether_ipv6_header_ptr(data, 0, TRUE);
8346 if (header == NULL) {
8347 translate = FALSE;
8348 goto done;
8349 }
8350 eh = (struct ether_header *)header;
8351 bcopy(eh->ether_shost, ðer_shost, sizeof(ether_shost));
8352 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8353 bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8354 if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8355 bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, &saddr, mnr);
8356 }
8357 if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8358 goto done;
8359 }
8360 (void)bridge_update_mac_nat_entry_ipv6(sc, bif, &saddr,
8361 (const char *)ether_shost.octet);
8362
8363 done:
8364 return translate;
8365 }
8366
8367 /*
8368 * Function: bridge_mac_nat_input:
8369 *
8370 * Purpose:
8371 * Process a unicast packet arriving on the external interface `external_ifp`.
8372 *
8373 * If the packet is ARP, IPv4, or IPv6, lookup the address from the packet in
8374 * the mac_nat_entry table. If an entry is found, and the interface is
8375 * not `external_ifp`, replace the destination MAC address in the
8376 * ethernet header with the corresponding internal MAC address, and return
8377 * the interface via `*dst_if`.
8378 *
8379 * Returns:
8380 * NULL if the packet was deallocated during processing.
8381 *
8382 * Otherwise, returns non-NULL packet that should:
8383 * 1) if `*dst_if` is NULL, continue on as an input packet
8384 * over `external_ifp`, OR
8385 * 2) if `*dst_if` is not NULL, be delivered as an output packet
8386 * over `*dst_if`.
8387 */
8388 static mbuf_t
8389 bridge_mac_nat_input(struct bridge_softc *sc, ifnet_t external_ifp,
8390 mbuf_t m, ifnet_t * dst_if)
8391 {
8392 struct ether_header *eh;
8393 mbuf_t m0 = m;
8394 struct mac_nat_entry *mne = NULL;
8395
8396 BRIDGE_LOCK_ASSERT_HELD(sc);
8397 *dst_if = NULL;
8398 eh = mtod(m, struct ether_header *);
8399 switch (eh->ether_type) {
8400 case HTONS_ETHERTYPE_ARP:
8401 mne = bridge_mac_nat_arp_input(sc, &m);
8402 break;
8403 case HTONS_ETHERTYPE_IP:
8404 mne = bridge_mac_nat_ip_input(sc, &m);
8405 break;
8406 case HTONS_ETHERTYPE_IPV6:
8407 mne = bridge_mac_nat_ipv6_input(sc, &m);
8408 break;
8409 default:
8410 break;
8411 }
8412 if (m != NULL & mne != NULL) {
8413 *dst_if = mne->mne_bif->bif_ifp;
8414 if (*dst_if == external_ifp) {
8415 /* receive packet for ifp */
8416 *dst_if = NULL;
8417 } else {
8418 /* replace the destination MAC with internal one */
8419 if (m != m0) {
8420 /* it may have changed */
8421 eh = mtod(m, struct ether_header *);
8422 }
8423 bcopy(mne->mne_mac, eh->ether_dhost,
8424 sizeof(eh->ether_dhost));
8425 }
8426 }
8427 return m;
8428 }
8429
8430
8431 static mblist
8432 bridge_mac_nat_input_list(struct bridge_softc *sc, ifnet_t external_ifp,
8433 mbuf_t m, mbuf_t * forward_head)
8434 {
8435 mblist forward;
8436 mbuf_t next_packet;
8437 mblist ret;
8438
8439 mblist_init(&ret);
8440 mblist_init(&forward);
8441 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8442 ifnet_ref_t dst_if;
8443
8444 /* take packet out of the list */
8445 next_packet = scan->m_nextpkt;
8446 scan->m_nextpkt = NULL;
8447
8448 scan = bridge_mac_nat_input(sc, external_ifp, scan, &dst_if);
8449 if (scan != NULL) {
8450 if (dst_if != NULL) {
8451 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8452 "%s MAC-NAT input translate to %s",
8453 sc->sc_if_xname, dst_if->if_xname);
8454 /* use rcvif to store the egress interface */
8455 mbuf_pkthdr_setrcvif(scan, dst_if);
8456 /* add it to the forwarding list */
8457 mblist_append(&forward, scan);
8458 } else {
8459 /* add it to the "continue on as input" list */
8460 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8461 "%s MAC-NAT input for %s",
8462 sc->sc_if_xname,
8463 external_ifp->if_xname);
8464 mblist_append(&ret, scan);
8465 }
8466 }
8467 }
8468 *forward_head = forward.head;
8469 return ret;
8470 }
8471
8472 /*
8473 * bridge_mac_nat_translate_list:
8474 * Process a list of packets destined to the MAC-NAT interface `dst_if`
8475 * from the bridge member `sbif`.
8476 *
8477 * For each packet in the list, update the MAC-NAT record, and if
8478 * translation is required, translate it.
8479 *
8480 * Returns the list of packets that should be delivered to the MAC-NAT
8481 * interface.
8482 */
8483 static mbuf_t
8484 bridge_mac_nat_translate_list(struct bridge_softc * sc,
8485 struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8486 {
8487 mbuf_t next_packet;
8488 mblist ret;
8489
8490 mblist_init(&ret);
8491 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
8492 struct mac_nat_record mnr;
8493 bool translate_mac;
8494
8495 /* take packet out of the list */
8496 next_packet = scan->m_nextpkt;
8497 scan->m_nextpkt = NULL;
8498 translate_mac = bridge_mac_nat_output(sc, sbif, &scan, &mnr);
8499 if (scan != NULL) {
8500 if (translate_mac) {
8501 bridge_mac_nat_translate(&scan, &mnr,
8502 IF_LLADDR(dst_if));
8503 }
8504 if (scan != NULL) {
8505 /* add it back to the list */
8506 mblist_append(&ret, scan);
8507 }
8508 }
8509 }
8510 return ret.head;
8511 }
8512
8513 /*
8514 * bridge_mac_nat_copy_and_translate_list:
8515 * Same as bridge_mac_nat_translate_list() except that a copy of the
8516 * packet list is returned instead.
8517 *
8518 * The packet list `m` is left unaltered.
8519 */
8520 static mbuf_t
8521 bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
8522 struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8523 {
8524 mbuf_t next_packet;
8525 mblist ret;
8526
8527 mblist_init(&ret);
8528 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8529 mbuf_ref_t mc = NULL;
8530 struct mac_nat_record mnr;
8531 bool translate_mac;
8532
8533 /* take packet out of the list, make a copy, put it back */
8534 next_packet = scan->m_nextpkt;
8535 scan->m_nextpkt = NULL;
8536 mc = m_dup(scan, M_DONTWAIT);
8537 scan->m_nextpkt = next_packet;
8538 if (mc == NULL) {
8539 continue;
8540 }
8541 translate_mac = bridge_mac_nat_output(sc, sbif, &mc, &mnr);
8542 if (mc != NULL) {
8543 if (translate_mac) {
8544 bridge_mac_nat_translate(&mc, &mnr,
8545 IF_LLADDR(dst_if));
8546 }
8547 if (mc != NULL) {
8548 /* add it to the new list */
8549 mblist_append(&ret, mc);
8550 }
8551 }
8552 }
8553 return ret.head;
8554 }
8555
8556 static void
8557 bridge_mac_nat_forward_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
8558 mbuf_t m)
8559 {
8560 int count = 0;
8561 ifnet_t dst_if;
8562 mblist list;
8563 int n_lists = 0;
8564 mbuf_t next_packet;
8565
8566 mblist_init(&list);
8567 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8568 ifnet_t this_if;
8569
8570 next_packet = scan->m_nextpkt;
8571 this_if = mbuf_pkthdr_rcvif(scan);
8572 mbuf_pkthdr_setrcvif(scan, NULL);
8573 if (list.head == NULL) {
8574 /* start a new list */
8575 list.head = list.tail = scan;
8576 count = 1;
8577 dst_if = this_if;
8578 } else if (dst_if != this_if) {
8579 /* send up the previous chain */
8580 if (list.tail != NULL) {
8581 /* terminate the list */
8582 list.tail->m_nextpkt = NULL;
8583 }
8584 n_lists++;
8585 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8586 "(%s): sublist %u pkts %u",
8587 dst_if->if_xname, n_lists, count);
8588 bridge_enqueue(bridge_ifp, NULL,
8589 dst_if, etypef, list.head,
8590 CHECKSUM_OPERATION_CLEAR_OFFLOAD);
8591
8592 /* start new list */
8593 list.head = list.tail = scan;
8594 count = 1;
8595 dst_if = this_if;
8596 } else {
8597 count++;
8598 list.tail = scan;
8599 }
8600 if (next_packet == NULL) {
8601 /* last list */
8602 n_lists++;
8603 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8604 "(%s): sublist %u pkts %u",
8605 dst_if->if_xname, n_lists, count);
8606 bridge_enqueue(bridge_ifp, NULL,
8607 dst_if, etypef, list.head,
8608 CHECKSUM_OPERATION_CLEAR_OFFLOAD);
8609 }
8610 }
8611 return;
8612 }
8613
8614 /*
8615 * bridge_mac_nat_output:
8616 * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8617 * from the interface 'bif'.
8618 *
8619 * Create a mac_nat_entry containing the source IP address and MAC address
8620 * from the packet. Populate a mac_nat_record with information detailing
8621 * how to translate the packet. Translation takes place later by calling
8622 * `bridge_mac_nat_translate()`.
8623 *
8624 * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8625 * interface is generating an output packet. No translation is required in this
8626 * case, we just record the IP address used to prevent another bif from
8627 * claiming our IP address.
8628 *
8629 * Returns:
8630 * TRUE if the packet should be translated (*mnr updated as well),
8631 * FALSE otherwise.
8632 *
8633 * *data may be updated to point at a different mbuf chain or NULL if
8634 * the chain was deallocated during processing.
8635 */
8636
8637 static boolean_t
8638 bridge_mac_nat_output(struct bridge_softc *sc,
8639 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8640 {
8641 struct ether_header *eh;
8642 boolean_t translate = FALSE;
8643
8644 BRIDGE_LOCK_ASSERT_HELD(sc);
8645 assert(sc->sc_mac_nat_bif != NULL);
8646
8647 eh = mtod(*data, struct ether_header *);
8648 if (mnr != NULL) {
8649 bzero(mnr, sizeof(*mnr));
8650 mnr->mnr_ether_type = eh->ether_type;
8651 }
8652 switch (eh->ether_type) {
8653 case HTONS_ETHERTYPE_ARP:
8654 translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8655 break;
8656 case HTONS_ETHERTYPE_IP:
8657 translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8658 break;
8659 case HTONS_ETHERTYPE_IPV6:
8660 translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8661 break;
8662 default:
8663 break;
8664 }
8665 return translate;
8666 }
8667
8668 static void
8669 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8670 const char eaddr[ETHER_ADDR_LEN])
8671 {
8672 errno_t error;
8673
8674 if (mnr->mnr_arp_offset == 0) {
8675 return;
8676 }
8677 /* replace the source hardware address */
8678 error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8679 ETHER_ADDR_LEN, eaddr,
8680 MBUF_DONTWAIT);
8681 if (error != 0) {
8682 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8683 "mbuf_copyback failed");
8684 m_freem(*data);
8685 *data = NULL;
8686 }
8687 return;
8688 }
8689
8690 static void
8691 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8692 {
8693 errno_t error;
8694 size_t offset;
8695
8696 if (mnr->mnr_ip_header_len == 0) {
8697 return;
8698 }
8699 /* update the UDP checksum */
8700 offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8701 error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8702 sizeof(mnr->mnr_ip_udp_csum),
8703 &mnr->mnr_ip_udp_csum,
8704 MBUF_DONTWAIT);
8705 if (error != 0) {
8706 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8707 "mbuf_copyback uh_sum failed");
8708 m_freem(*data);
8709 *data = NULL;
8710 }
8711 /* update the DHCP must broadcast flag */
8712 offset += sizeof(struct udphdr);
8713 error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8714 sizeof(mnr->mnr_ip_dhcp_flags),
8715 &mnr->mnr_ip_dhcp_flags,
8716 MBUF_DONTWAIT);
8717 if (error != 0) {
8718 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8719 "mbuf_copyback dp_flags failed");
8720 m_freem(*data);
8721 *data = NULL;
8722 }
8723 }
8724
8725 static void
8726 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8727 const char eaddr[ETHER_ADDR_LEN])
8728 {
8729 uint16_t cksum;
8730 errno_t error;
8731 mbuf_t m = *data;
8732
8733 if (mnr->mnr_ip6_header_len == 0) {
8734 return;
8735 }
8736 switch (mnr->mnr_ip6_icmp6_type) {
8737 case ND_ROUTER_ADVERT:
8738 case ND_ROUTER_SOLICIT:
8739 case ND_NEIGHBOR_SOLICIT:
8740 case ND_NEIGHBOR_ADVERT:
8741 if (mnr->mnr_ip6_lladdr_offset == 0) {
8742 /* nothing to do */
8743 return;
8744 }
8745 break;
8746 default:
8747 return;
8748 }
8749
8750 /*
8751 * replace the lladdr
8752 */
8753 error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8754 ETHER_ADDR_LEN, eaddr,
8755 MBUF_DONTWAIT);
8756 if (error != 0) {
8757 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8758 "mbuf_copyback lladdr failed");
8759 m_freem(m);
8760 *data = NULL;
8761 return;
8762 }
8763
8764 /*
8765 * recompute the icmp6 checksum
8766 */
8767
8768 /* skip past the ethernet header */
8769 _mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
8770
8771 #define CKSUM_OFFSET_ICMP6 offsetof(struct icmp6_hdr, icmp6_cksum)
8772 /* set the checksum to zero */
8773 cksum = 0;
8774 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8775 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8776 if (error != 0) {
8777 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8778 "mbuf_copyback cksum=0 failed");
8779 m_freem(m);
8780 *data = NULL;
8781 return;
8782 }
8783 /* compute and set the new checksum */
8784 cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8785 mnr->mnr_ip6_icmp6_len);
8786 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8787 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8788 if (error != 0) {
8789 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8790 "mbuf_copyback cksum failed");
8791 m_freem(m);
8792 *data = NULL;
8793 return;
8794 }
8795 /* restore the ethernet header */
8796 _mbuf_adjust_pkthdr_and_data(m, -ETHER_HDR_LEN);
8797 return;
8798 }
8799
8800 static void
8801 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8802 const char eaddr[ETHER_ADDR_LEN])
8803 {
8804 struct ether_header *eh;
8805
8806 /* replace the source ethernet address with the single MAC */
8807 eh = mtod(*data, struct ether_header *);
8808 bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8809 switch (mnr->mnr_ether_type) {
8810 case HTONS_ETHERTYPE_ARP:
8811 bridge_mac_nat_arp_translate(data, mnr, eaddr);
8812 break;
8813
8814 case HTONS_ETHERTYPE_IP:
8815 bridge_mac_nat_ip_translate(data, mnr);
8816 break;
8817
8818 case HTONS_ETHERTYPE_IPV6:
8819 bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8820 break;
8821
8822 default:
8823 break;
8824 }
8825 return;
8826 }
8827
8828 /*
8829 * bridge packet filtering
8830 */
8831
8832 /*
8833 * Perform basic checks on header size since
8834 * pfil assumes ip_input has already processed
8835 * it for it. Cut-and-pasted from ip_input.c.
8836 * Given how simple the IPv6 version is,
8837 * does the IPv4 version really need to be
8838 * this complicated?
8839 *
8840 * XXX Should we update ipstat here, or not?
8841 * XXX Right now we update ipstat but not
8842 * XXX csum_counter.
8843 */
8844 static int
8845 bridge_ip_checkbasic(struct mbuf **mp)
8846 {
8847 struct mbuf *m = *mp;
8848 struct ip *ip;
8849 int len, hlen;
8850 u_short sum;
8851
8852 if (*mp == NULL) {
8853 return -1;
8854 }
8855
8856 if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8857 /* max_linkhdr is already rounded up to nearest 4-byte */
8858 if ((m = m_copyup(m, sizeof(struct ip),
8859 max_linkhdr)) == NULL) {
8860 /* XXXJRT new stat, please */
8861 ipstat.ips_toosmall++;
8862 goto bad;
8863 }
8864 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8865 if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8866 ipstat.ips_toosmall++;
8867 goto bad;
8868 }
8869 }
8870 ip = mtod(m, struct ip *);
8871 if (ip == NULL) {
8872 goto bad;
8873 }
8874
8875 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
8876 ipstat.ips_badvers++;
8877 goto bad;
8878 }
8879 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8880 if (hlen < (int)sizeof(struct ip)) { /* minimum header length */
8881 ipstat.ips_badhlen++;
8882 goto bad;
8883 }
8884 if (hlen > m->m_len) {
8885 if ((m = m_pullup(m, hlen)) == 0) {
8886 ipstat.ips_badhlen++;
8887 goto bad;
8888 }
8889 ip = mtod(m, struct ip *);
8890 if (ip == NULL) {
8891 goto bad;
8892 }
8893 }
8894
8895 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
8896 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
8897 } else {
8898 if (hlen == sizeof(struct ip)) {
8899 sum = in_cksum_hdr(ip);
8900 } else {
8901 sum = in_cksum(m, hlen);
8902 }
8903 }
8904 if (sum) {
8905 ipstat.ips_badsum++;
8906 goto bad;
8907 }
8908
8909 /* Retrieve the packet length. */
8910 len = ntohs(ip->ip_len);
8911
8912 /*
8913 * Check for additional length bogosity
8914 */
8915 if (len < hlen) {
8916 ipstat.ips_badlen++;
8917 goto bad;
8918 }
8919
8920 /*
8921 * Check that the amount of data in the buffers
8922 * is as at least much as the IP header would have us expect.
8923 * Drop packet if shorter than we expect.
8924 */
8925 if (m->m_pkthdr.len < len) {
8926 ipstat.ips_tooshort++;
8927 goto bad;
8928 }
8929
8930 /* Checks out, proceed */
8931 *mp = m;
8932 return 0;
8933
8934 bad:
8935 *mp = m;
8936 return -1;
8937 }
8938
8939 /*
8940 * Same as above, but for IPv6.
8941 * Cut-and-pasted from ip6_input.c.
8942 * XXX Should we update ip6stat, or not?
8943 */
8944 static int
8945 bridge_ip6_checkbasic(struct mbuf **mp)
8946 {
8947 struct mbuf *m = *mp;
8948 struct ip6_hdr *ip6;
8949
8950 /*
8951 * If the IPv6 header is not aligned, slurp it up into a new
8952 * mbuf with space for link headers, in the event we forward
8953 * it. Otherwise, if it is aligned, make sure the entire base
8954 * IPv6 header is in the first mbuf of the chain.
8955 */
8956 if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8957 struct ifnet *inifp = m->m_pkthdr.rcvif;
8958 /* max_linkhdr is already rounded up to nearest 4-byte */
8959 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
8960 max_linkhdr)) == NULL) {
8961 /* XXXJRT new stat, please */
8962 ip6stat.ip6s_toosmall++;
8963 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8964 goto bad;
8965 }
8966 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
8967 struct ifnet *inifp = m->m_pkthdr.rcvif;
8968 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
8969 ip6stat.ip6s_toosmall++;
8970 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8971 goto bad;
8972 }
8973 }
8974
8975 ip6 = mtod(m, struct ip6_hdr *);
8976
8977 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
8978 ip6stat.ip6s_badvers++;
8979 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
8980 goto bad;
8981 }
8982
8983 /* Checks out, proceed */
8984 *mp = m;
8985 return 0;
8986
8987 bad:
8988 *mp = m;
8989 return -1;
8990 }
8991
8992 /*
8993 * the PF routines expect to be called from ip_input, so we
8994 * need to do and undo here some of the same processing.
8995 *
8996 * XXX : this is heavily inspired on bridge_pfil()
8997 */
8998 static int
8999 bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
9000 bool input)
9001 {
9002 /*
9003 * XXX : mpetit : heavily inspired by bridge_pfil()
9004 */
9005
9006 int snap, error, i, hlen;
9007 struct ether_header *eh1, eh2;
9008 struct ip *ip;
9009 struct llc llc1;
9010 u_int16_t ether_type;
9011
9012 snap = 0;
9013 error = -1; /* Default error if not error == 0 */
9014
9015 if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
9016 return 0; /* filtering is disabled */
9017 }
9018 i = min((*mp)->m_pkthdr.len, max_protohdr);
9019 if ((*mp)->m_len < i) {
9020 *mp = m_pullup(*mp, i);
9021 if (*mp == NULL) {
9022 BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
9023 return -1;
9024 }
9025 }
9026
9027 eh1 = mtod(*mp, struct ether_header *);
9028 ether_type = ntohs(eh1->ether_type);
9029
9030 /*
9031 * Check for SNAP/LLC.
9032 */
9033 if (ether_type < ETHERMTU) {
9034 struct llc *llc2 = (struct llc *)(eh1 + 1);
9035
9036 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
9037 llc2->llc_dsap == LLC_SNAP_LSAP &&
9038 llc2->llc_ssap == LLC_SNAP_LSAP &&
9039 llc2->llc_control == LLC_UI) {
9040 ether_type = htons(llc2->llc_un.type_snap.ether_type);
9041 snap = 1;
9042 }
9043 }
9044
9045 /*
9046 * If we're trying to filter bridge traffic, don't look at anything
9047 * other than IP and ARP traffic. If the filter doesn't understand
9048 * IPv6, don't allow IPv6 through the bridge either. This is lame
9049 * since if we really wanted, say, an AppleTalk filter, we are hosed,
9050 * but of course we don't have an AppleTalk filter to begin with.
9051 * (Note that since pfil doesn't understand ARP it will pass *ALL*
9052 * ARP traffic.)
9053 */
9054 switch (ether_type) {
9055 case ETHERTYPE_ARP:
9056 case ETHERTYPE_REVARP:
9057 return 0; /* Automatically pass */
9058
9059 case ETHERTYPE_IP:
9060 case ETHERTYPE_IPV6:
9061 break;
9062 default:
9063 /*
9064 * Check to see if the user wants to pass non-ip
9065 * packets, these will not be checked by pf and
9066 * passed unconditionally so the default is to drop.
9067 */
9068 if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
9069 goto bad;
9070 }
9071 break;
9072 }
9073
9074 /* Strip off the Ethernet header and keep a copy. */
9075 m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
9076 m_adj(*mp, ETHER_HDR_LEN);
9077
9078 /* Strip off snap header, if present */
9079 if (snap) {
9080 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
9081 m_adj(*mp, sizeof(struct llc));
9082 }
9083
9084 /*
9085 * Check the IP header for alignment and errors
9086 */
9087 switch (ether_type) {
9088 case ETHERTYPE_IP:
9089 error = bridge_ip_checkbasic(mp);
9090 break;
9091 case ETHERTYPE_IPV6:
9092 error = bridge_ip6_checkbasic(mp);
9093 break;
9094 default:
9095 error = 0;
9096 break;
9097 }
9098 if (error) {
9099 goto bad;
9100 }
9101
9102 error = 0;
9103
9104 /*
9105 * Run the packet through pf rules
9106 */
9107 switch (ether_type) {
9108 case ETHERTYPE_IP:
9109 /*
9110 * before calling the firewall, swap fields the same as
9111 * IP does. here we assume the header is contiguous
9112 */
9113 ip = mtod(*mp, struct ip *);
9114
9115 ip->ip_len = ntohs(ip->ip_len);
9116 ip->ip_off = ntohs(ip->ip_off);
9117
9118 if (ifp != NULL) {
9119 error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
9120 }
9121
9122 if (*mp == NULL || error != 0) { /* filter may consume */
9123 break;
9124 }
9125
9126 /* Recalculate the ip checksum and restore byte ordering */
9127 ip = mtod(*mp, struct ip *);
9128 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9129 if (hlen < (int)sizeof(struct ip)) {
9130 goto bad;
9131 }
9132 if (hlen > (*mp)->m_len) {
9133 if ((*mp = m_pullup(*mp, hlen)) == 0) {
9134 goto bad;
9135 }
9136 ip = mtod(*mp, struct ip *);
9137 if (ip == NULL) {
9138 goto bad;
9139 }
9140 }
9141 ip->ip_len = htons(ip->ip_len);
9142 ip->ip_off = htons(ip->ip_off);
9143 ip->ip_sum = 0;
9144 if (hlen == sizeof(struct ip)) {
9145 ip->ip_sum = in_cksum_hdr(ip);
9146 } else {
9147 ip->ip_sum = in_cksum(*mp, hlen);
9148 }
9149 break;
9150
9151 case ETHERTYPE_IPV6:
9152 if (ifp != NULL) {
9153 error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
9154 }
9155
9156 if (*mp == NULL || error != 0) { /* filter may consume */
9157 break;
9158 }
9159 break;
9160 default:
9161 error = 0;
9162 break;
9163 }
9164
9165 if (*mp == NULL) {
9166 return error;
9167 }
9168 if (error != 0) {
9169 goto bad;
9170 }
9171
9172 error = -1;
9173
9174 /*
9175 * Finally, put everything back the way it was and return
9176 */
9177 if (snap) {
9178 M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
9179 if (*mp == NULL) {
9180 return error;
9181 }
9182 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
9183 }
9184
9185 M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
9186 if (*mp == NULL) {
9187 return error;
9188 }
9189 bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
9190
9191 return 0;
9192
9193 bad:
9194 m_freem(*mp);
9195 *mp = NULL;
9196 return error;
9197 }
9198
9199 #if BRIDGESTP
9200 static void
9201 bridge_bstp_input_list(struct bstp_port *bp, struct mbuf *head)
9202 {
9203 mbuf_t next_packet = NULL;
9204
9205 for (mbuf_t scan = head; scan != NULL; scan = next_packet) {
9206 next_packet = scan->m_nextpkt;
9207 scan->m_nextpkt = NULL;
9208 bstp_input(bp, scan);
9209 }
9210 }
9211 #endif /* BRIDGESTP */
9212
9213 static mblist
9214 bridge_filter_arp_list(struct bridge_iflist * bif, mbuf_t m)
9215 {
9216 mbuf_t next_packet = NULL;
9217 mblist ret;
9218
9219 mblist_init(&ret);
9220 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9221 errno_t error;
9222
9223 /* take packet out of the list */
9224 next_packet = scan->m_nextpkt;
9225 scan->m_nextpkt = NULL;
9226 /* filter the ARP packet */
9227 error = bridge_host_filter_arp(bif, &scan);
9228 if (error != 0 && scan != NULL) {
9229 if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9230 brlog_mbuf_data(scan, 0,
9231 sizeof(struct ether_header) +
9232 sizeof(struct ip));
9233 }
9234 m_freem(scan);
9235 scan = NULL;
9236 }
9237 if (scan != NULL) {
9238 /* add it to the list */
9239 mblist_append(&ret, scan);
9240 }
9241 }
9242 return ret;
9243 }
9244
9245 static mbuf_t
9246 bridge_filter_checksum(ifnet_t bridge_ifp, struct bridge_iflist * bif, mbuf_t m,
9247 bool is_ipv4, bool host_filter, bool checksum)
9248 {
9249 uint32_t dbgf = 0;
9250 errno_t error;
9251 ip_packet_info info;
9252 u_int mac_hlen = sizeof(struct ether_header);
9253
9254 if (host_filter) {
9255 dbgf |= BR_DBGF_HOSTFILTER;
9256 }
9257 if (checksum) {
9258 dbgf |= BR_DBGF_CHECKSUM;
9259 }
9260 /* get the IP protocol header */
9261 error = bridge_get_ip_proto(&m, mac_hlen, is_ipv4, &info,
9262 &bif->bif_stats.brms_in_ip);
9263 if (error != 0) {
9264 BRIDGE_LOG(LOG_NOTICE, dbgf,
9265 "%s(%s) bridge_get_ip_proto failed %d",
9266 bridge_ifp->if_xname,
9267 bif->bif_ifp->if_xname, error);
9268 goto drop;
9269 }
9270 if (host_filter) {
9271 bool drop = true;
9272
9273 /* restrict IP protocols */
9274 switch (info.ip_proto) {
9275 case IPPROTO_ICMP:
9276 case IPPROTO_IGMP:
9277 drop = !is_ipv4;
9278 break;
9279 case IPPROTO_TCP:
9280 case IPPROTO_UDP:
9281 drop = false;
9282 break;
9283 case IPPROTO_ICMPV6:
9284 drop = is_ipv4;
9285 break;
9286 default:
9287 break;
9288 }
9289 if (drop) {
9290 BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
9291 goto drop;
9292 }
9293 bridge_hostfilter_stats.brhf_ip_ok += 1;
9294 }
9295 if (checksum) {
9296 /* need to compute IP/UDP/TCP/checksums */
9297 error = bridge_offload_checksum(&m, &info, &bif->bif_stats);
9298 if (error != 0) {
9299 BRIDGE_LOG(LOG_NOTICE, dbgf,
9300 "%s(%s) bridge_offload_checksum failed %d",
9301 bridge_ifp->if_xname,
9302 bif->bif_ifp->if_xname, error);
9303 goto drop;
9304 }
9305 }
9306 return m;
9307
9308 drop:
9309 /* toss the packet */
9310 if (m != NULL) {
9311 if (host_filter &&
9312 BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9313 brlog_mbuf_data(m, 0,
9314 sizeof(struct ether_header) +
9315 sizeof(struct ip));
9316 }
9317 m_freem(m);
9318 m = NULL;
9319 }
9320 return NULL;
9321 }
9322
9323 static mblist
9324 bridge_filter_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9325 mbuf_t in_list, ether_type_flag_t etypef, bool host_filter, bool checksum)
9326 {
9327 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
9328 mbuf_t next_packet = NULL;
9329 mblist ret;
9330
9331 mblist_init(&ret);
9332 for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9333 /* take packet out of the list */
9334 next_packet = scan->m_nextpkt;
9335 scan->m_nextpkt = NULL;
9336 scan = bridge_filter_checksum(bridge_ifp, bif,
9337 scan, is_ipv4, host_filter, checksum);
9338 if (scan != NULL) {
9339 /* add packet to the list */
9340 mblist_append(&ret, scan);
9341 }
9342 }
9343 return ret;
9344 }
9345
9346 static mbuf_t
9347 bridge_checksum_offload_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9348 mbuf_t m, bool is_ipv4)
9349 {
9350 mblist ret;
9351 mbuf_t next_packet;
9352
9353 mblist_init(&ret);
9354 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
9355 uint32_t csum_flags;
9356
9357 /* take it out of the list */
9358 next_packet = scan->m_nextpkt;
9359 scan->m_nextpkt = NULL;
9360
9361 csum_flags = scan->m_pkthdr.csum_flags;
9362 if ((csum_flags & checksum_request_flags) != 0) {
9363 /* compute the checksum now */
9364 scan = bridge_filter_checksum(bridge_ifp, bif, scan,
9365 is_ipv4, false, true);
9366 if (scan != NULL) {
9367 /* clear offload now */
9368 scan->m_pkthdr.csum_flags &= csum_flags;
9369 }
9370 }
9371 if (scan != NULL) {
9372 mblist_append(&ret, scan);
9373 }
9374 }
9375 return ret.head;
9376 }
9377
9378 static mbuf_t
9379 copy_broadcast_packet(mbuf_t m)
9380 {
9381 mbuf_t mc;
9382
9383 /* make a copy of the packet */
9384 mc = m_dup(m, M_DONTWAIT);
9385 if (mc != NULL) {
9386 struct ether_header *eh;
9387
9388 /* make copy look like it is broadcast */
9389 mc->m_flags |= M_BCAST;
9390 eh = mtod(mc, struct ether_header *);
9391 bcopy(etherbroadcastaddr, eh->ether_dhost, ETHER_ADDR_LEN);
9392 }
9393 return mc;
9394 }
9395
9396 static mblist
9397 bridge_find_broadcast_ipv4(mbuf_t in_list, mbuf_t * ip_bcast_head)
9398 {
9399 mblist ip_bcast;
9400 mbuf_t next_packet = NULL;
9401 mblist ret;
9402
9403 mblist_init(&ret);
9404 mblist_init(&ip_bcast);
9405 for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
9406 mbuf_t bcast_pkt = NULL;
9407 uint8_t *header;
9408
9409 /* take packet out of the list */
9410 next_packet = scan->m_nextpkt;
9411 scan->m_nextpkt = NULL;
9412
9413 header = get_ether_ip_header_ptr(&scan, FALSE);
9414 if (header != NULL) {
9415 struct in_addr dst;
9416 struct ip *iphdr;
9417
9418 iphdr = (struct ip *)(header + sizeof(struct ether_header));
9419 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
9420 if (dst.s_addr == INADDR_BROADCAST) {
9421 bcast_pkt = copy_broadcast_packet(scan);
9422 }
9423 }
9424 if (bcast_pkt != NULL) {
9425 /* add packet to broadcast list */
9426 mblist_append(&ip_bcast, bcast_pkt);
9427 }
9428 if (scan != NULL) {
9429 /* add packet back into the list */
9430 mblist_append(&ret, scan);
9431 }
9432 }
9433 *ip_bcast_head = ip_bcast.head;
9434 return ret;
9435 }
9436
9437 static ifnet_t
9438 bridge_find_member(struct bridge_softc * sc, uint8_t * lladdr,
9439 struct bridge_iflist * sbif)
9440 {
9441 struct bridge_iflist * bif;
9442
9443 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
9444 if (bif == sbif) {
9445 /* skip the input member */
9446 continue;
9447 }
9448 if (_ether_cmp(IF_LLADDR(bif->bif_ifp), lladdr) == 0) {
9449 return bif->bif_ifp;
9450 }
9451 }
9452 return NULL;
9453 }
9454
9455
/*
 * Function: bridge_input_list
 *
 * Purpose:
 *   Process a list of input packets through the bridge.
 *   The caller ensures that all of the packets in `list`
 *   (`list.head` .. `list.tail`) have the same ethernet header.
 *
 * Returns:
 *   The list of packets that were not consumed/freed.
 *
 *   A list with a NULL head if all of the packets were consumed.
 */
9470 static mblist
9471 bridge_input_list(struct bridge_softc * sc, ifnet_t ifp,
9472 struct ether_header * eh_in_p, mblist list, bool is_promisc)
9473 {
9474 struct bridge_iflist * bif;
9475 ifnet_t bridge_ifp;
9476 bool checksum_offload;
9477 uint8_t * dhost;
9478 #if BRIDGESTP
9479 bool discarding = false;
9480 #endif /* BRIDGESTP */
9481 ifnet_t dst_if = NULL;
9482 errno_t error;
9483 ether_type_flag_t etypef;
9484 bool host_filter;
9485 bool host_filter_drop = false;
9486 mbuf_ref_t ip_bcast = NULL;
9487 bool is_bridge_mac = false;
9488 bool is_broadcast;
9489 bool is_ifp_mac;
9490 ifnet_t member_input = NULL;
9491 uint8_t * shost;
9492 bool uses_virtio = false;
9493 uint16_t vlan;
9494
9495 if (ifp->if_bridge == NULL) {
9496 /* no longer part of bridge */
9497 goto done;
9498 }
9499 bridge_ifp = sc->sc_ifp;
9500 is_broadcast = IS_BCAST_MCAST(list.head);
9501 is_ifp_mac = (!is_broadcast && !is_promisc);
9502 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9503 "%s from %s count %d head 0x%llx.0x%llx tail 0x%llx.0x%llx",
9504 bridge_ifp->if_xname, ifp->if_xname, list.count,
9505 (uint64_t)VM_KERNEL_ADDRPERM(list.head),
9506 (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.head, void *)),
9507 (uint64_t)VM_KERNEL_ADDRPERM(list.tail),
9508 (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.tail, void *)));
9509
9510 /* assume we'll return all packets */
9511 if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
9512 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9513 "%s not running passing along",
9514 bridge_ifp->if_xname);
9515 goto done;
9516 }
9517
9518 vlan = VLANTAGOF(m);
9519
9520 /* lookup the bridge member */
9521 BRIDGE_LOCK(sc);
9522 bif = bridge_lookup_member_if(sc, ifp);
9523 if (bif == NULL) {
9524 BRIDGE_UNLOCK(sc);
9525 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9526 "%s bridge_lookup_member_if failed",
9527 bridge_ifp->if_xname);
9528 goto done;
9529 }
9530
9531 uses_virtio = bif_uses_virtio(bif);
9532
9533 /*
9534 * host filter drops packets that:
9535 * - are not ARP, IPv4, or IPv6
9536 * - have incorrect source MAC address
9537 */
9538 host_filter = (bif->bif_flags & BIFF_HOST_FILTER) != 0;
9539 etypef = ether_type_flag_get(eh_in_p->ether_type);
9540 if (host_filter
9541 && (etypef & ETHER_TYPE_FLAG_IP_ARP) == 0) {
9542 /* ether type not one of ARP, IPv4, or IPv6 */
9543 BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
9544 host_filter_drop = true;
9545 } else if ((bif->bif_flags & BIFF_HF_HWSRC) != 0 &&
9546 bcmp(eh_in_p->ether_shost, bif->bif_hf_hwsrc, ETHER_ADDR_LEN)
9547 != 0) {
9548 /* only allow the single source MAC address */
9549 BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr,
9550 __func__, __LINE__);
9551 host_filter_drop = true;
9552 }
9553 if (host_filter_drop) {
9554 BRIDGE_UNLOCK(sc);
9555 m_freem_list(list.head);
9556 list.head = list.tail = NULL;
9557 goto done;
9558 }
9559
9560 #if BRIDGESTP
9561 discarding = (bif->bif_ifflags & IFBIF_STP) != 0 &&
9562 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING;
9563 #endif /* BRIDGESTP */
9564
9565 dhost = eh_in_p->ether_dhost;
9566 shost = eh_in_p->ether_shost;
9567 /*
9568 * Reserved multicast address listed in 802.1D section 7.12.6
9569 * must not be forwarded by the bridge.
9570 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
9571 */
9572 if (is_broadcast) {
9573 if (IS_MCAST(list.head)) {
9574 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
9575 " multicast: "
9576 "%02x:%02x:%02x:%02x:%02x:%02x",
9577 dhost[0], dhost[1],
9578 dhost[2], dhost[3],
9579 dhost[4], dhost[5]);
9580 }
9581 if (bcmp(dhost, bstp_etheraddr, (ETHER_ADDR_LEN - 1)) == 0) {
9582 if (dhost[5] == BSTP_ETHERADDR_RANGE_FIRST) {
9583 /* multicast for spanning tree */
9584 #if BRIDGESTP
9585 bridge_bstp_input_list(&bif->bif_stp, list.head);
9586 #else /* BRIDGESTP */
9587 m_freem_list(list.head);
9588 #endif /* BRIDGESTP */
9589 list.head = list.tail = NULL;
9590 BRIDGE_UNLOCK(sc);
9591 goto done;
9592 }
9593 if (dhost[5] <= BSTP_ETHERADDR_RANGE_LAST) {
9594 /* allow packet to continue up the stack */
9595 BRIDGE_UNLOCK(sc);
9596 goto done;
9597 }
9598 }
9599 /* broadcast to all members */
9600 os_atomic_add(&bridge_ifp->if_imcasts, list.count, relaxed);
9601 }
9602
9603 #if BRIDGESTP
9604 if (discarding) {
9605 BRIDGE_UNLOCK(sc);
9606 goto done;
9607 }
9608 #endif /* BRIDGESTP */
9609
9610 /* If the interface is learning, record the address. */
9611 if ((bif->bif_ifflags & IFBIF_LEARNING) != 0) {
9612 error = bridge_rtupdate(sc, shost, vlan, bif, 0, IFBAF_DYNAMIC);
9613 /*
9614 * If the interface has addresses limits then deny any source
9615 * that is not in the cache.
9616 */
9617 if (error != 0 && bif->bif_addrmax) {
9618 BRIDGE_UNLOCK(sc);
9619 goto done;
9620 }
9621 }
9622 #if BRIDGESTP
9623 if ((bif->bif_ifflags & IFBIF_STP) != 0 &&
9624 bif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
9625 BRIDGE_UNLOCK(sc);
9626 goto done;
9627 }
9628 #endif /* BRIDGESTP */
9629
9630 /*
9631 * If the packet is not IP, let the host filter drop ARP packets.
9632 * Otherwise, if the host filter is enabled or we need to compute
9633 * checksums, do that.
9634 * Otherwise, if MAC-NAT is enabled and this is an IPv4 packet,
9635 * check for IPv4 broadcast packets. Accumulate those in a separate
9636 * list `ip_bcast`.
9637 */
9638 checksum_offload = bif_has_checksum_offload(bif);
9639 if (!ether_type_flag_is_ip(etypef)) {
9640 /* host filter process ARP */
9641 if (host_filter) {
9642 /* host filter check earlier means this must be ARP */
9643 VERIFY(etypef == ETHER_TYPE_FLAG_ARP);
9644 list = bridge_filter_arp_list(bif, list.head);
9645 if (list.head == NULL) {
9646 VERIFY(list.tail == NULL);
9647 BRIDGE_UNLOCK(sc);
9648 goto done;
9649 }
9650 }
9651 } else if (host_filter || checksum_offload) {
9652 /* host filter and/or checksum */
9653 list = bridge_filter_checksum_list(bridge_ifp, bif,
9654 list.head, etypef, host_filter, checksum_offload);
9655 if (list.head == NULL) {
9656 VERIFY(list.tail == NULL);
9657 BRIDGE_UNLOCK(sc);
9658 goto done;
9659 }
9660 } else if (is_ifp_mac && bif == sc->sc_mac_nat_bif &&
9661 etypef == ETHER_TYPE_FLAG_IPV4) {
9662 /* look for broadcast IPv4 packet */
9663 list = bridge_find_broadcast_ipv4(list.head, &ip_bcast);
9664 if (list.head == NULL && ip_bcast == NULL) {
9665 /* all packets were consumed */
9666 BRIDGE_UNLOCK(sc);
9667 goto done;
9668 }
9669 }
9670
9671 /*
9672 * If the bridge has an address assigned, and the destination MAC
9673 * matches the bridge interface, claim the packets for the bridge
9674 * interface.
9675 */
9676 if ((sc->sc_flags & SCF_ADDRESS_ASSIGNED) != 0 &&
9677 !is_broadcast && _ether_cmp(dhost, IF_LLADDR(bridge_ifp)) == 0) {
9678 is_bridge_mac = true;
9679 }
9680 if (is_ifp_mac) {
9681 /* unicast to the interface */
9682 if (sc->sc_mac_nat_bif == bif) {
9683 mbuf_ref_t forward = NULL;
9684
9685 if (list.head != NULL) {
9686 /* handle MAC-NAT if enabled */
9687 list = bridge_mac_nat_input_list(sc, ifp,
9688 list.head, &forward);
9689 }
9690 if (ip_bcast != NULL) {
9691 /* forward to all members except this one */
9692 /* bridge_broadcast_list unlocks */
9693 bridge_broadcast_list(sc, bif, etypef,
9694 ip_bcast);
9695 } else {
9696 BRIDGE_UNLOCK(sc);
9697 }
9698 if (forward != NULL) {
9699 bridge_mac_nat_forward_list(bridge_ifp, etypef,
9700 forward);
9701 }
9702 } else {
9703 BRIDGE_UNLOCK(sc);
9704 }
9705 /* unicast packets for this interface do not get forwarded */
9706 goto done;
9707 }
9708 if (is_bridge_mac || list.head == NULL) {
9709 BRIDGE_UNLOCK(sc);
9710 goto done;
9711 }
9712 if (!is_broadcast) {
9713 /* find where to send the packet */
9714 dst_if = bridge_rtlookup(sc, dhost, vlan);
9715 if (ifp == dst_if) {
9716 /* nothing to forward */
9717 BRIDGE_UNLOCK(sc);
9718 goto done;
9719 }
9720 if (dst_if == NULL) {
9721 /* if a member is the dhost, deliver as input */
9722 member_input = bridge_find_member(sc, dhost, bif);
9723 if (member_input != NULL) {
9724 /* grab packets destined to member */
9725 BRIDGE_UNLOCK(sc);
9726 goto done;
9727 }
9728 /* if a member is shost, there's a loop, drop it */
9729 if (bridge_find_member(sc, shost, bif) != NULL) {
9730 BRIDGE_UNLOCK(sc);
9731 m_freem_list(list.head);
9732 list.head = list.tail = NULL;
9733 goto done;
9734 }
9735 }
9736 }
9737 if (dst_if == NULL) {
9738 mbuf_t m;
9739
9740 m = copy_packet_list(list.head);
9741 if (m != NULL) {
9742 /* bridge_broadcast_list unlocks */
9743 bridge_broadcast_list(sc, bif, etypef, m);
9744 } else {
9745 BRIDGE_UNLOCK(sc);
9746 }
9747 } else {
9748 /* bridge_forward_list() consumes list and unlocks */
9749 bridge_forward_list(sc, bif, dst_if, etypef, list.head);
9750 list.head = list.tail = NULL;
9751 }
9752
9753 done:
9754 if (list.head != NULL) {
9755 if (member_input != NULL) {
9756 /* member gets the packets */
9757 inject_input_packet_list(member_input, list.head, true);
9758 list.head = list.tail = NULL;
9759 } else if (is_bridge_mac) {
9760 /* bridge consumes all the unicast packets */
9761 bridge_interface_input_list(bridge_ifp, etypef, list,
9762 uses_virtio);
9763 list.head = list.tail = NULL;
9764 } else {
9765 adjust_input_packet_list(list.head);
9766 }
9767 }
9768 return list;
9769 }
9770
9771 static inline void
9772 update_mbuf_flags(struct ifnet * ifp, mbuf_t m, struct ether_header * eh)
9773 {
9774 /* duplicate some of the work done in ether_demux */
9775 if ((eh->ether_dhost[0] & 1) == 0) {
9776 if (_ether_cmp(eh->ether_dhost, IF_LLADDR(ifp)) != 0) {
9777 m->m_flags |= M_PROMISC;
9778 }
9779 } else {
9780 /* Check for broadcast */
9781 if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0) {
9782 m->m_flags |= M_BCAST;
9783 } else {
9784 m->m_flags |= M_MCAST;
9785 }
9786 }
9787 if (m->m_flags & M_HASFCS) {
9788 /*
9789 * If the M_HASFCS is set by the driver we want to make sure
9790 * that we strip off the trailing FCS data before handing it
9791 * up the stack.
9792 */
9793 m_adj(m, -ETHER_CRC_LEN);
9794 m->m_flags &= ~M_HASFCS;
9795 }
9796 return;
9797 }
9798
9799 static mbuf_t
9800 bridge_pf_list(mbuf_t m, ifnet_t ifp, uint32_t sc_filter_flags, bool input)
9801 {
9802 mbuf_t next_packet = NULL;
9803 mblist ret;
9804
9805 mblist_init(&ret);
9806 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9807 next_packet = scan->m_nextpkt;
9808
9809 /* remove packet from list, and pass through PF */
9810 scan->m_nextpkt = NULL;
9811 MBUF_INPUT_CHECK(scan, ifp);
9812 bridge_pf(&scan, ifp, sc_filter_flags, input);
9813 if (scan != NULL) {
9814 /* add packet back to the list */
9815 mblist_append(&ret, scan);
9816 }
9817 }
9818 return ret.head;
9819 }
9820
9821 static inline bool
9822 bridge_check_frame_header(struct bridge_softc * sc, ifnet_t ifp, mbuf_t m)
9823 {
9824 bool included = false;
9825 char * __single header;
9826 size_t header_length = 0;
9827
9828 header = m->m_pkthdr.pkt_hdr;
9829 if (header >= (char *)mbuf_datastart(m) &&
9830 header <= mtod(m, char *)) {
9831 header_length = mtod(m, char *) - header;
9832 if (header_length >= ETHER_HDR_LEN) {
9833 included = true;
9834 }
9835 }
9836 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9837 "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
9838 "header length %lu", sc->sc_ifp->if_xname,
9839 ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
9840 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
9841 (uint64_t)VM_KERNEL_ADDRPERM(header),
9842 included ? "inside" : "outside", header_length);
9843 if (!included) {
9844 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9845 "%s: frame_header outside mbuf", ifp->if_xname);
9846 }
9847 return included;
9848 }
9849
9850
9851 mbuf_t
9852 bridge_early_input(struct ifnet *ifp, mbuf_t in_list, u_int32_t cnt)
9853 {
9854 struct ether_header eh;
9855 mblist list;
9856 volatile bool list_is_promisc;
9857 int n_lists = 0;
9858 mbuf_t next_packet = NULL;
9859 mblist ret;
9860 struct bridge_softc * __single sc = ifp->if_bridge;
9861 uint32_t sc_filter_flags;
9862
9863 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9864 "(%s): count %u", ifp->if_xname, cnt);
9865
9866 /* run packet list through PF first */
9867 sc_filter_flags = sc->sc_filter_flags;
9868 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
9869 in_list = bridge_pf_list(in_list, ifp, sc_filter_flags, true);
9870 }
9871
9872 /* form sublists with the same ethernet header */
9873 mblist_init(&list);
9874 mblist_init(&ret);
9875 for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9876 struct ether_header * eh_p;
9877 volatile bool is_promisc;
9878 mblist resid;
9879
9880 /* take it out of the list */
9881 next_packet = scan->m_nextpkt;
9882 scan->m_nextpkt = NULL;
9883
9884 /* don't loop the packet */
9885 if ((scan->m_flags & M_PROTO1) != 0) {
9886 mblist_append(&ret, scan);
9887 continue;
9888 }
9889 /* Check if this mbuf looks valid */
9890 MBUF_INPUT_CHECK(scan, ifp);
9891
9892 /* if the frame header isn't in the first mbuf, ignore */
9893 if (!bridge_check_frame_header(sc, ifp, scan)) {
9894 mblist_append(&ret, scan);
9895 continue;
9896 }
9897 eh_p = __unsafe_forge_single(struct ether_header *,
9898 scan->m_pkthdr.pkt_hdr);
9899 update_mbuf_flags(ifp, scan, eh_p);
9900
9901 /* set start back to include ether header */
9902 _mbuf_adjust_pkthdr_and_data(scan, -ETHER_HDR_LEN);
9903
9904 is_promisc = get_and_clear_promisc(scan);
9905 if (list.head == NULL) {
9906 /* start a new list */
9907 mblist_append(&list, scan);
9908 bcopy(eh_p, &eh, sizeof(eh));
9909 list_is_promisc = is_promisc;
9910 } else if (bcmp(eh_p, &eh, sizeof(eh)) != 0) {
9911 n_lists++;
9912 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9913 "(%s): sublist %u pkts %u",
9914 ifp->if_xname, n_lists, list.count);
9915 if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
9916 brlog_ether_header(&eh);
9917 }
9918 resid = bridge_input_list(sc, ifp, &eh, list,
9919 list_is_promisc);
9920 if (resid.head != NULL) {
9921 /* add to the packets to be returned */
9922 mblist_append_list(&ret, resid);
9923 }
9924 /* start new list */
9925 mblist_init(&list);
9926 mblist_append(&list, scan);
9927 list_is_promisc = is_promisc;
9928 bcopy(eh_p, &eh, sizeof(eh));
9929 } else {
9930 mblist_append(&list, scan);
9931 VERIFY(is_promisc == list_is_promisc);
9932 }
9933 if (next_packet == NULL) {
9934 /* last list */
9935 n_lists++;
9936 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9937 "(%s): sublist %u pkts %u",
9938 ifp->if_xname, n_lists, list.count);
9939 if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
9940 brlog_ether_header(&eh);
9941 }
9942 resid = bridge_input_list(sc, ifp, &eh, list,
9943 list_is_promisc);
9944 if (resid.head != NULL) {
9945 /* add to the packets to be returned */
9946 mblist_append_list(&ret, resid);
9947 }
9948 }
9949 }
9950 return ret.head;
9951 }
9952
9953 /*
9954 * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
9955 * All rights reserved.
9956 *
9957 * Redistribution and use in source and binary forms, with or without
9958 * modification, are permitted provided that the following conditions
9959 * are met:
9960 * 1. Redistributions of source code must retain the above copyright
9961 * notice, this list of conditions and the following disclaimer.
9962 * 2. Redistributions in binary form must reproduce the above copyright
9963 * notice, this list of conditions and the following disclaimer in the
9964 * documentation and/or other materials provided with the distribution.
9965 *
9966 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9967 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9968 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
9969 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
9970 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
9971 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
9972 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9973 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
9974 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
9975 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
9976 * SUCH DAMAGE.
9977 */
9978
9979 /*
9980 * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
9981 *
9982 * Create a queue of packets/segments which fit the given mss + hdr_len.
9983 * m0 points to mbuf chain to be segmented.
9984 * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
9985 * into segments of length MSS bytes and then copy the first hdr_len bytes
9986 * from m0 at the top of each segment.
9987 * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
9988 * in each segment after the first hdr_len bytes
9989 *
9990 * Return the new queue with the segments on success, NULL on failure.
9991 * (the mbuf queue is freed in this case).
9992 */
9993
9994 static mblist
9995 m_seg(struct mbuf *m0, int hdr_len, int mss, char * hdr2_buf __sized_by_or_null(hdr2_len), int hdr2_len)
9996 {
9997 int off = 0, n, firstlen;
9998 struct mbuf *mseg;
9999 int total_len = m0->m_pkthdr.len;
10000 mblist ret;
10001
10002 mblist_init(&ret);
10003 mblist_append(&ret, m0);
10004
10005 /*
10006 * Segmentation useless
10007 */
10008 if (total_len <= hdr_len + mss) {
10009 n = 1;
10010 goto done;
10011 }
10012
10013 if (hdr2_buf == NULL || hdr2_len <= 0) {
10014 hdr2_buf = NULL;
10015 hdr2_len = 0;
10016 }
10017
10018 off = hdr_len + mss;
10019 firstlen = mss; /* first segment stored in the original mbuf */
10020 ret.bytes = off;
10021 for (n = 1; off < total_len; off += mss, n++) {
10022 struct mbuf *m;
10023 /*
10024 * Copy the header from the original packet
10025 * and create a new mbuf chain
10026 */
10027 if (MHLEN < hdr_len) {
10028 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
10029 } else {
10030 m = m_gethdr(M_NOWAIT, MT_DATA);
10031 }
10032
10033 if (m == NULL) {
10034 #ifdef GSO_DEBUG
10035 D("MGETHDR error\n");
10036 #endif
10037 goto err;
10038 }
10039
10040 m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
10041
10042 m->m_len = hdr_len;
10043 /*
10044 * if the optional header is present, copy it
10045 */
10046 if (hdr2_buf != NULL) {
10047 m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
10048 }
10049
10050 m->m_flags |= (m0->m_flags & M_COPYFLAGS);
10051 if (off + mss >= total_len) { /* last segment */
10052 mss = total_len - off;
10053 }
10054 /*
10055 * Copy the payload from original packet
10056 */
10057 mseg = m_copym(m0, off, mss, M_NOWAIT);
10058 if (mseg == NULL) {
10059 m_freem(m);
10060 #ifdef GSO_DEBUG
10061 D("m_copym error\n");
10062 #endif
10063 goto err;
10064 }
10065 m_cat(m, mseg);
10066
10067 m->m_pkthdr.len = hdr_len + hdr2_len + mss;
10068 m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
10069 /*
10070 * Copy the checksum flags and data (in_cksum() need this)
10071 */
10072 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
10073 m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
10074 m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
10075
10076 mblist_append(&ret, m);
10077 }
10078
10079 /*
10080 * Update first segment.
10081 * If the optional header is present, is necessary
10082 * to insert it into the first segment.
10083 */
10084 if (hdr2_buf == NULL) {
10085 m_adj(m0, hdr_len + firstlen - total_len);
10086 m0->m_pkthdr.len = hdr_len + firstlen;
10087 } else {
10088 mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
10089 if (mseg == NULL) {
10090 #ifdef GSO_DEBUG
10091 D("m_copym error\n");
10092 #endif
10093 goto err;
10094 }
10095 m_adj(m0, hdr_len - total_len);
10096 m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
10097 m_cat(m0, mseg);
10098 m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
10099 }
10100
10101 done:
10102 return ret;
10103
10104 err:
10105 if (ret.head != NULL) {
10106 m_freem_list(ret.head);
10107 mblist_init(&ret);
10108 }
10109 return ret;
10110 }
10111
10112 /*
10113 * Wrappers of IPv4 checksum functions
10114 */
10115 static inline void
10116 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
10117 {
10118 m->m_data += mac_hlen;
10119 m->m_len -= mac_hlen;
10120 m->m_pkthdr.len -= mac_hlen;
10121 #if __FreeBSD_version < 1000000
10122 ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
10123 #endif
10124
10125 in_delayed_cksum(m);
10126
10127 #if __FreeBSD_version < 1000000
10128 ip->ip_len = htons(ip->ip_len);
10129 #endif
10130 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
10131 m->m_len += mac_hlen;
10132 m->m_pkthdr.len += mac_hlen;
10133 m->m_data -= mac_hlen;
10134 }
10135
10136 static inline void
10137 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
10138 {
10139 m->m_data += mac_hlen;
10140
10141 ip->ip_sum = in_cksum(m, ip_hlen);
10142
10143 m->m_pkthdr.csum_flags &= ~CSUM_IP;
10144 m->m_data -= mac_hlen;
10145 }
10146
/*
 * Structure that contains the state during the TCP segmentation
 *
 * Filled in by gso_ip_tcp_init_state(); the header pointers and `pay_len`
 * are refreshed for each segment through the `update` callback.
 */
struct gso_ip_tcp_state {
	/* re-point hdr/tcp/pay_len at the headers of the given packet */
	void    (*update)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* finalize the IP and TCP headers (lengths, checksums) of a segment */
	void    (*internal)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* number of bytes addressable through `hdr` */
	u_int ip_m0_len;
	/* start of the IP (v4 or v6) header of the current packet */
	uint8_t * __counted_by(ip_m0_len) hdr;
	/* TCP header of the current packet */
	struct tcphdr *tcp;
	/* link-layer header length */
	int mac_hlen;
	/* IP header length as passed to gso_ip_tcp_init_state() */
	int ip_hlen;
	/* TCP header length, th_off << 2 */
	int tcp_hlen;
	/* mac_hlen + ip_hlen + tcp_hlen */
	int hlen;
	/* payload length of the current packet: m_pkthdr.len - hlen */
	int pay_len;
	/* CSUM_* flags for the checksums that are computed in software */
	int sw_csum;
	/* host-order sequence number for the next segment */
	uint32_t tcp_seq;
	/* host-order IPv4 identification for the next segment (IPv4 only) */
	uint16_t ip_id;
	/* NOTE(review): not set by gso_ip_tcp_init_state(); usage not visible here */
	boolean_t is_tx;
};
10168
10169 /*
10170 * Update the pointers to TCP and IPv4 headers
10171 */
10172 static inline void
10173 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10174 {
10175 state->hdr = mtodo(m, state->mac_hlen);
10176 state->ip_m0_len = m->m_len - state->mac_hlen;
10177 state->ip_hlen = state->ip_hlen;
10178 state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10179 state->pay_len = m->m_pkthdr.len - state->hlen;
10180 }
10181
10182 /*
10183 * Set properly the TCP and IPv4 headers
10184 */
10185 static inline void
10186 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10187 {
10188 struct ip *ip;
10189 /*
10190 * Update IP header
10191 */
10192 ip = (struct ip *)state->hdr;
10193 ip->ip_id = htons((state->ip_id)++);
10194 ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
10195 /*
10196 * TCP Checksum
10197 */
10198 state->tcp->th_sum = 0;
10199 state->tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
10200 htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
10201 /*
10202 * Checksum HW not supported (TCP)
10203 */
10204 if (state->sw_csum & CSUM_DELAY_DATA) {
10205 gso_ipv4_data_cksum(m, ip, state->mac_hlen);
10206 }
10207
10208 state->tcp_seq += state->pay_len;
10209 /*
10210 * IP Checksum
10211 */
10212 ip->ip_sum = 0;
10213 /*
10214 * Checksum HW not supported (IP)
10215 */
10216 if (state->sw_csum & CSUM_IP) {
10217 gso_ipv4_hdr_cksum(m, ip, state->mac_hlen, state->ip_hlen);
10218 }
10219 }
10220
10221
10222 /*
10223 * Updates the pointers to TCP and IPv6 headers
10224 */
10225 static inline void
10226 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10227 {
10228 state->hdr = mtodo(m, state->mac_hlen);
10229 state->ip_m0_len = m->m_len - state->mac_hlen;
10230 state->ip_hlen = state->ip_hlen;
10231 state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10232 state->pay_len = m->m_pkthdr.len - state->hlen;
10233 }
10234
10235 /*
10236 * Sets properly the TCP and IPv6 headers
10237 */
10238 static inline void
10239 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10240 {
10241 struct ip6_hdr *ip6;
10242
10243 ip6 = (struct ip6_hdr *)state->hdr;
10244 ip6->ip6_plen = htons(m->m_pkthdr.len - state->mac_hlen - state->ip_hlen);
10245 /*
10246 * TCP Checksum
10247 */
10248 state->tcp->th_sum = 0;
10249 state->tcp->th_sum = in6_pseudo(&ip6->ip6_src, &ip6->ip6_dst,
10250 htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
10251 /*
10252 * Checksum HW not supported (TCP)
10253 */
10254 if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
10255 (void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
10256 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
10257 }
10258 state->tcp_seq += state->pay_len;
10259 }
10260
10261 /*
10262 * Init the state during the TCP segmentation
10263 */
10264 static void
10265 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
10266 bool is_ipv4, int mac_hlen, int ip_hlen,
10267 uint8_t *__counted_by(ip_m0_len) ip_hdr, u_int ip_m0_len,
10268 struct tcphdr * tcp_hdr)
10269 {
10270 #pragma unused(ifp)
10271
10272 state->hdr = ip_hdr;
10273 state->ip_m0_len = ip_m0_len;
10274 state->ip_hlen = ip_hlen;
10275 state->tcp = tcp_hdr;
10276 if (is_ipv4) {
10277 state->ip_id = ntohs(((struct ip *)state->hdr)->ip_id);
10278 state->update = gso_ipv4_tcp_update;
10279 state->internal = gso_ipv4_tcp_internal;
10280 state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
10281 } else {
10282 state->update = gso_ipv6_tcp_update;
10283 state->internal = gso_ipv6_tcp_internal;
10284 state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
10285 }
10286 state->mac_hlen = mac_hlen;
10287 state->tcp_hlen = state->tcp->th_off << 2;
10288 state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
10289 state->tcp_seq = ntohl(state->tcp->th_seq);
10290 //state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
10291 return;
10292 }
10293
10294 /*
10295 * GSO on TCP/IP (v4 or v6)
10296 *
10297 * Segment the given mbuf and return the list of packets.
10298 *
10299 */
10300 static mblist
10301 gso_ip_tcp(ifnet_t ifp, mbuf_t m0, struct gso_ip_tcp_state *state, bool is_tx)
10302 {
10303 struct mbuf *m;
10304 int mss = 0;
10305 #ifdef GSO_STATS
10306 int total_len = m0->m_pkthdr.len;
10307 #endif /* GSO_STATS */
10308 mblist seg;
10309
10310 mss = _mbuf_get_tso_mss(m0);
10311 if (mss == 0 && !is_tx) {
10312 uint8_t seg_cnt = m0->m_pkthdr.rx_seg_cnt;
10313
10314 if (seg_cnt != 0) {
10315 uint32_t hdr_len;
10316 uint32_t len;
10317
10318 /* approximate the MSS using LRO seg cnt */
10319 hdr_len = state->ip_hlen + state->tcp_hlen;
10320 len = mbuf_pkthdr_len(m0) - hdr_len - ETHER_HDR_LEN;
10321 mss = len / seg_cnt;
10322 m0->m_pkthdr.rx_seg_cnt = 0;
10323 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10324 "%s: mss %d = len %d / seg cnt %d",
10325 ifp->if_xname, mss, len, seg_cnt);
10326 }
10327 }
10328 if (mss == 0) {
10329 /* hack: we don't have the actual MSS */
10330 u_int reduce_mss;
10331
10332 reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
10333 : if_bridge_tso_reduce_mss_forwarding;
10334 mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen -
10335 reduce_mss;
10336 assert(mss > 0);
10337 } else if (is_tx) {
10338 bool is_ipv4;
10339 bool do_tso = true;
10340
10341 if (TSO_IPV4_OK(ifp, m0)) {
10342 is_ipv4 = true;
10343 } else if (TSO_IPV6_OK(ifp, m0)) {
10344 is_ipv4 = false;
10345 } else {
10346 do_tso = false;
10347 }
10348 if (do_tso) { /* TSO with GSO */
10349 uint32_t if_tso_max;
10350
10351 if_tso_max = get_if_tso_mtu(ifp, is_ipv4);
10352 mss = if_tso_max - state->ip_hlen - state->tcp_hlen;
10353 }
10354 }
10355 seg = m_seg(m0, state->hlen, mss, 0, 0);
10356 if (seg.head == NULL || seg.head->m_nextpkt == NULL) {
10357 return seg;
10358 }
10359 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10360 "%s %s mss %d nsegs %d",
10361 ifp->if_xname,
10362 is_tx ? "TX" : "RX",
10363 mss, seg.count);
10364 #ifdef GSO_STATS
10365 GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
10366 GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
10367 GSOSTAT_ADD(tcp.gsos_osegments, seg.count);
10368 #endif /* GSO_STATS */
10369
10370 /* first pkt */
10371 VERIFY(seg.head == m0);
10372 m = m0;
10373
10374 state->update(state, m);
10375
10376 do {
10377 state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
10378
10379 state->internal(state, m);
10380 m = m->m_nextpkt;
10381 state->update(state, m);
10382 state->tcp->th_flags &= ~TH_CWR;
10383 state->tcp->th_seq = htonl(state->tcp_seq);
10384 } while (m->m_nextpkt);
10385
10386 /* last pkt */
10387 state->internal(state, m);
10388
10389 #ifdef GSO_STATS
10390 if (!error) {
10391 GSOSTAT_INC(tcp.gsos_segmented);
10392 GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
10393 GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
10394 GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
10395 }
10396 #endif /* GSO_STATS */
10397 return seg;
10398 }
10399
10400 /*
10401 * GSO for TCP/IPv[46]
10402 */
10403 static mblist
10404 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
10405 u_int mac_hlen, bool is_ipv4, bool is_tx)
10406 {
10407 uint32_t csum_flags;
10408 struct gso_ip_tcp_state state;
10409 struct tcphdr *tcp;
10410
10411 assert(info_p->ip_proto_hdr != NULL);
10412 tcp = (struct tcphdr *)(void *)info_p->ip_proto_hdr;
10413 gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
10414 info_p->ip_hlen + info_p->ip_opt_len,
10415 info_p->ip_hdr, info_p->ip_m0_len, tcp);
10416 csum_flags = is_ipv4 ? CSUM_DELAY_DATA : CSUM_DELAY_IPV6_DATA; /* XXX */
10417 m->m_pkthdr.csum_flags = csum_flags;
10418 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
10419 return gso_ip_tcp(ifp, m, &state, is_tx);
10420 }
10421
10422 static mblist
10423 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx)
10424 {
10425 int error;
10426 ip_packet_info info;
10427 struct bripstats stats; /* XXX ignored */
10428 mblist ret;
10429
10430 error = bridge_get_tcp_header(&m, mac_hlen, is_ipv4, &info, &stats);
10431 if (error != 0) {
10432 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10433 "%s bridge_get_tcp_header failed %d (%s)",
10434 ifp->if_xname, error,
10435 is_tx ? "TX" : "RX");
10436 if (m != NULL) {
10437 m_freem(m);
10438 m = NULL;
10439 }
10440 goto no_segment;
10441 }
10442 if (info.ip_proto_hdr == NULL) {
10443 /* not actually a TCP packet, no segmentation */
10444 goto no_segment;
10445 }
10446 if (!is_tx && ip_packet_info_dst_is_our_ip(&info, ifp->if_index)) {
10447 goto no_segment;
10448 }
10449 return gso_tcp_with_info(ifp, m, &info, mac_hlen, is_ipv4, is_tx);
10450
10451 no_segment:
10452 mblist_init(&ret);
10453 if (m != NULL) {
10454 mblist_append(&ret, m);
10455 }
10456 return ret;
10457 }
10458