1 /*
2 * Copyright (c) 2004-2025 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */
30 /*
31 * Copyright 2001 Wasabi Systems, Inc.
32 * All rights reserved.
33 *
34 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed for the NetBSD Project by
47 * Wasabi Systems, Inc.
48 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49 * or promote products derived from this software without specific prior
50 * written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
56 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62 * POSSIBILITY OF SUCH DAMAGE.
63 */
64
65 /*
66 * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67 * All rights reserved.
68 *
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
71 * are met:
72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88 * POSSIBILITY OF SUCH DAMAGE.
89 *
90 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91 */
92
/*
 * Network interface bridge support.
 *
 * TODO:
 *
 *	- Currently only supports Ethernet-like interfaces (Ethernet,
 *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
 *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
 *	  consider heterogeneous bridges).
 *
 *	- GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
 */
105
106 #include <sys/cdefs.h>
107
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123
124 #include <sys/kauth.h>
125
126 #include <kern/thread_call.h>
127
128 #include <libkern/libkern.h>
129
130 #include <kern/uipc_domain.h>
131 #include <kern/zalloc.h>
132
133 #if NBPFILTER > 0
134 #include <net/bpf.h>
135 #endif
136 #include <net/if.h>
137 #include <net/if_dl.h>
138 #include <net/if_types.h>
139 #include <net/if_var.h>
140 #include <net/if_media.h>
141 #include <net/net_api_stats.h>
142
143 #include <netinet/in.h> /* for struct arpcom */
144 #include <netinet/tcp.h> /* for struct tcphdr */
145 #include <netinet/in_systm.h>
146 #include <netinet/in_var.h>
147 #define _IP_VHL
148 #include <netinet/ip.h>
149 #include <netinet/ip_var.h>
150 #include <netinet/ip6.h>
151 #include <netinet6/ip6_var.h>
152 #include <netinet/if_ether.h> /* for struct arpcom */
153 #include <net/bridgestp.h>
154 #include <net/if_bridgevar.h>
155 #include <net/if_llc.h>
156 #if NVLAN > 0
157 #include <net/if_vlan_var.h>
158 #endif /* NVLAN > 0 */
159
160 #include <net/if_ether.h>
161 #include <net/dlil.h>
162 #include <net/kpi_interfacefilter.h>
163 #include <net/pfvar.h>
164
165 #include <net/route.h>
166 #include <net/droptap.h>
167 #include <dev/random/randomdev.h>
168
169 #include <netinet/bootp.h>
170 #include <netinet/dhcp.h>
171
172 #if SKYWALK
173 #include <skywalk/nexus/netif/nx_netif.h>
174 #endif /* SKYWALK */
175
176 #include <net/sockaddr_utils.h>
177 #include <net/mblist.h>
178
179 #include <os/log.h>
180
181 #define _TSO_CSUM (CSUM_TSO_IPV4 | CSUM_TSO_IPV6)
182
183 static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
184
185
/*
 * mbuf flag predicates
 * - __M_FLAGS_ARE_SET() is true when at least one bit of 'flags' is set
 *   in the m_flags field of 'm'
 * - IS_BCAST_MCAST() is true when either M_BCAST or M_MCAST is set
 */
#define __M_FLAGS_ARE_SET(m, flags)     (((m)->m_flags & (flags)) != 0)
#define IS_BCAST(m)                     __M_FLAGS_ARE_SET(m, M_BCAST)
#define IS_MCAST(m)                     __M_FLAGS_ARE_SET(m, M_MCAST)
#define IS_BCAST_MCAST(m)               __M_FLAGS_ARE_SET(m, M_BCAST | M_MCAST)
190
/*
 * 16-bit protocol constants passed through htons()
 */
/* Ethernet types */
#define HTONS_ETHERTYPE_ARP             htons(ETHERTYPE_ARP)
#define HTONS_ETHERTYPE_IP              htons(ETHERTYPE_IP)
#define HTONS_ETHERTYPE_IPV6            htons(ETHERTYPE_IPV6)
/* ARP hardware type and opcodes */
#define HTONS_ARPHRD_ETHER              htons(ARPHRD_ETHER)
#define HTONS_ARPOP_REQUEST             htons(ARPOP_REQUEST)
#define HTONS_ARPOP_REPLY               htons(ARPOP_REPLY)
/* BOOTP/DHCP ports and flags */
#define HTONS_IPPORT_BOOTPC             htons(IPPORT_BOOTPC)
#define HTONS_IPPORT_BOOTPS             htons(IPPORT_BOOTPS)
#define HTONS_DHCP_FLAGS_BROADCAST      htons(DHCP_FLAGS_BROADCAST)
200
/*
 * if_bridge_debug, BR_DBGF_*
 * - every BR_DBGF_* value is a single bit of the 'if_bridge_debug' mask;
 *   setting a bit enables the additional logs of the corresponding
 *   bridge function
 * - the mask is controlled with "sysctl net.link.bridge.debug"
 */
static uint32_t if_bridge_debug = 0;
#define BR_DBGF_LIFECYCLE       0x0001
#define BR_DBGF_INPUT           0x0002
#define BR_DBGF_OUTPUT          0x0004
#define BR_DBGF_RT_TABLE        0x0008
#define BR_DBGF_DELAYED_CALL    0x0010
#define BR_DBGF_IOCTL           0x0020
#define BR_DBGF_MBUF            0x0040
#define BR_DBGF_MCAST           0x0080
#define BR_DBGF_HOSTFILTER      0x0100
#define BR_DBGF_CHECKSUM        0x0200
#define BR_DBGF_MAC_NAT         0x0400
#define BR_DBGF_INPUT_LIST      0x0800
221
222 /*
223 * if_bridge_log_level
224 * - 'if_bridge_log_level' ensures that by default important logs are
225 * logged regardless of if_bridge_debug by comparing the log level
226 * in BRIDGE_LOG to if_bridge_log_level
227 * - use "sysctl net.link.bridge.log_level" controls the value of
228 * 'if_bridge_log_level'
229 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
230 * logs must use LOG_NOTICE to ensure they appear by default
231 */
232 static int if_bridge_log_level = LOG_NOTICE;
233
/*
 * BRIDGE_DBGF_ENABLED
 * - true when at least one of the bits in '__flag' is set in
 *   'if_bridge_debug'
 * - '__flag' is parenthesized so that a compound argument such as
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT) is masked as a whole instead of
 *   being split by operator precedence
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)
235
/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - emit the log when '__level' is at or below 'if_bridge_log_level', or
 *   when any of the debug flags in '__dbgf' is enabled
 * - BRIDGE_LOG prefixes the message with the calling function name,
 *   BRIDGE_LOG_SIMPLE does not
 * - '__level' and '__dbgf' are parenthesized so that expression arguments
 *   (e.g. "cond ? LOG_DEBUG : LOG_NOTICE" or "FLAG_A | FLAG_B") are
 *   evaluated as a unit
 * - '__string' must be a string literal
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                    \
		if ((__level) <= if_bridge_log_level ||         \
		    BRIDGE_DBGF_ENABLED((__dbgf))) {            \
			os_log(OS_LOG_DEFAULT, "%s: " __string, \
			    __func__, ## __VA_ARGS__);          \
		}                                               \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)       \
	do {                                                    \
		if ((__level) <= if_bridge_log_level ||         \
		    BRIDGE_DBGF_ENABLED((__dbgf))) {            \
			os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
		}                                               \
	} while (0)
257
/*
 * Primitive operations on the per-bridge mutex (sc_mtx) and the matching
 * ownership assertions.
 */
#define _BRIDGE_LOCK(_sc)       lck_mtx_lock(&(_sc)->sc_mtx)
#define _BRIDGE_UNLOCK(_sc)     lck_mtx_unlock(&(_sc)->sc_mtx)

#define BRIDGE_LOCK_ASSERT_HELD(_sc)                            \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)                         \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)
264
/*
 * Bridge locking wrappers.
 *
 * With BRIDGE_LOCK_DEBUG non-zero every wrapper is routed through the
 * corresponding bridge_*() function; otherwise the wrappers expand inline:
 * - BRIDGE_LOCK2REF: called with the lock held; sets '_err' to EBUSY when
 *   an exclusive request is outstanding (sc_iflist_xcnt > 0), otherwise
 *   takes a member list reference (sc_iflist_ref) and sets '_err' to 0;
 *   the lock is dropped in both cases
 * - BRIDGE_UNREF: drops a member list reference and wakes up sleepers on
 *   sc_cv when the last reference is released while an exclusive request
 *   is outstanding
 * - BRIDGE_XLOCK: called with the lock held; registers an exclusive
 *   request and sleeps until all member list references are gone
 * - BRIDGE_XDROP: called with the lock held; retires the exclusive request
 */
#define BRIDGE_LOCK_DEBUG 1
#if BRIDGE_LOCK_DEBUG

/* size of the lock_lr[]/unlock_lr[] history arrays in struct bridge_softc */
#define BR_LCKDBG_MAX                   4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
/* '_err' is parenthesized to match the non-debug variant below */
#define BRIDGE_LOCK2REF(_sc, _err)      (_err) = bridge_lock2ref(_sc)
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	if ((_sc)->sc_iflist_xcnt > 0) {                                \
		(_err) = EBUSY;                                         \
	} else {                                                        \
		(_sc)->sc_iflist_ref++;                                 \
		(_err) = 0;                                             \
	}                                                               \
	_BRIDGE_UNLOCK(_sc);                                            \
} while (0)
#define BRIDGE_UNREF(_sc)               do {                            \
	_BRIDGE_LOCK(_sc);                                              \
	(_sc)->sc_iflist_ref--;                                         \
	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \
		_BRIDGE_UNLOCK(_sc);                                    \
		wakeup(&(_sc)->sc_cv);                                  \
	} else {                                                        \
		_BRIDGE_UNLOCK(_sc);                                    \
	}                                                               \
} while (0)
#define BRIDGE_XLOCK(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt++;                                        \
	while ((_sc)->sc_iflist_ref > 0) {                              \
		msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
		    "BRIDGE_XLOCK", NULL);                              \
	}                                                               \
} while (0)
#define BRIDGE_XDROP(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt--;                                        \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
313
/*
 * BRIDGE_BPF_TAP_IN, BRIDGE_BPF_TAP_OUT
 * - hand 'm' to bpf_tap_in()/bpf_tap_out() as DLT_EN10MB, but only when
 *   the interface has a non-NULL if_bpf
 * - 'ifp' is parenthesized so that any pointer expression may be passed;
 *   note that it is evaluated more than once
 */
#define BRIDGE_BPF_TAP_IN(ifp, m)                                       \
	do {                                                            \
		if ((ifp)->if_bpf != NULL) {                            \
			bpf_tap_in((ifp), DLT_EN10MB, (m), NULL, 0);    \
		}                                                       \
	} while (0)

#define BRIDGE_BPF_TAP_OUT(ifp, m)                                      \
	do {                                                            \
		if ((ifp)->if_bpf != NULL) {                            \
			bpf_tap_out((ifp), DLT_EN10MB, (m), NULL, 0);   \
		}                                                       \
	} while (0)
327
328
/*
 * Initial size of the route hash table.  Must be a power of two, since
 * BRIDGE_RTHASH_MASK() derives the bucket mask as (size - 1).
 */
#ifndef BRIDGE_RTHASH_SIZE
#define BRIDGE_RTHASH_SIZE              16
#endif

/*
 * Maximum size of the routing hash table
 */
#define BRIDGE_RTHASH_SIZE_MAX          2048

/*
 * BRIDGE_RTHASH_SIZE can be overridden at build time, so enforce its
 * constraints at compile time instead of relying on the comment alone.
 */
_Static_assert(BRIDGE_RTHASH_SIZE > 0 &&
    (BRIDGE_RTHASH_SIZE & (BRIDGE_RTHASH_SIZE - 1)) == 0,
    "BRIDGE_RTHASH_SIZE must be a power of two");
_Static_assert(BRIDGE_RTHASH_SIZE <= BRIDGE_RTHASH_SIZE_MAX,
    "BRIDGE_RTHASH_SIZE must not exceed BRIDGE_RTHASH_SIZE_MAX");

/* Bucket index mask for the current hash table size of 'sc' */
#define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)
342
/*
 * Route table and MAC NAT tunables.  Each one may be overridden by
 * defining it before this point.
 */

/* Maximum number of addresses to cache. */
#ifndef BRIDGE_RTABLE_MAX
#define BRIDGE_RTABLE_MAX               100
#endif /* BRIDGE_RTABLE_MAX */

/* Timeout (in seconds) for entries learned dynamically; same as ARP. */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define BRIDGE_RTABLE_TIMEOUT           (20 * 60)
#endif /* BRIDGE_RTABLE_TIMEOUT */

/* Number of seconds between walks of the route list. */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
#endif /* BRIDGE_RTABLE_PRUNE_PERIOD */

/*
 * Number of MAC NAT entries
 * - sized based on 16 clients (including MAC NAT interface)
 *   each with 4 addresses
 */
#ifndef BRIDGE_MAC_NAT_ENTRY_MAX
#define BRIDGE_MAC_NAT_ENTRY_MAX        64
#endif /* BRIDGE_MAC_NAT_ENTRY_MAX */
372
/*
 * Member interface capability sets:
 * - BRIDGE_IFCAPS_MASK: capabilities to possibly mask on the member
 * - BRIDGE_IFCAPS_STRIP: capabilities to disable on the member
 */
#define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
#define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
381
382 /*
383 * Bridge interface list entry.
384 */
385 struct bridge_iflist {
386 TAILQ_ENTRY(bridge_iflist) bif_next;
387 struct ifnet *bif_ifp; /* member if */
388 struct bstp_port bif_stp; /* STP state */
389 uint32_t bif_ifflags; /* member if flags */
390 int bif_savedcaps; /* saved capabilities */
391 uint32_t bif_addrmax; /* max # of addresses */
392 uint32_t bif_addrcnt; /* cur. # of addresses */
393 uint32_t bif_addrexceeded; /* # of address violations */
394
395 interface_filter_t bif_iff_ref;
396 struct bridge_softc *bif_sc;
397 uint32_t bif_flags;
398
399 /* host filter */
400 struct in_addr bif_hf_ipsrc;
401 uint8_t bif_hf_hwsrc[ETHER_ADDR_LEN];
402
403 struct ifbrmstats bif_stats;
404 };
405
406 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)407 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
408 {
409 return (bif->bif_ifflags & flags) != 0;
410 }
411
412 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)413 bif_has_checksum_offload(struct bridge_iflist * bif)
414 {
415 return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
416 }
417
418 static inline bool
bif_has_mac_nat(struct bridge_iflist * bif)419 bif_has_mac_nat(struct bridge_iflist * bif)
420 {
421 return bif_ifflags_are_set(bif, IFBIF_MAC_NAT);
422 }
423
424 static inline bool
bif_uses_virtio(struct bridge_iflist * bif)425 bif_uses_virtio(struct bridge_iflist * bif)
426 {
427 return bif_ifflags_are_set(bif, IFBIF_USES_VIRTIO);
428 }
429
/*
 * Fake errors to make the code clearer: every name below is an alias
 * for EJUSTRETURN, so callers cannot tell them apart by value.
 */
#define _EBADIP                 EJUSTRETURN
#define _EBADIPCHECKSUM         EJUSTRETURN
#define _EBADIPV6               EJUSTRETURN
#define _EBADUDP                EJUSTRETURN
#define _EBADTCP                EJUSTRETURN
#define _EBADUDPCHECKSUM        EJUSTRETURN
#define _EBADTCPCHECKSUM        EJUSTRETURN
438
/*
 * BIFF_* member state bits (one bit each)
 */
#define BIFF_PROMISC            0x01    /* promiscuous mode set */
#define BIFF_PROTO_ATTACHED     0x02    /* protocol attached */
#define BIFF_FILTER_ATTACHED    0x04    /* interface filter attached */
#define BIFF_MEDIA_ACTIVE       0x08    /* interface media active */
#define BIFF_HOST_FILTER        0x10    /* host filter enabled */
#define BIFF_HF_HWSRC           0x20    /* host filter source MAC is set */
#define BIFF_HF_IPSRC           0x40    /* host filter source IP is set */
#define BIFF_INPUT_BROADCAST    0x80    /* send broadcast packets in */
#define BIFF_IN_MEMBER_LIST     0x100   /* added to the member list */
#define BIFF_WIFI_INFRA         0x200   /* interface is Wi-Fi infra */
#define BIFF_ALL_MULTI          0x400   /* allmulti set */
#define BIFF_LRO_DISABLED       0x800   /* LRO was disabled */
#if SKYWALK
#define BIFF_FLOWSWITCH_ATTACHED 0x1000 /* we attached the flowswitch */
#define BIFF_NETAGENT_REMOVED   0x2000  /* we removed the netagent */
#endif /* SKYWALK */
455
456 /*
457 * mac_nat_entry
458 * - translates between an IP address and MAC address on a specific
459 * bridge interface member
460 */
461 struct mac_nat_entry {
462 LIST_ENTRY(mac_nat_entry) mne_list; /* list linkage */
463 struct bridge_iflist *mne_bif; /* originating interface */
464 unsigned long mne_expire; /* expiration time */
465 union {
466 struct in_addr mneu_ip; /* originating IPv4 address */
467 struct in6_addr mneu_ip6; /* originating IPv6 address */
468 } mne_u;
469 uint8_t mne_mac[ETHER_ADDR_LEN];
470 uint8_t mne_flags;
471 uint8_t mne_reserved;
472 };
473 #define mne_ip mne_u.mneu_ip
474 #define mne_ip6 mne_u.mneu_ip6
475
476 #define MNE_FLAGS_IPV6 0x01 /* IPv6 address */
477
478 LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
479
/*
 * mac_nat_record
 * - filled in by bridge_mac_nat_output() to describe the translation
 *   that bridge_mac_nat_translate needs to perform
 * - carries enough information for the translation to be done later,
 *   when the destination interface is the MAC-NAT interface
 * - which union member applies is not expressed by the type itself;
 *   presumably it follows mnr_ether_type -- confirm against users
 */
struct mac_nat_record {
	uint16_t                mnr_ether_type;
	union {
		/* ARP */
		uint16_t        mnru_arp_offset;
		/* IPv4 */
		struct {
			uint16_t mnruip_dhcp_flags;
			uint16_t mnruip_udp_csum;
			uint8_t  mnruip_header_len;
		} mnru_ip;
		/* IPv6 */
		struct {
			uint16_t mnruip6_icmp6_len;
			uint16_t mnruip6_lladdr_offset;
			uint8_t  mnruip6_icmp6_type;
			uint8_t  mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};

/* shorthand accessors: ARP */
#define mnr_arp_offset          mnr_u.mnru_arp_offset

/* shorthand accessors: IPv4 */
#define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
#define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
#define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum

/* shorthand accessors: IPv6 */
#define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
#define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
#define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
#define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
515
516 /*
517 * Bridge route node.
518 */
519 struct bridge_rtnode {
520 LIST_ENTRY(bridge_rtnode) brt_hash; /* hash table linkage */
521 LIST_ENTRY(bridge_rtnode) brt_list; /* list linkage */
522 struct bridge_iflist *brt_dst; /* destination if */
523 unsigned long brt_expire; /* expiration time */
524 uint8_t brt_flags; /* address flags */
525 uint8_t brt_addr[ETHER_ADDR_LEN];
526 uint16_t brt_vlan; /* vlan id */
527 };
528
529 #define brt_ifp brt_dst->bif_ifp
530
531 /*
532 * Bridge delayed function call context
533 */
534 typedef void (*bridge_delayed_func_t)(struct bridge_softc *);
535
536 struct bridge_delayed_call {
537 struct bridge_softc *bdc_sc;
538 bridge_delayed_func_t bdc_func; /* Function to call */
539 struct timespec bdc_ts; /* Time to call */
540 u_int32_t bdc_flags;
541 thread_call_t bdc_thread_call;
542 };
543
544 #define BDCF_OUTSTANDING 0x01 /* Delayed call has been scheduled */
545 #define BDCF_CANCELLING 0x02 /* May be waiting for call completion */
546
547 /*
548 * Software state for each bridge.
549 */
550 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
551
552 struct bridge_softc {
553 struct ifnet *sc_ifp; /* make this an interface */
554 uint32_t sc_flags;
555 LIST_ENTRY(bridge_softc) sc_list;
556 decl_lck_mtx_data(, sc_mtx);
557 struct _bridge_rtnode_list * __counted_by(sc_rthash_size) sc_rthash; /* our forwarding table */
558 struct _bridge_rtnode_list sc_rtlist; /* list version of above */
559 uint32_t sc_rthash_key; /* key for hash */
560 uint32_t sc_rthash_size; /* size of the hash table */
561 struct bridge_delayed_call sc_aging_timer;
562 struct bridge_delayed_call sc_resize_call;
563 TAILQ_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */
564 struct bstp_state sc_stp; /* STP state */
565 void *sc_cv;
566 uint32_t sc_brtmax; /* max # of addresses */
567 uint32_t sc_brtcnt; /* cur. # of addresses */
568 uint32_t sc_brttimeout; /* rt timeout in seconds */
569 uint32_t sc_iflist_ref; /* refcount for sc_iflist */
570 uint32_t sc_iflist_xcnt; /* refcount for sc_iflist */
571 TAILQ_HEAD(, bridge_iflist) sc_iflist; /* member interface list */
572 uint32_t sc_brtexceeded; /* # of cache drops */
573 uint32_t sc_filter_flags; /* ipf and flags */
574 struct ifnet *sc_ifaddr; /* member mac copied from */
575 u_char sc_defaddr[6]; /* Default MAC address */
576 char sc_if_xname[IFNAMSIZ];
577
578 struct bridge_iflist *sc_mac_nat_bif; /* single MAC NAT interface */
579 struct mac_nat_entry_list sc_mne_list; /* MAC NAT IPv4 */
580 struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
581 uint32_t sc_mne_max; /* max # of entries */
582 uint32_t sc_mne_count; /* cur. # of entries */
583 uint32_t sc_mne_allocation_failures;
584 #if BRIDGE_LOCK_DEBUG
585 /*
586 * Locking and unlocking calling history
587 */
588 void *lock_lr[BR_LCKDBG_MAX];
589 int next_lock_lr;
590 void *unlock_lr[BR_LCKDBG_MAX];
591 int next_unlock_lr;
592 #endif /* BRIDGE_LOCK_DEBUG */
593 };
594
595 #define SCF_DETACHING 0x01
596 #define SCF_RESIZING 0x02
597 #define SCF_MEDIA_ACTIVE 0x04
598 #define SCF_PROTO_ATTACHED 0x08
599
/*
 * ChecksumOperation
 * - selects how a packet's checksum is to be handled; the values are
 *   explicit and start at 0 (CHECKSUM_OPERATION_NONE)
 */
typedef enum {
	CHECKSUM_OPERATION_NONE          = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
	CHECKSUM_OPERATION_FINALIZE      = 2,
	CHECKSUM_OPERATION_COMPUTE       = 3,
} ChecksumOperation;
606
607 typedef struct {
608 u_int ip_hlen; /* IP header length */
609 u_int ip_pay_len; /* length of payload (exclusive of ip_hlen) */
610 u_int ip_m0_len; /* bytes available at ip_hdr (without jumping mbufs) */
611 u_int ip_opt_len; /* IPv6 options headers length */
612 uint8_t ip_proto; /* IPPROTO_TCP, IPPROTO_UDP, etc. */
613 bool ip_is_ipv4;
614 bool ip_is_fragmented;
615 uint8_t *__sized_by(ip_m0_len) ip_hdr; /* pointer to IP header */
616 uint8_t *__indexable ip_proto_hdr; /* ptr to protocol header (TCP) */
617 } ip_packet_info, *ip_packet_info_t;
618
619 struct bridge_hostfilter_stats bridge_hostfilter_stats;
620
/* Per-packet ethernet type flag value, carried as a single byte */
typedef uint8_t ether_type_flag_t;

/* Direction of a packet: receive (RX) or transmit (TX) */
typedef enum {
	pkt_direction_RX,
	pkt_direction_TX
} pkt_direction_t;
627
628 static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
629 #if BRIDGE_LOCK_DEBUG
630 static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
631 #else
632 static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
633 #endif
634 static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
635
636 static int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
637
638 static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
639 static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
640
641 static int bridge_clone_create(struct if_clone *, uint32_t, void *);
642 static int bridge_clone_destroy(struct ifnet *);
643
644 static errno_t bridge_ioctl(struct ifnet *, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)));
645 #if HAS_IF_CAP
646 static void bridge_mutecaps(struct bridge_softc *);
647 static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
648 int);
649 #endif
650 static errno_t bridge_set_tso(struct bridge_softc *);
651 static void bridge_proto_attach_changed(struct ifnet *);
652 static int bridge_init(struct ifnet *);
653 static void bridge_ifstop(struct ifnet *, int);
654 static int bridge_output(struct ifnet *, struct mbuf *);
655 static void bridge_finalize_cksum(struct ifnet *, struct mbuf *);
656 static void bridge_start(struct ifnet *);
657 static mblist bridge_input_list(struct bridge_softc *, ifnet_t,
658 struct ether_header *, mblist, bool);
659 static errno_t bridge_iff_input(void *, ifnet_t, protocol_family_t,
660 mbuf_t *, char **);
661 static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t,
662 mbuf_t *);
663 static errno_t bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
664 mbuf_t *m);
665 static int bridge_enqueue(ifnet_t, ifnet_t, ifnet_t,
666 ether_type_flag_t, mbuf_t, ChecksumOperation, pkt_direction_t);
667 static mbuf_t bridge_checksum_offload_list(ifnet_t, struct bridge_iflist *,
668 mbuf_t, bool);
669 static mbuf_t bridge_filter_checksum(ifnet_t, struct bridge_iflist * bif,
670 mbuf_t m, bool, bool, bool);
671 static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
672
673 static void bridge_aging_timer(struct bridge_softc *sc);
674
675 static void bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
676 ether_type_flag_t, mbuf_t);
677 static void bridge_broadcast_list(struct bridge_softc *,
678 struct bridge_iflist *, ether_type_flag_t, mbuf_t, pkt_direction_t);
679
680 static void bridge_span(struct bridge_softc *, ether_type_flag_t, struct mbuf *);
681
682 static int bridge_rtupdate(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
683 uint16_t, struct bridge_iflist *, int, uint8_t);
684 static struct bridge_iflist * bridge_rtlookup_bif(struct bridge_softc *,
685 const uint8_t[ETHER_ADDR_LEN], uint16_t);
686 static void bridge_rttrim(struct bridge_softc *);
687 static void bridge_rtage(struct bridge_softc *);
688 static void bridge_rtflush(struct bridge_softc *, int);
689 static int bridge_rtdaddr(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
690 uint16_t);
691
692 static int bridge_rtable_init(struct bridge_softc *);
693 static void bridge_rtable_fini(struct bridge_softc *);
694
695 static void bridge_rthash_resize(struct bridge_softc *);
696
697 static int bridge_rtnode_addr_cmp(const uint8_t[ETHER_ADDR_LEN], const uint8_t[ETHER_ADDR_LEN]);
698 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
699 const uint8_t[ETHER_ADDR_LEN], uint16_t);
700 static int bridge_rtnode_hash(struct bridge_softc *,
701 struct bridge_rtnode *);
702 static int bridge_rtnode_insert(struct bridge_softc *,
703 struct bridge_rtnode *);
704 static void bridge_rtnode_destroy(struct bridge_softc *,
705 struct bridge_rtnode *);
706 #if BRIDGESTP
707 static void bridge_rtable_expire(struct ifnet *, int);
708 static void bridge_state_change(struct ifnet *, int);
709 #endif /* BRIDGESTP */
710
711 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
712 char * __sized_by(IFNAMSIZ) name);
713 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
714 struct ifnet *ifp);
715 static void bridge_delete_member(struct bridge_softc *,
716 struct bridge_iflist *);
717 static void bridge_delete_span(struct bridge_softc *,
718 struct bridge_iflist *);
719
720 static int bridge_ioctl_add(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
721 static int bridge_ioctl_del(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
722 static int bridge_ioctl_gifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
723 static int bridge_ioctl_sifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
724 static int bridge_ioctl_scache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
725 static int bridge_ioctl_gcache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
726 static int bridge_ioctl_gifs32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
727 static int bridge_ioctl_gifs64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
728 static int bridge_ioctl_rts32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
729 static int bridge_ioctl_rts64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
730 static int bridge_ioctl_saddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
731 static int bridge_ioctl_saddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
732 static int bridge_ioctl_sto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
733 static int bridge_ioctl_gto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
734 static int bridge_ioctl_daddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
735 static int bridge_ioctl_daddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
736 static int bridge_ioctl_flush(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
737 static int bridge_ioctl_gpri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
738 static int bridge_ioctl_spri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
739 static int bridge_ioctl_ght(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
740 static int bridge_ioctl_sht(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
741 static int bridge_ioctl_gfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
742 static int bridge_ioctl_sfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
743 static int bridge_ioctl_gma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
744 static int bridge_ioctl_sma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
745 static int bridge_ioctl_sifprio(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
746 static int bridge_ioctl_sifcost(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
747 static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
748 static int bridge_ioctl_addspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
749 static int bridge_ioctl_delspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
750 static int bridge_ioctl_gbparam32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
751 static int bridge_ioctl_gbparam64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
752 static int bridge_ioctl_grte(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
753 static int bridge_ioctl_gifsstp32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
754 static int bridge_ioctl_gifsstp64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
755 static int bridge_ioctl_sproto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
756 static int bridge_ioctl_stxhc(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
757 static int bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len);
758 static int bridge_ioctl_gfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
759 static int bridge_ioctl_sfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
760 static int bridge_ioctl_ghostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
761 static int bridge_ioctl_shostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
762 static int bridge_ioctl_gmnelist32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
763 static int bridge_ioctl_gmnelist64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
764 static int bridge_ioctl_gifstats32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
765 static int bridge_ioctl_gifstats64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
766
767 static int bridge_pf(struct mbuf **, struct ifnet *,
768 uint32_t sc_filter_flags, bool input);
769 static int bridge_ip_checkbasic(struct mbuf **);
770 static int bridge_ip6_checkbasic(struct mbuf **);
771
772 static void bridge_detach(ifnet_t);
773 static void bridge_link_event(struct ifnet *, u_int32_t);
774 static void bridge_iflinkevent(struct ifnet *);
775 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
776 static int interface_media_active(struct ifnet *);
777 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
778 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
779 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
780
781 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
782 struct bridge_iflist *);
783 static void bridge_mac_nat_disable(struct bridge_softc *sc);
784 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
785 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
786 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
787 struct bridge_iflist *);
788 static mbuf_t bridge_mac_nat_input(struct bridge_softc *, ifnet_t, mbuf_t,
789 ifnet_t * dst_if);
790 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
791 struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
792 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
793 const char[ETHER_ADDR_LEN]);
794
795 static mblist bridge_mac_nat_input_list(struct bridge_softc *sc,
796 ifnet_t external_ifp, mbuf_t m, mbuf_t * forward_head);
797 static mbuf_t bridge_mac_nat_translate_list(struct bridge_softc * sc,
798 struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
799 static mbuf_t bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
800 struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
801
802 static mbuf_t bridge_pf_list(mbuf_t m, ifnet_t ifp,
803 uint32_t sc_filter_flags, bool input);
804
805 static inline ifnet_t
bridge_rtlookup(struct bridge_softc * sc,const uint8_t addr[ETHER_ADDR_LEN],uint16_t vlan)806 bridge_rtlookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
807 uint16_t vlan)
808 {
809 struct bridge_iflist * bif;
810 ifnet_t ifp = NULL;
811
812 bif = bridge_rtlookup_bif(sc, addr, vlan);
813 if (bif != NULL) {
814 ifp = bif->bif_ifp;
815 }
816 return ifp;
817 }
818
819 static bool in_addr_is_ours(const struct in_addr);
820 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
821
/* Copy a packet via m_copym(), starting at offset 0 with length M_COPYALL. */
#define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
823
824 static mblist
825 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx);
826
827 static mblist
828 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
829 u_int mac_hlen, bool is_ipv4, bool is_tx);
830
831 static inline mblist
gso_tcp_transmit(ifnet_t ifp,mbuf_t m,u_int mac_hlen,bool is_ipv4)832 gso_tcp_transmit(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4)
833 {
834 return gso_tcp(ifp, m, mac_hlen, is_ipv4, true);
835 }
836
/*
 * The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2).
 * Note: VLANTAGOF() below ignores its argument and always evaluates to 0.
 */
838 #define VLANTAGOF(_m) 0
839
/*
 * First and last value of the final address octet of a range;
 * bstp_etheraddr below is initialised with the first value.
 */
#define BSTP_ETHERADDR_RANGE_FIRST 0x00
#define BSTP_ETHERADDR_RANGE_LAST 0x0f

/* 01:80:c2:00:00:00 -- presumably the STP bridge group address; TODO confirm */
u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
{ 0x01, 0x80, 0xc2, 0x00, 0x00, BSTP_ETHERADDR_RANGE_FIRST };


/* All-zero Ethernet address. */
static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
849
#if BRIDGESTP
/*
 * Callback table of type bstp_cb_ops: bridge_state_change for bcb_state
 * and bridge_rtable_expire for bcb_rtage.  Only built when BRIDGESTP is set.
 */
static struct bstp_cb_ops bridge_ops = {
	.bcb_state = bridge_state_change,
	.bcb_rtage = bridge_rtable_expire
};
#endif /* BRIDGESTP */
856
/*
 * net.link.bridge sysctl node and its general tunables.  Each SYSCTL_*
 * entry below exports the variable named in its fourth/fifth argument.
 */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Bridge");

static int bridge_inherit_mac = 0; /* share MAC with first bridge member */
SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_inherit_mac, 0,
    "Inherit MAC address from the first bridge member");

/* bridge_rtable_prune_period is declared outside this section of the file */
SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_rtable_prune_period, 0,
    "Interval between pruning of routing table");

/* writable upper bound, defaulting to BRIDGE_RTHASH_SIZE_MAX */
static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_rtable_hash_size_max, 0,
    "Maximum size of the routing hash table");

#if BRIDGE_DELAYED_CALLBACK_DEBUG
static int bridge_delayed_callback_delay = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_delayed_callback_delay, 0,
    "Delay before calling delayed function");
#endif

/* read-only export of the bridge_hostfilter_stats structure */
SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
    hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &bridge_hostfilter_stats, bridge_hostfilter_stats, "");

#if BRIDGESTP
static int log_stp = 0; /* log STP state changes */
/* NOTE(review): unlike the entries above, this one omits CTLFLAG_LOCKED */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
    &log_stp, 0, "Log STP state changes");
#endif /* BRIDGESTP */
895
/*
 * One entry of a bridge control table: the handler to call, the size of
 * the argument structure it takes, and a set of BC_F_* flags.
 */
struct bridge_control {
	/* handler; receives the softc, an argument buffer and that buffer's size */
	int (*bc_func)(struct bridge_softc *, void *__sized_by(arg_len) args, size_t arg_len);
	unsigned int bc_argsize; /* table entries set this with sizeof(<argument struct>) */
	unsigned int bc_flags; /* combination of the BC_F_* bits below */
};

#define BC_F_COPYIN 0x01 /* copy arguments in */
#define BC_F_COPYOUT 0x02 /* copy arguments out */
#define BC_F_SUSER 0x04 /* do super-user check */
905
/*
 * Control table using the *32 variants of the handlers and argument
 * structures (presumably for 32-bit user processes -- confirm against the
 * dispatcher).  The numeric comments mark every tenth entry's index; the
 * entry order must stay in step with bridge_control_table64.
 */
static const struct bridge_control bridge_control_table32[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq), /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam), /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam), /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32), /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist32,
	  .bc_argsize = sizeof(struct ifbrmnelist32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats32,
	  .bc_argsize = sizeof(struct ifbrmreq32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
1010
/*
 * Control table using the *64 variants of the handlers and argument
 * structures (presumably for 64-bit user processes -- confirm against the
 * dispatcher).  Entry order mirrors bridge_control_table32; the numeric
 * comments mark every tenth entry's index.
 */
static const struct bridge_control bridge_control_table64[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq), /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam), /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam), /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64), /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist64,
	  .bc_argsize = sizeof(struct ifbrmnelist64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats64,
	  .bc_argsize = sizeof(struct ifbrmreq64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
1115
/*
 * Number of entries in a control table.  It is computed from
 * bridge_control_table32 only, so bridge_control_table64 must be kept at
 * the same length.
 */
static const unsigned int bridge_control_table_size =
    sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1118
/* List of bridge_softc instances; (re)initialised by bridgeattach(). */
static LIST_HEAD(, bridge_softc) bridge_list =
    LIST_HEAD_INITIALIZER(bridge_list);

#define BRIDGENAME "bridge"
#define BRIDGES_MAX IF_MAXUNIT
#define BRIDGE_ZONE_MAX_ELEM MIN(IFNETS_MAX, BRIDGES_MAX)

/*
 * Interface cloner for "bridge" interfaces, wired to bridge_clone_create /
 * bridge_clone_destroy; attached in bridgeattach().
 */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
    0, BRIDGES_MAX);
1129
/* Further net.link.bridge tunables; all are read/write integers. */
static int if_bridge_txstart = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");

/* if_bridge_debug and if_bridge_log_level are declared outside this section */
SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_debug, 0, "Bridge debug flags");

SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_log_level, 0, "Bridge log level");

static int if_bridge_output_skip_filters = 1;
SYSCTL_INT(_net_link_bridge, OID_AUTO, output_skip_filters,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_output_skip_filters, 0, "Bridge skip output filters");

int bridge_enable_early_input = 1; /* DLIL early input */
SYSCTL_INT(_net_link_bridge, OID_AUTO, enable_early_input,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_enable_early_input, 0,
    "Bridge enable early input");

int bridge_allow_lro_num_seg = 1; /* allow LRO_NUM_SEG to keep LRO enabled */
SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_lro_num_seg,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_allow_lro_num_seg, 0,
    "Bridge allow LRO_NUM_SEG to keep LRO enabled");

/*
 * Defaults and upper bounds for the two "tso reduce mss" values; the
 * bounds are enforced by the sysctl handlers built on
 * bridge_tso_reduce_mss().
 */
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX 256
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT 110
#define BRIDGE_TSO_REDUCE_MSS_TX_MAX 256
#define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT 0

static u_int if_bridge_tso_reduce_mss_forwarding
    = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
static u_int if_bridge_tso_reduce_mss_tx
    = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1167
1168 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1169 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1170 {
1171 int changed;
1172 int error;
1173 u_int new_value;
1174
1175 error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1176 &changed);
1177 if (error == 0 && changed != 0) {
1178 if (new_value > val_max) {
1179 return EINVAL;
1180 }
1181 *val = new_value;
1182 }
1183 return error;
1184 }
1185
/*
 * Handler for net.link.bridge.tso_reduce_mss_forwarding: reads/updates
 * if_bridge_tso_reduce_mss_forwarding, capped at
 * BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX.
 */
static int
bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
{
	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
	           BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
}

SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
    "Bridge tso reduce mss when forwarding");

/*
 * Handler for net.link.bridge.tso_reduce_mss_tx: reads/updates
 * if_bridge_tso_reduce_mss_tx, capped at BRIDGE_TSO_REDUCE_MSS_TX_MAX.
 */
static int
bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
{
	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
	           BRIDGE_TSO_REDUCE_MSS_TX_MAX);
}

SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
    "Bridge tso reduce mss on transmit");

#if DEBUG || DEVELOPMENT
/*
 * net.link.bridge.reduce_tso_mtu
 * - when non-zero, the bridge overrides the interface TSO MTU to a lower
 *   value (i.e. 16K) to enable testing the "use GSO instead" path
 * - only available on DEBUG or DEVELOPMENT builds
 */
static int if_bridge_reduce_tso_mtu = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, reduce_tso_mtu,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_reduce_tso_mtu, 0, "Bridge interface reduce TSO MTU");

#endif /* DEBUG || DEVELOPMENT */
1222
1223 static void brlog_ether_header(struct ether_header *);
1224 static void brlog_mbuf_data(mbuf_t, size_t, size_t);
1225 static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
1226 static void brlog_mbuf(mbuf_t, const char *, const char *);
1227 static void brlog_link(struct bridge_softc * sc);
1228
1229 #if BRIDGE_LOCK_DEBUG
1230 static void bridge_lock(struct bridge_softc *);
1231 static void bridge_unlock(struct bridge_softc *);
1232 static int bridge_lock2ref(struct bridge_softc *);
1233 static void bridge_unref(struct bridge_softc *);
1234 static void bridge_xlock(struct bridge_softc *);
1235 static void bridge_xdrop(struct bridge_softc *);
1236
/*
 * Declare a local `v` holding the enclosing function's return address
 * (__builtin_return_address(0)), forged into a __single pointer.
 */
#define DECL_RETURN_ADDR(v) void * __single v = __unsafe_forge_single(void *, __builtin_return_address(0))
1238
1239 static void
bridge_lock(struct bridge_softc * sc)1240 bridge_lock(struct bridge_softc *sc)
1241 {
1242 DECL_RETURN_ADDR(lr_saved);
1243
1244 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1245
1246 _BRIDGE_LOCK(sc);
1247
1248 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1249 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1250 }
1251
1252 static void
bridge_unlock(struct bridge_softc * sc)1253 bridge_unlock(struct bridge_softc *sc)
1254 {
1255 DECL_RETURN_ADDR(lr_saved);
1256
1257 BRIDGE_LOCK_ASSERT_HELD(sc);
1258
1259 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1260 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1261
1262 _BRIDGE_UNLOCK(sc);
1263 }
1264
1265 static int
bridge_lock2ref(struct bridge_softc * sc)1266 bridge_lock2ref(struct bridge_softc *sc)
1267 {
1268 int error = 0;
1269 DECL_RETURN_ADDR(lr_saved);
1270
1271 BRIDGE_LOCK_ASSERT_HELD(sc);
1272
1273 if (sc->sc_iflist_xcnt > 0) {
1274 error = EBUSY;
1275 } else {
1276 sc->sc_iflist_ref++;
1277 }
1278
1279 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1280 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1281
1282 _BRIDGE_UNLOCK(sc);
1283
1284 return error;
1285 }
1286
1287 static void
bridge_unref(struct bridge_softc * sc)1288 bridge_unref(struct bridge_softc *sc)
1289 {
1290 DECL_RETURN_ADDR(lr_saved);
1291
1292 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1293
1294 _BRIDGE_LOCK(sc);
1295 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1296 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1297
1298 sc->sc_iflist_ref--;
1299
1300 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1301 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1302 if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1303 _BRIDGE_UNLOCK(sc);
1304 wakeup(&sc->sc_cv);
1305 } else {
1306 _BRIDGE_UNLOCK(sc);
1307 }
1308 }
1309
1310 static void
bridge_xlock(struct bridge_softc * sc)1311 bridge_xlock(struct bridge_softc *sc)
1312 {
1313 DECL_RETURN_ADDR(lr_saved);
1314
1315 BRIDGE_LOCK_ASSERT_HELD(sc);
1316
1317 sc->sc_iflist_xcnt++;
1318 while (sc->sc_iflist_ref > 0) {
1319 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1320 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1321
1322 msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1323
1324 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1325 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1326 }
1327 }
1328
1329 #undef DECL_RETURN_ADDR
1330
1331 static void
bridge_xdrop(struct bridge_softc * sc)1332 bridge_xdrop(struct bridge_softc *sc)
1333 {
1334 BRIDGE_LOCK_ASSERT_HELD(sc);
1335
1336 sc->sc_iflist_xcnt--;
1337 }
1338
1339 #endif /* BRIDGE_LOCK_DEBUG */
1340
1341 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1342 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1343 {
1344 if (m) {
1345 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1346 "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1347 prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1348 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1349 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1350 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1351 suffix ? suffix : "");
1352 } else {
1353 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1354 }
1355 }
1356
1357 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1358 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1359 {
1360 if (m) {
1361 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1362 "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1363 "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1364 prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1365 mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1366 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
1367 (unsigned int)mbuf_maxlen(m),
1368 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1369 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1370 !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1371 if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1372 brlog_mbuf_pkthdr(m, "", suffix);
1373 }
1374 } else {
1375 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1376 }
1377 }
1378
1379 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1380 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1381 {
1382 mbuf_t n;
1383 size_t i, j;
1384 size_t pktlen, mlen, maxlen;
1385 unsigned char *ptr;
1386
1387 pktlen = mbuf_pkthdr_len(m);
1388
1389 if (offset > pktlen) {
1390 return;
1391 }
1392
1393 maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1394 n = m;
1395 mlen = mbuf_len(n);
1396 ptr = mtod(n, unsigned char *);
1397 for (i = 0, j = 0; i < maxlen; i++, j++) {
1398 if (j >= mlen) {
1399 n = mbuf_next(n);
1400 if (n == 0) {
1401 break;
1402 }
1403 ptr = mtod(n, unsigned char *);
1404 mlen = mbuf_len(n);
1405 j = 0;
1406 }
1407 if (i >= offset) {
1408 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1409 "%02x%s", ptr[j], i % 2 ? " " : "");
1410 }
1411 }
1412 }
1413
1414 static void
brlog_ether_header(struct ether_header * eh)1415 brlog_ether_header(struct ether_header *eh)
1416 {
1417 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1418 "%02x:%02x:%02x:%02x:%02x:%02x > "
1419 "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1420 eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1421 eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1422 eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1423 eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1424 ntohs(eh->ether_type));
1425 }
1426
1427 static char *
ether_ntop(char * __sized_by (len)buf,size_t len,const u_char ap[ETHER_ADDR_LEN])1428 ether_ntop(char * __sized_by(len) buf, size_t len, const u_char ap[ETHER_ADDR_LEN])
1429 {
1430 snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
1431 ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);
1432
1433 return buf;
1434 }
1435
1436 static void
brlog_link(struct bridge_softc * sc)1437 brlog_link(struct bridge_softc * sc)
1438 {
1439 int i;
1440 uint32_t sdl_buffer[(offsetof(struct sockaddr_dl, sdl_data) +
1441 IFNAMSIZ + ETHER_ADDR_LEN)];
1442 struct sockaddr_dl *sdl = SDL((uint8_t*)&sdl_buffer); /* SDL requires byte pointer */
1443 const u_char * lladdr;
1444 char lladdr_str[48];
1445
1446 memset(sdl_buffer, 0, sizeof(sdl_buffer));
1447 sdl->sdl_family = AF_LINK;
1448 sdl->sdl_nlen = strbuflen(sc->sc_if_xname);
1449 sdl->sdl_alen = ETHER_ADDR_LEN;
1450 sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1451 memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1452 memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1453 lladdr_str[0] = '\0';
1454 for (i = 0, lladdr = CONST_LLADDR(sdl);
1455 i < sdl->sdl_alen;
1456 i++, lladdr++) {
1457 char byte_str[4];
1458
1459 snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1460 *lladdr);
1461 strbufcat(lladdr_str, byte_str);
1462 }
1463 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1464 "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1465 " slen %d addr %s", sc->sc_if_xname,
1466 sdl->sdl_len, sdl->sdl_index,
1467 sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1468 sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1469 }
1470
1471 static int
_mbuf_get_tso_mss(mbuf_t m)1472 _mbuf_get_tso_mss(mbuf_t m)
1473 {
1474 int mss = 0;
1475
1476 if ((m->m_pkthdr.csum_flags & _TSO_CSUM) != 0) {
1477 mss = m->m_pkthdr.tso_segsz;
1478 }
1479 return mss;
1480 }
1481
1482 /*
1483 * bridgeattach:
1484 *
1485 * Pseudo-device attach routine.
1486 */
1487 __private_extern__ int
bridgeattach(int n)1488 bridgeattach(int n)
1489 {
1490 #pragma unused(n)
1491 int error;
1492
1493 LIST_INIT(&bridge_list);
1494
1495 #if BRIDGESTP
1496 bstp_sys_init();
1497 #endif /* BRIDGESTP */
1498
1499 error = if_clone_attach(&bridge_cloner);
1500 if (error != 0) {
1501 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1502 }
1503 return error;
1504 }
1505
1506 static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m,int len)1507 _mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
1508 {
1509 mbuf_setdata(m, mtodo(m, len), mbuf_len(m) - len);
1510 mbuf_pkthdr_adjustlen(m, -len);
1511 }
1512
1513 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1514 bridge_ifnet_set_attrs(struct ifnet * ifp)
1515 {
1516 errno_t error;
1517
1518 error = ifnet_set_mtu(ifp, ETHERMTU);
1519 if (error != 0) {
1520 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1521 goto done;
1522 }
1523 error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1524 if (error != 0) {
1525 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1526 goto done;
1527 }
1528 error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1529 if (error != 0) {
1530 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1531 goto done;
1532 }
1533 error = ifnet_set_flags(ifp,
1534 IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1535 0xffff);
1536
1537 if (error != 0) {
1538 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1539 goto done;
1540 }
1541 done:
1542 return error;
1543 }
1544
1545 static void
bridge_interface_proto_attach_changed(ifnet_t ifp)1546 bridge_interface_proto_attach_changed(ifnet_t ifp)
1547 {
1548 uint32_t proto_count;
1549 struct bridge_softc * __single sc = ifp->if_softc;
1550
1551 proto_count = if_get_protolist(ifp, NULL, 0);
1552 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1553 "%s: proto count %d", ifp->if_xname, proto_count);
1554
1555 if (sc == NULL) {
1556 return;
1557 }
1558 BRIDGE_LOCK(sc);
1559 if ((sc->sc_flags & SCF_DETACHING) != 0) {
1560 BRIDGE_UNLOCK(sc);
1561 return;
1562 }
1563 if (proto_count >= 2) {
1564 /* an upper layer protocol is attached */
1565 sc->sc_flags |= SCF_PROTO_ATTACHED;
1566 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1567 "%s: setting SCF_PROTO_ATTACHED", ifp->if_xname);
1568 } else {
1569 /* an upper layer protocol was detached */
1570 sc->sc_flags &= ~SCF_PROTO_ATTACHED;
1571 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1572 "%s: clearing SCF_PROTO_ATTACHED", ifp->if_xname);
1573 }
1574 BRIDGE_UNLOCK(sc);
1575 }
1576
1577 static void
bridge_interface_event(struct ifnet * ifp,__unused protocol_family_t protocol,const struct kev_msg * event)1578 bridge_interface_event(struct ifnet * ifp,
1579 __unused protocol_family_t protocol, const struct kev_msg * event)
1580 {
1581 int event_code;
1582
1583 if (event->vendor_code != KEV_VENDOR_APPLE
1584 || event->kev_class != KEV_NETWORK_CLASS
1585 || event->kev_subclass != KEV_DL_SUBCLASS) {
1586 return;
1587 }
1588 event_code = event->event_code;
1589 switch (event_code) {
1590 case KEV_DL_PROTO_DETACHED:
1591 case KEV_DL_PROTO_ATTACHED:
1592 bridge_interface_proto_attach_changed(ifp);
1593 break;
1594 default:
1595 break;
1596 }
1597 return;
1598 }
1599
1600 /*
1601 * Function: bridge_interface_attach_protocol
1602 * Purpose:
1603 * Attach a protocol to the bridge to get events on the interface,
1604 * in particular, whether protocols are attached/detached.
1605 */
1606 static int
bridge_interface_attach_protocol(ifnet_t ifp)1607 bridge_interface_attach_protocol(ifnet_t ifp)
1608 {
1609 int error;
1610 struct ifnet_attach_proto_param_v2 reg;
1611
1612 bzero(®, sizeof(reg));
1613 reg.event = bridge_interface_event;
1614
1615 error = ifnet_attach_protocol_v2(ifp, PF_BRIDGE, ®);
1616 if (error != 0) {
1617 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
1618 "%s: ifnet_attach_protocol failed, %d",
1619 ifp->if_xname, error);
1620 }
1621 return error;
1622 }
1623
1624 static void
bridge_interface_detach_protocol(ifnet_t ifp)1625 bridge_interface_detach_protocol(ifnet_t ifp)
1626 {
1627 (void)ifnet_detach_protocol(ifp, PF_BRIDGE);
1628 }
1629
1630 /*
1631 * bridge_clone_create:
1632 *
1633 * Create a new bridge instance.
1634 */
1635 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1636 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1637 {
1638 #pragma unused(params)
1639 ifnet_ref_t ifp = NULL;
1640 struct bridge_softc *sc = NULL;
1641 struct bridge_softc *sc2 = NULL;
1642 struct ifnet_init_eparams init_params;
1643 errno_t error = 0;
1644 uint8_t eth_hostid[ETHER_ADDR_LEN];
1645 int fb, retry, has_hostid;
1646
1647 sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
1648 lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1649 sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1650 sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1651 sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1652 sc->sc_filter_flags = 0;
1653
1654 TAILQ_INIT(&sc->sc_iflist);
1655
1656 /* use the interface name as the unique id for ifp recycle */
1657 snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1658 ifc->ifc_name, unit);
1659 bzero(&init_params, sizeof(init_params));
1660 init_params.ver = IFNET_INIT_CURRENT_VERSION;
1661 init_params.len = sizeof(init_params);
1662 /* Initialize our routing table. */
1663 error = bridge_rtable_init(sc);
1664 if (error != 0) {
1665 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1666 goto done;
1667 }
1668 TAILQ_INIT(&sc->sc_spanlist);
1669 if (if_bridge_txstart) {
1670 init_params.start = bridge_start;
1671 } else {
1672 init_params.flags = IFNET_INIT_LEGACY;
1673 init_params.output = bridge_output;
1674 }
1675 init_params.uniqueid_len = strbuflen(sc->sc_if_xname);
1676 init_params.uniqueid = sc->sc_if_xname;
1677 init_params.sndq_maxlen = IFQ_MAXLEN;
1678 init_params.name = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
1679 init_params.unit = unit;
1680 init_params.family = IFNET_FAMILY_ETHERNET;
1681 init_params.type = IFT_BRIDGE;
1682 init_params.demux = ether_demux;
1683 init_params.add_proto = ether_add_proto;
1684 init_params.del_proto = ether_del_proto;
1685 init_params.check_multi = ether_check_multi;
1686 init_params.framer_extended = ether_frameout_extended;
1687 init_params.softc = sc;
1688 init_params.ioctl = bridge_ioctl;
1689 init_params.detach = bridge_detach;
1690 init_params.broadcast_addr = etherbroadcastaddr;
1691 init_params.broadcast_len = ETHER_ADDR_LEN;
1692
1693 error = ifnet_allocate_extended(&init_params, &ifp);
1694 if (error != 0) {
1695 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1696 goto done;
1697 }
1698 LIST_INIT(&sc->sc_mne_list);
1699 LIST_INIT(&sc->sc_mne_list_v6);
1700 sc->sc_ifp = ifp;
1701 error = bridge_ifnet_set_attrs(ifp);
1702 if (error != 0) {
1703 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1704 error);
1705 goto done;
1706 }
1707 /*
1708 * Generate an ethernet address with a locally administered address.
1709 *
1710 * Since we are using random ethernet addresses for the bridge, it is
1711 * possible that we might have address collisions, so make sure that
1712 * this hardware address isn't already in use on another bridge.
1713 * The first try uses the "hostid" and falls back to read_frandom();
1714 * for "hostid", we use the MAC address of the first-encountered
1715 * Ethernet-type interface that is currently configured.
1716 */
1717 fb = 0;
1718 has_hostid = (uuid_get_ethernet(ð_hostid[0]) == 0);
1719 for (retry = 1; retry != 0;) {
1720 if (fb || has_hostid == 0) {
1721 read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1722 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1723 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1724 } else {
1725 bcopy(ð_hostid[0], &sc->sc_defaddr,
1726 ETHER_ADDR_LEN);
1727 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1728 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1729 sc->sc_defaddr[3] = /* stir it up a bit */
1730 ((sc->sc_defaddr[3] & 0x0f) << 4) |
1731 ((sc->sc_defaddr[3] & 0xf0) >> 4);
1732 /*
1733 * Mix in the LSB as it's actually pretty significant,
1734 * see rdar://14076061
1735 */
1736 sc->sc_defaddr[4] =
1737 (((sc->sc_defaddr[4] & 0x0f) << 4) |
1738 ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1739 sc->sc_defaddr[5];
1740 sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1741 }
1742
1743 fb = 1;
1744 retry = 0;
1745 lck_mtx_lock(&bridge_list_mtx);
1746 LIST_FOREACH(sc2, &bridge_list, sc_list) {
1747 if (_ether_cmp(sc->sc_defaddr,
1748 IF_LLADDR(sc2->sc_ifp)) == 0) {
1749 retry = 1;
1750 }
1751 }
1752 lck_mtx_unlock(&bridge_list_mtx);
1753 }
1754
1755 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1756
1757 if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1758 brlog_link(sc);
1759 }
1760 error = ifnet_attach(ifp, NULL);
1761 if (error != 0) {
1762 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1763 goto done;
1764 }
1765 (void)bridge_interface_attach_protocol(ifp);
1766
1767 error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1768 IFT_ETHER);
1769 if (error != 0) {
1770 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1771 error);
1772 goto done;
1773 }
1774
1775 ifnet_set_offload(ifp,
1776 IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1777 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1778 error = bridge_set_tso(sc);
1779 if (error != 0) {
1780 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1781 goto done;
1782 }
1783 #if BRIDGESTP
1784 bstp_attach(&sc->sc_stp, &bridge_ops);
1785 #endif /* BRIDGESTP */
1786
1787 lck_mtx_lock(&bridge_list_mtx);
1788 LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1789 lck_mtx_unlock(&bridge_list_mtx);
1790
1791 /* attach as ethernet */
1792 error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1793 NULL, NULL);
1794
1795 done:
1796 if (error != 0) {
1797 if (ifp != NULL) {
1798 bridge_interface_detach_protocol(ifp);
1799 }
1800 BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1801 /* TBD: Clean up: sc, sc_rthash etc */
1802 }
1803
1804 return error;
1805 }
1806
1807 /*
1808 * bridge_clone_destroy:
1809 *
1810 * Destroy a bridge instance.
1811 */
1812 static int
bridge_clone_destroy(struct ifnet * ifp)1813 bridge_clone_destroy(struct ifnet *ifp)
1814 {
1815 struct bridge_softc * __single sc = ifp->if_softc;
1816 struct bridge_iflist *bif;
1817 errno_t error;
1818
1819 bridge_interface_detach_protocol(ifp);
1820
1821 BRIDGE_LOCK(sc);
1822 if ((sc->sc_flags & SCF_DETACHING)) {
1823 BRIDGE_UNLOCK(sc);
1824 return 0;
1825 }
1826 sc->sc_flags |= SCF_DETACHING;
1827
1828 bridge_ifstop(ifp, 1);
1829
1830 bridge_cancel_delayed_call(&sc->sc_resize_call);
1831
1832 bridge_cleanup_delayed_call(&sc->sc_resize_call);
1833 bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1834
1835 error = ifnet_set_flags(ifp, 0, IFF_UP);
1836 if (error != 0) {
1837 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1838 }
1839
1840 while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1841 bridge_delete_member(sc, bif);
1842 }
1843
1844 while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1845 bridge_delete_span(sc, bif);
1846 }
1847 BRIDGE_UNLOCK(sc);
1848
1849 error = ifnet_detach(ifp);
1850 if (error != 0) {
1851 panic("%s (%d): ifnet_detach(%p) failed %d",
1852 __func__, __LINE__, ifp, error);
1853 }
1854 return 0;
1855 }
1856
/*
 * DRVSPEC:
 *
 *	Common body of the driver-specific ioctl cases.  It expects the
 *	names `cmd', `sc', `error', `ifd', `bc', `bridge_control_table' and
 *	`args' to exist in the expanding scope.  On any failure `error' is
 *	set and the remaining steps are skipped.
 */
#define DRVSPEC do { \
	/* the command indexes the control table */ \
	if (ifd->ifd_cmd >= bridge_control_table_size) { \
		error = EINVAL; \
		break; \
	} \
	bc = &bridge_control_table[ifd->ifd_cmd]; \
	\
	/* "get" requires a copy-out command, "set" forbids one */ \
	if (cmd == SIOCGDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) { \
		error = EINVAL; \
		break; \
	} \
	if (cmd == SIOCSDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) != 0) { \
		error = EINVAL; \
		break; \
	} \
	\
	if ((bc->bc_flags & BC_F_SUSER) != 0) { \
		error = kauth_authorize_generic(kauth_cred_get(), \
		    KAUTH_GENERIC_ISSUSER); \
		if (error != 0) { \
			break; \
		} \
	} \
	\
	/* the length must match the table entry and fit in args */ \
	if (ifd->ifd_len != bc->bc_argsize || \
	    ifd->ifd_len > sizeof (args)) { \
		error = EINVAL; \
		break; \
	} \
	\
	bzero(&args, sizeof (args)); \
	if ((bc->bc_flags & BC_F_COPYIN) != 0) { \
		error = copyin(ifd->ifd_data, &args, ifd->ifd_len); \
		if (error != 0) { \
			break; \
		} \
	} \
	\
	/* the handler runs with the bridge lock held */ \
	BRIDGE_LOCK(sc); \
	error = (*bc->bc_func)(sc, &args, sizeof(args)); \
	BRIDGE_UNLOCK(sc); \
	if (error != 0) { \
		break; \
	} \
	\
	if ((bc->bc_flags & BC_F_COPYOUT) != 0) { \
		error = copyout(&args, ifd->ifd_data, ifd->ifd_len); \
	} \
} while (0)
1903
1904 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1905 interface_needs_input_broadcast(struct ifnet * ifp)
1906 {
1907 /*
1908 * Selectively enable input broadcast only when necessary.
1909 * The bridge interface itself attaches a fake protocol
1910 * so checking for at least two protocols means that the
1911 * interface is being used for something besides bridging
1912 * and needs to see broadcast packets from other members.
1913 */
1914 return if_get_protolist(ifp, NULL, 0) >= 2;
1915 }
1916
1917 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1918 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1919 {
1920 boolean_t old_input_broadcast;
1921
1922 old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1923 if (input_broadcast) {
1924 bif->bif_flags |= BIFF_INPUT_BROADCAST;
1925 } else {
1926 bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1927 }
1928 return old_input_broadcast != input_broadcast;
1929 }
1930
1931 /*
1932 * bridge_ioctl:
1933 *
1934 * Handle a control request from the operator.
1935 */
1936 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * __sized_by (IOCPARM_LEN (cmd))data)1937 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)) data)
1938 {
1939 struct bridge_softc * __single sc = ifp->if_softc;
1940 struct ifreq *ifr = (struct ifreq *)data;
1941 struct bridge_iflist *bif;
1942 int error = 0;
1943
1944 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1945
1946 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1947 "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1948 ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1949 (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1950 (char)IOCGROUP(cmd), cmd & 0xff);
1951
1952 switch (cmd) {
1953 case SIOCSIFADDR:
1954 case SIOCAIFADDR:
1955 ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1956 break;
1957
1958 case SIOCGIFMEDIA32:
1959 case SIOCGIFMEDIA64: {
1960 // cast to 32bit version to work within bounds with 32bit userspace
1961 struct ifmediareq32 *ifmr = (struct ifmediareq32 *)data;
1962 user_addr_t user_addr;
1963
1964 user_addr = (cmd == SIOCGIFMEDIA64) ?
1965 ((struct ifmediareq64 *)data)->ifmu_ulist :
1966 CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
1967
1968 ifmr->ifm_status = IFM_AVALID;
1969 ifmr->ifm_mask = 0;
1970 ifmr->ifm_count = 1;
1971
1972 BRIDGE_LOCK(sc);
1973 if (!(sc->sc_flags & SCF_DETACHING) &&
1974 (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1975 ifmr->ifm_status |= IFM_ACTIVE;
1976 ifmr->ifm_active = ifmr->ifm_current =
1977 IFM_ETHER | IFM_AUTO;
1978 } else {
1979 ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1980 }
1981 BRIDGE_UNLOCK(sc);
1982
1983 if (user_addr != USER_ADDR_NULL) {
1984 error = copyout(&ifmr->ifm_current, user_addr,
1985 sizeof(int));
1986 }
1987 break;
1988 }
1989
1990 case SIOCADDMULTI:
1991 case SIOCDELMULTI:
1992 break;
1993
1994 case SIOCSDRVSPEC32:
1995 case SIOCGDRVSPEC32: {
1996 union {
1997 struct ifbreq ifbreq;
1998 struct ifbifconf32 ifbifconf;
1999 struct ifbareq32 ifbareq;
2000 struct ifbaconf32 ifbaconf;
2001 struct ifbrparam ifbrparam;
2002 struct ifbropreq32 ifbropreq;
2003 } args;
2004 struct ifdrv32 *ifd = (struct ifdrv32 *)data;
2005 const struct bridge_control *bridge_control_table =
2006 bridge_control_table32, *bc;
2007
2008 DRVSPEC;
2009
2010 break;
2011 }
2012 case SIOCSDRVSPEC64:
2013 case SIOCGDRVSPEC64: {
2014 union {
2015 struct ifbreq ifbreq;
2016 struct ifbifconf64 ifbifconf;
2017 struct ifbareq64 ifbareq;
2018 struct ifbaconf64 ifbaconf;
2019 struct ifbrparam ifbrparam;
2020 struct ifbropreq64 ifbropreq;
2021 } args;
2022 struct ifdrv64 *ifd = (struct ifdrv64 *)data;
2023 const struct bridge_control *bridge_control_table =
2024 bridge_control_table64, *bc;
2025
2026 DRVSPEC;
2027
2028 break;
2029 }
2030
2031 case SIOCSIFFLAGS:
2032 if (!(ifp->if_flags & IFF_UP) &&
2033 (ifp->if_flags & IFF_RUNNING)) {
2034 /*
2035 * If interface is marked down and it is running,
2036 * then stop and disable it.
2037 */
2038 BRIDGE_LOCK(sc);
2039 bridge_ifstop(ifp, 1);
2040 BRIDGE_UNLOCK(sc);
2041 } else if ((ifp->if_flags & IFF_UP) &&
2042 !(ifp->if_flags & IFF_RUNNING)) {
2043 /*
2044 * If interface is marked up and it is stopped, then
2045 * start it.
2046 */
2047 BRIDGE_LOCK(sc);
2048 error = bridge_init(ifp);
2049 BRIDGE_UNLOCK(sc);
2050 }
2051 break;
2052
2053 case SIOCSIFLLADDR:
2054 error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
2055 ifr->ifr_addr.sa_len);
2056 if (error != 0) {
2057 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
2058 "%s SIOCSIFLLADDR error %d", ifp->if_xname,
2059 error);
2060 }
2061 break;
2062
2063 case SIOCSIFMTU:
2064 if (ifr->ifr_mtu < 576) {
2065 error = EINVAL;
2066 break;
2067 }
2068 BRIDGE_LOCK(sc);
2069 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2070 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
2071 BRIDGE_UNLOCK(sc);
2072 break;
2073 }
2074 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2075 if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
2076 BRIDGE_LOG(LOG_NOTICE, 0,
2077 "%s invalid MTU: %u(%s) != %d",
2078 sc->sc_ifp->if_xname,
2079 bif->bif_ifp->if_mtu,
2080 bif->bif_ifp->if_xname, ifr->ifr_mtu);
2081 error = EINVAL;
2082 break;
2083 }
2084 }
2085 if (!error) {
2086 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
2087 }
2088 BRIDGE_UNLOCK(sc);
2089 break;
2090
2091 default:
2092 error = ether_ioctl(ifp, cmd, data);
2093 if (error != 0 && error != EOPNOTSUPP) {
2094 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
2095 "ifp %s cmd 0x%08lx "
2096 "(%c%c [%lu] %c %lu) failed error: %d",
2097 ifp->if_xname, cmd,
2098 (cmd & IOC_IN) ? 'I' : ' ',
2099 (cmd & IOC_OUT) ? 'O' : ' ',
2100 IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
2101 cmd & 0xff, error);
2102 }
2103 break;
2104 }
2105 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2106
2107 return error;
2108 }
2109
2110 #if HAS_IF_CAP
2111 /*
2112 * bridge_mutecaps:
2113 *
2114 * Clear or restore unwanted capabilities on the member interface
2115 */
2116 static void
bridge_mutecaps(struct bridge_softc * sc)2117 bridge_mutecaps(struct bridge_softc *sc)
2118 {
2119 struct bridge_iflist *bif;
2120 int enabled, mask;
2121
2122 /* Initial bitmask of capabilities to test */
2123 mask = BRIDGE_IFCAPS_MASK;
2124
2125 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2126 /* Every member must support it or its disabled */
2127 mask &= bif->bif_savedcaps;
2128 }
2129
2130 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2131 enabled = bif->bif_ifp->if_capenable;
2132 enabled &= ~BRIDGE_IFCAPS_STRIP;
2133 /* strip off mask bits and enable them again if allowed */
2134 enabled &= ~BRIDGE_IFCAPS_MASK;
2135 enabled |= mask;
2136
2137 bridge_set_ifcap(sc, bif, enabled);
2138 }
2139 }
2140
2141 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)2142 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
2143 {
2144 struct ifnet *ifp = bif->bif_ifp;
2145 struct ifreq ifr;
2146 int error;
2147
2148 bzero(&ifr, sizeof(ifr));
2149 ifr.ifr_reqcap = set;
2150
2151 if (ifp->if_capenable != set) {
2152 IFF_LOCKGIANT(ifp);
2153 error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2154 IFF_UNLOCKGIANT(ifp);
2155 if (error) {
2156 BRIDGE_LOG(LOG_NOTICE, 0,
2157 "%s error setting interface capabilities on %s",
2158 sc->sc_ifp->if_xname, ifp->if_xname);
2159 }
2160 }
2161 }
2162 #endif /* HAS_IF_CAP */
2163
2164 static errno_t
siocsifcap(struct ifnet * ifp,uint32_t cap_enable)2165 siocsifcap(struct ifnet * ifp, uint32_t cap_enable)
2166 {
2167 struct ifreq ifr;
2168
2169 bzero(&ifr, sizeof(ifr));
2170 ifr.ifr_reqcap = cap_enable;
2171 return ifnet_ioctl(ifp, 0, SIOCSIFCAP, &ifr);
2172 }
2173
2174 static const char *
enable_disable_str(boolean_t enable)2175 enable_disable_str(boolean_t enable)
2176 {
2177 return (const char * __null_terminated)(enable ? "enable" : "disable");
2178 }
2179
2180 static boolean_t
bridge_set_lro(struct ifnet * ifp,boolean_t enable)2181 bridge_set_lro(struct ifnet * ifp, boolean_t enable)
2182 {
2183 uint32_t cap_enable;
2184 uint32_t cap_supported;
2185 boolean_t changed = FALSE;
2186 boolean_t lro_enabled;
2187
2188 cap_supported = ifnet_capabilities_supported(ifp);
2189 if ((cap_supported & IFCAP_LRO) == 0) {
2190 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2191 "%s doesn't support LRO",
2192 ifp->if_xname);
2193 goto done;
2194 }
2195 if (bridge_allow_lro_num_seg != 0 &&
2196 (cap_supported & IFCAP_LRO_NUM_SEG) != 0) {
2197 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2198 "%s supports LRO_NUM_SEG, leaving LRO enabled",
2199 ifp->if_xname);
2200 goto done;
2201 }
2202 cap_enable = ifnet_capabilities_enabled(ifp);
2203 lro_enabled = (cap_enable & IFCAP_LRO) != 0;
2204 if (lro_enabled != enable) {
2205 errno_t error;
2206
2207 if (enable) {
2208 cap_enable |= IFCAP_LRO;
2209 } else {
2210 cap_enable &= ~IFCAP_LRO;
2211 }
2212 error = siocsifcap(ifp, cap_enable);
2213 if (error != 0) {
2214 BRIDGE_LOG(LOG_NOTICE, 0,
2215 "%s %s failed (cap 0x%x) %d",
2216 ifp->if_xname,
2217 enable_disable_str(enable),
2218 cap_enable,
2219 error);
2220 } else {
2221 changed = TRUE;
2222 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2223 "%s %s success (cap 0x%x)",
2224 ifp->if_xname,
2225 enable_disable_str(enable),
2226 cap_enable);
2227 }
2228 }
2229 done:
2230 return changed;
2231 }
2232
2233 static errno_t
bridge_set_tso(struct bridge_softc * sc)2234 bridge_set_tso(struct bridge_softc *sc)
2235 {
2236 struct bridge_iflist *bif;
2237 u_int32_t tso_v4_mtu;
2238 u_int32_t tso_v6_mtu;
2239 ifnet_offload_t offload;
2240 errno_t error = 0;
2241
2242 /* By default, support TSO */
2243 offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2244 tso_v4_mtu = IP_MAXPACKET;
2245 tso_v6_mtu = IP_MAXPACKET;
2246
2247 /* Use the lowest common denominator of the members */
2248 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2249 ifnet_t ifp = bif->bif_ifp;
2250
2251 if (ifp == NULL) {
2252 continue;
2253 }
2254
2255 if (offload & IFNET_TSO_IPV4) {
2256 if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2257 if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2258 tso_v4_mtu = ifp->if_tso_v4_mtu;
2259 }
2260 } else {
2261 offload &= ~IFNET_TSO_IPV4;
2262 tso_v4_mtu = 0;
2263 }
2264 }
2265 if (offload & IFNET_TSO_IPV6) {
2266 if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2267 if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2268 tso_v6_mtu = ifp->if_tso_v6_mtu;
2269 }
2270 } else {
2271 offload &= ~IFNET_TSO_IPV6;
2272 tso_v6_mtu = 0;
2273 }
2274 }
2275 }
2276
2277 if (offload != sc->sc_ifp->if_hwassist) {
2278 error = ifnet_set_offload(sc->sc_ifp, offload);
2279 if (error != 0) {
2280 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2281 "ifnet_set_offload(%s, 0x%x) failed %d",
2282 sc->sc_ifp->if_xname, offload, error);
2283 goto done;
2284 }
2285 /*
2286 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2287 * as large as the interface MTU
2288 */
2289 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2290 if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2291 tso_v4_mtu = sc->sc_ifp->if_mtu;
2292 }
2293 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2294 tso_v4_mtu);
2295 if (error != 0) {
2296 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2297 "ifnet_set_tso_mtu(%s, "
2298 "AF_INET, %u) failed %d",
2299 sc->sc_ifp->if_xname,
2300 tso_v4_mtu, error);
2301 goto done;
2302 }
2303 }
2304 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2305 if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2306 tso_v6_mtu = sc->sc_ifp->if_mtu;
2307 }
2308 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2309 tso_v6_mtu);
2310 if (error != 0) {
2311 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2312 "ifnet_set_tso_mtu(%s, "
2313 "AF_INET6, %u) failed %d",
2314 sc->sc_ifp->if_xname,
2315 tso_v6_mtu, error);
2316 goto done;
2317 }
2318 }
2319 }
2320 done:
2321 return error;
2322 }
2323
2324 static const char *
sanitize_ifname(char * __sized_by (IFNAMSIZ)ifname)2325 sanitize_ifname(char * __sized_by(IFNAMSIZ) ifname)
2326 {
2327 ifname[IFNAMSIZ - 1] = '\0';
2328 return __unsafe_null_terminated_from_indexable(ifname, &ifname[IFNAMSIZ - 1]);
2329 }
2330
2331 /*
2332 * bridge_lookup_member:
2333 *
2334 * Lookup a bridge member interface.
2335 */
2336 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,char * __sized_by (IFNAMSIZ)name_unsanitized)2337 bridge_lookup_member(struct bridge_softc *sc, char * __sized_by(IFNAMSIZ) name_unsanitized)
2338 {
2339 struct bridge_iflist *bif;
2340 struct ifnet *ifp;
2341 const char * __null_terminated name = sanitize_ifname(name_unsanitized);
2342
2343 BRIDGE_LOCK_ASSERT_HELD(sc);
2344
2345 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2346 ifp = bif->bif_ifp;
2347 if (strcmp(ifp->if_xname, name) == 0) {
2348 return bif;
2349 }
2350 }
2351
2352 return NULL;
2353 }
2354
2355 /*
2356 * bridge_lookup_member_if:
2357 *
2358 * Lookup a bridge member interface by ifnet*.
2359 */
2360 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2361 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2362 {
2363 struct bridge_iflist *bif;
2364
2365 BRIDGE_LOCK_ASSERT_HELD(sc);
2366
2367 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2368 if (bif->bif_ifp == member_ifp) {
2369 return bif;
2370 }
2371 }
2372
2373 return NULL;
2374 }
2375
2376 static inline bool
get_and_clear_promisc(mbuf_t m)2377 get_and_clear_promisc(mbuf_t m)
2378 {
2379 bool is_promisc;
2380
2381 /*
2382 * Need to clear the promiscuous flag otherwise the packet will be
2383 * dropped by DLIL after processing filters
2384 */
2385 is_promisc = (mbuf_flags(m) & MBUF_PROMISC) != 0;
2386 if (is_promisc) {
2387 mbuf_setflags_mask(m, 0, MBUF_PROMISC);
2388 }
2389 return is_promisc;
2390 }
2391
2392 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2393 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2394 mbuf_t *data, char **frame_ptr)
2395 {
2396 #pragma unused(protocol)
2397 errno_t error = 0;
2398 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2399 struct bridge_softc *sc = bif->bif_sc;
2400 int included = 0;
2401 struct ether_header * eh_p;
2402 size_t frmlen = 0;
2403 bool is_promisc;
2404 mblist list;
2405 mbuf_t m = *data;
2406
2407 if ((m->m_flags & M_PROTO1)) {
2408 goto out;
2409 }
2410
2411 if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2412 *frame_ptr <= mtod(m, char *)) {
2413 included = 1;
2414 frmlen = mtod(m, char *) - *frame_ptr;
2415 }
2416 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2417 "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2418 "frmlen %lu", sc->sc_ifp->if_xname,
2419 ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2420 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
2421 (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2422 included ? "inside" : "outside", frmlen);
2423 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2424 brlog_mbuf(m, "bridge_iff_input[", "");
2425 brlog_ether_header((struct ether_header *)
2426 (void *)*frame_ptr);
2427 brlog_mbuf_data(m, 0, 20);
2428 }
2429 if (included == 0) {
2430 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2431 goto out;
2432 }
2433
2434 /* Move data pointer to start of frame to the link layer header */
2435 _mbuf_adjust_pkthdr_and_data(m, -frmlen);
2436
2437 /* make sure we can access the ethernet header */
2438 if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2439 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2440 "short frame %lu < %lu",
2441 mbuf_pkthdr_len(m), sizeof(struct ether_header));
2442 goto out;
2443 }
2444 if (mbuf_len(m) < sizeof(struct ether_header)) {
2445 error = mbuf_pullup(data, sizeof(struct ether_header));
2446 if (error != 0) {
2447 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2448 "mbuf_pullup(%lu) failed %d",
2449 sizeof(struct ether_header),
2450 error);
2451 error = EJUSTRETURN;
2452 goto out;
2453 }
2454 if (m != *data) {
2455 m = *data;
2456 *frame_ptr = mtod(m, char *);
2457 }
2458 }
2459 mblist_init(&list);
2460 mblist_append(&list, m);
2461 is_promisc = get_and_clear_promisc(m);
2462 eh_p = __unsafe_forge_single(struct ether_header *, *frame_ptr);
2463 list = bridge_input_list(sc, ifp, eh_p, list, is_promisc);
2464 m = *data = list.head;
2465 if (m == NULL) {
2466 error = EJUSTRETURN;
2467 }
2468 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2469 BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2470 brlog_mbuf(m, "bridge_iff_input]", "");
2471 }
2472
2473 out:
2474 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2475
2476 return error;
2477 }
2478
2479 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2480 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2481 mbuf_t *data)
2482 {
2483 #pragma unused(protocol)
2484 errno_t error = 0;
2485 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2486 struct bridge_softc *sc = bif->bif_sc;
2487 mbuf_t m = *data;
2488
2489 if ((m->m_flags & M_PROTO1)) {
2490 goto out;
2491 }
2492 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2493 "%s from %s m 0x%llx data 0x%llx",
2494 sc->sc_ifp->if_xname, ifp->if_xname,
2495 (uint64_t)VM_KERNEL_ADDRPERM(m),
2496 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)));
2497
2498 error = bridge_member_output(sc, ifp, data);
2499 if (error != 0 && error != EJUSTRETURN) {
2500 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2501 "bridge_member_output failed error %d",
2502 error);
2503 }
2504 out:
2505 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2506
2507 return error;
2508 }
2509
2510 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2511 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2512 const struct kev_msg *event_msg)
2513 {
2514 #pragma unused(protocol)
2515 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2516 struct bridge_softc *sc = bif->bif_sc;
2517
2518 if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2519 event_msg->kev_class == KEV_NETWORK_CLASS &&
2520 event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2521 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2522 "%s event_code %u - %s",
2523 ifp->if_xname, event_msg->event_code,
2524 dlil_kev_dl_code_str(event_msg->event_code));
2525
2526 switch (event_msg->event_code) {
2527 case KEV_DL_LINK_OFF:
2528 case KEV_DL_LINK_ON: {
2529 bridge_iflinkevent(ifp);
2530 #if BRIDGESTP
2531 bstp_linkstate(ifp, event_msg->event_code);
2532 #endif /* BRIDGESTP */
2533 break;
2534 }
2535 case KEV_DL_SIFFLAGS: {
2536 if ((ifp->if_flags & IFF_UP) == 0) {
2537 break;
2538 }
2539 if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2540 errno_t error;
2541
2542 error = ifnet_set_promiscuous(ifp, 1);
2543 if (error != 0) {
2544 BRIDGE_LOG(LOG_NOTICE, 0,
2545 "ifnet_set_promiscuous (%s)"
2546 " failed %d", ifp->if_xname,
2547 error);
2548 } else {
2549 bif->bif_flags |= BIFF_PROMISC;
2550 }
2551 }
2552 if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2553 (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2554 errno_t error;
2555
2556 error = if_allmulti(ifp, 1);
2557 if (error != 0) {
2558 BRIDGE_LOG(LOG_NOTICE, 0,
2559 "if_allmulti (%s)"
2560 " failed %d", ifp->if_xname,
2561 error);
2562 } else {
2563 bif->bif_flags |= BIFF_ALL_MULTI;
2564 #ifdef XNU_PLATFORM_AppleTVOS
2565 ip6_forwarding = 1;
2566 #endif /* XNU_PLATFORM_AppleTVOS */
2567 }
2568 }
2569 break;
2570 }
2571 case KEV_DL_IFCAP_CHANGED: {
2572 BRIDGE_LOCK(sc);
2573 bridge_set_tso(sc);
2574 BRIDGE_UNLOCK(sc);
2575 break;
2576 }
2577 case KEV_DL_PROTO_DETACHED:
2578 case KEV_DL_PROTO_ATTACHED: {
2579 bridge_proto_attach_changed(ifp);
2580 break;
2581 }
2582 default:
2583 break;
2584 }
2585 }
2586 }
2587
2588 /*
2589 * bridge_iff_detached:
2590 *
2591 * Called when our interface filter has been detached from a
2592 * member interface.
2593 */
2594 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2595 bridge_iff_detached(void *cookie, ifnet_t ifp)
2596 {
2597 #pragma unused(cookie)
2598 struct bridge_iflist *bif;
2599 struct bridge_softc * __single sc = ifp->if_bridge;
2600
2601 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2602
2603 /* Check if the interface is a bridge member */
2604 if (sc != NULL) {
2605 BRIDGE_LOCK(sc);
2606 bif = bridge_lookup_member_if(sc, ifp);
2607 if (bif != NULL) {
2608 bridge_delete_member(sc, bif);
2609 }
2610 BRIDGE_UNLOCK(sc);
2611 return;
2612 }
2613 /* Check if the interface is a span port */
2614 lck_mtx_lock(&bridge_list_mtx);
2615 LIST_FOREACH(sc, &bridge_list, sc_list) {
2616 BRIDGE_LOCK(sc);
2617 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2618 if (ifp == bif->bif_ifp) {
2619 bridge_delete_span(sc, bif);
2620 break;
2621 }
2622 BRIDGE_UNLOCK(sc);
2623 }
2624 lck_mtx_unlock(&bridge_list_mtx);
2625 }
2626
2627 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2628 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2629 char *header)
2630 {
2631 #pragma unused(protocol, packet, header)
2632 BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2633 ifp->if_xname);
2634 return 0;
2635 }
2636
2637 static int
bridge_attach_protocol(struct ifnet * ifp)2638 bridge_attach_protocol(struct ifnet *ifp)
2639 {
2640 int error;
2641 struct ifnet_attach_proto_param reg;
2642
2643 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2644 bzero(®, sizeof(reg));
2645 reg.input = bridge_proto_input;
2646
2647 error = ifnet_attach_protocol(ifp, PF_BRIDGE, ®);
2648 if (error) {
2649 BRIDGE_LOG(LOG_NOTICE, 0,
2650 "ifnet_attach_protocol(%s) failed, %d",
2651 ifp->if_xname, error);
2652 }
2653
2654 return error;
2655 }
2656
2657 static int
bridge_detach_protocol(struct ifnet * ifp)2658 bridge_detach_protocol(struct ifnet *ifp)
2659 {
2660 int error;
2661
2662 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2663 error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2664 if (error) {
2665 BRIDGE_LOG(LOG_NOTICE, 0,
2666 "ifnet_detach_protocol(%s) failed, %d",
2667 ifp->if_xname, error);
2668 }
2669
2670 return error;
2671 }
2672
2673 /*
2674 * bridge_delete_member:
2675 *
2676 * Delete the specified member interface.
2677 */
2678 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)2679 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2680 {
2681 #if SKYWALK
2682 boolean_t add_netagent = FALSE;
2683 #endif /* SKYWALK */
2684 uint32_t bif_flags;
2685 struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2686 int lladdr_changed = 0, error;
2687 uint8_t eaddr[ETHER_ADDR_LEN];
2688 u_int32_t event_code = 0;
2689
2690 BRIDGE_LOCK_ASSERT_HELD(sc);
2691 VERIFY(ifs != NULL);
2692
2693 /*
2694 * Remove the member from the list first so it cannot be found anymore
2695 * when we release the bridge lock below
2696 */
2697 if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2698 bif->bif_flags &= ~BIFF_IN_MEMBER_LIST;
2699 BRIDGE_XLOCK(sc);
2700 TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2701 BRIDGE_XDROP(sc);
2702 }
2703 if (sc->sc_mac_nat_bif != NULL) {
2704 if (bif == sc->sc_mac_nat_bif) {
2705 bridge_mac_nat_disable(sc);
2706 } else {
2707 bridge_mac_nat_flush_entries(sc, bif);
2708 }
2709 }
2710 #if BRIDGESTP
2711 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2712 bstp_disable(&bif->bif_stp);
2713 }
2714 #endif /* BRIDGESTP */
2715
2716 /*
2717 * If removing the interface that gave the bridge its mac address, set
2718 * the mac address of the bridge to the address of the next member, or
2719 * to its default address if no members are left.
2720 */
2721 if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2722 ifnet_release(sc->sc_ifaddr);
2723 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2724 bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
2725 sc->sc_ifaddr = NULL;
2726 } else {
2727 struct ifnet *fif =
2728 TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2729 bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
2730 sc->sc_ifaddr = fif;
2731 ifnet_reference(fif); /* for sc_ifaddr */
2732 }
2733 lladdr_changed = 1;
2734 }
2735
2736 #if HAS_IF_CAP
2737 bridge_mutecaps(sc); /* recalculate now this interface is removed */
2738 #endif /* HAS_IF_CAP */
2739
2740 error = bridge_set_tso(sc);
2741 if (error != 0) {
2742 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2743 }
2744
2745 bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
2746
2747 KASSERT(bif->bif_addrcnt == 0,
2748 ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2749
2750 /*
2751 * Update link status of the bridge based on its remaining members
2752 */
2753 event_code = bridge_updatelinkstatus(sc);
2754 bif_flags = bif->bif_flags;
2755 BRIDGE_UNLOCK(sc);
2756
2757 /* only perform these steps if the interface is still attached */
2758 if (ifnet_get_ioref(ifs)) {
2759 #if SKYWALK
2760 add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;
2761
2762 if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2763 ifnet_detach_flowswitch_nexus(ifs);
2764 }
2765 #endif /* SKYWALK */
2766 /* disable promiscuous mode */
2767 if ((bif_flags & BIFF_PROMISC) != 0) {
2768 (void) ifnet_set_promiscuous(ifs, 0);
2769 }
2770 /* disable all multi */
2771 if ((bif_flags & BIFF_ALL_MULTI) != 0) {
2772 (void)if_allmulti(ifs, 0);
2773 }
2774 #if HAS_IF_CAP
2775 /* re-enable any interface capabilities */
2776 bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2777 #endif
2778 /* detach bridge "protocol" */
2779 if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2780 (void)bridge_detach_protocol(ifs);
2781 }
2782 /* detach interface filter */
2783 if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2784 iflt_detach(bif->bif_iff_ref);
2785 }
2786 /* re-enable LRO */
2787 if ((bif_flags & BIFF_LRO_DISABLED) != 0) {
2788 (void)bridge_set_lro(ifs, TRUE);
2789 }
2790 ifnet_decr_iorefcnt(ifs);
2791 }
2792
2793 if (lladdr_changed &&
2794 (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2795 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2796 }
2797
2798 if (event_code != 0) {
2799 bridge_link_event(bifp, event_code);
2800 }
2801
2802 #if BRIDGESTP
2803 bstp_destroy(&bif->bif_stp); /* prepare to free */
2804 #endif /* BRIDGESTP */
2805
2806 kfree_type(struct bridge_iflist, bif);
2807 ifs->if_bridge = NULL;
2808 #if SKYWALK
2809 if (add_netagent && ifnet_get_ioref(ifs)) {
2810 (void)ifnet_add_netagent(ifs);
2811 ifnet_decr_iorefcnt(ifs);
2812 }
2813 #endif /* SKYWALK */
2814
2815 ifnet_release(ifs);
2816
2817 BRIDGE_LOCK(sc);
2818 }
2819
2820 /*
2821 * bridge_delete_span:
2822 *
2823 * Delete the specified span interface.
2824 */
2825 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2826 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2827 {
2828 BRIDGE_LOCK_ASSERT_HELD(sc);
2829
2830 KASSERT(bif->bif_ifp->if_bridge == NULL,
2831 ("%s: not a span interface", __func__));
2832
2833 ifnet_release(bif->bif_ifp);
2834
2835 TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2836 kfree_type(struct bridge_iflist, bif);
2837 }
2838
2839 static int
bridge_ioctl_add(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)2840 bridge_ioctl_add(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
2841 {
2842 struct ifbreq * __single req = arg;
2843 struct bridge_iflist *bif = NULL;
2844 struct ifnet *ifs, *bifp = sc->sc_ifp;
2845 int error = 0, lladdr_changed = 0;
2846 uint8_t eaddr[ETHER_ADDR_LEN];
2847 struct iff_filter iff;
2848 u_int32_t event_code = 0;
2849 boolean_t input_broadcast;
2850 int media_active;
2851 boolean_t wifi_infra = FALSE;
2852
2853 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
2854 if (ifs == NULL) {
2855 return ENOENT;
2856 }
2857 if (ifs->if_ioctl == NULL) { /* must be supported */
2858 return EINVAL;
2859 }
2860
2861 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
2862 return EINVAL;
2863 }
2864
2865 /* If it's in the span list, it can't be a member. */
2866 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2867 if (ifs == bif->bif_ifp) {
2868 return EBUSY;
2869 }
2870 }
2871
2872 if (ifs->if_bridge == sc) {
2873 return EEXIST;
2874 }
2875
2876 if (ifs->if_bridge != NULL) {
2877 return EBUSY;
2878 }
2879
2880 switch (ifs->if_type) {
2881 case IFT_ETHER:
2882 if (strcmp(ifs->if_name, "en") == 0 &&
2883 ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2884 (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2885 /* XXX is there a better way to identify Wi-Fi STA? */
2886 wifi_infra = TRUE;
2887 }
2888 break;
2889 case IFT_L2VLAN:
2890 case IFT_IEEE8023ADLAG:
2891 break;
2892 default:
2893 return EINVAL;
2894 }
2895
2896 /* fail to add the interface if the MTU doesn't match */
2897 if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2898 BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2899 sc->sc_ifp->if_xname,
2900 ifs->if_xname);
2901 return EINVAL;
2902 }
2903
2904 if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
2905 /* there's already an interface that's doing MAC NAT */
2906 return EBUSY;
2907 }
2908
2909 /* prevent the interface from detaching while we add the member */
2910 if (!ifnet_get_ioref(ifs)) {
2911 return ENXIO;
2912 }
2913
2914 /* allocate a new member */
2915 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2916 bif->bif_ifp = ifs;
2917 ifnet_reference(ifs);
2918 bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2919 #if HAS_IF_CAP
2920 bif->bif_savedcaps = ifs->if_capenable;
2921 #endif /* HAS_IF_CAP */
2922 bif->bif_sc = sc;
2923 if (wifi_infra) {
2924 (void)bridge_mac_nat_enable(sc, bif);
2925 }
2926
2927 /* Allow the first Ethernet member to define the MTU */
2928 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2929 sc->sc_ifp->if_mtu = ifs->if_mtu;
2930 }
2931
2932 /*
2933 * Assign the interface's MAC address to the bridge if it's the first
2934 * member and the MAC address of the bridge has not been changed from
2935 * the default (randomly) generated one.
2936 */
2937 if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2938 _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2939 bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2940 sc->sc_ifaddr = ifs;
2941 ifnet_reference(ifs); /* for sc_ifaddr */
2942 lladdr_changed = 1;
2943 }
2944
2945 ifs->if_bridge = sc;
2946 #if BRIDGESTP
2947 bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2948 #endif /* BRIDGESTP */
2949
2950 #if HAS_IF_CAP
2951 /* Set interface capabilities to the intersection set of all members */
2952 bridge_mutecaps(sc);
2953 #endif /* HAS_IF_CAP */
2954
2955 /*
2956 * Respect lock ordering with DLIL lock for the following operations
2957 */
2958 BRIDGE_UNLOCK(sc);
2959
2960 /* enable promiscuous mode */
2961 error = ifnet_set_promiscuous(ifs, 1);
2962 switch (error) {
2963 case 0:
2964 bif->bif_flags |= BIFF_PROMISC;
2965 break;
2966 case ENETDOWN:
2967 case EPWROFF:
2968 BRIDGE_LOG(LOG_NOTICE, 0,
2969 "ifnet_set_promiscuous(%s) failed %d, ignoring",
2970 ifs->if_xname, error);
2971 /* Ignore error when device is not up */
2972 error = 0;
2973 break;
2974 default:
2975 BRIDGE_LOG(LOG_NOTICE, 0,
2976 "ifnet_set_promiscuous(%s) failed %d",
2977 ifs->if_xname, error);
2978 BRIDGE_LOCK(sc);
2979 goto out;
2980 }
2981 if (wifi_infra) {
2982 int this_error;
2983
2984 /* Wi-Fi doesn't really support promiscuous, set allmulti */
2985 bif->bif_flags |= BIFF_WIFI_INFRA;
2986 this_error = if_allmulti(ifs, 1);
2987 if (this_error == 0) {
2988 bif->bif_flags |= BIFF_ALL_MULTI;
2989 #ifdef XNU_PLATFORM_AppleTVOS
2990 ip6_forwarding = 1;
2991 #endif /* XNU_PLATFORM_AppleTVOS */
2992 } else {
2993 BRIDGE_LOG(LOG_NOTICE, 0,
2994 "if_allmulti(%s) failed %d, ignoring",
2995 ifs->if_xname, this_error);
2996 }
2997 }
2998 #if SKYWALK
2999 /* ensure that the flowswitch is present for native interface */
3000 if (SKYWALK_NATIVE(ifs)) {
3001 if (ifnet_attach_flowswitch_nexus(ifs)) {
3002 bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
3003 }
3004 }
3005 /* remove the netagent on the flowswitch (rdar://75050182) */
3006 if (if_is_fsw_netagent_enabled()) {
3007 (void)ifnet_remove_netagent(ifs);
3008 bif->bif_flags |= BIFF_NETAGENT_REMOVED;
3009 }
3010 #endif /* SKYWALK */
3011
3012 /*
3013 * install an interface filter
3014 */
3015 memset(&iff, 0, sizeof(struct iff_filter));
3016 iff.iff_cookie = bif;
3017 iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
3018 iff.iff_input = bridge_iff_input;
3019 iff.iff_output = bridge_iff_output;
3020 iff.iff_event = bridge_iff_event;
3021 iff.iff_detached = bridge_iff_detached;
3022 error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
3023 DLIL_IFF_TSO | DLIL_IFF_INTERNAL | DLIL_IFF_BRIDGE);
3024 if (error != 0) {
3025 BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
3026 BRIDGE_LOCK(sc);
3027 goto out;
3028 }
3029 bif->bif_flags |= BIFF_FILTER_ATTACHED;
3030
3031 /*
3032 * install a dummy "bridge" protocol
3033 */
3034 if ((error = bridge_attach_protocol(ifs)) != 0) {
3035 if (error != 0) {
3036 BRIDGE_LOG(LOG_NOTICE, 0,
3037 "bridge_attach_protocol failed %d", error);
3038 BRIDGE_LOCK(sc);
3039 goto out;
3040 }
3041 }
3042 bif->bif_flags |= BIFF_PROTO_ATTACHED;
3043
3044 if (lladdr_changed &&
3045 (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
3046 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
3047 }
3048
3049 media_active = interface_media_active(ifs);
3050
3051 /* disable LRO if needed */
3052 if (bridge_set_lro(ifs, FALSE)) {
3053 bif->bif_flags |= BIFF_LRO_DISABLED;
3054 }
3055
3056 /*
3057 * No failures past this point. Add the member to the list.
3058 */
3059 BRIDGE_LOCK(sc);
3060 bif->bif_flags |= BIFF_IN_MEMBER_LIST;
3061 BRIDGE_XLOCK(sc);
3062 TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
3063 BRIDGE_XDROP(sc);
3064
3065 /* cache the member link status */
3066 if (media_active != 0) {
3067 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
3068 } else {
3069 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
3070 }
3071
3072 /* the new member may change the link status of the bridge interface */
3073 event_code = bridge_updatelinkstatus(sc);
3074
3075 /* check whether we need input broadcast or not */
3076 input_broadcast = interface_needs_input_broadcast(ifs);
3077 bif_set_input_broadcast(bif, input_broadcast);
3078 BRIDGE_UNLOCK(sc);
3079
3080 if (event_code != 0) {
3081 bridge_link_event(bifp, event_code);
3082 }
3083 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
3084 "%s input broadcast %s", ifs->if_xname,
3085 input_broadcast ? "ENABLED" : "DISABLED");
3086
3087 BRIDGE_LOCK(sc);
3088 bridge_set_tso(sc);
3089
3090 out:
3091 /* allow the interface to detach */
3092 ifnet_decr_iorefcnt(ifs);
3093
3094 if (error != 0) {
3095 if (bif != NULL) {
3096 bridge_delete_member(sc, bif);
3097 }
3098 } else if (IFNET_IS_VMNET(ifs)) {
3099 INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
3100 }
3101
3102 return error;
3103 }
3104
3105 static int
bridge_ioctl_del(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3106 bridge_ioctl_del(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3107 {
3108 struct ifbreq * __single req = arg;
3109 struct bridge_iflist *bif;
3110
3111 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3112 if (bif == NULL) {
3113 return ENOENT;
3114 }
3115
3116 bridge_delete_member(sc, bif);
3117
3118 return 0;
3119 }
3120
3121 static int
bridge_ioctl_purge(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3122 bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3123 {
3124 #pragma unused(sc, arg, arg_len)
3125 return 0;
3126 }
3127
3128 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3129 bridge_ioctl_gifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3130 {
3131 struct ifbreq * __single req = arg;
3132 struct bridge_iflist *bif;
3133
3134 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3135 if (bif == NULL) {
3136 return ENOENT;
3137 }
3138
3139 struct bstp_port *bp;
3140
3141 bp = &bif->bif_stp;
3142 req->ifbr_state = bp->bp_state;
3143 req->ifbr_priority = bp->bp_priority;
3144 req->ifbr_path_cost = bp->bp_path_cost;
3145 req->ifbr_proto = bp->bp_protover;
3146 req->ifbr_role = bp->bp_role;
3147 req->ifbr_stpflags = bp->bp_flags;
3148 req->ifbr_ifsflags = bif->bif_ifflags;
3149
3150 /* Copy STP state options as flags */
3151 if (bp->bp_operedge) {
3152 req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
3153 }
3154 if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
3155 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
3156 }
3157 if (bp->bp_ptp_link) {
3158 req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
3159 }
3160 if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
3161 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
3162 }
3163 if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
3164 req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
3165 }
3166 if (bp->bp_flags & BSTP_PORT_ADMCOST) {
3167 req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
3168 }
3169
3170 req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
3171 req->ifbr_addrcnt = bif->bif_addrcnt;
3172 req->ifbr_addrmax = bif->bif_addrmax;
3173 req->ifbr_addrexceeded = bif->bif_addrexceeded;
3174
3175 return 0;
3176 }
3177
3178 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3179 bridge_ioctl_sifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3180 {
3181 struct ifbreq * __single req = arg;
3182 struct bridge_iflist *bif;
3183 #if BRIDGESTP
3184 struct bstp_port *bp;
3185 #endif /* BRIDGESTP */
3186 errno_t error;
3187 uint32_t ifsflags;
3188
3189 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3190 if (bif == NULL) {
3191 return ENOENT;
3192 }
3193
3194 ifsflags = req->ifbr_ifsflags;
3195 if (ifsflags & IFBIF_SPAN) {
3196 /* SPAN is readonly */
3197 return EINVAL;
3198 }
3199 #define CHECKSUM_VIRTIO (IFBIF_CHECKSUM_OFFLOAD | IFBIF_USES_VIRTIO)
3200 if ((ifsflags & CHECKSUM_VIRTIO) == CHECKSUM_VIRTIO) {
3201 /* can't specify checksum and virtio */
3202 return EINVAL;
3203 }
3204 if ((ifsflags & IFBIF_MAC_NAT) != 0 &&
3205 ((ifsflags & CHECKSUM_VIRTIO) != 0 ||
3206 (bif->bif_flags & BIFF_HOST_FILTER) != 0)) {
3207 /* MAC-NAT can't be used with checksum, host filter, or virtio */
3208 return EINVAL;
3209 }
3210 if ((ifsflags & IFBIF_MAC_NAT) != 0) {
3211 error = bridge_mac_nat_enable(sc, bif);
3212 if (error != 0) {
3213 return error;
3214 }
3215 } else if (sc->sc_mac_nat_bif == bif) {
3216 bridge_mac_nat_disable(sc);
3217 }
3218
3219 #if BRIDGESTP
3220 if (ifsflags & IFBIF_STP) {
3221 if ((bif->bif_ifflags & IFBIF_STP) == 0) {
3222 error = bstp_enable(&bif->bif_stp);
3223 if (error) {
3224 return error;
3225 }
3226 }
3227 } else {
3228 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
3229 bstp_disable(&bif->bif_stp);
3230 }
3231 }
3232
3233 /* Pass on STP flags */
3234 bp = &bif->bif_stp;
3235 bstp_set_edge(bp, ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
3236 bstp_set_autoedge(bp, ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
3237 bstp_set_ptp(bp, ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
3238 bstp_set_autoptp(bp, ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
3239 #else /* !BRIDGESTP */
3240 if (ifsflags & IFBIF_STP) {
3241 return EOPNOTSUPP;
3242 }
3243 #endif /* !BRIDGESTP */
3244
3245 /* Save the bits relating to the bridge */
3246 bif->bif_ifflags = ifsflags & IFBIFMASK;
3247
3248 return 0;
3249 }
3250
3251 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3252 bridge_ioctl_scache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3253 {
3254 struct ifbrparam * __single param = arg;
3255
3256 sc->sc_brtmax = param->ifbrp_csize;
3257 bridge_rttrim(sc);
3258 return 0;
3259 }
3260
3261 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3262 bridge_ioctl_gcache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3263 {
3264 struct ifbrparam * __single param = arg;
3265
3266 param->ifbrp_csize = sc->sc_brtmax;
3267
3268 return 0;
3269 }
3270
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of bridge_ioctl_gifs32() and bridge_ioctl_gifs64().
 *	Expects `sc', `bifc' and an int `error' to be in scope in the
 *	expanding function, and may execute `return' in that function.
 *
 *	If bifc->ifbic_len is zero, only the number of bytes needed for all
 *	member and span entries is reported in ifbic_len.  Otherwise one
 *	struct ifbreq per member, then one per span port, is staged in a
 *	temporary buffer and copied out to ifbic_req; ifbic_len is set to
 *	the number of bytes produced.
 *
 *	The bridge lock is dropped around the allocation and the copyout,
 *	so the lists may change in between; `len' never exceeds the
 *	allocated size, which keeps the staging writes in bounds.
 *
 *	Returns ENOMEM from the expanding function if the staging buffer
 *	cannot be allocated.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif; \
	struct ifbreq breq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
 \
	/* first pass: count the entries to size the buffer */ \
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) \
		count++; \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) \
		count++; \
 \
	buflen = sizeof (breq) * count; \
	if (bifc->ifbic_len == 0) { \
		/* size query only */ \
		bifc->ifbic_len = buflen; \
		return (0); \
	} \
	BRIDGE_UNLOCK(sc); \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	if (outbuf == NULL && buflen != 0) { \
		/* allocation failed: do not write through a NULL buffer */ \
		return (ENOMEM); \
	} \
 \
	count = 0; \
	buf = outbuf; \
	len = min(bifc->ifbic_len, buflen); \
	bzero(&breq, sizeof (breq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if (len < sizeof (breq)) \
			break; \
 \
		snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
		    "%s", bif->bif_ifp->if_xname); \
		/* Fill in the ifbreq structure */ \
		error = bridge_ioctl_gifflags(sc, &breq, sizeof(breq)); \
		if (error) \
			break; \
		memcpy(buf, &breq, sizeof (breq)); \
		count++; \
		buf += sizeof (breq); \
		len -= sizeof (breq); \
	} \
	/* \
	 * NOTE(review): breq is cleared only once above, so span entries \
	 * carry over the remaining fields written for the last member \
	 * entry -- confirm whether that is intended. \
	 */ \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) { \
		if (len < sizeof (breq)) \
			break; \
 \
		snprintf(breq.ifbr_ifsname, \
		    sizeof (breq.ifbr_ifsname), \
		    "%s", bif->bif_ifp->if_xname); \
		breq.ifbr_ifsflags = bif->bif_ifflags; \
		breq.ifbr_portno \
		    = bif->bif_ifp->if_index & 0xfff; \
		memcpy(buf, &breq, sizeof (breq)); \
		count++; \
		buf += sizeof (breq); \
		len -= sizeof (breq); \
	} \
 \
	/* copy out with the bridge lock dropped */ \
	BRIDGE_UNLOCK(sc); \
	bifc->ifbic_len = sizeof (breq) * count; \
	if (bifc->ifbic_len > 0) { \
		error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);\
	} \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
} while (0)
3335
3336 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3337 bridge_ioctl_gifs64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3338 {
3339 struct ifbifconf64 * __single bifc = arg;
3340 int error = 0;
3341
3342 BRIDGE_IOCTL_GIFS;
3343
3344 return error;
3345 }
3346
3347 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3348 bridge_ioctl_gifs32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3349 {
3350 struct ifbifconf32 * __single bifc = arg;
3351 int error = 0;
3352
3353 BRIDGE_IOCTL_GIFS;
3354
3355 return error;
3356 }
3357
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of bridge_ioctl_rts32() and bridge_ioctl_rts64().
 *	Expects `sc', `bac', `bareq' and an int `error' to be in scope in
 *	the expanding function, and always ends by executing `return' in
 *	that function.
 *
 *	Returns 0 immediately if bac->ifbac_len is zero.  Otherwise one
 *	`bareq' record per entry on sc_rtlist is staged in a temporary
 *	buffer and copied out to ifbac_req; ifbac_len is set to the number
 *	of bytes produced.  ifba_expire is the remaining lifetime of a
 *	dynamic entry that has not yet expired, and 0 for every other entry.
 *
 *	The bridge lock is dropped around the allocation and the copyout;
 *	`len' never exceeds the allocated size, which keeps the staging
 *	writes in bounds if the list grows in between.
 *
 *	Returns ENOMEM if the staging buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_RTS do { \
	struct bridge_rtnode *brt; \
	char *buf; \
	char *outbuf = NULL; \
	unsigned int count, buflen, len; \
	unsigned long now; \
 \
	if (bac->ifbac_len == 0) \
		return (0); \
 \
	bzero(&bareq, sizeof (bareq)); \
	/* first pass: count the entries to size the buffer */ \
	count = 0; \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) \
		count++; \
	buflen = sizeof (bareq) * count; \
 \
	BRIDGE_UNLOCK(sc); \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	if (outbuf == NULL && buflen != 0) { \
		/* allocation failed: do not write through a NULL buffer */ \
		return (ENOMEM); \
	} \
 \
	count = 0; \
	buf = outbuf; \
	len = min(bac->ifbac_len, buflen); \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
		if (len < sizeof (bareq)) \
			goto out; \
		snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname), \
		    "%s", brt->brt_ifp->if_xname); \
		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
		bareq.ifba_vlan = brt->brt_vlan; \
		/* \
		 * bareq is reused for every entry: reset the expiry so an \
		 * already-expired dynamic entry does not report the value \
		 * left over from the previous entry. \
		 */ \
		bareq.ifba_expire = 0; \
		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
			now = (unsigned long) net_uptime(); \
			if (now < brt->brt_expire) \
				bareq.ifba_expire = \
				    brt->brt_expire - now; \
		} \
		bareq.ifba_flags = brt->brt_flags; \
 \
		memcpy(buf, &bareq, sizeof (bareq)); \
		count++; \
		buf += sizeof (bareq); \
		len -= sizeof (bareq); \
	} \
out: \
	bac->ifbac_len = sizeof (bareq) * count; \
	if (outbuf != NULL) { \
		/* copy out with the bridge lock dropped */ \
		BRIDGE_UNLOCK(sc); \
		if (bac->ifbac_len > 0) { \
			error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);\
		} \
		kfree_data(outbuf, buflen); \
		BRIDGE_LOCK(sc); \
	} \
	return (error); \
} while (0)
3414
3415 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3416 bridge_ioctl_rts64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3417 {
3418 struct ifbaconf64 * __single bac = arg;
3419 struct ifbareq64 bareq;
3420 int error = 0;
3421
3422 BRIDGE_IOCTL_RTS;
3423 return error;
3424 }
3425
3426 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3427 bridge_ioctl_rts32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3428 {
3429 struct ifbaconf32 * __single bac = arg;
3430 struct ifbareq32 bareq;
3431 int error = 0;
3432
3433 BRIDGE_IOCTL_RTS;
3434 return error;
3435 }
3436
3437 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3438 bridge_ioctl_saddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3439 {
3440 struct ifbareq32 * __single req = arg;
3441 struct bridge_iflist *bif;
3442 int error;
3443
3444 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3445 if (bif == NULL) {
3446 return ENOENT;
3447 }
3448
3449 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3450 req->ifba_flags);
3451
3452 return error;
3453 }
3454
3455 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3456 bridge_ioctl_saddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3457 {
3458 struct ifbareq64 * __single req = arg;
3459 struct bridge_iflist *bif;
3460 int error;
3461
3462 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3463 if (bif == NULL) {
3464 return ENOENT;
3465 }
3466
3467 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3468 req->ifba_flags);
3469
3470 return error;
3471 }
3472
3473 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3474 bridge_ioctl_sto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3475 {
3476 struct ifbrparam * __single param = arg;
3477
3478 sc->sc_brttimeout = param->ifbrp_ctime;
3479 return 0;
3480 }
3481
3482 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3483 bridge_ioctl_gto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3484 {
3485 struct ifbrparam * __single param = arg;
3486
3487 param->ifbrp_ctime = sc->sc_brttimeout;
3488 return 0;
3489 }
3490
3491 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3492 bridge_ioctl_daddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3493 {
3494 struct ifbareq32 * __single req = arg;
3495
3496 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3497 }
3498
3499 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3500 bridge_ioctl_daddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3501 {
3502 struct ifbareq64 * __single req = arg;
3503
3504 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3505 }
3506
3507 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3508 bridge_ioctl_flush(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3509 {
3510 struct ifbreq * __single req = arg;
3511
3512 bridge_rtflush(sc, req->ifbr_ifsflags);
3513 return 0;
3514 }
3515
3516 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3517 bridge_ioctl_gpri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3518 {
3519 struct ifbrparam * __single param = arg;
3520 struct bstp_state *bs = &sc->sc_stp;
3521
3522 param->ifbrp_prio = bs->bs_bridge_priority;
3523 return 0;
3524 }
3525
3526 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3527 bridge_ioctl_spri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3528 {
3529 #if BRIDGESTP
3530 struct ifbrparam *param = arg;
3531
3532 return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3533 #else /* !BRIDGESTP */
3534 #pragma unused(sc, arg)
3535 return EOPNOTSUPP;
3536 #endif /* !BRIDGESTP */
3537 }
3538
3539 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3540 bridge_ioctl_ght(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3541 {
3542 struct ifbrparam * __single param = arg;
3543 struct bstp_state *bs = &sc->sc_stp;
3544
3545 param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3546 return 0;
3547 }
3548
3549 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3550 bridge_ioctl_sht(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3551 {
3552 #if BRIDGESTP
3553 struct ifbrparam *param = arg;
3554
3555 return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3556 #else /* !BRIDGESTP */
3557 #pragma unused(sc, arg)
3558 return EOPNOTSUPP;
3559 #endif /* !BRIDGESTP */
3560 }
3561
3562 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3563 bridge_ioctl_gfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3564 {
3565 struct ifbrparam * __single param;
3566 struct bstp_state *bs;
3567
3568 param = arg;
3569 bs = &sc->sc_stp;
3570 param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3571 return 0;
3572 }
3573
3574 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3575 bridge_ioctl_sfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3576 {
3577 #if BRIDGESTP
3578 struct ifbrparam *param = arg;
3579
3580 return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3581 #else /* !BRIDGESTP */
3582 #pragma unused(sc, arg)
3583 return EOPNOTSUPP;
3584 #endif /* !BRIDGESTP */
3585 }
3586
3587 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3588 bridge_ioctl_gma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3589 {
3590 struct ifbrparam * __single param;
3591 struct bstp_state *bs;
3592
3593 param = arg;
3594 bs = &sc->sc_stp;
3595 param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3596 return 0;
3597 }
3598
3599 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3600 bridge_ioctl_sma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3601 {
3602 #if BRIDGESTP
3603 struct ifbrparam *param = arg;
3604
3605 return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3606 #else /* !BRIDGESTP */
3607 #pragma unused(sc, arg)
3608 return EOPNOTSUPP;
3609 #endif /* !BRIDGESTP */
3610 }
3611
3612 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3613 bridge_ioctl_sifprio(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3614 {
3615 #if BRIDGESTP
3616 struct ifbreq *req = arg;
3617 struct bridge_iflist *bif;
3618
3619 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3620 if (bif == NULL) {
3621 return ENOENT;
3622 }
3623
3624 return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3625 #else /* !BRIDGESTP */
3626 #pragma unused(sc, arg)
3627 return EOPNOTSUPP;
3628 #endif /* !BRIDGESTP */
3629 }
3630
3631 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3632 bridge_ioctl_sifcost(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3633 {
3634 #if BRIDGESTP
3635 struct ifbreq *req = arg;
3636 struct bridge_iflist *bif;
3637
3638 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3639 if (bif == NULL) {
3640 return ENOENT;
3641 }
3642
3643 return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3644 #else /* !BRIDGESTP */
3645 #pragma unused(sc, arg)
3646 return EOPNOTSUPP;
3647 #endif /* !BRIDGESTP */
3648 }
3649
3650 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3651 bridge_ioctl_gfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3652 {
3653 struct ifbrparam * __single param = arg;
3654
3655 param->ifbrp_filter = sc->sc_filter_flags;
3656
3657 return 0;
3658 }
3659
3660 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3661 bridge_ioctl_sfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3662 {
3663 struct ifbrparam * __single param = arg;
3664
3665 if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3666 return EINVAL;
3667 }
3668
3669 if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3670 return EINVAL;
3671 }
3672
3673 sc->sc_filter_flags = param->ifbrp_filter;
3674
3675 return 0;
3676 }
3677
3678 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3679 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3680 {
3681 struct ifbreq * __single req = arg;
3682 struct bridge_iflist *bif;
3683
3684 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3685 if (bif == NULL) {
3686 return ENOENT;
3687 }
3688
3689 bif->bif_addrmax = req->ifbr_addrmax;
3690 return 0;
3691 }
3692
3693 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3694 bridge_ioctl_addspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3695 {
3696 struct ifbreq * __single req = arg;
3697 struct bridge_iflist *bif = NULL;
3698 struct ifnet *ifs;
3699
3700 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3701 if (ifs == NULL) {
3702 return ENOENT;
3703 }
3704
3705 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3706 return EINVAL;
3707 }
3708
3709 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3710 if (ifs == bif->bif_ifp) {
3711 return EBUSY;
3712 }
3713
3714 if (ifs->if_bridge != NULL) {
3715 return EBUSY;
3716 }
3717
3718 switch (ifs->if_type) {
3719 case IFT_ETHER:
3720 case IFT_L2VLAN:
3721 case IFT_IEEE8023ADLAG:
3722 break;
3723 default:
3724 return EINVAL;
3725 }
3726
3727 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3728
3729 bif->bif_ifp = ifs;
3730 bif->bif_ifflags = IFBIF_SPAN;
3731
3732 ifnet_reference(bif->bif_ifp);
3733
3734 TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3735
3736 return 0;
3737 }
3738
3739 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3740 bridge_ioctl_delspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3741 {
3742 struct ifbreq * __single req = arg;
3743 struct bridge_iflist *bif;
3744 struct ifnet *ifs;
3745
3746 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3747 if (ifs == NULL) {
3748 return ENOENT;
3749 }
3750
3751 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3752 if (ifs == bif->bif_ifp) {
3753 break;
3754 }
3755
3756 if (bif == NULL) {
3757 return ENOENT;
3758 }
3759
3760 bridge_delete_span(sc, bif);
3761
3762 return 0;
3763 }
3764
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of bridge_ioctl_gbparam32() and bridge_ioctl_gbparam64().
 *	Expects `sc' and `req' to be in scope in the expanding function and
 *	copies the bridge's STP parameters from sc->sc_stp into *req.
 *	The three timer values are reported shifted right by 8 bits; the
 *	root port is reported as its interface index, or 0 when there is
 *	no root port.
 */
#define BRIDGE_IOCTL_GBPARAM do { \
	struct bstp_state *bs = &sc->sc_stp; \
	struct bstp_port *root_port = bs->bs_root_port; \
 \
	/* timers */ \
	req->ifbop_maxage = bs->bs_bridge_max_age >> 8; \
	req->ifbop_hellotime = bs->bs_bridge_htime >> 8; \
	req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8; \
 \
	/* root port, if any */ \
	if (root_port != NULL) \
		req->ifbop_root_port = root_port->bp_ifp->if_index; \
	else \
		req->ifbop_root_port = 0; \
 \
	/* bridge-wide settings */ \
	req->ifbop_holdcount = bs->bs_txholdcount; \
	req->ifbop_priority = bs->bs_bridge_priority; \
	req->ifbop_protocol = bs->bs_protover; \
 \
	/* priority vectors */ \
	req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost; \
	req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id; \
	req->ifbop_designated_root = bs->bs_root_pv.pv_root_id; \
	req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id; \
 \
	/* time of the last topology change */ \
	req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec; \
	req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec; \
} while (0)
3789
/*
 * bridge_ioctl_gbparam32:
 *
 *	Fill a struct ifbropreq32 with the bridge's spanning tree parameters
 *	by expanding BRIDGE_IOCTL_GBPARAM.  Always returns 0.
 */
static int
bridge_ioctl_gbparam32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
{
	/* the macro below refers to this variable by the name `req` */
	struct ifbropreq32 * __single req = arg;

	BRIDGE_IOCTL_GBPARAM;
	return 0;
}
3798
/*
 * bridge_ioctl_gbparam64:
 *
 *	Fill a struct ifbropreq64 with the bridge's spanning tree parameters
 *	by expanding BRIDGE_IOCTL_GBPARAM.  Always returns 0.
 */
static int
bridge_ioctl_gbparam64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
{
	/* the macro below refers to this variable by the name `req` */
	struct ifbropreq64 * __single req = arg;

	BRIDGE_IOCTL_GBPARAM;
	return 0;
}
3807
3808 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3809 bridge_ioctl_grte(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3810 {
3811 struct ifbrparam * __single param = arg;
3812
3813 param->ifbrp_cexceeded = sc->sc_brtexceeded;
3814 return 0;
3815 }
3816
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body of the 32-bit and 64-bit "get STP port list" ioctl
 *	handlers.  Expects `sc`, `bifstp` and an `int error` to be in scope
 *	where it is expanded, and returns from the expanding function on
 *	every path.
 *
 *	When the caller passes ifbpstp_len == 0, only the buffer size needed
 *	for all members with IFBIF_STP set is reported back.  Otherwise one
 *	struct ifbpstpreq per such member is copied out to ifbpstp_req,
 *	limited by the smaller of the caller's length and the size computed
 *	before the allocation, and ifbpstp_len is set to the bytes produced.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout; it is held again whenever the macro returns.  ENOMEM is
 *	returned if the temporary buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_GIFSSTP do { \
	struct bridge_iflist *bif; \
	struct bstp_port *bp; \
	struct ifbpstpreq bpreq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
	\
	/* first pass: count the STP-enabled members */ \
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if ((bif->bif_ifflags & IFBIF_STP) != 0) \
			count++; \
	} \
	\
	buflen = sizeof (bpreq) * count; \
	if (bifstp->ifbpstp_len == 0) { \
		/* size query only */ \
		bifstp->ifbpstp_len = buflen; \
		return (0); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	if (outbuf == NULL && buflen != 0) { \
		/* allocation failed: do not write through a NULL buffer */ \
		return (ENOMEM); \
	} \
	\
	/* second pass: the list may have changed, so stay within `len` */ \
	count = 0; \
	buf = outbuf; \
	len = min(bifstp->ifbpstp_len, buflen); \
	bzero(&bpreq, sizeof (bpreq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if (len < sizeof (bpreq)) \
			break; \
		\
		if ((bif->bif_ifflags & IFBIF_STP) == 0) \
			continue; \
		\
		bp = &bif->bif_stp; \
		bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; \
		bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; \
		bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; \
		bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; \
		bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
		bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; \
		\
		memcpy(buf, &bpreq, sizeof (bpreq)); \
		count++; \
		buf += sizeof (bpreq); \
		len -= sizeof (bpreq); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	bifstp->ifbpstp_len = sizeof (bpreq) * count; \
	if (bifstp->ifbpstp_len > 0) { \
		error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len);\
	} \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
	return (error); \
} while (0)
3874
/*
 * bridge_ioctl_gifsstp32:
 *
 *	Handler for the STP port list request using struct ifbpstpconf32.
 *	All of the work, including the return, is done by expanding
 *	BRIDGE_IOCTL_GIFSSTP.
 */
static int
bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
{
	/* the macro below refers to these variables as `bifstp` and `error` */
	struct ifbpstpconf32 * __single bifstp = arg;
	int error = 0;

	BRIDGE_IOCTL_GIFSSTP;
	/* the macro returns on every path; this satisfies the compiler */
	return error;
}
3884
/*
 * bridge_ioctl_gifsstp64:
 *
 *	Handler for the STP port list request using struct ifbpstpconf64.
 *	All of the work, including the return, is done by expanding
 *	BRIDGE_IOCTL_GIFSSTP.
 */
static int
bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
{
	/* the macro below refers to these variables as `bifstp` and `error` */
	struct ifbpstpconf64 * __single bifstp = arg;
	int error = 0;

	BRIDGE_IOCTL_GIFSSTP;
	/* the macro returns on every path; this satisfies the compiler */
	return error;
}
3894
3895 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3896 bridge_ioctl_sproto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3897 {
3898 #if BRIDGESTP
3899 struct ifbrparam *param = arg;
3900
3901 return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3902 #else /* !BRIDGESTP */
3903 #pragma unused(sc, arg)
3904 return EOPNOTSUPP;
3905 #endif /* !BRIDGESTP */
3906 }
3907
3908 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3909 bridge_ioctl_stxhc(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3910 {
3911 #if BRIDGESTP
3912 struct ifbrparam *param = arg;
3913
3914 return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3915 #else /* !BRIDGESTP */
3916 #pragma unused(sc, arg)
3917 return EOPNOTSUPP;
3918 #endif /* !BRIDGESTP */
3919 }
3920
3921
3922 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3923 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3924 {
3925 struct ifbrhostfilter * __single req = arg;
3926 struct bridge_iflist *bif;
3927
3928 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3929 if (bif == NULL) {
3930 return ENOENT;
3931 }
3932
3933 bzero(req, sizeof(struct ifbrhostfilter));
3934 if (bif->bif_flags & BIFF_HOST_FILTER) {
3935 req->ifbrhf_flags |= IFBRHF_ENABLED;
3936 bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3937 ETHER_ADDR_LEN);
3938 req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3939 }
3940 return 0;
3941 }
3942
3943 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3944 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3945 {
3946 struct ifbrhostfilter * __single req = arg;
3947 struct bridge_iflist *bif;
3948
3949 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3950 if (bif == NULL) {
3951 return ENOENT;
3952 }
3953 if (bif_has_mac_nat(bif)) {
3954 /* no host filter with MAC-NAT */
3955 return EINVAL;
3956 }
3957 if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3958 bif->bif_flags |= BIFF_HOST_FILTER;
3959
3960 if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3961 bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3962 ETHER_ADDR_LEN);
3963 if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3964 ETHER_ADDR_LEN) != 0) {
3965 bif->bif_flags |= BIFF_HF_HWSRC;
3966 } else {
3967 bif->bif_flags &= ~BIFF_HF_HWSRC;
3968 }
3969 }
3970 if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3971 bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3972 if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3973 bif->bif_flags |= BIFF_HF_IPSRC;
3974 } else {
3975 bif->bif_flags &= ~BIFF_HF_IPSRC;
3976 }
3977 }
3978 } else {
3979 bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3980 BIFF_HF_IPSRC);
3981 bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3982 bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3983 }
3984
3985 return 0;
3986 }
3987
3988 static char *__indexable
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * __indexable buf,unsigned int * len_p)3989 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3990 unsigned int * count_p, char *__indexable buf,
3991 unsigned int * len_p)
3992 {
3993 unsigned int count = *count_p;
3994 struct ifbrmne ifbmne;
3995 unsigned int len = *len_p;
3996 struct mac_nat_entry *mne;
3997 unsigned long now;
3998
3999 bzero(&ifbmne, sizeof(ifbmne));
4000 LIST_FOREACH(mne, list, mne_list) {
4001 if (len < sizeof(ifbmne)) {
4002 break;
4003 }
4004 snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
4005 "%s", mne->mne_bif->bif_ifp->if_xname);
4006 memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
4007 sizeof(ifbmne.ifbmne_mac));
4008 now = (unsigned long) net_uptime();
4009 if (now < mne->mne_expire) {
4010 ifbmne.ifbmne_expire = mne->mne_expire - now;
4011 } else {
4012 ifbmne.ifbmne_expire = 0;
4013 }
4014 if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
4015 ifbmne.ifbmne_af = AF_INET6;
4016 ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
4017 } else {
4018 ifbmne.ifbmne_af = AF_INET;
4019 ifbmne.ifbmne_ip_addr = mne->mne_ip;
4020 }
4021 memcpy(buf, &ifbmne, sizeof(ifbmne));
4022 count++;
4023 buf += sizeof(ifbmne);
4024 len -= sizeof(ifbmne);
4025 }
4026 *count_p = count;
4027 *len_p = len;
4028 return buf;
4029 }
4030
4031 /*
4032 * bridge_ioctl_gmnelist()
4033 * Perform the get mac_nat_entry list ioctl.
4034 *
4035 * Note:
4036 * The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
4037 * field size/layout except for the last field ifbml_buf, the user-supplied
4038 * buffer pointer. That is passed in separately via the 'user_addr'
4039 * parameter from the respective 32-bit or 64-bit ioctl routine.
4040 */
4041 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)4042 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
4043 user_addr_t user_addr)
4044 {
4045 unsigned int count;
4046 char *buf;
4047 int error = 0;
4048 char *outbuf = NULL;
4049 struct mac_nat_entry *mne;
4050 unsigned int buflen;
4051 unsigned int len;
4052
4053 mnl->ifbml_elsize = sizeof(struct ifbrmne);
4054 count = 0;
4055 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
4056 count++;
4057 }
4058 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
4059 count++;
4060 }
4061 buflen = sizeof(struct ifbrmne) * count;
4062 if (buflen == 0 || mnl->ifbml_len == 0) {
4063 mnl->ifbml_len = buflen;
4064 return error;
4065 }
4066 BRIDGE_UNLOCK(sc);
4067 outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);
4068 BRIDGE_LOCK(sc);
4069 count = 0;
4070 buf = outbuf;
4071 len = min(mnl->ifbml_len, buflen);
4072 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
4073 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
4074 mnl->ifbml_len = count * sizeof(struct ifbrmne);
4075 BRIDGE_UNLOCK(sc);
4076 if (mnl->ifbml_len > 0) {
4077 error = copyout(outbuf, user_addr, mnl->ifbml_len);
4078 }
4079 kfree_data(outbuf, buflen);
4080 BRIDGE_LOCK(sc);
4081 return error;
4082 }
4083
4084 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4085 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4086 {
4087 struct ifbrmnelist64 * __single mnl = arg;
4088
4089 return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
4090 }
4091
4092 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4093 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4094 {
4095 struct ifbrmnelist32 * __single mnl = arg;
4096
4097 return bridge_ioctl_gmnelist(sc, arg,
4098 CAST_USER_ADDR_T(mnl->ifbml_buf));
4099 }
4100
4101 /*
4102 * bridge_ioctl_gifstats()
4103 * Return per-member stats.
4104 *
4105 * Note:
4106 * The ifbrmreq32 and ifbrmreq64 structures have the same
4107 * field size/layout except for the last field brmr_buf, the user-supplied
4108 * buffer pointer. That is passed in separately via the 'user_addr'
4109 * parameter from the respective 32-bit or 64-bit ioctl routine.
4110 */
4111 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)4112 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
4113 user_addr_t user_addr)
4114 {
4115 struct bridge_iflist *bif;
4116 int error = 0;
4117 unsigned int buflen;
4118
4119 bif = bridge_lookup_member(sc, mreq->brmr_ifname);
4120 if (bif == NULL) {
4121 error = ENOENT;
4122 goto done;
4123 }
4124
4125 buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
4126 if (buflen == 0 || mreq->brmr_len == 0) {
4127 mreq->brmr_len = buflen;
4128 goto done;
4129 }
4130 if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
4131 error = ENOBUFS;
4132 goto done;
4133 }
4134 mreq->brmr_len = buflen;
4135 error = copyout(&bif->bif_stats, user_addr, buflen);
4136 done:
4137 return error;
4138 }
4139
4140 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4141 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4142 {
4143 struct ifbrmreq32 * __single mreq = arg;
4144
4145 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4146 }
4147
4148 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4149 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4150 {
4151 struct ifbrmreq64 * __single mreq = arg;
4152
4153 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4154 }
4155
4156 /*
4157 * bridge_proto_attach_changed
4158 *
4159 * Called when protocol attachment on the interface changes.
4160 */
4161 static void
bridge_proto_attach_changed(struct ifnet * ifp)4162 bridge_proto_attach_changed(struct ifnet *ifp)
4163 {
4164 boolean_t changed = FALSE;
4165 struct bridge_iflist *bif;
4166 boolean_t input_broadcast;
4167 struct bridge_softc * __single sc = ifp->if_bridge;
4168
4169 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4170 if (sc == NULL) {
4171 return;
4172 }
4173 input_broadcast = interface_needs_input_broadcast(ifp);
4174 BRIDGE_LOCK(sc);
4175 bif = bridge_lookup_member_if(sc, ifp);
4176 if (bif != NULL) {
4177 changed = bif_set_input_broadcast(bif, input_broadcast);
4178 }
4179 BRIDGE_UNLOCK(sc);
4180 if (changed) {
4181 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
4182 "%s input broadcast %s", ifp->if_xname,
4183 input_broadcast ? "ENABLED" : "DISABLED");
4184 }
4185 return;
4186 }
4187
4188 /*
4189 * interface_media_active:
4190 *
4191 * Tells if an interface media is active.
4192 */
4193 static int
interface_media_active(struct ifnet * ifp)4194 interface_media_active(struct ifnet *ifp)
4195 {
4196 struct ifmediareq ifmr;
4197 int status = 0;
4198
4199 bzero(&ifmr, sizeof(ifmr));
4200 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
4201 if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
4202 status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
4203 }
4204 }
4205
4206 return status;
4207 }
4208
4209 /*
4210 * bridge_updatelinkstatus:
4211 *
4212 * Update the media active status of the bridge based on the
4213 * media active status of its member.
4214 * If changed, return the corresponding onf/off link event.
4215 */
4216 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)4217 bridge_updatelinkstatus(struct bridge_softc *sc)
4218 {
4219 struct bridge_iflist *bif;
4220 int active_member = 0;
4221 u_int32_t event_code = 0;
4222
4223 BRIDGE_LOCK_ASSERT_HELD(sc);
4224
4225 /*
4226 * Find out if we have an active interface
4227 */
4228 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
4229 if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
4230 active_member = 1;
4231 break;
4232 }
4233 }
4234
4235 if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4236 sc->sc_flags |= SCF_MEDIA_ACTIVE;
4237 event_code = KEV_DL_LINK_ON;
4238 } else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4239 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
4240 event_code = KEV_DL_LINK_OFF;
4241 }
4242
4243 return event_code;
4244 }
4245
4246 /*
4247 * bridge_iflinkevent:
4248 */
4249 static void
bridge_iflinkevent(struct ifnet * ifp)4250 bridge_iflinkevent(struct ifnet *ifp)
4251 {
4252 struct bridge_softc * __single sc = ifp->if_bridge;
4253 struct bridge_iflist *bif;
4254 u_int32_t event_code = 0;
4255 int media_active;
4256
4257 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4258
4259 /* Check if the interface is a bridge member */
4260 if (sc == NULL) {
4261 return;
4262 }
4263
4264 media_active = interface_media_active(ifp);
4265 BRIDGE_LOCK(sc);
4266 bif = bridge_lookup_member_if(sc, ifp);
4267 if (bif != NULL) {
4268 if (media_active) {
4269 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4270 } else {
4271 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4272 }
4273 if (sc->sc_mac_nat_bif != NULL) {
4274 bridge_mac_nat_flush_entries(sc, bif);
4275 }
4276
4277 event_code = bridge_updatelinkstatus(sc);
4278 }
4279 BRIDGE_UNLOCK(sc);
4280
4281 if (event_code != 0) {
4282 bridge_link_event(sc->sc_ifp, event_code);
4283 }
4284 }
4285
4286 /*
4287 * bridge_delayed_callback:
4288 *
4289 * Makes a delayed call
4290 */
4291 static void
bridge_delayed_callback(void * param,__unused void * param2)4292 bridge_delayed_callback(void *param, __unused void *param2)
4293 {
4294 struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4295 struct bridge_softc *sc = call->bdc_sc;
4296
4297 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4298 if (bridge_delayed_callback_delay > 0) {
4299 struct timespec ts;
4300
4301 ts.tv_sec = bridge_delayed_callback_delay;
4302 ts.tv_nsec = 0;
4303
4304 BRIDGE_LOG(LOG_NOTICE, 0,
4305 "sleeping for %d seconds",
4306 bridge_delayed_callback_delay);
4307
4308 msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4309 __func__, &ts);
4310
4311 BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4312 }
4313 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4314
4315 BRIDGE_LOCK(sc);
4316
4317 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4318 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4319 "%s call 0x%llx flags 0x%x",
4320 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4321 call->bdc_flags);
4322 }
4323 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4324
4325 if (call->bdc_flags & BDCF_CANCELLING) {
4326 wakeup(call);
4327 } else {
4328 if ((sc->sc_flags & SCF_DETACHING) == 0) {
4329 (*call->bdc_func)(sc);
4330 }
4331 }
4332 call->bdc_flags &= ~BDCF_OUTSTANDING;
4333 BRIDGE_UNLOCK(sc);
4334 }
4335
4336 /*
4337 * bridge_schedule_delayed_call:
4338 *
4339 * Schedule a function to be called on a separate thread
4340 * The actual call may be scheduled to run at a given time or ASAP.
4341 */
4342 static void
4343 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4344 {
4345 uint64_t deadline = 0;
4346 struct bridge_softc *sc = call->bdc_sc;
4347
4348 BRIDGE_LOCK_ASSERT_HELD(sc);
4349
4350 if ((sc->sc_flags & SCF_DETACHING) ||
4351 (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4352 return;
4353 }
4354
4355 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4356 nanoseconds_to_absolutetime(
4357 (uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4358 call->bdc_ts.tv_nsec, &deadline);
4359 clock_absolutetime_interval_to_deadline(deadline, &deadline);
4360 }
4361
4362 call->bdc_flags = BDCF_OUTSTANDING;
4363
4364 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4365 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4366 "%s call 0x%llx flags 0x%x",
4367 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4368 call->bdc_flags);
4369 }
4370 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4371
4372 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4373 thread_call_func_delayed(
4374 (thread_call_func_t)bridge_delayed_callback,
4375 call, deadline);
4376 } else {
4377 if (call->bdc_thread_call == NULL) {
4378 call->bdc_thread_call = thread_call_allocate(
4379 (thread_call_func_t)bridge_delayed_callback,
4380 call);
4381 }
4382 thread_call_enter(call->bdc_thread_call);
4383 }
4384 }
4385
4386 /*
4387 * bridge_cancel_delayed_call:
4388 *
4389 * Cancel a queued or running delayed call.
4390 * If call is running, does not return until the call is done to
4391 * prevent race condition with the brigde interface getting destroyed
4392 */
4393 static void
4394 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4395 {
4396 boolean_t result;
4397 struct bridge_softc *sc = call->bdc_sc;
4398
4399 /*
4400 * The call was never scheduled
4401 */
4402 if (sc == NULL) {
4403 return;
4404 }
4405
4406 BRIDGE_LOCK_ASSERT_HELD(sc);
4407
4408 call->bdc_flags |= BDCF_CANCELLING;
4409
4410 while (call->bdc_flags & BDCF_OUTSTANDING) {
4411 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4412 "%s call 0x%llx flags 0x%x",
4413 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4414 call->bdc_flags);
4415 result = thread_call_func_cancel(
4416 (thread_call_func_t)bridge_delayed_callback, call, FALSE);
4417
4418 if (result) {
4419 /*
4420 * We managed to dequeue the delayed call
4421 */
4422 call->bdc_flags &= ~BDCF_OUTSTANDING;
4423 } else {
4424 /*
4425 * Wait for delayed call do be done running
4426 */
4427 msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4428 }
4429 }
4430 call->bdc_flags &= ~BDCF_CANCELLING;
4431 }
4432
4433 /*
4434 * bridge_cleanup_delayed_call:
4435 *
4436 * Dispose resource allocated for a delayed call
4437 * Assume the delayed call is not queued or running .
4438 */
4439 static void
4440 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4441 {
4442 boolean_t result;
4443 struct bridge_softc *sc = call->bdc_sc;
4444
4445 /*
4446 * The call was never scheduled
4447 */
4448 if (sc == NULL) {
4449 return;
4450 }
4451
4452 BRIDGE_LOCK_ASSERT_HELD(sc);
4453
4454 VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4455 VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4456
4457 if (call->bdc_thread_call != NULL) {
4458 result = thread_call_free(call->bdc_thread_call);
4459 if (result == FALSE) {
4460 panic("%s thread_call_free() failed for call %p",
4461 __func__, call);
4462 }
4463 call->bdc_thread_call = NULL;
4464 }
4465 }
4466
4467 /*
4468 * bridge_init:
4469 *
4470 * Initialize a bridge interface.
4471 */
4472 static int
4473 bridge_init(struct ifnet *ifp)
4474 {
4475 struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4476 errno_t error;
4477
4478 BRIDGE_LOCK_ASSERT_HELD(sc);
4479
4480 if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4481 return 0;
4482 }
4483
4484 error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4485
4486 /*
4487 * Calling bridge_aging_timer() is OK as there are no entries to
4488 * age so we're just going to arm the timer
4489 */
4490 bridge_aging_timer(sc);
4491 #if BRIDGESTP
4492 if (error == 0) {
4493 bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4494 }
4495 #endif /* BRIDGESTP */
4496 return error;
4497 }
4498
4499 /*
4500 * bridge_ifstop:
4501 *
4502 * Stop the bridge interface.
4503 */
4504 static void
4505 bridge_ifstop(struct ifnet *ifp, int disable)
4506 {
4507 #pragma unused(disable)
4508 struct bridge_softc * __single sc = ifp->if_softc;
4509
4510 BRIDGE_LOCK_ASSERT_HELD(sc);
4511
4512 if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4513 return;
4514 }
4515
4516 bridge_cancel_delayed_call(&sc->sc_aging_timer);
4517
4518 #if BRIDGESTP
4519 bstp_stop(&sc->sc_stp);
4520 #endif /* BRIDGESTP */
4521
4522 bridge_rtflush(sc, IFBF_FLUSHDYN);
4523 (void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4524 }
4525
4526 static const uint32_t checksum_request_flags = (MBUF_CSUM_REQ_TCP |
4527 MBUF_CSUM_REQ_UDP | MBUF_CSUM_REQ_TCPIPV6 | MBUF_CSUM_REQ_UDPIPV6);
4528
4529 static const mbuf_csum_performed_flags_t checksum_performed_all_good =
4530 (MBUF_CSUM_DID_IP | MBUF_CSUM_IP_GOOD
4531 | MBUF_CSUM_DID_DATA | MBUF_CSUM_PSEUDO_HDR);
4532
4533 /*
4534 * bridge_compute_cksum:
4535 *
4536 * If the packet has checksum flags, compare the hardware checksum
4537 * capabilities of the source and destination interfaces. If they
4538 * are the same, there's nothing to do. If they are different,
4539 * finalize the checksum so that it can be sent on the destination
4540 * interface.
4541 */
4542 static void
4543 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4544 {
4545 uint32_t csum_flags;
4546 uint16_t dst_hw_csum;
4547 uint32_t did_sw = 0;
4548 struct ether_header *eh;
4549 uint16_t src_hw_csum;
4550
4551 if (src_if == dst_if) {
4552 return;
4553 }
4554 csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4555 if (csum_flags == 0) {
4556 /* no checksum offload */
4557 return;
4558 }
4559
4560 /*
4561 * if destination/source differ in checksum offload
4562 * capabilities, finalize/compute the checksum
4563 */
4564 dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4565 src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4566 if (dst_hw_csum == src_hw_csum) {
4567 return;
4568 }
4569 eh = mtod(m, struct ether_header *);
4570 switch (eh->ether_type) {
4571 case HTONS_ETHERTYPE_IP:
4572 did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4573 break;
4574 case HTONS_ETHERTYPE_IPV6:
4575 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4576 break;
4577 }
4578 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4579 "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4580 src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4581 m->m_pkthdr.csum_flags);
4582 }
4583
4584 static inline errno_t
4585 bridge_transmit(ifnet_t ifp, mbuf_t m)
4586 {
4587 struct flowadv adv = { .code = FADV_SUCCESS };
4588 errno_t error;
4589 int flags = DLIL_OUTPUT_FLAGS_RAW;
4590
4591 flags = (if_bridge_output_skip_filters != 0)
4592 ? (DLIL_OUTPUT_FLAGS_RAW | DLIL_OUTPUT_FLAGS_SKIP_IF_FILTERS)
4593 : DLIL_OUTPUT_FLAGS_RAW;
4594 error = dlil_output(ifp, 0, m, NULL, NULL, flags, &adv);
4595 if (error == 0) {
4596 if (adv.code == FADV_FLOW_CONTROLLED) {
4597 error = EQFULL;
4598 } else if (adv.code == FADV_SUSPENDED) {
4599 error = EQSUSPENDED;
4600 }
4601 }
4602 return error;
4603 }
4604
4605 static int
4606 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4607 bool *is_fragmented)
4608 {
4609 int newoff;
4610
4611 *is_fragmented = false;
4612 while (1) {
4613 newoff = ip6_nexthdr(m, off, proto, nxtp);
4614 if (newoff < 0) {
4615 return off;
4616 } else if (newoff < off) {
4617 return -1; /* invalid */
4618 } else if (newoff == off) {
4619 return newoff;
4620 }
4621 off = newoff;
4622 proto = *nxtp;
4623 if (proto == IPPROTO_FRAGMENT) {
4624 *is_fragmented = true;
4625 }
4626 }
4627 }
4628
/*
 * __ATOMIC_INC:
 *
 *	Increment the lvalue `s` with os_atomic_inc() using relaxed ordering.
 *	The argument is parenthesized so that any lvalue expression can be
 *	passed safely.
 */
#define __ATOMIC_INC(s) os_atomic_inc(&(s), relaxed)
4630
4631 static int
4632 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4633 ip_packet_info_t info_p, struct bripstats * stats_p)
4634 {
4635 int error = 0;
4636 u_int hlen;
4637 u_int ip_hlen;
4638 u_int ip_pay_len;
4639 struct mbuf * m0 = *mp;
4640 int off;
4641 int opt_len = 0;
4642 int proto = 0;
4643
4644 bzero(info_p, sizeof(*info_p));
4645 if (is_ipv4) {
4646 struct ip * ip;
4647 u_int ip_total_len;
4648
4649 /* IPv4 */
4650 hlen = mac_hlen + sizeof(struct ip);
4651 if (m0->m_pkthdr.len < hlen) {
4652 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4653 "Short IP packet %d < %d",
4654 m0->m_pkthdr.len, hlen);
4655 error = _EBADIP;
4656 __ATOMIC_INC(stats_p->bips_bad_ip);
4657 goto done;
4658 }
4659 if (m0->m_len < hlen) {
4660 *mp = m0 = m_pullup(m0, hlen);
4661 if (m0 == NULL) {
4662 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4663 "m_pullup failed hlen %d",
4664 hlen);
4665 error = ENOBUFS;
4666 __ATOMIC_INC(stats_p->bips_bad_ip);
4667 goto done;
4668 }
4669 }
4670 ip = (struct ip *)mtodo(m0, mac_hlen);
4671 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4672 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4673 "bad IP version");
4674 error = _EBADIP;
4675 __ATOMIC_INC(stats_p->bips_bad_ip);
4676 goto done;
4677 }
4678 ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4679 if (ip_hlen < sizeof(struct ip)) {
4680 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4681 "bad IP header length %d < %d",
4682 ip_hlen,
4683 (int)sizeof(struct ip));
4684 error = _EBADIP;
4685 __ATOMIC_INC(stats_p->bips_bad_ip);
4686 goto done;
4687 }
4688 hlen = mac_hlen + ip_hlen;
4689 if (m0->m_len < hlen) {
4690 *mp = m0 = m_pullup(m0, hlen);
4691 if (m0 == NULL) {
4692 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4693 "m_pullup failed hlen %d",
4694 hlen);
4695 error = ENOBUFS;
4696 __ATOMIC_INC(stats_p->bips_bad_ip);
4697 goto done;
4698 }
4699 ip = (struct ip *)mtodo(m0, mac_hlen);
4700 }
4701
4702 ip_total_len = ntohs(ip->ip_len);
4703 if (ip_total_len < ip_hlen) {
4704 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4705 "IP total len %d < header len %d",
4706 ip_total_len, ip_hlen);
4707 error = _EBADIP;
4708 __ATOMIC_INC(stats_p->bips_bad_ip);
4709 goto done;
4710 }
4711 if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4712 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4713 "invalid IP payload length %d > %d",
4714 ip_total_len,
4715 (m0->m_pkthdr.len - mac_hlen));
4716 error = _EBADIP;
4717 __ATOMIC_INC(stats_p->bips_bad_ip);
4718 goto done;
4719 }
4720 ip_pay_len = ip_total_len - ip_hlen;
4721 info_p->ip_proto = ip->ip_p;
4722 info_p->ip_hdr = mtodo(m0, mac_hlen);
4723 info_p->ip_m0_len = m0->m_len - mac_hlen;
4724 info_p->ip_hlen = ip_hlen;
4725 #define FRAG_BITS (IP_OFFMASK | IP_MF)
4726 if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4727 info_p->ip_is_fragmented = true;
4728 }
4729 __ATOMIC_INC(stats_p->bips_ip);
4730 } else {
4731 struct ip6_hdr *ip6;
4732
4733 /* IPv6 */
4734 hlen = mac_hlen + sizeof(struct ip6_hdr);
4735 if (m0->m_pkthdr.len < hlen) {
4736 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4737 "short IPv6 packet %d < %d",
4738 m0->m_pkthdr.len, hlen);
4739 error = _EBADIPV6;
4740 __ATOMIC_INC(stats_p->bips_bad_ip6);
4741 goto done;
4742 }
4743 if (m0->m_len < hlen) {
4744 *mp = m0 = m_pullup(m0, hlen);
4745 if (m0 == NULL) {
4746 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4747 "m_pullup failed hlen %d",
4748 hlen);
4749 error = ENOBUFS;
4750 __ATOMIC_INC(stats_p->bips_bad_ip6);
4751 goto done;
4752 }
4753 }
4754 ip6 = (struct ip6_hdr *)(mtodo(m0, mac_hlen));
4755 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4756 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4757 "bad IPv6 version");
4758 error = _EBADIPV6;
4759 __ATOMIC_INC(stats_p->bips_bad_ip6);
4760 goto done;
4761 }
4762 off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4763 &info_p->ip_is_fragmented);
4764 if (off < 0 || m0->m_pkthdr.len < off) {
4765 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4766 "ip6_lasthdr() returned %d",
4767 off);
4768 error = _EBADIPV6;
4769 __ATOMIC_INC(stats_p->bips_bad_ip6);
4770 goto done;
4771 }
4772 ip_hlen = sizeof(*ip6);
4773 opt_len = off - mac_hlen - ip_hlen;
4774 if (opt_len < 0) {
4775 error = _EBADIPV6;
4776 __ATOMIC_INC(stats_p->bips_bad_ip6);
4777 goto done;
4778 }
4779 ip_pay_len = ntohs(ip6->ip6_plen);
4780 if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4781 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4782 "invalid IPv6 payload length %d > %d",
4783 ip_pay_len,
4784 (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4785 error = _EBADIPV6;
4786 __ATOMIC_INC(stats_p->bips_bad_ip6);
4787 goto done;
4788 }
4789 info_p->ip_proto = proto;
4790 info_p->ip_hdr = mtodo(m0, mac_hlen);
4791 info_p->ip_m0_len = m0->m_len - mac_hlen;
4792 info_p->ip_hlen = ip_hlen;
4793 __ATOMIC_INC(stats_p->bips_ip6);
4794 }
4795 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4796 "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4797 is_ipv4 ? '4' : '6',
4798 proto, ip_hlen, ip_pay_len, opt_len,
4799 m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4800 info_p->ip_pay_len = ip_pay_len;
4801 info_p->ip_opt_len = opt_len;
4802 info_p->ip_is_ipv4 = is_ipv4;
4803 done:
4804 return error;
4805 }
4806
4807 static int
4808 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4809 ip_packet_info_t info_p, struct bripstats * stats_p)
4810 {
4811 int error;
4812 u_int hlen;
4813
4814 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4815 if (error != 0) {
4816 goto done;
4817 }
4818 if (info_p->ip_proto != IPPROTO_TCP) {
4819 /* not a TCP frame, not an error, just a bad guess */
4820 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4821 "non-TCP (%d) IPv%c frame %d bytes",
4822 info_p->ip_proto, is_ipv4 ? '4' : '6',
4823 (*mp)->m_pkthdr.len);
4824 goto done;
4825 }
4826 if (info_p->ip_is_fragmented) {
4827 /* both TSO and IP fragmentation don't make sense */
4828 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4829 "fragmented TSO packet?");
4830 __ATOMIC_INC(stats_p->bips_bad_tcp);
4831 error = _EBADTCP;
4832 goto done;
4833 }
4834 hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4835 info_p->ip_opt_len;
4836 if ((*mp)->m_len < hlen) {
4837 *mp = m_pullup(*mp, hlen);
4838 if (*mp == NULL) {
4839 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4840 "m_pullup %d failed",
4841 hlen);
4842 __ATOMIC_INC(stats_p->bips_bad_tcp);
4843 error = _EBADTCP;
4844 goto done;
4845 }
4846 }
4847 info_p->ip_proto_hdr = info_p->ip_hdr + info_p->ip_hlen +
4848 info_p->ip_opt_len;
4849 done:
4850 return error;
4851 }
4852
4853 static inline void
4854 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4855 {
4856 if (proto == IPPROTO_TCP) {
4857 __ATOMIC_INC(stats_p->brcs_tcp_checksum);
4858 } else {
4859 __ATOMIC_INC(stats_p->brcs_udp_checksum);
4860 }
4861 return;
4862 }
4863
/*
 * Bit flags classifying the ethertype of a frame. The combined masks
 * allow testing for "any IP" or "IP or ARP" with a single AND.
 */
#define ETHER_TYPE_FLAG_NONE    0
#define ETHER_TYPE_FLAG_IPV4    (1 << 0)
#define ETHER_TYPE_FLAG_IPV6    (1 << 1)
#define ETHER_TYPE_FLAG_ARP     (1 << 2)
#define ETHER_TYPE_FLAG_IP      (ETHER_TYPE_FLAG_IPV4 | ETHER_TYPE_FLAG_IPV6)
#define ETHER_TYPE_FLAG_IP_ARP  (ETHER_TYPE_FLAG_IP | ETHER_TYPE_FLAG_ARP)
4870
4871 static inline bool
4872 ether_type_flag_is_ip(ether_type_flag_t flag)
4873 {
4874 return (flag & ETHER_TYPE_FLAG_IP) != 0;
4875 }
4876
4877 static inline ether_type_flag_t
4878 ether_type_flag_get(uint16_t ether_type)
4879 {
4880 ether_type_flag_t flag = ETHER_TYPE_FLAG_NONE;
4881
4882 switch (ether_type) {
4883 case HTONS_ETHERTYPE_IP:
4884 flag = ETHER_TYPE_FLAG_IPV4;
4885 break;
4886 case HTONS_ETHERTYPE_IPV6:
4887 flag = ETHER_TYPE_FLAG_IPV6;
4888 break;
4889 case HTONS_ETHERTYPE_ARP:
4890 flag = ETHER_TYPE_FLAG_ARP;
4891 break;
4892 default:
4893 break;
4894 }
4895 return flag;
4896 }
4897
4898 static bool
4899 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4900 {
4901 uint16_t ether_type;
4902 bool is_ip = TRUE;
4903
4904 ether_type = ntohs(eh->ether_type);
4905 switch (ether_type) {
4906 case ETHERTYPE_IP:
4907 *is_ipv4 = TRUE;
4908 break;
4909 case ETHERTYPE_IPV6:
4910 *is_ipv4 = FALSE;
4911 break;
4912 default:
4913 is_ip = FALSE;
4914 break;
4915 }
4916 return is_ip;
4917 }
4918
4919 static errno_t
4920 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4921 {
4922 struct brcsumstats *csum_stats_p;
4923 struct ether_header *eh;
4924 errno_t error = 0;
4925 ip_packet_info info;
4926 bool is_ipv4;
4927 struct mbuf * m;
4928 u_int mac_hlen = sizeof(struct ether_header);
4929 uint16_t sum;
4930 bool valid;
4931
4932 eh = mtod(*mp, struct ether_header *);
4933 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4934 goto done;
4935 }
4936 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4937 &stats_p->brms_out_ip);
4938 m = *mp;
4939 if (error != 0) {
4940 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4941 "bridge_get_ip_proto failed %d",
4942 error);
4943 goto done;
4944 }
4945 if (is_ipv4) {
4946 if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4947 /* hardware offloaded IP header checksum */
4948 valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4949 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4950 "IP checksum HW %svalid",
4951 valid ? "" : "in");
4952 if (!valid) {
4953 __ATOMIC_INC(stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum);
4954 error = _EBADIPCHECKSUM;
4955 goto done;
4956 }
4957 __ATOMIC_INC(stats_p->brms_out_cksum_good_hw.brcs_ip_checksum);
4958 } else {
4959 /* verify */
4960 sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4961 valid = (sum == 0);
4962 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4963 "IP checksum SW %svalid",
4964 valid ? "" : "in");
4965 if (!valid) {
4966 __ATOMIC_INC(stats_p->brms_out_cksum_bad.brcs_ip_checksum);
4967 error = _EBADIPCHECKSUM;
4968 goto done;
4969 }
4970 __ATOMIC_INC(stats_p->brms_out_cksum_good.brcs_ip_checksum);
4971 }
4972 }
4973 if (info.ip_is_fragmented) {
4974 /* can't verify checksum on fragmented packets */
4975 goto done;
4976 }
4977 switch (info.ip_proto) {
4978 case IPPROTO_TCP:
4979 __ATOMIC_INC(stats_p->brms_out_ip.bips_tcp);
4980 break;
4981 case IPPROTO_UDP:
4982 __ATOMIC_INC(stats_p->brms_out_ip.bips_udp);
4983 break;
4984 default:
4985 goto done;
4986 }
4987 /* check for hardware offloaded UDP/TCP checksum */
4988 #define HW_CSUM (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4989 if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4990 /* checksum verified by hardware */
4991 valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4992 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4993 "IPv%c %s checksum HW 0x%x %svalid",
4994 is_ipv4 ? '4' : '6',
4995 (info.ip_proto == IPPROTO_TCP)
4996 ? "TCP" : "UDP",
4997 m->m_pkthdr.csum_data,
4998 valid ? "" : "in" );
4999 if (!valid) {
5000 /* bad checksum */
5001 csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
5002 error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
5003 : _EBADTCPCHECKSUM;
5004 } else {
5005 /* good checksum */
5006 csum_stats_p = &stats_p->brms_out_cksum_good_hw;
5007 }
5008 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
5009 goto done;
5010 }
5011 /* adjust frame to skip mac-layer header */
5012 _mbuf_adjust_pkthdr_and_data(m, mac_hlen);
5013 if (is_ipv4) {
5014 sum = inet_cksum(m, info.ip_proto,
5015 info.ip_hlen,
5016 info.ip_pay_len);
5017 } else {
5018 sum = inet6_cksum(m, info.ip_proto,
5019 info.ip_hlen + info.ip_opt_len,
5020 info.ip_pay_len - info.ip_opt_len);
5021 }
5022 valid = (sum == 0);
5023 if (valid) {
5024 csum_stats_p = &stats_p->brms_out_cksum_good;
5025 } else {
5026 csum_stats_p = &stats_p->brms_out_cksum_bad;
5027 error = (info.ip_proto == IPPROTO_TCP)
5028 ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
5029 }
5030 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
5031 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5032 "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
5033 is_ipv4 ? '4' : '6',
5034 (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
5035 valid ? "" : "in",
5036 sum, info.ip_hlen, info.ip_pay_len);
5037 /* adjust frame back to start of mac-layer header */
5038 _mbuf_adjust_pkthdr_and_data(m, -mac_hlen);
5039
5040 done:
5041 return error;
5042 }
5043
5044 static mbuf_t
5045 bridge_verify_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * dbif,
5046 mbuf_t in_list, bool is_ipv4)
5047 {
5048 mbuf_t next_packet;
5049 mblist ret;
5050
5051 mblist_init(&ret);
5052 for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
5053 errno_t error;
5054
5055 /* take packet out of the list */
5056 next_packet = scan->m_nextpkt;
5057 scan->m_nextpkt = NULL;
5058
5059 if (scan->m_pkthdr.rx_seg_cnt > 1) {
5060 /* LRO packet, compute checksum on large packet */
5061 scan = bridge_filter_checksum(bridge_ifp, dbif, scan,
5062 is_ipv4, false, true);
5063 } else {
5064 /* verify checksum */
5065 error = bridge_verify_checksum(&scan, &dbif->bif_stats);
5066 if (error != 0) {
5067 if (scan != NULL) {
5068 m_drop(scan, DROPTAP_FLAG_DIR_IN,
5069 DROP_REASON_BRIDGE_CHECKSUM, NULL, 0);
5070 scan = NULL;
5071 }
5072 }
5073 }
5074
5075 /* add it back to the list */
5076 if (scan != NULL) {
5077 mblist_append(&ret, scan);
5078 }
5079 }
5080 return ret.head;
5081 }
5082
5083
5084 static errno_t
5085 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
5086 struct ifbrmstats * stats_p)
5087 {
5088 uint16_t * csum_p;
5089 errno_t error = 0;
5090 u_int hlen;
5091 struct mbuf * m0 = *mp;
5092 u_int mac_hlen = sizeof(struct ether_header);
5093 u_int pkt_hdr_len;
5094 struct tcphdr * tcp;
5095 u_int tcp_hlen;
5096 struct udphdr * udp;
5097
5098 if (info_p->ip_is_ipv4) {
5099 /* compute IP header checksum */
5100 struct ip *ip = (struct ip *)info_p->ip_hdr;
5101 ip->ip_sum = 0;
5102 ip->ip_sum = inet_cksum(m0, 0, mac_hlen, info_p->ip_hlen);
5103 __ATOMIC_INC(stats_p->brms_in_computed_cksum.brcs_ip_checksum);
5104 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5105 "IPv4 checksum 0x%x",
5106 ntohs(ip->ip_sum));
5107 }
5108 if (info_p->ip_is_fragmented) {
5109 /* can't compute checksum on fragmented packets */
5110 goto done;
5111 }
5112 pkt_hdr_len = m0->m_pkthdr.len;
5113 switch (info_p->ip_proto) {
5114 case IPPROTO_TCP:
5115 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
5116 + sizeof(struct tcphdr);
5117 if (m0->m_len < hlen) {
5118 *mp = m0 = m_pullup(m0, hlen);
5119 if (m0 == NULL) {
5120 __ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5121 error = _EBADTCP;
5122 goto done;
5123 }
5124 }
5125 tcp = (struct tcphdr *)(info_p->ip_hdr + info_p->ip_hlen
5126 + info_p->ip_opt_len);
5127 tcp_hlen = tcp->th_off << 2;
5128 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
5129 if (hlen > pkt_hdr_len) {
5130 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5131 "bad tcp header length %u",
5132 tcp_hlen);
5133 __ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5134 error = _EBADTCP;
5135 goto done;
5136 }
5137 csum_p = &tcp->th_sum;
5138 __ATOMIC_INC(stats_p->brms_in_ip.bips_tcp);
5139 break;
5140 case IPPROTO_UDP:
5141 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
5142 if (m0->m_len < hlen) {
5143 *mp = m0 = m_pullup(m0, hlen);
5144 if (m0 == NULL) {
5145 __ATOMIC_INC(stats_p->brms_in_ip.bips_bad_udp);
5146 error = ENOBUFS;
5147 goto done;
5148 }
5149 }
5150 udp = (struct udphdr *)(info_p->ip_hdr + info_p->ip_hlen
5151 + info_p->ip_opt_len);
5152 csum_p = &udp->uh_sum;
5153 __ATOMIC_INC(stats_p->brms_in_ip.bips_udp);
5154 break;
5155 default:
5156 /* not TCP or UDP */
5157 goto done;
5158 }
5159 *csum_p = 0;
5160 /* adjust frame to skip mac-layer header */
5161 _mbuf_adjust_pkthdr_and_data(m0, mac_hlen);
5162 if (info_p->ip_is_ipv4) {
5163 *csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
5164 info_p->ip_pay_len);
5165 } else {
5166 *csum_p = inet6_cksum(m0, info_p->ip_proto,
5167 info_p->ip_hlen + info_p->ip_opt_len,
5168 info_p->ip_pay_len - info_p->ip_opt_len);
5169 }
5170 if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
5171 /* RFC 1122 4.1.3.4 */
5172 *csum_p = 0xffff;
5173 }
5174 /* adjust frame back to start of mac-layer header */
5175 _mbuf_adjust_pkthdr_and_data(m0, -mac_hlen);
5176 proto_csum_stats_increment(info_p->ip_proto,
5177 &stats_p->brms_in_computed_cksum);
5178
5179 /* indicate that the checksum is good */
5180 mbuf_set_csum_performed(m0, checksum_performed_all_good, 0xffff);
5181
5182 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5183 "IPv%c %s set checksum 0x%x",
5184 info_p->ip_is_ipv4 ? '4' : '6',
5185 (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
5186 ntohs(*csum_p));
5187 done:
5188 return error;
5189 }
5190
5191 static inline void
5192 bridge_handle_checksum_op(ifnet_t src_ifp, ifnet_t dst_ifp,
5193 mbuf_t m, ChecksumOperation cksum_op)
5194 {
5195 switch (cksum_op) {
5196 case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
5197 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5198 break;
5199 case CHECKSUM_OPERATION_FINALIZE:
5200 /* the checksum might not be correct, finalize now */
5201 VERIFY(dst_ifp != NULL);
5202 bridge_finalize_cksum(dst_ifp, m);
5203 break;
5204 case CHECKSUM_OPERATION_COMPUTE:
5205 VERIFY(dst_ifp != NULL && src_ifp != NULL);
5206 bridge_compute_cksum(src_ifp, dst_ifp, m);
5207 break;
5208 default:
5209 break;
5210 }
5211 return;
5212 }
5213
5214 static uint32_t
5215 get_if_tso_mtu(struct ifnet * ifp, bool is_ipv4)
5216 {
5217 uint32_t tso_mtu;
5218
5219 tso_mtu = is_ipv4 ? ifp->if_tso_v4_mtu : ifp->if_tso_v6_mtu;
5220 if (tso_mtu == 0) {
5221 tso_mtu = IP_MAXPACKET;
5222 }
5223
5224 #if DEBUG || DEVELOPMENT
5225 #define REDUCED_TSO_MTU (16 * 1024)
5226 if (if_bridge_reduce_tso_mtu != 0 && tso_mtu > REDUCED_TSO_MTU) {
5227 tso_mtu = REDUCED_TSO_MTU;
5228 }
5229 #endif /* DEBUG || DEVELOPMENT */
5230 return tso_mtu;
5231 }
5232
5233 /*
5234 * tso_hwassist:
5235 * - determine whether the destination interface supports TSO offload
5236 * - if the packet is already marked for offload and the hardware supports
5237 * it, just allow the packet to continue on
5238 * - if not, parse the packet headers to verify that this is a large TCP
5239 * packet requiring segmentation; if the hardware doesn't support it
5240 * set need_sw_tso; otherwise, mark the packet for TSO offload
5241 */
5242 static int
5243 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
5244 int * mss_p, bool * need_gso, bool * is_large_tcp)
5245 {
5246 uint32_t csum_flags;
5247 int error = 0;
5248 ip_packet_info info;
5249 u_int32_t if_csum;
5250 u_int32_t if_tso;
5251 u_int32_t mbuf_tso;
5252 int mss = *mss_p;
5253 uint8_t seg_cnt = 0;
5254 bool supports_cksum = false;
5255 uint32_t pkt_mtu;
5256 struct bripstats stats;
5257
5258 *need_gso = false;
5259 *is_large_tcp = false;
5260 if (is_ipv4) {
5261 /*
5262 * Enable both TCP and IP offload if the hardware supports it.
5263 * If the hardware doesn't support TCP offload, supports_cksum
5264 * will be false so we won't set either offload.
5265 */
5266 if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
5267 supports_cksum = (if_csum & CSUM_TCP) != 0;
5268 if_tso = IFNET_TSO_IPV4;
5269 mbuf_tso = CSUM_TSO_IPV4;
5270 } else {
5271 if_csum = (ifp->if_hwassist & CSUM_TCPIPV6);
5272 supports_cksum = (if_csum & CSUM_TCPIPV6) != 0;
5273 if_tso = IFNET_TSO_IPV6;
5274 mbuf_tso = CSUM_TSO_IPV6;
5275 }
5276 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5277 "%s: does%s support checksum 0x%x if_csum 0x%x",
5278 ifp->if_xname, supports_cksum ? "" : " not",
5279 ifp->if_hwassist, if_csum);
5280
5281 /* verify that this is a large TCP frame */
5282 error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
5283 &info, &stats);
5284 if (error != 0) {
5285 /* bad packet */
5286 goto done;
5287 }
5288 if (info.ip_proto_hdr == NULL) {
5289 /* not a TCP packet */
5290 goto done;
5291 }
5292 pkt_mtu = info.ip_hlen + info.ip_pay_len + info.ip_opt_len;
5293 if (mss == 0) {
5294 /* check for LRO */
5295 seg_cnt = (*mp)->m_pkthdr.rx_seg_cnt;
5296 if (seg_cnt == 1 || (seg_cnt == 0 && pkt_mtu <= ifp->if_mtu)) {
5297 /* not actually a large packet */
5298 goto done;
5299 }
5300 }
5301 if (mss == 0) {
5302 uint32_t hdr_len;
5303 struct tcphdr * tcp;
5304
5305 tcp = (struct tcphdr *)info.ip_proto_hdr;
5306 hdr_len = info.ip_hlen + info.ip_opt_len + (tcp->th_off << 2);
5307
5308 /* packet isn't marked, mark it now */
5309 if (seg_cnt != 0) {
5310 uint32_t len;
5311
5312 /* approximate the MSS using the LRO seg cnt */
5313 len = mbuf_pkthdr_len(*mp) - hdr_len - ETHER_HDR_LEN;
5314 mss = len / seg_cnt;
5315 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5316 "%s: mss %d = len %d / seg cnt %d",
5317 ifp->if_xname, mss, len, seg_cnt);
5318 if (mss <= 0) {
5319 /* unexpected value */
5320 mss = 0;
5321 goto done;
5322 }
5323 } else {
5324 mss = ifp->if_mtu - hdr_len
5325 - if_bridge_tso_reduce_mss_tx;
5326 assert(mss > 0);
5327 }
5328 csum_flags = mbuf_tso;
5329 if (supports_cksum) {
5330 csum_flags |= if_csum;
5331 }
5332 (*mp)->m_pkthdr.tso_segsz = mss;
5333 (*mp)->m_pkthdr.csum_flags |= csum_flags;
5334 (*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
5335 }
5336 *is_large_tcp = true;
5337 (*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
5338 if ((ifp->if_hwassist & if_tso) == 0) {
5339 /* need gso if no hardware support */
5340 *need_gso = true;
5341 } else {
5342 uint32_t tso_mtu = 0;
5343
5344 tso_mtu = get_if_tso_mtu(ifp, is_ipv4);
5345 if (pkt_mtu > tso_mtu) {
5346 /* need gso if tso_mtu too small */
5347 *need_gso = true;
5348 }
5349 }
5350 done:
5351 *mss_p = mss;
5352 return error;
5353 }
5354
5355 /*
5356 * bridge_enqueue:
5357 *
5358 * Enqueue a packet list on a bridge member interface.
5359 *
5360 */
5361 static int
5362 bridge_enqueue(ifnet_t bridge_ifp, ifnet_t src_if, ifnet_t dst_if,
5363 ether_type_flag_t etypef, mbuf_t in_list, ChecksumOperation orig_cksum_op,
5364 pkt_direction_t direction)
5365 {
5366 int enqueue_error = 0;
5367 mbuf_t next_packet;
5368 uint32_t out_errors = 0;
5369 mblist out_list;
5370
5371 VERIFY(dst_if != NULL);
5372
5373 mblist_init(&out_list);
5374 for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
5375 bool check_gso = false;
5376 ChecksumOperation cksum_op = orig_cksum_op;
5377 errno_t error = 0;
5378 bool is_ipv4 = false;
5379 int len;
5380 int mss = 0;
5381 bool need_gso = false;
5382
5383 scan->m_flags |= M_PROTO1; /* set to avoid loops */
5384 next_packet = scan->m_nextpkt;
5385 scan->m_nextpkt = NULL;
5386 len = mbuf_pkthdr_len(scan);
5387 is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
5388 mss = _mbuf_get_tso_mss(scan);
5389 if (mss != 0) {
5390 /* packet is marked for segmentation */
5391 check_gso = true;
5392 } else if (direction == pkt_direction_RX &&
5393 scan->m_pkthdr.rx_seg_cnt != 0) {
5394 /* LRO packet */
5395 check_gso = true;
5396 } else if (ether_type_flag_is_ip(etypef) &&
5397 len > (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5398 /*
5399 * Need to segment the packet if it is a large frame
5400 * and the destination interface does not support TSO.
5401 *
5402 * Note that with trailers, it's possible for a packet to
5403 * be large but not actually require segmentation.
5404 */
5405 check_gso = true;
5406 }
5407 if (check_gso) {
5408 bool is_large_tcp = false;
5409
5410 error = tso_hwassist(&scan, is_ipv4,
5411 dst_if, sizeof(struct ether_header), &mss,
5412 &need_gso, &is_large_tcp);
5413 if (is_large_tcp &&
5414 cksum_op == CHECKSUM_OPERATION_CLEAR_OFFLOAD) {
5415 cksum_op = CHECKSUM_OPERATION_NONE;
5416 }
5417 }
5418 if (error != 0) {
5419 if (scan != NULL) {
5420 m_drop(scan,
5421 direction == pkt_direction_RX ? DROPTAP_FLAG_DIR_IN : DROPTAP_FLAG_DIR_OUT,
5422 DROP_REASON_BRIDGE_HWASSIST, NULL, 0);
5423 scan = NULL;
5424 }
5425 out_errors++;
5426 } else if (need_gso) {
5427 int mac_hlen = sizeof(struct ether_header);
5428 mblist segs;
5429
5430 /* segment packets, add to list */
5431 segs = gso_tcp_transmit(dst_if, scan, mac_hlen,
5432 is_ipv4);
5433 if (segs.head != NULL) {
5434 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5435 "%s (%s) append gso #segs %u bytes %u",
5436 bridge_ifp->if_xname,
5437 dst_if->if_xname,
5438 segs.count, segs.bytes);
5439 mblist_append_list(&out_list, segs);
5440 } else {
5441 out_errors++;
5442 }
5443 } else {
5444 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5445 "%s (%s) append %d bytes mss %d op %d",
5446 bridge_ifp->if_xname,
5447 dst_if->if_xname,
5448 len, mss, cksum_op);
5449 bridge_handle_checksum_op(src_if, dst_if,
5450 scan, cksum_op);
5451 mblist_append(&out_list, scan);
5452 }
5453 }
5454 if (out_list.head != NULL) {
5455 enqueue_error = bridge_transmit(dst_if, out_list.head);
5456 if (enqueue_error != 0) {
5457 out_errors++;
5458 }
5459 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5460 "%s (%s) bridge_transmit packets %u bytes %u error %d",
5461 bridge_ifp->if_xname,
5462 dst_if->if_xname,
5463 out_list.count, out_list.bytes, enqueue_error);
5464 }
5465 if (out_list.count != 0 || out_errors != 0) {
5466 ifnet_stat_increment_out(bridge_ifp, out_list.count,
5467 out_list.bytes, out_errors);
5468 }
5469 return enqueue_error;
5470 }
5471
5472 /*
5473 * bridge_member_output:
5474 *
5475 * Send output from a bridge member interface. This
5476 * performs the bridging function for locally originated
5477 * packets.
5478 *
5479 * The mbuf has the Ethernet header already attached.
5480 */
5481 static errno_t
5482 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5483 {
5484 struct bridge_iflist * bif = NULL;
5485 ifnet_t bridge_ifp;
5486 struct ether_header *eh;
5487 ether_type_flag_t etypef;
5488 struct ifnet *dst_if = NULL;
5489 uint16_t vlan;
5490 struct bridge_iflist *mac_nat_bif;
5491 ifnet_t mac_nat_ifp;
5492 mbuf_t m = *data;
5493
5494 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5495 "ifp %s", ifp->if_xname);
5496 if (m->m_len < ETHER_HDR_LEN) {
5497 m = m_pullup(m, ETHER_HDR_LEN);
5498 if (m == NULL) {
5499 *data = NULL;
5500 return EJUSTRETURN;
5501 }
5502 }
5503
5504 BRIDGE_LOCK(sc);
5505 mac_nat_bif = sc->sc_mac_nat_bif;
5506 mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5507 if (mac_nat_ifp == ifp) {
5508 /* record the IP address used by the MAC NAT interface */
5509 (void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5510 m = *data;
5511 if (m == NULL) {
5512 /* packet was deallocated */
5513 BRIDGE_UNLOCK(sc);
5514 return EJUSTRETURN;
5515 }
5516 }
5517 bridge_ifp = sc->sc_ifp;
5518 eh = mtod(m, struct ether_header *);
5519 vlan = VLANTAGOF(m);
5520 etypef = ether_type_flag_get(eh->ether_type);
5521
5522 /*
5523 * APPLE MODIFICATION
5524 * If the packet is an 802.1X ethertype, then only send on the
5525 * original output interface.
5526 */
5527 if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5528 dst_if = ifp;
5529 goto sendunicast;
5530 }
5531
5532 /*
5533 * If bridge is down, but the original output interface is up,
5534 * go ahead and send out that interface. Otherwise, the packet
5535 * is dropped below.
5536 */
5537 if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5538 dst_if = ifp;
5539 goto sendunicast;
5540 }
5541
5542 /*
5543 * If the packet is a multicast, or we don't know a better way to
5544 * get there, send to all interfaces.
5545 */
5546 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5547 dst_if = NULL;
5548 } else {
5549 bif = bridge_rtlookup_bif(sc, eh->ether_dhost, vlan);
5550 if (bif != NULL) {
5551 dst_if = bif->bif_ifp;
5552 }
5553 }
5554 if (dst_if == NULL) {
5555 struct mbuf *mc;
5556 errno_t error;
5557
5558
5559 bridge_span(sc, etypef, m);
5560
5561 BRIDGE_LOCK2REF(sc, error);
5562 if (error != 0) {
5563 m_drop(m, DROPTAP_FLAG_DIR_OUT,
5564 DROP_REASON_BRIDGE_NOREF, NULL, 0);
5565 return EJUSTRETURN;
5566 }
5567
5568 /*
5569 * Duplicate and send the packet across all member interfaces
5570 * except the originating interface.
5571 */
5572 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5573 dst_if = bif->bif_ifp;
5574 if (dst_if == ifp) {
5575 /* skip the originating interface */
5576 continue;
5577 }
5578 /* skip interface with inactive link status */
5579 if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5580 continue;
5581 }
5582
5583 /* skip interface that isn't running */
5584 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5585 continue;
5586 }
5587 /*
5588 * If the interface is participating in spanning
5589 * tree, make sure the port is in a state that
5590 * allows forwarding.
5591 */
5592 if ((bif->bif_ifflags & IFBIF_STP) &&
5593 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5594 continue;
5595 }
5596 /*
5597 * If the destination is the MAC NAT interface,
5598 * skip sending the packet. The packet can't be sent
5599 * if the source MAC is incorrect.
5600 */
5601 if (dst_if == mac_nat_ifp) {
5602 continue;
5603 }
5604
5605 /* make a deep copy to send on this member interface */
5606 mc = m_dup(m, M_DONTWAIT);
5607 if (mc == NULL) {
5608 (void)ifnet_stat_increment_out(bridge_ifp,
5609 0, 0, 1);
5610 continue;
5611 }
5612 (void)bridge_enqueue(bridge_ifp, ifp, dst_if, etypef,
5613 mc, CHECKSUM_OPERATION_COMPUTE, pkt_direction_TX);
5614 }
5615 BRIDGE_UNREF(sc);
5616
5617 if ((ifp->if_flags & IFF_RUNNING) == 0) {
5618 m_drop(m, DROPTAP_FLAG_DIR_OUT,
5619 DROP_REASON_BRIDGE_NOT_RUNNING, NULL, 0);
5620 return EJUSTRETURN;
5621 }
5622 /* allow packet to continue on the originating interface */
5623 return 0;
5624 }
5625
5626 sendunicast:
5627 /*
5628 * XXX Spanning tree consideration here?
5629 */
5630
5631 bridge_span(sc, etypef, m);
5632 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5633 m_drop(m, DROPTAP_FLAG_DIR_OUT,
5634 DROP_REASON_BRIDGE_NOT_RUNNING, NULL, 0);
5635 BRIDGE_UNLOCK(sc);
5636 return EJUSTRETURN;
5637 }
5638
5639 BRIDGE_UNLOCK(sc);
5640 if (dst_if == ifp) {
5641 /* allow packet to continue on the originating interface */
5642 return 0;
5643 }
5644 if (dst_if != mac_nat_ifp) {
5645 (void) bridge_enqueue(bridge_ifp, ifp, dst_if, etypef, m,
5646 CHECKSUM_OPERATION_COMPUTE, pkt_direction_TX);
5647 } else {
5648 /*
5649 * This is not the original output interface
5650 * and the destination is the MAC NAT interface.
5651 * Drop the packet because the packet can't be sent
5652 * if the source MAC is incorrect.
5653 */
5654 m_drop(m, DROPTAP_FLAG_DIR_OUT,
5655 DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
5656 }
5657 return EJUSTRETURN;
5658 }
5659
5660 /*
5661 * Output callback.
5662 *
5663 * This routine is called externally from above only when if_bridge_txstart
5664 * is disabled; otherwise it is called internally by bridge_start().
5665 */
5666 static int
5667 bridge_output(struct ifnet *ifp, struct mbuf *m)
5668 {
5669 struct bridge_iflist *bif;
5670 struct bridge_softc * __single sc = ifnet_softc(ifp);
5671 struct ether_header *eh;
5672 ether_type_flag_t etypef;
5673 struct ifnet *dst_if = NULL;
5674 int error = 0;
5675
5676 eh = mtod(m, struct ether_header *);
5677 etypef = ether_type_flag_get(eh->ether_type);
5678 BRIDGE_LOCK(sc);
5679
5680 if (!IS_BCAST_MCAST(m)) {
5681 bif = bridge_rtlookup_bif(sc, eh->ether_dhost, 0);
5682 if (bif != NULL) {
5683 dst_if = bif->bif_ifp;
5684 }
5685 }
5686
5687 (void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5688
5689 BRIDGE_BPF_TAP_OUT(ifp, m);
5690
5691 if (dst_if == NULL) {
5692 /* callee will unlock */
5693 bridge_broadcast(sc, NULL, etypef, m);
5694 } else {
5695 ifnet_t bridge_ifp;
5696
5697 bridge_ifp = sc->sc_ifp;
5698 BRIDGE_UNLOCK(sc);
5699
5700 error = bridge_enqueue(bridge_ifp, NULL, dst_if, etypef, m,
5701 CHECKSUM_OPERATION_FINALIZE, pkt_direction_TX);
5702 }
5703
5704 return error;
5705 }
5706
5707 static void
5708 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5709 {
5710 struct ether_header *eh;
5711 bool is_ipv4;
5712 uint32_t sw_csum, hwcap;
5713 uint32_t did_sw;
5714 uint32_t csum_flags;
5715
5716 eh = mtod(m, struct ether_header *);
5717 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5718 return;
5719 }
5720
5721 /* do in software what the hardware cannot */
5722 hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5723 csum_flags = m->m_pkthdr.csum_flags;
5724 sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5725 sw_csum &= IF_HWASSIST_CSUM_MASK;
5726
5727 if (is_ipv4) {
5728 if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5729 (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5730 if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5731 uint16_t start =
5732 sizeof(*eh) + sizeof(struct ip);
5733 uint16_t ulpoff =
5734 m->m_pkthdr.csum_data & 0xffff;
5735 m->m_pkthdr.csum_flags |=
5736 (CSUM_DATA_VALID | CSUM_PARTIAL);
5737 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5738 m->m_pkthdr.csum_tx_start = start;
5739 } else {
5740 sw_csum |= (CSUM_DELAY_DATA &
5741 m->m_pkthdr.csum_flags);
5742 }
5743 }
5744 did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5745 } else {
5746 if ((hwcap & CSUM_PARTIAL) &&
5747 !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5748 (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5749 if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5750 uint16_t start =
5751 sizeof(*eh) + sizeof(struct ip6_hdr);
5752 uint16_t ulpoff =
5753 m->m_pkthdr.csum_data & 0xffff;
5754 m->m_pkthdr.csum_flags |=
5755 (CSUM_DATA_VALID | CSUM_PARTIAL);
5756 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5757 m->m_pkthdr.csum_tx_start = start;
5758 } else {
5759 sw_csum |= (CSUM_DELAY_IPV6_DATA &
5760 m->m_pkthdr.csum_flags);
5761 }
5762 }
5763 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5764 }
5765 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5766 "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5767 ifp->if_xname, csum_flags, hwcap, sw_csum,
5768 did_sw, m->m_pkthdr.csum_flags);
5769 }
5770
5771 /*
5772 * bridge_start:
5773 *
5774 * Start output on a bridge.
5775 *
5776 * This routine is invoked by the start worker thread; because we never call
5777 * it directly, there is no need do deploy any serialization mechanism other
5778 * than what's already used by the worker thread, i.e. this is already single
5779 * threaded.
5780 *
5781 * This routine is called only when if_bridge_txstart is enabled.
5782 */
5783 static void
5784 bridge_start(struct ifnet *ifp)
5785 {
5786 mbuf_ref_t m;
5787
5788 for (;;) {
5789 if (ifnet_dequeue(ifp, &m) != 0) {
5790 break;
5791 }
5792
5793 (void) bridge_output(ifp, m);
5794 }
5795 }
5796
5797 static void
5798 prepare_input_packet(ifnet_t ifp, mbuf_t m)
5799 {
5800 mbuf_pkthdr_setrcvif(m, ifp);
5801 mbuf_pkthdr_setheader(m, mtod(m, void *));
5802 /* adjust frame to skip mac-layer header */
5803 _mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
5804 }
5805
5806 static void
5807 mark_tso_checksum_ok(mbuf_t m)
5808 {
5809 if (_mbuf_get_tso_mss(m) != 0 ||
5810 (m->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
5811 mbuf_set_csum_performed(m, checksum_performed_all_good, 0xffff);
5812 }
5813 }
5814
5815 static void
5816 inject_input_packet_list(ifnet_t ifp, mbuf_t in_list, bool m_proto1)
5817 {
5818 for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5819 /* mark the packets as arriving on the interface */
5820 BRIDGE_BPF_TAP_IN(ifp, scan);
5821 if (m_proto1) {
5822 scan->m_flags |= M_PROTO1; /* set to avoid loops */
5823 }
5824 prepare_input_packet(ifp, scan);
5825 mark_tso_checksum_ok(scan);
5826 }
5827 dlil_input_packet_list(ifp, in_list);
5828 return;
5829 }
5830
5831 static void
5832 adjust_input_packet_list(mbuf_t in_list)
5833 {
5834 for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5835 mbuf_pkthdr_setheader(scan, mtod(scan, void *));
5836 _mbuf_adjust_pkthdr_and_data(scan, ETHER_HDR_LEN);
5837 }
5838 }
5839
5840 static bool
5841 in_addr_is_ours(struct in_addr ip)
5842 {
5843 struct in_ifaddr *ia;
5844 bool ours = false;
5845
5846 lck_rw_lock_shared(&in_ifaddr_rwlock);
5847 TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5848 if (ia->ia_addr.sin_addr.s_addr == ip.s_addr) {
5849 ours = true;
5850 break;
5851 }
5852 }
5853 lck_rw_done(&in_ifaddr_rwlock);
5854 return ours;
5855 }
5856
5857 static bool
5858 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5859 {
5860 struct in6_addr dst_ip;
5861 struct in6_ifaddr *ia6;
5862 bool ours = false;
5863
5864 if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5865 /* need to embed scope ID for comparison */
5866 bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5867 dst_ip.s6_addr16[1] = htons(ifscope);
5868 ip6_p = &dst_ip;
5869 }
5870 lck_rw_lock_shared(&in6_ifaddr_rwlock);
5871 TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5872 if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5873 ia6->ia_addr.sin6_scope_id, ifscope)) {
5874 ours = true;
5875 break;
5876 }
5877 }
5878 lck_rw_done(&in6_ifaddr_rwlock);
5879 return ours;
5880 }
5881
5882 static bool
5883 ip_packet_info_dst_is_our_ip(ip_packet_info_t info_p, int index)
5884 {
5885 /* if the destination is our IP address, don't segment */
5886 bool our_ip = false;
5887
5888 if (info_p->ip_is_ipv4) {
5889 struct ip * hdr;
5890 struct in_addr dst_ip;
5891
5892 hdr = (struct ip *)(info_p->ip_hdr);
5893 bcopy(&hdr->ip_dst, &dst_ip, sizeof(dst_ip));
5894 our_ip = in_addr_is_ours(dst_ip);
5895 } else {
5896 struct ip6_hdr * hdr;
5897
5898 hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5899 our_ip = in6_addr_is_ours(&hdr->ip6_dst, index);
5900 }
5901 return our_ip;
5902 }
5903
/*
 * ip_addr:
 *
 * Holds either an IPv4 or an IPv6 address. The union does not record which
 * member is valid; users pass the address family separately (is_ipv4).
 */
typedef union {
	struct in_addr  ip;     /* IPv4 address */
	struct in6_addr ip6;    /* IPv6 address */
} ip_addr, *ip_addr_t;
5908
5909 static void
5910 ip_packet_info_copy_dst_ip_addr(ip_packet_info_t info_p, ip_addr_t ipaddr)
5911 {
5912 if (info_p->ip_is_ipv4) {
5913 struct ip * hdr;
5914
5915 hdr = (struct ip *)(info_p->ip_hdr);
5916 bcopy(&hdr->ip_dst, &ipaddr->ip, sizeof(ipaddr->ip));
5917 } else {
5918 struct ip6_hdr * hdr;
5919
5920 hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5921 bcopy(&hdr->ip6_dst, &ipaddr->ip6, sizeof(ipaddr->ip6));
5922 }
5923 }
5924
5925 static bool
5926 ip_addr_are_equal(ip_addr_t addr1, ip_addr_t addr2, bool is_ipv4)
5927 {
5928 bool equal;
5929
5930 if (is_ipv4) {
5931 equal = addr1->ip.s_addr == addr2->ip.s_addr;
5932 } else {
5933 equal = IN6_ARE_ADDR_EQUAL(&addr1->ip6, &addr2->ip6);
5934 }
5935 return equal;
5936 }
5937
5938 static bool
5939 ip_addr_is_ours(ip_addr_t ipaddr, int index, bool is_ipv4)
5940 {
5941 bool our_ip;
5942
5943 if (is_ipv4) {
5944 our_ip = in_addr_is_ours(ipaddr->ip);
5945 } else {
5946 our_ip = in6_addr_is_ours(&ipaddr->ip6, index);
5947 }
5948 return our_ip;
5949 }
5950
5951 static void
5952 bridge_interface_input_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
5953 mblist list, bool bif_uses_virtio)
5954 {
5955 uint32_t in_errors = 0;
5956 bool is_ipv4;
5957 mblist in_list;
5958 ip_addr last_ip;
5959 bool last_ip_ours = false;
5960 bool last_ip_valid = false;
5961 u_int mac_hlen;
5962 bool may_forward = false;
5963 mbuf_t next_packet;
5964
5965 switch (etypef) {
5966 case ETHER_TYPE_FLAG_IPV4:
5967 is_ipv4 = true;
5968 may_forward = (ipforwarding != 0);
5969 break;
5970 case ETHER_TYPE_FLAG_IPV6:
5971 is_ipv4 = false;
5972 may_forward = (ip6_forwarding != 0);
5973 break;
5974 }
5975 if (!may_forward) {
5976 in_list = list;
5977 goto done;
5978 }
5979
5980 mblist_init(&in_list);
5981 mac_hlen = sizeof(struct ether_header);
5982 bzero(&last_ip, sizeof(last_ip));
5983 for (mbuf_ref_t scan = list.head; scan != NULL; scan = next_packet) {
5984 int error;
5985 ip_packet_info info;
5986 bool ip_ours;
5987 struct ifbrmstats stats; /* XXX should really be accounted */
5988 ip_addr this_ip;
5989
5990 /* take it out of the list */
5991 next_packet = scan->m_nextpkt;
5992 scan->m_nextpkt = NULL;
5993
5994 /* check for TCP packet and get IP header */
5995 error = bridge_get_tcp_header(&scan, mac_hlen, is_ipv4,
5996 &info, &stats.brms_in_ip);
5997 if (error != 0) {
5998 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5999 "%s bridge_get_tcp_header failed %d",
6000 bridge_ifp->if_xname, error);
6001 if (scan != NULL) {
6002 m_freem(scan);
6003 scan = NULL;
6004 }
6005 in_errors++;
6006 continue;
6007 }
6008 ip_packet_info_copy_dst_ip_addr(&info, &this_ip);
6009 if (last_ip_valid &&
6010 ip_addr_are_equal(&last_ip, &this_ip, is_ipv4)) {
6011 /* use cached result */
6012 ip_ours = last_ip_ours;
6013 } else {
6014 ip_ours = ip_addr_is_ours(&this_ip,
6015 bridge_ifp->if_index,
6016 is_ipv4);
6017 /* cache the result */
6018 last_ip_valid = true;
6019 last_ip_ours = ip_ours;
6020 last_ip = this_ip;
6021 }
6022
6023 /* if the packet is destined to us, just send it up */
6024 if (ip_ours) {
6025 mblist_append(&in_list, scan);
6026 continue;
6027 }
6028 /*
6029 * If this is a TCP packet that's marked for TSO or LRO, or
6030 * we think it's a large packet, segment it.
6031 */
6032 if (info.ip_proto_hdr != NULL &&
6033 ((bif_uses_virtio && _mbuf_get_tso_mss(scan) != 0) ||
6034 (!bif_uses_virtio &&
6035 (scan->m_pkthdr.rx_seg_cnt > 1 ||
6036 (mbuf_pkthdr_len(scan) >
6037 (bridge_ifp->if_mtu + ETHER_HDR_LEN)))))) {
6038 mblist seg;
6039
6040 seg = gso_tcp_with_info(bridge_ifp, scan, &info,
6041 mac_hlen, is_ipv4, false);
6042 if (seg.head == NULL) {
6043 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
6044 "gso_tcp returned no packets");
6045 in_errors++;
6046 continue;
6047 }
6048 if (seg.count > 1) {
6049 /* packet was segmented+checksummed */
6050 mblist_append_list(&in_list, seg);
6051 continue;
6052 }
6053 /* there's just one packet, no segmentation */
6054 scan = seg.head;
6055 }
6056 /* need checksum if it's marked for checksum offload */
6057 if (bif_uses_virtio &&
6058 (scan->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
6059 error = bridge_offload_checksum(&scan, &info, &stats);
6060 if (error != 0) {
6061 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
6062 "%s bridge_offload_checksum failed %d",
6063 bridge_ifp->if_xname, error);
6064 if (scan != NULL) {
6065 m_freem(scan);
6066 scan = NULL;
6067 }
6068 in_errors++;
6069 continue;
6070 }
6071 }
6072 mblist_append(&in_list, scan);
6073 }
6074
6075 done:
6076 if (in_list.head != NULL) {
6077 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6078 "%s packets %d bytes %d",
6079 bridge_ifp->if_xname,
6080 in_list.count, in_list.bytes);
6081 /* Mark the packets as arriving on the bridge interface */
6082 inject_input_packet_list(bridge_ifp, in_list.head, false);
6083 ifnet_stat_increment_in(bridge_ifp, in_list.count,
6084 in_list.bytes, in_errors);
6085 } else if (in_errors != 0) {
6086 ifnet_stat_increment_in(bridge_ifp, 0, 0, in_errors);
6087 }
6088 return;
6089 }
6090
6091 /*
6092 * bridge_broadcast:
6093 *
6094 * Send a frame to all interfaces that are members of
6095 * the bridge, except for the one on which the packet
6096 * arrived.
6097 *
6098 * NOTE: Releases the lock on return.
6099 */
6100 static void
6101 bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
6102 ether_type_flag_t etypef, mbuf_t m)
6103 {
6104 ifnet_t bridge_ifp;
6105 struct bridge_iflist *dbif;
6106 struct ifnet * src_if;
6107 mbuf_ref_t mc;
6108 struct mbuf *mc_in;
6109 int error = 0, used = 0;
6110 ChecksumOperation cksum_op;
6111 struct mac_nat_record mnr;
6112 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
6113 boolean_t translate_mac = FALSE;
6114 uint32_t sc_filter_flags;
6115 bool is_bcast_mcast;
6116
6117 bridge_ifp = sc->sc_ifp;
6118 if (sbif != NULL) {
6119 src_if = sbif->bif_ifp;
6120 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6121 if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6122 /* get the translation record */
6123 translate_mac
6124 = bridge_mac_nat_output(sc, sbif, &m, &mnr);
6125 if (m == NULL) {
6126 /* packet was deallocated */
6127 BRIDGE_UNLOCK(sc);
6128 return;
6129 }
6130 }
6131 } else {
6132 /*
6133 * sbif is NULL when the bridge interface calls
6134 * bridge_broadcast().
6135 */
6136 cksum_op = CHECKSUM_OPERATION_FINALIZE;
6137 src_if = NULL;
6138 }
6139
6140 BRIDGE_LOCK2REF(sc, error);
6141 if (error) {
6142 m_freem(m);
6143 return;
6144 }
6145 is_bcast_mcast = IS_BCAST_MCAST(m);
6146 sc_filter_flags = sc->sc_filter_flags;
6147 TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6148 ifnet_t dst_if;
6149
6150 dst_if = dbif->bif_ifp;
6151 if (dst_if == src_if) {
6152 /* skip the interface that the packet came in on */
6153 continue;
6154 }
6155
6156 /* Private segments can not talk to each other */
6157 if (sbif != NULL &&
6158 (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6159 continue;
6160 }
6161
6162 if ((dbif->bif_ifflags & IFBIF_STP) &&
6163 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6164 continue;
6165 }
6166
6167 if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6168 !is_bcast_mcast) {
6169 continue;
6170 }
6171
6172 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6173 continue;
6174 }
6175
6176 if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
6177 continue;
6178 }
6179
6180 if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6181 mc = m;
6182 used = 1;
6183 } else {
6184 mc = m_dup(m, M_DONTWAIT);
6185 if (mc == NULL) {
6186 (void) ifnet_stat_increment_out(bridge_ifp,
6187 0, 0, 1);
6188 continue;
6189 }
6190 }
6191
6192 /*
6193 * If broadcast input is enabled, do so only if this
6194 * is an input packet.
6195 */
6196 if (sbif != NULL && is_bcast_mcast &&
6197 (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6198 mc_in = m_dup(mc, M_DONTWAIT);
6199 /* this could fail, but we continue anyways */
6200 } else {
6201 mc_in = NULL;
6202 }
6203
6204 /* out */
6205 if (translate_mac && mac_nat_bif == dbif) {
6206 /* translate the packet */
6207 bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
6208 }
6209
6210 if (mc != NULL && sbif != NULL &&
6211 PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6212 if (used == 0) {
6213 /* Keep the layer3 header aligned */
6214 int i = min(mc->m_pkthdr.len, max_protohdr);
6215 mc = m_copyup(mc, i, ETHER_ALIGN);
6216 if (mc == NULL) {
6217 (void) ifnet_stat_increment_out(
6218 sc->sc_ifp, 0, 0, 1);
6219 if (mc_in != NULL) {
6220 m_freem(mc_in);
6221 mc_in = NULL;
6222 }
6223 continue;
6224 }
6225 }
6226 if (bridge_pf(&mc, dst_if, sc_filter_flags, false) != 0) {
6227 if (mc_in != NULL) {
6228 m_freem(mc_in);
6229 mc_in = NULL;
6230 }
6231 continue;
6232 }
6233 if (mc == NULL) {
6234 if (mc_in != NULL) {
6235 m_freem(mc_in);
6236 mc_in = NULL;
6237 }
6238 continue;
6239 }
6240 }
6241
6242 if (mc != NULL) {
6243 /* verify checksum if necessary */
6244 if (bif_has_checksum_offload(dbif) && sbif != NULL &&
6245 !bif_has_checksum_offload(sbif)) {
6246 error = bridge_verify_checksum(&mc,
6247 &dbif->bif_stats);
6248 if (error != 0) {
6249 if (mc != NULL) {
6250 m_freem(mc);
6251 }
6252 mc = NULL;
6253 }
6254 }
6255 if (mc != NULL) {
6256 (void) bridge_enqueue(bridge_ifp,
6257 NULL, dst_if, etypef, mc, cksum_op,
6258 pkt_direction_TX);
6259 }
6260 }
6261
6262 /* in */
6263 if (mc_in == NULL) {
6264 continue;
6265 }
6266 BRIDGE_BPF_TAP_IN(dst_if, mc_in);
6267 prepare_input_packet(dst_if, mc_in);
6268 mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
6269 dlil_input_packet_list(dst_if, mc_in);
6270 }
6271 if (used == 0) {
6272 m_freem(m);
6273 }
6274
6275
6276 BRIDGE_UNREF(sc);
6277 }
6278
6279 static mbuf_t
6280 copy_packet_list(mbuf_t m)
6281 {
6282 mblist ret;
6283 mbuf_t next_packet;
6284
6285 mblist_init(&ret);
6286 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
6287 mbuf_t copy_m;
6288
6289 /* take it out of the list */
6290 next_packet = scan->m_nextpkt;
6291 scan->m_nextpkt = NULL;
6292
6293 /* create a copy and add it to the new list */
6294 copy_m = m_dup(scan, M_DONTWAIT);
6295 if (copy_m != NULL) {
6296 mblist_append(&ret, copy_m);
6297 }
6298
6299 /* put it back in the original list */
6300 scan->m_nextpkt = next_packet;
6301 }
6302 return ret.head;
6303 }
6304
6305 /*
6306 * bridge_broadcast_list:
6307 *
6308 * Broadcast a list of packets to all members except `sbif`.
6309 * Consumes `m` before returning.
6310 *
6311 * NOTE: Releases the lock on return.
6312 */
6313 static void
6314 bridge_broadcast_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
6315 ether_type_flag_t etypef, mbuf_t m, pkt_direction_t direction)
6316 {
6317 ifnet_t bridge_ifp;
6318 bool bridge_needs_input;
6319 struct bridge_iflist * dbif;
6320 bool is_bcast_mcast;
6321 errno_t error = 0;
6322 ChecksumOperation cksum_op;
6323 struct bridge_iflist * mac_nat_bif = sc->sc_mac_nat_bif;
6324 ifnet_t mac_nat_if = NULL;
6325 bool need_mac_nat = false;
6326 mbuf_t out_mac_nat = NULL;
6327 ifnet_t src_if;
6328 uint32_t sc_filter_flags;
6329 bool used = false;
6330
6331 bridge_ifp = sc->sc_ifp;
6332 if (sbif != NULL) {
6333 src_if = sbif->bif_ifp;
6334
6335 if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
6336 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6337
6338 /* compute checksum on packets marked with offload */
6339 m = bridge_checksum_offload_list(bridge_ifp, sbif,
6340 m, is_ipv4);
6341 if (m == NULL) {
6342 BRIDGE_UNLOCK(sc);
6343 goto done;
6344 }
6345 cksum_op = CHECKSUM_OPERATION_NONE;
6346 } else {
6347 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6348 }
6349
6350 /*
6351 * If MAC-NAT is enabled and we'll be sending the packets
6352 * over it, verify that it is up and active before
6353 * deciding to make a translated copy.
6354 */
6355 if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6356 mac_nat_if = mac_nat_bif->bif_ifp;
6357 if ((mac_nat_if->if_flags & IFF_RUNNING) != 0 &&
6358 (mac_nat_bif->bif_flags & BIFF_MEDIA_ACTIVE) != 0) {
6359 need_mac_nat = true;
6360 }
6361 }
6362 } else {
6363 /*
6364 * sbif is NULL when the bridge interface calls
6365 * bridge_broadcast_list() (TBD).
6366 */
6367 cksum_op = CHECKSUM_OPERATION_FINALIZE;
6368 src_if = NULL;
6369 }
6370
6371 /*
6372 * Create a translated copy for packets destined to MAC-NAT interface.
6373 */
6374 if (need_mac_nat) {
6375 out_mac_nat
6376 = bridge_mac_nat_copy_and_translate_list(sc, sbif,
6377 mac_nat_if, m);
6378 }
6379 sc_filter_flags = sc->sc_filter_flags;
6380 bridge_needs_input = (sc->sc_flags & SCF_PROTO_ATTACHED) != 0;
6381 BRIDGE_LOCK2REF(sc, error);
6382 if (error) {
6383 goto done;
6384 }
6385 is_bcast_mcast = IS_BCAST_MCAST(m);
6386
6387 /* make a copy for the bridge interface */
6388 if (sbif != NULL && is_bcast_mcast && bridge_needs_input) {
6389 mbuf_t in_list;
6390
6391 in_list = copy_packet_list(m);
6392 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6393 "%s mcast for us in_m %p",
6394 bridge_ifp->if_xname, in_list);
6395 if (in_list != NULL) {
6396 inject_input_packet_list(bridge_ifp, in_list, false);
6397 }
6398 }
6399
6400 TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6401 ifnet_t dst_if;
6402 mbuf_t in_m = NULL;
6403 mbuf_t out_m = NULL;
6404
6405 dst_if = dbif->bif_ifp;
6406 if (dst_if == src_if) {
6407 /* skip the interface that the packet came in on */
6408 continue;
6409 }
6410
6411 /* Private segments can not talk to each other */
6412 if (sbif != NULL &&
6413 (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6414 continue;
6415 }
6416
6417 if ((dbif->bif_ifflags & IFBIF_STP) &&
6418 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6419 continue;
6420 }
6421
6422 if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6423 !is_bcast_mcast) {
6424 continue;
6425 }
6426
6427 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6428 continue;
6429 }
6430
6431 if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
6432 continue;
6433 }
6434 if (dbif == mac_nat_bif) {
6435 /* translated copy was created above, use that */
6436 out_m = out_mac_nat;
6437 out_mac_nat = NULL;
6438 } else if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6439 /* consume `m` */
6440 out_m = m;
6441 used = true;
6442 } else {
6443 /* needs a copy */
6444 out_m = copy_packet_list(m);
6445 }
6446
6447 if (out_m == NULL) {
6448 ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
6449 continue;
6450 }
6451 /*
6452 * If broadcast input is enabled, do so only if this
6453 * is an input packet.
6454 */
6455 if (sbif != NULL && is_bcast_mcast &&
6456 (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6457 in_m = copy_packet_list(m);
6458 /* this could fail, but we continue anyways */
6459 } else {
6460 in_m = NULL;
6461 }
6462
6463 if (sbif != NULL &&
6464 PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6465 out_m = bridge_pf_list(out_m, dst_if,
6466 sc_filter_flags, false);
6467 }
6468 if (out_m != NULL) {
6469 /* verify checksum if necessary */
6470 if (sbif != NULL &&
6471 ether_type_flag_is_ip(etypef) &&
6472 bif_has_checksum_offload(dbif) &&
6473 !bif_has_checksum_offload(sbif)) {
6474 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6475
6476 out_m = bridge_verify_checksum_list(bridge_ifp,
6477 dbif, out_m, is_ipv4);
6478 }
6479 if (out_m != NULL) {
6480 bridge_enqueue(bridge_ifp, src_if, dst_if,
6481 etypef, out_m, cksum_op, direction);
6482 }
6483 }
6484
6485 /* in */
6486 if (in_m != NULL) {
6487 inject_input_packet_list(dst_if, in_m, true);
6488 }
6489 }
6490
6491 BRIDGE_UNREF(sc);
6492
6493 done:
6494 if (out_mac_nat != NULL) {
6495 m_freem_list(out_mac_nat);
6496 }
6497 if (!used) {
6498 m_freem_list(m);
6499 }
6500 return;
6501 }
6502
6503 #define NEEDED_CSUM_IPV4 (IF_HWASSIST_CSUM_UDP | IF_HWASSIST_CSUM_TCP)
6504 #define NEEDED_CSUM_IPV6 (IF_HWASSIST_CSUM_UDPIPV6 | IF_HWASSIST_CSUM_TCPIPV6)
6505
6506 static bool
6507 interface_supports_hw_checksum(ifnet_t ifp, bool is_ipv4)
6508 {
6509 uint32_t hwcap = IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
6510 uint32_t needed = is_ipv4 ? NEEDED_CSUM_IPV4 : NEEDED_CSUM_IPV6;
6511 bool supports;
6512
6513 supports = (hwcap & needed) == needed;
6514 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM, "%s: does %ssupport checksum",
6515 ifp->if_xname, supports ? "" : "not ");
6516 return supports;
6517 }
6518
6519 static void
6520 bridge_forward_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
6521 ifnet_t dst_if, ether_type_flag_t etypef, mbuf_t m)
6522 {
6523 bool checksum_ok = false;
6524 ChecksumOperation cksum_op;
6525 ifnet_t bridge_ifp = NULL;
6526 struct bridge_iflist * dbif;
6527 uint32_t sc_filter_flags;
6528 ifnet_t src_if;
6529 drop_reason_t drop_reason = DROP_REASON_BRIDGE_UNSPECIFIED;
6530
6531 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6532 drop_reason = DROP_REASON_BRIDGE_NOT_RUNNING;
6533 goto drop;
6534 }
6535 dbif = bridge_lookup_member_if(sc, dst_if);
6536 if (dbif == NULL) {
6537 /* Not a member of the bridge (anymore?) */
6538 drop_reason = DROP_REASON_BRIDGE_NOT_A_MEMBER;
6539 goto drop;
6540 }
6541
6542 /* Private segments can not talk to each other */
6543 if ((sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) != 0) {
6544 drop_reason = DROP_REASON_BRIDGE_PRIVATE_SEGMENT;
6545 goto drop;
6546 }
6547 bridge_ifp = sc->sc_ifp;
6548 src_if = sbif->bif_ifp;
6549 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6550 if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
6551 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6552
6553 if (dbif == sc->sc_mac_nat_bif ||
6554 (IFNET_IS_VMNET(dst_if) && !bif_uses_virtio(dbif)) ||
6555 !interface_supports_hw_checksum(dst_if, is_ipv4)) {
6556 /* compute checksums now if necessary */
6557 m = bridge_checksum_offload_list(bridge_ifp, sbif,
6558 m, is_ipv4);
6559 checksum_ok = true;
6560 } else {
6561 cksum_op = CHECKSUM_OPERATION_NONE;
6562 }
6563 }
6564
6565 if (dbif == sc->sc_mac_nat_bif) {
6566 /* translate the packets before forwarding them */
6567 if ((etypef & ETHER_TYPE_FLAG_IP_ARP) != 0) {
6568 m = bridge_mac_nat_translate_list(sc, sbif, dst_if, m);
6569 }
6570 } else if (!checksum_ok && ether_type_flag_is_ip(etypef) &&
6571 bif_has_checksum_offload(dbif) && !bif_has_checksum_offload(sbif)) {
6572 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6573
6574 /*
6575 * If the destination interface has checksum offload enabled,
6576 * verify the checksum now, unless the source interface also has
6577 * checksum offload enabled. The checksum in that case has
6578 * already just been computed and verifying it is unnecessary.
6579 */
6580 m = bridge_verify_checksum_list(bridge_ifp, dbif, m, is_ipv4);
6581 }
6582 sc_filter_flags = sc->sc_filter_flags;
6583 BRIDGE_UNLOCK(sc);
6584 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6585 m = bridge_pf_list(m, dst_if, sc_filter_flags, false);
6586 }
6587
6588 /*
6589 * We're forwarding inbound packets for which the checksums must
6590 * already have been computed and if required, verified, or
6591 * packets from a virtio-enabled interface for which we rely
6592 * on the packet containing appropriate offload flags.
6593 */
6594 if (m != NULL) {
6595 bridge_enqueue(bridge_ifp, src_if, dst_if, etypef, m,
6596 cksum_op, pkt_direction_RX);
6597 }
6598 return;
6599
6600 drop:
6601 BRIDGE_UNLOCK(sc);
6602 m_drop_list(m, bridge_ifp, DROPTAP_FLAG_DIR_IN, drop_reason, NULL, 0);
6603 return;
6604 }
6605
6606 /*
6607 * bridge_span:
6608 *
6609 * Duplicate a packet out one or more interfaces that are in span mode,
6610 * the original mbuf is unmodified.
6611 */
6612 static void
6613 bridge_span(struct bridge_softc *sc, ether_type_flag_t etypef, struct mbuf *m)
6614 {
6615 struct bridge_iflist *bif;
6616 struct ifnet *dst_if;
6617 struct mbuf *mc;
6618
6619 if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6620 return;
6621 }
6622
6623 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6624 dst_if = bif->bif_ifp;
6625
6626 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6627 continue;
6628 }
6629
6630 mc = m_copypacket(m, M_DONTWAIT);
6631 if (mc == NULL) {
6632 (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6633 continue;
6634 }
6635
6636 (void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, etypef, mc,
6637 CHECKSUM_OPERATION_NONE, pkt_direction_TX);
6638 }
6639 }
6640
6641 /*
6642 * bridge_rtupdate:
6643 *
6644 * Add a bridge routing entry.
6645 */
6646 static int
6647 bridge_rtupdate(struct bridge_softc *sc, const uint8_t dst[ETHER_ADDR_LEN], uint16_t vlan,
6648 struct bridge_iflist *bif, int setflags, uint8_t flags)
6649 {
6650 struct bridge_rtnode *brt;
6651 int error;
6652
6653 BRIDGE_LOCK_ASSERT_HELD(sc);
6654
6655 /* Check the source address is valid and not multicast. */
6656 if (ETHER_IS_MULTICAST(dst) ||
6657 (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6658 dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6659 return EINVAL;
6660 }
6661
6662 /* 802.1p frames map to vlan 1 */
6663 if (vlan == 0) {
6664 vlan = 1;
6665 }
6666
6667 /*
6668 * A route for this destination might already exist. If so,
6669 * update it, otherwise create a new one.
6670 */
6671 if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6672 if (sc->sc_brtcnt >= sc->sc_brtmax) {
6673 sc->sc_brtexceeded++;
6674 return ENOSPC;
6675 }
6676 /* Check per interface address limits (if enabled) */
6677 if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6678 bif->bif_addrexceeded++;
6679 return ENOSPC;
6680 }
6681
6682 /*
6683 * Allocate a new bridge forwarding node, and
6684 * initialize the expiration time and Ethernet
6685 * address.
6686 */
6687 brt = zalloc_noblock(bridge_rtnode_pool);
6688 if (brt == NULL) {
6689 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6690 "zalloc_nolock failed");
6691 return ENOMEM;
6692 }
6693 bzero(brt, sizeof(struct bridge_rtnode));
6694
6695 if (bif->bif_ifflags & IFBIF_STICKY) {
6696 brt->brt_flags = IFBAF_STICKY;
6697 } else {
6698 brt->brt_flags = IFBAF_DYNAMIC;
6699 }
6700
6701 memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6702 brt->brt_vlan = vlan;
6703
6704 if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6705 zfree(bridge_rtnode_pool, brt);
6706 return error;
6707 }
6708 brt->brt_dst = bif;
6709 bif->bif_addrcnt++;
6710 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6711 "added %02x:%02x:%02x:%02x:%02x:%02x "
6712 "on %s count %u hashsize %u",
6713 dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6714 sc->sc_ifp->if_xname, sc->sc_brtcnt,
6715 sc->sc_rthash_size);
6716 }
6717
6718 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6719 brt->brt_dst != bif) {
6720 brt->brt_dst->bif_addrcnt--;
6721 brt->brt_dst = bif;
6722 brt->brt_dst->bif_addrcnt++;
6723 }
6724
6725 if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6726 unsigned long now;
6727
6728 now = (unsigned long) net_uptime();
6729 brt->brt_expire = now + sc->sc_brttimeout;
6730 }
6731 if (setflags) {
6732 brt->brt_flags = flags;
6733 }
6734
6735 return 0;
6736 }
6737
6738 /*
6739 * bridge_rtlookup:
6740 *
6741 * Lookup the destination interface for an address.
6742 */
6743 static struct bridge_iflist *
6744 bridge_rtlookup_bif(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
6745 uint16_t vlan)
6746 {
6747 struct bridge_rtnode *brt;
6748
6749 BRIDGE_LOCK_ASSERT_HELD(sc);
6750
6751 if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6752 return NULL;
6753 }
6754
6755 return brt->brt_dst;
6756 }
6757
6758 /*
6759 * bridge_rttrim:
6760 *
6761 * Trim the routine table so that we have a number
6762 * of routing entries less than or equal to the
6763 * maximum number.
6764 */
6765 static void
6766 bridge_rttrim(struct bridge_softc *sc)
6767 {
6768 struct bridge_rtnode *brt, *nbrt;
6769
6770 BRIDGE_LOCK_ASSERT_HELD(sc);
6771
6772 /* Make sure we actually need to do this. */
6773 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6774 return;
6775 }
6776
6777 /* Force an aging cycle; this might trim enough addresses. */
6778 bridge_rtage(sc);
6779 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6780 return;
6781 }
6782
6783 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6784 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6785 bridge_rtnode_destroy(sc, brt);
6786 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6787 return;
6788 }
6789 }
6790 }
6791 }
6792
6793 /*
6794 * bridge_aging_timer:
6795 *
6796 * Aging periodic timer for the bridge routing table.
6797 */
6798 static void
6799 bridge_aging_timer(struct bridge_softc *sc)
6800 {
6801 BRIDGE_LOCK_ASSERT_HELD(sc);
6802
6803 bridge_rtage(sc);
6804 if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6805 (sc->sc_flags & SCF_DETACHING) == 0) {
6806 sc->sc_aging_timer.bdc_sc = sc;
6807 sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6808 sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6809 bridge_schedule_delayed_call(&sc->sc_aging_timer);
6810 }
6811 }
6812
6813 /*
6814 * bridge_rtage:
6815 *
6816 * Perform an aging cycle.
6817 */
6818 static void
6819 bridge_rtage(struct bridge_softc *sc)
6820 {
6821 struct bridge_rtnode *brt, *nbrt;
6822 unsigned long now;
6823
6824 BRIDGE_LOCK_ASSERT_HELD(sc);
6825
6826 now = (unsigned long) net_uptime();
6827
6828 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6829 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6830 if (now >= brt->brt_expire) {
6831 bridge_rtnode_destroy(sc, brt);
6832 }
6833 }
6834 }
6835 if (sc->sc_mac_nat_bif != NULL) {
6836 bridge_mac_nat_age_entries(sc, now);
6837 }
6838 }
6839
6840 /*
6841 * bridge_rtflush:
6842 *
6843 * Remove all dynamic addresses from the bridge.
6844 */
6845 static void
6846 bridge_rtflush(struct bridge_softc *sc, int full)
6847 {
6848 struct bridge_rtnode *brt, *nbrt;
6849
6850 BRIDGE_LOCK_ASSERT_HELD(sc);
6851
6852 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6853 if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6854 bridge_rtnode_destroy(sc, brt);
6855 }
6856 }
6857 }
6858
6859 /*
6860 * bridge_rtdaddr:
6861 *
6862 * Remove an address from the table.
6863 */
6864 static int
6865 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN], uint16_t vlan)
6866 {
6867 struct bridge_rtnode *brt;
6868 int found = 0;
6869
6870 BRIDGE_LOCK_ASSERT_HELD(sc);
6871
6872 /*
6873 * If vlan is zero then we want to delete for all vlans so the lookup
6874 * may return more than one.
6875 */
6876 while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6877 bridge_rtnode_destroy(sc, brt);
6878 found = 1;
6879 }
6880
6881 return found ? 0 : ENOENT;
6882 }
6883
6884 /*
6885 * bridge_rtdelete:
6886 *
6887 * Delete routes to a specific member interface.
6888 */
6889 static void
6890 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6891 {
6892 struct bridge_rtnode *brt, *nbrt;
6893
6894 BRIDGE_LOCK_ASSERT_HELD(sc);
6895
6896 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6897 if (brt->brt_ifp == ifp && (full ||
6898 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6899 bridge_rtnode_destroy(sc, brt);
6900 }
6901 }
6902 }
6903
6904 /*
6905 * bridge_rtable_init:
6906 *
6907 * Initialize the route table for this bridge.
6908 */
6909 static int
6910 bridge_rtable_init(struct bridge_softc *sc)
6911 {
6912 u_int32_t i;
6913
6914 sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6915 BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6916 sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6917
6918 for (i = 0; i < sc->sc_rthash_size; i++) {
6919 LIST_INIT(&sc->sc_rthash[i]);
6920 }
6921
6922 sc->sc_rthash_key = RandomULong();
6923
6924 LIST_INIT(&sc->sc_rtlist);
6925
6926 return 0;
6927 }
6928
6929 /*
6930 * bridge_rthash_delayed_resize:
6931 *
6932 * Resize the routing table hash on a delayed thread call.
6933 */
6934 static void
6935 bridge_rthash_delayed_resize(struct bridge_softc *sc)
6936 {
6937 u_int32_t new_rthash_size = 0;
6938 u_int32_t old_rthash_size = 0;
6939 struct _bridge_rtnode_list *new_rthash = NULL;
6940 struct _bridge_rtnode_list *old_rthash = NULL;
6941 u_int32_t i;
6942 struct bridge_rtnode *brt;
6943 int error = 0;
6944
6945 BRIDGE_LOCK_ASSERT_HELD(sc);
6946
6947 /*
6948 * Four entries per hash bucket is our ideal load factor
6949 */
6950 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6951 goto out;
6952 }
6953
6954 /*
6955 * Doubling the number of hash buckets may be too simplistic
6956 * especially when facing a spike of new entries
6957 */
6958 new_rthash_size = sc->sc_rthash_size * 2;
6959
6960 sc->sc_flags |= SCF_RESIZING;
6961 BRIDGE_UNLOCK(sc);
6962
6963 new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
6964 Z_WAITOK | Z_ZERO);
6965
6966 BRIDGE_LOCK(sc);
6967 sc->sc_flags &= ~SCF_RESIZING;
6968
6969 if (new_rthash == NULL) {
6970 error = ENOMEM;
6971 goto out;
6972 }
6973 if ((sc->sc_flags & SCF_DETACHING)) {
6974 error = ENODEV;
6975 goto out;
6976 }
6977 /*
6978 * Fail safe from here on
6979 */
6980 old_rthash = sc->sc_rthash;
6981 old_rthash_size = sc->sc_rthash_size;
6982 sc->sc_rthash = new_rthash;
6983 sc->sc_rthash_size = new_rthash_size;
6984
6985 /*
6986 * Get a new key to force entries to be shuffled around to reduce
6987 * the likelihood they will land in the same buckets
6988 */
6989 sc->sc_rthash_key = RandomULong();
6990
6991 for (i = 0; i < sc->sc_rthash_size; i++) {
6992 LIST_INIT(&sc->sc_rthash[i]);
6993 }
6994
6995 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
6996 LIST_REMOVE(brt, brt_hash);
6997 (void) bridge_rtnode_hash(sc, brt);
6998 }
6999 out:
7000 if (error == 0) {
7001 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7002 "%s new size %u",
7003 sc->sc_ifp->if_xname, sc->sc_rthash_size);
7004 kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
7005 } else {
7006 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
7007 "%s failed %d", sc->sc_ifp->if_xname, error);
7008 kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
7009 }
7010 }
7011
7012 /*
7013 * Resize the number of hash buckets based on the load factor
7014 * Currently only grow
7015 * Failing to resize the hash table is not fatal
7016 */
7017 static void
7018 bridge_rthash_resize(struct bridge_softc *sc)
7019 {
7020 BRIDGE_LOCK_ASSERT_HELD(sc);
7021
7022 if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
7023 return;
7024 }
7025
7026 /*
7027 * Four entries per hash bucket is our ideal load factor
7028 */
7029 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
7030 return;
7031 }
7032 /*
7033 * Hard limit on the size of the routing hash table
7034 */
7035 if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
7036 return;
7037 }
7038
7039 sc->sc_resize_call.bdc_sc = sc;
7040 sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
7041 bridge_schedule_delayed_call(&sc->sc_resize_call);
7042 }
7043
/*
 * bridge_rtable_fini:
 *
 *	Deconstruct the route table for this bridge: free the hash bucket
 *	array and reset the bucket pointer and count.  All route nodes
 *	must already have been destroyed (asserted below).
 */
static void
bridge_rtable_fini(struct bridge_softc *sc)
{
	KASSERT(sc->sc_brtcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
	kfree_type_counted_by(struct _bridge_rtnode_list, sc->sc_rthash_size,
	    sc->sc_rthash);
	/* make sure no stale pointer/count is left behind */
	sc->sc_rthash = NULL;
	sc->sc_rthash_size = 0;
}
7059
7060 /*
7061 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
7062 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
7063 */
7064 #define mix(a, b, c) \
7065 do { \
7066 a -= b; a -= c; a ^= (c >> 13); \
7067 b -= c; b -= a; b ^= (a << 8); \
7068 c -= a; c -= b; c ^= (b >> 13); \
7069 a -= b; a -= c; a ^= (c >> 12); \
7070 b -= c; b -= a; b ^= (a << 16); \
7071 c -= a; c -= b; c ^= (b >> 5); \
7072 a -= b; a -= c; a ^= (c >> 3); \
7073 b -= c; b -= a; b ^= (a << 10); \
7074 c -= a; c -= b; c ^= (b >> 15); \
7075 } while ( /*CONSTCOND*/ 0)
7076
7077 static __inline uint32_t
7078 bridge_rthash(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN])
7079 {
7080 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
7081
7082 b += addr[5] << 8;
7083 b += addr[4];
7084 a += addr[3] << 24;
7085 a += addr[2] << 16;
7086 a += addr[1] << 8;
7087 a += addr[0];
7088
7089 mix(a, b, c);
7090
7091 return c & BRIDGE_RTHASH_MASK(sc);
7092 }
7093
7094 #undef mix
7095
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.  Returns zero if they
 *	are equal, otherwise the difference between the first pair of
 *	bytes that differ (positive if `a` sorts after `b`).
 */
static int
bridge_rtnode_addr_cmp(const uint8_t a[ETHER_ADDR_LEN], const uint8_t b[ETHER_ADDR_LEN])
{
	for (int idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int     delta = ((int)a[idx]) - ((int)b[idx]);

		if (delta != 0) {
			return delta;
		}
	}

	return 0;
}
7107
7108 /*
7109 * bridge_rtnode_lookup:
7110 *
7111 * Look up a bridge route node for the specified destination. Compare the
7112 * vlan id or if zero then just return the first match.
7113 */
7114 static struct bridge_rtnode *
7115 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
7116 uint16_t vlan)
7117 {
7118 struct bridge_rtnode *brt;
7119 uint32_t hash;
7120 int dir;
7121
7122 BRIDGE_LOCK_ASSERT_HELD(sc);
7123
7124 hash = bridge_rthash(sc, addr);
7125 LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7126 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7127 if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7128 return brt;
7129 }
7130 if (dir > 0) {
7131 return NULL;
7132 }
7133 }
7134
7135 return NULL;
7136 }
7137
7138 /*
7139 * bridge_rtnode_hash:
7140 *
7141 * Insert the specified bridge node into the route hash table.
7142 * This is used when adding a new node or to rehash when resizing
7143 * the hash table
7144 */
7145 static int
7146 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7147 {
7148 struct bridge_rtnode *lbrt;
7149 uint32_t hash;
7150 int dir;
7151
7152 BRIDGE_LOCK_ASSERT_HELD(sc);
7153
7154 hash = bridge_rthash(sc, brt->brt_addr);
7155
7156 lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7157 if (lbrt == NULL) {
7158 LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7159 goto out;
7160 }
7161
7162 do {
7163 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7164 if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7165 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7166 "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7167 sc->sc_ifp->if_xname,
7168 brt->brt_addr[0], brt->brt_addr[1],
7169 brt->brt_addr[2], brt->brt_addr[3],
7170 brt->brt_addr[4], brt->brt_addr[5]);
7171 return EEXIST;
7172 }
7173 if (dir > 0) {
7174 LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7175 goto out;
7176 }
7177 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7178 LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7179 goto out;
7180 }
7181 lbrt = LIST_NEXT(lbrt, brt_hash);
7182 } while (lbrt != NULL);
7183
7184 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7185 "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7186 sc->sc_ifp->if_xname,
7187 brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7188 brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7189 out:
7190 return 0;
7191 }
7192
7193 /*
7194 * bridge_rtnode_insert:
7195 *
7196 * Insert the specified bridge node into the route table. We
7197 * assume the entry is not already in the table.
7198 */
7199 static int
7200 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7201 {
7202 int error;
7203
7204 error = bridge_rtnode_hash(sc, brt);
7205 if (error != 0) {
7206 return error;
7207 }
7208
7209 LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7210 sc->sc_brtcnt++;
7211
7212 bridge_rthash_resize(sc);
7213
7214 return 0;
7215 }
7216
7217 /*
7218 * bridge_rtnode_destroy:
7219 *
7220 * Destroy a bridge rtnode.
7221 */
7222 static void
7223 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7224 {
7225 BRIDGE_LOCK_ASSERT_HELD(sc);
7226
7227 LIST_REMOVE(brt, brt_hash);
7228
7229 LIST_REMOVE(brt, brt_list);
7230 sc->sc_brtcnt--;
7231 brt->brt_dst->bif_addrcnt--;
7232 zfree(bridge_rtnode_pool, brt);
7233 }
7234
7235 #if BRIDGESTP
7236 /*
7237 * bridge_rtable_expire:
7238 *
7239 * Set the expiry time for all routes on an interface.
7240 */
7241 static void
7242 bridge_rtable_expire(struct ifnet *ifp, int age)
7243 {
7244 struct bridge_softc *sc = ifp->if_bridge;
7245 struct bridge_rtnode *brt;
7246
7247 BRIDGE_LOCK(sc);
7248
7249 /*
7250 * If the age is zero then flush, otherwise set all the expiry times to
7251 * age for the interface
7252 */
7253 if (age == 0) {
7254 bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7255 } else {
7256 unsigned long now;
7257
7258 now = (unsigned long) net_uptime();
7259
7260 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7261 /* Cap the expiry time to 'age' */
7262 if (brt->brt_ifp == ifp &&
7263 brt->brt_expire > now + age &&
7264 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7265 brt->brt_expire = now + age;
7266 }
7267 }
7268 }
7269 BRIDGE_UNLOCK(sc);
7270 }
7271
7272 /*
7273 * bridge_state_change:
7274 *
7275 * Callback from the bridgestp code when a port changes states.
7276 */
7277 static void
7278 bridge_state_change(struct ifnet *ifp, int state)
7279 {
7280 struct bridge_softc *sc = ifp->if_bridge;
7281 static const char *stpstates[] = {
7282 "disabled",
7283 "listening",
7284 "learning",
7285 "forwarding",
7286 "blocking",
7287 "discarding"
7288 };
7289
7290 if (log_stp) {
7291 log(LOG_NOTICE, "%s: state changed to %s on %s",
7292 sc->sc_ifp->if_xname,
7293 stpstates[state], ifp->if_xname);
7294 }
7295 }
7296 #endif /* BRIDGESTP */
7297
7298 /*
7299 * bridge_detach:
7300 *
7301 * Callback when interface has been detached.
7302 */
7303 static void
7304 bridge_detach(ifnet_t ifp)
7305 {
7306 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7307
7308 #if BRIDGESTP
7309 bstp_detach(&sc->sc_stp);
7310 #endif /* BRIDGESTP */
7311
7312 /* Tear down the routing table. */
7313 bridge_rtable_fini(sc);
7314
7315 lck_mtx_lock(&bridge_list_mtx);
7316 LIST_REMOVE(sc, sc_list);
7317 lck_mtx_unlock(&bridge_list_mtx);
7318
7319 ifnet_release(ifp);
7320
7321 lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
7322 kfree_type(struct bridge_softc, sc);
7323 }
7324
7325 /*
7326 * bridge_link_event:
7327 *
7328 * Report a data link event on an interface
7329 */
7330 static void
7331 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7332 {
7333 struct event {
7334 u_int32_t ifnet_family;
7335 u_int32_t unit;
7336 char if_name[IFNAMSIZ];
7337 };
7338 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7339 struct kern_event_msg *header = (struct kern_event_msg*)message;
7340 struct event *data = (struct event *)(message + KEV_MSG_HEADER_SIZE);
7341
7342 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7343 "%s event_code %u - %s", ifp->if_xname,
7344 event_code, dlil_kev_dl_code_str(event_code));
7345 header->total_size = sizeof(message);
7346 header->vendor_code = KEV_VENDOR_APPLE;
7347 header->kev_class = KEV_NETWORK_CLASS;
7348 header->kev_subclass = KEV_DL_SUBCLASS;
7349 header->event_code = event_code;
7350 data->ifnet_family = ifnet_family(ifp);
7351 data->unit = (u_int32_t)ifnet_unit(ifp);
7352 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7353 ifnet_event(ifp, header);
7354 }
7355
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host filter drop: increment the named counter in
 *	bridge_hostfilter_stats, log the calling function, line and counter
 *	name, and set the caller's local `error' to EINVAL.  The caller
 *	must have an `int error' in scope.
 *
 *	Wrapped in do { } while (0) so that the macro followed by a
 *	semicolon behaves as a single statement, including inside an
 *	unbraced if/else.
 */
#define BRIDGE_HF_DROP(reason, func, line) do {                         \
	bridge_hostfilter_stats.reason++;                               \
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,                       \
	    "%s.%d" #reason, func, line);                               \
	error = EINVAL;                                                 \
} while ( /*CONSTCOND*/ 0)
7362
7363 static int
7364 bridge_host_filter_arp(struct bridge_iflist *bif, mbuf_t *data)
7365 {
7366 struct ether_arp *ea;
7367 struct ether_header *eh;
7368 int error = EINVAL;
7369 mbuf_t m = *data;
7370 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7371
7372 /*
7373 * Make the Ethernet and ARP headers contiguous
7374 */
7375 if (mbuf_pkthdr_len(m) < minlen) {
7376 BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7377 goto done;
7378 }
7379 if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7380 BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7381 __func__, __LINE__);
7382 goto done;
7383 }
7384 m = *data;
7385
7386 /*
7387 * Restrict Ethernet protocols to ARP and IP/IPv6
7388 */
7389 eh = mtod(m, struct ether_header *);
7390 ea = (struct ether_arp *)(eh + 1);
7391 if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7392 BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7393 __func__, __LINE__);
7394 goto done;
7395 }
7396 if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7397 BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7398 __func__, __LINE__);
7399 goto done;
7400 }
7401 /*
7402 * Verify the address lengths are correct
7403 */
7404 if (ea->arp_hln != ETHER_ADDR_LEN) {
7405 BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7406 goto done;
7407 }
7408 if (ea->arp_pln != sizeof(struct in_addr)) {
7409 BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7410 __func__, __LINE__);
7411 goto done;
7412 }
7413 /*
7414 * Allow only ARP request or ARP reply
7415 */
7416 if (ea->arp_op != HTONS_ARPOP_REQUEST &&
7417 ea->arp_op != HTONS_ARPOP_REPLY) {
7418 BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7419 goto done;
7420 }
7421 if ((bif->bif_flags & BIFF_HF_HWSRC) != 0) {
7422 /*
7423 * Verify source hardware address matches
7424 */
7425 if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7426 ETHER_ADDR_LEN) != 0) {
7427 BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7428 goto done;
7429 }
7430 }
7431 if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7432 /*
7433 * Verify source protocol address:
7434 * May be null for an ARP probe
7435 */
7436 if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7437 sizeof(struct in_addr)) != 0 &&
7438 bcmp(ea->arp_spa, &inaddr_any,
7439 sizeof(struct in_addr)) != 0) {
7440 BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7441 goto done;
7442 }
7443 }
7444 bridge_hostfilter_stats.brhf_arp_ok += 1;
7445 error = 0;
7446 done:
7447 return error;
7448 }
7449
7450 /*
7451 * MAC NAT
7452 */
7453
7454 static errno_t
7455 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7456 {
7457 errno_t error = 0;
7458
7459 BRIDGE_LOCK_ASSERT_HELD(sc);
7460
7461 if (IFNET_IS_VMNET(bif->bif_ifp)) {
7462 error = EINVAL;
7463 goto done;
7464 }
7465 if (sc->sc_mac_nat_bif != NULL) {
7466 if (sc->sc_mac_nat_bif != bif) {
7467 error = EBUSY;
7468 }
7469 goto done;
7470 }
7471 sc->sc_mac_nat_bif = bif;
7472 bif->bif_ifflags |= IFBIF_MAC_NAT;
7473 bridge_mac_nat_populate_entries(sc);
7474
7475 done:
7476 return error;
7477 }
7478
7479 static void
7480 bridge_mac_nat_disable(struct bridge_softc *sc)
7481 {
7482 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7483
7484 assert(mac_nat_bif != NULL);
7485 bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7486 mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7487 sc->sc_mac_nat_bif = NULL;
7488 return;
7489 }
7490
7491 static void
7492 mac_nat_entry_print2(struct mac_nat_entry *mne,
7493 const char ifname[IFNAMSIZ], const char *msg1, const char *msg2)
7494 {
7495 int af;
7496 char etopbuf[24];
7497 char ntopbuf[MAX_IPv6_STR_LEN];
7498 const char *space;
7499
7500 af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7501 ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7502 (void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7503 if (msg2 == NULL) {
7504 msg2 = "";
7505 space = "";
7506 } else {
7507 space = " ";
7508 }
7509 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7510 "%.*s %s%s%s %p (%s, %s, %s)", IFNAMSIZ, ifname, msg1, space, msg2, mne,
7511 mne->mne_bif->bif_ifp->if_xname, ntopbuf, etopbuf);
7512 }
7513
7514 static void
7515 mac_nat_entry_print(struct mac_nat_entry *mne,
7516 const char ifname[IFNAMSIZ], const char *msg)
7517 {
7518 mac_nat_entry_print2(mne, ifname, msg, NULL);
7519 }
7520
7521 static struct mac_nat_entry *
7522 bridge_lookup_mac_nat_entry_ipv4(const struct bridge_softc *sc, const struct in_addr *ip)
7523 {
7524 struct mac_nat_entry *mne;
7525 struct mac_nat_entry *ret_mne = NULL;
7526
7527 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7528 if (mne->mne_ip.s_addr == ip->s_addr) {
7529 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7530 mac_nat_entry_print(mne, sc->sc_if_xname,
7531 "found");
7532 }
7533 ret_mne = mne;
7534 break;
7535 }
7536 }
7537
7538 return ret_mne;
7539 }
7540
7541 static struct mac_nat_entry *
7542 bridge_lookup_mac_nat_entry_ipv6(const struct bridge_softc *sc, const struct in6_addr *ip6)
7543 {
7544 struct mac_nat_entry *mne;
7545 struct mac_nat_entry *ret_mne = NULL;
7546
7547 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7548 if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7549 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7550 mac_nat_entry_print(mne, sc->sc_if_xname,
7551 "found");
7552 }
7553 ret_mne = mne;
7554 break;
7555 }
7556 }
7557
7558 return ret_mne;
7559 }
7560
7561 static void
7562 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7563 struct mac_nat_entry *mne, const char *reason)
7564 {
7565 LIST_REMOVE(mne, mne_list);
7566 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7567 mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7568 }
7569 zfree(bridge_mne_pool, mne);
7570 sc->sc_mne_count--;
7571 }
7572
7573 static struct mac_nat_entry *
7574 bridge_create_mac_nat_entry_common(struct bridge_softc *sc,
7575 struct bridge_iflist *bif, const char eaddr[ETHER_ADDR_LEN])
7576 {
7577 struct mac_nat_entry *mne;
7578
7579 if (sc->sc_mne_count >= sc->sc_mne_max) {
7580 sc->sc_mne_allocation_failures++;
7581 return NULL;
7582 }
7583
7584 mne = zalloc_noblock(bridge_mne_pool);
7585 if (mne == NULL) {
7586 sc->sc_mne_allocation_failures++;
7587 return NULL;
7588 }
7589
7590 sc->sc_mne_count++;
7591 bzero(mne, sizeof(*mne));
7592 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7593
7594 mne->mne_bif = bif;
7595 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7596
7597 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7598 mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7599 }
7600
7601 return mne;
7602 }
7603
7604 static struct mac_nat_entry *
7605 bridge_create_mac_nat_entry_ipv4(struct bridge_softc *sc,
7606 struct bridge_iflist *bif, const struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7607 {
7608 struct mac_nat_entry *mne;
7609
7610 mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7611 if (mne == NULL) {
7612 return NULL;
7613 }
7614
7615 bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7616 LIST_INSERT_HEAD(&sc->sc_mne_list, mne, mne_list);
7617
7618 return mne;
7619 }
7620
7621 static struct mac_nat_entry *
7622 bridge_create_mac_nat_entry_ipv6(struct bridge_softc *sc,
7623 struct bridge_iflist *bif, const struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7624 {
7625 struct mac_nat_entry *mne;
7626
7627 mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7628 if (mne == NULL) {
7629 return NULL;
7630 }
7631
7632 bcopy(ip6, &mne->mne_ip6, sizeof(mne->mne_ip6));
7633 mne->mne_flags |= MNE_FLAGS_IPV6;
7634 LIST_INSERT_HEAD(&sc->sc_mne_list_v6, mne, mne_list);
7635
7636 return mne;
7637 }
7638
7639 static struct mac_nat_entry *
7640 bridge_update_mac_nat_entry_common(struct bridge_softc *sc, struct bridge_iflist *bif,
7641 struct mac_nat_entry *mne, const char eaddr[ETHER_ADDR_LEN])
7642 {
7643 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7644
7645 if (mne->mne_bif == mac_nat_bif) {
7646 /* the MAC NAT interface takes precedence */
7647 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7648 if (mne->mne_bif != bif) {
7649 mac_nat_entry_print2(mne,
7650 sc->sc_if_xname, "reject",
7651 bif->bif_ifp->if_xname);
7652 }
7653 }
7654 } else if (mne->mne_bif != bif) {
7655 const char *__null_terminated old_if = mne->mne_bif->bif_ifp->if_xname;
7656
7657 mne->mne_bif = bif;
7658 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7659 mac_nat_entry_print2(mne,
7660 sc->sc_if_xname, "replaced",
7661 old_if);
7662 }
7663 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7664 }
7665
7666 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7667
7668 return mne;
7669 }
7670
7671 static struct mac_nat_entry *
7672 bridge_update_mac_nat_entry_ipv4(struct bridge_softc *sc,
7673 struct bridge_iflist *bif, struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7674 {
7675 struct mac_nat_entry *mne;
7676
7677 mne = bridge_lookup_mac_nat_entry_ipv4(sc, ip);
7678 if (mne != NULL) {
7679 return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7680 }
7681
7682 mne = bridge_create_mac_nat_entry_ipv4(sc, bif, ip, eaddr);
7683 return mne;
7684 }
7685
7686 static struct mac_nat_entry *
7687 bridge_update_mac_nat_entry_ipv6(struct bridge_softc *sc,
7688 struct bridge_iflist *bif, struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7689 {
7690 struct mac_nat_entry *mne;
7691
7692 mne = bridge_lookup_mac_nat_entry_ipv6(sc, ip6);
7693 if (mne != NULL) {
7694 return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7695 }
7696
7697 mne = bridge_create_mac_nat_entry_ipv6(sc, bif, ip6, eaddr);
7698 return mne;
7699 }
7700
7701 static void
7702 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7703 struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7704 {
7705 struct mac_nat_entry *mne;
7706 struct mac_nat_entry *tmne;
7707
7708 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7709 if (bif != NULL && mne->mne_bif != bif) {
7710 continue;
7711 }
7712 bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7713 }
7714 }
7715
7716 /*
7717 * bridge_mac_nat_flush_entries:
7718 *
7719 * Flush MAC NAT entries for the specified member. Flush all entries if
7720 * the member is the one that requires MAC NAT, otherwise just flush the
7721 * ones for the specified member.
7722 */
7723 static void
7724 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7725 {
7726 struct bridge_iflist *flush_bif;
7727
7728 flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7729 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7730 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7731 }
7732
7733 static void
7734 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7735 {
7736 errno_t error;
7737 ifnet_t ifp;
7738 uint16_t addresses_count = 0;
7739 ifaddr_t * __counted_by(addresses_count) list;
7740 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7741
7742 assert(mac_nat_bif != NULL);
7743 ifp = mac_nat_bif->bif_ifp;
7744 error = ifnet_get_address_list_family_with_count(ifp, &list, &addresses_count, 0);
7745 if (error != 0) {
7746 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7747 "ifnet_get_address_list(%s) failed %d",
7748 ifp->if_xname, error);
7749 return;
7750 }
7751
7752 for (uint16_t i = 0; i < addresses_count; ++i) {
7753 sa_family_t af;
7754
7755 af = ifaddr_address_family(list[i]);
7756 switch (af) {
7757 case AF_INET: {
7758 struct sockaddr_in sin;
7759
7760 error = ifaddr_address(list[i], (struct sockaddr *)&sin, sizeof(sin));
7761 if (error != 0) {
7762 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7763 "ifaddr_address failed %d",
7764 error);
7765 break;
7766 }
7767
7768 bridge_create_mac_nat_entry_ipv4(sc, mac_nat_bif, &sin.sin_addr, IF_LLADDR(ifp));
7769 break;
7770 }
7771
7772 case AF_INET6: {
7773 struct sockaddr_in6 sin6;
7774
7775 error = ifaddr_address(list[i], (struct sockaddr *)&sin6, sizeof(sin6));
7776 if (error != 0) {
7777 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7778 "ifaddr_address failed %d",
7779 error);
7780 break;
7781 }
7782
7783 if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) {
7784 /* remove scope ID */
7785 sin6.sin6_addr.s6_addr16[1] = 0;
7786 }
7787
7788 bridge_create_mac_nat_entry_ipv6(sc, mac_nat_bif, &sin6.sin6_addr, IF_LLADDR(ifp));
7789 break;
7790 }
7791
7792 default:
7793 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7794 "ifaddr_address_family unknown %d",
7795 af);
7796 break;
7797 }
7798 }
7799
7800 ifnet_address_list_free_counted_by(list, addresses_count);
7801 return;
7802 }
7803
7804 static void
7805 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7806 struct mac_nat_entry_list *list, unsigned long now)
7807 {
7808 struct mac_nat_entry *mne;
7809 struct mac_nat_entry *tmne;
7810
7811 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7812 if (now >= mne->mne_expire) {
7813 bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7814 }
7815 }
7816 }
7817
7818 static void
7819 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7820 {
7821 if (sc->sc_mac_nat_bif == NULL) {
7822 return;
7823 }
7824 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7825 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7826 }
7827
7828 static const char *
7829 get_in_out_string(boolean_t is_output)
7830 {
7831 return (const char * __null_terminated)(is_output ? "OUT" : "IN");
7832 }
7833
7834 /*
7835 * is_valid_arp_packet:
7836 * Verify that this is a valid ARP packet.
7837 *
7838 * Returns TRUE if the packet is valid, FALSE otherwise.
7839 */
7840 static boolean_t
7841 is_valid_arp_packet(mbuf_t *data, bool is_output,
7842 struct ether_header **eh_p, struct ether_arp **ea_p)
7843 {
7844 struct ether_arp *ea;
7845 struct ether_header *eh;
7846 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7847 boolean_t is_valid = FALSE;
7848 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7849
7850 if (mbuf_pkthdr_len(*data) < minlen) {
7851 BRIDGE_LOG(LOG_DEBUG, flags,
7852 "ARP %s short frame %lu < %lu",
7853 get_in_out_string(is_output),
7854 mbuf_pkthdr_len(*data), minlen);
7855 goto done;
7856 }
7857 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7858 BRIDGE_LOG(LOG_DEBUG, flags,
7859 "ARP %s size %lu mbuf_pullup fail",
7860 get_in_out_string(is_output),
7861 minlen);
7862 *data = NULL;
7863 goto done;
7864 }
7865
7866 /* validate ARP packet */
7867 eh = mtod(*data, struct ether_header *);
7868 ea = (struct ether_arp *)(eh + 1);
7869 if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7870 BRIDGE_LOG(LOG_DEBUG, flags,
7871 "ARP %s htype not ethernet",
7872 get_in_out_string(is_output));
7873 goto done;
7874 }
7875 if (ea->arp_hln != ETHER_ADDR_LEN) {
7876 BRIDGE_LOG(LOG_DEBUG, flags,
7877 "ARP %s hlen not ethernet",
7878 get_in_out_string(is_output));
7879 goto done;
7880 }
7881 if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7882 BRIDGE_LOG(LOG_DEBUG, flags,
7883 "ARP %s ptype not IP",
7884 get_in_out_string(is_output));
7885 goto done;
7886 }
7887 if (ea->arp_pln != sizeof(struct in_addr)) {
7888 BRIDGE_LOG(LOG_DEBUG, flags,
7889 "ARP %s plen not IP",
7890 get_in_out_string(is_output));
7891 goto done;
7892 }
7893 is_valid = TRUE;
7894 *ea_p = ea;
7895 *eh_p = eh;
7896 done:
7897 return is_valid;
7898 }
7899
7900 static struct mac_nat_entry *
7901 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7902 {
7903 struct ether_arp * __single ea;
7904 struct ether_header * __single eh;
7905 struct mac_nat_entry *mne = NULL;
7906 u_short op;
7907 struct in_addr tpa;
7908
7909 if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7910 goto done;
7911 }
7912 op = ea->arp_op;
7913 switch (op) {
7914 case HTONS_ARPOP_REQUEST:
7915 case HTONS_ARPOP_REPLY:
7916 /* only care about REQUEST and REPLY */
7917 break;
7918 default:
7919 goto done;
7920 }
7921
7922 /* check the target IP address for a NAT entry */
7923 bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7924 if (tpa.s_addr != 0) {
7925 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &tpa);
7926 }
7927 if (mne != NULL) {
7928 if (op == HTONS_ARPOP_REPLY) {
7929 /* translate the MAC address */
7930 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7931 char mac_src[24];
7932 char mac_dst[24];
7933
7934 ether_ntop(mac_src, sizeof(mac_src),
7935 ea->arp_tha);
7936 ether_ntop(mac_dst, sizeof(mac_dst),
7937 mne->mne_mac);
7938 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7939 "%s %s ARP %s -> %s",
7940 sc->sc_if_xname,
7941 mne->mne_bif->bif_ifp->if_xname,
7942 mac_src, mac_dst);
7943 }
7944 bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
7945 }
7946 } else {
7947 /* handle conflicting ARP (sender matches mne) */
7948 struct in_addr spa;
7949
7950 bcopy(ea->arp_spa, &spa, sizeof(spa));
7951 if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
7952 /* check the source IP for a NAT entry */
7953 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &spa);
7954 }
7955 }
7956
7957 done:
7958 return mne;
7959 }
7960
7961 static boolean_t
7962 bridge_mac_nat_arp_output(struct bridge_softc *sc,
7963 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7964 {
7965 struct ether_arp * __single ea;
7966 struct ether_header * __single eh;
7967 struct in_addr ip;
7968 struct mac_nat_entry *mne = NULL;
7969 u_short op;
7970 boolean_t translate = FALSE;
7971
7972 if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
7973 goto done;
7974 }
7975 op = ea->arp_op;
7976 switch (op) {
7977 case HTONS_ARPOP_REQUEST:
7978 case HTONS_ARPOP_REPLY:
7979 /* only care about REQUEST and REPLY */
7980 break;
7981 default:
7982 goto done;
7983 }
7984
7985 bcopy(ea->arp_spa, &ip, sizeof(ip));
7986 if (ip.s_addr == 0) {
7987 goto done;
7988 }
7989 /* XXX validate IP address: no multicast/broadcast */
7990 mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
7991 (const char *)ea->arp_sha);
7992 if (mnr != NULL && mne != NULL) {
7993 /* record the offset to do the replacement */
7994 translate = TRUE;
7995 mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
7996 }
7997
7998 done:
7999 return translate;
8000 }
8001
8002 #define ETHER_IPV4_HEADER_LEN (sizeof(struct ether_header) + \
8003 + sizeof(struct ip))
8004 static uint8_t * __indexable
8005 get_ether_ip_header_ptr(mbuf_t *data, boolean_t is_output)
8006 {
8007 uint8_t *header = NULL;
8008 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8009 size_t minlen = ETHER_IPV4_HEADER_LEN;
8010
8011 if (mbuf_pkthdr_len(*data) < minlen) {
8012 BRIDGE_LOG(LOG_DEBUG, flags,
8013 "IP %s short frame %lu < %lu",
8014 get_in_out_string(is_output),
8015 mbuf_pkthdr_len(*data), minlen);
8016 goto done;
8017 }
8018 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8019 BRIDGE_LOG(LOG_DEBUG, flags,
8020 "IP %s size %lu mbuf_pullup fail",
8021 get_in_out_string(is_output),
8022 minlen);
8023 *data = NULL;
8024 goto done;
8025 }
8026 header = mtod(*data, uint8_t *);
8027 done:
8028 return header;
8029 }
8030
8031 static struct mac_nat_entry *
8032 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8033 {
8034 struct in_addr dst;
8035 uint8_t *header;
8036 struct ip *iphdr;
8037 struct mac_nat_entry *mne = NULL;
8038
8039 header = get_ether_ip_header_ptr(data, FALSE);
8040 if (header == NULL) {
8041 goto done;
8042 }
8043 iphdr = (struct ip *)(void *)(header + sizeof(struct ether_header));
8044 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8045 /* XXX validate IP address */
8046 if (dst.s_addr == 0) {
8047 goto done;
8048 }
8049 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &dst);
8050 done:
8051 return mne;
8052 }
8053
8054 static void
8055 bridge_mac_nat_udp_output(struct bridge_softc *sc,
8056 struct bridge_iflist *bif, mbuf_t m,
8057 uint8_t ip_header_len, struct mac_nat_record *mnr)
8058 {
8059 uint16_t dp_flags;
8060 errno_t error;
8061 size_t offset;
8062 struct udphdr udphdr;
8063
8064 /* copy the UDP header */
8065 offset = sizeof(struct ether_header) + ip_header_len;
8066 error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
8067 if (error != 0) {
8068 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8069 "mbuf_copydata udphdr failed %d",
8070 error);
8071 return;
8072 }
8073 if (udphdr.uh_sport != HTONS_IPPORT_BOOTPC ||
8074 udphdr.uh_dport != HTONS_IPPORT_BOOTPS) {
8075 /* not a BOOTP/DHCP packet */
8076 return;
8077 }
8078 /* check whether the broadcast bit is already set */
8079 offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8080 error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
8081 if (error != 0) {
8082 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8083 "mbuf_copydata dp_flags failed %d",
8084 error);
8085 return;
8086 }
8087 if ((dp_flags & HTONS_DHCP_FLAGS_BROADCAST) != 0) {
8088 /* it's already set, nothing to do */
8089 return;
8090 }
8091 /* broadcast bit needs to be set */
8092 mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8093 mnr->mnr_ip_header_len = ip_header_len;
8094 if (udphdr.uh_sum != 0) {
8095 uint16_t delta;
8096
8097 /* adjust checksum to take modified dp_flags into account */
8098 delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8099 mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8100 }
8101 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8102 "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8103 sc->sc_if_xname,
8104 bif->bif_ifp->if_xname,
8105 ntohs(mnr->mnr_ip_dhcp_flags),
8106 ntohs(mnr->mnr_ip_udp_csum));
8107 return;
8108 }
8109
8110 static boolean_t
8111 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8112 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8113 {
8114 #pragma unused(mnr)
8115 uint8_t *header;
8116 struct ether_header *eh;
8117 struct in_addr ip;
8118 struct ip *iphdr;
8119 uint8_t ip_header_len;
8120 struct mac_nat_entry *mne = NULL;
8121 boolean_t translate = FALSE;
8122
8123 header = get_ether_ip_header_ptr(data, TRUE);
8124 if (header == NULL) {
8125 goto done;
8126 }
8127
8128 eh = (struct ether_header *)header;
8129 iphdr = (struct ip *)(header + sizeof(*eh));
8130 ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8131 if (ip_header_len < sizeof(ip)) {
8132 /* bogus IP header */
8133 goto done;
8134 }
8135 bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8136 /* XXX validate the source address */
8137 if (ip.s_addr != 0) {
8138 mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
8139 (const char *)eh->ether_shost);
8140 }
8141 if (mnr != NULL) {
8142 if (ip.s_addr == 0 && iphdr->ip_p == IPPROTO_UDP) {
8143 /* handle DHCP must broadcast */
8144 bridge_mac_nat_udp_output(sc, bif, *data,
8145 ip_header_len, mnr);
8146 }
8147 translate = TRUE;
8148 }
8149 done:
8150 return translate;
8151 }
8152
8153 #define ETHER_IPV6_HEADER_LEN (sizeof(struct ether_header) + \
8154 + sizeof(struct ip6_hdr))
8155 static uint8_t * __indexable
8156 get_ether_ipv6_header_ptr(mbuf_t *data, size_t plen, boolean_t is_output)
8157 {
8158 uint8_t *header = NULL;
8159 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8160 size_t minlen = ETHER_IPV6_HEADER_LEN + plen;
8161
8162 if (mbuf_pkthdr_len(*data) < minlen) {
8163 BRIDGE_LOG(LOG_DEBUG, flags,
8164 "IP %s short frame %lu < %lu",
8165 get_in_out_string(is_output),
8166 mbuf_pkthdr_len(*data), minlen);
8167 goto done;
8168 }
8169 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8170 BRIDGE_LOG(LOG_DEBUG, flags,
8171 "IP %s size %lu mbuf_pullup fail",
8172 get_in_out_string(is_output),
8173 minlen);
8174 *data = NULL;
8175 goto done;
8176 }
8177 header = mtod(*data, uint8_t *);
8178 done:
8179 return header;
8180 }
8181
8182 #include <netinet/icmp6.h>
8183 #include <netinet6/nd6.h>
8184
/*
 * Size of an Ethernet address plus a struct nd_opt_hdr; the ICMPv6 code
 * below compares the link-layer address option length against this value.
 */
#define ETHER_ND_LLADDR_LEN (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8186
8187 static void
8188 bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
8189 struct bridge_iflist *bif,
8190 mbuf_t *data, struct ip6_hdr *ip6h,
8191 struct in6_addr *saddrp,
8192 struct mac_nat_record *mnr)
8193 {
8194 uint8_t *header;
8195 struct ether_header *eh;
8196 struct icmp6_hdr *icmp6;
8197 uint8_t icmp6_type;
8198 uint32_t icmp6len;
8199 int lladdrlen = 0;
8200 char *lladdr = NULL;
8201 unsigned int off = sizeof(*ip6h);
8202
8203 icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
8204 if (icmp6len < sizeof(*icmp6)) {
8205 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8206 "short IPv6 payload length %d < %lu",
8207 icmp6len, sizeof(*icmp6));
8208 return;
8209 }
8210
8211 /* pullup IP6 header + ICMPv6 header */
8212 header = get_ether_ipv6_header_ptr(data, sizeof(*icmp6), TRUE);
8213 if (header == NULL) {
8214 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8215 "failed to pullup icmp6 header");
8216 return;
8217 }
8218 eh = (struct ether_header *)header;
8219 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8220 icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);
8221 icmp6_type = icmp6->icmp6_type;
8222 switch (icmp6_type) {
8223 case ND_NEIGHBOR_SOLICIT:
8224 case ND_NEIGHBOR_ADVERT:
8225 case ND_ROUTER_ADVERT:
8226 case ND_ROUTER_SOLICIT:
8227 break;
8228 default:
8229 return;
8230 }
8231
8232 /* pullup IP6 header + payload */
8233 header = get_ether_ipv6_header_ptr(data, icmp6len, TRUE);
8234 if (header == NULL) {
8235 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8236 "failed to pullup icmp6 + payload");
8237 return;
8238 }
8239 eh = (struct ether_header *)header;
8240 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8241 icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);
8242
8243 switch (icmp6_type) {
8244 case ND_NEIGHBOR_SOLICIT: {
8245 struct nd_neighbor_solicit *nd_ns;
8246 union nd_opts ndopts;
8247 boolean_t is_dad_probe;
8248 struct in6_addr taddr;
8249
8250 if (icmp6len < sizeof(*nd_ns)) {
8251 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8252 "short nd_ns %d < %lu",
8253 icmp6len, sizeof(*nd_ns));
8254 return;
8255 }
8256
8257 nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
8258 bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
8259 if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8260 IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8261 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8262 "invalid target ignored");
8263 return;
8264 }
8265
8266 /* parse options */
8267 nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
8268 if (nd6_options(&ndopts) < 0) {
8269 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8270 "invalid ND6 NS option");
8271 return;
8272 }
8273 if (ndopts.nd_opts_src_lladdr != NULL) {
8274 ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len,
8275 lladdr, lladdrlen);
8276 }
8277 is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
8278 if (lladdr != NULL) {
8279 if (is_dad_probe) {
8280 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8281 "bad ND6 DAD packet");
8282 return;
8283 }
8284 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8285 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8286 "source lladdrlen %d != %lu",
8287 lladdrlen, ETHER_ND_LLADDR_LEN);
8288 return;
8289 }
8290 }
8291 if (is_dad_probe) {
8292 /* node is trying use taddr, create an mne for taddr */
8293 *saddrp = taddr;
8294 }
8295 break;
8296 }
8297 case ND_NEIGHBOR_ADVERT: {
8298 struct nd_neighbor_advert *nd_na;
8299 union nd_opts ndopts;
8300 struct in6_addr taddr;
8301
8302
8303 nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
8304
8305 if (icmp6len < sizeof(*nd_na)) {
8306 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8307 "short nd_na %d < %lu",
8308 icmp6len, sizeof(*nd_na));
8309 return;
8310 }
8311
8312 bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
8313 if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8314 IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8315 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8316 "invalid target ignored");
8317 return;
8318 }
8319
8320 /* parse options */
8321 nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
8322 if (nd6_options(&ndopts) < 0) {
8323 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8324 "invalid ND6 NA option");
8325 return;
8326 }
8327 if (ndopts.nd_opts_tgt_lladdr == NULL) {
8328 /* target linklayer, nothing to do */
8329 return;
8330 }
8331
8332 ND_OPT_LLADDR(ndopts.nd_opts_tgt_lladdr, nd_opt_len, lladdr, lladdrlen);
8333 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8334 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8335 "target lladdrlen %d != %lu",
8336 lladdrlen, ETHER_ND_LLADDR_LEN);
8337 return;
8338 }
8339 break;
8340 }
8341 case ND_ROUTER_ADVERT:
8342 case ND_ROUTER_SOLICIT: {
8343 union nd_opts ndopts;
8344 uint32_t type_length;
8345 const char *description;
8346
8347 if (icmp6_type == ND_ROUTER_ADVERT) {
8348 type_length = sizeof(struct nd_router_advert);
8349 description = "RA";
8350 } else {
8351 type_length = sizeof(struct nd_router_solicit);
8352 description = "RS";
8353 }
8354 if (icmp6len < type_length) {
8355 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8356 "short ND6 %s %d < %d",
8357 description, icmp6len, type_length);
8358 return;
8359 }
8360
8361 /* parse options */
8362 nd6_option_init(((uint8_t *)icmp6) + type_length,
8363 icmp6len - type_length, &ndopts);
8364 if (nd6_options(&ndopts) < 0) {
8365 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8366 "invalid ND6 %s option", description);
8367 return;
8368 }
8369 if (ndopts.nd_opts_src_lladdr != NULL) {
8370 ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len, lladdr, lladdrlen);
8371
8372 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8373 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8374 "source lladdrlen %d != %lu",
8375 lladdrlen, ETHER_ND_LLADDR_LEN);
8376 return;
8377 }
8378 }
8379 break;
8380 }
8381 default:
8382 break;
8383 }
8384
8385 if (lladdr != NULL) {
8386 mnr->mnr_ip6_lladdr_offset = (uint16_t)
8387 ((uintptr_t)lladdr - (uintptr_t)eh);
8388 mnr->mnr_ip6_icmp6_len = icmp6len;
8389 mnr->mnr_ip6_icmp6_type = icmp6_type;
8390 mnr->mnr_ip6_header_len = off;
8391 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8392 const char *str;
8393
8394 switch (mnr->mnr_ip6_icmp6_type) {
8395 case ND_ROUTER_ADVERT:
8396 str = "ROUTER ADVERT";
8397 break;
8398 case ND_ROUTER_SOLICIT:
8399 str = "ROUTER SOLICIT";
8400 break;
8401 case ND_NEIGHBOR_ADVERT:
8402 str = "NEIGHBOR ADVERT";
8403 break;
8404 case ND_NEIGHBOR_SOLICIT:
8405 str = "NEIGHBOR SOLICIT";
8406 break;
8407 default:
8408 str = "";
8409 break;
8410 }
8411 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8412 "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
8413 sc->sc_if_xname, bif->bif_ifp->if_xname, str,
8414 mnr->mnr_ip6_header_len,
8415 mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
8416 }
8417 }
8418 }
8419
8420 static struct mac_nat_entry *
8421 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8422 {
8423 struct in6_addr dst;
8424 uint8_t *header;
8425 struct ether_header *eh;
8426 struct ip6_hdr *ip6h;
8427 struct mac_nat_entry *mne = NULL;
8428
8429 header = get_ether_ipv6_header_ptr(data, 0, FALSE);
8430 if (header == NULL) {
8431 goto done;
8432 }
8433 eh = (struct ether_header *)header;
8434 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8435 bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8436 /* XXX validate IPv6 address */
8437 if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8438 goto done;
8439 }
8440 mne = bridge_lookup_mac_nat_entry_ipv6(sc, &dst);
8441
8442 done:
8443 return mne;
8444 }
8445
8446 static boolean_t
8447 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8448 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8449 {
8450 uint8_t *header;
8451 struct ether_header *eh;
8452 ether_addr_t ether_shost;
8453 struct ip6_hdr *ip6h;
8454 struct in6_addr saddr;
8455 boolean_t translate;
8456
8457 translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8458 header = get_ether_ipv6_header_ptr(data, 0, TRUE);
8459 if (header == NULL) {
8460 translate = FALSE;
8461 goto done;
8462 }
8463 eh = (struct ether_header *)header;
8464 bcopy(eh->ether_shost, ðer_shost, sizeof(ether_shost));
8465 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8466 bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8467 if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8468 bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, &saddr, mnr);
8469 }
8470 if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8471 goto done;
8472 }
8473 (void)bridge_update_mac_nat_entry_ipv6(sc, bif, &saddr,
8474 (const char *)ether_shost.octet);
8475
8476 done:
8477 return translate;
8478 }
8479
8480 /*
8481 * Function: bridge_mac_nat_input:
8482 *
8483 * Purpose:
8484 * Process a unicast packet arriving on the external interface `external_ifp`.
8485 *
8486 * If the packet is ARP, IPv4, or IPv6, lookup the address from the packet in
8487 * the mac_nat_entry table. If an entry is found, and the interface is
8488 * not `external_ifp`, replace the destination MAC address in the
8489 * ethernet header with the corresponding internal MAC address, and return
8490 * the interface via `*dst_if`.
8491 *
8492 * Returns:
8493 * NULL if the packet was deallocated during processing.
8494 *
8495 * Otherwise, returns non-NULL packet that should:
8496 * 1) if `*dst_if` is NULL, continue on as an input packet
8497 * over `external_ifp`, OR
8498 * 2) if `*dst_if` is not NULL, be delivered as an output packet
8499 * over `*dst_if`.
8500 */
8501 static mbuf_t
8502 bridge_mac_nat_input(struct bridge_softc *sc, ifnet_t external_ifp,
8503 mbuf_t m, ifnet_t * dst_if)
8504 {
8505 struct ether_header *eh;
8506 mbuf_t m0 = m;
8507 struct mac_nat_entry *mne = NULL;
8508
8509 BRIDGE_LOCK_ASSERT_HELD(sc);
8510 *dst_if = NULL;
8511 eh = mtod(m, struct ether_header *);
8512 switch (eh->ether_type) {
8513 case HTONS_ETHERTYPE_ARP:
8514 mne = bridge_mac_nat_arp_input(sc, &m);
8515 break;
8516 case HTONS_ETHERTYPE_IP:
8517 mne = bridge_mac_nat_ip_input(sc, &m);
8518 break;
8519 case HTONS_ETHERTYPE_IPV6:
8520 mne = bridge_mac_nat_ipv6_input(sc, &m);
8521 break;
8522 default:
8523 break;
8524 }
8525 if (m != NULL & mne != NULL) {
8526 *dst_if = mne->mne_bif->bif_ifp;
8527 if (*dst_if == external_ifp) {
8528 /* receive packet for ifp */
8529 *dst_if = NULL;
8530 } else {
8531 /* replace the destination MAC with internal one */
8532 if (m != m0) {
8533 /* it may have changed */
8534 eh = mtod(m, struct ether_header *);
8535 }
8536 bcopy(mne->mne_mac, eh->ether_dhost,
8537 sizeof(eh->ether_dhost));
8538 }
8539 }
8540 return m;
8541 }
8542
8543
8544 static mblist
8545 bridge_mac_nat_input_list(struct bridge_softc *sc, ifnet_t external_ifp,
8546 mbuf_t m, mbuf_t * forward_head)
8547 {
8548 mblist forward;
8549 mbuf_t next_packet;
8550 mblist ret;
8551
8552 mblist_init(&ret);
8553 mblist_init(&forward);
8554 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8555 ifnet_ref_t dst_if;
8556
8557 /* take packet out of the list */
8558 next_packet = scan->m_nextpkt;
8559 scan->m_nextpkt = NULL;
8560
8561 scan = bridge_mac_nat_input(sc, external_ifp, scan, &dst_if);
8562 if (scan != NULL) {
8563 if (dst_if != NULL) {
8564 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8565 "%s MAC-NAT input translate to %s",
8566 sc->sc_if_xname, dst_if->if_xname);
8567 /* use rcvif to store the egress interface */
8568 mbuf_pkthdr_setrcvif(scan, dst_if);
8569 /* add it to the forwarding list */
8570 mblist_append(&forward, scan);
8571 } else {
8572 /* add it to the "continue on as input" list */
8573 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8574 "%s MAC-NAT input for %s",
8575 sc->sc_if_xname,
8576 external_ifp->if_xname);
8577 mblist_append(&ret, scan);
8578 }
8579 }
8580 }
8581 *forward_head = forward.head;
8582 return ret;
8583 }
8584
8585 /*
8586 * bridge_mac_nat_translate_list:
8587 * Process a list of packets destined to the MAC-NAT interface `dst_if`
8588 * from the bridge member `sbif`.
8589 *
8590 * For each packet in the list, update the MAC-NAT record, and if
8591 * translation is required, translate it.
8592 *
8593 * Returns the list of packets that should be delivered to the MAC-NAT
8594 * interface.
8595 */
8596 static mbuf_t
8597 bridge_mac_nat_translate_list(struct bridge_softc * sc,
8598 struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8599 {
8600 mbuf_t next_packet;
8601 mblist ret;
8602
8603 mblist_init(&ret);
8604 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
8605 struct mac_nat_record mnr;
8606 bool translate_mac;
8607
8608 /* take packet out of the list */
8609 next_packet = scan->m_nextpkt;
8610 scan->m_nextpkt = NULL;
8611 translate_mac = bridge_mac_nat_output(sc, sbif, &scan, &mnr);
8612 if (scan != NULL) {
8613 if (translate_mac) {
8614 bridge_mac_nat_translate(&scan, &mnr,
8615 IF_LLADDR(dst_if));
8616 }
8617 if (scan != NULL) {
8618 /* add it back to the list */
8619 mblist_append(&ret, scan);
8620 }
8621 }
8622 }
8623 return ret.head;
8624 }
8625
8626 /*
8627 * bridge_mac_nat_copy_and_translate_list:
8628 * Same as bridge_mac_nat_translate_list() except that a copy of the
8629 * packet list is returned instead.
8630 *
8631 * The packet list `m` is left unaltered.
8632 */
8633 static mbuf_t
8634 bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
8635 struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8636 {
8637 mbuf_t next_packet;
8638 mblist ret;
8639
8640 mblist_init(&ret);
8641 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8642 mbuf_ref_t mc = NULL;
8643 struct mac_nat_record mnr;
8644 bool translate_mac;
8645
8646 /* take packet out of the list, make a copy, put it back */
8647 next_packet = scan->m_nextpkt;
8648 scan->m_nextpkt = NULL;
8649 mc = m_dup(scan, M_DONTWAIT);
8650 scan->m_nextpkt = next_packet;
8651 if (mc == NULL) {
8652 continue;
8653 }
8654 translate_mac = bridge_mac_nat_output(sc, sbif, &mc, &mnr);
8655 if (mc != NULL) {
8656 if (translate_mac) {
8657 bridge_mac_nat_translate(&mc, &mnr,
8658 IF_LLADDR(dst_if));
8659 }
8660 if (mc != NULL) {
8661 /* add it to the new list */
8662 mblist_append(&ret, mc);
8663 }
8664 }
8665 }
8666 return ret.head;
8667 }
8668
8669 static void
8670 bridge_mac_nat_forward_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
8671 mbuf_t m)
8672 {
8673 int count = 0;
8674 ifnet_t dst_if;
8675 mblist list;
8676 int n_lists = 0;
8677 mbuf_t next_packet;
8678
8679 mblist_init(&list);
8680 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8681 ifnet_t this_if;
8682
8683 next_packet = scan->m_nextpkt;
8684 this_if = mbuf_pkthdr_rcvif(scan);
8685 mbuf_pkthdr_setrcvif(scan, NULL);
8686 if (list.head == NULL) {
8687 /* start a new list */
8688 list.head = list.tail = scan;
8689 count = 1;
8690 dst_if = this_if;
8691 } else if (dst_if != this_if) {
8692 /* send up the previous chain */
8693 if (list.tail != NULL) {
8694 /* terminate the list */
8695 list.tail->m_nextpkt = NULL;
8696 }
8697 n_lists++;
8698 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8699 "(%s): sublist %u pkts %u",
8700 dst_if->if_xname, n_lists, count);
8701 bridge_enqueue(bridge_ifp, NULL,
8702 dst_if, etypef, list.head,
8703 CHECKSUM_OPERATION_CLEAR_OFFLOAD, pkt_direction_RX);
8704
8705 /* start new list */
8706 list.head = list.tail = scan;
8707 count = 1;
8708 dst_if = this_if;
8709 } else {
8710 count++;
8711 list.tail = scan;
8712 }
8713 if (next_packet == NULL) {
8714 /* last list */
8715 n_lists++;
8716 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8717 "(%s): sublist %u pkts %u",
8718 dst_if->if_xname, n_lists, count);
8719 bridge_enqueue(bridge_ifp, NULL,
8720 dst_if, etypef, list.head,
8721 CHECKSUM_OPERATION_CLEAR_OFFLOAD, pkt_direction_RX);
8722 }
8723 }
8724 return;
8725 }
8726
8727 /*
8728 * bridge_mac_nat_output:
8729 * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8730 * from the interface 'bif'.
8731 *
8732 * Create a mac_nat_entry containing the source IP address and MAC address
8733 * from the packet. Populate a mac_nat_record with information detailing
8734 * how to translate the packet. Translation takes place later by calling
8735 * `bridge_mac_nat_translate()`.
8736 *
8737 * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8738 * interface is generating an output packet. No translation is required in this
8739 * case, we just record the IP address used to prevent another bif from
8740 * claiming our IP address.
8741 *
8742 * Returns:
8743 * TRUE if the packet should be translated (*mnr updated as well),
8744 * FALSE otherwise.
8745 *
8746 * *data may be updated to point at a different mbuf chain or NULL if
8747 * the chain was deallocated during processing.
8748 */
8749
8750 static boolean_t
8751 bridge_mac_nat_output(struct bridge_softc *sc,
8752 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8753 {
8754 struct ether_header *eh;
8755 boolean_t translate = FALSE;
8756
8757 BRIDGE_LOCK_ASSERT_HELD(sc);
8758 assert(sc->sc_mac_nat_bif != NULL);
8759
8760 eh = mtod(*data, struct ether_header *);
8761 if (mnr != NULL) {
8762 bzero(mnr, sizeof(*mnr));
8763 mnr->mnr_ether_type = eh->ether_type;
8764 }
8765 switch (eh->ether_type) {
8766 case HTONS_ETHERTYPE_ARP:
8767 translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8768 break;
8769 case HTONS_ETHERTYPE_IP:
8770 translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8771 break;
8772 case HTONS_ETHERTYPE_IPV6:
8773 translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8774 break;
8775 default:
8776 break;
8777 }
8778 return translate;
8779 }
8780
8781 static void
8782 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8783 const char eaddr[ETHER_ADDR_LEN])
8784 {
8785 errno_t error;
8786
8787 if (mnr->mnr_arp_offset == 0) {
8788 return;
8789 }
8790 /* replace the source hardware address */
8791 error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8792 ETHER_ADDR_LEN, eaddr,
8793 MBUF_DONTWAIT);
8794 if (error != 0) {
8795 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8796 "mbuf_copyback failed");
8797 m_drop(*data, DROPTAP_FLAG_DIR_IN,
8798 DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
8799 *data = NULL;
8800 }
8801 return;
8802 }
8803
8804 static void
8805 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8806 {
8807 errno_t error;
8808 size_t offset;
8809
8810 if (mnr->mnr_ip_header_len == 0) {
8811 return;
8812 }
8813 /* update the UDP checksum */
8814 offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8815 error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8816 sizeof(mnr->mnr_ip_udp_csum),
8817 &mnr->mnr_ip_udp_csum,
8818 MBUF_DONTWAIT);
8819 if (error != 0) {
8820 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8821 "mbuf_copyback uh_sum failed");
8822 m_drop(*data, DROPTAP_FLAG_DIR_IN,
8823 DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
8824 *data = NULL;
8825 }
8826 /* update the DHCP must broadcast flag */
8827 offset += sizeof(struct udphdr);
8828 error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8829 sizeof(mnr->mnr_ip_dhcp_flags),
8830 &mnr->mnr_ip_dhcp_flags,
8831 MBUF_DONTWAIT);
8832 if (error != 0) {
8833 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8834 "mbuf_copyback dp_flags failed");
8835 m_drop(*data, DROPTAP_FLAG_DIR_IN,
8836 DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
8837 *data = NULL;
8838 }
8839 }
8840
8841 static void
8842 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8843 const char eaddr[ETHER_ADDR_LEN])
8844 {
8845 uint16_t cksum;
8846 errno_t error;
8847 mbuf_t m = *data;
8848
8849 if (mnr->mnr_ip6_header_len == 0) {
8850 return;
8851 }
8852 switch (mnr->mnr_ip6_icmp6_type) {
8853 case ND_ROUTER_ADVERT:
8854 case ND_ROUTER_SOLICIT:
8855 case ND_NEIGHBOR_SOLICIT:
8856 case ND_NEIGHBOR_ADVERT:
8857 if (mnr->mnr_ip6_lladdr_offset == 0) {
8858 /* nothing to do */
8859 return;
8860 }
8861 break;
8862 default:
8863 return;
8864 }
8865
8866 /*
8867 * replace the lladdr
8868 */
8869 error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8870 ETHER_ADDR_LEN, eaddr,
8871 MBUF_DONTWAIT);
8872 if (error != 0) {
8873 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8874 "mbuf_copyback lladdr failed");
8875 m_drop(m, DROPTAP_FLAG_DIR_IN,
8876 DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
8877 *data = NULL;
8878 return;
8879 }
8880
8881 /*
8882 * recompute the icmp6 checksum
8883 */
8884
8885 /* skip past the ethernet header */
8886 _mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
8887
8888 #define CKSUM_OFFSET_ICMP6 offsetof(struct icmp6_hdr, icmp6_cksum)
8889 /* set the checksum to zero */
8890 cksum = 0;
8891 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8892 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8893 if (error != 0) {
8894 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8895 "mbuf_copyback cksum=0 failed");
8896 m_drop(m, DROPTAP_FLAG_DIR_IN,
8897 DROP_REASON_BRIDGE_CHECKSUM, NULL, 0);
8898 *data = NULL;
8899 return;
8900 }
8901 /* compute and set the new checksum */
8902 cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8903 mnr->mnr_ip6_icmp6_len);
8904 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8905 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8906 if (error != 0) {
8907 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8908 "mbuf_copyback cksum failed");
8909 m_drop(m, DROPTAP_FLAG_DIR_IN,
8910 DROP_REASON_BRIDGE_CHECKSUM, NULL, 0);
8911 *data = NULL;
8912 return;
8913 }
8914 /* restore the ethernet header */
8915 _mbuf_adjust_pkthdr_and_data(m, -ETHER_HDR_LEN);
8916 return;
8917 }
8918
8919 static void
8920 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8921 const char eaddr[ETHER_ADDR_LEN])
8922 {
8923 struct ether_header *eh;
8924
8925 /* replace the source ethernet address with the single MAC */
8926 eh = mtod(*data, struct ether_header *);
8927 bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8928 switch (mnr->mnr_ether_type) {
8929 case HTONS_ETHERTYPE_ARP:
8930 bridge_mac_nat_arp_translate(data, mnr, eaddr);
8931 break;
8932
8933 case HTONS_ETHERTYPE_IP:
8934 bridge_mac_nat_ip_translate(data, mnr);
8935 break;
8936
8937 case HTONS_ETHERTYPE_IPV6:
8938 bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8939 break;
8940
8941 default:
8942 break;
8943 }
8944 return;
8945 }
8946
8947 /*
8948 * bridge packet filtering
8949 */
8950
8951 /*
8952 * Perform basic checks on header size since
8953 * pfil assumes ip_input has already processed
8954 * it for it. Cut-and-pasted from ip_input.c.
8955 * Given how simple the IPv6 version is,
8956 * does the IPv4 version really need to be
8957 * this complicated?
8958 *
8959 * XXX Should we update ipstat here, or not?
8960 * XXX Right now we update ipstat but not
8961 * XXX csum_counter.
8962 */
8963 static int
8964 bridge_ip_checkbasic(struct mbuf **mp)
8965 {
8966 struct mbuf *m = *mp;
8967 struct ip *ip;
8968 int len, hlen;
8969 u_short sum;
8970
8971 if (*mp == NULL) {
8972 return -1;
8973 }
8974
8975 if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8976 /* max_linkhdr is already rounded up to nearest 4-byte */
8977 if ((m = m_copyup(m, sizeof(struct ip),
8978 max_linkhdr)) == NULL) {
8979 /* XXXJRT new stat, please */
8980 ipstat.ips_toosmall++;
8981 goto bad;
8982 }
8983 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8984 if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8985 ipstat.ips_toosmall++;
8986 goto bad;
8987 }
8988 }
8989 ip = mtod(m, struct ip *);
8990 if (ip == NULL) {
8991 goto bad;
8992 }
8993
8994 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
8995 ipstat.ips_badvers++;
8996 goto bad;
8997 }
8998 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8999 if (hlen < (int)sizeof(struct ip)) { /* minimum header length */
9000 ipstat.ips_badhlen++;
9001 goto bad;
9002 }
9003 if (hlen > m->m_len) {
9004 if ((m = m_pullup(m, hlen)) == 0) {
9005 ipstat.ips_badhlen++;
9006 goto bad;
9007 }
9008 ip = mtod(m, struct ip *);
9009 if (ip == NULL) {
9010 goto bad;
9011 }
9012 }
9013
9014 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
9015 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
9016 } else {
9017 if (hlen == sizeof(struct ip)) {
9018 sum = in_cksum_hdr(ip);
9019 } else {
9020 sum = in_cksum(m, hlen);
9021 }
9022 }
9023 if (sum) {
9024 ipstat.ips_badsum++;
9025 goto bad;
9026 }
9027
9028 /* Retrieve the packet length. */
9029 len = ntohs(ip->ip_len);
9030
9031 /*
9032 * Check for additional length bogosity
9033 */
9034 if (len < hlen) {
9035 ipstat.ips_badlen++;
9036 goto bad;
9037 }
9038
9039 /*
9040 * Check that the amount of data in the buffers
9041 * is as at least much as the IP header would have us expect.
9042 * Drop packet if shorter than we expect.
9043 */
9044 if (m->m_pkthdr.len < len) {
9045 ipstat.ips_tooshort++;
9046 goto bad;
9047 }
9048
9049 /* Checks out, proceed */
9050 *mp = m;
9051 return 0;
9052
9053 bad:
9054 *mp = m;
9055 return -1;
9056 }
9057
9058 /*
9059 * Same as above, but for IPv6.
9060 * Cut-and-pasted from ip6_input.c.
9061 * XXX Should we update ip6stat, or not?
9062 */
9063 static int
9064 bridge_ip6_checkbasic(struct mbuf **mp)
9065 {
9066 struct mbuf *m = *mp;
9067 struct ip6_hdr *ip6;
9068
9069 /*
9070 * If the IPv6 header is not aligned, slurp it up into a new
9071 * mbuf with space for link headers, in the event we forward
9072 * it. Otherwise, if it is aligned, make sure the entire base
9073 * IPv6 header is in the first mbuf of the chain.
9074 */
9075 if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9076 struct ifnet *inifp = m->m_pkthdr.rcvif;
9077 /* max_linkhdr is already rounded up to nearest 4-byte */
9078 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
9079 max_linkhdr)) == NULL) {
9080 /* XXXJRT new stat, please */
9081 ip6stat.ip6s_toosmall++;
9082 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9083 goto bad;
9084 }
9085 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
9086 struct ifnet *inifp = m->m_pkthdr.rcvif;
9087 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
9088 ip6stat.ip6s_toosmall++;
9089 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9090 goto bad;
9091 }
9092 }
9093
9094 ip6 = mtod(m, struct ip6_hdr *);
9095
9096 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
9097 ip6stat.ip6s_badvers++;
9098 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
9099 goto bad;
9100 }
9101
9102 /* Checks out, proceed */
9103 *mp = m;
9104 return 0;
9105
9106 bad:
9107 *mp = m;
9108 return -1;
9109 }
9110
9111 /*
9112 * the PF routines expect to be called from ip_input, so we
9113 * need to do and undo here some of the same processing.
9114 *
9115 * XXX : this is heavily inspired on bridge_pfil()
9116 */
9117 static int
9118 bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
9119 bool input)
9120 {
9121 /*
9122 * XXX : mpetit : heavily inspired by bridge_pfil()
9123 */
9124
9125 int snap, error, i, hlen;
9126 struct ether_header *eh1, eh2;
9127 struct ip *ip;
9128 struct llc llc1;
9129 u_int16_t ether_type;
9130
9131 snap = 0;
9132 error = -1; /* Default error if not error == 0 */
9133
9134 if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
9135 return 0; /* filtering is disabled */
9136 }
9137 i = min((*mp)->m_pkthdr.len, max_protohdr);
9138 if ((*mp)->m_len < i) {
9139 *mp = m_pullup(*mp, i);
9140 if (*mp == NULL) {
9141 BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
9142 return -1;
9143 }
9144 }
9145
9146 eh1 = mtod(*mp, struct ether_header *);
9147 ether_type = ntohs(eh1->ether_type);
9148
9149 /*
9150 * Check for SNAP/LLC.
9151 */
9152 if (ether_type < ETHERMTU) {
9153 struct llc *llc2 = (struct llc *)(eh1 + 1);
9154
9155 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
9156 llc2->llc_dsap == LLC_SNAP_LSAP &&
9157 llc2->llc_ssap == LLC_SNAP_LSAP &&
9158 llc2->llc_control == LLC_UI) {
9159 ether_type = htons(llc2->llc_un.type_snap.ether_type);
9160 snap = 1;
9161 }
9162 }
9163
9164 /*
9165 * If we're trying to filter bridge traffic, don't look at anything
9166 * other than IP and ARP traffic. If the filter doesn't understand
9167 * IPv6, don't allow IPv6 through the bridge either. This is lame
9168 * since if we really wanted, say, an AppleTalk filter, we are hosed,
9169 * but of course we don't have an AppleTalk filter to begin with.
9170 * (Note that since pfil doesn't understand ARP it will pass *ALL*
9171 * ARP traffic.)
9172 */
9173 switch (ether_type) {
9174 case ETHERTYPE_ARP:
9175 case ETHERTYPE_REVARP:
9176 return 0; /* Automatically pass */
9177
9178 case ETHERTYPE_IP:
9179 case ETHERTYPE_IPV6:
9180 break;
9181 default:
9182 /*
9183 * Check to see if the user wants to pass non-ip
9184 * packets, these will not be checked by pf and
9185 * passed unconditionally so the default is to drop.
9186 */
9187 if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
9188 goto bad;
9189 }
9190 break;
9191 }
9192
9193 /* Strip off the Ethernet header and keep a copy. */
9194 m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
9195 m_adj(*mp, ETHER_HDR_LEN);
9196
9197 /* Strip off snap header, if present */
9198 if (snap) {
9199 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
9200 m_adj(*mp, sizeof(struct llc));
9201 }
9202
9203 /*
9204 * Check the IP header for alignment and errors
9205 */
9206 switch (ether_type) {
9207 case ETHERTYPE_IP:
9208 error = bridge_ip_checkbasic(mp);
9209 break;
9210 case ETHERTYPE_IPV6:
9211 error = bridge_ip6_checkbasic(mp);
9212 break;
9213 default:
9214 error = 0;
9215 break;
9216 }
9217 if (error) {
9218 goto bad;
9219 }
9220
9221 error = 0;
9222
9223 /*
9224 * Run the packet through pf rules
9225 */
9226 switch (ether_type) {
9227 case ETHERTYPE_IP:
9228 /*
9229 * before calling the firewall, swap fields the same as
9230 * IP does. here we assume the header is contiguous
9231 */
9232 ip = mtod(*mp, struct ip *);
9233
9234 ip->ip_len = ntohs(ip->ip_len);
9235 ip->ip_off = ntohs(ip->ip_off);
9236
9237 if (ifp != NULL) {
9238 error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
9239 }
9240
9241 if (*mp == NULL || error != 0) { /* filter may consume */
9242 break;
9243 }
9244
9245 /* Recalculate the ip checksum and restore byte ordering */
9246 ip = mtod(*mp, struct ip *);
9247 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9248 if (hlen < (int)sizeof(struct ip)) {
9249 goto bad;
9250 }
9251 if (hlen > (*mp)->m_len) {
9252 if ((*mp = m_pullup(*mp, hlen)) == 0) {
9253 goto bad;
9254 }
9255 ip = mtod(*mp, struct ip *);
9256 if (ip == NULL) {
9257 goto bad;
9258 }
9259 }
9260 ip->ip_len = htons(ip->ip_len);
9261 ip->ip_off = htons(ip->ip_off);
9262 ip->ip_sum = 0;
9263 if (hlen == sizeof(struct ip)) {
9264 ip->ip_sum = in_cksum_hdr(ip);
9265 } else {
9266 ip->ip_sum = in_cksum(*mp, hlen);
9267 }
9268 break;
9269
9270 case ETHERTYPE_IPV6:
9271 if (ifp != NULL) {
9272 error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
9273 }
9274
9275 if (*mp == NULL || error != 0) { /* filter may consume */
9276 break;
9277 }
9278 break;
9279 default:
9280 error = 0;
9281 break;
9282 }
9283
9284 if (*mp == NULL) {
9285 return error;
9286 }
9287 if (error != 0) {
9288 goto bad;
9289 }
9290
9291 error = -1;
9292
9293 /*
9294 * Finally, put everything back the way it was and return
9295 */
9296 if (snap) {
9297 M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
9298 if (*mp == NULL) {
9299 return error;
9300 }
9301 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
9302 }
9303
9304 M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
9305 if (*mp == NULL) {
9306 return error;
9307 }
9308 bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
9309
9310 return 0;
9311
9312 bad:
9313 m_drop(*mp, DROPTAP_FLAG_DIR_IN, DROP_REASON_BRIDGE_PF, NULL, 0);
9314 *mp = NULL;
9315 return error;
9316 }
9317
9318 #if BRIDGESTP
9319 static void
9320 bridge_bstp_input_list(struct bstp_port *bp, struct mbuf *head)
9321 {
9322 mbuf_t next_packet = NULL;
9323
9324 for (mbuf_t scan = head; scan != NULL; scan = next_packet) {
9325 next_packet = scan->m_nextpkt;
9326 scan->m_nextpkt = NULL;
9327 bstp_input(bp, scan);
9328 }
9329 }
9330 #endif /* BRIDGESTP */
9331
9332 static mblist
9333 bridge_filter_arp_list(struct bridge_iflist * bif, mbuf_t m)
9334 {
9335 mbuf_t next_packet = NULL;
9336 mblist ret;
9337
9338 mblist_init(&ret);
9339 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9340 errno_t error;
9341
9342 /* take packet out of the list */
9343 next_packet = scan->m_nextpkt;
9344 scan->m_nextpkt = NULL;
9345 /* filter the ARP packet */
9346 error = bridge_host_filter_arp(bif, &scan);
9347 if (error != 0 && scan != NULL) {
9348 if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9349 brlog_mbuf_data(scan, 0,
9350 sizeof(struct ether_header) +
9351 sizeof(struct ip));
9352 }
9353 m_drop(scan, DROPTAP_FLAG_DIR_IN,
9354 DROP_REASON_BRIDGE_HOST_FILTER, NULL, 0);
9355 scan = NULL;
9356 }
9357 if (scan != NULL) {
9358 /* add it to the list */
9359 mblist_append(&ret, scan);
9360 }
9361 }
9362 return ret;
9363 }
9364
9365 static mbuf_t
9366 bridge_filter_checksum(ifnet_t bridge_ifp, struct bridge_iflist * bif, mbuf_t m,
9367 bool is_ipv4, bool host_filter, bool checksum)
9368 {
9369 uint32_t dbgf = 0;
9370 errno_t error;
9371 ip_packet_info info;
9372 u_int mac_hlen = sizeof(struct ether_header);
9373 drop_reason_t drop_reason = DROP_REASON_BRIDGE_UNSPECIFIED;
9374
9375 if (host_filter) {
9376 dbgf |= BR_DBGF_HOSTFILTER;
9377 }
9378 if (checksum) {
9379 dbgf |= BR_DBGF_CHECKSUM;
9380 }
9381 /* get the IP protocol header */
9382 error = bridge_get_ip_proto(&m, mac_hlen, is_ipv4, &info,
9383 &bif->bif_stats.brms_in_ip);
9384 if (error != 0) {
9385 BRIDGE_LOG(LOG_NOTICE, dbgf,
9386 "%s(%s) bridge_get_ip_proto failed %d",
9387 bridge_ifp->if_xname,
9388 bif->bif_ifp->if_xname, error);
9389 drop_reason = DROP_REASON_BRIDGE_NO_PROTO;
9390 goto drop;
9391 }
9392 if (host_filter) {
9393 bool drop = true;
9394
9395 /* restrict IP protocols */
9396 switch (info.ip_proto) {
9397 case IPPROTO_ICMP:
9398 case IPPROTO_IGMP:
9399 drop = !is_ipv4;
9400 break;
9401 case IPPROTO_TCP:
9402 case IPPROTO_UDP:
9403 drop = false;
9404 break;
9405 case IPPROTO_ICMPV6:
9406 drop = is_ipv4;
9407 break;
9408 default:
9409 break;
9410 }
9411 if (drop) {
9412 BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
9413 drop_reason = DROP_REASON_BRIDGE_BAD_PROTO;
9414 goto drop;
9415 }
9416 bridge_hostfilter_stats.brhf_ip_ok += 1;
9417 }
9418 if (checksum) {
9419 /* need to compute IP/UDP/TCP/checksums */
9420 error = bridge_offload_checksum(&m, &info, &bif->bif_stats);
9421 if (error != 0) {
9422 BRIDGE_LOG(LOG_NOTICE, dbgf,
9423 "%s(%s) bridge_offload_checksum failed %d",
9424 bridge_ifp->if_xname,
9425 bif->bif_ifp->if_xname, error);
9426 drop_reason = DROP_REASON_BRIDGE_CHECKSUM;
9427 goto drop;
9428 }
9429 }
9430 return m;
9431
9432 drop:
9433 /* toss the packet */
9434 if (m != NULL) {
9435 if (host_filter &&
9436 BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9437 brlog_mbuf_data(m, 0,
9438 sizeof(struct ether_header) +
9439 sizeof(struct ip));
9440 }
9441 m_drop(m, DROPTAP_FLAG_DIR_IN, drop_reason, NULL, 0);
9442 m = NULL;
9443 }
9444 return NULL;
9445 }
9446
9447 static mblist
9448 bridge_filter_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9449 mbuf_t in_list, ether_type_flag_t etypef, bool host_filter, bool checksum)
9450 {
9451 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
9452 mbuf_t next_packet = NULL;
9453 mblist ret;
9454
9455 mblist_init(&ret);
9456 for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9457 /* take packet out of the list */
9458 next_packet = scan->m_nextpkt;
9459 scan->m_nextpkt = NULL;
9460 scan = bridge_filter_checksum(bridge_ifp, bif,
9461 scan, is_ipv4, host_filter, checksum);
9462 if (scan != NULL) {
9463 /* add packet to the list */
9464 mblist_append(&ret, scan);
9465 }
9466 }
9467 return ret;
9468 }
9469
9470 static mbuf_t
9471 bridge_checksum_offload_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9472 mbuf_t m, bool is_ipv4)
9473 {
9474 mblist ret;
9475 mbuf_t next_packet;
9476
9477 mblist_init(&ret);
9478 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
9479 uint32_t csum_flags;
9480
9481 /* take it out of the list */
9482 next_packet = scan->m_nextpkt;
9483 scan->m_nextpkt = NULL;
9484
9485 csum_flags = scan->m_pkthdr.csum_flags;
9486 if ((csum_flags & checksum_request_flags) != 0) {
9487 /* compute the checksum now */
9488 scan = bridge_filter_checksum(bridge_ifp, bif, scan,
9489 is_ipv4, false, true);
9490 if (scan != NULL) {
9491 /* clear offload now */
9492 scan->m_pkthdr.csum_flags &= csum_flags;
9493 }
9494 }
9495 if (scan != NULL) {
9496 mblist_append(&ret, scan);
9497 }
9498 }
9499 return ret.head;
9500 }
9501
9502 static mbuf_t
9503 copy_broadcast_packet(mbuf_t m)
9504 {
9505 mbuf_t mc;
9506
9507 /* make a copy of the packet */
9508 mc = m_dup(m, M_DONTWAIT);
9509 if (mc != NULL) {
9510 struct ether_header *eh;
9511
9512 /* make copy look like it is broadcast */
9513 mc->m_flags |= M_BCAST;
9514 eh = mtod(mc, struct ether_header *);
9515 bcopy(etherbroadcastaddr, eh->ether_dhost, ETHER_ADDR_LEN);
9516 }
9517 return mc;
9518 }
9519
9520 static mblist
9521 bridge_find_broadcast_ipv4(mbuf_t in_list, mbuf_t * ip_bcast_head)
9522 {
9523 mblist ip_bcast;
9524 mbuf_t next_packet = NULL;
9525 mblist ret;
9526
9527 mblist_init(&ret);
9528 mblist_init(&ip_bcast);
9529 for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
9530 mbuf_t bcast_pkt = NULL;
9531 uint8_t *header;
9532
9533 /* take packet out of the list */
9534 next_packet = scan->m_nextpkt;
9535 scan->m_nextpkt = NULL;
9536
9537 header = get_ether_ip_header_ptr(&scan, FALSE);
9538 if (header != NULL) {
9539 struct in_addr dst;
9540 struct ip *iphdr;
9541
9542 iphdr = (struct ip *)(header + sizeof(struct ether_header));
9543 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
9544 if (dst.s_addr == INADDR_BROADCAST) {
9545 bcast_pkt = copy_broadcast_packet(scan);
9546 }
9547 }
9548 if (bcast_pkt != NULL) {
9549 /* add packet to broadcast list */
9550 mblist_append(&ip_bcast, bcast_pkt);
9551 }
9552 if (scan != NULL) {
9553 /* add packet back into the list */
9554 mblist_append(&ret, scan);
9555 }
9556 }
9557 *ip_bcast_head = ip_bcast.head;
9558 return ret;
9559 }
9560
9561 static ifnet_t
9562 bridge_find_member(struct bridge_softc * sc, uint8_t * lladdr,
9563 struct bridge_iflist * sbif)
9564 {
9565 struct bridge_iflist * bif;
9566
9567 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
9568 if (bif == sbif) {
9569 /* skip the input member */
9570 continue;
9571 }
9572 if (_ether_cmp(IF_LLADDR(bif->bif_ifp), lladdr) == 0) {
9573 return bif->bif_ifp;
9574 }
9575 }
9576 return NULL;
9577 }
9578
9579
9580 /*
9581 * Function: bridge_input_list
9582 *
9583 * Purpose:
9584 * Process a list of input packets through the bridge.
9585 * The caller ensures that all of the packets in the list
9586 * `list_head` .. `list_tail` have the same ethernet header.
9587 *
9588 * Returns:
9589 * Non-NULL head of the chain of packets that were not consumed/freed,
9590 * *tail_p set to the tail of that chain.
9591 *
9592 * NULL if all of the packets were consumed.
9593 */
9594 static mblist
9595 bridge_input_list(struct bridge_softc * sc, ifnet_t ifp,
9596 struct ether_header * eh_in_p, mblist list, bool is_promisc)
9597 {
9598 struct bridge_iflist * bif;
9599 ifnet_t bridge_ifp;
9600 bool bridge_needs_input;
9601 bool checksum_offload;
9602 uint8_t * dhost;
9603 #if BRIDGESTP
9604 bool discarding = false;
9605 #endif /* BRIDGESTP */
9606 ifnet_t dst_if = NULL;
9607 errno_t error;
9608 ether_type_flag_t etypef;
9609 bool host_filter;
9610 bool host_filter_drop = false;
9611 mbuf_ref_t ip_bcast = NULL;
9612 bool is_bridge_mac = false;
9613 bool is_broadcast;
9614 bool is_ifp_mac;
9615 ifnet_t member_input = NULL;
9616 uint8_t * shost;
9617 bool uses_virtio = false;
9618 uint16_t vlan;
9619
9620 if (ifp->if_bridge == NULL) {
9621 /* no longer part of bridge */
9622 goto done;
9623 }
9624 bridge_ifp = sc->sc_ifp;
9625 is_broadcast = IS_BCAST_MCAST(list.head);
9626 is_ifp_mac = (!is_broadcast && !is_promisc);
9627 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9628 "%s from %s count %d head 0x%llx.0x%llx tail 0x%llx.0x%llx",
9629 bridge_ifp->if_xname, ifp->if_xname, list.count,
9630 (uint64_t)VM_KERNEL_ADDRPERM(list.head),
9631 (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.head, void *)),
9632 (uint64_t)VM_KERNEL_ADDRPERM(list.tail),
9633 (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.tail, void *)));
9634
9635 /* assume we'll return all packets */
9636 if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
9637 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9638 "%s not running passing along",
9639 bridge_ifp->if_xname);
9640 goto done;
9641 }
9642
9643 vlan = VLANTAGOF(m);
9644
9645 /* lookup the bridge member */
9646 BRIDGE_LOCK(sc);
9647 bif = bridge_lookup_member_if(sc, ifp);
9648 if (bif == NULL) {
9649 BRIDGE_UNLOCK(sc);
9650 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9651 "%s bridge_lookup_member_if failed",
9652 bridge_ifp->if_xname);
9653 goto done;
9654 }
9655
9656 uses_virtio = bif_uses_virtio(bif);
9657
9658 /*
9659 * host filter drops packets that:
9660 * - are not ARP, IPv4, or IPv6
9661 * - have incorrect source MAC address
9662 */
9663 host_filter = (bif->bif_flags & BIFF_HOST_FILTER) != 0;
9664 etypef = ether_type_flag_get(eh_in_p->ether_type);
9665 if (host_filter
9666 && (etypef & ETHER_TYPE_FLAG_IP_ARP) == 0) {
9667 /* ether type not one of ARP, IPv4, or IPv6 */
9668 BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
9669 host_filter_drop = true;
9670 } else if ((bif->bif_flags & BIFF_HF_HWSRC) != 0 &&
9671 bcmp(eh_in_p->ether_shost, bif->bif_hf_hwsrc, ETHER_ADDR_LEN)
9672 != 0) {
9673 /* only allow the single source MAC address */
9674 BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr,
9675 __func__, __LINE__);
9676 host_filter_drop = true;
9677 }
9678 if (host_filter_drop) {
9679 BRIDGE_UNLOCK(sc);
9680 m_drop_list(list.head, bridge_ifp, DROPTAP_FLAG_DIR_IN,
9681 DROP_REASON_BRIDGE_HOST_FILTER, NULL, 0);
9682 list.head = list.tail = NULL;
9683 goto done;
9684 }
9685
9686 #if BRIDGESTP
9687 discarding = (bif->bif_ifflags & IFBIF_STP) != 0 &&
9688 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING;
9689 #endif /* BRIDGESTP */
9690
9691 dhost = eh_in_p->ether_dhost;
9692 shost = eh_in_p->ether_shost;
9693 /*
9694 * Reserved multicast address listed in 802.1D section 7.12.6
9695 * must not be forwarded by the bridge.
9696 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
9697 */
9698 if (is_broadcast) {
9699 if (IS_MCAST(list.head)) {
9700 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
9701 " multicast: "
9702 "%02x:%02x:%02x:%02x:%02x:%02x",
9703 dhost[0], dhost[1],
9704 dhost[2], dhost[3],
9705 dhost[4], dhost[5]);
9706 }
9707 if (bcmp(dhost, bstp_etheraddr, (ETHER_ADDR_LEN - 1)) == 0) {
9708 if (dhost[5] == BSTP_ETHERADDR_RANGE_FIRST) {
9709 /* multicast for spanning tree */
9710 #if BRIDGESTP
9711 bridge_bstp_input_list(&bif->bif_stp, list.head);
9712 #else /* BRIDGESTP */
9713 m_freem_list(list.head);
9714 #endif /* BRIDGESTP */
9715 list.head = list.tail = NULL;
9716 BRIDGE_UNLOCK(sc);
9717 goto done;
9718 }
9719 if (dhost[5] <= BSTP_ETHERADDR_RANGE_LAST) {
9720 /* allow packet to continue up the stack */
9721 BRIDGE_UNLOCK(sc);
9722 goto done;
9723 }
9724 }
9725 /* broadcast to all members */
9726 os_atomic_add(&bridge_ifp->if_imcasts, list.count, relaxed);
9727 }
9728
9729 #if BRIDGESTP
9730 if (discarding) {
9731 BRIDGE_UNLOCK(sc);
9732 goto done;
9733 }
9734 #endif /* BRIDGESTP */
9735
9736 /* If the interface is learning, record the address. */
9737 if ((bif->bif_ifflags & IFBIF_LEARNING) != 0) {
9738 error = bridge_rtupdate(sc, shost, vlan, bif, 0, IFBAF_DYNAMIC);
9739 /*
9740 * If the interface has addresses limits then deny any source
9741 * that is not in the cache.
9742 */
9743 if (error != 0 && bif->bif_addrmax) {
9744 BRIDGE_UNLOCK(sc);
9745 goto done;
9746 }
9747 }
9748 #if BRIDGESTP
9749 if ((bif->bif_ifflags & IFBIF_STP) != 0 &&
9750 bif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
9751 BRIDGE_UNLOCK(sc);
9752 goto done;
9753 }
9754 #endif /* BRIDGESTP */
9755
9756 /*
9757 * If the packet is not IP, let the host filter drop ARP packets.
9758 * Otherwise, if the host filter is enabled or we need to compute
9759 * checksums, do that.
9760 * Otherwise, if MAC-NAT is enabled and this is an IPv4 packet,
9761 * check for IPv4 broadcast packets. Accumulate those in a separate
9762 * list `ip_bcast`.
9763 */
9764 checksum_offload = bif_has_checksum_offload(bif);
9765 if (!ether_type_flag_is_ip(etypef)) {
9766 /* host filter process ARP */
9767 if (host_filter) {
9768 /* host filter check earlier means this must be ARP */
9769 VERIFY(etypef == ETHER_TYPE_FLAG_ARP);
9770 list = bridge_filter_arp_list(bif, list.head);
9771 if (list.head == NULL) {
9772 VERIFY(list.tail == NULL);
9773 BRIDGE_UNLOCK(sc);
9774 goto done;
9775 }
9776 }
9777 } else if (host_filter || checksum_offload) {
9778 /* host filter and/or checksum */
9779 list = bridge_filter_checksum_list(bridge_ifp, bif,
9780 list.head, etypef, host_filter, checksum_offload);
9781 if (list.head == NULL) {
9782 VERIFY(list.tail == NULL);
9783 BRIDGE_UNLOCK(sc);
9784 goto done;
9785 }
9786 } else if (is_ifp_mac && bif == sc->sc_mac_nat_bif &&
9787 etypef == ETHER_TYPE_FLAG_IPV4) {
9788 /* look for broadcast IPv4 packet */
9789 list = bridge_find_broadcast_ipv4(list.head, &ip_bcast);
9790 if (list.head == NULL && ip_bcast == NULL) {
9791 /* all packets were consumed */
9792 BRIDGE_UNLOCK(sc);
9793 goto done;
9794 }
9795 }
9796
9797 /*
9798 * If the bridge has ULP attached, and the destination MAC
9799 * matches the bridge interface, claim the packets for the bridge
9800 * interface.
9801 */
9802 bridge_needs_input = (sc->sc_flags & SCF_PROTO_ATTACHED) != 0;
9803 if (bridge_needs_input &&
9804 !is_broadcast && _ether_cmp(dhost, IF_LLADDR(bridge_ifp)) == 0) {
9805 is_bridge_mac = true;
9806 }
9807 if (is_ifp_mac) {
9808 /* unicast to the interface */
9809 if (sc->sc_mac_nat_bif == bif) {
9810 mbuf_ref_t forward = NULL;
9811
9812 if (list.head != NULL) {
9813 /* handle MAC-NAT if enabled */
9814 list = bridge_mac_nat_input_list(sc, ifp,
9815 list.head, &forward);
9816 }
9817 if (ip_bcast != NULL) {
9818 /* forward to all members except this one */
9819 /* bridge_broadcast_list unlocks */
9820 bridge_broadcast_list(sc, bif, etypef,
9821 ip_bcast, pkt_direction_RX);
9822 } else {
9823 BRIDGE_UNLOCK(sc);
9824 }
9825 if (forward != NULL) {
9826 bridge_mac_nat_forward_list(bridge_ifp, etypef,
9827 forward);
9828 }
9829 } else {
9830 BRIDGE_UNLOCK(sc);
9831 }
9832 /* unicast packets for this interface do not get forwarded */
9833 goto done;
9834 }
9835 if (is_bridge_mac || list.head == NULL) {
9836 BRIDGE_UNLOCK(sc);
9837 goto done;
9838 }
9839 if (!is_broadcast) {
9840 /* find where to send the packet */
9841 dst_if = bridge_rtlookup(sc, dhost, vlan);
9842 if (ifp == dst_if) {
9843 /* nothing to forward */
9844 BRIDGE_UNLOCK(sc);
9845 goto done;
9846 }
9847 if (dst_if == NULL) {
9848 /* if a member is the dhost, deliver as input */
9849 member_input = bridge_find_member(sc, dhost, bif);
9850 if (member_input != NULL) {
9851 /* grab packets destined to member */
9852 BRIDGE_UNLOCK(sc);
9853 goto done;
9854 }
9855 /* if a member is shost, there's a loop, drop it */
9856 if (bridge_find_member(sc, shost, bif) != NULL) {
9857 BRIDGE_UNLOCK(sc);
9858 m_drop_list(list.head, bridge_ifp, DROPTAP_FLAG_DIR_IN,
9859 DROP_REASON_BRIDGE_LOOP, NULL, 0);
9860 list.head = list.tail = NULL;
9861 goto done;
9862 }
9863 }
9864 }
9865 if (dst_if == NULL) {
9866 mbuf_t m;
9867
9868 m = copy_packet_list(list.head);
9869 if (m != NULL) {
9870 /* bridge_broadcast_list unlocks */
9871 bridge_broadcast_list(sc, bif, etypef, m,
9872 pkt_direction_RX);
9873 } else {
9874 BRIDGE_UNLOCK(sc);
9875 }
9876 } else {
9877 /* bridge_forward_list() consumes list and unlocks */
9878 bridge_forward_list(sc, bif, dst_if, etypef, list.head);
9879 list.head = list.tail = NULL;
9880 }
9881
9882 done:
9883 if (list.head != NULL) {
9884 if (member_input != NULL) {
9885 /* member gets the packets */
9886 inject_input_packet_list(member_input, list.head, true);
9887 list.head = list.tail = NULL;
9888 } else if (is_bridge_mac) {
9889 /* bridge consumes all the unicast packets */
9890 bridge_interface_input_list(bridge_ifp, etypef, list,
9891 uses_virtio);
9892 list.head = list.tail = NULL;
9893 } else {
9894 adjust_input_packet_list(list.head);
9895 }
9896 }
9897 return list;
9898 }
9899
9900 static inline void
9901 update_mbuf_flags(struct ifnet * ifp, mbuf_t m, struct ether_header * eh)
9902 {
9903 /* duplicate some of the work done in ether_demux */
9904 if ((eh->ether_dhost[0] & 1) == 0) {
9905 if (_ether_cmp(eh->ether_dhost, IF_LLADDR(ifp)) != 0) {
9906 m->m_flags |= M_PROMISC;
9907 }
9908 } else {
9909 /* Check for broadcast */
9910 if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0) {
9911 m->m_flags |= M_BCAST;
9912 } else {
9913 m->m_flags |= M_MCAST;
9914 }
9915 }
9916 if (m->m_flags & M_HASFCS) {
9917 /*
9918 * If the M_HASFCS is set by the driver we want to make sure
9919 * that we strip off the trailing FCS data before handing it
9920 * up the stack.
9921 */
9922 m_adj(m, -ETHER_CRC_LEN);
9923 m->m_flags &= ~M_HASFCS;
9924 }
9925 return;
9926 }
9927
9928 static mbuf_t
9929 bridge_pf_list(mbuf_t m, ifnet_t ifp, uint32_t sc_filter_flags, bool input)
9930 {
9931 mbuf_t next_packet = NULL;
9932 mblist ret;
9933
9934 mblist_init(&ret);
9935 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9936 next_packet = scan->m_nextpkt;
9937
9938 /* remove packet from list, and pass through PF */
9939 scan->m_nextpkt = NULL;
9940 MBUF_INPUT_CHECK(scan, ifp);
9941 bridge_pf(&scan, ifp, sc_filter_flags, input);
9942 if (scan != NULL) {
9943 /* add packet back to the list */
9944 mblist_append(&ret, scan);
9945 }
9946 }
9947 return ret.head;
9948 }
9949
9950 static inline bool
9951 bridge_check_frame_header(struct bridge_softc * sc, ifnet_t ifp, mbuf_t m)
9952 {
9953 bool included = false;
9954 char * __single header;
9955 size_t header_length = 0;
9956
9957 header = m->m_pkthdr.pkt_hdr;
9958 if (header >= (char *)mbuf_datastart(m) &&
9959 header <= mtod(m, char *)) {
9960 header_length = mtod(m, char *) - header;
9961 if (header_length >= ETHER_HDR_LEN) {
9962 included = true;
9963 }
9964 }
9965 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9966 "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
9967 "header length %lu", sc->sc_ifp->if_xname,
9968 ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
9969 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
9970 (uint64_t)VM_KERNEL_ADDRPERM(header),
9971 included ? "inside" : "outside", header_length);
9972 if (!included) {
9973 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9974 "%s: frame_header outside mbuf", ifp->if_xname);
9975 }
9976 return included;
9977 }
9978
9979
9980 mbuf_t
9981 bridge_early_input(struct ifnet *ifp, mbuf_t in_list, u_int32_t cnt)
9982 {
9983 struct ether_header eh;
9984 mblist list;
9985 volatile bool list_is_promisc;
9986 int n_lists = 0;
9987 mbuf_t next_packet = NULL;
9988 mblist ret;
9989 struct bridge_softc * __single sc = ifp->if_bridge;
9990 uint32_t sc_filter_flags;
9991
9992 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9993 "(%s): count %u", ifp->if_xname, cnt);
9994
9995 /* run packet list through PF first */
9996 sc_filter_flags = sc->sc_filter_flags;
9997 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
9998 in_list = bridge_pf_list(in_list, ifp, sc_filter_flags, true);
9999 }
10000
10001 /* form sublists with the same ethernet header */
10002 mblist_init(&list);
10003 mblist_init(&ret);
10004 for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
10005 struct ether_header * eh_p;
10006 volatile bool is_promisc;
10007 mblist resid;
10008
10009 /* take it out of the list */
10010 next_packet = scan->m_nextpkt;
10011 scan->m_nextpkt = NULL;
10012
10013 /* don't loop the packet */
10014 if ((scan->m_flags & M_PROTO1) != 0) {
10015 mblist_append(&ret, scan);
10016 continue;
10017 }
10018 /* Check if this mbuf looks valid */
10019 MBUF_INPUT_CHECK(scan, ifp);
10020
10021 /* if the frame header isn't in the first mbuf, ignore */
10022 if (!bridge_check_frame_header(sc, ifp, scan)) {
10023 mblist_append(&ret, scan);
10024 continue;
10025 }
10026 eh_p = __unsafe_forge_single(struct ether_header *,
10027 scan->m_pkthdr.pkt_hdr);
10028 update_mbuf_flags(ifp, scan, eh_p);
10029
10030 /* set start back to include ether header */
10031 _mbuf_adjust_pkthdr_and_data(scan, -ETHER_HDR_LEN);
10032
10033 is_promisc = get_and_clear_promisc(scan);
10034 if (list.head == NULL) {
10035 /* start a new list */
10036 mblist_append(&list, scan);
10037 bcopy(eh_p, &eh, sizeof(eh));
10038 list_is_promisc = is_promisc;
10039 } else if (bcmp(eh_p, &eh, sizeof(eh)) != 0) {
10040 n_lists++;
10041 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
10042 "(%s): sublist %u pkts %u",
10043 ifp->if_xname, n_lists, list.count);
10044 if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
10045 brlog_ether_header(&eh);
10046 }
10047 resid = bridge_input_list(sc, ifp, &eh, list,
10048 list_is_promisc);
10049 if (resid.head != NULL) {
10050 /* add to the packets to be returned */
10051 mblist_append_list(&ret, resid);
10052 }
10053 /* start new list */
10054 mblist_init(&list);
10055 mblist_append(&list, scan);
10056 list_is_promisc = is_promisc;
10057 bcopy(eh_p, &eh, sizeof(eh));
10058 } else {
10059 mblist_append(&list, scan);
10060 VERIFY(is_promisc == list_is_promisc);
10061 }
10062 if (next_packet == NULL) {
10063 /* last list */
10064 n_lists++;
10065 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
10066 "(%s): sublist %u pkts %u",
10067 ifp->if_xname, n_lists, list.count);
10068 if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
10069 brlog_ether_header(&eh);
10070 }
10071 resid = bridge_input_list(sc, ifp, &eh, list,
10072 list_is_promisc);
10073 if (resid.head != NULL) {
10074 /* add to the packets to be returned */
10075 mblist_append_list(&ret, resid);
10076 }
10077 }
10078 }
10079 return ret.head;
10080 }
10081
10082 /*
10083 * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
10084 * All rights reserved.
10085 *
10086 * Redistribution and use in source and binary forms, with or without
10087 * modification, are permitted provided that the following conditions
10088 * are met:
10089 * 1. Redistributions of source code must retain the above copyright
10090 * notice, this list of conditions and the following disclaimer.
10091 * 2. Redistributions in binary form must reproduce the above copyright
10092 * notice, this list of conditions and the following disclaimer in the
10093 * documentation and/or other materials provided with the distribution.
10094 *
10095 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
10096 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
10097 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
10098 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
10099 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
10100 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
10101 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
10102 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
10103 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
10104 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
10105 * SUCH DAMAGE.
10106 */
10107
10108 /*
10109 * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
10110 *
10111 * Create a queue of packets/segments which fit the given mss + hdr_len.
10112 * m0 points to mbuf chain to be segmented.
10113 * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
10114 * into segments of length MSS bytes and then copy the first hdr_len bytes
10115 * from m0 at the top of each segment.
10116 * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
10117 * in each segment after the first hdr_len bytes
10118 *
10119 * Return the new queue with the segments on success, NULL on failure.
10120 * (the mbuf queue is freed in this case).
10121 */
10122
10123 static mblist
10124 m_seg(struct mbuf *m0, int hdr_len, int mss, char * hdr2_buf __sized_by_or_null(hdr2_len), int hdr2_len)
10125 {
10126 int off = 0, n, firstlen;
10127 struct mbuf *mseg;
10128 int total_len = m0->m_pkthdr.len;
10129 mblist ret;
10130
10131 mblist_init(&ret);
10132 mblist_append(&ret, m0);
10133
10134 /*
10135 * Segmentation useless
10136 */
10137 if (total_len <= hdr_len + mss) {
10138 n = 1;
10139 goto done;
10140 }
10141 if (hdr2_buf == NULL || hdr2_len <= 0) {
10142 hdr2_buf = NULL;
10143 hdr2_len = 0;
10144 }
10145
10146 off = hdr_len + mss;
10147 firstlen = mss; /* first segment stored in the original mbuf */
10148 ret.bytes = off;
10149 for (n = 1; off < total_len; off += mss, n++) {
10150 struct mbuf *m;
10151 /*
10152 * Copy the header from the original packet
10153 * and create a new mbuf chain
10154 */
10155 if (MHLEN < hdr_len) {
10156 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
10157 } else {
10158 m = m_gethdr(M_NOWAIT, MT_DATA);
10159 }
10160
10161 if (m == NULL) {
10162 #ifdef GSO_DEBUG
10163 D("MGETHDR error\n");
10164 #endif
10165 goto err;
10166 }
10167
10168 m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
10169
10170 m->m_len = hdr_len;
10171 /*
10172 * if the optional header is present, copy it
10173 */
10174 if (hdr2_buf != NULL) {
10175 m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
10176 }
10177
10178 m->m_flags |= (m0->m_flags & M_COPYFLAGS);
10179 if (off + mss >= total_len) { /* last segment */
10180 mss = total_len - off;
10181 }
10182 /*
10183 * Copy the payload from original packet
10184 */
10185 mseg = m_copym(m0, off, mss, M_NOWAIT);
10186 if (mseg == NULL) {
10187 m_freem(m);
10188 #ifdef GSO_DEBUG
10189 D("m_copym error\n");
10190 #endif
10191 goto err;
10192 }
10193 m_cat(m, mseg);
10194
10195 m->m_pkthdr.len = hdr_len + hdr2_len + mss;
10196 m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
10197 /*
10198 * Copy the checksum flags and data (in_cksum() need this)
10199 */
10200 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
10201 m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
10202 m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
10203
10204 mblist_append(&ret, m);
10205 }
10206
10207 /*
10208 * Update first segment.
10209 * If the optional header is present, is necessary
10210 * to insert it into the first segment.
10211 */
10212 if (hdr2_buf == NULL) {
10213 m_adj(m0, hdr_len + firstlen - total_len);
10214 m0->m_pkthdr.len = hdr_len + firstlen;
10215 } else {
10216 mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
10217 if (mseg == NULL) {
10218 #ifdef GSO_DEBUG
10219 D("m_copym error\n");
10220 #endif
10221 goto err;
10222 }
10223 m_adj(m0, hdr_len - total_len);
10224 m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
10225 m_cat(m0, mseg);
10226 m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
10227 }
10228
10229 done:
10230 return ret;
10231
10232 err:
10233 if (ret.head != NULL) {
10234 m_freem_list(ret.head);
10235 mblist_init(&ret);
10236 }
10237 return ret;
10238 }
10239
10240 /*
10241 * Wrappers of IPv4 checksum functions
10242 */
10243 static inline void
10244 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
10245 {
10246 m->m_data += mac_hlen;
10247 m->m_len -= mac_hlen;
10248 m->m_pkthdr.len -= mac_hlen;
10249 #if __FreeBSD_version < 1000000
10250 ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
10251 #endif
10252
10253 in_delayed_cksum(m);
10254
10255 #if __FreeBSD_version < 1000000
10256 ip->ip_len = htons(ip->ip_len);
10257 #endif
10258 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
10259 m->m_len += mac_hlen;
10260 m->m_pkthdr.len += mac_hlen;
10261 m->m_data -= mac_hlen;
10262 }
10263
10264 static inline void
10265 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
10266 {
10267 m->m_data += mac_hlen;
10268
10269 ip->ip_sum = in_cksum(m, ip_hlen);
10270
10271 m->m_pkthdr.csum_flags &= ~CSUM_IP;
10272 m->m_data -= mac_hlen;
10273 }
10274
/*
 * Structure that contains the state during the TCP segmentation
 *
 * Filled in by gso_ip_tcp_init_state(); the `update` callback re-points
 * the header fields at each segment and the `internal` callback fixes up
 * that segment's headers.
 */
struct gso_ip_tcp_state {
	/* re-point hdr/tcp and recompute pay_len for the given segment */
	void    (*update)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* fix up the IP and TCP headers of the given segment */
	void    (*internal)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* number of bytes addressable through `hdr` */
	u_int ip_m0_len;
	/* start of the IP header of the current segment */
	uint8_t * __counted_by(ip_m0_len) hdr;
	/* TCP header of the current segment */
	struct tcphdr *tcp;
	/* length of the MAC header */
	int mac_hlen;
	/* length of the IP header as passed to gso_ip_tcp_init_state() */
	int ip_hlen;
	/* TCP header length in bytes (th_off << 2) */
	int tcp_hlen;
	/* mac_hlen + ip_hlen + tcp_hlen */
	int hlen;
	/* payload length of the current segment (packet length - hlen) */
	int pay_len;
	/* CSUM_* flags selecting the checksums computed in software */
	int sw_csum;
	/* TCP sequence number, host byte order, advanced per segment */
	uint32_t tcp_seq;
	/* IPv4 identification, host byte order, incremented per segment */
	uint16_t ip_id;
	/* NOTE(review): not assigned by gso_ip_tcp_init_state() -- confirm use */
	boolean_t is_tx;
};
10296
10297 /*
10298 * Update the pointers to TCP and IPv4 headers
10299 */
10300 static inline void
10301 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10302 {
10303 state->hdr = mtodo(m, state->mac_hlen);
10304 state->ip_m0_len = m->m_len - state->mac_hlen;
10305 state->ip_hlen = state->ip_hlen;
10306 state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10307 state->pay_len = m->m_pkthdr.len - state->hlen;
10308 }
10309
/*
 * Set properly the TCP and IPv4 headers
 *
 * Fixes up the headers of segment `m`, which `state` must currently
 * point at: assigns the next IP id, sets the IP length, seeds the TCP
 * checksum with the pseudo-header sum and, as selected by
 * state->sw_csum, computes the TCP and IP header checksums in software.
 * Also advances state->tcp_seq by the payload length of this segment.
 */
static inline void
gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
{
	struct ip *ip;
	/*
	 * Update IP header
	 */
	ip = (struct ip *)state->hdr;
	/* each segment gets its own, consecutive, IP id */
	ip->ip_id = htons((state->ip_id)++);
	ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
	/*
	 * TCP Checksum
	 */
	state->tcp->th_sum = 0;
	/* pseudo-header sum; the data checksum is folded in afterwards */
	state->tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
	    htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
	/*
	 * Checksum HW not supported (TCP)
	 */
	if (state->sw_csum & CSUM_DELAY_DATA) {
		gso_ipv4_data_cksum(m, ip, state->mac_hlen);
	}

	/* sequence number for the segment that follows */
	state->tcp_seq += state->pay_len;
	/*
	 * IP Checksum
	 */
	ip->ip_sum = 0;
	/*
	 * Checksum HW not supported (IP)
	 */
	if (state->sw_csum & CSUM_IP) {
		gso_ipv4_hdr_cksum(m, ip, state->mac_hlen, state->ip_hlen);
	}
}
10348
10349
10350 /*
10351 * Updates the pointers to TCP and IPv6 headers
10352 */
10353 static inline void
10354 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10355 {
10356 state->hdr = mtodo(m, state->mac_hlen);
10357 state->ip_m0_len = m->m_len - state->mac_hlen;
10358 state->ip_hlen = state->ip_hlen;
10359 state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10360 state->pay_len = m->m_pkthdr.len - state->hlen;
10361 }
10362
/*
 * Sets properly the TCP and IPv6 headers
 *
 * Fixes up the headers of segment `m`, which `state` must currently
 * point at: sets the IPv6 payload length, seeds the TCP checksum with the
 * pseudo-header sum and, if state->sw_csum selects it, finalizes the TCP
 * checksum in software. Also advances state->tcp_seq by the payload
 * length of this segment.
 */
static inline void
gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
{
	struct ip6_hdr *ip6;

	ip6 = (struct ip6_hdr *)state->hdr;
	/* payload length: packet length less the MAC and IP header lengths */
	ip6->ip6_plen = htons(m->m_pkthdr.len - state->mac_hlen - state->ip_hlen);
	/*
	 * TCP Checksum
	 */
	state->tcp->th_sum = 0;
	/* pseudo-header sum; the data checksum is folded in afterwards */
	state->tcp->th_sum = in6_pseudo(&ip6->ip6_src, &ip6->ip6_dst,
	    htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
	/*
	 * Checksum HW not supported (TCP)
	 */
	if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
		/* return value is deliberately ignored */
		(void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
	}
	/* sequence number for the segment that follows */
	state->tcp_seq += state->pay_len;
}
10388
10389 /*
10390 * Init the state during the TCP segmentation
10391 */
10392 static void
10393 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
10394 bool is_ipv4, int mac_hlen, int ip_hlen,
10395 uint8_t *__counted_by(ip_m0_len) ip_hdr, u_int ip_m0_len,
10396 struct tcphdr * tcp_hdr)
10397 {
10398 #pragma unused(ifp)
10399
10400 state->hdr = ip_hdr;
10401 state->ip_m0_len = ip_m0_len;
10402 state->ip_hlen = ip_hlen;
10403 state->tcp = tcp_hdr;
10404 if (is_ipv4) {
10405 state->ip_id = ntohs(((struct ip *)state->hdr)->ip_id);
10406 state->update = gso_ipv4_tcp_update;
10407 state->internal = gso_ipv4_tcp_internal;
10408 state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
10409 } else {
10410 state->update = gso_ipv6_tcp_update;
10411 state->internal = gso_ipv6_tcp_internal;
10412 state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
10413 }
10414 state->mac_hlen = mac_hlen;
10415 state->tcp_hlen = state->tcp->th_off << 2;
10416 state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
10417 state->tcp_seq = ntohl(state->tcp->th_seq);
10418 //state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
10419 return;
10420 }
10421
/*
 * GSO on TCP/IP (v4 or v6)
 *
 * Segment the given mbuf and return the list of packets.
 *
 * The segment size is chosen as follows:
 *  - the TSO MSS recorded in the packet, if non-zero;
 *  - else, on the receive side, an approximation from the LRO segment
 *    count (rx_seg_cnt), if non-zero;
 *  - else a value derived from the interface MTU, reduced by the
 *    if_bridge_tso_reduce_mss_* tunable for the direction.
 * On transmit with a known MSS, if the interface can do TSO for this
 * packet, the packet is only cut down to the interface's TSO size and the
 * TSO flags are kept ("TSO with GSO"); otherwise the TSO flags are
 * cleared.
 *
 * Returns the list produced by m_seg(): empty if segmentation failed
 * (m0 has been freed in that case), a single packet if no segmentation
 * was needed, otherwise the segments with their IP/TCP headers fixed up
 * through the `state` callbacks.
 */
static mblist
gso_ip_tcp(ifnet_t ifp, mbuf_t m0, struct gso_ip_tcp_state *state, bool is_tx)
{
	struct mbuf *m;
	int orig_mss;
	int mss = 0;
#ifdef GSO_STATS
	int total_len = m0->m_pkthdr.len;
#endif /* GSO_STATS */
	mblist seg;
	bool tso_with_gso = false;

	orig_mss = mss = _mbuf_get_tso_mss(m0);
	if (mss == 0 && !is_tx) {
		uint8_t         seg_cnt = m0->m_pkthdr.rx_seg_cnt;

		if (seg_cnt != 0) {
			uint32_t        hdr_len;
			uint32_t        len;

			/* approximate the MSS using LRO seg cnt */
			hdr_len = state->ip_hlen + state->tcp_hlen;
			len = mbuf_pkthdr_len(m0) - hdr_len - ETHER_HDR_LEN;
			/* may come out as 0, handled by the fallback below */
			mss = len / seg_cnt;
			m0->m_pkthdr.rx_seg_cnt = 0;
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
			    "%s: mss %d = len %d / seg cnt %d",
			    ifp->if_xname, mss, len, seg_cnt);
		}
	}
	if (mss == 0) {
		/* hack: we don't have the actual MSS */
		u_int           reduce_mss;

		reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
		    : if_bridge_tso_reduce_mss_forwarding;
		mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen -
		    reduce_mss;
		assert(mss > 0);
	} else if (is_tx) {
		bool    is_ipv4;
		bool    do_tso = true;

		/* is_ipv4 is only read below when do_tso is still true */
		if (TSO_IPV4_OK(ifp, m0)) {
			is_ipv4 = true;
		} else if (TSO_IPV6_OK(ifp, m0)) {
			is_ipv4 = false;
		} else {
			do_tso = false;
		}
		if (do_tso) { /* TSO with GSO */
			uint32_t        if_tso_max;

			if_tso_max = get_if_tso_mtu(ifp, is_ipv4);
			mss = if_tso_max - state->ip_hlen - state->tcp_hlen
			    - ETHER_HDR_LEN;
			tso_with_gso = true;
		}
	}
	if (!tso_with_gso) {
		/* clear TSO flags */
		m0->m_pkthdr.csum_flags &= ~_TSO_CSUM;
	}
	seg = m_seg(m0, state->hlen, mss, 0, 0);
	/* failure, or a single packet: no per-segment header fix-up */
	if (seg.head == NULL || seg.head->m_nextpkt == NULL) {
		return seg;
	}
	if (tso_with_gso) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
		    "%s TX gso size %d mss %d nsegs %d",
		    ifp->if_xname,
		    mss, orig_mss, seg.count);
	} else {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
		    "%s %s mss %d nsegs %d",
		    ifp->if_xname,
		    is_tx ? "TX" : "RX",
		    mss, seg.count);
	}
#ifdef GSO_STATS
	GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
	GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
	GSOSTAT_ADD(tcp.gsos_osegments, seg.count);
#endif /* GSO_STATS */

	/* first pkt */
	VERIFY(seg.head == m0);
	m = m0;

	state->update(state, m);

	/*
	 * At least two segments exist here. Each pass fixes up the current
	 * segment, then moves to the next one and prepares its TCP header;
	 * the loop ends with `m` at the last segment, still to be fixed up.
	 */
	do {
		/* FIN and PUSH are cleared on every segment but the last */
		state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);

		state->internal(state, m);
		m = m->m_nextpkt;
		state->update(state, m);
		/* CWR is cleared on every segment but the first */
		state->tcp->th_flags &= ~TH_CWR;
		state->tcp->th_seq = htonl(state->tcp_seq);
	} while (m->m_nextpkt);

	/* last pkt */
	state->internal(state, m);

#ifdef GSO_STATS
	/*
	 * NOTE(review): no `error` variable is declared in this function;
	 * this looks like it would not build with GSO_STATS defined -- confirm.
	 */
	if (!error) {
		GSOSTAT_INC(tcp.gsos_segmented);
		GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
		GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
		GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
	}
#endif /* GSO_STATS */
	return seg;
}
10542
10543 /*
10544 * GSO for TCP/IPv[46]
10545 */
10546 static mblist
10547 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
10548 u_int mac_hlen, bool is_ipv4, bool is_tx)
10549 {
10550 uint32_t csum_flags;
10551 struct gso_ip_tcp_state state;
10552 struct tcphdr *tcp;
10553
10554 assert(info_p->ip_proto_hdr != NULL);
10555 tcp = (struct tcphdr *)(void *)info_p->ip_proto_hdr;
10556 gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
10557 info_p->ip_hlen + info_p->ip_opt_len,
10558 info_p->ip_hdr, info_p->ip_m0_len, tcp);
10559 csum_flags = is_ipv4 ? CSUM_DELAY_DATA : CSUM_DELAY_IPV6_DATA; /* XXX */
10560 m->m_pkthdr.csum_flags |= csum_flags;
10561 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
10562 return gso_ip_tcp(ifp, m, &state, is_tx);
10563 }
10564
10565 static mblist
10566 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx)
10567 {
10568 int error;
10569 ip_packet_info info;
10570 struct bripstats stats; /* XXX ignored */
10571 mblist ret;
10572
10573 error = bridge_get_tcp_header(&m, mac_hlen, is_ipv4, &info, &stats);
10574 if (error != 0) {
10575 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10576 "%s bridge_get_tcp_header failed %d (%s)",
10577 ifp->if_xname, error,
10578 is_tx ? "TX" : "RX");
10579 if (m != NULL) {
10580 m_drop(m, DROPTAP_FLAG_DIR_IN,
10581 DROP_REASON_BRIDGE_CHECKSUM, NULL, 0);
10582 m = NULL;
10583 }
10584 goto no_segment;
10585 }
10586 if (info.ip_proto_hdr == NULL) {
10587 /* not actually a TCP packet, no segmentation */
10588 goto no_segment;
10589 }
10590 if (!is_tx && ip_packet_info_dst_is_our_ip(&info, ifp->if_index)) {
10591 goto no_segment;
10592 }
10593 return gso_tcp_with_info(ifp, m, &info, mac_hlen, is_ipv4, is_tx);
10594
10595 no_segment:
10596 mblist_init(&ret);
10597 if (m != NULL) {
10598 mblist_append(&ret, m);
10599 }
10600 return ret;
10601 }
10602