1 /*
2 * Copyright (c) 2004-2025 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */
30 /*
31 * Copyright 2001 Wasabi Systems, Inc.
32 * All rights reserved.
33 *
34 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed for the NetBSD Project by
47 * Wasabi Systems, Inc.
48 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49 * or promote products derived from this software without specific prior
50 * written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
56 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62 * POSSIBILITY OF SUCH DAMAGE.
63 */
64
65 /*
66 * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67 * All rights reserved.
68 *
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
71 * are met:
72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88 * POSSIBILITY OF SUCH DAMAGE.
89 *
90 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91 */
92
93 /*
94 * Network interface bridge support.
95 *
96 * TODO:
97 *
98 * - Currently only supports Ethernet-like interfaces (Ethernet,
99 * 802.11, VLANs on Ethernet, etc.) Figure out a nice way
100 * to bridge other types of interfaces (FDDI-FDDI, and maybe
101 * consider heterogeneous bridges).
102 *
103 * - GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104 */
105
106 #include <sys/cdefs.h>
107
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123
124 #include <sys/kauth.h>
125
126 #include <kern/thread_call.h>
127
128 #include <libkern/libkern.h>
129
130 #include <kern/zalloc.h>
131
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141
142 #include <netinet/in.h> /* for struct arpcom */
143 #include <netinet/tcp.h> /* for struct tcphdr */
144 #include <netinet/in_systm.h>
145 #include <netinet/in_var.h>
146 #define _IP_VHL
147 #include <netinet/ip.h>
148 #include <netinet/ip_var.h>
149 #include <netinet/ip6.h>
150 #include <netinet6/ip6_var.h>
151 #include <netinet/if_ether.h> /* for struct arpcom */
152 #include <net/bridgestp.h>
153 #include <net/if_bridgevar.h>
154 #include <net/if_llc.h>
155 #if NVLAN > 0
156 #include <net/if_vlan_var.h>
157 #endif /* NVLAN > 0 */
158
159 #include <net/if_ether.h>
160 #include <net/dlil.h>
161 #include <net/kpi_interfacefilter.h>
162 #include <net/pfvar.h>
163
164 #include <net/route.h>
165 #include <dev/random/randomdev.h>
166
167 #include <netinet/bootp.h>
168 #include <netinet/dhcp.h>
169
170 #if SKYWALK
171 #include <skywalk/nexus/netif/nx_netif.h>
172 #endif /* SKYWALK */
173
174 #include <net/sockaddr_utils.h>
175 #include <net/mblist.h>
176
177 #include <os/log.h>
178
179 #define _TSO_CSUM (CSUM_TSO_IPV4 | CSUM_TSO_IPV6)
180
181 static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
182
183
184 #define __M_FLAGS_ARE_SET(m, flags) (((m)->m_flags & (flags)) != 0)
185 #define IS_BCAST(m) __M_FLAGS_ARE_SET(m, M_BCAST)
186 #define IS_MCAST(m) __M_FLAGS_ARE_SET(m, M_MCAST)
187 #define IS_BCAST_MCAST(m) __M_FLAGS_ARE_SET(m, M_BCAST | M_MCAST)
188
189 #define HTONS_ETHERTYPE_ARP htons(ETHERTYPE_ARP)
190 #define HTONS_ETHERTYPE_IP htons(ETHERTYPE_IP)
191 #define HTONS_ETHERTYPE_IPV6 htons(ETHERTYPE_IPV6)
192 #define HTONS_ARPHRD_ETHER htons(ARPHRD_ETHER)
193 #define HTONS_ARPOP_REQUEST htons(ARPOP_REQUEST)
194 #define HTONS_ARPOP_REPLY htons(ARPOP_REPLY)
195 #define HTONS_IPPORT_BOOTPC htons(IPPORT_BOOTPC)
196 #define HTONS_IPPORT_BOOTPS htons(IPPORT_BOOTPS)
197 #define HTONS_DHCP_FLAGS_BROADCAST htons(DHCP_FLAGS_BROADCAST)
198
199 /*
200 * if_bridge_debug, BR_DBGF_*
201 * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
202 * to enable additional logs for the corresponding bridge function
203 * - "sysctl net.link.bridge.debug" controls the value of
204 * 'if_bridge_debug'
205 */
206 static uint32_t if_bridge_debug = 0;
207 #define BR_DBGF_LIFECYCLE 0x0001
208 #define BR_DBGF_INPUT 0x0002
209 #define BR_DBGF_OUTPUT 0x0004
210 #define BR_DBGF_RT_TABLE 0x0008
211 #define BR_DBGF_DELAYED_CALL 0x0010
212 #define BR_DBGF_IOCTL 0x0020
213 #define BR_DBGF_MBUF 0x0040
214 #define BR_DBGF_MCAST 0x0080
215 #define BR_DBGF_HOSTFILTER 0x0100
216 #define BR_DBGF_CHECKSUM 0x0200
217 #define BR_DBGF_MAC_NAT 0x0400
218 #define BR_DBGF_INPUT_LIST 0x0800
219
220 /*
221 * if_bridge_log_level
222 * - 'if_bridge_log_level' ensures that by default important logs are
223 * logged regardless of if_bridge_debug by comparing the log level
224 * in BRIDGE_LOG to if_bridge_log_level
225 * - "sysctl net.link.bridge.log_level" controls the value of
226 * 'if_bridge_log_level'
227 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
228 * logs must use LOG_NOTICE to ensure they appear by default
229 */
230 static int if_bridge_log_level = LOG_NOTICE;
231
/*
 * BRIDGE_DBGF_ENABLED
 * - evaluates to true if any bit of '__flag' is set in 'if_bridge_debug'
 * - '__flag' is parenthesized so that compound expressions (e.g. a
 *   conditional or an OR of several BR_DBGF_* values) are masked as a whole
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)
233
/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - emit the log via os_log() when either '__level' is less than or equal
 *   to 'if_bridge_log_level', or the debug flag(s) '__dbgf' are enabled in
 *   'if_bridge_debug'
 * - '__string' must be a string literal: BRIDGE_LOG concatenates it with
 *   a "%s: " prefix that is filled in with the calling function's name
 * - BRIDGE_LOG_SIMPLE does not include the function name in the log
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...) \
	do { \
		if ((__level) <= if_bridge_log_level || \
		    BRIDGE_DBGF_ENABLED(__dbgf)) { \
			os_log(OS_LOG_DEFAULT, "%s: " __string, \
			    __func__, ## __VA_ARGS__); \
		} \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...) \
	do { \
		if ((__level) <= if_bridge_log_level || \
		    BRIDGE_DBGF_ENABLED(__dbgf)) { \
			os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
		} \
	} while (0)
255
/*
 * Bridge locking
 * - _BRIDGE_LOCK/_BRIDGE_UNLOCK take and release the per-bridge mutex
 *   'sc_mtx' directly
 * - BRIDGE_LOCK2REF must be entered with the mutex held; it fails with
 *   EBUSY if an exclusive request is outstanding (sc_iflist_xcnt > 0),
 *   otherwise it takes a reference (sc_iflist_ref) and reports 0; in both
 *   cases the mutex is released
 * - BRIDGE_UNREF drops the reference and, if that was the last one while
 *   an exclusive request is outstanding, wakes up sleepers on 'sc_cv'
 * - BRIDGE_XLOCK must be entered with the mutex held; it registers an
 *   exclusive request and sleeps on 'sc_cv' until all references are gone
 * - BRIDGE_XDROP must be entered with the mutex held; it retires the
 *   exclusive request
 *
 * When BRIDGE_LOCK_DEBUG is non-zero the macros dispatch to the bridge_*()
 * functions defined elsewhere in this file instead.
 * NOTE(review): those functions presumably implement the same protocol
 * while recording caller history in the softc - confirm against their
 * definitions.
 */
#define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
#define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
#define BRIDGE_LOCK_ASSERT_HELD(_sc) \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc) \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)

#define BRIDGE_LOCK_DEBUG 1
#if BRIDGE_LOCK_DEBUG

/* number of entries in each of the lock/unlock caller history arrays */
#define BR_LCKDBG_MAX                   4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      (_err) = bridge_lock2ref(_sc)
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      do { \
	BRIDGE_LOCK_ASSERT_HELD(_sc); \
	if ((_sc)->sc_iflist_xcnt > 0) { \
		(_err) = EBUSY; \
	} else { \
		(_sc)->sc_iflist_ref++; \
		(_err) = 0; \
	} \
	_BRIDGE_UNLOCK(_sc); \
} while (0)
#define BRIDGE_UNREF(_sc)               do { \
	_BRIDGE_LOCK(_sc); \
	(_sc)->sc_iflist_ref--; \
	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \
		_BRIDGE_UNLOCK(_sc); \
		wakeup(&(_sc)->sc_cv); \
	} else { \
		_BRIDGE_UNLOCK(_sc); \
	} \
} while (0)
#define BRIDGE_XLOCK(_sc)               do { \
	BRIDGE_LOCK_ASSERT_HELD(_sc); \
	(_sc)->sc_iflist_xcnt++; \
	while ((_sc)->sc_iflist_ref > 0) { \
		msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO, \
		    "BRIDGE_XLOCK", NULL); \
	} \
} while (0)
#define BRIDGE_XDROP(_sc)               do { \
	BRIDGE_LOCK_ASSERT_HELD(_sc); \
	(_sc)->sc_iflist_xcnt--; \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
311
/*
 * BRIDGE_BPF_TAP_IN, BRIDGE_BPF_TAP_OUT
 * - hand packet 'm' to bpf_tap_in()/bpf_tap_out() as DLT_EN10MB, but only
 *   if 'ifp' has a non-NULL if_bpf
 * - 'ifp' is evaluated more than once; do not pass an expression with
 *   side effects
 */
#define BRIDGE_BPF_TAP_IN(ifp, m) \
	do { \
		if ((ifp)->if_bpf != NULL) { \
			bpf_tap_in((ifp), DLT_EN10MB, (m), NULL, 0); \
		} \
	} while (0)

#define BRIDGE_BPF_TAP_OUT(ifp, m) \
	do { \
		if ((ifp)->if_bpf != NULL) { \
			bpf_tap_out((ifp), DLT_EN10MB, (m), NULL, 0); \
		} \
	} while (0)
325
326
327 /*
328 * Initial size of the route hash table. Must be a power of two.
329 */
330 #ifndef BRIDGE_RTHASH_SIZE
331 #define BRIDGE_RTHASH_SIZE 16
332 #endif
333
334 /*
335 * Maximum size of the routing hash table
336 */
337 #define BRIDGE_RTHASH_SIZE_MAX 2048
338
339 #define BRIDGE_RTHASH_MASK(sc) ((sc)->sc_rthash_size - 1)
340
341 /*
342 * Maximum number of addresses to cache.
343 */
344 #ifndef BRIDGE_RTABLE_MAX
345 #define BRIDGE_RTABLE_MAX 100
346 #endif
347
348 /*
349 * Timeout (in seconds) for entries learned dynamically.
350 */
351 #ifndef BRIDGE_RTABLE_TIMEOUT
352 #define BRIDGE_RTABLE_TIMEOUT (20 * 60) /* same as ARP */
353 #endif
354
355 /*
356 * Number of seconds between walks of the route list.
357 */
358 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
359 #define BRIDGE_RTABLE_PRUNE_PERIOD (5 * 60)
360 #endif
361
362 /*
363 * Number of MAC NAT entries
364 * - sized based on 16 clients (including MAC NAT interface)
365 * each with 4 addresses
366 */
367 #ifndef BRIDGE_MAC_NAT_ENTRY_MAX
368 #define BRIDGE_MAC_NAT_ENTRY_MAX 64
369 #endif /* BRIDGE_MAC_NAT_ENTRY_MAX */
370
371 /*
372 * List of capabilities to possibly mask on the member interface.
373 */
374 #define BRIDGE_IFCAPS_MASK (IFCAP_TSO | IFCAP_TXCSUM)
375 /*
376 * List of capabilities to disable on the member interface.
377 */
378 #define BRIDGE_IFCAPS_STRIP IFCAP_LRO
379
380 /*
381 * Bridge interface list entry.
382 */
383 struct bridge_iflist {
384 TAILQ_ENTRY(bridge_iflist) bif_next;
385 struct ifnet *bif_ifp; /* member if */
386 struct bstp_port bif_stp; /* STP state */
387 uint32_t bif_ifflags; /* member if flags */
388 int bif_savedcaps; /* saved capabilities */
389 uint32_t bif_addrmax; /* max # of addresses */
390 uint32_t bif_addrcnt; /* cur. # of addresses */
391 uint32_t bif_addrexceeded; /* # of address violations */
392
393 interface_filter_t bif_iff_ref;
394 struct bridge_softc *bif_sc;
395 uint32_t bif_flags;
396
397 /* host filter */
398 struct in_addr bif_hf_ipsrc;
399 uint8_t bif_hf_hwsrc[ETHER_ADDR_LEN];
400
401 struct ifbrmstats bif_stats;
402 };
403
404 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)405 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
406 {
407 return (bif->bif_ifflags & flags) != 0;
408 }
409
410 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)411 bif_has_checksum_offload(struct bridge_iflist * bif)
412 {
413 return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
414 }
415
416 static inline bool
bif_has_mac_nat(struct bridge_iflist * bif)417 bif_has_mac_nat(struct bridge_iflist * bif)
418 {
419 return bif_ifflags_are_set(bif, IFBIF_MAC_NAT);
420 }
421
422 static inline bool
bif_uses_virtio(struct bridge_iflist * bif)423 bif_uses_virtio(struct bridge_iflist * bif)
424 {
425 return bif_ifflags_are_set(bif, IFBIF_USES_VIRTIO);
426 }
427
428 /* fake errors to make the code clearer */
429 #define _EBADIP EJUSTRETURN
430 #define _EBADIPCHECKSUM EJUSTRETURN
431 #define _EBADIPV6 EJUSTRETURN
432 #define _EBADUDP EJUSTRETURN
433 #define _EBADTCP EJUSTRETURN
434 #define _EBADUDPCHECKSUM EJUSTRETURN
435 #define _EBADTCPCHECKSUM EJUSTRETURN
436
437 #define BIFF_PROMISC 0x01 /* promiscuous mode set */
438 #define BIFF_PROTO_ATTACHED 0x02 /* protocol attached */
439 #define BIFF_FILTER_ATTACHED 0x04 /* interface filter attached */
440 #define BIFF_MEDIA_ACTIVE 0x08 /* interface media active */
441 #define BIFF_HOST_FILTER 0x10 /* host filter enabled */
442 #define BIFF_HF_HWSRC 0x20 /* host filter source MAC is set */
443 #define BIFF_HF_IPSRC 0x40 /* host filter source IP is set */
444 #define BIFF_INPUT_BROADCAST 0x80 /* send broadcast packets in */
445 #define BIFF_IN_MEMBER_LIST 0x100 /* added to the member list */
446 #define BIFF_WIFI_INFRA 0x200 /* interface is Wi-Fi infra */
447 #define BIFF_ALL_MULTI 0x400 /* allmulti set */
448 #define BIFF_LRO_DISABLED 0x800 /* LRO was disabled */
449 #if SKYWALK
450 #define BIFF_FLOWSWITCH_ATTACHED 0x1000 /* we attached the flowswitch */
451 #define BIFF_NETAGENT_REMOVED 0x2000 /* we removed the netagent */
452 #endif /* SKYWALK */
453
454 /*
455 * mac_nat_entry
456 * - translates between an IP address and MAC address on a specific
457 * bridge interface member
458 */
459 struct mac_nat_entry {
460 LIST_ENTRY(mac_nat_entry) mne_list; /* list linkage */
461 struct bridge_iflist *mne_bif; /* originating interface */
462 unsigned long mne_expire; /* expiration time */
463 union {
464 struct in_addr mneu_ip; /* originating IPv4 address */
465 struct in6_addr mneu_ip6; /* originating IPv6 address */
466 } mne_u;
467 uint8_t mne_mac[ETHER_ADDR_LEN];
468 uint8_t mne_flags;
469 uint8_t mne_reserved;
470 };
471 #define mne_ip mne_u.mneu_ip
472 #define mne_ip6 mne_u.mneu_ip6
473
474 #define MNE_FLAGS_IPV6 0x01 /* IPv6 address */
475
476 LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
477
/*
 * mac_nat_record
 * - used by bridge_mac_nat_output() to convey the translation that needs
 *   to take place in bridge_mac_nat_translate
 * - holds enough information so that the translation can be done later
 *   when the destination interface is the MAC-NAT interface
 * - NOTE(review): which arm of 'mnr_u' is valid presumably follows from
 *   'mnr_ether_type' (ARP, IPv4 or IPv6) - confirm against
 *   bridge_mac_nat_output()
 */
struct mac_nat_record {
	uint16_t mnr_ether_type;
	union {
		/* ARP */
		uint16_t mnru_arp_offset;
		/* IPv4 */
		struct {
			uint16_t mnruip_dhcp_flags;
			uint16_t mnruip_udp_csum;
			uint8_t mnruip_header_len;
		} mnru_ip;
		/* IPv6 */
		struct {
			uint16_t mnruip6_icmp6_len;
			uint16_t mnruip6_lladdr_offset;
			uint8_t mnruip6_icmp6_type;
			uint8_t mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};
502
503 #define mnr_arp_offset mnr_u.mnru_arp_offset
504
505 #define mnr_ip_header_len mnr_u.mnru_ip.mnruip_header_len
506 #define mnr_ip_dhcp_flags mnr_u.mnru_ip.mnruip_dhcp_flags
507 #define mnr_ip_udp_csum mnr_u.mnru_ip.mnruip_udp_csum
508
509 #define mnr_ip6_icmp6_len mnr_u.mnru_ip6.mnruip6_icmp6_len
510 #define mnr_ip6_icmp6_type mnr_u.mnru_ip6.mnruip6_icmp6_type
511 #define mnr_ip6_header_len mnr_u.mnru_ip6.mnruip6_header_len
512 #define mnr_ip6_lladdr_offset mnr_u.mnru_ip6.mnruip6_lladdr_offset
513
514 /*
515 * Bridge route node.
516 */
517 struct bridge_rtnode {
518 LIST_ENTRY(bridge_rtnode) brt_hash; /* hash table linkage */
519 LIST_ENTRY(bridge_rtnode) brt_list; /* list linkage */
520 struct bridge_iflist *brt_dst; /* destination if */
521 unsigned long brt_expire; /* expiration time */
522 uint8_t brt_flags; /* address flags */
523 uint8_t brt_addr[ETHER_ADDR_LEN];
524 uint16_t brt_vlan; /* vlan id */
525 };
526
527 #define brt_ifp brt_dst->bif_ifp
528
529 /*
530 * Bridge delayed function call context
531 */
532 typedef void (*bridge_delayed_func_t)(struct bridge_softc *);
533
534 struct bridge_delayed_call {
535 struct bridge_softc *bdc_sc;
536 bridge_delayed_func_t bdc_func; /* Function to call */
537 struct timespec bdc_ts; /* Time to call */
538 u_int32_t bdc_flags;
539 thread_call_t bdc_thread_call;
540 };
541
542 #define BDCF_OUTSTANDING 0x01 /* Delayed call has been scheduled */
543 #define BDCF_CANCELLING 0x02 /* May be waiting for call completion */
544
545 /*
546 * Software state for each bridge.
547 */
548 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
549
550 struct bridge_softc {
551 struct ifnet *sc_ifp; /* make this an interface */
552 uint32_t sc_flags;
553 LIST_ENTRY(bridge_softc) sc_list;
554 decl_lck_mtx_data(, sc_mtx);
555 struct _bridge_rtnode_list * __counted_by(sc_rthash_size) sc_rthash; /* our forwarding table */
556 struct _bridge_rtnode_list sc_rtlist; /* list version of above */
557 uint32_t sc_rthash_key; /* key for hash */
558 uint32_t sc_rthash_size; /* size of the hash table */
559 struct bridge_delayed_call sc_aging_timer;
560 struct bridge_delayed_call sc_resize_call;
561 TAILQ_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */
562 struct bstp_state sc_stp; /* STP state */
563 void *sc_cv;
564 uint32_t sc_brtmax; /* max # of addresses */
565 uint32_t sc_brtcnt; /* cur. # of addresses */
566 uint32_t sc_brttimeout; /* rt timeout in seconds */
567 uint32_t sc_iflist_ref; /* refcount for sc_iflist */
568 uint32_t sc_iflist_xcnt; /* refcount for sc_iflist */
569 TAILQ_HEAD(, bridge_iflist) sc_iflist; /* member interface list */
570 uint32_t sc_brtexceeded; /* # of cache drops */
571 uint32_t sc_filter_flags; /* ipf and flags */
572 struct ifnet *sc_ifaddr; /* member mac copied from */
573 u_char sc_defaddr[6]; /* Default MAC address */
574 char sc_if_xname[IFNAMSIZ];
575
576 struct bridge_iflist *sc_mac_nat_bif; /* single MAC NAT interface */
577 struct mac_nat_entry_list sc_mne_list; /* MAC NAT IPv4 */
578 struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
579 uint32_t sc_mne_max; /* max # of entries */
580 uint32_t sc_mne_count; /* cur. # of entries */
581 uint32_t sc_mne_allocation_failures;
582 #if BRIDGE_LOCK_DEBUG
583 /*
584 * Locking and unlocking calling history
585 */
586 void *lock_lr[BR_LCKDBG_MAX];
587 int next_lock_lr;
588 void *unlock_lr[BR_LCKDBG_MAX];
589 int next_unlock_lr;
590 #endif /* BRIDGE_LOCK_DEBUG */
591 };
592
593 #define SCF_DETACHING 0x01
594 #define SCF_RESIZING 0x02
595 #define SCF_MEDIA_ACTIVE 0x04
596 #define SCF_ADDRESS_ASSIGNED 0x08
597
/*
 * ChecksumOperation
 * - selects the checksum handling to apply to a packet
 * - NOTE(review): the exact effect of each value is defined by the code
 *   that consumes it (e.g. bridge_enqueue()); not visible here
 */
typedef enum {
	CHECKSUM_OPERATION_NONE = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
	CHECKSUM_OPERATION_FINALIZE = 2,
	CHECKSUM_OPERATION_COMPUTE = 3,
} ChecksumOperation;
604
605 typedef struct {
606 u_int ip_hlen; /* IP header length */
607 u_int ip_pay_len; /* length of payload (exclusive of ip_hlen) */
608 u_int ip_m0_len; /* bytes available at ip_hdr (without jumping mbufs) */
609 u_int ip_opt_len; /* IPv6 options headers length */
610 uint8_t ip_proto; /* IPPROTO_TCP, IPPROTO_UDP, etc. */
611 bool ip_is_ipv4;
612 bool ip_is_fragmented;
613 uint8_t *__sized_by(ip_m0_len) ip_hdr; /* pointer to IP header */
614 uint8_t *__indexable ip_proto_hdr; /* ptr to protocol header (TCP) */
615 } ip_packet_info, *ip_packet_info_t;
616
617 struct bridge_hostfilter_stats bridge_hostfilter_stats;
618
619 typedef uint8_t ether_type_flag_t;
620
/*
 * pkt_direction_t
 * - direction of a packet: receive (RX) or transmit (TX)
 */
typedef enum {
	pkt_direction_RX,
	pkt_direction_TX
} pkt_direction_t;
625
626 static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
627 #if BRIDGE_LOCK_DEBUG
628 static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
629 #else
630 static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
631 #endif
632 static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
633
634 static int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
635
636 static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
637 static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
638
639 static int bridge_clone_create(struct if_clone *, uint32_t, void *);
640 static int bridge_clone_destroy(struct ifnet *);
641
642 static errno_t bridge_ioctl(struct ifnet *, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)));
643 #if HAS_IF_CAP
644 static void bridge_mutecaps(struct bridge_softc *);
645 static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
646 int);
647 #endif
648 static errno_t bridge_set_tso(struct bridge_softc *);
649 static void bridge_proto_attach_changed(struct ifnet *);
650 static int bridge_init(struct ifnet *);
651 static void bridge_ifstop(struct ifnet *, int);
652 static int bridge_output(struct ifnet *, struct mbuf *);
653 static void bridge_finalize_cksum(struct ifnet *, struct mbuf *);
654 static void bridge_start(struct ifnet *);
655 static mblist bridge_input_list(struct bridge_softc *, ifnet_t,
656 struct ether_header *, mblist, bool);
657 static errno_t bridge_iff_input(void *, ifnet_t, protocol_family_t,
658 mbuf_t *, char **);
659 static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t,
660 mbuf_t *);
661 static errno_t bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
662 mbuf_t *m);
663 static int bridge_enqueue(ifnet_t, ifnet_t, ifnet_t,
664 ether_type_flag_t, mbuf_t, ChecksumOperation, pkt_direction_t);
665 static mbuf_t bridge_checksum_offload_list(ifnet_t, struct bridge_iflist *,
666 mbuf_t, bool);
667 static mbuf_t bridge_filter_checksum(ifnet_t, struct bridge_iflist * bif,
668 mbuf_t m, bool, bool, bool);
669 static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
670
671 static void bridge_aging_timer(struct bridge_softc *sc);
672
673 static void bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
674 ether_type_flag_t, mbuf_t);
675 static void bridge_broadcast_list(struct bridge_softc *,
676 struct bridge_iflist *, ether_type_flag_t, mbuf_t, pkt_direction_t);
677
678 static void bridge_span(struct bridge_softc *, ether_type_flag_t, struct mbuf *);
679
680 static int bridge_rtupdate(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
681 uint16_t, struct bridge_iflist *, int, uint8_t);
682 static struct bridge_iflist * bridge_rtlookup_bif(struct bridge_softc *,
683 const uint8_t[ETHER_ADDR_LEN], uint16_t);
684 static void bridge_rttrim(struct bridge_softc *);
685 static void bridge_rtage(struct bridge_softc *);
686 static void bridge_rtflush(struct bridge_softc *, int);
687 static int bridge_rtdaddr(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
688 uint16_t);
689
690 static int bridge_rtable_init(struct bridge_softc *);
691 static void bridge_rtable_fini(struct bridge_softc *);
692
693 static void bridge_rthash_resize(struct bridge_softc *);
694
695 static int bridge_rtnode_addr_cmp(const uint8_t[ETHER_ADDR_LEN], const uint8_t[ETHER_ADDR_LEN]);
696 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
697 const uint8_t[ETHER_ADDR_LEN], uint16_t);
698 static int bridge_rtnode_hash(struct bridge_softc *,
699 struct bridge_rtnode *);
700 static int bridge_rtnode_insert(struct bridge_softc *,
701 struct bridge_rtnode *);
702 static void bridge_rtnode_destroy(struct bridge_softc *,
703 struct bridge_rtnode *);
704 #if BRIDGESTP
705 static void bridge_rtable_expire(struct ifnet *, int);
706 static void bridge_state_change(struct ifnet *, int);
707 #endif /* BRIDGESTP */
708
709 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
710 char * __sized_by(IFNAMSIZ) name);
711 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
712 struct ifnet *ifp);
713 static void bridge_delete_member(struct bridge_softc *,
714 struct bridge_iflist *);
715 static void bridge_delete_span(struct bridge_softc *,
716 struct bridge_iflist *);
717
718 static int bridge_ioctl_add(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
719 static int bridge_ioctl_del(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
720 static int bridge_ioctl_gifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
721 static int bridge_ioctl_sifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
722 static int bridge_ioctl_scache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
723 static int bridge_ioctl_gcache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
724 static int bridge_ioctl_gifs32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
725 static int bridge_ioctl_gifs64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
726 static int bridge_ioctl_rts32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
727 static int bridge_ioctl_rts64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
728 static int bridge_ioctl_saddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
729 static int bridge_ioctl_saddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
730 static int bridge_ioctl_sto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
731 static int bridge_ioctl_gto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
732 static int bridge_ioctl_daddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
733 static int bridge_ioctl_daddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
734 static int bridge_ioctl_flush(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
735 static int bridge_ioctl_gpri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
736 static int bridge_ioctl_spri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
737 static int bridge_ioctl_ght(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
738 static int bridge_ioctl_sht(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
739 static int bridge_ioctl_gfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
740 static int bridge_ioctl_sfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
741 static int bridge_ioctl_gma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
742 static int bridge_ioctl_sma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
743 static int bridge_ioctl_sifprio(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
744 static int bridge_ioctl_sifcost(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
745 static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
746 static int bridge_ioctl_addspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
747 static int bridge_ioctl_delspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
748 static int bridge_ioctl_gbparam32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
749 static int bridge_ioctl_gbparam64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
750 static int bridge_ioctl_grte(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
751 static int bridge_ioctl_gifsstp32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
752 static int bridge_ioctl_gifsstp64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
753 static int bridge_ioctl_sproto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
754 static int bridge_ioctl_stxhc(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
755 static int bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len);
756 static int bridge_ioctl_gfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
757 static int bridge_ioctl_sfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
758 static int bridge_ioctl_ghostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
759 static int bridge_ioctl_shostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
760 static int bridge_ioctl_gmnelist32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
761 static int bridge_ioctl_gmnelist64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
762 static int bridge_ioctl_gifstats32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
763 static int bridge_ioctl_gifstats64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
764
/* Packet filter (pf) hook and the IPv4/IPv6 "checkbasic" helpers. */
static int bridge_pf(struct mbuf **, struct ifnet *,
    uint32_t sc_filter_flags, bool input);
static int bridge_ip_checkbasic(struct mbuf **);
static int bridge_ip6_checkbasic(struct mbuf **);

/*
 * bridge_detach() is installed as the ifnet detach callback by
 * bridge_clone_create(); the remaining prototypes cover link events/status
 * and the bridge_delayed_call helpers.
 */
static void bridge_detach(ifnet_t);
static void bridge_link_event(struct ifnet *, u_int32_t);
static void bridge_iflinkevent(struct ifnet *);
static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
static int interface_media_active(struct ifnet *);
static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);

/* MAC NAT support routines. */
static errno_t bridge_mac_nat_enable(struct bridge_softc *,
    struct bridge_iflist *);
static void bridge_mac_nat_disable(struct bridge_softc *sc);
static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
    struct bridge_iflist *);
static mbuf_t bridge_mac_nat_input(struct bridge_softc *, ifnet_t, mbuf_t,
    ifnet_t * dst_if);
static boolean_t bridge_mac_nat_output(struct bridge_softc *,
    struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
    const char[ETHER_ADDR_LEN]);

/*
 * NOTE(review): the *_list variants below presumably operate on chains of
 * packets rather than a single packet -- confirm against their definitions.
 */
static mblist bridge_mac_nat_input_list(struct bridge_softc *sc,
    ifnet_t external_ifp, mbuf_t m, mbuf_t * forward_head);
static mbuf_t bridge_mac_nat_translate_list(struct bridge_softc * sc,
    struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
static mbuf_t bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
    struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);

static mbuf_t bridge_pf_list(mbuf_t m, ifnet_t ifp,
    uint32_t sc_filter_flags, bool input);
802
803 static inline ifnet_t
bridge_rtlookup(struct bridge_softc * sc,const uint8_t addr[ETHER_ADDR_LEN],uint16_t vlan)804 bridge_rtlookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
805 uint16_t vlan)
806 {
807 struct bridge_iflist * bif;
808 ifnet_t ifp = NULL;
809
810 bif = bridge_rtlookup_bif(sc, addr, vlan);
811 if (bif != NULL) {
812 ifp = bif->bif_ifp;
813 }
814 return ifp;
815 }
816
/* Address predicates; the definitions are not part of this section. */
static bool in_addr_is_ours(const struct in_addr);
static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);

/* Copy an entire packet: m_copym() from offset 0 for M_COPYALL bytes. */
#define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)

/*
 * gso_tcp() and gso_tcp_with_info() both return an mblist; the latter
 * additionally takes an ip_packet_info_t supplied by the caller.
 */
static mblist
gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx);

static mblist
gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
    u_int mac_hlen, bool is_ipv4, bool is_tx);
828
829 static inline mblist
gso_tcp_transmit(ifnet_t ifp,mbuf_t m,u_int mac_hlen,bool is_ipv4)830 gso_tcp_transmit(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4)
831 {
832 return gso_tcp(ifp, m, mac_hlen, is_ipv4, true);
833 }
834
/*
 * The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2); note that
 * VLANTAGOF() below ignores the mbuf and always evaluates to 0.
 */
836 #define VLANTAGOF(_m) 0
837
/* First and last values of the final address octet in the BSTP range. */
#define BSTP_ETHERADDR_RANGE_FIRST 0x00
#define BSTP_ETHERADDR_RANGE_LAST 0x0f

/* 01:80:c2:00:00:00 -- the first address of the BSTP range above. */
u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
{ 0x01, 0x80, 0xc2, 0x00, 0x00, BSTP_ETHERADDR_RANGE_FIRST };


/* All-zero Ethernet address. */
static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
847
#if BRIDGESTP
/*
 * STP callbacks; this table is handed to bstp_attach() when a bridge
 * instance is created.
 */
static struct bstp_cb_ops bridge_ops = {
	.bcb_state = bridge_state_change,
	.bcb_rtage = bridge_rtable_expire
};
#endif /* BRIDGESTP */
854
/* Root of the net.link.bridge sysctl subtree. */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Bridge");

/* net.link.bridge.inherit_mac */
static int bridge_inherit_mac = 0;   /* share MAC with first bridge member */
SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_inherit_mac, 0,
    "Inherit MAC address from the first bridge member");

/* net.link.bridge.rtable_prune_period (variable is defined elsewhere) */
SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_rtable_prune_period, 0,
    "Interval between pruning of routing table");

/* net.link.bridge.rtable_hash_size_max, defaults to BRIDGE_RTHASH_SIZE_MAX */
static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_rtable_hash_size_max, 0,
    "Maximum size of the routing hash table");

#if BRIDGE_DELAYED_CALLBACK_DEBUG
/* net.link.bridge.delayed_callback_delay (debug builds of this option only) */
static int bridge_delayed_callback_delay = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_delayed_callback_delay, 0,
    "Delay before calling delayed function");
#endif

/* net.link.bridge.hostfilterstats: read-only export of the stats structure */
SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
    hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
887
#if BRIDGESTP
/*
 * net.link.bridge.log_stp
 * - registered with CTLFLAG_LOCKED like every other net.link.bridge
 *   integer tunable in this file
 */
static int log_stp = 0;   /* log STP state changes */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW | CTLFLAG_LOCKED,
    &log_stp, 0, "Log STP state changes");
#endif /* BRIDGESTP */
893
/*
 * One entry of the bridge control tables (bridge_control_table32 and
 * bridge_control_table64).
 */
struct bridge_control {
	/* handler; receives the softc, the argument buffer and its size */
	int (*bc_func)(struct bridge_softc *, void *__sized_by(arg_len) args, size_t arg_len);
	/* sizeof() the argument structure the handler expects */
	unsigned int bc_argsize;
	/* combination of the BC_F_* bits below */
	unsigned int bc_flags;
};

#define BC_F_COPYIN 0x01 /* copy arguments in */
#define BC_F_COPYOUT 0x02 /* copy arguments out */
#define BC_F_SUSER 0x04 /* do super-user check */
903
/*
 * Bridge control table using the 32-bit layouts (ifbifconf32, ifbaconf32,
 * ifbareq32, ...) for the argument structures that have separate 32- and
 * 64-bit variants.  The numeric comments give the index of the entry they
 * annotate.  bridge_control_table64 lists the same handlers in the same
 * order, and bridge_control_table_size is computed from this table, so any
 * entry added or removed here must be mirrored there.
 */
static const struct bridge_control bridge_control_table32[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq), /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam), /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam), /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32), /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	/* entries 36 and 37 */
	{ .bc_func = bridge_ioctl_gmnelist32,
	  .bc_argsize = sizeof(struct ifbrmnelist32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats32,
	  .bc_argsize = sizeof(struct ifbrmreq32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
1008
1009 static const struct bridge_control bridge_control_table64[] = {
1010 { .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq), /* 0 */
1011 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1012 { .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
1013 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1014
1015 { .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
1016 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1017 { .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
1018 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1019
1020 { .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
1021 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1022 { .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
1023 .bc_flags = BC_F_COPYOUT },
1024
1025 { .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
1026 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1027 { .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
1028 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1029
1030 { .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
1031 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1032
1033 { .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
1034 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1035 { .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam), /* 10 */
1036 .bc_flags = BC_F_COPYOUT },
1037
1038 { .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
1039 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1040
1041 { .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
1042 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1043
1044 { .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
1045 .bc_flags = BC_F_COPYOUT },
1046 { .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
1047 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1048
1049 { .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
1050 .bc_flags = BC_F_COPYOUT },
1051 { .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
1052 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1053
1054 { .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
1055 .bc_flags = BC_F_COPYOUT },
1056 { .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
1057 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1058
1059 { .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
1060 .bc_flags = BC_F_COPYOUT },
1061 { .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam), /* 20 */
1062 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1063
1064 { .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
1065 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1066
1067 { .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
1068 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1069
1070 { .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
1071 .bc_flags = BC_F_COPYOUT },
1072 { .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
1073 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1074
1075 { .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
1076 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1077
1078 { .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
1079 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1080 { .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
1081 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1082
1083 { .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
1084 .bc_flags = BC_F_COPYOUT },
1085
1086 { .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
1087 .bc_flags = BC_F_COPYOUT },
1088
1089 { .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64), /* 30 */
1090 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1091
1092 { .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
1093 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1094
1095 { .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
1096 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1097
1098 { .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
1099 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1100
1101 { .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1102 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1103 { .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1104 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1105
1106 { .bc_func = bridge_ioctl_gmnelist64,
1107 .bc_argsize = sizeof(struct ifbrmnelist64),
1108 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1109 { .bc_func = bridge_ioctl_gifstats64,
1110 .bc_argsize = sizeof(struct ifbrmreq64),
1111 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1112 };
1113
1114 static const unsigned int bridge_control_table_size =
1115 sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1116
/*
 * List of bridge instances.  bridge_clone_create() walks and updates it
 * while holding bridge_list_mtx.
 */
static LIST_HEAD(, bridge_softc) bridge_list =
    LIST_HEAD_INITIALIZER(bridge_list);

#define BRIDGENAME "bridge"
#define BRIDGES_MAX IF_MAXUNIT
#define BRIDGE_ZONE_MAX_ELEM MIN(IFNETS_MAX, BRIDGES_MAX)

/* Interface cloner for "bridge" interfaces; attached by bridgeattach(). */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
    0, BRIDGES_MAX);
1127
/*
 * net.link.bridge.txstart
 * - read by bridge_clone_create(): when non-zero the interface is created
 *   with a start callback (bridge_start), otherwise with IFNET_INIT_LEGACY
 *   and an output callback (bridge_output)
 */
static int if_bridge_txstart = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");

/* net.link.bridge.debug (if_bridge_debug is defined elsewhere) */
SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_debug, 0, "Bridge debug flags");

/* net.link.bridge.log_level (if_bridge_log_level is defined elsewhere) */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_log_level, 0, "Bridge log level");

/* net.link.bridge.output_skip_filters */
static int if_bridge_output_skip_filters = 1;
SYSCTL_INT(_net_link_bridge, OID_AUTO, output_skip_filters,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_output_skip_filters, 0, "Bridge skip output filters");

/* net.link.bridge.enable_early_input (variable has external linkage) */
int bridge_enable_early_input = 1;   /* DLIL early input */
SYSCTL_INT(_net_link_bridge, OID_AUTO, enable_early_input,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_enable_early_input, 0,
    "Bridge enable early input");

/* net.link.bridge.allow_lro_num_seg (variable has external linkage) */
int bridge_allow_lro_num_seg = 1;   /* allow LRO_NUM_SEG to keep LRO enabled */
SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_lro_num_seg,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_allow_lro_num_seg, 0,
    "Bridge allow LRO_NUM_SEG to keep LRO enabled");

/*
 * Upper bounds and defaults for the tso_reduce_mss_forwarding and
 * tso_reduce_mss_tx sysctls; the bounds are enforced by
 * bridge_tso_reduce_mss().
 */
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX 256
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT 110
#define BRIDGE_TSO_REDUCE_MSS_TX_MAX 256
#define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT 0

static u_int if_bridge_tso_reduce_mss_forwarding
        = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
static u_int if_bridge_tso_reduce_mss_tx
        = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1165
1166 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1167 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1168 {
1169 int changed;
1170 int error;
1171 u_int new_value;
1172
1173 error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1174 &changed);
1175 if (error == 0 && changed != 0) {
1176 if (new_value > val_max) {
1177 return EINVAL;
1178 }
1179 *val = new_value;
1180 }
1181 return error;
1182 }
1183
1184 static int
1185 bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
1186 {
1187 return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
1188 BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
1189 }
1190
1191 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
1192 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1193 0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
1194 "Bridge tso reduce mss when forwarding");
1195
1196 static int
1197 bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
1198 {
1199 return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
1200 BRIDGE_TSO_REDUCE_MSS_TX_MAX);
1201 }
1202
1203 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
1204 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1205 0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
1206 "Bridge tso reduce mss on transmit");
1207
1208 #if DEBUG || DEVELOPMENT
1209 /*
1210 * net.link.bridge.reduce_tso_mtu
1211 * - when non-zero, the bridge overrides the interface TSO MTU to a lower
1212 * value (i.e. 16K) to enable testing the "use GSO instead" path
1213 */
1214 static int if_bridge_reduce_tso_mtu = 0;
1215 SYSCTL_INT(_net_link_bridge, OID_AUTO, reduce_tso_mtu,
1216 CTLFLAG_RW | CTLFLAG_LOCKED,
1217 &if_bridge_reduce_tso_mtu, 0, "Bridge interface reduce TSO MTU");
1218
1219 #endif /* DEBUG || DEVELOPMENT */
1220
/* Logging helpers; each one emits at LOG_NOTICE via BRIDGE_LOG_SIMPLE. */
static void brlog_ether_header(struct ether_header *);
static void brlog_mbuf_data(mbuf_t, size_t, size_t);
static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
static void brlog_mbuf(mbuf_t, const char *, const char *);
static void brlog_link(struct bridge_softc * sc);
1226
#if BRIDGE_LOCK_DEBUG
/*
 * Lock-debugging variants of the bridge lock primitives.  They wrap
 * _BRIDGE_LOCK/_BRIDGE_UNLOCK and additionally record the caller's return
 * address in the softc's lock_lr[] / unlock_lr[] arrays.
 */
static void bridge_lock(struct bridge_softc *);
static void bridge_unlock(struct bridge_softc *);
static int bridge_lock2ref(struct bridge_softc *);
static void bridge_unref(struct bridge_softc *);
static void bridge_xlock(struct bridge_softc *);
static void bridge_xdrop(struct bridge_softc *);

/* Declare a local `v` holding the return address of the current function. */
#define DECL_RETURN_ADDR(v) void * __single v = __unsafe_forge_single(void *, __builtin_return_address(0))
1236
1237 static void
bridge_lock(struct bridge_softc * sc)1238 bridge_lock(struct bridge_softc *sc)
1239 {
1240 DECL_RETURN_ADDR(lr_saved);
1241
1242 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1243
1244 _BRIDGE_LOCK(sc);
1245
1246 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1247 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1248 }
1249
1250 static void
bridge_unlock(struct bridge_softc * sc)1251 bridge_unlock(struct bridge_softc *sc)
1252 {
1253 DECL_RETURN_ADDR(lr_saved);
1254
1255 BRIDGE_LOCK_ASSERT_HELD(sc);
1256
1257 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1258 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1259
1260 _BRIDGE_UNLOCK(sc);
1261 }
1262
1263 static int
bridge_lock2ref(struct bridge_softc * sc)1264 bridge_lock2ref(struct bridge_softc *sc)
1265 {
1266 int error = 0;
1267 DECL_RETURN_ADDR(lr_saved);
1268
1269 BRIDGE_LOCK_ASSERT_HELD(sc);
1270
1271 if (sc->sc_iflist_xcnt > 0) {
1272 error = EBUSY;
1273 } else {
1274 sc->sc_iflist_ref++;
1275 }
1276
1277 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1278 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1279
1280 _BRIDGE_UNLOCK(sc);
1281
1282 return error;
1283 }
1284
1285 static void
bridge_unref(struct bridge_softc * sc)1286 bridge_unref(struct bridge_softc *sc)
1287 {
1288 DECL_RETURN_ADDR(lr_saved);
1289
1290 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1291
1292 _BRIDGE_LOCK(sc);
1293 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1294 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1295
1296 sc->sc_iflist_ref--;
1297
1298 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1299 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1300 if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1301 _BRIDGE_UNLOCK(sc);
1302 wakeup(&sc->sc_cv);
1303 } else {
1304 _BRIDGE_UNLOCK(sc);
1305 }
1306 }
1307
1308 static void
bridge_xlock(struct bridge_softc * sc)1309 bridge_xlock(struct bridge_softc *sc)
1310 {
1311 DECL_RETURN_ADDR(lr_saved);
1312
1313 BRIDGE_LOCK_ASSERT_HELD(sc);
1314
1315 sc->sc_iflist_xcnt++;
1316 while (sc->sc_iflist_ref > 0) {
1317 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1318 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1319
1320 msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1321
1322 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1323 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1324 }
1325 }
1326
1327 #undef DECL_RETURN_ADDR
1328
1329 static void
bridge_xdrop(struct bridge_softc * sc)1330 bridge_xdrop(struct bridge_softc *sc)
1331 {
1332 BRIDGE_LOCK_ASSERT_HELD(sc);
1333
1334 sc->sc_iflist_xcnt--;
1335 }
1336
1337 #endif /* BRIDGE_LOCK_DEBUG */
1338
1339 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1340 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1341 {
1342 if (m) {
1343 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1344 "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1345 prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1346 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1347 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1348 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1349 suffix ? suffix : "");
1350 } else {
1351 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1352 }
1353 }
1354
1355 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1356 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1357 {
1358 if (m) {
1359 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1360 "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1361 "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1362 prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1363 mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1364 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
1365 (unsigned int)mbuf_maxlen(m),
1366 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1367 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1368 !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1369 if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1370 brlog_mbuf_pkthdr(m, "", suffix);
1371 }
1372 } else {
1373 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1374 }
1375 }
1376
1377 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1378 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1379 {
1380 mbuf_t n;
1381 size_t i, j;
1382 size_t pktlen, mlen, maxlen;
1383 unsigned char *ptr;
1384
1385 pktlen = mbuf_pkthdr_len(m);
1386
1387 if (offset > pktlen) {
1388 return;
1389 }
1390
1391 maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1392 n = m;
1393 mlen = mbuf_len(n);
1394 ptr = mtod(n, unsigned char *);
1395 for (i = 0, j = 0; i < maxlen; i++, j++) {
1396 if (j >= mlen) {
1397 n = mbuf_next(n);
1398 if (n == 0) {
1399 break;
1400 }
1401 ptr = mtod(n, unsigned char *);
1402 mlen = mbuf_len(n);
1403 j = 0;
1404 }
1405 if (i >= offset) {
1406 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1407 "%02x%s", ptr[j], i % 2 ? " " : "");
1408 }
1409 }
1410 }
1411
1412 static void
brlog_ether_header(struct ether_header * eh)1413 brlog_ether_header(struct ether_header *eh)
1414 {
1415 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1416 "%02x:%02x:%02x:%02x:%02x:%02x > "
1417 "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1418 eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1419 eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1420 eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1421 eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1422 ntohs(eh->ether_type));
1423 }
1424
1425 static char *
ether_ntop(char * __sized_by (len)buf,size_t len,const u_char ap[ETHER_ADDR_LEN])1426 ether_ntop(char * __sized_by(len) buf, size_t len, const u_char ap[ETHER_ADDR_LEN])
1427 {
1428 snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
1429 ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);
1430
1431 return buf;
1432 }
1433
1434 static void
brlog_link(struct bridge_softc * sc)1435 brlog_link(struct bridge_softc * sc)
1436 {
1437 int i;
1438 uint32_t sdl_buffer[(offsetof(struct sockaddr_dl, sdl_data) +
1439 IFNAMSIZ + ETHER_ADDR_LEN)];
1440 struct sockaddr_dl *sdl = SDL((uint8_t*)&sdl_buffer); /* SDL requires byte pointer */
1441 const u_char * lladdr;
1442 char lladdr_str[48];
1443
1444 memset(sdl_buffer, 0, sizeof(sdl_buffer));
1445 sdl->sdl_family = AF_LINK;
1446 sdl->sdl_nlen = strbuflen(sc->sc_if_xname);
1447 sdl->sdl_alen = ETHER_ADDR_LEN;
1448 sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1449 memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1450 memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1451 lladdr_str[0] = '\0';
1452 for (i = 0, lladdr = CONST_LLADDR(sdl);
1453 i < sdl->sdl_alen;
1454 i++, lladdr++) {
1455 char byte_str[4];
1456
1457 snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1458 *lladdr);
1459 strbufcat(lladdr_str, byte_str);
1460 }
1461 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1462 "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1463 " slen %d addr %s", sc->sc_if_xname,
1464 sdl->sdl_len, sdl->sdl_index,
1465 sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1466 sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1467 }
1468
1469 static int
_mbuf_get_tso_mss(mbuf_t m)1470 _mbuf_get_tso_mss(mbuf_t m)
1471 {
1472 int mss = 0;
1473
1474 if ((m->m_pkthdr.csum_flags & _TSO_CSUM) != 0) {
1475 mss = m->m_pkthdr.tso_segsz;
1476 }
1477 return mss;
1478 }
1479
1480 /*
1481 * bridgeattach:
1482 *
1483 * Pseudo-device attach routine.
1484 */
1485 __private_extern__ int
bridgeattach(int n)1486 bridgeattach(int n)
1487 {
1488 #pragma unused(n)
1489 int error;
1490
1491 LIST_INIT(&bridge_list);
1492
1493 #if BRIDGESTP
1494 bstp_sys_init();
1495 #endif /* BRIDGESTP */
1496
1497 error = if_clone_attach(&bridge_cloner);
1498 if (error != 0) {
1499 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1500 }
1501 return error;
1502 }
1503
1504 static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m,int len)1505 _mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
1506 {
1507 mbuf_setdata(m, mtodo(m, len), mbuf_len(m) - len);
1508 mbuf_pkthdr_adjustlen(m, -len);
1509 }
1510
1511 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1512 bridge_ifnet_set_attrs(struct ifnet * ifp)
1513 {
1514 errno_t error;
1515
1516 error = ifnet_set_mtu(ifp, ETHERMTU);
1517 if (error != 0) {
1518 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1519 goto done;
1520 }
1521 error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1522 if (error != 0) {
1523 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1524 goto done;
1525 }
1526 error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1527 if (error != 0) {
1528 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1529 goto done;
1530 }
1531 error = ifnet_set_flags(ifp,
1532 IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1533 0xffff);
1534
1535 if (error != 0) {
1536 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1537 goto done;
1538 }
1539 done:
1540 return error;
1541 }
1542
1543 /*
1544 * bridge_clone_create:
1545 *
1546 * Create a new bridge instance.
1547 */
1548 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1549 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1550 {
1551 #pragma unused(params)
1552 ifnet_ref_t ifp = NULL;
1553 struct bridge_softc *sc = NULL;
1554 struct bridge_softc *sc2 = NULL;
1555 struct ifnet_init_eparams init_params;
1556 errno_t error = 0;
1557 uint8_t eth_hostid[ETHER_ADDR_LEN];
1558 int fb, retry, has_hostid;
1559
1560 sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
1561 lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1562 sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1563 sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1564 sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1565 sc->sc_filter_flags = 0;
1566
1567 TAILQ_INIT(&sc->sc_iflist);
1568
1569 /* use the interface name as the unique id for ifp recycle */
1570 snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1571 ifc->ifc_name, unit);
1572 bzero(&init_params, sizeof(init_params));
1573 init_params.ver = IFNET_INIT_CURRENT_VERSION;
1574 init_params.len = sizeof(init_params);
1575 /* Initialize our routing table. */
1576 error = bridge_rtable_init(sc);
1577 if (error != 0) {
1578 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1579 goto done;
1580 }
1581 TAILQ_INIT(&sc->sc_spanlist);
1582 if (if_bridge_txstart) {
1583 init_params.start = bridge_start;
1584 } else {
1585 init_params.flags = IFNET_INIT_LEGACY;
1586 init_params.output = bridge_output;
1587 }
1588 init_params.uniqueid_len = strbuflen(sc->sc_if_xname);
1589 init_params.uniqueid = sc->sc_if_xname;
1590 init_params.sndq_maxlen = IFQ_MAXLEN;
1591 init_params.name = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
1592 init_params.unit = unit;
1593 init_params.family = IFNET_FAMILY_ETHERNET;
1594 init_params.type = IFT_BRIDGE;
1595 init_params.demux = ether_demux;
1596 init_params.add_proto = ether_add_proto;
1597 init_params.del_proto = ether_del_proto;
1598 init_params.check_multi = ether_check_multi;
1599 init_params.framer_extended = ether_frameout_extended;
1600 init_params.softc = sc;
1601 init_params.ioctl = bridge_ioctl;
1602 init_params.detach = bridge_detach;
1603 init_params.broadcast_addr = etherbroadcastaddr;
1604 init_params.broadcast_len = ETHER_ADDR_LEN;
1605
1606 error = ifnet_allocate_extended(&init_params, &ifp);
1607 if (error != 0) {
1608 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1609 goto done;
1610 }
1611 LIST_INIT(&sc->sc_mne_list);
1612 LIST_INIT(&sc->sc_mne_list_v6);
1613 sc->sc_ifp = ifp;
1614 error = bridge_ifnet_set_attrs(ifp);
1615 if (error != 0) {
1616 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1617 error);
1618 goto done;
1619 }
1620 /*
1621 * Generate an ethernet address with a locally administered address.
1622 *
1623 * Since we are using random ethernet addresses for the bridge, it is
1624 * possible that we might have address collisions, so make sure that
1625 * this hardware address isn't already in use on another bridge.
1626 * The first try uses the "hostid" and falls back to read_frandom();
1627 * for "hostid", we use the MAC address of the first-encountered
1628 * Ethernet-type interface that is currently configured.
1629 */
1630 fb = 0;
1631 has_hostid = (uuid_get_ethernet(ð_hostid[0]) == 0);
1632 for (retry = 1; retry != 0;) {
1633 if (fb || has_hostid == 0) {
1634 read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1635 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1636 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1637 } else {
1638 bcopy(ð_hostid[0], &sc->sc_defaddr,
1639 ETHER_ADDR_LEN);
1640 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1641 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1642 sc->sc_defaddr[3] = /* stir it up a bit */
1643 ((sc->sc_defaddr[3] & 0x0f) << 4) |
1644 ((sc->sc_defaddr[3] & 0xf0) >> 4);
1645 /*
1646 * Mix in the LSB as it's actually pretty significant,
1647 * see rdar://14076061
1648 */
1649 sc->sc_defaddr[4] =
1650 (((sc->sc_defaddr[4] & 0x0f) << 4) |
1651 ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1652 sc->sc_defaddr[5];
1653 sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1654 }
1655
1656 fb = 1;
1657 retry = 0;
1658 lck_mtx_lock(&bridge_list_mtx);
1659 LIST_FOREACH(sc2, &bridge_list, sc_list) {
1660 if (_ether_cmp(sc->sc_defaddr,
1661 IF_LLADDR(sc2->sc_ifp)) == 0) {
1662 retry = 1;
1663 }
1664 }
1665 lck_mtx_unlock(&bridge_list_mtx);
1666 }
1667
1668 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1669
1670 if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1671 brlog_link(sc);
1672 }
1673 error = ifnet_attach(ifp, NULL);
1674 if (error != 0) {
1675 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1676 goto done;
1677 }
1678
1679 error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1680 IFT_ETHER);
1681 if (error != 0) {
1682 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1683 error);
1684 goto done;
1685 }
1686
1687 ifnet_set_offload(ifp,
1688 IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1689 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1690 error = bridge_set_tso(sc);
1691 if (error != 0) {
1692 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1693 goto done;
1694 }
1695 #if BRIDGESTP
1696 bstp_attach(&sc->sc_stp, &bridge_ops);
1697 #endif /* BRIDGESTP */
1698
1699 lck_mtx_lock(&bridge_list_mtx);
1700 LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1701 lck_mtx_unlock(&bridge_list_mtx);
1702
1703 /* attach as ethernet */
1704 error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1705 NULL, NULL);
1706
1707 done:
1708 if (error != 0) {
1709 BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1710 /* TBD: Clean up: sc, sc_rthash etc */
1711 }
1712
1713 return error;
1714 }
1715
1716 /*
1717 * bridge_clone_destroy:
1718 *
1719 * Destroy a bridge instance.
1720 */
1721 static int
bridge_clone_destroy(struct ifnet * ifp)1722 bridge_clone_destroy(struct ifnet *ifp)
1723 {
1724 struct bridge_softc * __single sc = ifp->if_softc;
1725 struct bridge_iflist *bif;
1726 errno_t error;
1727
1728 BRIDGE_LOCK(sc);
1729 if ((sc->sc_flags & SCF_DETACHING)) {
1730 BRIDGE_UNLOCK(sc);
1731 return 0;
1732 }
1733 sc->sc_flags |= SCF_DETACHING;
1734
1735 bridge_ifstop(ifp, 1);
1736
1737 bridge_cancel_delayed_call(&sc->sc_resize_call);
1738
1739 bridge_cleanup_delayed_call(&sc->sc_resize_call);
1740 bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1741
1742 error = ifnet_set_flags(ifp, 0, IFF_UP);
1743 if (error != 0) {
1744 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1745 }
1746
1747 while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1748 bridge_delete_member(sc, bif);
1749 }
1750
1751 while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1752 bridge_delete_span(sc, bif);
1753 }
1754 BRIDGE_UNLOCK(sc);
1755
1756 error = ifnet_detach(ifp);
1757 if (error != 0) {
1758 panic("%s (%d): ifnet_detach(%p) failed %d",
1759 __func__, __LINE__, ifp, error);
1760 }
1761 return 0;
1762 }
1763
/*
 * DRVSPEC:
 *
 *	Common body of the driver-specific (SIOC[SG]DRVSPEC) cases of
 *	bridge_ioctl().  It is a macro because it uses names declared at the
 *	expansion site: `ifd` (the ifdrv request), `bridge_control_table` and
 *	`bc`, the `args` union, plus `cmd`, `sc` and `error`.
 *
 *	Each step leaves an errno in `error` and stops on failure:
 *	 - ifd_cmd must index into the control table;
 *	 - a "get" must name a BC_F_COPYOUT entry and a "set" must not;
 *	 - BC_F_SUSER entries require kauth_authorize_generic() to succeed;
 *	 - ifd_len must equal the entry's bc_argsize and fit in `args`;
 *	 - `args` is zeroed and, for BC_F_COPYIN entries, filled by copyin();
 *	 - the entry's handler is called with the bridge lock held;
 *	 - for BC_F_COPYOUT entries `args` is copied back to ifd_data.
 *
 *	NOTE(review): the direction check compares `cmd` against the
 *	unsuffixed SIOCGDRVSPEC/SIOCSDRVSPEC constants, while the expansion
 *	sites switch on the 32/64 variants -- confirm the values coincide for
 *	both ABIs.
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) { \
	        error = EINVAL; \
	        break; \
	} \
	bc = &bridge_control_table[ifd->ifd_cmd]; \
 \
	if (cmd == SIOCGDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) { \
	        error = EINVAL; \
	        break; \
	} else if (cmd == SIOCSDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) != 0) { \
	        error = EINVAL; \
	        break; \
	} \
 \
	if (bc->bc_flags & BC_F_SUSER) { \
	        error = kauth_authorize_generic(kauth_cred_get(), \
	            KAUTH_GENERIC_ISSUSER); \
	        if (error) \
	                break; \
	} \
 \
	if (ifd->ifd_len != bc->bc_argsize || \
	    ifd->ifd_len > sizeof (args)) { \
	        error = EINVAL; \
	        break; \
	} \
 \
	bzero(&args, sizeof (args)); \
	if (bc->bc_flags & BC_F_COPYIN) { \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len); \
	        if (error) \
	                break; \
	} \
 \
	BRIDGE_LOCK(sc); \
	error = (*bc->bc_func)(sc, &args, sizeof(args)); \
	BRIDGE_UNLOCK(sc); \
	if (error) \
	        break; \
 \
	if (bc->bc_flags & BC_F_COPYOUT) \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len); \
} while (0)
1810
1811 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1812 interface_needs_input_broadcast(struct ifnet * ifp)
1813 {
1814 /*
1815 * Selectively enable input broadcast only when necessary.
1816 * The bridge interface itself attaches a fake protocol
1817 * so checking for at least two protocols means that the
1818 * interface is being used for something besides bridging
1819 * and needs to see broadcast packets from other members.
1820 */
1821 return if_get_protolist(ifp, NULL, 0) >= 2;
1822 }
1823
1824 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1825 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1826 {
1827 boolean_t old_input_broadcast;
1828
1829 old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1830 if (input_broadcast) {
1831 bif->bif_flags |= BIFF_INPUT_BROADCAST;
1832 } else {
1833 bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1834 }
1835 return old_input_broadcast != input_broadcast;
1836 }
1837
/*
 * bridge_ioctl:
 *
 *	Handle a control request from the operator.
 *
 *	Address assignment, media query, driver-specific (SIOC[SG]DRVSPEC),
 *	interface flags, link-layer address and MTU requests are handled
 *	here; multicast add/delete are accepted without any action and every
 *	other command is passed on to ether_ioctl().
 *
 *	The bridge lock must not be held on entry and is not held on return;
 *	the cases that need it take and drop it themselves.
 *
 *	Returns 0 on success or an errno value.
 */
static errno_t
bridge_ioctl(struct ifnet *ifp, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)) data)
{
	struct bridge_softc * __single sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct bridge_iflist *bif;
	int error = 0;

	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	/* trace the request: direction bits, parameter length, group, number */
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
	    "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
	    ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
	    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
	    (char)IOCGROUP(cmd), cmd & 0xff);

	switch (cmd) {
	case SIOCAIFADDR_IN6_32:
	case SIOCAIFADDR_IN6_64:
	case SIOCSIFADDR:
	case SIOCAIFADDR:
		/* an address is being assigned: bring the bridge up and note it */
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		BRIDGE_LOCK(sc);
		sc->sc_flags |= SCF_ADDRESS_ASSIGNED;
		BRIDGE_UNLOCK(sc);
		BRIDGE_LOG(LOG_NOTICE, 0,
		    "ifp %s has address", ifp->if_xname);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64: {
		// cast to 32bit version to work within bounds with 32bit userspace
		struct ifmediareq32 *ifmr = (struct ifmediareq32 *)data;
		user_addr_t user_addr;

		/* only the user list pointer is read through the 64-bit layout */
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)data)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);

		ifmr->ifm_status = IFM_AVALID;
		ifmr->ifm_mask = 0;
		ifmr->ifm_count = 1;

		/* active only while not detaching and SCF_MEDIA_ACTIVE is set */
		BRIDGE_LOCK(sc);
		if (!(sc->sc_flags & SCF_DETACHING) &&
		    (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
			ifmr->ifm_status |= IFM_ACTIVE;
			ifmr->ifm_active = ifmr->ifm_current =
			    IFM_ETHER | IFM_AUTO;
		} else {
			ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
		}
		BRIDGE_UNLOCK(sc);

		/* the media list has a single entry: the current media word */
		if (user_addr != USER_ADDR_NULL) {
			error = copyout(&ifmr->ifm_current, user_addr,
			    sizeof(int));
		}
		break;
	}

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* accepted; nothing is done for multicast membership here */
		break;

	case SIOCSDRVSPEC32:
	case SIOCGDRVSPEC32: {
		/* scratch space large enough for any 32-bit argument struct */
		union {
			struct ifbreq ifbreq;
			struct ifbifconf32 ifbifconf;
			struct ifbareq32 ifbareq;
			struct ifbaconf32 ifbaconf;
			struct ifbrparam ifbrparam;
			struct ifbropreq32 ifbropreq;
		} args;
		struct ifdrv32 *ifd = (struct ifdrv32 *)data;
		const struct bridge_control *bridge_control_table =
		    bridge_control_table32, *bc;

		/* DRVSPEC picks up args, ifd, bridge_control_table and bc by name */
		DRVSPEC;

		break;
	}
	case SIOCSDRVSPEC64:
	case SIOCGDRVSPEC64: {
		/* same as above, with the 64-bit argument layouts and table */
		union {
			struct ifbreq ifbreq;
			struct ifbifconf64 ifbifconf;
			struct ifbareq64 ifbareq;
			struct ifbaconf64 ifbaconf;
			struct ifbrparam ifbrparam;
			struct ifbropreq64 ifbropreq;
		} args;
		struct ifdrv64 *ifd = (struct ifdrv64 *)data;
		const struct bridge_control *bridge_control_table =
		    bridge_control_table64, *bc;

		DRVSPEC;

		break;
	}

	case SIOCSIFFLAGS:
		if (!(ifp->if_flags & IFF_UP) &&
		    (ifp->if_flags & IFF_RUNNING)) {
			/*
			 * If interface is marked down and it is running,
			 * then stop and disable it.
			 */
			BRIDGE_LOCK(sc);
			bridge_ifstop(ifp, 1);
			BRIDGE_UNLOCK(sc);
		} else if ((ifp->if_flags & IFF_UP) &&
		    !(ifp->if_flags & IFF_RUNNING)) {
			/*
			 * If interface is marked up and it is stopped, then
			 * start it.
			 */
			BRIDGE_LOCK(sc);
			error = bridge_init(ifp);
			BRIDGE_UNLOCK(sc);
		}
		break;

	case SIOCSIFLLADDR:
		/* the new address and its length come straight from the request */
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		if (error != 0) {
			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
			    "%s SIOCSIFLLADDR error %d", ifp->if_xname,
			    error);
		}
		break;

	case SIOCSIFMTU:
		/* anything below 576 is rejected outright */
		if (ifr->ifr_mtu < 576) {
			error = EINVAL;
			break;
		}
		BRIDGE_LOCK(sc);
		if (TAILQ_EMPTY(&sc->sc_iflist)) {
			/* no members yet, so there is nothing to agree with */
			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
			BRIDGE_UNLOCK(sc);
			break;
		}
		/* with members, the new MTU must equal every member's MTU */
		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
			if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
				BRIDGE_LOG(LOG_NOTICE, 0,
				    "%s invalid MTU: %u(%s) != %d",
				    sc->sc_ifp->if_xname,
				    bif->bif_ifp->if_mtu,
				    bif->bif_ifp->if_xname, ifr->ifr_mtu);
				error = EINVAL;
				/* leaves the member loop only; unlock happens below */
				break;
			}
		}
		if (!error) {
			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
		}
		BRIDGE_UNLOCK(sc);
		break;

	default:
		/* everything else goes to the generic ethernet handler */
		error = ether_ioctl(ifp, cmd, data);
		/* EOPNOTSUPP is not logged */
		if (error != 0 && error != EOPNOTSUPP) {
			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
			    "ifp %s cmd 0x%08lx "
			    "(%c%c [%lu] %c %lu) failed error: %d",
			    ifp->if_xname, cmd,
			    (cmd & IOC_IN) ? 'I' : ' ',
			    (cmd & IOC_OUT) ? 'O' : ' ',
			    IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
			    cmd & 0xff, error);
		}
		break;
	}
	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	return error;
}
2023
2024 #if HAS_IF_CAP
2025 /*
2026 * bridge_mutecaps:
2027 *
2028 * Clear or restore unwanted capabilities on the member interface
2029 */
2030 static void
bridge_mutecaps(struct bridge_softc * sc)2031 bridge_mutecaps(struct bridge_softc *sc)
2032 {
2033 struct bridge_iflist *bif;
2034 int enabled, mask;
2035
2036 /* Initial bitmask of capabilities to test */
2037 mask = BRIDGE_IFCAPS_MASK;
2038
2039 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2040 /* Every member must support it or its disabled */
2041 mask &= bif->bif_savedcaps;
2042 }
2043
2044 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2045 enabled = bif->bif_ifp->if_capenable;
2046 enabled &= ~BRIDGE_IFCAPS_STRIP;
2047 /* strip off mask bits and enable them again if allowed */
2048 enabled &= ~BRIDGE_IFCAPS_MASK;
2049 enabled |= mask;
2050
2051 bridge_set_ifcap(sc, bif, enabled);
2052 }
2053 }
2054
2055 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)2056 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
2057 {
2058 struct ifnet *ifp = bif->bif_ifp;
2059 struct ifreq ifr;
2060 int error;
2061
2062 bzero(&ifr, sizeof(ifr));
2063 ifr.ifr_reqcap = set;
2064
2065 if (ifp->if_capenable != set) {
2066 IFF_LOCKGIANT(ifp);
2067 error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2068 IFF_UNLOCKGIANT(ifp);
2069 if (error) {
2070 BRIDGE_LOG(LOG_NOTICE, 0,
2071 "%s error setting interface capabilities on %s",
2072 sc->sc_ifp->if_xname, ifp->if_xname);
2073 }
2074 }
2075 }
2076 #endif /* HAS_IF_CAP */
2077
2078 static errno_t
siocsifcap(struct ifnet * ifp,uint32_t cap_enable)2079 siocsifcap(struct ifnet * ifp, uint32_t cap_enable)
2080 {
2081 struct ifreq ifr;
2082
2083 bzero(&ifr, sizeof(ifr));
2084 ifr.ifr_reqcap = cap_enable;
2085 return ifnet_ioctl(ifp, 0, SIOCSIFCAP, &ifr);
2086 }
2087
2088 static const char *
enable_disable_str(boolean_t enable)2089 enable_disable_str(boolean_t enable)
2090 {
2091 return (const char * __null_terminated)(enable ? "enable" : "disable");
2092 }
2093
2094 static boolean_t
bridge_set_lro(struct ifnet * ifp,boolean_t enable)2095 bridge_set_lro(struct ifnet * ifp, boolean_t enable)
2096 {
2097 uint32_t cap_enable;
2098 uint32_t cap_supported;
2099 boolean_t changed = FALSE;
2100 boolean_t lro_enabled;
2101
2102 cap_supported = ifnet_capabilities_supported(ifp);
2103 if ((cap_supported & IFCAP_LRO) == 0) {
2104 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2105 "%s doesn't support LRO",
2106 ifp->if_xname);
2107 goto done;
2108 }
2109 if (bridge_allow_lro_num_seg != 0 &&
2110 (cap_supported & IFCAP_LRO_NUM_SEG) != 0) {
2111 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2112 "%s supports LRO_NUM_SEG, leaving LRO enabled",
2113 ifp->if_xname);
2114 goto done;
2115 }
2116 cap_enable = ifnet_capabilities_enabled(ifp);
2117 lro_enabled = (cap_enable & IFCAP_LRO) != 0;
2118 if (lro_enabled != enable) {
2119 errno_t error;
2120
2121 if (enable) {
2122 cap_enable |= IFCAP_LRO;
2123 } else {
2124 cap_enable &= ~IFCAP_LRO;
2125 }
2126 error = siocsifcap(ifp, cap_enable);
2127 if (error != 0) {
2128 BRIDGE_LOG(LOG_NOTICE, 0,
2129 "%s %s failed (cap 0x%x) %d",
2130 ifp->if_xname,
2131 enable_disable_str(enable),
2132 cap_enable,
2133 error);
2134 } else {
2135 changed = TRUE;
2136 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2137 "%s %s success (cap 0x%x)",
2138 ifp->if_xname,
2139 enable_disable_str(enable),
2140 cap_enable);
2141 }
2142 }
2143 done:
2144 return changed;
2145 }
2146
2147 static errno_t
bridge_set_tso(struct bridge_softc * sc)2148 bridge_set_tso(struct bridge_softc *sc)
2149 {
2150 struct bridge_iflist *bif;
2151 u_int32_t tso_v4_mtu;
2152 u_int32_t tso_v6_mtu;
2153 ifnet_offload_t offload;
2154 errno_t error = 0;
2155
2156 /* By default, support TSO */
2157 offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2158 tso_v4_mtu = IP_MAXPACKET;
2159 tso_v6_mtu = IP_MAXPACKET;
2160
2161 /* Use the lowest common denominator of the members */
2162 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2163 ifnet_t ifp = bif->bif_ifp;
2164
2165 if (ifp == NULL) {
2166 continue;
2167 }
2168
2169 if (offload & IFNET_TSO_IPV4) {
2170 if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2171 if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2172 tso_v4_mtu = ifp->if_tso_v4_mtu;
2173 }
2174 } else {
2175 offload &= ~IFNET_TSO_IPV4;
2176 tso_v4_mtu = 0;
2177 }
2178 }
2179 if (offload & IFNET_TSO_IPV6) {
2180 if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2181 if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2182 tso_v6_mtu = ifp->if_tso_v6_mtu;
2183 }
2184 } else {
2185 offload &= ~IFNET_TSO_IPV6;
2186 tso_v6_mtu = 0;
2187 }
2188 }
2189 }
2190
2191 if (offload != sc->sc_ifp->if_hwassist) {
2192 error = ifnet_set_offload(sc->sc_ifp, offload);
2193 if (error != 0) {
2194 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2195 "ifnet_set_offload(%s, 0x%x) failed %d",
2196 sc->sc_ifp->if_xname, offload, error);
2197 goto done;
2198 }
2199 /*
2200 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2201 * as large as the interface MTU
2202 */
2203 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2204 if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2205 tso_v4_mtu = sc->sc_ifp->if_mtu;
2206 }
2207 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2208 tso_v4_mtu);
2209 if (error != 0) {
2210 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2211 "ifnet_set_tso_mtu(%s, "
2212 "AF_INET, %u) failed %d",
2213 sc->sc_ifp->if_xname,
2214 tso_v4_mtu, error);
2215 goto done;
2216 }
2217 }
2218 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2219 if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2220 tso_v6_mtu = sc->sc_ifp->if_mtu;
2221 }
2222 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2223 tso_v6_mtu);
2224 if (error != 0) {
2225 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2226 "ifnet_set_tso_mtu(%s, "
2227 "AF_INET6, %u) failed %d",
2228 sc->sc_ifp->if_xname,
2229 tso_v6_mtu, error);
2230 goto done;
2231 }
2232 }
2233 }
2234 done:
2235 return error;
2236 }
2237
2238 static const char *
sanitize_ifname(char * __sized_by (IFNAMSIZ)ifname)2239 sanitize_ifname(char * __sized_by(IFNAMSIZ) ifname)
2240 {
2241 ifname[IFNAMSIZ - 1] = '\0';
2242 return __unsafe_null_terminated_from_indexable(ifname, &ifname[IFNAMSIZ - 1]);
2243 }
2244
2245 /*
2246 * bridge_lookup_member:
2247 *
2248 * Lookup a bridge member interface.
2249 */
2250 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,char * __sized_by (IFNAMSIZ)name_unsanitized)2251 bridge_lookup_member(struct bridge_softc *sc, char * __sized_by(IFNAMSIZ) name_unsanitized)
2252 {
2253 struct bridge_iflist *bif;
2254 struct ifnet *ifp;
2255 const char * __null_terminated name = sanitize_ifname(name_unsanitized);
2256
2257 BRIDGE_LOCK_ASSERT_HELD(sc);
2258
2259 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2260 ifp = bif->bif_ifp;
2261 if (strcmp(ifp->if_xname, name) == 0) {
2262 return bif;
2263 }
2264 }
2265
2266 return NULL;
2267 }
2268
2269 /*
2270 * bridge_lookup_member_if:
2271 *
2272 * Lookup a bridge member interface by ifnet*.
2273 */
2274 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2275 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2276 {
2277 struct bridge_iflist *bif;
2278
2279 BRIDGE_LOCK_ASSERT_HELD(sc);
2280
2281 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2282 if (bif->bif_ifp == member_ifp) {
2283 return bif;
2284 }
2285 }
2286
2287 return NULL;
2288 }
2289
2290 static inline bool
get_and_clear_promisc(mbuf_t m)2291 get_and_clear_promisc(mbuf_t m)
2292 {
2293 bool is_promisc;
2294
2295 /*
2296 * Need to clear the promiscuous flag otherwise the packet will be
2297 * dropped by DLIL after processing filters
2298 */
2299 is_promisc = (mbuf_flags(m) & MBUF_PROMISC) != 0;
2300 if (is_promisc) {
2301 mbuf_setflags_mask(m, 0, MBUF_PROMISC);
2302 }
2303 return is_promisc;
2304 }
2305
/*
 * bridge_iff_input:
 *
 *	Interface filter input callback for a member interface; `cookie` is
 *	the member's bridge_iflist.  Packets flagged M_PROTO1, packets whose
 *	frame header does not lie in front of the data within the first
 *	mbuf, and frames shorter than an ethernet header are left alone
 *	(return 0).  Everything else is rewound to the ethernet header and
 *	handed to bridge_input_list().
 *
 *	On return *data is whatever bridge_input_list() left at the head of
 *	the list.  EJUSTRETURN is returned when that is NULL or when pulling
 *	up the ethernet header failed; otherwise 0.
 */
static errno_t
bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(protocol)
	errno_t error = 0;
	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
	struct bridge_softc *sc = bif->bif_sc;
	int included = 0;
	struct ether_header * eh_p;
	size_t frmlen = 0;
	bool is_promisc;
	mblist list;
	mbuf_t m = *data;

	/* packets carrying M_PROTO1 are not processed by this filter */
	if ((m->m_flags & M_PROTO1)) {
		goto out;
	}

	/*
	 * The frame header must sit between the start of the mbuf's buffer
	 * and the current data pointer; frmlen is the distance from the
	 * header to the data.
	 */
	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
	    *frame_ptr <= mtod(m, char *)) {
		included = 1;
		frmlen = mtod(m, char *) - *frame_ptr;
	}
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
	    "frmlen %lu", sc->sc_ifp->if_xname,
	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
	    included ? "inside" : "outside", frmlen);
	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
		brlog_mbuf(m, "bridge_iff_input[", "");
		brlog_ether_header((struct ether_header *)
		    (void *)*frame_ptr);
		brlog_mbuf_data(m, 0, 20);
	}
	if (included == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
		goto out;
	}

	/* Move data pointer to start of frame to the link layer header */
	_mbuf_adjust_pkthdr_and_data(m, -frmlen);

	/* make sure we can access the ethernet header */
	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "short frame %lu < %lu",
		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
		goto out;
	}
	/* the header must be contiguous in the first mbuf */
	if (mbuf_len(m) < sizeof(struct ether_header)) {
		error = mbuf_pullup(data, sizeof(struct ether_header));
		if (error != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
			    "mbuf_pullup(%lu) failed %d",
			    sizeof(struct ether_header),
			    error);
			/*
			 * NOTE(review): presumably the chain is no longer
			 * ours after a failed pullup, hence EJUSTRETURN --
			 * confirm against mbuf_pullup().
			 */
			error = EJUSTRETURN;
			goto out;
		}
		/* pullup may have replaced the head: re-derive the header */
		if (m != *data) {
			m = *data;
			*frame_ptr = mtod(m, char *);
		}
	}
	/* hand the packet over as a one-element list */
	mblist_init(&list);
	mblist_append(&list, m);
	is_promisc = get_and_clear_promisc(m);
	eh_p = __unsafe_forge_single(struct ether_header *, *frame_ptr);
	list = bridge_input_list(sc, ifp, eh_p, list, is_promisc);
	/* whatever is left at the head goes back to the caller */
	m = *data = list.head;
	if (m == NULL) {
		error = EJUSTRETURN;
	}
	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
		brlog_mbuf(m, "bridge_iff_input]", "");
	}

out:
	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	return error;
}
2392
2393 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2394 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2395 mbuf_t *data)
2396 {
2397 #pragma unused(protocol)
2398 errno_t error = 0;
2399 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2400 struct bridge_softc *sc = bif->bif_sc;
2401 mbuf_t m = *data;
2402
2403 if ((m->m_flags & M_PROTO1)) {
2404 goto out;
2405 }
2406 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2407 "%s from %s m 0x%llx data 0x%llx",
2408 sc->sc_ifp->if_xname, ifp->if_xname,
2409 (uint64_t)VM_KERNEL_ADDRPERM(m),
2410 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)));
2411
2412 error = bridge_member_output(sc, ifp, data);
2413 if (error != 0 && error != EJUSTRETURN) {
2414 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2415 "bridge_member_output failed error %d",
2416 error);
2417 }
2418 out:
2419 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2420
2421 return error;
2422 }
2423
2424 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2425 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2426 const struct kev_msg *event_msg)
2427 {
2428 #pragma unused(protocol)
2429 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2430 struct bridge_softc *sc = bif->bif_sc;
2431
2432 if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2433 event_msg->kev_class == KEV_NETWORK_CLASS &&
2434 event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2435 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2436 "%s event_code %u - %s",
2437 ifp->if_xname, event_msg->event_code,
2438 dlil_kev_dl_code_str(event_msg->event_code));
2439
2440 switch (event_msg->event_code) {
2441 case KEV_DL_LINK_OFF:
2442 case KEV_DL_LINK_ON: {
2443 bridge_iflinkevent(ifp);
2444 #if BRIDGESTP
2445 bstp_linkstate(ifp, event_msg->event_code);
2446 #endif /* BRIDGESTP */
2447 break;
2448 }
2449 case KEV_DL_SIFFLAGS: {
2450 if ((ifp->if_flags & IFF_UP) == 0) {
2451 break;
2452 }
2453 if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2454 errno_t error;
2455
2456 error = ifnet_set_promiscuous(ifp, 1);
2457 if (error != 0) {
2458 BRIDGE_LOG(LOG_NOTICE, 0,
2459 "ifnet_set_promiscuous (%s)"
2460 " failed %d", ifp->if_xname,
2461 error);
2462 } else {
2463 bif->bif_flags |= BIFF_PROMISC;
2464 }
2465 }
2466 if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2467 (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2468 errno_t error;
2469
2470 error = if_allmulti(ifp, 1);
2471 if (error != 0) {
2472 BRIDGE_LOG(LOG_NOTICE, 0,
2473 "if_allmulti (%s)"
2474 " failed %d", ifp->if_xname,
2475 error);
2476 } else {
2477 bif->bif_flags |= BIFF_ALL_MULTI;
2478 #ifdef XNU_PLATFORM_AppleTVOS
2479 ip6_forwarding = 1;
2480 #endif /* XNU_PLATFORM_AppleTVOS */
2481 }
2482 }
2483 break;
2484 }
2485 case KEV_DL_IFCAP_CHANGED: {
2486 BRIDGE_LOCK(sc);
2487 bridge_set_tso(sc);
2488 BRIDGE_UNLOCK(sc);
2489 break;
2490 }
2491 case KEV_DL_PROTO_DETACHED:
2492 case KEV_DL_PROTO_ATTACHED: {
2493 bridge_proto_attach_changed(ifp);
2494 break;
2495 }
2496 default:
2497 break;
2498 }
2499 }
2500 }
2501
2502 /*
2503 * bridge_iff_detached:
2504 *
2505 * Called when our interface filter has been detached from a
2506 * member interface.
2507 */
2508 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2509 bridge_iff_detached(void *cookie, ifnet_t ifp)
2510 {
2511 #pragma unused(cookie)
2512 struct bridge_iflist *bif;
2513 struct bridge_softc * __single sc = ifp->if_bridge;
2514
2515 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2516
2517 /* Check if the interface is a bridge member */
2518 if (sc != NULL) {
2519 BRIDGE_LOCK(sc);
2520 bif = bridge_lookup_member_if(sc, ifp);
2521 if (bif != NULL) {
2522 bridge_delete_member(sc, bif);
2523 }
2524 BRIDGE_UNLOCK(sc);
2525 return;
2526 }
2527 /* Check if the interface is a span port */
2528 lck_mtx_lock(&bridge_list_mtx);
2529 LIST_FOREACH(sc, &bridge_list, sc_list) {
2530 BRIDGE_LOCK(sc);
2531 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2532 if (ifp == bif->bif_ifp) {
2533 bridge_delete_span(sc, bif);
2534 break;
2535 }
2536 BRIDGE_UNLOCK(sc);
2537 }
2538 lck_mtx_unlock(&bridge_list_mtx);
2539 }
2540
2541 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2542 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2543 char *header)
2544 {
2545 #pragma unused(protocol, packet, header)
2546 BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2547 ifp->if_xname);
2548 return 0;
2549 }
2550
2551 static int
bridge_attach_protocol(struct ifnet * ifp)2552 bridge_attach_protocol(struct ifnet *ifp)
2553 {
2554 int error;
2555 struct ifnet_attach_proto_param reg;
2556
2557 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2558 bzero(®, sizeof(reg));
2559 reg.input = bridge_proto_input;
2560
2561 error = ifnet_attach_protocol(ifp, PF_BRIDGE, ®);
2562 if (error) {
2563 BRIDGE_LOG(LOG_NOTICE, 0,
2564 "ifnet_attach_protocol(%s) failed, %d",
2565 ifp->if_xname, error);
2566 }
2567
2568 return error;
2569 }
2570
2571 static int
bridge_detach_protocol(struct ifnet * ifp)2572 bridge_detach_protocol(struct ifnet *ifp)
2573 {
2574 int error;
2575
2576 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2577 error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2578 if (error) {
2579 BRIDGE_LOG(LOG_NOTICE, 0,
2580 "ifnet_detach_protocol(%s) failed, %d",
2581 ifp->if_xname, error);
2582 }
2583
2584 return error;
2585 }
2586
/*
 * bridge_delete_member:
 *
 *	Delete the specified member interface.
 *
 *	Called with the bridge lock held.  The lock is DROPPED part-way
 *	through (the per-interface teardown runs unlocked) and re-acquired
 *	before returning, so callers must not assume that bridge state is
 *	unchanged across the call.
 *
 *	While locked: the member is unlinked from sc_iflist, MAC NAT and STP
 *	state for it is dropped, the bridge's MAC address is re-derived if
 *	it was inherited from this member, TSO is recomputed, the member's
 *	forwarding entries are flushed and the bridge link status updated.
 *
 *	While unlocked: if the interface is still attached, whatever the
 *	member flags say was set up on it (flowswitch, promiscuous mode,
 *	all-multicast, bridge protocol, interface filter, LRO) is undone.
 *	Then the bridge address / link event are applied, `bif` is freed,
 *	the interface's if_bridge pointer is cleared and the member's
 *	reference on the interface is released.
 */
static void
bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
{
#if SKYWALK
	boolean_t add_netagent = FALSE;
#endif /* SKYWALK */
	uint32_t bif_flags;
	struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
	int lladdr_changed = 0, error;
	uint8_t eaddr[ETHER_ADDR_LEN];
	u_int32_t event_code = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);
	VERIFY(ifs != NULL);

	/*
	 * Remove the member from the list first so it cannot be found anymore
	 * when we release the bridge lock below
	 */
	if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
		bif->bif_flags &= ~BIFF_IN_MEMBER_LIST;
		BRIDGE_XLOCK(sc);
		TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
		BRIDGE_XDROP(sc);
	}
	/* MAC NAT: turn it off if this member ran it, else flush its entries */
	if (sc->sc_mac_nat_bif != NULL) {
		if (bif == sc->sc_mac_nat_bif) {
			bridge_mac_nat_disable(sc);
		} else {
			bridge_mac_nat_flush_entries(sc, bif);
		}
	}
#if BRIDGESTP
	if ((bif->bif_ifflags & IFBIF_STP) != 0) {
		bstp_disable(&bif->bif_stp);
	}
#endif /* BRIDGESTP */

	/*
	 * If removing the interface that gave the bridge its mac address, set
	 * the mac address of the bridge to the address of the next member, or
	 * to its default address if no members are left.
	 */
	if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
		ifnet_release(sc->sc_ifaddr);
		if (TAILQ_EMPTY(&sc->sc_iflist)) {
			bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
			sc->sc_ifaddr = NULL;
		} else {
			struct ifnet *fif =
			    TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
			bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
			sc->sc_ifaddr = fif;
			ifnet_reference(fif);   /* for sc_ifaddr */
		}
		/* the new address is applied further down, once unlocked */
		lladdr_changed = 1;
	}

#if HAS_IF_CAP
	bridge_mutecaps(sc);    /* recalculate now this interface is removed */
#endif /* HAS_IF_CAP */

	/* failure is logged but does not stop the removal */
	error = bridge_set_tso(sc);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
	}

	/* flush every forwarding entry that points at this interface */
	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);

	KASSERT(bif->bif_addrcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));

	/*
	 * Update link status of the bridge based on its remaining members
	 */
	event_code = bridge_updatelinkstatus(sc);
	/* snapshot the flags: they drive the teardown done without the lock */
	bif_flags = bif->bif_flags;
	BRIDGE_UNLOCK(sc);

	/* only perform these steps if the interface is still attached */
	/*
	 * NOTE(review): ifnet_is_attached(ifs, 1) presumably takes an I/O
	 * reference that ifnet_decr_iorefcnt() below gives back -- confirm.
	 */
	if (ifnet_is_attached(ifs, 1)) {
#if SKYWALK
		/* remembered here, acted upon after `bif` has been freed */
		add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;

		if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
			ifnet_detach_flowswitch_nexus(ifs);
		}
#endif /* SKYWALK */
		/* disable promiscuous mode */
		if ((bif_flags & BIFF_PROMISC) != 0) {
			(void) ifnet_set_promiscuous(ifs, 0);
		}
		/* disable all multi */
		if ((bif_flags & BIFF_ALL_MULTI) != 0) {
			(void)if_allmulti(ifs, 0);
		}
#if HAS_IF_CAP
		/* re-enable any interface capabilities */
		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
#endif /* HAS_IF_CAP */
		/* detach bridge "protocol" */
		if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
			(void)bridge_detach_protocol(ifs);
		}
		/* detach interface filter */
		if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
			iflt_detach(bif->bif_iff_ref);
		}
		/* re-enable LRO */
		if ((bif_flags & BIFF_LRO_DISABLED) != 0) {
			(void)bridge_set_lro(ifs, TRUE);
		}
		ifnet_decr_iorefcnt(ifs);
	}

	/* apply the bridge address chosen above, if it changed */
	if (lladdr_changed &&
	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
	}

	/* announce the link state change computed while locked */
	if (event_code != 0) {
		bridge_link_event(bifp, event_code);
	}

#if BRIDGESTP
	bstp_destroy(&bif->bif_stp);    /* prepare to free */
#endif /* BRIDGESTP */

	/* `bif` must not be used past this point */
	kfree_type(struct bridge_iflist, bif);
	ifs->if_bridge = NULL;
#if SKYWALK
	if (add_netagent && ifnet_is_attached(ifs, 1)) {
		(void)ifnet_add_netagent(ifs);
		ifnet_decr_iorefcnt(ifs);
	}
#endif /* SKYWALK */

	/* drop the reference bridge_ioctl_add() took for bif_ifp */
	ifnet_release(ifs);

	/* callers expect to get the lock back */
	BRIDGE_LOCK(sc);
}
2733
2734 /*
2735 * bridge_delete_span:
2736 *
2737 * Delete the specified span interface.
2738 */
2739 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2740 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2741 {
2742 BRIDGE_LOCK_ASSERT_HELD(sc);
2743
2744 KASSERT(bif->bif_ifp->if_bridge == NULL,
2745 ("%s: not a span interface", __func__));
2746
2747 ifnet_release(bif->bif_ifp);
2748
2749 TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2750 kfree_type(struct bridge_iflist, bif);
2751 }
2752
2753 static int
bridge_ioctl_add(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)2754 bridge_ioctl_add(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
2755 {
2756 struct ifbreq * __single req = arg;
2757 struct bridge_iflist *bif = NULL;
2758 struct ifnet *ifs, *bifp = sc->sc_ifp;
2759 int error = 0, lladdr_changed = 0;
2760 uint8_t eaddr[ETHER_ADDR_LEN];
2761 struct iff_filter iff;
2762 u_int32_t event_code = 0;
2763 boolean_t input_broadcast;
2764 int media_active;
2765 boolean_t wifi_infra = FALSE;
2766
2767 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
2768 if (ifs == NULL) {
2769 return ENOENT;
2770 }
2771 if (ifs->if_ioctl == NULL) { /* must be supported */
2772 return EINVAL;
2773 }
2774
2775 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
2776 return EINVAL;
2777 }
2778
2779 /* If it's in the span list, it can't be a member. */
2780 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2781 if (ifs == bif->bif_ifp) {
2782 return EBUSY;
2783 }
2784 }
2785
2786 if (ifs->if_bridge == sc) {
2787 return EEXIST;
2788 }
2789
2790 if (ifs->if_bridge != NULL) {
2791 return EBUSY;
2792 }
2793
2794 switch (ifs->if_type) {
2795 case IFT_ETHER:
2796 if (strcmp(ifs->if_name, "en") == 0 &&
2797 ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2798 (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2799 /* XXX is there a better way to identify Wi-Fi STA? */
2800 wifi_infra = TRUE;
2801 }
2802 break;
2803 case IFT_L2VLAN:
2804 case IFT_IEEE8023ADLAG:
2805 break;
2806 default:
2807 return EINVAL;
2808 }
2809
2810 /* fail to add the interface if the MTU doesn't match */
2811 if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2812 BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2813 sc->sc_ifp->if_xname,
2814 ifs->if_xname);
2815 return EINVAL;
2816 }
2817
2818 if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
2819 /* there's already an interface that's doing MAC NAT */
2820 return EBUSY;
2821 }
2822
2823 /* prevent the interface from detaching while we add the member */
2824 if (!ifnet_is_attached(ifs, 1)) {
2825 return ENXIO;
2826 }
2827
2828 /* allocate a new member */
2829 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2830 bif->bif_ifp = ifs;
2831 ifnet_reference(ifs);
2832 bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2833 #if HAS_IF_CAP
2834 bif->bif_savedcaps = ifs->if_capenable;
2835 #endif /* HAS_IF_CAP */
2836 bif->bif_sc = sc;
2837 if (wifi_infra) {
2838 (void)bridge_mac_nat_enable(sc, bif);
2839 }
2840
2841 /* Allow the first Ethernet member to define the MTU */
2842 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2843 sc->sc_ifp->if_mtu = ifs->if_mtu;
2844 }
2845
2846 /*
2847 * Assign the interface's MAC address to the bridge if it's the first
2848 * member and the MAC address of the bridge has not been changed from
2849 * the default (randomly) generated one.
2850 */
2851 if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2852 _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2853 bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2854 sc->sc_ifaddr = ifs;
2855 ifnet_reference(ifs); /* for sc_ifaddr */
2856 lladdr_changed = 1;
2857 }
2858
2859 ifs->if_bridge = sc;
2860 #if BRIDGESTP
2861 bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2862 #endif /* BRIDGESTP */
2863
2864 #if HAS_IF_CAP
2865 /* Set interface capabilities to the intersection set of all members */
2866 bridge_mutecaps(sc);
2867 #endif /* HAS_IF_CAP */
2868
2869 /*
2870 * Respect lock ordering with DLIL lock for the following operations
2871 */
2872 BRIDGE_UNLOCK(sc);
2873
2874 /* enable promiscuous mode */
2875 error = ifnet_set_promiscuous(ifs, 1);
2876 switch (error) {
2877 case 0:
2878 bif->bif_flags |= BIFF_PROMISC;
2879 break;
2880 case ENETDOWN:
2881 case EPWROFF:
2882 BRIDGE_LOG(LOG_NOTICE, 0,
2883 "ifnet_set_promiscuous(%s) failed %d, ignoring",
2884 ifs->if_xname, error);
2885 /* Ignore error when device is not up */
2886 error = 0;
2887 break;
2888 default:
2889 BRIDGE_LOG(LOG_NOTICE, 0,
2890 "ifnet_set_promiscuous(%s) failed %d",
2891 ifs->if_xname, error);
2892 BRIDGE_LOCK(sc);
2893 goto out;
2894 }
2895 if (wifi_infra) {
2896 int this_error;
2897
2898 /* Wi-Fi doesn't really support promiscuous, set allmulti */
2899 bif->bif_flags |= BIFF_WIFI_INFRA;
2900 this_error = if_allmulti(ifs, 1);
2901 if (this_error == 0) {
2902 bif->bif_flags |= BIFF_ALL_MULTI;
2903 #ifdef XNU_PLATFORM_AppleTVOS
2904 ip6_forwarding = 1;
2905 #endif /* XNU_PLATFORM_AppleTVOS */
2906 } else {
2907 BRIDGE_LOG(LOG_NOTICE, 0,
2908 "if_allmulti(%s) failed %d, ignoring",
2909 ifs->if_xname, this_error);
2910 }
2911 }
2912 #if SKYWALK
2913 /* ensure that the flowswitch is present for native interface */
2914 if (SKYWALK_NATIVE(ifs)) {
2915 if (ifnet_attach_flowswitch_nexus(ifs)) {
2916 bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
2917 }
2918 }
2919 /* remove the netagent on the flowswitch (rdar://75050182) */
2920 if (if_is_fsw_netagent_enabled()) {
2921 (void)ifnet_remove_netagent(ifs);
2922 bif->bif_flags |= BIFF_NETAGENT_REMOVED;
2923 }
2924 #endif /* SKYWALK */
2925
2926 /*
2927 * install an interface filter
2928 */
2929 memset(&iff, 0, sizeof(struct iff_filter));
2930 iff.iff_cookie = bif;
2931 iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
2932 iff.iff_input = bridge_iff_input;
2933 iff.iff_output = bridge_iff_output;
2934 iff.iff_event = bridge_iff_event;
2935 iff.iff_detached = bridge_iff_detached;
2936 error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
2937 DLIL_IFF_TSO | DLIL_IFF_INTERNAL | DLIL_IFF_BRIDGE);
2938 if (error != 0) {
2939 BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
2940 BRIDGE_LOCK(sc);
2941 goto out;
2942 }
2943 bif->bif_flags |= BIFF_FILTER_ATTACHED;
2944
2945 /*
2946 * install a dummy "bridge" protocol
2947 */
2948 if ((error = bridge_attach_protocol(ifs)) != 0) {
2949 if (error != 0) {
2950 BRIDGE_LOG(LOG_NOTICE, 0,
2951 "bridge_attach_protocol failed %d", error);
2952 BRIDGE_LOCK(sc);
2953 goto out;
2954 }
2955 }
2956 bif->bif_flags |= BIFF_PROTO_ATTACHED;
2957
2958 if (lladdr_changed &&
2959 (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2960 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2961 }
2962
2963 media_active = interface_media_active(ifs);
2964
2965 /* disable LRO if needed */
2966 if (bridge_set_lro(ifs, FALSE)) {
2967 bif->bif_flags |= BIFF_LRO_DISABLED;
2968 }
2969
2970 /*
2971 * No failures past this point. Add the member to the list.
2972 */
2973 BRIDGE_LOCK(sc);
2974 bif->bif_flags |= BIFF_IN_MEMBER_LIST;
2975 BRIDGE_XLOCK(sc);
2976 TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
2977 BRIDGE_XDROP(sc);
2978
2979 /* cache the member link status */
2980 if (media_active != 0) {
2981 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
2982 } else {
2983 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
2984 }
2985
2986 /* the new member may change the link status of the bridge interface */
2987 event_code = bridge_updatelinkstatus(sc);
2988
2989 /* check whether we need input broadcast or not */
2990 input_broadcast = interface_needs_input_broadcast(ifs);
2991 bif_set_input_broadcast(bif, input_broadcast);
2992 BRIDGE_UNLOCK(sc);
2993
2994 if (event_code != 0) {
2995 bridge_link_event(bifp, event_code);
2996 }
2997 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2998 "%s input broadcast %s", ifs->if_xname,
2999 input_broadcast ? "ENABLED" : "DISABLED");
3000
3001 BRIDGE_LOCK(sc);
3002 bridge_set_tso(sc);
3003
3004 out:
3005 /* allow the interface to detach */
3006 ifnet_decr_iorefcnt(ifs);
3007
3008 if (error != 0) {
3009 if (bif != NULL) {
3010 bridge_delete_member(sc, bif);
3011 }
3012 } else if (IFNET_IS_VMNET(ifs)) {
3013 INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
3014 }
3015
3016 return error;
3017 }
3018
3019 static int
bridge_ioctl_del(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3020 bridge_ioctl_del(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3021 {
3022 struct ifbreq * __single req = arg;
3023 struct bridge_iflist *bif;
3024
3025 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3026 if (bif == NULL) {
3027 return ENOENT;
3028 }
3029
3030 bridge_delete_member(sc, bif);
3031
3032 return 0;
3033 }
3034
3035 static int
bridge_ioctl_purge(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3036 bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3037 {
3038 #pragma unused(sc, arg, arg_len)
3039 return 0;
3040 }
3041
3042 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3043 bridge_ioctl_gifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3044 {
3045 struct ifbreq * __single req = arg;
3046 struct bridge_iflist *bif;
3047
3048 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3049 if (bif == NULL) {
3050 return ENOENT;
3051 }
3052
3053 struct bstp_port *bp;
3054
3055 bp = &bif->bif_stp;
3056 req->ifbr_state = bp->bp_state;
3057 req->ifbr_priority = bp->bp_priority;
3058 req->ifbr_path_cost = bp->bp_path_cost;
3059 req->ifbr_proto = bp->bp_protover;
3060 req->ifbr_role = bp->bp_role;
3061 req->ifbr_stpflags = bp->bp_flags;
3062 req->ifbr_ifsflags = bif->bif_ifflags;
3063
3064 /* Copy STP state options as flags */
3065 if (bp->bp_operedge) {
3066 req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
3067 }
3068 if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
3069 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
3070 }
3071 if (bp->bp_ptp_link) {
3072 req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
3073 }
3074 if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
3075 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
3076 }
3077 if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
3078 req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
3079 }
3080 if (bp->bp_flags & BSTP_PORT_ADMCOST) {
3081 req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
3082 }
3083
3084 req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
3085 req->ifbr_addrcnt = bif->bif_addrcnt;
3086 req->ifbr_addrmax = bif->bif_addrmax;
3087 req->ifbr_addrexceeded = bif->bif_addrexceeded;
3088
3089 return 0;
3090 }
3091
3092 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3093 bridge_ioctl_sifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3094 {
3095 struct ifbreq * __single req = arg;
3096 struct bridge_iflist *bif;
3097 #if BRIDGESTP
3098 struct bstp_port *bp;
3099 #endif /* BRIDGESTP */
3100 errno_t error;
3101 uint32_t ifsflags;
3102
3103 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3104 if (bif == NULL) {
3105 return ENOENT;
3106 }
3107
3108 ifsflags = req->ifbr_ifsflags;
3109 if (ifsflags & IFBIF_SPAN) {
3110 /* SPAN is readonly */
3111 return EINVAL;
3112 }
3113 #define CHECKSUM_VIRTIO (IFBIF_CHECKSUM_OFFLOAD | IFBIF_USES_VIRTIO)
3114 if ((ifsflags & CHECKSUM_VIRTIO) == CHECKSUM_VIRTIO) {
3115 /* can't specify checksum and virtio */
3116 return EINVAL;
3117 }
3118 if ((ifsflags & IFBIF_MAC_NAT) != 0 &&
3119 ((ifsflags & CHECKSUM_VIRTIO) != 0 ||
3120 (bif->bif_flags & BIFF_HOST_FILTER) != 0)) {
3121 /* MAC-NAT can't be used with checksum, host filter, or virtio */
3122 return EINVAL;
3123 }
3124 if ((ifsflags & IFBIF_MAC_NAT) != 0) {
3125 error = bridge_mac_nat_enable(sc, bif);
3126 if (error != 0) {
3127 return error;
3128 }
3129 } else if (sc->sc_mac_nat_bif == bif) {
3130 bridge_mac_nat_disable(sc);
3131 }
3132
3133 #if BRIDGESTP
3134 if (ifsflags & IFBIF_STP) {
3135 if ((bif->bif_ifflags & IFBIF_STP) == 0) {
3136 error = bstp_enable(&bif->bif_stp);
3137 if (error) {
3138 return error;
3139 }
3140 }
3141 } else {
3142 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
3143 bstp_disable(&bif->bif_stp);
3144 }
3145 }
3146
3147 /* Pass on STP flags */
3148 bp = &bif->bif_stp;
3149 bstp_set_edge(bp, ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
3150 bstp_set_autoedge(bp, ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
3151 bstp_set_ptp(bp, ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
3152 bstp_set_autoptp(bp, ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
3153 #else /* !BRIDGESTP */
3154 if (ifsflags & IFBIF_STP) {
3155 return EOPNOTSUPP;
3156 }
3157 #endif /* !BRIDGESTP */
3158
3159 /* Save the bits relating to the bridge */
3160 bif->bif_ifflags = ifsflags & IFBIFMASK;
3161
3162 return 0;
3163 }
3164
3165 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3166 bridge_ioctl_scache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3167 {
3168 struct ifbrparam * __single param = arg;
3169
3170 sc->sc_brtmax = param->ifbrp_csize;
3171 bridge_rttrim(sc);
3172 return 0;
3173 }
3174
3175 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3176 bridge_ioctl_gcache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3177 {
3178 struct ifbrparam * __single param = arg;
3179
3180 param->ifbrp_csize = sc->sc_brtmax;
3181
3182 return 0;
3183 }
3184
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of the 32- and 64-bit "get member list" ioctls.
 *	Expects `sc', `bifc' and `error' to be in scope where it expands.
 *
 *	When the caller's length is 0, only the required buffer size is
 *	stored in bifc->ifbic_len and the enclosing function returns 0.
 *	Otherwise one ifbreq per member, followed by one per span port, is
 *	collected (as many as fit) and copied out to bifc->ifbic_req;
 *	bifc->ifbic_len is updated to the number of bytes produced.
 *
 *	The bridge lock is dropped around the allocation and the copyout.
 *	Because the lists can change while the lock is dropped, the fill
 *	loops are bounded by the size that was actually allocated.
 *	If the allocation fails the enclosing function returns ENOMEM.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif; \
	struct ifbreq breq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
 \
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) \
	        count++; \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) \
	        count++; \
 \
	buflen = sizeof (breq) * count; \
	if (bifc->ifbic_len == 0) { \
	        bifc->ifbic_len = buflen; \
	        return (0); \
	} \
	BRIDGE_UNLOCK(sc); \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* Z_WAITOK without Z_NOFAIL: don't write through a NULL buffer */ \
	if (outbuf == NULL && buflen != 0) { \
	        return (ENOMEM); \
	} \
 \
	count = 0; \
	buf = outbuf; \
	len = min(bifc->ifbic_len, buflen); \
	bzero(&breq, sizeof (breq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if (len < sizeof (breq)) \
	                break; \
 \
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname); \
	        /* Fill in the ifbreq structure */ \
	        error = bridge_ioctl_gifflags(sc, &breq, sizeof(breq)); \
	        if (error) \
	                break; \
	        memcpy(buf, &breq, sizeof (breq)); \
	        count++; \
	        buf += sizeof (breq); \
	        len -= sizeof (breq); \
	} \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) { \
	        if (len < sizeof (breq)) \
	                break; \
 \
	        snprintf(breq.ifbr_ifsname, \
	            sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname); \
	        breq.ifbr_ifsflags = bif->bif_ifflags; \
	        breq.ifbr_portno \
	                = bif->bif_ifp->if_index & 0xfff; \
	        memcpy(buf, &breq, sizeof (breq)); \
	        count++; \
	        buf += sizeof (breq); \
	        len -= sizeof (breq); \
	} \
 \
	BRIDGE_UNLOCK(sc); \
	bifc->ifbic_len = sizeof (breq) * count; \
	if (bifc->ifbic_len > 0) { \
	        error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);\
	} \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
} while (0)
3249
3250 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3251 bridge_ioctl_gifs64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3252 {
3253 struct ifbifconf64 * __single bifc = arg;
3254 int error = 0;
3255
3256 BRIDGE_IOCTL_GIFS;
3257
3258 return error;
3259 }
3260
3261 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3262 bridge_ioctl_gifs32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3263 {
3264 struct ifbifconf32 * __single bifc = arg;
3265 int error = 0;
3266
3267 BRIDGE_IOCTL_GIFS;
3268
3269 return error;
3270 }
3271
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of the 32- and 64-bit "get address table" ioctls.
 *	Expects `sc', `bac', `bareq' and `error' to be in scope where it
 *	expands, and always returns from the enclosing function.
 *
 *	A caller length of 0 returns 0 immediately.  Otherwise one bareq per
 *	address table entry is collected (as many as fit) and copied out to
 *	bac->ifbac_req; bac->ifbac_len is set to the number of bytes produced.
 *	For dynamic entries ifba_expire is the remaining lifetime relative to
 *	net_uptime(); it is 0 for all other entries and for dynamic entries
 *	whose expiry time has already been reached.
 *
 *	The bridge lock is dropped around the allocation and the copyout.
 *	If the allocation fails the enclosing function returns ENOMEM.
 */
#define BRIDGE_IOCTL_RTS do { \
	struct bridge_rtnode *brt; \
	char *buf; \
	char *outbuf = NULL; \
	unsigned int count, buflen, len; \
	unsigned long now; \
 \
	if (bac->ifbac_len == 0) \
	        return (0); \
 \
	bzero(&bareq, sizeof (bareq)); \
	count = 0; \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) \
	        count++; \
	buflen = sizeof (bareq) * count; \
 \
	BRIDGE_UNLOCK(sc); \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* Z_WAITOK without Z_NOFAIL: don't write through a NULL buffer */ \
	if (outbuf == NULL && buflen != 0) { \
	        return (ENOMEM); \
	} \
 \
	count = 0; \
	buf = outbuf; \
	len = min(bac->ifbac_len, buflen); \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
	        if (len < sizeof (bareq)) \
	                goto out; \
	        snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname), \
	            "%s", brt->brt_ifp->if_xname); \
	        memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
	        bareq.ifba_vlan = brt->brt_vlan; \
	        /* bareq is reused: reset so no stale lifetime is reported */ \
	        bareq.ifba_expire = 0; \
	        if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
	                now = (unsigned long) net_uptime(); \
	                if (now < brt->brt_expire) \
	                        bareq.ifba_expire = \
	                            brt->brt_expire - now; \
	        } \
	        bareq.ifba_flags = brt->brt_flags; \
 \
	        memcpy(buf, &bareq, sizeof (bareq)); \
	        count++; \
	        buf += sizeof (bareq); \
	        len -= sizeof (bareq); \
	} \
out: \
	bac->ifbac_len = sizeof (bareq) * count; \
	if (outbuf != NULL) { \
	        BRIDGE_UNLOCK(sc); \
	        if (bac->ifbac_len > 0) { \
	                error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);\
	        } \
	        kfree_data(outbuf, buflen); \
	        BRIDGE_LOCK(sc); \
	} \
	return (error); \
} while (0)
3328
3329 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3330 bridge_ioctl_rts64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3331 {
3332 struct ifbaconf64 * __single bac = arg;
3333 struct ifbareq64 bareq;
3334 int error = 0;
3335
3336 BRIDGE_IOCTL_RTS;
3337 return error;
3338 }
3339
3340 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3341 bridge_ioctl_rts32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3342 {
3343 struct ifbaconf32 * __single bac = arg;
3344 struct ifbareq32 bareq;
3345 int error = 0;
3346
3347 BRIDGE_IOCTL_RTS;
3348 return error;
3349 }
3350
3351 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3352 bridge_ioctl_saddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3353 {
3354 struct ifbareq32 * __single req = arg;
3355 struct bridge_iflist *bif;
3356 int error;
3357
3358 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3359 if (bif == NULL) {
3360 return ENOENT;
3361 }
3362
3363 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3364 req->ifba_flags);
3365
3366 return error;
3367 }
3368
3369 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3370 bridge_ioctl_saddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3371 {
3372 struct ifbareq64 * __single req = arg;
3373 struct bridge_iflist *bif;
3374 int error;
3375
3376 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3377 if (bif == NULL) {
3378 return ENOENT;
3379 }
3380
3381 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3382 req->ifba_flags);
3383
3384 return error;
3385 }
3386
3387 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3388 bridge_ioctl_sto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3389 {
3390 struct ifbrparam * __single param = arg;
3391
3392 sc->sc_brttimeout = param->ifbrp_ctime;
3393 return 0;
3394 }
3395
3396 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3397 bridge_ioctl_gto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3398 {
3399 struct ifbrparam * __single param = arg;
3400
3401 param->ifbrp_ctime = sc->sc_brttimeout;
3402 return 0;
3403 }
3404
3405 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3406 bridge_ioctl_daddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3407 {
3408 struct ifbareq32 * __single req = arg;
3409
3410 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3411 }
3412
3413 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3414 bridge_ioctl_daddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3415 {
3416 struct ifbareq64 * __single req = arg;
3417
3418 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3419 }
3420
3421 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3422 bridge_ioctl_flush(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3423 {
3424 struct ifbreq * __single req = arg;
3425
3426 bridge_rtflush(sc, req->ifbr_ifsflags);
3427 return 0;
3428 }
3429
3430 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3431 bridge_ioctl_gpri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3432 {
3433 struct ifbrparam * __single param = arg;
3434 struct bstp_state *bs = &sc->sc_stp;
3435
3436 param->ifbrp_prio = bs->bs_bridge_priority;
3437 return 0;
3438 }
3439
3440 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3441 bridge_ioctl_spri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3442 {
3443 #if BRIDGESTP
3444 struct ifbrparam *param = arg;
3445
3446 return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3447 #else /* !BRIDGESTP */
3448 #pragma unused(sc, arg)
3449 return EOPNOTSUPP;
3450 #endif /* !BRIDGESTP */
3451 }
3452
3453 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3454 bridge_ioctl_ght(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3455 {
3456 struct ifbrparam * __single param = arg;
3457 struct bstp_state *bs = &sc->sc_stp;
3458
3459 param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3460 return 0;
3461 }
3462
3463 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3464 bridge_ioctl_sht(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3465 {
3466 #if BRIDGESTP
3467 struct ifbrparam *param = arg;
3468
3469 return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3470 #else /* !BRIDGESTP */
3471 #pragma unused(sc, arg)
3472 return EOPNOTSUPP;
3473 #endif /* !BRIDGESTP */
3474 }
3475
3476 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3477 bridge_ioctl_gfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3478 {
3479 struct ifbrparam * __single param;
3480 struct bstp_state *bs;
3481
3482 param = arg;
3483 bs = &sc->sc_stp;
3484 param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3485 return 0;
3486 }
3487
3488 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3489 bridge_ioctl_sfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3490 {
3491 #if BRIDGESTP
3492 struct ifbrparam *param = arg;
3493
3494 return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3495 #else /* !BRIDGESTP */
3496 #pragma unused(sc, arg)
3497 return EOPNOTSUPP;
3498 #endif /* !BRIDGESTP */
3499 }
3500
3501 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3502 bridge_ioctl_gma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3503 {
3504 struct ifbrparam * __single param;
3505 struct bstp_state *bs;
3506
3507 param = arg;
3508 bs = &sc->sc_stp;
3509 param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3510 return 0;
3511 }
3512
3513 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3514 bridge_ioctl_sma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3515 {
3516 #if BRIDGESTP
3517 struct ifbrparam *param = arg;
3518
3519 return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3520 #else /* !BRIDGESTP */
3521 #pragma unused(sc, arg)
3522 return EOPNOTSUPP;
3523 #endif /* !BRIDGESTP */
3524 }
3525
3526 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3527 bridge_ioctl_sifprio(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3528 {
3529 #if BRIDGESTP
3530 struct ifbreq *req = arg;
3531 struct bridge_iflist *bif;
3532
3533 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3534 if (bif == NULL) {
3535 return ENOENT;
3536 }
3537
3538 return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3539 #else /* !BRIDGESTP */
3540 #pragma unused(sc, arg)
3541 return EOPNOTSUPP;
3542 #endif /* !BRIDGESTP */
3543 }
3544
3545 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3546 bridge_ioctl_sifcost(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3547 {
3548 #if BRIDGESTP
3549 struct ifbreq *req = arg;
3550 struct bridge_iflist *bif;
3551
3552 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3553 if (bif == NULL) {
3554 return ENOENT;
3555 }
3556
3557 return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3558 #else /* !BRIDGESTP */
3559 #pragma unused(sc, arg)
3560 return EOPNOTSUPP;
3561 #endif /* !BRIDGESTP */
3562 }
3563
3564 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3565 bridge_ioctl_gfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3566 {
3567 struct ifbrparam * __single param = arg;
3568
3569 param->ifbrp_filter = sc->sc_filter_flags;
3570
3571 return 0;
3572 }
3573
3574 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3575 bridge_ioctl_sfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3576 {
3577 struct ifbrparam * __single param = arg;
3578
3579 if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3580 return EINVAL;
3581 }
3582
3583 if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3584 return EINVAL;
3585 }
3586
3587 sc->sc_filter_flags = param->ifbrp_filter;
3588
3589 return 0;
3590 }
3591
3592 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3593 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3594 {
3595 struct ifbreq * __single req = arg;
3596 struct bridge_iflist *bif;
3597
3598 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3599 if (bif == NULL) {
3600 return ENOENT;
3601 }
3602
3603 bif->bif_addrmax = req->ifbr_addrmax;
3604 return 0;
3605 }
3606
3607 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3608 bridge_ioctl_addspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3609 {
3610 struct ifbreq * __single req = arg;
3611 struct bridge_iflist *bif = NULL;
3612 struct ifnet *ifs;
3613
3614 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3615 if (ifs == NULL) {
3616 return ENOENT;
3617 }
3618
3619 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3620 return EINVAL;
3621 }
3622
3623 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3624 if (ifs == bif->bif_ifp) {
3625 return EBUSY;
3626 }
3627
3628 if (ifs->if_bridge != NULL) {
3629 return EBUSY;
3630 }
3631
3632 switch (ifs->if_type) {
3633 case IFT_ETHER:
3634 case IFT_L2VLAN:
3635 case IFT_IEEE8023ADLAG:
3636 break;
3637 default:
3638 return EINVAL;
3639 }
3640
3641 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3642
3643 bif->bif_ifp = ifs;
3644 bif->bif_ifflags = IFBIF_SPAN;
3645
3646 ifnet_reference(bif->bif_ifp);
3647
3648 TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3649
3650 return 0;
3651 }
3652
3653 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3654 bridge_ioctl_delspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3655 {
3656 struct ifbreq * __single req = arg;
3657 struct bridge_iflist *bif;
3658 struct ifnet *ifs;
3659
3660 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3661 if (ifs == NULL) {
3662 return ENOENT;
3663 }
3664
3665 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3666 if (ifs == bif->bif_ifp) {
3667 break;
3668 }
3669
3670 if (bif == NULL) {
3671 return ENOENT;
3672 }
3673
3674 bridge_delete_span(sc, bif);
3675
3676 return 0;
3677 }
3678
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of the 32- and 64-bit "get bridge STP parameters" ioctls.
 *	Expects `sc' and `req' to be in scope where it expands.  Copies the
 *	bridge-wide spanning tree state from sc->sc_stp into the request;
 *	the timer values are reported shifted right by 8, and the root port
 *	is 0 when there is no root port.
 */
#define BRIDGE_IOCTL_GBPARAM do { \
	struct bstp_state *bs = &sc->sc_stp; \
	struct bstp_port *root_port = bs->bs_root_port; \
 \
	req->ifbop_maxage = bs->bs_bridge_max_age >> 8; \
	req->ifbop_hellotime = bs->bs_bridge_htime >> 8; \
	req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8; \
 \
	if (root_port != NULL) { \
	        req->ifbop_root_port = root_port->bp_ifp->if_index; \
	} else { \
	        req->ifbop_root_port = 0; \
	} \
 \
	req->ifbop_holdcount = bs->bs_txholdcount; \
	req->ifbop_priority = bs->bs_bridge_priority; \
	req->ifbop_protocol = bs->bs_protover; \
	req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost; \
	req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id; \
	req->ifbop_designated_root = bs->bs_root_pv.pv_root_id; \
	req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id; \
	req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec; \
	req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec; \
} while (0)
3703
3704 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3705 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3706 {
3707 struct ifbropreq32 * __single req = arg;
3708
3709 BRIDGE_IOCTL_GBPARAM;
3710 return 0;
3711 }
3712
3713 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3714 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3715 {
3716 struct ifbropreq64 * __single req = arg;
3717
3718 BRIDGE_IOCTL_GBPARAM;
3719 return 0;
3720 }
3721
3722 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3723 bridge_ioctl_grte(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3724 {
3725 struct ifbrparam * __single param = arg;
3726
3727 param->ifbrp_cexceeded = sc->sc_brtexceeded;
3728 return 0;
3729 }
3730
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body of the 32- and 64-bit "get STP port list" ioctls.
 *	Expects `sc', `bifstp' and `error' to be in scope where it expands,
 *	and always returns from the enclosing function.
 *
 *	When the caller's length is 0, only the required buffer size is
 *	stored in bifstp->ifbpstp_len.  Otherwise one ifbpstpreq per member
 *	that has IFBIF_STP set is collected (as many as fit) and copied out
 *	to bifstp->ifbpstp_req; ifbpstp_len is updated to the number of bytes
 *	produced.
 *
 *	The bridge lock is dropped around the allocation and the copyout.
 *	If the allocation fails the enclosing function returns ENOMEM.
 */
#define BRIDGE_IOCTL_GIFSSTP do { \
	struct bridge_iflist *bif; \
	struct bstp_port *bp; \
	struct ifbpstpreq bpreq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
 \
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if ((bif->bif_ifflags & IFBIF_STP) != 0) \
	                count++; \
	} \
 \
	buflen = sizeof (bpreq) * count; \
	if (bifstp->ifbpstp_len == 0) { \
	        bifstp->ifbpstp_len = buflen; \
	        return (0); \
	} \
 \
	BRIDGE_UNLOCK(sc); \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* Z_WAITOK without Z_NOFAIL: don't write through a NULL buffer */ \
	if (outbuf == NULL && buflen != 0) { \
	        return (ENOMEM); \
	} \
 \
	count = 0; \
	buf = outbuf; \
	len = min(bifstp->ifbpstp_len, buflen); \
	bzero(&bpreq, sizeof (bpreq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if (len < sizeof (bpreq)) \
	                break; \
 \
	        if ((bif->bif_ifflags & IFBIF_STP) == 0) \
	                continue; \
 \
	        bp = &bif->bif_stp; \
	        bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; \
	        bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; \
	        bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; \
	        bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; \
	        bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
	        bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; \
 \
	        memcpy(buf, &bpreq, sizeof (bpreq)); \
	        count++; \
	        buf += sizeof (bpreq); \
	        len -= sizeof (bpreq); \
	} \
 \
	BRIDGE_UNLOCK(sc); \
	bifstp->ifbpstp_len = sizeof (bpreq) * count; \
	if (bifstp->ifbpstp_len > 0) { \
	        error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len);\
	} \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
	return (error); \
} while (0)
3788
3789 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3790 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3791 {
3792 struct ifbpstpconf32 * __single bifstp = arg;
3793 int error = 0;
3794
3795 BRIDGE_IOCTL_GIFSSTP;
3796 return error;
3797 }
3798
3799 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3800 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3801 {
3802 struct ifbpstpconf64 * __single bifstp = arg;
3803 int error = 0;
3804
3805 BRIDGE_IOCTL_GIFSSTP;
3806 return error;
3807 }
3808
3809 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3810 bridge_ioctl_sproto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3811 {
3812 #if BRIDGESTP
3813 struct ifbrparam *param = arg;
3814
3815 return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3816 #else /* !BRIDGESTP */
3817 #pragma unused(sc, arg)
3818 return EOPNOTSUPP;
3819 #endif /* !BRIDGESTP */
3820 }
3821
3822 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3823 bridge_ioctl_stxhc(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3824 {
3825 #if BRIDGESTP
3826 struct ifbrparam *param = arg;
3827
3828 return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3829 #else /* !BRIDGESTP */
3830 #pragma unused(sc, arg)
3831 return EOPNOTSUPP;
3832 #endif /* !BRIDGESTP */
3833 }
3834
3835
3836 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3837 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3838 {
3839 struct ifbrhostfilter * __single req = arg;
3840 struct bridge_iflist *bif;
3841
3842 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3843 if (bif == NULL) {
3844 return ENOENT;
3845 }
3846
3847 bzero(req, sizeof(struct ifbrhostfilter));
3848 if (bif->bif_flags & BIFF_HOST_FILTER) {
3849 req->ifbrhf_flags |= IFBRHF_ENABLED;
3850 bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3851 ETHER_ADDR_LEN);
3852 req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3853 }
3854 return 0;
3855 }
3856
3857 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3858 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3859 {
3860 struct ifbrhostfilter * __single req = arg;
3861 struct bridge_iflist *bif;
3862
3863 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3864 if (bif == NULL) {
3865 return ENOENT;
3866 }
3867 if (bif_has_mac_nat(bif)) {
3868 /* no host filter with MAC-NAT */
3869 return EINVAL;
3870 }
3871 if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3872 bif->bif_flags |= BIFF_HOST_FILTER;
3873
3874 if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3875 bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3876 ETHER_ADDR_LEN);
3877 if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3878 ETHER_ADDR_LEN) != 0) {
3879 bif->bif_flags |= BIFF_HF_HWSRC;
3880 } else {
3881 bif->bif_flags &= ~BIFF_HF_HWSRC;
3882 }
3883 }
3884 if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3885 bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3886 if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3887 bif->bif_flags |= BIFF_HF_IPSRC;
3888 } else {
3889 bif->bif_flags &= ~BIFF_HF_IPSRC;
3890 }
3891 }
3892 } else {
3893 bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3894 BIFF_HF_IPSRC);
3895 bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3896 bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3897 }
3898
3899 return 0;
3900 }
3901
3902 static char *__indexable
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * __indexable buf,unsigned int * len_p)3903 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3904 unsigned int * count_p, char *__indexable buf,
3905 unsigned int * len_p)
3906 {
3907 unsigned int count = *count_p;
3908 struct ifbrmne ifbmne;
3909 unsigned int len = *len_p;
3910 struct mac_nat_entry *mne;
3911 unsigned long now;
3912
3913 bzero(&ifbmne, sizeof(ifbmne));
3914 LIST_FOREACH(mne, list, mne_list) {
3915 if (len < sizeof(ifbmne)) {
3916 break;
3917 }
3918 snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3919 "%s", mne->mne_bif->bif_ifp->if_xname);
3920 memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3921 sizeof(ifbmne.ifbmne_mac));
3922 now = (unsigned long) net_uptime();
3923 if (now < mne->mne_expire) {
3924 ifbmne.ifbmne_expire = mne->mne_expire - now;
3925 } else {
3926 ifbmne.ifbmne_expire = 0;
3927 }
3928 if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3929 ifbmne.ifbmne_af = AF_INET6;
3930 ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3931 } else {
3932 ifbmne.ifbmne_af = AF_INET;
3933 ifbmne.ifbmne_ip_addr = mne->mne_ip;
3934 }
3935 memcpy(buf, &ifbmne, sizeof(ifbmne));
3936 count++;
3937 buf += sizeof(ifbmne);
3938 len -= sizeof(ifbmne);
3939 }
3940 *count_p = count;
3941 *len_p = len;
3942 return buf;
3943 }
3944
3945 /*
3946 * bridge_ioctl_gmnelist()
3947 * Perform the get mac_nat_entry list ioctl.
3948 *
3949 * Note:
3950 * The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3951 * field size/layout except for the last field ifbml_buf, the user-supplied
3952 * buffer pointer. That is passed in separately via the 'user_addr'
3953 * parameter from the respective 32-bit or 64-bit ioctl routine.
3954 */
3955 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3956 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3957 user_addr_t user_addr)
3958 {
3959 unsigned int count;
3960 char *buf;
3961 int error = 0;
3962 char *outbuf = NULL;
3963 struct mac_nat_entry *mne;
3964 unsigned int buflen;
3965 unsigned int len;
3966
3967 mnl->ifbml_elsize = sizeof(struct ifbrmne);
3968 count = 0;
3969 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3970 count++;
3971 }
3972 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3973 count++;
3974 }
3975 buflen = sizeof(struct ifbrmne) * count;
3976 if (buflen == 0 || mnl->ifbml_len == 0) {
3977 mnl->ifbml_len = buflen;
3978 return error;
3979 }
3980 BRIDGE_UNLOCK(sc);
3981 outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3982 BRIDGE_LOCK(sc);
3983 count = 0;
3984 buf = outbuf;
3985 len = min(mnl->ifbml_len, buflen);
3986 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3987 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3988 mnl->ifbml_len = count * sizeof(struct ifbrmne);
3989 BRIDGE_UNLOCK(sc);
3990 if (mnl->ifbml_len > 0) {
3991 error = copyout(outbuf, user_addr, mnl->ifbml_len);
3992 }
3993 kfree_data(outbuf, buflen);
3994 BRIDGE_LOCK(sc);
3995 return error;
3996 }
3997
3998 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3999 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4000 {
4001 struct ifbrmnelist64 * __single mnl = arg;
4002
4003 return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
4004 }
4005
4006 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4007 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4008 {
4009 struct ifbrmnelist32 * __single mnl = arg;
4010
4011 return bridge_ioctl_gmnelist(sc, arg,
4012 CAST_USER_ADDR_T(mnl->ifbml_buf));
4013 }
4014
4015 /*
4016 * bridge_ioctl_gifstats()
4017 * Return per-member stats.
4018 *
4019 * Note:
4020 * The ifbrmreq32 and ifbrmreq64 structures have the same
4021 * field size/layout except for the last field brmr_buf, the user-supplied
4022 * buffer pointer. That is passed in separately via the 'user_addr'
4023 * parameter from the respective 32-bit or 64-bit ioctl routine.
4024 */
4025 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)4026 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
4027 user_addr_t user_addr)
4028 {
4029 struct bridge_iflist *bif;
4030 int error = 0;
4031 unsigned int buflen;
4032
4033 bif = bridge_lookup_member(sc, mreq->brmr_ifname);
4034 if (bif == NULL) {
4035 error = ENOENT;
4036 goto done;
4037 }
4038
4039 buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
4040 if (buflen == 0 || mreq->brmr_len == 0) {
4041 mreq->brmr_len = buflen;
4042 goto done;
4043 }
4044 if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
4045 error = ENOBUFS;
4046 goto done;
4047 }
4048 mreq->brmr_len = buflen;
4049 error = copyout(&bif->bif_stats, user_addr, buflen);
4050 done:
4051 return error;
4052 }
4053
4054 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4055 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4056 {
4057 struct ifbrmreq32 * __single mreq = arg;
4058
4059 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4060 }
4061
4062 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4063 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4064 {
4065 struct ifbrmreq64 * __single mreq = arg;
4066
4067 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4068 }
4069
4070 /*
4071 * bridge_proto_attach_changed
4072 *
4073 * Called when protocol attachment on the interface changes.
4074 */
4075 static void
bridge_proto_attach_changed(struct ifnet * ifp)4076 bridge_proto_attach_changed(struct ifnet *ifp)
4077 {
4078 boolean_t changed = FALSE;
4079 struct bridge_iflist *bif;
4080 boolean_t input_broadcast;
4081 struct bridge_softc * __single sc = ifp->if_bridge;
4082
4083 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4084 if (sc == NULL) {
4085 return;
4086 }
4087 input_broadcast = interface_needs_input_broadcast(ifp);
4088 BRIDGE_LOCK(sc);
4089 bif = bridge_lookup_member_if(sc, ifp);
4090 if (bif != NULL) {
4091 changed = bif_set_input_broadcast(bif, input_broadcast);
4092 }
4093 BRIDGE_UNLOCK(sc);
4094 if (changed) {
4095 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
4096 "%s input broadcast %s", ifp->if_xname,
4097 input_broadcast ? "ENABLED" : "DISABLED");
4098 }
4099 return;
4100 }
4101
4102 /*
4103 * interface_media_active:
4104 *
4105 * Tells if an interface media is active.
4106 */
4107 static int
interface_media_active(struct ifnet * ifp)4108 interface_media_active(struct ifnet *ifp)
4109 {
4110 struct ifmediareq ifmr;
4111 int status = 0;
4112
4113 bzero(&ifmr, sizeof(ifmr));
4114 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
4115 if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
4116 status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
4117 }
4118 }
4119
4120 return status;
4121 }
4122
4123 /*
4124 * bridge_updatelinkstatus:
4125 *
4126 * Update the media active status of the bridge based on the
4127 * media active status of its member.
4128 * If changed, return the corresponding onf/off link event.
4129 */
4130 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)4131 bridge_updatelinkstatus(struct bridge_softc *sc)
4132 {
4133 struct bridge_iflist *bif;
4134 int active_member = 0;
4135 u_int32_t event_code = 0;
4136
4137 BRIDGE_LOCK_ASSERT_HELD(sc);
4138
4139 /*
4140 * Find out if we have an active interface
4141 */
4142 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
4143 if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
4144 active_member = 1;
4145 break;
4146 }
4147 }
4148
4149 if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4150 sc->sc_flags |= SCF_MEDIA_ACTIVE;
4151 event_code = KEV_DL_LINK_ON;
4152 } else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4153 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
4154 event_code = KEV_DL_LINK_OFF;
4155 }
4156
4157 return event_code;
4158 }
4159
4160 /*
4161 * bridge_iflinkevent:
4162 */
4163 static void
bridge_iflinkevent(struct ifnet * ifp)4164 bridge_iflinkevent(struct ifnet *ifp)
4165 {
4166 struct bridge_softc * __single sc = ifp->if_bridge;
4167 struct bridge_iflist *bif;
4168 u_int32_t event_code = 0;
4169 int media_active;
4170
4171 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4172
4173 /* Check if the interface is a bridge member */
4174 if (sc == NULL) {
4175 return;
4176 }
4177
4178 media_active = interface_media_active(ifp);
4179 BRIDGE_LOCK(sc);
4180 bif = bridge_lookup_member_if(sc, ifp);
4181 if (bif != NULL) {
4182 if (media_active) {
4183 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4184 } else {
4185 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4186 }
4187 if (sc->sc_mac_nat_bif != NULL) {
4188 bridge_mac_nat_flush_entries(sc, bif);
4189 }
4190
4191 event_code = bridge_updatelinkstatus(sc);
4192 }
4193 BRIDGE_UNLOCK(sc);
4194
4195 if (event_code != 0) {
4196 bridge_link_event(sc->sc_ifp, event_code);
4197 }
4198 }
4199
4200 /*
4201 * bridge_delayed_callback:
4202 *
4203 * Makes a delayed call
4204 */
4205 static void
bridge_delayed_callback(void * param,__unused void * param2)4206 bridge_delayed_callback(void *param, __unused void *param2)
4207 {
4208 struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4209 struct bridge_softc *sc = call->bdc_sc;
4210
4211 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4212 if (bridge_delayed_callback_delay > 0) {
4213 struct timespec ts;
4214
4215 ts.tv_sec = bridge_delayed_callback_delay;
4216 ts.tv_nsec = 0;
4217
4218 BRIDGE_LOG(LOG_NOTICE, 0,
4219 "sleeping for %d seconds",
4220 bridge_delayed_callback_delay);
4221
4222 msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4223 __func__, &ts);
4224
4225 BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4226 }
4227 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4228
4229 BRIDGE_LOCK(sc);
4230
4231 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4232 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4233 "%s call 0x%llx flags 0x%x",
4234 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4235 call->bdc_flags);
4236 }
4237 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4238
4239 if (call->bdc_flags & BDCF_CANCELLING) {
4240 wakeup(call);
4241 } else {
4242 if ((sc->sc_flags & SCF_DETACHING) == 0) {
4243 (*call->bdc_func)(sc);
4244 }
4245 }
4246 call->bdc_flags &= ~BDCF_OUTSTANDING;
4247 BRIDGE_UNLOCK(sc);
4248 }
4249
4250 /*
4251 * bridge_schedule_delayed_call:
4252 *
4253 * Schedule a function to be called on a separate thread
4254 * The actual call may be scheduled to run at a given time or ASAP.
4255 */
4256 static void
4257 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4258 {
4259 uint64_t deadline = 0;
4260 struct bridge_softc *sc = call->bdc_sc;
4261
4262 BRIDGE_LOCK_ASSERT_HELD(sc);
4263
4264 if ((sc->sc_flags & SCF_DETACHING) ||
4265 (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4266 return;
4267 }
4268
4269 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4270 nanoseconds_to_absolutetime(
4271 (uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4272 call->bdc_ts.tv_nsec, &deadline);
4273 clock_absolutetime_interval_to_deadline(deadline, &deadline);
4274 }
4275
4276 call->bdc_flags = BDCF_OUTSTANDING;
4277
4278 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4279 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4280 "%s call 0x%llx flags 0x%x",
4281 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4282 call->bdc_flags);
4283 }
4284 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4285
4286 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4287 thread_call_func_delayed(
4288 (thread_call_func_t)bridge_delayed_callback,
4289 call, deadline);
4290 } else {
4291 if (call->bdc_thread_call == NULL) {
4292 call->bdc_thread_call = thread_call_allocate(
4293 (thread_call_func_t)bridge_delayed_callback,
4294 call);
4295 }
4296 thread_call_enter(call->bdc_thread_call);
4297 }
4298 }
4299
4300 /*
4301 * bridge_cancel_delayed_call:
4302 *
4303 * Cancel a queued or running delayed call.
4304 * If call is running, does not return until the call is done to
4305 * prevent race condition with the brigde interface getting destroyed
4306 */
4307 static void
4308 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4309 {
4310 boolean_t result;
4311 struct bridge_softc *sc = call->bdc_sc;
4312
4313 /*
4314 * The call was never scheduled
4315 */
4316 if (sc == NULL) {
4317 return;
4318 }
4319
4320 BRIDGE_LOCK_ASSERT_HELD(sc);
4321
4322 call->bdc_flags |= BDCF_CANCELLING;
4323
4324 while (call->bdc_flags & BDCF_OUTSTANDING) {
4325 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4326 "%s call 0x%llx flags 0x%x",
4327 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4328 call->bdc_flags);
4329 result = thread_call_func_cancel(
4330 (thread_call_func_t)bridge_delayed_callback, call, FALSE);
4331
4332 if (result) {
4333 /*
4334 * We managed to dequeue the delayed call
4335 */
4336 call->bdc_flags &= ~BDCF_OUTSTANDING;
4337 } else {
4338 /*
4339 * Wait for delayed call do be done running
4340 */
4341 msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4342 }
4343 }
4344 call->bdc_flags &= ~BDCF_CANCELLING;
4345 }
4346
4347 /*
4348 * bridge_cleanup_delayed_call:
4349 *
4350 * Dispose resource allocated for a delayed call
4351 * Assume the delayed call is not queued or running .
4352 */
4353 static void
4354 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4355 {
4356 boolean_t result;
4357 struct bridge_softc *sc = call->bdc_sc;
4358
4359 /*
4360 * The call was never scheduled
4361 */
4362 if (sc == NULL) {
4363 return;
4364 }
4365
4366 BRIDGE_LOCK_ASSERT_HELD(sc);
4367
4368 VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4369 VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4370
4371 if (call->bdc_thread_call != NULL) {
4372 result = thread_call_free(call->bdc_thread_call);
4373 if (result == FALSE) {
4374 panic("%s thread_call_free() failed for call %p",
4375 __func__, call);
4376 }
4377 call->bdc_thread_call = NULL;
4378 }
4379 }
4380
4381 /*
4382 * bridge_init:
4383 *
4384 * Initialize a bridge interface.
4385 */
4386 static int
4387 bridge_init(struct ifnet *ifp)
4388 {
4389 struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4390 errno_t error;
4391
4392 BRIDGE_LOCK_ASSERT_HELD(sc);
4393
4394 if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4395 return 0;
4396 }
4397
4398 error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4399
4400 /*
4401 * Calling bridge_aging_timer() is OK as there are no entries to
4402 * age so we're just going to arm the timer
4403 */
4404 bridge_aging_timer(sc);
4405 #if BRIDGESTP
4406 if (error == 0) {
4407 bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4408 }
4409 #endif /* BRIDGESTP */
4410 return error;
4411 }
4412
4413 /*
4414 * bridge_ifstop:
4415 *
4416 * Stop the bridge interface.
4417 */
4418 static void
4419 bridge_ifstop(struct ifnet *ifp, int disable)
4420 {
4421 #pragma unused(disable)
4422 struct bridge_softc * __single sc = ifp->if_softc;
4423
4424 BRIDGE_LOCK_ASSERT_HELD(sc);
4425
4426 if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4427 return;
4428 }
4429
4430 bridge_cancel_delayed_call(&sc->sc_aging_timer);
4431
4432 #if BRIDGESTP
4433 bstp_stop(&sc->sc_stp);
4434 #endif /* BRIDGESTP */
4435
4436 bridge_rtflush(sc, IFBF_FLUSHDYN);
4437 (void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4438 }
4439
4440 static const uint32_t checksum_request_flags = (MBUF_CSUM_REQ_TCP |
4441 MBUF_CSUM_REQ_UDP | MBUF_CSUM_REQ_TCPIPV6 | MBUF_CSUM_REQ_UDPIPV6);
4442
4443 static const mbuf_csum_performed_flags_t checksum_performed_all_good =
4444 (MBUF_CSUM_DID_IP | MBUF_CSUM_IP_GOOD
4445 | MBUF_CSUM_DID_DATA | MBUF_CSUM_PSEUDO_HDR);
4446
4447 /*
4448 * bridge_compute_cksum:
4449 *
4450 * If the packet has checksum flags, compare the hardware checksum
4451 * capabilities of the source and destination interfaces. If they
4452 * are the same, there's nothing to do. If they are different,
4453 * finalize the checksum so that it can be sent on the destination
4454 * interface.
4455 */
4456 static void
4457 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4458 {
4459 uint32_t csum_flags;
4460 uint16_t dst_hw_csum;
4461 uint32_t did_sw = 0;
4462 struct ether_header *eh;
4463 uint16_t src_hw_csum;
4464
4465 if (src_if == dst_if) {
4466 return;
4467 }
4468 csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4469 if (csum_flags == 0) {
4470 /* no checksum offload */
4471 return;
4472 }
4473
4474 /*
4475 * if destination/source differ in checksum offload
4476 * capabilities, finalize/compute the checksum
4477 */
4478 dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4479 src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4480 if (dst_hw_csum == src_hw_csum) {
4481 return;
4482 }
4483 eh = mtod(m, struct ether_header *);
4484 switch (eh->ether_type) {
4485 case HTONS_ETHERTYPE_IP:
4486 did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4487 break;
4488 case HTONS_ETHERTYPE_IPV6:
4489 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4490 break;
4491 }
4492 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4493 "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4494 src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4495 m->m_pkthdr.csum_flags);
4496 }
4497
4498 static inline errno_t
4499 bridge_transmit(ifnet_t ifp, mbuf_t m)
4500 {
4501 struct flowadv adv = { .code = FADV_SUCCESS };
4502 errno_t error;
4503 int flags = DLIL_OUTPUT_FLAGS_RAW;
4504
4505 flags = (if_bridge_output_skip_filters != 0)
4506 ? (DLIL_OUTPUT_FLAGS_RAW | DLIL_OUTPUT_FLAGS_SKIP_IF_FILTERS)
4507 : DLIL_OUTPUT_FLAGS_RAW;
4508 error = dlil_output(ifp, 0, m, NULL, NULL, flags, &adv);
4509 if (error == 0) {
4510 if (adv.code == FADV_FLOW_CONTROLLED) {
4511 error = EQFULL;
4512 } else if (adv.code == FADV_SUSPENDED) {
4513 error = EQSUSPENDED;
4514 }
4515 }
4516 return error;
4517 }
4518
4519 static int
4520 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4521 bool *is_fragmented)
4522 {
4523 int newoff;
4524
4525 *is_fragmented = false;
4526 while (1) {
4527 newoff = ip6_nexthdr(m, off, proto, nxtp);
4528 if (newoff < 0) {
4529 return off;
4530 } else if (newoff < off) {
4531 return -1; /* invalid */
4532 } else if (newoff == off) {
4533 return newoff;
4534 }
4535 off = newoff;
4536 proto = *nxtp;
4537 if (proto == IPPROTO_FRAGMENT) {
4538 *is_fragmented = true;
4539 }
4540 }
4541 }
4542
/*
 * Relaxed atomic increment of the lvalue `s' via os_atomic_inc().
 * The argument is parenthesized so that any lvalue expression can be
 * passed safely.
 */
#define __ATOMIC_INC(s) os_atomic_inc(&(s), relaxed)
4544
4545 static int
4546 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4547 ip_packet_info_t info_p, struct bripstats * stats_p)
4548 {
4549 int error = 0;
4550 u_int hlen;
4551 u_int ip_hlen;
4552 u_int ip_pay_len;
4553 struct mbuf * m0 = *mp;
4554 int off;
4555 int opt_len = 0;
4556 int proto = 0;
4557
4558 bzero(info_p, sizeof(*info_p));
4559 if (is_ipv4) {
4560 struct ip * ip;
4561 u_int ip_total_len;
4562
4563 /* IPv4 */
4564 hlen = mac_hlen + sizeof(struct ip);
4565 if (m0->m_pkthdr.len < hlen) {
4566 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4567 "Short IP packet %d < %d",
4568 m0->m_pkthdr.len, hlen);
4569 error = _EBADIP;
4570 __ATOMIC_INC(stats_p->bips_bad_ip);
4571 goto done;
4572 }
4573 if (m0->m_len < hlen) {
4574 *mp = m0 = m_pullup(m0, hlen);
4575 if (m0 == NULL) {
4576 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4577 "m_pullup failed hlen %d",
4578 hlen);
4579 error = ENOBUFS;
4580 __ATOMIC_INC(stats_p->bips_bad_ip);
4581 goto done;
4582 }
4583 }
4584 ip = (struct ip *)mtodo(m0, mac_hlen);
4585 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4586 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4587 "bad IP version");
4588 error = _EBADIP;
4589 __ATOMIC_INC(stats_p->bips_bad_ip);
4590 goto done;
4591 }
4592 ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4593 if (ip_hlen < sizeof(struct ip)) {
4594 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4595 "bad IP header length %d < %d",
4596 ip_hlen,
4597 (int)sizeof(struct ip));
4598 error = _EBADIP;
4599 __ATOMIC_INC(stats_p->bips_bad_ip);
4600 goto done;
4601 }
4602 hlen = mac_hlen + ip_hlen;
4603 if (m0->m_len < hlen) {
4604 *mp = m0 = m_pullup(m0, hlen);
4605 if (m0 == NULL) {
4606 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4607 "m_pullup failed hlen %d",
4608 hlen);
4609 error = ENOBUFS;
4610 __ATOMIC_INC(stats_p->bips_bad_ip);
4611 goto done;
4612 }
4613 ip = (struct ip *)mtodo(m0, mac_hlen);
4614 }
4615
4616 ip_total_len = ntohs(ip->ip_len);
4617 if (ip_total_len < ip_hlen) {
4618 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4619 "IP total len %d < header len %d",
4620 ip_total_len, ip_hlen);
4621 error = _EBADIP;
4622 __ATOMIC_INC(stats_p->bips_bad_ip);
4623 goto done;
4624 }
4625 if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4626 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4627 "invalid IP payload length %d > %d",
4628 ip_total_len,
4629 (m0->m_pkthdr.len - mac_hlen));
4630 error = _EBADIP;
4631 __ATOMIC_INC(stats_p->bips_bad_ip);
4632 goto done;
4633 }
4634 ip_pay_len = ip_total_len - ip_hlen;
4635 info_p->ip_proto = ip->ip_p;
4636 info_p->ip_hdr = mtodo(m0, mac_hlen);
4637 info_p->ip_m0_len = m0->m_len - mac_hlen;
4638 info_p->ip_hlen = ip_hlen;
4639 #define FRAG_BITS (IP_OFFMASK | IP_MF)
4640 if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4641 info_p->ip_is_fragmented = true;
4642 }
4643 __ATOMIC_INC(stats_p->bips_ip);
4644 } else {
4645 struct ip6_hdr *ip6;
4646
4647 /* IPv6 */
4648 hlen = mac_hlen + sizeof(struct ip6_hdr);
4649 if (m0->m_pkthdr.len < hlen) {
4650 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4651 "short IPv6 packet %d < %d",
4652 m0->m_pkthdr.len, hlen);
4653 error = _EBADIPV6;
4654 __ATOMIC_INC(stats_p->bips_bad_ip6);
4655 goto done;
4656 }
4657 if (m0->m_len < hlen) {
4658 *mp = m0 = m_pullup(m0, hlen);
4659 if (m0 == NULL) {
4660 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4661 "m_pullup failed hlen %d",
4662 hlen);
4663 error = ENOBUFS;
4664 __ATOMIC_INC(stats_p->bips_bad_ip6);
4665 goto done;
4666 }
4667 }
4668 ip6 = (struct ip6_hdr *)(mtodo(m0, mac_hlen));
4669 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4670 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4671 "bad IPv6 version");
4672 error = _EBADIPV6;
4673 __ATOMIC_INC(stats_p->bips_bad_ip6);
4674 goto done;
4675 }
4676 off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4677 &info_p->ip_is_fragmented);
4678 if (off < 0 || m0->m_pkthdr.len < off) {
4679 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4680 "ip6_lasthdr() returned %d",
4681 off);
4682 error = _EBADIPV6;
4683 __ATOMIC_INC(stats_p->bips_bad_ip6);
4684 goto done;
4685 }
4686 ip_hlen = sizeof(*ip6);
4687 opt_len = off - mac_hlen - ip_hlen;
4688 if (opt_len < 0) {
4689 error = _EBADIPV6;
4690 __ATOMIC_INC(stats_p->bips_bad_ip6);
4691 goto done;
4692 }
4693 ip_pay_len = ntohs(ip6->ip6_plen);
4694 if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4695 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4696 "invalid IPv6 payload length %d > %d",
4697 ip_pay_len,
4698 (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4699 error = _EBADIPV6;
4700 __ATOMIC_INC(stats_p->bips_bad_ip6);
4701 goto done;
4702 }
4703 info_p->ip_proto = proto;
4704 info_p->ip_hdr = mtodo(m0, mac_hlen);
4705 info_p->ip_m0_len = m0->m_len - mac_hlen;
4706 info_p->ip_hlen = ip_hlen;
4707 __ATOMIC_INC(stats_p->bips_ip6);
4708 }
4709 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4710 "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4711 is_ipv4 ? '4' : '6',
4712 proto, ip_hlen, ip_pay_len, opt_len,
4713 m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4714 info_p->ip_pay_len = ip_pay_len;
4715 info_p->ip_opt_len = opt_len;
4716 info_p->ip_is_ipv4 = is_ipv4;
4717 done:
4718 return error;
4719 }
4720
4721 static int
4722 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4723 ip_packet_info_t info_p, struct bripstats * stats_p)
4724 {
4725 int error;
4726 u_int hlen;
4727
4728 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4729 if (error != 0) {
4730 goto done;
4731 }
4732 if (info_p->ip_proto != IPPROTO_TCP) {
4733 /* not a TCP frame, not an error, just a bad guess */
4734 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4735 "non-TCP (%d) IPv%c frame %d bytes",
4736 info_p->ip_proto, is_ipv4 ? '4' : '6',
4737 (*mp)->m_pkthdr.len);
4738 goto done;
4739 }
4740 if (info_p->ip_is_fragmented) {
4741 /* both TSO and IP fragmentation don't make sense */
4742 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4743 "fragmented TSO packet?");
4744 __ATOMIC_INC(stats_p->bips_bad_tcp);
4745 error = _EBADTCP;
4746 goto done;
4747 }
4748 hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4749 info_p->ip_opt_len;
4750 if ((*mp)->m_len < hlen) {
4751 *mp = m_pullup(*mp, hlen);
4752 if (*mp == NULL) {
4753 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4754 "m_pullup %d failed",
4755 hlen);
4756 __ATOMIC_INC(stats_p->bips_bad_tcp);
4757 error = _EBADTCP;
4758 goto done;
4759 }
4760 }
4761 info_p->ip_proto_hdr = info_p->ip_hdr + info_p->ip_hlen +
4762 info_p->ip_opt_len;
4763 done:
4764 return error;
4765 }
4766
4767 static inline void
4768 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4769 {
4770 if (proto == IPPROTO_TCP) {
4771 __ATOMIC_INC(stats_p->brcs_tcp_checksum);
4772 } else {
4773 __ATOMIC_INC(stats_p->brcs_udp_checksum);
4774 }
4775 return;
4776 }
4777
/*
 * Bit flags used to classify an ethernet type.  The single-bit values
 * come first; ETHER_TYPE_FLAG_IP and ETHER_TYPE_FLAG_IP_ARP are masks
 * built from them.
 */
#define ETHER_TYPE_FLAG_NONE    0
#define ETHER_TYPE_FLAG_IPV4    (1 << 0)
#define ETHER_TYPE_FLAG_IPV6    (1 << 1)
#define ETHER_TYPE_FLAG_ARP     (1 << 2)
#define ETHER_TYPE_FLAG_IP      (ETHER_TYPE_FLAG_IPV4 | ETHER_TYPE_FLAG_IPV6)
#define ETHER_TYPE_FLAG_IP_ARP  (ETHER_TYPE_FLAG_IP | ETHER_TYPE_FLAG_ARP)
4784
4785 static inline bool
4786 ether_type_flag_is_ip(ether_type_flag_t flag)
4787 {
4788 return (flag & ETHER_TYPE_FLAG_IP) != 0;
4789 }
4790
4791 static inline ether_type_flag_t
4792 ether_type_flag_get(uint16_t ether_type)
4793 {
4794 ether_type_flag_t flag = ETHER_TYPE_FLAG_NONE;
4795
4796 switch (ether_type) {
4797 case HTONS_ETHERTYPE_IP:
4798 flag = ETHER_TYPE_FLAG_IPV4;
4799 break;
4800 case HTONS_ETHERTYPE_IPV6:
4801 flag = ETHER_TYPE_FLAG_IPV6;
4802 break;
4803 case HTONS_ETHERTYPE_ARP:
4804 flag = ETHER_TYPE_FLAG_ARP;
4805 break;
4806 default:
4807 break;
4808 }
4809 return flag;
4810 }
4811
4812 static bool
4813 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4814 {
4815 uint16_t ether_type;
4816 bool is_ip = TRUE;
4817
4818 ether_type = ntohs(eh->ether_type);
4819 switch (ether_type) {
4820 case ETHERTYPE_IP:
4821 *is_ipv4 = TRUE;
4822 break;
4823 case ETHERTYPE_IPV6:
4824 *is_ipv4 = FALSE;
4825 break;
4826 default:
4827 is_ip = FALSE;
4828 break;
4829 }
4830 return is_ip;
4831 }
4832
4833 static errno_t
4834 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4835 {
4836 struct brcsumstats *csum_stats_p;
4837 struct ether_header *eh;
4838 errno_t error = 0;
4839 ip_packet_info info;
4840 bool is_ipv4;
4841 struct mbuf * m;
4842 u_int mac_hlen = sizeof(struct ether_header);
4843 uint16_t sum;
4844 bool valid;
4845
4846 eh = mtod(*mp, struct ether_header *);
4847 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4848 goto done;
4849 }
4850 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4851 &stats_p->brms_out_ip);
4852 m = *mp;
4853 if (error != 0) {
4854 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4855 "bridge_get_ip_proto failed %d",
4856 error);
4857 goto done;
4858 }
4859 if (is_ipv4) {
4860 if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4861 /* hardware offloaded IP header checksum */
4862 valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4863 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4864 "IP checksum HW %svalid",
4865 valid ? "" : "in");
4866 if (!valid) {
4867 __ATOMIC_INC(stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum);
4868 error = _EBADIPCHECKSUM;
4869 goto done;
4870 }
4871 __ATOMIC_INC(stats_p->brms_out_cksum_good_hw.brcs_ip_checksum);
4872 } else {
4873 /* verify */
4874 sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4875 valid = (sum == 0);
4876 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4877 "IP checksum SW %svalid",
4878 valid ? "" : "in");
4879 if (!valid) {
4880 __ATOMIC_INC(stats_p->brms_out_cksum_bad.brcs_ip_checksum);
4881 error = _EBADIPCHECKSUM;
4882 goto done;
4883 }
4884 __ATOMIC_INC(stats_p->brms_out_cksum_good.brcs_ip_checksum);
4885 }
4886 }
4887 if (info.ip_is_fragmented) {
4888 /* can't verify checksum on fragmented packets */
4889 goto done;
4890 }
4891 switch (info.ip_proto) {
4892 case IPPROTO_TCP:
4893 __ATOMIC_INC(stats_p->brms_out_ip.bips_tcp);
4894 break;
4895 case IPPROTO_UDP:
4896 __ATOMIC_INC(stats_p->brms_out_ip.bips_udp);
4897 break;
4898 default:
4899 goto done;
4900 }
4901 /* check for hardware offloaded UDP/TCP checksum */
4902 #define HW_CSUM (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4903 if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4904 /* checksum verified by hardware */
4905 valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4906 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4907 "IPv%c %s checksum HW 0x%x %svalid",
4908 is_ipv4 ? '4' : '6',
4909 (info.ip_proto == IPPROTO_TCP)
4910 ? "TCP" : "UDP",
4911 m->m_pkthdr.csum_data,
4912 valid ? "" : "in" );
4913 if (!valid) {
4914 /* bad checksum */
4915 csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4916 error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4917 : _EBADTCPCHECKSUM;
4918 } else {
4919 /* good checksum */
4920 csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4921 }
4922 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4923 goto done;
4924 }
4925 /* adjust frame to skip mac-layer header */
4926 _mbuf_adjust_pkthdr_and_data(m, mac_hlen);
4927 if (is_ipv4) {
4928 sum = inet_cksum(m, info.ip_proto,
4929 info.ip_hlen,
4930 info.ip_pay_len);
4931 } else {
4932 sum = inet6_cksum(m, info.ip_proto,
4933 info.ip_hlen + info.ip_opt_len,
4934 info.ip_pay_len - info.ip_opt_len);
4935 }
4936 valid = (sum == 0);
4937 if (valid) {
4938 csum_stats_p = &stats_p->brms_out_cksum_good;
4939 } else {
4940 csum_stats_p = &stats_p->brms_out_cksum_bad;
4941 error = (info.ip_proto == IPPROTO_TCP)
4942 ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4943 }
4944 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4945 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4946 "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4947 is_ipv4 ? '4' : '6',
4948 (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4949 valid ? "" : "in",
4950 sum, info.ip_hlen, info.ip_pay_len);
4951 /* adjust frame back to start of mac-layer header */
4952 _mbuf_adjust_pkthdr_and_data(m, -mac_hlen);
4953
4954 done:
4955 return error;
4956 }
4957
4958 static mbuf_t
4959 bridge_verify_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * dbif,
4960 mbuf_t in_list, bool is_ipv4)
4961 {
4962 mbuf_t next_packet;
4963 mblist ret;
4964
4965 mblist_init(&ret);
4966 for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
4967 errno_t error;
4968
4969 /* take packet out of the list */
4970 next_packet = scan->m_nextpkt;
4971 scan->m_nextpkt = NULL;
4972
4973 if (scan->m_pkthdr.rx_seg_cnt > 1) {
4974 /* LRO packet, compute checksum on large packet */
4975 scan = bridge_filter_checksum(bridge_ifp, dbif, scan,
4976 is_ipv4, false, true);
4977 } else {
4978 /* verify checksum */
4979 error = bridge_verify_checksum(&scan, &dbif->bif_stats);
4980 if (error != 0) {
4981 if (scan != NULL) {
4982 m_freem(scan);
4983 scan = NULL;
4984 }
4985 }
4986 }
4987
4988 /* add it back to the list */
4989 if (scan != NULL) {
4990 mblist_append(&ret, scan);
4991 }
4992 }
4993 return ret.head;
4994 }
4995
4996
4997 static errno_t
4998 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
4999 struct ifbrmstats * stats_p)
5000 {
5001 uint16_t * csum_p;
5002 errno_t error = 0;
5003 u_int hlen;
5004 struct mbuf * m0 = *mp;
5005 u_int mac_hlen = sizeof(struct ether_header);
5006 u_int pkt_hdr_len;
5007 struct tcphdr * tcp;
5008 u_int tcp_hlen;
5009 struct udphdr * udp;
5010
5011 if (info_p->ip_is_ipv4) {
5012 /* compute IP header checksum */
5013 struct ip *ip = (struct ip *)info_p->ip_hdr;
5014 ip->ip_sum = 0;
5015 ip->ip_sum = inet_cksum(m0, 0, mac_hlen, info_p->ip_hlen);
5016 __ATOMIC_INC(stats_p->brms_in_computed_cksum.brcs_ip_checksum);
5017 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5018 "IPv4 checksum 0x%x",
5019 ntohs(ip->ip_sum));
5020 }
5021 if (info_p->ip_is_fragmented) {
5022 /* can't compute checksum on fragmented packets */
5023 goto done;
5024 }
5025 pkt_hdr_len = m0->m_pkthdr.len;
5026 switch (info_p->ip_proto) {
5027 case IPPROTO_TCP:
5028 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
5029 + sizeof(struct tcphdr);
5030 if (m0->m_len < hlen) {
5031 *mp = m0 = m_pullup(m0, hlen);
5032 if (m0 == NULL) {
5033 __ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5034 error = _EBADTCP;
5035 goto done;
5036 }
5037 }
5038 tcp = (struct tcphdr *)(info_p->ip_hdr + info_p->ip_hlen
5039 + info_p->ip_opt_len);
5040 tcp_hlen = tcp->th_off << 2;
5041 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
5042 if (hlen > pkt_hdr_len) {
5043 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5044 "bad tcp header length %u",
5045 tcp_hlen);
5046 __ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5047 error = _EBADTCP;
5048 goto done;
5049 }
5050 csum_p = &tcp->th_sum;
5051 __ATOMIC_INC(stats_p->brms_in_ip.bips_tcp);
5052 break;
5053 case IPPROTO_UDP:
5054 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
5055 if (m0->m_len < hlen) {
5056 *mp = m0 = m_pullup(m0, hlen);
5057 if (m0 == NULL) {
5058 __ATOMIC_INC(stats_p->brms_in_ip.bips_bad_udp);
5059 error = ENOBUFS;
5060 goto done;
5061 }
5062 }
5063 udp = (struct udphdr *)(info_p->ip_hdr + info_p->ip_hlen
5064 + info_p->ip_opt_len);
5065 csum_p = &udp->uh_sum;
5066 __ATOMIC_INC(stats_p->brms_in_ip.bips_udp);
5067 break;
5068 default:
5069 /* not TCP or UDP */
5070 goto done;
5071 }
5072 *csum_p = 0;
5073 /* adjust frame to skip mac-layer header */
5074 _mbuf_adjust_pkthdr_and_data(m0, mac_hlen);
5075 if (info_p->ip_is_ipv4) {
5076 *csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
5077 info_p->ip_pay_len);
5078 } else {
5079 *csum_p = inet6_cksum(m0, info_p->ip_proto,
5080 info_p->ip_hlen + info_p->ip_opt_len,
5081 info_p->ip_pay_len - info_p->ip_opt_len);
5082 }
5083 if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
5084 /* RFC 1122 4.1.3.4 */
5085 *csum_p = 0xffff;
5086 }
5087 /* adjust frame back to start of mac-layer header */
5088 _mbuf_adjust_pkthdr_and_data(m0, -mac_hlen);
5089 proto_csum_stats_increment(info_p->ip_proto,
5090 &stats_p->brms_in_computed_cksum);
5091
5092 /* indicate that the checksum is good */
5093 mbuf_set_csum_performed(m0, checksum_performed_all_good, 0xffff);
5094
5095 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5096 "IPv%c %s set checksum 0x%x",
5097 info_p->ip_is_ipv4 ? '4' : '6',
5098 (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
5099 ntohs(*csum_p));
5100 done:
5101 return error;
5102 }
5103
5104 static inline void
5105 bridge_handle_checksum_op(ifnet_t src_ifp, ifnet_t dst_ifp,
5106 mbuf_t m, ChecksumOperation cksum_op)
5107 {
5108 switch (cksum_op) {
5109 case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
5110 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5111 break;
5112 case CHECKSUM_OPERATION_FINALIZE:
5113 /* the checksum might not be correct, finalize now */
5114 VERIFY(dst_ifp != NULL);
5115 bridge_finalize_cksum(dst_ifp, m);
5116 break;
5117 case CHECKSUM_OPERATION_COMPUTE:
5118 VERIFY(dst_ifp != NULL && src_ifp != NULL);
5119 bridge_compute_cksum(src_ifp, dst_ifp, m);
5120 break;
5121 default:
5122 break;
5123 }
5124 return;
5125 }
5126
5127 static uint32_t
5128 get_if_tso_mtu(struct ifnet * ifp, bool is_ipv4)
5129 {
5130 uint32_t tso_mtu;
5131
5132 tso_mtu = is_ipv4 ? ifp->if_tso_v4_mtu : ifp->if_tso_v6_mtu;
5133 if (tso_mtu == 0) {
5134 tso_mtu = IP_MAXPACKET;
5135 }
5136
5137 #if DEBUG || DEVELOPMENT
5138 #define REDUCED_TSO_MTU (16 * 1024)
5139 if (if_bridge_reduce_tso_mtu != 0 && tso_mtu > REDUCED_TSO_MTU) {
5140 tso_mtu = REDUCED_TSO_MTU;
5141 }
5142 #endif /* DEBUG || DEVELOPMENT */
5143 return tso_mtu;
5144 }
5145
/*
 * tso_hwassist:
 * - determine whether the destination interface supports TSO offload
 * - parse the packet headers to verify that this is a large TCP
 *   packet requiring segmentation
 * - if the packet is not already marked for offload (*mss_p == 0), choose
 *   an MSS and mark the packet for TSO (and checksum offload when the
 *   interface supports TCP checksum offload)
 * - if the hardware doesn't support TSO, or the packet is larger than
 *   the interface's TSO MTU, set *need_gso
 *
 * Parameters:
 *   mp            in/out packet; handed to bridge_get_tcp_header()
 *   is_ipv4       true for IPv4, false for IPv6
 *   ifp           destination interface whose if_hwassist is consulted
 *   mac_hlen      length of the mac-layer header
 *   mss_p         in: MSS the packet is already marked with, or 0;
 *                 out: the MSS in effect when the function returns
 *   need_gso      out: true when software segmentation is required
 *   is_large_tcp  out: true when the packet was verified to be a large
 *                 TCP packet
 *
 * Returns the error from bridge_get_tcp_header(), otherwise 0.
 */
static int
tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
    int * mss_p, bool * need_gso, bool * is_large_tcp)
{
	uint32_t                csum_flags;
	int                     error = 0;
	ip_packet_info          info;
	u_int32_t               if_csum;
	u_int32_t               if_tso;
	u_int32_t               mbuf_tso;
	int                     mss = *mss_p;
	uint8_t                 seg_cnt = 0;
	bool                    supports_cksum = false;
	uint32_t                pkt_mtu;
	/* scratch counters for bridge_get_tcp_header(); never read here */
	struct bripstats        stats;

	*need_gso = false;
	*is_large_tcp = false;
	if (is_ipv4) {
		/*
		 * Enable both TCP and IP offload if the hardware supports it.
		 * If the hardware doesn't support TCP offload, supports_cksum
		 * will be false so we won't set either offload.
		 */
		if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
		supports_cksum = (if_csum & CSUM_TCP) != 0;
		if_tso = IFNET_TSO_IPV4;
		mbuf_tso = CSUM_TSO_IPV4;
	} else {
		if_csum = (ifp->if_hwassist & CSUM_TCPIPV6);
		supports_cksum = (if_csum & CSUM_TCPIPV6) != 0;
		if_tso = IFNET_TSO_IPV6;
		mbuf_tso = CSUM_TSO_IPV6;
	}
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
	    "%s: does%s support checksum 0x%x if_csum 0x%x",
	    ifp->if_xname, supports_cksum ? "" : " not",
	    ifp->if_hwassist, if_csum);

	/* verify that this is a large TCP frame */
	error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
	    &info, &stats);
	if (error != 0) {
		/* bad packet */
		goto done;
	}
	if (info.ip_proto_hdr == NULL) {
		/* not a TCP packet */
		goto done;
	}
	/* size of the IP packet, i.e. the frame without the mac-layer header */
	pkt_mtu = info.ip_hlen + info.ip_pay_len + info.ip_opt_len;
	if (mss == 0) {
		/* check for LRO */
		seg_cnt = (*mp)->m_pkthdr.rx_seg_cnt;
		if (seg_cnt == 1 || (seg_cnt == 0 && pkt_mtu <= ifp->if_mtu)) {
			/* not actually a large packet */
			goto done;
		}
	}
	if (mss == 0) {
		uint32_t                hdr_len;
		struct tcphdr *         tcp;

		/* combined length of the IP header(s) and the TCP header */
		tcp = (struct tcphdr *)info.ip_proto_hdr;
		hdr_len = info.ip_hlen + info.ip_opt_len + (tcp->th_off << 2);

		/* packet isn't marked, mark it now */
		if (seg_cnt != 0) {
			uint32_t        len;

			/* approximate the MSS using the LRO seg cnt */
			len = mbuf_pkthdr_len(*mp) - hdr_len - ETHER_HDR_LEN;
			mss = len / seg_cnt;
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
			    "%s: mss %d = len %d / seg cnt %d",
			    ifp->if_xname, mss, len, seg_cnt);
			if (mss <= 0) {
				/* unexpected value */
				mss = 0;
				goto done;
			}
		} else {
			/* no LRO hint: derive the MSS from the interface MTU */
			mss = ifp->if_mtu - hdr_len
			    - if_bridge_tso_reduce_mss_tx;
			assert(mss > 0);
		}
		csum_flags = mbuf_tso;
		if (supports_cksum) {
			csum_flags |= if_csum;
		}
		(*mp)->m_pkthdr.tso_segsz = mss;
		(*mp)->m_pkthdr.csum_flags |= csum_flags;
		(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
	}
	*is_large_tcp = true;
	(*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
	if ((ifp->if_hwassist & if_tso) == 0) {
		/* need gso if no hardware support */
		*need_gso = true;
	} else {
		uint32_t        tso_mtu = 0;

		tso_mtu = get_if_tso_mtu(ifp, is_ipv4);
		if (pkt_mtu > tso_mtu) {
			/* need gso if tso_mtu too small */
			*need_gso = true;
		}
	}
done:
	/* report the MSS in effect, on the error paths too */
	*mss_p = mss;
	return error;
}
5267
/*
 * bridge_enqueue:
 *
 *	Enqueue a packet list on a bridge member interface.
 *
 *	Each packet of `in_list` is unlinked, flagged with M_PROTO1, and then:
 *	- checked with tso_hwassist() when it is marked for TSO, is an LRO
 *	  packet received on a member (direction RX), or is an IP frame
 *	  larger than the bridge MTU
 *	- freed and counted as an output error if that check fails
 *	- segmented with gso_tcp_transmit() when software segmentation is
 *	  needed, otherwise handed to bridge_handle_checksum_op()
 *	The resulting list is sent with a single bridge_transmit() call and
 *	the bridge interface's output statistics are updated.
 *
 *	`src_if` may be NULL (bridge_output() passes NULL); `dst_if` must not
 *	be.
 *
 *	Returns the value returned by bridge_transmit(), or 0 if no packet
 *	was left to transmit.
 */
static int
bridge_enqueue(ifnet_t bridge_ifp, ifnet_t src_if, ifnet_t dst_if,
    ether_type_flag_t etypef, mbuf_t in_list, ChecksumOperation orig_cksum_op,
    pkt_direction_t direction)
{
	int             enqueue_error = 0;
	mbuf_t          next_packet;
	uint32_t        out_errors = 0;
	mblist          out_list;

	VERIFY(dst_if != NULL);

	mblist_init(&out_list);
	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
		bool                    check_gso = false;
		/* per-packet copy: may be downgraded for large TCP packets */
		ChecksumOperation       cksum_op = orig_cksum_op;
		errno_t                 error = 0;
		bool                    is_ipv4 = false;
		int                     len;
		int                     mss = 0;
		bool                    need_gso = false;

		scan->m_flags |= M_PROTO1; /* set to avoid loops */
		/* take the packet out of the list */
		next_packet = scan->m_nextpkt;
		scan->m_nextpkt = NULL;
		len = mbuf_pkthdr_len(scan);
		is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
		mss = _mbuf_get_tso_mss(scan);
		if (mss != 0) {
			/* packet is marked for segmentation */
			check_gso = true;
		} else if (direction == pkt_direction_RX &&
		    scan->m_pkthdr.rx_seg_cnt != 0) {
			/* LRO packet */
			check_gso = true;
		} else if (ether_type_flag_is_ip(etypef) &&
		    len > (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
			/*
			 * Need to segment the packet if it is a large frame
			 * and the destination interface does not support TSO.
			 *
			 * Note that with trailers, it's possible for a packet to
			 * be large but not actually require segmentation.
			 */
			check_gso = true;
		}
		if (check_gso) {
			bool    is_large_tcp = false;

			error = tso_hwassist(&scan, is_ipv4,
			    dst_if, sizeof(struct ether_header), &mss,
			    &need_gso, &is_large_tcp);
			/*
			 * a large TCP packet keeps its offload flags instead
			 * of having them cleared
			 */
			if (is_large_tcp &&
			    cksum_op == CHECKSUM_OPERATION_CLEAR_OFFLOAD) {
				cksum_op = CHECKSUM_OPERATION_NONE;
			}
		}
		if (error != 0) {
			/* tso_hwassist() failed: drop the packet */
			if (scan != NULL) {
				m_freem(scan);
				scan = NULL;
			}
			out_errors++;
		} else if (need_gso) {
			int     mac_hlen = sizeof(struct ether_header);
			mblist  segs;

			/*
			 * segment packets, add to list
			 *
			 * NOTE(review): scan is not referenced again after
			 * this call, so gso_tcp_transmit() presumably
			 * consumes it even when it returns no segments —
			 * confirm
			 */
			segs = gso_tcp_transmit(dst_if, scan, mac_hlen,
			    is_ipv4);
			if (segs.head != NULL) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
				    "%s (%s) append gso #segs %u bytes %u",
				    bridge_ifp->if_xname,
				    dst_if->if_xname,
				    segs.count, segs.bytes);
				mblist_append_list(&out_list, segs);
			} else {
				out_errors++;
			}
		} else {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
			    "%s (%s) append %d bytes mss %d op %d",
			    bridge_ifp->if_xname,
			    dst_if->if_xname,
			    len, mss, cksum_op);
			bridge_handle_checksum_op(src_if, dst_if,
			    scan, cksum_op);
			mblist_append(&out_list, scan);
		}
	}
	/* hand everything that's left to the member interface in one call */
	if (out_list.head != NULL) {
		enqueue_error = bridge_transmit(dst_if, out_list.head);
		if (enqueue_error != 0) {
			out_errors++;
		}
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
		    "%s (%s) bridge_transmit packets %u bytes %u error %d",
		    bridge_ifp->if_xname,
		    dst_if->if_xname,
		    out_list.count, out_list.bytes, enqueue_error);
	}
	/* account the packets, bytes and errors on the bridge interface */
	if (out_list.count != 0 || out_errors != 0) {
		ifnet_stat_increment_out(bridge_ifp, out_list.count,
		    out_list.bytes, out_errors);
	}
	return enqueue_error;
}
5382
5383 /*
5384 * bridge_member_output:
5385 *
5386 * Send output from a bridge member interface. This
5387 * performs the bridging function for locally originated
5388 * packets.
5389 *
5390 * The mbuf has the Ethernet header already attached.
5391 */
5392 static errno_t
5393 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5394 {
5395 struct bridge_iflist * bif = NULL;
5396 ifnet_t bridge_ifp;
5397 struct ether_header *eh;
5398 ether_type_flag_t etypef;
5399 struct ifnet *dst_if = NULL;
5400 uint16_t vlan;
5401 struct bridge_iflist *mac_nat_bif;
5402 ifnet_t mac_nat_ifp;
5403 mbuf_t m = *data;
5404
5405 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5406 "ifp %s", ifp->if_xname);
5407 if (m->m_len < ETHER_HDR_LEN) {
5408 m = m_pullup(m, ETHER_HDR_LEN);
5409 if (m == NULL) {
5410 *data = NULL;
5411 return EJUSTRETURN;
5412 }
5413 }
5414
5415 eh = mtod(m, struct ether_header *);
5416 vlan = VLANTAGOF(m);
5417 etypef = ether_type_flag_get(eh->ether_type);
5418
5419 BRIDGE_LOCK(sc);
5420 mac_nat_bif = sc->sc_mac_nat_bif;
5421 mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5422 if (mac_nat_ifp == ifp) {
5423 /* record the IP address used by the MAC NAT interface */
5424 (void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5425 m = *data;
5426 if (m == NULL) {
5427 /* packet was deallocated */
5428 BRIDGE_UNLOCK(sc);
5429 return EJUSTRETURN;
5430 }
5431 }
5432 bridge_ifp = sc->sc_ifp;
5433
5434 /*
5435 * APPLE MODIFICATION
5436 * If the packet is an 802.1X ethertype, then only send on the
5437 * original output interface.
5438 */
5439 if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5440 dst_if = ifp;
5441 goto sendunicast;
5442 }
5443
5444 /*
5445 * If bridge is down, but the original output interface is up,
5446 * go ahead and send out that interface. Otherwise, the packet
5447 * is dropped below.
5448 */
5449 if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5450 dst_if = ifp;
5451 goto sendunicast;
5452 }
5453
5454 /*
5455 * If the packet is a multicast, or we don't know a better way to
5456 * get there, send to all interfaces.
5457 */
5458 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5459 dst_if = NULL;
5460 } else {
5461 bif = bridge_rtlookup_bif(sc, eh->ether_dhost, vlan);
5462 if (bif != NULL) {
5463 dst_if = bif->bif_ifp;
5464 }
5465 }
5466 if (dst_if == NULL) {
5467 struct mbuf *mc;
5468 errno_t error;
5469
5470
5471 bridge_span(sc, etypef, m);
5472
5473 BRIDGE_LOCK2REF(sc, error);
5474 if (error != 0) {
5475 m_freem(m);
5476 return EJUSTRETURN;
5477 }
5478
5479 /*
5480 * Duplicate and send the packet across all member interfaces
5481 * except the originating interface.
5482 */
5483 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5484 dst_if = bif->bif_ifp;
5485 if (dst_if == ifp) {
5486 /* skip the originating interface */
5487 continue;
5488 }
5489 /* skip interface with inactive link status */
5490 if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5491 continue;
5492 }
5493
5494 /* skip interface that isn't running */
5495 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5496 continue;
5497 }
5498 /*
5499 * If the interface is participating in spanning
5500 * tree, make sure the port is in a state that
5501 * allows forwarding.
5502 */
5503 if ((bif->bif_ifflags & IFBIF_STP) &&
5504 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5505 continue;
5506 }
5507 /*
5508 * If the destination is the MAC NAT interface,
5509 * skip sending the packet. The packet can't be sent
5510 * if the source MAC is incorrect.
5511 */
5512 if (dst_if == mac_nat_ifp) {
5513 continue;
5514 }
5515
5516 /* make a deep copy to send on this member interface */
5517 mc = m_dup(m, M_DONTWAIT);
5518 if (mc == NULL) {
5519 (void)ifnet_stat_increment_out(bridge_ifp,
5520 0, 0, 1);
5521 continue;
5522 }
5523 (void)bridge_enqueue(bridge_ifp, ifp, dst_if, etypef,
5524 mc, CHECKSUM_OPERATION_COMPUTE, pkt_direction_TX);
5525 }
5526 BRIDGE_UNREF(sc);
5527
5528 if ((ifp->if_flags & IFF_RUNNING) == 0) {
5529 m_freem(m);
5530 return EJUSTRETURN;
5531 }
5532 /* allow packet to continue on the originating interface */
5533 return 0;
5534 }
5535
5536 sendunicast:
5537 /*
5538 * XXX Spanning tree consideration here?
5539 */
5540
5541 bridge_span(sc, etypef, m);
5542 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5543 m_freem(m);
5544 BRIDGE_UNLOCK(sc);
5545 return EJUSTRETURN;
5546 }
5547
5548 BRIDGE_UNLOCK(sc);
5549 if (dst_if == ifp) {
5550 /* allow packet to continue on the originating interface */
5551 return 0;
5552 }
5553 if (dst_if != mac_nat_ifp) {
5554 (void) bridge_enqueue(bridge_ifp, ifp, dst_if, etypef, m,
5555 CHECKSUM_OPERATION_COMPUTE, pkt_direction_TX);
5556 } else {
5557 /*
5558 * This is not the original output interface
5559 * and the destination is the MAC NAT interface.
5560 * Drop the packet because the packet can't be sent
5561 * if the source MAC is incorrect.
5562 */
5563 m_freem(m);
5564 }
5565 return EJUSTRETURN;
5566 }
5567
5568 /*
5569 * Output callback.
5570 *
5571 * This routine is called externally from above only when if_bridge_txstart
5572 * is disabled; otherwise it is called internally by bridge_start().
5573 */
5574 static int
5575 bridge_output(struct ifnet *ifp, struct mbuf *m)
5576 {
5577 struct bridge_iflist *bif;
5578 struct bridge_softc * __single sc = ifnet_softc(ifp);
5579 struct ether_header *eh;
5580 ether_type_flag_t etypef;
5581 struct ifnet *dst_if = NULL;
5582 int error = 0;
5583
5584 eh = mtod(m, struct ether_header *);
5585 etypef = ether_type_flag_get(eh->ether_type);
5586 BRIDGE_LOCK(sc);
5587
5588 if (!IS_BCAST_MCAST(m)) {
5589 bif = bridge_rtlookup_bif(sc, eh->ether_dhost, 0);
5590 if (bif != NULL) {
5591 dst_if = bif->bif_ifp;
5592 }
5593 }
5594
5595 (void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5596
5597 BRIDGE_BPF_TAP_OUT(ifp, m);
5598
5599 if (dst_if == NULL) {
5600 /* callee will unlock */
5601 bridge_broadcast(sc, NULL, etypef, m);
5602 } else {
5603 ifnet_t bridge_ifp;
5604
5605 bridge_ifp = sc->sc_ifp;
5606 BRIDGE_UNLOCK(sc);
5607
5608 error = bridge_enqueue(bridge_ifp, NULL, dst_if, etypef, m,
5609 CHECKSUM_OPERATION_FINALIZE, pkt_direction_TX);
5610 }
5611
5612 return error;
5613 }
5614
/*
 * bridge_finalize_cksum:
 *
 * Finalize the checksums requested in the csum_flags of packet `m` before
 * it is sent on `ifp`, doing in software whatever `ifp` does not advertise
 * in if_hwassist.
 *
 * Non-IP frames are left untouched. For IPv4/IPv6 frames:
 * - sw_csum is the set of requested checksum flags that the interface
 *   can't handle
 * - when the interface supports CSUM_PARTIAL and a delayed TCP checksum is
 *   requested that is not already in sw_csum, the packet is set up for
 *   partial checksum offload (csum_tx_start/csum_tx_stuff); a delayed
 *   non-TCP checksum is added to sw_csum instead
 * - in_finalize_cksum()/in6_finalize_cksum() is then called with sw_csum
 */
static void
bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
{
	struct ether_header *eh;
	bool is_ipv4;
	uint32_t sw_csum, hwcap;
	uint32_t did_sw;        /* only used for logging */
	uint32_t csum_flags;    /* flags before finalizing, for logging */

	eh = mtod(m, struct ether_header *);
	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
		return;
	}

	/* do in software what the hardware cannot */
	hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
	csum_flags = m->m_pkthdr.csum_flags;
	sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
	sw_csum &= IF_HWASSIST_CSUM_MASK;

	if (is_ipv4) {
		if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
		    (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
			if (m->m_pkthdr.csum_flags & CSUM_TCP) {
				/*
				 * partial offload: checksum starts right
				 * after a struct ip sized IP header and is
				 * stored at offset csum_data within it
				 */
				uint16_t start =
				    sizeof(*eh) + sizeof(struct ip);
				uint16_t ulpoff =
				    m->m_pkthdr.csum_data & 0xffff;
				m->m_pkthdr.csum_flags |=
				    (CSUM_DATA_VALID | CSUM_PARTIAL);
				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
				m->m_pkthdr.csum_tx_start = start;
			} else {
				/* not TCP: compute it in software */
				sw_csum |= (CSUM_DELAY_DATA &
				    m->m_pkthdr.csum_flags);
			}
		}
		did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
	} else {
		if ((hwcap & CSUM_PARTIAL) &&
		    !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
		    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
			if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
				/*
				 * partial offload: checksum starts right
				 * after the fixed IPv6 header and is stored
				 * at offset csum_data within it
				 */
				uint16_t start =
				    sizeof(*eh) + sizeof(struct ip6_hdr);
				uint16_t ulpoff =
				    m->m_pkthdr.csum_data & 0xffff;
				m->m_pkthdr.csum_flags |=
				    (CSUM_DATA_VALID | CSUM_PARTIAL);
				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
				m->m_pkthdr.csum_tx_start = start;
			} else {
				/* not TCP: compute it in software */
				sw_csum |= (CSUM_DELAY_IPV6_DATA &
				    m->m_pkthdr.csum_flags);
			}
		}
		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
	}
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
	    "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
	    ifp->if_xname, csum_flags, hwcap, sw_csum,
	    did_sw, m->m_pkthdr.csum_flags);
}
5678
5679 /*
5680 * bridge_start:
5681 *
5682 * Start output on a bridge.
5683 *
5684 * This routine is invoked by the start worker thread; because we never call
5685 * it directly, there is no need do deploy any serialization mechanism other
5686 * than what's already used by the worker thread, i.e. this is already single
5687 * threaded.
5688 *
5689 * This routine is called only when if_bridge_txstart is enabled.
5690 */
5691 static void
5692 bridge_start(struct ifnet *ifp)
5693 {
5694 mbuf_ref_t m;
5695
5696 for (;;) {
5697 if (ifnet_dequeue(ifp, &m) != 0) {
5698 break;
5699 }
5700
5701 (void) bridge_output(ifp, m);
5702 }
5703 }
5704
5705 static void
5706 prepare_input_packet(ifnet_t ifp, mbuf_t m)
5707 {
5708 mbuf_pkthdr_setrcvif(m, ifp);
5709 mbuf_pkthdr_setheader(m, mtod(m, void *));
5710 /* adjust frame to skip mac-layer header */
5711 _mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
5712 }
5713
5714 static void
5715 mark_tso_checksum_ok(mbuf_t m)
5716 {
5717 if (_mbuf_get_tso_mss(m) != 0 ||
5718 (m->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
5719 mbuf_set_csum_performed(m, checksum_performed_all_good, 0xffff);
5720 }
5721 }
5722
5723 static void
5724 inject_input_packet_list(ifnet_t ifp, mbuf_t in_list, bool m_proto1)
5725 {
5726 for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5727 /* mark the packets as arriving on the interface */
5728 BRIDGE_BPF_TAP_IN(ifp, scan);
5729 if (m_proto1) {
5730 scan->m_flags |= M_PROTO1; /* set to avoid loops */
5731 }
5732 prepare_input_packet(ifp, scan);
5733 mark_tso_checksum_ok(scan);
5734 }
5735 dlil_input_packet_list(ifp, in_list);
5736 return;
5737 }
5738
5739 static void
5740 adjust_input_packet_list(mbuf_t in_list)
5741 {
5742 for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5743 mbuf_pkthdr_setheader(scan, mtod(scan, void *));
5744 _mbuf_adjust_pkthdr_and_data(scan, ETHER_HDR_LEN);
5745 }
5746 }
5747
5748 static bool
5749 in_addr_is_ours(struct in_addr ip)
5750 {
5751 struct in_ifaddr *ia;
5752 bool ours = false;
5753
5754 lck_rw_lock_shared(&in_ifaddr_rwlock);
5755 TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5756 if (ia->ia_addr.sin_addr.s_addr == ip.s_addr) {
5757 ours = true;
5758 break;
5759 }
5760 }
5761 lck_rw_done(&in_ifaddr_rwlock);
5762 return ours;
5763 }
5764
5765 static bool
5766 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5767 {
5768 struct in6_addr dst_ip;
5769 struct in6_ifaddr *ia6;
5770 bool ours = false;
5771
5772 if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5773 /* need to embed scope ID for comparison */
5774 bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5775 dst_ip.s6_addr16[1] = htons(ifscope);
5776 ip6_p = &dst_ip;
5777 }
5778 lck_rw_lock_shared(&in6_ifaddr_rwlock);
5779 TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5780 if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5781 ia6->ia_addr.sin6_scope_id, ifscope)) {
5782 ours = true;
5783 break;
5784 }
5785 }
5786 lck_rw_done(&in6_ifaddr_rwlock);
5787 return ours;
5788 }
5789
5790 static bool
5791 ip_packet_info_dst_is_our_ip(ip_packet_info_t info_p, int index)
5792 {
5793 /* if the destination is our IP address, don't segment */
5794 bool our_ip = false;
5795
5796 if (info_p->ip_is_ipv4) {
5797 struct ip * hdr;
5798 struct in_addr dst_ip;
5799
5800 hdr = (struct ip *)(info_p->ip_hdr);
5801 bcopy(&hdr->ip_dst, &dst_ip, sizeof(dst_ip));
5802 our_ip = in_addr_is_ours(dst_ip);
5803 } else {
5804 struct ip6_hdr * hdr;
5805
5806 hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5807 our_ip = in6_addr_is_ours(&hdr->ip6_dst, index);
5808 }
5809 return our_ip;
5810 }
5811
/*
 * ip_addr:
 *
 * Storage for either an IPv4 or an IPv6 address. The union carries no
 * discriminator of its own; the functions that operate on it are told
 * which member is valid through an is_ipv4 flag or an ip_packet_info.
 */
typedef union {
	struct in_addr  ip;     /* IPv4 address */
	struct in6_addr ip6;    /* IPv6 address */
} ip_addr, *ip_addr_t;
5816
5817 static void
5818 ip_packet_info_copy_dst_ip_addr(ip_packet_info_t info_p, ip_addr_t ipaddr)
5819 {
5820 if (info_p->ip_is_ipv4) {
5821 struct ip * hdr;
5822
5823 hdr = (struct ip *)(info_p->ip_hdr);
5824 bcopy(&hdr->ip_dst, &ipaddr->ip, sizeof(ipaddr->ip));
5825 } else {
5826 struct ip6_hdr * hdr;
5827
5828 hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5829 bcopy(&hdr->ip6_dst, &ipaddr->ip6, sizeof(ipaddr->ip6));
5830 }
5831 }
5832
5833 static bool
5834 ip_addr_are_equal(ip_addr_t addr1, ip_addr_t addr2, bool is_ipv4)
5835 {
5836 bool equal;
5837
5838 if (is_ipv4) {
5839 equal = addr1->ip.s_addr == addr2->ip.s_addr;
5840 } else {
5841 equal = IN6_ARE_ADDR_EQUAL(&addr1->ip6, &addr2->ip6);
5842 }
5843 return equal;
5844 }
5845
5846 static bool
5847 ip_addr_is_ours(ip_addr_t ipaddr, int index, bool is_ipv4)
5848 {
5849 bool our_ip;
5850
5851 if (is_ipv4) {
5852 our_ip = in_addr_is_ours(ipaddr->ip);
5853 } else {
5854 our_ip = in6_addr_is_ours(&ipaddr->ip6, index);
5855 }
5856 return our_ip;
5857 }
5858
/*
 * bridge_interface_input_list:
 *
 * Input the packet list `list` on the bridge interface `bridge_ifp`.
 *
 * When IP forwarding is disabled for the address family (or `etypef` is
 * neither IPv4 nor IPv6), the list is input unchanged. Otherwise each
 * packet is examined first:
 * - packets whose headers can't be parsed are freed and counted as
 *   input errors
 * - packets destined to one of our IP addresses are passed up as is
 * - other TCP packets that are marked for TSO (virtio member) or that are
 *   LRO/large (non-virtio member) are segmented in software
 * - remaining packets from a virtio member that request checksum offload
 *   get their checksum computed in software
 *
 * `bif_uses_virtio` selects between the virtio and non-virtio criteria
 * above.
 */
static void
bridge_interface_input_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
    mblist list, bool bif_uses_virtio)
{
	uint32_t        in_errors = 0;
	bool            is_ipv4;        /* only set/used when may_forward */
	mblist          in_list;
	/* result of the most recent "is this IP ours" lookup */
	ip_addr         last_ip;
	bool            last_ip_ours = false;
	bool            last_ip_valid = false;
	u_int           mac_hlen;       /* only set/used when may_forward */
	bool            may_forward = false;
	mbuf_t          next_packet;

	switch (etypef) {
	case ETHER_TYPE_FLAG_IPV4:
		is_ipv4 = true;
		may_forward = (ipforwarding != 0);
		break;
	case ETHER_TYPE_FLAG_IPV6:
		is_ipv4 = false;
		may_forward = (ip6_forwarding != 0);
		break;
	}
	if (!may_forward) {
		/* nothing to examine, input the list as is */
		in_list = list;
		goto done;
	}

	mblist_init(&in_list);
	mac_hlen = sizeof(struct ether_header);
	bzero(&last_ip, sizeof(last_ip));
	for (mbuf_ref_t scan = list.head; scan != NULL; scan = next_packet) {
		int             error;
		ip_packet_info  info;
		bool            ip_ours;
		struct ifbrmstats stats; /* XXX should really be accounted */
		ip_addr         this_ip;

		/* take it out of the list */
		next_packet = scan->m_nextpkt;
		scan->m_nextpkt = NULL;

		/* check for TCP packet and get IP header */
		error = bridge_get_tcp_header(&scan, mac_hlen, is_ipv4,
		    &info, &stats.brms_in_ip);
		if (error != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
			    "%s bridge_get_tcp_header failed %d",
			    bridge_ifp->if_xname, error);
			if (scan != NULL) {
				m_freem(scan);
				scan = NULL;
			}
			in_errors++;
			continue;
		}
		ip_packet_info_copy_dst_ip_addr(&info, &this_ip);
		if (last_ip_valid &&
		    ip_addr_are_equal(&last_ip, &this_ip, is_ipv4)) {
			/* use cached result */
			ip_ours = last_ip_ours;
		} else {
			ip_ours = ip_addr_is_ours(&this_ip,
			    bridge_ifp->if_index,
			    is_ipv4);
			/* cache the result */
			last_ip_valid = true;
			last_ip_ours = ip_ours;
			last_ip = this_ip;
		}

		/* if the packet is destined to us, just send it up */
		if (ip_ours) {
			mblist_append(&in_list, scan);
			continue;
		}
		/*
		 * If this is a TCP packet that's marked for TSO or LRO, or
		 * we think it's a large packet, segment it.
		 */
		if (info.ip_proto_hdr != NULL &&
		    ((bif_uses_virtio && _mbuf_get_tso_mss(scan) != 0) ||
		    (!bif_uses_virtio &&
		    (scan->m_pkthdr.rx_seg_cnt > 1 ||
		    (mbuf_pkthdr_len(scan) >
		    (bridge_ifp->if_mtu + ETHER_HDR_LEN)))))) {
			mblist  seg;

			/*
			 * NOTE(review): scan is not referenced again when no
			 * segments come back, so gso_tcp_with_info()
			 * presumably consumes it — confirm
			 */
			seg = gso_tcp_with_info(bridge_ifp, scan, &info,
			    mac_hlen, is_ipv4, false);
			if (seg.head == NULL) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
				    "gso_tcp returned no packets");
				in_errors++;
				continue;
			}
			if (seg.count > 1) {
				/* packet was segmented+checksummed */
				mblist_append_list(&in_list, seg);
				continue;
			}
			/* there's just one packet, no segmentation */
			scan = seg.head;
		}
		/* need checksum if it's marked for checksum offload */
		if (bif_uses_virtio &&
		    (scan->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
			error = bridge_offload_checksum(&scan, &info, &stats);
			if (error != 0) {
				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
				    "%s bridge_offload_checksum failed %d",
				    bridge_ifp->if_xname, error);
				if (scan != NULL) {
					m_freem(scan);
					scan = NULL;
				}
				in_errors++;
				continue;
			}
		}
		mblist_append(&in_list, scan);
	}

done:
	if (in_list.head != NULL) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s packets %d bytes %d",
		    bridge_ifp->if_xname,
		    in_list.count, in_list.bytes);
		/* Mark the packets as arriving on the bridge interface */
		inject_input_packet_list(bridge_ifp, in_list.head, false);
		ifnet_stat_increment_in(bridge_ifp, in_list.count,
		    in_list.bytes, in_errors);
	} else if (in_errors != 0) {
		/* everything was dropped, only errors to report */
		ifnet_stat_increment_in(bridge_ifp, 0, 0, in_errors);
	}
	return;
}
5998
/*
 * bridge_broadcast:
 *
 *	Send a frame to all interfaces that are members of
 *	the bridge, except for the one on which the packet
 *	arrived.
 *
 *	`sbif` is the member the frame arrived on, or NULL when the
 *	bridge interface itself is the sender.  `etypef` is passed
 *	through unchanged to bridge_enqueue().
 *
 *	`m` is not returned to the caller on any path: it is either
 *	handed to the last member on the list or released with
 *	m_freem() before returning.
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
    ether_type_flag_t etypef, mbuf_t m)
{
	ifnet_t bridge_ifp;
	struct bridge_iflist *dbif;
	struct ifnet * src_if;
	mbuf_ref_t mc;
	struct mbuf *mc_in;
	/* `used` becomes 1 once `m` itself (not a duplicate) is handed off */
	int error = 0, used = 0;
	ChecksumOperation cksum_op;
	struct mac_nat_record mnr;
	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
	boolean_t translate_mac = FALSE;
	uint32_t sc_filter_flags;
	bool is_bcast_mcast;

	bridge_ifp = sc->sc_ifp;
	if (sbif != NULL) {
		src_if = sbif->bif_ifp;
		cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
			/* get the translation record */
			translate_mac
			        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
			if (m == NULL) {
				/* packet was deallocated */
				BRIDGE_UNLOCK(sc);
				return;
			}
		}
	} else {
		/*
		 * sbif is NULL when the bridge interface calls
		 * bridge_broadcast().
		 */
		cksum_op = CHECKSUM_OPERATION_FINALIZE;
		src_if = NULL;
	}

	BRIDGE_LOCK2REF(sc, error);
	if (error) {
		/* could not convert the lock to a reference: drop the frame */
		m_freem(m);
		return;
	}
	/* snapshot values that are consulted for every member below */
	is_bcast_mcast = IS_BCAST_MCAST(m);
	sc_filter_flags = sc->sc_filter_flags;
	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
		ifnet_t dst_if;

		dst_if = dbif->bif_ifp;
		if (dst_if == src_if) {
			/* skip the interface that the packet came in on */
			continue;
		}

		/* Private segments can not talk to each other */
		if (sbif != NULL &&
		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
			continue;
		}

		/* skip members that spanning tree has put in the discarding state */
		if ((dbif->bif_ifflags & IFBIF_STP) &&
		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			continue;
		}

		/*
		 * Frames that are not broadcast/multicast are only sent
		 * to members that have IFBIF_DISCOVER set.
		 */
		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
		    !is_bcast_mcast) {
			continue;
		}

		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
			continue;
		}

		if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
			continue;
		}

		/*
		 * The last member on the list receives the original
		 * mbuf; every other eligible member gets a duplicate.
		 */
		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
			mc = m;
			used = 1;
		} else {
			mc = m_dup(m, M_DONTWAIT);
			if (mc == NULL) {
				/* count the failed duplication as an output error */
				(void) ifnet_stat_increment_out(bridge_ifp,
				    0, 0, 1);
				continue;
			}
		}

		/*
		 * If broadcast input is enabled, do so only if this
		 * is an input packet.
		 */
		if (sbif != NULL && is_bcast_mcast &&
		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
			mc_in = m_dup(mc, M_DONTWAIT);
			/* this could fail, but we continue anyways */
		} else {
			mc_in = NULL;
		}

		/* out */
		if (translate_mac && mac_nat_bif == dbif) {
			/* translate the packet */
			bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
		}

		/*
		 * Run the member packet filter.  Every early exit from
		 * this section releases mc_in, since nothing else will.
		 */
		if (mc != NULL && sbif != NULL &&
		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
			/* m_copyup is applied to duplicates only (used == 0) */
			if (used == 0) {
				/* Keep the layer3 header aligned */
				int i = min(mc->m_pkthdr.len, max_protohdr);
				mc = m_copyup(mc, i, ETHER_ALIGN);
				if (mc == NULL) {
					(void) ifnet_stat_increment_out(
						sc->sc_ifp, 0, 0, 1);
					if (mc_in != NULL) {
						m_freem(mc_in);
						mc_in = NULL;
					}
					continue;
				}
			}
			/*
			 * NOTE(review): on a non-zero return mc is not freed
			 * here; presumably bridge_pf() disposed of it -- confirm.
			 */
			if (bridge_pf(&mc, dst_if, sc_filter_flags, false) != 0) {
				if (mc_in != NULL) {
					m_freem(mc_in);
					mc_in = NULL;
				}
				continue;
			}
			if (mc == NULL) {
				if (mc_in != NULL) {
					m_freem(mc_in);
					mc_in = NULL;
				}
				continue;
			}
		}

		if (mc != NULL) {
			/* verify checksum if necessary */
			if (bif_has_checksum_offload(dbif) && sbif != NULL &&
			    !bif_has_checksum_offload(sbif)) {
				error = bridge_verify_checksum(&mc,
				    &dbif->bif_stats);
				if (error != 0) {
					/* verification failed: drop the outbound copy */
					if (mc != NULL) {
						m_freem(mc);
					}
					mc = NULL;
				}
			}
			if (mc != NULL) {
				(void) bridge_enqueue(bridge_ifp,
				    NULL, dst_if, etypef, mc, cksum_op,
				    pkt_direction_TX);
			}
		}

		/* in */
		if (mc_in == NULL) {
			continue;
		}
		/* deliver the extra copy as input on the member interface */
		BRIDGE_BPF_TAP_IN(dst_if, mc_in);
		prepare_input_packet(dst_if, mc_in);
		mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
		dlil_input_packet_list(dst_if, mc_in);
	}
	/* the original was never handed to a member, so release it here */
	if (used == 0) {
		m_freem(m);
	}


	BRIDGE_UNREF(sc);
}
6186
6187 static mbuf_t
6188 copy_packet_list(mbuf_t m)
6189 {
6190 mblist ret;
6191 mbuf_t next_packet;
6192
6193 mblist_init(&ret);
6194 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
6195 mbuf_t copy_m;
6196
6197 /* take it out of the list */
6198 next_packet = scan->m_nextpkt;
6199 scan->m_nextpkt = NULL;
6200
6201 /* create a copy and add it to the new list */
6202 copy_m = m_dup(scan, M_DONTWAIT);
6203 if (copy_m != NULL) {
6204 mblist_append(&ret, copy_m);
6205 }
6206
6207 /* put it back in the original list */
6208 scan->m_nextpkt = next_packet;
6209 }
6210 return ret.head;
6211 }
6212
/*
 * bridge_broadcast_list:
 *
 *	Broadcast a list of packets to all members except `sbif`.
 *	Consumes `m` before returning.
 *
 *	`sbif` is the member the packets arrived on, or NULL when the
 *	bridge interface is the sender.  `etypef` and `direction` are
 *	passed through to bridge_enqueue().  Whether the list is treated
 *	as broadcast/multicast is decided by applying IS_BCAST_MCAST()
 *	to `m`, i.e. to the head of the list.
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_broadcast_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
    ether_type_flag_t etypef, mbuf_t m, pkt_direction_t direction)
{
	bool bridge_has_address;
	ifnet_t bridge_ifp;
	struct bridge_iflist * dbif;
	bool is_bcast_mcast;
	errno_t error = 0;
	ChecksumOperation cksum_op;
	struct bridge_iflist * mac_nat_bif = sc->sc_mac_nat_bif;
	ifnet_t mac_nat_if = NULL;
	bool need_mac_nat = false;
	/* translated copy reserved for the MAC-NAT member, if one is needed */
	mbuf_t out_mac_nat = NULL;
	ifnet_t src_if;
	uint32_t sc_filter_flags;
	/* set once `m` itself has been handed to a member */
	bool used = false;

	bridge_ifp = sc->sc_ifp;
	if (sbif != NULL) {
		src_if = sbif->bif_ifp;

		if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
			bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);

			/* compute checksum on packets marked with offload */
			m = bridge_checksum_offload_list(bridge_ifp, sbif,
			    m, is_ipv4);
			if (m == NULL) {
				/* nothing left to send; the lock is still held here */
				BRIDGE_UNLOCK(sc);
				goto done;
			}
			cksum_op = CHECKSUM_OPERATION_NONE;
		} else {
			cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
		}

		/*
		 * If MAC-NAT is enabled and we'll be sending the packets
		 * over it, verify that it is up and active before
		 * deciding to make a translated copy.
		 */
		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
			mac_nat_if = mac_nat_bif->bif_ifp;
			if ((mac_nat_if->if_flags & IFF_RUNNING) != 0 &&
			    (mac_nat_bif->bif_flags & BIFF_MEDIA_ACTIVE) != 0) {
				need_mac_nat = true;
			}
		}
	} else {
		/*
		 * sbif is NULL when the bridge interface calls
		 * bridge_broadcast_list() (TBD).
		 */
		cksum_op = CHECKSUM_OPERATION_FINALIZE;
		src_if = NULL;
	}

	/*
	 * Create a translated copy for packets destined to MAC-NAT interface.
	 */
	if (need_mac_nat) {
		out_mac_nat
		        = bridge_mac_nat_copy_and_translate_list(sc, sbif,
		    mac_nat_if, m);
	}
	/* read softc state before BRIDGE_LOCK2REF() */
	sc_filter_flags = sc->sc_filter_flags;
	bridge_has_address = (sc->sc_flags & SCF_ADDRESS_ASSIGNED) != 0;
	BRIDGE_LOCK2REF(sc, error);
	if (error) {
		/* `done` releases both `m` and any translated copy */
		goto done;
	}
	is_bcast_mcast = IS_BCAST_MCAST(m);

	/* make a copy for the bridge interface */
	if (sbif != NULL && is_bcast_mcast && bridge_has_address) {
		mbuf_t in_list;

		in_list = copy_packet_list(m);
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
		    "%s mcast for us in_m %p",
		    bridge_ifp->if_xname, in_list);
		if (in_list != NULL) {
			inject_input_packet_list(bridge_ifp, in_list, false);
		}
	}

	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
		ifnet_t dst_if;
		mbuf_t in_m = NULL;
		mbuf_t out_m = NULL;

		dst_if = dbif->bif_ifp;
		if (dst_if == src_if) {
			/* skip the interface that the packet came in on */
			continue;
		}

		/* Private segments can not talk to each other */
		if (sbif != NULL &&
		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
			continue;
		}

		/* skip members that spanning tree has put in the discarding state */
		if ((dbif->bif_ifflags & IFBIF_STP) &&
		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			continue;
		}

		/*
		 * Lists that are not broadcast/multicast are only sent
		 * to members that have IFBIF_DISCOVER set.
		 */
		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
		    !is_bcast_mcast) {
			continue;
		}

		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
			continue;
		}

		if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
			continue;
		}
		/*
		 * Pick the list to transmit: the pre-translated copy for
		 * the MAC-NAT member, the original for the last member on
		 * the list, a fresh copy for everyone else.
		 */
		if (dbif == mac_nat_bif) {
			/* translated copy was created above, use that */
			out_m = out_mac_nat;
			out_mac_nat = NULL;
		} else if (TAILQ_NEXT(dbif, bif_next) == NULL) {
			/* consume `m` */
			out_m = m;
			used = true;
		} else {
			/* needs a copy */
			out_m = copy_packet_list(m);
		}

		if (out_m == NULL) {
			/* no list available for this member: count one output error */
			ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
			continue;
		}
		/*
		 * If broadcast input is enabled, do so only if this
		 * is an input packet.
		 */
		if (sbif != NULL && is_bcast_mcast &&
		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
			/* copied from `m`, not from the (possibly translated) out_m */
			in_m = copy_packet_list(m);
			/* this could fail, but we continue anyways */
		} else {
			in_m = NULL;
		}

		if (sbif != NULL &&
		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
			/* the filter may hand back a shorter list, or NULL */
			out_m = bridge_pf_list(out_m, dst_if,
			    sc_filter_flags, false);
		}
		if (out_m != NULL) {
			/* verify checksum if necessary */
			if (sbif != NULL &&
			    ether_type_flag_is_ip(etypef) &&
			    bif_has_checksum_offload(dbif) &&
			    !bif_has_checksum_offload(sbif)) {
				bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);

				out_m = bridge_verify_checksum_list(bridge_ifp,
				    dbif, out_m, is_ipv4);
			}
			if (out_m != NULL) {
				bridge_enqueue(bridge_ifp, src_if, dst_if,
				    etypef, out_m, cksum_op, direction);
			}
		}

		/* in */
		if (in_m != NULL) {
			inject_input_packet_list(dst_if, in_m, true);
		}
	}

	BRIDGE_UNREF(sc);

done:
	/* release whatever was not handed to a member */
	if (out_mac_nat != NULL) {
		m_freem_list(out_mac_nat);
	}
	if (!used) {
		m_freem_list(m);
	}
	return;
}
6410
6411 #define NEEDED_CSUM_IPV4 (IF_HWASSIST_CSUM_UDP | IF_HWASSIST_CSUM_TCP)
6412 #define NEEDED_CSUM_IPV6 (IF_HWASSIST_CSUM_UDPIPV6 | IF_HWASSIST_CSUM_TCPIPV6)
6413
6414 static bool
6415 interface_supports_hw_checksum(ifnet_t ifp, bool is_ipv4)
6416 {
6417 uint32_t hwcap = IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
6418 uint32_t needed = is_ipv4 ? NEEDED_CSUM_IPV4 : NEEDED_CSUM_IPV6;
6419 bool supports;
6420
6421 supports = (hwcap & needed) == needed;
6422 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM, "%s: does %ssupport checksum",
6423 ifp->if_xname, supports ? "" : "not ");
6424 return supports;
6425 }
6426
6427 static void
6428 bridge_forward_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
6429 ifnet_t dst_if, ether_type_flag_t etypef, mbuf_t m)
6430 {
6431 bool checksum_ok = false;
6432 ChecksumOperation cksum_op;
6433 ifnet_t bridge_ifp;
6434 struct bridge_iflist * dbif;
6435 uint32_t sc_filter_flags;
6436 ifnet_t src_if;
6437
6438 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6439 goto drop;
6440 }
6441 dbif = bridge_lookup_member_if(sc, dst_if);
6442 if (dbif == NULL) {
6443 /* Not a member of the bridge (anymore?) */
6444 goto drop;
6445 }
6446
6447 /* Private segments can not talk to each other */
6448 if ((sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) != 0) {
6449 goto drop;
6450 }
6451 bridge_ifp = sc->sc_ifp;
6452 src_if = sbif->bif_ifp;
6453 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6454 if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
6455 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6456
6457 if (dbif == sc->sc_mac_nat_bif ||
6458 (IFNET_IS_VMNET(dst_if) && !bif_uses_virtio(dbif)) ||
6459 !interface_supports_hw_checksum(dst_if, is_ipv4)) {
6460 /* compute checksums now if necessary */
6461 m = bridge_checksum_offload_list(bridge_ifp, sbif,
6462 m, is_ipv4);
6463 checksum_ok = true;
6464 } else {
6465 cksum_op = CHECKSUM_OPERATION_NONE;
6466 }
6467 }
6468
6469 if (dbif == sc->sc_mac_nat_bif) {
6470 /* translate the packets before forwarding them */
6471 if ((etypef & ETHER_TYPE_FLAG_IP_ARP) != 0) {
6472 m = bridge_mac_nat_translate_list(sc, sbif, dst_if, m);
6473 }
6474 } else if (!checksum_ok && ether_type_flag_is_ip(etypef) &&
6475 bif_has_checksum_offload(dbif) && !bif_has_checksum_offload(sbif)) {
6476 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6477
6478 /*
6479 * If the destination interface has checksum offload enabled,
6480 * verify the checksum now, unless the source interface also has
6481 * checksum offload enabled. The checksum in that case has
6482 * already just been computed and verifying it is unnecessary.
6483 */
6484 m = bridge_verify_checksum_list(bridge_ifp, dbif, m, is_ipv4);
6485 }
6486 sc_filter_flags = sc->sc_filter_flags;
6487 BRIDGE_UNLOCK(sc);
6488 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6489 m = bridge_pf_list(m, dst_if, sc_filter_flags, false);
6490 }
6491
6492 /*
6493 * We're forwarding inbound packets for which the checksums must
6494 * already have been computed and if required, verified, or
6495 * packets from a virtio-enabled interface for which we rely
6496 * on the packet containing appropriate offload flags.
6497 */
6498 if (m != NULL) {
6499 bridge_enqueue(bridge_ifp, src_if, dst_if, etypef, m,
6500 cksum_op, pkt_direction_RX);
6501 }
6502 return;
6503
6504 drop:
6505 BRIDGE_UNLOCK(sc);
6506 m_freem_list(m);
6507 return;
6508 }
6509
6510 /*
6511 * bridge_span:
6512 *
6513 * Duplicate a packet out one or more interfaces that are in span mode,
6514 * the original mbuf is unmodified.
6515 */
6516 static void
6517 bridge_span(struct bridge_softc *sc, ether_type_flag_t etypef, struct mbuf *m)
6518 {
6519 struct bridge_iflist *bif;
6520 struct ifnet *dst_if;
6521 struct mbuf *mc;
6522
6523 if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6524 return;
6525 }
6526
6527 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6528 dst_if = bif->bif_ifp;
6529
6530 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6531 continue;
6532 }
6533
6534 mc = m_copypacket(m, M_DONTWAIT);
6535 if (mc == NULL) {
6536 (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6537 continue;
6538 }
6539
6540 (void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, etypef, mc,
6541 CHECKSUM_OPERATION_NONE, pkt_direction_TX);
6542 }
6543 }
6544
6545 /*
6546 * bridge_rtupdate:
6547 *
6548 * Add a bridge routing entry.
6549 */
6550 static int
6551 bridge_rtupdate(struct bridge_softc *sc, const uint8_t dst[ETHER_ADDR_LEN], uint16_t vlan,
6552 struct bridge_iflist *bif, int setflags, uint8_t flags)
6553 {
6554 struct bridge_rtnode *brt;
6555 int error;
6556
6557 BRIDGE_LOCK_ASSERT_HELD(sc);
6558
6559 /* Check the source address is valid and not multicast. */
6560 if (ETHER_IS_MULTICAST(dst) ||
6561 (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6562 dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6563 return EINVAL;
6564 }
6565
6566 /* 802.1p frames map to vlan 1 */
6567 if (vlan == 0) {
6568 vlan = 1;
6569 }
6570
6571 /*
6572 * A route for this destination might already exist. If so,
6573 * update it, otherwise create a new one.
6574 */
6575 if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6576 if (sc->sc_brtcnt >= sc->sc_brtmax) {
6577 sc->sc_brtexceeded++;
6578 return ENOSPC;
6579 }
6580 /* Check per interface address limits (if enabled) */
6581 if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6582 bif->bif_addrexceeded++;
6583 return ENOSPC;
6584 }
6585
6586 /*
6587 * Allocate a new bridge forwarding node, and
6588 * initialize the expiration time and Ethernet
6589 * address.
6590 */
6591 brt = zalloc_noblock(bridge_rtnode_pool);
6592 if (brt == NULL) {
6593 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6594 "zalloc_nolock failed");
6595 return ENOMEM;
6596 }
6597 bzero(brt, sizeof(struct bridge_rtnode));
6598
6599 if (bif->bif_ifflags & IFBIF_STICKY) {
6600 brt->brt_flags = IFBAF_STICKY;
6601 } else {
6602 brt->brt_flags = IFBAF_DYNAMIC;
6603 }
6604
6605 memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6606 brt->brt_vlan = vlan;
6607
6608 if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6609 zfree(bridge_rtnode_pool, brt);
6610 return error;
6611 }
6612 brt->brt_dst = bif;
6613 bif->bif_addrcnt++;
6614 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6615 "added %02x:%02x:%02x:%02x:%02x:%02x "
6616 "on %s count %u hashsize %u",
6617 dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6618 sc->sc_ifp->if_xname, sc->sc_brtcnt,
6619 sc->sc_rthash_size);
6620 }
6621
6622 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6623 brt->brt_dst != bif) {
6624 brt->brt_dst->bif_addrcnt--;
6625 brt->brt_dst = bif;
6626 brt->brt_dst->bif_addrcnt++;
6627 }
6628
6629 if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6630 unsigned long now;
6631
6632 now = (unsigned long) net_uptime();
6633 brt->brt_expire = now + sc->sc_brttimeout;
6634 }
6635 if (setflags) {
6636 brt->brt_flags = flags;
6637 }
6638
6639 return 0;
6640 }
6641
6642 /*
6643 * bridge_rtlookup:
6644 *
6645 * Lookup the destination interface for an address.
6646 */
6647 static struct bridge_iflist *
6648 bridge_rtlookup_bif(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
6649 uint16_t vlan)
6650 {
6651 struct bridge_rtnode *brt;
6652
6653 BRIDGE_LOCK_ASSERT_HELD(sc);
6654
6655 if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6656 return NULL;
6657 }
6658
6659 return brt->brt_dst;
6660 }
6661
6662 /*
6663 * bridge_rttrim:
6664 *
6665 * Trim the routine table so that we have a number
6666 * of routing entries less than or equal to the
6667 * maximum number.
6668 */
6669 static void
6670 bridge_rttrim(struct bridge_softc *sc)
6671 {
6672 struct bridge_rtnode *brt, *nbrt;
6673
6674 BRIDGE_LOCK_ASSERT_HELD(sc);
6675
6676 /* Make sure we actually need to do this. */
6677 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6678 return;
6679 }
6680
6681 /* Force an aging cycle; this might trim enough addresses. */
6682 bridge_rtage(sc);
6683 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6684 return;
6685 }
6686
6687 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6688 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6689 bridge_rtnode_destroy(sc, brt);
6690 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6691 return;
6692 }
6693 }
6694 }
6695 }
6696
6697 /*
6698 * bridge_aging_timer:
6699 *
6700 * Aging periodic timer for the bridge routing table.
6701 */
6702 static void
6703 bridge_aging_timer(struct bridge_softc *sc)
6704 {
6705 BRIDGE_LOCK_ASSERT_HELD(sc);
6706
6707 bridge_rtage(sc);
6708 if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6709 (sc->sc_flags & SCF_DETACHING) == 0) {
6710 sc->sc_aging_timer.bdc_sc = sc;
6711 sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6712 sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6713 bridge_schedule_delayed_call(&sc->sc_aging_timer);
6714 }
6715 }
6716
6717 /*
6718 * bridge_rtage:
6719 *
6720 * Perform an aging cycle.
6721 */
6722 static void
6723 bridge_rtage(struct bridge_softc *sc)
6724 {
6725 struct bridge_rtnode *brt, *nbrt;
6726 unsigned long now;
6727
6728 BRIDGE_LOCK_ASSERT_HELD(sc);
6729
6730 now = (unsigned long) net_uptime();
6731
6732 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6733 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6734 if (now >= brt->brt_expire) {
6735 bridge_rtnode_destroy(sc, brt);
6736 }
6737 }
6738 }
6739 if (sc->sc_mac_nat_bif != NULL) {
6740 bridge_mac_nat_age_entries(sc, now);
6741 }
6742 }
6743
6744 /*
6745 * bridge_rtflush:
6746 *
6747 * Remove all dynamic addresses from the bridge.
6748 */
6749 static void
6750 bridge_rtflush(struct bridge_softc *sc, int full)
6751 {
6752 struct bridge_rtnode *brt, *nbrt;
6753
6754 BRIDGE_LOCK_ASSERT_HELD(sc);
6755
6756 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6757 if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6758 bridge_rtnode_destroy(sc, brt);
6759 }
6760 }
6761 }
6762
6763 /*
6764 * bridge_rtdaddr:
6765 *
6766 * Remove an address from the table.
6767 */
6768 static int
6769 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN], uint16_t vlan)
6770 {
6771 struct bridge_rtnode *brt;
6772 int found = 0;
6773
6774 BRIDGE_LOCK_ASSERT_HELD(sc);
6775
6776 /*
6777 * If vlan is zero then we want to delete for all vlans so the lookup
6778 * may return more than one.
6779 */
6780 while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6781 bridge_rtnode_destroy(sc, brt);
6782 found = 1;
6783 }
6784
6785 return found ? 0 : ENOENT;
6786 }
6787
6788 /*
6789 * bridge_rtdelete:
6790 *
6791 * Delete routes to a specific member interface.
6792 */
6793 static void
6794 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6795 {
6796 struct bridge_rtnode *brt, *nbrt;
6797
6798 BRIDGE_LOCK_ASSERT_HELD(sc);
6799
6800 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6801 if (brt->brt_ifp == ifp && (full ||
6802 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6803 bridge_rtnode_destroy(sc, brt);
6804 }
6805 }
6806 }
6807
6808 /*
6809 * bridge_rtable_init:
6810 *
6811 * Initialize the route table for this bridge.
6812 */
6813 static int
6814 bridge_rtable_init(struct bridge_softc *sc)
6815 {
6816 u_int32_t i;
6817
6818 sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6819 BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6820 sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6821
6822 for (i = 0; i < sc->sc_rthash_size; i++) {
6823 LIST_INIT(&sc->sc_rthash[i]);
6824 }
6825
6826 sc->sc_rthash_key = RandomULong();
6827
6828 LIST_INIT(&sc->sc_rtlist);
6829
6830 return 0;
6831 }
6832
/*
 * bridge_rthash_delayed_resize:
 *
 *	Resize the routing table hash on a delayed thread call.
 *
 *	Entered with the bridge lock held.  The lock is dropped while
 *	the replacement bucket array (twice the current size) is
 *	allocated and re-taken afterwards; SCF_RESIZING is set across
 *	that window.  On success every node on the route list is
 *	re-hashed into the new array under a fresh random key and the
 *	old array is freed.  On failure the new array (if any) is freed
 *	and the table is left as it was.
 */
static void
bridge_rthash_delayed_resize(struct bridge_softc *sc)
{
	u_int32_t new_rthash_size = 0;
	u_int32_t old_rthash_size = 0;
	struct _bridge_rtnode_list *new_rthash = NULL;
	struct _bridge_rtnode_list *old_rthash = NULL;
	u_int32_t i;
	struct bridge_rtnode *brt;
	int error = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);

	/*
	 * Four entries per hash bucket is our ideal load factor
	 */
	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
		/*
		 * No resize needed.  Note that `error` is still 0 here,
		 * so the success branch at `out` runs: it logs the
		 * unchanged size and calls kfree_type() with a NULL
		 * pointer and a zero count.
		 */
		goto out;
	}

	/*
	 * Doubling the number of hash buckets may be too simplistic
	 * especially when facing a spike of new entries
	 */
	new_rthash_size = sc->sc_rthash_size * 2;

	/* mark the resize in progress before giving up the lock */
	sc->sc_flags |= SCF_RESIZING;
	BRIDGE_UNLOCK(sc);

	new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
	    Z_WAITOK | Z_ZERO);

	BRIDGE_LOCK(sc);
	sc->sc_flags &= ~SCF_RESIZING;

	if (new_rthash == NULL) {
		error = ENOMEM;
		goto out;
	}
	/* the bridge may have started detaching while the lock was dropped */
	if ((sc->sc_flags & SCF_DETACHING)) {
		error = ENODEV;
		goto out;
	}
	/*
	 * Fail safe from here on
	 */
	old_rthash = sc->sc_rthash;
	old_rthash_size = sc->sc_rthash_size;
	sc->sc_rthash = new_rthash;
	sc->sc_rthash_size = new_rthash_size;

	/*
	 * Get a new key to force entries to be shuffled around to reduce
	 * the likelihood they will land in the same buckets
	 */
	sc->sc_rthash_key = RandomULong();

	for (i = 0; i < sc->sc_rthash_size; i++) {
		LIST_INIT(&sc->sc_rthash[i]);
	}

	/* move every node from its old bucket into the new array */
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
		LIST_REMOVE(brt, brt_hash);
		(void) bridge_rtnode_hash(sc, brt);
	}
out:
	if (error == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
		    "%s new size %u",
		    sc->sc_ifp->if_xname, sc->sc_rthash_size);
		kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
	} else {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
		    "%s failed %d", sc->sc_ifp->if_xname, error);
		kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
	}
}
6915
6916 /*
6917 * Resize the number of hash buckets based on the load factor
6918 * Currently only grow
6919 * Failing to resize the hash table is not fatal
6920 */
6921 static void
6922 bridge_rthash_resize(struct bridge_softc *sc)
6923 {
6924 BRIDGE_LOCK_ASSERT_HELD(sc);
6925
6926 if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
6927 return;
6928 }
6929
6930 /*
6931 * Four entries per hash bucket is our ideal load factor
6932 */
6933 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6934 return;
6935 }
6936 /*
6937 * Hard limit on the size of the routing hash table
6938 */
6939 if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
6940 return;
6941 }
6942
6943 sc->sc_resize_call.bdc_sc = sc;
6944 sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
6945 bridge_schedule_delayed_call(&sc->sc_resize_call);
6946 }
6947
/*
 * bridge_rtable_fini:
 *
 *	Deconstruct the route table for this bridge.
 *
 *	Asserts that no routing entries remain, frees the hash bucket
 *	array, and resets the bucket pointer and bucket count in the
 *	softc.
 */
static void
bridge_rtable_fini(struct bridge_softc *sc)
{
	/* all routes must have been removed before the table goes away */
	KASSERT(sc->sc_brtcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
	kfree_type_counted_by(struct _bridge_rtnode_list, sc->sc_rthash_size,
	    sc->sc_rthash);
	/* leave the softc describing an empty table */
	sc->sc_rthash = NULL;
	sc->sc_rthash_size = 0;
}
6963
6964 /*
6965 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
6966 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
6967 */
6968 #define mix(a, b, c) \
6969 do { \
6970 a -= b; a -= c; a ^= (c >> 13); \
6971 b -= c; b -= a; b ^= (a << 8); \
6972 c -= a; c -= b; c ^= (b >> 13); \
6973 a -= b; a -= c; a ^= (c >> 12); \
6974 b -= c; b -= a; b ^= (a << 16); \
6975 c -= a; c -= b; c ^= (b >> 5); \
6976 a -= b; a -= c; a ^= (c >> 3); \
6977 b -= c; b -= a; b ^= (a << 10); \
6978 c -= a; c -= b; c ^= (b >> 15); \
6979 } while ( /*CONSTCOND*/ 0)
6980
6981 static __inline uint32_t
6982 bridge_rthash(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN])
6983 {
6984 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
6985
6986 b += addr[5] << 8;
6987 b += addr[4];
6988 a += addr[3] << 24;
6989 a += addr[2] << 16;
6990 a += addr[1] << 8;
6991 a += addr[0];
6992
6993 mix(a, b, c);
6994
6995 return c & BRIDGE_RTHASH_MASK(sc);
6996 }
6997
6998 #undef mix
6999
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte, starting with
 *	byte 0.  Returns 0 when they are equal; otherwise returns the
 *	difference a[i] - b[i] at the first byte where they differ
 *	(positive when `a` sorts after `b`, negative when before).
 */
static int
bridge_rtnode_addr_cmp(const uint8_t a[ETHER_ADDR_LEN], const uint8_t b[ETHER_ADDR_LEN])
{
	for (int idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int delta = (int)a[idx] - (int)b[idx];

		if (delta != 0) {
			return delta;
		}
	}
	return 0;
}
7011
7012 /*
7013 * bridge_rtnode_lookup:
7014 *
7015 * Look up a bridge route node for the specified destination. Compare the
7016 * vlan id or if zero then just return the first match.
7017 */
7018 static struct bridge_rtnode *
7019 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
7020 uint16_t vlan)
7021 {
7022 struct bridge_rtnode *brt;
7023 uint32_t hash;
7024 int dir;
7025
7026 BRIDGE_LOCK_ASSERT_HELD(sc);
7027
7028 hash = bridge_rthash(sc, addr);
7029 LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7030 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7031 if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7032 return brt;
7033 }
7034 if (dir > 0) {
7035 return NULL;
7036 }
7037 }
7038
7039 return NULL;
7040 }
7041
7042 /*
7043 * bridge_rtnode_hash:
7044 *
7045 * Insert the specified bridge node into the route hash table.
7046 * This is used when adding a new node or to rehash when resizing
7047 * the hash table
7048 */
7049 static int
7050 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7051 {
7052 struct bridge_rtnode *lbrt;
7053 uint32_t hash;
7054 int dir;
7055
7056 BRIDGE_LOCK_ASSERT_HELD(sc);
7057
7058 hash = bridge_rthash(sc, brt->brt_addr);
7059
7060 lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7061 if (lbrt == NULL) {
7062 LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7063 goto out;
7064 }
7065
7066 do {
7067 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7068 if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7069 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7070 "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7071 sc->sc_ifp->if_xname,
7072 brt->brt_addr[0], brt->brt_addr[1],
7073 brt->brt_addr[2], brt->brt_addr[3],
7074 brt->brt_addr[4], brt->brt_addr[5]);
7075 return EEXIST;
7076 }
7077 if (dir > 0) {
7078 LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7079 goto out;
7080 }
7081 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7082 LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7083 goto out;
7084 }
7085 lbrt = LIST_NEXT(lbrt, brt_hash);
7086 } while (lbrt != NULL);
7087
7088 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7089 "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7090 sc->sc_ifp->if_xname,
7091 brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7092 brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7093 out:
7094 return 0;
7095 }
7096
7097 /*
7098 * bridge_rtnode_insert:
7099 *
7100 * Insert the specified bridge node into the route table. We
7101 * assume the entry is not already in the table.
7102 */
7103 static int
7104 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7105 {
7106 int error;
7107
7108 error = bridge_rtnode_hash(sc, brt);
7109 if (error != 0) {
7110 return error;
7111 }
7112
7113 LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7114 sc->sc_brtcnt++;
7115
7116 bridge_rthash_resize(sc);
7117
7118 return 0;
7119 }
7120
7121 /*
7122 * bridge_rtnode_destroy:
7123 *
7124 * Destroy a bridge rtnode.
7125 */
7126 static void
7127 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7128 {
7129 BRIDGE_LOCK_ASSERT_HELD(sc);
7130
7131 LIST_REMOVE(brt, brt_hash);
7132
7133 LIST_REMOVE(brt, brt_list);
7134 sc->sc_brtcnt--;
7135 brt->brt_dst->bif_addrcnt--;
7136 zfree(bridge_rtnode_pool, brt);
7137 }
7138
7139 #if BRIDGESTP
7140 /*
7141 * bridge_rtable_expire:
7142 *
7143 * Set the expiry time for all routes on an interface.
7144 */
7145 static void
7146 bridge_rtable_expire(struct ifnet *ifp, int age)
7147 {
7148 struct bridge_softc *sc = ifp->if_bridge;
7149 struct bridge_rtnode *brt;
7150
7151 BRIDGE_LOCK(sc);
7152
7153 /*
7154 * If the age is zero then flush, otherwise set all the expiry times to
7155 * age for the interface
7156 */
7157 if (age == 0) {
7158 bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7159 } else {
7160 unsigned long now;
7161
7162 now = (unsigned long) net_uptime();
7163
7164 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7165 /* Cap the expiry time to 'age' */
7166 if (brt->brt_ifp == ifp &&
7167 brt->brt_expire > now + age &&
7168 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7169 brt->brt_expire = now + age;
7170 }
7171 }
7172 }
7173 BRIDGE_UNLOCK(sc);
7174 }
7175
7176 /*
7177 * bridge_state_change:
7178 *
7179 * Callback from the bridgestp code when a port changes states.
7180 */
7181 static void
7182 bridge_state_change(struct ifnet *ifp, int state)
7183 {
7184 struct bridge_softc *sc = ifp->if_bridge;
7185 static const char *stpstates[] = {
7186 "disabled",
7187 "listening",
7188 "learning",
7189 "forwarding",
7190 "blocking",
7191 "discarding"
7192 };
7193
7194 if (log_stp) {
7195 log(LOG_NOTICE, "%s: state changed to %s on %s",
7196 sc->sc_ifp->if_xname,
7197 stpstates[state], ifp->if_xname);
7198 }
7199 }
7200 #endif /* BRIDGESTP */
7201
/*
 * bridge_detach:
 *
 *	Callback when interface has been detached.  Tears down what the
 *	bridge softc still owns, unlinks the softc from the bridge list
 *	and finally frees the softc itself.
 */
static void
bridge_detach(ifnet_t ifp)
{
	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);

#if BRIDGESTP
	bstp_detach(&sc->sc_stp);
#endif /* BRIDGESTP */

	/* Tear down the routing table. */
	bridge_rtable_fini(sc);

	/* Unlink the softc from the list it sits on via sc_list, under bridge_list_mtx. */
	lck_mtx_lock(&bridge_list_mtx);
	LIST_REMOVE(sc, sc_list);
	lck_mtx_unlock(&bridge_list_mtx);

	ifnet_release(ifp);

	/* The softc's mutex and storage go away here; sc is invalid afterwards. */
	lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
	kfree_type(struct bridge_softc, sc);
}
7228
7229 /*
7230 * bridge_link_event:
7231 *
7232 * Report a data link event on an interface
7233 */
7234 static void
7235 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7236 {
7237 struct event {
7238 u_int32_t ifnet_family;
7239 u_int32_t unit;
7240 char if_name[IFNAMSIZ];
7241 };
7242 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7243 struct kern_event_msg *header = (struct kern_event_msg*)message;
7244 struct event *data = (struct event *)(message + KEV_MSG_HEADER_SIZE);
7245
7246 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7247 "%s event_code %u - %s", ifp->if_xname,
7248 event_code, dlil_kev_dl_code_str(event_code));
7249 header->total_size = sizeof(message);
7250 header->vendor_code = KEV_VENDOR_APPLE;
7251 header->kev_class = KEV_NETWORK_CLASS;
7252 header->kev_subclass = KEV_DL_SUBCLASS;
7253 header->event_code = event_code;
7254 data->ifnet_family = ifnet_family(ifp);
7255 data->unit = (u_int32_t)ifnet_unit(ifp);
7256 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7257 ifnet_event(ifp, header);
7258 }
7259
/*
 * BRIDGE_HF_DROP:
 *
 *	Account for a packet rejected by the host filter: bump the
 *	bridge_hostfilter_stats counter named by `reason', emit a debug
 *	log made of `func', `line' and the stringified reason, and set
 *	the variable `error' to EINVAL.
 *
 *	The expansion is a bare brace block that assigns to `error', so
 *	a variable of that name must be in scope wherever the macro is
 *	used.
 */
#define BRIDGE_HF_DROP(reason, func, line) { \
	        bridge_hostfilter_stats.reason++; \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER, \
	            "%s.%d" #reason, func, line); \
	        error = EINVAL; \
	}
7266
7267 static int
7268 bridge_host_filter_arp(struct bridge_iflist *bif, mbuf_t *data)
7269 {
7270 struct ether_arp *ea;
7271 struct ether_header *eh;
7272 int error = EINVAL;
7273 mbuf_t m = *data;
7274 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7275
7276 /*
7277 * Make the Ethernet and ARP headers contiguous
7278 */
7279 if (mbuf_pkthdr_len(m) < minlen) {
7280 BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7281 goto done;
7282 }
7283 if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7284 BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7285 __func__, __LINE__);
7286 goto done;
7287 }
7288 m = *data;
7289
7290 /*
7291 * Restrict Ethernet protocols to ARP and IP/IPv6
7292 */
7293 eh = mtod(m, struct ether_header *);
7294 ea = (struct ether_arp *)(eh + 1);
7295 if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7296 BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7297 __func__, __LINE__);
7298 goto done;
7299 }
7300 if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7301 BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7302 __func__, __LINE__);
7303 goto done;
7304 }
7305 /*
7306 * Verify the address lengths are correct
7307 */
7308 if (ea->arp_hln != ETHER_ADDR_LEN) {
7309 BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7310 goto done;
7311 }
7312 if (ea->arp_pln != sizeof(struct in_addr)) {
7313 BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7314 __func__, __LINE__);
7315 goto done;
7316 }
7317 /*
7318 * Allow only ARP request or ARP reply
7319 */
7320 if (ea->arp_op != HTONS_ARPOP_REQUEST &&
7321 ea->arp_op != HTONS_ARPOP_REPLY) {
7322 BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7323 goto done;
7324 }
7325 if ((bif->bif_flags & BIFF_HF_HWSRC) != 0) {
7326 /*
7327 * Verify source hardware address matches
7328 */
7329 if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7330 ETHER_ADDR_LEN) != 0) {
7331 BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7332 goto done;
7333 }
7334 }
7335 if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7336 /*
7337 * Verify source protocol address:
7338 * May be null for an ARP probe
7339 */
7340 if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7341 sizeof(struct in_addr)) != 0 &&
7342 bcmp(ea->arp_spa, &inaddr_any,
7343 sizeof(struct in_addr)) != 0) {
7344 BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7345 goto done;
7346 }
7347 }
7348 bridge_hostfilter_stats.brhf_arp_ok += 1;
7349 error = 0;
7350 done:
7351 return error;
7352 }
7353
7354 /*
7355 * MAC NAT
7356 */
7357
7358 static errno_t
7359 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7360 {
7361 errno_t error = 0;
7362
7363 BRIDGE_LOCK_ASSERT_HELD(sc);
7364
7365 if (IFNET_IS_VMNET(bif->bif_ifp)) {
7366 error = EINVAL;
7367 goto done;
7368 }
7369 if (sc->sc_mac_nat_bif != NULL) {
7370 if (sc->sc_mac_nat_bif != bif) {
7371 error = EBUSY;
7372 }
7373 goto done;
7374 }
7375 sc->sc_mac_nat_bif = bif;
7376 bif->bif_ifflags |= IFBIF_MAC_NAT;
7377 bridge_mac_nat_populate_entries(sc);
7378
7379 done:
7380 return error;
7381 }
7382
7383 static void
7384 bridge_mac_nat_disable(struct bridge_softc *sc)
7385 {
7386 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7387
7388 assert(mac_nat_bif != NULL);
7389 bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7390 mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7391 sc->sc_mac_nat_bif = NULL;
7392 return;
7393 }
7394
7395 static void
7396 mac_nat_entry_print2(struct mac_nat_entry *mne,
7397 const char ifname[IFNAMSIZ], const char *msg1, const char *msg2)
7398 {
7399 int af;
7400 char etopbuf[24];
7401 char ntopbuf[MAX_IPv6_STR_LEN];
7402 const char *space;
7403
7404 af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7405 ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7406 (void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7407 if (msg2 == NULL) {
7408 msg2 = "";
7409 space = "";
7410 } else {
7411 space = " ";
7412 }
7413 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7414 "%.*s %s%s%s %p (%s, %s, %s)", IFNAMSIZ, ifname, msg1, space, msg2, mne,
7415 mne->mne_bif->bif_ifp->if_xname, ntopbuf, etopbuf);
7416 }
7417
7418 static void
7419 mac_nat_entry_print(struct mac_nat_entry *mne,
7420 const char ifname[IFNAMSIZ], const char *msg)
7421 {
7422 mac_nat_entry_print2(mne, ifname, msg, NULL);
7423 }
7424
7425 static struct mac_nat_entry *
7426 bridge_lookup_mac_nat_entry_ipv4(const struct bridge_softc *sc, const struct in_addr *ip)
7427 {
7428 struct mac_nat_entry *mne;
7429 struct mac_nat_entry *ret_mne = NULL;
7430
7431 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7432 if (mne->mne_ip.s_addr == ip->s_addr) {
7433 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7434 mac_nat_entry_print(mne, sc->sc_if_xname,
7435 "found");
7436 }
7437 ret_mne = mne;
7438 break;
7439 }
7440 }
7441
7442 return ret_mne;
7443 }
7444
7445 static struct mac_nat_entry *
7446 bridge_lookup_mac_nat_entry_ipv6(const struct bridge_softc *sc, const struct in6_addr *ip6)
7447 {
7448 struct mac_nat_entry *mne;
7449 struct mac_nat_entry *ret_mne = NULL;
7450
7451 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7452 if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7453 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7454 mac_nat_entry_print(mne, sc->sc_if_xname,
7455 "found");
7456 }
7457 ret_mne = mne;
7458 break;
7459 }
7460 }
7461
7462 return ret_mne;
7463 }
7464
7465 static void
7466 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7467 struct mac_nat_entry *mne, const char *reason)
7468 {
7469 LIST_REMOVE(mne, mne_list);
7470 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7471 mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7472 }
7473 zfree(bridge_mne_pool, mne);
7474 sc->sc_mne_count--;
7475 }
7476
7477 static struct mac_nat_entry *
7478 bridge_create_mac_nat_entry_common(struct bridge_softc *sc,
7479 struct bridge_iflist *bif, const char eaddr[ETHER_ADDR_LEN])
7480 {
7481 struct mac_nat_entry *mne;
7482
7483 if (sc->sc_mne_count >= sc->sc_mne_max) {
7484 sc->sc_mne_allocation_failures++;
7485 return NULL;
7486 }
7487
7488 mne = zalloc_noblock(bridge_mne_pool);
7489 if (mne == NULL) {
7490 sc->sc_mne_allocation_failures++;
7491 return NULL;
7492 }
7493
7494 sc->sc_mne_count++;
7495 bzero(mne, sizeof(*mne));
7496 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7497
7498 mne->mne_bif = bif;
7499 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7500
7501 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7502 mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7503 }
7504
7505 return mne;
7506 }
7507
7508 static struct mac_nat_entry *
7509 bridge_create_mac_nat_entry_ipv4(struct bridge_softc *sc,
7510 struct bridge_iflist *bif, const struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7511 {
7512 struct mac_nat_entry *mne;
7513
7514 mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7515 if (mne == NULL) {
7516 return NULL;
7517 }
7518
7519 bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7520 LIST_INSERT_HEAD(&sc->sc_mne_list, mne, mne_list);
7521
7522 return mne;
7523 }
7524
7525 static struct mac_nat_entry *
7526 bridge_create_mac_nat_entry_ipv6(struct bridge_softc *sc,
7527 struct bridge_iflist *bif, const struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7528 {
7529 struct mac_nat_entry *mne;
7530
7531 mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7532 if (mne == NULL) {
7533 return NULL;
7534 }
7535
7536 bcopy(ip6, &mne->mne_ip6, sizeof(mne->mne_ip6));
7537 mne->mne_flags |= MNE_FLAGS_IPV6;
7538 LIST_INSERT_HEAD(&sc->sc_mne_list_v6, mne, mne_list);
7539
7540 return mne;
7541 }
7542
7543 static struct mac_nat_entry *
7544 bridge_update_mac_nat_entry_common(struct bridge_softc *sc, struct bridge_iflist *bif,
7545 struct mac_nat_entry *mne, const char eaddr[ETHER_ADDR_LEN])
7546 {
7547 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7548
7549 if (mne->mne_bif == mac_nat_bif) {
7550 /* the MAC NAT interface takes precedence */
7551 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7552 if (mne->mne_bif != bif) {
7553 mac_nat_entry_print2(mne,
7554 sc->sc_if_xname, "reject",
7555 bif->bif_ifp->if_xname);
7556 }
7557 }
7558 } else if (mne->mne_bif != bif) {
7559 const char *__null_terminated old_if = mne->mne_bif->bif_ifp->if_xname;
7560
7561 mne->mne_bif = bif;
7562 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7563 mac_nat_entry_print2(mne,
7564 sc->sc_if_xname, "replaced",
7565 old_if);
7566 }
7567 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7568 }
7569
7570 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7571
7572 return mne;
7573 }
7574
7575 static struct mac_nat_entry *
7576 bridge_update_mac_nat_entry_ipv4(struct bridge_softc *sc,
7577 struct bridge_iflist *bif, struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7578 {
7579 struct mac_nat_entry *mne;
7580
7581 mne = bridge_lookup_mac_nat_entry_ipv4(sc, ip);
7582 if (mne != NULL) {
7583 return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7584 }
7585
7586 mne = bridge_create_mac_nat_entry_ipv4(sc, bif, ip, eaddr);
7587 return mne;
7588 }
7589
7590 static struct mac_nat_entry *
7591 bridge_update_mac_nat_entry_ipv6(struct bridge_softc *sc,
7592 struct bridge_iflist *bif, struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7593 {
7594 struct mac_nat_entry *mne;
7595
7596 mne = bridge_lookup_mac_nat_entry_ipv6(sc, ip6);
7597 if (mne != NULL) {
7598 return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7599 }
7600
7601 mne = bridge_create_mac_nat_entry_ipv6(sc, bif, ip6, eaddr);
7602 return mne;
7603 }
7604
7605 static void
7606 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7607 struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7608 {
7609 struct mac_nat_entry *mne;
7610 struct mac_nat_entry *tmne;
7611
7612 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7613 if (bif != NULL && mne->mne_bif != bif) {
7614 continue;
7615 }
7616 bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7617 }
7618 }
7619
7620 /*
7621 * bridge_mac_nat_flush_entries:
7622 *
7623 * Flush MAC NAT entries for the specified member. Flush all entries if
7624 * the member is the one that requires MAC NAT, otherwise just flush the
7625 * ones for the specified member.
7626 */
7627 static void
7628 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7629 {
7630 struct bridge_iflist *flush_bif;
7631
7632 flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7633 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7634 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7635 }
7636
7637 static void
7638 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7639 {
7640 errno_t error;
7641 ifnet_t ifp;
7642 uint16_t addresses_count = 0;
7643 ifaddr_t * __counted_by(addresses_count) list;
7644 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7645
7646 assert(mac_nat_bif != NULL);
7647 ifp = mac_nat_bif->bif_ifp;
7648 error = ifnet_get_address_list_family_with_count(ifp, &list, &addresses_count, 0);
7649 if (error != 0) {
7650 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7651 "ifnet_get_address_list(%s) failed %d",
7652 ifp->if_xname, error);
7653 return;
7654 }
7655
7656 for (uint16_t i = 0; i < addresses_count; ++i) {
7657 sa_family_t af;
7658
7659 af = ifaddr_address_family(list[i]);
7660 switch (af) {
7661 case AF_INET: {
7662 struct sockaddr_in sin;
7663
7664 error = ifaddr_address(list[i], (struct sockaddr *)&sin, sizeof(sin));
7665 if (error != 0) {
7666 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7667 "ifaddr_address failed %d",
7668 error);
7669 break;
7670 }
7671
7672 bridge_create_mac_nat_entry_ipv4(sc, mac_nat_bif, &sin.sin_addr, IF_LLADDR(ifp));
7673 break;
7674 }
7675
7676 case AF_INET6: {
7677 struct sockaddr_in6 sin6;
7678
7679 error = ifaddr_address(list[i], (struct sockaddr *)&sin6, sizeof(sin6));
7680 if (error != 0) {
7681 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7682 "ifaddr_address failed %d",
7683 error);
7684 break;
7685 }
7686
7687 if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) {
7688 /* remove scope ID */
7689 sin6.sin6_addr.s6_addr16[1] = 0;
7690 }
7691
7692 bridge_create_mac_nat_entry_ipv6(sc, mac_nat_bif, &sin6.sin6_addr, IF_LLADDR(ifp));
7693 break;
7694 }
7695
7696 default:
7697 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7698 "ifaddr_address_family unknown %d",
7699 af);
7700 break;
7701 }
7702 }
7703
7704 ifnet_address_list_free_counted_by(list, addresses_count);
7705 return;
7706 }
7707
7708 static void
7709 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7710 struct mac_nat_entry_list *list, unsigned long now)
7711 {
7712 struct mac_nat_entry *mne;
7713 struct mac_nat_entry *tmne;
7714
7715 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7716 if (now >= mne->mne_expire) {
7717 bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7718 }
7719 }
7720 }
7721
7722 static void
7723 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7724 {
7725 if (sc->sc_mac_nat_bif == NULL) {
7726 return;
7727 }
7728 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7729 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7730 }
7731
7732 static const char *
7733 get_in_out_string(boolean_t is_output)
7734 {
7735 return (const char * __null_terminated)(is_output ? "OUT" : "IN");
7736 }
7737
7738 /*
7739 * is_valid_arp_packet:
7740 * Verify that this is a valid ARP packet.
7741 *
7742 * Returns TRUE if the packet is valid, FALSE otherwise.
7743 */
7744 static boolean_t
7745 is_valid_arp_packet(mbuf_t *data, bool is_output,
7746 struct ether_header **eh_p, struct ether_arp **ea_p)
7747 {
7748 struct ether_arp *ea;
7749 struct ether_header *eh;
7750 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7751 boolean_t is_valid = FALSE;
7752 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7753
7754 if (mbuf_pkthdr_len(*data) < minlen) {
7755 BRIDGE_LOG(LOG_DEBUG, flags,
7756 "ARP %s short frame %lu < %lu",
7757 get_in_out_string(is_output),
7758 mbuf_pkthdr_len(*data), minlen);
7759 goto done;
7760 }
7761 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7762 BRIDGE_LOG(LOG_DEBUG, flags,
7763 "ARP %s size %lu mbuf_pullup fail",
7764 get_in_out_string(is_output),
7765 minlen);
7766 *data = NULL;
7767 goto done;
7768 }
7769
7770 /* validate ARP packet */
7771 eh = mtod(*data, struct ether_header *);
7772 ea = (struct ether_arp *)(eh + 1);
7773 if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7774 BRIDGE_LOG(LOG_DEBUG, flags,
7775 "ARP %s htype not ethernet",
7776 get_in_out_string(is_output));
7777 goto done;
7778 }
7779 if (ea->arp_hln != ETHER_ADDR_LEN) {
7780 BRIDGE_LOG(LOG_DEBUG, flags,
7781 "ARP %s hlen not ethernet",
7782 get_in_out_string(is_output));
7783 goto done;
7784 }
7785 if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7786 BRIDGE_LOG(LOG_DEBUG, flags,
7787 "ARP %s ptype not IP",
7788 get_in_out_string(is_output));
7789 goto done;
7790 }
7791 if (ea->arp_pln != sizeof(struct in_addr)) {
7792 BRIDGE_LOG(LOG_DEBUG, flags,
7793 "ARP %s plen not IP",
7794 get_in_out_string(is_output));
7795 goto done;
7796 }
7797 is_valid = TRUE;
7798 *ea_p = ea;
7799 *eh_p = eh;
7800 done:
7801 return is_valid;
7802 }
7803
7804 static struct mac_nat_entry *
7805 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7806 {
7807 struct ether_arp * __single ea;
7808 struct ether_header * __single eh;
7809 struct mac_nat_entry *mne = NULL;
7810 u_short op;
7811 struct in_addr tpa;
7812
7813 if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7814 goto done;
7815 }
7816 op = ea->arp_op;
7817 switch (op) {
7818 case HTONS_ARPOP_REQUEST:
7819 case HTONS_ARPOP_REPLY:
7820 /* only care about REQUEST and REPLY */
7821 break;
7822 default:
7823 goto done;
7824 }
7825
7826 /* check the target IP address for a NAT entry */
7827 bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7828 if (tpa.s_addr != 0) {
7829 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &tpa);
7830 }
7831 if (mne != NULL) {
7832 if (op == HTONS_ARPOP_REPLY) {
7833 /* translate the MAC address */
7834 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7835 char mac_src[24];
7836 char mac_dst[24];
7837
7838 ether_ntop(mac_src, sizeof(mac_src),
7839 ea->arp_tha);
7840 ether_ntop(mac_dst, sizeof(mac_dst),
7841 mne->mne_mac);
7842 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7843 "%s %s ARP %s -> %s",
7844 sc->sc_if_xname,
7845 mne->mne_bif->bif_ifp->if_xname,
7846 mac_src, mac_dst);
7847 }
7848 bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
7849 }
7850 } else {
7851 /* handle conflicting ARP (sender matches mne) */
7852 struct in_addr spa;
7853
7854 bcopy(ea->arp_spa, &spa, sizeof(spa));
7855 if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
7856 /* check the source IP for a NAT entry */
7857 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &spa);
7858 }
7859 }
7860
7861 done:
7862 return mne;
7863 }
7864
7865 static boolean_t
7866 bridge_mac_nat_arp_output(struct bridge_softc *sc,
7867 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7868 {
7869 struct ether_arp * __single ea;
7870 struct ether_header * __single eh;
7871 struct in_addr ip;
7872 struct mac_nat_entry *mne = NULL;
7873 u_short op;
7874 boolean_t translate = FALSE;
7875
7876 if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
7877 goto done;
7878 }
7879 op = ea->arp_op;
7880 switch (op) {
7881 case HTONS_ARPOP_REQUEST:
7882 case HTONS_ARPOP_REPLY:
7883 /* only care about REQUEST and REPLY */
7884 break;
7885 default:
7886 goto done;
7887 }
7888
7889 bcopy(ea->arp_spa, &ip, sizeof(ip));
7890 if (ip.s_addr == 0) {
7891 goto done;
7892 }
7893 /* XXX validate IP address: no multicast/broadcast */
7894 mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
7895 (const char *)ea->arp_sha);
7896 if (mnr != NULL && mne != NULL) {
7897 /* record the offset to do the replacement */
7898 translate = TRUE;
7899 mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
7900 }
7901
7902 done:
7903 return translate;
7904 }
7905
7906 #define ETHER_IPV4_HEADER_LEN (sizeof(struct ether_header) + \
7907 + sizeof(struct ip))
7908 static uint8_t * __indexable
7909 get_ether_ip_header_ptr(mbuf_t *data, boolean_t is_output)
7910 {
7911 uint8_t *header = NULL;
7912 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7913 size_t minlen = ETHER_IPV4_HEADER_LEN;
7914
7915 if (mbuf_pkthdr_len(*data) < minlen) {
7916 BRIDGE_LOG(LOG_DEBUG, flags,
7917 "IP %s short frame %lu < %lu",
7918 get_in_out_string(is_output),
7919 mbuf_pkthdr_len(*data), minlen);
7920 goto done;
7921 }
7922 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7923 BRIDGE_LOG(LOG_DEBUG, flags,
7924 "IP %s size %lu mbuf_pullup fail",
7925 get_in_out_string(is_output),
7926 minlen);
7927 *data = NULL;
7928 goto done;
7929 }
7930 header = mtod(*data, uint8_t *);
7931 done:
7932 return header;
7933 }
7934
7935 static struct mac_nat_entry *
7936 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
7937 {
7938 struct in_addr dst;
7939 uint8_t *header;
7940 struct ip *iphdr;
7941 struct mac_nat_entry *mne = NULL;
7942
7943 header = get_ether_ip_header_ptr(data, FALSE);
7944 if (header == NULL) {
7945 goto done;
7946 }
7947 iphdr = (struct ip *)(void *)(header + sizeof(struct ether_header));
7948 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
7949 /* XXX validate IP address */
7950 if (dst.s_addr == 0) {
7951 goto done;
7952 }
7953 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &dst);
7954 done:
7955 return mne;
7956 }
7957
7958 static void
7959 bridge_mac_nat_udp_output(struct bridge_softc *sc,
7960 struct bridge_iflist *bif, mbuf_t m,
7961 uint8_t ip_header_len, struct mac_nat_record *mnr)
7962 {
7963 uint16_t dp_flags;
7964 errno_t error;
7965 size_t offset;
7966 struct udphdr udphdr;
7967
7968 /* copy the UDP header */
7969 offset = sizeof(struct ether_header) + ip_header_len;
7970 error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
7971 if (error != 0) {
7972 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7973 "mbuf_copydata udphdr failed %d",
7974 error);
7975 return;
7976 }
7977 if (udphdr.uh_sport != HTONS_IPPORT_BOOTPC ||
7978 udphdr.uh_dport != HTONS_IPPORT_BOOTPS) {
7979 /* not a BOOTP/DHCP packet */
7980 return;
7981 }
7982 /* check whether the broadcast bit is already set */
7983 offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
7984 error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
7985 if (error != 0) {
7986 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7987 "mbuf_copydata dp_flags failed %d",
7988 error);
7989 return;
7990 }
7991 if ((dp_flags & HTONS_DHCP_FLAGS_BROADCAST) != 0) {
7992 /* it's already set, nothing to do */
7993 return;
7994 }
7995 /* broadcast bit needs to be set */
7996 mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
7997 mnr->mnr_ip_header_len = ip_header_len;
7998 if (udphdr.uh_sum != 0) {
7999 uint16_t delta;
8000
8001 /* adjust checksum to take modified dp_flags into account */
8002 delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8003 mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8004 }
8005 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8006 "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8007 sc->sc_if_xname,
8008 bif->bif_ifp->if_xname,
8009 ntohs(mnr->mnr_ip_dhcp_flags),
8010 ntohs(mnr->mnr_ip_udp_csum));
8011 return;
8012 }
8013
8014 static boolean_t
8015 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8016 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8017 {
8018 #pragma unused(mnr)
8019 uint8_t *header;
8020 struct ether_header *eh;
8021 struct in_addr ip;
8022 struct ip *iphdr;
8023 uint8_t ip_header_len;
8024 struct mac_nat_entry *mne = NULL;
8025 boolean_t translate = FALSE;
8026
8027 header = get_ether_ip_header_ptr(data, TRUE);
8028 if (header == NULL) {
8029 goto done;
8030 }
8031
8032 eh = (struct ether_header *)header;
8033 iphdr = (struct ip *)(header + sizeof(*eh));
8034 ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8035 if (ip_header_len < sizeof(ip)) {
8036 /* bogus IP header */
8037 goto done;
8038 }
8039 bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8040 /* XXX validate the source address */
8041 if (ip.s_addr != 0) {
8042 mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
8043 (const char *)eh->ether_shost);
8044 }
8045 if (mnr != NULL) {
8046 if (ip.s_addr == 0 && iphdr->ip_p == IPPROTO_UDP) {
8047 /* handle DHCP must broadcast */
8048 bridge_mac_nat_udp_output(sc, bif, *data,
8049 ip_header_len, mnr);
8050 }
8051 translate = TRUE;
8052 }
8053 done:
8054 return translate;
8055 }
8056
8057 #define ETHER_IPV6_HEADER_LEN (sizeof(struct ether_header) + \
8058 + sizeof(struct ip6_hdr))
8059 static uint8_t * __indexable
8060 get_ether_ipv6_header_ptr(mbuf_t *data, size_t plen, boolean_t is_output)
8061 {
8062 uint8_t *header = NULL;
8063 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8064 size_t minlen = ETHER_IPV6_HEADER_LEN + plen;
8065
8066 if (mbuf_pkthdr_len(*data) < minlen) {
8067 BRIDGE_LOG(LOG_DEBUG, flags,
8068 "IP %s short frame %lu < %lu",
8069 get_in_out_string(is_output),
8070 mbuf_pkthdr_len(*data), minlen);
8071 goto done;
8072 }
8073 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8074 BRIDGE_LOG(LOG_DEBUG, flags,
8075 "IP %s size %lu mbuf_pullup fail",
8076 get_in_out_string(is_output),
8077 minlen);
8078 *data = NULL;
8079 goto done;
8080 }
8081 header = mtod(*data, uint8_t *);
8082 done:
8083 return header;
8084 }
8085
8086 #include <netinet/icmp6.h>
8087 #include <netinet6/nd6.h>
8088
/*
 * Size of an ND option header plus an Ethernet address; the value the
 * ND option parsing below expects for a link-layer address option.
 */
#define ETHER_ND_LLADDR_LEN     (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8090
/*
 * bridge_mac_nat_icmpv6_output:
 *
 *	Outbound ICMPv6 handling for MAC NAT.  For Neighbor Solicit,
 *	Neighbor Advert, Router Advert and Router Solicit messages, parse
 *	the ND options and, if a link-layer address option of Ethernet
 *	size is present, record in `mnr' where it sits in the frame
 *	together with the ICMPv6 length/type and IPv6 header length.
 *	Nothing in the packet is rewritten here.
 *
 *	`ip6h' must point at the IPv6 header of `*data'; the payload is
 *	taken to start right after the fixed IPv6 header.  `*saddrp' is
 *	the caller's copy of the source address; for a DAD probe (NS with
 *	unspecified source) it is overwritten with the NS target address.
 *
 *	The mbuf may be pulled up, so `*data' can change or become NULL;
 *	header pointers are re-derived after every pullup.
 */
static void
bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
    struct bridge_iflist *bif,
    mbuf_t *data, struct ip6_hdr *ip6h,
    struct in6_addr *saddrp,
    struct mac_nat_record *mnr)
{
	uint8_t *header;
	struct ether_header *eh;
	struct icmp6_hdr *icmp6;
	uint8_t icmp6_type;
	uint32_t icmp6len;
	int lladdrlen = 0;
	char *lladdr = NULL;
	unsigned int off = sizeof(*ip6h);

	/* the IPv6 payload length is used as the ICMPv6 message length */
	icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
	if (icmp6len < sizeof(*icmp6)) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "short IPv6 payload length %d < %lu",
		    icmp6len, sizeof(*icmp6));
		return;
	}

	/* pullup IP6 header + ICMPv6 header */
	header = get_ether_ipv6_header_ptr(data, sizeof(*icmp6), TRUE);
	if (header == NULL) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "failed to pullup icmp6 header");
		return;
	}
	eh = (struct ether_header *)header;
	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
	icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);
	icmp6_type = icmp6->icmp6_type;
	/* only the four ND message types handled below are of interest */
	switch (icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
	case ND_NEIGHBOR_ADVERT:
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT:
		break;
	default:
		return;
	}

	/* pullup IP6 header + payload */
	header = get_ether_ipv6_header_ptr(data, icmp6len, TRUE);
	if (header == NULL) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "failed to pullup icmp6 + payload");
		return;
	}
	/* the pullup may have moved the data: recompute every pointer */
	eh = (struct ether_header *)header;
	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
	icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);

	switch (icmp6_type) {
	case ND_NEIGHBOR_SOLICIT: {
		struct nd_neighbor_solicit *nd_ns;
		union nd_opts ndopts;
		boolean_t is_dad_probe;
		struct in6_addr taddr;

		if (icmp6len < sizeof(*nd_ns)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_ns %d < %lu",
			    icmp6len, sizeof(*nd_ns));
			return;
		}

		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}

		/* parse options */
		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NS option");
			return;
		}
		/* presumably ND_OPT_LLADDR fills in lladdr/lladdrlen from the option */
		if (ndopts.nd_opts_src_lladdr != NULL) {
			ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len,
			    lladdr, lladdrlen);
		}
		/* an NS sent from the unspecified address is a DAD probe */
		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
		if (lladdr != NULL) {
			/* a DAD probe carrying a source link-layer option is rejected */
			if (is_dad_probe) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "bad ND6 DAD packet");
				return;
			}
			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
		}
		if (is_dad_probe) {
			/* node is trying use taddr, create an mne for taddr */
			*saddrp = taddr;
		}
		break;
	}
	case ND_NEIGHBOR_ADVERT: {
		struct nd_neighbor_advert *nd_na;
		union nd_opts ndopts;
		struct in6_addr taddr;


		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;

		if (icmp6len < sizeof(*nd_na)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_na %d < %lu",
			    icmp6len, sizeof(*nd_na));
			return;
		}

		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}

		/* parse options */
		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NA option");
			return;
		}
		if (ndopts.nd_opts_tgt_lladdr == NULL) {
			/* no target link-layer address option, nothing to do */
			return;
		}

		ND_OPT_LLADDR(ndopts.nd_opts_tgt_lladdr, nd_opt_len, lladdr, lladdrlen);
		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "target lladdrlen %d != %lu",
			    lladdrlen, ETHER_ND_LLADDR_LEN);
			return;
		}
		break;
	}
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT: {
		union nd_opts ndopts;
		uint32_t type_length;
		const char *description;

		/* RA and RS differ only in fixed-header size and log label */
		if (icmp6_type == ND_ROUTER_ADVERT) {
			type_length = sizeof(struct nd_router_advert);
			description = "RA";
		} else {
			type_length = sizeof(struct nd_router_solicit);
			description = "RS";
		}
		if (icmp6len < type_length) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short ND6 %s %d < %d",
			    description, icmp6len, type_length);
			return;
		}

		/* parse options */
		nd6_option_init(((uint8_t *)icmp6) + type_length,
		    icmp6len - type_length, &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 %s option", description);
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len, lladdr, lladdrlen);

			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
		}
		break;
	}
	default:
		break;
	}

	/*
	 * A link-layer address option was found: record its offset from
	 * the start of the Ethernet header along with the ICMPv6 details.
	 */
	if (lladdr != NULL) {
		mnr->mnr_ip6_lladdr_offset = (uint16_t)
		    ((uintptr_t)lladdr - (uintptr_t)eh);
		mnr->mnr_ip6_icmp6_len = icmp6len;
		mnr->mnr_ip6_icmp6_type = icmp6_type;
		mnr->mnr_ip6_header_len = off;
		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
			const char *str;

			switch (mnr->mnr_ip6_icmp6_type) {
			case ND_ROUTER_ADVERT:
				str = "ROUTER ADVERT";
				break;
			case ND_ROUTER_SOLICIT:
				str = "ROUTER SOLICIT";
				break;
			case ND_NEIGHBOR_ADVERT:
				str = "NEIGHBOR ADVERT";
				break;
			case ND_NEIGHBOR_SOLICIT:
				str = "NEIGHBOR SOLICIT";
				break;
			default:
				str = "";
				break;
			}
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
			    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
			    mnr->mnr_ip6_header_len,
			    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
		}
	}
}
8323
8324 static struct mac_nat_entry *
8325 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8326 {
8327 struct in6_addr dst;
8328 uint8_t *header;
8329 struct ether_header *eh;
8330 struct ip6_hdr *ip6h;
8331 struct mac_nat_entry *mne = NULL;
8332
8333 header = get_ether_ipv6_header_ptr(data, 0, FALSE);
8334 if (header == NULL) {
8335 goto done;
8336 }
8337 eh = (struct ether_header *)header;
8338 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8339 bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8340 /* XXX validate IPv6 address */
8341 if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8342 goto done;
8343 }
8344 mne = bridge_lookup_mac_nat_entry_ipv6(sc, &dst);
8345
8346 done:
8347 return mne;
8348 }
8349
8350 static boolean_t
8351 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8352 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8353 {
8354 uint8_t *header;
8355 struct ether_header *eh;
8356 ether_addr_t ether_shost;
8357 struct ip6_hdr *ip6h;
8358 struct in6_addr saddr;
8359 boolean_t translate;
8360
8361 translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8362 header = get_ether_ipv6_header_ptr(data, 0, TRUE);
8363 if (header == NULL) {
8364 translate = FALSE;
8365 goto done;
8366 }
8367 eh = (struct ether_header *)header;
8368 bcopy(eh->ether_shost, ðer_shost, sizeof(ether_shost));
8369 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8370 bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8371 if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8372 bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, &saddr, mnr);
8373 }
8374 if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8375 goto done;
8376 }
8377 (void)bridge_update_mac_nat_entry_ipv6(sc, bif, &saddr,
8378 (const char *)ether_shost.octet);
8379
8380 done:
8381 return translate;
8382 }
8383
8384 /*
8385 * Function: bridge_mac_nat_input:
8386 *
8387 * Purpose:
8388 * Process a unicast packet arriving on the external interface `external_ifp`.
8389 *
8390 * If the packet is ARP, IPv4, or IPv6, lookup the address from the packet in
8391 * the mac_nat_entry table. If an entry is found, and the interface is
8392 * not `external_ifp`, replace the destination MAC address in the
8393 * ethernet header with the corresponding internal MAC address, and return
8394 * the interface via `*dst_if`.
8395 *
8396 * Returns:
8397 * NULL if the packet was deallocated during processing.
8398 *
8399 * Otherwise, returns non-NULL packet that should:
8400 * 1) if `*dst_if` is NULL, continue on as an input packet
8401 * over `external_ifp`, OR
8402 * 2) if `*dst_if` is not NULL, be delivered as an output packet
8403 * over `*dst_if`.
8404 */
8405 static mbuf_t
8406 bridge_mac_nat_input(struct bridge_softc *sc, ifnet_t external_ifp,
8407 mbuf_t m, ifnet_t * dst_if)
8408 {
8409 struct ether_header *eh;
8410 mbuf_t m0 = m;
8411 struct mac_nat_entry *mne = NULL;
8412
8413 BRIDGE_LOCK_ASSERT_HELD(sc);
8414 *dst_if = NULL;
8415 eh = mtod(m, struct ether_header *);
8416 switch (eh->ether_type) {
8417 case HTONS_ETHERTYPE_ARP:
8418 mne = bridge_mac_nat_arp_input(sc, &m);
8419 break;
8420 case HTONS_ETHERTYPE_IP:
8421 mne = bridge_mac_nat_ip_input(sc, &m);
8422 break;
8423 case HTONS_ETHERTYPE_IPV6:
8424 mne = bridge_mac_nat_ipv6_input(sc, &m);
8425 break;
8426 default:
8427 break;
8428 }
8429 if (m != NULL & mne != NULL) {
8430 *dst_if = mne->mne_bif->bif_ifp;
8431 if (*dst_if == external_ifp) {
8432 /* receive packet for ifp */
8433 *dst_if = NULL;
8434 } else {
8435 /* replace the destination MAC with internal one */
8436 if (m != m0) {
8437 /* it may have changed */
8438 eh = mtod(m, struct ether_header *);
8439 }
8440 bcopy(mne->mne_mac, eh->ether_dhost,
8441 sizeof(eh->ether_dhost));
8442 }
8443 }
8444 return m;
8445 }
8446
8447
8448 static mblist
8449 bridge_mac_nat_input_list(struct bridge_softc *sc, ifnet_t external_ifp,
8450 mbuf_t m, mbuf_t * forward_head)
8451 {
8452 mblist forward;
8453 mbuf_t next_packet;
8454 mblist ret;
8455
8456 mblist_init(&ret);
8457 mblist_init(&forward);
8458 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8459 ifnet_ref_t dst_if;
8460
8461 /* take packet out of the list */
8462 next_packet = scan->m_nextpkt;
8463 scan->m_nextpkt = NULL;
8464
8465 scan = bridge_mac_nat_input(sc, external_ifp, scan, &dst_if);
8466 if (scan != NULL) {
8467 if (dst_if != NULL) {
8468 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8469 "%s MAC-NAT input translate to %s",
8470 sc->sc_if_xname, dst_if->if_xname);
8471 /* use rcvif to store the egress interface */
8472 mbuf_pkthdr_setrcvif(scan, dst_if);
8473 /* add it to the forwarding list */
8474 mblist_append(&forward, scan);
8475 } else {
8476 /* add it to the "continue on as input" list */
8477 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8478 "%s MAC-NAT input for %s",
8479 sc->sc_if_xname,
8480 external_ifp->if_xname);
8481 mblist_append(&ret, scan);
8482 }
8483 }
8484 }
8485 *forward_head = forward.head;
8486 return ret;
8487 }
8488
8489 /*
8490 * bridge_mac_nat_translate_list:
8491 * Process a list of packets destined to the MAC-NAT interface `dst_if`
8492 * from the bridge member `sbif`.
8493 *
8494 * For each packet in the list, update the MAC-NAT record, and if
8495 * translation is required, translate it.
8496 *
8497 * Returns the list of packets that should be delivered to the MAC-NAT
8498 * interface.
8499 */
8500 static mbuf_t
8501 bridge_mac_nat_translate_list(struct bridge_softc * sc,
8502 struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8503 {
8504 mbuf_t next_packet;
8505 mblist ret;
8506
8507 mblist_init(&ret);
8508 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
8509 struct mac_nat_record mnr;
8510 bool translate_mac;
8511
8512 /* take packet out of the list */
8513 next_packet = scan->m_nextpkt;
8514 scan->m_nextpkt = NULL;
8515 translate_mac = bridge_mac_nat_output(sc, sbif, &scan, &mnr);
8516 if (scan != NULL) {
8517 if (translate_mac) {
8518 bridge_mac_nat_translate(&scan, &mnr,
8519 IF_LLADDR(dst_if));
8520 }
8521 if (scan != NULL) {
8522 /* add it back to the list */
8523 mblist_append(&ret, scan);
8524 }
8525 }
8526 }
8527 return ret.head;
8528 }
8529
8530 /*
8531 * bridge_mac_nat_copy_and_translate_list:
8532 * Same as bridge_mac_nat_translate_list() except that a copy of the
8533 * packet list is returned instead.
8534 *
8535 * The packet list `m` is left unaltered.
8536 */
8537 static mbuf_t
8538 bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
8539 struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8540 {
8541 mbuf_t next_packet;
8542 mblist ret;
8543
8544 mblist_init(&ret);
8545 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8546 mbuf_ref_t mc = NULL;
8547 struct mac_nat_record mnr;
8548 bool translate_mac;
8549
8550 /* take packet out of the list, make a copy, put it back */
8551 next_packet = scan->m_nextpkt;
8552 scan->m_nextpkt = NULL;
8553 mc = m_dup(scan, M_DONTWAIT);
8554 scan->m_nextpkt = next_packet;
8555 if (mc == NULL) {
8556 continue;
8557 }
8558 translate_mac = bridge_mac_nat_output(sc, sbif, &mc, &mnr);
8559 if (mc != NULL) {
8560 if (translate_mac) {
8561 bridge_mac_nat_translate(&mc, &mnr,
8562 IF_LLADDR(dst_if));
8563 }
8564 if (mc != NULL) {
8565 /* add it to the new list */
8566 mblist_append(&ret, mc);
8567 }
8568 }
8569 }
8570 return ret.head;
8571 }
8572
8573 static void
8574 bridge_mac_nat_forward_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
8575 mbuf_t m)
8576 {
8577 int count = 0;
8578 ifnet_t dst_if;
8579 mblist list;
8580 int n_lists = 0;
8581 mbuf_t next_packet;
8582
8583 mblist_init(&list);
8584 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8585 ifnet_t this_if;
8586
8587 next_packet = scan->m_nextpkt;
8588 this_if = mbuf_pkthdr_rcvif(scan);
8589 mbuf_pkthdr_setrcvif(scan, NULL);
8590 if (list.head == NULL) {
8591 /* start a new list */
8592 list.head = list.tail = scan;
8593 count = 1;
8594 dst_if = this_if;
8595 } else if (dst_if != this_if) {
8596 /* send up the previous chain */
8597 if (list.tail != NULL) {
8598 /* terminate the list */
8599 list.tail->m_nextpkt = NULL;
8600 }
8601 n_lists++;
8602 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8603 "(%s): sublist %u pkts %u",
8604 dst_if->if_xname, n_lists, count);
8605 bridge_enqueue(bridge_ifp, NULL,
8606 dst_if, etypef, list.head,
8607 CHECKSUM_OPERATION_CLEAR_OFFLOAD, pkt_direction_RX);
8608
8609 /* start new list */
8610 list.head = list.tail = scan;
8611 count = 1;
8612 dst_if = this_if;
8613 } else {
8614 count++;
8615 list.tail = scan;
8616 }
8617 if (next_packet == NULL) {
8618 /* last list */
8619 n_lists++;
8620 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8621 "(%s): sublist %u pkts %u",
8622 dst_if->if_xname, n_lists, count);
8623 bridge_enqueue(bridge_ifp, NULL,
8624 dst_if, etypef, list.head,
8625 CHECKSUM_OPERATION_CLEAR_OFFLOAD, pkt_direction_RX);
8626 }
8627 }
8628 return;
8629 }
8630
8631 /*
8632 * bridge_mac_nat_output:
8633 * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8634 * from the interface 'bif'.
8635 *
8636 * Create a mac_nat_entry containing the source IP address and MAC address
8637 * from the packet. Populate a mac_nat_record with information detailing
8638 * how to translate the packet. Translation takes place later by calling
8639 * `bridge_mac_nat_translate()`.
8640 *
8641 * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8642 * interface is generating an output packet. No translation is required in this
8643 * case, we just record the IP address used to prevent another bif from
8644 * claiming our IP address.
8645 *
8646 * Returns:
8647 * TRUE if the packet should be translated (*mnr updated as well),
8648 * FALSE otherwise.
8649 *
8650 * *data may be updated to point at a different mbuf chain or NULL if
8651 * the chain was deallocated during processing.
8652 */
8653
8654 static boolean_t
8655 bridge_mac_nat_output(struct bridge_softc *sc,
8656 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8657 {
8658 struct ether_header *eh;
8659 boolean_t translate = FALSE;
8660
8661 BRIDGE_LOCK_ASSERT_HELD(sc);
8662 assert(sc->sc_mac_nat_bif != NULL);
8663
8664 eh = mtod(*data, struct ether_header *);
8665 if (mnr != NULL) {
8666 bzero(mnr, sizeof(*mnr));
8667 mnr->mnr_ether_type = eh->ether_type;
8668 }
8669 switch (eh->ether_type) {
8670 case HTONS_ETHERTYPE_ARP:
8671 translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8672 break;
8673 case HTONS_ETHERTYPE_IP:
8674 translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8675 break;
8676 case HTONS_ETHERTYPE_IPV6:
8677 translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8678 break;
8679 default:
8680 break;
8681 }
8682 return translate;
8683 }
8684
8685 static void
8686 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8687 const char eaddr[ETHER_ADDR_LEN])
8688 {
8689 errno_t error;
8690
8691 if (mnr->mnr_arp_offset == 0) {
8692 return;
8693 }
8694 /* replace the source hardware address */
8695 error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8696 ETHER_ADDR_LEN, eaddr,
8697 MBUF_DONTWAIT);
8698 if (error != 0) {
8699 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8700 "mbuf_copyback failed");
8701 m_freem(*data);
8702 *data = NULL;
8703 }
8704 return;
8705 }
8706
8707 static void
8708 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8709 {
8710 errno_t error;
8711 size_t offset;
8712
8713 if (mnr->mnr_ip_header_len == 0) {
8714 return;
8715 }
8716 /* update the UDP checksum */
8717 offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8718 error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8719 sizeof(mnr->mnr_ip_udp_csum),
8720 &mnr->mnr_ip_udp_csum,
8721 MBUF_DONTWAIT);
8722 if (error != 0) {
8723 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8724 "mbuf_copyback uh_sum failed");
8725 m_freem(*data);
8726 *data = NULL;
8727 }
8728 /* update the DHCP must broadcast flag */
8729 offset += sizeof(struct udphdr);
8730 error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8731 sizeof(mnr->mnr_ip_dhcp_flags),
8732 &mnr->mnr_ip_dhcp_flags,
8733 MBUF_DONTWAIT);
8734 if (error != 0) {
8735 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8736 "mbuf_copyback dp_flags failed");
8737 m_freem(*data);
8738 *data = NULL;
8739 }
8740 }
8741
8742 static void
8743 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8744 const char eaddr[ETHER_ADDR_LEN])
8745 {
8746 uint16_t cksum;
8747 errno_t error;
8748 mbuf_t m = *data;
8749
8750 if (mnr->mnr_ip6_header_len == 0) {
8751 return;
8752 }
8753 switch (mnr->mnr_ip6_icmp6_type) {
8754 case ND_ROUTER_ADVERT:
8755 case ND_ROUTER_SOLICIT:
8756 case ND_NEIGHBOR_SOLICIT:
8757 case ND_NEIGHBOR_ADVERT:
8758 if (mnr->mnr_ip6_lladdr_offset == 0) {
8759 /* nothing to do */
8760 return;
8761 }
8762 break;
8763 default:
8764 return;
8765 }
8766
8767 /*
8768 * replace the lladdr
8769 */
8770 error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8771 ETHER_ADDR_LEN, eaddr,
8772 MBUF_DONTWAIT);
8773 if (error != 0) {
8774 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8775 "mbuf_copyback lladdr failed");
8776 m_freem(m);
8777 *data = NULL;
8778 return;
8779 }
8780
8781 /*
8782 * recompute the icmp6 checksum
8783 */
8784
8785 /* skip past the ethernet header */
8786 _mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
8787
8788 #define CKSUM_OFFSET_ICMP6 offsetof(struct icmp6_hdr, icmp6_cksum)
8789 /* set the checksum to zero */
8790 cksum = 0;
8791 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8792 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8793 if (error != 0) {
8794 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8795 "mbuf_copyback cksum=0 failed");
8796 m_freem(m);
8797 *data = NULL;
8798 return;
8799 }
8800 /* compute and set the new checksum */
8801 cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8802 mnr->mnr_ip6_icmp6_len);
8803 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8804 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8805 if (error != 0) {
8806 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8807 "mbuf_copyback cksum failed");
8808 m_freem(m);
8809 *data = NULL;
8810 return;
8811 }
8812 /* restore the ethernet header */
8813 _mbuf_adjust_pkthdr_and_data(m, -ETHER_HDR_LEN);
8814 return;
8815 }
8816
8817 static void
8818 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8819 const char eaddr[ETHER_ADDR_LEN])
8820 {
8821 struct ether_header *eh;
8822
8823 /* replace the source ethernet address with the single MAC */
8824 eh = mtod(*data, struct ether_header *);
8825 bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8826 switch (mnr->mnr_ether_type) {
8827 case HTONS_ETHERTYPE_ARP:
8828 bridge_mac_nat_arp_translate(data, mnr, eaddr);
8829 break;
8830
8831 case HTONS_ETHERTYPE_IP:
8832 bridge_mac_nat_ip_translate(data, mnr);
8833 break;
8834
8835 case HTONS_ETHERTYPE_IPV6:
8836 bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8837 break;
8838
8839 default:
8840 break;
8841 }
8842 return;
8843 }
8844
8845 /*
8846 * bridge packet filtering
8847 */
8848
8849 /*
8850 * Perform basic checks on header size since
8851 * pfil assumes ip_input has already processed
8852 * it for it. Cut-and-pasted from ip_input.c.
8853 * Given how simple the IPv6 version is,
8854 * does the IPv4 version really need to be
8855 * this complicated?
8856 *
8857 * XXX Should we update ipstat here, or not?
8858 * XXX Right now we update ipstat but not
8859 * XXX csum_counter.
8860 */
8861 static int
8862 bridge_ip_checkbasic(struct mbuf **mp)
8863 {
8864 struct mbuf *m = *mp;
8865 struct ip *ip;
8866 int len, hlen;
8867 u_short sum;
8868
8869 if (*mp == NULL) {
8870 return -1;
8871 }
8872
8873 if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8874 /* max_linkhdr is already rounded up to nearest 4-byte */
8875 if ((m = m_copyup(m, sizeof(struct ip),
8876 max_linkhdr)) == NULL) {
8877 /* XXXJRT new stat, please */
8878 ipstat.ips_toosmall++;
8879 goto bad;
8880 }
8881 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8882 if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8883 ipstat.ips_toosmall++;
8884 goto bad;
8885 }
8886 }
8887 ip = mtod(m, struct ip *);
8888 if (ip == NULL) {
8889 goto bad;
8890 }
8891
8892 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
8893 ipstat.ips_badvers++;
8894 goto bad;
8895 }
8896 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8897 if (hlen < (int)sizeof(struct ip)) { /* minimum header length */
8898 ipstat.ips_badhlen++;
8899 goto bad;
8900 }
8901 if (hlen > m->m_len) {
8902 if ((m = m_pullup(m, hlen)) == 0) {
8903 ipstat.ips_badhlen++;
8904 goto bad;
8905 }
8906 ip = mtod(m, struct ip *);
8907 if (ip == NULL) {
8908 goto bad;
8909 }
8910 }
8911
8912 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
8913 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
8914 } else {
8915 if (hlen == sizeof(struct ip)) {
8916 sum = in_cksum_hdr(ip);
8917 } else {
8918 sum = in_cksum(m, hlen);
8919 }
8920 }
8921 if (sum) {
8922 ipstat.ips_badsum++;
8923 goto bad;
8924 }
8925
8926 /* Retrieve the packet length. */
8927 len = ntohs(ip->ip_len);
8928
8929 /*
8930 * Check for additional length bogosity
8931 */
8932 if (len < hlen) {
8933 ipstat.ips_badlen++;
8934 goto bad;
8935 }
8936
8937 /*
8938 * Check that the amount of data in the buffers
8939 * is as at least much as the IP header would have us expect.
8940 * Drop packet if shorter than we expect.
8941 */
8942 if (m->m_pkthdr.len < len) {
8943 ipstat.ips_tooshort++;
8944 goto bad;
8945 }
8946
8947 /* Checks out, proceed */
8948 *mp = m;
8949 return 0;
8950
8951 bad:
8952 *mp = m;
8953 return -1;
8954 }
8955
8956 /*
8957 * Same as above, but for IPv6.
8958 * Cut-and-pasted from ip6_input.c.
8959 * XXX Should we update ip6stat, or not?
8960 */
8961 static int
8962 bridge_ip6_checkbasic(struct mbuf **mp)
8963 {
8964 struct mbuf *m = *mp;
8965 struct ip6_hdr *ip6;
8966
8967 /*
8968 * If the IPv6 header is not aligned, slurp it up into a new
8969 * mbuf with space for link headers, in the event we forward
8970 * it. Otherwise, if it is aligned, make sure the entire base
8971 * IPv6 header is in the first mbuf of the chain.
8972 */
8973 if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8974 struct ifnet *inifp = m->m_pkthdr.rcvif;
8975 /* max_linkhdr is already rounded up to nearest 4-byte */
8976 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
8977 max_linkhdr)) == NULL) {
8978 /* XXXJRT new stat, please */
8979 ip6stat.ip6s_toosmall++;
8980 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8981 goto bad;
8982 }
8983 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
8984 struct ifnet *inifp = m->m_pkthdr.rcvif;
8985 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
8986 ip6stat.ip6s_toosmall++;
8987 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8988 goto bad;
8989 }
8990 }
8991
8992 ip6 = mtod(m, struct ip6_hdr *);
8993
8994 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
8995 ip6stat.ip6s_badvers++;
8996 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
8997 goto bad;
8998 }
8999
9000 /* Checks out, proceed */
9001 *mp = m;
9002 return 0;
9003
9004 bad:
9005 *mp = m;
9006 return -1;
9007 }
9008
/*
 * bridge_pf:
 *	Run the ethernet frame `*mp` through the pf hook (pf_af_hook).
 *
 *	The PF routines expect to be called from ip_input, so we
 *	need to do and undo here some of the same processing: the ethernet
 *	(and SNAP/LLC, if present) header is stripped and saved, the IP header
 *	is sanity checked, ip_len/ip_off are swapped to host order for IPv4,
 *	and after the hook everything is put back the way it was.
 *
 * Parameters:
 *	mp		in/out packet; may be replaced, or set to NULL if the
 *			packet was freed or consumed
 *	ifp		interface passed to pf_af_hook(); the hook is skipped
 *			when NULL
 *	sc_filter_flags	IFBF_FILT_MEMBER must be set for any filtering to
 *			happen; IFBF_FILT_ONLYIP causes non-IP, non-ARP
 *			packets to be dropped
 *	input		passed through to pf_af_hook()
 *
 * Returns:
 *	0 with `*mp` left as a full ethernet frame when filtering is
 *	disabled, the packet is ARP/REVARP, or the packet passed.
 *
 *	In every other case `*mp` is NULL on return; the return value is
 *	then the pf_af_hook() result, -1, or (on some drop paths) 0, so
 *	callers need to check `*mp` as well as the return value.
 *
 * XXX : this is heavily inspired on bridge_pfil()
 */
static int
bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
    bool input)
{
	/*
	 * XXX : mpetit : heavily inspired by bridge_pfil()
	 */

	int snap, error, i, hlen;
	struct ether_header *eh1, eh2;
	struct ip *ip;
	struct llc llc1;
	u_int16_t ether_type;

	snap = 0;
	error = -1;     /* Default error if not error == 0 */

	if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
		return 0; /* filtering is disabled */
	}
	/* make the first max_protohdr bytes (or the whole packet) contiguous */
	i = min((*mp)->m_pkthdr.len, max_protohdr);
	if ((*mp)->m_len < i) {
		*mp = m_pullup(*mp, i);
		if (*mp == NULL) {
			BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
			return -1;
		}
	}

	eh1 = mtod(*mp, struct ether_header *);
	ether_type = ntohs(eh1->ether_type);

	/*
	 * Check for SNAP/LLC. If found, the ether type is taken from
	 * the SNAP header instead.
	 */
	if (ether_type < ETHERMTU) {
		struct llc *llc2 = (struct llc *)(eh1 + 1);

		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
		    llc2->llc_dsap == LLC_SNAP_LSAP &&
		    llc2->llc_ssap == LLC_SNAP_LSAP &&
		    llc2->llc_control == LLC_UI) {
			ether_type = htons(llc2->llc_un.type_snap.ether_type);
			snap = 1;
		}
	}

	/*
	 * If we're trying to filter bridge traffic, don't look at anything
	 * other than IP and ARP traffic.  If the filter doesn't understand
	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
	 * but of course we don't have an AppleTalk filter to begin with.
	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
	 * ARP traffic.)
	 */
	switch (ether_type) {
	case ETHERTYPE_ARP:
	case ETHERTYPE_REVARP:
		return 0; /* Automatically pass */

	case ETHERTYPE_IP:
	case ETHERTYPE_IPV6:
		break;
	default:
		/*
		 * Check to see if the user wants to pass non-ip
		 * packets, these will not be checked by pf and
		 * passed unconditionally so the default is to drop.
		 */
		if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
			/* error is still -1 here */
			goto bad;
		}
		break;
	}

	/* Strip off the Ethernet header and keep a copy. */
	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
	m_adj(*mp, ETHER_HDR_LEN);

	/* Strip off snap header, if present */
	if (snap) {
		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
		m_adj(*mp, sizeof(struct llc));
	}

	/*
	 * Check the IP header for alignment and errors
	 */
	switch (ether_type) {
	case ETHERTYPE_IP:
		error = bridge_ip_checkbasic(mp);
		break;
	case ETHERTYPE_IPV6:
		error = bridge_ip6_checkbasic(mp);
		break;
	default:
		error = 0;
		break;
	}
	if (error) {
		goto bad;
	}

	error = 0;

	/*
	 * Run the packet through pf rules
	 */
	switch (ether_type) {
	case ETHERTYPE_IP:
		/*
		 * before calling the firewall, swap fields the same as
		 * IP does. here we assume the header is contiguous
		 */
		ip = mtod(*mp, struct ip *);

		ip->ip_len = ntohs(ip->ip_len);
		ip->ip_off = ntohs(ip->ip_off);

		if (ifp != NULL) {
			error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
		}

		if (*mp == NULL || error != 0) { /* filter may consume */
			break;
		}

		/*
		 * Recalculate the ip checksum and restore byte ordering.
		 * NOTE(review): error is 0 on the `goto bad` paths below, so
		 * those drops return 0 with *mp == NULL.
		 */
		ip = mtod(*mp, struct ip *);
		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
		if (hlen < (int)sizeof(struct ip)) {
			goto bad;
		}
		if (hlen > (*mp)->m_len) {
			if ((*mp = m_pullup(*mp, hlen)) == 0) {
				goto bad;
			}
			ip = mtod(*mp, struct ip *);
			if (ip == NULL) {
				goto bad;
			}
		}
		ip->ip_len = htons(ip->ip_len);
		ip->ip_off = htons(ip->ip_off);
		/* the checksum field must be zero while it is recomputed */
		ip->ip_sum = 0;
		if (hlen == sizeof(struct ip)) {
			ip->ip_sum = in_cksum_hdr(ip);
		} else {
			ip->ip_sum = in_cksum(*mp, hlen);
		}
		break;

	case ETHERTYPE_IPV6:
		if (ifp != NULL) {
			error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
		}

		if (*mp == NULL || error != 0) { /* filter may consume */
			break;
		}
		break;
	default:
		error = 0;
		break;
	}

	/* the packet is gone, there is nothing to free or restore */
	if (*mp == NULL) {
		return error;
	}
	if (error != 0) {
		goto bad;
	}

	/* a failure to restore the headers below is reported as -1 */
	error = -1;

	/*
	 * Finally, put everything back the way it was and return
	 */
	if (snap) {
		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
		if (*mp == NULL) {
			return error;
		}
		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
	}

	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
	if (*mp == NULL) {
		return error;
	}
	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);

	return 0;

bad:
	/* drop the packet; *mp may already be NULL at this point */
	m_freem(*mp);
	*mp = NULL;
	return error;
}
9215
9216 #if BRIDGESTP
9217 static void
9218 bridge_bstp_input_list(struct bstp_port *bp, struct mbuf *head)
9219 {
9220 mbuf_t next_packet = NULL;
9221
9222 for (mbuf_t scan = head; scan != NULL; scan = next_packet) {
9223 next_packet = scan->m_nextpkt;
9224 scan->m_nextpkt = NULL;
9225 bstp_input(bp, scan);
9226 }
9227 }
9228 #endif /* BRIDGESTP */
9229
9230 static mblist
9231 bridge_filter_arp_list(struct bridge_iflist * bif, mbuf_t m)
9232 {
9233 mbuf_t next_packet = NULL;
9234 mblist ret;
9235
9236 mblist_init(&ret);
9237 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9238 errno_t error;
9239
9240 /* take packet out of the list */
9241 next_packet = scan->m_nextpkt;
9242 scan->m_nextpkt = NULL;
9243 /* filter the ARP packet */
9244 error = bridge_host_filter_arp(bif, &scan);
9245 if (error != 0 && scan != NULL) {
9246 if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9247 brlog_mbuf_data(scan, 0,
9248 sizeof(struct ether_header) +
9249 sizeof(struct ip));
9250 }
9251 m_freem(scan);
9252 scan = NULL;
9253 }
9254 if (scan != NULL) {
9255 /* add it to the list */
9256 mblist_append(&ret, scan);
9257 }
9258 }
9259 return ret;
9260 }
9261
9262 static mbuf_t
9263 bridge_filter_checksum(ifnet_t bridge_ifp, struct bridge_iflist * bif, mbuf_t m,
9264 bool is_ipv4, bool host_filter, bool checksum)
9265 {
9266 uint32_t dbgf = 0;
9267 errno_t error;
9268 ip_packet_info info;
9269 u_int mac_hlen = sizeof(struct ether_header);
9270
9271 if (host_filter) {
9272 dbgf |= BR_DBGF_HOSTFILTER;
9273 }
9274 if (checksum) {
9275 dbgf |= BR_DBGF_CHECKSUM;
9276 }
9277 /* get the IP protocol header */
9278 error = bridge_get_ip_proto(&m, mac_hlen, is_ipv4, &info,
9279 &bif->bif_stats.brms_in_ip);
9280 if (error != 0) {
9281 BRIDGE_LOG(LOG_NOTICE, dbgf,
9282 "%s(%s) bridge_get_ip_proto failed %d",
9283 bridge_ifp->if_xname,
9284 bif->bif_ifp->if_xname, error);
9285 goto drop;
9286 }
9287 if (host_filter) {
9288 bool drop = true;
9289
9290 /* restrict IP protocols */
9291 switch (info.ip_proto) {
9292 case IPPROTO_ICMP:
9293 case IPPROTO_IGMP:
9294 drop = !is_ipv4;
9295 break;
9296 case IPPROTO_TCP:
9297 case IPPROTO_UDP:
9298 drop = false;
9299 break;
9300 case IPPROTO_ICMPV6:
9301 drop = is_ipv4;
9302 break;
9303 default:
9304 break;
9305 }
9306 if (drop) {
9307 BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
9308 goto drop;
9309 }
9310 bridge_hostfilter_stats.brhf_ip_ok += 1;
9311 }
9312 if (checksum) {
9313 /* need to compute IP/UDP/TCP/checksums */
9314 error = bridge_offload_checksum(&m, &info, &bif->bif_stats);
9315 if (error != 0) {
9316 BRIDGE_LOG(LOG_NOTICE, dbgf,
9317 "%s(%s) bridge_offload_checksum failed %d",
9318 bridge_ifp->if_xname,
9319 bif->bif_ifp->if_xname, error);
9320 goto drop;
9321 }
9322 }
9323 return m;
9324
9325 drop:
9326 /* toss the packet */
9327 if (m != NULL) {
9328 if (host_filter &&
9329 BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9330 brlog_mbuf_data(m, 0,
9331 sizeof(struct ether_header) +
9332 sizeof(struct ip));
9333 }
9334 m_freem(m);
9335 m = NULL;
9336 }
9337 return NULL;
9338 }
9339
9340 static mblist
9341 bridge_filter_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9342 mbuf_t in_list, ether_type_flag_t etypef, bool host_filter, bool checksum)
9343 {
9344 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
9345 mbuf_t next_packet = NULL;
9346 mblist ret;
9347
9348 mblist_init(&ret);
9349 for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9350 /* take packet out of the list */
9351 next_packet = scan->m_nextpkt;
9352 scan->m_nextpkt = NULL;
9353 scan = bridge_filter_checksum(bridge_ifp, bif,
9354 scan, is_ipv4, host_filter, checksum);
9355 if (scan != NULL) {
9356 /* add packet to the list */
9357 mblist_append(&ret, scan);
9358 }
9359 }
9360 return ret;
9361 }
9362
9363 static mbuf_t
9364 bridge_checksum_offload_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9365 mbuf_t m, bool is_ipv4)
9366 {
9367 mblist ret;
9368 mbuf_t next_packet;
9369
9370 mblist_init(&ret);
9371 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
9372 uint32_t csum_flags;
9373
9374 /* take it out of the list */
9375 next_packet = scan->m_nextpkt;
9376 scan->m_nextpkt = NULL;
9377
9378 csum_flags = scan->m_pkthdr.csum_flags;
9379 if ((csum_flags & checksum_request_flags) != 0) {
9380 /* compute the checksum now */
9381 scan = bridge_filter_checksum(bridge_ifp, bif, scan,
9382 is_ipv4, false, true);
9383 if (scan != NULL) {
9384 /* clear offload now */
9385 scan->m_pkthdr.csum_flags &= csum_flags;
9386 }
9387 }
9388 if (scan != NULL) {
9389 mblist_append(&ret, scan);
9390 }
9391 }
9392 return ret.head;
9393 }
9394
9395 static mbuf_t
9396 copy_broadcast_packet(mbuf_t m)
9397 {
9398 mbuf_t mc;
9399
9400 /* make a copy of the packet */
9401 mc = m_dup(m, M_DONTWAIT);
9402 if (mc != NULL) {
9403 struct ether_header *eh;
9404
9405 /* make copy look like it is broadcast */
9406 mc->m_flags |= M_BCAST;
9407 eh = mtod(mc, struct ether_header *);
9408 bcopy(etherbroadcastaddr, eh->ether_dhost, ETHER_ADDR_LEN);
9409 }
9410 return mc;
9411 }
9412
9413 static mblist
9414 bridge_find_broadcast_ipv4(mbuf_t in_list, mbuf_t * ip_bcast_head)
9415 {
9416 mblist ip_bcast;
9417 mbuf_t next_packet = NULL;
9418 mblist ret;
9419
9420 mblist_init(&ret);
9421 mblist_init(&ip_bcast);
9422 for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
9423 mbuf_t bcast_pkt = NULL;
9424 uint8_t *header;
9425
9426 /* take packet out of the list */
9427 next_packet = scan->m_nextpkt;
9428 scan->m_nextpkt = NULL;
9429
9430 header = get_ether_ip_header_ptr(&scan, FALSE);
9431 if (header != NULL) {
9432 struct in_addr dst;
9433 struct ip *iphdr;
9434
9435 iphdr = (struct ip *)(header + sizeof(struct ether_header));
9436 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
9437 if (dst.s_addr == INADDR_BROADCAST) {
9438 bcast_pkt = copy_broadcast_packet(scan);
9439 }
9440 }
9441 if (bcast_pkt != NULL) {
9442 /* add packet to broadcast list */
9443 mblist_append(&ip_bcast, bcast_pkt);
9444 }
9445 if (scan != NULL) {
9446 /* add packet back into the list */
9447 mblist_append(&ret, scan);
9448 }
9449 }
9450 *ip_bcast_head = ip_bcast.head;
9451 return ret;
9452 }
9453
9454 static ifnet_t
9455 bridge_find_member(struct bridge_softc * sc, uint8_t * lladdr,
9456 struct bridge_iflist * sbif)
9457 {
9458 struct bridge_iflist * bif;
9459
9460 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
9461 if (bif == sbif) {
9462 /* skip the input member */
9463 continue;
9464 }
9465 if (_ether_cmp(IF_LLADDR(bif->bif_ifp), lladdr) == 0) {
9466 return bif->bif_ifp;
9467 }
9468 }
9469 return NULL;
9470 }
9471
9472
9473 /*
9474 * Function: bridge_input_list
9475 *
9476 * Purpose:
9477 * Process a list of input packets through the bridge.
9478 * The caller ensures that all of the packets in the list
9479 * `list_head` .. `list_tail` have the same ethernet header.
9480 *
9481 * Returns:
9482 * Non-NULL head of the chain of packets that were not consumed/freed,
9483 * *tail_p set to the tail of that chain.
9484 *
9485 * NULL if all of the packets were consumed.
9486 */
9487 static mblist
9488 bridge_input_list(struct bridge_softc * sc, ifnet_t ifp,
9489 struct ether_header * eh_in_p, mblist list, bool is_promisc)
9490 {
9491 struct bridge_iflist * bif;
9492 ifnet_t bridge_ifp;
9493 bool checksum_offload;
9494 uint8_t * dhost;
9495 #if BRIDGESTP
9496 bool discarding = false;
9497 #endif /* BRIDGESTP */
9498 ifnet_t dst_if = NULL;
9499 errno_t error;
9500 ether_type_flag_t etypef;
9501 bool host_filter;
9502 bool host_filter_drop = false;
9503 mbuf_ref_t ip_bcast = NULL;
9504 bool is_bridge_mac = false;
9505 bool is_broadcast;
9506 bool is_ifp_mac;
9507 ifnet_t member_input = NULL;
9508 uint8_t * shost;
9509 bool uses_virtio = false;
9510 uint16_t vlan;
9511
9512 if (ifp->if_bridge == NULL) {
9513 /* no longer part of bridge */
9514 goto done;
9515 }
9516 bridge_ifp = sc->sc_ifp;
9517 is_broadcast = IS_BCAST_MCAST(list.head);
9518 is_ifp_mac = (!is_broadcast && !is_promisc);
9519 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9520 "%s from %s count %d head 0x%llx.0x%llx tail 0x%llx.0x%llx",
9521 bridge_ifp->if_xname, ifp->if_xname, list.count,
9522 (uint64_t)VM_KERNEL_ADDRPERM(list.head),
9523 (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.head, void *)),
9524 (uint64_t)VM_KERNEL_ADDRPERM(list.tail),
9525 (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.tail, void *)));
9526
9527 /* assume we'll return all packets */
9528 if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
9529 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9530 "%s not running passing along",
9531 bridge_ifp->if_xname);
9532 goto done;
9533 }
9534
9535 vlan = VLANTAGOF(m);
9536
9537 /* lookup the bridge member */
9538 BRIDGE_LOCK(sc);
9539 bif = bridge_lookup_member_if(sc, ifp);
9540 if (bif == NULL) {
9541 BRIDGE_UNLOCK(sc);
9542 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9543 "%s bridge_lookup_member_if failed",
9544 bridge_ifp->if_xname);
9545 goto done;
9546 }
9547
9548 uses_virtio = bif_uses_virtio(bif);
9549
9550 /*
9551 * host filter drops packets that:
9552 * - are not ARP, IPv4, or IPv6
9553 * - have incorrect source MAC address
9554 */
9555 host_filter = (bif->bif_flags & BIFF_HOST_FILTER) != 0;
9556 etypef = ether_type_flag_get(eh_in_p->ether_type);
9557 if (host_filter
9558 && (etypef & ETHER_TYPE_FLAG_IP_ARP) == 0) {
9559 /* ether type not one of ARP, IPv4, or IPv6 */
9560 BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
9561 host_filter_drop = true;
9562 } else if ((bif->bif_flags & BIFF_HF_HWSRC) != 0 &&
9563 bcmp(eh_in_p->ether_shost, bif->bif_hf_hwsrc, ETHER_ADDR_LEN)
9564 != 0) {
9565 /* only allow the single source MAC address */
9566 BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr,
9567 __func__, __LINE__);
9568 host_filter_drop = true;
9569 }
9570 if (host_filter_drop) {
9571 BRIDGE_UNLOCK(sc);
9572 m_freem_list(list.head);
9573 list.head = list.tail = NULL;
9574 goto done;
9575 }
9576
9577 #if BRIDGESTP
9578 discarding = (bif->bif_ifflags & IFBIF_STP) != 0 &&
9579 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING;
9580 #endif /* BRIDGESTP */
9581
9582 dhost = eh_in_p->ether_dhost;
9583 shost = eh_in_p->ether_shost;
9584 /*
9585 * Reserved multicast address listed in 802.1D section 7.12.6
9586 * must not be forwarded by the bridge.
9587 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
9588 */
9589 if (is_broadcast) {
9590 if (IS_MCAST(list.head)) {
9591 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
9592 " multicast: "
9593 "%02x:%02x:%02x:%02x:%02x:%02x",
9594 dhost[0], dhost[1],
9595 dhost[2], dhost[3],
9596 dhost[4], dhost[5]);
9597 }
9598 if (bcmp(dhost, bstp_etheraddr, (ETHER_ADDR_LEN - 1)) == 0) {
9599 if (dhost[5] == BSTP_ETHERADDR_RANGE_FIRST) {
9600 /* multicast for spanning tree */
9601 #if BRIDGESTP
9602 bridge_bstp_input_list(&bif->bif_stp, list.head);
9603 #else /* BRIDGESTP */
9604 m_freem_list(list.head);
9605 #endif /* BRIDGESTP */
9606 list.head = list.tail = NULL;
9607 BRIDGE_UNLOCK(sc);
9608 goto done;
9609 }
9610 if (dhost[5] <= BSTP_ETHERADDR_RANGE_LAST) {
9611 /* allow packet to continue up the stack */
9612 BRIDGE_UNLOCK(sc);
9613 goto done;
9614 }
9615 }
9616 /* broadcast to all members */
9617 os_atomic_add(&bridge_ifp->if_imcasts, list.count, relaxed);
9618 }
9619
9620 #if BRIDGESTP
9621 if (discarding) {
9622 BRIDGE_UNLOCK(sc);
9623 goto done;
9624 }
9625 #endif /* BRIDGESTP */
9626
9627 /* If the interface is learning, record the address. */
9628 if ((bif->bif_ifflags & IFBIF_LEARNING) != 0) {
9629 error = bridge_rtupdate(sc, shost, vlan, bif, 0, IFBAF_DYNAMIC);
9630 /*
9631 * If the interface has addresses limits then deny any source
9632 * that is not in the cache.
9633 */
9634 if (error != 0 && bif->bif_addrmax) {
9635 BRIDGE_UNLOCK(sc);
9636 goto done;
9637 }
9638 }
9639 #if BRIDGESTP
9640 if ((bif->bif_ifflags & IFBIF_STP) != 0 &&
9641 bif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
9642 BRIDGE_UNLOCK(sc);
9643 goto done;
9644 }
9645 #endif /* BRIDGESTP */
9646
9647 /*
9648 * If the packet is not IP, let the host filter drop ARP packets.
9649 * Otherwise, if the host filter is enabled or we need to compute
9650 * checksums, do that.
9651 * Otherwise, if MAC-NAT is enabled and this is an IPv4 packet,
9652 * check for IPv4 broadcast packets. Accumulate those in a separate
9653 * list `ip_bcast`.
9654 */
9655 checksum_offload = bif_has_checksum_offload(bif);
9656 if (!ether_type_flag_is_ip(etypef)) {
9657 /* host filter process ARP */
9658 if (host_filter) {
9659 /* host filter check earlier means this must be ARP */
9660 VERIFY(etypef == ETHER_TYPE_FLAG_ARP);
9661 list = bridge_filter_arp_list(bif, list.head);
9662 if (list.head == NULL) {
9663 VERIFY(list.tail == NULL);
9664 BRIDGE_UNLOCK(sc);
9665 goto done;
9666 }
9667 }
9668 } else if (host_filter || checksum_offload) {
9669 /* host filter and/or checksum */
9670 list = bridge_filter_checksum_list(bridge_ifp, bif,
9671 list.head, etypef, host_filter, checksum_offload);
9672 if (list.head == NULL) {
9673 VERIFY(list.tail == NULL);
9674 BRIDGE_UNLOCK(sc);
9675 goto done;
9676 }
9677 } else if (is_ifp_mac && bif == sc->sc_mac_nat_bif &&
9678 etypef == ETHER_TYPE_FLAG_IPV4) {
9679 /* look for broadcast IPv4 packet */
9680 list = bridge_find_broadcast_ipv4(list.head, &ip_bcast);
9681 if (list.head == NULL && ip_bcast == NULL) {
9682 /* all packets were consumed */
9683 BRIDGE_UNLOCK(sc);
9684 goto done;
9685 }
9686 }
9687
9688 /*
9689 * If the bridge has an address assigned, and the destination MAC
9690 * matches the bridge interface, claim the packets for the bridge
9691 * interface.
9692 */
9693 if ((sc->sc_flags & SCF_ADDRESS_ASSIGNED) != 0 &&
9694 !is_broadcast && _ether_cmp(dhost, IF_LLADDR(bridge_ifp)) == 0) {
9695 is_bridge_mac = true;
9696 }
9697 if (is_ifp_mac) {
9698 /* unicast to the interface */
9699 if (sc->sc_mac_nat_bif == bif) {
9700 mbuf_ref_t forward = NULL;
9701
9702 if (list.head != NULL) {
9703 /* handle MAC-NAT if enabled */
9704 list = bridge_mac_nat_input_list(sc, ifp,
9705 list.head, &forward);
9706 }
9707 if (ip_bcast != NULL) {
9708 /* forward to all members except this one */
9709 /* bridge_broadcast_list unlocks */
9710 bridge_broadcast_list(sc, bif, etypef,
9711 ip_bcast, pkt_direction_RX);
9712 } else {
9713 BRIDGE_UNLOCK(sc);
9714 }
9715 if (forward != NULL) {
9716 bridge_mac_nat_forward_list(bridge_ifp, etypef,
9717 forward);
9718 }
9719 } else {
9720 BRIDGE_UNLOCK(sc);
9721 }
9722 /* unicast packets for this interface do not get forwarded */
9723 goto done;
9724 }
9725 if (is_bridge_mac || list.head == NULL) {
9726 BRIDGE_UNLOCK(sc);
9727 goto done;
9728 }
9729 if (!is_broadcast) {
9730 /* find where to send the packet */
9731 dst_if = bridge_rtlookup(sc, dhost, vlan);
9732 if (ifp == dst_if) {
9733 /* nothing to forward */
9734 BRIDGE_UNLOCK(sc);
9735 goto done;
9736 }
9737 if (dst_if == NULL) {
9738 /* if a member is the dhost, deliver as input */
9739 member_input = bridge_find_member(sc, dhost, bif);
9740 if (member_input != NULL) {
9741 /* grab packets destined to member */
9742 BRIDGE_UNLOCK(sc);
9743 goto done;
9744 }
9745 /* if a member is shost, there's a loop, drop it */
9746 if (bridge_find_member(sc, shost, bif) != NULL) {
9747 BRIDGE_UNLOCK(sc);
9748 m_freem_list(list.head);
9749 list.head = list.tail = NULL;
9750 goto done;
9751 }
9752 }
9753 }
9754 if (dst_if == NULL) {
9755 mbuf_t m;
9756
9757 m = copy_packet_list(list.head);
9758 if (m != NULL) {
9759 /* bridge_broadcast_list unlocks */
9760 bridge_broadcast_list(sc, bif, etypef, m,
9761 pkt_direction_RX);
9762 } else {
9763 BRIDGE_UNLOCK(sc);
9764 }
9765 } else {
9766 /* bridge_forward_list() consumes list and unlocks */
9767 bridge_forward_list(sc, bif, dst_if, etypef, list.head);
9768 list.head = list.tail = NULL;
9769 }
9770
9771 done:
9772 if (list.head != NULL) {
9773 if (member_input != NULL) {
9774 /* member gets the packets */
9775 inject_input_packet_list(member_input, list.head, true);
9776 list.head = list.tail = NULL;
9777 } else if (is_bridge_mac) {
9778 /* bridge consumes all the unicast packets */
9779 bridge_interface_input_list(bridge_ifp, etypef, list,
9780 uses_virtio);
9781 list.head = list.tail = NULL;
9782 } else {
9783 adjust_input_packet_list(list.head);
9784 }
9785 }
9786 return list;
9787 }
9788
9789 static inline void
9790 update_mbuf_flags(struct ifnet * ifp, mbuf_t m, struct ether_header * eh)
9791 {
9792 /* duplicate some of the work done in ether_demux */
9793 if ((eh->ether_dhost[0] & 1) == 0) {
9794 if (_ether_cmp(eh->ether_dhost, IF_LLADDR(ifp)) != 0) {
9795 m->m_flags |= M_PROMISC;
9796 }
9797 } else {
9798 /* Check for broadcast */
9799 if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0) {
9800 m->m_flags |= M_BCAST;
9801 } else {
9802 m->m_flags |= M_MCAST;
9803 }
9804 }
9805 if (m->m_flags & M_HASFCS) {
9806 /*
9807 * If the M_HASFCS is set by the driver we want to make sure
9808 * that we strip off the trailing FCS data before handing it
9809 * up the stack.
9810 */
9811 m_adj(m, -ETHER_CRC_LEN);
9812 m->m_flags &= ~M_HASFCS;
9813 }
9814 return;
9815 }
9816
9817 static mbuf_t
9818 bridge_pf_list(mbuf_t m, ifnet_t ifp, uint32_t sc_filter_flags, bool input)
9819 {
9820 mbuf_t next_packet = NULL;
9821 mblist ret;
9822
9823 mblist_init(&ret);
9824 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9825 next_packet = scan->m_nextpkt;
9826
9827 /* remove packet from list, and pass through PF */
9828 scan->m_nextpkt = NULL;
9829 MBUF_INPUT_CHECK(scan, ifp);
9830 bridge_pf(&scan, ifp, sc_filter_flags, input);
9831 if (scan != NULL) {
9832 /* add packet back to the list */
9833 mblist_append(&ret, scan);
9834 }
9835 }
9836 return ret.head;
9837 }
9838
9839 static inline bool
9840 bridge_check_frame_header(struct bridge_softc * sc, ifnet_t ifp, mbuf_t m)
9841 {
9842 bool included = false;
9843 char * __single header;
9844 size_t header_length = 0;
9845
9846 header = m->m_pkthdr.pkt_hdr;
9847 if (header >= (char *)mbuf_datastart(m) &&
9848 header <= mtod(m, char *)) {
9849 header_length = mtod(m, char *) - header;
9850 if (header_length >= ETHER_HDR_LEN) {
9851 included = true;
9852 }
9853 }
9854 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9855 "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
9856 "header length %lu", sc->sc_ifp->if_xname,
9857 ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
9858 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
9859 (uint64_t)VM_KERNEL_ADDRPERM(header),
9860 included ? "inside" : "outside", header_length);
9861 if (!included) {
9862 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9863 "%s: frame_header outside mbuf", ifp->if_xname);
9864 }
9865 return included;
9866 }
9867
9868
9869 mbuf_t
9870 bridge_early_input(struct ifnet *ifp, mbuf_t in_list, u_int32_t cnt)
9871 {
9872 struct ether_header eh;
9873 mblist list;
9874 volatile bool list_is_promisc;
9875 int n_lists = 0;
9876 mbuf_t next_packet = NULL;
9877 mblist ret;
9878 struct bridge_softc * __single sc = ifp->if_bridge;
9879 uint32_t sc_filter_flags;
9880
9881 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9882 "(%s): count %u", ifp->if_xname, cnt);
9883
9884 /* run packet list through PF first */
9885 sc_filter_flags = sc->sc_filter_flags;
9886 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
9887 in_list = bridge_pf_list(in_list, ifp, sc_filter_flags, true);
9888 }
9889
9890 /* form sublists with the same ethernet header */
9891 mblist_init(&list);
9892 mblist_init(&ret);
9893 for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9894 struct ether_header * eh_p;
9895 volatile bool is_promisc;
9896 mblist resid;
9897
9898 /* take it out of the list */
9899 next_packet = scan->m_nextpkt;
9900 scan->m_nextpkt = NULL;
9901
9902 /* don't loop the packet */
9903 if ((scan->m_flags & M_PROTO1) != 0) {
9904 mblist_append(&ret, scan);
9905 continue;
9906 }
9907 /* Check if this mbuf looks valid */
9908 MBUF_INPUT_CHECK(scan, ifp);
9909
9910 /* if the frame header isn't in the first mbuf, ignore */
9911 if (!bridge_check_frame_header(sc, ifp, scan)) {
9912 mblist_append(&ret, scan);
9913 continue;
9914 }
9915 eh_p = __unsafe_forge_single(struct ether_header *,
9916 scan->m_pkthdr.pkt_hdr);
9917 update_mbuf_flags(ifp, scan, eh_p);
9918
9919 /* set start back to include ether header */
9920 _mbuf_adjust_pkthdr_and_data(scan, -ETHER_HDR_LEN);
9921
9922 is_promisc = get_and_clear_promisc(scan);
9923 if (list.head == NULL) {
9924 /* start a new list */
9925 mblist_append(&list, scan);
9926 bcopy(eh_p, &eh, sizeof(eh));
9927 list_is_promisc = is_promisc;
9928 } else if (bcmp(eh_p, &eh, sizeof(eh)) != 0) {
9929 n_lists++;
9930 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9931 "(%s): sublist %u pkts %u",
9932 ifp->if_xname, n_lists, list.count);
9933 if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
9934 brlog_ether_header(&eh);
9935 }
9936 resid = bridge_input_list(sc, ifp, &eh, list,
9937 list_is_promisc);
9938 if (resid.head != NULL) {
9939 /* add to the packets to be returned */
9940 mblist_append_list(&ret, resid);
9941 }
9942 /* start new list */
9943 mblist_init(&list);
9944 mblist_append(&list, scan);
9945 list_is_promisc = is_promisc;
9946 bcopy(eh_p, &eh, sizeof(eh));
9947 } else {
9948 mblist_append(&list, scan);
9949 VERIFY(is_promisc == list_is_promisc);
9950 }
9951 if (next_packet == NULL) {
9952 /* last list */
9953 n_lists++;
9954 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9955 "(%s): sublist %u pkts %u",
9956 ifp->if_xname, n_lists, list.count);
9957 if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
9958 brlog_ether_header(&eh);
9959 }
9960 resid = bridge_input_list(sc, ifp, &eh, list,
9961 list_is_promisc);
9962 if (resid.head != NULL) {
9963 /* add to the packets to be returned */
9964 mblist_append_list(&ret, resid);
9965 }
9966 }
9967 }
9968 return ret.head;
9969 }
9970
9971 /*
9972 * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
9973 * All rights reserved.
9974 *
9975 * Redistribution and use in source and binary forms, with or without
9976 * modification, are permitted provided that the following conditions
9977 * are met:
9978 * 1. Redistributions of source code must retain the above copyright
9979 * notice, this list of conditions and the following disclaimer.
9980 * 2. Redistributions in binary form must reproduce the above copyright
9981 * notice, this list of conditions and the following disclaimer in the
9982 * documentation and/or other materials provided with the distribution.
9983 *
9984 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9985 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9986 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
9987 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
9988 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
9989 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
9990 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9991 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
9992 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
9993 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
9994 * SUCH DAMAGE.
9995 */
9996
9997 /*
9998 * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
9999 *
10000 * Create a queue of packets/segments which fit the given mss + hdr_len.
10001 * m0 points to mbuf chain to be segmented.
10002 * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
10003 * into segments of length MSS bytes and then copy the first hdr_len bytes
10004 * from m0 at the top of each segment.
10005 * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
10006 * in each segment after the first hdr_len bytes
10007 *
10008 * Return the new queue with the segments on success, NULL on failure.
10009 * (the mbuf queue is freed in this case).
10010 */
10011
10012 static mblist
10013 m_seg(struct mbuf *m0, int hdr_len, int mss, char * hdr2_buf __sized_by_or_null(hdr2_len), int hdr2_len)
10014 {
10015 int off = 0, n, firstlen;
10016 struct mbuf *mseg;
10017 int total_len = m0->m_pkthdr.len;
10018 mblist ret;
10019
10020 mblist_init(&ret);
10021 mblist_append(&ret, m0);
10022
10023 /*
10024 * Segmentation useless
10025 */
10026 if (total_len <= hdr_len + mss) {
10027 n = 1;
10028 goto done;
10029 }
10030 if (hdr2_buf == NULL || hdr2_len <= 0) {
10031 hdr2_buf = NULL;
10032 hdr2_len = 0;
10033 }
10034
10035 off = hdr_len + mss;
10036 firstlen = mss; /* first segment stored in the original mbuf */
10037 ret.bytes = off;
10038 for (n = 1; off < total_len; off += mss, n++) {
10039 struct mbuf *m;
10040 /*
10041 * Copy the header from the original packet
10042 * and create a new mbuf chain
10043 */
10044 if (MHLEN < hdr_len) {
10045 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
10046 } else {
10047 m = m_gethdr(M_NOWAIT, MT_DATA);
10048 }
10049
10050 if (m == NULL) {
10051 #ifdef GSO_DEBUG
10052 D("MGETHDR error\n");
10053 #endif
10054 goto err;
10055 }
10056
10057 m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
10058
10059 m->m_len = hdr_len;
10060 /*
10061 * if the optional header is present, copy it
10062 */
10063 if (hdr2_buf != NULL) {
10064 m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
10065 }
10066
10067 m->m_flags |= (m0->m_flags & M_COPYFLAGS);
10068 if (off + mss >= total_len) { /* last segment */
10069 mss = total_len - off;
10070 }
10071 /*
10072 * Copy the payload from original packet
10073 */
10074 mseg = m_copym(m0, off, mss, M_NOWAIT);
10075 if (mseg == NULL) {
10076 m_freem(m);
10077 #ifdef GSO_DEBUG
10078 D("m_copym error\n");
10079 #endif
10080 goto err;
10081 }
10082 m_cat(m, mseg);
10083
10084 m->m_pkthdr.len = hdr_len + hdr2_len + mss;
10085 m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
10086 /*
10087 * Copy the checksum flags and data (in_cksum() need this)
10088 */
10089 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
10090 m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
10091 m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
10092
10093 mblist_append(&ret, m);
10094 }
10095
10096 /*
10097 * Update first segment.
10098 * If the optional header is present, is necessary
10099 * to insert it into the first segment.
10100 */
10101 if (hdr2_buf == NULL) {
10102 m_adj(m0, hdr_len + firstlen - total_len);
10103 m0->m_pkthdr.len = hdr_len + firstlen;
10104 } else {
10105 mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
10106 if (mseg == NULL) {
10107 #ifdef GSO_DEBUG
10108 D("m_copym error\n");
10109 #endif
10110 goto err;
10111 }
10112 m_adj(m0, hdr_len - total_len);
10113 m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
10114 m_cat(m0, mseg);
10115 m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
10116 }
10117
10118 done:
10119 return ret;
10120
10121 err:
10122 if (ret.head != NULL) {
10123 m_freem_list(ret.head);
10124 mblist_init(&ret);
10125 }
10126 return ret;
10127 }
10128
10129 /*
10130 * Wrappers of IPv4 checksum functions
10131 */
10132 static inline void
10133 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
10134 {
10135 m->m_data += mac_hlen;
10136 m->m_len -= mac_hlen;
10137 m->m_pkthdr.len -= mac_hlen;
10138 #if __FreeBSD_version < 1000000
10139 ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
10140 #endif
10141
10142 in_delayed_cksum(m);
10143
10144 #if __FreeBSD_version < 1000000
10145 ip->ip_len = htons(ip->ip_len);
10146 #endif
10147 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
10148 m->m_len += mac_hlen;
10149 m->m_pkthdr.len += mac_hlen;
10150 m->m_data -= mac_hlen;
10151 }
10152
10153 static inline void
10154 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
10155 {
10156 m->m_data += mac_hlen;
10157
10158 ip->ip_sum = in_cksum(m, ip_hlen);
10159
10160 m->m_pkthdr.csum_flags &= ~CSUM_IP;
10161 m->m_data -= mac_hlen;
10162 }
10163
10164 /*
10165 * Structure that contains the state during the TCP segmentation
10166 */
10167 struct gso_ip_tcp_state {
10168 void (*update)
10169 (struct gso_ip_tcp_state*, struct mbuf*);
10170 void (*internal)
10171 (struct gso_ip_tcp_state*, struct mbuf*);
10172 u_int ip_m0_len;
10173 uint8_t * __counted_by(ip_m0_len) hdr;
10174 struct tcphdr *tcp;
10175 int mac_hlen;
10176 int ip_hlen;
10177 int tcp_hlen;
10178 int hlen;
10179 int pay_len;
10180 int sw_csum;
10181 uint32_t tcp_seq;
10182 uint16_t ip_id;
10183 boolean_t is_tx;
10184 };
10185
10186 /*
10187 * Update the pointers to TCP and IPv4 headers
10188 */
10189 static inline void
10190 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10191 {
10192 state->hdr = mtodo(m, state->mac_hlen);
10193 state->ip_m0_len = m->m_len - state->mac_hlen;
10194 state->ip_hlen = state->ip_hlen;
10195 state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10196 state->pay_len = m->m_pkthdr.len - state->hlen;
10197 }
10198
10199 /*
10200 * Set properly the TCP and IPv4 headers
10201 */
10202 static inline void
10203 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10204 {
10205 struct ip *ip;
10206 /*
10207 * Update IP header
10208 */
10209 ip = (struct ip *)state->hdr;
10210 ip->ip_id = htons((state->ip_id)++);
10211 ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
10212 /*
10213 * TCP Checksum
10214 */
10215 state->tcp->th_sum = 0;
10216 state->tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
10217 htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
10218 /*
10219 * Checksum HW not supported (TCP)
10220 */
10221 if (state->sw_csum & CSUM_DELAY_DATA) {
10222 gso_ipv4_data_cksum(m, ip, state->mac_hlen);
10223 }
10224
10225 state->tcp_seq += state->pay_len;
10226 /*
10227 * IP Checksum
10228 */
10229 ip->ip_sum = 0;
10230 /*
10231 * Checksum HW not supported (IP)
10232 */
10233 if (state->sw_csum & CSUM_IP) {
10234 gso_ipv4_hdr_cksum(m, ip, state->mac_hlen, state->ip_hlen);
10235 }
10236 }
10237
10238
10239 /*
10240 * Updates the pointers to TCP and IPv6 headers
10241 */
10242 static inline void
10243 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10244 {
10245 state->hdr = mtodo(m, state->mac_hlen);
10246 state->ip_m0_len = m->m_len - state->mac_hlen;
10247 state->ip_hlen = state->ip_hlen;
10248 state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10249 state->pay_len = m->m_pkthdr.len - state->hlen;
10250 }
10251
10252 /*
10253 * Sets properly the TCP and IPv6 headers
10254 */
10255 static inline void
10256 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10257 {
10258 struct ip6_hdr *ip6;
10259
10260 ip6 = (struct ip6_hdr *)state->hdr;
10261 ip6->ip6_plen = htons(m->m_pkthdr.len - state->mac_hlen - state->ip_hlen);
10262 /*
10263 * TCP Checksum
10264 */
10265 state->tcp->th_sum = 0;
10266 state->tcp->th_sum = in6_pseudo(&ip6->ip6_src, &ip6->ip6_dst,
10267 htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
10268 /*
10269 * Checksum HW not supported (TCP)
10270 */
10271 if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
10272 (void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
10273 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
10274 }
10275 state->tcp_seq += state->pay_len;
10276 }
10277
10278 /*
10279 * Init the state during the TCP segmentation
10280 */
10281 static void
10282 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
10283 bool is_ipv4, int mac_hlen, int ip_hlen,
10284 uint8_t *__counted_by(ip_m0_len) ip_hdr, u_int ip_m0_len,
10285 struct tcphdr * tcp_hdr)
10286 {
10287 #pragma unused(ifp)
10288
10289 state->hdr = ip_hdr;
10290 state->ip_m0_len = ip_m0_len;
10291 state->ip_hlen = ip_hlen;
10292 state->tcp = tcp_hdr;
10293 if (is_ipv4) {
10294 state->ip_id = ntohs(((struct ip *)state->hdr)->ip_id);
10295 state->update = gso_ipv4_tcp_update;
10296 state->internal = gso_ipv4_tcp_internal;
10297 state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
10298 } else {
10299 state->update = gso_ipv6_tcp_update;
10300 state->internal = gso_ipv6_tcp_internal;
10301 state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
10302 }
10303 state->mac_hlen = mac_hlen;
10304 state->tcp_hlen = state->tcp->th_off << 2;
10305 state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
10306 state->tcp_seq = ntohl(state->tcp->th_seq);
10307 //state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
10308 return;
10309 }
10310
10311 /*
10312 * GSO on TCP/IP (v4 or v6)
10313 *
10314 * Segment the given mbuf and return the list of packets.
10315 *
10316 */
10317 static mblist
10318 gso_ip_tcp(ifnet_t ifp, mbuf_t m0, struct gso_ip_tcp_state *state, bool is_tx)
10319 {
10320 struct mbuf *m;
10321 int orig_mss;
10322 int mss = 0;
10323 #ifdef GSO_STATS
10324 int total_len = m0->m_pkthdr.len;
10325 #endif /* GSO_STATS */
10326 mblist seg;
10327 bool tso_with_gso = false;
10328
10329 orig_mss = mss = _mbuf_get_tso_mss(m0);
10330 if (mss == 0 && !is_tx) {
10331 uint8_t seg_cnt = m0->m_pkthdr.rx_seg_cnt;
10332
10333 if (seg_cnt != 0) {
10334 uint32_t hdr_len;
10335 uint32_t len;
10336
10337 /* approximate the MSS using LRO seg cnt */
10338 hdr_len = state->ip_hlen + state->tcp_hlen;
10339 len = mbuf_pkthdr_len(m0) - hdr_len - ETHER_HDR_LEN;
10340 mss = len / seg_cnt;
10341 m0->m_pkthdr.rx_seg_cnt = 0;
10342 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10343 "%s: mss %d = len %d / seg cnt %d",
10344 ifp->if_xname, mss, len, seg_cnt);
10345 }
10346 }
10347 if (mss == 0) {
10348 /* hack: we don't have the actual MSS */
10349 u_int reduce_mss;
10350
10351 reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
10352 : if_bridge_tso_reduce_mss_forwarding;
10353 mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen -
10354 reduce_mss;
10355 assert(mss > 0);
10356 } else if (is_tx) {
10357 bool is_ipv4;
10358 bool do_tso = true;
10359
10360 if (TSO_IPV4_OK(ifp, m0)) {
10361 is_ipv4 = true;
10362 } else if (TSO_IPV6_OK(ifp, m0)) {
10363 is_ipv4 = false;
10364 } else {
10365 do_tso = false;
10366 }
10367 if (do_tso) { /* TSO with GSO */
10368 uint32_t if_tso_max;
10369
10370 if_tso_max = get_if_tso_mtu(ifp, is_ipv4);
10371 mss = if_tso_max - state->ip_hlen - state->tcp_hlen
10372 - ETHER_HDR_LEN;
10373 tso_with_gso = true;
10374 }
10375 }
10376 if (!tso_with_gso) {
10377 /* clear TSO flags */
10378 m0->m_pkthdr.csum_flags &= ~_TSO_CSUM;
10379 }
10380 seg = m_seg(m0, state->hlen, mss, 0, 0);
10381 if (seg.head == NULL || seg.head->m_nextpkt == NULL) {
10382 return seg;
10383 }
10384 if (tso_with_gso) {
10385 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10386 "%s TX gso size %d mss %d nsegs %d",
10387 ifp->if_xname,
10388 mss, orig_mss, seg.count);
10389 } else {
10390 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10391 "%s %s mss %d nsegs %d",
10392 ifp->if_xname,
10393 is_tx ? "TX" : "RX",
10394 mss, seg.count);
10395 }
10396 #ifdef GSO_STATS
10397 GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
10398 GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
10399 GSOSTAT_ADD(tcp.gsos_osegments, seg.count);
10400 #endif /* GSO_STATS */
10401
10402 /* first pkt */
10403 VERIFY(seg.head == m0);
10404 m = m0;
10405
10406 state->update(state, m);
10407
10408 do {
10409 state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
10410
10411 state->internal(state, m);
10412 m = m->m_nextpkt;
10413 state->update(state, m);
10414 state->tcp->th_flags &= ~TH_CWR;
10415 state->tcp->th_seq = htonl(state->tcp_seq);
10416 } while (m->m_nextpkt);
10417
10418 /* last pkt */
10419 state->internal(state, m);
10420
10421 #ifdef GSO_STATS
10422 if (!error) {
10423 GSOSTAT_INC(tcp.gsos_segmented);
10424 GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
10425 GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
10426 GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
10427 }
10428 #endif /* GSO_STATS */
10429 return seg;
10430 }
10431
10432 /*
10433 * GSO for TCP/IPv[46]
10434 */
10435 static mblist
10436 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
10437 u_int mac_hlen, bool is_ipv4, bool is_tx)
10438 {
10439 uint32_t csum_flags;
10440 struct gso_ip_tcp_state state;
10441 struct tcphdr *tcp;
10442
10443 assert(info_p->ip_proto_hdr != NULL);
10444 tcp = (struct tcphdr *)(void *)info_p->ip_proto_hdr;
10445 gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
10446 info_p->ip_hlen + info_p->ip_opt_len,
10447 info_p->ip_hdr, info_p->ip_m0_len, tcp);
10448 csum_flags = is_ipv4 ? CSUM_DELAY_DATA : CSUM_DELAY_IPV6_DATA; /* XXX */
10449 m->m_pkthdr.csum_flags |= csum_flags;
10450 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
10451 return gso_ip_tcp(ifp, m, &state, is_tx);
10452 }
10453
10454 static mblist
10455 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx)
10456 {
10457 int error;
10458 ip_packet_info info;
10459 struct bripstats stats; /* XXX ignored */
10460 mblist ret;
10461
10462 error = bridge_get_tcp_header(&m, mac_hlen, is_ipv4, &info, &stats);
10463 if (error != 0) {
10464 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10465 "%s bridge_get_tcp_header failed %d (%s)",
10466 ifp->if_xname, error,
10467 is_tx ? "TX" : "RX");
10468 if (m != NULL) {
10469 m_freem(m);
10470 m = NULL;
10471 }
10472 goto no_segment;
10473 }
10474 if (info.ip_proto_hdr == NULL) {
10475 /* not actually a TCP packet, no segmentation */
10476 goto no_segment;
10477 }
10478 if (!is_tx && ip_packet_info_dst_is_our_ip(&info, ifp->if_index)) {
10479 goto no_segment;
10480 }
10481 return gso_tcp_with_info(ifp, m, &info, mac_hlen, is_ipv4, is_tx);
10482
10483 no_segment:
10484 mblist_init(&ret);
10485 if (m != NULL) {
10486 mblist_append(&ret, m);
10487 }
10488 return ret;
10489 }
10490