1 /*
2 * Copyright (c) 2004-2025 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */
30 /*
31 * Copyright 2001 Wasabi Systems, Inc.
32 * All rights reserved.
33 *
34 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed for the NetBSD Project by
47 * Wasabi Systems, Inc.
48 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49 * or promote products derived from this software without specific prior
50 * written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
56 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62 * POSSIBILITY OF SUCH DAMAGE.
63 */
64
65 /*
66 * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67 * All rights reserved.
68 *
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
71 * are met:
72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88 * POSSIBILITY OF SUCH DAMAGE.
89 *
90 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91 */
92
93 /*
94 * Network interface bridge support.
95 *
96 * TODO:
97 *
98 * - Currently only supports Ethernet-like interfaces (Ethernet,
99 * 802.11, VLANs on Ethernet, etc.) Figure out a nice way
100 * to bridge other types of interfaces (FDDI-FDDI, and maybe
 *   consider heterogeneous bridges).
102 *
103 * - GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104 */
105
106 #include <sys/cdefs.h>
107
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123
124 #include <sys/kauth.h>
125
126 #include <kern/thread_call.h>
127
128 #include <libkern/libkern.h>
129
130 #include <kern/zalloc.h>
131
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141
142 #include <netinet/in.h> /* for struct arpcom */
143 #include <netinet/tcp.h> /* for struct tcphdr */
144 #include <netinet/in_systm.h>
145 #include <netinet/in_var.h>
146 #define _IP_VHL
147 #include <netinet/ip.h>
148 #include <netinet/ip_var.h>
149 #include <netinet/ip6.h>
150 #include <netinet6/ip6_var.h>
151 #include <netinet/if_ether.h> /* for struct arpcom */
152 #include <net/bridgestp.h>
153 #include <net/if_bridgevar.h>
154 #include <net/if_llc.h>
155 #if NVLAN > 0
156 #include <net/if_vlan_var.h>
157 #endif /* NVLAN > 0 */
158
159 #include <net/if_ether.h>
160 #include <net/dlil.h>
161 #include <net/kpi_interfacefilter.h>
162 #include <net/pfvar.h>
163
164 #include <net/route.h>
165 #include <dev/random/randomdev.h>
166
167 #include <netinet/bootp.h>
168 #include <netinet/dhcp.h>
169
170 #if SKYWALK
171 #include <skywalk/nexus/netif/nx_netif.h>
172 #endif /* SKYWALK */
173
174 #include <net/sockaddr_utils.h>
175 #include <net/mblist.h>
176
177 #include <os/log.h>
178
/* union of the IPv4 and IPv6 CSUM_TSO_* flag bits */
#define _TSO_CSUM       (CSUM_TSO_IPV4 | CSUM_TSO_IPV6)

/* INADDR_ANY expressed as a struct in_addr */
static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
182
183
/*
 * mbuf flag predicates
 * - each macro evaluates to 1 when at least one of the tested bits is
 *   present in (m)->m_flags and to 0 otherwise
 */
#define __M_FLAGS_ARE_SET(m, flags)     (0 != ((m)->m_flags & (flags)))
#define IS_BCAST(m)                     (0 != ((m)->m_flags & (M_BCAST)))
#define IS_MCAST(m)                     (0 != ((m)->m_flags & (M_MCAST)))
#define IS_BCAST_MCAST(m)               (0 != ((m)->m_flags & (M_BCAST | M_MCAST)))
188
/*
 * Shorthand for protocol constants converted to network byte order
 * with htons().
 */
#define HTONS_ETHERTYPE_ARP             htons(ETHERTYPE_ARP)
#define HTONS_ETHERTYPE_IP              htons(ETHERTYPE_IP)
#define HTONS_ETHERTYPE_IPV6            htons(ETHERTYPE_IPV6)
#define HTONS_ARPHRD_ETHER              htons(ARPHRD_ETHER)
#define HTONS_ARPOP_REQUEST             htons(ARPOP_REQUEST)
#define HTONS_ARPOP_REPLY               htons(ARPOP_REPLY)
#define HTONS_IPPORT_BOOTPC             htons(IPPORT_BOOTPC)
#define HTONS_IPPORT_BOOTPS             htons(IPPORT_BOOTPS)
#define HTONS_DHCP_FLAGS_BROADCAST      htons(DHCP_FLAGS_BROADCAST)
198
199 /*
200 * if_bridge_debug, BR_DBGF_*
201 * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
202 * to enable additional logs for the corresponding bridge function
203 * - "sysctl net.link.bridge.debug" controls the value of
204 * 'if_bridge_debug'
205 */
static uint32_t if_bridge_debug = 0;
#define BR_DBGF_LIFECYCLE       0x0001
#define BR_DBGF_INPUT           0x0002
#define BR_DBGF_OUTPUT          0x0004
#define BR_DBGF_RT_TABLE        0x0008
#define BR_DBGF_DELAYED_CALL    0x0010
#define BR_DBGF_IOCTL           0x0020
#define BR_DBGF_MBUF            0x0040
#define BR_DBGF_MCAST           0x0080
#define BR_DBGF_HOSTFILTER      0x0100
#define BR_DBGF_CHECKSUM        0x0200
#define BR_DBGF_MAC_NAT         0x0400
#define BR_DBGF_INPUT_LIST      0x0800

/*
 * if_bridge_log_level
 * - 'if_bridge_log_level' ensures that by default important logs are
 *   logged regardless of if_bridge_debug by comparing the log level
 *   in BRIDGE_LOG to if_bridge_log_level
 * - "sysctl net.link.bridge.log_level" controls the value of
 *   'if_bridge_log_level'
 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
 *   logs must use LOG_NOTICE to ensure they appear by default
 */
static int if_bridge_log_level = LOG_NOTICE;

/*
 * BRIDGE_DBGF_ENABLED
 * - evaluates to true when at least one of the BR_DBGF_* bits in '__flag'
 *   is set in 'if_bridge_debug'
 * - '__flag' is parenthesized so that a compound argument such as
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT) is masked as a whole; without the
 *   parentheses '&' would bind to the first operand only and the test
 *   would be true regardless of 'if_bridge_debug'
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)
233
234 /*
235 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
236 * - macros to generate the specified log conditionally based on
237 * the specified log level and debug flags
238 * - BRIDGE_LOG_SIMPLE does not include the function name in the log
239 */
/*
 * BRIDGE_LOG(__level, __dbgf, __string, ...)
 * - emits the log, prefixed with the calling function's name, when
 *   '__level' is at or below 'if_bridge_log_level' or when any of the
 *   '__dbgf' debug bits is enabled
 * - '__level' is parenthesized so that a compound argument (for example
 *   a conditional expression) is compared as a whole
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, "%s: " __string,         \
	                       __func__, ## __VA_ARGS__);               \
	        }                                                       \
	} while (0)
/*
 * BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)
 * - same gating as BRIDGE_LOG: logs when '__level' is at or below
 *   'if_bridge_log_level' or when any of the '__dbgf' debug bits is enabled
 * - the function name is not added to the message
 * - '__level' is parenthesized so that a compound argument (for example
 *   a conditional expression) is compared as a whole
 */
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                       \
	} while (0)
255
/*
 * Raw operations on the per-bridge mutex (sc_mtx) and assertions about
 * whether the calling thread owns it.
 */
#define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
#define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
#define BRIDGE_LOCK_ASSERT_HELD(_sc)            \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)         \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)
262
/*
 * BRIDGE_LOCK_DEBUG selects the implementation of the locking macros:
 * - non-zero: every macro forwards to a bridge_*() function
 * - zero: every macro expands inline to the code below
 * It is hard-wired to 1 here, so the inline variants are compiled out.
 */
#define BRIDGE_LOCK_DEBUG 1
#if BRIDGE_LOCK_DEBUG

/* number of slots in the softc's lock_lr[] / unlock_lr[] history arrays */
#define BR_LCKDBG_MAX                   4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      _err = bridge_lock2ref(_sc)
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
/*
 * BRIDGE_LOCK2REF
 * - must be entered with the bridge lock held; always drops the lock
 * - sets _err to EBUSY when sc_iflist_xcnt is non-zero, otherwise bumps
 *   sc_iflist_ref and sets _err to 0
 */
#define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	if ((_sc)->sc_iflist_xcnt > 0)                                  \
	        (_err) = EBUSY;                                         \
	else {                                                          \
	        (_sc)->sc_iflist_ref++;                                 \
	        (_err) = 0;                                             \
	}                                                               \
	_BRIDGE_UNLOCK(_sc);                                            \
} while (0)
/*
 * BRIDGE_UNREF
 * - takes the bridge lock, drops one sc_iflist_ref and releases the lock
 * - when sc_iflist_xcnt is non-zero and the last reference was dropped,
 *   wakes up the thread sleeping on sc_cv (after unlocking)
 */
#define BRIDGE_UNREF(_sc)               do {                            \
	_BRIDGE_LOCK(_sc);                                              \
	(_sc)->sc_iflist_ref--;                                         \
	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \
	        _BRIDGE_UNLOCK(_sc);                                    \
	        wakeup(&(_sc)->sc_cv);                                  \
	} else                                                          \
	        _BRIDGE_UNLOCK(_sc);                                    \
} while (0)
/*
 * BRIDGE_XLOCK
 * - must be entered with the bridge lock held
 * - bumps sc_iflist_xcnt, then sleeps on sc_cv (msleep with sc_mtx)
 *   until sc_iflist_ref has dropped to zero
 */
#define BRIDGE_XLOCK(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt++;                                        \
	while ((_sc)->sc_iflist_ref > 0)                                \
	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
	            "BRIDGE_XLOCK", NULL);                              \
} while (0)
/*
 * BRIDGE_XDROP
 * - must be entered with the bridge lock held; drops one sc_iflist_xcnt
 */
#define BRIDGE_XDROP(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt--;                                        \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
311
/*
 * BRIDGE_BPF_TAP_IN(ifp, m)
 * - calls bpf_tap_in() with DLT_EN10MB for 'm' when 'ifp' has a
 *   non-NULL if_bpf
 * - 'ifp' is parenthesized before the '->' so that arguments such as
 *   '*ifpp' or a conditional expression dereference the intended pointer
 * - 'ifp' is evaluated twice; do not pass an expression with side effects
 */
#define BRIDGE_BPF_TAP_IN(ifp, m)                                       \
	do {                                                            \
	        if ((ifp)->if_bpf != NULL) {                            \
	                bpf_tap_in((ifp), DLT_EN10MB, (m), NULL, 0);    \
	        }                                                       \
	} while(0)
318
/*
 * BRIDGE_BPF_TAP_OUT(ifp, m)
 * - calls bpf_tap_out() with DLT_EN10MB for 'm' when 'ifp' has a
 *   non-NULL if_bpf
 * - 'ifp' is parenthesized before the '->' so that arguments such as
 *   '*ifpp' or a conditional expression dereference the intended pointer
 * - 'ifp' is evaluated twice; do not pass an expression with side effects
 */
#define BRIDGE_BPF_TAP_OUT(ifp, m)                                      \
	do {                                                            \
	        if ((ifp)->if_bpf != NULL) {                            \
	                bpf_tap_out((ifp), DLT_EN10MB, (m), NULL, 0);   \
	        }                                                       \
	} while(0)
325
326
327 /*
328 * Initial size of the route hash table. Must be a power of two.
329 */
#ifndef BRIDGE_RTHASH_SIZE
#define BRIDGE_RTHASH_SIZE              16
#endif

/*
 * Maximum size of the routing hash table
 */
#define BRIDGE_RTHASH_SIZE_MAX          2048

/*
 * Bucket mask derived from the current table size (size - 1); this only
 * selects every bucket when sc_rthash_size is a power of two.
 */
#define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)

/*
 * Maximum number of addresses to cache.
 */
#ifndef BRIDGE_RTABLE_MAX
#define BRIDGE_RTABLE_MAX               100
#endif

/*
 * Timeout (in seconds) for entries learned dynamically.
 */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
#endif

/*
 * Number of seconds between walks of the route list.
 */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
#endif

/*
 * Number of MAC NAT entries
 * - sized based on 16 clients (including MAC NAT interface)
 *   each with 4 addresses
 */
#ifndef BRIDGE_MAC_NAT_ENTRY_MAX
#define BRIDGE_MAC_NAT_ENTRY_MAX        64
#endif /* BRIDGE_MAC_NAT_ENTRY_MAX */

/*
 * List of capabilities to possibly mask on the member interface.
 */
#define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
/*
 * List of capabilities to disable on the member interface.
 */
#define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
379
380 /*
381 * Bridge interface list entry.
382 */
struct bridge_iflist {
	TAILQ_ENTRY(bridge_iflist) bif_next;    /* tail queue linkage */
	struct ifnet            *bif_ifp;       /* member if */
	struct bstp_port        bif_stp;        /* STP state */
	uint32_t                bif_ifflags;    /* member if flags */
	                                        /* (IFBIF_* bits, see bif_ifflags_are_set) */
	int                     bif_savedcaps;  /* saved capabilities */
	uint32_t                bif_addrmax;    /* max # of addresses */
	uint32_t                bif_addrcnt;    /* cur. # of addresses */
	uint32_t                bif_addrexceeded; /* # of address violations */

	interface_filter_t      bif_iff_ref;    /* interface filter reference */
	struct bridge_softc     *bif_sc;        /* back pointer to a bridge softc */
	uint32_t                bif_flags;      /* presumably BIFF_* bits -- confirm */

	/* host filter */
	struct in_addr          bif_hf_ipsrc;
	uint8_t                 bif_hf_hwsrc[ETHER_ADDR_LEN];

	struct ifbrmstats       bif_stats;      /* per-member statistics */
};
403
404 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)405 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
406 {
407 return (bif->bif_ifflags & flags) != 0;
408 }
409
410 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)411 bif_has_checksum_offload(struct bridge_iflist * bif)
412 {
413 return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
414 }
415
416 static inline bool
bif_has_mac_nat(struct bridge_iflist * bif)417 bif_has_mac_nat(struct bridge_iflist * bif)
418 {
419 return bif_ifflags_are_set(bif, IFBIF_MAC_NAT);
420 }
421
422 static inline bool
bif_uses_virtio(struct bridge_iflist * bif)423 bif_uses_virtio(struct bridge_iflist * bif)
424 {
425 return bif_ifflags_are_set(bif, IFBIF_USES_VIRTIO);
426 }
427
428 /* fake errors to make the code clearer */
/*
 * Every name below is an alias for EJUSTRETURN; the distinct names only
 * document the reason at the point of use and cannot be told apart at
 * run time.
 */
#define _EBADIP                 EJUSTRETURN
#define _EBADIPCHECKSUM         EJUSTRETURN
#define _EBADIPV6               EJUSTRETURN
#define _EBADUDP                EJUSTRETURN
#define _EBADTCP                EJUSTRETURN
#define _EBADUDPCHECKSUM        EJUSTRETURN
#define _EBADTCPCHECKSUM        EJUSTRETURN
436
/*
 * BIFF_* state bits for a bridge member.
 * NOTE(review): presumably stored in bridge_iflist.bif_flags -- confirm
 * against the users of these flags.
 */
#define BIFF_PROMISC            0x01    /* promiscuous mode set */
#define BIFF_PROTO_ATTACHED     0x02    /* protocol attached */
#define BIFF_FILTER_ATTACHED    0x04    /* interface filter attached */
#define BIFF_MEDIA_ACTIVE       0x08    /* interface media active */
#define BIFF_HOST_FILTER        0x10    /* host filter enabled */
#define BIFF_HF_HWSRC           0x20    /* host filter source MAC is set */
#define BIFF_HF_IPSRC           0x40    /* host filter source IP is set */
#define BIFF_INPUT_BROADCAST    0x80    /* send broadcast packets in */
#define BIFF_IN_MEMBER_LIST     0x100   /* added to the member list */
#define BIFF_WIFI_INFRA         0x200   /* interface is Wi-Fi infra */
#define BIFF_ALL_MULTI          0x400   /* allmulti set */
#define BIFF_LRO_DISABLED       0x800   /* LRO was disabled */
#if SKYWALK
#define BIFF_FLOWSWITCH_ATTACHED 0x1000 /* we attached the flowswitch */
#define BIFF_NETAGENT_REMOVED   0x2000  /* we removed the netagent */
#endif /* SKYWALK */
453
454 /*
455 * mac_nat_entry
456 * - translates between an IP address and MAC address on a specific
457 * bridge interface member
458 */
struct mac_nat_entry {
	LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
	struct bridge_iflist    *mne_bif;       /* originating interface */
	unsigned long           mne_expire;     /* expiration time */
	union {
		struct in_addr  mneu_ip;        /* originating IPv4 address */
		struct in6_addr mneu_ip6;       /* originating IPv6 address */
	} mne_u;
	uint8_t                 mne_mac[ETHER_ADDR_LEN]; /* MAC address */
	uint8_t                 mne_flags;      /* MNE_FLAGS_* */
	uint8_t                 mne_reserved;
};
/* shorthand accessors for the members of the mne_u union */
#define mne_ip  mne_u.mneu_ip
#define mne_ip6 mne_u.mneu_ip6

#define MNE_FLAGS_IPV6          0x01    /* IPv6 address */

/* list head type for mac_nat_entry lists */
LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
477
478 /*
479 * mac_nat_record
480 * - used by bridge_mac_nat_output() to convey the translation that needs
481 * to take place in bridge_mac_nat_translate
482 * - holds enough information so that the translation can be done later
483 * when the destination interface is the MAC-NAT interface
484 */
struct mac_nat_record {
	uint16_t                mnr_ether_type; /* selects the mnr_u member in use */
	                                        /* (presumably; confirm in users) */
	union {
		/* ARP */
		uint16_t        mnru_arp_offset;
		/* IPv4 */
		struct {
			uint16_t mnruip_dhcp_flags;
			uint16_t mnruip_udp_csum;
			uint8_t mnruip_header_len;
		} mnru_ip;
		/* IPv6 */
		struct {
			uint16_t mnruip6_icmp6_len;
			uint16_t mnruip6_lladdr_offset;
			uint8_t mnruip6_icmp6_type;
			uint8_t mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};

/* shorthand accessors for the members of the mnr_u union */
#define mnr_arp_offset  mnr_u.mnru_arp_offset

#define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
#define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
#define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum

#define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
#define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
#define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
#define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
513
514 /*
515 * Bridge route node.
516 */
struct bridge_rtnode {
	LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
	LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
	struct bridge_iflist    *brt_dst;       /* destination if */
	unsigned long           brt_expire;     /* expiration time */
	uint8_t                 brt_flags;      /* address flags */
	uint8_t                 brt_addr[ETHER_ADDR_LEN]; /* MAC address */
	uint16_t                brt_vlan;       /* vlan id */
};

/* shorthand: the ifnet of the destination member (dereferences brt_dst) */
#define brt_ifp                 brt_dst->bif_ifp
528
529 /*
530 * Bridge delayed function call context
531 */
/* signature of a delayed function: it receives the bridge softc */
typedef void (*bridge_delayed_func_t)(struct bridge_softc *);

struct bridge_delayed_call {
	struct bridge_softc     *bdc_sc;        /* bridge the call belongs to */
	bridge_delayed_func_t   bdc_func;       /* Function to call */
	struct timespec         bdc_ts;         /* Time to call */
	u_int32_t               bdc_flags;      /* BDCF_* */
	thread_call_t           bdc_thread_call; /* thread call handle */
};

#define BDCF_OUTSTANDING        0x01    /* Delayed call has been scheduled */
#define BDCF_CANCELLING         0x02    /* May be waiting for call completion */
544
545 /*
546 * Software state for each bridge.
547 */
/* list head type used for the forwarding table buckets and sc_rtlist */
LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);

struct bridge_softc {
	struct ifnet            *sc_ifp;        /* make this an interface */
	uint32_t                sc_flags;       /* presumably SCF_* bits -- confirm */
	LIST_ENTRY(bridge_softc) sc_list;       /* list linkage */
	decl_lck_mtx_data(, sc_mtx);            /* the bridge lock (see _BRIDGE_LOCK) */
	struct _bridge_rtnode_list * __counted_by(sc_rthash_size) sc_rthash;      /* our forwarding table */
	struct _bridge_rtnode_list sc_rtlist;   /* list version of above */
	uint32_t                sc_rthash_key;  /* key for hash */
	uint32_t                sc_rthash_size; /* size of the hash table */
	struct bridge_delayed_call sc_aging_timer;
	struct bridge_delayed_call sc_resize_call;
	TAILQ_HEAD(, bridge_iflist) sc_spanlist;        /* span ports list */
	struct bstp_state       sc_stp;         /* STP state */
	void                    *sc_cv;         /* its address is the sleep/wakeup */
	                                        /* channel in BRIDGE_XLOCK/BRIDGE_UNREF */
	uint32_t                sc_brtmax;      /* max # of addresses */
	uint32_t                sc_brtcnt;      /* cur. # of addresses */
	uint32_t                sc_brttimeout;  /* rt timeout in seconds */
	uint32_t                sc_iflist_ref;  /* refcount for sc_iflist */
	uint32_t                sc_iflist_xcnt; /* exclusive-access count for */
	                                        /* sc_iflist (see BRIDGE_XLOCK) */
	TAILQ_HEAD(, bridge_iflist) sc_iflist;  /* member interface list */
	uint32_t                sc_brtexceeded; /* # of cache drops */
	uint32_t                sc_filter_flags; /* ipf and flags */
	struct ifnet            *sc_ifaddr;     /* member mac copied from */
	u_char                  sc_defaddr[6];  /* Default MAC address */
	char                    sc_if_xname[IFNAMSIZ]; /* interface name buffer */

	struct bridge_iflist    *sc_mac_nat_bif; /* single MAC NAT interface */
	struct mac_nat_entry_list sc_mne_list;  /* MAC NAT IPv4 */
	struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
	uint32_t                sc_mne_max;     /* max # of entries */
	uint32_t                sc_mne_count;   /* cur. # of entries */
	uint32_t                sc_mne_allocation_failures; /* # of failed entry allocations */
#if BRIDGE_LOCK_DEBUG
	/*
	 * Locking and unlocking calling history
	 */
	void                    *lock_lr[BR_LCKDBG_MAX];
	int                     next_lock_lr;
	void                    *unlock_lr[BR_LCKDBG_MAX];
	int                     next_unlock_lr;
#endif /* BRIDGE_LOCK_DEBUG */
};

/*
 * SCF_* state bits.
 * NOTE(review): presumably stored in bridge_softc.sc_flags -- confirm.
 */
#define SCF_DETACHING           0x01
#define SCF_RESIZING            0x02
#define SCF_MEDIA_ACTIVE        0x04
#define SCF_PROTO_ATTACHED      0x08
597
/*
 * ChecksumOperation
 * - selects the checksum handling to apply to a packet
 * - enumerators are numbered consecutively starting at 0
 */
typedef enum {
	CHECKSUM_OPERATION_NONE = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD,       /* 1 */
	CHECKSUM_OPERATION_FINALIZE,            /* 2 */
	CHECKSUM_OPERATION_COMPUTE,             /* 3 */
} ChecksumOperation;
604
/*
 * ip_packet_info
 * - description of an IP packet: header location and lengths, payload
 *   length, and the transport protocol
 */
typedef struct {
	u_int           ip_hlen;        /* IP header length */
	u_int           ip_pay_len;     /* length of payload (exclusive of ip_hlen) */
	u_int           ip_m0_len;      /* bytes available at ip_hdr (without jumping mbufs) */
	u_int           ip_opt_len;     /* IPv6 options headers length */
	uint8_t         ip_proto;       /* IPPROTO_TCP, IPPROTO_UDP, etc. */
	bool            ip_is_ipv4;     /* IPv4 when true; presumably IPv6 otherwise */
	bool            ip_is_fragmented; /* packet is a fragment */
	uint8_t         *__sized_by(ip_m0_len) ip_hdr; /* pointer to IP header */
	uint8_t         *__indexable ip_proto_hdr; /* ptr to protocol header (TCP) */
} ip_packet_info, *ip_packet_info_t;
616
/* host filter statistics; non-static, so visible outside this file */
struct bridge_hostfilter_stats bridge_hostfilter_stats;

/* 8-bit flag type passed alongside packets (see bridge_enqueue et al.) */
typedef uint8_t ether_type_flag_t;
620
/*
 * pkt_direction_t
 * - tells whether a packet is being received (RX) or transmitted (TX)
 */
typedef enum {
	pkt_direction_RX = 0,
	pkt_direction_TX = 1
} pkt_direction_t;
625
/* lock group ("if_bridge") used when declaring bridge_list_mtx */
static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
/*
 * NOTE(review): LCK_ATTR_DEBUG is requested only when BRIDGE_LOCK_DEBUG
 * is 0, which reads as inverted -- confirm that this is intentional.
 */
#if BRIDGE_LOCK_DEBUG
static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
#else
static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
#endif
/*
 * Mutex declared with the group and attributes above; presumably guards
 * the global list of bridges -- confirm against its users.
 */
static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
633
/* run-time copy of BRIDGE_RTABLE_PRUNE_PERIOD (seconds between route list walks) */
static int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;

/* typed allocation pools for struct bridge_rtnode and struct mac_nat_entry */
static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
638
/*
 * Forward declarations of file-local functions; being static, each must
 * be defined in this translation unit.
 */

/* interface cloning */
static int bridge_clone_create(struct if_clone *, uint32_t, void *);
static int bridge_clone_destroy(struct ifnet *);

/* ioctl entry point: the data buffer is sized by IOCPARM_LEN(cmd) */
static errno_t bridge_ioctl(struct ifnet *, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)));
#if HAS_IF_CAP
static void bridge_mutecaps(struct bridge_softc *);
static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
    int);
#endif
static errno_t bridge_set_tso(struct bridge_softc *);
static void bridge_proto_attach_changed(struct ifnet *);
static int bridge_init(struct ifnet *);
static void bridge_ifstop(struct ifnet *, int);
static int bridge_output(struct ifnet *, struct mbuf *);
static void bridge_finalize_cksum(struct ifnet *, struct mbuf *);
static void bridge_start(struct ifnet *);
static mblist bridge_input_list(struct bridge_softc *, ifnet_t,
    struct ether_header *, mblist, bool);
static errno_t bridge_iff_input(void *, ifnet_t, protocol_family_t,
    mbuf_t *, char **);
static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t,
    mbuf_t *);
static errno_t bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
    mbuf_t *m);
static int bridge_enqueue(ifnet_t, ifnet_t, ifnet_t,
    ether_type_flag_t, mbuf_t, ChecksumOperation, pkt_direction_t);
static mbuf_t bridge_checksum_offload_list(ifnet_t, struct bridge_iflist *,
    mbuf_t, bool);
static mbuf_t bridge_filter_checksum(ifnet_t, struct bridge_iflist * bif,
    mbuf_t m, bool, bool, bool);
static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
670
/* delayed-call function (matches bridge_delayed_func_t) */
static void bridge_aging_timer(struct bridge_softc *sc);

/* broadcast and span */
static void bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
    ether_type_flag_t, mbuf_t);
static void bridge_broadcast_list(struct bridge_softc *,
    struct bridge_iflist *, ether_type_flag_t, mbuf_t, pkt_direction_t);

static void bridge_span(struct bridge_softc *, ether_type_flag_t, struct mbuf *);

/* route table operations keyed by MAC address and vlan id */
static int bridge_rtupdate(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
    uint16_t, struct bridge_iflist *, int, uint8_t);
static struct bridge_iflist * bridge_rtlookup_bif(struct bridge_softc *,
    const uint8_t[ETHER_ADDR_LEN], uint16_t);
static void bridge_rttrim(struct bridge_softc *);
static void bridge_rtage(struct bridge_softc *);
static void bridge_rtflush(struct bridge_softc *, int);
static int bridge_rtdaddr(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
    uint16_t);

/* route table setup and teardown */
static int bridge_rtable_init(struct bridge_softc *);
static void bridge_rtable_fini(struct bridge_softc *);

static void bridge_rthash_resize(struct bridge_softc *);

/* route node helpers */
static int bridge_rtnode_addr_cmp(const uint8_t[ETHER_ADDR_LEN], const uint8_t[ETHER_ADDR_LEN]);
static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
    const uint8_t[ETHER_ADDR_LEN], uint16_t);
static int bridge_rtnode_hash(struct bridge_softc *,
    struct bridge_rtnode *);
static int bridge_rtnode_insert(struct bridge_softc *,
    struct bridge_rtnode *);
static void bridge_rtnode_destroy(struct bridge_softc *,
    struct bridge_rtnode *);
#if BRIDGESTP
static void bridge_rtable_expire(struct ifnet *, int);
static void bridge_state_change(struct ifnet *, int);
#endif /* BRIDGESTP */
708
/*
 * Member lookup (by name buffer of IFNAMSIZ bytes, or by ifnet) and
 * removal of member / span entries.
 */
static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
    char * __sized_by(IFNAMSIZ) name);
static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
    struct ifnet *ifp);
static void bridge_delete_member(struct bridge_softc *,
    struct bridge_iflist *);
static void bridge_delete_span(struct bridge_softc *,
    struct bridge_iflist *);
717
/*
 * ioctl handlers
 * - every handler has the same shape: the bridge softc, an argument
 *   buffer, and that buffer's length ('arg' is annotated
 *   __sized_by(arg_len))
 * - NOTE(review): the 32/64 suffixed pairs presumably serve 32-bit and
 *   64-bit callers respectively -- confirm against the dispatch table
 */
static int bridge_ioctl_add(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_del(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_scache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gcache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gifs32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gifs64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_rts32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_rts64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_saddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_saddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_daddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_daddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_flush(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gpri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_spri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_ght(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sht(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sifprio(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sifcost(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_addspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_delspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gbparam32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gbparam64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_grte(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gifsstp32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gifsstp64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sproto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_stxhc(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_sfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_ghostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_shostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gmnelist32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gmnelist64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gifstats32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
static int bridge_ioctl_gifstats64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
764
/* packet filter hook and basic IPv4 / IPv6 header checks */
static int bridge_pf(struct mbuf **, struct ifnet *,
    uint32_t sc_filter_flags, bool input);
static int bridge_ip_checkbasic(struct mbuf **);
static int bridge_ip6_checkbasic(struct mbuf **);

/* detach, link state, and delayed-call management */
static void bridge_detach(ifnet_t);
static void bridge_link_event(struct ifnet *, u_int32_t);
static void bridge_iflinkevent(struct ifnet *);
static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
static int interface_media_active(struct ifnet *);
static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);

/* MAC NAT */
static errno_t bridge_mac_nat_enable(struct bridge_softc *,
    struct bridge_iflist *);
static void bridge_mac_nat_disable(struct bridge_softc *sc);
static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
    struct bridge_iflist *);
static mbuf_t bridge_mac_nat_input(struct bridge_softc *, ifnet_t, mbuf_t,
    ifnet_t * dst_if);
static boolean_t bridge_mac_nat_output(struct bridge_softc *,
    struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
    const char[ETHER_ADDR_LEN]);

/* MAC NAT, packet-list variants */
static mblist bridge_mac_nat_input_list(struct bridge_softc *sc,
    ifnet_t external_ifp, mbuf_t m, mbuf_t * forward_head);
static mbuf_t bridge_mac_nat_translate_list(struct bridge_softc * sc,
    struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
static mbuf_t bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
    struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);

/* packet filter hook, packet-list variant */
static mbuf_t bridge_pf_list(mbuf_t m, ifnet_t ifp,
    uint32_t sc_filter_flags, bool input);
802
803 static inline ifnet_t
bridge_rtlookup(struct bridge_softc * sc,const uint8_t addr[ETHER_ADDR_LEN],uint16_t vlan)804 bridge_rtlookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
805 uint16_t vlan)
806 {
807 struct bridge_iflist * bif;
808 ifnet_t ifp = NULL;
809
810 bif = bridge_rtlookup_bif(sc, addr, vlan);
811 if (bif != NULL) {
812 ifp = bif->bif_ifp;
813 }
814 return ifp;
815 }
816
/* predicates on an IPv4 address (by value) and an IPv6 address (by pointer) */
static bool in_addr_is_ours(const struct in_addr);
static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);

/* copy a whole packet: m_copym() from offset 0 for M_COPYALL bytes */
#define m_copypacket(m, how)    m_copym(m, 0, M_COPYALL, how)

/*
 * gso_tcp() and gso_tcp_with_info() both return an mblist; the _with_info
 * form additionally takes an ip_packet_info_t supplied by the caller.
 */
static mblist
gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx);

static mblist
gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
    u_int mac_hlen, bool is_ipv4, bool is_tx);
828
829 static inline mblist
gso_tcp_transmit(ifnet_t ifp,mbuf_t m,u_int mac_hlen,bool is_ipv4)830 gso_tcp_transmit(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4)
831 {
832 return gso_tcp(ifp, m, mac_hlen, is_ipv4, true);
833 }
834
/*
 * The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2)
 *
 * NOTE: as defined here, VLANTAGOF() ignores its argument and always
 * evaluates to 0, not 1.
 */
#define VLANTAGOF(_m)   0

/* first and last values of the final address byte; FIRST is used below */
#define BSTP_ETHERADDR_RANGE_FIRST      0x00
#define BSTP_ETHERADDR_RANGE_LAST       0x0f

/* 01:80:c2:00:00:<BSTP_ETHERADDR_RANGE_FIRST>; not static, so visible to other files */
u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
{ 0x01, 0x80, 0xc2, 0x00, 0x00, BSTP_ETHERADDR_RANGE_FIRST };


/* the all-zero ethernet address */
static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };

#if BRIDGESTP
/* bstp callback table: state changes and routing-table expiry */
static struct bstp_cb_ops bridge_ops = {
	.bcb_state = bridge_state_change,
	.bcb_rtage = bridge_rtable_expire
};
#endif /* BRIDGESTP */
854
/*
 * Sysctl node for the bridge, created under _net_link.  The SYSCTL_*
 * declarations below (and further down in this file) hang off it.
 */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Bridge");

static int bridge_inherit_mac = 0;   /* share MAC with first bridge member */
SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_inherit_mac, 0,
    "Inherit MAC address from the first bridge member");

/* bridge_rtable_prune_period is defined outside this section of the file */
SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_rtable_prune_period, 0,
    "Interval between pruning of routing table");

/* writable upper bound, initialised to BRIDGE_RTHASH_SIZE_MAX */
static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_rtable_hash_size_max, 0,
    "Maximum size of the routing hash table");

#if BRIDGE_DELAYED_CALLBACK_DEBUG
/* only built when BRIDGE_DELAYED_CALLBACK_DEBUG is set */
static int bridge_delayed_callback_delay = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_delayed_callback_delay, 0,
    "Delay before calling delayed function");
#endif

/* read-only export of bridge_hostfilter_stats (defined outside this section) */
SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
    hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &bridge_hostfilter_stats, bridge_hostfilter_stats, "");

#if BRIDGESTP
/*
 * NOTE(review): unlike the other sysctls declared here, this one is
 * declared without CTLFLAG_LOCKED -- confirm that is intentional.
 */
static int log_stp   = 0;   /* log STP state changes */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
    &log_stp, 0, "Log STP state changes");
#endif /* BRIDGESTP */
893
/*
 * One entry of the ioctl dispatch tables (bridge_control_table32 and
 * bridge_control_table64).
 */
struct bridge_control {
	/* handler; `args` is a buffer of `arg_len` bytes */
	int             (*bc_func)(struct bridge_softc *, void *__sized_by(arg_len) args, size_t arg_len);
	/* the tables set this to sizeof() of the handler's argument structure */
	unsigned int    bc_argsize;
	/* bitwise OR of the BC_F_* flags below */
	unsigned int    bc_flags;
};

#define BC_F_COPYIN             0x01    /* copy arguments in */
#define BC_F_COPYOUT            0x02    /* copy arguments out */
#define BC_F_SUSER              0x04    /* do super-user check */
903
/*
 * ioctl dispatch table whose size-dependent entries use the *32 handlers
 * and *32 argument structures (presumably for 32-bit callers -- confirm at
 * the dispatch site).
 *
 * Entry order matters: the inline markers (0, 10, 20, 30) give the index of
 * the entry they sit on, and bridge_control_table64 lists the same commands
 * in the same order.  New entries therefore have to be added to both tables
 * at the same position.
 */
static const struct bridge_control bridge_control_table32[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist32,
	  .bc_argsize = sizeof(struct ifbrmnelist32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats32,
	  .bc_argsize = sizeof(struct ifbrmreq32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
1008
/*
 * ioctl dispatch table whose size-dependent entries use the *64 handlers
 * and *64 argument structures (presumably for 64-bit callers -- confirm at
 * the dispatch site).
 *
 * Lists the same commands in the same order as bridge_control_table32; the
 * inline markers (0, 10, 20, 30) give the index of the entry they sit on.
 * Keep the two tables in step when adding entries.
 */
static const struct bridge_control bridge_control_table64[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),           /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist64,
	  .bc_argsize = sizeof(struct ifbrmnelist64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats64,
	  .bc_argsize = sizeof(struct ifbrmreq64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
1113
/*
 * Number of entries in the dispatch tables.  It is computed from
 * bridge_control_table32 only, so it is correct for
 * bridge_control_table64 only as long as both tables have the same
 * number of entries.
 */
static const unsigned int bridge_control_table_size =
    sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1116
/* list of bridge_softc instances; (re)initialised in bridgeattach() */
static LIST_HEAD(, bridge_softc) bridge_list =
    LIST_HEAD_INITIALIZER(bridge_list);

#define BRIDGENAME      "bridge"        /* name handed to the interface cloner */
#define BRIDGES_MAX     IF_MAXUNIT
/* the smaller of IFNETS_MAX and BRIDGES_MAX */
#define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)

/*
 * Cloner registered by bridgeattach(); creation and destruction go through
 * bridge_clone_create() and bridge_clone_destroy().  The trailing 0 and
 * BRIDGES_MAX arguments are presumably the unit-number range -- confirm
 * against IF_CLONE_INITIALIZER.
 */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
    0, BRIDGES_MAX);
1127
/*
 * When non-zero, bridge_clone_create() sets the ifnet `start` callback
 * (bridge_start); otherwise it sets IFNET_INIT_LEGACY and the `output`
 * callback (bridge_output).
 */
static int if_bridge_txstart = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");

/* if_bridge_debug and if_bridge_log_level are defined outside this section */
SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_debug, 0, "Bridge debug flags");

SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_log_level, 0, "Bridge log level");

/* enabled (1) by default */
static int if_bridge_output_skip_filters = 1;
SYSCTL_INT(_net_link_bridge, OID_AUTO, output_skip_filters,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_output_skip_filters, 0, "Bridge skip output filters");

/* the next two variables are not static, so they are visible to other files */
int bridge_enable_early_input = 1; /* DLIL early input */
SYSCTL_INT(_net_link_bridge, OID_AUTO, enable_early_input,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_enable_early_input, 0,
    "Bridge enable early input");

int bridge_allow_lro_num_seg = 1; /* allow LRO_NUM_SEG to keep LRO enabled */
SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_lro_num_seg,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_allow_lro_num_seg, 0,
    "Bridge allow LRO_NUM_SEG to keep LRO enabled");
1155
/*
 * Upper bounds (*_MAX) and initial values (*_DEFAULT) for the two
 * "tso_reduce_mss" sysctls; the *_MAX values are the limits passed to
 * bridge_tso_reduce_mss() by the sysctl handlers.
 */
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX            256
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT        110
#define BRIDGE_TSO_REDUCE_MSS_TX_MAX                    256
#define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT                0

/* current values, changed only through the sysctl handlers */
static u_int if_bridge_tso_reduce_mss_forwarding
        = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
static u_int if_bridge_tso_reduce_mss_tx
        = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1165
1166 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1167 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1168 {
1169 int changed;
1170 int error;
1171 u_int new_value;
1172
1173 error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1174 &changed);
1175 if (error == 0 && changed != 0) {
1176 if (new_value > val_max) {
1177 return EINVAL;
1178 }
1179 *val = new_value;
1180 }
1181 return error;
1182 }
1183
1184 static int
1185 bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
1186 {
1187 return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
1188 BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
1189 }
1190
1191 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
1192 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1193 0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
1194 "Bridge tso reduce mss when forwarding");
1195
1196 static int
1197 bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
1198 {
1199 return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
1200 BRIDGE_TSO_REDUCE_MSS_TX_MAX);
1201 }
1202
1203 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
1204 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1205 0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
1206 "Bridge tso reduce mss on transmit");
1207
#if DEBUG || DEVELOPMENT
/*
 * net.link.bridge.reduce_tso_mtu
 * - when non-zero, the bridge overrides the interface TSO MTU to a lower
 *   value (i.e. 16K) to enable testing the "use GSO instead" path
 * - only present in DEBUG or DEVELOPMENT builds; off (0) by default
 */
static int if_bridge_reduce_tso_mtu = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, reduce_tso_mtu,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_reduce_tso_mtu, 0, "Bridge interface reduce TSO MTU");

#endif /* DEBUG || DEVELOPMENT */

/* logging helpers defined below; all of them write at LOG_NOTICE level */
static void brlog_ether_header(struct ether_header *);
static void brlog_mbuf_data(mbuf_t, size_t, size_t);
static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
static void brlog_mbuf(mbuf_t, const char *, const char *);
static void brlog_link(struct bridge_softc * sc);
1226
#if BRIDGE_LOCK_DEBUG
/*
 * Lock-debugging variants of the bridge lock primitives.  Besides taking or
 * dropping the lock they record the caller's return address in the softc
 * (lock_lr[] / unlock_lr[]).
 */
static void bridge_lock(struct bridge_softc *);
static void bridge_unlock(struct bridge_softc *);
static int bridge_lock2ref(struct bridge_softc *);
static void bridge_unref(struct bridge_softc *);
static void bridge_xlock(struct bridge_softc *);
static void bridge_xdrop(struct bridge_softc *);

/*
 * Declare a local `v` holding the return address of the function the macro
 * is used in (__builtin_return_address(0)), forged into a __single pointer.
 * It has to be expanded directly in the function whose caller is recorded.
 */
#define DECL_RETURN_ADDR(v) void * __single v = __unsafe_forge_single(void *, __builtin_return_address(0))
1236
1237 static void
bridge_lock(struct bridge_softc * sc)1238 bridge_lock(struct bridge_softc *sc)
1239 {
1240 DECL_RETURN_ADDR(lr_saved);
1241
1242 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1243
1244 _BRIDGE_LOCK(sc);
1245
1246 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1247 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1248 }
1249
1250 static void
bridge_unlock(struct bridge_softc * sc)1251 bridge_unlock(struct bridge_softc *sc)
1252 {
1253 DECL_RETURN_ADDR(lr_saved);
1254
1255 BRIDGE_LOCK_ASSERT_HELD(sc);
1256
1257 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1258 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1259
1260 _BRIDGE_UNLOCK(sc);
1261 }
1262
1263 static int
bridge_lock2ref(struct bridge_softc * sc)1264 bridge_lock2ref(struct bridge_softc *sc)
1265 {
1266 int error = 0;
1267 DECL_RETURN_ADDR(lr_saved);
1268
1269 BRIDGE_LOCK_ASSERT_HELD(sc);
1270
1271 if (sc->sc_iflist_xcnt > 0) {
1272 error = EBUSY;
1273 } else {
1274 sc->sc_iflist_ref++;
1275 }
1276
1277 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1278 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1279
1280 _BRIDGE_UNLOCK(sc);
1281
1282 return error;
1283 }
1284
1285 static void
bridge_unref(struct bridge_softc * sc)1286 bridge_unref(struct bridge_softc *sc)
1287 {
1288 DECL_RETURN_ADDR(lr_saved);
1289
1290 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1291
1292 _BRIDGE_LOCK(sc);
1293 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1294 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1295
1296 sc->sc_iflist_ref--;
1297
1298 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1299 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1300 if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1301 _BRIDGE_UNLOCK(sc);
1302 wakeup(&sc->sc_cv);
1303 } else {
1304 _BRIDGE_UNLOCK(sc);
1305 }
1306 }
1307
1308 static void
bridge_xlock(struct bridge_softc * sc)1309 bridge_xlock(struct bridge_softc *sc)
1310 {
1311 DECL_RETURN_ADDR(lr_saved);
1312
1313 BRIDGE_LOCK_ASSERT_HELD(sc);
1314
1315 sc->sc_iflist_xcnt++;
1316 while (sc->sc_iflist_ref > 0) {
1317 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1318 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1319
1320 msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1321
1322 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1323 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1324 }
1325 }
1326
1327 #undef DECL_RETURN_ADDR
1328
1329 static void
bridge_xdrop(struct bridge_softc * sc)1330 bridge_xdrop(struct bridge_softc *sc)
1331 {
1332 BRIDGE_LOCK_ASSERT_HELD(sc);
1333
1334 sc->sc_iflist_xcnt--;
1335 }
1336
1337 #endif /* BRIDGE_LOCK_DEBUG */
1338
1339 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1340 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1341 {
1342 if (m) {
1343 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1344 "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1345 prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1346 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1347 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1348 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1349 suffix ? suffix : "");
1350 } else {
1351 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1352 }
1353 }
1354
1355 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1356 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1357 {
1358 if (m) {
1359 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1360 "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1361 "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1362 prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1363 mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1364 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
1365 (unsigned int)mbuf_maxlen(m),
1366 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1367 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1368 !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1369 if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1370 brlog_mbuf_pkthdr(m, "", suffix);
1371 }
1372 } else {
1373 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1374 }
1375 }
1376
1377 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1378 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1379 {
1380 mbuf_t n;
1381 size_t i, j;
1382 size_t pktlen, mlen, maxlen;
1383 unsigned char *ptr;
1384
1385 pktlen = mbuf_pkthdr_len(m);
1386
1387 if (offset > pktlen) {
1388 return;
1389 }
1390
1391 maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1392 n = m;
1393 mlen = mbuf_len(n);
1394 ptr = mtod(n, unsigned char *);
1395 for (i = 0, j = 0; i < maxlen; i++, j++) {
1396 if (j >= mlen) {
1397 n = mbuf_next(n);
1398 if (n == 0) {
1399 break;
1400 }
1401 ptr = mtod(n, unsigned char *);
1402 mlen = mbuf_len(n);
1403 j = 0;
1404 }
1405 if (i >= offset) {
1406 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1407 "%02x%s", ptr[j], i % 2 ? " " : "");
1408 }
1409 }
1410 }
1411
1412 static void
brlog_ether_header(struct ether_header * eh)1413 brlog_ether_header(struct ether_header *eh)
1414 {
1415 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1416 "%02x:%02x:%02x:%02x:%02x:%02x > "
1417 "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1418 eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1419 eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1420 eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1421 eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1422 ntohs(eh->ether_type));
1423 }
1424
1425 static char *
ether_ntop(char * __sized_by (len)buf,size_t len,const u_char ap[ETHER_ADDR_LEN])1426 ether_ntop(char * __sized_by(len) buf, size_t len, const u_char ap[ETHER_ADDR_LEN])
1427 {
1428 snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
1429 ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);
1430
1431 return buf;
1432 }
1433
1434 static void
brlog_link(struct bridge_softc * sc)1435 brlog_link(struct bridge_softc * sc)
1436 {
1437 int i;
1438 uint32_t sdl_buffer[(offsetof(struct sockaddr_dl, sdl_data) +
1439 IFNAMSIZ + ETHER_ADDR_LEN)];
1440 struct sockaddr_dl *sdl = SDL((uint8_t*)&sdl_buffer); /* SDL requires byte pointer */
1441 const u_char * lladdr;
1442 char lladdr_str[48];
1443
1444 memset(sdl_buffer, 0, sizeof(sdl_buffer));
1445 sdl->sdl_family = AF_LINK;
1446 sdl->sdl_nlen = strbuflen(sc->sc_if_xname);
1447 sdl->sdl_alen = ETHER_ADDR_LEN;
1448 sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1449 memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1450 memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1451 lladdr_str[0] = '\0';
1452 for (i = 0, lladdr = CONST_LLADDR(sdl);
1453 i < sdl->sdl_alen;
1454 i++, lladdr++) {
1455 char byte_str[4];
1456
1457 snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1458 *lladdr);
1459 strbufcat(lladdr_str, byte_str);
1460 }
1461 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1462 "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1463 " slen %d addr %s", sc->sc_if_xname,
1464 sdl->sdl_len, sdl->sdl_index,
1465 sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1466 sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1467 }
1468
1469 static int
_mbuf_get_tso_mss(mbuf_t m)1470 _mbuf_get_tso_mss(mbuf_t m)
1471 {
1472 int mss = 0;
1473
1474 if ((m->m_pkthdr.csum_flags & _TSO_CSUM) != 0) {
1475 mss = m->m_pkthdr.tso_segsz;
1476 }
1477 return mss;
1478 }
1479
1480 /*
1481 * bridgeattach:
1482 *
1483 * Pseudo-device attach routine.
1484 */
1485 __private_extern__ int
bridgeattach(int n)1486 bridgeattach(int n)
1487 {
1488 #pragma unused(n)
1489 int error;
1490
1491 LIST_INIT(&bridge_list);
1492
1493 #if BRIDGESTP
1494 bstp_sys_init();
1495 #endif /* BRIDGESTP */
1496
1497 error = if_clone_attach(&bridge_cloner);
1498 if (error != 0) {
1499 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1500 }
1501 return error;
1502 }
1503
1504 static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m,int len)1505 _mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
1506 {
1507 mbuf_setdata(m, mtodo(m, len), mbuf_len(m) - len);
1508 mbuf_pkthdr_adjustlen(m, -len);
1509 }
1510
1511 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1512 bridge_ifnet_set_attrs(struct ifnet * ifp)
1513 {
1514 errno_t error;
1515
1516 error = ifnet_set_mtu(ifp, ETHERMTU);
1517 if (error != 0) {
1518 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1519 goto done;
1520 }
1521 error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1522 if (error != 0) {
1523 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1524 goto done;
1525 }
1526 error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1527 if (error != 0) {
1528 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1529 goto done;
1530 }
1531 error = ifnet_set_flags(ifp,
1532 IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1533 0xffff);
1534
1535 if (error != 0) {
1536 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1537 goto done;
1538 }
1539 done:
1540 return error;
1541 }
1542
1543 static void
bridge_interface_proto_attach_changed(ifnet_t ifp)1544 bridge_interface_proto_attach_changed(ifnet_t ifp)
1545 {
1546 uint32_t proto_count;
1547 struct bridge_softc * __single sc = ifp->if_softc;
1548
1549 proto_count = if_get_protolist(ifp, NULL, 0);
1550 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1551 "%s: proto count %d", ifp->if_xname, proto_count);
1552
1553 if (sc == NULL) {
1554 return;
1555 }
1556 BRIDGE_LOCK(sc);
1557 if ((sc->sc_flags & SCF_DETACHING) != 0) {
1558 BRIDGE_UNLOCK(sc);
1559 return;
1560 }
1561 if (proto_count >= 2) {
1562 /* an upper layer protocol is attached */
1563 sc->sc_flags |= SCF_PROTO_ATTACHED;
1564 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1565 "%s: setting SCF_PROTO_ATTACHED", ifp->if_xname);
1566 } else {
1567 /* an upper layer protocol was detached */
1568 sc->sc_flags &= ~SCF_PROTO_ATTACHED;
1569 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1570 "%s: clearing SCF_PROTO_ATTACHED", ifp->if_xname);
1571 }
1572 BRIDGE_UNLOCK(sc);
1573 }
1574
1575 static void
bridge_interface_event(struct ifnet * ifp,__unused protocol_family_t protocol,const struct kev_msg * event)1576 bridge_interface_event(struct ifnet * ifp,
1577 __unused protocol_family_t protocol, const struct kev_msg * event)
1578 {
1579 int event_code;
1580
1581 if (event->vendor_code != KEV_VENDOR_APPLE
1582 || event->kev_class != KEV_NETWORK_CLASS
1583 || event->kev_subclass != KEV_DL_SUBCLASS) {
1584 return;
1585 }
1586 event_code = event->event_code;
1587 switch (event_code) {
1588 case KEV_DL_PROTO_DETACHED:
1589 case KEV_DL_PROTO_ATTACHED:
1590 bridge_interface_proto_attach_changed(ifp);
1591 break;
1592 default:
1593 break;
1594 }
1595 return;
1596 }
1597
1598 /*
1599 * Function: bridge_interface_attach_protocol
1600 * Purpose:
1601 * Attach a protocol to the bridge to get events on the interface,
1602 * in particular, whether protocols are attached/detached.
1603 */
1604 static int
bridge_interface_attach_protocol(ifnet_t ifp)1605 bridge_interface_attach_protocol(ifnet_t ifp)
1606 {
1607 int error;
1608 struct ifnet_attach_proto_param_v2 reg;
1609
1610 bzero(®, sizeof(reg));
1611 reg.event = bridge_interface_event;
1612
1613 error = ifnet_attach_protocol_v2(ifp, PF_BRIDGE, ®);
1614 if (error != 0) {
1615 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
1616 "%s: ifnet_attach_protocol failed, %d",
1617 ifp->if_xname, error);
1618 }
1619 return error;
1620 }
1621
1622 static void
bridge_interface_detach_protocol(ifnet_t ifp)1623 bridge_interface_detach_protocol(ifnet_t ifp)
1624 {
1625 (void)ifnet_detach_protocol(ifp, PF_BRIDGE);
1626 }
1627
1628 /*
1629 * bridge_clone_create:
1630 *
1631 * Create a new bridge instance.
1632 */
1633 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1634 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1635 {
1636 #pragma unused(params)
1637 ifnet_ref_t ifp = NULL;
1638 struct bridge_softc *sc = NULL;
1639 struct bridge_softc *sc2 = NULL;
1640 struct ifnet_init_eparams init_params;
1641 errno_t error = 0;
1642 uint8_t eth_hostid[ETHER_ADDR_LEN];
1643 int fb, retry, has_hostid;
1644
1645 sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
1646 lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1647 sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1648 sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1649 sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1650 sc->sc_filter_flags = 0;
1651
1652 TAILQ_INIT(&sc->sc_iflist);
1653
1654 /* use the interface name as the unique id for ifp recycle */
1655 snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1656 ifc->ifc_name, unit);
1657 bzero(&init_params, sizeof(init_params));
1658 init_params.ver = IFNET_INIT_CURRENT_VERSION;
1659 init_params.len = sizeof(init_params);
1660 /* Initialize our routing table. */
1661 error = bridge_rtable_init(sc);
1662 if (error != 0) {
1663 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1664 goto done;
1665 }
1666 TAILQ_INIT(&sc->sc_spanlist);
1667 if (if_bridge_txstart) {
1668 init_params.start = bridge_start;
1669 } else {
1670 init_params.flags = IFNET_INIT_LEGACY;
1671 init_params.output = bridge_output;
1672 }
1673 init_params.uniqueid_len = strbuflen(sc->sc_if_xname);
1674 init_params.uniqueid = sc->sc_if_xname;
1675 init_params.sndq_maxlen = IFQ_MAXLEN;
1676 init_params.name = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
1677 init_params.unit = unit;
1678 init_params.family = IFNET_FAMILY_ETHERNET;
1679 init_params.type = IFT_BRIDGE;
1680 init_params.demux = ether_demux;
1681 init_params.add_proto = ether_add_proto;
1682 init_params.del_proto = ether_del_proto;
1683 init_params.check_multi = ether_check_multi;
1684 init_params.framer_extended = ether_frameout_extended;
1685 init_params.softc = sc;
1686 init_params.ioctl = bridge_ioctl;
1687 init_params.detach = bridge_detach;
1688 init_params.broadcast_addr = etherbroadcastaddr;
1689 init_params.broadcast_len = ETHER_ADDR_LEN;
1690
1691 error = ifnet_allocate_extended(&init_params, &ifp);
1692 if (error != 0) {
1693 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1694 goto done;
1695 }
1696 LIST_INIT(&sc->sc_mne_list);
1697 LIST_INIT(&sc->sc_mne_list_v6);
1698 sc->sc_ifp = ifp;
1699 error = bridge_ifnet_set_attrs(ifp);
1700 if (error != 0) {
1701 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1702 error);
1703 goto done;
1704 }
1705 /*
1706 * Generate an ethernet address with a locally administered address.
1707 *
1708 * Since we are using random ethernet addresses for the bridge, it is
1709 * possible that we might have address collisions, so make sure that
1710 * this hardware address isn't already in use on another bridge.
1711 * The first try uses the "hostid" and falls back to read_frandom();
1712 * for "hostid", we use the MAC address of the first-encountered
1713 * Ethernet-type interface that is currently configured.
1714 */
1715 fb = 0;
1716 has_hostid = (uuid_get_ethernet(ð_hostid[0]) == 0);
1717 for (retry = 1; retry != 0;) {
1718 if (fb || has_hostid == 0) {
1719 read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1720 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1721 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1722 } else {
1723 bcopy(ð_hostid[0], &sc->sc_defaddr,
1724 ETHER_ADDR_LEN);
1725 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1726 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1727 sc->sc_defaddr[3] = /* stir it up a bit */
1728 ((sc->sc_defaddr[3] & 0x0f) << 4) |
1729 ((sc->sc_defaddr[3] & 0xf0) >> 4);
1730 /*
1731 * Mix in the LSB as it's actually pretty significant,
1732 * see rdar://14076061
1733 */
1734 sc->sc_defaddr[4] =
1735 (((sc->sc_defaddr[4] & 0x0f) << 4) |
1736 ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1737 sc->sc_defaddr[5];
1738 sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1739 }
1740
1741 fb = 1;
1742 retry = 0;
1743 lck_mtx_lock(&bridge_list_mtx);
1744 LIST_FOREACH(sc2, &bridge_list, sc_list) {
1745 if (_ether_cmp(sc->sc_defaddr,
1746 IF_LLADDR(sc2->sc_ifp)) == 0) {
1747 retry = 1;
1748 }
1749 }
1750 lck_mtx_unlock(&bridge_list_mtx);
1751 }
1752
1753 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1754
1755 if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1756 brlog_link(sc);
1757 }
1758 error = ifnet_attach(ifp, NULL);
1759 if (error != 0) {
1760 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1761 goto done;
1762 }
1763 (void)bridge_interface_attach_protocol(ifp);
1764
1765 error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1766 IFT_ETHER);
1767 if (error != 0) {
1768 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1769 error);
1770 goto done;
1771 }
1772
1773 ifnet_set_offload(ifp,
1774 IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1775 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1776 error = bridge_set_tso(sc);
1777 if (error != 0) {
1778 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1779 goto done;
1780 }
1781 #if BRIDGESTP
1782 bstp_attach(&sc->sc_stp, &bridge_ops);
1783 #endif /* BRIDGESTP */
1784
1785 lck_mtx_lock(&bridge_list_mtx);
1786 LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1787 lck_mtx_unlock(&bridge_list_mtx);
1788
1789 /* attach as ethernet */
1790 error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1791 NULL, NULL);
1792
1793 done:
1794 if (error != 0) {
1795 if (ifp != NULL) {
1796 bridge_interface_detach_protocol(ifp);
1797 }
1798 BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1799 /* TBD: Clean up: sc, sc_rthash etc */
1800 }
1801
1802 return error;
1803 }
1804
1805 /*
1806 * bridge_clone_destroy:
1807 *
1808 * Destroy a bridge instance.
1809 */
1810 static int
bridge_clone_destroy(struct ifnet * ifp)1811 bridge_clone_destroy(struct ifnet *ifp)
1812 {
1813 struct bridge_softc * __single sc = ifp->if_softc;
1814 struct bridge_iflist *bif;
1815 errno_t error;
1816
1817 bridge_interface_detach_protocol(ifp);
1818
1819 BRIDGE_LOCK(sc);
1820 if ((sc->sc_flags & SCF_DETACHING)) {
1821 BRIDGE_UNLOCK(sc);
1822 return 0;
1823 }
1824 sc->sc_flags |= SCF_DETACHING;
1825
1826 bridge_ifstop(ifp, 1);
1827
1828 bridge_cancel_delayed_call(&sc->sc_resize_call);
1829
1830 bridge_cleanup_delayed_call(&sc->sc_resize_call);
1831 bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1832
1833 error = ifnet_set_flags(ifp, 0, IFF_UP);
1834 if (error != 0) {
1835 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1836 }
1837
1838 while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1839 bridge_delete_member(sc, bif);
1840 }
1841
1842 while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1843 bridge_delete_span(sc, bif);
1844 }
1845 BRIDGE_UNLOCK(sc);
1846
1847 error = ifnet_detach(ifp);
1848 if (error != 0) {
1849 panic("%s (%d): ifnet_detach(%p) failed %d",
1850 __func__, __LINE__, ifp, error);
1851 }
1852 return 0;
1853 }
1854
/*
 * DRVSPEC: shared body of the SIOC[SG]DRVSPEC ioctl cases.
 *
 * Expands in a scope that provides `ifd', `bc', `bridge_control_table',
 * `args', `cmd', `sc' and `error'.  Steps, each of which leaves the
 * do/while with `error' set on failure:
 *   - range-check ifd_cmd and select the control table entry
 *   - reject a get on a non-copyout entry and a set on a copyout entry
 *   - enforce superuser for BC_F_SUSER entries
 *   - require ifd_len to equal the entry's argument size and fit `args'
 *   - copy the argument in (BC_F_COPYIN), run the handler with the bridge
 *     lock held, copy the result out (BC_F_COPYOUT)
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) { \
		error = EINVAL; \
		break; \
	} \
	bc = &bridge_control_table[ifd->ifd_cmd]; \
	\
	if ((cmd == SIOCGDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) || \
	    (cmd == SIOCSDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) != 0)) { \
		error = EINVAL; \
		break; \
	} \
	\
	if (bc->bc_flags & BC_F_SUSER) { \
		error = kauth_authorize_generic(kauth_cred_get(), \
		    KAUTH_GENERIC_ISSUSER); \
		if (error) { \
			break; \
		} \
	} \
	\
	if (ifd->ifd_len != bc->bc_argsize || \
	    ifd->ifd_len > sizeof (args)) { \
		error = EINVAL; \
		break; \
	} \
	\
	bzero(&args, sizeof (args)); \
	if (bc->bc_flags & BC_F_COPYIN) { \
		error = copyin(ifd->ifd_data, &args, ifd->ifd_len); \
		if (error) { \
			break; \
		} \
	} \
	\
	BRIDGE_LOCK(sc); \
	error = (*bc->bc_func)(sc, &args, sizeof(args)); \
	BRIDGE_UNLOCK(sc); \
	if (error) { \
		break; \
	} \
	\
	if (bc->bc_flags & BC_F_COPYOUT) { \
		error = copyout(&args, ifd->ifd_data, ifd->ifd_len); \
	} \
} while (0)
1901
1902 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1903 interface_needs_input_broadcast(struct ifnet * ifp)
1904 {
1905 /*
1906 * Selectively enable input broadcast only when necessary.
1907 * The bridge interface itself attaches a fake protocol
1908 * so checking for at least two protocols means that the
1909 * interface is being used for something besides bridging
1910 * and needs to see broadcast packets from other members.
1911 */
1912 return if_get_protolist(ifp, NULL, 0) >= 2;
1913 }
1914
1915 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1916 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1917 {
1918 boolean_t old_input_broadcast;
1919
1920 old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1921 if (input_broadcast) {
1922 bif->bif_flags |= BIFF_INPUT_BROADCAST;
1923 } else {
1924 bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1925 }
1926 return old_input_broadcast != input_broadcast;
1927 }
1928
1929 /*
1930 * bridge_ioctl:
1931 *
1932 * Handle a control request from the operator.
1933 */
1934 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * __sized_by (IOCPARM_LEN (cmd))data)1935 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)) data)
1936 {
1937 struct bridge_softc * __single sc = ifp->if_softc;
1938 struct ifreq *ifr = (struct ifreq *)data;
1939 struct bridge_iflist *bif;
1940 int error = 0;
1941
1942 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1943
1944 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1945 "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1946 ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1947 (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1948 (char)IOCGROUP(cmd), cmd & 0xff);
1949
1950 switch (cmd) {
1951 case SIOCSIFADDR:
1952 case SIOCAIFADDR:
1953 ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1954 break;
1955
1956 case SIOCGIFMEDIA32:
1957 case SIOCGIFMEDIA64: {
1958 // cast to 32bit version to work within bounds with 32bit userspace
1959 struct ifmediareq32 *ifmr = (struct ifmediareq32 *)data;
1960 user_addr_t user_addr;
1961
1962 user_addr = (cmd == SIOCGIFMEDIA64) ?
1963 ((struct ifmediareq64 *)data)->ifmu_ulist :
1964 CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
1965
1966 ifmr->ifm_status = IFM_AVALID;
1967 ifmr->ifm_mask = 0;
1968 ifmr->ifm_count = 1;
1969
1970 BRIDGE_LOCK(sc);
1971 if (!(sc->sc_flags & SCF_DETACHING) &&
1972 (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1973 ifmr->ifm_status |= IFM_ACTIVE;
1974 ifmr->ifm_active = ifmr->ifm_current =
1975 IFM_ETHER | IFM_AUTO;
1976 } else {
1977 ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1978 }
1979 BRIDGE_UNLOCK(sc);
1980
1981 if (user_addr != USER_ADDR_NULL) {
1982 error = copyout(&ifmr->ifm_current, user_addr,
1983 sizeof(int));
1984 }
1985 break;
1986 }
1987
1988 case SIOCADDMULTI:
1989 case SIOCDELMULTI:
1990 break;
1991
1992 case SIOCSDRVSPEC32:
1993 case SIOCGDRVSPEC32: {
1994 union {
1995 struct ifbreq ifbreq;
1996 struct ifbifconf32 ifbifconf;
1997 struct ifbareq32 ifbareq;
1998 struct ifbaconf32 ifbaconf;
1999 struct ifbrparam ifbrparam;
2000 struct ifbropreq32 ifbropreq;
2001 } args;
2002 struct ifdrv32 *ifd = (struct ifdrv32 *)data;
2003 const struct bridge_control *bridge_control_table =
2004 bridge_control_table32, *bc;
2005
2006 DRVSPEC;
2007
2008 break;
2009 }
2010 case SIOCSDRVSPEC64:
2011 case SIOCGDRVSPEC64: {
2012 union {
2013 struct ifbreq ifbreq;
2014 struct ifbifconf64 ifbifconf;
2015 struct ifbareq64 ifbareq;
2016 struct ifbaconf64 ifbaconf;
2017 struct ifbrparam ifbrparam;
2018 struct ifbropreq64 ifbropreq;
2019 } args;
2020 struct ifdrv64 *ifd = (struct ifdrv64 *)data;
2021 const struct bridge_control *bridge_control_table =
2022 bridge_control_table64, *bc;
2023
2024 DRVSPEC;
2025
2026 break;
2027 }
2028
2029 case SIOCSIFFLAGS:
2030 if (!(ifp->if_flags & IFF_UP) &&
2031 (ifp->if_flags & IFF_RUNNING)) {
2032 /*
2033 * If interface is marked down and it is running,
2034 * then stop and disable it.
2035 */
2036 BRIDGE_LOCK(sc);
2037 bridge_ifstop(ifp, 1);
2038 BRIDGE_UNLOCK(sc);
2039 } else if ((ifp->if_flags & IFF_UP) &&
2040 !(ifp->if_flags & IFF_RUNNING)) {
2041 /*
2042 * If interface is marked up and it is stopped, then
2043 * start it.
2044 */
2045 BRIDGE_LOCK(sc);
2046 error = bridge_init(ifp);
2047 BRIDGE_UNLOCK(sc);
2048 }
2049 break;
2050
2051 case SIOCSIFLLADDR:
2052 error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
2053 ifr->ifr_addr.sa_len);
2054 if (error != 0) {
2055 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
2056 "%s SIOCSIFLLADDR error %d", ifp->if_xname,
2057 error);
2058 }
2059 break;
2060
2061 case SIOCSIFMTU:
2062 if (ifr->ifr_mtu < 576) {
2063 error = EINVAL;
2064 break;
2065 }
2066 BRIDGE_LOCK(sc);
2067 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2068 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
2069 BRIDGE_UNLOCK(sc);
2070 break;
2071 }
2072 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2073 if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
2074 BRIDGE_LOG(LOG_NOTICE, 0,
2075 "%s invalid MTU: %u(%s) != %d",
2076 sc->sc_ifp->if_xname,
2077 bif->bif_ifp->if_mtu,
2078 bif->bif_ifp->if_xname, ifr->ifr_mtu);
2079 error = EINVAL;
2080 break;
2081 }
2082 }
2083 if (!error) {
2084 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
2085 }
2086 BRIDGE_UNLOCK(sc);
2087 break;
2088
2089 default:
2090 error = ether_ioctl(ifp, cmd, data);
2091 if (error != 0 && error != EOPNOTSUPP) {
2092 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
2093 "ifp %s cmd 0x%08lx "
2094 "(%c%c [%lu] %c %lu) failed error: %d",
2095 ifp->if_xname, cmd,
2096 (cmd & IOC_IN) ? 'I' : ' ',
2097 (cmd & IOC_OUT) ? 'O' : ' ',
2098 IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
2099 cmd & 0xff, error);
2100 }
2101 break;
2102 }
2103 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2104
2105 return error;
2106 }
2107
2108 #if HAS_IF_CAP
2109 /*
2110 * bridge_mutecaps:
2111 *
2112 * Clear or restore unwanted capabilities on the member interface
2113 */
2114 static void
bridge_mutecaps(struct bridge_softc * sc)2115 bridge_mutecaps(struct bridge_softc *sc)
2116 {
2117 struct bridge_iflist *bif;
2118 int enabled, mask;
2119
2120 /* Initial bitmask of capabilities to test */
2121 mask = BRIDGE_IFCAPS_MASK;
2122
2123 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2124 /* Every member must support it or its disabled */
2125 mask &= bif->bif_savedcaps;
2126 }
2127
2128 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2129 enabled = bif->bif_ifp->if_capenable;
2130 enabled &= ~BRIDGE_IFCAPS_STRIP;
2131 /* strip off mask bits and enable them again if allowed */
2132 enabled &= ~BRIDGE_IFCAPS_MASK;
2133 enabled |= mask;
2134
2135 bridge_set_ifcap(sc, bif, enabled);
2136 }
2137 }
2138
2139 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)2140 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
2141 {
2142 struct ifnet *ifp = bif->bif_ifp;
2143 struct ifreq ifr;
2144 int error;
2145
2146 bzero(&ifr, sizeof(ifr));
2147 ifr.ifr_reqcap = set;
2148
2149 if (ifp->if_capenable != set) {
2150 IFF_LOCKGIANT(ifp);
2151 error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2152 IFF_UNLOCKGIANT(ifp);
2153 if (error) {
2154 BRIDGE_LOG(LOG_NOTICE, 0,
2155 "%s error setting interface capabilities on %s",
2156 sc->sc_ifp->if_xname, ifp->if_xname);
2157 }
2158 }
2159 }
2160 #endif /* HAS_IF_CAP */
2161
2162 static errno_t
siocsifcap(struct ifnet * ifp,uint32_t cap_enable)2163 siocsifcap(struct ifnet * ifp, uint32_t cap_enable)
2164 {
2165 struct ifreq ifr;
2166
2167 bzero(&ifr, sizeof(ifr));
2168 ifr.ifr_reqcap = cap_enable;
2169 return ifnet_ioctl(ifp, 0, SIOCSIFCAP, &ifr);
2170 }
2171
2172 static const char *
enable_disable_str(boolean_t enable)2173 enable_disable_str(boolean_t enable)
2174 {
2175 return (const char * __null_terminated)(enable ? "enable" : "disable");
2176 }
2177
2178 static boolean_t
bridge_set_lro(struct ifnet * ifp,boolean_t enable)2179 bridge_set_lro(struct ifnet * ifp, boolean_t enable)
2180 {
2181 uint32_t cap_enable;
2182 uint32_t cap_supported;
2183 boolean_t changed = FALSE;
2184 boolean_t lro_enabled;
2185
2186 cap_supported = ifnet_capabilities_supported(ifp);
2187 if ((cap_supported & IFCAP_LRO) == 0) {
2188 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2189 "%s doesn't support LRO",
2190 ifp->if_xname);
2191 goto done;
2192 }
2193 if (bridge_allow_lro_num_seg != 0 &&
2194 (cap_supported & IFCAP_LRO_NUM_SEG) != 0) {
2195 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2196 "%s supports LRO_NUM_SEG, leaving LRO enabled",
2197 ifp->if_xname);
2198 goto done;
2199 }
2200 cap_enable = ifnet_capabilities_enabled(ifp);
2201 lro_enabled = (cap_enable & IFCAP_LRO) != 0;
2202 if (lro_enabled != enable) {
2203 errno_t error;
2204
2205 if (enable) {
2206 cap_enable |= IFCAP_LRO;
2207 } else {
2208 cap_enable &= ~IFCAP_LRO;
2209 }
2210 error = siocsifcap(ifp, cap_enable);
2211 if (error != 0) {
2212 BRIDGE_LOG(LOG_NOTICE, 0,
2213 "%s %s failed (cap 0x%x) %d",
2214 ifp->if_xname,
2215 enable_disable_str(enable),
2216 cap_enable,
2217 error);
2218 } else {
2219 changed = TRUE;
2220 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2221 "%s %s success (cap 0x%x)",
2222 ifp->if_xname,
2223 enable_disable_str(enable),
2224 cap_enable);
2225 }
2226 }
2227 done:
2228 return changed;
2229 }
2230
2231 static errno_t
bridge_set_tso(struct bridge_softc * sc)2232 bridge_set_tso(struct bridge_softc *sc)
2233 {
2234 struct bridge_iflist *bif;
2235 u_int32_t tso_v4_mtu;
2236 u_int32_t tso_v6_mtu;
2237 ifnet_offload_t offload;
2238 errno_t error = 0;
2239
2240 /* By default, support TSO */
2241 offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2242 tso_v4_mtu = IP_MAXPACKET;
2243 tso_v6_mtu = IP_MAXPACKET;
2244
2245 /* Use the lowest common denominator of the members */
2246 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2247 ifnet_t ifp = bif->bif_ifp;
2248
2249 if (ifp == NULL) {
2250 continue;
2251 }
2252
2253 if (offload & IFNET_TSO_IPV4) {
2254 if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2255 if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2256 tso_v4_mtu = ifp->if_tso_v4_mtu;
2257 }
2258 } else {
2259 offload &= ~IFNET_TSO_IPV4;
2260 tso_v4_mtu = 0;
2261 }
2262 }
2263 if (offload & IFNET_TSO_IPV6) {
2264 if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2265 if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2266 tso_v6_mtu = ifp->if_tso_v6_mtu;
2267 }
2268 } else {
2269 offload &= ~IFNET_TSO_IPV6;
2270 tso_v6_mtu = 0;
2271 }
2272 }
2273 }
2274
2275 if (offload != sc->sc_ifp->if_hwassist) {
2276 error = ifnet_set_offload(sc->sc_ifp, offload);
2277 if (error != 0) {
2278 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2279 "ifnet_set_offload(%s, 0x%x) failed %d",
2280 sc->sc_ifp->if_xname, offload, error);
2281 goto done;
2282 }
2283 /*
2284 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2285 * as large as the interface MTU
2286 */
2287 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2288 if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2289 tso_v4_mtu = sc->sc_ifp->if_mtu;
2290 }
2291 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2292 tso_v4_mtu);
2293 if (error != 0) {
2294 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2295 "ifnet_set_tso_mtu(%s, "
2296 "AF_INET, %u) failed %d",
2297 sc->sc_ifp->if_xname,
2298 tso_v4_mtu, error);
2299 goto done;
2300 }
2301 }
2302 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2303 if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2304 tso_v6_mtu = sc->sc_ifp->if_mtu;
2305 }
2306 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2307 tso_v6_mtu);
2308 if (error != 0) {
2309 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2310 "ifnet_set_tso_mtu(%s, "
2311 "AF_INET6, %u) failed %d",
2312 sc->sc_ifp->if_xname,
2313 tso_v6_mtu, error);
2314 goto done;
2315 }
2316 }
2317 }
2318 done:
2319 return error;
2320 }
2321
2322 static const char *
sanitize_ifname(char * __sized_by (IFNAMSIZ)ifname)2323 sanitize_ifname(char * __sized_by(IFNAMSIZ) ifname)
2324 {
2325 ifname[IFNAMSIZ - 1] = '\0';
2326 return __unsafe_null_terminated_from_indexable(ifname, &ifname[IFNAMSIZ - 1]);
2327 }
2328
2329 /*
2330 * bridge_lookup_member:
2331 *
2332 * Lookup a bridge member interface.
2333 */
2334 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,char * __sized_by (IFNAMSIZ)name_unsanitized)2335 bridge_lookup_member(struct bridge_softc *sc, char * __sized_by(IFNAMSIZ) name_unsanitized)
2336 {
2337 struct bridge_iflist *bif;
2338 struct ifnet *ifp;
2339 const char * __null_terminated name = sanitize_ifname(name_unsanitized);
2340
2341 BRIDGE_LOCK_ASSERT_HELD(sc);
2342
2343 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2344 ifp = bif->bif_ifp;
2345 if (strcmp(ifp->if_xname, name) == 0) {
2346 return bif;
2347 }
2348 }
2349
2350 return NULL;
2351 }
2352
2353 /*
2354 * bridge_lookup_member_if:
2355 *
2356 * Lookup a bridge member interface by ifnet*.
2357 */
2358 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2359 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2360 {
2361 struct bridge_iflist *bif;
2362
2363 BRIDGE_LOCK_ASSERT_HELD(sc);
2364
2365 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2366 if (bif->bif_ifp == member_ifp) {
2367 return bif;
2368 }
2369 }
2370
2371 return NULL;
2372 }
2373
2374 static inline bool
get_and_clear_promisc(mbuf_t m)2375 get_and_clear_promisc(mbuf_t m)
2376 {
2377 bool is_promisc;
2378
2379 /*
2380 * Need to clear the promiscuous flag otherwise the packet will be
2381 * dropped by DLIL after processing filters
2382 */
2383 is_promisc = (mbuf_flags(m) & MBUF_PROMISC) != 0;
2384 if (is_promisc) {
2385 mbuf_setflags_mask(m, 0, MBUF_PROMISC);
2386 }
2387 return is_promisc;
2388 }
2389
2390 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2391 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2392 mbuf_t *data, char **frame_ptr)
2393 {
2394 #pragma unused(protocol)
2395 errno_t error = 0;
2396 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2397 struct bridge_softc *sc = bif->bif_sc;
2398 int included = 0;
2399 struct ether_header * eh_p;
2400 size_t frmlen = 0;
2401 bool is_promisc;
2402 mblist list;
2403 mbuf_t m = *data;
2404
2405 if ((m->m_flags & M_PROTO1)) {
2406 goto out;
2407 }
2408
2409 if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2410 *frame_ptr <= mtod(m, char *)) {
2411 included = 1;
2412 frmlen = mtod(m, char *) - *frame_ptr;
2413 }
2414 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2415 "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2416 "frmlen %lu", sc->sc_ifp->if_xname,
2417 ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2418 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
2419 (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2420 included ? "inside" : "outside", frmlen);
2421 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2422 brlog_mbuf(m, "bridge_iff_input[", "");
2423 brlog_ether_header((struct ether_header *)
2424 (void *)*frame_ptr);
2425 brlog_mbuf_data(m, 0, 20);
2426 }
2427 if (included == 0) {
2428 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2429 goto out;
2430 }
2431
2432 /* Move data pointer to start of frame to the link layer header */
2433 _mbuf_adjust_pkthdr_and_data(m, -frmlen);
2434
2435 /* make sure we can access the ethernet header */
2436 if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2437 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2438 "short frame %lu < %lu",
2439 mbuf_pkthdr_len(m), sizeof(struct ether_header));
2440 goto out;
2441 }
2442 if (mbuf_len(m) < sizeof(struct ether_header)) {
2443 error = mbuf_pullup(data, sizeof(struct ether_header));
2444 if (error != 0) {
2445 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2446 "mbuf_pullup(%lu) failed %d",
2447 sizeof(struct ether_header),
2448 error);
2449 error = EJUSTRETURN;
2450 goto out;
2451 }
2452 if (m != *data) {
2453 m = *data;
2454 *frame_ptr = mtod(m, char *);
2455 }
2456 }
2457 mblist_init(&list);
2458 mblist_append(&list, m);
2459 is_promisc = get_and_clear_promisc(m);
2460 eh_p = __unsafe_forge_single(struct ether_header *, *frame_ptr);
2461 list = bridge_input_list(sc, ifp, eh_p, list, is_promisc);
2462 m = *data = list.head;
2463 if (m == NULL) {
2464 error = EJUSTRETURN;
2465 }
2466 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2467 BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2468 brlog_mbuf(m, "bridge_iff_input]", "");
2469 }
2470
2471 out:
2472 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2473
2474 return error;
2475 }
2476
2477 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2478 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2479 mbuf_t *data)
2480 {
2481 #pragma unused(protocol)
2482 errno_t error = 0;
2483 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2484 struct bridge_softc *sc = bif->bif_sc;
2485 mbuf_t m = *data;
2486
2487 if ((m->m_flags & M_PROTO1)) {
2488 goto out;
2489 }
2490 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2491 "%s from %s m 0x%llx data 0x%llx",
2492 sc->sc_ifp->if_xname, ifp->if_xname,
2493 (uint64_t)VM_KERNEL_ADDRPERM(m),
2494 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)));
2495
2496 error = bridge_member_output(sc, ifp, data);
2497 if (error != 0 && error != EJUSTRETURN) {
2498 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2499 "bridge_member_output failed error %d",
2500 error);
2501 }
2502 out:
2503 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2504
2505 return error;
2506 }
2507
2508 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2509 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2510 const struct kev_msg *event_msg)
2511 {
2512 #pragma unused(protocol)
2513 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2514 struct bridge_softc *sc = bif->bif_sc;
2515
2516 if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2517 event_msg->kev_class == KEV_NETWORK_CLASS &&
2518 event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2519 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2520 "%s event_code %u - %s",
2521 ifp->if_xname, event_msg->event_code,
2522 dlil_kev_dl_code_str(event_msg->event_code));
2523
2524 switch (event_msg->event_code) {
2525 case KEV_DL_LINK_OFF:
2526 case KEV_DL_LINK_ON: {
2527 bridge_iflinkevent(ifp);
2528 #if BRIDGESTP
2529 bstp_linkstate(ifp, event_msg->event_code);
2530 #endif /* BRIDGESTP */
2531 break;
2532 }
2533 case KEV_DL_SIFFLAGS: {
2534 if ((ifp->if_flags & IFF_UP) == 0) {
2535 break;
2536 }
2537 if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2538 errno_t error;
2539
2540 error = ifnet_set_promiscuous(ifp, 1);
2541 if (error != 0) {
2542 BRIDGE_LOG(LOG_NOTICE, 0,
2543 "ifnet_set_promiscuous (%s)"
2544 " failed %d", ifp->if_xname,
2545 error);
2546 } else {
2547 bif->bif_flags |= BIFF_PROMISC;
2548 }
2549 }
2550 if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2551 (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2552 errno_t error;
2553
2554 error = if_allmulti(ifp, 1);
2555 if (error != 0) {
2556 BRIDGE_LOG(LOG_NOTICE, 0,
2557 "if_allmulti (%s)"
2558 " failed %d", ifp->if_xname,
2559 error);
2560 } else {
2561 bif->bif_flags |= BIFF_ALL_MULTI;
2562 #ifdef XNU_PLATFORM_AppleTVOS
2563 ip6_forwarding = 1;
2564 #endif /* XNU_PLATFORM_AppleTVOS */
2565 }
2566 }
2567 break;
2568 }
2569 case KEV_DL_IFCAP_CHANGED: {
2570 BRIDGE_LOCK(sc);
2571 bridge_set_tso(sc);
2572 BRIDGE_UNLOCK(sc);
2573 break;
2574 }
2575 case KEV_DL_PROTO_DETACHED:
2576 case KEV_DL_PROTO_ATTACHED: {
2577 bridge_proto_attach_changed(ifp);
2578 break;
2579 }
2580 default:
2581 break;
2582 }
2583 }
2584 }
2585
2586 /*
2587 * bridge_iff_detached:
2588 *
2589 * Called when our interface filter has been detached from a
2590 * member interface.
2591 */
2592 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2593 bridge_iff_detached(void *cookie, ifnet_t ifp)
2594 {
2595 #pragma unused(cookie)
2596 struct bridge_iflist *bif;
2597 struct bridge_softc * __single sc = ifp->if_bridge;
2598
2599 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2600
2601 /* Check if the interface is a bridge member */
2602 if (sc != NULL) {
2603 BRIDGE_LOCK(sc);
2604 bif = bridge_lookup_member_if(sc, ifp);
2605 if (bif != NULL) {
2606 bridge_delete_member(sc, bif);
2607 }
2608 BRIDGE_UNLOCK(sc);
2609 return;
2610 }
2611 /* Check if the interface is a span port */
2612 lck_mtx_lock(&bridge_list_mtx);
2613 LIST_FOREACH(sc, &bridge_list, sc_list) {
2614 BRIDGE_LOCK(sc);
2615 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2616 if (ifp == bif->bif_ifp) {
2617 bridge_delete_span(sc, bif);
2618 break;
2619 }
2620 BRIDGE_UNLOCK(sc);
2621 }
2622 lck_mtx_unlock(&bridge_list_mtx);
2623 }
2624
2625 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2626 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2627 char *header)
2628 {
2629 #pragma unused(protocol, packet, header)
2630 BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2631 ifp->if_xname);
2632 return 0;
2633 }
2634
2635 static int
bridge_attach_protocol(struct ifnet * ifp)2636 bridge_attach_protocol(struct ifnet *ifp)
2637 {
2638 int error;
2639 struct ifnet_attach_proto_param reg;
2640
2641 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2642 bzero(®, sizeof(reg));
2643 reg.input = bridge_proto_input;
2644
2645 error = ifnet_attach_protocol(ifp, PF_BRIDGE, ®);
2646 if (error) {
2647 BRIDGE_LOG(LOG_NOTICE, 0,
2648 "ifnet_attach_protocol(%s) failed, %d",
2649 ifp->if_xname, error);
2650 }
2651
2652 return error;
2653 }
2654
2655 static int
bridge_detach_protocol(struct ifnet * ifp)2656 bridge_detach_protocol(struct ifnet *ifp)
2657 {
2658 int error;
2659
2660 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2661 error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2662 if (error) {
2663 BRIDGE_LOG(LOG_NOTICE, 0,
2664 "ifnet_detach_protocol(%s) failed, %d",
2665 ifp->if_xname, error);
2666 }
2667
2668 return error;
2669 }
2670
2671 /*
2672 * bridge_delete_member:
2673 *
2674 * Delete the specified member interface.
2675 */
2676 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)2677 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2678 {
2679 #if SKYWALK
2680 boolean_t add_netagent = FALSE;
2681 #endif /* SKYWALK */
2682 uint32_t bif_flags;
2683 struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2684 int lladdr_changed = 0, error;
2685 uint8_t eaddr[ETHER_ADDR_LEN];
2686 u_int32_t event_code = 0;
2687
2688 BRIDGE_LOCK_ASSERT_HELD(sc);
2689 VERIFY(ifs != NULL);
2690
2691 /*
2692 * Remove the member from the list first so it cannot be found anymore
2693 * when we release the bridge lock below
2694 */
2695 if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2696 bif->bif_flags &= ~BIFF_IN_MEMBER_LIST;
2697 BRIDGE_XLOCK(sc);
2698 TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2699 BRIDGE_XDROP(sc);
2700 }
2701 if (sc->sc_mac_nat_bif != NULL) {
2702 if (bif == sc->sc_mac_nat_bif) {
2703 bridge_mac_nat_disable(sc);
2704 } else {
2705 bridge_mac_nat_flush_entries(sc, bif);
2706 }
2707 }
2708 #if BRIDGESTP
2709 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2710 bstp_disable(&bif->bif_stp);
2711 }
2712 #endif /* BRIDGESTP */
2713
2714 /*
2715 * If removing the interface that gave the bridge its mac address, set
2716 * the mac address of the bridge to the address of the next member, or
2717 * to its default address if no members are left.
2718 */
2719 if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2720 ifnet_release(sc->sc_ifaddr);
2721 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2722 bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
2723 sc->sc_ifaddr = NULL;
2724 } else {
2725 struct ifnet *fif =
2726 TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2727 bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
2728 sc->sc_ifaddr = fif;
2729 ifnet_reference(fif); /* for sc_ifaddr */
2730 }
2731 lladdr_changed = 1;
2732 }
2733
2734 #if HAS_IF_CAP
2735 bridge_mutecaps(sc); /* recalculate now this interface is removed */
2736 #endif /* HAS_IF_CAP */
2737
2738 error = bridge_set_tso(sc);
2739 if (error != 0) {
2740 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2741 }
2742
2743 bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
2744
2745 KASSERT(bif->bif_addrcnt == 0,
2746 ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2747
2748 /*
2749 * Update link status of the bridge based on its remaining members
2750 */
2751 event_code = bridge_updatelinkstatus(sc);
2752 bif_flags = bif->bif_flags;
2753 BRIDGE_UNLOCK(sc);
2754
2755 /* only perform these steps if the interface is still attached */
2756 if (ifnet_is_attached(ifs, 1)) {
2757 #if SKYWALK
2758 add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;
2759
2760 if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2761 ifnet_detach_flowswitch_nexus(ifs);
2762 }
2763 #endif /* SKYWALK */
2764 /* disable promiscuous mode */
2765 if ((bif_flags & BIFF_PROMISC) != 0) {
2766 (void) ifnet_set_promiscuous(ifs, 0);
2767 }
2768 /* disable all multi */
2769 if ((bif_flags & BIFF_ALL_MULTI) != 0) {
2770 (void)if_allmulti(ifs, 0);
2771 }
2772 #if HAS_IF_CAP
2773 /* re-enable any interface capabilities */
2774 bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2775 #endif
2776 /* detach bridge "protocol" */
2777 if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2778 (void)bridge_detach_protocol(ifs);
2779 }
2780 /* detach interface filter */
2781 if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2782 iflt_detach(bif->bif_iff_ref);
2783 }
2784 /* re-enable LRO */
2785 if ((bif_flags & BIFF_LRO_DISABLED) != 0) {
2786 (void)bridge_set_lro(ifs, TRUE);
2787 }
2788 ifnet_decr_iorefcnt(ifs);
2789 }
2790
2791 if (lladdr_changed &&
2792 (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2793 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2794 }
2795
2796 if (event_code != 0) {
2797 bridge_link_event(bifp, event_code);
2798 }
2799
2800 #if BRIDGESTP
2801 bstp_destroy(&bif->bif_stp); /* prepare to free */
2802 #endif /* BRIDGESTP */
2803
2804 kfree_type(struct bridge_iflist, bif);
2805 ifs->if_bridge = NULL;
2806 #if SKYWALK
2807 if (add_netagent && ifnet_is_attached(ifs, 1)) {
2808 (void)ifnet_add_netagent(ifs);
2809 ifnet_decr_iorefcnt(ifs);
2810 }
2811 #endif /* SKYWALK */
2812
2813 ifnet_release(ifs);
2814
2815 BRIDGE_LOCK(sc);
2816 }
2817
2818 /*
2819 * bridge_delete_span:
2820 *
2821 * Delete the specified span interface.
2822 */
2823 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2824 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2825 {
2826 BRIDGE_LOCK_ASSERT_HELD(sc);
2827
2828 KASSERT(bif->bif_ifp->if_bridge == NULL,
2829 ("%s: not a span interface", __func__));
2830
2831 ifnet_release(bif->bif_ifp);
2832
2833 TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2834 kfree_type(struct bridge_iflist, bif);
2835 }
2836
2837 static int
bridge_ioctl_add(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)2838 bridge_ioctl_add(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
2839 {
2840 struct ifbreq * __single req = arg;
2841 struct bridge_iflist *bif = NULL;
2842 struct ifnet *ifs, *bifp = sc->sc_ifp;
2843 int error = 0, lladdr_changed = 0;
2844 uint8_t eaddr[ETHER_ADDR_LEN];
2845 struct iff_filter iff;
2846 u_int32_t event_code = 0;
2847 boolean_t input_broadcast;
2848 int media_active;
2849 boolean_t wifi_infra = FALSE;
2850
2851 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
2852 if (ifs == NULL) {
2853 return ENOENT;
2854 }
2855 if (ifs->if_ioctl == NULL) { /* must be supported */
2856 return EINVAL;
2857 }
2858
2859 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
2860 return EINVAL;
2861 }
2862
2863 /* If it's in the span list, it can't be a member. */
2864 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2865 if (ifs == bif->bif_ifp) {
2866 return EBUSY;
2867 }
2868 }
2869
2870 if (ifs->if_bridge == sc) {
2871 return EEXIST;
2872 }
2873
2874 if (ifs->if_bridge != NULL) {
2875 return EBUSY;
2876 }
2877
2878 switch (ifs->if_type) {
2879 case IFT_ETHER:
2880 if (strcmp(ifs->if_name, "en") == 0 &&
2881 ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2882 (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2883 /* XXX is there a better way to identify Wi-Fi STA? */
2884 wifi_infra = TRUE;
2885 }
2886 break;
2887 case IFT_L2VLAN:
2888 case IFT_IEEE8023ADLAG:
2889 break;
2890 default:
2891 return EINVAL;
2892 }
2893
2894 /* fail to add the interface if the MTU doesn't match */
2895 if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2896 BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2897 sc->sc_ifp->if_xname,
2898 ifs->if_xname);
2899 return EINVAL;
2900 }
2901
2902 if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
2903 /* there's already an interface that's doing MAC NAT */
2904 return EBUSY;
2905 }
2906
2907 /* prevent the interface from detaching while we add the member */
2908 if (!ifnet_is_attached(ifs, 1)) {
2909 return ENXIO;
2910 }
2911
2912 /* allocate a new member */
2913 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2914 bif->bif_ifp = ifs;
2915 ifnet_reference(ifs);
2916 bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2917 #if HAS_IF_CAP
2918 bif->bif_savedcaps = ifs->if_capenable;
2919 #endif /* HAS_IF_CAP */
2920 bif->bif_sc = sc;
2921 if (wifi_infra) {
2922 (void)bridge_mac_nat_enable(sc, bif);
2923 }
2924
2925 /* Allow the first Ethernet member to define the MTU */
2926 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2927 sc->sc_ifp->if_mtu = ifs->if_mtu;
2928 }
2929
2930 /*
2931 * Assign the interface's MAC address to the bridge if it's the first
2932 * member and the MAC address of the bridge has not been changed from
2933 * the default (randomly) generated one.
2934 */
2935 if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2936 _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2937 bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2938 sc->sc_ifaddr = ifs;
2939 ifnet_reference(ifs); /* for sc_ifaddr */
2940 lladdr_changed = 1;
2941 }
2942
2943 ifs->if_bridge = sc;
2944 #if BRIDGESTP
2945 bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2946 #endif /* BRIDGESTP */
2947
2948 #if HAS_IF_CAP
2949 /* Set interface capabilities to the intersection set of all members */
2950 bridge_mutecaps(sc);
2951 #endif /* HAS_IF_CAP */
2952
2953 /*
2954 * Respect lock ordering with DLIL lock for the following operations
2955 */
2956 BRIDGE_UNLOCK(sc);
2957
2958 /* enable promiscuous mode */
2959 error = ifnet_set_promiscuous(ifs, 1);
2960 switch (error) {
2961 case 0:
2962 bif->bif_flags |= BIFF_PROMISC;
2963 break;
2964 case ENETDOWN:
2965 case EPWROFF:
2966 BRIDGE_LOG(LOG_NOTICE, 0,
2967 "ifnet_set_promiscuous(%s) failed %d, ignoring",
2968 ifs->if_xname, error);
2969 /* Ignore error when device is not up */
2970 error = 0;
2971 break;
2972 default:
2973 BRIDGE_LOG(LOG_NOTICE, 0,
2974 "ifnet_set_promiscuous(%s) failed %d",
2975 ifs->if_xname, error);
2976 BRIDGE_LOCK(sc);
2977 goto out;
2978 }
2979 if (wifi_infra) {
2980 int this_error;
2981
2982 /* Wi-Fi doesn't really support promiscuous, set allmulti */
2983 bif->bif_flags |= BIFF_WIFI_INFRA;
2984 this_error = if_allmulti(ifs, 1);
2985 if (this_error == 0) {
2986 bif->bif_flags |= BIFF_ALL_MULTI;
2987 #ifdef XNU_PLATFORM_AppleTVOS
2988 ip6_forwarding = 1;
2989 #endif /* XNU_PLATFORM_AppleTVOS */
2990 } else {
2991 BRIDGE_LOG(LOG_NOTICE, 0,
2992 "if_allmulti(%s) failed %d, ignoring",
2993 ifs->if_xname, this_error);
2994 }
2995 }
2996 #if SKYWALK
2997 /* ensure that the flowswitch is present for native interface */
2998 if (SKYWALK_NATIVE(ifs)) {
2999 if (ifnet_attach_flowswitch_nexus(ifs)) {
3000 bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
3001 }
3002 }
3003 /* remove the netagent on the flowswitch (rdar://75050182) */
3004 if (if_is_fsw_netagent_enabled()) {
3005 (void)ifnet_remove_netagent(ifs);
3006 bif->bif_flags |= BIFF_NETAGENT_REMOVED;
3007 }
3008 #endif /* SKYWALK */
3009
3010 /*
3011 * install an interface filter
3012 */
3013 memset(&iff, 0, sizeof(struct iff_filter));
3014 iff.iff_cookie = bif;
3015 iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
3016 iff.iff_input = bridge_iff_input;
3017 iff.iff_output = bridge_iff_output;
3018 iff.iff_event = bridge_iff_event;
3019 iff.iff_detached = bridge_iff_detached;
3020 error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
3021 DLIL_IFF_TSO | DLIL_IFF_INTERNAL | DLIL_IFF_BRIDGE);
3022 if (error != 0) {
3023 BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
3024 BRIDGE_LOCK(sc);
3025 goto out;
3026 }
3027 bif->bif_flags |= BIFF_FILTER_ATTACHED;
3028
3029 /*
3030 * install a dummy "bridge" protocol
3031 */
3032 if ((error = bridge_attach_protocol(ifs)) != 0) {
3033 if (error != 0) {
3034 BRIDGE_LOG(LOG_NOTICE, 0,
3035 "bridge_attach_protocol failed %d", error);
3036 BRIDGE_LOCK(sc);
3037 goto out;
3038 }
3039 }
3040 bif->bif_flags |= BIFF_PROTO_ATTACHED;
3041
3042 if (lladdr_changed &&
3043 (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
3044 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
3045 }
3046
3047 media_active = interface_media_active(ifs);
3048
3049 /* disable LRO if needed */
3050 if (bridge_set_lro(ifs, FALSE)) {
3051 bif->bif_flags |= BIFF_LRO_DISABLED;
3052 }
3053
3054 /*
3055 * No failures past this point. Add the member to the list.
3056 */
3057 BRIDGE_LOCK(sc);
3058 bif->bif_flags |= BIFF_IN_MEMBER_LIST;
3059 BRIDGE_XLOCK(sc);
3060 TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
3061 BRIDGE_XDROP(sc);
3062
3063 /* cache the member link status */
3064 if (media_active != 0) {
3065 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
3066 } else {
3067 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
3068 }
3069
3070 /* the new member may change the link status of the bridge interface */
3071 event_code = bridge_updatelinkstatus(sc);
3072
3073 /* check whether we need input broadcast or not */
3074 input_broadcast = interface_needs_input_broadcast(ifs);
3075 bif_set_input_broadcast(bif, input_broadcast);
3076 BRIDGE_UNLOCK(sc);
3077
3078 if (event_code != 0) {
3079 bridge_link_event(bifp, event_code);
3080 }
3081 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
3082 "%s input broadcast %s", ifs->if_xname,
3083 input_broadcast ? "ENABLED" : "DISABLED");
3084
3085 BRIDGE_LOCK(sc);
3086 bridge_set_tso(sc);
3087
3088 out:
3089 /* allow the interface to detach */
3090 ifnet_decr_iorefcnt(ifs);
3091
3092 if (error != 0) {
3093 if (bif != NULL) {
3094 bridge_delete_member(sc, bif);
3095 }
3096 } else if (IFNET_IS_VMNET(ifs)) {
3097 INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
3098 }
3099
3100 return error;
3101 }
3102
3103 static int
bridge_ioctl_del(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3104 bridge_ioctl_del(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3105 {
3106 struct ifbreq * __single req = arg;
3107 struct bridge_iflist *bif;
3108
3109 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3110 if (bif == NULL) {
3111 return ENOENT;
3112 }
3113
3114 bridge_delete_member(sc, bif);
3115
3116 return 0;
3117 }
3118
3119 static int
bridge_ioctl_purge(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3120 bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3121 {
3122 #pragma unused(sc, arg, arg_len)
3123 return 0;
3124 }
3125
3126 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3127 bridge_ioctl_gifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3128 {
3129 struct ifbreq * __single req = arg;
3130 struct bridge_iflist *bif;
3131
3132 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3133 if (bif == NULL) {
3134 return ENOENT;
3135 }
3136
3137 struct bstp_port *bp;
3138
3139 bp = &bif->bif_stp;
3140 req->ifbr_state = bp->bp_state;
3141 req->ifbr_priority = bp->bp_priority;
3142 req->ifbr_path_cost = bp->bp_path_cost;
3143 req->ifbr_proto = bp->bp_protover;
3144 req->ifbr_role = bp->bp_role;
3145 req->ifbr_stpflags = bp->bp_flags;
3146 req->ifbr_ifsflags = bif->bif_ifflags;
3147
3148 /* Copy STP state options as flags */
3149 if (bp->bp_operedge) {
3150 req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
3151 }
3152 if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
3153 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
3154 }
3155 if (bp->bp_ptp_link) {
3156 req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
3157 }
3158 if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
3159 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
3160 }
3161 if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
3162 req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
3163 }
3164 if (bp->bp_flags & BSTP_PORT_ADMCOST) {
3165 req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
3166 }
3167
3168 req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
3169 req->ifbr_addrcnt = bif->bif_addrcnt;
3170 req->ifbr_addrmax = bif->bif_addrmax;
3171 req->ifbr_addrexceeded = bif->bif_addrexceeded;
3172
3173 return 0;
3174 }
3175
3176 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3177 bridge_ioctl_sifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3178 {
3179 struct ifbreq * __single req = arg;
3180 struct bridge_iflist *bif;
3181 #if BRIDGESTP
3182 struct bstp_port *bp;
3183 #endif /* BRIDGESTP */
3184 errno_t error;
3185 uint32_t ifsflags;
3186
3187 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3188 if (bif == NULL) {
3189 return ENOENT;
3190 }
3191
3192 ifsflags = req->ifbr_ifsflags;
3193 if (ifsflags & IFBIF_SPAN) {
3194 /* SPAN is readonly */
3195 return EINVAL;
3196 }
3197 #define CHECKSUM_VIRTIO (IFBIF_CHECKSUM_OFFLOAD | IFBIF_USES_VIRTIO)
3198 if ((ifsflags & CHECKSUM_VIRTIO) == CHECKSUM_VIRTIO) {
3199 /* can't specify checksum and virtio */
3200 return EINVAL;
3201 }
3202 if ((ifsflags & IFBIF_MAC_NAT) != 0 &&
3203 ((ifsflags & CHECKSUM_VIRTIO) != 0 ||
3204 (bif->bif_flags & BIFF_HOST_FILTER) != 0)) {
3205 /* MAC-NAT can't be used with checksum, host filter, or virtio */
3206 return EINVAL;
3207 }
3208 if ((ifsflags & IFBIF_MAC_NAT) != 0) {
3209 error = bridge_mac_nat_enable(sc, bif);
3210 if (error != 0) {
3211 return error;
3212 }
3213 } else if (sc->sc_mac_nat_bif == bif) {
3214 bridge_mac_nat_disable(sc);
3215 }
3216
3217 #if BRIDGESTP
3218 if (ifsflags & IFBIF_STP) {
3219 if ((bif->bif_ifflags & IFBIF_STP) == 0) {
3220 error = bstp_enable(&bif->bif_stp);
3221 if (error) {
3222 return error;
3223 }
3224 }
3225 } else {
3226 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
3227 bstp_disable(&bif->bif_stp);
3228 }
3229 }
3230
3231 /* Pass on STP flags */
3232 bp = &bif->bif_stp;
3233 bstp_set_edge(bp, ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
3234 bstp_set_autoedge(bp, ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
3235 bstp_set_ptp(bp, ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
3236 bstp_set_autoptp(bp, ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
3237 #else /* !BRIDGESTP */
3238 if (ifsflags & IFBIF_STP) {
3239 return EOPNOTSUPP;
3240 }
3241 #endif /* !BRIDGESTP */
3242
3243 /* Save the bits relating to the bridge */
3244 bif->bif_ifflags = ifsflags & IFBIFMASK;
3245
3246 return 0;
3247 }
3248
3249 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3250 bridge_ioctl_scache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3251 {
3252 struct ifbrparam * __single param = arg;
3253
3254 sc->sc_brtmax = param->ifbrp_csize;
3255 bridge_rttrim(sc);
3256 return 0;
3257 }
3258
3259 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3260 bridge_ioctl_gcache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3261 {
3262 struct ifbrparam * __single param = arg;
3263
3264 param->ifbrp_csize = sc->sc_brtmax;
3265
3266 return 0;
3267 }
3268
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of bridge_ioctl_gifs32()/bridge_ioctl_gifs64().  The
 *	expanding function must have `sc', `bifc' and `error' in scope.
 *
 *	If bifc->ifbic_len is 0, only the required buffer size is stored
 *	in ifbic_len and the function returns 0.  Otherwise one ifbreq per
 *	member and per span port is written to a temporary buffer, bounded
 *	by the smaller of the caller's length and the size computed from
 *	the initial count, and copied out to bifc->ifbic_req.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout; it is held again when the macro finishes.  Returns ENOMEM
 *	from the expanding function if the temporary buffer cannot be
 *	allocated.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif; \
	struct ifbreq breq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
	\
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) \
		count++; \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) \
		count++; \
	\
	buflen = sizeof (breq) * count; \
	if (bifc->ifbic_len == 0) { \
		bifc->ifbic_len = buflen; \
		return (0); \
	} \
	BRIDGE_UNLOCK(sc); \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* the allocation is not Z_NOFAIL: don't write through NULL */ \
	if (outbuf == NULL && buflen != 0) \
		return (ENOMEM); \
	\
	count = 0; \
	buf = outbuf; \
	len = min(bifc->ifbic_len, buflen); \
	bzero(&breq, sizeof (breq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if (len < sizeof (breq)) \
			break; \
		\
		snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
		    "%s", bif->bif_ifp->if_xname); \
		/* Fill in the ifbreq structure */ \
		error = bridge_ioctl_gifflags(sc, &breq, sizeof(breq)); \
		if (error) \
			break; \
		memcpy(buf, &breq, sizeof (breq)); \
		count++; \
		buf += sizeof (breq); \
		len -= sizeof (breq); \
	} \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) { \
		if (len < sizeof (breq)) \
			break; \
		\
		snprintf(breq.ifbr_ifsname, \
		    sizeof (breq.ifbr_ifsname), \
		    "%s", bif->bif_ifp->if_xname); \
		breq.ifbr_ifsflags = bif->bif_ifflags; \
		breq.ifbr_portno \
		    = bif->bif_ifp->if_index & 0xfff; \
		memcpy(buf, &breq, sizeof (breq)); \
		count++; \
		buf += sizeof (breq); \
		len -= sizeof (breq); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	bifc->ifbic_len = sizeof (breq) * count; \
	if (bifc->ifbic_len > 0) { \
		error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);\
	} \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
} while (0)
3333
3334 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3335 bridge_ioctl_gifs64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3336 {
3337 struct ifbifconf64 * __single bifc = arg;
3338 int error = 0;
3339
3340 BRIDGE_IOCTL_GIFS;
3341
3342 return error;
3343 }
3344
3345 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3346 bridge_ioctl_gifs32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3347 {
3348 struct ifbifconf32 * __single bifc = arg;
3349 int error = 0;
3350
3351 BRIDGE_IOCTL_GIFS;
3352
3353 return error;
3354 }
3355
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of bridge_ioctl_rts32()/bridge_ioctl_rts64().  The
 *	expanding function must have `sc', `bac', `bareq' and `error' in
 *	scope.  The macro always returns from the expanding function.
 *
 *	Returns 0 immediately if bac->ifbac_len is 0.  Otherwise one bareq
 *	per route entry is written to a temporary buffer, bounded by the
 *	smaller of the caller's length and the size computed from the
 *	initial count, and copied out to bac->ifbac_req; ifbac_len is set
 *	to the number of bytes produced.
 *
 *	ifba_expire is the remaining lifetime of a dynamic entry and 0 for
 *	every other entry, including dynamic entries that have already
 *	expired.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout; it is held again on return.  Returns ENOMEM if the
 *	temporary buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_RTS do { \
	struct bridge_rtnode *brt; \
	char *buf; \
	char *outbuf = NULL; \
	unsigned int count, buflen, len; \
	unsigned long now; \
	\
	if (bac->ifbac_len == 0) \
		return (0); \
	\
	bzero(&bareq, sizeof (bareq)); \
	count = 0; \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) \
		count++; \
	buflen = sizeof (bareq) * count; \
	\
	BRIDGE_UNLOCK(sc); \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* the allocation is not Z_NOFAIL: don't write through NULL */ \
	if (outbuf == NULL && buflen != 0) \
		return (ENOMEM); \
	\
	count = 0; \
	buf = outbuf; \
	len = min(bac->ifbac_len, buflen); \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
		if (len < sizeof (bareq)) \
			goto out; \
		snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname), \
		    "%s", brt->brt_ifp->if_xname); \
		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
		bareq.ifba_vlan = brt->brt_vlan; \
		/* bareq is reused: never carry over the previous expiry */ \
		bareq.ifba_expire = 0; \
		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
			now = (unsigned long) net_uptime(); \
			if (now < brt->brt_expire) \
				bareq.ifba_expire = \
				    brt->brt_expire - now; \
		} \
		bareq.ifba_flags = brt->brt_flags; \
		\
		memcpy(buf, &bareq, sizeof (bareq)); \
		count++; \
		buf += sizeof (bareq); \
		len -= sizeof (bareq); \
	} \
out: \
	bac->ifbac_len = sizeof (bareq) * count; \
	if (outbuf != NULL) { \
		BRIDGE_UNLOCK(sc); \
		if (bac->ifbac_len > 0) { \
			error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);\
		} \
		kfree_data(outbuf, buflen); \
		BRIDGE_LOCK(sc); \
	} \
	return (error); \
} while (0)
3412
3413 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3414 bridge_ioctl_rts64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3415 {
3416 struct ifbaconf64 * __single bac = arg;
3417 struct ifbareq64 bareq;
3418 int error = 0;
3419
3420 BRIDGE_IOCTL_RTS;
3421 return error;
3422 }
3423
3424 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3425 bridge_ioctl_rts32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3426 {
3427 struct ifbaconf32 * __single bac = arg;
3428 struct ifbareq32 bareq;
3429 int error = 0;
3430
3431 BRIDGE_IOCTL_RTS;
3432 return error;
3433 }
3434
3435 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3436 bridge_ioctl_saddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3437 {
3438 struct ifbareq32 * __single req = arg;
3439 struct bridge_iflist *bif;
3440 int error;
3441
3442 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3443 if (bif == NULL) {
3444 return ENOENT;
3445 }
3446
3447 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3448 req->ifba_flags);
3449
3450 return error;
3451 }
3452
3453 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3454 bridge_ioctl_saddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3455 {
3456 struct ifbareq64 * __single req = arg;
3457 struct bridge_iflist *bif;
3458 int error;
3459
3460 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3461 if (bif == NULL) {
3462 return ENOENT;
3463 }
3464
3465 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3466 req->ifba_flags);
3467
3468 return error;
3469 }
3470
3471 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3472 bridge_ioctl_sto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3473 {
3474 struct ifbrparam * __single param = arg;
3475
3476 sc->sc_brttimeout = param->ifbrp_ctime;
3477 return 0;
3478 }
3479
3480 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3481 bridge_ioctl_gto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3482 {
3483 struct ifbrparam * __single param = arg;
3484
3485 param->ifbrp_ctime = sc->sc_brttimeout;
3486 return 0;
3487 }
3488
3489 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3490 bridge_ioctl_daddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3491 {
3492 struct ifbareq32 * __single req = arg;
3493
3494 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3495 }
3496
3497 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3498 bridge_ioctl_daddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3499 {
3500 struct ifbareq64 * __single req = arg;
3501
3502 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3503 }
3504
3505 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3506 bridge_ioctl_flush(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3507 {
3508 struct ifbreq * __single req = arg;
3509
3510 bridge_rtflush(sc, req->ifbr_ifsflags);
3511 return 0;
3512 }
3513
3514 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3515 bridge_ioctl_gpri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3516 {
3517 struct ifbrparam * __single param = arg;
3518 struct bstp_state *bs = &sc->sc_stp;
3519
3520 param->ifbrp_prio = bs->bs_bridge_priority;
3521 return 0;
3522 }
3523
3524 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3525 bridge_ioctl_spri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3526 {
3527 #if BRIDGESTP
3528 struct ifbrparam *param = arg;
3529
3530 return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3531 #else /* !BRIDGESTP */
3532 #pragma unused(sc, arg)
3533 return EOPNOTSUPP;
3534 #endif /* !BRIDGESTP */
3535 }
3536
3537 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3538 bridge_ioctl_ght(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3539 {
3540 struct ifbrparam * __single param = arg;
3541 struct bstp_state *bs = &sc->sc_stp;
3542
3543 param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3544 return 0;
3545 }
3546
3547 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3548 bridge_ioctl_sht(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3549 {
3550 #if BRIDGESTP
3551 struct ifbrparam *param = arg;
3552
3553 return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3554 #else /* !BRIDGESTP */
3555 #pragma unused(sc, arg)
3556 return EOPNOTSUPP;
3557 #endif /* !BRIDGESTP */
3558 }
3559
3560 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3561 bridge_ioctl_gfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3562 {
3563 struct ifbrparam * __single param;
3564 struct bstp_state *bs;
3565
3566 param = arg;
3567 bs = &sc->sc_stp;
3568 param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3569 return 0;
3570 }
3571
3572 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3573 bridge_ioctl_sfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3574 {
3575 #if BRIDGESTP
3576 struct ifbrparam *param = arg;
3577
3578 return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3579 #else /* !BRIDGESTP */
3580 #pragma unused(sc, arg)
3581 return EOPNOTSUPP;
3582 #endif /* !BRIDGESTP */
3583 }
3584
3585 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3586 bridge_ioctl_gma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3587 {
3588 struct ifbrparam * __single param;
3589 struct bstp_state *bs;
3590
3591 param = arg;
3592 bs = &sc->sc_stp;
3593 param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3594 return 0;
3595 }
3596
3597 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3598 bridge_ioctl_sma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3599 {
3600 #if BRIDGESTP
3601 struct ifbrparam *param = arg;
3602
3603 return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3604 #else /* !BRIDGESTP */
3605 #pragma unused(sc, arg)
3606 return EOPNOTSUPP;
3607 #endif /* !BRIDGESTP */
3608 }
3609
3610 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3611 bridge_ioctl_sifprio(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3612 {
3613 #if BRIDGESTP
3614 struct ifbreq *req = arg;
3615 struct bridge_iflist *bif;
3616
3617 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3618 if (bif == NULL) {
3619 return ENOENT;
3620 }
3621
3622 return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3623 #else /* !BRIDGESTP */
3624 #pragma unused(sc, arg)
3625 return EOPNOTSUPP;
3626 #endif /* !BRIDGESTP */
3627 }
3628
3629 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3630 bridge_ioctl_sifcost(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3631 {
3632 #if BRIDGESTP
3633 struct ifbreq *req = arg;
3634 struct bridge_iflist *bif;
3635
3636 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3637 if (bif == NULL) {
3638 return ENOENT;
3639 }
3640
3641 return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3642 #else /* !BRIDGESTP */
3643 #pragma unused(sc, arg)
3644 return EOPNOTSUPP;
3645 #endif /* !BRIDGESTP */
3646 }
3647
3648 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3649 bridge_ioctl_gfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3650 {
3651 struct ifbrparam * __single param = arg;
3652
3653 param->ifbrp_filter = sc->sc_filter_flags;
3654
3655 return 0;
3656 }
3657
3658 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3659 bridge_ioctl_sfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3660 {
3661 struct ifbrparam * __single param = arg;
3662
3663 if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3664 return EINVAL;
3665 }
3666
3667 if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3668 return EINVAL;
3669 }
3670
3671 sc->sc_filter_flags = param->ifbrp_filter;
3672
3673 return 0;
3674 }
3675
3676 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3677 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3678 {
3679 struct ifbreq * __single req = arg;
3680 struct bridge_iflist *bif;
3681
3682 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3683 if (bif == NULL) {
3684 return ENOENT;
3685 }
3686
3687 bif->bif_addrmax = req->ifbr_addrmax;
3688 return 0;
3689 }
3690
3691 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3692 bridge_ioctl_addspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3693 {
3694 struct ifbreq * __single req = arg;
3695 struct bridge_iflist *bif = NULL;
3696 struct ifnet *ifs;
3697
3698 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3699 if (ifs == NULL) {
3700 return ENOENT;
3701 }
3702
3703 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3704 return EINVAL;
3705 }
3706
3707 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3708 if (ifs == bif->bif_ifp) {
3709 return EBUSY;
3710 }
3711
3712 if (ifs->if_bridge != NULL) {
3713 return EBUSY;
3714 }
3715
3716 switch (ifs->if_type) {
3717 case IFT_ETHER:
3718 case IFT_L2VLAN:
3719 case IFT_IEEE8023ADLAG:
3720 break;
3721 default:
3722 return EINVAL;
3723 }
3724
3725 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3726
3727 bif->bif_ifp = ifs;
3728 bif->bif_ifflags = IFBIF_SPAN;
3729
3730 ifnet_reference(bif->bif_ifp);
3731
3732 TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3733
3734 return 0;
3735 }
3736
3737 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3738 bridge_ioctl_delspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3739 {
3740 struct ifbreq * __single req = arg;
3741 struct bridge_iflist *bif;
3742 struct ifnet *ifs;
3743
3744 ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3745 if (ifs == NULL) {
3746 return ENOENT;
3747 }
3748
3749 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3750 if (ifs == bif->bif_ifp) {
3751 break;
3752 }
3753
3754 if (bif == NULL) {
3755 return ENOENT;
3756 }
3757
3758 bridge_delete_span(sc, bif);
3759
3760 return 0;
3761 }
3762
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of bridge_ioctl_gbparam32()/bridge_ioctl_gbparam64().
 *	The expanding function must have `sc' and `req' in scope.  Copies
 *	the bridge-wide STP parameters from sc->sc_stp into *req; the
 *	timer values are stored shifted right by 8 bits, and the root port
 *	is reported as 0 when bs_root_port is NULL.
 */
#define BRIDGE_IOCTL_GBPARAM do { \
	struct bstp_state *stp = &sc->sc_stp; \
	struct bstp_port *rp = stp->bs_root_port; \
	\
	/* timers */ \
	req->ifbop_maxage = stp->bs_bridge_max_age >> 8; \
	req->ifbop_hellotime = stp->bs_bridge_htime >> 8; \
	req->ifbop_fwddelay = stp->bs_bridge_fdelay >> 8; \
	\
	/* root port, identified by its interface index */ \
	if (rp != NULL) \
		req->ifbop_root_port = rp->bp_ifp->if_index; \
	else \
		req->ifbop_root_port = 0; \
	\
	req->ifbop_holdcount = stp->bs_txholdcount; \
	req->ifbop_priority = stp->bs_bridge_priority; \
	req->ifbop_protocol = stp->bs_protover; \
	req->ifbop_root_path_cost = stp->bs_root_pv.pv_cost; \
	req->ifbop_bridgeid = stp->bs_bridge_pv.pv_dbridge_id; \
	req->ifbop_designated_root = stp->bs_root_pv.pv_root_id; \
	req->ifbop_designated_bridge = stp->bs_root_pv.pv_dbridge_id; \
	req->ifbop_last_tc_time.tv_sec = stp->bs_last_tc_time.tv_sec; \
	req->ifbop_last_tc_time.tv_usec = stp->bs_last_tc_time.tv_usec; \
} while (0)
3787
3788 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3789 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3790 {
3791 struct ifbropreq32 * __single req = arg;
3792
3793 BRIDGE_IOCTL_GBPARAM;
3794 return 0;
3795 }
3796
3797 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3798 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3799 {
3800 struct ifbropreq64 * __single req = arg;
3801
3802 BRIDGE_IOCTL_GBPARAM;
3803 return 0;
3804 }
3805
3806 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3807 bridge_ioctl_grte(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3808 {
3809 struct ifbrparam * __single param = arg;
3810
3811 param->ifbrp_cexceeded = sc->sc_brtexceeded;
3812 return 0;
3813 }
3814
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body of the 32-bit and 64-bit "get STP state of all members"
 *	ioctls.  Expects 'sc' (struct bridge_softc *), 'bifstp' (pointer to
 *	the ifbpstpconf32/ifbpstpconf64 request) and 'error' (int) to be in
 *	scope.  The macro returns from the enclosing function on every path.
 *
 *	- If there are no members with IFBIF_STP set, or the caller passed
 *	  ifbpstp_len == 0, only the required buffer size is reported back
 *	  in ifbpstp_len.
 *	- Otherwise one struct ifbpstpreq per STP member is staged in a
 *	  temporary kernel buffer and copied out to ifbpstp_req; ifbpstp_len
 *	  is set to the number of bytes produced.
 *
 *	The bridge lock is dropped around the allocation and the copyout and
 *	is held again when the macro returns.  Because the member list can
 *	change while the lock is dropped, the fill loop is bounded by 'len',
 *	which never exceeds the size of the staging buffer.
 *
 *	Returns ENOMEM if the staging buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_GIFSSTP do { \
	struct bridge_iflist *bif; \
	struct bstp_port *bp; \
	struct ifbpstpreq bpreq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
	\
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if ((bif->bif_ifflags & IFBIF_STP) != 0) \
	                count++; \
	} \
	\
	buflen = sizeof (bpreq) * count; \
	if (buflen == 0 || bifstp->ifbpstp_len == 0) { \
	        /* nothing to copy: just report the size needed */ \
	        bifstp->ifbpstp_len = buflen; \
	        return (0); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	if (outbuf == NULL) { \
	        /* Z_WAITOK without Z_NOFAIL may fail */ \
	        return (ENOMEM); \
	} \
	\
	count = 0; \
	buf = outbuf; \
	len = min(bifstp->ifbpstp_len, buflen); \
	bzero(&bpreq, sizeof (bpreq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if (len < sizeof (bpreq)) \
	                break; \
	\
	        if ((bif->bif_ifflags & IFBIF_STP) == 0) \
	                continue; \
	\
	        bp = &bif->bif_stp; \
	        bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; \
	        bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; \
	        bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; \
	        bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; \
	        bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
	        bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; \
	\
	        memcpy(buf, &bpreq, sizeof (bpreq)); \
	        count++; \
	        buf += sizeof (bpreq); \
	        len -= sizeof (bpreq); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	bifstp->ifbpstp_len = sizeof (bpreq) * count; \
	if (bifstp->ifbpstp_len > 0) { \
	        error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len);\
	} \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
	return (error); \
} while (0)
3872
3873 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3874 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3875 {
3876 struct ifbpstpconf32 * __single bifstp = arg;
3877 int error = 0;
3878
3879 BRIDGE_IOCTL_GIFSSTP;
3880 return error;
3881 }
3882
3883 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3884 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3885 {
3886 struct ifbpstpconf64 * __single bifstp = arg;
3887 int error = 0;
3888
3889 BRIDGE_IOCTL_GIFSSTP;
3890 return error;
3891 }
3892
3893 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3894 bridge_ioctl_sproto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3895 {
3896 #if BRIDGESTP
3897 struct ifbrparam *param = arg;
3898
3899 return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3900 #else /* !BRIDGESTP */
3901 #pragma unused(sc, arg)
3902 return EOPNOTSUPP;
3903 #endif /* !BRIDGESTP */
3904 }
3905
3906 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3907 bridge_ioctl_stxhc(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3908 {
3909 #if BRIDGESTP
3910 struct ifbrparam *param = arg;
3911
3912 return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3913 #else /* !BRIDGESTP */
3914 #pragma unused(sc, arg)
3915 return EOPNOTSUPP;
3916 #endif /* !BRIDGESTP */
3917 }
3918
3919
3920 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3921 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3922 {
3923 struct ifbrhostfilter * __single req = arg;
3924 struct bridge_iflist *bif;
3925
3926 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3927 if (bif == NULL) {
3928 return ENOENT;
3929 }
3930
3931 bzero(req, sizeof(struct ifbrhostfilter));
3932 if (bif->bif_flags & BIFF_HOST_FILTER) {
3933 req->ifbrhf_flags |= IFBRHF_ENABLED;
3934 bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3935 ETHER_ADDR_LEN);
3936 req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3937 }
3938 return 0;
3939 }
3940
3941 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3942 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3943 {
3944 struct ifbrhostfilter * __single req = arg;
3945 struct bridge_iflist *bif;
3946
3947 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3948 if (bif == NULL) {
3949 return ENOENT;
3950 }
3951 if (bif_has_mac_nat(bif)) {
3952 /* no host filter with MAC-NAT */
3953 return EINVAL;
3954 }
3955 if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3956 bif->bif_flags |= BIFF_HOST_FILTER;
3957
3958 if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3959 bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3960 ETHER_ADDR_LEN);
3961 if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3962 ETHER_ADDR_LEN) != 0) {
3963 bif->bif_flags |= BIFF_HF_HWSRC;
3964 } else {
3965 bif->bif_flags &= ~BIFF_HF_HWSRC;
3966 }
3967 }
3968 if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3969 bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3970 if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3971 bif->bif_flags |= BIFF_HF_IPSRC;
3972 } else {
3973 bif->bif_flags &= ~BIFF_HF_IPSRC;
3974 }
3975 }
3976 } else {
3977 bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3978 BIFF_HF_IPSRC);
3979 bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3980 bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3981 }
3982
3983 return 0;
3984 }
3985
3986 static char *__indexable
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * __indexable buf,unsigned int * len_p)3987 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3988 unsigned int * count_p, char *__indexable buf,
3989 unsigned int * len_p)
3990 {
3991 unsigned int count = *count_p;
3992 struct ifbrmne ifbmne;
3993 unsigned int len = *len_p;
3994 struct mac_nat_entry *mne;
3995 unsigned long now;
3996
3997 bzero(&ifbmne, sizeof(ifbmne));
3998 LIST_FOREACH(mne, list, mne_list) {
3999 if (len < sizeof(ifbmne)) {
4000 break;
4001 }
4002 snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
4003 "%s", mne->mne_bif->bif_ifp->if_xname);
4004 memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
4005 sizeof(ifbmne.ifbmne_mac));
4006 now = (unsigned long) net_uptime();
4007 if (now < mne->mne_expire) {
4008 ifbmne.ifbmne_expire = mne->mne_expire - now;
4009 } else {
4010 ifbmne.ifbmne_expire = 0;
4011 }
4012 if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
4013 ifbmne.ifbmne_af = AF_INET6;
4014 ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
4015 } else {
4016 ifbmne.ifbmne_af = AF_INET;
4017 ifbmne.ifbmne_ip_addr = mne->mne_ip;
4018 }
4019 memcpy(buf, &ifbmne, sizeof(ifbmne));
4020 count++;
4021 buf += sizeof(ifbmne);
4022 len -= sizeof(ifbmne);
4023 }
4024 *count_p = count;
4025 *len_p = len;
4026 return buf;
4027 }
4028
4029 /*
4030 * bridge_ioctl_gmnelist()
4031 * Perform the get mac_nat_entry list ioctl.
4032 *
4033 * Note:
4034 * The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
4035 * field size/layout except for the last field ifbml_buf, the user-supplied
4036 * buffer pointer. That is passed in separately via the 'user_addr'
4037 * parameter from the respective 32-bit or 64-bit ioctl routine.
4038 */
4039 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)4040 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
4041 user_addr_t user_addr)
4042 {
4043 unsigned int count;
4044 char *buf;
4045 int error = 0;
4046 char *outbuf = NULL;
4047 struct mac_nat_entry *mne;
4048 unsigned int buflen;
4049 unsigned int len;
4050
4051 mnl->ifbml_elsize = sizeof(struct ifbrmne);
4052 count = 0;
4053 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
4054 count++;
4055 }
4056 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
4057 count++;
4058 }
4059 buflen = sizeof(struct ifbrmne) * count;
4060 if (buflen == 0 || mnl->ifbml_len == 0) {
4061 mnl->ifbml_len = buflen;
4062 return error;
4063 }
4064 BRIDGE_UNLOCK(sc);
4065 outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);
4066 BRIDGE_LOCK(sc);
4067 count = 0;
4068 buf = outbuf;
4069 len = min(mnl->ifbml_len, buflen);
4070 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
4071 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
4072 mnl->ifbml_len = count * sizeof(struct ifbrmne);
4073 BRIDGE_UNLOCK(sc);
4074 if (mnl->ifbml_len > 0) {
4075 error = copyout(outbuf, user_addr, mnl->ifbml_len);
4076 }
4077 kfree_data(outbuf, buflen);
4078 BRIDGE_LOCK(sc);
4079 return error;
4080 }
4081
4082 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4083 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4084 {
4085 struct ifbrmnelist64 * __single mnl = arg;
4086
4087 return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
4088 }
4089
4090 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4091 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4092 {
4093 struct ifbrmnelist32 * __single mnl = arg;
4094
4095 return bridge_ioctl_gmnelist(sc, arg,
4096 CAST_USER_ADDR_T(mnl->ifbml_buf));
4097 }
4098
4099 /*
4100 * bridge_ioctl_gifstats()
4101 * Return per-member stats.
4102 *
4103 * Note:
4104 * The ifbrmreq32 and ifbrmreq64 structures have the same
4105 * field size/layout except for the last field brmr_buf, the user-supplied
4106 * buffer pointer. That is passed in separately via the 'user_addr'
4107 * parameter from the respective 32-bit or 64-bit ioctl routine.
4108 */
4109 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)4110 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
4111 user_addr_t user_addr)
4112 {
4113 struct bridge_iflist *bif;
4114 int error = 0;
4115 unsigned int buflen;
4116
4117 bif = bridge_lookup_member(sc, mreq->brmr_ifname);
4118 if (bif == NULL) {
4119 error = ENOENT;
4120 goto done;
4121 }
4122
4123 buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
4124 if (buflen == 0 || mreq->brmr_len == 0) {
4125 mreq->brmr_len = buflen;
4126 goto done;
4127 }
4128 if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
4129 error = ENOBUFS;
4130 goto done;
4131 }
4132 mreq->brmr_len = buflen;
4133 error = copyout(&bif->bif_stats, user_addr, buflen);
4134 done:
4135 return error;
4136 }
4137
4138 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4139 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4140 {
4141 struct ifbrmreq32 * __single mreq = arg;
4142
4143 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4144 }
4145
4146 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4147 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4148 {
4149 struct ifbrmreq64 * __single mreq = arg;
4150
4151 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4152 }
4153
4154 /*
4155 * bridge_proto_attach_changed
4156 *
4157 * Called when protocol attachment on the interface changes.
4158 */
4159 static void
bridge_proto_attach_changed(struct ifnet * ifp)4160 bridge_proto_attach_changed(struct ifnet *ifp)
4161 {
4162 boolean_t changed = FALSE;
4163 struct bridge_iflist *bif;
4164 boolean_t input_broadcast;
4165 struct bridge_softc * __single sc = ifp->if_bridge;
4166
4167 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4168 if (sc == NULL) {
4169 return;
4170 }
4171 input_broadcast = interface_needs_input_broadcast(ifp);
4172 BRIDGE_LOCK(sc);
4173 bif = bridge_lookup_member_if(sc, ifp);
4174 if (bif != NULL) {
4175 changed = bif_set_input_broadcast(bif, input_broadcast);
4176 }
4177 BRIDGE_UNLOCK(sc);
4178 if (changed) {
4179 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
4180 "%s input broadcast %s", ifp->if_xname,
4181 input_broadcast ? "ENABLED" : "DISABLED");
4182 }
4183 return;
4184 }
4185
4186 /*
4187 * interface_media_active:
4188 *
4189 * Tells if an interface media is active.
4190 */
4191 static int
interface_media_active(struct ifnet * ifp)4192 interface_media_active(struct ifnet *ifp)
4193 {
4194 struct ifmediareq ifmr;
4195 int status = 0;
4196
4197 bzero(&ifmr, sizeof(ifmr));
4198 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
4199 if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
4200 status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
4201 }
4202 }
4203
4204 return status;
4205 }
4206
4207 /*
4208 * bridge_updatelinkstatus:
4209 *
4210 * Update the media active status of the bridge based on the
4211 * media active status of its member.
4212 * If changed, return the corresponding onf/off link event.
4213 */
4214 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)4215 bridge_updatelinkstatus(struct bridge_softc *sc)
4216 {
4217 struct bridge_iflist *bif;
4218 int active_member = 0;
4219 u_int32_t event_code = 0;
4220
4221 BRIDGE_LOCK_ASSERT_HELD(sc);
4222
4223 /*
4224 * Find out if we have an active interface
4225 */
4226 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
4227 if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
4228 active_member = 1;
4229 break;
4230 }
4231 }
4232
4233 if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4234 sc->sc_flags |= SCF_MEDIA_ACTIVE;
4235 event_code = KEV_DL_LINK_ON;
4236 } else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4237 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
4238 event_code = KEV_DL_LINK_OFF;
4239 }
4240
4241 return event_code;
4242 }
4243
4244 /*
4245 * bridge_iflinkevent:
4246 */
4247 static void
bridge_iflinkevent(struct ifnet * ifp)4248 bridge_iflinkevent(struct ifnet *ifp)
4249 {
4250 struct bridge_softc * __single sc = ifp->if_bridge;
4251 struct bridge_iflist *bif;
4252 u_int32_t event_code = 0;
4253 int media_active;
4254
4255 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4256
4257 /* Check if the interface is a bridge member */
4258 if (sc == NULL) {
4259 return;
4260 }
4261
4262 media_active = interface_media_active(ifp);
4263 BRIDGE_LOCK(sc);
4264 bif = bridge_lookup_member_if(sc, ifp);
4265 if (bif != NULL) {
4266 if (media_active) {
4267 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4268 } else {
4269 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4270 }
4271 if (sc->sc_mac_nat_bif != NULL) {
4272 bridge_mac_nat_flush_entries(sc, bif);
4273 }
4274
4275 event_code = bridge_updatelinkstatus(sc);
4276 }
4277 BRIDGE_UNLOCK(sc);
4278
4279 if (event_code != 0) {
4280 bridge_link_event(sc->sc_ifp, event_code);
4281 }
4282 }
4283
4284 /*
4285 * bridge_delayed_callback:
4286 *
4287 * Makes a delayed call
4288 */
4289 static void
bridge_delayed_callback(void * param,__unused void * param2)4290 bridge_delayed_callback(void *param, __unused void *param2)
4291 {
4292 struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4293 struct bridge_softc *sc = call->bdc_sc;
4294
4295 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4296 if (bridge_delayed_callback_delay > 0) {
4297 struct timespec ts;
4298
4299 ts.tv_sec = bridge_delayed_callback_delay;
4300 ts.tv_nsec = 0;
4301
4302 BRIDGE_LOG(LOG_NOTICE, 0,
4303 "sleeping for %d seconds",
4304 bridge_delayed_callback_delay);
4305
4306 msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4307 __func__, &ts);
4308
4309 BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4310 }
4311 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4312
4313 BRIDGE_LOCK(sc);
4314
4315 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4316 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4317 "%s call 0x%llx flags 0x%x",
4318 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4319 call->bdc_flags);
4320 }
4321 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4322
4323 if (call->bdc_flags & BDCF_CANCELLING) {
4324 wakeup(call);
4325 } else {
4326 if ((sc->sc_flags & SCF_DETACHING) == 0) {
4327 (*call->bdc_func)(sc);
4328 }
4329 }
4330 call->bdc_flags &= ~BDCF_OUTSTANDING;
4331 BRIDGE_UNLOCK(sc);
4332 }
4333
4334 /*
4335 * bridge_schedule_delayed_call:
4336 *
4337 * Schedule a function to be called on a separate thread
4338 * The actual call may be scheduled to run at a given time or ASAP.
4339 */
4340 static void
4341 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4342 {
4343 uint64_t deadline = 0;
4344 struct bridge_softc *sc = call->bdc_sc;
4345
4346 BRIDGE_LOCK_ASSERT_HELD(sc);
4347
4348 if ((sc->sc_flags & SCF_DETACHING) ||
4349 (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4350 return;
4351 }
4352
4353 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4354 nanoseconds_to_absolutetime(
4355 (uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4356 call->bdc_ts.tv_nsec, &deadline);
4357 clock_absolutetime_interval_to_deadline(deadline, &deadline);
4358 }
4359
4360 call->bdc_flags = BDCF_OUTSTANDING;
4361
4362 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4363 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4364 "%s call 0x%llx flags 0x%x",
4365 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4366 call->bdc_flags);
4367 }
4368 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4369
4370 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4371 thread_call_func_delayed(
4372 (thread_call_func_t)bridge_delayed_callback,
4373 call, deadline);
4374 } else {
4375 if (call->bdc_thread_call == NULL) {
4376 call->bdc_thread_call = thread_call_allocate(
4377 (thread_call_func_t)bridge_delayed_callback,
4378 call);
4379 }
4380 thread_call_enter(call->bdc_thread_call);
4381 }
4382 }
4383
4384 /*
4385 * bridge_cancel_delayed_call:
4386 *
4387 * Cancel a queued or running delayed call.
4388 * If call is running, does not return until the call is done to
4389 * prevent race condition with the brigde interface getting destroyed
4390 */
4391 static void
4392 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4393 {
4394 boolean_t result;
4395 struct bridge_softc *sc = call->bdc_sc;
4396
4397 /*
4398 * The call was never scheduled
4399 */
4400 if (sc == NULL) {
4401 return;
4402 }
4403
4404 BRIDGE_LOCK_ASSERT_HELD(sc);
4405
4406 call->bdc_flags |= BDCF_CANCELLING;
4407
4408 while (call->bdc_flags & BDCF_OUTSTANDING) {
4409 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4410 "%s call 0x%llx flags 0x%x",
4411 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4412 call->bdc_flags);
4413 result = thread_call_func_cancel(
4414 (thread_call_func_t)bridge_delayed_callback, call, FALSE);
4415
4416 if (result) {
4417 /*
4418 * We managed to dequeue the delayed call
4419 */
4420 call->bdc_flags &= ~BDCF_OUTSTANDING;
4421 } else {
4422 /*
4423 * Wait for delayed call do be done running
4424 */
4425 msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4426 }
4427 }
4428 call->bdc_flags &= ~BDCF_CANCELLING;
4429 }
4430
4431 /*
4432 * bridge_cleanup_delayed_call:
4433 *
4434 * Dispose resource allocated for a delayed call
4435 * Assume the delayed call is not queued or running .
4436 */
4437 static void
4438 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4439 {
4440 boolean_t result;
4441 struct bridge_softc *sc = call->bdc_sc;
4442
4443 /*
4444 * The call was never scheduled
4445 */
4446 if (sc == NULL) {
4447 return;
4448 }
4449
4450 BRIDGE_LOCK_ASSERT_HELD(sc);
4451
4452 VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4453 VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4454
4455 if (call->bdc_thread_call != NULL) {
4456 result = thread_call_free(call->bdc_thread_call);
4457 if (result == FALSE) {
4458 panic("%s thread_call_free() failed for call %p",
4459 __func__, call);
4460 }
4461 call->bdc_thread_call = NULL;
4462 }
4463 }
4464
4465 /*
4466 * bridge_init:
4467 *
4468 * Initialize a bridge interface.
4469 */
4470 static int
4471 bridge_init(struct ifnet *ifp)
4472 {
4473 struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4474 errno_t error;
4475
4476 BRIDGE_LOCK_ASSERT_HELD(sc);
4477
4478 if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4479 return 0;
4480 }
4481
4482 error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4483
4484 /*
4485 * Calling bridge_aging_timer() is OK as there are no entries to
4486 * age so we're just going to arm the timer
4487 */
4488 bridge_aging_timer(sc);
4489 #if BRIDGESTP
4490 if (error == 0) {
4491 bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4492 }
4493 #endif /* BRIDGESTP */
4494 return error;
4495 }
4496
4497 /*
4498 * bridge_ifstop:
4499 *
4500 * Stop the bridge interface.
4501 */
4502 static void
4503 bridge_ifstop(struct ifnet *ifp, int disable)
4504 {
4505 #pragma unused(disable)
4506 struct bridge_softc * __single sc = ifp->if_softc;
4507
4508 BRIDGE_LOCK_ASSERT_HELD(sc);
4509
4510 if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4511 return;
4512 }
4513
4514 bridge_cancel_delayed_call(&sc->sc_aging_timer);
4515
4516 #if BRIDGESTP
4517 bstp_stop(&sc->sc_stp);
4518 #endif /* BRIDGESTP */
4519
4520 bridge_rtflush(sc, IFBF_FLUSHDYN);
4521 (void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4522 }
4523
4524 static const uint32_t checksum_request_flags = (MBUF_CSUM_REQ_TCP |
4525 MBUF_CSUM_REQ_UDP | MBUF_CSUM_REQ_TCPIPV6 | MBUF_CSUM_REQ_UDPIPV6);
4526
4527 static const mbuf_csum_performed_flags_t checksum_performed_all_good =
4528 (MBUF_CSUM_DID_IP | MBUF_CSUM_IP_GOOD
4529 | MBUF_CSUM_DID_DATA | MBUF_CSUM_PSEUDO_HDR);
4530
4531 /*
4532 * bridge_compute_cksum:
4533 *
4534 * If the packet has checksum flags, compare the hardware checksum
4535 * capabilities of the source and destination interfaces. If they
4536 * are the same, there's nothing to do. If they are different,
4537 * finalize the checksum so that it can be sent on the destination
4538 * interface.
4539 */
4540 static void
4541 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4542 {
4543 uint32_t csum_flags;
4544 uint16_t dst_hw_csum;
4545 uint32_t did_sw = 0;
4546 struct ether_header *eh;
4547 uint16_t src_hw_csum;
4548
4549 if (src_if == dst_if) {
4550 return;
4551 }
4552 csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4553 if (csum_flags == 0) {
4554 /* no checksum offload */
4555 return;
4556 }
4557
4558 /*
4559 * if destination/source differ in checksum offload
4560 * capabilities, finalize/compute the checksum
4561 */
4562 dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4563 src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4564 if (dst_hw_csum == src_hw_csum) {
4565 return;
4566 }
4567 eh = mtod(m, struct ether_header *);
4568 switch (eh->ether_type) {
4569 case HTONS_ETHERTYPE_IP:
4570 did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4571 break;
4572 case HTONS_ETHERTYPE_IPV6:
4573 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4574 break;
4575 }
4576 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4577 "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4578 src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4579 m->m_pkthdr.csum_flags);
4580 }
4581
4582 static inline errno_t
4583 bridge_transmit(ifnet_t ifp, mbuf_t m)
4584 {
4585 struct flowadv adv = { .code = FADV_SUCCESS };
4586 errno_t error;
4587 int flags = DLIL_OUTPUT_FLAGS_RAW;
4588
4589 flags = (if_bridge_output_skip_filters != 0)
4590 ? (DLIL_OUTPUT_FLAGS_RAW | DLIL_OUTPUT_FLAGS_SKIP_IF_FILTERS)
4591 : DLIL_OUTPUT_FLAGS_RAW;
4592 error = dlil_output(ifp, 0, m, NULL, NULL, flags, &adv);
4593 if (error == 0) {
4594 if (adv.code == FADV_FLOW_CONTROLLED) {
4595 error = EQFULL;
4596 } else if (adv.code == FADV_SUSPENDED) {
4597 error = EQSUSPENDED;
4598 }
4599 }
4600 return error;
4601 }
4602
4603 static int
4604 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4605 bool *is_fragmented)
4606 {
4607 int newoff;
4608
4609 *is_fragmented = false;
4610 while (1) {
4611 newoff = ip6_nexthdr(m, off, proto, nxtp);
4612 if (newoff < 0) {
4613 return off;
4614 } else if (newoff < off) {
4615 return -1; /* invalid */
4616 } else if (newoff == off) {
4617 return newoff;
4618 }
4619 off = newoff;
4620 proto = *nxtp;
4621 if (proto == IPPROTO_FRAGMENT) {
4622 *is_fragmented = true;
4623 }
4624 }
4625 }
4626
/* Atomically increment the counter lvalue 's' with relaxed ordering. */
#define __ATOMIC_INC(s) os_atomic_inc(&(s), relaxed)
4628
4629 static int
4630 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4631 ip_packet_info_t info_p, struct bripstats * stats_p)
4632 {
4633 int error = 0;
4634 u_int hlen;
4635 u_int ip_hlen;
4636 u_int ip_pay_len;
4637 struct mbuf * m0 = *mp;
4638 int off;
4639 int opt_len = 0;
4640 int proto = 0;
4641
4642 bzero(info_p, sizeof(*info_p));
4643 if (is_ipv4) {
4644 struct ip * ip;
4645 u_int ip_total_len;
4646
4647 /* IPv4 */
4648 hlen = mac_hlen + sizeof(struct ip);
4649 if (m0->m_pkthdr.len < hlen) {
4650 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4651 "Short IP packet %d < %d",
4652 m0->m_pkthdr.len, hlen);
4653 error = _EBADIP;
4654 __ATOMIC_INC(stats_p->bips_bad_ip);
4655 goto done;
4656 }
4657 if (m0->m_len < hlen) {
4658 *mp = m0 = m_pullup(m0, hlen);
4659 if (m0 == NULL) {
4660 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4661 "m_pullup failed hlen %d",
4662 hlen);
4663 error = ENOBUFS;
4664 __ATOMIC_INC(stats_p->bips_bad_ip);
4665 goto done;
4666 }
4667 }
4668 ip = (struct ip *)mtodo(m0, mac_hlen);
4669 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4670 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4671 "bad IP version");
4672 error = _EBADIP;
4673 __ATOMIC_INC(stats_p->bips_bad_ip);
4674 goto done;
4675 }
4676 ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4677 if (ip_hlen < sizeof(struct ip)) {
4678 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4679 "bad IP header length %d < %d",
4680 ip_hlen,
4681 (int)sizeof(struct ip));
4682 error = _EBADIP;
4683 __ATOMIC_INC(stats_p->bips_bad_ip);
4684 goto done;
4685 }
4686 hlen = mac_hlen + ip_hlen;
4687 if (m0->m_len < hlen) {
4688 *mp = m0 = m_pullup(m0, hlen);
4689 if (m0 == NULL) {
4690 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4691 "m_pullup failed hlen %d",
4692 hlen);
4693 error = ENOBUFS;
4694 __ATOMIC_INC(stats_p->bips_bad_ip);
4695 goto done;
4696 }
4697 ip = (struct ip *)mtodo(m0, mac_hlen);
4698 }
4699
4700 ip_total_len = ntohs(ip->ip_len);
4701 if (ip_total_len < ip_hlen) {
4702 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4703 "IP total len %d < header len %d",
4704 ip_total_len, ip_hlen);
4705 error = _EBADIP;
4706 __ATOMIC_INC(stats_p->bips_bad_ip);
4707 goto done;
4708 }
4709 if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4710 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4711 "invalid IP payload length %d > %d",
4712 ip_total_len,
4713 (m0->m_pkthdr.len - mac_hlen));
4714 error = _EBADIP;
4715 __ATOMIC_INC(stats_p->bips_bad_ip);
4716 goto done;
4717 }
4718 ip_pay_len = ip_total_len - ip_hlen;
4719 info_p->ip_proto = ip->ip_p;
4720 info_p->ip_hdr = mtodo(m0, mac_hlen);
4721 info_p->ip_m0_len = m0->m_len - mac_hlen;
4722 info_p->ip_hlen = ip_hlen;
4723 #define FRAG_BITS (IP_OFFMASK | IP_MF)
4724 if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4725 info_p->ip_is_fragmented = true;
4726 }
4727 __ATOMIC_INC(stats_p->bips_ip);
4728 } else {
4729 struct ip6_hdr *ip6;
4730
4731 /* IPv6 */
4732 hlen = mac_hlen + sizeof(struct ip6_hdr);
4733 if (m0->m_pkthdr.len < hlen) {
4734 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4735 "short IPv6 packet %d < %d",
4736 m0->m_pkthdr.len, hlen);
4737 error = _EBADIPV6;
4738 __ATOMIC_INC(stats_p->bips_bad_ip6);
4739 goto done;
4740 }
4741 if (m0->m_len < hlen) {
4742 *mp = m0 = m_pullup(m0, hlen);
4743 if (m0 == NULL) {
4744 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4745 "m_pullup failed hlen %d",
4746 hlen);
4747 error = ENOBUFS;
4748 __ATOMIC_INC(stats_p->bips_bad_ip6);
4749 goto done;
4750 }
4751 }
4752 ip6 = (struct ip6_hdr *)(mtodo(m0, mac_hlen));
4753 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4754 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4755 "bad IPv6 version");
4756 error = _EBADIPV6;
4757 __ATOMIC_INC(stats_p->bips_bad_ip6);
4758 goto done;
4759 }
4760 off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4761 &info_p->ip_is_fragmented);
4762 if (off < 0 || m0->m_pkthdr.len < off) {
4763 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4764 "ip6_lasthdr() returned %d",
4765 off);
4766 error = _EBADIPV6;
4767 __ATOMIC_INC(stats_p->bips_bad_ip6);
4768 goto done;
4769 }
4770 ip_hlen = sizeof(*ip6);
4771 opt_len = off - mac_hlen - ip_hlen;
4772 if (opt_len < 0) {
4773 error = _EBADIPV6;
4774 __ATOMIC_INC(stats_p->bips_bad_ip6);
4775 goto done;
4776 }
4777 ip_pay_len = ntohs(ip6->ip6_plen);
4778 if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4779 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4780 "invalid IPv6 payload length %d > %d",
4781 ip_pay_len,
4782 (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4783 error = _EBADIPV6;
4784 __ATOMIC_INC(stats_p->bips_bad_ip6);
4785 goto done;
4786 }
4787 info_p->ip_proto = proto;
4788 info_p->ip_hdr = mtodo(m0, mac_hlen);
4789 info_p->ip_m0_len = m0->m_len - mac_hlen;
4790 info_p->ip_hlen = ip_hlen;
4791 __ATOMIC_INC(stats_p->bips_ip6);
4792 }
4793 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4794 "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4795 is_ipv4 ? '4' : '6',
4796 proto, ip_hlen, ip_pay_len, opt_len,
4797 m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4798 info_p->ip_pay_len = ip_pay_len;
4799 info_p->ip_opt_len = opt_len;
4800 info_p->ip_is_ipv4 = is_ipv4;
4801 done:
4802 return error;
4803 }
4804
4805 static int
4806 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4807 ip_packet_info_t info_p, struct bripstats * stats_p)
4808 {
4809 int error;
4810 u_int hlen;
4811
4812 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4813 if (error != 0) {
4814 goto done;
4815 }
4816 if (info_p->ip_proto != IPPROTO_TCP) {
4817 /* not a TCP frame, not an error, just a bad guess */
4818 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4819 "non-TCP (%d) IPv%c frame %d bytes",
4820 info_p->ip_proto, is_ipv4 ? '4' : '6',
4821 (*mp)->m_pkthdr.len);
4822 goto done;
4823 }
4824 if (info_p->ip_is_fragmented) {
4825 /* both TSO and IP fragmentation don't make sense */
4826 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4827 "fragmented TSO packet?");
4828 __ATOMIC_INC(stats_p->bips_bad_tcp);
4829 error = _EBADTCP;
4830 goto done;
4831 }
4832 hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4833 info_p->ip_opt_len;
4834 if ((*mp)->m_len < hlen) {
4835 *mp = m_pullup(*mp, hlen);
4836 if (*mp == NULL) {
4837 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4838 "m_pullup %d failed",
4839 hlen);
4840 __ATOMIC_INC(stats_p->bips_bad_tcp);
4841 error = _EBADTCP;
4842 goto done;
4843 }
4844 }
4845 info_p->ip_proto_hdr = info_p->ip_hdr + info_p->ip_hlen +
4846 info_p->ip_opt_len;
4847 done:
4848 return error;
4849 }
4850
4851 static inline void
4852 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4853 {
4854 if (proto == IPPROTO_TCP) {
4855 __ATOMIC_INC(stats_p->brcs_tcp_checksum);
4856 } else {
4857 __ATOMIC_INC(stats_p->brcs_udp_checksum);
4858 }
4859 return;
4860 }
4861
4862 #define ETHER_TYPE_FLAG_NONE 0x00
4863 #define ETHER_TYPE_FLAG_IPV4 0x01
4864 #define ETHER_TYPE_FLAG_IPV6 0x02
4865 #define ETHER_TYPE_FLAG_ARP 0x04
4866 #define ETHER_TYPE_FLAG_IP (ETHER_TYPE_FLAG_IPV4 | ETHER_TYPE_FLAG_IPV6)
4867 #define ETHER_TYPE_FLAG_IP_ARP (ETHER_TYPE_FLAG_IP | ETHER_TYPE_FLAG_ARP)
4868
4869 static inline bool
4870 ether_type_flag_is_ip(ether_type_flag_t flag)
4871 {
4872 return (flag & ETHER_TYPE_FLAG_IP) != 0;
4873 }
4874
4875 static inline ether_type_flag_t
4876 ether_type_flag_get(uint16_t ether_type)
4877 {
4878 ether_type_flag_t flag = ETHER_TYPE_FLAG_NONE;
4879
4880 switch (ether_type) {
4881 case HTONS_ETHERTYPE_IP:
4882 flag = ETHER_TYPE_FLAG_IPV4;
4883 break;
4884 case HTONS_ETHERTYPE_IPV6:
4885 flag = ETHER_TYPE_FLAG_IPV6;
4886 break;
4887 case HTONS_ETHERTYPE_ARP:
4888 flag = ETHER_TYPE_FLAG_ARP;
4889 break;
4890 default:
4891 break;
4892 }
4893 return flag;
4894 }
4895
4896 static bool
4897 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4898 {
4899 uint16_t ether_type;
4900 bool is_ip = TRUE;
4901
4902 ether_type = ntohs(eh->ether_type);
4903 switch (ether_type) {
4904 case ETHERTYPE_IP:
4905 *is_ipv4 = TRUE;
4906 break;
4907 case ETHERTYPE_IPV6:
4908 *is_ipv4 = FALSE;
4909 break;
4910 default:
4911 is_ip = FALSE;
4912 break;
4913 }
4914 return is_ip;
4915 }
4916
4917 static errno_t
4918 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4919 {
4920 struct brcsumstats *csum_stats_p;
4921 struct ether_header *eh;
4922 errno_t error = 0;
4923 ip_packet_info info;
4924 bool is_ipv4;
4925 struct mbuf * m;
4926 u_int mac_hlen = sizeof(struct ether_header);
4927 uint16_t sum;
4928 bool valid;
4929
4930 eh = mtod(*mp, struct ether_header *);
4931 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4932 goto done;
4933 }
4934 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4935 &stats_p->brms_out_ip);
4936 m = *mp;
4937 if (error != 0) {
4938 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4939 "bridge_get_ip_proto failed %d",
4940 error);
4941 goto done;
4942 }
4943 if (is_ipv4) {
4944 if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4945 /* hardware offloaded IP header checksum */
4946 valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4947 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4948 "IP checksum HW %svalid",
4949 valid ? "" : "in");
4950 if (!valid) {
4951 __ATOMIC_INC(stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum);
4952 error = _EBADIPCHECKSUM;
4953 goto done;
4954 }
4955 __ATOMIC_INC(stats_p->brms_out_cksum_good_hw.brcs_ip_checksum);
4956 } else {
4957 /* verify */
4958 sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4959 valid = (sum == 0);
4960 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4961 "IP checksum SW %svalid",
4962 valid ? "" : "in");
4963 if (!valid) {
4964 __ATOMIC_INC(stats_p->brms_out_cksum_bad.brcs_ip_checksum);
4965 error = _EBADIPCHECKSUM;
4966 goto done;
4967 }
4968 __ATOMIC_INC(stats_p->brms_out_cksum_good.brcs_ip_checksum);
4969 }
4970 }
4971 if (info.ip_is_fragmented) {
4972 /* can't verify checksum on fragmented packets */
4973 goto done;
4974 }
4975 switch (info.ip_proto) {
4976 case IPPROTO_TCP:
4977 __ATOMIC_INC(stats_p->brms_out_ip.bips_tcp);
4978 break;
4979 case IPPROTO_UDP:
4980 __ATOMIC_INC(stats_p->brms_out_ip.bips_udp);
4981 break;
4982 default:
4983 goto done;
4984 }
4985 /* check for hardware offloaded UDP/TCP checksum */
4986 #define HW_CSUM (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4987 if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4988 /* checksum verified by hardware */
4989 valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4990 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4991 "IPv%c %s checksum HW 0x%x %svalid",
4992 is_ipv4 ? '4' : '6',
4993 (info.ip_proto == IPPROTO_TCP)
4994 ? "TCP" : "UDP",
4995 m->m_pkthdr.csum_data,
4996 valid ? "" : "in" );
4997 if (!valid) {
4998 /* bad checksum */
4999 csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
5000 error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
5001 : _EBADTCPCHECKSUM;
5002 } else {
5003 /* good checksum */
5004 csum_stats_p = &stats_p->brms_out_cksum_good_hw;
5005 }
5006 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
5007 goto done;
5008 }
5009 /* adjust frame to skip mac-layer header */
5010 _mbuf_adjust_pkthdr_and_data(m, mac_hlen);
5011 if (is_ipv4) {
5012 sum = inet_cksum(m, info.ip_proto,
5013 info.ip_hlen,
5014 info.ip_pay_len);
5015 } else {
5016 sum = inet6_cksum(m, info.ip_proto,
5017 info.ip_hlen + info.ip_opt_len,
5018 info.ip_pay_len - info.ip_opt_len);
5019 }
5020 valid = (sum == 0);
5021 if (valid) {
5022 csum_stats_p = &stats_p->brms_out_cksum_good;
5023 } else {
5024 csum_stats_p = &stats_p->brms_out_cksum_bad;
5025 error = (info.ip_proto == IPPROTO_TCP)
5026 ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
5027 }
5028 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
5029 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5030 "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
5031 is_ipv4 ? '4' : '6',
5032 (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
5033 valid ? "" : "in",
5034 sum, info.ip_hlen, info.ip_pay_len);
5035 /* adjust frame back to start of mac-layer header */
5036 _mbuf_adjust_pkthdr_and_data(m, -mac_hlen);
5037
5038 done:
5039 return error;
5040 }
5041
5042 static mbuf_t
5043 bridge_verify_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * dbif,
5044 mbuf_t in_list, bool is_ipv4)
5045 {
5046 mbuf_t next_packet;
5047 mblist ret;
5048
5049 mblist_init(&ret);
5050 for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
5051 errno_t error;
5052
5053 /* take packet out of the list */
5054 next_packet = scan->m_nextpkt;
5055 scan->m_nextpkt = NULL;
5056
5057 if (scan->m_pkthdr.rx_seg_cnt > 1) {
5058 /* LRO packet, compute checksum on large packet */
5059 scan = bridge_filter_checksum(bridge_ifp, dbif, scan,
5060 is_ipv4, false, true);
5061 } else {
5062 /* verify checksum */
5063 error = bridge_verify_checksum(&scan, &dbif->bif_stats);
5064 if (error != 0) {
5065 if (scan != NULL) {
5066 m_freem(scan);
5067 scan = NULL;
5068 }
5069 }
5070 }
5071
5072 /* add it back to the list */
5073 if (scan != NULL) {
5074 mblist_append(&ret, scan);
5075 }
5076 }
5077 return ret.head;
5078 }
5079
5080
5081 static errno_t
5082 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
5083 struct ifbrmstats * stats_p)
5084 {
5085 uint16_t * csum_p;
5086 errno_t error = 0;
5087 u_int hlen;
5088 struct mbuf * m0 = *mp;
5089 u_int mac_hlen = sizeof(struct ether_header);
5090 u_int pkt_hdr_len;
5091 struct tcphdr * tcp;
5092 u_int tcp_hlen;
5093 struct udphdr * udp;
5094
5095 if (info_p->ip_is_ipv4) {
5096 /* compute IP header checksum */
5097 struct ip *ip = (struct ip *)info_p->ip_hdr;
5098 ip->ip_sum = 0;
5099 ip->ip_sum = inet_cksum(m0, 0, mac_hlen, info_p->ip_hlen);
5100 __ATOMIC_INC(stats_p->brms_in_computed_cksum.brcs_ip_checksum);
5101 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5102 "IPv4 checksum 0x%x",
5103 ntohs(ip->ip_sum));
5104 }
5105 if (info_p->ip_is_fragmented) {
5106 /* can't compute checksum on fragmented packets */
5107 goto done;
5108 }
5109 pkt_hdr_len = m0->m_pkthdr.len;
5110 switch (info_p->ip_proto) {
5111 case IPPROTO_TCP:
5112 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
5113 + sizeof(struct tcphdr);
5114 if (m0->m_len < hlen) {
5115 *mp = m0 = m_pullup(m0, hlen);
5116 if (m0 == NULL) {
5117 __ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5118 error = _EBADTCP;
5119 goto done;
5120 }
5121 }
5122 tcp = (struct tcphdr *)(info_p->ip_hdr + info_p->ip_hlen
5123 + info_p->ip_opt_len);
5124 tcp_hlen = tcp->th_off << 2;
5125 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
5126 if (hlen > pkt_hdr_len) {
5127 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5128 "bad tcp header length %u",
5129 tcp_hlen);
5130 __ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5131 error = _EBADTCP;
5132 goto done;
5133 }
5134 csum_p = &tcp->th_sum;
5135 __ATOMIC_INC(stats_p->brms_in_ip.bips_tcp);
5136 break;
5137 case IPPROTO_UDP:
5138 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
5139 if (m0->m_len < hlen) {
5140 *mp = m0 = m_pullup(m0, hlen);
5141 if (m0 == NULL) {
5142 __ATOMIC_INC(stats_p->brms_in_ip.bips_bad_udp);
5143 error = ENOBUFS;
5144 goto done;
5145 }
5146 }
5147 udp = (struct udphdr *)(info_p->ip_hdr + info_p->ip_hlen
5148 + info_p->ip_opt_len);
5149 csum_p = &udp->uh_sum;
5150 __ATOMIC_INC(stats_p->brms_in_ip.bips_udp);
5151 break;
5152 default:
5153 /* not TCP or UDP */
5154 goto done;
5155 }
5156 *csum_p = 0;
5157 /* adjust frame to skip mac-layer header */
5158 _mbuf_adjust_pkthdr_and_data(m0, mac_hlen);
5159 if (info_p->ip_is_ipv4) {
5160 *csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
5161 info_p->ip_pay_len);
5162 } else {
5163 *csum_p = inet6_cksum(m0, info_p->ip_proto,
5164 info_p->ip_hlen + info_p->ip_opt_len,
5165 info_p->ip_pay_len - info_p->ip_opt_len);
5166 }
5167 if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
5168 /* RFC 1122 4.1.3.4 */
5169 *csum_p = 0xffff;
5170 }
5171 /* adjust frame back to start of mac-layer header */
5172 _mbuf_adjust_pkthdr_and_data(m0, -mac_hlen);
5173 proto_csum_stats_increment(info_p->ip_proto,
5174 &stats_p->brms_in_computed_cksum);
5175
5176 /* indicate that the checksum is good */
5177 mbuf_set_csum_performed(m0, checksum_performed_all_good, 0xffff);
5178
5179 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5180 "IPv%c %s set checksum 0x%x",
5181 info_p->ip_is_ipv4 ? '4' : '6',
5182 (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
5183 ntohs(*csum_p));
5184 done:
5185 return error;
5186 }
5187
5188 static inline void
5189 bridge_handle_checksum_op(ifnet_t src_ifp, ifnet_t dst_ifp,
5190 mbuf_t m, ChecksumOperation cksum_op)
5191 {
5192 switch (cksum_op) {
5193 case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
5194 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5195 break;
5196 case CHECKSUM_OPERATION_FINALIZE:
5197 /* the checksum might not be correct, finalize now */
5198 VERIFY(dst_ifp != NULL);
5199 bridge_finalize_cksum(dst_ifp, m);
5200 break;
5201 case CHECKSUM_OPERATION_COMPUTE:
5202 VERIFY(dst_ifp != NULL && src_ifp != NULL);
5203 bridge_compute_cksum(src_ifp, dst_ifp, m);
5204 break;
5205 default:
5206 break;
5207 }
5208 return;
5209 }
5210
5211 static uint32_t
5212 get_if_tso_mtu(struct ifnet * ifp, bool is_ipv4)
5213 {
5214 uint32_t tso_mtu;
5215
5216 tso_mtu = is_ipv4 ? ifp->if_tso_v4_mtu : ifp->if_tso_v6_mtu;
5217 if (tso_mtu == 0) {
5218 tso_mtu = IP_MAXPACKET;
5219 }
5220
5221 #if DEBUG || DEVELOPMENT
5222 #define REDUCED_TSO_MTU (16 * 1024)
5223 if (if_bridge_reduce_tso_mtu != 0 && tso_mtu > REDUCED_TSO_MTU) {
5224 tso_mtu = REDUCED_TSO_MTU;
5225 }
5226 #endif /* DEBUG || DEVELOPMENT */
5227 return tso_mtu;
5228 }
5229
5230 /*
5231 * tso_hwassist:
5232 * - determine whether the destination interface supports TSO offload
5233 * - if the packet is already marked for offload and the hardware supports
5234 * it, just allow the packet to continue on
5235 * - if not, parse the packet headers to verify that this is a large TCP
5236 * packet requiring segmentation; if the hardware doesn't support it
5237 * set need_sw_tso; otherwise, mark the packet for TSO offload
5238 */
5239 static int
5240 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
5241 int * mss_p, bool * need_gso, bool * is_large_tcp)
5242 {
5243 uint32_t csum_flags;
5244 int error = 0;
5245 ip_packet_info info;
5246 u_int32_t if_csum;
5247 u_int32_t if_tso;
5248 u_int32_t mbuf_tso;
5249 int mss = *mss_p;
5250 uint8_t seg_cnt = 0;
5251 bool supports_cksum = false;
5252 uint32_t pkt_mtu;
5253 struct bripstats stats;
5254
5255 *need_gso = false;
5256 *is_large_tcp = false;
5257 if (is_ipv4) {
5258 /*
5259 * Enable both TCP and IP offload if the hardware supports it.
5260 * If the hardware doesn't support TCP offload, supports_cksum
5261 * will be false so we won't set either offload.
5262 */
5263 if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
5264 supports_cksum = (if_csum & CSUM_TCP) != 0;
5265 if_tso = IFNET_TSO_IPV4;
5266 mbuf_tso = CSUM_TSO_IPV4;
5267 } else {
5268 if_csum = (ifp->if_hwassist & CSUM_TCPIPV6);
5269 supports_cksum = (if_csum & CSUM_TCPIPV6) != 0;
5270 if_tso = IFNET_TSO_IPV6;
5271 mbuf_tso = CSUM_TSO_IPV6;
5272 }
5273 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5274 "%s: does%s support checksum 0x%x if_csum 0x%x",
5275 ifp->if_xname, supports_cksum ? "" : " not",
5276 ifp->if_hwassist, if_csum);
5277
5278 /* verify that this is a large TCP frame */
5279 error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
5280 &info, &stats);
5281 if (error != 0) {
5282 /* bad packet */
5283 goto done;
5284 }
5285 if (info.ip_proto_hdr == NULL) {
5286 /* not a TCP packet */
5287 goto done;
5288 }
5289 pkt_mtu = info.ip_hlen + info.ip_pay_len + info.ip_opt_len;
5290 if (mss == 0) {
5291 /* check for LRO */
5292 seg_cnt = (*mp)->m_pkthdr.rx_seg_cnt;
5293 if (seg_cnt == 1 || (seg_cnt == 0 && pkt_mtu <= ifp->if_mtu)) {
5294 /* not actually a large packet */
5295 goto done;
5296 }
5297 }
5298 if (mss == 0) {
5299 uint32_t hdr_len;
5300 struct tcphdr * tcp;
5301
5302 tcp = (struct tcphdr *)info.ip_proto_hdr;
5303 hdr_len = info.ip_hlen + info.ip_opt_len + (tcp->th_off << 2);
5304
5305 /* packet isn't marked, mark it now */
5306 if (seg_cnt != 0) {
5307 uint32_t len;
5308
5309 /* approximate the MSS using the LRO seg cnt */
5310 len = mbuf_pkthdr_len(*mp) - hdr_len - ETHER_HDR_LEN;
5311 mss = len / seg_cnt;
5312 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5313 "%s: mss %d = len %d / seg cnt %d",
5314 ifp->if_xname, mss, len, seg_cnt);
5315 if (mss <= 0) {
5316 /* unexpected value */
5317 mss = 0;
5318 goto done;
5319 }
5320 } else {
5321 mss = ifp->if_mtu - hdr_len
5322 - if_bridge_tso_reduce_mss_tx;
5323 assert(mss > 0);
5324 }
5325 csum_flags = mbuf_tso;
5326 if (supports_cksum) {
5327 csum_flags |= if_csum;
5328 }
5329 (*mp)->m_pkthdr.tso_segsz = mss;
5330 (*mp)->m_pkthdr.csum_flags |= csum_flags;
5331 (*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
5332 }
5333 *is_large_tcp = true;
5334 (*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
5335 if ((ifp->if_hwassist & if_tso) == 0) {
5336 /* need gso if no hardware support */
5337 *need_gso = true;
5338 } else {
5339 uint32_t tso_mtu = 0;
5340
5341 tso_mtu = get_if_tso_mtu(ifp, is_ipv4);
5342 if (pkt_mtu > tso_mtu) {
5343 /* need gso if tso_mtu too small */
5344 *need_gso = true;
5345 }
5346 }
5347 done:
5348 *mss_p = mss;
5349 return error;
5350 }
5351
5352 /*
5353 * bridge_enqueue:
5354 *
5355 * Enqueue a packet list on a bridge member interface.
5356 *
5357 */
5358 static int
5359 bridge_enqueue(ifnet_t bridge_ifp, ifnet_t src_if, ifnet_t dst_if,
5360 ether_type_flag_t etypef, mbuf_t in_list, ChecksumOperation orig_cksum_op,
5361 pkt_direction_t direction)
5362 {
5363 int enqueue_error = 0;
5364 mbuf_t next_packet;
5365 uint32_t out_errors = 0;
5366 mblist out_list;
5367
5368 VERIFY(dst_if != NULL);
5369
5370 mblist_init(&out_list);
5371 for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
5372 bool check_gso = false;
5373 ChecksumOperation cksum_op = orig_cksum_op;
5374 errno_t error = 0;
5375 bool is_ipv4 = false;
5376 int len;
5377 int mss = 0;
5378 bool need_gso = false;
5379
5380 scan->m_flags |= M_PROTO1; /* set to avoid loops */
5381 next_packet = scan->m_nextpkt;
5382 scan->m_nextpkt = NULL;
5383 len = mbuf_pkthdr_len(scan);
5384 is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
5385 mss = _mbuf_get_tso_mss(scan);
5386 if (mss != 0) {
5387 /* packet is marked for segmentation */
5388 check_gso = true;
5389 } else if (direction == pkt_direction_RX &&
5390 scan->m_pkthdr.rx_seg_cnt != 0) {
5391 /* LRO packet */
5392 check_gso = true;
5393 } else if (ether_type_flag_is_ip(etypef) &&
5394 len > (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5395 /*
5396 * Need to segment the packet if it is a large frame
5397 * and the destination interface does not support TSO.
5398 *
5399 * Note that with trailers, it's possible for a packet to
5400 * be large but not actually require segmentation.
5401 */
5402 check_gso = true;
5403 }
5404 if (check_gso) {
5405 bool is_large_tcp = false;
5406
5407 error = tso_hwassist(&scan, is_ipv4,
5408 dst_if, sizeof(struct ether_header), &mss,
5409 &need_gso, &is_large_tcp);
5410 if (is_large_tcp &&
5411 cksum_op == CHECKSUM_OPERATION_CLEAR_OFFLOAD) {
5412 cksum_op = CHECKSUM_OPERATION_NONE;
5413 }
5414 }
5415 if (error != 0) {
5416 if (scan != NULL) {
5417 m_freem(scan);
5418 scan = NULL;
5419 }
5420 out_errors++;
5421 } else if (need_gso) {
5422 int mac_hlen = sizeof(struct ether_header);
5423 mblist segs;
5424
5425 /* segment packets, add to list */
5426 segs = gso_tcp_transmit(dst_if, scan, mac_hlen,
5427 is_ipv4);
5428 if (segs.head != NULL) {
5429 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5430 "%s (%s) append gso #segs %u bytes %u",
5431 bridge_ifp->if_xname,
5432 dst_if->if_xname,
5433 segs.count, segs.bytes);
5434 mblist_append_list(&out_list, segs);
5435 } else {
5436 out_errors++;
5437 }
5438 } else {
5439 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5440 "%s (%s) append %d bytes mss %d op %d",
5441 bridge_ifp->if_xname,
5442 dst_if->if_xname,
5443 len, mss, cksum_op);
5444 bridge_handle_checksum_op(src_if, dst_if,
5445 scan, cksum_op);
5446 mblist_append(&out_list, scan);
5447 }
5448 }
5449 if (out_list.head != NULL) {
5450 enqueue_error = bridge_transmit(dst_if, out_list.head);
5451 if (enqueue_error != 0) {
5452 out_errors++;
5453 }
5454 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5455 "%s (%s) bridge_transmit packets %u bytes %u error %d",
5456 bridge_ifp->if_xname,
5457 dst_if->if_xname,
5458 out_list.count, out_list.bytes, enqueue_error);
5459 }
5460 if (out_list.count != 0 || out_errors != 0) {
5461 ifnet_stat_increment_out(bridge_ifp, out_list.count,
5462 out_list.bytes, out_errors);
5463 }
5464 return enqueue_error;
5465 }
5466
5467 /*
5468 * bridge_member_output:
5469 *
5470 * Send output from a bridge member interface. This
5471 * performs the bridging function for locally originated
5472 * packets.
5473 *
5474 * The mbuf has the Ethernet header already attached.
5475 */
5476 static errno_t
5477 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5478 {
5479 struct bridge_iflist * bif = NULL;
5480 ifnet_t bridge_ifp;
5481 struct ether_header *eh;
5482 ether_type_flag_t etypef;
5483 struct ifnet *dst_if = NULL;
5484 uint16_t vlan;
5485 struct bridge_iflist *mac_nat_bif;
5486 ifnet_t mac_nat_ifp;
5487 mbuf_t m = *data;
5488
5489 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5490 "ifp %s", ifp->if_xname);
5491 if (m->m_len < ETHER_HDR_LEN) {
5492 m = m_pullup(m, ETHER_HDR_LEN);
5493 if (m == NULL) {
5494 *data = NULL;
5495 return EJUSTRETURN;
5496 }
5497 }
5498
5499 BRIDGE_LOCK(sc);
5500 mac_nat_bif = sc->sc_mac_nat_bif;
5501 mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5502 if (mac_nat_ifp == ifp) {
5503 /* record the IP address used by the MAC NAT interface */
5504 (void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5505 m = *data;
5506 if (m == NULL) {
5507 /* packet was deallocated */
5508 BRIDGE_UNLOCK(sc);
5509 return EJUSTRETURN;
5510 }
5511 }
5512 bridge_ifp = sc->sc_ifp;
5513 eh = mtod(m, struct ether_header *);
5514 vlan = VLANTAGOF(m);
5515 etypef = ether_type_flag_get(eh->ether_type);
5516
5517 /*
5518 * APPLE MODIFICATION
5519 * If the packet is an 802.1X ethertype, then only send on the
5520 * original output interface.
5521 */
5522 if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5523 dst_if = ifp;
5524 goto sendunicast;
5525 }
5526
5527 /*
5528 * If bridge is down, but the original output interface is up,
5529 * go ahead and send out that interface. Otherwise, the packet
5530 * is dropped below.
5531 */
5532 if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5533 dst_if = ifp;
5534 goto sendunicast;
5535 }
5536
5537 /*
5538 * If the packet is a multicast, or we don't know a better way to
5539 * get there, send to all interfaces.
5540 */
5541 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5542 dst_if = NULL;
5543 } else {
5544 bif = bridge_rtlookup_bif(sc, eh->ether_dhost, vlan);
5545 if (bif != NULL) {
5546 dst_if = bif->bif_ifp;
5547 }
5548 }
5549 if (dst_if == NULL) {
5550 struct mbuf *mc;
5551 errno_t error;
5552
5553
5554 bridge_span(sc, etypef, m);
5555
5556 BRIDGE_LOCK2REF(sc, error);
5557 if (error != 0) {
5558 m_freem(m);
5559 return EJUSTRETURN;
5560 }
5561
5562 /*
5563 * Duplicate and send the packet across all member interfaces
5564 * except the originating interface.
5565 */
5566 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5567 dst_if = bif->bif_ifp;
5568 if (dst_if == ifp) {
5569 /* skip the originating interface */
5570 continue;
5571 }
5572 /* skip interface with inactive link status */
5573 if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5574 continue;
5575 }
5576
5577 /* skip interface that isn't running */
5578 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5579 continue;
5580 }
5581 /*
5582 * If the interface is participating in spanning
5583 * tree, make sure the port is in a state that
5584 * allows forwarding.
5585 */
5586 if ((bif->bif_ifflags & IFBIF_STP) &&
5587 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5588 continue;
5589 }
5590 /*
5591 * If the destination is the MAC NAT interface,
5592 * skip sending the packet. The packet can't be sent
5593 * if the source MAC is incorrect.
5594 */
5595 if (dst_if == mac_nat_ifp) {
5596 continue;
5597 }
5598
5599 /* make a deep copy to send on this member interface */
5600 mc = m_dup(m, M_DONTWAIT);
5601 if (mc == NULL) {
5602 (void)ifnet_stat_increment_out(bridge_ifp,
5603 0, 0, 1);
5604 continue;
5605 }
5606 (void)bridge_enqueue(bridge_ifp, ifp, dst_if, etypef,
5607 mc, CHECKSUM_OPERATION_COMPUTE, pkt_direction_TX);
5608 }
5609 BRIDGE_UNREF(sc);
5610
5611 if ((ifp->if_flags & IFF_RUNNING) == 0) {
5612 m_freem(m);
5613 return EJUSTRETURN;
5614 }
5615 /* allow packet to continue on the originating interface */
5616 return 0;
5617 }
5618
5619 sendunicast:
5620 /*
5621 * XXX Spanning tree consideration here?
5622 */
5623
5624 bridge_span(sc, etypef, m);
5625 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5626 m_freem(m);
5627 BRIDGE_UNLOCK(sc);
5628 return EJUSTRETURN;
5629 }
5630
5631 BRIDGE_UNLOCK(sc);
5632 if (dst_if == ifp) {
5633 /* allow packet to continue on the originating interface */
5634 return 0;
5635 }
5636 if (dst_if != mac_nat_ifp) {
5637 (void) bridge_enqueue(bridge_ifp, ifp, dst_if, etypef, m,
5638 CHECKSUM_OPERATION_COMPUTE, pkt_direction_TX);
5639 } else {
5640 /*
5641 * This is not the original output interface
5642 * and the destination is the MAC NAT interface.
5643 * Drop the packet because the packet can't be sent
5644 * if the source MAC is incorrect.
5645 */
5646 m_freem(m);
5647 }
5648 return EJUSTRETURN;
5649 }
5650
5651 /*
5652 * Output callback.
5653 *
5654 * This routine is called externally from above only when if_bridge_txstart
5655 * is disabled; otherwise it is called internally by bridge_start().
5656 */
5657 static int
5658 bridge_output(struct ifnet *ifp, struct mbuf *m)
5659 {
5660 struct bridge_iflist *bif;
5661 struct bridge_softc * __single sc = ifnet_softc(ifp);
5662 struct ether_header *eh;
5663 ether_type_flag_t etypef;
5664 struct ifnet *dst_if = NULL;
5665 int error = 0;
5666
5667 eh = mtod(m, struct ether_header *);
5668 etypef = ether_type_flag_get(eh->ether_type);
5669 BRIDGE_LOCK(sc);
5670
5671 if (!IS_BCAST_MCAST(m)) {
5672 bif = bridge_rtlookup_bif(sc, eh->ether_dhost, 0);
5673 if (bif != NULL) {
5674 dst_if = bif->bif_ifp;
5675 }
5676 }
5677
5678 (void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5679
5680 BRIDGE_BPF_TAP_OUT(ifp, m);
5681
5682 if (dst_if == NULL) {
5683 /* callee will unlock */
5684 bridge_broadcast(sc, NULL, etypef, m);
5685 } else {
5686 ifnet_t bridge_ifp;
5687
5688 bridge_ifp = sc->sc_ifp;
5689 BRIDGE_UNLOCK(sc);
5690
5691 error = bridge_enqueue(bridge_ifp, NULL, dst_if, etypef, m,
5692 CHECKSUM_OPERATION_FINALIZE, pkt_direction_TX);
5693 }
5694
5695 return error;
5696 }
5697
5698 static void
5699 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5700 {
5701 struct ether_header *eh;
5702 bool is_ipv4;
5703 uint32_t sw_csum, hwcap;
5704 uint32_t did_sw;
5705 uint32_t csum_flags;
5706
5707 eh = mtod(m, struct ether_header *);
5708 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5709 return;
5710 }
5711
5712 /* do in software what the hardware cannot */
5713 hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5714 csum_flags = m->m_pkthdr.csum_flags;
5715 sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5716 sw_csum &= IF_HWASSIST_CSUM_MASK;
5717
5718 if (is_ipv4) {
5719 if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5720 (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5721 if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5722 uint16_t start =
5723 sizeof(*eh) + sizeof(struct ip);
5724 uint16_t ulpoff =
5725 m->m_pkthdr.csum_data & 0xffff;
5726 m->m_pkthdr.csum_flags |=
5727 (CSUM_DATA_VALID | CSUM_PARTIAL);
5728 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5729 m->m_pkthdr.csum_tx_start = start;
5730 } else {
5731 sw_csum |= (CSUM_DELAY_DATA &
5732 m->m_pkthdr.csum_flags);
5733 }
5734 }
5735 did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5736 } else {
5737 if ((hwcap & CSUM_PARTIAL) &&
5738 !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5739 (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5740 if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5741 uint16_t start =
5742 sizeof(*eh) + sizeof(struct ip6_hdr);
5743 uint16_t ulpoff =
5744 m->m_pkthdr.csum_data & 0xffff;
5745 m->m_pkthdr.csum_flags |=
5746 (CSUM_DATA_VALID | CSUM_PARTIAL);
5747 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5748 m->m_pkthdr.csum_tx_start = start;
5749 } else {
5750 sw_csum |= (CSUM_DELAY_IPV6_DATA &
5751 m->m_pkthdr.csum_flags);
5752 }
5753 }
5754 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5755 }
5756 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5757 "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5758 ifp->if_xname, csum_flags, hwcap, sw_csum,
5759 did_sw, m->m_pkthdr.csum_flags);
5760 }
5761
5762 /*
5763 * bridge_start:
5764 *
5765 * Start output on a bridge.
5766 *
5767 * This routine is invoked by the start worker thread; because we never call
5768 * it directly, there is no need do deploy any serialization mechanism other
5769 * than what's already used by the worker thread, i.e. this is already single
5770 * threaded.
5771 *
5772 * This routine is called only when if_bridge_txstart is enabled.
5773 */
5774 static void
5775 bridge_start(struct ifnet *ifp)
5776 {
5777 mbuf_ref_t m;
5778
5779 for (;;) {
5780 if (ifnet_dequeue(ifp, &m) != 0) {
5781 break;
5782 }
5783
5784 (void) bridge_output(ifp, m);
5785 }
5786 }
5787
5788 static void
5789 prepare_input_packet(ifnet_t ifp, mbuf_t m)
5790 {
5791 mbuf_pkthdr_setrcvif(m, ifp);
5792 mbuf_pkthdr_setheader(m, mtod(m, void *));
5793 /* adjust frame to skip mac-layer header */
5794 _mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
5795 }
5796
5797 static void
5798 mark_tso_checksum_ok(mbuf_t m)
5799 {
5800 if (_mbuf_get_tso_mss(m) != 0 ||
5801 (m->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
5802 mbuf_set_csum_performed(m, checksum_performed_all_good, 0xffff);
5803 }
5804 }
5805
5806 static void
5807 inject_input_packet_list(ifnet_t ifp, mbuf_t in_list, bool m_proto1)
5808 {
5809 for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5810 /* mark the packets as arriving on the interface */
5811 BRIDGE_BPF_TAP_IN(ifp, scan);
5812 if (m_proto1) {
5813 scan->m_flags |= M_PROTO1; /* set to avoid loops */
5814 }
5815 prepare_input_packet(ifp, scan);
5816 mark_tso_checksum_ok(scan);
5817 }
5818 dlil_input_packet_list(ifp, in_list);
5819 return;
5820 }
5821
5822 static void
5823 adjust_input_packet_list(mbuf_t in_list)
5824 {
5825 for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5826 mbuf_pkthdr_setheader(scan, mtod(scan, void *));
5827 _mbuf_adjust_pkthdr_and_data(scan, ETHER_HDR_LEN);
5828 }
5829 }
5830
5831 static bool
5832 in_addr_is_ours(struct in_addr ip)
5833 {
5834 struct in_ifaddr *ia;
5835 bool ours = false;
5836
5837 lck_rw_lock_shared(&in_ifaddr_rwlock);
5838 TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5839 if (ia->ia_addr.sin_addr.s_addr == ip.s_addr) {
5840 ours = true;
5841 break;
5842 }
5843 }
5844 lck_rw_done(&in_ifaddr_rwlock);
5845 return ours;
5846 }
5847
5848 static bool
5849 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5850 {
5851 struct in6_addr dst_ip;
5852 struct in6_ifaddr *ia6;
5853 bool ours = false;
5854
5855 if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5856 /* need to embed scope ID for comparison */
5857 bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5858 dst_ip.s6_addr16[1] = htons(ifscope);
5859 ip6_p = &dst_ip;
5860 }
5861 lck_rw_lock_shared(&in6_ifaddr_rwlock);
5862 TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5863 if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5864 ia6->ia_addr.sin6_scope_id, ifscope)) {
5865 ours = true;
5866 break;
5867 }
5868 }
5869 lck_rw_done(&in6_ifaddr_rwlock);
5870 return ours;
5871 }
5872
5873 static bool
5874 ip_packet_info_dst_is_our_ip(ip_packet_info_t info_p, int index)
5875 {
5876 /* if the destination is our IP address, don't segment */
5877 bool our_ip = false;
5878
5879 if (info_p->ip_is_ipv4) {
5880 struct ip * hdr;
5881 struct in_addr dst_ip;
5882
5883 hdr = (struct ip *)(info_p->ip_hdr);
5884 bcopy(&hdr->ip_dst, &dst_ip, sizeof(dst_ip));
5885 our_ip = in_addr_is_ours(dst_ip);
5886 } else {
5887 struct ip6_hdr * hdr;
5888
5889 hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5890 our_ip = in6_addr_is_ours(&hdr->ip6_dst, index);
5891 }
5892 return our_ip;
5893 }
5894
5895 typedef union {
5896 struct in_addr ip;
5897 struct in6_addr ip6;
5898 } ip_addr, *ip_addr_t;
5899
5900 static void
5901 ip_packet_info_copy_dst_ip_addr(ip_packet_info_t info_p, ip_addr_t ipaddr)
5902 {
5903 if (info_p->ip_is_ipv4) {
5904 struct ip * hdr;
5905
5906 hdr = (struct ip *)(info_p->ip_hdr);
5907 bcopy(&hdr->ip_dst, &ipaddr->ip, sizeof(ipaddr->ip));
5908 } else {
5909 struct ip6_hdr * hdr;
5910
5911 hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5912 bcopy(&hdr->ip6_dst, &ipaddr->ip6, sizeof(ipaddr->ip6));
5913 }
5914 }
5915
5916 static bool
5917 ip_addr_are_equal(ip_addr_t addr1, ip_addr_t addr2, bool is_ipv4)
5918 {
5919 bool equal;
5920
5921 if (is_ipv4) {
5922 equal = addr1->ip.s_addr == addr2->ip.s_addr;
5923 } else {
5924 equal = IN6_ARE_ADDR_EQUAL(&addr1->ip6, &addr2->ip6);
5925 }
5926 return equal;
5927 }
5928
5929 static bool
5930 ip_addr_is_ours(ip_addr_t ipaddr, int index, bool is_ipv4)
5931 {
5932 bool our_ip;
5933
5934 if (is_ipv4) {
5935 our_ip = in_addr_is_ours(ipaddr->ip);
5936 } else {
5937 our_ip = in6_addr_is_ours(&ipaddr->ip6, index);
5938 }
5939 return our_ip;
5940 }
5941
/*
 * bridge_interface_input_list:
 *
 * Deliver the packets in `list` as input on the bridge interface
 * `bridge_ifp`.
 *
 * If IP forwarding for the address family selected by `etypef` is off
 * (or `etypef` is neither IPv4 nor IPv6), the list is injected as-is.
 * Otherwise every packet is inspected individually:
 *  - packets whose destination IP address is ours are injected untouched;
 *  - other packets are segmented via gso_tcp_with_info() when they look
 *    like large/TSO/LRO TCP packets, and, for virtio members, have
 *    their checksum computed when offload was requested.
 * Packets that fail any step are freed and counted as input errors.
 *
 * `bif_uses_virtio` describes the member interface the packets came
 * from (the caller supplies it; the member itself is not visible here).
 */
static void
bridge_interface_input_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
    mblist list, bool bif_uses_virtio)
{
	uint32_t in_errors = 0;
	bool is_ipv4;
	mblist in_list;
	ip_addr last_ip;
	bool last_ip_ours = false;
	bool last_ip_valid = false;
	u_int mac_hlen;
	bool may_forward = false;
	mbuf_t next_packet;

	/*
	 * Only IPv4/IPv6 are handled: any other value leaves may_forward
	 * false (and is_ipv4 unset), which takes the pass-through path below.
	 */
	switch (etypef) {
	case ETHER_TYPE_FLAG_IPV4:
		is_ipv4 = true;
		may_forward = (ipforwarding != 0);
		break;
	case ETHER_TYPE_FLAG_IPV6:
		is_ipv4 = false;
		may_forward = (ip6_forwarding != 0);
		break;
	}
	if (!may_forward) {
		/* no per-packet processing required, pass the list up whole */
		in_list = list;
		goto done;
	}

	mblist_init(&in_list);
	mac_hlen = sizeof(struct ether_header);
	bzero(&last_ip, sizeof(last_ip));
	for (mbuf_ref_t scan = list.head; scan != NULL; scan = next_packet) {
		int error;
		ip_packet_info info;
		bool ip_ours;
		struct ifbrmstats stats; /* XXX should really be accounted */
		ip_addr this_ip;

		/* take it out of the list */
		next_packet = scan->m_nextpkt;
		scan->m_nextpkt = NULL;

		/* check for TCP packet and get IP header */
		error = bridge_get_tcp_header(&scan, mac_hlen, is_ipv4,
		    &info, &stats.brms_in_ip);
		if (error != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
			    "%s bridge_get_tcp_header failed %d",
			    bridge_ifp->if_xname, error);
			/* the helper may or may not have released the packet */
			if (scan != NULL) {
				m_freem(scan);
				scan = NULL;
			}
			in_errors++;
			continue;
		}
		/*
		 * Single-entry cache: consecutive packets to the same
		 * destination reuse the previous "is it ours?" answer.
		 */
		ip_packet_info_copy_dst_ip_addr(&info, &this_ip);
		if (last_ip_valid &&
		    ip_addr_are_equal(&last_ip, &this_ip, is_ipv4)) {
			/* use cached result */
			ip_ours = last_ip_ours;
		} else {
			ip_ours = ip_addr_is_ours(&this_ip,
			    bridge_ifp->if_index,
			    is_ipv4);
			/* cache the result */
			last_ip_valid = true;
			last_ip_ours = ip_ours;
			last_ip = this_ip;
		}

		/* if the packet is destined to us, just send it up */
		if (ip_ours) {
			mblist_append(&in_list, scan);
			continue;
		}
		/*
		 * If this is a TCP packet that's marked for TSO or LRO, or
		 * we think it's a large packet, segment it.
		 */
		if (info.ip_proto_hdr != NULL &&
		    ((bif_uses_virtio && _mbuf_get_tso_mss(scan) != 0) ||
		    (!bif_uses_virtio &&
		    (scan->m_pkthdr.rx_seg_cnt > 1 ||
		    (mbuf_pkthdr_len(scan) >
		    (bridge_ifp->if_mtu + ETHER_HDR_LEN)))))) {
			mblist seg;

			seg = gso_tcp_with_info(bridge_ifp, scan, &info,
			    mac_hlen, is_ipv4, false);
			if (seg.head == NULL) {
				/*
				 * NOTE(review): `scan` is not freed here, so
				 * gso_tcp_with_info() presumably consumes it on
				 * failure -- confirm against that function.
				 */
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
				    "gso_tcp returned no packets");
				in_errors++;
				continue;
			}
			if (seg.count > 1) {
				/* packet was segmented+checksummed */
				mblist_append_list(&in_list, seg);
				continue;
			}
			/* there's just one packet, no segmentation */
			scan = seg.head;
		}
		/* need checksum if it's marked for checksum offload */
		if (bif_uses_virtio &&
		    (scan->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
			error = bridge_offload_checksum(&scan, &info, &stats);
			if (error != 0) {
				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
				    "%s bridge_offload_checksum failed %d",
				    bridge_ifp->if_xname, error);
				if (scan != NULL) {
					m_freem(scan);
					scan = NULL;
				}
				in_errors++;
				continue;
			}
		}
		mblist_append(&in_list, scan);
	}

done:
	if (in_list.head != NULL) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s packets %d bytes %d",
		    bridge_ifp->if_xname,
		    in_list.count, in_list.bytes);
		/* Mark the packets as arriving on the bridge interface */
		inject_input_packet_list(bridge_ifp, in_list.head, false);
		ifnet_stat_increment_in(bridge_ifp, in_list.count,
		    in_list.bytes, in_errors);
	} else if (in_errors != 0) {
		/* everything was dropped: only the error count needs updating */
		ifnet_stat_increment_in(bridge_ifp, 0, 0, in_errors);
	}
	return;
}
6081
6082 /*
6083 * bridge_broadcast:
6084 *
6085 * Send a frame to all interfaces that are members of
6086 * the bridge, except for the one on which the packet
6087 * arrived.
6088 *
6089 * NOTE: Releases the lock on return.
6090 */
6091 static void
6092 bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
6093 ether_type_flag_t etypef, mbuf_t m)
6094 {
6095 ifnet_t bridge_ifp;
6096 struct bridge_iflist *dbif;
6097 struct ifnet * src_if;
6098 mbuf_ref_t mc;
6099 struct mbuf *mc_in;
6100 int error = 0, used = 0;
6101 ChecksumOperation cksum_op;
6102 struct mac_nat_record mnr;
6103 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
6104 boolean_t translate_mac = FALSE;
6105 uint32_t sc_filter_flags;
6106 bool is_bcast_mcast;
6107
6108 bridge_ifp = sc->sc_ifp;
6109 if (sbif != NULL) {
6110 src_if = sbif->bif_ifp;
6111 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6112 if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6113 /* get the translation record */
6114 translate_mac
6115 = bridge_mac_nat_output(sc, sbif, &m, &mnr);
6116 if (m == NULL) {
6117 /* packet was deallocated */
6118 BRIDGE_UNLOCK(sc);
6119 return;
6120 }
6121 }
6122 } else {
6123 /*
6124 * sbif is NULL when the bridge interface calls
6125 * bridge_broadcast().
6126 */
6127 cksum_op = CHECKSUM_OPERATION_FINALIZE;
6128 src_if = NULL;
6129 }
6130
6131 BRIDGE_LOCK2REF(sc, error);
6132 if (error) {
6133 m_freem(m);
6134 return;
6135 }
6136 is_bcast_mcast = IS_BCAST_MCAST(m);
6137 sc_filter_flags = sc->sc_filter_flags;
6138 TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6139 ifnet_t dst_if;
6140
6141 dst_if = dbif->bif_ifp;
6142 if (dst_if == src_if) {
6143 /* skip the interface that the packet came in on */
6144 continue;
6145 }
6146
6147 /* Private segments can not talk to each other */
6148 if (sbif != NULL &&
6149 (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6150 continue;
6151 }
6152
6153 if ((dbif->bif_ifflags & IFBIF_STP) &&
6154 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6155 continue;
6156 }
6157
6158 if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6159 !is_bcast_mcast) {
6160 continue;
6161 }
6162
6163 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6164 continue;
6165 }
6166
6167 if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
6168 continue;
6169 }
6170
6171 if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6172 mc = m;
6173 used = 1;
6174 } else {
6175 mc = m_dup(m, M_DONTWAIT);
6176 if (mc == NULL) {
6177 (void) ifnet_stat_increment_out(bridge_ifp,
6178 0, 0, 1);
6179 continue;
6180 }
6181 }
6182
6183 /*
6184 * If broadcast input is enabled, do so only if this
6185 * is an input packet.
6186 */
6187 if (sbif != NULL && is_bcast_mcast &&
6188 (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6189 mc_in = m_dup(mc, M_DONTWAIT);
6190 /* this could fail, but we continue anyways */
6191 } else {
6192 mc_in = NULL;
6193 }
6194
6195 /* out */
6196 if (translate_mac && mac_nat_bif == dbif) {
6197 /* translate the packet */
6198 bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
6199 }
6200
6201 if (mc != NULL && sbif != NULL &&
6202 PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6203 if (used == 0) {
6204 /* Keep the layer3 header aligned */
6205 int i = min(mc->m_pkthdr.len, max_protohdr);
6206 mc = m_copyup(mc, i, ETHER_ALIGN);
6207 if (mc == NULL) {
6208 (void) ifnet_stat_increment_out(
6209 sc->sc_ifp, 0, 0, 1);
6210 if (mc_in != NULL) {
6211 m_freem(mc_in);
6212 mc_in = NULL;
6213 }
6214 continue;
6215 }
6216 }
6217 if (bridge_pf(&mc, dst_if, sc_filter_flags, false) != 0) {
6218 if (mc_in != NULL) {
6219 m_freem(mc_in);
6220 mc_in = NULL;
6221 }
6222 continue;
6223 }
6224 if (mc == NULL) {
6225 if (mc_in != NULL) {
6226 m_freem(mc_in);
6227 mc_in = NULL;
6228 }
6229 continue;
6230 }
6231 }
6232
6233 if (mc != NULL) {
6234 /* verify checksum if necessary */
6235 if (bif_has_checksum_offload(dbif) && sbif != NULL &&
6236 !bif_has_checksum_offload(sbif)) {
6237 error = bridge_verify_checksum(&mc,
6238 &dbif->bif_stats);
6239 if (error != 0) {
6240 if (mc != NULL) {
6241 m_freem(mc);
6242 }
6243 mc = NULL;
6244 }
6245 }
6246 if (mc != NULL) {
6247 (void) bridge_enqueue(bridge_ifp,
6248 NULL, dst_if, etypef, mc, cksum_op,
6249 pkt_direction_TX);
6250 }
6251 }
6252
6253 /* in */
6254 if (mc_in == NULL) {
6255 continue;
6256 }
6257 BRIDGE_BPF_TAP_IN(dst_if, mc_in);
6258 prepare_input_packet(dst_if, mc_in);
6259 mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
6260 dlil_input_packet_list(dst_if, mc_in);
6261 }
6262 if (used == 0) {
6263 m_freem(m);
6264 }
6265
6266
6267 BRIDGE_UNREF(sc);
6268 }
6269
6270 static mbuf_t
6271 copy_packet_list(mbuf_t m)
6272 {
6273 mblist ret;
6274 mbuf_t next_packet;
6275
6276 mblist_init(&ret);
6277 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
6278 mbuf_t copy_m;
6279
6280 /* take it out of the list */
6281 next_packet = scan->m_nextpkt;
6282 scan->m_nextpkt = NULL;
6283
6284 /* create a copy and add it to the new list */
6285 copy_m = m_dup(scan, M_DONTWAIT);
6286 if (copy_m != NULL) {
6287 mblist_append(&ret, copy_m);
6288 }
6289
6290 /* put it back in the original list */
6291 scan->m_nextpkt = next_packet;
6292 }
6293 return ret.head;
6294 }
6295
6296 /*
6297 * bridge_broadcast_list:
6298 *
6299 * Broadcast a list of packets to all members except `sbif`.
6300 * Consumes `m` before returning.
6301 *
6302 * NOTE: Releases the lock on return.
6303 */
6304 static void
6305 bridge_broadcast_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
6306 ether_type_flag_t etypef, mbuf_t m, pkt_direction_t direction)
6307 {
6308 ifnet_t bridge_ifp;
6309 bool bridge_needs_input;
6310 struct bridge_iflist * dbif;
6311 bool is_bcast_mcast;
6312 errno_t error = 0;
6313 ChecksumOperation cksum_op;
6314 struct bridge_iflist * mac_nat_bif = sc->sc_mac_nat_bif;
6315 ifnet_t mac_nat_if = NULL;
6316 bool need_mac_nat = false;
6317 mbuf_t out_mac_nat = NULL;
6318 ifnet_t src_if;
6319 uint32_t sc_filter_flags;
6320 bool used = false;
6321
6322 bridge_ifp = sc->sc_ifp;
6323 if (sbif != NULL) {
6324 src_if = sbif->bif_ifp;
6325
6326 if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
6327 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6328
6329 /* compute checksum on packets marked with offload */
6330 m = bridge_checksum_offload_list(bridge_ifp, sbif,
6331 m, is_ipv4);
6332 if (m == NULL) {
6333 BRIDGE_UNLOCK(sc);
6334 goto done;
6335 }
6336 cksum_op = CHECKSUM_OPERATION_NONE;
6337 } else {
6338 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6339 }
6340
6341 /*
6342 * If MAC-NAT is enabled and we'll be sending the packets
6343 * over it, verify that it is up and active before
6344 * deciding to make a translated copy.
6345 */
6346 if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6347 mac_nat_if = mac_nat_bif->bif_ifp;
6348 if ((mac_nat_if->if_flags & IFF_RUNNING) != 0 &&
6349 (mac_nat_bif->bif_flags & BIFF_MEDIA_ACTIVE) != 0) {
6350 need_mac_nat = true;
6351 }
6352 }
6353 } else {
6354 /*
6355 * sbif is NULL when the bridge interface calls
6356 * bridge_broadcast_list() (TBD).
6357 */
6358 cksum_op = CHECKSUM_OPERATION_FINALIZE;
6359 src_if = NULL;
6360 }
6361
6362 /*
6363 * Create a translated copy for packets destined to MAC-NAT interface.
6364 */
6365 if (need_mac_nat) {
6366 out_mac_nat
6367 = bridge_mac_nat_copy_and_translate_list(sc, sbif,
6368 mac_nat_if, m);
6369 }
6370 sc_filter_flags = sc->sc_filter_flags;
6371 bridge_needs_input = (sc->sc_flags & SCF_PROTO_ATTACHED) != 0;
6372 BRIDGE_LOCK2REF(sc, error);
6373 if (error) {
6374 goto done;
6375 }
6376 is_bcast_mcast = IS_BCAST_MCAST(m);
6377
6378 /* make a copy for the bridge interface */
6379 if (sbif != NULL && is_bcast_mcast && bridge_needs_input) {
6380 mbuf_t in_list;
6381
6382 in_list = copy_packet_list(m);
6383 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6384 "%s mcast for us in_m %p",
6385 bridge_ifp->if_xname, in_list);
6386 if (in_list != NULL) {
6387 inject_input_packet_list(bridge_ifp, in_list, false);
6388 }
6389 }
6390
6391 TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6392 ifnet_t dst_if;
6393 mbuf_t in_m = NULL;
6394 mbuf_t out_m = NULL;
6395
6396 dst_if = dbif->bif_ifp;
6397 if (dst_if == src_if) {
6398 /* skip the interface that the packet came in on */
6399 continue;
6400 }
6401
6402 /* Private segments can not talk to each other */
6403 if (sbif != NULL &&
6404 (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6405 continue;
6406 }
6407
6408 if ((dbif->bif_ifflags & IFBIF_STP) &&
6409 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6410 continue;
6411 }
6412
6413 if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6414 !is_bcast_mcast) {
6415 continue;
6416 }
6417
6418 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6419 continue;
6420 }
6421
6422 if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
6423 continue;
6424 }
6425 if (dbif == mac_nat_bif) {
6426 /* translated copy was created above, use that */
6427 out_m = out_mac_nat;
6428 out_mac_nat = NULL;
6429 } else if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6430 /* consume `m` */
6431 out_m = m;
6432 used = true;
6433 } else {
6434 /* needs a copy */
6435 out_m = copy_packet_list(m);
6436 }
6437
6438 if (out_m == NULL) {
6439 ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
6440 continue;
6441 }
6442 /*
6443 * If broadcast input is enabled, do so only if this
6444 * is an input packet.
6445 */
6446 if (sbif != NULL && is_bcast_mcast &&
6447 (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6448 in_m = copy_packet_list(m);
6449 /* this could fail, but we continue anyways */
6450 } else {
6451 in_m = NULL;
6452 }
6453
6454 if (sbif != NULL &&
6455 PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6456 out_m = bridge_pf_list(out_m, dst_if,
6457 sc_filter_flags, false);
6458 }
6459 if (out_m != NULL) {
6460 /* verify checksum if necessary */
6461 if (sbif != NULL &&
6462 ether_type_flag_is_ip(etypef) &&
6463 bif_has_checksum_offload(dbif) &&
6464 !bif_has_checksum_offload(sbif)) {
6465 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6466
6467 out_m = bridge_verify_checksum_list(bridge_ifp,
6468 dbif, out_m, is_ipv4);
6469 }
6470 if (out_m != NULL) {
6471 bridge_enqueue(bridge_ifp, src_if, dst_if,
6472 etypef, out_m, cksum_op, direction);
6473 }
6474 }
6475
6476 /* in */
6477 if (in_m != NULL) {
6478 inject_input_packet_list(dst_if, in_m, true);
6479 }
6480 }
6481
6482 BRIDGE_UNREF(sc);
6483
6484 done:
6485 if (out_mac_nat != NULL) {
6486 m_freem_list(out_mac_nat);
6487 }
6488 if (!used) {
6489 m_freem_list(m);
6490 }
6491 return;
6492 }
6493
6494 #define NEEDED_CSUM_IPV4 (IF_HWASSIST_CSUM_UDP | IF_HWASSIST_CSUM_TCP)
6495 #define NEEDED_CSUM_IPV6 (IF_HWASSIST_CSUM_UDPIPV6 | IF_HWASSIST_CSUM_TCPIPV6)
6496
6497 static bool
6498 interface_supports_hw_checksum(ifnet_t ifp, bool is_ipv4)
6499 {
6500 uint32_t hwcap = IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
6501 uint32_t needed = is_ipv4 ? NEEDED_CSUM_IPV4 : NEEDED_CSUM_IPV6;
6502 bool supports;
6503
6504 supports = (hwcap & needed) == needed;
6505 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM, "%s: does %ssupport checksum",
6506 ifp->if_xname, supports ? "" : "not ");
6507 return supports;
6508 }
6509
6510 static void
6511 bridge_forward_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
6512 ifnet_t dst_if, ether_type_flag_t etypef, mbuf_t m)
6513 {
6514 bool checksum_ok = false;
6515 ChecksumOperation cksum_op;
6516 ifnet_t bridge_ifp;
6517 struct bridge_iflist * dbif;
6518 uint32_t sc_filter_flags;
6519 ifnet_t src_if;
6520
6521 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6522 goto drop;
6523 }
6524 dbif = bridge_lookup_member_if(sc, dst_if);
6525 if (dbif == NULL) {
6526 /* Not a member of the bridge (anymore?) */
6527 goto drop;
6528 }
6529
6530 /* Private segments can not talk to each other */
6531 if ((sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) != 0) {
6532 goto drop;
6533 }
6534 bridge_ifp = sc->sc_ifp;
6535 src_if = sbif->bif_ifp;
6536 cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6537 if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
6538 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6539
6540 if (dbif == sc->sc_mac_nat_bif ||
6541 (IFNET_IS_VMNET(dst_if) && !bif_uses_virtio(dbif)) ||
6542 !interface_supports_hw_checksum(dst_if, is_ipv4)) {
6543 /* compute checksums now if necessary */
6544 m = bridge_checksum_offload_list(bridge_ifp, sbif,
6545 m, is_ipv4);
6546 checksum_ok = true;
6547 } else {
6548 cksum_op = CHECKSUM_OPERATION_NONE;
6549 }
6550 }
6551
6552 if (dbif == sc->sc_mac_nat_bif) {
6553 /* translate the packets before forwarding them */
6554 if ((etypef & ETHER_TYPE_FLAG_IP_ARP) != 0) {
6555 m = bridge_mac_nat_translate_list(sc, sbif, dst_if, m);
6556 }
6557 } else if (!checksum_ok && ether_type_flag_is_ip(etypef) &&
6558 bif_has_checksum_offload(dbif) && !bif_has_checksum_offload(sbif)) {
6559 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6560
6561 /*
6562 * If the destination interface has checksum offload enabled,
6563 * verify the checksum now, unless the source interface also has
6564 * checksum offload enabled. The checksum in that case has
6565 * already just been computed and verifying it is unnecessary.
6566 */
6567 m = bridge_verify_checksum_list(bridge_ifp, dbif, m, is_ipv4);
6568 }
6569 sc_filter_flags = sc->sc_filter_flags;
6570 BRIDGE_UNLOCK(sc);
6571 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6572 m = bridge_pf_list(m, dst_if, sc_filter_flags, false);
6573 }
6574
6575 /*
6576 * We're forwarding inbound packets for which the checksums must
6577 * already have been computed and if required, verified, or
6578 * packets from a virtio-enabled interface for which we rely
6579 * on the packet containing appropriate offload flags.
6580 */
6581 if (m != NULL) {
6582 bridge_enqueue(bridge_ifp, src_if, dst_if, etypef, m,
6583 cksum_op, pkt_direction_RX);
6584 }
6585 return;
6586
6587 drop:
6588 BRIDGE_UNLOCK(sc);
6589 m_freem_list(m);
6590 return;
6591 }
6592
6593 /*
6594 * bridge_span:
6595 *
6596 * Duplicate a packet out one or more interfaces that are in span mode,
6597 * the original mbuf is unmodified.
6598 */
6599 static void
6600 bridge_span(struct bridge_softc *sc, ether_type_flag_t etypef, struct mbuf *m)
6601 {
6602 struct bridge_iflist *bif;
6603 struct ifnet *dst_if;
6604 struct mbuf *mc;
6605
6606 if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6607 return;
6608 }
6609
6610 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6611 dst_if = bif->bif_ifp;
6612
6613 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6614 continue;
6615 }
6616
6617 mc = m_copypacket(m, M_DONTWAIT);
6618 if (mc == NULL) {
6619 (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6620 continue;
6621 }
6622
6623 (void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, etypef, mc,
6624 CHECKSUM_OPERATION_NONE, pkt_direction_TX);
6625 }
6626 }
6627
6628 /*
6629 * bridge_rtupdate:
6630 *
6631 * Add a bridge routing entry.
6632 */
6633 static int
6634 bridge_rtupdate(struct bridge_softc *sc, const uint8_t dst[ETHER_ADDR_LEN], uint16_t vlan,
6635 struct bridge_iflist *bif, int setflags, uint8_t flags)
6636 {
6637 struct bridge_rtnode *brt;
6638 int error;
6639
6640 BRIDGE_LOCK_ASSERT_HELD(sc);
6641
6642 /* Check the source address is valid and not multicast. */
6643 if (ETHER_IS_MULTICAST(dst) ||
6644 (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6645 dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6646 return EINVAL;
6647 }
6648
6649 /* 802.1p frames map to vlan 1 */
6650 if (vlan == 0) {
6651 vlan = 1;
6652 }
6653
6654 /*
6655 * A route for this destination might already exist. If so,
6656 * update it, otherwise create a new one.
6657 */
6658 if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6659 if (sc->sc_brtcnt >= sc->sc_brtmax) {
6660 sc->sc_brtexceeded++;
6661 return ENOSPC;
6662 }
6663 /* Check per interface address limits (if enabled) */
6664 if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6665 bif->bif_addrexceeded++;
6666 return ENOSPC;
6667 }
6668
6669 /*
6670 * Allocate a new bridge forwarding node, and
6671 * initialize the expiration time and Ethernet
6672 * address.
6673 */
6674 brt = zalloc_noblock(bridge_rtnode_pool);
6675 if (brt == NULL) {
6676 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6677 "zalloc_nolock failed");
6678 return ENOMEM;
6679 }
6680 bzero(brt, sizeof(struct bridge_rtnode));
6681
6682 if (bif->bif_ifflags & IFBIF_STICKY) {
6683 brt->brt_flags = IFBAF_STICKY;
6684 } else {
6685 brt->brt_flags = IFBAF_DYNAMIC;
6686 }
6687
6688 memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6689 brt->brt_vlan = vlan;
6690
6691 if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6692 zfree(bridge_rtnode_pool, brt);
6693 return error;
6694 }
6695 brt->brt_dst = bif;
6696 bif->bif_addrcnt++;
6697 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6698 "added %02x:%02x:%02x:%02x:%02x:%02x "
6699 "on %s count %u hashsize %u",
6700 dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6701 sc->sc_ifp->if_xname, sc->sc_brtcnt,
6702 sc->sc_rthash_size);
6703 }
6704
6705 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6706 brt->brt_dst != bif) {
6707 brt->brt_dst->bif_addrcnt--;
6708 brt->brt_dst = bif;
6709 brt->brt_dst->bif_addrcnt++;
6710 }
6711
6712 if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6713 unsigned long now;
6714
6715 now = (unsigned long) net_uptime();
6716 brt->brt_expire = now + sc->sc_brttimeout;
6717 }
6718 if (setflags) {
6719 brt->brt_flags = flags;
6720 }
6721
6722 return 0;
6723 }
6724
6725 /*
6726 * bridge_rtlookup:
6727 *
6728 * Lookup the destination interface for an address.
6729 */
6730 static struct bridge_iflist *
6731 bridge_rtlookup_bif(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
6732 uint16_t vlan)
6733 {
6734 struct bridge_rtnode *brt;
6735
6736 BRIDGE_LOCK_ASSERT_HELD(sc);
6737
6738 if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6739 return NULL;
6740 }
6741
6742 return brt->brt_dst;
6743 }
6744
6745 /*
6746 * bridge_rttrim:
6747 *
6748 * Trim the routine table so that we have a number
6749 * of routing entries less than or equal to the
6750 * maximum number.
6751 */
6752 static void
6753 bridge_rttrim(struct bridge_softc *sc)
6754 {
6755 struct bridge_rtnode *brt, *nbrt;
6756
6757 BRIDGE_LOCK_ASSERT_HELD(sc);
6758
6759 /* Make sure we actually need to do this. */
6760 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6761 return;
6762 }
6763
6764 /* Force an aging cycle; this might trim enough addresses. */
6765 bridge_rtage(sc);
6766 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6767 return;
6768 }
6769
6770 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6771 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6772 bridge_rtnode_destroy(sc, brt);
6773 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6774 return;
6775 }
6776 }
6777 }
6778 }
6779
6780 /*
6781 * bridge_aging_timer:
6782 *
6783 * Aging periodic timer for the bridge routing table.
6784 */
6785 static void
6786 bridge_aging_timer(struct bridge_softc *sc)
6787 {
6788 BRIDGE_LOCK_ASSERT_HELD(sc);
6789
6790 bridge_rtage(sc);
6791 if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6792 (sc->sc_flags & SCF_DETACHING) == 0) {
6793 sc->sc_aging_timer.bdc_sc = sc;
6794 sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6795 sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6796 bridge_schedule_delayed_call(&sc->sc_aging_timer);
6797 }
6798 }
6799
6800 /*
6801 * bridge_rtage:
6802 *
6803 * Perform an aging cycle.
6804 */
6805 static void
6806 bridge_rtage(struct bridge_softc *sc)
6807 {
6808 struct bridge_rtnode *brt, *nbrt;
6809 unsigned long now;
6810
6811 BRIDGE_LOCK_ASSERT_HELD(sc);
6812
6813 now = (unsigned long) net_uptime();
6814
6815 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6816 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6817 if (now >= brt->brt_expire) {
6818 bridge_rtnode_destroy(sc, brt);
6819 }
6820 }
6821 }
6822 if (sc->sc_mac_nat_bif != NULL) {
6823 bridge_mac_nat_age_entries(sc, now);
6824 }
6825 }
6826
6827 /*
6828 * bridge_rtflush:
6829 *
6830 * Remove all dynamic addresses from the bridge.
6831 */
6832 static void
6833 bridge_rtflush(struct bridge_softc *sc, int full)
6834 {
6835 struct bridge_rtnode *brt, *nbrt;
6836
6837 BRIDGE_LOCK_ASSERT_HELD(sc);
6838
6839 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6840 if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6841 bridge_rtnode_destroy(sc, brt);
6842 }
6843 }
6844 }
6845
6846 /*
6847 * bridge_rtdaddr:
6848 *
6849 * Remove an address from the table.
6850 */
6851 static int
6852 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN], uint16_t vlan)
6853 {
6854 struct bridge_rtnode *brt;
6855 int found = 0;
6856
6857 BRIDGE_LOCK_ASSERT_HELD(sc);
6858
6859 /*
6860 * If vlan is zero then we want to delete for all vlans so the lookup
6861 * may return more than one.
6862 */
6863 while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6864 bridge_rtnode_destroy(sc, brt);
6865 found = 1;
6866 }
6867
6868 return found ? 0 : ENOENT;
6869 }
6870
6871 /*
6872 * bridge_rtdelete:
6873 *
6874 * Delete routes to a specific member interface.
6875 */
6876 static void
6877 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6878 {
6879 struct bridge_rtnode *brt, *nbrt;
6880
6881 BRIDGE_LOCK_ASSERT_HELD(sc);
6882
6883 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6884 if (brt->brt_ifp == ifp && (full ||
6885 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6886 bridge_rtnode_destroy(sc, brt);
6887 }
6888 }
6889 }
6890
6891 /*
6892 * bridge_rtable_init:
6893 *
6894 * Initialize the route table for this bridge.
6895 */
6896 static int
6897 bridge_rtable_init(struct bridge_softc *sc)
6898 {
6899 u_int32_t i;
6900
6901 sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6902 BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6903 sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6904
6905 for (i = 0; i < sc->sc_rthash_size; i++) {
6906 LIST_INIT(&sc->sc_rthash[i]);
6907 }
6908
6909 sc->sc_rthash_key = RandomULong();
6910
6911 LIST_INIT(&sc->sc_rtlist);
6912
6913 return 0;
6914 }
6915
6916 /*
6917 * bridge_rthash_delayed_resize:
6918 *
6919 * Resize the routing table hash on a delayed thread call.
6920 */
6921 static void
6922 bridge_rthash_delayed_resize(struct bridge_softc *sc)
6923 {
6924 u_int32_t new_rthash_size = 0;
6925 u_int32_t old_rthash_size = 0;
6926 struct _bridge_rtnode_list *new_rthash = NULL;
6927 struct _bridge_rtnode_list *old_rthash = NULL;
6928 u_int32_t i;
6929 struct bridge_rtnode *brt;
6930 int error = 0;
6931
6932 BRIDGE_LOCK_ASSERT_HELD(sc);
6933
6934 /*
6935 * Four entries per hash bucket is our ideal load factor
6936 */
6937 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6938 goto out;
6939 }
6940
6941 /*
6942 * Doubling the number of hash buckets may be too simplistic
6943 * especially when facing a spike of new entries
6944 */
6945 new_rthash_size = sc->sc_rthash_size * 2;
6946
6947 sc->sc_flags |= SCF_RESIZING;
6948 BRIDGE_UNLOCK(sc);
6949
6950 new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
6951 Z_WAITOK | Z_ZERO);
6952
6953 BRIDGE_LOCK(sc);
6954 sc->sc_flags &= ~SCF_RESIZING;
6955
6956 if (new_rthash == NULL) {
6957 error = ENOMEM;
6958 goto out;
6959 }
6960 if ((sc->sc_flags & SCF_DETACHING)) {
6961 error = ENODEV;
6962 goto out;
6963 }
6964 /*
6965 * Fail safe from here on
6966 */
6967 old_rthash = sc->sc_rthash;
6968 old_rthash_size = sc->sc_rthash_size;
6969 sc->sc_rthash = new_rthash;
6970 sc->sc_rthash_size = new_rthash_size;
6971
6972 /*
6973 * Get a new key to force entries to be shuffled around to reduce
6974 * the likelihood they will land in the same buckets
6975 */
6976 sc->sc_rthash_key = RandomULong();
6977
6978 for (i = 0; i < sc->sc_rthash_size; i++) {
6979 LIST_INIT(&sc->sc_rthash[i]);
6980 }
6981
6982 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
6983 LIST_REMOVE(brt, brt_hash);
6984 (void) bridge_rtnode_hash(sc, brt);
6985 }
6986 out:
6987 if (error == 0) {
6988 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6989 "%s new size %u",
6990 sc->sc_ifp->if_xname, sc->sc_rthash_size);
6991 kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
6992 } else {
6993 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
6994 "%s failed %d", sc->sc_ifp->if_xname, error);
6995 kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
6996 }
6997 }
6998
6999 /*
7000 * Resize the number of hash buckets based on the load factor
7001 * Currently only grow
7002 * Failing to resize the hash table is not fatal
7003 */
7004 static void
7005 bridge_rthash_resize(struct bridge_softc *sc)
7006 {
7007 BRIDGE_LOCK_ASSERT_HELD(sc);
7008
7009 if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
7010 return;
7011 }
7012
7013 /*
7014 * Four entries per hash bucket is our ideal load factor
7015 */
7016 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
7017 return;
7018 }
7019 /*
7020 * Hard limit on the size of the routing hash table
7021 */
7022 if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
7023 return;
7024 }
7025
7026 sc->sc_resize_call.bdc_sc = sc;
7027 sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
7028 bridge_schedule_delayed_call(&sc->sc_resize_call);
7029 }
7030
7031 /*
7032 * bridge_rtable_fini:
7033 *
7034 * Deconstruct the route table for this bridge.
7035 */
7036 static void
7037 bridge_rtable_fini(struct bridge_softc *sc)
7038 {
7039 KASSERT(sc->sc_brtcnt == 0,
7040 ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
7041 kfree_type_counted_by(struct _bridge_rtnode_list, sc->sc_rthash_size,
7042 sc->sc_rthash);
7043 sc->sc_rthash = NULL;
7044 sc->sc_rthash_size = 0;
7045 }
7046
7047 /*
7048 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
7049 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
7050 */
7051 #define mix(a, b, c) \
7052 do { \
7053 a -= b; a -= c; a ^= (c >> 13); \
7054 b -= c; b -= a; b ^= (a << 8); \
7055 c -= a; c -= b; c ^= (b >> 13); \
7056 a -= b; a -= c; a ^= (c >> 12); \
7057 b -= c; b -= a; b ^= (a << 16); \
7058 c -= a; c -= b; c ^= (b >> 5); \
7059 a -= b; a -= c; a ^= (c >> 3); \
7060 b -= c; b -= a; b ^= (a << 10); \
7061 c -= a; c -= b; c ^= (b >> 15); \
7062 } while ( /*CONSTCOND*/ 0)
7063
7064 static __inline uint32_t
7065 bridge_rthash(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN])
7066 {
7067 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
7068
7069 b += addr[5] << 8;
7070 b += addr[4];
7071 a += addr[3] << 24;
7072 a += addr[2] << 16;
7073 a += addr[1] << 8;
7074 a += addr[0];
7075
7076 mix(a, b, c);
7077
7078 return c & BRIDGE_RTHASH_MASK(sc);
7079 }
7080
7081 #undef mix
7082
/*
 * bridge_rtnode_addr_cmp:
 *
 * Compare two Ethernet addresses byte by byte. Returns 0 when they are
 * equal, otherwise the difference (a[i] - b[i]) of the first pair of
 * bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t a[ETHER_ADDR_LEN], const uint8_t b[ETHER_ADDR_LEN])
{
	for (int idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int diff = (int)a[idx] - (int)b[idx];

		if (diff != 0) {
			return diff;
		}
	}
	return 0;
}
7094
7095 /*
7096 * bridge_rtnode_lookup:
7097 *
7098 * Look up a bridge route node for the specified destination. Compare the
7099 * vlan id or if zero then just return the first match.
7100 */
7101 static struct bridge_rtnode *
7102 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
7103 uint16_t vlan)
7104 {
7105 struct bridge_rtnode *brt;
7106 uint32_t hash;
7107 int dir;
7108
7109 BRIDGE_LOCK_ASSERT_HELD(sc);
7110
7111 hash = bridge_rthash(sc, addr);
7112 LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7113 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7114 if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7115 return brt;
7116 }
7117 if (dir > 0) {
7118 return NULL;
7119 }
7120 }
7121
7122 return NULL;
7123 }
7124
7125 /*
7126 * bridge_rtnode_hash:
7127 *
7128 * Insert the specified bridge node into the route hash table.
7129 * This is used when adding a new node or to rehash when resizing
7130 * the hash table
7131 */
7132 static int
7133 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7134 {
7135 struct bridge_rtnode *lbrt;
7136 uint32_t hash;
7137 int dir;
7138
7139 BRIDGE_LOCK_ASSERT_HELD(sc);
7140
7141 hash = bridge_rthash(sc, brt->brt_addr);
7142
7143 lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7144 if (lbrt == NULL) {
7145 LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7146 goto out;
7147 }
7148
7149 do {
7150 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7151 if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7152 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7153 "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7154 sc->sc_ifp->if_xname,
7155 brt->brt_addr[0], brt->brt_addr[1],
7156 brt->brt_addr[2], brt->brt_addr[3],
7157 brt->brt_addr[4], brt->brt_addr[5]);
7158 return EEXIST;
7159 }
7160 if (dir > 0) {
7161 LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7162 goto out;
7163 }
7164 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7165 LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7166 goto out;
7167 }
7168 lbrt = LIST_NEXT(lbrt, brt_hash);
7169 } while (lbrt != NULL);
7170
7171 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7172 "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7173 sc->sc_ifp->if_xname,
7174 brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7175 brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7176 out:
7177 return 0;
7178 }
7179
7180 /*
7181 * bridge_rtnode_insert:
7182 *
7183 * Insert the specified bridge node into the route table. We
7184 * assume the entry is not already in the table.
7185 */
7186 static int
7187 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7188 {
7189 int error;
7190
7191 error = bridge_rtnode_hash(sc, brt);
7192 if (error != 0) {
7193 return error;
7194 }
7195
7196 LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7197 sc->sc_brtcnt++;
7198
7199 bridge_rthash_resize(sc);
7200
7201 return 0;
7202 }
7203
7204 /*
7205 * bridge_rtnode_destroy:
7206 *
7207 * Destroy a bridge rtnode.
7208 */
7209 static void
7210 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7211 {
7212 BRIDGE_LOCK_ASSERT_HELD(sc);
7213
7214 LIST_REMOVE(brt, brt_hash);
7215
7216 LIST_REMOVE(brt, brt_list);
7217 sc->sc_brtcnt--;
7218 brt->brt_dst->bif_addrcnt--;
7219 zfree(bridge_rtnode_pool, brt);
7220 }
7221
7222 #if BRIDGESTP
7223 /*
7224 * bridge_rtable_expire:
7225 *
7226 * Set the expiry time for all routes on an interface.
7227 */
7228 static void
7229 bridge_rtable_expire(struct ifnet *ifp, int age)
7230 {
7231 struct bridge_softc *sc = ifp->if_bridge;
7232 struct bridge_rtnode *brt;
7233
7234 BRIDGE_LOCK(sc);
7235
7236 /*
7237 * If the age is zero then flush, otherwise set all the expiry times to
7238 * age for the interface
7239 */
7240 if (age == 0) {
7241 bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7242 } else {
7243 unsigned long now;
7244
7245 now = (unsigned long) net_uptime();
7246
7247 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7248 /* Cap the expiry time to 'age' */
7249 if (brt->brt_ifp == ifp &&
7250 brt->brt_expire > now + age &&
7251 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7252 brt->brt_expire = now + age;
7253 }
7254 }
7255 }
7256 BRIDGE_UNLOCK(sc);
7257 }
7258
7259 /*
7260 * bridge_state_change:
7261 *
7262 * Callback from the bridgestp code when a port changes states.
7263 */
7264 static void
7265 bridge_state_change(struct ifnet *ifp, int state)
7266 {
7267 struct bridge_softc *sc = ifp->if_bridge;
7268 static const char *stpstates[] = {
7269 "disabled",
7270 "listening",
7271 "learning",
7272 "forwarding",
7273 "blocking",
7274 "discarding"
7275 };
7276
7277 if (log_stp) {
7278 log(LOG_NOTICE, "%s: state changed to %s on %s",
7279 sc->sc_ifp->if_xname,
7280 stpstates[state], ifp->if_xname);
7281 }
7282 }
7283 #endif /* BRIDGESTP */
7284
/*
 * bridge_detach:
 *
 *	Callback when interface has been detached.  Performs the final
 *	teardown of the bridge instance whose softc is attached to `ifp':
 *	spanning tree state (when BRIDGESTP is built in), the routing table,
 *	the entry on the global bridge list, the ifnet reference, the softc
 *	mutex and finally the softc itself.
 */
static void
bridge_detach(ifnet_t ifp)
{
	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);

#if BRIDGESTP
	bstp_detach(&sc->sc_stp);
#endif /* BRIDGESTP */

	/* Tear down the routing table. */
	bridge_rtable_fini(sc);

	/* Unlink from the global list of bridges under its own mutex. */
	lck_mtx_lock(&bridge_list_mtx);
	LIST_REMOVE(sc, sc_list);
	lck_mtx_unlock(&bridge_list_mtx);

	ifnet_release(ifp);

	/* The softc is freed here; sc must not be touched afterwards. */
	lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
	kfree_type(struct bridge_softc, sc);
}
7311
7312 /*
7313 * bridge_link_event:
7314 *
7315 * Report a data link event on an interface
7316 */
7317 static void
7318 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7319 {
7320 struct event {
7321 u_int32_t ifnet_family;
7322 u_int32_t unit;
7323 char if_name[IFNAMSIZ];
7324 };
7325 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7326 struct kern_event_msg *header = (struct kern_event_msg*)message;
7327 struct event *data = (struct event *)(message + KEV_MSG_HEADER_SIZE);
7328
7329 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7330 "%s event_code %u - %s", ifp->if_xname,
7331 event_code, dlil_kev_dl_code_str(event_code));
7332 header->total_size = sizeof(message);
7333 header->vendor_code = KEV_VENDOR_APPLE;
7334 header->kev_class = KEV_NETWORK_CLASS;
7335 header->kev_subclass = KEV_DL_SUBCLASS;
7336 header->event_code = event_code;
7337 data->ifnet_family = ifnet_family(ifp);
7338 data->unit = (u_int32_t)ifnet_unit(ifp);
7339 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7340 ifnet_event(ifp, header);
7341 }
7342
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host filter drop: increment the `reason' counter in
 *	bridge_hostfilter_stats, emit a debug log naming the calling
 *	function, line and reason, and set the local variable `error' to
 *	EINVAL.  The expanding function must therefore have an assignable
 *	`error' in scope.
 *
 *	NOTE(review): the format string has no separator between the line
 *	number and the stringified reason, so the two run together in the
 *	log output.
 */
#define BRIDGE_HF_DROP(reason, func, line) { \
	bridge_hostfilter_stats.reason++; \
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER, \
	"%s.%d" #reason, func, line); \
	error = EINVAL; \
}
7349
7350 static int
7351 bridge_host_filter_arp(struct bridge_iflist *bif, mbuf_t *data)
7352 {
7353 struct ether_arp *ea;
7354 struct ether_header *eh;
7355 int error = EINVAL;
7356 mbuf_t m = *data;
7357 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7358
7359 /*
7360 * Make the Ethernet and ARP headers contiguous
7361 */
7362 if (mbuf_pkthdr_len(m) < minlen) {
7363 BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7364 goto done;
7365 }
7366 if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7367 BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7368 __func__, __LINE__);
7369 goto done;
7370 }
7371 m = *data;
7372
7373 /*
7374 * Restrict Ethernet protocols to ARP and IP/IPv6
7375 */
7376 eh = mtod(m, struct ether_header *);
7377 ea = (struct ether_arp *)(eh + 1);
7378 if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7379 BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7380 __func__, __LINE__);
7381 goto done;
7382 }
7383 if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7384 BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7385 __func__, __LINE__);
7386 goto done;
7387 }
7388 /*
7389 * Verify the address lengths are correct
7390 */
7391 if (ea->arp_hln != ETHER_ADDR_LEN) {
7392 BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7393 goto done;
7394 }
7395 if (ea->arp_pln != sizeof(struct in_addr)) {
7396 BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7397 __func__, __LINE__);
7398 goto done;
7399 }
7400 /*
7401 * Allow only ARP request or ARP reply
7402 */
7403 if (ea->arp_op != HTONS_ARPOP_REQUEST &&
7404 ea->arp_op != HTONS_ARPOP_REPLY) {
7405 BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7406 goto done;
7407 }
7408 if ((bif->bif_flags & BIFF_HF_HWSRC) != 0) {
7409 /*
7410 * Verify source hardware address matches
7411 */
7412 if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7413 ETHER_ADDR_LEN) != 0) {
7414 BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7415 goto done;
7416 }
7417 }
7418 if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7419 /*
7420 * Verify source protocol address:
7421 * May be null for an ARP probe
7422 */
7423 if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7424 sizeof(struct in_addr)) != 0 &&
7425 bcmp(ea->arp_spa, &inaddr_any,
7426 sizeof(struct in_addr)) != 0) {
7427 BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7428 goto done;
7429 }
7430 }
7431 bridge_hostfilter_stats.brhf_arp_ok += 1;
7432 error = 0;
7433 done:
7434 return error;
7435 }
7436
7437 /*
7438 * MAC NAT
7439 */
7440
7441 static errno_t
7442 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7443 {
7444 errno_t error = 0;
7445
7446 BRIDGE_LOCK_ASSERT_HELD(sc);
7447
7448 if (IFNET_IS_VMNET(bif->bif_ifp)) {
7449 error = EINVAL;
7450 goto done;
7451 }
7452 if (sc->sc_mac_nat_bif != NULL) {
7453 if (sc->sc_mac_nat_bif != bif) {
7454 error = EBUSY;
7455 }
7456 goto done;
7457 }
7458 sc->sc_mac_nat_bif = bif;
7459 bif->bif_ifflags |= IFBIF_MAC_NAT;
7460 bridge_mac_nat_populate_entries(sc);
7461
7462 done:
7463 return error;
7464 }
7465
7466 static void
7467 bridge_mac_nat_disable(struct bridge_softc *sc)
7468 {
7469 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7470
7471 assert(mac_nat_bif != NULL);
7472 bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7473 mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7474 sc->sc_mac_nat_bif = NULL;
7475 return;
7476 }
7477
7478 static void
7479 mac_nat_entry_print2(struct mac_nat_entry *mne,
7480 const char ifname[IFNAMSIZ], const char *msg1, const char *msg2)
7481 {
7482 int af;
7483 char etopbuf[24];
7484 char ntopbuf[MAX_IPv6_STR_LEN];
7485 const char *space;
7486
7487 af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7488 ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7489 (void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7490 if (msg2 == NULL) {
7491 msg2 = "";
7492 space = "";
7493 } else {
7494 space = " ";
7495 }
7496 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7497 "%.*s %s%s%s %p (%s, %s, %s)", IFNAMSIZ, ifname, msg1, space, msg2, mne,
7498 mne->mne_bif->bif_ifp->if_xname, ntopbuf, etopbuf);
7499 }
7500
/*
 * mac_nat_entry_print:
 *
 *	Debug-log a MAC NAT entry with a single message; shorthand for
 *	mac_nat_entry_print2() with no second message.
 */
static void
mac_nat_entry_print(struct mac_nat_entry *mne,
const char ifname[IFNAMSIZ], const char *msg)
{
	mac_nat_entry_print2(mne, ifname, msg, NULL);
}
7507
7508 static struct mac_nat_entry *
7509 bridge_lookup_mac_nat_entry_ipv4(const struct bridge_softc *sc, const struct in_addr *ip)
7510 {
7511 struct mac_nat_entry *mne;
7512 struct mac_nat_entry *ret_mne = NULL;
7513
7514 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7515 if (mne->mne_ip.s_addr == ip->s_addr) {
7516 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7517 mac_nat_entry_print(mne, sc->sc_if_xname,
7518 "found");
7519 }
7520 ret_mne = mne;
7521 break;
7522 }
7523 }
7524
7525 return ret_mne;
7526 }
7527
7528 static struct mac_nat_entry *
7529 bridge_lookup_mac_nat_entry_ipv6(const struct bridge_softc *sc, const struct in6_addr *ip6)
7530 {
7531 struct mac_nat_entry *mne;
7532 struct mac_nat_entry *ret_mne = NULL;
7533
7534 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7535 if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7536 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7537 mac_nat_entry_print(mne, sc->sc_if_xname,
7538 "found");
7539 }
7540 ret_mne = mne;
7541 break;
7542 }
7543 }
7544
7545 return ret_mne;
7546 }
7547
7548 static void
7549 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7550 struct mac_nat_entry *mne, const char *reason)
7551 {
7552 LIST_REMOVE(mne, mne_list);
7553 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7554 mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7555 }
7556 zfree(bridge_mne_pool, mne);
7557 sc->sc_mne_count--;
7558 }
7559
7560 static struct mac_nat_entry *
7561 bridge_create_mac_nat_entry_common(struct bridge_softc *sc,
7562 struct bridge_iflist *bif, const char eaddr[ETHER_ADDR_LEN])
7563 {
7564 struct mac_nat_entry *mne;
7565
7566 if (sc->sc_mne_count >= sc->sc_mne_max) {
7567 sc->sc_mne_allocation_failures++;
7568 return NULL;
7569 }
7570
7571 mne = zalloc_noblock(bridge_mne_pool);
7572 if (mne == NULL) {
7573 sc->sc_mne_allocation_failures++;
7574 return NULL;
7575 }
7576
7577 sc->sc_mne_count++;
7578 bzero(mne, sizeof(*mne));
7579 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7580
7581 mne->mne_bif = bif;
7582 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7583
7584 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7585 mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7586 }
7587
7588 return mne;
7589 }
7590
7591 static struct mac_nat_entry *
7592 bridge_create_mac_nat_entry_ipv4(struct bridge_softc *sc,
7593 struct bridge_iflist *bif, const struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7594 {
7595 struct mac_nat_entry *mne;
7596
7597 mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7598 if (mne == NULL) {
7599 return NULL;
7600 }
7601
7602 bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7603 LIST_INSERT_HEAD(&sc->sc_mne_list, mne, mne_list);
7604
7605 return mne;
7606 }
7607
7608 static struct mac_nat_entry *
7609 bridge_create_mac_nat_entry_ipv6(struct bridge_softc *sc,
7610 struct bridge_iflist *bif, const struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7611 {
7612 struct mac_nat_entry *mne;
7613
7614 mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7615 if (mne == NULL) {
7616 return NULL;
7617 }
7618
7619 bcopy(ip6, &mne->mne_ip6, sizeof(mne->mne_ip6));
7620 mne->mne_flags |= MNE_FLAGS_IPV6;
7621 LIST_INSERT_HEAD(&sc->sc_mne_list_v6, mne, mne_list);
7622
7623 return mne;
7624 }
7625
7626 static struct mac_nat_entry *
7627 bridge_update_mac_nat_entry_common(struct bridge_softc *sc, struct bridge_iflist *bif,
7628 struct mac_nat_entry *mne, const char eaddr[ETHER_ADDR_LEN])
7629 {
7630 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7631
7632 if (mne->mne_bif == mac_nat_bif) {
7633 /* the MAC NAT interface takes precedence */
7634 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7635 if (mne->mne_bif != bif) {
7636 mac_nat_entry_print2(mne,
7637 sc->sc_if_xname, "reject",
7638 bif->bif_ifp->if_xname);
7639 }
7640 }
7641 } else if (mne->mne_bif != bif) {
7642 const char *__null_terminated old_if = mne->mne_bif->bif_ifp->if_xname;
7643
7644 mne->mne_bif = bif;
7645 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7646 mac_nat_entry_print2(mne,
7647 sc->sc_if_xname, "replaced",
7648 old_if);
7649 }
7650 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7651 }
7652
7653 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7654
7655 return mne;
7656 }
7657
7658 static struct mac_nat_entry *
7659 bridge_update_mac_nat_entry_ipv4(struct bridge_softc *sc,
7660 struct bridge_iflist *bif, struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7661 {
7662 struct mac_nat_entry *mne;
7663
7664 mne = bridge_lookup_mac_nat_entry_ipv4(sc, ip);
7665 if (mne != NULL) {
7666 return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7667 }
7668
7669 mne = bridge_create_mac_nat_entry_ipv4(sc, bif, ip, eaddr);
7670 return mne;
7671 }
7672
7673 static struct mac_nat_entry *
7674 bridge_update_mac_nat_entry_ipv6(struct bridge_softc *sc,
7675 struct bridge_iflist *bif, struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7676 {
7677 struct mac_nat_entry *mne;
7678
7679 mne = bridge_lookup_mac_nat_entry_ipv6(sc, ip6);
7680 if (mne != NULL) {
7681 return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7682 }
7683
7684 mne = bridge_create_mac_nat_entry_ipv6(sc, bif, ip6, eaddr);
7685 return mne;
7686 }
7687
7688 static void
7689 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7690 struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7691 {
7692 struct mac_nat_entry *mne;
7693 struct mac_nat_entry *tmne;
7694
7695 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7696 if (bif != NULL && mne->mne_bif != bif) {
7697 continue;
7698 }
7699 bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7700 }
7701 }
7702
7703 /*
7704 * bridge_mac_nat_flush_entries:
7705 *
7706 * Flush MAC NAT entries for the specified member. Flush all entries if
7707 * the member is the one that requires MAC NAT, otherwise just flush the
7708 * ones for the specified member.
7709 */
7710 static void
7711 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7712 {
7713 struct bridge_iflist *flush_bif;
7714
7715 flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7716 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7717 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7718 }
7719
7720 static void
7721 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7722 {
7723 errno_t error;
7724 ifnet_t ifp;
7725 uint16_t addresses_count = 0;
7726 ifaddr_t * __counted_by(addresses_count) list;
7727 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7728
7729 assert(mac_nat_bif != NULL);
7730 ifp = mac_nat_bif->bif_ifp;
7731 error = ifnet_get_address_list_family_with_count(ifp, &list, &addresses_count, 0);
7732 if (error != 0) {
7733 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7734 "ifnet_get_address_list(%s) failed %d",
7735 ifp->if_xname, error);
7736 return;
7737 }
7738
7739 for (uint16_t i = 0; i < addresses_count; ++i) {
7740 sa_family_t af;
7741
7742 af = ifaddr_address_family(list[i]);
7743 switch (af) {
7744 case AF_INET: {
7745 struct sockaddr_in sin;
7746
7747 error = ifaddr_address(list[i], (struct sockaddr *)&sin, sizeof(sin));
7748 if (error != 0) {
7749 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7750 "ifaddr_address failed %d",
7751 error);
7752 break;
7753 }
7754
7755 bridge_create_mac_nat_entry_ipv4(sc, mac_nat_bif, &sin.sin_addr, IF_LLADDR(ifp));
7756 break;
7757 }
7758
7759 case AF_INET6: {
7760 struct sockaddr_in6 sin6;
7761
7762 error = ifaddr_address(list[i], (struct sockaddr *)&sin6, sizeof(sin6));
7763 if (error != 0) {
7764 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7765 "ifaddr_address failed %d",
7766 error);
7767 break;
7768 }
7769
7770 if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) {
7771 /* remove scope ID */
7772 sin6.sin6_addr.s6_addr16[1] = 0;
7773 }
7774
7775 bridge_create_mac_nat_entry_ipv6(sc, mac_nat_bif, &sin6.sin6_addr, IF_LLADDR(ifp));
7776 break;
7777 }
7778
7779 default:
7780 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7781 "ifaddr_address_family unknown %d",
7782 af);
7783 break;
7784 }
7785 }
7786
7787 ifnet_address_list_free_counted_by(list, addresses_count);
7788 return;
7789 }
7790
7791 static void
7792 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7793 struct mac_nat_entry_list *list, unsigned long now)
7794 {
7795 struct mac_nat_entry *mne;
7796 struct mac_nat_entry *tmne;
7797
7798 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7799 if (now >= mne->mne_expire) {
7800 bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7801 }
7802 }
7803 }
7804
7805 static void
7806 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7807 {
7808 if (sc->sc_mac_nat_bif == NULL) {
7809 return;
7810 }
7811 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7812 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7813 }
7814
7815 static const char *
7816 get_in_out_string(boolean_t is_output)
7817 {
7818 return (const char * __null_terminated)(is_output ? "OUT" : "IN");
7819 }
7820
7821 /*
7822 * is_valid_arp_packet:
7823 * Verify that this is a valid ARP packet.
7824 *
7825 * Returns TRUE if the packet is valid, FALSE otherwise.
7826 */
7827 static boolean_t
7828 is_valid_arp_packet(mbuf_t *data, bool is_output,
7829 struct ether_header **eh_p, struct ether_arp **ea_p)
7830 {
7831 struct ether_arp *ea;
7832 struct ether_header *eh;
7833 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7834 boolean_t is_valid = FALSE;
7835 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7836
7837 if (mbuf_pkthdr_len(*data) < minlen) {
7838 BRIDGE_LOG(LOG_DEBUG, flags,
7839 "ARP %s short frame %lu < %lu",
7840 get_in_out_string(is_output),
7841 mbuf_pkthdr_len(*data), minlen);
7842 goto done;
7843 }
7844 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7845 BRIDGE_LOG(LOG_DEBUG, flags,
7846 "ARP %s size %lu mbuf_pullup fail",
7847 get_in_out_string(is_output),
7848 minlen);
7849 *data = NULL;
7850 goto done;
7851 }
7852
7853 /* validate ARP packet */
7854 eh = mtod(*data, struct ether_header *);
7855 ea = (struct ether_arp *)(eh + 1);
7856 if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7857 BRIDGE_LOG(LOG_DEBUG, flags,
7858 "ARP %s htype not ethernet",
7859 get_in_out_string(is_output));
7860 goto done;
7861 }
7862 if (ea->arp_hln != ETHER_ADDR_LEN) {
7863 BRIDGE_LOG(LOG_DEBUG, flags,
7864 "ARP %s hlen not ethernet",
7865 get_in_out_string(is_output));
7866 goto done;
7867 }
7868 if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7869 BRIDGE_LOG(LOG_DEBUG, flags,
7870 "ARP %s ptype not IP",
7871 get_in_out_string(is_output));
7872 goto done;
7873 }
7874 if (ea->arp_pln != sizeof(struct in_addr)) {
7875 BRIDGE_LOG(LOG_DEBUG, flags,
7876 "ARP %s plen not IP",
7877 get_in_out_string(is_output));
7878 goto done;
7879 }
7880 is_valid = TRUE;
7881 *ea_p = ea;
7882 *eh_p = eh;
7883 done:
7884 return is_valid;
7885 }
7886
7887 static struct mac_nat_entry *
7888 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7889 {
7890 struct ether_arp * __single ea;
7891 struct ether_header * __single eh;
7892 struct mac_nat_entry *mne = NULL;
7893 u_short op;
7894 struct in_addr tpa;
7895
7896 if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7897 goto done;
7898 }
7899 op = ea->arp_op;
7900 switch (op) {
7901 case HTONS_ARPOP_REQUEST:
7902 case HTONS_ARPOP_REPLY:
7903 /* only care about REQUEST and REPLY */
7904 break;
7905 default:
7906 goto done;
7907 }
7908
7909 /* check the target IP address for a NAT entry */
7910 bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7911 if (tpa.s_addr != 0) {
7912 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &tpa);
7913 }
7914 if (mne != NULL) {
7915 if (op == HTONS_ARPOP_REPLY) {
7916 /* translate the MAC address */
7917 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7918 char mac_src[24];
7919 char mac_dst[24];
7920
7921 ether_ntop(mac_src, sizeof(mac_src),
7922 ea->arp_tha);
7923 ether_ntop(mac_dst, sizeof(mac_dst),
7924 mne->mne_mac);
7925 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7926 "%s %s ARP %s -> %s",
7927 sc->sc_if_xname,
7928 mne->mne_bif->bif_ifp->if_xname,
7929 mac_src, mac_dst);
7930 }
7931 bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
7932 }
7933 } else {
7934 /* handle conflicting ARP (sender matches mne) */
7935 struct in_addr spa;
7936
7937 bcopy(ea->arp_spa, &spa, sizeof(spa));
7938 if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
7939 /* check the source IP for a NAT entry */
7940 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &spa);
7941 }
7942 }
7943
7944 done:
7945 return mne;
7946 }
7947
7948 static boolean_t
7949 bridge_mac_nat_arp_output(struct bridge_softc *sc,
7950 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7951 {
7952 struct ether_arp * __single ea;
7953 struct ether_header * __single eh;
7954 struct in_addr ip;
7955 struct mac_nat_entry *mne = NULL;
7956 u_short op;
7957 boolean_t translate = FALSE;
7958
7959 if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
7960 goto done;
7961 }
7962 op = ea->arp_op;
7963 switch (op) {
7964 case HTONS_ARPOP_REQUEST:
7965 case HTONS_ARPOP_REPLY:
7966 /* only care about REQUEST and REPLY */
7967 break;
7968 default:
7969 goto done;
7970 }
7971
7972 bcopy(ea->arp_spa, &ip, sizeof(ip));
7973 if (ip.s_addr == 0) {
7974 goto done;
7975 }
7976 /* XXX validate IP address: no multicast/broadcast */
7977 mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
7978 (const char *)ea->arp_sha);
7979 if (mnr != NULL && mne != NULL) {
7980 /* record the offset to do the replacement */
7981 translate = TRUE;
7982 mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
7983 }
7984
7985 done:
7986 return translate;
7987 }
7988
7989 #define ETHER_IPV4_HEADER_LEN (sizeof(struct ether_header) + \
7990 + sizeof(struct ip))
7991 static uint8_t * __indexable
7992 get_ether_ip_header_ptr(mbuf_t *data, boolean_t is_output)
7993 {
7994 uint8_t *header = NULL;
7995 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7996 size_t minlen = ETHER_IPV4_HEADER_LEN;
7997
7998 if (mbuf_pkthdr_len(*data) < minlen) {
7999 BRIDGE_LOG(LOG_DEBUG, flags,
8000 "IP %s short frame %lu < %lu",
8001 get_in_out_string(is_output),
8002 mbuf_pkthdr_len(*data), minlen);
8003 goto done;
8004 }
8005 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8006 BRIDGE_LOG(LOG_DEBUG, flags,
8007 "IP %s size %lu mbuf_pullup fail",
8008 get_in_out_string(is_output),
8009 minlen);
8010 *data = NULL;
8011 goto done;
8012 }
8013 header = mtod(*data, uint8_t *);
8014 done:
8015 return header;
8016 }
8017
8018 static struct mac_nat_entry *
8019 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8020 {
8021 struct in_addr dst;
8022 uint8_t *header;
8023 struct ip *iphdr;
8024 struct mac_nat_entry *mne = NULL;
8025
8026 header = get_ether_ip_header_ptr(data, FALSE);
8027 if (header == NULL) {
8028 goto done;
8029 }
8030 iphdr = (struct ip *)(void *)(header + sizeof(struct ether_header));
8031 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8032 /* XXX validate IP address */
8033 if (dst.s_addr == 0) {
8034 goto done;
8035 }
8036 mne = bridge_lookup_mac_nat_entry_ipv4(sc, &dst);
8037 done:
8038 return mne;
8039 }
8040
8041 static void
8042 bridge_mac_nat_udp_output(struct bridge_softc *sc,
8043 struct bridge_iflist *bif, mbuf_t m,
8044 uint8_t ip_header_len, struct mac_nat_record *mnr)
8045 {
8046 uint16_t dp_flags;
8047 errno_t error;
8048 size_t offset;
8049 struct udphdr udphdr;
8050
8051 /* copy the UDP header */
8052 offset = sizeof(struct ether_header) + ip_header_len;
8053 error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
8054 if (error != 0) {
8055 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8056 "mbuf_copydata udphdr failed %d",
8057 error);
8058 return;
8059 }
8060 if (udphdr.uh_sport != HTONS_IPPORT_BOOTPC ||
8061 udphdr.uh_dport != HTONS_IPPORT_BOOTPS) {
8062 /* not a BOOTP/DHCP packet */
8063 return;
8064 }
8065 /* check whether the broadcast bit is already set */
8066 offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8067 error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
8068 if (error != 0) {
8069 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8070 "mbuf_copydata dp_flags failed %d",
8071 error);
8072 return;
8073 }
8074 if ((dp_flags & HTONS_DHCP_FLAGS_BROADCAST) != 0) {
8075 /* it's already set, nothing to do */
8076 return;
8077 }
8078 /* broadcast bit needs to be set */
8079 mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8080 mnr->mnr_ip_header_len = ip_header_len;
8081 if (udphdr.uh_sum != 0) {
8082 uint16_t delta;
8083
8084 /* adjust checksum to take modified dp_flags into account */
8085 delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8086 mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8087 }
8088 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8089 "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8090 sc->sc_if_xname,
8091 bif->bif_ifp->if_xname,
8092 ntohs(mnr->mnr_ip_dhcp_flags),
8093 ntohs(mnr->mnr_ip_udp_csum));
8094 return;
8095 }
8096
8097 static boolean_t
8098 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8099 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8100 {
8101 #pragma unused(mnr)
8102 uint8_t *header;
8103 struct ether_header *eh;
8104 struct in_addr ip;
8105 struct ip *iphdr;
8106 uint8_t ip_header_len;
8107 struct mac_nat_entry *mne = NULL;
8108 boolean_t translate = FALSE;
8109
8110 header = get_ether_ip_header_ptr(data, TRUE);
8111 if (header == NULL) {
8112 goto done;
8113 }
8114
8115 eh = (struct ether_header *)header;
8116 iphdr = (struct ip *)(header + sizeof(*eh));
8117 ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8118 if (ip_header_len < sizeof(ip)) {
8119 /* bogus IP header */
8120 goto done;
8121 }
8122 bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8123 /* XXX validate the source address */
8124 if (ip.s_addr != 0) {
8125 mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
8126 (const char *)eh->ether_shost);
8127 }
8128 if (mnr != NULL) {
8129 if (ip.s_addr == 0 && iphdr->ip_p == IPPROTO_UDP) {
8130 /* handle DHCP must broadcast */
8131 bridge_mac_nat_udp_output(sc, bif, *data,
8132 ip_header_len, mnr);
8133 }
8134 translate = TRUE;
8135 }
8136 done:
8137 return translate;
8138 }
8139
8140 #define ETHER_IPV6_HEADER_LEN (sizeof(struct ether_header) + \
8141 + sizeof(struct ip6_hdr))
8142 static uint8_t * __indexable
8143 get_ether_ipv6_header_ptr(mbuf_t *data, size_t plen, boolean_t is_output)
8144 {
8145 uint8_t *header = NULL;
8146 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8147 size_t minlen = ETHER_IPV6_HEADER_LEN + plen;
8148
8149 if (mbuf_pkthdr_len(*data) < minlen) {
8150 BRIDGE_LOG(LOG_DEBUG, flags,
8151 "IP %s short frame %lu < %lu",
8152 get_in_out_string(is_output),
8153 mbuf_pkthdr_len(*data), minlen);
8154 goto done;
8155 }
8156 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8157 BRIDGE_LOG(LOG_DEBUG, flags,
8158 "IP %s size %lu mbuf_pullup fail",
8159 get_in_out_string(is_output),
8160 minlen);
8161 *data = NULL;
8162 goto done;
8163 }
8164 header = mtod(*data, uint8_t *);
8165 done:
8166 return header;
8167 }
8168
8169 #include <netinet/icmp6.h>
8170 #include <netinet6/nd6.h>
8171
8172 #define ETHER_ND_LLADDR_LEN (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8173
8174 static void
8175 bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
8176 struct bridge_iflist *bif,
8177 mbuf_t *data, struct ip6_hdr *ip6h,
8178 struct in6_addr *saddrp,
8179 struct mac_nat_record *mnr)
8180 {
8181 uint8_t *header;
8182 struct ether_header *eh;
8183 struct icmp6_hdr *icmp6;
8184 uint8_t icmp6_type;
8185 uint32_t icmp6len;
8186 int lladdrlen = 0;
8187 char *lladdr = NULL;
8188 unsigned int off = sizeof(*ip6h);
8189
8190 icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
8191 if (icmp6len < sizeof(*icmp6)) {
8192 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8193 "short IPv6 payload length %d < %lu",
8194 icmp6len, sizeof(*icmp6));
8195 return;
8196 }
8197
8198 /* pullup IP6 header + ICMPv6 header */
8199 header = get_ether_ipv6_header_ptr(data, sizeof(*icmp6), TRUE);
8200 if (header == NULL) {
8201 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8202 "failed to pullup icmp6 header");
8203 return;
8204 }
8205 eh = (struct ether_header *)header;
8206 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8207 icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);
8208 icmp6_type = icmp6->icmp6_type;
8209 switch (icmp6_type) {
8210 case ND_NEIGHBOR_SOLICIT:
8211 case ND_NEIGHBOR_ADVERT:
8212 case ND_ROUTER_ADVERT:
8213 case ND_ROUTER_SOLICIT:
8214 break;
8215 default:
8216 return;
8217 }
8218
8219 /* pullup IP6 header + payload */
8220 header = get_ether_ipv6_header_ptr(data, icmp6len, TRUE);
8221 if (header == NULL) {
8222 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8223 "failed to pullup icmp6 + payload");
8224 return;
8225 }
8226 eh = (struct ether_header *)header;
8227 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8228 icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);
8229
8230 switch (icmp6_type) {
8231 case ND_NEIGHBOR_SOLICIT: {
8232 struct nd_neighbor_solicit *nd_ns;
8233 union nd_opts ndopts;
8234 boolean_t is_dad_probe;
8235 struct in6_addr taddr;
8236
8237 if (icmp6len < sizeof(*nd_ns)) {
8238 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8239 "short nd_ns %d < %lu",
8240 icmp6len, sizeof(*nd_ns));
8241 return;
8242 }
8243
8244 nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
8245 bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
8246 if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8247 IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8248 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8249 "invalid target ignored");
8250 return;
8251 }
8252
8253 /* parse options */
8254 nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
8255 if (nd6_options(&ndopts) < 0) {
8256 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8257 "invalid ND6 NS option");
8258 return;
8259 }
8260 if (ndopts.nd_opts_src_lladdr != NULL) {
8261 ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len,
8262 lladdr, lladdrlen);
8263 }
8264 is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
8265 if (lladdr != NULL) {
8266 if (is_dad_probe) {
8267 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8268 "bad ND6 DAD packet");
8269 return;
8270 }
8271 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8272 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8273 "source lladdrlen %d != %lu",
8274 lladdrlen, ETHER_ND_LLADDR_LEN);
8275 return;
8276 }
8277 }
8278 if (is_dad_probe) {
8279 /* node is trying use taddr, create an mne for taddr */
8280 *saddrp = taddr;
8281 }
8282 break;
8283 }
8284 case ND_NEIGHBOR_ADVERT: {
8285 struct nd_neighbor_advert *nd_na;
8286 union nd_opts ndopts;
8287 struct in6_addr taddr;
8288
8289
8290 nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
8291
8292 if (icmp6len < sizeof(*nd_na)) {
8293 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8294 "short nd_na %d < %lu",
8295 icmp6len, sizeof(*nd_na));
8296 return;
8297 }
8298
8299 bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
8300 if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8301 IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8302 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8303 "invalid target ignored");
8304 return;
8305 }
8306
8307 /* parse options */
8308 nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
8309 if (nd6_options(&ndopts) < 0) {
8310 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8311 "invalid ND6 NA option");
8312 return;
8313 }
8314 if (ndopts.nd_opts_tgt_lladdr == NULL) {
8315 /* target linklayer, nothing to do */
8316 return;
8317 }
8318
8319 ND_OPT_LLADDR(ndopts.nd_opts_tgt_lladdr, nd_opt_len, lladdr, lladdrlen);
8320 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8321 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8322 "target lladdrlen %d != %lu",
8323 lladdrlen, ETHER_ND_LLADDR_LEN);
8324 return;
8325 }
8326 break;
8327 }
8328 case ND_ROUTER_ADVERT:
8329 case ND_ROUTER_SOLICIT: {
8330 union nd_opts ndopts;
8331 uint32_t type_length;
8332 const char *description;
8333
8334 if (icmp6_type == ND_ROUTER_ADVERT) {
8335 type_length = sizeof(struct nd_router_advert);
8336 description = "RA";
8337 } else {
8338 type_length = sizeof(struct nd_router_solicit);
8339 description = "RS";
8340 }
8341 if (icmp6len < type_length) {
8342 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8343 "short ND6 %s %d < %d",
8344 description, icmp6len, type_length);
8345 return;
8346 }
8347
8348 /* parse options */
8349 nd6_option_init(((uint8_t *)icmp6) + type_length,
8350 icmp6len - type_length, &ndopts);
8351 if (nd6_options(&ndopts) < 0) {
8352 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8353 "invalid ND6 %s option", description);
8354 return;
8355 }
8356 if (ndopts.nd_opts_src_lladdr != NULL) {
8357 ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len, lladdr, lladdrlen);
8358
8359 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8360 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8361 "source lladdrlen %d != %lu",
8362 lladdrlen, ETHER_ND_LLADDR_LEN);
8363 return;
8364 }
8365 }
8366 break;
8367 }
8368 default:
8369 break;
8370 }
8371
8372 if (lladdr != NULL) {
8373 mnr->mnr_ip6_lladdr_offset = (uint16_t)
8374 ((uintptr_t)lladdr - (uintptr_t)eh);
8375 mnr->mnr_ip6_icmp6_len = icmp6len;
8376 mnr->mnr_ip6_icmp6_type = icmp6_type;
8377 mnr->mnr_ip6_header_len = off;
8378 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8379 const char *str;
8380
8381 switch (mnr->mnr_ip6_icmp6_type) {
8382 case ND_ROUTER_ADVERT:
8383 str = "ROUTER ADVERT";
8384 break;
8385 case ND_ROUTER_SOLICIT:
8386 str = "ROUTER SOLICIT";
8387 break;
8388 case ND_NEIGHBOR_ADVERT:
8389 str = "NEIGHBOR ADVERT";
8390 break;
8391 case ND_NEIGHBOR_SOLICIT:
8392 str = "NEIGHBOR SOLICIT";
8393 break;
8394 default:
8395 str = "";
8396 break;
8397 }
8398 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8399 "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
8400 sc->sc_if_xname, bif->bif_ifp->if_xname, str,
8401 mnr->mnr_ip6_header_len,
8402 mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
8403 }
8404 }
8405 }
8406
8407 static struct mac_nat_entry *
8408 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8409 {
8410 struct in6_addr dst;
8411 uint8_t *header;
8412 struct ether_header *eh;
8413 struct ip6_hdr *ip6h;
8414 struct mac_nat_entry *mne = NULL;
8415
8416 header = get_ether_ipv6_header_ptr(data, 0, FALSE);
8417 if (header == NULL) {
8418 goto done;
8419 }
8420 eh = (struct ether_header *)header;
8421 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8422 bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8423 /* XXX validate IPv6 address */
8424 if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8425 goto done;
8426 }
8427 mne = bridge_lookup_mac_nat_entry_ipv6(sc, &dst);
8428
8429 done:
8430 return mne;
8431 }
8432
8433 static boolean_t
8434 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8435 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8436 {
8437 uint8_t *header;
8438 struct ether_header *eh;
8439 ether_addr_t ether_shost;
8440 struct ip6_hdr *ip6h;
8441 struct in6_addr saddr;
8442 boolean_t translate;
8443
8444 translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8445 header = get_ether_ipv6_header_ptr(data, 0, TRUE);
8446 if (header == NULL) {
8447 translate = FALSE;
8448 goto done;
8449 }
8450 eh = (struct ether_header *)header;
8451 bcopy(eh->ether_shost, ðer_shost, sizeof(ether_shost));
8452 ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8453 bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8454 if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8455 bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, &saddr, mnr);
8456 }
8457 if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8458 goto done;
8459 }
8460 (void)bridge_update_mac_nat_entry_ipv6(sc, bif, &saddr,
8461 (const char *)ether_shost.octet);
8462
8463 done:
8464 return translate;
8465 }
8466
8467 /*
8468 * Function: bridge_mac_nat_input:
8469 *
8470 * Purpose:
8471 * Process a unicast packet arriving on the external interface `external_ifp`.
8472 *
8473 * If the packet is ARP, IPv4, or IPv6, lookup the address from the packet in
8474 * the mac_nat_entry table. If an entry is found, and the interface is
8475 * not `external_ifp`, replace the destination MAC address in the
8476 * ethernet header with the corresponding internal MAC address, and return
8477 * the interface via `*dst_if`.
8478 *
8479 * Returns:
8480 * NULL if the packet was deallocated during processing.
8481 *
8482 * Otherwise, returns non-NULL packet that should:
8483 * 1) if `*dst_if` is NULL, continue on as an input packet
8484 * over `external_ifp`, OR
8485 * 2) if `*dst_if` is not NULL, be delivered as an output packet
8486 * over `*dst_if`.
8487 */
8488 static mbuf_t
8489 bridge_mac_nat_input(struct bridge_softc *sc, ifnet_t external_ifp,
8490 mbuf_t m, ifnet_t * dst_if)
8491 {
8492 struct ether_header *eh;
8493 mbuf_t m0 = m;
8494 struct mac_nat_entry *mne = NULL;
8495
8496 BRIDGE_LOCK_ASSERT_HELD(sc);
8497 *dst_if = NULL;
8498 eh = mtod(m, struct ether_header *);
8499 switch (eh->ether_type) {
8500 case HTONS_ETHERTYPE_ARP:
8501 mne = bridge_mac_nat_arp_input(sc, &m);
8502 break;
8503 case HTONS_ETHERTYPE_IP:
8504 mne = bridge_mac_nat_ip_input(sc, &m);
8505 break;
8506 case HTONS_ETHERTYPE_IPV6:
8507 mne = bridge_mac_nat_ipv6_input(sc, &m);
8508 break;
8509 default:
8510 break;
8511 }
8512 if (m != NULL & mne != NULL) {
8513 *dst_if = mne->mne_bif->bif_ifp;
8514 if (*dst_if == external_ifp) {
8515 /* receive packet for ifp */
8516 *dst_if = NULL;
8517 } else {
8518 /* replace the destination MAC with internal one */
8519 if (m != m0) {
8520 /* it may have changed */
8521 eh = mtod(m, struct ether_header *);
8522 }
8523 bcopy(mne->mne_mac, eh->ether_dhost,
8524 sizeof(eh->ether_dhost));
8525 }
8526 }
8527 return m;
8528 }
8529
8530
8531 static mblist
8532 bridge_mac_nat_input_list(struct bridge_softc *sc, ifnet_t external_ifp,
8533 mbuf_t m, mbuf_t * forward_head)
8534 {
8535 mblist forward;
8536 mbuf_t next_packet;
8537 mblist ret;
8538
8539 mblist_init(&ret);
8540 mblist_init(&forward);
8541 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8542 ifnet_ref_t dst_if;
8543
8544 /* take packet out of the list */
8545 next_packet = scan->m_nextpkt;
8546 scan->m_nextpkt = NULL;
8547
8548 scan = bridge_mac_nat_input(sc, external_ifp, scan, &dst_if);
8549 if (scan != NULL) {
8550 if (dst_if != NULL) {
8551 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8552 "%s MAC-NAT input translate to %s",
8553 sc->sc_if_xname, dst_if->if_xname);
8554 /* use rcvif to store the egress interface */
8555 mbuf_pkthdr_setrcvif(scan, dst_if);
8556 /* add it to the forwarding list */
8557 mblist_append(&forward, scan);
8558 } else {
8559 /* add it to the "continue on as input" list */
8560 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8561 "%s MAC-NAT input for %s",
8562 sc->sc_if_xname,
8563 external_ifp->if_xname);
8564 mblist_append(&ret, scan);
8565 }
8566 }
8567 }
8568 *forward_head = forward.head;
8569 return ret;
8570 }
8571
8572 /*
8573 * bridge_mac_nat_translate_list:
8574 * Process a list of packets destined to the MAC-NAT interface `dst_if`
8575 * from the bridge member `sbif`.
8576 *
8577 * For each packet in the list, update the MAC-NAT record, and if
8578 * translation is required, translate it.
8579 *
8580 * Returns the list of packets that should be delivered to the MAC-NAT
8581 * interface.
8582 */
8583 static mbuf_t
8584 bridge_mac_nat_translate_list(struct bridge_softc * sc,
8585 struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8586 {
8587 mbuf_t next_packet;
8588 mblist ret;
8589
8590 mblist_init(&ret);
8591 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
8592 struct mac_nat_record mnr;
8593 bool translate_mac;
8594
8595 /* take packet out of the list */
8596 next_packet = scan->m_nextpkt;
8597 scan->m_nextpkt = NULL;
8598 translate_mac = bridge_mac_nat_output(sc, sbif, &scan, &mnr);
8599 if (scan != NULL) {
8600 if (translate_mac) {
8601 bridge_mac_nat_translate(&scan, &mnr,
8602 IF_LLADDR(dst_if));
8603 }
8604 if (scan != NULL) {
8605 /* add it back to the list */
8606 mblist_append(&ret, scan);
8607 }
8608 }
8609 }
8610 return ret.head;
8611 }
8612
8613 /*
8614 * bridge_mac_nat_copy_and_translate_list:
8615 * Same as bridge_mac_nat_translate_list() except that a copy of the
8616 * packet list is returned instead.
8617 *
8618 * The packet list `m` is left unaltered.
8619 */
8620 static mbuf_t
8621 bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
8622 struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8623 {
8624 mbuf_t next_packet;
8625 mblist ret;
8626
8627 mblist_init(&ret);
8628 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8629 mbuf_ref_t mc = NULL;
8630 struct mac_nat_record mnr;
8631 bool translate_mac;
8632
8633 /* take packet out of the list, make a copy, put it back */
8634 next_packet = scan->m_nextpkt;
8635 scan->m_nextpkt = NULL;
8636 mc = m_dup(scan, M_DONTWAIT);
8637 scan->m_nextpkt = next_packet;
8638 if (mc == NULL) {
8639 continue;
8640 }
8641 translate_mac = bridge_mac_nat_output(sc, sbif, &mc, &mnr);
8642 if (mc != NULL) {
8643 if (translate_mac) {
8644 bridge_mac_nat_translate(&mc, &mnr,
8645 IF_LLADDR(dst_if));
8646 }
8647 if (mc != NULL) {
8648 /* add it to the new list */
8649 mblist_append(&ret, mc);
8650 }
8651 }
8652 }
8653 return ret.head;
8654 }
8655
8656 static void
8657 bridge_mac_nat_forward_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
8658 mbuf_t m)
8659 {
8660 int count = 0;
8661 ifnet_t dst_if;
8662 mblist list;
8663 int n_lists = 0;
8664 mbuf_t next_packet;
8665
8666 mblist_init(&list);
8667 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8668 ifnet_t this_if;
8669
8670 next_packet = scan->m_nextpkt;
8671 this_if = mbuf_pkthdr_rcvif(scan);
8672 mbuf_pkthdr_setrcvif(scan, NULL);
8673 if (list.head == NULL) {
8674 /* start a new list */
8675 list.head = list.tail = scan;
8676 count = 1;
8677 dst_if = this_if;
8678 } else if (dst_if != this_if) {
8679 /* send up the previous chain */
8680 if (list.tail != NULL) {
8681 /* terminate the list */
8682 list.tail->m_nextpkt = NULL;
8683 }
8684 n_lists++;
8685 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8686 "(%s): sublist %u pkts %u",
8687 dst_if->if_xname, n_lists, count);
8688 bridge_enqueue(bridge_ifp, NULL,
8689 dst_if, etypef, list.head,
8690 CHECKSUM_OPERATION_CLEAR_OFFLOAD, pkt_direction_RX);
8691
8692 /* start new list */
8693 list.head = list.tail = scan;
8694 count = 1;
8695 dst_if = this_if;
8696 } else {
8697 count++;
8698 list.tail = scan;
8699 }
8700 if (next_packet == NULL) {
8701 /* last list */
8702 n_lists++;
8703 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8704 "(%s): sublist %u pkts %u",
8705 dst_if->if_xname, n_lists, count);
8706 bridge_enqueue(bridge_ifp, NULL,
8707 dst_if, etypef, list.head,
8708 CHECKSUM_OPERATION_CLEAR_OFFLOAD, pkt_direction_RX);
8709 }
8710 }
8711 return;
8712 }
8713
8714 /*
8715 * bridge_mac_nat_output:
8716 * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8717 * from the interface 'bif'.
8718 *
8719 * Create a mac_nat_entry containing the source IP address and MAC address
8720 * from the packet. Populate a mac_nat_record with information detailing
8721 * how to translate the packet. Translation takes place later by calling
8722 * `bridge_mac_nat_translate()`.
8723 *
8724 * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8725 * interface is generating an output packet. No translation is required in this
8726 * case, we just record the IP address used to prevent another bif from
8727 * claiming our IP address.
8728 *
8729 * Returns:
8730 * TRUE if the packet should be translated (*mnr updated as well),
8731 * FALSE otherwise.
8732 *
8733 * *data may be updated to point at a different mbuf chain or NULL if
8734 * the chain was deallocated during processing.
8735 */
8736
8737 static boolean_t
8738 bridge_mac_nat_output(struct bridge_softc *sc,
8739 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8740 {
8741 struct ether_header *eh;
8742 boolean_t translate = FALSE;
8743
8744 BRIDGE_LOCK_ASSERT_HELD(sc);
8745 assert(sc->sc_mac_nat_bif != NULL);
8746
8747 eh = mtod(*data, struct ether_header *);
8748 if (mnr != NULL) {
8749 bzero(mnr, sizeof(*mnr));
8750 mnr->mnr_ether_type = eh->ether_type;
8751 }
8752 switch (eh->ether_type) {
8753 case HTONS_ETHERTYPE_ARP:
8754 translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8755 break;
8756 case HTONS_ETHERTYPE_IP:
8757 translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8758 break;
8759 case HTONS_ETHERTYPE_IPV6:
8760 translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8761 break;
8762 default:
8763 break;
8764 }
8765 return translate;
8766 }
8767
8768 static void
8769 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8770 const char eaddr[ETHER_ADDR_LEN])
8771 {
8772 errno_t error;
8773
8774 if (mnr->mnr_arp_offset == 0) {
8775 return;
8776 }
8777 /* replace the source hardware address */
8778 error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8779 ETHER_ADDR_LEN, eaddr,
8780 MBUF_DONTWAIT);
8781 if (error != 0) {
8782 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8783 "mbuf_copyback failed");
8784 m_freem(*data);
8785 *data = NULL;
8786 }
8787 return;
8788 }
8789
8790 static void
8791 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8792 {
8793 errno_t error;
8794 size_t offset;
8795
8796 if (mnr->mnr_ip_header_len == 0) {
8797 return;
8798 }
8799 /* update the UDP checksum */
8800 offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8801 error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8802 sizeof(mnr->mnr_ip_udp_csum),
8803 &mnr->mnr_ip_udp_csum,
8804 MBUF_DONTWAIT);
8805 if (error != 0) {
8806 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8807 "mbuf_copyback uh_sum failed");
8808 m_freem(*data);
8809 *data = NULL;
8810 }
8811 /* update the DHCP must broadcast flag */
8812 offset += sizeof(struct udphdr);
8813 error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8814 sizeof(mnr->mnr_ip_dhcp_flags),
8815 &mnr->mnr_ip_dhcp_flags,
8816 MBUF_DONTWAIT);
8817 if (error != 0) {
8818 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8819 "mbuf_copyback dp_flags failed");
8820 m_freem(*data);
8821 *data = NULL;
8822 }
8823 }
8824
8825 static void
8826 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8827 const char eaddr[ETHER_ADDR_LEN])
8828 {
8829 uint16_t cksum;
8830 errno_t error;
8831 mbuf_t m = *data;
8832
8833 if (mnr->mnr_ip6_header_len == 0) {
8834 return;
8835 }
8836 switch (mnr->mnr_ip6_icmp6_type) {
8837 case ND_ROUTER_ADVERT:
8838 case ND_ROUTER_SOLICIT:
8839 case ND_NEIGHBOR_SOLICIT:
8840 case ND_NEIGHBOR_ADVERT:
8841 if (mnr->mnr_ip6_lladdr_offset == 0) {
8842 /* nothing to do */
8843 return;
8844 }
8845 break;
8846 default:
8847 return;
8848 }
8849
8850 /*
8851 * replace the lladdr
8852 */
8853 error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8854 ETHER_ADDR_LEN, eaddr,
8855 MBUF_DONTWAIT);
8856 if (error != 0) {
8857 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8858 "mbuf_copyback lladdr failed");
8859 m_freem(m);
8860 *data = NULL;
8861 return;
8862 }
8863
8864 /*
8865 * recompute the icmp6 checksum
8866 */
8867
8868 /* skip past the ethernet header */
8869 _mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
8870
8871 #define CKSUM_OFFSET_ICMP6 offsetof(struct icmp6_hdr, icmp6_cksum)
8872 /* set the checksum to zero */
8873 cksum = 0;
8874 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8875 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8876 if (error != 0) {
8877 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8878 "mbuf_copyback cksum=0 failed");
8879 m_freem(m);
8880 *data = NULL;
8881 return;
8882 }
8883 /* compute and set the new checksum */
8884 cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8885 mnr->mnr_ip6_icmp6_len);
8886 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8887 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8888 if (error != 0) {
8889 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8890 "mbuf_copyback cksum failed");
8891 m_freem(m);
8892 *data = NULL;
8893 return;
8894 }
8895 /* restore the ethernet header */
8896 _mbuf_adjust_pkthdr_and_data(m, -ETHER_HDR_LEN);
8897 return;
8898 }
8899
8900 static void
8901 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8902 const char eaddr[ETHER_ADDR_LEN])
8903 {
8904 struct ether_header *eh;
8905
8906 /* replace the source ethernet address with the single MAC */
8907 eh = mtod(*data, struct ether_header *);
8908 bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8909 switch (mnr->mnr_ether_type) {
8910 case HTONS_ETHERTYPE_ARP:
8911 bridge_mac_nat_arp_translate(data, mnr, eaddr);
8912 break;
8913
8914 case HTONS_ETHERTYPE_IP:
8915 bridge_mac_nat_ip_translate(data, mnr);
8916 break;
8917
8918 case HTONS_ETHERTYPE_IPV6:
8919 bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8920 break;
8921
8922 default:
8923 break;
8924 }
8925 return;
8926 }
8927
8928 /*
8929 * bridge packet filtering
8930 */
8931
8932 /*
8933 * Perform basic checks on header size since
8934 * pfil assumes ip_input has already processed
8935 * it for it. Cut-and-pasted from ip_input.c.
8936 * Given how simple the IPv6 version is,
8937 * does the IPv4 version really need to be
8938 * this complicated?
8939 *
8940 * XXX Should we update ipstat here, or not?
8941 * XXX Right now we update ipstat but not
8942 * XXX csum_counter.
8943 */
8944 static int
8945 bridge_ip_checkbasic(struct mbuf **mp)
8946 {
8947 struct mbuf *m = *mp;
8948 struct ip *ip;
8949 int len, hlen;
8950 u_short sum;
8951
8952 if (*mp == NULL) {
8953 return -1;
8954 }
8955
8956 if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8957 /* max_linkhdr is already rounded up to nearest 4-byte */
8958 if ((m = m_copyup(m, sizeof(struct ip),
8959 max_linkhdr)) == NULL) {
8960 /* XXXJRT new stat, please */
8961 ipstat.ips_toosmall++;
8962 goto bad;
8963 }
8964 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8965 if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8966 ipstat.ips_toosmall++;
8967 goto bad;
8968 }
8969 }
8970 ip = mtod(m, struct ip *);
8971 if (ip == NULL) {
8972 goto bad;
8973 }
8974
8975 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
8976 ipstat.ips_badvers++;
8977 goto bad;
8978 }
8979 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8980 if (hlen < (int)sizeof(struct ip)) { /* minimum header length */
8981 ipstat.ips_badhlen++;
8982 goto bad;
8983 }
8984 if (hlen > m->m_len) {
8985 if ((m = m_pullup(m, hlen)) == 0) {
8986 ipstat.ips_badhlen++;
8987 goto bad;
8988 }
8989 ip = mtod(m, struct ip *);
8990 if (ip == NULL) {
8991 goto bad;
8992 }
8993 }
8994
8995 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
8996 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
8997 } else {
8998 if (hlen == sizeof(struct ip)) {
8999 sum = in_cksum_hdr(ip);
9000 } else {
9001 sum = in_cksum(m, hlen);
9002 }
9003 }
9004 if (sum) {
9005 ipstat.ips_badsum++;
9006 goto bad;
9007 }
9008
9009 /* Retrieve the packet length. */
9010 len = ntohs(ip->ip_len);
9011
9012 /*
9013 * Check for additional length bogosity
9014 */
9015 if (len < hlen) {
9016 ipstat.ips_badlen++;
9017 goto bad;
9018 }
9019
9020 /*
9021 * Check that the amount of data in the buffers
9022 * is as at least much as the IP header would have us expect.
9023 * Drop packet if shorter than we expect.
9024 */
9025 if (m->m_pkthdr.len < len) {
9026 ipstat.ips_tooshort++;
9027 goto bad;
9028 }
9029
9030 /* Checks out, proceed */
9031 *mp = m;
9032 return 0;
9033
9034 bad:
9035 *mp = m;
9036 return -1;
9037 }
9038
9039 /*
9040 * Same as above, but for IPv6.
9041 * Cut-and-pasted from ip6_input.c.
9042 * XXX Should we update ip6stat, or not?
9043 */
9044 static int
9045 bridge_ip6_checkbasic(struct mbuf **mp)
9046 {
9047 struct mbuf *m = *mp;
9048 struct ip6_hdr *ip6;
9049
9050 /*
9051 * If the IPv6 header is not aligned, slurp it up into a new
9052 * mbuf with space for link headers, in the event we forward
9053 * it. Otherwise, if it is aligned, make sure the entire base
9054 * IPv6 header is in the first mbuf of the chain.
9055 */
9056 if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9057 struct ifnet *inifp = m->m_pkthdr.rcvif;
9058 /* max_linkhdr is already rounded up to nearest 4-byte */
9059 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
9060 max_linkhdr)) == NULL) {
9061 /* XXXJRT new stat, please */
9062 ip6stat.ip6s_toosmall++;
9063 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9064 goto bad;
9065 }
9066 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
9067 struct ifnet *inifp = m->m_pkthdr.rcvif;
9068 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
9069 ip6stat.ip6s_toosmall++;
9070 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9071 goto bad;
9072 }
9073 }
9074
9075 ip6 = mtod(m, struct ip6_hdr *);
9076
9077 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
9078 ip6stat.ip6s_badvers++;
9079 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
9080 goto bad;
9081 }
9082
9083 /* Checks out, proceed */
9084 *mp = m;
9085 return 0;
9086
9087 bad:
9088 *mp = m;
9089 return -1;
9090 }
9091
9092 /*
9093 * the PF routines expect to be called from ip_input, so we
9094 * need to do and undo here some of the same processing.
9095 *
9096 * XXX : this is heavily inspired on bridge_pfil()
9097 */
9098 static int
9099 bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
9100 bool input)
9101 {
9102 /*
9103 * XXX : mpetit : heavily inspired by bridge_pfil()
9104 */
9105
9106 int snap, error, i, hlen;
9107 struct ether_header *eh1, eh2;
9108 struct ip *ip;
9109 struct llc llc1;
9110 u_int16_t ether_type;
9111
9112 snap = 0;
9113 error = -1; /* Default error if not error == 0 */
9114
9115 if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
9116 return 0; /* filtering is disabled */
9117 }
9118 i = min((*mp)->m_pkthdr.len, max_protohdr);
9119 if ((*mp)->m_len < i) {
9120 *mp = m_pullup(*mp, i);
9121 if (*mp == NULL) {
9122 BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
9123 return -1;
9124 }
9125 }
9126
9127 eh1 = mtod(*mp, struct ether_header *);
9128 ether_type = ntohs(eh1->ether_type);
9129
9130 /*
9131 * Check for SNAP/LLC.
9132 */
9133 if (ether_type < ETHERMTU) {
9134 struct llc *llc2 = (struct llc *)(eh1 + 1);
9135
9136 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
9137 llc2->llc_dsap == LLC_SNAP_LSAP &&
9138 llc2->llc_ssap == LLC_SNAP_LSAP &&
9139 llc2->llc_control == LLC_UI) {
9140 ether_type = htons(llc2->llc_un.type_snap.ether_type);
9141 snap = 1;
9142 }
9143 }
9144
9145 /*
9146 * If we're trying to filter bridge traffic, don't look at anything
9147 * other than IP and ARP traffic. If the filter doesn't understand
9148 * IPv6, don't allow IPv6 through the bridge either. This is lame
9149 * since if we really wanted, say, an AppleTalk filter, we are hosed,
9150 * but of course we don't have an AppleTalk filter to begin with.
9151 * (Note that since pfil doesn't understand ARP it will pass *ALL*
9152 * ARP traffic.)
9153 */
9154 switch (ether_type) {
9155 case ETHERTYPE_ARP:
9156 case ETHERTYPE_REVARP:
9157 return 0; /* Automatically pass */
9158
9159 case ETHERTYPE_IP:
9160 case ETHERTYPE_IPV6:
9161 break;
9162 default:
9163 /*
9164 * Check to see if the user wants to pass non-ip
9165 * packets, these will not be checked by pf and
9166 * passed unconditionally so the default is to drop.
9167 */
9168 if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
9169 goto bad;
9170 }
9171 break;
9172 }
9173
9174 /* Strip off the Ethernet header and keep a copy. */
9175 m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
9176 m_adj(*mp, ETHER_HDR_LEN);
9177
9178 /* Strip off snap header, if present */
9179 if (snap) {
9180 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
9181 m_adj(*mp, sizeof(struct llc));
9182 }
9183
9184 /*
9185 * Check the IP header for alignment and errors
9186 */
9187 switch (ether_type) {
9188 case ETHERTYPE_IP:
9189 error = bridge_ip_checkbasic(mp);
9190 break;
9191 case ETHERTYPE_IPV6:
9192 error = bridge_ip6_checkbasic(mp);
9193 break;
9194 default:
9195 error = 0;
9196 break;
9197 }
9198 if (error) {
9199 goto bad;
9200 }
9201
9202 error = 0;
9203
9204 /*
9205 * Run the packet through pf rules
9206 */
9207 switch (ether_type) {
9208 case ETHERTYPE_IP:
9209 /*
9210 * before calling the firewall, swap fields the same as
9211 * IP does. here we assume the header is contiguous
9212 */
9213 ip = mtod(*mp, struct ip *);
9214
9215 ip->ip_len = ntohs(ip->ip_len);
9216 ip->ip_off = ntohs(ip->ip_off);
9217
9218 if (ifp != NULL) {
9219 error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
9220 }
9221
9222 if (*mp == NULL || error != 0) { /* filter may consume */
9223 break;
9224 }
9225
9226 /* Recalculate the ip checksum and restore byte ordering */
9227 ip = mtod(*mp, struct ip *);
9228 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9229 if (hlen < (int)sizeof(struct ip)) {
9230 goto bad;
9231 }
9232 if (hlen > (*mp)->m_len) {
9233 if ((*mp = m_pullup(*mp, hlen)) == 0) {
9234 goto bad;
9235 }
9236 ip = mtod(*mp, struct ip *);
9237 if (ip == NULL) {
9238 goto bad;
9239 }
9240 }
9241 ip->ip_len = htons(ip->ip_len);
9242 ip->ip_off = htons(ip->ip_off);
9243 ip->ip_sum = 0;
9244 if (hlen == sizeof(struct ip)) {
9245 ip->ip_sum = in_cksum_hdr(ip);
9246 } else {
9247 ip->ip_sum = in_cksum(*mp, hlen);
9248 }
9249 break;
9250
9251 case ETHERTYPE_IPV6:
9252 if (ifp != NULL) {
9253 error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
9254 }
9255
9256 if (*mp == NULL || error != 0) { /* filter may consume */
9257 break;
9258 }
9259 break;
9260 default:
9261 error = 0;
9262 break;
9263 }
9264
9265 if (*mp == NULL) {
9266 return error;
9267 }
9268 if (error != 0) {
9269 goto bad;
9270 }
9271
9272 error = -1;
9273
9274 /*
9275 * Finally, put everything back the way it was and return
9276 */
9277 if (snap) {
9278 M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
9279 if (*mp == NULL) {
9280 return error;
9281 }
9282 bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
9283 }
9284
9285 M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
9286 if (*mp == NULL) {
9287 return error;
9288 }
9289 bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
9290
9291 return 0;
9292
9293 bad:
9294 m_freem(*mp);
9295 *mp = NULL;
9296 return error;
9297 }
9298
9299 #if BRIDGESTP
9300 static void
9301 bridge_bstp_input_list(struct bstp_port *bp, struct mbuf *head)
9302 {
9303 mbuf_t next_packet = NULL;
9304
9305 for (mbuf_t scan = head; scan != NULL; scan = next_packet) {
9306 next_packet = scan->m_nextpkt;
9307 scan->m_nextpkt = NULL;
9308 bstp_input(bp, scan);
9309 }
9310 }
9311 #endif /* BRIDGESTP */
9312
9313 static mblist
9314 bridge_filter_arp_list(struct bridge_iflist * bif, mbuf_t m)
9315 {
9316 mbuf_t next_packet = NULL;
9317 mblist ret;
9318
9319 mblist_init(&ret);
9320 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9321 errno_t error;
9322
9323 /* take packet out of the list */
9324 next_packet = scan->m_nextpkt;
9325 scan->m_nextpkt = NULL;
9326 /* filter the ARP packet */
9327 error = bridge_host_filter_arp(bif, &scan);
9328 if (error != 0 && scan != NULL) {
9329 if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9330 brlog_mbuf_data(scan, 0,
9331 sizeof(struct ether_header) +
9332 sizeof(struct ip));
9333 }
9334 m_freem(scan);
9335 scan = NULL;
9336 }
9337 if (scan != NULL) {
9338 /* add it to the list */
9339 mblist_append(&ret, scan);
9340 }
9341 }
9342 return ret;
9343 }
9344
9345 static mbuf_t
9346 bridge_filter_checksum(ifnet_t bridge_ifp, struct bridge_iflist * bif, mbuf_t m,
9347 bool is_ipv4, bool host_filter, bool checksum)
9348 {
9349 uint32_t dbgf = 0;
9350 errno_t error;
9351 ip_packet_info info;
9352 u_int mac_hlen = sizeof(struct ether_header);
9353
9354 if (host_filter) {
9355 dbgf |= BR_DBGF_HOSTFILTER;
9356 }
9357 if (checksum) {
9358 dbgf |= BR_DBGF_CHECKSUM;
9359 }
9360 /* get the IP protocol header */
9361 error = bridge_get_ip_proto(&m, mac_hlen, is_ipv4, &info,
9362 &bif->bif_stats.brms_in_ip);
9363 if (error != 0) {
9364 BRIDGE_LOG(LOG_NOTICE, dbgf,
9365 "%s(%s) bridge_get_ip_proto failed %d",
9366 bridge_ifp->if_xname,
9367 bif->bif_ifp->if_xname, error);
9368 goto drop;
9369 }
9370 if (host_filter) {
9371 bool drop = true;
9372
9373 /* restrict IP protocols */
9374 switch (info.ip_proto) {
9375 case IPPROTO_ICMP:
9376 case IPPROTO_IGMP:
9377 drop = !is_ipv4;
9378 break;
9379 case IPPROTO_TCP:
9380 case IPPROTO_UDP:
9381 drop = false;
9382 break;
9383 case IPPROTO_ICMPV6:
9384 drop = is_ipv4;
9385 break;
9386 default:
9387 break;
9388 }
9389 if (drop) {
9390 BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
9391 goto drop;
9392 }
9393 bridge_hostfilter_stats.brhf_ip_ok += 1;
9394 }
9395 if (checksum) {
9396 /* need to compute IP/UDP/TCP/checksums */
9397 error = bridge_offload_checksum(&m, &info, &bif->bif_stats);
9398 if (error != 0) {
9399 BRIDGE_LOG(LOG_NOTICE, dbgf,
9400 "%s(%s) bridge_offload_checksum failed %d",
9401 bridge_ifp->if_xname,
9402 bif->bif_ifp->if_xname, error);
9403 goto drop;
9404 }
9405 }
9406 return m;
9407
9408 drop:
9409 /* toss the packet */
9410 if (m != NULL) {
9411 if (host_filter &&
9412 BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9413 brlog_mbuf_data(m, 0,
9414 sizeof(struct ether_header) +
9415 sizeof(struct ip));
9416 }
9417 m_freem(m);
9418 m = NULL;
9419 }
9420 return NULL;
9421 }
9422
9423 static mblist
9424 bridge_filter_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9425 mbuf_t in_list, ether_type_flag_t etypef, bool host_filter, bool checksum)
9426 {
9427 bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
9428 mbuf_t next_packet = NULL;
9429 mblist ret;
9430
9431 mblist_init(&ret);
9432 for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9433 /* take packet out of the list */
9434 next_packet = scan->m_nextpkt;
9435 scan->m_nextpkt = NULL;
9436 scan = bridge_filter_checksum(bridge_ifp, bif,
9437 scan, is_ipv4, host_filter, checksum);
9438 if (scan != NULL) {
9439 /* add packet to the list */
9440 mblist_append(&ret, scan);
9441 }
9442 }
9443 return ret;
9444 }
9445
9446 static mbuf_t
9447 bridge_checksum_offload_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9448 mbuf_t m, bool is_ipv4)
9449 {
9450 mblist ret;
9451 mbuf_t next_packet;
9452
9453 mblist_init(&ret);
9454 for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
9455 uint32_t csum_flags;
9456
9457 /* take it out of the list */
9458 next_packet = scan->m_nextpkt;
9459 scan->m_nextpkt = NULL;
9460
9461 csum_flags = scan->m_pkthdr.csum_flags;
9462 if ((csum_flags & checksum_request_flags) != 0) {
9463 /* compute the checksum now */
9464 scan = bridge_filter_checksum(bridge_ifp, bif, scan,
9465 is_ipv4, false, true);
9466 if (scan != NULL) {
9467 /* clear offload now */
9468 scan->m_pkthdr.csum_flags &= csum_flags;
9469 }
9470 }
9471 if (scan != NULL) {
9472 mblist_append(&ret, scan);
9473 }
9474 }
9475 return ret.head;
9476 }
9477
9478 static mbuf_t
9479 copy_broadcast_packet(mbuf_t m)
9480 {
9481 mbuf_t mc;
9482
9483 /* make a copy of the packet */
9484 mc = m_dup(m, M_DONTWAIT);
9485 if (mc != NULL) {
9486 struct ether_header *eh;
9487
9488 /* make copy look like it is broadcast */
9489 mc->m_flags |= M_BCAST;
9490 eh = mtod(mc, struct ether_header *);
9491 bcopy(etherbroadcastaddr, eh->ether_dhost, ETHER_ADDR_LEN);
9492 }
9493 return mc;
9494 }
9495
9496 static mblist
9497 bridge_find_broadcast_ipv4(mbuf_t in_list, mbuf_t * ip_bcast_head)
9498 {
9499 mblist ip_bcast;
9500 mbuf_t next_packet = NULL;
9501 mblist ret;
9502
9503 mblist_init(&ret);
9504 mblist_init(&ip_bcast);
9505 for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
9506 mbuf_t bcast_pkt = NULL;
9507 uint8_t *header;
9508
9509 /* take packet out of the list */
9510 next_packet = scan->m_nextpkt;
9511 scan->m_nextpkt = NULL;
9512
9513 header = get_ether_ip_header_ptr(&scan, FALSE);
9514 if (header != NULL) {
9515 struct in_addr dst;
9516 struct ip *iphdr;
9517
9518 iphdr = (struct ip *)(header + sizeof(struct ether_header));
9519 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
9520 if (dst.s_addr == INADDR_BROADCAST) {
9521 bcast_pkt = copy_broadcast_packet(scan);
9522 }
9523 }
9524 if (bcast_pkt != NULL) {
9525 /* add packet to broadcast list */
9526 mblist_append(&ip_bcast, bcast_pkt);
9527 }
9528 if (scan != NULL) {
9529 /* add packet back into the list */
9530 mblist_append(&ret, scan);
9531 }
9532 }
9533 *ip_bcast_head = ip_bcast.head;
9534 return ret;
9535 }
9536
9537 static ifnet_t
9538 bridge_find_member(struct bridge_softc * sc, uint8_t * lladdr,
9539 struct bridge_iflist * sbif)
9540 {
9541 struct bridge_iflist * bif;
9542
9543 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
9544 if (bif == sbif) {
9545 /* skip the input member */
9546 continue;
9547 }
9548 if (_ether_cmp(IF_LLADDR(bif->bif_ifp), lladdr) == 0) {
9549 return bif->bif_ifp;
9550 }
9551 }
9552 return NULL;
9553 }
9554
9555
/*
 * Function: bridge_input_list
 *
 * Purpose:
 *   Process a list of input packets through the bridge.
 *   The caller ensures that all of the packets in `list` have the same
 *   ethernet header; `eh_in_p` points to that header.
 *
 * Parameters:
 *   sc          bridge the packets are processed for
 *   ifp         interface the packets were received on
 *   eh_in_p     ethernet header shared by every packet in `list`
 *   list        the packets to process
 *   is_promisc  combined with the broadcast/multicast check to decide
 *               whether the packets are unicast to `ifp` itself
 *
 * Returns:
 *   The list of packets that were not consumed/freed by the bridge.
 *   `head` (and `tail`) are NULL if all of the packets were consumed.
 *   Note that only `head`/`tail` are cleared on the consuming paths.
 */
static mblist
bridge_input_list(struct bridge_softc * sc, ifnet_t ifp,
    struct ether_header * eh_in_p, mblist list, bool is_promisc)
{
	struct bridge_iflist *  bif;
	ifnet_t                 bridge_ifp;
	bool                    bridge_needs_input;
	bool                    checksum_offload;
	uint8_t *               dhost;
#if BRIDGESTP
	bool                    discarding = false;
#endif /* BRIDGESTP */
	ifnet_t                 dst_if = NULL;
	errno_t                 error;
	ether_type_flag_t       etypef;
	bool                    host_filter;
	bool                    host_filter_drop = false;
	mbuf_ref_t              ip_bcast = NULL;
	bool                    is_bridge_mac = false;
	bool                    is_broadcast;
	bool                    is_ifp_mac;
	ifnet_t                 member_input = NULL;
	uint8_t *               shost;
	bool                    uses_virtio = false;
	uint16_t                vlan;

	/*
	 * Every `goto done` below must be issued with the bridge lock
	 * released; the `done` label decides what happens to whatever is
	 * still on `list`.
	 */
	if (ifp->if_bridge == NULL) {
		/* no longer part of bridge */
		goto done;
	}
	bridge_ifp = sc->sc_ifp;
	is_broadcast = IS_BCAST_MCAST(list.head);
	/* neither broadcast/multicast nor promiscuous: unicast to `ifp` */
	is_ifp_mac = (!is_broadcast && !is_promisc);
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
	    "%s from %s count %d head 0x%llx.0x%llx tail 0x%llx.0x%llx",
	    bridge_ifp->if_xname, ifp->if_xname, list.count,
	    (uint64_t)VM_KERNEL_ADDRPERM(list.head),
	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.head, void *)),
	    (uint64_t)VM_KERNEL_ADDRPERM(list.tail),
	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.tail, void *)));

	/* assume we'll return all packets */
	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s not running passing along",
		    bridge_ifp->if_xname);
		goto done;
	}

	/*
	 * NOTE(review): no variable named `m` is declared at this point in
	 * the function, so this presumably only compiles because
	 * VLANTAGOF() ignores its argument -- confirm against the macro
	 * definition.
	 */
	vlan = VLANTAGOF(m);

	/* lookup the bridge member */
	BRIDGE_LOCK(sc);
	bif = bridge_lookup_member_if(sc, ifp);
	if (bif == NULL) {
		BRIDGE_UNLOCK(sc);
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s bridge_lookup_member_if failed",
		    bridge_ifp->if_xname);
		goto done;
	}

	uses_virtio = bif_uses_virtio(bif);

	/*
	 * host filter drops packets that:
	 * - are not ARP, IPv4, or IPv6
	 * - have incorrect source MAC address
	 */
	host_filter = (bif->bif_flags & BIFF_HOST_FILTER) != 0;
	etypef = ether_type_flag_get(eh_in_p->ether_type);
	if (host_filter
	    && (etypef & ETHER_TYPE_FLAG_IP_ARP) == 0) {
		/* ether type not one of ARP, IPv4, or IPv6 */
		BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
		host_filter_drop = true;
	} else if ((bif->bif_flags & BIFF_HF_HWSRC) != 0 &&
	    bcmp(eh_in_p->ether_shost, bif->bif_hf_hwsrc, ETHER_ADDR_LEN)
	    != 0) {
		/* only allow the single source MAC address */
		BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr,
		    __func__, __LINE__);
		host_filter_drop = true;
	}
	if (host_filter_drop) {
		/* the whole list shares the offending header: free it all */
		BRIDGE_UNLOCK(sc);
		m_freem_list(list.head);
		list.head = list.tail = NULL;
		goto done;
	}

#if BRIDGESTP
	discarding = (bif->bif_ifflags & IFBIF_STP) != 0 &&
	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING;
#endif /* BRIDGESTP */

	dhost = eh_in_p->ether_dhost;
	shost = eh_in_p->ether_shost;
	/*
	 * Reserved multicast address listed in 802.1D section 7.12.6
	 * must not be forwarded by the bridge.
	 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
	 */
	if (is_broadcast) {
		if (IS_MCAST(list.head)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
			    " multicast: "
			    "%02x:%02x:%02x:%02x:%02x:%02x",
			    dhost[0], dhost[1],
			    dhost[2], dhost[3],
			    dhost[4], dhost[5]);
		}
		/* compare the first five bytes; the last byte selects the range */
		if (bcmp(dhost, bstp_etheraddr, (ETHER_ADDR_LEN - 1)) == 0) {
			if (dhost[5] == BSTP_ETHERADDR_RANGE_FIRST) {
				/* multicast for spanning tree */
#if BRIDGESTP
				bridge_bstp_input_list(&bif->bif_stp, list.head);
#else /* BRIDGESTP */
				m_freem_list(list.head);
#endif /* BRIDGESTP */
				list.head = list.tail = NULL;
				BRIDGE_UNLOCK(sc);
				goto done;
			}
			if (dhost[5] <= BSTP_ETHERADDR_RANGE_LAST) {
				/* allow packet to continue up the stack */
				BRIDGE_UNLOCK(sc);
				goto done;
			}
		}
		/* broadcast to all members */
		os_atomic_add(&bridge_ifp->if_imcasts, list.count, relaxed);
	}

#if BRIDGESTP
	if (discarding) {
		/* member is in the STP discarding state: return the packets */
		BRIDGE_UNLOCK(sc);
		goto done;
	}
#endif /* BRIDGESTP */

	/* If the interface is learning, record the address. */
	if ((bif->bif_ifflags & IFBIF_LEARNING) != 0) {
		error = bridge_rtupdate(sc, shost, vlan, bif, 0, IFBAF_DYNAMIC);
		/*
		 * If the interface has addresses limits then deny any source
		 * that is not in the cache.
		 */
		if (error != 0 && bif->bif_addrmax) {
			BRIDGE_UNLOCK(sc);
			goto done;
		}
	}
#if BRIDGESTP
	if ((bif->bif_ifflags & IFBIF_STP) != 0 &&
	    bif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
		/* STP learning state: address recorded above, no forwarding */
		BRIDGE_UNLOCK(sc);
		goto done;
	}
#endif /* BRIDGESTP */

	/*
	 * If the packet is not IP, let the host filter drop ARP packets.
	 * Otherwise, if the host filter is enabled or we need to compute
	 * checksums, do that.
	 * Otherwise, if MAC-NAT is enabled and this is an IPv4 packet,
	 * check for IPv4 broadcast packets. Accumulate those in a separate
	 * list `ip_bcast`.
	 */
	checksum_offload = bif_has_checksum_offload(bif);
	if (!ether_type_flag_is_ip(etypef)) {
		/* host filter process ARP */
		if (host_filter) {
			/* host filter check earlier means this must be ARP */
			VERIFY(etypef == ETHER_TYPE_FLAG_ARP);
			list = bridge_filter_arp_list(bif, list.head);
			if (list.head == NULL) {
				VERIFY(list.tail == NULL);
				BRIDGE_UNLOCK(sc);
				goto done;
			}
		}
	} else if (host_filter || checksum_offload) {
		/* host filter and/or checksum */
		list = bridge_filter_checksum_list(bridge_ifp, bif,
		    list.head, etypef, host_filter, checksum_offload);
		if (list.head == NULL) {
			VERIFY(list.tail == NULL);
			BRIDGE_UNLOCK(sc);
			goto done;
		}
	} else if (is_ifp_mac && bif == sc->sc_mac_nat_bif &&
	    etypef == ETHER_TYPE_FLAG_IPV4) {
		/* look for broadcast IPv4 packet */
		list = bridge_find_broadcast_ipv4(list.head, &ip_bcast);
		if (list.head == NULL && ip_bcast == NULL) {
			/* all packets were consumed */
			BRIDGE_UNLOCK(sc);
			goto done;
		}
	}

	/*
	 * If the bridge has ULP attached, and the destination MAC
	 * matches the bridge interface, claim the packets for the bridge
	 * interface.
	 */
	bridge_needs_input = (sc->sc_flags & SCF_PROTO_ATTACHED) != 0;
	if (bridge_needs_input &&
	    !is_broadcast && _ether_cmp(dhost, IF_LLADDR(bridge_ifp)) == 0) {
		is_bridge_mac = true;
	}
	if (is_ifp_mac) {
		/* unicast to the interface */
		if (sc->sc_mac_nat_bif == bif) {
			mbuf_ref_t      forward = NULL;

			if (list.head != NULL) {
				/* handle MAC-NAT if enabled */
				list = bridge_mac_nat_input_list(sc, ifp,
				    list.head, &forward);
			}
			if (ip_bcast != NULL) {
				/* forward to all members except this one */
				/* bridge_broadcast_list unlocks */
				bridge_broadcast_list(sc, bif, etypef,
				    ip_bcast, pkt_direction_RX);
			} else {
				BRIDGE_UNLOCK(sc);
			}
			/* lock is released on both branches above */
			if (forward != NULL) {
				bridge_mac_nat_forward_list(bridge_ifp, etypef,
				    forward);
			}
		} else {
			BRIDGE_UNLOCK(sc);
		}
		/* unicast packets for this interface do not get forwarded */
		goto done;
	}
	if (is_bridge_mac || list.head == NULL) {
		/* claimed for the bridge interface (handled at done), or empty */
		BRIDGE_UNLOCK(sc);
		goto done;
	}
	if (!is_broadcast) {
		/* find where to send the packet */
		dst_if = bridge_rtlookup(sc, dhost, vlan);
		if (ifp == dst_if) {
			/* nothing to forward */
			BRIDGE_UNLOCK(sc);
			goto done;
		}
		if (dst_if == NULL) {
			/* if a member is the dhost, deliver as input */
			member_input = bridge_find_member(sc, dhost, bif);
			if (member_input != NULL) {
				/* grab packets destined to member */
				BRIDGE_UNLOCK(sc);
				goto done;
			}
			/* if a member is shost, there's a loop, drop it */
			if (bridge_find_member(sc, shost, bif) != NULL) {
				BRIDGE_UNLOCK(sc);
				m_freem_list(list.head);
				list.head = list.tail = NULL;
				goto done;
			}
		}
	}
	if (dst_if == NULL) {
		mbuf_t  m;

		/*
		 * No known destination: broadcast a copy of the list and
		 * leave the originals on `list` to be returned.
		 */
		m = copy_packet_list(list.head);
		if (m != NULL) {
			/* bridge_broadcast_list unlocks */
			bridge_broadcast_list(sc, bif, etypef, m,
			    pkt_direction_RX);
		} else {
			BRIDGE_UNLOCK(sc);
		}
	} else {
		/* bridge_forward_list() consumes list and unlocks */
		bridge_forward_list(sc, bif, dst_if, etypef, list.head);
		list.head = list.tail = NULL;
	}

done:
	/* dispose of whatever is left on the list */
	if (list.head != NULL) {
		if (member_input != NULL) {
			/* member gets the packets */
			inject_input_packet_list(member_input, list.head, true);
			list.head = list.tail = NULL;
		} else if (is_bridge_mac) {
			/* bridge consumes all the unicast packets */
			bridge_interface_input_list(bridge_ifp, etypef, list,
			    uses_virtio);
			list.head = list.tail = NULL;
		} else {
			/* packets go back to the caller */
			adjust_input_packet_list(list.head);
		}
	}
	return list;
}
9873
9874 static inline void
9875 update_mbuf_flags(struct ifnet * ifp, mbuf_t m, struct ether_header * eh)
9876 {
9877 /* duplicate some of the work done in ether_demux */
9878 if ((eh->ether_dhost[0] & 1) == 0) {
9879 if (_ether_cmp(eh->ether_dhost, IF_LLADDR(ifp)) != 0) {
9880 m->m_flags |= M_PROMISC;
9881 }
9882 } else {
9883 /* Check for broadcast */
9884 if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0) {
9885 m->m_flags |= M_BCAST;
9886 } else {
9887 m->m_flags |= M_MCAST;
9888 }
9889 }
9890 if (m->m_flags & M_HASFCS) {
9891 /*
9892 * If the M_HASFCS is set by the driver we want to make sure
9893 * that we strip off the trailing FCS data before handing it
9894 * up the stack.
9895 */
9896 m_adj(m, -ETHER_CRC_LEN);
9897 m->m_flags &= ~M_HASFCS;
9898 }
9899 return;
9900 }
9901
9902 static mbuf_t
9903 bridge_pf_list(mbuf_t m, ifnet_t ifp, uint32_t sc_filter_flags, bool input)
9904 {
9905 mbuf_t next_packet = NULL;
9906 mblist ret;
9907
9908 mblist_init(&ret);
9909 for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9910 next_packet = scan->m_nextpkt;
9911
9912 /* remove packet from list, and pass through PF */
9913 scan->m_nextpkt = NULL;
9914 MBUF_INPUT_CHECK(scan, ifp);
9915 bridge_pf(&scan, ifp, sc_filter_flags, input);
9916 if (scan != NULL) {
9917 /* add packet back to the list */
9918 mblist_append(&ret, scan);
9919 }
9920 }
9921 return ret.head;
9922 }
9923
9924 static inline bool
9925 bridge_check_frame_header(struct bridge_softc * sc, ifnet_t ifp, mbuf_t m)
9926 {
9927 bool included = false;
9928 char * __single header;
9929 size_t header_length = 0;
9930
9931 header = m->m_pkthdr.pkt_hdr;
9932 if (header >= (char *)mbuf_datastart(m) &&
9933 header <= mtod(m, char *)) {
9934 header_length = mtod(m, char *) - header;
9935 if (header_length >= ETHER_HDR_LEN) {
9936 included = true;
9937 }
9938 }
9939 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9940 "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
9941 "header length %lu", sc->sc_ifp->if_xname,
9942 ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
9943 (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
9944 (uint64_t)VM_KERNEL_ADDRPERM(header),
9945 included ? "inside" : "outside", header_length);
9946 if (!included) {
9947 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9948 "%s: frame_header outside mbuf", ifp->if_xname);
9949 }
9950 return included;
9951 }
9952
9953
9954 mbuf_t
9955 bridge_early_input(struct ifnet *ifp, mbuf_t in_list, u_int32_t cnt)
9956 {
9957 struct ether_header eh;
9958 mblist list;
9959 volatile bool list_is_promisc;
9960 int n_lists = 0;
9961 mbuf_t next_packet = NULL;
9962 mblist ret;
9963 struct bridge_softc * __single sc = ifp->if_bridge;
9964 uint32_t sc_filter_flags;
9965
9966 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9967 "(%s): count %u", ifp->if_xname, cnt);
9968
9969 /* run packet list through PF first */
9970 sc_filter_flags = sc->sc_filter_flags;
9971 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
9972 in_list = bridge_pf_list(in_list, ifp, sc_filter_flags, true);
9973 }
9974
9975 /* form sublists with the same ethernet header */
9976 mblist_init(&list);
9977 mblist_init(&ret);
9978 for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9979 struct ether_header * eh_p;
9980 volatile bool is_promisc;
9981 mblist resid;
9982
9983 /* take it out of the list */
9984 next_packet = scan->m_nextpkt;
9985 scan->m_nextpkt = NULL;
9986
9987 /* don't loop the packet */
9988 if ((scan->m_flags & M_PROTO1) != 0) {
9989 mblist_append(&ret, scan);
9990 continue;
9991 }
9992 /* Check if this mbuf looks valid */
9993 MBUF_INPUT_CHECK(scan, ifp);
9994
9995 /* if the frame header isn't in the first mbuf, ignore */
9996 if (!bridge_check_frame_header(sc, ifp, scan)) {
9997 mblist_append(&ret, scan);
9998 continue;
9999 }
10000 eh_p = __unsafe_forge_single(struct ether_header *,
10001 scan->m_pkthdr.pkt_hdr);
10002 update_mbuf_flags(ifp, scan, eh_p);
10003
10004 /* set start back to include ether header */
10005 _mbuf_adjust_pkthdr_and_data(scan, -ETHER_HDR_LEN);
10006
10007 is_promisc = get_and_clear_promisc(scan);
10008 if (list.head == NULL) {
10009 /* start a new list */
10010 mblist_append(&list, scan);
10011 bcopy(eh_p, &eh, sizeof(eh));
10012 list_is_promisc = is_promisc;
10013 } else if (bcmp(eh_p, &eh, sizeof(eh)) != 0) {
10014 n_lists++;
10015 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
10016 "(%s): sublist %u pkts %u",
10017 ifp->if_xname, n_lists, list.count);
10018 if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
10019 brlog_ether_header(&eh);
10020 }
10021 resid = bridge_input_list(sc, ifp, &eh, list,
10022 list_is_promisc);
10023 if (resid.head != NULL) {
10024 /* add to the packets to be returned */
10025 mblist_append_list(&ret, resid);
10026 }
10027 /* start new list */
10028 mblist_init(&list);
10029 mblist_append(&list, scan);
10030 list_is_promisc = is_promisc;
10031 bcopy(eh_p, &eh, sizeof(eh));
10032 } else {
10033 mblist_append(&list, scan);
10034 VERIFY(is_promisc == list_is_promisc);
10035 }
10036 if (next_packet == NULL) {
10037 /* last list */
10038 n_lists++;
10039 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
10040 "(%s): sublist %u pkts %u",
10041 ifp->if_xname, n_lists, list.count);
10042 if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
10043 brlog_ether_header(&eh);
10044 }
10045 resid = bridge_input_list(sc, ifp, &eh, list,
10046 list_is_promisc);
10047 if (resid.head != NULL) {
10048 /* add to the packets to be returned */
10049 mblist_append_list(&ret, resid);
10050 }
10051 }
10052 }
10053 return ret.head;
10054 }
10055
10056 /*
10057 * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
10058 * All rights reserved.
10059 *
10060 * Redistribution and use in source and binary forms, with or without
10061 * modification, are permitted provided that the following conditions
10062 * are met:
10063 * 1. Redistributions of source code must retain the above copyright
10064 * notice, this list of conditions and the following disclaimer.
10065 * 2. Redistributions in binary form must reproduce the above copyright
10066 * notice, this list of conditions and the following disclaimer in the
10067 * documentation and/or other materials provided with the distribution.
10068 *
10069 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
10070 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
10071 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
10072 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
10073 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
10074 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
10075 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
10076 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
10077 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
10078 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
10079 * SUCH DAMAGE.
10080 */
10081
/*
 * XXX-ste: Maybe this function should be moved into kern/uipc_mbuf.c
 *
 * Create a queue of packets/segments which fit the given mss + hdr_len.
 * m0 points to the mbuf chain to be segmented.
 * This function splits the payload (m0->m_pkthdr.len - hdr_len)
 * into segments of length MSS bytes and then copies the first hdr_len bytes
 * from m0 to the top of each segment.
 * If hdr2_buf is not NULL (hdr2_len is the buffer length), it is copied
 * into each segment after the first hdr_len bytes.
 *
 * Returns the list of segments on success (m0 is the first segment).
 * On failure the whole queue, including m0, is freed and the returned
 * list is re-initialized.
 */
10096
10097 static mblist
10098 m_seg(struct mbuf *m0, int hdr_len, int mss, char * hdr2_buf __sized_by_or_null(hdr2_len), int hdr2_len)
10099 {
10100 int off = 0, n, firstlen;
10101 struct mbuf *mseg;
10102 int total_len = m0->m_pkthdr.len;
10103 mblist ret;
10104
10105 mblist_init(&ret);
10106 mblist_append(&ret, m0);
10107
10108 /*
10109 * Segmentation useless
10110 */
10111 if (total_len <= hdr_len + mss) {
10112 n = 1;
10113 goto done;
10114 }
10115 if (hdr2_buf == NULL || hdr2_len <= 0) {
10116 hdr2_buf = NULL;
10117 hdr2_len = 0;
10118 }
10119
10120 off = hdr_len + mss;
10121 firstlen = mss; /* first segment stored in the original mbuf */
10122 ret.bytes = off;
10123 for (n = 1; off < total_len; off += mss, n++) {
10124 struct mbuf *m;
10125 /*
10126 * Copy the header from the original packet
10127 * and create a new mbuf chain
10128 */
10129 if (MHLEN < hdr_len) {
10130 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
10131 } else {
10132 m = m_gethdr(M_NOWAIT, MT_DATA);
10133 }
10134
10135 if (m == NULL) {
10136 #ifdef GSO_DEBUG
10137 D("MGETHDR error\n");
10138 #endif
10139 goto err;
10140 }
10141
10142 m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
10143
10144 m->m_len = hdr_len;
10145 /*
10146 * if the optional header is present, copy it
10147 */
10148 if (hdr2_buf != NULL) {
10149 m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
10150 }
10151
10152 m->m_flags |= (m0->m_flags & M_COPYFLAGS);
10153 if (off + mss >= total_len) { /* last segment */
10154 mss = total_len - off;
10155 }
10156 /*
10157 * Copy the payload from original packet
10158 */
10159 mseg = m_copym(m0, off, mss, M_NOWAIT);
10160 if (mseg == NULL) {
10161 m_freem(m);
10162 #ifdef GSO_DEBUG
10163 D("m_copym error\n");
10164 #endif
10165 goto err;
10166 }
10167 m_cat(m, mseg);
10168
10169 m->m_pkthdr.len = hdr_len + hdr2_len + mss;
10170 m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
10171 /*
10172 * Copy the checksum flags and data (in_cksum() need this)
10173 */
10174 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
10175 m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
10176 m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
10177
10178 mblist_append(&ret, m);
10179 }
10180
10181 /*
10182 * Update first segment.
10183 * If the optional header is present, is necessary
10184 * to insert it into the first segment.
10185 */
10186 if (hdr2_buf == NULL) {
10187 m_adj(m0, hdr_len + firstlen - total_len);
10188 m0->m_pkthdr.len = hdr_len + firstlen;
10189 } else {
10190 mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
10191 if (mseg == NULL) {
10192 #ifdef GSO_DEBUG
10193 D("m_copym error\n");
10194 #endif
10195 goto err;
10196 }
10197 m_adj(m0, hdr_len - total_len);
10198 m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
10199 m_cat(m0, mseg);
10200 m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
10201 }
10202
10203 done:
10204 return ret;
10205
10206 err:
10207 if (ret.head != NULL) {
10208 m_freem_list(ret.head);
10209 mblist_init(&ret);
10210 }
10211 return ret;
10212 }
10213
10214 /*
10215 * Wrappers of IPv4 checksum functions
10216 */
10217 static inline void
10218 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
10219 {
10220 m->m_data += mac_hlen;
10221 m->m_len -= mac_hlen;
10222 m->m_pkthdr.len -= mac_hlen;
10223 #if __FreeBSD_version < 1000000
10224 ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
10225 #endif
10226
10227 in_delayed_cksum(m);
10228
10229 #if __FreeBSD_version < 1000000
10230 ip->ip_len = htons(ip->ip_len);
10231 #endif
10232 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
10233 m->m_len += mac_hlen;
10234 m->m_pkthdr.len += mac_hlen;
10235 m->m_data -= mac_hlen;
10236 }
10237
10238 static inline void
10239 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
10240 {
10241 m->m_data += mac_hlen;
10242
10243 ip->ip_sum = in_cksum(m, ip_hlen);
10244
10245 m->m_pkthdr.csum_flags &= ~CSUM_IP;
10246 m->m_data -= mac_hlen;
10247 }
10248
/*
 * Structure that contains the state during the TCP segmentation
 */
struct gso_ip_tcp_state {
	/* re-points hdr/tcp and recomputes pay_len for the given segment */
	void    (*update)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* fixes up the IP and TCP headers/checksums of the given segment */
	void    (*internal)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* number of bytes addressable through `hdr` */
	u_int ip_m0_len;
	/* start of the IP header of the current segment */
	uint8_t * __counted_by(ip_m0_len) hdr;
	/* TCP header of the current segment */
	struct tcphdr *tcp;
	/* link-layer header length */
	int mac_hlen;
	/* IP header length */
	int ip_hlen;
	/* TCP header length (th_off << 2) */
	int tcp_hlen;
	/* mac_hlen + ip_hlen + tcp_hlen */
	int hlen;
	/* payload length of the current segment (packet length - hlen) */
	int pay_len;
	/* CSUM_* bits for the checksums to be computed in software */
	int sw_csum;
	/* TCP sequence number, host byte order, advanced per segment */
	uint32_t tcp_seq;
	/* IPv4 identification, host byte order, incremented per segment */
	uint16_t ip_id;
	/* NOTE(review): not written or read by the functions visible here */
	boolean_t is_tx;
};
10270
10271 /*
10272 * Update the pointers to TCP and IPv4 headers
10273 */
10274 static inline void
10275 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10276 {
10277 state->hdr = mtodo(m, state->mac_hlen);
10278 state->ip_m0_len = m->m_len - state->mac_hlen;
10279 state->ip_hlen = state->ip_hlen;
10280 state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10281 state->pay_len = m->m_pkthdr.len - state->hlen;
10282 }
10283
10284 /*
10285 * Set properly the TCP and IPv4 headers
10286 */
10287 static inline void
10288 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10289 {
10290 struct ip *ip;
10291 /*
10292 * Update IP header
10293 */
10294 ip = (struct ip *)state->hdr;
10295 ip->ip_id = htons((state->ip_id)++);
10296 ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
10297 /*
10298 * TCP Checksum
10299 */
10300 state->tcp->th_sum = 0;
10301 state->tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
10302 htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
10303 /*
10304 * Checksum HW not supported (TCP)
10305 */
10306 if (state->sw_csum & CSUM_DELAY_DATA) {
10307 gso_ipv4_data_cksum(m, ip, state->mac_hlen);
10308 }
10309
10310 state->tcp_seq += state->pay_len;
10311 /*
10312 * IP Checksum
10313 */
10314 ip->ip_sum = 0;
10315 /*
10316 * Checksum HW not supported (IP)
10317 */
10318 if (state->sw_csum & CSUM_IP) {
10319 gso_ipv4_hdr_cksum(m, ip, state->mac_hlen, state->ip_hlen);
10320 }
10321 }
10322
10323
10324 /*
10325 * Updates the pointers to TCP and IPv6 headers
10326 */
10327 static inline void
10328 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10329 {
10330 state->hdr = mtodo(m, state->mac_hlen);
10331 state->ip_m0_len = m->m_len - state->mac_hlen;
10332 state->ip_hlen = state->ip_hlen;
10333 state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10334 state->pay_len = m->m_pkthdr.len - state->hlen;
10335 }
10336
10337 /*
10338 * Sets properly the TCP and IPv6 headers
10339 */
10340 static inline void
10341 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10342 {
10343 struct ip6_hdr *ip6;
10344
10345 ip6 = (struct ip6_hdr *)state->hdr;
10346 ip6->ip6_plen = htons(m->m_pkthdr.len - state->mac_hlen - state->ip_hlen);
10347 /*
10348 * TCP Checksum
10349 */
10350 state->tcp->th_sum = 0;
10351 state->tcp->th_sum = in6_pseudo(&ip6->ip6_src, &ip6->ip6_dst,
10352 htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
10353 /*
10354 * Checksum HW not supported (TCP)
10355 */
10356 if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
10357 (void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
10358 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
10359 }
10360 state->tcp_seq += state->pay_len;
10361 }
10362
10363 /*
10364 * Init the state during the TCP segmentation
10365 */
10366 static void
10367 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
10368 bool is_ipv4, int mac_hlen, int ip_hlen,
10369 uint8_t *__counted_by(ip_m0_len) ip_hdr, u_int ip_m0_len,
10370 struct tcphdr * tcp_hdr)
10371 {
10372 #pragma unused(ifp)
10373
10374 state->hdr = ip_hdr;
10375 state->ip_m0_len = ip_m0_len;
10376 state->ip_hlen = ip_hlen;
10377 state->tcp = tcp_hdr;
10378 if (is_ipv4) {
10379 state->ip_id = ntohs(((struct ip *)state->hdr)->ip_id);
10380 state->update = gso_ipv4_tcp_update;
10381 state->internal = gso_ipv4_tcp_internal;
10382 state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
10383 } else {
10384 state->update = gso_ipv6_tcp_update;
10385 state->internal = gso_ipv6_tcp_internal;
10386 state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
10387 }
10388 state->mac_hlen = mac_hlen;
10389 state->tcp_hlen = state->tcp->th_off << 2;
10390 state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
10391 state->tcp_seq = ntohl(state->tcp->th_seq);
10392 //state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
10393 return;
10394 }
10395
/*
 * GSO on TCP/IP (v4 or v6)
 *
 * Segment the given mbuf and return the list of packets.
 *
 * `state` must have been set up with gso_ip_tcp_init_state() for `m0`.
 * The segment size is chosen as follows:
 * - the MSS recorded in the packet (_mbuf_get_tso_mss()), if non-zero;
 * - otherwise, on the receive path, an approximation derived from the
 *   packet's rx_seg_cnt;
 * - otherwise, the interface MTU minus the IP/TCP header lengths and a
 *   tunable reduction;
 * - on the transmit path with a known MSS and an interface that passes
 *   TSO_IPV4_OK/TSO_IPV6_OK, the interface TSO MTU minus the headers
 *   ("TSO with GSO"); the TSO flags are left on the packet in that case.
 *
 * The returned list is whatever m_seg() produced; when it holds more than
 * one packet, the IP/TCP headers of every segment have been fixed up
 * through the state callbacks.
 */
static mblist
gso_ip_tcp(ifnet_t ifp, mbuf_t m0, struct gso_ip_tcp_state *state, bool is_tx)
{
	struct mbuf *m;
	int orig_mss;
	int mss = 0;
#ifdef GSO_STATS
	int total_len = m0->m_pkthdr.len;
#endif /* GSO_STATS */
	mblist seg;
	bool tso_with_gso = false;

	orig_mss = mss = _mbuf_get_tso_mss(m0);
	if (mss == 0 && !is_tx) {
		uint8_t seg_cnt = m0->m_pkthdr.rx_seg_cnt;

		if (seg_cnt != 0) {
			uint32_t hdr_len;
			uint32_t len;

			/* approximate the MSS using LRO seg cnt */
			hdr_len = state->ip_hlen + state->tcp_hlen;
			len = mbuf_pkthdr_len(m0) - hdr_len - ETHER_HDR_LEN;
			mss = len / seg_cnt;
			/* the segment count has been used up */
			m0->m_pkthdr.rx_seg_cnt = 0;
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
			    "%s: mss %d = len %d / seg cnt %d",
			    ifp->if_xname, mss, len, seg_cnt);
		}
	}
	if (mss == 0) {
		/* hack: we don't have the actual MSS */
		u_int reduce_mss;

		reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
		    : if_bridge_tso_reduce_mss_forwarding;
		mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen -
		    reduce_mss;
		assert(mss > 0);
	} else if (is_tx) {
		/* is_ipv4 is only read below when do_tso is true */
		bool is_ipv4;
		bool do_tso = true;

		if (TSO_IPV4_OK(ifp, m0)) {
			is_ipv4 = true;
		} else if (TSO_IPV6_OK(ifp, m0)) {
			is_ipv4 = false;
		} else {
			do_tso = false;
		}
		if (do_tso) { /* TSO with GSO */
			uint32_t if_tso_max;

			/* split into chunks sized by the interface TSO MTU */
			if_tso_max = get_if_tso_mtu(ifp, is_ipv4);
			mss = if_tso_max - state->ip_hlen - state->tcp_hlen
			    - ETHER_HDR_LEN;
			tso_with_gso = true;
		}
	}
	if (!tso_with_gso) {
		/* clear TSO flags */
		m0->m_pkthdr.csum_flags &= ~_TSO_CSUM;
	}
	seg = m_seg(m0, state->hlen, mss, 0, 0);
	if (seg.head == NULL || seg.head->m_nextpkt == NULL) {
		/* segmentation failed, or the packet did not need splitting */
		return seg;
	}
	if (tso_with_gso) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
		    "%s TX gso size %d mss %d nsegs %d",
		    ifp->if_xname,
		    mss, orig_mss, seg.count);
	} else {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
		    "%s %s mss %d nsegs %d",
		    ifp->if_xname,
		    is_tx ? "TX" : "RX",
		    mss, seg.count);
	}
#ifdef GSO_STATS
	GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
	GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
	GSOSTAT_ADD(tcp.gsos_osegments, seg.count);
#endif /* GSO_STATS */

	/* first pkt */
	VERIFY(seg.head == m0);
	m = m0;

	state->update(state, m);

	/*
	 * Walk the segments. At the top of each iteration `state` points at
	 * the headers of `m`, which is never the last segment here (the list
	 * has at least two packets): FIN/PUSH are stripped from it and its
	 * headers are finalized. The state is then moved to the next
	 * segment, where CWR is stripped and the running sequence number
	 * (advanced by internal()) is stored.
	 */
	do {
		state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);

		state->internal(state, m);
		m = m->m_nextpkt;
		state->update(state, m);
		state->tcp->th_flags &= ~TH_CWR;
		state->tcp->th_seq = htonl(state->tcp_seq);
	} while (m->m_nextpkt);

	/* last pkt */
	state->internal(state, m);

#ifdef GSO_STATS
	/*
	 * NOTE(review): `error` is not declared in this function, so this
	 * block would not compile if GSO_STATS were defined.
	 */
	if (!error) {
		GSOSTAT_INC(tcp.gsos_segmented);
		GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
		GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
		GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
	}
#endif /* GSO_STATS */
	return seg;
}
10516
10517 /*
10518 * GSO for TCP/IPv[46]
10519 */
10520 static mblist
10521 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
10522 u_int mac_hlen, bool is_ipv4, bool is_tx)
10523 {
10524 uint32_t csum_flags;
10525 struct gso_ip_tcp_state state;
10526 struct tcphdr *tcp;
10527
10528 assert(info_p->ip_proto_hdr != NULL);
10529 tcp = (struct tcphdr *)(void *)info_p->ip_proto_hdr;
10530 gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
10531 info_p->ip_hlen + info_p->ip_opt_len,
10532 info_p->ip_hdr, info_p->ip_m0_len, tcp);
10533 csum_flags = is_ipv4 ? CSUM_DELAY_DATA : CSUM_DELAY_IPV6_DATA; /* XXX */
10534 m->m_pkthdr.csum_flags |= csum_flags;
10535 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
10536 return gso_ip_tcp(ifp, m, &state, is_tx);
10537 }
10538
10539 static mblist
10540 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx)
10541 {
10542 int error;
10543 ip_packet_info info;
10544 struct bripstats stats; /* XXX ignored */
10545 mblist ret;
10546
10547 error = bridge_get_tcp_header(&m, mac_hlen, is_ipv4, &info, &stats);
10548 if (error != 0) {
10549 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10550 "%s bridge_get_tcp_header failed %d (%s)",
10551 ifp->if_xname, error,
10552 is_tx ? "TX" : "RX");
10553 if (m != NULL) {
10554 m_freem(m);
10555 m = NULL;
10556 }
10557 goto no_segment;
10558 }
10559 if (info.ip_proto_hdr == NULL) {
10560 /* not actually a TCP packet, no segmentation */
10561 goto no_segment;
10562 }
10563 if (!is_tx && ip_packet_info_dst_is_our_ip(&info, ifp->if_index)) {
10564 goto no_segment;
10565 }
10566 return gso_tcp_with_info(ifp, m, &info, mac_hlen, is_ipv4, is_tx);
10567
10568 no_segment:
10569 mblist_init(&ret);
10570 if (m != NULL) {
10571 mblist_append(&ret, m);
10572 }
10573 return ret;
10574 }
10575