1 /*
2 * Copyright (c) 2004-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * if_bond.c
31 * - bond/failover interface
32 * - implements IEEE 802.3ad Link Aggregation
33 */
34
35 /*
36 * Modification History:
37 *
38 * April 29, 2004 Dieter Siegmund ([email protected])
39 * - created
40 */
41
42 #include <sys/param.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/queue.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51 #include <sys/kern_event.h>
52 #include <net/bpf.h>
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/kpi_interface.h>
56 #include <net/kpi_interfacefilter.h>
57 #include <net/if_arp.h>
58 #include <net/if_dl.h>
59 #include <net/if_ether.h>
60 #include <net/if_types.h>
61 #include <net/if_bond_var.h>
62 #include <net/ieee8023ad.h>
63 #include <net/lacp.h>
64 #include <net/dlil.h>
65 #include <sys/time.h>
66 #include <net/devtimer.h>
67 #include <net/if_vlan_var.h>
68 #include <net/kpi_protocol.h>
69 #include <sys/protosw.h>
70 #include <kern/locks.h>
71 #include <kern/zalloc.h>
72 #include <os/refcnt.h>
73
74 #include <netinet/in.h>
75 #include <netinet/if_ether.h>
76 #include <netinet/in_systm.h>
77 #include <netinet/ip.h>
78 #include <netinet/ip6.h>
79
80 #include <net/if_media.h>
81 #include <net/multicast_list.h>
82
SYSCTL_DECL(_net_link);
/* net.link.bond: sysctl node that parents the bond tunables below */
SYSCTL_NODE(_net_link, OID_AUTO, bond, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Bond interface");

/*
 * Non-zero enables the debug printf() calls guarded by if_bond_debug in
 * this file; exported read/write as the "debug" OID under net.link.bond.
 */
static int if_bond_debug = 0;
SYSCTL_INT(_net_link_bond, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bond_debug, 0, "Bond interface debug logs");
90
/*
 * Ethernet multicast address initialized from
 * IEEE8023AD_SLOW_PROTO_MULTICAST; ifbond_add_slow_proto_multicast()
 * joins this group on the bond interface.
 */
static struct ether_addr slow_proto_multicast = {
	.octet = IEEE8023AD_SLOW_PROTO_MULTICAST
};
94
typedef struct ifbond_s ifbond, * ifbond_ref;
typedef struct bondport_s bondport, * bondport_ref;

/* limits and name handed to IF_CLONE_INITIALIZER() for bond_cloner */
#define BOND_MAXUNIT            128
#define BOND_ZONE_MAX_ELEM      MIN(IFNETS_MAX, BOND_MAXUNIT)
#define BONDNAME                "bond"

/* malloc type passed to FREE() for bond allocations */
#define M_BOND          M_DEVBUF

/*
 * printf helpers for a 6-byte ethernet address: EA_FORMAT is the format
 * string (unpadded hex octets), EA_LIST(ea) expands to the six matching
 * arguments, EA_CH(e, i) fetches byte i of e as an u_char.
 */
#define EA_FORMAT       "%x:%x:%x:%x:%x:%x"
#define EA_CH(e, i)     ((u_char)((u_char *)(e))[(i)])
#define EA_LIST(ea)     EA_CH(ea,0),EA_CH(ea,1),EA_CH(ea,2),EA_CH(ea,3),EA_CH(ea,4),EA_CH(ea,5)

/* currently a plain alias for printf */
#define timestamp_printf        printf
109
110 /**
111 ** bond locks
112 **/
113
/* lock group and the single mutex taken by bond_lock()/bond_unlock() */
static LCK_GRP_DECLARE(bond_lck_grp, "if_bond");
static LCK_MTX_DECLARE(bond_lck_mtx, &bond_lck_grp);
116
/* Assert that the calling thread owns bond_lck_mtx. */
static __inline__ void
bond_assert_lock_held(void)
{
	LCK_MTX_ASSERT(&bond_lck_mtx, LCK_MTX_ASSERT_OWNED);
}
122
/* Assert that the calling thread does NOT own bond_lck_mtx. */
static __inline__ void
bond_assert_lock_not_held(void)
{
	LCK_MTX_ASSERT(&bond_lck_mtx, LCK_MTX_ASSERT_NOTOWNED);
}
128
/* Acquire the global bond mutex (bond_lck_mtx). */
static __inline__ void
bond_lock(void)
{
	lck_mtx_lock(&bond_lck_mtx);
}
134
/* Release the global bond mutex (bond_lck_mtx). */
static __inline__ void
bond_unlock(void)
{
	lck_mtx_unlock(&bond_lck_mtx);
}
140
141 /**
142 ** bond structures, types
143 **/
144
/*
 * Identifying triple for a link aggregation group: LACP system,
 * system priority and key.
 */
struct LAG_info_s {
	lacp_system                 li_system;
	lacp_system_priority        li_system_priority;
	lacp_key                    li_key;
};
typedef struct LAG_info_s LAG_info, * LAG_info_ref;
151
/* forward declarations plus the TAILQ head types used to link them */
struct bondport_s;
TAILQ_HEAD(port_list, bondport_s);      /* list of bondport_s */
struct ifbond_s;
TAILQ_HEAD(ifbond_list, ifbond_s);      /* list of ifbond_s */
struct LAG_s;
TAILQ_HEAD(lag_list, LAG_s);            /* list of LAG_s */

/* NOTE(review): these typedefs repeat the ones declared earlier in the file */
typedef struct ifbond_s ifbond, * ifbond_ref;
typedef struct bondport_s bondport, * bondport_ref;
161
/*
 * A link aggregation group: a list of bondports plus the LAG_info that
 * identifies it.
 */
struct LAG_s {
	TAILQ_ENTRY(LAG_s)          lag_list;       /* linkage in a struct lag_list */
	struct port_list            lag_port_list;  /* bondports in this LAG */
	short                       lag_port_count; /* presumably length of lag_port_list -- confirm */
	short                       lag_selected_port_count;
	int                         lag_active_media;
	LAG_info                    lag_info;
};
typedef struct LAG_s LAG, * LAG_ref;
171
/*
 * Per-port LACP partner information (stored in bondport_s
 * po_partner_state): LAG identity, port, port priority and state.
 */
typedef struct partner_state_s {
	LAG_info                    ps_lag_info;
	lacp_port                   ps_port;
	lacp_port_priority          ps_port_priority;
	lacp_actor_partner_state    ps_state;
} partner_state, * partner_state_ref;
178
/*
 * Per-bond-interface state (the softc allocated through bond_cloner).
 * Lifetime is governed by ifb_retain_count; see ifbond_retain() and
 * ifbond_release().
 */
struct ifbond_s {
	TAILQ_ENTRY(ifbond_s)       ifb_bond_list;  /* linkage in a struct ifbond_list */
	int                         ifb_flags;      /* IFBF_* bits */
	struct os_refcnt            ifb_retain_count;
	char                        ifb_name[IFNAMSIZ]; /* "<cloner name><unit>" */
	struct ifnet *              ifb_ifp;        /* the bond's own ifnet */
	bpf_packet_func             ifb_bpf_input;
	bpf_packet_func             ifb_bpf_output;
	int                         ifb_altmtu;
	struct port_list            ifb_port_list;  /* member ports */
	short                       ifb_port_count;
	struct lag_list             ifb_lag_list;
	lacp_key                    ifb_key;        /* set to unit + 1 at clone create */
	short                       ifb_max_active;/* 0 == unlimited */
	LAG_ref                     ifb_active_lag;
	struct ifmultiaddr *        ifb_ifma_slow_proto; /* slow protocol multicast membership, dropped in ifbond_release() */
	bondport_ref *              ifb_distributing_array; /* freed in ifbond_release() */
	int                         ifb_distributing_count;
	int                         ifb_last_link_event;
	int                         ifb_mode;/* LACP, STATIC */
};
200
/*
 * Snapshot of an interface's media state as reported by SIOCGIFMEDIA
 * (see interface_media_info()).
 */
struct media_info {
	int         mi_active;      /* copy of ifmediareq ifm_active */
	int         mi_status;      /* copy of ifmediareq ifm_status */
};
205
/*
 * Values stored in bondport_s po_receive_state; presumably the states
 * of bondport_receive_machine() -- confirm against its definition.
 */
enum {
	ReceiveState_none = 0,
	ReceiveState_INITIALIZE = 1,
	ReceiveState_PORT_DISABLED = 2,
	ReceiveState_EXPIRED = 3,
	ReceiveState_LACP_DISABLED = 4,
	ReceiveState_DEFAULTED = 5,
	ReceiveState_CURRENT = 6,
};

/* the enum values above are held in a single byte */
typedef u_char ReceiveState;
217
/*
 * Selection state of a port (bondport_s po_selected). The values are
 * the IF_BOND_STATUS_SELECTED_STATE_* constants, held in a single byte.
 */
enum {
	SelectedState_UNSELECTED = IF_BOND_STATUS_SELECTED_STATE_UNSELECTED,
	SelectedState_SELECTED = IF_BOND_STATUS_SELECTED_STATE_SELECTED,
	SelectedState_STANDBY = IF_BOND_STATUS_SELECTED_STATE_STANDBY
};
typedef u_char SelectedState;
224
225 static __inline__ const char *
SelectedStateString(SelectedState s)226 SelectedStateString(SelectedState s)
227 {
228 static const char * names[] = { "UNSELECTED", "SELECTED", "STANDBY" };
229
230 if (s <= SelectedState_STANDBY) {
231 return names[s];
232 }
233 return "<unknown>";
234 }
235
/*
 * Values stored in bondport_s po_mux_state; presumably the states of
 * bondport_mux_machine() -- confirm against its definition.
 */
enum {
	MuxState_none = 0,
	MuxState_DETACHED = 1,
	MuxState_WAITING = 2,
	MuxState_ATTACHED = 3,
	MuxState_COLLECTING_DISTRIBUTING = 4,
};

/* the enum values above are held in a single byte */
typedef u_char MuxState;
245
/*
 * Single-bit flags; presumably the bits of bondport_s po_control_flags,
 * each recording a setup step applied to the port -- confirm at the
 * use sites.
 */
#define PORT_CONTROL_FLAGS_IN_LIST               0x01
#define PORT_CONTROL_FLAGS_PROTO_ATTACHED        0x02
#define PORT_CONTROL_FLAGS_FILTER_ATTACHED       0x04
#define PORT_CONTROL_FLAGS_LLADDR_SET            0x08
#define PORT_CONTROL_FLAGS_MTU_SET               0x10
#define PORT_CONTROL_FLAGS_PROMISCUOUS_SET       0x20
#define PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET  0x40
253
254
/*
 * Return true when at least one bit of flags_to_test is set in flags.
 * A zero flags_to_test therefore always yields false.
 */
static inline bool
uint32_bit_is_set(uint32_t flags, uint32_t flags_to_test)
{
	const uint32_t overlap = flags & flags_to_test;

	return overlap != 0;
}
260
/* Turn on every bit of flags_to_set in *flags_p; other bits are kept. */
static inline void
uint32_bit_set(uint32_t * flags_p, uint32_t flags_to_set)
{
	*flags_p = *flags_p | flags_to_set;
}
266
/* Turn off every bit of flags_to_clear in *flags_p; other bits are kept. */
static inline void
uint32_bit_clear(uint32_t * flags_p, uint32_t flags_to_clear)
{
	*flags_p = *flags_p & ~flags_to_clear;
}
272
/*
 * Per-member-port state. The first group of fields is general port
 * bookkeeping; the fields after the LACP marker hold the LACP timers,
 * partner information and state machine state.
 */
struct bondport_s {
	TAILQ_ENTRY(bondport_s)     po_port_list;   /* linkage in ifbond_s ifb_port_list */
	ifbond_ref                  po_bond;        /* bond this port belongs to */
	struct multicast_list       po_multicast;   /* programmed by bond_setmulti() */
	struct ifnet *              po_ifp;         /* the member interface */
	struct ether_addr           po_saved_addr;
	int                         po_enabled;
	char                        po_name[IFNAMSIZ];
	struct ifdevmtu             po_devmtu;
	uint32_t                    po_control_flags; /* presumably PORT_CONTROL_FLAGS_* bits -- confirm */
	interface_filter_t          po_filter;

	/* LACP */
	TAILQ_ENTRY(bondport_s)     po_lag_port_list; /* linkage in LAG_s lag_port_list */
	devtimer_ref                po_current_while_timer;
	devtimer_ref                po_periodic_timer;
	devtimer_ref                po_wait_while_timer;
	devtimer_ref                po_transmit_timer;
	partner_state               po_partner_state;
	lacp_port_priority          po_priority;
	lacp_actor_partner_state    po_actor_state;
	u_char                      po_flags;       /* BONDPORT_FLAGS_* bits */
	u_char                      po_periodic_interval;
	u_char                      po_n_transmit;
	ReceiveState                po_receive_state;
	MuxState                    po_mux_state;
	SelectedState               po_selected;
	int32_t                     po_last_transmit_secs;
	struct media_info           po_media_info;
	uint64_t                    po_force_link_event_time;
	LAG_ref                     po_lag;
};
305
/* ifbond_s ifb_flags bits; accessed through the ifbond_flags_*() helpers */
#define IFBF_PROMISC            0x1     /* promiscuous mode */
#define IFBF_IF_DETACHING       0x2     /* interface is detaching */
#define IFBF_LLADDR             0x4     /* specific link address requested */
#define IFBF_CHANGE_IN_PROGRESS 0x8     /* interface add/remove in progress */

/* forward declaration; the definition is not in this part of the file */
static int bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p,
    user_addr_t datap);
313
314 static __inline__ bool
ifbond_flags_if_detaching(ifbond_ref ifb)315 ifbond_flags_if_detaching(ifbond_ref ifb)
316 {
317 return (ifb->ifb_flags & IFBF_IF_DETACHING) != 0;
318 }
319
320 static __inline__ void
ifbond_flags_set_if_detaching(ifbond_ref ifb)321 ifbond_flags_set_if_detaching(ifbond_ref ifb)
322 {
323 ifb->ifb_flags |= IFBF_IF_DETACHING;
324 return;
325 }
326
327 static __inline__ bool
ifbond_flags_lladdr(ifbond_ref ifb)328 ifbond_flags_lladdr(ifbond_ref ifb)
329 {
330 return (ifb->ifb_flags & IFBF_LLADDR) != 0;
331 }
332
333 static __inline__ bool
ifbond_flags_change_in_progress(ifbond_ref ifb)334 ifbond_flags_change_in_progress(ifbond_ref ifb)
335 {
336 return (ifb->ifb_flags & IFBF_CHANGE_IN_PROGRESS) != 0;
337 }
338
339 static __inline__ void
ifbond_flags_set_change_in_progress(ifbond_ref ifb)340 ifbond_flags_set_change_in_progress(ifbond_ref ifb)
341 {
342 ifb->ifb_flags |= IFBF_CHANGE_IN_PROGRESS;
343 return;
344 }
345
346 static __inline__ void
ifbond_flags_clear_change_in_progress(ifbond_ref ifb)347 ifbond_flags_clear_change_in_progress(ifbond_ref ifb)
348 {
349 ifb->ifb_flags &= ~IFBF_CHANGE_IN_PROGRESS;
350 return;
351 }
352
353 static __inline__ bool
ifbond_flags_promisc(ifbond_ref ifb)354 ifbond_flags_promisc(ifbond_ref ifb)
355 {
356 return (ifb->ifb_flags & IFBF_PROMISC) != 0;
357 }
358
359 static __inline__ void
ifbond_flags_set_promisc(ifbond_ref ifb)360 ifbond_flags_set_promisc(ifbond_ref ifb)
361 {
362 ifb->ifb_flags |= IFBF_PROMISC;
363 return;
364 }
365
366 static __inline__ void
ifbond_flags_clear_promisc(ifbond_ref ifb)367 ifbond_flags_clear_promisc(ifbond_ref ifb)
368 {
369 ifb->ifb_flags &= ~IFBF_PROMISC;
370 return;
371 }
372
373 /*
374 * bondport_ref->po_flags bits
375 */
/* one bit each; all eight bits of the u_char po_flags are assigned */
#define BONDPORT_FLAGS_NTT              0x01
#define BONDPORT_FLAGS_READY            0x02
#define BONDPORT_FLAGS_SELECTED_CHANGED 0x04
#define BONDPORT_FLAGS_MUX_ATTACHED     0x08
#define BONDPORT_FLAGS_DISTRIBUTING     0x10
#define BONDPORT_FLAGS_UNUSED2          0x20
#define BONDPORT_FLAGS_UNUSED3          0x40
#define BONDPORT_FLAGS_UNUSED4          0x80
384
385 static __inline__ void
bondport_flags_set_ntt(bondport_ref p)386 bondport_flags_set_ntt(bondport_ref p)
387 {
388 p->po_flags |= BONDPORT_FLAGS_NTT;
389 return;
390 }
391
392 static __inline__ void
bondport_flags_clear_ntt(bondport_ref p)393 bondport_flags_clear_ntt(bondport_ref p)
394 {
395 p->po_flags &= ~BONDPORT_FLAGS_NTT;
396 return;
397 }
398
399 static __inline__ int
bondport_flags_ntt(bondport_ref p)400 bondport_flags_ntt(bondport_ref p)
401 {
402 return (p->po_flags & BONDPORT_FLAGS_NTT) != 0;
403 }
404
405 static __inline__ void
bondport_flags_set_ready(bondport_ref p)406 bondport_flags_set_ready(bondport_ref p)
407 {
408 p->po_flags |= BONDPORT_FLAGS_READY;
409 return;
410 }
411
412 static __inline__ void
bondport_flags_clear_ready(bondport_ref p)413 bondport_flags_clear_ready(bondport_ref p)
414 {
415 p->po_flags &= ~BONDPORT_FLAGS_READY;
416 return;
417 }
418
419 static __inline__ int
bondport_flags_ready(bondport_ref p)420 bondport_flags_ready(bondport_ref p)
421 {
422 return (p->po_flags & BONDPORT_FLAGS_READY) != 0;
423 }
424
425 static __inline__ void
bondport_flags_set_selected_changed(bondport_ref p)426 bondport_flags_set_selected_changed(bondport_ref p)
427 {
428 p->po_flags |= BONDPORT_FLAGS_SELECTED_CHANGED;
429 return;
430 }
431
432 static __inline__ void
bondport_flags_clear_selected_changed(bondport_ref p)433 bondport_flags_clear_selected_changed(bondport_ref p)
434 {
435 p->po_flags &= ~BONDPORT_FLAGS_SELECTED_CHANGED;
436 return;
437 }
438
439 static __inline__ int
bondport_flags_selected_changed(bondport_ref p)440 bondport_flags_selected_changed(bondport_ref p)
441 {
442 return (p->po_flags & BONDPORT_FLAGS_SELECTED_CHANGED) != 0;
443 }
444
445 static __inline__ void
bondport_flags_set_mux_attached(bondport_ref p)446 bondport_flags_set_mux_attached(bondport_ref p)
447 {
448 p->po_flags |= BONDPORT_FLAGS_MUX_ATTACHED;
449 return;
450 }
451
452 static __inline__ void
bondport_flags_clear_mux_attached(bondport_ref p)453 bondport_flags_clear_mux_attached(bondport_ref p)
454 {
455 p->po_flags &= ~BONDPORT_FLAGS_MUX_ATTACHED;
456 return;
457 }
458
459 static __inline__ int
bondport_flags_mux_attached(bondport_ref p)460 bondport_flags_mux_attached(bondport_ref p)
461 {
462 return (p->po_flags & BONDPORT_FLAGS_MUX_ATTACHED) != 0;
463 }
464
465 static __inline__ void
bondport_flags_set_distributing(bondport_ref p)466 bondport_flags_set_distributing(bondport_ref p)
467 {
468 p->po_flags |= BONDPORT_FLAGS_DISTRIBUTING;
469 return;
470 }
471
472 static __inline__ void
bondport_flags_clear_distributing(bondport_ref p)473 bondport_flags_clear_distributing(bondport_ref p)
474 {
475 p->po_flags &= ~BONDPORT_FLAGS_DISTRIBUTING;
476 return;
477 }
478
479 static __inline__ int
bondport_flags_distributing(bondport_ref p)480 bondport_flags_distributing(bondport_ref p)
481 {
482 return (p->po_flags & BONDPORT_FLAGS_DISTRIBUTING) != 0;
483 }
484
/*
 * Driver-wide state: the list head for bonds plus the LACP system
 * identifier and priority chosen in bond_globals_init().
 */
typedef struct bond_globals_s {
	struct ifbond_list          ifbond_list;
	lacp_system                 system;
	lacp_system_priority        system_priority;
} * bond_globals_ref;

/* NULL until bond_globals_init() succeeds; assigned under bond_lock() */
static bond_globals_ref g_bond;
492
493 /**
494 ** packet_buffer routines
495 ** - thin wrapper for mbuf
496 **/
497
/* a packet buffer is simply an mbuf pointer */
typedef struct mbuf * packet_buffer_ref;
499
500 static packet_buffer_ref
packet_buffer_allocate(int length)501 packet_buffer_allocate(int length)
502 {
503 packet_buffer_ref m;
504 int size;
505
506 /* leave room for ethernet header */
507 size = length + sizeof(struct ether_header);
508 if (size > (int)MHLEN) {
509 if (size > (int)MCLBYTES) {
510 printf("bond: packet_buffer_allocate size %d > max %u\n",
511 size, MCLBYTES);
512 return NULL;
513 }
514 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
515 } else {
516 m = m_gethdr(M_WAITOK, MT_DATA);
517 }
518 if (m == NULL) {
519 return NULL;
520 }
521 m->m_len = size;
522 m->m_pkthdr.len = size;
523 return m;
524 }
525
526 static void *
packet_buffer_byteptr(packet_buffer_ref buf)527 packet_buffer_byteptr(packet_buffer_ref buf)
528 {
529 return buf->m_data + sizeof(struct ether_header);
530 }
531
/*
 * Event codes passed as the `event` argument to the bondport_*_machine()
 * functions declared below.
 */
typedef enum {
	LAEventStart,
	LAEventTimeout,
	LAEventPacket,
	LAEventMediaChange,
	LAEventSelectedChange,
	LAEventPortMoved,
	LAEventReady
} LAEvent;
541
542 /**
543 ** Receive machine
544 **/
/*
 * Forward declarations. Each state machine takes the port, an LAEvent
 * and an event-specific data pointer; the definitions are not in this
 * part of the file.
 */
static void
bondport_receive_machine(bondport_ref p, LAEvent event,
    void * event_data);
/**
** Periodic Transmission machine
**/
static void
bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
    void * event_data);

/**
** Transmit machine
**/
/* sentinel event_data value for bondport_transmit_machine() */
#define TRANSMIT_MACHINE_TX_IMMEDIATE   ((void *)1)

static void
bondport_transmit_machine(bondport_ref p, LAEvent event,
    void * event_data);

/**
** Mux machine
**/
static void
bondport_mux_machine(bondport_ref p, LAEvent event,
    void * event_data);

/**
** bond, LAG
**/
static void
ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media);

static void
ifbond_deactivate_LAG(ifbond_ref bond, LAG_ref lag);

static int
ifbond_all_ports_ready(ifbond_ref bond);

static LAG_ref
ifbond_find_best_LAG(ifbond_ref bond, int * active_media);

static int
LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media);

static int
ifbond_selection(ifbond_ref bond);

static void
bond_handle_event(struct ifnet * port_ifp, int event_code);

/**
** bondport
**/

static void
bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p);

static void
bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf);

static bondport_ref
bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
    int active, int short_timeout, int * error);
static void
bondport_start(bondport_ref p);

static void
bondport_free(bondport_ref p);

static int
bondport_aggregatable(bondport_ref p);

static int
bondport_remove_from_LAG(bondport_ref p);

static void
bondport_set_selected(bondport_ref p, SelectedState s);

static int
bondport_matches_LAG(bondport_ref p, LAG_ref lag);

static void
bondport_link_status_changed(bondport_ref p);

static void
bondport_enable_distributing(bondport_ref p);

static void
bondport_disable_distributing(bondport_ref p);
634
635 static __inline__ int
bondport_collecting(bondport_ref p)636 bondport_collecting(bondport_ref p)
637 {
638 if (p->po_bond->ifb_mode == IF_BOND_MODE_LACP) {
639 return lacp_actor_partner_state_collecting(p->po_actor_state);
640 }
641 return TRUE;
642 }
643
644 /**
645 ** bond interface/dlil specific routines
646 **/
/* cloner callbacks referenced by bond_cloner */
static int bond_clone_create(struct if_clone *, u_int32_t, void *);
static int bond_clone_destroy(struct ifnet *);
/* remaining forward declarations for the interface/dlil entry points */
static int bond_output(struct ifnet *ifp, struct mbuf *m);
static int bond_ioctl(struct ifnet *ifp, u_long cmd, void * addr);
static int bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode,
    bpf_packet_func func);
static int bond_attach_protocol(struct ifnet *ifp);
static int bond_detach_protocol(struct ifnet *ifp);
static errno_t bond_iff_input(void *cookie, ifnet_t ifp,
    protocol_family_t protocol, mbuf_t *data, char **frame_ptr);
static int bond_attach_filter(struct ifnet *ifp, interface_filter_t * filter_p);
static int bond_setmulti(struct ifnet *ifp);
static int bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp);
static int bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp);
static void bond_if_free(struct ifnet * ifp);
static void interface_link_event(struct ifnet * ifp, u_int32_t event_code);
663
/*
 * Cloner for "bond" interfaces. It is registered by bond_clone_attach()
 * and also used to allocate and free the ifbond softc
 * (if_clone_softc_allocate() / if_clone_softc_deallocate()).
 */
static struct if_clone bond_cloner = IF_CLONE_INITIALIZER(BONDNAME,
    bond_clone_create,
    bond_clone_destroy,
    0,
    BOND_MAXUNIT,
    BOND_ZONE_MAX_ELEM,
    sizeof(ifbond));
671
672 static int
siocsifmtu(struct ifnet * ifp,int mtu)673 siocsifmtu(struct ifnet * ifp, int mtu)
674 {
675 struct ifreq ifr;
676
677 bzero(&ifr, sizeof(ifr));
678 ifr.ifr_mtu = mtu;
679 return ifnet_ioctl(ifp, 0, SIOCSIFMTU, &ifr);
680 }
681
682 static int
siocgifdevmtu(struct ifnet * ifp,struct ifdevmtu * ifdm_p)683 siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
684 {
685 struct ifreq ifr;
686 int error;
687
688 bzero(&ifr, sizeof(ifr));
689 error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
690 if (error == 0) {
691 *ifdm_p = ifr.ifr_devmtu;
692 }
693 return error;
694 }
695
696 static __inline__ void
ether_addr_copy(void * dest,const void * source)697 ether_addr_copy(void * dest, const void * source)
698 {
699 bcopy(source, dest, ETHER_ADDR_LEN);
700 return;
701 }
702
/* Take an additional reference on the bond (see ifbond_release()). */
static __inline__ void
ifbond_retain(ifbond_ref ifb)
{
	os_ref_retain(&ifb->ifb_retain_count);
}
708
709 static __inline__ void
ifbond_release(ifbond_ref ifb)710 ifbond_release(ifbond_ref ifb)
711 {
712 if (os_ref_release(&ifb->ifb_retain_count) != 0) {
713 return;
714 }
715
716 if (if_bond_debug) {
717 printf("ifbond_release(%s)\n", ifb->ifb_name);
718 }
719 if (ifb->ifb_ifma_slow_proto != NULL) {
720 if (if_bond_debug) {
721 printf("ifbond_release(%s) removing multicast\n",
722 ifb->ifb_name);
723 }
724 (void) if_delmulti_anon(ifb->ifb_ifma_slow_proto->ifma_ifp,
725 ifb->ifb_ifma_slow_proto->ifma_addr);
726 IFMA_REMREF(ifb->ifb_ifma_slow_proto);
727 }
728 if (ifb->ifb_distributing_array != NULL) {
729 FREE(ifb->ifb_distributing_array, M_BOND);
730 }
731 if_clone_softc_deallocate(&bond_cloner, ifb);
732 }
733
734 /*
735 * Function: ifbond_wait
736 * Purpose:
737 * Allows a single thread to gain exclusive access to the ifbond
738 * data structure. Some operations take a long time to complete,
739 * and some have side-effects that we can't predict. Holding the
740 * bond_lock() across such operations is not possible.
741 *
742 * For example:
743 * 1) The SIOCSIFLLADDR ioctl takes a long time (several seconds) to
744 * complete. Simply holding the bond_lock() would freeze all other
745 * data structure accesses during that time.
746 * 2) When we attach our protocol to the interface, a dlil event is
747 * generated and invokes our bond_event() function. bond_event()
748 * needs to take the bond_lock(), but we're already holding it, so
749 * we're deadlocked against ourselves.
750 * Notes:
751 * Before calling, you must be holding the bond_lock and have taken
752 * a reference on the ifbond_ref.
753 */
754 static void
ifbond_wait(ifbond_ref ifb,const char * msg)755 ifbond_wait(ifbond_ref ifb, const char * msg)
756 {
757 int waited = 0;
758
759 /* other add/remove in progress */
760 while (ifbond_flags_change_in_progress(ifb)) {
761 if (if_bond_debug) {
762 printf("%s: %s msleep\n", ifb->ifb_name, msg);
763 }
764 waited = 1;
765 (void)msleep(ifb, &bond_lck_mtx, PZERO, msg, 0);
766 }
767 /* prevent other bond list remove/add from taking place */
768 ifbond_flags_set_change_in_progress(ifb);
769 if (if_bond_debug && waited) {
770 printf("%s: %s woke up\n", ifb->ifb_name, msg);
771 }
772 return;
773 }
774
775 /*
776 * Function: ifbond_signal
777 * Purpose:
778 * Allows the thread that previously invoked ifbond_wait() to
779 * give up exclusive access to the ifbond data structure, and wake up
780 * any other threads waiting to access
781 * Notes:
782 * Before calling, you must be holding the bond_lock and have taken
783 * a reference on the ifbond_ref.
784 */
785 static void
ifbond_signal(ifbond_ref ifb,const char * msg)786 ifbond_signal(ifbond_ref ifb, const char * msg)
787 {
788 ifbond_flags_clear_change_in_progress(ifb);
789 wakeup((caddr_t)ifb);
790 if (if_bond_debug) {
791 printf("%s: %s wakeup\n", ifb->ifb_name, msg);
792 }
793 return;
794 }
795
796 /**
797 ** Media information
798 **/
799
800 static int
link_speed(int active)801 link_speed(int active)
802 {
803 switch (IFM_SUBTYPE(active)) {
804 case IFM_AUTO:
805 case IFM_MANUAL:
806 case IFM_NONE:
807 return 0;
808 case IFM_10_T:
809 case IFM_10_2:
810 case IFM_10_5:
811 case IFM_10_STP:
812 case IFM_10_FL:
813 return 10;
814 case IFM_100_TX:
815 case IFM_100_FX:
816 case IFM_100_T4:
817 case IFM_100_VG:
818 case IFM_100_T2:
819 return 100;
820 case IFM_1000_SX:
821 case IFM_1000_LX:
822 case IFM_1000_CX:
823 case IFM_1000_TX:
824 case IFM_1000_CX_SGMII:
825 case IFM_1000_KX:
826 return 1000;
827 case IFM_HPNA_1:
828 return 1;
829 default:
830 /* assume that new defined types are going to be at least 10GigE */
831 case IFM_10G_SR:
832 case IFM_10G_LR:
833 case IFM_10G_KX4:
834 case IFM_10G_KR:
835 case IFM_10G_CR1:
836 case IFM_10G_ER:
837 return 10000;
838 case IFM_2500_T:
839 return 2500;
840 case IFM_5000_T:
841 return 5000;
842 case IFM_20G_KR2:
843 return 20000;
844 case IFM_25G_CR:
845 case IFM_25G_KR:
846 case IFM_25G_SR:
847 case IFM_25G_LR:
848 return 25000;
849 case IFM_40G_CR4:
850 case IFM_40G_SR4:
851 case IFM_40G_LR4:
852 case IFM_40G_KR4:
853 return 40000;
854 case IFM_50G_CR2:
855 case IFM_50G_KR2:
856 case IFM_50G_SR2:
857 case IFM_50G_LR2:
858 return 50000;
859 case IFM_56G_R4:
860 return 56000;
861 case IFM_100G_CR4:
862 case IFM_100G_SR4:
863 case IFM_100G_KR4:
864 case IFM_100G_LR4:
865 return 100000;
866 }
867 }
868
869 static __inline__ int
media_active(const struct media_info * mi)870 media_active(const struct media_info * mi)
871 {
872 if ((mi->mi_status & IFM_AVALID) == 0) {
873 return 1;
874 }
875 return (mi->mi_status & IFM_ACTIVE) != 0;
876 }
877
878 static __inline__ int
media_full_duplex(const struct media_info * mi)879 media_full_duplex(const struct media_info * mi)
880 {
881 return (mi->mi_active & IFM_FDX) != 0;
882 }
883
884 static __inline__ int
media_type_unknown(const struct media_info * mi)885 media_type_unknown(const struct media_info * mi)
886 {
887 int unknown;
888
889 switch (IFM_SUBTYPE(mi->mi_active)) {
890 case IFM_AUTO:
891 case IFM_MANUAL:
892 case IFM_NONE:
893 unknown = 1;
894 break;
895 default:
896 unknown = 0;
897 break;
898 }
899 return unknown;
900 }
901
/*
 * Return 1 when the media is full duplex or its subtype is unknown
 * (see media_type_unknown()), otherwise 0.
 */
static __inline__ int
media_ok(const struct media_info * mi)
{
	if (media_full_duplex(mi)) {
		return 1;
	}
	return media_type_unknown(mi) != 0;
}
907
908 static __inline__ int
media_speed(const struct media_info * mi)909 media_speed(const struct media_info * mi)
910 {
911 return link_speed(mi->mi_active);
912 }
913
914 static struct media_info
interface_media_info(struct ifnet * ifp)915 interface_media_info(struct ifnet * ifp)
916 {
917 struct ifmediareq ifmr;
918 struct media_info mi;
919
920 bzero(&mi, sizeof(mi));
921 bzero(&ifmr, sizeof(ifmr));
922 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
923 if (ifmr.ifm_count != 0) {
924 mi.mi_status = ifmr.ifm_status;
925 mi.mi_active = ifmr.ifm_active;
926 }
927 }
928 return mi;
929 }
930
931 static int
if_siflladdr(struct ifnet * ifp,const struct ether_addr * ea_p)932 if_siflladdr(struct ifnet * ifp, const struct ether_addr * ea_p)
933 {
934 struct ifreq ifr;
935
936 /*
937 * XXX setting the sa_len to ETHER_ADDR_LEN is wrong, but the driver
938 * currently expects it that way
939 */
940 ifr.ifr_addr.sa_family = AF_UNSPEC;
941 ifr.ifr_addr.sa_len = ETHER_ADDR_LEN;
942 ether_addr_copy(ifr.ifr_addr.sa_data, ea_p);
943 return ifnet_ioctl(ifp, 0, SIOCSIFLLADDR, &ifr);
944 }
945
946 /**
947 ** bond_globals
948 **/
949 static bond_globals_ref
bond_globals_create(lacp_system_priority sys_pri,lacp_system_ref sys)950 bond_globals_create(lacp_system_priority sys_pri,
951 lacp_system_ref sys)
952 {
953 bond_globals_ref b;
954
955 b = kalloc_type(struct bond_globals_s, Z_WAITOK | Z_ZERO | Z_NOFAIL);
956 TAILQ_INIT(&b->ifbond_list);
957 b->system = *sys;
958 b->system_priority = sys_pri;
959 return b;
960 }
961
962 static int
bond_globals_init(void)963 bond_globals_init(void)
964 {
965 bond_globals_ref b;
966 int i;
967 struct ifnet * ifp;
968
969 bond_assert_lock_not_held();
970
971 if (g_bond != NULL) {
972 return 0;
973 }
974
975 /*
976 * use en0's ethernet address as the system identifier, and if it's not
977 * there, use en1 .. en3
978 */
979 ifp = NULL;
980 for (i = 0; i < 4; i++) {
981 char ifname[IFNAMSIZ + 1];
982 snprintf(ifname, sizeof(ifname), "en%d", i);
983 ifp = ifunit(ifname);
984 if (ifp != NULL) {
985 break;
986 }
987 }
988 b = NULL;
989 if (ifp != NULL) {
990 b = bond_globals_create(0x8000, (lacp_system_ref)IF_LLADDR(ifp));
991 }
992 bond_lock();
993 if (g_bond != NULL) {
994 bond_unlock();
995 kfree_type(struct bond_globals_s, b);
996 return 0;
997 }
998 g_bond = b;
999 bond_unlock();
1000 if (ifp == NULL) {
1001 return ENXIO;
1002 }
1003 if (b == NULL) {
1004 return ENOMEM;
1005 }
1006 return 0;
1007 }
1008
1009 static void
bond_bpf_vlan(struct ifnet * ifp,struct mbuf * m,const struct ether_header * eh_p,u_int16_t vlan_tag,bpf_packet_func func)1010 bond_bpf_vlan(struct ifnet * ifp, struct mbuf * m,
1011 const struct ether_header * eh_p,
1012 u_int16_t vlan_tag, bpf_packet_func func)
1013 {
1014 struct ether_vlan_header * vlh_p;
1015 struct mbuf * vl_m;
1016
1017 vl_m = m_get(M_DONTWAIT, MT_DATA);
1018 if (vl_m == NULL) {
1019 return;
1020 }
1021 /* populate a new mbuf containing the vlan ethernet header */
1022 vl_m->m_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1023 vlh_p = mtod(vl_m, struct ether_vlan_header *);
1024 bcopy(eh_p, vlh_p, offsetof(struct ether_header, ether_type));
1025 vlh_p->evl_encap_proto = htons(ETHERTYPE_VLAN);
1026 vlh_p->evl_tag = htons(vlan_tag);
1027 vlh_p->evl_proto = eh_p->ether_type;
1028 vl_m->m_next = m;
1029 (*func)(ifp, vl_m);
1030 vl_m->m_next = NULL;
1031 m_free(vl_m);
1032 return;
1033 }
1034
1035 static __inline__ void
bond_bpf_output(struct ifnet * ifp,struct mbuf * m,bpf_packet_func func)1036 bond_bpf_output(struct ifnet * ifp, struct mbuf * m,
1037 bpf_packet_func func)
1038 {
1039 if (func != NULL) {
1040 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1041 const struct ether_header * eh_p;
1042 eh_p = mtod(m, const struct ether_header *);
1043 m->m_data += ETHER_HDR_LEN;
1044 m->m_len -= ETHER_HDR_LEN;
1045 bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func);
1046 m->m_data -= ETHER_HDR_LEN;
1047 m->m_len += ETHER_HDR_LEN;
1048 } else {
1049 (*func)(ifp, m);
1050 }
1051 }
1052 return;
1053 }
1054
1055 static __inline__ void
bond_bpf_input(ifnet_t ifp,mbuf_t m,const struct ether_header * eh_p,bpf_packet_func func)1056 bond_bpf_input(ifnet_t ifp, mbuf_t m, const struct ether_header * eh_p,
1057 bpf_packet_func func)
1058 {
1059 if (func != NULL) {
1060 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1061 bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func);
1062 } else {
1063 /* restore the header */
1064 m->m_data -= ETHER_HDR_LEN;
1065 m->m_len += ETHER_HDR_LEN;
1066 (*func)(ifp, m);
1067 m->m_data += ETHER_HDR_LEN;
1068 m->m_len -= ETHER_HDR_LEN;
1069 }
1070 }
1071 return;
1072 }
1073
/*
 * Function: bond_setmulti
 * Purpose:
 *   Enable multicast reception on "our" interface by enabling multicasts on
 *   each of the member ports.
 * Returns:
 *   0 when there is nothing to do (no softc, bond detaching, or no
 *   member ports) or every port was programmed successfully; EBUSY when
 *   the bond started detaching while we waited for exclusive access;
 *   otherwise the error from the last multicast_list_program() call
 *   that failed.
 */
static int
bond_setmulti(struct ifnet * ifp)
{
	ifbond_ref          ifb;
	int                 error;
	int                 result = 0;
	bondport_ref        p;

	bond_lock();
	ifb = ifnet_softc(ifp);
	if (ifb == NULL || ifbond_flags_if_detaching(ifb)
	    || TAILQ_EMPTY(&ifb->ifb_port_list)) {
		bond_unlock();
		return 0;
	}
	/* keep the bond alive and gain exclusive access before unlocking */
	ifbond_retain(ifb);
	ifbond_wait(ifb, "bond_setmulti");

	if (ifbond_flags_if_detaching(ifb)) {
		/* someone destroyed the bond while we were waiting */
		result = EBUSY;
		goto signal_done;
	}
	bond_unlock();

	/* ifbond_wait() lets us safely walk the list without holding the lock */
	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
		struct ifnet *  port_ifp = p->po_ifp;

		error = multicast_list_program(&p->po_multicast,
		    ifp, port_ifp);
		if (error != 0) {
			/* log and remember the failure, but keep going with the other ports */
			printf("bond_setmulti(%s): "
			    "multicast_list_program(%s%d) failed, %d\n",
			    ifb->ifb_name, ifnet_name(port_ifp),
			    ifnet_unit(port_ifp), error);
			result = error;
		}
	}
	bond_lock();
signal_done:
	/* both paths arrive here holding the bond lock */
	ifbond_signal(ifb, __func__);
	bond_unlock();
	ifbond_release(ifb);
	return result;
}
1126
1127 static int
bond_clone_attach(void)1128 bond_clone_attach(void)
1129 {
1130 int error;
1131
1132 if ((error = if_clone_attach(&bond_cloner)) != 0) {
1133 return error;
1134 }
1135 return 0;
1136 }
1137
1138 static int
ifbond_add_slow_proto_multicast(ifbond_ref ifb)1139 ifbond_add_slow_proto_multicast(ifbond_ref ifb)
1140 {
1141 int error;
1142 struct ifmultiaddr * ifma = NULL;
1143 struct sockaddr_dl sdl;
1144
1145 bond_assert_lock_not_held();
1146
1147 bzero(&sdl, sizeof(sdl));
1148 sdl.sdl_len = sizeof(sdl);
1149 sdl.sdl_family = AF_LINK;
1150 sdl.sdl_type = IFT_ETHER;
1151 sdl.sdl_nlen = 0;
1152 sdl.sdl_alen = sizeof(slow_proto_multicast);
1153 bcopy(&slow_proto_multicast, sdl.sdl_data, sizeof(slow_proto_multicast));
1154 error = if_addmulti_anon(ifb->ifb_ifp, (struct sockaddr *)&sdl, &ifma);
1155 if (error == 0) {
1156 ifb->ifb_ifma_slow_proto = ifma;
1157 }
1158 return error;
1159 }
1160
1161 static int
bond_clone_create(struct if_clone * ifc,u_int32_t unit,__unused void * params)1162 bond_clone_create(struct if_clone * ifc, u_int32_t unit, __unused void *params)
1163 {
1164 int error;
1165 ifbond_ref ifb;
1166 ifnet_t ifp;
1167 struct ifnet_init_eparams bond_init;
1168
1169 error = bond_globals_init();
1170 if (error != 0) {
1171 return error;
1172 }
1173
1174 ifb = if_clone_softc_allocate(&bond_cloner);
1175 if (ifb == NULL) {
1176 return ENOMEM;
1177 }
1178
1179 os_ref_init(&ifb->ifb_retain_count, NULL);
1180 TAILQ_INIT(&ifb->ifb_port_list);
1181 TAILQ_INIT(&ifb->ifb_lag_list);
1182 ifb->ifb_key = unit + 1;
1183
1184 /* use the interface name as the unique id for ifp recycle */
1185 if ((u_int32_t)snprintf(ifb->ifb_name, sizeof(ifb->ifb_name), "%s%d",
1186 ifc->ifc_name, unit) >= sizeof(ifb->ifb_name)) {
1187 ifbond_release(ifb);
1188 return EINVAL;
1189 }
1190
1191 bzero(&bond_init, sizeof(bond_init));
1192 bond_init.ver = IFNET_INIT_CURRENT_VERSION;
1193 bond_init.len = sizeof(bond_init);
1194 bond_init.flags = IFNET_INIT_LEGACY;
1195 bond_init.uniqueid = ifb->ifb_name;
1196 bond_init.uniqueid_len = strlen(ifb->ifb_name);
1197 bond_init.name = ifc->ifc_name;
1198 bond_init.unit = unit;
1199 bond_init.family = IFNET_FAMILY_BOND;
1200 bond_init.type = IFT_IEEE8023ADLAG;
1201 bond_init.output = bond_output;
1202 bond_init.demux = ether_demux;
1203 bond_init.add_proto = ether_add_proto;
1204 bond_init.del_proto = ether_del_proto;
1205 bond_init.check_multi = ether_check_multi;
1206 bond_init.framer_extended = ether_frameout_extended;
1207 bond_init.ioctl = bond_ioctl;
1208 bond_init.set_bpf_tap = bond_set_bpf_tap;
1209 bond_init.detach = bond_if_free;
1210 bond_init.broadcast_addr = etherbroadcastaddr;
1211 bond_init.broadcast_len = ETHER_ADDR_LEN;
1212 bond_init.softc = ifb;
1213 error = ifnet_allocate_extended(&bond_init, &ifp);
1214
1215 if (error) {
1216 ifbond_release(ifb);
1217 return error;
1218 }
1219
1220 ifb->ifb_ifp = ifp;
1221 ifnet_set_offload(ifp, 0);
1222 ifnet_set_addrlen(ifp, ETHER_ADDR_LEN); /* XXX ethernet specific */
1223 ifnet_set_flags(ifp, IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX, 0xffff);
1224 ifnet_set_mtu(ifp, ETHERMTU);
1225
1226 error = ifnet_attach(ifp, NULL);
1227 if (error != 0) {
1228 ifnet_release(ifp);
1229 ifbond_release(ifb);
1230 return error;
1231 }
1232 error = ifbond_add_slow_proto_multicast(ifb);
1233 if (error != 0) {
1234 printf("bond_clone_create(%s): "
1235 "failed to add slow_proto multicast, %d\n",
1236 ifb->ifb_name, error);
1237 }
1238
1239 /* attach as ethernet */
1240 bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
1241
1242 bond_lock();
1243 TAILQ_INSERT_HEAD(&g_bond->ifbond_list, ifb, ifb_bond_list);
1244 bond_unlock();
1245
1246 return 0;
1247 }
1248
1249 static void
bond_remove_all_interfaces(ifbond_ref ifb)1250 bond_remove_all_interfaces(ifbond_ref ifb)
1251 {
1252 bondport_ref p;
1253
1254 bond_assert_lock_held();
1255
1256 /*
1257 * do this in reverse order to avoid re-programming the mac address
1258 * as each head interface is removed
1259 */
1260 while ((p = TAILQ_LAST(&ifb->ifb_port_list, port_list)) != NULL) {
1261 bond_remove_interface(ifb, p->po_ifp);
1262 }
1263 return;
1264 }
1265
1266 static void
bond_remove(ifbond_ref ifb)1267 bond_remove(ifbond_ref ifb)
1268 {
1269 bond_assert_lock_held();
1270 ifbond_flags_set_if_detaching(ifb);
1271 TAILQ_REMOVE(&g_bond->ifbond_list, ifb, ifb_bond_list);
1272 bond_remove_all_interfaces(ifb);
1273 return;
1274 }
1275
/*
 * Function: bond_if_detach
 * Purpose:
 *   Detach the bond's ifnet; a failure is only logged.
 */
static void
bond_if_detach(struct ifnet * ifp)
{
	int     status = ifnet_detach(ifp);

	if (status == 0) {
		return;
	}
	printf("bond_if_detach %s%d: ifnet_detach failed, %d\n",
	    ifnet_name(ifp), ifnet_unit(ifp), status);
}
1289
1290 static int
bond_clone_destroy(struct ifnet * ifp)1291 bond_clone_destroy(struct ifnet * ifp)
1292 {
1293 ifbond_ref ifb;
1294
1295 bond_lock();
1296 ifb = ifnet_softc(ifp);
1297 if (ifb == NULL || ifnet_type(ifp) != IFT_IEEE8023ADLAG) {
1298 bond_unlock();
1299 return 0;
1300 }
1301 if (ifbond_flags_if_detaching(ifb)) {
1302 bond_unlock();
1303 return 0;
1304 }
1305 bond_remove(ifb);
1306 bond_unlock();
1307 bond_if_detach(ifp);
1308 return 0;
1309 }
1310
1311 static int
bond_set_bpf_tap(struct ifnet * ifp,bpf_tap_mode mode,bpf_packet_func func)1312 bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode, bpf_packet_func func)
1313 {
1314 ifbond_ref ifb;
1315
1316 bond_lock();
1317 ifb = ifnet_softc(ifp);
1318 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
1319 bond_unlock();
1320 return ENODEV;
1321 }
1322 switch (mode) {
1323 case BPF_TAP_DISABLE:
1324 ifb->ifb_bpf_input = ifb->ifb_bpf_output = NULL;
1325 break;
1326
1327 case BPF_TAP_INPUT:
1328 ifb->ifb_bpf_input = func;
1329 break;
1330
1331 case BPF_TAP_OUTPUT:
1332 ifb->ifb_bpf_output = func;
1333 break;
1334
1335 case BPF_TAP_INPUT_OUTPUT:
1336 ifb->ifb_bpf_input = ifb->ifb_bpf_output = func;
1337 break;
1338 default:
1339 break;
1340 }
1341 bond_unlock();
1342 return 0;
1343 }
1344
/*
 * Function: ether_header_hash
 * Purpose:
 *   Produce a 32-bit hash from the ethernet header: the last two bytes of
 *   the destination address (shifted into the upper half) OR'd with the
 *   ether type, XOR'd with the first four bytes of the destination
 *   address. All fields are used as stored, without byte swapping.
 *
 *   The destination bytes are fetched with memcpy() rather than through
 *   cast pointers so the loads are valid at any alignment, and the 16-bit
 *   value is widened to uint32_t before shifting so the shift can never
 *   overflow a signed int.
 */
static uint32_t
ether_header_hash(struct ether_header * eh_p)
{
	uint32_t        dhost_first4;
	uint16_t        dhost_last2;
	uint32_t        h;

	memcpy(&dhost_first4, &eh_p->ether_dhost[0], sizeof(dhost_first4));
	memcpy(&dhost_last2, &eh_p->ether_dhost[4], sizeof(dhost_last2));

	/* get 32-bits from destination ether and ether type */
	h = ((uint32_t)dhost_last2 << 16) | eh_p->ether_type;
	h ^= dhost_first4;
	return h;
}
1356
1357 static struct mbuf *
S_mbuf_skip_to_offset(struct mbuf * m,int32_t * offset)1358 S_mbuf_skip_to_offset(struct mbuf * m, int32_t * offset)
1359 {
1360 int len;
1361
1362 len = m->m_len;
1363 while (*offset >= len) {
1364 *offset -= len;
1365 m = m->m_next;
1366 if (m == NULL) {
1367 break;
1368 }
1369 len = m->m_len;
1370 }
1371 return m;
1372 }
1373
1374 #if BYTE_ORDER == BIG_ENDIAN
1375 static __inline__ uint32_t
make_uint32(u_char c0,u_char c1,u_char c2,u_char c3)1376 make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
1377 {
1378 return ((uint32_t)c0 << 24) | ((uint32_t)c1 << 16)
1379 | ((uint32_t)c2 << 8) | (uint32_t)c3;
1380 }
1381 #else /* BYTE_ORDER == LITTLE_ENDIAN */
1382 static __inline__ uint32_t
make_uint32(u_char c0,u_char c1,u_char c2,u_char c3)1383 make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
1384 {
1385 return ((uint32_t)c3 << 24) | ((uint32_t)c2 << 16)
1386 | ((uint32_t)c1 << 8) | (uint32_t)c0;
1387 }
1388 #endif /* BYTE_ORDER == LITTLE_ENDIAN */
1389
1390 static int
S_mbuf_copy_uint32(struct mbuf * m,int32_t offset,uint32_t * val)1391 S_mbuf_copy_uint32(struct mbuf * m, int32_t offset, uint32_t * val)
1392 {
1393 struct mbuf * current;
1394 u_char * current_data;
1395 struct mbuf * next;
1396 u_char * next_data;
1397 int space_current;
1398
1399 current = S_mbuf_skip_to_offset(m, &offset);
1400 if (current == NULL) {
1401 return 1;
1402 }
1403 current_data = mtod(current, u_char *) + offset;
1404 space_current = current->m_len - offset;
1405 if (space_current >= (int)sizeof(uint32_t)) {
1406 *val = *((uint32_t *)current_data);
1407 return 0;
1408 }
1409 next = current->m_next;
1410 if (next == NULL || (next->m_len + space_current) < (int)sizeof(uint32_t)) {
1411 return 1;
1412 }
1413 next_data = mtod(next, u_char *);
1414 switch (space_current) {
1415 case 1:
1416 *val = make_uint32(current_data[0], next_data[0],
1417 next_data[1], next_data[2]);
1418 break;
1419 case 2:
1420 *val = make_uint32(current_data[0], current_data[1],
1421 next_data[0], next_data[1]);
1422 break;
1423 default:
1424 *val = make_uint32(current_data[0], current_data[1],
1425 current_data[2], next_data[0]);
1426 break;
1427 }
1428 return 0;
1429 }
1430
1431 #define IP_SRC_OFFSET (offsetof(struct ip, ip_src) - offsetof(struct ip, ip_p))
1432 #define IP_DST_OFFSET (offsetof(struct ip, ip_dst) - offsetof(struct ip, ip_p))
1433
1434 static uint32_t
ip_header_hash(struct mbuf * m)1435 ip_header_hash(struct mbuf * m)
1436 {
1437 u_char * data;
1438 struct in_addr ip_dst;
1439 struct in_addr ip_src;
1440 u_char ip_p;
1441 int32_t offset;
1442 struct mbuf * orig_m = m;
1443
1444 /* find the IP protocol field relative to the start of the packet */
1445 offset = offsetof(struct ip, ip_p) + sizeof(struct ether_header);
1446 m = S_mbuf_skip_to_offset(m, &offset);
1447 if (m == NULL || m->m_len < 1) {
1448 goto bad_ip_packet;
1449 }
1450 data = mtod(m, u_char *) + offset;
1451 ip_p = *data;
1452
1453 /* find the IP src relative to the IP protocol */
1454 if ((m->m_len - offset)
1455 >= (int)(IP_SRC_OFFSET + sizeof(struct in_addr) * 2)) {
1456 /* this should be the normal case */
1457 ip_src = *(struct in_addr *)(data + IP_SRC_OFFSET);
1458 ip_dst = *(struct in_addr *)(data + IP_DST_OFFSET);
1459 } else {
1460 if (S_mbuf_copy_uint32(m, offset + IP_SRC_OFFSET,
1461 (uint32_t *)&ip_src.s_addr)) {
1462 goto bad_ip_packet;
1463 }
1464 if (S_mbuf_copy_uint32(m, offset + IP_DST_OFFSET,
1465 (uint32_t *)&ip_dst.s_addr)) {
1466 goto bad_ip_packet;
1467 }
1468 }
1469 return ntohl(ip_dst.s_addr) ^ ntohl(ip_src.s_addr) ^ ((uint32_t)ip_p);
1470
1471 bad_ip_packet:
1472 return ether_header_hash(mtod(orig_m, struct ether_header *));
1473 }
1474
1475 #define IP6_ADDRS_LEN (sizeof(struct in6_addr) * 2)
1476 static uint32_t
ipv6_header_hash(struct mbuf * m)1477 ipv6_header_hash(struct mbuf * m)
1478 {
1479 u_char * data;
1480 int i;
1481 int32_t offset;
1482 struct mbuf * orig_m = m;
1483 uint32_t * scan;
1484 uint32_t val;
1485
1486 /* find the IP protocol field relative to the start of the packet */
1487 offset = offsetof(struct ip6_hdr, ip6_src) + sizeof(struct ether_header);
1488 m = S_mbuf_skip_to_offset(m, &offset);
1489 if (m == NULL) {
1490 goto bad_ipv6_packet;
1491 }
1492 data = mtod(m, u_char *) + offset;
1493 val = 0;
1494 if ((m->m_len - offset) >= (int)IP6_ADDRS_LEN) {
1495 /* this should be the normal case */
1496 for (i = 0, scan = (uint32_t *)data;
1497 i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t));
1498 i++, scan++) {
1499 val ^= *scan;
1500 }
1501 } else {
1502 for (i = 0; i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t)); i++) {
1503 uint32_t tmp;
1504 if (S_mbuf_copy_uint32(m, offset + i * sizeof(uint32_t),
1505 (uint32_t *)&tmp)) {
1506 goto bad_ipv6_packet;
1507 }
1508 val ^= tmp;
1509 }
1510 }
1511 return ntohl(val);
1512
1513 bad_ipv6_packet:
1514 return ether_header_hash(mtod(orig_m, struct ether_header *));
1515 }
1516
1517 static int
bond_output(struct ifnet * ifp,struct mbuf * m)1518 bond_output(struct ifnet * ifp, struct mbuf * m)
1519 {
1520 bpf_packet_func bpf_func;
1521 uint32_t h;
1522 ifbond_ref ifb;
1523 struct ifnet * port_ifp = NULL;
1524 int err;
1525 struct flowadv adv = { .code = FADV_SUCCESS };
1526
1527 if (m == 0) {
1528 return 0;
1529 }
1530 if ((m->m_flags & M_PKTHDR) == 0) {
1531 m_freem(m);
1532 return 0;
1533 }
1534 if (m->m_pkthdr.pkt_flowid != 0) {
1535 h = m->m_pkthdr.pkt_flowid;
1536 } else {
1537 struct ether_header * eh_p;
1538
1539 eh_p = mtod(m, struct ether_header *);
1540 switch (ntohs(eh_p->ether_type)) {
1541 case ETHERTYPE_IP:
1542 h = ip_header_hash(m);
1543 break;
1544 case ETHERTYPE_IPV6:
1545 h = ipv6_header_hash(m);
1546 break;
1547 default:
1548 h = ether_header_hash(eh_p);
1549 break;
1550 }
1551 }
1552 bond_lock();
1553 ifb = ifnet_softc(ifp);
1554 if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1555 || ifb->ifb_distributing_count == 0) {
1556 goto done;
1557 }
1558 h %= ifb->ifb_distributing_count;
1559 port_ifp = ifb->ifb_distributing_array[h]->po_ifp;
1560 bpf_func = ifb->ifb_bpf_output;
1561 bond_unlock();
1562
1563 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1564 (void)ifnet_stat_increment_out(ifp, 1,
1565 m->m_pkthdr.len + ETHER_VLAN_ENCAP_LEN,
1566 0);
1567 } else {
1568 (void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
1569 }
1570 bond_bpf_output(ifp, m, bpf_func);
1571
1572 err = dlil_output(port_ifp, PF_BOND, m, NULL, NULL, 1, &adv);
1573
1574 if (err == 0) {
1575 if (adv.code == FADV_FLOW_CONTROLLED) {
1576 err = EQFULL;
1577 } else if (adv.code == FADV_SUSPENDED) {
1578 err = EQSUSPENDED;
1579 }
1580 }
1581
1582 return err;
1583
1584 done:
1585 bond_unlock();
1586 m_freem(m);
1587 return 0;
1588 }
1589
1590 static bondport_ref
ifbond_lookup_port(ifbond_ref ifb,struct ifnet * port_ifp)1591 ifbond_lookup_port(ifbond_ref ifb, struct ifnet * port_ifp)
1592 {
1593 bondport_ref p;
1594 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1595 if (p->po_ifp == port_ifp) {
1596 return p;
1597 }
1598 }
1599 return NULL;
1600 }
1601
1602 static bondport_ref
bond_lookup_port(struct ifnet * port_ifp)1603 bond_lookup_port(struct ifnet * port_ifp)
1604 {
1605 ifbond_ref ifb;
1606 bondport_ref port;
1607
1608 TAILQ_FOREACH(ifb, &g_bond->ifbond_list, ifb_bond_list) {
1609 port = ifbond_lookup_port(ifb, port_ifp);
1610 if (port != NULL) {
1611 return port;
1612 }
1613 }
1614 return NULL;
1615 }
1616
1617 static void
bond_receive_lacpdu(struct mbuf * m,struct ifnet * port_ifp)1618 bond_receive_lacpdu(struct mbuf * m, struct ifnet * port_ifp)
1619 {
1620 struct ifnet * bond_ifp = NULL;
1621 ifbond_ref ifb;
1622 int event_code = 0;
1623 bool need_link_update = false;
1624 bondport_ref p;
1625
1626 bond_lock();
1627 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1628 goto done;
1629 }
1630 p = bond_lookup_port(port_ifp);
1631 if (p == NULL) {
1632 goto done;
1633 }
1634 if (p->po_enabled == 0) {
1635 goto done;
1636 }
1637 ifb = p->po_bond;
1638 if (ifb->ifb_mode != IF_BOND_MODE_LACP) {
1639 goto done;
1640 }
1641 /*
1642 * Work-around for rdar://problem/51372042
1643 * Sometimes, the link comes up but the driver doesn't report the
1644 * negotiated medium at that time. When we receive an LACPDU packet,
1645 * and the medium is unknown, force a link status check. Don't force
1646 * the link status check more often than _FORCE_LINK_EVENT_INTERVAL
1647 * seconds.
1648 */
1649 #define _FORCE_LINK_EVENT_INTERVAL 1
1650 if (media_type_unknown(&p->po_media_info)) {
1651 uint64_t now = net_uptime();
1652
1653 if ((now - p->po_force_link_event_time) >=
1654 _FORCE_LINK_EVENT_INTERVAL) {
1655 need_link_update = true;
1656 p->po_force_link_event_time = now;
1657 }
1658 }
1659 bondport_receive_lacpdu(p, (lacpdu_ref)m->m_data);
1660 if (ifbond_selection(ifb)) {
1661 event_code = (ifb->ifb_active_lag == NULL)
1662 ? KEV_DL_LINK_OFF
1663 : KEV_DL_LINK_ON;
1664 /* XXX need to take a reference on bond_ifp */
1665 bond_ifp = ifb->ifb_ifp;
1666 ifb->ifb_last_link_event = event_code;
1667 } else {
1668 event_code = (ifb->ifb_active_lag == NULL)
1669 ? KEV_DL_LINK_OFF
1670 : KEV_DL_LINK_ON;
1671 if (event_code != ifb->ifb_last_link_event) {
1672 if (if_bond_debug) {
1673 timestamp_printf("%s: (receive) generating LINK event\n",
1674 ifb->ifb_name);
1675 }
1676 bond_ifp = ifb->ifb_ifp;
1677 ifb->ifb_last_link_event = event_code;
1678 }
1679 }
1680
1681 done:
1682 bond_unlock();
1683 if (bond_ifp != NULL) {
1684 interface_link_event(bond_ifp, event_code);
1685 }
1686 m_freem(m);
1687 if (need_link_update) {
1688 if (if_bond_debug != 0) {
1689 printf("bond: simulating link status changed event");
1690 }
1691 bond_handle_event(port_ifp, KEV_DL_LINK_ON);
1692 }
1693 return;
1694 }
1695
1696 static void
bond_receive_la_marker_pdu(struct mbuf * m,struct ifnet * port_ifp)1697 bond_receive_la_marker_pdu(struct mbuf * m, struct ifnet * port_ifp)
1698 {
1699 la_marker_pdu_ref marker_p;
1700 bondport_ref p;
1701
1702 marker_p = (la_marker_pdu_ref)(m->m_data + ETHER_HDR_LEN);
1703 if (marker_p->lm_marker_tlv_type != LA_MARKER_TLV_TYPE_MARKER) {
1704 goto failed;
1705 }
1706 bond_lock();
1707 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1708 bond_unlock();
1709 goto failed;
1710 }
1711 p = bond_lookup_port(port_ifp);
1712 if (p == NULL || p->po_enabled == 0
1713 || p->po_bond->ifb_mode != IF_BOND_MODE_LACP) {
1714 bond_unlock();
1715 goto failed;
1716 }
1717 /* echo back the same packet as a marker response */
1718 marker_p->lm_marker_tlv_type = LA_MARKER_TLV_TYPE_MARKER_RESPONSE;
1719 bondport_slow_proto_transmit(p, (packet_buffer_ref)m);
1720 bond_unlock();
1721 return;
1722
1723 failed:
1724 m_freem(m);
1725 return;
1726 }
1727
1728 static void
bond_input(ifnet_t port_ifp,mbuf_t m,char * frame_header)1729 bond_input(ifnet_t port_ifp, mbuf_t m, char *frame_header)
1730 {
1731 bpf_packet_func bpf_func;
1732 const struct ether_header * eh_p;
1733 ifbond_ref ifb;
1734 struct ifnet * ifp;
1735 bondport_ref p;
1736
1737 eh_p = (const struct ether_header *)frame_header;
1738 if ((m->m_flags & M_MCAST) != 0
1739 && bcmp(eh_p->ether_dhost, &slow_proto_multicast,
1740 sizeof(eh_p->ether_dhost)) == 0
1741 && ntohs(eh_p->ether_type) == IEEE8023AD_SLOW_PROTO_ETHERTYPE) {
1742 u_char subtype = *mtod(m, u_char *);
1743
1744 if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP) {
1745 if (m->m_pkthdr.len < (int)offsetof(lacpdu, la_reserved)) {
1746 m_freem(m);
1747 return;
1748 }
1749 /* send to lacp */
1750 if (m->m_len < (int)offsetof(lacpdu, la_reserved)) {
1751 m = m_pullup(m, offsetof(lacpdu, la_reserved));
1752 if (m == NULL) {
1753 return;
1754 }
1755 }
1756 bond_receive_lacpdu(m, port_ifp);
1757 return;
1758 } else if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LA_MARKER_PROTOCOL) {
1759 int min_size;
1760
1761 /* restore the ethernet header pointer in the mbuf */
1762 m->m_pkthdr.len += ETHER_HDR_LEN;
1763 m->m_data -= ETHER_HDR_LEN;
1764 m->m_len += ETHER_HDR_LEN;
1765 min_size = ETHER_HDR_LEN + offsetof(la_marker_pdu, lm_reserved);
1766 if (m->m_pkthdr.len < min_size) {
1767 m_freem(m);
1768 return;
1769 }
1770 /* send to lacp */
1771 if (m->m_len < min_size) {
1772 m = m_pullup(m, min_size);
1773 if (m == NULL) {
1774 return;
1775 }
1776 }
1777 /* send to marker responder */
1778 bond_receive_la_marker_pdu(m, port_ifp);
1779 return;
1780 } else if (subtype == 0
1781 || subtype > IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_END) {
1782 /* invalid subtype, discard the frame */
1783 m_freem(m);
1784 return;
1785 }
1786 }
1787 bond_lock();
1788 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1789 goto done;
1790 }
1791 p = bond_lookup_port(port_ifp);
1792 if (p == NULL || bondport_collecting(p) == 0) {
1793 goto done;
1794 }
1795
1796 ifb = p->po_bond;
1797 ifp = ifb->ifb_ifp;
1798 bpf_func = ifb->ifb_bpf_input;
1799 bond_unlock();
1800
1801 /*
1802 * Need to clear the promiscous flags otherwise it will be
1803 * dropped by DLIL after processing filters
1804 */
1805 if ((mbuf_flags(m) & MBUF_PROMISC)) {
1806 mbuf_setflags_mask(m, 0, MBUF_PROMISC);
1807 }
1808
1809 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1810 (void)ifnet_stat_increment_in(ifp, 1,
1811 (m->m_pkthdr.len + ETHER_HDR_LEN
1812 + ETHER_VLAN_ENCAP_LEN), 0);
1813 } else {
1814 (void)ifnet_stat_increment_in(ifp, 1,
1815 (m->m_pkthdr.len + ETHER_HDR_LEN), 0);
1816 }
1817
1818 /* make the packet appear as if it arrived on the bonded interface */
1819 m->m_pkthdr.rcvif = ifp;
1820 bond_bpf_input(ifp, m, eh_p, bpf_func);
1821 m->m_pkthdr.pkt_hdr = frame_header;
1822 dlil_input_packet_list(ifp, m);
1823 return;
1824
1825 done:
1826 bond_unlock();
1827 m_freem(m);
1828 return;
1829 }
1830
1831 static errno_t
bond_iff_input(void * cookie,ifnet_t port_ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_header_ptr)1832 bond_iff_input(void *cookie, ifnet_t port_ifp, protocol_family_t protocol,
1833 mbuf_t *data, char **frame_header_ptr)
1834 {
1835 #pragma unused(cookie)
1836 #pragma unused(protocol)
1837 mbuf_t m = *data;
1838 char * frame_header = *frame_header_ptr;
1839
1840 bond_input(port_ifp, m, frame_header);
1841 return EJUSTRETURN;
1842 }
1843
1844 static __inline__ const char *
bondport_get_name(bondport_ref p)1845 bondport_get_name(bondport_ref p)
1846 {
1847 return p->po_name;
1848 }
1849
1850 static __inline__ int
bondport_get_index(bondport_ref p)1851 bondport_get_index(bondport_ref p)
1852 {
1853 return ifnet_index(p->po_ifp);
1854 }
1855
1856 static void
bondport_slow_proto_transmit(bondport_ref p,packet_buffer_ref buf)1857 bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf)
1858 {
1859 struct ether_header * eh_p;
1860 int error;
1861
1862 /* packet_buffer_allocate leaves room for ethernet header */
1863 eh_p = mtod(buf, struct ether_header *);
1864 bcopy(&slow_proto_multicast, &eh_p->ether_dhost, sizeof(eh_p->ether_dhost));
1865 bcopy(&p->po_saved_addr, eh_p->ether_shost, sizeof(eh_p->ether_shost));
1866 eh_p->ether_type = htons(IEEE8023AD_SLOW_PROTO_ETHERTYPE);
1867 error = ifnet_output_raw(p->po_ifp, PF_BOND, buf);
1868 if (error != 0) {
1869 printf("bondport_slow_proto_transmit(%s) failed %d\n",
1870 bondport_get_name(p), error);
1871 }
1872 return;
1873 }
1874
1875 static void
bondport_timer_process_func(devtimer_ref timer,devtimer_process_func_event event)1876 bondport_timer_process_func(devtimer_ref timer,
1877 devtimer_process_func_event event)
1878 {
1879 bondport_ref p;
1880
1881 switch (event) {
1882 case devtimer_process_func_event_lock:
1883 bond_lock();
1884 devtimer_retain(timer);
1885 break;
1886 case devtimer_process_func_event_unlock:
1887 if (devtimer_valid(timer)) {
1888 /* as long as the devtimer is valid, we can look at arg0 */
1889 int event_code = 0;
1890 struct ifnet * bond_ifp = NULL;
1891
1892 p = (bondport_ref)devtimer_arg0(timer);
1893 if (ifbond_selection(p->po_bond)) {
1894 event_code = (p->po_bond->ifb_active_lag == NULL)
1895 ? KEV_DL_LINK_OFF
1896 : KEV_DL_LINK_ON;
1897 /* XXX need to take a reference on bond_ifp */
1898 bond_ifp = p->po_bond->ifb_ifp;
1899 p->po_bond->ifb_last_link_event = event_code;
1900 } else {
1901 event_code = (p->po_bond->ifb_active_lag == NULL)
1902 ? KEV_DL_LINK_OFF
1903 : KEV_DL_LINK_ON;
1904 if (event_code != p->po_bond->ifb_last_link_event) {
1905 if (if_bond_debug) {
1906 timestamp_printf("%s: (timer) generating LINK event\n",
1907 p->po_bond->ifb_name);
1908 }
1909 bond_ifp = p->po_bond->ifb_ifp;
1910 p->po_bond->ifb_last_link_event = event_code;
1911 }
1912 }
1913 devtimer_release(timer);
1914 bond_unlock();
1915 if (bond_ifp != NULL) {
1916 interface_link_event(bond_ifp, event_code);
1917 }
1918 } else {
1919 /* timer is going away */
1920 devtimer_release(timer);
1921 bond_unlock();
1922 }
1923 break;
1924 default:
1925 break;
1926 }
1927 }
1928
1929 static bondport_ref
bondport_create(struct ifnet * port_ifp,lacp_port_priority priority,int active,int short_timeout,int * ret_error)1930 bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
1931 int active, int short_timeout, int * ret_error)
1932 {
1933 int error = 0;
1934 bondport_ref p = NULL;
1935 lacp_actor_partner_state s;
1936
1937 *ret_error = 0;
1938 p = kalloc_type(struct bondport_s, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1939 multicast_list_init(&p->po_multicast);
1940 if ((u_int32_t)snprintf(p->po_name, sizeof(p->po_name), "%s%d",
1941 ifnet_name(port_ifp), ifnet_unit(port_ifp))
1942 >= sizeof(p->po_name)) {
1943 printf("if_bond: name too large\n");
1944 *ret_error = EINVAL;
1945 goto failed;
1946 }
1947 error = siocgifdevmtu(port_ifp, &p->po_devmtu);
1948 if (error != 0) {
1949 printf("if_bond: SIOCGIFDEVMTU %s failed, %d\n",
1950 bondport_get_name(p), error);
1951 goto failed;
1952 }
1953 /* remember the current interface MTU so it can be restored */
1954 p->po_devmtu.ifdm_current = ifnet_mtu(port_ifp);
1955 p->po_ifp = port_ifp;
1956 p->po_media_info = interface_media_info(port_ifp);
1957 p->po_current_while_timer = devtimer_create(bondport_timer_process_func, p);
1958 if (p->po_current_while_timer == NULL) {
1959 *ret_error = ENOMEM;
1960 goto failed;
1961 }
1962 p->po_periodic_timer = devtimer_create(bondport_timer_process_func, p);
1963 if (p->po_periodic_timer == NULL) {
1964 *ret_error = ENOMEM;
1965 goto failed;
1966 }
1967 p->po_wait_while_timer = devtimer_create(bondport_timer_process_func, p);
1968 if (p->po_wait_while_timer == NULL) {
1969 *ret_error = ENOMEM;
1970 goto failed;
1971 }
1972 p->po_transmit_timer = devtimer_create(bondport_timer_process_func, p);
1973 if (p->po_transmit_timer == NULL) {
1974 *ret_error = ENOMEM;
1975 goto failed;
1976 }
1977 p->po_receive_state = ReceiveState_none;
1978 p->po_mux_state = MuxState_none;
1979 p->po_priority = priority;
1980 s = 0;
1981 s = lacp_actor_partner_state_set_aggregatable(s);
1982 if (short_timeout) {
1983 s = lacp_actor_partner_state_set_short_timeout(s);
1984 }
1985 if (active) {
1986 s = lacp_actor_partner_state_set_active_lacp(s);
1987 }
1988 p->po_actor_state = s;
1989 return p;
1990
1991 failed:
1992 bondport_free(p);
1993 return NULL;
1994 }
1995
1996 static void
bondport_start(bondport_ref p)1997 bondport_start(bondport_ref p)
1998 {
1999 bondport_receive_machine(p, LAEventStart, NULL);
2000 bondport_mux_machine(p, LAEventStart, NULL);
2001 bondport_periodic_transmit_machine(p, LAEventStart, NULL);
2002 bondport_transmit_machine(p, LAEventStart, NULL);
2003 return;
2004 }
2005
2006 /*
2007 * Function: bondport_invalidate_timers
2008 * Purpose:
2009 * Invalidate all of the timers for the bondport.
2010 */
2011 static void
bondport_invalidate_timers(bondport_ref p)2012 bondport_invalidate_timers(bondport_ref p)
2013 {
2014 devtimer_invalidate(p->po_current_while_timer);
2015 devtimer_invalidate(p->po_periodic_timer);
2016 devtimer_invalidate(p->po_wait_while_timer);
2017 devtimer_invalidate(p->po_transmit_timer);
2018 }
2019
2020 /*
2021 * Function: bondport_cancel_timers
2022 * Purpose:
2023 * Cancel all of the timers for the bondport.
2024 */
2025 static void
bondport_cancel_timers(bondport_ref p)2026 bondport_cancel_timers(bondport_ref p)
2027 {
2028 devtimer_cancel(p->po_current_while_timer);
2029 devtimer_cancel(p->po_periodic_timer);
2030 devtimer_cancel(p->po_wait_while_timer);
2031 devtimer_cancel(p->po_transmit_timer);
2032 }
2033
2034 static void
bondport_free(bondport_ref p)2035 bondport_free(bondport_ref p)
2036 {
2037 multicast_list_remove(&p->po_multicast);
2038 devtimer_release(p->po_current_while_timer);
2039 devtimer_release(p->po_periodic_timer);
2040 devtimer_release(p->po_wait_while_timer);
2041 devtimer_release(p->po_transmit_timer);
2042 kfree_type(struct bondport_s, p);
2043 return;
2044 }
2045
2046 static __inline__ int
bond_device_mtu(struct ifnet * ifp,ifbond_ref ifb)2047 bond_device_mtu(struct ifnet * ifp, ifbond_ref ifb)
2048 {
2049 return ((int)ifnet_mtu(ifp) > ifb->ifb_altmtu)
2050 ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu;
2051 }
2052
2053 static int
bond_add_interface(struct ifnet * ifp,struct ifnet * port_ifp)2054 bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp)
2055 {
2056 u_int32_t eflags;
2057 uint32_t control_flags = 0;
2058 int devmtu;
2059 int error = 0;
2060 int event_code = 0;
2061 interface_filter_t filter = NULL;
2062 int first = FALSE;
2063 ifbond_ref ifb;
2064 bondport_ref * new_array = NULL;
2065 bondport_ref * old_array = NULL;
2066 bondport_ref p;
2067
2068 if (IFNET_IS_INTCOPROC(port_ifp)) {
2069 return EINVAL;
2070 }
2071
2072 /* pre-allocate space for new port */
2073 p = bondport_create(port_ifp, 0x8000, 1, 0, &error);
2074 if (p == NULL) {
2075 return error;
2076 }
2077 bond_lock();
2078 ifb = (ifbond_ref)ifnet_softc(ifp);
2079 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2080 bond_unlock();
2081 bondport_free(p);
2082 return ifb == NULL ? EOPNOTSUPP : EBUSY;
2083 }
2084
2085 /* make sure this interface can handle our current MTU */
2086 devmtu = bond_device_mtu(ifp, ifb);
2087 if (devmtu != 0
2088 && (devmtu > p->po_devmtu.ifdm_max || devmtu < p->po_devmtu.ifdm_min)) {
2089 bond_unlock();
2090 printf("if_bond: interface %s doesn't support mtu %d",
2091 bondport_get_name(p), devmtu);
2092 bondport_free(p);
2093 return EINVAL;
2094 }
2095
2096 /* make sure ifb doesn't get de-allocated while we wait */
2097 ifbond_retain(ifb);
2098
2099 /* wait for other add or remove to complete */
2100 ifbond_wait(ifb, __func__);
2101
2102 if (ifbond_flags_if_detaching(ifb)) {
2103 /* someone destroyed the bond while we were waiting */
2104 error = EBUSY;
2105 goto signal_done;
2106 }
2107 if (bond_lookup_port(port_ifp) != NULL) {
2108 /* port is already part of a bond */
2109 error = EBUSY;
2110 goto signal_done;
2111 }
2112 if ((ifnet_eflags(port_ifp) & (IFEF_VLAN | IFEF_BOND)) != 0) {
2113 /* interface already has VLAN's, or is part of bond */
2114 error = EBUSY;
2115 goto signal_done;
2116 }
2117
2118 /* mark the interface busy */
2119 eflags = if_set_eflags(port_ifp, IFEF_BOND);
2120 if ((eflags & IFEF_VLAN) != 0) {
2121 /* vlan got in ahead of us */
2122 if_clear_eflags(port_ifp, IFEF_BOND);
2123 error = EBUSY;
2124 goto signal_done;
2125 }
2126
2127 if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2128 ifnet_set_offload(ifp, ifnet_offload(port_ifp));
2129 ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
2130 if (ifbond_flags_lladdr(ifb) == FALSE) {
2131 first = TRUE;
2132 }
2133 } else {
2134 ifnet_offload_t ifp_offload;
2135 ifnet_offload_t port_ifp_offload;
2136
2137 ifp_offload = ifnet_offload(ifp);
2138 port_ifp_offload = ifnet_offload(port_ifp);
2139 if (ifp_offload != port_ifp_offload) {
2140 ifnet_offload_t offload;
2141
2142 offload = ifp_offload & port_ifp_offload;
2143 printf("%s(%s, %s) "
2144 "hwassist values don't match 0x%x != 0x%x, using 0x%x instead\n",
2145 __func__,
2146 ifb->ifb_name, bondport_get_name(p),
2147 ifp_offload, port_ifp_offload, offload);
2148 /*
2149 * XXX
2150 * if the bond has VLAN's, we can't simply change the hwassist
2151 * field behind its back: this needs work
2152 */
2153 ifnet_set_offload(ifp, offload);
2154 }
2155 }
2156 p->po_bond = ifb;
2157
2158 /* remember the port's ethernet address so it can be restored */
2159 ether_addr_copy(&p->po_saved_addr, IF_LLADDR(port_ifp));
2160
2161 /* add it to the list of ports */
2162 TAILQ_INSERT_TAIL(&ifb->ifb_port_list, p, po_port_list);
2163 ifb->ifb_port_count++;
2164
2165 bond_unlock();
2166
2167
2168 /* first port added to bond determines bond's ethernet address */
2169 if (first) {
2170 ifnet_set_lladdr_and_type(ifp, IF_LLADDR(port_ifp), ETHER_ADDR_LEN,
2171 IFT_ETHER);
2172 }
2173 uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_IN_LIST);
2174
2175 /* allocate a larger distributing array */
2176 new_array = (bondport_ref *)
2177 _MALLOC(sizeof(*new_array) * ifb->ifb_port_count, M_BOND, M_WAITOK);
2178 if (new_array == NULL) {
2179 error = ENOMEM;
2180 goto failed;
2181 }
2182
2183 /* attach our BOND "protocol" to the interface */
2184 error = bond_attach_protocol(port_ifp);
2185 if (error) {
2186 goto failed;
2187 }
2188 uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_PROTO_ATTACHED);
2189
2190 /* attach our BOND interface filter */
2191 error = bond_attach_filter(port_ifp, &filter);
2192 if (error != 0) {
2193 goto failed;
2194 }
2195 uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_FILTER_ATTACHED);
2196
2197 /* set the interface MTU */
2198 devmtu = bond_device_mtu(ifp, ifb);
2199 error = siocsifmtu(port_ifp, devmtu);
2200 if (error != 0) {
2201 printf("%s(%s, %s):"
2202 " SIOCSIFMTU %d failed %d\n",
2203 __func__,
2204 ifb->ifb_name, bondport_get_name(p), devmtu, error);
2205 goto failed;
2206 }
2207 uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_MTU_SET);
2208
2209 /* program the port with our multicast addresses */
2210 error = multicast_list_program(&p->po_multicast, ifp, port_ifp);
2211 if (error) {
2212 printf("%s(%s, %s): multicast_list_program failed %d\n",
2213 __func__,
2214 ifb->ifb_name, bondport_get_name(p), error);
2215 goto failed;
2216 }
2217
2218 /* mark the interface up */
2219 ifnet_set_flags(port_ifp, IFF_UP, IFF_UP);
2220
2221 error = ifnet_ioctl(port_ifp, 0, SIOCSIFFLAGS, NULL);
2222 if (error != 0) {
2223 printf("%s(%s, %s): SIOCSIFFLAGS failed %d\n",
2224 __func__,
2225 ifb->ifb_name, bondport_get_name(p), error);
2226 goto failed;
2227 }
2228
2229 /* re-program the port's ethernet address */
2230 error = if_siflladdr(port_ifp,
2231 (const struct ether_addr *)IF_LLADDR(ifp));
2232 if (error == 0) {
2233 if (memcmp(IF_LLADDR(ifp), IF_LLADDR(port_ifp), ETHER_ADDR_LEN)
2234 != 0) {
2235 /* it lied, it really doesn't support setting lladdr */
2236 error = EOPNOTSUPP;
2237 }
2238 }
2239 if (error != 0) {
2240 /* port doesn't support setting the link address */
2241 printf("%s(%s, %s): if_siflladdr failed %d\n",
2242 __func__,
2243 ifb->ifb_name, bondport_get_name(p), error);
2244 error = ifnet_set_promiscuous(port_ifp, 1);
2245 if (error != 0) {
2246 /* port doesn't support setting promiscuous mode */
2247 printf("%s(%s, %s): set promiscuous failed %d\n",
2248 __func__,
2249 ifb->ifb_name, bondport_get_name(p), error);
2250 goto failed;
2251 }
2252 uint32_bit_set(&control_flags,
2253 PORT_CONTROL_FLAGS_PROMISCUOUS_SET);
2254 } else {
2255 uint32_bit_set(&control_flags,
2256 PORT_CONTROL_FLAGS_LLADDR_SET);
2257 }
2258
2259 /* if we're in promiscuous mode, enable that as well */
2260 if (ifbond_flags_promisc(ifb)) {
2261 error = ifnet_set_promiscuous(port_ifp, 1);
2262 if (error != 0) {
2263 /* port doesn't support setting promiscuous mode */
2264 printf("%s(%s, %s): set promiscuous failed %d\n",
2265 __func__,
2266 ifb->ifb_name, bondport_get_name(p), error);
2267 goto failed;
2268 }
2269 uint32_bit_set(&control_flags,
2270 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2271 }
2272
2273 bond_lock();
2274
2275 /* no failures past this point */
2276 p->po_enabled = 1;
2277 p->po_control_flags = control_flags;
2278
2279 /* copy the contents of the existing distributing array */
2280 if (ifb->ifb_distributing_count) {
2281 bcopy(ifb->ifb_distributing_array, new_array,
2282 sizeof(*new_array) * ifb->ifb_distributing_count);
2283 }
2284 old_array = ifb->ifb_distributing_array;
2285 ifb->ifb_distributing_array = new_array;
2286
2287 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2288 bondport_start(p);
2289
2290 /* check if we need to generate a link status event */
2291 if (ifbond_selection(ifb)) {
2292 event_code = (ifb->ifb_active_lag == NULL)
2293 ? KEV_DL_LINK_OFF
2294 : KEV_DL_LINK_ON;
2295 ifb->ifb_last_link_event = event_code;
2296 }
2297 } else {
2298 /* are we adding the first distributing interface? */
2299 if (media_active(&p->po_media_info)) {
2300 if (ifb->ifb_distributing_count == 0) {
2301 ifb->ifb_last_link_event = event_code = KEV_DL_LINK_ON;
2302 }
2303 bondport_enable_distributing(p);
2304 } else {
2305 bondport_disable_distributing(p);
2306 }
2307 }
2308 p->po_filter = filter;
2309
2310 /* clear the busy state, and wakeup anyone waiting */
2311 ifbond_signal(ifb, __func__);
2312 bond_unlock();
2313 if (event_code != 0) {
2314 interface_link_event(ifp, event_code);
2315 }
2316 if (old_array != NULL) {
2317 FREE(old_array, M_BOND);
2318 }
2319 return 0;
2320
2321 failed:
2322 bond_assert_lock_not_held();
2323
2324 /* if this was the first port to be added, clear our address */
2325 if (first) {
2326 ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG);
2327 }
2328
2329 if (new_array != NULL) {
2330 FREE(new_array, M_BOND);
2331 }
2332 if (uint32_bit_is_set(control_flags,
2333 PORT_CONTROL_FLAGS_LLADDR_SET)) {
2334 int error1;
2335
2336 error1 = if_siflladdr(port_ifp, &p->po_saved_addr);
2337 if (error1 != 0) {
2338 printf("%s(%s, %s): if_siflladdr restore failed %d\n",
2339 __func__,
2340 ifb->ifb_name, bondport_get_name(p), error1);
2341 }
2342 }
2343 if (uint32_bit_is_set(control_flags,
2344 PORT_CONTROL_FLAGS_PROMISCUOUS_SET)) {
2345 int error1;
2346
2347 error1 = ifnet_set_promiscuous(port_ifp, 0);
2348 if (error1 != 0) {
2349 printf("%s(%s, %s): promiscous mode disable failed %d\n",
2350 __func__,
2351 ifb->ifb_name, bondport_get_name(p), error1);
2352 }
2353 }
2354 if (uint32_bit_is_set(control_flags,
2355 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET)) {
2356 int error1;
2357
2358 error1 = ifnet_set_promiscuous(port_ifp, 0);
2359 if (error1 != 0) {
2360 printf("%s(%s, %s): promiscous mode disable failed %d\n",
2361 __func__,
2362 ifb->ifb_name, bondport_get_name(p), error1);
2363 }
2364 }
2365 if (uint32_bit_is_set(control_flags,
2366 PORT_CONTROL_FLAGS_PROTO_ATTACHED)) {
2367 (void)bond_detach_protocol(port_ifp);
2368 }
2369 if (uint32_bit_is_set(control_flags,
2370 PORT_CONTROL_FLAGS_FILTER_ATTACHED)) {
2371 iflt_detach(filter);
2372 }
2373 if (uint32_bit_is_set(control_flags,
2374 PORT_CONTROL_FLAGS_MTU_SET)) {
2375 int error1;
2376
2377 error1 = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2378 if (error1 != 0) {
2379 printf("%s(%s, %s): SIOCSIFMTU %d failed %d\n",
2380 __func__,
2381 ifb->ifb_name, bondport_get_name(p),
2382 p->po_devmtu.ifdm_current, error1);
2383 }
2384 }
2385 bond_lock();
2386 if (uint32_bit_is_set(control_flags,
2387 PORT_CONTROL_FLAGS_IN_LIST)) {
2388 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2389 ifb->ifb_port_count--;
2390 }
2391 if_clear_eflags(ifp, IFEF_BOND);
2392 if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2393 ifb->ifb_altmtu = 0;
2394 ifnet_set_mtu(ifp, ETHERMTU);
2395 ifnet_set_offload(ifp, 0);
2396 }
2397
2398 signal_done:
2399 ifbond_signal(ifb, __func__);
2400 bond_unlock();
2401 ifbond_release(ifb);
2402 bondport_free(p);
2403 return error;
2404 }
2405
2406 static int
bond_remove_interface(ifbond_ref ifb,struct ifnet * port_ifp)2407 bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp)
2408 {
2409 int active_lag = 0;
2410 int error = 0;
2411 int event_code = 0;
2412 bondport_ref head_port;
2413 struct ifnet * ifp;
2414 interface_filter_t filter;
2415 int last = FALSE;
2416 int new_link_address = FALSE;
2417 bondport_ref p;
2418 lacp_actor_partner_state s;
2419 int was_distributing;
2420
2421 bond_assert_lock_held();
2422
2423 ifbond_retain(ifb);
2424 ifbond_wait(ifb, "bond_remove_interface");
2425
2426 p = ifbond_lookup_port(ifb, port_ifp);
2427 if (p == NULL) {
2428 error = ENXIO;
2429 /* it got removed by another thread */
2430 goto signal_done;
2431 }
2432
2433 /* de-select it and remove it from the lists */
2434 was_distributing = bondport_flags_distributing(p);
2435 bondport_disable_distributing(p);
2436 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2437 bondport_set_selected(p, SelectedState_UNSELECTED);
2438 active_lag = bondport_remove_from_LAG(p);
2439 /* invalidate timers here while holding the bond_lock */
2440 bondport_invalidate_timers(p);
2441
2442 /* announce that we're Individual now */
2443 s = p->po_actor_state;
2444 s = lacp_actor_partner_state_set_individual(s);
2445 s = lacp_actor_partner_state_set_not_collecting(s);
2446 s = lacp_actor_partner_state_set_not_distributing(s);
2447 s = lacp_actor_partner_state_set_out_of_sync(s);
2448 p->po_actor_state = s;
2449 bondport_flags_set_ntt(p);
2450 }
2451
2452 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2453 ifb->ifb_port_count--;
2454
2455 ifp = ifb->ifb_ifp;
2456 head_port = TAILQ_FIRST(&ifb->ifb_port_list);
2457 if (head_port == NULL) {
2458 ifnet_set_flags(ifp, 0, IFF_RUNNING);
2459 if (ifbond_flags_lladdr(ifb) == FALSE) {
2460 last = TRUE;
2461 }
2462 ifnet_set_offload(ifp, 0);
2463 ifnet_set_mtu(ifp, ETHERMTU);
2464 ifb->ifb_altmtu = 0;
2465 } else if (ifbond_flags_lladdr(ifb) == FALSE
2466 && bcmp(&p->po_saved_addr, IF_LLADDR(ifp),
2467 ETHER_ADDR_LEN) == 0) {
2468 new_link_address = TRUE;
2469 }
2470 /* check if we need to generate a link status event */
2471 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2472 if (ifbond_selection(ifb) || active_lag) {
2473 event_code = (ifb->ifb_active_lag == NULL)
2474 ? KEV_DL_LINK_OFF
2475 : KEV_DL_LINK_ON;
2476 ifb->ifb_last_link_event = event_code;
2477 }
2478 bondport_transmit_machine(p, LAEventStart,
2479 TRANSMIT_MACHINE_TX_IMMEDIATE);
2480 } else {
2481 /* are we removing the last distributing interface? */
2482 if (was_distributing && ifb->ifb_distributing_count == 0) {
2483 ifb->ifb_last_link_event = event_code = KEV_DL_LINK_OFF;
2484 }
2485 }
2486 filter = p->po_filter;
2487 bond_unlock();
2488
2489 if (last) {
2490 ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG);
2491 } else if (new_link_address) {
2492 struct ifnet * scan_ifp;
2493 bondport_ref scan_port;
2494
2495 /* ifbond_wait() allows port list traversal without holding the lock */
2496
2497 /* this port gave the bond its ethernet address, switch to new one */
2498 ifnet_set_lladdr_and_type(ifp,
2499 &head_port->po_saved_addr, ETHER_ADDR_LEN,
2500 IFT_ETHER);
2501
2502 /* re-program each port with the new link address */
2503 TAILQ_FOREACH(scan_port, &ifb->ifb_port_list, po_port_list) {
2504 scan_ifp = scan_port->po_ifp;
2505
2506 if (!uint32_bit_is_set(scan_port->po_control_flags,
2507 PORT_CONTROL_FLAGS_LLADDR_SET)) {
2508 /* port doesn't support setting lladdr */
2509 continue;
2510 }
2511 error = if_siflladdr(scan_ifp,
2512 (const struct ether_addr *) IF_LLADDR(ifp));
2513 if (error != 0) {
2514 printf("%s(%s, %s): "
2515 "if_siflladdr (%s) failed %d\n",
2516 __func__,
2517 ifb->ifb_name, bondport_get_name(p),
2518 bondport_get_name(scan_port), error);
2519 }
2520 }
2521 }
2522
2523 /* restore the port's ethernet address */
2524 if (uint32_bit_is_set(p->po_control_flags,
2525 PORT_CONTROL_FLAGS_LLADDR_SET)) {
2526 error = if_siflladdr(port_ifp, &p->po_saved_addr);
2527 if (error != 0) {
2528 printf("%s(%s, %s): if_siflladdr failed %d\n",
2529 __func__,
2530 ifb->ifb_name, bondport_get_name(p), error);
2531 }
2532 }
2533
2534 /* disable promiscous mode (if we enabled it) */
2535 if (uint32_bit_is_set(p->po_control_flags,
2536 PORT_CONTROL_FLAGS_PROMISCUOUS_SET)) {
2537 error = ifnet_set_promiscuous(port_ifp, 0);
2538 if (error != 0) {
2539 printf("%s(%s, %s): disable promiscuous failed %d\n",
2540 __func__,
2541 ifb->ifb_name, bondport_get_name(p), error);
2542 }
2543 }
2544
2545 /* disable promiscous mode from bond (if we enabled it) */
2546 if (uint32_bit_is_set(p->po_control_flags,
2547 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET)) {
2548 error = ifnet_set_promiscuous(port_ifp, 0);
2549 if (error != 0) {
2550 printf("%s(%s, %s): disable promiscuous failed %d\n",
2551 __func__,
2552 ifb->ifb_name, bondport_get_name(p), error);
2553 }
2554 }
2555
2556 /* restore the port's MTU */
2557 error = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2558 if (error != 0) {
2559 printf("%s(%s, %s): SIOCSIFMTU %d failed %d\n",
2560 __func__,
2561 ifb->ifb_name, bondport_get_name(p),
2562 p->po_devmtu.ifdm_current, error);
2563 }
2564
2565 /* remove the bond "protocol" */
2566 bond_detach_protocol(port_ifp);
2567
2568 /* detach the filter */
2569 if (filter != NULL) {
2570 iflt_detach(filter);
2571 }
2572
2573 /* generate link event */
2574 if (event_code != 0) {
2575 interface_link_event(ifp, event_code);
2576 }
2577
2578 bond_lock();
2579 bondport_free(p);
2580 if_clear_eflags(port_ifp, IFEF_BOND);
2581 /* release this bondport's reference to the ifbond */
2582 ifbond_release(ifb);
2583
2584 signal_done:
2585 ifbond_signal(ifb, __func__);
2586 ifbond_release(ifb);
2587 return error;
2588 }
2589
2590 static void
bond_set_lacp_mode(ifbond_ref ifb)2591 bond_set_lacp_mode(ifbond_ref ifb)
2592 {
2593 bondport_ref p;
2594
2595 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2596 bondport_disable_distributing(p);
2597 bondport_start(p);
2598 }
2599 return;
2600 }
2601
2602 static void
bond_set_static_mode(ifbond_ref ifb)2603 bond_set_static_mode(ifbond_ref ifb)
2604 {
2605 bondport_ref p;
2606 lacp_actor_partner_state s;
2607
2608 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2609 bondport_disable_distributing(p);
2610 bondport_set_selected(p, SelectedState_UNSELECTED);
2611 (void)bondport_remove_from_LAG(p);
2612 bondport_cancel_timers(p);
2613
2614 /* announce that we're Individual now */
2615 s = p->po_actor_state;
2616 s = lacp_actor_partner_state_set_individual(s);
2617 s = lacp_actor_partner_state_set_not_collecting(s);
2618 s = lacp_actor_partner_state_set_not_distributing(s);
2619 s = lacp_actor_partner_state_set_out_of_sync(s);
2620 p->po_actor_state = s;
2621 bondport_flags_set_ntt(p);
2622 bondport_transmit_machine(p, LAEventStart,
2623 TRANSMIT_MACHINE_TX_IMMEDIATE);
2624 /* clear state */
2625 p->po_actor_state = 0;
2626 bzero(&p->po_partner_state, sizeof(p->po_partner_state));
2627
2628 if (media_active(&p->po_media_info)) {
2629 bondport_enable_distributing(p);
2630 } else {
2631 bondport_disable_distributing(p);
2632 }
2633 }
2634 return;
2635 }
2636
2637 static int
bond_set_mode(struct ifnet * ifp,int mode)2638 bond_set_mode(struct ifnet * ifp, int mode)
2639 {
2640 int error = 0;
2641 int event_code = 0;
2642 ifbond_ref ifb;
2643
2644 bond_lock();
2645 ifb = (ifbond_ref)ifnet_softc(ifp);
2646 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2647 bond_unlock();
2648 return (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2649 }
2650 if (ifb->ifb_mode == mode) {
2651 bond_unlock();
2652 return 0;
2653 }
2654
2655 ifbond_retain(ifb);
2656 ifbond_wait(ifb, "bond_set_mode");
2657
2658 /* verify (again) that the mode is actually different */
2659 if (ifb->ifb_mode == mode) {
2660 /* nothing to do */
2661 goto signal_done;
2662 }
2663
2664 ifb->ifb_mode = mode;
2665 if (mode == IF_BOND_MODE_LACP) {
2666 bond_set_lacp_mode(ifb);
2667
2668 /* check if we need to generate a link status event */
2669 if (ifbond_selection(ifb)) {
2670 event_code = (ifb->ifb_active_lag == NULL)
2671 ? KEV_DL_LINK_OFF
2672 : KEV_DL_LINK_ON;
2673 }
2674 } else {
2675 bond_set_static_mode(ifb);
2676 event_code = (ifb->ifb_distributing_count == 0)
2677 ? KEV_DL_LINK_OFF
2678 : KEV_DL_LINK_ON;
2679 }
2680 ifb->ifb_last_link_event = event_code;
2681
2682 signal_done:
2683 ifbond_signal(ifb, __func__);
2684 bond_unlock();
2685 ifbond_release(ifb);
2686
2687 if (event_code != 0) {
2688 interface_link_event(ifp, event_code);
2689 }
2690 return error;
2691 }
2692
2693 static int
bond_get_status(ifbond_ref ifb,struct if_bond_req * ibr_p,user_addr_t datap)2694 bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, user_addr_t datap)
2695 {
2696 int count;
2697 user_addr_t dst;
2698 int error = 0;
2699 struct if_bond_status_req * ibsr;
2700 struct if_bond_status ibs;
2701 bondport_ref port;
2702
2703 ibsr = &(ibr_p->ibr_ibru.ibru_status);
2704 if (ibsr->ibsr_version != IF_BOND_STATUS_REQ_VERSION) {
2705 return EINVAL;
2706 }
2707 ibsr->ibsr_key = ifb->ifb_key;
2708 ibsr->ibsr_mode = ifb->ifb_mode;
2709 ibsr->ibsr_total = ifb->ifb_port_count;
2710 dst = proc_is64bit(current_proc())
2711 ? ibsr->ibsr_ibsru.ibsru_buffer64
2712 : CAST_USER_ADDR_T(ibsr->ibsr_ibsru.ibsru_buffer);
2713 if (dst == USER_ADDR_NULL) {
2714 /* just want to know how many there are */
2715 goto done;
2716 }
2717 if (ibsr->ibsr_count < 0) {
2718 return EINVAL;
2719 }
2720 count = (ifb->ifb_port_count < ibsr->ibsr_count)
2721 ? ifb->ifb_port_count : ibsr->ibsr_count;
2722 TAILQ_FOREACH(port, &ifb->ifb_port_list, po_port_list) {
2723 struct if_bond_partner_state * ibps_p;
2724 partner_state_ref ps;
2725
2726 if (count == 0) {
2727 break;
2728 }
2729 bzero(&ibs, sizeof(ibs));
2730 strlcpy(ibs.ibs_if_name, port->po_name, sizeof(ibs.ibs_if_name));
2731 ibs.ibs_port_priority = port->po_priority;
2732 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2733 ibs.ibs_state = port->po_actor_state;
2734 ibs.ibs_selected_state = port->po_selected;
2735 ps = &port->po_partner_state;
2736 ibps_p = &ibs.ibs_partner_state;
2737 ibps_p->ibps_system = ps->ps_lag_info.li_system;
2738 ibps_p->ibps_system_priority = ps->ps_lag_info.li_system_priority;
2739 ibps_p->ibps_key = ps->ps_lag_info.li_key;
2740 ibps_p->ibps_port = ps->ps_port;
2741 ibps_p->ibps_port_priority = ps->ps_port_priority;
2742 ibps_p->ibps_state = ps->ps_state;
2743 } else {
2744 /* fake the selected information */
2745 ibs.ibs_selected_state = bondport_flags_distributing(port)
2746 ? SelectedState_SELECTED : SelectedState_UNSELECTED;
2747 }
2748 error = copyout(&ibs, dst, sizeof(ibs));
2749 if (error != 0) {
2750 break;
2751 }
2752 dst += sizeof(ibs);
2753 count--;
2754 }
2755
2756 done:
2757 if (error == 0) {
2758 error = copyout(ibr_p, datap, sizeof(*ibr_p));
2759 } else {
2760 (void)copyout(ibr_p, datap, sizeof(*ibr_p));
2761 }
2762 return error;
2763 }
2764
2765 static int
bond_set_promisc(struct ifnet * ifp)2766 bond_set_promisc(struct ifnet * ifp)
2767 {
2768 int error = 0;
2769 ifbond_ref ifb;
2770 bool is_promisc;
2771 bondport_ref p;
2772 int val;
2773
2774 is_promisc = (ifnet_flags(ifp) & IFF_PROMISC) != 0;
2775
2776 /* determine whether promiscuous state needs to be changed */
2777 bond_lock();
2778 ifb = (ifbond_ref)ifnet_softc(ifp);
2779 if (ifb == NULL) {
2780 bond_unlock();
2781 error = EBUSY;
2782 goto done;
2783 }
2784 if (is_promisc == ifbond_flags_promisc(ifb)) {
2785 /* already in the right state */
2786 bond_unlock();
2787 goto done;
2788 }
2789 ifbond_retain(ifb);
2790 ifbond_wait(ifb, __func__);
2791 if (ifbond_flags_if_detaching(ifb)) {
2792 /* someone destroyed the bond while we were waiting */
2793 error = EBUSY;
2794 goto signal_done;
2795 }
2796 bond_unlock();
2797
2798 /* update the promiscuous state of each memeber */
2799 val = is_promisc ? 1 : 0;
2800 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2801 struct ifnet * port_ifp = p->po_ifp;
2802 bool port_is_promisc;
2803
2804 port_is_promisc = uint32_bit_is_set(p->po_control_flags,
2805 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2806 if (port_is_promisc == is_promisc) {
2807 /* already in the right state */
2808 continue;
2809 }
2810 error = ifnet_set_promiscuous(port_ifp, val);
2811 if (error != 0) {
2812 printf("%s: ifnet_set_promiscuous(%s, %d): failed %d",
2813 ifb->ifb_name, port_ifp->if_xname, val, error);
2814 continue;
2815 }
2816 printf("%s: ifnet_set_promiscuous(%s, %d): succeeded",
2817 ifb->ifb_name, port_ifp->if_xname, val);
2818 if (is_promisc) {
2819 /* remember that we set it */
2820 uint32_bit_set(&p->po_control_flags,
2821 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2822 } else {
2823 uint32_bit_clear(&p->po_control_flags,
2824 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2825 }
2826 }
2827
2828 /* assume that updating promiscuous state succeeded */
2829 error = 0;
2830 bond_lock();
2831
2832 /* update our internal state */
2833 if (is_promisc) {
2834 ifbond_flags_set_promisc(ifb);
2835 } else {
2836 ifbond_flags_clear_promisc(ifb);
2837 }
2838
2839 signal_done:
2840 ifbond_signal(ifb, __func__);
2841 bond_unlock();
2842 ifbond_release(ifb);
2843
2844 done:
2845 return error;
2846 }
2847
2848 static void
bond_get_mtu_values(ifbond_ref ifb,int * ret_min,int * ret_max)2849 bond_get_mtu_values(ifbond_ref ifb, int * ret_min, int * ret_max)
2850 {
2851 int mtu_min = 0;
2852 int mtu_max = 0;
2853 bondport_ref p;
2854
2855 if (TAILQ_FIRST(&ifb->ifb_port_list) != NULL) {
2856 mtu_min = IF_MINMTU;
2857 }
2858 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2859 struct ifdevmtu * devmtu_p = &p->po_devmtu;
2860
2861 if (devmtu_p->ifdm_min > mtu_min) {
2862 mtu_min = devmtu_p->ifdm_min;
2863 }
2864 if (mtu_max == 0 || devmtu_p->ifdm_max < mtu_max) {
2865 mtu_max = devmtu_p->ifdm_max;
2866 }
2867 }
2868 *ret_min = mtu_min;
2869 *ret_max = mtu_max;
2870 return;
2871 }
2872
2873 static int
bond_set_mtu_on_ports(ifbond_ref ifb,int mtu)2874 bond_set_mtu_on_ports(ifbond_ref ifb, int mtu)
2875 {
2876 int error = 0;
2877 bondport_ref p;
2878
2879 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2880 error = siocsifmtu(p->po_ifp, mtu);
2881 if (error != 0) {
2882 printf("if_bond(%s): SIOCSIFMTU %s failed, %d\n",
2883 ifb->ifb_name, bondport_get_name(p), error);
2884 break;
2885 }
2886 }
2887 return error;
2888 }
2889
2890 static int
bond_set_mtu(struct ifnet * ifp,int mtu,int isdevmtu)2891 bond_set_mtu(struct ifnet * ifp, int mtu, int isdevmtu)
2892 {
2893 int error = 0;
2894 ifbond_ref ifb;
2895 int mtu_min;
2896 int mtu_max;
2897 int new_max;
2898 int old_max;
2899
2900 bond_lock();
2901 ifb = (ifbond_ref)ifnet_softc(ifp);
2902 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2903 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2904 goto done;
2905 }
2906 ifbond_retain(ifb);
2907 ifbond_wait(ifb, "bond_set_mtu");
2908
2909 /* check again */
2910 if (ifnet_softc(ifp) == NULL || ifbond_flags_if_detaching(ifb)) {
2911 error = EBUSY;
2912 goto signal_done;
2913 }
2914 bond_get_mtu_values(ifb, &mtu_min, &mtu_max);
2915 if (mtu > mtu_max) {
2916 error = EINVAL;
2917 goto signal_done;
2918 }
2919 if (mtu < mtu_min && (isdevmtu == 0 || mtu != 0)) {
2920 /* allow SIOCSIFALTMTU to set the mtu to 0 */
2921 error = EINVAL;
2922 goto signal_done;
2923 }
2924 if (isdevmtu) {
2925 new_max = (mtu > (int)ifnet_mtu(ifp)) ? mtu : (int)ifnet_mtu(ifp);
2926 } else {
2927 new_max = (mtu > ifb->ifb_altmtu) ? mtu : ifb->ifb_altmtu;
2928 }
2929 old_max = ((int)ifnet_mtu(ifp) > ifb->ifb_altmtu)
2930 ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu;
2931 if (new_max != old_max) {
2932 /* we can safely walk the list of port without the lock held */
2933 bond_unlock();
2934 error = bond_set_mtu_on_ports(ifb, new_max);
2935 if (error != 0) {
2936 /* try our best to back out of it */
2937 (void)bond_set_mtu_on_ports(ifb, old_max);
2938 }
2939 bond_lock();
2940 }
2941 if (error == 0) {
2942 if (isdevmtu) {
2943 ifb->ifb_altmtu = mtu;
2944 } else {
2945 ifnet_set_mtu(ifp, mtu);
2946 }
2947 }
2948
2949 signal_done:
2950 ifbond_signal(ifb, __func__);
2951 ifbond_release(ifb);
2952
2953 done:
2954 bond_unlock();
2955 return error;
2956 }
2957
2958 static int
bond_ioctl(struct ifnet * ifp,u_long cmd,void * data)2959 bond_ioctl(struct ifnet *ifp, u_long cmd, void * data)
2960 {
2961 int error = 0;
2962 struct if_bond_req ibr;
2963 struct ifaddr * ifa;
2964 ifbond_ref ifb;
2965 struct ifreq * ifr;
2966 struct ifmediareq *ifmr;
2967 struct ifnet * port_ifp = NULL;
2968 user_addr_t user_addr;
2969
2970 if (ifnet_type(ifp) != IFT_IEEE8023ADLAG) {
2971 return EOPNOTSUPP;
2972 }
2973 ifr = (struct ifreq *)data;
2974 ifa = (struct ifaddr *)data;
2975
2976 switch (cmd) {
2977 case SIOCSIFADDR:
2978 ifnet_set_flags(ifp, IFF_UP, IFF_UP);
2979 break;
2980
2981 case SIOCGIFMEDIA32:
2982 case SIOCGIFMEDIA64:
2983 bond_lock();
2984 ifb = (ifbond_ref)ifnet_softc(ifp);
2985 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2986 bond_unlock();
2987 return ifb == NULL ? EOPNOTSUPP : EBUSY;
2988 }
2989 ifmr = (struct ifmediareq *)data;
2990 ifmr->ifm_current = IFM_ETHER;
2991 ifmr->ifm_mask = 0;
2992 ifmr->ifm_status = IFM_AVALID;
2993 ifmr->ifm_active = IFM_ETHER;
2994 ifmr->ifm_count = 1;
2995 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2996 if (ifb->ifb_active_lag != NULL) {
2997 ifmr->ifm_active = ifb->ifb_active_lag->lag_active_media;
2998 ifmr->ifm_status |= IFM_ACTIVE;
2999 }
3000 } else if (ifb->ifb_distributing_count > 0) {
3001 ifmr->ifm_active
3002 = ifb->ifb_distributing_array[0]->po_media_info.mi_active;
3003 ifmr->ifm_status |= IFM_ACTIVE;
3004 }
3005 bond_unlock();
3006 user_addr = (cmd == SIOCGIFMEDIA64) ?
3007 ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
3008 CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
3009 if (user_addr != USER_ADDR_NULL) {
3010 error = copyout(&ifmr->ifm_current,
3011 user_addr,
3012 sizeof(int));
3013 }
3014 break;
3015
3016 case SIOCSIFMEDIA:
3017 /* XXX send the SIFMEDIA to all children? Or force autoselect? */
3018 error = EINVAL;
3019 break;
3020
3021 case SIOCGIFDEVMTU:
3022 bond_lock();
3023 ifb = (ifbond_ref)ifnet_softc(ifp);
3024 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3025 bond_unlock();
3026 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
3027 break;
3028 }
3029 ifr->ifr_devmtu.ifdm_current = bond_device_mtu(ifp, ifb);
3030 bond_get_mtu_values(ifb, &ifr->ifr_devmtu.ifdm_min,
3031 &ifr->ifr_devmtu.ifdm_max);
3032 bond_unlock();
3033 break;
3034
3035 case SIOCGIFALTMTU:
3036 bond_lock();
3037 ifb = (ifbond_ref)ifnet_softc(ifp);
3038 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3039 bond_unlock();
3040 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
3041 break;
3042 }
3043 ifr->ifr_mtu = ifb->ifb_altmtu;
3044 bond_unlock();
3045 break;
3046
3047 case SIOCSIFALTMTU:
3048 error = bond_set_mtu(ifp, ifr->ifr_mtu, 1);
3049 break;
3050
3051 case SIOCSIFMTU:
3052 error = bond_set_mtu(ifp, ifr->ifr_mtu, 0);
3053 break;
3054
3055 case SIOCSIFBOND:
3056 user_addr = proc_is64bit(current_proc())
3057 ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
3058 error = copyin(user_addr, &ibr, sizeof(ibr));
3059 if (error) {
3060 break;
3061 }
3062 switch (ibr.ibr_op) {
3063 case IF_BOND_OP_ADD_INTERFACE:
3064 case IF_BOND_OP_REMOVE_INTERFACE:
3065 port_ifp = ifunit(ibr.ibr_ibru.ibru_if_name);
3066 if (port_ifp == NULL) {
3067 error = ENXIO;
3068 break;
3069 }
3070 if (ifnet_type(port_ifp) != IFT_ETHER) {
3071 error = EPROTONOSUPPORT;
3072 break;
3073 }
3074 break;
3075 case IF_BOND_OP_SET_VERBOSE:
3076 case IF_BOND_OP_SET_MODE:
3077 break;
3078 default:
3079 error = EOPNOTSUPP;
3080 break;
3081 }
3082 if (error != 0) {
3083 break;
3084 }
3085 switch (ibr.ibr_op) {
3086 case IF_BOND_OP_ADD_INTERFACE:
3087 error = bond_add_interface(ifp, port_ifp);
3088 break;
3089 case IF_BOND_OP_REMOVE_INTERFACE:
3090 bond_lock();
3091 ifb = (ifbond_ref)ifnet_softc(ifp);
3092 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3093 bond_unlock();
3094 return ifb == NULL ? EOPNOTSUPP : EBUSY;
3095 }
3096 error = bond_remove_interface(ifb, port_ifp);
3097 bond_unlock();
3098 break;
3099 case IF_BOND_OP_SET_VERBOSE:
3100 bond_lock();
3101 if_bond_debug = ibr.ibr_ibru.ibru_int_val;
3102 bond_unlock();
3103 break;
3104 case IF_BOND_OP_SET_MODE:
3105 switch (ibr.ibr_ibru.ibru_int_val) {
3106 case IF_BOND_MODE_LACP:
3107 case IF_BOND_MODE_STATIC:
3108 break;
3109 default:
3110 error = EINVAL;
3111 break;
3112 }
3113 if (error != 0) {
3114 break;
3115 }
3116 error = bond_set_mode(ifp, ibr.ibr_ibru.ibru_int_val);
3117 break;
3118 }
3119 break; /* SIOCSIFBOND */
3120
3121 case SIOCGIFBOND:
3122 user_addr = proc_is64bit(current_proc())
3123 ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
3124 error = copyin(user_addr, &ibr, sizeof(ibr));
3125 if (error) {
3126 break;
3127 }
3128 switch (ibr.ibr_op) {
3129 case IF_BOND_OP_GET_STATUS:
3130 break;
3131 default:
3132 error = EOPNOTSUPP;
3133 break;
3134 }
3135 if (error != 0) {
3136 break;
3137 }
3138 bond_lock();
3139 ifb = (ifbond_ref)ifnet_softc(ifp);
3140 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3141 bond_unlock();
3142 return ifb == NULL ? EOPNOTSUPP : EBUSY;
3143 }
3144 switch (ibr.ibr_op) {
3145 case IF_BOND_OP_GET_STATUS:
3146 error = bond_get_status(ifb, &ibr, user_addr);
3147 break;
3148 }
3149 bond_unlock();
3150 break; /* SIOCGIFBOND */
3151
3152 case SIOCSIFLLADDR:
3153 error = EOPNOTSUPP;
3154 break;
3155
3156 case SIOCSIFFLAGS:
3157 /* enable promiscuous mode on members */
3158 error = bond_set_promisc(ifp);
3159 break;
3160
3161 case SIOCADDMULTI:
3162 case SIOCDELMULTI:
3163 error = bond_setmulti(ifp);
3164 break;
3165 default:
3166 error = EOPNOTSUPP;
3167 }
3168 return error;
3169 }
3170
3171 static void
bond_if_free(struct ifnet * ifp)3172 bond_if_free(struct ifnet * ifp)
3173 {
3174 ifbond_ref ifb;
3175
3176 if (ifp == NULL) {
3177 return;
3178 }
3179 bond_lock();
3180 ifb = (ifbond_ref)ifnet_softc(ifp);
3181 if (ifb == NULL) {
3182 bond_unlock();
3183 return;
3184 }
3185 ifbond_release(ifb);
3186 bond_unlock();
3187 ifnet_release(ifp);
3188 return;
3189 }
3190
3191 static void
bond_handle_event(struct ifnet * port_ifp,int event_code)3192 bond_handle_event(struct ifnet * port_ifp, int event_code)
3193 {
3194 struct ifnet * bond_ifp = NULL;
3195 ifbond_ref ifb;
3196 int old_distributing_count;
3197 bondport_ref p;
3198 struct media_info media_info = { .mi_active = 0, .mi_status = 0 };
3199
3200 switch (event_code) {
3201 case KEV_DL_IF_DETACHED:
3202 case KEV_DL_IF_DETACHING:
3203 break;
3204 case KEV_DL_LINK_OFF:
3205 case KEV_DL_LINK_ON:
3206 media_info = interface_media_info(port_ifp);
3207 break;
3208 default:
3209 return;
3210 }
3211 bond_lock();
3212 p = bond_lookup_port(port_ifp);
3213 if (p == NULL) {
3214 bond_unlock();
3215 return;
3216 }
3217 ifb = p->po_bond;
3218 old_distributing_count = ifb->ifb_distributing_count;
3219 switch (event_code) {
3220 case KEV_DL_IF_DETACHED:
3221 case KEV_DL_IF_DETACHING:
3222 bond_remove_interface(ifb, p->po_ifp);
3223 break;
3224 case KEV_DL_LINK_OFF:
3225 case KEV_DL_LINK_ON:
3226 p->po_media_info = media_info;
3227 if (p->po_enabled) {
3228 bondport_link_status_changed(p);
3229 }
3230 break;
3231 }
3232 /* generate a link-event */
3233 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
3234 if (ifbond_selection(ifb)) {
3235 event_code = (ifb->ifb_active_lag == NULL)
3236 ? KEV_DL_LINK_OFF
3237 : KEV_DL_LINK_ON;
3238 /* XXX need to take a reference on bond_ifp */
3239 bond_ifp = ifb->ifb_ifp;
3240 ifb->ifb_last_link_event = event_code;
3241 } else {
3242 event_code = (ifb->ifb_active_lag == NULL)
3243 ? KEV_DL_LINK_OFF
3244 : KEV_DL_LINK_ON;
3245 if (event_code != ifb->ifb_last_link_event) {
3246 if (if_bond_debug) {
3247 timestamp_printf("%s: (event) generating LINK event\n",
3248 ifb->ifb_name);
3249 }
3250 bond_ifp = ifb->ifb_ifp;
3251 ifb->ifb_last_link_event = event_code;
3252 }
3253 }
3254 } else {
3255 /*
3256 * if the distributing array membership changed from 0 <-> !0
3257 * generate a link event
3258 */
3259 if (old_distributing_count == 0
3260 && ifb->ifb_distributing_count != 0) {
3261 event_code = KEV_DL_LINK_ON;
3262 } else if (old_distributing_count != 0
3263 && ifb->ifb_distributing_count == 0) {
3264 event_code = KEV_DL_LINK_OFF;
3265 }
3266 if (event_code != 0 && event_code != ifb->ifb_last_link_event) {
3267 bond_ifp = ifb->ifb_ifp;
3268 ifb->ifb_last_link_event = event_code;
3269 }
3270 }
3271
3272 bond_unlock();
3273 if (bond_ifp != NULL) {
3274 interface_link_event(bond_ifp, event_code);
3275 }
3276 return;
3277 }
3278
3279 static void
bond_iff_event(__unused void * cookie,ifnet_t port_ifp,__unused protocol_family_t protocol,const struct kev_msg * event)3280 bond_iff_event(__unused void *cookie, ifnet_t port_ifp,
3281 __unused protocol_family_t protocol,
3282 const struct kev_msg *event)
3283 {
3284 int event_code;
3285
3286 if (event->vendor_code != KEV_VENDOR_APPLE
3287 || event->kev_class != KEV_NETWORK_CLASS
3288 || event->kev_subclass != KEV_DL_SUBCLASS) {
3289 return;
3290 }
3291 event_code = event->event_code;
3292 switch (event_code) {
3293 case KEV_DL_LINK_OFF:
3294 case KEV_DL_LINK_ON:
3295 case KEV_DL_IF_DETACHING:
3296 case KEV_DL_IF_DETACHED:
3297 bond_handle_event(port_ifp, event_code);
3298 break;
3299 default:
3300 break;
3301 }
3302 return;
3303 }
3304
3305 static void
bond_iff_detached(__unused void * cookie,ifnet_t port_ifp)3306 bond_iff_detached(__unused void *cookie, ifnet_t port_ifp)
3307 {
3308 bond_handle_event(port_ifp, KEV_DL_IF_DETACHED);
3309 return;
3310 }
3311
3312 static void
interface_link_event(struct ifnet * ifp,u_int32_t event_code)3313 interface_link_event(struct ifnet * ifp, u_int32_t event_code)
3314 {
3315 struct event {
3316 u_int32_t ifnet_family;
3317 u_int32_t unit;
3318 char if_name[IFNAMSIZ];
3319 };
3320 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
3321 struct kern_event_msg *header = (struct kern_event_msg*)message;
3322 struct event *data = (struct event *)(header + 1);
3323
3324 header->total_size = sizeof(message);
3325 header->vendor_code = KEV_VENDOR_APPLE;
3326 header->kev_class = KEV_NETWORK_CLASS;
3327 header->kev_subclass = KEV_DL_SUBCLASS;
3328 header->event_code = event_code;
3329 data->ifnet_family = ifnet_family(ifp);
3330 data->unit = (u_int32_t)ifnet_unit(ifp);
3331 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
3332 ifnet_event(ifp, header);
3333 }
3334
3335 static errno_t
bond_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)3336 bond_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
3337 char *header)
3338 {
3339 #pragma unused(protocol, packet, header)
3340 if (if_bond_debug != 0) {
3341 printf("%s: unexpected packet from %s\n", __func__,
3342 ifp->if_xname);
3343 }
3344 return 0;
3345 }
3346
3347
3348 /*
3349 * Function: bond_attach_protocol
3350 * Purpose:
3351 * Attach a DLIL protocol to the interface.
3352 *
3353 * The ethernet demux special cases to always return PF_BOND if the
3354 * interface is bonded. That means we receive all traffic from that
3355 * interface without passing any of the traffic to any other attached
3356 * protocol.
3357 */
3358 static int
bond_attach_protocol(struct ifnet * ifp)3359 bond_attach_protocol(struct ifnet *ifp)
3360 {
3361 int error;
3362 struct ifnet_attach_proto_param reg;
3363
3364 bzero(®, sizeof(reg));
3365 reg.input = bond_proto_input;
3366
3367 error = ifnet_attach_protocol(ifp, PF_BOND, ®);
3368 if (error) {
3369 printf("bond over %s%d: ifnet_attach_protocol failed, %d\n",
3370 ifnet_name(ifp), ifnet_unit(ifp), error);
3371 }
3372 return error;
3373 }
3374
3375 /*
3376 * Function: bond_detach_protocol
3377 * Purpose:
3378 * Detach our DLIL protocol from an interface
3379 */
3380 static int
bond_detach_protocol(struct ifnet * ifp)3381 bond_detach_protocol(struct ifnet *ifp)
3382 {
3383 int error;
3384
3385 error = ifnet_detach_protocol(ifp, PF_BOND);
3386 if (error) {
3387 printf("bond over %s%d: ifnet_detach_protocol failed, %d\n",
3388 ifnet_name(ifp), ifnet_unit(ifp), error);
3389 }
3390 return error;
3391 }
3392
3393 /*
3394 * Function: bond_attach_filter
3395 * Purpose:
3396 * Attach our DLIL interface filter.
3397 */
3398 static int
bond_attach_filter(struct ifnet * ifp,interface_filter_t * filter_p)3399 bond_attach_filter(struct ifnet *ifp, interface_filter_t * filter_p)
3400 {
3401 int error;
3402 struct iff_filter iff;
3403
3404 /*
3405 * install an interface filter
3406 */
3407 memset(&iff, 0, sizeof(struct iff_filter));
3408 iff.iff_name = "com.apple.kernel.bsd.net.if_bond";
3409 iff.iff_input = bond_iff_input;
3410 iff.iff_event = bond_iff_event;
3411 iff.iff_detached = bond_iff_detached;
3412 error = dlil_attach_filter(ifp, &iff, filter_p,
3413 DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
3414 if (error != 0) {
3415 printf("%s: dlil_attach_filter failed %d\n", __func__, error);
3416 }
3417 return error;
3418 }
3419
3420
3421 /*
3422 * DLIL interface family functions
3423 */
3424 extern int ether_attach_inet(ifnet_t ifp, protocol_family_t protocol_family);
3425 extern void ether_detach_inet(ifnet_t ifp, protocol_family_t protocol_family);
3426 extern int ether_attach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
3427 extern void ether_detach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
3428 extern int ether_attach_at(ifnet_t ifp, protocol_family_t protocol_family);
3429 extern void ether_detach_at(ifnet_t ifp, protocol_family_t protocol_family);
3430
3431 __private_extern__ int
bond_family_init(void)3432 bond_family_init(void)
3433 {
3434 int error = 0;
3435
3436 error = proto_register_plumber(PF_INET, APPLE_IF_FAM_BOND,
3437 ether_attach_inet,
3438 ether_detach_inet);
3439 if (error != 0) {
3440 printf("bond: proto_register_plumber failed for AF_INET error=%d\n",
3441 error);
3442 goto done;
3443 }
3444 error = proto_register_plumber(PF_INET6, APPLE_IF_FAM_BOND,
3445 ether_attach_inet6,
3446 ether_detach_inet6);
3447 if (error != 0) {
3448 printf("bond: proto_register_plumber failed for AF_INET6 error=%d\n",
3449 error);
3450 goto done;
3451 }
3452 error = bond_clone_attach();
3453 if (error != 0) {
3454 printf("bond: proto_register_plumber failed bond_clone_attach error=%d\n",
3455 error);
3456 goto done;
3457 }
3458
3459 done:
3460 return error;
3461 }
3462 /**
3463 **
3464 ** LACP routines:
3465 **
3466 **/
3467
3468 /**
3469 ** LACP ifbond_list routines
3470 **/
3471 static bondport_ref
ifbond_list_find_moved_port(bondport_ref rx_port,const lacp_actor_partner_tlv_ref atlv)3472 ifbond_list_find_moved_port(bondport_ref rx_port,
3473 const lacp_actor_partner_tlv_ref atlv)
3474 {
3475 ifbond_ref bond;
3476 bondport_ref p;
3477 partner_state_ref ps;
3478 LAG_info_ref ps_li;
3479
3480 TAILQ_FOREACH(bond, &g_bond->ifbond_list, ifb_bond_list) {
3481 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3482 if (rx_port == p) {
3483 /* no point in comparing against ourselves */
3484 continue;
3485 }
3486 if (p->po_receive_state != ReceiveState_PORT_DISABLED) {
3487 /* it's not clear that we should be checking this */
3488 continue;
3489 }
3490 ps = &p->po_partner_state;
3491 if (lacp_actor_partner_state_defaulted(ps->ps_state)) {
3492 continue;
3493 }
3494 ps_li = &ps->ps_lag_info;
3495 if (ps->ps_port == lacp_actor_partner_tlv_get_port(atlv)
3496 && bcmp(&ps_li->li_system, atlv->lap_system,
3497 sizeof(ps_li->li_system)) == 0) {
3498 if (if_bond_debug) {
3499 timestamp_printf("System " EA_FORMAT
3500 " Port 0x%x moved from %s to %s\n",
3501 EA_LIST(&ps_li->li_system), ps->ps_port,
3502 bondport_get_name(p),
3503 bondport_get_name(rx_port));
3504 }
3505 return p;
3506 }
3507 }
3508 }
3509 return NULL;
3510 }
3511
3512 /**
3513 ** LACP ifbond, LAG routines
3514 **/
3515
/*
 * Function: ifbond_selection
 * Purpose:
 *   Re-run selection for the bond: find the best LAG, switch the active
 *   LAG (or re-activate the current one if its active media changed),
 *   update each port's selected state, and then run the mux and transmit
 *   machines for every port of the bond.
 * Returns:
 *   1 if the active LAG was replaced or re-activated with different
 *   media, 0 otherwise.
 */
static int
ifbond_selection(ifbond_ref bond)
{
	int all_ports_ready = 0;
	int active_media = 0;
	LAG_ref lag = NULL;
	int lag_changed = 0;
	bondport_ref p;
	int port_speed = 0;

	lag = ifbond_find_best_LAG(bond, &active_media);
	if (lag != bond->ifb_active_lag) {
		/* the best LAG is not the active one: retire the old, adopt the new */
		if (bond->ifb_active_lag != NULL) {
			ifbond_deactivate_LAG(bond, bond->ifb_active_lag);
			bond->ifb_active_lag = NULL;
		}
		bond->ifb_active_lag = lag;
		if (lag != NULL) {
			ifbond_activate_LAG(bond, lag, active_media);
		}
		lag_changed = 1;
	} else if (lag != NULL) {
		/* same LAG, but its best media differs from what was activated */
		if (lag->lag_active_media != active_media) {
			if (if_bond_debug) {
				timestamp_printf("LAG PORT SPEED CHANGED from %d to %d\n",
				    link_speed(lag->lag_active_media),
				    link_speed(active_media));
			}
			ifbond_deactivate_LAG(bond, lag);
			ifbond_activate_LAG(bond, lag, active_media);
			lag_changed = 1;
		}
	}
	if (lag != NULL) {
		port_speed = link_speed(active_media);
		all_ports_ready = ifbond_all_ports_ready(bond);
	}
	TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
		/*
		 * Candidate for selection: in the chosen LAG, running at the
		 * LAG's speed, either detached from the mux or already
		 * SELECTED/STANDBY, and aggregatable.
		 */
		if (lag != NULL && p->po_lag == lag
		    && media_speed(&p->po_media_info) == port_speed
		    && (p->po_mux_state == MuxState_DETACHED
		    || p->po_selected == SelectedState_SELECTED
		    || p->po_selected == SelectedState_STANDBY)
		    && bondport_aggregatable(p)) {
			if (bond->ifb_max_active > 0) {
				/* limited: select until the limit is reached, then STANDBY */
				if (lag->lag_selected_port_count < bond->ifb_max_active) {
					if (p->po_selected == SelectedState_STANDBY
					    || p->po_selected == SelectedState_UNSELECTED) {
						bondport_set_selected(p, SelectedState_SELECTED);
					}
				} else if (p->po_selected == SelectedState_UNSELECTED) {
					bondport_set_selected(p, SelectedState_STANDBY);
				}
			} else {
				/* no limit: every candidate is selected */
				bondport_set_selected(p, SelectedState_SELECTED);
			}
		}
		/* deliver a pending selected-state change to the mux machine once */
		if (bondport_flags_selected_changed(p)) {
			bondport_flags_clear_selected_changed(p);
			bondport_mux_machine(p, LAEventSelectedChange, NULL);
		}
		if (all_ports_ready
		    && bondport_flags_ready(p)
		    && p->po_mux_state == MuxState_WAITING) {
			bondport_mux_machine(p, LAEventReady, NULL);
		}
		bondport_transmit_machine(p, LAEventStart, NULL);
	}
	return lag_changed;
}
3586
3587 static LAG_ref
ifbond_find_best_LAG(ifbond_ref bond,int * active_media)3588 ifbond_find_best_LAG(ifbond_ref bond, int * active_media)
3589 {
3590 int best_active = 0;
3591 LAG_ref best_lag = NULL;
3592 int best_count = 0;
3593 int best_speed = 0;
3594 LAG_ref lag;
3595
3596 if (bond->ifb_active_lag != NULL) {
3597 best_lag = bond->ifb_active_lag;
3598 best_count = LAG_get_aggregatable_port_count(best_lag, &best_active);
3599 if (bond->ifb_max_active > 0
3600 && best_count > bond->ifb_max_active) {
3601 best_count = bond->ifb_max_active;
3602 }
3603 best_speed = link_speed(best_active);
3604 }
3605 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3606 int active;
3607 int count;
3608 int speed;
3609
3610 if (lag == bond->ifb_active_lag) {
3611 /* we've already computed it */
3612 continue;
3613 }
3614 count = LAG_get_aggregatable_port_count(lag, &active);
3615 if (count == 0) {
3616 continue;
3617 }
3618 if (bond->ifb_max_active > 0
3619 && count > bond->ifb_max_active) {
3620 /* if there's a limit, don't count extra links */
3621 count = bond->ifb_max_active;
3622 }
3623 speed = link_speed(active);
3624 if ((count * speed) > (best_count * best_speed)) {
3625 best_count = count;
3626 best_speed = speed;
3627 best_active = active;
3628 best_lag = lag;
3629 }
3630 }
3631 if (best_count == 0) {
3632 return NULL;
3633 }
3634 *active_media = best_active;
3635 return best_lag;
3636 }
3637
3638 static void
ifbond_deactivate_LAG(__unused ifbond_ref bond,LAG_ref lag)3639 ifbond_deactivate_LAG(__unused ifbond_ref bond, LAG_ref lag)
3640 {
3641 bondport_ref p;
3642
3643 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3644 bondport_set_selected(p, SelectedState_UNSELECTED);
3645 }
3646 return;
3647 }
3648
3649 static void
ifbond_activate_LAG(ifbond_ref bond,LAG_ref lag,int active_media)3650 ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media)
3651 {
3652 int need = 0;
3653 bondport_ref p;
3654
3655 if (bond->ifb_max_active > 0) {
3656 need = bond->ifb_max_active;
3657 }
3658 lag->lag_active_media = active_media;
3659 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3660 if (bondport_aggregatable(p) == 0) {
3661 bondport_set_selected(p, SelectedState_UNSELECTED);
3662 } else if (media_speed(&p->po_media_info) != link_speed(active_media)) {
3663 bondport_set_selected(p, SelectedState_UNSELECTED);
3664 } else if (p->po_mux_state == MuxState_DETACHED) {
3665 if (bond->ifb_max_active > 0) {
3666 if (need > 0) {
3667 bondport_set_selected(p, SelectedState_SELECTED);
3668 need--;
3669 } else {
3670 bondport_set_selected(p, SelectedState_STANDBY);
3671 }
3672 } else {
3673 bondport_set_selected(p, SelectedState_SELECTED);
3674 }
3675 } else {
3676 bondport_set_selected(p, SelectedState_UNSELECTED);
3677 }
3678 }
3679 return;
3680 }
3681
#if 0
/*
 * Function: ifbond_set_max_active
 * Purpose:
 *   Compiled out (#if 0).  Stores the new ifb_max_active and, when the
 *   limit is positive and the active LAG has more SELECTED ports than the
 *   limit, marks the excess SELECTED ports UNSELECTED in list order.
 */
static void
ifbond_set_max_active(ifbond_ref bond, int max_active)
{
	LAG_ref lag = bond->ifb_active_lag;

	bond->ifb_max_active = max_active;
	/* nothing to trim when there is no limit or no active LAG */
	if (bond->ifb_max_active <= 0 || lag == NULL) {
		return;
	}
	if (lag->lag_selected_port_count > bond->ifb_max_active) {
		bondport_ref p;
		int remove_count;

		/* number of SELECTED ports over the new limit */
		remove_count = lag->lag_selected_port_count - bond->ifb_max_active;
		TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
			if (p->po_selected == SelectedState_SELECTED) {
				bondport_set_selected(p, SelectedState_UNSELECTED);
				remove_count--;
				if (remove_count == 0) {
					break;
				}
			}
		}
	}
	return;
}
#endif
3710
3711 static int
ifbond_all_ports_ready(ifbond_ref bond)3712 ifbond_all_ports_ready(ifbond_ref bond)
3713 {
3714 int ready = 0;
3715 bondport_ref p;
3716
3717 if (bond->ifb_active_lag == NULL) {
3718 return 0;
3719 }
3720 TAILQ_FOREACH(p, &bond->ifb_active_lag->lag_port_list, po_lag_port_list) {
3721 if (p->po_mux_state == MuxState_WAITING
3722 && p->po_selected == SelectedState_SELECTED) {
3723 if (bondport_flags_ready(p) == 0) {
3724 return 0;
3725 }
3726 }
3727 /* note that there was at least one ready port */
3728 ready = 1;
3729 }
3730 return ready;
3731 }
3732
3733 static int
ifbond_all_ports_attached(ifbond_ref bond,bondport_ref this_port)3734 ifbond_all_ports_attached(ifbond_ref bond, bondport_ref this_port)
3735 {
3736 bondport_ref p;
3737
3738 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3739 if (this_port == p) {
3740 continue;
3741 }
3742 if (bondport_flags_mux_attached(p) == 0) {
3743 return 0;
3744 }
3745 }
3746 return 1;
3747 }
3748
3749 static LAG_ref
ifbond_get_LAG_matching_port(ifbond_ref bond,bondport_ref p)3750 ifbond_get_LAG_matching_port(ifbond_ref bond, bondport_ref p)
3751 {
3752 LAG_ref lag;
3753
3754 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3755 if (bcmp(&lag->lag_info, &p->po_partner_state.ps_lag_info,
3756 sizeof(lag->lag_info)) == 0) {
3757 return lag;
3758 }
3759 }
3760 return NULL;
3761 }
3762
3763 static int
LAG_get_aggregatable_port_count(LAG_ref lag,int * active_media)3764 LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media)
3765 {
3766 int active;
3767 int count;
3768 bondport_ref p;
3769 int speed;
3770
3771 active = 0;
3772 count = 0;
3773 speed = 0;
3774 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3775 if (bondport_aggregatable(p)) {
3776 int this_speed;
3777
3778 this_speed = media_speed(&p->po_media_info);
3779 if (this_speed == 0) {
3780 continue;
3781 }
3782 if (this_speed > speed) {
3783 active = p->po_media_info.mi_active;
3784 speed = this_speed;
3785 count = 1;
3786 } else if (this_speed == speed) {
3787 count++;
3788 }
3789 }
3790 }
3791 *active_media = active;
3792 return count;
3793 }
3794
3795
3796 /**
3797 ** LACP bondport routines
3798 **/
3799 static void
bondport_link_status_changed(bondport_ref p)3800 bondport_link_status_changed(bondport_ref p)
3801 {
3802 ifbond_ref bond = p->po_bond;
3803
3804 if (if_bond_debug) {
3805 if (media_active(&p->po_media_info)) {
3806 const char * duplex_string;
3807
3808 if (media_full_duplex(&p->po_media_info)) {
3809 duplex_string = "full";
3810 } else if (media_type_unknown(&p->po_media_info)) {
3811 duplex_string = "unknown";
3812 } else {
3813 duplex_string = "half";
3814 }
3815 timestamp_printf("[%s] Link UP %d Mbit/s %s duplex\n",
3816 bondport_get_name(p),
3817 media_speed(&p->po_media_info),
3818 duplex_string);
3819 } else {
3820 timestamp_printf("[%s] Link DOWN\n",
3821 bondport_get_name(p));
3822 }
3823 }
3824 if (bond->ifb_mode == IF_BOND_MODE_LACP) {
3825 if (media_active(&p->po_media_info)
3826 && bond->ifb_active_lag != NULL
3827 && p->po_lag == bond->ifb_active_lag
3828 && p->po_selected != SelectedState_UNSELECTED) {
3829 if (media_speed(&p->po_media_info) != p->po_lag->lag_active_media) {
3830 if (if_bond_debug) {
3831 timestamp_printf("[%s] Port speed %d differs from LAG %d\n",
3832 bondport_get_name(p),
3833 media_speed(&p->po_media_info),
3834 link_speed(p->po_lag->lag_active_media));
3835 }
3836 bondport_set_selected(p, SelectedState_UNSELECTED);
3837 }
3838 }
3839 bondport_receive_machine(p, LAEventMediaChange, NULL);
3840 bondport_mux_machine(p, LAEventMediaChange, NULL);
3841 bondport_periodic_transmit_machine(p, LAEventMediaChange, NULL);
3842 } else {
3843 if (media_active(&p->po_media_info)) {
3844 bondport_enable_distributing(p);
3845 } else {
3846 bondport_disable_distributing(p);
3847 }
3848 }
3849 return;
3850 }
3851
3852 static int
bondport_aggregatable(bondport_ref p)3853 bondport_aggregatable(bondport_ref p)
3854 {
3855 partner_state_ref ps = &p->po_partner_state;
3856
3857 if (lacp_actor_partner_state_aggregatable(p->po_actor_state) == 0
3858 || lacp_actor_partner_state_aggregatable(ps->ps_state) == 0) {
3859 /* we and/or our partner are individual */
3860 return 0;
3861 }
3862 if (p->po_lag == NULL) {
3863 return 0;
3864 }
3865 switch (p->po_receive_state) {
3866 default:
3867 if (if_bond_debug) {
3868 timestamp_printf("[%s] Port is not selectable\n",
3869 bondport_get_name(p));
3870 }
3871 return 0;
3872 case ReceiveState_CURRENT:
3873 case ReceiveState_EXPIRED:
3874 break;
3875 }
3876 return 1;
3877 }
3878
3879 static int
bondport_matches_LAG(bondport_ref p,LAG_ref lag)3880 bondport_matches_LAG(bondport_ref p, LAG_ref lag)
3881 {
3882 LAG_info_ref lag_li;
3883 partner_state_ref ps;
3884 LAG_info_ref ps_li;
3885
3886 ps = &p->po_partner_state;
3887 ps_li = &ps->ps_lag_info;
3888 lag_li = &lag->lag_info;
3889 if (ps_li->li_system_priority == lag_li->li_system_priority
3890 && ps_li->li_key == lag_li->li_key
3891 && (bcmp(&ps_li->li_system, &lag_li->li_system,
3892 sizeof(lag_li->li_system))
3893 == 0)) {
3894 return 1;
3895 }
3896 return 0;
3897 }
3898
3899 static int
bondport_remove_from_LAG(bondport_ref p)3900 bondport_remove_from_LAG(bondport_ref p)
3901 {
3902 int active_lag = 0;
3903 ifbond_ref bond = p->po_bond;
3904 LAG_ref lag = p->po_lag;
3905
3906 if (lag == NULL) {
3907 return 0;
3908 }
3909 TAILQ_REMOVE(&lag->lag_port_list, p, po_lag_port_list);
3910 if (if_bond_debug) {
3911 timestamp_printf("[%s] Removed from LAG (0x%04x," EA_FORMAT
3912 ",0x%04x)\n",
3913 bondport_get_name(p),
3914 lag->lag_info.li_system_priority,
3915 EA_LIST(&lag->lag_info.li_system),
3916 lag->lag_info.li_key);
3917 }
3918 p->po_lag = NULL;
3919 lag->lag_port_count--;
3920 if (lag->lag_port_count > 0) {
3921 return bond->ifb_active_lag == lag;
3922 }
3923 if (if_bond_debug) {
3924 timestamp_printf("Key 0x%04x: LAG Released (%04x," EA_FORMAT
3925 ",0x%04x)\n",
3926 bond->ifb_key,
3927 lag->lag_info.li_system_priority,
3928 EA_LIST(&lag->lag_info.li_system),
3929 lag->lag_info.li_key);
3930 }
3931 TAILQ_REMOVE(&bond->ifb_lag_list, lag, lag_list);
3932 if (bond->ifb_active_lag == lag) {
3933 bond->ifb_active_lag = NULL;
3934 active_lag = 1;
3935 }
3936 kfree_type(struct LAG_s, lag);
3937 return active_lag;
3938 }
3939
3940 static void
bondport_add_to_LAG(bondport_ref p,LAG_ref lag)3941 bondport_add_to_LAG(bondport_ref p, LAG_ref lag)
3942 {
3943 TAILQ_INSERT_TAIL(&lag->lag_port_list, p, po_lag_port_list);
3944 p->po_lag = lag;
3945 lag->lag_port_count++;
3946 if (if_bond_debug) {
3947 timestamp_printf("[%s] Added to LAG (0x%04x," EA_FORMAT "0x%04x)\n",
3948 bondport_get_name(p),
3949 lag->lag_info.li_system_priority,
3950 EA_LIST(&lag->lag_info.li_system),
3951 lag->lag_info.li_key);
3952 }
3953 return;
3954 }
3955
3956 static void
bondport_assign_to_LAG(bondport_ref p)3957 bondport_assign_to_LAG(bondport_ref p)
3958 {
3959 ifbond_ref bond = p->po_bond;
3960 LAG_ref lag;
3961
3962 if (lacp_actor_partner_state_defaulted(p->po_actor_state)) {
3963 bondport_remove_from_LAG(p);
3964 return;
3965 }
3966 lag = p->po_lag;
3967 if (lag != NULL) {
3968 if (bondport_matches_LAG(p, lag)) {
3969 /* still OK */
3970 return;
3971 }
3972 bondport_remove_from_LAG(p);
3973 }
3974 lag = ifbond_get_LAG_matching_port(bond, p);
3975 if (lag != NULL) {
3976 bondport_add_to_LAG(p, lag);
3977 return;
3978 }
3979 lag = kalloc_type(struct LAG_s, Z_WAITOK);
3980 TAILQ_INIT(&lag->lag_port_list);
3981 lag->lag_port_count = 0;
3982 lag->lag_selected_port_count = 0;
3983 lag->lag_info = p->po_partner_state.ps_lag_info;
3984 TAILQ_INSERT_TAIL(&bond->ifb_lag_list, lag, lag_list);
3985 if (if_bond_debug) {
3986 timestamp_printf("Key 0x%04x: LAG Created (0x%04x," EA_FORMAT
3987 ",0x%04x)\n",
3988 bond->ifb_key,
3989 lag->lag_info.li_system_priority,
3990 EA_LIST(&lag->lag_info.li_system),
3991 lag->lag_info.li_key);
3992 }
3993 bondport_add_to_LAG(p, lag);
3994 return;
3995 }
3996
3997 static void
bondport_receive_lacpdu(bondport_ref p,lacpdu_ref in_lacpdu_p)3998 bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p)
3999 {
4000 bondport_ref moved_port;
4001
4002 moved_port
4003 = ifbond_list_find_moved_port(p, (const lacp_actor_partner_tlv_ref)
4004 &in_lacpdu_p->la_actor_tlv);
4005 if (moved_port != NULL) {
4006 bondport_receive_machine(moved_port, LAEventPortMoved, NULL);
4007 }
4008 bondport_receive_machine(p, LAEventPacket, in_lacpdu_p);
4009 bondport_mux_machine(p, LAEventPacket, in_lacpdu_p);
4010 bondport_periodic_transmit_machine(p, LAEventPacket, in_lacpdu_p);
4011 return;
4012 }
4013
4014 static void
bondport_set_selected(bondport_ref p,SelectedState s)4015 bondport_set_selected(bondport_ref p, SelectedState s)
4016 {
4017 if (s != p->po_selected) {
4018 ifbond_ref bond = p->po_bond;
4019 LAG_ref lag = p->po_lag;
4020
4021 bondport_flags_set_selected_changed(p);
4022 if (lag != NULL && bond->ifb_active_lag == lag) {
4023 if (p->po_selected == SelectedState_SELECTED) {
4024 lag->lag_selected_port_count--;
4025 } else if (s == SelectedState_SELECTED) {
4026 lag->lag_selected_port_count++;
4027 }
4028 if (if_bond_debug) {
4029 timestamp_printf("[%s] SetSelected: %s (was %s)\n",
4030 bondport_get_name(p),
4031 SelectedStateString(s),
4032 SelectedStateString(p->po_selected));
4033 }
4034 }
4035 }
4036 p->po_selected = s;
4037 return;
4038 }
4039
4040 /**
4041 ** Receive machine
4042 **/
4043
4044 static void
bondport_UpdateDefaultSelected(bondport_ref p)4045 bondport_UpdateDefaultSelected(bondport_ref p)
4046 {
4047 bondport_set_selected(p, SelectedState_UNSELECTED);
4048 return;
4049 }
4050
4051 static void
bondport_RecordDefault(bondport_ref p)4052 bondport_RecordDefault(bondport_ref p)
4053 {
4054 bzero(&p->po_partner_state, sizeof(p->po_partner_state));
4055 p->po_actor_state
4056 = lacp_actor_partner_state_set_defaulted(p->po_actor_state);
4057 bondport_assign_to_LAG(p);
4058 return;
4059 }
4060
4061 static void
bondport_UpdateSelected(bondport_ref p,lacpdu_ref lacpdu_p)4062 bondport_UpdateSelected(bondport_ref p, lacpdu_ref lacpdu_p)
4063 {
4064 lacp_actor_partner_tlv_ref actor;
4065 partner_state_ref ps;
4066 LAG_info_ref ps_li;
4067
4068 /* compare the PDU's Actor information to our Partner state */
4069 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
4070 ps = &p->po_partner_state;
4071 ps_li = &ps->ps_lag_info;
4072 if (lacp_actor_partner_tlv_get_port(actor) != ps->ps_port
4073 || (lacp_actor_partner_tlv_get_port_priority(actor)
4074 != ps->ps_port_priority)
4075 || bcmp(actor->lap_system, &ps_li->li_system, sizeof(ps_li->li_system))
4076 || (lacp_actor_partner_tlv_get_system_priority(actor)
4077 != ps_li->li_system_priority)
4078 || (lacp_actor_partner_tlv_get_key(actor) != ps_li->li_key)
4079 || (lacp_actor_partner_state_aggregatable(actor->lap_state)
4080 != lacp_actor_partner_state_aggregatable(ps->ps_state))) {
4081 bondport_set_selected(p, SelectedState_UNSELECTED);
4082 if (if_bond_debug) {
4083 timestamp_printf("[%s] updateSelected UNSELECTED\n",
4084 bondport_get_name(p));
4085 }
4086 }
4087 return;
4088 }
4089
/*
 * Function: bondport_RecordPDU
 * Purpose:
 *   Record the received PDU's Actor information as the port's partner
 *   state, decide whether the partner may be considered in sync, and
 *   re-evaluate the port's LAG assignment.
 *
 *   The partner state is first stored through
 *   lacp_actor_partner_state_set_out_of_sync() and is switched to in-sync
 *   only if one of the two conditions below holds.
 */
static void
bondport_RecordPDU(bondport_ref p, lacpdu_ref lacpdu_p)
{
	lacp_actor_partner_tlv_ref actor;
	ifbond_ref bond = p->po_bond;
	int lacp_maintain = 0;
	partner_state_ref ps;
	lacp_actor_partner_tlv_ref partner;
	LAG_info_ref ps_li;

	/* copy the PDU's Actor information into our Partner state */
	actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
	ps = &p->po_partner_state;
	ps_li = &ps->ps_lag_info;
	ps->ps_port = lacp_actor_partner_tlv_get_port(actor);
	ps->ps_port_priority = lacp_actor_partner_tlv_get_port_priority(actor);
	ps_li->li_system = *((lacp_system_ref)actor->lap_system);
	ps_li->li_system_priority
	        = lacp_actor_partner_tlv_get_system_priority(actor);
	ps_li->li_key = lacp_actor_partner_tlv_get_key(actor);
	ps->ps_state = lacp_actor_partner_state_set_out_of_sync(actor->lap_state);
	/* we now have real partner information, so we are no longer defaulted */
	p->po_actor_state
	        = lacp_actor_partner_state_set_not_defaulted(p->po_actor_state);

	/* compare the PDU's Partner information to our own information */
	partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;

	/*
	 * LACP is maintained when the recorded partner state is active, or
	 * when both our actor state and the PDU's Partner state are active.
	 */
	if (lacp_actor_partner_state_active_lacp(ps->ps_state)
	    || (lacp_actor_partner_state_active_lacp(p->po_actor_state)
	    && lacp_actor_partner_state_active_lacp(partner->lap_state))) {
		if (if_bond_debug) {
			timestamp_printf("[%s] recordPDU: LACP will maintain\n",
			    bondport_get_name(p));
		}
		lacp_maintain = 1;
	}
	/*
	 * In sync, case 1: the PDU's Partner information describes us exactly
	 * (port index, port priority, system, system priority, key and the
	 * aggregatable bit), the PDU's Actor is in sync, and LACP is
	 * maintained.
	 */
	if ((lacp_actor_partner_tlv_get_port(partner)
	    == bondport_get_index(p))
	    && lacp_actor_partner_tlv_get_port_priority(partner) == p->po_priority
	    && bcmp(partner->lap_system, &g_bond->system,
	    sizeof(g_bond->system)) == 0
	    && (lacp_actor_partner_tlv_get_system_priority(partner)
	    == g_bond->system_priority)
	    && lacp_actor_partner_tlv_get_key(partner) == bond->ifb_key
	    && (lacp_actor_partner_state_aggregatable(partner->lap_state)
	    == lacp_actor_partner_state_aggregatable(p->po_actor_state))
	    && lacp_actor_partner_state_in_sync(actor->lap_state)
	    && lacp_maintain) {
		ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
		if (if_bond_debug) {
			timestamp_printf("[%s] recordPDU: LACP partner in sync\n",
			    bondport_get_name(p));
		}
	/*
	 * In sync, case 2: the PDU's Actor is individual (not aggregatable),
	 * is in sync, and LACP is maintained.
	 */
	} else if (lacp_actor_partner_state_aggregatable(actor->lap_state) == 0
	    && lacp_actor_partner_state_in_sync(actor->lap_state)
	    && lacp_maintain) {
		ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
		if (if_bond_debug) {
			timestamp_printf("[%s] recordPDU: LACP partner in sync (ind)\n",
			    bondport_get_name(p));
		}
	}
	bondport_assign_to_LAG(p);
	return;
}
4155
4156 static __inline__ lacp_actor_partner_state
updateNTTBits(lacp_actor_partner_state s)4157 updateNTTBits(lacp_actor_partner_state s)
4158 {
4159 return s & (LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY
4160 | LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT
4161 | LACP_ACTOR_PARTNER_STATE_AGGREGATION
4162 | LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION);
4163 }
4164
4165 static void
bondport_UpdateNTT(bondport_ref p,lacpdu_ref lacpdu_p)4166 bondport_UpdateNTT(bondport_ref p, lacpdu_ref lacpdu_p)
4167 {
4168 ifbond_ref bond = p->po_bond;
4169 lacp_actor_partner_tlv_ref partner;
4170
4171 /* compare the PDU's Actor information to our Partner state */
4172 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
4173 if ((lacp_actor_partner_tlv_get_port(partner) != bondport_get_index(p))
4174 || lacp_actor_partner_tlv_get_port_priority(partner) != p->po_priority
4175 || bcmp(partner->lap_system, &g_bond->system, sizeof(g_bond->system))
4176 || (lacp_actor_partner_tlv_get_system_priority(partner)
4177 != g_bond->system_priority)
4178 || lacp_actor_partner_tlv_get_key(partner) != bond->ifb_key
4179 || (updateNTTBits(partner->lap_state)
4180 != updateNTTBits(p->po_actor_state))) {
4181 bondport_flags_set_ntt(p);
4182 if (if_bond_debug) {
4183 timestamp_printf("[%s] updateNTT: Need To Transmit\n",
4184 bondport_get_name(p));
4185 }
4186 }
4187 return;
4188 }
4189
4190 static void
bondport_AttachMuxToAggregator(bondport_ref p)4191 bondport_AttachMuxToAggregator(bondport_ref p)
4192 {
4193 if (bondport_flags_mux_attached(p) == 0) {
4194 if (if_bond_debug) {
4195 timestamp_printf("[%s] Attached Mux To Aggregator\n",
4196 bondport_get_name(p));
4197 }
4198 bondport_flags_set_mux_attached(p);
4199 }
4200 return;
4201 }
4202
4203 static void
bondport_DetachMuxFromAggregator(bondport_ref p)4204 bondport_DetachMuxFromAggregator(bondport_ref p)
4205 {
4206 if (bondport_flags_mux_attached(p)) {
4207 if (if_bond_debug) {
4208 timestamp_printf("[%s] Detached Mux From Aggregator\n",
4209 bondport_get_name(p));
4210 }
4211 bondport_flags_clear_mux_attached(p);
4212 }
4213 return;
4214 }
4215
4216 static void
bondport_enable_distributing(bondport_ref p)4217 bondport_enable_distributing(bondport_ref p)
4218 {
4219 if (bondport_flags_distributing(p) == 0) {
4220 ifbond_ref bond = p->po_bond;
4221
4222 bond->ifb_distributing_array[bond->ifb_distributing_count++] = p;
4223 if (if_bond_debug) {
4224 timestamp_printf("[%s] Distribution Enabled\n",
4225 bondport_get_name(p));
4226 }
4227 bondport_flags_set_distributing(p);
4228 }
4229 return;
4230 }
4231
4232 static void
bondport_disable_distributing(bondport_ref p)4233 bondport_disable_distributing(bondport_ref p)
4234 {
4235 if (bondport_flags_distributing(p)) {
4236 bondport_ref * array;
4237 ifbond_ref bond;
4238 int count;
4239 int i;
4240
4241 bond = p->po_bond;
4242 array = bond->ifb_distributing_array;
4243 count = bond->ifb_distributing_count;
4244 for (i = 0; i < count; i++) {
4245 if (array[i] == p) {
4246 int j;
4247
4248 for (j = i; j < (count - 1); j++) {
4249 array[j] = array[j + 1];
4250 }
4251 break;
4252 }
4253 }
4254 bond->ifb_distributing_count--;
4255 if (if_bond_debug) {
4256 timestamp_printf("[%s] Distribution Disabled\n",
4257 bondport_get_name(p));
4258 }
4259 bondport_flags_clear_distributing(p);
4260 }
4261 return;
4262 }
4263
4264 /**
4265 ** Receive machine functions
4266 **/
4267 static void
4268 bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
4269 void * event_data);
4270 static void
4271 bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
4272 void * event_data);
4273 static void
4274 bondport_receive_machine_expired(bondport_ref p, LAEvent event,
4275 void * event_data);
4276 static void
4277 bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
4278 void * event_data);
4279 static void
4280 bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
4281 void * event_data);
4282 static void
4283 bondport_receive_machine_current(bondport_ref p, LAEvent event,
4284 void * event_data);
4285
4286 static void
bondport_receive_machine_event(bondport_ref p,LAEvent event,void * event_data)4287 bondport_receive_machine_event(bondport_ref p, LAEvent event,
4288 void * event_data)
4289 {
4290 switch (p->po_receive_state) {
4291 case ReceiveState_none:
4292 bondport_receive_machine_initialize(p, LAEventStart, NULL);
4293 break;
4294 case ReceiveState_INITIALIZE:
4295 bondport_receive_machine_initialize(p, event, event_data);
4296 break;
4297 case ReceiveState_PORT_DISABLED:
4298 bondport_receive_machine_port_disabled(p, event, event_data);
4299 break;
4300 case ReceiveState_EXPIRED:
4301 bondport_receive_machine_expired(p, event, event_data);
4302 break;
4303 case ReceiveState_LACP_DISABLED:
4304 bondport_receive_machine_lacp_disabled(p, event, event_data);
4305 break;
4306 case ReceiveState_DEFAULTED:
4307 bondport_receive_machine_defaulted(p, event, event_data);
4308 break;
4309 case ReceiveState_CURRENT:
4310 bondport_receive_machine_current(p, event, event_data);
4311 break;
4312 default:
4313 break;
4314 }
4315 return;
4316 }
4317
4318 static void
bondport_receive_machine(bondport_ref p,LAEvent event,void * event_data)4319 bondport_receive_machine(bondport_ref p, LAEvent event,
4320 void * event_data)
4321 {
4322 switch (event) {
4323 case LAEventPacket:
4324 if (p->po_receive_state != ReceiveState_LACP_DISABLED) {
4325 bondport_receive_machine_current(p, event, event_data);
4326 }
4327 break;
4328 case LAEventMediaChange:
4329 if (media_active(&p->po_media_info)) {
4330 switch (p->po_receive_state) {
4331 case ReceiveState_PORT_DISABLED:
4332 case ReceiveState_LACP_DISABLED:
4333 bondport_receive_machine_port_disabled(p, LAEventMediaChange, NULL);
4334 break;
4335 default:
4336 break;
4337 }
4338 } else {
4339 bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
4340 }
4341 break;
4342 default:
4343 bondport_receive_machine_event(p, event, event_data);
4344 break;
4345 }
4346 return;
4347 }
4348
4349 static void
bondport_receive_machine_initialize(bondport_ref p,LAEvent event,__unused void * event_data)4350 bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
4351 __unused void * event_data)
4352 {
4353 switch (event) {
4354 case LAEventStart:
4355 devtimer_cancel(p->po_current_while_timer);
4356 if (if_bond_debug) {
4357 timestamp_printf("[%s] Receive INITIALIZE\n",
4358 bondport_get_name(p));
4359 }
4360 p->po_receive_state = ReceiveState_INITIALIZE;
4361 bondport_set_selected(p, SelectedState_UNSELECTED);
4362 bondport_RecordDefault(p);
4363 p->po_actor_state
4364 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4365 bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
4366 break;
4367 default:
4368 break;
4369 }
4370 return;
4371 }
4372
/*
 * Function: bondport_receive_machine_port_disabled
 * Purpose:
 *   PORT_DISABLED state of the receive machine.
 *
 *   LAEventStart enters the state and then falls through to the
 *   LAEventMediaChange handling, which moves on to EXPIRED or
 *   LACP_DISABLED when the media is active.  With inactive media, a
 *   SELECTED port is given a one second timer before being marked
 *   UNSELECTED (LAEventTimeout), and a STANDBY port is marked UNSELECTED
 *   immediately.  LAEventPortMoved restarts the machine from INITIALIZE.
 */
static void
bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
    __unused void * event_data)
{
	partner_state_ref ps;

	switch (event) {
	case LAEventStart:
		devtimer_cancel(p->po_current_while_timer);
		if (if_bond_debug) {
			timestamp_printf("[%s] Receive PORT_DISABLED\n",
			    bondport_get_name(p));
		}
		p->po_receive_state = ReceiveState_PORT_DISABLED;
		ps = &p->po_partner_state;
		ps->ps_state = lacp_actor_partner_state_set_out_of_sync(ps->ps_state);
		/* after entering the state, evaluate the media like a media change */
		OS_FALLTHROUGH;
	case LAEventMediaChange:
		if (media_active(&p->po_media_info)) {
			/* link is up: leave PORT_DISABLED depending on media_ok() */
			if (media_ok(&p->po_media_info)) {
				bondport_receive_machine_expired(p, LAEventStart, NULL);
			} else {
				bondport_receive_machine_lacp_disabled(p, LAEventStart, NULL);
			}
		} else if (p->po_selected == SelectedState_SELECTED) {
			struct timeval tv;

			if (if_bond_debug) {
				timestamp_printf("[%s] Receive PORT_DISABLED: "
				    "link timer started\n",
				    bondport_get_name(p));
			}
			/*
			 * Link is down on a SELECTED port: arm a one second timer
			 * whose callback is this function, with LAEventTimeout
			 * passed as the first timer argument.
			 */
			tv.tv_sec = 1;
			tv.tv_usec = 0;
			devtimer_set_relative(p->po_current_while_timer, tv,
			    (devtimer_timeout_func)
			    bondport_receive_machine_port_disabled,
			    (void *)LAEventTimeout, NULL);
		} else if (p->po_selected == SelectedState_STANDBY) {
			bondport_set_selected(p, SelectedState_UNSELECTED);
		}
		break;
	case LAEventTimeout:
		/* the link timer fired; unselect only if the port is still SELECTED */
		if (p->po_selected == SelectedState_SELECTED) {
			if (if_bond_debug) {
				timestamp_printf("[%s] Receive PORT_DISABLED: "
				    "link timer completed, marking UNSELECTED\n",
				    bondport_get_name(p));
			}
			bondport_set_selected(p, SelectedState_UNSELECTED);
		}
		break;
	case LAEventPortMoved:
		bondport_receive_machine_initialize(p, LAEventStart, NULL);
		break;
	default:
		break;
	}
	return;
}
4433
4434 static void
bondport_receive_machine_expired(bondport_ref p,LAEvent event,__unused void * event_data)4435 bondport_receive_machine_expired(bondport_ref p, LAEvent event,
4436 __unused void * event_data)
4437 {
4438 lacp_actor_partner_state s;
4439 struct timeval tv;
4440
4441 switch (event) {
4442 case LAEventStart:
4443 devtimer_cancel(p->po_current_while_timer);
4444 if (if_bond_debug) {
4445 timestamp_printf("[%s] Receive EXPIRED\n",
4446 bondport_get_name(p));
4447 }
4448 p->po_receive_state = ReceiveState_EXPIRED;
4449 s = p->po_partner_state.ps_state;
4450 s = lacp_actor_partner_state_set_out_of_sync(s);
4451 s = lacp_actor_partner_state_set_short_timeout(s);
4452 p->po_partner_state.ps_state = s;
4453 p->po_actor_state
4454 = lacp_actor_partner_state_set_expired(p->po_actor_state);
4455 /* start current_while timer */
4456 tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4457 tv.tv_usec = 0;
4458 devtimer_set_relative(p->po_current_while_timer, tv,
4459 (devtimer_timeout_func)
4460 bondport_receive_machine_expired,
4461 (void *)LAEventTimeout, NULL);
4462
4463 break;
4464 case LAEventTimeout:
4465 bondport_receive_machine_defaulted(p, LAEventStart, NULL);
4466 break;
4467 default:
4468 break;
4469 }
4470 return;
4471 }
4472
4473 static void
bondport_receive_machine_lacp_disabled(bondport_ref p,LAEvent event,__unused void * event_data)4474 bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
4475 __unused void * event_data)
4476 {
4477 partner_state_ref ps;
4478 switch (event) {
4479 case LAEventStart:
4480 devtimer_cancel(p->po_current_while_timer);
4481 if (if_bond_debug) {
4482 timestamp_printf("[%s] Receive LACP_DISABLED\n",
4483 bondport_get_name(p));
4484 }
4485 p->po_receive_state = ReceiveState_LACP_DISABLED;
4486 bondport_set_selected(p, SelectedState_UNSELECTED);
4487 bondport_RecordDefault(p);
4488 ps = &p->po_partner_state;
4489 ps->ps_state = lacp_actor_partner_state_set_individual(ps->ps_state);
4490 p->po_actor_state
4491 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4492 break;
4493 default:
4494 break;
4495 }
4496 return;
4497 }
4498
4499 static void
bondport_receive_machine_defaulted(bondport_ref p,LAEvent event,__unused void * event_data)4500 bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
4501 __unused void * event_data)
4502 {
4503 switch (event) {
4504 case LAEventStart:
4505 devtimer_cancel(p->po_current_while_timer);
4506 if (if_bond_debug) {
4507 timestamp_printf("[%s] Receive DEFAULTED\n",
4508 bondport_get_name(p));
4509 }
4510 p->po_receive_state = ReceiveState_DEFAULTED;
4511 bondport_UpdateDefaultSelected(p);
4512 bondport_RecordDefault(p);
4513 p->po_actor_state
4514 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4515 break;
4516 default:
4517 break;
4518 }
4519 return;
4520 }
4521
4522 static void
bondport_receive_machine_current(bondport_ref p,LAEvent event,void * event_data)4523 bondport_receive_machine_current(bondport_ref p, LAEvent event,
4524 void * event_data)
4525 {
4526 partner_state_ref ps;
4527 struct timeval tv;
4528
4529 switch (event) {
4530 case LAEventPacket:
4531 devtimer_cancel(p->po_current_while_timer);
4532 if (if_bond_debug) {
4533 timestamp_printf("[%s] Receive CURRENT\n",
4534 bondport_get_name(p));
4535 }
4536 p->po_receive_state = ReceiveState_CURRENT;
4537 bondport_UpdateSelected(p, event_data);
4538 bondport_UpdateNTT(p, event_data);
4539 bondport_RecordPDU(p, event_data);
4540 p->po_actor_state
4541 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4542 bondport_assign_to_LAG(p);
4543 /* start current_while timer */
4544 ps = &p->po_partner_state;
4545 if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4546 tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4547 } else {
4548 tv.tv_sec = LACP_LONG_TIMEOUT_TIME;
4549 }
4550 tv.tv_usec = 0;
4551 devtimer_set_relative(p->po_current_while_timer, tv,
4552 (devtimer_timeout_func)
4553 bondport_receive_machine_current,
4554 (void *)LAEventTimeout, NULL);
4555 break;
4556 case LAEventTimeout:
4557 bondport_receive_machine_expired(p, LAEventStart, NULL);
4558 break;
4559 default:
4560 break;
4561 }
4562 return;
4563 }
4564
4565 /**
4566 ** Periodic Transmission machine
4567 **/
4568
4569 static void
bondport_periodic_transmit_machine(bondport_ref p,LAEvent event,__unused void * event_data)4570 bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
4571 __unused void * event_data)
4572 {
4573 int interval;
4574 partner_state_ref ps;
4575 struct timeval tv;
4576
4577 switch (event) {
4578 case LAEventStart:
4579 if (if_bond_debug) {
4580 timestamp_printf("[%s] periodic_transmit Start\n",
4581 bondport_get_name(p));
4582 }
4583 OS_FALLTHROUGH;
4584 case LAEventMediaChange:
4585 devtimer_cancel(p->po_periodic_timer);
4586 p->po_periodic_interval = 0;
4587 if (media_active(&p->po_media_info) == 0
4588 || media_ok(&p->po_media_info) == 0) {
4589 break;
4590 }
4591 OS_FALLTHROUGH;
4592 case LAEventPacket:
4593 /* Neither Partner nor Actor are LACP Active, no periodic tx */
4594 ps = &p->po_partner_state;
4595 if (lacp_actor_partner_state_active_lacp(p->po_actor_state) == 0
4596 && (lacp_actor_partner_state_active_lacp(ps->ps_state)
4597 == 0)) {
4598 devtimer_cancel(p->po_periodic_timer);
4599 p->po_periodic_interval = 0;
4600 break;
4601 }
4602 if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4603 interval = LACP_FAST_PERIODIC_TIME;
4604 } else {
4605 interval = LACP_SLOW_PERIODIC_TIME;
4606 }
4607 if (p->po_periodic_interval != interval) {
4608 if (interval == LACP_FAST_PERIODIC_TIME
4609 && p->po_periodic_interval == LACP_SLOW_PERIODIC_TIME) {
4610 if (if_bond_debug) {
4611 timestamp_printf("[%s] periodic_transmit:"
4612 " Need To Transmit\n",
4613 bondport_get_name(p));
4614 }
4615 bondport_flags_set_ntt(p);
4616 }
4617 p->po_periodic_interval = interval;
4618 tv.tv_usec = 0;
4619 tv.tv_sec = interval;
4620 devtimer_set_relative(p->po_periodic_timer, tv,
4621 (devtimer_timeout_func)
4622 bondport_periodic_transmit_machine,
4623 (void *)LAEventTimeout, NULL);
4624 if (if_bond_debug) {
4625 timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n",
4626 bondport_get_name(p),
4627 p->po_periodic_interval);
4628 }
4629 }
4630 break;
4631 case LAEventTimeout:
4632 bondport_flags_set_ntt(p);
4633 tv.tv_sec = p->po_periodic_interval;
4634 tv.tv_usec = 0;
4635 devtimer_set_relative(p->po_periodic_timer, tv, (devtimer_timeout_func)
4636 bondport_periodic_transmit_machine,
4637 (void *)LAEventTimeout, NULL);
4638 if (if_bond_debug > 1) {
4639 timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n",
4640 bondport_get_name(p), p->po_periodic_interval);
4641 }
4642 break;
4643 default:
4644 break;
4645 }
4646 return;
4647 }
4648
4649 /**
4650 ** Transmit machine
4651 **/
4652 static int
bondport_can_transmit(bondport_ref p,int32_t current_secs,__darwin_time_t * next_secs)4653 bondport_can_transmit(bondport_ref p, int32_t current_secs,
4654 __darwin_time_t * next_secs)
4655 {
4656 if (p->po_last_transmit_secs != current_secs) {
4657 p->po_last_transmit_secs = current_secs;
4658 p->po_n_transmit = 0;
4659 }
4660 if (p->po_n_transmit < LACP_PACKET_RATE) {
4661 p->po_n_transmit++;
4662 return 1;
4663 }
4664 if (next_secs != NULL) {
4665 *next_secs = current_secs + 1;
4666 }
4667 return 0;
4668 }
4669
4670 static void
bondport_transmit_machine(bondport_ref p,LAEvent event,void * event_data)4671 bondport_transmit_machine(bondport_ref p, LAEvent event,
4672 void * event_data)
4673 {
4674 lacp_actor_partner_tlv_ref aptlv;
4675 lacp_collector_tlv_ref ctlv;
4676 struct timeval next_tick_time = {.tv_sec = 0, .tv_usec = 0};
4677 lacpdu_ref out_lacpdu_p;
4678 packet_buffer_ref pkt;
4679 partner_state_ref ps;
4680 LAG_info_ref ps_li;
4681
4682 switch (event) {
4683 case LAEventTimeout:
4684 case LAEventStart:
4685 if (p->po_periodic_interval == 0 || bondport_flags_ntt(p) == 0) {
4686 break;
4687 }
4688 if (event_data == TRANSMIT_MACHINE_TX_IMMEDIATE) {
4689 /* we're going away, transmit the packet no matter what */
4690 } else if (bondport_can_transmit(p, devtimer_current_secs(),
4691 &next_tick_time.tv_sec) == 0) {
4692 if (devtimer_enabled(p->po_transmit_timer)) {
4693 if (if_bond_debug > 0) {
4694 timestamp_printf("[%s] Transmit Timer Already Set\n",
4695 bondport_get_name(p));
4696 }
4697 } else {
4698 devtimer_set_absolute(p->po_transmit_timer, next_tick_time,
4699 (devtimer_timeout_func)
4700 bondport_transmit_machine,
4701 (void *)LAEventTimeout, NULL);
4702 if (if_bond_debug > 0) {
4703 timestamp_printf("[%s] Transmit Timer Deadline %d secs\n",
4704 bondport_get_name(p),
4705 (int)next_tick_time.tv_sec);
4706 }
4707 }
4708 break;
4709 }
4710 if (if_bond_debug > 0) {
4711 if (event == LAEventTimeout) {
4712 timestamp_printf("[%s] Transmit Timer Complete\n",
4713 bondport_get_name(p));
4714 }
4715 }
4716 pkt = packet_buffer_allocate(sizeof(*out_lacpdu_p));
4717 if (pkt == NULL) {
4718 printf("[%s] Transmit: failed to allocate packet buffer\n",
4719 bondport_get_name(p));
4720 break;
4721 }
4722 out_lacpdu_p = (lacpdu_ref)packet_buffer_byteptr(pkt);
4723 bzero(out_lacpdu_p, sizeof(*out_lacpdu_p));
4724 out_lacpdu_p->la_subtype = IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP;
4725 out_lacpdu_p->la_version = LACPDU_VERSION_1;
4726
4727 /* Actor */
4728 aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_actor_tlv;
4729 aptlv->lap_tlv_type = LACPDU_TLV_TYPE_ACTOR;
4730 aptlv->lap_length = LACPDU_ACTOR_TLV_LENGTH;
4731 *((lacp_system_ref)aptlv->lap_system) = g_bond->system;
4732 lacp_actor_partner_tlv_set_system_priority(aptlv,
4733 g_bond->system_priority);
4734 lacp_actor_partner_tlv_set_port_priority(aptlv, p->po_priority);
4735 lacp_actor_partner_tlv_set_port(aptlv, bondport_get_index(p));
4736 lacp_actor_partner_tlv_set_key(aptlv, p->po_bond->ifb_key);
4737 aptlv->lap_state = p->po_actor_state;
4738
4739 /* Partner */
4740 aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_partner_tlv;
4741 aptlv->lap_tlv_type = LACPDU_TLV_TYPE_PARTNER;
4742 aptlv->lap_length = LACPDU_PARTNER_TLV_LENGTH;
4743 ps = &p->po_partner_state;
4744 ps_li = &ps->ps_lag_info;
4745 lacp_actor_partner_tlv_set_port(aptlv, ps->ps_port);
4746 lacp_actor_partner_tlv_set_port_priority(aptlv, ps->ps_port_priority);
4747 *((lacp_system_ref)aptlv->lap_system) = ps_li->li_system;
4748 lacp_actor_partner_tlv_set_system_priority(aptlv,
4749 ps_li->li_system_priority);
4750 lacp_actor_partner_tlv_set_key(aptlv, ps_li->li_key);
4751 aptlv->lap_state = ps->ps_state;
4752
4753 /* Collector */
4754 ctlv = (lacp_collector_tlv_ref)out_lacpdu_p->la_collector_tlv;
4755 ctlv->lac_tlv_type = LACPDU_TLV_TYPE_COLLECTOR;
4756 ctlv->lac_length = LACPDU_COLLECTOR_TLV_LENGTH;
4757
4758 bondport_slow_proto_transmit(p, pkt);
4759 bondport_flags_clear_ntt(p);
4760 if (if_bond_debug > 0) {
4761 timestamp_printf("[%s] Transmit Packet %d\n",
4762 bondport_get_name(p), p->po_n_transmit);
4763 }
4764 break;
4765 default:
4766 break;
4767 }
4768 return;
4769 }
4770
4771 /**
4772 ** Mux machine functions
4773 **/
4774
4775 static void
4776 bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4777 void * event_data);
4778 static void
4779 bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4780 void * event_data);
4781 static void
4782 bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4783 void * event_data);
4784
4785 static void
4786 bondport_mux_machine_collecting_distributing(bondport_ref p, LAEvent event,
4787 void * event_data);
4788
4789 static void
bondport_mux_machine(bondport_ref p,LAEvent event,void * event_data)4790 bondport_mux_machine(bondport_ref p, LAEvent event, void * event_data)
4791 {
4792 switch (p->po_mux_state) {
4793 case MuxState_none:
4794 bondport_mux_machine_detached(p, LAEventStart, NULL);
4795 break;
4796 case MuxState_DETACHED:
4797 bondport_mux_machine_detached(p, event, event_data);
4798 break;
4799 case MuxState_WAITING:
4800 bondport_mux_machine_waiting(p, event, event_data);
4801 break;
4802 case MuxState_ATTACHED:
4803 bondport_mux_machine_attached(p, event, event_data);
4804 break;
4805 case MuxState_COLLECTING_DISTRIBUTING:
4806 bondport_mux_machine_collecting_distributing(p, event, event_data);
4807 break;
4808 default:
4809 break;
4810 }
4811 return;
4812 }
4813
4814 static void
bondport_mux_machine_detached(bondport_ref p,LAEvent event,__unused void * event_data)4815 bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4816 __unused void * event_data)
4817 {
4818 lacp_actor_partner_state s;
4819
4820 switch (event) {
4821 case LAEventStart:
4822 devtimer_cancel(p->po_wait_while_timer);
4823 if (if_bond_debug) {
4824 timestamp_printf("[%s] Mux DETACHED\n",
4825 bondport_get_name(p));
4826 }
4827 p->po_mux_state = MuxState_DETACHED;
4828 bondport_flags_clear_ready(p);
4829 bondport_DetachMuxFromAggregator(p);
4830 bondport_disable_distributing(p);
4831 s = p->po_actor_state;
4832 s = lacp_actor_partner_state_set_out_of_sync(s);
4833 s = lacp_actor_partner_state_set_not_collecting(s);
4834 s = lacp_actor_partner_state_set_not_distributing(s);
4835 p->po_actor_state = s;
4836 bondport_flags_set_ntt(p);
4837 break;
4838 case LAEventSelectedChange:
4839 case LAEventPacket:
4840 case LAEventMediaChange:
4841 if (p->po_selected == SelectedState_SELECTED
4842 || p->po_selected == SelectedState_STANDBY) {
4843 bondport_mux_machine_waiting(p, LAEventStart, NULL);
4844 }
4845 break;
4846 default:
4847 break;
4848 }
4849 return;
4850 }
4851
4852 static void
bondport_mux_machine_waiting(bondport_ref p,LAEvent event,__unused void * event_data)4853 bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4854 __unused void * event_data)
4855 {
4856 struct timeval tv;
4857
4858 switch (event) {
4859 case LAEventStart:
4860 devtimer_cancel(p->po_wait_while_timer);
4861 if (if_bond_debug) {
4862 timestamp_printf("[%s] Mux WAITING\n",
4863 bondport_get_name(p));
4864 }
4865 p->po_mux_state = MuxState_WAITING;
4866 OS_FALLTHROUGH;
4867 default:
4868 case LAEventSelectedChange:
4869 if (p->po_selected == SelectedState_UNSELECTED) {
4870 bondport_mux_machine_detached(p, LAEventStart, NULL);
4871 break;
4872 }
4873 if (p->po_selected == SelectedState_STANDBY) {
4874 devtimer_cancel(p->po_wait_while_timer);
4875 /* wait until state changes to SELECTED */
4876 if (if_bond_debug) {
4877 timestamp_printf("[%s] Mux WAITING: Standby\n",
4878 bondport_get_name(p));
4879 }
4880 break;
4881 }
4882 if (bondport_flags_ready(p)) {
4883 if (if_bond_debug) {
4884 timestamp_printf("[%s] Mux WAITING: Port is already ready\n",
4885 bondport_get_name(p));
4886 }
4887 break;
4888 }
4889 if (devtimer_enabled(p->po_wait_while_timer)) {
4890 if (if_bond_debug) {
4891 timestamp_printf("[%s] Mux WAITING: Timer already set\n",
4892 bondport_get_name(p));
4893 }
4894 break;
4895 }
4896 if (ifbond_all_ports_attached(p->po_bond, p)) {
4897 devtimer_cancel(p->po_wait_while_timer);
4898 if (if_bond_debug) {
4899 timestamp_printf("[%s] Mux WAITING: No waiting\n",
4900 bondport_get_name(p));
4901 }
4902 bondport_flags_set_ready(p);
4903 goto no_waiting;
4904 }
4905 if (if_bond_debug) {
4906 timestamp_printf("[%s] Mux WAITING: 2 seconds\n",
4907 bondport_get_name(p));
4908 }
4909 tv.tv_sec = LACP_AGGREGATE_WAIT_TIME;
4910 tv.tv_usec = 0;
4911 devtimer_set_relative(p->po_wait_while_timer, tv,
4912 (devtimer_timeout_func)
4913 bondport_mux_machine_waiting,
4914 (void *)LAEventTimeout, NULL);
4915 break;
4916 case LAEventTimeout:
4917 if (if_bond_debug) {
4918 timestamp_printf("[%s] Mux WAITING: Ready\n",
4919 bondport_get_name(p));
4920 }
4921 bondport_flags_set_ready(p);
4922 break;
4923 case LAEventReady:
4924 no_waiting:
4925 if (bondport_flags_ready(p)) {
4926 if (if_bond_debug) {
4927 timestamp_printf("[%s] Mux WAITING: All Ports Ready\n",
4928 bondport_get_name(p));
4929 }
4930 bondport_mux_machine_attached(p, LAEventStart, NULL);
4931 break;
4932 }
4933 break;
4934 }
4935 return;
4936 }
4937
4938 static void
bondport_mux_machine_attached(bondport_ref p,LAEvent event,__unused void * event_data)4939 bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4940 __unused void * event_data)
4941 {
4942 lacp_actor_partner_state s;
4943
4944 switch (event) {
4945 case LAEventStart:
4946 devtimer_cancel(p->po_wait_while_timer);
4947 if (if_bond_debug) {
4948 timestamp_printf("[%s] Mux ATTACHED\n",
4949 bondport_get_name(p));
4950 }
4951 p->po_mux_state = MuxState_ATTACHED;
4952 bondport_AttachMuxToAggregator(p);
4953 s = p->po_actor_state;
4954 s = lacp_actor_partner_state_set_in_sync(s);
4955 s = lacp_actor_partner_state_set_not_collecting(s);
4956 s = lacp_actor_partner_state_set_not_distributing(s);
4957 bondport_disable_distributing(p);
4958 p->po_actor_state = s;
4959 bondport_flags_set_ntt(p);
4960 OS_FALLTHROUGH;
4961 default:
4962 switch (p->po_selected) {
4963 case SelectedState_SELECTED:
4964 s = p->po_partner_state.ps_state;
4965 if (lacp_actor_partner_state_in_sync(s)) {
4966 bondport_mux_machine_collecting_distributing(p, LAEventStart,
4967 NULL);
4968 }
4969 break;
4970 default:
4971 bondport_mux_machine_detached(p, LAEventStart, NULL);
4972 break;
4973 }
4974 break;
4975 }
4976 return;
4977 }
4978
4979 static void
bondport_mux_machine_collecting_distributing(bondport_ref p,LAEvent event,__unused void * event_data)4980 bondport_mux_machine_collecting_distributing(bondport_ref p,
4981 LAEvent event,
4982 __unused void * event_data)
4983 {
4984 lacp_actor_partner_state s;
4985
4986 switch (event) {
4987 case LAEventStart:
4988 devtimer_cancel(p->po_wait_while_timer);
4989 if (if_bond_debug) {
4990 timestamp_printf("[%s] Mux COLLECTING_DISTRIBUTING\n",
4991 bondport_get_name(p));
4992 }
4993 p->po_mux_state = MuxState_COLLECTING_DISTRIBUTING;
4994 bondport_enable_distributing(p);
4995 s = p->po_actor_state;
4996 s = lacp_actor_partner_state_set_collecting(s);
4997 s = lacp_actor_partner_state_set_distributing(s);
4998 p->po_actor_state = s;
4999 bondport_flags_set_ntt(p);
5000 OS_FALLTHROUGH;
5001 default:
5002 s = p->po_partner_state.ps_state;
5003 if (lacp_actor_partner_state_in_sync(s) == 0) {
5004 bondport_mux_machine_attached(p, LAEventStart, NULL);
5005 break;
5006 }
5007 switch (p->po_selected) {
5008 case SelectedState_UNSELECTED:
5009 case SelectedState_STANDBY:
5010 bondport_mux_machine_attached(p, LAEventStart, NULL);
5011 break;
5012 default:
5013 break;
5014 }
5015 break;
5016 }
5017 return;
5018 }
5019