1 /*
2 * Copyright (c) 2004-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * if_bond.c
31 * - bond/failover interface
32 * - implements IEEE 802.3ad Link Aggregation
33 */
34
35 /*
36 * Modification History:
37 *
38 * April 29, 2004 Dieter Siegmund ([email protected])
39 * - created
40 */
41
42 #include <sys/param.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/queue.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51 #include <sys/kern_event.h>
52 #include <net/bpf.h>
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/kpi_interface.h>
56 #include <net/kpi_interfacefilter.h>
57 #include <net/if_arp.h>
58 #include <net/if_dl.h>
59 #include <net/if_ether.h>
60 #include <net/if_types.h>
61 #include <net/if_bond_var.h>
62 #include <net/ieee8023ad.h>
63 #include <net/lacp.h>
64 #include <net/dlil.h>
65 #include <sys/time.h>
66 #include <net/devtimer.h>
67 #include <net/if_vlan_var.h>
68 #include <net/kpi_protocol.h>
69 #include <sys/protosw.h>
70 #include <kern/locks.h>
71 #include <kern/zalloc.h>
72 #include <os/refcnt.h>
73
74 #include <netinet/in.h>
75 #include <netinet/if_ether.h>
76 #include <netinet/in_systm.h>
77 #include <netinet/ip.h>
78 #include <netinet/ip6.h>
79
80 #include <net/if_media.h>
81 #include <net/multicast_list.h>
82
83 SYSCTL_DECL(_net_link);
84 SYSCTL_NODE(_net_link, OID_AUTO, bond, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
85 "Bond interface");
86
87 static int if_bond_debug = 0;
88 SYSCTL_INT(_net_link_bond, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
89 &if_bond_debug, 0, "Bond interface debug logs");
90
91 static struct ether_addr slow_proto_multicast = {
92 .octet = IEEE8023AD_SLOW_PROTO_MULTICAST
93 };
94
95 typedef struct ifbond_s ifbond, * ifbond_ref;
96 typedef struct bondport_s bondport, * bondport_ref;
97
98 #define BOND_MAXUNIT 128
99 #define BOND_ZONE_MAX_ELEM MIN(IFNETS_MAX, BOND_MAXUNIT)
100 #define BONDNAME "bond"
101
102 #define EA_FORMAT "%x:%x:%x:%x:%x:%x"
103 #define EA_CH(e, i) ((u_char)((u_char *)(e))[(i)])
104 #define EA_LIST(ea) EA_CH(ea,0),EA_CH(ea,1),EA_CH(ea,2),EA_CH(ea,3),EA_CH(ea,4),EA_CH(ea,5)
105
106 #define timestamp_printf printf
107
108 /**
109 ** bond locks
110 **/
111
112 static LCK_GRP_DECLARE(bond_lck_grp, "if_bond");
113 static LCK_MTX_DECLARE(bond_lck_mtx, &bond_lck_grp);
114
115 static __inline__ void
bond_assert_lock_held(void)116 bond_assert_lock_held(void)
117 {
118 LCK_MTX_ASSERT(&bond_lck_mtx, LCK_MTX_ASSERT_OWNED);
119 }
120
121 static __inline__ void
bond_assert_lock_not_held(void)122 bond_assert_lock_not_held(void)
123 {
124 LCK_MTX_ASSERT(&bond_lck_mtx, LCK_MTX_ASSERT_NOTOWNED);
125 }
126
127 static __inline__ void
bond_lock(void)128 bond_lock(void)
129 {
130 lck_mtx_lock(&bond_lck_mtx);
131 }
132
133 static __inline__ void
bond_unlock(void)134 bond_unlock(void)
135 {
136 lck_mtx_unlock(&bond_lck_mtx);
137 }
138
139 /**
140 ** bond structures, types
141 **/
142
/*
 * (system, system priority, key) triple.  Embedded in struct LAG_s as
 * lag_info and in partner_state as ps_lag_info.
 */
struct LAG_info_s {
	lacp_system                 li_system;
	lacp_system_priority        li_system_priority;
	lacp_key                    li_key;
};
typedef struct LAG_info_s LAG_info, * LAG_info_ref;
149
/*
 * Forward declarations of the port, bond and LAG structures together
 * with the TAILQ head types used to chain them.
 *
 * The ifbond/ifbond_ref and bondport/bondport_ref typedefs are declared
 * once near the top of the file; they are intentionally not repeated
 * here (redefining a typedef is only permitted from C11 onwards).
 */
struct bondport_s;
TAILQ_HEAD(port_list, bondport_s);
struct ifbond_s;
TAILQ_HEAD(ifbond_list, ifbond_s);
struct LAG_s;
TAILQ_HEAD(lag_list, LAG_s);
159
/*
 * One link aggregation group (LAG).
 */
struct LAG_s {
	TAILQ_ENTRY(LAG_s)          lag_list;       /* linkage in a struct lag_list */
	struct port_list            lag_port_list;  /* ports belonging to this LAG */
	short                       lag_port_count;
	short                       lag_selected_port_count;
	int                         lag_active_media;
	LAG_info                    lag_info;       /* system/priority/key triple */
};
typedef struct LAG_s LAG, * LAG_ref;
169
/*
 * Partner information kept per port (bondport_s.po_partner_state).
 */
typedef struct partner_state_s {
	LAG_info                    ps_lag_info;
	lacp_port                   ps_port;
	lacp_port_priority          ps_port_priority;
	lacp_actor_partner_state    ps_state;
} partner_state, * partner_state_ref;
176
/*
 * Per-bond-interface state (the clone softc allocated through
 * bond_cloner).
 */
struct ifbond_s {
	TAILQ_ENTRY(ifbond_s)       ifb_bond_list;  /* linkage in a struct ifbond_list */
	int                         ifb_flags;      /* IFBF_* bits */
	struct os_refcnt            ifb_retain_count; /* see ifbond_retain()/ifbond_release() */
	char                        ifb_name[IFNAMSIZ];
	struct ifnet *              ifb_ifp;
	bpf_packet_func             ifb_bpf_input;
	bpf_packet_func             ifb_bpf_output;
	int                         ifb_altmtu;
	struct port_list            ifb_port_list;  /* member ports, linked via po_port_list */
	short                       ifb_port_count;
	struct lag_list             ifb_lag_list;
	lacp_key                    ifb_key;
	short                       ifb_max_active;/* 0 == unlimited */
	LAG_ref                     ifb_active_lag;
	struct ifmultiaddr *        ifb_ifma_slow_proto; /* set by ifbond_add_slow_proto_multicast(), dropped in ifbond_release() */
	bondport_ref *              ifb_distributing_array; /* freed in ifbond_release() with ifb_distributing_max elements */
	int                         ifb_distributing_count;
	int                         ifb_distributing_max;
	int                         ifb_last_link_event;
	int                         ifb_mode;/* LACP, STATIC */
};
199
/*
 * Snapshot of an interface's media state as filled in by
 * interface_media_info() from a SIOCGIFMEDIA request.
 */
struct media_info {
	int         mi_active;  /* copy of ifmediareq.ifm_active */
	int         mi_status;  /* copy of ifmediareq.ifm_status */
};
204
/*
 * Receive machine states; stored in bondport_s.po_receive_state as a
 * u_char.
 */
enum {
	ReceiveState_none = 0,
	ReceiveState_INITIALIZE = 1,
	ReceiveState_PORT_DISABLED = 2,
	ReceiveState_EXPIRED = 3,
	ReceiveState_LACP_DISABLED = 4,
	ReceiveState_DEFAULTED = 5,
	ReceiveState_CURRENT = 6,
};

typedef u_char ReceiveState;
216
/*
 * Port selection states; the values mirror the
 * IF_BOND_STATUS_SELECTED_STATE_* constants.  Stored in
 * bondport_s.po_selected as a u_char.
 */
enum {
	SelectedState_UNSELECTED = IF_BOND_STATUS_SELECTED_STATE_UNSELECTED,
	SelectedState_SELECTED = IF_BOND_STATUS_SELECTED_STATE_SELECTED,
	SelectedState_STANDBY = IF_BOND_STATUS_SELECTED_STATE_STANDBY
};
typedef u_char SelectedState;
223
224 static __inline__ const char *
SelectedStateString(SelectedState s)225 SelectedStateString(SelectedState s)
226 {
227 static const char * names[] = { "UNSELECTED", "SELECTED", "STANDBY" };
228
229 if (s <= SelectedState_STANDBY) {
230 return names[s];
231 }
232 return "<unknown>";
233 }
234
/*
 * Mux machine states; stored in bondport_s.po_mux_state as a u_char.
 */
enum {
	MuxState_none = 0,
	MuxState_DETACHED = 1,
	MuxState_WAITING = 2,
	MuxState_ATTACHED = 3,
	MuxState_COLLECTING_DISTRIBUTING = 4,
};

typedef u_char MuxState;
244
/*
 * Port control flag bits.
 * NOTE(review): presumably stored in bondport_s.po_control_flags and
 * manipulated with the uint32_bit_*() helpers -- confirm against users.
 */
#define PORT_CONTROL_FLAGS_IN_LIST               0x01
#define PORT_CONTROL_FLAGS_PROTO_ATTACHED        0x02
#define PORT_CONTROL_FLAGS_FILTER_ATTACHED       0x04
#define PORT_CONTROL_FLAGS_LLADDR_SET            0x08
#define PORT_CONTROL_FLAGS_MTU_SET               0x10
#define PORT_CONTROL_FLAGS_PROMISCUOUS_SET       0x20
#define PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET  0x40
252
253
/*
 * Returns true when at least one bit of `flags_to_test` is set in
 * `flags`.
 */
static inline bool
uint32_bit_is_set(uint32_t flags, uint32_t flags_to_test)
{
	uint32_t        common = flags & flags_to_test;

	return common != 0;
}
259
/*
 * Turn on every bit of `flags_to_set` in the word at `flags_p`.
 */
static inline void
uint32_bit_set(uint32_t * flags_p, uint32_t flags_to_set)
{
	uint32_t        updated = *flags_p | flags_to_set;

	*flags_p = updated;
}
265
/*
 * Turn off every bit of `flags_to_clear` in the word at `flags_p`.
 */
static inline void
uint32_bit_clear(uint32_t * flags_p, uint32_t flags_to_clear)
{
	uint32_t        keep_mask = ~flags_to_clear;

	*flags_p = *flags_p & keep_mask;
}
271
/*
 * Per-member-port state.
 */
struct bondport_s {
	TAILQ_ENTRY(bondport_s)     po_port_list;   /* linkage in ifbond_s.ifb_port_list */
	ifbond_ref                  po_bond;        /* bond this port refers to */
	struct multicast_list       po_multicast;   /* programmed by bond_setmulti() */
	struct ifnet *              po_ifp;         /* the port's interface */
	struct ether_addr           po_saved_addr;
	int                         po_enabled;
	char                        po_name[IFNAMSIZ];
	struct ifdevmtu             po_devmtu;
	uint32_t                    po_control_flags; /* NOTE(review): presumably PORT_CONTROL_FLAGS_* -- confirm */
	interface_filter_t          po_filter;

	/* LACP */
	TAILQ_ENTRY(bondport_s)     po_lag_port_list;
	devtimer_ref                po_current_while_timer;
	devtimer_ref                po_periodic_timer;
	devtimer_ref                po_wait_while_timer;
	devtimer_ref                po_transmit_timer;
	partner_state               po_partner_state;
	lacp_port_priority          po_priority;
	lacp_actor_partner_state    po_actor_state;
	u_char                      po_flags;       /* BONDPORT_FLAGS_* bits */
	u_char                      po_periodic_interval;
	u_char                      po_n_transmit;
	ReceiveState                po_receive_state;
	MuxState                    po_mux_state;
	SelectedState               po_selected;
	int32_t                     po_last_transmit_secs;
	struct media_info           po_media_info;
	uint64_t                    po_force_link_event_time;
	LAG_ref                     po_lag;
};
304
/*
 * ifbond_s.ifb_flags bits; read and written through the
 * ifbond_flags_*() accessors.
 */
#define IFBF_PROMISC            0x1     /* promiscuous mode */
#define IFBF_IF_DETACHING       0x2     /* interface is detaching */
#define IFBF_LLADDR             0x4     /* specific link address requested */
#define IFBF_CHANGE_IN_PROGRESS 0x8     /* interface add/remove in progress */
309
310 static int bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p,
311 user_addr_t datap);
312
313 static __inline__ bool
ifbond_flags_if_detaching(ifbond_ref ifb)314 ifbond_flags_if_detaching(ifbond_ref ifb)
315 {
316 return (ifb->ifb_flags & IFBF_IF_DETACHING) != 0;
317 }
318
319 static __inline__ void
ifbond_flags_set_if_detaching(ifbond_ref ifb)320 ifbond_flags_set_if_detaching(ifbond_ref ifb)
321 {
322 ifb->ifb_flags |= IFBF_IF_DETACHING;
323 return;
324 }
325
326 static __inline__ bool
ifbond_flags_lladdr(ifbond_ref ifb)327 ifbond_flags_lladdr(ifbond_ref ifb)
328 {
329 return (ifb->ifb_flags & IFBF_LLADDR) != 0;
330 }
331
332 static __inline__ bool
ifbond_flags_change_in_progress(ifbond_ref ifb)333 ifbond_flags_change_in_progress(ifbond_ref ifb)
334 {
335 return (ifb->ifb_flags & IFBF_CHANGE_IN_PROGRESS) != 0;
336 }
337
338 static __inline__ void
ifbond_flags_set_change_in_progress(ifbond_ref ifb)339 ifbond_flags_set_change_in_progress(ifbond_ref ifb)
340 {
341 ifb->ifb_flags |= IFBF_CHANGE_IN_PROGRESS;
342 return;
343 }
344
345 static __inline__ void
ifbond_flags_clear_change_in_progress(ifbond_ref ifb)346 ifbond_flags_clear_change_in_progress(ifbond_ref ifb)
347 {
348 ifb->ifb_flags &= ~IFBF_CHANGE_IN_PROGRESS;
349 return;
350 }
351
352 static __inline__ bool
ifbond_flags_promisc(ifbond_ref ifb)353 ifbond_flags_promisc(ifbond_ref ifb)
354 {
355 return (ifb->ifb_flags & IFBF_PROMISC) != 0;
356 }
357
358 static __inline__ void
ifbond_flags_set_promisc(ifbond_ref ifb)359 ifbond_flags_set_promisc(ifbond_ref ifb)
360 {
361 ifb->ifb_flags |= IFBF_PROMISC;
362 return;
363 }
364
365 static __inline__ void
ifbond_flags_clear_promisc(ifbond_ref ifb)366 ifbond_flags_clear_promisc(ifbond_ref ifb)
367 {
368 ifb->ifb_flags &= ~IFBF_PROMISC;
369 return;
370 }
371
/*
 * bondport_ref->po_flags bits
 *
 * po_flags is a u_char, so all eight bit positions are enumerated here;
 * the UNUSED entries are placeholders.  The bits are read and written
 * through the bondport_flags_*() accessors.
 */
#define BONDPORT_FLAGS_NTT              0x01
#define BONDPORT_FLAGS_READY            0x02
#define BONDPORT_FLAGS_SELECTED_CHANGED 0x04
#define BONDPORT_FLAGS_MUX_ATTACHED     0x08
#define BONDPORT_FLAGS_DISTRIBUTING     0x10
#define BONDPORT_FLAGS_UNUSED2          0x20
#define BONDPORT_FLAGS_UNUSED3          0x40
#define BONDPORT_FLAGS_UNUSED4          0x80
383
384 static __inline__ void
bondport_flags_set_ntt(bondport_ref p)385 bondport_flags_set_ntt(bondport_ref p)
386 {
387 p->po_flags |= BONDPORT_FLAGS_NTT;
388 return;
389 }
390
391 static __inline__ void
bondport_flags_clear_ntt(bondport_ref p)392 bondport_flags_clear_ntt(bondport_ref p)
393 {
394 p->po_flags &= ~BONDPORT_FLAGS_NTT;
395 return;
396 }
397
398 static __inline__ int
bondport_flags_ntt(bondport_ref p)399 bondport_flags_ntt(bondport_ref p)
400 {
401 return (p->po_flags & BONDPORT_FLAGS_NTT) != 0;
402 }
403
404 static __inline__ void
bondport_flags_set_ready(bondport_ref p)405 bondport_flags_set_ready(bondport_ref p)
406 {
407 p->po_flags |= BONDPORT_FLAGS_READY;
408 return;
409 }
410
411 static __inline__ void
bondport_flags_clear_ready(bondport_ref p)412 bondport_flags_clear_ready(bondport_ref p)
413 {
414 p->po_flags &= ~BONDPORT_FLAGS_READY;
415 return;
416 }
417
418 static __inline__ int
bondport_flags_ready(bondport_ref p)419 bondport_flags_ready(bondport_ref p)
420 {
421 return (p->po_flags & BONDPORT_FLAGS_READY) != 0;
422 }
423
424 static __inline__ void
bondport_flags_set_selected_changed(bondport_ref p)425 bondport_flags_set_selected_changed(bondport_ref p)
426 {
427 p->po_flags |= BONDPORT_FLAGS_SELECTED_CHANGED;
428 return;
429 }
430
431 static __inline__ void
bondport_flags_clear_selected_changed(bondport_ref p)432 bondport_flags_clear_selected_changed(bondport_ref p)
433 {
434 p->po_flags &= ~BONDPORT_FLAGS_SELECTED_CHANGED;
435 return;
436 }
437
438 static __inline__ int
bondport_flags_selected_changed(bondport_ref p)439 bondport_flags_selected_changed(bondport_ref p)
440 {
441 return (p->po_flags & BONDPORT_FLAGS_SELECTED_CHANGED) != 0;
442 }
443
444 static __inline__ void
bondport_flags_set_mux_attached(bondport_ref p)445 bondport_flags_set_mux_attached(bondport_ref p)
446 {
447 p->po_flags |= BONDPORT_FLAGS_MUX_ATTACHED;
448 return;
449 }
450
451 static __inline__ void
bondport_flags_clear_mux_attached(bondport_ref p)452 bondport_flags_clear_mux_attached(bondport_ref p)
453 {
454 p->po_flags &= ~BONDPORT_FLAGS_MUX_ATTACHED;
455 return;
456 }
457
458 static __inline__ int
bondport_flags_mux_attached(bondport_ref p)459 bondport_flags_mux_attached(bondport_ref p)
460 {
461 return (p->po_flags & BONDPORT_FLAGS_MUX_ATTACHED) != 0;
462 }
463
464 static __inline__ void
bondport_flags_set_distributing(bondport_ref p)465 bondport_flags_set_distributing(bondport_ref p)
466 {
467 p->po_flags |= BONDPORT_FLAGS_DISTRIBUTING;
468 return;
469 }
470
471 static __inline__ void
bondport_flags_clear_distributing(bondport_ref p)472 bondport_flags_clear_distributing(bondport_ref p)
473 {
474 p->po_flags &= ~BONDPORT_FLAGS_DISTRIBUTING;
475 return;
476 }
477
478 static __inline__ int
bondport_flags_distributing(bondport_ref p)479 bondport_flags_distributing(bondport_ref p)
480 {
481 return (p->po_flags & BONDPORT_FLAGS_DISTRIBUTING) != 0;
482 }
483
/*
 * State shared by all bond interfaces; allocated by
 * bond_globals_create() and published in g_bond by bond_globals_init().
 */
typedef struct bond_globals_s {
	struct ifbond_list          ifbond_list;
	lacp_system                 system;          /* copied from the caller-supplied lacp_system */
	lacp_system_priority        system_priority;
} * bond_globals_ref;

/* NULL until bond_globals_init() succeeds */
static bond_globals_ref g_bond;
491
492 /**
493 ** packet_buffer routines
494 ** - thin wrapper for mbuf
495 **/
496
497 typedef struct mbuf * packet_buffer_ref;
498
499 static packet_buffer_ref
packet_buffer_allocate(int length)500 packet_buffer_allocate(int length)
501 {
502 packet_buffer_ref m;
503 int size;
504
505 /* leave room for ethernet header */
506 size = length + sizeof(struct ether_header);
507 if (size > (int)MHLEN) {
508 if (size > (int)MCLBYTES) {
509 printf("bond: packet_buffer_allocate size %d > max %u\n",
510 size, MCLBYTES);
511 return NULL;
512 }
513 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
514 } else {
515 m = m_gethdr(M_WAITOK, MT_DATA);
516 }
517 if (m == NULL) {
518 return NULL;
519 }
520 m->m_len = size;
521 m->m_pkthdr.len = size;
522 return m;
523 }
524
525 static void *
packet_buffer_byteptr(packet_buffer_ref buf)526 packet_buffer_byteptr(packet_buffer_ref buf)
527 {
528 return buf->m_data + sizeof(struct ether_header);
529 }
530
/*
 * Event codes passed as the `event` argument to the
 * bondport_*_machine() functions.
 */
typedef enum {
	LAEventStart,
	LAEventTimeout,
	LAEventPacket,
	LAEventMediaChange,
	LAEventSelectedChange,
	LAEventPortMoved,
	LAEventReady
} LAEvent;
540
541 /**
542 ** Receive machine
543 **/
544 static void
545 bondport_receive_machine(bondport_ref p, LAEvent event,
546 void * event_data);
547 /**
548 ** Periodic Transmission machine
549 **/
550 static void
551 bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
552 void * event_data);
553
554 /**
555 ** Transmit machine
556 **/
557 #define TRANSMIT_MACHINE_TX_IMMEDIATE ((void *)1)
558
559 static void
560 bondport_transmit_machine(bondport_ref p, LAEvent event,
561 void * event_data);
562
563 /**
564 ** Mux machine
565 **/
566 static void
567 bondport_mux_machine(bondport_ref p, LAEvent event,
568 void * event_data);
569
570 /**
571 ** bond, LAG
572 **/
573 static void
574 ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media);
575
576 static void
577 ifbond_deactivate_LAG(ifbond_ref bond, LAG_ref lag);
578
579 static int
580 ifbond_all_ports_ready(ifbond_ref bond);
581
582 static LAG_ref
583 ifbond_find_best_LAG(ifbond_ref bond, int * active_media);
584
585 static int
586 LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media);
587
588 static int
589 ifbond_selection(ifbond_ref bond);
590
591 static void
592 bond_handle_event(struct ifnet * port_ifp, int event_code);
593
594 /**
595 ** bondport
596 **/
597
598 static void
599 bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p);
600
601 static void
602 bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf);
603
604 static bondport_ref
605 bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
606 int active, int short_timeout, int * error);
607 static void
608 bondport_start(bondport_ref p);
609
610 static void
611 bondport_free(bondport_ref p);
612
613 static int
614 bondport_aggregatable(bondport_ref p);
615
616 static int
617 bondport_remove_from_LAG(bondport_ref p);
618
619 static void
620 bondport_set_selected(bondport_ref p, SelectedState s);
621
622 static int
623 bondport_matches_LAG(bondport_ref p, LAG_ref lag);
624
625 static void
626 bondport_link_status_changed(bondport_ref p);
627
628 static void
629 bondport_enable_distributing(bondport_ref p);
630
631 static void
632 bondport_disable_distributing(bondport_ref p);
633
634 static __inline__ int
bondport_collecting(bondport_ref p)635 bondport_collecting(bondport_ref p)
636 {
637 if (p->po_bond->ifb_mode == IF_BOND_MODE_LACP) {
638 return lacp_actor_partner_state_collecting(p->po_actor_state);
639 }
640 return TRUE;
641 }
642
643 /**
644 ** bond interface/dlil specific routines
645 **/
646 static int bond_clone_create(struct if_clone *, u_int32_t, void *);
647 static int bond_clone_destroy(struct ifnet *);
648 static int bond_output(struct ifnet *ifp, struct mbuf *m);
649 static int bond_ioctl(struct ifnet *ifp, u_long cmd, void * addr);
650 static int bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode,
651 bpf_packet_func func);
652 static int bond_attach_protocol(struct ifnet *ifp);
653 static int bond_detach_protocol(struct ifnet *ifp);
654 static errno_t bond_iff_input(void *cookie, ifnet_t ifp,
655 protocol_family_t protocol, mbuf_t *data, char **frame_ptr);
656 static int bond_attach_filter(struct ifnet *ifp, interface_filter_t * filter_p);
657 static int bond_setmulti(struct ifnet *ifp);
658 static int bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp);
659 static int bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp);
660 static void bond_if_free(struct ifnet * ifp);
661 static void interface_link_event(struct ifnet * ifp, u_int32_t event_code);
662
663 static struct if_clone bond_cloner = IF_CLONE_INITIALIZER(BONDNAME,
664 bond_clone_create,
665 bond_clone_destroy,
666 0,
667 BOND_MAXUNIT,
668 BOND_ZONE_MAX_ELEM,
669 sizeof(ifbond));
670
671 static int
siocsifmtu(struct ifnet * ifp,int mtu)672 siocsifmtu(struct ifnet * ifp, int mtu)
673 {
674 struct ifreq ifr;
675
676 bzero(&ifr, sizeof(ifr));
677 ifr.ifr_mtu = mtu;
678 return ifnet_ioctl(ifp, 0, SIOCSIFMTU, &ifr);
679 }
680
681 static int
siocgifdevmtu(struct ifnet * ifp,struct ifdevmtu * ifdm_p)682 siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
683 {
684 struct ifreq ifr;
685 int error;
686
687 bzero(&ifr, sizeof(ifr));
688 error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
689 if (error == 0) {
690 *ifdm_p = ifr.ifr_devmtu;
691 }
692 return error;
693 }
694
/*
 * Copy ETHER_ADDR_LEN bytes from `source` to `dest`.
 */
static __inline__ void
ether_addr_copy(void * dest, const void * source)
{
	bcopy(source, dest, ETHER_ADDR_LEN);
}
701
702 static __inline__ void
ifbond_retain(ifbond_ref ifb)703 ifbond_retain(ifbond_ref ifb)
704 {
705 os_ref_retain(&ifb->ifb_retain_count);
706 }
707
708 static __inline__ void
ifbond_release(ifbond_ref ifb)709 ifbond_release(ifbond_ref ifb)
710 {
711 if (os_ref_release(&ifb->ifb_retain_count) != 0) {
712 return;
713 }
714
715 if (if_bond_debug) {
716 printf("ifbond_release(%s)\n", ifb->ifb_name);
717 }
718 if (ifb->ifb_ifma_slow_proto != NULL) {
719 if (if_bond_debug) {
720 printf("ifbond_release(%s) removing multicast\n",
721 ifb->ifb_name);
722 }
723 (void) if_delmulti_anon(ifb->ifb_ifma_slow_proto->ifma_ifp,
724 ifb->ifb_ifma_slow_proto->ifma_addr);
725 IFMA_REMREF(ifb->ifb_ifma_slow_proto);
726 }
727 kfree_type(bondport_ref, ifb->ifb_distributing_max,
728 ifb->ifb_distributing_array);
729 if_clone_softc_deallocate(&bond_cloner, ifb);
730 }
731
732 /*
733 * Function: ifbond_wait
734 * Purpose:
735 * Allows a single thread to gain exclusive access to the ifbond
736 * data structure. Some operations take a long time to complete,
737 * and some have side-effects that we can't predict. Holding the
738 * bond_lock() across such operations is not possible.
739 *
740 * For example:
741 * 1) The SIOCSIFLLADDR ioctl takes a long time (several seconds) to
742 * complete. Simply holding the bond_lock() would freeze all other
743 * data structure accesses during that time.
744 * 2) When we attach our protocol to the interface, a dlil event is
745 * generated and invokes our bond_event() function. bond_event()
746 * needs to take the bond_lock(), but we're already holding it, so
747 * we're deadlocked against ourselves.
748 * Notes:
749 * Before calling, you must be holding the bond_lock and have taken
750 * a reference on the ifbond_ref.
751 */
752 static void
ifbond_wait(ifbond_ref ifb,const char * msg)753 ifbond_wait(ifbond_ref ifb, const char * msg)
754 {
755 int waited = 0;
756
757 /* other add/remove in progress */
758 while (ifbond_flags_change_in_progress(ifb)) {
759 if (if_bond_debug) {
760 printf("%s: %s msleep\n", ifb->ifb_name, msg);
761 }
762 waited = 1;
763 (void)msleep(ifb, &bond_lck_mtx, PZERO, msg, 0);
764 }
765 /* prevent other bond list remove/add from taking place */
766 ifbond_flags_set_change_in_progress(ifb);
767 if (if_bond_debug && waited) {
768 printf("%s: %s woke up\n", ifb->ifb_name, msg);
769 }
770 return;
771 }
772
773 /*
774 * Function: ifbond_signal
775 * Purpose:
776 * Allows the thread that previously invoked ifbond_wait() to
777 * give up exclusive access to the ifbond data structure, and wake up
778 * any other threads waiting to access
779 * Notes:
780 * Before calling, you must be holding the bond_lock and have taken
781 * a reference on the ifbond_ref.
782 */
783 static void
ifbond_signal(ifbond_ref ifb,const char * msg)784 ifbond_signal(ifbond_ref ifb, const char * msg)
785 {
786 ifbond_flags_clear_change_in_progress(ifb);
787 wakeup((caddr_t)ifb);
788 if (if_bond_debug) {
789 printf("%s: %s wakeup\n", ifb->ifb_name, msg);
790 }
791 return;
792 }
793
794 /**
795 ** Media information
796 **/
797
798 static int
link_speed(int active)799 link_speed(int active)
800 {
801 switch (IFM_SUBTYPE(active)) {
802 case IFM_AUTO:
803 case IFM_MANUAL:
804 case IFM_NONE:
805 return 0;
806 case IFM_10_T:
807 case IFM_10_2:
808 case IFM_10_5:
809 case IFM_10_STP:
810 case IFM_10_FL:
811 return 10;
812 case IFM_100_TX:
813 case IFM_100_FX:
814 case IFM_100_T4:
815 case IFM_100_VG:
816 case IFM_100_T2:
817 return 100;
818 case IFM_1000_SX:
819 case IFM_1000_LX:
820 case IFM_1000_CX:
821 case IFM_1000_TX:
822 case IFM_1000_CX_SGMII:
823 case IFM_1000_KX:
824 return 1000;
825 case IFM_HPNA_1:
826 return 1;
827 default:
828 /* assume that new defined types are going to be at least 10GigE */
829 case IFM_10G_SR:
830 case IFM_10G_LR:
831 case IFM_10G_KX4:
832 case IFM_10G_KR:
833 case IFM_10G_CR1:
834 case IFM_10G_ER:
835 return 10000;
836 case IFM_2500_T:
837 return 2500;
838 case IFM_5000_T:
839 return 5000;
840 case IFM_20G_KR2:
841 return 20000;
842 case IFM_25G_CR:
843 case IFM_25G_KR:
844 case IFM_25G_SR:
845 case IFM_25G_LR:
846 return 25000;
847 case IFM_40G_CR4:
848 case IFM_40G_SR4:
849 case IFM_40G_LR4:
850 case IFM_40G_KR4:
851 return 40000;
852 case IFM_50G_CR2:
853 case IFM_50G_KR2:
854 case IFM_50G_SR2:
855 case IFM_50G_LR2:
856 return 50000;
857 case IFM_56G_R4:
858 return 56000;
859 case IFM_100G_CR4:
860 case IFM_100G_SR4:
861 case IFM_100G_KR4:
862 case IFM_100G_LR4:
863 return 100000;
864 }
865 }
866
867 static __inline__ int
media_active(const struct media_info * mi)868 media_active(const struct media_info * mi)
869 {
870 if ((mi->mi_status & IFM_AVALID) == 0) {
871 return 1;
872 }
873 return (mi->mi_status & IFM_ACTIVE) != 0;
874 }
875
876 static __inline__ int
media_full_duplex(const struct media_info * mi)877 media_full_duplex(const struct media_info * mi)
878 {
879 return (mi->mi_active & IFM_FDX) != 0;
880 }
881
882 static __inline__ int
media_type_unknown(const struct media_info * mi)883 media_type_unknown(const struct media_info * mi)
884 {
885 int unknown;
886
887 switch (IFM_SUBTYPE(mi->mi_active)) {
888 case IFM_AUTO:
889 case IFM_MANUAL:
890 case IFM_NONE:
891 unknown = 1;
892 break;
893 default:
894 unknown = 0;
895 break;
896 }
897 return unknown;
898 }
899
/*
 * Returns 1 when the media is full duplex or its subtype is unknown
 * (see media_type_unknown()), else 0.
 */
static __inline__ int
media_ok(const struct media_info * mi)
{
	if (media_full_duplex(mi)) {
		return 1;
	}
	return media_type_unknown(mi) ? 1 : 0;
}
905
906 static __inline__ int
media_speed(const struct media_info * mi)907 media_speed(const struct media_info * mi)
908 {
909 return link_speed(mi->mi_active);
910 }
911
912 static struct media_info
interface_media_info(struct ifnet * ifp)913 interface_media_info(struct ifnet * ifp)
914 {
915 struct ifmediareq ifmr;
916 struct media_info mi;
917
918 bzero(&mi, sizeof(mi));
919 bzero(&ifmr, sizeof(ifmr));
920 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
921 if (ifmr.ifm_count != 0) {
922 mi.mi_status = ifmr.ifm_status;
923 mi.mi_active = ifmr.ifm_active;
924 }
925 }
926 return mi;
927 }
928
929 static int
if_siflladdr(struct ifnet * ifp,const struct ether_addr * ea_p)930 if_siflladdr(struct ifnet * ifp, const struct ether_addr * ea_p)
931 {
932 struct ifreq ifr;
933
934 /*
935 * XXX setting the sa_len to ETHER_ADDR_LEN is wrong, but the driver
936 * currently expects it that way
937 */
938 ifr.ifr_addr.sa_family = AF_UNSPEC;
939 ifr.ifr_addr.sa_len = ETHER_ADDR_LEN;
940 ether_addr_copy(ifr.ifr_addr.sa_data, ea_p);
941 return ifnet_ioctl(ifp, 0, SIOCSIFLLADDR, &ifr);
942 }
943
944 /**
945 ** bond_globals
946 **/
947 static bond_globals_ref
bond_globals_create(lacp_system_priority sys_pri,lacp_system_ref sys)948 bond_globals_create(lacp_system_priority sys_pri,
949 lacp_system_ref sys)
950 {
951 bond_globals_ref b;
952
953 b = kalloc_type(struct bond_globals_s, Z_WAITOK | Z_ZERO | Z_NOFAIL);
954 TAILQ_INIT(&b->ifbond_list);
955 b->system = *sys;
956 b->system_priority = sys_pri;
957 return b;
958 }
959
960 static int
bond_globals_init(void)961 bond_globals_init(void)
962 {
963 bond_globals_ref b;
964 int i;
965 struct ifnet * ifp;
966
967 bond_assert_lock_not_held();
968
969 if (g_bond != NULL) {
970 return 0;
971 }
972
973 /*
974 * use en0's ethernet address as the system identifier, and if it's not
975 * there, use en1 .. en3
976 */
977 ifp = NULL;
978 for (i = 0; i < 4; i++) {
979 char ifname[IFNAMSIZ + 1];
980 snprintf(ifname, sizeof(ifname), "en%d", i);
981 ifp = ifunit(ifname);
982 if (ifp != NULL) {
983 break;
984 }
985 }
986 b = NULL;
987 if (ifp != NULL) {
988 b = bond_globals_create(0x8000, (lacp_system_ref)IF_LLADDR(ifp));
989 }
990 bond_lock();
991 if (g_bond != NULL) {
992 bond_unlock();
993 kfree_type(struct bond_globals_s, b);
994 return 0;
995 }
996 g_bond = b;
997 bond_unlock();
998 if (ifp == NULL) {
999 return ENXIO;
1000 }
1001 if (b == NULL) {
1002 return ENOMEM;
1003 }
1004 return 0;
1005 }
1006
1007 static void
bond_bpf_vlan(struct ifnet * ifp,struct mbuf * m,const struct ether_header * eh_p,u_int16_t vlan_tag,bpf_packet_func func)1008 bond_bpf_vlan(struct ifnet * ifp, struct mbuf * m,
1009 const struct ether_header * eh_p,
1010 u_int16_t vlan_tag, bpf_packet_func func)
1011 {
1012 struct ether_vlan_header * vlh_p;
1013 struct mbuf * vl_m;
1014
1015 vl_m = m_get(M_DONTWAIT, MT_DATA);
1016 if (vl_m == NULL) {
1017 return;
1018 }
1019 /* populate a new mbuf containing the vlan ethernet header */
1020 vl_m->m_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1021 vlh_p = mtod(vl_m, struct ether_vlan_header *);
1022 bcopy(eh_p, vlh_p, offsetof(struct ether_header, ether_type));
1023 vlh_p->evl_encap_proto = htons(ETHERTYPE_VLAN);
1024 vlh_p->evl_tag = htons(vlan_tag);
1025 vlh_p->evl_proto = eh_p->ether_type;
1026 vl_m->m_next = m;
1027 (*func)(ifp, vl_m);
1028 vl_m->m_next = NULL;
1029 m_free(vl_m);
1030 return;
1031 }
1032
1033 static __inline__ void
bond_bpf_output(struct ifnet * ifp,struct mbuf * m,bpf_packet_func func)1034 bond_bpf_output(struct ifnet * ifp, struct mbuf * m,
1035 bpf_packet_func func)
1036 {
1037 if (func != NULL) {
1038 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1039 const struct ether_header * eh_p;
1040 eh_p = mtod(m, const struct ether_header *);
1041 m->m_data += ETHER_HDR_LEN;
1042 m->m_len -= ETHER_HDR_LEN;
1043 bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func);
1044 m->m_data -= ETHER_HDR_LEN;
1045 m->m_len += ETHER_HDR_LEN;
1046 } else {
1047 (*func)(ifp, m);
1048 }
1049 }
1050 return;
1051 }
1052
1053 static __inline__ void
bond_bpf_input(ifnet_t ifp,mbuf_t m,const struct ether_header * eh_p,bpf_packet_func func)1054 bond_bpf_input(ifnet_t ifp, mbuf_t m, const struct ether_header * eh_p,
1055 bpf_packet_func func)
1056 {
1057 if (func != NULL) {
1058 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1059 bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func);
1060 } else {
1061 /* restore the header */
1062 m->m_data -= ETHER_HDR_LEN;
1063 m->m_len += ETHER_HDR_LEN;
1064 (*func)(ifp, m);
1065 m->m_data += ETHER_HDR_LEN;
1066 m->m_len -= ETHER_HDR_LEN;
1067 }
1068 }
1069 return;
1070 }
1071
1072 /*
1073 * Function: bond_setmulti
1074 * Purpose:
1075 * Enable multicast reception on "our" interface by enabling multicasts on
1076 * each of the member ports.
1077 */
1078 static int
bond_setmulti(struct ifnet * ifp)1079 bond_setmulti(struct ifnet * ifp)
1080 {
1081 ifbond_ref ifb;
1082 int error;
1083 int result = 0;
1084 bondport_ref p;
1085
1086 bond_lock();
1087 ifb = ifnet_softc(ifp);
1088 if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1089 || TAILQ_EMPTY(&ifb->ifb_port_list)) {
1090 bond_unlock();
1091 return 0;
1092 }
1093 ifbond_retain(ifb);
1094 ifbond_wait(ifb, "bond_setmulti");
1095
1096 if (ifbond_flags_if_detaching(ifb)) {
1097 /* someone destroyed the bond while we were waiting */
1098 result = EBUSY;
1099 goto signal_done;
1100 }
1101 bond_unlock();
1102
1103 /* ifbond_wait() let's us safely walk the list without holding the lock */
1104 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1105 struct ifnet * port_ifp = p->po_ifp;
1106
1107 error = multicast_list_program(&p->po_multicast,
1108 ifp, port_ifp);
1109 if (error != 0) {
1110 printf("bond_setmulti(%s): "
1111 "multicast_list_program(%s%d) failed, %d\n",
1112 ifb->ifb_name, ifnet_name(port_ifp),
1113 ifnet_unit(port_ifp), error);
1114 result = error;
1115 }
1116 }
1117 bond_lock();
1118 signal_done:
1119 ifbond_signal(ifb, __func__);
1120 bond_unlock();
1121 ifbond_release(ifb);
1122 return result;
1123 }
1124
1125 static int
bond_clone_attach(void)1126 bond_clone_attach(void)
1127 {
1128 int error;
1129
1130 if ((error = if_clone_attach(&bond_cloner)) != 0) {
1131 return error;
1132 }
1133 return 0;
1134 }
1135
1136 static int
ifbond_add_slow_proto_multicast(ifbond_ref ifb)1137 ifbond_add_slow_proto_multicast(ifbond_ref ifb)
1138 {
1139 int error;
1140 struct ifmultiaddr * ifma = NULL;
1141 struct sockaddr_dl sdl;
1142
1143 bond_assert_lock_not_held();
1144
1145 bzero(&sdl, sizeof(sdl));
1146 sdl.sdl_len = sizeof(sdl);
1147 sdl.sdl_family = AF_LINK;
1148 sdl.sdl_type = IFT_ETHER;
1149 sdl.sdl_nlen = 0;
1150 sdl.sdl_alen = sizeof(slow_proto_multicast);
1151 bcopy(&slow_proto_multicast, sdl.sdl_data, sizeof(slow_proto_multicast));
1152 error = if_addmulti_anon(ifb->ifb_ifp, (struct sockaddr *)&sdl, &ifma);
1153 if (error == 0) {
1154 ifb->ifb_ifma_slow_proto = ifma;
1155 }
1156 return error;
1157 }
1158
1159 static int
bond_clone_create(struct if_clone * ifc,u_int32_t unit,__unused void * params)1160 bond_clone_create(struct if_clone * ifc, u_int32_t unit, __unused void *params)
1161 {
1162 int error;
1163 ifbond_ref ifb;
1164 ifnet_t ifp;
1165 struct ifnet_init_eparams bond_init;
1166
1167 error = bond_globals_init();
1168 if (error != 0) {
1169 return error;
1170 }
1171
1172 ifb = if_clone_softc_allocate(&bond_cloner);
1173 if (ifb == NULL) {
1174 return ENOMEM;
1175 }
1176
1177 os_ref_init(&ifb->ifb_retain_count, NULL);
1178 TAILQ_INIT(&ifb->ifb_port_list);
1179 TAILQ_INIT(&ifb->ifb_lag_list);
1180 ifb->ifb_key = unit + 1;
1181
1182 /* use the interface name as the unique id for ifp recycle */
1183 if ((u_int32_t)snprintf(ifb->ifb_name, sizeof(ifb->ifb_name), "%s%d",
1184 ifc->ifc_name, unit) >= sizeof(ifb->ifb_name)) {
1185 ifbond_release(ifb);
1186 return EINVAL;
1187 }
1188
1189 bzero(&bond_init, sizeof(bond_init));
1190 bond_init.ver = IFNET_INIT_CURRENT_VERSION;
1191 bond_init.len = sizeof(bond_init);
1192 bond_init.flags = IFNET_INIT_LEGACY;
1193 bond_init.uniqueid = ifb->ifb_name;
1194 bond_init.uniqueid_len = strlen(ifb->ifb_name);
1195 bond_init.name = ifc->ifc_name;
1196 bond_init.unit = unit;
1197 bond_init.family = IFNET_FAMILY_BOND;
1198 bond_init.type = IFT_IEEE8023ADLAG;
1199 bond_init.output = bond_output;
1200 bond_init.demux = ether_demux;
1201 bond_init.add_proto = ether_add_proto;
1202 bond_init.del_proto = ether_del_proto;
1203 bond_init.check_multi = ether_check_multi;
1204 bond_init.framer_extended = ether_frameout_extended;
1205 bond_init.ioctl = bond_ioctl;
1206 bond_init.set_bpf_tap = bond_set_bpf_tap;
1207 bond_init.detach = bond_if_free;
1208 bond_init.broadcast_addr = etherbroadcastaddr;
1209 bond_init.broadcast_len = ETHER_ADDR_LEN;
1210 bond_init.softc = ifb;
1211 error = ifnet_allocate_extended(&bond_init, &ifp);
1212
1213 if (error) {
1214 ifbond_release(ifb);
1215 return error;
1216 }
1217
1218 ifb->ifb_ifp = ifp;
1219 ifnet_set_offload(ifp, 0);
1220 ifnet_set_addrlen(ifp, ETHER_ADDR_LEN); /* XXX ethernet specific */
1221 ifnet_set_flags(ifp, IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX, 0xffff);
1222 ifnet_set_mtu(ifp, ETHERMTU);
1223
1224 error = ifnet_attach(ifp, NULL);
1225 if (error != 0) {
1226 ifnet_release(ifp);
1227 ifbond_release(ifb);
1228 return error;
1229 }
1230 error = ifbond_add_slow_proto_multicast(ifb);
1231 if (error != 0) {
1232 printf("bond_clone_create(%s): "
1233 "failed to add slow_proto multicast, %d\n",
1234 ifb->ifb_name, error);
1235 }
1236
1237 /* attach as ethernet */
1238 bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
1239
1240 bond_lock();
1241 TAILQ_INSERT_HEAD(&g_bond->ifbond_list, ifb, ifb_bond_list);
1242 bond_unlock();
1243
1244 return 0;
1245 }
1246
1247 static void
bond_remove_all_interfaces(ifbond_ref ifb)1248 bond_remove_all_interfaces(ifbond_ref ifb)
1249 {
1250 bondport_ref p;
1251
1252 bond_assert_lock_held();
1253
1254 /*
1255 * do this in reverse order to avoid re-programming the mac address
1256 * as each head interface is removed
1257 */
1258 while ((p = TAILQ_LAST(&ifb->ifb_port_list, port_list)) != NULL) {
1259 bond_remove_interface(ifb, p->po_ifp);
1260 }
1261 return;
1262 }
1263
1264 static void
bond_remove(ifbond_ref ifb)1265 bond_remove(ifbond_ref ifb)
1266 {
1267 bond_assert_lock_held();
1268 ifbond_flags_set_if_detaching(ifb);
1269 TAILQ_REMOVE(&g_bond->ifbond_list, ifb, ifb_bond_list);
1270 bond_remove_all_interfaces(ifb);
1271 return;
1272 }
1273
/*
 * Function: bond_if_detach
 * Purpose:
 *   Detach the bond ifnet; a failure is logged but otherwise ignored.
 */
static void
bond_if_detach(struct ifnet * ifp)
{
	int     status = ifnet_detach(ifp);

	if (status == 0) {
		return;
	}
	printf("bond_if_detach %s%d: ifnet_detach failed, %d\n",
	    ifnet_name(ifp), ifnet_unit(ifp), status);
}
1287
1288 static int
bond_clone_destroy(struct ifnet * ifp)1289 bond_clone_destroy(struct ifnet * ifp)
1290 {
1291 ifbond_ref ifb;
1292
1293 bond_lock();
1294 ifb = ifnet_softc(ifp);
1295 if (ifb == NULL || ifnet_type(ifp) != IFT_IEEE8023ADLAG) {
1296 bond_unlock();
1297 return 0;
1298 }
1299 if (ifbond_flags_if_detaching(ifb)) {
1300 bond_unlock();
1301 return 0;
1302 }
1303 bond_remove(ifb);
1304 bond_unlock();
1305 bond_if_detach(ifp);
1306 return 0;
1307 }
1308
1309 static int
bond_set_bpf_tap(struct ifnet * ifp,bpf_tap_mode mode,bpf_packet_func func)1310 bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode, bpf_packet_func func)
1311 {
1312 ifbond_ref ifb;
1313
1314 bond_lock();
1315 ifb = ifnet_softc(ifp);
1316 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
1317 bond_unlock();
1318 return ENODEV;
1319 }
1320 switch (mode) {
1321 case BPF_TAP_DISABLE:
1322 ifb->ifb_bpf_input = ifb->ifb_bpf_output = NULL;
1323 break;
1324
1325 case BPF_TAP_INPUT:
1326 ifb->ifb_bpf_input = func;
1327 break;
1328
1329 case BPF_TAP_OUTPUT:
1330 ifb->ifb_bpf_output = func;
1331 break;
1332
1333 case BPF_TAP_INPUT_OUTPUT:
1334 ifb->ifb_bpf_input = ifb->ifb_bpf_output = func;
1335 break;
1336 default:
1337 break;
1338 }
1339 bond_unlock();
1340 return 0;
1341 }
1342
/*
 * Function: ether_header_hash
 * Purpose:
 *   Compute a 32-bit hash from the destination ethernet address and the
 *   ethernet type of the given header.
 * Notes:
 *   The address bytes are copied into locals rather than being read through
 *   casted pointers, so no alignment of the header is assumed, and the
 *   16-bit value is widened to uint32_t before shifting so the shift can
 *   never overflow a signed int.  The resulting value is the same as the
 *   direct loads produce: bytes are taken in memory order and ether_type is
 *   used as stored (no byte swapping).
 */
static uint32_t
ether_header_hash(struct ether_header * eh_p)
{
	uint32_t        dhost_lo;       /* ether_dhost[0..3] */
	uint16_t        dhost_hi;       /* ether_dhost[4..5] */
	uint32_t        h;

	bcopy(&eh_p->ether_dhost[0], &dhost_lo, sizeof(dhost_lo));
	bcopy(&eh_p->ether_dhost[4], &dhost_hi, sizeof(dhost_hi));

	/* get 32-bits from destination ether and ether type */
	h = ((uint32_t)dhost_hi << 16) | (uint32_t)eh_p->ether_type;
	h ^= dhost_lo;
	return h;
}
1354
1355 static struct mbuf *
S_mbuf_skip_to_offset(struct mbuf * m,int32_t * offset)1356 S_mbuf_skip_to_offset(struct mbuf * m, int32_t * offset)
1357 {
1358 int len;
1359
1360 len = m->m_len;
1361 while (*offset >= len) {
1362 *offset -= len;
1363 m = m->m_next;
1364 if (m == NULL) {
1365 break;
1366 }
1367 len = m->m_len;
1368 }
1369 return m;
1370 }
1371
1372 #if BYTE_ORDER == BIG_ENDIAN
1373 static __inline__ uint32_t
make_uint32(u_char c0,u_char c1,u_char c2,u_char c3)1374 make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
1375 {
1376 return ((uint32_t)c0 << 24) | ((uint32_t)c1 << 16)
1377 | ((uint32_t)c2 << 8) | (uint32_t)c3;
1378 }
1379 #else /* BYTE_ORDER == LITTLE_ENDIAN */
1380 static __inline__ uint32_t
make_uint32(u_char c0,u_char c1,u_char c2,u_char c3)1381 make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
1382 {
1383 return ((uint32_t)c3 << 24) | ((uint32_t)c2 << 16)
1384 | ((uint32_t)c1 << 8) | (uint32_t)c0;
1385 }
1386 #endif /* BYTE_ORDER == LITTLE_ENDIAN */
1387
1388 static int
S_mbuf_copy_uint32(struct mbuf * m,int32_t offset,uint32_t * val)1389 S_mbuf_copy_uint32(struct mbuf * m, int32_t offset, uint32_t * val)
1390 {
1391 struct mbuf * current;
1392 u_char * current_data;
1393 struct mbuf * next;
1394 u_char * next_data;
1395 int space_current;
1396
1397 current = S_mbuf_skip_to_offset(m, &offset);
1398 if (current == NULL) {
1399 return 1;
1400 }
1401 current_data = mtod(current, u_char *) + offset;
1402 space_current = current->m_len - offset;
1403 if (space_current >= (int)sizeof(uint32_t)) {
1404 *val = *((uint32_t *)current_data);
1405 return 0;
1406 }
1407 next = current->m_next;
1408 if (next == NULL || (next->m_len + space_current) < (int)sizeof(uint32_t)) {
1409 return 1;
1410 }
1411 next_data = mtod(next, u_char *);
1412 switch (space_current) {
1413 case 1:
1414 *val = make_uint32(current_data[0], next_data[0],
1415 next_data[1], next_data[2]);
1416 break;
1417 case 2:
1418 *val = make_uint32(current_data[0], current_data[1],
1419 next_data[0], next_data[1]);
1420 break;
1421 default:
1422 *val = make_uint32(current_data[0], current_data[1],
1423 current_data[2], next_data[0]);
1424 break;
1425 }
1426 return 0;
1427 }
1428
1429 #define IP_SRC_OFFSET (offsetof(struct ip, ip_src) - offsetof(struct ip, ip_p))
1430 #define IP_DST_OFFSET (offsetof(struct ip, ip_dst) - offsetof(struct ip, ip_p))
1431
1432 static uint32_t
ip_header_hash(struct mbuf * m)1433 ip_header_hash(struct mbuf * m)
1434 {
1435 u_char * data;
1436 struct in_addr ip_dst;
1437 struct in_addr ip_src;
1438 u_char ip_p;
1439 int32_t offset;
1440 struct mbuf * orig_m = m;
1441
1442 /* find the IP protocol field relative to the start of the packet */
1443 offset = offsetof(struct ip, ip_p) + sizeof(struct ether_header);
1444 m = S_mbuf_skip_to_offset(m, &offset);
1445 if (m == NULL || m->m_len < 1) {
1446 goto bad_ip_packet;
1447 }
1448 data = mtod(m, u_char *) + offset;
1449 ip_p = *data;
1450
1451 /* find the IP src relative to the IP protocol */
1452 if ((m->m_len - offset)
1453 >= (int)(IP_SRC_OFFSET + sizeof(struct in_addr) * 2)) {
1454 /* this should be the normal case */
1455 ip_src = *(struct in_addr *)(data + IP_SRC_OFFSET);
1456 ip_dst = *(struct in_addr *)(data + IP_DST_OFFSET);
1457 } else {
1458 if (S_mbuf_copy_uint32(m, offset + IP_SRC_OFFSET,
1459 (uint32_t *)&ip_src.s_addr)) {
1460 goto bad_ip_packet;
1461 }
1462 if (S_mbuf_copy_uint32(m, offset + IP_DST_OFFSET,
1463 (uint32_t *)&ip_dst.s_addr)) {
1464 goto bad_ip_packet;
1465 }
1466 }
1467 return ntohl(ip_dst.s_addr) ^ ntohl(ip_src.s_addr) ^ ((uint32_t)ip_p);
1468
1469 bad_ip_packet:
1470 return ether_header_hash(mtod(orig_m, struct ether_header *));
1471 }
1472
1473 #define IP6_ADDRS_LEN (sizeof(struct in6_addr) * 2)
1474 static uint32_t
ipv6_header_hash(struct mbuf * m)1475 ipv6_header_hash(struct mbuf * m)
1476 {
1477 u_char * data;
1478 int i;
1479 int32_t offset;
1480 struct mbuf * orig_m = m;
1481 uint32_t * scan;
1482 uint32_t val;
1483
1484 /* find the IP protocol field relative to the start of the packet */
1485 offset = offsetof(struct ip6_hdr, ip6_src) + sizeof(struct ether_header);
1486 m = S_mbuf_skip_to_offset(m, &offset);
1487 if (m == NULL) {
1488 goto bad_ipv6_packet;
1489 }
1490 data = mtod(m, u_char *) + offset;
1491 val = 0;
1492 if ((m->m_len - offset) >= (int)IP6_ADDRS_LEN) {
1493 /* this should be the normal case */
1494 for (i = 0, scan = (uint32_t *)data;
1495 i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t));
1496 i++, scan++) {
1497 val ^= *scan;
1498 }
1499 } else {
1500 for (i = 0; i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t)); i++) {
1501 uint32_t tmp;
1502 if (S_mbuf_copy_uint32(m, offset + i * sizeof(uint32_t),
1503 (uint32_t *)&tmp)) {
1504 goto bad_ipv6_packet;
1505 }
1506 val ^= tmp;
1507 }
1508 }
1509 return ntohl(val);
1510
1511 bad_ipv6_packet:
1512 return ether_header_hash(mtod(orig_m, struct ether_header *));
1513 }
1514
1515 static int
bond_output(struct ifnet * ifp,struct mbuf * m)1516 bond_output(struct ifnet * ifp, struct mbuf * m)
1517 {
1518 bpf_packet_func bpf_func;
1519 uint32_t h;
1520 ifbond_ref ifb;
1521 struct ifnet * port_ifp = NULL;
1522 int err;
1523 struct flowadv adv = { .code = FADV_SUCCESS };
1524
1525 if (m == 0) {
1526 return 0;
1527 }
1528 if ((m->m_flags & M_PKTHDR) == 0) {
1529 m_freem(m);
1530 return 0;
1531 }
1532 if (m->m_pkthdr.pkt_flowid != 0) {
1533 h = m->m_pkthdr.pkt_flowid;
1534 } else {
1535 struct ether_header * eh_p;
1536
1537 eh_p = mtod(m, struct ether_header *);
1538 switch (ntohs(eh_p->ether_type)) {
1539 case ETHERTYPE_IP:
1540 h = ip_header_hash(m);
1541 break;
1542 case ETHERTYPE_IPV6:
1543 h = ipv6_header_hash(m);
1544 break;
1545 default:
1546 h = ether_header_hash(eh_p);
1547 break;
1548 }
1549 }
1550 bond_lock();
1551 ifb = ifnet_softc(ifp);
1552 if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1553 || ifb->ifb_distributing_count == 0) {
1554 goto done;
1555 }
1556 h %= ifb->ifb_distributing_count;
1557 port_ifp = ifb->ifb_distributing_array[h]->po_ifp;
1558 bpf_func = ifb->ifb_bpf_output;
1559 bond_unlock();
1560
1561 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1562 (void)ifnet_stat_increment_out(ifp, 1,
1563 m->m_pkthdr.len + ETHER_VLAN_ENCAP_LEN,
1564 0);
1565 } else {
1566 (void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
1567 }
1568 bond_bpf_output(ifp, m, bpf_func);
1569
1570 err = dlil_output(port_ifp, PF_BOND, m, NULL, NULL, 1, &adv);
1571
1572 if (err == 0) {
1573 if (adv.code == FADV_FLOW_CONTROLLED) {
1574 err = EQFULL;
1575 } else if (adv.code == FADV_SUSPENDED) {
1576 err = EQSUSPENDED;
1577 }
1578 }
1579
1580 return err;
1581
1582 done:
1583 bond_unlock();
1584 m_freem(m);
1585 return 0;
1586 }
1587
1588 static bondport_ref
ifbond_lookup_port(ifbond_ref ifb,struct ifnet * port_ifp)1589 ifbond_lookup_port(ifbond_ref ifb, struct ifnet * port_ifp)
1590 {
1591 bondport_ref p;
1592 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1593 if (p->po_ifp == port_ifp) {
1594 return p;
1595 }
1596 }
1597 return NULL;
1598 }
1599
1600 static bondport_ref
bond_lookup_port(struct ifnet * port_ifp)1601 bond_lookup_port(struct ifnet * port_ifp)
1602 {
1603 ifbond_ref ifb;
1604 bondport_ref port;
1605
1606 TAILQ_FOREACH(ifb, &g_bond->ifbond_list, ifb_bond_list) {
1607 port = ifbond_lookup_port(ifb, port_ifp);
1608 if (port != NULL) {
1609 return port;
1610 }
1611 }
1612 return NULL;
1613 }
1614
1615 static void
bond_receive_lacpdu(struct mbuf * m,struct ifnet * port_ifp)1616 bond_receive_lacpdu(struct mbuf * m, struct ifnet * port_ifp)
1617 {
1618 struct ifnet * bond_ifp = NULL;
1619 ifbond_ref ifb;
1620 int event_code = 0;
1621 bool need_link_update = false;
1622 bondport_ref p;
1623
1624 bond_lock();
1625 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1626 goto done;
1627 }
1628 p = bond_lookup_port(port_ifp);
1629 if (p == NULL) {
1630 goto done;
1631 }
1632 if (p->po_enabled == 0) {
1633 goto done;
1634 }
1635 ifb = p->po_bond;
1636 if (ifb->ifb_mode != IF_BOND_MODE_LACP) {
1637 goto done;
1638 }
1639 /*
1640 * Work-around for rdar://problem/51372042
1641 * Sometimes, the link comes up but the driver doesn't report the
1642 * negotiated medium at that time. When we receive an LACPDU packet,
1643 * and the medium is unknown, force a link status check. Don't force
1644 * the link status check more often than _FORCE_LINK_EVENT_INTERVAL
1645 * seconds.
1646 */
1647 #define _FORCE_LINK_EVENT_INTERVAL 1
1648 if (media_type_unknown(&p->po_media_info)) {
1649 uint64_t now = net_uptime();
1650
1651 if ((now - p->po_force_link_event_time) >=
1652 _FORCE_LINK_EVENT_INTERVAL) {
1653 need_link_update = true;
1654 p->po_force_link_event_time = now;
1655 }
1656 }
1657 bondport_receive_lacpdu(p, (lacpdu_ref)m->m_data);
1658 if (ifbond_selection(ifb)) {
1659 event_code = (ifb->ifb_active_lag == NULL)
1660 ? KEV_DL_LINK_OFF
1661 : KEV_DL_LINK_ON;
1662 /* XXX need to take a reference on bond_ifp */
1663 bond_ifp = ifb->ifb_ifp;
1664 ifb->ifb_last_link_event = event_code;
1665 } else {
1666 event_code = (ifb->ifb_active_lag == NULL)
1667 ? KEV_DL_LINK_OFF
1668 : KEV_DL_LINK_ON;
1669 if (event_code != ifb->ifb_last_link_event) {
1670 if (if_bond_debug) {
1671 timestamp_printf("%s: (receive) generating LINK event\n",
1672 ifb->ifb_name);
1673 }
1674 bond_ifp = ifb->ifb_ifp;
1675 ifb->ifb_last_link_event = event_code;
1676 }
1677 }
1678
1679 done:
1680 bond_unlock();
1681 if (bond_ifp != NULL) {
1682 interface_link_event(bond_ifp, event_code);
1683 }
1684 m_freem(m);
1685 if (need_link_update) {
1686 if (if_bond_debug != 0) {
1687 printf("bond: simulating link status changed event");
1688 }
1689 bond_handle_event(port_ifp, KEV_DL_LINK_ON);
1690 }
1691 return;
1692 }
1693
1694 static void
bond_receive_la_marker_pdu(struct mbuf * m,struct ifnet * port_ifp)1695 bond_receive_la_marker_pdu(struct mbuf * m, struct ifnet * port_ifp)
1696 {
1697 la_marker_pdu_ref marker_p;
1698 bondport_ref p;
1699
1700 marker_p = (la_marker_pdu_ref)(m->m_data + ETHER_HDR_LEN);
1701 if (marker_p->lm_marker_tlv_type != LA_MARKER_TLV_TYPE_MARKER) {
1702 goto failed;
1703 }
1704 bond_lock();
1705 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1706 bond_unlock();
1707 goto failed;
1708 }
1709 p = bond_lookup_port(port_ifp);
1710 if (p == NULL || p->po_enabled == 0
1711 || p->po_bond->ifb_mode != IF_BOND_MODE_LACP) {
1712 bond_unlock();
1713 goto failed;
1714 }
1715 /* echo back the same packet as a marker response */
1716 marker_p->lm_marker_tlv_type = LA_MARKER_TLV_TYPE_MARKER_RESPONSE;
1717 bondport_slow_proto_transmit(p, (packet_buffer_ref)m);
1718 bond_unlock();
1719 return;
1720
1721 failed:
1722 m_freem(m);
1723 return;
1724 }
1725
1726 static void
bond_input(ifnet_t port_ifp,mbuf_t m,char * frame_header)1727 bond_input(ifnet_t port_ifp, mbuf_t m, char *frame_header)
1728 {
1729 bpf_packet_func bpf_func;
1730 const struct ether_header * eh_p;
1731 ifbond_ref ifb;
1732 struct ifnet * ifp;
1733 bondport_ref p;
1734
1735 eh_p = (const struct ether_header *)frame_header;
1736 if ((m->m_flags & M_MCAST) != 0
1737 && bcmp(eh_p->ether_dhost, &slow_proto_multicast,
1738 sizeof(eh_p->ether_dhost)) == 0
1739 && ntohs(eh_p->ether_type) == IEEE8023AD_SLOW_PROTO_ETHERTYPE) {
1740 u_char subtype = *mtod(m, u_char *);
1741
1742 if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP) {
1743 if (m->m_pkthdr.len < (int)offsetof(lacpdu, la_reserved)) {
1744 m_freem(m);
1745 return;
1746 }
1747 /* send to lacp */
1748 if (m->m_len < (int)offsetof(lacpdu, la_reserved)) {
1749 m = m_pullup(m, offsetof(lacpdu, la_reserved));
1750 if (m == NULL) {
1751 return;
1752 }
1753 }
1754 bond_receive_lacpdu(m, port_ifp);
1755 return;
1756 } else if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LA_MARKER_PROTOCOL) {
1757 int min_size;
1758
1759 /* restore the ethernet header pointer in the mbuf */
1760 m->m_pkthdr.len += ETHER_HDR_LEN;
1761 m->m_data -= ETHER_HDR_LEN;
1762 m->m_len += ETHER_HDR_LEN;
1763 min_size = ETHER_HDR_LEN + offsetof(la_marker_pdu, lm_reserved);
1764 if (m->m_pkthdr.len < min_size) {
1765 m_freem(m);
1766 return;
1767 }
1768 /* send to lacp */
1769 if (m->m_len < min_size) {
1770 m = m_pullup(m, min_size);
1771 if (m == NULL) {
1772 return;
1773 }
1774 }
1775 /* send to marker responder */
1776 bond_receive_la_marker_pdu(m, port_ifp);
1777 return;
1778 } else if (subtype == 0
1779 || subtype > IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_END) {
1780 /* invalid subtype, discard the frame */
1781 m_freem(m);
1782 return;
1783 }
1784 }
1785 bond_lock();
1786 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1787 goto done;
1788 }
1789 p = bond_lookup_port(port_ifp);
1790 if (p == NULL || bondport_collecting(p) == 0) {
1791 goto done;
1792 }
1793
1794 ifb = p->po_bond;
1795 ifp = ifb->ifb_ifp;
1796 bpf_func = ifb->ifb_bpf_input;
1797 bond_unlock();
1798
1799 /*
1800 * Need to clear the promiscous flags otherwise it will be
1801 * dropped by DLIL after processing filters
1802 */
1803 if ((mbuf_flags(m) & MBUF_PROMISC)) {
1804 mbuf_setflags_mask(m, 0, MBUF_PROMISC);
1805 }
1806
1807 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1808 (void)ifnet_stat_increment_in(ifp, 1,
1809 (m->m_pkthdr.len + ETHER_HDR_LEN
1810 + ETHER_VLAN_ENCAP_LEN), 0);
1811 } else {
1812 (void)ifnet_stat_increment_in(ifp, 1,
1813 (m->m_pkthdr.len + ETHER_HDR_LEN), 0);
1814 }
1815
1816 /* make the packet appear as if it arrived on the bonded interface */
1817 m->m_pkthdr.rcvif = ifp;
1818 bond_bpf_input(ifp, m, eh_p, bpf_func);
1819 m->m_pkthdr.pkt_hdr = frame_header;
1820 dlil_input_packet_list(ifp, m);
1821 return;
1822
1823 done:
1824 bond_unlock();
1825 m_freem(m);
1826 return;
1827 }
1828
1829 static errno_t
bond_iff_input(void * cookie,ifnet_t port_ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_header_ptr)1830 bond_iff_input(void *cookie, ifnet_t port_ifp, protocol_family_t protocol,
1831 mbuf_t *data, char **frame_header_ptr)
1832 {
1833 #pragma unused(cookie)
1834 #pragma unused(protocol)
1835 mbuf_t m = *data;
1836 char * frame_header = *frame_header_ptr;
1837
1838 bond_input(port_ifp, m, frame_header);
1839 return EJUSTRETURN;
1840 }
1841
1842 static __inline__ const char *
bondport_get_name(bondport_ref p)1843 bondport_get_name(bondport_ref p)
1844 {
1845 return p->po_name;
1846 }
1847
1848 static __inline__ int
bondport_get_index(bondport_ref p)1849 bondport_get_index(bondport_ref p)
1850 {
1851 return ifnet_index(p->po_ifp);
1852 }
1853
1854 static void
bondport_slow_proto_transmit(bondport_ref p,packet_buffer_ref buf)1855 bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf)
1856 {
1857 struct ether_header * eh_p;
1858 int error;
1859
1860 /* packet_buffer_allocate leaves room for ethernet header */
1861 eh_p = mtod(buf, struct ether_header *);
1862 bcopy(&slow_proto_multicast, &eh_p->ether_dhost, sizeof(eh_p->ether_dhost));
1863 bcopy(&p->po_saved_addr, eh_p->ether_shost, sizeof(eh_p->ether_shost));
1864 eh_p->ether_type = htons(IEEE8023AD_SLOW_PROTO_ETHERTYPE);
1865 error = ifnet_output_raw(p->po_ifp, PF_BOND, buf);
1866 if (error != 0) {
1867 printf("bondport_slow_proto_transmit(%s) failed %d\n",
1868 bondport_get_name(p), error);
1869 }
1870 return;
1871 }
1872
1873 static void
bondport_timer_process_func(devtimer_ref timer,devtimer_process_func_event event)1874 bondport_timer_process_func(devtimer_ref timer,
1875 devtimer_process_func_event event)
1876 {
1877 bondport_ref p;
1878
1879 switch (event) {
1880 case devtimer_process_func_event_lock:
1881 bond_lock();
1882 devtimer_retain(timer);
1883 break;
1884 case devtimer_process_func_event_unlock:
1885 if (devtimer_valid(timer)) {
1886 /* as long as the devtimer is valid, we can look at arg0 */
1887 int event_code = 0;
1888 struct ifnet * bond_ifp = NULL;
1889
1890 p = (bondport_ref)devtimer_arg0(timer);
1891 if (ifbond_selection(p->po_bond)) {
1892 event_code = (p->po_bond->ifb_active_lag == NULL)
1893 ? KEV_DL_LINK_OFF
1894 : KEV_DL_LINK_ON;
1895 /* XXX need to take a reference on bond_ifp */
1896 bond_ifp = p->po_bond->ifb_ifp;
1897 p->po_bond->ifb_last_link_event = event_code;
1898 } else {
1899 event_code = (p->po_bond->ifb_active_lag == NULL)
1900 ? KEV_DL_LINK_OFF
1901 : KEV_DL_LINK_ON;
1902 if (event_code != p->po_bond->ifb_last_link_event) {
1903 if (if_bond_debug) {
1904 timestamp_printf("%s: (timer) generating LINK event\n",
1905 p->po_bond->ifb_name);
1906 }
1907 bond_ifp = p->po_bond->ifb_ifp;
1908 p->po_bond->ifb_last_link_event = event_code;
1909 }
1910 }
1911 devtimer_release(timer);
1912 bond_unlock();
1913 if (bond_ifp != NULL) {
1914 interface_link_event(bond_ifp, event_code);
1915 }
1916 } else {
1917 /* timer is going away */
1918 devtimer_release(timer);
1919 bond_unlock();
1920 }
1921 break;
1922 default:
1923 break;
1924 }
1925 }
1926
1927 static bondport_ref
bondport_create(struct ifnet * port_ifp,lacp_port_priority priority,int active,int short_timeout,int * ret_error)1928 bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
1929 int active, int short_timeout, int * ret_error)
1930 {
1931 int error = 0;
1932 bondport_ref p = NULL;
1933 lacp_actor_partner_state s;
1934
1935 *ret_error = 0;
1936 p = kalloc_type(struct bondport_s, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1937 multicast_list_init(&p->po_multicast);
1938 if ((u_int32_t)snprintf(p->po_name, sizeof(p->po_name), "%s%d",
1939 ifnet_name(port_ifp), ifnet_unit(port_ifp))
1940 >= sizeof(p->po_name)) {
1941 printf("if_bond: name too large\n");
1942 *ret_error = EINVAL;
1943 goto failed;
1944 }
1945 error = siocgifdevmtu(port_ifp, &p->po_devmtu);
1946 if (error != 0) {
1947 printf("if_bond: SIOCGIFDEVMTU %s failed, %d\n",
1948 bondport_get_name(p), error);
1949 goto failed;
1950 }
1951 /* remember the current interface MTU so it can be restored */
1952 p->po_devmtu.ifdm_current = ifnet_mtu(port_ifp);
1953 p->po_ifp = port_ifp;
1954 p->po_media_info = interface_media_info(port_ifp);
1955 p->po_current_while_timer = devtimer_create(bondport_timer_process_func, p);
1956 if (p->po_current_while_timer == NULL) {
1957 *ret_error = ENOMEM;
1958 goto failed;
1959 }
1960 p->po_periodic_timer = devtimer_create(bondport_timer_process_func, p);
1961 if (p->po_periodic_timer == NULL) {
1962 *ret_error = ENOMEM;
1963 goto failed;
1964 }
1965 p->po_wait_while_timer = devtimer_create(bondport_timer_process_func, p);
1966 if (p->po_wait_while_timer == NULL) {
1967 *ret_error = ENOMEM;
1968 goto failed;
1969 }
1970 p->po_transmit_timer = devtimer_create(bondport_timer_process_func, p);
1971 if (p->po_transmit_timer == NULL) {
1972 *ret_error = ENOMEM;
1973 goto failed;
1974 }
1975 p->po_receive_state = ReceiveState_none;
1976 p->po_mux_state = MuxState_none;
1977 p->po_priority = priority;
1978 s = 0;
1979 s = lacp_actor_partner_state_set_aggregatable(s);
1980 if (short_timeout) {
1981 s = lacp_actor_partner_state_set_short_timeout(s);
1982 }
1983 if (active) {
1984 s = lacp_actor_partner_state_set_active_lacp(s);
1985 }
1986 p->po_actor_state = s;
1987 return p;
1988
1989 failed:
1990 bondport_free(p);
1991 return NULL;
1992 }
1993
1994 static void
bondport_start(bondport_ref p)1995 bondport_start(bondport_ref p)
1996 {
1997 bondport_receive_machine(p, LAEventStart, NULL);
1998 bondport_mux_machine(p, LAEventStart, NULL);
1999 bondport_periodic_transmit_machine(p, LAEventStart, NULL);
2000 bondport_transmit_machine(p, LAEventStart, NULL);
2001 return;
2002 }
2003
2004 /*
2005 * Function: bondport_invalidate_timers
2006 * Purpose:
2007 * Invalidate all of the timers for the bondport.
2008 */
2009 static void
bondport_invalidate_timers(bondport_ref p)2010 bondport_invalidate_timers(bondport_ref p)
2011 {
2012 devtimer_invalidate(p->po_current_while_timer);
2013 devtimer_invalidate(p->po_periodic_timer);
2014 devtimer_invalidate(p->po_wait_while_timer);
2015 devtimer_invalidate(p->po_transmit_timer);
2016 }
2017
2018 /*
2019 * Function: bondport_cancel_timers
2020 * Purpose:
2021 * Cancel all of the timers for the bondport.
2022 */
2023 static void
bondport_cancel_timers(bondport_ref p)2024 bondport_cancel_timers(bondport_ref p)
2025 {
2026 devtimer_cancel(p->po_current_while_timer);
2027 devtimer_cancel(p->po_periodic_timer);
2028 devtimer_cancel(p->po_wait_while_timer);
2029 devtimer_cancel(p->po_transmit_timer);
2030 }
2031
2032 static void
bondport_free(bondport_ref p)2033 bondport_free(bondport_ref p)
2034 {
2035 multicast_list_remove(&p->po_multicast);
2036 devtimer_release(p->po_current_while_timer);
2037 devtimer_release(p->po_periodic_timer);
2038 devtimer_release(p->po_wait_while_timer);
2039 devtimer_release(p->po_transmit_timer);
2040 kfree_type(struct bondport_s, p);
2041 return;
2042 }
2043
2044 static __inline__ int
bond_device_mtu(struct ifnet * ifp,ifbond_ref ifb)2045 bond_device_mtu(struct ifnet * ifp, ifbond_ref ifb)
2046 {
2047 return ((int)ifnet_mtu(ifp) > ifb->ifb_altmtu)
2048 ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu;
2049 }
2050
2051 static int
bond_add_interface(struct ifnet * ifp,struct ifnet * port_ifp)2052 bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp)
2053 {
2054 u_int32_t eflags;
2055 uint32_t control_flags = 0;
2056 int devmtu;
2057 int error = 0;
2058 int event_code = 0;
2059 interface_filter_t filter = NULL;
2060 int first = FALSE;
2061 ifbond_ref ifb;
2062 bondport_ref * new_array = NULL;
2063 bondport_ref * old_array = NULL;
2064 bondport_ref p;
2065 int old_max = 0;
2066 int new_max = 0;
2067
2068 if (IFNET_IS_INTCOPROC(port_ifp)) {
2069 return EINVAL;
2070 }
2071
2072 /* pre-allocate space for new port */
2073 p = bondport_create(port_ifp, 0x8000, 1, 0, &error);
2074 if (p == NULL) {
2075 return error;
2076 }
2077 bond_lock();
2078 ifb = (ifbond_ref)ifnet_softc(ifp);
2079 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2080 bond_unlock();
2081 bondport_free(p);
2082 return ifb == NULL ? EOPNOTSUPP : EBUSY;
2083 }
2084
2085 /* make sure this interface can handle our current MTU */
2086 devmtu = bond_device_mtu(ifp, ifb);
2087 if (devmtu != 0
2088 && (devmtu > p->po_devmtu.ifdm_max || devmtu < p->po_devmtu.ifdm_min)) {
2089 bond_unlock();
2090 printf("if_bond: interface %s doesn't support mtu %d",
2091 bondport_get_name(p), devmtu);
2092 bondport_free(p);
2093 return EINVAL;
2094 }
2095
2096 /* make sure ifb doesn't get de-allocated while we wait */
2097 ifbond_retain(ifb);
2098
2099 /* wait for other add or remove to complete */
2100 ifbond_wait(ifb, __func__);
2101
2102 if (ifbond_flags_if_detaching(ifb)) {
2103 /* someone destroyed the bond while we were waiting */
2104 error = EBUSY;
2105 goto signal_done;
2106 }
2107 if (bond_lookup_port(port_ifp) != NULL) {
2108 /* port is already part of a bond */
2109 error = EBUSY;
2110 goto signal_done;
2111 }
2112 if ((ifnet_eflags(port_ifp) & (IFEF_VLAN | IFEF_BOND)) != 0) {
2113 /* interface already has VLAN's, or is part of bond */
2114 error = EBUSY;
2115 goto signal_done;
2116 }
2117
2118 /* mark the interface busy */
2119 eflags = if_set_eflags(port_ifp, IFEF_BOND);
2120 if ((eflags & IFEF_VLAN) != 0) {
2121 /* vlan got in ahead of us */
2122 if_clear_eflags(port_ifp, IFEF_BOND);
2123 error = EBUSY;
2124 goto signal_done;
2125 }
2126
2127 if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2128 ifnet_set_offload(ifp, ifnet_offload(port_ifp));
2129 ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
2130 if (ifbond_flags_lladdr(ifb) == FALSE) {
2131 first = TRUE;
2132 }
2133 } else {
2134 ifnet_offload_t ifp_offload;
2135 ifnet_offload_t port_ifp_offload;
2136
2137 ifp_offload = ifnet_offload(ifp);
2138 port_ifp_offload = ifnet_offload(port_ifp);
2139 if (ifp_offload != port_ifp_offload) {
2140 ifnet_offload_t offload;
2141
2142 offload = ifp_offload & port_ifp_offload;
2143 printf("%s(%s, %s) "
2144 "hwassist values don't match 0x%x != 0x%x, using 0x%x instead\n",
2145 __func__,
2146 ifb->ifb_name, bondport_get_name(p),
2147 ifp_offload, port_ifp_offload, offload);
2148 /*
2149 * XXX
2150 * if the bond has VLAN's, we can't simply change the hwassist
2151 * field behind its back: this needs work
2152 */
2153 ifnet_set_offload(ifp, offload);
2154 }
2155 }
2156 p->po_bond = ifb;
2157
2158 /* remember the port's ethernet address so it can be restored */
2159 ether_addr_copy(&p->po_saved_addr, IF_LLADDR(port_ifp));
2160
2161 /* add it to the list of ports */
2162 TAILQ_INSERT_TAIL(&ifb->ifb_port_list, p, po_port_list);
2163 ifb->ifb_port_count++;
2164
2165 bond_unlock();
2166
2167
2168 /* first port added to bond determines bond's ethernet address */
2169 if (first) {
2170 ifnet_set_lladdr_and_type(ifp, IF_LLADDR(port_ifp), ETHER_ADDR_LEN,
2171 IFT_ETHER);
2172 }
2173 uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_IN_LIST);
2174
2175 /* allocate a larger distributing array */
2176 new_max = ifb->ifb_port_count;
2177 new_array = kalloc_type(bondport_ref, new_max, Z_WAITOK);
2178 if (new_array == NULL) {
2179 error = ENOMEM;
2180 goto failed;
2181 }
2182
2183 /* attach our BOND "protocol" to the interface */
2184 error = bond_attach_protocol(port_ifp);
2185 if (error) {
2186 goto failed;
2187 }
2188 uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_PROTO_ATTACHED);
2189
2190 /* attach our BOND interface filter */
2191 error = bond_attach_filter(port_ifp, &filter);
2192 if (error != 0) {
2193 goto failed;
2194 }
2195 uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_FILTER_ATTACHED);
2196
2197 /* set the interface MTU */
2198 devmtu = bond_device_mtu(ifp, ifb);
2199 error = siocsifmtu(port_ifp, devmtu);
2200 if (error != 0) {
2201 printf("%s(%s, %s):"
2202 " SIOCSIFMTU %d failed %d\n",
2203 __func__,
2204 ifb->ifb_name, bondport_get_name(p), devmtu, error);
2205 goto failed;
2206 }
2207 uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_MTU_SET);
2208
2209 /* program the port with our multicast addresses */
2210 error = multicast_list_program(&p->po_multicast, ifp, port_ifp);
2211 if (error) {
2212 printf("%s(%s, %s): multicast_list_program failed %d\n",
2213 __func__,
2214 ifb->ifb_name, bondport_get_name(p), error);
2215 goto failed;
2216 }
2217
2218 /* mark the interface up */
2219 ifnet_set_flags(port_ifp, IFF_UP, IFF_UP);
2220
2221 error = ifnet_ioctl(port_ifp, 0, SIOCSIFFLAGS, NULL);
2222 if (error != 0) {
2223 printf("%s(%s, %s): SIOCSIFFLAGS failed %d\n",
2224 __func__,
2225 ifb->ifb_name, bondport_get_name(p), error);
2226 goto failed;
2227 }
2228
2229 /* re-program the port's ethernet address */
2230 error = if_siflladdr(port_ifp,
2231 (const struct ether_addr *)IF_LLADDR(ifp));
2232 if (error == 0) {
2233 if (memcmp(IF_LLADDR(ifp), IF_LLADDR(port_ifp), ETHER_ADDR_LEN)
2234 != 0) {
2235 /* it lied, it really doesn't support setting lladdr */
2236 error = EOPNOTSUPP;
2237 }
2238 }
2239 if (error != 0) {
2240 /* port doesn't support setting the link address */
2241 printf("%s(%s, %s): if_siflladdr failed %d\n",
2242 __func__,
2243 ifb->ifb_name, bondport_get_name(p), error);
2244 error = ifnet_set_promiscuous(port_ifp, 1);
2245 if (error != 0) {
2246 /* port doesn't support setting promiscuous mode */
2247 printf("%s(%s, %s): set promiscuous failed %d\n",
2248 __func__,
2249 ifb->ifb_name, bondport_get_name(p), error);
2250 goto failed;
2251 }
2252 uint32_bit_set(&control_flags,
2253 PORT_CONTROL_FLAGS_PROMISCUOUS_SET);
2254 } else {
2255 uint32_bit_set(&control_flags,
2256 PORT_CONTROL_FLAGS_LLADDR_SET);
2257 }
2258
2259 /* if we're in promiscuous mode, enable that as well */
2260 if (ifbond_flags_promisc(ifb)) {
2261 error = ifnet_set_promiscuous(port_ifp, 1);
2262 if (error != 0) {
2263 /* port doesn't support setting promiscuous mode */
2264 printf("%s(%s, %s): set promiscuous failed %d\n",
2265 __func__,
2266 ifb->ifb_name, bondport_get_name(p), error);
2267 goto failed;
2268 }
2269 uint32_bit_set(&control_flags,
2270 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2271 }
2272
2273 bond_lock();
2274
2275 /* no failures past this point */
2276 p->po_enabled = 1;
2277 p->po_control_flags = control_flags;
2278
2279 /* copy the contents of the existing distributing array */
2280 if (ifb->ifb_distributing_count) {
2281 bcopy(ifb->ifb_distributing_array, new_array,
2282 sizeof(*new_array) * ifb->ifb_distributing_count);
2283 }
2284 old_array = ifb->ifb_distributing_array;
2285 old_max = ifb->ifb_distributing_max;
2286 ifb->ifb_distributing_array = new_array;
2287 ifb->ifb_distributing_max = new_max;
2288
2289 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2290 bondport_start(p);
2291
2292 /* check if we need to generate a link status event */
2293 if (ifbond_selection(ifb)) {
2294 event_code = (ifb->ifb_active_lag == NULL)
2295 ? KEV_DL_LINK_OFF
2296 : KEV_DL_LINK_ON;
2297 ifb->ifb_last_link_event = event_code;
2298 }
2299 } else {
2300 /* are we adding the first distributing interface? */
2301 if (media_active(&p->po_media_info)) {
2302 if (ifb->ifb_distributing_count == 0) {
2303 ifb->ifb_last_link_event = event_code = KEV_DL_LINK_ON;
2304 }
2305 bondport_enable_distributing(p);
2306 } else {
2307 bondport_disable_distributing(p);
2308 }
2309 }
2310 p->po_filter = filter;
2311
2312 /* clear the busy state, and wakeup anyone waiting */
2313 ifbond_signal(ifb, __func__);
2314 bond_unlock();
2315 if (event_code != 0) {
2316 interface_link_event(ifp, event_code);
2317 }
2318 kfree_type(bondport_ref, old_max, old_array);
2319 return 0;
2320
2321 failed:
2322 bond_assert_lock_not_held();
2323
2324 /* if this was the first port to be added, clear our address */
2325 if (first) {
2326 ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG);
2327 }
2328
2329 kfree_type(bondport_ref, new_max, new_array);
2330 if (uint32_bit_is_set(control_flags,
2331 PORT_CONTROL_FLAGS_LLADDR_SET)) {
2332 int error1;
2333
2334 error1 = if_siflladdr(port_ifp, &p->po_saved_addr);
2335 if (error1 != 0) {
2336 printf("%s(%s, %s): if_siflladdr restore failed %d\n",
2337 __func__,
2338 ifb->ifb_name, bondport_get_name(p), error1);
2339 }
2340 }
2341 if (uint32_bit_is_set(control_flags,
2342 PORT_CONTROL_FLAGS_PROMISCUOUS_SET)) {
2343 int error1;
2344
2345 error1 = ifnet_set_promiscuous(port_ifp, 0);
2346 if (error1 != 0) {
2347 printf("%s(%s, %s): promiscous mode disable failed %d\n",
2348 __func__,
2349 ifb->ifb_name, bondport_get_name(p), error1);
2350 }
2351 }
2352 if (uint32_bit_is_set(control_flags,
2353 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET)) {
2354 int error1;
2355
2356 error1 = ifnet_set_promiscuous(port_ifp, 0);
2357 if (error1 != 0) {
2358 printf("%s(%s, %s): promiscous mode disable failed %d\n",
2359 __func__,
2360 ifb->ifb_name, bondport_get_name(p), error1);
2361 }
2362 }
2363 if (uint32_bit_is_set(control_flags,
2364 PORT_CONTROL_FLAGS_PROTO_ATTACHED)) {
2365 (void)bond_detach_protocol(port_ifp);
2366 }
2367 if (uint32_bit_is_set(control_flags,
2368 PORT_CONTROL_FLAGS_FILTER_ATTACHED)) {
2369 iflt_detach(filter);
2370 }
2371 if (uint32_bit_is_set(control_flags,
2372 PORT_CONTROL_FLAGS_MTU_SET)) {
2373 int error1;
2374
2375 error1 = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2376 if (error1 != 0) {
2377 printf("%s(%s, %s): SIOCSIFMTU %d failed %d\n",
2378 __func__,
2379 ifb->ifb_name, bondport_get_name(p),
2380 p->po_devmtu.ifdm_current, error1);
2381 }
2382 }
2383 bond_lock();
2384 if (uint32_bit_is_set(control_flags,
2385 PORT_CONTROL_FLAGS_IN_LIST)) {
2386 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2387 ifb->ifb_port_count--;
2388 }
2389 if_clear_eflags(ifp, IFEF_BOND);
2390 if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2391 ifb->ifb_altmtu = 0;
2392 ifnet_set_mtu(ifp, ETHERMTU);
2393 ifnet_set_offload(ifp, 0);
2394 }
2395
2396 signal_done:
2397 ifbond_signal(ifb, __func__);
2398 bond_unlock();
2399 ifbond_release(ifb);
2400 bondport_free(p);
2401 return error;
2402 }
2403
2404 static int
bond_remove_interface(ifbond_ref ifb,struct ifnet * port_ifp)2405 bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp)
2406 {
2407 int active_lag = 0;
2408 int error = 0;
2409 int event_code = 0;
2410 bondport_ref head_port;
2411 struct ifnet * ifp;
2412 interface_filter_t filter;
2413 int last = FALSE;
2414 int new_link_address = FALSE;
2415 bondport_ref p;
2416 lacp_actor_partner_state s;
2417 int was_distributing;
2418
2419 bond_assert_lock_held();
2420
2421 ifbond_retain(ifb);
2422 ifbond_wait(ifb, "bond_remove_interface");
2423
2424 p = ifbond_lookup_port(ifb, port_ifp);
2425 if (p == NULL) {
2426 error = ENXIO;
2427 /* it got removed by another thread */
2428 goto signal_done;
2429 }
2430
2431 /* de-select it and remove it from the lists */
2432 was_distributing = bondport_flags_distributing(p);
2433 bondport_disable_distributing(p);
2434 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2435 bondport_set_selected(p, SelectedState_UNSELECTED);
2436 active_lag = bondport_remove_from_LAG(p);
2437 /* invalidate timers here while holding the bond_lock */
2438 bondport_invalidate_timers(p);
2439
2440 /* announce that we're Individual now */
2441 s = p->po_actor_state;
2442 s = lacp_actor_partner_state_set_individual(s);
2443 s = lacp_actor_partner_state_set_not_collecting(s);
2444 s = lacp_actor_partner_state_set_not_distributing(s);
2445 s = lacp_actor_partner_state_set_out_of_sync(s);
2446 p->po_actor_state = s;
2447 bondport_flags_set_ntt(p);
2448 }
2449
2450 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2451 ifb->ifb_port_count--;
2452
2453 ifp = ifb->ifb_ifp;
2454 head_port = TAILQ_FIRST(&ifb->ifb_port_list);
2455 if (head_port == NULL) {
2456 ifnet_set_flags(ifp, 0, IFF_RUNNING);
2457 if (ifbond_flags_lladdr(ifb) == FALSE) {
2458 last = TRUE;
2459 }
2460 ifnet_set_offload(ifp, 0);
2461 ifnet_set_mtu(ifp, ETHERMTU);
2462 ifb->ifb_altmtu = 0;
2463 } else if (ifbond_flags_lladdr(ifb) == FALSE
2464 && bcmp(&p->po_saved_addr, IF_LLADDR(ifp),
2465 ETHER_ADDR_LEN) == 0) {
2466 new_link_address = TRUE;
2467 }
2468 /* check if we need to generate a link status event */
2469 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2470 if (ifbond_selection(ifb) || active_lag) {
2471 event_code = (ifb->ifb_active_lag == NULL)
2472 ? KEV_DL_LINK_OFF
2473 : KEV_DL_LINK_ON;
2474 ifb->ifb_last_link_event = event_code;
2475 }
2476 bondport_transmit_machine(p, LAEventStart,
2477 TRANSMIT_MACHINE_TX_IMMEDIATE);
2478 } else {
2479 /* are we removing the last distributing interface? */
2480 if (was_distributing && ifb->ifb_distributing_count == 0) {
2481 ifb->ifb_last_link_event = event_code = KEV_DL_LINK_OFF;
2482 }
2483 }
2484 filter = p->po_filter;
2485 bond_unlock();
2486
2487 if (last) {
2488 ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG);
2489 } else if (new_link_address) {
2490 struct ifnet * scan_ifp;
2491 bondport_ref scan_port;
2492
2493 /* ifbond_wait() allows port list traversal without holding the lock */
2494
2495 /* this port gave the bond its ethernet address, switch to new one */
2496 ifnet_set_lladdr_and_type(ifp,
2497 &head_port->po_saved_addr, ETHER_ADDR_LEN,
2498 IFT_ETHER);
2499
2500 /* re-program each port with the new link address */
2501 TAILQ_FOREACH(scan_port, &ifb->ifb_port_list, po_port_list) {
2502 scan_ifp = scan_port->po_ifp;
2503
2504 if (!uint32_bit_is_set(scan_port->po_control_flags,
2505 PORT_CONTROL_FLAGS_LLADDR_SET)) {
2506 /* port doesn't support setting lladdr */
2507 continue;
2508 }
2509 error = if_siflladdr(scan_ifp,
2510 (const struct ether_addr *) IF_LLADDR(ifp));
2511 if (error != 0) {
2512 printf("%s(%s, %s): "
2513 "if_siflladdr (%s) failed %d\n",
2514 __func__,
2515 ifb->ifb_name, bondport_get_name(p),
2516 bondport_get_name(scan_port), error);
2517 }
2518 }
2519 }
2520
2521 /* restore the port's ethernet address */
2522 if (uint32_bit_is_set(p->po_control_flags,
2523 PORT_CONTROL_FLAGS_LLADDR_SET)) {
2524 error = if_siflladdr(port_ifp, &p->po_saved_addr);
2525 if (error != 0) {
2526 printf("%s(%s, %s): if_siflladdr failed %d\n",
2527 __func__,
2528 ifb->ifb_name, bondport_get_name(p), error);
2529 }
2530 }
2531
2532 /* disable promiscous mode (if we enabled it) */
2533 if (uint32_bit_is_set(p->po_control_flags,
2534 PORT_CONTROL_FLAGS_PROMISCUOUS_SET)) {
2535 error = ifnet_set_promiscuous(port_ifp, 0);
2536 if (error != 0) {
2537 printf("%s(%s, %s): disable promiscuous failed %d\n",
2538 __func__,
2539 ifb->ifb_name, bondport_get_name(p), error);
2540 }
2541 }
2542
2543 /* disable promiscous mode from bond (if we enabled it) */
2544 if (uint32_bit_is_set(p->po_control_flags,
2545 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET)) {
2546 error = ifnet_set_promiscuous(port_ifp, 0);
2547 if (error != 0) {
2548 printf("%s(%s, %s): disable promiscuous failed %d\n",
2549 __func__,
2550 ifb->ifb_name, bondport_get_name(p), error);
2551 }
2552 }
2553
2554 /* restore the port's MTU */
2555 error = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2556 if (error != 0) {
2557 printf("%s(%s, %s): SIOCSIFMTU %d failed %d\n",
2558 __func__,
2559 ifb->ifb_name, bondport_get_name(p),
2560 p->po_devmtu.ifdm_current, error);
2561 }
2562
2563 /* remove the bond "protocol" */
2564 bond_detach_protocol(port_ifp);
2565
2566 /* detach the filter */
2567 if (filter != NULL) {
2568 iflt_detach(filter);
2569 }
2570
2571 /* generate link event */
2572 if (event_code != 0) {
2573 interface_link_event(ifp, event_code);
2574 }
2575
2576 bond_lock();
2577 bondport_free(p);
2578 if_clear_eflags(port_ifp, IFEF_BOND);
2579 /* release this bondport's reference to the ifbond */
2580 ifbond_release(ifb);
2581
2582 signal_done:
2583 ifbond_signal(ifb, __func__);
2584 ifbond_release(ifb);
2585 return error;
2586 }
2587
2588 static void
bond_set_lacp_mode(ifbond_ref ifb)2589 bond_set_lacp_mode(ifbond_ref ifb)
2590 {
2591 bondport_ref p;
2592
2593 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2594 bondport_disable_distributing(p);
2595 bondport_start(p);
2596 }
2597 return;
2598 }
2599
2600 static void
bond_set_static_mode(ifbond_ref ifb)2601 bond_set_static_mode(ifbond_ref ifb)
2602 {
2603 bondport_ref p;
2604 lacp_actor_partner_state s;
2605
2606 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2607 bondport_disable_distributing(p);
2608 bondport_set_selected(p, SelectedState_UNSELECTED);
2609 (void)bondport_remove_from_LAG(p);
2610 bondport_cancel_timers(p);
2611
2612 /* announce that we're Individual now */
2613 s = p->po_actor_state;
2614 s = lacp_actor_partner_state_set_individual(s);
2615 s = lacp_actor_partner_state_set_not_collecting(s);
2616 s = lacp_actor_partner_state_set_not_distributing(s);
2617 s = lacp_actor_partner_state_set_out_of_sync(s);
2618 p->po_actor_state = s;
2619 bondport_flags_set_ntt(p);
2620 bondport_transmit_machine(p, LAEventStart,
2621 TRANSMIT_MACHINE_TX_IMMEDIATE);
2622 /* clear state */
2623 p->po_actor_state = 0;
2624 bzero(&p->po_partner_state, sizeof(p->po_partner_state));
2625
2626 if (media_active(&p->po_media_info)) {
2627 bondport_enable_distributing(p);
2628 } else {
2629 bondport_disable_distributing(p);
2630 }
2631 }
2632 return;
2633 }
2634
2635 static int
bond_set_mode(struct ifnet * ifp,int mode)2636 bond_set_mode(struct ifnet * ifp, int mode)
2637 {
2638 int error = 0;
2639 int event_code = 0;
2640 ifbond_ref ifb;
2641
2642 bond_lock();
2643 ifb = (ifbond_ref)ifnet_softc(ifp);
2644 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2645 bond_unlock();
2646 return (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2647 }
2648 if (ifb->ifb_mode == mode) {
2649 bond_unlock();
2650 return 0;
2651 }
2652
2653 ifbond_retain(ifb);
2654 ifbond_wait(ifb, "bond_set_mode");
2655
2656 /* verify (again) that the mode is actually different */
2657 if (ifb->ifb_mode == mode) {
2658 /* nothing to do */
2659 goto signal_done;
2660 }
2661
2662 ifb->ifb_mode = mode;
2663 if (mode == IF_BOND_MODE_LACP) {
2664 bond_set_lacp_mode(ifb);
2665
2666 /* check if we need to generate a link status event */
2667 if (ifbond_selection(ifb)) {
2668 event_code = (ifb->ifb_active_lag == NULL)
2669 ? KEV_DL_LINK_OFF
2670 : KEV_DL_LINK_ON;
2671 }
2672 } else {
2673 bond_set_static_mode(ifb);
2674 event_code = (ifb->ifb_distributing_count == 0)
2675 ? KEV_DL_LINK_OFF
2676 : KEV_DL_LINK_ON;
2677 }
2678 ifb->ifb_last_link_event = event_code;
2679
2680 signal_done:
2681 ifbond_signal(ifb, __func__);
2682 bond_unlock();
2683 ifbond_release(ifb);
2684
2685 if (event_code != 0) {
2686 interface_link_event(ifp, event_code);
2687 }
2688 return error;
2689 }
2690
2691 static int
bond_get_status(ifbond_ref ifb,struct if_bond_req * ibr_p,user_addr_t datap)2692 bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, user_addr_t datap)
2693 {
2694 int count;
2695 user_addr_t dst;
2696 int error = 0;
2697 struct if_bond_status_req * ibsr;
2698 struct if_bond_status ibs;
2699 bondport_ref port;
2700
2701 ibsr = &(ibr_p->ibr_ibru.ibru_status);
2702 if (ibsr->ibsr_version != IF_BOND_STATUS_REQ_VERSION) {
2703 return EINVAL;
2704 }
2705 ibsr->ibsr_key = ifb->ifb_key;
2706 ibsr->ibsr_mode = ifb->ifb_mode;
2707 ibsr->ibsr_total = ifb->ifb_port_count;
2708 dst = proc_is64bit(current_proc())
2709 ? ibsr->ibsr_ibsru.ibsru_buffer64
2710 : CAST_USER_ADDR_T(ibsr->ibsr_ibsru.ibsru_buffer);
2711 if (dst == USER_ADDR_NULL) {
2712 /* just want to know how many there are */
2713 goto done;
2714 }
2715 if (ibsr->ibsr_count < 0) {
2716 return EINVAL;
2717 }
2718 count = (ifb->ifb_port_count < ibsr->ibsr_count)
2719 ? ifb->ifb_port_count : ibsr->ibsr_count;
2720 TAILQ_FOREACH(port, &ifb->ifb_port_list, po_port_list) {
2721 struct if_bond_partner_state * ibps_p;
2722 partner_state_ref ps;
2723
2724 if (count == 0) {
2725 break;
2726 }
2727 bzero(&ibs, sizeof(ibs));
2728 strlcpy(ibs.ibs_if_name, port->po_name, sizeof(ibs.ibs_if_name));
2729 ibs.ibs_port_priority = port->po_priority;
2730 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2731 ibs.ibs_state = port->po_actor_state;
2732 ibs.ibs_selected_state = port->po_selected;
2733 ps = &port->po_partner_state;
2734 ibps_p = &ibs.ibs_partner_state;
2735 ibps_p->ibps_system = ps->ps_lag_info.li_system;
2736 ibps_p->ibps_system_priority = ps->ps_lag_info.li_system_priority;
2737 ibps_p->ibps_key = ps->ps_lag_info.li_key;
2738 ibps_p->ibps_port = ps->ps_port;
2739 ibps_p->ibps_port_priority = ps->ps_port_priority;
2740 ibps_p->ibps_state = ps->ps_state;
2741 } else {
2742 /* fake the selected information */
2743 ibs.ibs_selected_state = bondport_flags_distributing(port)
2744 ? SelectedState_SELECTED : SelectedState_UNSELECTED;
2745 }
2746 error = copyout(&ibs, dst, sizeof(ibs));
2747 if (error != 0) {
2748 break;
2749 }
2750 dst += sizeof(ibs);
2751 count--;
2752 }
2753
2754 done:
2755 if (error == 0) {
2756 error = copyout(ibr_p, datap, sizeof(*ibr_p));
2757 } else {
2758 (void)copyout(ibr_p, datap, sizeof(*ibr_p));
2759 }
2760 return error;
2761 }
2762
2763 static int
bond_set_promisc(struct ifnet * ifp)2764 bond_set_promisc(struct ifnet * ifp)
2765 {
2766 int error = 0;
2767 ifbond_ref ifb;
2768 bool is_promisc;
2769 bondport_ref p;
2770 int val;
2771
2772 is_promisc = (ifnet_flags(ifp) & IFF_PROMISC) != 0;
2773
2774 /* determine whether promiscuous state needs to be changed */
2775 bond_lock();
2776 ifb = (ifbond_ref)ifnet_softc(ifp);
2777 if (ifb == NULL) {
2778 bond_unlock();
2779 error = EBUSY;
2780 goto done;
2781 }
2782 if (is_promisc == ifbond_flags_promisc(ifb)) {
2783 /* already in the right state */
2784 bond_unlock();
2785 goto done;
2786 }
2787 ifbond_retain(ifb);
2788 ifbond_wait(ifb, __func__);
2789 if (ifbond_flags_if_detaching(ifb)) {
2790 /* someone destroyed the bond while we were waiting */
2791 error = EBUSY;
2792 goto signal_done;
2793 }
2794 bond_unlock();
2795
2796 /* update the promiscuous state of each memeber */
2797 val = is_promisc ? 1 : 0;
2798 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2799 struct ifnet * port_ifp = p->po_ifp;
2800 bool port_is_promisc;
2801
2802 port_is_promisc = uint32_bit_is_set(p->po_control_flags,
2803 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2804 if (port_is_promisc == is_promisc) {
2805 /* already in the right state */
2806 continue;
2807 }
2808 error = ifnet_set_promiscuous(port_ifp, val);
2809 if (error != 0) {
2810 printf("%s: ifnet_set_promiscuous(%s, %d): failed %d",
2811 ifb->ifb_name, port_ifp->if_xname, val, error);
2812 continue;
2813 }
2814 printf("%s: ifnet_set_promiscuous(%s, %d): succeeded",
2815 ifb->ifb_name, port_ifp->if_xname, val);
2816 if (is_promisc) {
2817 /* remember that we set it */
2818 uint32_bit_set(&p->po_control_flags,
2819 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2820 } else {
2821 uint32_bit_clear(&p->po_control_flags,
2822 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2823 }
2824 }
2825
2826 /* assume that updating promiscuous state succeeded */
2827 error = 0;
2828 bond_lock();
2829
2830 /* update our internal state */
2831 if (is_promisc) {
2832 ifbond_flags_set_promisc(ifb);
2833 } else {
2834 ifbond_flags_clear_promisc(ifb);
2835 }
2836
2837 signal_done:
2838 ifbond_signal(ifb, __func__);
2839 bond_unlock();
2840 ifbond_release(ifb);
2841
2842 done:
2843 return error;
2844 }
2845
2846 static void
bond_get_mtu_values(ifbond_ref ifb,int * ret_min,int * ret_max)2847 bond_get_mtu_values(ifbond_ref ifb, int * ret_min, int * ret_max)
2848 {
2849 int mtu_min = 0;
2850 int mtu_max = 0;
2851 bondport_ref p;
2852
2853 if (TAILQ_FIRST(&ifb->ifb_port_list) != NULL) {
2854 mtu_min = IF_MINMTU;
2855 }
2856 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2857 struct ifdevmtu * devmtu_p = &p->po_devmtu;
2858
2859 if (devmtu_p->ifdm_min > mtu_min) {
2860 mtu_min = devmtu_p->ifdm_min;
2861 }
2862 if (mtu_max == 0 || devmtu_p->ifdm_max < mtu_max) {
2863 mtu_max = devmtu_p->ifdm_max;
2864 }
2865 }
2866 *ret_min = mtu_min;
2867 *ret_max = mtu_max;
2868 return;
2869 }
2870
2871 static int
bond_set_mtu_on_ports(ifbond_ref ifb,int mtu)2872 bond_set_mtu_on_ports(ifbond_ref ifb, int mtu)
2873 {
2874 int error = 0;
2875 bondport_ref p;
2876
2877 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2878 error = siocsifmtu(p->po_ifp, mtu);
2879 if (error != 0) {
2880 printf("if_bond(%s): SIOCSIFMTU %s failed, %d\n",
2881 ifb->ifb_name, bondport_get_name(p), error);
2882 break;
2883 }
2884 }
2885 return error;
2886 }
2887
2888 static int
bond_set_mtu(struct ifnet * ifp,int mtu,int isdevmtu)2889 bond_set_mtu(struct ifnet * ifp, int mtu, int isdevmtu)
2890 {
2891 int error = 0;
2892 ifbond_ref ifb;
2893 int mtu_min;
2894 int mtu_max;
2895 int new_max;
2896 int old_max;
2897
2898 bond_lock();
2899 ifb = (ifbond_ref)ifnet_softc(ifp);
2900 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2901 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2902 goto done;
2903 }
2904 ifbond_retain(ifb);
2905 ifbond_wait(ifb, "bond_set_mtu");
2906
2907 /* check again */
2908 if (ifnet_softc(ifp) == NULL || ifbond_flags_if_detaching(ifb)) {
2909 error = EBUSY;
2910 goto signal_done;
2911 }
2912 bond_get_mtu_values(ifb, &mtu_min, &mtu_max);
2913 if (mtu > mtu_max) {
2914 error = EINVAL;
2915 goto signal_done;
2916 }
2917 if (mtu < mtu_min && (isdevmtu == 0 || mtu != 0)) {
2918 /* allow SIOCSIFALTMTU to set the mtu to 0 */
2919 error = EINVAL;
2920 goto signal_done;
2921 }
2922 if (isdevmtu) {
2923 new_max = (mtu > (int)ifnet_mtu(ifp)) ? mtu : (int)ifnet_mtu(ifp);
2924 } else {
2925 new_max = (mtu > ifb->ifb_altmtu) ? mtu : ifb->ifb_altmtu;
2926 }
2927 old_max = ((int)ifnet_mtu(ifp) > ifb->ifb_altmtu)
2928 ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu;
2929 if (new_max != old_max) {
2930 /* we can safely walk the list of port without the lock held */
2931 bond_unlock();
2932 error = bond_set_mtu_on_ports(ifb, new_max);
2933 if (error != 0) {
2934 /* try our best to back out of it */
2935 (void)bond_set_mtu_on_ports(ifb, old_max);
2936 }
2937 bond_lock();
2938 }
2939 if (error == 0) {
2940 if (isdevmtu) {
2941 ifb->ifb_altmtu = mtu;
2942 } else {
2943 ifnet_set_mtu(ifp, mtu);
2944 }
2945 }
2946
2947 signal_done:
2948 ifbond_signal(ifb, __func__);
2949 ifbond_release(ifb);
2950
2951 done:
2952 bond_unlock();
2953 return error;
2954 }
2955
2956 static int
bond_ioctl(struct ifnet * ifp,u_long cmd,void * data)2957 bond_ioctl(struct ifnet *ifp, u_long cmd, void * data)
2958 {
2959 int error = 0;
2960 struct if_bond_req ibr;
2961 struct ifaddr * ifa;
2962 ifbond_ref ifb;
2963 struct ifreq * ifr;
2964 struct ifmediareq *ifmr;
2965 struct ifnet * port_ifp = NULL;
2966 user_addr_t user_addr;
2967
2968 if (ifnet_type(ifp) != IFT_IEEE8023ADLAG) {
2969 return EOPNOTSUPP;
2970 }
2971 ifr = (struct ifreq *)data;
2972 ifa = (struct ifaddr *)data;
2973
2974 switch (cmd) {
2975 case SIOCSIFADDR:
2976 ifnet_set_flags(ifp, IFF_UP, IFF_UP);
2977 break;
2978
2979 case SIOCGIFMEDIA32:
2980 case SIOCGIFMEDIA64:
2981 bond_lock();
2982 ifb = (ifbond_ref)ifnet_softc(ifp);
2983 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2984 bond_unlock();
2985 return ifb == NULL ? EOPNOTSUPP : EBUSY;
2986 }
2987 ifmr = (struct ifmediareq *)data;
2988 ifmr->ifm_current = IFM_ETHER;
2989 ifmr->ifm_mask = 0;
2990 ifmr->ifm_status = IFM_AVALID;
2991 ifmr->ifm_active = IFM_ETHER;
2992 ifmr->ifm_count = 1;
2993 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2994 if (ifb->ifb_active_lag != NULL) {
2995 ifmr->ifm_active = ifb->ifb_active_lag->lag_active_media;
2996 ifmr->ifm_status |= IFM_ACTIVE;
2997 }
2998 } else if (ifb->ifb_distributing_count > 0) {
2999 ifmr->ifm_active
3000 = ifb->ifb_distributing_array[0]->po_media_info.mi_active;
3001 ifmr->ifm_status |= IFM_ACTIVE;
3002 }
3003 bond_unlock();
3004 user_addr = (cmd == SIOCGIFMEDIA64) ?
3005 ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
3006 CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
3007 if (user_addr != USER_ADDR_NULL) {
3008 error = copyout(&ifmr->ifm_current,
3009 user_addr,
3010 sizeof(int));
3011 }
3012 break;
3013
3014 case SIOCSIFMEDIA:
3015 /* XXX send the SIFMEDIA to all children? Or force autoselect? */
3016 error = EINVAL;
3017 break;
3018
3019 case SIOCGIFDEVMTU:
3020 bond_lock();
3021 ifb = (ifbond_ref)ifnet_softc(ifp);
3022 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3023 bond_unlock();
3024 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
3025 break;
3026 }
3027 ifr->ifr_devmtu.ifdm_current = bond_device_mtu(ifp, ifb);
3028 bond_get_mtu_values(ifb, &ifr->ifr_devmtu.ifdm_min,
3029 &ifr->ifr_devmtu.ifdm_max);
3030 bond_unlock();
3031 break;
3032
3033 case SIOCGIFALTMTU:
3034 bond_lock();
3035 ifb = (ifbond_ref)ifnet_softc(ifp);
3036 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3037 bond_unlock();
3038 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
3039 break;
3040 }
3041 ifr->ifr_mtu = ifb->ifb_altmtu;
3042 bond_unlock();
3043 break;
3044
3045 case SIOCSIFALTMTU:
3046 error = bond_set_mtu(ifp, ifr->ifr_mtu, 1);
3047 break;
3048
3049 case SIOCSIFMTU:
3050 error = bond_set_mtu(ifp, ifr->ifr_mtu, 0);
3051 break;
3052
3053 case SIOCSIFBOND:
3054 user_addr = proc_is64bit(current_proc())
3055 ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
3056 error = copyin(user_addr, &ibr, sizeof(ibr));
3057 if (error) {
3058 break;
3059 }
3060 switch (ibr.ibr_op) {
3061 case IF_BOND_OP_ADD_INTERFACE:
3062 case IF_BOND_OP_REMOVE_INTERFACE:
3063 port_ifp = ifunit(ibr.ibr_ibru.ibru_if_name);
3064 if (port_ifp == NULL) {
3065 error = ENXIO;
3066 break;
3067 }
3068 if (ifnet_type(port_ifp) != IFT_ETHER) {
3069 error = EPROTONOSUPPORT;
3070 break;
3071 }
3072 break;
3073 case IF_BOND_OP_SET_VERBOSE:
3074 case IF_BOND_OP_SET_MODE:
3075 break;
3076 default:
3077 error = EOPNOTSUPP;
3078 break;
3079 }
3080 if (error != 0) {
3081 break;
3082 }
3083 switch (ibr.ibr_op) {
3084 case IF_BOND_OP_ADD_INTERFACE:
3085 error = bond_add_interface(ifp, port_ifp);
3086 break;
3087 case IF_BOND_OP_REMOVE_INTERFACE:
3088 bond_lock();
3089 ifb = (ifbond_ref)ifnet_softc(ifp);
3090 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3091 bond_unlock();
3092 return ifb == NULL ? EOPNOTSUPP : EBUSY;
3093 }
3094 error = bond_remove_interface(ifb, port_ifp);
3095 bond_unlock();
3096 break;
3097 case IF_BOND_OP_SET_VERBOSE:
3098 bond_lock();
3099 if_bond_debug = ibr.ibr_ibru.ibru_int_val;
3100 bond_unlock();
3101 break;
3102 case IF_BOND_OP_SET_MODE:
3103 switch (ibr.ibr_ibru.ibru_int_val) {
3104 case IF_BOND_MODE_LACP:
3105 case IF_BOND_MODE_STATIC:
3106 break;
3107 default:
3108 error = EINVAL;
3109 break;
3110 }
3111 if (error != 0) {
3112 break;
3113 }
3114 error = bond_set_mode(ifp, ibr.ibr_ibru.ibru_int_val);
3115 break;
3116 }
3117 break; /* SIOCSIFBOND */
3118
3119 case SIOCGIFBOND:
3120 user_addr = proc_is64bit(current_proc())
3121 ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
3122 error = copyin(user_addr, &ibr, sizeof(ibr));
3123 if (error) {
3124 break;
3125 }
3126 switch (ibr.ibr_op) {
3127 case IF_BOND_OP_GET_STATUS:
3128 break;
3129 default:
3130 error = EOPNOTSUPP;
3131 break;
3132 }
3133 if (error != 0) {
3134 break;
3135 }
3136 bond_lock();
3137 ifb = (ifbond_ref)ifnet_softc(ifp);
3138 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3139 bond_unlock();
3140 return ifb == NULL ? EOPNOTSUPP : EBUSY;
3141 }
3142 switch (ibr.ibr_op) {
3143 case IF_BOND_OP_GET_STATUS:
3144 error = bond_get_status(ifb, &ibr, user_addr);
3145 break;
3146 }
3147 bond_unlock();
3148 break; /* SIOCGIFBOND */
3149
3150 case SIOCSIFLLADDR:
3151 error = EOPNOTSUPP;
3152 break;
3153
3154 case SIOCSIFFLAGS:
3155 /* enable promiscuous mode on members */
3156 error = bond_set_promisc(ifp);
3157 break;
3158
3159 case SIOCADDMULTI:
3160 case SIOCDELMULTI:
3161 error = bond_setmulti(ifp);
3162 break;
3163 default:
3164 error = EOPNOTSUPP;
3165 }
3166 return error;
3167 }
3168
3169 static void
bond_if_free(struct ifnet * ifp)3170 bond_if_free(struct ifnet * ifp)
3171 {
3172 ifbond_ref ifb;
3173
3174 if (ifp == NULL) {
3175 return;
3176 }
3177 bond_lock();
3178 ifb = (ifbond_ref)ifnet_softc(ifp);
3179 if (ifb == NULL) {
3180 bond_unlock();
3181 return;
3182 }
3183 ifbond_release(ifb);
3184 bond_unlock();
3185 ifnet_release(ifp);
3186 return;
3187 }
3188
3189 static void
bond_handle_event(struct ifnet * port_ifp,int event_code)3190 bond_handle_event(struct ifnet * port_ifp, int event_code)
3191 {
3192 struct ifnet * bond_ifp = NULL;
3193 ifbond_ref ifb;
3194 int old_distributing_count;
3195 bondport_ref p;
3196 struct media_info media_info = { .mi_active = 0, .mi_status = 0 };
3197
3198 switch (event_code) {
3199 case KEV_DL_IF_DETACHED:
3200 case KEV_DL_IF_DETACHING:
3201 break;
3202 case KEV_DL_LINK_OFF:
3203 case KEV_DL_LINK_ON:
3204 media_info = interface_media_info(port_ifp);
3205 break;
3206 default:
3207 return;
3208 }
3209 bond_lock();
3210 p = bond_lookup_port(port_ifp);
3211 if (p == NULL) {
3212 bond_unlock();
3213 return;
3214 }
3215 ifb = p->po_bond;
3216 old_distributing_count = ifb->ifb_distributing_count;
3217 switch (event_code) {
3218 case KEV_DL_IF_DETACHED:
3219 case KEV_DL_IF_DETACHING:
3220 bond_remove_interface(ifb, p->po_ifp);
3221 break;
3222 case KEV_DL_LINK_OFF:
3223 case KEV_DL_LINK_ON:
3224 p->po_media_info = media_info;
3225 if (p->po_enabled) {
3226 bondport_link_status_changed(p);
3227 }
3228 break;
3229 }
3230 /* generate a link-event */
3231 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
3232 if (ifbond_selection(ifb)) {
3233 event_code = (ifb->ifb_active_lag == NULL)
3234 ? KEV_DL_LINK_OFF
3235 : KEV_DL_LINK_ON;
3236 /* XXX need to take a reference on bond_ifp */
3237 bond_ifp = ifb->ifb_ifp;
3238 ifb->ifb_last_link_event = event_code;
3239 } else {
3240 event_code = (ifb->ifb_active_lag == NULL)
3241 ? KEV_DL_LINK_OFF
3242 : KEV_DL_LINK_ON;
3243 if (event_code != ifb->ifb_last_link_event) {
3244 if (if_bond_debug) {
3245 timestamp_printf("%s: (event) generating LINK event\n",
3246 ifb->ifb_name);
3247 }
3248 bond_ifp = ifb->ifb_ifp;
3249 ifb->ifb_last_link_event = event_code;
3250 }
3251 }
3252 } else {
3253 /*
3254 * if the distributing array membership changed from 0 <-> !0
3255 * generate a link event
3256 */
3257 if (old_distributing_count == 0
3258 && ifb->ifb_distributing_count != 0) {
3259 event_code = KEV_DL_LINK_ON;
3260 } else if (old_distributing_count != 0
3261 && ifb->ifb_distributing_count == 0) {
3262 event_code = KEV_DL_LINK_OFF;
3263 }
3264 if (event_code != 0 && event_code != ifb->ifb_last_link_event) {
3265 bond_ifp = ifb->ifb_ifp;
3266 ifb->ifb_last_link_event = event_code;
3267 }
3268 }
3269
3270 bond_unlock();
3271 if (bond_ifp != NULL) {
3272 interface_link_event(bond_ifp, event_code);
3273 }
3274 return;
3275 }
3276
3277 static void
bond_iff_event(__unused void * cookie,ifnet_t port_ifp,__unused protocol_family_t protocol,const struct kev_msg * event)3278 bond_iff_event(__unused void *cookie, ifnet_t port_ifp,
3279 __unused protocol_family_t protocol,
3280 const struct kev_msg *event)
3281 {
3282 int event_code;
3283
3284 if (event->vendor_code != KEV_VENDOR_APPLE
3285 || event->kev_class != KEV_NETWORK_CLASS
3286 || event->kev_subclass != KEV_DL_SUBCLASS) {
3287 return;
3288 }
3289 event_code = event->event_code;
3290 switch (event_code) {
3291 case KEV_DL_LINK_OFF:
3292 case KEV_DL_LINK_ON:
3293 case KEV_DL_IF_DETACHING:
3294 case KEV_DL_IF_DETACHED:
3295 bond_handle_event(port_ifp, event_code);
3296 break;
3297 default:
3298 break;
3299 }
3300 return;
3301 }
3302
3303 static void
bond_iff_detached(__unused void * cookie,ifnet_t port_ifp)3304 bond_iff_detached(__unused void *cookie, ifnet_t port_ifp)
3305 {
3306 bond_handle_event(port_ifp, KEV_DL_IF_DETACHED);
3307 return;
3308 }
3309
3310 static void
interface_link_event(struct ifnet * ifp,u_int32_t event_code)3311 interface_link_event(struct ifnet * ifp, u_int32_t event_code)
3312 {
3313 struct event {
3314 u_int32_t ifnet_family;
3315 u_int32_t unit;
3316 char if_name[IFNAMSIZ];
3317 };
3318 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
3319 struct kern_event_msg *header = (struct kern_event_msg*)message;
3320 struct event *data = (struct event *)(header + 1);
3321
3322 header->total_size = sizeof(message);
3323 header->vendor_code = KEV_VENDOR_APPLE;
3324 header->kev_class = KEV_NETWORK_CLASS;
3325 header->kev_subclass = KEV_DL_SUBCLASS;
3326 header->event_code = event_code;
3327 data->ifnet_family = ifnet_family(ifp);
3328 data->unit = (u_int32_t)ifnet_unit(ifp);
3329 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
3330 ifnet_event(ifp, header);
3331 }
3332
3333 static errno_t
bond_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)3334 bond_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
3335 char *header)
3336 {
3337 #pragma unused(protocol, packet, header)
3338 if (if_bond_debug != 0) {
3339 printf("%s: unexpected packet from %s\n", __func__,
3340 ifp->if_xname);
3341 }
3342 return 0;
3343 }
3344
3345
3346 /*
3347 * Function: bond_attach_protocol
3348 * Purpose:
3349 * Attach a DLIL protocol to the interface.
3350 *
3351 * The ethernet demux special cases to always return PF_BOND if the
3352 * interface is bonded. That means we receive all traffic from that
3353 * interface without passing any of the traffic to any other attached
3354 * protocol.
3355 */
3356 static int
bond_attach_protocol(struct ifnet * ifp)3357 bond_attach_protocol(struct ifnet *ifp)
3358 {
3359 int error;
3360 struct ifnet_attach_proto_param reg;
3361
3362 bzero(®, sizeof(reg));
3363 reg.input = bond_proto_input;
3364
3365 error = ifnet_attach_protocol(ifp, PF_BOND, ®);
3366 if (error) {
3367 printf("bond over %s%d: ifnet_attach_protocol failed, %d\n",
3368 ifnet_name(ifp), ifnet_unit(ifp), error);
3369 }
3370 return error;
3371 }
3372
3373 /*
3374 * Function: bond_detach_protocol
3375 * Purpose:
3376 * Detach our DLIL protocol from an interface
3377 */
3378 static int
bond_detach_protocol(struct ifnet * ifp)3379 bond_detach_protocol(struct ifnet *ifp)
3380 {
3381 int error;
3382
3383 error = ifnet_detach_protocol(ifp, PF_BOND);
3384 if (error) {
3385 printf("bond over %s%d: ifnet_detach_protocol failed, %d\n",
3386 ifnet_name(ifp), ifnet_unit(ifp), error);
3387 }
3388 return error;
3389 }
3390
3391 /*
3392 * Function: bond_attach_filter
3393 * Purpose:
3394 * Attach our DLIL interface filter.
3395 */
3396 static int
bond_attach_filter(struct ifnet * ifp,interface_filter_t * filter_p)3397 bond_attach_filter(struct ifnet *ifp, interface_filter_t * filter_p)
3398 {
3399 int error;
3400 struct iff_filter iff;
3401
3402 /*
3403 * install an interface filter
3404 */
3405 memset(&iff, 0, sizeof(struct iff_filter));
3406 iff.iff_name = "com.apple.kernel.bsd.net.if_bond";
3407 iff.iff_input = bond_iff_input;
3408 iff.iff_event = bond_iff_event;
3409 iff.iff_detached = bond_iff_detached;
3410 error = dlil_attach_filter(ifp, &iff, filter_p,
3411 DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
3412 if (error != 0) {
3413 printf("%s: dlil_attach_filter failed %d\n", __func__, error);
3414 }
3415 return error;
3416 }
3417
3418
3419 /*
3420 * DLIL interface family functions
3421 */
3422 extern int ether_attach_inet(ifnet_t ifp, protocol_family_t protocol_family);
3423 extern void ether_detach_inet(ifnet_t ifp, protocol_family_t protocol_family);
3424 extern int ether_attach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
3425 extern void ether_detach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
3426 extern int ether_attach_at(ifnet_t ifp, protocol_family_t protocol_family);
3427 extern void ether_detach_at(ifnet_t ifp, protocol_family_t protocol_family);
3428
3429 __private_extern__ int
bond_family_init(void)3430 bond_family_init(void)
3431 {
3432 int error = 0;
3433
3434 error = proto_register_plumber(PF_INET, APPLE_IF_FAM_BOND,
3435 ether_attach_inet,
3436 ether_detach_inet);
3437 if (error != 0) {
3438 printf("bond: proto_register_plumber failed for AF_INET error=%d\n",
3439 error);
3440 goto done;
3441 }
3442 error = proto_register_plumber(PF_INET6, APPLE_IF_FAM_BOND,
3443 ether_attach_inet6,
3444 ether_detach_inet6);
3445 if (error != 0) {
3446 printf("bond: proto_register_plumber failed for AF_INET6 error=%d\n",
3447 error);
3448 goto done;
3449 }
3450 error = bond_clone_attach();
3451 if (error != 0) {
3452 printf("bond: proto_register_plumber failed bond_clone_attach error=%d\n",
3453 error);
3454 goto done;
3455 }
3456
3457 done:
3458 return error;
3459 }
3460 /**
3461 **
3462 ** LACP routines:
3463 **
3464 **/
3465
3466 /**
3467 ** LACP ifbond_list routines
3468 **/
3469 static bondport_ref
ifbond_list_find_moved_port(bondport_ref rx_port,const lacp_actor_partner_tlv_ref atlv)3470 ifbond_list_find_moved_port(bondport_ref rx_port,
3471 const lacp_actor_partner_tlv_ref atlv)
3472 {
3473 ifbond_ref bond;
3474 bondport_ref p;
3475 partner_state_ref ps;
3476 LAG_info_ref ps_li;
3477
3478 TAILQ_FOREACH(bond, &g_bond->ifbond_list, ifb_bond_list) {
3479 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3480 if (rx_port == p) {
3481 /* no point in comparing against ourselves */
3482 continue;
3483 }
3484 if (p->po_receive_state != ReceiveState_PORT_DISABLED) {
3485 /* it's not clear that we should be checking this */
3486 continue;
3487 }
3488 ps = &p->po_partner_state;
3489 if (lacp_actor_partner_state_defaulted(ps->ps_state)) {
3490 continue;
3491 }
3492 ps_li = &ps->ps_lag_info;
3493 if (ps->ps_port == lacp_actor_partner_tlv_get_port(atlv)
3494 && bcmp(&ps_li->li_system, atlv->lap_system,
3495 sizeof(ps_li->li_system)) == 0) {
3496 if (if_bond_debug) {
3497 timestamp_printf("System " EA_FORMAT
3498 " Port 0x%x moved from %s to %s\n",
3499 EA_LIST(&ps_li->li_system), ps->ps_port,
3500 bondport_get_name(p),
3501 bondport_get_name(rx_port));
3502 }
3503 return p;
3504 }
3505 }
3506 }
3507 return NULL;
3508 }
3509
3510 /**
3511 ** LACP ifbond, LAG routines
3512 **/
3513
3514 static int
ifbond_selection(ifbond_ref bond)3515 ifbond_selection(ifbond_ref bond)
3516 {
3517 int all_ports_ready = 0;
3518 int active_media = 0;
3519 LAG_ref lag = NULL;
3520 int lag_changed = 0;
3521 bondport_ref p;
3522 int port_speed = 0;
3523
3524 lag = ifbond_find_best_LAG(bond, &active_media);
3525 if (lag != bond->ifb_active_lag) {
3526 if (bond->ifb_active_lag != NULL) {
3527 ifbond_deactivate_LAG(bond, bond->ifb_active_lag);
3528 bond->ifb_active_lag = NULL;
3529 }
3530 bond->ifb_active_lag = lag;
3531 if (lag != NULL) {
3532 ifbond_activate_LAG(bond, lag, active_media);
3533 }
3534 lag_changed = 1;
3535 } else if (lag != NULL) {
3536 if (lag->lag_active_media != active_media) {
3537 if (if_bond_debug) {
3538 timestamp_printf("LAG PORT SPEED CHANGED from %d to %d\n",
3539 link_speed(lag->lag_active_media),
3540 link_speed(active_media));
3541 }
3542 ifbond_deactivate_LAG(bond, lag);
3543 ifbond_activate_LAG(bond, lag, active_media);
3544 lag_changed = 1;
3545 }
3546 }
3547 if (lag != NULL) {
3548 port_speed = link_speed(active_media);
3549 all_ports_ready = ifbond_all_ports_ready(bond);
3550 }
3551 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3552 if (lag != NULL && p->po_lag == lag
3553 && media_speed(&p->po_media_info) == port_speed
3554 && (p->po_mux_state == MuxState_DETACHED
3555 || p->po_selected == SelectedState_SELECTED
3556 || p->po_selected == SelectedState_STANDBY)
3557 && bondport_aggregatable(p)) {
3558 if (bond->ifb_max_active > 0) {
3559 if (lag->lag_selected_port_count < bond->ifb_max_active) {
3560 if (p->po_selected == SelectedState_STANDBY
3561 || p->po_selected == SelectedState_UNSELECTED) {
3562 bondport_set_selected(p, SelectedState_SELECTED);
3563 }
3564 } else if (p->po_selected == SelectedState_UNSELECTED) {
3565 bondport_set_selected(p, SelectedState_STANDBY);
3566 }
3567 } else {
3568 bondport_set_selected(p, SelectedState_SELECTED);
3569 }
3570 }
3571 if (bondport_flags_selected_changed(p)) {
3572 bondport_flags_clear_selected_changed(p);
3573 bondport_mux_machine(p, LAEventSelectedChange, NULL);
3574 }
3575 if (all_ports_ready
3576 && bondport_flags_ready(p)
3577 && p->po_mux_state == MuxState_WAITING) {
3578 bondport_mux_machine(p, LAEventReady, NULL);
3579 }
3580 bondport_transmit_machine(p, LAEventStart, NULL);
3581 }
3582 return lag_changed;
3583 }
3584
3585 static LAG_ref
ifbond_find_best_LAG(ifbond_ref bond,int * active_media)3586 ifbond_find_best_LAG(ifbond_ref bond, int * active_media)
3587 {
3588 int best_active = 0;
3589 LAG_ref best_lag = NULL;
3590 int best_count = 0;
3591 int best_speed = 0;
3592 LAG_ref lag;
3593
3594 if (bond->ifb_active_lag != NULL) {
3595 best_lag = bond->ifb_active_lag;
3596 best_count = LAG_get_aggregatable_port_count(best_lag, &best_active);
3597 if (bond->ifb_max_active > 0
3598 && best_count > bond->ifb_max_active) {
3599 best_count = bond->ifb_max_active;
3600 }
3601 best_speed = link_speed(best_active);
3602 }
3603 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3604 int active;
3605 int count;
3606 int speed;
3607
3608 if (lag == bond->ifb_active_lag) {
3609 /* we've already computed it */
3610 continue;
3611 }
3612 count = LAG_get_aggregatable_port_count(lag, &active);
3613 if (count == 0) {
3614 continue;
3615 }
3616 if (bond->ifb_max_active > 0
3617 && count > bond->ifb_max_active) {
3618 /* if there's a limit, don't count extra links */
3619 count = bond->ifb_max_active;
3620 }
3621 speed = link_speed(active);
3622 if ((count * speed) > (best_count * best_speed)) {
3623 best_count = count;
3624 best_speed = speed;
3625 best_active = active;
3626 best_lag = lag;
3627 }
3628 }
3629 if (best_count == 0) {
3630 return NULL;
3631 }
3632 *active_media = best_active;
3633 return best_lag;
3634 }
3635
3636 static void
ifbond_deactivate_LAG(__unused ifbond_ref bond,LAG_ref lag)3637 ifbond_deactivate_LAG(__unused ifbond_ref bond, LAG_ref lag)
3638 {
3639 bondport_ref p;
3640
3641 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3642 bondport_set_selected(p, SelectedState_UNSELECTED);
3643 }
3644 return;
3645 }
3646
3647 static void
ifbond_activate_LAG(ifbond_ref bond,LAG_ref lag,int active_media)3648 ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media)
3649 {
3650 int need = 0;
3651 bondport_ref p;
3652
3653 if (bond->ifb_max_active > 0) {
3654 need = bond->ifb_max_active;
3655 }
3656 lag->lag_active_media = active_media;
3657 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3658 if (bondport_aggregatable(p) == 0) {
3659 bondport_set_selected(p, SelectedState_UNSELECTED);
3660 } else if (media_speed(&p->po_media_info) != link_speed(active_media)) {
3661 bondport_set_selected(p, SelectedState_UNSELECTED);
3662 } else if (p->po_mux_state == MuxState_DETACHED) {
3663 if (bond->ifb_max_active > 0) {
3664 if (need > 0) {
3665 bondport_set_selected(p, SelectedState_SELECTED);
3666 need--;
3667 } else {
3668 bondport_set_selected(p, SelectedState_STANDBY);
3669 }
3670 } else {
3671 bondport_set_selected(p, SelectedState_SELECTED);
3672 }
3673 } else {
3674 bondport_set_selected(p, SelectedState_UNSELECTED);
3675 }
3676 }
3677 return;
3678 }
3679
#if 0
/*
 * Function: ifbond_set_max_active
 * Purpose:
 *   (compiled out) Record a new ifb_max_active limit and, if the active
 *   LAG currently has more SELECTED ports than the limit allows, mark
 *   the excess ports UNSELECTED, in port-list order.
 */
static void
ifbond_set_max_active(ifbond_ref bond, int max_active)
{
	LAG_ref         active = bond->ifb_active_lag;
	int             excess;
	bondport_ref    port;

	bond->ifb_max_active = max_active;
	if (bond->ifb_max_active <= 0 || active == NULL) {
		return;
	}
	if (active->lag_selected_port_count <= bond->ifb_max_active) {
		/* already within the limit */
		return;
	}
	excess = active->lag_selected_port_count - bond->ifb_max_active;
	TAILQ_FOREACH(port, &active->lag_port_list, po_lag_port_list) {
		if (port->po_selected != SelectedState_SELECTED) {
			continue;
		}
		bondport_set_selected(port, SelectedState_UNSELECTED);
		if (--excess == 0) {
			break;
		}
	}
}
#endif
3708
3709 static int
ifbond_all_ports_ready(ifbond_ref bond)3710 ifbond_all_ports_ready(ifbond_ref bond)
3711 {
3712 int ready = 0;
3713 bondport_ref p;
3714
3715 if (bond->ifb_active_lag == NULL) {
3716 return 0;
3717 }
3718 TAILQ_FOREACH(p, &bond->ifb_active_lag->lag_port_list, po_lag_port_list) {
3719 if (p->po_mux_state == MuxState_WAITING
3720 && p->po_selected == SelectedState_SELECTED) {
3721 if (bondport_flags_ready(p) == 0) {
3722 return 0;
3723 }
3724 }
3725 /* note that there was at least one ready port */
3726 ready = 1;
3727 }
3728 return ready;
3729 }
3730
3731 static int
ifbond_all_ports_attached(ifbond_ref bond,bondport_ref this_port)3732 ifbond_all_ports_attached(ifbond_ref bond, bondport_ref this_port)
3733 {
3734 bondport_ref p;
3735
3736 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3737 if (this_port == p) {
3738 continue;
3739 }
3740 if (bondport_flags_mux_attached(p) == 0) {
3741 return 0;
3742 }
3743 }
3744 return 1;
3745 }
3746
3747 static LAG_ref
ifbond_get_LAG_matching_port(ifbond_ref bond,bondport_ref p)3748 ifbond_get_LAG_matching_port(ifbond_ref bond, bondport_ref p)
3749 {
3750 LAG_ref lag;
3751
3752 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3753 if (bcmp(&lag->lag_info, &p->po_partner_state.ps_lag_info,
3754 sizeof(lag->lag_info)) == 0) {
3755 return lag;
3756 }
3757 }
3758 return NULL;
3759 }
3760
3761 static int
LAG_get_aggregatable_port_count(LAG_ref lag,int * active_media)3762 LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media)
3763 {
3764 int active;
3765 int count;
3766 bondport_ref p;
3767 int speed;
3768
3769 active = 0;
3770 count = 0;
3771 speed = 0;
3772 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3773 if (bondport_aggregatable(p)) {
3774 int this_speed;
3775
3776 this_speed = media_speed(&p->po_media_info);
3777 if (this_speed == 0) {
3778 continue;
3779 }
3780 if (this_speed > speed) {
3781 active = p->po_media_info.mi_active;
3782 speed = this_speed;
3783 count = 1;
3784 } else if (this_speed == speed) {
3785 count++;
3786 }
3787 }
3788 }
3789 *active_media = active;
3790 return count;
3791 }
3792
3793
3794 /**
3795 ** LACP bondport routines
3796 **/
3797 static void
bondport_link_status_changed(bondport_ref p)3798 bondport_link_status_changed(bondport_ref p)
3799 {
3800 ifbond_ref bond = p->po_bond;
3801
3802 if (if_bond_debug) {
3803 if (media_active(&p->po_media_info)) {
3804 const char * duplex_string;
3805
3806 if (media_full_duplex(&p->po_media_info)) {
3807 duplex_string = "full";
3808 } else if (media_type_unknown(&p->po_media_info)) {
3809 duplex_string = "unknown";
3810 } else {
3811 duplex_string = "half";
3812 }
3813 timestamp_printf("[%s] Link UP %d Mbit/s %s duplex\n",
3814 bondport_get_name(p),
3815 media_speed(&p->po_media_info),
3816 duplex_string);
3817 } else {
3818 timestamp_printf("[%s] Link DOWN\n",
3819 bondport_get_name(p));
3820 }
3821 }
3822 if (bond->ifb_mode == IF_BOND_MODE_LACP) {
3823 if (media_active(&p->po_media_info)
3824 && bond->ifb_active_lag != NULL
3825 && p->po_lag == bond->ifb_active_lag
3826 && p->po_selected != SelectedState_UNSELECTED) {
3827 if (media_speed(&p->po_media_info) != p->po_lag->lag_active_media) {
3828 if (if_bond_debug) {
3829 timestamp_printf("[%s] Port speed %d differs from LAG %d\n",
3830 bondport_get_name(p),
3831 media_speed(&p->po_media_info),
3832 link_speed(p->po_lag->lag_active_media));
3833 }
3834 bondport_set_selected(p, SelectedState_UNSELECTED);
3835 }
3836 }
3837 bondport_receive_machine(p, LAEventMediaChange, NULL);
3838 bondport_mux_machine(p, LAEventMediaChange, NULL);
3839 bondport_periodic_transmit_machine(p, LAEventMediaChange, NULL);
3840 } else {
3841 if (media_active(&p->po_media_info)) {
3842 bondport_enable_distributing(p);
3843 } else {
3844 bondport_disable_distributing(p);
3845 }
3846 }
3847 return;
3848 }
3849
3850 static int
bondport_aggregatable(bondport_ref p)3851 bondport_aggregatable(bondport_ref p)
3852 {
3853 partner_state_ref ps = &p->po_partner_state;
3854
3855 if (lacp_actor_partner_state_aggregatable(p->po_actor_state) == 0
3856 || lacp_actor_partner_state_aggregatable(ps->ps_state) == 0) {
3857 /* we and/or our partner are individual */
3858 return 0;
3859 }
3860 if (p->po_lag == NULL) {
3861 return 0;
3862 }
3863 switch (p->po_receive_state) {
3864 default:
3865 if (if_bond_debug) {
3866 timestamp_printf("[%s] Port is not selectable\n",
3867 bondport_get_name(p));
3868 }
3869 return 0;
3870 case ReceiveState_CURRENT:
3871 case ReceiveState_EXPIRED:
3872 break;
3873 }
3874 return 1;
3875 }
3876
3877 static int
bondport_matches_LAG(bondport_ref p,LAG_ref lag)3878 bondport_matches_LAG(bondport_ref p, LAG_ref lag)
3879 {
3880 LAG_info_ref lag_li;
3881 partner_state_ref ps;
3882 LAG_info_ref ps_li;
3883
3884 ps = &p->po_partner_state;
3885 ps_li = &ps->ps_lag_info;
3886 lag_li = &lag->lag_info;
3887 if (ps_li->li_system_priority == lag_li->li_system_priority
3888 && ps_li->li_key == lag_li->li_key
3889 && (bcmp(&ps_li->li_system, &lag_li->li_system,
3890 sizeof(lag_li->li_system))
3891 == 0)) {
3892 return 1;
3893 }
3894 return 0;
3895 }
3896
3897 static int
bondport_remove_from_LAG(bondport_ref p)3898 bondport_remove_from_LAG(bondport_ref p)
3899 {
3900 int active_lag = 0;
3901 ifbond_ref bond = p->po_bond;
3902 LAG_ref lag = p->po_lag;
3903
3904 if (lag == NULL) {
3905 return 0;
3906 }
3907 TAILQ_REMOVE(&lag->lag_port_list, p, po_lag_port_list);
3908 if (if_bond_debug) {
3909 timestamp_printf("[%s] Removed from LAG (0x%04x," EA_FORMAT
3910 ",0x%04x)\n",
3911 bondport_get_name(p),
3912 lag->lag_info.li_system_priority,
3913 EA_LIST(&lag->lag_info.li_system),
3914 lag->lag_info.li_key);
3915 }
3916 p->po_lag = NULL;
3917 lag->lag_port_count--;
3918 if (lag->lag_port_count > 0) {
3919 return bond->ifb_active_lag == lag;
3920 }
3921 if (if_bond_debug) {
3922 timestamp_printf("Key 0x%04x: LAG Released (%04x," EA_FORMAT
3923 ",0x%04x)\n",
3924 bond->ifb_key,
3925 lag->lag_info.li_system_priority,
3926 EA_LIST(&lag->lag_info.li_system),
3927 lag->lag_info.li_key);
3928 }
3929 TAILQ_REMOVE(&bond->ifb_lag_list, lag, lag_list);
3930 if (bond->ifb_active_lag == lag) {
3931 bond->ifb_active_lag = NULL;
3932 active_lag = 1;
3933 }
3934 kfree_type(struct LAG_s, lag);
3935 return active_lag;
3936 }
3937
3938 static void
bondport_add_to_LAG(bondport_ref p,LAG_ref lag)3939 bondport_add_to_LAG(bondport_ref p, LAG_ref lag)
3940 {
3941 TAILQ_INSERT_TAIL(&lag->lag_port_list, p, po_lag_port_list);
3942 p->po_lag = lag;
3943 lag->lag_port_count++;
3944 if (if_bond_debug) {
3945 timestamp_printf("[%s] Added to LAG (0x%04x," EA_FORMAT "0x%04x)\n",
3946 bondport_get_name(p),
3947 lag->lag_info.li_system_priority,
3948 EA_LIST(&lag->lag_info.li_system),
3949 lag->lag_info.li_key);
3950 }
3951 return;
3952 }
3953
3954 static void
bondport_assign_to_LAG(bondport_ref p)3955 bondport_assign_to_LAG(bondport_ref p)
3956 {
3957 ifbond_ref bond = p->po_bond;
3958 LAG_ref lag;
3959
3960 if (lacp_actor_partner_state_defaulted(p->po_actor_state)) {
3961 bondport_remove_from_LAG(p);
3962 return;
3963 }
3964 lag = p->po_lag;
3965 if (lag != NULL) {
3966 if (bondport_matches_LAG(p, lag)) {
3967 /* still OK */
3968 return;
3969 }
3970 bondport_remove_from_LAG(p);
3971 }
3972 lag = ifbond_get_LAG_matching_port(bond, p);
3973 if (lag != NULL) {
3974 bondport_add_to_LAG(p, lag);
3975 return;
3976 }
3977 lag = kalloc_type(struct LAG_s, Z_WAITOK);
3978 TAILQ_INIT(&lag->lag_port_list);
3979 lag->lag_port_count = 0;
3980 lag->lag_selected_port_count = 0;
3981 lag->lag_info = p->po_partner_state.ps_lag_info;
3982 TAILQ_INSERT_TAIL(&bond->ifb_lag_list, lag, lag_list);
3983 if (if_bond_debug) {
3984 timestamp_printf("Key 0x%04x: LAG Created (0x%04x," EA_FORMAT
3985 ",0x%04x)\n",
3986 bond->ifb_key,
3987 lag->lag_info.li_system_priority,
3988 EA_LIST(&lag->lag_info.li_system),
3989 lag->lag_info.li_key);
3990 }
3991 bondport_add_to_LAG(p, lag);
3992 return;
3993 }
3994
3995 static void
bondport_receive_lacpdu(bondport_ref p,lacpdu_ref in_lacpdu_p)3996 bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p)
3997 {
3998 bondport_ref moved_port;
3999
4000 moved_port
4001 = ifbond_list_find_moved_port(p, (const lacp_actor_partner_tlv_ref)
4002 &in_lacpdu_p->la_actor_tlv);
4003 if (moved_port != NULL) {
4004 bondport_receive_machine(moved_port, LAEventPortMoved, NULL);
4005 }
4006 bondport_receive_machine(p, LAEventPacket, in_lacpdu_p);
4007 bondport_mux_machine(p, LAEventPacket, in_lacpdu_p);
4008 bondport_periodic_transmit_machine(p, LAEventPacket, in_lacpdu_p);
4009 return;
4010 }
4011
4012 static void
bondport_set_selected(bondport_ref p,SelectedState s)4013 bondport_set_selected(bondport_ref p, SelectedState s)
4014 {
4015 if (s != p->po_selected) {
4016 ifbond_ref bond = p->po_bond;
4017 LAG_ref lag = p->po_lag;
4018
4019 bondport_flags_set_selected_changed(p);
4020 if (lag != NULL && bond->ifb_active_lag == lag) {
4021 if (p->po_selected == SelectedState_SELECTED) {
4022 lag->lag_selected_port_count--;
4023 } else if (s == SelectedState_SELECTED) {
4024 lag->lag_selected_port_count++;
4025 }
4026 if (if_bond_debug) {
4027 timestamp_printf("[%s] SetSelected: %s (was %s)\n",
4028 bondport_get_name(p),
4029 SelectedStateString(s),
4030 SelectedStateString(p->po_selected));
4031 }
4032 }
4033 }
4034 p->po_selected = s;
4035 return;
4036 }
4037
4038 /**
4039 ** Receive machine
4040 **/
4041
4042 static void
bondport_UpdateDefaultSelected(bondport_ref p)4043 bondport_UpdateDefaultSelected(bondport_ref p)
4044 {
4045 bondport_set_selected(p, SelectedState_UNSELECTED);
4046 return;
4047 }
4048
4049 static void
bondport_RecordDefault(bondport_ref p)4050 bondport_RecordDefault(bondport_ref p)
4051 {
4052 bzero(&p->po_partner_state, sizeof(p->po_partner_state));
4053 p->po_actor_state
4054 = lacp_actor_partner_state_set_defaulted(p->po_actor_state);
4055 bondport_assign_to_LAG(p);
4056 return;
4057 }
4058
4059 static void
bondport_UpdateSelected(bondport_ref p,lacpdu_ref lacpdu_p)4060 bondport_UpdateSelected(bondport_ref p, lacpdu_ref lacpdu_p)
4061 {
4062 lacp_actor_partner_tlv_ref actor;
4063 partner_state_ref ps;
4064 LAG_info_ref ps_li;
4065
4066 /* compare the PDU's Actor information to our Partner state */
4067 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
4068 ps = &p->po_partner_state;
4069 ps_li = &ps->ps_lag_info;
4070 if (lacp_actor_partner_tlv_get_port(actor) != ps->ps_port
4071 || (lacp_actor_partner_tlv_get_port_priority(actor)
4072 != ps->ps_port_priority)
4073 || bcmp(actor->lap_system, &ps_li->li_system, sizeof(ps_li->li_system))
4074 || (lacp_actor_partner_tlv_get_system_priority(actor)
4075 != ps_li->li_system_priority)
4076 || (lacp_actor_partner_tlv_get_key(actor) != ps_li->li_key)
4077 || (lacp_actor_partner_state_aggregatable(actor->lap_state)
4078 != lacp_actor_partner_state_aggregatable(ps->ps_state))) {
4079 bondport_set_selected(p, SelectedState_UNSELECTED);
4080 if (if_bond_debug) {
4081 timestamp_printf("[%s] updateSelected UNSELECTED\n",
4082 bondport_get_name(p));
4083 }
4084 }
4085 return;
4086 }
4087
4088 static void
bondport_RecordPDU(bondport_ref p,lacpdu_ref lacpdu_p)4089 bondport_RecordPDU(bondport_ref p, lacpdu_ref lacpdu_p)
4090 {
4091 lacp_actor_partner_tlv_ref actor;
4092 ifbond_ref bond = p->po_bond;
4093 int lacp_maintain = 0;
4094 partner_state_ref ps;
4095 lacp_actor_partner_tlv_ref partner;
4096 LAG_info_ref ps_li;
4097
4098 /* copy the PDU's Actor information into our Partner state */
4099 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
4100 ps = &p->po_partner_state;
4101 ps_li = &ps->ps_lag_info;
4102 ps->ps_port = lacp_actor_partner_tlv_get_port(actor);
4103 ps->ps_port_priority = lacp_actor_partner_tlv_get_port_priority(actor);
4104 ps_li->li_system = *((lacp_system_ref)actor->lap_system);
4105 ps_li->li_system_priority
4106 = lacp_actor_partner_tlv_get_system_priority(actor);
4107 ps_li->li_key = lacp_actor_partner_tlv_get_key(actor);
4108 ps->ps_state = lacp_actor_partner_state_set_out_of_sync(actor->lap_state);
4109 p->po_actor_state
4110 = lacp_actor_partner_state_set_not_defaulted(p->po_actor_state);
4111
4112 /* compare the PDU's Partner information to our own information */
4113 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
4114
4115 if (lacp_actor_partner_state_active_lacp(ps->ps_state)
4116 || (lacp_actor_partner_state_active_lacp(p->po_actor_state)
4117 && lacp_actor_partner_state_active_lacp(partner->lap_state))) {
4118 if (if_bond_debug) {
4119 timestamp_printf("[%s] recordPDU: LACP will maintain\n",
4120 bondport_get_name(p));
4121 }
4122 lacp_maintain = 1;
4123 }
4124 if ((lacp_actor_partner_tlv_get_port(partner)
4125 == bondport_get_index(p))
4126 && lacp_actor_partner_tlv_get_port_priority(partner) == p->po_priority
4127 && bcmp(partner->lap_system, &g_bond->system,
4128 sizeof(g_bond->system)) == 0
4129 && (lacp_actor_partner_tlv_get_system_priority(partner)
4130 == g_bond->system_priority)
4131 && lacp_actor_partner_tlv_get_key(partner) == bond->ifb_key
4132 && (lacp_actor_partner_state_aggregatable(partner->lap_state)
4133 == lacp_actor_partner_state_aggregatable(p->po_actor_state))
4134 && lacp_actor_partner_state_in_sync(actor->lap_state)
4135 && lacp_maintain) {
4136 ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
4137 if (if_bond_debug) {
4138 timestamp_printf("[%s] recordPDU: LACP partner in sync\n",
4139 bondport_get_name(p));
4140 }
4141 } else if (lacp_actor_partner_state_aggregatable(actor->lap_state) == 0
4142 && lacp_actor_partner_state_in_sync(actor->lap_state)
4143 && lacp_maintain) {
4144 ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
4145 if (if_bond_debug) {
4146 timestamp_printf("[%s] recordPDU: LACP partner in sync (ind)\n",
4147 bondport_get_name(p));
4148 }
4149 }
4150 bondport_assign_to_LAG(p);
4151 return;
4152 }
4153
4154 static __inline__ lacp_actor_partner_state
updateNTTBits(lacp_actor_partner_state s)4155 updateNTTBits(lacp_actor_partner_state s)
4156 {
4157 return s & (LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY
4158 | LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT
4159 | LACP_ACTOR_PARTNER_STATE_AGGREGATION
4160 | LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION);
4161 }
4162
4163 static void
bondport_UpdateNTT(bondport_ref p,lacpdu_ref lacpdu_p)4164 bondport_UpdateNTT(bondport_ref p, lacpdu_ref lacpdu_p)
4165 {
4166 ifbond_ref bond = p->po_bond;
4167 lacp_actor_partner_tlv_ref partner;
4168
4169 /* compare the PDU's Actor information to our Partner state */
4170 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
4171 if ((lacp_actor_partner_tlv_get_port(partner) != bondport_get_index(p))
4172 || lacp_actor_partner_tlv_get_port_priority(partner) != p->po_priority
4173 || bcmp(partner->lap_system, &g_bond->system, sizeof(g_bond->system))
4174 || (lacp_actor_partner_tlv_get_system_priority(partner)
4175 != g_bond->system_priority)
4176 || lacp_actor_partner_tlv_get_key(partner) != bond->ifb_key
4177 || (updateNTTBits(partner->lap_state)
4178 != updateNTTBits(p->po_actor_state))) {
4179 bondport_flags_set_ntt(p);
4180 if (if_bond_debug) {
4181 timestamp_printf("[%s] updateNTT: Need To Transmit\n",
4182 bondport_get_name(p));
4183 }
4184 }
4185 return;
4186 }
4187
4188 static void
bondport_AttachMuxToAggregator(bondport_ref p)4189 bondport_AttachMuxToAggregator(bondport_ref p)
4190 {
4191 if (bondport_flags_mux_attached(p) == 0) {
4192 if (if_bond_debug) {
4193 timestamp_printf("[%s] Attached Mux To Aggregator\n",
4194 bondport_get_name(p));
4195 }
4196 bondport_flags_set_mux_attached(p);
4197 }
4198 return;
4199 }
4200
4201 static void
bondport_DetachMuxFromAggregator(bondport_ref p)4202 bondport_DetachMuxFromAggregator(bondport_ref p)
4203 {
4204 if (bondport_flags_mux_attached(p)) {
4205 if (if_bond_debug) {
4206 timestamp_printf("[%s] Detached Mux From Aggregator\n",
4207 bondport_get_name(p));
4208 }
4209 bondport_flags_clear_mux_attached(p);
4210 }
4211 return;
4212 }
4213
4214 static void
bondport_enable_distributing(bondport_ref p)4215 bondport_enable_distributing(bondport_ref p)
4216 {
4217 if (bondport_flags_distributing(p) == 0) {
4218 ifbond_ref bond = p->po_bond;
4219
4220 bond->ifb_distributing_array[bond->ifb_distributing_count++] = p;
4221 if (if_bond_debug) {
4222 timestamp_printf("[%s] Distribution Enabled\n",
4223 bondport_get_name(p));
4224 }
4225 bondport_flags_set_distributing(p);
4226 }
4227 return;
4228 }
4229
4230 static void
bondport_disable_distributing(bondport_ref p)4231 bondport_disable_distributing(bondport_ref p)
4232 {
4233 if (bondport_flags_distributing(p)) {
4234 bondport_ref * array;
4235 ifbond_ref bond;
4236 int count;
4237 int i;
4238
4239 bond = p->po_bond;
4240 array = bond->ifb_distributing_array;
4241 count = bond->ifb_distributing_count;
4242 for (i = 0; i < count; i++) {
4243 if (array[i] == p) {
4244 int j;
4245
4246 for (j = i; j < (count - 1); j++) {
4247 array[j] = array[j + 1];
4248 }
4249 break;
4250 }
4251 }
4252 bond->ifb_distributing_count--;
4253 if (if_bond_debug) {
4254 timestamp_printf("[%s] Distribution Disabled\n",
4255 bondport_get_name(p));
4256 }
4257 bondport_flags_clear_distributing(p);
4258 }
4259 return;
4260 }
4261
4262 /**
4263 ** Receive machine functions
4264 **/
4265 static void
4266 bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
4267 void * event_data);
4268 static void
4269 bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
4270 void * event_data);
4271 static void
4272 bondport_receive_machine_expired(bondport_ref p, LAEvent event,
4273 void * event_data);
4274 static void
4275 bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
4276 void * event_data);
4277 static void
4278 bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
4279 void * event_data);
4280 static void
4281 bondport_receive_machine_current(bondport_ref p, LAEvent event,
4282 void * event_data);
4283
4284 static void
bondport_receive_machine_event(bondport_ref p,LAEvent event,void * event_data)4285 bondport_receive_machine_event(bondport_ref p, LAEvent event,
4286 void * event_data)
4287 {
4288 switch (p->po_receive_state) {
4289 case ReceiveState_none:
4290 bondport_receive_machine_initialize(p, LAEventStart, NULL);
4291 break;
4292 case ReceiveState_INITIALIZE:
4293 bondport_receive_machine_initialize(p, event, event_data);
4294 break;
4295 case ReceiveState_PORT_DISABLED:
4296 bondport_receive_machine_port_disabled(p, event, event_data);
4297 break;
4298 case ReceiveState_EXPIRED:
4299 bondport_receive_machine_expired(p, event, event_data);
4300 break;
4301 case ReceiveState_LACP_DISABLED:
4302 bondport_receive_machine_lacp_disabled(p, event, event_data);
4303 break;
4304 case ReceiveState_DEFAULTED:
4305 bondport_receive_machine_defaulted(p, event, event_data);
4306 break;
4307 case ReceiveState_CURRENT:
4308 bondport_receive_machine_current(p, event, event_data);
4309 break;
4310 default:
4311 break;
4312 }
4313 return;
4314 }
4315
4316 static void
bondport_receive_machine(bondport_ref p,LAEvent event,void * event_data)4317 bondport_receive_machine(bondport_ref p, LAEvent event,
4318 void * event_data)
4319 {
4320 switch (event) {
4321 case LAEventPacket:
4322 if (p->po_receive_state != ReceiveState_LACP_DISABLED) {
4323 bondport_receive_machine_current(p, event, event_data);
4324 }
4325 break;
4326 case LAEventMediaChange:
4327 if (media_active(&p->po_media_info)) {
4328 switch (p->po_receive_state) {
4329 case ReceiveState_PORT_DISABLED:
4330 case ReceiveState_LACP_DISABLED:
4331 bondport_receive_machine_port_disabled(p, LAEventMediaChange, NULL);
4332 break;
4333 default:
4334 break;
4335 }
4336 } else {
4337 bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
4338 }
4339 break;
4340 default:
4341 bondport_receive_machine_event(p, event, event_data);
4342 break;
4343 }
4344 return;
4345 }
4346
4347 static void
bondport_receive_machine_initialize(bondport_ref p,LAEvent event,__unused void * event_data)4348 bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
4349 __unused void * event_data)
4350 {
4351 switch (event) {
4352 case LAEventStart:
4353 devtimer_cancel(p->po_current_while_timer);
4354 if (if_bond_debug) {
4355 timestamp_printf("[%s] Receive INITIALIZE\n",
4356 bondport_get_name(p));
4357 }
4358 p->po_receive_state = ReceiveState_INITIALIZE;
4359 bondport_set_selected(p, SelectedState_UNSELECTED);
4360 bondport_RecordDefault(p);
4361 p->po_actor_state
4362 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4363 bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
4364 break;
4365 default:
4366 break;
4367 }
4368 return;
4369 }
4370
4371 static void
bondport_receive_machine_port_disabled(bondport_ref p,LAEvent event,__unused void * event_data)4372 bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
4373 __unused void * event_data)
4374 {
4375 partner_state_ref ps;
4376
4377 switch (event) {
4378 case LAEventStart:
4379 devtimer_cancel(p->po_current_while_timer);
4380 if (if_bond_debug) {
4381 timestamp_printf("[%s] Receive PORT_DISABLED\n",
4382 bondport_get_name(p));
4383 }
4384 p->po_receive_state = ReceiveState_PORT_DISABLED;
4385 ps = &p->po_partner_state;
4386 ps->ps_state = lacp_actor_partner_state_set_out_of_sync(ps->ps_state);
4387 OS_FALLTHROUGH;
4388 case LAEventMediaChange:
4389 if (media_active(&p->po_media_info)) {
4390 if (media_ok(&p->po_media_info)) {
4391 bondport_receive_machine_expired(p, LAEventStart, NULL);
4392 } else {
4393 bondport_receive_machine_lacp_disabled(p, LAEventStart, NULL);
4394 }
4395 } else if (p->po_selected == SelectedState_SELECTED) {
4396 struct timeval tv;
4397
4398 if (if_bond_debug) {
4399 timestamp_printf("[%s] Receive PORT_DISABLED: "
4400 "link timer started\n",
4401 bondport_get_name(p));
4402 }
4403 tv.tv_sec = 1;
4404 tv.tv_usec = 0;
4405 devtimer_set_relative(p->po_current_while_timer, tv,
4406 (devtimer_timeout_func)
4407 bondport_receive_machine_port_disabled,
4408 (void *)LAEventTimeout, NULL);
4409 } else if (p->po_selected == SelectedState_STANDBY) {
4410 bondport_set_selected(p, SelectedState_UNSELECTED);
4411 }
4412 break;
4413 case LAEventTimeout:
4414 if (p->po_selected == SelectedState_SELECTED) {
4415 if (if_bond_debug) {
4416 timestamp_printf("[%s] Receive PORT_DISABLED: "
4417 "link timer completed, marking UNSELECTED\n",
4418 bondport_get_name(p));
4419 }
4420 bondport_set_selected(p, SelectedState_UNSELECTED);
4421 }
4422 break;
4423 case LAEventPortMoved:
4424 bondport_receive_machine_initialize(p, LAEventStart, NULL);
4425 break;
4426 default:
4427 break;
4428 }
4429 return;
4430 }
4431
4432 static void
bondport_receive_machine_expired(bondport_ref p,LAEvent event,__unused void * event_data)4433 bondport_receive_machine_expired(bondport_ref p, LAEvent event,
4434 __unused void * event_data)
4435 {
4436 lacp_actor_partner_state s;
4437 struct timeval tv;
4438
4439 switch (event) {
4440 case LAEventStart:
4441 devtimer_cancel(p->po_current_while_timer);
4442 if (if_bond_debug) {
4443 timestamp_printf("[%s] Receive EXPIRED\n",
4444 bondport_get_name(p));
4445 }
4446 p->po_receive_state = ReceiveState_EXPIRED;
4447 s = p->po_partner_state.ps_state;
4448 s = lacp_actor_partner_state_set_out_of_sync(s);
4449 s = lacp_actor_partner_state_set_short_timeout(s);
4450 p->po_partner_state.ps_state = s;
4451 p->po_actor_state
4452 = lacp_actor_partner_state_set_expired(p->po_actor_state);
4453 /* start current_while timer */
4454 tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4455 tv.tv_usec = 0;
4456 devtimer_set_relative(p->po_current_while_timer, tv,
4457 (devtimer_timeout_func)
4458 bondport_receive_machine_expired,
4459 (void *)LAEventTimeout, NULL);
4460
4461 break;
4462 case LAEventTimeout:
4463 bondport_receive_machine_defaulted(p, LAEventStart, NULL);
4464 break;
4465 default:
4466 break;
4467 }
4468 return;
4469 }
4470
4471 static void
bondport_receive_machine_lacp_disabled(bondport_ref p,LAEvent event,__unused void * event_data)4472 bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
4473 __unused void * event_data)
4474 {
4475 partner_state_ref ps;
4476 switch (event) {
4477 case LAEventStart:
4478 devtimer_cancel(p->po_current_while_timer);
4479 if (if_bond_debug) {
4480 timestamp_printf("[%s] Receive LACP_DISABLED\n",
4481 bondport_get_name(p));
4482 }
4483 p->po_receive_state = ReceiveState_LACP_DISABLED;
4484 bondport_set_selected(p, SelectedState_UNSELECTED);
4485 bondport_RecordDefault(p);
4486 ps = &p->po_partner_state;
4487 ps->ps_state = lacp_actor_partner_state_set_individual(ps->ps_state);
4488 p->po_actor_state
4489 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4490 break;
4491 default:
4492 break;
4493 }
4494 return;
4495 }
4496
4497 static void
bondport_receive_machine_defaulted(bondport_ref p,LAEvent event,__unused void * event_data)4498 bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
4499 __unused void * event_data)
4500 {
4501 switch (event) {
4502 case LAEventStart:
4503 devtimer_cancel(p->po_current_while_timer);
4504 if (if_bond_debug) {
4505 timestamp_printf("[%s] Receive DEFAULTED\n",
4506 bondport_get_name(p));
4507 }
4508 p->po_receive_state = ReceiveState_DEFAULTED;
4509 bondport_UpdateDefaultSelected(p);
4510 bondport_RecordDefault(p);
4511 p->po_actor_state
4512 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4513 break;
4514 default:
4515 break;
4516 }
4517 return;
4518 }
4519
4520 static void
bondport_receive_machine_current(bondport_ref p,LAEvent event,void * event_data)4521 bondport_receive_machine_current(bondport_ref p, LAEvent event,
4522 void * event_data)
4523 {
4524 partner_state_ref ps;
4525 struct timeval tv;
4526
4527 switch (event) {
4528 case LAEventPacket:
4529 devtimer_cancel(p->po_current_while_timer);
4530 if (if_bond_debug) {
4531 timestamp_printf("[%s] Receive CURRENT\n",
4532 bondport_get_name(p));
4533 }
4534 p->po_receive_state = ReceiveState_CURRENT;
4535 bondport_UpdateSelected(p, event_data);
4536 bondport_UpdateNTT(p, event_data);
4537 bondport_RecordPDU(p, event_data);
4538 p->po_actor_state
4539 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4540 bondport_assign_to_LAG(p);
4541 /* start current_while timer */
4542 ps = &p->po_partner_state;
4543 if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4544 tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4545 } else {
4546 tv.tv_sec = LACP_LONG_TIMEOUT_TIME;
4547 }
4548 tv.tv_usec = 0;
4549 devtimer_set_relative(p->po_current_while_timer, tv,
4550 (devtimer_timeout_func)
4551 bondport_receive_machine_current,
4552 (void *)LAEventTimeout, NULL);
4553 break;
4554 case LAEventTimeout:
4555 bondport_receive_machine_expired(p, LAEventStart, NULL);
4556 break;
4557 default:
4558 break;
4559 }
4560 return;
4561 }
4562
/**
** Periodic Transmission machine
**/
4566
4567 static void
bondport_periodic_transmit_machine(bondport_ref p,LAEvent event,__unused void * event_data)4568 bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
4569 __unused void * event_data)
4570 {
4571 int interval;
4572 partner_state_ref ps;
4573 struct timeval tv;
4574
4575 switch (event) {
4576 case LAEventStart:
4577 if (if_bond_debug) {
4578 timestamp_printf("[%s] periodic_transmit Start\n",
4579 bondport_get_name(p));
4580 }
4581 OS_FALLTHROUGH;
4582 case LAEventMediaChange:
4583 devtimer_cancel(p->po_periodic_timer);
4584 p->po_periodic_interval = 0;
4585 if (media_active(&p->po_media_info) == 0
4586 || media_ok(&p->po_media_info) == 0) {
4587 break;
4588 }
4589 OS_FALLTHROUGH;
4590 case LAEventPacket:
4591 /* Neither Partner nor Actor are LACP Active, no periodic tx */
4592 ps = &p->po_partner_state;
4593 if (lacp_actor_partner_state_active_lacp(p->po_actor_state) == 0
4594 && (lacp_actor_partner_state_active_lacp(ps->ps_state)
4595 == 0)) {
4596 devtimer_cancel(p->po_periodic_timer);
4597 p->po_periodic_interval = 0;
4598 break;
4599 }
4600 if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4601 interval = LACP_FAST_PERIODIC_TIME;
4602 } else {
4603 interval = LACP_SLOW_PERIODIC_TIME;
4604 }
4605 if (p->po_periodic_interval != interval) {
4606 if (interval == LACP_FAST_PERIODIC_TIME
4607 && p->po_periodic_interval == LACP_SLOW_PERIODIC_TIME) {
4608 if (if_bond_debug) {
4609 timestamp_printf("[%s] periodic_transmit:"
4610 " Need To Transmit\n",
4611 bondport_get_name(p));
4612 }
4613 bondport_flags_set_ntt(p);
4614 }
4615 p->po_periodic_interval = interval;
4616 tv.tv_usec = 0;
4617 tv.tv_sec = interval;
4618 devtimer_set_relative(p->po_periodic_timer, tv,
4619 (devtimer_timeout_func)
4620 bondport_periodic_transmit_machine,
4621 (void *)LAEventTimeout, NULL);
4622 if (if_bond_debug) {
4623 timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n",
4624 bondport_get_name(p),
4625 p->po_periodic_interval);
4626 }
4627 }
4628 break;
4629 case LAEventTimeout:
4630 bondport_flags_set_ntt(p);
4631 tv.tv_sec = p->po_periodic_interval;
4632 tv.tv_usec = 0;
4633 devtimer_set_relative(p->po_periodic_timer, tv, (devtimer_timeout_func)
4634 bondport_periodic_transmit_machine,
4635 (void *)LAEventTimeout, NULL);
4636 if (if_bond_debug > 1) {
4637 timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n",
4638 bondport_get_name(p), p->po_periodic_interval);
4639 }
4640 break;
4641 default:
4642 break;
4643 }
4644 return;
4645 }
4646
/**
** Transmit machine
**/
4650 static int
bondport_can_transmit(bondport_ref p,int32_t current_secs,__darwin_time_t * next_secs)4651 bondport_can_transmit(bondport_ref p, int32_t current_secs,
4652 __darwin_time_t * next_secs)
4653 {
4654 if (p->po_last_transmit_secs != current_secs) {
4655 p->po_last_transmit_secs = current_secs;
4656 p->po_n_transmit = 0;
4657 }
4658 if (p->po_n_transmit < LACP_PACKET_RATE) {
4659 p->po_n_transmit++;
4660 return 1;
4661 }
4662 if (next_secs != NULL) {
4663 *next_secs = current_secs + 1;
4664 }
4665 return 0;
4666 }
4667
4668 static void
bondport_transmit_machine(bondport_ref p,LAEvent event,void * event_data)4669 bondport_transmit_machine(bondport_ref p, LAEvent event,
4670 void * event_data)
4671 {
4672 lacp_actor_partner_tlv_ref aptlv;
4673 lacp_collector_tlv_ref ctlv;
4674 struct timeval next_tick_time = {.tv_sec = 0, .tv_usec = 0};
4675 lacpdu_ref out_lacpdu_p;
4676 packet_buffer_ref pkt;
4677 partner_state_ref ps;
4678 LAG_info_ref ps_li;
4679
4680 switch (event) {
4681 case LAEventTimeout:
4682 case LAEventStart:
4683 if (p->po_periodic_interval == 0 || bondport_flags_ntt(p) == 0) {
4684 break;
4685 }
4686 if (event_data == TRANSMIT_MACHINE_TX_IMMEDIATE) {
4687 /* we're going away, transmit the packet no matter what */
4688 } else if (bondport_can_transmit(p, devtimer_current_secs(),
4689 &next_tick_time.tv_sec) == 0) {
4690 if (devtimer_enabled(p->po_transmit_timer)) {
4691 if (if_bond_debug > 0) {
4692 timestamp_printf("[%s] Transmit Timer Already Set\n",
4693 bondport_get_name(p));
4694 }
4695 } else {
4696 devtimer_set_absolute(p->po_transmit_timer, next_tick_time,
4697 (devtimer_timeout_func)
4698 bondport_transmit_machine,
4699 (void *)LAEventTimeout, NULL);
4700 if (if_bond_debug > 0) {
4701 timestamp_printf("[%s] Transmit Timer Deadline %d secs\n",
4702 bondport_get_name(p),
4703 (int)next_tick_time.tv_sec);
4704 }
4705 }
4706 break;
4707 }
4708 if (if_bond_debug > 0) {
4709 if (event == LAEventTimeout) {
4710 timestamp_printf("[%s] Transmit Timer Complete\n",
4711 bondport_get_name(p));
4712 }
4713 }
4714 pkt = packet_buffer_allocate(sizeof(*out_lacpdu_p));
4715 if (pkt == NULL) {
4716 printf("[%s] Transmit: failed to allocate packet buffer\n",
4717 bondport_get_name(p));
4718 break;
4719 }
4720 out_lacpdu_p = (lacpdu_ref)packet_buffer_byteptr(pkt);
4721 bzero(out_lacpdu_p, sizeof(*out_lacpdu_p));
4722 out_lacpdu_p->la_subtype = IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP;
4723 out_lacpdu_p->la_version = LACPDU_VERSION_1;
4724
4725 /* Actor */
4726 aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_actor_tlv;
4727 aptlv->lap_tlv_type = LACPDU_TLV_TYPE_ACTOR;
4728 aptlv->lap_length = LACPDU_ACTOR_TLV_LENGTH;
4729 *((lacp_system_ref)aptlv->lap_system) = g_bond->system;
4730 lacp_actor_partner_tlv_set_system_priority(aptlv,
4731 g_bond->system_priority);
4732 lacp_actor_partner_tlv_set_port_priority(aptlv, p->po_priority);
4733 lacp_actor_partner_tlv_set_port(aptlv, bondport_get_index(p));
4734 lacp_actor_partner_tlv_set_key(aptlv, p->po_bond->ifb_key);
4735 aptlv->lap_state = p->po_actor_state;
4736
4737 /* Partner */
4738 aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_partner_tlv;
4739 aptlv->lap_tlv_type = LACPDU_TLV_TYPE_PARTNER;
4740 aptlv->lap_length = LACPDU_PARTNER_TLV_LENGTH;
4741 ps = &p->po_partner_state;
4742 ps_li = &ps->ps_lag_info;
4743 lacp_actor_partner_tlv_set_port(aptlv, ps->ps_port);
4744 lacp_actor_partner_tlv_set_port_priority(aptlv, ps->ps_port_priority);
4745 *((lacp_system_ref)aptlv->lap_system) = ps_li->li_system;
4746 lacp_actor_partner_tlv_set_system_priority(aptlv,
4747 ps_li->li_system_priority);
4748 lacp_actor_partner_tlv_set_key(aptlv, ps_li->li_key);
4749 aptlv->lap_state = ps->ps_state;
4750
4751 /* Collector */
4752 ctlv = (lacp_collector_tlv_ref)out_lacpdu_p->la_collector_tlv;
4753 ctlv->lac_tlv_type = LACPDU_TLV_TYPE_COLLECTOR;
4754 ctlv->lac_length = LACPDU_COLLECTOR_TLV_LENGTH;
4755
4756 bondport_slow_proto_transmit(p, pkt);
4757 bondport_flags_clear_ntt(p);
4758 if (if_bond_debug > 0) {
4759 timestamp_printf("[%s] Transmit Packet %d\n",
4760 bondport_get_name(p), p->po_n_transmit);
4761 }
4762 break;
4763 default:
4764 break;
4765 }
4766 return;
4767 }
4768
/**
** Mux machine functions
**/
4772
4773 static void
4774 bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4775 void * event_data);
4776 static void
4777 bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4778 void * event_data);
4779 static void
4780 bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4781 void * event_data);
4782
4783 static void
4784 bondport_mux_machine_collecting_distributing(bondport_ref p, LAEvent event,
4785 void * event_data);
4786
4787 static void
bondport_mux_machine(bondport_ref p,LAEvent event,void * event_data)4788 bondport_mux_machine(bondport_ref p, LAEvent event, void * event_data)
4789 {
4790 switch (p->po_mux_state) {
4791 case MuxState_none:
4792 bondport_mux_machine_detached(p, LAEventStart, NULL);
4793 break;
4794 case MuxState_DETACHED:
4795 bondport_mux_machine_detached(p, event, event_data);
4796 break;
4797 case MuxState_WAITING:
4798 bondport_mux_machine_waiting(p, event, event_data);
4799 break;
4800 case MuxState_ATTACHED:
4801 bondport_mux_machine_attached(p, event, event_data);
4802 break;
4803 case MuxState_COLLECTING_DISTRIBUTING:
4804 bondport_mux_machine_collecting_distributing(p, event, event_data);
4805 break;
4806 default:
4807 break;
4808 }
4809 return;
4810 }
4811
4812 static void
bondport_mux_machine_detached(bondport_ref p,LAEvent event,__unused void * event_data)4813 bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4814 __unused void * event_data)
4815 {
4816 lacp_actor_partner_state s;
4817
4818 switch (event) {
4819 case LAEventStart:
4820 devtimer_cancel(p->po_wait_while_timer);
4821 if (if_bond_debug) {
4822 timestamp_printf("[%s] Mux DETACHED\n",
4823 bondport_get_name(p));
4824 }
4825 p->po_mux_state = MuxState_DETACHED;
4826 bondport_flags_clear_ready(p);
4827 bondport_DetachMuxFromAggregator(p);
4828 bondport_disable_distributing(p);
4829 s = p->po_actor_state;
4830 s = lacp_actor_partner_state_set_out_of_sync(s);
4831 s = lacp_actor_partner_state_set_not_collecting(s);
4832 s = lacp_actor_partner_state_set_not_distributing(s);
4833 p->po_actor_state = s;
4834 bondport_flags_set_ntt(p);
4835 break;
4836 case LAEventSelectedChange:
4837 case LAEventPacket:
4838 case LAEventMediaChange:
4839 if (p->po_selected == SelectedState_SELECTED
4840 || p->po_selected == SelectedState_STANDBY) {
4841 bondport_mux_machine_waiting(p, LAEventStart, NULL);
4842 }
4843 break;
4844 default:
4845 break;
4846 }
4847 return;
4848 }
4849
4850 static void
bondport_mux_machine_waiting(bondport_ref p,LAEvent event,__unused void * event_data)4851 bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4852 __unused void * event_data)
4853 {
4854 struct timeval tv;
4855
4856 switch (event) {
4857 case LAEventStart:
4858 devtimer_cancel(p->po_wait_while_timer);
4859 if (if_bond_debug) {
4860 timestamp_printf("[%s] Mux WAITING\n",
4861 bondport_get_name(p));
4862 }
4863 p->po_mux_state = MuxState_WAITING;
4864 OS_FALLTHROUGH;
4865 default:
4866 case LAEventSelectedChange:
4867 if (p->po_selected == SelectedState_UNSELECTED) {
4868 bondport_mux_machine_detached(p, LAEventStart, NULL);
4869 break;
4870 }
4871 if (p->po_selected == SelectedState_STANDBY) {
4872 devtimer_cancel(p->po_wait_while_timer);
4873 /* wait until state changes to SELECTED */
4874 if (if_bond_debug) {
4875 timestamp_printf("[%s] Mux WAITING: Standby\n",
4876 bondport_get_name(p));
4877 }
4878 break;
4879 }
4880 if (bondport_flags_ready(p)) {
4881 if (if_bond_debug) {
4882 timestamp_printf("[%s] Mux WAITING: Port is already ready\n",
4883 bondport_get_name(p));
4884 }
4885 break;
4886 }
4887 if (devtimer_enabled(p->po_wait_while_timer)) {
4888 if (if_bond_debug) {
4889 timestamp_printf("[%s] Mux WAITING: Timer already set\n",
4890 bondport_get_name(p));
4891 }
4892 break;
4893 }
4894 if (ifbond_all_ports_attached(p->po_bond, p)) {
4895 devtimer_cancel(p->po_wait_while_timer);
4896 if (if_bond_debug) {
4897 timestamp_printf("[%s] Mux WAITING: No waiting\n",
4898 bondport_get_name(p));
4899 }
4900 bondport_flags_set_ready(p);
4901 goto no_waiting;
4902 }
4903 if (if_bond_debug) {
4904 timestamp_printf("[%s] Mux WAITING: 2 seconds\n",
4905 bondport_get_name(p));
4906 }
4907 tv.tv_sec = LACP_AGGREGATE_WAIT_TIME;
4908 tv.tv_usec = 0;
4909 devtimer_set_relative(p->po_wait_while_timer, tv,
4910 (devtimer_timeout_func)
4911 bondport_mux_machine_waiting,
4912 (void *)LAEventTimeout, NULL);
4913 break;
4914 case LAEventTimeout:
4915 if (if_bond_debug) {
4916 timestamp_printf("[%s] Mux WAITING: Ready\n",
4917 bondport_get_name(p));
4918 }
4919 bondport_flags_set_ready(p);
4920 break;
4921 case LAEventReady:
4922 no_waiting:
4923 if (bondport_flags_ready(p)) {
4924 if (if_bond_debug) {
4925 timestamp_printf("[%s] Mux WAITING: All Ports Ready\n",
4926 bondport_get_name(p));
4927 }
4928 bondport_mux_machine_attached(p, LAEventStart, NULL);
4929 break;
4930 }
4931 break;
4932 }
4933 return;
4934 }
4935
4936 static void
bondport_mux_machine_attached(bondport_ref p,LAEvent event,__unused void * event_data)4937 bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4938 __unused void * event_data)
4939 {
4940 lacp_actor_partner_state s;
4941
4942 switch (event) {
4943 case LAEventStart:
4944 devtimer_cancel(p->po_wait_while_timer);
4945 if (if_bond_debug) {
4946 timestamp_printf("[%s] Mux ATTACHED\n",
4947 bondport_get_name(p));
4948 }
4949 p->po_mux_state = MuxState_ATTACHED;
4950 bondport_AttachMuxToAggregator(p);
4951 s = p->po_actor_state;
4952 s = lacp_actor_partner_state_set_in_sync(s);
4953 s = lacp_actor_partner_state_set_not_collecting(s);
4954 s = lacp_actor_partner_state_set_not_distributing(s);
4955 bondport_disable_distributing(p);
4956 p->po_actor_state = s;
4957 bondport_flags_set_ntt(p);
4958 OS_FALLTHROUGH;
4959 default:
4960 switch (p->po_selected) {
4961 case SelectedState_SELECTED:
4962 s = p->po_partner_state.ps_state;
4963 if (lacp_actor_partner_state_in_sync(s)) {
4964 bondport_mux_machine_collecting_distributing(p, LAEventStart,
4965 NULL);
4966 }
4967 break;
4968 default:
4969 bondport_mux_machine_detached(p, LAEventStart, NULL);
4970 break;
4971 }
4972 break;
4973 }
4974 return;
4975 }
4976
4977 static void
bondport_mux_machine_collecting_distributing(bondport_ref p,LAEvent event,__unused void * event_data)4978 bondport_mux_machine_collecting_distributing(bondport_ref p,
4979 LAEvent event,
4980 __unused void * event_data)
4981 {
4982 lacp_actor_partner_state s;
4983
4984 switch (event) {
4985 case LAEventStart:
4986 devtimer_cancel(p->po_wait_while_timer);
4987 if (if_bond_debug) {
4988 timestamp_printf("[%s] Mux COLLECTING_DISTRIBUTING\n",
4989 bondport_get_name(p));
4990 }
4991 p->po_mux_state = MuxState_COLLECTING_DISTRIBUTING;
4992 bondport_enable_distributing(p);
4993 s = p->po_actor_state;
4994 s = lacp_actor_partner_state_set_collecting(s);
4995 s = lacp_actor_partner_state_set_distributing(s);
4996 p->po_actor_state = s;
4997 bondport_flags_set_ntt(p);
4998 OS_FALLTHROUGH;
4999 default:
5000 s = p->po_partner_state.ps_state;
5001 if (lacp_actor_partner_state_in_sync(s) == 0) {
5002 bondport_mux_machine_attached(p, LAEventStart, NULL);
5003 break;
5004 }
5005 switch (p->po_selected) {
5006 case SelectedState_UNSELECTED:
5007 case SelectedState_STANDBY:
5008 bondport_mux_machine_attached(p, LAEventStart, NULL);
5009 break;
5010 default:
5011 break;
5012 }
5013 break;
5014 }
5015 return;
5016 }
5017