1 /*
2 * Copyright (c) 2004-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * if_bond.c
31 * - bond/failover interface
32 * - implements IEEE 802.3ad Link Aggregation
33 */
34
35 /*
36 * Modification History:
37 *
38 * April 29, 2004 Dieter Siegmund ([email protected])
39 * - created
40 */
41
42 #include <sys/param.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/queue.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51 #include <sys/kern_event.h>
52 #include <net/bpf.h>
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/kpi_interface.h>
56 #include <net/kpi_interfacefilter.h>
57 #include <net/if_arp.h>
58 #include <net/if_dl.h>
59 #include <net/if_ether.h>
60 #include <net/if_types.h>
61 #include <net/if_bond_var.h>
62 #include <net/ieee8023ad.h>
63 #include <net/lacp.h>
64 #include <net/dlil.h>
65 #include <sys/time.h>
66 #include <net/devtimer.h>
67 #include <net/if_vlan_var.h>
68 #include <net/kpi_protocol.h>
69 #include <sys/protosw.h>
70 #include <kern/locks.h>
71 #include <kern/zalloc.h>
72 #include <os/refcnt.h>
73
74 #include <netinet/in.h>
75 #include <netinet/if_ether.h>
76 #include <netinet/in_systm.h>
77 #include <netinet/ip.h>
78 #include <netinet/ip6.h>
79
80 #include <net/if_media.h>
81 #include <net/multicast_list.h>
82
/* sysctl container node "bond" declared under _net_link */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, bond, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Bond interface");

/*
 * Read/write integer knob "debug" under the bond node; the code in this
 * file prints extra diagnostics whenever it is non-zero.
 */
static int if_bond_debug = 0;
SYSCTL_INT(_net_link_bond, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bond_debug, 0, "Bond interface debug logs");

/*
 * Slow-protocols multicast address; registered on the bond interface by
 * ifbond_add_slow_proto_multicast().
 */
static struct ether_addr slow_proto_multicast = {
	.octet = IEEE8023AD_SLOW_PROTO_MULTICAST
};
94
/* handle types for the bond and port structures defined further down */
typedef struct ifbond_s ifbond, * ifbond_ref;
typedef struct bondport_s bondport, * bondport_ref;

/* BOND_MAXUNIT is the last argument handed to IF_CLONE_INITIALIZER() */
#define BOND_MAXUNIT            128
#define BOND_ZONE_MAX_ELEM      MIN(IFNETS_MAX, BOND_MAXUNIT)
/* name prefix handed to the interface cloner */
#define BONDNAME                "bond"

/*
 * printf helpers for a 6-byte ethernet address: EA_FORMAT is the format
 * string, EA_LIST(ea) expands to the six matching byte arguments.
 */
#define EA_FORMAT       "%x:%x:%x:%x:%x:%x"
#define EA_CH(e, i)     ((u_char)((u_char *)(e))[(i)])
#define EA_LIST(ea)     EA_CH(ea,0),EA_CH(ea,1),EA_CH(ea,2),EA_CH(ea,3),EA_CH(ea,4),EA_CH(ea,5)

/* timestamp_printf is currently a plain alias for printf */
#define timestamp_printf        printf
107
108 /**
109 ** bond locks
110 **/
111
/* lock group plus the single mutex taken by bond_lock()/bond_unlock() */
static LCK_GRP_DECLARE(bond_lck_grp, "if_bond");
static LCK_MTX_DECLARE(bond_lck_mtx, &bond_lck_grp);
114
115 static __inline__ void
bond_assert_lock_held(void)116 bond_assert_lock_held(void)
117 {
118 LCK_MTX_ASSERT(&bond_lck_mtx, LCK_MTX_ASSERT_OWNED);
119 }
120
121 static __inline__ void
bond_assert_lock_not_held(void)122 bond_assert_lock_not_held(void)
123 {
124 LCK_MTX_ASSERT(&bond_lck_mtx, LCK_MTX_ASSERT_NOTOWNED);
125 }
126
127 static __inline__ void
bond_lock(void)128 bond_lock(void)
129 {
130 lck_mtx_lock(&bond_lck_mtx);
131 }
132
133 static __inline__ void
bond_unlock(void)134 bond_unlock(void)
135 {
136 lck_mtx_unlock(&bond_lck_mtx);
137 }
138
139 /**
140 ** bond structures, types
141 **/
142
/*
 * Identifying triple for a LAG: system, system priority and key.
 * Embedded in struct LAG_s and in partner_state.
 */
struct LAG_info_s {
	lacp_system             li_system;
	lacp_system_priority    li_system_priority;
	lacp_key                li_key;
};
typedef struct LAG_info_s LAG_info, * LAG_info_ref;
149
/* forward declarations and the TAILQ head types used for port/bond/LAG lists */
struct bondport_s;
TAILQ_HEAD(port_list, bondport_s);
struct ifbond_s;
TAILQ_HEAD(ifbond_list, ifbond_s);
struct LAG_s;
TAILQ_HEAD(lag_list, LAG_s);

/* NOTE(review): these repeat the typedefs made earlier in the file */
typedef struct ifbond_s ifbond, * ifbond_ref;
typedef struct bondport_s bondport, * bondport_ref;
159
/*
 * One link aggregation group: list linkage, its member ports and
 * per-LAG counters.
 */
struct LAG_s {
	TAILQ_ENTRY(LAG_s)      lag_list;       /* linkage for a struct lag_list */
	struct port_list        lag_port_list;  /* ports belonging to this LAG */
	short                   lag_port_count;
	short                   lag_selected_port_count;
	int                     lag_active_media;
	LAG_info                lag_info;       /* system/priority/key triple */
};
typedef struct LAG_s LAG, * LAG_ref;
169
/*
 * Partner-side information kept per port (see bondport_s.po_partner_state).
 */
typedef struct partner_state_s {
	LAG_info                        ps_lag_info;
	lacp_port                       ps_port;
	lacp_port_priority              ps_port_priority;
	lacp_actor_partner_state        ps_state;
} partner_state, * partner_state_ref;
176
/*
 * Per-bond-interface state.
 */
struct ifbond_s {
	TAILQ_ENTRY(ifbond_s)   ifb_bond_list;  /* linkage for a struct ifbond_list */
	int                     ifb_flags;      /* IFBF_* bits */
	struct os_refcnt        ifb_retain_count; /* see ifbond_retain()/ifbond_release() */
	char                    ifb_name[IFNAMSIZ]; /* "<cloner name><unit>", used in logs */
	struct ifnet *          ifb_ifp;
	bpf_packet_func         ifb_bpf_input;
	bpf_packet_func         ifb_bpf_output;
	int                     ifb_altmtu;
	struct port_list        ifb_port_list;  /* member ports, linked via po_port_list */
	short                   ifb_port_count;
	struct lag_list         ifb_lag_list;
	lacp_key                ifb_key;
	short                   ifb_max_active;/* 0 == unlimited */
	LAG_ref                 ifb_active_lag;
	struct ifmultiaddr *    ifb_ifma_slow_proto; /* set by ifbond_add_slow_proto_multicast() */
	bondport_ref *          ifb_distributing_array; /* freed with ifb_distributing_max elements */
	int                     ifb_distributing_count;
	int                     ifb_distributing_max;
	int                     ifb_last_link_event;
	int                     ifb_mode;/* LACP, STATIC */
};
199
/*
 * Snapshot of an interface's media state, filled from the ifm_active and
 * ifm_status fields of a SIOCGIFMEDIA reply by interface_media_info().
 */
struct media_info {
	int             mi_active;      /* copy of ifm_active */
	int             mi_status;      /* copy of ifm_status (IFM_AVALID/IFM_ACTIVE bits) */
};
204
/* Receive machine states; stored in bondport_s.po_receive_state */
enum {
	ReceiveState_none = 0,
	ReceiveState_INITIALIZE = 1,
	ReceiveState_PORT_DISABLED = 2,
	ReceiveState_EXPIRED = 3,
	ReceiveState_LACP_DISABLED = 4,
	ReceiveState_DEFAULTED = 5,
	ReceiveState_CURRENT = 6,
};

/* the state is kept in a single byte */
typedef u_char ReceiveState;
216
/*
 * Selection states; the values are taken directly from the
 * IF_BOND_STATUS_SELECTED_STATE_* constants.  Stored in
 * bondport_s.po_selected.
 */
enum {
	SelectedState_UNSELECTED = IF_BOND_STATUS_SELECTED_STATE_UNSELECTED,
	SelectedState_SELECTED = IF_BOND_STATUS_SELECTED_STATE_SELECTED,
	SelectedState_STANDBY = IF_BOND_STATUS_SELECTED_STATE_STANDBY
};
typedef u_char SelectedState;
223
224 static __inline__ const char *
SelectedStateString(SelectedState s)225 SelectedStateString(SelectedState s)
226 {
227 static const char * names[] = { "UNSELECTED", "SELECTED", "STANDBY" };
228
229 if (s <= SelectedState_STANDBY) {
230 return names[s];
231 }
232 return "<unknown>";
233 }
234
/* Mux machine states; stored in bondport_s.po_mux_state */
enum {
	MuxState_none = 0,
	MuxState_DETACHED = 1,
	MuxState_WAITING = 2,
	MuxState_ATTACHED = 3,
	MuxState_COLLECTING_DISTRIBUTING = 4,
};

/* the state is kept in a single byte */
typedef u_char MuxState;
244
/*
 * Port control flag bits.
 * NOTE(review): presumably kept in bondport_s.po_control_flags and
 * manipulated with the uint32_bit_*() helpers -- confirm against callers.
 */
#define PORT_CONTROL_FLAGS_IN_LIST               0x01
#define PORT_CONTROL_FLAGS_PROTO_ATTACHED        0x02
#define PORT_CONTROL_FLAGS_FILTER_ATTACHED       0x04
#define PORT_CONTROL_FLAGS_LLADDR_SET            0x08
#define PORT_CONTROL_FLAGS_MTU_SET               0x10
#define PORT_CONTROL_FLAGS_PROMISCUOUS_SET       0x20
#define PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET  0x40
252
253
/*
 * Return true when at least one of the bits in flags_to_test is set
 * in flags.
 */
static inline bool
uint32_bit_is_set(uint32_t flags, uint32_t flags_to_test)
{
	const uint32_t  overlap = flags & flags_to_test;

	return overlap != 0;
}
259
/*
 * Turn on every bit of flags_to_set in *flags_p; other bits are untouched.
 */
static inline void
uint32_bit_set(uint32_t * flags_p, uint32_t flags_to_set)
{
	*flags_p = *flags_p | flags_to_set;
}
265
/*
 * Turn off every bit of flags_to_clear in *flags_p; other bits are untouched.
 */
static inline void
uint32_bit_clear(uint32_t * flags_p, uint32_t flags_to_clear)
{
	*flags_p = *flags_p & ~flags_to_clear;
}
271
/*
 * Per-member-port state.
 */
struct bondport_s {
	TAILQ_ENTRY(bondport_s)         po_port_list;   /* linkage on ifbond_s.ifb_port_list */
	ifbond_ref                      po_bond;        /* bond consulted for ifb_mode */
	struct multicast_list           po_multicast;   /* programmed by bond_setmulti() */
	struct ifnet *                  po_ifp;         /* the member interface */
	struct ether_addr               po_saved_addr;
	int                             po_enabled;
	char                            po_name[IFNAMSIZ];
	struct ifdevmtu                 po_devmtu;
	uint32_t                        po_control_flags; /* presumably PORT_CONTROL_FLAGS_* -- confirm */
	interface_filter_t              po_filter;

	/* LACP */
	TAILQ_ENTRY(bondport_s)         po_lag_port_list;
	devtimer_ref                    po_current_while_timer;
	devtimer_ref                    po_periodic_timer;
	devtimer_ref                    po_wait_while_timer;
	devtimer_ref                    po_transmit_timer;
	partner_state                   po_partner_state;
	lacp_port_priority              po_priority;
	lacp_actor_partner_state        po_actor_state;
	u_char                          po_flags;       /* BONDPORT_FLAGS_* bits */
	u_char                          po_periodic_interval;
	u_char                          po_n_transmit;
	ReceiveState                    po_receive_state;
	MuxState                        po_mux_state;
	SelectedState                   po_selected;
	int32_t                         po_last_transmit_secs;
	struct media_info               po_media_info;
	uint64_t                        po_force_link_event_time;
	LAG_ref                         po_lag;
};
304
/* ifbond_s.ifb_flags bits; accessed through the ifbond_flags_*() helpers */
#define IFBF_PROMISC            0x1     /* promiscuous mode */
#define IFBF_IF_DETACHING       0x2     /* interface is detaching */
#define IFBF_LLADDR             0x4     /* specific link address requested */
#define IFBF_CHANGE_IN_PROGRESS 0x8     /* interface add/remove in progress */

/* forward declaration; the definition is outside this part of the file */
static int bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p,
    user_addr_t datap);
312
313 static __inline__ bool
ifbond_flags_if_detaching(ifbond_ref ifb)314 ifbond_flags_if_detaching(ifbond_ref ifb)
315 {
316 return (ifb->ifb_flags & IFBF_IF_DETACHING) != 0;
317 }
318
319 static __inline__ void
ifbond_flags_set_if_detaching(ifbond_ref ifb)320 ifbond_flags_set_if_detaching(ifbond_ref ifb)
321 {
322 ifb->ifb_flags |= IFBF_IF_DETACHING;
323 return;
324 }
325
326 static __inline__ bool
ifbond_flags_lladdr(ifbond_ref ifb)327 ifbond_flags_lladdr(ifbond_ref ifb)
328 {
329 return (ifb->ifb_flags & IFBF_LLADDR) != 0;
330 }
331
332 static __inline__ bool
ifbond_flags_change_in_progress(ifbond_ref ifb)333 ifbond_flags_change_in_progress(ifbond_ref ifb)
334 {
335 return (ifb->ifb_flags & IFBF_CHANGE_IN_PROGRESS) != 0;
336 }
337
338 static __inline__ void
ifbond_flags_set_change_in_progress(ifbond_ref ifb)339 ifbond_flags_set_change_in_progress(ifbond_ref ifb)
340 {
341 ifb->ifb_flags |= IFBF_CHANGE_IN_PROGRESS;
342 return;
343 }
344
345 static __inline__ void
ifbond_flags_clear_change_in_progress(ifbond_ref ifb)346 ifbond_flags_clear_change_in_progress(ifbond_ref ifb)
347 {
348 ifb->ifb_flags &= ~IFBF_CHANGE_IN_PROGRESS;
349 return;
350 }
351
352 static __inline__ bool
ifbond_flags_promisc(ifbond_ref ifb)353 ifbond_flags_promisc(ifbond_ref ifb)
354 {
355 return (ifb->ifb_flags & IFBF_PROMISC) != 0;
356 }
357
358 static __inline__ void
ifbond_flags_set_promisc(ifbond_ref ifb)359 ifbond_flags_set_promisc(ifbond_ref ifb)
360 {
361 ifb->ifb_flags |= IFBF_PROMISC;
362 return;
363 }
364
365 static __inline__ void
ifbond_flags_clear_promisc(ifbond_ref ifb)366 ifbond_flags_clear_promisc(ifbond_ref ifb)
367 {
368 ifb->ifb_flags &= ~IFBF_PROMISC;
369 return;
370 }
371
/*
 * bondport_ref->po_flags bits
 *
 * po_flags is a u_char, so all eight bit positions are enumerated here;
 * the *_UNUSEDn names are placeholders.  The bits are read and written
 * through the bondport_flags_*() helpers.
 */
#define BONDPORT_FLAGS_NTT              0x01
#define BONDPORT_FLAGS_READY            0x02
#define BONDPORT_FLAGS_SELECTED_CHANGED 0x04
#define BONDPORT_FLAGS_MUX_ATTACHED     0x08
#define BONDPORT_FLAGS_DISTRIBUTING     0x10
#define BONDPORT_FLAGS_UNUSED2          0x20
#define BONDPORT_FLAGS_UNUSED3          0x40
#define BONDPORT_FLAGS_UNUSED4          0x80
383
384 static __inline__ void
bondport_flags_set_ntt(bondport_ref p)385 bondport_flags_set_ntt(bondport_ref p)
386 {
387 p->po_flags |= BONDPORT_FLAGS_NTT;
388 return;
389 }
390
391 static __inline__ void
bondport_flags_clear_ntt(bondport_ref p)392 bondport_flags_clear_ntt(bondport_ref p)
393 {
394 p->po_flags &= ~BONDPORT_FLAGS_NTT;
395 return;
396 }
397
398 static __inline__ int
bondport_flags_ntt(bondport_ref p)399 bondport_flags_ntt(bondport_ref p)
400 {
401 return (p->po_flags & BONDPORT_FLAGS_NTT) != 0;
402 }
403
404 static __inline__ void
bondport_flags_set_ready(bondport_ref p)405 bondport_flags_set_ready(bondport_ref p)
406 {
407 p->po_flags |= BONDPORT_FLAGS_READY;
408 return;
409 }
410
411 static __inline__ void
bondport_flags_clear_ready(bondport_ref p)412 bondport_flags_clear_ready(bondport_ref p)
413 {
414 p->po_flags &= ~BONDPORT_FLAGS_READY;
415 return;
416 }
417
418 static __inline__ int
bondport_flags_ready(bondport_ref p)419 bondport_flags_ready(bondport_ref p)
420 {
421 return (p->po_flags & BONDPORT_FLAGS_READY) != 0;
422 }
423
424 static __inline__ void
bondport_flags_set_selected_changed(bondport_ref p)425 bondport_flags_set_selected_changed(bondport_ref p)
426 {
427 p->po_flags |= BONDPORT_FLAGS_SELECTED_CHANGED;
428 return;
429 }
430
431 static __inline__ void
bondport_flags_clear_selected_changed(bondport_ref p)432 bondport_flags_clear_selected_changed(bondport_ref p)
433 {
434 p->po_flags &= ~BONDPORT_FLAGS_SELECTED_CHANGED;
435 return;
436 }
437
438 static __inline__ int
bondport_flags_selected_changed(bondport_ref p)439 bondport_flags_selected_changed(bondport_ref p)
440 {
441 return (p->po_flags & BONDPORT_FLAGS_SELECTED_CHANGED) != 0;
442 }
443
444 static __inline__ void
bondport_flags_set_mux_attached(bondport_ref p)445 bondport_flags_set_mux_attached(bondport_ref p)
446 {
447 p->po_flags |= BONDPORT_FLAGS_MUX_ATTACHED;
448 return;
449 }
450
451 static __inline__ void
bondport_flags_clear_mux_attached(bondport_ref p)452 bondport_flags_clear_mux_attached(bondport_ref p)
453 {
454 p->po_flags &= ~BONDPORT_FLAGS_MUX_ATTACHED;
455 return;
456 }
457
458 static __inline__ int
bondport_flags_mux_attached(bondport_ref p)459 bondport_flags_mux_attached(bondport_ref p)
460 {
461 return (p->po_flags & BONDPORT_FLAGS_MUX_ATTACHED) != 0;
462 }
463
464 static __inline__ void
bondport_flags_set_distributing(bondport_ref p)465 bondport_flags_set_distributing(bondport_ref p)
466 {
467 p->po_flags |= BONDPORT_FLAGS_DISTRIBUTING;
468 return;
469 }
470
471 static __inline__ void
bondport_flags_clear_distributing(bondport_ref p)472 bondport_flags_clear_distributing(bondport_ref p)
473 {
474 p->po_flags &= ~BONDPORT_FLAGS_DISTRIBUTING;
475 return;
476 }
477
478 static __inline__ int
bondport_flags_distributing(bondport_ref p)479 bondport_flags_distributing(bondport_ref p)
480 {
481 return (p->po_flags & BONDPORT_FLAGS_DISTRIBUTING) != 0;
482 }
483
/*
 * Driver-wide state: the list of bonds plus the local system
 * identifier and priority (filled in by bond_globals_create()).
 */
typedef struct bond_globals_s {
	struct ifbond_list      ifbond_list;
	lacp_system             system;
	lacp_system_priority    system_priority;
} * bond_globals_ref;

/* singleton; stays NULL until bond_globals_init() installs it under bond_lock() */
static bond_globals_ref g_bond;
491
492 /**
493 ** packet_buffer routines
494 ** - thin wrapper for mbuf
495 **/
496
497 typedef struct mbuf * packet_buffer_ref;
498
499 static packet_buffer_ref
packet_buffer_allocate(int length)500 packet_buffer_allocate(int length)
501 {
502 packet_buffer_ref m;
503 int size;
504
505 /* leave room for ethernet header */
506 size = length + sizeof(struct ether_header);
507 if (size > (int)MHLEN) {
508 if (size > (int)MCLBYTES) {
509 printf("bond: packet_buffer_allocate size %d > max %u\n",
510 size, MCLBYTES);
511 return NULL;
512 }
513 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
514 } else {
515 m = m_gethdr(M_WAITOK, MT_DATA);
516 }
517 if (m == NULL) {
518 return NULL;
519 }
520 m->m_len = size;
521 m->m_pkthdr.len = size;
522 return m;
523 }
524
525 static void *
packet_buffer_byteptr(packet_buffer_ref buf)526 packet_buffer_byteptr(packet_buffer_ref buf)
527 {
528 return buf->m_data + sizeof(struct ether_header);
529 }
530
/*
 * Event codes passed as the `event' argument to the receive, periodic
 * transmit, transmit and mux state machine functions declared below.
 */
typedef enum {
	LAEventStart,
	LAEventTimeout,
	LAEventPacket,
	LAEventMediaChange,
	LAEventSelectedChange,
	LAEventPortMoved,
	LAEventReady
} LAEvent;
540
541 /**
542 ** Receive machine
543 **/
/*
 * Forward declarations for the state machines and the bond/LAG/port
 * helpers.  Each state machine takes the port, an LAEvent and an opaque
 * event_data pointer.
 */
static void
bondport_receive_machine(bondport_ref p, LAEvent event,
    void * event_data);
/**
** Periodic Transmission machine
**/
static void
bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
    void * event_data);

/**
** Transmit machine
**/
/*
 * Sentinel event_data value.
 * NOTE(review): presumably asks the transmit machine to send right away;
 * confirm against bondport_transmit_machine().
 */
#define TRANSMIT_MACHINE_TX_IMMEDIATE   ((void *)1)

static void
bondport_transmit_machine(bondport_ref p, LAEvent event,
    void * event_data);

/**
** Mux machine
**/
static void
bondport_mux_machine(bondport_ref p, LAEvent event,
    void * event_data);

/**
** bond, LAG
**/
static void
ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media);

static void
ifbond_deactivate_LAG(ifbond_ref bond, LAG_ref lag);

static int
ifbond_all_ports_ready(ifbond_ref bond);

static LAG_ref
ifbond_find_best_LAG(ifbond_ref bond, int * active_media);

static int
LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media);

static int
ifbond_selection(ifbond_ref bond);

static void
bond_handle_event(struct ifnet * port_ifp, int event_code);

/**
** bondport
**/

static void
bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p);

static void
bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf);

static bondport_ref
bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
    int active, int short_timeout, int * error);
static void
bondport_start(bondport_ref p);

static void
bondport_free(bondport_ref p);

static int
bondport_aggregatable(bondport_ref p);

static int
bondport_remove_from_LAG(bondport_ref p);

static void
bondport_set_selected(bondport_ref p, SelectedState s);

static int
bondport_matches_LAG(bondport_ref p, LAG_ref lag);

static void
bondport_link_status_changed(bondport_ref p);

static void
bondport_enable_distributing(bondport_ref p);

static void
bondport_disable_distributing(bondport_ref p);
633
634 static __inline__ int
bondport_collecting(bondport_ref p)635 bondport_collecting(bondport_ref p)
636 {
637 if (p->po_bond->ifb_mode == IF_BOND_MODE_LACP) {
638 return lacp_actor_partner_state_collecting(p->po_actor_state);
639 }
640 return TRUE;
641 }
642
643 /**
644 ** bond interface/dlil specific routines
645 **/
/* forward declarations for the interface, ioctl, filter and cloner entry points */
static int bond_clone_create(struct if_clone *, u_int32_t, void *);
static int bond_clone_destroy(struct ifnet *);
static int bond_output(struct ifnet *ifp, struct mbuf *m);
static int bond_ioctl(struct ifnet *ifp, u_long cmd, void * addr);
static int bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode,
    bpf_packet_func func);
static int bond_attach_protocol(struct ifnet *ifp);
static int bond_detach_protocol(struct ifnet *ifp);
static errno_t bond_iff_input(void *cookie, ifnet_t ifp,
    protocol_family_t protocol, mbuf_t *data, char **frame_ptr);
static int bond_attach_filter(struct ifnet *ifp, interface_filter_t * filter_p);
static int bond_setmulti(struct ifnet *ifp);
static int bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp);
static int bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp);
static void bond_if_free(struct ifnet * ifp);
static void interface_link_event(struct ifnet * ifp, u_int32_t event_code);

/*
 * Cloner descriptor for "bond" interfaces; registered with
 * if_clone_attach() by bond_clone_attach().
 */
static struct if_clone bond_cloner = IF_CLONE_INITIALIZER(BONDNAME,
    bond_clone_create,
    bond_clone_destroy,
    0,
    BOND_MAXUNIT);
668
669 static int
siocsifmtu(struct ifnet * ifp,int mtu)670 siocsifmtu(struct ifnet * ifp, int mtu)
671 {
672 struct ifreq ifr;
673
674 bzero(&ifr, sizeof(ifr));
675 ifr.ifr_mtu = mtu;
676 return ifnet_ioctl(ifp, 0, SIOCSIFMTU, &ifr);
677 }
678
679 static int
siocgifdevmtu(struct ifnet * ifp,struct ifdevmtu * ifdm_p)680 siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
681 {
682 struct ifreq ifr;
683 int error;
684
685 bzero(&ifr, sizeof(ifr));
686 error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
687 if (error == 0) {
688 *ifdm_p = ifr.ifr_devmtu;
689 }
690 return error;
691 }
692
/*
 * Copy one ethernet address (ETHER_ADDR_LEN bytes) from `source'
 * to `dest'.
 */
static __inline__ void
ether_addr_copy(void * dest, const void * source)
{
	bcopy(source, dest, ETHER_ADDR_LEN);
}
699
700 static __inline__ void
ifbond_retain(ifbond_ref ifb)701 ifbond_retain(ifbond_ref ifb)
702 {
703 os_ref_retain(&ifb->ifb_retain_count);
704 }
705
706 static __inline__ void
ifbond_release(ifbond_ref ifb)707 ifbond_release(ifbond_ref ifb)
708 {
709 if (os_ref_release(&ifb->ifb_retain_count) != 0) {
710 return;
711 }
712
713 if (if_bond_debug) {
714 printf("ifbond_release(%s)\n", ifb->ifb_name);
715 }
716 if (ifb->ifb_ifma_slow_proto != NULL) {
717 if (if_bond_debug) {
718 printf("ifbond_release(%s) removing multicast\n",
719 ifb->ifb_name);
720 }
721 (void) if_delmulti_anon(ifb->ifb_ifma_slow_proto->ifma_ifp,
722 ifb->ifb_ifma_slow_proto->ifma_addr);
723 IFMA_REMREF(ifb->ifb_ifma_slow_proto);
724 }
725 kfree_type(bondport_ref, ifb->ifb_distributing_max,
726 ifb->ifb_distributing_array);
727 kfree_type(struct ifbond_s, ifb);
728 }
729
730 /*
731 * Function: ifbond_wait
732 * Purpose:
733 * Allows a single thread to gain exclusive access to the ifbond
734 * data structure. Some operations take a long time to complete,
735 * and some have side-effects that we can't predict. Holding the
736 * bond_lock() across such operations is not possible.
737 *
738 * For example:
739 * 1) The SIOCSIFLLADDR ioctl takes a long time (several seconds) to
740 * complete. Simply holding the bond_lock() would freeze all other
741 * data structure accesses during that time.
742 * 2) When we attach our protocol to the interface, a dlil event is
743 * generated and invokes our bond_event() function. bond_event()
744 * needs to take the bond_lock(), but we're already holding it, so
745 * we're deadlocked against ourselves.
746 * Notes:
747 * Before calling, you must be holding the bond_lock and have taken
748 * a reference on the ifbond_ref.
749 */
750 static void
ifbond_wait(ifbond_ref ifb,const char * msg)751 ifbond_wait(ifbond_ref ifb, const char * msg)
752 {
753 int waited = 0;
754
755 /* other add/remove in progress */
756 while (ifbond_flags_change_in_progress(ifb)) {
757 if (if_bond_debug) {
758 printf("%s: %s msleep\n", ifb->ifb_name, msg);
759 }
760 waited = 1;
761 (void)msleep(ifb, &bond_lck_mtx, PZERO, msg, 0);
762 }
763 /* prevent other bond list remove/add from taking place */
764 ifbond_flags_set_change_in_progress(ifb);
765 if (if_bond_debug && waited) {
766 printf("%s: %s woke up\n", ifb->ifb_name, msg);
767 }
768 return;
769 }
770
771 /*
772 * Function: ifbond_signal
773 * Purpose:
774 * Allows the thread that previously invoked ifbond_wait() to
775 * give up exclusive access to the ifbond data structure, and wake up
776 * any other threads waiting to access
777 * Notes:
778 * Before calling, you must be holding the bond_lock and have taken
779 * a reference on the ifbond_ref.
780 */
781 static void
ifbond_signal(ifbond_ref ifb,const char * msg)782 ifbond_signal(ifbond_ref ifb, const char * msg)
783 {
784 ifbond_flags_clear_change_in_progress(ifb);
785 wakeup((caddr_t)ifb);
786 if (if_bond_debug) {
787 printf("%s: %s wakeup\n", ifb->ifb_name, msg);
788 }
789 return;
790 }
791
792 /**
793 ** Media information
794 **/
795
796 static int
link_speed(int active)797 link_speed(int active)
798 {
799 switch (IFM_SUBTYPE(active)) {
800 case IFM_AUTO:
801 case IFM_MANUAL:
802 case IFM_NONE:
803 return 0;
804 case IFM_10_T:
805 case IFM_10_2:
806 case IFM_10_5:
807 case IFM_10_STP:
808 case IFM_10_FL:
809 return 10;
810 case IFM_100_TX:
811 case IFM_100_FX:
812 case IFM_100_T4:
813 case IFM_100_VG:
814 case IFM_100_T2:
815 return 100;
816 case IFM_1000_SX:
817 case IFM_1000_LX:
818 case IFM_1000_CX:
819 case IFM_1000_TX:
820 case IFM_1000_CX_SGMII:
821 case IFM_1000_KX:
822 return 1000;
823 case IFM_HPNA_1:
824 return 1;
825 default:
826 /* assume that new defined types are going to be at least 10GigE */
827 case IFM_10G_SR:
828 case IFM_10G_LR:
829 case IFM_10G_KX4:
830 case IFM_10G_KR:
831 case IFM_10G_CR1:
832 case IFM_10G_ER:
833 return 10000;
834 case IFM_2500_T:
835 return 2500;
836 case IFM_5000_T:
837 return 5000;
838 case IFM_20G_KR2:
839 return 20000;
840 case IFM_25G_CR:
841 case IFM_25G_KR:
842 case IFM_25G_SR:
843 case IFM_25G_LR:
844 return 25000;
845 case IFM_40G_CR4:
846 case IFM_40G_SR4:
847 case IFM_40G_LR4:
848 case IFM_40G_KR4:
849 return 40000;
850 case IFM_50G_CR2:
851 case IFM_50G_KR2:
852 case IFM_50G_SR2:
853 case IFM_50G_LR2:
854 return 50000;
855 case IFM_56G_R4:
856 return 56000;
857 case IFM_100G_CR4:
858 case IFM_100G_SR4:
859 case IFM_100G_KR4:
860 case IFM_100G_LR4:
861 return 100000;
862 }
863 }
864
865 static __inline__ int
media_active(const struct media_info * mi)866 media_active(const struct media_info * mi)
867 {
868 if ((mi->mi_status & IFM_AVALID) == 0) {
869 return 1;
870 }
871 return (mi->mi_status & IFM_ACTIVE) != 0;
872 }
873
874 static __inline__ int
media_full_duplex(const struct media_info * mi)875 media_full_duplex(const struct media_info * mi)
876 {
877 return (mi->mi_active & IFM_FDX) != 0;
878 }
879
880 static __inline__ int
media_type_unknown(const struct media_info * mi)881 media_type_unknown(const struct media_info * mi)
882 {
883 int unknown;
884
885 switch (IFM_SUBTYPE(mi->mi_active)) {
886 case IFM_AUTO:
887 case IFM_MANUAL:
888 case IFM_NONE:
889 unknown = 1;
890 break;
891 default:
892 unknown = 0;
893 break;
894 }
895 return unknown;
896 }
897
/*
 * Return 1 when the media is full duplex or its subtype is unknown,
 * 0 otherwise.
 */
static __inline__ int
media_ok(const struct media_info * mi)
{
	if (media_full_duplex(mi)) {
		return 1;
	}
	return media_type_unknown(mi) ? 1 : 0;
}
903
904 static __inline__ int
media_speed(const struct media_info * mi)905 media_speed(const struct media_info * mi)
906 {
907 return link_speed(mi->mi_active);
908 }
909
910 static struct media_info
interface_media_info(struct ifnet * ifp)911 interface_media_info(struct ifnet * ifp)
912 {
913 struct ifmediareq ifmr;
914 struct media_info mi;
915
916 bzero(&mi, sizeof(mi));
917 bzero(&ifmr, sizeof(ifmr));
918 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
919 if (ifmr.ifm_count != 0) {
920 mi.mi_status = ifmr.ifm_status;
921 mi.mi_active = ifmr.ifm_active;
922 }
923 }
924 return mi;
925 }
926
927 static int
if_siflladdr(struct ifnet * ifp,const struct ether_addr * ea_p)928 if_siflladdr(struct ifnet * ifp, const struct ether_addr * ea_p)
929 {
930 struct ifreq ifr;
931
932 /*
933 * XXX setting the sa_len to ETHER_ADDR_LEN is wrong, but the driver
934 * currently expects it that way
935 */
936 ifr.ifr_addr.sa_family = AF_UNSPEC;
937 ifr.ifr_addr.sa_len = ETHER_ADDR_LEN;
938 ether_addr_copy(ifr.ifr_addr.sa_data, ea_p);
939 return ifnet_ioctl(ifp, 0, SIOCSIFLLADDR, &ifr);
940 }
941
942 /**
943 ** bond_globals
944 **/
945 static bond_globals_ref
bond_globals_create(lacp_system_priority sys_pri,lacp_system_ref sys)946 bond_globals_create(lacp_system_priority sys_pri,
947 lacp_system_ref sys)
948 {
949 bond_globals_ref b;
950
951 b = kalloc_type(struct bond_globals_s, Z_WAITOK | Z_ZERO | Z_NOFAIL);
952 TAILQ_INIT(&b->ifbond_list);
953 b->system = *sys;
954 b->system_priority = sys_pri;
955 return b;
956 }
957
958 static int
bond_globals_init(void)959 bond_globals_init(void)
960 {
961 bond_globals_ref b;
962 int i;
963 struct ifnet * ifp;
964
965 bond_assert_lock_not_held();
966
967 if (g_bond != NULL) {
968 return 0;
969 }
970
971 /*
972 * use en0's ethernet address as the system identifier, and if it's not
973 * there, use en1 .. en3
974 */
975 ifp = NULL;
976 for (i = 0; i < 4; i++) {
977 char ifname[IFNAMSIZ + 1];
978 snprintf(ifname, sizeof(ifname), "en%d", i);
979 ifp = ifunit(ifname);
980 if (ifp != NULL) {
981 break;
982 }
983 }
984 b = NULL;
985 if (ifp != NULL) {
986 b = bond_globals_create(0x8000, (lacp_system_ref)IF_LLADDR(ifp));
987 }
988 bond_lock();
989 if (g_bond != NULL) {
990 bond_unlock();
991 kfree_type(struct bond_globals_s, b);
992 return 0;
993 }
994 g_bond = b;
995 bond_unlock();
996 if (ifp == NULL) {
997 return ENXIO;
998 }
999 if (b == NULL) {
1000 return ENOMEM;
1001 }
1002 return 0;
1003 }
1004
1005 static void
bond_bpf_vlan(struct ifnet * ifp,struct mbuf * m,const struct ether_header * eh_p,u_int16_t vlan_tag,bpf_packet_func func)1006 bond_bpf_vlan(struct ifnet * ifp, struct mbuf * m,
1007 const struct ether_header * eh_p,
1008 u_int16_t vlan_tag, bpf_packet_func func)
1009 {
1010 struct ether_vlan_header * vlh_p;
1011 struct mbuf * vl_m;
1012
1013 vl_m = m_get(M_DONTWAIT, MT_DATA);
1014 if (vl_m == NULL) {
1015 return;
1016 }
1017 /* populate a new mbuf containing the vlan ethernet header */
1018 vl_m->m_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1019 vlh_p = mtod(vl_m, struct ether_vlan_header *);
1020 bcopy(eh_p, vlh_p, offsetof(struct ether_header, ether_type));
1021 vlh_p->evl_encap_proto = htons(ETHERTYPE_VLAN);
1022 vlh_p->evl_tag = htons(vlan_tag);
1023 vlh_p->evl_proto = eh_p->ether_type;
1024 vl_m->m_next = m;
1025 (*func)(ifp, vl_m);
1026 vl_m->m_next = NULL;
1027 m_free(vl_m);
1028 return;
1029 }
1030
1031 static __inline__ void
bond_bpf_output(struct ifnet * ifp,struct mbuf * m,bpf_packet_func func)1032 bond_bpf_output(struct ifnet * ifp, struct mbuf * m,
1033 bpf_packet_func func)
1034 {
1035 if (func != NULL) {
1036 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1037 const struct ether_header * eh_p;
1038 eh_p = mtod(m, const struct ether_header *);
1039 m->m_data += ETHER_HDR_LEN;
1040 m->m_len -= ETHER_HDR_LEN;
1041 bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func);
1042 m->m_data -= ETHER_HDR_LEN;
1043 m->m_len += ETHER_HDR_LEN;
1044 } else {
1045 (*func)(ifp, m);
1046 }
1047 }
1048 return;
1049 }
1050
1051 static __inline__ void
bond_bpf_input(ifnet_t ifp,mbuf_t m,const struct ether_header * eh_p,bpf_packet_func func)1052 bond_bpf_input(ifnet_t ifp, mbuf_t m, const struct ether_header * eh_p,
1053 bpf_packet_func func)
1054 {
1055 if (func != NULL) {
1056 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1057 bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func);
1058 } else {
1059 /* restore the header */
1060 m->m_data -= ETHER_HDR_LEN;
1061 m->m_len += ETHER_HDR_LEN;
1062 (*func)(ifp, m);
1063 m->m_data += ETHER_HDR_LEN;
1064 m->m_len -= ETHER_HDR_LEN;
1065 }
1066 }
1067 return;
1068 }
1069
1070 /*
1071 * Function: bond_setmulti
1072 * Purpose:
1073 * Enable multicast reception on "our" interface by enabling multicasts on
1074 * each of the member ports.
1075 */
1076 static int
bond_setmulti(struct ifnet * ifp)1077 bond_setmulti(struct ifnet * ifp)
1078 {
1079 ifbond_ref ifb;
1080 int error;
1081 int result = 0;
1082 bondport_ref p;
1083
1084 bond_lock();
1085 ifb = ifnet_softc(ifp);
1086 if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1087 || TAILQ_EMPTY(&ifb->ifb_port_list)) {
1088 bond_unlock();
1089 return 0;
1090 }
1091 ifbond_retain(ifb);
1092 ifbond_wait(ifb, "bond_setmulti");
1093
1094 if (ifbond_flags_if_detaching(ifb)) {
1095 /* someone destroyed the bond while we were waiting */
1096 result = EBUSY;
1097 goto signal_done;
1098 }
1099 bond_unlock();
1100
1101 /* ifbond_wait() let's us safely walk the list without holding the lock */
1102 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1103 struct ifnet * port_ifp = p->po_ifp;
1104
1105 error = multicast_list_program(&p->po_multicast,
1106 ifp, port_ifp);
1107 if (error != 0) {
1108 printf("bond_setmulti(%s): "
1109 "multicast_list_program(%s%d) failed, %d\n",
1110 ifb->ifb_name, ifnet_name(port_ifp),
1111 ifnet_unit(port_ifp), error);
1112 result = error;
1113 }
1114 }
1115 bond_lock();
1116 signal_done:
1117 ifbond_signal(ifb, __func__);
1118 bond_unlock();
1119 ifbond_release(ifb);
1120 return result;
1121 }
1122
1123 static int
bond_clone_attach(void)1124 bond_clone_attach(void)
1125 {
1126 int error;
1127
1128 if ((error = if_clone_attach(&bond_cloner)) != 0) {
1129 return error;
1130 }
1131 return 0;
1132 }
1133
1134 static int
ifbond_add_slow_proto_multicast(ifbond_ref ifb)1135 ifbond_add_slow_proto_multicast(ifbond_ref ifb)
1136 {
1137 int error;
1138 struct ifmultiaddr * ifma = NULL;
1139 struct sockaddr_dl sdl;
1140
1141 bond_assert_lock_not_held();
1142
1143 bzero(&sdl, sizeof(sdl));
1144 sdl.sdl_len = sizeof(sdl);
1145 sdl.sdl_family = AF_LINK;
1146 sdl.sdl_type = IFT_ETHER;
1147 sdl.sdl_nlen = 0;
1148 sdl.sdl_alen = sizeof(slow_proto_multicast);
1149 bcopy(&slow_proto_multicast, sdl.sdl_data, sizeof(slow_proto_multicast));
1150 error = if_addmulti_anon(ifb->ifb_ifp, (struct sockaddr *)&sdl, &ifma);
1151 if (error == 0) {
1152 ifb->ifb_ifma_slow_proto = ifma;
1153 }
1154 return error;
1155 }
1156
/*
 * Function: bond_clone_create
 * Purpose:
 *   Cloner "create" callback: allocate a new bond (struct ifbond_s),
 *   allocate and attach its ifnet, and add it to the global bond list.
 * Parameters:
 *   ifc     cloner; its ifc_name is used as the interface name prefix
 *   unit    unit number of the new interface
 *   params  unused
 * Returns:
 *   0 on success; the error from bond_globals_init(),
 *   ifnet_allocate_extended() or ifnet_attach() on failure; EINVAL if the
 *   interface name does not fit in ifb->ifb_name.
 *   A failure to join the slow-protocols multicast group is only logged.
 */
static int
bond_clone_create(struct if_clone * ifc, u_int32_t unit, __unused void *params)
{
	int                             error;
	ifbond_ref                      ifb;
	ifnet_t                         ifp;
	struct ifnet_init_eparams       bond_init;

	error = bond_globals_init();
	if (error != 0) {
		return error;
	}

	ifb = kalloc_type(struct ifbond_s, Z_WAITOK_ZERO_NOFAIL);
	os_ref_init(&ifb->ifb_retain_count, NULL);
	TAILQ_INIT(&ifb->ifb_port_list);
	TAILQ_INIT(&ifb->ifb_lag_list);
	ifb->ifb_key = unit + 1;

	/* use the interface name as the unique id for ifp recycle */
	if ((u_int32_t)snprintf(ifb->ifb_name, sizeof(ifb->ifb_name), "%s%d",
	    ifc->ifc_name, unit) >= sizeof(ifb->ifb_name)) {
		/* name was truncated: drop the reference taken by os_ref_init() */
		ifbond_release(ifb);
		return EINVAL;
	}

	/* describe the interface: ethernet-style framing/demux, bond callbacks */
	bzero(&bond_init, sizeof(bond_init));
	bond_init.ver = IFNET_INIT_CURRENT_VERSION;
	bond_init.len = sizeof(bond_init);
	bond_init.flags = IFNET_INIT_LEGACY;
	bond_init.uniqueid = ifb->ifb_name;
	bond_init.uniqueid_len = strlen(ifb->ifb_name);
	bond_init.name = ifc->ifc_name;
	bond_init.unit = unit;
	bond_init.family = IFNET_FAMILY_BOND;
	bond_init.type = IFT_IEEE8023ADLAG;
	bond_init.output = bond_output;
	bond_init.demux = ether_demux;
	bond_init.add_proto = ether_add_proto;
	bond_init.del_proto = ether_del_proto;
	bond_init.check_multi = ether_check_multi;
	bond_init.framer_extended = ether_frameout_extended;
	bond_init.ioctl = bond_ioctl;
	bond_init.set_bpf_tap = bond_set_bpf_tap;
	bond_init.detach = bond_if_free;
	bond_init.broadcast_addr = etherbroadcastaddr;
	bond_init.broadcast_len = ETHER_ADDR_LEN;
	bond_init.softc = ifb;
	error = ifnet_allocate_extended(&bond_init, &ifp);

	if (error) {
		ifbond_release(ifb);
		return error;
	}

	ifb->ifb_ifp = ifp;
	/* start with no offload capabilities and the standard ethernet MTU */
	ifnet_set_offload(ifp, 0);
	ifnet_set_addrlen(ifp, ETHER_ADDR_LEN); /* XXX ethernet specific */
	ifnet_set_flags(ifp, IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX, 0xffff);
	ifnet_set_mtu(ifp, ETHERMTU);

	error = ifnet_attach(ifp, NULL);
	if (error != 0) {
		/* undo both the ifnet allocation and the bond allocation */
		ifnet_release(ifp);
		ifbond_release(ifb);
		return error;
	}
	error = ifbond_add_slow_proto_multicast(ifb);
	if (error != 0) {
		/* not fatal: creation continues without the multicast membership */
		printf("bond_clone_create(%s): "
		    "failed to add slow_proto multicast, %d\n",
		    ifb->ifb_name, error);
	}

	/* attach as ethernet */
	bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));

	/* publish the new bond on the global list */
	bond_lock();
	TAILQ_INSERT_HEAD(&g_bond->ifbond_list, ifb, ifb_bond_list);
	bond_unlock();

	return 0;
}
1240
1241 static void
bond_remove_all_interfaces(ifbond_ref ifb)1242 bond_remove_all_interfaces(ifbond_ref ifb)
1243 {
1244 bondport_ref p;
1245
1246 bond_assert_lock_held();
1247
1248 /*
1249 * do this in reverse order to avoid re-programming the mac address
1250 * as each head interface is removed
1251 */
1252 while ((p = TAILQ_LAST(&ifb->ifb_port_list, port_list)) != NULL) {
1253 bond_remove_interface(ifb, p->po_ifp);
1254 }
1255 return;
1256 }
1257
1258 static void
bond_remove(ifbond_ref ifb)1259 bond_remove(ifbond_ref ifb)
1260 {
1261 bond_assert_lock_held();
1262 ifbond_flags_set_if_detaching(ifb);
1263 TAILQ_REMOVE(&g_bond->ifbond_list, ifb, ifb_bond_list);
1264 bond_remove_all_interfaces(ifb);
1265 return;
1266 }
1267
/*
 * Function: bond_if_detach
 * Purpose:
 *   Detach the bond's ifnet; a failure from ifnet_detach() is logged and
 *   otherwise ignored.
 */
static void
bond_if_detach(struct ifnet * ifp)
{
	int     status = ifnet_detach(ifp);

	if (status == 0) {
		return;
	}
	printf("bond_if_detach %s%d: ifnet_detach failed, %d\n",
	    ifnet_name(ifp), ifnet_unit(ifp), status);
}
1281
1282 static int
bond_clone_destroy(struct ifnet * ifp)1283 bond_clone_destroy(struct ifnet * ifp)
1284 {
1285 ifbond_ref ifb;
1286
1287 bond_lock();
1288 ifb = ifnet_softc(ifp);
1289 if (ifb == NULL || ifnet_type(ifp) != IFT_IEEE8023ADLAG) {
1290 bond_unlock();
1291 return 0;
1292 }
1293 if (ifbond_flags_if_detaching(ifb)) {
1294 bond_unlock();
1295 return 0;
1296 }
1297 bond_remove(ifb);
1298 bond_unlock();
1299 bond_if_detach(ifp);
1300 return 0;
1301 }
1302
1303 static int
bond_set_bpf_tap(struct ifnet * ifp,bpf_tap_mode mode,bpf_packet_func func)1304 bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode, bpf_packet_func func)
1305 {
1306 ifbond_ref ifb;
1307
1308 bond_lock();
1309 ifb = ifnet_softc(ifp);
1310 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
1311 bond_unlock();
1312 return ENODEV;
1313 }
1314 switch (mode) {
1315 case BPF_TAP_DISABLE:
1316 ifb->ifb_bpf_input = ifb->ifb_bpf_output = NULL;
1317 break;
1318
1319 case BPF_TAP_INPUT:
1320 ifb->ifb_bpf_input = func;
1321 break;
1322
1323 case BPF_TAP_OUTPUT:
1324 ifb->ifb_bpf_output = func;
1325 break;
1326
1327 case BPF_TAP_INPUT_OUTPUT:
1328 ifb->ifb_bpf_input = ifb->ifb_bpf_output = func;
1329 break;
1330 default:
1331 break;
1332 }
1333 bond_unlock();
1334 return 0;
1335 }
1336
/*
 * Function: ether_header_hash
 * Purpose:
 *   Compute a 32-bit hash from the ethernet header: the last two bytes of
 *   the destination address (shifted into the upper 16 bits) OR'd with the
 *   ether type, XOR'd with the first four bytes of the destination address.
 *   All values are taken in native (in-memory) byte order.
 * Note:
 *   The address bytes are copied into properly typed temporaries instead of
 *   being read through casted pointers, so the header does not need to be
 *   4-byte aligned.  The 16-bit value is widened to uint32_t before the
 *   shift; shifting the int-promoted value left by 16 overflows a signed
 *   int whenever the top bit is set.
 */
static uint32_t
ether_header_hash(struct ether_header * eh_p)
{
	uint32_t        dhost_first4;
	uint16_t        dhost_last2;
	uint32_t        h;

	bcopy(&eh_p->ether_dhost[4], &dhost_last2, sizeof(dhost_last2));
	bcopy(&eh_p->ether_dhost[0], &dhost_first4, sizeof(dhost_first4));

	/* get 32-bits from destination ether and ether type */
	h = ((uint32_t)dhost_last2 << 16) | eh_p->ether_type;
	h ^= dhost_first4;
	return h;
}
1348
1349 static struct mbuf *
S_mbuf_skip_to_offset(struct mbuf * m,int32_t * offset)1350 S_mbuf_skip_to_offset(struct mbuf * m, int32_t * offset)
1351 {
1352 int len;
1353
1354 len = m->m_len;
1355 while (*offset >= len) {
1356 *offset -= len;
1357 m = m->m_next;
1358 if (m == NULL) {
1359 break;
1360 }
1361 len = m->m_len;
1362 }
1363 return m;
1364 }
1365
/*
 * Function: make_uint32
 * Purpose:
 *   Assemble four bytes, given in the order c0..c3, into a 32-bit value.
 *   When BYTE_ORDER == BIG_ENDIAN c0 becomes the most significant byte;
 *   otherwise c0 becomes the least significant byte.
 */
static __inline__ uint32_t
make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
{
#if BYTE_ORDER == BIG_ENDIAN
	const uint32_t  byte3 = c0;
	const uint32_t  byte2 = c1;
	const uint32_t  byte1 = c2;
	const uint32_t  byte0 = c3;
#else /* BYTE_ORDER == LITTLE_ENDIAN */
	const uint32_t  byte3 = c3;
	const uint32_t  byte2 = c2;
	const uint32_t  byte1 = c1;
	const uint32_t  byte0 = c0;
#endif /* BYTE_ORDER == LITTLE_ENDIAN */

	/* byte3 is the most significant byte of the result */
	return (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0;
}
1381
1382 static int
S_mbuf_copy_uint32(struct mbuf * m,int32_t offset,uint32_t * val)1383 S_mbuf_copy_uint32(struct mbuf * m, int32_t offset, uint32_t * val)
1384 {
1385 struct mbuf * current;
1386 u_char * current_data;
1387 struct mbuf * next;
1388 u_char * next_data;
1389 int space_current;
1390
1391 current = S_mbuf_skip_to_offset(m, &offset);
1392 if (current == NULL) {
1393 return 1;
1394 }
1395 current_data = mtod(current, u_char *) + offset;
1396 space_current = current->m_len - offset;
1397 if (space_current >= (int)sizeof(uint32_t)) {
1398 *val = *((uint32_t *)current_data);
1399 return 0;
1400 }
1401 next = current->m_next;
1402 if (next == NULL || (next->m_len + space_current) < (int)sizeof(uint32_t)) {
1403 return 1;
1404 }
1405 next_data = mtod(next, u_char *);
1406 switch (space_current) {
1407 case 1:
1408 *val = make_uint32(current_data[0], next_data[0],
1409 next_data[1], next_data[2]);
1410 break;
1411 case 2:
1412 *val = make_uint32(current_data[0], current_data[1],
1413 next_data[0], next_data[1]);
1414 break;
1415 default:
1416 *val = make_uint32(current_data[0], current_data[1],
1417 current_data[2], next_data[0]);
1418 break;
1419 }
1420 return 0;
1421 }
1422
1423 #define IP_SRC_OFFSET (offsetof(struct ip, ip_src) - offsetof(struct ip, ip_p))
1424 #define IP_DST_OFFSET (offsetof(struct ip, ip_dst) - offsetof(struct ip, ip_p))
1425
1426 static uint32_t
ip_header_hash(struct mbuf * m)1427 ip_header_hash(struct mbuf * m)
1428 {
1429 u_char * data;
1430 struct in_addr ip_dst;
1431 struct in_addr ip_src;
1432 u_char ip_p;
1433 int32_t offset;
1434 struct mbuf * orig_m = m;
1435
1436 /* find the IP protocol field relative to the start of the packet */
1437 offset = offsetof(struct ip, ip_p) + sizeof(struct ether_header);
1438 m = S_mbuf_skip_to_offset(m, &offset);
1439 if (m == NULL || m->m_len < 1) {
1440 goto bad_ip_packet;
1441 }
1442 data = mtod(m, u_char *) + offset;
1443 ip_p = *data;
1444
1445 /* find the IP src relative to the IP protocol */
1446 if ((m->m_len - offset)
1447 >= (int)(IP_SRC_OFFSET + sizeof(struct in_addr) * 2)) {
1448 /* this should be the normal case */
1449 ip_src = *(struct in_addr *)(data + IP_SRC_OFFSET);
1450 ip_dst = *(struct in_addr *)(data + IP_DST_OFFSET);
1451 } else {
1452 if (S_mbuf_copy_uint32(m, offset + IP_SRC_OFFSET,
1453 (uint32_t *)&ip_src.s_addr)) {
1454 goto bad_ip_packet;
1455 }
1456 if (S_mbuf_copy_uint32(m, offset + IP_DST_OFFSET,
1457 (uint32_t *)&ip_dst.s_addr)) {
1458 goto bad_ip_packet;
1459 }
1460 }
1461 return ntohl(ip_dst.s_addr) ^ ntohl(ip_src.s_addr) ^ ((uint32_t)ip_p);
1462
1463 bad_ip_packet:
1464 return ether_header_hash(mtod(orig_m, struct ether_header *));
1465 }
1466
1467 #define IP6_ADDRS_LEN (sizeof(struct in6_addr) * 2)
1468 static uint32_t
ipv6_header_hash(struct mbuf * m)1469 ipv6_header_hash(struct mbuf * m)
1470 {
1471 u_char * data;
1472 int i;
1473 int32_t offset;
1474 struct mbuf * orig_m = m;
1475 uint32_t * scan;
1476 uint32_t val;
1477
1478 /* find the IP protocol field relative to the start of the packet */
1479 offset = offsetof(struct ip6_hdr, ip6_src) + sizeof(struct ether_header);
1480 m = S_mbuf_skip_to_offset(m, &offset);
1481 if (m == NULL) {
1482 goto bad_ipv6_packet;
1483 }
1484 data = mtod(m, u_char *) + offset;
1485 val = 0;
1486 if ((m->m_len - offset) >= (int)IP6_ADDRS_LEN) {
1487 /* this should be the normal case */
1488 for (i = 0, scan = (uint32_t *)data;
1489 i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t));
1490 i++, scan++) {
1491 val ^= *scan;
1492 }
1493 } else {
1494 for (i = 0; i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t)); i++) {
1495 uint32_t tmp;
1496 if (S_mbuf_copy_uint32(m, offset + i * sizeof(uint32_t),
1497 (uint32_t *)&tmp)) {
1498 goto bad_ipv6_packet;
1499 }
1500 val ^= tmp;
1501 }
1502 }
1503 return ntohl(val);
1504
1505 bad_ipv6_packet:
1506 return ether_header_hash(mtod(orig_m, struct ether_header *));
1507 }
1508
/*
 * Function: bond_output
 * Purpose:
 *   Output routine for the bond interface.  Picks one of the distributing
 *   ports by hashing the packet and hands the packet to that port with
 *   dlil_output().
 * Parameters:
 *   ifp   the bond interface
 *   m     packet to transmit, starting at the ethernet header
 * Returns:
 *   0 if the packet was dropped here (NULL mbuf, no packet header, no
 *   softc, bond detaching, or no distributing ports); otherwise the result
 *   of dlil_output(), with a successful send mapped to EQFULL or
 *   EQSUSPENDED when the flow advisory reports flow control or suspension.
 */
static int
bond_output(struct ifnet * ifp, struct mbuf * m)
{
	bpf_packet_func         bpf_func;
	uint32_t                h;
	ifbond_ref              ifb;
	struct ifnet *          port_ifp = NULL;
	int                     err;
	struct flowadv          adv = { .code = FADV_SUCCESS };

	if (m == 0) {
		return 0;
	}
	if ((m->m_flags & M_PKTHDR) == 0) {
		m_freem(m);
		return 0;
	}
	/* choose the hash: an existing flow id wins, else hash by ether type */
	if (m->m_pkthdr.pkt_flowid != 0) {
		h = m->m_pkthdr.pkt_flowid;
	} else {
		struct ether_header *   eh_p;

		eh_p = mtod(m, struct ether_header *);
		switch (ntohs(eh_p->ether_type)) {
		case ETHERTYPE_IP:
			h = ip_header_hash(m);
			break;
		case ETHERTYPE_IPV6:
			h = ipv6_header_hash(m);
			break;
		default:
			h = ether_header_hash(eh_p);
			break;
		}
	}
	bond_lock();
	ifb = ifnet_softc(ifp);
	if (ifb == NULL || ifbond_flags_if_detaching(ifb)
	    || ifb->ifb_distributing_count == 0) {
		goto done;
	}
	/* map the hash onto one of the ports currently distributing */
	h %= ifb->ifb_distributing_count;
	port_ifp = ifb->ifb_distributing_array[h]->po_ifp;
	/* capture the tap callback while still holding the lock */
	bpf_func = ifb->ifb_bpf_output;
	bond_unlock();

	/* account for the VLAN encapsulation when a VLAN tag is pending */
	if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
		(void)ifnet_stat_increment_out(ifp, 1,
		    m->m_pkthdr.len + ETHER_VLAN_ENCAP_LEN,
		    0);
	} else {
		(void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
	}
	bond_bpf_output(ifp, m, bpf_func);

	err = dlil_output(port_ifp, PF_BOND, m, NULL, NULL, 1, &adv);

	if (err == 0) {
		/* surface flow-advisory feedback from the port to our caller */
		if (adv.code == FADV_FLOW_CONTROLLED) {
			err = EQFULL;
		} else if (adv.code == FADV_SUSPENDED) {
			err = EQSUSPENDED;
		}
	}

	return err;

done:
	/* reached with the bond lock held: unlock and drop the packet */
	bond_unlock();
	m_freem(m);
	return 0;
}
1581
1582 static bondport_ref
ifbond_lookup_port(ifbond_ref ifb,struct ifnet * port_ifp)1583 ifbond_lookup_port(ifbond_ref ifb, struct ifnet * port_ifp)
1584 {
1585 bondport_ref p;
1586 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1587 if (p->po_ifp == port_ifp) {
1588 return p;
1589 }
1590 }
1591 return NULL;
1592 }
1593
1594 static bondport_ref
bond_lookup_port(struct ifnet * port_ifp)1595 bond_lookup_port(struct ifnet * port_ifp)
1596 {
1597 ifbond_ref ifb;
1598 bondport_ref port;
1599
1600 TAILQ_FOREACH(ifb, &g_bond->ifbond_list, ifb_bond_list) {
1601 port = ifbond_lookup_port(ifb, port_ifp);
1602 if (port != NULL) {
1603 return port;
1604 }
1605 }
1606 return NULL;
1607 }
1608
1609 static void
bond_receive_lacpdu(struct mbuf * m,struct ifnet * port_ifp)1610 bond_receive_lacpdu(struct mbuf * m, struct ifnet * port_ifp)
1611 {
1612 struct ifnet * bond_ifp = NULL;
1613 ifbond_ref ifb;
1614 int event_code = 0;
1615 bool need_link_update = false;
1616 bondport_ref p;
1617
1618 bond_lock();
1619 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1620 goto done;
1621 }
1622 p = bond_lookup_port(port_ifp);
1623 if (p == NULL) {
1624 goto done;
1625 }
1626 if (p->po_enabled == 0) {
1627 goto done;
1628 }
1629 ifb = p->po_bond;
1630 if (ifb->ifb_mode != IF_BOND_MODE_LACP) {
1631 goto done;
1632 }
1633 /*
1634 * Work-around for rdar://problem/51372042
1635 * Sometimes, the link comes up but the driver doesn't report the
1636 * negotiated medium at that time. When we receive an LACPDU packet,
1637 * and the medium is unknown, force a link status check. Don't force
1638 * the link status check more often than _FORCE_LINK_EVENT_INTERVAL
1639 * seconds.
1640 */
1641 #define _FORCE_LINK_EVENT_INTERVAL 1
1642 if (media_type_unknown(&p->po_media_info)) {
1643 uint64_t now = net_uptime();
1644
1645 if ((now - p->po_force_link_event_time) >=
1646 _FORCE_LINK_EVENT_INTERVAL) {
1647 need_link_update = true;
1648 p->po_force_link_event_time = now;
1649 }
1650 }
1651 bondport_receive_lacpdu(p, (lacpdu_ref)m->m_data);
1652 if (ifbond_selection(ifb)) {
1653 event_code = (ifb->ifb_active_lag == NULL)
1654 ? KEV_DL_LINK_OFF
1655 : KEV_DL_LINK_ON;
1656 /* XXX need to take a reference on bond_ifp */
1657 bond_ifp = ifb->ifb_ifp;
1658 ifb->ifb_last_link_event = event_code;
1659 } else {
1660 event_code = (ifb->ifb_active_lag == NULL)
1661 ? KEV_DL_LINK_OFF
1662 : KEV_DL_LINK_ON;
1663 if (event_code != ifb->ifb_last_link_event) {
1664 if (if_bond_debug) {
1665 timestamp_printf("%s: (receive) generating LINK event\n",
1666 ifb->ifb_name);
1667 }
1668 bond_ifp = ifb->ifb_ifp;
1669 ifb->ifb_last_link_event = event_code;
1670 }
1671 }
1672
1673 done:
1674 bond_unlock();
1675 if (bond_ifp != NULL) {
1676 interface_link_event(bond_ifp, event_code);
1677 }
1678 m_freem(m);
1679 if (need_link_update) {
1680 if (if_bond_debug != 0) {
1681 printf("bond: simulating link status changed event");
1682 }
1683 bond_handle_event(port_ifp, KEV_DL_LINK_ON);
1684 }
1685 return;
1686 }
1687
1688 static void
bond_receive_la_marker_pdu(struct mbuf * m,struct ifnet * port_ifp)1689 bond_receive_la_marker_pdu(struct mbuf * m, struct ifnet * port_ifp)
1690 {
1691 la_marker_pdu_ref marker_p;
1692 bondport_ref p;
1693
1694 marker_p = (la_marker_pdu_ref)(m->m_data + ETHER_HDR_LEN);
1695 if (marker_p->lm_marker_tlv_type != LA_MARKER_TLV_TYPE_MARKER) {
1696 goto failed;
1697 }
1698 bond_lock();
1699 if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1700 bond_unlock();
1701 goto failed;
1702 }
1703 p = bond_lookup_port(port_ifp);
1704 if (p == NULL || p->po_enabled == 0
1705 || p->po_bond->ifb_mode != IF_BOND_MODE_LACP) {
1706 bond_unlock();
1707 goto failed;
1708 }
1709 /* echo back the same packet as a marker response */
1710 marker_p->lm_marker_tlv_type = LA_MARKER_TLV_TYPE_MARKER_RESPONSE;
1711 bondport_slow_proto_transmit(p, (packet_buffer_ref)m);
1712 bond_unlock();
1713 return;
1714
1715 failed:
1716 m_freem(m);
1717 return;
1718 }
1719
/*
 * Function: bond_input
 * Purpose:
 *   Handle a packet received on a member port.  Slow-protocol frames
 *   (LACP and Marker) addressed to the slow-protocols multicast address are
 *   consumed here; everything else is re-injected as if it had arrived on
 *   the bond interface, provided the port is currently collecting.
 * Parameters:
 *   port_ifp      the member interface the packet arrived on
 *   m             the packet; the code below treats its data pointer as
 *                 positioned just past the ethernet header
 *   frame_header  pointer to the packet's ethernet header
 * Note:
 *   The mbuf is always consumed: it is either freed, handed to one of the
 *   slow-protocol handlers, or passed to dlil_input_packet_list().
 */
static void
bond_input(ifnet_t port_ifp, mbuf_t m, char *frame_header)
{
	bpf_packet_func             bpf_func;
	const struct ether_header * eh_p;
	ifbond_ref                  ifb;
	struct ifnet *              ifp;
	bondport_ref                p;

	eh_p = (const struct ether_header *)frame_header;
	if ((m->m_flags & M_MCAST) != 0
	    && bcmp(eh_p->ether_dhost, &slow_proto_multicast,
	    sizeof(eh_p->ether_dhost)) == 0
	    && ntohs(eh_p->ether_type) == IEEE8023AD_SLOW_PROTO_ETHERTYPE) {
		/* the first payload byte identifies the slow protocol */
		u_char  subtype = *mtod(m, u_char *);

		if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP) {
			/* too short to hold the LACPDU up to la_reserved: drop */
			if (m->m_pkthdr.len < (int)offsetof(lacpdu, la_reserved)) {
				m_freem(m);
				return;
			}
			/* make the part of the LACPDU we read contiguous */
			if (m->m_len < (int)offsetof(lacpdu, la_reserved)) {
				m = m_pullup(m, offsetof(lacpdu, la_reserved));
				if (m == NULL) {
					return;
				}
			}
			/* send to lacp */
			bond_receive_lacpdu(m, port_ifp);
			return;
		} else if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LA_MARKER_PROTOCOL) {
			int         min_size;

			/* restore the ethernet header pointer in the mbuf */
			m->m_pkthdr.len += ETHER_HDR_LEN;
			m->m_data -= ETHER_HDR_LEN;
			m->m_len += ETHER_HDR_LEN;
			min_size = ETHER_HDR_LEN + offsetof(la_marker_pdu, lm_reserved);
			if (m->m_pkthdr.len < min_size) {
				m_freem(m);
				return;
			}
			/* make the header plus marker PDU contiguous */
			if (m->m_len < min_size) {
				m = m_pullup(m, min_size);
				if (m == NULL) {
					return;
				}
			}
			/* send to marker responder */
			bond_receive_la_marker_pdu(m, port_ifp);
			return;
		} else if (subtype == 0
		    || subtype > IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_END) {
			/* invalid subtype, discard the frame */
			m_freem(m);
			return;
		}
		/* any other subtype falls through and is treated as regular data */
	}
	bond_lock();
	if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
		goto done;
	}
	p = bond_lookup_port(port_ifp);
	if (p == NULL || bondport_collecting(p) == 0) {
		goto done;
	}

	/* capture what we need from the bond before dropping the lock */
	ifb = p->po_bond;
	ifp = ifb->ifb_ifp;
	bpf_func = ifb->ifb_bpf_input;
	bond_unlock();

	/*
	 * Need to clear the promiscuous flags otherwise it will be
	 * dropped by DLIL after processing filters
	 */
	if ((mbuf_flags(m) & MBUF_PROMISC)) {
		mbuf_setflags_mask(m, 0, MBUF_PROMISC);
	}

	/* count the ethernet header (and VLAN encapsulation, if tagged) too */
	if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
		(void)ifnet_stat_increment_in(ifp, 1,
		    (m->m_pkthdr.len + ETHER_HDR_LEN
		    + ETHER_VLAN_ENCAP_LEN), 0);
	} else {
		(void)ifnet_stat_increment_in(ifp, 1,
		    (m->m_pkthdr.len + ETHER_HDR_LEN), 0);
	}

	/* make the packet appear as if it arrived on the bonded interface */
	m->m_pkthdr.rcvif = ifp;
	bond_bpf_input(ifp, m, eh_p, bpf_func);
	m->m_pkthdr.pkt_hdr = frame_header;
	dlil_input_packet_list(ifp, m);
	return;

done:
	/* reached with the bond lock held: unlock and drop the packet */
	bond_unlock();
	m_freem(m);
	return;
}
1822
1823 static errno_t
bond_iff_input(void * cookie,ifnet_t port_ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_header_ptr)1824 bond_iff_input(void *cookie, ifnet_t port_ifp, protocol_family_t protocol,
1825 mbuf_t *data, char **frame_header_ptr)
1826 {
1827 #pragma unused(cookie)
1828 #pragma unused(protocol)
1829 mbuf_t m = *data;
1830 char * frame_header = *frame_header_ptr;
1831
1832 bond_input(port_ifp, m, frame_header);
1833 return EJUSTRETURN;
1834 }
1835
1836 static __inline__ const char *
bondport_get_name(bondport_ref p)1837 bondport_get_name(bondport_ref p)
1838 {
1839 return p->po_name;
1840 }
1841
1842 static __inline__ int
bondport_get_index(bondport_ref p)1843 bondport_get_index(bondport_ref p)
1844 {
1845 return ifnet_index(p->po_ifp);
1846 }
1847
1848 static void
bondport_slow_proto_transmit(bondport_ref p,packet_buffer_ref buf)1849 bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf)
1850 {
1851 struct ether_header * eh_p;
1852 int error;
1853
1854 /* packet_buffer_allocate leaves room for ethernet header */
1855 eh_p = mtod(buf, struct ether_header *);
1856 bcopy(&slow_proto_multicast, &eh_p->ether_dhost, sizeof(eh_p->ether_dhost));
1857 bcopy(&p->po_saved_addr, eh_p->ether_shost, sizeof(eh_p->ether_shost));
1858 eh_p->ether_type = htons(IEEE8023AD_SLOW_PROTO_ETHERTYPE);
1859 error = ifnet_output_raw(p->po_ifp, PF_BOND, buf);
1860 if (error != 0) {
1861 printf("bondport_slow_proto_transmit(%s) failed %d\n",
1862 bondport_get_name(p), error);
1863 }
1864 return;
1865 }
1866
/*
 * Function: bondport_timer_process_func
 * Purpose:
 *   Lock/unlock hook shared by all of a port's devtimers.
 *   - On the "lock" event: take the bond lock and retain the timer.
 *   - On the "unlock" event: if the timer is still valid, re-run selection
 *     for the port's bond and decide whether a link event must be posted;
 *     then release the timer, drop the bond lock and, with the lock no
 *     longer held, post the link event if one is due.
 *   Other events are ignored.
 */
static void
bondport_timer_process_func(devtimer_ref timer,
    devtimer_process_func_event event)
{
	bondport_ref        p;

	switch (event) {
	case devtimer_process_func_event_lock:
		bond_lock();
		/* matched by devtimer_release() in the unlock event */
		devtimer_retain(timer);
		break;
	case devtimer_process_func_event_unlock:
		if (devtimer_valid(timer)) {
			/* as long as the devtimer is valid, we can look at arg0 */
			int                 event_code = 0;
			struct ifnet *      bond_ifp = NULL;

			p = (bondport_ref)devtimer_arg0(timer);
			if (ifbond_selection(p->po_bond)) {
				/* selection changed: always report the link state */
				event_code = (p->po_bond->ifb_active_lag == NULL)
				    ? KEV_DL_LINK_OFF
				    : KEV_DL_LINK_ON;
				/* XXX need to take a reference on bond_ifp */
				bond_ifp = p->po_bond->ifb_ifp;
				p->po_bond->ifb_last_link_event = event_code;
			} else {
				/* unchanged: report only if it differs from the last event */
				event_code = (p->po_bond->ifb_active_lag == NULL)
				    ? KEV_DL_LINK_OFF
				    : KEV_DL_LINK_ON;
				if (event_code != p->po_bond->ifb_last_link_event) {
					if (if_bond_debug) {
						timestamp_printf("%s: (timer) generating LINK event\n",
						    p->po_bond->ifb_name);
					}
					bond_ifp = p->po_bond->ifb_ifp;
					p->po_bond->ifb_last_link_event = event_code;
				}
			}
			devtimer_release(timer);
			bond_unlock();
			/* post the event only after the bond lock is dropped */
			if (bond_ifp != NULL) {
				interface_link_event(bond_ifp, event_code);
			}
		} else {
			/* timer is going away */
			devtimer_release(timer);
			bond_unlock();
		}
		break;
	default:
		break;
	}
}
1920
1921 static bondport_ref
bondport_create(struct ifnet * port_ifp,lacp_port_priority priority,int active,int short_timeout,int * ret_error)1922 bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
1923 int active, int short_timeout, int * ret_error)
1924 {
1925 int error = 0;
1926 bondport_ref p = NULL;
1927 lacp_actor_partner_state s;
1928
1929 *ret_error = 0;
1930 p = kalloc_type(struct bondport_s, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1931 multicast_list_init(&p->po_multicast);
1932 if ((u_int32_t)snprintf(p->po_name, sizeof(p->po_name), "%s%d",
1933 ifnet_name(port_ifp), ifnet_unit(port_ifp))
1934 >= sizeof(p->po_name)) {
1935 printf("if_bond: name too large\n");
1936 *ret_error = EINVAL;
1937 goto failed;
1938 }
1939 error = siocgifdevmtu(port_ifp, &p->po_devmtu);
1940 if (error != 0) {
1941 printf("if_bond: SIOCGIFDEVMTU %s failed, %d\n",
1942 bondport_get_name(p), error);
1943 goto failed;
1944 }
1945 /* remember the current interface MTU so it can be restored */
1946 p->po_devmtu.ifdm_current = ifnet_mtu(port_ifp);
1947 p->po_ifp = port_ifp;
1948 p->po_media_info = interface_media_info(port_ifp);
1949 p->po_current_while_timer = devtimer_create(bondport_timer_process_func, p);
1950 if (p->po_current_while_timer == NULL) {
1951 *ret_error = ENOMEM;
1952 goto failed;
1953 }
1954 p->po_periodic_timer = devtimer_create(bondport_timer_process_func, p);
1955 if (p->po_periodic_timer == NULL) {
1956 *ret_error = ENOMEM;
1957 goto failed;
1958 }
1959 p->po_wait_while_timer = devtimer_create(bondport_timer_process_func, p);
1960 if (p->po_wait_while_timer == NULL) {
1961 *ret_error = ENOMEM;
1962 goto failed;
1963 }
1964 p->po_transmit_timer = devtimer_create(bondport_timer_process_func, p);
1965 if (p->po_transmit_timer == NULL) {
1966 *ret_error = ENOMEM;
1967 goto failed;
1968 }
1969 p->po_receive_state = ReceiveState_none;
1970 p->po_mux_state = MuxState_none;
1971 p->po_priority = priority;
1972 s = 0;
1973 s = lacp_actor_partner_state_set_aggregatable(s);
1974 if (short_timeout) {
1975 s = lacp_actor_partner_state_set_short_timeout(s);
1976 }
1977 if (active) {
1978 s = lacp_actor_partner_state_set_active_lacp(s);
1979 }
1980 p->po_actor_state = s;
1981 return p;
1982
1983 failed:
1984 bondport_free(p);
1985 return NULL;
1986 }
1987
1988 static void
bondport_start(bondport_ref p)1989 bondport_start(bondport_ref p)
1990 {
1991 bondport_receive_machine(p, LAEventStart, NULL);
1992 bondport_mux_machine(p, LAEventStart, NULL);
1993 bondport_periodic_transmit_machine(p, LAEventStart, NULL);
1994 bondport_transmit_machine(p, LAEventStart, NULL);
1995 return;
1996 }
1997
1998 /*
1999 * Function: bondport_invalidate_timers
2000 * Purpose:
2001 * Invalidate all of the timers for the bondport.
2002 */
2003 static void
bondport_invalidate_timers(bondport_ref p)2004 bondport_invalidate_timers(bondport_ref p)
2005 {
2006 devtimer_invalidate(p->po_current_while_timer);
2007 devtimer_invalidate(p->po_periodic_timer);
2008 devtimer_invalidate(p->po_wait_while_timer);
2009 devtimer_invalidate(p->po_transmit_timer);
2010 }
2011
2012 /*
2013 * Function: bondport_cancel_timers
2014 * Purpose:
2015 * Cancel all of the timers for the bondport.
2016 */
2017 static void
bondport_cancel_timers(bondport_ref p)2018 bondport_cancel_timers(bondport_ref p)
2019 {
2020 devtimer_cancel(p->po_current_while_timer);
2021 devtimer_cancel(p->po_periodic_timer);
2022 devtimer_cancel(p->po_wait_while_timer);
2023 devtimer_cancel(p->po_transmit_timer);
2024 }
2025
2026 static void
bondport_free(bondport_ref p)2027 bondport_free(bondport_ref p)
2028 {
2029 multicast_list_remove(&p->po_multicast);
2030 devtimer_release(p->po_current_while_timer);
2031 devtimer_release(p->po_periodic_timer);
2032 devtimer_release(p->po_wait_while_timer);
2033 devtimer_release(p->po_transmit_timer);
2034 kfree_type(struct bondport_s, p);
2035 return;
2036 }
2037
2038 static __inline__ int
bond_device_mtu(struct ifnet * ifp,ifbond_ref ifb)2039 bond_device_mtu(struct ifnet * ifp, ifbond_ref ifb)
2040 {
2041 return ((int)ifnet_mtu(ifp) > ifb->ifb_altmtu)
2042 ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu;
2043 }
2044
2045 static int
bond_add_interface(struct ifnet * ifp,struct ifnet * port_ifp)2046 bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp)
2047 {
2048 u_int32_t eflags;
2049 uint32_t control_flags = 0;
2050 int devmtu;
2051 int error = 0;
2052 int event_code = 0;
2053 interface_filter_t filter = NULL;
2054 int first = FALSE;
2055 ifbond_ref ifb;
2056 bondport_ref * new_array = NULL;
2057 bondport_ref * old_array = NULL;
2058 bondport_ref p;
2059 int old_max = 0;
2060 int new_max = 0;
2061
2062 if (IFNET_IS_INTCOPROC(port_ifp) || IFNET_IS_MANAGEMENT(port_ifp)) {
2063 return EINVAL;
2064 }
2065
2066 /* pre-allocate space for new port */
2067 p = bondport_create(port_ifp, 0x8000, 1, 0, &error);
2068 if (p == NULL) {
2069 return error;
2070 }
2071 bond_lock();
2072 ifb = (ifbond_ref)ifnet_softc(ifp);
2073 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2074 bond_unlock();
2075 bondport_free(p);
2076 return ifb == NULL ? EOPNOTSUPP : EBUSY;
2077 }
2078
2079 /* make sure this interface can handle our current MTU */
2080 devmtu = bond_device_mtu(ifp, ifb);
2081 if (devmtu != 0
2082 && (devmtu > p->po_devmtu.ifdm_max || devmtu < p->po_devmtu.ifdm_min)) {
2083 bond_unlock();
2084 printf("if_bond: interface %s doesn't support mtu %d",
2085 bondport_get_name(p), devmtu);
2086 bondport_free(p);
2087 return EINVAL;
2088 }
2089
2090 /* make sure ifb doesn't get de-allocated while we wait */
2091 ifbond_retain(ifb);
2092
2093 /* wait for other add or remove to complete */
2094 ifbond_wait(ifb, __func__);
2095
2096 if (ifbond_flags_if_detaching(ifb)) {
2097 /* someone destroyed the bond while we were waiting */
2098 error = EBUSY;
2099 goto signal_done;
2100 }
2101 if (bond_lookup_port(port_ifp) != NULL) {
2102 /* port is already part of a bond */
2103 error = EBUSY;
2104 goto signal_done;
2105 }
2106 if ((ifnet_eflags(port_ifp) & (IFEF_VLAN | IFEF_BOND)) != 0) {
2107 /* interface already has VLAN's, or is part of bond */
2108 error = EBUSY;
2109 goto signal_done;
2110 }
2111
2112 /* mark the interface busy */
2113 eflags = if_set_eflags(port_ifp, IFEF_BOND);
2114 if ((eflags & IFEF_VLAN) != 0) {
2115 /* vlan got in ahead of us */
2116 if_clear_eflags(port_ifp, IFEF_BOND);
2117 error = EBUSY;
2118 goto signal_done;
2119 }
2120
2121 if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2122 ifnet_set_offload(ifp, ifnet_offload(port_ifp));
2123 ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
2124 if (ifbond_flags_lladdr(ifb) == FALSE) {
2125 first = TRUE;
2126 }
2127 } else {
2128 ifnet_offload_t ifp_offload;
2129 ifnet_offload_t port_ifp_offload;
2130
2131 ifp_offload = ifnet_offload(ifp);
2132 port_ifp_offload = ifnet_offload(port_ifp);
2133 if (ifp_offload != port_ifp_offload) {
2134 ifnet_offload_t offload;
2135
2136 offload = ifp_offload & port_ifp_offload;
2137 printf("%s(%s, %s) "
2138 "hwassist values don't match 0x%x != 0x%x, using 0x%x instead\n",
2139 __func__,
2140 ifb->ifb_name, bondport_get_name(p),
2141 ifp_offload, port_ifp_offload, offload);
2142 /*
2143 * XXX
2144 * if the bond has VLAN's, we can't simply change the hwassist
2145 * field behind its back: this needs work
2146 */
2147 ifnet_set_offload(ifp, offload);
2148 }
2149 }
2150 p->po_bond = ifb;
2151
2152 /* remember the port's ethernet address so it can be restored */
2153 ether_addr_copy(&p->po_saved_addr, IF_LLADDR(port_ifp));
2154
2155 /* add it to the list of ports */
2156 TAILQ_INSERT_TAIL(&ifb->ifb_port_list, p, po_port_list);
2157 ifb->ifb_port_count++;
2158
2159 bond_unlock();
2160
2161
2162 /* first port added to bond determines bond's ethernet address */
2163 if (first) {
2164 ifnet_set_lladdr_and_type(ifp, IF_LLADDR(port_ifp), ETHER_ADDR_LEN,
2165 IFT_ETHER);
2166 }
2167 uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_IN_LIST);
2168
2169 /* allocate a larger distributing array */
2170 new_max = ifb->ifb_port_count;
2171 new_array = kalloc_type(bondport_ref, new_max, Z_WAITOK);
2172 if (new_array == NULL) {
2173 error = ENOMEM;
2174 goto failed;
2175 }
2176
2177 /* attach our BOND "protocol" to the interface */
2178 error = bond_attach_protocol(port_ifp);
2179 if (error) {
2180 goto failed;
2181 }
2182 uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_PROTO_ATTACHED);
2183
2184 /* attach our BOND interface filter */
2185 error = bond_attach_filter(port_ifp, &filter);
2186 if (error != 0) {
2187 goto failed;
2188 }
2189 uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_FILTER_ATTACHED);
2190
2191 /* set the interface MTU */
2192 devmtu = bond_device_mtu(ifp, ifb);
2193 error = siocsifmtu(port_ifp, devmtu);
2194 if (error != 0) {
2195 printf("%s(%s, %s):"
2196 " SIOCSIFMTU %d failed %d\n",
2197 __func__,
2198 ifb->ifb_name, bondport_get_name(p), devmtu, error);
2199 goto failed;
2200 }
2201 uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_MTU_SET);
2202
2203 /* program the port with our multicast addresses */
2204 error = multicast_list_program(&p->po_multicast, ifp, port_ifp);
2205 if (error) {
2206 printf("%s(%s, %s): multicast_list_program failed %d\n",
2207 __func__,
2208 ifb->ifb_name, bondport_get_name(p), error);
2209 goto failed;
2210 }
2211
2212 /* mark the interface up */
2213 ifnet_set_flags(port_ifp, IFF_UP, IFF_UP);
2214
2215 error = ifnet_ioctl(port_ifp, 0, SIOCSIFFLAGS, NULL);
2216 if (error != 0) {
2217 printf("%s(%s, %s): SIOCSIFFLAGS failed %d\n",
2218 __func__,
2219 ifb->ifb_name, bondport_get_name(p), error);
2220 goto failed;
2221 }
2222
2223 /* re-program the port's ethernet address */
2224 error = if_siflladdr(port_ifp,
2225 (const struct ether_addr *)IF_LLADDR(ifp));
2226 if (error == 0) {
2227 if (memcmp(IF_LLADDR(ifp), IF_LLADDR(port_ifp), ETHER_ADDR_LEN)
2228 != 0) {
2229 /* it lied, it really doesn't support setting lladdr */
2230 error = EOPNOTSUPP;
2231 }
2232 }
2233 if (error != 0) {
2234 /* port doesn't support setting the link address */
2235 printf("%s(%s, %s): if_siflladdr failed %d\n",
2236 __func__,
2237 ifb->ifb_name, bondport_get_name(p), error);
2238 error = ifnet_set_promiscuous(port_ifp, 1);
2239 if (error != 0) {
2240 /* port doesn't support setting promiscuous mode */
2241 printf("%s(%s, %s): set promiscuous failed %d\n",
2242 __func__,
2243 ifb->ifb_name, bondport_get_name(p), error);
2244 goto failed;
2245 }
2246 uint32_bit_set(&control_flags,
2247 PORT_CONTROL_FLAGS_PROMISCUOUS_SET);
2248 } else {
2249 uint32_bit_set(&control_flags,
2250 PORT_CONTROL_FLAGS_LLADDR_SET);
2251 }
2252
2253 /* if we're in promiscuous mode, enable that as well */
2254 if (ifbond_flags_promisc(ifb)) {
2255 error = ifnet_set_promiscuous(port_ifp, 1);
2256 if (error != 0) {
2257 /* port doesn't support setting promiscuous mode */
2258 printf("%s(%s, %s): set promiscuous failed %d\n",
2259 __func__,
2260 ifb->ifb_name, bondport_get_name(p), error);
2261 goto failed;
2262 }
2263 uint32_bit_set(&control_flags,
2264 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2265 }
2266
2267 bond_lock();
2268
2269 /* no failures past this point */
2270 p->po_enabled = 1;
2271 p->po_control_flags = control_flags;
2272
2273 /* copy the contents of the existing distributing array */
2274 if (ifb->ifb_distributing_count) {
2275 bcopy(ifb->ifb_distributing_array, new_array,
2276 sizeof(*new_array) * ifb->ifb_distributing_count);
2277 }
2278 old_array = ifb->ifb_distributing_array;
2279 old_max = ifb->ifb_distributing_max;
2280 ifb->ifb_distributing_array = new_array;
2281 ifb->ifb_distributing_max = new_max;
2282
2283 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2284 bondport_start(p);
2285
2286 /* check if we need to generate a link status event */
2287 if (ifbond_selection(ifb)) {
2288 event_code = (ifb->ifb_active_lag == NULL)
2289 ? KEV_DL_LINK_OFF
2290 : KEV_DL_LINK_ON;
2291 ifb->ifb_last_link_event = event_code;
2292 }
2293 } else {
2294 /* are we adding the first distributing interface? */
2295 if (media_active(&p->po_media_info)) {
2296 if (ifb->ifb_distributing_count == 0) {
2297 ifb->ifb_last_link_event = event_code = KEV_DL_LINK_ON;
2298 }
2299 bondport_enable_distributing(p);
2300 } else {
2301 bondport_disable_distributing(p);
2302 }
2303 }
2304 p->po_filter = filter;
2305
2306 /* clear the busy state, and wakeup anyone waiting */
2307 ifbond_signal(ifb, __func__);
2308 bond_unlock();
2309 if (event_code != 0) {
2310 interface_link_event(ifp, event_code);
2311 }
2312 kfree_type(bondport_ref, old_max, old_array);
2313 return 0;
2314
2315 failed:
2316 bond_assert_lock_not_held();
2317
2318 /* if this was the first port to be added, clear our address */
2319 if (first) {
2320 ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG);
2321 }
2322
2323 kfree_type(bondport_ref, new_max, new_array);
2324 if (uint32_bit_is_set(control_flags,
2325 PORT_CONTROL_FLAGS_LLADDR_SET)) {
2326 int error1;
2327
2328 error1 = if_siflladdr(port_ifp, &p->po_saved_addr);
2329 if (error1 != 0) {
2330 printf("%s(%s, %s): if_siflladdr restore failed %d\n",
2331 __func__,
2332 ifb->ifb_name, bondport_get_name(p), error1);
2333 }
2334 }
2335 if (uint32_bit_is_set(control_flags,
2336 PORT_CONTROL_FLAGS_PROMISCUOUS_SET)) {
2337 int error1;
2338
2339 error1 = ifnet_set_promiscuous(port_ifp, 0);
2340 if (error1 != 0) {
2341 printf("%s(%s, %s): promiscous mode disable failed %d\n",
2342 __func__,
2343 ifb->ifb_name, bondport_get_name(p), error1);
2344 }
2345 }
2346 if (uint32_bit_is_set(control_flags,
2347 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET)) {
2348 int error1;
2349
2350 error1 = ifnet_set_promiscuous(port_ifp, 0);
2351 if (error1 != 0) {
2352 printf("%s(%s, %s): promiscous mode disable failed %d\n",
2353 __func__,
2354 ifb->ifb_name, bondport_get_name(p), error1);
2355 }
2356 }
2357 if (uint32_bit_is_set(control_flags,
2358 PORT_CONTROL_FLAGS_PROTO_ATTACHED)) {
2359 (void)bond_detach_protocol(port_ifp);
2360 }
2361 if (uint32_bit_is_set(control_flags,
2362 PORT_CONTROL_FLAGS_FILTER_ATTACHED)) {
2363 iflt_detach(filter);
2364 }
2365 if (uint32_bit_is_set(control_flags,
2366 PORT_CONTROL_FLAGS_MTU_SET)) {
2367 int error1;
2368
2369 error1 = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2370 if (error1 != 0) {
2371 printf("%s(%s, %s): SIOCSIFMTU %d failed %d\n",
2372 __func__,
2373 ifb->ifb_name, bondport_get_name(p),
2374 p->po_devmtu.ifdm_current, error1);
2375 }
2376 }
2377 bond_lock();
2378 if (uint32_bit_is_set(control_flags,
2379 PORT_CONTROL_FLAGS_IN_LIST)) {
2380 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2381 ifb->ifb_port_count--;
2382 }
2383 if_clear_eflags(ifp, IFEF_BOND);
2384 if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2385 ifb->ifb_altmtu = 0;
2386 ifnet_set_mtu(ifp, ETHERMTU);
2387 ifnet_set_offload(ifp, 0);
2388 }
2389
2390 signal_done:
2391 ifbond_signal(ifb, __func__);
2392 bond_unlock();
2393 ifbond_release(ifb);
2394 bondport_free(p);
2395 return error;
2396 }
2397
/*
 * Function: bond_remove_interface
 * Purpose:
 *   Remove the port backed by port_ifp from the bond ifb: take it out of
 *   the distributing set and port list, then undo what the port's control
 *   flags say was changed (link address, promiscuous mode), restore its
 *   MTU, detach the bond protocol and interface filter, and free the port.
 *
 * Locking:
 *   Must be called with the bond lock held (asserted on entry).  The lock
 *   is dropped while the port and bond interface are reconfigured and is
 *   re-acquired before returning, so it is held again on return.
 *
 * Returns:
 *   ENXIO if port_ifp is not (or is no longer) a port of ifb.  Otherwise
 *   the value produced by the last step that assigns `error`, which is the
 *   MTU restore; failures of the earlier restore steps are logged and then
 *   overwritten.
 */
static int
bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp)
{
	int                         active_lag = 0;
	int                         error = 0;
	int                         event_code = 0;
	bondport_ref                head_port;
	struct ifnet *              ifp;
	interface_filter_t          filter;
	int                         last = FALSE;
	int                         new_link_address = FALSE;
	bondport_ref                p;
	lacp_actor_partner_state    s;
	int                         was_distributing;

	bond_assert_lock_held();

	/* hold a reference for the duration; dropped at signal_done */
	ifbond_retain(ifb);
	ifbond_wait(ifb, "bond_remove_interface");

	p = ifbond_lookup_port(ifb, port_ifp);
	if (p == NULL) {
		error = ENXIO;
		/* it got removed by another thread */
		goto signal_done;
	}

	/* de-select it and remove it from the lists */
	was_distributing = bondport_flags_distributing(p);
	bondport_disable_distributing(p);
	if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
		bondport_set_selected(p, SelectedState_UNSELECTED);
		active_lag = bondport_remove_from_LAG(p);
		/* invalidate timers here while holding the bond_lock */
		bondport_invalidate_timers(p);

		/* announce that we're Individual now */
		s = p->po_actor_state;
		s = lacp_actor_partner_state_set_individual(s);
		s = lacp_actor_partner_state_set_not_collecting(s);
		s = lacp_actor_partner_state_set_not_distributing(s);
		s = lacp_actor_partner_state_set_out_of_sync(s);
		p->po_actor_state = s;
		bondport_flags_set_ntt(p);
	}

	TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
	ifb->ifb_port_count--;

	ifp = ifb->ifb_ifp;
	head_port = TAILQ_FIRST(&ifb->ifb_port_list);
	if (head_port == NULL) {
		/* no ports left: reset the bond interface's running state */
		ifnet_set_flags(ifp, 0, IFF_RUNNING);
		if (ifbond_flags_lladdr(ifb) == FALSE) {
			last = TRUE;
		}
		ifnet_set_offload(ifp, 0);
		ifnet_set_mtu(ifp, ETHERMTU);
		ifb->ifb_altmtu = 0;
	} else if (ifbond_flags_lladdr(ifb) == FALSE
	    && bcmp(&p->po_saved_addr, IF_LLADDR(ifp),
	    ETHER_ADDR_LEN) == 0) {
		/* the bond is using the departing port's saved address */
		new_link_address = TRUE;
	}
	/* check if we need to generate a link status event */
	if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
		if (ifbond_selection(ifb) || active_lag) {
			event_code = (ifb->ifb_active_lag == NULL)
			    ? KEV_DL_LINK_OFF
			    : KEV_DL_LINK_ON;
			ifb->ifb_last_link_event = event_code;
		}
		bondport_transmit_machine(p, LAEventStart,
		    TRANSMIT_MACHINE_TX_IMMEDIATE);
	} else {
		/* are we removing the last distributing interface? */
		if (was_distributing && ifb->ifb_distributing_count == 0) {
			ifb->ifb_last_link_event = event_code = KEV_DL_LINK_OFF;
		}
	}
	/* capture the filter before dropping the lock */
	filter = p->po_filter;
	bond_unlock();

	/* everything from here to bond_lock() runs without the bond lock */
	if (last) {
		ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG);
	} else if (new_link_address) {
		struct ifnet *  scan_ifp;
		bondport_ref    scan_port;

		/* ifbond_wait() allows port list traversal without holding the lock */

		/* this port gave the bond its ethernet address, switch to new one */
		ifnet_set_lladdr_and_type(ifp,
		    &head_port->po_saved_addr, ETHER_ADDR_LEN,
		    IFT_ETHER);

		/* re-program each port with the new link address */
		TAILQ_FOREACH(scan_port, &ifb->ifb_port_list, po_port_list) {
			scan_ifp = scan_port->po_ifp;

			if (!uint32_bit_is_set(scan_port->po_control_flags,
			    PORT_CONTROL_FLAGS_LLADDR_SET)) {
				/* port doesn't support setting lladdr */
				continue;
			}
			error = if_siflladdr(scan_ifp,
			    (const struct ether_addr *) IF_LLADDR(ifp));
			if (error != 0) {
				printf("%s(%s, %s): "
				    "if_siflladdr (%s) failed %d\n",
				    __func__,
				    ifb->ifb_name, bondport_get_name(p),
				    bondport_get_name(scan_port), error);
			}
		}
	}

	/* restore the port's ethernet address */
	if (uint32_bit_is_set(p->po_control_flags,
	    PORT_CONTROL_FLAGS_LLADDR_SET)) {
		error = if_siflladdr(port_ifp, &p->po_saved_addr);
		if (error != 0) {
			printf("%s(%s, %s): if_siflladdr failed %d\n",
			    __func__,
			    ifb->ifb_name, bondport_get_name(p), error);
		}
	}

	/* disable promiscuous mode (if we enabled it) */
	if (uint32_bit_is_set(p->po_control_flags,
	    PORT_CONTROL_FLAGS_PROMISCUOUS_SET)) {
		error = ifnet_set_promiscuous(port_ifp, 0);
		if (error != 0) {
			printf("%s(%s, %s): disable promiscuous failed %d\n",
			    __func__,
			    ifb->ifb_name, bondport_get_name(p), error);
		}
	}

	/* disable promiscuous mode from bond (if we enabled it) */
	if (uint32_bit_is_set(p->po_control_flags,
	    PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET)) {
		error = ifnet_set_promiscuous(port_ifp, 0);
		if (error != 0) {
			printf("%s(%s, %s): disable promiscuous failed %d\n",
			    __func__,
			    ifb->ifb_name, bondport_get_name(p), error);
		}
	}

	/*
	 * restore the port's MTU; this is the last assignment to `error`,
	 * so its result is what the function returns on this path
	 */
	error = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
	if (error != 0) {
		printf("%s(%s, %s): SIOCSIFMTU %d failed %d\n",
		    __func__,
		    ifb->ifb_name, bondport_get_name(p),
		    p->po_devmtu.ifdm_current, error);
	}

	/* remove the bond "protocol" */
	bond_detach_protocol(port_ifp);

	/* detach the filter */
	if (filter != NULL) {
		iflt_detach(filter);
	}

	/* generate link event */
	if (event_code != 0) {
		interface_link_event(ifp, event_code);
	}

	/* re-acquire the lock: the caller expects it held on return */
	bond_lock();
	bondport_free(p);
	if_clear_eflags(port_ifp, IFEF_BOND);
	/* release this bondport's reference to the ifbond */
	ifbond_release(ifb);

signal_done:
	/* wake anyone blocked in ifbond_wait(), then drop our own reference */
	ifbond_signal(ifb, __func__);
	ifbond_release(ifb);
	return error;
}
2581
2582 static void
bond_set_lacp_mode(ifbond_ref ifb)2583 bond_set_lacp_mode(ifbond_ref ifb)
2584 {
2585 bondport_ref p;
2586
2587 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2588 bondport_disable_distributing(p);
2589 bondport_start(p);
2590 }
2591 return;
2592 }
2593
2594 static void
bond_set_static_mode(ifbond_ref ifb)2595 bond_set_static_mode(ifbond_ref ifb)
2596 {
2597 bondport_ref p;
2598 lacp_actor_partner_state s;
2599
2600 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2601 bondport_disable_distributing(p);
2602 bondport_set_selected(p, SelectedState_UNSELECTED);
2603 (void)bondport_remove_from_LAG(p);
2604 bondport_cancel_timers(p);
2605
2606 /* announce that we're Individual now */
2607 s = p->po_actor_state;
2608 s = lacp_actor_partner_state_set_individual(s);
2609 s = lacp_actor_partner_state_set_not_collecting(s);
2610 s = lacp_actor_partner_state_set_not_distributing(s);
2611 s = lacp_actor_partner_state_set_out_of_sync(s);
2612 p->po_actor_state = s;
2613 bondport_flags_set_ntt(p);
2614 bondport_transmit_machine(p, LAEventStart,
2615 TRANSMIT_MACHINE_TX_IMMEDIATE);
2616 /* clear state */
2617 p->po_actor_state = 0;
2618 bzero(&p->po_partner_state, sizeof(p->po_partner_state));
2619
2620 if (media_active(&p->po_media_info)) {
2621 bondport_enable_distributing(p);
2622 } else {
2623 bondport_disable_distributing(p);
2624 }
2625 }
2626 return;
2627 }
2628
2629 static int
bond_set_mode(struct ifnet * ifp,int mode)2630 bond_set_mode(struct ifnet * ifp, int mode)
2631 {
2632 int error = 0;
2633 int event_code = 0;
2634 ifbond_ref ifb;
2635
2636 bond_lock();
2637 ifb = (ifbond_ref)ifnet_softc(ifp);
2638 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2639 bond_unlock();
2640 return (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2641 }
2642 if (ifb->ifb_mode == mode) {
2643 bond_unlock();
2644 return 0;
2645 }
2646
2647 ifbond_retain(ifb);
2648 ifbond_wait(ifb, "bond_set_mode");
2649
2650 /* verify (again) that the mode is actually different */
2651 if (ifb->ifb_mode == mode) {
2652 /* nothing to do */
2653 goto signal_done;
2654 }
2655
2656 ifb->ifb_mode = mode;
2657 if (mode == IF_BOND_MODE_LACP) {
2658 bond_set_lacp_mode(ifb);
2659
2660 /* check if we need to generate a link status event */
2661 if (ifbond_selection(ifb)) {
2662 event_code = (ifb->ifb_active_lag == NULL)
2663 ? KEV_DL_LINK_OFF
2664 : KEV_DL_LINK_ON;
2665 }
2666 } else {
2667 bond_set_static_mode(ifb);
2668 event_code = (ifb->ifb_distributing_count == 0)
2669 ? KEV_DL_LINK_OFF
2670 : KEV_DL_LINK_ON;
2671 }
2672 ifb->ifb_last_link_event = event_code;
2673
2674 signal_done:
2675 ifbond_signal(ifb, __func__);
2676 bond_unlock();
2677 ifbond_release(ifb);
2678
2679 if (event_code != 0) {
2680 interface_link_event(ifp, event_code);
2681 }
2682 return error;
2683 }
2684
2685 static int
bond_get_status(ifbond_ref ifb,struct if_bond_req * ibr_p,user_addr_t datap)2686 bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, user_addr_t datap)
2687 {
2688 int count;
2689 user_addr_t dst;
2690 int error = 0;
2691 struct if_bond_status_req * ibsr;
2692 struct if_bond_status ibs;
2693 bondport_ref port;
2694
2695 ibsr = &(ibr_p->ibr_ibru.ibru_status);
2696 if (ibsr->ibsr_version != IF_BOND_STATUS_REQ_VERSION) {
2697 return EINVAL;
2698 }
2699 ibsr->ibsr_key = ifb->ifb_key;
2700 ibsr->ibsr_mode = ifb->ifb_mode;
2701 ibsr->ibsr_total = ifb->ifb_port_count;
2702 dst = proc_is64bit(current_proc())
2703 ? ibsr->ibsr_ibsru.ibsru_buffer64
2704 : CAST_USER_ADDR_T(ibsr->ibsr_ibsru.ibsru_buffer);
2705 if (dst == USER_ADDR_NULL) {
2706 /* just want to know how many there are */
2707 goto done;
2708 }
2709 if (ibsr->ibsr_count < 0) {
2710 return EINVAL;
2711 }
2712 count = (ifb->ifb_port_count < ibsr->ibsr_count)
2713 ? ifb->ifb_port_count : ibsr->ibsr_count;
2714 TAILQ_FOREACH(port, &ifb->ifb_port_list, po_port_list) {
2715 struct if_bond_partner_state * ibps_p;
2716 partner_state_ref ps;
2717
2718 if (count == 0) {
2719 break;
2720 }
2721 bzero(&ibs, sizeof(ibs));
2722 strlcpy(ibs.ibs_if_name, port->po_name, sizeof(ibs.ibs_if_name));
2723 ibs.ibs_port_priority = port->po_priority;
2724 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2725 ibs.ibs_state = port->po_actor_state;
2726 ibs.ibs_selected_state = port->po_selected;
2727 ps = &port->po_partner_state;
2728 ibps_p = &ibs.ibs_partner_state;
2729 ibps_p->ibps_system = ps->ps_lag_info.li_system;
2730 ibps_p->ibps_system_priority = ps->ps_lag_info.li_system_priority;
2731 ibps_p->ibps_key = ps->ps_lag_info.li_key;
2732 ibps_p->ibps_port = ps->ps_port;
2733 ibps_p->ibps_port_priority = ps->ps_port_priority;
2734 ibps_p->ibps_state = ps->ps_state;
2735 } else {
2736 /* fake the selected information */
2737 ibs.ibs_selected_state = bondport_flags_distributing(port)
2738 ? SelectedState_SELECTED : SelectedState_UNSELECTED;
2739 }
2740 error = copyout(&ibs, dst, sizeof(ibs));
2741 if (error != 0) {
2742 break;
2743 }
2744 dst += sizeof(ibs);
2745 count--;
2746 }
2747
2748 done:
2749 if (error == 0) {
2750 error = copyout(ibr_p, datap, sizeof(*ibr_p));
2751 } else {
2752 (void)copyout(ibr_p, datap, sizeof(*ibr_p));
2753 }
2754 return error;
2755 }
2756
2757 static int
bond_set_promisc(struct ifnet * ifp)2758 bond_set_promisc(struct ifnet * ifp)
2759 {
2760 int error = 0;
2761 ifbond_ref ifb;
2762 bool is_promisc;
2763 bondport_ref p;
2764 int val;
2765
2766 is_promisc = (ifnet_flags(ifp) & IFF_PROMISC) != 0;
2767
2768 /* determine whether promiscuous state needs to be changed */
2769 bond_lock();
2770 ifb = (ifbond_ref)ifnet_softc(ifp);
2771 if (ifb == NULL) {
2772 bond_unlock();
2773 error = EBUSY;
2774 goto done;
2775 }
2776 if (is_promisc == ifbond_flags_promisc(ifb)) {
2777 /* already in the right state */
2778 bond_unlock();
2779 goto done;
2780 }
2781 ifbond_retain(ifb);
2782 ifbond_wait(ifb, __func__);
2783 if (ifbond_flags_if_detaching(ifb)) {
2784 /* someone destroyed the bond while we were waiting */
2785 error = EBUSY;
2786 goto signal_done;
2787 }
2788 bond_unlock();
2789
2790 /* update the promiscuous state of each memeber */
2791 val = is_promisc ? 1 : 0;
2792 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2793 struct ifnet * port_ifp = p->po_ifp;
2794 bool port_is_promisc;
2795
2796 port_is_promisc = uint32_bit_is_set(p->po_control_flags,
2797 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2798 if (port_is_promisc == is_promisc) {
2799 /* already in the right state */
2800 continue;
2801 }
2802 error = ifnet_set_promiscuous(port_ifp, val);
2803 if (error != 0) {
2804 printf("%s: ifnet_set_promiscuous(%s, %d): failed %d",
2805 ifb->ifb_name, port_ifp->if_xname, val, error);
2806 continue;
2807 }
2808 printf("%s: ifnet_set_promiscuous(%s, %d): succeeded",
2809 ifb->ifb_name, port_ifp->if_xname, val);
2810 if (is_promisc) {
2811 /* remember that we set it */
2812 uint32_bit_set(&p->po_control_flags,
2813 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2814 } else {
2815 uint32_bit_clear(&p->po_control_flags,
2816 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2817 }
2818 }
2819
2820 /* assume that updating promiscuous state succeeded */
2821 error = 0;
2822 bond_lock();
2823
2824 /* update our internal state */
2825 if (is_promisc) {
2826 ifbond_flags_set_promisc(ifb);
2827 } else {
2828 ifbond_flags_clear_promisc(ifb);
2829 }
2830
2831 signal_done:
2832 ifbond_signal(ifb, __func__);
2833 bond_unlock();
2834 ifbond_release(ifb);
2835
2836 done:
2837 return error;
2838 }
2839
2840 static void
bond_get_mtu_values(ifbond_ref ifb,int * ret_min,int * ret_max)2841 bond_get_mtu_values(ifbond_ref ifb, int * ret_min, int * ret_max)
2842 {
2843 int mtu_min = 0;
2844 int mtu_max = 0;
2845 bondport_ref p;
2846
2847 if (TAILQ_FIRST(&ifb->ifb_port_list) != NULL) {
2848 mtu_min = IF_MINMTU;
2849 }
2850 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2851 struct ifdevmtu * devmtu_p = &p->po_devmtu;
2852
2853 if (devmtu_p->ifdm_min > mtu_min) {
2854 mtu_min = devmtu_p->ifdm_min;
2855 }
2856 if (mtu_max == 0 || devmtu_p->ifdm_max < mtu_max) {
2857 mtu_max = devmtu_p->ifdm_max;
2858 }
2859 }
2860 *ret_min = mtu_min;
2861 *ret_max = mtu_max;
2862 return;
2863 }
2864
2865 static int
bond_set_mtu_on_ports(ifbond_ref ifb,int mtu)2866 bond_set_mtu_on_ports(ifbond_ref ifb, int mtu)
2867 {
2868 int error = 0;
2869 bondport_ref p;
2870
2871 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2872 error = siocsifmtu(p->po_ifp, mtu);
2873 if (error != 0) {
2874 printf("if_bond(%s): SIOCSIFMTU %s failed, %d\n",
2875 ifb->ifb_name, bondport_get_name(p), error);
2876 break;
2877 }
2878 }
2879 return error;
2880 }
2881
2882 static int
bond_set_mtu(struct ifnet * ifp,int mtu,int isdevmtu)2883 bond_set_mtu(struct ifnet * ifp, int mtu, int isdevmtu)
2884 {
2885 int error = 0;
2886 ifbond_ref ifb;
2887 int mtu_min;
2888 int mtu_max;
2889 int new_max;
2890 int old_max;
2891
2892 bond_lock();
2893 ifb = (ifbond_ref)ifnet_softc(ifp);
2894 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2895 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2896 goto done;
2897 }
2898 ifbond_retain(ifb);
2899 ifbond_wait(ifb, "bond_set_mtu");
2900
2901 /* check again */
2902 if (ifnet_softc(ifp) == NULL || ifbond_flags_if_detaching(ifb)) {
2903 error = EBUSY;
2904 goto signal_done;
2905 }
2906 bond_get_mtu_values(ifb, &mtu_min, &mtu_max);
2907 if (mtu > mtu_max) {
2908 error = EINVAL;
2909 goto signal_done;
2910 }
2911 if (mtu < mtu_min && (isdevmtu == 0 || mtu != 0)) {
2912 /* allow SIOCSIFALTMTU to set the mtu to 0 */
2913 error = EINVAL;
2914 goto signal_done;
2915 }
2916 if (isdevmtu) {
2917 new_max = (mtu > (int)ifnet_mtu(ifp)) ? mtu : (int)ifnet_mtu(ifp);
2918 } else {
2919 new_max = (mtu > ifb->ifb_altmtu) ? mtu : ifb->ifb_altmtu;
2920 }
2921 old_max = ((int)ifnet_mtu(ifp) > ifb->ifb_altmtu)
2922 ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu;
2923 if (new_max != old_max) {
2924 /* we can safely walk the list of port without the lock held */
2925 bond_unlock();
2926 error = bond_set_mtu_on_ports(ifb, new_max);
2927 if (error != 0) {
2928 /* try our best to back out of it */
2929 (void)bond_set_mtu_on_ports(ifb, old_max);
2930 }
2931 bond_lock();
2932 }
2933 if (error == 0) {
2934 if (isdevmtu) {
2935 ifb->ifb_altmtu = mtu;
2936 } else {
2937 ifnet_set_mtu(ifp, mtu);
2938 }
2939 }
2940
2941 signal_done:
2942 ifbond_signal(ifb, __func__);
2943 ifbond_release(ifb);
2944
2945 done:
2946 bond_unlock();
2947 return error;
2948 }
2949
/*
 * Function: bond_ioctl
 * Purpose:
 *   ioctl entry point for the bond interface.  Interfaces whose type is
 *   not IFT_IEEE8023ADLAG are rejected up front; otherwise the request is
 *   dispatched on cmd.
 *
 * Returns:
 *   0 on success or an errno value.  Commands that are not listed in the
 *   switch, and SIOCSIFLLADDR, yield EOPNOTSUPP; SIOCSIFMEDIA yields
 *   EINVAL.  Several cases return EOPNOTSUPP when the interface has no
 *   softc and EBUSY when the bond is detaching.
 */
static int
bond_ioctl(struct ifnet *ifp, u_long cmd, void * data)
{
	int                 error = 0;
	struct if_bond_req  ibr;
	struct ifaddr *     ifa;
	ifbond_ref          ifb;
	struct ifreq *      ifr;
	struct ifmediareq   *ifmr;
	struct ifnet *      port_ifp = NULL;
	user_addr_t         user_addr;

	if (ifnet_type(ifp) != IFT_IEEE8023ADLAG) {
		return EOPNOTSUPP;
	}
	/* data is reinterpreted per command below */
	ifr = (struct ifreq *)data;
	/* NOTE(review): ifa is assigned here but not read anywhere in this function */
	ifa = (struct ifaddr *)data;

	switch (cmd) {
	case SIOCSIFADDR:
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64:
		bond_lock();
		ifb = (ifbond_ref)ifnet_softc(ifp);
		if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
			bond_unlock();
			return ifb == NULL ? EOPNOTSUPP : EBUSY;
		}
		/* start from "ethernet, status valid, not active" */
		ifmr = (struct ifmediareq *)data;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = IFM_AVALID;
		ifmr->ifm_active = IFM_ETHER;
		ifmr->ifm_count = 1;
		if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
			/* LACP: active only while there is an active LAG */
			if (ifb->ifb_active_lag != NULL) {
				ifmr->ifm_active = ifb->ifb_active_lag->lag_active_media;
				ifmr->ifm_status |= IFM_ACTIVE;
			}
		} else if (ifb->ifb_distributing_count > 0) {
			/* static: report the first distributing port's media */
			ifmr->ifm_active
			        = ifb->ifb_distributing_array[0]->po_media_info.mi_active;
			ifmr->ifm_status |= IFM_ACTIVE;
		}
		bond_unlock();
		/* the media list pointer layout depends on the caller's ABI */
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
		if (user_addr != USER_ADDR_NULL) {
			/* the list holds a single entry: the current media word */
			error = copyout(&ifmr->ifm_current,
			    user_addr,
			    sizeof(int));
		}
		break;

	case SIOCSIFMEDIA:
		/* XXX send the SIFMEDIA to all children?  Or force autoselect? */
		error = EINVAL;
		break;

	case SIOCGIFDEVMTU:
		bond_lock();
		ifb = (ifbond_ref)ifnet_softc(ifp);
		if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
			bond_unlock();
			error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
			break;
		}
		ifr->ifr_devmtu.ifdm_current = bond_device_mtu(ifp, ifb);
		bond_get_mtu_values(ifb, &ifr->ifr_devmtu.ifdm_min,
		    &ifr->ifr_devmtu.ifdm_max);
		bond_unlock();
		break;

	case SIOCGIFALTMTU:
		bond_lock();
		ifb = (ifbond_ref)ifnet_softc(ifp);
		if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
			bond_unlock();
			error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
			break;
		}
		ifr->ifr_mtu = ifb->ifb_altmtu;
		bond_unlock();
		break;

	case SIOCSIFALTMTU:
		/* third argument 1: set the alternate MTU */
		error = bond_set_mtu(ifp, ifr->ifr_mtu, 1);
		break;

	case SIOCSIFMTU:
		/* third argument 0: set the interface MTU */
		error = bond_set_mtu(ifp, ifr->ifr_mtu, 0);
		break;

	case SIOCSIFBOND:
		user_addr = proc_is64bit(current_proc())
		    ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
		error = copyin(user_addr, &ibr, sizeof(ibr));
		if (error) {
			break;
		}
		/* first pass: validate the operation and resolve the port */
		switch (ibr.ibr_op) {
		case IF_BOND_OP_ADD_INTERFACE:
		case IF_BOND_OP_REMOVE_INTERFACE:
			port_ifp = ifunit(ibr.ibr_ibru.ibru_if_name);
			if (port_ifp == NULL) {
				error = ENXIO;
				break;
			}
			/* only ethernet interfaces may be bond ports */
			if (ifnet_type(port_ifp) != IFT_ETHER) {
				error = EPROTONOSUPPORT;
				break;
			}
			break;
		case IF_BOND_OP_SET_VERBOSE:
		case IF_BOND_OP_SET_MODE:
			break;
		default:
			error = EOPNOTSUPP;
			break;
		}
		if (error != 0) {
			break;
		}
		/* second pass: perform the operation */
		switch (ibr.ibr_op) {
		case IF_BOND_OP_ADD_INTERFACE:
			error = bond_add_interface(ifp, port_ifp);
			break;
		case IF_BOND_OP_REMOVE_INTERFACE:
			/* bond_remove_interface() expects the bond lock held */
			bond_lock();
			ifb = (ifbond_ref)ifnet_softc(ifp);
			if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
				bond_unlock();
				return ifb == NULL ? EOPNOTSUPP : EBUSY;
			}
			error = bond_remove_interface(ifb, port_ifp);
			bond_unlock();
			break;
		case IF_BOND_OP_SET_VERBOSE:
			bond_lock();
			if_bond_debug = ibr.ibr_ibru.ibru_int_val;
			bond_unlock();
			break;
		case IF_BOND_OP_SET_MODE:
			/* accept only the two known modes */
			switch (ibr.ibr_ibru.ibru_int_val) {
			case IF_BOND_MODE_LACP:
			case IF_BOND_MODE_STATIC:
				break;
			default:
				error = EINVAL;
				break;
			}
			if (error != 0) {
				break;
			}
			error = bond_set_mode(ifp, ibr.ibr_ibru.ibru_int_val);
			break;
		}
		break; /* SIOCSIFBOND */

	case SIOCGIFBOND:
		user_addr = proc_is64bit(current_proc())
		    ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
		error = copyin(user_addr, &ibr, sizeof(ibr));
		if (error) {
			break;
		}
		/* validate the operation before taking the lock */
		switch (ibr.ibr_op) {
		case IF_BOND_OP_GET_STATUS:
			break;
		default:
			error = EOPNOTSUPP;
			break;
		}
		if (error != 0) {
			break;
		}
		bond_lock();
		ifb = (ifbond_ref)ifnet_softc(ifp);
		if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
			bond_unlock();
			return ifb == NULL ? EOPNOTSUPP : EBUSY;
		}
		switch (ibr.ibr_op) {
		case IF_BOND_OP_GET_STATUS:
			/* called with the bond lock held */
			error = bond_get_status(ifb, &ibr, user_addr);
			break;
		}
		bond_unlock();
		break; /* SIOCGIFBOND */

	case SIOCSIFLLADDR:
		error = EOPNOTSUPP;
		break;

	case SIOCSIFFLAGS:
		/* enable promiscuous mode on members */
		error = bond_set_promisc(ifp);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		error = bond_setmulti(ifp);
		break;
	default:
		error = EOPNOTSUPP;
	}
	return error;
}
3162
3163 static void
bond_if_free(struct ifnet * ifp)3164 bond_if_free(struct ifnet * ifp)
3165 {
3166 ifbond_ref ifb;
3167
3168 if (ifp == NULL) {
3169 return;
3170 }
3171 bond_lock();
3172 ifb = (ifbond_ref)ifnet_softc(ifp);
3173 if (ifb == NULL) {
3174 bond_unlock();
3175 return;
3176 }
3177 ifbond_release(ifb);
3178 bond_unlock();
3179 ifnet_release(ifp);
3180 return;
3181 }
3182
3183 static void
bond_handle_event(struct ifnet * port_ifp,int event_code)3184 bond_handle_event(struct ifnet * port_ifp, int event_code)
3185 {
3186 struct ifnet * bond_ifp = NULL;
3187 ifbond_ref ifb;
3188 int old_distributing_count;
3189 bondport_ref p;
3190 struct media_info media_info = { .mi_active = 0, .mi_status = 0 };
3191
3192 switch (event_code) {
3193 case KEV_DL_IF_DETACHED:
3194 case KEV_DL_IF_DETACHING:
3195 break;
3196 case KEV_DL_LINK_OFF:
3197 case KEV_DL_LINK_ON:
3198 media_info = interface_media_info(port_ifp);
3199 break;
3200 default:
3201 return;
3202 }
3203 bond_lock();
3204 p = bond_lookup_port(port_ifp);
3205 if (p == NULL) {
3206 bond_unlock();
3207 return;
3208 }
3209 ifb = p->po_bond;
3210 old_distributing_count = ifb->ifb_distributing_count;
3211 switch (event_code) {
3212 case KEV_DL_IF_DETACHED:
3213 case KEV_DL_IF_DETACHING:
3214 bond_remove_interface(ifb, p->po_ifp);
3215 break;
3216 case KEV_DL_LINK_OFF:
3217 case KEV_DL_LINK_ON:
3218 p->po_media_info = media_info;
3219 if (p->po_enabled) {
3220 bondport_link_status_changed(p);
3221 }
3222 break;
3223 }
3224 /* generate a link-event */
3225 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
3226 if (ifbond_selection(ifb)) {
3227 event_code = (ifb->ifb_active_lag == NULL)
3228 ? KEV_DL_LINK_OFF
3229 : KEV_DL_LINK_ON;
3230 /* XXX need to take a reference on bond_ifp */
3231 bond_ifp = ifb->ifb_ifp;
3232 ifb->ifb_last_link_event = event_code;
3233 } else {
3234 event_code = (ifb->ifb_active_lag == NULL)
3235 ? KEV_DL_LINK_OFF
3236 : KEV_DL_LINK_ON;
3237 if (event_code != ifb->ifb_last_link_event) {
3238 if (if_bond_debug) {
3239 timestamp_printf("%s: (event) generating LINK event\n",
3240 ifb->ifb_name);
3241 }
3242 bond_ifp = ifb->ifb_ifp;
3243 ifb->ifb_last_link_event = event_code;
3244 }
3245 }
3246 } else {
3247 /*
3248 * if the distributing array membership changed from 0 <-> !0
3249 * generate a link event
3250 */
3251 if (old_distributing_count == 0
3252 && ifb->ifb_distributing_count != 0) {
3253 event_code = KEV_DL_LINK_ON;
3254 } else if (old_distributing_count != 0
3255 && ifb->ifb_distributing_count == 0) {
3256 event_code = KEV_DL_LINK_OFF;
3257 }
3258 if (event_code != 0 && event_code != ifb->ifb_last_link_event) {
3259 bond_ifp = ifb->ifb_ifp;
3260 ifb->ifb_last_link_event = event_code;
3261 }
3262 }
3263
3264 bond_unlock();
3265 if (bond_ifp != NULL) {
3266 interface_link_event(bond_ifp, event_code);
3267 }
3268 return;
3269 }
3270
3271 static void
bond_iff_event(__unused void * cookie,ifnet_t port_ifp,__unused protocol_family_t protocol,const struct kev_msg * event)3272 bond_iff_event(__unused void *cookie, ifnet_t port_ifp,
3273 __unused protocol_family_t protocol,
3274 const struct kev_msg *event)
3275 {
3276 int event_code;
3277
3278 if (event->vendor_code != KEV_VENDOR_APPLE
3279 || event->kev_class != KEV_NETWORK_CLASS
3280 || event->kev_subclass != KEV_DL_SUBCLASS) {
3281 return;
3282 }
3283 event_code = event->event_code;
3284 switch (event_code) {
3285 case KEV_DL_LINK_OFF:
3286 case KEV_DL_LINK_ON:
3287 case KEV_DL_IF_DETACHING:
3288 case KEV_DL_IF_DETACHED:
3289 bond_handle_event(port_ifp, event_code);
3290 break;
3291 default:
3292 break;
3293 }
3294 return;
3295 }
3296
3297 static void
bond_iff_detached(__unused void * cookie,ifnet_t port_ifp)3298 bond_iff_detached(__unused void *cookie, ifnet_t port_ifp)
3299 {
3300 bond_handle_event(port_ifp, KEV_DL_IF_DETACHED);
3301 return;
3302 }
3303
3304 static void
interface_link_event(struct ifnet * ifp,u_int32_t event_code)3305 interface_link_event(struct ifnet * ifp, u_int32_t event_code)
3306 {
3307 struct event {
3308 u_int32_t ifnet_family;
3309 u_int32_t unit;
3310 char if_name[IFNAMSIZ];
3311 };
3312 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
3313 struct kern_event_msg *header = (struct kern_event_msg*)message;
3314 struct event *data = (struct event *)(header + 1);
3315
3316 header->total_size = sizeof(message);
3317 header->vendor_code = KEV_VENDOR_APPLE;
3318 header->kev_class = KEV_NETWORK_CLASS;
3319 header->kev_subclass = KEV_DL_SUBCLASS;
3320 header->event_code = event_code;
3321 data->ifnet_family = ifnet_family(ifp);
3322 data->unit = (u_int32_t)ifnet_unit(ifp);
3323 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
3324 ifnet_event(ifp, header);
3325 }
3326
3327 static errno_t
bond_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)3328 bond_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
3329 char *header)
3330 {
3331 #pragma unused(protocol, packet, header)
3332 if (if_bond_debug != 0) {
3333 printf("%s: unexpected packet from %s\n", __func__,
3334 ifp->if_xname);
3335 }
3336 return 0;
3337 }
3338
3339
3340 /*
3341 * Function: bond_attach_protocol
3342 * Purpose:
3343 * Attach a DLIL protocol to the interface.
3344 *
3345 * The ethernet demux special cases to always return PF_BOND if the
3346 * interface is bonded. That means we receive all traffic from that
3347 * interface without passing any of the traffic to any other attached
3348 * protocol.
3349 */
3350 static int
bond_attach_protocol(struct ifnet * ifp)3351 bond_attach_protocol(struct ifnet *ifp)
3352 {
3353 int error;
3354 struct ifnet_attach_proto_param reg;
3355
3356 bzero(®, sizeof(reg));
3357 reg.input = bond_proto_input;
3358
3359 error = ifnet_attach_protocol(ifp, PF_BOND, ®);
3360 if (error) {
3361 printf("bond over %s%d: ifnet_attach_protocol failed, %d\n",
3362 ifnet_name(ifp), ifnet_unit(ifp), error);
3363 }
3364 return error;
3365 }
3366
3367 /*
3368 * Function: bond_detach_protocol
3369 * Purpose:
3370 * Detach our DLIL protocol from an interface
3371 */
3372 static int
bond_detach_protocol(struct ifnet * ifp)3373 bond_detach_protocol(struct ifnet *ifp)
3374 {
3375 int error;
3376
3377 error = ifnet_detach_protocol(ifp, PF_BOND);
3378 if (error) {
3379 printf("bond over %s%d: ifnet_detach_protocol failed, %d\n",
3380 ifnet_name(ifp), ifnet_unit(ifp), error);
3381 }
3382 return error;
3383 }
3384
3385 /*
3386 * Function: bond_attach_filter
3387 * Purpose:
3388 * Attach our DLIL interface filter.
3389 */
3390 static int
bond_attach_filter(struct ifnet * ifp,interface_filter_t * filter_p)3391 bond_attach_filter(struct ifnet *ifp, interface_filter_t * filter_p)
3392 {
3393 int error;
3394 struct iff_filter iff;
3395
3396 /*
3397 * install an interface filter
3398 */
3399 memset(&iff, 0, sizeof(struct iff_filter));
3400 iff.iff_name = "com.apple.kernel.bsd.net.if_bond";
3401 iff.iff_input = bond_iff_input;
3402 iff.iff_event = bond_iff_event;
3403 iff.iff_detached = bond_iff_detached;
3404 error = dlil_attach_filter(ifp, &iff, filter_p,
3405 DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
3406 if (error != 0) {
3407 printf("%s: dlil_attach_filter failed %d\n", __func__, error);
3408 }
3409 return error;
3410 }
3411
3412
/*
 * DLIL interface family functions
 *
 * Protocol attach/detach routines that are defined outside this file.
 * The inet and inet6 pairs are registered as the protocol plumbers for
 * APPLE_IF_FAM_BOND by bond_family_init().
 *
 * NOTE(review): ether_attach_at/ether_detach_at are not referenced in the
 * portion of the file reviewed here; confirm they are still needed.
 */
extern int ether_attach_inet(ifnet_t ifp, protocol_family_t protocol_family);
extern void ether_detach_inet(ifnet_t ifp, protocol_family_t protocol_family);
extern int ether_attach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
extern void ether_detach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
extern int ether_attach_at(ifnet_t ifp, protocol_family_t protocol_family);
extern void ether_detach_at(ifnet_t ifp, protocol_family_t protocol_family);
3422
3423 __private_extern__ int
bond_family_init(void)3424 bond_family_init(void)
3425 {
3426 int error = 0;
3427
3428 error = proto_register_plumber(PF_INET, APPLE_IF_FAM_BOND,
3429 ether_attach_inet,
3430 ether_detach_inet);
3431 if (error != 0) {
3432 printf("bond: proto_register_plumber failed for AF_INET error=%d\n",
3433 error);
3434 goto done;
3435 }
3436 error = proto_register_plumber(PF_INET6, APPLE_IF_FAM_BOND,
3437 ether_attach_inet6,
3438 ether_detach_inet6);
3439 if (error != 0) {
3440 printf("bond: proto_register_plumber failed for AF_INET6 error=%d\n",
3441 error);
3442 goto done;
3443 }
3444 error = bond_clone_attach();
3445 if (error != 0) {
3446 printf("bond: proto_register_plumber failed bond_clone_attach error=%d\n",
3447 error);
3448 goto done;
3449 }
3450
3451 done:
3452 return error;
3453 }
3454 /**
3455 **
3456 ** LACP routines:
3457 **
3458 **/
3459
3460 /**
3461 ** LACP ifbond_list routines
3462 **/
3463 static bondport_ref
ifbond_list_find_moved_port(bondport_ref rx_port,const lacp_actor_partner_tlv_ref atlv)3464 ifbond_list_find_moved_port(bondport_ref rx_port,
3465 const lacp_actor_partner_tlv_ref atlv)
3466 {
3467 ifbond_ref bond;
3468 bondport_ref p;
3469 partner_state_ref ps;
3470 LAG_info_ref ps_li;
3471
3472 TAILQ_FOREACH(bond, &g_bond->ifbond_list, ifb_bond_list) {
3473 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3474 if (rx_port == p) {
3475 /* no point in comparing against ourselves */
3476 continue;
3477 }
3478 if (p->po_receive_state != ReceiveState_PORT_DISABLED) {
3479 /* it's not clear that we should be checking this */
3480 continue;
3481 }
3482 ps = &p->po_partner_state;
3483 if (lacp_actor_partner_state_defaulted(ps->ps_state)) {
3484 continue;
3485 }
3486 ps_li = &ps->ps_lag_info;
3487 if (ps->ps_port == lacp_actor_partner_tlv_get_port(atlv)
3488 && bcmp(&ps_li->li_system, atlv->lap_system,
3489 sizeof(ps_li->li_system)) == 0) {
3490 if (if_bond_debug) {
3491 timestamp_printf("System " EA_FORMAT
3492 " Port 0x%x moved from %s to %s\n",
3493 EA_LIST(&ps_li->li_system), ps->ps_port,
3494 bondport_get_name(p),
3495 bondport_get_name(rx_port));
3496 }
3497 return p;
3498 }
3499 }
3500 }
3501 return NULL;
3502 }
3503
3504 /**
3505 ** LACP ifbond, LAG routines
3506 **/
3507
3508 static int
ifbond_selection(ifbond_ref bond)3509 ifbond_selection(ifbond_ref bond)
3510 {
3511 int all_ports_ready = 0;
3512 int active_media = 0;
3513 LAG_ref lag = NULL;
3514 int lag_changed = 0;
3515 bondport_ref p;
3516 int port_speed = 0;
3517
3518 lag = ifbond_find_best_LAG(bond, &active_media);
3519 if (lag != bond->ifb_active_lag) {
3520 if (bond->ifb_active_lag != NULL) {
3521 ifbond_deactivate_LAG(bond, bond->ifb_active_lag);
3522 bond->ifb_active_lag = NULL;
3523 }
3524 bond->ifb_active_lag = lag;
3525 if (lag != NULL) {
3526 ifbond_activate_LAG(bond, lag, active_media);
3527 }
3528 lag_changed = 1;
3529 } else if (lag != NULL) {
3530 if (lag->lag_active_media != active_media) {
3531 if (if_bond_debug) {
3532 timestamp_printf("LAG PORT SPEED CHANGED from %d to %d\n",
3533 link_speed(lag->lag_active_media),
3534 link_speed(active_media));
3535 }
3536 ifbond_deactivate_LAG(bond, lag);
3537 ifbond_activate_LAG(bond, lag, active_media);
3538 lag_changed = 1;
3539 }
3540 }
3541 if (lag != NULL) {
3542 port_speed = link_speed(active_media);
3543 all_ports_ready = ifbond_all_ports_ready(bond);
3544 }
3545 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3546 if (lag != NULL && p->po_lag == lag
3547 && media_speed(&p->po_media_info) == port_speed
3548 && (p->po_mux_state == MuxState_DETACHED
3549 || p->po_selected == SelectedState_SELECTED
3550 || p->po_selected == SelectedState_STANDBY)
3551 && bondport_aggregatable(p)) {
3552 if (bond->ifb_max_active > 0) {
3553 if (lag->lag_selected_port_count < bond->ifb_max_active) {
3554 if (p->po_selected == SelectedState_STANDBY
3555 || p->po_selected == SelectedState_UNSELECTED) {
3556 bondport_set_selected(p, SelectedState_SELECTED);
3557 }
3558 } else if (p->po_selected == SelectedState_UNSELECTED) {
3559 bondport_set_selected(p, SelectedState_STANDBY);
3560 }
3561 } else {
3562 bondport_set_selected(p, SelectedState_SELECTED);
3563 }
3564 }
3565 if (bondport_flags_selected_changed(p)) {
3566 bondport_flags_clear_selected_changed(p);
3567 bondport_mux_machine(p, LAEventSelectedChange, NULL);
3568 }
3569 if (all_ports_ready
3570 && bondport_flags_ready(p)
3571 && p->po_mux_state == MuxState_WAITING) {
3572 bondport_mux_machine(p, LAEventReady, NULL);
3573 }
3574 bondport_transmit_machine(p, LAEventStart, NULL);
3575 }
3576 return lag_changed;
3577 }
3578
3579 static LAG_ref
ifbond_find_best_LAG(ifbond_ref bond,int * active_media)3580 ifbond_find_best_LAG(ifbond_ref bond, int * active_media)
3581 {
3582 int best_active = 0;
3583 LAG_ref best_lag = NULL;
3584 int best_count = 0;
3585 int best_speed = 0;
3586 LAG_ref lag;
3587
3588 if (bond->ifb_active_lag != NULL) {
3589 best_lag = bond->ifb_active_lag;
3590 best_count = LAG_get_aggregatable_port_count(best_lag, &best_active);
3591 if (bond->ifb_max_active > 0
3592 && best_count > bond->ifb_max_active) {
3593 best_count = bond->ifb_max_active;
3594 }
3595 best_speed = link_speed(best_active);
3596 }
3597 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3598 int active;
3599 int count;
3600 int speed;
3601
3602 if (lag == bond->ifb_active_lag) {
3603 /* we've already computed it */
3604 continue;
3605 }
3606 count = LAG_get_aggregatable_port_count(lag, &active);
3607 if (count == 0) {
3608 continue;
3609 }
3610 if (bond->ifb_max_active > 0
3611 && count > bond->ifb_max_active) {
3612 /* if there's a limit, don't count extra links */
3613 count = bond->ifb_max_active;
3614 }
3615 speed = link_speed(active);
3616 if ((count * speed) > (best_count * best_speed)) {
3617 best_count = count;
3618 best_speed = speed;
3619 best_active = active;
3620 best_lag = lag;
3621 }
3622 }
3623 if (best_count == 0) {
3624 return NULL;
3625 }
3626 *active_media = best_active;
3627 return best_lag;
3628 }
3629
3630 static void
ifbond_deactivate_LAG(__unused ifbond_ref bond,LAG_ref lag)3631 ifbond_deactivate_LAG(__unused ifbond_ref bond, LAG_ref lag)
3632 {
3633 bondport_ref p;
3634
3635 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3636 bondport_set_selected(p, SelectedState_UNSELECTED);
3637 }
3638 return;
3639 }
3640
3641 static void
ifbond_activate_LAG(ifbond_ref bond,LAG_ref lag,int active_media)3642 ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media)
3643 {
3644 int need = 0;
3645 bondport_ref p;
3646
3647 if (bond->ifb_max_active > 0) {
3648 need = bond->ifb_max_active;
3649 }
3650 lag->lag_active_media = active_media;
3651 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3652 if (bondport_aggregatable(p) == 0) {
3653 bondport_set_selected(p, SelectedState_UNSELECTED);
3654 } else if (media_speed(&p->po_media_info) != link_speed(active_media)) {
3655 bondport_set_selected(p, SelectedState_UNSELECTED);
3656 } else if (p->po_mux_state == MuxState_DETACHED) {
3657 if (bond->ifb_max_active > 0) {
3658 if (need > 0) {
3659 bondport_set_selected(p, SelectedState_SELECTED);
3660 need--;
3661 } else {
3662 bondport_set_selected(p, SelectedState_STANDBY);
3663 }
3664 } else {
3665 bondport_set_selected(p, SelectedState_SELECTED);
3666 }
3667 } else {
3668 bondport_set_selected(p, SelectedState_UNSELECTED);
3669 }
3670 }
3671 return;
3672 }
3673
/*
 * NOTE: compiled out (#if 0).
 *
 * ifbond_set_max_active() stores a new max-active limit on the bond and,
 * if the active LAG currently has more SELECTED ports than the new
 * (positive) limit, marks the excess ports UNSELECTED in list order.
 */
#if 0
static void
ifbond_set_max_active(ifbond_ref bond, int max_active)
{
	LAG_ref     lag = bond->ifb_active_lag;

	bond->ifb_max_active = max_active;
	if (bond->ifb_max_active <= 0 || lag == NULL) {
		/* no limit, or nothing active to trim */
		return;
	}
	if (lag->lag_selected_port_count > bond->ifb_max_active) {
		bondport_ref    p;
		int             remove_count;

		/* number of selected ports over the new limit */
		remove_count = lag->lag_selected_port_count - bond->ifb_max_active;
		TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
			if (p->po_selected == SelectedState_SELECTED) {
				bondport_set_selected(p, SelectedState_UNSELECTED);
				remove_count--;
				if (remove_count == 0) {
					break;
				}
			}
		}
	}
	return;
}
#endif
3702
3703 static int
ifbond_all_ports_ready(ifbond_ref bond)3704 ifbond_all_ports_ready(ifbond_ref bond)
3705 {
3706 int ready = 0;
3707 bondport_ref p;
3708
3709 if (bond->ifb_active_lag == NULL) {
3710 return 0;
3711 }
3712 TAILQ_FOREACH(p, &bond->ifb_active_lag->lag_port_list, po_lag_port_list) {
3713 if (p->po_mux_state == MuxState_WAITING
3714 && p->po_selected == SelectedState_SELECTED) {
3715 if (bondport_flags_ready(p) == 0) {
3716 return 0;
3717 }
3718 }
3719 /* note that there was at least one ready port */
3720 ready = 1;
3721 }
3722 return ready;
3723 }
3724
3725 static int
ifbond_all_ports_attached(ifbond_ref bond,bondport_ref this_port)3726 ifbond_all_ports_attached(ifbond_ref bond, bondport_ref this_port)
3727 {
3728 bondport_ref p;
3729
3730 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3731 if (this_port == p) {
3732 continue;
3733 }
3734 if (bondport_flags_mux_attached(p) == 0) {
3735 return 0;
3736 }
3737 }
3738 return 1;
3739 }
3740
3741 static LAG_ref
ifbond_get_LAG_matching_port(ifbond_ref bond,bondport_ref p)3742 ifbond_get_LAG_matching_port(ifbond_ref bond, bondport_ref p)
3743 {
3744 LAG_ref lag;
3745
3746 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3747 if (bcmp(&lag->lag_info, &p->po_partner_state.ps_lag_info,
3748 sizeof(lag->lag_info)) == 0) {
3749 return lag;
3750 }
3751 }
3752 return NULL;
3753 }
3754
3755 static int
LAG_get_aggregatable_port_count(LAG_ref lag,int * active_media)3756 LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media)
3757 {
3758 int active;
3759 int count;
3760 bondport_ref p;
3761 int speed;
3762
3763 active = 0;
3764 count = 0;
3765 speed = 0;
3766 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3767 if (bondport_aggregatable(p)) {
3768 int this_speed;
3769
3770 this_speed = media_speed(&p->po_media_info);
3771 if (this_speed == 0) {
3772 continue;
3773 }
3774 if (this_speed > speed) {
3775 active = p->po_media_info.mi_active;
3776 speed = this_speed;
3777 count = 1;
3778 } else if (this_speed == speed) {
3779 count++;
3780 }
3781 }
3782 }
3783 *active_media = active;
3784 return count;
3785 }
3786
3787
3788 /**
3789 ** LACP bondport routines
3790 **/
3791 static void
bondport_link_status_changed(bondport_ref p)3792 bondport_link_status_changed(bondport_ref p)
3793 {
3794 ifbond_ref bond = p->po_bond;
3795
3796 if (if_bond_debug) {
3797 if (media_active(&p->po_media_info)) {
3798 const char * duplex_string;
3799
3800 if (media_full_duplex(&p->po_media_info)) {
3801 duplex_string = "full";
3802 } else if (media_type_unknown(&p->po_media_info)) {
3803 duplex_string = "unknown";
3804 } else {
3805 duplex_string = "half";
3806 }
3807 timestamp_printf("[%s] Link UP %d Mbit/s %s duplex\n",
3808 bondport_get_name(p),
3809 media_speed(&p->po_media_info),
3810 duplex_string);
3811 } else {
3812 timestamp_printf("[%s] Link DOWN\n",
3813 bondport_get_name(p));
3814 }
3815 }
3816 if (bond->ifb_mode == IF_BOND_MODE_LACP) {
3817 if (media_active(&p->po_media_info)
3818 && bond->ifb_active_lag != NULL
3819 && p->po_lag == bond->ifb_active_lag
3820 && p->po_selected != SelectedState_UNSELECTED) {
3821 if (media_speed(&p->po_media_info) != p->po_lag->lag_active_media) {
3822 if (if_bond_debug) {
3823 timestamp_printf("[%s] Port speed %d differs from LAG %d\n",
3824 bondport_get_name(p),
3825 media_speed(&p->po_media_info),
3826 link_speed(p->po_lag->lag_active_media));
3827 }
3828 bondport_set_selected(p, SelectedState_UNSELECTED);
3829 }
3830 }
3831 bondport_receive_machine(p, LAEventMediaChange, NULL);
3832 bondport_mux_machine(p, LAEventMediaChange, NULL);
3833 bondport_periodic_transmit_machine(p, LAEventMediaChange, NULL);
3834 } else {
3835 if (media_active(&p->po_media_info)) {
3836 bondport_enable_distributing(p);
3837 } else {
3838 bondport_disable_distributing(p);
3839 }
3840 }
3841 return;
3842 }
3843
3844 static int
bondport_aggregatable(bondport_ref p)3845 bondport_aggregatable(bondport_ref p)
3846 {
3847 partner_state_ref ps = &p->po_partner_state;
3848
3849 if (lacp_actor_partner_state_aggregatable(p->po_actor_state) == 0
3850 || lacp_actor_partner_state_aggregatable(ps->ps_state) == 0) {
3851 /* we and/or our partner are individual */
3852 return 0;
3853 }
3854 if (p->po_lag == NULL) {
3855 return 0;
3856 }
3857 switch (p->po_receive_state) {
3858 default:
3859 if (if_bond_debug) {
3860 timestamp_printf("[%s] Port is not selectable\n",
3861 bondport_get_name(p));
3862 }
3863 return 0;
3864 case ReceiveState_CURRENT:
3865 case ReceiveState_EXPIRED:
3866 break;
3867 }
3868 return 1;
3869 }
3870
3871 static int
bondport_matches_LAG(bondport_ref p,LAG_ref lag)3872 bondport_matches_LAG(bondport_ref p, LAG_ref lag)
3873 {
3874 LAG_info_ref lag_li;
3875 partner_state_ref ps;
3876 LAG_info_ref ps_li;
3877
3878 ps = &p->po_partner_state;
3879 ps_li = &ps->ps_lag_info;
3880 lag_li = &lag->lag_info;
3881 if (ps_li->li_system_priority == lag_li->li_system_priority
3882 && ps_li->li_key == lag_li->li_key
3883 && (bcmp(&ps_li->li_system, &lag_li->li_system,
3884 sizeof(lag_li->li_system))
3885 == 0)) {
3886 return 1;
3887 }
3888 return 0;
3889 }
3890
3891 static int
bondport_remove_from_LAG(bondport_ref p)3892 bondport_remove_from_LAG(bondport_ref p)
3893 {
3894 int active_lag = 0;
3895 ifbond_ref bond = p->po_bond;
3896 LAG_ref lag = p->po_lag;
3897
3898 if (lag == NULL) {
3899 return 0;
3900 }
3901 TAILQ_REMOVE(&lag->lag_port_list, p, po_lag_port_list);
3902 if (if_bond_debug) {
3903 timestamp_printf("[%s] Removed from LAG (0x%04x," EA_FORMAT
3904 ",0x%04x)\n",
3905 bondport_get_name(p),
3906 lag->lag_info.li_system_priority,
3907 EA_LIST(&lag->lag_info.li_system),
3908 lag->lag_info.li_key);
3909 }
3910 p->po_lag = NULL;
3911 lag->lag_port_count--;
3912 if (lag->lag_port_count > 0) {
3913 return bond->ifb_active_lag == lag;
3914 }
3915 if (if_bond_debug) {
3916 timestamp_printf("Key 0x%04x: LAG Released (%04x," EA_FORMAT
3917 ",0x%04x)\n",
3918 bond->ifb_key,
3919 lag->lag_info.li_system_priority,
3920 EA_LIST(&lag->lag_info.li_system),
3921 lag->lag_info.li_key);
3922 }
3923 TAILQ_REMOVE(&bond->ifb_lag_list, lag, lag_list);
3924 if (bond->ifb_active_lag == lag) {
3925 bond->ifb_active_lag = NULL;
3926 active_lag = 1;
3927 }
3928 kfree_type(struct LAG_s, lag);
3929 return active_lag;
3930 }
3931
3932 static void
bondport_add_to_LAG(bondport_ref p,LAG_ref lag)3933 bondport_add_to_LAG(bondport_ref p, LAG_ref lag)
3934 {
3935 TAILQ_INSERT_TAIL(&lag->lag_port_list, p, po_lag_port_list);
3936 p->po_lag = lag;
3937 lag->lag_port_count++;
3938 if (if_bond_debug) {
3939 timestamp_printf("[%s] Added to LAG (0x%04x," EA_FORMAT "0x%04x)\n",
3940 bondport_get_name(p),
3941 lag->lag_info.li_system_priority,
3942 EA_LIST(&lag->lag_info.li_system),
3943 lag->lag_info.li_key);
3944 }
3945 return;
3946 }
3947
3948 static void
bondport_assign_to_LAG(bondport_ref p)3949 bondport_assign_to_LAG(bondport_ref p)
3950 {
3951 ifbond_ref bond = p->po_bond;
3952 LAG_ref lag;
3953
3954 if (lacp_actor_partner_state_defaulted(p->po_actor_state)) {
3955 bondport_remove_from_LAG(p);
3956 return;
3957 }
3958 lag = p->po_lag;
3959 if (lag != NULL) {
3960 if (bondport_matches_LAG(p, lag)) {
3961 /* still OK */
3962 return;
3963 }
3964 bondport_remove_from_LAG(p);
3965 }
3966 lag = ifbond_get_LAG_matching_port(bond, p);
3967 if (lag != NULL) {
3968 bondport_add_to_LAG(p, lag);
3969 return;
3970 }
3971 lag = kalloc_type(struct LAG_s, Z_WAITOK);
3972 TAILQ_INIT(&lag->lag_port_list);
3973 lag->lag_port_count = 0;
3974 lag->lag_selected_port_count = 0;
3975 lag->lag_info = p->po_partner_state.ps_lag_info;
3976 TAILQ_INSERT_TAIL(&bond->ifb_lag_list, lag, lag_list);
3977 if (if_bond_debug) {
3978 timestamp_printf("Key 0x%04x: LAG Created (0x%04x," EA_FORMAT
3979 ",0x%04x)\n",
3980 bond->ifb_key,
3981 lag->lag_info.li_system_priority,
3982 EA_LIST(&lag->lag_info.li_system),
3983 lag->lag_info.li_key);
3984 }
3985 bondport_add_to_LAG(p, lag);
3986 return;
3987 }
3988
3989 static void
bondport_receive_lacpdu(bondport_ref p,lacpdu_ref in_lacpdu_p)3990 bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p)
3991 {
3992 bondport_ref moved_port;
3993
3994 moved_port
3995 = ifbond_list_find_moved_port(p, (const lacp_actor_partner_tlv_ref)
3996 &in_lacpdu_p->la_actor_tlv);
3997 if (moved_port != NULL) {
3998 bondport_receive_machine(moved_port, LAEventPortMoved, NULL);
3999 }
4000 bondport_receive_machine(p, LAEventPacket, in_lacpdu_p);
4001 bondport_mux_machine(p, LAEventPacket, in_lacpdu_p);
4002 bondport_periodic_transmit_machine(p, LAEventPacket, in_lacpdu_p);
4003 return;
4004 }
4005
4006 static void
bondport_set_selected(bondport_ref p,SelectedState s)4007 bondport_set_selected(bondport_ref p, SelectedState s)
4008 {
4009 if (s != p->po_selected) {
4010 ifbond_ref bond = p->po_bond;
4011 LAG_ref lag = p->po_lag;
4012
4013 bondport_flags_set_selected_changed(p);
4014 if (lag != NULL && bond->ifb_active_lag == lag) {
4015 if (p->po_selected == SelectedState_SELECTED) {
4016 lag->lag_selected_port_count--;
4017 } else if (s == SelectedState_SELECTED) {
4018 lag->lag_selected_port_count++;
4019 }
4020 if (if_bond_debug) {
4021 timestamp_printf("[%s] SetSelected: %s (was %s)\n",
4022 bondport_get_name(p),
4023 SelectedStateString(s),
4024 SelectedStateString(p->po_selected));
4025 }
4026 }
4027 }
4028 p->po_selected = s;
4029 return;
4030 }
4031
4032 /**
4033 ** Receive machine
4034 **/
4035
4036 static void
bondport_UpdateDefaultSelected(bondport_ref p)4037 bondport_UpdateDefaultSelected(bondport_ref p)
4038 {
4039 bondport_set_selected(p, SelectedState_UNSELECTED);
4040 return;
4041 }
4042
4043 static void
bondport_RecordDefault(bondport_ref p)4044 bondport_RecordDefault(bondport_ref p)
4045 {
4046 bzero(&p->po_partner_state, sizeof(p->po_partner_state));
4047 p->po_actor_state
4048 = lacp_actor_partner_state_set_defaulted(p->po_actor_state);
4049 bondport_assign_to_LAG(p);
4050 return;
4051 }
4052
4053 static void
bondport_UpdateSelected(bondport_ref p,lacpdu_ref lacpdu_p)4054 bondport_UpdateSelected(bondport_ref p, lacpdu_ref lacpdu_p)
4055 {
4056 lacp_actor_partner_tlv_ref actor;
4057 partner_state_ref ps;
4058 LAG_info_ref ps_li;
4059
4060 /* compare the PDU's Actor information to our Partner state */
4061 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
4062 ps = &p->po_partner_state;
4063 ps_li = &ps->ps_lag_info;
4064 if (lacp_actor_partner_tlv_get_port(actor) != ps->ps_port
4065 || (lacp_actor_partner_tlv_get_port_priority(actor)
4066 != ps->ps_port_priority)
4067 || bcmp(actor->lap_system, &ps_li->li_system, sizeof(ps_li->li_system))
4068 || (lacp_actor_partner_tlv_get_system_priority(actor)
4069 != ps_li->li_system_priority)
4070 || (lacp_actor_partner_tlv_get_key(actor) != ps_li->li_key)
4071 || (lacp_actor_partner_state_aggregatable(actor->lap_state)
4072 != lacp_actor_partner_state_aggregatable(ps->ps_state))) {
4073 bondport_set_selected(p, SelectedState_UNSELECTED);
4074 if (if_bond_debug) {
4075 timestamp_printf("[%s] updateSelected UNSELECTED\n",
4076 bondport_get_name(p));
4077 }
4078 }
4079 return;
4080 }
4081
4082 static void
bondport_RecordPDU(bondport_ref p,lacpdu_ref lacpdu_p)4083 bondport_RecordPDU(bondport_ref p, lacpdu_ref lacpdu_p)
4084 {
4085 lacp_actor_partner_tlv_ref actor;
4086 ifbond_ref bond = p->po_bond;
4087 int lacp_maintain = 0;
4088 partner_state_ref ps;
4089 lacp_actor_partner_tlv_ref partner;
4090 LAG_info_ref ps_li;
4091
4092 /* copy the PDU's Actor information into our Partner state */
4093 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
4094 ps = &p->po_partner_state;
4095 ps_li = &ps->ps_lag_info;
4096 ps->ps_port = lacp_actor_partner_tlv_get_port(actor);
4097 ps->ps_port_priority = lacp_actor_partner_tlv_get_port_priority(actor);
4098 ps_li->li_system = *((lacp_system_ref)actor->lap_system);
4099 ps_li->li_system_priority
4100 = lacp_actor_partner_tlv_get_system_priority(actor);
4101 ps_li->li_key = lacp_actor_partner_tlv_get_key(actor);
4102 ps->ps_state = lacp_actor_partner_state_set_out_of_sync(actor->lap_state);
4103 p->po_actor_state
4104 = lacp_actor_partner_state_set_not_defaulted(p->po_actor_state);
4105
4106 /* compare the PDU's Partner information to our own information */
4107 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
4108
4109 if (lacp_actor_partner_state_active_lacp(ps->ps_state)
4110 || (lacp_actor_partner_state_active_lacp(p->po_actor_state)
4111 && lacp_actor_partner_state_active_lacp(partner->lap_state))) {
4112 if (if_bond_debug) {
4113 timestamp_printf("[%s] recordPDU: LACP will maintain\n",
4114 bondport_get_name(p));
4115 }
4116 lacp_maintain = 1;
4117 }
4118 if ((lacp_actor_partner_tlv_get_port(partner)
4119 == bondport_get_index(p))
4120 && lacp_actor_partner_tlv_get_port_priority(partner) == p->po_priority
4121 && bcmp(partner->lap_system, &g_bond->system,
4122 sizeof(g_bond->system)) == 0
4123 && (lacp_actor_partner_tlv_get_system_priority(partner)
4124 == g_bond->system_priority)
4125 && lacp_actor_partner_tlv_get_key(partner) == bond->ifb_key
4126 && (lacp_actor_partner_state_aggregatable(partner->lap_state)
4127 == lacp_actor_partner_state_aggregatable(p->po_actor_state))
4128 && lacp_actor_partner_state_in_sync(actor->lap_state)
4129 && lacp_maintain) {
4130 ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
4131 if (if_bond_debug) {
4132 timestamp_printf("[%s] recordPDU: LACP partner in sync\n",
4133 bondport_get_name(p));
4134 }
4135 } else if (lacp_actor_partner_state_aggregatable(actor->lap_state) == 0
4136 && lacp_actor_partner_state_in_sync(actor->lap_state)
4137 && lacp_maintain) {
4138 ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
4139 if (if_bond_debug) {
4140 timestamp_printf("[%s] recordPDU: LACP partner in sync (ind)\n",
4141 bondport_get_name(p));
4142 }
4143 }
4144 bondport_assign_to_LAG(p);
4145 return;
4146 }
4147
4148 static __inline__ lacp_actor_partner_state
updateNTTBits(lacp_actor_partner_state s)4149 updateNTTBits(lacp_actor_partner_state s)
4150 {
4151 return s & (LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY
4152 | LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT
4153 | LACP_ACTOR_PARTNER_STATE_AGGREGATION
4154 | LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION);
4155 }
4156
4157 static void
bondport_UpdateNTT(bondport_ref p,lacpdu_ref lacpdu_p)4158 bondport_UpdateNTT(bondport_ref p, lacpdu_ref lacpdu_p)
4159 {
4160 ifbond_ref bond = p->po_bond;
4161 lacp_actor_partner_tlv_ref partner;
4162
4163 /* compare the PDU's Actor information to our Partner state */
4164 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
4165 if ((lacp_actor_partner_tlv_get_port(partner) != bondport_get_index(p))
4166 || lacp_actor_partner_tlv_get_port_priority(partner) != p->po_priority
4167 || bcmp(partner->lap_system, &g_bond->system, sizeof(g_bond->system))
4168 || (lacp_actor_partner_tlv_get_system_priority(partner)
4169 != g_bond->system_priority)
4170 || lacp_actor_partner_tlv_get_key(partner) != bond->ifb_key
4171 || (updateNTTBits(partner->lap_state)
4172 != updateNTTBits(p->po_actor_state))) {
4173 bondport_flags_set_ntt(p);
4174 if (if_bond_debug) {
4175 timestamp_printf("[%s] updateNTT: Need To Transmit\n",
4176 bondport_get_name(p));
4177 }
4178 }
4179 return;
4180 }
4181
4182 static void
bondport_AttachMuxToAggregator(bondport_ref p)4183 bondport_AttachMuxToAggregator(bondport_ref p)
4184 {
4185 if (bondport_flags_mux_attached(p) == 0) {
4186 if (if_bond_debug) {
4187 timestamp_printf("[%s] Attached Mux To Aggregator\n",
4188 bondport_get_name(p));
4189 }
4190 bondport_flags_set_mux_attached(p);
4191 }
4192 return;
4193 }
4194
4195 static void
bondport_DetachMuxFromAggregator(bondport_ref p)4196 bondport_DetachMuxFromAggregator(bondport_ref p)
4197 {
4198 if (bondport_flags_mux_attached(p)) {
4199 if (if_bond_debug) {
4200 timestamp_printf("[%s] Detached Mux From Aggregator\n",
4201 bondport_get_name(p));
4202 }
4203 bondport_flags_clear_mux_attached(p);
4204 }
4205 return;
4206 }
4207
4208 static void
bondport_enable_distributing(bondport_ref p)4209 bondport_enable_distributing(bondport_ref p)
4210 {
4211 if (bondport_flags_distributing(p) == 0) {
4212 ifbond_ref bond = p->po_bond;
4213
4214 bond->ifb_distributing_array[bond->ifb_distributing_count++] = p;
4215 if (if_bond_debug) {
4216 timestamp_printf("[%s] Distribution Enabled\n",
4217 bondport_get_name(p));
4218 }
4219 bondport_flags_set_distributing(p);
4220 }
4221 return;
4222 }
4223
4224 static void
bondport_disable_distributing(bondport_ref p)4225 bondport_disable_distributing(bondport_ref p)
4226 {
4227 if (bondport_flags_distributing(p)) {
4228 bondport_ref * array;
4229 ifbond_ref bond;
4230 int count;
4231 int i;
4232
4233 bond = p->po_bond;
4234 array = bond->ifb_distributing_array;
4235 count = bond->ifb_distributing_count;
4236 for (i = 0; i < count; i++) {
4237 if (array[i] == p) {
4238 int j;
4239
4240 for (j = i; j < (count - 1); j++) {
4241 array[j] = array[j + 1];
4242 }
4243 break;
4244 }
4245 }
4246 bond->ifb_distributing_count--;
4247 if (if_bond_debug) {
4248 timestamp_printf("[%s] Distribution Disabled\n",
4249 bondport_get_name(p));
4250 }
4251 bondport_flags_clear_distributing(p);
4252 }
4253 return;
4254 }
4255
/**
** Receive machine functions
**
** One handler per receive state.  They are declared up front because
** bondport_receive_machine_event() dispatches to them by state and the
** handlers also invoke one another to perform state transitions.
**/
static void
bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
    void * event_data);
static void
bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
    void * event_data);
static void
bondport_receive_machine_expired(bondport_ref p, LAEvent event,
    void * event_data);
static void
bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
    void * event_data);
static void
bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
    void * event_data);
static void
bondport_receive_machine_current(bondport_ref p, LAEvent event,
    void * event_data);
4277
4278 static void
bondport_receive_machine_event(bondport_ref p,LAEvent event,void * event_data)4279 bondport_receive_machine_event(bondport_ref p, LAEvent event,
4280 void * event_data)
4281 {
4282 switch (p->po_receive_state) {
4283 case ReceiveState_none:
4284 bondport_receive_machine_initialize(p, LAEventStart, NULL);
4285 break;
4286 case ReceiveState_INITIALIZE:
4287 bondport_receive_machine_initialize(p, event, event_data);
4288 break;
4289 case ReceiveState_PORT_DISABLED:
4290 bondport_receive_machine_port_disabled(p, event, event_data);
4291 break;
4292 case ReceiveState_EXPIRED:
4293 bondport_receive_machine_expired(p, event, event_data);
4294 break;
4295 case ReceiveState_LACP_DISABLED:
4296 bondport_receive_machine_lacp_disabled(p, event, event_data);
4297 break;
4298 case ReceiveState_DEFAULTED:
4299 bondport_receive_machine_defaulted(p, event, event_data);
4300 break;
4301 case ReceiveState_CURRENT:
4302 bondport_receive_machine_current(p, event, event_data);
4303 break;
4304 default:
4305 break;
4306 }
4307 return;
4308 }
4309
4310 static void
bondport_receive_machine(bondport_ref p,LAEvent event,void * event_data)4311 bondport_receive_machine(bondport_ref p, LAEvent event,
4312 void * event_data)
4313 {
4314 switch (event) {
4315 case LAEventPacket:
4316 if (p->po_receive_state != ReceiveState_LACP_DISABLED) {
4317 bondport_receive_machine_current(p, event, event_data);
4318 }
4319 break;
4320 case LAEventMediaChange:
4321 if (media_active(&p->po_media_info)) {
4322 switch (p->po_receive_state) {
4323 case ReceiveState_PORT_DISABLED:
4324 case ReceiveState_LACP_DISABLED:
4325 bondport_receive_machine_port_disabled(p, LAEventMediaChange, NULL);
4326 break;
4327 default:
4328 break;
4329 }
4330 } else {
4331 bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
4332 }
4333 break;
4334 default:
4335 bondport_receive_machine_event(p, event, event_data);
4336 break;
4337 }
4338 return;
4339 }
4340
4341 static void
bondport_receive_machine_initialize(bondport_ref p,LAEvent event,__unused void * event_data)4342 bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
4343 __unused void * event_data)
4344 {
4345 switch (event) {
4346 case LAEventStart:
4347 devtimer_cancel(p->po_current_while_timer);
4348 if (if_bond_debug) {
4349 timestamp_printf("[%s] Receive INITIALIZE\n",
4350 bondport_get_name(p));
4351 }
4352 p->po_receive_state = ReceiveState_INITIALIZE;
4353 bondport_set_selected(p, SelectedState_UNSELECTED);
4354 bondport_RecordDefault(p);
4355 p->po_actor_state
4356 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4357 bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
4358 break;
4359 default:
4360 break;
4361 }
4362 return;
4363 }
4364
/*
 * Function: bondport_receive_machine_port_disabled
 * Purpose:
 *   PORT_DISABLED state of the receive machine.
 *
 *   LAEventStart enters the state (cancels the current-while timer, marks
 *   the partner out of sync) and then falls through to the media check.
 *   LAEventMediaChange moves on to EXPIRED or LACP_DISABLED if the media
 *   is active; otherwise a SELECTED port gets a 1 second grace timer and
 *   a STANDBY port is marked UNSELECTED.
 *   LAEventTimeout marks a still-SELECTED port UNSELECTED.
 *   LAEventPortMoved restarts the machine from INITIALIZE.
 *
 *   This function is also installed as the timer callback (see below),
 *   so its signature must stay compatible with that use.
 */
static void
bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
    __unused void * event_data)
{
	partner_state_ref   ps;

	switch (event) {
	case LAEventStart:
		/* entering PORT_DISABLED */
		devtimer_cancel(p->po_current_while_timer);
		if (if_bond_debug) {
			timestamp_printf("[%s] Receive PORT_DISABLED\n",
			    bondport_get_name(p));
		}
		p->po_receive_state = ReceiveState_PORT_DISABLED;
		ps = &p->po_partner_state;
		ps->ps_state = lacp_actor_partner_state_set_out_of_sync(ps->ps_state);
		/* a start event is followed by the same checks as a media change */
		OS_FALLTHROUGH;
	case LAEventMediaChange:
		if (media_active(&p->po_media_info)) {
			/* link is up: leave PORT_DISABLED right away */
			if (media_ok(&p->po_media_info)) {
				bondport_receive_machine_expired(p, LAEventStart, NULL);
			} else {
				bondport_receive_machine_lacp_disabled(p, LAEventStart, NULL);
			}
		} else if (p->po_selected == SelectedState_SELECTED) {
			struct timeval  tv;

			if (if_bond_debug) {
				timestamp_printf("[%s] Receive PORT_DISABLED: "
				    "link timer started\n",
				    bondport_get_name(p));
			}
			/*
			 * Link is down on a selected port: arm a 1 second timer with
			 * this function as the callback and LAEventTimeout as its
			 * argument, rather than unselecting the port immediately.
			 * NOTE(review): presumably devtimer invokes the callback as
			 * (p, LAEventTimeout, NULL); confirm against devtimer.
			 */
			tv.tv_sec = 1;
			tv.tv_usec = 0;
			devtimer_set_relative(p->po_current_while_timer, tv,
			    (devtimer_timeout_func)(void (*)(void))
			    bondport_receive_machine_port_disabled,
			    (void *)LAEventTimeout, NULL);
		} else if (p->po_selected == SelectedState_STANDBY) {
			bondport_set_selected(p, SelectedState_UNSELECTED);
		}
		break;
	case LAEventTimeout:
		/* timer expired; only act if the port is still SELECTED */
		if (p->po_selected == SelectedState_SELECTED) {
			if (if_bond_debug) {
				timestamp_printf("[%s] Receive PORT_DISABLED: "
				    "link timer completed, marking UNSELECTED\n",
				    bondport_get_name(p));
			}
			bondport_set_selected(p, SelectedState_UNSELECTED);
		}
		break;
	case LAEventPortMoved:
		/* the partner showed up on another port: start over */
		bondport_receive_machine_initialize(p, LAEventStart, NULL);
		break;
	default:
		break;
	}
	return;
}
4425
4426 static void
bondport_receive_machine_expired(bondport_ref p,LAEvent event,__unused void * event_data)4427 bondport_receive_machine_expired(bondport_ref p, LAEvent event,
4428 __unused void * event_data)
4429 {
4430 lacp_actor_partner_state s;
4431 struct timeval tv;
4432
4433 switch (event) {
4434 case LAEventStart:
4435 devtimer_cancel(p->po_current_while_timer);
4436 if (if_bond_debug) {
4437 timestamp_printf("[%s] Receive EXPIRED\n",
4438 bondport_get_name(p));
4439 }
4440 p->po_receive_state = ReceiveState_EXPIRED;
4441 s = p->po_partner_state.ps_state;
4442 s = lacp_actor_partner_state_set_out_of_sync(s);
4443 s = lacp_actor_partner_state_set_short_timeout(s);
4444 p->po_partner_state.ps_state = s;
4445 p->po_actor_state
4446 = lacp_actor_partner_state_set_expired(p->po_actor_state);
4447 /* start current_while timer */
4448 tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4449 tv.tv_usec = 0;
4450 devtimer_set_relative(p->po_current_while_timer, tv,
4451 (devtimer_timeout_func)(void (*)(void))
4452 bondport_receive_machine_expired,
4453 (void *)LAEventTimeout, NULL);
4454
4455 break;
4456 case LAEventTimeout:
4457 bondport_receive_machine_defaulted(p, LAEventStart, NULL);
4458 break;
4459 default:
4460 break;
4461 }
4462 return;
4463 }
4464
4465 static void
bondport_receive_machine_lacp_disabled(bondport_ref p,LAEvent event,__unused void * event_data)4466 bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
4467 __unused void * event_data)
4468 {
4469 partner_state_ref ps;
4470 switch (event) {
4471 case LAEventStart:
4472 devtimer_cancel(p->po_current_while_timer);
4473 if (if_bond_debug) {
4474 timestamp_printf("[%s] Receive LACP_DISABLED\n",
4475 bondport_get_name(p));
4476 }
4477 p->po_receive_state = ReceiveState_LACP_DISABLED;
4478 bondport_set_selected(p, SelectedState_UNSELECTED);
4479 bondport_RecordDefault(p);
4480 ps = &p->po_partner_state;
4481 ps->ps_state = lacp_actor_partner_state_set_individual(ps->ps_state);
4482 p->po_actor_state
4483 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4484 break;
4485 default:
4486 break;
4487 }
4488 return;
4489 }
4490
4491 static void
bondport_receive_machine_defaulted(bondport_ref p,LAEvent event,__unused void * event_data)4492 bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
4493 __unused void * event_data)
4494 {
4495 switch (event) {
4496 case LAEventStart:
4497 devtimer_cancel(p->po_current_while_timer);
4498 if (if_bond_debug) {
4499 timestamp_printf("[%s] Receive DEFAULTED\n",
4500 bondport_get_name(p));
4501 }
4502 p->po_receive_state = ReceiveState_DEFAULTED;
4503 bondport_UpdateDefaultSelected(p);
4504 bondport_RecordDefault(p);
4505 p->po_actor_state
4506 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4507 break;
4508 default:
4509 break;
4510 }
4511 return;
4512 }
4513
4514 static void
bondport_receive_machine_current(bondport_ref p,LAEvent event,void * event_data)4515 bondport_receive_machine_current(bondport_ref p, LAEvent event,
4516 void * event_data)
4517 {
4518 partner_state_ref ps;
4519 struct timeval tv;
4520
4521 switch (event) {
4522 case LAEventPacket:
4523 devtimer_cancel(p->po_current_while_timer);
4524 if (if_bond_debug) {
4525 timestamp_printf("[%s] Receive CURRENT\n",
4526 bondport_get_name(p));
4527 }
4528 p->po_receive_state = ReceiveState_CURRENT;
4529 bondport_UpdateSelected(p, event_data);
4530 bondport_UpdateNTT(p, event_data);
4531 bondport_RecordPDU(p, event_data);
4532 p->po_actor_state
4533 = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4534 bondport_assign_to_LAG(p);
4535 /* start current_while timer */
4536 ps = &p->po_partner_state;
4537 if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4538 tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4539 } else {
4540 tv.tv_sec = LACP_LONG_TIMEOUT_TIME;
4541 }
4542 tv.tv_usec = 0;
4543 devtimer_set_relative(p->po_current_while_timer, tv,
4544 (devtimer_timeout_func)(void (*)(void))
4545 bondport_receive_machine_current,
4546 (void *)LAEventTimeout, NULL);
4547 break;
4548 case LAEventTimeout:
4549 bondport_receive_machine_expired(p, LAEventStart, NULL);
4550 break;
4551 default:
4552 break;
4553 }
4554 return;
4555 }
4556
4557 /**
4558 ** Periodic Transmission machine
4559 **/
4560
4561 static void
bondport_periodic_transmit_machine(bondport_ref p,LAEvent event,__unused void * event_data)4562 bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
4563 __unused void * event_data)
4564 {
4565 int interval;
4566 partner_state_ref ps;
4567 struct timeval tv;
4568
4569 switch (event) {
4570 case LAEventStart:
4571 if (if_bond_debug) {
4572 timestamp_printf("[%s] periodic_transmit Start\n",
4573 bondport_get_name(p));
4574 }
4575 OS_FALLTHROUGH;
4576 case LAEventMediaChange:
4577 devtimer_cancel(p->po_periodic_timer);
4578 p->po_periodic_interval = 0;
4579 if (media_active(&p->po_media_info) == 0
4580 || media_ok(&p->po_media_info) == 0) {
4581 break;
4582 }
4583 OS_FALLTHROUGH;
4584 case LAEventPacket:
4585 /* Neither Partner nor Actor are LACP Active, no periodic tx */
4586 ps = &p->po_partner_state;
4587 if (lacp_actor_partner_state_active_lacp(p->po_actor_state) == 0
4588 && (lacp_actor_partner_state_active_lacp(ps->ps_state)
4589 == 0)) {
4590 devtimer_cancel(p->po_periodic_timer);
4591 p->po_periodic_interval = 0;
4592 break;
4593 }
4594 if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4595 interval = LACP_FAST_PERIODIC_TIME;
4596 } else {
4597 interval = LACP_SLOW_PERIODIC_TIME;
4598 }
4599 if (p->po_periodic_interval != interval) {
4600 if (interval == LACP_FAST_PERIODIC_TIME
4601 && p->po_periodic_interval == LACP_SLOW_PERIODIC_TIME) {
4602 if (if_bond_debug) {
4603 timestamp_printf("[%s] periodic_transmit:"
4604 " Need To Transmit\n",
4605 bondport_get_name(p));
4606 }
4607 bondport_flags_set_ntt(p);
4608 }
4609 p->po_periodic_interval = interval;
4610 tv.tv_usec = 0;
4611 tv.tv_sec = interval;
4612 devtimer_set_relative(p->po_periodic_timer, tv,
4613 (devtimer_timeout_func)(void (*)(void))
4614 bondport_periodic_transmit_machine,
4615 (void *)LAEventTimeout, NULL);
4616 if (if_bond_debug) {
4617 timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n",
4618 bondport_get_name(p),
4619 p->po_periodic_interval);
4620 }
4621 }
4622 break;
4623 case LAEventTimeout:
4624 bondport_flags_set_ntt(p);
4625 tv.tv_sec = p->po_periodic_interval;
4626 tv.tv_usec = 0;
4627 devtimer_set_relative(p->po_periodic_timer, tv, (devtimer_timeout_func)(void (*)(void))
4628 bondport_periodic_transmit_machine,
4629 (void *)LAEventTimeout, NULL);
4630 if (if_bond_debug > 1) {
4631 timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n",
4632 bondport_get_name(p), p->po_periodic_interval);
4633 }
4634 break;
4635 default:
4636 break;
4637 }
4638 return;
4639 }
4640
4641 /**
4642 ** Transmit machine
4643 **/
4644 static int
bondport_can_transmit(bondport_ref p,int32_t current_secs,__darwin_time_t * next_secs)4645 bondport_can_transmit(bondport_ref p, int32_t current_secs,
4646 __darwin_time_t * next_secs)
4647 {
4648 if (p->po_last_transmit_secs != current_secs) {
4649 p->po_last_transmit_secs = current_secs;
4650 p->po_n_transmit = 0;
4651 }
4652 if (p->po_n_transmit < LACP_PACKET_RATE) {
4653 p->po_n_transmit++;
4654 return 1;
4655 }
4656 if (next_secs != NULL) {
4657 *next_secs = current_secs + 1;
4658 }
4659 return 0;
4660 }
4661
4662 static void
bondport_transmit_machine(bondport_ref p,LAEvent event,void * event_data)4663 bondport_transmit_machine(bondport_ref p, LAEvent event,
4664 void * event_data)
4665 {
4666 lacp_actor_partner_tlv_ref aptlv;
4667 lacp_collector_tlv_ref ctlv;
4668 struct timeval next_tick_time = {.tv_sec = 0, .tv_usec = 0};
4669 lacpdu_ref out_lacpdu_p;
4670 packet_buffer_ref pkt;
4671 partner_state_ref ps;
4672 LAG_info_ref ps_li;
4673
4674 switch (event) {
4675 case LAEventTimeout:
4676 case LAEventStart:
4677 if (p->po_periodic_interval == 0 || bondport_flags_ntt(p) == 0) {
4678 break;
4679 }
4680 if (event_data == TRANSMIT_MACHINE_TX_IMMEDIATE) {
4681 /* we're going away, transmit the packet no matter what */
4682 } else if (bondport_can_transmit(p, devtimer_current_secs(),
4683 &next_tick_time.tv_sec) == 0) {
4684 if (devtimer_enabled(p->po_transmit_timer)) {
4685 if (if_bond_debug > 0) {
4686 timestamp_printf("[%s] Transmit Timer Already Set\n",
4687 bondport_get_name(p));
4688 }
4689 } else {
4690 devtimer_set_absolute(p->po_transmit_timer, next_tick_time,
4691 (devtimer_timeout_func)(void (*)(void))
4692 bondport_transmit_machine,
4693 (void *)LAEventTimeout, NULL);
4694 if (if_bond_debug > 0) {
4695 timestamp_printf("[%s] Transmit Timer Deadline %d secs\n",
4696 bondport_get_name(p),
4697 (int)next_tick_time.tv_sec);
4698 }
4699 }
4700 break;
4701 }
4702 if (if_bond_debug > 0) {
4703 if (event == LAEventTimeout) {
4704 timestamp_printf("[%s] Transmit Timer Complete\n",
4705 bondport_get_name(p));
4706 }
4707 }
4708 pkt = packet_buffer_allocate(sizeof(*out_lacpdu_p));
4709 if (pkt == NULL) {
4710 printf("[%s] Transmit: failed to allocate packet buffer\n",
4711 bondport_get_name(p));
4712 break;
4713 }
4714 out_lacpdu_p = (lacpdu_ref)packet_buffer_byteptr(pkt);
4715 bzero(out_lacpdu_p, sizeof(*out_lacpdu_p));
4716 out_lacpdu_p->la_subtype = IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP;
4717 out_lacpdu_p->la_version = LACPDU_VERSION_1;
4718
4719 /* Actor */
4720 aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_actor_tlv;
4721 aptlv->lap_tlv_type = LACPDU_TLV_TYPE_ACTOR;
4722 aptlv->lap_length = LACPDU_ACTOR_TLV_LENGTH;
4723 *((lacp_system_ref)aptlv->lap_system) = g_bond->system;
4724 lacp_actor_partner_tlv_set_system_priority(aptlv,
4725 g_bond->system_priority);
4726 lacp_actor_partner_tlv_set_port_priority(aptlv, p->po_priority);
4727 lacp_actor_partner_tlv_set_port(aptlv, bondport_get_index(p));
4728 lacp_actor_partner_tlv_set_key(aptlv, p->po_bond->ifb_key);
4729 aptlv->lap_state = p->po_actor_state;
4730
4731 /* Partner */
4732 aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_partner_tlv;
4733 aptlv->lap_tlv_type = LACPDU_TLV_TYPE_PARTNER;
4734 aptlv->lap_length = LACPDU_PARTNER_TLV_LENGTH;
4735 ps = &p->po_partner_state;
4736 ps_li = &ps->ps_lag_info;
4737 lacp_actor_partner_tlv_set_port(aptlv, ps->ps_port);
4738 lacp_actor_partner_tlv_set_port_priority(aptlv, ps->ps_port_priority);
4739 *((lacp_system_ref)aptlv->lap_system) = ps_li->li_system;
4740 lacp_actor_partner_tlv_set_system_priority(aptlv,
4741 ps_li->li_system_priority);
4742 lacp_actor_partner_tlv_set_key(aptlv, ps_li->li_key);
4743 aptlv->lap_state = ps->ps_state;
4744
4745 /* Collector */
4746 ctlv = (lacp_collector_tlv_ref)out_lacpdu_p->la_collector_tlv;
4747 ctlv->lac_tlv_type = LACPDU_TLV_TYPE_COLLECTOR;
4748 ctlv->lac_length = LACPDU_COLLECTOR_TLV_LENGTH;
4749
4750 bondport_slow_proto_transmit(p, pkt);
4751 bondport_flags_clear_ntt(p);
4752 if (if_bond_debug > 0) {
4753 timestamp_printf("[%s] Transmit Packet %d\n",
4754 bondport_get_name(p), p->po_n_transmit);
4755 }
4756 break;
4757 default:
4758 break;
4759 }
4760 return;
4761 }
4762
4763 /**
4764 ** Mux machine functions
4765 **/
4766
4767 static void
4768 bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4769 void * event_data);
4770 static void
4771 bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4772 void * event_data);
4773 static void
4774 bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4775 void * event_data);
4776
4777 static void
4778 bondport_mux_machine_collecting_distributing(bondport_ref p, LAEvent event,
4779 void * event_data);
4780
4781 static void
bondport_mux_machine(bondport_ref p,LAEvent event,void * event_data)4782 bondport_mux_machine(bondport_ref p, LAEvent event, void * event_data)
4783 {
4784 switch (p->po_mux_state) {
4785 case MuxState_none:
4786 bondport_mux_machine_detached(p, LAEventStart, NULL);
4787 break;
4788 case MuxState_DETACHED:
4789 bondport_mux_machine_detached(p, event, event_data);
4790 break;
4791 case MuxState_WAITING:
4792 bondport_mux_machine_waiting(p, event, event_data);
4793 break;
4794 case MuxState_ATTACHED:
4795 bondport_mux_machine_attached(p, event, event_data);
4796 break;
4797 case MuxState_COLLECTING_DISTRIBUTING:
4798 bondport_mux_machine_collecting_distributing(p, event, event_data);
4799 break;
4800 default:
4801 break;
4802 }
4803 return;
4804 }
4805
4806 static void
bondport_mux_machine_detached(bondport_ref p,LAEvent event,__unused void * event_data)4807 bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4808 __unused void * event_data)
4809 {
4810 lacp_actor_partner_state s;
4811
4812 switch (event) {
4813 case LAEventStart:
4814 devtimer_cancel(p->po_wait_while_timer);
4815 if (if_bond_debug) {
4816 timestamp_printf("[%s] Mux DETACHED\n",
4817 bondport_get_name(p));
4818 }
4819 p->po_mux_state = MuxState_DETACHED;
4820 bondport_flags_clear_ready(p);
4821 bondport_DetachMuxFromAggregator(p);
4822 bondport_disable_distributing(p);
4823 s = p->po_actor_state;
4824 s = lacp_actor_partner_state_set_out_of_sync(s);
4825 s = lacp_actor_partner_state_set_not_collecting(s);
4826 s = lacp_actor_partner_state_set_not_distributing(s);
4827 p->po_actor_state = s;
4828 bondport_flags_set_ntt(p);
4829 break;
4830 case LAEventSelectedChange:
4831 case LAEventPacket:
4832 case LAEventMediaChange:
4833 if (p->po_selected == SelectedState_SELECTED
4834 || p->po_selected == SelectedState_STANDBY) {
4835 bondport_mux_machine_waiting(p, LAEventStart, NULL);
4836 }
4837 break;
4838 default:
4839 break;
4840 }
4841 return;
4842 }
4843
4844 static void
bondport_mux_machine_waiting(bondport_ref p,LAEvent event,__unused void * event_data)4845 bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4846 __unused void * event_data)
4847 {
4848 struct timeval tv;
4849
4850 switch (event) {
4851 case LAEventStart:
4852 devtimer_cancel(p->po_wait_while_timer);
4853 if (if_bond_debug) {
4854 timestamp_printf("[%s] Mux WAITING\n",
4855 bondport_get_name(p));
4856 }
4857 p->po_mux_state = MuxState_WAITING;
4858 OS_FALLTHROUGH;
4859 default:
4860 case LAEventSelectedChange:
4861 if (p->po_selected == SelectedState_UNSELECTED) {
4862 bondport_mux_machine_detached(p, LAEventStart, NULL);
4863 break;
4864 }
4865 if (p->po_selected == SelectedState_STANDBY) {
4866 devtimer_cancel(p->po_wait_while_timer);
4867 /* wait until state changes to SELECTED */
4868 if (if_bond_debug) {
4869 timestamp_printf("[%s] Mux WAITING: Standby\n",
4870 bondport_get_name(p));
4871 }
4872 break;
4873 }
4874 if (bondport_flags_ready(p)) {
4875 if (if_bond_debug) {
4876 timestamp_printf("[%s] Mux WAITING: Port is already ready\n",
4877 bondport_get_name(p));
4878 }
4879 break;
4880 }
4881 if (devtimer_enabled(p->po_wait_while_timer)) {
4882 if (if_bond_debug) {
4883 timestamp_printf("[%s] Mux WAITING: Timer already set\n",
4884 bondport_get_name(p));
4885 }
4886 break;
4887 }
4888 if (ifbond_all_ports_attached(p->po_bond, p)) {
4889 devtimer_cancel(p->po_wait_while_timer);
4890 if (if_bond_debug) {
4891 timestamp_printf("[%s] Mux WAITING: No waiting\n",
4892 bondport_get_name(p));
4893 }
4894 bondport_flags_set_ready(p);
4895 goto no_waiting;
4896 }
4897 if (if_bond_debug) {
4898 timestamp_printf("[%s] Mux WAITING: 2 seconds\n",
4899 bondport_get_name(p));
4900 }
4901 tv.tv_sec = LACP_AGGREGATE_WAIT_TIME;
4902 tv.tv_usec = 0;
4903 devtimer_set_relative(p->po_wait_while_timer, tv,
4904 (devtimer_timeout_func)(void (*)(void))
4905 bondport_mux_machine_waiting,
4906 (void *)LAEventTimeout, NULL);
4907 break;
4908 case LAEventTimeout:
4909 if (if_bond_debug) {
4910 timestamp_printf("[%s] Mux WAITING: Ready\n",
4911 bondport_get_name(p));
4912 }
4913 bondport_flags_set_ready(p);
4914 break;
4915 case LAEventReady:
4916 no_waiting:
4917 if (bondport_flags_ready(p)) {
4918 if (if_bond_debug) {
4919 timestamp_printf("[%s] Mux WAITING: All Ports Ready\n",
4920 bondport_get_name(p));
4921 }
4922 bondport_mux_machine_attached(p, LAEventStart, NULL);
4923 break;
4924 }
4925 break;
4926 }
4927 return;
4928 }
4929
4930 static void
bondport_mux_machine_attached(bondport_ref p,LAEvent event,__unused void * event_data)4931 bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4932 __unused void * event_data)
4933 {
4934 lacp_actor_partner_state s;
4935
4936 switch (event) {
4937 case LAEventStart:
4938 devtimer_cancel(p->po_wait_while_timer);
4939 if (if_bond_debug) {
4940 timestamp_printf("[%s] Mux ATTACHED\n",
4941 bondport_get_name(p));
4942 }
4943 p->po_mux_state = MuxState_ATTACHED;
4944 bondport_AttachMuxToAggregator(p);
4945 s = p->po_actor_state;
4946 s = lacp_actor_partner_state_set_in_sync(s);
4947 s = lacp_actor_partner_state_set_not_collecting(s);
4948 s = lacp_actor_partner_state_set_not_distributing(s);
4949 bondport_disable_distributing(p);
4950 p->po_actor_state = s;
4951 bondport_flags_set_ntt(p);
4952 OS_FALLTHROUGH;
4953 default:
4954 switch (p->po_selected) {
4955 case SelectedState_SELECTED:
4956 s = p->po_partner_state.ps_state;
4957 if (lacp_actor_partner_state_in_sync(s)) {
4958 bondport_mux_machine_collecting_distributing(p, LAEventStart,
4959 NULL);
4960 }
4961 break;
4962 default:
4963 bondport_mux_machine_detached(p, LAEventStart, NULL);
4964 break;
4965 }
4966 break;
4967 }
4968 return;
4969 }
4970
4971 static void
bondport_mux_machine_collecting_distributing(bondport_ref p,LAEvent event,__unused void * event_data)4972 bondport_mux_machine_collecting_distributing(bondport_ref p,
4973 LAEvent event,
4974 __unused void * event_data)
4975 {
4976 lacp_actor_partner_state s;
4977
4978 switch (event) {
4979 case LAEventStart:
4980 devtimer_cancel(p->po_wait_while_timer);
4981 if (if_bond_debug) {
4982 timestamp_printf("[%s] Mux COLLECTING_DISTRIBUTING\n",
4983 bondport_get_name(p));
4984 }
4985 p->po_mux_state = MuxState_COLLECTING_DISTRIBUTING;
4986 bondport_enable_distributing(p);
4987 s = p->po_actor_state;
4988 s = lacp_actor_partner_state_set_collecting(s);
4989 s = lacp_actor_partner_state_set_distributing(s);
4990 p->po_actor_state = s;
4991 bondport_flags_set_ntt(p);
4992 OS_FALLTHROUGH;
4993 default:
4994 s = p->po_partner_state.ps_state;
4995 if (lacp_actor_partner_state_in_sync(s) == 0) {
4996 bondport_mux_machine_attached(p, LAEventStart, NULL);
4997 break;
4998 }
4999 switch (p->po_selected) {
5000 case SelectedState_UNSELECTED:
5001 case SelectedState_STANDBY:
5002 bondport_mux_machine_attached(p, LAEventStart, NULL);
5003 break;
5004 default:
5005 break;
5006 }
5007 break;
5008 }
5009 return;
5010 }
5011