xref: /xnu-12377.41.6/bsd/net/if_bond.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 2004-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * if_bond.c
31  * - bond/failover interface
32  * - implements IEEE 802.3ad Link Aggregation
33  */
34 
35 /*
36  * Modification History:
37  *
38  * April 29, 2004	Dieter Siegmund ([email protected])
39  * - created
40  */
41 
42 #include <sys/param.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/queue.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51 #include <sys/kern_event.h>
52 #include <net/bpf.h>
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/kpi_interface.h>
56 #include <net/if_arp.h>
57 #include <net/if_dl.h>
58 #include <net/if_ether.h>
59 #include <net/if_types.h>
60 #include <net/if_bond_var.h>
61 #include <net/ieee8023ad.h>
62 #include <net/lacp.h>
63 #include <net/dlil.h>
64 #include <sys/time.h>
65 #include <net/devtimer.h>
66 #include <net/if_vlan_var.h>
67 #include <net/kpi_protocol.h>
68 #include <sys/protosw.h>
69 #include <kern/locks.h>
70 #include <kern/uipc_domain.h>
71 #include <kern/zalloc.h>
72 #include <os/refcnt.h>
73 
74 #include <netinet/in.h>
75 #include <netinet/if_ether.h>
76 #include <netinet/in_systm.h>
77 #include <netinet/ip.h>
78 #include <netinet/ip6.h>
79 
80 #include <net/if_media.h>
81 #include <net/multicast_list.h>
82 
83 #include <net/sockaddr_utils.h>
84 #include <net/mblist.h>
85 
86 static struct ether_addr slow_proto_multicast = {
87 	.octet = IEEE8023AD_SLOW_PROTO_MULTICAST
88 };
89 
90 typedef struct ifbond_s ifbond, *__single ifbond_ref;
91 typedef struct bondport_s bondport, *__single bondport_ref;
92 
93 #define BOND_MAXUNIT            128
94 #define BOND_ZONE_MAX_ELEM      MIN(IFNETS_MAX, BOND_MAXUNIT)
95 #define BONDNAME                "bond"
96 
97 #define EA_FORMAT       "%x:%x:%x:%x:%x:%x"
98 #define EA_CH(e, i)     ((u_char)((u_char *)(e))[(i)])
99 #define EA_LIST(ea)     EA_CH(ea,0),EA_CH(ea,1),EA_CH(ea,2),EA_CH(ea,3),EA_CH(ea,4),EA_CH(ea,5)
100 
101 /*
102  * if_bond_debug, BD_DBGF_*
103  * - 'if_bond_debug' is a bitmask of BD_DBGF_* flags that can be set
104  *   to enable additional logs for the corresponding bond function
105  * - "sysctl net.link.bond.debug" controls the value of
106  *   'if_bond_debug'
107  */
108 static uint32_t if_bond_debug = 0;
109 #define BD_DBGF_LIFECYCLE       0x0001
110 #define BD_DBGF_INPUT           0x0002
111 #define BD_DBGF_OUTPUT          0x0004
112 #define BD_DBGF_LACP            0x0008
113 
/*
 * if_bond_log_level
 * - 'if_bond_log_level' ensures that by default important logs are
 *   logged regardless of if_bond_debug by comparing the log level
 *   in BOND_LOG to if_bond_log_level
 * - "sysctl net.link.bond.log_level" controls the value of
 *   'if_bond_log_level'
 * - the default value of 'if_bond_log_level' is LOG_NOTICE; important
 *   logs must use LOG_NOTICE to ensure they appear by default
 */
124 static int if_bond_log_level = LOG_NOTICE;
125 
/*
 * BOND_DBGF_ENABLED
 * - evaluates to non-zero if any of the BD_DBGF_* bits in '__flag' is set
 *   in 'if_bond_debug'
 * - the argument is parenthesized so that a compound expression such as
 *   (BD_DBGF_INPUT | BD_DBGF_OUTPUT) is masked as a whole
 */
#define BOND_DBGF_ENABLED(__flag)     ((if_bond_debug & (__flag)) != 0)
127 
128 /*
129  * BOND_LOG, BOND_LOG_SIMPLE
130  * - macros to generate the specified log conditionally based on
131  *   the specified log level and debug flags
132  * - BOND_LOG_SIMPLE does not include the function name in the log
133  */
/*
 * Both macros log when '__level' is at or below 'if_bond_log_level', or
 * when any bit of '__dbgf' is enabled in 'if_bond_debug'.  BOND_LOG prefixes
 * the message with the calling function's name; BOND_LOG_SIMPLE does not.
 * '__level' is parenthesized so that an expression argument (for example
 * a conditional) is compared as a whole.
 */
#define BOND_LOG(__level, __dbgf, __string, ...)                        \
	do {                                                            \
	        if ((__level) <= if_bond_log_level ||                   \
	            BOND_DBGF_ENABLED(__dbgf)) {                        \
	                os_log(OS_LOG_DEFAULT, "%s: " __string,         \
	                       __func__, ## __VA_ARGS__);               \
	        }                                                       \
	} while (0)
#define BOND_LOG_SIMPLE(__level, __dbgf, __string, ...)                 \
	do {                                                            \
	        if ((__level) <= if_bond_log_level ||                   \
	            BOND_DBGF_ENABLED(__dbgf)) {                        \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                       \
	} while (0)
149 
150 SYSCTL_DECL(_net_link);
151 SYSCTL_NODE(_net_link, OID_AUTO, bond, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
152     "Bond interface");
153 
154 SYSCTL_INT(_net_link_bond, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
155     &if_bond_debug, 0, "Bond interface debug flags");
156 
157 SYSCTL_INT(_net_link_bond, OID_AUTO, log_level, CTLFLAG_RW | CTLFLAG_LOCKED,
158     &if_bond_log_level, 0, "Bond interface log level");
159 
160 /**
161 ** bond locks
162 **/
163 
164 static LCK_GRP_DECLARE(bond_lck_grp, "if_bond");
165 static LCK_MTX_DECLARE(bond_lck_mtx, &bond_lck_grp);
166 
167 static __inline__ void
bond_assert_lock_held(void)168 bond_assert_lock_held(void)
169 {
170 	LCK_MTX_ASSERT(&bond_lck_mtx, LCK_MTX_ASSERT_OWNED);
171 }
172 
173 static __inline__ void
bond_assert_lock_not_held(void)174 bond_assert_lock_not_held(void)
175 {
176 	LCK_MTX_ASSERT(&bond_lck_mtx, LCK_MTX_ASSERT_NOTOWNED);
177 }
178 
179 static __inline__ void
bond_lock(void)180 bond_lock(void)
181 {
182 	lck_mtx_lock(&bond_lck_mtx);
183 }
184 
185 static __inline__ void
bond_unlock(void)186 bond_unlock(void)
187 {
188 	lck_mtx_unlock(&bond_lck_mtx);
189 }
190 
191 /**
192 ** bond structures, types
193 **/
194 
/*
 * LAG_info
 * - system / system priority / key triple
 * - embedded in struct LAG_s (lag_info) and in partner_state (ps_lag_info)
 */
struct LAG_info_s {
	lacp_system                 li_system;
	lacp_system_priority        li_system_priority;
	lacp_key                    li_key;
};
typedef struct LAG_info_s LAG_info, * LAG_info_ref;
201 
202 struct bondport_s;
203 TAILQ_HEAD(port_list, bondport_s);
204 struct ifbond_s;
205 TAILQ_HEAD(ifbond_list, ifbond_s);
206 struct LAG_s;
207 TAILQ_HEAD(lag_list, LAG_s);
208 
/*
 * LAG
 * - a link aggregation group: a list of ports plus the LAG_info that
 *   identifies the group
 */
struct LAG_s {
	/* linkage for a struct lag_list (e.g. ifbond_s.ifb_lag_list) */
	TAILQ_ENTRY(LAG_s)          lag_list;
	/* ports in this LAG; presumably linked via po_lag_port_list -- verify */
	struct port_list            lag_port_list;
	short                       lag_port_count;
	short                       lag_selected_port_count;
	int                         lag_active_media;
	LAG_info                    lag_info;
};
typedef struct LAG_s LAG, * LAG_ref;
218 
/*
 * partner_state
 * - per-port record of the partner's LAG_info, port, port priority and
 *   actor/partner state bits (stored in bondport_s.po_partner_state)
 */
typedef struct partner_state_s {
	LAG_info                    ps_lag_info;
	lacp_port                   ps_port;
	lacp_port_priority          ps_port_priority;
	lacp_actor_partner_state    ps_state;
} partner_state, * partner_state_ref;
225 
/*
 * ifbond
 * - per-bond-interface state
 * - reference counted through ifbond_retain()/ifbond_release()
 */
struct ifbond_s {
	/* linkage for a struct ifbond_list (see bond_globals_s.ifbond_list) */
	TAILQ_ENTRY(ifbond_s)       ifb_bond_list;
	/* IFBF_* bits, accessed through the ifbond_flags_*() helpers */
	int                         ifb_flags;
	/* dropped by ifbond_release(); the structure is freed at zero */
	struct os_refcnt            ifb_retain_count;
	/* name used in log messages */
	char                        ifb_name[IFNAMSIZ];
	struct ifnet *              ifb_ifp;
	int                         ifb_altmtu;
	struct port_list            ifb_port_list;
	short                       ifb_port_count;
	struct lag_list             ifb_lag_list;
	lacp_key                    ifb_key;
	short                       ifb_max_active;/* 0 == unlimited */
	LAG_ref                     ifb_active_lag;
	/* if non-NULL, removed and released by ifbond_release() */
	struct ifmultiaddr *        ifb_ifma_slow_proto;
	/* array sized by ifb_distributing_max; freed by ifbond_release() */
	bondport_ref *__counted_by(ifb_distributing_max) ifb_distributing_array;
	int                         ifb_distributing_count;
	int                         ifb_distributing_max;
	int                         ifb_last_link_event;
	int                         ifb_mode;/* LACP, STATIC */
};
246 
/*
 * media_info
 * - snapshot of an interface's media, filled in by interface_media_info()
 *   from the SIOCGIFMEDIA result
 */
struct media_info {
	int         mi_active;  /* copy of ifm_active (IFM_* subtype/options) */
	int         mi_status;  /* copy of ifm_status (IFM_AVALID, IFM_ACTIVE) */
};
251 
/*
 * ReceiveState
 * - values stored in bondport_s.po_receive_state
 * - presumably the states of the LACP Receive machine -- confirm against
 *   bondport_receive_machine()
 */
enum {
	ReceiveState_none = 0,
	ReceiveState_INITIALIZE = 1,
	ReceiveState_PORT_DISABLED = 2,
	ReceiveState_EXPIRED = 3,
	ReceiveState_LACP_DISABLED = 4,
	ReceiveState_DEFAULTED = 5,
	ReceiveState_CURRENT = 6,
};

/* stored as a single byte */
typedef u_char ReceiveState;
263 
/*
 * SelectedState
 * - values stored in bondport_s.po_selected
 * - each value is defined in terms of the corresponding
 *   IF_BOND_STATUS_SELECTED_STATE_* constant
 * - NOTE(review): SelectedStateString() indexes a 3-entry table with these
 *   values, so they are assumed to be 0, 1, 2 -- confirm in if_bond_var.h
 */
enum {
	SelectedState_UNSELECTED = IF_BOND_STATUS_SELECTED_STATE_UNSELECTED,
	SelectedState_SELECTED = IF_BOND_STATUS_SELECTED_STATE_SELECTED,
	SelectedState_STANDBY = IF_BOND_STATUS_SELECTED_STATE_STANDBY
};
typedef u_char SelectedState;
270 
271 static __inline__ const char *
SelectedStateString(SelectedState s)272 SelectedStateString(SelectedState s)
273 {
274 	static const char * names[] = { "UNSELECTED", "SELECTED", "STANDBY" };
275 
276 	if (s <= SelectedState_STANDBY) {
277 		return names[s];
278 	}
279 	return "<unknown>";
280 }
281 
/*
 * MuxState
 * - values stored in bondport_s.po_mux_state
 * - presumably the states of the LACP Mux machine -- confirm against
 *   bondport_mux_machine()
 */
enum {
	MuxState_none = 0,
	MuxState_DETACHED = 1,
	MuxState_WAITING = 2,
	MuxState_ATTACHED = 3,
	MuxState_COLLECTING_DISTRIBUTING = 4,
};

/* stored as a single byte */
typedef u_char MuxState;
291 
292 #define PORT_CONTROL_FLAGS_IN_LIST               0x01
293 #define PORT_CONTROL_FLAGS_PROTO_ATTACHED        0x02
294 #define PORT_CONTROL_FLAGS_LLADDR_SET            0x04
295 #define PORT_CONTROL_FLAGS_MTU_SET               0x08
296 #define PORT_CONTROL_FLAGS_PROMISCUOUS_SET       0x10
297 #define PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET  0x20
298 
299 
/*
 * Return true if any bit of 'flags_to_test' is set in 'flags'.
 */
static inline bool
uint32_bit_is_set(uint32_t flags, uint32_t flags_to_test)
{
	const uint32_t common = flags & flags_to_test;

	return common != 0;
}
305 
/*
 * Set every bit of 'flags_to_set' in the word pointed to by 'flags_p'.
 */
static inline void
uint32_bit_set(uint32_t * flags_p, uint32_t flags_to_set)
{
	*flags_p = *flags_p | flags_to_set;
}
311 
/*
 * Clear every bit of 'flags_to_clear' in the word pointed to by 'flags_p'.
 */
static inline void
uint32_bit_clear(uint32_t * flags_p, uint32_t flags_to_clear)
{
	const uint32_t keep_mask = ~flags_to_clear;

	*flags_p = *flags_p & keep_mask;
}
317 
/*
 * bondport
 * - per-port state: the underlying interface, saved settings, and the
 *   LACP state-machine variables
 */
struct bondport_s {
	/* linkage for a struct port_list */
	TAILQ_ENTRY(bondport_s)     po_port_list;
	/* associated bond; its ifb_mode is read by bondport_collecting() */
	ifbond_ref                  po_bond;
	struct multicast_list       po_multicast;
	struct ifnet *              po_ifp;
	struct ether_addr           po_saved_addr;
	int                         po_enabled;
	char                        po_name[IFNAMSIZ];
	struct ifdevmtu             po_devmtu;
	/* presumably PORT_CONTROL_FLAGS_* bits -- verify against users */
	uint32_t                    po_control_flags;

	/* LACP */
	TAILQ_ENTRY(bondport_s)     po_lag_port_list;
	devtimer_ref                po_current_while_timer;
	devtimer_ref                po_periodic_timer;
	devtimer_ref                po_wait_while_timer;
	devtimer_ref                po_transmit_timer;
	partner_state               po_partner_state;
	lacp_port_priority          po_priority;
	lacp_actor_partner_state    po_actor_state;
	/* BONDPORT_FLAGS_* bits, accessed via the bondport_flags_*() helpers */
	u_char                      po_flags;
	u_char                      po_periodic_interval;
	u_char                      po_n_transmit;
	ReceiveState                po_receive_state;
	MuxState                    po_mux_state;
	SelectedState               po_selected;
	int32_t                     po_last_transmit_secs;
	struct media_info           po_media_info;
	uint64_t                    po_force_link_event_time;
	LAG_ref                     po_lag;
};
349 
350 #define IFBF_PROMISC            0x1     /* promiscuous mode */
351 #define IFBF_IF_DETACHING       0x2     /* interface is detaching */
352 #define IFBF_LLADDR             0x4     /* specific link address requested */
353 #define IFBF_CHANGE_IN_PROGRESS 0x8     /* interface add/remove in progress */
354 
355 static int bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p,
356     user_addr_t datap);
357 
358 static __inline__ bool
ifbond_flags_if_detaching(ifbond_ref ifb)359 ifbond_flags_if_detaching(ifbond_ref ifb)
360 {
361 	return (ifb->ifb_flags & IFBF_IF_DETACHING) != 0;
362 }
363 
364 static __inline__ void
ifbond_flags_set_if_detaching(ifbond_ref ifb)365 ifbond_flags_set_if_detaching(ifbond_ref ifb)
366 {
367 	ifb->ifb_flags |= IFBF_IF_DETACHING;
368 	return;
369 }
370 
371 static __inline__ bool
ifbond_flags_lladdr(ifbond_ref ifb)372 ifbond_flags_lladdr(ifbond_ref ifb)
373 {
374 	return (ifb->ifb_flags & IFBF_LLADDR) != 0;
375 }
376 
377 static __inline__ bool
ifbond_flags_change_in_progress(ifbond_ref ifb)378 ifbond_flags_change_in_progress(ifbond_ref ifb)
379 {
380 	return (ifb->ifb_flags & IFBF_CHANGE_IN_PROGRESS) != 0;
381 }
382 
383 static __inline__ void
ifbond_flags_set_change_in_progress(ifbond_ref ifb)384 ifbond_flags_set_change_in_progress(ifbond_ref ifb)
385 {
386 	ifb->ifb_flags |= IFBF_CHANGE_IN_PROGRESS;
387 	return;
388 }
389 
390 static __inline__ void
ifbond_flags_clear_change_in_progress(ifbond_ref ifb)391 ifbond_flags_clear_change_in_progress(ifbond_ref ifb)
392 {
393 	ifb->ifb_flags &= ~IFBF_CHANGE_IN_PROGRESS;
394 	return;
395 }
396 
397 static __inline__ bool
ifbond_flags_promisc(ifbond_ref ifb)398 ifbond_flags_promisc(ifbond_ref ifb)
399 {
400 	return (ifb->ifb_flags & IFBF_PROMISC) != 0;
401 }
402 
403 static __inline__ void
ifbond_flags_set_promisc(ifbond_ref ifb)404 ifbond_flags_set_promisc(ifbond_ref ifb)
405 {
406 	ifb->ifb_flags |= IFBF_PROMISC;
407 	return;
408 }
409 
410 static __inline__ void
ifbond_flags_clear_promisc(ifbond_ref ifb)411 ifbond_flags_clear_promisc(ifbond_ref ifb)
412 {
413 	ifb->ifb_flags &= ~IFBF_PROMISC;
414 	return;
415 }
416 
417 /*
418  * bondport_ref->po_flags bits
419  */
420 #define BONDPORT_FLAGS_NTT              0x01
421 #define BONDPORT_FLAGS_READY            0x02
422 #define BONDPORT_FLAGS_SELECTED_CHANGED 0x04
423 #define BONDPORT_FLAGS_MUX_ATTACHED     0x08
424 #define BONDPORT_FLAGS_DISTRIBUTING     0x10
425 #define BONDPORT_FLAGS_UNUSED2          0x20
426 #define BONDPORT_FLAGS_UNUSED3          0x40
427 #define BONDPORT_FLAGS_UNUSED4          0x80
428 
429 static __inline__ void
bondport_flags_set_ntt(bondport_ref p)430 bondport_flags_set_ntt(bondport_ref p)
431 {
432 	p->po_flags |= BONDPORT_FLAGS_NTT;
433 	return;
434 }
435 
436 static __inline__ void
bondport_flags_clear_ntt(bondport_ref p)437 bondport_flags_clear_ntt(bondport_ref p)
438 {
439 	p->po_flags &= ~BONDPORT_FLAGS_NTT;
440 	return;
441 }
442 
443 static __inline__ int
bondport_flags_ntt(bondport_ref p)444 bondport_flags_ntt(bondport_ref p)
445 {
446 	return (p->po_flags & BONDPORT_FLAGS_NTT) != 0;
447 }
448 
449 static __inline__ void
bondport_flags_set_ready(bondport_ref p)450 bondport_flags_set_ready(bondport_ref p)
451 {
452 	p->po_flags |= BONDPORT_FLAGS_READY;
453 	return;
454 }
455 
456 static __inline__ void
bondport_flags_clear_ready(bondport_ref p)457 bondport_flags_clear_ready(bondport_ref p)
458 {
459 	p->po_flags &= ~BONDPORT_FLAGS_READY;
460 	return;
461 }
462 
463 static __inline__ int
bondport_flags_ready(bondport_ref p)464 bondport_flags_ready(bondport_ref p)
465 {
466 	return (p->po_flags & BONDPORT_FLAGS_READY) != 0;
467 }
468 
469 static __inline__ void
bondport_flags_set_selected_changed(bondport_ref p)470 bondport_flags_set_selected_changed(bondport_ref p)
471 {
472 	p->po_flags |= BONDPORT_FLAGS_SELECTED_CHANGED;
473 	return;
474 }
475 
476 static __inline__ void
bondport_flags_clear_selected_changed(bondport_ref p)477 bondport_flags_clear_selected_changed(bondport_ref p)
478 {
479 	p->po_flags &= ~BONDPORT_FLAGS_SELECTED_CHANGED;
480 	return;
481 }
482 
483 static __inline__ int
bondport_flags_selected_changed(bondport_ref p)484 bondport_flags_selected_changed(bondport_ref p)
485 {
486 	return (p->po_flags & BONDPORT_FLAGS_SELECTED_CHANGED) != 0;
487 }
488 
489 static __inline__ void
bondport_flags_set_mux_attached(bondport_ref p)490 bondport_flags_set_mux_attached(bondport_ref p)
491 {
492 	p->po_flags |= BONDPORT_FLAGS_MUX_ATTACHED;
493 	return;
494 }
495 
496 static __inline__ void
bondport_flags_clear_mux_attached(bondport_ref p)497 bondport_flags_clear_mux_attached(bondport_ref p)
498 {
499 	p->po_flags &= ~BONDPORT_FLAGS_MUX_ATTACHED;
500 	return;
501 }
502 
503 static __inline__ int
bondport_flags_mux_attached(bondport_ref p)504 bondport_flags_mux_attached(bondport_ref p)
505 {
506 	return (p->po_flags & BONDPORT_FLAGS_MUX_ATTACHED) != 0;
507 }
508 
509 static __inline__ void
bondport_flags_set_distributing(bondport_ref p)510 bondport_flags_set_distributing(bondport_ref p)
511 {
512 	p->po_flags |= BONDPORT_FLAGS_DISTRIBUTING;
513 	return;
514 }
515 
516 static __inline__ void
bondport_flags_clear_distributing(bondport_ref p)517 bondport_flags_clear_distributing(bondport_ref p)
518 {
519 	p->po_flags &= ~BONDPORT_FLAGS_DISTRIBUTING;
520 	return;
521 }
522 
523 static __inline__ int
bondport_flags_distributing(bondport_ref p)524 bondport_flags_distributing(bondport_ref p)
525 {
526 	return (p->po_flags & BONDPORT_FLAGS_DISTRIBUTING) != 0;
527 }
528 
/*
 * bond_globals
 * - file-wide state: the list of bond interfaces plus the LACP system
 *   identifier and priority supplied to bond_globals_create()
 */
typedef struct bond_globals_s {
	struct ifbond_list          ifbond_list;
	lacp_system                 system;
	lacp_system_priority        system_priority;
} * bond_globals_ref;

/* assigned by bond_globals_init() while holding bond_lock() */
static bond_globals_ref g_bond;
536 
537 /**
538 ** packet_buffer routines
539 ** - thin wrapper for mbuf
540 **/
541 
542 typedef struct mbuf * packet_buffer_ref;
543 
544 static packet_buffer_ref
packet_buffer_allocate(int length)545 packet_buffer_allocate(int length)
546 {
547 	packet_buffer_ref   m;
548 	int                 size;
549 
550 	/* leave room for ethernet header */
551 	size = length + sizeof(struct ether_header);
552 	if (size > (int)MHLEN) {
553 		if (size > (int)MCLBYTES) {
554 			BOND_LOG(LOG_NOTICE, 0,
555 			    "packet_buffer_allocate size %d > max %u",
556 			    size, MCLBYTES);
557 			return NULL;
558 		}
559 		m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
560 	} else {
561 		m = m_gethdr(M_WAITOK, MT_DATA);
562 	}
563 	if (m == NULL) {
564 		return NULL;
565 	}
566 	m->m_len = size;
567 	m->m_pkthdr.len = size;
568 	return m;
569 }
570 
571 static void *__indexable
packet_buffer_byteptr(packet_buffer_ref buf)572 packet_buffer_byteptr(packet_buffer_ref buf)
573 {
574 	return m_mtod_current(buf) + sizeof(struct ether_header);
575 }
576 
/*
 * LAEvent
 * - event codes passed as the 'event' argument of the
 *   bondport_*_machine() state-machine functions
 */
typedef enum {
	LAEventStart,
	LAEventTimeout,
	LAEventPacket,
	LAEventMediaChange,
	LAEventSelectedChange,
	LAEventPortMoved,
	LAEventReady
} LAEvent;
586 
587 /**
588 ** Receive machine
589 **/
590 static void
591 bondport_receive_machine(bondport_ref p, LAEvent event,
592     void * event_data);
593 /**
594 ** Periodic Transmission machine
595 **/
596 static void
597 bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
598     void * event_data);
599 
600 /**
601 ** Transmit machine
602 **/
603 static void *TRANSMIT_MACHINE_TX_IMMEDIATE = __unsafe_forge_single(void *, 1);
604 
605 static void
606 bondport_transmit_machine(bondport_ref p, LAEvent event,
607     void * event_data);
608 
609 /**
610 ** Mux machine
611 **/
612 static void
613 bondport_mux_machine(bondport_ref p, LAEvent event,
614     void * event_data);
615 
616 /**
617 ** bond, LAG
618 **/
619 static void
620 ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media);
621 
622 static void
623 ifbond_deactivate_LAG(ifbond_ref bond, LAG_ref lag);
624 
625 static int
626 ifbond_all_ports_ready(ifbond_ref bond);
627 
628 static LAG_ref
629 ifbond_find_best_LAG(ifbond_ref bond, int * active_media);
630 
631 static int
632 LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media);
633 
634 static int
635 ifbond_selection(ifbond_ref bond);
636 
637 static void
638 bond_handle_event(struct ifnet * port_ifp, int event_code);
639 
640 /**
641 ** bondport
642 **/
643 
644 static void
645 bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p);
646 
647 static void
648 bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf);
649 
650 static bondport_ref
651 bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
652     int active, int short_timeout, int * error);
653 static void
654 bondport_start(bondport_ref p);
655 
656 static void
657 bondport_free(bondport_ref p);
658 
659 static int
660 bondport_aggregatable(bondport_ref p);
661 
662 static int
663 bondport_remove_from_LAG(bondport_ref p);
664 
665 static void
666 bondport_set_selected(bondport_ref p, SelectedState s);
667 
668 static int
669 bondport_matches_LAG(bondport_ref p, LAG_ref lag);
670 
671 static void
672 bondport_link_status_changed(bondport_ref p);
673 
674 static void
675 bondport_enable_distributing(bondport_ref p);
676 
677 static void
678 bondport_disable_distributing(bondport_ref p);
679 
680 static __inline__ int
bondport_collecting(bondport_ref p)681 bondport_collecting(bondport_ref p)
682 {
683 	if (p->po_bond->ifb_mode == IF_BOND_MODE_LACP) {
684 		return lacp_actor_partner_state_collecting(p->po_actor_state);
685 	}
686 	return TRUE;
687 }
688 
689 /**
690 ** bond interface/dlil specific routines
691 **/
692 static int bond_clone_create(struct if_clone *, u_int32_t, void *);
693 static int bond_clone_destroy(struct ifnet *);
694 static int bond_output(ifnet_t ifp, mbuf_t list);
695 static int bond_ioctl(struct ifnet *ifp, u_long cmd, void * addr);
696 static int bond_attach_protocol(struct ifnet *ifp);
697 static int bond_detach_protocol(struct ifnet *ifp);
698 static int bond_setmulti(struct ifnet *ifp);
699 static int bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp);
700 static int bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp);
701 static void bond_if_free(struct ifnet * ifp);
702 static void interface_link_event(struct ifnet * ifp, u_int32_t event_code);
703 
704 static struct if_clone bond_cloner = IF_CLONE_INITIALIZER(BONDNAME,
705     bond_clone_create,
706     bond_clone_destroy,
707     0,
708     BOND_MAXUNIT);
709 
710 static int
siocsifmtu(struct ifnet * ifp,int mtu)711 siocsifmtu(struct ifnet * ifp, int mtu)
712 {
713 	struct ifreq        ifr;
714 
715 	bzero(&ifr, sizeof(ifr));
716 	ifr.ifr_mtu = mtu;
717 	return ifnet_ioctl(ifp, 0, SIOCSIFMTU, &ifr);
718 }
719 
720 static int
siocgifdevmtu(struct ifnet * ifp,struct ifdevmtu * ifdm_p)721 siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
722 {
723 	struct ifreq        ifr;
724 	int                 error;
725 
726 	bzero(&ifr, sizeof(ifr));
727 	error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
728 	if (error == 0) {
729 		*ifdm_p = ifr.ifr_devmtu;
730 	}
731 	return error;
732 }
733 
734 static __inline__ void
ether_addr_copy(uint8_t * __sized_by (ETHER_ADDR_LEN)dest,const uint8_t * __sized_by (ETHER_ADDR_LEN)source)735 ether_addr_copy(uint8_t *__sized_by(ETHER_ADDR_LEN) dest,
736     const uint8_t *__sized_by(ETHER_ADDR_LEN) source)
737 {
738 	bcopy(source, dest, ETHER_ADDR_LEN);
739 	return;
740 }
741 
742 static __inline__ void
ifbond_retain(ifbond_ref ifb)743 ifbond_retain(ifbond_ref ifb)
744 {
745 	os_ref_retain(&ifb->ifb_retain_count);
746 }
747 
748 static __inline__ void
ifbond_release(ifbond_ref ifb)749 ifbond_release(ifbond_ref ifb)
750 {
751 	if (os_ref_release(&ifb->ifb_retain_count) != 0) {
752 		return;
753 	}
754 	BOND_LOG(LOG_DEBUG, BD_DBGF_LIFECYCLE, "%s", ifb->ifb_name);
755 	if (ifb->ifb_ifma_slow_proto != NULL) {
756 		BOND_LOG(LOG_DEBUG, BD_DBGF_LIFECYCLE,
757 		    "%s: removing multicast", ifb->ifb_name);
758 		(void) if_delmulti_anon(ifb->ifb_ifma_slow_proto->ifma_ifp,
759 		    ifb->ifb_ifma_slow_proto->ifma_addr);
760 		IFMA_REMREF(ifb->ifb_ifma_slow_proto);
761 	}
762 	kfree_type_counted_by(bondport_ref, ifb->ifb_distributing_max,
763 	    ifb->ifb_distributing_array);
764 	kfree_type(struct ifbond_s, ifb);
765 }
766 
767 /*
768  * Function: ifbond_wait
769  * Purpose:
770  *   Allows a single thread to gain exclusive access to the ifbond
771  *   data structure.  Some operations take a long time to complete,
772  *   and some have side-effects that we can't predict.  Holding the
773  *   bond_lock() across such operations is not possible.
774  *
775  *   For example:
776  *   1) The SIOCSIFLLADDR ioctl takes a long time (several seconds) to
777  *      complete.  Simply holding the bond_lock() would freeze all other
778  *      data structure accesses during that time.
779  *   2) When we attach our protocol to the interface, a dlil event is
780  *      generated and invokes our bond_event() function.  bond_event()
781  *      needs to take the bond_lock(), but we're already holding it, so
782  *      we're deadlocked against ourselves.
783  * Notes:
784  *   Before calling, you must be holding the bond_lock and have taken
785  *   a reference on the ifbond_ref.
786  */
787 static void
ifbond_wait(ifbond_ref ifb,const char * msg)788 ifbond_wait(ifbond_ref ifb, const char * msg)
789 {
790 	int         waited = 0;
791 
792 	/* other add/remove in progress */
793 	while (ifbond_flags_change_in_progress(ifb)) {
794 		BOND_LOG(LOG_DEBUG, BD_DBGF_LIFECYCLE, "%s: %s msleep",
795 		    ifb->ifb_name, msg);
796 		waited = 1;
797 		(void)msleep(ifb, &bond_lck_mtx, PZERO, msg, 0);
798 	}
799 	/* prevent other bond list remove/add from taking place */
800 	ifbond_flags_set_change_in_progress(ifb);
801 	if (waited) {
802 		BOND_LOG(LOG_DEBUG, BD_DBGF_LIFECYCLE,
803 		    "%s: %s woke up", ifb->ifb_name, msg);
804 	}
805 	return;
806 }
807 
808 /*
809  * Function: ifbond_signal
810  * Purpose:
811  *   Allows the thread that previously invoked ifbond_wait() to
812  *   give up exclusive access to the ifbond data structure, and wake up
813  *   any other threads waiting to access
814  * Notes:
815  *   Before calling, you must be holding the bond_lock and have taken
816  *   a reference on the ifbond_ref.
817  */
818 static void
ifbond_signal(ifbond_ref ifb,const char * msg)819 ifbond_signal(ifbond_ref ifb, const char * msg)
820 {
821 	ifbond_flags_clear_change_in_progress(ifb);
822 	wakeup((caddr_t)ifb);
823 	BOND_LOG(LOG_DEBUG, BD_DBGF_LIFECYCLE, "%s: %s wakeup",
824 	    ifb->ifb_name, msg);
825 	return;
826 }
827 
828 /**
829 ** Media information
830 **/
831 
832 static int
link_speed(int active)833 link_speed(int active)
834 {
835 	switch (IFM_SUBTYPE(active)) {
836 	case IFM_AUTO:
837 	case IFM_MANUAL:
838 	case IFM_NONE:
839 		return 0;
840 	case IFM_10_T:
841 	case IFM_10_2:
842 	case IFM_10_5:
843 	case IFM_10_STP:
844 	case IFM_10_FL:
845 		return 10;
846 	case IFM_100_TX:
847 	case IFM_100_FX:
848 	case IFM_100_T4:
849 	case IFM_100_VG:
850 	case IFM_100_T2:
851 		return 100;
852 	case IFM_1000_SX:
853 	case IFM_1000_LX:
854 	case IFM_1000_CX:
855 	case IFM_1000_TX:
856 	case IFM_1000_CX_SGMII:
857 	case IFM_1000_KX:
858 		return 1000;
859 	case IFM_HPNA_1:
860 		return 1;
861 	default:
862 	/* assume that new defined types are going to be at least 10GigE */
863 	case IFM_10G_T:
864 	case IFM_10G_SR:
865 	case IFM_10G_LR:
866 	case IFM_10G_KX4:
867 	case IFM_10G_KR:
868 	case IFM_10G_CR1:
869 	case IFM_10G_ER:
870 		return 10000;
871 	case IFM_2500_T:
872 		return 2500;
873 	case IFM_5000_T:
874 		return 5000;
875 	case IFM_20G_KR2:
876 		return 20000;
877 	case IFM_25G_CR:
878 	case IFM_25G_KR:
879 	case IFM_25G_SR:
880 	case IFM_25G_LR:
881 		return 25000;
882 	case IFM_40G_CR4:
883 	case IFM_40G_SR4:
884 	case IFM_40G_LR4:
885 	case IFM_40G_KR4:
886 		return 40000;
887 	case IFM_50G_CR2:
888 	case IFM_50G_KR2:
889 	case IFM_50G_SR2:
890 	case IFM_50G_LR2:
891 		return 50000;
892 	case IFM_56G_R4:
893 		return 56000;
894 	case IFM_100G_CR4:
895 	case IFM_100G_SR4:
896 	case IFM_100G_KR4:
897 	case IFM_100G_LR4:
898 		return 100000;
899 	}
900 }
901 
902 static __inline__ int
media_active(const struct media_info * mi)903 media_active(const struct media_info * mi)
904 {
905 	if ((mi->mi_status & IFM_AVALID) == 0) {
906 		return 1;
907 	}
908 	return (mi->mi_status & IFM_ACTIVE) != 0;
909 }
910 
911 static __inline__ int
media_full_duplex(const struct media_info * mi)912 media_full_duplex(const struct media_info * mi)
913 {
914 	return (mi->mi_active & IFM_FDX) != 0;
915 }
916 
917 static __inline__ int
media_type_unknown(const struct media_info * mi)918 media_type_unknown(const struct media_info * mi)
919 {
920 	int unknown;
921 
922 	switch (IFM_SUBTYPE(mi->mi_active)) {
923 	case IFM_AUTO:
924 	case IFM_MANUAL:
925 	case IFM_NONE:
926 		unknown = 1;
927 		break;
928 	default:
929 		unknown = 0;
930 		break;
931 	}
932 	return unknown;
933 }
934 
/*
 * Return 1 if the media is either full duplex or of unknown type,
 * 0 otherwise.
 */
static __inline__ int
media_ok(const struct media_info * mi)
{
	if (media_full_duplex(mi)) {
		return 1;
	}
	return media_type_unknown(mi) ? 1 : 0;
}
940 
941 static __inline__ int
media_speed(const struct media_info * mi)942 media_speed(const struct media_info * mi)
943 {
944 	return link_speed(mi->mi_active);
945 }
946 
947 static struct media_info
interface_media_info(struct ifnet * ifp)948 interface_media_info(struct ifnet * ifp)
949 {
950 	struct ifmediareq   ifmr;
951 	struct media_info   mi;
952 
953 	bzero(&mi, sizeof(mi));
954 	bzero(&ifmr, sizeof(ifmr));
955 	if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
956 		if (ifmr.ifm_count != 0) {
957 			mi.mi_status = ifmr.ifm_status;
958 			mi.mi_active = ifmr.ifm_active;
959 		}
960 	}
961 	return mi;
962 }
963 
964 static int
if_siflladdr(struct ifnet * ifp,const struct ether_addr * ea_p)965 if_siflladdr(struct ifnet * ifp, const struct ether_addr * ea_p)
966 {
967 	struct ifreq        ifr;
968 
969 	/*
970 	 * XXX setting the sa_len to ETHER_ADDR_LEN is wrong, but the driver
971 	 * currently expects it that way
972 	 */
973 	ifr.ifr_addr.sa_family = AF_UNSPEC;
974 	ifr.ifr_addr.sa_len = ETHER_ADDR_LEN;
975 	ether_addr_copy((uint8_t *__indexable)ifr.ifr_addr.sa_data, ea_p->octet);
976 	return ifnet_ioctl(ifp, 0, SIOCSIFLLADDR, &ifr);
977 }
978 
979 /**
980 ** bond_globals
981 **/
982 static bond_globals_ref
bond_globals_create(lacp_system_priority sys_pri,lacp_system_ref sys)983 bond_globals_create(lacp_system_priority sys_pri,
984     lacp_system_ref sys)
985 {
986 	bond_globals_ref    b;
987 
988 	b = kalloc_type(struct bond_globals_s, Z_WAITOK | Z_ZERO | Z_NOFAIL);
989 	TAILQ_INIT(&b->ifbond_list);
990 	b->system = *sys;
991 	b->system_priority = sys_pri;
992 	return b;
993 }
994 
995 static int
bond_globals_init(void)996 bond_globals_init(void)
997 {
998 	bond_globals_ref    b;
999 	int                 i;
1000 	struct ifnet *      ifp;
1001 
1002 	bond_assert_lock_not_held();
1003 
1004 	if (g_bond != NULL) {
1005 		return 0;
1006 	}
1007 
1008 	/*
1009 	 * use en0's ethernet address as the system identifier, and if it's not
1010 	 * there, use en1 .. en3
1011 	 */
1012 	ifp = NULL;
1013 	for (i = 0; i < 4; i++) {
1014 		char            ifname[IFNAMSIZ + 1];
1015 		snprintf(ifname, sizeof(ifname), "en%d", i);
1016 		ifp = ifunit(__unsafe_null_terminated_from_indexable(ifname));
1017 		if (ifp != NULL) {
1018 			break;
1019 		}
1020 	}
1021 	b = NULL;
1022 	if (ifp != NULL) {
1023 		b = bond_globals_create(0x8000, (lacp_system_ref)IF_LLADDR(ifp));
1024 	}
1025 	bond_lock();
1026 	if (g_bond != NULL) {
1027 		bond_unlock();
1028 		kfree_type(struct bond_globals_s, b);
1029 		return 0;
1030 	}
1031 	g_bond = b;
1032 	bond_unlock();
1033 	if (ifp == NULL) {
1034 		return ENXIO;
1035 	}
1036 	if (b == NULL) {
1037 		return ENOMEM;
1038 	}
1039 	return 0;
1040 }
1041 
1042 /*
1043  * bpf tap
1044  */
1045 static inline void *__indexable
get_bpf_header(mbuf_t m,struct ether_header * eh_p,struct ether_vlan_header * evl_p,size_t * header_len)1046 get_bpf_header(mbuf_t m, struct ether_header * eh_p,
1047     struct ether_vlan_header * evl_p, size_t * header_len)
1048 {
1049 	void *header;
1050 
1051 	/* no VLAN tag, just use the ethernet header */
1052 	if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
1053 		header = (struct ether_header *__bidi_indexable)eh_p;
1054 		*header_len = sizeof(*eh_p);
1055 		goto done;
1056 	}
1057 
1058 	/* has VLAN tag, populate the ether VLAN header */
1059 	bcopy(eh_p, evl_p,
1060 	    offsetof(struct ether_header, ether_type));   /* dst+src ether */
1061 	evl_p->evl_encap_proto = htons(ETHERTYPE_VLAN);   /* VLAN encap */
1062 	evl_p->evl_tag = htons(m->m_pkthdr.vlan_tag);     /* tag */
1063 	evl_p->evl_proto = eh_p->ether_type;              /* proto */
1064 	*header_len = sizeof(*evl_p);
1065 	header = (struct ether_vlan_header *__bidi_indexable)evl_p;
1066 
1067 done:
1068 	return header;
1069 }
1070 
1071 typedef void (*_tap_func)(ifnet_t interface, u_int32_t dlt, mbuf_t packet,
1072     void *__sized_by(header_len) header, size_t header_len);
1073 
1074 static void
bond_bpf_tap_common(ifnet_t ifp,mbuf_t m,struct ether_header * eh_p,_tap_func func)1075 bond_bpf_tap_common(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p,
1076     _tap_func func)
1077 {
1078 	struct ether_vlan_header        evl;
1079 	size_t                          header_len;
1080 	void *                          header;
1081 
1082 	header = get_bpf_header(m, eh_p, &evl, &header_len);
1083 	(*func)(ifp, DLT_EN10MB, m, header, header_len);
1084 }
1085 
1086 static inline void
bond_bpf_tap_in(ifnet_t ifp,mbuf_t m,struct ether_header * eh_p)1087 bond_bpf_tap_in(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
1088 {
1089 	bond_bpf_tap_common(ifp, m, eh_p, bpf_tap_in);
1090 }
1091 
1092 static inline void
bond_bpf_tap_out(ifnet_t ifp,mbuf_t m,struct ether_header * eh_p)1093 bond_bpf_tap_out(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
1094 {
1095 	bond_bpf_tap_common(ifp, m, eh_p, bpf_tap_out);
1096 }
1097 
1098 /*
1099  * Function: bond_setmulti
1100  * Purpose:
1101  *   Enable multicast reception on "our" interface by enabling multicasts on
1102  *   each of the member ports.
1103  */
1104 static int
bond_setmulti(struct ifnet * ifp)1105 bond_setmulti(struct ifnet * ifp)
1106 {
1107 	ifbond_ref          ifb;
1108 	int                 error;
1109 	int                 result = 0;
1110 	bondport_ref        p;
1111 
1112 	bond_lock();
1113 	ifb = ifnet_softc(ifp);
1114 	if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1115 	    || TAILQ_EMPTY(&ifb->ifb_port_list)) {
1116 		bond_unlock();
1117 		return 0;
1118 	}
1119 	ifbond_retain(ifb);
1120 	ifbond_wait(ifb, "bond_setmulti");
1121 
1122 	if (ifbond_flags_if_detaching(ifb)) {
1123 		/* someone destroyed the bond while we were waiting */
1124 		result = EBUSY;
1125 		goto signal_done;
1126 	}
1127 	bond_unlock();
1128 
1129 	/* ifbond_wait() let's us safely walk the list without holding the lock */
1130 	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1131 		struct ifnet *  port_ifp = p->po_ifp;
1132 
1133 		error = multicast_list_program(&p->po_multicast,
1134 		    ifp, port_ifp);
1135 		if (error != 0) {
1136 			BOND_LOG(LOG_NOTICE, 0,
1137 			    "%s: multicast_list_program(%s) failed, %d",
1138 			    ifb->ifb_name, port_ifp->if_xname, error);
1139 			result = error;
1140 		}
1141 	}
1142 	bond_lock();
1143 signal_done:
1144 	ifbond_signal(ifb, __func__);
1145 	bond_unlock();
1146 	ifbond_release(ifb);
1147 	return result;
1148 }
1149 
1150 static int
bond_clone_attach(void)1151 bond_clone_attach(void)
1152 {
1153 	int error;
1154 
1155 	if ((error = if_clone_attach(&bond_cloner)) != 0) {
1156 		return error;
1157 	}
1158 	return 0;
1159 }
1160 
1161 static int
ifbond_add_slow_proto_multicast(ifbond_ref ifb)1162 ifbond_add_slow_proto_multicast(ifbond_ref ifb)
1163 {
1164 	int                         error;
1165 	struct ifmultiaddr *__single ifma = NULL;
1166 	struct sockaddr_dl          sdl;
1167 
1168 	bond_assert_lock_not_held();
1169 
1170 	SOCKADDR_ZERO(&sdl, sizeof(sdl));
1171 	sdl.sdl_len = sizeof(sdl);
1172 	sdl.sdl_family = AF_LINK;
1173 	sdl.sdl_type = IFT_ETHER;
1174 	sdl.sdl_nlen = 0;
1175 	sdl.sdl_alen = sizeof(slow_proto_multicast);
1176 	bcopy(&slow_proto_multicast, sdl.sdl_data, sizeof(slow_proto_multicast));
1177 	error = if_addmulti_anon(ifb->ifb_ifp, SA(&sdl), &ifma);
1178 	if (error == 0) {
1179 		ifb->ifb_ifma_slow_proto = ifma;
1180 	}
1181 	return error;
1182 }
1183 
1184 static int
bond_clone_create(struct if_clone * ifc,u_int32_t unit,__unused void * params)1185 bond_clone_create(struct if_clone * ifc, u_int32_t unit, __unused void *params)
1186 {
1187 	int                                             error;
1188 	ifbond_ref                                      ifb;
1189 	ifnet_ref_t                                     ifp;
1190 	struct ifnet_init_eparams       bond_init;
1191 
1192 	error = bond_globals_init();
1193 	if (error != 0) {
1194 		return error;
1195 	}
1196 
1197 	ifb = kalloc_type(struct ifbond_s, Z_WAITOK_ZERO_NOFAIL);
1198 	os_ref_init(&ifb->ifb_retain_count, NULL);
1199 	TAILQ_INIT(&ifb->ifb_port_list);
1200 	TAILQ_INIT(&ifb->ifb_lag_list);
1201 	ifb->ifb_key = unit + 1;
1202 
1203 	/* use the interface name as the unique id for ifp recycle */
1204 	if ((u_int32_t)snprintf(ifb->ifb_name, sizeof(ifb->ifb_name), "%s%d",
1205 	    ifc->ifc_name, unit) >= sizeof(ifb->ifb_name)) {
1206 		ifbond_release(ifb);
1207 		return EINVAL;
1208 	}
1209 
1210 	bzero(&bond_init, sizeof(bond_init));
1211 	bond_init.ver = IFNET_INIT_CURRENT_VERSION;
1212 	bond_init.len = sizeof(bond_init);
1213 	bond_init.flags = IFNET_INIT_LEGACY;
1214 	bond_init.uniqueid_len = strbuflen(ifb->ifb_name);
1215 	bond_init.uniqueid = ifb->ifb_name;
1216 	bond_init.name = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
1217 	bond_init.unit = unit;
1218 	bond_init.family = IFNET_FAMILY_BOND;
1219 	bond_init.type = IFT_IEEE8023ADLAG;
1220 	bond_init.output = bond_output;
1221 	bond_init.demux = ether_demux;
1222 	bond_init.add_proto = ether_add_proto;
1223 	bond_init.del_proto = ether_del_proto;
1224 	bond_init.check_multi = ether_check_multi;
1225 	bond_init.framer_extended = ether_frameout_extended;
1226 	bond_init.ioctl = bond_ioctl;
1227 	bond_init.detach = bond_if_free;
1228 	bond_init.broadcast_addr = etherbroadcastaddr;
1229 	bond_init.broadcast_len = ETHER_ADDR_LEN;
1230 	bond_init.softc = ifb;
1231 	error = ifnet_allocate_extended(&bond_init, &ifp);
1232 
1233 	if (error) {
1234 		ifbond_release(ifb);
1235 		return error;
1236 	}
1237 
1238 	ifb->ifb_ifp = ifp;
1239 	ifnet_set_offload(ifp, 0);
1240 	ifnet_set_addrlen(ifp, ETHER_ADDR_LEN); /* XXX ethernet specific */
1241 	ifnet_set_flags(ifp, IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX, 0xffff);
1242 	ifnet_set_mtu(ifp, ETHERMTU);
1243 
1244 	error = ifnet_attach(ifp, NULL);
1245 	if (error != 0) {
1246 		ifnet_release(ifp);
1247 		ifbond_release(ifb);
1248 		return error;
1249 	}
1250 	error = ifbond_add_slow_proto_multicast(ifb);
1251 	if (error != 0) {
1252 		BOND_LOG(LOG_NOTICE, 0,
1253 		    "%s: failed to add slow_proto multicast, %d",
1254 		    ifb->ifb_name, error);
1255 	}
1256 
1257 	/* attach as ethernet */
1258 	bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
1259 
1260 	bond_lock();
1261 	TAILQ_INSERT_HEAD(&g_bond->ifbond_list, ifb, ifb_bond_list);
1262 	bond_unlock();
1263 
1264 	return 0;
1265 }
1266 
1267 static void
bond_remove_all_interfaces(ifbond_ref ifb)1268 bond_remove_all_interfaces(ifbond_ref ifb)
1269 {
1270 	bondport_ref        p;
1271 
1272 	bond_assert_lock_held();
1273 
1274 	/*
1275 	 * do this in reverse order to avoid re-programming the mac address
1276 	 * as each head interface is removed
1277 	 */
1278 	while ((p = TAILQ_LAST(&ifb->ifb_port_list, port_list)) != NULL) {
1279 		bond_remove_interface(ifb, p->po_ifp);
1280 	}
1281 	return;
1282 }
1283 
1284 static void
bond_remove(ifbond_ref ifb)1285 bond_remove(ifbond_ref ifb)
1286 {
1287 	bond_assert_lock_held();
1288 	ifbond_flags_set_if_detaching(ifb);
1289 	TAILQ_REMOVE(&g_bond->ifbond_list, ifb, ifb_bond_list);
1290 	bond_remove_all_interfaces(ifb);
1291 	return;
1292 }
1293 
1294 static void
bond_if_detach(struct ifnet * ifp)1295 bond_if_detach(struct ifnet * ifp)
1296 {
1297 	int         error;
1298 
1299 	error = ifnet_detach(ifp);
1300 	if (error != 0) {
1301 		BOND_LOG(LOG_NOTICE, 0, "%s: ifnet_detach failed, %d",
1302 		    ifp->if_xname, error);
1303 	}
1304 	return;
1305 }
1306 
1307 static int
bond_clone_destroy(struct ifnet * ifp)1308 bond_clone_destroy(struct ifnet * ifp)
1309 {
1310 	ifbond_ref ifb;
1311 
1312 	bond_lock();
1313 	ifb = ifnet_softc(ifp);
1314 	if (ifb == NULL || ifnet_type(ifp) != IFT_IEEE8023ADLAG) {
1315 		bond_unlock();
1316 		return 0;
1317 	}
1318 	if (ifbond_flags_if_detaching(ifb)) {
1319 		bond_unlock();
1320 		return 0;
1321 	}
1322 	bond_remove(ifb);
1323 	bond_unlock();
1324 	bond_if_detach(ifp);
1325 	return 0;
1326 }
1327 
/*
 * Function: ether_header_hash
 * Purpose:
 *   Compute a 32-bit hash from the destination ethernet address and the
 *   ethernet type of the given header.
 * Notes:
 *   The address bytes are copied out rather than loaded through casted
 *   uint16_t/uint32_t pointers, so the result does not depend on the
 *   header's alignment, and the 16-bit value is widened to uint32_t before
 *   shifting so the shift can never reach the sign bit of an int.
 *   The computed value is the same as with native-endian loads.
 */
static uint32_t
ether_header_hash(struct ether_header * eh_p)
{
	uint16_t    dhost_hi;   /* ether_dhost[4..5] */
	uint32_t    dhost_lo;   /* ether_dhost[0..3] */
	uint32_t    h;

	bcopy(&eh_p->ether_dhost[0], &dhost_lo, sizeof(dhost_lo));
	bcopy(&eh_p->ether_dhost[4], &dhost_hi, sizeof(dhost_hi));

	/* get 32-bits from destination ether and ether type */
	h = ((uint32_t)dhost_hi << 16) | eh_p->ether_type;
	h ^= dhost_lo;
	return h;
}
1339 
1340 #define BOND_HASH_L3_HEADER     0
1341 #if BOND_HASH_L3_HEADER
1342 static struct mbuf *
S_mbuf_skip_to_offset(struct mbuf * m,int32_t * offset)1343 S_mbuf_skip_to_offset(struct mbuf * m, int32_t * offset)
1344 {
1345 	int                 len;
1346 
1347 	len = m->m_len;
1348 	while (*offset >= len) {
1349 		*offset -= len;
1350 		m = m->m_next;
1351 		if (m == NULL) {
1352 			break;
1353 		}
1354 		len = m->m_len;
1355 	}
1356 	return m;
1357 }
1358 
1359 #if BYTE_ORDER == BIG_ENDIAN
1360 static __inline__ uint32_t
make_uint32(u_char c0,u_char c1,u_char c2,u_char c3)1361 make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
1362 {
1363 	return ((uint32_t)c0 << 24) | ((uint32_t)c1 << 16)
1364 	       | ((uint32_t)c2 << 8) | (uint32_t)c3;
1365 }
1366 #else /* BYTE_ORDER == LITTLE_ENDIAN */
1367 static __inline__ uint32_t
make_uint32(u_char c0,u_char c1,u_char c2,u_char c3)1368 make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
1369 {
1370 	return ((uint32_t)c3 << 24) | ((uint32_t)c2 << 16)
1371 	       | ((uint32_t)c1 << 8) | (uint32_t)c0;
1372 }
1373 #endif /* BYTE_ORDER == LITTLE_ENDIAN */
1374 
1375 static int
S_mbuf_copy_uint32(struct mbuf * m,int32_t offset,uint32_t * val)1376 S_mbuf_copy_uint32(struct mbuf * m, int32_t offset, uint32_t * val)
1377 {
1378 	struct mbuf *       current;
1379 	u_char *            current_data;
1380 	struct mbuf *       next;
1381 	u_char *            next_data;
1382 	int                 space_current;
1383 
1384 	current = S_mbuf_skip_to_offset(m, &offset);
1385 	if (current == NULL) {
1386 		return 1;
1387 	}
1388 	current_data = mtod(current, u_char *) + offset;
1389 	space_current = current->m_len - offset;
1390 	if (space_current >= (int)sizeof(uint32_t)) {
1391 		*val = *((uint32_t *)current_data);
1392 		return 0;
1393 	}
1394 	next = current->m_next;
1395 	if (next == NULL || (next->m_len + space_current) < (int)sizeof(uint32_t)) {
1396 		return 1;
1397 	}
1398 	next_data = mtod(next, u_char *);
1399 	switch (space_current) {
1400 	case 1:
1401 		*val = make_uint32(current_data[0], next_data[0],
1402 		    next_data[1], next_data[2]);
1403 		break;
1404 	case 2:
1405 		*val = make_uint32(current_data[0], current_data[1],
1406 		    next_data[0], next_data[1]);
1407 		break;
1408 	default:
1409 		*val = make_uint32(current_data[0], current_data[1],
1410 		    current_data[2], next_data[0]);
1411 		break;
1412 	}
1413 	return 0;
1414 }
1415 
1416 #define IP_SRC_OFFSET (offsetof(struct ip, ip_src) - offsetof(struct ip, ip_p))
1417 #define IP_DST_OFFSET (offsetof(struct ip, ip_dst) - offsetof(struct ip, ip_p))
1418 
1419 static uint32_t
ip_header_hash(struct mbuf * m)1420 ip_header_hash(struct mbuf * m)
1421 {
1422 	u_char *            data;
1423 	struct in_addr      ip_dst;
1424 	struct in_addr      ip_src;
1425 	u_char              ip_p;
1426 	int32_t             offset;
1427 	struct mbuf *       orig_m = m;
1428 
1429 	/* find the IP protocol field relative to the start of the packet */
1430 	offset = offsetof(struct ip, ip_p) + sizeof(struct ether_header);
1431 	m = S_mbuf_skip_to_offset(m, &offset);
1432 	if (m == NULL || m->m_len < 1) {
1433 		goto bad_ip_packet;
1434 	}
1435 	data = mtod(m, u_char *) + offset;
1436 	ip_p = *data;
1437 
1438 	/* find the IP src relative to the IP protocol */
1439 	if ((m->m_len - offset)
1440 	    >= (int)(IP_SRC_OFFSET + sizeof(struct in_addr) * 2)) {
1441 		/* this should be the normal case */
1442 		ip_src = *(struct in_addr *)(data + IP_SRC_OFFSET);
1443 		ip_dst = *(struct in_addr *)(data + IP_DST_OFFSET);
1444 	} else {
1445 		if (S_mbuf_copy_uint32(m, offset + IP_SRC_OFFSET,
1446 		    (uint32_t *)&ip_src.s_addr)) {
1447 			goto bad_ip_packet;
1448 		}
1449 		if (S_mbuf_copy_uint32(m, offset + IP_DST_OFFSET,
1450 		    (uint32_t *)&ip_dst.s_addr)) {
1451 			goto bad_ip_packet;
1452 		}
1453 	}
1454 	return ntohl(ip_dst.s_addr) ^ ntohl(ip_src.s_addr) ^ ((uint32_t)ip_p);
1455 
1456 bad_ip_packet:
1457 	return ether_header_hash(mtod(orig_m, struct ether_header *));
1458 }
1459 
1460 #define IP6_ADDRS_LEN   (sizeof(struct in6_addr) * 2)
1461 static uint32_t
ipv6_header_hash(struct mbuf * m)1462 ipv6_header_hash(struct mbuf * m)
1463 {
1464 	u_char *            data;
1465 	int                 i;
1466 	int32_t             offset;
1467 	struct mbuf *       orig_m = m;
1468 	uint32_t *          scan;
1469 	uint32_t            val;
1470 
1471 	/* find the IP protocol field relative to the start of the packet */
1472 	offset = offsetof(struct ip6_hdr, ip6_src) + sizeof(struct ether_header);
1473 	m = S_mbuf_skip_to_offset(m, &offset);
1474 	if (m == NULL) {
1475 		goto bad_ipv6_packet;
1476 	}
1477 	data = mtod(m, u_char *) + offset;
1478 	val = 0;
1479 	if ((m->m_len - offset) >= (int)IP6_ADDRS_LEN) {
1480 		/* this should be the normal case */
1481 		for (i = 0, scan = (uint32_t *)data;
1482 		    i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t));
1483 		    i++, scan++) {
1484 			val ^= *scan;
1485 		}
1486 	} else {
1487 		for (i = 0; i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t)); i++) {
1488 			uint32_t    tmp;
1489 			if (S_mbuf_copy_uint32(m, offset + i * sizeof(uint32_t),
1490 			    (uint32_t *)&tmp)) {
1491 				goto bad_ipv6_packet;
1492 			}
1493 			val ^= tmp;
1494 		}
1495 	}
1496 	return ntohl(val);
1497 
1498 bad_ipv6_packet:
1499 	return ether_header_hash(mtod(orig_m, struct ether_header *));
1500 }
1501 
1502 #endif /* BOND_HASH_L3_HEADER */
1503 
1504 static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m,int len)1505 _mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
1506 {
1507 	mbuf_setdata(m, mtodo(m, len), mbuf_len(m) - len);
1508 	mbuf_pkthdr_adjustlen(m, -len);
1509 }
1510 
1511 static uint32_t
get_packet_hash(mbuf_t m)1512 get_packet_hash(mbuf_t m)
1513 {
1514 	uint32_t        flow_hash;
1515 
1516 	if (m->m_pkthdr.pkt_flowid != 0) {
1517 		flow_hash = m->m_pkthdr.pkt_flowid;
1518 	} else {
1519 		struct ether_header *   eh_p;
1520 
1521 		eh_p = mtod(m, struct ether_header *);
1522 #if BOND_HASH_L3_HEADER
1523 		switch (ntohs(eh_p->ether_type)) {
1524 		case ETHERTYPE_IP:
1525 			flow_hash = ip_header_hash(m);
1526 			break;
1527 		case ETHERTYPE_IPV6:
1528 			flow_hash = ipv6_header_hash(m);
1529 			break;
1530 		default:
1531 			flow_hash = ether_header_hash(eh_p);
1532 			break;
1533 		}
1534 #else /* BOND_HASH_L3_HEADER */
1535 		flow_hash = ether_header_hash(eh_p);
1536 #endif /* BOND_HASH_L3_HEADER */
1537 	}
1538 	return flow_hash;
1539 }
1540 
1541 static ifnet_t
bond_get_port_ifp(ifnet_t ifp,uint32_t hash)1542 bond_get_port_ifp(ifnet_t ifp, uint32_t hash)
1543 {
1544 	uint32_t        port_index;
1545 	ifbond_ref      ifb;
1546 	ifnet_t         port_ifp = NULL;
1547 
1548 	bond_lock();
1549 	ifb = ifnet_softc(ifp);
1550 	if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1551 	    || ifb->ifb_distributing_count == 0) {
1552 		goto done;
1553 	}
1554 	port_index = hash % ifb->ifb_distributing_count;
1555 	port_ifp = ifb->ifb_distributing_array[port_index]->po_ifp;
1556 
1557 done:
1558 	bond_unlock();
1559 	return port_ifp;
1560 }
1561 
1562 static int
bond_output(ifnet_t ifp,mbuf_t m)1563 bond_output(ifnet_t ifp, mbuf_t m)
1564 {
1565 	struct flowadv  adv = { .code = FADV_SUCCESS };
1566 	int             error = 0;
1567 	int             flags;
1568 	uint32_t        hash;
1569 	uint32_t        len;
1570 	int             log_level;
1571 	ifnet_t         port_ifp;
1572 
1573 	VERIFY((m->m_flags & M_PKTHDR) != 0);
1574 	hash = get_packet_hash(m);
1575 	port_ifp = bond_get_port_ifp(ifp, hash);
1576 	if (port_ifp == NULL) {
1577 		m_freem(m);
1578 		goto done;
1579 	}
1580 	if (ifp->if_bpf != NULL) {
1581 		struct ether_header *   eh_p;
1582 
1583 		eh_p = mtod(m, struct ether_header *);
1584 		_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
1585 		bond_bpf_tap_out(ifp, m, eh_p);
1586 		_mbuf_adjust_pkthdr_and_data(m, -ETHER_HDR_LEN);
1587 	}
1588 	len = m->m_pkthdr.len;
1589 	if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
1590 		len += ETHER_VLAN_ENCAP_LEN;
1591 	}
1592 	ifnet_stat_increment_out(ifp, 1, len, 0);
1593 	flags = DLIL_OUTPUT_FLAGS_RAW;
1594 	error = dlil_output(port_ifp, PF_BOND, m, NULL, NULL, flags, &adv);
1595 	log_level = (error != 0) ? LOG_NOTICE : LOG_DEBUG;
1596 	BOND_LOG(log_level, BD_DBGF_OUTPUT, "%s: %s bytes %d, error=%d",
1597 	    ifp->if_xname, port_ifp->if_xname, len, error);
1598 	if (error == 0) {
1599 		if (adv.code == FADV_FLOW_CONTROLLED) {
1600 			error = EQFULL;
1601 		} else if (adv.code == FADV_SUSPENDED) {
1602 			error = EQSUSPENDED;
1603 		}
1604 	}
1605 done:
1606 	return error;
1607 }
1608 
1609 static bondport_ref
ifbond_lookup_port(ifbond_ref ifb,struct ifnet * port_ifp)1610 ifbond_lookup_port(ifbond_ref ifb, struct ifnet * port_ifp)
1611 {
1612 	bondport_ref        p;
1613 	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1614 		if (p->po_ifp == port_ifp) {
1615 			return p;
1616 		}
1617 	}
1618 	return NULL;
1619 }
1620 
1621 static bondport_ref
bond_lookup_port(struct ifnet * port_ifp)1622 bond_lookup_port(struct ifnet * port_ifp)
1623 {
1624 	ifbond_ref          ifb;
1625 	bondport_ref        port;
1626 
1627 	TAILQ_FOREACH(ifb, &g_bond->ifbond_list, ifb_bond_list) {
1628 		port = ifbond_lookup_port(ifb, port_ifp);
1629 		if (port != NULL) {
1630 			return port;
1631 		}
1632 	}
1633 	return NULL;
1634 }
1635 
1636 static void
bond_receive_lacpdu(struct mbuf * m,struct ifnet * port_ifp)1637 bond_receive_lacpdu(struct mbuf * m, struct ifnet * port_ifp)
1638 {
1639 	struct ifnet *              bond_ifp = NULL;
1640 	ifbond_ref                  ifb;
1641 	int                         event_code = 0;
1642 	bool                        need_link_update = false;
1643 	bondport_ref                p;
1644 
1645 	BOND_LOG(LOG_DEBUG, BD_DBGF_LACP, "%s", port_ifp->if_xname);
1646 
1647 	bond_lock();
1648 	if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1649 		goto done;
1650 	}
1651 	p = bond_lookup_port(port_ifp);
1652 	if (p == NULL) {
1653 		goto done;
1654 	}
1655 	if (p->po_enabled == 0) {
1656 		goto done;
1657 	}
1658 	ifb = p->po_bond;
1659 	if (ifb->ifb_mode != IF_BOND_MODE_LACP) {
1660 		goto done;
1661 	}
1662 	/*
1663 	 * Work-around for rdar://problem/51372042
1664 	 * Sometimes, the link comes up but the driver doesn't report the
1665 	 * negotiated medium at that time. When we receive an LACPDU packet,
1666 	 * and the medium is unknown, force a link status check. Don't force
1667 	 * the link status check more often than _FORCE_LINK_EVENT_INTERVAL
1668 	 * seconds.
1669 	 */
1670 #define _FORCE_LINK_EVENT_INTERVAL      1
1671 	if (media_type_unknown(&p->po_media_info)) {
1672 		uint64_t        now = net_uptime();
1673 
1674 		if ((now - p->po_force_link_event_time) >=
1675 		    _FORCE_LINK_EVENT_INTERVAL) {
1676 			need_link_update = true;
1677 			p->po_force_link_event_time = now;
1678 		}
1679 	}
1680 	bondport_receive_lacpdu(p, (lacpdu_ref)m_mtod_current(m));
1681 	if (ifbond_selection(ifb)) {
1682 		event_code = (ifb->ifb_active_lag == NULL)
1683 		    ? KEV_DL_LINK_OFF
1684 		    : KEV_DL_LINK_ON;
1685 		/* XXX need to take a reference on bond_ifp */
1686 		bond_ifp = ifb->ifb_ifp;
1687 		ifb->ifb_last_link_event = event_code;
1688 	} else {
1689 		event_code = (ifb->ifb_active_lag == NULL)
1690 		    ? KEV_DL_LINK_OFF
1691 		    : KEV_DL_LINK_ON;
1692 		if (event_code != ifb->ifb_last_link_event) {
1693 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
1694 			    "%s: (receive) generating LINK event",
1695 			    ifb->ifb_name);
1696 			bond_ifp = ifb->ifb_ifp;
1697 			ifb->ifb_last_link_event = event_code;
1698 		}
1699 	}
1700 
1701 done:
1702 	bond_unlock();
1703 	if (bond_ifp != NULL) {
1704 		interface_link_event(bond_ifp, event_code);
1705 	}
1706 	m_freem(m);
1707 	if (need_link_update) {
1708 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
1709 		    "simulating link status changed event");
1710 		bond_handle_event(port_ifp, KEV_DL_LINK_ON);
1711 	}
1712 	return;
1713 }
1714 
1715 static void
bond_receive_la_marker_pdu(struct mbuf * m,struct ifnet * port_ifp)1716 bond_receive_la_marker_pdu(struct mbuf * m, struct ifnet * port_ifp)
1717 {
1718 	la_marker_pdu_ref           marker_p;
1719 	bondport_ref                p;
1720 
1721 	BOND_LOG(LOG_DEBUG, BD_DBGF_LACP, "%s", port_ifp->if_xname);
1722 
1723 	marker_p = (la_marker_pdu_ref)(m_mtod_current(m) + ETHER_HDR_LEN);
1724 	if (marker_p->lm_marker_tlv_type != LA_MARKER_TLV_TYPE_MARKER) {
1725 		goto failed;
1726 	}
1727 	bond_lock();
1728 	if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1729 		bond_unlock();
1730 		goto failed;
1731 	}
1732 	p = bond_lookup_port(port_ifp);
1733 	if (p == NULL || p->po_enabled == 0
1734 	    || p->po_bond->ifb_mode != IF_BOND_MODE_LACP) {
1735 		bond_unlock();
1736 		goto failed;
1737 	}
1738 	/* echo back the same packet as a marker response */
1739 	marker_p->lm_marker_tlv_type = LA_MARKER_TLV_TYPE_MARKER_RESPONSE;
1740 	bondport_slow_proto_transmit(p, (packet_buffer_ref)m);
1741 	bond_unlock();
1742 	return;
1743 
1744 failed:
1745 	m_freem(m);
1746 	return;
1747 }
1748 
1749 static bool
is_slow_proto_multicast(struct ether_header * eh_p)1750 is_slow_proto_multicast(struct ether_header * eh_p)
1751 {
1752 	return bcmp(eh_p->ether_dhost, &slow_proto_multicast,
1753 	           sizeof(eh_p->ether_dhost)) == 0 &&
1754 	       eh_p->ether_type == htons(IEEE8023AD_SLOW_PROTO_ETHERTYPE);
1755 }
1756 
1757 static void
bond_handle_slow_proto_multicast(ifnet_t port_ifp,mbuf_t m)1758 bond_handle_slow_proto_multicast(ifnet_t port_ifp, mbuf_t m)
1759 {
1760 	u_char  subtype = *mtod(m, u_char *);
1761 
1762 	if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP) {
1763 		if (m->m_pkthdr.len < LACPDU_MIN_SIZE) {
1764 			BOND_LOG(LOG_DEBUG, BD_DBGF_INPUT,
1765 			    "dropping short LACP frame %d < %d",
1766 			    m->m_pkthdr.len, LACPDU_MIN_SIZE);
1767 			goto discard;
1768 		}
1769 		/* send to lacp */
1770 		if (m->m_len < LACPDU_MIN_SIZE) {
1771 			m = m_pullup(m, LACPDU_MIN_SIZE);
1772 			if (m == NULL) {
1773 				BOND_LOG(LOG_NOTICE, BD_DBGF_INPUT,
1774 				    "m_pullup LACPDU failed");
1775 				return;
1776 			}
1777 		}
1778 		bond_receive_lacpdu(m, port_ifp);
1779 	} else if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LA_MARKER_PROTOCOL) {
1780 		int         min_size;
1781 
1782 		/* restore the ethernet header pointer in the mbuf */
1783 		m->m_pkthdr.len += ETHER_HDR_LEN;
1784 		m->m_data -= ETHER_HDR_LEN;
1785 		m->m_len += ETHER_HDR_LEN;
1786 		min_size = ETHER_HDR_LEN + offsetof(la_marker_pdu, lm_reserved);
1787 		if (m->m_pkthdr.len < min_size) {
1788 			goto discard;
1789 		}
1790 		/* send to lacp */
1791 		if (m->m_len < min_size) {
1792 			m = m_pullup(m, min_size);
1793 			if (m == NULL) {
1794 				BOND_LOG(LOG_NOTICE, BD_DBGF_INPUT,
1795 				    "m_pullup LA_MARKER failed");
1796 				return;
1797 			}
1798 		}
1799 		/* send to marker responder */
1800 		bond_receive_la_marker_pdu(m, port_ifp);
1801 	} else if (subtype == 0
1802 	    || subtype > IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_END) {
1803 		/* invalid subtype, discard the frame */
1804 		goto discard;
1805 	}
1806 	return;
1807 
1808 discard:
1809 	m_freem(m);
1810 	return;
1811 }
1812 
1813 static void
bond_input_packet_list(ifnet_t port_ifp,mbuf_t list)1814 bond_input_packet_list(ifnet_t port_ifp, mbuf_t list)
1815 {
1816 	ifbond_ref                        ifb;
1817 	struct ifnet *                    ifp;
1818 	bondport_ref                      p;
1819 	struct ifnet_stat_increment_param s;
1820 
1821 	/* verify that we're ready to receive the packet list */
1822 	bond_lock();
1823 	if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1824 		goto discard;
1825 	}
1826 	p = bond_lookup_port(port_ifp);
1827 	if (p == NULL || bondport_collecting(p) == 0) {
1828 		goto discard;
1829 	}
1830 	ifb = p->po_bond;
1831 	ifp = ifb->ifb_ifp;
1832 	bond_unlock();
1833 
1834 	bzero(&s, sizeof(s));
1835 
1836 	for (mbuf_t scan = list; scan != NULL; scan = scan->m_nextpkt) {
1837 		struct ether_header *   eh_p;
1838 		void * __single         frame_header;
1839 
1840 		/* clear promisc so that the packet doesn't get dropped */
1841 		mbuf_setflags_mask(scan, 0, MBUF_PROMISC);
1842 		s.packets_in++;
1843 		s.bytes_in += scan->m_pkthdr.len + ETHER_HDR_LEN;
1844 		if ((scan->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
1845 			s.bytes_in += ETHER_VLAN_ENCAP_LEN;
1846 		}
1847 		if (ifp->if_bpf != NULL) {
1848 			frame_header = scan->m_pkthdr.pkt_hdr;
1849 			eh_p = (struct ether_header *)frame_header;
1850 			bond_bpf_tap_in(ifp, scan, eh_p);
1851 		}
1852 		scan->m_pkthdr.rcvif = ifp;
1853 	}
1854 	BOND_LOG(LOG_DEBUG, BD_DBGF_INPUT, "%s: %s packets %d bytes %d",
1855 	    ifp->if_xname, port_ifp->if_xname, s.packets_in, s.bytes_in);
1856 
1857 	dlil_input_packet_list(ifp, list);
1858 	return;
1859 
1860 discard:
1861 	bond_unlock();
1862 	m_freem_list(list);
1863 	return;
1864 }
1865 
1866 static int
bond_input(ifnet_t port_ifp,__unused protocol_family_t protocol,mbuf_t m)1867 bond_input(ifnet_t port_ifp, __unused protocol_family_t protocol, mbuf_t m)
1868 {
1869 	struct ether_header *       eh_p;
1870 	void * __single              frame_header;
1871 	mblist                      list;
1872 	mbuf_t                      next_packet = NULL;
1873 	mbuf_t                      scan;
1874 
1875 	mblist_init(&list);
1876 	for (scan = m; scan != NULL; scan = next_packet) {
1877 		next_packet = scan->m_nextpkt;
1878 		scan->m_nextpkt = NULL;
1879 
1880 		frame_header = scan->m_pkthdr.pkt_hdr;
1881 		eh_p = (struct ether_header *)frame_header;
1882 		if ((scan->m_flags & M_MCAST) != 0 &&
1883 		    is_slow_proto_multicast(eh_p)) {
1884 			/* send up what we have */
1885 			if (list.head != NULL) {
1886 				bond_input_packet_list(port_ifp, list.head);
1887 				mblist_init(&list);
1888 			}
1889 			/* process this multicast */
1890 			bond_handle_slow_proto_multicast(port_ifp, scan);
1891 		} else {
1892 			mblist_append(&list, scan);
1893 		}
1894 	}
1895 	if (list.head != NULL) {
1896 		bond_input_packet_list(port_ifp, list.head);
1897 	}
1898 	return 0;
1899 }
1900 
1901 static __inline__ const char *
bondport_get_name(bondport_ref p)1902 bondport_get_name(bondport_ref p)
1903 {
1904 	return __unsafe_null_terminated_from_indexable(p->po_name);
1905 }
1906 
1907 static __inline__ int
bondport_get_index(bondport_ref p)1908 bondport_get_index(bondport_ref p)
1909 {
1910 	return ifnet_index(p->po_ifp);
1911 }
1912 
1913 static void
bondport_slow_proto_transmit(bondport_ref p,packet_buffer_ref buf)1914 bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf)
1915 {
1916 	struct ether_header *       eh_p;
1917 	int                         error;
1918 
1919 	/* packet_buffer_allocate leaves room for ethernet header */
1920 	eh_p = mtod(buf, struct ether_header *);
1921 	bcopy(&slow_proto_multicast, &eh_p->ether_dhost, sizeof(eh_p->ether_dhost));
1922 	bcopy(&p->po_saved_addr, eh_p->ether_shost, sizeof(eh_p->ether_shost));
1923 	eh_p->ether_type = htons(IEEE8023AD_SLOW_PROTO_ETHERTYPE);
1924 	error = ifnet_output_raw(p->po_ifp, PF_BOND, buf);
1925 	if (error != 0) {
1926 		BOND_LOG(LOG_NOTICE, BD_DBGF_LACP,
1927 		    "(%s) failed %d", bondport_get_name(p), error);
1928 	}
1929 	return;
1930 }
1931 
1932 static void
bondport_timer_process_func(devtimer_ref timer,devtimer_process_func_event event)1933 bondport_timer_process_func(devtimer_ref timer,
1934     devtimer_process_func_event event)
1935 {
1936 	bondport_ref        p;
1937 
1938 	switch (event) {
1939 	case devtimer_process_func_event_lock:
1940 		bond_lock();
1941 		devtimer_retain(timer);
1942 		break;
1943 	case devtimer_process_func_event_unlock:
1944 		if (devtimer_valid(timer)) {
1945 			/* as long as the devtimer is valid, we can look at arg0 */
1946 			int                 event_code = 0;
1947 			struct ifnet *      bond_ifp = NULL;
1948 
1949 			p = (bondport_ref)devtimer_arg0(timer);
1950 			if (ifbond_selection(p->po_bond)) {
1951 				event_code = (p->po_bond->ifb_active_lag == NULL)
1952 				    ? KEV_DL_LINK_OFF
1953 				    : KEV_DL_LINK_ON;
1954 				/* XXX need to take a reference on bond_ifp */
1955 				bond_ifp = p->po_bond->ifb_ifp;
1956 				p->po_bond->ifb_last_link_event = event_code;
1957 			} else {
1958 				event_code = (p->po_bond->ifb_active_lag == NULL)
1959 				    ? KEV_DL_LINK_OFF
1960 				    : KEV_DL_LINK_ON;
1961 				if (event_code !=
1962 				    p->po_bond->ifb_last_link_event) {
1963 					BOND_LOG(LOG_NOTICE, BD_DBGF_LACP,
1964 					    "%s: (timer) generating LINK event",
1965 					    p->po_bond->ifb_name);
1966 					bond_ifp = p->po_bond->ifb_ifp;
1967 					p->po_bond->ifb_last_link_event = event_code;
1968 				}
1969 			}
1970 			devtimer_release(timer);
1971 			bond_unlock();
1972 			if (bond_ifp != NULL) {
1973 				interface_link_event(bond_ifp, event_code);
1974 			}
1975 		} else {
1976 			/* timer is going away */
1977 			devtimer_release(timer);
1978 			bond_unlock();
1979 		}
1980 		break;
1981 	default:
1982 		break;
1983 	}
1984 }
1985 
1986 static bondport_ref
bondport_create(struct ifnet * port_ifp,lacp_port_priority priority,int active,int short_timeout,int * ret_error)1987 bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
1988     int active, int short_timeout, int * ret_error)
1989 {
1990 	int                         error = 0;
1991 	bondport_ref                p = NULL;
1992 	lacp_actor_partner_state    s;
1993 
1994 	*ret_error = 0;
1995 	p = kalloc_type(struct bondport_s, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1996 	multicast_list_init(&p->po_multicast);
1997 	if ((u_int32_t)snprintf(p->po_name, sizeof(p->po_name), "%s%d",
1998 	    ifnet_name(port_ifp), ifnet_unit(port_ifp))
1999 	    >= sizeof(p->po_name)) {
2000 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2001 		    "name too large");
2002 		*ret_error = EINVAL;
2003 		goto failed;
2004 	}
2005 	error = siocgifdevmtu(port_ifp, &p->po_devmtu);
2006 	if (error != 0) {
2007 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2008 		    "SIOCGIFDEVMTU %s failed, %d",
2009 		    bondport_get_name(p), error);
2010 		goto failed;
2011 	}
2012 	/* remember the current interface MTU so it can be restored */
2013 	p->po_devmtu.ifdm_current = ifnet_mtu(port_ifp);
2014 	p->po_ifp = port_ifp;
2015 	p->po_media_info = interface_media_info(port_ifp);
2016 	p->po_current_while_timer = devtimer_create(bondport_timer_process_func, p);
2017 	if (p->po_current_while_timer == NULL) {
2018 		*ret_error = ENOMEM;
2019 		goto failed;
2020 	}
2021 	p->po_periodic_timer = devtimer_create(bondport_timer_process_func, p);
2022 	if (p->po_periodic_timer == NULL) {
2023 		*ret_error = ENOMEM;
2024 		goto failed;
2025 	}
2026 	p->po_wait_while_timer = devtimer_create(bondport_timer_process_func, p);
2027 	if (p->po_wait_while_timer == NULL) {
2028 		*ret_error = ENOMEM;
2029 		goto failed;
2030 	}
2031 	p->po_transmit_timer = devtimer_create(bondport_timer_process_func, p);
2032 	if (p->po_transmit_timer == NULL) {
2033 		*ret_error = ENOMEM;
2034 		goto failed;
2035 	}
2036 	p->po_receive_state = ReceiveState_none;
2037 	p->po_mux_state = MuxState_none;
2038 	p->po_priority = priority;
2039 	s = 0;
2040 	s = lacp_actor_partner_state_set_aggregatable(s);
2041 	if (short_timeout) {
2042 		s = lacp_actor_partner_state_set_short_timeout(s);
2043 	}
2044 	if (active) {
2045 		s = lacp_actor_partner_state_set_active_lacp(s);
2046 	}
2047 	p->po_actor_state = s;
2048 	return p;
2049 
2050 failed:
2051 	bondport_free(p);
2052 	return NULL;
2053 }
2054 
2055 static void
bondport_start(bondport_ref p)2056 bondport_start(bondport_ref p)
2057 {
2058 	bondport_receive_machine(p, LAEventStart, NULL);
2059 	bondport_mux_machine(p, LAEventStart, NULL);
2060 	bondport_periodic_transmit_machine(p, LAEventStart, NULL);
2061 	bondport_transmit_machine(p, LAEventStart, NULL);
2062 	return;
2063 }
2064 
2065 /*
2066  * Function: bondport_invalidate_timers
2067  * Purpose:
2068  *   Invalidate all of the timers for the bondport.
2069  */
2070 static void
bondport_invalidate_timers(bondport_ref p)2071 bondport_invalidate_timers(bondport_ref p)
2072 {
2073 	devtimer_invalidate(p->po_current_while_timer);
2074 	devtimer_invalidate(p->po_periodic_timer);
2075 	devtimer_invalidate(p->po_wait_while_timer);
2076 	devtimer_invalidate(p->po_transmit_timer);
2077 }
2078 
2079 /*
2080  * Function: bondport_cancel_timers
2081  * Purpose:
2082  *   Cancel all of the timers for the bondport.
2083  */
2084 static void
bondport_cancel_timers(bondport_ref p)2085 bondport_cancel_timers(bondport_ref p)
2086 {
2087 	devtimer_cancel(p->po_current_while_timer);
2088 	devtimer_cancel(p->po_periodic_timer);
2089 	devtimer_cancel(p->po_wait_while_timer);
2090 	devtimer_cancel(p->po_transmit_timer);
2091 }
2092 
2093 static void
bondport_free(bondport_ref p)2094 bondport_free(bondport_ref p)
2095 {
2096 	multicast_list_remove(&p->po_multicast);
2097 	devtimer_release(p->po_current_while_timer);
2098 	devtimer_release(p->po_periodic_timer);
2099 	devtimer_release(p->po_wait_while_timer);
2100 	devtimer_release(p->po_transmit_timer);
2101 	kfree_type(struct bondport_s, p);
2102 	return;
2103 }
2104 
2105 static __inline__ int
bond_device_mtu(struct ifnet * ifp,ifbond_ref ifb)2106 bond_device_mtu(struct ifnet * ifp, ifbond_ref ifb)
2107 {
2108 	return ((int)ifnet_mtu(ifp) > ifb->ifb_altmtu)
2109 	       ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu;
2110 }
2111 
2112 static int
bond_add_interface(struct ifnet * ifp,struct ifnet * port_ifp)2113 bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp)
2114 {
2115 	u_int32_t                   eflags;
2116 	uint32_t                    control_flags = 0;
2117 	int                         devmtu;
2118 	int                         error = 0;
2119 	int                         event_code = 0;
2120 	int                         first = FALSE;
2121 	ifbond_ref                  ifb;
2122 	bondport_ref *              new_array = NULL;
2123 	bondport_ref *              old_array = NULL;
2124 	bondport_ref                p;
2125 	int                         old_max = 0;
2126 	int                         new_max = 0;
2127 
2128 	if (IFNET_IS_INTCOPROC(port_ifp) || IFNET_IS_MANAGEMENT(port_ifp)) {
2129 		return EINVAL;
2130 	}
2131 
2132 	/* pre-allocate space for new port */
2133 	p = bondport_create(port_ifp, 0x8000, 1, 0, &error);
2134 	if (p == NULL) {
2135 		return error;
2136 	}
2137 	bond_lock();
2138 	ifb = (ifbond_ref)ifnet_softc(ifp);
2139 	if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2140 		bond_unlock();
2141 		bondport_free(p);
2142 		return ifb == NULL ? EOPNOTSUPP : EBUSY;
2143 	}
2144 
2145 	/* make sure this interface can handle our current MTU */
2146 	devmtu = bond_device_mtu(ifp, ifb);
2147 	if (devmtu != 0
2148 	    && (devmtu > p->po_devmtu.ifdm_max || devmtu < p->po_devmtu.ifdm_min)) {
2149 		bond_unlock();
2150 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2151 		    "interface %s doesn't support mtu %d",
2152 		    bondport_get_name(p), devmtu);
2153 		bondport_free(p);
2154 		return EINVAL;
2155 	}
2156 
2157 	/* make sure ifb doesn't get de-allocated while we wait */
2158 	ifbond_retain(ifb);
2159 
2160 	/* wait for other add or remove to complete */
2161 	ifbond_wait(ifb, __func__);
2162 
2163 	if (ifbond_flags_if_detaching(ifb)) {
2164 		/* someone destroyed the bond while we were waiting */
2165 		error = EBUSY;
2166 		goto signal_done;
2167 	}
2168 	if (bond_lookup_port(port_ifp) != NULL) {
2169 		/* port is already part of a bond */
2170 		error = EBUSY;
2171 		goto signal_done;
2172 	}
2173 	if ((ifnet_eflags(port_ifp) & (IFEF_VLAN | IFEF_BOND)) != 0) {
2174 		/* interface already has VLAN's, or is part of bond */
2175 		error = EBUSY;
2176 		goto signal_done;
2177 	}
2178 
2179 	/* mark the interface busy */
2180 	eflags = if_set_eflags(port_ifp, IFEF_BOND);
2181 	if ((eflags & IFEF_VLAN) != 0) {
2182 		/* vlan got in ahead of us */
2183 		if_clear_eflags(port_ifp, IFEF_BOND);
2184 		error = EBUSY;
2185 		goto signal_done;
2186 	}
2187 
2188 	if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2189 		ifnet_set_offload(ifp, ifnet_offload(port_ifp));
2190 		ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
2191 		if (ifbond_flags_lladdr(ifb) == FALSE) {
2192 			first = TRUE;
2193 		}
2194 	} else {
2195 		ifnet_offload_t         ifp_offload;
2196 		ifnet_offload_t         port_ifp_offload;
2197 
2198 		ifp_offload = ifnet_offload(ifp);
2199 		port_ifp_offload = ifnet_offload(port_ifp);
2200 		if (ifp_offload != port_ifp_offload) {
2201 			ifnet_offload_t     offload;
2202 
2203 			offload = ifp_offload & port_ifp_offload;
2204 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2205 			    "(%s, %s) hwassist values don't match 0x%x != 0x%x,"
2206 			    " using 0x%x instead",
2207 			    ifb->ifb_name, bondport_get_name(p),
2208 			    ifp_offload, port_ifp_offload, offload);
2209 			/*
2210 			 * XXX
2211 			 * if the bond has VLAN's, we can't simply change the hwassist
2212 			 * field behind its back: this needs work
2213 			 */
2214 			ifnet_set_offload(ifp, offload);
2215 		}
2216 	}
2217 	p->po_bond = ifb;
2218 
2219 	/* remember the port's ethernet address so it can be restored */
2220 	ether_addr_copy(p->po_saved_addr.octet,
2221 	    (uint8_t *__indexable)IF_LLADDR(port_ifp));
2222 
2223 	/* add it to the list of ports */
2224 	TAILQ_INSERT_TAIL(&ifb->ifb_port_list, p, po_port_list);
2225 	ifb->ifb_port_count++;
2226 
2227 	bond_unlock();
2228 
2229 
2230 	/* first port added to bond determines bond's ethernet address */
2231 	if (first) {
2232 		ifnet_set_lladdr_and_type(ifp, IF_LLADDR(port_ifp), ETHER_ADDR_LEN,
2233 		    IFT_ETHER);
2234 	}
2235 	uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_IN_LIST);
2236 
2237 	/* allocate a larger distributing array */
2238 	new_max = ifb->ifb_port_count;
2239 	new_array = kalloc_type(bondport_ref, new_max, Z_WAITOK);
2240 	if (new_array == NULL) {
2241 		error = ENOMEM;
2242 		goto failed;
2243 	}
2244 
2245 	/* attach our BOND "protocol" to the interface */
2246 	error = bond_attach_protocol(port_ifp);
2247 	if (error) {
2248 		goto failed;
2249 	}
2250 	uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_PROTO_ATTACHED);
2251 
2252 	/* set the interface MTU */
2253 	devmtu = bond_device_mtu(ifp, ifb);
2254 	error = siocsifmtu(port_ifp, devmtu);
2255 	if (error != 0) {
2256 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2257 		    "(%s, %s): SIOCSIFMTU %d failed %d",
2258 		    ifb->ifb_name, bondport_get_name(p), devmtu, error);
2259 		goto failed;
2260 	}
2261 	uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_MTU_SET);
2262 
2263 	/* program the port with our multicast addresses */
2264 	error = multicast_list_program(&p->po_multicast, ifp, port_ifp);
2265 	if (error) {
2266 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2267 		    "(%s, %s): multicast_list_program failed %d",
2268 		    ifb->ifb_name, bondport_get_name(p), error);
2269 		goto failed;
2270 	}
2271 
2272 	/* mark the interface up */
2273 	ifnet_set_flags(port_ifp, IFF_UP, IFF_UP);
2274 
2275 	error = ifnet_ioctl(port_ifp, 0, SIOCSIFFLAGS, NULL);
2276 	if (error != 0) {
2277 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2278 		    "(%s, %s): SIOCSIFFLAGS failed %d",
2279 		    ifb->ifb_name, bondport_get_name(p), error);
2280 		goto failed;
2281 	}
2282 
2283 	/* re-program the port's ethernet address */
2284 	error = if_siflladdr(port_ifp,
2285 	    (const struct ether_addr *)IF_LLADDR(ifp));
2286 	if (error == 0) {
2287 		if (memcmp(IF_LLADDR(ifp), IF_LLADDR(port_ifp), ETHER_ADDR_LEN)
2288 		    != 0) {
2289 			/* it lied, it really doesn't support setting lladdr */
2290 			error = EOPNOTSUPP;
2291 		}
2292 	}
2293 	if (error != 0) {
2294 		/* port doesn't support setting the link address */
2295 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2296 		    "(%s, %s): if_siflladdr failed %d",
2297 		    ifb->ifb_name, bondport_get_name(p), error);
2298 		error = ifnet_set_promiscuous(port_ifp, 1);
2299 		if (error != 0) {
2300 			/* port doesn't support setting promiscuous mode */
2301 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2302 			    "(%s, %s): set promiscuous failed %d",
2303 			    ifb->ifb_name, bondport_get_name(p), error);
2304 			goto failed;
2305 		}
2306 		uint32_bit_set(&control_flags,
2307 		    PORT_CONTROL_FLAGS_PROMISCUOUS_SET);
2308 	} else {
2309 		uint32_bit_set(&control_flags,
2310 		    PORT_CONTROL_FLAGS_LLADDR_SET);
2311 	}
2312 
2313 	/* if we're in promiscuous mode, enable that as well */
2314 	if (ifbond_flags_promisc(ifb)) {
2315 		error = ifnet_set_promiscuous(port_ifp, 1);
2316 		if (error != 0) {
2317 			/* port doesn't support setting promiscuous mode */
2318 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2319 			    "(%s, %s): set promiscuous failed %d",
2320 			    ifb->ifb_name, bondport_get_name(p), error);
2321 			goto failed;
2322 		}
2323 		uint32_bit_set(&control_flags,
2324 		    PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2325 	}
2326 
2327 	bond_lock();
2328 
2329 	/* no failures past this point */
2330 	p->po_enabled = 1;
2331 	p->po_control_flags = control_flags;
2332 
2333 	/* copy the contents of the existing distributing array */
2334 	if (ifb->ifb_distributing_count) {
2335 		bcopy(ifb->ifb_distributing_array, new_array,
2336 		    sizeof(*new_array) * ifb->ifb_distributing_count);
2337 	}
2338 	old_array = ifb->ifb_distributing_array;
2339 	old_max = ifb->ifb_distributing_max;
2340 	ifb->ifb_distributing_array = new_array;
2341 	ifb->ifb_distributing_max = new_max;
2342 
2343 	if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2344 		bondport_start(p);
2345 
2346 		/* check if we need to generate a link status event */
2347 		if (ifbond_selection(ifb)) {
2348 			event_code = (ifb->ifb_active_lag == NULL)
2349 			    ? KEV_DL_LINK_OFF
2350 			    : KEV_DL_LINK_ON;
2351 			ifb->ifb_last_link_event = event_code;
2352 		}
2353 	} else {
2354 		/* are we adding the first distributing interface? */
2355 		if (media_active(&p->po_media_info)) {
2356 			if (ifb->ifb_distributing_count == 0) {
2357 				ifb->ifb_last_link_event = event_code = KEV_DL_LINK_ON;
2358 			}
2359 			bondport_enable_distributing(p);
2360 		} else {
2361 			bondport_disable_distributing(p);
2362 		}
2363 	}
2364 
2365 	/* clear the busy state, and wakeup anyone waiting */
2366 	ifbond_signal(ifb, __func__);
2367 	bond_unlock();
2368 	if (event_code != 0) {
2369 		interface_link_event(ifp, event_code);
2370 	}
2371 	kfree_type(bondport_ref, old_max, old_array);
2372 	return 0;
2373 
2374 failed:
2375 	bond_assert_lock_not_held();
2376 
2377 	/* if this was the first port to be added, clear our address */
2378 	if (first) {
2379 		ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG);
2380 	}
2381 
2382 	kfree_type(bondport_ref, new_max, new_array);
2383 	if (uint32_bit_is_set(control_flags,
2384 	    PORT_CONTROL_FLAGS_LLADDR_SET)) {
2385 		int     error1;
2386 
2387 		error1 = if_siflladdr(port_ifp, &p->po_saved_addr);
2388 		if (error1 != 0) {
2389 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2390 			    "(%s, %s): if_siflladdr restore failed %d",
2391 			    ifb->ifb_name, bondport_get_name(p), error1);
2392 		}
2393 	}
2394 	if (uint32_bit_is_set(control_flags,
2395 	    PORT_CONTROL_FLAGS_PROMISCUOUS_SET)) {
2396 		int     error1;
2397 
2398 		error1 = ifnet_set_promiscuous(port_ifp, 0);
2399 		if (error1 != 0) {
2400 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2401 			    "(%s, %s): promiscous mode disable failed %d",
2402 			    ifb->ifb_name, bondport_get_name(p), error1);
2403 		}
2404 	}
2405 	if (uint32_bit_is_set(control_flags,
2406 	    PORT_CONTROL_FLAGS_PROTO_ATTACHED)) {
2407 		(void)bond_detach_protocol(port_ifp);
2408 	}
2409 	if (uint32_bit_is_set(control_flags,
2410 	    PORT_CONTROL_FLAGS_MTU_SET)) {
2411 		int error1;
2412 
2413 		error1 = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2414 		if (error1 != 0) {
2415 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2416 			    "(%s, %s): SIOCSIFMTU %d failed %d",
2417 			    ifb->ifb_name, bondport_get_name(p),
2418 			    p->po_devmtu.ifdm_current, error1);
2419 		}
2420 	}
2421 	bond_lock();
2422 	if (uint32_bit_is_set(control_flags,
2423 	    PORT_CONTROL_FLAGS_IN_LIST)) {
2424 		TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2425 		ifb->ifb_port_count--;
2426 	}
2427 	if_clear_eflags(ifp, IFEF_BOND);
2428 	if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2429 		ifb->ifb_altmtu = 0;
2430 		ifnet_set_mtu(ifp, ETHERMTU);
2431 		ifnet_set_offload(ifp, 0);
2432 	}
2433 
2434 signal_done:
2435 	ifbond_signal(ifb, __func__);
2436 	bond_unlock();
2437 	ifbond_release(ifb);
2438 	bondport_free(p);
2439 	return error;
2440 }
2441 
2442 static int
bond_remove_interface(ifbond_ref ifb,struct ifnet * port_ifp)2443 bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp)
2444 {
2445 	int                         active_lag = 0;
2446 	int                         error = 0;
2447 	int                         event_code = 0;
2448 	bondport_ref                head_port;
2449 	struct ifnet *              ifp;
2450 	int                         last = FALSE;
2451 	int                         new_link_address = FALSE;
2452 	bondport_ref                p;
2453 	lacp_actor_partner_state    s;
2454 	int                         was_distributing;
2455 
2456 	bond_assert_lock_held();
2457 
2458 	ifbond_retain(ifb);
2459 	ifbond_wait(ifb, "bond_remove_interface");
2460 
2461 	p = ifbond_lookup_port(ifb, port_ifp);
2462 	if (p == NULL) {
2463 		error = ENXIO;
2464 		/* it got removed by another thread */
2465 		goto signal_done;
2466 	}
2467 
2468 	/* de-select it and remove it from the lists */
2469 	was_distributing = bondport_flags_distributing(p);
2470 	bondport_disable_distributing(p);
2471 	if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2472 		bondport_set_selected(p, SelectedState_UNSELECTED);
2473 		active_lag = bondport_remove_from_LAG(p);
2474 		/* invalidate timers here while holding the bond_lock */
2475 		bondport_invalidate_timers(p);
2476 
2477 		/* announce that we're Individual now */
2478 		s = p->po_actor_state;
2479 		s = lacp_actor_partner_state_set_individual(s);
2480 		s = lacp_actor_partner_state_set_not_collecting(s);
2481 		s = lacp_actor_partner_state_set_not_distributing(s);
2482 		s = lacp_actor_partner_state_set_out_of_sync(s);
2483 		p->po_actor_state = s;
2484 		bondport_flags_set_ntt(p);
2485 	}
2486 
2487 	TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2488 	ifb->ifb_port_count--;
2489 
2490 	ifp = ifb->ifb_ifp;
2491 	head_port = TAILQ_FIRST(&ifb->ifb_port_list);
2492 	if (head_port == NULL) {
2493 		ifnet_set_flags(ifp, 0, IFF_RUNNING);
2494 		if (ifbond_flags_lladdr(ifb) == FALSE) {
2495 			last = TRUE;
2496 		}
2497 		ifnet_set_offload(ifp, 0);
2498 		ifnet_set_mtu(ifp, ETHERMTU);
2499 		ifb->ifb_altmtu = 0;
2500 	} else if (ifbond_flags_lladdr(ifb) == FALSE
2501 	    && bcmp(&p->po_saved_addr, IF_LLADDR(ifp),
2502 	    ETHER_ADDR_LEN) == 0) {
2503 		new_link_address = TRUE;
2504 	}
2505 	/* check if we need to generate a link status event */
2506 	if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2507 		if (ifbond_selection(ifb) || active_lag) {
2508 			event_code = (ifb->ifb_active_lag == NULL)
2509 			    ? KEV_DL_LINK_OFF
2510 			    : KEV_DL_LINK_ON;
2511 			ifb->ifb_last_link_event = event_code;
2512 		}
2513 		bondport_transmit_machine(p, LAEventStart,
2514 		    TRANSMIT_MACHINE_TX_IMMEDIATE);
2515 	} else {
2516 		/* are we removing the last distributing interface? */
2517 		if (was_distributing && ifb->ifb_distributing_count == 0) {
2518 			ifb->ifb_last_link_event = event_code = KEV_DL_LINK_OFF;
2519 		}
2520 	}
2521 	bond_unlock();
2522 
2523 	if (last) {
2524 		ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG);
2525 	} else if (new_link_address) {
2526 		struct ifnet *  scan_ifp;
2527 		bondport_ref    scan_port;
2528 
2529 		/* ifbond_wait() allows port list traversal without holding the lock */
2530 
2531 		/* this port gave the bond its ethernet address, switch to new one */
2532 		ifnet_set_lladdr_and_type(ifp,
2533 		    &head_port->po_saved_addr, ETHER_ADDR_LEN,
2534 		    IFT_ETHER);
2535 
2536 		/* re-program each port with the new link address */
2537 		TAILQ_FOREACH(scan_port, &ifb->ifb_port_list, po_port_list) {
2538 			scan_ifp = scan_port->po_ifp;
2539 
2540 			if (!uint32_bit_is_set(scan_port->po_control_flags,
2541 			    PORT_CONTROL_FLAGS_LLADDR_SET)) {
2542 				/* port doesn't support setting lladdr */
2543 				continue;
2544 			}
2545 			error = if_siflladdr(scan_ifp,
2546 			    (const struct ether_addr *) IF_LLADDR(ifp));
2547 			if (error != 0) {
2548 				BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2549 				    "(%s, %s): if_siflladdr (%s) failed %d",
2550 				    ifb->ifb_name, bondport_get_name(p),
2551 				    bondport_get_name(scan_port), error);
2552 			}
2553 		}
2554 	}
2555 
2556 	/* restore the port's ethernet address */
2557 	if (uint32_bit_is_set(p->po_control_flags,
2558 	    PORT_CONTROL_FLAGS_LLADDR_SET)) {
2559 		error = if_siflladdr(port_ifp, &p->po_saved_addr);
2560 		if (error != 0) {
2561 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2562 			    "(%s, %s): if_siflladdr failed %d",
2563 			    ifb->ifb_name, bondport_get_name(p), error);
2564 		}
2565 	}
2566 
2567 	/* disable promiscous mode (if we enabled it) */
2568 	if (uint32_bit_is_set(p->po_control_flags,
2569 	    PORT_CONTROL_FLAGS_PROMISCUOUS_SET)) {
2570 		error = ifnet_set_promiscuous(port_ifp, 0);
2571 		if (error != 0) {
2572 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2573 			    "(%s, %s): disable promiscuous failed %d",
2574 			    ifb->ifb_name, bondport_get_name(p), error);
2575 		}
2576 	}
2577 
2578 	/* disable promiscous mode from bond (if we enabled it) */
2579 	if (uint32_bit_is_set(p->po_control_flags,
2580 	    PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET)) {
2581 		error = ifnet_set_promiscuous(port_ifp, 0);
2582 		if (error != 0) {
2583 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2584 			    "(%s, %s): disable promiscuous failed %d",
2585 			    ifb->ifb_name, bondport_get_name(p), error);
2586 		}
2587 	}
2588 
2589 	/* restore the port's MTU */
2590 	error = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2591 	if (error != 0) {
2592 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2593 		    "(%s, %s): SIOCSIFMTU %d failed %d",
2594 		    ifb->ifb_name, bondport_get_name(p),
2595 		    p->po_devmtu.ifdm_current, error);
2596 	}
2597 
2598 	/* remove the bond "protocol" */
2599 	bond_detach_protocol(port_ifp);
2600 
2601 	/* generate link event */
2602 	if (event_code != 0) {
2603 		interface_link_event(ifp, event_code);
2604 	}
2605 
2606 	bond_lock();
2607 	bondport_free(p);
2608 	if_clear_eflags(port_ifp, IFEF_BOND);
2609 	/* release this bondport's reference to the ifbond */
2610 	ifbond_release(ifb);
2611 
2612 signal_done:
2613 	ifbond_signal(ifb, __func__);
2614 	ifbond_release(ifb);
2615 	return error;
2616 }
2617 
2618 static void
bond_set_lacp_mode(ifbond_ref ifb)2619 bond_set_lacp_mode(ifbond_ref ifb)
2620 {
2621 	bondport_ref                p;
2622 
2623 	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2624 		bondport_disable_distributing(p);
2625 		bondport_start(p);
2626 	}
2627 	return;
2628 }
2629 
2630 static void
bond_set_static_mode(ifbond_ref ifb)2631 bond_set_static_mode(ifbond_ref ifb)
2632 {
2633 	bondport_ref                p;
2634 	lacp_actor_partner_state    s;
2635 
2636 	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2637 		bondport_disable_distributing(p);
2638 		bondport_set_selected(p, SelectedState_UNSELECTED);
2639 		(void)bondport_remove_from_LAG(p);
2640 		bondport_cancel_timers(p);
2641 
2642 		/* announce that we're Individual now */
2643 		s = p->po_actor_state;
2644 		s = lacp_actor_partner_state_set_individual(s);
2645 		s = lacp_actor_partner_state_set_not_collecting(s);
2646 		s = lacp_actor_partner_state_set_not_distributing(s);
2647 		s = lacp_actor_partner_state_set_out_of_sync(s);
2648 		p->po_actor_state = s;
2649 		bondport_flags_set_ntt(p);
2650 		bondport_transmit_machine(p, LAEventStart,
2651 		    TRANSMIT_MACHINE_TX_IMMEDIATE);
2652 		/* clear state */
2653 		p->po_actor_state = 0;
2654 		bzero(&p->po_partner_state, sizeof(p->po_partner_state));
2655 
2656 		if (media_active(&p->po_media_info)) {
2657 			bondport_enable_distributing(p);
2658 		} else {
2659 			bondport_disable_distributing(p);
2660 		}
2661 	}
2662 	return;
2663 }
2664 
2665 static int
bond_set_mode(struct ifnet * ifp,int mode)2666 bond_set_mode(struct ifnet * ifp, int mode)
2667 {
2668 	int                         error = 0;
2669 	int                         event_code = 0;
2670 	ifbond_ref                  ifb;
2671 
2672 	bond_lock();
2673 	ifb = (ifbond_ref)ifnet_softc(ifp);
2674 	if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2675 		bond_unlock();
2676 		return (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2677 	}
2678 	if (ifb->ifb_mode == mode) {
2679 		bond_unlock();
2680 		return 0;
2681 	}
2682 
2683 	ifbond_retain(ifb);
2684 	ifbond_wait(ifb, "bond_set_mode");
2685 
2686 	/* verify (again) that the mode is actually different */
2687 	if (ifb->ifb_mode == mode) {
2688 		/* nothing to do */
2689 		goto signal_done;
2690 	}
2691 
2692 	ifb->ifb_mode = mode;
2693 	if (mode == IF_BOND_MODE_LACP) {
2694 		bond_set_lacp_mode(ifb);
2695 
2696 		/* check if we need to generate a link status event */
2697 		if (ifbond_selection(ifb)) {
2698 			event_code = (ifb->ifb_active_lag == NULL)
2699 			    ? KEV_DL_LINK_OFF
2700 			    : KEV_DL_LINK_ON;
2701 		}
2702 	} else {
2703 		bond_set_static_mode(ifb);
2704 		event_code = (ifb->ifb_distributing_count == 0)
2705 		    ? KEV_DL_LINK_OFF
2706 		    : KEV_DL_LINK_ON;
2707 	}
2708 	ifb->ifb_last_link_event = event_code;
2709 
2710 signal_done:
2711 	ifbond_signal(ifb, __func__);
2712 	bond_unlock();
2713 	ifbond_release(ifb);
2714 
2715 	if (event_code != 0) {
2716 		interface_link_event(ifp, event_code);
2717 	}
2718 	return error;
2719 }
2720 
2721 static int
bond_get_status(ifbond_ref ifb,struct if_bond_req * ibr_p,user_addr_t datap)2722 bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, user_addr_t datap)
2723 {
2724 	int                         count;
2725 	user_addr_t                 dst;
2726 	int                         error = 0;
2727 	struct if_bond_status_req * ibsr;
2728 	struct if_bond_status       ibs;
2729 	bondport_ref                port;
2730 
2731 	ibsr = &(ibr_p->ibr_ibru.ibru_status);
2732 	if (ibsr->ibsr_version != IF_BOND_STATUS_REQ_VERSION) {
2733 		return EINVAL;
2734 	}
2735 	ibsr->ibsr_key = ifb->ifb_key;
2736 	ibsr->ibsr_mode = ifb->ifb_mode;
2737 	ibsr->ibsr_total = ifb->ifb_port_count;
2738 	dst = proc_is64bit(current_proc())
2739 	    ? ibsr->ibsr_ibsru.ibsru_buffer64
2740 	    : CAST_USER_ADDR_T(ibsr->ibsr_ibsru.ibsru_buffer);
2741 	if (dst == USER_ADDR_NULL) {
2742 		/* just want to know how many there are */
2743 		goto done;
2744 	}
2745 	if (ibsr->ibsr_count < 0) {
2746 		return EINVAL;
2747 	}
2748 	count = (ifb->ifb_port_count < ibsr->ibsr_count)
2749 	    ? ifb->ifb_port_count : ibsr->ibsr_count;
2750 	TAILQ_FOREACH(port, &ifb->ifb_port_list, po_port_list) {
2751 		struct if_bond_partner_state *  ibps_p;
2752 		partner_state_ref               ps;
2753 
2754 		if (count == 0) {
2755 			break;
2756 		}
2757 		bzero(&ibs, sizeof(ibs));
2758 		strbufcpy(ibs.ibs_if_name, port->po_name);
2759 		ibs.ibs_port_priority = port->po_priority;
2760 		if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2761 			ibs.ibs_state = port->po_actor_state;
2762 			ibs.ibs_selected_state = port->po_selected;
2763 			ps = &port->po_partner_state;
2764 			ibps_p = &ibs.ibs_partner_state;
2765 			ibps_p->ibps_system = ps->ps_lag_info.li_system;
2766 			ibps_p->ibps_system_priority = ps->ps_lag_info.li_system_priority;
2767 			ibps_p->ibps_key = ps->ps_lag_info.li_key;
2768 			ibps_p->ibps_port = ps->ps_port;
2769 			ibps_p->ibps_port_priority = ps->ps_port_priority;
2770 			ibps_p->ibps_state = ps->ps_state;
2771 		} else {
2772 			/* fake the selected information */
2773 			ibs.ibs_selected_state = bondport_flags_distributing(port)
2774 			    ? SelectedState_SELECTED : SelectedState_UNSELECTED;
2775 		}
2776 		error = copyout(&ibs, dst, sizeof(ibs));
2777 		if (error != 0) {
2778 			break;
2779 		}
2780 		dst += sizeof(ibs);
2781 		count--;
2782 	}
2783 
2784 done:
2785 	if (error == 0) {
2786 		error = copyout(ibr_p, datap, sizeof(*ibr_p));
2787 	} else {
2788 		(void)copyout(ibr_p, datap, sizeof(*ibr_p));
2789 	}
2790 	return error;
2791 }
2792 
2793 static int
bond_set_promisc(struct ifnet * ifp)2794 bond_set_promisc(struct ifnet * ifp)
2795 {
2796 	int                 error = 0;
2797 	ifbond_ref          ifb;
2798 	bool                is_promisc;
2799 	bondport_ref        p;
2800 	int                 val;
2801 
2802 	is_promisc = (ifnet_flags(ifp) & IFF_PROMISC) != 0;
2803 
2804 	/* determine whether promiscuous state needs to be changed */
2805 	bond_lock();
2806 	ifb = (ifbond_ref)ifnet_softc(ifp);
2807 	if (ifb == NULL) {
2808 		bond_unlock();
2809 		error = EBUSY;
2810 		goto done;
2811 	}
2812 	if (is_promisc == ifbond_flags_promisc(ifb)) {
2813 		/* already in the right state */
2814 		bond_unlock();
2815 		goto done;
2816 	}
2817 	ifbond_retain(ifb);
2818 	ifbond_wait(ifb, __func__);
2819 	if (ifbond_flags_if_detaching(ifb)) {
2820 		/* someone destroyed the bond while we were waiting */
2821 		error = EBUSY;
2822 		goto signal_done;
2823 	}
2824 	bond_unlock();
2825 
2826 	/* update the promiscuous state of each memeber */
2827 	val = is_promisc ? 1 : 0;
2828 	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2829 		struct ifnet *  port_ifp = p->po_ifp;
2830 		bool            port_is_promisc;
2831 
2832 		port_is_promisc = uint32_bit_is_set(p->po_control_flags,
2833 		    PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2834 		if (port_is_promisc == is_promisc) {
2835 			/* already in the right state */
2836 			continue;
2837 		}
2838 		error = ifnet_set_promiscuous(port_ifp, val);
2839 		if (error != 0) {
2840 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2841 			    "%s: ifnet_set_promiscuous(%s, %d): failed %d",
2842 			    ifb->ifb_name, port_ifp->if_xname, val, error);
2843 			continue;
2844 		}
2845 		BOND_LOG(LOG_DEBUG, BD_DBGF_LIFECYCLE,
2846 		    "%s: ifnet_set_promiscuous(%s, %d): succeeded",
2847 		    ifb->ifb_name, port_ifp->if_xname, val);
2848 		if (is_promisc) {
2849 			/* remember that we set it */
2850 			uint32_bit_set(&p->po_control_flags,
2851 			    PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2852 		} else {
2853 			uint32_bit_clear(&p->po_control_flags,
2854 			    PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2855 		}
2856 	}
2857 
2858 	/* assume that updating promiscuous state succeeded */
2859 	error = 0;
2860 	bond_lock();
2861 
2862 	/* update our internal state */
2863 	if (is_promisc) {
2864 		ifbond_flags_set_promisc(ifb);
2865 	} else {
2866 		ifbond_flags_clear_promisc(ifb);
2867 	}
2868 
2869 signal_done:
2870 	ifbond_signal(ifb, __func__);
2871 	bond_unlock();
2872 	ifbond_release(ifb);
2873 
2874 done:
2875 	return error;
2876 }
2877 
2878 static void
bond_get_mtu_values(ifbond_ref ifb,int * ret_min,int * ret_max)2879 bond_get_mtu_values(ifbond_ref ifb, int * ret_min, int * ret_max)
2880 {
2881 	int                         mtu_min = 0;
2882 	int                         mtu_max = 0;
2883 	bondport_ref                p;
2884 
2885 	if (TAILQ_FIRST(&ifb->ifb_port_list) != NULL) {
2886 		mtu_min = IF_MINMTU;
2887 	}
2888 	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2889 		struct ifdevmtu *       devmtu_p = &p->po_devmtu;
2890 
2891 		if (devmtu_p->ifdm_min > mtu_min) {
2892 			mtu_min = devmtu_p->ifdm_min;
2893 		}
2894 		if (mtu_max == 0 || devmtu_p->ifdm_max < mtu_max) {
2895 			mtu_max = devmtu_p->ifdm_max;
2896 		}
2897 	}
2898 	*ret_min = mtu_min;
2899 	*ret_max = mtu_max;
2900 	return;
2901 }
2902 
2903 static int
bond_set_mtu_on_ports(ifbond_ref ifb,int mtu)2904 bond_set_mtu_on_ports(ifbond_ref ifb, int mtu)
2905 {
2906 	int                         error = 0;
2907 	bondport_ref                p;
2908 
2909 	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2910 		error = siocsifmtu(p->po_ifp, mtu);
2911 		if (error != 0) {
2912 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2913 			    "%s: SIOCSIFMTU %s failed, %d",
2914 			    ifb->ifb_name, bondport_get_name(p), error);
2915 			break;
2916 		}
2917 	}
2918 	return error;
2919 }
2920 
2921 static int
bond_set_mtu(struct ifnet * ifp,int mtu,int isdevmtu)2922 bond_set_mtu(struct ifnet * ifp, int mtu, int isdevmtu)
2923 {
2924 	int                 error = 0;
2925 	ifbond_ref          ifb;
2926 	int                 mtu_min;
2927 	int                 mtu_max;
2928 	int                 new_max;
2929 	int                 old_max;
2930 
2931 	bond_lock();
2932 	ifb = (ifbond_ref)ifnet_softc(ifp);
2933 	if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2934 		error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2935 		goto done;
2936 	}
2937 	ifbond_retain(ifb);
2938 	ifbond_wait(ifb, "bond_set_mtu");
2939 
2940 	/* check again */
2941 	if (ifnet_softc(ifp) == NULL || ifbond_flags_if_detaching(ifb)) {
2942 		error = EBUSY;
2943 		goto signal_done;
2944 	}
2945 	bond_get_mtu_values(ifb, &mtu_min, &mtu_max);
2946 	if (mtu > mtu_max) {
2947 		error = EINVAL;
2948 		goto signal_done;
2949 	}
2950 	if (mtu < mtu_min && (isdevmtu == 0 || mtu != 0)) {
2951 		/* allow SIOCSIFALTMTU to set the mtu to 0 */
2952 		error = EINVAL;
2953 		goto signal_done;
2954 	}
2955 	if (isdevmtu) {
2956 		new_max = (mtu > (int)ifnet_mtu(ifp)) ? mtu : (int)ifnet_mtu(ifp);
2957 	} else {
2958 		new_max = (mtu > ifb->ifb_altmtu) ? mtu : ifb->ifb_altmtu;
2959 	}
2960 	old_max = ((int)ifnet_mtu(ifp) > ifb->ifb_altmtu)
2961 	    ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu;
2962 	if (new_max != old_max) {
2963 		/* we can safely walk the list of port without the lock held */
2964 		bond_unlock();
2965 		error = bond_set_mtu_on_ports(ifb, new_max);
2966 		if (error != 0) {
2967 			/* try our best to back out of it */
2968 			(void)bond_set_mtu_on_ports(ifb, old_max);
2969 		}
2970 		bond_lock();
2971 	}
2972 	if (error == 0) {
2973 		if (isdevmtu) {
2974 			ifb->ifb_altmtu = mtu;
2975 		} else {
2976 			ifnet_set_mtu(ifp, mtu);
2977 		}
2978 	}
2979 
2980 signal_done:
2981 	ifbond_signal(ifb, __func__);
2982 	ifbond_release(ifb);
2983 
2984 done:
2985 	bond_unlock();
2986 	return error;
2987 }
2988 
2989 static int
bond_ioctl(struct ifnet * ifp,u_long cmd,void * data)2990 bond_ioctl(struct ifnet *ifp, u_long cmd, void * data)
2991 {
2992 	int                 error = 0;
2993 	struct if_bond_req  ibr;
2994 	struct ifaddr *     ifa;
2995 	ifbond_ref          ifb;
2996 	struct ifreq *      ifr;
2997 	struct ifmediareq32 * ifmr;
2998 	struct ifnet *      port_ifp = NULL;
2999 	user_addr_t         user_addr;
3000 
3001 	if (ifnet_type(ifp) != IFT_IEEE8023ADLAG) {
3002 		return EOPNOTSUPP;
3003 	}
3004 	ifr = (struct ifreq *)data;
3005 	ifa = (struct ifaddr *)data;
3006 
3007 	switch (cmd) {
3008 	case SIOCSIFADDR:
3009 		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
3010 		break;
3011 
3012 	case SIOCGIFMEDIA32:
3013 	case SIOCGIFMEDIA64:
3014 		bond_lock();
3015 		ifb = (ifbond_ref)ifnet_softc(ifp);
3016 		if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3017 			bond_unlock();
3018 			return ifb == NULL ? EOPNOTSUPP : EBUSY;
3019 		}
3020 		ifmr = (struct ifmediareq32 *)data;
3021 		ifmr->ifm_current = IFM_ETHER;
3022 		ifmr->ifm_mask = 0;
3023 		ifmr->ifm_status = IFM_AVALID;
3024 		ifmr->ifm_active = IFM_ETHER;
3025 		ifmr->ifm_count = 1;
3026 		if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
3027 			if (ifb->ifb_active_lag != NULL) {
3028 				ifmr->ifm_active = ifb->ifb_active_lag->lag_active_media;
3029 				ifmr->ifm_status |= IFM_ACTIVE;
3030 			}
3031 		} else if (ifb->ifb_distributing_count > 0) {
3032 			ifmr->ifm_active
3033 			        = ifb->ifb_distributing_array[0]->po_media_info.mi_active;
3034 			ifmr->ifm_status |= IFM_ACTIVE;
3035 		}
3036 		bond_unlock();
3037 		user_addr = (cmd == SIOCGIFMEDIA64) ?
3038 		    ((struct ifmediareq64 *)data)->ifmu_ulist :
3039 		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
3040 		if (user_addr != USER_ADDR_NULL) {
3041 			error = copyout(&ifmr->ifm_current,
3042 			    user_addr,
3043 			    sizeof(int));
3044 		}
3045 		break;
3046 
3047 	case SIOCSIFMEDIA:
3048 		/* XXX send the SIFMEDIA to all children?  Or force autoselect? */
3049 		error = EINVAL;
3050 		break;
3051 
3052 	case SIOCGIFDEVMTU:
3053 		bond_lock();
3054 		ifb = (ifbond_ref)ifnet_softc(ifp);
3055 		if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3056 			bond_unlock();
3057 			error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
3058 			break;
3059 		}
3060 		ifr->ifr_devmtu.ifdm_current = bond_device_mtu(ifp, ifb);
3061 		bond_get_mtu_values(ifb, &ifr->ifr_devmtu.ifdm_min,
3062 		    &ifr->ifr_devmtu.ifdm_max);
3063 		bond_unlock();
3064 		break;
3065 
3066 	case SIOCGIFALTMTU:
3067 		bond_lock();
3068 		ifb = (ifbond_ref)ifnet_softc(ifp);
3069 		if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3070 			bond_unlock();
3071 			error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
3072 			break;
3073 		}
3074 		ifr->ifr_mtu = ifb->ifb_altmtu;
3075 		bond_unlock();
3076 		break;
3077 
3078 	case SIOCSIFALTMTU:
3079 		error = bond_set_mtu(ifp, ifr->ifr_mtu, 1);
3080 		break;
3081 
3082 	case SIOCSIFMTU:
3083 		error = bond_set_mtu(ifp, ifr->ifr_mtu, 0);
3084 		break;
3085 
3086 	case SIOCSIFBOND:
3087 		user_addr = proc_is64bit(current_proc())
3088 		    ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
3089 		error = copyin(user_addr, &ibr, sizeof(ibr));
3090 		if (error) {
3091 			break;
3092 		}
3093 		switch (ibr.ibr_op) {
3094 		case IF_BOND_OP_ADD_INTERFACE:
3095 		case IF_BOND_OP_REMOVE_INTERFACE:
3096 			port_ifp = ifunit(__unsafe_null_terminated_from_indexable(ibr.ibr_ibru.ibru_if_name));
3097 			if (port_ifp == NULL) {
3098 				error = ENXIO;
3099 				break;
3100 			}
3101 			if (ifnet_type(port_ifp) != IFT_ETHER) {
3102 				error = EPROTONOSUPPORT;
3103 				break;
3104 			}
3105 			break;
3106 		case IF_BOND_OP_SET_MODE:
3107 			break;
3108 		default:
3109 			error = EOPNOTSUPP;
3110 			break;
3111 		}
3112 		if (error != 0) {
3113 			break;
3114 		}
3115 		switch (ibr.ibr_op) {
3116 		case IF_BOND_OP_ADD_INTERFACE:
3117 			error = bond_add_interface(ifp, port_ifp);
3118 			break;
3119 		case IF_BOND_OP_REMOVE_INTERFACE:
3120 			bond_lock();
3121 			ifb = (ifbond_ref)ifnet_softc(ifp);
3122 			if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3123 				bond_unlock();
3124 				return ifb == NULL ? EOPNOTSUPP : EBUSY;
3125 			}
3126 			error = bond_remove_interface(ifb, port_ifp);
3127 			bond_unlock();
3128 			break;
3129 		case IF_BOND_OP_SET_MODE:
3130 			switch (ibr.ibr_ibru.ibru_int_val) {
3131 			case IF_BOND_MODE_LACP:
3132 			case IF_BOND_MODE_STATIC:
3133 				break;
3134 			default:
3135 				error = EINVAL;
3136 				break;
3137 			}
3138 			if (error != 0) {
3139 				break;
3140 			}
3141 			error = bond_set_mode(ifp, ibr.ibr_ibru.ibru_int_val);
3142 			break;
3143 		}
3144 		break; /* SIOCSIFBOND */
3145 
3146 	case SIOCGIFBOND:
3147 		user_addr = proc_is64bit(current_proc())
3148 		    ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
3149 		error = copyin(user_addr, &ibr, sizeof(ibr));
3150 		if (error) {
3151 			break;
3152 		}
3153 		switch (ibr.ibr_op) {
3154 		case IF_BOND_OP_GET_STATUS:
3155 			break;
3156 		default:
3157 			error = EOPNOTSUPP;
3158 			break;
3159 		}
3160 		if (error != 0) {
3161 			break;
3162 		}
3163 		bond_lock();
3164 		ifb = (ifbond_ref)ifnet_softc(ifp);
3165 		if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3166 			bond_unlock();
3167 			return ifb == NULL ? EOPNOTSUPP : EBUSY;
3168 		}
3169 		switch (ibr.ibr_op) {
3170 		case IF_BOND_OP_GET_STATUS:
3171 			error = bond_get_status(ifb, &ibr, user_addr);
3172 			break;
3173 		}
3174 		bond_unlock();
3175 		break; /* SIOCGIFBOND */
3176 
3177 	case SIOCSIFLLADDR:
3178 		error = EOPNOTSUPP;
3179 		break;
3180 
3181 	case SIOCSIFFLAGS:
3182 		/* enable promiscuous mode on members */
3183 		error = bond_set_promisc(ifp);
3184 		break;
3185 
3186 	case SIOCADDMULTI:
3187 	case SIOCDELMULTI:
3188 		error = bond_setmulti(ifp);
3189 		break;
3190 	default:
3191 		error = EOPNOTSUPP;
3192 	}
3193 	return error;
3194 }
3195 
3196 static void
bond_if_free(struct ifnet * ifp)3197 bond_if_free(struct ifnet * ifp)
3198 {
3199 	ifbond_ref  ifb;
3200 
3201 	if (ifp == NULL) {
3202 		return;
3203 	}
3204 	bond_lock();
3205 	ifb = (ifbond_ref)ifnet_softc(ifp);
3206 	if (ifb == NULL) {
3207 		bond_unlock();
3208 		return;
3209 	}
3210 	ifbond_release(ifb);
3211 	bond_unlock();
3212 	ifnet_release(ifp);
3213 	return;
3214 }
3215 
3216 static void
bond_handle_event(struct ifnet * port_ifp,int event_code)3217 bond_handle_event(struct ifnet * port_ifp, int event_code)
3218 {
3219 	struct ifnet *      bond_ifp = NULL;
3220 	ifbond_ref          ifb;
3221 	int                 old_distributing_count;
3222 	bondport_ref        p;
3223 	struct media_info   media_info = { .mi_active = 0, .mi_status = 0 };
3224 
3225 	switch (event_code) {
3226 	case KEV_DL_IF_DETACHED:
3227 	case KEV_DL_IF_DETACHING:
3228 		break;
3229 	case KEV_DL_LINK_OFF:
3230 	case KEV_DL_LINK_ON:
3231 		media_info = interface_media_info(port_ifp);
3232 		break;
3233 	default:
3234 		return;
3235 	}
3236 	bond_lock();
3237 	p = bond_lookup_port(port_ifp);
3238 	if (p == NULL) {
3239 		bond_unlock();
3240 		return;
3241 	}
3242 	ifb = p->po_bond;
3243 	old_distributing_count = ifb->ifb_distributing_count;
3244 	switch (event_code) {
3245 	case KEV_DL_IF_DETACHED:
3246 	case KEV_DL_IF_DETACHING:
3247 		bond_remove_interface(ifb, p->po_ifp);
3248 		break;
3249 	case KEV_DL_LINK_OFF:
3250 	case KEV_DL_LINK_ON:
3251 		p->po_media_info = media_info;
3252 		if (p->po_enabled) {
3253 			bondport_link_status_changed(p);
3254 		}
3255 		break;
3256 	}
3257 	/* generate a link-event */
3258 	if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
3259 		if (ifbond_selection(ifb)) {
3260 			event_code = (ifb->ifb_active_lag == NULL)
3261 			    ? KEV_DL_LINK_OFF
3262 			    : KEV_DL_LINK_ON;
3263 			/* XXX need to take a reference on bond_ifp */
3264 			bond_ifp = ifb->ifb_ifp;
3265 			ifb->ifb_last_link_event = event_code;
3266 		} else {
3267 			event_code = (ifb->ifb_active_lag == NULL)
3268 			    ? KEV_DL_LINK_OFF
3269 			    : KEV_DL_LINK_ON;
3270 			if (event_code != ifb->ifb_last_link_event) {
3271 				BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
3272 				    "%s: (event) generating LINK event",
3273 				    ifb->ifb_name);
3274 				bond_ifp = ifb->ifb_ifp;
3275 				ifb->ifb_last_link_event = event_code;
3276 			}
3277 		}
3278 	} else {
3279 		/*
3280 		 * if the distributing array membership changed from 0 <-> !0
3281 		 * generate a link event
3282 		 */
3283 		if (old_distributing_count == 0
3284 		    && ifb->ifb_distributing_count != 0) {
3285 			event_code = KEV_DL_LINK_ON;
3286 		} else if (old_distributing_count != 0
3287 		    && ifb->ifb_distributing_count == 0) {
3288 			event_code = KEV_DL_LINK_OFF;
3289 		}
3290 		if (event_code != 0 && event_code != ifb->ifb_last_link_event) {
3291 			bond_ifp = ifb->ifb_ifp;
3292 			ifb->ifb_last_link_event = event_code;
3293 		}
3294 	}
3295 
3296 	bond_unlock();
3297 	if (bond_ifp != NULL) {
3298 		interface_link_event(bond_ifp, event_code);
3299 	}
3300 	return;
3301 }
3302 
3303 static void
bond_event(struct ifnet * port_ifp,__unused protocol_family_t protocol,const struct kev_msg * event)3304 bond_event(struct ifnet * port_ifp, __unused protocol_family_t protocol,
3305     const struct kev_msg * event)
3306 {
3307 	int         event_code;
3308 
3309 	if (event->vendor_code != KEV_VENDOR_APPLE
3310 	    || event->kev_class != KEV_NETWORK_CLASS
3311 	    || event->kev_subclass != KEV_DL_SUBCLASS) {
3312 		return;
3313 	}
3314 	event_code = event->event_code;
3315 	switch (event_code) {
3316 	case KEV_DL_LINK_OFF:
3317 	case KEV_DL_LINK_ON:
3318 	case KEV_DL_IF_DETACHING:
3319 	case KEV_DL_IF_DETACHED:
3320 		bond_handle_event(port_ifp, event_code);
3321 		break;
3322 	default:
3323 		break;
3324 	}
3325 	return;
3326 }
3327 
3328 static errno_t
bond_detached(ifnet_t port_ifp,__unused protocol_family_t protocol)3329 bond_detached(ifnet_t port_ifp, __unused protocol_family_t protocol)
3330 {
3331 	bond_handle_event(port_ifp, KEV_DL_IF_DETACHED);
3332 	return 0;
3333 }
3334 
3335 static void
interface_link_event(struct ifnet * ifp,u_int32_t event_code)3336 interface_link_event(struct ifnet * ifp, u_int32_t event_code)
3337 {
3338 	struct event {
3339 		u_int32_t ifnet_family;
3340 		u_int32_t unit;
3341 		char if_name[IFNAMSIZ];
3342 	};
3343 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
3344 	struct kern_event_msg *header = (struct kern_event_msg*)message;
3345 	struct event *data = (struct event *)(message + KEV_MSG_HEADER_SIZE);
3346 
3347 	header->total_size   = sizeof(message);
3348 	header->vendor_code  = KEV_VENDOR_APPLE;
3349 	header->kev_class    = KEV_NETWORK_CLASS;
3350 	header->kev_subclass = KEV_DL_SUBCLASS;
3351 	header->event_code   = event_code;
3352 	data->ifnet_family   = ifnet_family(ifp);
3353 	data->unit           = (u_int32_t)ifnet_unit(ifp);
3354 	strlcpy(data->if_name, ifnet_name(ifp), sizeof(data->if_name));
3355 	ifnet_event(ifp, header);
3356 }
3357 
3358 
3359 /*
3360  * Function: bond_attach_protocol
3361  * Purpose:
3362  *   Attach a DLIL protocol to the interface.
3363  *
3364  *   The ethernet demux special cases to always return PF_BOND if the
3365  *   interface is bonded.  That means we receive all traffic from that
3366  *   interface without passing any of the traffic to any other attached
3367  *   protocol.
3368  */
3369 static int
bond_attach_protocol(struct ifnet * ifp)3370 bond_attach_protocol(struct ifnet *ifp)
3371 {
3372 	int                                 error;
3373 	struct ifnet_attach_proto_param_v2  reg;
3374 
3375 	bzero(&reg, sizeof(reg));
3376 	reg.input = bond_input;
3377 	reg.event = bond_event;
3378 	reg.detached = bond_detached;
3379 
3380 	error = ifnet_attach_protocol_v2(ifp, PF_BOND, &reg);
3381 	if (error != 0) {
3382 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
3383 		    "%s: ifnet_attach_protocol failed, %d",
3384 		    ifp->if_xname, error);
3385 	}
3386 	return error;
3387 }
3388 
3389 /*
3390  * Function: bond_detach_protocol
3391  * Purpose:
3392  *   Detach our DLIL protocol from an interface
3393  */
3394 static int
bond_detach_protocol(struct ifnet * ifp)3395 bond_detach_protocol(struct ifnet *ifp)
3396 {
3397 	int         error;
3398 
3399 	error = ifnet_detach_protocol(ifp, PF_BOND);
3400 	if (error != 0) {
3401 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
3402 		    "%s: ifnet_detach_protocol failed, %d",
3403 		    ifp->if_xname, error);
3404 	}
3405 	return error;
3406 }
3407 
3408 
3409 /*
3410  * DLIL interface family functions
3411  */
3412 extern int ether_attach_inet(ifnet_t ifp, protocol_family_t protocol_family);
3413 extern void ether_detach_inet(ifnet_t ifp, protocol_family_t protocol_family);
3414 extern int ether_attach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
3415 extern void ether_detach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
3416 extern int ether_attach_at(ifnet_t ifp, protocol_family_t protocol_family);
3417 extern void ether_detach_at(ifnet_t ifp, protocol_family_t protocol_family);
3418 
3419 __private_extern__ int
bond_family_init(void)3420 bond_family_init(void)
3421 {
3422 	int error = 0;
3423 
3424 	error = proto_register_plumber(PF_INET, APPLE_IF_FAM_BOND,
3425 	    ether_attach_inet,
3426 	    ether_detach_inet);
3427 	if (error != 0) {
3428 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
3429 		    "proto_register_plumber failed for AF_INET error %d",
3430 		    error);
3431 		goto done;
3432 	}
3433 	error = proto_register_plumber(PF_INET6, APPLE_IF_FAM_BOND,
3434 	    ether_attach_inet6,
3435 	    ether_detach_inet6);
3436 	if (error != 0) {
3437 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
3438 		    "proto_register_plumber failed for AF_INET6 error %d",
3439 		    error);
3440 		goto done;
3441 	}
3442 	error = bond_clone_attach();
3443 	if (error != 0) {
3444 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
3445 		    "bond_clone_attach error %d",
3446 		    error);
3447 		goto done;
3448 	}
3449 
3450 done:
3451 	return error;
3452 }
3453 /**
3454 **
3455 ** LACP routines:
3456 **
3457 **/
3458 
3459 /**
3460 ** LACP ifbond_list routines
3461 **/
3462 static bondport_ref
ifbond_list_find_moved_port(bondport_ref rx_port,const lacp_actor_partner_tlv_ref atlv)3463 ifbond_list_find_moved_port(bondport_ref rx_port,
3464     const lacp_actor_partner_tlv_ref atlv)
3465 {
3466 	ifbond_ref          bond;
3467 	bondport_ref        p;
3468 	partner_state_ref   ps;
3469 	LAG_info_ref        ps_li;
3470 
3471 	TAILQ_FOREACH(bond, &g_bond->ifbond_list, ifb_bond_list) {
3472 		TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3473 			if (rx_port == p) {
3474 				/* no point in comparing against ourselves */
3475 				continue;
3476 			}
3477 			if (p->po_receive_state != ReceiveState_PORT_DISABLED) {
3478 				/* it's not clear that we should be checking this */
3479 				continue;
3480 			}
3481 			ps = &p->po_partner_state;
3482 			if (lacp_actor_partner_state_defaulted(ps->ps_state)) {
3483 				continue;
3484 			}
3485 			ps_li = &ps->ps_lag_info;
3486 			if (ps->ps_port == lacp_actor_partner_tlv_get_port(atlv)
3487 			    && bcmp(&ps_li->li_system, atlv->lap_system,
3488 			    sizeof(ps_li->li_system)) == 0) {
3489 				BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3490 				    "System " EA_FORMAT
3491 				    " Port 0x%x moved from %s to %s",
3492 				    EA_LIST(&ps_li->li_system), ps->ps_port,
3493 				    bondport_get_name(p),
3494 				    bondport_get_name(rx_port));
3495 				return p;
3496 			}
3497 		}
3498 	}
3499 	return NULL;
3500 }
3501 
3502 /**
3503 ** LACP ifbond, LAG routines
3504 **/
3505 
3506 static int
ifbond_selection(ifbond_ref bond)3507 ifbond_selection(ifbond_ref bond)
3508 {
3509 	int                 all_ports_ready = 0;
3510 	int                 active_media = 0;
3511 	LAG_ref             lag = NULL;
3512 	int                 lag_changed = 0;
3513 	bondport_ref        p;
3514 	int                 port_speed = 0;
3515 
3516 	lag = ifbond_find_best_LAG(bond, &active_media);
3517 	if (lag != bond->ifb_active_lag) {
3518 		if (bond->ifb_active_lag != NULL) {
3519 			ifbond_deactivate_LAG(bond, bond->ifb_active_lag);
3520 			bond->ifb_active_lag = NULL;
3521 		}
3522 		bond->ifb_active_lag = lag;
3523 		if (lag != NULL) {
3524 			ifbond_activate_LAG(bond, lag, active_media);
3525 		}
3526 		lag_changed = 1;
3527 	} else if (lag != NULL) {
3528 		if (lag->lag_active_media != active_media) {
3529 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3530 			    "LAG PORT SPEED CHANGED from %d to %d",
3531 			    link_speed(lag->lag_active_media),
3532 			    link_speed(active_media));
3533 			ifbond_deactivate_LAG(bond, lag);
3534 			ifbond_activate_LAG(bond, lag, active_media);
3535 			lag_changed = 1;
3536 		}
3537 	}
3538 	if (lag != NULL) {
3539 		port_speed = link_speed(active_media);
3540 		all_ports_ready = ifbond_all_ports_ready(bond);
3541 	}
3542 	TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3543 		if (lag != NULL && p->po_lag == lag
3544 		    && media_speed(&p->po_media_info) == port_speed
3545 		    && (p->po_mux_state == MuxState_DETACHED
3546 		    || p->po_selected == SelectedState_SELECTED
3547 		    || p->po_selected == SelectedState_STANDBY)
3548 		    && bondport_aggregatable(p)) {
3549 			if (bond->ifb_max_active > 0) {
3550 				if (lag->lag_selected_port_count < bond->ifb_max_active) {
3551 					if (p->po_selected == SelectedState_STANDBY
3552 					    || p->po_selected == SelectedState_UNSELECTED) {
3553 						bondport_set_selected(p, SelectedState_SELECTED);
3554 					}
3555 				} else if (p->po_selected == SelectedState_UNSELECTED) {
3556 					bondport_set_selected(p, SelectedState_STANDBY);
3557 				}
3558 			} else {
3559 				bondport_set_selected(p, SelectedState_SELECTED);
3560 			}
3561 		}
3562 		if (bondport_flags_selected_changed(p)) {
3563 			bondport_flags_clear_selected_changed(p);
3564 			bondport_mux_machine(p, LAEventSelectedChange, NULL);
3565 		}
3566 		if (all_ports_ready
3567 		    && bondport_flags_ready(p)
3568 		    && p->po_mux_state == MuxState_WAITING) {
3569 			bondport_mux_machine(p, LAEventReady, NULL);
3570 		}
3571 		bondport_transmit_machine(p, LAEventStart, NULL);
3572 	}
3573 	return lag_changed;
3574 }
3575 
3576 static LAG_ref
ifbond_find_best_LAG(ifbond_ref bond,int * active_media)3577 ifbond_find_best_LAG(ifbond_ref bond, int * active_media)
3578 {
3579 	int                 best_active = 0;
3580 	LAG_ref             best_lag = NULL;
3581 	int                 best_count = 0;
3582 	int                 best_speed = 0;
3583 	LAG_ref             lag;
3584 
3585 	if (bond->ifb_active_lag != NULL) {
3586 		best_lag = bond->ifb_active_lag;
3587 		best_count = LAG_get_aggregatable_port_count(best_lag, &best_active);
3588 		if (bond->ifb_max_active > 0
3589 		    && best_count > bond->ifb_max_active) {
3590 			best_count = bond->ifb_max_active;
3591 		}
3592 		best_speed = link_speed(best_active);
3593 	}
3594 	TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3595 		int     active;
3596 		int     count;
3597 		int     speed;
3598 
3599 		if (lag == bond->ifb_active_lag) {
3600 			/* we've already computed it */
3601 			continue;
3602 		}
3603 		count = LAG_get_aggregatable_port_count(lag, &active);
3604 		if (count == 0) {
3605 			continue;
3606 		}
3607 		if (bond->ifb_max_active > 0
3608 		    && count > bond->ifb_max_active) {
3609 			/* if there's a limit, don't count extra links */
3610 			count = bond->ifb_max_active;
3611 		}
3612 		speed = link_speed(active);
3613 		if ((count * speed) > (best_count * best_speed)) {
3614 			best_count = count;
3615 			best_speed = speed;
3616 			best_active = active;
3617 			best_lag = lag;
3618 		}
3619 	}
3620 	if (best_count == 0) {
3621 		return NULL;
3622 	}
3623 	*active_media = best_active;
3624 	return best_lag;
3625 }
3626 
3627 static void
ifbond_deactivate_LAG(__unused ifbond_ref bond,LAG_ref lag)3628 ifbond_deactivate_LAG(__unused ifbond_ref bond, LAG_ref lag)
3629 {
3630 	bondport_ref        p;
3631 
3632 	TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3633 		bondport_set_selected(p, SelectedState_UNSELECTED);
3634 	}
3635 	return;
3636 }
3637 
3638 static void
ifbond_activate_LAG(ifbond_ref bond,LAG_ref lag,int active_media)3639 ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media)
3640 {
3641 	int                 need = 0;
3642 	bondport_ref        p;
3643 
3644 	if (bond->ifb_max_active > 0) {
3645 		need = bond->ifb_max_active;
3646 	}
3647 	lag->lag_active_media = active_media;
3648 	TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3649 		if (bondport_aggregatable(p) == 0) {
3650 			bondport_set_selected(p, SelectedState_UNSELECTED);
3651 		} else if (media_speed(&p->po_media_info) != link_speed(active_media)) {
3652 			bondport_set_selected(p, SelectedState_UNSELECTED);
3653 		} else if (p->po_mux_state == MuxState_DETACHED) {
3654 			if (bond->ifb_max_active > 0) {
3655 				if (need > 0) {
3656 					bondport_set_selected(p, SelectedState_SELECTED);
3657 					need--;
3658 				} else {
3659 					bondport_set_selected(p, SelectedState_STANDBY);
3660 				}
3661 			} else {
3662 				bondport_set_selected(p, SelectedState_SELECTED);
3663 			}
3664 		} else {
3665 			bondport_set_selected(p, SelectedState_UNSELECTED);
3666 		}
3667 	}
3668 	return;
3669 }
3670 
#if 0
/*
 * Function: ifbond_set_max_active
 * Purpose:
 *   (compiled out) Set the bond's limit on active ports and, if the
 *   active LAG currently has more selected ports than the new limit,
 *   unselect the excess.
 */
static void
ifbond_set_max_active(ifbond_ref bond, int max_active)
{
	int             excess;
	LAG_ref         lag = bond->ifb_active_lag;
	bondport_ref    port;

	bond->ifb_max_active = max_active;
	if (bond->ifb_max_active <= 0 || lag == NULL) {
		return;
	}
	if (lag->lag_selected_port_count <= bond->ifb_max_active) {
		return;
	}
	excess = lag->lag_selected_port_count - bond->ifb_max_active;
	TAILQ_FOREACH(port, &lag->lag_port_list, po_lag_port_list) {
		if (port->po_selected != SelectedState_SELECTED) {
			continue;
		}
		bondport_set_selected(port, SelectedState_UNSELECTED);
		excess--;
		if (excess == 0) {
			break;
		}
	}
}
#endif
3699 
3700 static int
ifbond_all_ports_ready(ifbond_ref bond)3701 ifbond_all_ports_ready(ifbond_ref bond)
3702 {
3703 	int                 ready = 0;
3704 	bondport_ref        p;
3705 
3706 	if (bond->ifb_active_lag == NULL) {
3707 		return 0;
3708 	}
3709 	TAILQ_FOREACH(p, &bond->ifb_active_lag->lag_port_list, po_lag_port_list) {
3710 		if (p->po_mux_state == MuxState_WAITING
3711 		    && p->po_selected == SelectedState_SELECTED) {
3712 			if (bondport_flags_ready(p) == 0) {
3713 				return 0;
3714 			}
3715 		}
3716 		/* note that there was at least one ready port */
3717 		ready = 1;
3718 	}
3719 	return ready;
3720 }
3721 
3722 static int
ifbond_all_ports_attached(ifbond_ref bond,bondport_ref this_port)3723 ifbond_all_ports_attached(ifbond_ref bond, bondport_ref this_port)
3724 {
3725 	bondport_ref        p;
3726 
3727 	TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3728 		if (this_port == p) {
3729 			continue;
3730 		}
3731 		if (bondport_flags_mux_attached(p) == 0) {
3732 			return 0;
3733 		}
3734 	}
3735 	return 1;
3736 }
3737 
3738 static LAG_ref
ifbond_get_LAG_matching_port(ifbond_ref bond,bondport_ref p)3739 ifbond_get_LAG_matching_port(ifbond_ref bond, bondport_ref p)
3740 {
3741 	LAG_ref     lag;
3742 
3743 	TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3744 		if (bcmp(&lag->lag_info, &p->po_partner_state.ps_lag_info,
3745 		    sizeof(lag->lag_info)) == 0) {
3746 			return lag;
3747 		}
3748 	}
3749 	return NULL;
3750 }
3751 
3752 static int
LAG_get_aggregatable_port_count(LAG_ref lag,int * active_media)3753 LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media)
3754 {
3755 	int                 active;
3756 	int                 count;
3757 	bondport_ref        p;
3758 	int                 speed;
3759 
3760 	active = 0;
3761 	count = 0;
3762 	speed = 0;
3763 	TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3764 		if (bondport_aggregatable(p)) {
3765 			int this_speed;
3766 
3767 			this_speed = media_speed(&p->po_media_info);
3768 			if (this_speed == 0) {
3769 				continue;
3770 			}
3771 			if (this_speed > speed) {
3772 				active = p->po_media_info.mi_active;
3773 				speed = this_speed;
3774 				count = 1;
3775 			} else if (this_speed == speed) {
3776 				count++;
3777 			}
3778 		}
3779 	}
3780 	*active_media = active;
3781 	return count;
3782 }
3783 
3784 
3785 /**
3786 ** LACP bondport routines
3787 **/
3788 static void
bondport_link_status_changed(bondport_ref p)3789 bondport_link_status_changed(bondport_ref p)
3790 {
3791 	ifbond_ref  bond = p->po_bond;
3792 
3793 	if (if_bond_debug) {
3794 		if (media_active(&p->po_media_info)) {
3795 			const char * duplex_string;
3796 
3797 			if (media_full_duplex(&p->po_media_info)) {
3798 				duplex_string = "full";
3799 			} else if (media_type_unknown(&p->po_media_info)) {
3800 				duplex_string = "unknown";
3801 			} else {
3802 				duplex_string = "half";
3803 			}
3804 			BOND_LOG(LOG_NOTICE, BD_DBGF_LACP,
3805 			    "[%s] Link UP %d Mbit/s %s duplex",
3806 			    bondport_get_name(p),
3807 			    media_speed(&p->po_media_info),
3808 			    duplex_string);
3809 		} else {
3810 			BOND_LOG(LOG_NOTICE, BD_DBGF_LACP,
3811 			    "[%s] Link DOWN", bondport_get_name(p));
3812 		}
3813 	}
3814 	if (bond->ifb_mode == IF_BOND_MODE_LACP) {
3815 		if (media_active(&p->po_media_info)
3816 		    && bond->ifb_active_lag != NULL
3817 		    && p->po_lag == bond->ifb_active_lag
3818 		    && p->po_selected != SelectedState_UNSELECTED) {
3819 			if (media_speed(&p->po_media_info) != p->po_lag->lag_active_media) {
3820 				BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3821 				    "[%s] Port speed %d differs from LAG %d",
3822 				    bondport_get_name(p),
3823 				    media_speed(&p->po_media_info),
3824 				    link_speed(p->po_lag->lag_active_media));
3825 				bondport_set_selected(p, SelectedState_UNSELECTED);
3826 			}
3827 		}
3828 		bondport_receive_machine(p, LAEventMediaChange, NULL);
3829 		bondport_mux_machine(p, LAEventMediaChange, NULL);
3830 		bondport_periodic_transmit_machine(p, LAEventMediaChange, NULL);
3831 	} else {
3832 		if (media_active(&p->po_media_info)) {
3833 			bondport_enable_distributing(p);
3834 		} else {
3835 			bondport_disable_distributing(p);
3836 		}
3837 	}
3838 	return;
3839 }
3840 
3841 static int
bondport_aggregatable(bondport_ref p)3842 bondport_aggregatable(bondport_ref p)
3843 {
3844 	partner_state_ref   ps = &p->po_partner_state;
3845 
3846 	if (lacp_actor_partner_state_aggregatable(p->po_actor_state) == 0
3847 	    || lacp_actor_partner_state_aggregatable(ps->ps_state) == 0) {
3848 		/* we and/or our partner are individual */
3849 		return 0;
3850 	}
3851 	if (p->po_lag == NULL) {
3852 		return 0;
3853 	}
3854 	switch (p->po_receive_state) {
3855 	default:
3856 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3857 		    "[%s] Port is not selectable",
3858 		    bondport_get_name(p));
3859 		return 0;
3860 	case ReceiveState_CURRENT:
3861 	case ReceiveState_EXPIRED:
3862 		break;
3863 	}
3864 	return 1;
3865 }
3866 
3867 static int
bondport_matches_LAG(bondport_ref p,LAG_ref lag)3868 bondport_matches_LAG(bondport_ref p, LAG_ref lag)
3869 {
3870 	LAG_info_ref        lag_li;
3871 	partner_state_ref   ps;
3872 	LAG_info_ref        ps_li;
3873 
3874 	ps = &p->po_partner_state;
3875 	ps_li = &ps->ps_lag_info;
3876 	lag_li = &lag->lag_info;
3877 	if (ps_li->li_system_priority == lag_li->li_system_priority
3878 	    && ps_li->li_key == lag_li->li_key
3879 	    && (bcmp(&ps_li->li_system, &lag_li->li_system,
3880 	    sizeof(lag_li->li_system))
3881 	    == 0)) {
3882 		return 1;
3883 	}
3884 	return 0;
3885 }
3886 
3887 static int
bondport_remove_from_LAG(bondport_ref p)3888 bondport_remove_from_LAG(bondport_ref p)
3889 {
3890 	int         active_lag = 0;
3891 	ifbond_ref  bond = p->po_bond;
3892 	LAG_ref     lag = p->po_lag;
3893 
3894 	if (lag == NULL) {
3895 		return 0;
3896 	}
3897 	TAILQ_REMOVE(&lag->lag_port_list, p, po_lag_port_list);
3898 	BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3899 	    "[%s] Removed from LAG (0x%04x," EA_FORMAT ",0x%04x)",
3900 	    bondport_get_name(p), lag->lag_info.li_system_priority,
3901 	    EA_LIST(&lag->lag_info.li_system), lag->lag_info.li_key);
3902 	p->po_lag = NULL;
3903 	lag->lag_port_count--;
3904 	if (lag->lag_port_count > 0) {
3905 		return bond->ifb_active_lag == lag;
3906 	}
3907 	BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3908 	    "Key 0x%04x: LAG Released (%04x," EA_FORMAT ",0x%04x)",
3909 	    bond->ifb_key,
3910 	    lag->lag_info.li_system_priority,
3911 	    EA_LIST(&lag->lag_info.li_system),
3912 	    lag->lag_info.li_key);
3913 	TAILQ_REMOVE(&bond->ifb_lag_list, lag, lag_list);
3914 	if (bond->ifb_active_lag == lag) {
3915 		bond->ifb_active_lag = NULL;
3916 		active_lag = 1;
3917 	}
3918 	kfree_type(struct LAG_s, lag);
3919 	return active_lag;
3920 }
3921 
3922 static void
bondport_add_to_LAG(bondport_ref p,LAG_ref lag)3923 bondport_add_to_LAG(bondport_ref p, LAG_ref lag)
3924 {
3925 	TAILQ_INSERT_TAIL(&lag->lag_port_list, p, po_lag_port_list);
3926 	p->po_lag = lag;
3927 	lag->lag_port_count++;
3928 	BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3929 	    "[%s] Added to LAG (0x%04x," EA_FORMAT "0x%04x)",
3930 	    bondport_get_name(p),
3931 	    lag->lag_info.li_system_priority,
3932 	    EA_LIST(&lag->lag_info.li_system),
3933 	    lag->lag_info.li_key);
3934 	return;
3935 }
3936 
3937 static void
bondport_assign_to_LAG(bondport_ref p)3938 bondport_assign_to_LAG(bondport_ref p)
3939 {
3940 	ifbond_ref  bond = p->po_bond;
3941 	LAG_ref     lag;
3942 
3943 	if (lacp_actor_partner_state_defaulted(p->po_actor_state)) {
3944 		bondport_remove_from_LAG(p);
3945 		return;
3946 	}
3947 	lag = p->po_lag;
3948 	if (lag != NULL) {
3949 		if (bondport_matches_LAG(p, lag)) {
3950 			/* still OK */
3951 			return;
3952 		}
3953 		bondport_remove_from_LAG(p);
3954 	}
3955 	lag = ifbond_get_LAG_matching_port(bond, p);
3956 	if (lag != NULL) {
3957 		bondport_add_to_LAG(p, lag);
3958 		return;
3959 	}
3960 	lag = kalloc_type(struct LAG_s, Z_WAITOK);
3961 	TAILQ_INIT(&lag->lag_port_list);
3962 	lag->lag_port_count = 0;
3963 	lag->lag_selected_port_count = 0;
3964 	lag->lag_info = p->po_partner_state.ps_lag_info;
3965 	TAILQ_INSERT_TAIL(&bond->ifb_lag_list, lag, lag_list);
3966 	BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3967 	    "Key 0x%04x: LAG Created (0x%04x," EA_FORMAT ",0x%04x)",
3968 	    bond->ifb_key, lag->lag_info.li_system_priority,
3969 	    EA_LIST(&lag->lag_info.li_system), lag->lag_info.li_key);
3970 	bondport_add_to_LAG(p, lag);
3971 	return;
3972 }
3973 
3974 static void
bondport_receive_lacpdu(bondport_ref p,lacpdu_ref in_lacpdu_p)3975 bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p)
3976 {
3977 	bondport_ref                moved_port;
3978 
3979 	moved_port
3980 	        = ifbond_list_find_moved_port(p, (const lacp_actor_partner_tlv_ref)
3981 	    &in_lacpdu_p->la_actor_tlv);
3982 	if (moved_port != NULL) {
3983 		bondport_receive_machine(moved_port, LAEventPortMoved, NULL);
3984 	}
3985 	bondport_receive_machine(p, LAEventPacket, in_lacpdu_p);
3986 	bondport_mux_machine(p, LAEventPacket, in_lacpdu_p);
3987 	bondport_periodic_transmit_machine(p, LAEventPacket, in_lacpdu_p);
3988 	return;
3989 }
3990 
3991 static void
bondport_set_selected(bondport_ref p,SelectedState s)3992 bondport_set_selected(bondport_ref p, SelectedState s)
3993 {
3994 	if (s != p->po_selected) {
3995 		ifbond_ref      bond = p->po_bond;
3996 		LAG_ref         lag = p->po_lag;
3997 
3998 		bondport_flags_set_selected_changed(p);
3999 		if (lag != NULL && bond->ifb_active_lag == lag) {
4000 			if (p->po_selected == SelectedState_SELECTED) {
4001 				lag->lag_selected_port_count--;
4002 			} else if (s == SelectedState_SELECTED) {
4003 				lag->lag_selected_port_count++;
4004 			}
4005 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4006 			    "[%s] SetSelected: %s (was %s)",
4007 			    bondport_get_name(p),
4008 			    SelectedStateString(s),
4009 			    SelectedStateString(p->po_selected));
4010 		}
4011 	}
4012 	p->po_selected = s;
4013 	return;
4014 }
4015 
4016 /**
4017 ** Receive machine
4018 **/
4019 
4020 static void
bondport_UpdateDefaultSelected(bondport_ref p)4021 bondport_UpdateDefaultSelected(bondport_ref p)
4022 {
4023 	bondport_set_selected(p, SelectedState_UNSELECTED);
4024 	return;
4025 }
4026 
4027 static void
bondport_RecordDefault(bondport_ref p)4028 bondport_RecordDefault(bondport_ref p)
4029 {
4030 	bzero(&p->po_partner_state, sizeof(p->po_partner_state));
4031 	p->po_actor_state
4032 	        = lacp_actor_partner_state_set_defaulted(p->po_actor_state);
4033 	bondport_assign_to_LAG(p);
4034 	return;
4035 }
4036 
4037 static void
bondport_UpdateSelected(bondport_ref p,lacpdu_ref lacpdu_p)4038 bondport_UpdateSelected(bondport_ref p, lacpdu_ref lacpdu_p)
4039 {
4040 	lacp_actor_partner_tlv_ref  actor;
4041 	partner_state_ref           ps;
4042 	LAG_info_ref                ps_li;
4043 
4044 	/* compare the PDU's Actor information to our Partner state */
4045 	actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
4046 	ps = &p->po_partner_state;
4047 	ps_li = &ps->ps_lag_info;
4048 	if (lacp_actor_partner_tlv_get_port(actor) != ps->ps_port
4049 	    || (lacp_actor_partner_tlv_get_port_priority(actor)
4050 	    != ps->ps_port_priority)
4051 	    || bcmp(actor->lap_system, &ps_li->li_system, sizeof(ps_li->li_system))
4052 	    || (lacp_actor_partner_tlv_get_system_priority(actor)
4053 	    != ps_li->li_system_priority)
4054 	    || (lacp_actor_partner_tlv_get_key(actor) != ps_li->li_key)
4055 	    || (lacp_actor_partner_state_aggregatable(actor->lap_state)
4056 	    != lacp_actor_partner_state_aggregatable(ps->ps_state))) {
4057 		bondport_set_selected(p, SelectedState_UNSELECTED);
4058 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4059 		    "[%s] updateSelected UNSELECTED",
4060 		    bondport_get_name(p));
4061 	}
4062 	return;
4063 }
4064 
4065 static void
bondport_RecordPDU(bondport_ref p,lacpdu_ref lacpdu_p)4066 bondport_RecordPDU(bondport_ref p, lacpdu_ref lacpdu_p)
4067 {
4068 	lacp_actor_partner_tlv_ref  actor;
4069 	ifbond_ref                  bond = p->po_bond;
4070 	int                         lacp_maintain = 0;
4071 	partner_state_ref           ps;
4072 	lacp_actor_partner_tlv_ref  partner;
4073 	LAG_info_ref                ps_li;
4074 
4075 	/* copy the PDU's Actor information into our Partner state */
4076 	actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
4077 	ps = &p->po_partner_state;
4078 	ps_li = &ps->ps_lag_info;
4079 	ps->ps_port = lacp_actor_partner_tlv_get_port(actor);
4080 	ps->ps_port_priority = lacp_actor_partner_tlv_get_port_priority(actor);
4081 	ps_li->li_system = *((lacp_system_ref)actor->lap_system);
4082 	ps_li->li_system_priority
4083 	        = lacp_actor_partner_tlv_get_system_priority(actor);
4084 	ps_li->li_key = lacp_actor_partner_tlv_get_key(actor);
4085 	ps->ps_state = lacp_actor_partner_state_set_out_of_sync(actor->lap_state);
4086 	p->po_actor_state
4087 	        = lacp_actor_partner_state_set_not_defaulted(p->po_actor_state);
4088 
4089 	/* compare the PDU's Partner information to our own information */
4090 	partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
4091 
4092 	if (lacp_actor_partner_state_active_lacp(ps->ps_state)
4093 	    || (lacp_actor_partner_state_active_lacp(p->po_actor_state)
4094 	    && lacp_actor_partner_state_active_lacp(partner->lap_state))) {
4095 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4096 		    "[%s] recordPDU: LACP will maintain", bondport_get_name(p));
4097 		lacp_maintain = 1;
4098 	}
4099 	if ((lacp_actor_partner_tlv_get_port(partner)
4100 	    == bondport_get_index(p))
4101 	    && lacp_actor_partner_tlv_get_port_priority(partner) == p->po_priority
4102 	    && bcmp(partner->lap_system, &g_bond->system,
4103 	    sizeof(g_bond->system)) == 0
4104 	    && (lacp_actor_partner_tlv_get_system_priority(partner)
4105 	    == g_bond->system_priority)
4106 	    && lacp_actor_partner_tlv_get_key(partner) == bond->ifb_key
4107 	    && (lacp_actor_partner_state_aggregatable(partner->lap_state)
4108 	    == lacp_actor_partner_state_aggregatable(p->po_actor_state))
4109 	    && lacp_actor_partner_state_in_sync(actor->lap_state)
4110 	    && lacp_maintain) {
4111 		ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
4112 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4113 		    "[%s] recordPDU: LACP partner in sync",
4114 		    bondport_get_name(p));
4115 	} else if (lacp_actor_partner_state_aggregatable(actor->lap_state) == 0
4116 	    && lacp_actor_partner_state_in_sync(actor->lap_state)
4117 	    && lacp_maintain) {
4118 		ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
4119 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4120 		    "[%s] recordPDU: LACP partner in sync (ind)",
4121 		    bondport_get_name(p));
4122 	}
4123 	bondport_assign_to_LAG(p);
4124 	return;
4125 }
4126 
4127 static __inline__ lacp_actor_partner_state
updateNTTBits(lacp_actor_partner_state s)4128 updateNTTBits(lacp_actor_partner_state s)
4129 {
4130 	return s & (LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY
4131 	       | LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT
4132 	       | LACP_ACTOR_PARTNER_STATE_AGGREGATION
4133 	       | LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION);
4134 }
4135 
4136 static void
bondport_UpdateNTT(bondport_ref p,lacpdu_ref lacpdu_p)4137 bondport_UpdateNTT(bondport_ref p, lacpdu_ref lacpdu_p)
4138 {
4139 	ifbond_ref                  bond = p->po_bond;
4140 	lacp_actor_partner_tlv_ref  partner;
4141 
4142 	/* compare the PDU's Actor information to our Partner state */
4143 	partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
4144 	if ((lacp_actor_partner_tlv_get_port(partner) != bondport_get_index(p))
4145 	    || lacp_actor_partner_tlv_get_port_priority(partner) != p->po_priority
4146 	    || bcmp(partner->lap_system, &g_bond->system, sizeof(g_bond->system))
4147 	    || (lacp_actor_partner_tlv_get_system_priority(partner)
4148 	    != g_bond->system_priority)
4149 	    || lacp_actor_partner_tlv_get_key(partner) != bond->ifb_key
4150 	    || (updateNTTBits(partner->lap_state)
4151 	    != updateNTTBits(p->po_actor_state))) {
4152 		bondport_flags_set_ntt(p);
4153 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4154 		    "[%s] updateNTT: Need To Transmit", bondport_get_name(p));
4155 	}
4156 	return;
4157 }
4158 
4159 static void
bondport_AttachMuxToAggregator(bondport_ref p)4160 bondport_AttachMuxToAggregator(bondport_ref p)
4161 {
4162 	if (bondport_flags_mux_attached(p) == 0) {
4163 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4164 		    "[%s] Attached Mux To Aggregator", bondport_get_name(p));
4165 		bondport_flags_set_mux_attached(p);
4166 	}
4167 	return;
4168 }
4169 
4170 static void
bondport_DetachMuxFromAggregator(bondport_ref p)4171 bondport_DetachMuxFromAggregator(bondport_ref p)
4172 {
4173 	if (bondport_flags_mux_attached(p)) {
4174 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4175 		    "[%s] Detached Mux From Aggregator", bondport_get_name(p));
4176 		bondport_flags_clear_mux_attached(p);
4177 	}
4178 	return;
4179 }
4180 
4181 static void
bondport_enable_distributing(bondport_ref p)4182 bondport_enable_distributing(bondport_ref p)
4183 {
4184 	if (bondport_flags_distributing(p) == 0) {
4185 		ifbond_ref      bond = p->po_bond;
4186 
4187 		bond->ifb_distributing_array[bond->ifb_distributing_count++] = p;
4188 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4189 		    "[%s] Distribution Enabled", bondport_get_name(p));
4190 		bondport_flags_set_distributing(p);
4191 	}
4192 	return;
4193 }
4194 
4195 static void
bondport_disable_distributing(bondport_ref p)4196 bondport_disable_distributing(bondport_ref p)
4197 {
4198 	if (bondport_flags_distributing(p)) {
4199 		bondport_ref *  array;
4200 		ifbond_ref      bond;
4201 		int             count;
4202 		int             i;
4203 
4204 		bond = p->po_bond;
4205 		array = bond->ifb_distributing_array;
4206 		count = bond->ifb_distributing_count;
4207 		for (i = 0; i < count; i++) {
4208 			if (array[i] == p) {
4209 				int     j;
4210 
4211 				for (j = i; j < (count - 1); j++) {
4212 					array[j] = array[j + 1];
4213 				}
4214 				break;
4215 			}
4216 		}
4217 		bond->ifb_distributing_count--;
4218 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4219 		    "[%s] Distribution Disabled", bondport_get_name(p));
4220 		bondport_flags_clear_distributing(p);
4221 	}
4222 	return;
4223 }
4224 
4225 /**
4226 ** Receive machine functions
4227 **/
4228 static void
4229 bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
4230     void * event_data);
4231 static void
4232 bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
4233     void * event_data);
4234 static void
4235 bondport_receive_machine_expired(bondport_ref p, LAEvent event,
4236     void * event_data);
4237 static void
4238 bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
4239     void * event_data);
4240 static void
4241 bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
4242     void * event_data);
4243 static void
4244 bondport_receive_machine_current(bondport_ref p, LAEvent event,
4245     void * event_data);
4246 
4247 static void
bondport_receive_machine_event(bondport_ref p,LAEvent event,void * event_data)4248 bondport_receive_machine_event(bondport_ref p, LAEvent event,
4249     void * event_data)
4250 {
4251 	switch (p->po_receive_state) {
4252 	case ReceiveState_none:
4253 		bondport_receive_machine_initialize(p, LAEventStart, NULL);
4254 		break;
4255 	case ReceiveState_INITIALIZE:
4256 		bondport_receive_machine_initialize(p, event, event_data);
4257 		break;
4258 	case ReceiveState_PORT_DISABLED:
4259 		bondport_receive_machine_port_disabled(p, event, event_data);
4260 		break;
4261 	case ReceiveState_EXPIRED:
4262 		bondport_receive_machine_expired(p, event, event_data);
4263 		break;
4264 	case ReceiveState_LACP_DISABLED:
4265 		bondport_receive_machine_lacp_disabled(p, event, event_data);
4266 		break;
4267 	case ReceiveState_DEFAULTED:
4268 		bondport_receive_machine_defaulted(p, event, event_data);
4269 		break;
4270 	case ReceiveState_CURRENT:
4271 		bondport_receive_machine_current(p, event, event_data);
4272 		break;
4273 	default:
4274 		break;
4275 	}
4276 	return;
4277 }
4278 
4279 static void
bondport_receive_machine(bondport_ref p,LAEvent event,void * event_data)4280 bondport_receive_machine(bondport_ref p, LAEvent event,
4281     void * event_data)
4282 {
4283 	switch (event) {
4284 	case LAEventPacket:
4285 		if (p->po_receive_state != ReceiveState_LACP_DISABLED) {
4286 			bondport_receive_machine_current(p, event, event_data);
4287 		}
4288 		break;
4289 	case LAEventMediaChange:
4290 		if (media_active(&p->po_media_info)) {
4291 			switch (p->po_receive_state) {
4292 			case ReceiveState_PORT_DISABLED:
4293 			case ReceiveState_LACP_DISABLED:
4294 				bondport_receive_machine_port_disabled(p, LAEventMediaChange, NULL);
4295 				break;
4296 			default:
4297 				break;
4298 			}
4299 		} else {
4300 			bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
4301 		}
4302 		break;
4303 	default:
4304 		bondport_receive_machine_event(p, event, event_data);
4305 		break;
4306 	}
4307 	return;
4308 }
4309 
4310 static void
bondport_receive_machine_initialize(bondport_ref p,LAEvent event,__unused void * event_data)4311 bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
4312     __unused void * event_data)
4313 {
4314 	switch (event) {
4315 	case LAEventStart:
4316 		devtimer_cancel(p->po_current_while_timer);
4317 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4318 		    "[%s] Receive INITIALIZE", bondport_get_name(p));
4319 		p->po_receive_state = ReceiveState_INITIALIZE;
4320 		bondport_set_selected(p, SelectedState_UNSELECTED);
4321 		bondport_RecordDefault(p);
4322 		p->po_actor_state
4323 		        = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4324 		bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
4325 		break;
4326 	default:
4327 		break;
4328 	}
4329 	return;
4330 }
4331 
4332 static void
bondport_receive_machine_port_disabled(bondport_ref p,LAEvent event,__unused void * event_data)4333 bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
4334     __unused void * event_data)
4335 {
4336 	partner_state_ref   ps;
4337 
4338 	switch (event) {
4339 	case LAEventStart:
4340 		devtimer_cancel(p->po_current_while_timer);
4341 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4342 		    "[%s] Receive PORT_DISABLED", bondport_get_name(p));
4343 		p->po_receive_state = ReceiveState_PORT_DISABLED;
4344 		ps = &p->po_partner_state;
4345 		ps->ps_state = lacp_actor_partner_state_set_out_of_sync(ps->ps_state);
4346 		OS_FALLTHROUGH;
4347 	case LAEventMediaChange:
4348 		if (media_active(&p->po_media_info)) {
4349 			if (media_ok(&p->po_media_info)) {
4350 				bondport_receive_machine_expired(p, LAEventStart, NULL);
4351 			} else {
4352 				bondport_receive_machine_lacp_disabled(p, LAEventStart, NULL);
4353 			}
4354 		} else if (p->po_selected == SelectedState_SELECTED) {
4355 			struct timeval      tv;
4356 
4357 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4358 			    "[%s] Receive PORT_DISABLED: link timer started",
4359 			    bondport_get_name(p));
4360 			tv.tv_sec = 1;
4361 			tv.tv_usec = 0;
4362 			devtimer_set_relative(p->po_current_while_timer, tv,
4363 			    (devtimer_timeout_func)(void (*)(void))
4364 			    bondport_receive_machine_port_disabled,
4365 			    __unsafe_forge_single(void *, LAEventTimeout), NULL);
4366 		} else if (p->po_selected == SelectedState_STANDBY) {
4367 			bondport_set_selected(p, SelectedState_UNSELECTED);
4368 		}
4369 		break;
4370 	case LAEventTimeout:
4371 		if (p->po_selected == SelectedState_SELECTED) {
4372 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4373 			    "[%s] Receive PORT_DISABLED: "
4374 			    "link timer completed, marking UNSELECTED",
4375 			    bondport_get_name(p));
4376 			bondport_set_selected(p, SelectedState_UNSELECTED);
4377 		}
4378 		break;
4379 	case LAEventPortMoved:
4380 		bondport_receive_machine_initialize(p, LAEventStart, NULL);
4381 		break;
4382 	default:
4383 		break;
4384 	}
4385 	return;
4386 }
4387 
4388 static void
bondport_receive_machine_expired(bondport_ref p,LAEvent event,__unused void * event_data)4389 bondport_receive_machine_expired(bondport_ref p, LAEvent event,
4390     __unused void * event_data)
4391 {
4392 	lacp_actor_partner_state    s;
4393 	struct timeval              tv;
4394 
4395 	switch (event) {
4396 	case LAEventStart:
4397 		devtimer_cancel(p->po_current_while_timer);
4398 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4399 		    "[%s] Receive EXPIRED", bondport_get_name(p));
4400 		p->po_receive_state = ReceiveState_EXPIRED;
4401 		s = p->po_partner_state.ps_state;
4402 		s = lacp_actor_partner_state_set_out_of_sync(s);
4403 		s = lacp_actor_partner_state_set_short_timeout(s);
4404 		p->po_partner_state.ps_state = s;
4405 		p->po_actor_state
4406 		        = lacp_actor_partner_state_set_expired(p->po_actor_state);
4407 		/* start current_while timer */
4408 		tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4409 		tv.tv_usec = 0;
4410 		devtimer_set_relative(p->po_current_while_timer, tv,
4411 		    (devtimer_timeout_func)(void (*)(void))
4412 		    bondport_receive_machine_expired,
4413 		    __unsafe_forge_single(void *, LAEventTimeout), NULL);
4414 
4415 		break;
4416 	case LAEventTimeout:
4417 		bondport_receive_machine_defaulted(p, LAEventStart, NULL);
4418 		break;
4419 	default:
4420 		break;
4421 	}
4422 	return;
4423 }
4424 
4425 static void
bondport_receive_machine_lacp_disabled(bondport_ref p,LAEvent event,__unused void * event_data)4426 bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
4427     __unused void * event_data)
4428 {
4429 	partner_state_ref   ps;
4430 	switch (event) {
4431 	case LAEventStart:
4432 		devtimer_cancel(p->po_current_while_timer);
4433 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4434 		    "[%s] Receive LACP_DISABLED", bondport_get_name(p));
4435 		p->po_receive_state = ReceiveState_LACP_DISABLED;
4436 		bondport_set_selected(p, SelectedState_UNSELECTED);
4437 		bondport_RecordDefault(p);
4438 		ps = &p->po_partner_state;
4439 		ps->ps_state = lacp_actor_partner_state_set_individual(ps->ps_state);
4440 		p->po_actor_state
4441 		        = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4442 		break;
4443 	default:
4444 		break;
4445 	}
4446 	return;
4447 }
4448 
4449 static void
bondport_receive_machine_defaulted(bondport_ref p,LAEvent event,__unused void * event_data)4450 bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
4451     __unused void * event_data)
4452 {
4453 	switch (event) {
4454 	case LAEventStart:
4455 		devtimer_cancel(p->po_current_while_timer);
4456 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4457 		    "[%s] Receive DEFAULTED", bondport_get_name(p));
4458 		p->po_receive_state = ReceiveState_DEFAULTED;
4459 		bondport_UpdateDefaultSelected(p);
4460 		bondport_RecordDefault(p);
4461 		p->po_actor_state
4462 		        = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4463 		break;
4464 	default:
4465 		break;
4466 	}
4467 	return;
4468 }
4469 
4470 static void
bondport_receive_machine_current(bondport_ref p,LAEvent event,void * event_data)4471 bondport_receive_machine_current(bondport_ref p, LAEvent event,
4472     void * event_data)
4473 {
4474 	partner_state_ref   ps;
4475 	struct timeval      tv;
4476 
4477 	switch (event) {
4478 	case LAEventPacket:
4479 		devtimer_cancel(p->po_current_while_timer);
4480 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4481 		    "[%s] Receive CURRENT", bondport_get_name(p));
4482 		p->po_receive_state = ReceiveState_CURRENT;
4483 		bondport_UpdateSelected(p, event_data);
4484 		bondport_UpdateNTT(p, event_data);
4485 		bondport_RecordPDU(p, event_data);
4486 		p->po_actor_state
4487 		        = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4488 		bondport_assign_to_LAG(p);
4489 		/* start current_while timer */
4490 		ps = &p->po_partner_state;
4491 		if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4492 			tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4493 		} else {
4494 			tv.tv_sec = LACP_LONG_TIMEOUT_TIME;
4495 		}
4496 		tv.tv_usec = 0;
4497 		devtimer_set_relative(p->po_current_while_timer, tv,
4498 		    (devtimer_timeout_func)(void (*)(void))
4499 		    bondport_receive_machine_current,
4500 		    __unsafe_forge_single(void *, LAEventTimeout), NULL);
4501 		break;
4502 	case LAEventTimeout:
4503 		bondport_receive_machine_expired(p, LAEventStart, NULL);
4504 		break;
4505 	default:
4506 		break;
4507 	}
4508 	return;
4509 }
4510 
4511 /**
4512 ** Periodic Transmission machine
4513 **/
4514 
4515 static void
bondport_periodic_transmit_machine(bondport_ref p,LAEvent event,__unused void * event_data)4516 bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
4517     __unused void * event_data)
4518 {
4519 	int                 interval;
4520 	partner_state_ref   ps;
4521 	struct timeval      tv;
4522 
4523 	switch (event) {
4524 	case LAEventStart:
4525 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4526 		    "[%s] periodic_transmit Start", bondport_get_name(p));
4527 		OS_FALLTHROUGH;
4528 	case LAEventMediaChange:
4529 		devtimer_cancel(p->po_periodic_timer);
4530 		p->po_periodic_interval = 0;
4531 		if (media_active(&p->po_media_info) == 0
4532 		    || media_ok(&p->po_media_info) == 0) {
4533 			break;
4534 		}
4535 		OS_FALLTHROUGH;
4536 	case LAEventPacket:
4537 		/* Neither Partner nor Actor are LACP Active, no periodic tx */
4538 		ps = &p->po_partner_state;
4539 		if (lacp_actor_partner_state_active_lacp(p->po_actor_state) == 0
4540 		    && (lacp_actor_partner_state_active_lacp(ps->ps_state)
4541 		    == 0)) {
4542 			devtimer_cancel(p->po_periodic_timer);
4543 			p->po_periodic_interval = 0;
4544 			break;
4545 		}
4546 		if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4547 			interval = LACP_FAST_PERIODIC_TIME;
4548 		} else {
4549 			interval = LACP_SLOW_PERIODIC_TIME;
4550 		}
4551 		if (p->po_periodic_interval != interval) {
4552 			if (interval == LACP_FAST_PERIODIC_TIME
4553 			    && p->po_periodic_interval
4554 			    == LACP_SLOW_PERIODIC_TIME) {
4555 				BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4556 				    "[%s] periodic_transmit: Need To Transmit",
4557 				    bondport_get_name(p));
4558 				bondport_flags_set_ntt(p);
4559 			}
4560 			p->po_periodic_interval = interval;
4561 			tv.tv_usec = 0;
4562 			tv.tv_sec = interval;
4563 			devtimer_set_relative(p->po_periodic_timer, tv,
4564 			    (devtimer_timeout_func)(void (*)(void))
4565 			    bondport_periodic_transmit_machine,
4566 			    __unsafe_forge_single(void *, LAEventTimeout), NULL);
4567 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4568 			    "[%s] Periodic Transmission Timer: %d secs",
4569 			    bondport_get_name(p),
4570 			    p->po_periodic_interval);
4571 		}
4572 		break;
4573 	case LAEventTimeout:
4574 		bondport_flags_set_ntt(p);
4575 		tv.tv_sec = p->po_periodic_interval;
4576 		tv.tv_usec = 0;
4577 		devtimer_set_relative(p->po_periodic_timer, tv, (devtimer_timeout_func)(void (*)(void))
4578 		    bondport_periodic_transmit_machine,
4579 		    __unsafe_forge_single(void *, LAEventTimeout), NULL);
4580 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4581 		    "[%s] Periodic Transmission Timer: %d secs",
4582 		    bondport_get_name(p), p->po_periodic_interval);
4583 		break;
4584 	default:
4585 		break;
4586 	}
4587 	return;
4588 }
4589 
4590 /**
4591 ** Transmit machine
4592 **/
4593 static int
bondport_can_transmit(bondport_ref p,int32_t current_secs,__darwin_time_t * next_secs)4594 bondport_can_transmit(bondport_ref p, int32_t current_secs,
4595     __darwin_time_t * next_secs)
4596 {
4597 	if (p->po_last_transmit_secs != current_secs) {
4598 		p->po_last_transmit_secs = current_secs;
4599 		p->po_n_transmit = 0;
4600 	}
4601 	if (p->po_n_transmit < LACP_PACKET_RATE) {
4602 		p->po_n_transmit++;
4603 		return 1;
4604 	}
4605 	if (next_secs != NULL) {
4606 		*next_secs = current_secs + 1;
4607 	}
4608 	return 0;
4609 }
4610 
4611 static void
bondport_transmit_machine(bondport_ref p,LAEvent event,void * event_data)4612 bondport_transmit_machine(bondport_ref p, LAEvent event,
4613     void * event_data)
4614 {
4615 	lacp_actor_partner_tlv_ref  aptlv;
4616 	lacp_collector_tlv_ref      ctlv;
4617 	struct timeval              next_tick_time = {.tv_sec = 0, .tv_usec = 0};
4618 	lacpdu_ref          out_lacpdu_p;
4619 	packet_buffer_ref           pkt;
4620 	partner_state_ref           ps;
4621 	LAG_info_ref                ps_li;
4622 
4623 	switch (event) {
4624 	case LAEventTimeout:
4625 	case LAEventStart:
4626 		if (p->po_periodic_interval == 0 || bondport_flags_ntt(p) == 0) {
4627 			break;
4628 		}
4629 		if (event_data == TRANSMIT_MACHINE_TX_IMMEDIATE) {
4630 			/* we're going away, transmit the packet no matter what */
4631 		} else if (bondport_can_transmit(p, devtimer_current_secs(),
4632 		    &next_tick_time.tv_sec) == 0) {
4633 			if (devtimer_enabled(p->po_transmit_timer)) {
4634 				BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4635 				    "[%s] Transmit Timer Already Set",
4636 				    bondport_get_name(p));
4637 			} else {
4638 				devtimer_set_absolute(p->po_transmit_timer, next_tick_time,
4639 				    (devtimer_timeout_func)(void (*)(void))
4640 				    bondport_transmit_machine,
4641 				    __unsafe_forge_single(void *, LAEventTimeout), NULL);
4642 				BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4643 				    "[%s] Transmit Timer Deadline %d secs",
4644 				    bondport_get_name(p),
4645 				    (int)next_tick_time.tv_sec);
4646 			}
4647 			break;
4648 		}
4649 		if (event == LAEventTimeout) {
4650 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4651 			    "[%s] Transmit Timer Complete",
4652 			    bondport_get_name(p));
4653 		}
4654 		pkt = packet_buffer_allocate(sizeof(*out_lacpdu_p));
4655 		if (pkt == NULL) {
4656 			printf("[%s] Transmit: failed to allocate packet buffer\n",
4657 			    bondport_get_name(p));
4658 			break;
4659 		}
4660 		out_lacpdu_p = (lacpdu_ref)packet_buffer_byteptr(pkt);
4661 		bzero(out_lacpdu_p, sizeof(*out_lacpdu_p));
4662 		out_lacpdu_p->la_subtype = IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP;
4663 		out_lacpdu_p->la_version = LACPDU_VERSION_1;
4664 
4665 		/* Actor */
4666 		aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_actor_tlv;
4667 		aptlv->lap_tlv_type = LACPDU_TLV_TYPE_ACTOR;
4668 		aptlv->lap_length = LACPDU_ACTOR_TLV_LENGTH;
4669 		*((lacp_system_ref)aptlv->lap_system) = g_bond->system;
4670 		lacp_actor_partner_tlv_set_system_priority(aptlv,
4671 		    g_bond->system_priority);
4672 		lacp_actor_partner_tlv_set_port_priority(aptlv, p->po_priority);
4673 		lacp_actor_partner_tlv_set_port(aptlv, bondport_get_index(p));
4674 		lacp_actor_partner_tlv_set_key(aptlv, p->po_bond->ifb_key);
4675 		aptlv->lap_state = p->po_actor_state;
4676 
4677 		/* Partner */
4678 		aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_partner_tlv;
4679 		aptlv->lap_tlv_type = LACPDU_TLV_TYPE_PARTNER;
4680 		aptlv->lap_length = LACPDU_PARTNER_TLV_LENGTH;
4681 		ps = &p->po_partner_state;
4682 		ps_li = &ps->ps_lag_info;
4683 		lacp_actor_partner_tlv_set_port(aptlv, ps->ps_port);
4684 		lacp_actor_partner_tlv_set_port_priority(aptlv, ps->ps_port_priority);
4685 		*((lacp_system_ref)aptlv->lap_system) = ps_li->li_system;
4686 		lacp_actor_partner_tlv_set_system_priority(aptlv,
4687 		    ps_li->li_system_priority);
4688 		lacp_actor_partner_tlv_set_key(aptlv, ps_li->li_key);
4689 		aptlv->lap_state = ps->ps_state;
4690 
4691 		/* Collector */
4692 		ctlv = (lacp_collector_tlv_ref)out_lacpdu_p->la_collector_tlv;
4693 		ctlv->lac_tlv_type = LACPDU_TLV_TYPE_COLLECTOR;
4694 		ctlv->lac_length = LACPDU_COLLECTOR_TLV_LENGTH;
4695 
4696 		bondport_slow_proto_transmit(p, pkt);
4697 		bondport_flags_clear_ntt(p);
4698 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP, "[%s] Transmit Packet %d",
4699 		    bondport_get_name(p), p->po_n_transmit);
4700 		break;
4701 	default:
4702 		break;
4703 	}
4704 	return;
4705 }
4706 
4707 /**
4708 ** Mux machine functions
4709 **/
4710 
4711 static void
4712 bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4713     void * event_data);
4714 static void
4715 bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4716     void * event_data);
4717 static void
4718 bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4719     void * event_data);
4720 
4721 static void
4722 bondport_mux_machine_collecting_distributing(bondport_ref p, LAEvent event,
4723     void * event_data);
4724 
4725 static void
bondport_mux_machine(bondport_ref p,LAEvent event,void * event_data)4726 bondport_mux_machine(bondport_ref p, LAEvent event, void * event_data)
4727 {
4728 	switch (p->po_mux_state) {
4729 	case MuxState_none:
4730 		bondport_mux_machine_detached(p, LAEventStart, NULL);
4731 		break;
4732 	case MuxState_DETACHED:
4733 		bondport_mux_machine_detached(p, event, event_data);
4734 		break;
4735 	case MuxState_WAITING:
4736 		bondport_mux_machine_waiting(p, event, event_data);
4737 		break;
4738 	case MuxState_ATTACHED:
4739 		bondport_mux_machine_attached(p, event, event_data);
4740 		break;
4741 	case MuxState_COLLECTING_DISTRIBUTING:
4742 		bondport_mux_machine_collecting_distributing(p, event, event_data);
4743 		break;
4744 	default:
4745 		break;
4746 	}
4747 	return;
4748 }
4749 
4750 static void
bondport_mux_machine_detached(bondport_ref p,LAEvent event,__unused void * event_data)4751 bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4752     __unused void * event_data)
4753 {
4754 	lacp_actor_partner_state    s;
4755 
4756 	switch (event) {
4757 	case LAEventStart:
4758 		devtimer_cancel(p->po_wait_while_timer);
4759 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP, "[%s] Mux DETACHED",
4760 		    bondport_get_name(p));
4761 		p->po_mux_state = MuxState_DETACHED;
4762 		bondport_flags_clear_ready(p);
4763 		bondport_DetachMuxFromAggregator(p);
4764 		bondport_disable_distributing(p);
4765 		s = p->po_actor_state;
4766 		s = lacp_actor_partner_state_set_out_of_sync(s);
4767 		s = lacp_actor_partner_state_set_not_collecting(s);
4768 		s = lacp_actor_partner_state_set_not_distributing(s);
4769 		p->po_actor_state = s;
4770 		bondport_flags_set_ntt(p);
4771 		break;
4772 	case LAEventSelectedChange:
4773 	case LAEventPacket:
4774 	case LAEventMediaChange:
4775 		if (p->po_selected == SelectedState_SELECTED
4776 		    || p->po_selected == SelectedState_STANDBY) {
4777 			bondport_mux_machine_waiting(p, LAEventStart, NULL);
4778 		}
4779 		break;
4780 	default:
4781 		break;
4782 	}
4783 	return;
4784 }
4785 
4786 static void
bondport_mux_machine_waiting(bondport_ref p,LAEvent event,__unused void * event_data)4787 bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4788     __unused void * event_data)
4789 {
4790 	struct timeval      tv;
4791 
4792 	switch (event) {
4793 	case LAEventStart:
4794 		devtimer_cancel(p->po_wait_while_timer);
4795 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP, "[%s] Mux WAITING",
4796 		    bondport_get_name(p));
4797 		p->po_mux_state = MuxState_WAITING;
4798 		OS_FALLTHROUGH;
4799 	default:
4800 	case LAEventSelectedChange:
4801 		if (p->po_selected == SelectedState_UNSELECTED) {
4802 			bondport_mux_machine_detached(p, LAEventStart, NULL);
4803 			break;
4804 		}
4805 		if (p->po_selected == SelectedState_STANDBY) {
4806 			devtimer_cancel(p->po_wait_while_timer);
4807 			/* wait until state changes to SELECTED */
4808 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4809 			    "[%s] Mux WAITING: Standby", bondport_get_name(p));
4810 			break;
4811 		}
4812 		if (bondport_flags_ready(p)) {
4813 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4814 			    "[%s] Mux WAITING: Port is already ready",
4815 			    bondport_get_name(p));
4816 			break;
4817 		}
4818 		if (devtimer_enabled(p->po_wait_while_timer)) {
4819 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4820 			    "[%s] Mux WAITING: Timer already set",
4821 			    bondport_get_name(p));
4822 			break;
4823 		}
4824 		if (ifbond_all_ports_attached(p->po_bond, p)) {
4825 			devtimer_cancel(p->po_wait_while_timer);
4826 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4827 			    "[%s] Mux WAITING: No waiting",
4828 			    bondport_get_name(p));
4829 			bondport_flags_set_ready(p);
4830 			goto no_waiting;
4831 		}
4832 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4833 		    "[%s] Mux WAITING: 2 seconds", bondport_get_name(p));
4834 		tv.tv_sec = LACP_AGGREGATE_WAIT_TIME;
4835 		tv.tv_usec = 0;
4836 		devtimer_set_relative(p->po_wait_while_timer, tv,
4837 		    (devtimer_timeout_func)(void (*)(void))
4838 		    bondport_mux_machine_waiting,
4839 		    __unsafe_forge_single(void *, LAEventTimeout), NULL);
4840 		break;
4841 	case LAEventTimeout:
4842 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP, "[%s] Mux WAITING: Ready",
4843 		    bondport_get_name(p));
4844 		bondport_flags_set_ready(p);
4845 		break;
4846 	case LAEventReady:
4847 no_waiting:
4848 		if (bondport_flags_ready(p)) {
4849 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4850 			    "[%s] Mux WAITING: All Ports Ready",
4851 			    bondport_get_name(p));
4852 			bondport_mux_machine_attached(p, LAEventStart, NULL);
4853 			break;
4854 		}
4855 		break;
4856 	}
4857 	return;
4858 }
4859 
4860 static void
bondport_mux_machine_attached(bondport_ref p,LAEvent event,__unused void * event_data)4861 bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4862     __unused void * event_data)
4863 {
4864 	lacp_actor_partner_state    s;
4865 
4866 	switch (event) {
4867 	case LAEventStart:
4868 		devtimer_cancel(p->po_wait_while_timer);
4869 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP, "[%s] Mux ATTACHED",
4870 		    bondport_get_name(p));
4871 		p->po_mux_state = MuxState_ATTACHED;
4872 		bondport_AttachMuxToAggregator(p);
4873 		s = p->po_actor_state;
4874 		s = lacp_actor_partner_state_set_in_sync(s);
4875 		s = lacp_actor_partner_state_set_not_collecting(s);
4876 		s = lacp_actor_partner_state_set_not_distributing(s);
4877 		bondport_disable_distributing(p);
4878 		p->po_actor_state = s;
4879 		bondport_flags_set_ntt(p);
4880 		OS_FALLTHROUGH;
4881 	default:
4882 		switch (p->po_selected) {
4883 		case SelectedState_SELECTED:
4884 			s = p->po_partner_state.ps_state;
4885 			if (lacp_actor_partner_state_in_sync(s)) {
4886 				bondport_mux_machine_collecting_distributing(p,
4887 				    LAEventStart, NULL);
4888 			}
4889 			break;
4890 		default:
4891 			bondport_mux_machine_detached(p, LAEventStart, NULL);
4892 			break;
4893 		}
4894 		break;
4895 	}
4896 	return;
4897 }
4898 
4899 static void
bondport_mux_machine_collecting_distributing(bondport_ref p,LAEvent event,__unused void * event_data)4900 bondport_mux_machine_collecting_distributing(bondport_ref p,
4901     LAEvent event,
4902     __unused void * event_data)
4903 {
4904 	lacp_actor_partner_state    s;
4905 
4906 	switch (event) {
4907 	case LAEventStart:
4908 		devtimer_cancel(p->po_wait_while_timer);
4909 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4910 		    "[%s] Mux COLLECTING_DISTRIBUTING",
4911 		    bondport_get_name(p));
4912 		p->po_mux_state = MuxState_COLLECTING_DISTRIBUTING;
4913 		bondport_enable_distributing(p);
4914 		s = p->po_actor_state;
4915 		s = lacp_actor_partner_state_set_collecting(s);
4916 		s = lacp_actor_partner_state_set_distributing(s);
4917 		p->po_actor_state = s;
4918 		bondport_flags_set_ntt(p);
4919 		OS_FALLTHROUGH;
4920 	default:
4921 		s = p->po_partner_state.ps_state;
4922 		if (lacp_actor_partner_state_in_sync(s) == 0) {
4923 			bondport_mux_machine_attached(p, LAEventStart, NULL);
4924 			break;
4925 		}
4926 		switch (p->po_selected) {
4927 		case SelectedState_UNSELECTED:
4928 		case SelectedState_STANDBY:
4929 			bondport_mux_machine_attached(p, LAEventStart, NULL);
4930 			break;
4931 		default:
4932 			break;
4933 		}
4934 		break;
4935 	}
4936 	return;
4937 }
4938