xref: /xnu-11215.1.10/bsd/net/if_bond.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 /*
2  * Copyright (c) 2004-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * if_bond.c
31  * - bond/failover interface
32  * - implements IEEE 802.3ad Link Aggregation
33  */
34 
35 /*
36  * Modification History:
37  *
38  * April 29, 2004	Dieter Siegmund (dieter@apple.com)
39  * - created
40  */
41 
42 #include <sys/param.h>
43 #include <sys/kernel.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/queue.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/sysctl.h>
50 #include <sys/systm.h>
51 #include <sys/kern_event.h>
52 #include <net/bpf.h>
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/kpi_interface.h>
56 #include <net/if_arp.h>
57 #include <net/if_dl.h>
58 #include <net/if_ether.h>
59 #include <net/if_types.h>
60 #include <net/if_bond_var.h>
61 #include <net/ieee8023ad.h>
62 #include <net/lacp.h>
63 #include <net/dlil.h>
64 #include <sys/time.h>
65 #include <net/devtimer.h>
66 #include <net/if_vlan_var.h>
67 #include <net/kpi_protocol.h>
68 #include <sys/protosw.h>
69 #include <kern/locks.h>
70 #include <kern/zalloc.h>
71 #include <os/refcnt.h>
72 
73 #include <netinet/in.h>
74 #include <netinet/if_ether.h>
75 #include <netinet/in_systm.h>
76 #include <netinet/ip.h>
77 #include <netinet/ip6.h>
78 
79 #include <net/if_media.h>
80 #include <net/multicast_list.h>
81 
82 #include <net/sockaddr_utils.h>
83 #include <net/mblist.h>
84 
85 static struct ether_addr slow_proto_multicast = {
86 	.octet = IEEE8023AD_SLOW_PROTO_MULTICAST
87 };
88 
89 typedef struct ifbond_s ifbond, *__single ifbond_ref;
90 typedef struct bondport_s bondport, *__single bondport_ref;
91 
/* highest unit count handed to the bond cloner (see bond_cloner) */
#define BOND_MAXUNIT 128
/* the smaller of IFNETS_MAX and BOND_MAXUNIT */
#define BOND_ZONE_MAX_ELEM MIN(IFNETS_MAX, BOND_MAXUNIT)
/* interface name prefix used by the bond cloner */
#define BONDNAME "bond"
95 
/*
 * EA_FORMAT, EA_CH, EA_LIST
 * - helpers for printing a six-byte ethernet address:
 *   EA_FORMAT is the format string, EA_LIST(ea) expands to the six
 *   matching arguments, EA_CH(e, i) fetches byte 'i' of 'e' as u_char
 */
#define EA_FORMAT "%x:%x:%x:%x:%x:%x"
#define EA_CH(e, i) ((u_char)((u_char *)(e))[(i)])
#define EA_LIST(ea)                                     \
	EA_CH(ea, 0), EA_CH(ea, 1), EA_CH(ea, 2),       \
	EA_CH(ea, 3), EA_CH(ea, 4), EA_CH(ea, 5)
99 
100 /*
101  * if_bond_debug, BD_DBGF_*
102  * - 'if_bond_debug' is a bitmask of BD_DBGF_* flags that can be set
103  *   to enable additional logs for the corresponding bond function
104  * - "sysctl net.link.bond.debug" controls the value of
105  *   'if_bond_debug'
106  */
107 static uint32_t if_bond_debug = 0;
108 #define BD_DBGF_LIFECYCLE       0x0001
109 #define BD_DBGF_INPUT           0x0002
110 #define BD_DBGF_OUTPUT          0x0004
111 #define BD_DBGF_LACP            0x0008
112 
113 /*
114  * if_bond_log_level
115  * - 'if_bond_log_level' ensures that by default important logs are
116  *   logged regardless of if_bond_debug by comparing the log level
117  *   in BOND_LOG to if_bond_log_level
118  * - use "sysctl net.link.bond.log_level" controls the value of
119  *   'if_bond_log_level'
120  * - the default value of 'if_bond_log_level' is LOG_NOTICE; important
121  *   logs must use LOG_NOTICE to ensure they appear by default
122  */
123 static int if_bond_log_level = LOG_NOTICE;
124 
/*
 * BOND_DBGF_ENABLED
 * - evaluates to true when any of the BD_DBGF_* bits in '__flag' is set
 *   in if_bond_debug
 * - '__flag' is parenthesized so that a compound argument such as
 *   (BD_DBGF_INPUT | BD_DBGF_OUTPUT) is masked as a whole
 */
#define BOND_DBGF_ENABLED(__flag)     ((if_bond_debug & (__flag)) != 0)
126 
/*
 * BOND_LOG, BOND_LOG_SIMPLE
 * - macros to generate the specified log conditionally based on
 *   the specified log level and debug flags
 * - the log is emitted when '__level' <= if_bond_log_level or when
 *   any bit of '__dbgf' is enabled in if_bond_debug
 * - BOND_LOG prefixes the message with the calling function's name;
 *   BOND_LOG_SIMPLE does not include the function name in the log
 * - '__level' and '__dbgf' are parenthesized so that compound
 *   expressions are evaluated as a unit
 */
#define BOND_LOG(__level, __dbgf, __string, ...)              \
	do {                                                            \
	        if ((__level) <= if_bond_log_level ||                 \
	            BOND_DBGF_ENABLED((__dbgf))) {                    \
	                os_log(OS_LOG_DEFAULT, "%s: " __string, \
	                       __func__, ## __VA_ARGS__);       \
	        }                                                       \
	} while (0)
/*
 * BOND_LOG_SIMPLE
 * - emits '__string' through os_log() when '__level' <= if_bond_log_level
 *   or when any bit of '__dbgf' is enabled in if_bond_debug
 * - '__level' and '__dbgf' are parenthesized so that compound
 *   expressions are evaluated as a unit
 */
#define BOND_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                    \
	        if ((__level) <= if_bond_log_level ||         \
	            BOND_DBGF_ENABLED((__dbgf))) {                    \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                               \
	} while (0)
148 
149 SYSCTL_DECL(_net_link);
150 SYSCTL_NODE(_net_link, OID_AUTO, bond, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
151     "Bond interface");
152 
153 SYSCTL_INT(_net_link_bond, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
154     &if_bond_debug, 0, "Bond interface debug flags");
155 
156 SYSCTL_INT(_net_link_bond, OID_AUTO, log_level, CTLFLAG_RW | CTLFLAG_LOCKED,
157     &if_bond_log_level, 0, "Bond interface log level");
158 
159 /**
160 ** bond locks
161 **/
162 
163 static LCK_GRP_DECLARE(bond_lck_grp, "if_bond");
164 static LCK_MTX_DECLARE(bond_lck_mtx, &bond_lck_grp);
165 
166 static __inline__ void
bond_assert_lock_held(void)167 bond_assert_lock_held(void)
168 {
169 	LCK_MTX_ASSERT(&bond_lck_mtx, LCK_MTX_ASSERT_OWNED);
170 }
171 
172 static __inline__ void
bond_assert_lock_not_held(void)173 bond_assert_lock_not_held(void)
174 {
175 	LCK_MTX_ASSERT(&bond_lck_mtx, LCK_MTX_ASSERT_NOTOWNED);
176 }
177 
178 static __inline__ void
bond_lock(void)179 bond_lock(void)
180 {
181 	lck_mtx_lock(&bond_lck_mtx);
182 }
183 
184 static __inline__ void
bond_unlock(void)185 bond_unlock(void)
186 {
187 	lck_mtx_unlock(&bond_lck_mtx);
188 }
189 
190 /**
191 ** bond structures, types
192 **/
193 
194 struct LAG_info_s {
195 	lacp_system                 li_system;
196 	lacp_system_priority        li_system_priority;
197 	lacp_key                    li_key;
198 };
199 typedef struct LAG_info_s LAG_info, * LAG_info_ref;
200 
/* forward declarations, then TAILQ head types for ports, bonds, and LAGs */
struct bondport_s;
struct ifbond_s;
struct LAG_s;

TAILQ_HEAD(port_list, bondport_s);
TAILQ_HEAD(ifbond_list, ifbond_s);
TAILQ_HEAD(lag_list, LAG_s);
207 
208 struct LAG_s {
209 	TAILQ_ENTRY(LAG_s)          lag_list;
210 	struct port_list            lag_port_list;
211 	short                       lag_port_count;
212 	short                       lag_selected_port_count;
213 	int                         lag_active_media;
214 	LAG_info                    lag_info;
215 };
216 typedef struct LAG_s LAG, * LAG_ref;
217 
218 typedef struct partner_state_s {
219 	LAG_info                    ps_lag_info;
220 	lacp_port                   ps_port;
221 	lacp_port_priority          ps_port_priority;
222 	lacp_actor_partner_state    ps_state;
223 } partner_state, * partner_state_ref;
224 
225 struct ifbond_s {
226 	TAILQ_ENTRY(ifbond_s)       ifb_bond_list;
227 	int                         ifb_flags;
228 	struct os_refcnt            ifb_retain_count;
229 	char                        ifb_name[IFNAMSIZ];
230 	struct ifnet *              ifb_ifp;
231 	int                         ifb_altmtu;
232 	struct port_list            ifb_port_list;
233 	short                       ifb_port_count;
234 	struct lag_list             ifb_lag_list;
235 	lacp_key                    ifb_key;
236 	short                       ifb_max_active;/* 0 == unlimited */
237 	LAG_ref                     ifb_active_lag;
238 	struct ifmultiaddr *        ifb_ifma_slow_proto;
239 	bondport_ref *__counted_by(ifb_distributing_max) ifb_distributing_array;
240 	int                         ifb_distributing_count;
241 	int                         ifb_distributing_max;
242 	int                         ifb_last_link_event;
243 	int                         ifb_mode;/* LACP, STATIC */
244 };
245 
/* media words copied from a SIOCGIFMEDIA reply (see interface_media_info()) */
struct media_info {
	int mi_active;  /* from ifm_active */
	int mi_status;  /* from ifm_status */
};
250 
251 enum {
252 	ReceiveState_none = 0,
253 	ReceiveState_INITIALIZE = 1,
254 	ReceiveState_PORT_DISABLED = 2,
255 	ReceiveState_EXPIRED = 3,
256 	ReceiveState_LACP_DISABLED = 4,
257 	ReceiveState_DEFAULTED = 5,
258 	ReceiveState_CURRENT = 6,
259 };
260 
261 typedef u_char ReceiveState;
262 
263 enum {
264 	SelectedState_UNSELECTED = IF_BOND_STATUS_SELECTED_STATE_UNSELECTED,
265 	SelectedState_SELECTED = IF_BOND_STATUS_SELECTED_STATE_SELECTED,
266 	SelectedState_STANDBY = IF_BOND_STATUS_SELECTED_STATE_STANDBY
267 };
268 typedef u_char SelectedState;
269 
270 static __inline__ const char *
SelectedStateString(SelectedState s)271 SelectedStateString(SelectedState s)
272 {
273 	static const char * names[] = { "UNSELECTED", "SELECTED", "STANDBY" };
274 
275 	if (s <= SelectedState_STANDBY) {
276 		return names[s];
277 	}
278 	return "<unknown>";
279 }
280 
281 enum {
282 	MuxState_none = 0,
283 	MuxState_DETACHED = 1,
284 	MuxState_WAITING = 2,
285 	MuxState_ATTACHED = 3,
286 	MuxState_COLLECTING_DISTRIBUTING = 4,
287 };
288 
289 typedef u_char MuxState;
290 
/* PORT_CONTROL_FLAGS_* bits (presumably kept in bondport po_control_flags) */
#define PORT_CONTROL_FLAGS_IN_LIST              0x01
#define PORT_CONTROL_FLAGS_PROTO_ATTACHED       0x02
#define PORT_CONTROL_FLAGS_LLADDR_SET           0x04
#define PORT_CONTROL_FLAGS_MTU_SET              0x08
#define PORT_CONTROL_FLAGS_PROMISCUOUS_SET      0x10
#define PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET 0x20
297 
298 
/* true when 'flags' has at least one of the bits in 'flags_to_test' set */
static inline bool
uint32_bit_is_set(uint32_t flags, uint32_t flags_to_test)
{
	const uint32_t      common = flags & flags_to_test;

	return common != 0;
}
304 
/* turn on every bit of 'flags_to_set' in *flags_p */
static inline void
uint32_bit_set(uint32_t * flags_p, uint32_t flags_to_set)
{
	*flags_p = *flags_p | flags_to_set;
}
310 
/* turn off every bit of 'flags_to_clear' in *flags_p */
static inline void
uint32_bit_clear(uint32_t * flags_p, uint32_t flags_to_clear)
{
	*flags_p = *flags_p & ~flags_to_clear;
}
316 
317 struct bondport_s {
318 	TAILQ_ENTRY(bondport_s)     po_port_list;
319 	ifbond_ref                  po_bond;
320 	struct multicast_list       po_multicast;
321 	struct ifnet *              po_ifp;
322 	struct ether_addr           po_saved_addr;
323 	int                         po_enabled;
324 	char                        po_name[IFNAMSIZ];
325 	struct ifdevmtu             po_devmtu;
326 	uint32_t                    po_control_flags;
327 
328 	/* LACP */
329 	TAILQ_ENTRY(bondport_s)     po_lag_port_list;
330 	devtimer_ref                po_current_while_timer;
331 	devtimer_ref                po_periodic_timer;
332 	devtimer_ref                po_wait_while_timer;
333 	devtimer_ref                po_transmit_timer;
334 	partner_state               po_partner_state;
335 	lacp_port_priority          po_priority;
336 	lacp_actor_partner_state    po_actor_state;
337 	u_char                      po_flags;
338 	u_char                      po_periodic_interval;
339 	u_char                      po_n_transmit;
340 	ReceiveState                po_receive_state;
341 	MuxState                    po_mux_state;
342 	SelectedState               po_selected;
343 	int32_t                     po_last_transmit_secs;
344 	struct media_info           po_media_info;
345 	uint64_t                    po_force_link_event_time;
346 	LAG_ref                     po_lag;
347 };
348 
/* ifbond_s ifb_flags bits */
#define IFBF_PROMISC            0x1 /* promiscuous mode */
#define IFBF_IF_DETACHING       0x2 /* interface is detaching */
#define IFBF_LLADDR             0x4 /* specific link address requested */
#define IFBF_CHANGE_IN_PROGRESS 0x8 /* interface add/remove in progress */
353 
354 static int bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p,
355     user_addr_t datap);
356 
357 static __inline__ bool
ifbond_flags_if_detaching(ifbond_ref ifb)358 ifbond_flags_if_detaching(ifbond_ref ifb)
359 {
360 	return (ifb->ifb_flags & IFBF_IF_DETACHING) != 0;
361 }
362 
363 static __inline__ void
ifbond_flags_set_if_detaching(ifbond_ref ifb)364 ifbond_flags_set_if_detaching(ifbond_ref ifb)
365 {
366 	ifb->ifb_flags |= IFBF_IF_DETACHING;
367 	return;
368 }
369 
370 static __inline__ bool
ifbond_flags_lladdr(ifbond_ref ifb)371 ifbond_flags_lladdr(ifbond_ref ifb)
372 {
373 	return (ifb->ifb_flags & IFBF_LLADDR) != 0;
374 }
375 
376 static __inline__ bool
ifbond_flags_change_in_progress(ifbond_ref ifb)377 ifbond_flags_change_in_progress(ifbond_ref ifb)
378 {
379 	return (ifb->ifb_flags & IFBF_CHANGE_IN_PROGRESS) != 0;
380 }
381 
382 static __inline__ void
ifbond_flags_set_change_in_progress(ifbond_ref ifb)383 ifbond_flags_set_change_in_progress(ifbond_ref ifb)
384 {
385 	ifb->ifb_flags |= IFBF_CHANGE_IN_PROGRESS;
386 	return;
387 }
388 
389 static __inline__ void
ifbond_flags_clear_change_in_progress(ifbond_ref ifb)390 ifbond_flags_clear_change_in_progress(ifbond_ref ifb)
391 {
392 	ifb->ifb_flags &= ~IFBF_CHANGE_IN_PROGRESS;
393 	return;
394 }
395 
396 static __inline__ bool
ifbond_flags_promisc(ifbond_ref ifb)397 ifbond_flags_promisc(ifbond_ref ifb)
398 {
399 	return (ifb->ifb_flags & IFBF_PROMISC) != 0;
400 }
401 
402 static __inline__ void
ifbond_flags_set_promisc(ifbond_ref ifb)403 ifbond_flags_set_promisc(ifbond_ref ifb)
404 {
405 	ifb->ifb_flags |= IFBF_PROMISC;
406 	return;
407 }
408 
409 static __inline__ void
ifbond_flags_clear_promisc(ifbond_ref ifb)410 ifbond_flags_clear_promisc(ifbond_ref ifb)
411 {
412 	ifb->ifb_flags &= ~IFBF_PROMISC;
413 	return;
414 }
415 
416 /*
417  * bondport_ref->po_flags bits
418  */
/* one bit each; po_flags is a u_char, so all eight bits are assigned */
#define BONDPORT_FLAGS_NTT                      0x01
#define BONDPORT_FLAGS_READY                    0x02
#define BONDPORT_FLAGS_SELECTED_CHANGED         0x04
#define BONDPORT_FLAGS_MUX_ATTACHED             0x08
#define BONDPORT_FLAGS_DISTRIBUTING             0x10
#define BONDPORT_FLAGS_UNUSED2                  0x20
#define BONDPORT_FLAGS_UNUSED3                  0x40
#define BONDPORT_FLAGS_UNUSED4                  0x80
427 
428 static __inline__ void
bondport_flags_set_ntt(bondport_ref p)429 bondport_flags_set_ntt(bondport_ref p)
430 {
431 	p->po_flags |= BONDPORT_FLAGS_NTT;
432 	return;
433 }
434 
435 static __inline__ void
bondport_flags_clear_ntt(bondport_ref p)436 bondport_flags_clear_ntt(bondport_ref p)
437 {
438 	p->po_flags &= ~BONDPORT_FLAGS_NTT;
439 	return;
440 }
441 
442 static __inline__ int
bondport_flags_ntt(bondport_ref p)443 bondport_flags_ntt(bondport_ref p)
444 {
445 	return (p->po_flags & BONDPORT_FLAGS_NTT) != 0;
446 }
447 
448 static __inline__ void
bondport_flags_set_ready(bondport_ref p)449 bondport_flags_set_ready(bondport_ref p)
450 {
451 	p->po_flags |= BONDPORT_FLAGS_READY;
452 	return;
453 }
454 
455 static __inline__ void
bondport_flags_clear_ready(bondport_ref p)456 bondport_flags_clear_ready(bondport_ref p)
457 {
458 	p->po_flags &= ~BONDPORT_FLAGS_READY;
459 	return;
460 }
461 
462 static __inline__ int
bondport_flags_ready(bondport_ref p)463 bondport_flags_ready(bondport_ref p)
464 {
465 	return (p->po_flags & BONDPORT_FLAGS_READY) != 0;
466 }
467 
468 static __inline__ void
bondport_flags_set_selected_changed(bondport_ref p)469 bondport_flags_set_selected_changed(bondport_ref p)
470 {
471 	p->po_flags |= BONDPORT_FLAGS_SELECTED_CHANGED;
472 	return;
473 }
474 
475 static __inline__ void
bondport_flags_clear_selected_changed(bondport_ref p)476 bondport_flags_clear_selected_changed(bondport_ref p)
477 {
478 	p->po_flags &= ~BONDPORT_FLAGS_SELECTED_CHANGED;
479 	return;
480 }
481 
482 static __inline__ int
bondport_flags_selected_changed(bondport_ref p)483 bondport_flags_selected_changed(bondport_ref p)
484 {
485 	return (p->po_flags & BONDPORT_FLAGS_SELECTED_CHANGED) != 0;
486 }
487 
488 static __inline__ void
bondport_flags_set_mux_attached(bondport_ref p)489 bondport_flags_set_mux_attached(bondport_ref p)
490 {
491 	p->po_flags |= BONDPORT_FLAGS_MUX_ATTACHED;
492 	return;
493 }
494 
495 static __inline__ void
bondport_flags_clear_mux_attached(bondport_ref p)496 bondport_flags_clear_mux_attached(bondport_ref p)
497 {
498 	p->po_flags &= ~BONDPORT_FLAGS_MUX_ATTACHED;
499 	return;
500 }
501 
502 static __inline__ int
bondport_flags_mux_attached(bondport_ref p)503 bondport_flags_mux_attached(bondport_ref p)
504 {
505 	return (p->po_flags & BONDPORT_FLAGS_MUX_ATTACHED) != 0;
506 }
507 
508 static __inline__ void
bondport_flags_set_distributing(bondport_ref p)509 bondport_flags_set_distributing(bondport_ref p)
510 {
511 	p->po_flags |= BONDPORT_FLAGS_DISTRIBUTING;
512 	return;
513 }
514 
515 static __inline__ void
bondport_flags_clear_distributing(bondport_ref p)516 bondport_flags_clear_distributing(bondport_ref p)
517 {
518 	p->po_flags &= ~BONDPORT_FLAGS_DISTRIBUTING;
519 	return;
520 }
521 
522 static __inline__ int
bondport_flags_distributing(bondport_ref p)523 bondport_flags_distributing(bondport_ref p)
524 {
525 	return (p->po_flags & BONDPORT_FLAGS_DISTRIBUTING) != 0;
526 }
527 
528 typedef struct bond_globals_s {
529 	struct ifbond_list          ifbond_list;
530 	lacp_system                 system;
531 	lacp_system_priority        system_priority;
532 } * bond_globals_ref;
533 
534 static bond_globals_ref g_bond;
535 
536 /**
537 ** packet_buffer routines
538 ** - thin wrapper for mbuf
539 **/
540 
/* a packet buffer is simply an mbuf pointer */
typedef struct mbuf *packet_buffer_ref;
542 
543 static packet_buffer_ref
packet_buffer_allocate(int length)544 packet_buffer_allocate(int length)
545 {
546 	packet_buffer_ref   m;
547 	int                 size;
548 
549 	/* leave room for ethernet header */
550 	size = length + sizeof(struct ether_header);
551 	if (size > (int)MHLEN) {
552 		if (size > (int)MCLBYTES) {
553 			BOND_LOG(LOG_NOTICE, 0,
554 			    "packet_buffer_allocate size %d > max %u",
555 			    size, MCLBYTES);
556 			return NULL;
557 		}
558 		m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
559 	} else {
560 		m = m_gethdr(M_WAITOK, MT_DATA);
561 	}
562 	if (m == NULL) {
563 		return NULL;
564 	}
565 	m->m_len = size;
566 	m->m_pkthdr.len = size;
567 	return m;
568 }
569 
570 static void *__indexable
packet_buffer_byteptr(packet_buffer_ref buf)571 packet_buffer_byteptr(packet_buffer_ref buf)
572 {
573 	return m_mtod_current(buf) + sizeof(struct ether_header);
574 }
575 
/* events passed to the bondport_*_machine() functions */
typedef enum {
	LAEventStart,
	LAEventTimeout,
	LAEventPacket,
	LAEventMediaChange,
	LAEventSelectedChange,
	LAEventPortMoved,
	LAEventReady
} LAEvent;
585 
586 /**
587 ** Receive machine
588 **/
589 static void
590 bondport_receive_machine(bondport_ref p, LAEvent event,
591     void * event_data);
592 /**
593 ** Periodic Transmission machine
594 **/
595 static void
596 bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
597     void * event_data);
598 
599 /**
600 ** Transmit machine
601 **/
602 static void *TRANSMIT_MACHINE_TX_IMMEDIATE = __unsafe_forge_single(void *, 1);
603 
604 static void
605 bondport_transmit_machine(bondport_ref p, LAEvent event,
606     void * event_data);
607 
608 /**
609 ** Mux machine
610 **/
611 static void
612 bondport_mux_machine(bondport_ref p, LAEvent event,
613     void * event_data);
614 
615 /**
616 ** bond, LAG
617 **/
618 static void
619 ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media);
620 
621 static void
622 ifbond_deactivate_LAG(ifbond_ref bond, LAG_ref lag);
623 
624 static int
625 ifbond_all_ports_ready(ifbond_ref bond);
626 
627 static LAG_ref
628 ifbond_find_best_LAG(ifbond_ref bond, int * active_media);
629 
630 static int
631 LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media);
632 
633 static int
634 ifbond_selection(ifbond_ref bond);
635 
636 static void
637 bond_handle_event(struct ifnet * port_ifp, int event_code);
638 
639 /**
640 ** bondport
641 **/
642 
643 static void
644 bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p);
645 
646 static void
647 bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf);
648 
649 static bondport_ref
650 bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
651     int active, int short_timeout, int * error);
652 static void
653 bondport_start(bondport_ref p);
654 
655 static void
656 bondport_free(bondport_ref p);
657 
658 static int
659 bondport_aggregatable(bondport_ref p);
660 
661 static int
662 bondport_remove_from_LAG(bondport_ref p);
663 
664 static void
665 bondport_set_selected(bondport_ref p, SelectedState s);
666 
667 static int
668 bondport_matches_LAG(bondport_ref p, LAG_ref lag);
669 
670 static void
671 bondport_link_status_changed(bondport_ref p);
672 
673 static void
674 bondport_enable_distributing(bondport_ref p);
675 
676 static void
677 bondport_disable_distributing(bondport_ref p);
678 
679 static __inline__ int
bondport_collecting(bondport_ref p)680 bondport_collecting(bondport_ref p)
681 {
682 	if (p->po_bond->ifb_mode == IF_BOND_MODE_LACP) {
683 		return lacp_actor_partner_state_collecting(p->po_actor_state);
684 	}
685 	return TRUE;
686 }
687 
688 /**
689 ** bond interface/dlil specific routines
690 **/
691 static int bond_clone_create(struct if_clone *, u_int32_t, void *);
692 static int bond_clone_destroy(struct ifnet *);
693 static int bond_output(ifnet_t ifp, mbuf_t list);
694 static int bond_ioctl(struct ifnet *ifp, u_long cmd, void * addr);
695 static int bond_attach_protocol(struct ifnet *ifp);
696 static int bond_detach_protocol(struct ifnet *ifp);
697 static int bond_setmulti(struct ifnet *ifp);
698 static int bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp);
699 static int bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp);
700 static void bond_if_free(struct ifnet * ifp);
701 static void interface_link_event(struct ifnet * ifp, u_int32_t event_code);
702 
703 static struct if_clone bond_cloner = IF_CLONE_INITIALIZER(BONDNAME,
704     bond_clone_create,
705     bond_clone_destroy,
706     0,
707     BOND_MAXUNIT);
708 
709 static int
siocsifmtu(struct ifnet * ifp,int mtu)710 siocsifmtu(struct ifnet * ifp, int mtu)
711 {
712 	struct ifreq        ifr;
713 
714 	bzero(&ifr, sizeof(ifr));
715 	ifr.ifr_mtu = mtu;
716 	return ifnet_ioctl(ifp, 0, SIOCSIFMTU, &ifr);
717 }
718 
719 static int
siocgifdevmtu(struct ifnet * ifp,struct ifdevmtu * ifdm_p)720 siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
721 {
722 	struct ifreq        ifr;
723 	int                 error;
724 
725 	bzero(&ifr, sizeof(ifr));
726 	error = ifnet_ioctl(ifp, 0, SIOCGIFDEVMTU, &ifr);
727 	if (error == 0) {
728 		*ifdm_p = ifr.ifr_devmtu;
729 	}
730 	return error;
731 }
732 
733 static __inline__ void
ether_addr_copy(uint8_t * __sized_by (ETHER_ADDR_LEN)dest,const uint8_t * __sized_by (ETHER_ADDR_LEN)source)734 ether_addr_copy(uint8_t *__sized_by(ETHER_ADDR_LEN) dest,
735     const uint8_t *__sized_by(ETHER_ADDR_LEN) source)
736 {
737 	bcopy(source, dest, ETHER_ADDR_LEN);
738 	return;
739 }
740 
741 static __inline__ void
ifbond_retain(ifbond_ref ifb)742 ifbond_retain(ifbond_ref ifb)
743 {
744 	os_ref_retain(&ifb->ifb_retain_count);
745 }
746 
747 static __inline__ void
ifbond_release(ifbond_ref ifb)748 ifbond_release(ifbond_ref ifb)
749 {
750 	if (os_ref_release(&ifb->ifb_retain_count) != 0) {
751 		return;
752 	}
753 	BOND_LOG(LOG_DEBUG, BD_DBGF_LIFECYCLE, "%s", ifb->ifb_name);
754 	if (ifb->ifb_ifma_slow_proto != NULL) {
755 		BOND_LOG(LOG_DEBUG, BD_DBGF_LIFECYCLE,
756 		    "%s: removing multicast", ifb->ifb_name);
757 		(void) if_delmulti_anon(ifb->ifb_ifma_slow_proto->ifma_ifp,
758 		    ifb->ifb_ifma_slow_proto->ifma_addr);
759 		IFMA_REMREF(ifb->ifb_ifma_slow_proto);
760 	}
761 	kfree_type_counted_by(bondport_ref, ifb->ifb_distributing_max,
762 	    ifb->ifb_distributing_array);
763 	kfree_type(struct ifbond_s, ifb);
764 }
765 
766 /*
767  * Function: ifbond_wait
768  * Purpose:
769  *   Allows a single thread to gain exclusive access to the ifbond
770  *   data structure.  Some operations take a long time to complete,
771  *   and some have side-effects that we can't predict.  Holding the
772  *   bond_lock() across such operations is not possible.
773  *
774  *   For example:
775  *   1) The SIOCSIFLLADDR ioctl takes a long time (several seconds) to
776  *      complete.  Simply holding the bond_lock() would freeze all other
777  *      data structure accesses during that time.
778  *   2) When we attach our protocol to the interface, a dlil event is
779  *      generated and invokes our bond_event() function.  bond_event()
780  *      needs to take the bond_lock(), but we're already holding it, so
781  *      we're deadlocked against ourselves.
782  * Notes:
783  *   Before calling, you must be holding the bond_lock and have taken
784  *   a reference on the ifbond_ref.
785  */
786 static void
ifbond_wait(ifbond_ref ifb,const char * msg)787 ifbond_wait(ifbond_ref ifb, const char * msg)
788 {
789 	int         waited = 0;
790 
791 	/* other add/remove in progress */
792 	while (ifbond_flags_change_in_progress(ifb)) {
793 		BOND_LOG(LOG_DEBUG, BD_DBGF_LIFECYCLE, "%s: %s msleep",
794 		    ifb->ifb_name, msg);
795 		waited = 1;
796 		(void)msleep(ifb, &bond_lck_mtx, PZERO, msg, 0);
797 	}
798 	/* prevent other bond list remove/add from taking place */
799 	ifbond_flags_set_change_in_progress(ifb);
800 	if (waited) {
801 		BOND_LOG(LOG_DEBUG, BD_DBGF_LIFECYCLE,
802 		    "%s: %s woke up", ifb->ifb_name, msg);
803 	}
804 	return;
805 }
806 
807 /*
808  * Function: ifbond_signal
809  * Purpose:
810  *   Allows the thread that previously invoked ifbond_wait() to
811  *   give up exclusive access to the ifbond data structure, and wake up
812  *   any other threads waiting to access
813  * Notes:
814  *   Before calling, you must be holding the bond_lock and have taken
815  *   a reference on the ifbond_ref.
816  */
817 static void
ifbond_signal(ifbond_ref ifb,const char * msg)818 ifbond_signal(ifbond_ref ifb, const char * msg)
819 {
820 	ifbond_flags_clear_change_in_progress(ifb);
821 	wakeup((caddr_t)ifb);
822 	BOND_LOG(LOG_DEBUG, BD_DBGF_LIFECYCLE, "%s: %s wakeup",
823 	    ifb->ifb_name, msg);
824 	return;
825 }
826 
827 /**
828 ** Media information
829 **/
830 
831 static int
link_speed(int active)832 link_speed(int active)
833 {
834 	switch (IFM_SUBTYPE(active)) {
835 	case IFM_AUTO:
836 	case IFM_MANUAL:
837 	case IFM_NONE:
838 		return 0;
839 	case IFM_10_T:
840 	case IFM_10_2:
841 	case IFM_10_5:
842 	case IFM_10_STP:
843 	case IFM_10_FL:
844 		return 10;
845 	case IFM_100_TX:
846 	case IFM_100_FX:
847 	case IFM_100_T4:
848 	case IFM_100_VG:
849 	case IFM_100_T2:
850 		return 100;
851 	case IFM_1000_SX:
852 	case IFM_1000_LX:
853 	case IFM_1000_CX:
854 	case IFM_1000_TX:
855 	case IFM_1000_CX_SGMII:
856 	case IFM_1000_KX:
857 		return 1000;
858 	case IFM_HPNA_1:
859 		return 1;
860 	default:
861 	/* assume that new defined types are going to be at least 10GigE */
862 	case IFM_10G_T:
863 	case IFM_10G_SR:
864 	case IFM_10G_LR:
865 	case IFM_10G_KX4:
866 	case IFM_10G_KR:
867 	case IFM_10G_CR1:
868 	case IFM_10G_ER:
869 		return 10000;
870 	case IFM_2500_T:
871 		return 2500;
872 	case IFM_5000_T:
873 		return 5000;
874 	case IFM_20G_KR2:
875 		return 20000;
876 	case IFM_25G_CR:
877 	case IFM_25G_KR:
878 	case IFM_25G_SR:
879 	case IFM_25G_LR:
880 		return 25000;
881 	case IFM_40G_CR4:
882 	case IFM_40G_SR4:
883 	case IFM_40G_LR4:
884 	case IFM_40G_KR4:
885 		return 40000;
886 	case IFM_50G_CR2:
887 	case IFM_50G_KR2:
888 	case IFM_50G_SR2:
889 	case IFM_50G_LR2:
890 		return 50000;
891 	case IFM_56G_R4:
892 		return 56000;
893 	case IFM_100G_CR4:
894 	case IFM_100G_SR4:
895 	case IFM_100G_KR4:
896 	case IFM_100G_LR4:
897 		return 100000;
898 	}
899 }
900 
901 static __inline__ int
media_active(const struct media_info * mi)902 media_active(const struct media_info * mi)
903 {
904 	if ((mi->mi_status & IFM_AVALID) == 0) {
905 		return 1;
906 	}
907 	return (mi->mi_status & IFM_ACTIVE) != 0;
908 }
909 
910 static __inline__ int
media_full_duplex(const struct media_info * mi)911 media_full_duplex(const struct media_info * mi)
912 {
913 	return (mi->mi_active & IFM_FDX) != 0;
914 }
915 
916 static __inline__ int
media_type_unknown(const struct media_info * mi)917 media_type_unknown(const struct media_info * mi)
918 {
919 	int unknown;
920 
921 	switch (IFM_SUBTYPE(mi->mi_active)) {
922 	case IFM_AUTO:
923 	case IFM_MANUAL:
924 	case IFM_NONE:
925 		unknown = 1;
926 		break;
927 	default:
928 		unknown = 0;
929 		break;
930 	}
931 	return unknown;
932 }
933 
/* media is acceptable when it is full duplex or its type is unknown */
static __inline__ int
media_ok(const struct media_info * mi)
{
	if (media_full_duplex(mi)) {
		return 1;
	}
	return media_type_unknown(mi) != 0;
}
939 
940 static __inline__ int
media_speed(const struct media_info * mi)941 media_speed(const struct media_info * mi)
942 {
943 	return link_speed(mi->mi_active);
944 }
945 
946 static struct media_info
interface_media_info(struct ifnet * ifp)947 interface_media_info(struct ifnet * ifp)
948 {
949 	struct ifmediareq   ifmr;
950 	struct media_info   mi;
951 
952 	bzero(&mi, sizeof(mi));
953 	bzero(&ifmr, sizeof(ifmr));
954 	if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
955 		if (ifmr.ifm_count != 0) {
956 			mi.mi_status = ifmr.ifm_status;
957 			mi.mi_active = ifmr.ifm_active;
958 		}
959 	}
960 	return mi;
961 }
962 
963 static int
if_siflladdr(struct ifnet * ifp,const struct ether_addr * ea_p)964 if_siflladdr(struct ifnet * ifp, const struct ether_addr * ea_p)
965 {
966 	struct ifreq        ifr;
967 
968 	/*
969 	 * XXX setting the sa_len to ETHER_ADDR_LEN is wrong, but the driver
970 	 * currently expects it that way
971 	 */
972 	ifr.ifr_addr.sa_family = AF_UNSPEC;
973 	ifr.ifr_addr.sa_len = ETHER_ADDR_LEN;
974 	ether_addr_copy((uint8_t *__indexable)ifr.ifr_addr.sa_data, ea_p->octet);
975 	return ifnet_ioctl(ifp, 0, SIOCSIFLLADDR, &ifr);
976 }
977 
978 /**
979 ** bond_globals
980 **/
981 static bond_globals_ref
bond_globals_create(lacp_system_priority sys_pri,lacp_system_ref sys)982 bond_globals_create(lacp_system_priority sys_pri,
983     lacp_system_ref sys)
984 {
985 	bond_globals_ref    b;
986 
987 	b = kalloc_type(struct bond_globals_s, Z_WAITOK | Z_ZERO | Z_NOFAIL);
988 	TAILQ_INIT(&b->ifbond_list);
989 	b->system = *sys;
990 	b->system_priority = sys_pri;
991 	return b;
992 }
993 
994 static int
bond_globals_init(void)995 bond_globals_init(void)
996 {
997 	bond_globals_ref    b;
998 	int                 i;
999 	struct ifnet *      ifp;
1000 
1001 	bond_assert_lock_not_held();
1002 
1003 	if (g_bond != NULL) {
1004 		return 0;
1005 	}
1006 
1007 	/*
1008 	 * use en0's ethernet address as the system identifier, and if it's not
1009 	 * there, use en1 .. en3
1010 	 */
1011 	ifp = NULL;
1012 	for (i = 0; i < 4; i++) {
1013 		char            ifname[IFNAMSIZ + 1];
1014 		snprintf(ifname, sizeof(ifname), "en%d", i);
1015 		ifp = ifunit(__unsafe_null_terminated_from_indexable(ifname));
1016 		if (ifp != NULL) {
1017 			break;
1018 		}
1019 	}
1020 	b = NULL;
1021 	if (ifp != NULL) {
1022 		b = bond_globals_create(0x8000, (lacp_system_ref)IF_LLADDR(ifp));
1023 	}
1024 	bond_lock();
1025 	if (g_bond != NULL) {
1026 		bond_unlock();
1027 		kfree_type(struct bond_globals_s, b);
1028 		return 0;
1029 	}
1030 	g_bond = b;
1031 	bond_unlock();
1032 	if (ifp == NULL) {
1033 		return ENXIO;
1034 	}
1035 	if (b == NULL) {
1036 		return ENOMEM;
1037 	}
1038 	return 0;
1039 }
1040 
1041 /*
1042  * bpf tap
1043  */
1044 static inline void *__indexable
get_bpf_header(mbuf_t m,struct ether_header * eh_p,struct ether_vlan_header * evl_p,size_t * header_len)1045 get_bpf_header(mbuf_t m, struct ether_header * eh_p,
1046     struct ether_vlan_header * evl_p, size_t * header_len)
1047 {
1048 	void *header;
1049 
1050 	/* no VLAN tag, just use the ethernet header */
1051 	if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
1052 		header = (struct ether_header *__bidi_indexable)eh_p;
1053 		*header_len = sizeof(*eh_p);
1054 		goto done;
1055 	}
1056 
1057 	/* has VLAN tag, populate the ether VLAN header */
1058 	bcopy(eh_p, evl_p,
1059 	    offsetof(struct ether_header, ether_type));   /* dst+src ether */
1060 	evl_p->evl_encap_proto = htons(ETHERTYPE_VLAN);   /* VLAN encap */
1061 	evl_p->evl_tag = htons(m->m_pkthdr.vlan_tag);     /* tag */
1062 	evl_p->evl_proto = eh_p->ether_type;              /* proto */
1063 	*header_len = sizeof(*evl_p);
1064 	header = (struct ether_vlan_header *__bidi_indexable)evl_p;
1065 
1066 done:
1067 	return header;
1068 }
1069 
1070 typedef void (*_tap_func)(ifnet_t interface, u_int32_t dlt, mbuf_t packet,
1071     void *__sized_by(header_len) header, size_t header_len);
1072 
1073 static void
bond_bpf_tap_common(ifnet_t ifp,mbuf_t m,struct ether_header * eh_p,_tap_func func)1074 bond_bpf_tap_common(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p,
1075     _tap_func func)
1076 {
1077 	struct ether_vlan_header        evl;
1078 	size_t                          header_len;
1079 	void *                          header;
1080 
1081 	header = get_bpf_header(m, eh_p, &evl, &header_len);
1082 	(*func)(ifp, DLT_EN10MB, m, header, header_len);
1083 }
1084 
1085 static inline void
bond_bpf_tap_in(ifnet_t ifp,mbuf_t m,struct ether_header * eh_p)1086 bond_bpf_tap_in(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
1087 {
1088 	bond_bpf_tap_common(ifp, m, eh_p, bpf_tap_in);
1089 }
1090 
1091 static inline void
bond_bpf_tap_out(ifnet_t ifp,mbuf_t m,struct ether_header * eh_p)1092 bond_bpf_tap_out(ifnet_t ifp, mbuf_t m, struct ether_header * eh_p)
1093 {
1094 	bond_bpf_tap_common(ifp, m, eh_p, bpf_tap_out);
1095 }
1096 
1097 /*
1098  * Function: bond_setmulti
1099  * Purpose:
1100  *   Enable multicast reception on "our" interface by enabling multicasts on
1101  *   each of the member ports.
1102  */
1103 static int
bond_setmulti(struct ifnet * ifp)1104 bond_setmulti(struct ifnet * ifp)
1105 {
1106 	ifbond_ref          ifb;
1107 	int                 error;
1108 	int                 result = 0;
1109 	bondport_ref        p;
1110 
1111 	bond_lock();
1112 	ifb = ifnet_softc(ifp);
1113 	if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1114 	    || TAILQ_EMPTY(&ifb->ifb_port_list)) {
1115 		bond_unlock();
1116 		return 0;
1117 	}
1118 	ifbond_retain(ifb);
1119 	ifbond_wait(ifb, "bond_setmulti");
1120 
1121 	if (ifbond_flags_if_detaching(ifb)) {
1122 		/* someone destroyed the bond while we were waiting */
1123 		result = EBUSY;
1124 		goto signal_done;
1125 	}
1126 	bond_unlock();
1127 
1128 	/* ifbond_wait() let's us safely walk the list without holding the lock */
1129 	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1130 		struct ifnet *  port_ifp = p->po_ifp;
1131 
1132 		error = multicast_list_program(&p->po_multicast,
1133 		    ifp, port_ifp);
1134 		if (error != 0) {
1135 			BOND_LOG(LOG_NOTICE, 0,
1136 			    "%s: multicast_list_program(%s) failed, %d",
1137 			    ifb->ifb_name, port_ifp->if_xname, error);
1138 			result = error;
1139 		}
1140 	}
1141 	bond_lock();
1142 signal_done:
1143 	ifbond_signal(ifb, __func__);
1144 	bond_unlock();
1145 	ifbond_release(ifb);
1146 	return result;
1147 }
1148 
1149 static int
bond_clone_attach(void)1150 bond_clone_attach(void)
1151 {
1152 	int error;
1153 
1154 	if ((error = if_clone_attach(&bond_cloner)) != 0) {
1155 		return error;
1156 	}
1157 	return 0;
1158 }
1159 
1160 static int
ifbond_add_slow_proto_multicast(ifbond_ref ifb)1161 ifbond_add_slow_proto_multicast(ifbond_ref ifb)
1162 {
1163 	int                         error;
1164 	struct ifmultiaddr *__single ifma = NULL;
1165 	struct sockaddr_dl          sdl;
1166 
1167 	bond_assert_lock_not_held();
1168 
1169 	SOCKADDR_ZERO(&sdl, sizeof(sdl));
1170 	sdl.sdl_len = sizeof(sdl);
1171 	sdl.sdl_family = AF_LINK;
1172 	sdl.sdl_type = IFT_ETHER;
1173 	sdl.sdl_nlen = 0;
1174 	sdl.sdl_alen = sizeof(slow_proto_multicast);
1175 	bcopy(&slow_proto_multicast, sdl.sdl_data, sizeof(slow_proto_multicast));
1176 	error = if_addmulti_anon(ifb->ifb_ifp, SA(&sdl), &ifma);
1177 	if (error == 0) {
1178 		ifb->ifb_ifma_slow_proto = ifma;
1179 	}
1180 	return error;
1181 }
1182 
1183 static int
bond_clone_create(struct if_clone * ifc,u_int32_t unit,__unused void * params)1184 bond_clone_create(struct if_clone * ifc, u_int32_t unit, __unused void *params)
1185 {
1186 	int                                             error;
1187 	ifbond_ref                                      ifb;
1188 	ifnet_ref_t                                     ifp;
1189 	struct ifnet_init_eparams       bond_init;
1190 
1191 	error = bond_globals_init();
1192 	if (error != 0) {
1193 		return error;
1194 	}
1195 
1196 	ifb = kalloc_type(struct ifbond_s, Z_WAITOK_ZERO_NOFAIL);
1197 	os_ref_init(&ifb->ifb_retain_count, NULL);
1198 	TAILQ_INIT(&ifb->ifb_port_list);
1199 	TAILQ_INIT(&ifb->ifb_lag_list);
1200 	ifb->ifb_key = unit + 1;
1201 
1202 	/* use the interface name as the unique id for ifp recycle */
1203 	if ((u_int32_t)snprintf(ifb->ifb_name, sizeof(ifb->ifb_name), "%s%d",
1204 	    ifc->ifc_name, unit) >= sizeof(ifb->ifb_name)) {
1205 		ifbond_release(ifb);
1206 		return EINVAL;
1207 	}
1208 
1209 	bzero(&bond_init, sizeof(bond_init));
1210 	bond_init.ver = IFNET_INIT_CURRENT_VERSION;
1211 	bond_init.len = sizeof(bond_init);
1212 	bond_init.flags = IFNET_INIT_LEGACY;
1213 	bond_init.uniqueid_len = strbuflen(ifb->ifb_name);
1214 	bond_init.uniqueid = ifb->ifb_name;
1215 	bond_init.name = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
1216 	bond_init.unit = unit;
1217 	bond_init.family = IFNET_FAMILY_BOND;
1218 	bond_init.type = IFT_IEEE8023ADLAG;
1219 	bond_init.output = bond_output;
1220 	bond_init.demux = ether_demux;
1221 	bond_init.add_proto = ether_add_proto;
1222 	bond_init.del_proto = ether_del_proto;
1223 	bond_init.check_multi = ether_check_multi;
1224 	bond_init.framer_extended = ether_frameout_extended;
1225 	bond_init.ioctl = bond_ioctl;
1226 	bond_init.detach = bond_if_free;
1227 	bond_init.broadcast_addr = etherbroadcastaddr;
1228 	bond_init.broadcast_len = ETHER_ADDR_LEN;
1229 	bond_init.softc = ifb;
1230 	error = ifnet_allocate_extended(&bond_init, &ifp);
1231 
1232 	if (error) {
1233 		ifbond_release(ifb);
1234 		return error;
1235 	}
1236 
1237 	ifb->ifb_ifp = ifp;
1238 	ifnet_set_offload(ifp, 0);
1239 	ifnet_set_addrlen(ifp, ETHER_ADDR_LEN); /* XXX ethernet specific */
1240 	ifnet_set_flags(ifp, IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX, 0xffff);
1241 	ifnet_set_mtu(ifp, ETHERMTU);
1242 
1243 	error = ifnet_attach(ifp, NULL);
1244 	if (error != 0) {
1245 		ifnet_release(ifp);
1246 		ifbond_release(ifb);
1247 		return error;
1248 	}
1249 	error = ifbond_add_slow_proto_multicast(ifb);
1250 	if (error != 0) {
1251 		BOND_LOG(LOG_NOTICE, 0,
1252 		    "%s: failed to add slow_proto multicast, %d",
1253 		    ifb->ifb_name, error);
1254 	}
1255 
1256 	/* attach as ethernet */
1257 	bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
1258 
1259 	bond_lock();
1260 	TAILQ_INSERT_HEAD(&g_bond->ifbond_list, ifb, ifb_bond_list);
1261 	bond_unlock();
1262 
1263 	return 0;
1264 }
1265 
1266 static void
bond_remove_all_interfaces(ifbond_ref ifb)1267 bond_remove_all_interfaces(ifbond_ref ifb)
1268 {
1269 	bondport_ref        p;
1270 
1271 	bond_assert_lock_held();
1272 
1273 	/*
1274 	 * do this in reverse order to avoid re-programming the mac address
1275 	 * as each head interface is removed
1276 	 */
1277 	while ((p = TAILQ_LAST(&ifb->ifb_port_list, port_list)) != NULL) {
1278 		bond_remove_interface(ifb, p->po_ifp);
1279 	}
1280 	return;
1281 }
1282 
1283 static void
bond_remove(ifbond_ref ifb)1284 bond_remove(ifbond_ref ifb)
1285 {
1286 	bond_assert_lock_held();
1287 	ifbond_flags_set_if_detaching(ifb);
1288 	TAILQ_REMOVE(&g_bond->ifbond_list, ifb, ifb_bond_list);
1289 	bond_remove_all_interfaces(ifb);
1290 	return;
1291 }
1292 
1293 static void
bond_if_detach(struct ifnet * ifp)1294 bond_if_detach(struct ifnet * ifp)
1295 {
1296 	int         error;
1297 
1298 	error = ifnet_detach(ifp);
1299 	if (error != 0) {
1300 		BOND_LOG(LOG_NOTICE, 0, "%s: ifnet_detach failed, %d",
1301 		    ifp->if_xname, error);
1302 	}
1303 	return;
1304 }
1305 
1306 static int
bond_clone_destroy(struct ifnet * ifp)1307 bond_clone_destroy(struct ifnet * ifp)
1308 {
1309 	ifbond_ref ifb;
1310 
1311 	bond_lock();
1312 	ifb = ifnet_softc(ifp);
1313 	if (ifb == NULL || ifnet_type(ifp) != IFT_IEEE8023ADLAG) {
1314 		bond_unlock();
1315 		return 0;
1316 	}
1317 	if (ifbond_flags_if_detaching(ifb)) {
1318 		bond_unlock();
1319 		return 0;
1320 	}
1321 	bond_remove(ifb);
1322 	bond_unlock();
1323 	bond_if_detach(ifp);
1324 	return 0;
1325 }
1326 
/*
 * Function: ether_header_hash
 * Purpose:
 *   Compute a 32-bit hash from the destination ethernet address and the
 *   ether type of "eh_p".
 * Returns:
 *   ((dhost[4..5] << 16) | ether_type) ^ dhost[0..3], where the
 *   multi-byte quantities are taken in host memory order.
 * Notes:
 *   The address bytes are copied into properly typed locals rather than
 *   read through casted pointers, and the 16-bit value is widened to
 *   uint32_t before shifting so that a set top bit cannot overflow a
 *   (promoted) signed int.
 */
static uint32_t
ether_header_hash(struct ether_header * eh_p)
{
	uint32_t    dhost_first4;
	uint16_t    dhost_last2;
	uint32_t    h;

	bcopy(&eh_p->ether_dhost[0], &dhost_first4, sizeof(dhost_first4));
	bcopy(&eh_p->ether_dhost[4], &dhost_last2, sizeof(dhost_last2));

	/* get 32-bits from destination ether and ether type */
	h = ((uint32_t)dhost_last2 << 16) | (uint32_t)eh_p->ether_type;
	h ^= dhost_first4;
	return h;
}
1338 
1339 #define BOND_HASH_L3_HEADER     0
1340 #if BOND_HASH_L3_HEADER
1341 static struct mbuf *
S_mbuf_skip_to_offset(struct mbuf * m,int32_t * offset)1342 S_mbuf_skip_to_offset(struct mbuf * m, int32_t * offset)
1343 {
1344 	int                 len;
1345 
1346 	len = m->m_len;
1347 	while (*offset >= len) {
1348 		*offset -= len;
1349 		m = m->m_next;
1350 		if (m == NULL) {
1351 			break;
1352 		}
1353 		len = m->m_len;
1354 	}
1355 	return m;
1356 }
1357 
1358 #if BYTE_ORDER == BIG_ENDIAN
1359 static __inline__ uint32_t
make_uint32(u_char c0,u_char c1,u_char c2,u_char c3)1360 make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
1361 {
1362 	return ((uint32_t)c0 << 24) | ((uint32_t)c1 << 16)
1363 	       | ((uint32_t)c2 << 8) | (uint32_t)c3;
1364 }
1365 #else /* BYTE_ORDER == LITTLE_ENDIAN */
1366 static __inline__ uint32_t
make_uint32(u_char c0,u_char c1,u_char c2,u_char c3)1367 make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
1368 {
1369 	return ((uint32_t)c3 << 24) | ((uint32_t)c2 << 16)
1370 	       | ((uint32_t)c1 << 8) | (uint32_t)c0;
1371 }
1372 #endif /* BYTE_ORDER == LITTLE_ENDIAN */
1373 
1374 static int
S_mbuf_copy_uint32(struct mbuf * m,int32_t offset,uint32_t * val)1375 S_mbuf_copy_uint32(struct mbuf * m, int32_t offset, uint32_t * val)
1376 {
1377 	struct mbuf *       current;
1378 	u_char *            current_data;
1379 	struct mbuf *       next;
1380 	u_char *            next_data;
1381 	int                 space_current;
1382 
1383 	current = S_mbuf_skip_to_offset(m, &offset);
1384 	if (current == NULL) {
1385 		return 1;
1386 	}
1387 	current_data = mtod(current, u_char *) + offset;
1388 	space_current = current->m_len - offset;
1389 	if (space_current >= (int)sizeof(uint32_t)) {
1390 		*val = *((uint32_t *)current_data);
1391 		return 0;
1392 	}
1393 	next = current->m_next;
1394 	if (next == NULL || (next->m_len + space_current) < (int)sizeof(uint32_t)) {
1395 		return 1;
1396 	}
1397 	next_data = mtod(next, u_char *);
1398 	switch (space_current) {
1399 	case 1:
1400 		*val = make_uint32(current_data[0], next_data[0],
1401 		    next_data[1], next_data[2]);
1402 		break;
1403 	case 2:
1404 		*val = make_uint32(current_data[0], current_data[1],
1405 		    next_data[0], next_data[1]);
1406 		break;
1407 	default:
1408 		*val = make_uint32(current_data[0], current_data[1],
1409 		    current_data[2], next_data[0]);
1410 		break;
1411 	}
1412 	return 0;
1413 }
1414 
1415 #define IP_SRC_OFFSET (offsetof(struct ip, ip_src) - offsetof(struct ip, ip_p))
1416 #define IP_DST_OFFSET (offsetof(struct ip, ip_dst) - offsetof(struct ip, ip_p))
1417 
1418 static uint32_t
ip_header_hash(struct mbuf * m)1419 ip_header_hash(struct mbuf * m)
1420 {
1421 	u_char *            data;
1422 	struct in_addr      ip_dst;
1423 	struct in_addr      ip_src;
1424 	u_char              ip_p;
1425 	int32_t             offset;
1426 	struct mbuf *       orig_m = m;
1427 
1428 	/* find the IP protocol field relative to the start of the packet */
1429 	offset = offsetof(struct ip, ip_p) + sizeof(struct ether_header);
1430 	m = S_mbuf_skip_to_offset(m, &offset);
1431 	if (m == NULL || m->m_len < 1) {
1432 		goto bad_ip_packet;
1433 	}
1434 	data = mtod(m, u_char *) + offset;
1435 	ip_p = *data;
1436 
1437 	/* find the IP src relative to the IP protocol */
1438 	if ((m->m_len - offset)
1439 	    >= (int)(IP_SRC_OFFSET + sizeof(struct in_addr) * 2)) {
1440 		/* this should be the normal case */
1441 		ip_src = *(struct in_addr *)(data + IP_SRC_OFFSET);
1442 		ip_dst = *(struct in_addr *)(data + IP_DST_OFFSET);
1443 	} else {
1444 		if (S_mbuf_copy_uint32(m, offset + IP_SRC_OFFSET,
1445 		    (uint32_t *)&ip_src.s_addr)) {
1446 			goto bad_ip_packet;
1447 		}
1448 		if (S_mbuf_copy_uint32(m, offset + IP_DST_OFFSET,
1449 		    (uint32_t *)&ip_dst.s_addr)) {
1450 			goto bad_ip_packet;
1451 		}
1452 	}
1453 	return ntohl(ip_dst.s_addr) ^ ntohl(ip_src.s_addr) ^ ((uint32_t)ip_p);
1454 
1455 bad_ip_packet:
1456 	return ether_header_hash(mtod(orig_m, struct ether_header *));
1457 }
1458 
1459 #define IP6_ADDRS_LEN   (sizeof(struct in6_addr) * 2)
1460 static uint32_t
ipv6_header_hash(struct mbuf * m)1461 ipv6_header_hash(struct mbuf * m)
1462 {
1463 	u_char *            data;
1464 	int                 i;
1465 	int32_t             offset;
1466 	struct mbuf *       orig_m = m;
1467 	uint32_t *          scan;
1468 	uint32_t            val;
1469 
1470 	/* find the IP protocol field relative to the start of the packet */
1471 	offset = offsetof(struct ip6_hdr, ip6_src) + sizeof(struct ether_header);
1472 	m = S_mbuf_skip_to_offset(m, &offset);
1473 	if (m == NULL) {
1474 		goto bad_ipv6_packet;
1475 	}
1476 	data = mtod(m, u_char *) + offset;
1477 	val = 0;
1478 	if ((m->m_len - offset) >= (int)IP6_ADDRS_LEN) {
1479 		/* this should be the normal case */
1480 		for (i = 0, scan = (uint32_t *)data;
1481 		    i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t));
1482 		    i++, scan++) {
1483 			val ^= *scan;
1484 		}
1485 	} else {
1486 		for (i = 0; i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t)); i++) {
1487 			uint32_t    tmp;
1488 			if (S_mbuf_copy_uint32(m, offset + i * sizeof(uint32_t),
1489 			    (uint32_t *)&tmp)) {
1490 				goto bad_ipv6_packet;
1491 			}
1492 			val ^= tmp;
1493 		}
1494 	}
1495 	return ntohl(val);
1496 
1497 bad_ipv6_packet:
1498 	return ether_header_hash(mtod(orig_m, struct ether_header *));
1499 }
1500 
1501 #endif /* BOND_HASH_L3_HEADER */
1502 
1503 static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m,int len)1504 _mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
1505 {
1506 	mbuf_setdata(m, mtodo(m, len), mbuf_len(m) - len);
1507 	mbuf_pkthdr_adjustlen(m, -len);
1508 }
1509 
1510 static uint32_t
get_packet_hash(mbuf_t m)1511 get_packet_hash(mbuf_t m)
1512 {
1513 	uint32_t        flow_hash;
1514 
1515 	if (m->m_pkthdr.pkt_flowid != 0) {
1516 		flow_hash = m->m_pkthdr.pkt_flowid;
1517 	} else {
1518 		struct ether_header *   eh_p;
1519 
1520 		eh_p = mtod(m, struct ether_header *);
1521 #if BOND_HASH_L3_HEADER
1522 		switch (ntohs(eh_p->ether_type)) {
1523 		case ETHERTYPE_IP:
1524 			flow_hash = ip_header_hash(m);
1525 			break;
1526 		case ETHERTYPE_IPV6:
1527 			flow_hash = ipv6_header_hash(m);
1528 			break;
1529 		default:
1530 			flow_hash = ether_header_hash(eh_p);
1531 			break;
1532 		}
1533 #else /* BOND_HASH_L3_HEADER */
1534 		flow_hash = ether_header_hash(eh_p);
1535 #endif /* BOND_HASH_L3_HEADER */
1536 	}
1537 	return flow_hash;
1538 }
1539 
1540 static ifnet_t
bond_get_port_ifp(ifnet_t ifp,uint32_t hash)1541 bond_get_port_ifp(ifnet_t ifp, uint32_t hash)
1542 {
1543 	uint32_t        port_index;
1544 	ifbond_ref      ifb;
1545 	ifnet_t         port_ifp = NULL;
1546 
1547 	bond_lock();
1548 	ifb = ifnet_softc(ifp);
1549 	if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1550 	    || ifb->ifb_distributing_count == 0) {
1551 		goto done;
1552 	}
1553 	port_index = hash % ifb->ifb_distributing_count;
1554 	port_ifp = ifb->ifb_distributing_array[port_index]->po_ifp;
1555 
1556 done:
1557 	bond_unlock();
1558 	return port_ifp;
1559 }
1560 
1561 static int
bond_output(ifnet_t ifp,mbuf_t m)1562 bond_output(ifnet_t ifp, mbuf_t m)
1563 {
1564 	struct flowadv  adv = { .code = FADV_SUCCESS };
1565 	int             error = 0;
1566 	int             flags;
1567 	uint32_t        hash;
1568 	uint32_t        len;
1569 	int             log_level;
1570 	ifnet_t         port_ifp;
1571 
1572 	VERIFY((m->m_flags & M_PKTHDR) != 0);
1573 	hash = get_packet_hash(m);
1574 	port_ifp = bond_get_port_ifp(ifp, hash);
1575 	if (port_ifp == NULL) {
1576 		m_freem(m);
1577 		goto done;
1578 	}
1579 	if (ifp->if_bpf != NULL) {
1580 		struct ether_header *   eh_p;
1581 
1582 		eh_p = mtod(m, struct ether_header *);
1583 		_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
1584 		bond_bpf_tap_out(ifp, m, eh_p);
1585 		_mbuf_adjust_pkthdr_and_data(m, -ETHER_HDR_LEN);
1586 	}
1587 	len = m->m_pkthdr.len;
1588 	if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
1589 		len += ETHER_VLAN_ENCAP_LEN;
1590 	}
1591 	ifnet_stat_increment_out(ifp, 1, len, 0);
1592 	flags = DLIL_OUTPUT_FLAGS_RAW;
1593 	error = dlil_output(port_ifp, PF_BOND, m, NULL, NULL, flags, &adv);
1594 	log_level = (error != 0) ? LOG_NOTICE : LOG_DEBUG;
1595 	BOND_LOG(log_level, BD_DBGF_OUTPUT, "%s: %s bytes %d, error=%d",
1596 	    ifp->if_xname, port_ifp->if_xname, len, error);
1597 	if (error == 0) {
1598 		if (adv.code == FADV_FLOW_CONTROLLED) {
1599 			error = EQFULL;
1600 		} else if (adv.code == FADV_SUSPENDED) {
1601 			error = EQSUSPENDED;
1602 		}
1603 	}
1604 done:
1605 	return error;
1606 }
1607 
1608 static bondport_ref
ifbond_lookup_port(ifbond_ref ifb,struct ifnet * port_ifp)1609 ifbond_lookup_port(ifbond_ref ifb, struct ifnet * port_ifp)
1610 {
1611 	bondport_ref        p;
1612 	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1613 		if (p->po_ifp == port_ifp) {
1614 			return p;
1615 		}
1616 	}
1617 	return NULL;
1618 }
1619 
1620 static bondport_ref
bond_lookup_port(struct ifnet * port_ifp)1621 bond_lookup_port(struct ifnet * port_ifp)
1622 {
1623 	ifbond_ref          ifb;
1624 	bondport_ref        port;
1625 
1626 	TAILQ_FOREACH(ifb, &g_bond->ifbond_list, ifb_bond_list) {
1627 		port = ifbond_lookup_port(ifb, port_ifp);
1628 		if (port != NULL) {
1629 			return port;
1630 		}
1631 	}
1632 	return NULL;
1633 }
1634 
1635 static void
bond_receive_lacpdu(struct mbuf * m,struct ifnet * port_ifp)1636 bond_receive_lacpdu(struct mbuf * m, struct ifnet * port_ifp)
1637 {
1638 	struct ifnet *              bond_ifp = NULL;
1639 	ifbond_ref                  ifb;
1640 	int                         event_code = 0;
1641 	bool                        need_link_update = false;
1642 	bondport_ref                p;
1643 
1644 	BOND_LOG(LOG_DEBUG, BD_DBGF_LACP, "%s", port_ifp->if_xname);
1645 
1646 	bond_lock();
1647 	if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1648 		goto done;
1649 	}
1650 	p = bond_lookup_port(port_ifp);
1651 	if (p == NULL) {
1652 		goto done;
1653 	}
1654 	if (p->po_enabled == 0) {
1655 		goto done;
1656 	}
1657 	ifb = p->po_bond;
1658 	if (ifb->ifb_mode != IF_BOND_MODE_LACP) {
1659 		goto done;
1660 	}
1661 	/*
1662 	 * Work-around for rdar://problem/51372042
1663 	 * Sometimes, the link comes up but the driver doesn't report the
1664 	 * negotiated medium at that time. When we receive an LACPDU packet,
1665 	 * and the medium is unknown, force a link status check. Don't force
1666 	 * the link status check more often than _FORCE_LINK_EVENT_INTERVAL
1667 	 * seconds.
1668 	 */
1669 #define _FORCE_LINK_EVENT_INTERVAL      1
1670 	if (media_type_unknown(&p->po_media_info)) {
1671 		uint64_t        now = net_uptime();
1672 
1673 		if ((now - p->po_force_link_event_time) >=
1674 		    _FORCE_LINK_EVENT_INTERVAL) {
1675 			need_link_update = true;
1676 			p->po_force_link_event_time = now;
1677 		}
1678 	}
1679 	bondport_receive_lacpdu(p, (lacpdu_ref)m_mtod_current(m));
1680 	if (ifbond_selection(ifb)) {
1681 		event_code = (ifb->ifb_active_lag == NULL)
1682 		    ? KEV_DL_LINK_OFF
1683 		    : KEV_DL_LINK_ON;
1684 		/* XXX need to take a reference on bond_ifp */
1685 		bond_ifp = ifb->ifb_ifp;
1686 		ifb->ifb_last_link_event = event_code;
1687 	} else {
1688 		event_code = (ifb->ifb_active_lag == NULL)
1689 		    ? KEV_DL_LINK_OFF
1690 		    : KEV_DL_LINK_ON;
1691 		if (event_code != ifb->ifb_last_link_event) {
1692 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
1693 			    "%s: (receive) generating LINK event",
1694 			    ifb->ifb_name);
1695 			bond_ifp = ifb->ifb_ifp;
1696 			ifb->ifb_last_link_event = event_code;
1697 		}
1698 	}
1699 
1700 done:
1701 	bond_unlock();
1702 	if (bond_ifp != NULL) {
1703 		interface_link_event(bond_ifp, event_code);
1704 	}
1705 	m_freem(m);
1706 	if (need_link_update) {
1707 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
1708 		    "simulating link status changed event");
1709 		bond_handle_event(port_ifp, KEV_DL_LINK_ON);
1710 	}
1711 	return;
1712 }
1713 
1714 static void
bond_receive_la_marker_pdu(struct mbuf * m,struct ifnet * port_ifp)1715 bond_receive_la_marker_pdu(struct mbuf * m, struct ifnet * port_ifp)
1716 {
1717 	la_marker_pdu_ref           marker_p;
1718 	bondport_ref                p;
1719 
1720 	BOND_LOG(LOG_DEBUG, BD_DBGF_LACP, "%s", port_ifp->if_xname);
1721 
1722 	marker_p = (la_marker_pdu_ref)(m_mtod_current(m) + ETHER_HDR_LEN);
1723 	if (marker_p->lm_marker_tlv_type != LA_MARKER_TLV_TYPE_MARKER) {
1724 		goto failed;
1725 	}
1726 	bond_lock();
1727 	if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1728 		bond_unlock();
1729 		goto failed;
1730 	}
1731 	p = bond_lookup_port(port_ifp);
1732 	if (p == NULL || p->po_enabled == 0
1733 	    || p->po_bond->ifb_mode != IF_BOND_MODE_LACP) {
1734 		bond_unlock();
1735 		goto failed;
1736 	}
1737 	/* echo back the same packet as a marker response */
1738 	marker_p->lm_marker_tlv_type = LA_MARKER_TLV_TYPE_MARKER_RESPONSE;
1739 	bondport_slow_proto_transmit(p, (packet_buffer_ref)m);
1740 	bond_unlock();
1741 	return;
1742 
1743 failed:
1744 	m_freem(m);
1745 	return;
1746 }
1747 
1748 static bool
is_slow_proto_multicast(struct ether_header * eh_p)1749 is_slow_proto_multicast(struct ether_header * eh_p)
1750 {
1751 	return bcmp(eh_p->ether_dhost, &slow_proto_multicast,
1752 	           sizeof(eh_p->ether_dhost)) == 0 &&
1753 	       eh_p->ether_type == htons(IEEE8023AD_SLOW_PROTO_ETHERTYPE);
1754 }
1755 
1756 static void
bond_handle_slow_proto_multicast(ifnet_t port_ifp,mbuf_t m)1757 bond_handle_slow_proto_multicast(ifnet_t port_ifp, mbuf_t m)
1758 {
1759 	u_char  subtype = *mtod(m, u_char *);
1760 
1761 	if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP) {
1762 		if (m->m_pkthdr.len < LACPDU_MIN_SIZE) {
1763 			BOND_LOG(LOG_DEBUG, BD_DBGF_INPUT,
1764 			    "dropping short LACP frame %d < %d",
1765 			    m->m_pkthdr.len, LACPDU_MIN_SIZE);
1766 			goto discard;
1767 		}
1768 		/* send to lacp */
1769 		if (m->m_len < LACPDU_MIN_SIZE) {
1770 			m = m_pullup(m, LACPDU_MIN_SIZE);
1771 			if (m == NULL) {
1772 				BOND_LOG(LOG_NOTICE, BD_DBGF_INPUT,
1773 				    "m_pullup LACPDU failed");
1774 				return;
1775 			}
1776 		}
1777 		bond_receive_lacpdu(m, port_ifp);
1778 	} else if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LA_MARKER_PROTOCOL) {
1779 		int         min_size;
1780 
1781 		/* restore the ethernet header pointer in the mbuf */
1782 		m->m_pkthdr.len += ETHER_HDR_LEN;
1783 		m->m_data -= ETHER_HDR_LEN;
1784 		m->m_len += ETHER_HDR_LEN;
1785 		min_size = ETHER_HDR_LEN + offsetof(la_marker_pdu, lm_reserved);
1786 		if (m->m_pkthdr.len < min_size) {
1787 			goto discard;
1788 		}
1789 		/* send to lacp */
1790 		if (m->m_len < min_size) {
1791 			m = m_pullup(m, min_size);
1792 			if (m == NULL) {
1793 				BOND_LOG(LOG_NOTICE, BD_DBGF_INPUT,
1794 				    "m_pullup LA_MARKER failed");
1795 				return;
1796 			}
1797 		}
1798 		/* send to marker responder */
1799 		bond_receive_la_marker_pdu(m, port_ifp);
1800 	} else if (subtype == 0
1801 	    || subtype > IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_END) {
1802 		/* invalid subtype, discard the frame */
1803 		goto discard;
1804 	}
1805 	return;
1806 
1807 discard:
1808 	m_freem(m);
1809 	return;
1810 }
1811 
1812 static void
bond_input_packet_list(ifnet_t port_ifp,mbuf_t list)1813 bond_input_packet_list(ifnet_t port_ifp, mbuf_t list)
1814 {
1815 	ifbond_ref                        ifb;
1816 	struct ifnet *                    ifp;
1817 	bondport_ref                      p;
1818 	struct ifnet_stat_increment_param s;
1819 
1820 	/* verify that we're ready to receive the packet list */
1821 	bond_lock();
1822 	if ((ifnet_eflags(port_ifp) & IFEF_BOND) == 0) {
1823 		goto discard;
1824 	}
1825 	p = bond_lookup_port(port_ifp);
1826 	if (p == NULL || bondport_collecting(p) == 0) {
1827 		goto discard;
1828 	}
1829 	ifb = p->po_bond;
1830 	ifp = ifb->ifb_ifp;
1831 	bond_unlock();
1832 
1833 	bzero(&s, sizeof(s));
1834 
1835 	for (mbuf_t scan = list; scan != NULL; scan = scan->m_nextpkt) {
1836 		struct ether_header *   eh_p;
1837 		void * __single         frame_header;
1838 
1839 		/* clear promisc so that the packet doesn't get dropped */
1840 		mbuf_setflags_mask(scan, 0, MBUF_PROMISC);
1841 		s.packets_in++;
1842 		s.bytes_in += scan->m_pkthdr.len + ETHER_HDR_LEN;
1843 		if ((scan->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
1844 			s.bytes_in += ETHER_VLAN_ENCAP_LEN;
1845 		}
1846 		if (ifp->if_bpf != NULL) {
1847 			frame_header = scan->m_pkthdr.pkt_hdr;
1848 			eh_p = (struct ether_header *)frame_header;
1849 			bond_bpf_tap_in(ifp, scan, eh_p);
1850 		}
1851 		scan->m_pkthdr.rcvif = ifp;
1852 	}
1853 	BOND_LOG(LOG_DEBUG, BD_DBGF_INPUT, "%s: %s packets %d bytes %d",
1854 	    ifp->if_xname, port_ifp->if_xname, s.packets_in, s.bytes_in);
1855 
1856 	dlil_input_packet_list(ifp, list);
1857 	return;
1858 
1859 discard:
1860 	bond_unlock();
1861 	m_freem_list(list);
1862 	return;
1863 }
1864 
1865 static int
bond_input(ifnet_t port_ifp,__unused protocol_family_t protocol,mbuf_t m)1866 bond_input(ifnet_t port_ifp, __unused protocol_family_t protocol, mbuf_t m)
1867 {
1868 	struct ether_header *       eh_p;
1869 	void * __single              frame_header;
1870 	mblist                      list;
1871 	mbuf_t                      next_packet = NULL;
1872 	mbuf_t                      scan;
1873 
1874 	mblist_init(&list);
1875 	for (scan = m; scan != NULL; scan = next_packet) {
1876 		next_packet = scan->m_nextpkt;
1877 		scan->m_nextpkt = NULL;
1878 
1879 		frame_header = scan->m_pkthdr.pkt_hdr;
1880 		eh_p = (struct ether_header *)frame_header;
1881 		if ((scan->m_flags & M_MCAST) != 0 &&
1882 		    is_slow_proto_multicast(eh_p)) {
1883 			/* send up what we have */
1884 			if (list.head != NULL) {
1885 				bond_input_packet_list(port_ifp, list.head);
1886 				mblist_init(&list);
1887 			}
1888 			/* process this multicast */
1889 			bond_handle_slow_proto_multicast(port_ifp, scan);
1890 		} else {
1891 			mblist_append(&list, scan);
1892 		}
1893 	}
1894 	if (list.head != NULL) {
1895 		bond_input_packet_list(port_ifp, list.head);
1896 	}
1897 	return 0;
1898 }
1899 
1900 static __inline__ const char *
bondport_get_name(bondport_ref p)1901 bondport_get_name(bondport_ref p)
1902 {
1903 	return __unsafe_null_terminated_from_indexable(p->po_name);
1904 }
1905 
1906 static __inline__ int
bondport_get_index(bondport_ref p)1907 bondport_get_index(bondport_ref p)
1908 {
1909 	return ifnet_index(p->po_ifp);
1910 }
1911 
1912 static void
bondport_slow_proto_transmit(bondport_ref p,packet_buffer_ref buf)1913 bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf)
1914 {
1915 	struct ether_header *       eh_p;
1916 	int                         error;
1917 
1918 	/* packet_buffer_allocate leaves room for ethernet header */
1919 	eh_p = mtod(buf, struct ether_header *);
1920 	bcopy(&slow_proto_multicast, &eh_p->ether_dhost, sizeof(eh_p->ether_dhost));
1921 	bcopy(&p->po_saved_addr, eh_p->ether_shost, sizeof(eh_p->ether_shost));
1922 	eh_p->ether_type = htons(IEEE8023AD_SLOW_PROTO_ETHERTYPE);
1923 	error = ifnet_output_raw(p->po_ifp, PF_BOND, buf);
1924 	if (error != 0) {
1925 		BOND_LOG(LOG_NOTICE, BD_DBGF_LACP,
1926 		    "(%s) failed %d", bondport_get_name(p), error);
1927 	}
1928 	return;
1929 }
1930 
1931 static void
bondport_timer_process_func(devtimer_ref timer,devtimer_process_func_event event)1932 bondport_timer_process_func(devtimer_ref timer,
1933     devtimer_process_func_event event)
1934 {
1935 	bondport_ref        p;
1936 
1937 	switch (event) {
1938 	case devtimer_process_func_event_lock:
1939 		bond_lock();
1940 		devtimer_retain(timer);
1941 		break;
1942 	case devtimer_process_func_event_unlock:
1943 		if (devtimer_valid(timer)) {
1944 			/* as long as the devtimer is valid, we can look at arg0 */
1945 			int                 event_code = 0;
1946 			struct ifnet *      bond_ifp = NULL;
1947 
1948 			p = (bondport_ref)devtimer_arg0(timer);
1949 			if (ifbond_selection(p->po_bond)) {
1950 				event_code = (p->po_bond->ifb_active_lag == NULL)
1951 				    ? KEV_DL_LINK_OFF
1952 				    : KEV_DL_LINK_ON;
1953 				/* XXX need to take a reference on bond_ifp */
1954 				bond_ifp = p->po_bond->ifb_ifp;
1955 				p->po_bond->ifb_last_link_event = event_code;
1956 			} else {
1957 				event_code = (p->po_bond->ifb_active_lag == NULL)
1958 				    ? KEV_DL_LINK_OFF
1959 				    : KEV_DL_LINK_ON;
1960 				if (event_code !=
1961 				    p->po_bond->ifb_last_link_event) {
1962 					BOND_LOG(LOG_NOTICE, BD_DBGF_LACP,
1963 					    "%s: (timer) generating LINK event",
1964 					    p->po_bond->ifb_name);
1965 					bond_ifp = p->po_bond->ifb_ifp;
1966 					p->po_bond->ifb_last_link_event = event_code;
1967 				}
1968 			}
1969 			devtimer_release(timer);
1970 			bond_unlock();
1971 			if (bond_ifp != NULL) {
1972 				interface_link_event(bond_ifp, event_code);
1973 			}
1974 		} else {
1975 			/* timer is going away */
1976 			devtimer_release(timer);
1977 			bond_unlock();
1978 		}
1979 		break;
1980 	default:
1981 		break;
1982 	}
1983 }
1984 
1985 static bondport_ref
bondport_create(struct ifnet * port_ifp,lacp_port_priority priority,int active,int short_timeout,int * ret_error)1986 bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
1987     int active, int short_timeout, int * ret_error)
1988 {
1989 	int                         error = 0;
1990 	bondport_ref                p = NULL;
1991 	lacp_actor_partner_state    s;
1992 
1993 	*ret_error = 0;
1994 	p = kalloc_type(struct bondport_s, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1995 	multicast_list_init(&p->po_multicast);
1996 	if ((u_int32_t)snprintf(p->po_name, sizeof(p->po_name), "%s%d",
1997 	    ifnet_name(port_ifp), ifnet_unit(port_ifp))
1998 	    >= sizeof(p->po_name)) {
1999 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2000 		    "name too large");
2001 		*ret_error = EINVAL;
2002 		goto failed;
2003 	}
2004 	error = siocgifdevmtu(port_ifp, &p->po_devmtu);
2005 	if (error != 0) {
2006 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2007 		    "SIOCGIFDEVMTU %s failed, %d",
2008 		    bondport_get_name(p), error);
2009 		goto failed;
2010 	}
2011 	/* remember the current interface MTU so it can be restored */
2012 	p->po_devmtu.ifdm_current = ifnet_mtu(port_ifp);
2013 	p->po_ifp = port_ifp;
2014 	p->po_media_info = interface_media_info(port_ifp);
2015 	p->po_current_while_timer = devtimer_create(bondport_timer_process_func, p);
2016 	if (p->po_current_while_timer == NULL) {
2017 		*ret_error = ENOMEM;
2018 		goto failed;
2019 	}
2020 	p->po_periodic_timer = devtimer_create(bondport_timer_process_func, p);
2021 	if (p->po_periodic_timer == NULL) {
2022 		*ret_error = ENOMEM;
2023 		goto failed;
2024 	}
2025 	p->po_wait_while_timer = devtimer_create(bondport_timer_process_func, p);
2026 	if (p->po_wait_while_timer == NULL) {
2027 		*ret_error = ENOMEM;
2028 		goto failed;
2029 	}
2030 	p->po_transmit_timer = devtimer_create(bondport_timer_process_func, p);
2031 	if (p->po_transmit_timer == NULL) {
2032 		*ret_error = ENOMEM;
2033 		goto failed;
2034 	}
2035 	p->po_receive_state = ReceiveState_none;
2036 	p->po_mux_state = MuxState_none;
2037 	p->po_priority = priority;
2038 	s = 0;
2039 	s = lacp_actor_partner_state_set_aggregatable(s);
2040 	if (short_timeout) {
2041 		s = lacp_actor_partner_state_set_short_timeout(s);
2042 	}
2043 	if (active) {
2044 		s = lacp_actor_partner_state_set_active_lacp(s);
2045 	}
2046 	p->po_actor_state = s;
2047 	return p;
2048 
2049 failed:
2050 	bondport_free(p);
2051 	return NULL;
2052 }
2053 
2054 static void
bondport_start(bondport_ref p)2055 bondport_start(bondport_ref p)
2056 {
2057 	bondport_receive_machine(p, LAEventStart, NULL);
2058 	bondport_mux_machine(p, LAEventStart, NULL);
2059 	bondport_periodic_transmit_machine(p, LAEventStart, NULL);
2060 	bondport_transmit_machine(p, LAEventStart, NULL);
2061 	return;
2062 }
2063 
2064 /*
2065  * Function: bondport_invalidate_timers
2066  * Purpose:
2067  *   Invalidate all of the timers for the bondport.
2068  */
2069 static void
bondport_invalidate_timers(bondport_ref p)2070 bondport_invalidate_timers(bondport_ref p)
2071 {
2072 	devtimer_invalidate(p->po_current_while_timer);
2073 	devtimer_invalidate(p->po_periodic_timer);
2074 	devtimer_invalidate(p->po_wait_while_timer);
2075 	devtimer_invalidate(p->po_transmit_timer);
2076 }
2077 
2078 /*
2079  * Function: bondport_cancel_timers
2080  * Purpose:
2081  *   Cancel all of the timers for the bondport.
2082  */
2083 static void
bondport_cancel_timers(bondport_ref p)2084 bondport_cancel_timers(bondport_ref p)
2085 {
2086 	devtimer_cancel(p->po_current_while_timer);
2087 	devtimer_cancel(p->po_periodic_timer);
2088 	devtimer_cancel(p->po_wait_while_timer);
2089 	devtimer_cancel(p->po_transmit_timer);
2090 }
2091 
2092 static void
bondport_free(bondport_ref p)2093 bondport_free(bondport_ref p)
2094 {
2095 	multicast_list_remove(&p->po_multicast);
2096 	devtimer_release(p->po_current_while_timer);
2097 	devtimer_release(p->po_periodic_timer);
2098 	devtimer_release(p->po_wait_while_timer);
2099 	devtimer_release(p->po_transmit_timer);
2100 	kfree_type(struct bondport_s, p);
2101 	return;
2102 }
2103 
2104 static __inline__ int
bond_device_mtu(struct ifnet * ifp,ifbond_ref ifb)2105 bond_device_mtu(struct ifnet * ifp, ifbond_ref ifb)
2106 {
2107 	return ((int)ifnet_mtu(ifp) > ifb->ifb_altmtu)
2108 	       ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu;
2109 }
2110 
2111 static int
bond_add_interface(struct ifnet * ifp,struct ifnet * port_ifp)2112 bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp)
2113 {
2114 	u_int32_t                   eflags;
2115 	uint32_t                    control_flags = 0;
2116 	int                         devmtu;
2117 	int                         error = 0;
2118 	int                         event_code = 0;
2119 	int                         first = FALSE;
2120 	ifbond_ref                  ifb;
2121 	bondport_ref *              new_array = NULL;
2122 	bondport_ref *              old_array = NULL;
2123 	bondport_ref                p;
2124 	int                         old_max = 0;
2125 	int                         new_max = 0;
2126 
2127 	if (IFNET_IS_INTCOPROC(port_ifp) || IFNET_IS_MANAGEMENT(port_ifp)) {
2128 		return EINVAL;
2129 	}
2130 
2131 	/* pre-allocate space for new port */
2132 	p = bondport_create(port_ifp, 0x8000, 1, 0, &error);
2133 	if (p == NULL) {
2134 		return error;
2135 	}
2136 	bond_lock();
2137 	ifb = (ifbond_ref)ifnet_softc(ifp);
2138 	if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2139 		bond_unlock();
2140 		bondport_free(p);
2141 		return ifb == NULL ? EOPNOTSUPP : EBUSY;
2142 	}
2143 
2144 	/* make sure this interface can handle our current MTU */
2145 	devmtu = bond_device_mtu(ifp, ifb);
2146 	if (devmtu != 0
2147 	    && (devmtu > p->po_devmtu.ifdm_max || devmtu < p->po_devmtu.ifdm_min)) {
2148 		bond_unlock();
2149 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2150 		    "interface %s doesn't support mtu %d",
2151 		    bondport_get_name(p), devmtu);
2152 		bondport_free(p);
2153 		return EINVAL;
2154 	}
2155 
2156 	/* make sure ifb doesn't get de-allocated while we wait */
2157 	ifbond_retain(ifb);
2158 
2159 	/* wait for other add or remove to complete */
2160 	ifbond_wait(ifb, __func__);
2161 
2162 	if (ifbond_flags_if_detaching(ifb)) {
2163 		/* someone destroyed the bond while we were waiting */
2164 		error = EBUSY;
2165 		goto signal_done;
2166 	}
2167 	if (bond_lookup_port(port_ifp) != NULL) {
2168 		/* port is already part of a bond */
2169 		error = EBUSY;
2170 		goto signal_done;
2171 	}
2172 	if ((ifnet_eflags(port_ifp) & (IFEF_VLAN | IFEF_BOND)) != 0) {
2173 		/* interface already has VLAN's, or is part of bond */
2174 		error = EBUSY;
2175 		goto signal_done;
2176 	}
2177 
2178 	/* mark the interface busy */
2179 	eflags = if_set_eflags(port_ifp, IFEF_BOND);
2180 	if ((eflags & IFEF_VLAN) != 0) {
2181 		/* vlan got in ahead of us */
2182 		if_clear_eflags(port_ifp, IFEF_BOND);
2183 		error = EBUSY;
2184 		goto signal_done;
2185 	}
2186 
2187 	if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2188 		ifnet_set_offload(ifp, ifnet_offload(port_ifp));
2189 		ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
2190 		if (ifbond_flags_lladdr(ifb) == FALSE) {
2191 			first = TRUE;
2192 		}
2193 	} else {
2194 		ifnet_offload_t         ifp_offload;
2195 		ifnet_offload_t         port_ifp_offload;
2196 
2197 		ifp_offload = ifnet_offload(ifp);
2198 		port_ifp_offload = ifnet_offload(port_ifp);
2199 		if (ifp_offload != port_ifp_offload) {
2200 			ifnet_offload_t     offload;
2201 
2202 			offload = ifp_offload & port_ifp_offload;
2203 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2204 			    "(%s, %s) hwassist values don't match 0x%x != 0x%x,"
2205 			    " using 0x%x instead",
2206 			    ifb->ifb_name, bondport_get_name(p),
2207 			    ifp_offload, port_ifp_offload, offload);
2208 			/*
2209 			 * XXX
2210 			 * if the bond has VLAN's, we can't simply change the hwassist
2211 			 * field behind its back: this needs work
2212 			 */
2213 			ifnet_set_offload(ifp, offload);
2214 		}
2215 	}
2216 	p->po_bond = ifb;
2217 
2218 	/* remember the port's ethernet address so it can be restored */
2219 	ether_addr_copy(p->po_saved_addr.octet,
2220 	    (uint8_t *__indexable)IF_LLADDR(port_ifp));
2221 
2222 	/* add it to the list of ports */
2223 	TAILQ_INSERT_TAIL(&ifb->ifb_port_list, p, po_port_list);
2224 	ifb->ifb_port_count++;
2225 
2226 	bond_unlock();
2227 
2228 
2229 	/* first port added to bond determines bond's ethernet address */
2230 	if (first) {
2231 		ifnet_set_lladdr_and_type(ifp, IF_LLADDR(port_ifp), ETHER_ADDR_LEN,
2232 		    IFT_ETHER);
2233 	}
2234 	uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_IN_LIST);
2235 
2236 	/* allocate a larger distributing array */
2237 	new_max = ifb->ifb_port_count;
2238 	new_array = kalloc_type(bondport_ref, new_max, Z_WAITOK);
2239 	if (new_array == NULL) {
2240 		error = ENOMEM;
2241 		goto failed;
2242 	}
2243 
2244 	/* attach our BOND "protocol" to the interface */
2245 	error = bond_attach_protocol(port_ifp);
2246 	if (error) {
2247 		goto failed;
2248 	}
2249 	uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_PROTO_ATTACHED);
2250 
2251 	/* set the interface MTU */
2252 	devmtu = bond_device_mtu(ifp, ifb);
2253 	error = siocsifmtu(port_ifp, devmtu);
2254 	if (error != 0) {
2255 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2256 		    "(%s, %s): SIOCSIFMTU %d failed %d",
2257 		    ifb->ifb_name, bondport_get_name(p), devmtu, error);
2258 		goto failed;
2259 	}
2260 	uint32_bit_set(&control_flags, PORT_CONTROL_FLAGS_MTU_SET);
2261 
2262 	/* program the port with our multicast addresses */
2263 	error = multicast_list_program(&p->po_multicast, ifp, port_ifp);
2264 	if (error) {
2265 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2266 		    "(%s, %s): multicast_list_program failed %d",
2267 		    ifb->ifb_name, bondport_get_name(p), error);
2268 		goto failed;
2269 	}
2270 
2271 	/* mark the interface up */
2272 	ifnet_set_flags(port_ifp, IFF_UP, IFF_UP);
2273 
2274 	error = ifnet_ioctl(port_ifp, 0, SIOCSIFFLAGS, NULL);
2275 	if (error != 0) {
2276 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2277 		    "(%s, %s): SIOCSIFFLAGS failed %d",
2278 		    ifb->ifb_name, bondport_get_name(p), error);
2279 		goto failed;
2280 	}
2281 
2282 	/* re-program the port's ethernet address */
2283 	error = if_siflladdr(port_ifp,
2284 	    (const struct ether_addr *)IF_LLADDR(ifp));
2285 	if (error == 0) {
2286 		if (memcmp(IF_LLADDR(ifp), IF_LLADDR(port_ifp), ETHER_ADDR_LEN)
2287 		    != 0) {
2288 			/* it lied, it really doesn't support setting lladdr */
2289 			error = EOPNOTSUPP;
2290 		}
2291 	}
2292 	if (error != 0) {
2293 		/* port doesn't support setting the link address */
2294 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2295 		    "(%s, %s): if_siflladdr failed %d",
2296 		    ifb->ifb_name, bondport_get_name(p), error);
2297 		error = ifnet_set_promiscuous(port_ifp, 1);
2298 		if (error != 0) {
2299 			/* port doesn't support setting promiscuous mode */
2300 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2301 			    "(%s, %s): set promiscuous failed %d",
2302 			    ifb->ifb_name, bondport_get_name(p), error);
2303 			goto failed;
2304 		}
2305 		uint32_bit_set(&control_flags,
2306 		    PORT_CONTROL_FLAGS_PROMISCUOUS_SET);
2307 	} else {
2308 		uint32_bit_set(&control_flags,
2309 		    PORT_CONTROL_FLAGS_LLADDR_SET);
2310 	}
2311 
2312 	/* if we're in promiscuous mode, enable that as well */
2313 	if (ifbond_flags_promisc(ifb)) {
2314 		error = ifnet_set_promiscuous(port_ifp, 1);
2315 		if (error != 0) {
2316 			/* port doesn't support setting promiscuous mode */
2317 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2318 			    "(%s, %s): set promiscuous failed %d",
2319 			    ifb->ifb_name, bondport_get_name(p), error);
2320 			goto failed;
2321 		}
2322 		uint32_bit_set(&control_flags,
2323 		    PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2324 	}
2325 
2326 	bond_lock();
2327 
2328 	/* no failures past this point */
2329 	p->po_enabled = 1;
2330 	p->po_control_flags = control_flags;
2331 
2332 	/* copy the contents of the existing distributing array */
2333 	if (ifb->ifb_distributing_count) {
2334 		bcopy(ifb->ifb_distributing_array, new_array,
2335 		    sizeof(*new_array) * ifb->ifb_distributing_count);
2336 	}
2337 	old_array = ifb->ifb_distributing_array;
2338 	old_max = ifb->ifb_distributing_max;
2339 	ifb->ifb_distributing_array = new_array;
2340 	ifb->ifb_distributing_max = new_max;
2341 
2342 	if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2343 		bondport_start(p);
2344 
2345 		/* check if we need to generate a link status event */
2346 		if (ifbond_selection(ifb)) {
2347 			event_code = (ifb->ifb_active_lag == NULL)
2348 			    ? KEV_DL_LINK_OFF
2349 			    : KEV_DL_LINK_ON;
2350 			ifb->ifb_last_link_event = event_code;
2351 		}
2352 	} else {
2353 		/* are we adding the first distributing interface? */
2354 		if (media_active(&p->po_media_info)) {
2355 			if (ifb->ifb_distributing_count == 0) {
2356 				ifb->ifb_last_link_event = event_code = KEV_DL_LINK_ON;
2357 			}
2358 			bondport_enable_distributing(p);
2359 		} else {
2360 			bondport_disable_distributing(p);
2361 		}
2362 	}
2363 
2364 	/* clear the busy state, and wakeup anyone waiting */
2365 	ifbond_signal(ifb, __func__);
2366 	bond_unlock();
2367 	if (event_code != 0) {
2368 		interface_link_event(ifp, event_code);
2369 	}
2370 	kfree_type(bondport_ref, old_max, old_array);
2371 	return 0;
2372 
2373 failed:
2374 	bond_assert_lock_not_held();
2375 
2376 	/* if this was the first port to be added, clear our address */
2377 	if (first) {
2378 		ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG);
2379 	}
2380 
2381 	kfree_type(bondport_ref, new_max, new_array);
2382 	if (uint32_bit_is_set(control_flags,
2383 	    PORT_CONTROL_FLAGS_LLADDR_SET)) {
2384 		int     error1;
2385 
2386 		error1 = if_siflladdr(port_ifp, &p->po_saved_addr);
2387 		if (error1 != 0) {
2388 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2389 			    "(%s, %s): if_siflladdr restore failed %d",
2390 			    ifb->ifb_name, bondport_get_name(p), error1);
2391 		}
2392 	}
2393 	if (uint32_bit_is_set(control_flags,
2394 	    PORT_CONTROL_FLAGS_PROMISCUOUS_SET)) {
2395 		int     error1;
2396 
2397 		error1 = ifnet_set_promiscuous(port_ifp, 0);
2398 		if (error1 != 0) {
2399 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2400 			    "(%s, %s): promiscous mode disable failed %d",
2401 			    ifb->ifb_name, bondport_get_name(p), error1);
2402 		}
2403 	}
2404 	if (uint32_bit_is_set(control_flags,
2405 	    PORT_CONTROL_FLAGS_PROTO_ATTACHED)) {
2406 		(void)bond_detach_protocol(port_ifp);
2407 	}
2408 	if (uint32_bit_is_set(control_flags,
2409 	    PORT_CONTROL_FLAGS_MTU_SET)) {
2410 		int error1;
2411 
2412 		error1 = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2413 		if (error1 != 0) {
2414 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2415 			    "(%s, %s): SIOCSIFMTU %d failed %d",
2416 			    ifb->ifb_name, bondport_get_name(p),
2417 			    p->po_devmtu.ifdm_current, error1);
2418 		}
2419 	}
2420 	bond_lock();
2421 	if (uint32_bit_is_set(control_flags,
2422 	    PORT_CONTROL_FLAGS_IN_LIST)) {
2423 		TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2424 		ifb->ifb_port_count--;
2425 	}
2426 	if_clear_eflags(ifp, IFEF_BOND);
2427 	if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2428 		ifb->ifb_altmtu = 0;
2429 		ifnet_set_mtu(ifp, ETHERMTU);
2430 		ifnet_set_offload(ifp, 0);
2431 	}
2432 
2433 signal_done:
2434 	ifbond_signal(ifb, __func__);
2435 	bond_unlock();
2436 	ifbond_release(ifb);
2437 	bondport_free(p);
2438 	return error;
2439 }
2440 
2441 static int
bond_remove_interface(ifbond_ref ifb,struct ifnet * port_ifp)2442 bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp)
2443 {
2444 	int                         active_lag = 0;
2445 	int                         error = 0;
2446 	int                         event_code = 0;
2447 	bondport_ref                head_port;
2448 	struct ifnet *              ifp;
2449 	int                         last = FALSE;
2450 	int                         new_link_address = FALSE;
2451 	bondport_ref                p;
2452 	lacp_actor_partner_state    s;
2453 	int                         was_distributing;
2454 
2455 	bond_assert_lock_held();
2456 
2457 	ifbond_retain(ifb);
2458 	ifbond_wait(ifb, "bond_remove_interface");
2459 
2460 	p = ifbond_lookup_port(ifb, port_ifp);
2461 	if (p == NULL) {
2462 		error = ENXIO;
2463 		/* it got removed by another thread */
2464 		goto signal_done;
2465 	}
2466 
2467 	/* de-select it and remove it from the lists */
2468 	was_distributing = bondport_flags_distributing(p);
2469 	bondport_disable_distributing(p);
2470 	if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2471 		bondport_set_selected(p, SelectedState_UNSELECTED);
2472 		active_lag = bondport_remove_from_LAG(p);
2473 		/* invalidate timers here while holding the bond_lock */
2474 		bondport_invalidate_timers(p);
2475 
2476 		/* announce that we're Individual now */
2477 		s = p->po_actor_state;
2478 		s = lacp_actor_partner_state_set_individual(s);
2479 		s = lacp_actor_partner_state_set_not_collecting(s);
2480 		s = lacp_actor_partner_state_set_not_distributing(s);
2481 		s = lacp_actor_partner_state_set_out_of_sync(s);
2482 		p->po_actor_state = s;
2483 		bondport_flags_set_ntt(p);
2484 	}
2485 
2486 	TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2487 	ifb->ifb_port_count--;
2488 
2489 	ifp = ifb->ifb_ifp;
2490 	head_port = TAILQ_FIRST(&ifb->ifb_port_list);
2491 	if (head_port == NULL) {
2492 		ifnet_set_flags(ifp, 0, IFF_RUNNING);
2493 		if (ifbond_flags_lladdr(ifb) == FALSE) {
2494 			last = TRUE;
2495 		}
2496 		ifnet_set_offload(ifp, 0);
2497 		ifnet_set_mtu(ifp, ETHERMTU);
2498 		ifb->ifb_altmtu = 0;
2499 	} else if (ifbond_flags_lladdr(ifb) == FALSE
2500 	    && bcmp(&p->po_saved_addr, IF_LLADDR(ifp),
2501 	    ETHER_ADDR_LEN) == 0) {
2502 		new_link_address = TRUE;
2503 	}
2504 	/* check if we need to generate a link status event */
2505 	if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2506 		if (ifbond_selection(ifb) || active_lag) {
2507 			event_code = (ifb->ifb_active_lag == NULL)
2508 			    ? KEV_DL_LINK_OFF
2509 			    : KEV_DL_LINK_ON;
2510 			ifb->ifb_last_link_event = event_code;
2511 		}
2512 		bondport_transmit_machine(p, LAEventStart,
2513 		    TRANSMIT_MACHINE_TX_IMMEDIATE);
2514 	} else {
2515 		/* are we removing the last distributing interface? */
2516 		if (was_distributing && ifb->ifb_distributing_count == 0) {
2517 			ifb->ifb_last_link_event = event_code = KEV_DL_LINK_OFF;
2518 		}
2519 	}
2520 	bond_unlock();
2521 
2522 	if (last) {
2523 		ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_IEEE8023ADLAG);
2524 	} else if (new_link_address) {
2525 		struct ifnet *  scan_ifp;
2526 		bondport_ref    scan_port;
2527 
2528 		/* ifbond_wait() allows port list traversal without holding the lock */
2529 
2530 		/* this port gave the bond its ethernet address, switch to new one */
2531 		ifnet_set_lladdr_and_type(ifp,
2532 		    &head_port->po_saved_addr, ETHER_ADDR_LEN,
2533 		    IFT_ETHER);
2534 
2535 		/* re-program each port with the new link address */
2536 		TAILQ_FOREACH(scan_port, &ifb->ifb_port_list, po_port_list) {
2537 			scan_ifp = scan_port->po_ifp;
2538 
2539 			if (!uint32_bit_is_set(scan_port->po_control_flags,
2540 			    PORT_CONTROL_FLAGS_LLADDR_SET)) {
2541 				/* port doesn't support setting lladdr */
2542 				continue;
2543 			}
2544 			error = if_siflladdr(scan_ifp,
2545 			    (const struct ether_addr *) IF_LLADDR(ifp));
2546 			if (error != 0) {
2547 				BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2548 				    "(%s, %s): if_siflladdr (%s) failed %d",
2549 				    ifb->ifb_name, bondport_get_name(p),
2550 				    bondport_get_name(scan_port), error);
2551 			}
2552 		}
2553 	}
2554 
2555 	/* restore the port's ethernet address */
2556 	if (uint32_bit_is_set(p->po_control_flags,
2557 	    PORT_CONTROL_FLAGS_LLADDR_SET)) {
2558 		error = if_siflladdr(port_ifp, &p->po_saved_addr);
2559 		if (error != 0) {
2560 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2561 			    "(%s, %s): if_siflladdr failed %d",
2562 			    ifb->ifb_name, bondport_get_name(p), error);
2563 		}
2564 	}
2565 
2566 	/* disable promiscous mode (if we enabled it) */
2567 	if (uint32_bit_is_set(p->po_control_flags,
2568 	    PORT_CONTROL_FLAGS_PROMISCUOUS_SET)) {
2569 		error = ifnet_set_promiscuous(port_ifp, 0);
2570 		if (error != 0) {
2571 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2572 			    "(%s, %s): disable promiscuous failed %d",
2573 			    ifb->ifb_name, bondport_get_name(p), error);
2574 		}
2575 	}
2576 
2577 	/* disable promiscous mode from bond (if we enabled it) */
2578 	if (uint32_bit_is_set(p->po_control_flags,
2579 	    PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET)) {
2580 		error = ifnet_set_promiscuous(port_ifp, 0);
2581 		if (error != 0) {
2582 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2583 			    "(%s, %s): disable promiscuous failed %d",
2584 			    ifb->ifb_name, bondport_get_name(p), error);
2585 		}
2586 	}
2587 
2588 	/* restore the port's MTU */
2589 	error = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current);
2590 	if (error != 0) {
2591 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2592 		    "(%s, %s): SIOCSIFMTU %d failed %d",
2593 		    ifb->ifb_name, bondport_get_name(p),
2594 		    p->po_devmtu.ifdm_current, error);
2595 	}
2596 
2597 	/* remove the bond "protocol" */
2598 	bond_detach_protocol(port_ifp);
2599 
2600 	/* generate link event */
2601 	if (event_code != 0) {
2602 		interface_link_event(ifp, event_code);
2603 	}
2604 
2605 	bond_lock();
2606 	bondport_free(p);
2607 	if_clear_eflags(port_ifp, IFEF_BOND);
2608 	/* release this bondport's reference to the ifbond */
2609 	ifbond_release(ifb);
2610 
2611 signal_done:
2612 	ifbond_signal(ifb, __func__);
2613 	ifbond_release(ifb);
2614 	return error;
2615 }
2616 
2617 static void
bond_set_lacp_mode(ifbond_ref ifb)2618 bond_set_lacp_mode(ifbond_ref ifb)
2619 {
2620 	bondport_ref                p;
2621 
2622 	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2623 		bondport_disable_distributing(p);
2624 		bondport_start(p);
2625 	}
2626 	return;
2627 }
2628 
2629 static void
bond_set_static_mode(ifbond_ref ifb)2630 bond_set_static_mode(ifbond_ref ifb)
2631 {
2632 	bondport_ref                p;
2633 	lacp_actor_partner_state    s;
2634 
2635 	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2636 		bondport_disable_distributing(p);
2637 		bondport_set_selected(p, SelectedState_UNSELECTED);
2638 		(void)bondport_remove_from_LAG(p);
2639 		bondport_cancel_timers(p);
2640 
2641 		/* announce that we're Individual now */
2642 		s = p->po_actor_state;
2643 		s = lacp_actor_partner_state_set_individual(s);
2644 		s = lacp_actor_partner_state_set_not_collecting(s);
2645 		s = lacp_actor_partner_state_set_not_distributing(s);
2646 		s = lacp_actor_partner_state_set_out_of_sync(s);
2647 		p->po_actor_state = s;
2648 		bondport_flags_set_ntt(p);
2649 		bondport_transmit_machine(p, LAEventStart,
2650 		    TRANSMIT_MACHINE_TX_IMMEDIATE);
2651 		/* clear state */
2652 		p->po_actor_state = 0;
2653 		bzero(&p->po_partner_state, sizeof(p->po_partner_state));
2654 
2655 		if (media_active(&p->po_media_info)) {
2656 			bondport_enable_distributing(p);
2657 		} else {
2658 			bondport_disable_distributing(p);
2659 		}
2660 	}
2661 	return;
2662 }
2663 
2664 static int
bond_set_mode(struct ifnet * ifp,int mode)2665 bond_set_mode(struct ifnet * ifp, int mode)
2666 {
2667 	int                         error = 0;
2668 	int                         event_code = 0;
2669 	ifbond_ref                  ifb;
2670 
2671 	bond_lock();
2672 	ifb = (ifbond_ref)ifnet_softc(ifp);
2673 	if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2674 		bond_unlock();
2675 		return (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2676 	}
2677 	if (ifb->ifb_mode == mode) {
2678 		bond_unlock();
2679 		return 0;
2680 	}
2681 
2682 	ifbond_retain(ifb);
2683 	ifbond_wait(ifb, "bond_set_mode");
2684 
2685 	/* verify (again) that the mode is actually different */
2686 	if (ifb->ifb_mode == mode) {
2687 		/* nothing to do */
2688 		goto signal_done;
2689 	}
2690 
2691 	ifb->ifb_mode = mode;
2692 	if (mode == IF_BOND_MODE_LACP) {
2693 		bond_set_lacp_mode(ifb);
2694 
2695 		/* check if we need to generate a link status event */
2696 		if (ifbond_selection(ifb)) {
2697 			event_code = (ifb->ifb_active_lag == NULL)
2698 			    ? KEV_DL_LINK_OFF
2699 			    : KEV_DL_LINK_ON;
2700 		}
2701 	} else {
2702 		bond_set_static_mode(ifb);
2703 		event_code = (ifb->ifb_distributing_count == 0)
2704 		    ? KEV_DL_LINK_OFF
2705 		    : KEV_DL_LINK_ON;
2706 	}
2707 	ifb->ifb_last_link_event = event_code;
2708 
2709 signal_done:
2710 	ifbond_signal(ifb, __func__);
2711 	bond_unlock();
2712 	ifbond_release(ifb);
2713 
2714 	if (event_code != 0) {
2715 		interface_link_event(ifp, event_code);
2716 	}
2717 	return error;
2718 }
2719 
2720 static int
bond_get_status(ifbond_ref ifb,struct if_bond_req * ibr_p,user_addr_t datap)2721 bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, user_addr_t datap)
2722 {
2723 	int                         count;
2724 	user_addr_t                 dst;
2725 	int                         error = 0;
2726 	struct if_bond_status_req * ibsr;
2727 	struct if_bond_status       ibs;
2728 	bondport_ref                port;
2729 
2730 	ibsr = &(ibr_p->ibr_ibru.ibru_status);
2731 	if (ibsr->ibsr_version != IF_BOND_STATUS_REQ_VERSION) {
2732 		return EINVAL;
2733 	}
2734 	ibsr->ibsr_key = ifb->ifb_key;
2735 	ibsr->ibsr_mode = ifb->ifb_mode;
2736 	ibsr->ibsr_total = ifb->ifb_port_count;
2737 	dst = proc_is64bit(current_proc())
2738 	    ? ibsr->ibsr_ibsru.ibsru_buffer64
2739 	    : CAST_USER_ADDR_T(ibsr->ibsr_ibsru.ibsru_buffer);
2740 	if (dst == USER_ADDR_NULL) {
2741 		/* just want to know how many there are */
2742 		goto done;
2743 	}
2744 	if (ibsr->ibsr_count < 0) {
2745 		return EINVAL;
2746 	}
2747 	count = (ifb->ifb_port_count < ibsr->ibsr_count)
2748 	    ? ifb->ifb_port_count : ibsr->ibsr_count;
2749 	TAILQ_FOREACH(port, &ifb->ifb_port_list, po_port_list) {
2750 		struct if_bond_partner_state *  ibps_p;
2751 		partner_state_ref               ps;
2752 
2753 		if (count == 0) {
2754 			break;
2755 		}
2756 		bzero(&ibs, sizeof(ibs));
2757 		strbufcpy(ibs.ibs_if_name, port->po_name);
2758 		ibs.ibs_port_priority = port->po_priority;
2759 		if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2760 			ibs.ibs_state = port->po_actor_state;
2761 			ibs.ibs_selected_state = port->po_selected;
2762 			ps = &port->po_partner_state;
2763 			ibps_p = &ibs.ibs_partner_state;
2764 			ibps_p->ibps_system = ps->ps_lag_info.li_system;
2765 			ibps_p->ibps_system_priority = ps->ps_lag_info.li_system_priority;
2766 			ibps_p->ibps_key = ps->ps_lag_info.li_key;
2767 			ibps_p->ibps_port = ps->ps_port;
2768 			ibps_p->ibps_port_priority = ps->ps_port_priority;
2769 			ibps_p->ibps_state = ps->ps_state;
2770 		} else {
2771 			/* fake the selected information */
2772 			ibs.ibs_selected_state = bondport_flags_distributing(port)
2773 			    ? SelectedState_SELECTED : SelectedState_UNSELECTED;
2774 		}
2775 		error = copyout(&ibs, dst, sizeof(ibs));
2776 		if (error != 0) {
2777 			break;
2778 		}
2779 		dst += sizeof(ibs);
2780 		count--;
2781 	}
2782 
2783 done:
2784 	if (error == 0) {
2785 		error = copyout(ibr_p, datap, sizeof(*ibr_p));
2786 	} else {
2787 		(void)copyout(ibr_p, datap, sizeof(*ibr_p));
2788 	}
2789 	return error;
2790 }
2791 
2792 static int
bond_set_promisc(struct ifnet * ifp)2793 bond_set_promisc(struct ifnet * ifp)
2794 {
2795 	int                 error = 0;
2796 	ifbond_ref          ifb;
2797 	bool                is_promisc;
2798 	bondport_ref        p;
2799 	int                 val;
2800 
2801 	is_promisc = (ifnet_flags(ifp) & IFF_PROMISC) != 0;
2802 
2803 	/* determine whether promiscuous state needs to be changed */
2804 	bond_lock();
2805 	ifb = (ifbond_ref)ifnet_softc(ifp);
2806 	if (ifb == NULL) {
2807 		bond_unlock();
2808 		error = EBUSY;
2809 		goto done;
2810 	}
2811 	if (is_promisc == ifbond_flags_promisc(ifb)) {
2812 		/* already in the right state */
2813 		bond_unlock();
2814 		goto done;
2815 	}
2816 	ifbond_retain(ifb);
2817 	ifbond_wait(ifb, __func__);
2818 	if (ifbond_flags_if_detaching(ifb)) {
2819 		/* someone destroyed the bond while we were waiting */
2820 		error = EBUSY;
2821 		goto signal_done;
2822 	}
2823 	bond_unlock();
2824 
2825 	/* update the promiscuous state of each memeber */
2826 	val = is_promisc ? 1 : 0;
2827 	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2828 		struct ifnet *  port_ifp = p->po_ifp;
2829 		bool            port_is_promisc;
2830 
2831 		port_is_promisc = uint32_bit_is_set(p->po_control_flags,
2832 		    PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2833 		if (port_is_promisc == is_promisc) {
2834 			/* already in the right state */
2835 			continue;
2836 		}
2837 		error = ifnet_set_promiscuous(port_ifp, val);
2838 		if (error != 0) {
2839 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2840 			    "%s: ifnet_set_promiscuous(%s, %d): failed %d",
2841 			    ifb->ifb_name, port_ifp->if_xname, val, error);
2842 			continue;
2843 		}
2844 		BOND_LOG(LOG_DEBUG, BD_DBGF_LIFECYCLE,
2845 		    "%s: ifnet_set_promiscuous(%s, %d): succeeded",
2846 		    ifb->ifb_name, port_ifp->if_xname, val);
2847 		if (is_promisc) {
2848 			/* remember that we set it */
2849 			uint32_bit_set(&p->po_control_flags,
2850 			    PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2851 		} else {
2852 			uint32_bit_clear(&p->po_control_flags,
2853 			    PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2854 		}
2855 	}
2856 
2857 	/* assume that updating promiscuous state succeeded */
2858 	error = 0;
2859 	bond_lock();
2860 
2861 	/* update our internal state */
2862 	if (is_promisc) {
2863 		ifbond_flags_set_promisc(ifb);
2864 	} else {
2865 		ifbond_flags_clear_promisc(ifb);
2866 	}
2867 
2868 signal_done:
2869 	ifbond_signal(ifb, __func__);
2870 	bond_unlock();
2871 	ifbond_release(ifb);
2872 
2873 done:
2874 	return error;
2875 }
2876 
2877 static void
bond_get_mtu_values(ifbond_ref ifb,int * ret_min,int * ret_max)2878 bond_get_mtu_values(ifbond_ref ifb, int * ret_min, int * ret_max)
2879 {
2880 	int                         mtu_min = 0;
2881 	int                         mtu_max = 0;
2882 	bondport_ref                p;
2883 
2884 	if (TAILQ_FIRST(&ifb->ifb_port_list) != NULL) {
2885 		mtu_min = IF_MINMTU;
2886 	}
2887 	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2888 		struct ifdevmtu *       devmtu_p = &p->po_devmtu;
2889 
2890 		if (devmtu_p->ifdm_min > mtu_min) {
2891 			mtu_min = devmtu_p->ifdm_min;
2892 		}
2893 		if (mtu_max == 0 || devmtu_p->ifdm_max < mtu_max) {
2894 			mtu_max = devmtu_p->ifdm_max;
2895 		}
2896 	}
2897 	*ret_min = mtu_min;
2898 	*ret_max = mtu_max;
2899 	return;
2900 }
2901 
2902 static int
bond_set_mtu_on_ports(ifbond_ref ifb,int mtu)2903 bond_set_mtu_on_ports(ifbond_ref ifb, int mtu)
2904 {
2905 	int                         error = 0;
2906 	bondport_ref                p;
2907 
2908 	TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2909 		error = siocsifmtu(p->po_ifp, mtu);
2910 		if (error != 0) {
2911 			BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
2912 			    "%s: SIOCSIFMTU %s failed, %d",
2913 			    ifb->ifb_name, bondport_get_name(p), error);
2914 			break;
2915 		}
2916 	}
2917 	return error;
2918 }
2919 
2920 static int
bond_set_mtu(struct ifnet * ifp,int mtu,int isdevmtu)2921 bond_set_mtu(struct ifnet * ifp, int mtu, int isdevmtu)
2922 {
2923 	int                 error = 0;
2924 	ifbond_ref          ifb;
2925 	int                 mtu_min;
2926 	int                 mtu_max;
2927 	int                 new_max;
2928 	int                 old_max;
2929 
2930 	bond_lock();
2931 	ifb = (ifbond_ref)ifnet_softc(ifp);
2932 	if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2933 		error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2934 		goto done;
2935 	}
2936 	ifbond_retain(ifb);
2937 	ifbond_wait(ifb, "bond_set_mtu");
2938 
2939 	/* check again */
2940 	if (ifnet_softc(ifp) == NULL || ifbond_flags_if_detaching(ifb)) {
2941 		error = EBUSY;
2942 		goto signal_done;
2943 	}
2944 	bond_get_mtu_values(ifb, &mtu_min, &mtu_max);
2945 	if (mtu > mtu_max) {
2946 		error = EINVAL;
2947 		goto signal_done;
2948 	}
2949 	if (mtu < mtu_min && (isdevmtu == 0 || mtu != 0)) {
2950 		/* allow SIOCSIFALTMTU to set the mtu to 0 */
2951 		error = EINVAL;
2952 		goto signal_done;
2953 	}
2954 	if (isdevmtu) {
2955 		new_max = (mtu > (int)ifnet_mtu(ifp)) ? mtu : (int)ifnet_mtu(ifp);
2956 	} else {
2957 		new_max = (mtu > ifb->ifb_altmtu) ? mtu : ifb->ifb_altmtu;
2958 	}
2959 	old_max = ((int)ifnet_mtu(ifp) > ifb->ifb_altmtu)
2960 	    ? (int)ifnet_mtu(ifp) : ifb->ifb_altmtu;
2961 	if (new_max != old_max) {
2962 		/* we can safely walk the list of port without the lock held */
2963 		bond_unlock();
2964 		error = bond_set_mtu_on_ports(ifb, new_max);
2965 		if (error != 0) {
2966 			/* try our best to back out of it */
2967 			(void)bond_set_mtu_on_ports(ifb, old_max);
2968 		}
2969 		bond_lock();
2970 	}
2971 	if (error == 0) {
2972 		if (isdevmtu) {
2973 			ifb->ifb_altmtu = mtu;
2974 		} else {
2975 			ifnet_set_mtu(ifp, mtu);
2976 		}
2977 	}
2978 
2979 signal_done:
2980 	ifbond_signal(ifb, __func__);
2981 	ifbond_release(ifb);
2982 
2983 done:
2984 	bond_unlock();
2985 	return error;
2986 }
2987 
2988 static int
bond_ioctl(struct ifnet * ifp,u_long cmd,void * data)2989 bond_ioctl(struct ifnet *ifp, u_long cmd, void * data)
2990 {
2991 	int                 error = 0;
2992 	struct if_bond_req  ibr;
2993 	struct ifaddr *     ifa;
2994 	ifbond_ref          ifb;
2995 	struct ifreq *      ifr;
2996 	struct ifmediareq32 * ifmr;
2997 	struct ifnet *      port_ifp = NULL;
2998 	user_addr_t         user_addr;
2999 
3000 	if (ifnet_type(ifp) != IFT_IEEE8023ADLAG) {
3001 		return EOPNOTSUPP;
3002 	}
3003 	ifr = (struct ifreq *)data;
3004 	ifa = (struct ifaddr *)data;
3005 
3006 	switch (cmd) {
3007 	case SIOCSIFADDR:
3008 		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
3009 		break;
3010 
3011 	case SIOCGIFMEDIA32:
3012 	case SIOCGIFMEDIA64:
3013 		bond_lock();
3014 		ifb = (ifbond_ref)ifnet_softc(ifp);
3015 		if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3016 			bond_unlock();
3017 			return ifb == NULL ? EOPNOTSUPP : EBUSY;
3018 		}
3019 		ifmr = (struct ifmediareq32 *)data;
3020 		ifmr->ifm_current = IFM_ETHER;
3021 		ifmr->ifm_mask = 0;
3022 		ifmr->ifm_status = IFM_AVALID;
3023 		ifmr->ifm_active = IFM_ETHER;
3024 		ifmr->ifm_count = 1;
3025 		if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
3026 			if (ifb->ifb_active_lag != NULL) {
3027 				ifmr->ifm_active = ifb->ifb_active_lag->lag_active_media;
3028 				ifmr->ifm_status |= IFM_ACTIVE;
3029 			}
3030 		} else if (ifb->ifb_distributing_count > 0) {
3031 			ifmr->ifm_active
3032 			        = ifb->ifb_distributing_array[0]->po_media_info.mi_active;
3033 			ifmr->ifm_status |= IFM_ACTIVE;
3034 		}
3035 		bond_unlock();
3036 		user_addr = (cmd == SIOCGIFMEDIA64) ?
3037 		    ((struct ifmediareq64 *)data)->ifmu_ulist :
3038 		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
3039 		if (user_addr != USER_ADDR_NULL) {
3040 			error = copyout(&ifmr->ifm_current,
3041 			    user_addr,
3042 			    sizeof(int));
3043 		}
3044 		break;
3045 
3046 	case SIOCSIFMEDIA:
3047 		/* XXX send the SIFMEDIA to all children?  Or force autoselect? */
3048 		error = EINVAL;
3049 		break;
3050 
3051 	case SIOCGIFDEVMTU:
3052 		bond_lock();
3053 		ifb = (ifbond_ref)ifnet_softc(ifp);
3054 		if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3055 			bond_unlock();
3056 			error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
3057 			break;
3058 		}
3059 		ifr->ifr_devmtu.ifdm_current = bond_device_mtu(ifp, ifb);
3060 		bond_get_mtu_values(ifb, &ifr->ifr_devmtu.ifdm_min,
3061 		    &ifr->ifr_devmtu.ifdm_max);
3062 		bond_unlock();
3063 		break;
3064 
3065 	case SIOCGIFALTMTU:
3066 		bond_lock();
3067 		ifb = (ifbond_ref)ifnet_softc(ifp);
3068 		if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3069 			bond_unlock();
3070 			error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
3071 			break;
3072 		}
3073 		ifr->ifr_mtu = ifb->ifb_altmtu;
3074 		bond_unlock();
3075 		break;
3076 
3077 	case SIOCSIFALTMTU:
3078 		error = bond_set_mtu(ifp, ifr->ifr_mtu, 1);
3079 		break;
3080 
3081 	case SIOCSIFMTU:
3082 		error = bond_set_mtu(ifp, ifr->ifr_mtu, 0);
3083 		break;
3084 
3085 	case SIOCSIFBOND:
3086 		user_addr = proc_is64bit(current_proc())
3087 		    ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
3088 		error = copyin(user_addr, &ibr, sizeof(ibr));
3089 		if (error) {
3090 			break;
3091 		}
3092 		switch (ibr.ibr_op) {
3093 		case IF_BOND_OP_ADD_INTERFACE:
3094 		case IF_BOND_OP_REMOVE_INTERFACE:
3095 			port_ifp = ifunit(__unsafe_null_terminated_from_indexable(ibr.ibr_ibru.ibru_if_name));
3096 			if (port_ifp == NULL) {
3097 				error = ENXIO;
3098 				break;
3099 			}
3100 			if (ifnet_type(port_ifp) != IFT_ETHER) {
3101 				error = EPROTONOSUPPORT;
3102 				break;
3103 			}
3104 			break;
3105 		case IF_BOND_OP_SET_MODE:
3106 			break;
3107 		default:
3108 			error = EOPNOTSUPP;
3109 			break;
3110 		}
3111 		if (error != 0) {
3112 			break;
3113 		}
3114 		switch (ibr.ibr_op) {
3115 		case IF_BOND_OP_ADD_INTERFACE:
3116 			error = bond_add_interface(ifp, port_ifp);
3117 			break;
3118 		case IF_BOND_OP_REMOVE_INTERFACE:
3119 			bond_lock();
3120 			ifb = (ifbond_ref)ifnet_softc(ifp);
3121 			if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3122 				bond_unlock();
3123 				return ifb == NULL ? EOPNOTSUPP : EBUSY;
3124 			}
3125 			error = bond_remove_interface(ifb, port_ifp);
3126 			bond_unlock();
3127 			break;
3128 		case IF_BOND_OP_SET_MODE:
3129 			switch (ibr.ibr_ibru.ibru_int_val) {
3130 			case IF_BOND_MODE_LACP:
3131 			case IF_BOND_MODE_STATIC:
3132 				break;
3133 			default:
3134 				error = EINVAL;
3135 				break;
3136 			}
3137 			if (error != 0) {
3138 				break;
3139 			}
3140 			error = bond_set_mode(ifp, ibr.ibr_ibru.ibru_int_val);
3141 			break;
3142 		}
3143 		break; /* SIOCSIFBOND */
3144 
3145 	case SIOCGIFBOND:
3146 		user_addr = proc_is64bit(current_proc())
3147 		    ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
3148 		error = copyin(user_addr, &ibr, sizeof(ibr));
3149 		if (error) {
3150 			break;
3151 		}
3152 		switch (ibr.ibr_op) {
3153 		case IF_BOND_OP_GET_STATUS:
3154 			break;
3155 		default:
3156 			error = EOPNOTSUPP;
3157 			break;
3158 		}
3159 		if (error != 0) {
3160 			break;
3161 		}
3162 		bond_lock();
3163 		ifb = (ifbond_ref)ifnet_softc(ifp);
3164 		if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3165 			bond_unlock();
3166 			return ifb == NULL ? EOPNOTSUPP : EBUSY;
3167 		}
3168 		switch (ibr.ibr_op) {
3169 		case IF_BOND_OP_GET_STATUS:
3170 			error = bond_get_status(ifb, &ibr, user_addr);
3171 			break;
3172 		}
3173 		bond_unlock();
3174 		break; /* SIOCGIFBOND */
3175 
3176 	case SIOCSIFLLADDR:
3177 		error = EOPNOTSUPP;
3178 		break;
3179 
3180 	case SIOCSIFFLAGS:
3181 		/* enable promiscuous mode on members */
3182 		error = bond_set_promisc(ifp);
3183 		break;
3184 
3185 	case SIOCADDMULTI:
3186 	case SIOCDELMULTI:
3187 		error = bond_setmulti(ifp);
3188 		break;
3189 	default:
3190 		error = EOPNOTSUPP;
3191 	}
3192 	return error;
3193 }
3194 
3195 static void
bond_if_free(struct ifnet * ifp)3196 bond_if_free(struct ifnet * ifp)
3197 {
3198 	ifbond_ref  ifb;
3199 
3200 	if (ifp == NULL) {
3201 		return;
3202 	}
3203 	bond_lock();
3204 	ifb = (ifbond_ref)ifnet_softc(ifp);
3205 	if (ifb == NULL) {
3206 		bond_unlock();
3207 		return;
3208 	}
3209 	ifbond_release(ifb);
3210 	bond_unlock();
3211 	ifnet_release(ifp);
3212 	return;
3213 }
3214 
3215 static void
bond_handle_event(struct ifnet * port_ifp,int event_code)3216 bond_handle_event(struct ifnet * port_ifp, int event_code)
3217 {
3218 	struct ifnet *      bond_ifp = NULL;
3219 	ifbond_ref          ifb;
3220 	int                 old_distributing_count;
3221 	bondport_ref        p;
3222 	struct media_info   media_info = { .mi_active = 0, .mi_status = 0 };
3223 
3224 	switch (event_code) {
3225 	case KEV_DL_IF_DETACHED:
3226 	case KEV_DL_IF_DETACHING:
3227 		break;
3228 	case KEV_DL_LINK_OFF:
3229 	case KEV_DL_LINK_ON:
3230 		media_info = interface_media_info(port_ifp);
3231 		break;
3232 	default:
3233 		return;
3234 	}
3235 	bond_lock();
3236 	p = bond_lookup_port(port_ifp);
3237 	if (p == NULL) {
3238 		bond_unlock();
3239 		return;
3240 	}
3241 	ifb = p->po_bond;
3242 	old_distributing_count = ifb->ifb_distributing_count;
3243 	switch (event_code) {
3244 	case KEV_DL_IF_DETACHED:
3245 	case KEV_DL_IF_DETACHING:
3246 		bond_remove_interface(ifb, p->po_ifp);
3247 		break;
3248 	case KEV_DL_LINK_OFF:
3249 	case KEV_DL_LINK_ON:
3250 		p->po_media_info = media_info;
3251 		if (p->po_enabled) {
3252 			bondport_link_status_changed(p);
3253 		}
3254 		break;
3255 	}
3256 	/* generate a link-event */
3257 	if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
3258 		if (ifbond_selection(ifb)) {
3259 			event_code = (ifb->ifb_active_lag == NULL)
3260 			    ? KEV_DL_LINK_OFF
3261 			    : KEV_DL_LINK_ON;
3262 			/* XXX need to take a reference on bond_ifp */
3263 			bond_ifp = ifb->ifb_ifp;
3264 			ifb->ifb_last_link_event = event_code;
3265 		} else {
3266 			event_code = (ifb->ifb_active_lag == NULL)
3267 			    ? KEV_DL_LINK_OFF
3268 			    : KEV_DL_LINK_ON;
3269 			if (event_code != ifb->ifb_last_link_event) {
3270 				BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
3271 				    "%s: (event) generating LINK event",
3272 				    ifb->ifb_name);
3273 				bond_ifp = ifb->ifb_ifp;
3274 				ifb->ifb_last_link_event = event_code;
3275 			}
3276 		}
3277 	} else {
3278 		/*
3279 		 * if the distributing array membership changed from 0 <-> !0
3280 		 * generate a link event
3281 		 */
3282 		if (old_distributing_count == 0
3283 		    && ifb->ifb_distributing_count != 0) {
3284 			event_code = KEV_DL_LINK_ON;
3285 		} else if (old_distributing_count != 0
3286 		    && ifb->ifb_distributing_count == 0) {
3287 			event_code = KEV_DL_LINK_OFF;
3288 		}
3289 		if (event_code != 0 && event_code != ifb->ifb_last_link_event) {
3290 			bond_ifp = ifb->ifb_ifp;
3291 			ifb->ifb_last_link_event = event_code;
3292 		}
3293 	}
3294 
3295 	bond_unlock();
3296 	if (bond_ifp != NULL) {
3297 		interface_link_event(bond_ifp, event_code);
3298 	}
3299 	return;
3300 }
3301 
3302 static void
bond_event(struct ifnet * port_ifp,__unused protocol_family_t protocol,const struct kev_msg * event)3303 bond_event(struct ifnet * port_ifp, __unused protocol_family_t protocol,
3304     const struct kev_msg * event)
3305 {
3306 	int         event_code;
3307 
3308 	if (event->vendor_code != KEV_VENDOR_APPLE
3309 	    || event->kev_class != KEV_NETWORK_CLASS
3310 	    || event->kev_subclass != KEV_DL_SUBCLASS) {
3311 		return;
3312 	}
3313 	event_code = event->event_code;
3314 	switch (event_code) {
3315 	case KEV_DL_LINK_OFF:
3316 	case KEV_DL_LINK_ON:
3317 	case KEV_DL_IF_DETACHING:
3318 	case KEV_DL_IF_DETACHED:
3319 		bond_handle_event(port_ifp, event_code);
3320 		break;
3321 	default:
3322 		break;
3323 	}
3324 	return;
3325 }
3326 
3327 static errno_t
bond_detached(ifnet_t port_ifp,__unused protocol_family_t protocol)3328 bond_detached(ifnet_t port_ifp, __unused protocol_family_t protocol)
3329 {
3330 	bond_handle_event(port_ifp, KEV_DL_IF_DETACHED);
3331 	return 0;
3332 }
3333 
3334 static void
interface_link_event(struct ifnet * ifp,u_int32_t event_code)3335 interface_link_event(struct ifnet * ifp, u_int32_t event_code)
3336 {
3337 	struct event {
3338 		u_int32_t ifnet_family;
3339 		u_int32_t unit;
3340 		char if_name[IFNAMSIZ];
3341 	};
3342 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
3343 	struct kern_event_msg *header = (struct kern_event_msg*)message;
3344 	struct event *data = (struct event *)(message + KEV_MSG_HEADER_SIZE);
3345 
3346 	header->total_size   = sizeof(message);
3347 	header->vendor_code  = KEV_VENDOR_APPLE;
3348 	header->kev_class    = KEV_NETWORK_CLASS;
3349 	header->kev_subclass = KEV_DL_SUBCLASS;
3350 	header->event_code   = event_code;
3351 	data->ifnet_family   = ifnet_family(ifp);
3352 	data->unit           = (u_int32_t)ifnet_unit(ifp);
3353 	strlcpy(data->if_name, ifnet_name(ifp), sizeof(data->if_name));
3354 	ifnet_event(ifp, header);
3355 }
3356 
3357 
3358 /*
3359  * Function: bond_attach_protocol
3360  * Purpose:
3361  *   Attach a DLIL protocol to the interface.
3362  *
3363  *   The ethernet demux special cases to always return PF_BOND if the
3364  *   interface is bonded.  That means we receive all traffic from that
3365  *   interface without passing any of the traffic to any other attached
3366  *   protocol.
3367  */
3368 static int
bond_attach_protocol(struct ifnet * ifp)3369 bond_attach_protocol(struct ifnet *ifp)
3370 {
3371 	int                                 error;
3372 	struct ifnet_attach_proto_param_v2  reg;
3373 
3374 	bzero(&reg, sizeof(reg));
3375 	reg.input = bond_input;
3376 	reg.event = bond_event;
3377 	reg.detached = bond_detached;
3378 
3379 	error = ifnet_attach_protocol_v2(ifp, PF_BOND, &reg);
3380 	if (error != 0) {
3381 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
3382 		    "%s: ifnet_attach_protocol failed, %d",
3383 		    ifp->if_xname, error);
3384 	}
3385 	return error;
3386 }
3387 
3388 /*
3389  * Function: bond_detach_protocol
3390  * Purpose:
3391  *   Detach our DLIL protocol from an interface
3392  */
3393 static int
bond_detach_protocol(struct ifnet * ifp)3394 bond_detach_protocol(struct ifnet *ifp)
3395 {
3396 	int         error;
3397 
3398 	error = ifnet_detach_protocol(ifp, PF_BOND);
3399 	if (error != 0) {
3400 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
3401 		    "%s: ifnet_detach_protocol failed, %d",
3402 		    ifp->if_xname, error);
3403 	}
3404 	return error;
3405 }
3406 
3407 
3408 /*
3409  * DLIL interface family functions
3410  */
3411 extern int ether_attach_inet(ifnet_t ifp, protocol_family_t protocol_family);
3412 extern void ether_detach_inet(ifnet_t ifp, protocol_family_t protocol_family);
3413 extern int ether_attach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
3414 extern void ether_detach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
3415 extern int ether_attach_at(ifnet_t ifp, protocol_family_t protocol_family);
3416 extern void ether_detach_at(ifnet_t ifp, protocol_family_t protocol_family);
3417 
3418 __private_extern__ int
bond_family_init(void)3419 bond_family_init(void)
3420 {
3421 	int error = 0;
3422 
3423 	error = proto_register_plumber(PF_INET, APPLE_IF_FAM_BOND,
3424 	    ether_attach_inet,
3425 	    ether_detach_inet);
3426 	if (error != 0) {
3427 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
3428 		    "proto_register_plumber failed for AF_INET error %d",
3429 		    error);
3430 		goto done;
3431 	}
3432 	error = proto_register_plumber(PF_INET6, APPLE_IF_FAM_BOND,
3433 	    ether_attach_inet6,
3434 	    ether_detach_inet6);
3435 	if (error != 0) {
3436 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
3437 		    "proto_register_plumber failed for AF_INET6 error %d",
3438 		    error);
3439 		goto done;
3440 	}
3441 	error = bond_clone_attach();
3442 	if (error != 0) {
3443 		BOND_LOG(LOG_NOTICE, BD_DBGF_LIFECYCLE,
3444 		    "bond_clone_attach error %d",
3445 		    error);
3446 		goto done;
3447 	}
3448 
3449 done:
3450 	return error;
3451 }
3452 /**
3453 **
3454 ** LACP routines:
3455 **
3456 **/
3457 
3458 /**
3459 ** LACP ifbond_list routines
3460 **/
3461 static bondport_ref
ifbond_list_find_moved_port(bondport_ref rx_port,const lacp_actor_partner_tlv_ref atlv)3462 ifbond_list_find_moved_port(bondport_ref rx_port,
3463     const lacp_actor_partner_tlv_ref atlv)
3464 {
3465 	ifbond_ref          bond;
3466 	bondport_ref        p;
3467 	partner_state_ref   ps;
3468 	LAG_info_ref        ps_li;
3469 
3470 	TAILQ_FOREACH(bond, &g_bond->ifbond_list, ifb_bond_list) {
3471 		TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3472 			if (rx_port == p) {
3473 				/* no point in comparing against ourselves */
3474 				continue;
3475 			}
3476 			if (p->po_receive_state != ReceiveState_PORT_DISABLED) {
3477 				/* it's not clear that we should be checking this */
3478 				continue;
3479 			}
3480 			ps = &p->po_partner_state;
3481 			if (lacp_actor_partner_state_defaulted(ps->ps_state)) {
3482 				continue;
3483 			}
3484 			ps_li = &ps->ps_lag_info;
3485 			if (ps->ps_port == lacp_actor_partner_tlv_get_port(atlv)
3486 			    && bcmp(&ps_li->li_system, atlv->lap_system,
3487 			    sizeof(ps_li->li_system)) == 0) {
3488 				BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3489 				    "System " EA_FORMAT
3490 				    " Port 0x%x moved from %s to %s",
3491 				    EA_LIST(&ps_li->li_system), ps->ps_port,
3492 				    bondport_get_name(p),
3493 				    bondport_get_name(rx_port));
3494 				return p;
3495 			}
3496 		}
3497 	}
3498 	return NULL;
3499 }
3500 
3501 /**
3502 ** LACP ifbond, LAG routines
3503 **/
3504 
3505 static int
ifbond_selection(ifbond_ref bond)3506 ifbond_selection(ifbond_ref bond)
3507 {
3508 	int                 all_ports_ready = 0;
3509 	int                 active_media = 0;
3510 	LAG_ref             lag = NULL;
3511 	int                 lag_changed = 0;
3512 	bondport_ref        p;
3513 	int                 port_speed = 0;
3514 
3515 	lag = ifbond_find_best_LAG(bond, &active_media);
3516 	if (lag != bond->ifb_active_lag) {
3517 		if (bond->ifb_active_lag != NULL) {
3518 			ifbond_deactivate_LAG(bond, bond->ifb_active_lag);
3519 			bond->ifb_active_lag = NULL;
3520 		}
3521 		bond->ifb_active_lag = lag;
3522 		if (lag != NULL) {
3523 			ifbond_activate_LAG(bond, lag, active_media);
3524 		}
3525 		lag_changed = 1;
3526 	} else if (lag != NULL) {
3527 		if (lag->lag_active_media != active_media) {
3528 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3529 			    "LAG PORT SPEED CHANGED from %d to %d",
3530 			    link_speed(lag->lag_active_media),
3531 			    link_speed(active_media));
3532 			ifbond_deactivate_LAG(bond, lag);
3533 			ifbond_activate_LAG(bond, lag, active_media);
3534 			lag_changed = 1;
3535 		}
3536 	}
3537 	if (lag != NULL) {
3538 		port_speed = link_speed(active_media);
3539 		all_ports_ready = ifbond_all_ports_ready(bond);
3540 	}
3541 	TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3542 		if (lag != NULL && p->po_lag == lag
3543 		    && media_speed(&p->po_media_info) == port_speed
3544 		    && (p->po_mux_state == MuxState_DETACHED
3545 		    || p->po_selected == SelectedState_SELECTED
3546 		    || p->po_selected == SelectedState_STANDBY)
3547 		    && bondport_aggregatable(p)) {
3548 			if (bond->ifb_max_active > 0) {
3549 				if (lag->lag_selected_port_count < bond->ifb_max_active) {
3550 					if (p->po_selected == SelectedState_STANDBY
3551 					    || p->po_selected == SelectedState_UNSELECTED) {
3552 						bondport_set_selected(p, SelectedState_SELECTED);
3553 					}
3554 				} else if (p->po_selected == SelectedState_UNSELECTED) {
3555 					bondport_set_selected(p, SelectedState_STANDBY);
3556 				}
3557 			} else {
3558 				bondport_set_selected(p, SelectedState_SELECTED);
3559 			}
3560 		}
3561 		if (bondport_flags_selected_changed(p)) {
3562 			bondport_flags_clear_selected_changed(p);
3563 			bondport_mux_machine(p, LAEventSelectedChange, NULL);
3564 		}
3565 		if (all_ports_ready
3566 		    && bondport_flags_ready(p)
3567 		    && p->po_mux_state == MuxState_WAITING) {
3568 			bondport_mux_machine(p, LAEventReady, NULL);
3569 		}
3570 		bondport_transmit_machine(p, LAEventStart, NULL);
3571 	}
3572 	return lag_changed;
3573 }
3574 
3575 static LAG_ref
ifbond_find_best_LAG(ifbond_ref bond,int * active_media)3576 ifbond_find_best_LAG(ifbond_ref bond, int * active_media)
3577 {
3578 	int                 best_active = 0;
3579 	LAG_ref             best_lag = NULL;
3580 	int                 best_count = 0;
3581 	int                 best_speed = 0;
3582 	LAG_ref             lag;
3583 
3584 	if (bond->ifb_active_lag != NULL) {
3585 		best_lag = bond->ifb_active_lag;
3586 		best_count = LAG_get_aggregatable_port_count(best_lag, &best_active);
3587 		if (bond->ifb_max_active > 0
3588 		    && best_count > bond->ifb_max_active) {
3589 			best_count = bond->ifb_max_active;
3590 		}
3591 		best_speed = link_speed(best_active);
3592 	}
3593 	TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3594 		int     active;
3595 		int     count;
3596 		int     speed;
3597 
3598 		if (lag == bond->ifb_active_lag) {
3599 			/* we've already computed it */
3600 			continue;
3601 		}
3602 		count = LAG_get_aggregatable_port_count(lag, &active);
3603 		if (count == 0) {
3604 			continue;
3605 		}
3606 		if (bond->ifb_max_active > 0
3607 		    && count > bond->ifb_max_active) {
3608 			/* if there's a limit, don't count extra links */
3609 			count = bond->ifb_max_active;
3610 		}
3611 		speed = link_speed(active);
3612 		if ((count * speed) > (best_count * best_speed)) {
3613 			best_count = count;
3614 			best_speed = speed;
3615 			best_active = active;
3616 			best_lag = lag;
3617 		}
3618 	}
3619 	if (best_count == 0) {
3620 		return NULL;
3621 	}
3622 	*active_media = best_active;
3623 	return best_lag;
3624 }
3625 
3626 static void
ifbond_deactivate_LAG(__unused ifbond_ref bond,LAG_ref lag)3627 ifbond_deactivate_LAG(__unused ifbond_ref bond, LAG_ref lag)
3628 {
3629 	bondport_ref        p;
3630 
3631 	TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3632 		bondport_set_selected(p, SelectedState_UNSELECTED);
3633 	}
3634 	return;
3635 }
3636 
3637 static void
ifbond_activate_LAG(ifbond_ref bond,LAG_ref lag,int active_media)3638 ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media)
3639 {
3640 	int                 need = 0;
3641 	bondport_ref        p;
3642 
3643 	if (bond->ifb_max_active > 0) {
3644 		need = bond->ifb_max_active;
3645 	}
3646 	lag->lag_active_media = active_media;
3647 	TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3648 		if (bondport_aggregatable(p) == 0) {
3649 			bondport_set_selected(p, SelectedState_UNSELECTED);
3650 		} else if (media_speed(&p->po_media_info) != link_speed(active_media)) {
3651 			bondport_set_selected(p, SelectedState_UNSELECTED);
3652 		} else if (p->po_mux_state == MuxState_DETACHED) {
3653 			if (bond->ifb_max_active > 0) {
3654 				if (need > 0) {
3655 					bondport_set_selected(p, SelectedState_SELECTED);
3656 					need--;
3657 				} else {
3658 					bondport_set_selected(p, SelectedState_STANDBY);
3659 				}
3660 			} else {
3661 				bondport_set_selected(p, SelectedState_SELECTED);
3662 			}
3663 		} else {
3664 			bondport_set_selected(p, SelectedState_UNSELECTED);
3665 		}
3666 	}
3667 	return;
3668 }
3669 
#if 0
/*
 * Function: ifbond_set_max_active
 * Purpose:
 *   (Compiled out.)  Set the limit on active ports and, if the active
 *   LAG currently has more SELECTED ports than the new limit allows,
 *   unselect the excess ports.
 */
static void
ifbond_set_max_active(ifbond_ref bond, int max_active)
{
	LAG_ref         active = bond->ifb_active_lag;
	int             excess;
	bondport_ref    port;

	bond->ifb_max_active = max_active;
	if (bond->ifb_max_active <= 0 || active == NULL) {
		return;
	}
	if (active->lag_selected_port_count <= bond->ifb_max_active) {
		return;
	}
	excess = active->lag_selected_port_count - bond->ifb_max_active;
	TAILQ_FOREACH(port, &active->lag_port_list, po_lag_port_list) {
		if (port->po_selected != SelectedState_SELECTED) {
			continue;
		}
		bondport_set_selected(port, SelectedState_UNSELECTED);
		excess--;
		if (excess == 0) {
			break;
		}
	}
}
#endif
3698 
3699 static int
ifbond_all_ports_ready(ifbond_ref bond)3700 ifbond_all_ports_ready(ifbond_ref bond)
3701 {
3702 	int                 ready = 0;
3703 	bondport_ref        p;
3704 
3705 	if (bond->ifb_active_lag == NULL) {
3706 		return 0;
3707 	}
3708 	TAILQ_FOREACH(p, &bond->ifb_active_lag->lag_port_list, po_lag_port_list) {
3709 		if (p->po_mux_state == MuxState_WAITING
3710 		    && p->po_selected == SelectedState_SELECTED) {
3711 			if (bondport_flags_ready(p) == 0) {
3712 				return 0;
3713 			}
3714 		}
3715 		/* note that there was at least one ready port */
3716 		ready = 1;
3717 	}
3718 	return ready;
3719 }
3720 
3721 static int
ifbond_all_ports_attached(ifbond_ref bond,bondport_ref this_port)3722 ifbond_all_ports_attached(ifbond_ref bond, bondport_ref this_port)
3723 {
3724 	bondport_ref        p;
3725 
3726 	TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3727 		if (this_port == p) {
3728 			continue;
3729 		}
3730 		if (bondport_flags_mux_attached(p) == 0) {
3731 			return 0;
3732 		}
3733 	}
3734 	return 1;
3735 }
3736 
3737 static LAG_ref
ifbond_get_LAG_matching_port(ifbond_ref bond,bondport_ref p)3738 ifbond_get_LAG_matching_port(ifbond_ref bond, bondport_ref p)
3739 {
3740 	LAG_ref     lag;
3741 
3742 	TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3743 		if (bcmp(&lag->lag_info, &p->po_partner_state.ps_lag_info,
3744 		    sizeof(lag->lag_info)) == 0) {
3745 			return lag;
3746 		}
3747 	}
3748 	return NULL;
3749 }
3750 
3751 static int
LAG_get_aggregatable_port_count(LAG_ref lag,int * active_media)3752 LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media)
3753 {
3754 	int                 active;
3755 	int                 count;
3756 	bondport_ref        p;
3757 	int                 speed;
3758 
3759 	active = 0;
3760 	count = 0;
3761 	speed = 0;
3762 	TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3763 		if (bondport_aggregatable(p)) {
3764 			int this_speed;
3765 
3766 			this_speed = media_speed(&p->po_media_info);
3767 			if (this_speed == 0) {
3768 				continue;
3769 			}
3770 			if (this_speed > speed) {
3771 				active = p->po_media_info.mi_active;
3772 				speed = this_speed;
3773 				count = 1;
3774 			} else if (this_speed == speed) {
3775 				count++;
3776 			}
3777 		}
3778 	}
3779 	*active_media = active;
3780 	return count;
3781 }
3782 
3783 
3784 /**
3785 ** LACP bondport routines
3786 **/
3787 static void
bondport_link_status_changed(bondport_ref p)3788 bondport_link_status_changed(bondport_ref p)
3789 {
3790 	ifbond_ref  bond = p->po_bond;
3791 
3792 	if (if_bond_debug) {
3793 		if (media_active(&p->po_media_info)) {
3794 			const char * duplex_string;
3795 
3796 			if (media_full_duplex(&p->po_media_info)) {
3797 				duplex_string = "full";
3798 			} else if (media_type_unknown(&p->po_media_info)) {
3799 				duplex_string = "unknown";
3800 			} else {
3801 				duplex_string = "half";
3802 			}
3803 			BOND_LOG(LOG_NOTICE, BD_DBGF_LACP,
3804 			    "[%s] Link UP %d Mbit/s %s duplex",
3805 			    bondport_get_name(p),
3806 			    media_speed(&p->po_media_info),
3807 			    duplex_string);
3808 		} else {
3809 			BOND_LOG(LOG_NOTICE, BD_DBGF_LACP,
3810 			    "[%s] Link DOWN", bondport_get_name(p));
3811 		}
3812 	}
3813 	if (bond->ifb_mode == IF_BOND_MODE_LACP) {
3814 		if (media_active(&p->po_media_info)
3815 		    && bond->ifb_active_lag != NULL
3816 		    && p->po_lag == bond->ifb_active_lag
3817 		    && p->po_selected != SelectedState_UNSELECTED) {
3818 			if (media_speed(&p->po_media_info) != p->po_lag->lag_active_media) {
3819 				BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3820 				    "[%s] Port speed %d differs from LAG %d",
3821 				    bondport_get_name(p),
3822 				    media_speed(&p->po_media_info),
3823 				    link_speed(p->po_lag->lag_active_media));
3824 				bondport_set_selected(p, SelectedState_UNSELECTED);
3825 			}
3826 		}
3827 		bondport_receive_machine(p, LAEventMediaChange, NULL);
3828 		bondport_mux_machine(p, LAEventMediaChange, NULL);
3829 		bondport_periodic_transmit_machine(p, LAEventMediaChange, NULL);
3830 	} else {
3831 		if (media_active(&p->po_media_info)) {
3832 			bondport_enable_distributing(p);
3833 		} else {
3834 			bondport_disable_distributing(p);
3835 		}
3836 	}
3837 	return;
3838 }
3839 
3840 static int
bondport_aggregatable(bondport_ref p)3841 bondport_aggregatable(bondport_ref p)
3842 {
3843 	partner_state_ref   ps = &p->po_partner_state;
3844 
3845 	if (lacp_actor_partner_state_aggregatable(p->po_actor_state) == 0
3846 	    || lacp_actor_partner_state_aggregatable(ps->ps_state) == 0) {
3847 		/* we and/or our partner are individual */
3848 		return 0;
3849 	}
3850 	if (p->po_lag == NULL) {
3851 		return 0;
3852 	}
3853 	switch (p->po_receive_state) {
3854 	default:
3855 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3856 		    "[%s] Port is not selectable",
3857 		    bondport_get_name(p));
3858 		return 0;
3859 	case ReceiveState_CURRENT:
3860 	case ReceiveState_EXPIRED:
3861 		break;
3862 	}
3863 	return 1;
3864 }
3865 
3866 static int
bondport_matches_LAG(bondport_ref p,LAG_ref lag)3867 bondport_matches_LAG(bondport_ref p, LAG_ref lag)
3868 {
3869 	LAG_info_ref        lag_li;
3870 	partner_state_ref   ps;
3871 	LAG_info_ref        ps_li;
3872 
3873 	ps = &p->po_partner_state;
3874 	ps_li = &ps->ps_lag_info;
3875 	lag_li = &lag->lag_info;
3876 	if (ps_li->li_system_priority == lag_li->li_system_priority
3877 	    && ps_li->li_key == lag_li->li_key
3878 	    && (bcmp(&ps_li->li_system, &lag_li->li_system,
3879 	    sizeof(lag_li->li_system))
3880 	    == 0)) {
3881 		return 1;
3882 	}
3883 	return 0;
3884 }
3885 
3886 static int
bondport_remove_from_LAG(bondport_ref p)3887 bondport_remove_from_LAG(bondport_ref p)
3888 {
3889 	int         active_lag = 0;
3890 	ifbond_ref  bond = p->po_bond;
3891 	LAG_ref     lag = p->po_lag;
3892 
3893 	if (lag == NULL) {
3894 		return 0;
3895 	}
3896 	TAILQ_REMOVE(&lag->lag_port_list, p, po_lag_port_list);
3897 	BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3898 	    "[%s] Removed from LAG (0x%04x," EA_FORMAT ",0x%04x)",
3899 	    bondport_get_name(p), lag->lag_info.li_system_priority,
3900 	    EA_LIST(&lag->lag_info.li_system), lag->lag_info.li_key);
3901 	p->po_lag = NULL;
3902 	lag->lag_port_count--;
3903 	if (lag->lag_port_count > 0) {
3904 		return bond->ifb_active_lag == lag;
3905 	}
3906 	BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3907 	    "Key 0x%04x: LAG Released (%04x," EA_FORMAT ",0x%04x)",
3908 	    bond->ifb_key,
3909 	    lag->lag_info.li_system_priority,
3910 	    EA_LIST(&lag->lag_info.li_system),
3911 	    lag->lag_info.li_key);
3912 	TAILQ_REMOVE(&bond->ifb_lag_list, lag, lag_list);
3913 	if (bond->ifb_active_lag == lag) {
3914 		bond->ifb_active_lag = NULL;
3915 		active_lag = 1;
3916 	}
3917 	kfree_type(struct LAG_s, lag);
3918 	return active_lag;
3919 }
3920 
3921 static void
bondport_add_to_LAG(bondport_ref p,LAG_ref lag)3922 bondport_add_to_LAG(bondport_ref p, LAG_ref lag)
3923 {
3924 	TAILQ_INSERT_TAIL(&lag->lag_port_list, p, po_lag_port_list);
3925 	p->po_lag = lag;
3926 	lag->lag_port_count++;
3927 	BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3928 	    "[%s] Added to LAG (0x%04x," EA_FORMAT "0x%04x)",
3929 	    bondport_get_name(p),
3930 	    lag->lag_info.li_system_priority,
3931 	    EA_LIST(&lag->lag_info.li_system),
3932 	    lag->lag_info.li_key);
3933 	return;
3934 }
3935 
3936 static void
bondport_assign_to_LAG(bondport_ref p)3937 bondport_assign_to_LAG(bondport_ref p)
3938 {
3939 	ifbond_ref  bond = p->po_bond;
3940 	LAG_ref     lag;
3941 
3942 	if (lacp_actor_partner_state_defaulted(p->po_actor_state)) {
3943 		bondport_remove_from_LAG(p);
3944 		return;
3945 	}
3946 	lag = p->po_lag;
3947 	if (lag != NULL) {
3948 		if (bondport_matches_LAG(p, lag)) {
3949 			/* still OK */
3950 			return;
3951 		}
3952 		bondport_remove_from_LAG(p);
3953 	}
3954 	lag = ifbond_get_LAG_matching_port(bond, p);
3955 	if (lag != NULL) {
3956 		bondport_add_to_LAG(p, lag);
3957 		return;
3958 	}
3959 	lag = kalloc_type(struct LAG_s, Z_WAITOK);
3960 	TAILQ_INIT(&lag->lag_port_list);
3961 	lag->lag_port_count = 0;
3962 	lag->lag_selected_port_count = 0;
3963 	lag->lag_info = p->po_partner_state.ps_lag_info;
3964 	TAILQ_INSERT_TAIL(&bond->ifb_lag_list, lag, lag_list);
3965 	BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
3966 	    "Key 0x%04x: LAG Created (0x%04x," EA_FORMAT ",0x%04x)",
3967 	    bond->ifb_key, lag->lag_info.li_system_priority,
3968 	    EA_LIST(&lag->lag_info.li_system), lag->lag_info.li_key);
3969 	bondport_add_to_LAG(p, lag);
3970 	return;
3971 }
3972 
3973 static void
bondport_receive_lacpdu(bondport_ref p,lacpdu_ref in_lacpdu_p)3974 bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p)
3975 {
3976 	bondport_ref                moved_port;
3977 
3978 	moved_port
3979 	        = ifbond_list_find_moved_port(p, (const lacp_actor_partner_tlv_ref)
3980 	    &in_lacpdu_p->la_actor_tlv);
3981 	if (moved_port != NULL) {
3982 		bondport_receive_machine(moved_port, LAEventPortMoved, NULL);
3983 	}
3984 	bondport_receive_machine(p, LAEventPacket, in_lacpdu_p);
3985 	bondport_mux_machine(p, LAEventPacket, in_lacpdu_p);
3986 	bondport_periodic_transmit_machine(p, LAEventPacket, in_lacpdu_p);
3987 	return;
3988 }
3989 
3990 static void
bondport_set_selected(bondport_ref p,SelectedState s)3991 bondport_set_selected(bondport_ref p, SelectedState s)
3992 {
3993 	if (s != p->po_selected) {
3994 		ifbond_ref      bond = p->po_bond;
3995 		LAG_ref         lag = p->po_lag;
3996 
3997 		bondport_flags_set_selected_changed(p);
3998 		if (lag != NULL && bond->ifb_active_lag == lag) {
3999 			if (p->po_selected == SelectedState_SELECTED) {
4000 				lag->lag_selected_port_count--;
4001 			} else if (s == SelectedState_SELECTED) {
4002 				lag->lag_selected_port_count++;
4003 			}
4004 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4005 			    "[%s] SetSelected: %s (was %s)",
4006 			    bondport_get_name(p),
4007 			    SelectedStateString(s),
4008 			    SelectedStateString(p->po_selected));
4009 		}
4010 	}
4011 	p->po_selected = s;
4012 	return;
4013 }
4014 
4015 /**
4016 ** Receive machine
4017 **/
4018 
4019 static void
bondport_UpdateDefaultSelected(bondport_ref p)4020 bondport_UpdateDefaultSelected(bondport_ref p)
4021 {
4022 	bondport_set_selected(p, SelectedState_UNSELECTED);
4023 	return;
4024 }
4025 
4026 static void
bondport_RecordDefault(bondport_ref p)4027 bondport_RecordDefault(bondport_ref p)
4028 {
4029 	bzero(&p->po_partner_state, sizeof(p->po_partner_state));
4030 	p->po_actor_state
4031 	        = lacp_actor_partner_state_set_defaulted(p->po_actor_state);
4032 	bondport_assign_to_LAG(p);
4033 	return;
4034 }
4035 
4036 static void
bondport_UpdateSelected(bondport_ref p,lacpdu_ref lacpdu_p)4037 bondport_UpdateSelected(bondport_ref p, lacpdu_ref lacpdu_p)
4038 {
4039 	lacp_actor_partner_tlv_ref  actor;
4040 	partner_state_ref           ps;
4041 	LAG_info_ref                ps_li;
4042 
4043 	/* compare the PDU's Actor information to our Partner state */
4044 	actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
4045 	ps = &p->po_partner_state;
4046 	ps_li = &ps->ps_lag_info;
4047 	if (lacp_actor_partner_tlv_get_port(actor) != ps->ps_port
4048 	    || (lacp_actor_partner_tlv_get_port_priority(actor)
4049 	    != ps->ps_port_priority)
4050 	    || bcmp(actor->lap_system, &ps_li->li_system, sizeof(ps_li->li_system))
4051 	    || (lacp_actor_partner_tlv_get_system_priority(actor)
4052 	    != ps_li->li_system_priority)
4053 	    || (lacp_actor_partner_tlv_get_key(actor) != ps_li->li_key)
4054 	    || (lacp_actor_partner_state_aggregatable(actor->lap_state)
4055 	    != lacp_actor_partner_state_aggregatable(ps->ps_state))) {
4056 		bondport_set_selected(p, SelectedState_UNSELECTED);
4057 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4058 		    "[%s] updateSelected UNSELECTED",
4059 		    bondport_get_name(p));
4060 	}
4061 	return;
4062 }
4063 
4064 static void
bondport_RecordPDU(bondport_ref p,lacpdu_ref lacpdu_p)4065 bondport_RecordPDU(bondport_ref p, lacpdu_ref lacpdu_p)
4066 {
4067 	lacp_actor_partner_tlv_ref  actor;
4068 	ifbond_ref                  bond = p->po_bond;
4069 	int                         lacp_maintain = 0;
4070 	partner_state_ref           ps;
4071 	lacp_actor_partner_tlv_ref  partner;
4072 	LAG_info_ref                ps_li;
4073 
4074 	/* copy the PDU's Actor information into our Partner state */
4075 	actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
4076 	ps = &p->po_partner_state;
4077 	ps_li = &ps->ps_lag_info;
4078 	ps->ps_port = lacp_actor_partner_tlv_get_port(actor);
4079 	ps->ps_port_priority = lacp_actor_partner_tlv_get_port_priority(actor);
4080 	ps_li->li_system = *((lacp_system_ref)actor->lap_system);
4081 	ps_li->li_system_priority
4082 	        = lacp_actor_partner_tlv_get_system_priority(actor);
4083 	ps_li->li_key = lacp_actor_partner_tlv_get_key(actor);
4084 	ps->ps_state = lacp_actor_partner_state_set_out_of_sync(actor->lap_state);
4085 	p->po_actor_state
4086 	        = lacp_actor_partner_state_set_not_defaulted(p->po_actor_state);
4087 
4088 	/* compare the PDU's Partner information to our own information */
4089 	partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
4090 
4091 	if (lacp_actor_partner_state_active_lacp(ps->ps_state)
4092 	    || (lacp_actor_partner_state_active_lacp(p->po_actor_state)
4093 	    && lacp_actor_partner_state_active_lacp(partner->lap_state))) {
4094 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4095 		    "[%s] recordPDU: LACP will maintain", bondport_get_name(p));
4096 		lacp_maintain = 1;
4097 	}
4098 	if ((lacp_actor_partner_tlv_get_port(partner)
4099 	    == bondport_get_index(p))
4100 	    && lacp_actor_partner_tlv_get_port_priority(partner) == p->po_priority
4101 	    && bcmp(partner->lap_system, &g_bond->system,
4102 	    sizeof(g_bond->system)) == 0
4103 	    && (lacp_actor_partner_tlv_get_system_priority(partner)
4104 	    == g_bond->system_priority)
4105 	    && lacp_actor_partner_tlv_get_key(partner) == bond->ifb_key
4106 	    && (lacp_actor_partner_state_aggregatable(partner->lap_state)
4107 	    == lacp_actor_partner_state_aggregatable(p->po_actor_state))
4108 	    && lacp_actor_partner_state_in_sync(actor->lap_state)
4109 	    && lacp_maintain) {
4110 		ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
4111 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4112 		    "[%s] recordPDU: LACP partner in sync",
4113 		    bondport_get_name(p));
4114 	} else if (lacp_actor_partner_state_aggregatable(actor->lap_state) == 0
4115 	    && lacp_actor_partner_state_in_sync(actor->lap_state)
4116 	    && lacp_maintain) {
4117 		ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state);
4118 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4119 		    "[%s] recordPDU: LACP partner in sync (ind)",
4120 		    bondport_get_name(p));
4121 	}
4122 	bondport_assign_to_LAG(p);
4123 	return;
4124 }
4125 
4126 static __inline__ lacp_actor_partner_state
updateNTTBits(lacp_actor_partner_state s)4127 updateNTTBits(lacp_actor_partner_state s)
4128 {
4129 	return s & (LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY
4130 	       | LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT
4131 	       | LACP_ACTOR_PARTNER_STATE_AGGREGATION
4132 	       | LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION);
4133 }
4134 
4135 static void
bondport_UpdateNTT(bondport_ref p,lacpdu_ref lacpdu_p)4136 bondport_UpdateNTT(bondport_ref p, lacpdu_ref lacpdu_p)
4137 {
4138 	ifbond_ref                  bond = p->po_bond;
4139 	lacp_actor_partner_tlv_ref  partner;
4140 
4141 	/* compare the PDU's Actor information to our Partner state */
4142 	partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
4143 	if ((lacp_actor_partner_tlv_get_port(partner) != bondport_get_index(p))
4144 	    || lacp_actor_partner_tlv_get_port_priority(partner) != p->po_priority
4145 	    || bcmp(partner->lap_system, &g_bond->system, sizeof(g_bond->system))
4146 	    || (lacp_actor_partner_tlv_get_system_priority(partner)
4147 	    != g_bond->system_priority)
4148 	    || lacp_actor_partner_tlv_get_key(partner) != bond->ifb_key
4149 	    || (updateNTTBits(partner->lap_state)
4150 	    != updateNTTBits(p->po_actor_state))) {
4151 		bondport_flags_set_ntt(p);
4152 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4153 		    "[%s] updateNTT: Need To Transmit", bondport_get_name(p));
4154 	}
4155 	return;
4156 }
4157 
4158 static void
bondport_AttachMuxToAggregator(bondport_ref p)4159 bondport_AttachMuxToAggregator(bondport_ref p)
4160 {
4161 	if (bondport_flags_mux_attached(p) == 0) {
4162 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4163 		    "[%s] Attached Mux To Aggregator", bondport_get_name(p));
4164 		bondport_flags_set_mux_attached(p);
4165 	}
4166 	return;
4167 }
4168 
4169 static void
bondport_DetachMuxFromAggregator(bondport_ref p)4170 bondport_DetachMuxFromAggregator(bondport_ref p)
4171 {
4172 	if (bondport_flags_mux_attached(p)) {
4173 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4174 		    "[%s] Detached Mux From Aggregator", bondport_get_name(p));
4175 		bondport_flags_clear_mux_attached(p);
4176 	}
4177 	return;
4178 }
4179 
4180 static void
bondport_enable_distributing(bondport_ref p)4181 bondport_enable_distributing(bondport_ref p)
4182 {
4183 	if (bondport_flags_distributing(p) == 0) {
4184 		ifbond_ref      bond = p->po_bond;
4185 
4186 		bond->ifb_distributing_array[bond->ifb_distributing_count++] = p;
4187 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4188 		    "[%s] Distribution Enabled", bondport_get_name(p));
4189 		bondport_flags_set_distributing(p);
4190 	}
4191 	return;
4192 }
4193 
4194 static void
bondport_disable_distributing(bondport_ref p)4195 bondport_disable_distributing(bondport_ref p)
4196 {
4197 	if (bondport_flags_distributing(p)) {
4198 		bondport_ref *  array;
4199 		ifbond_ref      bond;
4200 		int             count;
4201 		int             i;
4202 
4203 		bond = p->po_bond;
4204 		array = bond->ifb_distributing_array;
4205 		count = bond->ifb_distributing_count;
4206 		for (i = 0; i < count; i++) {
4207 			if (array[i] == p) {
4208 				int     j;
4209 
4210 				for (j = i; j < (count - 1); j++) {
4211 					array[j] = array[j + 1];
4212 				}
4213 				break;
4214 			}
4215 		}
4216 		bond->ifb_distributing_count--;
4217 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4218 		    "[%s] Distribution Disabled", bondport_get_name(p));
4219 		bondport_flags_clear_distributing(p);
4220 	}
4221 	return;
4222 }
4223 
4224 /**
4225 ** Receive machine functions
4226 **/
4227 static void
4228 bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
4229     void * event_data);
4230 static void
4231 bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
4232     void * event_data);
4233 static void
4234 bondport_receive_machine_expired(bondport_ref p, LAEvent event,
4235     void * event_data);
4236 static void
4237 bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
4238     void * event_data);
4239 static void
4240 bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
4241     void * event_data);
4242 static void
4243 bondport_receive_machine_current(bondport_ref p, LAEvent event,
4244     void * event_data);
4245 
4246 static void
bondport_receive_machine_event(bondport_ref p,LAEvent event,void * event_data)4247 bondport_receive_machine_event(bondport_ref p, LAEvent event,
4248     void * event_data)
4249 {
4250 	switch (p->po_receive_state) {
4251 	case ReceiveState_none:
4252 		bondport_receive_machine_initialize(p, LAEventStart, NULL);
4253 		break;
4254 	case ReceiveState_INITIALIZE:
4255 		bondport_receive_machine_initialize(p, event, event_data);
4256 		break;
4257 	case ReceiveState_PORT_DISABLED:
4258 		bondport_receive_machine_port_disabled(p, event, event_data);
4259 		break;
4260 	case ReceiveState_EXPIRED:
4261 		bondport_receive_machine_expired(p, event, event_data);
4262 		break;
4263 	case ReceiveState_LACP_DISABLED:
4264 		bondport_receive_machine_lacp_disabled(p, event, event_data);
4265 		break;
4266 	case ReceiveState_DEFAULTED:
4267 		bondport_receive_machine_defaulted(p, event, event_data);
4268 		break;
4269 	case ReceiveState_CURRENT:
4270 		bondport_receive_machine_current(p, event, event_data);
4271 		break;
4272 	default:
4273 		break;
4274 	}
4275 	return;
4276 }
4277 
4278 static void
bondport_receive_machine(bondport_ref p,LAEvent event,void * event_data)4279 bondport_receive_machine(bondport_ref p, LAEvent event,
4280     void * event_data)
4281 {
4282 	switch (event) {
4283 	case LAEventPacket:
4284 		if (p->po_receive_state != ReceiveState_LACP_DISABLED) {
4285 			bondport_receive_machine_current(p, event, event_data);
4286 		}
4287 		break;
4288 	case LAEventMediaChange:
4289 		if (media_active(&p->po_media_info)) {
4290 			switch (p->po_receive_state) {
4291 			case ReceiveState_PORT_DISABLED:
4292 			case ReceiveState_LACP_DISABLED:
4293 				bondport_receive_machine_port_disabled(p, LAEventMediaChange, NULL);
4294 				break;
4295 			default:
4296 				break;
4297 			}
4298 		} else {
4299 			bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
4300 		}
4301 		break;
4302 	default:
4303 		bondport_receive_machine_event(p, event, event_data);
4304 		break;
4305 	}
4306 	return;
4307 }
4308 
4309 static void
bondport_receive_machine_initialize(bondport_ref p,LAEvent event,__unused void * event_data)4310 bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
4311     __unused void * event_data)
4312 {
4313 	switch (event) {
4314 	case LAEventStart:
4315 		devtimer_cancel(p->po_current_while_timer);
4316 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4317 		    "[%s] Receive INITIALIZE", bondport_get_name(p));
4318 		p->po_receive_state = ReceiveState_INITIALIZE;
4319 		bondport_set_selected(p, SelectedState_UNSELECTED);
4320 		bondport_RecordDefault(p);
4321 		p->po_actor_state
4322 		        = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4323 		bondport_receive_machine_port_disabled(p, LAEventStart, NULL);
4324 		break;
4325 	default:
4326 		break;
4327 	}
4328 	return;
4329 }
4330 
4331 static void
bondport_receive_machine_port_disabled(bondport_ref p,LAEvent event,__unused void * event_data)4332 bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
4333     __unused void * event_data)
4334 {
4335 	partner_state_ref   ps;
4336 
4337 	switch (event) {
4338 	case LAEventStart:
4339 		devtimer_cancel(p->po_current_while_timer);
4340 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4341 		    "[%s] Receive PORT_DISABLED", bondport_get_name(p));
4342 		p->po_receive_state = ReceiveState_PORT_DISABLED;
4343 		ps = &p->po_partner_state;
4344 		ps->ps_state = lacp_actor_partner_state_set_out_of_sync(ps->ps_state);
4345 		OS_FALLTHROUGH;
4346 	case LAEventMediaChange:
4347 		if (media_active(&p->po_media_info)) {
4348 			if (media_ok(&p->po_media_info)) {
4349 				bondport_receive_machine_expired(p, LAEventStart, NULL);
4350 			} else {
4351 				bondport_receive_machine_lacp_disabled(p, LAEventStart, NULL);
4352 			}
4353 		} else if (p->po_selected == SelectedState_SELECTED) {
4354 			struct timeval      tv;
4355 
4356 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4357 			    "[%s] Receive PORT_DISABLED: link timer started",
4358 			    bondport_get_name(p));
4359 			tv.tv_sec = 1;
4360 			tv.tv_usec = 0;
4361 			devtimer_set_relative(p->po_current_while_timer, tv,
4362 			    (devtimer_timeout_func)(void (*)(void))
4363 			    bondport_receive_machine_port_disabled,
4364 			    __unsafe_forge_single(void *, LAEventTimeout), NULL);
4365 		} else if (p->po_selected == SelectedState_STANDBY) {
4366 			bondport_set_selected(p, SelectedState_UNSELECTED);
4367 		}
4368 		break;
4369 	case LAEventTimeout:
4370 		if (p->po_selected == SelectedState_SELECTED) {
4371 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4372 			    "[%s] Receive PORT_DISABLED: "
4373 			    "link timer completed, marking UNSELECTED",
4374 			    bondport_get_name(p));
4375 			bondport_set_selected(p, SelectedState_UNSELECTED);
4376 		}
4377 		break;
4378 	case LAEventPortMoved:
4379 		bondport_receive_machine_initialize(p, LAEventStart, NULL);
4380 		break;
4381 	default:
4382 		break;
4383 	}
4384 	return;
4385 }
4386 
4387 static void
bondport_receive_machine_expired(bondport_ref p,LAEvent event,__unused void * event_data)4388 bondport_receive_machine_expired(bondport_ref p, LAEvent event,
4389     __unused void * event_data)
4390 {
4391 	lacp_actor_partner_state    s;
4392 	struct timeval              tv;
4393 
4394 	switch (event) {
4395 	case LAEventStart:
4396 		devtimer_cancel(p->po_current_while_timer);
4397 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4398 		    "[%s] Receive EXPIRED", bondport_get_name(p));
4399 		p->po_receive_state = ReceiveState_EXPIRED;
4400 		s = p->po_partner_state.ps_state;
4401 		s = lacp_actor_partner_state_set_out_of_sync(s);
4402 		s = lacp_actor_partner_state_set_short_timeout(s);
4403 		p->po_partner_state.ps_state = s;
4404 		p->po_actor_state
4405 		        = lacp_actor_partner_state_set_expired(p->po_actor_state);
4406 		/* start current_while timer */
4407 		tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4408 		tv.tv_usec = 0;
4409 		devtimer_set_relative(p->po_current_while_timer, tv,
4410 		    (devtimer_timeout_func)(void (*)(void))
4411 		    bondport_receive_machine_expired,
4412 		    __unsafe_forge_single(void *, LAEventTimeout), NULL);
4413 
4414 		break;
4415 	case LAEventTimeout:
4416 		bondport_receive_machine_defaulted(p, LAEventStart, NULL);
4417 		break;
4418 	default:
4419 		break;
4420 	}
4421 	return;
4422 }
4423 
4424 static void
bondport_receive_machine_lacp_disabled(bondport_ref p,LAEvent event,__unused void * event_data)4425 bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
4426     __unused void * event_data)
4427 {
4428 	partner_state_ref   ps;
4429 	switch (event) {
4430 	case LAEventStart:
4431 		devtimer_cancel(p->po_current_while_timer);
4432 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4433 		    "[%s] Receive LACP_DISABLED", bondport_get_name(p));
4434 		p->po_receive_state = ReceiveState_LACP_DISABLED;
4435 		bondport_set_selected(p, SelectedState_UNSELECTED);
4436 		bondport_RecordDefault(p);
4437 		ps = &p->po_partner_state;
4438 		ps->ps_state = lacp_actor_partner_state_set_individual(ps->ps_state);
4439 		p->po_actor_state
4440 		        = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4441 		break;
4442 	default:
4443 		break;
4444 	}
4445 	return;
4446 }
4447 
4448 static void
bondport_receive_machine_defaulted(bondport_ref p,LAEvent event,__unused void * event_data)4449 bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
4450     __unused void * event_data)
4451 {
4452 	switch (event) {
4453 	case LAEventStart:
4454 		devtimer_cancel(p->po_current_while_timer);
4455 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4456 		    "[%s] Receive DEFAULTED", bondport_get_name(p));
4457 		p->po_receive_state = ReceiveState_DEFAULTED;
4458 		bondport_UpdateDefaultSelected(p);
4459 		bondport_RecordDefault(p);
4460 		p->po_actor_state
4461 		        = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4462 		break;
4463 	default:
4464 		break;
4465 	}
4466 	return;
4467 }
4468 
4469 static void
bondport_receive_machine_current(bondport_ref p,LAEvent event,void * event_data)4470 bondport_receive_machine_current(bondport_ref p, LAEvent event,
4471     void * event_data)
4472 {
4473 	partner_state_ref   ps;
4474 	struct timeval      tv;
4475 
4476 	switch (event) {
4477 	case LAEventPacket:
4478 		devtimer_cancel(p->po_current_while_timer);
4479 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4480 		    "[%s] Receive CURRENT", bondport_get_name(p));
4481 		p->po_receive_state = ReceiveState_CURRENT;
4482 		bondport_UpdateSelected(p, event_data);
4483 		bondport_UpdateNTT(p, event_data);
4484 		bondport_RecordPDU(p, event_data);
4485 		p->po_actor_state
4486 		        = lacp_actor_partner_state_set_not_expired(p->po_actor_state);
4487 		bondport_assign_to_LAG(p);
4488 		/* start current_while timer */
4489 		ps = &p->po_partner_state;
4490 		if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4491 			tv.tv_sec = LACP_SHORT_TIMEOUT_TIME;
4492 		} else {
4493 			tv.tv_sec = LACP_LONG_TIMEOUT_TIME;
4494 		}
4495 		tv.tv_usec = 0;
4496 		devtimer_set_relative(p->po_current_while_timer, tv,
4497 		    (devtimer_timeout_func)(void (*)(void))
4498 		    bondport_receive_machine_current,
4499 		    __unsafe_forge_single(void *, LAEventTimeout), NULL);
4500 		break;
4501 	case LAEventTimeout:
4502 		bondport_receive_machine_expired(p, LAEventStart, NULL);
4503 		break;
4504 	default:
4505 		break;
4506 	}
4507 	return;
4508 }
4509 
4510 /**
4511 ** Periodic Transmission machine
4512 **/
4513 
4514 static void
bondport_periodic_transmit_machine(bondport_ref p,LAEvent event,__unused void * event_data)4515 bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
4516     __unused void * event_data)
4517 {
4518 	int                 interval;
4519 	partner_state_ref   ps;
4520 	struct timeval      tv;
4521 
4522 	switch (event) {
4523 	case LAEventStart:
4524 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4525 		    "[%s] periodic_transmit Start", bondport_get_name(p));
4526 		OS_FALLTHROUGH;
4527 	case LAEventMediaChange:
4528 		devtimer_cancel(p->po_periodic_timer);
4529 		p->po_periodic_interval = 0;
4530 		if (media_active(&p->po_media_info) == 0
4531 		    || media_ok(&p->po_media_info) == 0) {
4532 			break;
4533 		}
4534 		OS_FALLTHROUGH;
4535 	case LAEventPacket:
4536 		/* Neither Partner nor Actor are LACP Active, no periodic tx */
4537 		ps = &p->po_partner_state;
4538 		if (lacp_actor_partner_state_active_lacp(p->po_actor_state) == 0
4539 		    && (lacp_actor_partner_state_active_lacp(ps->ps_state)
4540 		    == 0)) {
4541 			devtimer_cancel(p->po_periodic_timer);
4542 			p->po_periodic_interval = 0;
4543 			break;
4544 		}
4545 		if (lacp_actor_partner_state_short_timeout(ps->ps_state)) {
4546 			interval = LACP_FAST_PERIODIC_TIME;
4547 		} else {
4548 			interval = LACP_SLOW_PERIODIC_TIME;
4549 		}
4550 		if (p->po_periodic_interval != interval) {
4551 			if (interval == LACP_FAST_PERIODIC_TIME
4552 			    && p->po_periodic_interval
4553 			    == LACP_SLOW_PERIODIC_TIME) {
4554 				BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4555 				    "[%s] periodic_transmit: Need To Transmit",
4556 				    bondport_get_name(p));
4557 				bondport_flags_set_ntt(p);
4558 			}
4559 			p->po_periodic_interval = interval;
4560 			tv.tv_usec = 0;
4561 			tv.tv_sec = interval;
4562 			devtimer_set_relative(p->po_periodic_timer, tv,
4563 			    (devtimer_timeout_func)(void (*)(void))
4564 			    bondport_periodic_transmit_machine,
4565 			    __unsafe_forge_single(void *, LAEventTimeout), NULL);
4566 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4567 			    "[%s] Periodic Transmission Timer: %d secs",
4568 			    bondport_get_name(p),
4569 			    p->po_periodic_interval);
4570 		}
4571 		break;
4572 	case LAEventTimeout:
4573 		bondport_flags_set_ntt(p);
4574 		tv.tv_sec = p->po_periodic_interval;
4575 		tv.tv_usec = 0;
4576 		devtimer_set_relative(p->po_periodic_timer, tv, (devtimer_timeout_func)(void (*)(void))
4577 		    bondport_periodic_transmit_machine,
4578 		    __unsafe_forge_single(void *, LAEventTimeout), NULL);
4579 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4580 		    "[%s] Periodic Transmission Timer: %d secs",
4581 		    bondport_get_name(p), p->po_periodic_interval);
4582 		break;
4583 	default:
4584 		break;
4585 	}
4586 	return;
4587 }
4588 
4589 /**
4590 ** Transmit machine
4591 **/
4592 static int
bondport_can_transmit(bondport_ref p,int32_t current_secs,__darwin_time_t * next_secs)4593 bondport_can_transmit(bondport_ref p, int32_t current_secs,
4594     __darwin_time_t * next_secs)
4595 {
4596 	if (p->po_last_transmit_secs != current_secs) {
4597 		p->po_last_transmit_secs = current_secs;
4598 		p->po_n_transmit = 0;
4599 	}
4600 	if (p->po_n_transmit < LACP_PACKET_RATE) {
4601 		p->po_n_transmit++;
4602 		return 1;
4603 	}
4604 	if (next_secs != NULL) {
4605 		*next_secs = current_secs + 1;
4606 	}
4607 	return 0;
4608 }
4609 
4610 static void
bondport_transmit_machine(bondport_ref p,LAEvent event,void * event_data)4611 bondport_transmit_machine(bondport_ref p, LAEvent event,
4612     void * event_data)
4613 {
4614 	lacp_actor_partner_tlv_ref  aptlv;
4615 	lacp_collector_tlv_ref      ctlv;
4616 	struct timeval              next_tick_time = {.tv_sec = 0, .tv_usec = 0};
4617 	lacpdu_ref          out_lacpdu_p;
4618 	packet_buffer_ref           pkt;
4619 	partner_state_ref           ps;
4620 	LAG_info_ref                ps_li;
4621 
4622 	switch (event) {
4623 	case LAEventTimeout:
4624 	case LAEventStart:
4625 		if (p->po_periodic_interval == 0 || bondport_flags_ntt(p) == 0) {
4626 			break;
4627 		}
4628 		if (event_data == TRANSMIT_MACHINE_TX_IMMEDIATE) {
4629 			/* we're going away, transmit the packet no matter what */
4630 		} else if (bondport_can_transmit(p, devtimer_current_secs(),
4631 		    &next_tick_time.tv_sec) == 0) {
4632 			if (devtimer_enabled(p->po_transmit_timer)) {
4633 				BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4634 				    "[%s] Transmit Timer Already Set",
4635 				    bondport_get_name(p));
4636 			} else {
4637 				devtimer_set_absolute(p->po_transmit_timer, next_tick_time,
4638 				    (devtimer_timeout_func)(void (*)(void))
4639 				    bondport_transmit_machine,
4640 				    __unsafe_forge_single(void *, LAEventTimeout), NULL);
4641 				BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4642 				    "[%s] Transmit Timer Deadline %d secs",
4643 				    bondport_get_name(p),
4644 				    (int)next_tick_time.tv_sec);
4645 			}
4646 			break;
4647 		}
4648 		if (event == LAEventTimeout) {
4649 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4650 			    "[%s] Transmit Timer Complete",
4651 			    bondport_get_name(p));
4652 		}
4653 		pkt = packet_buffer_allocate(sizeof(*out_lacpdu_p));
4654 		if (pkt == NULL) {
4655 			printf("[%s] Transmit: failed to allocate packet buffer\n",
4656 			    bondport_get_name(p));
4657 			break;
4658 		}
4659 		out_lacpdu_p = (lacpdu_ref)packet_buffer_byteptr(pkt);
4660 		bzero(out_lacpdu_p, sizeof(*out_lacpdu_p));
4661 		out_lacpdu_p->la_subtype = IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP;
4662 		out_lacpdu_p->la_version = LACPDU_VERSION_1;
4663 
4664 		/* Actor */
4665 		aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_actor_tlv;
4666 		aptlv->lap_tlv_type = LACPDU_TLV_TYPE_ACTOR;
4667 		aptlv->lap_length = LACPDU_ACTOR_TLV_LENGTH;
4668 		*((lacp_system_ref)aptlv->lap_system) = g_bond->system;
4669 		lacp_actor_partner_tlv_set_system_priority(aptlv,
4670 		    g_bond->system_priority);
4671 		lacp_actor_partner_tlv_set_port_priority(aptlv, p->po_priority);
4672 		lacp_actor_partner_tlv_set_port(aptlv, bondport_get_index(p));
4673 		lacp_actor_partner_tlv_set_key(aptlv, p->po_bond->ifb_key);
4674 		aptlv->lap_state = p->po_actor_state;
4675 
4676 		/* Partner */
4677 		aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_partner_tlv;
4678 		aptlv->lap_tlv_type = LACPDU_TLV_TYPE_PARTNER;
4679 		aptlv->lap_length = LACPDU_PARTNER_TLV_LENGTH;
4680 		ps = &p->po_partner_state;
4681 		ps_li = &ps->ps_lag_info;
4682 		lacp_actor_partner_tlv_set_port(aptlv, ps->ps_port);
4683 		lacp_actor_partner_tlv_set_port_priority(aptlv, ps->ps_port_priority);
4684 		*((lacp_system_ref)aptlv->lap_system) = ps_li->li_system;
4685 		lacp_actor_partner_tlv_set_system_priority(aptlv,
4686 		    ps_li->li_system_priority);
4687 		lacp_actor_partner_tlv_set_key(aptlv, ps_li->li_key);
4688 		aptlv->lap_state = ps->ps_state;
4689 
4690 		/* Collector */
4691 		ctlv = (lacp_collector_tlv_ref)out_lacpdu_p->la_collector_tlv;
4692 		ctlv->lac_tlv_type = LACPDU_TLV_TYPE_COLLECTOR;
4693 		ctlv->lac_length = LACPDU_COLLECTOR_TLV_LENGTH;
4694 
4695 		bondport_slow_proto_transmit(p, pkt);
4696 		bondport_flags_clear_ntt(p);
4697 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP, "[%s] Transmit Packet %d",
4698 		    bondport_get_name(p), p->po_n_transmit);
4699 		break;
4700 	default:
4701 		break;
4702 	}
4703 	return;
4704 }
4705 
4706 /**
4707 ** Mux machine functions
4708 **/
4709 
4710 static void
4711 bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4712     void * event_data);
4713 static void
4714 bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4715     void * event_data);
4716 static void
4717 bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4718     void * event_data);
4719 
4720 static void
4721 bondport_mux_machine_collecting_distributing(bondport_ref p, LAEvent event,
4722     void * event_data);
4723 
4724 static void
bondport_mux_machine(bondport_ref p,LAEvent event,void * event_data)4725 bondport_mux_machine(bondport_ref p, LAEvent event, void * event_data)
4726 {
4727 	switch (p->po_mux_state) {
4728 	case MuxState_none:
4729 		bondport_mux_machine_detached(p, LAEventStart, NULL);
4730 		break;
4731 	case MuxState_DETACHED:
4732 		bondport_mux_machine_detached(p, event, event_data);
4733 		break;
4734 	case MuxState_WAITING:
4735 		bondport_mux_machine_waiting(p, event, event_data);
4736 		break;
4737 	case MuxState_ATTACHED:
4738 		bondport_mux_machine_attached(p, event, event_data);
4739 		break;
4740 	case MuxState_COLLECTING_DISTRIBUTING:
4741 		bondport_mux_machine_collecting_distributing(p, event, event_data);
4742 		break;
4743 	default:
4744 		break;
4745 	}
4746 	return;
4747 }
4748 
4749 static void
bondport_mux_machine_detached(bondport_ref p,LAEvent event,__unused void * event_data)4750 bondport_mux_machine_detached(bondport_ref p, LAEvent event,
4751     __unused void * event_data)
4752 {
4753 	lacp_actor_partner_state    s;
4754 
4755 	switch (event) {
4756 	case LAEventStart:
4757 		devtimer_cancel(p->po_wait_while_timer);
4758 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP, "[%s] Mux DETACHED",
4759 		    bondport_get_name(p));
4760 		p->po_mux_state = MuxState_DETACHED;
4761 		bondport_flags_clear_ready(p);
4762 		bondport_DetachMuxFromAggregator(p);
4763 		bondport_disable_distributing(p);
4764 		s = p->po_actor_state;
4765 		s = lacp_actor_partner_state_set_out_of_sync(s);
4766 		s = lacp_actor_partner_state_set_not_collecting(s);
4767 		s = lacp_actor_partner_state_set_not_distributing(s);
4768 		p->po_actor_state = s;
4769 		bondport_flags_set_ntt(p);
4770 		break;
4771 	case LAEventSelectedChange:
4772 	case LAEventPacket:
4773 	case LAEventMediaChange:
4774 		if (p->po_selected == SelectedState_SELECTED
4775 		    || p->po_selected == SelectedState_STANDBY) {
4776 			bondport_mux_machine_waiting(p, LAEventStart, NULL);
4777 		}
4778 		break;
4779 	default:
4780 		break;
4781 	}
4782 	return;
4783 }
4784 
4785 static void
bondport_mux_machine_waiting(bondport_ref p,LAEvent event,__unused void * event_data)4786 bondport_mux_machine_waiting(bondport_ref p, LAEvent event,
4787     __unused void * event_data)
4788 {
4789 	struct timeval      tv;
4790 
4791 	switch (event) {
4792 	case LAEventStart:
4793 		devtimer_cancel(p->po_wait_while_timer);
4794 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP, "[%s] Mux WAITING",
4795 		    bondport_get_name(p));
4796 		p->po_mux_state = MuxState_WAITING;
4797 		OS_FALLTHROUGH;
4798 	default:
4799 	case LAEventSelectedChange:
4800 		if (p->po_selected == SelectedState_UNSELECTED) {
4801 			bondport_mux_machine_detached(p, LAEventStart, NULL);
4802 			break;
4803 		}
4804 		if (p->po_selected == SelectedState_STANDBY) {
4805 			devtimer_cancel(p->po_wait_while_timer);
4806 			/* wait until state changes to SELECTED */
4807 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4808 			    "[%s] Mux WAITING: Standby", bondport_get_name(p));
4809 			break;
4810 		}
4811 		if (bondport_flags_ready(p)) {
4812 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4813 			    "[%s] Mux WAITING: Port is already ready",
4814 			    bondport_get_name(p));
4815 			break;
4816 		}
4817 		if (devtimer_enabled(p->po_wait_while_timer)) {
4818 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4819 			    "[%s] Mux WAITING: Timer already set",
4820 			    bondport_get_name(p));
4821 			break;
4822 		}
4823 		if (ifbond_all_ports_attached(p->po_bond, p)) {
4824 			devtimer_cancel(p->po_wait_while_timer);
4825 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4826 			    "[%s] Mux WAITING: No waiting",
4827 			    bondport_get_name(p));
4828 			bondport_flags_set_ready(p);
4829 			goto no_waiting;
4830 		}
4831 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4832 		    "[%s] Mux WAITING: 2 seconds", bondport_get_name(p));
4833 		tv.tv_sec = LACP_AGGREGATE_WAIT_TIME;
4834 		tv.tv_usec = 0;
4835 		devtimer_set_relative(p->po_wait_while_timer, tv,
4836 		    (devtimer_timeout_func)(void (*)(void))
4837 		    bondport_mux_machine_waiting,
4838 		    __unsafe_forge_single(void *, LAEventTimeout), NULL);
4839 		break;
4840 	case LAEventTimeout:
4841 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP, "[%s] Mux WAITING: Ready",
4842 		    bondport_get_name(p));
4843 		bondport_flags_set_ready(p);
4844 		break;
4845 	case LAEventReady:
4846 no_waiting:
4847 		if (bondport_flags_ready(p)) {
4848 			BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4849 			    "[%s] Mux WAITING: All Ports Ready",
4850 			    bondport_get_name(p));
4851 			bondport_mux_machine_attached(p, LAEventStart, NULL);
4852 			break;
4853 		}
4854 		break;
4855 	}
4856 	return;
4857 }
4858 
4859 static void
bondport_mux_machine_attached(bondport_ref p,LAEvent event,__unused void * event_data)4860 bondport_mux_machine_attached(bondport_ref p, LAEvent event,
4861     __unused void * event_data)
4862 {
4863 	lacp_actor_partner_state    s;
4864 
4865 	switch (event) {
4866 	case LAEventStart:
4867 		devtimer_cancel(p->po_wait_while_timer);
4868 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP, "[%s] Mux ATTACHED",
4869 		    bondport_get_name(p));
4870 		p->po_mux_state = MuxState_ATTACHED;
4871 		bondport_AttachMuxToAggregator(p);
4872 		s = p->po_actor_state;
4873 		s = lacp_actor_partner_state_set_in_sync(s);
4874 		s = lacp_actor_partner_state_set_not_collecting(s);
4875 		s = lacp_actor_partner_state_set_not_distributing(s);
4876 		bondport_disable_distributing(p);
4877 		p->po_actor_state = s;
4878 		bondport_flags_set_ntt(p);
4879 		OS_FALLTHROUGH;
4880 	default:
4881 		switch (p->po_selected) {
4882 		case SelectedState_SELECTED:
4883 			s = p->po_partner_state.ps_state;
4884 			if (lacp_actor_partner_state_in_sync(s)) {
4885 				bondport_mux_machine_collecting_distributing(p,
4886 				    LAEventStart, NULL);
4887 			}
4888 			break;
4889 		default:
4890 			bondport_mux_machine_detached(p, LAEventStart, NULL);
4891 			break;
4892 		}
4893 		break;
4894 	}
4895 	return;
4896 }
4897 
4898 static void
bondport_mux_machine_collecting_distributing(bondport_ref p,LAEvent event,__unused void * event_data)4899 bondport_mux_machine_collecting_distributing(bondport_ref p,
4900     LAEvent event,
4901     __unused void * event_data)
4902 {
4903 	lacp_actor_partner_state    s;
4904 
4905 	switch (event) {
4906 	case LAEventStart:
4907 		devtimer_cancel(p->po_wait_while_timer);
4908 		BOND_LOG(LOG_DEBUG, BD_DBGF_LACP,
4909 		    "[%s] Mux COLLECTING_DISTRIBUTING",
4910 		    bondport_get_name(p));
4911 		p->po_mux_state = MuxState_COLLECTING_DISTRIBUTING;
4912 		bondport_enable_distributing(p);
4913 		s = p->po_actor_state;
4914 		s = lacp_actor_partner_state_set_collecting(s);
4915 		s = lacp_actor_partner_state_set_distributing(s);
4916 		p->po_actor_state = s;
4917 		bondport_flags_set_ntt(p);
4918 		OS_FALLTHROUGH;
4919 	default:
4920 		s = p->po_partner_state.ps_state;
4921 		if (lacp_actor_partner_state_in_sync(s) == 0) {
4922 			bondport_mux_machine_attached(p, LAEventStart, NULL);
4923 			break;
4924 		}
4925 		switch (p->po_selected) {
4926 		case SelectedState_UNSELECTED:
4927 		case SelectedState_STANDBY:
4928 			bondport_mux_machine_attached(p, LAEventStart, NULL);
4929 			break;
4930 		default:
4931 			break;
4932 		}
4933 		break;
4934 	}
4935 	return;
4936 }
4937