xref: /xnu-10002.1.13/bsd/netinet/igmp.c (revision 1031c584a5e37aff177559b9f69dbd3c8c3fd30a)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*-
29  * Copyright (c) 2007-2009 Bruce Simpson.
30  * Copyright (c) 1988 Stephen Deering.
31  * Copyright (c) 1992, 1993
32  *	The Regents of the University of California.  All rights reserved.
33  *
34  * This code is derived from software contributed to Berkeley by
35  * Stephen Deering of Stanford University.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
66  */
67 /*
68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69  * support for mandatory and extensible security protections.  This notice
70  * is included in support of clause 2.2 (b) of the Apple Public License,
71  * Version 2.0.
72  */
73 
74 /*
75  * Internet Group Management Protocol (IGMP) routines.
76  * [RFC1112, RFC2236, RFC3376]
77  *
78  * Written by Steve Deering, Stanford, May 1988.
79  * Modified by Rosen Sharma, Stanford, Aug 1994.
80  * Modified by Bill Fenner, Xerox PARC, Feb 1995.
81  * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
82  * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
83  *
84  * MULTICAST Revision: 3.5.1.4
85  */
86 
87 #include <sys/cdefs.h>
88 
89 #include <sys/param.h>
90 #include <sys/systm.h>
91 #include <sys/malloc.h>
92 #include <sys/mbuf.h>
93 #include <sys/socket.h>
94 #include <sys/protosw.h>
95 #include <sys/kernel.h>
96 #include <sys/sysctl.h>
97 #include <sys/mcache.h>
98 
99 #include <libkern/libkern.h>
100 #include <kern/zalloc.h>
101 
102 #include <net/if.h>
103 #include <net/route.h>
104 
105 #include <netinet/in.h>
106 #include <netinet/in_var.h>
107 #include <netinet/in_systm.h>
108 #include <netinet/ip.h>
109 #include <netinet/ip_var.h>
110 #include <netinet/igmp.h>
111 #include <netinet/igmp_var.h>
112 #include <netinet/kpi_ipfilter_var.h>
113 
114 #if SKYWALK
115 #include <skywalk/core/skywalk_var.h>
116 #endif /* SKYWALK */
117 
118 SLIST_HEAD(igmp_inm_relhead, in_multi);
119 
120 static void     igi_initvar(struct igmp_ifinfo *, struct ifnet *, int);
121 static struct igmp_ifinfo *igi_alloc(zalloc_flags_t);
122 static void     igi_free(struct igmp_ifinfo *);
123 static void     igi_delete(const struct ifnet *, struct igmp_inm_relhead *);
124 static void     igmp_dispatch_queue(struct igmp_ifinfo *, struct ifqueue *,
125     int, const int);
126 static void     igmp_final_leave(struct in_multi *, struct igmp_ifinfo *,
127     struct igmp_tparams *);
128 static int      igmp_handle_state_change(struct in_multi *,
129     struct igmp_ifinfo *, struct igmp_tparams *);
130 static int      igmp_initial_join(struct in_multi *, struct igmp_ifinfo *,
131     struct igmp_tparams *);
132 static int      igmp_input_v1_query(struct ifnet *, const struct ip *,
133     const struct igmp *);
134 static int      igmp_input_v2_query(struct ifnet *, const struct ip *,
135     const struct igmp *);
136 static int      igmp_input_v3_query(struct ifnet *, const struct ip *,
137     /*const*/ struct igmpv3 *);
138 static int      igmp_input_v3_group_query(struct in_multi *,
139     int, /*const*/ struct igmpv3 *);
140 static int      igmp_input_v1_report(struct ifnet *, struct mbuf *,
141     /*const*/ struct ip *, /*const*/ struct igmp *);
142 static int      igmp_input_v2_report(struct ifnet *, struct mbuf *,
143     /*const*/ struct ip *, /*const*/ struct igmp *);
144 static void     igmp_sendpkt(struct mbuf *);
145 static __inline__ int   igmp_isgroupreported(const struct in_addr);
146 static struct mbuf *igmp_ra_alloc(void);
147 #ifdef IGMP_DEBUG
148 static const char *igmp_rec_type_to_str(const int);
149 #endif
150 static uint32_t igmp_set_version(struct igmp_ifinfo *, const int);
151 static void     igmp_flush_relq(struct igmp_ifinfo *,
152     struct igmp_inm_relhead *);
153 static int      igmp_v1v2_queue_report(struct in_multi *, const int);
154 static void     igmp_v1v2_process_group_timer(struct in_multi *, const int);
155 static void     igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
156 static uint32_t igmp_v2_update_group(struct in_multi *, const int);
157 static void     igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
158 static uint32_t igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
159 static struct mbuf *
160 igmp_v3_encap_report(struct ifnet *, struct mbuf *);
161 static int      igmp_v3_enqueue_group_record(struct ifqueue *,
162     struct in_multi *, const int, const int, const int);
163 static int      igmp_v3_enqueue_filter_change(struct ifqueue *,
164     struct in_multi *);
165 static void     igmp_v3_process_group_timers(struct igmp_ifinfo *,
166     struct ifqueue *, struct ifqueue *, struct in_multi *,
167     const unsigned int);
168 static int      igmp_v3_merge_state_changes(struct in_multi *,
169     struct ifqueue *);
170 static void     igmp_v3_suppress_group_record(struct in_multi *);
171 static int      sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS;
172 static int      sysctl_igmp_gsr SYSCTL_HANDLER_ARGS;
173 static int      sysctl_igmp_default_version SYSCTL_HANDLER_ARGS;
174 
175 static const uint32_t igmp_timeout_delay = 1000; /* in milliseconds */
static const uint32_t igmp_timeout_leeway = 500; /* in milliseconds */
177 static bool igmp_timeout_run;            /* IGMP timer is scheduled to run */
178 static bool igmp_fast_timeout_run;       /* IGMP fast timer is scheduled to run */
179 static void igmp_timeout(thread_call_param_t, thread_call_param_t);
180 static void igmp_sched_timeout(void);
181 static void igmp_sched_fast_timeout(void);
182 
183 static struct mbuf *m_raopt;            /* Router Alert option */
184 
185 static int querier_present_timers_running;      /* IGMPv1/v2 older version
186                                                  * querier present */
187 static int interface_timers_running;            /* IGMPv3 general
188                                                  * query response */
189 static int state_change_timers_running;         /* IGMPv3 state-change
190                                                  * retransmit */
191 static int current_state_timers_running;        /* IGMPv1/v2 host
192                                                  * report; IGMPv3 g/sg
193                                                  * query response */
194 
195 /*
196  * Subsystem lock macros.
197  */
198 #define IGMP_LOCK()                     \
199 	lck_mtx_lock(&igmp_mtx)
200 #define IGMP_LOCK_ASSERT_HELD()         \
201 	LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_OWNED)
202 #define IGMP_LOCK_ASSERT_NOTHELD()      \
203 	LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED)
204 #define IGMP_UNLOCK()                   \
205 	lck_mtx_unlock(&igmp_mtx)
206 
207 static LIST_HEAD(, igmp_ifinfo) igi_head;
208 static struct igmpstat_v3 igmpstat_v3 = {
209 	.igps_version = IGPS_VERSION_3,
210 	.igps_len = sizeof(struct igmpstat_v3),
211 };
212 static struct igmpstat igmpstat; /* old IGMPv2 stats structure */
213 static struct timeval igmp_gsrdelay = {.tv_sec = 10, .tv_usec = 0};
214 
215 static int igmp_recvifkludge = 1;
216 static int igmp_sendra = 1;
217 static int igmp_sendlocal = 1;
218 static int igmp_v1enable = 1;
219 static int igmp_v2enable = 1;
220 static int igmp_legacysupp = 0;
221 static int igmp_default_version = IGMP_VERSION_3;
222 
223 SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
224     &igmpstat, igmpstat, "");
225 SYSCTL_STRUCT(_net_inet_igmp, OID_AUTO, v3stats,
226     CTLFLAG_RD | CTLFLAG_LOCKED, &igmpstat_v3, igmpstat_v3, "");
227 SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW | CTLFLAG_LOCKED,
228     &igmp_recvifkludge, 0,
229     "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
230 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW | CTLFLAG_LOCKED,
231     &igmp_sendra, 0,
232     "Send IP Router Alert option in IGMPv2/v3 messages");
233 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW | CTLFLAG_LOCKED,
234     &igmp_sendlocal, 0,
235     "Send IGMP membership reports for 224.0.0.0/24 groups");
236 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
237     &igmp_v1enable, 0,
238     "Enable backwards compatibility with IGMPv1");
239 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW | CTLFLAG_LOCKED,
240     &igmp_v2enable, 0,
241     "Enable backwards compatibility with IGMPv2");
242 SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW | CTLFLAG_LOCKED,
243     &igmp_legacysupp, 0,
244     "Allow v1/v2 reports to suppress v3 group responses");
245 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
246     CTLTYPE_INT | CTLFLAG_RW,
247     &igmp_default_version, 0, sysctl_igmp_default_version, "I",
248     "Default version of IGMP to run on each interface");
249 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
250     CTLTYPE_INT | CTLFLAG_RW,
251     &igmp_gsrdelay.tv_sec, 0, sysctl_igmp_gsr, "I",
252     "Rate limit for IGMPv3 Group-and-Source queries in seconds");
253 #ifdef IGMP_DEBUG
254 int igmp_debug = 0;
255 SYSCTL_INT(_net_inet_igmp, OID_AUTO,
256     debug, CTLFLAG_RW | CTLFLAG_LOCKED, &igmp_debug, 0, "");
257 #endif
258 
259 SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
260     sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
261 
262 /* Lock group and attribute for igmp_mtx */
263 static LCK_ATTR_DECLARE(igmp_mtx_attr, 0, 0);
264 static LCK_GRP_DECLARE(igmp_mtx_grp, "igmp_mtx");
265 
266 /*
267  * Locking and reference counting:
268  *
269  * igmp_mtx mainly protects igi_head.  In cases where both igmp_mtx and
270  * in_multihead_lock must be held, the former must be acquired first in order
271  * to maintain lock ordering.  It is not a requirement that igmp_mtx be
272  * acquired first before in_multihead_lock, but in case both must be acquired
273  * in succession, the correct lock ordering must be followed.
274  *
275  * Instead of walking the if_multiaddrs list at the interface and returning
276  * the ifma_protospec value of a matching entry, we search the global list
277  * of in_multi records and find it that way; this is done with in_multihead
278  * lock held.  Doing so avoids the race condition issues that many other BSDs
279  * suffer from (therefore in our implementation, ifma_protospec will never be
280  * NULL for as long as the in_multi is valid.)
281  *
282  * The above creates a requirement for the in_multi to stay in in_multihead
 * list even after the final IGMP leave (in IGMPv3 mode) until its pending
 * state-change records no longer need to be retransmitted (this is not
 * required for IGMPv1/v2.)  In order to handle
285  * this, the request and reference counts of the in_multi are bumped up when
286  * the state changes to IGMP_LEAVING_MEMBER, and later dropped in the timeout
287  * handler.  Each in_multi holds a reference to the underlying igmp_ifinfo.
288  *
 * Thus, the permitted lock order is:
290  *
291  *	igmp_mtx, in_multihead_lock, inm_lock, igi_lock
292  *
293  * Any may be taken independently, but if any are held at the same time,
294  * the above lock order must be followed.
295  */
296 static LCK_MTX_DECLARE_ATTR(igmp_mtx, &igmp_mtx_grp, &igmp_mtx_attr);
297 static int igmp_timers_are_running;
298 
/*
 * Append a detached in_multi record to a caller-supplied list.  The
 * reference the record holds is released later by
 * IGMP_REMOVE_DETACHED_INM(), after all locks have been dropped.
 */
#define IGMP_ADD_DETACHED_INM(_head, _inm) {                            \
	SLIST_INSERT_HEAD(_head, _inm, inm_dtle);                       \
}

/*
 * Drain the list built via IGMP_ADD_DETACHED_INM(), dropping one
 * in_multi reference per record and verifying the list is empty when
 * done.  Callers invoke this only after releasing all IGMP locks
 * (see igmp_domifdetach() and igi_remref()).
 */
#define IGMP_REMOVE_DETACHED_INM(_head) {                               \
	struct in_multi *_inm, *_inm_tmp;                               \
	SLIST_FOREACH_SAFE(_inm, _head, inm_dtle, _inm_tmp) {           \
	        SLIST_REMOVE(_head, _inm, in_multi, inm_dtle);          \
	        INM_REMREF(_inm);                                       \
	}                                                               \
	VERIFY(SLIST_EMPTY(_head));                                     \
}
311 
312 static KALLOC_TYPE_DEFINE(igi_zone, struct igmp_ifinfo, NET_KT_DEFAULT);
313 
314 /* Store IGMPv3 record count in the module private scratch space */
315 #define vt_nrecs        pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]
316 
static __inline void
igmp_save_context(struct mbuf *m, struct ifnet *ifp)
{
	/*
	 * Stash the output interface in the packet header's rcvif field
	 * while the IGMP message sits on a queue; it is read back later
	 * by igmp_restore_context().
	 */
	m->m_pkthdr.rcvif = ifp;
}
322 
static __inline void
igmp_scrub_context(struct mbuf *m)
{
	/* Clear the interface pointer stashed by igmp_save_context(). */
	m->m_pkthdr.rcvif = NULL;
}
328 
329 #ifdef IGMP_DEBUG
/*
 * Debug helper: format a host-byte-order IPv4 address into `buf'
 * in dotted-quad presentation form.  Returns buf, or NULL if the
 * buffer is too small.
 */
static __inline const char *
inet_ntop_haddr(in_addr_t haddr, char *buf, socklen_t size)
{
	struct in_addr in = { .s_addr = htonl(haddr) };

	return inet_ntop(AF_INET, &in, buf, size);
}
338 #endif
339 
340 /*
341  * Restore context from a queued IGMP output chain.
342  * Return saved ifp.
343  */
static __inline struct ifnet *
igmp_restore_context(struct mbuf *m)
{
	/* Return the ifp previously stashed by igmp_save_context(). */
	return m->m_pkthdr.rcvif;
}
349 
350 /*
351  * Retrieve or set default IGMP version.
352  */
353 static int
354 sysctl_igmp_default_version SYSCTL_HANDLER_ARGS
355 {
356 #pragma unused(oidp, arg2)
357 	int      error;
358 	int      new;
359 
360 	IGMP_LOCK();
361 
362 	error = SYSCTL_OUT(req, arg1, sizeof(int));
363 	if (error || !req->newptr) {
364 		goto out_locked;
365 	}
366 
367 	new = igmp_default_version;
368 
369 	error = SYSCTL_IN(req, &new, sizeof(int));
370 	if (error) {
371 		goto out_locked;
372 	}
373 
374 	if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
375 		error = EINVAL;
376 		goto out_locked;
377 	}
378 
379 	IGMP_PRINTF(("%s: change igmp_default_version from %d to %d\n",
380 	    __func__, igmp_default_version, new));
381 
382 	igmp_default_version = new;
383 
384 out_locked:
385 	IGMP_UNLOCK();
386 	return error;
387 }
388 
389 /*
390  * Retrieve or set threshold between group-source queries in seconds.
391  *
392  */
393 static int
394 sysctl_igmp_gsr SYSCTL_HANDLER_ARGS
395 {
396 #pragma unused(arg1, arg2)
397 	int error;
398 	int i;
399 
400 	IGMP_LOCK();
401 
402 	i = (int)igmp_gsrdelay.tv_sec;
403 
404 	error = sysctl_handle_int(oidp, &i, 0, req);
405 	if (error || !req->newptr) {
406 		goto out_locked;
407 	}
408 
409 	if (i < -1 || i >= 60) {
410 		error = EINVAL;
411 		goto out_locked;
412 	}
413 
414 	igmp_gsrdelay.tv_sec = i;
415 
416 out_locked:
417 	IGMP_UNLOCK();
418 	return error;
419 }
420 
421 /*
422  * Expose struct igmp_ifinfo to userland, keyed by ifindex.
423  * For use by ifmcstat(8).
424  *
425  */
static int
sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int                     *name;
	int                      error;
	u_int                    namelen;
	struct ifnet            *ifp;
	struct igmp_ifinfo      *igi;
	struct igmp_ifinfo_u    igi_u;          /* userland-visible snapshot */

	name = (int *)arg1;
	namelen = arg2;

	/* Read-only node; refuse any attempted write. */
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	/* Exactly one name component: the interface index. */
	if (namelen != 1) {
		return EINVAL;
	}

	IGMP_LOCK();

	if (name[0] <= 0 || name[0] > (u_int)if_index) {
		error = ENOENT;
		goto out_locked;
	}

	error = ENOENT;

	/* Translate the index to an ifnet under the ifnet head lock. */
	ifnet_head_lock_shared();
	ifp = ifindex2ifnet[name[0]];
	ifnet_head_done();
	if (ifp == NULL) {
		goto out_locked;
	}

	bzero(&igi_u, sizeof(igi_u));

	/*
	 * Walk the per-interface IGMP state list (protected by igmp_mtx)
	 * and copy out a snapshot of the matching entry, taken under its
	 * igi lock so the fields are mutually consistent.
	 */
	LIST_FOREACH(igi, &igi_head, igi_link) {
		IGI_LOCK(igi);
		if (ifp != igi->igi_ifp) {
			IGI_UNLOCK(igi);
			continue;
		}
		igi_u.igi_ifindex = igi->igi_ifp->if_index;
		igi_u.igi_version = igi->igi_version;
		igi_u.igi_v1_timer = igi->igi_v1_timer;
		igi_u.igi_v2_timer = igi->igi_v2_timer;
		igi_u.igi_v3_timer = igi->igi_v3_timer;
		igi_u.igi_flags = igi->igi_flags;
		igi_u.igi_rv = igi->igi_rv;
		igi_u.igi_qi = igi->igi_qi;
		igi_u.igi_qri = igi->igi_qri;
		igi_u.igi_uri = igi->igi_uri;
		IGI_UNLOCK(igi);

		/* SYSCTL_OUT is done after dropping the igi lock. */
		error = SYSCTL_OUT(req, &igi_u, sizeof(igi_u));
		break;
	}

out_locked:
	IGMP_UNLOCK();
	return error;
}
492 
493 /*
494  * Dispatch an entire queue of pending packet chains
495  *
496  * Must not be called with inm_lock held.
497  */
498 static void
igmp_dispatch_queue(struct igmp_ifinfo * igi,struct ifqueue * ifq,int limit,const int loop)499 igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit,
500     const int loop)
501 {
502 	struct mbuf *m;
503 	struct ip *ip;
504 
505 	if (igi != NULL) {
506 		IGI_LOCK_ASSERT_HELD(igi);
507 	}
508 
509 #if SKYWALK
510 	/*
511 	 * Since this function is called holding the igi lock, we need to ensure we
512 	 * don't enter the driver directly because a deadlock can happen if another
513 	 * thread holding the workloop lock tries to acquire the igi lock at
514 	 * the same time.
515 	 */
516 	sk_protect_t protect = sk_async_transmit_protect();
517 #endif /* SKYWALK */
518 
519 	for (;;) {
520 		IF_DEQUEUE(ifq, m);
521 		if (m == NULL) {
522 			break;
523 		}
524 		IGMP_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
525 		    (uint64_t)VM_KERNEL_ADDRPERM(ifq),
526 		    (uint64_t)VM_KERNEL_ADDRPERM(m)));
527 		ip = mtod(m, struct ip *);
528 		if (loop) {
529 			m->m_flags |= M_IGMP_LOOP;
530 		}
531 		if (igi != NULL) {
532 			IGI_UNLOCK(igi);
533 		}
534 		igmp_sendpkt(m);
535 		if (igi != NULL) {
536 			IGI_LOCK(igi);
537 		}
538 		if (--limit == 0) {
539 			break;
540 		}
541 	}
542 
543 #if SKYWALK
544 	sk_async_transmit_unprotect(protect);
545 #endif /* SKYWALK */
546 
547 	if (igi != NULL) {
548 		IGI_LOCK_ASSERT_HELD(igi);
549 	}
550 }
551 
552 /*
553  * Filter outgoing IGMP report state by group.
554  *
555  * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
556  * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
557  * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
558  * this may break certain IGMP snooping switches which rely on the old
559  * report behaviour.
560  *
561  * Return zero if the given group is one for which IGMP reports
562  * should be suppressed, or non-zero if reports should be issued.
563  */
564 
565 static __inline__
566 int
igmp_isgroupreported(const struct in_addr addr)567 igmp_isgroupreported(const struct in_addr addr)
568 {
569 	if (in_allhosts(addr) ||
570 	    ((!igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) {
571 		return 0;
572 	}
573 
574 	return 1;
575 }
576 
577 /*
578  * Construct a Router Alert option to use in outgoing packets.
579  */
580 static struct mbuf *
igmp_ra_alloc(void)581 igmp_ra_alloc(void)
582 {
583 	struct mbuf     *m;
584 	struct ipoption *p;
585 
586 	MGET(m, M_WAITOK, MT_DATA);
587 	p = mtod(m, struct ipoption *);
588 	p->ipopt_dst.s_addr = INADDR_ANY;
589 	p->ipopt_list[0] = (char)IPOPT_RA;      /* Router Alert Option */
590 	p->ipopt_list[1] = 0x04;        /* 4 bytes long */
591 	p->ipopt_list[2] = IPOPT_EOL;   /* End of IP option list */
592 	p->ipopt_list[3] = 0x00;        /* pad byte */
593 	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
594 
595 	return m;
596 }
597 
598 /*
599  * Attach IGMP when PF_INET is attached to an interface.
600  */
struct igmp_ifinfo *
igmp_domifattach(struct ifnet *ifp, zalloc_flags_t how)
{
	struct igmp_ifinfo *igi;

	IGMP_PRINTF(("%s: called for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));

	/* May return NULL if allocation fails (depending on `how'). */
	igi = igi_alloc(how);
	if (igi == NULL) {
		return NULL;
	}

	/* igmp_mtx protects igi_head; see lock ordering note above. */
	IGMP_LOCK();

	IGI_LOCK(igi);
	igi_initvar(igi, ifp, 0);
	igi->igi_debug |= IFD_ATTACHED;
	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
	IGI_ADDREF_LOCKED(igi); /* hold a reference for caller */
	IGI_UNLOCK(igi);
	/* Mark the igi silent if the interface lacks multicast capability. */
	ifnet_lock_shared(ifp);
	igmp_initsilent(ifp, igi);
	ifnet_lock_done(ifp);

	LIST_INSERT_HEAD(&igi_head, igi, igi_link);

	IGMP_UNLOCK();

	IGMP_PRINTF(("%s: allocate igmp_ifinfo for ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));

	return igi;
}
635 
636 /*
637  * Attach IGMP when PF_INET is reattached to an interface.  Caller is
638  * expected to have an outstanding reference to the igi.
639  */
void
igmp_domifreattach(struct igmp_ifinfo *igi)
{
	struct ifnet *ifp;

	IGMP_LOCK();

	IGI_LOCK(igi);
	/* Must not already be on igi_head. */
	VERIFY(!(igi->igi_debug & IFD_ATTACHED));
	ifp = igi->igi_ifp;
	VERIFY(ifp != NULL);
	/* reattach=1: keeps the existing igi_relinmhead list intact. */
	igi_initvar(igi, ifp, 1);
	igi->igi_debug |= IFD_ATTACHED;
	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
	IGI_UNLOCK(igi);
	ifnet_lock_shared(ifp);
	igmp_initsilent(ifp, igi);
	ifnet_lock_done(ifp);

	LIST_INSERT_HEAD(&igi_head, igi, igi_link);

	IGMP_UNLOCK();

	IGMP_PRINTF(("%s: reattached igmp_ifinfo for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));
}
666 
667 /*
668  * Hook for domifdetach.
669  */
void
igmp_domifdetach(struct ifnet *ifp)
{
	/* Records to be released once every lock has been dropped. */
	SLIST_HEAD(, in_multi) inm_dthead;

	SLIST_INIT(&inm_dthead);

	IGMP_PRINTF(("%s: called for ifp 0x%llx(%s%d)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name, ifp->if_unit));

	IGMP_LOCK();
	igi_delete(ifp, (struct igmp_inm_relhead *)&inm_dthead);
	IGMP_UNLOCK();

	/* Now that we've dropped all locks, release detached records */
	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
}
687 
688 /*
689  * Called at interface detach time.  Note that we only flush all deferred
690  * responses and record releases; all remaining inm records and their source
691  * entries related to this interface are left intact, in order to handle
692  * the reattach case.
693  */
static void
igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead)
{
	struct igmp_ifinfo *igi, *tigi;

	IGMP_LOCK_ASSERT_HELD();

	LIST_FOREACH_SAFE(igi, &igi_head, igi_link, tigi) {
		IGI_LOCK(igi);
		if (igi->igi_ifp == ifp) {
			/*
			 * Free deferred General Query responses.
			 */
			IF_DRAIN(&igi->igi_gq);
			IF_DRAIN(&igi->igi_v2q);
			/* Move pending release records onto inm_dthead. */
			igmp_flush_relq(igi, inm_dthead);
			VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
			igi->igi_debug &= ~IFD_ATTACHED;
			IGI_UNLOCK(igi);

			LIST_REMOVE(igi, igi_link);
			IGI_REMREF(igi); /* release igi_head reference */
			return;
		}
		IGI_UNLOCK(igi);
	}
	/* Every attached interface must have an igi; absence is fatal. */
	panic("%s: igmp_ifinfo not found for ifp %p(%s)", __func__,
	    ifp, ifp->if_xname);
}
723 
724 __private_extern__ void
igmp_initsilent(struct ifnet * ifp,struct igmp_ifinfo * igi)725 igmp_initsilent(struct ifnet *ifp, struct igmp_ifinfo *igi)
726 {
727 	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
728 
729 	IGI_LOCK_ASSERT_NOTHELD(igi);
730 	IGI_LOCK(igi);
731 	if (!(ifp->if_flags & IFF_MULTICAST)) {
732 		igi->igi_flags |= IGIF_SILENT;
733 	} else {
734 		igi->igi_flags &= ~IGIF_SILENT;
735 	}
736 	IGI_UNLOCK(igi);
737 }
738 
static void
igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach)
{
	IGI_LOCK_ASSERT_HELD(igi);

	igi->igi_ifp = ifp;
	igi->igi_version = igmp_default_version;
	igi->igi_flags = 0;
	/* Reset protocol timing/robustness parameters to their defaults. */
	igi->igi_rv = IGMP_RV_INIT;
	igi->igi_qi = IGMP_QI_INIT;
	igi->igi_qri = IGMP_QRI_INIT;
	igi->igi_uri = IGMP_URI_INIT;

	/* On reattach, keep the existing pending-release list intact. */
	if (!reattach) {
		SLIST_INIT(&igi->igi_relinmhead);
	}

	/*
	 * Responses to general queries are subject to bounds.
	 */
	igi->igi_gq.ifq_maxlen =  IGMP_MAX_RESPONSE_PACKETS;
	igi->igi_v2q.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
}
762 
763 static struct igmp_ifinfo *
igi_alloc(zalloc_flags_t how)764 igi_alloc(zalloc_flags_t how)
765 {
766 	struct igmp_ifinfo *igi = zalloc_flags(igi_zone, how | Z_ZERO);
767 	if (igi != NULL) {
768 		lck_mtx_init(&igi->igi_lock, &igmp_mtx_grp, &igmp_mtx_attr);
769 		igi->igi_debug |= IFD_ALLOC;
770 	}
771 	return igi;
772 }
773 
static void
igi_free(struct igmp_ifinfo *igi)
{
	IGI_LOCK(igi);
	/* Sanity checks: must be detached, unbound, allocated, unreferenced. */
	if (igi->igi_debug & IFD_ATTACHED) {
		panic("%s: attached igi=%p is being freed", __func__, igi);
		/* NOTREACHED */
	} else if (igi->igi_ifp != NULL) {
		panic("%s: ifp not NULL for igi=%p", __func__, igi);
		/* NOTREACHED */
	} else if (!(igi->igi_debug & IFD_ALLOC)) {
		panic("%s: igi %p cannot be freed", __func__, igi);
		/* NOTREACHED */
	} else if (igi->igi_refcnt != 0) {
		panic("%s: non-zero refcnt igi=%p", __func__, igi);
		/* NOTREACHED */
	}
	igi->igi_debug &= ~IFD_ALLOC;
	IGI_UNLOCK(igi);

	lck_mtx_destroy(&igi->igi_lock, &igmp_mtx_grp);
	zfree(igi_zone, igi);
}
797 
798 void
igi_addref(struct igmp_ifinfo * igi,int locked)799 igi_addref(struct igmp_ifinfo *igi, int locked)
800 {
801 	if (!locked) {
802 		IGI_LOCK_SPIN(igi);
803 	} else {
804 		IGI_LOCK_ASSERT_HELD(igi);
805 	}
806 
807 	if (++igi->igi_refcnt == 0) {
808 		panic("%s: igi=%p wraparound refcnt", __func__, igi);
809 		/* NOTREACHED */
810 	}
811 	if (!locked) {
812 		IGI_UNLOCK(igi);
813 	}
814 }
815 
void
igi_remref(struct igmp_ifinfo *igi)
{
	/* Records to be released once every lock has been dropped. */
	SLIST_HEAD(, in_multi) inm_dthead;
	struct ifnet *ifp;

	IGI_LOCK_SPIN(igi);

	if (igi->igi_refcnt == 0) {
		panic("%s: igi=%p negative refcnt", __func__, igi);
		/* NOTREACHED */
	}

	--igi->igi_refcnt;
	if (igi->igi_refcnt > 0) {
		IGI_UNLOCK(igi);
		return;
	}

	/* Final reference dropped: drain queues and tear the igi down. */
	ifp = igi->igi_ifp;
	igi->igi_ifp = NULL;
	IF_DRAIN(&igi->igi_gq);
	IF_DRAIN(&igi->igi_v2q);
	SLIST_INIT(&inm_dthead);
	igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
	VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
	IGI_UNLOCK(igi);

	/* Now that we've dropped all locks, release detached records */
	IGMP_REMOVE_DETACHED_INM(&inm_dthead);

	IGMP_PRINTF(("%s: freeing igmp_ifinfo for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	igi_free(igi);
}
852 
853 /*
854  * Process a received IGMPv1 query.
855  * Return non-zero if the message should be dropped.
856  */
static int
igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	struct in_multistep     step;
	/* Timer work accumulated here and committed once at `done'. */
	struct igmp_tparams     itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	/*
	 * IGMPv1 Host Membership Queries SHOULD always be addressed to
	 * 224.0.0.1. They are always treated as General Queries.
	 * igmp_group is always ignored. Do not drop it as a userland
	 * daemon may wish to see it.
	 */
	if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
		IGMPSTAT_INC(igps_rcv_badqueries);
		OIGMPSTAT_INC(igps_rcv_badqueries);
		goto done;
	}
	IGMPSTAT_INC(igps_rcv_gen_queries);

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v1 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}
	/*
	 * Switch to IGMPv1 host compatibility mode.
	 */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_1);
	IGI_UNLOCK(igi);

	IGMP_PRINTF(("%s: process v1 query on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	/*
	 * Start the timers in all of our group records
	 * for the interface on which the query arrived,
	 * except those which are already running.
	 */
	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		/* Skip groups on other interfaces or with a timer running. */
		if (inm->inm_ifp != ifp || inm->inm_timer != 0) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/* Schedule a randomized report for this group. */
			inm->inm_state = IGMP_REPORTING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
			itp.cst = 1;
			break;
		case IGMP_LEAVING_MEMBER:
			break;
		}
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();
done:
	/* Commit any timer changes accumulated above. */
	igmp_set_timeout(&itp);

	return 0;
}
942 
943 /*
944  * Process a received IGMPv2 general or group-specific query.
945  */
static int
igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	int                      is_general_query;
	uint16_t                 timer;
	struct igmp_tparams      itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	/* Global IGMP lock must not be held; igmp_set_timeout() acquires it. */
	IGMP_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	/*
	 * Validate address fields upfront.
	 */
	if (in_nullhost(igmp->igmp_group)) {
		/*
		 * IGMPv2 General Query.
		 * If this was not sent to the all-hosts group, ignore it.
		 */
		if (!in_allhosts(ip->ip_dst)) {
			goto done;
		}
		IGMPSTAT_INC(igps_rcv_gen_queries);
		is_general_query = 1;
	} else {
		/* IGMPv2 Group-Specific Query. */
		IGMPSTAT_INC(igps_rcv_group_queries);
	}

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v2 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}
	/*
	 * Ignore v2 query if in v1 Compatibility Mode.
	 */
	if (igi->igi_version == IGMP_VERSION_1) {
		IGI_UNLOCK(igi);
		goto done;
	}
	/* (Re)enter v2 compatibility mode; qpt flags the querier timer. */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_2);
	IGI_UNLOCK(igi);

	/* Scale the max-response code to timer units; never schedule 0. */
	timer = igmp->igmp_code / IGMP_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	if (is_general_query) {
		struct in_multistep step;

		IGMP_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		/*
		 * For each reporting group joined on this
		 * interface, kick the report timer.
		 */
		in_multihead_lock_shared();
		IN_FIRST_MULTI(step, inm);
		while (inm != NULL) {
			INM_LOCK(inm);
			if (inm->inm_ifp == ifp) {
				itp.cst += igmp_v2_update_group(inm, timer);
			}
			INM_UNLOCK(inm);
			IN_NEXT_MULTI(step, inm);
		}
		in_multihead_lock_done();
	} else {
		/*
		 * Group-specific IGMPv2 query, we need only
		 * look up the single group to process it.
		 */
		in_multihead_lock_shared();
		IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
		in_multihead_lock_done();
		if (inm != NULL) {
			INM_LOCK(inm);
			IGMP_INET_PRINTF(igmp->igmp_group,
			    ("process v2 query %s on ifp 0x%llx(%s)\n",
			    _igmp_inet_buf,
			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
			itp.cst = igmp_v2_update_group(inm, timer);
			INM_UNLOCK(inm);
			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
		}
	}
done:
	/* Arm any timers requested above; this takes the global IGMP lock. */
	igmp_set_timeout(&itp);

	return 0;
}
1048 
1049 /*
1050  * Update the report timer on a group in response to an IGMPv2 query.
1051  *
1052  * If we are becoming the reporting member for this group, start the timer.
1053  * If we already are the reporting member for this group, and timer is
1054  * below the threshold, reset it.
1055  *
1056  * We may be updating the group for the first time since we switched
1057  * to IGMPv3. If we are, then we must clear any recorded source lists,
1058  * and transition to REPORTING state; the group timer is overloaded
1059  * for group and group-source query responses.
1060  *
1061  * Unlike IGMPv3, the delay per group should be jittered
1062  * to avoid bursts of IGMPv2 reports.
1063  */
1064 static uint32_t
igmp_v2_update_group(struct in_multi * inm,const int timer)1065 igmp_v2_update_group(struct in_multi *inm, const int timer)
1066 {
1067 	IGMP_INET_PRINTF(inm->inm_addr, ("%s: %s/%s timer=%d\n",
1068 	    __func__, _igmp_inet_buf, if_name(inm->inm_ifp),
1069 	    timer));
1070 
1071 	INM_LOCK_ASSERT_HELD(inm);
1072 
1073 	switch (inm->inm_state) {
1074 	case IGMP_NOT_MEMBER:
1075 	case IGMP_SILENT_MEMBER:
1076 		break;
1077 	case IGMP_REPORTING_MEMBER:
1078 		if (inm->inm_timer != 0 &&
1079 		    inm->inm_timer <= timer) {
1080 			IGMP_PRINTF(("%s: REPORTING and timer running, "
1081 			    "skipping.\n", __func__));
1082 			break;
1083 		}
1084 		OS_FALLTHROUGH;
1085 	case IGMP_SG_QUERY_PENDING_MEMBER:
1086 	case IGMP_G_QUERY_PENDING_MEMBER:
1087 	case IGMP_IDLE_MEMBER:
1088 	case IGMP_LAZY_MEMBER:
1089 	case IGMP_AWAKENING_MEMBER:
1090 		IGMP_PRINTF(("%s: ->REPORTING\n", __func__));
1091 		inm->inm_state = IGMP_REPORTING_MEMBER;
1092 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1093 		break;
1094 	case IGMP_SLEEPING_MEMBER:
1095 		IGMP_PRINTF(("%s: ->AWAKENING\n", __func__));
1096 		inm->inm_state = IGMP_AWAKENING_MEMBER;
1097 		break;
1098 	case IGMP_LEAVING_MEMBER:
1099 		break;
1100 	}
1101 
1102 	return inm->inm_timer;
1103 }
1104 
1105 /*
1106  * Process a received IGMPv3 general, group-specific or
1107  * group-and-source-specific query.
1108  * Assumes m has already been pulled up to the full IGMP message length.
1109  * Return 0 if successful, otherwise an appropriate error code is returned.
1110  */
static int
igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
    /*const*/ struct igmpv3 *igmpv3)
{
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	int                      is_general_query;
	uint32_t                 maxresp, nsrc, qqi;
	uint32_t                 timer;
	uint8_t                  qrv;
	struct igmp_tparams      itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	/* Global IGMP lock must not be held; igmp_set_timeout() acquires it. */
	IGMP_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	IGMP_PRINTF(("%s: process v3 query on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	/* Codes >= 128 are encoded as mantissa/exponent (RFC 3376 4.1.1). */
	maxresp = igmpv3->igmp_code;    /* in 1/10ths of a second */
	if (maxresp >= 128) {
		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
		    (IGMP_EXP(igmpv3->igmp_code) + 3);
	}

	/*
	 * Robustness must never be less than 2 for on-wire IGMPv3.
	 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
	 * an exception for interfaces whose IGMPv3 state changes
	 * are redirected to loopback (e.g. MANET).
	 */
	qrv = IGMP_QRV(igmpv3->igmp_misc);
	if (qrv < 2) {
		IGMP_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
		    qrv, IGMP_RV_INIT));
		qrv = IGMP_RV_INIT;
	}

	/* Querier's Query Interval uses the same exponential encoding. */
	qqi = igmpv3->igmp_qqi;
	if (qqi >= 128) {
		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
		    (IGMP_EXP(igmpv3->igmp_qqi) + 3);
	}

	/* Scale the max response to timer units; never schedule 0 ticks. */
	timer = maxresp / IGMP_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Validate address fields and versions upfront before
	 * accepting v3 query.
	 */
	if (in_nullhost(igmpv3->igmp_group)) {
		/*
		 * IGMPv3 General Query.
		 *
		 * General Queries SHOULD be directed to 224.0.0.1.
		 * A general query with a source list has undefined
		 * behaviour; discard it.
		 */
		IGMPSTAT_INC(igps_rcv_gen_queries);
		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
			IGMPSTAT_INC(igps_rcv_badqueries);
			OIGMPSTAT_INC(igps_rcv_badqueries);
			goto done;
		}
		is_general_query = 1;
	} else {
		/* Group or group-source specific query. */
		if (nsrc == 0) {
			IGMPSTAT_INC(igps_rcv_group_queries);
		} else {
			IGMPSTAT_INC(igps_rcv_gsr_queries);
		}
	}

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v3 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}

	/*
	 * Discard the v3 query if we're in Compatibility Mode.
	 * The RFC is not obviously worded that hosts need to stay in
	 * compatibility mode until the Old Version Querier Present
	 * timer expires.
	 */
	if (igi->igi_version != IGMP_VERSION_3) {
		IGMP_PRINTF(("%s: ignore v3 query in v%d mode on "
		    "ifp 0x%llx(%s)\n", __func__, igi->igi_version,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}

	/* Adopt the querier's robustness/interval parameters. */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_3);
	igi->igi_rv = qrv;
	igi->igi_qi = qqi;
	igi->igi_qri = MAX(timer, IGMP_QRI_MIN);

	IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, igi->igi_rv,
	    igi->igi_qi, igi->igi_qri));

	if (is_general_query) {
		/*
		 * Schedule a current-state report on this ifp for
		 * all groups, possibly containing source lists.
		 * If there is a pending General Query response
		 * scheduled earlier than the selected delay, do
		 * not schedule any other reports.
		 * Otherwise, reset the interface timer.
		 */
		IGMP_PRINTF(("%s: process v3 general query on ifp 0x%llx(%s)\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
			itp.it = igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
		}
		IGI_UNLOCK(igi);
	} else {
		IGI_UNLOCK(igi);
		/*
		 * Group-source-specific queries are throttled on
		 * a per-group basis to defeat denial-of-service attempts.
		 * Queries for groups we are not a member of on this
		 * link are simply ignored.
		 */
		in_multihead_lock_shared();
		IN_LOOKUP_MULTI(&igmpv3->igmp_group, ifp, inm);
		in_multihead_lock_done();
		if (inm == NULL) {
			goto done;
		}

		INM_LOCK(inm);
		if (nsrc > 0) {
			/* Per-group rate limit for group-and-source queries. */
			if (!ratecheck(&inm->inm_lastgsrtv,
			    &igmp_gsrdelay)) {
				IGMP_PRINTF(("%s: GS query throttled.\n",
				    __func__));
				IGMPSTAT_INC(igps_drop_gsr_queries);
				INM_UNLOCK(inm);
				INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
				goto done;
			}
		}
		IGMP_INET_PRINTF(igmpv3->igmp_group,
		    ("process v3 %s query on ifp 0x%llx(%s)\n", _igmp_inet_buf,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		/*
		 * If there is a pending General Query response
		 * scheduled sooner than the selected delay, no
		 * further report need be scheduled.
		 * Otherwise, prepare to respond to the
		 * group-specific or group-and-source query.
		 */
		IGI_LOCK(igi);
		itp.it = igi->igi_v3_timer;
		IGI_UNLOCK(igi);
		if (itp.it == 0 || itp.it >= timer) {
			(void) igmp_input_v3_group_query(inm, timer, igmpv3);
			itp.cst = inm->inm_timer;
		}
		INM_UNLOCK(inm);
		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
	}
done:
	if (itp.it > 0) {
		IGMP_PRINTF(("%s: v3 general query response scheduled in "
		    "T+%d seconds on ifp 0x%llx(%s)\n", __func__, itp.it,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	}
	/* Arm any timers requested above; this takes the global IGMP lock. */
	igmp_set_timeout(&itp);

	return 0;
}
1296 
/*
 * Process a received IGMPv3 group-specific or group-and-source-specific
 * query.
 * Return <0 if any error occurred. Currently this is ignored.
 */
static int
igmp_input_v3_group_query(struct in_multi *inm,
    int timer, /*const*/ struct igmpv3 *igmpv3)
{
	int                      retval;
	uint16_t                 nsrc;

	INM_LOCK_ASSERT_HELD(inm);

	retval = 0;

	/* Only states with a reporting role need to schedule a response. */
	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LEAVING_MEMBER:
		return retval;
	case IGMP_REPORTING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		break;
	}

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Deal with group-specific queries upfront.
	 * If any group query is already pending, purge any recorded
	 * source-list state if it exists, and schedule a query response
	 * for this group-specific query.
	 */
	if (nsrc == 0) {
		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
			inm_clear_recorded(inm);
			/* Never delay a response beyond the pending one. */
			timer = min(inm->inm_timer, timer);
		}
		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		return retval;
	}

	/*
	 * Deal with the case where a group-and-source-specific query has
	 * been received but a group-specific query is already pending.
	 * The pending group response already covers the sources.
	 */
	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
		timer = min(inm->inm_timer, timer);
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		return retval;
	}

	/*
	 * Finally, deal with the case where a group-and-source-specific
	 * query has been received, where a response to a previous g-s-r
	 * query exists, or none exists.
	 * In this case, we need to parse the source-list which the Querier
	 * has provided us with and check if we have any source list filter
	 * entries at T1 for these sources. If we do not, there is no need
	 * schedule a report and the query may be dropped.
	 * If we do, we must record them and schedule a current-state
	 * report for those sources.
	 * FIXME: Handling source lists larger than 1 mbuf requires that
	 * we pass the mbuf chain pointer down to this function, and use
	 * m_getptr() to walk the chain.
	 */
	if (inm->inm_nsrc > 0) {
		const struct in_addr    *ap;
		int                      i, nrecorded;

		/* Source addresses immediately follow the fixed v3 header. */
		ap = (const struct in_addr *)(igmpv3 + 1);
		nrecorded = 0;
		for (i = 0; i < nsrc; i++, ap++) {
			/* retval < 0: recording failed; stop early. */
			retval = inm_record_source(inm, ap->s_addr);
			if (retval < 0) {
				break;
			}
			nrecorded += retval;
		}
		if (nrecorded > 0) {
			IGMP_PRINTF(("%s: schedule response to SG query\n",
			    __func__));
			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		}
	}

	return retval;
}
1394 
1395 /*
1396  * Process a received IGMPv1 host membership report.
1397  *
1398  * NOTE: 0.0.0.0 workaround breaks const correctness.
1399  */
1400 static int
igmp_input_v1_report(struct ifnet * ifp,struct mbuf * m,struct ip * ip,struct igmp * igmp)1401 igmp_input_v1_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
1402     /*const*/ struct igmp *igmp)
1403 {
1404 	struct in_ifaddr *ia;
1405 	struct in_multi *inm;
1406 
1407 	IGMPSTAT_INC(igps_rcv_reports);
1408 	OIGMPSTAT_INC(igps_rcv_reports);
1409 
1410 	if ((ifp->if_flags & IFF_LOOPBACK) ||
1411 	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1412 		return 0;
1413 	}
1414 
1415 	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr) ||
1416 	    !in_hosteq(igmp->igmp_group, ip->ip_dst))) {
1417 		IGMPSTAT_INC(igps_rcv_badreports);
1418 		OIGMPSTAT_INC(igps_rcv_badreports);
1419 		return EINVAL;
1420 	}
1421 
1422 	/*
1423 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1424 	 * Booting clients may use the source address 0.0.0.0. Some
1425 	 * IGMP daemons may not know how to use IP_RECVIF to determine
1426 	 * the interface upon which this message was received.
1427 	 * Replace 0.0.0.0 with the subnet address if told to do so.
1428 	 */
1429 	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1430 		IFP_TO_IA(ifp, ia);
1431 		if (ia != NULL) {
1432 			IFA_LOCK(&ia->ia_ifa);
1433 			ip->ip_src.s_addr = htonl(ia->ia_subnet);
1434 			IFA_UNLOCK(&ia->ia_ifa);
1435 			IFA_REMREF(&ia->ia_ifa);
1436 		}
1437 	}
1438 
1439 	IGMP_INET_PRINTF(igmp->igmp_group,
1440 	    ("process v1 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1441 	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1442 
1443 	/*
1444 	 * IGMPv1 report suppression.
1445 	 * If we are a member of this group, and our membership should be
1446 	 * reported, stop our group timer and transition to the 'lazy' state.
1447 	 */
1448 	in_multihead_lock_shared();
1449 	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1450 	in_multihead_lock_done();
1451 	if (inm != NULL) {
1452 		struct igmp_ifinfo *igi;
1453 
1454 		INM_LOCK(inm);
1455 
1456 		igi = inm->inm_igi;
1457 		VERIFY(igi != NULL);
1458 
1459 		IGMPSTAT_INC(igps_rcv_ourreports);
1460 		OIGMPSTAT_INC(igps_rcv_ourreports);
1461 
1462 		/*
1463 		 * If we are in IGMPv3 host mode, do not allow the
1464 		 * other host's IGMPv1 report to suppress our reports
1465 		 * unless explicitly configured to do so.
1466 		 */
1467 		IGI_LOCK(igi);
1468 		if (igi->igi_version == IGMP_VERSION_3) {
1469 			if (igmp_legacysupp) {
1470 				igmp_v3_suppress_group_record(inm);
1471 			}
1472 			IGI_UNLOCK(igi);
1473 			INM_UNLOCK(inm);
1474 			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1475 			return 0;
1476 		}
1477 
1478 		INM_LOCK_ASSERT_HELD(inm);
1479 		inm->inm_timer = 0;
1480 
1481 		switch (inm->inm_state) {
1482 		case IGMP_NOT_MEMBER:
1483 		case IGMP_SILENT_MEMBER:
1484 			break;
1485 		case IGMP_IDLE_MEMBER:
1486 		case IGMP_LAZY_MEMBER:
1487 		case IGMP_AWAKENING_MEMBER:
1488 			IGMP_INET_PRINTF(igmp->igmp_group,
1489 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1490 			    _igmp_inet_buf,
1491 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1492 			OS_FALLTHROUGH;
1493 		case IGMP_SLEEPING_MEMBER:
1494 			inm->inm_state = IGMP_SLEEPING_MEMBER;
1495 			break;
1496 		case IGMP_REPORTING_MEMBER:
1497 			IGMP_INET_PRINTF(igmp->igmp_group,
1498 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1499 			    _igmp_inet_buf,
1500 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1501 			if (igi->igi_version == IGMP_VERSION_1) {
1502 				inm->inm_state = IGMP_LAZY_MEMBER;
1503 			} else if (igi->igi_version == IGMP_VERSION_2) {
1504 				inm->inm_state = IGMP_SLEEPING_MEMBER;
1505 			}
1506 			break;
1507 		case IGMP_G_QUERY_PENDING_MEMBER:
1508 		case IGMP_SG_QUERY_PENDING_MEMBER:
1509 		case IGMP_LEAVING_MEMBER:
1510 			break;
1511 		}
1512 		IGI_UNLOCK(igi);
1513 		INM_UNLOCK(inm);
1514 		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1515 	}
1516 
1517 	return 0;
1518 }
1519 
1520 /*
1521  * Process a received IGMPv2 host membership report.
1522  *
1523  * NOTE: 0.0.0.0 workaround breaks const correctness.
1524  */
static int
igmp_input_v2_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
    /*const*/ struct igmp *igmp)
{
	struct in_ifaddr *ia;
	struct in_multi *inm;

	/*
	 * Make sure we don't hear our own membership report.  Fast
	 * leave requires knowing that we are the only member of a
	 * group.
	 */
	IFP_TO_IA(ifp, ia);
	if (ia != NULL) {
		IFA_LOCK(&ia->ia_ifa);
		if (in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
			IFA_UNLOCK(&ia->ia_ifa);
			IFA_REMREF(&ia->ia_ifa);
			return 0;
		}
		IFA_UNLOCK(&ia->ia_ifa);
		/* NOTE: ia still holds a reference; released on all paths below. */
	}

	IGMPSTAT_INC(igps_rcv_reports);
	OIGMPSTAT_INC(igps_rcv_reports);

	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		return 0;
	}

	/*
	 * The reported group must be a multicast address and must match
	 * the IP destination of the datagram.
	 */
	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		IGMPSTAT_INC(igps_rcv_badreports);
		OIGMPSTAT_INC(igps_rcv_badreports);
		return EINVAL;
	}

	/*
	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
	 * Booting clients may use the source address 0.0.0.0. Some
	 * IGMP daemons may not know how to use IP_RECVIF to determine
	 * the interface upon which this message was received.
	 * Replace 0.0.0.0 with the subnet address if told to do so.
	 */
	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
		if (ia != NULL) {
			IFA_LOCK(&ia->ia_ifa);
			ip->ip_src.s_addr = htonl(ia->ia_subnet);
			IFA_UNLOCK(&ia->ia_ifa);
		}
	}
	if (ia != NULL) {
		IFA_REMREF(&ia->ia_ifa);
	}

	IGMP_INET_PRINTF(igmp->igmp_group,
	    ("process v2 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	/*
	 * IGMPv2 report suppression.
	 * If we are a member of this group, and our membership should be
	 * reported, and our group timer is pending or about to be reset,
	 * stop our group timer by transitioning to the 'lazy' state.
	 */
	in_multihead_lock_shared();
	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
	in_multihead_lock_done();
	if (inm != NULL) {
		struct igmp_ifinfo *igi;

		INM_LOCK(inm);
		igi = inm->inm_igi;
		VERIFY(igi != NULL);

		IGMPSTAT_INC(igps_rcv_ourreports);
		OIGMPSTAT_INC(igps_rcv_ourreports);

		/*
		 * If we are in IGMPv3 host mode, do not allow the
		 * other host's IGMPv2 report to suppress our reports
		 * unless explicitly configured to do so.
		 */
		IGI_LOCK(igi);
		if (igi->igi_version == IGMP_VERSION_3) {
			if (igmp_legacysupp) {
				igmp_v3_suppress_group_record(inm);
			}
			IGI_UNLOCK(igi);
			INM_UNLOCK(inm);
			INM_REMREF(inm);
			return 0;
		}

		/* Another host reported for us; cancel our pending report. */
		inm->inm_timer = 0;

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
		case IGMP_SLEEPING_MEMBER:
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			IGMP_INET_PRINTF(igmp->igmp_group,
			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
			    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp),
			    if_name(ifp)));
			OS_FALLTHROUGH;
		case IGMP_LAZY_MEMBER:
			inm->inm_state = IGMP_LAZY_MEMBER;
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			break;
		}
		IGI_UNLOCK(igi);
		INM_UNLOCK(inm);
		INM_REMREF(inm);
	}

	return 0;
}
1656 
void
igmp_input(struct mbuf *m, int off)
{
	int iphlen;
	struct ifnet *ifp;
	struct igmp *igmp;
	struct ip *ip;
	int igmplen;
	int minlen;
	int queryver;

	IGMP_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m), off));

	ifp = m->m_pkthdr.rcvif;

	IGMPSTAT_INC(igps_rcv_total);
	OIGMPSTAT_INC(igps_rcv_total);

	/* Expect 32-bit aligned data pointer on strict-align platforms */
	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);

	ip = mtod(m, struct ip *);
	iphlen = off;

	/* By now, ip_len no longer contains the length of IP header */
	igmplen = ip->ip_len;

	/*
	 * Validate lengths.
	 */
	if (igmplen < IGMP_MINLEN) {
		IGMPSTAT_INC(igps_rcv_tooshort);
		OIGMPSTAT_INC(igps_rcv_tooshort);
		m_freem(m);
		return;
	}

	/*
	 * Always pullup to the minimum size for v1/v2 or v3
	 * to amortize calls to m_pulldown().
	 */
	if (igmplen >= IGMP_V3_QUERY_MINLEN) {
		minlen = IGMP_V3_QUERY_MINLEN;
	} else {
		minlen = IGMP_MINLEN;
	}

	/* A bit more expensive than M_STRUCT_GET, but ensures alignment */
	M_STRUCT_GET0(igmp, struct igmp *, m, off, minlen);
	if (igmp == NULL) {
		/* NOTE(review): no m_freem here — presumably M_STRUCT_GET0
		 * frees the mbuf on failure (m_pulldown semantics); confirm. */
		IGMPSTAT_INC(igps_rcv_tooshort);
		OIGMPSTAT_INC(igps_rcv_tooshort);
		return;
	}
	/* N.B.: we assume the packet was correctly aligned in ip_input. */

	/*
	 * Validate checksum.
	 * Temporarily advance the mbuf data pointer past the IP header so
	 * in_cksum() covers only the IGMP message, then restore it.
	 */
	m->m_data += iphlen;
	m->m_len -= iphlen;
	if (in_cksum(m, igmplen)) {
		IGMPSTAT_INC(igps_rcv_badsum);
		OIGMPSTAT_INC(igps_rcv_badsum);
		m_freem(m);
		return;
	}
	m->m_data -= iphlen;
	m->m_len += iphlen;

	/*
	 * IGMP control traffic is link-scope, and must have a TTL of 1.
	 * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
	 * probe packets may come from beyond the LAN.
	 */
	if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
		IGMPSTAT_INC(igps_rcv_badttl);
		m_freem(m);
		return;
	}

	switch (igmp->igmp_type) {
	case IGMP_HOST_MEMBERSHIP_QUERY:
		/*
		 * Distinguish query versions: v1/v2 share the 8-byte format
		 * (v1 has a zero code), v3 queries are at least 12 bytes.
		 */
		if (igmplen == IGMP_MINLEN) {
			if (igmp->igmp_code == 0) {
				queryver = IGMP_VERSION_1;
			} else {
				queryver = IGMP_VERSION_2;
			}
		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
			queryver = IGMP_VERSION_3;
		} else {
			IGMPSTAT_INC(igps_rcv_tooshort);
			OIGMPSTAT_INC(igps_rcv_tooshort);
			m_freem(m);
			return;
		}

		OIGMPSTAT_INC(igps_rcv_queries);

		switch (queryver) {
		case IGMP_VERSION_1:
			IGMPSTAT_INC(igps_rcv_v1v2_queries);
			if (!igmp_v1enable) {
				break;
			}
			if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
				m_freem(m);
				return;
			}
			break;

		case IGMP_VERSION_2:
			IGMPSTAT_INC(igps_rcv_v1v2_queries);
			if (!igmp_v2enable) {
				break;
			}
			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
				m_freem(m);
				return;
			}
			break;

		case IGMP_VERSION_3: {
			struct igmpv3 *igmpv3;
			uint16_t igmpv3len;
			uint16_t srclen;
			int nsrc;

			IGMPSTAT_INC(igps_rcv_v3_queries);
			igmpv3 = (struct igmpv3 *)igmp;
			/*
			 * Validate length based on source count.
			 */
			nsrc = ntohs(igmpv3->igmp_numsrc);
			/*
			 * The max value of nsrc is limited by the
			 * MTU of the network on which the datagram
			 * is received
			 */
			if (nsrc < 0 || nsrc > IGMP_V3_QUERY_MAX_SRCS) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				m_freem(m);
				return;
			}
			srclen = sizeof(struct in_addr) * (uint16_t)nsrc;
			if (igmplen < (IGMP_V3_QUERY_MINLEN + srclen)) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				m_freem(m);
				return;
			}
			igmpv3len = IGMP_V3_QUERY_MINLEN + srclen;
			/*
			 * A bit more expensive than M_STRUCT_GET,
			 * but ensures alignment.
			 */
			M_STRUCT_GET0(igmpv3, struct igmpv3 *, m,
			    off, igmpv3len);
			if (igmpv3 == NULL) {
				/* See the earlier M_STRUCT_GET0 note re: m_freem. */
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				return;
			}
			/*
			 * N.B.: we assume the packet was correctly
			 * aligned in ip_input.
			 */
			if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
				m_freem(m);
				return;
			}
		}
		break;
		}
		break;

	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
		if (!igmp_v1enable) {
			break;
		}
		if (igmp_input_v1_report(ifp, m, ip, igmp) != 0) {
			m_freem(m);
			return;
		}
		break;

	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
		if (!igmp_v2enable) {
			break;
		}
		if (!ip_checkrouteralert(m)) {
			IGMPSTAT_INC(igps_rcv_nora);
		}
		if (igmp_input_v2_report(ifp, m, ip, igmp) != 0) {
			m_freem(m);
			return;
		}
		break;

	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
		/*
		 * Hosts do not need to process IGMPv3 membership reports,
		 * as report suppression is no longer required.
		 */
		if (!ip_checkrouteralert(m)) {
			IGMPSTAT_INC(igps_rcv_nora);
		}
		break;

	default:
		break;
	}

	IGMP_LOCK_ASSERT_NOTHELD();
	/*
	 * Pass all valid IGMP packets up to any process(es) listening on a
	 * raw IGMP socket.
	 */
	rip_input(m, off);
}
1880 
1881 /*
1882  * Schedule IGMP timer based on various parameters; caller must ensure that
1883  * lock ordering is maintained as this routine acquires IGMP global lock.
1884  */
1885 void
igmp_set_timeout(struct igmp_tparams * itp)1886 igmp_set_timeout(struct igmp_tparams *itp)
1887 {
1888 	IGMP_LOCK_ASSERT_NOTHELD();
1889 	VERIFY(itp != NULL);
1890 
1891 	if (itp->qpt != 0 || itp->it != 0 || itp->cst != 0 || itp->sct != 0) {
1892 		IGMP_LOCK();
1893 		if (itp->qpt != 0) {
1894 			querier_present_timers_running = 1;
1895 		}
1896 		if (itp->it != 0) {
1897 			interface_timers_running = 1;
1898 		}
1899 		if (itp->cst != 0) {
1900 			current_state_timers_running = 1;
1901 		}
1902 		if (itp->sct != 0) {
1903 			state_change_timers_running = 1;
1904 		}
1905 		if (itp->fast) {
1906 			igmp_sched_fast_timeout();
1907 		} else {
1908 			igmp_sched_timeout();
1909 		}
1910 		IGMP_UNLOCK();
1911 	}
1912 }
1913 
void
igmp_set_fast_timeout(struct igmp_tparams *itp)
{
	/* Same as igmp_set_timeout(), but requests the fast timer cadence. */
	VERIFY(itp != NULL);
	itp->fast = true;
	igmp_set_timeout(itp);
}
1921 
1922 /*
1923  * IGMP timer handler (per 1 second).
1924  */
1925 static void
igmp_timeout(thread_call_param_t arg0,thread_call_param_t arg1 __unused)1926 igmp_timeout(thread_call_param_t arg0, thread_call_param_t arg1 __unused)
1927 {
1928 	struct ifqueue           scq;   /* State-change packets */
1929 	struct ifqueue           qrq;   /* Query response packets */
1930 	struct ifnet            *ifp;
1931 	struct igmp_ifinfo      *igi;
1932 	struct in_multi         *inm;
1933 	unsigned int             loop = 0, uri_sec = 0;
1934 	SLIST_HEAD(, in_multi)  inm_dthead;
1935 	bool                     fast = arg0 != NULL;
1936 
1937 	SLIST_INIT(&inm_dthead);
1938 
1939 	/*
1940 	 * Update coarse-grained networking timestamp (in sec.); the idea
1941 	 * is to piggy-back on the timeout callout to update the counter
1942 	 * returnable via net_uptime().
1943 	 */
1944 	net_update_uptime();
1945 
1946 	IGMP_LOCK();
1947 
1948 	IGMP_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d, fast %d\n", __func__,
1949 	    querier_present_timers_running, interface_timers_running,
1950 	    current_state_timers_running, state_change_timers_running,
1951 	    fast));
1952 
1953 	if (fast) {
1954 		/*
1955 		 * When running the fast timer, skip processing
1956 		 * of "querier present" timers since they are
1957 		 * based on 1-second intervals.
1958 		 */
1959 		goto skip_query_timers;
1960 	}
1961 	/*
1962 	 * IGMPv1/v2 querier present timer processing.
1963 	 */
1964 	if (querier_present_timers_running) {
1965 		querier_present_timers_running = 0;
1966 		LIST_FOREACH(igi, &igi_head, igi_link) {
1967 			IGI_LOCK(igi);
1968 			igmp_v1v2_process_querier_timers(igi);
1969 			if (igi->igi_v1_timer > 0 || igi->igi_v2_timer > 0) {
1970 				querier_present_timers_running = 1;
1971 			}
1972 			IGI_UNLOCK(igi);
1973 		}
1974 	}
1975 
1976 	/*
1977 	 * IGMPv3 General Query response timer processing.
1978 	 */
1979 	if (interface_timers_running) {
1980 		IGMP_PRINTF(("%s: interface timers running\n", __func__));
1981 		interface_timers_running = 0;
1982 		LIST_FOREACH(igi, &igi_head, igi_link) {
1983 			IGI_LOCK(igi);
1984 			if (igi->igi_version != IGMP_VERSION_3) {
1985 				IGI_UNLOCK(igi);
1986 				continue;
1987 			}
1988 			if (igi->igi_v3_timer == 0) {
1989 				/* Do nothing. */
1990 			} else if (--igi->igi_v3_timer == 0) {
1991 				if (igmp_v3_dispatch_general_query(igi) > 0) {
1992 					interface_timers_running = 1;
1993 				}
1994 			} else {
1995 				interface_timers_running = 1;
1996 			}
1997 			IGI_UNLOCK(igi);
1998 		}
1999 	}
2000 
2001 skip_query_timers:
2002 	if (!current_state_timers_running &&
2003 	    !state_change_timers_running) {
2004 		goto out_locked;
2005 	}
2006 
2007 	current_state_timers_running = 0;
2008 	state_change_timers_running = 0;
2009 
2010 	memset(&qrq, 0, sizeof(struct ifqueue));
2011 	qrq.ifq_maxlen = IGMP_MAX_G_GS_PACKETS;
2012 
2013 	memset(&scq, 0, sizeof(struct ifqueue));
2014 	scq.ifq_maxlen =  IGMP_MAX_STATE_CHANGE_PACKETS;
2015 
2016 	IGMP_PRINTF(("%s: state change timers running\n", __func__));
2017 
2018 	/*
2019 	 * IGMPv1/v2/v3 host report and state-change timer processing.
2020 	 * Note: Processing a v3 group timer may remove a node.
2021 	 */
2022 	LIST_FOREACH(igi, &igi_head, igi_link) {
2023 		struct in_multistep step;
2024 
2025 		IGI_LOCK(igi);
2026 		ifp = igi->igi_ifp;
2027 		loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
2028 		uri_sec = IGMP_RANDOM_DELAY(igi->igi_uri);
2029 		IGI_UNLOCK(igi);
2030 
2031 		in_multihead_lock_shared();
2032 		IN_FIRST_MULTI(step, inm);
2033 		while (inm != NULL) {
2034 			INM_LOCK(inm);
2035 			if (inm->inm_ifp != ifp) {
2036 				goto next;
2037 			}
2038 
2039 			IGI_LOCK(igi);
2040 			switch (igi->igi_version) {
2041 			case IGMP_VERSION_1:
2042 			case IGMP_VERSION_2:
2043 				igmp_v1v2_process_group_timer(inm,
2044 				    igi->igi_version);
2045 				break;
2046 			case IGMP_VERSION_3:
2047 				igmp_v3_process_group_timers(igi, &qrq,
2048 				    &scq, inm, uri_sec);
2049 				break;
2050 			}
2051 			IGI_UNLOCK(igi);
2052 next:
2053 			INM_UNLOCK(inm);
2054 			IN_NEXT_MULTI(step, inm);
2055 		}
2056 		in_multihead_lock_done();
2057 
2058 		IGI_LOCK(igi);
2059 		if (igi->igi_version == IGMP_VERSION_1 ||
2060 		    igi->igi_version == IGMP_VERSION_2) {
2061 			igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop);
2062 		} else if (igi->igi_version == IGMP_VERSION_3) {
2063 			IGI_UNLOCK(igi);
2064 			igmp_dispatch_queue(NULL, &qrq, 0, loop);
2065 			igmp_dispatch_queue(NULL, &scq, 0, loop);
2066 			VERIFY(qrq.ifq_len == 0);
2067 			VERIFY(scq.ifq_len == 0);
2068 			IGI_LOCK(igi);
2069 		}
2070 		/*
2071 		 * In case there are still any pending membership reports
2072 		 * which didn't get drained at version change time.
2073 		 */
2074 		IF_DRAIN(&igi->igi_v2q);
2075 		/*
2076 		 * Release all deferred inm records, and drain any locally
2077 		 * enqueued packets; do it even if the current IGMP version
2078 		 * for the link is no longer IGMPv3, in order to handle the
2079 		 * version change case.
2080 		 */
2081 		igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
2082 		VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
2083 		IGI_UNLOCK(igi);
2084 
2085 		IF_DRAIN(&qrq);
2086 		IF_DRAIN(&scq);
2087 	}
2088 
2089 out_locked:
2090 	/* re-arm the timer if there's work to do */
2091 	if (fast) {
2092 		igmp_fast_timeout_run = false;
2093 	} else {
2094 		igmp_timeout_run = false;
2095 	}
2096 	igmp_sched_timeout();
2097 	IGMP_UNLOCK();
2098 
2099 	/* Now that we're dropped all locks, release detached records */
2100 	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
2101 }
2102 
/*
 * (Re)arm the regular 1-second IGMP timeout if any of the global
 * pending-timer flags indicate outstanding work and a callout is not
 * already queued.  The thread call is lazily allocated on first use
 * and never freed.  Called with the IGMP lock held.
 */
static void
igmp_sched_timeout(void)
{
	static thread_call_t igmp_timeout_tcall;
	uint64_t deadline = 0, leeway = 0;

	IGMP_LOCK_ASSERT_HELD();
	/* Lazily allocate the thread call on first use. */
	if (igmp_timeout_tcall == NULL) {
		igmp_timeout_tcall =
		    thread_call_allocate_with_options(igmp_timeout,
		    NULL,	/* NULL arg0 => regular (non-fast) timeout */
		    THREAD_CALL_PRIORITY_KERNEL,
		    THREAD_CALL_OPTIONS_ONCE);
	}
	if (!igmp_timeout_run &&
	    (querier_present_timers_running || current_state_timers_running ||
	    interface_timers_running || state_change_timers_running)) {
		igmp_timeout_run = true;
		/* igmp_timeout_delay/leeway are in milliseconds. */
		clock_interval_to_deadline(igmp_timeout_delay, NSEC_PER_MSEC,
		    &deadline);
		clock_interval_to_absolutetime_interval(igmp_timeout_leeway,
		    NSEC_PER_MSEC, &leeway);
		thread_call_enter_delayed_with_leeway(igmp_timeout_tcall, NULL,
		    deadline, leeway,
		    THREAD_CALL_DELAY_LEEWAY);
	}
}
2130 
/*
 * Schedule the fast (immediate) IGMP timeout if current-state or
 * state-change work is pending and a fast callout is not already
 * queued.  The thread call is lazily allocated on first use.
 * Called with the IGMP lock held.
 */
static void
igmp_sched_fast_timeout(void)
{
	static thread_call_t igmp_fast_timeout_tcall;

	IGMP_LOCK_ASSERT_HELD();
	if (igmp_fast_timeout_tcall == NULL) {
		igmp_fast_timeout_tcall =
		    thread_call_allocate_with_options(igmp_timeout,
		    igmp_sched_fast_timeout,	/* any non-NULL arg0 marks "fast" mode */
		    THREAD_CALL_PRIORITY_KERNEL,
		    THREAD_CALL_OPTIONS_ONCE);
	}
	if (!igmp_fast_timeout_run &&
	    (current_state_timers_running || state_change_timers_running)) {
		igmp_fast_timeout_run = true;
		thread_call_enter(igmp_fast_timeout_tcall);
	}
}
2150 
2151 /*
2152  * Free the in_multi reference(s) for this IGMP lifecycle.
2153  *
2154  * Caller must be holding igi_lock.
2155  */
2156 static void
igmp_flush_relq(struct igmp_ifinfo * igi,struct igmp_inm_relhead * inm_dthead)2157 igmp_flush_relq(struct igmp_ifinfo *igi, struct igmp_inm_relhead *inm_dthead)
2158 {
2159 	struct in_multi *inm;
2160 
2161 again:
2162 	IGI_LOCK_ASSERT_HELD(igi);
2163 	inm = SLIST_FIRST(&igi->igi_relinmhead);
2164 	if (inm != NULL) {
2165 		int lastref;
2166 
2167 		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
2168 		IGI_UNLOCK(igi);
2169 
2170 		in_multihead_lock_exclusive();
2171 		INM_LOCK(inm);
2172 		VERIFY(inm->inm_nrelecnt != 0);
2173 		inm->inm_nrelecnt--;
2174 		lastref = in_multi_detach(inm);
2175 		VERIFY(!lastref || (!(inm->inm_debug & IFD_ATTACHED) &&
2176 		    inm->inm_reqcnt == 0));
2177 		INM_UNLOCK(inm);
2178 		in_multihead_lock_done();
2179 		/* from igi_relinmhead */
2180 		INM_REMREF(inm);
2181 		/* from in_multihead list */
2182 		if (lastref) {
2183 			/*
2184 			 * Defer releasing our final reference, as we
2185 			 * are holding the IGMP lock at this point, and
2186 			 * we could end up with locking issues later on
2187 			 * (while issuing SIOCDELMULTI) when this is the
2188 			 * final reference count.  Let the caller do it
2189 			 * when it is safe.
2190 			 */
2191 			IGMP_ADD_DETACHED_INM(inm_dthead, inm);
2192 		}
2193 		IGI_LOCK(igi);
2194 		goto again;
2195 	}
2196 }
2197 
2198 /*
2199  * Update host report group timer for IGMPv1/v2.
2200  * Will update the global pending timer flags.
2201  */
2202 static void
igmp_v1v2_process_group_timer(struct in_multi * inm,const int igmp_version)2203 igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version)
2204 {
2205 	int report_timer_expired;
2206 
2207 	IGMP_LOCK_ASSERT_HELD();
2208 	INM_LOCK_ASSERT_HELD(inm);
2209 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2210 
2211 	if (inm->inm_timer == 0) {
2212 		report_timer_expired = 0;
2213 	} else if (--inm->inm_timer == 0) {
2214 		report_timer_expired = 1;
2215 	} else {
2216 		current_state_timers_running = 1;
2217 		/* caller will schedule timer */
2218 		return;
2219 	}
2220 
2221 	switch (inm->inm_state) {
2222 	case IGMP_NOT_MEMBER:
2223 	case IGMP_SILENT_MEMBER:
2224 	case IGMP_IDLE_MEMBER:
2225 	case IGMP_LAZY_MEMBER:
2226 	case IGMP_SLEEPING_MEMBER:
2227 	case IGMP_AWAKENING_MEMBER:
2228 		break;
2229 	case IGMP_REPORTING_MEMBER:
2230 		if (report_timer_expired) {
2231 			inm->inm_state = IGMP_IDLE_MEMBER;
2232 			(void) igmp_v1v2_queue_report(inm,
2233 			    (igmp_version == IGMP_VERSION_2) ?
2234 			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
2235 			    IGMP_v1_HOST_MEMBERSHIP_REPORT);
2236 			INM_LOCK_ASSERT_HELD(inm);
2237 			IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2238 		}
2239 		break;
2240 	case IGMP_G_QUERY_PENDING_MEMBER:
2241 	case IGMP_SG_QUERY_PENDING_MEMBER:
2242 	case IGMP_LEAVING_MEMBER:
2243 		break;
2244 	}
2245 }
2246 
2247 /*
2248  * Update a group's timers for IGMPv3.
2249  * Will update the global pending timer flags.
2250  * Note: Unlocked read from igi.
2251  */
2252 static void
igmp_v3_process_group_timers(struct igmp_ifinfo * igi,struct ifqueue * qrq,struct ifqueue * scq,struct in_multi * inm,const unsigned int uri_sec)2253 igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
2254     struct ifqueue *qrq, struct ifqueue *scq,
2255     struct in_multi *inm, const unsigned int uri_sec)
2256 {
2257 	int query_response_timer_expired;
2258 	int state_change_retransmit_timer_expired;
2259 
2260 	IGMP_LOCK_ASSERT_HELD();
2261 	INM_LOCK_ASSERT_HELD(inm);
2262 	IGI_LOCK_ASSERT_HELD(igi);
2263 	VERIFY(igi == inm->inm_igi);
2264 
2265 	query_response_timer_expired = 0;
2266 	state_change_retransmit_timer_expired = 0;
2267 
2268 	/*
2269 	 * During a transition from v1/v2 compatibility mode back to v3,
2270 	 * a group record in REPORTING state may still have its group
2271 	 * timer active. This is a no-op in this function; it is easier
2272 	 * to deal with it here than to complicate the timeout path.
2273 	 */
2274 	if (inm->inm_timer == 0) {
2275 		query_response_timer_expired = 0;
2276 	} else if (--inm->inm_timer == 0) {
2277 		query_response_timer_expired = 1;
2278 	} else {
2279 		current_state_timers_running = 1;
2280 		/* caller will schedule timer */
2281 	}
2282 
2283 	if (inm->inm_sctimer == 0) {
2284 		state_change_retransmit_timer_expired = 0;
2285 	} else if (--inm->inm_sctimer == 0) {
2286 		state_change_retransmit_timer_expired = 1;
2287 	} else {
2288 		state_change_timers_running = 1;
2289 		/* caller will schedule timer */
2290 	}
2291 
2292 	/* We are in timer callback, so be quick about it. */
2293 	if (!state_change_retransmit_timer_expired &&
2294 	    !query_response_timer_expired) {
2295 		return;
2296 	}
2297 
2298 	switch (inm->inm_state) {
2299 	case IGMP_NOT_MEMBER:
2300 	case IGMP_SILENT_MEMBER:
2301 	case IGMP_SLEEPING_MEMBER:
2302 	case IGMP_LAZY_MEMBER:
2303 	case IGMP_AWAKENING_MEMBER:
2304 	case IGMP_IDLE_MEMBER:
2305 		break;
2306 	case IGMP_G_QUERY_PENDING_MEMBER:
2307 	case IGMP_SG_QUERY_PENDING_MEMBER:
2308 		/*
2309 		 * Respond to a previously pending Group-Specific
2310 		 * or Group-and-Source-Specific query by enqueueing
2311 		 * the appropriate Current-State report for
2312 		 * immediate transmission.
2313 		 */
2314 		if (query_response_timer_expired) {
2315 			int retval;
2316 
2317 			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
2318 			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
2319 			IGMP_PRINTF(("%s: enqueue record = %d\n",
2320 			    __func__, retval));
2321 			inm->inm_state = IGMP_REPORTING_MEMBER;
2322 			/* XXX Clear recorded sources for next time. */
2323 			inm_clear_recorded(inm);
2324 		}
2325 		OS_FALLTHROUGH;
2326 	case IGMP_REPORTING_MEMBER:
2327 	case IGMP_LEAVING_MEMBER:
2328 		if (state_change_retransmit_timer_expired) {
2329 			/*
2330 			 * State-change retransmission timer fired.
2331 			 * If there are any further pending retransmissions,
2332 			 * set the global pending state-change flag, and
2333 			 * reset the timer.
2334 			 */
2335 			if (--inm->inm_scrv > 0) {
2336 				inm->inm_sctimer = (uint16_t)uri_sec;
2337 				state_change_timers_running = 1;
2338 				/* caller will schedule timer */
2339 			}
2340 			/*
2341 			 * Retransmit the previously computed state-change
2342 			 * report. If there are no further pending
2343 			 * retransmissions, the mbuf queue will be consumed.
2344 			 * Update T0 state to T1 as we have now sent
2345 			 * a state-change.
2346 			 */
2347 			(void) igmp_v3_merge_state_changes(inm, scq);
2348 
2349 			inm_commit(inm);
2350 			IGMP_INET_PRINTF(inm->inm_addr,
2351 			    ("%s: T1 -> T0 for %s/%s\n", __func__,
2352 			    _igmp_inet_buf, if_name(inm->inm_ifp)));
2353 
2354 			/*
2355 			 * If we are leaving the group for good, make sure
2356 			 * we release IGMP's reference to it.
2357 			 * This release must be deferred using a SLIST,
2358 			 * as we are called from a loop which traverses
2359 			 * the in_multihead list.
2360 			 */
2361 			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
2362 			    inm->inm_scrv == 0) {
2363 				inm->inm_state = IGMP_NOT_MEMBER;
2364 				/*
2365 				 * A reference has already been held in
2366 				 * igmp_final_leave() for this inm, so
2367 				 * no need to hold another one.  We also
2368 				 * bumped up its request count then, so
2369 				 * that it stays in in_multihead.  Both
2370 				 * of them will be released when it is
2371 				 * dequeued later on.
2372 				 */
2373 				VERIFY(inm->inm_nrelecnt != 0);
2374 				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
2375 				    inm, inm_nrele);
2376 			}
2377 		}
2378 		break;
2379 	}
2380 }
2381 
2382 /*
2383  * Suppress a group's pending response to a group or source/group query.
2384  *
2385  * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
2386  * Do NOT update ST1/ST0 as this operation merely suppresses
2387  * the currently pending group record.
2388  * Do NOT suppress the response to a general query. It is possible but
2389  * it would require adding another state or flag.
2390  */
2391 static void
igmp_v3_suppress_group_record(struct in_multi * inm)2392 igmp_v3_suppress_group_record(struct in_multi *inm)
2393 {
2394 	INM_LOCK_ASSERT_HELD(inm);
2395 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2396 
2397 	VERIFY(inm->inm_igi->igi_version == IGMP_VERSION_3);
2398 
2399 	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER &&
2400 	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER) {
2401 		return;
2402 	}
2403 
2404 	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
2405 		inm_clear_recorded(inm);
2406 	}
2407 
2408 	inm->inm_timer = 0;
2409 	inm->inm_state = IGMP_REPORTING_MEMBER;
2410 }
2411 
2412 /*
2413  * Switch to a different IGMP version on the given interface,
2414  * as per Section 7.2.1.
2415  */
2416 static uint32_t
igmp_set_version(struct igmp_ifinfo * igi,const int igmp_version)2417 igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version)
2418 {
2419 	int old_version_timer;
2420 
2421 	IGI_LOCK_ASSERT_HELD(igi);
2422 
2423 	IGMP_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__,
2424 	    igmp_version, (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2425 	    if_name(igi->igi_ifp)));
2426 
2427 	if (igmp_version == IGMP_VERSION_1 || igmp_version == IGMP_VERSION_2) {
2428 		/*
2429 		 * Compute the "Older Version Querier Present" timer as per
2430 		 * Section 8.12, in seconds.
2431 		 */
2432 		old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
2433 
2434 		if (igmp_version == IGMP_VERSION_1) {
2435 			igi->igi_v1_timer = old_version_timer;
2436 			igi->igi_v2_timer = 0;
2437 		} else if (igmp_version == IGMP_VERSION_2) {
2438 			igi->igi_v1_timer = 0;
2439 			igi->igi_v2_timer = old_version_timer;
2440 		}
2441 	}
2442 
2443 	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2444 		if (igi->igi_version != IGMP_VERSION_2) {
2445 			igmp_v3_cancel_link_timers(igi);
2446 			igi->igi_version = IGMP_VERSION_2;
2447 		}
2448 	} else if (igi->igi_v1_timer > 0) {
2449 		if (igi->igi_version != IGMP_VERSION_1) {
2450 			igmp_v3_cancel_link_timers(igi);
2451 			igi->igi_version = IGMP_VERSION_1;
2452 		}
2453 	}
2454 
2455 	IGI_LOCK_ASSERT_HELD(igi);
2456 
2457 	return MAX(igi->igi_v1_timer, igi->igi_v2_timer);
2458 }
2459 
2460 /*
2461  * Cancel pending IGMPv3 timers for the given link and all groups
2462  * joined on it; state-change, general-query, and group-query timers.
2463  *
2464  * Only ever called on a transition from v3 to Compatibility mode. Kill
2465  * the timers stone dead (this may be expensive for large N groups), they
2466  * will be restarted if Compatibility Mode deems that they must be due to
2467  * query processing.
2468  */
2469 static void
igmp_v3_cancel_link_timers(struct igmp_ifinfo * igi)2470 igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
2471 {
2472 	struct ifnet            *ifp;
2473 	struct in_multi         *inm;
2474 	struct in_multistep     step;
2475 
2476 	IGI_LOCK_ASSERT_HELD(igi);
2477 
2478 	IGMP_PRINTF(("%s: cancel v3 timers on ifp 0x%llx(%s)\n", __func__,
2479 	    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), if_name(igi->igi_ifp)));
2480 
2481 	/*
2482 	 * Stop the v3 General Query Response on this link stone dead.
2483 	 * If timer is woken up due to interface_timers_running,
2484 	 * the flag will be cleared if there are no pending link timers.
2485 	 */
2486 	igi->igi_v3_timer = 0;
2487 
2488 	/*
2489 	 * Now clear the current-state and state-change report timers
2490 	 * for all memberships scoped to this link.
2491 	 */
2492 	ifp = igi->igi_ifp;
2493 	IGI_UNLOCK(igi);
2494 
2495 	in_multihead_lock_shared();
2496 	IN_FIRST_MULTI(step, inm);
2497 	while (inm != NULL) {
2498 		INM_LOCK(inm);
2499 		if (inm->inm_ifp != ifp && inm->inm_igi != igi) {
2500 			goto next;
2501 		}
2502 
2503 		switch (inm->inm_state) {
2504 		case IGMP_NOT_MEMBER:
2505 		case IGMP_SILENT_MEMBER:
2506 		case IGMP_IDLE_MEMBER:
2507 		case IGMP_LAZY_MEMBER:
2508 		case IGMP_SLEEPING_MEMBER:
2509 		case IGMP_AWAKENING_MEMBER:
2510 			/*
2511 			 * These states are either not relevant in v3 mode,
2512 			 * or are unreported. Do nothing.
2513 			 */
2514 			break;
2515 		case IGMP_LEAVING_MEMBER:
2516 			/*
2517 			 * If we are leaving the group and switching to
2518 			 * compatibility mode, we need to release the final
2519 			 * reference held for issuing the INCLUDE {}, and
2520 			 * transition to REPORTING to ensure the host leave
2521 			 * message is sent upstream to the old querier --
2522 			 * transition to NOT would lose the leave and race.
2523 			 * During igmp_final_leave(), we bumped up both the
2524 			 * request and reference counts.  Since we cannot
2525 			 * call in_multi_detach() here, defer this task to
2526 			 * the timer routine.
2527 			 */
2528 			VERIFY(inm->inm_nrelecnt != 0);
2529 			IGI_LOCK(igi);
2530 			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
2531 			IGI_UNLOCK(igi);
2532 			OS_FALLTHROUGH;
2533 		case IGMP_G_QUERY_PENDING_MEMBER:
2534 		case IGMP_SG_QUERY_PENDING_MEMBER:
2535 			inm_clear_recorded(inm);
2536 			OS_FALLTHROUGH;
2537 		case IGMP_REPORTING_MEMBER:
2538 			inm->inm_state = IGMP_REPORTING_MEMBER;
2539 			break;
2540 		}
2541 		/*
2542 		 * Always clear state-change and group report timers.
2543 		 * Free any pending IGMPv3 state-change records.
2544 		 */
2545 		inm->inm_sctimer = 0;
2546 		inm->inm_timer = 0;
2547 		IF_DRAIN(&inm->inm_scq);
2548 next:
2549 		INM_UNLOCK(inm);
2550 		IN_NEXT_MULTI(step, inm);
2551 	}
2552 	in_multihead_lock_done();
2553 
2554 	IGI_LOCK(igi);
2555 }
2556 
2557 /*
2558  * Update the Older Version Querier Present timers for a link.
2559  * See Section 7.2.1 of RFC 3376.
2560  */
2561 static void
igmp_v1v2_process_querier_timers(struct igmp_ifinfo * igi)2562 igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
2563 {
2564 	IGI_LOCK_ASSERT_HELD(igi);
2565 
2566 	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
2567 		/*
2568 		 * IGMPv1 and IGMPv2 Querier Present timers expired.
2569 		 *
2570 		 * Revert to IGMPv3.
2571 		 */
2572 		if (igi->igi_version != IGMP_VERSION_3) {
2573 			IGMP_PRINTF(("%s: transition from v%d -> v%d "
2574 			    "on 0x%llx(%s)\n", __func__,
2575 			    igi->igi_version, IGMP_VERSION_3,
2576 			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2577 			    if_name(igi->igi_ifp)));
2578 			igi->igi_version = IGMP_VERSION_3;
2579 			IF_DRAIN(&igi->igi_v2q);
2580 		}
2581 	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2582 		/*
2583 		 * IGMPv1 Querier Present timer expired,
2584 		 * IGMPv2 Querier Present timer running.
2585 		 * If IGMPv2 was disabled since last timeout,
2586 		 * revert to IGMPv3.
2587 		 * If IGMPv2 is enabled, revert to IGMPv2.
2588 		 */
2589 		if (!igmp_v2enable) {
2590 			IGMP_PRINTF(("%s: transition from v%d -> v%d "
2591 			    "on 0x%llx(%s%d)\n", __func__,
2592 			    igi->igi_version, IGMP_VERSION_3,
2593 			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2594 			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
2595 			igi->igi_v2_timer = 0;
2596 			igi->igi_version = IGMP_VERSION_3;
2597 			IF_DRAIN(&igi->igi_v2q);
2598 		} else {
2599 			--igi->igi_v2_timer;
2600 			if (igi->igi_version != IGMP_VERSION_2) {
2601 				IGMP_PRINTF(("%s: transition from v%d -> v%d "
2602 				    "on 0x%llx(%s)\n", __func__,
2603 				    igi->igi_version, IGMP_VERSION_2,
2604 				    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2605 				    if_name(igi->igi_ifp)));
2606 				IF_DRAIN(&igi->igi_gq);
2607 				igmp_v3_cancel_link_timers(igi);
2608 				igi->igi_version = IGMP_VERSION_2;
2609 			}
2610 		}
2611 	} else if (igi->igi_v1_timer > 0) {
2612 		/*
2613 		 * IGMPv1 Querier Present timer running.
2614 		 * Stop IGMPv2 timer if running.
2615 		 *
2616 		 * If IGMPv1 was disabled since last timeout,
2617 		 * revert to IGMPv3.
2618 		 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
2619 		 */
2620 		if (!igmp_v1enable) {
2621 			IGMP_PRINTF(("%s: transition from v%d -> v%d "
2622 			    "on 0x%llx(%s%d)\n", __func__,
2623 			    igi->igi_version, IGMP_VERSION_3,
2624 			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2625 			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
2626 			igi->igi_v1_timer = 0;
2627 			igi->igi_version = IGMP_VERSION_3;
2628 			IF_DRAIN(&igi->igi_v2q);
2629 		} else {
2630 			--igi->igi_v1_timer;
2631 		}
2632 		if (igi->igi_v2_timer > 0) {
2633 			IGMP_PRINTF(("%s: cancel v2 timer on 0x%llx(%s%d)\n",
2634 			    __func__,
2635 			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2636 			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
2637 			igi->igi_v2_timer = 0;
2638 		}
2639 	}
2640 }
2641 
2642 /*
2643  * Dispatch an IGMPv1/v2 host report or leave message.
2644  * These are always small enough to fit inside a single mbuf.
2645  */
2646 static int
igmp_v1v2_queue_report(struct in_multi * inm,const int type)2647 igmp_v1v2_queue_report(struct in_multi *inm, const int type)
2648 {
2649 	struct ifnet            *ifp;
2650 	struct igmp             *igmp;
2651 	struct ip               *ip;
2652 	struct mbuf             *m;
2653 	int                     error = 0;
2654 
2655 	INM_LOCK_ASSERT_HELD(inm);
2656 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2657 
2658 	ifp = inm->inm_ifp;
2659 
2660 	MGETHDR(m, M_DONTWAIT, MT_DATA);
2661 	if (m == NULL) {
2662 		return ENOMEM;
2663 	}
2664 	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
2665 
2666 	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);
2667 
2668 	m->m_data += sizeof(struct ip);
2669 	m->m_len = sizeof(struct igmp);
2670 
2671 	igmp = mtod(m, struct igmp *);
2672 	igmp->igmp_type = (u_char)type;
2673 	igmp->igmp_code = 0;
2674 	igmp->igmp_group = inm->inm_addr;
2675 	igmp->igmp_cksum = 0;
2676 	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));
2677 
2678 	m->m_data -= sizeof(struct ip);
2679 	m->m_len += sizeof(struct ip);
2680 
2681 	ip = mtod(m, struct ip *);
2682 	ip->ip_tos = 0;
2683 	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
2684 	ip->ip_off = 0;
2685 	ip->ip_p = IPPROTO_IGMP;
2686 	ip->ip_src.s_addr = INADDR_ANY;
2687 
2688 	if (type == IGMP_HOST_LEAVE_MESSAGE) {
2689 		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
2690 	} else {
2691 		ip->ip_dst = inm->inm_addr;
2692 	}
2693 
2694 	igmp_save_context(m, ifp);
2695 
2696 	m->m_flags |= M_IGMPV2;
2697 	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK) {
2698 		m->m_flags |= M_IGMP_LOOP;
2699 	}
2700 
2701 	/*
2702 	 * Due to the fact that at this point we are possibly holding
2703 	 * in_multihead_lock in shared or exclusive mode, we can't call
2704 	 * igmp_sendpkt() here since that will eventually call ip_output(),
2705 	 * which will try to lock in_multihead_lock and cause a deadlock.
2706 	 * Instead we defer the work to the igmp_timeout() thread, thus
2707 	 * avoiding unlocking in_multihead_lock here.
2708 	 */
2709 	if (IF_QFULL(&inm->inm_igi->igi_v2q)) {
2710 		IGMP_PRINTF(("%s: v1/v2 outbound queue full\n", __func__));
2711 		error = ENOMEM;
2712 		m_freem(m);
2713 	} else {
2714 		IF_ENQUEUE(&inm->inm_igi->igi_v2q, m);
2715 		VERIFY(error == 0);
2716 	}
2717 	return error;
2718 }
2719 
2720 /*
2721  * Process a state change from the upper layer for the given IPv4 group.
2722  *
2723  * Each socket holds a reference on the in_multi in its own ip_moptions.
2724  * The socket layer will have made the necessary updates to the group
2725  * state, it is now up to IGMP to issue a state change report if there
2726  * has been any change between T0 (when the last state-change was issued)
2727  * and T1 (now).
2728  *
2729  * We use the IGMPv3 state machine at group level. The IGMP module
2730  * however makes the decision as to which IGMP protocol version to speak.
2731  * A state change *from* INCLUDE {} always means an initial join.
2732  * A state change *to* INCLUDE {} always means a final leave.
2733  *
2734  * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
2735  * save ourselves a bunch of work; any exclusive mode groups need not
2736  * compute source filter lists.
2737  */
2738 int
igmp_change_state(struct in_multi * inm,struct igmp_tparams * itp)2739 igmp_change_state(struct in_multi *inm, struct igmp_tparams *itp)
2740 {
2741 	struct igmp_ifinfo *igi;
2742 	struct ifnet *ifp;
2743 	int error = 0;
2744 
2745 	VERIFY(itp != NULL);
2746 	bzero(itp, sizeof(*itp));
2747 
2748 	INM_LOCK_ASSERT_HELD(inm);
2749 	VERIFY(inm->inm_igi != NULL);
2750 	IGI_LOCK_ASSERT_NOTHELD(inm->inm_igi);
2751 
2752 	/*
2753 	 * Try to detect if the upper layer just asked us to change state
2754 	 * for an interface which has now gone away.
2755 	 */
2756 	VERIFY(inm->inm_ifma != NULL);
2757 	ifp = inm->inm_ifma->ifma_ifp;
2758 	/*
2759 	 * Sanity check that netinet's notion of ifp is the same as net's.
2760 	 */
2761 	VERIFY(inm->inm_ifp == ifp);
2762 
2763 	igi = IGMP_IFINFO(ifp);
2764 	VERIFY(igi != NULL);
2765 
2766 	/*
2767 	 * If we detect a state transition to or from MCAST_UNDEFINED
2768 	 * for this group, then we are starting or finishing an IGMP
2769 	 * life cycle for this group.
2770 	 */
2771 	if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
2772 		IGMP_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2773 		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode));
2774 		if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
2775 			IGMP_PRINTF(("%s: initial join\n", __func__));
2776 			error = igmp_initial_join(inm, igi, itp);
2777 			goto out;
2778 		} else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
2779 			IGMP_PRINTF(("%s: final leave\n", __func__));
2780 			igmp_final_leave(inm, igi, itp);
2781 			goto out;
2782 		}
2783 	} else {
2784 		IGMP_PRINTF(("%s: filter set change\n", __func__));
2785 	}
2786 
2787 	error = igmp_handle_state_change(inm, igi, itp);
2788 out:
2789 	return error;
2790 }
2791 
2792 /*
2793  * Perform the initial join for an IGMP group.
2794  *
2795  * When joining a group:
2796  *  If the group should have its IGMP traffic suppressed, do nothing.
2797  *  IGMPv1 starts sending IGMPv1 host membership reports.
2798  *  IGMPv2 starts sending IGMPv2 host membership reports.
2799  *  IGMPv3 will schedule an IGMPv3 state-change report containing the
2800  *  initial state of the membership.
2801  */
2802 static int
igmp_initial_join(struct in_multi * inm,struct igmp_ifinfo * igi,struct igmp_tparams * itp)2803 igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi,
2804     struct igmp_tparams *itp)
2805 {
2806 	struct ifnet            *ifp;
2807 	struct ifqueue          *ifq;
2808 	int                      error, retval, syncstates;
2809 
2810 	INM_LOCK_ASSERT_HELD(inm);
2811 	IGI_LOCK_ASSERT_NOTHELD(igi);
2812 	VERIFY(itp != NULL);
2813 
2814 	IGMP_INET_PRINTF(inm->inm_addr,
2815 	    ("%s: initial join %s on ifp 0x%llx(%s)\n", __func__,
2816 	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
2817 	    if_name(inm->inm_ifp)));
2818 
2819 	error = 0;
2820 	syncstates = 1;
2821 
2822 	ifp = inm->inm_ifp;
2823 
2824 	IGI_LOCK(igi);
2825 	VERIFY(igi->igi_ifp == ifp);
2826 
2827 	/*
2828 	 * Groups joined on loopback or marked as 'not reported',
2829 	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
2830 	 * are never reported in any IGMP protocol exchanges.
2831 	 * All other groups enter the appropriate IGMP state machine
2832 	 * for the version in use on this link.
2833 	 * A link marked as IGIF_SILENT causes IGMP to be completely
2834 	 * disabled for the link.
2835 	 */
2836 	if ((ifp->if_flags & IFF_LOOPBACK) ||
2837 	    (igi->igi_flags & IGIF_SILENT) ||
2838 	    !igmp_isgroupreported(inm->inm_addr)) {
2839 		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
2840 		    __func__));
2841 		inm->inm_state = IGMP_SILENT_MEMBER;
2842 		inm->inm_timer = 0;
2843 	} else {
2844 		/*
2845 		 * Deal with overlapping in_multi lifecycle.
2846 		 * If this group was LEAVING, then make sure
2847 		 * we drop the reference we picked up to keep the
2848 		 * group around for the final INCLUDE {} enqueue.
2849 		 * Since we cannot call in_multi_detach() here,
2850 		 * defer this task to the timer routine.
2851 		 */
2852 		if (igi->igi_version == IGMP_VERSION_3 &&
2853 		    inm->inm_state == IGMP_LEAVING_MEMBER) {
2854 			VERIFY(inm->inm_nrelecnt != 0);
2855 			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
2856 		}
2857 
2858 		inm->inm_state = IGMP_REPORTING_MEMBER;
2859 
2860 		switch (igi->igi_version) {
2861 		case IGMP_VERSION_1:
2862 		case IGMP_VERSION_2:
2863 			inm->inm_state = IGMP_IDLE_MEMBER;
2864 			error = igmp_v1v2_queue_report(inm,
2865 			    (igi->igi_version == IGMP_VERSION_2) ?
2866 			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
2867 			    IGMP_v1_HOST_MEMBERSHIP_REPORT);
2868 
2869 			INM_LOCK_ASSERT_HELD(inm);
2870 			IGI_LOCK_ASSERT_HELD(igi);
2871 
2872 			if (error == 0) {
2873 				inm->inm_timer =
2874 				    IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
2875 				itp->cst = 1;
2876 			}
2877 			break;
2878 
2879 		case IGMP_VERSION_3:
2880 			/*
2881 			 * Defer update of T0 to T1, until the first copy
2882 			 * of the state change has been transmitted.
2883 			 */
2884 			syncstates = 0;
2885 
2886 			/*
2887 			 * Immediately enqueue a State-Change Report for
2888 			 * this interface, freeing any previous reports.
2889 			 * Don't kick the timers if there is nothing to do,
2890 			 * or if an error occurred.
2891 			 */
2892 			ifq = &inm->inm_scq;
2893 			IF_DRAIN(ifq);
2894 			retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
2895 			    0, 0);
2896 			itp->cst = (ifq->ifq_len > 0);
2897 			IGMP_PRINTF(("%s: enqueue record = %d\n",
2898 			    __func__, retval));
2899 			if (retval <= 0) {
2900 				error = retval * -1;
2901 				break;
2902 			}
2903 
2904 			/*
2905 			 * Schedule transmission of pending state-change
2906 			 * report up to RV times for this link. The timer
2907 			 * will fire at the next igmp_timeout (1 second),
2908 			 * giving us an opportunity to merge the reports.
2909 			 */
2910 			if (igi->igi_flags & IGIF_LOOPBACK) {
2911 				inm->inm_scrv = 1;
2912 			} else {
2913 				VERIFY(igi->igi_rv > 1);
2914 				inm->inm_scrv = (uint16_t)igi->igi_rv;
2915 			}
2916 			inm->inm_sctimer = 1;
2917 			itp->sct = 1;
2918 
2919 			error = 0;
2920 			break;
2921 		}
2922 	}
2923 	IGI_UNLOCK(igi);
2924 
2925 	/*
2926 	 * Only update the T0 state if state change is atomic,
2927 	 * i.e. we don't need to wait for a timer to fire before we
2928 	 * can consider the state change to have been communicated.
2929 	 */
2930 	if (syncstates) {
2931 		inm_commit(inm);
2932 		IGMP_INET_PRINTF(inm->inm_addr,
2933 		    ("%s: T1 -> T0 for %s/%s\n", __func__,
2934 		    _igmp_inet_buf, if_name(inm->inm_ifp)));
2935 	}
2936 
2937 	return error;
2938 }
2939 
2940 /*
2941  * Issue an intermediate state change during the IGMP life-cycle.
2942  */
2943 static int
igmp_handle_state_change(struct in_multi * inm,struct igmp_ifinfo * igi,struct igmp_tparams * itp)2944 igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi,
2945     struct igmp_tparams *itp)
2946 {
2947 	struct ifnet            *ifp;
2948 	int                      retval = 0;
2949 
2950 	INM_LOCK_ASSERT_HELD(inm);
2951 	IGI_LOCK_ASSERT_NOTHELD(igi);
2952 	VERIFY(itp != NULL);
2953 
2954 	IGMP_INET_PRINTF(inm->inm_addr,
2955 	    ("%s: state change for %s on ifp 0x%llx(%s)\n", __func__,
2956 	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
2957 	    if_name(inm->inm_ifp)));
2958 
2959 	ifp = inm->inm_ifp;
2960 
2961 	IGI_LOCK(igi);
2962 	VERIFY(igi->igi_ifp == ifp);
2963 
2964 	if ((ifp->if_flags & IFF_LOOPBACK) ||
2965 	    (igi->igi_flags & IGIF_SILENT) ||
2966 	    !igmp_isgroupreported(inm->inm_addr) ||
2967 	    (igi->igi_version != IGMP_VERSION_3)) {
2968 		IGI_UNLOCK(igi);
2969 		if (!igmp_isgroupreported(inm->inm_addr)) {
2970 			IGMP_PRINTF(("%s: not kicking state "
2971 			    "machine for silent group\n", __func__));
2972 		}
2973 		IGMP_PRINTF(("%s: nothing to do\n", __func__));
2974 		inm_commit(inm);
2975 		IGMP_INET_PRINTF(inm->inm_addr,
2976 		    ("%s: T1 -> T0 for %s/%s\n", __func__,
2977 		    _igmp_inet_buf, inm->inm_ifp->if_name));
2978 		goto done;
2979 	}
2980 
2981 	IF_DRAIN(&inm->inm_scq);
2982 
2983 	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
2984 	itp->cst = (inm->inm_scq.ifq_len > 0);
2985 	IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
2986 	if (retval <= 0) {
2987 		IGI_UNLOCK(igi);
2988 		retval *= -1;
2989 		goto done;
2990 	}
2991 	/*
2992 	 * If record(s) were enqueued, start the state-change
2993 	 * report timer for this group.
2994 	 */
2995 	inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : (uint16_t)igi->igi_rv);
2996 	inm->inm_sctimer = 1;
2997 	itp->sct = 1;
2998 	IGI_UNLOCK(igi);
2999 done:
3000 	return retval;
3001 }
3002 
3003 /*
3004  * Perform the final leave for an IGMP group.
3005  *
3006  * When leaving a group:
3007  *  IGMPv1 does nothing.
3008  *  IGMPv2 sends a host leave message, if and only if we are the reporter.
3009  *  IGMPv3 enqueues a state-change report containing a transition
3010  *  to INCLUDE {} for immediate transmission.
3011  */
3012 static void
igmp_final_leave(struct in_multi * inm,struct igmp_ifinfo * igi,struct igmp_tparams * itp)3013 igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi,
3014     struct igmp_tparams *itp)
3015 {
3016 	int syncstates = 1;
3017 	bool retried_already = false;
3018 
3019 	INM_LOCK_ASSERT_HELD(inm);
3020 	IGI_LOCK_ASSERT_NOTHELD(igi);
3021 	VERIFY(itp != NULL);
3022 
3023 	IGMP_INET_PRINTF(inm->inm_addr,
3024 	    ("%s: final leave %s on ifp 0x%llx(%s)\n", __func__,
3025 	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
3026 	    if_name(inm->inm_ifp)));
3027 
3028 retry:
3029 	switch (inm->inm_state) {
3030 	case IGMP_NOT_MEMBER:
3031 	case IGMP_SILENT_MEMBER:
3032 	case IGMP_LEAVING_MEMBER:
3033 		/* Already leaving or left; do nothing. */
3034 		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
3035 		    __func__));
3036 		break;
3037 	case IGMP_REPORTING_MEMBER:
3038 	case IGMP_IDLE_MEMBER:
3039 	case IGMP_G_QUERY_PENDING_MEMBER:
3040 	case IGMP_SG_QUERY_PENDING_MEMBER:
3041 		IGI_LOCK(igi);
3042 		if (igi->igi_version == IGMP_VERSION_2) {
3043 			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
3044 			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
3045 				/*
3046 				 * We may be in the process of downgrading to
3047 				 * IGMPv2 but because we just grabbed the
3048 				 * igi_lock we may have lost the race.
3049 				 */
3050 				if (!retried_already) {
3051 					IGI_UNLOCK(igi);
3052 					retried_already = true;
3053 					goto retry;
3054 				} else {
3055 					/*
3056 					 * Proceed with leaving the group
3057 					 * as if it were IGMPv2 even though we
3058 					 * may have an inconsistent multicast state.
3059 					 */
3060 				}
3061 			}
3062 			/* scheduler timer if enqueue is successful */
3063 			itp->cst = (igmp_v1v2_queue_report(inm,
3064 			    IGMP_HOST_LEAVE_MESSAGE) == 0);
3065 
3066 			INM_LOCK_ASSERT_HELD(inm);
3067 			IGI_LOCK_ASSERT_HELD(igi);
3068 
3069 			inm->inm_state = IGMP_NOT_MEMBER;
3070 		} else if (igi->igi_version == IGMP_VERSION_3) {
3071 			/*
3072 			 * Stop group timer and all pending reports.
3073 			 * Immediately enqueue a state-change report
3074 			 * TO_IN {} to be sent on the next timeout,
3075 			 * giving us an opportunity to merge reports.
3076 			 */
3077 			IF_DRAIN(&inm->inm_scq);
3078 			inm->inm_timer = 0;
3079 			if (igi->igi_flags & IGIF_LOOPBACK) {
3080 				inm->inm_scrv = 1;
3081 			} else {
3082 				inm->inm_scrv = (uint16_t)igi->igi_rv;
3083 			}
3084 			IGMP_INET_PRINTF(inm->inm_addr,
3085 			    ("%s: Leaving %s/%s with %d "
3086 			    "pending retransmissions.\n", __func__,
3087 			    _igmp_inet_buf, if_name(inm->inm_ifp),
3088 			    inm->inm_scrv));
3089 			if (inm->inm_scrv == 0) {
3090 				inm->inm_state = IGMP_NOT_MEMBER;
3091 				inm->inm_sctimer = 0;
3092 			} else {
3093 				int retval;
3094 				/*
3095 				 * Stick around in the in_multihead list;
3096 				 * the final detach will be issued by
3097 				 * igmp_v3_process_group_timers() when
3098 				 * the retransmit timer expires.
3099 				 */
3100 				INM_ADDREF_LOCKED(inm);
3101 				VERIFY(inm->inm_debug & IFD_ATTACHED);
3102 				inm->inm_reqcnt++;
3103 				VERIFY(inm->inm_reqcnt >= 1);
3104 				inm->inm_nrelecnt++;
3105 				VERIFY(inm->inm_nrelecnt != 0);
3106 
3107 				retval = igmp_v3_enqueue_group_record(
3108 					&inm->inm_scq, inm, 1, 0, 0);
3109 				itp->cst = (inm->inm_scq.ifq_len > 0);
3110 				KASSERT(retval != 0,
3111 				    ("%s: enqueue record = %d\n", __func__,
3112 				    retval));
3113 
3114 				inm->inm_state = IGMP_LEAVING_MEMBER;
3115 				inm->inm_sctimer = 1;
3116 				itp->sct = 1;
3117 				syncstates = 0;
3118 			}
3119 		}
3120 		IGI_UNLOCK(igi);
3121 		break;
3122 	case IGMP_LAZY_MEMBER:
3123 	case IGMP_SLEEPING_MEMBER:
3124 	case IGMP_AWAKENING_MEMBER:
3125 		/* Our reports are suppressed; do nothing. */
3126 		break;
3127 	}
3128 
3129 	if (syncstates) {
3130 		inm_commit(inm);
3131 		IGMP_INET_PRINTF(inm->inm_addr,
3132 		    ("%s: T1 -> T0 for %s/%s\n", __func__,
3133 		    _igmp_inet_buf, if_name(inm->inm_ifp)));
3134 		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
3135 		IGMP_INET_PRINTF(inm->inm_addr,
3136 		    ("%s: T1 now MCAST_UNDEFINED for %s/%s\n",
3137 		    __func__, _igmp_inet_buf, if_name(inm->inm_ifp)));
3138 	}
3139 }
3140 
3141 /*
3142  * Enqueue an IGMPv3 group record to the given output queue.
3143  *
3144  * XXX This function could do with having the allocation code
3145  * split out, and the multiple-tree-walks coalesced into a single
3146  * routine as has been done in igmp_v3_enqueue_filter_change().
3147  *
3148  * If is_state_change is zero, a current-state record is appended.
3149  * If is_state_change is non-zero, a state-change report is appended.
3150  *
3151  * If is_group_query is non-zero, an mbuf packet chain is allocated.
3152  * If is_group_query is zero, and if there is a packet with free space
3153  * at the tail of the queue, it will be appended to providing there
3154  * is enough free space.
3155  * Otherwise a new mbuf packet chain is allocated.
3156  *
3157  * If is_source_query is non-zero, each source is checked to see if
3158  * it was recorded for a Group-Source query, and will be omitted if
3159  * it is not both in-mode and recorded.
3160  *
3161  * The function will attempt to allocate leading space in the packet
3162  * for the IP/IGMP header to be prepended without fragmenting the chain.
3163  *
3164  * If successful the size of all data appended to the queue is returned,
3165  * otherwise an error code less than zero is returned, or zero if
3166  * no record(s) were appended.
3167  */
3168 static int
igmp_v3_enqueue_group_record(struct ifqueue * ifq,struct in_multi * inm,const int is_state_change,const int is_group_query,const int is_source_query)3169 igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
3170     const int is_state_change, const int is_group_query,
3171     const int is_source_query)
3172 {
3173 	struct igmp_grouprec     ig;
3174 	struct igmp_grouprec    *pig;
3175 	struct ifnet            *ifp;
3176 	struct ip_msource       *ims, *nims;
3177 	struct mbuf             *m0, *m, *md;
3178 	int                      error, is_filter_list_change;
3179 	int                      minrec0len, m0srcs, nbytes, off;
3180 	uint16_t                 msrcs;
3181 	int                      record_has_sources;
3182 	int                      now;
3183 	int                      type;
3184 	in_addr_t                naddr;
3185 	uint16_t                 mode;
3186 	u_int16_t                ig_numsrc;
3187 
3188 	INM_LOCK_ASSERT_HELD(inm);
3189 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
3190 
3191 	error = 0;
3192 	ifp = inm->inm_ifp;
3193 	is_filter_list_change = 0;
3194 	m = NULL;
3195 	m0 = NULL;
3196 	m0srcs = 0;
3197 	msrcs = 0;
3198 	nbytes = 0;
3199 	nims = NULL;
3200 	record_has_sources = 1;
3201 	pig = NULL;
3202 	type = IGMP_DO_NOTHING;
3203 	mode = inm->inm_st[1].iss_fmode;
3204 
3205 	/*
3206 	 * If we did not transition out of ASM mode during t0->t1,
3207 	 * and there are no source nodes to process, we can skip
3208 	 * the generation of source records.
3209 	 */
3210 	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
3211 	    inm->inm_nsrc == 0) {
3212 		record_has_sources = 0;
3213 	}
3214 
3215 	if (is_state_change) {
3216 		/*
3217 		 * Queue a state change record.
3218 		 * If the mode did not change, and there are non-ASM
3219 		 * listeners or source filters present,
3220 		 * we potentially need to issue two records for the group.
3221 		 * If we are transitioning to MCAST_UNDEFINED, we need
3222 		 * not send any sources.
3223 		 * If there are ASM listeners, and there was no filter
3224 		 * mode transition of any kind, do nothing.
3225 		 */
3226 		if (mode != inm->inm_st[0].iss_fmode) {
3227 			if (mode == MCAST_EXCLUDE) {
3228 				IGMP_PRINTF(("%s: change to EXCLUDE\n",
3229 				    __func__));
3230 				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
3231 			} else {
3232 				IGMP_PRINTF(("%s: change to INCLUDE\n",
3233 				    __func__));
3234 				type = IGMP_CHANGE_TO_INCLUDE_MODE;
3235 				if (mode == MCAST_UNDEFINED) {
3236 					record_has_sources = 0;
3237 				}
3238 			}
3239 		} else {
3240 			if (record_has_sources) {
3241 				is_filter_list_change = 1;
3242 			} else {
3243 				type = IGMP_DO_NOTHING;
3244 			}
3245 		}
3246 	} else {
3247 		/*
3248 		 * Queue a current state record.
3249 		 */
3250 		if (mode == MCAST_EXCLUDE) {
3251 			type = IGMP_MODE_IS_EXCLUDE;
3252 		} else if (mode == MCAST_INCLUDE) {
3253 			type = IGMP_MODE_IS_INCLUDE;
3254 			VERIFY(inm->inm_st[1].iss_asm == 0);
3255 		}
3256 	}
3257 
3258 	/*
3259 	 * Generate the filter list changes using a separate function.
3260 	 */
3261 	if (is_filter_list_change) {
3262 		return igmp_v3_enqueue_filter_change(ifq, inm);
3263 	}
3264 
3265 	if (type == IGMP_DO_NOTHING) {
3266 		IGMP_INET_PRINTF(inm->inm_addr,
3267 		    ("%s: nothing to do for %s/%s\n",
3268 		    __func__, _igmp_inet_buf,
3269 		    if_name(inm->inm_ifp)));
3270 		return 0;
3271 	}
3272 
3273 	/*
3274 	 * If any sources are present, we must be able to fit at least
3275 	 * one in the trailing space of the tail packet's mbuf,
3276 	 * ideally more.
3277 	 */
3278 	minrec0len = sizeof(struct igmp_grouprec);
3279 	if (record_has_sources) {
3280 		minrec0len += sizeof(in_addr_t);
3281 	}
3282 
3283 	IGMP_INET_PRINTF(inm->inm_addr,
3284 	    ("%s: queueing %s for %s/%s\n", __func__,
3285 	    igmp_rec_type_to_str(type), _igmp_inet_buf,
3286 	    if_name(inm->inm_ifp)));
3287 
3288 	/*
3289 	 * Check if we have a packet in the tail of the queue for this
3290 	 * group into which the first group record for this group will fit.
3291 	 * Otherwise allocate a new packet.
3292 	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
3293 	 * Note: Group records for G/GSR query responses MUST be sent
3294 	 * in their own packet.
3295 	 */
3296 	m0 = ifq->ifq_tail;
3297 	if (!is_group_query &&
3298 	    m0 != NULL &&
3299 	    (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
3300 	    (m0->m_pkthdr.len + minrec0len) <
3301 	    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3302 		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3303 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3304 		m = m0;
3305 		IGMP_PRINTF(("%s: use existing packet\n", __func__));
3306 	} else {
3307 		if (IF_QFULL(ifq)) {
3308 			IGMP_PRINTF(("%s: outbound queue full\n", __func__));
3309 			return -ENOMEM;
3310 		}
3311 		m = NULL;
3312 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3313 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3314 		if (!is_state_change && !is_group_query) {
3315 			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3316 			if (m) {
3317 				m->m_data += IGMP_LEADINGSPACE;
3318 			}
3319 		}
3320 		if (m == NULL) {
3321 			m = m_gethdr(M_DONTWAIT, MT_DATA);
3322 			if (m) {
3323 				MH_ALIGN(m, IGMP_LEADINGSPACE);
3324 			}
3325 		}
3326 		if (m == NULL) {
3327 			return -ENOMEM;
3328 		}
3329 
3330 		igmp_save_context(m, ifp);
3331 
3332 		IGMP_PRINTF(("%s: allocated first packet\n", __func__));
3333 	}
3334 
3335 	/*
3336 	 * Append group record.
3337 	 * If we have sources, we don't know how many yet.
3338 	 */
3339 	ig.ig_type = (u_char)type;
3340 	ig.ig_datalen = 0;
3341 	ig.ig_numsrc = 0;
3342 	ig.ig_group = inm->inm_addr;
3343 	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3344 		if (m != m0) {
3345 			m_freem(m);
3346 		}
3347 		IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
3348 		return -ENOMEM;
3349 	}
3350 	nbytes += sizeof(struct igmp_grouprec);
3351 
3352 	/*
3353 	 * Append as many sources as will fit in the first packet.
3354 	 * If we are appending to a new packet, the chain allocation
3355 	 * may potentially use clusters; use m_getptr() in this case.
3356 	 * If we are appending to an existing packet, we need to obtain
3357 	 * a pointer to the group record after m_append(), in case a new
3358 	 * mbuf was allocated.
3359 	 * Only append sources which are in-mode at t1. If we are
3360 	 * transitioning to MCAST_UNDEFINED state on the group, do not
3361 	 * include source entries.
3362 	 * Only report recorded sources in our filter set when responding
3363 	 * to a group-source query.
3364 	 */
3365 	if (record_has_sources) {
3366 		if (m == m0) {
3367 			md = m_last(m);
3368 			pig = (struct igmp_grouprec *)(void *)
3369 			    (mtod(md, uint8_t *) + md->m_len - nbytes);
3370 		} else {
3371 			md = m_getptr(m, 0, &off);
3372 			pig = (struct igmp_grouprec *)(void *)
3373 			    (mtod(md, uint8_t *) + off);
3374 		}
3375 		msrcs = 0;
3376 		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
3377 #ifdef IGMP_DEBUG
3378 			char buf[MAX_IPv4_STR_LEN];
3379 
3380 			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3381 			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3382 #endif
3383 			now = ims_get_mode(inm, ims, 1);
3384 			IGMP_PRINTF(("%s: node is %d\n", __func__, now));
3385 			if ((now != mode) ||
3386 			    (now == mode && mode == MCAST_UNDEFINED)) {
3387 				IGMP_PRINTF(("%s: skip node\n", __func__));
3388 				continue;
3389 			}
3390 			if (is_source_query && ims->ims_stp == 0) {
3391 				IGMP_PRINTF(("%s: skip unrecorded node\n",
3392 				    __func__));
3393 				continue;
3394 			}
3395 			IGMP_PRINTF(("%s: append node\n", __func__));
3396 			naddr = htonl(ims->ims_haddr);
3397 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3398 				if (m != m0) {
3399 					m_freem(m);
3400 				}
3401 				IGMP_PRINTF(("%s: m_append() failed.\n",
3402 				    __func__));
3403 				return -ENOMEM;
3404 			}
3405 			nbytes += sizeof(in_addr_t);
3406 			++msrcs;
3407 			if (msrcs == m0srcs) {
3408 				break;
3409 			}
3410 		}
3411 		IGMP_PRINTF(("%s: msrcs is %d this packet\n", __func__,
3412 		    msrcs));
3413 		ig_numsrc = htons(msrcs);
3414 		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3415 		nbytes += (msrcs * sizeof(in_addr_t));
3416 	}
3417 
3418 	if (is_source_query && msrcs == 0) {
3419 		IGMP_PRINTF(("%s: no recorded sources to report\n", __func__));
3420 		if (m != m0) {
3421 			m_freem(m);
3422 		}
3423 		return 0;
3424 	}
3425 
3426 	/*
3427 	 * We are good to go with first packet.
3428 	 */
3429 	if (m != m0) {
3430 		IGMP_PRINTF(("%s: enqueueing first packet\n", __func__));
3431 		m->m_pkthdr.vt_nrecs = 1;
3432 		IF_ENQUEUE(ifq, m);
3433 	} else {
3434 		m->m_pkthdr.vt_nrecs++;
3435 	}
3436 	/*
3437 	 * No further work needed if no source list in packet(s).
3438 	 */
3439 	if (!record_has_sources) {
3440 		return nbytes;
3441 	}
3442 
3443 	/*
3444 	 * Whilst sources remain to be announced, we need to allocate
3445 	 * a new packet and fill out as many sources as will fit.
3446 	 * Always try for a cluster first.
3447 	 */
3448 	while (nims != NULL) {
3449 		if (IF_QFULL(ifq)) {
3450 			IGMP_PRINTF(("%s: outbound queue full\n", __func__));
3451 			return -ENOMEM;
3452 		}
3453 		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3454 		if (m) {
3455 			m->m_data += IGMP_LEADINGSPACE;
3456 		}
3457 		if (m == NULL) {
3458 			m = m_gethdr(M_DONTWAIT, MT_DATA);
3459 			if (m) {
3460 				MH_ALIGN(m, IGMP_LEADINGSPACE);
3461 			}
3462 		}
3463 		if (m == NULL) {
3464 			return -ENOMEM;
3465 		}
3466 		igmp_save_context(m, ifp);
3467 		md = m_getptr(m, 0, &off);
3468 		pig = (struct igmp_grouprec *)(void *)
3469 		    (mtod(md, uint8_t *) + off);
3470 		IGMP_PRINTF(("%s: allocated next packet\n", __func__));
3471 
3472 		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3473 			if (m != m0) {
3474 				m_freem(m);
3475 			}
3476 			IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
3477 			return -ENOMEM;
3478 		}
3479 		m->m_pkthdr.vt_nrecs = 1;
3480 		nbytes += sizeof(struct igmp_grouprec);
3481 
3482 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3483 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3484 
3485 		msrcs = 0;
3486 		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3487 #ifdef IGMP_DEBUG
3488 			char buf[MAX_IPv4_STR_LEN];
3489 
3490 			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3491 			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3492 #endif
3493 			now = ims_get_mode(inm, ims, 1);
3494 			if ((now != mode) ||
3495 			    (now == mode && mode == MCAST_UNDEFINED)) {
3496 				IGMP_PRINTF(("%s: skip node\n", __func__));
3497 				continue;
3498 			}
3499 			if (is_source_query && ims->ims_stp == 0) {
3500 				IGMP_PRINTF(("%s: skip unrecorded node\n",
3501 				    __func__));
3502 				continue;
3503 			}
3504 			IGMP_PRINTF(("%s: append node\n", __func__));
3505 			naddr = htonl(ims->ims_haddr);
3506 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3507 				if (m != m0) {
3508 					m_freem(m);
3509 				}
3510 				IGMP_PRINTF(("%s: m_append() failed.\n",
3511 				    __func__));
3512 				return -ENOMEM;
3513 			}
3514 			++msrcs;
3515 			if (msrcs == m0srcs) {
3516 				break;
3517 			}
3518 		}
3519 		ig_numsrc = htons(msrcs);
3520 		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3521 		nbytes += (msrcs * sizeof(in_addr_t));
3522 
3523 		IGMP_PRINTF(("%s: enqueueing next packet\n", __func__));
3524 		IF_ENQUEUE(ifq, m);
3525 	}
3526 
3527 	return nbytes;
3528 }
3529 
3530 /*
3531  * Type used to mark record pass completion.
3532  * We exploit the fact we can cast to this easily from the
3533  * current filter modes on each ip_msource node.
3534  */
3535 typedef enum {
3536 	REC_NONE = 0x00,        /* MCAST_UNDEFINED */
3537 	REC_ALLOW = 0x01,       /* MCAST_INCLUDE */
3538 	REC_BLOCK = 0x02,       /* MCAST_EXCLUDE */
3539 	REC_FULL = REC_ALLOW | REC_BLOCK
3540 } rectype_t;
3541 
3542 /*
3543  * Enqueue an IGMPv3 filter list change to the given output queue.
3544  *
3545  * Source list filter state is held in an RB-tree. When the filter list
3546  * for a group is changed without changing its mode, we need to compute
3547  * the deltas between T0 and T1 for each source in the filter set,
3548  * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3549  *
3550  * As we may potentially queue two record types, and the entire R-B tree
3551  * needs to be walked at once, we break this out into its own function
3552  * so we can generate a tightly packed queue of packets.
3553  *
3554  * XXX This could be written to only use one tree walk, although that makes
3555  * serializing into the mbuf chains a bit harder. For now we do two walks
3556  * which makes things easier on us, and it may or may not be harder on
3557  * the L2 cache.
3558  *
3559  * If successful the size of all data appended to the queue is returned,
3560  * otherwise an error code less than zero is returned, or zero if
3561  * no record(s) were appended.
3562  */
3563 static int
igmp_v3_enqueue_filter_change(struct ifqueue * ifq,struct in_multi * inm)3564 igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
3565 {
3566 	static const int MINRECLEN =
3567 	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
3568 	struct ifnet            *ifp;
3569 	struct igmp_grouprec     ig;
3570 	struct igmp_grouprec    *pig;
3571 	struct ip_msource       *ims, *nims;
3572 	struct mbuf             *m, *m0, *md;
3573 	in_addr_t                naddr;
3574 	int                      m0srcs, nbytes, npbytes, off, schanged;
3575 	uint16_t                 rsrcs;
3576 	int                      nallow, nblock;
3577 	uint16_t                 mode;
3578 	uint8_t                  now, then;
3579 	rectype_t                crt, drt, nrt;
3580 	u_int16_t                ig_numsrc;
3581 
3582 	INM_LOCK_ASSERT_HELD(inm);
3583 
3584 	if (inm->inm_nsrc == 0 ||
3585 	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0)) {
3586 		return 0;
3587 	}
3588 
3589 	ifp = inm->inm_ifp;                     /* interface */
3590 	mode = inm->inm_st[1].iss_fmode;        /* filter mode at t1 */
3591 	crt = REC_NONE; /* current group record type */
3592 	drt = REC_NONE; /* mask of completed group record types */
3593 	nrt = REC_NONE; /* record type for current node */
3594 	m0srcs = 0;     /* # source which will fit in current mbuf chain */
3595 	nbytes = 0;     /* # of bytes appended to group's state-change queue */
3596 	npbytes = 0;    /* # of bytes appended this packet */
3597 	rsrcs = 0;      /* # sources encoded in current record */
3598 	schanged = 0;   /* # nodes encoded in overall filter change */
3599 	nallow = 0;     /* # of source entries in ALLOW_NEW */
3600 	nblock = 0;     /* # of source entries in BLOCK_OLD */
3601 	nims = NULL;    /* next tree node pointer */
3602 
3603 	/*
3604 	 * For each possible filter record mode.
3605 	 * The first kind of source we encounter tells us which
3606 	 * is the first kind of record we start appending.
3607 	 * If a node transitioned to UNDEFINED at t1, its mode is treated
3608 	 * as the inverse of the group's filter mode.
3609 	 */
3610 	while (drt != REC_FULL) {
3611 		do {
3612 			m0 = ifq->ifq_tail;
3613 			if (m0 != NULL &&
3614 			    (m0->m_pkthdr.vt_nrecs + 1 <=
3615 			    IGMP_V3_REPORT_MAXRECS) &&
3616 			    (m0->m_pkthdr.len + MINRECLEN) <
3617 			    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3618 				m = m0;
3619 				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3620 				    sizeof(struct igmp_grouprec)) /
3621 				    sizeof(in_addr_t);
3622 				IGMP_PRINTF(("%s: use previous packet\n",
3623 				    __func__));
3624 			} else {
3625 				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3626 				if (m) {
3627 					m->m_data += IGMP_LEADINGSPACE;
3628 				}
3629 				if (m == NULL) {
3630 					m = m_gethdr(M_DONTWAIT, MT_DATA);
3631 					if (m) {
3632 						MH_ALIGN(m, IGMP_LEADINGSPACE);
3633 					}
3634 				}
3635 				if (m == NULL) {
3636 					IGMP_PRINTF(("%s: m_get*() failed\n",
3637 					    __func__));
3638 					return -ENOMEM;
3639 				}
3640 				m->m_pkthdr.vt_nrecs = 0;
3641 				igmp_save_context(m, ifp);
3642 				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3643 				    sizeof(struct igmp_grouprec)) /
3644 				    sizeof(in_addr_t);
3645 				npbytes = 0;
3646 				IGMP_PRINTF(("%s: allocated new packet\n",
3647 				    __func__));
3648 			}
3649 			/*
3650 			 * Append the IGMP group record header to the
3651 			 * current packet's data area.
3652 			 * Recalculate pointer to free space for next
3653 			 * group record, in case m_append() allocated
3654 			 * a new mbuf or cluster.
3655 			 */
3656 			memset(&ig, 0, sizeof(ig));
3657 			ig.ig_group = inm->inm_addr;
3658 			if (!m_append(m, sizeof(ig), (void *)&ig)) {
3659 				if (m != m0) {
3660 					m_freem(m);
3661 				}
3662 				IGMP_PRINTF(("%s: m_append() failed\n",
3663 				    __func__));
3664 				return -ENOMEM;
3665 			}
3666 			npbytes += sizeof(struct igmp_grouprec);
3667 			if (m != m0) {
3668 				/* new packet; offset in c hain */
3669 				md = m_getptr(m, npbytes -
3670 				    sizeof(struct igmp_grouprec), &off);
3671 				pig = (struct igmp_grouprec *)(void *)(mtod(md,
3672 				    uint8_t *) + off);
3673 			} else {
3674 				/* current packet; offset from last append */
3675 				md = m_last(m);
3676 				pig = (struct igmp_grouprec *)(void *)(mtod(md,
3677 				    uint8_t *) + md->m_len -
3678 				    sizeof(struct igmp_grouprec));
3679 			}
3680 			/*
3681 			 * Begin walking the tree for this record type
3682 			 * pass, or continue from where we left off
3683 			 * previously if we had to allocate a new packet.
3684 			 * Only report deltas in-mode at t1.
3685 			 * We need not report included sources as allowed
3686 			 * if we are in inclusive mode on the group,
3687 			 * however the converse is not true.
3688 			 */
3689 			rsrcs = 0;
3690 			if (nims == NULL) {
3691 				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
3692 			}
3693 			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3694 #ifdef IGMP_DEBUG
3695 				char buf[MAX_IPv4_STR_LEN];
3696 
3697 				inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3698 				IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3699 #endif
3700 				now = ims_get_mode(inm, ims, 1);
3701 				then = ims_get_mode(inm, ims, 0);
3702 				IGMP_PRINTF(("%s: mode: t0 %d, t1 %d\n",
3703 				    __func__, then, now));
3704 				if (now == then) {
3705 					IGMP_PRINTF(("%s: skip unchanged\n",
3706 					    __func__));
3707 					continue;
3708 				}
3709 				if (mode == MCAST_EXCLUDE &&
3710 				    now == MCAST_INCLUDE) {
3711 					IGMP_PRINTF(("%s: skip IN src on EX "
3712 					    "group\n", __func__));
3713 					continue;
3714 				}
3715 				nrt = (rectype_t)now;
3716 				if (nrt == REC_NONE) {
3717 					nrt = (rectype_t)(~mode & REC_FULL);
3718 				}
3719 				if (schanged++ == 0) {
3720 					crt = nrt;
3721 				} else if (crt != nrt) {
3722 					continue;
3723 				}
3724 				naddr = htonl(ims->ims_haddr);
3725 				if (!m_append(m, sizeof(in_addr_t),
3726 				    (void *)&naddr)) {
3727 					if (m != m0) {
3728 						m_freem(m);
3729 					}
3730 					IGMP_PRINTF(("%s: m_append() failed\n",
3731 					    __func__));
3732 					return -ENOMEM;
3733 				}
3734 				nallow += !!(crt == REC_ALLOW);
3735 				nblock += !!(crt == REC_BLOCK);
3736 				if (++rsrcs == m0srcs) {
3737 					break;
3738 				}
3739 			}
3740 			/*
3741 			 * If we did not append any tree nodes on this
3742 			 * pass, back out of allocations.
3743 			 */
3744 			if (rsrcs == 0) {
3745 				npbytes -= sizeof(struct igmp_grouprec);
3746 				if (m != m0) {
3747 					IGMP_PRINTF(("%s: m_free(m)\n",
3748 					    __func__));
3749 					m_freem(m);
3750 				} else {
3751 					IGMP_PRINTF(("%s: m_adj(m, -ig)\n",
3752 					    __func__));
3753 					m_adj(m, -((int)sizeof(
3754 						    struct igmp_grouprec)));
3755 				}
3756 				continue;
3757 			}
3758 			npbytes += (rsrcs * sizeof(in_addr_t));
3759 			if (crt == REC_ALLOW) {
3760 				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
3761 			} else if (crt == REC_BLOCK) {
3762 				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
3763 			}
3764 			ig_numsrc = htons(rsrcs);
3765 			bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3766 			/*
3767 			 * Count the new group record, and enqueue this
3768 			 * packet if it wasn't already queued.
3769 			 */
3770 			m->m_pkthdr.vt_nrecs++;
3771 			if (m != m0) {
3772 				IF_ENQUEUE(ifq, m);
3773 			}
3774 			nbytes += npbytes;
3775 		} while (nims != NULL);
3776 		drt |= crt;
3777 		crt = (~crt & REC_FULL);
3778 	}
3779 
3780 	IGMP_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
3781 	    nallow, nblock));
3782 
3783 	return nbytes;
3784 }
3785 
/*
 * Merge the group's pending state-change queue (inm->inm_scq) into the
 * interface's scratch queue 'ifscq' for transmission.
 *
 * If further retransmissions remain (inm->inm_scrv > 0), each queued
 * packet is duplicated (m_dup) so the original stays on the group queue;
 * otherwise packets are moved (IF_REMQUEUE) off the group queue.
 * Where the tail packet of 'ifscq' has room (record count and MTU),
 * the report is merged into it by mbuf chain concatenation instead of
 * being enqueued as a separate packet.
 *
 * Locking: inm must be held by the caller.
 * Returns 0 on success or ENOMEM if a duplicate could not be allocated.
 */
static int
igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
{
	struct ifqueue  *gq;
	struct mbuf     *m;             /* pending state-change */
	struct mbuf     *m0;            /* copy of pending state-change */
	struct mbuf     *mt;            /* last state-change in packet */
	struct mbuf     *n;
	int              docopy, domerge;
	u_int            recslen;

	INM_LOCK_ASSERT_HELD(inm);

	docopy = 0;
	domerge = 0;
	recslen = 0;

	/*
	 * If there are further pending retransmissions, make a writable
	 * copy of each queued state-change message before merging.
	 */
	if (inm->inm_scrv > 0) {
		docopy = 1;
	}

	gq = &inm->inm_scq;
#ifdef IGMP_DEBUG
	if (gq->ifq_head == NULL) {
		IGMP_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
	}
#endif

	/*
	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
	 * packet might not always be at the head of the ifqueue.
	 */
	m = gq->ifq_head;
	while (m != NULL) {
		/*
		 * Only merge the report into the current packet if
		 * there is sufficient space to do so; an IGMPv3 report
		 * packet may only contain 65,535 group records.
		 * Always use a simple mbuf chain concatenation to do this,
		 * as large state changes for single groups may have
		 * allocated clusters.
		 */
		domerge = 0;
		mt = ifscq->ifq_tail;
		if (mt != NULL) {
			recslen = m_length(m);

			if ((mt->m_pkthdr.vt_nrecs +
			    m->m_pkthdr.vt_nrecs <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (mt->m_pkthdr.len + recslen <=
			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE))) {
				domerge = 1;
			}
		}

		if (!domerge && IF_QFULL(gq)) {
			IGMP_PRINTF(("%s: outbound queue full, skipping whole "
			    "packet 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			/* Keep the packet if a later retransmit needs it. */
			if (!docopy) {
				IF_REMQUEUE(gq, m);
				m_freem(m);
			}
			m = n;
			continue;
		}

		if (!docopy) {
			/* Move: unlink from the group queue. */
			IGMP_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			IF_REMQUEUE(gq, m);
			m0 = m;
			m = n;
		} else {
			/* Copy: leave the original queued for retransmit. */
			IGMP_PRINTF(("%s: copying 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			m0 = m_dup(m, M_NOWAIT);
			if (m0 == NULL) {
				return ENOMEM;
			}
			m0->m_nextpkt = NULL;
			m = m->m_nextpkt;
		}

		if (!domerge) {
			IGMP_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n",
			    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
			IF_ENQUEUE(ifscq, m0);
		} else {
			struct mbuf *mtl;       /* last mbuf of packet mt */

			IGMP_PRINTF(("%s: merging 0x%llx with ifscq tail "
			    "0x%llx)\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(mt)));

			/*
			 * Graft m0's records onto the tail packet: strip
			 * its pkthdr and extend mt's length/record count.
			 */
			mtl = m_last(mt);
			m0->m_flags &= ~M_PKTHDR;
			mt->m_pkthdr.len += recslen;
			mt->m_pkthdr.vt_nrecs +=
			    m0->m_pkthdr.vt_nrecs;

			mtl->m_next = m0;
		}
	}

	return 0;
}
3903 
3904 /*
3905  * Respond to a pending IGMPv3 General Query.
3906  */
static uint32_t
igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
{
	struct ifnet            *ifp;
	struct in_multi         *inm;
	struct in_multistep     step;
	int                      retval, loop;

	/* Caller must hold the igi lock; it is held again on return. */
	IGI_LOCK_ASSERT_HELD(igi);

	VERIFY(igi->igi_version == IGMP_VERSION_3);

	/*
	 * Cache the ifp and drop the igi lock: the walk below takes the
	 * global in_multihead shared lock and per-inm locks, and retakes
	 * the igi lock only briefly around each enqueue.
	 */
	ifp = igi->igi_ifp;
	IGI_UNLOCK(igi);

	/*
	 * Walk every IPv4 multicast membership in the system; for each
	 * active membership on this interface, enqueue a current-state
	 * group record onto the general-query response queue (igi_gq).
	 */
	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		if (inm->inm_ifp != ifp) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			/* Nothing to report for silent/non-members. */
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			inm->inm_state = IGMP_REPORTING_MEMBER;
			IGI_LOCK(igi);
			/* 0/0/0: current-state record, no type/source change. */
			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
			    inm, 0, 0, 0);
			IGI_UNLOCK(igi);
			IGMP_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			/* A response or leave is already in flight. */
			break;
		}
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();

	IGI_LOCK(igi);
	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
	/*
	 * Transmit up to IGMP_MAX_RESPONSE_BURST packets from the queue;
	 * the assertion below documents that igmp_dispatch_queue() returns
	 * with the igi lock still held when given a non-NULL igi.
	 */
	igmp_dispatch_queue(igi, &igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
	    loop);
	IGI_LOCK_ASSERT_HELD(igi);
	/*
	 * Slew transmission of bursts over 1 second intervals.
	 */
	if (igi->igi_gq.ifq_head != NULL) {
		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
			IGMP_RESPONSE_BURST_INTERVAL);
	}

	/* Remaining timer ticks (0 if the queue fully drained). */
	return igi->igi_v3_timer;
}
3973 
3974 /*
3975  * Transmit the next pending IGMP message in the output queue.
3976  *
3977  * Must not be called with inm_lock or igi_lock held.
3978  */
static void
igmp_sendpkt(struct mbuf *m)
{
	struct ip_moptions      *imo;
	struct mbuf             *ipopts, *m0;
	int                     error;
	struct route            ro;
	struct ifnet            *ifp;

	IGMP_PRINTF(("%s: transmit 0x%llx\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m)));

	/* Recover the ifnet stashed in the mbuf when the pkt was queued. */
	ifp = igmp_restore_context(m);
	/*
	 * Check if the ifnet is still attached.
	 */
	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
		IGMP_PRINTF(("%s: dropped 0x%llx as ifp went away.\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m)));
		m_freem(m);
		OSAddAtomic(1, &ipstat.ips_noroute);
		return;
	}

	/* Optionally attach the Router Alert IP option (sysctl-controlled). */
	ipopts = igmp_sendra ? m_raopt : NULL;

	imo = ip_allocmoptions(Z_WAITOK);
	if (imo == NULL) {
		m_freem(m);
		return;
	}

	/* IGMP messages are link-local: TTL 1, no vif, no self-loopback. */
	imo->imo_multicast_ttl  = 1;
	imo->imo_multicast_vif  = -1;
	imo->imo_multicast_loop = 0;

	/*
	 * If the user requested that IGMP traffic be explicitly
	 * redirected to the loopback interface (e.g. they are running a
	 * MANET interface and the routing protocol needs to see the
	 * updates), handle this now.
	 */
	if (m->m_flags & M_IGMP_LOOP) {
		imo->imo_multicast_ifp = lo_ifp;
	} else {
		imo->imo_multicast_ifp = ifp;
	}

	if (m->m_flags & M_IGMPV2) {
		/* IGMPv1/v2 packets already carry their full header. */
		m0 = m;
	} else {
		/* Prepend the IP + IGMPv3 report header; may replace the head. */
		m0 = igmp_v3_encap_report(ifp, m);
		if (m0 == NULL) {
			/*
			 * If igmp_v3_encap_report() failed, then M_PREPEND()
			 * already freed the original mbuf chain.
			 * This means that we don't have to m_freem(m) here.
			 */
			IGMP_PRINTF(("%s: dropped 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			IMO_REMREF(imo);
			os_atomic_inc(&ipstat.ips_odropped, relaxed);
			return;
		}
	}

	igmp_scrub_context(m0);
	/*
	 * NOTE(review): flags are scrubbed on `m`, but after encapsulation
	 * the chain head is `m0`, which may be a different mbuf than `m`
	 * (M_PREPEND can allocate a new leading mbuf). Presumably the
	 * pkthdr flags should be cleared on `m0` — confirm against the
	 * IGMPv3 path before changing.
	 */
	m->m_flags &= ~(M_PROTOFLAGS | M_IGMP_LOOP);
	m0->m_pkthdr.rcvif = lo_ifp;

	if (ifp->if_eflags & IFEF_TXSTART) {
		/*
		 * Use control service class if the interface supports
		 * transmit-start model.
		 */
		(void) m_set_service_class(m0, MBUF_SC_CTL);
	}
	/* Use a throwaway route; released immediately after output. */
	bzero(&ro, sizeof(ro));
	error = ip_output(m0, ipopts, &ro, 0, imo, NULL);
	ROUTE_RELEASE(&ro);

	IMO_REMREF(imo);

	if (error) {
		IGMP_PRINTF(("%s: ip_output(0x%llx) = %d\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(m0), error));
		return;
	}

	IGMPSTAT_INC(igps_snd_reports);
	OIGMPSTAT_INC(igps_snd_reports);
}
4071 /*
4072  * Encapsulate an IGMPv3 report.
4073  *
4074  * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
4075  * chain has already had its IP/IGMPv3 header prepended. In this case
4076  * the function will not attempt to prepend; the lengths and checksums
4077  * will however be re-computed.
4078  *
4079  * Returns a pointer to the new mbuf chain head, or NULL if the
4080  * allocation failed.
4081  */
static struct mbuf *
igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
{
	struct igmp_report      *igmp;
	struct ip               *ip;
	unsigned int             hdrlen, igmpreclen;

	VERIFY((m->m_flags & M_PKTHDR));

	/* Total length of the group records currently in the chain. */
	igmpreclen = m_length(m);
	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);

	if (m->m_flags & M_IGMPV3_HDR) {
		/* Header already prepended; exclude it from the record length. */
		igmpreclen -= hdrlen;
	} else {
		/* On failure M_PREPEND frees the chain and sets m to NULL. */
		M_PREPEND(m, hdrlen, M_DONTWAIT, 1);
		if (m == NULL) {
			return NULL;
		}
		m->m_flags |= M_IGMPV3_HDR;
	}
	/* ip_len is a 16-bit field; refuse anything that would overflow it. */
	if (hdrlen + igmpreclen > USHRT_MAX) {
		/* NOTE(review): %d for an unsigned value; %u would be exact. */
		IGMP_PRINTF(("%s: invalid length %d\n", __func__, hdrlen + igmpreclen));
		m_freem(m);
		return NULL;
	}


	IGMP_PRINTF(("%s: igmpreclen is %d\n", __func__, igmpreclen));

	/* Temporarily advance past the IP header to fill in the IGMP header. */
	m->m_data += sizeof(struct ip);
	m->m_len -= sizeof(struct ip);

	igmp = mtod(m, struct igmp_report *);
	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
	igmp->ir_rsv1 = 0;
	igmp->ir_rsv2 = 0;
	igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
	igmp->ir_cksum = 0;
	/* Checksum covers the IGMP header plus all group records. */
	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
	m->m_pkthdr.vt_nrecs = 0;

	/* Restore the offset so the chain again starts at the IP header. */
	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	ip = mtod(m, struct ip *);
	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
	/* ip_len/ip_off in host order here — presumably ip_output() converts. */
	ip->ip_len = (u_short)(hdrlen + igmpreclen);
	ip->ip_off = IP_DF;
	ip->ip_p = IPPROTO_IGMP;
	ip->ip_sum = 0;

	ip->ip_src.s_addr = INADDR_ANY;

	if (m->m_flags & M_IGMP_LOOP) {
		struct in_ifaddr *ia;

		/* For loopback'd reports, source from the primary ifp address. */
		IFP_TO_IA(ifp, ia);
		if (ia != NULL) {
			IFA_LOCK(&ia->ia_ifa);
			ip->ip_src = ia->ia_addr.sin_addr;
			IFA_UNLOCK(&ia->ia_ifa);
			IFA_REMREF(&ia->ia_ifa);
		}
	}

	/* IGMPv3 reports always go to 224.0.0.22 (all IGMPv3 routers). */
	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);

	return m;
}
4152 
4153 #ifdef IGMP_DEBUG
4154 static const char *
igmp_rec_type_to_str(const int type)4155 igmp_rec_type_to_str(const int type)
4156 {
4157 	switch (type) {
4158 	case IGMP_CHANGE_TO_EXCLUDE_MODE:
4159 		return "TO_EX";
4160 	case IGMP_CHANGE_TO_INCLUDE_MODE:
4161 		return "TO_IN";
4162 	case IGMP_MODE_IS_EXCLUDE:
4163 		return "MODE_EX";
4164 	case IGMP_MODE_IS_INCLUDE:
4165 		return "MODE_IN";
4166 	case IGMP_ALLOW_NEW_SOURCES:
4167 		return "ALLOW_NEW";
4168 	case IGMP_BLOCK_OLD_SOURCES:
4169 		return "BLOCK_OLD";
4170 	default:
4171 		break;
4172 	}
4173 	return "unknown";
4174 }
4175 #endif
4176 
4177 void
igmp_init(struct protosw * pp,struct domain * dp)4178 igmp_init(struct protosw *pp, struct domain *dp)
4179 {
4180 #pragma unused(dp)
4181 	static int igmp_initialized = 0;
4182 
4183 	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);
4184 
4185 	if (igmp_initialized) {
4186 		return;
4187 	}
4188 	igmp_initialized = 1;
4189 
4190 	IGMP_PRINTF(("%s: initializing\n", __func__));
4191 
4192 	igmp_timers_are_running = 0;
4193 
4194 	LIST_INIT(&igi_head);
4195 	m_raopt = igmp_ra_alloc();
4196 }
4197