xref: /xnu-10002.41.9/bsd/netinet/igmp.c (revision 699cd48037512bf4380799317ca44ca453c82f57)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*-
29  * Copyright (c) 2007-2009 Bruce Simpson.
30  * Copyright (c) 1988 Stephen Deering.
31  * Copyright (c) 1992, 1993
32  *	The Regents of the University of California.  All rights reserved.
33  *
34  * This code is derived from software contributed to Berkeley by
35  * Stephen Deering of Stanford University.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
66  */
67 /*
68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69  * support for mandatory and extensible security protections.  This notice
70  * is included in support of clause 2.2 (b) of the Apple Public License,
71  * Version 2.0.
72  */
73 
74 /*
75  * Internet Group Management Protocol (IGMP) routines.
76  * [RFC1112, RFC2236, RFC3376]
77  *
78  * Written by Steve Deering, Stanford, May 1988.
79  * Modified by Rosen Sharma, Stanford, Aug 1994.
80  * Modified by Bill Fenner, Xerox PARC, Feb 1995.
81  * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
82  * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
83  *
84  * MULTICAST Revision: 3.5.1.4
85  */
86 
87 #include <sys/cdefs.h>
88 
89 #include <sys/param.h>
90 #include <sys/systm.h>
91 #include <sys/malloc.h>
92 #include <sys/mbuf.h>
93 #include <sys/socket.h>
94 #include <sys/protosw.h>
95 #include <sys/kernel.h>
96 #include <sys/sysctl.h>
97 #include <sys/mcache.h>
98 
99 #include <libkern/libkern.h>
100 #include <kern/zalloc.h>
101 
102 #include <net/if.h>
103 #include <net/route.h>
104 
105 #include <netinet/in.h>
106 #include <netinet/in_var.h>
107 #include <netinet/in_systm.h>
108 #include <netinet/ip.h>
109 #include <netinet/ip_var.h>
110 #include <netinet/igmp.h>
111 #include <netinet/igmp_var.h>
112 #include <netinet/kpi_ipfilter_var.h>
113 
114 #if SKYWALK
115 #include <skywalk/core/skywalk_var.h>
116 #endif /* SKYWALK */
117 
118 SLIST_HEAD(igmp_inm_relhead, in_multi);
119 
120 static void     igi_initvar(struct igmp_ifinfo *, struct ifnet *, int);
121 static struct igmp_ifinfo *igi_alloc(zalloc_flags_t);
122 static void     igi_free(struct igmp_ifinfo *);
123 static void     igi_delete(const struct ifnet *, struct igmp_inm_relhead *);
124 static void     igmp_dispatch_queue(struct igmp_ifinfo *, struct ifqueue *,
125     int, const int);
126 static void     igmp_final_leave(struct in_multi *, struct igmp_ifinfo *,
127     struct igmp_tparams *);
128 static int      igmp_handle_state_change(struct in_multi *,
129     struct igmp_ifinfo *, struct igmp_tparams *);
130 static int      igmp_initial_join(struct in_multi *, struct igmp_ifinfo *,
131     struct igmp_tparams *);
132 static int      igmp_input_v1_query(struct ifnet *, const struct ip *,
133     const struct igmp *);
134 static int      igmp_input_v2_query(struct ifnet *, const struct ip *,
135     const struct igmp *);
136 static int      igmp_input_v3_query(struct ifnet *, const struct ip *,
137     /*const*/ struct igmpv3 *);
138 static int      igmp_input_v3_group_query(struct in_multi *,
139     int, /*const*/ struct igmpv3 *);
140 static int      igmp_input_v1_report(struct ifnet *, struct mbuf *,
141     /*const*/ struct ip *, /*const*/ struct igmp *);
142 static int      igmp_input_v2_report(struct ifnet *, struct mbuf *,
143     /*const*/ struct ip *, /*const*/ struct igmp *);
144 static void     igmp_sendpkt(struct mbuf *);
145 static __inline__ int   igmp_isgroupreported(const struct in_addr);
146 static struct mbuf *igmp_ra_alloc(void);
147 #ifdef IGMP_DEBUG
148 static const char *igmp_rec_type_to_str(const int);
149 #endif
150 static uint32_t igmp_set_version(struct igmp_ifinfo *, const int);
151 static void     igmp_append_relq(struct igmp_ifinfo *, struct in_multi *);
152 static void     igmp_flush_relq(struct igmp_ifinfo *,
153     struct igmp_inm_relhead *);
154 static int      igmp_v1v2_queue_report(struct in_multi *, const int);
155 static void     igmp_v1v2_process_group_timer(struct in_multi *, const int);
156 static void     igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
157 static uint32_t igmp_v2_update_group(struct in_multi *, const int);
158 static void     igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
159 static uint32_t igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
160 static struct mbuf *
161 igmp_v3_encap_report(struct ifnet *, struct mbuf *);
162 static int      igmp_v3_enqueue_group_record(struct ifqueue *,
163     struct in_multi *, const int, const int, const int);
164 static int      igmp_v3_enqueue_filter_change(struct ifqueue *,
165     struct in_multi *);
166 static void     igmp_v3_process_group_timers(struct igmp_ifinfo *,
167     struct ifqueue *, struct ifqueue *, struct in_multi *,
168     const unsigned int);
169 static int      igmp_v3_merge_state_changes(struct in_multi *,
170     struct ifqueue *);
171 static void     igmp_v3_suppress_group_record(struct in_multi *);
172 static int      sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS;
173 static int      sysctl_igmp_gsr SYSCTL_HANDLER_ARGS;
174 static int      sysctl_igmp_default_version SYSCTL_HANDLER_ARGS;
175 
176 static const uint32_t igmp_timeout_delay = 1000; /* in milliseconds */
static const uint32_t igmp_timeout_leeway = 500; /* in milliseconds  */
178 static bool igmp_timeout_run;            /* IGMP timer is scheduled to run */
179 static bool igmp_fast_timeout_run;       /* IGMP fast timer is scheduled to run */
180 static void igmp_timeout(thread_call_param_t, thread_call_param_t);
181 static void igmp_sched_timeout(void);
182 static void igmp_sched_fast_timeout(void);
183 
184 static struct mbuf *m_raopt;            /* Router Alert option */
185 
186 static int querier_present_timers_running;      /* IGMPv1/v2 older version
187                                                  * querier present */
188 static int interface_timers_running;            /* IGMPv3 general
189                                                  * query response */
190 static int state_change_timers_running;         /* IGMPv3 state-change
191                                                  * retransmit */
192 static int current_state_timers_running;        /* IGMPv1/v2 host
193                                                  * report; IGMPv3 g/sg
194                                                  * query response */
195 
196 /*
197  * Subsystem lock macros.
198  */
199 #define IGMP_LOCK()                     \
200 	lck_mtx_lock(&igmp_mtx)
201 #define IGMP_LOCK_ASSERT_HELD()         \
202 	LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_OWNED)
203 #define IGMP_LOCK_ASSERT_NOTHELD()      \
204 	LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED)
205 #define IGMP_UNLOCK()                   \
206 	lck_mtx_unlock(&igmp_mtx)
207 
208 static LIST_HEAD(, igmp_ifinfo) igi_head;
209 static struct igmpstat_v3 igmpstat_v3 = {
210 	.igps_version = IGPS_VERSION_3,
211 	.igps_len = sizeof(struct igmpstat_v3),
212 };
213 static struct igmpstat igmpstat; /* old IGMPv2 stats structure */
214 static struct timeval igmp_gsrdelay = {.tv_sec = 10, .tv_usec = 0};
215 
216 static int igmp_recvifkludge = 1;
217 static int igmp_sendra = 1;
218 static int igmp_sendlocal = 1;
219 static int igmp_v1enable = 1;
220 static int igmp_v2enable = 1;
221 static int igmp_legacysupp = 0;
222 static int igmp_default_version = IGMP_VERSION_3;
223 
224 SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
225     &igmpstat, igmpstat, "");
226 SYSCTL_STRUCT(_net_inet_igmp, OID_AUTO, v3stats,
227     CTLFLAG_RD | CTLFLAG_LOCKED, &igmpstat_v3, igmpstat_v3, "");
228 SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW | CTLFLAG_LOCKED,
229     &igmp_recvifkludge, 0,
230     "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
231 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW | CTLFLAG_LOCKED,
232     &igmp_sendra, 0,
233     "Send IP Router Alert option in IGMPv2/v3 messages");
234 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW | CTLFLAG_LOCKED,
235     &igmp_sendlocal, 0,
236     "Send IGMP membership reports for 224.0.0.0/24 groups");
237 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
238     &igmp_v1enable, 0,
239     "Enable backwards compatibility with IGMPv1");
240 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW | CTLFLAG_LOCKED,
241     &igmp_v2enable, 0,
242     "Enable backwards compatibility with IGMPv2");
243 SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW | CTLFLAG_LOCKED,
244     &igmp_legacysupp, 0,
245     "Allow v1/v2 reports to suppress v3 group responses");
246 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
247     CTLTYPE_INT | CTLFLAG_RW,
248     &igmp_default_version, 0, sysctl_igmp_default_version, "I",
249     "Default version of IGMP to run on each interface");
250 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
251     CTLTYPE_INT | CTLFLAG_RW,
252     &igmp_gsrdelay.tv_sec, 0, sysctl_igmp_gsr, "I",
253     "Rate limit for IGMPv3 Group-and-Source queries in seconds");
254 #ifdef IGMP_DEBUG
255 int igmp_debug = 0;
256 SYSCTL_INT(_net_inet_igmp, OID_AUTO,
257     debug, CTLFLAG_RW | CTLFLAG_LOCKED, &igmp_debug, 0, "");
258 #endif
259 
260 SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
261     sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
262 
263 /* Lock group and attribute for igmp_mtx */
264 static LCK_ATTR_DECLARE(igmp_mtx_attr, 0, 0);
265 static LCK_GRP_DECLARE(igmp_mtx_grp, "igmp_mtx");
266 
267 /*
268  * Locking and reference counting:
269  *
270  * igmp_mtx mainly protects igi_head.  In cases where both igmp_mtx and
271  * in_multihead_lock must be held, the former must be acquired first in order
272  * to maintain lock ordering.  It is not a requirement that igmp_mtx be
273  * acquired first before in_multihead_lock, but in case both must be acquired
274  * in succession, the correct lock ordering must be followed.
275  *
276  * Instead of walking the if_multiaddrs list at the interface and returning
277  * the ifma_protospec value of a matching entry, we search the global list
278  * of in_multi records and find it that way; this is done with in_multihead
279  * lock held.  Doing so avoids the race condition issues that many other BSDs
280  * suffer from (therefore in our implementation, ifma_protospec will never be
281  * NULL for as long as the in_multi is valid.)
282  *
283  * The above creates a requirement for the in_multi to stay in in_multihead
284  * list even after the final IGMP leave (in IGMPv3 mode) until no longer needs
285  * be retransmitted (this is not required for IGMPv1/v2.)  In order to handle
286  * this, the request and reference counts of the in_multi are bumped up when
287  * the state changes to IGMP_LEAVING_MEMBER, and later dropped in the timeout
288  * handler.  Each in_multi holds a reference to the underlying igmp_ifinfo.
289  *
 * Thus, the permitted lock order is:
291  *
292  *	igmp_mtx, in_multihead_lock, inm_lock, igi_lock
293  *
294  * Any may be taken independently, but if any are held at the same time,
295  * the above lock order must be followed.
296  */
297 static LCK_MTX_DECLARE_ATTR(igmp_mtx, &igmp_mtx_grp, &igmp_mtx_attr);
298 static int igmp_timers_are_running;
299 
300 #define IGMP_ADD_DETACHED_INM(_head, _inm) {                            \
301 	SLIST_INSERT_HEAD(_head, _inm, inm_dtle);                       \
302 }
303 
304 #define IGMP_REMOVE_DETACHED_INM(_head) {                               \
305 	struct in_multi *_inm, *_inm_tmp;                               \
306 	SLIST_FOREACH_SAFE(_inm, _head, inm_dtle, _inm_tmp) {           \
307 	        SLIST_REMOVE(_head, _inm, in_multi, inm_dtle);          \
308 	        INM_REMREF(_inm);                                       \
309 	}                                                               \
310 	VERIFY(SLIST_EMPTY(_head));                                     \
311 }
312 
313 static KALLOC_TYPE_DEFINE(igi_zone, struct igmp_ifinfo, NET_KT_DEFAULT);
314 
315 /* Store IGMPv3 record count in the module private scratch space */
316 #define vt_nrecs        pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]
317 
static __inline void
igmp_save_context(struct mbuf *m, struct ifnet *ifp)
{
	/*
	 * Stash the destination interface in the mbuf's rcvif field while
	 * the packet sits on an IGMP output queue; igmp_restore_context()
	 * retrieves it at dispatch time, igmp_scrub_context() clears it.
	 */
	m->m_pkthdr.rcvif = ifp;
}
323 
static __inline void
igmp_scrub_context(struct mbuf *m)
{
	/* Clear the interface pointer saved by igmp_save_context(). */
	m->m_pkthdr.rcvif = NULL;
}
329 
#ifdef IGMP_DEBUG
/*
 * Debug helper: format a host-byte-order IPv4 address into 'buf'.
 * Returns the result of inet_ntop() (i.e. 'buf' on success).
 */
static __inline const char *
inet_ntop_haddr(in_addr_t haddr, char *buf, socklen_t size)
{
	struct in_addr ia;

	/* inet_ntop() expects network byte order. */
	ia.s_addr = htonl(haddr);
	return inet_ntop(AF_INET, &ia, buf, size);
}
#endif
340 
/*
 * Restore context from a queued IGMP output chain.
 * Return saved ifp.
 */
static __inline struct ifnet *
igmp_restore_context(struct mbuf *m)
{
	/* Counterpart of igmp_save_context(). */
	return m->m_pkthdr.rcvif;
}
350 
/*
 * Retrieve or set default IGMP version.
 *
 * Sysctl handler for net.inet.igmp.default_version.  The new value must
 * be within [IGMP_VERSION_1, IGMP_VERSION_3].  Returns 0 on success or
 * an errno (EINVAL for an out-of-range version).
 */
static int
sysctl_igmp_default_version SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	int      error;
	int      new;

	/* igmp_default_version is read/written under igmp_mtx. */
	IGMP_LOCK();

	/* Copy the current value out; done if this is a read-only request. */
	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || !req->newptr) {
		goto out_locked;
	}

	new = igmp_default_version;

	error = SYSCTL_IN(req, &new, sizeof(int));
	if (error) {
		goto out_locked;
	}

	/* Only IGMP versions 1 through 3 are valid. */
	if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
		error = EINVAL;
		goto out_locked;
	}

	IGMP_PRINTF(("%s: change igmp_default_version from %d to %d\n",
	    __func__, igmp_default_version, new));

	igmp_default_version = new;

out_locked:
	IGMP_UNLOCK();
	return error;
}
389 
/*
 * Retrieve or set threshold between group-source queries in seconds.
 *
 * Sysctl handler for net.inet.igmp.gsrdelay.  Accepted range is
 * -1 to 59 inclusive (per the check below); anything else yields EINVAL.
 */
static int
sysctl_igmp_gsr SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error;
	int i;

	/* igmp_gsrdelay is protected by igmp_mtx. */
	IGMP_LOCK();

	i = (int)igmp_gsrdelay.tv_sec;

	/* Handles both the read-out and (optionally) the new value. */
	error = sysctl_handle_int(oidp, &i, 0, req);
	if (error || !req->newptr) {
		goto out_locked;
	}

	if (i < -1 || i >= 60) {
		error = EINVAL;
		goto out_locked;
	}

	igmp_gsrdelay.tv_sec = i;

out_locked:
	IGMP_UNLOCK();
	return error;
}
421 
/*
 * Expose struct igmp_ifinfo to userland, keyed by ifindex.
 * For use by ifmcstat(8).
 *
 * Read-only node: a single name component selects the interface index,
 * and the matching igmp_ifinfo is snapshotted into igmp_ifinfo_u and
 * copied out.  Returns ENOENT if no interface/info matches.
 */
static int
sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int                     *name;
	int                      error;
	u_int                    namelen;
	struct ifnet            *ifp;
	struct igmp_ifinfo      *igi;
	struct igmp_ifinfo_u    igi_u;

	name = (int *)arg1;
	namelen = arg2;

	/* This node cannot be written. */
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	/* Exactly one name component (the ifindex) is expected. */
	if (namelen != 1) {
		return EINVAL;
	}

	IGMP_LOCK();

	if (name[0] <= 0 || name[0] > (u_int)if_index) {
		error = ENOENT;
		goto out_locked;
	}

	error = ENOENT;

	/* Translate the ifindex to an ifnet under the ifnet head lock. */
	ifnet_head_lock_shared();
	ifp = ifindex2ifnet[name[0]];
	ifnet_head_done();
	if (ifp == NULL) {
		goto out_locked;
	}

	bzero(&igi_u, sizeof(igi_u));

	/* Find the igmp_ifinfo attached to this interface and snapshot it. */
	LIST_FOREACH(igi, &igi_head, igi_link) {
		IGI_LOCK(igi);
		if (ifp != igi->igi_ifp) {
			IGI_UNLOCK(igi);
			continue;
		}
		igi_u.igi_ifindex = igi->igi_ifp->if_index;
		igi_u.igi_version = igi->igi_version;
		igi_u.igi_v1_timer = igi->igi_v1_timer;
		igi_u.igi_v2_timer = igi->igi_v2_timer;
		igi_u.igi_v3_timer = igi->igi_v3_timer;
		igi_u.igi_flags = igi->igi_flags;
		igi_u.igi_rv = igi->igi_rv;
		igi_u.igi_qi = igi->igi_qi;
		igi_u.igi_qri = igi->igi_qri;
		igi_u.igi_uri = igi->igi_uri;
		IGI_UNLOCK(igi);

		/* Copy the snapshot out without holding the igi lock. */
		error = SYSCTL_OUT(req, &igi_u, sizeof(igi_u));
		break;
	}

out_locked:
	IGMP_UNLOCK();
	return error;
}
493 
/*
 * Dispatch an entire queue of pending packet chains.
 *
 * At most 'limit' packets are dequeued from 'ifq' and handed to
 * igmp_sendpkt(); 'loop' marks them for local loopback via M_IGMP_LOOP.
 * If 'igi' is non-NULL its lock must be held on entry; it is dropped
 * around each send and held again on return.
 *
 * Must not be called with inm_lock held.
 */
static void
igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit,
    const int loop)
{
	struct mbuf *m;
	struct ip *ip;

	if (igi != NULL) {
		IGI_LOCK_ASSERT_HELD(igi);
	}

#if SKYWALK
	/*
	 * Since this function is called holding the igi lock, we need to ensure we
	 * don't enter the driver directly because a deadlock can happen if another
	 * thread holding the workloop lock tries to acquire the igi lock at
	 * the same time.
	 */
	sk_protect_t protect = sk_async_transmit_protect();
#endif /* SKYWALK */

	for (;;) {
		IF_DEQUEUE(ifq, m);
		if (m == NULL) {
			break;
		}
		IGMP_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifq),
		    (uint64_t)VM_KERNEL_ADDRPERM(m)));
		ip = mtod(m, struct ip *);
		if (loop) {
			m->m_flags |= M_IGMP_LOOP;
		}
		/*
		 * Drop the igi lock across the send to respect lock ordering
		 * (igmp_sendpkt() enters the IP output path).
		 */
		if (igi != NULL) {
			IGI_UNLOCK(igi);
		}
		igmp_sendpkt(m);
		if (igi != NULL) {
			IGI_LOCK(igi);
		}
		/* Stop once at most 'limit' packets have been dispatched. */
		if (--limit == 0) {
			break;
		}
	}

#if SKYWALK
	sk_async_transmit_unprotect(protect);
#endif /* SKYWALK */

	if (igi != NULL) {
		IGI_LOCK_ASSERT_HELD(igi);
	}
}
552 
553 /*
554  * Filter outgoing IGMP report state by group.
555  *
556  * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
557  * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
558  * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
559  * this may break certain IGMP snooping switches which rely on the old
560  * report behaviour.
561  *
562  * Return zero if the given group is one for which IGMP reports
563  * should be suppressed, or non-zero if reports should be issued.
564  */
565 
566 static __inline__
567 int
igmp_isgroupreported(const struct in_addr addr)568 igmp_isgroupreported(const struct in_addr addr)
569 {
570 	if (in_allhosts(addr) ||
571 	    ((!igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) {
572 		return 0;
573 	}
574 
575 	return 1;
576 }
577 
/*
 * Construct a Router Alert option to use in outgoing packets.
 *
 * Allocates a single mbuf (M_WAITOK, so the result is assumed non-NULL)
 * holding an ipoption with the 4-byte RA option; cached in m_raopt by
 * the caller and shared by all outgoing IGMPv2/v3 messages.
 */
static struct mbuf *
igmp_ra_alloc(void)
{
	struct mbuf     *m;
	struct ipoption *p;

	MGET(m, M_WAITOK, MT_DATA);
	p = mtod(m, struct ipoption *);
	p->ipopt_dst.s_addr = INADDR_ANY;
	p->ipopt_list[0] = (char)IPOPT_RA;      /* Router Alert Option */
	p->ipopt_list[1] = 0x04;        /* 4 bytes long */
	p->ipopt_list[2] = IPOPT_EOL;   /* End of IP option list */
	p->ipopt_list[3] = 0x00;        /* pad byte */
	/* Valid payload: the ipopt_dst field plus the 4-byte option. */
	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];

	return m;
}
598 
/*
 * Attach IGMP when PF_INET is attached to an interface.
 *
 * Allocates and initializes an igmp_ifinfo for 'ifp', links it onto
 * igi_head, and returns it with a reference held for the caller (a
 * second reference is held by igi_head).  Returns NULL only if
 * allocation fails (possible when 'how' permits failure).
 */
struct igmp_ifinfo *
igmp_domifattach(struct ifnet *ifp, zalloc_flags_t how)
{
	struct igmp_ifinfo *igi;

	IGMP_PRINTF(("%s: called for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));

	igi = igi_alloc(how);
	if (igi == NULL) {
		return NULL;
	}

	/* igmp_mtx protects igi_head; take it before touching the list. */
	IGMP_LOCK();

	IGI_LOCK(igi);
	igi_initvar(igi, ifp, 0);
	igi->igi_debug |= IFD_ATTACHED;
	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
	IGI_ADDREF_LOCKED(igi); /* hold a reference for caller */
	IGI_UNLOCK(igi);
	/* Set or clear IGIF_SILENT based on the interface's IFF_MULTICAST. */
	ifnet_lock_shared(ifp);
	igmp_initsilent(ifp, igi);
	ifnet_lock_done(ifp);

	LIST_INSERT_HEAD(&igi_head, igi, igi_link);

	IGMP_UNLOCK();

	IGMP_PRINTF(("%s: allocate igmp_ifinfo for ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));

	return igi;
}
636 
/*
 * Attach IGMP when PF_INET is reattached to an interface.  Caller is
 * expected to have an outstanding reference to the igi.
 *
 * Re-initializes the existing igmp_ifinfo (preserving igi_relinmhead)
 * and links it back onto igi_head with a new list reference.
 */
void
igmp_domifreattach(struct igmp_ifinfo *igi)
{
	struct ifnet *ifp;

	IGMP_LOCK();

	IGI_LOCK(igi);
	/* Must not already be on igi_head. */
	VERIFY(!(igi->igi_debug & IFD_ATTACHED));
	ifp = igi->igi_ifp;
	VERIFY(ifp != NULL);
	/* reattach=1: keep the existing release list intact. */
	igi_initvar(igi, ifp, 1);
	igi->igi_debug |= IFD_ATTACHED;
	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
	IGI_UNLOCK(igi);
	ifnet_lock_shared(ifp);
	igmp_initsilent(ifp, igi);
	ifnet_lock_done(ifp);

	LIST_INSERT_HEAD(&igi_head, igi, igi_link);

	IGMP_UNLOCK();

	IGMP_PRINTF(("%s: reattached igmp_ifinfo for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));
}
667 
/*
 * Hook for domifdetach.
 *
 * Unlinks the interface's igmp_ifinfo from igi_head (via igi_delete())
 * and then releases any in_multi records that were pending release,
 * after all locks have been dropped.
 */
void
igmp_domifdetach(struct ifnet *ifp)
{
	SLIST_HEAD(, in_multi) inm_dthead;

	SLIST_INIT(&inm_dthead);

	IGMP_PRINTF(("%s: called for ifp 0x%llx(%s%d)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name, ifp->if_unit));

	IGMP_LOCK();
	igi_delete(ifp, (struct igmp_inm_relhead *)&inm_dthead);
	IGMP_UNLOCK();

	/* Now that we've dropped all locks, release detached records */
	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
}
688 
/*
 * Called at interface detach time.  Note that we only flush all deferred
 * responses and record releases; all remaining inm records and their source
 * entries related to this interface are left intact, in order to handle
 * the reattach case.
 *
 * Pending-release in_multi records are moved onto 'inm_dthead' for the
 * caller to release once all locks are dropped.  Panics if no
 * igmp_ifinfo exists for 'ifp'.
 */
static void
igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead)
{
	struct igmp_ifinfo *igi, *tigi;

	IGMP_LOCK_ASSERT_HELD();

	LIST_FOREACH_SAFE(igi, &igi_head, igi_link, tigi) {
		IGI_LOCK(igi);
		if (igi->igi_ifp == ifp) {
			/*
			 * Free deferred General Query responses.
			 */
			IF_DRAIN(&igi->igi_gq);
			IF_DRAIN(&igi->igi_v2q);
			/* Hand off pending releases to the caller's list. */
			igmp_flush_relq(igi, inm_dthead);
			igi->igi_debug &= ~IFD_ATTACHED;
			IGI_UNLOCK(igi);

			LIST_REMOVE(igi, igi_link);
			IGI_REMREF(igi); /* release igi_head reference */
			return;
		}
		IGI_UNLOCK(igi);
	}
	panic("%s: igmp_ifinfo not found for ifp %p(%s)", __func__,
	    ifp, ifp->if_xname);
}
723 
/*
 * Sync the IGIF_SILENT flag with the interface's IFF_MULTICAST
 * capability: non-multicast interfaces are marked silent.  Caller must
 * hold the ifnet lock; the igi lock must NOT be held on entry.
 */
__private_extern__ void
igmp_initsilent(struct ifnet *ifp, struct igmp_ifinfo *igi)
{
	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	IGI_LOCK_ASSERT_NOTHELD(igi);
	IGI_LOCK(igi);
	if (!(ifp->if_flags & IFF_MULTICAST)) {
		igi->igi_flags |= IGIF_SILENT;
	} else {
		igi->igi_flags &= ~IGIF_SILENT;
	}
	IGI_UNLOCK(igi);
}
738 
/*
 * Initialize (or re-initialize, when 'reattach' is non-zero) the
 * per-interface IGMP state to protocol defaults.  On reattach the
 * pending-release list is preserved.  Caller holds the igi lock.
 */
static void
igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach)
{
	IGI_LOCK_ASSERT_HELD(igi);

	igi->igi_ifp = ifp;
	/* Start at the administratively configured default version. */
	igi->igi_version = igmp_default_version;
	igi->igi_flags = 0;
	igi->igi_rv = IGMP_RV_INIT;      /* robustness variable */
	igi->igi_qi = IGMP_QI_INIT;      /* query interval */
	igi->igi_qri = IGMP_QRI_INIT;    /* query response interval */
	igi->igi_uri = IGMP_URI_INIT;    /* unsolicited report interval */

	if (!reattach) {
		SLIST_INIT(&igi->igi_relinmhead);
	}

	/*
	 * Responses to general queries are subject to bounds.
	 */
	igi->igi_gq.ifq_maxlen =  IGMP_MAX_RESPONSE_PACKETS;
	igi->igi_v2q.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
}
762 
/*
 * Allocate a zeroed igmp_ifinfo from igi_zone and initialize its lock.
 * May return NULL when 'how' allows the allocation to fail.
 */
static struct igmp_ifinfo *
igi_alloc(zalloc_flags_t how)
{
	struct igmp_ifinfo *igi = zalloc_flags(igi_zone, how | Z_ZERO);
	if (igi != NULL) {
		lck_mtx_init(&igi->igi_lock, &igmp_mtx_grp, &igmp_mtx_attr);
		igi->igi_debug |= IFD_ALLOC;
	}
	return igi;
}
773 
/*
 * Free an igmp_ifinfo back to igi_zone.  Sanity-panics if the record is
 * still attached, still bound to an ifp, not marked allocated, or still
 * referenced -- callers must fully detach and drop all references first.
 */
static void
igi_free(struct igmp_ifinfo *igi)
{
	IGI_LOCK(igi);
	if (igi->igi_debug & IFD_ATTACHED) {
		panic("%s: attached igi=%p is being freed", __func__, igi);
		/* NOTREACHED */
	} else if (igi->igi_ifp != NULL) {
		panic("%s: ifp not NULL for igi=%p", __func__, igi);
		/* NOTREACHED */
	} else if (!(igi->igi_debug & IFD_ALLOC)) {
		panic("%s: igi %p cannot be freed", __func__, igi);
		/* NOTREACHED */
	} else if (igi->igi_refcnt != 0) {
		panic("%s: non-zero refcnt igi=%p", __func__, igi);
		/* NOTREACHED */
	}
	igi->igi_debug &= ~IFD_ALLOC;
	IGI_UNLOCK(igi);

	lck_mtx_destroy(&igi->igi_lock, &igmp_mtx_grp);
	zfree(igi_zone, igi);
}
797 
/*
 * Take a reference on an igmp_ifinfo.  'locked' indicates whether the
 * caller already holds the igi lock; if not, it is taken (spin) and
 * released here.  Panics on refcount wraparound.
 */
void
igi_addref(struct igmp_ifinfo *igi, int locked)
{
	if (!locked) {
		IGI_LOCK_SPIN(igi);
	} else {
		IGI_LOCK_ASSERT_HELD(igi);
	}

	if (++igi->igi_refcnt == 0) {
		panic("%s: igi=%p wraparound refcnt", __func__, igi);
		/* NOTREACHED */
	}
	if (!locked) {
		IGI_UNLOCK(igi);
	}
}
815 
/*
 * Drop a reference on an igmp_ifinfo.  When the last reference goes
 * away, drain the pending-response queues, flush the pending-release
 * in_multi list (releasing those records only after the igi lock is
 * dropped), and free the structure.  Panics on refcount underflow.
 */
void
igi_remref(struct igmp_ifinfo *igi)
{
	SLIST_HEAD(, in_multi) inm_dthead;
	struct ifnet *ifp;

	IGI_LOCK_SPIN(igi);

	if (igi->igi_refcnt == 0) {
		panic("%s: igi=%p negative refcnt", __func__, igi);
		/* NOTREACHED */
	}

	--igi->igi_refcnt;
	if (igi->igi_refcnt > 0) {
		IGI_UNLOCK(igi);
		return;
	}

	/* Last reference: tear down under the lock. */
	ifp = igi->igi_ifp;
	igi->igi_ifp = NULL;
	IF_DRAIN(&igi->igi_gq);
	IF_DRAIN(&igi->igi_v2q);
	SLIST_INIT(&inm_dthead);
	igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
	IGI_UNLOCK(igi);

	/* Now that we've dropped all locks, release detached records */
	IGMP_REMOVE_DETACHED_INM(&inm_dthead);

	IGMP_PRINTF(("%s: freeing igmp_ifinfo for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	igi_free(igi);
}
851 
/*
 * Process a received IGMPv1 query.
 * Return non-zero if the message should be dropped.
 * (As written, this always returns 0; invalid queries are counted and
 * otherwise ignored rather than dropped.)
 *
 * Switches the interface into IGMPv1 compatibility mode and arms a
 * random report timer on every eligible group joined on 'ifp'; the
 * accumulated timer requests are committed via igmp_set_timeout().
 */
static int
igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	struct in_multistep     step;
	struct igmp_tparams     itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	/*
	 * IGMPv1 Host Membership Queries SHOULD always be addressed to
	 * 224.0.0.1. They are always treated as General Queries.
	 * igmp_group is always ignored. Do not drop it as a userland
	 * daemon may wish to see it.
	 */
	if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
		IGMPSTAT_INC(igps_rcv_badqueries);
		OIGMPSTAT_INC(igps_rcv_badqueries);
		goto done;
	}
	IGMPSTAT_INC(igps_rcv_gen_queries);

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	/* Loopback interfaces never participate in IGMP querier logic. */
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v1 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}
	/*
	 * Switch to IGMPv1 host compatibility mode.
	 */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_1);
	IGI_UNLOCK(igi);

	IGMP_PRINTF(("%s: process v1 query on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	/*
	 * Start the timers in all of our group records
	 * for the interface on which the query arrived,
	 * except those which are already running.
	 */
	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		/* Skip groups on other interfaces or with a running timer. */
		if (inm->inm_ifp != ifp || inm->inm_timer != 0) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			/* Nothing to report for these states. */
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/* Schedule a report at a random delay (RFC 1112). */
			inm->inm_state = IGMP_REPORTING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
			itp.cst = 1;
			break;
		case IGMP_LEAVING_MEMBER:
			break;
		}
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();
done:
	/* Commit any timer state accumulated above. */
	igmp_set_timeout(&itp);

	return 0;
}
941 
942 /*
943  * Process a received IGMPv2 general or group-specific query.
944  */
static int
igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	int                      is_general_query;
	uint16_t                 timer;
	struct igmp_tparams      itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	/*
	 * Validate address fields upfront.
	 */
	if (in_nullhost(igmp->igmp_group)) {
		/*
		 * IGMPv2 General Query.
		 * If this was not sent to the all-hosts group, ignore it.
		 */
		if (!in_allhosts(ip->ip_dst)) {
			goto done;
		}
		IGMPSTAT_INC(igps_rcv_gen_queries);
		is_general_query = 1;
	} else {
		/* IGMPv2 Group-Specific Query. */
		IGMPSTAT_INC(igps_rcv_group_queries);
	}

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	/* Queries on loopback-redirected interfaces are ignored. */
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v2 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}
	/*
	 * Ignore v2 query if in v1 Compatibility Mode.
	 */
	if (igi->igi_version == IGMP_VERSION_1) {
		IGI_UNLOCK(igi);
		goto done;
	}
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_2);
	IGI_UNLOCK(igi);

	/*
	 * Convert the Max Response Time (carried in units of 1/10 second)
	 * to whole seconds, with a floor of 1 second.
	 */
	timer = igmp->igmp_code / IGMP_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	if (is_general_query) {
		struct in_multistep step;

		IGMP_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		/*
		 * For each reporting group joined on this
		 * interface, kick the report timer.
		 */
		in_multihead_lock_shared();
		IN_FIRST_MULTI(step, inm);
		while (inm != NULL) {
			INM_LOCK(inm);
			if (inm->inm_ifp == ifp) {
				itp.cst += igmp_v2_update_group(inm, timer);
			}
			INM_UNLOCK(inm);
			IN_NEXT_MULTI(step, inm);
		}
		in_multihead_lock_done();
	} else {
		/*
		 * Group-specific IGMPv2 query, we need only
		 * look up the single group to process it.
		 */
		in_multihead_lock_shared();
		IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
		in_multihead_lock_done();
		if (inm != NULL) {
			INM_LOCK(inm);
			IGMP_INET_PRINTF(igmp->igmp_group,
			    ("process v2 query %s on ifp 0x%llx(%s)\n",
			    _igmp_inet_buf,
			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
			itp.cst = igmp_v2_update_group(inm, timer);
			INM_UNLOCK(inm);
			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
		}
	}
done:
	/* Arm the global IGMP timers for any work scheduled above. */
	igmp_set_timeout(&itp);

	return 0;
}
1047 
1048 /*
1049  * Update the report timer on a group in response to an IGMPv2 query.
1050  *
1051  * If we are becoming the reporting member for this group, start the timer.
1052  * If we already are the reporting member for this group, and timer is
1053  * below the threshold, reset it.
1054  *
1055  * We may be updating the group for the first time since we switched
1056  * to IGMPv3. If we are, then we must clear any recorded source lists,
1057  * and transition to REPORTING state; the group timer is overloaded
1058  * for group and group-source query responses.
1059  *
1060  * Unlike IGMPv3, the delay per group should be jittered
1061  * to avoid bursts of IGMPv2 reports.
1062  */
static uint32_t
igmp_v2_update_group(struct in_multi *inm, const int timer)
{
	IGMP_INET_PRINTF(inm->inm_addr, ("%s: %s/%s timer=%d\n",
	    __func__, _igmp_inet_buf, if_name(inm->inm_ifp),
	    timer));

	INM_LOCK_ASSERT_HELD(inm);

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
		break;
	case IGMP_REPORTING_MEMBER:
		/*
		 * A report is already scheduled to fire no later than the
		 * requested response time; leave the existing timer alone.
		 */
		if (inm->inm_timer != 0 &&
		    inm->inm_timer <= timer) {
			IGMP_PRINTF(("%s: REPORTING and timer running, "
			    "skipping.\n", __func__));
			break;
		}
		OS_FALLTHROUGH;
	case IGMP_SG_QUERY_PENDING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		/* Become the reporting member with a jittered delay. */
		IGMP_PRINTF(("%s: ->REPORTING\n", __func__));
		inm->inm_state = IGMP_REPORTING_MEMBER;
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		break;
	case IGMP_SLEEPING_MEMBER:
		IGMP_PRINTF(("%s: ->AWAKENING\n", __func__));
		inm->inm_state = IGMP_AWAKENING_MEMBER;
		break;
	case IGMP_LEAVING_MEMBER:
		break;
	}

	/* Callers accumulate this into their current-state timer count. */
	return inm->inm_timer;
}
1103 
1104 /*
1105  * Process a received IGMPv3 general, group-specific or
1106  * group-and-source-specific query.
1107  * Assumes m has already been pulled up to the full IGMP message length.
1108  * Return 0 if successful, otherwise an appropriate error code is returned.
1109  */
static int
igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
    /*const*/ struct igmpv3 *igmpv3)
{
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	int                      is_general_query;
	uint32_t                 maxresp, nsrc, qqi;
	uint32_t                 timer;
	uint8_t                  qrv;
	struct igmp_tparams      itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	IGMP_PRINTF(("%s: process v3 query on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	maxresp = igmpv3->igmp_code;    /* in 1/10ths of a second */
	if (maxresp >= 128) {
		/* Values >= 128 are in mantissa/exponent (floating) form. */
		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
		    (IGMP_EXP(igmpv3->igmp_code) + 3);
	}

	/*
	 * Robustness must never be less than 2 for on-wire IGMPv3.
	 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
	 * an exception for interfaces whose IGMPv3 state changes
	 * are redirected to loopback (e.g. MANET).
	 */
	qrv = IGMP_QRV(igmpv3->igmp_misc);
	if (qrv < 2) {
		IGMP_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
		    qrv, IGMP_RV_INIT));
		qrv = IGMP_RV_INIT;
	}

	qqi = igmpv3->igmp_qqi;
	if (qqi >= 128) {
		/* QQI also uses mantissa/exponent form for values >= 128. */
		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
		    (IGMP_EXP(igmpv3->igmp_qqi) + 3);
	}

	/* Convert max response time to seconds, with a floor of 1 second. */
	timer = maxresp / IGMP_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Validate address fields and versions upfront before
	 * accepting v3 query.
	 */
	if (in_nullhost(igmpv3->igmp_group)) {
		/*
		 * IGMPv3 General Query.
		 *
		 * General Queries SHOULD be directed to 224.0.0.1.
		 * A general query with a source list has undefined
		 * behaviour; discard it.
		 */
		IGMPSTAT_INC(igps_rcv_gen_queries);
		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
			IGMPSTAT_INC(igps_rcv_badqueries);
			OIGMPSTAT_INC(igps_rcv_badqueries);
			goto done;
		}
		is_general_query = 1;
	} else {
		/* Group or group-source specific query. */
		if (nsrc == 0) {
			IGMPSTAT_INC(igps_rcv_group_queries);
		} else {
			IGMPSTAT_INC(igps_rcv_gsr_queries);
		}
	}

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	/* Queries on loopback-redirected interfaces are ignored. */
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v3 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}

	/*
	 * Discard the v3 query if we're in Compatibility Mode.
	 * The RFC is not obviously worded that hosts need to stay in
	 * compatibility mode until the Old Version Querier Present
	 * timer expires.
	 */
	if (igi->igi_version != IGMP_VERSION_3) {
		IGMP_PRINTF(("%s: ignore v3 query in v%d mode on "
		    "ifp 0x%llx(%s)\n", __func__, igi->igi_version,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}

	/* Adopt the querier's robustness, interval and response values. */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_3);
	igi->igi_rv = qrv;
	igi->igi_qi = qqi;
	igi->igi_qri = MAX(timer, IGMP_QRI_MIN);

	IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, igi->igi_rv,
	    igi->igi_qi, igi->igi_qri));

	if (is_general_query) {
		/*
		 * Schedule a current-state report on this ifp for
		 * all groups, possibly containing source lists.
		 * If there is a pending General Query response
		 * scheduled earlier than the selected delay, do
		 * not schedule any other reports.
		 * Otherwise, reset the interface timer.
		 */
		IGMP_PRINTF(("%s: process v3 general query on ifp 0x%llx(%s)\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
			itp.it = igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
		}
		IGI_UNLOCK(igi);
	} else {
		IGI_UNLOCK(igi);
		/*
		 * Group-source-specific queries are throttled on
		 * a per-group basis to defeat denial-of-service attempts.
		 * Queries for groups we are not a member of on this
		 * link are simply ignored.
		 */
		in_multihead_lock_shared();
		IN_LOOKUP_MULTI(&igmpv3->igmp_group, ifp, inm);
		in_multihead_lock_done();
		if (inm == NULL) {
			goto done;
		}

		INM_LOCK(inm);
		if (nsrc > 0) {
			/* Rate-limit group-and-source queries per group. */
			if (!ratecheck(&inm->inm_lastgsrtv,
			    &igmp_gsrdelay)) {
				IGMP_PRINTF(("%s: GS query throttled.\n",
				    __func__));
				IGMPSTAT_INC(igps_drop_gsr_queries);
				INM_UNLOCK(inm);
				INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
				goto done;
			}
		}
		IGMP_INET_PRINTF(igmpv3->igmp_group,
		    ("process v3 %s query on ifp 0x%llx(%s)\n", _igmp_inet_buf,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		/*
		 * If there is a pending General Query response
		 * scheduled sooner than the selected delay, no
		 * further report need be scheduled.
		 * Otherwise, prepare to respond to the
		 * group-specific or group-and-source query.
		 */
		IGI_LOCK(igi);
		itp.it = igi->igi_v3_timer;
		IGI_UNLOCK(igi);
		if (itp.it == 0 || itp.it >= timer) {
			(void) igmp_input_v3_group_query(inm, timer, igmpv3);
			itp.cst = inm->inm_timer;
		}
		INM_UNLOCK(inm);
		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
	}
done:
	if (itp.it > 0) {
		IGMP_PRINTF(("%s: v3 general query response scheduled in "
		    "T+%d seconds on ifp 0x%llx(%s)\n", __func__, itp.it,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	}
	/* Arm the global IGMP timers for any work scheduled above. */
	igmp_set_timeout(&itp);

	return 0;
}
1295 
1296 /*
 * Process a received IGMPv3 group-specific or group-and-source-specific
 * query.
 * Return <0 if any error occurred. Currently this is ignored.
1300  */
static int
igmp_input_v3_group_query(struct in_multi *inm,
    int timer, /*const*/ struct igmpv3 *igmpv3)
{
	int                      retval;
	uint16_t                 nsrc;

	INM_LOCK_ASSERT_HELD(inm);

	retval = 0;

	/* Only reporting or query-pending members need respond. */
	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LEAVING_MEMBER:
		return retval;
	case IGMP_REPORTING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		break;
	}

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Deal with group-specific queries upfront.
	 * If any group query is already pending, purge any recorded
	 * source-list state if it exists, and schedule a query response
	 * for this group-specific query.
	 */
	if (nsrc == 0) {
		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
			inm_clear_recorded(inm);
			/* Keep the sooner of the pending and new deadlines. */
			timer = min(inm->inm_timer, timer);
		}
		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		return retval;
	}

	/*
	 * Deal with the case where a group-and-source-specific query has
	 * been received but a group-specific query is already pending.
	 */
	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
		timer = min(inm->inm_timer, timer);
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		return retval;
	}

	/*
	 * Finally, deal with the case where a group-and-source-specific
	 * query has been received, where a response to a previous g-s-r
	 * query exists, or none exists.
	 * In this case, we need to parse the source-list which the Querier
	 * has provided us with and check if we have any source list filter
	 * entries at T1 for these sources. If we do not, there is no need
	 * schedule a report and the query may be dropped.
	 * If we do, we must record them and schedule a current-state
	 * report for those sources.
	 * FIXME: Handling source lists larger than 1 mbuf requires that
	 * we pass the mbuf chain pointer down to this function, and use
	 * m_getptr() to walk the chain.
	 */
	if (inm->inm_nsrc > 0) {
		const struct in_addr    *ap;
		int                      i, nrecorded;

		/* Source addresses immediately follow the fixed header. */
		ap = (const struct in_addr *)(igmpv3 + 1);
		nrecorded = 0;
		for (i = 0; i < nsrc; i++, ap++) {
			retval = inm_record_source(inm, ap->s_addr);
			if (retval < 0) {
				break;
			}
			nrecorded += retval;
		}
		if (nrecorded > 0) {
			IGMP_PRINTF(("%s: schedule response to SG query\n",
			    __func__));
			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		}
	}

	return retval;
}
1393 
1394 /*
1395  * Process a received IGMPv1 host membership report.
1396  *
1397  * NOTE: 0.0.0.0 workaround breaks const correctness.
1398  */
1399 static int
igmp_input_v1_report(struct ifnet * ifp,struct mbuf * m,struct ip * ip,struct igmp * igmp)1400 igmp_input_v1_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
1401     /*const*/ struct igmp *igmp)
1402 {
1403 	struct in_ifaddr *ia;
1404 	struct in_multi *inm;
1405 
1406 	IGMPSTAT_INC(igps_rcv_reports);
1407 	OIGMPSTAT_INC(igps_rcv_reports);
1408 
1409 	if ((ifp->if_flags & IFF_LOOPBACK) ||
1410 	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1411 		return 0;
1412 	}
1413 
1414 	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr) ||
1415 	    !in_hosteq(igmp->igmp_group, ip->ip_dst))) {
1416 		IGMPSTAT_INC(igps_rcv_badreports);
1417 		OIGMPSTAT_INC(igps_rcv_badreports);
1418 		return EINVAL;
1419 	}
1420 
1421 	/*
1422 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1423 	 * Booting clients may use the source address 0.0.0.0. Some
1424 	 * IGMP daemons may not know how to use IP_RECVIF to determine
1425 	 * the interface upon which this message was received.
1426 	 * Replace 0.0.0.0 with the subnet address if told to do so.
1427 	 */
1428 	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1429 		IFP_TO_IA(ifp, ia);
1430 		if (ia != NULL) {
1431 			IFA_LOCK(&ia->ia_ifa);
1432 			ip->ip_src.s_addr = htonl(ia->ia_subnet);
1433 			IFA_UNLOCK(&ia->ia_ifa);
1434 			IFA_REMREF(&ia->ia_ifa);
1435 		}
1436 	}
1437 
1438 	IGMP_INET_PRINTF(igmp->igmp_group,
1439 	    ("process v1 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1440 	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1441 
1442 	/*
1443 	 * IGMPv1 report suppression.
1444 	 * If we are a member of this group, and our membership should be
1445 	 * reported, stop our group timer and transition to the 'lazy' state.
1446 	 */
1447 	in_multihead_lock_shared();
1448 	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1449 	in_multihead_lock_done();
1450 	if (inm != NULL) {
1451 		struct igmp_ifinfo *igi;
1452 
1453 		INM_LOCK(inm);
1454 
1455 		igi = inm->inm_igi;
1456 		VERIFY(igi != NULL);
1457 
1458 		IGMPSTAT_INC(igps_rcv_ourreports);
1459 		OIGMPSTAT_INC(igps_rcv_ourreports);
1460 
1461 		/*
1462 		 * If we are in IGMPv3 host mode, do not allow the
1463 		 * other host's IGMPv1 report to suppress our reports
1464 		 * unless explicitly configured to do so.
1465 		 */
1466 		IGI_LOCK(igi);
1467 		if (igi->igi_version == IGMP_VERSION_3) {
1468 			if (igmp_legacysupp) {
1469 				igmp_v3_suppress_group_record(inm);
1470 			}
1471 			IGI_UNLOCK(igi);
1472 			INM_UNLOCK(inm);
1473 			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1474 			return 0;
1475 		}
1476 
1477 		INM_LOCK_ASSERT_HELD(inm);
1478 		inm->inm_timer = 0;
1479 
1480 		switch (inm->inm_state) {
1481 		case IGMP_NOT_MEMBER:
1482 		case IGMP_SILENT_MEMBER:
1483 			break;
1484 		case IGMP_IDLE_MEMBER:
1485 		case IGMP_LAZY_MEMBER:
1486 		case IGMP_AWAKENING_MEMBER:
1487 			IGMP_INET_PRINTF(igmp->igmp_group,
1488 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1489 			    _igmp_inet_buf,
1490 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1491 			OS_FALLTHROUGH;
1492 		case IGMP_SLEEPING_MEMBER:
1493 			inm->inm_state = IGMP_SLEEPING_MEMBER;
1494 			break;
1495 		case IGMP_REPORTING_MEMBER:
1496 			IGMP_INET_PRINTF(igmp->igmp_group,
1497 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1498 			    _igmp_inet_buf,
1499 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1500 			if (igi->igi_version == IGMP_VERSION_1) {
1501 				inm->inm_state = IGMP_LAZY_MEMBER;
1502 			} else if (igi->igi_version == IGMP_VERSION_2) {
1503 				inm->inm_state = IGMP_SLEEPING_MEMBER;
1504 			}
1505 			break;
1506 		case IGMP_G_QUERY_PENDING_MEMBER:
1507 		case IGMP_SG_QUERY_PENDING_MEMBER:
1508 		case IGMP_LEAVING_MEMBER:
1509 			break;
1510 		}
1511 		IGI_UNLOCK(igi);
1512 		INM_UNLOCK(inm);
1513 		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1514 	}
1515 
1516 	return 0;
1517 }
1518 
1519 /*
1520  * Process a received IGMPv2 host membership report.
1521  *
1522  * NOTE: 0.0.0.0 workaround breaks const correctness.
1523  */
static int
igmp_input_v2_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
    /*const*/ struct igmp *igmp)
{
	struct in_ifaddr *ia;
	struct in_multi *inm;

	/*
	 * Make sure we don't hear our own membership report.  Fast
	 * leave requires knowing that we are the only member of a
	 * group.
	 */
	IFP_TO_IA(ifp, ia);
	if (ia != NULL) {
		IFA_LOCK(&ia->ia_ifa);
		if (in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
			IFA_UNLOCK(&ia->ia_ifa);
			IFA_REMREF(&ia->ia_ifa);
			return 0;
		}
		IFA_UNLOCK(&ia->ia_ifa);
	}

	IGMPSTAT_INC(igps_rcv_reports);
	OIGMPSTAT_INC(igps_rcv_reports);

	/* Reports looped back from ourselves carry no new information. */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		return 0;
	}

	/*
	 * The report must be for a multicast group, and must be
	 * addressed to the group being reported.
	 */
	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		IGMPSTAT_INC(igps_rcv_badreports);
		OIGMPSTAT_INC(igps_rcv_badreports);
		return EINVAL;
	}

	/*
	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
	 * Booting clients may use the source address 0.0.0.0. Some
	 * IGMP daemons may not know how to use IP_RECVIF to determine
	 * the interface upon which this message was received.
	 * Replace 0.0.0.0 with the subnet address if told to do so.
	 */
	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
		if (ia != NULL) {
			IFA_LOCK(&ia->ia_ifa);
			ip->ip_src.s_addr = htonl(ia->ia_subnet);
			IFA_UNLOCK(&ia->ia_ifa);
		}
	}
	if (ia != NULL) {
		IFA_REMREF(&ia->ia_ifa);
	}

	IGMP_INET_PRINTF(igmp->igmp_group,
	    ("process v2 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	/*
	 * IGMPv2 report suppression.
	 * If we are a member of this group, and our membership should be
	 * reported, and our group timer is pending or about to be reset,
	 * stop our group timer by transitioning to the 'lazy' state.
	 */
	in_multihead_lock_shared();
	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
	in_multihead_lock_done();
	if (inm != NULL) {
		struct igmp_ifinfo *igi;

		INM_LOCK(inm);
		igi = inm->inm_igi;
		VERIFY(igi != NULL);

		IGMPSTAT_INC(igps_rcv_ourreports);
		OIGMPSTAT_INC(igps_rcv_ourreports);

		/*
		 * If we are in IGMPv3 host mode, do not allow the
		 * other host's IGMPv2 report to suppress our reports
		 * unless explicitly configured to do so.
		 */
		IGI_LOCK(igi);
		if (igi->igi_version == IGMP_VERSION_3) {
			if (igmp_legacysupp) {
				igmp_v3_suppress_group_record(inm);
			}
			IGI_UNLOCK(igi);
			INM_UNLOCK(inm);
			INM_REMREF(inm);
			return 0;
		}

		/* Another member reported; cancel our pending report. */
		inm->inm_timer = 0;

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
		case IGMP_SLEEPING_MEMBER:
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			IGMP_INET_PRINTF(igmp->igmp_group,
			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
			    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp),
			    if_name(ifp)));
			OS_FALLTHROUGH;
		case IGMP_LAZY_MEMBER:
			inm->inm_state = IGMP_LAZY_MEMBER;
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			break;
		}
		IGI_UNLOCK(igi);
		INM_UNLOCK(inm);
		INM_REMREF(inm);
	}

	return 0;
}
1655 
void
igmp_input(struct mbuf *m, int off)
{
	int iphlen;
	struct ifnet *ifp;
	struct igmp *igmp;
	struct ip *ip;
	int igmplen;
	int minlen;
	int queryver;

	IGMP_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m), off));

	ifp = m->m_pkthdr.rcvif;

	IGMPSTAT_INC(igps_rcv_total);
	OIGMPSTAT_INC(igps_rcv_total);

	/* Expect 32-bit aligned data pointer on strict-align platforms */
	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);

	ip = mtod(m, struct ip *);
	iphlen = off;

	/* By now, ip_len no longer contains the length of IP header */
	igmplen = ip->ip_len;

	/*
	 * Validate lengths.
	 */
	if (igmplen < IGMP_MINLEN) {
		IGMPSTAT_INC(igps_rcv_tooshort);
		OIGMPSTAT_INC(igps_rcv_tooshort);
		m_freem(m);
		return;
	}

	/*
	 * Always pullup to the minimum size for v1/v2 or v3
	 * to amortize calls to m_pulldown().
	 */
	if (igmplen >= IGMP_V3_QUERY_MINLEN) {
		minlen = IGMP_V3_QUERY_MINLEN;
	} else {
		minlen = IGMP_MINLEN;
	}

	/* A bit more expensive than M_STRUCT_GET, but ensures alignment */
	M_STRUCT_GET0(igmp, struct igmp *, m, off, minlen);
	if (igmp == NULL) {
		/* No m_freem(): the mbuf is consumed on pullup failure. */
		IGMPSTAT_INC(igps_rcv_tooshort);
		OIGMPSTAT_INC(igps_rcv_tooshort);
		return;
	}
	/* N.B.: we assume the packet was correctly aligned in ip_input. */

	/*
	 * Validate checksum.
	 * Temporarily advance past the IP header so in_cksum() covers
	 * only the IGMP message, then restore the mbuf.
	 */
	m->m_data += iphlen;
	m->m_len -= iphlen;
	if (in_cksum(m, igmplen)) {
		IGMPSTAT_INC(igps_rcv_badsum);
		OIGMPSTAT_INC(igps_rcv_badsum);
		m_freem(m);
		return;
	}
	m->m_data -= iphlen;
	m->m_len += iphlen;

	/*
	 * IGMP control traffic is link-scope, and must have a TTL of 1.
	 * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
	 * probe packets may come from beyond the LAN.
	 */
	if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
		IGMPSTAT_INC(igps_rcv_badttl);
		m_freem(m);
		return;
	}

	switch (igmp->igmp_type) {
	case IGMP_HOST_MEMBERSHIP_QUERY:
		/*
		 * Determine query version: a minimum-length query with a
		 * zero Max Response Code is v1, otherwise v2; longer
		 * queries are v3.
		 */
		if (igmplen == IGMP_MINLEN) {
			if (igmp->igmp_code == 0) {
				queryver = IGMP_VERSION_1;
			} else {
				queryver = IGMP_VERSION_2;
			}
		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
			queryver = IGMP_VERSION_3;
		} else {
			IGMPSTAT_INC(igps_rcv_tooshort);
			OIGMPSTAT_INC(igps_rcv_tooshort);
			m_freem(m);
			return;
		}

		OIGMPSTAT_INC(igps_rcv_queries);

		switch (queryver) {
		case IGMP_VERSION_1:
			IGMPSTAT_INC(igps_rcv_v1v2_queries);
			if (!igmp_v1enable) {
				break;
			}
			if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
				m_freem(m);
				return;
			}
			break;

		case IGMP_VERSION_2:
			IGMPSTAT_INC(igps_rcv_v1v2_queries);
			if (!igmp_v2enable) {
				break;
			}
			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
				m_freem(m);
				return;
			}
			break;

		case IGMP_VERSION_3: {
			struct igmpv3 *igmpv3;
			uint16_t igmpv3len;
			uint16_t srclen;
			int nsrc;

			IGMPSTAT_INC(igps_rcv_v3_queries);
			igmpv3 = (struct igmpv3 *)igmp;
			/*
			 * Validate length based on source count.
			 */
			nsrc = ntohs(igmpv3->igmp_numsrc);
			/*
			 * The max value of nsrc is limited by the
			 * MTU of the network on which the datagram
			 * is received
			 */
			if (nsrc < 0 || nsrc > IGMP_V3_QUERY_MAX_SRCS) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				m_freem(m);
				return;
			}
			srclen = sizeof(struct in_addr) * (uint16_t)nsrc;
			if (igmplen < (IGMP_V3_QUERY_MINLEN + srclen)) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				m_freem(m);
				return;
			}
			igmpv3len = IGMP_V3_QUERY_MINLEN + srclen;
			/*
			 * A bit more expensive than M_STRUCT_GET,
			 * but ensures alignment.
			 */
			M_STRUCT_GET0(igmpv3, struct igmpv3 *, m,
			    off, igmpv3len);
			if (igmpv3 == NULL) {
				/* mbuf consumed on pullup failure. */
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				return;
			}
			/*
			 * N.B.: we assume the packet was correctly
			 * aligned in ip_input.
			 */
			if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
				m_freem(m);
				return;
			}
		}
		break;
		}
		break;

	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
		if (!igmp_v1enable) {
			break;
		}
		if (igmp_input_v1_report(ifp, m, ip, igmp) != 0) {
			m_freem(m);
			return;
		}
		break;

	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
		if (!igmp_v2enable) {
			break;
		}
		if (!ip_checkrouteralert(m)) {
			IGMPSTAT_INC(igps_rcv_nora);
		}
		if (igmp_input_v2_report(ifp, m, ip, igmp) != 0) {
			m_freem(m);
			return;
		}
		break;

	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
		/*
		 * Hosts do not need to process IGMPv3 membership reports,
		 * as report suppression is no longer required.
		 */
		if (!ip_checkrouteralert(m)) {
			IGMPSTAT_INC(igps_rcv_nora);
		}
		break;

	default:
		break;
	}

	IGMP_LOCK_ASSERT_NOTHELD();
	/*
	 * Pass all valid IGMP packets up to any process(es) listening on a
	 * raw IGMP socket.
	 */
	rip_input(m, off);
}
1879 
1880 /*
1881  * Schedule IGMP timer based on various parameters; caller must ensure that
1882  * lock ordering is maintained as this routine acquires IGMP global lock.
1883  */
1884 void
igmp_set_timeout(struct igmp_tparams * itp)1885 igmp_set_timeout(struct igmp_tparams *itp)
1886 {
1887 	IGMP_LOCK_ASSERT_NOTHELD();
1888 	VERIFY(itp != NULL);
1889 
1890 	if (itp->qpt != 0 || itp->it != 0 || itp->cst != 0 || itp->sct != 0) {
1891 		IGMP_LOCK();
1892 		if (itp->qpt != 0) {
1893 			querier_present_timers_running = 1;
1894 		}
1895 		if (itp->it != 0) {
1896 			interface_timers_running = 1;
1897 		}
1898 		if (itp->cst != 0) {
1899 			current_state_timers_running = 1;
1900 		}
1901 		if (itp->sct != 0) {
1902 			state_change_timers_running = 1;
1903 		}
1904 		if (itp->fast) {
1905 			igmp_sched_fast_timeout();
1906 		} else {
1907 			igmp_sched_timeout();
1908 		}
1909 		IGMP_UNLOCK();
1910 	}
1911 }
1912 
/*
 * Request an expedited (fast) timeout for the given timer parameters;
 * marks the request and defers to igmp_set_timeout().
 */
void
igmp_set_fast_timeout(struct igmp_tparams *itp)
{
	VERIFY(itp != NULL);
	itp->fast = true;
	igmp_set_timeout(itp);
}
1920 
/*
 * IGMP timer handler (per 1 second).
 *
 * Services all pending IGMP timer state under IGMP_LOCK:
 *  - IGMPv1/v2 "querier present" timers (per interface),
 *  - IGMPv3 General Query response (interface) timers,
 *  - per-group current-state and state-change report timers,
 * then dispatches any reports queued by that processing and re-arms
 * itself via igmp_sched_timeout() while work remains.
 *
 * arg0 is non-NULL when invoked from the fast timeout path (see
 * igmp_sched_fast_timeout()); in that case the 1-second based
 * querier-present and interface timers are skipped.
 */
static void
igmp_timeout(thread_call_param_t arg0, thread_call_param_t arg1 __unused)
{
	struct ifqueue           scq;   /* State-change packets */
	struct ifqueue           qrq;   /* Query response packets */
	struct ifnet            *ifp;
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	unsigned int             loop = 0, uri_sec = 0;
	SLIST_HEAD(, in_multi)  inm_dthead;
	bool                     fast = arg0 != NULL;

	/* Records detached during this pass; final release happens unlocked. */
	SLIST_INIT(&inm_dthead);

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	IGMP_LOCK();

	IGMP_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d, fast %d\n", __func__,
	    querier_present_timers_running, interface_timers_running,
	    current_state_timers_running, state_change_timers_running,
	    fast));

	if (fast) {
		/*
		 * When running the fast timer, skip processing
		 * of "querier present" timers since they are
		 * based on 1-second intervals.
		 */
		goto skip_query_timers;
	}
	/*
	 * IGMPv1/v2 querier present timer processing.
	 */
	if (querier_present_timers_running) {
		querier_present_timers_running = 0;
		LIST_FOREACH(igi, &igi_head, igi_link) {
			IGI_LOCK(igi);
			igmp_v1v2_process_querier_timers(igi);
			/* Keep the global flag set while any link timer remains. */
			if (igi->igi_v1_timer > 0 || igi->igi_v2_timer > 0) {
				querier_present_timers_running = 1;
			}
			IGI_UNLOCK(igi);
		}
	}

	/*
	 * IGMPv3 General Query response timer processing.
	 */
	if (interface_timers_running) {
		IGMP_PRINTF(("%s: interface timers running\n", __func__));
		interface_timers_running = 0;
		LIST_FOREACH(igi, &igi_head, igi_link) {
			IGI_LOCK(igi);
			if (igi->igi_version != IGMP_VERSION_3) {
				IGI_UNLOCK(igi);
				continue;
			}
			if (igi->igi_v3_timer == 0) {
				/* Do nothing. */
			} else if (--igi->igi_v3_timer == 0) {
				/* Timer expired: send the General Query response now. */
				if (igmp_v3_dispatch_general_query(igi) > 0) {
					interface_timers_running = 1;
				}
			} else {
				interface_timers_running = 1;
			}
			IGI_UNLOCK(igi);
		}
	}

skip_query_timers:
	if (!current_state_timers_running &&
	    !state_change_timers_running) {
		goto out_locked;
	}

	current_state_timers_running = 0;
	state_change_timers_running = 0;

	memset(&qrq, 0, sizeof(struct ifqueue));
	qrq.ifq_maxlen = IGMP_MAX_G_GS_PACKETS;

	memset(&scq, 0, sizeof(struct ifqueue));
	scq.ifq_maxlen =  IGMP_MAX_STATE_CHANGE_PACKETS;

	IGMP_PRINTF(("%s: state change timers running\n", __func__));

	/*
	 * IGMPv1/v2/v3 host report and state-change timer processing.
	 * Note: Processing a v3 group timer may remove a node.
	 */
	LIST_FOREACH(igi, &igi_head, igi_link) {
		struct in_multistep step;

		IGI_LOCK(igi);
		ifp = igi->igi_ifp;
		loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
		uri_sec = IGMP_RANDOM_DELAY(igi->igi_uri);
		IGI_UNLOCK(igi);

		/* Walk all memberships, processing only those on this link. */
		in_multihead_lock_shared();
		IN_FIRST_MULTI(step, inm);
		while (inm != NULL) {
			INM_LOCK(inm);
			if (inm->inm_ifp != ifp) {
				goto next;
			}

			IGI_LOCK(igi);
			switch (igi->igi_version) {
			case IGMP_VERSION_1:
			case IGMP_VERSION_2:
				igmp_v1v2_process_group_timer(inm,
				    igi->igi_version);
				break;
			case IGMP_VERSION_3:
				igmp_v3_process_group_timers(igi, &qrq,
				    &scq, inm, uri_sec);
				break;
			}
			IGI_UNLOCK(igi);
next:
			INM_UNLOCK(inm);
			IN_NEXT_MULTI(step, inm);
		}
		in_multihead_lock_done();

		/* Dispatch whatever the group-timer pass queued up. */
		IGI_LOCK(igi);
		if (igi->igi_version == IGMP_VERSION_1 ||
		    igi->igi_version == IGMP_VERSION_2) {
			igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop);
		} else if (igi->igi_version == IGMP_VERSION_3) {
			IGI_UNLOCK(igi);
			igmp_dispatch_queue(NULL, &qrq, 0, loop);
			igmp_dispatch_queue(NULL, &scq, 0, loop);
			VERIFY(qrq.ifq_len == 0);
			VERIFY(scq.ifq_len == 0);
			IGI_LOCK(igi);
		}
		/*
		 * In case there are still any pending membership reports
		 * which didn't get drained at version change time.
		 */
		IF_DRAIN(&igi->igi_v2q);
		/*
		 * Release all deferred inm records, and drain any locally
		 * enqueued packets; do it even if the current IGMP version
		 * for the link is no longer IGMPv3, in order to handle the
		 * version change case.
		 */
		igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
		IGI_UNLOCK(igi);

		IF_DRAIN(&qrq);
		IF_DRAIN(&scq);
	}

out_locked:
	/* re-arm the timer if there's work to do */
	if (fast) {
		igmp_fast_timeout_run = false;
	} else {
		igmp_timeout_run = false;
	}
	igmp_sched_timeout();
	IGMP_UNLOCK();

	/* Now that we're dropped all locks, release detached records */
	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
}
2100 
2101 static void
igmp_sched_timeout(void)2102 igmp_sched_timeout(void)
2103 {
2104 	static thread_call_t igmp_timeout_tcall;
2105 	uint64_t deadline = 0, leeway = 0;
2106 
2107 	IGMP_LOCK_ASSERT_HELD();
2108 	if (igmp_timeout_tcall == NULL) {
2109 		igmp_timeout_tcall =
2110 		    thread_call_allocate_with_options(igmp_timeout,
2111 		    NULL,
2112 		    THREAD_CALL_PRIORITY_KERNEL,
2113 		    THREAD_CALL_OPTIONS_ONCE);
2114 	}
2115 	if (!igmp_timeout_run &&
2116 	    (querier_present_timers_running || current_state_timers_running ||
2117 	    interface_timers_running || state_change_timers_running)) {
2118 		igmp_timeout_run = true;
2119 		clock_interval_to_deadline(igmp_timeout_delay, NSEC_PER_MSEC,
2120 		    &deadline);
2121 		clock_interval_to_absolutetime_interval(igmp_timeout_leeway,
2122 		    NSEC_PER_MSEC, &leeway);
2123 		thread_call_enter_delayed_with_leeway(igmp_timeout_tcall, NULL,
2124 		    deadline, leeway,
2125 		    THREAD_CALL_DELAY_LEEWAY);
2126 	}
2127 }
2128 
2129 static void
igmp_sched_fast_timeout(void)2130 igmp_sched_fast_timeout(void)
2131 {
2132 	static thread_call_t igmp_fast_timeout_tcall;
2133 
2134 	IGMP_LOCK_ASSERT_HELD();
2135 	if (igmp_fast_timeout_tcall == NULL) {
2136 		igmp_fast_timeout_tcall =
2137 		    thread_call_allocate_with_options(igmp_timeout,
2138 		    igmp_sched_fast_timeout,
2139 		    THREAD_CALL_PRIORITY_KERNEL,
2140 		    THREAD_CALL_OPTIONS_ONCE);
2141 	}
2142 	if (!igmp_fast_timeout_run &&
2143 	    (current_state_timers_running || state_change_timers_running)) {
2144 		igmp_fast_timeout_run = true;
2145 		thread_call_enter(igmp_fast_timeout_tcall);
2146 	}
2147 }
2148 
2149 /*
2150  * Appends an in_multi to the list to be released later.
2151  *
2152  * Caller must be holding igi_lock.
2153  */
2154 static void
igmp_append_relq(struct igmp_ifinfo * igi,struct in_multi * inm)2155 igmp_append_relq(struct igmp_ifinfo *igi, struct in_multi *inm)
2156 {
2157 	IGI_LOCK_ASSERT_HELD(igi);
2158 	IGMP_PRINTF(("%s: adding inm %llx on relq ifp 0x%llx(%s)\n",
2159 	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm),
2160 	    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2161 	    if_name(igi->igi_ifp)));
2162 	SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
2163 }
2164 
/*
 * Free the in_multi reference(s) for this IGMP lifecycle.
 *
 * Drains igi->igi_relinmhead: each entry is detached from in_multihead
 * and its deferred reference dropped.  Records whose last reference
 * would be released are instead queued on @inm_dthead for the caller
 * to free once all locks are dropped.
 *
 * Caller must be holding igi_lock; it is dropped and re-acquired here.
 */
static void
igmp_flush_relq(struct igmp_ifinfo *igi, struct igmp_inm_relhead *inm_dthead)
{
	struct in_multi *inm;
	SLIST_HEAD(, in_multi) temp_relinmhead;

	/*
	 * Before dropping the igi_lock, copy all the items in the
	 * release list to a temporary list to prevent other threads
	 * from changing igi_relinmhead while we are traversing it.
	 */
	IGI_LOCK_ASSERT_HELD(igi);
	SLIST_INIT(&temp_relinmhead);
	while ((inm = SLIST_FIRST(&igi->igi_relinmhead)) != NULL) {
		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
		SLIST_INSERT_HEAD(&temp_relinmhead, inm, inm_nrele);
	}
	/* Drop igi_lock before taking in_multihead exclusively (lock order). */
	IGI_UNLOCK(igi);
	in_multihead_lock_exclusive();
	while ((inm = SLIST_FIRST(&temp_relinmhead)) != NULL) {
		int lastref;

		SLIST_REMOVE_HEAD(&temp_relinmhead, inm_nrele);
		INM_LOCK(inm);
		VERIFY(inm->inm_nrelecnt != 0);
		inm->inm_nrelecnt--;
		lastref = in_multi_detach(inm);
		VERIFY(!lastref || (!(inm->inm_debug & IFD_ATTACHED) &&
		    inm->inm_reqcnt == 0));
		INM_UNLOCK(inm);
		/* from igi_relinmhead */
		INM_REMREF(inm);
		/* from in_multihead list */
		if (lastref) {
			/*
			 * Defer releasing our final reference, as we
			 * are holding the IGMP lock at this point, and
			 * we could end up with locking issues later on
			 * (while issuing SIOCDELMULTI) when this is the
			 * final reference count.  Let the caller do it
			 * when it is safe.
			 */
			IGMP_ADD_DETACHED_INM(inm_dthead, inm);
		}
	}
	in_multihead_lock_done();
	IGI_LOCK(igi);
}
2218 
2219 /*
2220  * Update host report group timer for IGMPv1/v2.
2221  * Will update the global pending timer flags.
2222  */
2223 static void
igmp_v1v2_process_group_timer(struct in_multi * inm,const int igmp_version)2224 igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version)
2225 {
2226 	int report_timer_expired;
2227 
2228 	IGMP_LOCK_ASSERT_HELD();
2229 	INM_LOCK_ASSERT_HELD(inm);
2230 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2231 
2232 	if (inm->inm_timer == 0) {
2233 		report_timer_expired = 0;
2234 	} else if (--inm->inm_timer == 0) {
2235 		report_timer_expired = 1;
2236 	} else {
2237 		current_state_timers_running = 1;
2238 		/* caller will schedule timer */
2239 		return;
2240 	}
2241 
2242 	switch (inm->inm_state) {
2243 	case IGMP_NOT_MEMBER:
2244 	case IGMP_SILENT_MEMBER:
2245 	case IGMP_IDLE_MEMBER:
2246 	case IGMP_LAZY_MEMBER:
2247 	case IGMP_SLEEPING_MEMBER:
2248 	case IGMP_AWAKENING_MEMBER:
2249 		break;
2250 	case IGMP_REPORTING_MEMBER:
2251 		if (report_timer_expired) {
2252 			inm->inm_state = IGMP_IDLE_MEMBER;
2253 			(void) igmp_v1v2_queue_report(inm,
2254 			    (igmp_version == IGMP_VERSION_2) ?
2255 			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
2256 			    IGMP_v1_HOST_MEMBERSHIP_REPORT);
2257 			INM_LOCK_ASSERT_HELD(inm);
2258 			IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2259 		}
2260 		break;
2261 	case IGMP_G_QUERY_PENDING_MEMBER:
2262 	case IGMP_SG_QUERY_PENDING_MEMBER:
2263 	case IGMP_LEAVING_MEMBER:
2264 		break;
2265 	}
2266 }
2267 
/*
 * Update a group's timers for IGMPv3.
 * Will update the global pending timer flags.
 * Note: Unlocked read from igi.
 *
 * @param igi      interface info for the link (must match inm->inm_igi).
 * @param qrq      outbound queue for Current-State (query response) records.
 * @param scq      outbound queue for State-Change records.
 * @param inm      the group membership being processed.
 * @param uri_sec  delay (seconds) used to re-arm the state-change
 *                 retransmission timer when retransmissions remain.
 */
static void
igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
    struct ifqueue *qrq, struct ifqueue *scq,
    struct in_multi *inm, const unsigned int uri_sec)
{
	int query_response_timer_expired;
	int state_change_retransmit_timer_expired;

	IGMP_LOCK_ASSERT_HELD();
	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_HELD(igi);
	VERIFY(igi == inm->inm_igi);

	query_response_timer_expired = 0;
	state_change_retransmit_timer_expired = 0;

	/*
	 * During a transition from v1/v2 compatibility mode back to v3,
	 * a group record in REPORTING state may still have its group
	 * timer active. This is a no-op in this function; it is easier
	 * to deal with it here than to complicate the timeout path.
	 */
	if (inm->inm_timer == 0) {
		query_response_timer_expired = 0;
	} else if (--inm->inm_timer == 0) {
		query_response_timer_expired = 1;
	} else {
		current_state_timers_running = 1;
		/* caller will schedule timer */
	}

	/* State-change retransmission timer, same tick-down pattern. */
	if (inm->inm_sctimer == 0) {
		state_change_retransmit_timer_expired = 0;
	} else if (--inm->inm_sctimer == 0) {
		state_change_retransmit_timer_expired = 1;
	} else {
		state_change_timers_running = 1;
		/* caller will schedule timer */
	}

	/* We are in timer callback, so be quick about it. */
	if (!state_change_retransmit_timer_expired &&
	    !query_response_timer_expired) {
		return;
	}

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
	case IGMP_IDLE_MEMBER:
		break;
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		/*
		 * Respond to a previously pending Group-Specific
		 * or Group-and-Source-Specific query by enqueueing
		 * the appropriate Current-State report for
		 * immediate transmission.
		 */
		if (query_response_timer_expired) {
			int retval;

			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
			IGMP_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			inm->inm_state = IGMP_REPORTING_MEMBER;
			/* XXX Clear recorded sources for next time. */
			inm_clear_recorded(inm);
		}
		OS_FALLTHROUGH;
	case IGMP_REPORTING_MEMBER:
	case IGMP_LEAVING_MEMBER:
		if (state_change_retransmit_timer_expired) {
			/*
			 * State-change retransmission timer fired.
			 * If there are any further pending retransmissions,
			 * set the global pending state-change flag, and
			 * reset the timer.
			 */
			if (--inm->inm_scrv > 0) {
				inm->inm_sctimer = (uint16_t)uri_sec;
				state_change_timers_running = 1;
				/* caller will schedule timer */
			}
			/*
			 * Retransmit the previously computed state-change
			 * report. If there are no further pending
			 * retransmissions, the mbuf queue will be consumed.
			 * Update T0 state to T1 as we have now sent
			 * a state-change.
			 */
			(void) igmp_v3_merge_state_changes(inm, scq);

			inm_commit(inm);
			IGMP_INET_PRINTF(inm->inm_addr,
			    ("%s: T1 -> T0 for %s/%s\n", __func__,
			    _igmp_inet_buf, if_name(inm->inm_ifp)));

			/*
			 * If we are leaving the group for good, make sure
			 * we release IGMP's reference to it.
			 * This release must be deferred using a SLIST,
			 * as we are called from a loop which traverses
			 * the in_multihead list.
			 */
			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
			    inm->inm_scrv == 0) {
				inm->inm_state = IGMP_NOT_MEMBER;
				/*
				 * A reference has already been held in
				 * igmp_final_leave() for this inm, so
				 * no need to hold another one.  We also
				 * bumped up its request count then, so
				 * that it stays in in_multihead.  Both
				 * of them will be released when it is
				 * dequeued later on.
				 */
				VERIFY(inm->inm_nrelecnt != 0);
				igmp_append_relq(igi, inm);
			}
		}
		break;
	}
}
2401 
2402 /*
2403  * Suppress a group's pending response to a group or source/group query.
2404  *
2405  * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
2406  * Do NOT update ST1/ST0 as this operation merely suppresses
2407  * the currently pending group record.
2408  * Do NOT suppress the response to a general query. It is possible but
2409  * it would require adding another state or flag.
2410  */
2411 static void
igmp_v3_suppress_group_record(struct in_multi * inm)2412 igmp_v3_suppress_group_record(struct in_multi *inm)
2413 {
2414 	INM_LOCK_ASSERT_HELD(inm);
2415 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2416 
2417 	VERIFY(inm->inm_igi->igi_version == IGMP_VERSION_3);
2418 
2419 	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER &&
2420 	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER) {
2421 		return;
2422 	}
2423 
2424 	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
2425 		inm_clear_recorded(inm);
2426 	}
2427 
2428 	inm->inm_timer = 0;
2429 	inm->inm_state = IGMP_REPORTING_MEMBER;
2430 }
2431 
2432 /*
2433  * Switch to a different IGMP version on the given interface,
2434  * as per Section 7.2.1.
2435  */
2436 static uint32_t
igmp_set_version(struct igmp_ifinfo * igi,const int igmp_version)2437 igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version)
2438 {
2439 	int old_version_timer;
2440 
2441 	IGI_LOCK_ASSERT_HELD(igi);
2442 
2443 	IGMP_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__,
2444 	    igmp_version, (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2445 	    if_name(igi->igi_ifp)));
2446 
2447 	if (igmp_version == IGMP_VERSION_1 || igmp_version == IGMP_VERSION_2) {
2448 		/*
2449 		 * Compute the "Older Version Querier Present" timer as per
2450 		 * Section 8.12, in seconds.
2451 		 */
2452 		old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
2453 
2454 		if (igmp_version == IGMP_VERSION_1) {
2455 			igi->igi_v1_timer = old_version_timer;
2456 			igi->igi_v2_timer = 0;
2457 		} else if (igmp_version == IGMP_VERSION_2) {
2458 			igi->igi_v1_timer = 0;
2459 			igi->igi_v2_timer = old_version_timer;
2460 		}
2461 	}
2462 
2463 	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2464 		if (igi->igi_version != IGMP_VERSION_2) {
2465 			igmp_v3_cancel_link_timers(igi);
2466 			igi->igi_version = IGMP_VERSION_2;
2467 		}
2468 	} else if (igi->igi_v1_timer > 0) {
2469 		if (igi->igi_version != IGMP_VERSION_1) {
2470 			igmp_v3_cancel_link_timers(igi);
2471 			igi->igi_version = IGMP_VERSION_1;
2472 		}
2473 	}
2474 
2475 	IGI_LOCK_ASSERT_HELD(igi);
2476 
2477 	return MAX(igi->igi_v1_timer, igi->igi_v2_timer);
2478 }
2479 
/*
 * Cancel pending IGMPv3 timers for the given link and all groups
 * joined on it; state-change, general-query, and group-query timers.
 *
 * Only ever called on a transition from v3 to Compatibility mode. Kill
 * the timers stone dead (this may be expensive for large N groups), they
 * will be restarted if Compatibility Mode deems that they must be due to
 * query processing.
 *
 * Caller must be holding igi_lock; it is dropped and re-acquired while
 * the in_multihead list is traversed.
 */
static void
igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
{
	struct ifnet            *ifp;
	struct in_multi         *inm;
	struct in_multistep     step;

	IGI_LOCK_ASSERT_HELD(igi);

	IGMP_PRINTF(("%s: cancel v3 timers on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), if_name(igi->igi_ifp)));

	/*
	 * Stop the v3 General Query Response on this link stone dead.
	 * If timer is woken up due to interface_timers_running,
	 * the flag will be cleared if there are no pending link timers.
	 */
	igi->igi_v3_timer = 0;

	/*
	 * Now clear the current-state and state-change report timers
	 * for all memberships scoped to this link.
	 */
	ifp = igi->igi_ifp;
	IGI_UNLOCK(igi);

	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		/*
		 * NOTE(review): this skips an entry only when BOTH its ifp
		 * and its igi differ from ours; an inm's igi normally
		 * belongs to its ifp, so this matches checking ifp alone —
		 * confirm the '&&' (vs '||' or a single test) is intended.
		 */
		if (inm->inm_ifp != ifp && inm->inm_igi != igi) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/*
			 * These states are either not relevant in v3 mode,
			 * or are unreported. Do nothing.
			 */
			break;
		case IGMP_LEAVING_MEMBER:
			/*
			 * If we are leaving the group and switching to
			 * compatibility mode, we need to release the final
			 * reference held for issuing the INCLUDE {}, and
			 * transition to REPORTING to ensure the host leave
			 * message is sent upstream to the old querier --
			 * transition to NOT would lose the leave and race.
			 * During igmp_final_leave(), we bumped up both the
			 * request and reference counts.  Since we cannot
			 * call in_multi_detach() here, defer this task to
			 * the timer routine.
			 */
			VERIFY(inm->inm_nrelecnt != 0);
			IGI_LOCK(igi);
			igmp_append_relq(igi, inm);
			IGI_UNLOCK(igi);
			OS_FALLTHROUGH;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
			inm_clear_recorded(inm);
			OS_FALLTHROUGH;
		case IGMP_REPORTING_MEMBER:
			inm->inm_state = IGMP_REPORTING_MEMBER;
			break;
		}
		/*
		 * Always clear state-change and group report timers.
		 * Free any pending IGMPv3 state-change records.
		 */
		inm->inm_sctimer = 0;
		inm->inm_timer = 0;
		IF_DRAIN(&inm->inm_scq);
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();

	IGI_LOCK(igi);
}
2576 
/*
 * Update the Older Version Querier Present timers for a link.
 * See Section 7.2.1 of RFC 3376.
 *
 * Called once per second from igmp_timeout() for each interface; decides
 * which IGMP version the link should run based on which older-version
 * querier timers are still active and on the igmp_v1enable/igmp_v2enable
 * sysctl knobs.
 */
static void
igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
{
	IGI_LOCK_ASSERT_HELD(igi);

	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
		/*
		 * IGMPv1 and IGMPv2 Querier Present timers expired.
		 *
		 * Revert to IGMPv3.
		 */
		if (igi->igi_version != IGMP_VERSION_3) {
			IGMP_PRINTF(("%s: transition from v%d -> v%d "
			    "on 0x%llx(%s)\n", __func__,
			    igi->igi_version, IGMP_VERSION_3,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    if_name(igi->igi_ifp)));
			igi->igi_version = IGMP_VERSION_3;
			IF_DRAIN(&igi->igi_v2q);
		}
	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
		/*
		 * IGMPv1 Querier Present timer expired,
		 * IGMPv2 Querier Present timer running.
		 * If IGMPv2 was disabled since last timeout,
		 * revert to IGMPv3.
		 * If IGMPv2 is enabled, revert to IGMPv2.
		 */
		if (!igmp_v2enable) {
			IGMP_PRINTF(("%s: transition from v%d -> v%d "
			    "on 0x%llx(%s%d)\n", __func__,
			    igi->igi_version, IGMP_VERSION_3,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
			igi->igi_v2_timer = 0;
			igi->igi_version = IGMP_VERSION_3;
			IF_DRAIN(&igi->igi_v2q);
		} else {
			/* Tick the v2 timer down; enter/stay in v2 mode. */
			--igi->igi_v2_timer;
			if (igi->igi_version != IGMP_VERSION_2) {
				IGMP_PRINTF(("%s: transition from v%d -> v%d "
				    "on 0x%llx(%s)\n", __func__,
				    igi->igi_version, IGMP_VERSION_2,
				    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
				    if_name(igi->igi_ifp)));
				IF_DRAIN(&igi->igi_gq);
				igmp_v3_cancel_link_timers(igi);
				igi->igi_version = IGMP_VERSION_2;
			}
		}
	} else if (igi->igi_v1_timer > 0) {
		/*
		 * IGMPv1 Querier Present timer running.
		 * Stop IGMPv2 timer if running.
		 *
		 * If IGMPv1 was disabled since last timeout,
		 * revert to IGMPv3.
		 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
		 */
		if (!igmp_v1enable) {
			IGMP_PRINTF(("%s: transition from v%d -> v%d "
			    "on 0x%llx(%s%d)\n", __func__,
			    igi->igi_version, IGMP_VERSION_3,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
			igi->igi_v1_timer = 0;
			igi->igi_version = IGMP_VERSION_3;
			IF_DRAIN(&igi->igi_v2q);
		} else {
			--igi->igi_v1_timer;
		}
		if (igi->igi_v2_timer > 0) {
			IGMP_PRINTF(("%s: cancel v2 timer on 0x%llx(%s%d)\n",
			    __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
			igi->igi_v2_timer = 0;
		}
	}
}
2661 
/*
 * Dispatch an IGMPv1/v2 host report or leave message.
 * These are always small enough to fit inside a single mbuf.
 *
 * The packet is built IGMP-first (mbuf data offset past the IP header),
 * checksummed, and then the IP header is prepended by moving m_data
 * back.  It is enqueued on igi_v2q rather than sent directly: sending
 * here could call ip_output() while in_multihead_lock is held and
 * deadlock (see comment below); igmp_timeout() drains the queue.
 *
 * Returns 0 on success, ENOMEM if an mbuf cannot be allocated or the
 * outbound queue is full.
 */
static int
igmp_v1v2_queue_report(struct in_multi *inm, const int type)
{
	struct ifnet            *ifp;
	struct igmp             *igmp;
	struct ip               *ip;
	struct mbuf             *m;
	int                     error = 0;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_HELD(inm->inm_igi);

	ifp = inm->inm_ifp;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		return ENOMEM;
	}
	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));

	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);

	/* Point at the IGMP payload first; the IP header is prepended later. */
	m->m_data += sizeof(struct ip);
	m->m_len = sizeof(struct igmp);

	igmp = mtod(m, struct igmp *);
	igmp->igmp_type = (u_char)type;
	igmp->igmp_code = 0;
	igmp->igmp_group = inm->inm_addr;
	igmp->igmp_cksum = 0;
	/* Checksum covers only the IGMP message (m_len at this point). */
	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));

	/* Pull m_data back to expose room for the IP header. */
	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	/*
	 * Fill in a partial IP header; remaining fields (version/hlen,
	 * ttl, id, checksum) are presumably completed on the output
	 * path — ip_len/ip_off are left in host byte order here.
	 */
	ip = mtod(m, struct ip *);
	ip->ip_tos = 0;
	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
	ip->ip_off = 0;
	ip->ip_p = IPPROTO_IGMP;
	ip->ip_src.s_addr = INADDR_ANY;

	/* Leave messages go to ALL-ROUTERS; reports go to the group itself. */
	if (type == IGMP_HOST_LEAVE_MESSAGE) {
		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
	} else {
		ip->ip_dst = inm->inm_addr;
	}

	igmp_save_context(m, ifp);

	m->m_flags |= M_IGMPV2;
	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK) {
		m->m_flags |= M_IGMP_LOOP;
	}

	/*
	 * Due to the fact that at this point we are possibly holding
	 * in_multihead_lock in shared or exclusive mode, we can't call
	 * igmp_sendpkt() here since that will eventually call ip_output(),
	 * which will try to lock in_multihead_lock and cause a deadlock.
	 * Instead we defer the work to the igmp_timeout() thread, thus
	 * avoiding unlocking in_multihead_lock here.
	 */
	if (IF_QFULL(&inm->inm_igi->igi_v2q)) {
		IGMP_PRINTF(("%s: v1/v2 outbound queue full\n", __func__));
		error = ENOMEM;
		m_freem(m);
	} else {
		IF_ENQUEUE(&inm->inm_igi->igi_v2q, m);
		VERIFY(error == 0);
	}
	return error;
}
2739 
2740 /*
2741  * Process a state change from the upper layer for the given IPv4 group.
2742  *
2743  * Each socket holds a reference on the in_multi in its own ip_moptions.
2744  * The socket layer will have made the necessary updates to the group
2745  * state, it is now up to IGMP to issue a state change report if there
2746  * has been any change between T0 (when the last state-change was issued)
2747  * and T1 (now).
2748  *
2749  * We use the IGMPv3 state machine at group level. The IGMP module
2750  * however makes the decision as to which IGMP protocol version to speak.
2751  * A state change *from* INCLUDE {} always means an initial join.
2752  * A state change *to* INCLUDE {} always means a final leave.
2753  *
2754  * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
2755  * save ourselves a bunch of work; any exclusive mode groups need not
2756  * compute source filter lists.
2757  */
2758 int
igmp_change_state(struct in_multi * inm,struct igmp_tparams * itp)2759 igmp_change_state(struct in_multi *inm, struct igmp_tparams *itp)
2760 {
2761 	struct igmp_ifinfo *igi;
2762 	struct ifnet *ifp;
2763 	int error = 0;
2764 
2765 	VERIFY(itp != NULL);
2766 	bzero(itp, sizeof(*itp));
2767 
2768 	INM_LOCK_ASSERT_HELD(inm);
2769 	VERIFY(inm->inm_igi != NULL);
2770 	IGI_LOCK_ASSERT_NOTHELD(inm->inm_igi);
2771 
2772 	/*
2773 	 * Try to detect if the upper layer just asked us to change state
2774 	 * for an interface which has now gone away.
2775 	 */
2776 	VERIFY(inm->inm_ifma != NULL);
2777 	ifp = inm->inm_ifma->ifma_ifp;
2778 	/*
2779 	 * Sanity check that netinet's notion of ifp is the same as net's.
2780 	 */
2781 	VERIFY(inm->inm_ifp == ifp);
2782 
2783 	igi = IGMP_IFINFO(ifp);
2784 	VERIFY(igi != NULL);
2785 
2786 	/*
2787 	 * If we detect a state transition to or from MCAST_UNDEFINED
2788 	 * for this group, then we are starting or finishing an IGMP
2789 	 * life cycle for this group.
2790 	 */
2791 	if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
2792 		IGMP_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2793 		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode));
2794 		if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
2795 			IGMP_PRINTF(("%s: initial join\n", __func__));
2796 			error = igmp_initial_join(inm, igi, itp);
2797 			goto out;
2798 		} else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
2799 			IGMP_PRINTF(("%s: final leave\n", __func__));
2800 			igmp_final_leave(inm, igi, itp);
2801 			goto out;
2802 		}
2803 	} else {
2804 		IGMP_PRINTF(("%s: filter set change\n", __func__));
2805 	}
2806 
2807 	error = igmp_handle_state_change(inm, igi, itp);
2808 out:
2809 	return error;
2810 }
2811 
/*
 * Perform the initial join for an IGMP group.
 *
 * When joining a group:
 *  If the group should have its IGMP traffic suppressed, do nothing.
 *  IGMPv1 starts sending IGMPv1 host membership reports.
 *  IGMPv2 starts sending IGMPv2 host membership reports.
 *  IGMPv3 will schedule an IGMPv3 state-change report containing the
 *  initial state of the membership.
 *
 * @param inm  the newly joined group (inm_lock held, igi_lock not held).
 * @param igi  interface info for the group's link.
 * @param itp  output: which timers (cst/sct) the caller must arm.
 *
 * Returns 0 on success, or an errno from queueing the report.
 */
static int
igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi,
    struct igmp_tparams *itp)
{
	struct ifnet            *ifp;
	struct ifqueue          *ifq;
	int                      error, retval, syncstates;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_NOTHELD(igi);
	VERIFY(itp != NULL);

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: initial join %s on ifp 0x%llx(%s)\n", __func__,
	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
	    if_name(inm->inm_ifp)));

	error = 0;
	syncstates = 1;

	ifp = inm->inm_ifp;

	IGI_LOCK(igi);
	VERIFY(igi->igi_ifp == ifp);

	/*
	 * Groups joined on loopback or marked as 'not reported',
	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
	 * are never reported in any IGMP protocol exchanges.
	 * All other groups enter the appropriate IGMP state machine
	 * for the version in use on this link.
	 * A link marked as IGIF_SILENT causes IGMP to be completely
	 * disabled for the link.
	 */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (igi->igi_flags & IGIF_SILENT) ||
	    !igmp_isgroupreported(inm->inm_addr)) {
		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
		    __func__));
		inm->inm_state = IGMP_SILENT_MEMBER;
		inm->inm_timer = 0;
	} else {
		/*
		 * Deal with overlapping in_multi lifecycle.
		 * If this group was LEAVING, then make sure
		 * we drop the reference we picked up to keep the
		 * group around for the final INCLUDE {} enqueue.
		 * Since we cannot call in_multi_detach() here,
		 * defer this task to the timer routine.
		 */
		if (igi->igi_version == IGMP_VERSION_3 &&
		    inm->inm_state == IGMP_LEAVING_MEMBER) {
			VERIFY(inm->inm_nrelecnt != 0);
			igmp_append_relq(igi, inm);
		}

		inm->inm_state = IGMP_REPORTING_MEMBER;

		switch (igi->igi_version) {
		case IGMP_VERSION_1:
		case IGMP_VERSION_2:
			/* v1/v2: queue a single report and arm a random delay. */
			inm->inm_state = IGMP_IDLE_MEMBER;
			error = igmp_v1v2_queue_report(inm,
			    (igi->igi_version == IGMP_VERSION_2) ?
			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
			    IGMP_v1_HOST_MEMBERSHIP_REPORT);

			INM_LOCK_ASSERT_HELD(inm);
			IGI_LOCK_ASSERT_HELD(igi);

			if (error == 0) {
				inm->inm_timer =
				    IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
				itp->cst = 1;
			}
			break;

		case IGMP_VERSION_3:
			/*
			 * Defer update of T0 to T1, until the first copy
			 * of the state change has been transmitted.
			 */
			syncstates = 0;

			/*
			 * Immediately enqueue a State-Change Report for
			 * this interface, freeing any previous reports.
			 * Don't kick the timers if there is nothing to do,
			 * or if an error occurred.
			 */
			ifq = &inm->inm_scq;
			IF_DRAIN(ifq);
			retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
			    0, 0);
			itp->cst = (ifq->ifq_len > 0);
			IGMP_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			if (retval <= 0) {
				/* Negative retval encodes an errno. */
				error = retval * -1;
				break;
			}

			/*
			 * Schedule transmission of pending state-change
			 * report up to RV times for this link. The timer
			 * will fire at the next igmp_timeout (1 second),
			 * giving us an opportunity to merge the reports.
			 */
			if (igi->igi_flags & IGIF_LOOPBACK) {
				inm->inm_scrv = 1;
			} else {
				VERIFY(igi->igi_rv > 1);
				inm->inm_scrv = (uint16_t)igi->igi_rv;
			}
			inm->inm_sctimer = 1;
			itp->sct = 1;

			error = 0;
			break;
		}
	}
	IGI_UNLOCK(igi);

	/*
	 * Only update the T0 state if state change is atomic,
	 * i.e. we don't need to wait for a timer to fire before we
	 * can consider the state change to have been communicated.
	 */
	if (syncstates) {
		inm_commit(inm);
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 -> T0 for %s/%s\n", __func__,
		    _igmp_inet_buf, if_name(inm->inm_ifp)));
	}

	return error;
}
2959 
2960 /*
2961  * Issue an intermediate state change during the IGMP life-cycle.
2962  */
2963 static int
igmp_handle_state_change(struct in_multi * inm,struct igmp_ifinfo * igi,struct igmp_tparams * itp)2964 igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi,
2965     struct igmp_tparams *itp)
2966 {
2967 	struct ifnet            *ifp;
2968 	int                      retval = 0;
2969 
2970 	INM_LOCK_ASSERT_HELD(inm);
2971 	IGI_LOCK_ASSERT_NOTHELD(igi);
2972 	VERIFY(itp != NULL);
2973 
2974 	IGMP_INET_PRINTF(inm->inm_addr,
2975 	    ("%s: state change for %s on ifp 0x%llx(%s)\n", __func__,
2976 	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
2977 	    if_name(inm->inm_ifp)));
2978 
2979 	ifp = inm->inm_ifp;
2980 
2981 	IGI_LOCK(igi);
2982 	VERIFY(igi->igi_ifp == ifp);
2983 
2984 	if ((ifp->if_flags & IFF_LOOPBACK) ||
2985 	    (igi->igi_flags & IGIF_SILENT) ||
2986 	    !igmp_isgroupreported(inm->inm_addr) ||
2987 	    (igi->igi_version != IGMP_VERSION_3)) {
2988 		IGI_UNLOCK(igi);
2989 		if (!igmp_isgroupreported(inm->inm_addr)) {
2990 			IGMP_PRINTF(("%s: not kicking state "
2991 			    "machine for silent group\n", __func__));
2992 		}
2993 		IGMP_PRINTF(("%s: nothing to do\n", __func__));
2994 		inm_commit(inm);
2995 		IGMP_INET_PRINTF(inm->inm_addr,
2996 		    ("%s: T1 -> T0 for %s/%s\n", __func__,
2997 		    _igmp_inet_buf, inm->inm_ifp->if_name));
2998 		goto done;
2999 	}
3000 
3001 	IF_DRAIN(&inm->inm_scq);
3002 
3003 	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
3004 	itp->cst = (inm->inm_scq.ifq_len > 0);
3005 	IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
3006 	if (retval <= 0) {
3007 		IGI_UNLOCK(igi);
3008 		retval *= -1;
3009 		goto done;
3010 	}
3011 	/*
3012 	 * If record(s) were enqueued, start the state-change
3013 	 * report timer for this group.
3014 	 */
3015 	inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : (uint16_t)igi->igi_rv);
3016 	inm->inm_sctimer = 1;
3017 	itp->sct = 1;
3018 	IGI_UNLOCK(igi);
3019 done:
3020 	return retval;
3021 }
3022 
3023 /*
3024  * Perform the final leave for an IGMP group.
3025  *
3026  * When leaving a group:
3027  *  IGMPv1 does nothing.
3028  *  IGMPv2 sends a host leave message, if and only if we are the reporter.
3029  *  IGMPv3 enqueues a state-change report containing a transition
3030  *  to INCLUDE {} for immediate transmission.
3031  */
3032 static void
igmp_final_leave(struct in_multi * inm,struct igmp_ifinfo * igi,struct igmp_tparams * itp)3033 igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi,
3034     struct igmp_tparams *itp)
3035 {
3036 	int syncstates = 1;
3037 	bool retried_already = false;
3038 
3039 	INM_LOCK_ASSERT_HELD(inm);
3040 	IGI_LOCK_ASSERT_NOTHELD(igi);
3041 	VERIFY(itp != NULL);
3042 
3043 	IGMP_INET_PRINTF(inm->inm_addr,
3044 	    ("%s: final leave %s on ifp 0x%llx(%s)\n", __func__,
3045 	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
3046 	    if_name(inm->inm_ifp)));
3047 
3048 retry:
3049 	switch (inm->inm_state) {
3050 	case IGMP_NOT_MEMBER:
3051 	case IGMP_SILENT_MEMBER:
3052 	case IGMP_LEAVING_MEMBER:
3053 		/* Already leaving or left; do nothing. */
3054 		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
3055 		    __func__));
3056 		break;
3057 	case IGMP_REPORTING_MEMBER:
3058 	case IGMP_IDLE_MEMBER:
3059 	case IGMP_G_QUERY_PENDING_MEMBER:
3060 	case IGMP_SG_QUERY_PENDING_MEMBER:
3061 		IGI_LOCK(igi);
3062 		if (igi->igi_version == IGMP_VERSION_2) {
3063 			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
3064 			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
3065 				/*
3066 				 * We may be in the process of downgrading to
3067 				 * IGMPv2 but because we just grabbed the
3068 				 * igi_lock we may have lost the race.
3069 				 */
3070 				if (!retried_already) {
3071 					IGI_UNLOCK(igi);
3072 					retried_already = true;
3073 					goto retry;
3074 				} else {
3075 					/*
3076 					 * Proceed with leaving the group
3077 					 * as if it were IGMPv2 even though we
3078 					 * may have an inconsistent multicast state.
3079 					 */
3080 				}
3081 			}
3082 			/* scheduler timer if enqueue is successful */
3083 			itp->cst = (igmp_v1v2_queue_report(inm,
3084 			    IGMP_HOST_LEAVE_MESSAGE) == 0);
3085 
3086 			INM_LOCK_ASSERT_HELD(inm);
3087 			IGI_LOCK_ASSERT_HELD(igi);
3088 
3089 			inm->inm_state = IGMP_NOT_MEMBER;
3090 		} else if (igi->igi_version == IGMP_VERSION_3) {
3091 			/*
3092 			 * Stop group timer and all pending reports.
3093 			 * Immediately enqueue a state-change report
3094 			 * TO_IN {} to be sent on the next timeout,
3095 			 * giving us an opportunity to merge reports.
3096 			 */
3097 			IF_DRAIN(&inm->inm_scq);
3098 			inm->inm_timer = 0;
3099 			if (igi->igi_flags & IGIF_LOOPBACK) {
3100 				inm->inm_scrv = 1;
3101 			} else {
3102 				inm->inm_scrv = (uint16_t)igi->igi_rv;
3103 			}
3104 			IGMP_INET_PRINTF(inm->inm_addr,
3105 			    ("%s: Leaving %s/%s with %d "
3106 			    "pending retransmissions.\n", __func__,
3107 			    _igmp_inet_buf, if_name(inm->inm_ifp),
3108 			    inm->inm_scrv));
3109 			if (inm->inm_scrv == 0) {
3110 				inm->inm_state = IGMP_NOT_MEMBER;
3111 				inm->inm_sctimer = 0;
3112 			} else {
3113 				int retval;
3114 				/*
3115 				 * Stick around in the in_multihead list;
3116 				 * the final detach will be issued by
3117 				 * igmp_v3_process_group_timers() when
3118 				 * the retransmit timer expires.
3119 				 */
3120 				INM_ADDREF_LOCKED(inm);
3121 				VERIFY(inm->inm_debug & IFD_ATTACHED);
3122 				inm->inm_reqcnt++;
3123 				VERIFY(inm->inm_reqcnt >= 1);
3124 				inm->inm_nrelecnt++;
3125 				VERIFY(inm->inm_nrelecnt != 0);
3126 
3127 				retval = igmp_v3_enqueue_group_record(
3128 					&inm->inm_scq, inm, 1, 0, 0);
3129 				itp->cst = (inm->inm_scq.ifq_len > 0);
3130 				KASSERT(retval != 0,
3131 				    ("%s: enqueue record = %d\n", __func__,
3132 				    retval));
3133 
3134 				inm->inm_state = IGMP_LEAVING_MEMBER;
3135 				inm->inm_sctimer = 1;
3136 				itp->sct = 1;
3137 				syncstates = 0;
3138 			}
3139 		}
3140 		IGI_UNLOCK(igi);
3141 		break;
3142 	case IGMP_LAZY_MEMBER:
3143 	case IGMP_SLEEPING_MEMBER:
3144 	case IGMP_AWAKENING_MEMBER:
3145 		/* Our reports are suppressed; do nothing. */
3146 		break;
3147 	}
3148 
3149 	if (syncstates) {
3150 		inm_commit(inm);
3151 		IGMP_INET_PRINTF(inm->inm_addr,
3152 		    ("%s: T1 -> T0 for %s/%s\n", __func__,
3153 		    _igmp_inet_buf, if_name(inm->inm_ifp)));
3154 		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
3155 		IGMP_INET_PRINTF(inm->inm_addr,
3156 		    ("%s: T1 now MCAST_UNDEFINED for %s/%s\n",
3157 		    __func__, _igmp_inet_buf, if_name(inm->inm_ifp)));
3158 	}
3159 }
3160 
3161 /*
3162  * Enqueue an IGMPv3 group record to the given output queue.
3163  *
3164  * XXX This function could do with having the allocation code
3165  * split out, and the multiple-tree-walks coalesced into a single
3166  * routine as has been done in igmp_v3_enqueue_filter_change().
3167  *
3168  * If is_state_change is zero, a current-state record is appended.
3169  * If is_state_change is non-zero, a state-change report is appended.
3170  *
3171  * If is_group_query is non-zero, an mbuf packet chain is allocated.
3172  * If is_group_query is zero, and if there is a packet with free space
3173  * at the tail of the queue, it will be appended to providing there
3174  * is enough free space.
3175  * Otherwise a new mbuf packet chain is allocated.
3176  *
3177  * If is_source_query is non-zero, each source is checked to see if
3178  * it was recorded for a Group-Source query, and will be omitted if
3179  * it is not both in-mode and recorded.
3180  *
3181  * The function will attempt to allocate leading space in the packet
3182  * for the IP/IGMP header to be prepended without fragmenting the chain.
3183  *
3184  * If successful the size of all data appended to the queue is returned,
3185  * otherwise an error code less than zero is returned, or zero if
3186  * no record(s) were appended.
3187  */
3188 static int
igmp_v3_enqueue_group_record(struct ifqueue * ifq,struct in_multi * inm,const int is_state_change,const int is_group_query,const int is_source_query)3189 igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
3190     const int is_state_change, const int is_group_query,
3191     const int is_source_query)
3192 {
3193 	struct igmp_grouprec     ig;
3194 	struct igmp_grouprec    *pig;
3195 	struct ifnet            *ifp;
3196 	struct ip_msource       *ims, *nims;
3197 	struct mbuf             *m0, *m, *md;
3198 	int                      error, is_filter_list_change;
3199 	int                      minrec0len, m0srcs, nbytes, off;
3200 	uint16_t                 msrcs;
3201 	int                      record_has_sources;
3202 	int                      now;
3203 	int                      type;
3204 	in_addr_t                naddr;
3205 	uint16_t                 mode;
3206 	u_int16_t                ig_numsrc;
3207 
3208 	INM_LOCK_ASSERT_HELD(inm);
3209 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
3210 
3211 	error = 0;
3212 	ifp = inm->inm_ifp;
3213 	is_filter_list_change = 0;
3214 	m = NULL;
3215 	m0 = NULL;
3216 	m0srcs = 0;
3217 	msrcs = 0;
3218 	nbytes = 0;
3219 	nims = NULL;
3220 	record_has_sources = 1;
3221 	pig = NULL;
3222 	type = IGMP_DO_NOTHING;
3223 	mode = inm->inm_st[1].iss_fmode;
3224 
3225 	/*
3226 	 * If we did not transition out of ASM mode during t0->t1,
3227 	 * and there are no source nodes to process, we can skip
3228 	 * the generation of source records.
3229 	 */
3230 	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
3231 	    inm->inm_nsrc == 0) {
3232 		record_has_sources = 0;
3233 	}
3234 
3235 	if (is_state_change) {
3236 		/*
3237 		 * Queue a state change record.
3238 		 * If the mode did not change, and there are non-ASM
3239 		 * listeners or source filters present,
3240 		 * we potentially need to issue two records for the group.
3241 		 * If we are transitioning to MCAST_UNDEFINED, we need
3242 		 * not send any sources.
3243 		 * If there are ASM listeners, and there was no filter
3244 		 * mode transition of any kind, do nothing.
3245 		 */
3246 		if (mode != inm->inm_st[0].iss_fmode) {
3247 			if (mode == MCAST_EXCLUDE) {
3248 				IGMP_PRINTF(("%s: change to EXCLUDE\n",
3249 				    __func__));
3250 				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
3251 			} else {
3252 				IGMP_PRINTF(("%s: change to INCLUDE\n",
3253 				    __func__));
3254 				type = IGMP_CHANGE_TO_INCLUDE_MODE;
3255 				if (mode == MCAST_UNDEFINED) {
3256 					record_has_sources = 0;
3257 				}
3258 			}
3259 		} else {
3260 			if (record_has_sources) {
3261 				is_filter_list_change = 1;
3262 			} else {
3263 				type = IGMP_DO_NOTHING;
3264 			}
3265 		}
3266 	} else {
3267 		/*
3268 		 * Queue a current state record.
3269 		 */
3270 		if (mode == MCAST_EXCLUDE) {
3271 			type = IGMP_MODE_IS_EXCLUDE;
3272 		} else if (mode == MCAST_INCLUDE) {
3273 			type = IGMP_MODE_IS_INCLUDE;
3274 			VERIFY(inm->inm_st[1].iss_asm == 0);
3275 		}
3276 	}
3277 
3278 	/*
3279 	 * Generate the filter list changes using a separate function.
3280 	 */
3281 	if (is_filter_list_change) {
3282 		return igmp_v3_enqueue_filter_change(ifq, inm);
3283 	}
3284 
3285 	if (type == IGMP_DO_NOTHING) {
3286 		IGMP_INET_PRINTF(inm->inm_addr,
3287 		    ("%s: nothing to do for %s/%s\n",
3288 		    __func__, _igmp_inet_buf,
3289 		    if_name(inm->inm_ifp)));
3290 		return 0;
3291 	}
3292 
3293 	/*
3294 	 * If any sources are present, we must be able to fit at least
3295 	 * one in the trailing space of the tail packet's mbuf,
3296 	 * ideally more.
3297 	 */
3298 	minrec0len = sizeof(struct igmp_grouprec);
3299 	if (record_has_sources) {
3300 		minrec0len += sizeof(in_addr_t);
3301 	}
3302 
3303 	IGMP_INET_PRINTF(inm->inm_addr,
3304 	    ("%s: queueing %s for %s/%s\n", __func__,
3305 	    igmp_rec_type_to_str(type), _igmp_inet_buf,
3306 	    if_name(inm->inm_ifp)));
3307 
3308 	/*
3309 	 * Check if we have a packet in the tail of the queue for this
3310 	 * group into which the first group record for this group will fit.
3311 	 * Otherwise allocate a new packet.
3312 	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
3313 	 * Note: Group records for G/GSR query responses MUST be sent
3314 	 * in their own packet.
3315 	 */
3316 	m0 = ifq->ifq_tail;
3317 	if (!is_group_query &&
3318 	    m0 != NULL &&
3319 	    (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
3320 	    (m0->m_pkthdr.len + minrec0len) <
3321 	    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3322 		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3323 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3324 		m = m0;
3325 		IGMP_PRINTF(("%s: use existing packet\n", __func__));
3326 	} else {
3327 		if (IF_QFULL(ifq)) {
3328 			IGMP_PRINTF(("%s: outbound queue full\n", __func__));
3329 			return -ENOMEM;
3330 		}
3331 		m = NULL;
3332 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3333 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3334 		if (!is_state_change && !is_group_query) {
3335 			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3336 			if (m) {
3337 				m->m_data += IGMP_LEADINGSPACE;
3338 			}
3339 		}
3340 		if (m == NULL) {
3341 			m = m_gethdr(M_DONTWAIT, MT_DATA);
3342 			if (m) {
3343 				MH_ALIGN(m, IGMP_LEADINGSPACE);
3344 			}
3345 		}
3346 		if (m == NULL) {
3347 			return -ENOMEM;
3348 		}
3349 
3350 		igmp_save_context(m, ifp);
3351 
3352 		IGMP_PRINTF(("%s: allocated first packet\n", __func__));
3353 	}
3354 
3355 	/*
3356 	 * Append group record.
3357 	 * If we have sources, we don't know how many yet.
3358 	 */
3359 	ig.ig_type = (u_char)type;
3360 	ig.ig_datalen = 0;
3361 	ig.ig_numsrc = 0;
3362 	ig.ig_group = inm->inm_addr;
3363 	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3364 		if (m != m0) {
3365 			m_freem(m);
3366 		}
3367 		IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
3368 		return -ENOMEM;
3369 	}
3370 	nbytes += sizeof(struct igmp_grouprec);
3371 
3372 	/*
3373 	 * Append as many sources as will fit in the first packet.
3374 	 * If we are appending to a new packet, the chain allocation
3375 	 * may potentially use clusters; use m_getptr() in this case.
3376 	 * If we are appending to an existing packet, we need to obtain
3377 	 * a pointer to the group record after m_append(), in case a new
3378 	 * mbuf was allocated.
3379 	 * Only append sources which are in-mode at t1. If we are
3380 	 * transitioning to MCAST_UNDEFINED state on the group, do not
3381 	 * include source entries.
3382 	 * Only report recorded sources in our filter set when responding
3383 	 * to a group-source query.
3384 	 */
3385 	if (record_has_sources) {
3386 		if (m == m0) {
3387 			md = m_last(m);
3388 			pig = (struct igmp_grouprec *)(void *)
3389 			    (mtod(md, uint8_t *) + md->m_len - nbytes);
3390 		} else {
3391 			md = m_getptr(m, 0, &off);
3392 			pig = (struct igmp_grouprec *)(void *)
3393 			    (mtod(md, uint8_t *) + off);
3394 		}
3395 		msrcs = 0;
3396 		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
3397 #ifdef IGMP_DEBUG
3398 			char buf[MAX_IPv4_STR_LEN];
3399 
3400 			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3401 			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3402 #endif
3403 			now = ims_get_mode(inm, ims, 1);
3404 			IGMP_PRINTF(("%s: node is %d\n", __func__, now));
3405 			if ((now != mode) ||
3406 			    (now == mode && mode == MCAST_UNDEFINED)) {
3407 				IGMP_PRINTF(("%s: skip node\n", __func__));
3408 				continue;
3409 			}
3410 			if (is_source_query && ims->ims_stp == 0) {
3411 				IGMP_PRINTF(("%s: skip unrecorded node\n",
3412 				    __func__));
3413 				continue;
3414 			}
3415 			IGMP_PRINTF(("%s: append node\n", __func__));
3416 			naddr = htonl(ims->ims_haddr);
3417 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3418 				if (m != m0) {
3419 					m_freem(m);
3420 				}
3421 				IGMP_PRINTF(("%s: m_append() failed.\n",
3422 				    __func__));
3423 				return -ENOMEM;
3424 			}
3425 			nbytes += sizeof(in_addr_t);
3426 			++msrcs;
3427 			if (msrcs == m0srcs) {
3428 				break;
3429 			}
3430 		}
3431 		IGMP_PRINTF(("%s: msrcs is %d this packet\n", __func__,
3432 		    msrcs));
3433 		ig_numsrc = htons(msrcs);
3434 		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3435 		nbytes += (msrcs * sizeof(in_addr_t));
3436 	}
3437 
3438 	if (is_source_query && msrcs == 0) {
3439 		IGMP_PRINTF(("%s: no recorded sources to report\n", __func__));
3440 		if (m != m0) {
3441 			m_freem(m);
3442 		}
3443 		return 0;
3444 	}
3445 
3446 	/*
3447 	 * We are good to go with first packet.
3448 	 */
3449 	if (m != m0) {
3450 		IGMP_PRINTF(("%s: enqueueing first packet\n", __func__));
3451 		m->m_pkthdr.vt_nrecs = 1;
3452 		IF_ENQUEUE(ifq, m);
3453 	} else {
3454 		m->m_pkthdr.vt_nrecs++;
3455 	}
3456 	/*
3457 	 * No further work needed if no source list in packet(s).
3458 	 */
3459 	if (!record_has_sources) {
3460 		return nbytes;
3461 	}
3462 
3463 	/*
3464 	 * Whilst sources remain to be announced, we need to allocate
3465 	 * a new packet and fill out as many sources as will fit.
3466 	 * Always try for a cluster first.
3467 	 */
3468 	while (nims != NULL) {
3469 		if (IF_QFULL(ifq)) {
3470 			IGMP_PRINTF(("%s: outbound queue full\n", __func__));
3471 			return -ENOMEM;
3472 		}
3473 		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3474 		if (m) {
3475 			m->m_data += IGMP_LEADINGSPACE;
3476 		}
3477 		if (m == NULL) {
3478 			m = m_gethdr(M_DONTWAIT, MT_DATA);
3479 			if (m) {
3480 				MH_ALIGN(m, IGMP_LEADINGSPACE);
3481 			}
3482 		}
3483 		if (m == NULL) {
3484 			return -ENOMEM;
3485 		}
3486 		igmp_save_context(m, ifp);
3487 		md = m_getptr(m, 0, &off);
3488 		pig = (struct igmp_grouprec *)(void *)
3489 		    (mtod(md, uint8_t *) + off);
3490 		IGMP_PRINTF(("%s: allocated next packet\n", __func__));
3491 
3492 		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3493 			if (m != m0) {
3494 				m_freem(m);
3495 			}
3496 			IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
3497 			return -ENOMEM;
3498 		}
3499 		m->m_pkthdr.vt_nrecs = 1;
3500 		nbytes += sizeof(struct igmp_grouprec);
3501 
3502 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3503 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3504 
3505 		msrcs = 0;
3506 		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3507 #ifdef IGMP_DEBUG
3508 			char buf[MAX_IPv4_STR_LEN];
3509 
3510 			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3511 			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3512 #endif
3513 			now = ims_get_mode(inm, ims, 1);
3514 			if ((now != mode) ||
3515 			    (now == mode && mode == MCAST_UNDEFINED)) {
3516 				IGMP_PRINTF(("%s: skip node\n", __func__));
3517 				continue;
3518 			}
3519 			if (is_source_query && ims->ims_stp == 0) {
3520 				IGMP_PRINTF(("%s: skip unrecorded node\n",
3521 				    __func__));
3522 				continue;
3523 			}
3524 			IGMP_PRINTF(("%s: append node\n", __func__));
3525 			naddr = htonl(ims->ims_haddr);
3526 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3527 				if (m != m0) {
3528 					m_freem(m);
3529 				}
3530 				IGMP_PRINTF(("%s: m_append() failed.\n",
3531 				    __func__));
3532 				return -ENOMEM;
3533 			}
3534 			++msrcs;
3535 			if (msrcs == m0srcs) {
3536 				break;
3537 			}
3538 		}
3539 		ig_numsrc = htons(msrcs);
3540 		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3541 		nbytes += (msrcs * sizeof(in_addr_t));
3542 
3543 		IGMP_PRINTF(("%s: enqueueing next packet\n", __func__));
3544 		IF_ENQUEUE(ifq, m);
3545 	}
3546 
3547 	return nbytes;
3548 }
3549 
3550 /*
3551  * Type used to mark record pass completion.
3552  * We exploit the fact we can cast to this easily from the
3553  * current filter modes on each ip_msource node.
3554  */
3555 typedef enum {
3556 	REC_NONE = 0x00,        /* MCAST_UNDEFINED */
3557 	REC_ALLOW = 0x01,       /* MCAST_INCLUDE */
3558 	REC_BLOCK = 0x02,       /* MCAST_EXCLUDE */
3559 	REC_FULL = REC_ALLOW | REC_BLOCK
3560 } rectype_t;
3561 
3562 /*
3563  * Enqueue an IGMPv3 filter list change to the given output queue.
3564  *
3565  * Source list filter state is held in an RB-tree. When the filter list
3566  * for a group is changed without changing its mode, we need to compute
3567  * the deltas between T0 and T1 for each source in the filter set,
3568  * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3569  *
3570  * As we may potentially queue two record types, and the entire R-B tree
3571  * needs to be walked at once, we break this out into its own function
3572  * so we can generate a tightly packed queue of packets.
3573  *
3574  * XXX This could be written to only use one tree walk, although that makes
3575  * serializing into the mbuf chains a bit harder. For now we do two walks
3576  * which makes things easier on us, and it may or may not be harder on
3577  * the L2 cache.
3578  *
3579  * If successful the size of all data appended to the queue is returned,
3580  * otherwise an error code less than zero is returned, or zero if
3581  * no record(s) were appended.
3582  */
3583 static int
igmp_v3_enqueue_filter_change(struct ifqueue * ifq,struct in_multi * inm)3584 igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
3585 {
3586 	static const int MINRECLEN =
3587 	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
3588 	struct ifnet            *ifp;
3589 	struct igmp_grouprec     ig;
3590 	struct igmp_grouprec    *pig;
3591 	struct ip_msource       *ims, *nims;
3592 	struct mbuf             *m, *m0, *md;
3593 	in_addr_t                naddr;
3594 	int                      m0srcs, nbytes, npbytes, off, schanged;
3595 	uint16_t                 rsrcs;
3596 	int                      nallow, nblock;
3597 	uint16_t                 mode;
3598 	uint8_t                  now, then;
3599 	rectype_t                crt, drt, nrt;
3600 	u_int16_t                ig_numsrc;
3601 
3602 	INM_LOCK_ASSERT_HELD(inm);
3603 
3604 	if (inm->inm_nsrc == 0 ||
3605 	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0)) {
3606 		return 0;
3607 	}
3608 
3609 	ifp = inm->inm_ifp;                     /* interface */
3610 	mode = inm->inm_st[1].iss_fmode;        /* filter mode at t1 */
3611 	crt = REC_NONE; /* current group record type */
3612 	drt = REC_NONE; /* mask of completed group record types */
3613 	nrt = REC_NONE; /* record type for current node */
3614 	m0srcs = 0;     /* # source which will fit in current mbuf chain */
3615 	nbytes = 0;     /* # of bytes appended to group's state-change queue */
3616 	npbytes = 0;    /* # of bytes appended this packet */
3617 	rsrcs = 0;      /* # sources encoded in current record */
3618 	schanged = 0;   /* # nodes encoded in overall filter change */
3619 	nallow = 0;     /* # of source entries in ALLOW_NEW */
3620 	nblock = 0;     /* # of source entries in BLOCK_OLD */
3621 	nims = NULL;    /* next tree node pointer */
3622 
3623 	/*
3624 	 * For each possible filter record mode.
3625 	 * The first kind of source we encounter tells us which
3626 	 * is the first kind of record we start appending.
3627 	 * If a node transitioned to UNDEFINED at t1, its mode is treated
3628 	 * as the inverse of the group's filter mode.
3629 	 */
3630 	while (drt != REC_FULL) {
3631 		do {
3632 			m0 = ifq->ifq_tail;
3633 			if (m0 != NULL &&
3634 			    (m0->m_pkthdr.vt_nrecs + 1 <=
3635 			    IGMP_V3_REPORT_MAXRECS) &&
3636 			    (m0->m_pkthdr.len + MINRECLEN) <
3637 			    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3638 				m = m0;
3639 				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3640 				    sizeof(struct igmp_grouprec)) /
3641 				    sizeof(in_addr_t);
3642 				IGMP_PRINTF(("%s: use previous packet\n",
3643 				    __func__));
3644 			} else {
3645 				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3646 				if (m) {
3647 					m->m_data += IGMP_LEADINGSPACE;
3648 				}
3649 				if (m == NULL) {
3650 					m = m_gethdr(M_DONTWAIT, MT_DATA);
3651 					if (m) {
3652 						MH_ALIGN(m, IGMP_LEADINGSPACE);
3653 					}
3654 				}
3655 				if (m == NULL) {
3656 					IGMP_PRINTF(("%s: m_get*() failed\n",
3657 					    __func__));
3658 					return -ENOMEM;
3659 				}
3660 				m->m_pkthdr.vt_nrecs = 0;
3661 				igmp_save_context(m, ifp);
3662 				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3663 				    sizeof(struct igmp_grouprec)) /
3664 				    sizeof(in_addr_t);
3665 				npbytes = 0;
3666 				IGMP_PRINTF(("%s: allocated new packet\n",
3667 				    __func__));
3668 			}
3669 			/*
3670 			 * Append the IGMP group record header to the
3671 			 * current packet's data area.
3672 			 * Recalculate pointer to free space for next
3673 			 * group record, in case m_append() allocated
3674 			 * a new mbuf or cluster.
3675 			 */
3676 			memset(&ig, 0, sizeof(ig));
3677 			ig.ig_group = inm->inm_addr;
3678 			if (!m_append(m, sizeof(ig), (void *)&ig)) {
3679 				if (m != m0) {
3680 					m_freem(m);
3681 				}
3682 				IGMP_PRINTF(("%s: m_append() failed\n",
3683 				    __func__));
3684 				return -ENOMEM;
3685 			}
3686 			npbytes += sizeof(struct igmp_grouprec);
3687 			if (m != m0) {
3688 				/* new packet; offset in c hain */
3689 				md = m_getptr(m, npbytes -
3690 				    sizeof(struct igmp_grouprec), &off);
3691 				pig = (struct igmp_grouprec *)(void *)(mtod(md,
3692 				    uint8_t *) + off);
3693 			} else {
3694 				/* current packet; offset from last append */
3695 				md = m_last(m);
3696 				pig = (struct igmp_grouprec *)(void *)(mtod(md,
3697 				    uint8_t *) + md->m_len -
3698 				    sizeof(struct igmp_grouprec));
3699 			}
3700 			/*
3701 			 * Begin walking the tree for this record type
3702 			 * pass, or continue from where we left off
3703 			 * previously if we had to allocate a new packet.
3704 			 * Only report deltas in-mode at t1.
3705 			 * We need not report included sources as allowed
3706 			 * if we are in inclusive mode on the group,
3707 			 * however the converse is not true.
3708 			 */
3709 			rsrcs = 0;
3710 			if (nims == NULL) {
3711 				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
3712 			}
3713 			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3714 #ifdef IGMP_DEBUG
3715 				char buf[MAX_IPv4_STR_LEN];
3716 
3717 				inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3718 				IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3719 #endif
3720 				now = ims_get_mode(inm, ims, 1);
3721 				then = ims_get_mode(inm, ims, 0);
3722 				IGMP_PRINTF(("%s: mode: t0 %d, t1 %d\n",
3723 				    __func__, then, now));
3724 				if (now == then) {
3725 					IGMP_PRINTF(("%s: skip unchanged\n",
3726 					    __func__));
3727 					continue;
3728 				}
3729 				if (mode == MCAST_EXCLUDE &&
3730 				    now == MCAST_INCLUDE) {
3731 					IGMP_PRINTF(("%s: skip IN src on EX "
3732 					    "group\n", __func__));
3733 					continue;
3734 				}
3735 				nrt = (rectype_t)now;
3736 				if (nrt == REC_NONE) {
3737 					nrt = (rectype_t)(~mode & REC_FULL);
3738 				}
3739 				if (schanged++ == 0) {
3740 					crt = nrt;
3741 				} else if (crt != nrt) {
3742 					continue;
3743 				}
3744 				naddr = htonl(ims->ims_haddr);
3745 				if (!m_append(m, sizeof(in_addr_t),
3746 				    (void *)&naddr)) {
3747 					if (m != m0) {
3748 						m_freem(m);
3749 					}
3750 					IGMP_PRINTF(("%s: m_append() failed\n",
3751 					    __func__));
3752 					return -ENOMEM;
3753 				}
3754 				nallow += !!(crt == REC_ALLOW);
3755 				nblock += !!(crt == REC_BLOCK);
3756 				if (++rsrcs == m0srcs) {
3757 					break;
3758 				}
3759 			}
3760 			/*
3761 			 * If we did not append any tree nodes on this
3762 			 * pass, back out of allocations.
3763 			 */
3764 			if (rsrcs == 0) {
3765 				npbytes -= sizeof(struct igmp_grouprec);
3766 				if (m != m0) {
3767 					IGMP_PRINTF(("%s: m_free(m)\n",
3768 					    __func__));
3769 					m_freem(m);
3770 				} else {
3771 					IGMP_PRINTF(("%s: m_adj(m, -ig)\n",
3772 					    __func__));
3773 					m_adj(m, -((int)sizeof(
3774 						    struct igmp_grouprec)));
3775 				}
3776 				continue;
3777 			}
3778 			npbytes += (rsrcs * sizeof(in_addr_t));
3779 			if (crt == REC_ALLOW) {
3780 				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
3781 			} else if (crt == REC_BLOCK) {
3782 				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
3783 			}
3784 			ig_numsrc = htons(rsrcs);
3785 			bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3786 			/*
3787 			 * Count the new group record, and enqueue this
3788 			 * packet if it wasn't already queued.
3789 			 */
3790 			m->m_pkthdr.vt_nrecs++;
3791 			if (m != m0) {
3792 				IF_ENQUEUE(ifq, m);
3793 			}
3794 			nbytes += npbytes;
3795 		} while (nims != NULL);
3796 		drt |= crt;
3797 		crt = (~crt & REC_FULL);
3798 	}
3799 
3800 	IGMP_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
3801 	    nallow, nblock));
3802 
3803 	return nbytes;
3804 }
3805 
/*
 * Merge this group's pending state-change messages (inm->inm_scq) into
 * the per-interface state-change queue 'ifscq', coalescing group records
 * into the packet at the tail of 'ifscq' when IGMPv3 limits permit.
 *
 * If further retransmissions of this state change are pending
 * (inm->inm_scrv > 0), each queued message is duplicated rather than
 * dequeued, so the originals remain on inm_scq for the next pass.
 *
 * Returns 0 on success, or ENOMEM if an mbuf duplicate could not be
 * allocated (already-merged packets remain queued in that case).
 *
 * Caller must hold the inm lock.
 */
static int
igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
{
	struct ifqueue  *gq;
	struct mbuf     *m;             /* pending state-change */
	struct mbuf     *m0;            /* copy of pending state-change */
	struct mbuf     *mt;            /* last state-change in packet */
	struct mbuf     *n;
	int              docopy, domerge;
	u_int            recslen;

	INM_LOCK_ASSERT_HELD(inm);

	docopy = 0;
	domerge = 0;
	recslen = 0;

	/*
	 * If there are further pending retransmissions, make a writable
	 * copy of each queued state-change message before merging.
	 */
	if (inm->inm_scrv > 0) {
		docopy = 1;
	}

	gq = &inm->inm_scq;
#ifdef IGMP_DEBUG
	if (gq->ifq_head == NULL) {
		IGMP_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
	}
#endif

	/*
	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
	 * packet might not always be at the head of the ifqueue.
	 */
	m = gq->ifq_head;
	while (m != NULL) {
		/*
		 * Only merge the report into the current packet if
		 * there is sufficient space to do so; an IGMPv3 report
		 * packet may only contain 65,535 group records.
		 * Always use a simple mbuf chain concatenation to do this,
		 * as large state changes for single groups may have
		 * allocated clusters.
		 */
		domerge = 0;
		mt = ifscq->ifq_tail;
		if (mt != NULL) {
			recslen = m_length(m);

			/*
			 * Merge only if both the record-count cap and the
			 * link MTU (less IP/IGMP header space) allow it.
			 */
			if ((mt->m_pkthdr.vt_nrecs +
			    m->m_pkthdr.vt_nrecs <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (mt->m_pkthdr.len + recslen <=
			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE))) {
				domerge = 1;
			}
		}

		/*
		 * NOTE(review): fullness is tested on the source queue 'gq',
		 * not the destination 'ifscq' — matches the historical code,
		 * but confirm this is the intended back-pressure check.
		 */
		if (!domerge && IF_QFULL(gq)) {
			IGMP_PRINTF(("%s: outbound queue full, skipping whole "
			    "packet 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			/* If not retransmitting, drop the skipped packet. */
			if (!docopy) {
				IF_REMQUEUE(gq, m);
				m_freem(m);
			}
			m = n;
			continue;
		}

		if (!docopy) {
			/* Move: detach the packet from inm_scq outright. */
			IGMP_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			IF_REMQUEUE(gq, m);
			m0 = m;
			m = n;
		} else {
			/* Copy: leave the original queued for retransmit. */
			IGMP_PRINTF(("%s: copying 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			m0 = m_dup(m, M_NOWAIT);
			if (m0 == NULL) {
				return ENOMEM;
			}
			m0->m_nextpkt = NULL;
			m = m->m_nextpkt;
		}

		if (!domerge) {
			/* Start a new packet on the interface queue. */
			IGMP_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n",
			    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
			IF_ENQUEUE(ifscq, m0);
		} else {
			struct mbuf *mtl;       /* last mbuf of packet mt */

			IGMP_PRINTF(("%s: merging 0x%llx with ifscq tail "
			    "0x%llx)\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(mt)));

			/*
			 * Append m0's records to the tail packet: strip the
			 * packet header from m0, grow mt's length and record
			 * count, then link m0 after mt's last mbuf.
			 */
			mtl = m_last(mt);
			m0->m_flags &= ~M_PKTHDR;
			mt->m_pkthdr.len += recslen;
			mt->m_pkthdr.vt_nrecs +=
			    m0->m_pkthdr.vt_nrecs;

			mtl->m_next = m0;
		}
	}

	return 0;
}
3923 
/*
 * Respond to a pending IGMPv3 General Query.
 *
 * Walk the global in_multi list and, for every reportable membership on
 * this interface, enqueue a current-state group record onto the general
 * query response queue (igi_gq); then transmit up to
 * IGMP_MAX_RESPONSE_BURST packets from that queue.
 *
 * Entered and exited with the igi lock held; the lock is dropped while
 * the shared in_multi list is traversed and retaken around each enqueue.
 *
 * Returns the re-armed igi_v3_timer value: non-zero (a randomized
 * 1-second-slewed delay) if packets remain queued for a later burst.
 */
static uint32_t
igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
{
	struct ifnet            *ifp;
	struct in_multi         *inm;
	struct in_multistep     step;
	int                      retval, loop;

	IGI_LOCK_ASSERT_HELD(igi);

	VERIFY(igi->igi_version == IGMP_VERSION_3);

	ifp = igi->igi_ifp;
	IGI_UNLOCK(igi);

	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		/* Skip memberships that belong to other interfaces. */
		if (inm->inm_ifp != ifp) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			/* Nothing to report for silent/non-members. */
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/* Enqueue a full current-state record (flags 0,0,0). */
			inm->inm_state = IGMP_REPORTING_MEMBER;
			IGI_LOCK(igi);
			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
			    inm, 0, 0, 0);
			IGI_UNLOCK(igi);
			IGMP_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			/* A response or leave is already in flight. */
			break;
		}
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();

	IGI_LOCK(igi);
	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
	igmp_dispatch_queue(igi, &igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
	    loop);
	IGI_LOCK_ASSERT_HELD(igi);
	/*
	 * Slew transmission of bursts over 1 second intervals.
	 */
	if (igi->igi_gq.ifq_head != NULL) {
		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
			IGMP_RESPONSE_BURST_INTERVAL);
	}

	return igi->igi_v3_timer;
}
3993 
/*
 * Transmit the next pending IGMP message in the output queue.
 *
 * The originating ifnet was stashed in the mbuf's packet-header context
 * by igmp_save_context(); restore it and verify the interface is still
 * attached before doing any work.  IGMPv2 frames (M_IGMPV2) already
 * carry their headers; otherwise the chain of IGMPv3 group records is
 * first wrapped in an IP + IGMPv3 report header.
 *
 * Consumes 'm' on all paths (directly, or via ip_output()).
 *
 * Must not be called with inm_lock or igi_lock held.
 */
static void
igmp_sendpkt(struct mbuf *m)
{
	struct ip_moptions      *imo;
	struct mbuf             *ipopts, *m0;
	int                     error;
	struct route            ro;
	struct ifnet            *ifp;

	IGMP_PRINTF(("%s: transmit 0x%llx\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m)));

	ifp = igmp_restore_context(m);
	/*
	 * Check if the ifnet is still attached.
	 */
	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
		IGMP_PRINTF(("%s: dropped 0x%llx as ifp went away.\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m)));
		m_freem(m);
		OSAddAtomic(1, &ipstat.ips_noroute);
		return;
	}

	/* Optionally send with the IP Router Alert option (RFC 2113). */
	ipopts = igmp_sendra ? m_raopt : NULL;

	imo = ip_allocmoptions(Z_WAITOK);
	if (imo == NULL) {
		m_freem(m);
		return;
	}

	/* IGMP is link-local: TTL 1, and do not loop back our own report. */
	imo->imo_multicast_ttl  = 1;
	imo->imo_multicast_vif  = -1;
	imo->imo_multicast_loop = 0;

	/*
	 * If the user requested that IGMP traffic be explicitly
	 * redirected to the loopback interface (e.g. they are running a
	 * MANET interface and the routing protocol needs to see the
	 * updates), handle this now.
	 */
	if (m->m_flags & M_IGMP_LOOP) {
		imo->imo_multicast_ifp = lo_ifp;
	} else {
		imo->imo_multicast_ifp = ifp;
	}

	if (m->m_flags & M_IGMPV2) {
		m0 = m;
	} else {
		m0 = igmp_v3_encap_report(ifp, m);
		if (m0 == NULL) {
			/*
			 * If igmp_v3_encap_report() failed, then M_PREPEND()
			 * already freed the original mbuf chain.
			 * This means that we don't have to m_freem(m) here.
			 */
			IGMP_PRINTF(("%s: dropped 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			IMO_REMREF(imo);
			os_atomic_inc(&ipstat.ips_odropped, relaxed);
			return;
		}
	}

	igmp_scrub_context(m0);
	/*
	 * NOTE(review): the protocol flags are cleared on 'm', while the
	 * chain head after encapsulation is 'm0' (which may differ if
	 * M_PREPEND allocated a new header mbuf) — confirm this is the
	 * intended mbuf.
	 */
	m->m_flags &= ~(M_PROTOFLAGS | M_IGMP_LOOP);
	m0->m_pkthdr.rcvif = lo_ifp;

	if (ifp->if_eflags & IFEF_TXSTART) {
		/*
		 * Use control service class if the interface supports
		 * transmit-start model.
		 */
		(void) m_set_service_class(m0, MBUF_SC_CTL);
	}
	bzero(&ro, sizeof(ro));
	error = ip_output(m0, ipopts, &ro, 0, imo, NULL);
	ROUTE_RELEASE(&ro);

	IMO_REMREF(imo);

	if (error) {
		IGMP_PRINTF(("%s: ip_output(0x%llx) = %d\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(m0), error));
		return;
	}

	IGMPSTAT_INC(igps_snd_reports);
	OIGMPSTAT_INC(igps_snd_reports);
}
/*
 * Encapsulate an IGMPv3 report.
 *
 * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
 * chain has already had its IP/IGMPv3 header prepended. In this case
 * the function will not attempt to prepend; the lengths and checksums
 * will however be re-computed.
 *
 * Returns a pointer to the new mbuf chain head, or NULL if the
 * allocation failed (M_PREPEND() frees the chain on failure) or the
 * resulting datagram would exceed the 16-bit IP length field.
 */
static struct mbuf *
igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
{
	struct igmp_report      *igmp;
	struct ip               *ip;
	unsigned int             hdrlen, igmpreclen;

	VERIFY((m->m_flags & M_PKTHDR));

	/* Payload length: group records only, excluding any prior header. */
	igmpreclen = m_length(m);
	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);

	if (m->m_flags & M_IGMPV3_HDR) {
		igmpreclen -= hdrlen;
	} else {
		M_PREPEND(m, hdrlen, M_DONTWAIT, 1);
		if (m == NULL) {
			return NULL;
		}
		m->m_flags |= M_IGMPV3_HDR;
	}
	/* ip_len is 16 bits wide; refuse datagrams that cannot fit. */
	if (hdrlen + igmpreclen > USHRT_MAX) {
		IGMP_PRINTF(("%s: invalid length %d\n", __func__, hdrlen + igmpreclen));
		m_freem(m);
		return NULL;
	}


	IGMP_PRINTF(("%s: igmpreclen is %d\n", __func__, igmpreclen));

	/*
	 * Temporarily advance past the IP header so the checksum is
	 * computed over the IGMP report header + records only.
	 */
	m->m_data += sizeof(struct ip);
	m->m_len -= sizeof(struct ip);

	igmp = mtod(m, struct igmp_report *);
	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
	igmp->ir_rsv1 = 0;
	igmp->ir_rsv2 = 0;
	igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
	igmp->ir_cksum = 0;
	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
	m->m_pkthdr.vt_nrecs = 0;

	/* Rewind to expose the IP header again. */
	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	/*
	 * Fill in the IP header.  ip_len/ip_off stay in host byte order
	 * here; ip_output() finalizes byte order, id and checksum.
	 */
	ip = mtod(m, struct ip *);
	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
	ip->ip_len = (u_short)(hdrlen + igmpreclen);
	ip->ip_off = IP_DF;
	ip->ip_p = IPPROTO_IGMP;
	ip->ip_sum = 0;

	ip->ip_src.s_addr = INADDR_ANY;

	/*
	 * For loopback-directed reports, use the interface's primary
	 * address as the source (if one is configured).
	 */
	if (m->m_flags & M_IGMP_LOOP) {
		struct in_ifaddr *ia;

		IFP_TO_IA(ifp, ia);
		if (ia != NULL) {
			IFA_LOCK(&ia->ia_ifa);
			ip->ip_src = ia->ia_addr.sin_addr;
			IFA_UNLOCK(&ia->ia_ifa);
			IFA_REMREF(&ia->ia_ifa);
		}
	}

	/* IGMPv3 reports always go to 224.0.0.22 (all IGMPv3 routers). */
	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);

	return m;
}
4172 
4173 #ifdef IGMP_DEBUG
4174 static const char *
igmp_rec_type_to_str(const int type)4175 igmp_rec_type_to_str(const int type)
4176 {
4177 	switch (type) {
4178 	case IGMP_CHANGE_TO_EXCLUDE_MODE:
4179 		return "TO_EX";
4180 	case IGMP_CHANGE_TO_INCLUDE_MODE:
4181 		return "TO_IN";
4182 	case IGMP_MODE_IS_EXCLUDE:
4183 		return "MODE_EX";
4184 	case IGMP_MODE_IS_INCLUDE:
4185 		return "MODE_IN";
4186 	case IGMP_ALLOW_NEW_SOURCES:
4187 		return "ALLOW_NEW";
4188 	case IGMP_BLOCK_OLD_SOURCES:
4189 		return "BLOCK_OLD";
4190 	default:
4191 		break;
4192 	}
4193 	return "unknown";
4194 }
4195 #endif
4196 
4197 void
igmp_init(struct protosw * pp,struct domain * dp)4198 igmp_init(struct protosw *pp, struct domain *dp)
4199 {
4200 #pragma unused(dp)
4201 	static int igmp_initialized = 0;
4202 
4203 	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);
4204 
4205 	if (igmp_initialized) {
4206 		return;
4207 	}
4208 	igmp_initialized = 1;
4209 
4210 	IGMP_PRINTF(("%s: initializing\n", __func__));
4211 
4212 	igmp_timers_are_running = 0;
4213 
4214 	LIST_INIT(&igi_head);
4215 	m_raopt = igmp_ra_alloc();
4216 }
4217