xref: /xnu-8792.61.2/bsd/netinet/igmp.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*-
29  * Copyright (c) 2007-2009 Bruce Simpson.
30  * Copyright (c) 1988 Stephen Deering.
31  * Copyright (c) 1992, 1993
32  *	The Regents of the University of California.  All rights reserved.
33  *
34  * This code is derived from software contributed to Berkeley by
35  * Stephen Deering of Stanford University.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
66  */
67 /*
68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69  * support for mandatory and extensible security protections.  This notice
70  * is included in support of clause 2.2 (b) of the Apple Public License,
71  * Version 2.0.
72  */
73 
74 /*
75  * Internet Group Management Protocol (IGMP) routines.
76  * [RFC1112, RFC2236, RFC3376]
77  *
78  * Written by Steve Deering, Stanford, May 1988.
79  * Modified by Rosen Sharma, Stanford, Aug 1994.
80  * Modified by Bill Fenner, Xerox PARC, Feb 1995.
81  * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
82  * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
83  *
84  * MULTICAST Revision: 3.5.1.4
85  */
86 
87 #include <sys/cdefs.h>
88 
89 #include <sys/param.h>
90 #include <sys/systm.h>
91 #include <sys/malloc.h>
92 #include <sys/mbuf.h>
93 #include <sys/socket.h>
94 #include <sys/protosw.h>
95 #include <sys/kernel.h>
96 #include <sys/sysctl.h>
97 #include <sys/mcache.h>
98 
99 #include <libkern/libkern.h>
100 #include <kern/zalloc.h>
101 
102 #include <net/if.h>
103 #include <net/route.h>
104 
105 #include <netinet/in.h>
106 #include <netinet/in_var.h>
107 #include <netinet/in_systm.h>
108 #include <netinet/ip.h>
109 #include <netinet/ip_var.h>
110 #include <netinet/igmp.h>
111 #include <netinet/igmp_var.h>
112 #include <netinet/kpi_ipfilter_var.h>
113 
114 #if SKYWALK
115 #include <skywalk/core/skywalk_var.h>
116 #endif /* SKYWALK */
117 
118 SLIST_HEAD(igmp_inm_relhead, in_multi);
119 
120 static void     igi_initvar(struct igmp_ifinfo *, struct ifnet *, int);
121 static struct igmp_ifinfo *igi_alloc(zalloc_flags_t);
122 static void     igi_free(struct igmp_ifinfo *);
123 static void     igi_delete(const struct ifnet *, struct igmp_inm_relhead *);
124 static void     igmp_dispatch_queue(struct igmp_ifinfo *, struct ifqueue *,
125     int, const int);
126 static void     igmp_final_leave(struct in_multi *, struct igmp_ifinfo *,
127     struct igmp_tparams *);
128 static int      igmp_handle_state_change(struct in_multi *,
129     struct igmp_ifinfo *, struct igmp_tparams *);
130 static int      igmp_initial_join(struct in_multi *, struct igmp_ifinfo *,
131     struct igmp_tparams *);
132 static int      igmp_input_v1_query(struct ifnet *, const struct ip *,
133     const struct igmp *);
134 static int      igmp_input_v2_query(struct ifnet *, const struct ip *,
135     const struct igmp *);
136 static int      igmp_input_v3_query(struct ifnet *, const struct ip *,
137     /*const*/ struct igmpv3 *);
138 static int      igmp_input_v3_group_query(struct in_multi *,
139     int, /*const*/ struct igmpv3 *);
140 static int      igmp_input_v1_report(struct ifnet *, struct mbuf *,
141     /*const*/ struct ip *, /*const*/ struct igmp *);
142 static int      igmp_input_v2_report(struct ifnet *, struct mbuf *,
143     /*const*/ struct ip *, /*const*/ struct igmp *);
144 static void     igmp_sendpkt(struct mbuf *);
145 static __inline__ int   igmp_isgroupreported(const struct in_addr);
146 static struct mbuf *igmp_ra_alloc(void);
147 #ifdef IGMP_DEBUG
148 static const char *igmp_rec_type_to_str(const int);
149 #endif
150 static uint32_t igmp_set_version(struct igmp_ifinfo *, const int);
151 static void     igmp_flush_relq(struct igmp_ifinfo *,
152     struct igmp_inm_relhead *);
153 static int      igmp_v1v2_queue_report(struct in_multi *, const int);
154 static void     igmp_v1v2_process_group_timer(struct in_multi *, const int);
155 static void     igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
156 static uint32_t igmp_v2_update_group(struct in_multi *, const int);
157 static void     igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
158 static uint32_t igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
159 static struct mbuf *
160 igmp_v3_encap_report(struct ifnet *, struct mbuf *);
161 static int      igmp_v3_enqueue_group_record(struct ifqueue *,
162     struct in_multi *, const int, const int, const int);
163 static int      igmp_v3_enqueue_filter_change(struct ifqueue *,
164     struct in_multi *);
165 static void     igmp_v3_process_group_timers(struct igmp_ifinfo *,
166     struct ifqueue *, struct ifqueue *, struct in_multi *,
167     const unsigned int);
168 static int      igmp_v3_merge_state_changes(struct in_multi *,
169     struct ifqueue *);
170 static void     igmp_v3_suppress_group_record(struct in_multi *);
171 static int      sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS;
172 static int      sysctl_igmp_gsr SYSCTL_HANDLER_ARGS;
173 static int      sysctl_igmp_default_version SYSCTL_HANDLER_ARGS;
174 
175 static int igmp_timeout_run;            /* IGMP timer is scheduled to run */
176 static void igmp_timeout(void *);
177 static void igmp_sched_timeout(bool);
178 
179 static struct mbuf *m_raopt;            /* Router Alert option */
180 
181 static int querier_present_timers_running;      /* IGMPv1/v2 older version
182                                                  * querier present */
183 static int interface_timers_running;            /* IGMPv3 general
184                                                  * query response */
185 static int state_change_timers_running;         /* IGMPv3 state-change
186                                                  * retransmit */
187 static int current_state_timers_running;        /* IGMPv1/v2 host
188                                                  * report; IGMPv3 g/sg
189                                                  * query response */
190 
191 /*
192  * Subsystem lock macros.
193  */
194 #define IGMP_LOCK()                     \
195 	lck_mtx_lock(&igmp_mtx)
196 #define IGMP_LOCK_ASSERT_HELD()         \
197 	LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_OWNED)
198 #define IGMP_LOCK_ASSERT_NOTHELD()      \
199 	LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED)
200 #define IGMP_UNLOCK()                   \
201 	lck_mtx_unlock(&igmp_mtx)
202 
203 static LIST_HEAD(, igmp_ifinfo) igi_head;
204 static struct igmpstat_v3 igmpstat_v3 = {
205 	.igps_version = IGPS_VERSION_3,
206 	.igps_len = sizeof(struct igmpstat_v3),
207 };
208 static struct igmpstat igmpstat; /* old IGMPv2 stats structure */
209 static struct timeval igmp_gsrdelay = {.tv_sec = 10, .tv_usec = 0};
210 
211 static int igmp_recvifkludge = 1;
212 static int igmp_sendra = 1;
213 static int igmp_sendlocal = 1;
214 static int igmp_v1enable = 1;
215 static int igmp_v2enable = 1;
216 static int igmp_legacysupp = 0;
217 static int igmp_default_version = IGMP_VERSION_3;
218 
219 SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
220     &igmpstat, igmpstat, "");
221 SYSCTL_STRUCT(_net_inet_igmp, OID_AUTO, v3stats,
222     CTLFLAG_RD | CTLFLAG_LOCKED, &igmpstat_v3, igmpstat_v3, "");
223 SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW | CTLFLAG_LOCKED,
224     &igmp_recvifkludge, 0,
225     "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
226 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW | CTLFLAG_LOCKED,
227     &igmp_sendra, 0,
228     "Send IP Router Alert option in IGMPv2/v3 messages");
229 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW | CTLFLAG_LOCKED,
230     &igmp_sendlocal, 0,
231     "Send IGMP membership reports for 224.0.0.0/24 groups");
232 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
233     &igmp_v1enable, 0,
234     "Enable backwards compatibility with IGMPv1");
235 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW | CTLFLAG_LOCKED,
236     &igmp_v2enable, 0,
237     "Enable backwards compatibility with IGMPv2");
238 SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW | CTLFLAG_LOCKED,
239     &igmp_legacysupp, 0,
240     "Allow v1/v2 reports to suppress v3 group responses");
241 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
242     CTLTYPE_INT | CTLFLAG_RW,
243     &igmp_default_version, 0, sysctl_igmp_default_version, "I",
244     "Default version of IGMP to run on each interface");
245 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
246     CTLTYPE_INT | CTLFLAG_RW,
247     &igmp_gsrdelay.tv_sec, 0, sysctl_igmp_gsr, "I",
248     "Rate limit for IGMPv3 Group-and-Source queries in seconds");
249 #ifdef IGMP_DEBUG
250 int igmp_debug = 0;
251 SYSCTL_INT(_net_inet_igmp, OID_AUTO,
252     debug, CTLFLAG_RW | CTLFLAG_LOCKED, &igmp_debug, 0, "");
253 #endif
254 
255 SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
256     sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
257 
258 /* Lock group and attribute for igmp_mtx */
259 static LCK_ATTR_DECLARE(igmp_mtx_attr, 0, 0);
260 static LCK_GRP_DECLARE(igmp_mtx_grp, "igmp_mtx");
261 
262 /*
263  * Locking and reference counting:
264  *
265  * igmp_mtx mainly protects igi_head.  In cases where both igmp_mtx and
266  * in_multihead_lock must be held, the former must be acquired first in order
267  * to maintain lock ordering.  It is not a requirement that igmp_mtx be
268  * acquired first before in_multihead_lock, but in case both must be acquired
269  * in succession, the correct lock ordering must be followed.
270  *
271  * Instead of walking the if_multiaddrs list at the interface and returning
272  * the ifma_protospec value of a matching entry, we search the global list
273  * of in_multi records and find it that way; this is done with in_multihead
274  * lock held.  Doing so avoids the race condition issues that many other BSDs
275  * suffer from (therefore in our implementation, ifma_protospec will never be
276  * NULL for as long as the in_multi is valid.)
277  *
278  * The above creates a requirement for the in_multi to stay in in_multihead
279  * list even after the final IGMP leave (in IGMPv3 mode) until no longer needs
280  * be retransmitted (this is not required for IGMPv1/v2.)  In order to handle
281  * this, the request and reference counts of the in_multi are bumped up when
282  * the state changes to IGMP_LEAVING_MEMBER, and later dropped in the timeout
283  * handler.  Each in_multi holds a reference to the underlying igmp_ifinfo.
284  *
285  * Thus, the permitted lock oder is:
286  *
287  *	igmp_mtx, in_multihead_lock, inm_lock, igi_lock
288  *
289  * Any may be taken independently, but if any are held at the same time,
290  * the above lock order must be followed.
291  */
292 static LCK_MTX_DECLARE_ATTR(igmp_mtx, &igmp_mtx_grp, &igmp_mtx_attr);
293 static int igmp_timers_are_running;
294 
295 #define IGMP_ADD_DETACHED_INM(_head, _inm) {                            \
296 	SLIST_INSERT_HEAD(_head, _inm, inm_dtle);                       \
297 }
298 
299 #define IGMP_REMOVE_DETACHED_INM(_head) {                               \
300 	struct in_multi *_inm, *_inm_tmp;                               \
301 	SLIST_FOREACH_SAFE(_inm, _head, inm_dtle, _inm_tmp) {           \
302 	        SLIST_REMOVE(_head, _inm, in_multi, inm_dtle);          \
303 	        INM_REMREF(_inm);                                       \
304 	}                                                               \
305 	VERIFY(SLIST_EMPTY(_head));                                     \
306 }
307 
308 static ZONE_DEFINE(igi_zone, "igmp_ifinfo",
309     sizeof(struct igmp_ifinfo), ZC_ZFREE_CLEARMEM);
310 
311 /* Store IGMPv3 record count in the module private scratch space */
312 #define vt_nrecs        pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]
313 
314 static __inline void
igmp_save_context(struct mbuf * m,struct ifnet * ifp)315 igmp_save_context(struct mbuf *m, struct ifnet *ifp)
316 {
317 	m->m_pkthdr.rcvif = ifp;
318 }
319 
320 static __inline void
igmp_scrub_context(struct mbuf * m)321 igmp_scrub_context(struct mbuf *m)
322 {
323 	m->m_pkthdr.rcvif = NULL;
324 }
325 
326 #ifdef IGMP_DEBUG
static __inline const char *
inet_ntop_haddr(in_addr_t haddr, char *buf, socklen_t size)
{
	struct in_addr in;

	/* Convert a host-byte-order IPv4 address to dotted-quad text. */
	in.s_addr = htonl(haddr);
	return inet_ntop(AF_INET, &in, buf, size);
}
335 #endif
336 
337 /*
338  * Restore context from a queued IGMP output chain.
339  * Return saved ifp.
340  */
341 static __inline struct ifnet *
igmp_restore_context(struct mbuf * m)342 igmp_restore_context(struct mbuf *m)
343 {
344 	return m->m_pkthdr.rcvif;
345 }
346 
347 /*
348  * Retrieve or set default IGMP version.
349  */
350 static int
351 sysctl_igmp_default_version SYSCTL_HANDLER_ARGS
352 {
353 #pragma unused(oidp, arg2)
354 	int      error;
355 	int      new;
356 
357 	IGMP_LOCK();
358 
359 	error = SYSCTL_OUT(req, arg1, sizeof(int));
360 	if (error || !req->newptr) {
361 		goto out_locked;
362 	}
363 
364 	new = igmp_default_version;
365 
366 	error = SYSCTL_IN(req, &new, sizeof(int));
367 	if (error) {
368 		goto out_locked;
369 	}
370 
371 	if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
372 		error = EINVAL;
373 		goto out_locked;
374 	}
375 
376 	IGMP_PRINTF(("%s: change igmp_default_version from %d to %d\n",
377 	    __func__, igmp_default_version, new));
378 
379 	igmp_default_version = new;
380 
381 out_locked:
382 	IGMP_UNLOCK();
383 	return error;
384 }
385 
386 /*
387  * Retrieve or set threshold between group-source queries in seconds.
388  *
389  */
390 static int
391 sysctl_igmp_gsr SYSCTL_HANDLER_ARGS
392 {
393 #pragma unused(arg1, arg2)
394 	int error;
395 	int i;
396 
397 	IGMP_LOCK();
398 
399 	i = (int)igmp_gsrdelay.tv_sec;
400 
401 	error = sysctl_handle_int(oidp, &i, 0, req);
402 	if (error || !req->newptr) {
403 		goto out_locked;
404 	}
405 
406 	if (i < -1 || i >= 60) {
407 		error = EINVAL;
408 		goto out_locked;
409 	}
410 
411 	igmp_gsrdelay.tv_sec = i;
412 
413 out_locked:
414 	IGMP_UNLOCK();
415 	return error;
416 }
417 
418 /*
419  * Expose struct igmp_ifinfo to userland, keyed by ifindex.
420  * For use by ifmcstat(8).
421  *
422  */
423 static int
424 sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS
425 {
426 #pragma unused(oidp)
427 	int                     *name;
428 	int                      error;
429 	u_int                    namelen;
430 	struct ifnet            *ifp;
431 	struct igmp_ifinfo      *igi;
432 	struct igmp_ifinfo_u    igi_u;
433 
434 	name = (int *)arg1;
435 	namelen = arg2;
436 
437 	if (req->newptr != USER_ADDR_NULL) {
438 		return EPERM;
439 	}
440 
441 	if (namelen != 1) {
442 		return EINVAL;
443 	}
444 
445 	IGMP_LOCK();
446 
447 	if (name[0] <= 0 || name[0] > (u_int)if_index) {
448 		error = ENOENT;
449 		goto out_locked;
450 	}
451 
452 	error = ENOENT;
453 
454 	ifnet_head_lock_shared();
455 	ifp = ifindex2ifnet[name[0]];
456 	ifnet_head_done();
457 	if (ifp == NULL) {
458 		goto out_locked;
459 	}
460 
461 	bzero(&igi_u, sizeof(igi_u));
462 
463 	LIST_FOREACH(igi, &igi_head, igi_link) {
464 		IGI_LOCK(igi);
465 		if (ifp != igi->igi_ifp) {
466 			IGI_UNLOCK(igi);
467 			continue;
468 		}
469 		igi_u.igi_ifindex = igi->igi_ifp->if_index;
470 		igi_u.igi_version = igi->igi_version;
471 		igi_u.igi_v1_timer = igi->igi_v1_timer;
472 		igi_u.igi_v2_timer = igi->igi_v2_timer;
473 		igi_u.igi_v3_timer = igi->igi_v3_timer;
474 		igi_u.igi_flags = igi->igi_flags;
475 		igi_u.igi_rv = igi->igi_rv;
476 		igi_u.igi_qi = igi->igi_qi;
477 		igi_u.igi_qri = igi->igi_qri;
478 		igi_u.igi_uri = igi->igi_uri;
479 		IGI_UNLOCK(igi);
480 
481 		error = SYSCTL_OUT(req, &igi_u, sizeof(igi_u));
482 		break;
483 	}
484 
485 out_locked:
486 	IGMP_UNLOCK();
487 	return error;
488 }
489 
490 /*
491  * Dispatch an entire queue of pending packet chains
492  *
493  * Must not be called with inm_lock held.
494  */
495 static void
igmp_dispatch_queue(struct igmp_ifinfo * igi,struct ifqueue * ifq,int limit,const int loop)496 igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit,
497     const int loop)
498 {
499 	struct mbuf *m;
500 	struct ip *ip;
501 
502 	if (igi != NULL) {
503 		IGI_LOCK_ASSERT_HELD(igi);
504 	}
505 
506 #if SKYWALK
507 	/*
508 	 * Since this function is called holding the igi lock, we need to ensure we
509 	 * don't enter the driver directly because a deadlock can happen if another
510 	 * thread holding the workloop lock tries to acquire the igi lock at
511 	 * the same time.
512 	 */
513 	sk_protect_t protect = sk_async_transmit_protect();
514 #endif /* SKYWALK */
515 
516 	for (;;) {
517 		IF_DEQUEUE(ifq, m);
518 		if (m == NULL) {
519 			break;
520 		}
521 		IGMP_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
522 		    (uint64_t)VM_KERNEL_ADDRPERM(ifq),
523 		    (uint64_t)VM_KERNEL_ADDRPERM(m)));
524 		ip = mtod(m, struct ip *);
525 		if (loop) {
526 			m->m_flags |= M_IGMP_LOOP;
527 		}
528 		if (igi != NULL) {
529 			IGI_UNLOCK(igi);
530 		}
531 		igmp_sendpkt(m);
532 		if (igi != NULL) {
533 			IGI_LOCK(igi);
534 		}
535 		if (--limit == 0) {
536 			break;
537 		}
538 	}
539 
540 #if SKYWALK
541 	sk_async_transmit_unprotect(protect);
542 #endif /* SKYWALK */
543 
544 	if (igi != NULL) {
545 		IGI_LOCK_ASSERT_HELD(igi);
546 	}
547 }
548 
549 /*
550  * Filter outgoing IGMP report state by group.
551  *
552  * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
553  * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
554  * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
555  * this may break certain IGMP snooping switches which rely on the old
556  * report behaviour.
557  *
558  * Return zero if the given group is one for which IGMP reports
559  * should be suppressed, or non-zero if reports should be issued.
560  */
561 
562 static __inline__
563 int
igmp_isgroupreported(const struct in_addr addr)564 igmp_isgroupreported(const struct in_addr addr)
565 {
566 	if (in_allhosts(addr) ||
567 	    ((!igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) {
568 		return 0;
569 	}
570 
571 	return 1;
572 }
573 
574 /*
575  * Construct a Router Alert option to use in outgoing packets.
576  */
577 static struct mbuf *
igmp_ra_alloc(void)578 igmp_ra_alloc(void)
579 {
580 	struct mbuf     *m;
581 	struct ipoption *p;
582 
583 	MGET(m, M_WAITOK, MT_DATA);
584 	p = mtod(m, struct ipoption *);
585 	p->ipopt_dst.s_addr = INADDR_ANY;
586 	p->ipopt_list[0] = (char)IPOPT_RA;      /* Router Alert Option */
587 	p->ipopt_list[1] = 0x04;        /* 4 bytes long */
588 	p->ipopt_list[2] = IPOPT_EOL;   /* End of IP option list */
589 	p->ipopt_list[3] = 0x00;        /* pad byte */
590 	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
591 
592 	return m;
593 }
594 
595 /*
596  * Attach IGMP when PF_INET is attached to an interface.
597  */
598 struct igmp_ifinfo *
igmp_domifattach(struct ifnet * ifp,zalloc_flags_t how)599 igmp_domifattach(struct ifnet *ifp, zalloc_flags_t how)
600 {
601 	struct igmp_ifinfo *igi;
602 
603 	IGMP_PRINTF(("%s: called for ifp 0x%llx(%s)\n",
604 	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));
605 
606 	igi = igi_alloc(how);
607 	if (igi == NULL) {
608 		return NULL;
609 	}
610 
611 	IGMP_LOCK();
612 
613 	IGI_LOCK(igi);
614 	igi_initvar(igi, ifp, 0);
615 	igi->igi_debug |= IFD_ATTACHED;
616 	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
617 	IGI_ADDREF_LOCKED(igi); /* hold a reference for caller */
618 	IGI_UNLOCK(igi);
619 	ifnet_lock_shared(ifp);
620 	igmp_initsilent(ifp, igi);
621 	ifnet_lock_done(ifp);
622 
623 	LIST_INSERT_HEAD(&igi_head, igi, igi_link);
624 
625 	IGMP_UNLOCK();
626 
627 	IGMP_PRINTF(("%s: allocate igmp_ifinfo for ifp 0x%llx(%s)\n", __func__,
628 	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));
629 
630 	return igi;
631 }
632 
633 /*
634  * Attach IGMP when PF_INET is reattached to an interface.  Caller is
635  * expected to have an outstanding reference to the igi.
636  */
637 void
igmp_domifreattach(struct igmp_ifinfo * igi)638 igmp_domifreattach(struct igmp_ifinfo *igi)
639 {
640 	struct ifnet *ifp;
641 
642 	IGMP_LOCK();
643 
644 	IGI_LOCK(igi);
645 	VERIFY(!(igi->igi_debug & IFD_ATTACHED));
646 	ifp = igi->igi_ifp;
647 	VERIFY(ifp != NULL);
648 	igi_initvar(igi, ifp, 1);
649 	igi->igi_debug |= IFD_ATTACHED;
650 	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
651 	IGI_UNLOCK(igi);
652 	ifnet_lock_shared(ifp);
653 	igmp_initsilent(ifp, igi);
654 	ifnet_lock_done(ifp);
655 
656 	LIST_INSERT_HEAD(&igi_head, igi, igi_link);
657 
658 	IGMP_UNLOCK();
659 
660 	IGMP_PRINTF(("%s: reattached igmp_ifinfo for ifp 0x%llx(%s)\n",
661 	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));
662 }
663 
664 /*
665  * Hook for domifdetach.
666  */
667 void
igmp_domifdetach(struct ifnet * ifp)668 igmp_domifdetach(struct ifnet *ifp)
669 {
670 	SLIST_HEAD(, in_multi) inm_dthead;
671 
672 	SLIST_INIT(&inm_dthead);
673 
674 	IGMP_PRINTF(("%s: called for ifp 0x%llx(%s%d)\n", __func__,
675 	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name, ifp->if_unit));
676 
677 	IGMP_LOCK();
678 	igi_delete(ifp, (struct igmp_inm_relhead *)&inm_dthead);
679 	IGMP_UNLOCK();
680 
681 	/* Now that we're dropped all locks, release detached records */
682 	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
683 }
684 
685 /*
686  * Called at interface detach time.  Note that we only flush all deferred
687  * responses and record releases; all remaining inm records and their source
688  * entries related to this interface are left intact, in order to handle
689  * the reattach case.
690  */
691 static void
igi_delete(const struct ifnet * ifp,struct igmp_inm_relhead * inm_dthead)692 igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead)
693 {
694 	struct igmp_ifinfo *igi, *tigi;
695 
696 	IGMP_LOCK_ASSERT_HELD();
697 
698 	LIST_FOREACH_SAFE(igi, &igi_head, igi_link, tigi) {
699 		IGI_LOCK(igi);
700 		if (igi->igi_ifp == ifp) {
701 			/*
702 			 * Free deferred General Query responses.
703 			 */
704 			IF_DRAIN(&igi->igi_gq);
705 			IF_DRAIN(&igi->igi_v2q);
706 			igmp_flush_relq(igi, inm_dthead);
707 			VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
708 			igi->igi_debug &= ~IFD_ATTACHED;
709 			IGI_UNLOCK(igi);
710 
711 			LIST_REMOVE(igi, igi_link);
712 			IGI_REMREF(igi); /* release igi_head reference */
713 			return;
714 		}
715 		IGI_UNLOCK(igi);
716 	}
717 	panic("%s: igmp_ifinfo not found for ifp %p(%s)", __func__,
718 	    ifp, ifp->if_xname);
719 }
720 
721 __private_extern__ void
igmp_initsilent(struct ifnet * ifp,struct igmp_ifinfo * igi)722 igmp_initsilent(struct ifnet *ifp, struct igmp_ifinfo *igi)
723 {
724 	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
725 
726 	IGI_LOCK_ASSERT_NOTHELD(igi);
727 	IGI_LOCK(igi);
728 	if (!(ifp->if_flags & IFF_MULTICAST)) {
729 		igi->igi_flags |= IGIF_SILENT;
730 	} else {
731 		igi->igi_flags &= ~IGIF_SILENT;
732 	}
733 	IGI_UNLOCK(igi);
734 }
735 
736 static void
igi_initvar(struct igmp_ifinfo * igi,struct ifnet * ifp,int reattach)737 igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach)
738 {
739 	IGI_LOCK_ASSERT_HELD(igi);
740 
741 	igi->igi_ifp = ifp;
742 	igi->igi_version = igmp_default_version;
743 	igi->igi_flags = 0;
744 	igi->igi_rv = IGMP_RV_INIT;
745 	igi->igi_qi = IGMP_QI_INIT;
746 	igi->igi_qri = IGMP_QRI_INIT;
747 	igi->igi_uri = IGMP_URI_INIT;
748 
749 	if (!reattach) {
750 		SLIST_INIT(&igi->igi_relinmhead);
751 	}
752 
753 	/*
754 	 * Responses to general queries are subject to bounds.
755 	 */
756 	igi->igi_gq.ifq_maxlen =  IGMP_MAX_RESPONSE_PACKETS;
757 	igi->igi_v2q.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
758 }
759 
760 static struct igmp_ifinfo *
igi_alloc(zalloc_flags_t how)761 igi_alloc(zalloc_flags_t how)
762 {
763 	struct igmp_ifinfo *igi = zalloc_flags(igi_zone, how | Z_ZERO);
764 	if (igi != NULL) {
765 		lck_mtx_init(&igi->igi_lock, &igmp_mtx_grp, &igmp_mtx_attr);
766 		igi->igi_debug |= IFD_ALLOC;
767 	}
768 	return igi;
769 }
770 
771 static void
igi_free(struct igmp_ifinfo * igi)772 igi_free(struct igmp_ifinfo *igi)
773 {
774 	IGI_LOCK(igi);
775 	if (igi->igi_debug & IFD_ATTACHED) {
776 		panic("%s: attached igi=%p is being freed", __func__, igi);
777 		/* NOTREACHED */
778 	} else if (igi->igi_ifp != NULL) {
779 		panic("%s: ifp not NULL for igi=%p", __func__, igi);
780 		/* NOTREACHED */
781 	} else if (!(igi->igi_debug & IFD_ALLOC)) {
782 		panic("%s: igi %p cannot be freed", __func__, igi);
783 		/* NOTREACHED */
784 	} else if (igi->igi_refcnt != 0) {
785 		panic("%s: non-zero refcnt igi=%p", __func__, igi);
786 		/* NOTREACHED */
787 	}
788 	igi->igi_debug &= ~IFD_ALLOC;
789 	IGI_UNLOCK(igi);
790 
791 	lck_mtx_destroy(&igi->igi_lock, &igmp_mtx_grp);
792 	zfree(igi_zone, igi);
793 }
794 
795 void
igi_addref(struct igmp_ifinfo * igi,int locked)796 igi_addref(struct igmp_ifinfo *igi, int locked)
797 {
798 	if (!locked) {
799 		IGI_LOCK_SPIN(igi);
800 	} else {
801 		IGI_LOCK_ASSERT_HELD(igi);
802 	}
803 
804 	if (++igi->igi_refcnt == 0) {
805 		panic("%s: igi=%p wraparound refcnt", __func__, igi);
806 		/* NOTREACHED */
807 	}
808 	if (!locked) {
809 		IGI_UNLOCK(igi);
810 	}
811 }
812 
813 void
igi_remref(struct igmp_ifinfo * igi)814 igi_remref(struct igmp_ifinfo *igi)
815 {
816 	SLIST_HEAD(, in_multi) inm_dthead;
817 	struct ifnet *ifp;
818 
819 	IGI_LOCK_SPIN(igi);
820 
821 	if (igi->igi_refcnt == 0) {
822 		panic("%s: igi=%p negative refcnt", __func__, igi);
823 		/* NOTREACHED */
824 	}
825 
826 	--igi->igi_refcnt;
827 	if (igi->igi_refcnt > 0) {
828 		IGI_UNLOCK(igi);
829 		return;
830 	}
831 
832 	ifp = igi->igi_ifp;
833 	igi->igi_ifp = NULL;
834 	IF_DRAIN(&igi->igi_gq);
835 	IF_DRAIN(&igi->igi_v2q);
836 	SLIST_INIT(&inm_dthead);
837 	igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
838 	VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
839 	IGI_UNLOCK(igi);
840 
841 	/* Now that we're dropped all locks, release detached records */
842 	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
843 
844 	IGMP_PRINTF(("%s: freeing igmp_ifinfo for ifp 0x%llx(%s)\n",
845 	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
846 
847 	igi_free(igi);
848 }
849 
850 /*
851  * Process a received IGMPv1 query.
852  * Return non-zero if the message should be dropped.
853  */
854 static int
igmp_input_v1_query(struct ifnet * ifp,const struct ip * ip,const struct igmp * igmp)855 igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
856     const struct igmp *igmp)
857 {
858 	struct igmp_ifinfo      *igi;
859 	struct in_multi         *inm;
860 	struct in_multistep     step;
861 	struct igmp_tparams     itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };
862 
863 	IGMP_LOCK_ASSERT_NOTHELD();
864 
865 	/*
866 	 * IGMPv1 Host Membership Queries SHOULD always be addressed to
867 	 * 224.0.0.1. They are always treated as General Queries.
868 	 * igmp_group is always ignored. Do not drop it as a userland
869 	 * daemon may wish to see it.
870 	 */
871 	if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
872 		IGMPSTAT_INC(igps_rcv_badqueries);
873 		OIGMPSTAT_INC(igps_rcv_badqueries);
874 		goto done;
875 	}
876 	IGMPSTAT_INC(igps_rcv_gen_queries);
877 
878 	igi = IGMP_IFINFO(ifp);
879 	VERIFY(igi != NULL);
880 
881 	IGI_LOCK(igi);
882 	if (igi->igi_flags & IGIF_LOOPBACK) {
883 		IGMP_PRINTF(("%s: ignore v1 query on IGIF_LOOPBACK "
884 		    "ifp 0x%llx(%s)\n", __func__,
885 		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
886 		IGI_UNLOCK(igi);
887 		goto done;
888 	}
889 	/*
890 	 * Switch to IGMPv1 host compatibility mode.
891 	 */
892 	itp.qpt = igmp_set_version(igi, IGMP_VERSION_1);
893 	IGI_UNLOCK(igi);
894 
895 	IGMP_PRINTF(("%s: process v1 query on ifp 0x%llx(%s)\n", __func__,
896 	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
897 
898 	/*
899 	 * Start the timers in all of our group records
900 	 * for the interface on which the query arrived,
901 	 * except those which are already running.
902 	 */
903 	in_multihead_lock_shared();
904 	IN_FIRST_MULTI(step, inm);
905 	while (inm != NULL) {
906 		INM_LOCK(inm);
907 		if (inm->inm_ifp != ifp || inm->inm_timer != 0) {
908 			goto next;
909 		}
910 
911 		switch (inm->inm_state) {
912 		case IGMP_NOT_MEMBER:
913 		case IGMP_SILENT_MEMBER:
914 			break;
915 		case IGMP_G_QUERY_PENDING_MEMBER:
916 		case IGMP_SG_QUERY_PENDING_MEMBER:
917 		case IGMP_REPORTING_MEMBER:
918 		case IGMP_IDLE_MEMBER:
919 		case IGMP_LAZY_MEMBER:
920 		case IGMP_SLEEPING_MEMBER:
921 		case IGMP_AWAKENING_MEMBER:
922 			inm->inm_state = IGMP_REPORTING_MEMBER;
923 			inm->inm_timer = IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
924 			itp.cst = 1;
925 			break;
926 		case IGMP_LEAVING_MEMBER:
927 			break;
928 		}
929 next:
930 		INM_UNLOCK(inm);
931 		IN_NEXT_MULTI(step, inm);
932 	}
933 	in_multihead_lock_done();
934 done:
935 	igmp_set_timeout(&itp);
936 
937 	return 0;
938 }
939 
940 /*
941  * Process a received IGMPv2 general or group-specific query.
942  */
static int
igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	int                      is_general_query;
	uint16_t                 timer;
	struct igmp_tparams      itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	/*
	 * Validate address fields upfront.
	 */
	if (in_nullhost(igmp->igmp_group)) {
		/*
		 * IGMPv2 General Query.
		 * If this was not sent to the all-hosts group, ignore it.
		 */
		if (!in_allhosts(ip->ip_dst)) {
			goto done;
		}
		IGMPSTAT_INC(igps_rcv_gen_queries);
		is_general_query = 1;
	} else {
		/* IGMPv2 Group-Specific Query. */
		IGMPSTAT_INC(igps_rcv_group_queries);
	}

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v2 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}
	/*
	 * Ignore v2 query if in v1 Compatibility Mode.
	 */
	if (igi->igi_version == IGMP_VERSION_1) {
		IGI_UNLOCK(igi);
		goto done;
	}
	/* Hearing a v2 querier (re)arms v2 compatibility mode. */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_2);
	IGI_UNLOCK(igi);

	/*
	 * igmp_code holds the max response time in tenths of a second;
	 * scale it down and enforce a minimum of one tick.
	 */
	timer = igmp->igmp_code / IGMP_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	if (is_general_query) {
		struct in_multistep step;

		IGMP_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		/*
		 * For each reporting group joined on this
		 * interface, kick the report timer.
		 */
		in_multihead_lock_shared();
		IN_FIRST_MULTI(step, inm);
		while (inm != NULL) {
			INM_LOCK(inm);
			if (inm->inm_ifp == ifp) {
				itp.cst += igmp_v2_update_group(inm, timer);
			}
			INM_UNLOCK(inm);
			IN_NEXT_MULTI(step, inm);
		}
		in_multihead_lock_done();
	} else {
		/*
		 * Group-specific IGMPv2 query, we need only
		 * look up the single group to process it.
		 */
		in_multihead_lock_shared();
		IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
		in_multihead_lock_done();
		if (inm != NULL) {
			INM_LOCK(inm);
			IGMP_INET_PRINTF(igmp->igmp_group,
			    ("process v2 query %s on ifp 0x%llx(%s)\n",
			    _igmp_inet_buf,
			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
			itp.cst = igmp_v2_update_group(inm, timer);
			INM_UNLOCK(inm);
			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
		}
	}
done:
	/* Arm whichever global IGMP timers the work above requested. */
	igmp_set_timeout(&itp);

	return 0;
}
1045 
1046 /*
1047  * Update the report timer on a group in response to an IGMPv2 query.
1048  *
1049  * If we are becoming the reporting member for this group, start the timer.
1050  * If we already are the reporting member for this group, and timer is
1051  * below the threshold, reset it.
1052  *
1053  * We may be updating the group for the first time since we switched
1054  * to IGMPv3. If we are, then we must clear any recorded source lists,
1055  * and transition to REPORTING state; the group timer is overloaded
1056  * for group and group-source query responses.
1057  *
1058  * Unlike IGMPv3, the delay per group should be jittered
1059  * to avoid bursts of IGMPv2 reports.
1060  */
static uint32_t
igmp_v2_update_group(struct in_multi *inm, const int timer)
{
	IGMP_INET_PRINTF(inm->inm_addr, ("%s: %s/%s timer=%d\n",
	    __func__, _igmp_inet_buf, if_name(inm->inm_ifp),
	    timer));

	INM_LOCK_ASSERT_HELD(inm);

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
		/* Not reporting for this group; leave the timer alone. */
		break;
	case IGMP_REPORTING_MEMBER:
		if (inm->inm_timer != 0 &&
		    inm->inm_timer <= timer) {
			/*
			 * A report is already due no later than the query's
			 * max response time; keep the existing timer.
			 */
			IGMP_PRINTF(("%s: REPORTING and timer running, "
			    "skipping.\n", __func__));
			break;
		}
		OS_FALLTHROUGH;
	case IGMP_SG_QUERY_PENDING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		/* Become the reporting member; jitter the delay. */
		IGMP_PRINTF(("%s: ->REPORTING\n", __func__));
		inm->inm_state = IGMP_REPORTING_MEMBER;
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		break;
	case IGMP_SLEEPING_MEMBER:
		IGMP_PRINTF(("%s: ->AWAKENING\n", __func__));
		inm->inm_state = IGMP_AWAKENING_MEMBER;
		break;
	case IGMP_LEAVING_MEMBER:
		break;
	}

	/* Return the (possibly updated) timer so the caller can
	 * accumulate it into the current-state timer request. */
	return inm->inm_timer;
}
1101 
1102 /*
1103  * Process a received IGMPv3 general, group-specific or
1104  * group-and-source-specific query.
1105  * Assumes m has already been pulled up to the full IGMP message length.
1106  * Return 0 if successful, otherwise an appropriate error code is returned.
1107  */
static int
igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
    /*const*/ struct igmpv3 *igmpv3)
{
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	int                      is_general_query;
	uint32_t                 maxresp, nsrc, qqi;
	uint32_t                 timer;
	uint8_t                  qrv;
	struct igmp_tparams      itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	IGMP_PRINTF(("%s: process v3 query on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	maxresp = igmpv3->igmp_code;    /* in 1/10ths of a second */
	if (maxresp >= 128) {
		/*
		 * Values >= 128 use the RFC 3376 floating-point encoding:
		 * mantissa << (exponent + 3).
		 */
		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
		    (IGMP_EXP(igmpv3->igmp_code) + 3);
	}

	/*
	 * Robustness must never be less than 2 for on-wire IGMPv3.
	 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
	 * an exception for interfaces whose IGMPv3 state changes
	 * are redirected to loopback (e.g. MANET).
	 */
	qrv = IGMP_QRV(igmpv3->igmp_misc);
	if (qrv < 2) {
		IGMP_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
		    qrv, IGMP_RV_INIT));
		qrv = IGMP_RV_INIT;
	}

	/* Querier's Query Interval; >= 128 uses the same exponential encoding. */
	qqi = igmpv3->igmp_qqi;
	if (qqi >= 128) {
		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
		    (IGMP_EXP(igmpv3->igmp_qqi) + 3);
	}

	/* Convert tenths of a second to ticks; minimum one tick. */
	timer = maxresp / IGMP_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Validate address fields and versions upfront before
	 * accepting v3 query.
	 */
	if (in_nullhost(igmpv3->igmp_group)) {
		/*
		 * IGMPv3 General Query.
		 *
		 * General Queries SHOULD be directed to 224.0.0.1.
		 * A general query with a source list has undefined
		 * behaviour; discard it.
		 */
		IGMPSTAT_INC(igps_rcv_gen_queries);
		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
			IGMPSTAT_INC(igps_rcv_badqueries);
			OIGMPSTAT_INC(igps_rcv_badqueries);
			goto done;
		}
		is_general_query = 1;
	} else {
		/* Group or group-source specific query. */
		if (nsrc == 0) {
			IGMPSTAT_INC(igps_rcv_group_queries);
		} else {
			IGMPSTAT_INC(igps_rcv_gsr_queries);
		}
	}

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v3 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}

	/*
	 * Discard the v3 query if we're in Compatibility Mode.
	 * The RFC is not obviously worded that hosts need to stay in
	 * compatibility mode until the Old Version Querier Present
	 * timer expires.
	 */
	if (igi->igi_version != IGMP_VERSION_3) {
		IGMP_PRINTF(("%s: ignore v3 query in v%d mode on "
		    "ifp 0x%llx(%s)\n", __func__, igi->igi_version,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}

	/* Adopt the querier's advertised parameters for this interface. */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_3);
	igi->igi_rv = qrv;
	igi->igi_qi = qqi;
	igi->igi_qri = MAX(timer, IGMP_QRI_MIN);

	IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, igi->igi_rv,
	    igi->igi_qi, igi->igi_qri));

	if (is_general_query) {
		/*
		 * Schedule a current-state report on this ifp for
		 * all groups, possibly containing source lists.
		 * If there is a pending General Query response
		 * scheduled earlier than the selected delay, do
		 * not schedule any other reports.
		 * Otherwise, reset the interface timer.
		 */
		IGMP_PRINTF(("%s: process v3 general query on ifp 0x%llx(%s)\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
			itp.it = igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
		}
		IGI_UNLOCK(igi);
	} else {
		IGI_UNLOCK(igi);
		/*
		 * Group-source-specific queries are throttled on
		 * a per-group basis to defeat denial-of-service attempts.
		 * Queries for groups we are not a member of on this
		 * link are simply ignored.
		 */
		in_multihead_lock_shared();
		IN_LOOKUP_MULTI(&igmpv3->igmp_group, ifp, inm);
		in_multihead_lock_done();
		if (inm == NULL) {
			goto done;
		}

		INM_LOCK(inm);
		if (nsrc > 0) {
			/* Rate-limit group-and-source queries per group. */
			if (!ratecheck(&inm->inm_lastgsrtv,
			    &igmp_gsrdelay)) {
				IGMP_PRINTF(("%s: GS query throttled.\n",
				    __func__));
				IGMPSTAT_INC(igps_drop_gsr_queries);
				INM_UNLOCK(inm);
				INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
				goto done;
			}
		}
		IGMP_INET_PRINTF(igmpv3->igmp_group,
		    ("process v3 %s query on ifp 0x%llx(%s)\n", _igmp_inet_buf,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		/*
		 * If there is a pending General Query response
		 * scheduled sooner than the selected delay, no
		 * further report need be scheduled.
		 * Otherwise, prepare to respond to the
		 * group-specific or group-and-source query.
		 */
		IGI_LOCK(igi);
		itp.it = igi->igi_v3_timer;
		IGI_UNLOCK(igi);
		if (itp.it == 0 || itp.it >= timer) {
			(void) igmp_input_v3_group_query(inm, timer, igmpv3);
			itp.cst = inm->inm_timer;
		}
		INM_UNLOCK(inm);
		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
	}
done:
	if (itp.it > 0) {
		IGMP_PRINTF(("%s: v3 general query response scheduled in "
		    "T+%d seconds on ifp 0x%llx(%s)\n", __func__, itp.it,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	}
	/* Arm whichever global IGMP timers the work above requested. */
	igmp_set_timeout(&itp);

	return 0;
}
1293 
1294 /*
1295  * Process a recieved IGMPv3 group-specific or group-and-source-specific
1296  * query.
1297  * Return <0 if any error occured. Currently this is ignored.
1298  */
static int
igmp_input_v3_group_query(struct in_multi *inm,
    int timer, /*const*/ struct igmpv3 *igmpv3)
{
	int                      retval;
	uint16_t                 nsrc;

	INM_LOCK_ASSERT_HELD(inm);

	retval = 0;

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LEAVING_MEMBER:
		/* No report is pending or possible in these states. */
		return retval;
	case IGMP_REPORTING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		break;
	}

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Deal with group-specific queries upfront.
	 * If any group query is already pending, purge any recorded
	 * source-list state if it exists, and schedule a query response
	 * for this group-specific query.
	 */
	if (nsrc == 0) {
		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
			inm_clear_recorded(inm);
			/* Respond at the earlier of the two deadlines. */
			timer = min(inm->inm_timer, timer);
		}
		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		return retval;
	}

	/*
	 * Deal with the case where a group-and-source-specific query has
	 * been received but a group-specific query is already pending.
	 */
	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
		timer = min(inm->inm_timer, timer);
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		return retval;
	}

	/*
	 * Finally, deal with the case where a group-and-source-specific
	 * query has been received, where a response to a previous g-s-r
	 * query exists, or none exists.
	 * In this case, we need to parse the source-list which the Querier
	 * has provided us with and check if we have any source list filter
	 * entries at T1 for these sources. If we do not, there is no need
	 * schedule a report and the query may be dropped.
	 * If we do, we must record them and schedule a current-state
	 * report for those sources.
	 * FIXME: Handling source lists larger than 1 mbuf requires that
	 * we pass the mbuf chain pointer down to this function, and use
	 * m_getptr() to walk the chain.
	 */
	if (inm->inm_nsrc > 0) {
		const struct in_addr    *ap;
		int                      i, nrecorded;

		/* Source addresses follow the fixed igmpv3 header. */
		ap = (const struct in_addr *)(igmpv3 + 1);
		nrecorded = 0;
		for (i = 0; i < nsrc; i++, ap++) {
			/* inm_record_source() returns <0 on error,
			 * otherwise the number of sources recorded. */
			retval = inm_record_source(inm, ap->s_addr);
			if (retval < 0) {
				break;
			}
			nrecorded += retval;
		}
		if (nrecorded > 0) {
			IGMP_PRINTF(("%s: schedule response to SG query\n",
			    __func__));
			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		}
	}

	return retval;
}
1391 
1392 /*
1393  * Process a received IGMPv1 host membership report.
1394  *
1395  * NOTE: 0.0.0.0 workaround breaks const correctness.
1396  */
1397 static int
igmp_input_v1_report(struct ifnet * ifp,struct mbuf * m,struct ip * ip,struct igmp * igmp)1398 igmp_input_v1_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
1399     /*const*/ struct igmp *igmp)
1400 {
1401 	struct in_ifaddr *ia;
1402 	struct in_multi *inm;
1403 
1404 	IGMPSTAT_INC(igps_rcv_reports);
1405 	OIGMPSTAT_INC(igps_rcv_reports);
1406 
1407 	if ((ifp->if_flags & IFF_LOOPBACK) ||
1408 	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1409 		return 0;
1410 	}
1411 
1412 	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr) ||
1413 	    !in_hosteq(igmp->igmp_group, ip->ip_dst))) {
1414 		IGMPSTAT_INC(igps_rcv_badreports);
1415 		OIGMPSTAT_INC(igps_rcv_badreports);
1416 		return EINVAL;
1417 	}
1418 
1419 	/*
1420 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1421 	 * Booting clients may use the source address 0.0.0.0. Some
1422 	 * IGMP daemons may not know how to use IP_RECVIF to determine
1423 	 * the interface upon which this message was received.
1424 	 * Replace 0.0.0.0 with the subnet address if told to do so.
1425 	 */
1426 	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1427 		IFP_TO_IA(ifp, ia);
1428 		if (ia != NULL) {
1429 			IFA_LOCK(&ia->ia_ifa);
1430 			ip->ip_src.s_addr = htonl(ia->ia_subnet);
1431 			IFA_UNLOCK(&ia->ia_ifa);
1432 			IFA_REMREF(&ia->ia_ifa);
1433 		}
1434 	}
1435 
1436 	IGMP_INET_PRINTF(igmp->igmp_group,
1437 	    ("process v1 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1438 	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1439 
1440 	/*
1441 	 * IGMPv1 report suppression.
1442 	 * If we are a member of this group, and our membership should be
1443 	 * reported, stop our group timer and transition to the 'lazy' state.
1444 	 */
1445 	in_multihead_lock_shared();
1446 	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1447 	in_multihead_lock_done();
1448 	if (inm != NULL) {
1449 		struct igmp_ifinfo *igi;
1450 
1451 		INM_LOCK(inm);
1452 
1453 		igi = inm->inm_igi;
1454 		VERIFY(igi != NULL);
1455 
1456 		IGMPSTAT_INC(igps_rcv_ourreports);
1457 		OIGMPSTAT_INC(igps_rcv_ourreports);
1458 
1459 		/*
1460 		 * If we are in IGMPv3 host mode, do not allow the
1461 		 * other host's IGMPv1 report to suppress our reports
1462 		 * unless explicitly configured to do so.
1463 		 */
1464 		IGI_LOCK(igi);
1465 		if (igi->igi_version == IGMP_VERSION_3) {
1466 			if (igmp_legacysupp) {
1467 				igmp_v3_suppress_group_record(inm);
1468 			}
1469 			IGI_UNLOCK(igi);
1470 			INM_UNLOCK(inm);
1471 			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1472 			return 0;
1473 		}
1474 
1475 		INM_LOCK_ASSERT_HELD(inm);
1476 		inm->inm_timer = 0;
1477 
1478 		switch (inm->inm_state) {
1479 		case IGMP_NOT_MEMBER:
1480 		case IGMP_SILENT_MEMBER:
1481 			break;
1482 		case IGMP_IDLE_MEMBER:
1483 		case IGMP_LAZY_MEMBER:
1484 		case IGMP_AWAKENING_MEMBER:
1485 			IGMP_INET_PRINTF(igmp->igmp_group,
1486 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1487 			    _igmp_inet_buf,
1488 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1489 			OS_FALLTHROUGH;
1490 		case IGMP_SLEEPING_MEMBER:
1491 			inm->inm_state = IGMP_SLEEPING_MEMBER;
1492 			break;
1493 		case IGMP_REPORTING_MEMBER:
1494 			IGMP_INET_PRINTF(igmp->igmp_group,
1495 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1496 			    _igmp_inet_buf,
1497 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1498 			if (igi->igi_version == IGMP_VERSION_1) {
1499 				inm->inm_state = IGMP_LAZY_MEMBER;
1500 			} else if (igi->igi_version == IGMP_VERSION_2) {
1501 				inm->inm_state = IGMP_SLEEPING_MEMBER;
1502 			}
1503 			break;
1504 		case IGMP_G_QUERY_PENDING_MEMBER:
1505 		case IGMP_SG_QUERY_PENDING_MEMBER:
1506 		case IGMP_LEAVING_MEMBER:
1507 			break;
1508 		}
1509 		IGI_UNLOCK(igi);
1510 		INM_UNLOCK(inm);
1511 		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1512 	}
1513 
1514 	return 0;
1515 }
1516 
1517 /*
1518  * Process a received IGMPv2 host membership report.
1519  *
1520  * NOTE: 0.0.0.0 workaround breaks const correctness.
1521  */
static int
igmp_input_v2_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
    /*const*/ struct igmp *igmp)
{
	struct in_ifaddr *ia;
	struct in_multi *inm;

	/*
	 * Make sure we don't hear our own membership report.  Fast
	 * leave requires knowing that we are the only member of a
	 * group.
	 */
	IFP_TO_IA(ifp, ia);
	if (ia != NULL) {
		IFA_LOCK(&ia->ia_ifa);
		if (in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
			IFA_UNLOCK(&ia->ia_ifa);
			IFA_REMREF(&ia->ia_ifa);
			return 0;
		}
		IFA_UNLOCK(&ia->ia_ifa);
		/* NOTE: the reference on ia is held across the checks
		 * below and released on every exit path. */
	}

	IGMPSTAT_INC(igps_rcv_reports);
	OIGMPSTAT_INC(igps_rcv_reports);

	/* Ignore reports looped back from ourselves. */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		return 0;
	}

	/*
	 * The reported group must be a multicast address and must match
	 * the IP destination address of the datagram.
	 */
	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		IGMPSTAT_INC(igps_rcv_badreports);
		OIGMPSTAT_INC(igps_rcv_badreports);
		return EINVAL;
	}

	/*
	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
	 * Booting clients may use the source address 0.0.0.0. Some
	 * IGMP daemons may not know how to use IP_RECVIF to determine
	 * the interface upon which this message was received.
	 * Replace 0.0.0.0 with the subnet address if told to do so.
	 */
	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
		if (ia != NULL) {
			IFA_LOCK(&ia->ia_ifa);
			ip->ip_src.s_addr = htonl(ia->ia_subnet);
			IFA_UNLOCK(&ia->ia_ifa);
		}
	}
	if (ia != NULL) {
		IFA_REMREF(&ia->ia_ifa);
	}

	IGMP_INET_PRINTF(igmp->igmp_group,
	    ("process v2 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	/*
	 * IGMPv2 report suppression.
	 * If we are a member of this group, and our membership should be
	 * reported, and our group timer is pending or about to be reset,
	 * stop our group timer by transitioning to the 'lazy' state.
	 */
	in_multihead_lock_shared();
	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
	in_multihead_lock_done();
	if (inm != NULL) {
		struct igmp_ifinfo *igi;

		INM_LOCK(inm);
		igi = inm->inm_igi;
		VERIFY(igi != NULL);

		IGMPSTAT_INC(igps_rcv_ourreports);
		OIGMPSTAT_INC(igps_rcv_ourreports);

		/*
		 * If we are in IGMPv3 host mode, do not allow the
		 * other host's IGMPv2 report to suppress our reports
		 * unless explicitly configured to do so.
		 */
		IGI_LOCK(igi);
		if (igi->igi_version == IGMP_VERSION_3) {
			if (igmp_legacysupp) {
				igmp_v3_suppress_group_record(inm);
			}
			IGI_UNLOCK(igi);
			INM_UNLOCK(inm);
			INM_REMREF(inm);
			return 0;
		}

		/* Another member reported; cancel our pending report. */
		inm->inm_timer = 0;

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
		case IGMP_SLEEPING_MEMBER:
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			IGMP_INET_PRINTF(igmp->igmp_group,
			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
			    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp),
			    if_name(ifp)));
			OS_FALLTHROUGH;
		case IGMP_LAZY_MEMBER:
			inm->inm_state = IGMP_LAZY_MEMBER;
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			break;
		}
		IGI_UNLOCK(igi);
		INM_UNLOCK(inm);
		INM_REMREF(inm);
	}

	return 0;
}
1653 
/*
 * IGMP input path: validate the received message, dispatch it to the
 * per-version query/report handlers, then hand the mbuf to raw sockets.
 * On any validation failure the mbuf is freed and processing stops.
 */
void
igmp_input(struct mbuf *m, int off)
{
	int iphlen;
	struct ifnet *ifp;
	struct igmp *igmp;
	struct ip *ip;
	int igmplen;
	int minlen;
	int queryver;

	IGMP_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m), off));

	ifp = m->m_pkthdr.rcvif;

	IGMPSTAT_INC(igps_rcv_total);
	OIGMPSTAT_INC(igps_rcv_total);

	/* Expect 32-bit aligned data pointer on strict-align platforms */
	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);

	ip = mtod(m, struct ip *);
	iphlen = off;

	/* By now, ip_len no longer contains the length of IP header */
	igmplen = ip->ip_len;

	/*
	 * Validate lengths.
	 */
	if (igmplen < IGMP_MINLEN) {
		IGMPSTAT_INC(igps_rcv_tooshort);
		OIGMPSTAT_INC(igps_rcv_tooshort);
		m_freem(m);
		return;
	}

	/*
	 * Always pullup to the minimum size for v1/v2 or v3
	 * to amortize calls to m_pulldown().
	 */
	if (igmplen >= IGMP_V3_QUERY_MINLEN) {
		minlen = IGMP_V3_QUERY_MINLEN;
	} else {
		minlen = IGMP_MINLEN;
	}

	/* A bit more expensive than M_STRUCT_GET, but ensures alignment */
	M_STRUCT_GET0(igmp, struct igmp *, m, off, minlen);
	if (igmp == NULL) {
		/* NOTE(review): no m_freem() here — M_STRUCT_GET0 appears
		 * to consume the mbuf on failure; confirm against its
		 * definition before changing this path. */
		IGMPSTAT_INC(igps_rcv_tooshort);
		OIGMPSTAT_INC(igps_rcv_tooshort);
		return;
	}
	/* N.B.: we assume the packet was correctly aligned in ip_input. */

	/*
	 * Validate checksum.
	 */
	/* Temporarily advance past the IP header so the checksum covers
	 * only the IGMP message; restored below. */
	m->m_data += iphlen;
	m->m_len -= iphlen;
	if (in_cksum(m, igmplen)) {
		IGMPSTAT_INC(igps_rcv_badsum);
		OIGMPSTAT_INC(igps_rcv_badsum);
		m_freem(m);
		return;
	}
	m->m_data -= iphlen;
	m->m_len += iphlen;

	/*
	 * IGMP control traffic is link-scope, and must have a TTL of 1.
	 * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
	 * probe packets may come from beyond the LAN.
	 */
	if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
		IGMPSTAT_INC(igps_rcv_badttl);
		m_freem(m);
		return;
	}

	switch (igmp->igmp_type) {
	case IGMP_HOST_MEMBERSHIP_QUERY:
		/* Infer the query version from length and code fields:
		 * MINLEN + zero code => v1, MINLEN + nonzero code => v2,
		 * v3-sized or larger => v3. */
		if (igmplen == IGMP_MINLEN) {
			if (igmp->igmp_code == 0) {
				queryver = IGMP_VERSION_1;
			} else {
				queryver = IGMP_VERSION_2;
			}
		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
			queryver = IGMP_VERSION_3;
		} else {
			IGMPSTAT_INC(igps_rcv_tooshort);
			OIGMPSTAT_INC(igps_rcv_tooshort);
			m_freem(m);
			return;
		}

		OIGMPSTAT_INC(igps_rcv_queries);

		switch (queryver) {
		case IGMP_VERSION_1:
			IGMPSTAT_INC(igps_rcv_v1v2_queries);
			if (!igmp_v1enable) {
				break;
			}
			if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
				m_freem(m);
				return;
			}
			break;

		case IGMP_VERSION_2:
			IGMPSTAT_INC(igps_rcv_v1v2_queries);
			if (!igmp_v2enable) {
				break;
			}
			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
				m_freem(m);
				return;
			}
			break;

		case IGMP_VERSION_3: {
			struct igmpv3 *igmpv3;
			uint16_t igmpv3len;
			uint16_t srclen;
			int nsrc;

			IGMPSTAT_INC(igps_rcv_v3_queries);
			igmpv3 = (struct igmpv3 *)igmp;
			/*
			 * Validate length based on source count.
			 */
			nsrc = ntohs(igmpv3->igmp_numsrc);
			/*
			 * The max value of nsrc is limited by the
			 * MTU of the network on which the datagram
			 * is received
			 */
			if (nsrc < 0 || nsrc > IGMP_V3_QUERY_MAX_SRCS) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				m_freem(m);
				return;
			}
			srclen = sizeof(struct in_addr) * (uint16_t)nsrc;
			if (igmplen < (IGMP_V3_QUERY_MINLEN + srclen)) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				m_freem(m);
				return;
			}
			igmpv3len = IGMP_V3_QUERY_MINLEN + srclen;
			/*
			 * A bit more expensive than M_STRUCT_GET,
			 * but ensures alignment.
			 */
			M_STRUCT_GET0(igmpv3, struct igmpv3 *, m,
			    off, igmpv3len);
			if (igmpv3 == NULL) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				return;
			}
			/*
			 * N.B.: we assume the packet was correctly
			 * aligned in ip_input.
			 */
			if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
				m_freem(m);
				return;
			}
		}
		break;
		}
		break;

	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
		if (!igmp_v1enable) {
			break;
		}
		if (igmp_input_v1_report(ifp, m, ip, igmp) != 0) {
			m_freem(m);
			return;
		}
		break;

	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
		if (!igmp_v2enable) {
			break;
		}
		if (!ip_checkrouteralert(m)) {
			IGMPSTAT_INC(igps_rcv_nora);
		}
		if (igmp_input_v2_report(ifp, m, ip, igmp) != 0) {
			m_freem(m);
			return;
		}
		break;

	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
		/*
		 * Hosts do not need to process IGMPv3 membership reports,
		 * as report suppression is no longer required.
		 */
		if (!ip_checkrouteralert(m)) {
			IGMPSTAT_INC(igps_rcv_nora);
		}
		break;

	default:
		break;
	}

	IGMP_LOCK_ASSERT_NOTHELD();
	/*
	 * Pass all valid IGMP packets up to any process(es) listening on a
	 * raw IGMP socket.
	 */
	rip_input(m, off);
}
1877 
1878 /*
1879  * Schedule IGMP timer based on various parameters; caller must ensure that
1880  * lock ordering is maintained as this routine acquires IGMP global lock.
1881  */
1882 void
igmp_set_timeout(struct igmp_tparams * itp)1883 igmp_set_timeout(struct igmp_tparams *itp)
1884 {
1885 	IGMP_LOCK_ASSERT_NOTHELD();
1886 	VERIFY(itp != NULL);
1887 
1888 	if (itp->qpt != 0 || itp->it != 0 || itp->cst != 0 || itp->sct != 0) {
1889 		IGMP_LOCK();
1890 		if (itp->qpt != 0) {
1891 			querier_present_timers_running = 1;
1892 		}
1893 		if (itp->it != 0) {
1894 			interface_timers_running = 1;
1895 		}
1896 		if (itp->cst != 0) {
1897 			current_state_timers_running = 1;
1898 		}
1899 		if (itp->sct != 0) {
1900 			state_change_timers_running = 1;
1901 		}
1902 		igmp_sched_timeout(itp->fast);
1903 		IGMP_UNLOCK();
1904 	}
1905 }
1906 
/*
 * Convenience wrapper: mark the timer-parameter request as "fast"
 * (the flag is forwarded to igmp_sched_timeout() by igmp_set_timeout())
 * and then schedule it.
 */
void
igmp_set_fast_timeout(struct igmp_tparams *itp)
{
	VERIFY(itp != NULL);
	itp->fast = true;
	igmp_set_timeout(itp);
}
1914 
1915 /*
1916  * IGMP timer handler (per 1 second).
1917  */
1918 static void
igmp_timeout(void * arg)1919 igmp_timeout(void *arg)
1920 {
1921 	struct ifqueue           scq;   /* State-change packets */
1922 	struct ifqueue           qrq;   /* Query response packets */
1923 	struct ifnet            *ifp;
1924 	struct igmp_ifinfo      *igi;
1925 	struct in_multi         *inm;
1926 	unsigned int             loop = 0, uri_sec = 0;
1927 	SLIST_HEAD(, in_multi)  inm_dthead;
1928 	bool                     fast = arg != NULL;
1929 
1930 	SLIST_INIT(&inm_dthead);
1931 
1932 	/*
1933 	 * Update coarse-grained networking timestamp (in sec.); the idea
1934 	 * is to piggy-back on the timeout callout to update the counter
1935 	 * returnable via net_uptime().
1936 	 */
1937 	net_update_uptime();
1938 
1939 	IGMP_LOCK();
1940 
1941 	IGMP_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d, fast %d\n", __func__,
1942 	    querier_present_timers_running, interface_timers_running,
1943 	    current_state_timers_running, state_change_timers_running,
1944 	    fast));
1945 
1946 	if (fast) {
1947 		/*
1948 		 * When running the fast timer, skip processing
1949 		 * of "querier present" timers since they are
1950 		 * based on 1-second intervals.
1951 		 */
1952 		goto skip_query_timers;
1953 	}
1954 	/*
1955 	 * IGMPv1/v2 querier present timer processing.
1956 	 */
1957 	if (querier_present_timers_running) {
1958 		querier_present_timers_running = 0;
1959 		LIST_FOREACH(igi, &igi_head, igi_link) {
1960 			IGI_LOCK(igi);
1961 			igmp_v1v2_process_querier_timers(igi);
1962 			if (igi->igi_v1_timer > 0 || igi->igi_v2_timer > 0) {
1963 				querier_present_timers_running = 1;
1964 			}
1965 			IGI_UNLOCK(igi);
1966 		}
1967 	}
1968 
1969 	/*
1970 	 * IGMPv3 General Query response timer processing.
1971 	 */
1972 	if (interface_timers_running) {
1973 		IGMP_PRINTF(("%s: interface timers running\n", __func__));
1974 		interface_timers_running = 0;
1975 		LIST_FOREACH(igi, &igi_head, igi_link) {
1976 			IGI_LOCK(igi);
1977 			if (igi->igi_version != IGMP_VERSION_3) {
1978 				IGI_UNLOCK(igi);
1979 				continue;
1980 			}
1981 			if (igi->igi_v3_timer == 0) {
1982 				/* Do nothing. */
1983 			} else if (--igi->igi_v3_timer == 0) {
1984 				if (igmp_v3_dispatch_general_query(igi) > 0) {
1985 					interface_timers_running = 1;
1986 				}
1987 			} else {
1988 				interface_timers_running = 1;
1989 			}
1990 			IGI_UNLOCK(igi);
1991 		}
1992 	}
1993 
1994 skip_query_timers:
1995 	if (!current_state_timers_running &&
1996 	    !state_change_timers_running) {
1997 		goto out_locked;
1998 	}
1999 
2000 	current_state_timers_running = 0;
2001 	state_change_timers_running = 0;
2002 
2003 	memset(&qrq, 0, sizeof(struct ifqueue));
2004 	qrq.ifq_maxlen = IGMP_MAX_G_GS_PACKETS;
2005 
2006 	memset(&scq, 0, sizeof(struct ifqueue));
2007 	scq.ifq_maxlen =  IGMP_MAX_STATE_CHANGE_PACKETS;
2008 
2009 	IGMP_PRINTF(("%s: state change timers running\n", __func__));
2010 
2011 	/*
2012 	 * IGMPv1/v2/v3 host report and state-change timer processing.
2013 	 * Note: Processing a v3 group timer may remove a node.
2014 	 */
2015 	LIST_FOREACH(igi, &igi_head, igi_link) {
2016 		struct in_multistep step;
2017 
2018 		IGI_LOCK(igi);
2019 		ifp = igi->igi_ifp;
2020 		loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
2021 		uri_sec = IGMP_RANDOM_DELAY(igi->igi_uri);
2022 		IGI_UNLOCK(igi);
2023 
2024 		in_multihead_lock_shared();
2025 		IN_FIRST_MULTI(step, inm);
2026 		while (inm != NULL) {
2027 			INM_LOCK(inm);
2028 			if (inm->inm_ifp != ifp) {
2029 				goto next;
2030 			}
2031 
2032 			IGI_LOCK(igi);
2033 			switch (igi->igi_version) {
2034 			case IGMP_VERSION_1:
2035 			case IGMP_VERSION_2:
2036 				igmp_v1v2_process_group_timer(inm,
2037 				    igi->igi_version);
2038 				break;
2039 			case IGMP_VERSION_3:
2040 				igmp_v3_process_group_timers(igi, &qrq,
2041 				    &scq, inm, uri_sec);
2042 				break;
2043 			}
2044 			IGI_UNLOCK(igi);
2045 next:
2046 			INM_UNLOCK(inm);
2047 			IN_NEXT_MULTI(step, inm);
2048 		}
2049 		in_multihead_lock_done();
2050 
2051 		IGI_LOCK(igi);
2052 		if (igi->igi_version == IGMP_VERSION_1 ||
2053 		    igi->igi_version == IGMP_VERSION_2) {
2054 			igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop);
2055 		} else if (igi->igi_version == IGMP_VERSION_3) {
2056 			IGI_UNLOCK(igi);
2057 			igmp_dispatch_queue(NULL, &qrq, 0, loop);
2058 			igmp_dispatch_queue(NULL, &scq, 0, loop);
2059 			VERIFY(qrq.ifq_len == 0);
2060 			VERIFY(scq.ifq_len == 0);
2061 			IGI_LOCK(igi);
2062 		}
2063 		/*
2064 		 * In case there are still any pending membership reports
2065 		 * which didn't get drained at version change time.
2066 		 */
2067 		IF_DRAIN(&igi->igi_v2q);
2068 		/*
2069 		 * Release all deferred inm records, and drain any locally
2070 		 * enqueued packets; do it even if the current IGMP version
2071 		 * for the link is no longer IGMPv3, in order to handle the
2072 		 * version change case.
2073 		 */
2074 		igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
2075 		VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
2076 		IGI_UNLOCK(igi);
2077 
2078 		IF_DRAIN(&qrq);
2079 		IF_DRAIN(&scq);
2080 	}
2081 
2082 out_locked:
2083 	/* re-arm the timer if there's work to do */
2084 	igmp_timeout_run = 0;
2085 	igmp_sched_timeout(false);
2086 	IGMP_UNLOCK();
2087 
2088 	/* Now that we're dropped all locks, release detached records */
2089 	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
2090 }
2091 
2092 static void
igmp_sched_timeout(bool fast)2093 igmp_sched_timeout(bool fast)
2094 {
2095 	IGMP_LOCK_ASSERT_HELD();
2096 
2097 	if (!igmp_timeout_run &&
2098 	    (querier_present_timers_running || current_state_timers_running ||
2099 	    interface_timers_running || state_change_timers_running)) {
2100 		igmp_timeout_run = 1;
2101 		int sched_hz = fast ? 0 : hz;
2102 		void *arg = fast ? (void *)igmp_sched_timeout : NULL;
2103 		timeout(igmp_timeout, arg, sched_hz);
2104 	}
2105 }
2106 
2107 /*
2108  * Free the in_multi reference(s) for this IGMP lifecycle.
2109  *
2110  * Caller must be holding igi_lock.
2111  */
2112 static void
igmp_flush_relq(struct igmp_ifinfo * igi,struct igmp_inm_relhead * inm_dthead)2113 igmp_flush_relq(struct igmp_ifinfo *igi, struct igmp_inm_relhead *inm_dthead)
2114 {
2115 	struct in_multi *inm;
2116 
2117 again:
2118 	IGI_LOCK_ASSERT_HELD(igi);
2119 	inm = SLIST_FIRST(&igi->igi_relinmhead);
2120 	if (inm != NULL) {
2121 		int lastref;
2122 
2123 		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
2124 		IGI_UNLOCK(igi);
2125 
2126 		in_multihead_lock_exclusive();
2127 		INM_LOCK(inm);
2128 		VERIFY(inm->inm_nrelecnt != 0);
2129 		inm->inm_nrelecnt--;
2130 		lastref = in_multi_detach(inm);
2131 		VERIFY(!lastref || (!(inm->inm_debug & IFD_ATTACHED) &&
2132 		    inm->inm_reqcnt == 0));
2133 		INM_UNLOCK(inm);
2134 		in_multihead_lock_done();
2135 		/* from igi_relinmhead */
2136 		INM_REMREF(inm);
2137 		/* from in_multihead list */
2138 		if (lastref) {
2139 			/*
2140 			 * Defer releasing our final reference, as we
2141 			 * are holding the IGMP lock at this point, and
2142 			 * we could end up with locking issues later on
2143 			 * (while issuing SIOCDELMULTI) when this is the
2144 			 * final reference count.  Let the caller do it
2145 			 * when it is safe.
2146 			 */
2147 			IGMP_ADD_DETACHED_INM(inm_dthead, inm);
2148 		}
2149 		IGI_LOCK(igi);
2150 		goto again;
2151 	}
2152 }
2153 
2154 /*
2155  * Update host report group timer for IGMPv1/v2.
2156  * Will update the global pending timer flags.
2157  */
2158 static void
igmp_v1v2_process_group_timer(struct in_multi * inm,const int igmp_version)2159 igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version)
2160 {
2161 	int report_timer_expired;
2162 
2163 	IGMP_LOCK_ASSERT_HELD();
2164 	INM_LOCK_ASSERT_HELD(inm);
2165 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2166 
2167 	if (inm->inm_timer == 0) {
2168 		report_timer_expired = 0;
2169 	} else if (--inm->inm_timer == 0) {
2170 		report_timer_expired = 1;
2171 	} else {
2172 		current_state_timers_running = 1;
2173 		/* caller will schedule timer */
2174 		return;
2175 	}
2176 
2177 	switch (inm->inm_state) {
2178 	case IGMP_NOT_MEMBER:
2179 	case IGMP_SILENT_MEMBER:
2180 	case IGMP_IDLE_MEMBER:
2181 	case IGMP_LAZY_MEMBER:
2182 	case IGMP_SLEEPING_MEMBER:
2183 	case IGMP_AWAKENING_MEMBER:
2184 		break;
2185 	case IGMP_REPORTING_MEMBER:
2186 		if (report_timer_expired) {
2187 			inm->inm_state = IGMP_IDLE_MEMBER;
2188 			(void) igmp_v1v2_queue_report(inm,
2189 			    (igmp_version == IGMP_VERSION_2) ?
2190 			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
2191 			    IGMP_v1_HOST_MEMBERSHIP_REPORT);
2192 			INM_LOCK_ASSERT_HELD(inm);
2193 			IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2194 		}
2195 		break;
2196 	case IGMP_G_QUERY_PENDING_MEMBER:
2197 	case IGMP_SG_QUERY_PENDING_MEMBER:
2198 	case IGMP_LEAVING_MEMBER:
2199 		break;
2200 	}
2201 }
2202 
2203 /*
2204  * Update a group's timers for IGMPv3.
2205  * Will update the global pending timer flags.
2206  * Note: Unlocked read from igi.
2207  */
2208 static void
igmp_v3_process_group_timers(struct igmp_ifinfo * igi,struct ifqueue * qrq,struct ifqueue * scq,struct in_multi * inm,const unsigned int uri_sec)2209 igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
2210     struct ifqueue *qrq, struct ifqueue *scq,
2211     struct in_multi *inm, const unsigned int uri_sec)
2212 {
2213 	int query_response_timer_expired;
2214 	int state_change_retransmit_timer_expired;
2215 
2216 	IGMP_LOCK_ASSERT_HELD();
2217 	INM_LOCK_ASSERT_HELD(inm);
2218 	IGI_LOCK_ASSERT_HELD(igi);
2219 	VERIFY(igi == inm->inm_igi);
2220 
2221 	query_response_timer_expired = 0;
2222 	state_change_retransmit_timer_expired = 0;
2223 
2224 	/*
2225 	 * During a transition from v1/v2 compatibility mode back to v3,
2226 	 * a group record in REPORTING state may still have its group
2227 	 * timer active. This is a no-op in this function; it is easier
2228 	 * to deal with it here than to complicate the timeout path.
2229 	 */
2230 	if (inm->inm_timer == 0) {
2231 		query_response_timer_expired = 0;
2232 	} else if (--inm->inm_timer == 0) {
2233 		query_response_timer_expired = 1;
2234 	} else {
2235 		current_state_timers_running = 1;
2236 		/* caller will schedule timer */
2237 	}
2238 
2239 	if (inm->inm_sctimer == 0) {
2240 		state_change_retransmit_timer_expired = 0;
2241 	} else if (--inm->inm_sctimer == 0) {
2242 		state_change_retransmit_timer_expired = 1;
2243 	} else {
2244 		state_change_timers_running = 1;
2245 		/* caller will schedule timer */
2246 	}
2247 
2248 	/* We are in timer callback, so be quick about it. */
2249 	if (!state_change_retransmit_timer_expired &&
2250 	    !query_response_timer_expired) {
2251 		return;
2252 	}
2253 
2254 	switch (inm->inm_state) {
2255 	case IGMP_NOT_MEMBER:
2256 	case IGMP_SILENT_MEMBER:
2257 	case IGMP_SLEEPING_MEMBER:
2258 	case IGMP_LAZY_MEMBER:
2259 	case IGMP_AWAKENING_MEMBER:
2260 	case IGMP_IDLE_MEMBER:
2261 		break;
2262 	case IGMP_G_QUERY_PENDING_MEMBER:
2263 	case IGMP_SG_QUERY_PENDING_MEMBER:
2264 		/*
2265 		 * Respond to a previously pending Group-Specific
2266 		 * or Group-and-Source-Specific query by enqueueing
2267 		 * the appropriate Current-State report for
2268 		 * immediate transmission.
2269 		 */
2270 		if (query_response_timer_expired) {
2271 			int retval;
2272 
2273 			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
2274 			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
2275 			IGMP_PRINTF(("%s: enqueue record = %d\n",
2276 			    __func__, retval));
2277 			inm->inm_state = IGMP_REPORTING_MEMBER;
2278 			/* XXX Clear recorded sources for next time. */
2279 			inm_clear_recorded(inm);
2280 		}
2281 		OS_FALLTHROUGH;
2282 	case IGMP_REPORTING_MEMBER:
2283 	case IGMP_LEAVING_MEMBER:
2284 		if (state_change_retransmit_timer_expired) {
2285 			/*
2286 			 * State-change retransmission timer fired.
2287 			 * If there are any further pending retransmissions,
2288 			 * set the global pending state-change flag, and
2289 			 * reset the timer.
2290 			 */
2291 			if (--inm->inm_scrv > 0) {
2292 				inm->inm_sctimer = (uint16_t)uri_sec;
2293 				state_change_timers_running = 1;
2294 				/* caller will schedule timer */
2295 			}
2296 			/*
2297 			 * Retransmit the previously computed state-change
2298 			 * report. If there are no further pending
2299 			 * retransmissions, the mbuf queue will be consumed.
2300 			 * Update T0 state to T1 as we have now sent
2301 			 * a state-change.
2302 			 */
2303 			(void) igmp_v3_merge_state_changes(inm, scq);
2304 
2305 			inm_commit(inm);
2306 			IGMP_INET_PRINTF(inm->inm_addr,
2307 			    ("%s: T1 -> T0 for %s/%s\n", __func__,
2308 			    _igmp_inet_buf, if_name(inm->inm_ifp)));
2309 
2310 			/*
2311 			 * If we are leaving the group for good, make sure
2312 			 * we release IGMP's reference to it.
2313 			 * This release must be deferred using a SLIST,
2314 			 * as we are called from a loop which traverses
2315 			 * the in_multihead list.
2316 			 */
2317 			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
2318 			    inm->inm_scrv == 0) {
2319 				inm->inm_state = IGMP_NOT_MEMBER;
2320 				/*
2321 				 * A reference has already been held in
2322 				 * igmp_final_leave() for this inm, so
2323 				 * no need to hold another one.  We also
2324 				 * bumped up its request count then, so
2325 				 * that it stays in in_multihead.  Both
2326 				 * of them will be released when it is
2327 				 * dequeued later on.
2328 				 */
2329 				VERIFY(inm->inm_nrelecnt != 0);
2330 				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
2331 				    inm, inm_nrele);
2332 			}
2333 		}
2334 		break;
2335 	}
2336 }
2337 
2338 /*
2339  * Suppress a group's pending response to a group or source/group query.
2340  *
2341  * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
2342  * Do NOT update ST1/ST0 as this operation merely suppresses
2343  * the currently pending group record.
2344  * Do NOT suppress the response to a general query. It is possible but
2345  * it would require adding another state or flag.
2346  */
2347 static void
igmp_v3_suppress_group_record(struct in_multi * inm)2348 igmp_v3_suppress_group_record(struct in_multi *inm)
2349 {
2350 	INM_LOCK_ASSERT_HELD(inm);
2351 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2352 
2353 	VERIFY(inm->inm_igi->igi_version == IGMP_VERSION_3);
2354 
2355 	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER ||
2356 	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER) {
2357 		return;
2358 	}
2359 
2360 	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
2361 		inm_clear_recorded(inm);
2362 	}
2363 
2364 	inm->inm_timer = 0;
2365 	inm->inm_state = IGMP_REPORTING_MEMBER;
2366 }
2367 
2368 /*
2369  * Switch to a different IGMP version on the given interface,
2370  * as per Section 7.2.1.
2371  */
2372 static uint32_t
igmp_set_version(struct igmp_ifinfo * igi,const int igmp_version)2373 igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version)
2374 {
2375 	int old_version_timer;
2376 
2377 	IGI_LOCK_ASSERT_HELD(igi);
2378 
2379 	IGMP_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__,
2380 	    igmp_version, (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2381 	    if_name(igi->igi_ifp)));
2382 
2383 	if (igmp_version == IGMP_VERSION_1 || igmp_version == IGMP_VERSION_2) {
2384 		/*
2385 		 * Compute the "Older Version Querier Present" timer as per
2386 		 * Section 8.12, in seconds.
2387 		 */
2388 		old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
2389 
2390 		if (igmp_version == IGMP_VERSION_1) {
2391 			igi->igi_v1_timer = old_version_timer;
2392 			igi->igi_v2_timer = 0;
2393 		} else if (igmp_version == IGMP_VERSION_2) {
2394 			igi->igi_v1_timer = 0;
2395 			igi->igi_v2_timer = old_version_timer;
2396 		}
2397 	}
2398 
2399 	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2400 		if (igi->igi_version != IGMP_VERSION_2) {
2401 			igmp_v3_cancel_link_timers(igi);
2402 			igi->igi_version = IGMP_VERSION_2;
2403 		}
2404 	} else if (igi->igi_v1_timer > 0) {
2405 		if (igi->igi_version != IGMP_VERSION_1) {
2406 			igmp_v3_cancel_link_timers(igi);
2407 			igi->igi_version = IGMP_VERSION_1;
2408 		}
2409 	}
2410 
2411 	IGI_LOCK_ASSERT_HELD(igi);
2412 
2413 	return MAX(igi->igi_v1_timer, igi->igi_v2_timer);
2414 }
2415 
2416 /*
2417  * Cancel pending IGMPv3 timers for the given link and all groups
2418  * joined on it; state-change, general-query, and group-query timers.
2419  *
2420  * Only ever called on a transition from v3 to Compatibility mode. Kill
2421  * the timers stone dead (this may be expensive for large N groups), they
2422  * will be restarted if Compatibility Mode deems that they must be due to
2423  * query processing.
2424  */
static void
igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
{
	struct ifnet            *ifp;
	struct in_multi         *inm;
	struct in_multistep     step;

	IGI_LOCK_ASSERT_HELD(igi);

	IGMP_PRINTF(("%s: cancel v3 timers on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), if_name(igi->igi_ifp)));

	/*
	 * Stop the v3 General Query Response on this link stone dead.
	 * If timer is woken up due to interface_timers_running,
	 * the flag will be cleared if there are no pending link timers.
	 */
	igi->igi_v3_timer = 0;

	/*
	 * Now clear the current-state and state-change report timers
	 * for all memberships scoped to this link.
	 */
	ifp = igi->igi_ifp;
	/* Drop igi lock while walking in_multihead; re-taken per-LEAVING inm. */
	IGI_UNLOCK(igi);

	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		/*
		 * NOTE(review): `&&' skips an inm only when BOTH its ifp
		 * and its igi differ; since inm_ifp and inm_igi are paired
		 * this appears equivalent to matching on ifp alone, but
		 * confirm the intent (`||' would skip strictly more).
		 */
		if (inm->inm_ifp != ifp && inm->inm_igi != igi) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/*
			 * These states are either not relevant in v3 mode,
			 * or are unreported. Do nothing.
			 */
			break;
		case IGMP_LEAVING_MEMBER:
			/*
			 * If we are leaving the group and switching to
			 * compatibility mode, we need to release the final
			 * reference held for issuing the INCLUDE {}, and
			 * transition to REPORTING to ensure the host leave
			 * message is sent upstream to the old querier --
			 * transition to NOT would lose the leave and race.
			 * During igmp_final_leave(), we bumped up both the
			 * request and reference counts.  Since we cannot
			 * call in_multi_detach() here, defer this task to
			 * the timer routine.
			 */
			VERIFY(inm->inm_nrelecnt != 0);
			IGI_LOCK(igi);
			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
			IGI_UNLOCK(igi);
			OS_FALLTHROUGH;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
			/* Discard any sources recorded for a pending query. */
			inm_clear_recorded(inm);
			OS_FALLTHROUGH;
		case IGMP_REPORTING_MEMBER:
			inm->inm_state = IGMP_REPORTING_MEMBER;
			break;
		}
		/*
		 * Always clear state-change and group report timers.
		 * Free any pending IGMPv3 state-change records.
		 */
		inm->inm_sctimer = 0;
		inm->inm_timer = 0;
		IF_DRAIN(&inm->inm_scq);
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();

	/* Restore the lock state expected by the caller. */
	IGI_LOCK(igi);
}
2512 
2513 /*
2514  * Update the Older Version Querier Present timers for a link.
2515  * See Section 7.2.1 of RFC 3376.
2516  */
static void
igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
{
	IGI_LOCK_ASSERT_HELD(igi);

	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
		/*
		 * IGMPv1 and IGMPv2 Querier Present timers expired.
		 *
		 * Revert to IGMPv3.
		 */
		if (igi->igi_version != IGMP_VERSION_3) {
			IGMP_PRINTF(("%s: transition from v%d -> v%d "
			    "on 0x%llx(%s)\n", __func__,
			    igi->igi_version, IGMP_VERSION_3,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    if_name(igi->igi_ifp)));
			igi->igi_version = IGMP_VERSION_3;
			/* Drop stale v1/v2 reports queued under compat mode. */
			IF_DRAIN(&igi->igi_v2q);
		}
	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
		/*
		 * IGMPv1 Querier Present timer expired,
		 * IGMPv2 Querier Present timer running.
		 * If IGMPv2 was disabled since last timeout,
		 * revert to IGMPv3.
		 * If IGMPv2 is enabled, revert to IGMPv2.
		 */
		if (!igmp_v2enable) {
			IGMP_PRINTF(("%s: transition from v%d -> v%d "
			    "on 0x%llx(%s%d)\n", __func__,
			    igi->igi_version, IGMP_VERSION_3,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
			igi->igi_v2_timer = 0;
			igi->igi_version = IGMP_VERSION_3;
			IF_DRAIN(&igi->igi_v2q);
		} else {
			--igi->igi_v2_timer;
			if (igi->igi_version != IGMP_VERSION_2) {
				IGMP_PRINTF(("%s: transition from v%d -> v%d "
				    "on 0x%llx(%s)\n", __func__,
				    igi->igi_version, IGMP_VERSION_2,
				    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
				    if_name(igi->igi_ifp)));
				/* NOTE(review): igi_gq appears to hold pending
				 * v3 query responses; confirm before relying
				 * on this in new code. */
				IF_DRAIN(&igi->igi_gq);
				igmp_v3_cancel_link_timers(igi);
				igi->igi_version = IGMP_VERSION_2;
			}
		}
	} else if (igi->igi_v1_timer > 0) {
		/*
		 * IGMPv1 Querier Present timer running.
		 * Stop IGMPv2 timer if running.
		 *
		 * If IGMPv1 was disabled since last timeout,
		 * revert to IGMPv3.
		 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
		 */
		if (!igmp_v1enable) {
			IGMP_PRINTF(("%s: transition from v%d -> v%d "
			    "on 0x%llx(%s%d)\n", __func__,
			    igi->igi_version, IGMP_VERSION_3,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
			igi->igi_v1_timer = 0;
			igi->igi_version = IGMP_VERSION_3;
			IF_DRAIN(&igi->igi_v2q);
		} else {
			--igi->igi_v1_timer;
		}
		/* v1 querier presence overrides any running v2 timer. */
		if (igi->igi_v2_timer > 0) {
			IGMP_PRINTF(("%s: cancel v2 timer on 0x%llx(%s%d)\n",
			    __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
			igi->igi_v2_timer = 0;
		}
	}
}
2597 
2598 /*
2599  * Dispatch an IGMPv1/v2 host report or leave message.
2600  * These are always small enough to fit inside a single mbuf.
2601  */
static int
igmp_v1v2_queue_report(struct in_multi *inm, const int type)
{
	struct ifnet            *ifp;
	struct igmp             *igmp;
	struct ip               *ip;
	struct mbuf             *m;
	int                     error = 0;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_HELD(inm->inm_igi);

	ifp = inm->inm_ifp;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		return ENOMEM;
	}
	/* Reserve room at the front of the mbuf for both headers. */
	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));

	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);

	/* Point past the (not yet filled) IP header to build the IGMP part. */
	m->m_data += sizeof(struct ip);
	m->m_len = sizeof(struct igmp);

	igmp = mtod(m, struct igmp *);
	igmp->igmp_type = (u_char)type;
	igmp->igmp_code = 0;
	igmp->igmp_group = inm->inm_addr;
	igmp->igmp_cksum = 0;
	/* Checksum covers only the IGMP message (m_len bytes). */
	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));

	/* Back up to expose the IP header portion. */
	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	ip = mtod(m, struct ip *);
	ip->ip_tos = 0;
	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
	ip->ip_off = 0;
	ip->ip_p = IPPROTO_IGMP;
	ip->ip_src.s_addr = INADDR_ANY;

	/* Leave messages go to the all-routers group; reports to the group. */
	if (type == IGMP_HOST_LEAVE_MESSAGE) {
		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
	} else {
		ip->ip_dst = inm->inm_addr;
	}

	igmp_save_context(m, ifp);

	m->m_flags |= M_IGMPV2;
	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK) {
		m->m_flags |= M_IGMP_LOOP;
	}

	/*
	 * Due to the fact that at this point we are possibly holding
	 * in_multihead_lock in shared or exclusive mode, we can't call
	 * igmp_sendpkt() here since that will eventually call ip_output(),
	 * which will try to lock in_multihead_lock and cause a deadlock.
	 * Instead we defer the work to the igmp_timeout() thread, thus
	 * avoiding unlocking in_multihead_lock here.
	 */
	if (IF_QFULL(&inm->inm_igi->igi_v2q)) {
		IGMP_PRINTF(("%s: v1/v2 outbound queue full\n", __func__));
		error = ENOMEM;
		m_freem(m);
	} else {
		IF_ENQUEUE(&inm->inm_igi->igi_v2q, m);
		VERIFY(error == 0);
	}
	/* 0 on success; ENOMEM if no mbuf or the outbound queue is full. */
	return error;
}
2675 
2676 /*
2677  * Process a state change from the upper layer for the given IPv4 group.
2678  *
2679  * Each socket holds a reference on the in_multi in its own ip_moptions.
2680  * The socket layer will have made the necessary updates to the group
2681  * state, it is now up to IGMP to issue a state change report if there
2682  * has been any change between T0 (when the last state-change was issued)
2683  * and T1 (now).
2684  *
2685  * We use the IGMPv3 state machine at group level. The IGMP module
2686  * however makes the decision as to which IGMP protocol version to speak.
2687  * A state change *from* INCLUDE {} always means an initial join.
2688  * A state change *to* INCLUDE {} always means a final leave.
2689  *
2690  * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
2691  * save ourselves a bunch of work; any exclusive mode groups need not
2692  * compute source filter lists.
2693  */
2694 int
igmp_change_state(struct in_multi * inm,struct igmp_tparams * itp)2695 igmp_change_state(struct in_multi *inm, struct igmp_tparams *itp)
2696 {
2697 	struct igmp_ifinfo *igi;
2698 	struct ifnet *ifp;
2699 	int error = 0;
2700 
2701 	VERIFY(itp != NULL);
2702 	bzero(itp, sizeof(*itp));
2703 
2704 	INM_LOCK_ASSERT_HELD(inm);
2705 	VERIFY(inm->inm_igi != NULL);
2706 	IGI_LOCK_ASSERT_NOTHELD(inm->inm_igi);
2707 
2708 	/*
2709 	 * Try to detect if the upper layer just asked us to change state
2710 	 * for an interface which has now gone away.
2711 	 */
2712 	VERIFY(inm->inm_ifma != NULL);
2713 	ifp = inm->inm_ifma->ifma_ifp;
2714 	/*
2715 	 * Sanity check that netinet's notion of ifp is the same as net's.
2716 	 */
2717 	VERIFY(inm->inm_ifp == ifp);
2718 
2719 	igi = IGMP_IFINFO(ifp);
2720 	VERIFY(igi != NULL);
2721 
2722 	/*
2723 	 * If we detect a state transition to or from MCAST_UNDEFINED
2724 	 * for this group, then we are starting or finishing an IGMP
2725 	 * life cycle for this group.
2726 	 */
2727 	if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
2728 		IGMP_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2729 		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode));
2730 		if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
2731 			IGMP_PRINTF(("%s: initial join\n", __func__));
2732 			error = igmp_initial_join(inm, igi, itp);
2733 			goto out;
2734 		} else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
2735 			IGMP_PRINTF(("%s: final leave\n", __func__));
2736 			igmp_final_leave(inm, igi, itp);
2737 			goto out;
2738 		}
2739 	} else {
2740 		IGMP_PRINTF(("%s: filter set change\n", __func__));
2741 	}
2742 
2743 	error = igmp_handle_state_change(inm, igi, itp);
2744 out:
2745 	return error;
2746 }
2747 
2748 /*
2749  * Perform the initial join for an IGMP group.
2750  *
2751  * When joining a group:
2752  *  If the group should have its IGMP traffic suppressed, do nothing.
2753  *  IGMPv1 starts sending IGMPv1 host membership reports.
2754  *  IGMPv2 starts sending IGMPv2 host membership reports.
2755  *  IGMPv3 will schedule an IGMPv3 state-change report containing the
2756  *  initial state of the membership.
2757  */
static int
igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi,
    struct igmp_tparams *itp)
{
	struct ifnet            *ifp;
	struct ifqueue          *ifq;
	int                      error, retval, syncstates;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_NOTHELD(igi);
	VERIFY(itp != NULL);

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: initial join %s on ifp 0x%llx(%s)\n", __func__,
	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
	    if_name(inm->inm_ifp)));

	error = 0;
	/* syncstates: commit T1 -> T0 before return (atomic state change). */
	syncstates = 1;

	ifp = inm->inm_ifp;

	IGI_LOCK(igi);
	VERIFY(igi->igi_ifp == ifp);

	/*
	 * Groups joined on loopback or marked as 'not reported',
	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
	 * are never reported in any IGMP protocol exchanges.
	 * All other groups enter the appropriate IGMP state machine
	 * for the version in use on this link.
	 * A link marked as IGIF_SILENT causes IGMP to be completely
	 * disabled for the link.
	 */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (igi->igi_flags & IGIF_SILENT) ||
	    !igmp_isgroupreported(inm->inm_addr)) {
		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
		    __func__));
		inm->inm_state = IGMP_SILENT_MEMBER;
		inm->inm_timer = 0;
	} else {
		/*
		 * Deal with overlapping in_multi lifecycle.
		 * If this group was LEAVING, then make sure
		 * we drop the reference we picked up to keep the
		 * group around for the final INCLUDE {} enqueue.
		 * Since we cannot call in_multi_detach() here,
		 * defer this task to the timer routine.
		 */
		if (igi->igi_version == IGMP_VERSION_3 &&
		    inm->inm_state == IGMP_LEAVING_MEMBER) {
			VERIFY(inm->inm_nrelecnt != 0);
			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
		}

		inm->inm_state = IGMP_REPORTING_MEMBER;

		switch (igi->igi_version) {
		case IGMP_VERSION_1:
		case IGMP_VERSION_2:
			/* v1/v2: queue an unsolicited report and go IDLE. */
			inm->inm_state = IGMP_IDLE_MEMBER;
			error = igmp_v1v2_queue_report(inm,
			    (igi->igi_version == IGMP_VERSION_2) ?
			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
			    IGMP_v1_HOST_MEMBERSHIP_REPORT);

			INM_LOCK_ASSERT_HELD(inm);
			IGI_LOCK_ASSERT_HELD(igi);

			if (error == 0) {
				/* Arm a randomized re-report timer. */
				inm->inm_timer =
				    IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
				itp->cst = 1;
			}
			break;

		case IGMP_VERSION_3:
			/*
			 * Defer update of T0 to T1, until the first copy
			 * of the state change has been transmitted.
			 */
			syncstates = 0;

			/*
			 * Immediately enqueue a State-Change Report for
			 * this interface, freeing any previous reports.
			 * Don't kick the timers if there is nothing to do,
			 * or if an error occurred.
			 */
			ifq = &inm->inm_scq;
			IF_DRAIN(ifq);
			retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
			    0, 0);
			itp->cst = (ifq->ifq_len > 0);
			IGMP_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			if (retval <= 0) {
				/* Negative retval encodes an errno. */
				error = retval * -1;
				break;
			}

			/*
			 * Schedule transmission of pending state-change
			 * report up to RV times for this link. The timer
			 * will fire at the next igmp_timeout (1 second),
			 * giving us an opportunity to merge the reports.
			 */
			if (igi->igi_flags & IGIF_LOOPBACK) {
				inm->inm_scrv = 1;
			} else {
				VERIFY(igi->igi_rv > 1);
				inm->inm_scrv = (uint16_t)igi->igi_rv;
			}
			inm->inm_sctimer = 1;
			itp->sct = 1;

			error = 0;
			break;
		}
	}
	IGI_UNLOCK(igi);

	/*
	 * Only update the T0 state if state change is atomic,
	 * i.e. we don't need to wait for a timer to fire before we
	 * can consider the state change to have been communicated.
	 */
	if (syncstates) {
		inm_commit(inm);
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 -> T0 for %s/%s\n", __func__,
		    _igmp_inet_buf, if_name(inm->inm_ifp)));
	}

	return error;
}
2895 
2896 /*
2897  * Issue an intermediate state change during the IGMP life-cycle.
2898  */
static int
igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi,
    struct igmp_tparams *itp)
{
	struct ifnet            *ifp;
	int                      retval = 0;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_NOTHELD(igi);
	VERIFY(itp != NULL);

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: state change for %s on ifp 0x%llx(%s)\n", __func__,
	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
	    if_name(inm->inm_ifp)));

	ifp = inm->inm_ifp;

	IGI_LOCK(igi);
	VERIFY(igi->igi_ifp == ifp);

	/*
	 * Loopback, silent, unreported, or non-v3 links: nothing to
	 * transmit; commit T1 -> T0 immediately and return 0.
	 */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (igi->igi_flags & IGIF_SILENT) ||
	    !igmp_isgroupreported(inm->inm_addr) ||
	    (igi->igi_version != IGMP_VERSION_3)) {
		IGI_UNLOCK(igi);
		if (!igmp_isgroupreported(inm->inm_addr)) {
			IGMP_PRINTF(("%s: not kicking state "
			    "machine for silent group\n", __func__));
		}
		IGMP_PRINTF(("%s: nothing to do\n", __func__));
		inm_commit(inm);
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 -> T0 for %s/%s\n", __func__,
		    _igmp_inet_buf, inm->inm_ifp->if_name));
		goto done;
	}

	/* Replace any previously queued state-change report. */
	IF_DRAIN(&inm->inm_scq);

	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
	itp->cst = (inm->inm_scq.ifq_len > 0);
	IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
	if (retval <= 0) {
		IGI_UNLOCK(igi);
		/* NOTE(review): negated retval presumably yields an errno
		 * from igmp_v3_enqueue_group_record() — confirm there. */
		retval *= -1;
		goto done;
	}
	/*
	 * If record(s) were enqueued, start the state-change
	 * report timer for this group.
	 */
	inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : (uint16_t)igi->igi_rv);
	inm->inm_sctimer = 1;
	itp->sct = 1;
	IGI_UNLOCK(igi);
done:
	return retval;
}
2958 
2959 /*
2960  * Perform the final leave for an IGMP group.
2961  *
2962  * When leaving a group:
2963  *  IGMPv1 does nothing.
2964  *  IGMPv2 sends a host leave message, if and only if we are the reporter.
2965  *  IGMPv3 enqueues a state-change report containing a transition
2966  *  to INCLUDE {} for immediate transmission.
2967  */
static void
igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi,
    struct igmp_tparams *itp)
{
	int syncstates = 1;	/* cleared when a deferred IGMPv3 leave is queued */

	/*
	 * Caller must hold the in_multi lock and must NOT hold the
	 * igmp_ifinfo lock; it is acquired and released here.
	 */
	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_NOTHELD(igi);
	VERIFY(itp != NULL);

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: final leave %s on ifp 0x%llx(%s)\n", __func__,
	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
	    if_name(inm->inm_ifp)));

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_LEAVING_MEMBER:
		/* Already leaving or left; do nothing. */
		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
		    __func__));
		break;
	case IGMP_REPORTING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		IGI_LOCK(igi);
		if (igi->igi_version == IGMP_VERSION_2) {
			/*
			 * The query-pending states only exist in IGMPv3;
			 * reaching them while in v2 mode indicates state
			 * corruption.
			 */
			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
				panic("%s: IGMPv3 state reached, not IGMPv3 "
				    "mode (inm %s, igi %s)", __func__,
				    if_name(inm->inm_ifp),
				    if_name(igi->igi_ifp));
				/* NOTREACHED */
			}
			/* Schedule the current-state timer if the enqueue succeeded. */
			itp->cst = (igmp_v1v2_queue_report(inm,
			    IGMP_HOST_LEAVE_MESSAGE) == 0);

			INM_LOCK_ASSERT_HELD(inm);
			IGI_LOCK_ASSERT_HELD(igi);

			inm->inm_state = IGMP_NOT_MEMBER;
		} else if (igi->igi_version == IGMP_VERSION_3) {
			/*
			 * Stop group timer and all pending reports.
			 * Immediately enqueue a state-change report
			 * TO_IN {} to be sent on the next timeout,
			 * giving us an opportunity to merge reports.
			 */
			IF_DRAIN(&inm->inm_scq);
			inm->inm_timer = 0;
			/* Loopback links need only a single transmission. */
			if (igi->igi_flags & IGIF_LOOPBACK) {
				inm->inm_scrv = 1;
			} else {
				inm->inm_scrv = (uint16_t)igi->igi_rv;
			}
			IGMP_INET_PRINTF(inm->inm_addr,
			    ("%s: Leaving %s/%s with %d "
			    "pending retransmissions.\n", __func__,
			    _igmp_inet_buf, if_name(inm->inm_ifp),
			    inm->inm_scrv));
			if (inm->inm_scrv == 0) {
				/* No retransmissions required; leave now. */
				inm->inm_state = IGMP_NOT_MEMBER;
				inm->inm_sctimer = 0;
			} else {
				int retval;
				/*
				 * Stick around in the in_multihead list;
				 * the final detach will be issued by
				 * igmp_v3_process_group_timers() when
				 * the retransmit timer expires.
				 */
				INM_ADDREF_LOCKED(inm);
				VERIFY(inm->inm_debug & IFD_ATTACHED);
				inm->inm_reqcnt++;
				VERIFY(inm->inm_reqcnt >= 1);
				/* Count the deferred release taken above. */
				inm->inm_nrelecnt++;
				VERIFY(inm->inm_nrelecnt != 0);

				retval = igmp_v3_enqueue_group_record(
					&inm->inm_scq, inm, 1, 0, 0);
				itp->cst = (inm->inm_scq.ifq_len > 0);
				KASSERT(retval != 0,
				    ("%s: enqueue record = %d\n", __func__,
				    retval));

				inm->inm_state = IGMP_LEAVING_MEMBER;
				inm->inm_sctimer = 1;
				itp->sct = 1;
				/* T0 sync happens when the timer fires. */
				syncstates = 0;
			}
		}
		IGI_UNLOCK(igi);
		break;
	case IGMP_LAZY_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		/* Our reports are suppressed; do nothing. */
		break;
	}

	/*
	 * For atomic leaves, commit T1 -> T0 now and clear the T1 filter
	 * mode, as the group has no remaining listeners on this socket.
	 */
	if (syncstates) {
		inm_commit(inm);
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 -> T0 for %s/%s\n", __func__,
		    _igmp_inet_buf, if_name(inm->inm_ifp)));
		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 now MCAST_UNDEFINED for %s/%s\n",
		    __func__, _igmp_inet_buf, if_name(inm->inm_ifp)));
	}
}
3083 
3084 /*
3085  * Enqueue an IGMPv3 group record to the given output queue.
3086  *
3087  * XXX This function could do with having the allocation code
3088  * split out, and the multiple-tree-walks coalesced into a single
3089  * routine as has been done in igmp_v3_enqueue_filter_change().
3090  *
3091  * If is_state_change is zero, a current-state record is appended.
3092  * If is_state_change is non-zero, a state-change report is appended.
3093  *
3094  * If is_group_query is non-zero, an mbuf packet chain is allocated.
3095  * If is_group_query is zero, and if there is a packet with free space
3096  * at the tail of the queue, it will be appended to providing there
3097  * is enough free space.
3098  * Otherwise a new mbuf packet chain is allocated.
3099  *
3100  * If is_source_query is non-zero, each source is checked to see if
3101  * it was recorded for a Group-Source query, and will be omitted if
3102  * it is not both in-mode and recorded.
3103  *
3104  * The function will attempt to allocate leading space in the packet
3105  * for the IP/IGMP header to be prepended without fragmenting the chain.
3106  *
3107  * If successful the size of all data appended to the queue is returned,
3108  * otherwise an error code less than zero is returned, or zero if
3109  * no record(s) were appended.
3110  */
static int
igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
    const int is_state_change, const int is_group_query,
    const int is_source_query)
{
	struct igmp_grouprec     ig;	/* template record header */
	struct igmp_grouprec    *pig;	/* record header inside the mbuf */
	struct ifnet            *ifp;
	struct ip_msource       *ims, *nims;
	struct mbuf             *m0, *m, *md;
	int                      error, is_filter_list_change;
	int                      minrec0len, m0srcs, nbytes, off;
	uint16_t                 msrcs;	/* # sources appended to current record */
	int                      record_has_sources;
	int                      now;	/* source's filter mode at t1 */
	int                      type;	/* IGMPv3 group record type */
	in_addr_t                naddr;
	uint16_t                 mode;	/* group filter mode at t1 */
	u_int16_t                ig_numsrc;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_HELD(inm->inm_igi);

	/* NOTE(review): 'error' is initialized but never used below. */
	error = 0;
	ifp = inm->inm_ifp;
	is_filter_list_change = 0;
	m = NULL;
	m0 = NULL;
	m0srcs = 0;
	msrcs = 0;
	nbytes = 0;
	nims = NULL;
	record_has_sources = 1;
	pig = NULL;
	type = IGMP_DO_NOTHING;
	mode = inm->inm_st[1].iss_fmode;

	/*
	 * If we did not transition out of ASM mode during t0->t1,
	 * and there are no source nodes to process, we can skip
	 * the generation of source records.
	 */
	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
	    inm->inm_nsrc == 0) {
		record_has_sources = 0;
	}

	if (is_state_change) {
		/*
		 * Queue a state change record.
		 * If the mode did not change, and there are non-ASM
		 * listeners or source filters present,
		 * we potentially need to issue two records for the group.
		 * If we are transitioning to MCAST_UNDEFINED, we need
		 * not send any sources.
		 * If there are ASM listeners, and there was no filter
		 * mode transition of any kind, do nothing.
		 */
		if (mode != inm->inm_st[0].iss_fmode) {
			if (mode == MCAST_EXCLUDE) {
				IGMP_PRINTF(("%s: change to EXCLUDE\n",
				    __func__));
				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
			} else {
				IGMP_PRINTF(("%s: change to INCLUDE\n",
				    __func__));
				type = IGMP_CHANGE_TO_INCLUDE_MODE;
				if (mode == MCAST_UNDEFINED) {
					record_has_sources = 0;
				}
			}
		} else {
			if (record_has_sources) {
				is_filter_list_change = 1;
			} else {
				type = IGMP_DO_NOTHING;
			}
		}
	} else {
		/*
		 * Queue a current state record.
		 */
		if (mode == MCAST_EXCLUDE) {
			type = IGMP_MODE_IS_EXCLUDE;
		} else if (mode == MCAST_INCLUDE) {
			type = IGMP_MODE_IS_INCLUDE;
			VERIFY(inm->inm_st[1].iss_asm == 0);
		}
	}

	/*
	 * Generate the filter list changes using a separate function.
	 */
	if (is_filter_list_change) {
		return igmp_v3_enqueue_filter_change(ifq, inm);
	}

	if (type == IGMP_DO_NOTHING) {
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: nothing to do for %s/%s\n",
		    __func__, _igmp_inet_buf,
		    if_name(inm->inm_ifp)));
		return 0;
	}

	/*
	 * If any sources are present, we must be able to fit at least
	 * one in the trailing space of the tail packet's mbuf,
	 * ideally more.
	 */
	minrec0len = sizeof(struct igmp_grouprec);
	if (record_has_sources) {
		minrec0len += sizeof(in_addr_t);
	}

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: queueing %s for %s/%s\n", __func__,
	    igmp_rec_type_to_str(type), _igmp_inet_buf,
	    if_name(inm->inm_ifp)));

	/*
	 * Check if we have a packet in the tail of the queue for this
	 * group into which the first group record for this group will fit.
	 * Otherwise allocate a new packet.
	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
	 * Note: Group records for G/GSR query responses MUST be sent
	 * in their own packet.
	 */
	m0 = ifq->ifq_tail;
	if (!is_group_query &&
	    m0 != NULL &&
	    (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
	    (m0->m_pkthdr.len + minrec0len) <
	    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
		m = m0;
		IGMP_PRINTF(("%s: use existing packet\n", __func__));
	} else {
		if (IF_QFULL(ifq)) {
			IGMP_PRINTF(("%s: outbound queue full\n", __func__));
			return -ENOMEM;
		}
		m = NULL;
		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
		/*
		 * Try a cluster first for non-state-change, non-query
		 * records; otherwise fall back to a plain header mbuf.
		 */
		if (!is_state_change && !is_group_query) {
			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
			if (m) {
				m->m_data += IGMP_LEADINGSPACE;
			}
		}
		if (m == NULL) {
			m = m_gethdr(M_DONTWAIT, MT_DATA);
			if (m) {
				MH_ALIGN(m, IGMP_LEADINGSPACE);
			}
		}
		if (m == NULL) {
			return -ENOMEM;
		}

		/* Stash the ifp so igmp_sendpkt() can recover it later. */
		igmp_save_context(m, ifp);

		IGMP_PRINTF(("%s: allocated first packet\n", __func__));
	}

	/*
	 * Append group record.
	 * If we have sources, we don't know how many yet.
	 */
	ig.ig_type = (u_char)type;
	ig.ig_datalen = 0;
	ig.ig_numsrc = 0;
	ig.ig_group = inm->inm_addr;
	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
		if (m != m0) {
			m_freem(m);
		}
		IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
		return -ENOMEM;
	}
	nbytes += sizeof(struct igmp_grouprec);

	/*
	 * Append as many sources as will fit in the first packet.
	 * If we are appending to a new packet, the chain allocation
	 * may potentially use clusters; use m_getptr() in this case.
	 * If we are appending to an existing packet, we need to obtain
	 * a pointer to the group record after m_append(), in case a new
	 * mbuf was allocated.
	 * Only append sources which are in-mode at t1. If we are
	 * transitioning to MCAST_UNDEFINED state on the group, do not
	 * include source entries.
	 * Only report recorded sources in our filter set when responding
	 * to a group-source query.
	 */
	if (record_has_sources) {
		if (m == m0) {
			md = m_last(m);
			pig = (struct igmp_grouprec *)(void *)
			    (mtod(md, uint8_t *) + md->m_len - nbytes);
		} else {
			md = m_getptr(m, 0, &off);
			pig = (struct igmp_grouprec *)(void *)
			    (mtod(md, uint8_t *) + off);
		}
		msrcs = 0;
		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
#ifdef IGMP_DEBUG
			char buf[MAX_IPv4_STR_LEN];

			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
#endif
			now = ims_get_mode(inm, ims, 1);
			IGMP_PRINTF(("%s: node is %d\n", __func__, now));
			if ((now != mode) ||
			    (now == mode && mode == MCAST_UNDEFINED)) {
				IGMP_PRINTF(("%s: skip node\n", __func__));
				continue;
			}
			if (is_source_query && ims->ims_stp == 0) {
				IGMP_PRINTF(("%s: skip unrecorded node\n",
				    __func__));
				continue;
			}
			IGMP_PRINTF(("%s: append node\n", __func__));
			/* Host-order source address to network order. */
			naddr = htonl(ims->ims_haddr);
			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
				if (m != m0) {
					m_freem(m);
				}
				IGMP_PRINTF(("%s: m_append() failed.\n",
				    __func__));
				return -ENOMEM;
			}
			nbytes += sizeof(in_addr_t);
			++msrcs;
			if (msrcs == m0srcs) {
				break;
			}
		}
		IGMP_PRINTF(("%s: msrcs is %d this packet\n", __func__,
		    msrcs));
		/* Patch the source count into the record header in-place. */
		ig_numsrc = htons(msrcs);
		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
		nbytes += (msrcs * sizeof(in_addr_t));
	}

	if (is_source_query && msrcs == 0) {
		IGMP_PRINTF(("%s: no recorded sources to report\n", __func__));
		if (m != m0) {
			m_freem(m);
		}
		return 0;
	}

	/*
	 * We are good to go with first packet.
	 */
	if (m != m0) {
		IGMP_PRINTF(("%s: enqueueing first packet\n", __func__));
		m->m_pkthdr.vt_nrecs = 1;
		IF_ENQUEUE(ifq, m);
	} else {
		m->m_pkthdr.vt_nrecs++;
	}
	/*
	 * No further work needed if no source list in packet(s).
	 */
	if (!record_has_sources) {
		return nbytes;
	}

	/*
	 * Whilst sources remain to be announced, we need to allocate
	 * a new packet and fill out as many sources as will fit.
	 * Always try for a cluster first.
	 * nims holds the RB-tree continuation point from the walk above.
	 */
	while (nims != NULL) {
		if (IF_QFULL(ifq)) {
			IGMP_PRINTF(("%s: outbound queue full\n", __func__));
			return -ENOMEM;
		}
		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
		if (m) {
			m->m_data += IGMP_LEADINGSPACE;
		}
		if (m == NULL) {
			m = m_gethdr(M_DONTWAIT, MT_DATA);
			if (m) {
				MH_ALIGN(m, IGMP_LEADINGSPACE);
			}
		}
		if (m == NULL) {
			return -ENOMEM;
		}
		igmp_save_context(m, ifp);
		md = m_getptr(m, 0, &off);
		pig = (struct igmp_grouprec *)(void *)
		    (mtod(md, uint8_t *) + off);
		IGMP_PRINTF(("%s: allocated next packet\n", __func__));

		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
			if (m != m0) {
				m_freem(m);
			}
			IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
			return -ENOMEM;
		}
		m->m_pkthdr.vt_nrecs = 1;
		nbytes += sizeof(struct igmp_grouprec);

		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);

		msrcs = 0;
		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
#ifdef IGMP_DEBUG
			char buf[MAX_IPv4_STR_LEN];

			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
#endif
			now = ims_get_mode(inm, ims, 1);
			if ((now != mode) ||
			    (now == mode && mode == MCAST_UNDEFINED)) {
				IGMP_PRINTF(("%s: skip node\n", __func__));
				continue;
			}
			if (is_source_query && ims->ims_stp == 0) {
				IGMP_PRINTF(("%s: skip unrecorded node\n",
				    __func__));
				continue;
			}
			IGMP_PRINTF(("%s: append node\n", __func__));
			naddr = htonl(ims->ims_haddr);
			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
				if (m != m0) {
					m_freem(m);
				}
				IGMP_PRINTF(("%s: m_append() failed.\n",
				    __func__));
				return -ENOMEM;
			}
			++msrcs;
			if (msrcs == m0srcs) {
				break;
			}
		}
		ig_numsrc = htons(msrcs);
		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
		nbytes += (msrcs * sizeof(in_addr_t));

		IGMP_PRINTF(("%s: enqueueing next packet\n", __func__));
		IF_ENQUEUE(ifq, m);
	}

	return nbytes;
}
3472 
3473 /*
3474  * Type used to mark record pass completion.
3475  * We exploit the fact we can cast to this easily from the
3476  * current filter modes on each ip_msource node.
3477  */
typedef enum {
	REC_NONE = 0x00,        /* MCAST_UNDEFINED */
	REC_ALLOW = 0x01,       /* MCAST_INCLUDE */
	REC_BLOCK = 0x02,       /* MCAST_EXCLUDE */
	REC_FULL = REC_ALLOW | REC_BLOCK  /* both record passes completed */
} rectype_t;
3484 
3485 /*
3486  * Enqueue an IGMPv3 filter list change to the given output queue.
3487  *
3488  * Source list filter state is held in an RB-tree. When the filter list
3489  * for a group is changed without changing its mode, we need to compute
3490  * the deltas between T0 and T1 for each source in the filter set,
3491  * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3492  *
3493  * As we may potentially queue two record types, and the entire R-B tree
3494  * needs to be walked at once, we break this out into its own function
3495  * so we can generate a tightly packed queue of packets.
3496  *
3497  * XXX This could be written to only use one tree walk, although that makes
3498  * serializing into the mbuf chains a bit harder. For now we do two walks
3499  * which makes things easier on us, and it may or may not be harder on
3500  * the L2 cache.
3501  *
3502  * If successful the size of all data appended to the queue is returned,
3503  * otherwise an error code less than zero is returned, or zero if
3504  * no record(s) were appended.
3505  */
static int
igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
{
	/* Smallest useful record: header plus one source address. */
	static const int MINRECLEN =
	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
	struct ifnet            *ifp;
	struct igmp_grouprec     ig;
	struct igmp_grouprec    *pig;
	struct ip_msource       *ims, *nims;
	struct mbuf             *m, *m0, *md;
	in_addr_t                naddr;
	int                      m0srcs, nbytes, npbytes, off, schanged;
	uint16_t                 rsrcs;
	int                      nallow, nblock;
	uint16_t                 mode;
	uint8_t                  now, then;
	rectype_t                crt, drt, nrt;
	u_int16_t                ig_numsrc;

	INM_LOCK_ASSERT_HELD(inm);

	/*
	 * Nothing to report if there are no sources, or if the group
	 * stayed in ASM mode across the t0->t1 transition.
	 */
	if (inm->inm_nsrc == 0 ||
	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0)) {
		return 0;
	}

	ifp = inm->inm_ifp;                     /* interface */
	mode = inm->inm_st[1].iss_fmode;        /* filter mode at t1 */
	crt = REC_NONE; /* current group record type */
	drt = REC_NONE; /* mask of completed group record types */
	nrt = REC_NONE; /* record type for current node */
	m0srcs = 0;     /* # source which will fit in current mbuf chain */
	nbytes = 0;     /* # of bytes appended to group's state-change queue */
	npbytes = 0;    /* # of bytes appended this packet */
	rsrcs = 0;      /* # sources encoded in current record */
	schanged = 0;   /* # nodes encoded in overall filter change */
	nallow = 0;     /* # of source entries in ALLOW_NEW */
	nblock = 0;     /* # of source entries in BLOCK_OLD */
	nims = NULL;    /* next tree node pointer */

	/*
	 * For each possible filter record mode.
	 * The first kind of source we encounter tells us which
	 * is the first kind of record we start appending.
	 * If a node transitioned to UNDEFINED at t1, its mode is treated
	 * as the inverse of the group's filter mode.
	 */
	while (drt != REC_FULL) {
		do {
			/*
			 * Reuse the tail packet if this record fits;
			 * otherwise allocate a new one (cluster first,
			 * then a plain header mbuf).
			 */
			m0 = ifq->ifq_tail;
			if (m0 != NULL &&
			    (m0->m_pkthdr.vt_nrecs + 1 <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (m0->m_pkthdr.len + MINRECLEN) <
			    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
				m = m0;
				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
				    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				IGMP_PRINTF(("%s: use previous packet\n",
				    __func__));
			} else {
				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
				if (m) {
					m->m_data += IGMP_LEADINGSPACE;
				}
				if (m == NULL) {
					m = m_gethdr(M_DONTWAIT, MT_DATA);
					if (m) {
						MH_ALIGN(m, IGMP_LEADINGSPACE);
					}
				}
				if (m == NULL) {
					IGMP_PRINTF(("%s: m_get*() failed\n",
					    __func__));
					return -ENOMEM;
				}
				m->m_pkthdr.vt_nrecs = 0;
				igmp_save_context(m, ifp);
				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
				    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				npbytes = 0;
				IGMP_PRINTF(("%s: allocated new packet\n",
				    __func__));
			}
			/*
			 * Append the IGMP group record header to the
			 * current packet's data area.
			 * Recalculate pointer to free space for next
			 * group record, in case m_append() allocated
			 * a new mbuf or cluster.
			 */
			memset(&ig, 0, sizeof(ig));
			ig.ig_group = inm->inm_addr;
			if (!m_append(m, sizeof(ig), (void *)&ig)) {
				if (m != m0) {
					m_freem(m);
				}
				IGMP_PRINTF(("%s: m_append() failed\n",
				    __func__));
				return -ENOMEM;
			}
			npbytes += sizeof(struct igmp_grouprec);
			if (m != m0) {
				/* new packet; offset within the chain */
				md = m_getptr(m, npbytes -
				    sizeof(struct igmp_grouprec), &off);
				pig = (struct igmp_grouprec *)(void *)(mtod(md,
				    uint8_t *) + off);
			} else {
				/* current packet; offset from last append */
				md = m_last(m);
				pig = (struct igmp_grouprec *)(void *)(mtod(md,
				    uint8_t *) + md->m_len -
				    sizeof(struct igmp_grouprec));
			}
			/*
			 * Begin walking the tree for this record type
			 * pass, or continue from where we left off
			 * previously if we had to allocate a new packet.
			 * Only report deltas in-mode at t1.
			 * We need not report included sources as allowed
			 * if we are in inclusive mode on the group,
			 * however the converse is not true.
			 */
			rsrcs = 0;
			if (nims == NULL) {
				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
			}
			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
#ifdef IGMP_DEBUG
				char buf[MAX_IPv4_STR_LEN];

				inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
				IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
#endif
				now = ims_get_mode(inm, ims, 1);
				then = ims_get_mode(inm, ims, 0);
				IGMP_PRINTF(("%s: mode: t0 %d, t1 %d\n",
				    __func__, then, now));
				if (now == then) {
					IGMP_PRINTF(("%s: skip unchanged\n",
					    __func__));
					continue;
				}
				if (mode == MCAST_EXCLUDE &&
				    now == MCAST_INCLUDE) {
					IGMP_PRINTF(("%s: skip IN src on EX "
					    "group\n", __func__));
					continue;
				}
				/*
				 * A node that became UNDEFINED at t1 is
				 * reported as the inverse of the group's
				 * filter mode.
				 */
				nrt = (rectype_t)now;
				if (nrt == REC_NONE) {
					nrt = (rectype_t)(~mode & REC_FULL);
				}
				/*
				 * First changed node selects this pass's
				 * record type; nodes of the other type are
				 * picked up on the next pass.
				 */
				if (schanged++ == 0) {
					crt = nrt;
				} else if (crt != nrt) {
					continue;
				}
				naddr = htonl(ims->ims_haddr);
				if (!m_append(m, sizeof(in_addr_t),
				    (void *)&naddr)) {
					if (m != m0) {
						m_freem(m);
					}
					IGMP_PRINTF(("%s: m_append() failed\n",
					    __func__));
					return -ENOMEM;
				}
				nallow += !!(crt == REC_ALLOW);
				nblock += !!(crt == REC_BLOCK);
				if (++rsrcs == m0srcs) {
					break;
				}
			}
			/*
			 * If we did not append any tree nodes on this
			 * pass, back out of allocations.
			 */
			if (rsrcs == 0) {
				npbytes -= sizeof(struct igmp_grouprec);
				if (m != m0) {
					IGMP_PRINTF(("%s: m_free(m)\n",
					    __func__));
					m_freem(m);
				} else {
					IGMP_PRINTF(("%s: m_adj(m, -ig)\n",
					    __func__));
					m_adj(m, -((int)sizeof(
						    struct igmp_grouprec)));
				}
				continue;
			}
			npbytes += (rsrcs * sizeof(in_addr_t));
			/* Now we know the record type; fill in the header. */
			if (crt == REC_ALLOW) {
				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
			} else if (crt == REC_BLOCK) {
				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
			}
			ig_numsrc = htons(rsrcs);
			bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
			/*
			 * Count the new group record, and enqueue this
			 * packet if it wasn't already queued.
			 */
			m->m_pkthdr.vt_nrecs++;
			if (m != m0) {
				IF_ENQUEUE(ifq, m);
			}
			nbytes += npbytes;
		} while (nims != NULL);
		/* Mark this pass done and flip to the other record type. */
		drt |= crt;
		crt = (~crt & REC_FULL);
	}

	IGMP_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
	    nallow, nblock));

	return nbytes;
}
3728 
/*
 * Merge the group's pending state-change queue (inm_scq) into the
 * per-link state-change queue ifscq, coalescing records into shared
 * packets where MTU and record-count limits allow.
 * Returns 0 on success or ENOMEM if a copy could not be made.
 */
static int
igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
{
	struct ifqueue  *gq;
	struct mbuf     *m;             /* pending state-change */
	struct mbuf     *m0;            /* copy of pending state-change */
	struct mbuf     *mt;            /* last state-change in packet */
	struct mbuf     *n;
	int              docopy, domerge;
	u_int            recslen;

	INM_LOCK_ASSERT_HELD(inm);

	docopy = 0;
	domerge = 0;
	recslen = 0;

	/*
	 * If there are further pending retransmissions, make a writable
	 * copy of each queued state-change message before merging.
	 */
	if (inm->inm_scrv > 0) {
		docopy = 1;
	}

	gq = &inm->inm_scq;
#ifdef IGMP_DEBUG
	if (gq->ifq_head == NULL) {
		IGMP_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
	}
#endif

	/*
	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
	 * packet might not always be at the head of the ifqueue.
	 */
	m = gq->ifq_head;
	while (m != NULL) {
		/*
		 * Only merge the report into the current packet if
		 * there is sufficient space to do so; an IGMPv3 report
		 * packet may only contain 65,535 group records.
		 * Always use a simple mbuf chain concatenation to do this,
		 * as large state changes for single groups may have
		 * allocated clusters.
		 */
		domerge = 0;
		mt = ifscq->ifq_tail;
		if (mt != NULL) {
			recslen = m_length(m);

			if ((mt->m_pkthdr.vt_nrecs +
			    m->m_pkthdr.vt_nrecs <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (mt->m_pkthdr.len + recslen <=
			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE))) {
				domerge = 1;
			}
		}

		/*
		 * NOTE(review): fullness is checked on the source queue gq
		 * rather than the destination ifscq; this matches the
		 * upstream FreeBSD implementation — confirm intent.
		 */
		if (!domerge && IF_QFULL(gq)) {
			IGMP_PRINTF(("%s: outbound queue full, skipping whole "
			    "packet 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			if (!docopy) {
				IF_REMQUEUE(gq, m);
				m_freem(m);
			}
			m = n;
			continue;
		}

		if (!docopy) {
			/* Move: remove from the group queue, transfer ownership. */
			IGMP_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			IF_REMQUEUE(gq, m);
			m0 = m;
			m = n;
		} else {
			/* Copy: leave the original queued for retransmission. */
			IGMP_PRINTF(("%s: copying 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			m0 = m_dup(m, M_NOWAIT);
			if (m0 == NULL) {
				return ENOMEM;
			}
			m0->m_nextpkt = NULL;
			m = m->m_nextpkt;
		}

		if (!domerge) {
			IGMP_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n",
			    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
			IF_ENQUEUE(ifscq, m0);
		} else {
			struct mbuf *mtl;       /* last mbuf of packet mt */

			IGMP_PRINTF(("%s: merging 0x%llx with ifscq tail "
			    "0x%llx)\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(mt)));

			/*
			 * Graft m0's chain onto the tail packet: strip its
			 * pkthdr and fold its length/record counts into mt.
			 */
			mtl = m_last(mt);
			m0->m_flags &= ~M_PKTHDR;
			mt->m_pkthdr.len += recslen;
			mt->m_pkthdr.vt_nrecs +=
			    m0->m_pkthdr.vt_nrecs;

			mtl->m_next = m0;
		}
	}

	return 0;
}
3846 
3847 /*
3848  * Respond to a pending IGMPv3 General Query.
3849  */
static uint32_t
igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
{
	struct ifnet            *ifp;
	struct in_multi         *inm;
	struct in_multistep     step;
	int                      retval, loop;

	/* Called with igi lock held; returns with it held as well. */
	IGI_LOCK_ASSERT_HELD(igi);

	VERIFY(igi->igi_version == IGMP_VERSION_3);

	ifp = igi->igi_ifp;
	/* Drop the lock while walking the global in_multi list. */
	IGI_UNLOCK(igi);

	/*
	 * Enqueue a current-state record for every reportable group
	 * on this interface into the general-query response queue.
	 */
	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		if (inm->inm_ifp != ifp) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			inm->inm_state = IGMP_REPORTING_MEMBER;
			IGI_LOCK(igi);
			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
			    inm, 0, 0, 0);
			IGI_UNLOCK(igi);
			IGMP_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			break;
		}
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();

	IGI_LOCK(igi);
	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
	/* Transmit at most IGMP_MAX_RESPONSE_BURST packets this pass. */
	igmp_dispatch_queue(igi, &igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
	    loop);
	IGI_LOCK_ASSERT_HELD(igi);
	/*
	 * Slew transmission of bursts over 1 second intervals.
	 */
	if (igi->igi_gq.ifq_head != NULL) {
		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
			IGMP_RESPONSE_BURST_INTERVAL);
	}

	return igi->igi_v3_timer;
}
3916 
3917 /*
3918  * Transmit the next pending IGMP message in the output queue.
3919  *
3920  * Must not be called with inm_lock or igi_lock held.
3921  */
static void
igmp_sendpkt(struct mbuf *m)
{
	struct ip_moptions      *imo;
	struct mbuf             *ipopts, *m0;
	int                     error;
	struct route            ro;
	struct ifnet            *ifp;

	IGMP_PRINTF(("%s: transmit 0x%llx\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m)));

	/* Recover the ifp previously stashed in the mbuf by the enqueuer. */
	ifp = igmp_restore_context(m);
	/*
	 * Check if the ifnet is still attached.
	 */
	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
		IGMP_PRINTF(("%s: dropped 0x%llx as ifp went away.\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m)));
		m_freem(m);
		OSAddAtomic(1, &ipstat.ips_noroute);
		return;
	}

	/* Router Alert option is attached only if the sysctl allows it. */
	ipopts = igmp_sendra ? m_raopt : NULL;

	imo = ip_allocmoptions(Z_WAITOK);
	if (imo == NULL) {
		m_freem(m);
		return;
	}

	/* IGMP is link-local: TTL 1, no vif, no loopback of our own copy. */
	imo->imo_multicast_ttl  = 1;
	imo->imo_multicast_vif  = -1;
	imo->imo_multicast_loop = 0;

	/*
	 * If the user requested that IGMP traffic be explicitly
	 * redirected to the loopback interface (e.g. they are running a
	 * MANET interface and the routing protocol needs to see the
	 * updates), handle this now.
	 */
	if (m->m_flags & M_IGMP_LOOP) {
		imo->imo_multicast_ifp = lo_ifp;
	} else {
		imo->imo_multicast_ifp = ifp;
	}

	if (m->m_flags & M_IGMPV2) {
		/* v1/v2 messages are already complete packets. */
		m0 = m;
	} else {
		/* v3 reports still need their IP + report header prepended. */
		m0 = igmp_v3_encap_report(ifp, m);
		if (m0 == NULL) {
			/*
			 * If igmp_v3_encap_report() failed, then M_PREPEND()
			 * already freed the original mbuf chain.
			 * This means that we don't have to m_freem(m) here.
			 */
			IGMP_PRINTF(("%s: dropped 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			IMO_REMREF(imo);
			atomic_add_32(&ipstat.ips_odropped, 1);
			return;
		}
	}

	igmp_scrub_context(m0);
	/*
	 * NOTE(review): flags are cleared on 'm', but after a successful
	 * igmp_v3_encap_report() the chain head may be 'm0' (M_PREPEND can
	 * allocate a new leading mbuf) — confirm whether this was intended
	 * to operate on 'm0'.
	 */
	m->m_flags &= ~(M_PROTOFLAGS | M_IGMP_LOOP);
	m0->m_pkthdr.rcvif = lo_ifp;

	if (ifp->if_eflags & IFEF_TXSTART) {
		/*
		 * Use control service class if the interface supports
		 * transmit-start model.
		 */
		(void) m_set_service_class(m0, MBUF_SC_CTL);
	}
	bzero(&ro, sizeof(ro));
	error = ip_output(m0, ipopts, &ro, 0, imo, NULL);
	ROUTE_RELEASE(&ro);

	IMO_REMREF(imo);

	if (error) {
		IGMP_PRINTF(("%s: ip_output(0x%llx) = %d\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(m0), error));
		return;
	}

	IGMPSTAT_INC(igps_snd_reports);
	OIGMPSTAT_INC(igps_snd_reports);
}
4014 /*
4015  * Encapsulate an IGMPv3 report.
4016  *
4017  * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
4018  * chain has already had its IP/IGMPv3 header prepended. In this case
4019  * the function will not attempt to prepend; the lengths and checksums
4020  * will however be re-computed.
4021  *
4022  * Returns a pointer to the new mbuf chain head, or NULL if the
4023  * allocation failed.
4024  */
4025 static struct mbuf *
igmp_v3_encap_report(struct ifnet * ifp,struct mbuf * m)4026 igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
4027 {
4028 	struct igmp_report      *igmp;
4029 	struct ip               *ip;
4030 	unsigned int             hdrlen, igmpreclen;
4031 
4032 	VERIFY((m->m_flags & M_PKTHDR));
4033 
4034 	igmpreclen = m_length(m);
4035 	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);
4036 
4037 	if (m->m_flags & M_IGMPV3_HDR) {
4038 		igmpreclen -= hdrlen;
4039 	} else {
4040 		M_PREPEND(m, hdrlen, M_DONTWAIT, 1);
4041 		if (m == NULL) {
4042 			return NULL;
4043 		}
4044 		m->m_flags |= M_IGMPV3_HDR;
4045 	}
4046 	if (hdrlen + igmpreclen > USHRT_MAX) {
4047 		IGMP_PRINTF(("%s: invalid length %d\n", __func__, hdrlen + igmpreclen));
4048 		m_freem(m);
4049 		return NULL;
4050 	}
4051 
4052 
4053 	IGMP_PRINTF(("%s: igmpreclen is %d\n", __func__, igmpreclen));
4054 
4055 	m->m_data += sizeof(struct ip);
4056 	m->m_len -= sizeof(struct ip);
4057 
4058 	igmp = mtod(m, struct igmp_report *);
4059 	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
4060 	igmp->ir_rsv1 = 0;
4061 	igmp->ir_rsv2 = 0;
4062 	igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
4063 	igmp->ir_cksum = 0;
4064 	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
4065 	m->m_pkthdr.vt_nrecs = 0;
4066 
4067 	m->m_data -= sizeof(struct ip);
4068 	m->m_len += sizeof(struct ip);
4069 
4070 	ip = mtod(m, struct ip *);
4071 	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
4072 	ip->ip_len = (u_short)(hdrlen + igmpreclen);
4073 	ip->ip_off = IP_DF;
4074 	ip->ip_p = IPPROTO_IGMP;
4075 	ip->ip_sum = 0;
4076 
4077 	ip->ip_src.s_addr = INADDR_ANY;
4078 
4079 	if (m->m_flags & M_IGMP_LOOP) {
4080 		struct in_ifaddr *ia;
4081 
4082 		IFP_TO_IA(ifp, ia);
4083 		if (ia != NULL) {
4084 			IFA_LOCK(&ia->ia_ifa);
4085 			ip->ip_src = ia->ia_addr.sin_addr;
4086 			IFA_UNLOCK(&ia->ia_ifa);
4087 			IFA_REMREF(&ia->ia_ifa);
4088 		}
4089 	}
4090 
4091 	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);
4092 
4093 	return m;
4094 }
4095 
4096 #ifdef IGMP_DEBUG
4097 static const char *
igmp_rec_type_to_str(const int type)4098 igmp_rec_type_to_str(const int type)
4099 {
4100 	switch (type) {
4101 	case IGMP_CHANGE_TO_EXCLUDE_MODE:
4102 		return "TO_EX";
4103 	case IGMP_CHANGE_TO_INCLUDE_MODE:
4104 		return "TO_IN";
4105 	case IGMP_MODE_IS_EXCLUDE:
4106 		return "MODE_EX";
4107 	case IGMP_MODE_IS_INCLUDE:
4108 		return "MODE_IN";
4109 	case IGMP_ALLOW_NEW_SOURCES:
4110 		return "ALLOW_NEW";
4111 	case IGMP_BLOCK_OLD_SOURCES:
4112 		return "BLOCK_OLD";
4113 	default:
4114 		break;
4115 	}
4116 	return "unknown";
4117 }
4118 #endif
4119 
4120 void
igmp_init(struct protosw * pp,struct domain * dp)4121 igmp_init(struct protosw *pp, struct domain *dp)
4122 {
4123 #pragma unused(dp)
4124 	static int igmp_initialized = 0;
4125 
4126 	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);
4127 
4128 	if (igmp_initialized) {
4129 		return;
4130 	}
4131 	igmp_initialized = 1;
4132 
4133 	IGMP_PRINTF(("%s: initializing\n", __func__));
4134 
4135 	igmp_timers_are_running = 0;
4136 
4137 	LIST_INIT(&igi_head);
4138 	m_raopt = igmp_ra_alloc();
4139 }
4140