xref: /xnu-8796.101.5/bsd/netinet/igmp.c (revision aca3beaa3dfbd42498b42c5e5ce20a938e6554e5)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*-
29  * Copyright (c) 2007-2009 Bruce Simpson.
30  * Copyright (c) 1988 Stephen Deering.
31  * Copyright (c) 1992, 1993
32  *	The Regents of the University of California.  All rights reserved.
33  *
34  * This code is derived from software contributed to Berkeley by
35  * Stephen Deering of Stanford University.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
66  */
67 /*
68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69  * support for mandatory and extensible security protections.  This notice
70  * is included in support of clause 2.2 (b) of the Apple Public License,
71  * Version 2.0.
72  */
73 
74 /*
75  * Internet Group Management Protocol (IGMP) routines.
76  * [RFC1112, RFC2236, RFC3376]
77  *
78  * Written by Steve Deering, Stanford, May 1988.
79  * Modified by Rosen Sharma, Stanford, Aug 1994.
80  * Modified by Bill Fenner, Xerox PARC, Feb 1995.
81  * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
82  * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
83  *
84  * MULTICAST Revision: 3.5.1.4
85  */
86 
87 #include <sys/cdefs.h>
88 
89 #include <sys/param.h>
90 #include <sys/systm.h>
91 #include <sys/malloc.h>
92 #include <sys/mbuf.h>
93 #include <sys/socket.h>
94 #include <sys/protosw.h>
95 #include <sys/kernel.h>
96 #include <sys/sysctl.h>
97 #include <sys/mcache.h>
98 
99 #include <libkern/libkern.h>
100 #include <kern/zalloc.h>
101 
102 #include <net/if.h>
103 #include <net/route.h>
104 
105 #include <netinet/in.h>
106 #include <netinet/in_var.h>
107 #include <netinet/in_systm.h>
108 #include <netinet/ip.h>
109 #include <netinet/ip_var.h>
110 #include <netinet/igmp.h>
111 #include <netinet/igmp_var.h>
112 #include <netinet/kpi_ipfilter_var.h>
113 
114 #if SKYWALK
115 #include <skywalk/core/skywalk_var.h>
116 #endif /* SKYWALK */
117 
118 SLIST_HEAD(igmp_inm_relhead, in_multi);
119 
120 static void     igi_initvar(struct igmp_ifinfo *, struct ifnet *, int);
121 static struct igmp_ifinfo *igi_alloc(zalloc_flags_t);
122 static void     igi_free(struct igmp_ifinfo *);
123 static void     igi_delete(const struct ifnet *, struct igmp_inm_relhead *);
124 static void     igmp_dispatch_queue(struct igmp_ifinfo *, struct ifqueue *,
125     int, const int);
126 static void     igmp_final_leave(struct in_multi *, struct igmp_ifinfo *,
127     struct igmp_tparams *);
128 static int      igmp_handle_state_change(struct in_multi *,
129     struct igmp_ifinfo *, struct igmp_tparams *);
130 static int      igmp_initial_join(struct in_multi *, struct igmp_ifinfo *,
131     struct igmp_tparams *);
132 static int      igmp_input_v1_query(struct ifnet *, const struct ip *,
133     const struct igmp *);
134 static int      igmp_input_v2_query(struct ifnet *, const struct ip *,
135     const struct igmp *);
136 static int      igmp_input_v3_query(struct ifnet *, const struct ip *,
137     /*const*/ struct igmpv3 *);
138 static int      igmp_input_v3_group_query(struct in_multi *,
139     int, /*const*/ struct igmpv3 *);
140 static int      igmp_input_v1_report(struct ifnet *, struct mbuf *,
141     /*const*/ struct ip *, /*const*/ struct igmp *);
142 static int      igmp_input_v2_report(struct ifnet *, struct mbuf *,
143     /*const*/ struct ip *, /*const*/ struct igmp *);
144 static void     igmp_sendpkt(struct mbuf *);
145 static __inline__ int   igmp_isgroupreported(const struct in_addr);
146 static struct mbuf *igmp_ra_alloc(void);
147 #ifdef IGMP_DEBUG
148 static const char *igmp_rec_type_to_str(const int);
149 #endif
150 static uint32_t igmp_set_version(struct igmp_ifinfo *, const int);
151 static void     igmp_flush_relq(struct igmp_ifinfo *,
152     struct igmp_inm_relhead *);
153 static int      igmp_v1v2_queue_report(struct in_multi *, const int);
154 static void     igmp_v1v2_process_group_timer(struct in_multi *, const int);
155 static void     igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
156 static uint32_t igmp_v2_update_group(struct in_multi *, const int);
157 static void     igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
158 static uint32_t igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
159 static struct mbuf *
160 igmp_v3_encap_report(struct ifnet *, struct mbuf *);
161 static int      igmp_v3_enqueue_group_record(struct ifqueue *,
162     struct in_multi *, const int, const int, const int);
163 static int      igmp_v3_enqueue_filter_change(struct ifqueue *,
164     struct in_multi *);
165 static void     igmp_v3_process_group_timers(struct igmp_ifinfo *,
166     struct ifqueue *, struct ifqueue *, struct in_multi *,
167     const unsigned int);
168 static int      igmp_v3_merge_state_changes(struct in_multi *,
169     struct ifqueue *);
170 static void     igmp_v3_suppress_group_record(struct in_multi *);
171 static int      sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS;
172 static int      sysctl_igmp_gsr SYSCTL_HANDLER_ARGS;
173 static int      sysctl_igmp_default_version SYSCTL_HANDLER_ARGS;
174 
175 static int igmp_timeout_run;            /* IGMP timer is scheduled to run */
176 static void igmp_timeout(void *);
177 static void igmp_sched_timeout(bool);
178 
179 static struct mbuf *m_raopt;            /* Router Alert option */
180 
181 static int querier_present_timers_running;      /* IGMPv1/v2 older version
182                                                  * querier present */
183 static int interface_timers_running;            /* IGMPv3 general
184                                                  * query response */
185 static int state_change_timers_running;         /* IGMPv3 state-change
186                                                  * retransmit */
187 static int current_state_timers_running;        /* IGMPv1/v2 host
188                                                  * report; IGMPv3 g/sg
189                                                  * query response */
190 
191 /*
192  * Subsystem lock macros.
193  */
194 #define IGMP_LOCK()                     \
195 	lck_mtx_lock(&igmp_mtx)
196 #define IGMP_LOCK_ASSERT_HELD()         \
197 	LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_OWNED)
198 #define IGMP_LOCK_ASSERT_NOTHELD()      \
199 	LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED)
200 #define IGMP_UNLOCK()                   \
201 	lck_mtx_unlock(&igmp_mtx)
202 
203 static LIST_HEAD(, igmp_ifinfo) igi_head;
204 static struct igmpstat_v3 igmpstat_v3 = {
205 	.igps_version = IGPS_VERSION_3,
206 	.igps_len = sizeof(struct igmpstat_v3),
207 };
208 static struct igmpstat igmpstat; /* old IGMPv2 stats structure */
209 static struct timeval igmp_gsrdelay = {.tv_sec = 10, .tv_usec = 0};
210 
211 static int igmp_recvifkludge = 1;
212 static int igmp_sendra = 1;
213 static int igmp_sendlocal = 1;
214 static int igmp_v1enable = 1;
215 static int igmp_v2enable = 1;
216 static int igmp_legacysupp = 0;
217 static int igmp_default_version = IGMP_VERSION_3;
218 
219 SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
220     &igmpstat, igmpstat, "");
221 SYSCTL_STRUCT(_net_inet_igmp, OID_AUTO, v3stats,
222     CTLFLAG_RD | CTLFLAG_LOCKED, &igmpstat_v3, igmpstat_v3, "");
223 SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW | CTLFLAG_LOCKED,
224     &igmp_recvifkludge, 0,
225     "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
226 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW | CTLFLAG_LOCKED,
227     &igmp_sendra, 0,
228     "Send IP Router Alert option in IGMPv2/v3 messages");
229 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW | CTLFLAG_LOCKED,
230     &igmp_sendlocal, 0,
231     "Send IGMP membership reports for 224.0.0.0/24 groups");
232 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
233     &igmp_v1enable, 0,
234     "Enable backwards compatibility with IGMPv1");
235 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW | CTLFLAG_LOCKED,
236     &igmp_v2enable, 0,
237     "Enable backwards compatibility with IGMPv2");
238 SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW | CTLFLAG_LOCKED,
239     &igmp_legacysupp, 0,
240     "Allow v1/v2 reports to suppress v3 group responses");
241 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
242     CTLTYPE_INT | CTLFLAG_RW,
243     &igmp_default_version, 0, sysctl_igmp_default_version, "I",
244     "Default version of IGMP to run on each interface");
245 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
246     CTLTYPE_INT | CTLFLAG_RW,
247     &igmp_gsrdelay.tv_sec, 0, sysctl_igmp_gsr, "I",
248     "Rate limit for IGMPv3 Group-and-Source queries in seconds");
249 #ifdef IGMP_DEBUG
250 int igmp_debug = 0;
251 SYSCTL_INT(_net_inet_igmp, OID_AUTO,
252     debug, CTLFLAG_RW | CTLFLAG_LOCKED, &igmp_debug, 0, "");
253 #endif
254 
255 SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
256     sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
257 
258 /* Lock group and attribute for igmp_mtx */
259 static LCK_ATTR_DECLARE(igmp_mtx_attr, 0, 0);
260 static LCK_GRP_DECLARE(igmp_mtx_grp, "igmp_mtx");
261 
262 /*
263  * Locking and reference counting:
264  *
265  * igmp_mtx mainly protects igi_head.  In cases where both igmp_mtx and
266  * in_multihead_lock must be held, the former must be acquired first in order
267  * to maintain lock ordering.  It is not a requirement that igmp_mtx be
268  * acquired first before in_multihead_lock, but in case both must be acquired
269  * in succession, the correct lock ordering must be followed.
270  *
271  * Instead of walking the if_multiaddrs list at the interface and returning
272  * the ifma_protospec value of a matching entry, we search the global list
273  * of in_multi records and find it that way; this is done with in_multihead
274  * lock held.  Doing so avoids the race condition issues that many other BSDs
275  * suffer from (therefore in our implementation, ifma_protospec will never be
276  * NULL for as long as the in_multi is valid.)
277  *
278  * The above creates a requirement for the in_multi to stay in in_multihead
279  * list even after the final IGMP leave (in IGMPv3 mode) until no longer needs
280  * be retransmitted (this is not required for IGMPv1/v2.)  In order to handle
281  * this, the request and reference counts of the in_multi are bumped up when
282  * the state changes to IGMP_LEAVING_MEMBER, and later dropped in the timeout
283  * handler.  Each in_multi holds a reference to the underlying igmp_ifinfo.
284  *
285  * Thus, the permitted lock oder is:
286  *
287  *	igmp_mtx, in_multihead_lock, inm_lock, igi_lock
288  *
289  * Any may be taken independently, but if any are held at the same time,
290  * the above lock order must be followed.
291  */
292 static LCK_MTX_DECLARE_ATTR(igmp_mtx, &igmp_mtx_grp, &igmp_mtx_attr);
293 static int igmp_timers_are_running;
294 
295 #define IGMP_ADD_DETACHED_INM(_head, _inm) {                            \
296 	SLIST_INSERT_HEAD(_head, _inm, inm_dtle);                       \
297 }
298 
299 #define IGMP_REMOVE_DETACHED_INM(_head) {                               \
300 	struct in_multi *_inm, *_inm_tmp;                               \
301 	SLIST_FOREACH_SAFE(_inm, _head, inm_dtle, _inm_tmp) {           \
302 	        SLIST_REMOVE(_head, _inm, in_multi, inm_dtle);          \
303 	        INM_REMREF(_inm);                                       \
304 	}                                                               \
305 	VERIFY(SLIST_EMPTY(_head));                                     \
306 }
307 
308 static KALLOC_TYPE_DEFINE(igi_zone, struct igmp_ifinfo, NET_KT_DEFAULT);
309 
310 /* Store IGMPv3 record count in the module private scratch space */
311 #define vt_nrecs        pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]
312 
/*
 * Stash the interface pointer in the mbuf's packet header so it can be
 * recovered with igmp_restore_context() after the mbuf has sat on a
 * pending-output queue.
 */
static __inline void
igmp_save_context(struct mbuf *m, struct ifnet *ifp)
{
	m->m_pkthdr.rcvif = ifp;
}
318 
/*
 * Clear the interface previously stashed by igmp_save_context(), so an
 * outbound mbuf does not carry a stale rcvif pointer.
 */
static __inline void
igmp_scrub_context(struct mbuf *m)
{
	m->m_pkthdr.rcvif = NULL;
}
324 
325 #ifdef IGMP_DEBUG
/*
 * Debug helper: format an IPv4 address given in host byte order.
 * Writes the dotted-quad string into 'buf' (of length 'size') and
 * returns 'buf', or NULL on failure (per inet_ntop()).
 */
static __inline const char *
inet_ntop_haddr(in_addr_t haddr, char *buf, socklen_t size)
{
	struct in_addr in = { .s_addr = htonl(haddr) };

	return inet_ntop(AF_INET, &in, buf, size);
}
334 #endif
335 
336 /*
337  * Restore context from a queued IGMP output chain.
338  * Return saved ifp.
339  */
static __inline struct ifnet *
igmp_restore_context(struct mbuf *m)
{
	/* rcvif was stashed by igmp_save_context() at enqueue time. */
	return m->m_pkthdr.rcvif;
}
345 
346 /*
347  * Retrieve or set default IGMP version.
348  */
349 static int
350 sysctl_igmp_default_version SYSCTL_HANDLER_ARGS
351 {
352 #pragma unused(oidp, arg2)
353 	int      error;
354 	int      new;
355 
356 	IGMP_LOCK();
357 
358 	error = SYSCTL_OUT(req, arg1, sizeof(int));
359 	if (error || !req->newptr) {
360 		goto out_locked;
361 	}
362 
363 	new = igmp_default_version;
364 
365 	error = SYSCTL_IN(req, &new, sizeof(int));
366 	if (error) {
367 		goto out_locked;
368 	}
369 
370 	if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
371 		error = EINVAL;
372 		goto out_locked;
373 	}
374 
375 	IGMP_PRINTF(("%s: change igmp_default_version from %d to %d\n",
376 	    __func__, igmp_default_version, new));
377 
378 	igmp_default_version = new;
379 
380 out_locked:
381 	IGMP_UNLOCK();
382 	return error;
383 }
384 
385 /*
386  * Retrieve or set threshold between group-source queries in seconds.
387  *
388  */
389 static int
390 sysctl_igmp_gsr SYSCTL_HANDLER_ARGS
391 {
392 #pragma unused(arg1, arg2)
393 	int error;
394 	int i;
395 
396 	IGMP_LOCK();
397 
398 	i = (int)igmp_gsrdelay.tv_sec;
399 
400 	error = sysctl_handle_int(oidp, &i, 0, req);
401 	if (error || !req->newptr) {
402 		goto out_locked;
403 	}
404 
405 	if (i < -1 || i >= 60) {
406 		error = EINVAL;
407 		goto out_locked;
408 	}
409 
410 	igmp_gsrdelay.tv_sec = i;
411 
412 out_locked:
413 	IGMP_UNLOCK();
414 	return error;
415 }
416 
417 /*
418  * Expose struct igmp_ifinfo to userland, keyed by ifindex.
419  * For use by ifmcstat(8).
420  *
421  */
static int
sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int                     *name;
	int                      error;
	u_int                    namelen;
	struct ifnet            *ifp;
	struct igmp_ifinfo      *igi;
	struct igmp_ifinfo_u    igi_u;      /* userland-visible snapshot */

	name = (int *)arg1;
	namelen = arg2;

	/* This node is read-only; reject any attempt to write. */
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	/* Exactly one name component is expected: the interface index. */
	if (namelen != 1) {
		return EINVAL;
	}

	IGMP_LOCK();

	if (name[0] <= 0 || name[0] > (u_int)if_index) {
		error = ENOENT;
		goto out_locked;
	}

	error = ENOENT;

	/* Resolve ifindex -> ifnet under the interface-head lock. */
	ifnet_head_lock_shared();
	ifp = ifindex2ifnet[name[0]];
	ifnet_head_done();
	if (ifp == NULL) {
		goto out_locked;
	}

	bzero(&igi_u, sizeof(igi_u));

	/*
	 * Find the igmp_ifinfo for this interface and snapshot its fields
	 * under the per-igi lock; SYSCTL_OUT is called only after the lock
	 * is dropped, to respect the locking hierarchy.
	 */
	LIST_FOREACH(igi, &igi_head, igi_link) {
		IGI_LOCK(igi);
		if (ifp != igi->igi_ifp) {
			IGI_UNLOCK(igi);
			continue;
		}
		igi_u.igi_ifindex = igi->igi_ifp->if_index;
		igi_u.igi_version = igi->igi_version;
		igi_u.igi_v1_timer = igi->igi_v1_timer;
		igi_u.igi_v2_timer = igi->igi_v2_timer;
		igi_u.igi_v3_timer = igi->igi_v3_timer;
		igi_u.igi_flags = igi->igi_flags;
		igi_u.igi_rv = igi->igi_rv;
		igi_u.igi_qi = igi->igi_qi;
		igi_u.igi_qri = igi->igi_qri;
		igi_u.igi_uri = igi->igi_uri;
		IGI_UNLOCK(igi);

		error = SYSCTL_OUT(req, &igi_u, sizeof(igi_u));
		break;
	}

out_locked:
	IGMP_UNLOCK();
	return error;
}
488 
489 /*
490  * Dispatch an entire queue of pending packet chains
491  *
492  * Must not be called with inm_lock held.
493  */
static void
igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit,
    const int loop)
{
	struct mbuf *m;
	struct ip *ip;

	/* Caller holds the igi lock (if an igi was supplied) on entry. */
	if (igi != NULL) {
		IGI_LOCK_ASSERT_HELD(igi);
	}

#if SKYWALK
	/*
	 * Since this function is called holding the igi lock, we need to ensure we
	 * don't enter the driver directly because a deadlock can happen if another
	 * thread holding the workloop lock tries to acquire the igi lock at
	 * the same time.
	 */
	sk_protect_t protect = sk_async_transmit_protect();
#endif /* SKYWALK */

	/*
	 * Pop up to 'limit' packets off the queue and hand each to
	 * igmp_sendpkt().  Note: a 'limit' of 0 effectively means "drain
	 * until empty", as the post-decrement test below never hits zero.
	 */
	for (;;) {
		IF_DEQUEUE(ifq, m);
		if (m == NULL) {
			break;
		}
		IGMP_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifq),
		    (uint64_t)VM_KERNEL_ADDRPERM(m)));
		ip = mtod(m, struct ip *);
		/* M_IGMP_LOOP asks the stack to loop the packet back locally. */
		if (loop) {
			m->m_flags |= M_IGMP_LOOP;
		}
		/*
		 * Drop the igi lock across the actual send, then reacquire;
		 * igmp_sendpkt() must not be entered with it held.
		 */
		if (igi != NULL) {
			IGI_UNLOCK(igi);
		}
		igmp_sendpkt(m);
		if (igi != NULL) {
			IGI_LOCK(igi);
		}
		if (--limit == 0) {
			break;
		}
	}

#if SKYWALK
	sk_async_transmit_unprotect(protect);
#endif /* SKYWALK */

	/* Lock state on exit matches entry: igi lock held if supplied. */
	if (igi != NULL) {
		IGI_LOCK_ASSERT_HELD(igi);
	}
}
547 
548 /*
549  * Filter outgoing IGMP report state by group.
550  *
551  * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
552  * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
553  * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
554  * this may break certain IGMP snooping switches which rely on the old
555  * report behaviour.
556  *
557  * Return zero if the given group is one for which IGMP reports
558  * should be suppressed, or non-zero if reports should be issued.
559  */
560 
561 static __inline__
562 int
igmp_isgroupreported(const struct in_addr addr)563 igmp_isgroupreported(const struct in_addr addr)
564 {
565 	if (in_allhosts(addr) ||
566 	    ((!igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) {
567 		return 0;
568 	}
569 
570 	return 1;
571 }
572 
573 /*
574  * Construct a Router Alert option to use in outgoing packets.
575  */
static struct mbuf *
igmp_ra_alloc(void)
{
	struct mbuf     *m;
	struct ipoption *p;

	/* M_WAITOK allocation: expected to sleep rather than fail. */
	MGET(m, M_WAITOK, MT_DATA);
	p = mtod(m, struct ipoption *);
	p->ipopt_dst.s_addr = INADDR_ANY;
	p->ipopt_list[0] = (char)IPOPT_RA;      /* Router Alert Option */
	p->ipopt_list[1] = 0x04;        /* 4 bytes long */
	p->ipopt_list[2] = IPOPT_EOL;   /* End of IP option list */
	p->ipopt_list[3] = 0x00;        /* pad byte */
	/* Length covers the (unused) ipopt_dst plus the 4-byte option. */
	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];

	return m;
}
593 
594 /*
595  * Attach IGMP when PF_INET is attached to an interface.
596  */
struct igmp_ifinfo *
igmp_domifattach(struct ifnet *ifp, zalloc_flags_t how)
{
	struct igmp_ifinfo *igi;

	IGMP_PRINTF(("%s: called for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));

	igi = igi_alloc(how);
	if (igi == NULL) {
		return NULL;
	}

	IGMP_LOCK();

	IGI_LOCK(igi);
	igi_initvar(igi, ifp, 0);
	igi->igi_debug |= IFD_ATTACHED;
	/* Two references: one owned by igi_head, one returned to the caller. */
	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
	IGI_ADDREF_LOCKED(igi); /* hold a reference for caller */
	IGI_UNLOCK(igi);
	/* Mark the igi silent if the interface is not multicast-capable. */
	ifnet_lock_shared(ifp);
	igmp_initsilent(ifp, igi);
	ifnet_lock_done(ifp);

	LIST_INSERT_HEAD(&igi_head, igi, igi_link);

	IGMP_UNLOCK();

	IGMP_PRINTF(("%s: allocate igmp_ifinfo for ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));

	return igi;
}
631 
632 /*
633  * Attach IGMP when PF_INET is reattached to an interface.  Caller is
634  * expected to have an outstanding reference to the igi.
635  */
void
igmp_domifreattach(struct igmp_ifinfo *igi)
{
	struct ifnet *ifp;

	IGMP_LOCK();

	IGI_LOCK(igi);
	/* Must currently be detached; the caller owns a reference. */
	VERIFY(!(igi->igi_debug & IFD_ATTACHED));
	ifp = igi->igi_ifp;
	VERIFY(ifp != NULL);
	igi_initvar(igi, ifp, 1);       /* reattach: keeps igi_relinmhead */
	igi->igi_debug |= IFD_ATTACHED;
	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
	IGI_UNLOCK(igi);
	/* Re-evaluate silent mode against the interface's current flags. */
	ifnet_lock_shared(ifp);
	igmp_initsilent(ifp, igi);
	ifnet_lock_done(ifp);

	LIST_INSERT_HEAD(&igi_head, igi, igi_link);

	IGMP_UNLOCK();

	IGMP_PRINTF(("%s: reattached igmp_ifinfo for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));
}
662 
663 /*
664  * Hook for domifdetach.
665  */
void
igmp_domifdetach(struct ifnet *ifp)
{
	/* Local list collecting in_multi records to release lock-free. */
	SLIST_HEAD(, in_multi) inm_dthead;

	SLIST_INIT(&inm_dthead);

	IGMP_PRINTF(("%s: called for ifp 0x%llx(%s%d)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name, ifp->if_unit));

	IGMP_LOCK();
	igi_delete(ifp, (struct igmp_inm_relhead *)&inm_dthead);
	IGMP_UNLOCK();

	/* Now that we're dropped all locks, release detached records */
	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
}
683 
684 /*
685  * Called at interface detach time.  Note that we only flush all deferred
686  * responses and record releases; all remaining inm records and their source
687  * entries related to this interface are left intact, in order to handle
688  * the reattach case.
689  */
static void
igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead)
{
	struct igmp_ifinfo *igi, *tigi;

	IGMP_LOCK_ASSERT_HELD();

	LIST_FOREACH_SAFE(igi, &igi_head, igi_link, tigi) {
		IGI_LOCK(igi);
		if (igi->igi_ifp == ifp) {
			/*
			 * Free deferred General Query responses.
			 */
			IF_DRAIN(&igi->igi_gq);
			IF_DRAIN(&igi->igi_v2q);
			/* Move pending releases to the caller's list. */
			igmp_flush_relq(igi, inm_dthead);
			VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
			igi->igi_debug &= ~IFD_ATTACHED;
			IGI_UNLOCK(igi);

			/* Unlink and drop the reference held by igi_head. */
			LIST_REMOVE(igi, igi_link);
			IGI_REMREF(igi); /* release igi_head reference */
			return;
		}
		IGI_UNLOCK(igi);
	}
	/* Every attached interface must have an igmp_ifinfo on igi_head. */
	panic("%s: igmp_ifinfo not found for ifp %p(%s)", __func__,
	    ifp, ifp->if_xname);
}
719 
720 __private_extern__ void
igmp_initsilent(struct ifnet * ifp,struct igmp_ifinfo * igi)721 igmp_initsilent(struct ifnet *ifp, struct igmp_ifinfo *igi)
722 {
723 	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
724 
725 	IGI_LOCK_ASSERT_NOTHELD(igi);
726 	IGI_LOCK(igi);
727 	if (!(ifp->if_flags & IFF_MULTICAST)) {
728 		igi->igi_flags |= IGIF_SILENT;
729 	} else {
730 		igi->igi_flags &= ~IGIF_SILENT;
731 	}
732 	IGI_UNLOCK(igi);
733 }
734 
735 static void
igi_initvar(struct igmp_ifinfo * igi,struct ifnet * ifp,int reattach)736 igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach)
737 {
738 	IGI_LOCK_ASSERT_HELD(igi);
739 
740 	igi->igi_ifp = ifp;
741 	igi->igi_version = igmp_default_version;
742 	igi->igi_flags = 0;
743 	igi->igi_rv = IGMP_RV_INIT;
744 	igi->igi_qi = IGMP_QI_INIT;
745 	igi->igi_qri = IGMP_QRI_INIT;
746 	igi->igi_uri = IGMP_URI_INIT;
747 
748 	if (!reattach) {
749 		SLIST_INIT(&igi->igi_relinmhead);
750 	}
751 
752 	/*
753 	 * Responses to general queries are subject to bounds.
754 	 */
755 	igi->igi_gq.ifq_maxlen =  IGMP_MAX_RESPONSE_PACKETS;
756 	igi->igi_v2q.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
757 }
758 
759 static struct igmp_ifinfo *
igi_alloc(zalloc_flags_t how)760 igi_alloc(zalloc_flags_t how)
761 {
762 	struct igmp_ifinfo *igi = zalloc_flags(igi_zone, how | Z_ZERO);
763 	if (igi != NULL) {
764 		lck_mtx_init(&igi->igi_lock, &igmp_mtx_grp, &igmp_mtx_attr);
765 		igi->igi_debug |= IFD_ALLOC;
766 	}
767 	return igi;
768 }
769 
static void
igi_free(struct igmp_ifinfo *igi)
{
	/* Sanity checks: must be detached, unbound, allocated, unreferenced. */
	IGI_LOCK(igi);
	if (igi->igi_debug & IFD_ATTACHED) {
		panic("%s: attached igi=%p is being freed", __func__, igi);
		/* NOTREACHED */
	} else if (igi->igi_ifp != NULL) {
		panic("%s: ifp not NULL for igi=%p", __func__, igi);
		/* NOTREACHED */
	} else if (!(igi->igi_debug & IFD_ALLOC)) {
		panic("%s: igi %p cannot be freed", __func__, igi);
		/* NOTREACHED */
	} else if (igi->igi_refcnt != 0) {
		panic("%s: non-zero refcnt igi=%p", __func__, igi);
		/* NOTREACHED */
	}
	igi->igi_debug &= ~IFD_ALLOC;
	IGI_UNLOCK(igi);

	lck_mtx_destroy(&igi->igi_lock, &igmp_mtx_grp);
	zfree(igi_zone, igi);
}
793 
794 void
igi_addref(struct igmp_ifinfo * igi,int locked)795 igi_addref(struct igmp_ifinfo *igi, int locked)
796 {
797 	if (!locked) {
798 		IGI_LOCK_SPIN(igi);
799 	} else {
800 		IGI_LOCK_ASSERT_HELD(igi);
801 	}
802 
803 	if (++igi->igi_refcnt == 0) {
804 		panic("%s: igi=%p wraparound refcnt", __func__, igi);
805 		/* NOTREACHED */
806 	}
807 	if (!locked) {
808 		IGI_UNLOCK(igi);
809 	}
810 }
811 
void
igi_remref(struct igmp_ifinfo *igi)
{
	SLIST_HEAD(, in_multi) inm_dthead;
	struct ifnet *ifp;

	IGI_LOCK_SPIN(igi);

	if (igi->igi_refcnt == 0) {
		panic("%s: igi=%p negative refcnt", __func__, igi);
		/* NOTREACHED */
	}

	--igi->igi_refcnt;
	if (igi->igi_refcnt > 0) {
		IGI_UNLOCK(igi);
		return;
	}

	/*
	 * Final reference dropped: detach from the interface, drain any
	 * queued responses, and collect pending in_multi releases so they
	 * can be dropped after all locks are released.
	 */
	ifp = igi->igi_ifp;
	igi->igi_ifp = NULL;
	IF_DRAIN(&igi->igi_gq);
	IF_DRAIN(&igi->igi_v2q);
	SLIST_INIT(&inm_dthead);
	igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
	VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
	IGI_UNLOCK(igi);

	/* Now that we're dropped all locks, release detached records */
	IGMP_REMOVE_DETACHED_INM(&inm_dthead);

	IGMP_PRINTF(("%s: freeing igmp_ifinfo for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	igi_free(igi);
}
848 
849 /*
850  * Process a received IGMPv1 query.
851  * Return non-zero if the message should be dropped.
852  */
static int
igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	struct in_multistep     step;
	/* Timer parameters accumulated here and applied once at 'done'. */
	struct igmp_tparams     itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	/*
	 * IGMPv1 Host Membership Queries SHOULD always be addressed to
	 * 224.0.0.1. They are always treated as General Queries.
	 * igmp_group is always ignored. Do not drop it as a userland
	 * daemon may wish to see it.
	 */
	if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
		IGMPSTAT_INC(igps_rcv_badqueries);
		OIGMPSTAT_INC(igps_rcv_badqueries);
		goto done;
	}
	IGMPSTAT_INC(igps_rcv_gen_queries);

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	/* Loopback interfaces do not participate in IGMP signalling. */
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v1 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}
	/*
	 * Switch to IGMPv1 host compatibility mode.
	 */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_1);
	IGI_UNLOCK(igi);

	IGMP_PRINTF(("%s: process v1 query on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	/*
	 * Start the timers in all of our group records
	 * for the interface on which the query arrived,
	 * except those which are already running.
	 */
	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		/* Skip other interfaces and already-pending reports. */
		if (inm->inm_ifp != ifp || inm->inm_timer != 0) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/* Schedule a randomly-delayed v1/v2 report. */
			inm->inm_state = IGMP_REPORTING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
			itp.cst = 1;    /* current-state timer now pending */
			break;
		case IGMP_LEAVING_MEMBER:
			break;
		}
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();
done:
	/* Kick the IGMP timeout machinery with any timers armed above. */
	igmp_set_timeout(&itp);

	/* Always 0: the message is never dropped by this handler. */
	return 0;
}
938 
/*
 * Process a received IGMPv2 general or group-specific query.
 *
 * Always returns 0; any timer work requested while processing the query
 * is accumulated in 'itp' and armed via igmp_set_timeout() on exit.
 */
static int
igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	int                      is_general_query;
	uint16_t                 timer;
	struct igmp_tparams      itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	/*
	 * Validate address fields upfront.
	 */
	if (in_nullhost(igmp->igmp_group)) {
		/*
		 * IGMPv2 General Query.
		 * If this was not sent to the all-hosts group, ignore it.
		 */
		if (!in_allhosts(ip->ip_dst)) {
			goto done;
		}
		IGMPSTAT_INC(igps_rcv_gen_queries);
		is_general_query = 1;
	} else {
		/* IGMPv2 Group-Specific Query. */
		IGMPSTAT_INC(igps_rcv_group_queries);
	}

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v2 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}
	/*
	 * Ignore v2 query if in v1 Compatibility Mode.
	 */
	if (igi->igi_version == IGMP_VERSION_1) {
		IGI_UNLOCK(igi);
		goto done;
	}
	/* Record presence of an older (v2) querier on this link. */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_2);
	IGI_UNLOCK(igi);

	/* Max Resp Time is in 1/10s units; clamp to at least 1 second. */
	timer = igmp->igmp_code / IGMP_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	if (is_general_query) {
		struct in_multistep step;

		IGMP_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		/*
		 * For each reporting group joined on this
		 * interface, kick the report timer.
		 */
		in_multihead_lock_shared();
		IN_FIRST_MULTI(step, inm);
		while (inm != NULL) {
			INM_LOCK(inm);
			if (inm->inm_ifp == ifp) {
				itp.cst += igmp_v2_update_group(inm, timer);
			}
			INM_UNLOCK(inm);
			IN_NEXT_MULTI(step, inm);
		}
		in_multihead_lock_done();
	} else {
		/*
		 * Group-specific IGMPv2 query, we need only
		 * look up the single group to process it.
		 */
		in_multihead_lock_shared();
		IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
		in_multihead_lock_done();
		if (inm != NULL) {
			INM_LOCK(inm);
			IGMP_INET_PRINTF(igmp->igmp_group,
			    ("process v2 query %s on ifp 0x%llx(%s)\n",
			    _igmp_inet_buf,
			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
			itp.cst = igmp_v2_update_group(inm, timer);
			INM_UNLOCK(inm);
			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
		}
	}
done:
	/* Arm any timers the above processing requested. */
	igmp_set_timeout(&itp);

	return 0;
}
1044 
/*
 * Update the report timer on a group in response to an IGMPv2 query.
 *
 * If we are becoming the reporting member for this group, start the timer.
 * If we already are the reporting member for this group, and timer is
 * below the threshold, reset it.
 *
 * We may be updating the group for the first time since we switched
 * to IGMPv3. If we are, then we must clear any recorded source lists,
 * and transition to REPORTING state; the group timer is overloaded
 * for group and group-source query responses.
 *
 * Unlike IGMPv3, the delay per group should be jittered
 * to avoid bursts of IGMPv2 reports.
 *
 * Caller must hold the in_multi lock (asserted below).
 * Returns the group's resulting report timer value (inm_timer).
 */
static uint32_t
igmp_v2_update_group(struct in_multi *inm, const int timer)
{
	IGMP_INET_PRINTF(inm->inm_addr, ("%s: %s/%s timer=%d\n",
	    __func__, _igmp_inet_buf, if_name(inm->inm_ifp),
	    timer));

	INM_LOCK_ASSERT_HELD(inm);

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
		/* Not a reporting member; nothing to schedule. */
		break;
	case IGMP_REPORTING_MEMBER:
		if (inm->inm_timer != 0 &&
		    inm->inm_timer <= timer) {
			/*
			 * A report is already pending at or before the
			 * requested deadline; leave the timer alone.
			 */
			IGMP_PRINTF(("%s: REPORTING and timer running, "
			    "skipping.\n", __func__));
			break;
		}
		OS_FALLTHROUGH;
	case IGMP_SG_QUERY_PENDING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		/* Become (or stay) the reporting member; jitter the delay. */
		IGMP_PRINTF(("%s: ->REPORTING\n", __func__));
		inm->inm_state = IGMP_REPORTING_MEMBER;
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		break;
	case IGMP_SLEEPING_MEMBER:
		IGMP_PRINTF(("%s: ->AWAKENING\n", __func__));
		inm->inm_state = IGMP_AWAKENING_MEMBER;
		break;
	case IGMP_LEAVING_MEMBER:
		/* Group is being left; do not schedule a report. */
		break;
	}

	return inm->inm_timer;
}
1100 
/*
 * Process a received IGMPv3 general, group-specific or
 * group-and-source-specific query.
 * Assumes m has already been pulled up to the full IGMP message length.
 * Return 0 if successful, otherwise an appropriate error code is returned.
 */
static int
igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
    /*const*/ struct igmpv3 *igmpv3)
{
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	int                      is_general_query;
	uint32_t                 maxresp, nsrc, qqi;
	uint32_t                 timer;
	uint8_t                  qrv;
	struct igmp_tparams      itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	IGMP_PRINTF(("%s: process v3 query on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	/*
	 * Decode Max Resp Code: values >= 128 use the mantissa/exponent
	 * floating-point encoding.
	 */
	maxresp = igmpv3->igmp_code;    /* in 1/10ths of a second */
	if (maxresp >= 128) {
		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
		    (IGMP_EXP(igmpv3->igmp_code) + 3);
	}

	/*
	 * Robustness must never be less than 2 for on-wire IGMPv3.
	 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
	 * an exception for interfaces whose IGMPv3 state changes
	 * are redirected to loopback (e.g. MANET).
	 */
	qrv = IGMP_QRV(igmpv3->igmp_misc);
	if (qrv < 2) {
		IGMP_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
		    qrv, IGMP_RV_INIT));
		qrv = IGMP_RV_INIT;
	}

	/* QQI >= 128 is likewise mantissa/exponent encoded. */
	qqi = igmpv3->igmp_qqi;
	if (qqi >= 128) {
		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
		    (IGMP_EXP(igmpv3->igmp_qqi) + 3);
	}

	/* Convert 1/10s units to seconds; clamp to at least 1 second. */
	timer = maxresp / IGMP_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Validate address fields and versions upfront before
	 * accepting v3 query.
	 */
	if (in_nullhost(igmpv3->igmp_group)) {
		/*
		 * IGMPv3 General Query.
		 *
		 * General Queries SHOULD be directed to 224.0.0.1.
		 * A general query with a source list has undefined
		 * behaviour; discard it.
		 */
		IGMPSTAT_INC(igps_rcv_gen_queries);
		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
			IGMPSTAT_INC(igps_rcv_badqueries);
			OIGMPSTAT_INC(igps_rcv_badqueries);
			goto done;
		}
		is_general_query = 1;
	} else {
		/* Group or group-source specific query. */
		if (nsrc == 0) {
			IGMPSTAT_INC(igps_rcv_group_queries);
		} else {
			IGMPSTAT_INC(igps_rcv_gsr_queries);
		}
	}

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v3 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}

	/*
	 * Discard the v3 query if we're in Compatibility Mode.
	 * The RFC is not obviously worded that hosts need to stay in
	 * compatibility mode until the Old Version Querier Present
	 * timer expires.
	 */
	if (igi->igi_version != IGMP_VERSION_3) {
		IGMP_PRINTF(("%s: ignore v3 query in v%d mode on "
		    "ifp 0x%llx(%s)\n", __func__, igi->igi_version,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}

	/* Adopt the querier's robustness, query interval and QRI. */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_3);
	igi->igi_rv = qrv;
	igi->igi_qi = qqi;
	igi->igi_qri = MAX(timer, IGMP_QRI_MIN);

	IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, igi->igi_rv,
	    igi->igi_qi, igi->igi_qri));

	if (is_general_query) {
		/*
		 * Schedule a current-state report on this ifp for
		 * all groups, possibly containing source lists.
		 * If there is a pending General Query response
		 * scheduled earlier than the selected delay, do
		 * not schedule any other reports.
		 * Otherwise, reset the interface timer.
		 */
		IGMP_PRINTF(("%s: process v3 general query on ifp 0x%llx(%s)\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
			itp.it = igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
		}
		IGI_UNLOCK(igi);
	} else {
		IGI_UNLOCK(igi);
		/*
		 * Group-source-specific queries are throttled on
		 * a per-group basis to defeat denial-of-service attempts.
		 * Queries for groups we are not a member of on this
		 * link are simply ignored.
		 */
		in_multihead_lock_shared();
		IN_LOOKUP_MULTI(&igmpv3->igmp_group, ifp, inm);
		in_multihead_lock_done();
		if (inm == NULL) {
			goto done;
		}

		INM_LOCK(inm);
		if (nsrc > 0) {
			if (!ratecheck(&inm->inm_lastgsrtv,
			    &igmp_gsrdelay)) {
				IGMP_PRINTF(("%s: GS query throttled.\n",
				    __func__));
				IGMPSTAT_INC(igps_drop_gsr_queries);
				INM_UNLOCK(inm);
				INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
				goto done;
			}
		}
		IGMP_INET_PRINTF(igmpv3->igmp_group,
		    ("process v3 %s query on ifp 0x%llx(%s)\n", _igmp_inet_buf,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		/*
		 * If there is a pending General Query response
		 * scheduled sooner than the selected delay, no
		 * further report need be scheduled.
		 * Otherwise, prepare to respond to the
		 * group-specific or group-and-source query.
		 */
		IGI_LOCK(igi);
		itp.it = igi->igi_v3_timer;
		IGI_UNLOCK(igi);
		if (itp.it == 0 || itp.it >= timer) {
			(void) igmp_input_v3_group_query(inm, timer, igmpv3);
			itp.cst = inm->inm_timer;
		}
		INM_UNLOCK(inm);
		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
	}
done:
	if (itp.it > 0) {
		IGMP_PRINTF(("%s: v3 general query response scheduled in "
		    "T+%d seconds on ifp 0x%llx(%s)\n", __func__, itp.it,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	}
	igmp_set_timeout(&itp);

	return 0;
}
1292 
/*
 * Process a received IGMPv3 group-specific or group-and-source-specific
 * query.
 * Return <0 if any error occurred. Currently this is ignored.
 *
 * Caller must hold the in_multi lock (asserted below).
 */
static int
igmp_input_v3_group_query(struct in_multi *inm,
    int timer, /*const*/ struct igmpv3 *igmpv3)
{
	int                      retval;
	uint16_t                 nsrc;

	INM_LOCK_ASSERT_HELD(inm);

	retval = 0;

	/* Only members in an active reporting state react to queries. */
	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LEAVING_MEMBER:
		return retval;
	case IGMP_REPORTING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		break;
	}

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Deal with group-specific queries upfront.
	 * If any group query is already pending, purge any recorded
	 * source-list state if it exists, and schedule a query response
	 * for this group-specific query.
	 */
	if (nsrc == 0) {
		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
			inm_clear_recorded(inm);
			/* Keep the earlier of the two response deadlines. */
			timer = min(inm->inm_timer, timer);
		}
		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		return retval;
	}

	/*
	 * Deal with the case where a group-and-source-specific query has
	 * been received but a group-specific query is already pending.
	 */
	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
		timer = min(inm->inm_timer, timer);
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		return retval;
	}

	/*
	 * Finally, deal with the case where a group-and-source-specific
	 * query has been received, where a response to a previous g-s-r
	 * query exists, or none exists.
	 * In this case, we need to parse the source-list which the Querier
	 * has provided us with and check if we have any source list filter
	 * entries at T1 for these sources. If we do not, there is no need
	 * schedule a report and the query may be dropped.
	 * If we do, we must record them and schedule a current-state
	 * report for those sources.
	 * FIXME: Handling source lists larger than 1 mbuf requires that
	 * we pass the mbuf chain pointer down to this function, and use
	 * m_getptr() to walk the chain.
	 */
	if (inm->inm_nsrc > 0) {
		const struct in_addr    *ap;
		int                      i, nrecorded;

		/* Source addresses immediately follow the fixed header. */
		ap = (const struct in_addr *)(igmpv3 + 1);
		nrecorded = 0;
		for (i = 0; i < nsrc; i++, ap++) {
			retval = inm_record_source(inm, ap->s_addr);
			if (retval < 0) {
				break;
			}
			nrecorded += retval;
		}
		if (nrecorded > 0) {
			IGMP_PRINTF(("%s: schedule response to SG query\n",
			    __func__));
			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		}
	}

	return retval;
}
1390 
1391 /*
1392  * Process a received IGMPv1 host membership report.
1393  *
1394  * NOTE: 0.0.0.0 workaround breaks const correctness.
1395  */
1396 static int
igmp_input_v1_report(struct ifnet * ifp,struct mbuf * m,struct ip * ip,struct igmp * igmp)1397 igmp_input_v1_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
1398     /*const*/ struct igmp *igmp)
1399 {
1400 	struct in_ifaddr *ia;
1401 	struct in_multi *inm;
1402 
1403 	IGMPSTAT_INC(igps_rcv_reports);
1404 	OIGMPSTAT_INC(igps_rcv_reports);
1405 
1406 	if ((ifp->if_flags & IFF_LOOPBACK) ||
1407 	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1408 		return 0;
1409 	}
1410 
1411 	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr) ||
1412 	    !in_hosteq(igmp->igmp_group, ip->ip_dst))) {
1413 		IGMPSTAT_INC(igps_rcv_badreports);
1414 		OIGMPSTAT_INC(igps_rcv_badreports);
1415 		return EINVAL;
1416 	}
1417 
1418 	/*
1419 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1420 	 * Booting clients may use the source address 0.0.0.0. Some
1421 	 * IGMP daemons may not know how to use IP_RECVIF to determine
1422 	 * the interface upon which this message was received.
1423 	 * Replace 0.0.0.0 with the subnet address if told to do so.
1424 	 */
1425 	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1426 		IFP_TO_IA(ifp, ia);
1427 		if (ia != NULL) {
1428 			IFA_LOCK(&ia->ia_ifa);
1429 			ip->ip_src.s_addr = htonl(ia->ia_subnet);
1430 			IFA_UNLOCK(&ia->ia_ifa);
1431 			IFA_REMREF(&ia->ia_ifa);
1432 		}
1433 	}
1434 
1435 	IGMP_INET_PRINTF(igmp->igmp_group,
1436 	    ("process v1 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1437 	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1438 
1439 	/*
1440 	 * IGMPv1 report suppression.
1441 	 * If we are a member of this group, and our membership should be
1442 	 * reported, stop our group timer and transition to the 'lazy' state.
1443 	 */
1444 	in_multihead_lock_shared();
1445 	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1446 	in_multihead_lock_done();
1447 	if (inm != NULL) {
1448 		struct igmp_ifinfo *igi;
1449 
1450 		INM_LOCK(inm);
1451 
1452 		igi = inm->inm_igi;
1453 		VERIFY(igi != NULL);
1454 
1455 		IGMPSTAT_INC(igps_rcv_ourreports);
1456 		OIGMPSTAT_INC(igps_rcv_ourreports);
1457 
1458 		/*
1459 		 * If we are in IGMPv3 host mode, do not allow the
1460 		 * other host's IGMPv1 report to suppress our reports
1461 		 * unless explicitly configured to do so.
1462 		 */
1463 		IGI_LOCK(igi);
1464 		if (igi->igi_version == IGMP_VERSION_3) {
1465 			if (igmp_legacysupp) {
1466 				igmp_v3_suppress_group_record(inm);
1467 			}
1468 			IGI_UNLOCK(igi);
1469 			INM_UNLOCK(inm);
1470 			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1471 			return 0;
1472 		}
1473 
1474 		INM_LOCK_ASSERT_HELD(inm);
1475 		inm->inm_timer = 0;
1476 
1477 		switch (inm->inm_state) {
1478 		case IGMP_NOT_MEMBER:
1479 		case IGMP_SILENT_MEMBER:
1480 			break;
1481 		case IGMP_IDLE_MEMBER:
1482 		case IGMP_LAZY_MEMBER:
1483 		case IGMP_AWAKENING_MEMBER:
1484 			IGMP_INET_PRINTF(igmp->igmp_group,
1485 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1486 			    _igmp_inet_buf,
1487 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1488 			OS_FALLTHROUGH;
1489 		case IGMP_SLEEPING_MEMBER:
1490 			inm->inm_state = IGMP_SLEEPING_MEMBER;
1491 			break;
1492 		case IGMP_REPORTING_MEMBER:
1493 			IGMP_INET_PRINTF(igmp->igmp_group,
1494 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1495 			    _igmp_inet_buf,
1496 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1497 			if (igi->igi_version == IGMP_VERSION_1) {
1498 				inm->inm_state = IGMP_LAZY_MEMBER;
1499 			} else if (igi->igi_version == IGMP_VERSION_2) {
1500 				inm->inm_state = IGMP_SLEEPING_MEMBER;
1501 			}
1502 			break;
1503 		case IGMP_G_QUERY_PENDING_MEMBER:
1504 		case IGMP_SG_QUERY_PENDING_MEMBER:
1505 		case IGMP_LEAVING_MEMBER:
1506 			break;
1507 		}
1508 		IGI_UNLOCK(igi);
1509 		INM_UNLOCK(inm);
1510 		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1511 	}
1512 
1513 	return 0;
1514 }
1515 
/*
 * Process a received IGMPv2 host membership report.
 *
 * Returns 0 on success (or when the report is deliberately ignored);
 * EINVAL if the report fails address validation.
 *
 * NOTE: 0.0.0.0 workaround breaks const correctness.
 */
static int
igmp_input_v2_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
    /*const*/ struct igmp *igmp)
{
	struct in_ifaddr *ia;
	struct in_multi *inm;

	/*
	 * Make sure we don't hear our own membership report.  Fast
	 * leave requires knowing that we are the only member of a
	 * group.
	 */
	IFP_TO_IA(ifp, ia);
	if (ia != NULL) {
		IFA_LOCK(&ia->ia_ifa);
		if (in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
			IFA_UNLOCK(&ia->ia_ifa);
			IFA_REMREF(&ia->ia_ifa);
			return 0;
		}
		IFA_UNLOCK(&ia->ia_ifa);
	}

	IGMPSTAT_INC(igps_rcv_reports);
	OIGMPSTAT_INC(igps_rcv_reports);

	/* Ignore reports looped back from ourselves. */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		return 0;
	}

	/*
	 * The reported group address must be multicast and must match
	 * the IP destination address of the packet.
	 */
	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		IGMPSTAT_INC(igps_rcv_badreports);
		OIGMPSTAT_INC(igps_rcv_badreports);
		return EINVAL;
	}

	/*
	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
	 * Booting clients may use the source address 0.0.0.0. Some
	 * IGMP daemons may not know how to use IP_RECVIF to determine
	 * the interface upon which this message was received.
	 * Replace 0.0.0.0 with the subnet address if told to do so.
	 */
	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
		if (ia != NULL) {
			IFA_LOCK(&ia->ia_ifa);
			ip->ip_src.s_addr = htonl(ia->ia_subnet);
			IFA_UNLOCK(&ia->ia_ifa);
		}
	}
	if (ia != NULL) {
		IFA_REMREF(&ia->ia_ifa);
	}

	IGMP_INET_PRINTF(igmp->igmp_group,
	    ("process v2 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	/*
	 * IGMPv2 report suppression.
	 * If we are a member of this group, and our membership should be
	 * reported, and our group timer is pending or about to be reset,
	 * stop our group timer by transitioning to the 'lazy' state.
	 */
	in_multihead_lock_shared();
	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
	in_multihead_lock_done();
	if (inm != NULL) {
		struct igmp_ifinfo *igi;

		INM_LOCK(inm);
		igi = inm->inm_igi;
		VERIFY(igi != NULL);

		IGMPSTAT_INC(igps_rcv_ourreports);
		OIGMPSTAT_INC(igps_rcv_ourreports);

		/*
		 * If we are in IGMPv3 host mode, do not allow the
		 * other host's IGMPv2 report to suppress our reports
		 * unless explicitly configured to do so.
		 */
		IGI_LOCK(igi);
		if (igi->igi_version == IGMP_VERSION_3) {
			if (igmp_legacysupp) {
				igmp_v3_suppress_group_record(inm);
			}
			IGI_UNLOCK(igi);
			INM_UNLOCK(inm);
			INM_REMREF(inm);
			return 0;
		}

		/* Another member reported first; cancel our own report. */
		inm->inm_timer = 0;

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
		case IGMP_SLEEPING_MEMBER:
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			IGMP_INET_PRINTF(igmp->igmp_group,
			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
			    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp),
			    if_name(ifp)));
			OS_FALLTHROUGH;
		case IGMP_LAZY_MEMBER:
			inm->inm_state = IGMP_LAZY_MEMBER;
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			break;
		}
		IGI_UNLOCK(igi);
		INM_UNLOCK(inm);
		INM_REMREF(inm);
	}

	return 0;
}
1652 
/*
 * IGMP input routine, invoked for IPPROTO_IGMP packets.
 *
 * Validates length, checksum and TTL, then dispatches queries and
 * reports to the per-version handlers. Valid packets are finally handed
 * to rip_input() so raw-socket listeners also see them; on validation
 * failure the mbuf is freed here and nothing is passed up.
 */
void
igmp_input(struct mbuf *m, int off)
{
	int iphlen;
	struct ifnet *ifp;
	struct igmp *igmp;
	struct ip *ip;
	int igmplen;
	int minlen;
	int queryver;

	IGMP_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m), off));

	ifp = m->m_pkthdr.rcvif;

	IGMPSTAT_INC(igps_rcv_total);
	OIGMPSTAT_INC(igps_rcv_total);

	/* Expect 32-bit aligned data pointer on strict-align platforms */
	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);

	ip = mtod(m, struct ip *);
	iphlen = off;

	/* By now, ip_len no longer contains the length of IP header */
	igmplen = ip->ip_len;

	/*
	 * Validate lengths.
	 */
	if (igmplen < IGMP_MINLEN) {
		IGMPSTAT_INC(igps_rcv_tooshort);
		OIGMPSTAT_INC(igps_rcv_tooshort);
		m_freem(m);
		return;
	}

	/*
	 * Always pullup to the minimum size for v1/v2 or v3
	 * to amortize calls to m_pulldown().
	 */
	if (igmplen >= IGMP_V3_QUERY_MINLEN) {
		minlen = IGMP_V3_QUERY_MINLEN;
	} else {
		minlen = IGMP_MINLEN;
	}

	/* A bit more expensive than M_STRUCT_GET, but ensures alignment */
	M_STRUCT_GET0(igmp, struct igmp *, m, off, minlen);
	if (igmp == NULL) {
		IGMPSTAT_INC(igps_rcv_tooshort);
		OIGMPSTAT_INC(igps_rcv_tooshort);
		/*
		 * NOTE(review): no m_freem() here — presumably
		 * M_STRUCT_GET0 consumes the mbuf on failure; confirm.
		 */
		return;
	}
	/* N.B.: we assume the packet was correctly aligned in ip_input. */

	/*
	 * Validate checksum.
	 */
	/* Temporarily advance past the IP header for in_cksum(). */
	m->m_data += iphlen;
	m->m_len -= iphlen;
	if (in_cksum(m, igmplen)) {
		IGMPSTAT_INC(igps_rcv_badsum);
		OIGMPSTAT_INC(igps_rcv_badsum);
		m_freem(m);
		return;
	}
	m->m_data -= iphlen;
	m->m_len += iphlen;

	/*
	 * IGMP control traffic is link-scope, and must have a TTL of 1.
	 * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
	 * probe packets may come from beyond the LAN.
	 */
	if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
		IGMPSTAT_INC(igps_rcv_badttl);
		m_freem(m);
		return;
	}

	switch (igmp->igmp_type) {
	case IGMP_HOST_MEMBERSHIP_QUERY:
		/*
		 * Distinguish query versions: v1/v2 queries are 8 bytes
		 * (v1 iff Max Resp Code is 0); v3 queries are >= 12 bytes.
		 */
		if (igmplen == IGMP_MINLEN) {
			if (igmp->igmp_code == 0) {
				queryver = IGMP_VERSION_1;
			} else {
				queryver = IGMP_VERSION_2;
			}
		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
			queryver = IGMP_VERSION_3;
		} else {
			IGMPSTAT_INC(igps_rcv_tooshort);
			OIGMPSTAT_INC(igps_rcv_tooshort);
			m_freem(m);
			return;
		}

		OIGMPSTAT_INC(igps_rcv_queries);

		switch (queryver) {
		case IGMP_VERSION_1:
			IGMPSTAT_INC(igps_rcv_v1v2_queries);
			if (!igmp_v1enable) {
				break;
			}
			if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
				m_freem(m);
				return;
			}
			break;

		case IGMP_VERSION_2:
			IGMPSTAT_INC(igps_rcv_v1v2_queries);
			if (!igmp_v2enable) {
				break;
			}
			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
				m_freem(m);
				return;
			}
			break;

		case IGMP_VERSION_3: {
			struct igmpv3 *igmpv3;
			uint16_t igmpv3len;
			uint16_t srclen;
			int nsrc;

			IGMPSTAT_INC(igps_rcv_v3_queries);
			igmpv3 = (struct igmpv3 *)igmp;
			/*
			 * Validate length based on source count.
			 */
			nsrc = ntohs(igmpv3->igmp_numsrc);
			/*
			 * The max value of nsrc is limited by the
			 * MTU of the network on which the datagram
			 * is received
			 */
			if (nsrc < 0 || nsrc > IGMP_V3_QUERY_MAX_SRCS) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				m_freem(m);
				return;
			}
			srclen = sizeof(struct in_addr) * (uint16_t)nsrc;
			if (igmplen < (IGMP_V3_QUERY_MINLEN + srclen)) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				m_freem(m);
				return;
			}
			igmpv3len = IGMP_V3_QUERY_MINLEN + srclen;
			/*
			 * A bit more expensive than M_STRUCT_GET,
			 * but ensures alignment.
			 */
			M_STRUCT_GET0(igmpv3, struct igmpv3 *, m,
			    off, igmpv3len);
			if (igmpv3 == NULL) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				return;
			}
			/*
			 * N.B.: we assume the packet was correctly
			 * aligned in ip_input.
			 */
			if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
				m_freem(m);
				return;
			}
		}
		break;
		}
		break;

	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
		if (!igmp_v1enable) {
			break;
		}
		if (igmp_input_v1_report(ifp, m, ip, igmp) != 0) {
			m_freem(m);
			return;
		}
		break;

	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
		if (!igmp_v2enable) {
			break;
		}
		/* Count (but do not drop) reports lacking Router Alert. */
		if (!ip_checkrouteralert(m)) {
			IGMPSTAT_INC(igps_rcv_nora);
		}
		if (igmp_input_v2_report(ifp, m, ip, igmp) != 0) {
			m_freem(m);
			return;
		}
		break;

	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
		/*
		 * Hosts do not need to process IGMPv3 membership reports,
		 * as report suppression is no longer required.
		 */
		if (!ip_checkrouteralert(m)) {
			IGMPSTAT_INC(igps_rcv_nora);
		}
		break;

	default:
		break;
	}

	IGMP_LOCK_ASSERT_NOTHELD();
	/*
	 * Pass all valid IGMP packets up to any process(es) listening on a
	 * raw IGMP socket.
	 */
	rip_input(m, off);
}
1876 
1877 /*
1878  * Schedule IGMP timer based on various parameters; caller must ensure that
1879  * lock ordering is maintained as this routine acquires IGMP global lock.
1880  */
1881 void
igmp_set_timeout(struct igmp_tparams * itp)1882 igmp_set_timeout(struct igmp_tparams *itp)
1883 {
1884 	IGMP_LOCK_ASSERT_NOTHELD();
1885 	VERIFY(itp != NULL);
1886 
1887 	if (itp->qpt != 0 || itp->it != 0 || itp->cst != 0 || itp->sct != 0) {
1888 		IGMP_LOCK();
1889 		if (itp->qpt != 0) {
1890 			querier_present_timers_running = 1;
1891 		}
1892 		if (itp->it != 0) {
1893 			interface_timers_running = 1;
1894 		}
1895 		if (itp->cst != 0) {
1896 			current_state_timers_running = 1;
1897 		}
1898 		if (itp->sct != 0) {
1899 			state_change_timers_running = 1;
1900 		}
1901 		igmp_sched_timeout(itp->fast);
1902 		IGMP_UNLOCK();
1903 	}
1904 }
1905 
/*
 * As igmp_set_timeout(), but request the short (fast) timeout interval.
 */
void
igmp_set_fast_timeout(struct igmp_tparams *itp)
{
	VERIFY(itp != NULL);
	itp->fast = true;
	igmp_set_timeout(itp);
}
1913 
1914 /*
1915  * IGMP timer handler (per 1 second).
1916  */
1917 static void
igmp_timeout(void * arg)1918 igmp_timeout(void *arg)
1919 {
1920 	struct ifqueue           scq;   /* State-change packets */
1921 	struct ifqueue           qrq;   /* Query response packets */
1922 	struct ifnet            *ifp;
1923 	struct igmp_ifinfo      *igi;
1924 	struct in_multi         *inm;
1925 	unsigned int             loop = 0, uri_sec = 0;
1926 	SLIST_HEAD(, in_multi)  inm_dthead;
1927 	bool                     fast = arg != NULL;
1928 
1929 	SLIST_INIT(&inm_dthead);
1930 
1931 	/*
1932 	 * Update coarse-grained networking timestamp (in sec.); the idea
1933 	 * is to piggy-back on the timeout callout to update the counter
1934 	 * returnable via net_uptime().
1935 	 */
1936 	net_update_uptime();
1937 
1938 	IGMP_LOCK();
1939 
1940 	IGMP_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d, fast %d\n", __func__,
1941 	    querier_present_timers_running, interface_timers_running,
1942 	    current_state_timers_running, state_change_timers_running,
1943 	    fast));
1944 
1945 	if (fast) {
1946 		/*
1947 		 * When running the fast timer, skip processing
1948 		 * of "querier present" timers since they are
1949 		 * based on 1-second intervals.
1950 		 */
1951 		goto skip_query_timers;
1952 	}
1953 	/*
1954 	 * IGMPv1/v2 querier present timer processing.
1955 	 */
1956 	if (querier_present_timers_running) {
1957 		querier_present_timers_running = 0;
1958 		LIST_FOREACH(igi, &igi_head, igi_link) {
1959 			IGI_LOCK(igi);
1960 			igmp_v1v2_process_querier_timers(igi);
1961 			if (igi->igi_v1_timer > 0 || igi->igi_v2_timer > 0) {
1962 				querier_present_timers_running = 1;
1963 			}
1964 			IGI_UNLOCK(igi);
1965 		}
1966 	}
1967 
1968 	/*
1969 	 * IGMPv3 General Query response timer processing.
1970 	 */
1971 	if (interface_timers_running) {
1972 		IGMP_PRINTF(("%s: interface timers running\n", __func__));
1973 		interface_timers_running = 0;
1974 		LIST_FOREACH(igi, &igi_head, igi_link) {
1975 			IGI_LOCK(igi);
1976 			if (igi->igi_version != IGMP_VERSION_3) {
1977 				IGI_UNLOCK(igi);
1978 				continue;
1979 			}
1980 			if (igi->igi_v3_timer == 0) {
1981 				/* Do nothing. */
1982 			} else if (--igi->igi_v3_timer == 0) {
1983 				if (igmp_v3_dispatch_general_query(igi) > 0) {
1984 					interface_timers_running = 1;
1985 				}
1986 			} else {
1987 				interface_timers_running = 1;
1988 			}
1989 			IGI_UNLOCK(igi);
1990 		}
1991 	}
1992 
1993 skip_query_timers:
1994 	if (!current_state_timers_running &&
1995 	    !state_change_timers_running) {
1996 		goto out_locked;
1997 	}
1998 
1999 	current_state_timers_running = 0;
2000 	state_change_timers_running = 0;
2001 
2002 	memset(&qrq, 0, sizeof(struct ifqueue));
2003 	qrq.ifq_maxlen = IGMP_MAX_G_GS_PACKETS;
2004 
2005 	memset(&scq, 0, sizeof(struct ifqueue));
2006 	scq.ifq_maxlen =  IGMP_MAX_STATE_CHANGE_PACKETS;
2007 
2008 	IGMP_PRINTF(("%s: state change timers running\n", __func__));
2009 
2010 	/*
2011 	 * IGMPv1/v2/v3 host report and state-change timer processing.
2012 	 * Note: Processing a v3 group timer may remove a node.
2013 	 */
2014 	LIST_FOREACH(igi, &igi_head, igi_link) {
2015 		struct in_multistep step;
2016 
2017 		IGI_LOCK(igi);
2018 		ifp = igi->igi_ifp;
2019 		loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
2020 		uri_sec = IGMP_RANDOM_DELAY(igi->igi_uri);
2021 		IGI_UNLOCK(igi);
2022 
2023 		in_multihead_lock_shared();
2024 		IN_FIRST_MULTI(step, inm);
2025 		while (inm != NULL) {
2026 			INM_LOCK(inm);
2027 			if (inm->inm_ifp != ifp) {
2028 				goto next;
2029 			}
2030 
2031 			IGI_LOCK(igi);
2032 			switch (igi->igi_version) {
2033 			case IGMP_VERSION_1:
2034 			case IGMP_VERSION_2:
2035 				igmp_v1v2_process_group_timer(inm,
2036 				    igi->igi_version);
2037 				break;
2038 			case IGMP_VERSION_3:
2039 				igmp_v3_process_group_timers(igi, &qrq,
2040 				    &scq, inm, uri_sec);
2041 				break;
2042 			}
2043 			IGI_UNLOCK(igi);
2044 next:
2045 			INM_UNLOCK(inm);
2046 			IN_NEXT_MULTI(step, inm);
2047 		}
2048 		in_multihead_lock_done();
2049 
2050 		IGI_LOCK(igi);
2051 		if (igi->igi_version == IGMP_VERSION_1 ||
2052 		    igi->igi_version == IGMP_VERSION_2) {
2053 			igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop);
2054 		} else if (igi->igi_version == IGMP_VERSION_3) {
2055 			IGI_UNLOCK(igi);
2056 			igmp_dispatch_queue(NULL, &qrq, 0, loop);
2057 			igmp_dispatch_queue(NULL, &scq, 0, loop);
2058 			VERIFY(qrq.ifq_len == 0);
2059 			VERIFY(scq.ifq_len == 0);
2060 			IGI_LOCK(igi);
2061 		}
2062 		/*
2063 		 * In case there are still any pending membership reports
2064 		 * which didn't get drained at version change time.
2065 		 */
2066 		IF_DRAIN(&igi->igi_v2q);
2067 		/*
2068 		 * Release all deferred inm records, and drain any locally
2069 		 * enqueued packets; do it even if the current IGMP version
2070 		 * for the link is no longer IGMPv3, in order to handle the
2071 		 * version change case.
2072 		 */
2073 		igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
2074 		VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
2075 		IGI_UNLOCK(igi);
2076 
2077 		IF_DRAIN(&qrq);
2078 		IF_DRAIN(&scq);
2079 	}
2080 
2081 out_locked:
2082 	/* re-arm the timer if there's work to do */
2083 	igmp_timeout_run = 0;
2084 	igmp_sched_timeout(false);
2085 	IGMP_UNLOCK();
2086 
2087 	/* Now that we're dropped all locks, release detached records */
2088 	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
2089 }
2090 
2091 static void
igmp_sched_timeout(bool fast)2092 igmp_sched_timeout(bool fast)
2093 {
2094 	IGMP_LOCK_ASSERT_HELD();
2095 
2096 	if (!igmp_timeout_run &&
2097 	    (querier_present_timers_running || current_state_timers_running ||
2098 	    interface_timers_running || state_change_timers_running)) {
2099 		igmp_timeout_run = 1;
2100 		int sched_hz = fast ? 0 : hz;
2101 		void *arg = fast ? (void *)igmp_sched_timeout : NULL;
2102 		timeout(igmp_timeout, arg, sched_hz);
2103 	}
2104 }
2105 
2106 /*
2107  * Free the in_multi reference(s) for this IGMP lifecycle.
2108  *
2109  * Caller must be holding igi_lock.
2110  */
2111 static void
igmp_flush_relq(struct igmp_ifinfo * igi,struct igmp_inm_relhead * inm_dthead)2112 igmp_flush_relq(struct igmp_ifinfo *igi, struct igmp_inm_relhead *inm_dthead)
2113 {
2114 	struct in_multi *inm;
2115 
2116 again:
2117 	IGI_LOCK_ASSERT_HELD(igi);
2118 	inm = SLIST_FIRST(&igi->igi_relinmhead);
2119 	if (inm != NULL) {
2120 		int lastref;
2121 
2122 		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
2123 		IGI_UNLOCK(igi);
2124 
2125 		in_multihead_lock_exclusive();
2126 		INM_LOCK(inm);
2127 		VERIFY(inm->inm_nrelecnt != 0);
2128 		inm->inm_nrelecnt--;
2129 		lastref = in_multi_detach(inm);
2130 		VERIFY(!lastref || (!(inm->inm_debug & IFD_ATTACHED) &&
2131 		    inm->inm_reqcnt == 0));
2132 		INM_UNLOCK(inm);
2133 		in_multihead_lock_done();
2134 		/* from igi_relinmhead */
2135 		INM_REMREF(inm);
2136 		/* from in_multihead list */
2137 		if (lastref) {
2138 			/*
2139 			 * Defer releasing our final reference, as we
2140 			 * are holding the IGMP lock at this point, and
2141 			 * we could end up with locking issues later on
2142 			 * (while issuing SIOCDELMULTI) when this is the
2143 			 * final reference count.  Let the caller do it
2144 			 * when it is safe.
2145 			 */
2146 			IGMP_ADD_DETACHED_INM(inm_dthead, inm);
2147 		}
2148 		IGI_LOCK(igi);
2149 		goto again;
2150 	}
2151 }
2152 
2153 /*
2154  * Update host report group timer for IGMPv1/v2.
2155  * Will update the global pending timer flags.
2156  */
2157 static void
igmp_v1v2_process_group_timer(struct in_multi * inm,const int igmp_version)2158 igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version)
2159 {
2160 	int report_timer_expired;
2161 
2162 	IGMP_LOCK_ASSERT_HELD();
2163 	INM_LOCK_ASSERT_HELD(inm);
2164 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2165 
2166 	if (inm->inm_timer == 0) {
2167 		report_timer_expired = 0;
2168 	} else if (--inm->inm_timer == 0) {
2169 		report_timer_expired = 1;
2170 	} else {
2171 		current_state_timers_running = 1;
2172 		/* caller will schedule timer */
2173 		return;
2174 	}
2175 
2176 	switch (inm->inm_state) {
2177 	case IGMP_NOT_MEMBER:
2178 	case IGMP_SILENT_MEMBER:
2179 	case IGMP_IDLE_MEMBER:
2180 	case IGMP_LAZY_MEMBER:
2181 	case IGMP_SLEEPING_MEMBER:
2182 	case IGMP_AWAKENING_MEMBER:
2183 		break;
2184 	case IGMP_REPORTING_MEMBER:
2185 		if (report_timer_expired) {
2186 			inm->inm_state = IGMP_IDLE_MEMBER;
2187 			(void) igmp_v1v2_queue_report(inm,
2188 			    (igmp_version == IGMP_VERSION_2) ?
2189 			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
2190 			    IGMP_v1_HOST_MEMBERSHIP_REPORT);
2191 			INM_LOCK_ASSERT_HELD(inm);
2192 			IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2193 		}
2194 		break;
2195 	case IGMP_G_QUERY_PENDING_MEMBER:
2196 	case IGMP_SG_QUERY_PENDING_MEMBER:
2197 	case IGMP_LEAVING_MEMBER:
2198 		break;
2199 	}
2200 }
2201 
2202 /*
2203  * Update a group's timers for IGMPv3.
2204  * Will update the global pending timer flags.
2205  * Note: Unlocked read from igi.
2206  */
2207 static void
igmp_v3_process_group_timers(struct igmp_ifinfo * igi,struct ifqueue * qrq,struct ifqueue * scq,struct in_multi * inm,const unsigned int uri_sec)2208 igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
2209     struct ifqueue *qrq, struct ifqueue *scq,
2210     struct in_multi *inm, const unsigned int uri_sec)
2211 {
2212 	int query_response_timer_expired;
2213 	int state_change_retransmit_timer_expired;
2214 
2215 	IGMP_LOCK_ASSERT_HELD();
2216 	INM_LOCK_ASSERT_HELD(inm);
2217 	IGI_LOCK_ASSERT_HELD(igi);
2218 	VERIFY(igi == inm->inm_igi);
2219 
2220 	query_response_timer_expired = 0;
2221 	state_change_retransmit_timer_expired = 0;
2222 
2223 	/*
2224 	 * During a transition from v1/v2 compatibility mode back to v3,
2225 	 * a group record in REPORTING state may still have its group
2226 	 * timer active. This is a no-op in this function; it is easier
2227 	 * to deal with it here than to complicate the timeout path.
2228 	 */
2229 	if (inm->inm_timer == 0) {
2230 		query_response_timer_expired = 0;
2231 	} else if (--inm->inm_timer == 0) {
2232 		query_response_timer_expired = 1;
2233 	} else {
2234 		current_state_timers_running = 1;
2235 		/* caller will schedule timer */
2236 	}
2237 
2238 	if (inm->inm_sctimer == 0) {
2239 		state_change_retransmit_timer_expired = 0;
2240 	} else if (--inm->inm_sctimer == 0) {
2241 		state_change_retransmit_timer_expired = 1;
2242 	} else {
2243 		state_change_timers_running = 1;
2244 		/* caller will schedule timer */
2245 	}
2246 
2247 	/* We are in timer callback, so be quick about it. */
2248 	if (!state_change_retransmit_timer_expired &&
2249 	    !query_response_timer_expired) {
2250 		return;
2251 	}
2252 
2253 	switch (inm->inm_state) {
2254 	case IGMP_NOT_MEMBER:
2255 	case IGMP_SILENT_MEMBER:
2256 	case IGMP_SLEEPING_MEMBER:
2257 	case IGMP_LAZY_MEMBER:
2258 	case IGMP_AWAKENING_MEMBER:
2259 	case IGMP_IDLE_MEMBER:
2260 		break;
2261 	case IGMP_G_QUERY_PENDING_MEMBER:
2262 	case IGMP_SG_QUERY_PENDING_MEMBER:
2263 		/*
2264 		 * Respond to a previously pending Group-Specific
2265 		 * or Group-and-Source-Specific query by enqueueing
2266 		 * the appropriate Current-State report for
2267 		 * immediate transmission.
2268 		 */
2269 		if (query_response_timer_expired) {
2270 			int retval;
2271 
2272 			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
2273 			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
2274 			IGMP_PRINTF(("%s: enqueue record = %d\n",
2275 			    __func__, retval));
2276 			inm->inm_state = IGMP_REPORTING_MEMBER;
2277 			/* XXX Clear recorded sources for next time. */
2278 			inm_clear_recorded(inm);
2279 		}
2280 		OS_FALLTHROUGH;
2281 	case IGMP_REPORTING_MEMBER:
2282 	case IGMP_LEAVING_MEMBER:
2283 		if (state_change_retransmit_timer_expired) {
2284 			/*
2285 			 * State-change retransmission timer fired.
2286 			 * If there are any further pending retransmissions,
2287 			 * set the global pending state-change flag, and
2288 			 * reset the timer.
2289 			 */
2290 			if (--inm->inm_scrv > 0) {
2291 				inm->inm_sctimer = (uint16_t)uri_sec;
2292 				state_change_timers_running = 1;
2293 				/* caller will schedule timer */
2294 			}
2295 			/*
2296 			 * Retransmit the previously computed state-change
2297 			 * report. If there are no further pending
2298 			 * retransmissions, the mbuf queue will be consumed.
2299 			 * Update T0 state to T1 as we have now sent
2300 			 * a state-change.
2301 			 */
2302 			(void) igmp_v3_merge_state_changes(inm, scq);
2303 
2304 			inm_commit(inm);
2305 			IGMP_INET_PRINTF(inm->inm_addr,
2306 			    ("%s: T1 -> T0 for %s/%s\n", __func__,
2307 			    _igmp_inet_buf, if_name(inm->inm_ifp)));
2308 
2309 			/*
2310 			 * If we are leaving the group for good, make sure
2311 			 * we release IGMP's reference to it.
2312 			 * This release must be deferred using a SLIST,
2313 			 * as we are called from a loop which traverses
2314 			 * the in_multihead list.
2315 			 */
2316 			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
2317 			    inm->inm_scrv == 0) {
2318 				inm->inm_state = IGMP_NOT_MEMBER;
2319 				/*
2320 				 * A reference has already been held in
2321 				 * igmp_final_leave() for this inm, so
2322 				 * no need to hold another one.  We also
2323 				 * bumped up its request count then, so
2324 				 * that it stays in in_multihead.  Both
2325 				 * of them will be released when it is
2326 				 * dequeued later on.
2327 				 */
2328 				VERIFY(inm->inm_nrelecnt != 0);
2329 				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
2330 				    inm, inm_nrele);
2331 			}
2332 		}
2333 		break;
2334 	}
2335 }
2336 
2337 /*
2338  * Suppress a group's pending response to a group or source/group query.
2339  *
2340  * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
2341  * Do NOT update ST1/ST0 as this operation merely suppresses
2342  * the currently pending group record.
2343  * Do NOT suppress the response to a general query. It is possible but
2344  * it would require adding another state or flag.
2345  */
2346 static void
igmp_v3_suppress_group_record(struct in_multi * inm)2347 igmp_v3_suppress_group_record(struct in_multi *inm)
2348 {
2349 	INM_LOCK_ASSERT_HELD(inm);
2350 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2351 
2352 	VERIFY(inm->inm_igi->igi_version == IGMP_VERSION_3);
2353 
2354 	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER ||
2355 	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER) {
2356 		return;
2357 	}
2358 
2359 	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
2360 		inm_clear_recorded(inm);
2361 	}
2362 
2363 	inm->inm_timer = 0;
2364 	inm->inm_state = IGMP_REPORTING_MEMBER;
2365 }
2366 
2367 /*
2368  * Switch to a different IGMP version on the given interface,
2369  * as per Section 7.2.1.
2370  */
2371 static uint32_t
igmp_set_version(struct igmp_ifinfo * igi,const int igmp_version)2372 igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version)
2373 {
2374 	int old_version_timer;
2375 
2376 	IGI_LOCK_ASSERT_HELD(igi);
2377 
2378 	IGMP_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__,
2379 	    igmp_version, (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2380 	    if_name(igi->igi_ifp)));
2381 
2382 	if (igmp_version == IGMP_VERSION_1 || igmp_version == IGMP_VERSION_2) {
2383 		/*
2384 		 * Compute the "Older Version Querier Present" timer as per
2385 		 * Section 8.12, in seconds.
2386 		 */
2387 		old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
2388 
2389 		if (igmp_version == IGMP_VERSION_1) {
2390 			igi->igi_v1_timer = old_version_timer;
2391 			igi->igi_v2_timer = 0;
2392 		} else if (igmp_version == IGMP_VERSION_2) {
2393 			igi->igi_v1_timer = 0;
2394 			igi->igi_v2_timer = old_version_timer;
2395 		}
2396 	}
2397 
2398 	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2399 		if (igi->igi_version != IGMP_VERSION_2) {
2400 			igmp_v3_cancel_link_timers(igi);
2401 			igi->igi_version = IGMP_VERSION_2;
2402 		}
2403 	} else if (igi->igi_v1_timer > 0) {
2404 		if (igi->igi_version != IGMP_VERSION_1) {
2405 			igmp_v3_cancel_link_timers(igi);
2406 			igi->igi_version = IGMP_VERSION_1;
2407 		}
2408 	}
2409 
2410 	IGI_LOCK_ASSERT_HELD(igi);
2411 
2412 	return MAX(igi->igi_v1_timer, igi->igi_v2_timer);
2413 }
2414 
2415 /*
2416  * Cancel pending IGMPv3 timers for the given link and all groups
2417  * joined on it; state-change, general-query, and group-query timers.
2418  *
2419  * Only ever called on a transition from v3 to Compatibility mode. Kill
2420  * the timers stone dead (this may be expensive for large N groups), they
2421  * will be restarted if Compatibility Mode deems that they must be due to
2422  * query processing.
2423  */
2424 static void
igmp_v3_cancel_link_timers(struct igmp_ifinfo * igi)2425 igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
2426 {
2427 	struct ifnet            *ifp;
2428 	struct in_multi         *inm;
2429 	struct in_multistep     step;
2430 
2431 	IGI_LOCK_ASSERT_HELD(igi);
2432 
2433 	IGMP_PRINTF(("%s: cancel v3 timers on ifp 0x%llx(%s)\n", __func__,
2434 	    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), if_name(igi->igi_ifp)));
2435 
2436 	/*
2437 	 * Stop the v3 General Query Response on this link stone dead.
2438 	 * If timer is woken up due to interface_timers_running,
2439 	 * the flag will be cleared if there are no pending link timers.
2440 	 */
2441 	igi->igi_v3_timer = 0;
2442 
2443 	/*
2444 	 * Now clear the current-state and state-change report timers
2445 	 * for all memberships scoped to this link.
2446 	 */
2447 	ifp = igi->igi_ifp;
2448 	IGI_UNLOCK(igi);
2449 
2450 	in_multihead_lock_shared();
2451 	IN_FIRST_MULTI(step, inm);
2452 	while (inm != NULL) {
2453 		INM_LOCK(inm);
2454 		if (inm->inm_ifp != ifp && inm->inm_igi != igi) {
2455 			goto next;
2456 		}
2457 
2458 		switch (inm->inm_state) {
2459 		case IGMP_NOT_MEMBER:
2460 		case IGMP_SILENT_MEMBER:
2461 		case IGMP_IDLE_MEMBER:
2462 		case IGMP_LAZY_MEMBER:
2463 		case IGMP_SLEEPING_MEMBER:
2464 		case IGMP_AWAKENING_MEMBER:
2465 			/*
2466 			 * These states are either not relevant in v3 mode,
2467 			 * or are unreported. Do nothing.
2468 			 */
2469 			break;
2470 		case IGMP_LEAVING_MEMBER:
2471 			/*
2472 			 * If we are leaving the group and switching to
2473 			 * compatibility mode, we need to release the final
2474 			 * reference held for issuing the INCLUDE {}, and
2475 			 * transition to REPORTING to ensure the host leave
2476 			 * message is sent upstream to the old querier --
2477 			 * transition to NOT would lose the leave and race.
2478 			 * During igmp_final_leave(), we bumped up both the
2479 			 * request and reference counts.  Since we cannot
2480 			 * call in_multi_detach() here, defer this task to
2481 			 * the timer routine.
2482 			 */
2483 			VERIFY(inm->inm_nrelecnt != 0);
2484 			IGI_LOCK(igi);
2485 			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
2486 			IGI_UNLOCK(igi);
2487 			OS_FALLTHROUGH;
2488 		case IGMP_G_QUERY_PENDING_MEMBER:
2489 		case IGMP_SG_QUERY_PENDING_MEMBER:
2490 			inm_clear_recorded(inm);
2491 			OS_FALLTHROUGH;
2492 		case IGMP_REPORTING_MEMBER:
2493 			inm->inm_state = IGMP_REPORTING_MEMBER;
2494 			break;
2495 		}
2496 		/*
2497 		 * Always clear state-change and group report timers.
2498 		 * Free any pending IGMPv3 state-change records.
2499 		 */
2500 		inm->inm_sctimer = 0;
2501 		inm->inm_timer = 0;
2502 		IF_DRAIN(&inm->inm_scq);
2503 next:
2504 		INM_UNLOCK(inm);
2505 		IN_NEXT_MULTI(step, inm);
2506 	}
2507 	in_multihead_lock_done();
2508 
2509 	IGI_LOCK(igi);
2510 }
2511 
2512 /*
2513  * Update the Older Version Querier Present timers for a link.
2514  * See Section 7.2.1 of RFC 3376.
2515  */
2516 static void
igmp_v1v2_process_querier_timers(struct igmp_ifinfo * igi)2517 igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
2518 {
2519 	IGI_LOCK_ASSERT_HELD(igi);
2520 
2521 	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
2522 		/*
2523 		 * IGMPv1 and IGMPv2 Querier Present timers expired.
2524 		 *
2525 		 * Revert to IGMPv3.
2526 		 */
2527 		if (igi->igi_version != IGMP_VERSION_3) {
2528 			IGMP_PRINTF(("%s: transition from v%d -> v%d "
2529 			    "on 0x%llx(%s)\n", __func__,
2530 			    igi->igi_version, IGMP_VERSION_3,
2531 			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2532 			    if_name(igi->igi_ifp)));
2533 			igi->igi_version = IGMP_VERSION_3;
2534 			IF_DRAIN(&igi->igi_v2q);
2535 		}
2536 	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2537 		/*
2538 		 * IGMPv1 Querier Present timer expired,
2539 		 * IGMPv2 Querier Present timer running.
2540 		 * If IGMPv2 was disabled since last timeout,
2541 		 * revert to IGMPv3.
2542 		 * If IGMPv2 is enabled, revert to IGMPv2.
2543 		 */
2544 		if (!igmp_v2enable) {
2545 			IGMP_PRINTF(("%s: transition from v%d -> v%d "
2546 			    "on 0x%llx(%s%d)\n", __func__,
2547 			    igi->igi_version, IGMP_VERSION_3,
2548 			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2549 			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
2550 			igi->igi_v2_timer = 0;
2551 			igi->igi_version = IGMP_VERSION_3;
2552 			IF_DRAIN(&igi->igi_v2q);
2553 		} else {
2554 			--igi->igi_v2_timer;
2555 			if (igi->igi_version != IGMP_VERSION_2) {
2556 				IGMP_PRINTF(("%s: transition from v%d -> v%d "
2557 				    "on 0x%llx(%s)\n", __func__,
2558 				    igi->igi_version, IGMP_VERSION_2,
2559 				    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2560 				    if_name(igi->igi_ifp)));
2561 				IF_DRAIN(&igi->igi_gq);
2562 				igmp_v3_cancel_link_timers(igi);
2563 				igi->igi_version = IGMP_VERSION_2;
2564 			}
2565 		}
2566 	} else if (igi->igi_v1_timer > 0) {
2567 		/*
2568 		 * IGMPv1 Querier Present timer running.
2569 		 * Stop IGMPv2 timer if running.
2570 		 *
2571 		 * If IGMPv1 was disabled since last timeout,
2572 		 * revert to IGMPv3.
2573 		 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
2574 		 */
2575 		if (!igmp_v1enable) {
2576 			IGMP_PRINTF(("%s: transition from v%d -> v%d "
2577 			    "on 0x%llx(%s%d)\n", __func__,
2578 			    igi->igi_version, IGMP_VERSION_3,
2579 			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2580 			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
2581 			igi->igi_v1_timer = 0;
2582 			igi->igi_version = IGMP_VERSION_3;
2583 			IF_DRAIN(&igi->igi_v2q);
2584 		} else {
2585 			--igi->igi_v1_timer;
2586 		}
2587 		if (igi->igi_v2_timer > 0) {
2588 			IGMP_PRINTF(("%s: cancel v2 timer on 0x%llx(%s%d)\n",
2589 			    __func__,
2590 			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2591 			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
2592 			igi->igi_v2_timer = 0;
2593 		}
2594 	}
2595 }
2596 
2597 /*
2598  * Dispatch an IGMPv1/v2 host report or leave message.
2599  * These are always small enough to fit inside a single mbuf.
2600  */
2601 static int
igmp_v1v2_queue_report(struct in_multi * inm,const int type)2602 igmp_v1v2_queue_report(struct in_multi *inm, const int type)
2603 {
2604 	struct ifnet            *ifp;
2605 	struct igmp             *igmp;
2606 	struct ip               *ip;
2607 	struct mbuf             *m;
2608 	int                     error = 0;
2609 
2610 	INM_LOCK_ASSERT_HELD(inm);
2611 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2612 
2613 	ifp = inm->inm_ifp;
2614 
2615 	MGETHDR(m, M_DONTWAIT, MT_DATA);
2616 	if (m == NULL) {
2617 		return ENOMEM;
2618 	}
2619 	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
2620 
2621 	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);
2622 
2623 	m->m_data += sizeof(struct ip);
2624 	m->m_len = sizeof(struct igmp);
2625 
2626 	igmp = mtod(m, struct igmp *);
2627 	igmp->igmp_type = (u_char)type;
2628 	igmp->igmp_code = 0;
2629 	igmp->igmp_group = inm->inm_addr;
2630 	igmp->igmp_cksum = 0;
2631 	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));
2632 
2633 	m->m_data -= sizeof(struct ip);
2634 	m->m_len += sizeof(struct ip);
2635 
2636 	ip = mtod(m, struct ip *);
2637 	ip->ip_tos = 0;
2638 	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
2639 	ip->ip_off = 0;
2640 	ip->ip_p = IPPROTO_IGMP;
2641 	ip->ip_src.s_addr = INADDR_ANY;
2642 
2643 	if (type == IGMP_HOST_LEAVE_MESSAGE) {
2644 		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
2645 	} else {
2646 		ip->ip_dst = inm->inm_addr;
2647 	}
2648 
2649 	igmp_save_context(m, ifp);
2650 
2651 	m->m_flags |= M_IGMPV2;
2652 	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK) {
2653 		m->m_flags |= M_IGMP_LOOP;
2654 	}
2655 
2656 	/*
2657 	 * Due to the fact that at this point we are possibly holding
2658 	 * in_multihead_lock in shared or exclusive mode, we can't call
2659 	 * igmp_sendpkt() here since that will eventually call ip_output(),
2660 	 * which will try to lock in_multihead_lock and cause a deadlock.
2661 	 * Instead we defer the work to the igmp_timeout() thread, thus
2662 	 * avoiding unlocking in_multihead_lock here.
2663 	 */
2664 	if (IF_QFULL(&inm->inm_igi->igi_v2q)) {
2665 		IGMP_PRINTF(("%s: v1/v2 outbound queue full\n", __func__));
2666 		error = ENOMEM;
2667 		m_freem(m);
2668 	} else {
2669 		IF_ENQUEUE(&inm->inm_igi->igi_v2q, m);
2670 		VERIFY(error == 0);
2671 	}
2672 	return error;
2673 }
2674 
2675 /*
2676  * Process a state change from the upper layer for the given IPv4 group.
2677  *
2678  * Each socket holds a reference on the in_multi in its own ip_moptions.
2679  * The socket layer will have made the necessary updates to the group
2680  * state, it is now up to IGMP to issue a state change report if there
2681  * has been any change between T0 (when the last state-change was issued)
2682  * and T1 (now).
2683  *
2684  * We use the IGMPv3 state machine at group level. The IGMP module
2685  * however makes the decision as to which IGMP protocol version to speak.
2686  * A state change *from* INCLUDE {} always means an initial join.
2687  * A state change *to* INCLUDE {} always means a final leave.
2688  *
2689  * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
2690  * save ourselves a bunch of work; any exclusive mode groups need not
2691  * compute source filter lists.
2692  */
2693 int
igmp_change_state(struct in_multi * inm,struct igmp_tparams * itp)2694 igmp_change_state(struct in_multi *inm, struct igmp_tparams *itp)
2695 {
2696 	struct igmp_ifinfo *igi;
2697 	struct ifnet *ifp;
2698 	int error = 0;
2699 
2700 	VERIFY(itp != NULL);
2701 	bzero(itp, sizeof(*itp));
2702 
2703 	INM_LOCK_ASSERT_HELD(inm);
2704 	VERIFY(inm->inm_igi != NULL);
2705 	IGI_LOCK_ASSERT_NOTHELD(inm->inm_igi);
2706 
2707 	/*
2708 	 * Try to detect if the upper layer just asked us to change state
2709 	 * for an interface which has now gone away.
2710 	 */
2711 	VERIFY(inm->inm_ifma != NULL);
2712 	ifp = inm->inm_ifma->ifma_ifp;
2713 	/*
2714 	 * Sanity check that netinet's notion of ifp is the same as net's.
2715 	 */
2716 	VERIFY(inm->inm_ifp == ifp);
2717 
2718 	igi = IGMP_IFINFO(ifp);
2719 	VERIFY(igi != NULL);
2720 
2721 	/*
2722 	 * If we detect a state transition to or from MCAST_UNDEFINED
2723 	 * for this group, then we are starting or finishing an IGMP
2724 	 * life cycle for this group.
2725 	 */
2726 	if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
2727 		IGMP_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2728 		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode));
2729 		if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
2730 			IGMP_PRINTF(("%s: initial join\n", __func__));
2731 			error = igmp_initial_join(inm, igi, itp);
2732 			goto out;
2733 		} else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
2734 			IGMP_PRINTF(("%s: final leave\n", __func__));
2735 			igmp_final_leave(inm, igi, itp);
2736 			goto out;
2737 		}
2738 	} else {
2739 		IGMP_PRINTF(("%s: filter set change\n", __func__));
2740 	}
2741 
2742 	error = igmp_handle_state_change(inm, igi, itp);
2743 out:
2744 	return error;
2745 }
2746 
2747 /*
2748  * Perform the initial join for an IGMP group.
2749  *
2750  * When joining a group:
2751  *  If the group should have its IGMP traffic suppressed, do nothing.
2752  *  IGMPv1 starts sending IGMPv1 host membership reports.
2753  *  IGMPv2 starts sending IGMPv2 host membership reports.
2754  *  IGMPv3 will schedule an IGMPv3 state-change report containing the
2755  *  initial state of the membership.
2756  */
2757 static int
igmp_initial_join(struct in_multi * inm,struct igmp_ifinfo * igi,struct igmp_tparams * itp)2758 igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi,
2759     struct igmp_tparams *itp)
2760 {
2761 	struct ifnet            *ifp;
2762 	struct ifqueue          *ifq;
2763 	int                      error, retval, syncstates;
2764 
2765 	INM_LOCK_ASSERT_HELD(inm);
2766 	IGI_LOCK_ASSERT_NOTHELD(igi);
2767 	VERIFY(itp != NULL);
2768 
2769 	IGMP_INET_PRINTF(inm->inm_addr,
2770 	    ("%s: initial join %s on ifp 0x%llx(%s)\n", __func__,
2771 	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
2772 	    if_name(inm->inm_ifp)));
2773 
2774 	error = 0;
2775 	syncstates = 1;
2776 
2777 	ifp = inm->inm_ifp;
2778 
2779 	IGI_LOCK(igi);
2780 	VERIFY(igi->igi_ifp == ifp);
2781 
2782 	/*
2783 	 * Groups joined on loopback or marked as 'not reported',
2784 	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
2785 	 * are never reported in any IGMP protocol exchanges.
2786 	 * All other groups enter the appropriate IGMP state machine
2787 	 * for the version in use on this link.
2788 	 * A link marked as IGIF_SILENT causes IGMP to be completely
2789 	 * disabled for the link.
2790 	 */
2791 	if ((ifp->if_flags & IFF_LOOPBACK) ||
2792 	    (igi->igi_flags & IGIF_SILENT) ||
2793 	    !igmp_isgroupreported(inm->inm_addr)) {
2794 		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
2795 		    __func__));
2796 		inm->inm_state = IGMP_SILENT_MEMBER;
2797 		inm->inm_timer = 0;
2798 	} else {
2799 		/*
2800 		 * Deal with overlapping in_multi lifecycle.
2801 		 * If this group was LEAVING, then make sure
2802 		 * we drop the reference we picked up to keep the
2803 		 * group around for the final INCLUDE {} enqueue.
2804 		 * Since we cannot call in_multi_detach() here,
2805 		 * defer this task to the timer routine.
2806 		 */
2807 		if (igi->igi_version == IGMP_VERSION_3 &&
2808 		    inm->inm_state == IGMP_LEAVING_MEMBER) {
2809 			VERIFY(inm->inm_nrelecnt != 0);
2810 			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
2811 		}
2812 
2813 		inm->inm_state = IGMP_REPORTING_MEMBER;
2814 
2815 		switch (igi->igi_version) {
2816 		case IGMP_VERSION_1:
2817 		case IGMP_VERSION_2:
2818 			inm->inm_state = IGMP_IDLE_MEMBER;
2819 			error = igmp_v1v2_queue_report(inm,
2820 			    (igi->igi_version == IGMP_VERSION_2) ?
2821 			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
2822 			    IGMP_v1_HOST_MEMBERSHIP_REPORT);
2823 
2824 			INM_LOCK_ASSERT_HELD(inm);
2825 			IGI_LOCK_ASSERT_HELD(igi);
2826 
2827 			if (error == 0) {
2828 				inm->inm_timer =
2829 				    IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
2830 				itp->cst = 1;
2831 			}
2832 			break;
2833 
2834 		case IGMP_VERSION_3:
2835 			/*
2836 			 * Defer update of T0 to T1, until the first copy
2837 			 * of the state change has been transmitted.
2838 			 */
2839 			syncstates = 0;
2840 
2841 			/*
2842 			 * Immediately enqueue a State-Change Report for
2843 			 * this interface, freeing any previous reports.
2844 			 * Don't kick the timers if there is nothing to do,
2845 			 * or if an error occurred.
2846 			 */
2847 			ifq = &inm->inm_scq;
2848 			IF_DRAIN(ifq);
2849 			retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
2850 			    0, 0);
2851 			itp->cst = (ifq->ifq_len > 0);
2852 			IGMP_PRINTF(("%s: enqueue record = %d\n",
2853 			    __func__, retval));
2854 			if (retval <= 0) {
2855 				error = retval * -1;
2856 				break;
2857 			}
2858 
2859 			/*
2860 			 * Schedule transmission of pending state-change
2861 			 * report up to RV times for this link. The timer
2862 			 * will fire at the next igmp_timeout (1 second),
2863 			 * giving us an opportunity to merge the reports.
2864 			 */
2865 			if (igi->igi_flags & IGIF_LOOPBACK) {
2866 				inm->inm_scrv = 1;
2867 			} else {
2868 				VERIFY(igi->igi_rv > 1);
2869 				inm->inm_scrv = (uint16_t)igi->igi_rv;
2870 			}
2871 			inm->inm_sctimer = 1;
2872 			itp->sct = 1;
2873 
2874 			error = 0;
2875 			break;
2876 		}
2877 	}
2878 	IGI_UNLOCK(igi);
2879 
2880 	/*
2881 	 * Only update the T0 state if state change is atomic,
2882 	 * i.e. we don't need to wait for a timer to fire before we
2883 	 * can consider the state change to have been communicated.
2884 	 */
2885 	if (syncstates) {
2886 		inm_commit(inm);
2887 		IGMP_INET_PRINTF(inm->inm_addr,
2888 		    ("%s: T1 -> T0 for %s/%s\n", __func__,
2889 		    _igmp_inet_buf, if_name(inm->inm_ifp)));
2890 	}
2891 
2892 	return error;
2893 }
2894 
2895 /*
2896  * Issue an intermediate state change during the IGMP life-cycle.
2897  */
2898 static int
igmp_handle_state_change(struct in_multi * inm,struct igmp_ifinfo * igi,struct igmp_tparams * itp)2899 igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi,
2900     struct igmp_tparams *itp)
2901 {
2902 	struct ifnet            *ifp;
2903 	int                      retval = 0;
2904 
2905 	INM_LOCK_ASSERT_HELD(inm);
2906 	IGI_LOCK_ASSERT_NOTHELD(igi);
2907 	VERIFY(itp != NULL);
2908 
2909 	IGMP_INET_PRINTF(inm->inm_addr,
2910 	    ("%s: state change for %s on ifp 0x%llx(%s)\n", __func__,
2911 	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
2912 	    if_name(inm->inm_ifp)));
2913 
2914 	ifp = inm->inm_ifp;
2915 
2916 	IGI_LOCK(igi);
2917 	VERIFY(igi->igi_ifp == ifp);
2918 
2919 	if ((ifp->if_flags & IFF_LOOPBACK) ||
2920 	    (igi->igi_flags & IGIF_SILENT) ||
2921 	    !igmp_isgroupreported(inm->inm_addr) ||
2922 	    (igi->igi_version != IGMP_VERSION_3)) {
2923 		IGI_UNLOCK(igi);
2924 		if (!igmp_isgroupreported(inm->inm_addr)) {
2925 			IGMP_PRINTF(("%s: not kicking state "
2926 			    "machine for silent group\n", __func__));
2927 		}
2928 		IGMP_PRINTF(("%s: nothing to do\n", __func__));
2929 		inm_commit(inm);
2930 		IGMP_INET_PRINTF(inm->inm_addr,
2931 		    ("%s: T1 -> T0 for %s/%s\n", __func__,
2932 		    _igmp_inet_buf, inm->inm_ifp->if_name));
2933 		goto done;
2934 	}
2935 
2936 	IF_DRAIN(&inm->inm_scq);
2937 
2938 	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
2939 	itp->cst = (inm->inm_scq.ifq_len > 0);
2940 	IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
2941 	if (retval <= 0) {
2942 		IGI_UNLOCK(igi);
2943 		retval *= -1;
2944 		goto done;
2945 	}
2946 	/*
2947 	 * If record(s) were enqueued, start the state-change
2948 	 * report timer for this group.
2949 	 */
2950 	inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : (uint16_t)igi->igi_rv);
2951 	inm->inm_sctimer = 1;
2952 	itp->sct = 1;
2953 	IGI_UNLOCK(igi);
2954 done:
2955 	return retval;
2956 }
2957 
/*
 * Perform the final leave for an IGMP group.
 *
 * When leaving a group:
 *  IGMPv1 does nothing.
 *  IGMPv2 sends a host leave message, if and only if we are the reporter.
 *  IGMPv3 enqueues a state-change report containing a transition
 *  to INCLUDE {} for immediate transmission.
 */
static void
igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi,
    struct igmp_tparams *itp)
{
	int syncstates = 1;
	/* Limits the v2-downgrade race retry below to a single attempt. */
	bool retried_already = false;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_NOTHELD(igi);
	VERIFY(itp != NULL);

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: final leave %s on ifp 0x%llx(%s)\n", __func__,
	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
	    if_name(inm->inm_ifp)));

retry:
	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_LEAVING_MEMBER:
		/* Already leaving or left; do nothing. */
		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
		    __func__));
		break;
	case IGMP_REPORTING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		IGI_LOCK(igi);
		if (igi->igi_version == IGMP_VERSION_2) {
			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
				/*
				 * We may be in the process of downgrading to
				 * IGMPv2 but because we just grabbed the
				 * igi_lock we may have lost the race.
				 */
				if (!retried_already) {
					IGI_UNLOCK(igi);
					retried_already = true;
					goto retry;
				} else {
					/*
					 * Proceed with leaving the group
					 * as if it were IGMPv2 even though we
					 * may have an inconsistent multicast state.
					 */
				}
			}
			/* schedule timer if the enqueue was successful */
			itp->cst = (igmp_v1v2_queue_report(inm,
			    IGMP_HOST_LEAVE_MESSAGE) == 0);

			INM_LOCK_ASSERT_HELD(inm);
			IGI_LOCK_ASSERT_HELD(igi);

			inm->inm_state = IGMP_NOT_MEMBER;
		} else if (igi->igi_version == IGMP_VERSION_3) {
			/*
			 * Stop group timer and all pending reports.
			 * Immediately enqueue a state-change report
			 * TO_IN {} to be sent on the next timeout,
			 * giving us an opportunity to merge reports.
			 */
			IF_DRAIN(&inm->inm_scq);
			inm->inm_timer = 0;
			/* Loopback links need only a single transmission. */
			if (igi->igi_flags & IGIF_LOOPBACK) {
				inm->inm_scrv = 1;
			} else {
				inm->inm_scrv = (uint16_t)igi->igi_rv;
			}
			IGMP_INET_PRINTF(inm->inm_addr,
			    ("%s: Leaving %s/%s with %d "
			    "pending retransmissions.\n", __func__,
			    _igmp_inet_buf, if_name(inm->inm_ifp),
			    inm->inm_scrv));
			if (inm->inm_scrv == 0) {
				inm->inm_state = IGMP_NOT_MEMBER;
				inm->inm_sctimer = 0;
			} else {
				int retval;
				/*
				 * Stick around in the in_multihead list;
				 * the final detach will be issued by
				 * igmp_v3_process_group_timers() when
				 * the retransmit timer expires.
				 */
				INM_ADDREF_LOCKED(inm);
				VERIFY(inm->inm_debug & IFD_ATTACHED);
				inm->inm_reqcnt++;
				VERIFY(inm->inm_reqcnt >= 1);
				inm->inm_nrelecnt++;
				VERIFY(inm->inm_nrelecnt != 0);

				retval = igmp_v3_enqueue_group_record(
					&inm->inm_scq, inm, 1, 0, 0);
				itp->cst = (inm->inm_scq.ifq_len > 0);
				KASSERT(retval != 0,
				    ("%s: enqueue record = %d\n", __func__,
				    retval));

				inm->inm_state = IGMP_LEAVING_MEMBER;
				inm->inm_sctimer = 1;
				itp->sct = 1;
				/* T0 commit deferred until the timer fires. */
				syncstates = 0;
			}
		}
		IGI_UNLOCK(igi);
		break;
	case IGMP_LAZY_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		/* Our reports are suppressed; do nothing. */
		break;
	}

	if (syncstates) {
		inm_commit(inm);
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 -> T0 for %s/%s\n", __func__,
		    _igmp_inet_buf, if_name(inm->inm_ifp)));
		/* The leave is atomic; clear the T1 filter mode as well. */
		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 now MCAST_UNDEFINED for %s/%s\n",
		    __func__, _igmp_inet_buf, if_name(inm->inm_ifp)));
	}
}
3095 
3096 /*
3097  * Enqueue an IGMPv3 group record to the given output queue.
3098  *
3099  * XXX This function could do with having the allocation code
3100  * split out, and the multiple-tree-walks coalesced into a single
3101  * routine as has been done in igmp_v3_enqueue_filter_change().
3102  *
3103  * If is_state_change is zero, a current-state record is appended.
3104  * If is_state_change is non-zero, a state-change report is appended.
3105  *
3106  * If is_group_query is non-zero, an mbuf packet chain is allocated.
3107  * If is_group_query is zero, and if there is a packet with free space
3108  * at the tail of the queue, it will be appended to providing there
3109  * is enough free space.
3110  * Otherwise a new mbuf packet chain is allocated.
3111  *
3112  * If is_source_query is non-zero, each source is checked to see if
3113  * it was recorded for a Group-Source query, and will be omitted if
3114  * it is not both in-mode and recorded.
3115  *
3116  * The function will attempt to allocate leading space in the packet
3117  * for the IP/IGMP header to be prepended without fragmenting the chain.
3118  *
3119  * If successful the size of all data appended to the queue is returned,
3120  * otherwise an error code less than zero is returned, or zero if
3121  * no record(s) were appended.
3122  */
3123 static int
igmp_v3_enqueue_group_record(struct ifqueue * ifq,struct in_multi * inm,const int is_state_change,const int is_group_query,const int is_source_query)3124 igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
3125     const int is_state_change, const int is_group_query,
3126     const int is_source_query)
3127 {
3128 	struct igmp_grouprec     ig;
3129 	struct igmp_grouprec    *pig;
3130 	struct ifnet            *ifp;
3131 	struct ip_msource       *ims, *nims;
3132 	struct mbuf             *m0, *m, *md;
3133 	int                      error, is_filter_list_change;
3134 	int                      minrec0len, m0srcs, nbytes, off;
3135 	uint16_t                 msrcs;
3136 	int                      record_has_sources;
3137 	int                      now;
3138 	int                      type;
3139 	in_addr_t                naddr;
3140 	uint16_t                 mode;
3141 	u_int16_t                ig_numsrc;
3142 
3143 	INM_LOCK_ASSERT_HELD(inm);
3144 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
3145 
3146 	error = 0;
3147 	ifp = inm->inm_ifp;
3148 	is_filter_list_change = 0;
3149 	m = NULL;
3150 	m0 = NULL;
3151 	m0srcs = 0;
3152 	msrcs = 0;
3153 	nbytes = 0;
3154 	nims = NULL;
3155 	record_has_sources = 1;
3156 	pig = NULL;
3157 	type = IGMP_DO_NOTHING;
3158 	mode = inm->inm_st[1].iss_fmode;
3159 
3160 	/*
3161 	 * If we did not transition out of ASM mode during t0->t1,
3162 	 * and there are no source nodes to process, we can skip
3163 	 * the generation of source records.
3164 	 */
3165 	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
3166 	    inm->inm_nsrc == 0) {
3167 		record_has_sources = 0;
3168 	}
3169 
3170 	if (is_state_change) {
3171 		/*
3172 		 * Queue a state change record.
3173 		 * If the mode did not change, and there are non-ASM
3174 		 * listeners or source filters present,
3175 		 * we potentially need to issue two records for the group.
3176 		 * If we are transitioning to MCAST_UNDEFINED, we need
3177 		 * not send any sources.
3178 		 * If there are ASM listeners, and there was no filter
3179 		 * mode transition of any kind, do nothing.
3180 		 */
3181 		if (mode != inm->inm_st[0].iss_fmode) {
3182 			if (mode == MCAST_EXCLUDE) {
3183 				IGMP_PRINTF(("%s: change to EXCLUDE\n",
3184 				    __func__));
3185 				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
3186 			} else {
3187 				IGMP_PRINTF(("%s: change to INCLUDE\n",
3188 				    __func__));
3189 				type = IGMP_CHANGE_TO_INCLUDE_MODE;
3190 				if (mode == MCAST_UNDEFINED) {
3191 					record_has_sources = 0;
3192 				}
3193 			}
3194 		} else {
3195 			if (record_has_sources) {
3196 				is_filter_list_change = 1;
3197 			} else {
3198 				type = IGMP_DO_NOTHING;
3199 			}
3200 		}
3201 	} else {
3202 		/*
3203 		 * Queue a current state record.
3204 		 */
3205 		if (mode == MCAST_EXCLUDE) {
3206 			type = IGMP_MODE_IS_EXCLUDE;
3207 		} else if (mode == MCAST_INCLUDE) {
3208 			type = IGMP_MODE_IS_INCLUDE;
3209 			VERIFY(inm->inm_st[1].iss_asm == 0);
3210 		}
3211 	}
3212 
3213 	/*
3214 	 * Generate the filter list changes using a separate function.
3215 	 */
3216 	if (is_filter_list_change) {
3217 		return igmp_v3_enqueue_filter_change(ifq, inm);
3218 	}
3219 
3220 	if (type == IGMP_DO_NOTHING) {
3221 		IGMP_INET_PRINTF(inm->inm_addr,
3222 		    ("%s: nothing to do for %s/%s\n",
3223 		    __func__, _igmp_inet_buf,
3224 		    if_name(inm->inm_ifp)));
3225 		return 0;
3226 	}
3227 
3228 	/*
3229 	 * If any sources are present, we must be able to fit at least
3230 	 * one in the trailing space of the tail packet's mbuf,
3231 	 * ideally more.
3232 	 */
3233 	minrec0len = sizeof(struct igmp_grouprec);
3234 	if (record_has_sources) {
3235 		minrec0len += sizeof(in_addr_t);
3236 	}
3237 
3238 	IGMP_INET_PRINTF(inm->inm_addr,
3239 	    ("%s: queueing %s for %s/%s\n", __func__,
3240 	    igmp_rec_type_to_str(type), _igmp_inet_buf,
3241 	    if_name(inm->inm_ifp)));
3242 
3243 	/*
3244 	 * Check if we have a packet in the tail of the queue for this
3245 	 * group into which the first group record for this group will fit.
3246 	 * Otherwise allocate a new packet.
3247 	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
3248 	 * Note: Group records for G/GSR query responses MUST be sent
3249 	 * in their own packet.
3250 	 */
3251 	m0 = ifq->ifq_tail;
3252 	if (!is_group_query &&
3253 	    m0 != NULL &&
3254 	    (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
3255 	    (m0->m_pkthdr.len + minrec0len) <
3256 	    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3257 		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3258 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3259 		m = m0;
3260 		IGMP_PRINTF(("%s: use existing packet\n", __func__));
3261 	} else {
3262 		if (IF_QFULL(ifq)) {
3263 			IGMP_PRINTF(("%s: outbound queue full\n", __func__));
3264 			return -ENOMEM;
3265 		}
3266 		m = NULL;
3267 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3268 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3269 		if (!is_state_change && !is_group_query) {
3270 			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3271 			if (m) {
3272 				m->m_data += IGMP_LEADINGSPACE;
3273 			}
3274 		}
3275 		if (m == NULL) {
3276 			m = m_gethdr(M_DONTWAIT, MT_DATA);
3277 			if (m) {
3278 				MH_ALIGN(m, IGMP_LEADINGSPACE);
3279 			}
3280 		}
3281 		if (m == NULL) {
3282 			return -ENOMEM;
3283 		}
3284 
3285 		igmp_save_context(m, ifp);
3286 
3287 		IGMP_PRINTF(("%s: allocated first packet\n", __func__));
3288 	}
3289 
3290 	/*
3291 	 * Append group record.
3292 	 * If we have sources, we don't know how many yet.
3293 	 */
3294 	ig.ig_type = (u_char)type;
3295 	ig.ig_datalen = 0;
3296 	ig.ig_numsrc = 0;
3297 	ig.ig_group = inm->inm_addr;
3298 	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3299 		if (m != m0) {
3300 			m_freem(m);
3301 		}
3302 		IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
3303 		return -ENOMEM;
3304 	}
3305 	nbytes += sizeof(struct igmp_grouprec);
3306 
3307 	/*
3308 	 * Append as many sources as will fit in the first packet.
3309 	 * If we are appending to a new packet, the chain allocation
3310 	 * may potentially use clusters; use m_getptr() in this case.
3311 	 * If we are appending to an existing packet, we need to obtain
3312 	 * a pointer to the group record after m_append(), in case a new
3313 	 * mbuf was allocated.
3314 	 * Only append sources which are in-mode at t1. If we are
3315 	 * transitioning to MCAST_UNDEFINED state on the group, do not
3316 	 * include source entries.
3317 	 * Only report recorded sources in our filter set when responding
3318 	 * to a group-source query.
3319 	 */
3320 	if (record_has_sources) {
3321 		if (m == m0) {
3322 			md = m_last(m);
3323 			pig = (struct igmp_grouprec *)(void *)
3324 			    (mtod(md, uint8_t *) + md->m_len - nbytes);
3325 		} else {
3326 			md = m_getptr(m, 0, &off);
3327 			pig = (struct igmp_grouprec *)(void *)
3328 			    (mtod(md, uint8_t *) + off);
3329 		}
3330 		msrcs = 0;
3331 		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
3332 #ifdef IGMP_DEBUG
3333 			char buf[MAX_IPv4_STR_LEN];
3334 
3335 			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3336 			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3337 #endif
3338 			now = ims_get_mode(inm, ims, 1);
3339 			IGMP_PRINTF(("%s: node is %d\n", __func__, now));
3340 			if ((now != mode) ||
3341 			    (now == mode && mode == MCAST_UNDEFINED)) {
3342 				IGMP_PRINTF(("%s: skip node\n", __func__));
3343 				continue;
3344 			}
3345 			if (is_source_query && ims->ims_stp == 0) {
3346 				IGMP_PRINTF(("%s: skip unrecorded node\n",
3347 				    __func__));
3348 				continue;
3349 			}
3350 			IGMP_PRINTF(("%s: append node\n", __func__));
3351 			naddr = htonl(ims->ims_haddr);
3352 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3353 				if (m != m0) {
3354 					m_freem(m);
3355 				}
3356 				IGMP_PRINTF(("%s: m_append() failed.\n",
3357 				    __func__));
3358 				return -ENOMEM;
3359 			}
3360 			nbytes += sizeof(in_addr_t);
3361 			++msrcs;
3362 			if (msrcs == m0srcs) {
3363 				break;
3364 			}
3365 		}
3366 		IGMP_PRINTF(("%s: msrcs is %d this packet\n", __func__,
3367 		    msrcs));
3368 		ig_numsrc = htons(msrcs);
3369 		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3370 		nbytes += (msrcs * sizeof(in_addr_t));
3371 	}
3372 
3373 	if (is_source_query && msrcs == 0) {
3374 		IGMP_PRINTF(("%s: no recorded sources to report\n", __func__));
3375 		if (m != m0) {
3376 			m_freem(m);
3377 		}
3378 		return 0;
3379 	}
3380 
3381 	/*
3382 	 * We are good to go with first packet.
3383 	 */
3384 	if (m != m0) {
3385 		IGMP_PRINTF(("%s: enqueueing first packet\n", __func__));
3386 		m->m_pkthdr.vt_nrecs = 1;
3387 		IF_ENQUEUE(ifq, m);
3388 	} else {
3389 		m->m_pkthdr.vt_nrecs++;
3390 	}
3391 	/*
3392 	 * No further work needed if no source list in packet(s).
3393 	 */
3394 	if (!record_has_sources) {
3395 		return nbytes;
3396 	}
3397 
3398 	/*
3399 	 * Whilst sources remain to be announced, we need to allocate
3400 	 * a new packet and fill out as many sources as will fit.
3401 	 * Always try for a cluster first.
3402 	 */
3403 	while (nims != NULL) {
3404 		if (IF_QFULL(ifq)) {
3405 			IGMP_PRINTF(("%s: outbound queue full\n", __func__));
3406 			return -ENOMEM;
3407 		}
3408 		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3409 		if (m) {
3410 			m->m_data += IGMP_LEADINGSPACE;
3411 		}
3412 		if (m == NULL) {
3413 			m = m_gethdr(M_DONTWAIT, MT_DATA);
3414 			if (m) {
3415 				MH_ALIGN(m, IGMP_LEADINGSPACE);
3416 			}
3417 		}
3418 		if (m == NULL) {
3419 			return -ENOMEM;
3420 		}
3421 		igmp_save_context(m, ifp);
3422 		md = m_getptr(m, 0, &off);
3423 		pig = (struct igmp_grouprec *)(void *)
3424 		    (mtod(md, uint8_t *) + off);
3425 		IGMP_PRINTF(("%s: allocated next packet\n", __func__));
3426 
3427 		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3428 			if (m != m0) {
3429 				m_freem(m);
3430 			}
3431 			IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
3432 			return -ENOMEM;
3433 		}
3434 		m->m_pkthdr.vt_nrecs = 1;
3435 		nbytes += sizeof(struct igmp_grouprec);
3436 
3437 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3438 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3439 
3440 		msrcs = 0;
3441 		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3442 #ifdef IGMP_DEBUG
3443 			char buf[MAX_IPv4_STR_LEN];
3444 
3445 			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3446 			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3447 #endif
3448 			now = ims_get_mode(inm, ims, 1);
3449 			if ((now != mode) ||
3450 			    (now == mode && mode == MCAST_UNDEFINED)) {
3451 				IGMP_PRINTF(("%s: skip node\n", __func__));
3452 				continue;
3453 			}
3454 			if (is_source_query && ims->ims_stp == 0) {
3455 				IGMP_PRINTF(("%s: skip unrecorded node\n",
3456 				    __func__));
3457 				continue;
3458 			}
3459 			IGMP_PRINTF(("%s: append node\n", __func__));
3460 			naddr = htonl(ims->ims_haddr);
3461 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3462 				if (m != m0) {
3463 					m_freem(m);
3464 				}
3465 				IGMP_PRINTF(("%s: m_append() failed.\n",
3466 				    __func__));
3467 				return -ENOMEM;
3468 			}
3469 			++msrcs;
3470 			if (msrcs == m0srcs) {
3471 				break;
3472 			}
3473 		}
3474 		ig_numsrc = htons(msrcs);
3475 		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3476 		nbytes += (msrcs * sizeof(in_addr_t));
3477 
3478 		IGMP_PRINTF(("%s: enqueueing next packet\n", __func__));
3479 		IF_ENQUEUE(ifq, m);
3480 	}
3481 
3482 	return nbytes;
3483 }
3484 
/*
 * Type used to mark record pass completion.
 * We exploit the fact we can cast to this easily from the
 * current filter modes on each ip_msource node.
 */
typedef enum {
	REC_NONE = 0x00,        /* MCAST_UNDEFINED */
	REC_ALLOW = 0x01,       /* MCAST_INCLUDE */
	REC_BLOCK = 0x02,       /* MCAST_EXCLUDE */
	REC_FULL = REC_ALLOW | REC_BLOCK        /* both passes completed */
} rectype_t;
3496 
/*
 * Enqueue an IGMPv3 filter list change to the given output queue.
 *
 * Source list filter state is held in an RB-tree. When the filter list
 * for a group is changed without changing its mode, we need to compute
 * the deltas between T0 and T1 for each source in the filter set,
 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
 *
 * As we may potentially queue two record types, and the entire R-B tree
 * needs to be walked at once, we break this out into its own function
 * so we can generate a tightly packed queue of packets.
 *
 * XXX This could be written to only use one tree walk, although that makes
 * serializing into the mbuf chains a bit harder. For now we do two walks
 * which makes things easier on us, and it may or may not be harder on
 * the L2 cache.
 *
 * If successful the size of all data appended to the queue is returned,
 * otherwise an error code less than zero is returned, or zero if
 * no record(s) were appended.
 */
static int
igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
{
	/* Smallest useful record: header plus one source address. */
	static const int MINRECLEN =
	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
	struct ifnet            *ifp;
	struct igmp_grouprec     ig;
	struct igmp_grouprec    *pig;
	struct ip_msource       *ims, *nims;
	struct mbuf             *m, *m0, *md;
	in_addr_t                naddr;
	int                      m0srcs, nbytes, npbytes, off, schanged;
	uint16_t                 rsrcs;
	int                      nallow, nblock;
	uint16_t                 mode;
	uint8_t                  now, then;
	rectype_t                crt, drt, nrt;
	u_int16_t                ig_numsrc;

	INM_LOCK_ASSERT_HELD(inm);

	/* Nothing to report without sources, or if ASM at both t0 and t1. */
	if (inm->inm_nsrc == 0 ||
	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0)) {
		return 0;
	}

	ifp = inm->inm_ifp;                     /* interface */
	mode = inm->inm_st[1].iss_fmode;        /* filter mode at t1 */
	crt = REC_NONE; /* current group record type */
	drt = REC_NONE; /* mask of completed group record types */
	nrt = REC_NONE; /* record type for current node */
	m0srcs = 0;     /* # source which will fit in current mbuf chain */
	nbytes = 0;     /* # of bytes appended to group's state-change queue */
	npbytes = 0;    /* # of bytes appended this packet */
	rsrcs = 0;      /* # sources encoded in current record */
	schanged = 0;   /* # nodes encoded in overall filter change */
	nallow = 0;     /* # of source entries in ALLOW_NEW */
	nblock = 0;     /* # of source entries in BLOCK_OLD */
	nims = NULL;    /* next tree node pointer */

	/*
	 * For each possible filter record mode.
	 * The first kind of source we encounter tells us which
	 * is the first kind of record we start appending.
	 * If a node transitioned to UNDEFINED at t1, its mode is treated
	 * as the inverse of the group's filter mode.
	 */
	while (drt != REC_FULL) {
		do {
			m0 = ifq->ifq_tail;
			if (m0 != NULL &&
			    (m0->m_pkthdr.vt_nrecs + 1 <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (m0->m_pkthdr.len + MINRECLEN) <
			    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
				m = m0;
				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
				    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				IGMP_PRINTF(("%s: use previous packet\n",
				    __func__));
			} else {
				/* Prefer a cluster; fall back to mbuf. */
				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
				if (m) {
					m->m_data += IGMP_LEADINGSPACE;
				}
				if (m == NULL) {
					m = m_gethdr(M_DONTWAIT, MT_DATA);
					if (m) {
						MH_ALIGN(m, IGMP_LEADINGSPACE);
					}
				}
				if (m == NULL) {
					IGMP_PRINTF(("%s: m_get*() failed\n",
					    __func__));
					return -ENOMEM;
				}
				m->m_pkthdr.vt_nrecs = 0;
				igmp_save_context(m, ifp);
				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
				    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				npbytes = 0;
				IGMP_PRINTF(("%s: allocated new packet\n",
				    __func__));
			}
			/*
			 * Append the IGMP group record header to the
			 * current packet's data area.
			 * Recalculate pointer to free space for next
			 * group record, in case m_append() allocated
			 * a new mbuf or cluster.
			 */
			memset(&ig, 0, sizeof(ig));
			ig.ig_group = inm->inm_addr;
			if (!m_append(m, sizeof(ig), (void *)&ig)) {
				if (m != m0) {
					m_freem(m);
				}
				IGMP_PRINTF(("%s: m_append() failed\n",
				    __func__));
				return -ENOMEM;
			}
			npbytes += sizeof(struct igmp_grouprec);
			if (m != m0) {
				/* new packet; offset in chain */
				md = m_getptr(m, npbytes -
				    sizeof(struct igmp_grouprec), &off);
				pig = (struct igmp_grouprec *)(void *)(mtod(md,
				    uint8_t *) + off);
			} else {
				/* current packet; offset from last append */
				md = m_last(m);
				pig = (struct igmp_grouprec *)(void *)(mtod(md,
				    uint8_t *) + md->m_len -
				    sizeof(struct igmp_grouprec));
			}
			/*
			 * Begin walking the tree for this record type
			 * pass, or continue from where we left off
			 * previously if we had to allocate a new packet.
			 * Only report deltas in-mode at t1.
			 * We need not report included sources as allowed
			 * if we are in inclusive mode on the group,
			 * however the converse is not true.
			 */
			rsrcs = 0;
			if (nims == NULL) {
				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
			}
			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
#ifdef IGMP_DEBUG
				char buf[MAX_IPv4_STR_LEN];

				inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
				IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
#endif
				now = ims_get_mode(inm, ims, 1);
				then = ims_get_mode(inm, ims, 0);
				IGMP_PRINTF(("%s: mode: t0 %d, t1 %d\n",
				    __func__, then, now));
				if (now == then) {
					IGMP_PRINTF(("%s: skip unchanged\n",
					    __func__));
					continue;
				}
				if (mode == MCAST_EXCLUDE &&
				    now == MCAST_INCLUDE) {
					IGMP_PRINTF(("%s: skip IN src on EX "
					    "group\n", __func__));
					continue;
				}
				nrt = (rectype_t)now;
				if (nrt == REC_NONE) {
					nrt = (rectype_t)(~mode & REC_FULL);
				}
				if (schanged++ == 0) {
					/* First node sets the pass's type. */
					crt = nrt;
				} else if (crt != nrt) {
					/* Wrong type for this pass; later. */
					continue;
				}
				naddr = htonl(ims->ims_haddr);
				if (!m_append(m, sizeof(in_addr_t),
				    (void *)&naddr)) {
					if (m != m0) {
						m_freem(m);
					}
					IGMP_PRINTF(("%s: m_append() failed\n",
					    __func__));
					return -ENOMEM;
				}
				nallow += !!(crt == REC_ALLOW);
				nblock += !!(crt == REC_BLOCK);
				if (++rsrcs == m0srcs) {
					break;
				}
			}
			/*
			 * If we did not append any tree nodes on this
			 * pass, back out of allocations.
			 */
			if (rsrcs == 0) {
				npbytes -= sizeof(struct igmp_grouprec);
				if (m != m0) {
					IGMP_PRINTF(("%s: m_free(m)\n",
					    __func__));
					m_freem(m);
				} else {
					IGMP_PRINTF(("%s: m_adj(m, -ig)\n",
					    __func__));
					m_adj(m, -((int)sizeof(
						    struct igmp_grouprec)));
				}
				continue;
			}
			npbytes += (rsrcs * sizeof(in_addr_t));
			if (crt == REC_ALLOW) {
				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
			} else if (crt == REC_BLOCK) {
				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
			}
			/* Patch the now-known source count into the record. */
			ig_numsrc = htons(rsrcs);
			bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
			/*
			 * Count the new group record, and enqueue this
			 * packet if it wasn't already queued.
			 */
			m->m_pkthdr.vt_nrecs++;
			if (m != m0) {
				IF_ENQUEUE(ifq, m);
			}
			nbytes += npbytes;
		} while (nims != NULL);
		drt |= crt;
		crt = (~crt & REC_FULL);
	}

	IGMP_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
	    nallow, nblock));

	return nbytes;
}
3740 
/*
 * Merge a group's pending state-change records into the interface-wide
 * state-change queue ifscq, coalescing records into existing packets
 * where the MTU and per-report record limit allow.
 *
 * If further retransmissions remain (inm_scrv > 0) the per-group queue
 * is preserved and writable copies are merged instead; otherwise the
 * originals are moved out of the per-group queue.
 *
 * Returns 0 on success, ENOMEM if an mbuf copy fails.
 */
static int
igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
{
	struct ifqueue  *gq;
	struct mbuf     *m;             /* pending state-change */
	struct mbuf     *m0;            /* copy of pending state-change */
	struct mbuf     *mt;            /* last state-change in packet */
	struct mbuf     *n;
	int              docopy, domerge;
	u_int            recslen;

	INM_LOCK_ASSERT_HELD(inm);

	docopy = 0;
	domerge = 0;
	recslen = 0;

	/*
	 * If there are further pending retransmissions, make a writable
	 * copy of each queued state-change message before merging.
	 */
	if (inm->inm_scrv > 0) {
		docopy = 1;
	}

	gq = &inm->inm_scq;
#ifdef IGMP_DEBUG
	if (gq->ifq_head == NULL) {
		IGMP_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
	}
#endif

	/*
	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
	 * packet might not always be at the head of the ifqueue.
	 */
	m = gq->ifq_head;
	while (m != NULL) {
		/*
		 * Only merge the report into the current packet if
		 * there is sufficient space to do so; an IGMPv3 report
		 * packet may only contain 65,535 group records.
		 * Always use a simple mbuf chain concatenation to do this,
		 * as large state changes for single groups may have
		 * allocated clusters.
		 */
		domerge = 0;
		mt = ifscq->ifq_tail;
		if (mt != NULL) {
			recslen = m_length(m);

			if ((mt->m_pkthdr.vt_nrecs +
			    m->m_pkthdr.vt_nrecs <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (mt->m_pkthdr.len + recslen <=
			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE))) {
				domerge = 1;
			}
		}

		/*
		 * NOTE(review): this tests fullness of the per-group
		 * queue gq, not the destination ifscq — presumably
		 * intentional (inherited behavior); confirm upstream.
		 */
		if (!domerge && IF_QFULL(gq)) {
			IGMP_PRINTF(("%s: outbound queue full, skipping whole "
			    "packet 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			if (!docopy) {
				IF_REMQUEUE(gq, m);
				m_freem(m);
			}
			m = n;
			continue;
		}

		if (!docopy) {
			/* Move the original packet out of the group queue. */
			IGMP_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			IF_REMQUEUE(gq, m);
			m0 = m;
			m = n;
		} else {
			/* Keep the original for retransmission; copy it. */
			IGMP_PRINTF(("%s: copying 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			m0 = m_dup(m, M_NOWAIT);
			if (m0 == NULL) {
				return ENOMEM;
			}
			m0->m_nextpkt = NULL;
			m = m->m_nextpkt;
		}

		if (!domerge) {
			IGMP_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n",
			    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
			IF_ENQUEUE(ifscq, m0);
		} else {
			struct mbuf *mtl;       /* last mbuf of packet mt */

			IGMP_PRINTF(("%s: merging 0x%llx with ifscq tail "
			    "0x%llx)\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(mt)));

			/* Append m0's records onto the tail packet mt. */
			mtl = m_last(mt);
			m0->m_flags &= ~M_PKTHDR;
			mt->m_pkthdr.len += recslen;
			mt->m_pkthdr.vt_nrecs +=
			    m0->m_pkthdr.vt_nrecs;

			mtl->m_next = m0;
		}
	}

	return 0;
}
3858 
/*
 * Respond to a pending IGMPv3 General Query.
 *
 * Walks all multicast memberships on the query's interface, enqueues a
 * current-state record for each reporting-eligible group onto igi_gq,
 * then transmits up to IGMP_MAX_RESPONSE_BURST packets.  If packets
 * remain queued, re-arms igi_v3_timer with a random slew so bursts are
 * spread over one-second intervals.
 *
 * Returns the (possibly re-armed) value of igi_v3_timer.
 * Called, and returns, with igi locked; the lock is dropped internally
 * to walk the in_multihead list.
 */
static uint32_t
igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
{
	struct ifnet            *ifp;
	struct in_multi         *inm;
	struct in_multistep     step;
	int                      retval, loop;

	IGI_LOCK_ASSERT_HELD(igi);

	VERIFY(igi->igi_version == IGMP_VERSION_3);

	ifp = igi->igi_ifp;
	IGI_UNLOCK(igi);

	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		/* Only groups on the queried interface are relevant. */
		if (inm->inm_ifp != ifp) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			inm->inm_state = IGMP_REPORTING_MEMBER;
			IGI_LOCK(igi);
			/* Current-state record: not a state change. */
			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
			    inm, 0, 0, 0);
			IGI_UNLOCK(igi);
			IGMP_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			break;
		}
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();

	IGI_LOCK(igi);
	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
	igmp_dispatch_queue(igi, &igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
	    loop);
	IGI_LOCK_ASSERT_HELD(igi);
	/*
	 * Slew transmission of bursts over 1 second intervals.
	 */
	if (igi->igi_gq.ifq_head != NULL) {
		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
			IGMP_RESPONSE_BURST_INTERVAL);
	}

	return igi->igi_v3_timer;
}
3928 
3929 /*
3930  * Transmit the next pending IGMP message in the output queue.
3931  *
3932  * Must not be called with inm_lock or igi_lock held.
3933  */
3934 static void
igmp_sendpkt(struct mbuf * m)3935 igmp_sendpkt(struct mbuf *m)
3936 {
3937 	struct ip_moptions      *imo;
3938 	struct mbuf             *ipopts, *m0;
3939 	int                     error;
3940 	struct route            ro;
3941 	struct ifnet            *ifp;
3942 
3943 	IGMP_PRINTF(("%s: transmit 0x%llx\n", __func__,
3944 	    (uint64_t)VM_KERNEL_ADDRPERM(m)));
3945 
3946 	ifp = igmp_restore_context(m);
3947 	/*
3948 	 * Check if the ifnet is still attached.
3949 	 */
3950 	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
3951 		IGMP_PRINTF(("%s: dropped 0x%llx as ifp went away.\n",
3952 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m)));
3953 		m_freem(m);
3954 		OSAddAtomic(1, &ipstat.ips_noroute);
3955 		return;
3956 	}
3957 
3958 	ipopts = igmp_sendra ? m_raopt : NULL;
3959 
3960 	imo = ip_allocmoptions(Z_WAITOK);
3961 	if (imo == NULL) {
3962 		m_freem(m);
3963 		return;
3964 	}
3965 
3966 	imo->imo_multicast_ttl  = 1;
3967 	imo->imo_multicast_vif  = -1;
3968 	imo->imo_multicast_loop = 0;
3969 
3970 	/*
3971 	 * If the user requested that IGMP traffic be explicitly
3972 	 * redirected to the loopback interface (e.g. they are running a
3973 	 * MANET interface and the routing protocol needs to see the
3974 	 * updates), handle this now.
3975 	 */
3976 	if (m->m_flags & M_IGMP_LOOP) {
3977 		imo->imo_multicast_ifp = lo_ifp;
3978 	} else {
3979 		imo->imo_multicast_ifp = ifp;
3980 	}
3981 
3982 	if (m->m_flags & M_IGMPV2) {
3983 		m0 = m;
3984 	} else {
3985 		m0 = igmp_v3_encap_report(ifp, m);
3986 		if (m0 == NULL) {
3987 			/*
3988 			 * If igmp_v3_encap_report() failed, then M_PREPEND()
3989 			 * already freed the original mbuf chain.
3990 			 * This means that we don't have to m_freem(m) here.
3991 			 */
3992 			IGMP_PRINTF(("%s: dropped 0x%llx\n", __func__,
3993 			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
3994 			IMO_REMREF(imo);
3995 			atomic_add_32(&ipstat.ips_odropped, 1);
3996 			return;
3997 		}
3998 	}
3999 
4000 	igmp_scrub_context(m0);
4001 	m->m_flags &= ~(M_PROTOFLAGS | M_IGMP_LOOP);
4002 	m0->m_pkthdr.rcvif = lo_ifp;
4003 
4004 	if (ifp->if_eflags & IFEF_TXSTART) {
4005 		/*
4006 		 * Use control service class if the interface supports
4007 		 * transmit-start model.
4008 		 */
4009 		(void) m_set_service_class(m0, MBUF_SC_CTL);
4010 	}
4011 	bzero(&ro, sizeof(ro));
4012 	error = ip_output(m0, ipopts, &ro, 0, imo, NULL);
4013 	ROUTE_RELEASE(&ro);
4014 
4015 	IMO_REMREF(imo);
4016 
4017 	if (error) {
4018 		IGMP_PRINTF(("%s: ip_output(0x%llx) = %d\n", __func__,
4019 		    (uint64_t)VM_KERNEL_ADDRPERM(m0), error));
4020 		return;
4021 	}
4022 
4023 	IGMPSTAT_INC(igps_snd_reports);
4024 	OIGMPSTAT_INC(igps_snd_reports);
4025 }
4026 /*
4027  * Encapsulate an IGMPv3 report.
4028  *
4029  * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
4030  * chain has already had its IP/IGMPv3 header prepended. In this case
4031  * the function will not attempt to prepend; the lengths and checksums
4032  * will however be re-computed.
4033  *
4034  * Returns a pointer to the new mbuf chain head, or NULL if the
4035  * allocation failed.
4036  */
static struct mbuf *
igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
{
	struct igmp_report      *igmp;
	struct ip               *ip;
	unsigned int             hdrlen, igmpreclen;

	/* The chain head must carry a packet header (pkthdr fields used). */
	VERIFY((m->m_flags & M_PKTHDR));

	igmpreclen = m_length(m);
	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);

	if (m->m_flags & M_IGMPV3_HDR) {
		/*
		 * Header already prepended on an earlier pass; m_length()
		 * therefore included it, so deduct to get the record length.
		 */
		igmpreclen -= hdrlen;
	} else {
		/* M_PREPEND frees m and yields NULL on allocation failure. */
		M_PREPEND(m, hdrlen, M_DONTWAIT, 1);
		if (m == NULL) {
			return NULL;
		}
		m->m_flags |= M_IGMPV3_HDR;
	}
	/* ip_len is 16 bits; refuse anything that cannot be represented. */
	if (hdrlen + igmpreclen > USHRT_MAX) {
		IGMP_PRINTF(("%s: invalid length %d\n", __func__, hdrlen + igmpreclen));
		m_freem(m);
		return NULL;
	}


	IGMP_PRINTF(("%s: igmpreclen is %d\n", __func__, igmpreclen));

	/*
	 * Temporarily advance past the IP header so the IGMP header can be
	 * filled in and the checksum computed over the IGMP portion only.
	 */
	m->m_data += sizeof(struct ip);
	m->m_len -= sizeof(struct ip);

	igmp = mtod(m, struct igmp_report *);
	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
	igmp->ir_rsv1 = 0;
	igmp->ir_rsv2 = 0;
	/* vt_nrecs was accumulated while group records were enqueued. */
	igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
	igmp->ir_cksum = 0;
	/* Checksum must be computed with ir_cksum zeroed (done above). */
	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
	m->m_pkthdr.vt_nrecs = 0;

	/* Restore the data pointer to cover the IP header again. */
	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	ip = mtod(m, struct ip *);
	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
	/* NOTE: ip_len/ip_off left in host order here; presumably
	 * ip_output() performs the final byte-swap — confirm against caller. */
	ip->ip_len = (u_short)(hdrlen + igmpreclen);
	ip->ip_off = IP_DF;
	ip->ip_p = IPPROTO_IGMP;
	ip->ip_sum = 0;

	ip->ip_src.s_addr = INADDR_ANY;

	if (m->m_flags & M_IGMP_LOOP) {
		/*
		 * Loopback-redirected traffic: stamp the interface's primary
		 * address as source so local observers see a meaningful one.
		 */
		struct in_ifaddr *ia;

		IFP_TO_IA(ifp, ia);
		if (ia != NULL) {
			IFA_LOCK(&ia->ia_ifa);
			ip->ip_src = ia->ia_addr.sin_addr;
			IFA_UNLOCK(&ia->ia_ifa);
			IFA_REMREF(&ia->ia_ifa);
		}
	}

	/* IGMPv3 reports always go to 224.0.0.22 (all IGMPv3 routers). */
	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);

	return m;
}
4107 
4108 #ifdef IGMP_DEBUG
4109 static const char *
igmp_rec_type_to_str(const int type)4110 igmp_rec_type_to_str(const int type)
4111 {
4112 	switch (type) {
4113 	case IGMP_CHANGE_TO_EXCLUDE_MODE:
4114 		return "TO_EX";
4115 	case IGMP_CHANGE_TO_INCLUDE_MODE:
4116 		return "TO_IN";
4117 	case IGMP_MODE_IS_EXCLUDE:
4118 		return "MODE_EX";
4119 	case IGMP_MODE_IS_INCLUDE:
4120 		return "MODE_IN";
4121 	case IGMP_ALLOW_NEW_SOURCES:
4122 		return "ALLOW_NEW";
4123 	case IGMP_BLOCK_OLD_SOURCES:
4124 		return "BLOCK_OLD";
4125 	default:
4126 		break;
4127 	}
4128 	return "unknown";
4129 }
4130 #endif
4131 
4132 void
igmp_init(struct protosw * pp,struct domain * dp)4133 igmp_init(struct protosw *pp, struct domain *dp)
4134 {
4135 #pragma unused(dp)
4136 	static int igmp_initialized = 0;
4137 
4138 	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);
4139 
4140 	if (igmp_initialized) {
4141 		return;
4142 	}
4143 	igmp_initialized = 1;
4144 
4145 	IGMP_PRINTF(("%s: initializing\n", __func__));
4146 
4147 	igmp_timers_are_running = 0;
4148 
4149 	LIST_INIT(&igi_head);
4150 	m_raopt = igmp_ra_alloc();
4151 }
4152