xref: /xnu-10002.81.5/bsd/netinet/igmp.c (revision 5e3eaea39dcf651e66cb99ba7d70e32cc4a99587)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*-
29  * Copyright (c) 2007-2009 Bruce Simpson.
30  * Copyright (c) 1988 Stephen Deering.
31  * Copyright (c) 1992, 1993
32  *	The Regents of the University of California.  All rights reserved.
33  *
34  * This code is derived from software contributed to Berkeley by
35  * Stephen Deering of Stanford University.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
66  */
67 /*
68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69  * support for mandatory and extensible security protections.  This notice
70  * is included in support of clause 2.2 (b) of the Apple Public License,
71  * Version 2.0.
72  */
73 
74 /*
75  * Internet Group Management Protocol (IGMP) routines.
76  * [RFC1112, RFC2236, RFC3376]
77  *
78  * Written by Steve Deering, Stanford, May 1988.
79  * Modified by Rosen Sharma, Stanford, Aug 1994.
80  * Modified by Bill Fenner, Xerox PARC, Feb 1995.
81  * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
82  * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
83  *
84  * MULTICAST Revision: 3.5.1.4
85  */
86 
87 #include <sys/cdefs.h>
88 
89 #include <sys/param.h>
90 #include <sys/systm.h>
91 #include <sys/malloc.h>
92 #include <sys/mbuf.h>
93 #include <sys/socket.h>
94 #include <sys/protosw.h>
95 #include <sys/kernel.h>
96 #include <sys/sysctl.h>
97 #include <sys/mcache.h>
98 
99 #include <libkern/libkern.h>
100 #include <kern/zalloc.h>
101 
102 #include <net/if.h>
103 #include <net/route.h>
104 
105 #include <netinet/in.h>
106 #include <netinet/in_var.h>
107 #include <netinet/in_systm.h>
108 #include <netinet/ip.h>
109 #include <netinet/ip_var.h>
110 #include <netinet/igmp.h>
111 #include <netinet/igmp_var.h>
112 #include <netinet/kpi_ipfilter_var.h>
113 
114 #include <os/log.h>
115 
116 #if SKYWALK
117 #include <skywalk/core/skywalk_var.h>
118 #endif /* SKYWALK */
119 
120 SLIST_HEAD(igmp_inm_relhead, in_multi);
121 
122 static void     igi_initvar(struct igmp_ifinfo *, struct ifnet *, int);
123 static struct igmp_ifinfo *igi_alloc(zalloc_flags_t);
124 static void     igi_free(struct igmp_ifinfo *);
125 static void     igi_delete(const struct ifnet *, struct igmp_inm_relhead *);
126 static void     igmp_dispatch_queue(struct igmp_ifinfo *, struct ifqueue *,
127     int, const int);
128 static void     igmp_final_leave(struct in_multi *, struct igmp_ifinfo *,
129     struct igmp_tparams *);
130 static int      igmp_handle_state_change(struct in_multi *,
131     struct igmp_ifinfo *, struct igmp_tparams *);
132 static int      igmp_initial_join(struct in_multi *, struct igmp_ifinfo *,
133     struct igmp_tparams *);
134 static int      igmp_input_v1_query(struct ifnet *, const struct ip *,
135     const struct igmp *);
136 static int      igmp_input_v2_query(struct ifnet *, const struct ip *,
137     const struct igmp *);
138 static int      igmp_input_v3_query(struct ifnet *, const struct ip *,
139     /*const*/ struct igmpv3 *);
140 static int      igmp_input_v3_group_query(struct in_multi *,
141     int, /*const*/ struct igmpv3 *);
142 static int      igmp_input_v1_report(struct ifnet *, struct mbuf *,
143     /*const*/ struct ip *, /*const*/ struct igmp *);
144 static int      igmp_input_v2_report(struct ifnet *, struct mbuf *,
145     /*const*/ struct ip *, /*const*/ struct igmp *);
146 static void     igmp_sendpkt(struct mbuf *);
147 static __inline__ int   igmp_isgroupreported(const struct in_addr);
148 static struct mbuf *igmp_ra_alloc(void);
149 #ifdef IGMP_DEBUG
150 static const char *igmp_rec_type_to_str(const int);
151 #endif
152 static uint32_t igmp_set_version(struct igmp_ifinfo *, const int);
153 static void     igmp_append_relq(struct igmp_ifinfo *, struct in_multi *);
154 static void     igmp_flush_relq(struct igmp_ifinfo *,
155     struct igmp_inm_relhead *);
156 static int      igmp_v1v2_queue_report(struct in_multi *, const int);
157 static void     igmp_v1v2_process_group_timer(struct in_multi *, const int);
158 static void     igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
159 static uint32_t igmp_v2_update_group(struct in_multi *, const int);
160 static void     igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
161 static uint32_t igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
162 static struct mbuf *
163 igmp_v3_encap_report(struct ifnet *, struct mbuf *);
164 static int      igmp_v3_enqueue_group_record(struct ifqueue *,
165     struct in_multi *, const int, const int, const int);
166 static int      igmp_v3_enqueue_filter_change(struct ifqueue *,
167     struct in_multi *);
168 static void     igmp_v3_process_group_timers(struct igmp_ifinfo *,
169     struct ifqueue *, struct ifqueue *, struct in_multi *,
170     const unsigned int);
171 static int      igmp_v3_merge_state_changes(struct in_multi *,
172     struct ifqueue *);
173 static void     igmp_v3_suppress_group_record(struct in_multi *);
174 static int      sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS;
175 static int      sysctl_igmp_gsr SYSCTL_HANDLER_ARGS;
176 static int      sysctl_igmp_default_version SYSCTL_HANDLER_ARGS;
177 
178 static const uint32_t igmp_timeout_delay = 1000; /* in milliseconds */
static const uint32_t igmp_timeout_leeway = 500; /* in milliseconds */
180 static bool igmp_timeout_run;            /* IGMP timer is scheduled to run */
181 static bool igmp_fast_timeout_run;       /* IGMP fast timer is scheduled to run */
182 static void igmp_timeout(thread_call_param_t, thread_call_param_t);
183 static void igmp_sched_timeout(void);
184 static void igmp_sched_fast_timeout(void);
185 
186 static struct mbuf *m_raopt;            /* Router Alert option */
187 
188 static int querier_present_timers_running;      /* IGMPv1/v2 older version
189                                                  * querier present */
190 static int interface_timers_running;            /* IGMPv3 general
191                                                  * query response */
192 static int state_change_timers_running;         /* IGMPv3 state-change
193                                                  * retransmit */
194 static int current_state_timers_running;        /* IGMPv1/v2 host
195                                                  * report; IGMPv3 g/sg
196                                                  * query response */
197 
198 /*
199  * Subsystem lock macros.
200  */
201 #define IGMP_LOCK()                     \
202 	lck_mtx_lock(&igmp_mtx)
203 #define IGMP_LOCK_ASSERT_HELD()         \
204 	LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_OWNED)
205 #define IGMP_LOCK_ASSERT_NOTHELD()      \
206 	LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED)
207 #define IGMP_UNLOCK()                   \
208 	lck_mtx_unlock(&igmp_mtx)
209 
210 static LIST_HEAD(, igmp_ifinfo) igi_head;
211 static struct igmpstat_v3 igmpstat_v3 = {
212 	.igps_version = IGPS_VERSION_3,
213 	.igps_len = sizeof(struct igmpstat_v3),
214 };
215 static struct igmpstat igmpstat; /* old IGMPv2 stats structure */
216 static struct timeval igmp_gsrdelay = {.tv_sec = 10, .tv_usec = 0};
217 
218 static int igmp_recvifkludge = 1;
219 static int igmp_sendra = 1;
220 static int igmp_sendlocal = 1;
221 static int igmp_v1enable = 1;
222 static int igmp_v2enable = 1;
223 static int igmp_legacysupp = 0;
224 static int igmp_default_version = IGMP_VERSION_3;
225 
226 SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
227     &igmpstat, igmpstat, "");
228 SYSCTL_STRUCT(_net_inet_igmp, OID_AUTO, v3stats,
229     CTLFLAG_RD | CTLFLAG_LOCKED, &igmpstat_v3, igmpstat_v3, "");
230 SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW | CTLFLAG_LOCKED,
231     &igmp_recvifkludge, 0,
232     "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
233 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW | CTLFLAG_LOCKED,
234     &igmp_sendra, 0,
235     "Send IP Router Alert option in IGMPv2/v3 messages");
236 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW | CTLFLAG_LOCKED,
237     &igmp_sendlocal, 0,
238     "Send IGMP membership reports for 224.0.0.0/24 groups");
239 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
240     &igmp_v1enable, 0,
241     "Enable backwards compatibility with IGMPv1");
242 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW | CTLFLAG_LOCKED,
243     &igmp_v2enable, 0,
244     "Enable backwards compatibility with IGMPv2");
245 SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW | CTLFLAG_LOCKED,
246     &igmp_legacysupp, 0,
247     "Allow v1/v2 reports to suppress v3 group responses");
248 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
249     CTLTYPE_INT | CTLFLAG_RW,
250     &igmp_default_version, 0, sysctl_igmp_default_version, "I",
251     "Default version of IGMP to run on each interface");
252 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
253     CTLTYPE_INT | CTLFLAG_RW,
254     &igmp_gsrdelay.tv_sec, 0, sysctl_igmp_gsr, "I",
255     "Rate limit for IGMPv3 Group-and-Source queries in seconds");
256 #ifdef IGMP_DEBUG
257 int igmp_debug = 0;
258 SYSCTL_INT(_net_inet_igmp, OID_AUTO,
259     debug, CTLFLAG_RW | CTLFLAG_LOCKED, &igmp_debug, 0, "");
260 #endif
261 
262 SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
263     sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
264 
265 /* Lock group and attribute for igmp_mtx */
266 static LCK_ATTR_DECLARE(igmp_mtx_attr, 0, 0);
267 static LCK_GRP_DECLARE(igmp_mtx_grp, "igmp_mtx");
268 
269 /*
270  * Locking and reference counting:
271  *
272  * igmp_mtx mainly protects igi_head.  In cases where both igmp_mtx and
273  * in_multihead_lock must be held, the former must be acquired first in order
274  * to maintain lock ordering.  It is not a requirement that igmp_mtx be
275  * acquired first before in_multihead_lock, but in case both must be acquired
276  * in succession, the correct lock ordering must be followed.
277  *
278  * Instead of walking the if_multiaddrs list at the interface and returning
279  * the ifma_protospec value of a matching entry, we search the global list
280  * of in_multi records and find it that way; this is done with in_multihead
281  * lock held.  Doing so avoids the race condition issues that many other BSDs
282  * suffer from (therefore in our implementation, ifma_protospec will never be
283  * NULL for as long as the in_multi is valid.)
284  *
285  * The above creates a requirement for the in_multi to stay in in_multihead
286  * list even after the final IGMP leave (in IGMPv3 mode) until no longer needs
287  * be retransmitted (this is not required for IGMPv1/v2.)  In order to handle
288  * this, the request and reference counts of the in_multi are bumped up when
289  * the state changes to IGMP_LEAVING_MEMBER, and later dropped in the timeout
290  * handler.  Each in_multi holds a reference to the underlying igmp_ifinfo.
291  *
 * Thus, the permitted lock order is:
293  *
294  *	igmp_mtx, in_multihead_lock, inm_lock, igi_lock
295  *
296  * Any may be taken independently, but if any are held at the same time,
297  * the above lock order must be followed.
298  */
299 static LCK_MTX_DECLARE_ATTR(igmp_mtx, &igmp_mtx_grp, &igmp_mtx_attr);
300 static int igmp_timers_are_running;
301 
302 #define IGMP_ADD_DETACHED_INM(_head, _inm) {                            \
303 	SLIST_INSERT_HEAD(_head, _inm, inm_dtle);                       \
304 }
305 
306 #define IGMP_REMOVE_DETACHED_INM(_head) {                               \
307 	struct in_multi *_inm, *_inm_tmp;                               \
308 	SLIST_FOREACH_SAFE(_inm, _head, inm_dtle, _inm_tmp) {           \
309 	        SLIST_REMOVE(_head, _inm, in_multi, inm_dtle);          \
310 	        INM_REMREF(_inm);                                       \
311 	}                                                               \
312 	VERIFY(SLIST_EMPTY(_head));                                     \
313 }
314 
315 static KALLOC_TYPE_DEFINE(igi_zone, struct igmp_ifinfo, NET_KT_DEFAULT);
316 
317 /* Store IGMPv3 record count in the module private scratch space */
318 #define vt_nrecs        pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]
319 
static __inline void
igmp_save_context(struct mbuf *m, struct ifnet *ifp)
{
	/*
	 * Stash the interface pointer in the packet header's rcvif field
	 * while the IGMP output chain sits on a queue.  It is read back
	 * by igmp_restore_context() and cleared by igmp_scrub_context().
	 */
	m->m_pkthdr.rcvif = ifp;
}
325 
static __inline void
igmp_scrub_context(struct mbuf *m)
{
	/*
	 * Clear the interface pointer previously stashed by
	 * igmp_save_context() so a stale rcvif does not leak into the
	 * output path.
	 */
	m->m_pkthdr.rcvif = NULL;
}
331 
332 #ifdef IGMP_DEBUG
static __inline const char *
inet_ntop_haddr(in_addr_t haddr, char *buf, socklen_t size)
{
	/*
	 * Render a host-byte-order IPv4 address into presentation form
	 * in the caller-supplied buffer (debug-only helper).
	 */
	struct in_addr ia = { .s_addr = htonl(haddr) };

	return inet_ntop(AF_INET, &ia, buf, size);
}
341 #endif
342 
/*
 * Restore context from a queued IGMP output chain.
 * Return saved ifp.
 *
 * Counterpart of igmp_save_context(); reads the interface pointer
 * stashed in the packet header's rcvif field.
 */
static __inline struct ifnet *
igmp_restore_context(struct mbuf *m)
{
	return m->m_pkthdr.rcvif;
}
352 
353 /*
354  * Retrieve or set default IGMP version.
355  */
356 static int
357 sysctl_igmp_default_version SYSCTL_HANDLER_ARGS
358 {
359 #pragma unused(oidp, arg2)
360 	int      error;
361 	int      new;
362 
363 	IGMP_LOCK();
364 
365 	error = SYSCTL_OUT(req, arg1, sizeof(int));
366 	if (error || !req->newptr) {
367 		goto out_locked;
368 	}
369 
370 	new = igmp_default_version;
371 
372 	error = SYSCTL_IN(req, &new, sizeof(int));
373 	if (error) {
374 		goto out_locked;
375 	}
376 
377 	if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
378 		error = EINVAL;
379 		goto out_locked;
380 	}
381 
382 	os_log(OS_LOG_DEFAULT,
383 	    "%s: changed igmp_default_version from %d to %d\n",
384 	    __func__, igmp_default_version, new);
385 
386 	igmp_default_version = new;
387 
388 out_locked:
389 	IGMP_UNLOCK();
390 	return error;
391 }
392 
393 /*
394  * Retrieve or set threshold between group-source queries in seconds.
395  *
396  */
397 static int
398 sysctl_igmp_gsr SYSCTL_HANDLER_ARGS
399 {
400 #pragma unused(arg1, arg2)
401 	int error;
402 	int i;
403 
404 	IGMP_LOCK();
405 
406 	i = (int)igmp_gsrdelay.tv_sec;
407 
408 	error = sysctl_handle_int(oidp, &i, 0, req);
409 	if (error || !req->newptr) {
410 		goto out_locked;
411 	}
412 
413 	if (i < -1 || i >= 60) {
414 		error = EINVAL;
415 		goto out_locked;
416 	}
417 
418 	igmp_gsrdelay.tv_sec = i;
419 
420 out_locked:
421 	IGMP_UNLOCK();
422 	return error;
423 }
424 
425 /*
426  * Expose struct igmp_ifinfo to userland, keyed by ifindex.
427  * For use by ifmcstat(8).
428  *
429  */
static int
sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int                     *name;
	int                      error;
	u_int                    namelen;
	struct ifnet            *ifp;
	struct igmp_ifinfo      *igi;
	struct igmp_ifinfo_u    igi_u;   /* userland-visible snapshot */

	name = (int *)arg1;
	namelen = arg2;

	/* Read-only node: reject attempts to write. */
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	/* Exactly one name component is expected: the ifindex. */
	if (namelen != 1) {
		return EINVAL;
	}

	IGMP_LOCK();

	if (name[0] <= 0 || name[0] > (u_int)if_index) {
		error = ENOENT;
		goto out_locked;
	}

	error = ENOENT;

	/* Resolve the ifindex to an interface under the ifnet head lock. */
	ifnet_head_lock_shared();
	ifp = ifindex2ifnet[name[0]];
	ifnet_head_done();
	if (ifp == NULL) {
		goto out_locked;
	}

	bzero(&igi_u, sizeof(igi_u));

	/*
	 * Walk the per-interface IGMP state list and copy out a snapshot
	 * of the entry matching the requested interface, taking each igi
	 * lock only long enough to read consistent values.
	 */
	LIST_FOREACH(igi, &igi_head, igi_link) {
		IGI_LOCK(igi);
		if (ifp != igi->igi_ifp) {
			IGI_UNLOCK(igi);
			continue;
		}
		igi_u.igi_ifindex = igi->igi_ifp->if_index;
		igi_u.igi_version = igi->igi_version;
		igi_u.igi_v1_timer = igi->igi_v1_timer;
		igi_u.igi_v2_timer = igi->igi_v2_timer;
		igi_u.igi_v3_timer = igi->igi_v3_timer;
		igi_u.igi_flags = igi->igi_flags;
		igi_u.igi_rv = igi->igi_rv;
		igi_u.igi_qi = igi->igi_qi;
		igi_u.igi_qri = igi->igi_qri;
		igi_u.igi_uri = igi->igi_uri;
		IGI_UNLOCK(igi);

		error = SYSCTL_OUT(req, &igi_u, sizeof(igi_u));
		break;
	}

out_locked:
	IGMP_UNLOCK();
	return error;
}
496 
497 /*
498  * Dispatch an entire queue of pending packet chains
499  *
500  * Must not be called with inm_lock held.
501  */
502 static void
igmp_dispatch_queue(struct igmp_ifinfo * igi,struct ifqueue * ifq,int limit,const int loop)503 igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit,
504     const int loop)
505 {
506 	struct mbuf *m;
507 	struct ip *ip;
508 
509 	if (igi != NULL) {
510 		IGI_LOCK_ASSERT_HELD(igi);
511 	}
512 
513 #if SKYWALK
514 	/*
515 	 * Since this function is called holding the igi lock, we need to ensure we
516 	 * don't enter the driver directly because a deadlock can happen if another
517 	 * thread holding the workloop lock tries to acquire the igi lock at
518 	 * the same time.
519 	 */
520 	sk_protect_t protect = sk_async_transmit_protect();
521 #endif /* SKYWALK */
522 
523 	for (;;) {
524 		IF_DEQUEUE(ifq, m);
525 		if (m == NULL) {
526 			break;
527 		}
528 		IGMP_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
529 		    (uint64_t)VM_KERNEL_ADDRPERM(ifq),
530 		    (uint64_t)VM_KERNEL_ADDRPERM(m)));
531 		ip = mtod(m, struct ip *);
532 		if (loop) {
533 			m->m_flags |= M_IGMP_LOOP;
534 		}
535 		if (igi != NULL) {
536 			IGI_UNLOCK(igi);
537 		}
538 		igmp_sendpkt(m);
539 		if (igi != NULL) {
540 			IGI_LOCK(igi);
541 		}
542 		if (--limit == 0) {
543 			break;
544 		}
545 	}
546 
547 #if SKYWALK
548 	sk_async_transmit_unprotect(protect);
549 #endif /* SKYWALK */
550 
551 	if (igi != NULL) {
552 		IGI_LOCK_ASSERT_HELD(igi);
553 	}
554 }
555 
556 /*
557  * Filter outgoing IGMP report state by group.
558  *
559  * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
560  * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
561  * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
562  * this may break certain IGMP snooping switches which rely on the old
563  * report behaviour.
564  *
565  * Return zero if the given group is one for which IGMP reports
566  * should be suppressed, or non-zero if reports should be issued.
567  */
568 
569 static __inline__
570 int
igmp_isgroupreported(const struct in_addr addr)571 igmp_isgroupreported(const struct in_addr addr)
572 {
573 	if (in_allhosts(addr) ||
574 	    ((!igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) {
575 		return 0;
576 	}
577 
578 	return 1;
579 }
580 
581 /*
582  * Construct a Router Alert option to use in outgoing packets.
583  */
584 static struct mbuf *
igmp_ra_alloc(void)585 igmp_ra_alloc(void)
586 {
587 	struct mbuf     *m;
588 	struct ipoption *p;
589 
590 	MGET(m, M_WAITOK, MT_DATA);
591 	p = mtod(m, struct ipoption *);
592 	p->ipopt_dst.s_addr = INADDR_ANY;
593 	p->ipopt_list[0] = (char)IPOPT_RA;      /* Router Alert Option */
594 	p->ipopt_list[1] = 0x04;        /* 4 bytes long */
595 	p->ipopt_list[2] = IPOPT_EOL;   /* End of IP option list */
596 	p->ipopt_list[3] = 0x00;        /* pad byte */
597 	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
598 
599 	return m;
600 }
601 
602 /*
603  * Attach IGMP when PF_INET is attached to an interface.
604  */
struct igmp_ifinfo *
igmp_domifattach(struct ifnet *ifp, zalloc_flags_t how)
{
	struct igmp_ifinfo *igi;

	os_log_debug(OS_LOG_DEFAULT, "%s: called for ifp %s\n",
	    __func__, ifp->if_name);

	igi = igi_alloc(how);
	if (igi == NULL) {
		return NULL;
	}

	IGMP_LOCK();

	IGI_LOCK(igi);
	igi_initvar(igi, ifp, 0);
	igi->igi_debug |= IFD_ATTACHED;
	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
	IGI_ADDREF_LOCKED(igi); /* hold a reference for caller */
	IGI_UNLOCK(igi);
	/* Flag the igi silent unless the interface supports multicast. */
	ifnet_lock_shared(ifp);
	igmp_initsilent(ifp, igi);
	ifnet_lock_done(ifp);

	LIST_INSERT_HEAD(&igi_head, igi, igi_link);

	IGMP_UNLOCK();

	os_log_info(OS_LOG_DEFAULT, "%s: allocated igmp_ifinfo for ifp %s\n",
	    __func__, ifp->if_name);

	return igi;
}
639 
640 /*
641  * Attach IGMP when PF_INET is reattached to an interface.  Caller is
642  * expected to have an outstanding reference to the igi.
643  */
void
igmp_domifreattach(struct igmp_ifinfo *igi)
{
	struct ifnet *ifp;

	IGMP_LOCK();

	IGI_LOCK(igi);
	/* The igi must have been detached (igi_delete) but not freed. */
	VERIFY(!(igi->igi_debug & IFD_ATTACHED));
	ifp = igi->igi_ifp;
	VERIFY(ifp != NULL);
	igi_initvar(igi, ifp, 1);       /* reattach: preserve release list */
	igi->igi_debug |= IFD_ATTACHED;
	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
	IGI_UNLOCK(igi);
	ifnet_lock_shared(ifp);
	igmp_initsilent(ifp, igi);
	ifnet_lock_done(ifp);

	LIST_INSERT_HEAD(&igi_head, igi, igi_link);

	IGMP_UNLOCK();

	os_log_info(OS_LOG_DEFAULT, "%s: reattached igmp_ifinfo for ifp %s\n",
	    __func__, ifp->if_name);
}
670 
671 /*
672  * Hook for domifdetach.
673  */
void
igmp_domifdetach(struct ifnet *ifp)
{
	/* Collects in_multi records whose final release must happen
	 * after all IGMP locks are dropped. */
	SLIST_HEAD(, in_multi) inm_dthead;

	SLIST_INIT(&inm_dthead);

	os_log_info(OS_LOG_DEFAULT, "%s: called for ifp %s\n", __func__,
	    if_name(ifp));

	IGMP_LOCK();
	igi_delete(ifp, (struct igmp_inm_relhead *)&inm_dthead);
	IGMP_UNLOCK();

	/* Now that we've dropped all locks, release detached records */
	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
}
691 
692 /*
693  * Called at interface detach time.  Note that we only flush all deferred
694  * responses and record releases; all remaining inm records and their source
695  * entries related to this interface are left intact, in order to handle
696  * the reattach case.
697  */
static void
igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead)
{
	struct igmp_ifinfo *igi, *tigi;

	IGMP_LOCK_ASSERT_HELD();

	LIST_FOREACH_SAFE(igi, &igi_head, igi_link, tigi) {
		IGI_LOCK(igi);
		if (igi->igi_ifp == ifp) {
			/*
			 * Free deferred General Query responses.
			 */
			IF_DRAIN(&igi->igi_gq);
			IF_DRAIN(&igi->igi_v2q);
			/* Hand pending-release records to the caller. */
			igmp_flush_relq(igi, inm_dthead);
			igi->igi_debug &= ~IFD_ATTACHED;
			IGI_UNLOCK(igi);

			LIST_REMOVE(igi, igi_link);
			IGI_REMREF(igi); /* release igi_head reference */
			return;
		}
		IGI_UNLOCK(igi);
	}
	/* Every attached interface must have an igi on igi_head. */
	panic("%s: igmp_ifinfo not found for ifp %p(%s)", __func__,
	    ifp, if_name(ifp));
}
726 
727 __private_extern__ void
igmp_initsilent(struct ifnet * ifp,struct igmp_ifinfo * igi)728 igmp_initsilent(struct ifnet *ifp, struct igmp_ifinfo *igi)
729 {
730 	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
731 
732 	IGI_LOCK_ASSERT_NOTHELD(igi);
733 	IGI_LOCK(igi);
734 	if (!(ifp->if_flags & IFF_MULTICAST)) {
735 		igi->igi_flags |= IGIF_SILENT;
736 	} else {
737 		igi->igi_flags &= ~IGIF_SILENT;
738 	}
739 	IGI_UNLOCK(igi);
740 }
741 
static void
igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach)
{
	IGI_LOCK_ASSERT_HELD(igi);

	/* Reset per-interface IGMP state to compiled-in defaults. */
	igi->igi_ifp = ifp;
	igi->igi_version = igmp_default_version;
	igi->igi_flags = 0;
	igi->igi_rv = IGMP_RV_INIT;     /* robustness variable (RFC 3376) */
	igi->igi_qi = IGMP_QI_INIT;     /* query interval */
	igi->igi_qri = IGMP_QRI_INIT;   /* query response interval */
	igi->igi_uri = IGMP_URI_INIT;   /* unsolicited report interval */

	/* Initialize the release list only on first attach; a reattach
	 * keeps whatever records are still pending on it. */
	if (!reattach) {
		SLIST_INIT(&igi->igi_relinmhead);
	}

	/*
	 * Responses to general queries are subject to bounds.
	 */
	igi->igi_gq.ifq_maxlen =  IGMP_MAX_RESPONSE_PACKETS;
	igi->igi_v2q.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
}
765 
766 static struct igmp_ifinfo *
igi_alloc(zalloc_flags_t how)767 igi_alloc(zalloc_flags_t how)
768 {
769 	struct igmp_ifinfo *igi = zalloc_flags(igi_zone, how | Z_ZERO);
770 	if (igi != NULL) {
771 		lck_mtx_init(&igi->igi_lock, &igmp_mtx_grp, &igmp_mtx_attr);
772 		igi->igi_debug |= IFD_ALLOC;
773 	}
774 	return igi;
775 }
776 
static void
igi_free(struct igmp_ifinfo *igi)
{
	IGI_LOCK(igi);
	/*
	 * Each of these conditions indicates a lifecycle or refcounting
	 * bug elsewhere; a valid igi reaching here is detached, has no
	 * interface, carries IFD_ALLOC, and holds no references.
	 */
	if (igi->igi_debug & IFD_ATTACHED) {
		panic("%s: attached igi=%p is being freed", __func__, igi);
		/* NOTREACHED */
	} else if (igi->igi_ifp != NULL) {
		panic("%s: ifp not NULL for igi=%p", __func__, igi);
		/* NOTREACHED */
	} else if (!(igi->igi_debug & IFD_ALLOC)) {
		panic("%s: igi %p cannot be freed", __func__, igi);
		/* NOTREACHED */
	} else if (igi->igi_refcnt != 0) {
		panic("%s: non-zero refcnt igi=%p", __func__, igi);
		/* NOTREACHED */
	}
	igi->igi_debug &= ~IFD_ALLOC;
	IGI_UNLOCK(igi);

	lck_mtx_destroy(&igi->igi_lock, &igmp_mtx_grp);
	zfree(igi_zone, igi);
}
800 
801 void
igi_addref(struct igmp_ifinfo * igi,int locked)802 igi_addref(struct igmp_ifinfo *igi, int locked)
803 {
804 	if (!locked) {
805 		IGI_LOCK_SPIN(igi);
806 	} else {
807 		IGI_LOCK_ASSERT_HELD(igi);
808 	}
809 
810 	if (++igi->igi_refcnt == 0) {
811 		panic("%s: igi=%p wraparound refcnt", __func__, igi);
812 		/* NOTREACHED */
813 	}
814 	if (!locked) {
815 		IGI_UNLOCK(igi);
816 	}
817 }
818 
void
igi_remref(struct igmp_ifinfo *igi)
{
	SLIST_HEAD(, in_multi) inm_dthead;
	struct ifnet *ifp;

	IGI_LOCK_SPIN(igi);

	if (igi->igi_refcnt == 0) {
		panic("%s: igi=%p negative refcnt", __func__, igi);
		/* NOTREACHED */
	}

	--igi->igi_refcnt;
	if (igi->igi_refcnt > 0) {
		IGI_UNLOCK(igi);
		return;
	}

	/*
	 * Last reference dropped: detach from the interface, drain any
	 * deferred query responses, and collect in_multi records still
	 * pending release before tearing the igi down.
	 */
	ifp = igi->igi_ifp;
	igi->igi_ifp = NULL;
	IF_DRAIN(&igi->igi_gq);
	IF_DRAIN(&igi->igi_v2q);
	SLIST_INIT(&inm_dthead);
	igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
	IGI_UNLOCK(igi);

	/* Now that we've dropped all locks, release detached records */
	IGMP_REMOVE_DETACHED_INM(&inm_dthead);

	os_log_info(OS_LOG_DEFAULT, "%s: freeing igmp_ifinfo for ifp %s\n",
	    __func__, if_name(ifp));

	igi_free(igi);
}
854 
855 /*
856  * Process a received IGMPv1 query.
857  * Return non-zero if the message should be dropped.
858  */
static int
igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	struct in_multistep     step;
	struct igmp_tparams     itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	/*
	 * IGMPv1 Host Membership Queries SHOULD always be addressed to
	 * 224.0.0.1. They are always treated as General Queries.
	 * igmp_group is always ignored. Do not drop it as a userland
	 * daemon may wish to see it.
	 */
	if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
		IGMPSTAT_INC(igps_rcv_badqueries);
		OIGMPSTAT_INC(igps_rcv_badqueries);
		goto done;
	}
	IGMPSTAT_INC(igps_rcv_gen_queries);

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		os_log_debug(OS_LOG_DEFAULT,
		    "%s: ignore v1 query on IGIF_LOOPBACK "
		    "ifp %s\n", __func__,
		    if_name(ifp));
		IGI_UNLOCK(igi);
		goto done;
	}
	/*
	 * Switch to IGMPv1 host compatibility mode.
	 */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_1);
	IGI_UNLOCK(igi);

	os_log_debug(OS_LOG_DEFAULT, "%s: process v1 query on ifp %s\n", __func__,
	    if_name(ifp));

	/*
	 * Start the timers in all of our group records
	 * for the interface on which the query arrived,
	 * except those which are already running.
	 */
	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		/* Skip groups on other interfaces or with a timer armed. */
		if (inm->inm_ifp != ifp || inm->inm_timer != 0) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/* Schedule a randomized report within the v1/v2
			 * maximum response interval; cst requests that the
			 * current-state timer be (re)started. */
			inm->inm_state = IGMP_REPORTING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
			itp.cst = 1;
			break;
		case IGMP_LEAVING_MEMBER:
			break;
		}
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();
done:
	/* Arm whatever timers the above walk requested. */
	igmp_set_timeout(&itp);

	return 0;
}
945 
946 /*
947  * Process a received IGMPv2 general or group-specific query.
948  */
static int
igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	int                      is_general_query;
	uint16_t                 timer;
	/* Timer kicks are accumulated here and applied once at "done". */
	struct igmp_tparams      itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	/*
	 * Validate address fields upfront.
	 */
	if (in_nullhost(igmp->igmp_group)) {
		/*
		 * IGMPv2 General Query.
		 * If this was not sent to the all-hosts group, ignore it.
		 */
		if (!in_allhosts(ip->ip_dst)) {
			goto done;
		}
		IGMPSTAT_INC(igps_rcv_gen_queries);
		is_general_query = 1;
	} else {
		/* IGMPv2 Group-Specific Query. */
		IGMPSTAT_INC(igps_rcv_group_queries);
	}

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		os_log_debug(OS_LOG_DEFAULT, "%s: ignore v2 query on IGIF_LOOPBACK "
		    "ifp %s\n", __func__, if_name(ifp));
		IGI_UNLOCK(igi);
		goto done;
	}
	/*
	 * Ignore v2 query if in v1 Compatibility Mode.
	 */
	if (igi->igi_version == IGMP_VERSION_1) {
		IGI_UNLOCK(igi);
		goto done;
	}
	/* Hearing a v2 querier (re)starts the querier-present timer. */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_2);
	IGI_UNLOCK(igi);

	/* igmp_code is in units of 1/10 s; convert to seconds, minimum 1. */
	timer = igmp->igmp_code / IGMP_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	if (is_general_query) {
		struct in_multistep step;

		os_log_debug(OS_LOG_DEFAULT, "%s: process v2 general query on ifp %s\n",
		    __func__, if_name(ifp));
		/*
		 * For each reporting group joined on this
		 * interface, kick the report timer.
		 */
		in_multihead_lock_shared();
		IN_FIRST_MULTI(step, inm);
		while (inm != NULL) {
			INM_LOCK(inm);
			if (inm->inm_ifp == ifp) {
				itp.cst += igmp_v2_update_group(inm, timer);
			}
			INM_UNLOCK(inm);
			IN_NEXT_MULTI(step, inm);
		}
		in_multihead_lock_done();
	} else {
		/*
		 * Group-specific IGMPv2 query, we need only
		 * look up the single group to process it.
		 */
		in_multihead_lock_shared();
		IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
		in_multihead_lock_done();
		if (inm != NULL) {
			INM_LOCK(inm);
			IGMP_INET_PRINTF(igmp->igmp_group,
			    ("process v2 query %s on ifp 0x%llx(%s)\n",
			    _igmp_inet_buf,
			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
			itp.cst = igmp_v2_update_group(inm, timer);
			INM_UNLOCK(inm);
			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
		}
	}
done:
	/* Arm any global IGMP timers requested above (takes IGMP lock). */
	igmp_set_timeout(&itp);

	return 0;
}
1050 
1051 /*
1052  * Update the report timer on a group in response to an IGMPv2 query.
1053  *
1054  * If we are becoming the reporting member for this group, start the timer.
1055  * If we already are the reporting member for this group, and timer is
1056  * below the threshold, reset it.
1057  *
1058  * We may be updating the group for the first time since we switched
1059  * to IGMPv3. If we are, then we must clear any recorded source lists,
1060  * and transition to REPORTING state; the group timer is overloaded
1061  * for group and group-source query responses.
1062  *
1063  * Unlike IGMPv3, the delay per group should be jittered
1064  * to avoid bursts of IGMPv2 reports.
1065  */
static uint32_t
igmp_v2_update_group(struct in_multi *inm, const int timer)
{
	IGMP_INET_PRINTF(inm->inm_addr, ("%s: %s/%s timer=%d\n",
	    __func__, _igmp_inet_buf, if_name(inm->inm_ifp),
	    timer));

	INM_LOCK_ASSERT_HELD(inm);

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
		/* Not reporting for this group; leave the timer untouched. */
		break;
	case IGMP_REPORTING_MEMBER:
		if (inm->inm_timer != 0 &&
		    inm->inm_timer <= timer) {
			/* A report is already due sooner than requested. */
			IGMP_PRINTF(("%s: REPORTING and timer running, "
			    "skipping.\n", __func__));
			break;
		}
		OS_FALLTHROUGH;
	case IGMP_SG_QUERY_PENDING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		/* Become (or remain) the reporting member; jitter the delay. */
		IGMP_PRINTF(("%s: ->REPORTING\n", __func__));
		inm->inm_state = IGMP_REPORTING_MEMBER;
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		break;
	case IGMP_SLEEPING_MEMBER:
		IGMP_PRINTF(("%s: ->AWAKENING\n", __func__));
		inm->inm_state = IGMP_AWAKENING_MEMBER;
		break;
	case IGMP_LEAVING_MEMBER:
		break;
	}

	/* Callers fold a non-zero timer into itp.cst to arm the CS timer. */
	return inm->inm_timer;
}
1106 
1107 /*
1108  * Process a received IGMPv3 general, group-specific or
1109  * group-and-source-specific query.
1110  * Assumes m has already been pulled up to the full IGMP message length.
1111  * Return 0 if successful, otherwise an appropriate error code is returned.
1112  */
static int
igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
    /*const*/ struct igmpv3 *igmpv3)
{
	struct igmp_ifinfo      *igi;
	struct in_multi         *inm;
	int                      is_general_query;
	uint32_t                 maxresp, nsrc, qqi;
	uint32_t                 timer;
	uint8_t                  qrv;
	/* Timer kicks are accumulated here and applied once at "done". */
	struct igmp_tparams      itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	os_log_debug(OS_LOG_DEFAULT, "%s: process v3 query on ifp %s\n", __func__,
	    if_name(ifp));

	maxresp = igmpv3->igmp_code;    /* in 1/10ths of a second */
	if (maxresp >= 128) {
		/* Values >= 128 use the mant/exp floating-point encoding. */
		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
		    (IGMP_EXP(igmpv3->igmp_code) + 3);
	}

	/*
	 * Robustness must never be less than 2 for on-wire IGMPv3.
	 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
	 * an exception for interfaces whose IGMPv3 state changes
	 * are redirected to loopback (e.g. MANET).
	 */
	qrv = IGMP_QRV(igmpv3->igmp_misc);
	if (qrv < 2) {
		IGMP_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
		    qrv, IGMP_RV_INIT));
		qrv = IGMP_RV_INIT;
	}

	qqi = igmpv3->igmp_qqi;
	if (qqi >= 128) {
		/* Same mant/exp decoding as the max-response code above. */
		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
		    (IGMP_EXP(igmpv3->igmp_qqi) + 3);
	}

	/* Convert 1/10 s units to seconds, minimum 1. */
	timer = maxresp / IGMP_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Validate address fields and versions upfront before
	 * accepting v3 query.
	 */
	if (in_nullhost(igmpv3->igmp_group)) {
		/*
		 * IGMPv3 General Query.
		 *
		 * General Queries SHOULD be directed to 224.0.0.1.
		 * A general query with a source list has undefined
		 * behaviour; discard it.
		 */
		IGMPSTAT_INC(igps_rcv_gen_queries);
		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
			IGMPSTAT_INC(igps_rcv_badqueries);
			OIGMPSTAT_INC(igps_rcv_badqueries);
			goto done;
		}
		is_general_query = 1;
	} else {
		/* Group or group-source specific query. */
		if (nsrc == 0) {
			IGMPSTAT_INC(igps_rcv_group_queries);
		} else {
			IGMPSTAT_INC(igps_rcv_gsr_queries);
		}
	}

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		os_log_debug(OS_LOG_DEFAULT, "%s: ignore v3 query on IGIF_LOOPBACK "
		    "ifp %s\n", __func__,
		    if_name(ifp));
		IGI_UNLOCK(igi);
		goto done;
	}

	/*
	 * Discard the v3 query if we're in Compatibility Mode.
	 * The RFC is not obviously worded that hosts need to stay in
	 * compatibility mode until the Old Version Querier Present
	 * timer expires.
	 */
	if (igi->igi_version != IGMP_VERSION_3) {
		os_log_debug(OS_LOG_DEFAULT, "%s: ignore v3 query in v%d mode on "
		    "ifp %s\n", __func__, igi->igi_version,
		    if_name(ifp));
		IGI_UNLOCK(igi);
		goto done;
	}

	/* Adopt the querier's robustness/interval values for this link. */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_3);
	igi->igi_rv = qrv;
	igi->igi_qi = qqi;
	igi->igi_qri = MAX(timer, IGMP_QRI_MIN);

	IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, igi->igi_rv,
	    igi->igi_qi, igi->igi_qri));

	if (is_general_query) {
		/*
		 * Schedule a current-state report on this ifp for
		 * all groups, possibly containing source lists.
		 * If there is a pending General Query response
		 * scheduled earlier than the selected delay, do
		 * not schedule any other reports.
		 * Otherwise, reset the interface timer.
		 */
		os_log_debug(OS_LOG_DEFAULT, "%s: process v3 general query on ifp %s\n",
		    __func__, if_name(ifp));
		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
			itp.it = igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
		}
		IGI_UNLOCK(igi);
	} else {
		IGI_UNLOCK(igi);
		/*
		 * Group-source-specific queries are throttled on
		 * a per-group basis to defeat denial-of-service attempts.
		 * Queries for groups we are not a member of on this
		 * link are simply ignored.
		 */
		in_multihead_lock_shared();
		IN_LOOKUP_MULTI(&igmpv3->igmp_group, ifp, inm);
		in_multihead_lock_done();
		if (inm == NULL) {
			goto done;
		}

		INM_LOCK(inm);
		if (nsrc > 0) {
			if (!ratecheck(&inm->inm_lastgsrtv,
			    &igmp_gsrdelay)) {
				os_log_info(OS_LOG_DEFAULT, "%s: GS query throttled.\n",
				    __func__);
				IGMPSTAT_INC(igps_drop_gsr_queries);
				INM_UNLOCK(inm);
				INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
				goto done;
			}
		}
		IGMP_INET_PRINTF(igmpv3->igmp_group,
		    ("process v3 %s query on ifp 0x%llx(%s)\n", _igmp_inet_buf,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		os_log_debug(OS_LOG_DEFAULT, "%s: process v3 query on ifp %s\n",
		    __func__, if_name(ifp));
		/*
		 * If there is a pending General Query response
		 * scheduled sooner than the selected delay, no
		 * further report need be scheduled.
		 * Otherwise, prepare to respond to the
		 * group-specific or group-and-source query.
		 */
		IGI_LOCK(igi);
		itp.it = igi->igi_v3_timer;
		IGI_UNLOCK(igi);
		if (itp.it == 0 || itp.it >= timer) {
			(void) igmp_input_v3_group_query(inm, timer, igmpv3);
			itp.cst = inm->inm_timer;
		}
		INM_UNLOCK(inm);
		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
	}
done:
	if (itp.it > 0) {
		os_log_debug(OS_LOG_DEFAULT, "%s: v3 general query response scheduled in "
		    "T+%d seconds on ifp %s\n", __func__, itp.it,
		    if_name(ifp));
	}
	/* Arm any global IGMP timers requested above (takes IGMP lock). */
	igmp_set_timeout(&itp);

	return 0;
}
1300 
/*
 * Process a received IGMPv3 group-specific or group-and-source-specific
 * query.
 * Return <0 if any error occurred. Currently this is ignored.
 */
static int
igmp_input_v3_group_query(struct in_multi *inm,
    int timer, /*const*/ struct igmpv3 *igmpv3)
{
	int                      retval;
	uint16_t                 nsrc;

	INM_LOCK_ASSERT_HELD(inm);

	retval = 0;

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LEAVING_MEMBER:
		/* No response needed in these states. */
		return retval;
	case IGMP_REPORTING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		break;
	}

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Deal with group-specific queries upfront.
	 * If any group query is already pending, purge any recorded
	 * source-list state if it exists, and schedule a query response
	 * for this group-specific query.
	 */
	if (nsrc == 0) {
		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
			inm_clear_recorded(inm);
			/* Never delay a response past one already pending. */
			timer = min(inm->inm_timer, timer);
		}
		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		return retval;
	}

	/*
	 * Deal with the case where a group-and-source-specific query has
	 * been received but a group-specific query is already pending.
	 */
	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
		timer = min(inm->inm_timer, timer);
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		return retval;
	}

	/*
	 * Finally, deal with the case where a group-and-source-specific
	 * query has been received, where a response to a previous g-s-r
	 * query exists, or none exists.
	 * In this case, we need to parse the source-list which the Querier
	 * has provided us with and check if we have any source list filter
	 * entries at T1 for these sources. If we do not, there is no need
	 * schedule a report and the query may be dropped.
	 * If we do, we must record them and schedule a current-state
	 * report for those sources.
	 * FIXME: Handling source lists larger than 1 mbuf requires that
	 * we pass the mbuf chain pointer down to this function, and use
	 * m_getptr() to walk the chain.
	 */
	if (inm->inm_nsrc > 0) {
		const struct in_addr    *ap;
		int                      i, nrecorded;

		/* Source addresses immediately follow the fixed v3 header. */
		ap = (const struct in_addr *)(igmpv3 + 1);
		nrecorded = 0;
		for (i = 0; i < nsrc; i++, ap++) {
			/* inm_record_source() returns <0 on failure, else
			 * the number of sources newly recorded (0 or 1). */
			retval = inm_record_source(inm, ap->s_addr);
			if (retval < 0) {
				break;
			}
			nrecorded += retval;
		}
		if (nrecorded > 0) {
			os_log_debug(OS_LOG_DEFAULT, "%s: schedule response to SG query\n",
			    __func__);
			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		}
	}

	return retval;
}
1398 
1399 /*
1400  * Process a received IGMPv1 host membership report.
1401  *
1402  * NOTE: 0.0.0.0 workaround breaks const correctness.
1403  */
1404 static int
igmp_input_v1_report(struct ifnet * ifp,struct mbuf * m,struct ip * ip,struct igmp * igmp)1405 igmp_input_v1_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
1406     /*const*/ struct igmp *igmp)
1407 {
1408 	struct in_ifaddr *ia;
1409 	struct in_multi *inm;
1410 
1411 	IGMPSTAT_INC(igps_rcv_reports);
1412 	OIGMPSTAT_INC(igps_rcv_reports);
1413 
1414 	if ((ifp->if_flags & IFF_LOOPBACK) ||
1415 	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1416 		return 0;
1417 	}
1418 
1419 	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr) ||
1420 	    !in_hosteq(igmp->igmp_group, ip->ip_dst))) {
1421 		IGMPSTAT_INC(igps_rcv_badreports);
1422 		OIGMPSTAT_INC(igps_rcv_badreports);
1423 		return EINVAL;
1424 	}
1425 
1426 	/*
1427 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1428 	 * Booting clients may use the source address 0.0.0.0. Some
1429 	 * IGMP daemons may not know how to use IP_RECVIF to determine
1430 	 * the interface upon which this message was received.
1431 	 * Replace 0.0.0.0 with the subnet address if told to do so.
1432 	 */
1433 	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1434 		IFP_TO_IA(ifp, ia);
1435 		if (ia != NULL) {
1436 			IFA_LOCK(&ia->ia_ifa);
1437 			ip->ip_src.s_addr = htonl(ia->ia_subnet);
1438 			IFA_UNLOCK(&ia->ia_ifa);
1439 			IFA_REMREF(&ia->ia_ifa);
1440 		}
1441 	}
1442 
1443 	IGMP_INET_PRINTF(igmp->igmp_group,
1444 	    ("process v1 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1445 	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1446 
1447 	/*
1448 	 * IGMPv1 report suppression.
1449 	 * If we are a member of this group, and our membership should be
1450 	 * reported, stop our group timer and transition to the 'lazy' state.
1451 	 */
1452 	in_multihead_lock_shared();
1453 	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1454 	in_multihead_lock_done();
1455 	if (inm != NULL) {
1456 		struct igmp_ifinfo *igi;
1457 
1458 		INM_LOCK(inm);
1459 
1460 		igi = inm->inm_igi;
1461 		VERIFY(igi != NULL);
1462 
1463 		IGMPSTAT_INC(igps_rcv_ourreports);
1464 		OIGMPSTAT_INC(igps_rcv_ourreports);
1465 
1466 		/*
1467 		 * If we are in IGMPv3 host mode, do not allow the
1468 		 * other host's IGMPv1 report to suppress our reports
1469 		 * unless explicitly configured to do so.
1470 		 */
1471 		IGI_LOCK(igi);
1472 		if (igi->igi_version == IGMP_VERSION_3) {
1473 			if (igmp_legacysupp) {
1474 				igmp_v3_suppress_group_record(inm);
1475 			}
1476 			IGI_UNLOCK(igi);
1477 			INM_UNLOCK(inm);
1478 			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1479 			return 0;
1480 		}
1481 
1482 		INM_LOCK_ASSERT_HELD(inm);
1483 		inm->inm_timer = 0;
1484 
1485 		switch (inm->inm_state) {
1486 		case IGMP_NOT_MEMBER:
1487 		case IGMP_SILENT_MEMBER:
1488 			break;
1489 		case IGMP_IDLE_MEMBER:
1490 		case IGMP_LAZY_MEMBER:
1491 		case IGMP_AWAKENING_MEMBER:
1492 			IGMP_INET_PRINTF(igmp->igmp_group,
1493 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1494 			    _igmp_inet_buf,
1495 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1496 			OS_FALLTHROUGH;
1497 		case IGMP_SLEEPING_MEMBER:
1498 			inm->inm_state = IGMP_SLEEPING_MEMBER;
1499 			break;
1500 		case IGMP_REPORTING_MEMBER:
1501 			IGMP_INET_PRINTF(igmp->igmp_group,
1502 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1503 			    _igmp_inet_buf,
1504 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1505 			if (igi->igi_version == IGMP_VERSION_1) {
1506 				inm->inm_state = IGMP_LAZY_MEMBER;
1507 			} else if (igi->igi_version == IGMP_VERSION_2) {
1508 				inm->inm_state = IGMP_SLEEPING_MEMBER;
1509 			}
1510 			break;
1511 		case IGMP_G_QUERY_PENDING_MEMBER:
1512 		case IGMP_SG_QUERY_PENDING_MEMBER:
1513 		case IGMP_LEAVING_MEMBER:
1514 			break;
1515 		}
1516 		IGI_UNLOCK(igi);
1517 		INM_UNLOCK(inm);
1518 		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1519 	}
1520 
1521 	return 0;
1522 }
1523 
1524 /*
1525  * Process a received IGMPv2 host membership report.
1526  *
1527  * NOTE: 0.0.0.0 workaround breaks const correctness.
1528  */
static int
igmp_input_v2_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
    /*const*/ struct igmp *igmp)
{
	struct in_ifaddr *ia;
	struct in_multi *inm;

	/*
	 * Make sure we don't hear our own membership report.  Fast
	 * leave requires knowing that we are the only member of a
	 * group.
	 */
	IFP_TO_IA(ifp, ia);
	if (ia != NULL) {
		IFA_LOCK(&ia->ia_ifa);
		if (in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
			IFA_UNLOCK(&ia->ia_ifa);
			IFA_REMREF(&ia->ia_ifa);
			return 0;
		}
		IFA_UNLOCK(&ia->ia_ifa);
	}

	IGMPSTAT_INC(igps_rcv_reports);
	OIGMPSTAT_INC(igps_rcv_reports);

	/* Reports looped back to us are our own; nothing to suppress. */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		return 0;
	}

	/* The reported group must be multicast and match the IP dest. */
	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		IGMPSTAT_INC(igps_rcv_badreports);
		OIGMPSTAT_INC(igps_rcv_badreports);
		return EINVAL;
	}

	/*
	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
	 * Booting clients may use the source address 0.0.0.0. Some
	 * IGMP daemons may not know how to use IP_RECVIF to determine
	 * the interface upon which this message was received.
	 * Replace 0.0.0.0 with the subnet address if told to do so.
	 */
	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
		if (ia != NULL) {
			IFA_LOCK(&ia->ia_ifa);
			ip->ip_src.s_addr = htonl(ia->ia_subnet);
			IFA_UNLOCK(&ia->ia_ifa);
		}
	}
	if (ia != NULL) {
		IFA_REMREF(&ia->ia_ifa);
	}

	IGMP_INET_PRINTF(igmp->igmp_group,
	    ("process v2 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	os_log_debug(OS_LOG_DEFAULT, "%s: process v2 report on ifp %s",
	    __func__, if_name(ifp));

	/*
	 * IGMPv2 report suppression.
	 * If we are a member of this group, and our membership should be
	 * reported, and our group timer is pending or about to be reset,
	 * stop our group timer by transitioning to the 'lazy' state.
	 */
	in_multihead_lock_shared();
	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
	in_multihead_lock_done();
	if (inm != NULL) {
		struct igmp_ifinfo *igi;

		INM_LOCK(inm);
		igi = inm->inm_igi;
		VERIFY(igi != NULL);

		IGMPSTAT_INC(igps_rcv_ourreports);
		OIGMPSTAT_INC(igps_rcv_ourreports);

		/*
		 * If we are in IGMPv3 host mode, do not allow the
		 * other host's IGMPv2 report to suppress our reports
		 * unless explicitly configured to do so.
		 */
		IGI_LOCK(igi);
		if (igi->igi_version == IGMP_VERSION_3) {
			if (igmp_legacysupp) {
				igmp_v3_suppress_group_record(inm);
			}
			IGI_UNLOCK(igi);
			INM_UNLOCK(inm);
			INM_REMREF(inm);
			return 0;
		}

		inm->inm_timer = 0;

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
		case IGMP_SLEEPING_MEMBER:
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			IGMP_INET_PRINTF(igmp->igmp_group,
			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
			    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp),
			    if_name(ifp)));
			OS_FALLTHROUGH;
		case IGMP_LAZY_MEMBER:
			inm->inm_state = IGMP_LAZY_MEMBER;
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			break;
		}
		IGI_UNLOCK(igi);
		INM_UNLOCK(inm);
		INM_REMREF(inm);
	}

	return 0;
}
1662 
void
igmp_input(struct mbuf *m, int off)
{
	int iphlen;
	struct ifnet *ifp;
	struct igmp *igmp;
	struct ip *ip;
	int igmplen;
	int minlen;
	int queryver;

	IGMP_PRINTF(("%s: called w/mbuf(0x%llx,%d)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m), off));

	ifp = m->m_pkthdr.rcvif;

	IGMPSTAT_INC(igps_rcv_total);
	OIGMPSTAT_INC(igps_rcv_total);

	/* Expect 32-bit aligned data pointer on strict-align platforms */
	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);

	ip = mtod(m, struct ip *);
	iphlen = off;

	/* By now, ip_len no longer contains the length of IP header */
	igmplen = ip->ip_len;

	/*
	 * Validate lengths.
	 */
	if (igmplen < IGMP_MINLEN) {
		IGMPSTAT_INC(igps_rcv_tooshort);
		OIGMPSTAT_INC(igps_rcv_tooshort);
		m_freem(m);
		return;
	}

	/*
	 * Always pullup to the minimum size for v1/v2 or v3
	 * to amortize calls to m_pulldown().
	 */
	if (igmplen >= IGMP_V3_QUERY_MINLEN) {
		minlen = IGMP_V3_QUERY_MINLEN;
	} else {
		minlen = IGMP_MINLEN;
	}

	/* A bit more expensive than M_STRUCT_GET, but ensures alignment */
	M_STRUCT_GET0(igmp, struct igmp *, m, off, minlen);
	if (igmp == NULL) {
		/* NOTE(review): no m_freem() here — presumably
		 * M_STRUCT_GET0 frees the chain on failure; confirm. */
		IGMPSTAT_INC(igps_rcv_tooshort);
		OIGMPSTAT_INC(igps_rcv_tooshort);
		return;
	}
	/* N.B.: we assume the packet was correctly aligned in ip_input. */

	/*
	 * Validate checksum.
	 */
	/* Temporarily advance past the IP header for the checksum pass. */
	m->m_data += iphlen;
	m->m_len -= iphlen;
	if (in_cksum(m, igmplen)) {
		IGMPSTAT_INC(igps_rcv_badsum);
		OIGMPSTAT_INC(igps_rcv_badsum);
		m_freem(m);
		return;
	}
	m->m_data -= iphlen;
	m->m_len += iphlen;

	/*
	 * IGMP control traffic is link-scope, and must have a TTL of 1.
	 * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
	 * probe packets may come from beyond the LAN.
	 */
	if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
		IGMPSTAT_INC(igps_rcv_badttl);
		m_freem(m);
		return;
	}

	switch (igmp->igmp_type) {
	case IGMP_HOST_MEMBERSHIP_QUERY:
		/* Distinguish query version by length and code field. */
		if (igmplen == IGMP_MINLEN) {
			/* v1 queries carry a zero max-response code. */
			if (igmp->igmp_code == 0) {
				queryver = IGMP_VERSION_1;
			} else {
				queryver = IGMP_VERSION_2;
			}
		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
			queryver = IGMP_VERSION_3;
		} else {
			IGMPSTAT_INC(igps_rcv_tooshort);
			OIGMPSTAT_INC(igps_rcv_tooshort);
			m_freem(m);
			return;
		}

		OIGMPSTAT_INC(igps_rcv_queries);

		switch (queryver) {
		case IGMP_VERSION_1:
			IGMPSTAT_INC(igps_rcv_v1v2_queries);
			if (!igmp_v1enable) {
				break;
			}
			if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
				m_freem(m);
				return;
			}
			break;

		case IGMP_VERSION_2:
			IGMPSTAT_INC(igps_rcv_v1v2_queries);
			if (!igmp_v2enable) {
				break;
			}
			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
				m_freem(m);
				return;
			}
			break;

		case IGMP_VERSION_3: {
			struct igmpv3 *igmpv3;
			uint16_t igmpv3len;
			uint16_t srclen;
			int nsrc;

			IGMPSTAT_INC(igps_rcv_v3_queries);
			igmpv3 = (struct igmpv3 *)igmp;
			/*
			 * Validate length based on source count.
			 */
			nsrc = ntohs(igmpv3->igmp_numsrc);
			/*
			 * The max vaue of nsrc is limited by the
			 * MTU of the network on which the datagram
			 * is received
			 */
			/* (nsrc < 0 cannot occur after ntohs; kept as a
			 * defensive check.) */
			if (nsrc < 0 || nsrc > IGMP_V3_QUERY_MAX_SRCS) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				m_freem(m);
				return;
			}
			srclen = sizeof(struct in_addr) * (uint16_t)nsrc;
			if (igmplen < (IGMP_V3_QUERY_MINLEN + srclen)) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				m_freem(m);
				return;
			}
			igmpv3len = IGMP_V3_QUERY_MINLEN + srclen;
			/*
			 * A bit more expensive than M_STRUCT_GET,
			 * but ensures alignment.
			 */
			M_STRUCT_GET0(igmpv3, struct igmpv3 *, m,
			    off, igmpv3len);
			if (igmpv3 == NULL) {
				/* NOTE(review): same assumption as above —
				 * chain presumed freed on failure. */
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				return;
			}
			/*
			 * N.B.: we assume the packet was correctly
			 * aligned in ip_input.
			 */
			if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
				m_freem(m);
				return;
			}
		}
		break;
		}
		break;

	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
		if (!igmp_v1enable) {
			break;
		}
		if (igmp_input_v1_report(ifp, m, ip, igmp) != 0) {
			m_freem(m);
			return;
		}
		break;

	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
		if (!igmp_v2enable) {
			break;
		}
		if (!ip_checkrouteralert(m)) {
			IGMPSTAT_INC(igps_rcv_nora);
		}
		if (igmp_input_v2_report(ifp, m, ip, igmp) != 0) {
			m_freem(m);
			return;
		}
		break;

	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
		/*
		 * Hosts do not need to process IGMPv3 membership reports,
		 * as report suppression is no longer required.
		 */
		if (!ip_checkrouteralert(m)) {
			IGMPSTAT_INC(igps_rcv_nora);
		}
		break;

	default:
		break;
	}

	IGMP_LOCK_ASSERT_NOTHELD();
	/*
	 * Pass all valid IGMP packets up to any process(es) listening on a
	 * raw IGMP socket.  rip_input() consumes the mbuf.
	 */
	rip_input(m, off);
}
1886 
1887 /*
1888  * Schedule IGMP timer based on various parameters; caller must ensure that
1889  * lock ordering is maintained as this routine acquires IGMP global lock.
1890  */
1891 void
igmp_set_timeout(struct igmp_tparams * itp)1892 igmp_set_timeout(struct igmp_tparams *itp)
1893 {
1894 	IGMP_LOCK_ASSERT_NOTHELD();
1895 	VERIFY(itp != NULL);
1896 
1897 	if (itp->qpt != 0 || itp->it != 0 || itp->cst != 0 || itp->sct != 0) {
1898 		IGMP_LOCK();
1899 		if (itp->qpt != 0) {
1900 			querier_present_timers_running = 1;
1901 		}
1902 		if (itp->it != 0) {
1903 			interface_timers_running = 1;
1904 		}
1905 		if (itp->cst != 0) {
1906 			current_state_timers_running = 1;
1907 		}
1908 		if (itp->sct != 0) {
1909 			state_change_timers_running = 1;
1910 		}
1911 		if (itp->fast) {
1912 			igmp_sched_fast_timeout();
1913 		} else {
1914 			igmp_sched_timeout();
1915 		}
1916 		IGMP_UNLOCK();
1917 	}
1918 }
1919 
void
igmp_set_fast_timeout(struct igmp_tparams *itp)
{
	VERIFY(itp != NULL);
	/* Request the fast scheduling path in igmp_set_timeout(). */
	itp->fast = true;
	igmp_set_timeout(itp);
}
1927 
1928 /*
1929  * IGMP timer handler (per 1 second).
1930  */
1931 static void
igmp_timeout(thread_call_param_t arg0,thread_call_param_t arg1 __unused)1932 igmp_timeout(thread_call_param_t arg0, thread_call_param_t arg1 __unused)
1933 {
1934 	struct ifqueue           scq;   /* State-change packets */
1935 	struct ifqueue           qrq;   /* Query response packets */
1936 	struct ifnet            *ifp;
1937 	struct igmp_ifinfo      *igi;
1938 	struct in_multi         *inm;
1939 	unsigned int             loop = 0, uri_sec = 0;
1940 	SLIST_HEAD(, in_multi)  inm_dthead;
1941 	bool                     fast = arg0 != NULL;
1942 
1943 	SLIST_INIT(&inm_dthead);
1944 
1945 	/*
1946 	 * Update coarse-grained networking timestamp (in sec.); the idea
1947 	 * is to piggy-back on the timeout callout to update the counter
1948 	 * returnable via net_uptime().
1949 	 */
1950 	net_update_uptime();
1951 
1952 	IGMP_LOCK();
1953 
1954 	IGMP_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d, fast %d\n", __func__,
1955 	    querier_present_timers_running, interface_timers_running,
1956 	    current_state_timers_running, state_change_timers_running,
1957 	    fast));
1958 
1959 	if (fast) {
1960 		/*
1961 		 * When running the fast timer, skip processing
1962 		 * of "querier present" timers since they are
1963 		 * based on 1-second intervals.
1964 		 */
1965 		goto skip_query_timers;
1966 	}
1967 	/*
1968 	 * IGMPv1/v2 querier present timer processing.
1969 	 */
1970 	if (querier_present_timers_running) {
1971 		querier_present_timers_running = 0;
1972 		LIST_FOREACH(igi, &igi_head, igi_link) {
1973 			IGI_LOCK(igi);
1974 			igmp_v1v2_process_querier_timers(igi);
1975 			if (igi->igi_v1_timer > 0 || igi->igi_v2_timer > 0) {
1976 				querier_present_timers_running = 1;
1977 			}
1978 			IGI_UNLOCK(igi);
1979 		}
1980 	}
1981 
1982 	/*
1983 	 * IGMPv3 General Query response timer processing.
1984 	 */
1985 	if (interface_timers_running) {
1986 		IGMP_PRINTF(("%s: interface timers running\n", __func__));
1987 		interface_timers_running = 0;
1988 		LIST_FOREACH(igi, &igi_head, igi_link) {
1989 			IGI_LOCK(igi);
1990 			if (igi->igi_version != IGMP_VERSION_3) {
1991 				IGI_UNLOCK(igi);
1992 				continue;
1993 			}
1994 			if (igi->igi_v3_timer == 0) {
1995 				/* Do nothing. */
1996 			} else if (--igi->igi_v3_timer == 0) {
1997 				if (igmp_v3_dispatch_general_query(igi) > 0) {
1998 					interface_timers_running = 1;
1999 				}
2000 			} else {
2001 				interface_timers_running = 1;
2002 			}
2003 			IGI_UNLOCK(igi);
2004 		}
2005 	}
2006 
2007 skip_query_timers:
2008 	if (!current_state_timers_running &&
2009 	    !state_change_timers_running) {
2010 		goto out_locked;
2011 	}
2012 
2013 	current_state_timers_running = 0;
2014 	state_change_timers_running = 0;
2015 
2016 	memset(&qrq, 0, sizeof(struct ifqueue));
2017 	qrq.ifq_maxlen = IGMP_MAX_G_GS_PACKETS;
2018 
2019 	memset(&scq, 0, sizeof(struct ifqueue));
2020 	scq.ifq_maxlen =  IGMP_MAX_STATE_CHANGE_PACKETS;
2021 
2022 	IGMP_PRINTF(("%s: state change timers running\n", __func__));
2023 
2024 	/*
2025 	 * IGMPv1/v2/v3 host report and state-change timer processing.
2026 	 * Note: Processing a v3 group timer may remove a node.
2027 	 */
2028 	LIST_FOREACH(igi, &igi_head, igi_link) {
2029 		struct in_multistep step;
2030 
2031 		IGI_LOCK(igi);
2032 		ifp = igi->igi_ifp;
2033 		loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
2034 		uri_sec = IGMP_RANDOM_DELAY(igi->igi_uri);
2035 		IGI_UNLOCK(igi);
2036 
2037 		in_multihead_lock_shared();
2038 		IN_FIRST_MULTI(step, inm);
2039 		while (inm != NULL) {
2040 			INM_LOCK(inm);
2041 			if (inm->inm_ifp != ifp) {
2042 				goto next;
2043 			}
2044 
2045 			IGI_LOCK(igi);
2046 			switch (igi->igi_version) {
2047 			case IGMP_VERSION_1:
2048 			case IGMP_VERSION_2:
2049 				igmp_v1v2_process_group_timer(inm,
2050 				    igi->igi_version);
2051 				break;
2052 			case IGMP_VERSION_3:
2053 				igmp_v3_process_group_timers(igi, &qrq,
2054 				    &scq, inm, uri_sec);
2055 				break;
2056 			}
2057 			IGI_UNLOCK(igi);
2058 next:
2059 			INM_UNLOCK(inm);
2060 			IN_NEXT_MULTI(step, inm);
2061 		}
2062 		in_multihead_lock_done();
2063 
2064 		IGI_LOCK(igi);
2065 		if (igi->igi_version == IGMP_VERSION_1 ||
2066 		    igi->igi_version == IGMP_VERSION_2) {
2067 			igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop);
2068 		} else if (igi->igi_version == IGMP_VERSION_3) {
2069 			IGI_UNLOCK(igi);
2070 			igmp_dispatch_queue(NULL, &qrq, 0, loop);
2071 			igmp_dispatch_queue(NULL, &scq, 0, loop);
2072 			VERIFY(qrq.ifq_len == 0);
2073 			VERIFY(scq.ifq_len == 0);
2074 			IGI_LOCK(igi);
2075 		}
2076 		/*
2077 		 * In case there are still any pending membership reports
2078 		 * which didn't get drained at version change time.
2079 		 */
2080 		IF_DRAIN(&igi->igi_v2q);
2081 		/*
2082 		 * Release all deferred inm records, and drain any locally
2083 		 * enqueued packets; do it even if the current IGMP version
2084 		 * for the link is no longer IGMPv3, in order to handle the
2085 		 * version change case.
2086 		 */
2087 		igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
2088 		IGI_UNLOCK(igi);
2089 
2090 		IF_DRAIN(&qrq);
2091 		IF_DRAIN(&scq);
2092 	}
2093 
2094 out_locked:
2095 	/* re-arm the timer if there's work to do */
2096 	if (fast) {
2097 		igmp_fast_timeout_run = false;
2098 	} else {
2099 		igmp_timeout_run = false;
2100 	}
2101 	igmp_sched_timeout();
2102 	IGMP_UNLOCK();
2103 
2104 	/* Now that we're dropped all locks, release detached records */
2105 	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
2106 }
2107 
2108 static void
igmp_sched_timeout(void)2109 igmp_sched_timeout(void)
2110 {
2111 	static thread_call_t igmp_timeout_tcall;
2112 	uint64_t deadline = 0, leeway = 0;
2113 
2114 	IGMP_LOCK_ASSERT_HELD();
2115 	if (igmp_timeout_tcall == NULL) {
2116 		igmp_timeout_tcall =
2117 		    thread_call_allocate_with_options(igmp_timeout,
2118 		    NULL,
2119 		    THREAD_CALL_PRIORITY_KERNEL,
2120 		    THREAD_CALL_OPTIONS_ONCE);
2121 	}
2122 	if (!igmp_timeout_run &&
2123 	    (querier_present_timers_running || current_state_timers_running ||
2124 	    interface_timers_running || state_change_timers_running)) {
2125 		igmp_timeout_run = true;
2126 		clock_interval_to_deadline(igmp_timeout_delay, NSEC_PER_MSEC,
2127 		    &deadline);
2128 		clock_interval_to_absolutetime_interval(igmp_timeout_leeway,
2129 		    NSEC_PER_MSEC, &leeway);
2130 		thread_call_enter_delayed_with_leeway(igmp_timeout_tcall, NULL,
2131 		    deadline, leeway,
2132 		    THREAD_CALL_DELAY_LEEWAY);
2133 	}
2134 }
2135 
2136 static void
igmp_sched_fast_timeout(void)2137 igmp_sched_fast_timeout(void)
2138 {
2139 	static thread_call_t igmp_fast_timeout_tcall;
2140 
2141 	IGMP_LOCK_ASSERT_HELD();
2142 	if (igmp_fast_timeout_tcall == NULL) {
2143 		igmp_fast_timeout_tcall =
2144 		    thread_call_allocate_with_options(igmp_timeout,
2145 		    igmp_sched_fast_timeout,
2146 		    THREAD_CALL_PRIORITY_KERNEL,
2147 		    THREAD_CALL_OPTIONS_ONCE);
2148 	}
2149 	if (!igmp_fast_timeout_run &&
2150 	    (current_state_timers_running || state_change_timers_running)) {
2151 		igmp_fast_timeout_run = true;
2152 		thread_call_enter(igmp_fast_timeout_tcall);
2153 	}
2154 }
2155 
2156 /*
2157  * Appends an in_multi to the list to be released later.
2158  *
2159  * Caller must be holding igi_lock.
2160  */
2161 static void
igmp_append_relq(struct igmp_ifinfo * igi,struct in_multi * inm)2162 igmp_append_relq(struct igmp_ifinfo *igi, struct in_multi *inm)
2163 {
2164 	IGI_LOCK_ASSERT_HELD(igi);
2165 	if (inm->inm_in_nrele) {
2166 		os_log_debug(OS_LOG_DEFAULT, "%s: inm %llx already on relq ifp %s\n",
2167 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm),
2168 		    if_name(igi->igi_ifp));
2169 		return;
2170 	}
2171 	os_log_debug(OS_LOG_DEFAULT, "%s: adding inm %llx on relq ifp %s\n",
2172 	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm),
2173 	    if_name(igi->igi_ifp));
2174 	inm->inm_in_nrele = true;
2175 	SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
2176 }
2177 
2178 /*
2179  * Free the in_multi reference(s) for this IGMP lifecycle.
2180  *
2181  * Caller must be holding igi_lock.
2182  */
2183 static void
igmp_flush_relq(struct igmp_ifinfo * igi,struct igmp_inm_relhead * inm_dthead)2184 igmp_flush_relq(struct igmp_ifinfo *igi, struct igmp_inm_relhead *inm_dthead)
2185 {
2186 	struct in_multi *inm;
2187 	SLIST_HEAD(, in_multi) temp_relinmhead;
2188 
2189 	/*
2190 	 * Before dropping the igi_lock, copy all the items in the
2191 	 * release list to a temporary list to prevent other threads
2192 	 * from changing igi_relinmhead while we are traversing it.
2193 	 */
2194 	IGI_LOCK_ASSERT_HELD(igi);
2195 	SLIST_INIT(&temp_relinmhead);
2196 	while ((inm = SLIST_FIRST(&igi->igi_relinmhead)) != NULL) {
2197 		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
2198 		SLIST_INSERT_HEAD(&temp_relinmhead, inm, inm_nrele);
2199 	}
2200 	IGI_UNLOCK(igi);
2201 	in_multihead_lock_exclusive();
2202 	while ((inm = SLIST_FIRST(&temp_relinmhead)) != NULL) {
2203 		int lastref;
2204 
2205 		SLIST_REMOVE_HEAD(&temp_relinmhead, inm_nrele);
2206 		INM_LOCK(inm);
2207 		os_log_debug(OS_LOG_DEFAULT, "%s: flushing %llx on relq ifp %s",
2208 		    __func__,
2209 		    (uint64_t)VM_KERNEL_ADDRPERM(inm),
2210 		    if_name(inm->inm_ifp));
2211 		VERIFY(inm->inm_in_nrele == true);
2212 		inm->inm_in_nrele = false;
2213 		VERIFY(inm->inm_nrelecnt != 0);
2214 		inm->inm_nrelecnt--;
2215 		lastref = in_multi_detach(inm);
2216 		VERIFY(!lastref || (!(inm->inm_debug & IFD_ATTACHED) &&
2217 		    inm->inm_reqcnt == 0));
2218 		INM_UNLOCK(inm);
2219 		/* from igi_relinmhead */
2220 		INM_REMREF(inm);
2221 		/* from in_multihead list */
2222 		if (lastref) {
2223 			/*
2224 			 * Defer releasing our final reference, as we
2225 			 * are holding the IGMP lock at this point, and
2226 			 * we could end up with locking issues later on
2227 			 * (while issuing SIOCDELMULTI) when this is the
2228 			 * final reference count.  Let the caller do it
2229 			 * when it is safe.
2230 			 */
2231 			IGMP_ADD_DETACHED_INM(inm_dthead, inm);
2232 		}
2233 	}
2234 	in_multihead_lock_done();
2235 	IGI_LOCK(igi);
2236 }
2237 
2238 /*
2239  * Update host report group timer for IGMPv1/v2.
2240  * Will update the global pending timer flags.
2241  */
2242 static void
igmp_v1v2_process_group_timer(struct in_multi * inm,const int igmp_version)2243 igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version)
2244 {
2245 	int report_timer_expired;
2246 
2247 	IGMP_LOCK_ASSERT_HELD();
2248 	INM_LOCK_ASSERT_HELD(inm);
2249 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2250 
2251 	if (inm->inm_timer == 0) {
2252 		report_timer_expired = 0;
2253 	} else if (--inm->inm_timer == 0) {
2254 		report_timer_expired = 1;
2255 	} else {
2256 		current_state_timers_running = 1;
2257 		/* caller will schedule timer */
2258 		return;
2259 	}
2260 
2261 	switch (inm->inm_state) {
2262 	case IGMP_NOT_MEMBER:
2263 	case IGMP_SILENT_MEMBER:
2264 	case IGMP_IDLE_MEMBER:
2265 	case IGMP_LAZY_MEMBER:
2266 	case IGMP_SLEEPING_MEMBER:
2267 	case IGMP_AWAKENING_MEMBER:
2268 		break;
2269 	case IGMP_REPORTING_MEMBER:
2270 		if (report_timer_expired) {
2271 			inm->inm_state = IGMP_IDLE_MEMBER;
2272 			(void) igmp_v1v2_queue_report(inm,
2273 			    (igmp_version == IGMP_VERSION_2) ?
2274 			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
2275 			    IGMP_v1_HOST_MEMBERSHIP_REPORT);
2276 			INM_LOCK_ASSERT_HELD(inm);
2277 			IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2278 		}
2279 		break;
2280 	case IGMP_G_QUERY_PENDING_MEMBER:
2281 	case IGMP_SG_QUERY_PENDING_MEMBER:
2282 	case IGMP_LEAVING_MEMBER:
2283 		break;
2284 	}
2285 }
2286 
2287 /*
2288  * Update a group's timers for IGMPv3.
2289  * Will update the global pending timer flags.
2290  * Note: Unlocked read from igi.
2291  */
2292 static void
igmp_v3_process_group_timers(struct igmp_ifinfo * igi,struct ifqueue * qrq,struct ifqueue * scq,struct in_multi * inm,const unsigned int uri_sec)2293 igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
2294     struct ifqueue *qrq, struct ifqueue *scq,
2295     struct in_multi *inm, const unsigned int uri_sec)
2296 {
2297 	int query_response_timer_expired;
2298 	int state_change_retransmit_timer_expired;
2299 
2300 	IGMP_LOCK_ASSERT_HELD();
2301 	INM_LOCK_ASSERT_HELD(inm);
2302 	IGI_LOCK_ASSERT_HELD(igi);
2303 	VERIFY(igi == inm->inm_igi);
2304 
2305 	query_response_timer_expired = 0;
2306 	state_change_retransmit_timer_expired = 0;
2307 
2308 	/*
2309 	 * During a transition from v1/v2 compatibility mode back to v3,
2310 	 * a group record in REPORTING state may still have its group
2311 	 * timer active. This is a no-op in this function; it is easier
2312 	 * to deal with it here than to complicate the timeout path.
2313 	 */
2314 	if (inm->inm_timer == 0) {
2315 		query_response_timer_expired = 0;
2316 	} else if (--inm->inm_timer == 0) {
2317 		query_response_timer_expired = 1;
2318 	} else {
2319 		current_state_timers_running = 1;
2320 		/* caller will schedule timer */
2321 	}
2322 
2323 	if (inm->inm_sctimer == 0) {
2324 		state_change_retransmit_timer_expired = 0;
2325 	} else if (--inm->inm_sctimer == 0) {
2326 		state_change_retransmit_timer_expired = 1;
2327 	} else {
2328 		state_change_timers_running = 1;
2329 		/* caller will schedule timer */
2330 	}
2331 
2332 	/* We are in timer callback, so be quick about it. */
2333 	if (!state_change_retransmit_timer_expired &&
2334 	    !query_response_timer_expired) {
2335 		return;
2336 	}
2337 
2338 	switch (inm->inm_state) {
2339 	case IGMP_NOT_MEMBER:
2340 	case IGMP_SILENT_MEMBER:
2341 	case IGMP_SLEEPING_MEMBER:
2342 	case IGMP_LAZY_MEMBER:
2343 	case IGMP_AWAKENING_MEMBER:
2344 	case IGMP_IDLE_MEMBER:
2345 		break;
2346 	case IGMP_G_QUERY_PENDING_MEMBER:
2347 	case IGMP_SG_QUERY_PENDING_MEMBER:
2348 		/*
2349 		 * Respond to a previously pending Group-Specific
2350 		 * or Group-and-Source-Specific query by enqueueing
2351 		 * the appropriate Current-State report for
2352 		 * immediate transmission.
2353 		 */
2354 		if (query_response_timer_expired) {
2355 			int retval;
2356 
2357 			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
2358 			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
2359 			IGMP_PRINTF(("%s: enqueue record = %d\n",
2360 			    __func__, retval));
2361 			inm->inm_state = IGMP_REPORTING_MEMBER;
2362 			/* XXX Clear recorded sources for next time. */
2363 			inm_clear_recorded(inm);
2364 		}
2365 		OS_FALLTHROUGH;
2366 	case IGMP_REPORTING_MEMBER:
2367 	case IGMP_LEAVING_MEMBER:
2368 		if (state_change_retransmit_timer_expired) {
2369 			/*
2370 			 * State-change retransmission timer fired.
2371 			 * If there are any further pending retransmissions,
2372 			 * set the global pending state-change flag, and
2373 			 * reset the timer.
2374 			 */
2375 			if (--inm->inm_scrv > 0) {
2376 				inm->inm_sctimer = (uint16_t)uri_sec;
2377 				state_change_timers_running = 1;
2378 				/* caller will schedule timer */
2379 			}
2380 			/*
2381 			 * Retransmit the previously computed state-change
2382 			 * report. If there are no further pending
2383 			 * retransmissions, the mbuf queue will be consumed.
2384 			 * Update T0 state to T1 as we have now sent
2385 			 * a state-change.
2386 			 */
2387 			(void) igmp_v3_merge_state_changes(inm, scq);
2388 
2389 			inm_commit(inm);
2390 			IGMP_INET_PRINTF(inm->inm_addr,
2391 			    ("%s: T1 -> T0 for %s/%s\n", __func__,
2392 			    _igmp_inet_buf, if_name(inm->inm_ifp)));
2393 
2394 			/*
2395 			 * If we are leaving the group for good, make sure
2396 			 * we release IGMP's reference to it.
2397 			 * This release must be deferred using a SLIST,
2398 			 * as we are called from a loop which traverses
2399 			 * the in_multihead list.
2400 			 */
2401 			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
2402 			    inm->inm_scrv == 0) {
2403 				inm->inm_state = IGMP_NOT_MEMBER;
2404 				/*
2405 				 * A reference has already been held in
2406 				 * igmp_final_leave() for this inm, so
2407 				 * no need to hold another one.  We also
2408 				 * bumped up its request count then, so
2409 				 * that it stays in in_multihead.  Both
2410 				 * of them will be released when it is
2411 				 * dequeued later on.
2412 				 */
2413 				VERIFY(inm->inm_nrelecnt != 0);
2414 				igmp_append_relq(igi, inm);
2415 			}
2416 		}
2417 		break;
2418 	}
2419 }
2420 
2421 /*
2422  * Suppress a group's pending response to a group or source/group query.
2423  *
2424  * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
2425  * Do NOT update ST1/ST0 as this operation merely suppresses
2426  * the currently pending group record.
2427  * Do NOT suppress the response to a general query. It is possible but
2428  * it would require adding another state or flag.
2429  */
2430 static void
igmp_v3_suppress_group_record(struct in_multi * inm)2431 igmp_v3_suppress_group_record(struct in_multi *inm)
2432 {
2433 	INM_LOCK_ASSERT_HELD(inm);
2434 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2435 
2436 	VERIFY(inm->inm_igi->igi_version == IGMP_VERSION_3);
2437 
2438 	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER &&
2439 	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER) {
2440 		return;
2441 	}
2442 
2443 	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
2444 		inm_clear_recorded(inm);
2445 	}
2446 
2447 	inm->inm_timer = 0;
2448 	inm->inm_state = IGMP_REPORTING_MEMBER;
2449 }
2450 
2451 /*
2452  * Switch to a different IGMP version on the given interface,
2453  * as per Section 7.2.1.
2454  */
2455 static uint32_t
igmp_set_version(struct igmp_ifinfo * igi,const int igmp_version)2456 igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version)
2457 {
2458 	int old_version_timer;
2459 
2460 	IGI_LOCK_ASSERT_HELD(igi);
2461 
2462 	os_log(OS_LOG_DEFAULT, "%s: switching to v%d on ifp %s\n", __func__,
2463 	    igmp_version, if_name(igi->igi_ifp));
2464 
2465 	if (igmp_version == IGMP_VERSION_1 || igmp_version == IGMP_VERSION_2) {
2466 		/*
2467 		 * Compute the "Older Version Querier Present" timer as per
2468 		 * Section 8.12, in seconds.
2469 		 */
2470 		old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
2471 
2472 		if (igmp_version == IGMP_VERSION_1) {
2473 			igi->igi_v1_timer = old_version_timer;
2474 			igi->igi_v2_timer = 0;
2475 		} else if (igmp_version == IGMP_VERSION_2) {
2476 			igi->igi_v1_timer = 0;
2477 			igi->igi_v2_timer = old_version_timer;
2478 		}
2479 	}
2480 
2481 	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2482 		if (igi->igi_version != IGMP_VERSION_2) {
2483 			igmp_v3_cancel_link_timers(igi);
2484 			igi->igi_version = IGMP_VERSION_2;
2485 		}
2486 	} else if (igi->igi_v1_timer > 0) {
2487 		if (igi->igi_version != IGMP_VERSION_1) {
2488 			igmp_v3_cancel_link_timers(igi);
2489 			igi->igi_version = IGMP_VERSION_1;
2490 		}
2491 	}
2492 
2493 	IGI_LOCK_ASSERT_HELD(igi);
2494 
2495 	return MAX(igi->igi_v1_timer, igi->igi_v2_timer);
2496 }
2497 
2498 /*
2499  * Cancel pending IGMPv3 timers for the given link and all groups
2500  * joined on it; state-change, general-query, and group-query timers.
2501  *
2502  * Only ever called on a transition from v3 to Compatibility mode. Kill
2503  * the timers stone dead (this may be expensive for large N groups), they
2504  * will be restarted if Compatibility Mode deems that they must be due to
2505  * query processing.
2506  */
2507 static void
igmp_v3_cancel_link_timers(struct igmp_ifinfo * igi)2508 igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
2509 {
2510 	struct ifnet            *ifp;
2511 	struct in_multi         *inm;
2512 	struct in_multistep     step;
2513 
2514 	IGI_LOCK_ASSERT_HELD(igi);
2515 
2516 	IGMP_PRINTF(("%s: cancel v3 timers on ifp 0x%llx(%s)\n", __func__,
2517 	    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), if_name(igi->igi_ifp)));
2518 
2519 	/*
2520 	 * Stop the v3 General Query Response on this link stone dead.
2521 	 * If timer is woken up due to interface_timers_running,
2522 	 * the flag will be cleared if there are no pending link timers.
2523 	 */
2524 	igi->igi_v3_timer = 0;
2525 
2526 	/*
2527 	 * Now clear the current-state and state-change report timers
2528 	 * for all memberships scoped to this link.
2529 	 */
2530 	ifp = igi->igi_ifp;
2531 	IGI_UNLOCK(igi);
2532 
2533 	in_multihead_lock_shared();
2534 	IN_FIRST_MULTI(step, inm);
2535 	while (inm != NULL) {
2536 		INM_LOCK(inm);
2537 		if (inm->inm_ifp != ifp && inm->inm_igi != igi) {
2538 			goto next;
2539 		}
2540 
2541 		switch (inm->inm_state) {
2542 		case IGMP_NOT_MEMBER:
2543 		case IGMP_SILENT_MEMBER:
2544 		case IGMP_IDLE_MEMBER:
2545 		case IGMP_LAZY_MEMBER:
2546 		case IGMP_SLEEPING_MEMBER:
2547 		case IGMP_AWAKENING_MEMBER:
2548 			/*
2549 			 * These states are either not relevant in v3 mode,
2550 			 * or are unreported. Do nothing.
2551 			 */
2552 			break;
2553 		case IGMP_LEAVING_MEMBER:
2554 			/*
2555 			 * If we are leaving the group and switching to
2556 			 * compatibility mode, we need to release the final
2557 			 * reference held for issuing the INCLUDE {}, and
2558 			 * transition to REPORTING to ensure the host leave
2559 			 * message is sent upstream to the old querier --
2560 			 * transition to NOT would lose the leave and race.
2561 			 * During igmp_final_leave(), we bumped up both the
2562 			 * request and reference counts.  Since we cannot
2563 			 * call in_multi_detach() here, defer this task to
2564 			 * the timer routine.
2565 			 */
2566 			VERIFY(inm->inm_nrelecnt != 0);
2567 			IGI_LOCK(igi);
2568 			igmp_append_relq(igi, inm);
2569 			IGI_UNLOCK(igi);
2570 			OS_FALLTHROUGH;
2571 		case IGMP_G_QUERY_PENDING_MEMBER:
2572 		case IGMP_SG_QUERY_PENDING_MEMBER:
2573 			inm_clear_recorded(inm);
2574 			OS_FALLTHROUGH;
2575 		case IGMP_REPORTING_MEMBER:
2576 			inm->inm_state = IGMP_REPORTING_MEMBER;
2577 			break;
2578 		}
2579 		/*
2580 		 * Always clear state-change and group report timers.
2581 		 * Free any pending IGMPv3 state-change records.
2582 		 */
2583 		inm->inm_sctimer = 0;
2584 		inm->inm_timer = 0;
2585 		IF_DRAIN(&inm->inm_scq);
2586 next:
2587 		INM_UNLOCK(inm);
2588 		IN_NEXT_MULTI(step, inm);
2589 	}
2590 	in_multihead_lock_done();
2591 
2592 	IGI_LOCK(igi);
2593 }
2594 
2595 /*
2596  * Update the Older Version Querier Present timers for a link.
2597  * See Section 7.2.1 of RFC 3376.
2598  */
2599 static void
igmp_v1v2_process_querier_timers(struct igmp_ifinfo * igi)2600 igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
2601 {
2602 	IGI_LOCK_ASSERT_HELD(igi);
2603 
2604 	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
2605 		/*
2606 		 * IGMPv1 and IGMPv2 Querier Present timers expired.
2607 		 *
2608 		 * Revert to IGMPv3.
2609 		 */
2610 		if (igi->igi_version != IGMP_VERSION_3) {
2611 			os_log(OS_LOG_DEFAULT, "%s: transition from v%d->v%d "
2612 			    "on %s\n", __func__,
2613 			    igi->igi_version, IGMP_VERSION_3,
2614 			    if_name(igi->igi_ifp));
2615 			igi->igi_version = IGMP_VERSION_3;
2616 			IF_DRAIN(&igi->igi_v2q);
2617 		}
2618 	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2619 		/*
2620 		 * IGMPv1 Querier Present timer expired,
2621 		 * IGMPv2 Querier Present timer running.
2622 		 * If IGMPv2 was disabled since last timeout,
2623 		 * revert to IGMPv3.
2624 		 * If IGMPv2 is enabled, revert to IGMPv2.
2625 		 */
2626 		if (!igmp_v2enable) {
2627 			os_log(OS_LOG_DEFAULT, "%s: transition from v%d->v%d "
2628 			    "on %s\n", __func__,
2629 			    igi->igi_version, IGMP_VERSION_3,
2630 			    if_name(igi->igi_ifp));
2631 			igi->igi_v2_timer = 0;
2632 			igi->igi_version = IGMP_VERSION_3;
2633 			IF_DRAIN(&igi->igi_v2q);
2634 		} else {
2635 			--igi->igi_v2_timer;
2636 			if (igi->igi_version != IGMP_VERSION_2) {
2637 				os_log(OS_LOG_DEFAULT, "%s: transition from v%d->v%d "
2638 				    "on %s\n", __func__,
2639 				    igi->igi_version, IGMP_VERSION_2,
2640 				    if_name(igi->igi_ifp));
2641 				IF_DRAIN(&igi->igi_gq);
2642 				igmp_v3_cancel_link_timers(igi);
2643 				igi->igi_version = IGMP_VERSION_2;
2644 			}
2645 		}
2646 	} else if (igi->igi_v1_timer > 0) {
2647 		/*
2648 		 * IGMPv1 Querier Present timer running.
2649 		 * Stop IGMPv2 timer if running.
2650 		 *
2651 		 * If IGMPv1 was disabled since last timeout,
2652 		 * revert to IGMPv3.
2653 		 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
2654 		 */
2655 		if (!igmp_v1enable) {
2656 			os_log(OS_LOG_DEFAULT, "%s: transition from v%d->v%d "
2657 			    "on %s\n", __func__,
2658 			    igi->igi_version, IGMP_VERSION_3,
2659 			    if_name(igi->igi_ifp));
2660 			igi->igi_v1_timer = 0;
2661 			igi->igi_version = IGMP_VERSION_3;
2662 			IF_DRAIN(&igi->igi_v2q);
2663 		} else {
2664 			--igi->igi_v1_timer;
2665 		}
2666 		if (igi->igi_v2_timer > 0) {
2667 			IGMP_PRINTF(("%s: cancel v2 timer on 0x%llx(%s)\n",
2668 			    __func__,
2669 			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2670 			    if_name(igi->igi_ifp)));
2671 			igi->igi_v2_timer = 0;
2672 		}
2673 	}
2674 }
2675 
2676 /*
2677  * Dispatch an IGMPv1/v2 host report or leave message.
2678  * These are always small enough to fit inside a single mbuf.
2679  */
2680 static int
igmp_v1v2_queue_report(struct in_multi * inm,const int type)2681 igmp_v1v2_queue_report(struct in_multi *inm, const int type)
2682 {
2683 	struct ifnet            *ifp;
2684 	struct igmp             *igmp;
2685 	struct ip               *ip;
2686 	struct mbuf             *m;
2687 	int                     error = 0;
2688 
2689 	INM_LOCK_ASSERT_HELD(inm);
2690 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2691 
2692 	ifp = inm->inm_ifp;
2693 
2694 	MGETHDR(m, M_DONTWAIT, MT_DATA);
2695 	if (m == NULL) {
2696 		return ENOMEM;
2697 	}
2698 	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
2699 
2700 	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);
2701 
2702 	m->m_data += sizeof(struct ip);
2703 	m->m_len = sizeof(struct igmp);
2704 
2705 	igmp = mtod(m, struct igmp *);
2706 	igmp->igmp_type = (u_char)type;
2707 	igmp->igmp_code = 0;
2708 	igmp->igmp_group = inm->inm_addr;
2709 	igmp->igmp_cksum = 0;
2710 	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));
2711 
2712 	m->m_data -= sizeof(struct ip);
2713 	m->m_len += sizeof(struct ip);
2714 
2715 	ip = mtod(m, struct ip *);
2716 	ip->ip_tos = 0;
2717 	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
2718 	ip->ip_off = 0;
2719 	ip->ip_p = IPPROTO_IGMP;
2720 	ip->ip_src.s_addr = INADDR_ANY;
2721 
2722 	if (type == IGMP_HOST_LEAVE_MESSAGE) {
2723 		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
2724 	} else {
2725 		ip->ip_dst = inm->inm_addr;
2726 	}
2727 
2728 	igmp_save_context(m, ifp);
2729 
2730 	m->m_flags |= M_IGMPV2;
2731 	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK) {
2732 		m->m_flags |= M_IGMP_LOOP;
2733 	}
2734 
2735 	/*
2736 	 * Due to the fact that at this point we are possibly holding
2737 	 * in_multihead_lock in shared or exclusive mode, we can't call
2738 	 * igmp_sendpkt() here since that will eventually call ip_output(),
2739 	 * which will try to lock in_multihead_lock and cause a deadlock.
2740 	 * Instead we defer the work to the igmp_timeout() thread, thus
2741 	 * avoiding unlocking in_multihead_lock here.
2742 	 */
2743 	if (IF_QFULL(&inm->inm_igi->igi_v2q)) {
2744 		os_log_error(OS_LOG_DEFAULT,
2745 		    "%s: v1 / v2 outbound queue full on %s\n",
2746 		    __func__, if_name(ifp));
2747 		error = ENOMEM;
2748 		m_freem(m);
2749 	} else {
2750 		IF_ENQUEUE(&inm->inm_igi->igi_v2q, m);
2751 		VERIFY(error == 0);
2752 	}
2753 	return error;
2754 }
2755 
2756 /*
2757  * Process a state change from the upper layer for the given IPv4 group.
2758  *
2759  * Each socket holds a reference on the in_multi in its own ip_moptions.
2760  * The socket layer will have made the necessary updates to the group
2761  * state, it is now up to IGMP to issue a state change report if there
2762  * has been any change between T0 (when the last state-change was issued)
2763  * and T1 (now).
2764  *
2765  * We use the IGMPv3 state machine at group level. The IGMP module
2766  * however makes the decision as to which IGMP protocol version to speak.
2767  * A state change *from* INCLUDE {} always means an initial join.
2768  * A state change *to* INCLUDE {} always means a final leave.
2769  *
2770  * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
2771  * save ourselves a bunch of work; any exclusive mode groups need not
2772  * compute source filter lists.
2773  */
2774 int
igmp_change_state(struct in_multi * inm,struct igmp_tparams * itp)2775 igmp_change_state(struct in_multi *inm, struct igmp_tparams *itp)
2776 {
2777 	struct igmp_ifinfo *igi;
2778 	struct ifnet *ifp;
2779 	int error = 0;
2780 
2781 	VERIFY(itp != NULL);
2782 	bzero(itp, sizeof(*itp));
2783 
2784 	INM_LOCK_ASSERT_HELD(inm);
2785 	VERIFY(inm->inm_igi != NULL);
2786 	IGI_LOCK_ASSERT_NOTHELD(inm->inm_igi);
2787 
2788 	/*
2789 	 * Try to detect if the upper layer just asked us to change state
2790 	 * for an interface which has now gone away.
2791 	 */
2792 	VERIFY(inm->inm_ifma != NULL);
2793 	ifp = inm->inm_ifma->ifma_ifp;
2794 	/*
2795 	 * Sanity check that netinet's notion of ifp is the same as net's.
2796 	 */
2797 	VERIFY(inm->inm_ifp == ifp);
2798 
2799 	igi = IGMP_IFINFO(ifp);
2800 	VERIFY(igi != NULL);
2801 
2802 	/*
2803 	 * If we detect a state transition to or from MCAST_UNDEFINED
2804 	 * for this group, then we are starting or finishing an IGMP
2805 	 * life cycle for this group.
2806 	 */
2807 	if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
2808 		IGMP_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2809 		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode));
2810 		if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
2811 			IGMP_PRINTF(("%s: initial join\n", __func__));
2812 			error = igmp_initial_join(inm, igi, itp);
2813 			goto out;
2814 		} else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
2815 			IGMP_PRINTF(("%s: final leave\n", __func__));
2816 			igmp_final_leave(inm, igi, itp);
2817 			goto out;
2818 		}
2819 	} else {
2820 		IGMP_PRINTF(("%s: filter set change\n", __func__));
2821 	}
2822 
2823 	error = igmp_handle_state_change(inm, igi, itp);
2824 out:
2825 	return error;
2826 }
2827 
2828 /*
2829  * Perform the initial join for an IGMP group.
2830  *
2831  * When joining a group:
2832  *  If the group should have its IGMP traffic suppressed, do nothing.
2833  *  IGMPv1 starts sending IGMPv1 host membership reports.
2834  *  IGMPv2 starts sending IGMPv2 host membership reports.
2835  *  IGMPv3 will schedule an IGMPv3 state-change report containing the
2836  *  initial state of the membership.
2837  */
2838 static int
igmp_initial_join(struct in_multi * inm,struct igmp_ifinfo * igi,struct igmp_tparams * itp)2839 igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi,
2840     struct igmp_tparams *itp)
2841 {
2842 	struct ifnet            *ifp;
2843 	struct ifqueue          *ifq;
2844 	int                      error, retval, syncstates;
2845 
2846 	INM_LOCK_ASSERT_HELD(inm);
2847 	IGI_LOCK_ASSERT_NOTHELD(igi);
2848 	VERIFY(itp != NULL);
2849 
2850 	IGMP_INET_PRINTF(inm->inm_addr,
2851 	    ("%s: initial join %s on ifp 0x%llx(%s)\n", __func__,
2852 	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
2853 	    if_name(inm->inm_ifp)));
2854 
2855 	error = 0;
2856 	syncstates = 1;
2857 
2858 	ifp = inm->inm_ifp;
2859 
2860 	IGI_LOCK(igi);
2861 	VERIFY(igi->igi_ifp == ifp);
2862 
2863 	/*
2864 	 * Groups joined on loopback or marked as 'not reported',
2865 	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
2866 	 * are never reported in any IGMP protocol exchanges.
2867 	 * All other groups enter the appropriate IGMP state machine
2868 	 * for the version in use on this link.
2869 	 * A link marked as IGIF_SILENT causes IGMP to be completely
2870 	 * disabled for the link.
2871 	 */
2872 	if ((ifp->if_flags & IFF_LOOPBACK) ||
2873 	    (igi->igi_flags & IGIF_SILENT) ||
2874 	    !igmp_isgroupreported(inm->inm_addr)) {
2875 		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
2876 		    __func__));
2877 		inm->inm_state = IGMP_SILENT_MEMBER;
2878 		inm->inm_timer = 0;
2879 	} else {
2880 		/*
2881 		 * Deal with overlapping in_multi lifecycle.
2882 		 * If this group was LEAVING, then make sure
2883 		 * we drop the reference we picked up to keep the
2884 		 * group around for the final INCLUDE {} enqueue.
2885 		 * Since we cannot call in_multi_detach() here,
2886 		 * defer this task to the timer routine.
2887 		 */
2888 		if (igi->igi_version == IGMP_VERSION_3 &&
2889 		    inm->inm_state == IGMP_LEAVING_MEMBER) {
2890 			VERIFY(inm->inm_nrelecnt != 0);
2891 			igmp_append_relq(igi, inm);
2892 		}
2893 
2894 		inm->inm_state = IGMP_REPORTING_MEMBER;
2895 
2896 		switch (igi->igi_version) {
2897 		case IGMP_VERSION_1:
2898 		case IGMP_VERSION_2:
2899 			inm->inm_state = IGMP_IDLE_MEMBER;
2900 			error = igmp_v1v2_queue_report(inm,
2901 			    (igi->igi_version == IGMP_VERSION_2) ?
2902 			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
2903 			    IGMP_v1_HOST_MEMBERSHIP_REPORT);
2904 
2905 			INM_LOCK_ASSERT_HELD(inm);
2906 			IGI_LOCK_ASSERT_HELD(igi);
2907 
2908 			if (error == 0) {
2909 				inm->inm_timer =
2910 				    IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
2911 				itp->cst = 1;
2912 			}
2913 			break;
2914 
2915 		case IGMP_VERSION_3:
2916 			/*
2917 			 * Defer update of T0 to T1, until the first copy
2918 			 * of the state change has been transmitted.
2919 			 */
2920 			syncstates = 0;
2921 
2922 			/*
2923 			 * Immediately enqueue a State-Change Report for
2924 			 * this interface, freeing any previous reports.
2925 			 * Don't kick the timers if there is nothing to do,
2926 			 * or if an error occurred.
2927 			 */
2928 			ifq = &inm->inm_scq;
2929 			IF_DRAIN(ifq);
2930 			retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
2931 			    0, 0);
2932 			itp->cst = (ifq->ifq_len > 0);
2933 			IGMP_PRINTF(("%s: enqueue record = %d\n",
2934 			    __func__, retval));
2935 			if (retval <= 0) {
2936 				error = retval * -1;
2937 				break;
2938 			}
2939 
2940 			/*
2941 			 * Schedule transmission of pending state-change
2942 			 * report up to RV times for this link. The timer
2943 			 * will fire at the next igmp_timeout (1 second),
2944 			 * giving us an opportunity to merge the reports.
2945 			 */
2946 			if (igi->igi_flags & IGIF_LOOPBACK) {
2947 				inm->inm_scrv = 1;
2948 			} else {
2949 				VERIFY(igi->igi_rv > 1);
2950 				inm->inm_scrv = (uint16_t)igi->igi_rv;
2951 			}
2952 			inm->inm_sctimer = 1;
2953 			itp->sct = 1;
2954 
2955 			error = 0;
2956 			break;
2957 		}
2958 	}
2959 	IGI_UNLOCK(igi);
2960 
2961 	/*
2962 	 * Only update the T0 state if state change is atomic,
2963 	 * i.e. we don't need to wait for a timer to fire before we
2964 	 * can consider the state change to have been communicated.
2965 	 */
2966 	if (syncstates) {
2967 		inm_commit(inm);
2968 		IGMP_INET_PRINTF(inm->inm_addr,
2969 		    ("%s: T1->T0 for %s / %s\n", __func__,
2970 		    _igmp_inet_buf, if_name(inm->inm_ifp)));
2971 	}
2972 
2973 	return error;
2974 }
2975 
2976 /*
2977  * Issue an intermediate state change during the IGMP life-cycle.
2978  */
static int
igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi,
    struct igmp_tparams *itp)
{
	struct ifnet            *ifp;
	int                      retval = 0;

	/* Caller holds the in_multi lock; the igi lock is taken here. */
	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_NOTHELD(igi);
	VERIFY(itp != NULL);

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: state change for %s on ifp 0x%llx(%s)\n", __func__,
	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
	    if_name(inm->inm_ifp)));

	ifp = inm->inm_ifp;

	IGI_LOCK(igi);
	VERIFY(igi->igi_ifp == ifp);

	/*
	 * Loopback interfaces, silent links, unreported groups, and
	 * links not running IGMPv3 never emit state-change records;
	 * in those cases commit the pending filter state (T1 -> T0)
	 * immediately and return without touching any timers.
	 */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (igi->igi_flags & IGIF_SILENT) ||
	    !igmp_isgroupreported(inm->inm_addr) ||
	    (igi->igi_version != IGMP_VERSION_3)) {
		IGI_UNLOCK(igi);
		if (!igmp_isgroupreported(inm->inm_addr)) {
			IGMP_PRINTF(("%s: not kicking state "
			    "machine for silent group\n", __func__));
		}
		IGMP_PRINTF(("%s: nothing to do \n", __func__));
		inm_commit(inm);
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 -> T0 for %s/%s\n", __func__,
		    _igmp_inet_buf, inm->inm_ifp->if_name));
		goto done;
	}

	/* Discard any previously queued state-change records for this group. */
	IF_DRAIN(&inm->inm_scq);

	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
	itp->cst = (inm->inm_scq.ifq_len > 0);
	IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
	if (retval <= 0) {
		IGI_UNLOCK(igi);
		/* Enqueue reports errors as negative values; return positive. */
		retval *= -1;
		goto done;
	}
	/*
	 * If record(s) were enqueued, start the state-change
	 * report timer for this group.
	 */
	inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : (uint16_t)igi->igi_rv);
	inm->inm_sctimer = 1;
	itp->sct = 1;
	IGI_UNLOCK(igi);
done:
	return retval;
}
3038 
3039 /*
3040  * Perform the final leave for an IGMP group.
3041  *
3042  * When leaving a group:
3043  *  IGMPv1 does nothing.
3044  *  IGMPv2 sends a host leave message, if and only if we are the reporter.
3045  *  IGMPv3 enqueues a state-change report containing a transition
3046  *  to INCLUDE {} for immediate transmission.
3047  */
static void
igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi,
    struct igmp_tparams *itp)
{
	/* When set, the T1 state is committed to T0 before returning. */
	int syncstates = 1;
	bool retried_already = false;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_NOTHELD(igi);
	VERIFY(itp != NULL);

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: final leave %s on ifp 0x%llx(%s)\n", __func__,
	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
	    if_name(inm->inm_ifp)));

retry:
	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_LEAVING_MEMBER:
		/* Already leaving or left; do nothing. */
		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
		    __func__));
		break;
	case IGMP_REPORTING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		IGI_LOCK(igi);
		if (igi->igi_version == IGMP_VERSION_2) {
			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
				/*
				 * We may be in the process of downgrading to
				 * IGMPv2 but because we just grabbed the
				 * igi_lock we may have lost the race.
				 * Retry once (re-reading inm_state) before
				 * proceeding anyway.
				 */
				if (!retried_already) {
					IGI_UNLOCK(igi);
					retried_already = true;
					goto retry;
				} else {
					/*
					 * Proceed with leaving the group
					 * as if it were IGMPv2 even though we
					 * may have an inconsistent multicast state.
					 */
				}
			}
			/* schedule timer if enqueue is successful */
			itp->cst = (igmp_v1v2_queue_report(inm,
			    IGMP_HOST_LEAVE_MESSAGE) == 0);

			INM_LOCK_ASSERT_HELD(inm);
			IGI_LOCK_ASSERT_HELD(igi);

			inm->inm_state = IGMP_NOT_MEMBER;
		} else if (igi->igi_version == IGMP_VERSION_3) {
			/*
			 * Stop group timer and all pending reports.
			 * Immediately enqueue a state-change report
			 * TO_IN {} to be sent on the next timeout,
			 * giving us an opportunity to merge reports.
			 */
			IF_DRAIN(&inm->inm_scq);
			inm->inm_timer = 0;
			if (igi->igi_flags & IGIF_LOOPBACK) {
				inm->inm_scrv = 1;
			} else {
				inm->inm_scrv = (uint16_t)igi->igi_rv;
			}
			IGMP_INET_PRINTF(inm->inm_addr,
			    ("%s: Leaving %s/%s with %d "
			    "pending retransmissions.\n", __func__,
			    _igmp_inet_buf, if_name(inm->inm_ifp),
			    inm->inm_scrv));
			if (inm->inm_scrv == 0) {
				/* No retransmissions requested; leave now. */
				inm->inm_state = IGMP_NOT_MEMBER;
				inm->inm_sctimer = 0;
			} else {
				int retval;
				/*
				 * Stick around in the in_multihead list;
				 * the final detach will be issued by
				 * igmp_v3_process_group_timers() when
				 * the retransmit timer expires.
				 */
				INM_ADDREF_LOCKED(inm);
				VERIFY(inm->inm_debug & IFD_ATTACHED);
				inm->inm_reqcnt++;
				VERIFY(inm->inm_reqcnt >= 1);
				/* Count this deferred release against the group. */
				inm->inm_nrelecnt++;
				VERIFY(inm->inm_nrelecnt != 0);

				retval = igmp_v3_enqueue_group_record(
					&inm->inm_scq, inm, 1, 0, 0);
				itp->cst = (inm->inm_scq.ifq_len > 0);
				KASSERT(retval != 0,
				    ("%s: enqueue record = %d\n", __func__,
				    retval));

				inm->inm_state = IGMP_LEAVING_MEMBER;
				inm->inm_sctimer = 1;
				itp->sct = 1;
				/* T0 update is deferred until transmission. */
				syncstates = 0;
			}
		}
		IGI_UNLOCK(igi);
		break;
	case IGMP_LAZY_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		/* Our reports are suppressed; do nothing. */
		break;
	}

	if (syncstates) {
		inm_commit(inm);
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 -> T0 for %s/%s\n", __func__,
		    _igmp_inet_buf, if_name(inm->inm_ifp)));
		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 now MCAST_UNDEFINED for %s/%s\n",
		    __func__, _igmp_inet_buf, if_name(inm->inm_ifp)));
	}
}
3176 
3177 /*
3178  * Enqueue an IGMPv3 group record to the given output queue.
3179  *
3180  * XXX This function could do with having the allocation code
3181  * split out, and the multiple-tree-walks coalesced into a single
3182  * routine as has been done in igmp_v3_enqueue_filter_change().
3183  *
3184  * If is_state_change is zero, a current-state record is appended.
3185  * If is_state_change is non-zero, a state-change report is appended.
3186  *
3187  * If is_group_query is non-zero, an mbuf packet chain is allocated.
3188  * If is_group_query is zero, and if there is a packet with free space
3189  * at the tail of the queue, it will be appended to providing there
3190  * is enough free space.
3191  * Otherwise a new mbuf packet chain is allocated.
3192  *
3193  * If is_source_query is non-zero, each source is checked to see if
3194  * it was recorded for a Group-Source query, and will be omitted if
3195  * it is not both in-mode and recorded.
3196  *
3197  * The function will attempt to allocate leading space in the packet
3198  * for the IP/IGMP header to be prepended without fragmenting the chain.
3199  *
3200  * If successful the size of all data appended to the queue is returned,
3201  * otherwise an error code less than zero is returned, or zero if
3202  * no record(s) were appended.
3203  */
3204 static int
igmp_v3_enqueue_group_record(struct ifqueue * ifq,struct in_multi * inm,const int is_state_change,const int is_group_query,const int is_source_query)3205 igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
3206     const int is_state_change, const int is_group_query,
3207     const int is_source_query)
3208 {
3209 	struct igmp_grouprec     ig;
3210 	struct igmp_grouprec    *pig;
3211 	struct ifnet            *ifp;
3212 	struct ip_msource       *ims, *nims;
3213 	struct mbuf             *m0, *m, *md;
3214 	int                      error, is_filter_list_change;
3215 	int                      minrec0len, m0srcs, nbytes, off;
3216 	uint16_t                 msrcs;
3217 	int                      record_has_sources;
3218 	int                      now;
3219 	int                      type;
3220 	in_addr_t                naddr;
3221 	uint16_t                 mode;
3222 	u_int16_t                ig_numsrc;
3223 
3224 	INM_LOCK_ASSERT_HELD(inm);
3225 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
3226 
3227 	error = 0;
3228 	ifp = inm->inm_ifp;
3229 	is_filter_list_change = 0;
3230 	m = NULL;
3231 	m0 = NULL;
3232 	m0srcs = 0;
3233 	msrcs = 0;
3234 	nbytes = 0;
3235 	nims = NULL;
3236 	record_has_sources = 1;
3237 	pig = NULL;
3238 	type = IGMP_DO_NOTHING;
3239 	mode = inm->inm_st[1].iss_fmode;
3240 
3241 	/*
3242 	 * If we did not transition out of ASM mode during t0->t1,
3243 	 * and there are no source nodes to process, we can skip
3244 	 * the generation of source records.
3245 	 */
3246 	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
3247 	    inm->inm_nsrc == 0) {
3248 		record_has_sources = 0;
3249 	}
3250 
3251 	if (is_state_change) {
3252 		/*
3253 		 * Queue a state change record.
3254 		 * If the mode did not change, and there are non-ASM
3255 		 * listeners or source filters present,
3256 		 * we potentially need to issue two records for the group.
3257 		 * If we are transitioning to MCAST_UNDEFINED, we need
3258 		 * not send any sources.
3259 		 * If there are ASM listeners, and there was no filter
3260 		 * mode transition of any kind, do nothing.
3261 		 */
3262 		if (mode != inm->inm_st[0].iss_fmode) {
3263 			if (mode == MCAST_EXCLUDE) {
3264 				IGMP_PRINTF(("%s: change to EXCLUDE\n",
3265 				    __func__));
3266 				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
3267 			} else {
3268 				IGMP_PRINTF(("%s: change to INCLUDE\n",
3269 				    __func__));
3270 				type = IGMP_CHANGE_TO_INCLUDE_MODE;
3271 				if (mode == MCAST_UNDEFINED) {
3272 					record_has_sources = 0;
3273 				}
3274 			}
3275 		} else {
3276 			if (record_has_sources) {
3277 				is_filter_list_change = 1;
3278 			} else {
3279 				type = IGMP_DO_NOTHING;
3280 			}
3281 		}
3282 	} else {
3283 		/*
3284 		 * Queue a current state record.
3285 		 */
3286 		if (mode == MCAST_EXCLUDE) {
3287 			type = IGMP_MODE_IS_EXCLUDE;
3288 		} else if (mode == MCAST_INCLUDE) {
3289 			type = IGMP_MODE_IS_INCLUDE;
3290 			VERIFY(inm->inm_st[1].iss_asm == 0);
3291 		}
3292 	}
3293 
3294 	/*
3295 	 * Generate the filter list changes using a separate function.
3296 	 */
3297 	if (is_filter_list_change) {
3298 		return igmp_v3_enqueue_filter_change(ifq, inm);
3299 	}
3300 
3301 	if (type == IGMP_DO_NOTHING) {
3302 		IGMP_INET_PRINTF(inm->inm_addr,
3303 		    ("%s: nothing to do for %s/%s\n",
3304 		    __func__, _igmp_inet_buf,
3305 		    if_name(inm->inm_ifp)));
3306 		return 0;
3307 	}
3308 
3309 	/*
3310 	 * If any sources are present, we must be able to fit at least
3311 	 * one in the trailing space of the tail packet's mbuf,
3312 	 * ideally more.
3313 	 */
3314 	minrec0len = sizeof(struct igmp_grouprec);
3315 	if (record_has_sources) {
3316 		minrec0len += sizeof(in_addr_t);
3317 	}
3318 
3319 	IGMP_INET_PRINTF(inm->inm_addr,
3320 	    ("%s: queueing %s for %s/%s\n", __func__,
3321 	    igmp_rec_type_to_str(type), _igmp_inet_buf,
3322 	    if_name(inm->inm_ifp)));
3323 
3324 	/*
3325 	 * Check if we have a packet in the tail of the queue for this
3326 	 * group into which the first group record for this group will fit.
3327 	 * Otherwise allocate a new packet.
3328 	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
3329 	 * Note: Group records for G/GSR query responses MUST be sent
3330 	 * in their own packet.
3331 	 */
3332 	m0 = ifq->ifq_tail;
3333 	if (!is_group_query &&
3334 	    m0 != NULL &&
3335 	    (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
3336 	    (m0->m_pkthdr.len + minrec0len) <
3337 	    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3338 		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3339 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3340 		m = m0;
3341 		IGMP_PRINTF(("%s: use existing packet\n", __func__));
3342 	} else {
3343 		if (IF_QFULL(ifq)) {
3344 			os_log_error(OS_LOG_DEFAULT,
3345 			    "%s: outbound queue full on %s\n", __func__, if_name(ifp));
3346 			return -ENOMEM;
3347 		}
3348 		m = NULL;
3349 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3350 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3351 		if (!is_state_change && !is_group_query) {
3352 			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3353 			if (m) {
3354 				m->m_data += IGMP_LEADINGSPACE;
3355 			}
3356 		}
3357 		if (m == NULL) {
3358 			m = m_gethdr(M_DONTWAIT, MT_DATA);
3359 			if (m) {
3360 				MH_ALIGN(m, IGMP_LEADINGSPACE);
3361 			}
3362 		}
3363 		if (m == NULL) {
3364 			return -ENOMEM;
3365 		}
3366 
3367 		igmp_save_context(m, ifp);
3368 
3369 		IGMP_PRINTF(("%s: allocated first packet\n", __func__));
3370 	}
3371 
3372 	/*
3373 	 * Append group record.
3374 	 * If we have sources, we don't know how many yet.
3375 	 */
3376 	ig.ig_type = (u_char)type;
3377 	ig.ig_datalen = 0;
3378 	ig.ig_numsrc = 0;
3379 	ig.ig_group = inm->inm_addr;
3380 	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3381 		if (m != m0) {
3382 			m_freem(m);
3383 		}
3384 		os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed\n", __func__);
3385 		return -ENOMEM;
3386 	}
3387 	nbytes += sizeof(struct igmp_grouprec);
3388 
3389 	/*
3390 	 * Append as many sources as will fit in the first packet.
3391 	 * If we are appending to a new packet, the chain allocation
3392 	 * may potentially use clusters; use m_getptr() in this case.
3393 	 * If we are appending to an existing packet, we need to obtain
3394 	 * a pointer to the group record after m_append(), in case a new
3395 	 * mbuf was allocated.
3396 	 * Only append sources which are in-mode at t1. If we are
3397 	 * transitioning to MCAST_UNDEFINED state on the group, do not
3398 	 * include source entries.
3399 	 * Only report recorded sources in our filter set when responding
3400 	 * to a group-source query.
3401 	 */
3402 	if (record_has_sources) {
3403 		if (m == m0) {
3404 			md = m_last(m);
3405 			pig = (struct igmp_grouprec *)(void *)
3406 			    (mtod(md, uint8_t *) + md->m_len - nbytes);
3407 		} else {
3408 			md = m_getptr(m, 0, &off);
3409 			pig = (struct igmp_grouprec *)(void *)
3410 			    (mtod(md, uint8_t *) + off);
3411 		}
3412 		msrcs = 0;
3413 		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
3414 #ifdef IGMP_DEBUG
3415 			char buf[MAX_IPv4_STR_LEN];
3416 
3417 			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3418 			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3419 #endif
3420 			now = ims_get_mode(inm, ims, 1);
3421 			IGMP_PRINTF(("%s: node is %d\n", __func__, now));
3422 			if ((now != mode) ||
3423 			    (now == mode && mode == MCAST_UNDEFINED)) {
3424 				IGMP_PRINTF(("%s: skip node\n", __func__));
3425 				continue;
3426 			}
3427 			if (is_source_query && ims->ims_stp == 0) {
3428 				IGMP_PRINTF(("%s: skip unrecorded node\n",
3429 				    __func__));
3430 				continue;
3431 			}
3432 			IGMP_PRINTF(("%s: append node\n", __func__));
3433 			naddr = htonl(ims->ims_haddr);
3434 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3435 				if (m != m0) {
3436 					m_freem(m);
3437 				}
3438 				os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed\n",
3439 				    __func__);
3440 				return -ENOMEM;
3441 			}
3442 			nbytes += sizeof(in_addr_t);
3443 			++msrcs;
3444 			if (msrcs == m0srcs) {
3445 				break;
3446 			}
3447 		}
3448 		IGMP_PRINTF(("%s: msrcs is %d this packet\n", __func__,
3449 		    msrcs));
3450 		ig_numsrc = htons(msrcs);
3451 		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3452 		nbytes += (msrcs * sizeof(in_addr_t));
3453 	}
3454 
3455 	if (is_source_query && msrcs == 0) {
3456 		IGMP_PRINTF(("%s: no recorded sources to report\n", __func__));
3457 		if (m != m0) {
3458 			m_freem(m);
3459 		}
3460 		return 0;
3461 	}
3462 
3463 	/*
3464 	 * We are good to go with first packet.
3465 	 */
3466 	if (m != m0) {
3467 		IGMP_PRINTF(("%s: enqueueing first packet\n", __func__));
3468 		m->m_pkthdr.vt_nrecs = 1;
3469 		IF_ENQUEUE(ifq, m);
3470 	} else {
3471 		m->m_pkthdr.vt_nrecs++;
3472 	}
3473 	/*
3474 	 * No further work needed if no source list in packet(s).
3475 	 */
3476 	if (!record_has_sources) {
3477 		return nbytes;
3478 	}
3479 
3480 	/*
3481 	 * Whilst sources remain to be announced, we need to allocate
3482 	 * a new packet and fill out as many sources as will fit.
3483 	 * Always try for a cluster first.
3484 	 */
3485 	while (nims != NULL) {
3486 		if (IF_QFULL(ifq)) {
3487 			os_log_error(OS_LOG_DEFAULT, "%s: outbound queue full on %s\n",
3488 			    __func__, if_name(ifp));
3489 			return -ENOMEM;
3490 		}
3491 		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3492 		if (m) {
3493 			m->m_data += IGMP_LEADINGSPACE;
3494 		}
3495 		if (m == NULL) {
3496 			m = m_gethdr(M_DONTWAIT, MT_DATA);
3497 			if (m) {
3498 				MH_ALIGN(m, IGMP_LEADINGSPACE);
3499 			}
3500 		}
3501 		if (m == NULL) {
3502 			return -ENOMEM;
3503 		}
3504 		igmp_save_context(m, ifp);
3505 		md = m_getptr(m, 0, &off);
3506 		pig = (struct igmp_grouprec *)(void *)
3507 		    (mtod(md, uint8_t *) + off);
3508 		IGMP_PRINTF(("%s: allocated next packet\n", __func__));
3509 
3510 		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3511 			if (m != m0) {
3512 				m_freem(m);
3513 			}
3514 			os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed\n",
3515 			    __func__);
3516 			return -ENOMEM;
3517 		}
3518 		m->m_pkthdr.vt_nrecs = 1;
3519 		nbytes += sizeof(struct igmp_grouprec);
3520 
3521 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3522 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3523 
3524 		msrcs = 0;
3525 		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3526 #ifdef IGMP_DEBUG
3527 			char buf[MAX_IPv4_STR_LEN];
3528 
3529 			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3530 			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3531 #endif
3532 			now = ims_get_mode(inm, ims, 1);
3533 			if ((now != mode) ||
3534 			    (now == mode && mode == MCAST_UNDEFINED)) {
3535 				IGMP_PRINTF(("%s: skip node\n", __func__));
3536 				continue;
3537 			}
3538 			if (is_source_query && ims->ims_stp == 0) {
3539 				IGMP_PRINTF(("%s: skip unrecorded node\n",
3540 				    __func__));
3541 				continue;
3542 			}
3543 			IGMP_PRINTF(("%s: append node\n", __func__));
3544 			naddr = htonl(ims->ims_haddr);
3545 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3546 				if (m != m0) {
3547 					m_freem(m);
3548 				}
3549 				os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed",
3550 				    __func__);
3551 				return -ENOMEM;
3552 			}
3553 			++msrcs;
3554 			if (msrcs == m0srcs) {
3555 				break;
3556 			}
3557 		}
3558 		ig_numsrc = htons(msrcs);
3559 		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3560 		nbytes += (msrcs * sizeof(in_addr_t));
3561 
3562 		IGMP_PRINTF(("%s: enqueueing next packet\n", __func__));
3563 		IF_ENQUEUE(ifq, m);
3564 	}
3565 
3566 	return nbytes;
3567 }
3568 
3569 /*
3570  * Type used to mark record pass completion.
3571  * We exploit the fact we can cast to this easily from the
3572  * current filter modes on each ip_msource node.
3573  */
typedef enum {
	REC_NONE = 0x00,        /* MCAST_UNDEFINED */
	REC_ALLOW = 0x01,       /* MCAST_INCLUDE */
	REC_BLOCK = 0x02,       /* MCAST_EXCLUDE */
	REC_FULL = REC_ALLOW | REC_BLOCK        /* both passes completed */
} rectype_t;
3580 
3581 /*
3582  * Enqueue an IGMPv3 filter list change to the given output queue.
3583  *
3584  * Source list filter state is held in an RB-tree. When the filter list
3585  * for a group is changed without changing its mode, we need to compute
3586  * the deltas between T0 and T1 for each source in the filter set,
3587  * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3588  *
3589  * As we may potentially queue two record types, and the entire R-B tree
3590  * needs to be walked at once, we break this out into its own function
3591  * so we can generate a tightly packed queue of packets.
3592  *
3593  * XXX This could be written to only use one tree walk, although that makes
3594  * serializing into the mbuf chains a bit harder. For now we do two walks
3595  * which makes things easier on us, and it may or may not be harder on
3596  * the L2 cache.
3597  *
3598  * If successful the size of all data appended to the queue is returned,
3599  * otherwise an error code less than zero is returned, or zero if
3600  * no record(s) were appended.
3601  */
static int
igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
{
	/* Smallest useful record: header plus one source address. */
	static const int MINRECLEN =
	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
	struct ifnet            *ifp;
	struct igmp_grouprec     ig;
	struct igmp_grouprec    *pig;
	struct ip_msource       *ims, *nims;
	struct mbuf             *m, *m0, *md;
	in_addr_t                naddr;
	int                      m0srcs, nbytes, npbytes, off, schanged;
	uint16_t                 rsrcs;
	int                      nallow, nblock;
	uint16_t                 mode;
	uint8_t                  now, then;
	rectype_t                crt, drt, nrt;
	u_int16_t                ig_numsrc;

	INM_LOCK_ASSERT_HELD(inm);

	/* Nothing to do if there are no sources, or if ASM at both t0 and t1. */
	if (inm->inm_nsrc == 0 ||
	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0)) {
		return 0;
	}

	ifp = inm->inm_ifp;                     /* interface */
	mode = inm->inm_st[1].iss_fmode;        /* filter mode at t1 */
	crt = REC_NONE; /* current group record type */
	drt = REC_NONE; /* mask of completed group record types */
	nrt = REC_NONE; /* record type for current node */
	m0srcs = 0;     /* # source which will fit in current mbuf chain */
	nbytes = 0;     /* # of bytes appended to group's state-change queue */
	npbytes = 0;    /* # of bytes appended this packet */
	rsrcs = 0;      /* # sources encoded in current record */
	schanged = 0;   /* # nodes encoded in overall filter change */
	nallow = 0;     /* # of source entries in ALLOW_NEW */
	nblock = 0;     /* # of source entries in BLOCK_OLD */
	nims = NULL;    /* next tree node pointer */

	/*
	 * For each possible filter record mode.
	 * The first kind of source we encounter tells us which
	 * is the first kind of record we start appending.
	 * If a node transitioned to UNDEFINED at t1, its mode is treated
	 * as the inverse of the group's filter mode.
	 */
	while (drt != REC_FULL) {
		do {
			m0 = ifq->ifq_tail;
			if (m0 != NULL &&
			    (m0->m_pkthdr.vt_nrecs + 1 <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (m0->m_pkthdr.len + MINRECLEN) <
			    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
				m = m0;
				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
				    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				IGMP_PRINTF(("%s: use previous packet\n",
				    __func__));
			} else {
				/* Try a cluster first; fall back to plain mbuf. */
				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
				if (m) {
					m->m_data += IGMP_LEADINGSPACE;
				}
				if (m == NULL) {
					m = m_gethdr(M_DONTWAIT, MT_DATA);
					if (m) {
						MH_ALIGN(m, IGMP_LEADINGSPACE);
					}
				}
				if (m == NULL) {
					os_log_error(OS_LOG_DEFAULT, "%s: m_get*() failed",
					    __func__);
					return -ENOMEM;
				}
				m->m_pkthdr.vt_nrecs = 0;
				igmp_save_context(m, ifp);
				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
				    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				npbytes = 0;
				IGMP_PRINTF(("%s: allocated new packet\n",
				    __func__));
			}
			/*
			 * Append the IGMP group record header to the
			 * current packet's data area.
			 * Recalculate pointer to free space for next
			 * group record, in case m_append() allocated
			 * a new mbuf or cluster.
			 */
			memset(&ig, 0, sizeof(ig));
			ig.ig_group = inm->inm_addr;
			if (!m_append(m, sizeof(ig), (void *)&ig)) {
				if (m != m0) {
					m_freem(m);
				}
				os_log_error(OS_LOG_DEFAULT,
				    "%s: m_append() failed\n",
				    __func__);
				return -ENOMEM;
			}
			npbytes += sizeof(struct igmp_grouprec);
			if (m != m0) {
				/* new packet; offset in chain */
				md = m_getptr(m, npbytes -
				    sizeof(struct igmp_grouprec), &off);
				pig = (struct igmp_grouprec *)(void *)(mtod(md,
				    uint8_t *) + off);
			} else {
				/* current packet; offset from last append */
				md = m_last(m);
				pig = (struct igmp_grouprec *)(void *)(mtod(md,
				    uint8_t *) + md->m_len -
				    sizeof(struct igmp_grouprec));
			}
			/*
			 * Begin walking the tree for this record type
			 * pass, or continue from where we left off
			 * previously if we had to allocate a new packet.
			 * Only report deltas in-mode at t1.
			 * We need not report included sources as allowed
			 * if we are in inclusive mode on the group,
			 * however the converse is not true.
			 */
			rsrcs = 0;
			if (nims == NULL) {
				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
			}
			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
#ifdef IGMP_DEBUG
				char buf[MAX_IPv4_STR_LEN];

				inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
				IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
#endif
				now = ims_get_mode(inm, ims, 1);
				then = ims_get_mode(inm, ims, 0);
				IGMP_PRINTF(("%s: mode: t0 %d, t1 %d\n",
				    __func__, then, now));
				if (now == then) {
					IGMP_PRINTF(("%s: skip unchanged\n",
					    __func__));
					continue;
				}
				if (mode == MCAST_EXCLUDE &&
				    now == MCAST_INCLUDE) {
					IGMP_PRINTF(("%s: skip IN src on EX "
					    "group\n", __func__));
					continue;
				}
				nrt = (rectype_t)now;
				if (nrt == REC_NONE) {
					nrt = (rectype_t)(~mode & REC_FULL);
				}
				if (schanged++ == 0) {
					/* First node seen sets the pass type. */
					crt = nrt;
				} else if (crt != nrt) {
					/* Wrong type for this pass; next pass. */
					continue;
				}
				naddr = htonl(ims->ims_haddr);
				if (!m_append(m, sizeof(in_addr_t),
				    (void *)&naddr)) {
					if (m != m0) {
						m_freem(m);
					}
					os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed\n",
					    __func__);
					return -ENOMEM;
				}
				nallow += !!(crt == REC_ALLOW);
				nblock += !!(crt == REC_BLOCK);
				if (++rsrcs == m0srcs) {
					break;
				}
			}
			/*
			 * If we did not append any tree nodes on this
			 * pass, back out of allocations.
			 */
			if (rsrcs == 0) {
				npbytes -= sizeof(struct igmp_grouprec);
				if (m != m0) {
					IGMP_PRINTF(("%s: m_free(m)\n",
					    __func__));
					m_freem(m);
				} else {
					IGMP_PRINTF(("%s: m_adj(m, -ig)\n",
					    __func__));
					/* Trim the empty record header back off. */
					m_adj(m, -((int)sizeof(
						    struct igmp_grouprec)));
				}
				continue;
			}
			npbytes += (rsrcs * sizeof(in_addr_t));
			if (crt == REC_ALLOW) {
				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
			} else if (crt == REC_BLOCK) {
				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
			}
			/* Patch the source count into the record header. */
			ig_numsrc = htons(rsrcs);
			bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
			/*
			 * Count the new group record, and enqueue this
			 * packet if it wasn't already queued.
			 */
			m->m_pkthdr.vt_nrecs++;
			if (m != m0) {
				IF_ENQUEUE(ifq, m);
			}
			nbytes += npbytes;
		} while (nims != NULL);
		drt |= crt;
		crt = (~crt & REC_FULL);
	}

	IGMP_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
	    nallow, nblock));

	return nbytes;
}
3825 
/*
 * Merge the group's pending IGMPv3 state-change messages (inm->inm_scq)
 * into the interface-scoped state-change queue (ifscq).  Where possible,
 * a pending message is appended onto the packet already at the tail of
 * ifscq rather than being queued as a separate packet.
 *
 * If further retransmissions remain (inm->inm_scrv > 0), each queued
 * message is duplicated with m_dup() instead of being dequeued, so the
 * original stays on inm_scq for the next retransmission pass.
 *
 * Returns 0 on success, or ENOMEM if duplicating a message failed.
 */
static int
igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
{
	struct ifqueue  *gq;
	struct mbuf     *m;             /* pending state-change */
	struct mbuf     *m0;            /* copy of pending state-change */
	struct mbuf     *mt;            /* last state-change in packet */
	struct mbuf     *n;
	int              docopy, domerge;
	u_int            recslen;

	INM_LOCK_ASSERT_HELD(inm);

	docopy = 0;
	domerge = 0;
	recslen = 0;

	/*
	 * If there are further pending retransmissions, make a writable
	 * copy of each queued state-change message before merging.
	 */
	if (inm->inm_scrv > 0) {
		docopy = 1;
	}

	gq = &inm->inm_scq;
#ifdef IGMP_DEBUG
	if (gq->ifq_head == NULL) {
		IGMP_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
	}
#endif

	/*
	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
	 * packet might not always be at the head of the ifqueue.
	 */
	m = gq->ifq_head;
	while (m != NULL) {
		/*
		 * Only merge the report into the current packet if
		 * there is sufficient space to do so; an IGMPv3 report
		 * packet may only contain 65,535 group records.
		 * Always use a simple mbuf chain concatenation to do this,
		 * as large state changes for single groups may have
		 * allocated clusters.
		 */
		domerge = 0;
		mt = ifscq->ifq_tail;
		if (mt != NULL) {
			recslen = m_length(m);

			/*
			 * Merge only if the tail packet can absorb this
			 * message within both the record-count limit and
			 * the interface MTU budget.
			 */
			if ((mt->m_pkthdr.vt_nrecs +
			    m->m_pkthdr.vt_nrecs <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (mt->m_pkthdr.len + recslen <=
			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE))) {
				domerge = 1;
			}
		}

		/*
		 * Cannot merge and the queue is full: drop this message
		 * (only when it is not being retained for retransmission,
		 * i.e. !docopy) and continue with the next one.
		 */
		if (!domerge && IF_QFULL(gq)) {
			os_log_error(OS_LOG_DEFAULT,
			    "%s: outbound queue full on %s\n",
			    __func__, if_name(inm->inm_ifp));
			n = m->m_nextpkt;
			if (!docopy) {
				IF_REMQUEUE(gq, m);
				m_freem(m);
			}
			m = n;
			continue;
		}

		if (!docopy) {
			/* No retransmissions pending: consume the original. */
			IGMP_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			IF_REMQUEUE(gq, m);
			m0 = m;
			m = n;
		} else {
			/* Retransmissions pending: operate on a duplicate. */
			IGMP_PRINTF(("%s: copying 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			m0 = m_dup(m, M_NOWAIT);
			if (m0 == NULL) {
				return ENOMEM;
			}
			m0->m_nextpkt = NULL;
			m = m->m_nextpkt;
		}

		if (!domerge) {
			/* Enqueue as a standalone packet. */
			IGMP_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n",
			    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
			IF_ENQUEUE(ifscq, m0);
		} else {
			struct mbuf *mtl;       /* last mbuf of packet mt */

			IGMP_PRINTF(("%s: merging 0x%llx with ifscq tail "
			    "0x%llx)\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(mt)));

			/*
			 * Append m0's chain to the tail packet; strip its
			 * packet header and fold its length and record
			 * count into mt's packet header.
			 */
			mtl = m_last(mt);
			m0->m_flags &= ~M_PKTHDR;
			mt->m_pkthdr.len += recslen;
			mt->m_pkthdr.vt_nrecs +=
			    m0->m_pkthdr.vt_nrecs;

			mtl->m_next = m0;
		}
	}

	return 0;
}
3943 
3944 /*
3945  * Respond to a pending IGMPv3 General Query.
3946  */
3947 static uint32_t
igmp_v3_dispatch_general_query(struct igmp_ifinfo * igi)3948 igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
3949 {
3950 	struct ifnet            *ifp;
3951 	struct in_multi         *inm;
3952 	struct in_multistep     step;
3953 	int                      retval, loop;
3954 
3955 	IGI_LOCK_ASSERT_HELD(igi);
3956 
3957 	VERIFY(igi->igi_version == IGMP_VERSION_3);
3958 
3959 	ifp = igi->igi_ifp;
3960 	IGI_UNLOCK(igi);
3961 
3962 	in_multihead_lock_shared();
3963 	IN_FIRST_MULTI(step, inm);
3964 	while (inm != NULL) {
3965 		INM_LOCK(inm);
3966 		if (inm->inm_ifp != ifp) {
3967 			goto next;
3968 		}
3969 
3970 		switch (inm->inm_state) {
3971 		case IGMP_NOT_MEMBER:
3972 		case IGMP_SILENT_MEMBER:
3973 			break;
3974 		case IGMP_REPORTING_MEMBER:
3975 		case IGMP_IDLE_MEMBER:
3976 		case IGMP_LAZY_MEMBER:
3977 		case IGMP_SLEEPING_MEMBER:
3978 		case IGMP_AWAKENING_MEMBER:
3979 			inm->inm_state = IGMP_REPORTING_MEMBER;
3980 			IGI_LOCK(igi);
3981 			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
3982 			    inm, 0, 0, 0);
3983 			IGI_UNLOCK(igi);
3984 			IGMP_PRINTF(("%s: enqueue record = %d\n",
3985 			    __func__, retval));
3986 			break;
3987 		case IGMP_G_QUERY_PENDING_MEMBER:
3988 		case IGMP_SG_QUERY_PENDING_MEMBER:
3989 		case IGMP_LEAVING_MEMBER:
3990 			break;
3991 		}
3992 next:
3993 		INM_UNLOCK(inm);
3994 		IN_NEXT_MULTI(step, inm);
3995 	}
3996 	in_multihead_lock_done();
3997 
3998 	IGI_LOCK(igi);
3999 	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
4000 	igmp_dispatch_queue(igi, &igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
4001 	    loop);
4002 	IGI_LOCK_ASSERT_HELD(igi);
4003 	/*
4004 	 * Slew transmission of bursts over 1 second intervals.
4005 	 */
4006 	if (igi->igi_gq.ifq_head != NULL) {
4007 		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
4008 			IGMP_RESPONSE_BURST_INTERVAL);
4009 	}
4010 
4011 	return igi->igi_v3_timer;
4012 }
4013 
4014 /*
4015  * Transmit the next pending IGMP message in the output queue.
4016  *
4017  * Must not be called with inm_lock or igi_lock held.
4018  */
4019 static void
igmp_sendpkt(struct mbuf * m)4020 igmp_sendpkt(struct mbuf *m)
4021 {
4022 	struct ip_moptions      *imo;
4023 	struct mbuf             *ipopts, *m0;
4024 	int                     error;
4025 	struct route            ro;
4026 	struct ifnet            *ifp;
4027 
4028 	IGMP_PRINTF(("%s: transmit 0x%llx\n", __func__,
4029 	    (uint64_t)VM_KERNEL_ADDRPERM(m)));
4030 
4031 	ifp = igmp_restore_context(m);
4032 	/*
4033 	 * Check if the ifnet is still attached.
4034 	 */
4035 	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
4036 		os_log_error(OS_LOG_DEFAULT, "%s: dropped 0x%llx as interface went away\n",
4037 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m));
4038 		m_freem(m);
4039 		OSAddAtomic(1, &ipstat.ips_noroute);
4040 		return;
4041 	}
4042 
4043 	ipopts = igmp_sendra ? m_raopt : NULL;
4044 
4045 	imo = ip_allocmoptions(Z_WAITOK);
4046 	if (imo == NULL) {
4047 		m_freem(m);
4048 		return;
4049 	}
4050 
4051 	imo->imo_multicast_ttl  = 1;
4052 	imo->imo_multicast_vif  = -1;
4053 	imo->imo_multicast_loop = 0;
4054 
4055 	/*
4056 	 * If the user requested that IGMP traffic be explicitly
4057 	 * redirected to the loopback interface (e.g. they are running a
4058 	 * MANET interface and the routing protocol needs to see the
4059 	 * updates), handle this now.
4060 	 */
4061 	if (m->m_flags & M_IGMP_LOOP) {
4062 		imo->imo_multicast_ifp = lo_ifp;
4063 	} else {
4064 		imo->imo_multicast_ifp = ifp;
4065 	}
4066 
4067 	if (m->m_flags & M_IGMPV2) {
4068 		m0 = m;
4069 	} else {
4070 		m0 = igmp_v3_encap_report(ifp, m);
4071 		if (m0 == NULL) {
4072 			/*
4073 			 * If igmp_v3_encap_report() failed, then M_PREPEND()
4074 			 * already freed the original mbuf chain.
4075 			 * This means that we don't have to m_freem(m) here.
4076 			 */
4077 			os_log_error(OS_LOG_DEFAULT, "%s: dropped 0x%llx\n", __func__,
4078 			    (uint64_t)VM_KERNEL_ADDRPERM(m));
4079 			IMO_REMREF(imo);
4080 			os_atomic_inc(&ipstat.ips_odropped, relaxed);
4081 			return;
4082 		}
4083 	}
4084 
4085 	igmp_scrub_context(m0);
4086 	m->m_flags &= ~(M_PROTOFLAGS | M_IGMP_LOOP);
4087 	m0->m_pkthdr.rcvif = lo_ifp;
4088 
4089 	if (ifp->if_eflags & IFEF_TXSTART) {
4090 		/*
4091 		 * Use control service class if the interface supports
4092 		 * transmit-start model.
4093 		 */
4094 		(void) m_set_service_class(m0, MBUF_SC_CTL);
4095 	}
4096 	bzero(&ro, sizeof(ro));
4097 	error = ip_output(m0, ipopts, &ro, 0, imo, NULL);
4098 	ROUTE_RELEASE(&ro);
4099 
4100 	IMO_REMREF(imo);
4101 
4102 	if (error) {
4103 		os_log_error(OS_LOG_DEFAULT, "%s: ip_output(0x%llx) = %d\n", __func__,
4104 		    (uint64_t)VM_KERNEL_ADDRPERM(m0), error);
4105 		return;
4106 	}
4107 
4108 	IGMPSTAT_INC(igps_snd_reports);
4109 	OIGMPSTAT_INC(igps_snd_reports);
4110 }
4111 /*
4112  * Encapsulate an IGMPv3 report.
4113  *
4114  * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
4115  * chain has already had its IP/IGMPv3 header prepended. In this case
4116  * the function will not attempt to prepend; the lengths and checksums
4117  * will however be re-computed.
4118  *
4119  * Returns a pointer to the new mbuf chain head, or NULL if the
4120  * allocation failed.
4121  */
4122 static struct mbuf *
igmp_v3_encap_report(struct ifnet * ifp,struct mbuf * m)4123 igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
4124 {
4125 	struct igmp_report      *igmp;
4126 	struct ip               *ip;
4127 	unsigned int             hdrlen, igmpreclen;
4128 
4129 	VERIFY((m->m_flags & M_PKTHDR));
4130 
4131 	igmpreclen = m_length(m);
4132 	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);
4133 
4134 	if (m->m_flags & M_IGMPV3_HDR) {
4135 		igmpreclen -= hdrlen;
4136 	} else {
4137 		M_PREPEND(m, hdrlen, M_DONTWAIT, 1);
4138 		if (m == NULL) {
4139 			return NULL;
4140 		}
4141 		m->m_flags |= M_IGMPV3_HDR;
4142 	}
4143 	if (hdrlen + igmpreclen > USHRT_MAX) {
4144 		os_log_error(OS_LOG_DEFAULT, "%s: invalid length %d\n",
4145 		    __func__, hdrlen + igmpreclen);
4146 		m_freem(m);
4147 		return NULL;
4148 	}
4149 
4150 
4151 	IGMP_PRINTF(("%s: igmpreclen is %d\n", __func__, igmpreclen));
4152 
4153 	m->m_data += sizeof(struct ip);
4154 	m->m_len -= sizeof(struct ip);
4155 
4156 	igmp = mtod(m, struct igmp_report *);
4157 	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
4158 	igmp->ir_rsv1 = 0;
4159 	igmp->ir_rsv2 = 0;
4160 	igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
4161 	igmp->ir_cksum = 0;
4162 	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
4163 	m->m_pkthdr.vt_nrecs = 0;
4164 
4165 	m->m_data -= sizeof(struct ip);
4166 	m->m_len += sizeof(struct ip);
4167 
4168 	ip = mtod(m, struct ip *);
4169 	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
4170 	ip->ip_len = (u_short)(hdrlen + igmpreclen);
4171 	ip->ip_off = IP_DF;
4172 	ip->ip_p = IPPROTO_IGMP;
4173 	ip->ip_sum = 0;
4174 
4175 	ip->ip_src.s_addr = INADDR_ANY;
4176 
4177 	if (m->m_flags & M_IGMP_LOOP) {
4178 		struct in_ifaddr *ia;
4179 
4180 		IFP_TO_IA(ifp, ia);
4181 		if (ia != NULL) {
4182 			IFA_LOCK(&ia->ia_ifa);
4183 			ip->ip_src = ia->ia_addr.sin_addr;
4184 			IFA_UNLOCK(&ia->ia_ifa);
4185 			IFA_REMREF(&ia->ia_ifa);
4186 		}
4187 	}
4188 
4189 	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);
4190 
4191 	return m;
4192 }
4193 
4194 #ifdef IGMP_DEBUG
4195 static const char *
igmp_rec_type_to_str(const int type)4196 igmp_rec_type_to_str(const int type)
4197 {
4198 	switch (type) {
4199 	case IGMP_CHANGE_TO_EXCLUDE_MODE:
4200 		return "TO_EX";
4201 	case IGMP_CHANGE_TO_INCLUDE_MODE:
4202 		return "TO_IN";
4203 	case IGMP_MODE_IS_EXCLUDE:
4204 		return "MODE_EX";
4205 	case IGMP_MODE_IS_INCLUDE:
4206 		return "MODE_IN";
4207 	case IGMP_ALLOW_NEW_SOURCES:
4208 		return "ALLOW_NEW";
4209 	case IGMP_BLOCK_OLD_SOURCES:
4210 		return "BLOCK_OLD";
4211 	default:
4212 		break;
4213 	}
4214 	return "unknown";
4215 }
4216 #endif
4217 
4218 void
igmp_init(struct protosw * pp,struct domain * dp)4219 igmp_init(struct protosw *pp, struct domain *dp)
4220 {
4221 #pragma unused(dp)
4222 	static int igmp_initialized = 0;
4223 
4224 	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);
4225 
4226 	if (igmp_initialized) {
4227 		return;
4228 	}
4229 	igmp_initialized = 1;
4230 	os_log(OS_LOG_DEFAULT, "%s: initializing\n", __func__);
4231 	igmp_timers_are_running = 0;
4232 	LIST_INIT(&igi_head);
4233 	m_raopt = igmp_ra_alloc();
4234 }
4235