xref: /xnu-11215.1.10/bsd/netinet/igmp.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*-
29  * Copyright (c) 2007-2009 Bruce Simpson.
30  * Copyright (c) 1988 Stephen Deering.
31  * Copyright (c) 1992, 1993
32  *	The Regents of the University of California.  All rights reserved.
33  *
34  * This code is derived from software contributed to Berkeley by
35  * Stephen Deering of Stanford University.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
66  */
67 /*
68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69  * support for mandatory and extensible security protections.  This notice
70  * is included in support of clause 2.2 (b) of the Apple Public License,
71  * Version 2.0.
72  */
73 
74 /*
75  * Internet Group Management Protocol (IGMP) routines.
76  * [RFC1112, RFC2236, RFC3376]
77  *
78  * Written by Steve Deering, Stanford, May 1988.
79  * Modified by Rosen Sharma, Stanford, Aug 1994.
80  * Modified by Bill Fenner, Xerox PARC, Feb 1995.
81  * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
82  * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
83  *
84  * MULTICAST Revision: 3.5.1.4
85  */
86 
87 #include <sys/cdefs.h>
88 
89 #include <sys/param.h>
90 #include <sys/systm.h>
91 #include <sys/malloc.h>
92 #include <sys/mbuf.h>
93 #include <sys/socket.h>
94 #include <sys/protosw.h>
95 #include <sys/kernel.h>
96 #include <sys/mcache.h>
97 
98 #include <libkern/libkern.h>
99 #include <kern/zalloc.h>
100 
101 #include <net/if.h>
102 #include <net/route.h>
103 #include <net/net_sysctl.h>
104 
105 #include <netinet/in.h>
106 #include <netinet/in_var.h>
107 #include <netinet/in_systm.h>
108 #include <netinet/ip.h>
109 #include <netinet/ip_var.h>
110 #include <netinet/igmp.h>
111 #include <netinet/igmp_var.h>
112 #include <netinet/kpi_ipfilter_var.h>
113 
114 #include <os/log.h>
115 
116 #if SKYWALK
117 #include <skywalk/core/skywalk_var.h>
118 #endif /* SKYWALK */
119 
120 SLIST_HEAD(igmp_inm_relhead, in_multi);
121 
122 static void     igi_initvar(struct igmp_ifinfo *, struct ifnet *, int);
123 static struct igmp_ifinfo *igi_alloc(zalloc_flags_t);
124 static void     igi_free(struct igmp_ifinfo *);
125 static void     igi_delete(const struct ifnet *, struct igmp_inm_relhead *);
126 static void     igmp_dispatch_queue(struct igmp_ifinfo *, struct ifqueue *,
127     int, const int);
128 static void     igmp_final_leave(struct in_multi *, struct igmp_ifinfo *,
129     struct igmp_tparams *);
130 static int      igmp_handle_state_change(struct in_multi *,
131     struct igmp_ifinfo *, struct igmp_tparams *);
132 static int      igmp_initial_join(struct in_multi *, struct igmp_ifinfo *,
133     struct igmp_tparams *);
134 static int      igmp_input_v1_query(struct ifnet *, const struct ip *,
135     const struct igmp *);
136 static int      igmp_input_v2_query(struct ifnet *, const struct ip *,
137     const struct igmp *);
138 static int      igmp_input_v3_query(struct ifnet *, const struct ip *,
139     /*const*/ struct igmpv3 *__indexable);
140 static int      igmp_input_v3_group_query(struct in_multi *,
141     int, /*const*/ struct igmpv3 *__indexable);
142 static int      igmp_input_v1_report(struct ifnet *, struct mbuf *,
143     /*const*/ struct ip *, /*const*/ struct igmp *);
144 static int      igmp_input_v2_report(struct ifnet *, struct mbuf *,
145     /*const*/ struct ip *, /*const*/ struct igmp *);
146 static void     igmp_sendpkt(struct mbuf *);
147 static __inline__ int   igmp_isgroupreported(const struct in_addr);
148 static struct mbuf *igmp_ra_alloc(void);
149 #ifdef IGMP_DEBUG
150 static const char *igmp_rec_type_to_str(const int);
151 #endif
152 static uint32_t igmp_set_version(struct igmp_ifinfo *, const int);
153 static void     igmp_append_relq(struct igmp_ifinfo *, struct in_multi *);
154 static void     igmp_flush_relq(struct igmp_ifinfo *,
155     struct igmp_inm_relhead *);
156 static int      igmp_v1v2_queue_report(struct in_multi *, const int);
157 static void     igmp_v1v2_process_group_timer(struct in_multi *, const int);
158 static void     igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
159 static uint32_t igmp_v2_update_group(struct in_multi *, const int);
160 static void     igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
161 static uint32_t igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
162 static struct mbuf *
163 igmp_v3_encap_report(struct ifnet *, struct mbuf *);
164 static int      igmp_v3_enqueue_group_record(struct ifqueue *,
165     struct in_multi *, const int, const int, const int);
166 static int      igmp_v3_enqueue_filter_change(struct ifqueue *,
167     struct in_multi *);
168 static void     igmp_v3_process_group_timers(struct igmp_ifinfo *,
169     struct ifqueue *, struct ifqueue *, struct in_multi *,
170     const unsigned int);
171 static int      igmp_v3_merge_state_changes(struct in_multi *,
172     struct ifqueue *);
173 static void     igmp_v3_suppress_group_record(struct in_multi *);
174 static int      sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS;
175 static int      sysctl_igmp_gsr SYSCTL_HANDLER_ARGS;
176 static int      sysctl_igmp_default_version SYSCTL_HANDLER_ARGS;
177 
178 static const uint32_t igmp_timeout_delay = 1000; /* in milliseconds */
179 static const uint32_t igmp_timeout_leeway = 500; /* in milliseconds */
180 static bool igmp_timeout_run;            /* IGMP timer is scheduled to run */
181 static bool igmp_fast_timeout_run;       /* IGMP fast timer is scheduled to run */
182 static void igmp_timeout(thread_call_param_t, thread_call_param_t);
183 static void igmp_sched_timeout(void);
184 static void igmp_sched_fast_timeout(void);
185 
186 static struct mbuf *m_raopt;            /* Router Alert option */
187 
188 static int querier_present_timers_running;      /* IGMPv1/v2 older version
189                                                  * querier present */
190 static int interface_timers_running;            /* IGMPv3 general
191                                                  * query response */
192 static int state_change_timers_running;         /* IGMPv3 state-change
193                                                  * retransmit */
194 static int current_state_timers_running;        /* IGMPv1/v2 host
195                                                  * report; IGMPv3 g/sg
196                                                  * query response */
197 
198 /*
199  * Subsystem lock macros.
200  */
201 #define IGMP_LOCK()                     \
202 	lck_mtx_lock(&igmp_mtx)
203 #define IGMP_LOCK_ASSERT_HELD()         \
204 	LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_OWNED)
205 #define IGMP_LOCK_ASSERT_NOTHELD()      \
206 	LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED)
207 #define IGMP_UNLOCK()                   \
208 	lck_mtx_unlock(&igmp_mtx)
209 
210 static LIST_HEAD(, igmp_ifinfo) igi_head;
211 static struct igmpstat_v3 igmpstat_v3 = {
212 	.igps_version = IGPS_VERSION_3,
213 	.igps_len = sizeof(struct igmpstat_v3),
214 };
215 static struct igmpstat igmpstat; /* old IGMPv2 stats structure */
216 static struct timeval igmp_gsrdelay = {.tv_sec = 10, .tv_usec = 0};
217 
218 static int igmp_recvifkludge = 1;
219 static int igmp_sendra = 1;
220 static int igmp_sendlocal = 1;
221 static int igmp_v1enable = 1;
222 static int igmp_v2enable = 1;
223 static int igmp_legacysupp = 0;
224 static int igmp_default_version = IGMP_VERSION_3;
225 
226 SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
227     &igmpstat, igmpstat, "");
228 SYSCTL_STRUCT(_net_inet_igmp, OID_AUTO, v3stats,
229     CTLFLAG_RD | CTLFLAG_LOCKED, &igmpstat_v3, igmpstat_v3, "");
230 SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW | CTLFLAG_LOCKED,
231     &igmp_recvifkludge, 0,
232     "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
233 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW | CTLFLAG_LOCKED,
234     &igmp_sendra, 0,
235     "Send IP Router Alert option in IGMPv2/v3 messages");
236 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW | CTLFLAG_LOCKED,
237     &igmp_sendlocal, 0,
238     "Send IGMP membership reports for 224.0.0.0/24 groups");
239 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
240     &igmp_v1enable, 0,
241     "Enable backwards compatibility with IGMPv1");
242 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW | CTLFLAG_LOCKED,
243     &igmp_v2enable, 0,
244     "Enable backwards compatibility with IGMPv2");
245 SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW | CTLFLAG_LOCKED,
246     &igmp_legacysupp, 0,
247     "Allow v1/v2 reports to suppress v3 group responses");
248 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
249     CTLTYPE_INT | CTLFLAG_RW,
250     &igmp_default_version, 0, sysctl_igmp_default_version, "I",
251     "Default version of IGMP to run on each interface");
252 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
253     CTLTYPE_INT | CTLFLAG_RW,
254     &igmp_gsrdelay.tv_sec, 0, sysctl_igmp_gsr, "I",
255     "Rate limit for IGMPv3 Group-and-Source queries in seconds");
256 #ifdef IGMP_DEBUG
257 int igmp_debug = 0;
258 SYSCTL_INT(_net_inet_igmp, OID_AUTO,
259     debug, CTLFLAG_RW | CTLFLAG_LOCKED, &igmp_debug, 0, "");
260 #endif
261 
262 SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
263     sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
264 
265 /* Lock group and attribute for igmp_mtx */
266 static LCK_ATTR_DECLARE(igmp_mtx_attr, 0, 0);
267 static LCK_GRP_DECLARE(igmp_mtx_grp, "igmp_mtx");
268 
269 /*
270  * Locking and reference counting:
271  *
272  * igmp_mtx mainly protects igi_head.  In cases where both igmp_mtx and
273  * in_multihead_lock must be held, the former must be acquired first in order
274  * to maintain lock ordering.  It is not a requirement that igmp_mtx be
275  * acquired first before in_multihead_lock, but in case both must be acquired
276  * in succession, the correct lock ordering must be followed.
277  *
278  * Instead of walking the if_multiaddrs list at the interface and returning
279  * the ifma_protospec value of a matching entry, we search the global list
280  * of in_multi records and find it that way; this is done with in_multihead
281  * lock held.  Doing so avoids the race condition issues that many other BSDs
282  * suffer from (therefore in our implementation, ifma_protospec will never be
283  * NULL for as long as the in_multi is valid.)
284  *
285  * The above creates a requirement for the in_multi to stay in in_multihead
286  * list even after the final IGMP leave (in IGMPv3 mode) until it no longer
287  * needs to be retransmitted (this is not required for IGMPv1/v2.)  In order to handle
288  * this, the request and reference counts of the in_multi are bumped up when
289  * the state changes to IGMP_LEAVING_MEMBER, and later dropped in the timeout
290  * handler.  Each in_multi holds a reference to the underlying igmp_ifinfo.
291  *
292  * Thus, the permitted lock order is:
293  *
294  *	igmp_mtx, in_multihead_lock, inm_lock, igi_lock
295  *
296  * Any may be taken independently, but if any are held at the same time,
297  * the above lock order must be followed.
298  */
299 static LCK_MTX_DECLARE_ATTR(igmp_mtx, &igmp_mtx_grp, &igmp_mtx_attr);
300 static int igmp_timers_are_running;
301 
302 #define IGMP_ADD_DETACHED_INM(_head, _inm) {                            \
303 	SLIST_INSERT_HEAD(_head, _inm, inm_dtle);                       \
304 }
305 
306 #define IGMP_REMOVE_DETACHED_INM(_head) {                               \
307 	struct in_multi *_inm, *_inm_tmp;                               \
308 	SLIST_FOREACH_SAFE(_inm, _head, inm_dtle, _inm_tmp) {           \
309 	        SLIST_REMOVE(_head, _inm, in_multi, inm_dtle);          \
310 	        INM_REMREF(_inm);                                       \
311 	}                                                               \
312 	VERIFY(SLIST_EMPTY(_head));                                     \
313 }
314 
315 static KALLOC_TYPE_DEFINE(igi_zone, struct igmp_ifinfo, NET_KT_DEFAULT);
316 
317 /* Store IGMPv3 record count in the module private scratch space */
318 #define vt_nrecs        pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]
319 
320 static __inline void
igmp_save_context(struct mbuf * m,struct ifnet * ifp)321 igmp_save_context(struct mbuf *m, struct ifnet *ifp)
322 {
323 	m->m_pkthdr.rcvif = ifp;
324 }
325 
326 static __inline void
igmp_scrub_context(struct mbuf * m)327 igmp_scrub_context(struct mbuf *m)
328 {
329 	m->m_pkthdr.rcvif = NULL;
330 }
331 
#ifdef IGMP_DEBUG
/*
 * Format an IPv4 address given in host byte order.  The value is run
 * through htonl() and then passed to inet_ntop(AF_INET, ...), whose
 * result is returned unchanged; buf/size describe the output buffer.
 */
static __inline const char *
inet_ntop_haddr(in_addr_t haddr, char *buf __counted_by(size), socklen_t size)
{
	const struct in_addr net_addr = { .s_addr = htonl(haddr) };

	return inet_ntop(AF_INET, &net_addr, buf, size);
}
#endif
342 
343 /*
344  * Restore context from a queued IGMP output chain.
345  * Return saved ifp.
346  */
347 static __inline struct ifnet *
igmp_restore_context(struct mbuf * m)348 igmp_restore_context(struct mbuf *m)
349 {
350 	return m->m_pkthdr.rcvif;
351 }
352 
353 /*
354  * Retrieve or set default IGMP version.
355  */
356 static int
357 sysctl_igmp_default_version SYSCTL_HANDLER_ARGS
358 {
359 #pragma unused(oidp, arg2)
360 	int      error;
361 	int      new;
362 
363 	IGMP_LOCK();
364 
365 	error = SYSCTL_OUT(req, arg1, sizeof(int));
366 	if (error || !req->newptr) {
367 		goto out_locked;
368 	}
369 
370 	new = igmp_default_version;
371 
372 	error = SYSCTL_IN(req, &new, sizeof(int));
373 	if (error) {
374 		goto out_locked;
375 	}
376 
377 	if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
378 		error = EINVAL;
379 		goto out_locked;
380 	}
381 
382 	os_log(OS_LOG_DEFAULT,
383 	    "%s: changed igmp_default_version from %d to %d\n",
384 	    __func__, igmp_default_version, new);
385 
386 	igmp_default_version = new;
387 
388 out_locked:
389 	IGMP_UNLOCK();
390 	return error;
391 }
392 
393 /*
394  * Retrieve or set threshold between group-source queries in seconds.
395  *
396  */
397 static int
398 sysctl_igmp_gsr SYSCTL_HANDLER_ARGS
399 {
400 #pragma unused(arg1, arg2)
401 	int error;
402 	int i;
403 
404 	IGMP_LOCK();
405 
406 	i = (int)igmp_gsrdelay.tv_sec;
407 
408 	error = sysctl_handle_int(oidp, &i, 0, req);
409 	if (error || !req->newptr) {
410 		goto out_locked;
411 	}
412 
413 	if (i < -1 || i >= 60) {
414 		error = EINVAL;
415 		goto out_locked;
416 	}
417 
418 	igmp_gsrdelay.tv_sec = i;
419 
420 out_locked:
421 	IGMP_UNLOCK();
422 	return error;
423 }
424 
425 /*
426  * Expose struct igmp_ifinfo to userland, keyed by ifindex.
427  * For use by ifmcstat(8).
428  *
429  */
430 static int
431 sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS
432 {
433 #pragma unused(oidp)
434 	DECLARE_SYSCTL_HANDLER_ARG_ARRAY(int, 1, name, namelen);
435 	int                      error;
436 	struct ifnet            *ifp;
437 	struct igmp_ifinfo      *igi;
438 	struct igmp_ifinfo_u    igi_u;
439 
440 	if (req->newptr != USER_ADDR_NULL) {
441 		return EPERM;
442 	}
443 
444 	IGMP_LOCK();
445 
446 	if (name[0] <= 0 || name[0] > (u_int)if_index) {
447 		error = ENOENT;
448 		goto out_locked;
449 	}
450 
451 	error = ENOENT;
452 
453 	ifnet_head_lock_shared();
454 	ifp = ifindex2ifnet[name[0]];
455 	ifnet_head_done();
456 	if (ifp == NULL) {
457 		goto out_locked;
458 	}
459 
460 	bzero(&igi_u, sizeof(igi_u));
461 
462 	LIST_FOREACH(igi, &igi_head, igi_link) {
463 		IGI_LOCK(igi);
464 		if (ifp != igi->igi_ifp) {
465 			IGI_UNLOCK(igi);
466 			continue;
467 		}
468 		igi_u.igi_ifindex = igi->igi_ifp->if_index;
469 		igi_u.igi_version = igi->igi_version;
470 		igi_u.igi_v1_timer = igi->igi_v1_timer;
471 		igi_u.igi_v2_timer = igi->igi_v2_timer;
472 		igi_u.igi_v3_timer = igi->igi_v3_timer;
473 		igi_u.igi_flags = igi->igi_flags;
474 		igi_u.igi_rv = igi->igi_rv;
475 		igi_u.igi_qi = igi->igi_qi;
476 		igi_u.igi_qri = igi->igi_qri;
477 		igi_u.igi_uri = igi->igi_uri;
478 		IGI_UNLOCK(igi);
479 
480 		error = SYSCTL_OUT(req, &igi_u, sizeof(igi_u));
481 		break;
482 	}
483 
484 out_locked:
485 	IGMP_UNLOCK();
486 	return error;
487 }
488 
489 /*
490  * Dispatch an entire queue of pending packet chains
491  *
492  * Must not be called with inm_lock held.
493  */
494 static void
igmp_dispatch_queue(struct igmp_ifinfo * igi,struct ifqueue * ifq,int limit,const int loop)495 igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit,
496     const int loop)
497 {
498 	struct mbuf *m;
499 	struct ip *ip;
500 
501 	if (igi != NULL) {
502 		IGI_LOCK_ASSERT_HELD(igi);
503 	}
504 
505 #if SKYWALK
506 	/*
507 	 * Since this function is called holding the igi lock, we need to ensure we
508 	 * don't enter the driver directly because a deadlock can happen if another
509 	 * thread holding the workloop lock tries to acquire the igi lock at
510 	 * the same time.
511 	 */
512 	sk_protect_t __single protect = sk_async_transmit_protect();
513 #endif /* SKYWALK */
514 
515 	for (;;) {
516 		IF_DEQUEUE(ifq, m);
517 		if (m == NULL) {
518 			break;
519 		}
520 		IGMP_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
521 		    (uint64_t)VM_KERNEL_ADDRPERM(ifq),
522 		    (uint64_t)VM_KERNEL_ADDRPERM(m)));
523 		ip = mtod(m, struct ip *);
524 		if (loop) {
525 			m->m_flags |= M_IGMP_LOOP;
526 		}
527 		if (igi != NULL) {
528 			IGI_UNLOCK(igi);
529 		}
530 		igmp_sendpkt(m);
531 		if (igi != NULL) {
532 			IGI_LOCK(igi);
533 		}
534 		if (--limit == 0) {
535 			break;
536 		}
537 	}
538 
539 #if SKYWALK
540 	sk_async_transmit_unprotect(protect);
541 #endif /* SKYWALK */
542 
543 	if (igi != NULL) {
544 		IGI_LOCK_ASSERT_HELD(igi);
545 	}
546 }
547 
548 /*
549  * Filter outgoing IGMP report state by group.
550  *
551  * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
552  * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
553  * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
554  * this may break certain IGMP snooping switches which rely on the old
555  * report behaviour.
556  *
557  * Return zero if the given group is one for which IGMP reports
558  * should be suppressed, or non-zero if reports should be issued.
559  */
560 
561 static __inline__
562 int
igmp_isgroupreported(const struct in_addr addr)563 igmp_isgroupreported(const struct in_addr addr)
564 {
565 	if (in_allhosts(addr) ||
566 	    ((!igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) {
567 		return 0;
568 	}
569 
570 	return 1;
571 }
572 
573 /*
574  * Construct a Router Alert option to use in outgoing packets.
575  */
576 static struct mbuf *
igmp_ra_alloc(void)577 igmp_ra_alloc(void)
578 {
579 	struct mbuf     *m;
580 	struct ipoption *p;
581 
582 	MGET(m, M_WAITOK, MT_DATA);
583 	p = mtod(m, struct ipoption *);
584 	p->ipopt_dst.s_addr = INADDR_ANY;
585 	p->ipopt_list[0] = (char)IPOPT_RA;      /* Router Alert Option */
586 	p->ipopt_list[1] = 0x04;        /* 4 bytes long */
587 	p->ipopt_list[2] = IPOPT_EOL;   /* End of IP option list */
588 	p->ipopt_list[3] = 0x00;        /* pad byte */
589 	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
590 
591 	return m;
592 }
593 
594 /*
595  * Attach IGMP when PF_INET is attached to an interface.
596  */
597 struct igmp_ifinfo *
igmp_domifattach(struct ifnet * ifp,zalloc_flags_t how)598 igmp_domifattach(struct ifnet *ifp, zalloc_flags_t how)
599 {
600 	struct igmp_ifinfo *igi;
601 
602 	os_log_debug(OS_LOG_DEFAULT, "%s: called for ifp %s\n",
603 	    __func__, ifp->if_name);
604 
605 	igi = igi_alloc(how);
606 	if (igi == NULL) {
607 		return NULL;
608 	}
609 
610 	IGMP_LOCK();
611 
612 	IGI_LOCK(igi);
613 	igi_initvar(igi, ifp, 0);
614 	igi->igi_debug |= IFD_ATTACHED;
615 	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
616 	IGI_ADDREF_LOCKED(igi); /* hold a reference for caller */
617 	IGI_UNLOCK(igi);
618 	ifnet_lock_shared(ifp);
619 	igmp_initsilent(ifp, igi);
620 	ifnet_lock_done(ifp);
621 
622 	LIST_INSERT_HEAD(&igi_head, igi, igi_link);
623 
624 	IGMP_UNLOCK();
625 
626 	os_log_info(OS_LOG_DEFAULT, "%s: allocated igmp_ifinfo for ifp %s\n",
627 	    __func__, ifp->if_name);
628 
629 	return igi;
630 }
631 
632 /*
633  * Attach IGMP when PF_INET is reattached to an interface.  Caller is
634  * expected to have an outstanding reference to the igi.
635  */
636 void
igmp_domifreattach(struct igmp_ifinfo * igi)637 igmp_domifreattach(struct igmp_ifinfo *igi)
638 {
639 	struct ifnet *ifp;
640 
641 	IGMP_LOCK();
642 
643 	IGI_LOCK(igi);
644 	VERIFY(!(igi->igi_debug & IFD_ATTACHED));
645 	ifp = igi->igi_ifp;
646 	VERIFY(ifp != NULL);
647 	igi_initvar(igi, ifp, 1);
648 	igi->igi_debug |= IFD_ATTACHED;
649 	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
650 	IGI_UNLOCK(igi);
651 	ifnet_lock_shared(ifp);
652 	igmp_initsilent(ifp, igi);
653 	ifnet_lock_done(ifp);
654 
655 	LIST_INSERT_HEAD(&igi_head, igi, igi_link);
656 
657 	IGMP_UNLOCK();
658 
659 	os_log_info(OS_LOG_DEFAULT, "%s: reattached igmp_ifinfo for ifp %s\n",
660 	    __func__, ifp->if_name);
661 }
662 
663 /*
664  * Hook for domifdetach.
665  */
666 void
igmp_domifdetach(struct ifnet * ifp)667 igmp_domifdetach(struct ifnet *ifp)
668 {
669 	SLIST_HEAD(, in_multi) inm_dthead;
670 
671 	SLIST_INIT(&inm_dthead);
672 
673 	os_log_info(OS_LOG_DEFAULT, "%s: called for ifp %s\n", __func__,
674 	    if_name(ifp));
675 
676 	IGMP_LOCK();
677 	igi_delete(ifp, (struct igmp_inm_relhead *)&inm_dthead);
678 	IGMP_UNLOCK();
679 
680 	/* Now that we're dropped all locks, release detached records */
681 	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
682 }
683 
684 /*
685  * Called at interface detach time.  Note that we only flush all deferred
686  * responses and record releases; all remaining inm records and their source
687  * entries related to this interface are left intact, in order to handle
688  * the reattach case.
689  */
690 static void
igi_delete(const struct ifnet * ifp,struct igmp_inm_relhead * inm_dthead)691 igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead)
692 {
693 	struct igmp_ifinfo *igi, *tigi;
694 
695 	IGMP_LOCK_ASSERT_HELD();
696 
697 	LIST_FOREACH_SAFE(igi, &igi_head, igi_link, tigi) {
698 		IGI_LOCK(igi);
699 		if (igi->igi_ifp == ifp) {
700 			/*
701 			 * Free deferred General Query responses.
702 			 */
703 			IF_DRAIN(&igi->igi_gq);
704 			IF_DRAIN(&igi->igi_v2q);
705 			igmp_flush_relq(igi, inm_dthead);
706 			igi->igi_debug &= ~IFD_ATTACHED;
707 			IGI_UNLOCK(igi);
708 
709 			LIST_REMOVE(igi, igi_link);
710 			IGI_REMREF(igi); /* release igi_head reference */
711 			return;
712 		}
713 		IGI_UNLOCK(igi);
714 	}
715 	panic("%s: igmp_ifinfo not found for ifp %p(%s)", __func__,
716 	    ifp, if_name(ifp));
717 }
718 
719 __private_extern__ void
igmp_initsilent(struct ifnet * ifp,struct igmp_ifinfo * igi)720 igmp_initsilent(struct ifnet *ifp, struct igmp_ifinfo *igi)
721 {
722 	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
723 
724 	IGI_LOCK_ASSERT_NOTHELD(igi);
725 	IGI_LOCK(igi);
726 	if (!(ifp->if_flags & IFF_MULTICAST)) {
727 		igi->igi_flags |= IGIF_SILENT;
728 	} else {
729 		igi->igi_flags &= ~IGIF_SILENT;
730 	}
731 	IGI_UNLOCK(igi);
732 }
733 
734 static void
igi_initvar(struct igmp_ifinfo * igi,struct ifnet * ifp,int reattach)735 igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach)
736 {
737 	IGI_LOCK_ASSERT_HELD(igi);
738 
739 	igi->igi_ifp = ifp;
740 	igi->igi_version = igmp_default_version;
741 	igi->igi_flags = 0;
742 	igi->igi_rv = IGMP_RV_INIT;
743 	igi->igi_qi = IGMP_QI_INIT;
744 	igi->igi_qri = IGMP_QRI_INIT;
745 	igi->igi_uri = IGMP_URI_INIT;
746 
747 	if (!reattach) {
748 		SLIST_INIT(&igi->igi_relinmhead);
749 	}
750 
751 	/*
752 	 * Responses to general queries are subject to bounds.
753 	 */
754 	igi->igi_gq.ifq_maxlen =  IGMP_MAX_RESPONSE_PACKETS;
755 	igi->igi_v2q.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
756 }
757 
758 static struct igmp_ifinfo *
igi_alloc(zalloc_flags_t how)759 igi_alloc(zalloc_flags_t how)
760 {
761 	struct igmp_ifinfo *igi = zalloc_flags(igi_zone, how | Z_ZERO);
762 	if (igi != NULL) {
763 		lck_mtx_init(&igi->igi_lock, &igmp_mtx_grp, &igmp_mtx_attr);
764 		igi->igi_debug |= IFD_ALLOC;
765 	}
766 	return igi;
767 }
768 
769 static void
igi_free(struct igmp_ifinfo * igi)770 igi_free(struct igmp_ifinfo *igi)
771 {
772 	IGI_LOCK(igi);
773 	if (igi->igi_debug & IFD_ATTACHED) {
774 		panic("%s: attached igi=%p is being freed", __func__, igi);
775 		/* NOTREACHED */
776 	} else if (igi->igi_ifp != NULL) {
777 		panic("%s: ifp not NULL for igi=%p", __func__, igi);
778 		/* NOTREACHED */
779 	} else if (!(igi->igi_debug & IFD_ALLOC)) {
780 		panic("%s: igi %p cannot be freed", __func__, igi);
781 		/* NOTREACHED */
782 	} else if (igi->igi_refcnt != 0) {
783 		panic("%s: non-zero refcnt igi=%p", __func__, igi);
784 		/* NOTREACHED */
785 	}
786 	igi->igi_debug &= ~IFD_ALLOC;
787 	IGI_UNLOCK(igi);
788 
789 	lck_mtx_destroy(&igi->igi_lock, &igmp_mtx_grp);
790 	zfree(igi_zone, igi);
791 }
792 
793 void
igi_addref(struct igmp_ifinfo * igi,int locked)794 igi_addref(struct igmp_ifinfo *igi, int locked)
795 {
796 	if (!locked) {
797 		IGI_LOCK_SPIN(igi);
798 	} else {
799 		IGI_LOCK_ASSERT_HELD(igi);
800 	}
801 
802 	if (++igi->igi_refcnt == 0) {
803 		panic("%s: igi=%p wraparound refcnt", __func__, igi);
804 		/* NOTREACHED */
805 	}
806 	if (!locked) {
807 		IGI_UNLOCK(igi);
808 	}
809 }
810 
811 void
igi_remref(struct igmp_ifinfo * igi)812 igi_remref(struct igmp_ifinfo *igi)
813 {
814 	SLIST_HEAD(, in_multi) inm_dthead;
815 	struct ifnet *ifp;
816 
817 	IGI_LOCK_SPIN(igi);
818 
819 	if (igi->igi_refcnt == 0) {
820 		panic("%s: igi=%p negative refcnt", __func__, igi);
821 		/* NOTREACHED */
822 	}
823 
824 	--igi->igi_refcnt;
825 	if (igi->igi_refcnt > 0) {
826 		IGI_UNLOCK(igi);
827 		return;
828 	}
829 
830 	ifp = igi->igi_ifp;
831 	igi->igi_ifp = NULL;
832 	IF_DRAIN(&igi->igi_gq);
833 	IF_DRAIN(&igi->igi_v2q);
834 	SLIST_INIT(&inm_dthead);
835 	igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
836 	IGI_UNLOCK(igi);
837 
838 	/* Now that we're dropped all locks, release detached records */
839 	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
840 
841 	os_log_info(OS_LOG_DEFAULT, "%s: freeing igmp_ifinfo for ifp %s\n",
842 	    __func__, if_name(ifp));
843 
844 	igi_free(igi);
845 }
846 
847 /*
848  * Process a received IGMPv1 query.
849  * Return non-zero if the message should be dropped.
850  */
851 static int
igmp_input_v1_query(struct ifnet * ifp,const struct ip * ip,const struct igmp * igmp)852 igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
853     const struct igmp *igmp)
854 {
855 	struct igmp_ifinfo      *igi;
856 	struct in_multi         *inm;
857 	struct in_multistep     step;
858 	struct igmp_tparams     itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };
859 
860 	IGMP_LOCK_ASSERT_NOTHELD();
861 
862 	/*
863 	 * IGMPv1 Host Membership Queries SHOULD always be addressed to
864 	 * 224.0.0.1. They are always treated as General Queries.
865 	 * igmp_group is always ignored. Do not drop it as a userland
866 	 * daemon may wish to see it.
867 	 */
868 	if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
869 		IGMPSTAT_INC(igps_rcv_badqueries);
870 		OIGMPSTAT_INC(igps_rcv_badqueries);
871 		goto done;
872 	}
873 	IGMPSTAT_INC(igps_rcv_gen_queries);
874 
875 	igi = IGMP_IFINFO(ifp);
876 	VERIFY(igi != NULL);
877 
878 	IGI_LOCK(igi);
879 	if (igi->igi_flags & IGIF_LOOPBACK) {
880 		os_log_debug(OS_LOG_DEFAULT,
881 		    "%s: ignore v1 query on IGIF_LOOPBACK "
882 		    "ifp %s\n", __func__,
883 		    if_name(ifp));
884 		IGI_UNLOCK(igi);
885 		goto done;
886 	}
887 	/*
888 	 * Switch to IGMPv1 host compatibility mode.
889 	 */
890 	itp.qpt = igmp_set_version(igi, IGMP_VERSION_1);
891 	IGI_UNLOCK(igi);
892 
893 	os_log_debug(OS_LOG_DEFAULT, "%s: process v1 query on ifp %s\n", __func__,
894 	    if_name(ifp));
895 
896 	/*
897 	 * Start the timers in all of our group records
898 	 * for the interface on which the query arrived,
899 	 * except those which are already running.
900 	 */
901 	in_multihead_lock_shared();
902 	IN_FIRST_MULTI(step, inm);
903 	while (inm != NULL) {
904 		INM_LOCK(inm);
905 		if (inm->inm_ifp != ifp || inm->inm_timer != 0) {
906 			goto next;
907 		}
908 
909 		switch (inm->inm_state) {
910 		case IGMP_NOT_MEMBER:
911 		case IGMP_SILENT_MEMBER:
912 			break;
913 		case IGMP_G_QUERY_PENDING_MEMBER:
914 		case IGMP_SG_QUERY_PENDING_MEMBER:
915 		case IGMP_REPORTING_MEMBER:
916 		case IGMP_IDLE_MEMBER:
917 		case IGMP_LAZY_MEMBER:
918 		case IGMP_SLEEPING_MEMBER:
919 		case IGMP_AWAKENING_MEMBER:
920 			inm->inm_state = IGMP_REPORTING_MEMBER;
921 			inm->inm_timer = IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
922 			itp.cst = 1;
923 			break;
924 		case IGMP_LEAVING_MEMBER:
925 			break;
926 		}
927 next:
928 		INM_UNLOCK(inm);
929 		IN_NEXT_MULTI(step, inm);
930 	}
931 	in_multihead_lock_done();
932 done:
933 	igmp_set_timeout(&itp);
934 
935 	return 0;
936 }
937 
938 /*
939  * Process a received IGMPv2 general or group-specific query.
940  */
941 static int
igmp_input_v2_query(struct ifnet * ifp,const struct ip * ip,const struct igmp * igmp)942 igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
943     const struct igmp *igmp)
944 {
945 	struct igmp_ifinfo      *igi;
946 	struct in_multi         *inm;
947 	int                      is_general_query;
948 	uint16_t                 timer;
949 	struct igmp_tparams      itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };
950 
951 	IGMP_LOCK_ASSERT_NOTHELD();
952 
953 	is_general_query = 0;
954 
955 	/*
956 	 * Validate address fields upfront.
957 	 */
958 	if (in_nullhost(igmp->igmp_group)) {
959 		/*
960 		 * IGMPv2 General Query.
961 		 * If this was not sent to the all-hosts group, ignore it.
962 		 */
963 		if (!in_allhosts(ip->ip_dst)) {
964 			goto done;
965 		}
966 		IGMPSTAT_INC(igps_rcv_gen_queries);
967 		is_general_query = 1;
968 	} else {
969 		/* IGMPv2 Group-Specific Query. */
970 		IGMPSTAT_INC(igps_rcv_group_queries);
971 	}
972 
973 	igi = IGMP_IFINFO(ifp);
974 	VERIFY(igi != NULL);
975 
976 	IGI_LOCK(igi);
977 	if (igi->igi_flags & IGIF_LOOPBACK) {
978 		os_log_debug(OS_LOG_DEFAULT, "%s: ignore v2 query on IGIF_LOOPBACK "
979 		    "ifp %s\n", __func__, if_name(ifp));
980 		IGI_UNLOCK(igi);
981 		goto done;
982 	}
983 	/*
984 	 * Ignore v2 query if in v1 Compatibility Mode.
985 	 */
986 	if (igi->igi_version == IGMP_VERSION_1) {
987 		IGI_UNLOCK(igi);
988 		goto done;
989 	}
990 	itp.qpt = igmp_set_version(igi, IGMP_VERSION_2);
991 	IGI_UNLOCK(igi);
992 
993 	timer = igmp->igmp_code / IGMP_TIMER_SCALE;
994 	if (timer == 0) {
995 		timer = 1;
996 	}
997 
998 	if (is_general_query) {
999 		struct in_multistep step;
1000 
1001 		os_log_debug(OS_LOG_DEFAULT, "%s: process v2 general query on ifp %s\n",
1002 		    __func__, if_name(ifp));
1003 		/*
1004 		 * For each reporting group joined on this
1005 		 * interface, kick the report timer.
1006 		 */
1007 		in_multihead_lock_shared();
1008 		IN_FIRST_MULTI(step, inm);
1009 		while (inm != NULL) {
1010 			INM_LOCK(inm);
1011 			if (inm->inm_ifp == ifp) {
1012 				itp.cst += igmp_v2_update_group(inm, timer);
1013 			}
1014 			INM_UNLOCK(inm);
1015 			IN_NEXT_MULTI(step, inm);
1016 		}
1017 		in_multihead_lock_done();
1018 	} else {
1019 		/*
1020 		 * Group-specific IGMPv2 query, we need only
1021 		 * look up the single group to process it.
1022 		 */
1023 		in_multihead_lock_shared();
1024 		IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1025 		in_multihead_lock_done();
1026 		if (inm != NULL) {
1027 			INM_LOCK(inm);
1028 			IGMP_INET_PRINTF(igmp->igmp_group,
1029 			    ("process v2 query %s on ifp 0x%llx(%s)\n",
1030 			    _igmp_inet_buf,
1031 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1032 			itp.cst = igmp_v2_update_group(inm, timer);
1033 			INM_UNLOCK(inm);
1034 			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1035 		}
1036 	}
1037 done:
1038 	igmp_set_timeout(&itp);
1039 
1040 	return 0;
1041 }
1042 
1043 /*
1044  * Update the report timer on a group in response to an IGMPv2 query.
1045  *
1046  * If we are becoming the reporting member for this group, start the timer.
1047  * If we already are the reporting member for this group, and timer is
1048  * below the threshold, reset it.
1049  *
1050  * We may be updating the group for the first time since we switched
1051  * to IGMPv3. If we are, then we must clear any recorded source lists,
1052  * and transition to REPORTING state; the group timer is overloaded
1053  * for group and group-source query responses.
1054  *
1055  * Unlike IGMPv3, the delay per group should be jittered
1056  * to avoid bursts of IGMPv2 reports.
1057  */
1058 static uint32_t
igmp_v2_update_group(struct in_multi * inm,const int timer)1059 igmp_v2_update_group(struct in_multi *inm, const int timer)
1060 {
1061 	IGMP_INET_PRINTF(inm->inm_addr, ("%s: %s/%s timer=%d\n",
1062 	    __func__, _igmp_inet_buf, if_name(inm->inm_ifp),
1063 	    timer));
1064 
1065 	INM_LOCK_ASSERT_HELD(inm);
1066 
1067 	switch (inm->inm_state) {
1068 	case IGMP_NOT_MEMBER:
1069 	case IGMP_SILENT_MEMBER:
1070 		break;
1071 	case IGMP_REPORTING_MEMBER:
1072 		if (inm->inm_timer != 0 &&
1073 		    inm->inm_timer <= timer) {
1074 			IGMP_PRINTF(("%s: REPORTING and timer running, "
1075 			    "skipping.\n", __func__));
1076 			break;
1077 		}
1078 		OS_FALLTHROUGH;
1079 	case IGMP_SG_QUERY_PENDING_MEMBER:
1080 	case IGMP_G_QUERY_PENDING_MEMBER:
1081 	case IGMP_IDLE_MEMBER:
1082 	case IGMP_LAZY_MEMBER:
1083 	case IGMP_AWAKENING_MEMBER:
1084 		IGMP_PRINTF(("%s: ->REPORTING\n", __func__));
1085 		inm->inm_state = IGMP_REPORTING_MEMBER;
1086 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1087 		break;
1088 	case IGMP_SLEEPING_MEMBER:
1089 		IGMP_PRINTF(("%s: ->AWAKENING\n", __func__));
1090 		inm->inm_state = IGMP_AWAKENING_MEMBER;
1091 		break;
1092 	case IGMP_LEAVING_MEMBER:
1093 		break;
1094 	}
1095 
1096 	return inm->inm_timer;
1097 }
1098 
1099 /*
1100  * Process a received IGMPv3 general, group-specific or
1101  * group-and-source-specific query.
1102  * Assumes m has already been pulled up to the full IGMP message length.
1103  * Return 0 if successful, otherwise an appropriate error code is returned.
1104  */
1105 static int
igmp_input_v3_query(struct ifnet * ifp,const struct ip * ip,struct igmpv3 * __indexable igmpv3)1106 igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
1107     /*const*/ struct igmpv3 *__indexable igmpv3)
1108 {
1109 	struct igmp_ifinfo      *igi;
1110 	struct in_multi         *inm;
1111 	int                      is_general_query;
1112 	uint32_t                 maxresp, nsrc, qqi;
1113 	uint32_t                 timer;
1114 	uint8_t                  qrv;
1115 	struct igmp_tparams      itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };
1116 
1117 	IGMP_LOCK_ASSERT_NOTHELD();
1118 
1119 	is_general_query = 0;
1120 
1121 	os_log_debug(OS_LOG_DEFAULT, "%s: process v3 query on ifp %s\n", __func__,
1122 	    if_name(ifp));
1123 
1124 	maxresp = igmpv3->igmp_code;    /* in 1/10ths of a second */
1125 	if (maxresp >= 128) {
1126 		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
1127 		    (IGMP_EXP(igmpv3->igmp_code) + 3);
1128 	}
1129 
1130 	/*
1131 	 * Robustness must never be less than 2 for on-wire IGMPv3.
1132 	 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
1133 	 * an exception for interfaces whose IGMPv3 state changes
1134 	 * are redirected to loopback (e.g. MANET).
1135 	 */
1136 	qrv = IGMP_QRV(igmpv3->igmp_misc);
1137 	if (qrv < 2) {
1138 		IGMP_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
1139 		    qrv, IGMP_RV_INIT));
1140 		qrv = IGMP_RV_INIT;
1141 	}
1142 
1143 	qqi = igmpv3->igmp_qqi;
1144 	if (qqi >= 128) {
1145 		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
1146 		    (IGMP_EXP(igmpv3->igmp_qqi) + 3);
1147 	}
1148 
1149 	timer = maxresp / IGMP_TIMER_SCALE;
1150 	if (timer == 0) {
1151 		timer = 1;
1152 	}
1153 
1154 	nsrc = ntohs(igmpv3->igmp_numsrc);
1155 
1156 	/*
1157 	 * Validate address fields and versions upfront before
1158 	 * accepting v3 query.
1159 	 */
1160 	if (in_nullhost(igmpv3->igmp_group)) {
1161 		/*
1162 		 * IGMPv3 General Query.
1163 		 *
1164 		 * General Queries SHOULD be directed to 224.0.0.1.
1165 		 * A general query with a source list has undefined
1166 		 * behaviour; discard it.
1167 		 */
1168 		IGMPSTAT_INC(igps_rcv_gen_queries);
1169 		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
1170 			IGMPSTAT_INC(igps_rcv_badqueries);
1171 			OIGMPSTAT_INC(igps_rcv_badqueries);
1172 			goto done;
1173 		}
1174 		is_general_query = 1;
1175 	} else {
1176 		/* Group or group-source specific query. */
1177 		if (nsrc == 0) {
1178 			IGMPSTAT_INC(igps_rcv_group_queries);
1179 		} else {
1180 			IGMPSTAT_INC(igps_rcv_gsr_queries);
1181 		}
1182 	}
1183 
1184 	igi = IGMP_IFINFO(ifp);
1185 	VERIFY(igi != NULL);
1186 
1187 	IGI_LOCK(igi);
1188 	if (igi->igi_flags & IGIF_LOOPBACK) {
1189 		os_log_debug(OS_LOG_DEFAULT, "%s: ignore v3 query on IGIF_LOOPBACK "
1190 		    "ifp %s\n", __func__,
1191 		    if_name(ifp));
1192 		IGI_UNLOCK(igi);
1193 		goto done;
1194 	}
1195 
1196 	/*
1197 	 * Discard the v3 query if we're in Compatibility Mode.
1198 	 * The RFC is not obviously worded that hosts need to stay in
1199 	 * compatibility mode until the Old Version Querier Present
1200 	 * timer expires.
1201 	 */
1202 	if (igi->igi_version != IGMP_VERSION_3) {
1203 		os_log_debug(OS_LOG_DEFAULT, "%s: ignore v3 query in v%d mode on "
1204 		    "ifp %s\n", __func__, igi->igi_version,
1205 		    if_name(ifp));
1206 		IGI_UNLOCK(igi);
1207 		goto done;
1208 	}
1209 
1210 	itp.qpt = igmp_set_version(igi, IGMP_VERSION_3);
1211 	igi->igi_rv = qrv;
1212 	igi->igi_qi = qqi;
1213 	igi->igi_qri = MAX(timer, IGMP_QRI_MIN);
1214 
1215 	IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, igi->igi_rv,
1216 	    igi->igi_qi, igi->igi_qri));
1217 
1218 	if (is_general_query) {
1219 		/*
1220 		 * Schedule a current-state report on this ifp for
1221 		 * all groups, possibly containing source lists.
1222 		 * If there is a pending General Query response
1223 		 * scheduled earlier than the selected delay, do
1224 		 * not schedule any other reports.
1225 		 * Otherwise, reset the interface timer.
1226 		 */
1227 		os_log_debug(OS_LOG_DEFAULT, "%s: process v3 general query on ifp %s\n",
1228 		    __func__, if_name(ifp));
1229 		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
1230 			itp.it = igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
1231 		}
1232 		IGI_UNLOCK(igi);
1233 	} else {
1234 		IGI_UNLOCK(igi);
1235 		/*
1236 		 * Group-source-specific queries are throttled on
1237 		 * a per-group basis to defeat denial-of-service attempts.
1238 		 * Queries for groups we are not a member of on this
1239 		 * link are simply ignored.
1240 		 */
1241 		in_multihead_lock_shared();
1242 		IN_LOOKUP_MULTI(&igmpv3->igmp_group, ifp, inm);
1243 		in_multihead_lock_done();
1244 		if (inm == NULL) {
1245 			goto done;
1246 		}
1247 
1248 		INM_LOCK(inm);
1249 		if (nsrc > 0) {
1250 			if (!ratecheck(&inm->inm_lastgsrtv,
1251 			    &igmp_gsrdelay)) {
1252 				os_log_info(OS_LOG_DEFAULT, "%s: GS query throttled.\n",
1253 				    __func__);
1254 				IGMPSTAT_INC(igps_drop_gsr_queries);
1255 				INM_UNLOCK(inm);
1256 				INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1257 				goto done;
1258 			}
1259 		}
1260 		IGMP_INET_PRINTF(igmpv3->igmp_group,
1261 		    ("process v3 %s query on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1262 		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1263 		os_log_debug(OS_LOG_DEFAULT, "%s: process v3 query on ifp %s\n",
1264 		    __func__, if_name(ifp));
1265 		/*
1266 		 * If there is a pending General Query response
1267 		 * scheduled sooner than the selected delay, no
1268 		 * further report need be scheduled.
1269 		 * Otherwise, prepare to respond to the
1270 		 * group-specific or group-and-source query.
1271 		 */
1272 		IGI_LOCK(igi);
1273 		itp.it = igi->igi_v3_timer;
1274 		IGI_UNLOCK(igi);
1275 		if (itp.it == 0 || itp.it >= timer) {
1276 			(void) igmp_input_v3_group_query(inm, timer, igmpv3);
1277 			itp.cst = inm->inm_timer;
1278 		}
1279 		INM_UNLOCK(inm);
1280 		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1281 	}
1282 done:
1283 	if (itp.it > 0) {
1284 		os_log_debug(OS_LOG_DEFAULT, "%s: v3 general query response scheduled in "
1285 		    "T+%d seconds on ifp %s\n", __func__, itp.it,
1286 		    if_name(ifp));
1287 	}
1288 	igmp_set_timeout(&itp);
1289 
1290 	return 0;
1291 }
1292 
1293 /*
1294  * Process a recieved IGMPv3 group-specific or group-and-source-specific
1295  * query.
1296  * Return <0 if any error occured. Currently this is ignored.
1297  */
1298 static int
igmp_input_v3_group_query(struct in_multi * inm,int timer,struct igmpv3 * __indexable igmpv3)1299 igmp_input_v3_group_query(struct in_multi *inm,
1300     int timer, /*const*/ struct igmpv3 *__indexable igmpv3)
1301 {
1302 	int                      retval;
1303 	uint16_t                 nsrc;
1304 
1305 	INM_LOCK_ASSERT_HELD(inm);
1306 
1307 	retval = 0;
1308 
1309 	switch (inm->inm_state) {
1310 	case IGMP_NOT_MEMBER:
1311 	case IGMP_SILENT_MEMBER:
1312 	case IGMP_SLEEPING_MEMBER:
1313 	case IGMP_LAZY_MEMBER:
1314 	case IGMP_AWAKENING_MEMBER:
1315 	case IGMP_IDLE_MEMBER:
1316 	case IGMP_LEAVING_MEMBER:
1317 		return retval;
1318 	case IGMP_REPORTING_MEMBER:
1319 	case IGMP_G_QUERY_PENDING_MEMBER:
1320 	case IGMP_SG_QUERY_PENDING_MEMBER:
1321 		break;
1322 	}
1323 
1324 	nsrc = ntohs(igmpv3->igmp_numsrc);
1325 
1326 	/*
1327 	 * Deal with group-specific queries upfront.
1328 	 * If any group query is already pending, purge any recorded
1329 	 * source-list state if it exists, and schedule a query response
1330 	 * for this group-specific query.
1331 	 */
1332 	if (nsrc == 0) {
1333 		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
1334 		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
1335 			inm_clear_recorded(inm);
1336 			timer = min(inm->inm_timer, timer);
1337 		}
1338 		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
1339 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1340 		return retval;
1341 	}
1342 
1343 	/*
1344 	 * Deal with the case where a group-and-source-specific query has
1345 	 * been received but a group-specific query is already pending.
1346 	 */
1347 	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
1348 		timer = min(inm->inm_timer, timer);
1349 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1350 		return retval;
1351 	}
1352 
1353 	/*
1354 	 * Finally, deal with the case where a group-and-source-specific
1355 	 * query has been received, where a response to a previous g-s-r
1356 	 * query exists, or none exists.
1357 	 * In this case, we need to parse the source-list which the Querier
1358 	 * has provided us with and check if we have any source list filter
1359 	 * entries at T1 for these sources. If we do not, there is no need
1360 	 * schedule a report and the query may be dropped.
1361 	 * If we do, we must record them and schedule a current-state
1362 	 * report for those sources.
1363 	 * FIXME: Handling source lists larger than 1 mbuf requires that
1364 	 * we pass the mbuf chain pointer down to this function, and use
1365 	 * m_getptr() to walk the chain.
1366 	 */
1367 	if (inm->inm_nsrc > 0) {
1368 		const struct in_addr    *ap;
1369 		int                      i, nrecorded;
1370 
1371 		ap = (const struct in_addr *)(igmpv3 + 1);
1372 		nrecorded = 0;
1373 		for (i = 0; i < nsrc; i++, ap++) {
1374 			retval = inm_record_source(inm, ap->s_addr);
1375 			if (retval < 0) {
1376 				break;
1377 			}
1378 			nrecorded += retval;
1379 		}
1380 		if (nrecorded > 0) {
1381 			os_log_debug(OS_LOG_DEFAULT, "%s: schedule response to SG query\n",
1382 			    __func__);
1383 			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
1384 			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1385 		}
1386 	}
1387 
1388 	return retval;
1389 }
1390 
1391 /*
1392  * Process a received IGMPv1 host membership report.
1393  *
1394  * NOTE: 0.0.0.0 workaround breaks const correctness.
1395  */
1396 static int
igmp_input_v1_report(struct ifnet * ifp,struct mbuf * m,struct ip * ip,struct igmp * igmp)1397 igmp_input_v1_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
1398     /*const*/ struct igmp *igmp)
1399 {
1400 	struct in_ifaddr *ia;
1401 	struct in_multi *inm;
1402 
1403 	IGMPSTAT_INC(igps_rcv_reports);
1404 	OIGMPSTAT_INC(igps_rcv_reports);
1405 
1406 	if ((ifp->if_flags & IFF_LOOPBACK) ||
1407 	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1408 		return 0;
1409 	}
1410 
1411 	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr) ||
1412 	    !in_hosteq(igmp->igmp_group, ip->ip_dst))) {
1413 		IGMPSTAT_INC(igps_rcv_badreports);
1414 		OIGMPSTAT_INC(igps_rcv_badreports);
1415 		return EINVAL;
1416 	}
1417 
1418 	/*
1419 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1420 	 * Booting clients may use the source address 0.0.0.0. Some
1421 	 * IGMP daemons may not know how to use IP_RECVIF to determine
1422 	 * the interface upon which this message was received.
1423 	 * Replace 0.0.0.0 with the subnet address if told to do so.
1424 	 */
1425 	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1426 		IFP_TO_IA(ifp, ia);
1427 		if (ia != NULL) {
1428 			IFA_LOCK(&ia->ia_ifa);
1429 			ip->ip_src.s_addr = htonl(ia->ia_subnet);
1430 			IFA_UNLOCK(&ia->ia_ifa);
1431 			ifa_remref(&ia->ia_ifa);
1432 		}
1433 	}
1434 
1435 	IGMP_INET_PRINTF(igmp->igmp_group,
1436 	    ("process v1 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1437 	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1438 
1439 	/*
1440 	 * IGMPv1 report suppression.
1441 	 * If we are a member of this group, and our membership should be
1442 	 * reported, stop our group timer and transition to the 'lazy' state.
1443 	 */
1444 	in_multihead_lock_shared();
1445 	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1446 	in_multihead_lock_done();
1447 	if (inm != NULL) {
1448 		struct igmp_ifinfo *igi;
1449 
1450 		INM_LOCK(inm);
1451 
1452 		igi = inm->inm_igi;
1453 		VERIFY(igi != NULL);
1454 
1455 		IGMPSTAT_INC(igps_rcv_ourreports);
1456 		OIGMPSTAT_INC(igps_rcv_ourreports);
1457 
1458 		/*
1459 		 * If we are in IGMPv3 host mode, do not allow the
1460 		 * other host's IGMPv1 report to suppress our reports
1461 		 * unless explicitly configured to do so.
1462 		 */
1463 		IGI_LOCK(igi);
1464 		if (igi->igi_version == IGMP_VERSION_3) {
1465 			if (igmp_legacysupp) {
1466 				igmp_v3_suppress_group_record(inm);
1467 			}
1468 			IGI_UNLOCK(igi);
1469 			INM_UNLOCK(inm);
1470 			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1471 			return 0;
1472 		}
1473 
1474 		INM_LOCK_ASSERT_HELD(inm);
1475 		inm->inm_timer = 0;
1476 
1477 		switch (inm->inm_state) {
1478 		case IGMP_NOT_MEMBER:
1479 		case IGMP_SILENT_MEMBER:
1480 			break;
1481 		case IGMP_IDLE_MEMBER:
1482 		case IGMP_LAZY_MEMBER:
1483 		case IGMP_AWAKENING_MEMBER:
1484 			IGMP_INET_PRINTF(igmp->igmp_group,
1485 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1486 			    _igmp_inet_buf,
1487 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1488 			OS_FALLTHROUGH;
1489 		case IGMP_SLEEPING_MEMBER:
1490 			inm->inm_state = IGMP_SLEEPING_MEMBER;
1491 			break;
1492 		case IGMP_REPORTING_MEMBER:
1493 			IGMP_INET_PRINTF(igmp->igmp_group,
1494 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1495 			    _igmp_inet_buf,
1496 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1497 			if (igi->igi_version == IGMP_VERSION_1) {
1498 				inm->inm_state = IGMP_LAZY_MEMBER;
1499 			} else if (igi->igi_version == IGMP_VERSION_2) {
1500 				inm->inm_state = IGMP_SLEEPING_MEMBER;
1501 			}
1502 			break;
1503 		case IGMP_G_QUERY_PENDING_MEMBER:
1504 		case IGMP_SG_QUERY_PENDING_MEMBER:
1505 		case IGMP_LEAVING_MEMBER:
1506 			break;
1507 		}
1508 		IGI_UNLOCK(igi);
1509 		INM_UNLOCK(inm);
1510 		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1511 	}
1512 
1513 	return 0;
1514 }
1515 
1516 /*
1517  * Process a received IGMPv2 host membership report.
1518  *
1519  * NOTE: 0.0.0.0 workaround breaks const correctness.
1520  */
1521 static int
igmp_input_v2_report(struct ifnet * ifp,struct mbuf * m,struct ip * ip,struct igmp * igmp)1522 igmp_input_v2_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
1523     /*const*/ struct igmp *igmp)
1524 {
1525 	struct in_ifaddr *ia;
1526 	struct in_multi *inm;
1527 
1528 	/*
1529 	 * Make sure we don't hear our own membership report.  Fast
1530 	 * leave requires knowing that we are the only member of a
1531 	 * group.
1532 	 */
1533 	IFP_TO_IA(ifp, ia);
1534 	if (ia != NULL) {
1535 		IFA_LOCK(&ia->ia_ifa);
1536 		if (in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
1537 			IFA_UNLOCK(&ia->ia_ifa);
1538 			ifa_remref(&ia->ia_ifa);
1539 			return 0;
1540 		}
1541 		IFA_UNLOCK(&ia->ia_ifa);
1542 	}
1543 
1544 	IGMPSTAT_INC(igps_rcv_reports);
1545 	OIGMPSTAT_INC(igps_rcv_reports);
1546 
1547 	if ((ifp->if_flags & IFF_LOOPBACK) ||
1548 	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1549 		if (ia != NULL) {
1550 			ifa_remref(&ia->ia_ifa);
1551 		}
1552 		return 0;
1553 	}
1554 
1555 	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
1556 	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
1557 		if (ia != NULL) {
1558 			ifa_remref(&ia->ia_ifa);
1559 		}
1560 		IGMPSTAT_INC(igps_rcv_badreports);
1561 		OIGMPSTAT_INC(igps_rcv_badreports);
1562 		return EINVAL;
1563 	}
1564 
1565 	/*
1566 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1567 	 * Booting clients may use the source address 0.0.0.0. Some
1568 	 * IGMP daemons may not know how to use IP_RECVIF to determine
1569 	 * the interface upon which this message was received.
1570 	 * Replace 0.0.0.0 with the subnet address if told to do so.
1571 	 */
1572 	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1573 		if (ia != NULL) {
1574 			IFA_LOCK(&ia->ia_ifa);
1575 			ip->ip_src.s_addr = htonl(ia->ia_subnet);
1576 			IFA_UNLOCK(&ia->ia_ifa);
1577 		}
1578 	}
1579 	if (ia != NULL) {
1580 		ifa_remref(&ia->ia_ifa);
1581 	}
1582 
1583 	IGMP_INET_PRINTF(igmp->igmp_group,
1584 	    ("process v2 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1585 	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1586 	os_log_debug(OS_LOG_DEFAULT, "%s: process v2 report on ifp %s",
1587 	    __func__, if_name(ifp));
1588 
1589 	/*
1590 	 * IGMPv2 report suppression.
1591 	 * If we are a member of this group, and our membership should be
1592 	 * reported, and our group timer is pending or about to be reset,
1593 	 * stop our group timer by transitioning to the 'lazy' state.
1594 	 */
1595 	in_multihead_lock_shared();
1596 	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1597 	in_multihead_lock_done();
1598 	if (inm != NULL) {
1599 		struct igmp_ifinfo *igi;
1600 
1601 		INM_LOCK(inm);
1602 		igi = inm->inm_igi;
1603 		VERIFY(igi != NULL);
1604 
1605 		IGMPSTAT_INC(igps_rcv_ourreports);
1606 		OIGMPSTAT_INC(igps_rcv_ourreports);
1607 
1608 		/*
1609 		 * If we are in IGMPv3 host mode, do not allow the
1610 		 * other host's IGMPv1 report to suppress our reports
1611 		 * unless explicitly configured to do so.
1612 		 */
1613 		IGI_LOCK(igi);
1614 		if (igi->igi_version == IGMP_VERSION_3) {
1615 			if (igmp_legacysupp) {
1616 				igmp_v3_suppress_group_record(inm);
1617 			}
1618 			IGI_UNLOCK(igi);
1619 			INM_UNLOCK(inm);
1620 			INM_REMREF(inm);
1621 			return 0;
1622 		}
1623 
1624 		inm->inm_timer = 0;
1625 
1626 		switch (inm->inm_state) {
1627 		case IGMP_NOT_MEMBER:
1628 		case IGMP_SILENT_MEMBER:
1629 		case IGMP_SLEEPING_MEMBER:
1630 			break;
1631 		case IGMP_REPORTING_MEMBER:
1632 		case IGMP_IDLE_MEMBER:
1633 		case IGMP_AWAKENING_MEMBER:
1634 			IGMP_INET_PRINTF(igmp->igmp_group,
1635 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1636 			    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp),
1637 			    if_name(ifp)));
1638 			OS_FALLTHROUGH;
1639 		case IGMP_LAZY_MEMBER:
1640 			inm->inm_state = IGMP_LAZY_MEMBER;
1641 			break;
1642 		case IGMP_G_QUERY_PENDING_MEMBER:
1643 		case IGMP_SG_QUERY_PENDING_MEMBER:
1644 		case IGMP_LEAVING_MEMBER:
1645 			break;
1646 		}
1647 		IGI_UNLOCK(igi);
1648 		INM_UNLOCK(inm);
1649 		INM_REMREF(inm);
1650 	}
1651 
1652 	return 0;
1653 }
1654 
1655 void
igmp_input(struct mbuf * m,int off)1656 igmp_input(struct mbuf *m, int off)
1657 {
1658 	int iphlen;
1659 	struct ifnet *ifp;
1660 	struct igmp *igmp;
1661 	struct ip *ip;
1662 	int igmplen;
1663 	int minlen;
1664 	int queryver;
1665 
1666 	IGMP_PRINTF(("%s: called w/mbuf(0x%llx,%d)\n", __func__,
1667 	    (uint64_t)VM_KERNEL_ADDRPERM(m), off));
1668 
1669 	ifp = m->m_pkthdr.rcvif;
1670 
1671 	IGMPSTAT_INC(igps_rcv_total);
1672 	OIGMPSTAT_INC(igps_rcv_total);
1673 
1674 	/* Expect 32-bit aligned data pointer on strict-align platforms */
1675 	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
1676 
1677 	ip = mtod(m, struct ip *);
1678 	iphlen = off;
1679 
1680 	/* By now, ip_len no longer contains the length of IP header */
1681 	igmplen = ip->ip_len;
1682 
1683 	/*
1684 	 * Validate lengths.
1685 	 */
1686 	if (igmplen < IGMP_MINLEN) {
1687 		IGMPSTAT_INC(igps_rcv_tooshort);
1688 		OIGMPSTAT_INC(igps_rcv_tooshort);
1689 		m_freem(m);
1690 		return;
1691 	}
1692 
1693 	/*
1694 	 * Always pullup to the minimum size for v1/v2 or v3
1695 	 * to amortize calls to m_pulldown().
1696 	 */
1697 	if (igmplen >= IGMP_V3_QUERY_MINLEN) {
1698 		minlen = IGMP_V3_QUERY_MINLEN;
1699 	} else {
1700 		minlen = IGMP_MINLEN;
1701 	}
1702 
1703 	/* A bit more expensive than M_STRUCT_GET, but ensures alignment */
1704 	M_STRUCT_GET0(igmp, struct igmp *, m, off, minlen);
1705 	if (igmp == NULL) {
1706 		IGMPSTAT_INC(igps_rcv_tooshort);
1707 		OIGMPSTAT_INC(igps_rcv_tooshort);
1708 		return;
1709 	}
1710 	/* N.B.: we assume the packet was correctly aligned in ip_input. */
1711 
1712 	/*
1713 	 * Validate checksum.
1714 	 */
1715 	m->m_data += iphlen;
1716 	m->m_len -= iphlen;
1717 	if (in_cksum(m, igmplen)) {
1718 		IGMPSTAT_INC(igps_rcv_badsum);
1719 		OIGMPSTAT_INC(igps_rcv_badsum);
1720 		m_freem(m);
1721 		return;
1722 	}
1723 	m->m_data -= iphlen;
1724 	m->m_len += iphlen;
1725 
1726 	/*
1727 	 * IGMP control traffic is link-scope, and must have a TTL of 1.
1728 	 * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
1729 	 * probe packets may come from beyond the LAN.
1730 	 */
1731 	if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
1732 		IGMPSTAT_INC(igps_rcv_badttl);
1733 		m_freem(m);
1734 		return;
1735 	}
1736 
1737 	switch (igmp->igmp_type) {
1738 	case IGMP_HOST_MEMBERSHIP_QUERY:
1739 		if (igmplen == IGMP_MINLEN) {
1740 			if (igmp->igmp_code == 0) {
1741 				queryver = IGMP_VERSION_1;
1742 			} else {
1743 				queryver = IGMP_VERSION_2;
1744 			}
1745 		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
1746 			queryver = IGMP_VERSION_3;
1747 		} else {
1748 			IGMPSTAT_INC(igps_rcv_tooshort);
1749 			OIGMPSTAT_INC(igps_rcv_tooshort);
1750 			m_freem(m);
1751 			return;
1752 		}
1753 
1754 		OIGMPSTAT_INC(igps_rcv_queries);
1755 
1756 		switch (queryver) {
1757 		case IGMP_VERSION_1:
1758 			IGMPSTAT_INC(igps_rcv_v1v2_queries);
1759 			if (!igmp_v1enable) {
1760 				break;
1761 			}
1762 			if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
1763 				m_freem(m);
1764 				return;
1765 			}
1766 			break;
1767 
1768 		case IGMP_VERSION_2:
1769 			IGMPSTAT_INC(igps_rcv_v1v2_queries);
1770 			if (!igmp_v2enable) {
1771 				break;
1772 			}
1773 			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
1774 				m_freem(m);
1775 				return;
1776 			}
1777 			break;
1778 
1779 		case IGMP_VERSION_3: {
1780 			struct igmpv3 *igmpv3;
1781 			uint16_t igmpv3len;
1782 			uint16_t srclen;
1783 			int nsrc;
1784 
1785 			IGMPSTAT_INC(igps_rcv_v3_queries);
1786 			igmpv3 = (struct igmpv3 *)igmp;
1787 			/*
1788 			 * Validate length based on source count.
1789 			 */
1790 			nsrc = ntohs(igmpv3->igmp_numsrc);
1791 			/*
1792 			 * The max vaue of nsrc is limited by the
1793 			 * MTU of the network on which the datagram
1794 			 * is received
1795 			 */
1796 			if (nsrc < 0 || nsrc > IGMP_V3_QUERY_MAX_SRCS) {
1797 				IGMPSTAT_INC(igps_rcv_tooshort);
1798 				OIGMPSTAT_INC(igps_rcv_tooshort);
1799 				m_freem(m);
1800 				return;
1801 			}
1802 			srclen = sizeof(struct in_addr) * (uint16_t)nsrc;
1803 			if (igmplen < (IGMP_V3_QUERY_MINLEN + srclen)) {
1804 				IGMPSTAT_INC(igps_rcv_tooshort);
1805 				OIGMPSTAT_INC(igps_rcv_tooshort);
1806 				m_freem(m);
1807 				return;
1808 			}
1809 			igmpv3len = IGMP_V3_QUERY_MINLEN + srclen;
1810 			/*
1811 			 * A bit more expensive than M_STRUCT_GET,
1812 			 * but ensures alignment.
1813 			 */
1814 			M_STRUCT_GET0(igmpv3, struct igmpv3 *, m,
1815 			    off, igmpv3len);
1816 			if (igmpv3 == NULL) {
1817 				IGMPSTAT_INC(igps_rcv_tooshort);
1818 				OIGMPSTAT_INC(igps_rcv_tooshort);
1819 				return;
1820 			}
1821 			/*
1822 			 * N.B.: we assume the packet was correctly
1823 			 * aligned in ip_input.
1824 			 */
1825 			if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
1826 				m_freem(m);
1827 				return;
1828 			}
1829 		}
1830 		break;
1831 		}
1832 		break;
1833 
1834 	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
1835 		if (!igmp_v1enable) {
1836 			break;
1837 		}
1838 		if (igmp_input_v1_report(ifp, m, ip, igmp) != 0) {
1839 			m_freem(m);
1840 			return;
1841 		}
1842 		break;
1843 
1844 	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
1845 		if (!igmp_v2enable) {
1846 			break;
1847 		}
1848 		if (!ip_checkrouteralert(m)) {
1849 			IGMPSTAT_INC(igps_rcv_nora);
1850 		}
1851 		if (igmp_input_v2_report(ifp, m, ip, igmp) != 0) {
1852 			m_freem(m);
1853 			return;
1854 		}
1855 		break;
1856 
1857 	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
1858 		/*
1859 		 * Hosts do not need to process IGMPv3 membership reports,
1860 		 * as report suppression is no longer required.
1861 		 */
1862 		if (!ip_checkrouteralert(m)) {
1863 			IGMPSTAT_INC(igps_rcv_nora);
1864 		}
1865 		break;
1866 
1867 	default:
1868 		break;
1869 	}
1870 
1871 	IGMP_LOCK_ASSERT_NOTHELD();
1872 	/*
1873 	 * Pass all valid IGMP packets up to any process(es) listening on a
1874 	 * raw IGMP socket.
1875 	 */
1876 	rip_input(m, off);
1877 }
1878 
1879 /*
1880  * Schedule IGMP timer based on various parameters; caller must ensure that
1881  * lock ordering is maintained as this routine acquires IGMP global lock.
1882  */
1883 void
igmp_set_timeout(struct igmp_tparams * itp)1884 igmp_set_timeout(struct igmp_tparams *itp)
1885 {
1886 	IGMP_LOCK_ASSERT_NOTHELD();
1887 	VERIFY(itp != NULL);
1888 
1889 	if (itp->qpt != 0 || itp->it != 0 || itp->cst != 0 || itp->sct != 0) {
1890 		IGMP_LOCK();
1891 		if (itp->qpt != 0) {
1892 			querier_present_timers_running = 1;
1893 		}
1894 		if (itp->it != 0) {
1895 			interface_timers_running = 1;
1896 		}
1897 		if (itp->cst != 0) {
1898 			current_state_timers_running = 1;
1899 		}
1900 		if (itp->sct != 0) {
1901 			state_change_timers_running = 1;
1902 		}
1903 		if (itp->fast) {
1904 			igmp_sched_fast_timeout();
1905 		} else {
1906 			igmp_sched_timeout();
1907 		}
1908 		IGMP_UNLOCK();
1909 	}
1910 }
1911 
1912 void
igmp_set_fast_timeout(struct igmp_tparams * itp)1913 igmp_set_fast_timeout(struct igmp_tparams *itp)
1914 {
1915 	VERIFY(itp != NULL);
1916 	itp->fast = true;
1917 	igmp_set_timeout(itp);
1918 }
1919 
1920 /*
1921  * IGMP timer handler (per 1 second).
 *
 * Thread-call callout shared by the regular and the fast IGMP timers.
 * A non-NULL arg0 marks a fast-timer invocation; arg1 is unused.
 *
 * With the IGMP lock held it:
 *  - (regular runs only) ages the IGMPv1/v2 "querier present" timers
 *    and the IGMPv3 General Query response timers of every interface;
 *  - if any current-state or state-change timer is flagged as running,
 *    walks every in_multi of every interface, ages the group timers,
 *    and dispatches the resulting report queues;
 *  - clears the run flag matching this invocation and calls
 *    igmp_sched_timeout() to re-arm the regular timer if needed.
 *
 * in_multi records whose final reference could not be dropped under
 * the IGMP lock are collected on a local list and released after the
 * lock has been dropped.
1922  */
1923 static void
igmp_timeout(thread_call_param_t arg0,thread_call_param_t arg1 __unused)1924 igmp_timeout(thread_call_param_t arg0, thread_call_param_t arg1 __unused)
1925 {
1926 	struct ifqueue           scq;   /* State-change packets */
1927 	struct ifqueue           qrq;   /* Query response packets */
1928 	struct ifnet            *ifp;
1929 	struct igmp_ifinfo      *igi;
1930 	struct in_multi         *inm;
1931 	unsigned int             loop = 0, uri_sec = 0;
1932 	SLIST_HEAD(, in_multi)  inm_dthead;
	/* The fast thread call is allocated with a non-NULL first parameter. */
1933 	bool                     fast = arg0 != NULL;
1934 
1935 	SLIST_INIT(&inm_dthead);
1936 
1937 	/*
1938 	 * Update coarse-grained networking timestamp (in sec.); the idea
1939 	 * is to piggy-back on the timeout callout to update the counter
1940 	 * returnable via net_uptime().
1941 	 */
1942 	net_update_uptime();
1943 
1944 	IGMP_LOCK();
1945 
1946 	IGMP_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d, fast %d\n", __func__,
1947 	    querier_present_timers_running, interface_timers_running,
1948 	    current_state_timers_running, state_change_timers_running,
1949 	    fast));
1950 
1951 	if (fast) {
1952 		/*
1953 		 * When running the fast timer, skip processing
1954 		 * of "querier present" timers since they are
1955 		 * based on 1-second intervals.
		 * Note that this jump also bypasses the IGMPv3
		 * General Query response timers below.
1956 		 */
1957 		goto skip_query_timers;
1958 	}
1959 	/*
1960 	 * IGMPv1/v2 querier present timer processing.
1961 	 */
1962 	if (querier_present_timers_running) {
		/* Cleared here; set again below if any link still has a timer. */
1963 		querier_present_timers_running = 0;
1964 		LIST_FOREACH(igi, &igi_head, igi_link) {
1965 			IGI_LOCK(igi);
1966 			igmp_v1v2_process_querier_timers(igi);
1967 			if (igi->igi_v1_timer > 0 || igi->igi_v2_timer > 0) {
1968 				querier_present_timers_running = 1;
1969 			}
1970 			IGI_UNLOCK(igi);
1971 		}
1972 	}
1973 
1974 	/*
1975 	 * IGMPv3 General Query response timer processing.
1976 	 */
1977 	if (interface_timers_running) {
1978 		IGMP_PRINTF(("%s: interface timers running\n", __func__));
1979 		interface_timers_running = 0;
1980 		LIST_FOREACH(igi, &igi_head, igi_link) {
1981 			IGI_LOCK(igi);
1982 			if (igi->igi_version != IGMP_VERSION_3) {
1983 				IGI_UNLOCK(igi);
1984 				continue;
1985 			}
1986 			if (igi->igi_v3_timer == 0) {
1987 				/* Do nothing. */
1988 			} else if (--igi->igi_v3_timer == 0) {
				/* Timer just expired; a positive return keeps the flag set. */
1989 				if (igmp_v3_dispatch_general_query(igi) > 0) {
1990 					interface_timers_running = 1;
1991 				}
1992 			} else {
1993 				interface_timers_running = 1;
1994 			}
1995 			IGI_UNLOCK(igi);
1996 		}
1997 	}
1998 
1999 skip_query_timers:
2000 	if (!current_state_timers_running &&
2001 	    !state_change_timers_running) {
2002 		goto out_locked;
2003 	}
2004 
	/*
	 * Both flags are reset here; the group timer routines called
	 * below set them again when a timer is still counting down.
	 */
2005 	current_state_timers_running = 0;
2006 	state_change_timers_running = 0;
2007 
2008 	memset(&qrq, 0, sizeof(struct ifqueue));
2009 	qrq.ifq_maxlen = IGMP_MAX_G_GS_PACKETS;
2010 
2011 	memset(&scq, 0, sizeof(struct ifqueue));
2012 	scq.ifq_maxlen =  IGMP_MAX_STATE_CHANGE_PACKETS;
2013 
2014 	IGMP_PRINTF(("%s: state change timers running\n", __func__));
2015 
2016 	/*
2017 	 * IGMPv1/v2/v3 host report and state-change timer processing.
2018 	 * Note: Processing a v3 group timer may remove a node.
2019 	 */
2020 	LIST_FOREACH(igi, &igi_head, igi_link) {
2021 		struct in_multistep step;
2022 
		/* Snapshot per-link values under the igi lock, then drop it. */
2023 		IGI_LOCK(igi);
2024 		ifp = igi->igi_ifp;
2025 		loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
2026 		uri_sec = IGMP_RANDOM_DELAY(igi->igi_uri);
2027 		IGI_UNLOCK(igi);
2028 
2029 		in_multihead_lock_shared();
2030 		IN_FIRST_MULTI(step, inm);
2031 		while (inm != NULL) {
2032 			INM_LOCK(inm);
			/* Only memberships on this link's interface are processed. */
2033 			if (inm->inm_ifp != ifp) {
2034 				goto next;
2035 			}
2036 
2037 			IGI_LOCK(igi);
2038 			switch (igi->igi_version) {
2039 			case IGMP_VERSION_1:
2040 			case IGMP_VERSION_2:
2041 				igmp_v1v2_process_group_timer(inm,
2042 				    igi->igi_version);
2043 				break;
2044 			case IGMP_VERSION_3:
2045 				igmp_v3_process_group_timers(igi, &qrq,
2046 				    &scq, inm, uri_sec);
2047 				break;
2048 			}
2049 			IGI_UNLOCK(igi);
2050 next:
2051 			INM_UNLOCK(inm);
2052 			IN_NEXT_MULTI(step, inm);
2053 		}
2054 		in_multihead_lock_done();
2055 
2056 		IGI_LOCK(igi);
2057 		if (igi->igi_version == IGMP_VERSION_1 ||
2058 		    igi->igi_version == IGMP_VERSION_2) {
2059 			igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop);
2060 		} else if (igi->igi_version == IGMP_VERSION_3) {
			/*
			 * The local v3 queues are dispatched with the igi
			 * lock dropped (and NULL passed for igi); the lock
			 * is retaken once both queues are verified empty.
			 */
2061 			IGI_UNLOCK(igi);
2062 			igmp_dispatch_queue(NULL, &qrq, 0, loop);
2063 			igmp_dispatch_queue(NULL, &scq, 0, loop);
2064 			VERIFY(qrq.ifq_len == 0);
2065 			VERIFY(scq.ifq_len == 0);
2066 			IGI_LOCK(igi);
2067 		}
2068 		/*
2069 		 * In case there are still any pending membership reports
2070 		 * which didn't get drained at version change time.
2071 		 */
2072 		IF_DRAIN(&igi->igi_v2q);
2073 		/*
2074 		 * Release all deferred inm records, and drain any locally
2075 		 * enqueued packets; do it even if the current IGMP version
2076 		 * for the link is no longer IGMPv3, in order to handle the
2077 		 * version change case.
2078 		 */
2079 		igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
2080 		IGI_UNLOCK(igi);
2081 
2082 		IF_DRAIN(&qrq);
2083 		IF_DRAIN(&scq);
2084 	}
2085 
2086 out_locked:
2087 	/* re-arm the timer if there's work to do */
	/*
	 * Only the run flag of the timer that invoked us is cleared; the
	 * regular scheduler is the one called in both cases.
	 */
2088 	if (fast) {
2089 		igmp_fast_timeout_run = false;
2090 	} else {
2091 		igmp_timeout_run = false;
2092 	}
2093 	igmp_sched_timeout();
2094 	IGMP_UNLOCK();
2095 
2096 	/* Now that we've dropped all locks, release detached records */
2097 	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
2098 }
2099 
2100 static void
igmp_sched_timeout(void)2101 igmp_sched_timeout(void)
2102 {
2103 	static thread_call_t igmp_timeout_tcall;
2104 	uint64_t deadline = 0, leeway = 0;
2105 
2106 	IGMP_LOCK_ASSERT_HELD();
2107 	if (igmp_timeout_tcall == NULL) {
2108 		igmp_timeout_tcall =
2109 		    thread_call_allocate_with_options(igmp_timeout,
2110 		    NULL,
2111 		    THREAD_CALL_PRIORITY_KERNEL,
2112 		    THREAD_CALL_OPTIONS_ONCE);
2113 	}
2114 	if (!igmp_timeout_run &&
2115 	    (querier_present_timers_running || current_state_timers_running ||
2116 	    interface_timers_running || state_change_timers_running)) {
2117 		igmp_timeout_run = true;
2118 		clock_interval_to_deadline(igmp_timeout_delay, NSEC_PER_MSEC,
2119 		    &deadline);
2120 		clock_interval_to_absolutetime_interval(igmp_timeout_leeway,
2121 		    NSEC_PER_MSEC, &leeway);
2122 		thread_call_enter_delayed_with_leeway(igmp_timeout_tcall, NULL,
2123 		    deadline, leeway,
2124 		    THREAD_CALL_DELAY_LEEWAY);
2125 	}
2126 }
2127 
2128 static void
igmp_sched_fast_timeout(void)2129 igmp_sched_fast_timeout(void)
2130 {
2131 	static thread_call_t igmp_fast_timeout_tcall;
2132 
2133 	IGMP_LOCK_ASSERT_HELD();
2134 	if (igmp_fast_timeout_tcall == NULL) {
2135 		igmp_fast_timeout_tcall =
2136 		    thread_call_allocate_with_options(igmp_timeout,
2137 		    igmp_sched_fast_timeout,
2138 		    THREAD_CALL_PRIORITY_KERNEL,
2139 		    THREAD_CALL_OPTIONS_ONCE);
2140 	}
2141 	if (!igmp_fast_timeout_run &&
2142 	    (current_state_timers_running || state_change_timers_running)) {
2143 		igmp_fast_timeout_run = true;
2144 		thread_call_enter(igmp_fast_timeout_tcall);
2145 	}
2146 }
2147 
2148 /*
2149  * Appends an in_multi to the list to be released later.
2150  *
2151  * Caller must be holding igi_lock.
2152  */
2153 static void
igmp_append_relq(struct igmp_ifinfo * igi,struct in_multi * inm)2154 igmp_append_relq(struct igmp_ifinfo *igi, struct in_multi *inm)
2155 {
2156 	IGI_LOCK_ASSERT_HELD(igi);
2157 	if (inm->inm_in_nrele) {
2158 		os_log_debug(OS_LOG_DEFAULT, "%s: inm %llx already on relq ifp %s\n",
2159 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm),
2160 		    if_name(igi->igi_ifp));
2161 		return;
2162 	}
2163 	os_log_debug(OS_LOG_DEFAULT, "%s: adding inm %llx on relq ifp %s\n",
2164 	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm),
2165 	    if_name(igi->igi_ifp));
2166 	inm->inm_in_nrele = true;
2167 	SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
2168 }
2169 
2170 /*
2171  * Free the in_multi reference(s) for this IGMP lifecycle.
2172  *
2173  * Caller must be holding igi_lock.
 *
 * The igi lock is dropped while the in_multihead lock is held
 * exclusively and the queued records are detached; it is reacquired
 * before returning, so the caller sees it held on both entry and exit.
 *
 * igi:        interface whose igi_relinmhead list is emptied.
 * inm_dthead: list that receives every record for which
 *             in_multi_detach() reported the last reference; the
 *             caller is expected to release those later.
2174  */
2175 static void
igmp_flush_relq(struct igmp_ifinfo * igi,struct igmp_inm_relhead * inm_dthead)2176 igmp_flush_relq(struct igmp_ifinfo *igi, struct igmp_inm_relhead *inm_dthead)
2177 {
2178 	struct in_multi *inm;
2179 	SLIST_HEAD(, in_multi) temp_relinmhead;
2180 
2181 	/*
2182 	 * Before dropping the igi_lock, copy all the items in the
2183 	 * release list to a temporary list to prevent other threads
2184 	 * from changing igi_relinmhead while we are traversing it.
2185 	 */
2186 	IGI_LOCK_ASSERT_HELD(igi);
2187 	SLIST_INIT(&temp_relinmhead);
2188 	while ((inm = SLIST_FIRST(&igi->igi_relinmhead)) != NULL) {
2189 		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
2190 		SLIST_INSERT_HEAD(&temp_relinmhead, inm, inm_nrele);
2191 	}
2192 	IGI_UNLOCK(igi);
2193 	in_multihead_lock_exclusive();
2194 	while ((inm = SLIST_FIRST(&temp_relinmhead)) != NULL) {
2195 		int lastref;
2196 
2197 		SLIST_REMOVE_HEAD(&temp_relinmhead, inm_nrele);
2198 		INM_LOCK(inm);
2199 		os_log_debug(OS_LOG_DEFAULT, "%s: flushing %llx on relq ifp %s",
2200 		    __func__,
2201 		    (uint64_t)VM_KERNEL_ADDRPERM(inm),
2202 		    if_name(inm->inm_ifp));
		/* Undo the bookkeeping done when the record was queued. */
2203 		VERIFY(inm->inm_in_nrele == true);
2204 		inm->inm_in_nrele = false;
2205 		VERIFY(inm->inm_nrelecnt != 0);
2206 		inm->inm_nrelecnt--;
2207 		lastref = in_multi_detach(inm);
		/* A last-reference detach implies detached and no requests left. */
2208 		VERIFY(!lastref || (!(inm->inm_debug & IFD_ATTACHED) &&
2209 		    inm->inm_reqcnt == 0));
2210 		INM_UNLOCK(inm);
2211 		/* from igi_relinmhead */
2212 		INM_REMREF(inm);
2213 		/* from in_multihead list */
2214 		if (lastref) {
2215 			/*
2216 			 * Defer releasing our final reference, as we
2217 			 * are holding the IGMP lock at this point, and
2218 			 * we could end up with locking issues later on
2219 			 * (while issuing SIOCDELMULTI) when this is the
2220 			 * final reference count.  Let the caller do it
2221 			 * when it is safe.
2222 			 */
2223 			IGMP_ADD_DETACHED_INM(inm_dthead, inm);
2224 		}
2225 	}
2226 	in_multihead_lock_done();
	/* Restore the lock state the caller expects. */
2227 	IGI_LOCK(igi);
2228 }
2229 
2230 /*
2231  * Update host report group timer for IGMPv1/v2.
2232  * Will update the global pending timer flags.
2233  */
2234 static void
igmp_v1v2_process_group_timer(struct in_multi * inm,const int igmp_version)2235 igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version)
2236 {
2237 	int report_timer_expired;
2238 
2239 	IGMP_LOCK_ASSERT_HELD();
2240 	INM_LOCK_ASSERT_HELD(inm);
2241 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2242 
2243 	if (inm->inm_timer == 0) {
2244 		report_timer_expired = 0;
2245 	} else if (--inm->inm_timer == 0) {
2246 		report_timer_expired = 1;
2247 	} else {
2248 		current_state_timers_running = 1;
2249 		/* caller will schedule timer */
2250 		return;
2251 	}
2252 
2253 	switch (inm->inm_state) {
2254 	case IGMP_NOT_MEMBER:
2255 	case IGMP_SILENT_MEMBER:
2256 	case IGMP_IDLE_MEMBER:
2257 	case IGMP_LAZY_MEMBER:
2258 	case IGMP_SLEEPING_MEMBER:
2259 	case IGMP_AWAKENING_MEMBER:
2260 		break;
2261 	case IGMP_REPORTING_MEMBER:
2262 		if (report_timer_expired) {
2263 			inm->inm_state = IGMP_IDLE_MEMBER;
2264 			(void) igmp_v1v2_queue_report(inm,
2265 			    (igmp_version == IGMP_VERSION_2) ?
2266 			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
2267 			    IGMP_v1_HOST_MEMBERSHIP_REPORT);
2268 			INM_LOCK_ASSERT_HELD(inm);
2269 			IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2270 		}
2271 		break;
2272 	case IGMP_G_QUERY_PENDING_MEMBER:
2273 	case IGMP_SG_QUERY_PENDING_MEMBER:
2274 	case IGMP_LEAVING_MEMBER:
2275 		break;
2276 	}
2277 }
2278 
2279 /*
2280  * Update a group's timers for IGMPv3.
2281  * Will update the global pending timer flags.
2282  * Note: Unlocked read from igi.
 * NOTE(review): the remark above looks stale -- this function asserts
 * that the igi lock is held; confirm and drop it if so.
 *
 * igi:     interface info of the group's link (must equal inm->inm_igi).
 * qrq:     queue that receives the Current-State record built when the
 *          query response timer expires.
 * scq:     queue handed to igmp_v3_merge_state_changes() when the
 *          state-change retransmit timer expires.
 * inm:     the group whose inm_timer and inm_sctimer are aged.
 * uri_sec: value loaded into inm_sctimer when further state-change
 *          retransmissions remain.
 *
 * Caller must hold the IGMP lock, the inm lock and the igi lock.
2283  */
2284 static void
igmp_v3_process_group_timers(struct igmp_ifinfo * igi,struct ifqueue * qrq,struct ifqueue * scq,struct in_multi * inm,const unsigned int uri_sec)2285 igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
2286     struct ifqueue *qrq, struct ifqueue *scq,
2287     struct in_multi *inm, const unsigned int uri_sec)
2288 {
2289 	int query_response_timer_expired;
2290 	int state_change_retransmit_timer_expired;
2291 
2292 	IGMP_LOCK_ASSERT_HELD();
2293 	INM_LOCK_ASSERT_HELD(inm);
2294 	IGI_LOCK_ASSERT_HELD(igi);
2295 	VERIFY(igi == inm->inm_igi);
2296 
2297 	query_response_timer_expired = 0;
2298 	state_change_retransmit_timer_expired = 0;
2299 
2300 	/*
2301 	 * During a transition from v1/v2 compatibility mode back to v3,
2302 	 * a group record in REPORTING state may still have its group
2303 	 * timer active. This is a no-op in this function; it is easier
2304 	 * to deal with it here than to complicate the timeout path.
2305 	 */
2306 	if (inm->inm_timer == 0) {
2307 		query_response_timer_expired = 0;
2308 	} else if (--inm->inm_timer == 0) {
2309 		query_response_timer_expired = 1;
2310 	} else {
2311 		current_state_timers_running = 1;
2312 		/* caller will schedule timer */
2313 	}
2314 
	/* Same three-way aging for the state-change retransmit timer. */
2315 	if (inm->inm_sctimer == 0) {
2316 		state_change_retransmit_timer_expired = 0;
2317 	} else if (--inm->inm_sctimer == 0) {
2318 		state_change_retransmit_timer_expired = 1;
2319 	} else {
2320 		state_change_timers_running = 1;
2321 		/* caller will schedule timer */
2322 	}
2323 
2324 	/* We are in timer callback, so be quick about it. */
2325 	if (!state_change_retransmit_timer_expired &&
2326 	    !query_response_timer_expired) {
2327 		return;
2328 	}
2329 
2330 	switch (inm->inm_state) {
2331 	case IGMP_NOT_MEMBER:
2332 	case IGMP_SILENT_MEMBER:
2333 	case IGMP_SLEEPING_MEMBER:
2334 	case IGMP_LAZY_MEMBER:
2335 	case IGMP_AWAKENING_MEMBER:
2336 	case IGMP_IDLE_MEMBER:
2337 		break;
2338 	case IGMP_G_QUERY_PENDING_MEMBER:
2339 	case IGMP_SG_QUERY_PENDING_MEMBER:
2340 		/*
2341 		 * Respond to a previously pending Group-Specific
2342 		 * or Group-and-Source-Specific query by enqueueing
2343 		 * the appropriate Current-State report for
2344 		 * immediate transmission.
2345 		 */
2346 		if (query_response_timer_expired) {
2347 			int retval;
2348 
2349 			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
2350 			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
2351 			IGMP_PRINTF(("%s: enqueue record = %d\n",
2352 			    __func__, retval));
2353 			inm->inm_state = IGMP_REPORTING_MEMBER;
2354 			/* XXX Clear recorded sources for next time. */
2355 			inm_clear_recorded(inm);
2356 		}
		/* Query-pending groups also get state-change handling below. */
2357 		OS_FALLTHROUGH;
2358 	case IGMP_REPORTING_MEMBER:
2359 	case IGMP_LEAVING_MEMBER:
2360 		if (state_change_retransmit_timer_expired) {
2361 			/*
2362 			 * State-change retransmission timer fired.
2363 			 * If there are any further pending retransmissions,
2364 			 * set the global pending state-change flag, and
2365 			 * reset the timer.
2366 			 */
2367 			if (--inm->inm_scrv > 0) {
2368 				inm->inm_sctimer = (uint16_t)uri_sec;
2369 				state_change_timers_running = 1;
2370 				/* caller will schedule timer */
2371 			}
2372 			/*
2373 			 * Retransmit the previously computed state-change
2374 			 * report. If there are no further pending
2375 			 * retransmissions, the mbuf queue will be consumed.
2376 			 * Update T0 state to T1 as we have now sent
2377 			 * a state-change.
2378 			 */
2379 			(void) igmp_v3_merge_state_changes(inm, scq);
2380 
2381 			inm_commit(inm);
2382 			IGMP_INET_PRINTF(inm->inm_addr,
2383 			    ("%s: T1 -> T0 for %s/%s\n", __func__,
2384 			    _igmp_inet_buf, if_name(inm->inm_ifp)));
2385 
2386 			/*
2387 			 * If we are leaving the group for good, make sure
2388 			 * we release IGMP's reference to it.
2389 			 * This release must be deferred using a SLIST,
2390 			 * as we are called from a loop which traverses
2391 			 * the in_multihead list.
2392 			 */
2393 			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
2394 			    inm->inm_scrv == 0) {
2395 				inm->inm_state = IGMP_NOT_MEMBER;
2396 				/*
2397 				 * A reference has already been held in
2398 				 * igmp_final_leave() for this inm, so
2399 				 * no need to hold another one.  We also
2400 				 * bumped up its request count then, so
2401 				 * that it stays in in_multihead.  Both
2402 				 * of them will be released when it is
2403 				 * dequeued later on.
2404 				 */
2405 				VERIFY(inm->inm_nrelecnt != 0);
2406 				igmp_append_relq(igi, inm);
2407 			}
2408 		}
2409 		break;
2410 	}
2411 }
2412 
2413 /*
2414  * Suppress a group's pending response to a group or source/group query.
2415  *
2416  * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
2417  * Do NOT update ST1/ST0 as this operation merely suppresses
2418  * the currently pending group record.
2419  * Do NOT suppress the response to a general query. It is possible but
2420  * it would require adding another state or flag.
2421  */
2422 static void
igmp_v3_suppress_group_record(struct in_multi * inm)2423 igmp_v3_suppress_group_record(struct in_multi *inm)
2424 {
2425 	INM_LOCK_ASSERT_HELD(inm);
2426 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2427 
2428 	VERIFY(inm->inm_igi->igi_version == IGMP_VERSION_3);
2429 
2430 	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER &&
2431 	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER) {
2432 		return;
2433 	}
2434 
2435 	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
2436 		inm_clear_recorded(inm);
2437 	}
2438 
2439 	inm->inm_timer = 0;
2440 	inm->inm_state = IGMP_REPORTING_MEMBER;
2441 }
2442 
2443 /*
2444  * Switch to a different IGMP version on the given interface,
2445  * as per Section 7.2.1.
2446  */
2447 static uint32_t
igmp_set_version(struct igmp_ifinfo * igi,const int igmp_version)2448 igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version)
2449 {
2450 	int old_version_timer;
2451 
2452 	IGI_LOCK_ASSERT_HELD(igi);
2453 
2454 	os_log(OS_LOG_DEFAULT, "%s: switching to v%d on ifp %s\n", __func__,
2455 	    igmp_version, if_name(igi->igi_ifp));
2456 
2457 	if (igmp_version == IGMP_VERSION_1 || igmp_version == IGMP_VERSION_2) {
2458 		/*
2459 		 * Compute the "Older Version Querier Present" timer as per
2460 		 * Section 8.12, in seconds.
2461 		 */
2462 		old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
2463 
2464 		if (igmp_version == IGMP_VERSION_1) {
2465 			igi->igi_v1_timer = old_version_timer;
2466 			igi->igi_v2_timer = 0;
2467 		} else if (igmp_version == IGMP_VERSION_2) {
2468 			igi->igi_v1_timer = 0;
2469 			igi->igi_v2_timer = old_version_timer;
2470 		}
2471 	}
2472 
2473 	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2474 		if (igi->igi_version != IGMP_VERSION_2) {
2475 			igmp_v3_cancel_link_timers(igi);
2476 			igi->igi_version = IGMP_VERSION_2;
2477 		}
2478 	} else if (igi->igi_v1_timer > 0) {
2479 		if (igi->igi_version != IGMP_VERSION_1) {
2480 			igmp_v3_cancel_link_timers(igi);
2481 			igi->igi_version = IGMP_VERSION_1;
2482 		}
2483 	}
2484 
2485 	IGI_LOCK_ASSERT_HELD(igi);
2486 
2487 	return MAX(igi->igi_v1_timer, igi->igi_v2_timer);
2488 }
2489 
2490 /*
2491  * Cancel pending IGMPv3 timers for the given link and all groups
2492  * joined on it; state-change, general-query, and group-query timers.
2493  *
2494  * Only ever called on a transition from v3 to Compatibility mode. Kill
2495  * the timers stone dead (this may be expensive for large N groups), they
2496  * will be restarted if Compatibility Mode deems that they must be due to
2497  * query processing.
 *
 * Caller must be holding igi_lock.  The lock is dropped while the
 * in_multi list is walked under the shared in_multihead lock, and is
 * reacquired before returning.
2498  */
2499 static void
igmp_v3_cancel_link_timers(struct igmp_ifinfo * igi)2500 igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
2501 {
2502 	struct ifnet            *ifp;
2503 	struct in_multi         *inm;
2504 	struct in_multistep     step;
2505 
2506 	IGI_LOCK_ASSERT_HELD(igi);
2507 
2508 	IGMP_PRINTF(("%s: cancel v3 timers on ifp 0x%llx(%s)\n", __func__,
2509 	    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), if_name(igi->igi_ifp)));
2510 
2511 	/*
2512 	 * Stop the v3 General Query Response on this link stone dead.
2513 	 * If timer is woken up due to interface_timers_running,
2514 	 * the flag will be cleared if there are no pending link timers.
2515 	 */
2516 	igi->igi_v3_timer = 0;
2517 
2518 	/*
2519 	 * Now clear the current-state and state-change report timers
2520 	 * for all memberships scoped to this link.
2521 	 */
2522 	ifp = igi->igi_ifp;
2523 	IGI_UNLOCK(igi);
2524 
2525 	in_multihead_lock_shared();
2526 	IN_FIRST_MULTI(step, inm);
2527 	while (inm != NULL) {
2528 		INM_LOCK(inm);
		/*
		 * NOTE(review): a record is skipped only when BOTH its
		 * interface and its igi differ from this link's; confirm
		 * whether "||" was intended (presumably the two always
		 * agree, in which case the result is the same).
		 */
2529 		if (inm->inm_ifp != ifp && inm->inm_igi != igi) {
2530 			goto next;
2531 		}
2532 
2533 		switch (inm->inm_state) {
2534 		case IGMP_NOT_MEMBER:
2535 		case IGMP_SILENT_MEMBER:
2536 		case IGMP_IDLE_MEMBER:
2537 		case IGMP_LAZY_MEMBER:
2538 		case IGMP_SLEEPING_MEMBER:
2539 		case IGMP_AWAKENING_MEMBER:
2540 			/*
2541 			 * These states are either not relevant in v3 mode,
2542 			 * or are unreported. Do nothing.
2543 			 */
2544 			break;
2545 		case IGMP_LEAVING_MEMBER:
2546 			/*
2547 			 * If we are leaving the group and switching to
2548 			 * compatibility mode, we need to release the final
2549 			 * reference held for issuing the INCLUDE {}, and
2550 			 * transition to REPORTING to ensure the host leave
2551 			 * message is sent upstream to the old querier --
2552 			 * transition to NOT would lose the leave and race.
2553 			 * During igmp_final_leave(), we bumped up both the
2554 			 * request and reference counts.  Since we cannot
2555 			 * call in_multi_detach() here, defer this task to
2556 			 * the timer routine.
2557 			 */
2558 			VERIFY(inm->inm_nrelecnt != 0);
			/* The release list is protected by the igi lock. */
2559 			IGI_LOCK(igi);
2560 			igmp_append_relq(igi, inm);
2561 			IGI_UNLOCK(igi);
2562 			OS_FALLTHROUGH;
2563 		case IGMP_G_QUERY_PENDING_MEMBER:
2564 		case IGMP_SG_QUERY_PENDING_MEMBER:
2565 			inm_clear_recorded(inm);
2566 			OS_FALLTHROUGH;
2567 		case IGMP_REPORTING_MEMBER:
2568 			inm->inm_state = IGMP_REPORTING_MEMBER;
2569 			break;
2570 		}
2571 		/*
2572 		 * Always clear state-change and group report timers.
2573 		 * Free any pending IGMPv3 state-change records.
2574 		 */
2575 		inm->inm_sctimer = 0;
2576 		inm->inm_timer = 0;
2577 		IF_DRAIN(&inm->inm_scq);
2578 next:
2579 		INM_UNLOCK(inm);
2580 		IN_NEXT_MULTI(step, inm);
2581 	}
2582 	in_multihead_lock_done();
2583 
	/* Restore the lock state the caller expects. */
2584 	IGI_LOCK(igi);
2585 }
2586 
2587 /*
2588  * Update the Older Version Querier Present timers for a link.
2589  * See Section 7.2.1 of RFC 3376.
2590  */
2591 static void
igmp_v1v2_process_querier_timers(struct igmp_ifinfo * igi)2592 igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
2593 {
2594 	IGI_LOCK_ASSERT_HELD(igi);
2595 
2596 	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
2597 		/*
2598 		 * IGMPv1 and IGMPv2 Querier Present timers expired.
2599 		 *
2600 		 * Revert to IGMPv3.
2601 		 */
2602 		if (igi->igi_version != IGMP_VERSION_3) {
2603 			os_log(OS_LOG_DEFAULT, "%s: transition from v%d->v%d "
2604 			    "on %s\n", __func__,
2605 			    igi->igi_version, IGMP_VERSION_3,
2606 			    if_name(igi->igi_ifp));
2607 			igi->igi_version = IGMP_VERSION_3;
2608 			IF_DRAIN(&igi->igi_v2q);
2609 		}
2610 	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2611 		/*
2612 		 * IGMPv1 Querier Present timer expired,
2613 		 * IGMPv2 Querier Present timer running.
2614 		 * If IGMPv2 was disabled since last timeout,
2615 		 * revert to IGMPv3.
2616 		 * If IGMPv2 is enabled, revert to IGMPv2.
2617 		 */
2618 		if (!igmp_v2enable) {
2619 			os_log(OS_LOG_DEFAULT, "%s: transition from v%d->v%d "
2620 			    "on %s\n", __func__,
2621 			    igi->igi_version, IGMP_VERSION_3,
2622 			    if_name(igi->igi_ifp));
2623 			igi->igi_v2_timer = 0;
2624 			igi->igi_version = IGMP_VERSION_3;
2625 			IF_DRAIN(&igi->igi_v2q);
2626 		} else {
2627 			--igi->igi_v2_timer;
2628 			if (igi->igi_version != IGMP_VERSION_2) {
2629 				os_log(OS_LOG_DEFAULT, "%s: transition from v%d->v%d "
2630 				    "on %s\n", __func__,
2631 				    igi->igi_version, IGMP_VERSION_2,
2632 				    if_name(igi->igi_ifp));
2633 				IF_DRAIN(&igi->igi_gq);
2634 				igmp_v3_cancel_link_timers(igi);
2635 				igi->igi_version = IGMP_VERSION_2;
2636 			}
2637 		}
2638 	} else if (igi->igi_v1_timer > 0) {
2639 		/*
2640 		 * IGMPv1 Querier Present timer running.
2641 		 * Stop IGMPv2 timer if running.
2642 		 *
2643 		 * If IGMPv1 was disabled since last timeout,
2644 		 * revert to IGMPv3.
2645 		 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
2646 		 */
2647 		if (!igmp_v1enable) {
2648 			os_log(OS_LOG_DEFAULT, "%s: transition from v%d->v%d "
2649 			    "on %s\n", __func__,
2650 			    igi->igi_version, IGMP_VERSION_3,
2651 			    if_name(igi->igi_ifp));
2652 			igi->igi_v1_timer = 0;
2653 			igi->igi_version = IGMP_VERSION_3;
2654 			IF_DRAIN(&igi->igi_v2q);
2655 		} else {
2656 			--igi->igi_v1_timer;
2657 		}
2658 		if (igi->igi_v2_timer > 0) {
2659 			IGMP_PRINTF(("%s: cancel v2 timer on 0x%llx(%s)\n",
2660 			    __func__,
2661 			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2662 			    if_name(igi->igi_ifp)));
2663 			igi->igi_v2_timer = 0;
2664 		}
2665 	}
2666 }
2667 
2668 /*
2669  * Dispatch an IGMPv1/v2 host report or leave message.
2670  * These are always small enough to fit inside a single mbuf.
2671  */
2672 static int
igmp_v1v2_queue_report(struct in_multi * inm,const int type)2673 igmp_v1v2_queue_report(struct in_multi *inm, const int type)
2674 {
2675 	struct ifnet            *ifp;
2676 	struct igmp             *igmp;
2677 	struct ip               *ip;
2678 	struct mbuf             *m;
2679 	int                     error = 0;
2680 
2681 	INM_LOCK_ASSERT_HELD(inm);
2682 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2683 
2684 	ifp = inm->inm_ifp;
2685 
2686 	MGETHDR(m, M_DONTWAIT, MT_DATA);
2687 	if (m == NULL) {
2688 		return ENOMEM;
2689 	}
2690 	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
2691 
2692 	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);
2693 
2694 	m->m_data += sizeof(struct ip);
2695 	m->m_len = sizeof(struct igmp);
2696 
2697 	igmp = mtod(m, struct igmp *);
2698 	igmp->igmp_type = (u_char)type;
2699 	igmp->igmp_code = 0;
2700 	igmp->igmp_group = inm->inm_addr;
2701 	igmp->igmp_cksum = 0;
2702 	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));
2703 
2704 	m->m_data -= sizeof(struct ip);
2705 	m->m_len += sizeof(struct ip);
2706 
2707 	ip = mtod(m, struct ip *);
2708 	ip->ip_tos = 0;
2709 	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
2710 	ip->ip_off = 0;
2711 	ip->ip_p = IPPROTO_IGMP;
2712 	ip->ip_src.s_addr = INADDR_ANY;
2713 
2714 	if (type == IGMP_HOST_LEAVE_MESSAGE) {
2715 		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
2716 	} else {
2717 		ip->ip_dst = inm->inm_addr;
2718 	}
2719 
2720 	igmp_save_context(m, ifp);
2721 
2722 	m->m_flags |= M_IGMPV2;
2723 	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK) {
2724 		m->m_flags |= M_IGMP_LOOP;
2725 	}
2726 
2727 	/*
2728 	 * Due to the fact that at this point we are possibly holding
2729 	 * in_multihead_lock in shared or exclusive mode, we can't call
2730 	 * igmp_sendpkt() here since that will eventually call ip_output(),
2731 	 * which will try to lock in_multihead_lock and cause a deadlock.
2732 	 * Instead we defer the work to the igmp_timeout() thread, thus
2733 	 * avoiding unlocking in_multihead_lock here.
2734 	 */
2735 	if (IF_QFULL(&inm->inm_igi->igi_v2q)) {
2736 		os_log_error(OS_LOG_DEFAULT,
2737 		    "%s: v1 / v2 outbound queue full on %s\n",
2738 		    __func__, if_name(ifp));
2739 		error = ENOMEM;
2740 		m_freem(m);
2741 	} else {
2742 		IF_ENQUEUE(&inm->inm_igi->igi_v2q, m);
2743 		VERIFY(error == 0);
2744 	}
2745 	return error;
2746 }
2747 
2748 /*
2749  * Process a state change from the upper layer for the given IPv4 group.
2750  *
2751  * Each socket holds a reference on the in_multi in its own ip_moptions.
2752  * The socket layer will have made the necessary updates to the group
2753  * state, it is now up to IGMP to issue a state change report if there
2754  * has been any change between T0 (when the last state-change was issued)
2755  * and T1 (now).
2756  *
2757  * We use the IGMPv3 state machine at group level. The IGMP module
2758  * however makes the decision as to which IGMP protocol version to speak.
2759  * A state change *from* INCLUDE {} always means an initial join.
2760  * A state change *to* INCLUDE {} always means a final leave.
2761  *
2762  * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
2763  * save ourselves a bunch of work; any exclusive mode groups need not
2764  * compute source filter lists.
2765  */
2766 int
igmp_change_state(struct in_multi * inm,struct igmp_tparams * itp)2767 igmp_change_state(struct in_multi *inm, struct igmp_tparams *itp)
2768 {
2769 	struct igmp_ifinfo *igi;
2770 	struct ifnet *ifp;
2771 	int error = 0;
2772 
2773 	VERIFY(itp != NULL);
2774 	bzero(itp, sizeof(*itp));
2775 
2776 	INM_LOCK_ASSERT_HELD(inm);
2777 	VERIFY(inm->inm_igi != NULL);
2778 	IGI_LOCK_ASSERT_NOTHELD(inm->inm_igi);
2779 
2780 	/*
2781 	 * Try to detect if the upper layer just asked us to change state
2782 	 * for an interface which has now gone away.
2783 	 */
2784 	VERIFY(inm->inm_ifma != NULL);
2785 	ifp = inm->inm_ifma->ifma_ifp;
2786 	/*
2787 	 * Sanity check that netinet's notion of ifp is the same as net's.
2788 	 */
2789 	VERIFY(inm->inm_ifp == ifp);
2790 
2791 	igi = IGMP_IFINFO(ifp);
2792 	VERIFY(igi != NULL);
2793 
2794 	/*
2795 	 * If we detect a state transition to or from MCAST_UNDEFINED
2796 	 * for this group, then we are starting or finishing an IGMP
2797 	 * life cycle for this group.
2798 	 */
2799 	if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
2800 		IGMP_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2801 		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode));
2802 		if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
2803 			IGMP_PRINTF(("%s: initial join\n", __func__));
2804 			error = igmp_initial_join(inm, igi, itp);
2805 			goto out;
2806 		} else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
2807 			IGMP_PRINTF(("%s: final leave\n", __func__));
2808 			igmp_final_leave(inm, igi, itp);
2809 			goto out;
2810 		}
2811 	} else {
2812 		IGMP_PRINTF(("%s: filter set change\n", __func__));
2813 	}
2814 
2815 	error = igmp_handle_state_change(inm, igi, itp);
2816 out:
2817 	return error;
2818 }
2819 
2820 /*
2821  * Perform the initial join for an IGMP group.
2822  *
2823  * When joining a group:
2824  *  If the group should have its IGMP traffic suppressed, do nothing.
2825  *  IGMPv1 starts sending IGMPv1 host membership reports.
2826  *  IGMPv2 starts sending IGMPv2 host membership reports.
2827  *  IGMPv3 will schedule an IGMPv3 state-change report containing the
2828  *  initial state of the membership.
 *
 * inm: the group being joined; caller must hold its lock.
 * igi: interface info for the group's link; its lock must NOT be held
 *      on entry (it is taken and released here).
 * itp: timer parameters; cst and/or sct are set to tell the caller
 *      which timers need to be scheduled.
 *
 * Returns 0, the error from igmp_v1v2_queue_report(), or the negated
 * result of igmp_v3_enqueue_group_record() when that result is <= 0.
2829  */
2830 static int
igmp_initial_join(struct in_multi * inm,struct igmp_ifinfo * igi,struct igmp_tparams * itp)2831 igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi,
2832     struct igmp_tparams *itp)
2833 {
2834 	struct ifnet            *ifp;
2835 	struct ifqueue          *ifq;
2836 	int                      error, retval, syncstates;
2837 
2838 	INM_LOCK_ASSERT_HELD(inm);
2839 	IGI_LOCK_ASSERT_NOTHELD(igi);
2840 	VERIFY(itp != NULL);
2841 
2842 	IGMP_INET_PRINTF(inm->inm_addr,
2843 	    ("%s: initial join %s on ifp 0x%llx(%s)\n", __func__,
2844 	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
2845 	    if_name(inm->inm_ifp)));
2846 
2847 	error = 0;
	/* Commit T1 to T0 at the end unless the v3 path defers it. */
2848 	syncstates = 1;
2849 
2850 	ifp = inm->inm_ifp;
2851 
2852 	IGI_LOCK(igi);
2853 	VERIFY(igi->igi_ifp == ifp);
2854 
2855 	/*
2856 	 * Groups joined on loopback or marked as 'not reported',
2857 	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
2858 	 * are never reported in any IGMP protocol exchanges.
2859 	 * All other groups enter the appropriate IGMP state machine
2860 	 * for the version in use on this link.
2861 	 * A link marked as IGIF_SILENT causes IGMP to be completely
2862 	 * disabled for the link.
2863 	 */
2864 	if ((ifp->if_flags & IFF_LOOPBACK) ||
2865 	    (igi->igi_flags & IGIF_SILENT) ||
2866 	    !igmp_isgroupreported(inm->inm_addr)) {
2867 		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
2868 		    __func__));
2869 		inm->inm_state = IGMP_SILENT_MEMBER;
2870 		inm->inm_timer = 0;
2871 	} else {
2872 		/*
2873 		 * Deal with overlapping in_multi lifecycle.
2874 		 * If this group was LEAVING, then make sure
2875 		 * we drop the reference we picked up to keep the
2876 		 * group around for the final INCLUDE {} enqueue.
2877 		 * Since we cannot call in_multi_detach() here,
2878 		 * defer this task to the timer routine.
2879 		 */
2880 		if (igi->igi_version == IGMP_VERSION_3 &&
2881 		    inm->inm_state == IGMP_LEAVING_MEMBER) {
2882 			VERIFY(inm->inm_nrelecnt != 0);
2883 			igmp_append_relq(igi, inm);
2884 		}
2885 
2886 		inm->inm_state = IGMP_REPORTING_MEMBER;
2887 
2888 		switch (igi->igi_version) {
2889 		case IGMP_VERSION_1:
2890 		case IGMP_VERSION_2:
			/* v1/v2: queue the report now and go straight to IDLE. */
2891 			inm->inm_state = IGMP_IDLE_MEMBER;
2892 			error = igmp_v1v2_queue_report(inm,
2893 			    (igi->igi_version == IGMP_VERSION_2) ?
2894 			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
2895 			    IGMP_v1_HOST_MEMBERSHIP_REPORT);
2896 
2897 			INM_LOCK_ASSERT_HELD(inm);
2898 			IGI_LOCK_ASSERT_HELD(igi);
2899 
			/* The report timer is started only if the report was queued. */
2900 			if (error == 0) {
2901 				inm->inm_timer =
2902 				    IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
2903 				itp->cst = 1;
2904 			}
2905 			break;
2906 
2907 		case IGMP_VERSION_3:
2908 			/*
2909 			 * Defer update of T0 to T1, until the first copy
2910 			 * of the state change has been transmitted.
2911 			 */
2912 			syncstates = 0;
2913 
2914 			/*
2915 			 * Immediately enqueue a State-Change Report for
2916 			 * this interface, freeing any previous reports.
2917 			 * Don't kick the timers if there is nothing to do,
2918 			 * or if an error occurred.
2919 			 */
2920 			ifq = &inm->inm_scq;
2921 			IF_DRAIN(ifq);
2922 			retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
2923 			    0, 0);
			/* cst reflects the queue length, set before retval is checked. */
2924 			itp->cst = (ifq->ifq_len > 0);
2925 			IGMP_PRINTF(("%s: enqueue record = %d\n",
2926 			    __func__, retval));
2927 			if (retval <= 0) {
				/* Zero or negative result: report it negated and stop. */
2928 				error = retval * -1;
2929 				break;
2930 			}
2931 
2932 			/*
2933 			 * Schedule transmission of pending state-change
2934 			 * report up to RV times for this link. The timer
2935 			 * will fire at the next igmp_timeout (1 second),
2936 			 * giving us an opportunity to merge the reports.
2937 			 */
2938 			if (igi->igi_flags & IGIF_LOOPBACK) {
2939 				inm->inm_scrv = 1;
2940 			} else {
2941 				VERIFY(igi->igi_rv > 1);
2942 				inm->inm_scrv = (uint16_t)igi->igi_rv;
2943 			}
2944 			inm->inm_sctimer = 1;
2945 			itp->sct = 1;
2946 
2947 			error = 0;
2948 			break;
2949 		}
2950 	}
2951 	IGI_UNLOCK(igi);
2952 
2953 	/*
2954 	 * Only update the T0 state if state change is atomic,
2955 	 * i.e. we don't need to wait for a timer to fire before we
2956 	 * can consider the state change to have been communicated.
2957 	 */
2958 	if (syncstates) {
2959 		inm_commit(inm);
2960 		IGMP_INET_PRINTF(inm->inm_addr,
2961 		    ("%s: T1->T0 for %s / %s\n", __func__,
2962 		    _igmp_inet_buf, if_name(inm->inm_ifp)));
2963 	}
2964 
2965 	return error;
2966 }
2967 
2968 /*
2969  * Issue an intermediate state change during the IGMP life-cycle.
2970  */
2971 static int
igmp_handle_state_change(struct in_multi * inm,struct igmp_ifinfo * igi,struct igmp_tparams * itp)2972 igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi,
2973     struct igmp_tparams *itp)
2974 {
2975 	struct ifnet            *ifp;
2976 	int                      retval = 0;
2977 
2978 	INM_LOCK_ASSERT_HELD(inm);
2979 	IGI_LOCK_ASSERT_NOTHELD(igi);
2980 	VERIFY(itp != NULL);
2981 
2982 	IGMP_INET_PRINTF(inm->inm_addr,
2983 	    ("%s: state change for %s on ifp 0x%llx(%s)\n", __func__,
2984 	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
2985 	    if_name(inm->inm_ifp)));
2986 
2987 	ifp = inm->inm_ifp;
2988 
2989 	IGI_LOCK(igi);
2990 	VERIFY(igi->igi_ifp == ifp);
2991 
2992 	if ((ifp->if_flags & IFF_LOOPBACK) ||
2993 	    (igi->igi_flags & IGIF_SILENT) ||
2994 	    !igmp_isgroupreported(inm->inm_addr) ||
2995 	    (igi->igi_version != IGMP_VERSION_3)) {
2996 		IGI_UNLOCK(igi);
2997 		if (!igmp_isgroupreported(inm->inm_addr)) {
2998 			IGMP_PRINTF(("%s: not kicking state "
2999 			    "machine for silent group\n", __func__));
3000 		}
3001 		IGMP_PRINTF(("%s: nothing to do \n", __func__));
3002 		inm_commit(inm);
3003 		IGMP_INET_PRINTF(inm->inm_addr,
3004 		    ("%s: T1 -> T0 for %s/%s\n", __func__,
3005 		    _igmp_inet_buf, inm->inm_ifp->if_name));
3006 		goto done;
3007 	}
3008 
3009 	IF_DRAIN(&inm->inm_scq);
3010 
3011 	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
3012 	itp->cst = (inm->inm_scq.ifq_len > 0);
3013 	IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
3014 	if (retval <= 0) {
3015 		IGI_UNLOCK(igi);
3016 		retval *= -1;
3017 		goto done;
3018 	}
3019 	/*
3020 	 * If record(s) were enqueued, start the state-change
3021 	 * report timer for this group.
3022 	 */
3023 	inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : (uint16_t)igi->igi_rv);
3024 	inm->inm_sctimer = 1;
3025 	itp->sct = 1;
3026 	IGI_UNLOCK(igi);
3027 done:
3028 	return retval;
3029 }
3030 
3031 /*
3032  * Perform the final leave for an IGMP group.
3033  *
3034  * When leaving a group:
3035  *  IGMPv1 does nothing.
3036  *  IGMPv2 sends a host leave message, if and only if we are the reporter.
3037  *  IGMPv3 enqueues a state-change report containing a transition
3038  *  to INCLUDE {} for immediate transmission.
3039  */
3040 static void
igmp_final_leave(struct in_multi * inm,struct igmp_ifinfo * igi,struct igmp_tparams * itp)3041 igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi,
3042     struct igmp_tparams *itp)
3043 {
3044 	int syncstates = 1;
3045 	bool retried_already = false;
3046 
3047 	INM_LOCK_ASSERT_HELD(inm);
3048 	IGI_LOCK_ASSERT_NOTHELD(igi);
3049 	VERIFY(itp != NULL);
3050 
3051 	IGMP_INET_PRINTF(inm->inm_addr,
3052 	    ("%s: final leave %s on ifp 0x%llx(%s)\n", __func__,
3053 	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
3054 	    if_name(inm->inm_ifp)));
3055 
3056 retry:
3057 	switch (inm->inm_state) {
3058 	case IGMP_NOT_MEMBER:
3059 	case IGMP_SILENT_MEMBER:
3060 	case IGMP_LEAVING_MEMBER:
3061 		/* Already leaving or left; do nothing. */
3062 		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
3063 		    __func__));
3064 		break;
3065 	case IGMP_REPORTING_MEMBER:
3066 	case IGMP_IDLE_MEMBER:
3067 	case IGMP_G_QUERY_PENDING_MEMBER:
3068 	case IGMP_SG_QUERY_PENDING_MEMBER:
3069 		IGI_LOCK(igi);
3070 		if (igi->igi_version == IGMP_VERSION_2) {
3071 			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
3072 			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
3073 				/*
3074 				 * We may be in the process of downgrading to
3075 				 * IGMPv2 but because we just grabbed the
3076 				 * igi_lock we may have lost the race.
3077 				 */
3078 				if (!retried_already) {
3079 					IGI_UNLOCK(igi);
3080 					retried_already = true;
3081 					goto retry;
3082 				} else {
3083 					/*
3084 					 * Proceed with leaving the group
3085 					 * as if it were IGMPv2 even though we
3086 					 * may have an inconsistent multicast state.
3087 					 */
3088 				}
3089 			}
3090 			/* scheduler timer if enqueue is successful */
3091 			itp->cst = (igmp_v1v2_queue_report(inm,
3092 			    IGMP_HOST_LEAVE_MESSAGE) == 0);
3093 
3094 			INM_LOCK_ASSERT_HELD(inm);
3095 			IGI_LOCK_ASSERT_HELD(igi);
3096 
3097 			inm->inm_state = IGMP_NOT_MEMBER;
3098 		} else if (igi->igi_version == IGMP_VERSION_3) {
3099 			/*
3100 			 * Stop group timer and all pending reports.
3101 			 * Immediately enqueue a state-change report
3102 			 * TO_IN {} to be sent on the next timeout,
3103 			 * giving us an opportunity to merge reports.
3104 			 */
3105 			IF_DRAIN(&inm->inm_scq);
3106 			inm->inm_timer = 0;
3107 			if (igi->igi_flags & IGIF_LOOPBACK) {
3108 				inm->inm_scrv = 1;
3109 			} else {
3110 				inm->inm_scrv = (uint16_t)igi->igi_rv;
3111 			}
3112 			IGMP_INET_PRINTF(inm->inm_addr,
3113 			    ("%s: Leaving %s/%s with %d "
3114 			    "pending retransmissions.\n", __func__,
3115 			    _igmp_inet_buf, if_name(inm->inm_ifp),
3116 			    inm->inm_scrv));
3117 			if (inm->inm_scrv == 0) {
3118 				inm->inm_state = IGMP_NOT_MEMBER;
3119 				inm->inm_sctimer = 0;
3120 			} else {
3121 				int retval;
3122 				/*
3123 				 * Stick around in the in_multihead list;
3124 				 * the final detach will be issued by
3125 				 * igmp_v3_process_group_timers() when
3126 				 * the retransmit timer expires.
3127 				 */
3128 				INM_ADDREF_LOCKED(inm);
3129 				VERIFY(inm->inm_debug & IFD_ATTACHED);
3130 				inm->inm_reqcnt++;
3131 				VERIFY(inm->inm_reqcnt >= 1);
3132 				inm->inm_nrelecnt++;
3133 				VERIFY(inm->inm_nrelecnt != 0);
3134 
3135 				retval = igmp_v3_enqueue_group_record(
3136 					&inm->inm_scq, inm, 1, 0, 0);
3137 				itp->cst = (inm->inm_scq.ifq_len > 0);
3138 				KASSERT(retval != 0,
3139 				    ("%s: enqueue record = %d\n", __func__,
3140 				    retval));
3141 
3142 				inm->inm_state = IGMP_LEAVING_MEMBER;
3143 				inm->inm_sctimer = 1;
3144 				itp->sct = 1;
3145 				syncstates = 0;
3146 			}
3147 		}
3148 		IGI_UNLOCK(igi);
3149 		break;
3150 	case IGMP_LAZY_MEMBER:
3151 	case IGMP_SLEEPING_MEMBER:
3152 	case IGMP_AWAKENING_MEMBER:
3153 		/* Our reports are suppressed; do nothing. */
3154 		break;
3155 	}
3156 
3157 	if (syncstates) {
3158 		inm_commit(inm);
3159 		IGMP_INET_PRINTF(inm->inm_addr,
3160 		    ("%s: T1 -> T0 for %s/%s\n", __func__,
3161 		    _igmp_inet_buf, if_name(inm->inm_ifp)));
3162 		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
3163 		IGMP_INET_PRINTF(inm->inm_addr,
3164 		    ("%s: T1 now MCAST_UNDEFINED for %s/%s\n",
3165 		    __func__, _igmp_inet_buf, if_name(inm->inm_ifp)));
3166 	}
3167 }
3168 
3169 /*
3170  * Enqueue an IGMPv3 group record to the given output queue.
3171  *
3172  * XXX This function could do with having the allocation code
3173  * split out, and the multiple-tree-walks coalesced into a single
3174  * routine as has been done in igmp_v3_enqueue_filter_change().
3175  *
3176  * If is_state_change is zero, a current-state record is appended.
3177  * If is_state_change is non-zero, a state-change report is appended.
3178  *
3179  * If is_group_query is non-zero, an mbuf packet chain is allocated.
3180  * If is_group_query is zero, and if there is a packet with free space
3181  * at the tail of the queue, it will be appended to providing there
3182  * is enough free space.
3183  * Otherwise a new mbuf packet chain is allocated.
3184  *
3185  * If is_source_query is non-zero, each source is checked to see if
3186  * it was recorded for a Group-Source query, and will be omitted if
3187  * it is not both in-mode and recorded.
3188  *
3189  * The function will attempt to allocate leading space in the packet
3190  * for the IP/IGMP header to be prepended without fragmenting the chain.
3191  *
3192  * If successful the size of all data appended to the queue is returned,
3193  * otherwise an error code less than zero is returned, or zero if
3194  * no record(s) were appended.
3195  */
3196 static int
igmp_v3_enqueue_group_record(struct ifqueue * ifq,struct in_multi * inm,const int is_state_change,const int is_group_query,const int is_source_query)3197 igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
3198     const int is_state_change, const int is_group_query,
3199     const int is_source_query)
3200 {
3201 	struct igmp_grouprec     ig;
3202 	struct igmp_grouprec    *pig;
3203 	struct ifnet            *ifp;
3204 	struct ip_msource       *ims, *nims;
3205 	mbuf_ref_t               m0, m, md;
3206 	int                      error, is_filter_list_change;
3207 	int                      minrec0len, m0srcs, nbytes, off;
3208 	uint16_t                 msrcs;
3209 	int                      record_has_sources;
3210 	int                      now;
3211 	int                      type;
3212 	in_addr_t                naddr;
3213 	uint16_t                 mode;
3214 	u_int16_t                ig_numsrc;
3215 
3216 	INM_LOCK_ASSERT_HELD(inm);
3217 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
3218 
3219 	error = 0;
3220 	ifp = inm->inm_ifp;
3221 	is_filter_list_change = 0;
3222 	m = NULL;
3223 	m0 = NULL;
3224 	m0srcs = 0;
3225 	msrcs = 0;
3226 	nbytes = 0;
3227 	nims = NULL;
3228 	record_has_sources = 1;
3229 	pig = NULL;
3230 	type = IGMP_DO_NOTHING;
3231 	mode = inm->inm_st[1].iss_fmode;
3232 
3233 	/*
3234 	 * If we did not transition out of ASM mode during t0->t1,
3235 	 * and there are no source nodes to process, we can skip
3236 	 * the generation of source records.
3237 	 */
3238 	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
3239 	    inm->inm_nsrc == 0) {
3240 		record_has_sources = 0;
3241 	}
3242 
3243 	if (is_state_change) {
3244 		/*
3245 		 * Queue a state change record.
3246 		 * If the mode did not change, and there are non-ASM
3247 		 * listeners or source filters present,
3248 		 * we potentially need to issue two records for the group.
3249 		 * If we are transitioning to MCAST_UNDEFINED, we need
3250 		 * not send any sources.
3251 		 * If there are ASM listeners, and there was no filter
3252 		 * mode transition of any kind, do nothing.
3253 		 */
3254 		if (mode != inm->inm_st[0].iss_fmode) {
3255 			if (mode == MCAST_EXCLUDE) {
3256 				IGMP_PRINTF(("%s: change to EXCLUDE\n",
3257 				    __func__));
3258 				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
3259 			} else {
3260 				IGMP_PRINTF(("%s: change to INCLUDE\n",
3261 				    __func__));
3262 				type = IGMP_CHANGE_TO_INCLUDE_MODE;
3263 				if (mode == MCAST_UNDEFINED) {
3264 					record_has_sources = 0;
3265 				}
3266 			}
3267 		} else {
3268 			if (record_has_sources) {
3269 				is_filter_list_change = 1;
3270 			} else {
3271 				type = IGMP_DO_NOTHING;
3272 			}
3273 		}
3274 	} else {
3275 		/*
3276 		 * Queue a current state record.
3277 		 */
3278 		if (mode == MCAST_EXCLUDE) {
3279 			type = IGMP_MODE_IS_EXCLUDE;
3280 		} else if (mode == MCAST_INCLUDE) {
3281 			type = IGMP_MODE_IS_INCLUDE;
3282 			VERIFY(inm->inm_st[1].iss_asm == 0);
3283 		}
3284 	}
3285 
3286 	/*
3287 	 * Generate the filter list changes using a separate function.
3288 	 */
3289 	if (is_filter_list_change) {
3290 		return igmp_v3_enqueue_filter_change(ifq, inm);
3291 	}
3292 
3293 	if (type == IGMP_DO_NOTHING) {
3294 		IGMP_INET_PRINTF(inm->inm_addr,
3295 		    ("%s: nothing to do for %s/%s\n",
3296 		    __func__, _igmp_inet_buf,
3297 		    if_name(inm->inm_ifp)));
3298 		return 0;
3299 	}
3300 
3301 	/*
3302 	 * If any sources are present, we must be able to fit at least
3303 	 * one in the trailing space of the tail packet's mbuf,
3304 	 * ideally more.
3305 	 */
3306 	minrec0len = sizeof(struct igmp_grouprec);
3307 	if (record_has_sources) {
3308 		minrec0len += sizeof(in_addr_t);
3309 	}
3310 
3311 	IGMP_INET_PRINTF(inm->inm_addr,
3312 	    ("%s: queueing %s for %s/%s\n", __func__,
3313 	    igmp_rec_type_to_str(type), _igmp_inet_buf,
3314 	    if_name(inm->inm_ifp)));
3315 
3316 	/*
3317 	 * Check if we have a packet in the tail of the queue for this
3318 	 * group into which the first group record for this group will fit.
3319 	 * Otherwise allocate a new packet.
3320 	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
3321 	 * Note: Group records for G/GSR query responses MUST be sent
3322 	 * in their own packet.
3323 	 */
3324 	m0 = ifq->ifq_tail;
3325 	if (!is_group_query &&
3326 	    m0 != NULL &&
3327 	    (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
3328 	    (m0->m_pkthdr.len + minrec0len) <
3329 	    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3330 		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3331 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3332 		m = m0;
3333 		IGMP_PRINTF(("%s: use existing packet\n", __func__));
3334 	} else {
3335 		if (IF_QFULL(ifq)) {
3336 			os_log_error(OS_LOG_DEFAULT,
3337 			    "%s: outbound queue full on %s\n", __func__, if_name(ifp));
3338 			return -ENOMEM;
3339 		}
3340 		m = NULL;
3341 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3342 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3343 		if (!is_state_change && !is_group_query) {
3344 			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3345 			if (m) {
3346 				m->m_data += IGMP_LEADINGSPACE;
3347 			}
3348 		}
3349 		if (m == NULL) {
3350 			m = m_gethdr(M_DONTWAIT, MT_DATA);
3351 			if (m) {
3352 				MH_ALIGN(m, IGMP_LEADINGSPACE);
3353 			}
3354 		}
3355 		if (m == NULL) {
3356 			return -ENOMEM;
3357 		}
3358 
3359 		igmp_save_context(m, ifp);
3360 
3361 		IGMP_PRINTF(("%s: allocated first packet\n", __func__));
3362 	}
3363 
3364 	/*
3365 	 * Append group record.
3366 	 * If we have sources, we don't know how many yet.
3367 	 */
3368 	ig.ig_type = (u_char)type;
3369 	ig.ig_datalen = 0;
3370 	ig.ig_numsrc = 0;
3371 	ig.ig_group = inm->inm_addr;
3372 	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3373 		if (m != m0) {
3374 			m_freem(m);
3375 		}
3376 		os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed\n", __func__);
3377 		return -ENOMEM;
3378 	}
3379 	nbytes += sizeof(struct igmp_grouprec);
3380 
3381 	/*
3382 	 * Append as many sources as will fit in the first packet.
3383 	 * If we are appending to a new packet, the chain allocation
3384 	 * may potentially use clusters; use m_getptr() in this case.
3385 	 * If we are appending to an existing packet, we need to obtain
3386 	 * a pointer to the group record after m_append(), in case a new
3387 	 * mbuf was allocated.
3388 	 * Only append sources which are in-mode at t1. If we are
3389 	 * transitioning to MCAST_UNDEFINED state on the group, do not
3390 	 * include source entries.
3391 	 * Only report recorded sources in our filter set when responding
3392 	 * to a group-source query.
3393 	 */
3394 	if (record_has_sources) {
3395 		if (m == m0) {
3396 			md = m_last(m);
3397 			pig = (struct igmp_grouprec *)(void *)
3398 			    (mtod(md, uint8_t *) + md->m_len - nbytes);
3399 		} else {
3400 			md = m_getptr(m, 0, &off);
3401 			pig = (struct igmp_grouprec *)(void *)
3402 			    (mtod(md, uint8_t *) + off);
3403 		}
3404 		msrcs = 0;
3405 		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
3406 #ifdef IGMP_DEBUG
3407 			char buf[MAX_IPv4_STR_LEN];
3408 
3409 			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3410 			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3411 #endif
3412 			now = ims_get_mode(inm, ims, 1);
3413 			IGMP_PRINTF(("%s: node is %d\n", __func__, now));
3414 			if ((now != mode) ||
3415 			    (now == mode && mode == MCAST_UNDEFINED)) {
3416 				IGMP_PRINTF(("%s: skip node\n", __func__));
3417 				continue;
3418 			}
3419 			if (is_source_query && ims->ims_stp == 0) {
3420 				IGMP_PRINTF(("%s: skip unrecorded node\n",
3421 				    __func__));
3422 				continue;
3423 			}
3424 			IGMP_PRINTF(("%s: append node\n", __func__));
3425 			naddr = htonl(ims->ims_haddr);
3426 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3427 				if (m != m0) {
3428 					m_freem(m);
3429 				}
3430 				os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed\n",
3431 				    __func__);
3432 				return -ENOMEM;
3433 			}
3434 			nbytes += sizeof(in_addr_t);
3435 			++msrcs;
3436 			if (msrcs == m0srcs) {
3437 				break;
3438 			}
3439 		}
3440 		IGMP_PRINTF(("%s: msrcs is %d this packet\n", __func__,
3441 		    msrcs));
3442 		ig_numsrc = htons(msrcs);
3443 		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3444 		nbytes += (msrcs * sizeof(in_addr_t));
3445 	}
3446 
3447 	if (is_source_query && msrcs == 0) {
3448 		IGMP_PRINTF(("%s: no recorded sources to report\n", __func__));
3449 		if (m != m0) {
3450 			m_freem(m);
3451 		}
3452 		return 0;
3453 	}
3454 
3455 	/*
3456 	 * We are good to go with first packet.
3457 	 */
3458 	if (m != m0) {
3459 		IGMP_PRINTF(("%s: enqueueing first packet\n", __func__));
3460 		m->m_pkthdr.vt_nrecs = 1;
3461 		IF_ENQUEUE(ifq, m);
3462 	} else {
3463 		m->m_pkthdr.vt_nrecs++;
3464 	}
3465 	/*
3466 	 * No further work needed if no source list in packet(s).
3467 	 */
3468 	if (!record_has_sources) {
3469 		return nbytes;
3470 	}
3471 
3472 	/*
3473 	 * Whilst sources remain to be announced, we need to allocate
3474 	 * a new packet and fill out as many sources as will fit.
3475 	 * Always try for a cluster first.
3476 	 */
3477 	while (nims != NULL) {
3478 		if (IF_QFULL(ifq)) {
3479 			os_log_error(OS_LOG_DEFAULT, "%s: outbound queue full on %s\n",
3480 			    __func__, if_name(ifp));
3481 			return -ENOMEM;
3482 		}
3483 		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3484 		if (m) {
3485 			m->m_data += IGMP_LEADINGSPACE;
3486 		}
3487 		if (m == NULL) {
3488 			m = m_gethdr(M_DONTWAIT, MT_DATA);
3489 			if (m) {
3490 				MH_ALIGN(m, IGMP_LEADINGSPACE);
3491 			}
3492 		}
3493 		if (m == NULL) {
3494 			return -ENOMEM;
3495 		}
3496 		igmp_save_context(m, ifp);
3497 		md = m_getptr(m, 0, &off);
3498 		pig = (struct igmp_grouprec *)(void *)
3499 		    (mtod(md, uint8_t *) + off);
3500 		IGMP_PRINTF(("%s: allocated next packet\n", __func__));
3501 
3502 		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3503 			if (m != m0) {
3504 				m_freem(m);
3505 			}
3506 			os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed\n",
3507 			    __func__);
3508 			return -ENOMEM;
3509 		}
3510 		m->m_pkthdr.vt_nrecs = 1;
3511 		nbytes += sizeof(struct igmp_grouprec);
3512 
3513 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3514 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3515 
3516 		msrcs = 0;
3517 		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3518 #ifdef IGMP_DEBUG
3519 			char buf[MAX_IPv4_STR_LEN];
3520 
3521 			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3522 			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3523 #endif
3524 			now = ims_get_mode(inm, ims, 1);
3525 			if ((now != mode) ||
3526 			    (now == mode && mode == MCAST_UNDEFINED)) {
3527 				IGMP_PRINTF(("%s: skip node\n", __func__));
3528 				continue;
3529 			}
3530 			if (is_source_query && ims->ims_stp == 0) {
3531 				IGMP_PRINTF(("%s: skip unrecorded node\n",
3532 				    __func__));
3533 				continue;
3534 			}
3535 			IGMP_PRINTF(("%s: append node\n", __func__));
3536 			naddr = htonl(ims->ims_haddr);
3537 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3538 				if (m != m0) {
3539 					m_freem(m);
3540 				}
3541 				os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed",
3542 				    __func__);
3543 				return -ENOMEM;
3544 			}
3545 			++msrcs;
3546 			if (msrcs == m0srcs) {
3547 				break;
3548 			}
3549 		}
3550 		ig_numsrc = htons(msrcs);
3551 		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3552 		nbytes += (msrcs * sizeof(in_addr_t));
3553 
3554 		IGMP_PRINTF(("%s: enqueueing next packet\n", __func__));
3555 		IF_ENQUEUE(ifq, m);
3556 	}
3557 
3558 	return nbytes;
3559 }
3560 
/*
 * Bit mask tracking which kinds of filter-change record have been
 * completed during a record pass.
 * The values line up with the per-source filter modes, so the mode read
 * from an ip_msource node can be cast directly to this type.
 */
typedef enum {
	REC_NONE  = 0,                          /* MCAST_UNDEFINED */
	REC_ALLOW = 1 << 0,                     /* MCAST_INCLUDE */
	REC_BLOCK = 1 << 1,                     /* MCAST_EXCLUDE */
	REC_FULL  = (REC_ALLOW | REC_BLOCK)     /* both record kinds done */
} rectype_t;
3572 
3573 /*
3574  * Enqueue an IGMPv3 filter list change to the given output queue.
3575  *
3576  * Source list filter state is held in an RB-tree. When the filter list
3577  * for a group is changed without changing its mode, we need to compute
3578  * the deltas between T0 and T1 for each source in the filter set,
3579  * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3580  *
3581  * As we may potentially queue two record types, and the entire R-B tree
3582  * needs to be walked at once, we break this out into its own function
3583  * so we can generate a tightly packed queue of packets.
3584  *
3585  * XXX This could be written to only use one tree walk, although that makes
3586  * serializing into the mbuf chains a bit harder. For now we do two walks
3587  * which makes things easier on us, and it may or may not be harder on
3588  * the L2 cache.
3589  *
3590  * If successful the size of all data appended to the queue is returned,
3591  * otherwise an error code less than zero is returned, or zero if
3592  * no record(s) were appended.
3593  */
3594 static int
igmp_v3_enqueue_filter_change(struct ifqueue * ifq,struct in_multi * inm)3595 igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
3596 {
3597 	static const int MINRECLEN =
3598 	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
3599 	struct ifnet            *ifp;
3600 	struct igmp_grouprec     ig;
3601 	struct igmp_grouprec    *pig;
3602 	struct ip_msource       *ims, *nims;
3603 	mbuf_ref_t               m0, m, md;
3604 	in_addr_t                naddr;
3605 	int                      m0srcs, nbytes, npbytes, off, schanged;
3606 	uint16_t                 rsrcs;
3607 	int                      nallow, nblock;
3608 	uint16_t                 mode;
3609 	uint8_t                  now, then;
3610 	rectype_t                crt, drt, nrt;
3611 	u_int16_t                ig_numsrc;
3612 
3613 	INM_LOCK_ASSERT_HELD(inm);
3614 
3615 	if (inm->inm_nsrc == 0 ||
3616 	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0)) {
3617 		return 0;
3618 	}
3619 
3620 	ifp = inm->inm_ifp;                     /* interface */
3621 	mode = inm->inm_st[1].iss_fmode;        /* filter mode at t1 */
3622 	crt = REC_NONE; /* current group record type */
3623 	drt = REC_NONE; /* mask of completed group record types */
3624 	nrt = REC_NONE; /* record type for current node */
3625 	m0srcs = 0;     /* # source which will fit in current mbuf chain */
3626 	nbytes = 0;     /* # of bytes appended to group's state-change queue */
3627 	npbytes = 0;    /* # of bytes appended this packet */
3628 	rsrcs = 0;      /* # sources encoded in current record */
3629 	schanged = 0;   /* # nodes encoded in overall filter change */
3630 	nallow = 0;     /* # of source entries in ALLOW_NEW */
3631 	nblock = 0;     /* # of source entries in BLOCK_OLD */
3632 	nims = NULL;    /* next tree node pointer */
3633 
3634 	/*
3635 	 * For each possible filter record mode.
3636 	 * The first kind of source we encounter tells us which
3637 	 * is the first kind of record we start appending.
3638 	 * If a node transitioned to UNDEFINED at t1, its mode is treated
3639 	 * as the inverse of the group's filter mode.
3640 	 */
3641 	while (drt != REC_FULL) {
3642 		do {
3643 			m0 = ifq->ifq_tail;
3644 			if (m0 != NULL &&
3645 			    (m0->m_pkthdr.vt_nrecs + 1 <=
3646 			    IGMP_V3_REPORT_MAXRECS) &&
3647 			    (m0->m_pkthdr.len + MINRECLEN) <
3648 			    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3649 				m = m0;
3650 				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3651 				    sizeof(struct igmp_grouprec)) /
3652 				    sizeof(in_addr_t);
3653 				IGMP_PRINTF(("%s: use previous packet\n",
3654 				    __func__));
3655 			} else {
3656 				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3657 				if (m) {
3658 					m->m_data += IGMP_LEADINGSPACE;
3659 				}
3660 				if (m == NULL) {
3661 					m = m_gethdr(M_DONTWAIT, MT_DATA);
3662 					if (m) {
3663 						MH_ALIGN(m, IGMP_LEADINGSPACE);
3664 					}
3665 				}
3666 				if (m == NULL) {
3667 					os_log_error(OS_LOG_DEFAULT, "%s: m_get*() failed",
3668 					    __func__);
3669 					return -ENOMEM;
3670 				}
3671 				m->m_pkthdr.vt_nrecs = 0;
3672 				igmp_save_context(m, ifp);
3673 				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3674 				    sizeof(struct igmp_grouprec)) /
3675 				    sizeof(in_addr_t);
3676 				npbytes = 0;
3677 				IGMP_PRINTF(("%s: allocated new packet\n",
3678 				    __func__));
3679 			}
3680 			/*
3681 			 * Append the IGMP group record header to the
3682 			 * current packet's data area.
3683 			 * Recalculate pointer to free space for next
3684 			 * group record, in case m_append() allocated
3685 			 * a new mbuf or cluster.
3686 			 */
3687 			memset(&ig, 0, sizeof(ig));
3688 			ig.ig_group = inm->inm_addr;
3689 			if (!m_append(m, sizeof(ig), (void *)&ig)) {
3690 				if (m != m0) {
3691 					m_freem(m);
3692 				}
3693 				os_log_error(OS_LOG_DEFAULT,
3694 				    "%s: m_append() failed\n",
3695 				    __func__);
3696 				return -ENOMEM;
3697 			}
3698 			npbytes += sizeof(struct igmp_grouprec);
3699 			if (m != m0) {
3700 				/* new packet; offset in c hain */
3701 				md = m_getptr(m, npbytes -
3702 				    sizeof(struct igmp_grouprec), &off);
3703 				pig = (struct igmp_grouprec *)(void *)(mtod(md,
3704 				    uint8_t *) + off);
3705 			} else {
3706 				/* current packet; offset from last append */
3707 				md = m_last(m);
3708 				pig = (struct igmp_grouprec *)(void *)(mtod(md,
3709 				    uint8_t *) + md->m_len -
3710 				    sizeof(struct igmp_grouprec));
3711 			}
3712 			/*
3713 			 * Begin walking the tree for this record type
3714 			 * pass, or continue from where we left off
3715 			 * previously if we had to allocate a new packet.
3716 			 * Only report deltas in-mode at t1.
3717 			 * We need not report included sources as allowed
3718 			 * if we are in inclusive mode on the group,
3719 			 * however the converse is not true.
3720 			 */
3721 			rsrcs = 0;
3722 			if (nims == NULL) {
3723 				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
3724 			}
3725 			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3726 #ifdef IGMP_DEBUG
3727 				char buf[MAX_IPv4_STR_LEN];
3728 
3729 				inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3730 				IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3731 #endif
3732 				now = ims_get_mode(inm, ims, 1);
3733 				then = ims_get_mode(inm, ims, 0);
3734 				IGMP_PRINTF(("%s: mode: t0 %d, t1 %d\n",
3735 				    __func__, then, now));
3736 				if (now == then) {
3737 					IGMP_PRINTF(("%s: skip unchanged\n",
3738 					    __func__));
3739 					continue;
3740 				}
3741 				if (mode == MCAST_EXCLUDE &&
3742 				    now == MCAST_INCLUDE) {
3743 					IGMP_PRINTF(("%s: skip IN src on EX "
3744 					    "group\n", __func__));
3745 					continue;
3746 				}
3747 				nrt = (rectype_t)now;
3748 				if (nrt == REC_NONE) {
3749 					nrt = (rectype_t)(~mode & REC_FULL);
3750 				}
3751 				if (schanged++ == 0) {
3752 					crt = nrt;
3753 				} else if (crt != nrt) {
3754 					continue;
3755 				}
3756 				naddr = htonl(ims->ims_haddr);
3757 				if (!m_append(m, sizeof(in_addr_t),
3758 				    (void *)&naddr)) {
3759 					if (m != m0) {
3760 						m_freem(m);
3761 					}
3762 					os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed\n",
3763 					    __func__);
3764 					return -ENOMEM;
3765 				}
3766 				nallow += !!(crt == REC_ALLOW);
3767 				nblock += !!(crt == REC_BLOCK);
3768 				if (++rsrcs == m0srcs) {
3769 					break;
3770 				}
3771 			}
3772 			/*
3773 			 * If we did not append any tree nodes on this
3774 			 * pass, back out of allocations.
3775 			 */
3776 			if (rsrcs == 0) {
3777 				npbytes -= sizeof(struct igmp_grouprec);
3778 				if (m != m0) {
3779 					IGMP_PRINTF(("%s: m_free(m)\n",
3780 					    __func__));
3781 					m_freem(m);
3782 				} else {
3783 					IGMP_PRINTF(("%s: m_adj(m, -ig)\n",
3784 					    __func__));
3785 					m_adj(m, -((int)sizeof(
3786 						    struct igmp_grouprec)));
3787 				}
3788 				continue;
3789 			}
3790 			npbytes += (rsrcs * sizeof(in_addr_t));
3791 			if (crt == REC_ALLOW) {
3792 				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
3793 			} else if (crt == REC_BLOCK) {
3794 				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
3795 			}
3796 			ig_numsrc = htons(rsrcs);
3797 			bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3798 			/*
3799 			 * Count the new group record, and enqueue this
3800 			 * packet if it wasn't already queued.
3801 			 */
3802 			m->m_pkthdr.vt_nrecs++;
3803 			if (m != m0) {
3804 				IF_ENQUEUE(ifq, m);
3805 			}
3806 			nbytes += npbytes;
3807 		} while (nims != NULL);
3808 		drt |= crt;
3809 		crt = (~crt & REC_FULL);
3810 	}
3811 
3812 	IGMP_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
3813 	    nallow, nblock));
3814 
3815 	return nbytes;
3816 }
3817 
3818 static int
igmp_v3_merge_state_changes(struct in_multi * inm,struct ifqueue * ifscq)3819 igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
3820 {
3821 	struct ifqueue  *gq;
3822 	mbuf_ref_t       m;             /* pending state-change */
3823 	mbuf_ref_t       m0;            /* copy of pending state-change */
3824 	mbuf_ref_t       mt;            /* last state-change in packet */
3825 	mbuf_ref_t       n;;
3826 	int              docopy, domerge;
3827 	u_int            recslen;
3828 
3829 	INM_LOCK_ASSERT_HELD(inm);
3830 
3831 	docopy = 0;
3832 	domerge = 0;
3833 	recslen = 0;
3834 
3835 	/*
3836 	 * If there are further pending retransmissions, make a writable
3837 	 * copy of each queued state-change message before merging.
3838 	 */
3839 	if (inm->inm_scrv > 0) {
3840 		docopy = 1;
3841 	}
3842 
3843 	gq = &inm->inm_scq;
3844 #ifdef IGMP_DEBUG
3845 	if (gq->ifq_head == NULL) {
3846 		IGMP_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
3847 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
3848 	}
3849 #endif
3850 
3851 	/*
3852 	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
3853 	 * packet might not always be at the head of the ifqueue.
3854 	 */
3855 	m = gq->ifq_head;
3856 	while (m != NULL) {
3857 		/*
3858 		 * Only merge the report into the current packet if
3859 		 * there is sufficient space to do so; an IGMPv3 report
3860 		 * packet may only contain 65,535 group records.
3861 		 * Always use a simple mbuf chain concatentation to do this,
3862 		 * as large state changes for single groups may have
3863 		 * allocated clusters.
3864 		 */
3865 		domerge = 0;
3866 		mt = ifscq->ifq_tail;
3867 		if (mt != NULL) {
3868 			recslen = m_length(m);
3869 
3870 			if ((mt->m_pkthdr.vt_nrecs +
3871 			    m->m_pkthdr.vt_nrecs <=
3872 			    IGMP_V3_REPORT_MAXRECS) &&
3873 			    (mt->m_pkthdr.len + recslen <=
3874 			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE))) {
3875 				domerge = 1;
3876 			}
3877 		}
3878 
3879 		if (!domerge && IF_QFULL(gq)) {
3880 			os_log_error(OS_LOG_DEFAULT,
3881 			    "%s: outbound queue full on %s\n",
3882 			    __func__, if_name(inm->inm_ifp));
3883 			n = m->m_nextpkt;
3884 			if (!docopy) {
3885 				IF_REMQUEUE(gq, m);
3886 				m_freem(m);
3887 			}
3888 			m = n;
3889 			continue;
3890 		}
3891 
3892 		if (!docopy) {
3893 			IGMP_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
3894 			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
3895 			n = m->m_nextpkt;
3896 			IF_REMQUEUE(gq, m);
3897 			m0 = m;
3898 			m = n;
3899 		} else {
3900 			IGMP_PRINTF(("%s: copying 0x%llx\n", __func__,
3901 			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
3902 			m0 = m_dup(m, M_NOWAIT);
3903 			if (m0 == NULL) {
3904 				return ENOMEM;
3905 			}
3906 			m0->m_nextpkt = NULL;
3907 			m = m->m_nextpkt;
3908 		}
3909 
3910 		if (!domerge) {
3911 			IGMP_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n",
3912 			    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
3913 			    (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
3914 			IF_ENQUEUE(ifscq, m0);
3915 		} else {
3916 			struct mbuf *mtl;       /* last mbuf of packet mt */
3917 
3918 			IGMP_PRINTF(("%s: merging 0x%llx with ifscq tail "
3919 			    "0x%llx)\n", __func__,
3920 			    (uint64_t)VM_KERNEL_ADDRPERM(m0),
3921 			    (uint64_t)VM_KERNEL_ADDRPERM(mt)));
3922 
3923 			mtl = m_last(mt);
3924 			m0->m_flags &= ~M_PKTHDR;
3925 			mt->m_pkthdr.len += recslen;
3926 			mt->m_pkthdr.vt_nrecs +=
3927 			    m0->m_pkthdr.vt_nrecs;
3928 
3929 			mtl->m_next = m0;
3930 		}
3931 	}
3932 
3933 	return 0;
3934 }
3935 
3936 /*
3937  * Respond to a pending IGMPv3 General Query.
3938  */
3939 static uint32_t
igmp_v3_dispatch_general_query(struct igmp_ifinfo * igi)3940 igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
3941 {
3942 	struct ifnet            *ifp;
3943 	struct in_multi         *inm;
3944 	struct in_multistep     step;
3945 	int                      retval, loop;
3946 
3947 	IGI_LOCK_ASSERT_HELD(igi);
3948 
3949 	VERIFY(igi->igi_version == IGMP_VERSION_3);
3950 
3951 	ifp = igi->igi_ifp;
3952 	IGI_UNLOCK(igi);
3953 
3954 	in_multihead_lock_shared();
3955 	IN_FIRST_MULTI(step, inm);
3956 	while (inm != NULL) {
3957 		INM_LOCK(inm);
3958 		if (inm->inm_ifp != ifp) {
3959 			goto next;
3960 		}
3961 
3962 		switch (inm->inm_state) {
3963 		case IGMP_NOT_MEMBER:
3964 		case IGMP_SILENT_MEMBER:
3965 			break;
3966 		case IGMP_REPORTING_MEMBER:
3967 		case IGMP_IDLE_MEMBER:
3968 		case IGMP_LAZY_MEMBER:
3969 		case IGMP_SLEEPING_MEMBER:
3970 		case IGMP_AWAKENING_MEMBER:
3971 			inm->inm_state = IGMP_REPORTING_MEMBER;
3972 			IGI_LOCK(igi);
3973 			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
3974 			    inm, 0, 0, 0);
3975 			IGI_UNLOCK(igi);
3976 			IGMP_PRINTF(("%s: enqueue record = %d\n",
3977 			    __func__, retval));
3978 			break;
3979 		case IGMP_G_QUERY_PENDING_MEMBER:
3980 		case IGMP_SG_QUERY_PENDING_MEMBER:
3981 		case IGMP_LEAVING_MEMBER:
3982 			break;
3983 		}
3984 next:
3985 		INM_UNLOCK(inm);
3986 		IN_NEXT_MULTI(step, inm);
3987 	}
3988 	in_multihead_lock_done();
3989 
3990 	IGI_LOCK(igi);
3991 	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
3992 	igmp_dispatch_queue(igi, &igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
3993 	    loop);
3994 	IGI_LOCK_ASSERT_HELD(igi);
3995 	/*
3996 	 * Slew transmission of bursts over 1 second intervals.
3997 	 */
3998 	if (igi->igi_gq.ifq_head != NULL) {
3999 		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
4000 			IGMP_RESPONSE_BURST_INTERVAL);
4001 	}
4002 
4003 	return igi->igi_v3_timer;
4004 }
4005 
4006 /*
4007  * Transmit the next pending IGMP message in the output queue.
4008  *
4009  * Must not be called with inm_lock or igi_lock held.
4010  */
4011 static void
igmp_sendpkt(struct mbuf * m)4012 igmp_sendpkt(struct mbuf *m)
4013 {
4014 	struct ip_moptions      *imo;
4015 	struct mbuf             *ipopts, *m0;
4016 	int                     error;
4017 	struct route            ro;
4018 	struct ifnet            *ifp;
4019 
4020 	IGMP_PRINTF(("%s: transmit 0x%llx\n", __func__,
4021 	    (uint64_t)VM_KERNEL_ADDRPERM(m)));
4022 
4023 	ifp = igmp_restore_context(m);
4024 	/*
4025 	 * Check if the ifnet is still attached.
4026 	 */
4027 	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
4028 		os_log_error(OS_LOG_DEFAULT, "%s: dropped 0x%llx as interface went away\n",
4029 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m));
4030 		m_freem(m);
4031 		OSAddAtomic(1, &ipstat.ips_noroute);
4032 		return;
4033 	}
4034 
4035 	ipopts = igmp_sendra ? m_raopt : NULL;
4036 
4037 	imo = ip_allocmoptions(Z_WAITOK);
4038 	if (imo == NULL) {
4039 		m_freem(m);
4040 		return;
4041 	}
4042 
4043 	imo->imo_multicast_ttl  = 1;
4044 	imo->imo_multicast_vif  = -1;
4045 	imo->imo_multicast_loop = 0;
4046 
4047 	/*
4048 	 * If the user requested that IGMP traffic be explicitly
4049 	 * redirected to the loopback interface (e.g. they are running a
4050 	 * MANET interface and the routing protocol needs to see the
4051 	 * updates), handle this now.
4052 	 */
4053 	if (m->m_flags & M_IGMP_LOOP) {
4054 		imo->imo_multicast_ifp = lo_ifp;
4055 	} else {
4056 		imo->imo_multicast_ifp = ifp;
4057 	}
4058 
4059 	if (m->m_flags & M_IGMPV2) {
4060 		m0 = m;
4061 	} else {
4062 		m0 = igmp_v3_encap_report(ifp, m);
4063 		if (m0 == NULL) {
4064 			/*
4065 			 * If igmp_v3_encap_report() failed, then M_PREPEND()
4066 			 * already freed the original mbuf chain.
4067 			 * This means that we don't have to m_freem(m) here.
4068 			 */
4069 			os_log_error(OS_LOG_DEFAULT, "%s: dropped 0x%llx\n", __func__,
4070 			    (uint64_t)VM_KERNEL_ADDRPERM(m));
4071 			IMO_REMREF(imo);
4072 			os_atomic_inc(&ipstat.ips_odropped, relaxed);
4073 			return;
4074 		}
4075 	}
4076 
4077 	igmp_scrub_context(m0);
4078 	m->m_flags &= ~(M_PROTOFLAGS | M_IGMP_LOOP);
4079 	m0->m_pkthdr.rcvif = lo_ifp;
4080 
4081 	if (ifp->if_eflags & IFEF_TXSTART) {
4082 		/*
4083 		 * Use control service class if the interface supports
4084 		 * transmit-start model.
4085 		 */
4086 		(void) m_set_service_class(m0, MBUF_SC_CTL);
4087 	}
4088 	bzero(&ro, sizeof(ro));
4089 	error = ip_output(m0, ipopts, &ro, 0, imo, NULL);
4090 	ROUTE_RELEASE(&ro);
4091 
4092 	IMO_REMREF(imo);
4093 
4094 	if (error) {
4095 		os_log_error(OS_LOG_DEFAULT, "%s: ip_output(0x%llx) = %d\n", __func__,
4096 		    (uint64_t)VM_KERNEL_ADDRPERM(m0), error);
4097 		return;
4098 	}
4099 
4100 	IGMPSTAT_INC(igps_snd_reports);
4101 	OIGMPSTAT_INC(igps_snd_reports);
4102 }
4103 /*
4104  * Encapsulate an IGMPv3 report.
4105  *
4106  * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
4107  * chain has already had its IP/IGMPv3 header prepended. In this case
4108  * the function will not attempt to prepend; the lengths and checksums
4109  * will however be re-computed.
4110  *
4111  * Returns a pointer to the new mbuf chain head, or NULL if the
4112  * allocation failed.
4113  */
4114 static struct mbuf *
igmp_v3_encap_report(struct ifnet * ifp,struct mbuf * m)4115 igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
4116 {
4117 	struct igmp_report      *igmp;
4118 	struct ip               *ip;
4119 	unsigned int             hdrlen, igmpreclen;
4120 
4121 	VERIFY((m->m_flags & M_PKTHDR));
4122 
4123 	igmpreclen = m_length(m);
4124 	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);
4125 
4126 	if (m->m_flags & M_IGMPV3_HDR) {
4127 		igmpreclen -= hdrlen;
4128 	} else {
4129 		M_PREPEND(m, hdrlen, M_DONTWAIT, 1);
4130 		if (m == NULL) {
4131 			return NULL;
4132 		}
4133 		m->m_flags |= M_IGMPV3_HDR;
4134 	}
4135 	if (hdrlen + igmpreclen > USHRT_MAX) {
4136 		os_log_error(OS_LOG_DEFAULT, "%s: invalid length %d\n",
4137 		    __func__, hdrlen + igmpreclen);
4138 		m_freem(m);
4139 		return NULL;
4140 	}
4141 
4142 
4143 	IGMP_PRINTF(("%s: igmpreclen is %d\n", __func__, igmpreclen));
4144 
4145 	m->m_data += sizeof(struct ip);
4146 	m->m_len -= sizeof(struct ip);
4147 
4148 	igmp = mtod(m, struct igmp_report *);
4149 	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
4150 	igmp->ir_rsv1 = 0;
4151 	igmp->ir_rsv2 = 0;
4152 	igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
4153 	igmp->ir_cksum = 0;
4154 	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
4155 	m->m_pkthdr.vt_nrecs = 0;
4156 
4157 	m->m_data -= sizeof(struct ip);
4158 	m->m_len += sizeof(struct ip);
4159 
4160 	ip = mtod(m, struct ip *);
4161 	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
4162 	ip->ip_len = (u_short)(hdrlen + igmpreclen);
4163 	ip->ip_off = IP_DF;
4164 	ip->ip_p = IPPROTO_IGMP;
4165 	ip->ip_sum = 0;
4166 
4167 	ip->ip_src.s_addr = INADDR_ANY;
4168 
4169 	if (m->m_flags & M_IGMP_LOOP) {
4170 		struct in_ifaddr *ia;
4171 
4172 		IFP_TO_IA(ifp, ia);
4173 		if (ia != NULL) {
4174 			IFA_LOCK(&ia->ia_ifa);
4175 			ip->ip_src = ia->ia_addr.sin_addr;
4176 			IFA_UNLOCK(&ia->ia_ifa);
4177 			ifa_remref(&ia->ia_ifa);
4178 		}
4179 	}
4180 
4181 	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);
4182 
4183 	return m;
4184 }
4185 
4186 #ifdef IGMP_DEBUG
4187 static const char *
igmp_rec_type_to_str(const int type)4188 igmp_rec_type_to_str(const int type)
4189 {
4190 	switch (type) {
4191 	case IGMP_CHANGE_TO_EXCLUDE_MODE:
4192 		return "TO_EX";
4193 	case IGMP_CHANGE_TO_INCLUDE_MODE:
4194 		return "TO_IN";
4195 	case IGMP_MODE_IS_EXCLUDE:
4196 		return "MODE_EX";
4197 	case IGMP_MODE_IS_INCLUDE:
4198 		return "MODE_IN";
4199 	case IGMP_ALLOW_NEW_SOURCES:
4200 		return "ALLOW_NEW";
4201 	case IGMP_BLOCK_OLD_SOURCES:
4202 		return "BLOCK_OLD";
4203 	default:
4204 		break;
4205 	}
4206 	return "unknown";
4207 }
4208 #endif
4209 
4210 void
igmp_init(struct protosw * pp,struct domain * dp)4211 igmp_init(struct protosw *pp, struct domain *dp)
4212 {
4213 #pragma unused(dp)
4214 	static int igmp_initialized = 0;
4215 
4216 	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);
4217 
4218 	if (igmp_initialized) {
4219 		return;
4220 	}
4221 	igmp_initialized = 1;
4222 	os_log(OS_LOG_DEFAULT, "%s: initializing\n", __func__);
4223 	igmp_timers_are_running = 0;
4224 	LIST_INIT(&igi_head);
4225 	m_raopt = igmp_ra_alloc();
4226 }
4227