xref: /xnu-12377.81.4/bsd/netinet/igmp.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 /*
2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*-
29  * Copyright (c) 2007-2009 Bruce Simpson.
30  * Copyright (c) 1988 Stephen Deering.
31  * Copyright (c) 1992, 1993
32  *	The Regents of the University of California.  All rights reserved.
33  *
34  * This code is derived from software contributed to Berkeley by
35  * Stephen Deering of Stanford University.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
66  */
67 /*
68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69  * support for mandatory and extensible security protections.  This notice
70  * is included in support of clause 2.2 (b) of the Apple Public License,
71  * Version 2.0.
72  */
73 
74 /*
75  * Internet Group Management Protocol (IGMP) routines.
76  * [RFC1112, RFC2236, RFC3376]
77  *
78  * Written by Steve Deering, Stanford, May 1988.
79  * Modified by Rosen Sharma, Stanford, Aug 1994.
80  * Modified by Bill Fenner, Xerox PARC, Feb 1995.
81  * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
82  * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
83  *
84  * MULTICAST Revision: 3.5.1.4
85  */
86 
87 #include <sys/cdefs.h>
88 
89 #include <sys/param.h>
90 #include <sys/systm.h>
91 #include <sys/malloc.h>
92 #include <sys/mbuf.h>
93 #include <sys/socket.h>
94 #include <sys/protosw.h>
95 #include <sys/kernel.h>
96 #include <sys/mcache.h>
97 
98 #include <libkern/libkern.h>
99 #include <kern/uipc_domain.h>
100 #include <kern/zalloc.h>
101 
102 #include <net/if.h>
103 #include <net/route.h>
104 #include <net/net_sysctl.h>
105 
106 #include <netinet/in.h>
107 #include <netinet/in_var.h>
108 #include <netinet/in_systm.h>
109 #include <netinet/ip.h>
110 #include <netinet/ip_var.h>
111 #include <netinet/igmp.h>
112 #include <netinet/igmp_var.h>
113 #include <netinet/kpi_ipfilter_var.h>
114 
115 #include <os/log.h>
116 
117 #if SKYWALK
118 #include <skywalk/core/skywalk_var.h>
119 #endif /* SKYWALK */
120 
/*
 * Declares struct igmp_inm_relhead, a singly-linked list head of in_multi
 * records.  igi_delete() and igmp_flush_relq() below take a pointer to one.
 */
121 SLIST_HEAD(igmp_inm_relhead, in_multi);
122 
/*
 * Prototypes for the file-local (static) routines: igmp_ifinfo management,
 * query/report input handlers, IGMPv1/v2 and IGMPv3 helpers, and the
 * sysctl handlers.
 */
123 static void     igi_initvar(struct igmp_ifinfo *, struct ifnet *, int);
124 static struct igmp_ifinfo *igi_alloc(zalloc_flags_t);
125 static void     igi_free(struct igmp_ifinfo *);
126 static void     igi_delete(const struct ifnet *, struct igmp_inm_relhead *);
127 static void     igmp_dispatch_queue(struct igmp_ifinfo *, struct ifqueue *,
128     int, const int);
129 static void     igmp_final_leave(struct in_multi *, struct igmp_ifinfo *,
130     struct igmp_tparams *);
131 static int      igmp_handle_state_change(struct in_multi *,
132     struct igmp_ifinfo *, struct igmp_tparams *);
133 static int      igmp_initial_join(struct in_multi *, struct igmp_ifinfo *,
134     struct igmp_tparams *);
135 static int      igmp_input_v1_query(struct ifnet *, const struct ip *,
136     const struct igmp *);
137 static int      igmp_input_v2_query(struct ifnet *, const struct ip *,
138     const struct igmp *);
139 static int      igmp_input_v3_query(struct ifnet *, const struct ip *,
140     /*const*/ struct igmpv3 *__indexable);
141 static int      igmp_input_v3_group_query(struct in_multi *,
142     int, /*const*/ struct igmpv3 *__indexable);
143 static int      igmp_input_v1_report(struct ifnet *, struct mbuf *,
144     /*const*/ struct ip *, /*const*/ struct igmp *);
145 static int      igmp_input_v2_report(struct ifnet *, struct mbuf *,
146     /*const*/ struct ip *, /*const*/ struct igmp *);
147 static void     igmp_sendpkt(struct mbuf *);
148 static __inline__ int   igmp_isgroupreported(const struct in_addr);
149 static struct mbuf *igmp_ra_alloc(void);
/* Only declared when IGMP_DEBUG is defined. */
150 #ifdef IGMP_DEBUG
151 static const char *igmp_rec_type_to_str(const int);
152 #endif
153 static uint32_t igmp_set_version(struct igmp_ifinfo *, const int);
154 static void     igmp_append_relq(struct igmp_ifinfo *, struct in_multi *);
155 static void     igmp_flush_relq(struct igmp_ifinfo *,
156     struct igmp_inm_relhead *);
157 static int      igmp_v1v2_queue_report(struct in_multi *, const int);
158 static void     igmp_v1v2_process_group_timer(struct in_multi *, const int);
159 static void     igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
160 static uint32_t igmp_v2_update_group(struct in_multi *, const int);
161 static void     igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
162 static uint32_t igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
163 static struct mbuf *
164 igmp_v3_encap_report(struct ifnet *, struct mbuf *);
165 static int      igmp_v3_enqueue_group_record(struct ifqueue *,
166     struct in_multi *, const int, const int, const int);
167 static int      igmp_v3_enqueue_filter_change(struct ifqueue *,
168     struct in_multi *);
169 static void     igmp_v3_process_group_timers(struct igmp_ifinfo *,
170     struct ifqueue *, struct ifqueue *, struct in_multi *,
171     const unsigned int);
172 static int      igmp_v3_merge_state_changes(struct in_multi *,
173     struct ifqueue *);
174 static void     igmp_v3_suppress_group_record(struct in_multi *);
/* sysctl handlers; wired up by the SYSCTL_PROC/SYSCTL_NODE entries below. */
175 static int      sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS;
176 static int      sysctl_igmp_gsr SYSCTL_HANDLER_ARGS;
177 static int      sysctl_igmp_default_version SYSCTL_HANDLER_ARGS;
178 
/*
 * Timer scheduling parameters and bookkeeping.  igmp_timeout() has the
 * two-argument thread_call_param_t callback signature; the two *_run flags
 * record whether the regular / fast timer is currently scheduled.
 */
179 static const uint32_t igmp_timeout_delay = 1000; /* in milliseconds */
180 static const uint32_t igmp_timeout_leeway = 500; /* in milliseconds */
181 static bool igmp_timeout_run;            /* IGMP timer is scheduled to run */
182 static bool igmp_fast_timeout_run;       /* IGMP fast timer is scheduled to run */
183 static void igmp_timeout(thread_call_param_t, thread_call_param_t);
184 static void igmp_sched_timeout(void);
185 static void igmp_sched_fast_timeout(void);
186 
187 static struct mbuf *m_raopt;            /* Router Alert option */
188 
/* One flag per class of pending timer work, named after the timer class. */
189 static int querier_present_timers_running;      /* IGMPv1/v2 older version
190                                                  * querier present */
191 static int interface_timers_running;            /* IGMPv3 general
192                                                  * query response */
193 static int state_change_timers_running;         /* IGMPv3 state-change
194                                                  * retransmit */
195 static int current_state_timers_running;        /* IGMPv1/v2 host
196                                                  * report; IGMPv3 g/sg
197                                                  * query response */
198 
199 /*
200  * Subsystem lock macros.
201  */
/*
 * All four wrappers operate on the single file-scope mutex igmp_mtx.
 * The two ASSERT variants check ownership (owned / not owned) through
 * LCK_MTX_ASSERT rather than taking or dropping the lock.
 */
202 #define IGMP_LOCK()                     \
203 	lck_mtx_lock(&igmp_mtx)
204 #define IGMP_LOCK_ASSERT_HELD()         \
205 	LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_OWNED)
206 #define IGMP_LOCK_ASSERT_NOTHELD()      \
207 	LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED)
208 #define IGMP_UNLOCK()                   \
209 	lck_mtx_unlock(&igmp_mtx)
210 
/* List of per-interface igmp_ifinfo records, linked through igi_link. */
211 static LIST_HEAD(, igmp_ifinfo) igi_head;
/* v3 statistics; version and length are stamped into the structure itself. */
212 static struct igmpstat_v3 igmpstat_v3 = {
213 	.igps_version = IGPS_VERSION_3,
214 	.igps_len = sizeof(struct igmpstat_v3),
215 };
216 static struct igmpstat igmpstat; /* old IGMPv2 stats structure */
/* Initial value is 10 seconds; tv_sec is adjustable via sysctl_igmp_gsr(). */
217 static struct timeval igmp_gsrdelay = {.tv_sec = 10, .tv_usec = 0};
218 
/*
 * Tunables and their defaults.  Each one is exported under net.inet.igmp
 * by the SYSCTL_INT / SYSCTL_PROC entries that follow, which also carry
 * the human-readable descriptions.
 */
219 static int igmp_recvifkludge = 1;
220 static int igmp_sendra = 1;
221 static int igmp_sendlocal = 1;
222 static int igmp_v1enable = 1;
223 static int igmp_v2enable = 1;
224 static int igmp_legacysupp = 0;
225 static int igmp_default_version = IGMP_VERSION_3;
226 
/*
 * sysctl registrations under _net_inet_igmp.
 *
 * stats / v3stats are read-only structure exports.  The SYSCTL_INT entries
 * expose the integer tunables read/write.  default_version and gsrdelay go
 * through handler functions instead, which validate the new value and take
 * IGMP_LOCK() themselves; note those two entries are declared without
 * CTLFLAG_LOCKED, unlike the others.
 */
227 SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
228     &igmpstat, igmpstat, "");
229 SYSCTL_STRUCT(_net_inet_igmp, OID_AUTO, v3stats,
230     CTLFLAG_RD | CTLFLAG_LOCKED, &igmpstat_v3, igmpstat_v3, "");
231 SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW | CTLFLAG_LOCKED,
232     &igmp_recvifkludge, 0,
233     "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
234 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW | CTLFLAG_LOCKED,
235     &igmp_sendra, 0,
236     "Send IP Router Alert option in IGMPv2/v3 messages");
237 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW | CTLFLAG_LOCKED,
238     &igmp_sendlocal, 0,
239     "Send IGMP membership reports for 224.0.0.0/24 groups");
240 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
241     &igmp_v1enable, 0,
242     "Enable backwards compatibility with IGMPv1");
243 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW | CTLFLAG_LOCKED,
244     &igmp_v2enable, 0,
245     "Enable backwards compatibility with IGMPv2");
246 SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW | CTLFLAG_LOCKED,
247     &igmp_legacysupp, 0,
248     "Allow v1/v2 reports to suppress v3 group responses");
249 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
250     CTLTYPE_INT | CTLFLAG_RW,
251     &igmp_default_version, 0, sysctl_igmp_default_version, "I",
252     "Default version of IGMP to run on each interface");
253 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
254     CTLTYPE_INT | CTLFLAG_RW,
255     &igmp_gsrdelay.tv_sec, 0, sysctl_igmp_gsr, "I",
256     "Rate limit for IGMPv3 Group-and-Source queries in seconds");
/* The debug knob exists only in IGMP_DEBUG builds and defaults to off. */
257 #ifdef IGMP_DEBUG
258 int igmp_debug = 0;
259 SYSCTL_INT(_net_inet_igmp, OID_AUTO,
260     debug, CTLFLAG_RW | CTLFLAG_LOCKED, &igmp_debug, 0, "");
261 #endif
262 
/* Read-only node whose handler returns one igmp_ifinfo_u per ifindex. */
263 SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
264     sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
265 
266 /* Lock group and attribute for igmp_mtx */
/*
 * The same attribute/group pair is also passed to lck_mtx_init() for each
 * per-interface igi_lock in igi_alloc().
 */
267 static LCK_ATTR_DECLARE(igmp_mtx_attr, 0, 0);
268 static LCK_GRP_DECLARE(igmp_mtx_grp, "igmp_mtx");
269 
270 /*
271  * Locking and reference counting:
272  *
273  * igmp_mtx mainly protects igi_head.  In cases where both igmp_mtx and
274  * in_multihead_lock must be held, the former must be acquired first in order
275  * to maintain lock ordering.  It is not a requirement that igmp_mtx be
276  * acquired first before in_multihead_lock, but in case both must be acquired
277  * in succession, the correct lock ordering must be followed.
278  *
279  * Instead of walking the if_multiaddrs list at the interface and returning
280  * the ifma_protospec value of a matching entry, we search the global list
281  * of in_multi records and find it that way; this is done with in_multihead
282  * lock held.  Doing so avoids the race condition issues that many other BSDs
283  * suffer from (therefore in our implementation, ifma_protospec will never be
284  * NULL for as long as the in_multi is valid.)
285  *
286  * The above creates a requirement for the in_multi to stay in in_multihead
287  * list even after the final IGMP leave (in IGMPv3 mode) until it no longer needs
288  * to be retransmitted (this is not required for IGMPv1/v2.)  In order to handle
289  * this, the request and reference counts of the in_multi are bumped up when
290  * the state changes to IGMP_LEAVING_MEMBER, and later dropped in the timeout
291  * handler.  Each in_multi holds a reference to the underlying igmp_ifinfo.
292  *
293  * Thus, the permitted lock order is:
294  *
295  *	igmp_mtx, in_multihead_lock, inm_lock, igi_lock
296  *
297  * Any may be taken independently, but if any are held at the same time,
298  * the above lock order must be followed.
299  */
/* The subsystem mutex used by the IGMP_LOCK()/IGMP_UNLOCK() macros. */
300 static LCK_MTX_DECLARE_ATTR(igmp_mtx, &igmp_mtx_grp, &igmp_mtx_attr);
/* NOTE(review): not referenced in the visible part of this file; presumably a global "any timer pending" flag - confirm. */
301 static int igmp_timers_are_running;
302 
/*
 * Helpers for the list of detached in_multi records, an SLIST threaded
 * through inm_dtle.
 *
 * Both macros are wrapped in do { } while (0) so that each expands to a
 * single statement and can be used safely as the body of an unbraced
 * if/else.  Call sites must supply the terminating semicolon.
 */

/* Push _inm onto the front of the detached list _head. */
#define IGMP_ADD_DETACHED_INM(_head, _inm) do {                         \
	SLIST_INSERT_HEAD(_head, _inm, inm_dtle);                       \
} while (0)

/*
 * Unlink every record from the detached list _head and drop one reference
 * on each via INM_REMREF(); the list must be empty afterwards.
 */
#define IGMP_REMOVE_DETACHED_INM(_head) do {                            \
	struct in_multi *_inm, *_inm_tmp;                               \
	SLIST_FOREACH_SAFE(_inm, _head, inm_dtle, _inm_tmp) {           \
	        SLIST_REMOVE(_head, _inm, in_multi, inm_dtle);          \
	        INM_REMREF(_inm);                                       \
	}                                                               \
	VERIFY(SLIST_EMPTY(_head));                                     \
} while (0)
315 
/* Typed allocation zone for struct igmp_ifinfo; used by igi_alloc()/igi_free(). */
316 static KALLOC_TYPE_DEFINE(igi_zone, struct igmp_ifinfo, NET_KT_DEFAULT);
317 
318 /* Store IGMPv3 record count in the module private scratch space */
319 #define vt_nrecs        pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]
320 
321 static __inline void
igmp_save_context(struct mbuf * m,struct ifnet * ifp)322 igmp_save_context(struct mbuf *m, struct ifnet *ifp)
323 {
324 	m->m_pkthdr.rcvif = ifp;
325 }
326 
327 static __inline void
igmp_scrub_context(struct mbuf * m)328 igmp_scrub_context(struct mbuf *m)
329 {
330 	m->m_pkthdr.rcvif = NULL;
331 }
332 
#ifdef IGMP_DEBUG
/*
 * Debug helper: format an IPv4 address given in host byte order.
 *
 * haddr is converted to network byte order and handed to inet_ntop()
 * together with the caller's buffer (buf, size bytes).  The return value
 * is whatever inet_ntop() returns.
 */
static __inline const char *
inet_ntop_haddr(in_addr_t haddr, char *buf __counted_by(size), socklen_t size)
{
	struct in_addr net_order = { .s_addr = htonl(haddr) };

	return inet_ntop(AF_INET, &net_order, buf, size);
}
#endif
343 
344 /*
345  * Restore context from a queued IGMP output chain.
346  * Return saved ifp.
347  */
348 static __inline struct ifnet *
igmp_restore_context(struct mbuf * m)349 igmp_restore_context(struct mbuf *m)
350 {
351 	return m->m_pkthdr.rcvif;
352 }
353 
354 /*
355  * Retrieve or set default IGMP version.
356  */
357 static int
358 sysctl_igmp_default_version SYSCTL_HANDLER_ARGS
359 {
360 #pragma unused(oidp, arg2)
361 	int      error;
362 	int      new;
363 
364 	IGMP_LOCK();
365 
366 	error = SYSCTL_OUT(req, arg1, sizeof(int));
367 	if (error || !req->newptr) {
368 		goto out_locked;
369 	}
370 
371 	new = igmp_default_version;
372 
373 	error = SYSCTL_IN(req, &new, sizeof(int));
374 	if (error) {
375 		goto out_locked;
376 	}
377 
378 	if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
379 		error = EINVAL;
380 		goto out_locked;
381 	}
382 
383 	os_log(OS_LOG_DEFAULT,
384 	    "%s: changed igmp_default_version from %d to %d\n",
385 	    __func__, igmp_default_version, new);
386 
387 	igmp_default_version = new;
388 
389 out_locked:
390 	IGMP_UNLOCK();
391 	return error;
392 }
393 
394 /*
395  * Retrieve or set threshold between group-source queries in seconds.
396  *
397  */
398 static int
399 sysctl_igmp_gsr SYSCTL_HANDLER_ARGS
400 {
401 #pragma unused(arg1, arg2)
402 	int error;
403 	int i;
404 
405 	IGMP_LOCK();
406 
407 	i = (int)igmp_gsrdelay.tv_sec;
408 
409 	error = sysctl_handle_int(oidp, &i, 0, req);
410 	if (error || !req->newptr) {
411 		goto out_locked;
412 	}
413 
414 	if (i < -1 || i >= 60) {
415 		error = EINVAL;
416 		goto out_locked;
417 	}
418 
419 	igmp_gsrdelay.tv_sec = i;
420 
421 out_locked:
422 	IGMP_UNLOCK();
423 	return error;
424 }
425 
426 /*
427  * Expose struct igmp_ifinfo to userland, keyed by ifindex.
428  * For use by ifmcstat(8).
429  *
430  */
431 static int
432 sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS
433 {
434 #pragma unused(oidp)
435 	DECLARE_SYSCTL_HANDLER_ARG_ARRAY(int, 1, name, namelen);
436 	int                      error;
437 	struct ifnet            *ifp;
438 	struct igmp_ifinfo      *igi;
439 	struct igmp_ifinfo_u    igi_u;
440 
441 	if (req->newptr != USER_ADDR_NULL) {
442 		return EPERM;
443 	}
444 
445 	IGMP_LOCK();
446 
447 	if (name[0] <= 0 || name[0] > (u_int)if_index) {
448 		error = ENOENT;
449 		goto out_locked;
450 	}
451 
452 	error = ENOENT;
453 
454 	ifnet_head_lock_shared();
455 	ifp = ifindex2ifnet[name[0]];
456 	ifnet_head_done();
457 	if (ifp == NULL) {
458 		goto out_locked;
459 	}
460 
461 	bzero(&igi_u, sizeof(igi_u));
462 
463 	LIST_FOREACH(igi, &igi_head, igi_link) {
464 		IGI_LOCK(igi);
465 		if (ifp != igi->igi_ifp) {
466 			IGI_UNLOCK(igi);
467 			continue;
468 		}
469 		igi_u.igi_ifindex = igi->igi_ifp->if_index;
470 		igi_u.igi_version = igi->igi_version;
471 		igi_u.igi_v1_timer = igi->igi_v1_timer;
472 		igi_u.igi_v2_timer = igi->igi_v2_timer;
473 		igi_u.igi_v3_timer = igi->igi_v3_timer;
474 		igi_u.igi_flags = igi->igi_flags;
475 		igi_u.igi_rv = igi->igi_rv;
476 		igi_u.igi_qi = igi->igi_qi;
477 		igi_u.igi_qri = igi->igi_qri;
478 		igi_u.igi_uri = igi->igi_uri;
479 		IGI_UNLOCK(igi);
480 
481 		error = SYSCTL_OUT(req, &igi_u, sizeof(igi_u));
482 		break;
483 	}
484 
485 out_locked:
486 	IGMP_UNLOCK();
487 	return error;
488 }
489 
490 /*
491  * Dispatch an entire queue of pending packet chains
492  *
493  * Must not be called with inm_lock held.
494  */
495 static void
igmp_dispatch_queue(struct igmp_ifinfo * igi,struct ifqueue * ifq,int limit,const int loop)496 igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit,
497     const int loop)
498 {
499 	struct mbuf *m;
500 	struct ip *ip;
501 
502 	if (igi != NULL) {
503 		IGI_LOCK_ASSERT_HELD(igi);
504 	}
505 
506 #if SKYWALK
507 	/*
508 	 * Since this function is called holding the igi lock, we need to ensure we
509 	 * don't enter the driver directly because a deadlock can happen if another
510 	 * thread holding the workloop lock tries to acquire the igi lock at
511 	 * the same time.
512 	 */
513 	sk_protect_t __single protect = sk_async_transmit_protect();
514 #endif /* SKYWALK */
515 
516 	for (;;) {
517 		IF_DEQUEUE(ifq, m);
518 		if (m == NULL) {
519 			break;
520 		}
521 		IGMP_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
522 		    (uint64_t)VM_KERNEL_ADDRPERM(ifq),
523 		    (uint64_t)VM_KERNEL_ADDRPERM(m)));
524 		ip = mtod(m, struct ip *);
525 		if (loop) {
526 			m->m_flags |= M_IGMP_LOOP;
527 		}
528 		if (igi != NULL) {
529 			IGI_UNLOCK(igi);
530 		}
531 		igmp_sendpkt(m);
532 		if (igi != NULL) {
533 			IGI_LOCK(igi);
534 		}
535 		if (--limit == 0) {
536 			break;
537 		}
538 	}
539 
540 #if SKYWALK
541 	sk_async_transmit_unprotect(protect);
542 #endif /* SKYWALK */
543 
544 	if (igi != NULL) {
545 		IGI_LOCK_ASSERT_HELD(igi);
546 	}
547 }
548 
549 /*
550  * Filter outgoing IGMP report state by group.
551  *
552  * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
553  * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
554  * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
555  * this may break certain IGMP snooping switches which rely on the old
556  * report behaviour.
557  *
558  * Return zero if the given group is one for which IGMP reports
559  * should be suppressed, or non-zero if reports should be issued.
560  */
561 
562 static __inline__
563 int
igmp_isgroupreported(const struct in_addr addr)564 igmp_isgroupreported(const struct in_addr addr)
565 {
566 	if (in_allhosts(addr) ||
567 	    ((!igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) {
568 		return 0;
569 	}
570 
571 	return 1;
572 }
573 
574 /*
575  * Construct a Router Alert option to use in outgoing packets.
576  */
577 static struct mbuf *
igmp_ra_alloc(void)578 igmp_ra_alloc(void)
579 {
580 	struct mbuf     *m;
581 	struct ipoption *p;
582 
583 	MGET(m, M_WAITOK, MT_DATA);
584 	p = mtod(m, struct ipoption *);
585 	p->ipopt_dst.s_addr = INADDR_ANY;
586 	p->ipopt_list[0] = (char)IPOPT_RA;      /* Router Alert Option */
587 	p->ipopt_list[1] = 0x04;        /* 4 bytes long */
588 	p->ipopt_list[2] = IPOPT_EOL;   /* End of IP option list */
589 	p->ipopt_list[3] = 0x00;        /* pad byte */
590 	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
591 
592 	return m;
593 }
594 
595 /*
596  * Attach IGMP when PF_INET is attached to an interface.
597  */
598 struct igmp_ifinfo *
igmp_domifattach(struct ifnet * ifp,zalloc_flags_t how)599 igmp_domifattach(struct ifnet *ifp, zalloc_flags_t how)
600 {
601 	struct igmp_ifinfo *igi;
602 
603 	os_log_debug(OS_LOG_DEFAULT, "%s: called for ifp %s\n",
604 	    __func__, ifp->if_name);
605 
606 	igi = igi_alloc(how);
607 	if (igi == NULL) {
608 		return NULL;
609 	}
610 
611 	IGMP_LOCK();
612 
613 	IGI_LOCK(igi);
614 	igi_initvar(igi, ifp, 0);
615 	igi->igi_debug |= IFD_ATTACHED;
616 	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
617 	IGI_ADDREF_LOCKED(igi); /* hold a reference for caller */
618 	IGI_UNLOCK(igi);
619 	ifnet_lock_shared(ifp);
620 	igmp_initsilent(ifp, igi);
621 	ifnet_lock_done(ifp);
622 
623 	LIST_INSERT_HEAD(&igi_head, igi, igi_link);
624 
625 	IGMP_UNLOCK();
626 
627 	os_log_info(OS_LOG_DEFAULT, "%s: allocated igmp_ifinfo for ifp %s\n",
628 	    __func__, ifp->if_name);
629 
630 	return igi;
631 }
632 
633 /*
634  * Attach IGMP when PF_INET is reattached to an interface.  Caller is
635  * expected to have an outstanding reference to the igi.
636  */
637 void
igmp_domifreattach(struct igmp_ifinfo * igi)638 igmp_domifreattach(struct igmp_ifinfo *igi)
639 {
640 	struct ifnet *ifp;
641 
642 	IGMP_LOCK();
643 
644 	IGI_LOCK(igi);
645 	VERIFY(!(igi->igi_debug & IFD_ATTACHED));
646 	ifp = igi->igi_ifp;
647 	VERIFY(ifp != NULL);
648 	igi_initvar(igi, ifp, 1);
649 	igi->igi_debug |= IFD_ATTACHED;
650 	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
651 	IGI_UNLOCK(igi);
652 	ifnet_lock_shared(ifp);
653 	igmp_initsilent(ifp, igi);
654 	ifnet_lock_done(ifp);
655 
656 	LIST_INSERT_HEAD(&igi_head, igi, igi_link);
657 
658 	IGMP_UNLOCK();
659 
660 	os_log_info(OS_LOG_DEFAULT, "%s: reattached igmp_ifinfo for ifp %s\n",
661 	    __func__, ifp->if_name);
662 }
663 
664 /*
665  * Hook for domifdetach.
666  */
667 void
igmp_domifdetach(struct ifnet * ifp)668 igmp_domifdetach(struct ifnet *ifp)
669 {
670 	SLIST_HEAD(, in_multi) inm_dthead;
671 
672 	SLIST_INIT(&inm_dthead);
673 
674 	os_log_info(OS_LOG_DEFAULT, "%s: called for ifp %s\n", __func__,
675 	    if_name(ifp));
676 
677 	IGMP_LOCK();
678 	igi_delete(ifp, (struct igmp_inm_relhead *)&inm_dthead);
679 	IGMP_UNLOCK();
680 
681 	/* Now that we're dropped all locks, release detached records */
682 	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
683 }
684 
685 /*
686  * Called at interface detach time.  Note that we only flush all deferred
687  * responses and record releases; all remaining inm records and their source
688  * entries related to this interface are left intact, in order to handle
689  * the reattach case.
690  */
691 static void
igi_delete(const struct ifnet * ifp,struct igmp_inm_relhead * inm_dthead)692 igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead)
693 {
694 	struct igmp_ifinfo *igi, *tigi;
695 
696 	IGMP_LOCK_ASSERT_HELD();
697 
698 	LIST_FOREACH_SAFE(igi, &igi_head, igi_link, tigi) {
699 		IGI_LOCK(igi);
700 		if (igi->igi_ifp == ifp) {
701 			/*
702 			 * Free deferred General Query responses.
703 			 */
704 			IF_DRAIN(&igi->igi_gq);
705 			IF_DRAIN(&igi->igi_v2q);
706 			igmp_flush_relq(igi, inm_dthead);
707 			igi->igi_debug &= ~IFD_ATTACHED;
708 			IGI_UNLOCK(igi);
709 
710 			LIST_REMOVE(igi, igi_link);
711 			IGI_REMREF(igi); /* release igi_head reference */
712 			return;
713 		}
714 		IGI_UNLOCK(igi);
715 	}
716 	panic("%s: igmp_ifinfo not found for ifp %p(%s)", __func__,
717 	    ifp, if_name(ifp));
718 }
719 
720 __private_extern__ void
igmp_initsilent(struct ifnet * ifp,struct igmp_ifinfo * igi)721 igmp_initsilent(struct ifnet *ifp, struct igmp_ifinfo *igi)
722 {
723 	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
724 
725 	IGI_LOCK_ASSERT_NOTHELD(igi);
726 	IGI_LOCK(igi);
727 	if (!(ifp->if_flags & IFF_MULTICAST)) {
728 		igi->igi_flags |= IGIF_SILENT;
729 	} else {
730 		igi->igi_flags &= ~IGIF_SILENT;
731 	}
732 	IGI_UNLOCK(igi);
733 }
734 
735 static void
igi_initvar(struct igmp_ifinfo * igi,struct ifnet * ifp,int reattach)736 igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach)
737 {
738 	IGI_LOCK_ASSERT_HELD(igi);
739 
740 	igi->igi_ifp = ifp;
741 	igi->igi_version = igmp_default_version;
742 	igi->igi_flags = 0;
743 	igi->igi_rv = IGMP_RV_INIT;
744 	igi->igi_qi = IGMP_QI_INIT;
745 	igi->igi_qri = IGMP_QRI_INIT;
746 	igi->igi_uri = IGMP_URI_INIT;
747 
748 	if (!reattach) {
749 		SLIST_INIT(&igi->igi_relinmhead);
750 	}
751 
752 	/*
753 	 * Responses to general queries are subject to bounds.
754 	 */
755 	igi->igi_gq.ifq_maxlen =  IGMP_MAX_RESPONSE_PACKETS;
756 	igi->igi_v2q.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
757 }
758 
759 static struct igmp_ifinfo *
igi_alloc(zalloc_flags_t how)760 igi_alloc(zalloc_flags_t how)
761 {
762 	struct igmp_ifinfo *igi = zalloc_flags(igi_zone, how | Z_ZERO);
763 	if (igi != NULL) {
764 		lck_mtx_init(&igi->igi_lock, &igmp_mtx_grp, &igmp_mtx_attr);
765 		igi->igi_debug |= IFD_ALLOC;
766 	}
767 	return igi;
768 }
769 
770 static void
igi_free(struct igmp_ifinfo * igi)771 igi_free(struct igmp_ifinfo *igi)
772 {
773 	IGI_LOCK(igi);
774 	if (igi->igi_debug & IFD_ATTACHED) {
775 		panic("%s: attached igi=%p is being freed", __func__, igi);
776 		/* NOTREACHED */
777 	} else if (igi->igi_ifp != NULL) {
778 		panic("%s: ifp not NULL for igi=%p", __func__, igi);
779 		/* NOTREACHED */
780 	} else if (!(igi->igi_debug & IFD_ALLOC)) {
781 		panic("%s: igi %p cannot be freed", __func__, igi);
782 		/* NOTREACHED */
783 	} else if (igi->igi_refcnt != 0) {
784 		panic("%s: non-zero refcnt igi=%p", __func__, igi);
785 		/* NOTREACHED */
786 	}
787 	igi->igi_debug &= ~IFD_ALLOC;
788 	IGI_UNLOCK(igi);
789 
790 	lck_mtx_destroy(&igi->igi_lock, &igmp_mtx_grp);
791 	zfree(igi_zone, igi);
792 }
793 
794 void
igi_addref(struct igmp_ifinfo * igi,int locked)795 igi_addref(struct igmp_ifinfo *igi, int locked)
796 {
797 	if (!locked) {
798 		IGI_LOCK_SPIN(igi);
799 	} else {
800 		IGI_LOCK_ASSERT_HELD(igi);
801 	}
802 
803 	if (++igi->igi_refcnt == 0) {
804 		panic("%s: igi=%p wraparound refcnt", __func__, igi);
805 		/* NOTREACHED */
806 	}
807 	if (!locked) {
808 		IGI_UNLOCK(igi);
809 	}
810 }
811 
812 void
igi_remref(struct igmp_ifinfo * igi)813 igi_remref(struct igmp_ifinfo *igi)
814 {
815 	SLIST_HEAD(, in_multi) inm_dthead;
816 	struct ifnet *ifp;
817 
818 	IGI_LOCK_SPIN(igi);
819 
820 	if (igi->igi_refcnt == 0) {
821 		panic("%s: igi=%p negative refcnt", __func__, igi);
822 		/* NOTREACHED */
823 	}
824 
825 	--igi->igi_refcnt;
826 	if (igi->igi_refcnt > 0) {
827 		IGI_UNLOCK(igi);
828 		return;
829 	}
830 
831 	ifp = igi->igi_ifp;
832 	igi->igi_ifp = NULL;
833 	IF_DRAIN(&igi->igi_gq);
834 	IF_DRAIN(&igi->igi_v2q);
835 	SLIST_INIT(&inm_dthead);
836 	igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
837 	IGI_UNLOCK(igi);
838 
839 	/* Now that we're dropped all locks, release detached records */
840 	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
841 
842 	os_log_info(OS_LOG_DEFAULT, "%s: freeing igmp_ifinfo for ifp %s\n",
843 	    __func__, if_name(ifp));
844 
845 	igi_free(igi);
846 }
847 
848 /*
849  * Process a received IGMPv1 query.
850  * Return non-zero if the message should be dropped.
851  */
852 static int
igmp_input_v1_query(struct ifnet * ifp,const struct ip * ip,const struct igmp * igmp)853 igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
854     const struct igmp *igmp)
855 {
856 	struct igmp_ifinfo      *igi;
857 	struct in_multi         *inm;
858 	struct in_multistep     step;
859 	struct igmp_tparams     itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };
860 
861 	IGMP_LOCK_ASSERT_NOTHELD();
862 
863 	/*
864 	 * IGMPv1 Host Membership Queries SHOULD always be addressed to
865 	 * 224.0.0.1. They are always treated as General Queries.
866 	 * igmp_group is always ignored. Do not drop it as a userland
867 	 * daemon may wish to see it.
868 	 */
869 	if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
870 		IGMPSTAT_INC(igps_rcv_badqueries);
871 		OIGMPSTAT_INC(igps_rcv_badqueries);
872 		goto done;
873 	}
874 	IGMPSTAT_INC(igps_rcv_gen_queries);
875 
876 	igi = IGMP_IFINFO(ifp);
877 	VERIFY(igi != NULL);
878 
879 	IGI_LOCK(igi);
880 	if (igi->igi_flags & IGIF_LOOPBACK) {
881 		os_log_debug(OS_LOG_DEFAULT,
882 		    "%s: ignore v1 query on IGIF_LOOPBACK "
883 		    "ifp %s\n", __func__,
884 		    if_name(ifp));
885 		IGI_UNLOCK(igi);
886 		goto done;
887 	}
888 	/*
889 	 * Switch to IGMPv1 host compatibility mode.
890 	 */
891 	itp.qpt = igmp_set_version(igi, IGMP_VERSION_1);
892 	IGI_UNLOCK(igi);
893 
894 	os_log_debug(OS_LOG_DEFAULT, "%s: process v1 query on ifp %s\n", __func__,
895 	    if_name(ifp));
896 
897 	/*
898 	 * Start the timers in all of our group records
899 	 * for the interface on which the query arrived,
900 	 * except those which are already running.
901 	 */
902 	in_multihead_lock_shared();
903 	IN_FIRST_MULTI(step, inm);
904 	while (inm != NULL) {
905 		INM_LOCK(inm);
906 		if (inm->inm_ifp != ifp || inm->inm_timer != 0) {
907 			goto next;
908 		}
909 
910 		switch (inm->inm_state) {
911 		case IGMP_NOT_MEMBER:
912 		case IGMP_SILENT_MEMBER:
913 			break;
914 		case IGMP_G_QUERY_PENDING_MEMBER:
915 		case IGMP_SG_QUERY_PENDING_MEMBER:
916 		case IGMP_REPORTING_MEMBER:
917 		case IGMP_IDLE_MEMBER:
918 		case IGMP_LAZY_MEMBER:
919 		case IGMP_SLEEPING_MEMBER:
920 		case IGMP_AWAKENING_MEMBER:
921 			inm->inm_state = IGMP_REPORTING_MEMBER;
922 			inm->inm_timer = IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
923 			itp.cst = 1;
924 			break;
925 		case IGMP_LEAVING_MEMBER:
926 			break;
927 		}
928 next:
929 		INM_UNLOCK(inm);
930 		IN_NEXT_MULTI(step, inm);
931 	}
932 	in_multihead_lock_done();
933 done:
934 	igmp_set_timeout(&itp);
935 
936 	return 0;
937 }
938 
939 /*
940  * Process a received IGMPv2 general or group-specific query.
941  */
942 static int
igmp_input_v2_query(struct ifnet * ifp,const struct ip * ip,const struct igmp * igmp)943 igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
944     const struct igmp *igmp)
945 {
946 	struct igmp_ifinfo      *igi;
947 	struct in_multi         *inm;
948 	int                      is_general_query;
949 	uint16_t                 timer;
950 	struct igmp_tparams      itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };
951 
952 	IGMP_LOCK_ASSERT_NOTHELD();
953 
954 	is_general_query = 0;
955 
956 	/*
957 	 * Validate address fields upfront.
958 	 */
959 	if (in_nullhost(igmp->igmp_group)) {
960 		/*
961 		 * IGMPv2 General Query.
962 		 * If this was not sent to the all-hosts group, ignore it.
963 		 */
964 		if (!in_allhosts(ip->ip_dst)) {
965 			goto done;
966 		}
967 		IGMPSTAT_INC(igps_rcv_gen_queries);
968 		is_general_query = 1;
969 	} else {
970 		/* IGMPv2 Group-Specific Query. */
971 		IGMPSTAT_INC(igps_rcv_group_queries);
972 	}
973 
974 	igi = IGMP_IFINFO(ifp);
975 	VERIFY(igi != NULL);
976 
977 	IGI_LOCK(igi);
978 	if (igi->igi_flags & IGIF_LOOPBACK) {
979 		os_log_debug(OS_LOG_DEFAULT, "%s: ignore v2 query on IGIF_LOOPBACK "
980 		    "ifp %s\n", __func__, if_name(ifp));
981 		IGI_UNLOCK(igi);
982 		goto done;
983 	}
984 	/*
985 	 * Ignore v2 query if in v1 Compatibility Mode.
986 	 */
987 	if (igi->igi_version == IGMP_VERSION_1) {
988 		IGI_UNLOCK(igi);
989 		goto done;
990 	}
991 	itp.qpt = igmp_set_version(igi, IGMP_VERSION_2);
992 	IGI_UNLOCK(igi);
993 
994 	timer = igmp->igmp_code / IGMP_TIMER_SCALE;
995 	if (timer == 0) {
996 		timer = 1;
997 	}
998 
999 	if (is_general_query) {
1000 		struct in_multistep step;
1001 
1002 		os_log_debug(OS_LOG_DEFAULT, "%s: process v2 general query on ifp %s\n",
1003 		    __func__, if_name(ifp));
1004 		/*
1005 		 * For each reporting group joined on this
1006 		 * interface, kick the report timer.
1007 		 */
1008 		in_multihead_lock_shared();
1009 		IN_FIRST_MULTI(step, inm);
1010 		while (inm != NULL) {
1011 			INM_LOCK(inm);
1012 			if (inm->inm_ifp == ifp) {
1013 				itp.cst += igmp_v2_update_group(inm, timer);
1014 			}
1015 			INM_UNLOCK(inm);
1016 			IN_NEXT_MULTI(step, inm);
1017 		}
1018 		in_multihead_lock_done();
1019 	} else {
1020 		/*
1021 		 * Group-specific IGMPv2 query, we need only
1022 		 * look up the single group to process it.
1023 		 */
1024 		in_multihead_lock_shared();
1025 		IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1026 		in_multihead_lock_done();
1027 		if (inm != NULL) {
1028 			INM_LOCK(inm);
1029 			IGMP_INET_PRINTF(igmp->igmp_group,
1030 			    ("process v2 query %s on ifp 0x%llx(%s)\n",
1031 			    _igmp_inet_buf,
1032 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1033 			itp.cst = igmp_v2_update_group(inm, timer);
1034 			INM_UNLOCK(inm);
1035 			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1036 		}
1037 	}
1038 done:
1039 	igmp_set_timeout(&itp);
1040 
1041 	return 0;
1042 }
1043 
1044 /*
1045  * Update the report timer on a group in response to an IGMPv2 query.
1046  *
1047  * If we are becoming the reporting member for this group, start the timer.
1048  * If we already are the reporting member for this group, and timer is
1049  * below the threshold, reset it.
1050  *
1051  * We may be updating the group for the first time since we switched
1052  * to IGMPv3. If we are, then we must clear any recorded source lists,
1053  * and transition to REPORTING state; the group timer is overloaded
1054  * for group and group-source query responses.
1055  *
1056  * Unlike IGMPv3, the delay per group should be jittered
1057  * to avoid bursts of IGMPv2 reports.
1058  */
1059 static uint32_t
igmp_v2_update_group(struct in_multi * inm,const int timer)1060 igmp_v2_update_group(struct in_multi *inm, const int timer)
1061 {
1062 	IGMP_INET_PRINTF(inm->inm_addr, ("%s: %s/%s timer=%d\n",
1063 	    __func__, _igmp_inet_buf, if_name(inm->inm_ifp),
1064 	    timer));
1065 
1066 	INM_LOCK_ASSERT_HELD(inm);
1067 
1068 	switch (inm->inm_state) {
1069 	case IGMP_NOT_MEMBER:
1070 	case IGMP_SILENT_MEMBER:
1071 		break;
1072 	case IGMP_REPORTING_MEMBER:
1073 		if (inm->inm_timer != 0 &&
1074 		    inm->inm_timer <= timer) {
1075 			IGMP_PRINTF(("%s: REPORTING and timer running, "
1076 			    "skipping.\n", __func__));
1077 			break;
1078 		}
1079 		OS_FALLTHROUGH;
1080 	case IGMP_SG_QUERY_PENDING_MEMBER:
1081 	case IGMP_G_QUERY_PENDING_MEMBER:
1082 	case IGMP_IDLE_MEMBER:
1083 	case IGMP_LAZY_MEMBER:
1084 	case IGMP_AWAKENING_MEMBER:
1085 		IGMP_PRINTF(("%s: ->REPORTING\n", __func__));
1086 		inm->inm_state = IGMP_REPORTING_MEMBER;
1087 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1088 		break;
1089 	case IGMP_SLEEPING_MEMBER:
1090 		IGMP_PRINTF(("%s: ->AWAKENING\n", __func__));
1091 		inm->inm_state = IGMP_AWAKENING_MEMBER;
1092 		break;
1093 	case IGMP_LEAVING_MEMBER:
1094 		break;
1095 	}
1096 
1097 	return inm->inm_timer;
1098 }
1099 
1100 /*
1101  * Process a received IGMPv3 general, group-specific or
1102  * group-and-source-specific query.
1103  * Assumes m has already been pulled up to the full IGMP message length.
1104  * Return 0 if successful, otherwise an appropriate error code is returned.
1105  */
1106 static int
igmp_input_v3_query(struct ifnet * ifp,const struct ip * ip,struct igmpv3 * __indexable igmpv3)1107 igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
1108     /*const*/ struct igmpv3 *__indexable igmpv3)
1109 {
1110 	struct igmp_ifinfo      *igi;
1111 	struct in_multi         *inm;
1112 	int                      is_general_query;
1113 	uint32_t                 maxresp, nsrc, qqi;
1114 	uint32_t                 timer;
1115 	uint8_t                  qrv;
1116 	struct igmp_tparams      itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };
1117 
1118 	IGMP_LOCK_ASSERT_NOTHELD();
1119 
1120 	is_general_query = 0;
1121 
1122 	os_log_debug(OS_LOG_DEFAULT, "%s: process v3 query on ifp %s\n", __func__,
1123 	    if_name(ifp));
1124 
1125 	maxresp = igmpv3->igmp_code;    /* in 1/10ths of a second */
1126 	if (maxresp >= 128) {
1127 		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
1128 		    (IGMP_EXP(igmpv3->igmp_code) + 3);
1129 	}
1130 
1131 	/*
1132 	 * Robustness must never be less than 2 for on-wire IGMPv3.
1133 	 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
1134 	 * an exception for interfaces whose IGMPv3 state changes
1135 	 * are redirected to loopback (e.g. MANET).
1136 	 */
1137 	qrv = IGMP_QRV(igmpv3->igmp_misc);
1138 	if (qrv < 2) {
1139 		IGMP_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
1140 		    qrv, IGMP_RV_INIT));
1141 		qrv = IGMP_RV_INIT;
1142 	}
1143 
1144 	qqi = igmpv3->igmp_qqi;
1145 	if (qqi >= 128) {
1146 		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
1147 		    (IGMP_EXP(igmpv3->igmp_qqi) + 3);
1148 	}
1149 
1150 	timer = maxresp / IGMP_TIMER_SCALE;
1151 	if (timer == 0) {
1152 		timer = 1;
1153 	}
1154 
1155 	nsrc = ntohs(igmpv3->igmp_numsrc);
1156 
1157 	/*
1158 	 * Validate address fields and versions upfront before
1159 	 * accepting v3 query.
1160 	 */
1161 	if (in_nullhost(igmpv3->igmp_group)) {
1162 		/*
1163 		 * IGMPv3 General Query.
1164 		 *
1165 		 * General Queries SHOULD be directed to 224.0.0.1.
1166 		 * A general query with a source list has undefined
1167 		 * behaviour; discard it.
1168 		 */
1169 		IGMPSTAT_INC(igps_rcv_gen_queries);
1170 		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
1171 			IGMPSTAT_INC(igps_rcv_badqueries);
1172 			OIGMPSTAT_INC(igps_rcv_badqueries);
1173 			goto done;
1174 		}
1175 		is_general_query = 1;
1176 	} else {
1177 		/* Group or group-source specific query. */
1178 		if (nsrc == 0) {
1179 			IGMPSTAT_INC(igps_rcv_group_queries);
1180 		} else {
1181 			IGMPSTAT_INC(igps_rcv_gsr_queries);
1182 		}
1183 	}
1184 
1185 	igi = IGMP_IFINFO(ifp);
1186 	VERIFY(igi != NULL);
1187 
1188 	IGI_LOCK(igi);
1189 	if (igi->igi_flags & IGIF_LOOPBACK) {
1190 		os_log_debug(OS_LOG_DEFAULT, "%s: ignore v3 query on IGIF_LOOPBACK "
1191 		    "ifp %s\n", __func__,
1192 		    if_name(ifp));
1193 		IGI_UNLOCK(igi);
1194 		goto done;
1195 	}
1196 
1197 	/*
1198 	 * Discard the v3 query if we're in Compatibility Mode.
1199 	 * The RFC is not obviously worded that hosts need to stay in
1200 	 * compatibility mode until the Old Version Querier Present
1201 	 * timer expires.
1202 	 */
1203 	if (igi->igi_version != IGMP_VERSION_3) {
1204 		os_log_debug(OS_LOG_DEFAULT, "%s: ignore v3 query in v%d mode on "
1205 		    "ifp %s\n", __func__, igi->igi_version,
1206 		    if_name(ifp));
1207 		IGI_UNLOCK(igi);
1208 		goto done;
1209 	}
1210 
1211 	itp.qpt = igmp_set_version(igi, IGMP_VERSION_3);
1212 	igi->igi_rv = qrv;
1213 	igi->igi_qi = qqi;
1214 	igi->igi_qri = MAX(timer, IGMP_QRI_MIN);
1215 
1216 	IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, igi->igi_rv,
1217 	    igi->igi_qi, igi->igi_qri));
1218 
1219 	if (is_general_query) {
1220 		/*
1221 		 * Schedule a current-state report on this ifp for
1222 		 * all groups, possibly containing source lists.
1223 		 * If there is a pending General Query response
1224 		 * scheduled earlier than the selected delay, do
1225 		 * not schedule any other reports.
1226 		 * Otherwise, reset the interface timer.
1227 		 */
1228 		os_log_debug(OS_LOG_DEFAULT, "%s: process v3 general query on ifp %s\n",
1229 		    __func__, if_name(ifp));
1230 		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
1231 			itp.it = igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
1232 		}
1233 		IGI_UNLOCK(igi);
1234 	} else {
1235 		IGI_UNLOCK(igi);
1236 		/*
1237 		 * Group-source-specific queries are throttled on
1238 		 * a per-group basis to defeat denial-of-service attempts.
1239 		 * Queries for groups we are not a member of on this
1240 		 * link are simply ignored.
1241 		 */
1242 		in_multihead_lock_shared();
1243 		IN_LOOKUP_MULTI(&igmpv3->igmp_group, ifp, inm);
1244 		in_multihead_lock_done();
1245 		if (inm == NULL) {
1246 			goto done;
1247 		}
1248 
1249 		INM_LOCK(inm);
1250 		if (nsrc > 0) {
1251 			if (!ratecheck(&inm->inm_lastgsrtv,
1252 			    &igmp_gsrdelay)) {
1253 				os_log_info(OS_LOG_DEFAULT, "%s: GS query throttled.\n",
1254 				    __func__);
1255 				IGMPSTAT_INC(igps_drop_gsr_queries);
1256 				INM_UNLOCK(inm);
1257 				INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1258 				goto done;
1259 			}
1260 		}
1261 		IGMP_INET_PRINTF(igmpv3->igmp_group,
1262 		    ("process v3 %s query on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1263 		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1264 		os_log_debug(OS_LOG_DEFAULT, "%s: process v3 query on ifp %s\n",
1265 		    __func__, if_name(ifp));
1266 		/*
1267 		 * If there is a pending General Query response
1268 		 * scheduled sooner than the selected delay, no
1269 		 * further report need be scheduled.
1270 		 * Otherwise, prepare to respond to the
1271 		 * group-specific or group-and-source query.
1272 		 */
1273 		IGI_LOCK(igi);
1274 		itp.it = igi->igi_v3_timer;
1275 		IGI_UNLOCK(igi);
1276 		if (itp.it == 0 || itp.it >= timer) {
1277 			(void) igmp_input_v3_group_query(inm, timer, igmpv3);
1278 			itp.cst = inm->inm_timer;
1279 		}
1280 		INM_UNLOCK(inm);
1281 		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1282 	}
1283 done:
1284 	if (itp.it > 0) {
1285 		os_log_debug(OS_LOG_DEFAULT, "%s: v3 general query response scheduled in "
1286 		    "T+%d seconds on ifp %s\n", __func__, itp.it,
1287 		    if_name(ifp));
1288 	}
1289 	igmp_set_timeout(&itp);
1290 
1291 	return 0;
1292 }
1293 
1294 /*
1295  * Process a recieved IGMPv3 group-specific or group-and-source-specific
1296  * query.
1297  * Return <0 if any error occured. Currently this is ignored.
1298  */
1299 static int
igmp_input_v3_group_query(struct in_multi * inm,int timer,struct igmpv3 * __indexable igmpv3)1300 igmp_input_v3_group_query(struct in_multi *inm,
1301     int timer, /*const*/ struct igmpv3 *__indexable igmpv3)
1302 {
1303 	int                      retval;
1304 	uint16_t                 nsrc;
1305 
1306 	INM_LOCK_ASSERT_HELD(inm);
1307 
1308 	retval = 0;
1309 
1310 	switch (inm->inm_state) {
1311 	case IGMP_NOT_MEMBER:
1312 	case IGMP_SILENT_MEMBER:
1313 	case IGMP_SLEEPING_MEMBER:
1314 	case IGMP_LAZY_MEMBER:
1315 	case IGMP_AWAKENING_MEMBER:
1316 	case IGMP_IDLE_MEMBER:
1317 	case IGMP_LEAVING_MEMBER:
1318 		return retval;
1319 	case IGMP_REPORTING_MEMBER:
1320 	case IGMP_G_QUERY_PENDING_MEMBER:
1321 	case IGMP_SG_QUERY_PENDING_MEMBER:
1322 		break;
1323 	}
1324 
1325 	nsrc = ntohs(igmpv3->igmp_numsrc);
1326 
1327 	/*
1328 	 * Deal with group-specific queries upfront.
1329 	 * If any group query is already pending, purge any recorded
1330 	 * source-list state if it exists, and schedule a query response
1331 	 * for this group-specific query.
1332 	 */
1333 	if (nsrc == 0) {
1334 		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
1335 		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
1336 			inm_clear_recorded(inm);
1337 			timer = min(inm->inm_timer, timer);
1338 		}
1339 		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
1340 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1341 		return retval;
1342 	}
1343 
1344 	/*
1345 	 * Deal with the case where a group-and-source-specific query has
1346 	 * been received but a group-specific query is already pending.
1347 	 */
1348 	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
1349 		timer = min(inm->inm_timer, timer);
1350 		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1351 		return retval;
1352 	}
1353 
1354 	/*
1355 	 * Finally, deal with the case where a group-and-source-specific
1356 	 * query has been received, where a response to a previous g-s-r
1357 	 * query exists, or none exists.
1358 	 * In this case, we need to parse the source-list which the Querier
1359 	 * has provided us with and check if we have any source list filter
1360 	 * entries at T1 for these sources. If we do not, there is no need
1361 	 * schedule a report and the query may be dropped.
1362 	 * If we do, we must record them and schedule a current-state
1363 	 * report for those sources.
1364 	 * FIXME: Handling source lists larger than 1 mbuf requires that
1365 	 * we pass the mbuf chain pointer down to this function, and use
1366 	 * m_getptr() to walk the chain.
1367 	 */
1368 	if (inm->inm_nsrc > 0) {
1369 		const struct in_addr    *ap;
1370 		int                      i, nrecorded;
1371 
1372 		ap = (const struct in_addr *)(igmpv3 + 1);
1373 		nrecorded = 0;
1374 		for (i = 0; i < nsrc; i++, ap++) {
1375 			retval = inm_record_source(inm, ap->s_addr);
1376 			if (retval < 0) {
1377 				break;
1378 			}
1379 			nrecorded += retval;
1380 		}
1381 		if (nrecorded > 0) {
1382 			os_log_debug(OS_LOG_DEFAULT, "%s: schedule response to SG query\n",
1383 			    __func__);
1384 			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
1385 			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1386 		}
1387 	}
1388 
1389 	return retval;
1390 }
1391 
1392 /*
1393  * Process a received IGMPv1 host membership report.
1394  *
1395  * NOTE: 0.0.0.0 workaround breaks const correctness.
1396  */
1397 static int
igmp_input_v1_report(struct ifnet * ifp,struct mbuf * m,struct ip * ip,struct igmp * igmp)1398 igmp_input_v1_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
1399     /*const*/ struct igmp *igmp)
1400 {
1401 	struct in_ifaddr *ia;
1402 	struct in_multi *inm;
1403 
1404 	IGMPSTAT_INC(igps_rcv_reports);
1405 	OIGMPSTAT_INC(igps_rcv_reports);
1406 
1407 	if ((ifp->if_flags & IFF_LOOPBACK) ||
1408 	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1409 		return 0;
1410 	}
1411 
1412 	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr) ||
1413 	    !in_hosteq(igmp->igmp_group, ip->ip_dst))) {
1414 		IGMPSTAT_INC(igps_rcv_badreports);
1415 		OIGMPSTAT_INC(igps_rcv_badreports);
1416 		return EINVAL;
1417 	}
1418 
1419 	/*
1420 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1421 	 * Booting clients may use the source address 0.0.0.0. Some
1422 	 * IGMP daemons may not know how to use IP_RECVIF to determine
1423 	 * the interface upon which this message was received.
1424 	 * Replace 0.0.0.0 with the subnet address if told to do so.
1425 	 */
1426 	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1427 		IFP_TO_IA(ifp, ia);
1428 		if (ia != NULL) {
1429 			IFA_LOCK(&ia->ia_ifa);
1430 			ip->ip_src.s_addr = htonl(ia->ia_subnet);
1431 			IFA_UNLOCK(&ia->ia_ifa);
1432 			ifa_remref(&ia->ia_ifa);
1433 		}
1434 	}
1435 
1436 	IGMP_INET_PRINTF(igmp->igmp_group,
1437 	    ("process v1 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1438 	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1439 
1440 	/*
1441 	 * IGMPv1 report suppression.
1442 	 * If we are a member of this group, and our membership should be
1443 	 * reported, stop our group timer and transition to the 'lazy' state.
1444 	 */
1445 	in_multihead_lock_shared();
1446 	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1447 	in_multihead_lock_done();
1448 	if (inm != NULL) {
1449 		struct igmp_ifinfo *igi;
1450 
1451 		INM_LOCK(inm);
1452 
1453 		igi = inm->inm_igi;
1454 		VERIFY(igi != NULL);
1455 
1456 		IGMPSTAT_INC(igps_rcv_ourreports);
1457 		OIGMPSTAT_INC(igps_rcv_ourreports);
1458 
1459 		/*
1460 		 * If we are in IGMPv3 host mode, do not allow the
1461 		 * other host's IGMPv1 report to suppress our reports
1462 		 * unless explicitly configured to do so.
1463 		 */
1464 		IGI_LOCK(igi);
1465 		if (igi->igi_version == IGMP_VERSION_3) {
1466 			if (igmp_legacysupp) {
1467 				igmp_v3_suppress_group_record(inm);
1468 			}
1469 			IGI_UNLOCK(igi);
1470 			INM_UNLOCK(inm);
1471 			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1472 			return 0;
1473 		}
1474 
1475 		INM_LOCK_ASSERT_HELD(inm);
1476 		inm->inm_timer = 0;
1477 
1478 		switch (inm->inm_state) {
1479 		case IGMP_NOT_MEMBER:
1480 		case IGMP_SILENT_MEMBER:
1481 			break;
1482 		case IGMP_IDLE_MEMBER:
1483 		case IGMP_LAZY_MEMBER:
1484 		case IGMP_AWAKENING_MEMBER:
1485 			IGMP_INET_PRINTF(igmp->igmp_group,
1486 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1487 			    _igmp_inet_buf,
1488 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1489 			OS_FALLTHROUGH;
1490 		case IGMP_SLEEPING_MEMBER:
1491 			inm->inm_state = IGMP_SLEEPING_MEMBER;
1492 			break;
1493 		case IGMP_REPORTING_MEMBER:
1494 			IGMP_INET_PRINTF(igmp->igmp_group,
1495 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1496 			    _igmp_inet_buf,
1497 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1498 			if (igi->igi_version == IGMP_VERSION_1) {
1499 				inm->inm_state = IGMP_LAZY_MEMBER;
1500 			} else if (igi->igi_version == IGMP_VERSION_2) {
1501 				inm->inm_state = IGMP_SLEEPING_MEMBER;
1502 			}
1503 			break;
1504 		case IGMP_G_QUERY_PENDING_MEMBER:
1505 		case IGMP_SG_QUERY_PENDING_MEMBER:
1506 		case IGMP_LEAVING_MEMBER:
1507 			break;
1508 		}
1509 		IGI_UNLOCK(igi);
1510 		INM_UNLOCK(inm);
1511 		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1512 	}
1513 
1514 	return 0;
1515 }
1516 
1517 /*
1518  * Process a received IGMPv2 host membership report.
1519  *
1520  * NOTE: 0.0.0.0 workaround breaks const correctness.
1521  */
1522 static int
igmp_input_v2_report(struct ifnet * ifp,struct mbuf * m,struct ip * ip,struct igmp * igmp)1523 igmp_input_v2_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
1524     /*const*/ struct igmp *igmp)
1525 {
1526 	struct in_ifaddr *ia;
1527 	struct in_multi *inm;
1528 
1529 	/*
1530 	 * Make sure we don't hear our own membership report.  Fast
1531 	 * leave requires knowing that we are the only member of a
1532 	 * group.
1533 	 */
1534 	IFP_TO_IA(ifp, ia);
1535 	if (ia != NULL) {
1536 		IFA_LOCK(&ia->ia_ifa);
1537 		if (in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
1538 			IFA_UNLOCK(&ia->ia_ifa);
1539 			ifa_remref(&ia->ia_ifa);
1540 			return 0;
1541 		}
1542 		IFA_UNLOCK(&ia->ia_ifa);
1543 	}
1544 
1545 	IGMPSTAT_INC(igps_rcv_reports);
1546 	OIGMPSTAT_INC(igps_rcv_reports);
1547 
1548 	if ((ifp->if_flags & IFF_LOOPBACK) ||
1549 	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1550 		if (ia != NULL) {
1551 			ifa_remref(&ia->ia_ifa);
1552 		}
1553 		return 0;
1554 	}
1555 
1556 	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
1557 	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
1558 		if (ia != NULL) {
1559 			ifa_remref(&ia->ia_ifa);
1560 		}
1561 		IGMPSTAT_INC(igps_rcv_badreports);
1562 		OIGMPSTAT_INC(igps_rcv_badreports);
1563 		return EINVAL;
1564 	}
1565 
1566 	/*
1567 	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1568 	 * Booting clients may use the source address 0.0.0.0. Some
1569 	 * IGMP daemons may not know how to use IP_RECVIF to determine
1570 	 * the interface upon which this message was received.
1571 	 * Replace 0.0.0.0 with the subnet address if told to do so.
1572 	 */
1573 	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1574 		if (ia != NULL) {
1575 			IFA_LOCK(&ia->ia_ifa);
1576 			ip->ip_src.s_addr = htonl(ia->ia_subnet);
1577 			IFA_UNLOCK(&ia->ia_ifa);
1578 		}
1579 	}
1580 	if (ia != NULL) {
1581 		ifa_remref(&ia->ia_ifa);
1582 	}
1583 
1584 	IGMP_INET_PRINTF(igmp->igmp_group,
1585 	    ("process v2 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1586 	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1587 	os_log_debug(OS_LOG_DEFAULT, "%s: process v2 report on ifp %s",
1588 	    __func__, if_name(ifp));
1589 
1590 	/*
1591 	 * IGMPv2 report suppression.
1592 	 * If we are a member of this group, and our membership should be
1593 	 * reported, and our group timer is pending or about to be reset,
1594 	 * stop our group timer by transitioning to the 'lazy' state.
1595 	 */
1596 	in_multihead_lock_shared();
1597 	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1598 	in_multihead_lock_done();
1599 	if (inm != NULL) {
1600 		struct igmp_ifinfo *igi;
1601 
1602 		INM_LOCK(inm);
1603 		igi = inm->inm_igi;
1604 		VERIFY(igi != NULL);
1605 
1606 		IGMPSTAT_INC(igps_rcv_ourreports);
1607 		OIGMPSTAT_INC(igps_rcv_ourreports);
1608 
1609 		/*
1610 		 * If we are in IGMPv3 host mode, do not allow the
1611 		 * other host's IGMPv1 report to suppress our reports
1612 		 * unless explicitly configured to do so.
1613 		 */
1614 		IGI_LOCK(igi);
1615 		if (igi->igi_version == IGMP_VERSION_3) {
1616 			if (igmp_legacysupp) {
1617 				igmp_v3_suppress_group_record(inm);
1618 			}
1619 			IGI_UNLOCK(igi);
1620 			INM_UNLOCK(inm);
1621 			INM_REMREF(inm);
1622 			return 0;
1623 		}
1624 
1625 		inm->inm_timer = 0;
1626 
1627 		switch (inm->inm_state) {
1628 		case IGMP_NOT_MEMBER:
1629 		case IGMP_SILENT_MEMBER:
1630 		case IGMP_SLEEPING_MEMBER:
1631 			break;
1632 		case IGMP_REPORTING_MEMBER:
1633 		case IGMP_IDLE_MEMBER:
1634 		case IGMP_AWAKENING_MEMBER:
1635 			IGMP_INET_PRINTF(igmp->igmp_group,
1636 			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
1637 			    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp),
1638 			    if_name(ifp)));
1639 			OS_FALLTHROUGH;
1640 		case IGMP_LAZY_MEMBER:
1641 			inm->inm_state = IGMP_LAZY_MEMBER;
1642 			break;
1643 		case IGMP_G_QUERY_PENDING_MEMBER:
1644 		case IGMP_SG_QUERY_PENDING_MEMBER:
1645 		case IGMP_LEAVING_MEMBER:
1646 			break;
1647 		}
1648 		IGI_UNLOCK(igi);
1649 		INM_UNLOCK(inm);
1650 		INM_REMREF(inm);
1651 	}
1652 
1653 	return 0;
1654 }
1655 
1656 void
igmp_input(struct mbuf * m,int off)1657 igmp_input(struct mbuf *m, int off)
1658 {
1659 	int iphlen;
1660 	struct ifnet *ifp;
1661 	struct igmp *igmp;
1662 	struct ip *ip;
1663 	int igmplen;
1664 	int minlen;
1665 	int queryver;
1666 
1667 	IGMP_PRINTF(("%s: called w/mbuf(0x%llx,%d)\n", __func__,
1668 	    (uint64_t)VM_KERNEL_ADDRPERM(m), off));
1669 
1670 	ifp = m->m_pkthdr.rcvif;
1671 
1672 	IGMPSTAT_INC(igps_rcv_total);
1673 	OIGMPSTAT_INC(igps_rcv_total);
1674 
1675 	/* Expect 32-bit aligned data pointer on strict-align platforms */
1676 	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
1677 
1678 	ip = mtod(m, struct ip *);
1679 	iphlen = off;
1680 
1681 	/* By now, ip_len no longer contains the length of IP header */
1682 	igmplen = ip->ip_len;
1683 
1684 	/*
1685 	 * Validate lengths.
1686 	 */
1687 	if (igmplen < IGMP_MINLEN) {
1688 		IGMPSTAT_INC(igps_rcv_tooshort);
1689 		OIGMPSTAT_INC(igps_rcv_tooshort);
1690 		m_freem(m);
1691 		return;
1692 	}
1693 
1694 	/*
1695 	 * Always pullup to the minimum size for v1/v2 or v3
1696 	 * to amortize calls to m_pulldown().
1697 	 */
1698 	if (igmplen >= IGMP_V3_QUERY_MINLEN) {
1699 		minlen = IGMP_V3_QUERY_MINLEN;
1700 	} else {
1701 		minlen = IGMP_MINLEN;
1702 	}
1703 
1704 	/* A bit more expensive than M_STRUCT_GET, but ensures alignment */
1705 	M_STRUCT_GET0(igmp, struct igmp *, m, off, minlen);
1706 	if (igmp == NULL) {
1707 		IGMPSTAT_INC(igps_rcv_tooshort);
1708 		OIGMPSTAT_INC(igps_rcv_tooshort);
1709 		return;
1710 	}
1711 	/* N.B.: we assume the packet was correctly aligned in ip_input. */
1712 
1713 	/*
1714 	 * Validate checksum.
1715 	 */
1716 	m->m_data += iphlen;
1717 	m->m_len -= iphlen;
1718 	if (in_cksum(m, igmplen)) {
1719 		IGMPSTAT_INC(igps_rcv_badsum);
1720 		OIGMPSTAT_INC(igps_rcv_badsum);
1721 		m_freem(m);
1722 		return;
1723 	}
1724 	m->m_data -= iphlen;
1725 	m->m_len += iphlen;
1726 
1727 	/*
1728 	 * IGMP control traffic is link-scope, and must have a TTL of 1.
1729 	 * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
1730 	 * probe packets may come from beyond the LAN.
1731 	 */
1732 	if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
1733 		IGMPSTAT_INC(igps_rcv_badttl);
1734 		m_freem(m);
1735 		return;
1736 	}
1737 
1738 	switch (igmp->igmp_type) {
1739 	case IGMP_HOST_MEMBERSHIP_QUERY:
1740 		if (igmplen == IGMP_MINLEN) {
1741 			if (igmp->igmp_code == 0) {
1742 				queryver = IGMP_VERSION_1;
1743 			} else {
1744 				queryver = IGMP_VERSION_2;
1745 			}
1746 		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
1747 			queryver = IGMP_VERSION_3;
1748 		} else {
1749 			IGMPSTAT_INC(igps_rcv_tooshort);
1750 			OIGMPSTAT_INC(igps_rcv_tooshort);
1751 			m_freem(m);
1752 			return;
1753 		}
1754 
1755 		OIGMPSTAT_INC(igps_rcv_queries);
1756 
1757 		switch (queryver) {
1758 		case IGMP_VERSION_1:
1759 			IGMPSTAT_INC(igps_rcv_v1v2_queries);
1760 			if (!igmp_v1enable) {
1761 				break;
1762 			}
1763 			if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
1764 				m_freem(m);
1765 				return;
1766 			}
1767 			break;
1768 
1769 		case IGMP_VERSION_2:
1770 			IGMPSTAT_INC(igps_rcv_v1v2_queries);
1771 			if (!igmp_v2enable) {
1772 				break;
1773 			}
1774 			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
1775 				m_freem(m);
1776 				return;
1777 			}
1778 			break;
1779 
1780 		case IGMP_VERSION_3: {
1781 			struct igmpv3 *igmpv3;
1782 			uint16_t igmpv3len;
1783 			uint16_t srclen;
1784 			int nsrc;
1785 
1786 			IGMPSTAT_INC(igps_rcv_v3_queries);
1787 			igmpv3 = (struct igmpv3 *)igmp;
1788 			/*
1789 			 * Validate length based on source count.
1790 			 */
1791 			nsrc = ntohs(igmpv3->igmp_numsrc);
1792 			/*
1793 			 * The max vaue of nsrc is limited by the
1794 			 * MTU of the network on which the datagram
1795 			 * is received
1796 			 */
1797 			if (nsrc < 0 || nsrc > IGMP_V3_QUERY_MAX_SRCS) {
1798 				IGMPSTAT_INC(igps_rcv_tooshort);
1799 				OIGMPSTAT_INC(igps_rcv_tooshort);
1800 				m_freem(m);
1801 				return;
1802 			}
1803 			srclen = sizeof(struct in_addr) * (uint16_t)nsrc;
1804 			if (igmplen < (IGMP_V3_QUERY_MINLEN + srclen)) {
1805 				IGMPSTAT_INC(igps_rcv_tooshort);
1806 				OIGMPSTAT_INC(igps_rcv_tooshort);
1807 				m_freem(m);
1808 				return;
1809 			}
1810 			igmpv3len = IGMP_V3_QUERY_MINLEN + srclen;
1811 			/*
1812 			 * A bit more expensive than M_STRUCT_GET,
1813 			 * but ensures alignment.
1814 			 */
1815 			M_STRUCT_GET0(igmpv3, struct igmpv3 *, m,
1816 			    off, igmpv3len);
1817 			if (igmpv3 == NULL) {
1818 				IGMPSTAT_INC(igps_rcv_tooshort);
1819 				OIGMPSTAT_INC(igps_rcv_tooshort);
1820 				return;
1821 			}
1822 			/*
1823 			 * N.B.: we assume the packet was correctly
1824 			 * aligned in ip_input.
1825 			 */
1826 			if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
1827 				m_freem(m);
1828 				return;
1829 			}
1830 		}
1831 		break;
1832 		}
1833 		break;
1834 
1835 	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
1836 		if (!igmp_v1enable) {
1837 			break;
1838 		}
1839 		if (igmp_input_v1_report(ifp, m, ip, igmp) != 0) {
1840 			m_freem(m);
1841 			return;
1842 		}
1843 		break;
1844 
1845 	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
1846 		if (!igmp_v2enable) {
1847 			break;
1848 		}
1849 		if (!ip_checkrouteralert(m)) {
1850 			IGMPSTAT_INC(igps_rcv_nora);
1851 		}
1852 		if (igmp_input_v2_report(ifp, m, ip, igmp) != 0) {
1853 			m_freem(m);
1854 			return;
1855 		}
1856 		break;
1857 
1858 	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
1859 		/*
1860 		 * Hosts do not need to process IGMPv3 membership reports,
1861 		 * as report suppression is no longer required.
1862 		 */
1863 		if (!ip_checkrouteralert(m)) {
1864 			IGMPSTAT_INC(igps_rcv_nora);
1865 		}
1866 		break;
1867 
1868 	default:
1869 		break;
1870 	}
1871 
1872 	IGMP_LOCK_ASSERT_NOTHELD();
1873 	/*
1874 	 * Pass all valid IGMP packets up to any process(es) listening on a
1875 	 * raw IGMP socket.
1876 	 */
1877 	rip_input(m, off);
1878 }
1879 
1880 /*
1881  * Schedule IGMP timer based on various parameters; caller must ensure that
1882  * lock ordering is maintained as this routine acquires IGMP global lock.
1883  */
1884 void
igmp_set_timeout(struct igmp_tparams * itp)1885 igmp_set_timeout(struct igmp_tparams *itp)
1886 {
1887 	IGMP_LOCK_ASSERT_NOTHELD();
1888 	VERIFY(itp != NULL);
1889 
1890 	if (itp->qpt != 0 || itp->it != 0 || itp->cst != 0 || itp->sct != 0) {
1891 		IGMP_LOCK();
1892 		if (itp->qpt != 0) {
1893 			querier_present_timers_running = 1;
1894 		}
1895 		if (itp->it != 0) {
1896 			interface_timers_running = 1;
1897 		}
1898 		if (itp->cst != 0) {
1899 			current_state_timers_running = 1;
1900 		}
1901 		if (itp->sct != 0) {
1902 			state_change_timers_running = 1;
1903 		}
1904 		if (itp->fast) {
1905 			igmp_sched_fast_timeout();
1906 		} else {
1907 			igmp_sched_timeout();
1908 		}
1909 		IGMP_UNLOCK();
1910 	}
1911 }
1912 
1913 void
igmp_set_fast_timeout(struct igmp_tparams * itp)1914 igmp_set_fast_timeout(struct igmp_tparams *itp)
1915 {
1916 	VERIFY(itp != NULL);
1917 	itp->fast = true;
1918 	igmp_set_timeout(itp);
1919 }
1920 
1921 /*
1922  * IGMP timer handler (per 1 second).
1923  */
1924 static void
igmp_timeout(thread_call_param_t arg0,thread_call_param_t arg1 __unused)1925 igmp_timeout(thread_call_param_t arg0, thread_call_param_t arg1 __unused)
1926 {
1927 	struct ifqueue           scq;   /* State-change packets */
1928 	struct ifqueue           qrq;   /* Query response packets */
1929 	struct ifnet            *ifp;
1930 	struct igmp_ifinfo      *igi;
1931 	struct in_multi         *inm;
1932 	unsigned int             loop = 0, uri_sec = 0;
1933 	SLIST_HEAD(, in_multi)  inm_dthead;
1934 	bool                     fast = arg0 != NULL;
1935 
1936 	SLIST_INIT(&inm_dthead);
1937 
1938 	/*
1939 	 * Update coarse-grained networking timestamp (in sec.); the idea
1940 	 * is to piggy-back on the timeout callout to update the counter
1941 	 * returnable via net_uptime().
1942 	 */
1943 	net_update_uptime();
1944 
1945 	IGMP_LOCK();
1946 
1947 	IGMP_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d, fast %d\n", __func__,
1948 	    querier_present_timers_running, interface_timers_running,
1949 	    current_state_timers_running, state_change_timers_running,
1950 	    fast));
1951 
1952 	if (fast) {
1953 		/*
1954 		 * When running the fast timer, skip processing
1955 		 * of "querier present" timers since they are
1956 		 * based on 1-second intervals.
1957 		 */
1958 		goto skip_query_timers;
1959 	}
1960 	/*
1961 	 * IGMPv1/v2 querier present timer processing.
1962 	 */
1963 	if (querier_present_timers_running) {
1964 		querier_present_timers_running = 0;
1965 		LIST_FOREACH(igi, &igi_head, igi_link) {
1966 			IGI_LOCK(igi);
1967 			igmp_v1v2_process_querier_timers(igi);
1968 			if (igi->igi_v1_timer > 0 || igi->igi_v2_timer > 0) {
1969 				querier_present_timers_running = 1;
1970 			}
1971 			IGI_UNLOCK(igi);
1972 		}
1973 	}
1974 
1975 	/*
1976 	 * IGMPv3 General Query response timer processing.
1977 	 */
1978 	if (interface_timers_running) {
1979 		IGMP_PRINTF(("%s: interface timers running\n", __func__));
1980 		interface_timers_running = 0;
1981 		LIST_FOREACH(igi, &igi_head, igi_link) {
1982 			IGI_LOCK(igi);
1983 			if (igi->igi_version != IGMP_VERSION_3) {
1984 				IGI_UNLOCK(igi);
1985 				continue;
1986 			}
1987 			if (igi->igi_v3_timer == 0) {
1988 				/* Do nothing. */
1989 			} else if (--igi->igi_v3_timer == 0) {
1990 				if (igmp_v3_dispatch_general_query(igi) > 0) {
1991 					interface_timers_running = 1;
1992 				}
1993 			} else {
1994 				interface_timers_running = 1;
1995 			}
1996 			IGI_UNLOCK(igi);
1997 		}
1998 	}
1999 
2000 skip_query_timers:
2001 	if (!current_state_timers_running &&
2002 	    !state_change_timers_running) {
2003 		goto out_locked;
2004 	}
2005 
2006 	current_state_timers_running = 0;
2007 	state_change_timers_running = 0;
2008 
2009 	memset(&qrq, 0, sizeof(struct ifqueue));
2010 	qrq.ifq_maxlen = IGMP_MAX_G_GS_PACKETS;
2011 
2012 	memset(&scq, 0, sizeof(struct ifqueue));
2013 	scq.ifq_maxlen =  IGMP_MAX_STATE_CHANGE_PACKETS;
2014 
2015 	IGMP_PRINTF(("%s: state change timers running\n", __func__));
2016 
2017 	/*
2018 	 * IGMPv1/v2/v3 host report and state-change timer processing.
2019 	 * Note: Processing a v3 group timer may remove a node.
2020 	 */
2021 	LIST_FOREACH(igi, &igi_head, igi_link) {
2022 		struct in_multistep step;
2023 
2024 		IGI_LOCK(igi);
2025 		ifp = igi->igi_ifp;
2026 		loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
2027 		uri_sec = IGMP_RANDOM_DELAY(igi->igi_uri);
2028 		IGI_UNLOCK(igi);
2029 
2030 		in_multihead_lock_shared();
2031 		IN_FIRST_MULTI(step, inm);
2032 		while (inm != NULL) {
2033 			INM_LOCK(inm);
2034 			if (inm->inm_ifp != ifp) {
2035 				goto next;
2036 			}
2037 
2038 			IGI_LOCK(igi);
2039 			switch (igi->igi_version) {
2040 			case IGMP_VERSION_1:
2041 			case IGMP_VERSION_2:
2042 				igmp_v1v2_process_group_timer(inm,
2043 				    igi->igi_version);
2044 				break;
2045 			case IGMP_VERSION_3:
2046 				igmp_v3_process_group_timers(igi, &qrq,
2047 				    &scq, inm, uri_sec);
2048 				break;
2049 			}
2050 			IGI_UNLOCK(igi);
2051 next:
2052 			INM_UNLOCK(inm);
2053 			IN_NEXT_MULTI(step, inm);
2054 		}
2055 		in_multihead_lock_done();
2056 
2057 		IGI_LOCK(igi);
2058 		if (igi->igi_version == IGMP_VERSION_1 ||
2059 		    igi->igi_version == IGMP_VERSION_2) {
2060 			igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop);
2061 		} else if (igi->igi_version == IGMP_VERSION_3) {
2062 			IGI_UNLOCK(igi);
2063 			igmp_dispatch_queue(NULL, &qrq, 0, loop);
2064 			igmp_dispatch_queue(NULL, &scq, 0, loop);
2065 			VERIFY(qrq.ifq_len == 0);
2066 			VERIFY(scq.ifq_len == 0);
2067 			IGI_LOCK(igi);
2068 		}
2069 		/*
2070 		 * In case there are still any pending membership reports
2071 		 * which didn't get drained at version change time.
2072 		 */
2073 		IF_DRAIN(&igi->igi_v2q);
2074 		/*
2075 		 * Release all deferred inm records, and drain any locally
2076 		 * enqueued packets; do it even if the current IGMP version
2077 		 * for the link is no longer IGMPv3, in order to handle the
2078 		 * version change case.
2079 		 */
2080 		igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
2081 		IGI_UNLOCK(igi);
2082 
2083 		IF_DRAIN(&qrq);
2084 		IF_DRAIN(&scq);
2085 	}
2086 
2087 out_locked:
2088 	/* re-arm the timer if there's work to do */
2089 	if (fast) {
2090 		igmp_fast_timeout_run = false;
2091 	} else {
2092 		igmp_timeout_run = false;
2093 	}
2094 	igmp_sched_timeout();
2095 	IGMP_UNLOCK();
2096 
2097 	/* Now that we're dropped all locks, release detached records */
2098 	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
2099 }
2100 
2101 static void
igmp_sched_timeout(void)2102 igmp_sched_timeout(void)
2103 {
2104 	static thread_call_t igmp_timeout_tcall;
2105 	uint64_t deadline = 0, leeway = 0;
2106 
2107 	IGMP_LOCK_ASSERT_HELD();
2108 	if (igmp_timeout_tcall == NULL) {
2109 		igmp_timeout_tcall =
2110 		    thread_call_allocate_with_options(igmp_timeout,
2111 		    NULL,
2112 		    THREAD_CALL_PRIORITY_KERNEL,
2113 		    THREAD_CALL_OPTIONS_ONCE);
2114 	}
2115 	if (!igmp_timeout_run &&
2116 	    (querier_present_timers_running || current_state_timers_running ||
2117 	    interface_timers_running || state_change_timers_running)) {
2118 		igmp_timeout_run = true;
2119 		clock_interval_to_deadline(igmp_timeout_delay, NSEC_PER_MSEC,
2120 		    &deadline);
2121 		clock_interval_to_absolutetime_interval(igmp_timeout_leeway,
2122 		    NSEC_PER_MSEC, &leeway);
2123 		thread_call_enter_delayed_with_leeway(igmp_timeout_tcall, NULL,
2124 		    deadline, leeway,
2125 		    THREAD_CALL_DELAY_LEEWAY);
2126 	}
2127 }
2128 
2129 static void
igmp_sched_fast_timeout(void)2130 igmp_sched_fast_timeout(void)
2131 {
2132 	static thread_call_t igmp_fast_timeout_tcall;
2133 
2134 	IGMP_LOCK_ASSERT_HELD();
2135 	if (igmp_fast_timeout_tcall == NULL) {
2136 		igmp_fast_timeout_tcall =
2137 		    thread_call_allocate_with_options(igmp_timeout,
2138 		    igmp_sched_fast_timeout,
2139 		    THREAD_CALL_PRIORITY_KERNEL,
2140 		    THREAD_CALL_OPTIONS_ONCE);
2141 	}
2142 	if (!igmp_fast_timeout_run &&
2143 	    (current_state_timers_running || state_change_timers_running)) {
2144 		igmp_fast_timeout_run = true;
2145 		thread_call_enter(igmp_fast_timeout_tcall);
2146 	}
2147 }
2148 
2149 /*
2150  * Appends an in_multi to the list to be released later.
2151  *
2152  * Caller must be holding igi_lock.
2153  */
2154 static void
igmp_append_relq(struct igmp_ifinfo * igi,struct in_multi * inm)2155 igmp_append_relq(struct igmp_ifinfo *igi, struct in_multi *inm)
2156 {
2157 	IGI_LOCK_ASSERT_HELD(igi);
2158 	if (inm->inm_in_nrele) {
2159 		os_log_debug(OS_LOG_DEFAULT, "%s: inm %llx already on relq ifp %s\n",
2160 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm),
2161 		    if_name(igi->igi_ifp));
2162 		return;
2163 	}
2164 	os_log_debug(OS_LOG_DEFAULT, "%s: adding inm %llx on relq ifp %s\n",
2165 	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm),
2166 	    if_name(igi->igi_ifp));
2167 	inm->inm_in_nrele = true;
2168 	SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
2169 }
2170 
2171 /*
2172  * Free the in_multi reference(s) for this IGMP lifecycle.
2173  *
2174  * Caller must be holding igi_lock.
2175  */
2176 static void
igmp_flush_relq(struct igmp_ifinfo * igi,struct igmp_inm_relhead * inm_dthead)2177 igmp_flush_relq(struct igmp_ifinfo *igi, struct igmp_inm_relhead *inm_dthead)
2178 {
2179 	struct in_multi *inm;
2180 	SLIST_HEAD(, in_multi) temp_relinmhead;
2181 
2182 	/*
2183 	 * Before dropping the igi_lock, copy all the items in the
2184 	 * release list to a temporary list to prevent other threads
2185 	 * from changing igi_relinmhead while we are traversing it.
2186 	 */
2187 	IGI_LOCK_ASSERT_HELD(igi);
2188 	SLIST_INIT(&temp_relinmhead);
2189 	while ((inm = SLIST_FIRST(&igi->igi_relinmhead)) != NULL) {
2190 		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
2191 		SLIST_INSERT_HEAD(&temp_relinmhead, inm, inm_nrele);
2192 	}
2193 	IGI_UNLOCK(igi);
2194 	in_multihead_lock_exclusive();
2195 	while ((inm = SLIST_FIRST(&temp_relinmhead)) != NULL) {
2196 		int lastref;
2197 
2198 		SLIST_REMOVE_HEAD(&temp_relinmhead, inm_nrele);
2199 		INM_LOCK(inm);
2200 		os_log_debug(OS_LOG_DEFAULT, "%s: flushing %llx on relq ifp %s",
2201 		    __func__,
2202 		    (uint64_t)VM_KERNEL_ADDRPERM(inm),
2203 		    if_name(inm->inm_ifp));
2204 		VERIFY(inm->inm_in_nrele == true);
2205 		inm->inm_in_nrele = false;
2206 		VERIFY(inm->inm_nrelecnt != 0);
2207 		inm->inm_nrelecnt--;
2208 		lastref = in_multi_detach(inm);
2209 		VERIFY(!lastref || (!(inm->inm_debug & IFD_ATTACHED) &&
2210 		    inm->inm_reqcnt == 0));
2211 		INM_UNLOCK(inm);
2212 		/* from igi_relinmhead */
2213 		INM_REMREF(inm);
2214 		/* from in_multihead list */
2215 		if (lastref) {
2216 			/*
2217 			 * Defer releasing our final reference, as we
2218 			 * are holding the IGMP lock at this point, and
2219 			 * we could end up with locking issues later on
2220 			 * (while issuing SIOCDELMULTI) when this is the
2221 			 * final reference count.  Let the caller do it
2222 			 * when it is safe.
2223 			 */
2224 			IGMP_ADD_DETACHED_INM(inm_dthead, inm);
2225 		}
2226 	}
2227 	in_multihead_lock_done();
2228 	IGI_LOCK(igi);
2229 }
2230 
2231 /*
2232  * Update host report group timer for IGMPv1/v2.
2233  * Will update the global pending timer flags.
2234  */
2235 static void
igmp_v1v2_process_group_timer(struct in_multi * inm,const int igmp_version)2236 igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version)
2237 {
2238 	int report_timer_expired;
2239 
2240 	IGMP_LOCK_ASSERT_HELD();
2241 	INM_LOCK_ASSERT_HELD(inm);
2242 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2243 
2244 	if (inm->inm_timer == 0) {
2245 		report_timer_expired = 0;
2246 	} else if (--inm->inm_timer == 0) {
2247 		report_timer_expired = 1;
2248 	} else {
2249 		current_state_timers_running = 1;
2250 		/* caller will schedule timer */
2251 		return;
2252 	}
2253 
2254 	switch (inm->inm_state) {
2255 	case IGMP_NOT_MEMBER:
2256 	case IGMP_SILENT_MEMBER:
2257 	case IGMP_IDLE_MEMBER:
2258 	case IGMP_LAZY_MEMBER:
2259 	case IGMP_SLEEPING_MEMBER:
2260 	case IGMP_AWAKENING_MEMBER:
2261 		break;
2262 	case IGMP_REPORTING_MEMBER:
2263 		if (report_timer_expired) {
2264 			inm->inm_state = IGMP_IDLE_MEMBER;
2265 			(void) igmp_v1v2_queue_report(inm,
2266 			    (igmp_version == IGMP_VERSION_2) ?
2267 			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
2268 			    IGMP_v1_HOST_MEMBERSHIP_REPORT);
2269 			INM_LOCK_ASSERT_HELD(inm);
2270 			IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2271 		}
2272 		break;
2273 	case IGMP_G_QUERY_PENDING_MEMBER:
2274 	case IGMP_SG_QUERY_PENDING_MEMBER:
2275 	case IGMP_LEAVING_MEMBER:
2276 		break;
2277 	}
2278 }
2279 
2280 /*
2281  * Update a group's timers for IGMPv3.
2282  * Will update the global pending timer flags.
2283  * Note: Unlocked read from igi.
2284  */
2285 static void
igmp_v3_process_group_timers(struct igmp_ifinfo * igi,struct ifqueue * qrq,struct ifqueue * scq,struct in_multi * inm,const unsigned int uri_sec)2286 igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
2287     struct ifqueue *qrq, struct ifqueue *scq,
2288     struct in_multi *inm, const unsigned int uri_sec)
2289 {
2290 	int query_response_timer_expired;
2291 	int state_change_retransmit_timer_expired;
2292 
2293 	IGMP_LOCK_ASSERT_HELD();
2294 	INM_LOCK_ASSERT_HELD(inm);
2295 	IGI_LOCK_ASSERT_HELD(igi);
2296 	VERIFY(igi == inm->inm_igi);
2297 
2298 	query_response_timer_expired = 0;
2299 	state_change_retransmit_timer_expired = 0;
2300 
2301 	/*
2302 	 * During a transition from v1/v2 compatibility mode back to v3,
2303 	 * a group record in REPORTING state may still have its group
2304 	 * timer active. This is a no-op in this function; it is easier
2305 	 * to deal with it here than to complicate the timeout path.
2306 	 */
2307 	if (inm->inm_timer == 0) {
2308 		query_response_timer_expired = 0;
2309 	} else if (--inm->inm_timer == 0) {
2310 		query_response_timer_expired = 1;
2311 	} else {
2312 		current_state_timers_running = 1;
2313 		/* caller will schedule timer */
2314 	}
2315 
2316 	if (inm->inm_sctimer == 0) {
2317 		state_change_retransmit_timer_expired = 0;
2318 	} else if (--inm->inm_sctimer == 0) {
2319 		state_change_retransmit_timer_expired = 1;
2320 	} else {
2321 		state_change_timers_running = 1;
2322 		/* caller will schedule timer */
2323 	}
2324 
2325 	/* We are in timer callback, so be quick about it. */
2326 	if (!state_change_retransmit_timer_expired &&
2327 	    !query_response_timer_expired) {
2328 		return;
2329 	}
2330 
2331 	switch (inm->inm_state) {
2332 	case IGMP_NOT_MEMBER:
2333 	case IGMP_SILENT_MEMBER:
2334 	case IGMP_SLEEPING_MEMBER:
2335 	case IGMP_LAZY_MEMBER:
2336 	case IGMP_AWAKENING_MEMBER:
2337 	case IGMP_IDLE_MEMBER:
2338 		break;
2339 	case IGMP_G_QUERY_PENDING_MEMBER:
2340 	case IGMP_SG_QUERY_PENDING_MEMBER:
2341 		/*
2342 		 * Respond to a previously pending Group-Specific
2343 		 * or Group-and-Source-Specific query by enqueueing
2344 		 * the appropriate Current-State report for
2345 		 * immediate transmission.
2346 		 */
2347 		if (query_response_timer_expired) {
2348 			int retval;
2349 
2350 			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
2351 			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
2352 			IGMP_PRINTF(("%s: enqueue record = %d\n",
2353 			    __func__, retval));
2354 			inm->inm_state = IGMP_REPORTING_MEMBER;
2355 			/* XXX Clear recorded sources for next time. */
2356 			inm_clear_recorded(inm);
2357 		}
2358 		OS_FALLTHROUGH;
2359 	case IGMP_REPORTING_MEMBER:
2360 	case IGMP_LEAVING_MEMBER:
2361 		if (state_change_retransmit_timer_expired) {
2362 			/*
2363 			 * State-change retransmission timer fired.
2364 			 * If there are any further pending retransmissions,
2365 			 * set the global pending state-change flag, and
2366 			 * reset the timer.
2367 			 */
2368 			if (--inm->inm_scrv > 0) {
2369 				inm->inm_sctimer = (uint16_t)uri_sec;
2370 				state_change_timers_running = 1;
2371 				/* caller will schedule timer */
2372 			}
2373 			/*
2374 			 * Retransmit the previously computed state-change
2375 			 * report. If there are no further pending
2376 			 * retransmissions, the mbuf queue will be consumed.
2377 			 * Update T0 state to T1 as we have now sent
2378 			 * a state-change.
2379 			 */
2380 			(void) igmp_v3_merge_state_changes(inm, scq);
2381 
2382 			inm_commit(inm);
2383 			IGMP_INET_PRINTF(inm->inm_addr,
2384 			    ("%s: T1 -> T0 for %s/%s\n", __func__,
2385 			    _igmp_inet_buf, if_name(inm->inm_ifp)));
2386 
2387 			/*
2388 			 * If we are leaving the group for good, make sure
2389 			 * we release IGMP's reference to it.
2390 			 * This release must be deferred using a SLIST,
2391 			 * as we are called from a loop which traverses
2392 			 * the in_multihead list.
2393 			 */
2394 			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
2395 			    inm->inm_scrv == 0) {
2396 				inm->inm_state = IGMP_NOT_MEMBER;
2397 				/*
2398 				 * A reference has already been held in
2399 				 * igmp_final_leave() for this inm, so
2400 				 * no need to hold another one.  We also
2401 				 * bumped up its request count then, so
2402 				 * that it stays in in_multihead.  Both
2403 				 * of them will be released when it is
2404 				 * dequeued later on.
2405 				 */
2406 				VERIFY(inm->inm_nrelecnt != 0);
2407 				igmp_append_relq(igi, inm);
2408 			}
2409 		}
2410 		break;
2411 	}
2412 }
2413 
2414 /*
2415  * Suppress a group's pending response to a group or source/group query.
2416  *
2417  * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
2418  * Do NOT update ST1/ST0 as this operation merely suppresses
2419  * the currently pending group record.
2420  * Do NOT suppress the response to a general query. It is possible but
2421  * it would require adding another state or flag.
2422  */
2423 static void
igmp_v3_suppress_group_record(struct in_multi * inm)2424 igmp_v3_suppress_group_record(struct in_multi *inm)
2425 {
2426 	INM_LOCK_ASSERT_HELD(inm);
2427 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2428 
2429 	VERIFY(inm->inm_igi->igi_version == IGMP_VERSION_3);
2430 
2431 	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER &&
2432 	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER) {
2433 		return;
2434 	}
2435 
2436 	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
2437 		inm_clear_recorded(inm);
2438 	}
2439 
2440 	inm->inm_timer = 0;
2441 	inm->inm_state = IGMP_REPORTING_MEMBER;
2442 }
2443 
2444 /*
2445  * Switch to a different IGMP version on the given interface,
2446  * as per Section 7.2.1.
2447  */
2448 static uint32_t
igmp_set_version(struct igmp_ifinfo * igi,const int igmp_version)2449 igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version)
2450 {
2451 	int old_version_timer;
2452 
2453 	IGI_LOCK_ASSERT_HELD(igi);
2454 
2455 	os_log(OS_LOG_DEFAULT, "%s: switching to v%d on ifp %s\n", __func__,
2456 	    igmp_version, if_name(igi->igi_ifp));
2457 
2458 	if (igmp_version == IGMP_VERSION_1 || igmp_version == IGMP_VERSION_2) {
2459 		/*
2460 		 * Compute the "Older Version Querier Present" timer as per
2461 		 * Section 8.12, in seconds.
2462 		 */
2463 		old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
2464 
2465 		if (igmp_version == IGMP_VERSION_1) {
2466 			igi->igi_v1_timer = old_version_timer;
2467 			igi->igi_v2_timer = 0;
2468 		} else if (igmp_version == IGMP_VERSION_2) {
2469 			igi->igi_v1_timer = 0;
2470 			igi->igi_v2_timer = old_version_timer;
2471 		}
2472 	}
2473 
2474 	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2475 		if (igi->igi_version != IGMP_VERSION_2) {
2476 			igmp_v3_cancel_link_timers(igi);
2477 			igi->igi_version = IGMP_VERSION_2;
2478 		}
2479 	} else if (igi->igi_v1_timer > 0) {
2480 		if (igi->igi_version != IGMP_VERSION_1) {
2481 			igmp_v3_cancel_link_timers(igi);
2482 			igi->igi_version = IGMP_VERSION_1;
2483 		}
2484 	}
2485 
2486 	IGI_LOCK_ASSERT_HELD(igi);
2487 
2488 	return MAX(igi->igi_v1_timer, igi->igi_v2_timer);
2489 }
2490 
2491 /*
2492  * Cancel pending IGMPv3 timers for the given link and all groups
2493  * joined on it; state-change, general-query, and group-query timers.
2494  *
2495  * Only ever called on a transition from v3 to Compatibility mode. Kill
2496  * the timers stone dead (this may be expensive for large N groups), they
2497  * will be restarted if Compatibility Mode deems that they must be due to
2498  * query processing.
2499  */
2500 static void
igmp_v3_cancel_link_timers(struct igmp_ifinfo * igi)2501 igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
2502 {
2503 	struct ifnet            *ifp;
2504 	struct in_multi         *inm;
2505 	struct in_multistep     step;
2506 
2507 	IGI_LOCK_ASSERT_HELD(igi);
2508 
2509 	IGMP_PRINTF(("%s: cancel v3 timers on ifp 0x%llx(%s)\n", __func__,
2510 	    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), if_name(igi->igi_ifp)));
2511 
2512 	/*
2513 	 * Stop the v3 General Query Response on this link stone dead.
2514 	 * If timer is woken up due to interface_timers_running,
2515 	 * the flag will be cleared if there are no pending link timers.
2516 	 */
2517 	igi->igi_v3_timer = 0;
2518 
2519 	/*
2520 	 * Now clear the current-state and state-change report timers
2521 	 * for all memberships scoped to this link.
2522 	 */
2523 	ifp = igi->igi_ifp;
2524 	IGI_UNLOCK(igi);
2525 
2526 	in_multihead_lock_shared();
2527 	IN_FIRST_MULTI(step, inm);
2528 	while (inm != NULL) {
2529 		INM_LOCK(inm);
2530 		if (inm->inm_ifp != ifp && inm->inm_igi != igi) {
2531 			goto next;
2532 		}
2533 
2534 		switch (inm->inm_state) {
2535 		case IGMP_NOT_MEMBER:
2536 		case IGMP_SILENT_MEMBER:
2537 		case IGMP_IDLE_MEMBER:
2538 		case IGMP_LAZY_MEMBER:
2539 		case IGMP_SLEEPING_MEMBER:
2540 		case IGMP_AWAKENING_MEMBER:
2541 			/*
2542 			 * These states are either not relevant in v3 mode,
2543 			 * or are unreported. Do nothing.
2544 			 */
2545 			break;
2546 		case IGMP_LEAVING_MEMBER:
2547 			/*
2548 			 * If we are leaving the group and switching to
2549 			 * compatibility mode, we need to release the final
2550 			 * reference held for issuing the INCLUDE {}, and
2551 			 * transition to REPORTING to ensure the host leave
2552 			 * message is sent upstream to the old querier --
2553 			 * transition to NOT would lose the leave and race.
2554 			 * During igmp_final_leave(), we bumped up both the
2555 			 * request and reference counts.  Since we cannot
2556 			 * call in_multi_detach() here, defer this task to
2557 			 * the timer routine.
2558 			 */
2559 			VERIFY(inm->inm_nrelecnt != 0);
2560 			IGI_LOCK(igi);
2561 			igmp_append_relq(igi, inm);
2562 			IGI_UNLOCK(igi);
2563 			OS_FALLTHROUGH;
2564 		case IGMP_G_QUERY_PENDING_MEMBER:
2565 		case IGMP_SG_QUERY_PENDING_MEMBER:
2566 			inm_clear_recorded(inm);
2567 			OS_FALLTHROUGH;
2568 		case IGMP_REPORTING_MEMBER:
2569 			inm->inm_state = IGMP_REPORTING_MEMBER;
2570 			break;
2571 		}
2572 		/*
2573 		 * Always clear state-change and group report timers.
2574 		 * Free any pending IGMPv3 state-change records.
2575 		 */
2576 		inm->inm_sctimer = 0;
2577 		inm->inm_timer = 0;
2578 		IF_DRAIN(&inm->inm_scq);
2579 next:
2580 		INM_UNLOCK(inm);
2581 		IN_NEXT_MULTI(step, inm);
2582 	}
2583 	in_multihead_lock_done();
2584 
2585 	IGI_LOCK(igi);
2586 }
2587 
2588 /*
2589  * Update the Older Version Querier Present timers for a link.
2590  * See Section 7.2.1 of RFC 3376.
2591  */
2592 static void
igmp_v1v2_process_querier_timers(struct igmp_ifinfo * igi)2593 igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
2594 {
2595 	IGI_LOCK_ASSERT_HELD(igi);
2596 
2597 	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
2598 		/*
2599 		 * IGMPv1 and IGMPv2 Querier Present timers expired.
2600 		 *
2601 		 * Revert to IGMPv3.
2602 		 */
2603 		if (igi->igi_version != IGMP_VERSION_3) {
2604 			os_log(OS_LOG_DEFAULT, "%s: transition from v%d->v%d "
2605 			    "on %s\n", __func__,
2606 			    igi->igi_version, IGMP_VERSION_3,
2607 			    if_name(igi->igi_ifp));
2608 			igi->igi_version = IGMP_VERSION_3;
2609 			IF_DRAIN(&igi->igi_v2q);
2610 		}
2611 	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2612 		/*
2613 		 * IGMPv1 Querier Present timer expired,
2614 		 * IGMPv2 Querier Present timer running.
2615 		 * If IGMPv2 was disabled since last timeout,
2616 		 * revert to IGMPv3.
2617 		 * If IGMPv2 is enabled, revert to IGMPv2.
2618 		 */
2619 		if (!igmp_v2enable) {
2620 			os_log(OS_LOG_DEFAULT, "%s: transition from v%d->v%d "
2621 			    "on %s\n", __func__,
2622 			    igi->igi_version, IGMP_VERSION_3,
2623 			    if_name(igi->igi_ifp));
2624 			igi->igi_v2_timer = 0;
2625 			igi->igi_version = IGMP_VERSION_3;
2626 			IF_DRAIN(&igi->igi_v2q);
2627 		} else {
2628 			--igi->igi_v2_timer;
2629 			if (igi->igi_version != IGMP_VERSION_2) {
2630 				os_log(OS_LOG_DEFAULT, "%s: transition from v%d->v%d "
2631 				    "on %s\n", __func__,
2632 				    igi->igi_version, IGMP_VERSION_2,
2633 				    if_name(igi->igi_ifp));
2634 				IF_DRAIN(&igi->igi_gq);
2635 				igmp_v3_cancel_link_timers(igi);
2636 				igi->igi_version = IGMP_VERSION_2;
2637 			}
2638 		}
2639 	} else if (igi->igi_v1_timer > 0) {
2640 		/*
2641 		 * IGMPv1 Querier Present timer running.
2642 		 * Stop IGMPv2 timer if running.
2643 		 *
2644 		 * If IGMPv1 was disabled since last timeout,
2645 		 * revert to IGMPv3.
2646 		 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
2647 		 */
2648 		if (!igmp_v1enable) {
2649 			os_log(OS_LOG_DEFAULT, "%s: transition from v%d->v%d "
2650 			    "on %s\n", __func__,
2651 			    igi->igi_version, IGMP_VERSION_3,
2652 			    if_name(igi->igi_ifp));
2653 			igi->igi_v1_timer = 0;
2654 			igi->igi_version = IGMP_VERSION_3;
2655 			IF_DRAIN(&igi->igi_v2q);
2656 		} else {
2657 			--igi->igi_v1_timer;
2658 		}
2659 		if (igi->igi_v2_timer > 0) {
2660 			IGMP_PRINTF(("%s: cancel v2 timer on 0x%llx(%s)\n",
2661 			    __func__,
2662 			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2663 			    if_name(igi->igi_ifp)));
2664 			igi->igi_v2_timer = 0;
2665 		}
2666 	}
2667 }
2668 
2669 /*
2670  * Dispatch an IGMPv1/v2 host report or leave message.
2671  * These are always small enough to fit inside a single mbuf.
2672  */
2673 static int
igmp_v1v2_queue_report(struct in_multi * inm,const int type)2674 igmp_v1v2_queue_report(struct in_multi *inm, const int type)
2675 {
2676 	struct ifnet            *ifp;
2677 	struct igmp             *igmp;
2678 	struct ip               *ip;
2679 	struct mbuf             *m;
2680 	int                     error = 0;
2681 
2682 	INM_LOCK_ASSERT_HELD(inm);
2683 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2684 
2685 	ifp = inm->inm_ifp;
2686 
2687 	MGETHDR(m, M_DONTWAIT, MT_DATA);
2688 	if (m == NULL) {
2689 		return ENOMEM;
2690 	}
2691 	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
2692 
2693 	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);
2694 
2695 	m->m_data += sizeof(struct ip);
2696 	m->m_len = sizeof(struct igmp);
2697 
2698 	igmp = mtod(m, struct igmp *);
2699 	igmp->igmp_type = (u_char)type;
2700 	igmp->igmp_code = 0;
2701 	igmp->igmp_group = inm->inm_addr;
2702 	igmp->igmp_cksum = 0;
2703 	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));
2704 
2705 	m->m_data -= sizeof(struct ip);
2706 	m->m_len += sizeof(struct ip);
2707 
2708 	ip = mtod(m, struct ip *);
2709 	ip->ip_tos = 0;
2710 	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
2711 	ip->ip_off = 0;
2712 	ip->ip_p = IPPROTO_IGMP;
2713 	ip->ip_src.s_addr = INADDR_ANY;
2714 
2715 	if (type == IGMP_HOST_LEAVE_MESSAGE) {
2716 		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
2717 	} else {
2718 		ip->ip_dst = inm->inm_addr;
2719 	}
2720 
2721 	igmp_save_context(m, ifp);
2722 
2723 	m->m_flags |= M_IGMPV2;
2724 	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK) {
2725 		m->m_flags |= M_IGMP_LOOP;
2726 	}
2727 
2728 	/*
2729 	 * Due to the fact that at this point we are possibly holding
2730 	 * in_multihead_lock in shared or exclusive mode, we can't call
2731 	 * igmp_sendpkt() here since that will eventually call ip_output(),
2732 	 * which will try to lock in_multihead_lock and cause a deadlock.
2733 	 * Instead we defer the work to the igmp_timeout() thread, thus
2734 	 * avoiding unlocking in_multihead_lock here.
2735 	 */
2736 	if (IF_QFULL(&inm->inm_igi->igi_v2q)) {
2737 		os_log_error(OS_LOG_DEFAULT,
2738 		    "%s: v1 / v2 outbound queue full on %s\n",
2739 		    __func__, if_name(ifp));
2740 		error = ENOMEM;
2741 		m_freem(m);
2742 	} else {
2743 		IF_ENQUEUE(&inm->inm_igi->igi_v2q, m);
2744 		VERIFY(error == 0);
2745 	}
2746 	return error;
2747 }
2748 
2749 /*
2750  * Process a state change from the upper layer for the given IPv4 group.
2751  *
2752  * Each socket holds a reference on the in_multi in its own ip_moptions.
2753  * The socket layer will have made the necessary updates to the group
2754  * state, it is now up to IGMP to issue a state change report if there
2755  * has been any change between T0 (when the last state-change was issued)
2756  * and T1 (now).
2757  *
2758  * We use the IGMPv3 state machine at group level. The IGMP module
2759  * however makes the decision as to which IGMP protocol version to speak.
2760  * A state change *from* INCLUDE {} always means an initial join.
2761  * A state change *to* INCLUDE {} always means a final leave.
2762  *
2763  * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
2764  * save ourselves a bunch of work; any exclusive mode groups need not
2765  * compute source filter lists.
2766  */
2767 int
igmp_change_state(struct in_multi * inm,struct igmp_tparams * itp)2768 igmp_change_state(struct in_multi *inm, struct igmp_tparams *itp)
2769 {
2770 	struct igmp_ifinfo *igi;
2771 	struct ifnet *ifp;
2772 	int error = 0;
2773 
2774 	VERIFY(itp != NULL);
2775 	bzero(itp, sizeof(*itp));
2776 
2777 	INM_LOCK_ASSERT_HELD(inm);
2778 	VERIFY(inm->inm_igi != NULL);
2779 	IGI_LOCK_ASSERT_NOTHELD(inm->inm_igi);
2780 
2781 	/*
2782 	 * Try to detect if the upper layer just asked us to change state
2783 	 * for an interface which has now gone away.
2784 	 */
2785 	VERIFY(inm->inm_ifma != NULL);
2786 	ifp = inm->inm_ifma->ifma_ifp;
2787 	/*
2788 	 * Sanity check that netinet's notion of ifp is the same as net's.
2789 	 */
2790 	VERIFY(inm->inm_ifp == ifp);
2791 
2792 	igi = IGMP_IFINFO(ifp);
2793 	VERIFY(igi != NULL);
2794 
2795 	/*
2796 	 * If we detect a state transition to or from MCAST_UNDEFINED
2797 	 * for this group, then we are starting or finishing an IGMP
2798 	 * life cycle for this group.
2799 	 */
2800 	if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
2801 		IGMP_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2802 		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode));
2803 		if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
2804 			IGMP_PRINTF(("%s: initial join\n", __func__));
2805 			error = igmp_initial_join(inm, igi, itp);
2806 			goto out;
2807 		} else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
2808 			IGMP_PRINTF(("%s: final leave\n", __func__));
2809 			igmp_final_leave(inm, igi, itp);
2810 			goto out;
2811 		}
2812 	} else {
2813 		IGMP_PRINTF(("%s: filter set change\n", __func__));
2814 	}
2815 
2816 	error = igmp_handle_state_change(inm, igi, itp);
2817 out:
2818 	return error;
2819 }
2820 
2821 /*
2822  * Perform the initial join for an IGMP group.
2823  *
2824  * When joining a group:
2825  *  If the group should have its IGMP traffic suppressed, do nothing.
2826  *  IGMPv1 starts sending IGMPv1 host membership reports.
2827  *  IGMPv2 starts sending IGMPv2 host membership reports.
2828  *  IGMPv3 will schedule an IGMPv3 state-change report containing the
2829  *  initial state of the membership.
2830  */
2831 static int
igmp_initial_join(struct in_multi * inm,struct igmp_ifinfo * igi,struct igmp_tparams * itp)2832 igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi,
2833     struct igmp_tparams *itp)
2834 {
2835 	struct ifnet            *ifp;
2836 	struct ifqueue          *ifq;
2837 	int                      error, retval, syncstates;
2838 
2839 	INM_LOCK_ASSERT_HELD(inm);
2840 	IGI_LOCK_ASSERT_NOTHELD(igi);
2841 	VERIFY(itp != NULL);
2842 
2843 	IGMP_INET_PRINTF(inm->inm_addr,
2844 	    ("%s: initial join %s on ifp 0x%llx(%s)\n", __func__,
2845 	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
2846 	    if_name(inm->inm_ifp)));
2847 
2848 	error = 0;
2849 	syncstates = 1;
2850 
2851 	ifp = inm->inm_ifp;
2852 
2853 	IGI_LOCK(igi);
2854 	VERIFY(igi->igi_ifp == ifp);
2855 
2856 	/*
2857 	 * Groups joined on loopback or marked as 'not reported',
2858 	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
2859 	 * are never reported in any IGMP protocol exchanges.
2860 	 * All other groups enter the appropriate IGMP state machine
2861 	 * for the version in use on this link.
2862 	 * A link marked as IGIF_SILENT causes IGMP to be completely
2863 	 * disabled for the link.
2864 	 */
2865 	if ((ifp->if_flags & IFF_LOOPBACK) ||
2866 	    (igi->igi_flags & IGIF_SILENT) ||
2867 	    !igmp_isgroupreported(inm->inm_addr)) {
2868 		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
2869 		    __func__));
2870 		inm->inm_state = IGMP_SILENT_MEMBER;
2871 		inm->inm_timer = 0;
2872 	} else {
2873 		/*
2874 		 * Deal with overlapping in_multi lifecycle.
2875 		 * If this group was LEAVING, then make sure
2876 		 * we drop the reference we picked up to keep the
2877 		 * group around for the final INCLUDE {} enqueue.
2878 		 * Since we cannot call in_multi_detach() here,
2879 		 * defer this task to the timer routine.
2880 		 */
2881 		if (igi->igi_version == IGMP_VERSION_3 &&
2882 		    inm->inm_state == IGMP_LEAVING_MEMBER) {
2883 			VERIFY(inm->inm_nrelecnt != 0);
2884 			igmp_append_relq(igi, inm);
2885 		}
2886 
2887 		inm->inm_state = IGMP_REPORTING_MEMBER;
2888 
2889 		switch (igi->igi_version) {
2890 		case IGMP_VERSION_1:
2891 		case IGMP_VERSION_2:
2892 			inm->inm_state = IGMP_IDLE_MEMBER;
2893 			error = igmp_v1v2_queue_report(inm,
2894 			    (igi->igi_version == IGMP_VERSION_2) ?
2895 			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
2896 			    IGMP_v1_HOST_MEMBERSHIP_REPORT);
2897 
2898 			INM_LOCK_ASSERT_HELD(inm);
2899 			IGI_LOCK_ASSERT_HELD(igi);
2900 
2901 			if (error == 0) {
2902 				inm->inm_timer =
2903 				    IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
2904 				itp->cst = 1;
2905 			}
2906 			break;
2907 
2908 		case IGMP_VERSION_3:
2909 			/*
2910 			 * Defer update of T0 to T1, until the first copy
2911 			 * of the state change has been transmitted.
2912 			 */
2913 			syncstates = 0;
2914 
2915 			/*
2916 			 * Immediately enqueue a State-Change Report for
2917 			 * this interface, freeing any previous reports.
2918 			 * Don't kick the timers if there is nothing to do,
2919 			 * or if an error occurred.
2920 			 */
2921 			ifq = &inm->inm_scq;
2922 			IF_DRAIN(ifq);
2923 			retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
2924 			    0, 0);
2925 			itp->cst = (ifq->ifq_len > 0);
2926 			IGMP_PRINTF(("%s: enqueue record = %d\n",
2927 			    __func__, retval));
2928 			if (retval <= 0) {
2929 				error = retval * -1;
2930 				break;
2931 			}
2932 
2933 			/*
2934 			 * Schedule transmission of pending state-change
2935 			 * report up to RV times for this link. The timer
2936 			 * will fire at the next igmp_timeout (1 second),
2937 			 * giving us an opportunity to merge the reports.
2938 			 */
2939 			if (igi->igi_flags & IGIF_LOOPBACK) {
2940 				inm->inm_scrv = 1;
2941 			} else {
2942 				VERIFY(igi->igi_rv > 1);
2943 				inm->inm_scrv = (uint16_t)igi->igi_rv;
2944 			}
2945 			inm->inm_sctimer = 1;
2946 			itp->sct = 1;
2947 
2948 			error = 0;
2949 			break;
2950 		}
2951 	}
2952 	IGI_UNLOCK(igi);
2953 
2954 	/*
2955 	 * Only update the T0 state if state change is atomic,
2956 	 * i.e. we don't need to wait for a timer to fire before we
2957 	 * can consider the state change to have been communicated.
2958 	 */
2959 	if (syncstates) {
2960 		inm_commit(inm);
2961 		IGMP_INET_PRINTF(inm->inm_addr,
2962 		    ("%s: T1->T0 for %s / %s\n", __func__,
2963 		    _igmp_inet_buf, if_name(inm->inm_ifp)));
2964 	}
2965 
2966 	return error;
2967 }
2968 
2969 /*
2970  * Issue an intermediate state change during the IGMP life-cycle.
2971  */
2972 static int
igmp_handle_state_change(struct in_multi * inm,struct igmp_ifinfo * igi,struct igmp_tparams * itp)2973 igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi,
2974     struct igmp_tparams *itp)
2975 {
2976 	struct ifnet            *ifp;
2977 
2978 	INM_LOCK_ASSERT_HELD(inm);
2979 	IGI_LOCK_ASSERT_NOTHELD(igi);
2980 	VERIFY(itp != NULL);
2981 
2982 	IGMP_INET_PRINTF(inm->inm_addr,
2983 	    ("%s: state change for %s on ifp 0x%llx(%s)\n", __func__,
2984 	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
2985 	    if_name(inm->inm_ifp)));
2986 
2987 	ifp = inm->inm_ifp;
2988 
2989 	IGI_LOCK(igi);
2990 	VERIFY(igi->igi_ifp == ifp);
2991 
2992 	if ((ifp->if_flags & IFF_LOOPBACK) ||
2993 	    (igi->igi_flags & IGIF_SILENT) ||
2994 	    !igmp_isgroupreported(inm->inm_addr) ||
2995 	    (igi->igi_version != IGMP_VERSION_3)) {
2996 		IGI_UNLOCK(igi);
2997 		if (!igmp_isgroupreported(inm->inm_addr)) {
2998 			IGMP_PRINTF(("%s: not kicking state "
2999 			    "machine for silent group\n", __func__));
3000 		}
3001 		IGMP_PRINTF(("%s: nothing to do \n", __func__));
3002 		inm_commit(inm);
3003 		IGMP_INET_PRINTF(inm->inm_addr,
3004 		    ("%s: T1 -> T0 for %s/%s\n", __func__,
3005 		    _igmp_inet_buf, inm->inm_ifp->if_name));
3006 		goto done;
3007 	}
3008 
3009 	IF_DRAIN(&inm->inm_scq);
3010 
3011 	int retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
3012 	itp->cst = (inm->inm_scq.ifq_len > 0);
3013 	IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
3014 	// N.B.: igmp_v3_enqueue_group_record() returned the number of bytes sent.
3015 	if (retval <= 0) {
3016 		IGI_UNLOCK(igi);
3017 		return -retval;
3018 	}
3019 	/*
3020 	 * If record(s) were enqueued, start the state-change
3021 	 * report timer for this group.
3022 	 */
3023 	inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : (uint16_t)igi->igi_rv);
3024 	inm->inm_sctimer = 1;
3025 	itp->sct = 1;
3026 	IGI_UNLOCK(igi);
3027 done:
3028 	return 0;
3029 }
3030 
3031 /*
3032  * Perform the final leave for an IGMP group.
3033  *
3034  * When leaving a group:
3035  *  IGMPv1 does nothing.
3036  *  IGMPv2 sends a host leave message, if and only if we are the reporter.
3037  *  IGMPv3 enqueues a state-change report containing a transition
3038  *  to INCLUDE {} for immediate transmission.
3039  */
3040 static void
igmp_final_leave(struct in_multi * inm,struct igmp_ifinfo * igi,struct igmp_tparams * itp)3041 igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi,
3042     struct igmp_tparams *itp)
3043 {
3044 	int syncstates = 1;
3045 	bool retried_already = false;
3046 
3047 	INM_LOCK_ASSERT_HELD(inm);
3048 	IGI_LOCK_ASSERT_NOTHELD(igi);
3049 	VERIFY(itp != NULL);
3050 
3051 	IGMP_INET_PRINTF(inm->inm_addr,
3052 	    ("%s: final leave %s on ifp 0x%llx(%s)\n", __func__,
3053 	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
3054 	    if_name(inm->inm_ifp)));
3055 
3056 retry:
3057 	switch (inm->inm_state) {
3058 	case IGMP_NOT_MEMBER:
3059 	case IGMP_SILENT_MEMBER:
3060 	case IGMP_LEAVING_MEMBER:
3061 		/* Already leaving or left; do nothing. */
3062 		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
3063 		    __func__));
3064 		break;
3065 	case IGMP_REPORTING_MEMBER:
3066 	case IGMP_IDLE_MEMBER:
3067 	case IGMP_G_QUERY_PENDING_MEMBER:
3068 	case IGMP_SG_QUERY_PENDING_MEMBER:
3069 		IGI_LOCK(igi);
3070 		if (igi->igi_version == IGMP_VERSION_2) {
3071 			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
3072 			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
3073 				/*
3074 				 * We may be in the process of downgrading to
3075 				 * IGMPv2 but because we just grabbed the
3076 				 * igi_lock we may have lost the race.
3077 				 */
3078 				if (!retried_already) {
3079 					IGI_UNLOCK(igi);
3080 					retried_already = true;
3081 					goto retry;
3082 				} else {
3083 					/*
3084 					 * Proceed with leaving the group
3085 					 * as if it were IGMPv2 even though we
3086 					 * may have an inconsistent multicast state.
3087 					 */
3088 				}
3089 			}
3090 			/* scheduler timer if enqueue is successful */
3091 			itp->cst = (igmp_v1v2_queue_report(inm,
3092 			    IGMP_HOST_LEAVE_MESSAGE) == 0);
3093 
3094 			INM_LOCK_ASSERT_HELD(inm);
3095 			IGI_LOCK_ASSERT_HELD(igi);
3096 
3097 			inm->inm_state = IGMP_NOT_MEMBER;
3098 		} else if (igi->igi_version == IGMP_VERSION_3) {
3099 			/*
3100 			 * Stop group timer and all pending reports.
3101 			 * Immediately enqueue a state-change report
3102 			 * TO_IN {} to be sent on the next timeout,
3103 			 * giving us an opportunity to merge reports.
3104 			 */
3105 			IF_DRAIN(&inm->inm_scq);
3106 			inm->inm_timer = 0;
3107 			if (igi->igi_flags & IGIF_LOOPBACK) {
3108 				inm->inm_scrv = 1;
3109 			} else {
3110 				inm->inm_scrv = (uint16_t)igi->igi_rv;
3111 			}
3112 			IGMP_INET_PRINTF(inm->inm_addr,
3113 			    ("%s: Leaving %s/%s with %d "
3114 			    "pending retransmissions.\n", __func__,
3115 			    _igmp_inet_buf, if_name(inm->inm_ifp),
3116 			    inm->inm_scrv));
3117 			if (inm->inm_scrv == 0) {
3118 				inm->inm_state = IGMP_NOT_MEMBER;
3119 				inm->inm_sctimer = 0;
3120 			} else {
3121 				int retval;
3122 				/*
3123 				 * Stick around in the in_multihead list;
3124 				 * the final detach will be issued by
3125 				 * igmp_v3_process_group_timers() when
3126 				 * the retransmit timer expires.
3127 				 */
3128 				INM_ADDREF_LOCKED(inm);
3129 				VERIFY(inm->inm_debug & IFD_ATTACHED);
3130 				inm->inm_reqcnt++;
3131 				VERIFY(inm->inm_reqcnt >= 1);
3132 				inm->inm_nrelecnt++;
3133 				VERIFY(inm->inm_nrelecnt != 0);
3134 
3135 				retval = igmp_v3_enqueue_group_record(
3136 					&inm->inm_scq, inm, 1, 0, 0);
3137 				itp->cst = (inm->inm_scq.ifq_len > 0);
3138 				KASSERT(retval != 0,
3139 				    ("%s: enqueue record = %d\n", __func__,
3140 				    retval));
3141 
3142 				inm->inm_state = IGMP_LEAVING_MEMBER;
3143 				inm->inm_sctimer = 1;
3144 				itp->sct = 1;
3145 				syncstates = 0;
3146 			}
3147 		}
3148 		IGI_UNLOCK(igi);
3149 		break;
3150 	case IGMP_LAZY_MEMBER:
3151 	case IGMP_SLEEPING_MEMBER:
3152 	case IGMP_AWAKENING_MEMBER:
3153 		/* Our reports are suppressed; do nothing. */
3154 		break;
3155 	}
3156 
3157 	if (syncstates) {
3158 		inm_commit(inm);
3159 		IGMP_INET_PRINTF(inm->inm_addr,
3160 		    ("%s: T1 -> T0 for %s/%s\n", __func__,
3161 		    _igmp_inet_buf, if_name(inm->inm_ifp)));
3162 		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
3163 		IGMP_INET_PRINTF(inm->inm_addr,
3164 		    ("%s: T1 now MCAST_UNDEFINED for %s/%s\n",
3165 		    __func__, _igmp_inet_buf, if_name(inm->inm_ifp)));
3166 	}
3167 }
3168 
3169 /*
3170  * Enqueue an IGMPv3 group record to the given output queue.
3171  *
3172  * XXX This function could do with having the allocation code
3173  * split out, and the multiple-tree-walks coalesced into a single
3174  * routine as has been done in igmp_v3_enqueue_filter_change().
3175  *
3176  * If is_state_change is zero, a current-state record is appended.
3177  * If is_state_change is non-zero, a state-change report is appended.
3178  *
3179  * If is_group_query is non-zero, an mbuf packet chain is allocated.
3180  * If is_group_query is zero, and if there is a packet with free space
3181  * at the tail of the queue, it will be appended to providing there
3182  * is enough free space.
3183  * Otherwise a new mbuf packet chain is allocated.
3184  *
3185  * If is_source_query is non-zero, each source is checked to see if
3186  * it was recorded for a Group-Source query, and will be omitted if
3187  * it is not both in-mode and recorded.
3188  *
3189  * The function will attempt to allocate leading space in the packet
3190  * for the IP/IGMP header to be prepended without fragmenting the chain.
3191  *
3192  * If successful the size of all data appended to the queue is returned,
3193  * otherwise an error code less than zero is returned, or zero if
3194  * no record(s) were appended.
3195  */
3196 static int
igmp_v3_enqueue_group_record(struct ifqueue * ifq,struct in_multi * inm,const int is_state_change,const int is_group_query,const int is_source_query)3197 igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
3198     const int is_state_change, const int is_group_query,
3199     const int is_source_query)
3200 {
3201 	struct igmp_grouprec     ig;
3202 	struct igmp_grouprec    *pig;
3203 	struct ifnet            *ifp;
3204 	struct ip_msource       *ims, *nims;
3205 	mbuf_ref_t               m0, m, md;
3206 	int                      error, is_filter_list_change;
3207 	int                      minrec0len, m0srcs, nbytes, off;
3208 	uint16_t                 msrcs;
3209 	int                      record_has_sources;
3210 	int                      now;
3211 	int                      type;
3212 	in_addr_t                naddr;
3213 	uint16_t                 mode;
3214 	u_int16_t                ig_numsrc;
3215 
3216 	INM_LOCK_ASSERT_HELD(inm);
3217 	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
3218 
3219 	error = 0;
3220 	ifp = inm->inm_ifp;
3221 	is_filter_list_change = 0;
3222 	m = NULL;
3223 	m0 = NULL;
3224 	m0srcs = 0;
3225 	msrcs = 0;
3226 	nbytes = 0;
3227 	nims = NULL;
3228 	record_has_sources = 1;
3229 	pig = NULL;
3230 	type = IGMP_DO_NOTHING;
3231 	mode = inm->inm_st[1].iss_fmode;
3232 
3233 	/*
3234 	 * If we did not transition out of ASM mode during t0->t1,
3235 	 * and there are no source nodes to process, we can skip
3236 	 * the generation of source records.
3237 	 */
3238 	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
3239 	    inm->inm_nsrc == 0) {
3240 		record_has_sources = 0;
3241 	}
3242 
3243 	if (is_state_change) {
3244 		/*
3245 		 * Queue a state change record.
3246 		 * If the mode did not change, and there are non-ASM
3247 		 * listeners or source filters present,
3248 		 * we potentially need to issue two records for the group.
3249 		 * If we are transitioning to MCAST_UNDEFINED, we need
3250 		 * not send any sources.
3251 		 * If there are ASM listeners, and there was no filter
3252 		 * mode transition of any kind, do nothing.
3253 		 */
3254 		if (mode != inm->inm_st[0].iss_fmode) {
3255 			if (mode == MCAST_EXCLUDE) {
3256 				IGMP_PRINTF(("%s: change to EXCLUDE\n",
3257 				    __func__));
3258 				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
3259 			} else {
3260 				IGMP_PRINTF(("%s: change to INCLUDE\n",
3261 				    __func__));
3262 				type = IGMP_CHANGE_TO_INCLUDE_MODE;
3263 				if (mode == MCAST_UNDEFINED) {
3264 					record_has_sources = 0;
3265 				}
3266 			}
3267 		} else {
3268 			if (record_has_sources) {
3269 				is_filter_list_change = 1;
3270 			} else {
3271 				type = IGMP_DO_NOTHING;
3272 			}
3273 		}
3274 	} else {
3275 		/*
3276 		 * Queue a current state record.
3277 		 */
3278 		if (mode == MCAST_EXCLUDE) {
3279 			type = IGMP_MODE_IS_EXCLUDE;
3280 		} else if (mode == MCAST_INCLUDE) {
3281 			type = IGMP_MODE_IS_INCLUDE;
3282 			VERIFY(inm->inm_st[1].iss_asm == 0);
3283 		}
3284 	}
3285 
3286 	/*
3287 	 * Generate the filter list changes using a separate function.
3288 	 */
3289 	if (is_filter_list_change) {
3290 		return igmp_v3_enqueue_filter_change(ifq, inm);
3291 	}
3292 
3293 	if (type == IGMP_DO_NOTHING) {
3294 		IGMP_INET_PRINTF(inm->inm_addr,
3295 		    ("%s: nothing to do for %s/%s\n",
3296 		    __func__, _igmp_inet_buf,
3297 		    if_name(inm->inm_ifp)));
3298 		return 0;
3299 	}
3300 
3301 	/*
3302 	 * If any sources are present, we must be able to fit at least
3303 	 * one in the trailing space of the tail packet's mbuf,
3304 	 * ideally more.
3305 	 */
3306 	minrec0len = sizeof(struct igmp_grouprec);
3307 	if (record_has_sources) {
3308 		minrec0len += sizeof(in_addr_t);
3309 	}
3310 
3311 	IGMP_INET_PRINTF(inm->inm_addr,
3312 	    ("%s: queueing %s for %s/%s\n", __func__,
3313 	    igmp_rec_type_to_str(type), _igmp_inet_buf,
3314 	    if_name(inm->inm_ifp)));
3315 
3316 	/*
3317 	 * Check if we have a packet in the tail of the queue for this
3318 	 * group into which the first group record for this group will fit.
3319 	 * Otherwise allocate a new packet.
3320 	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
3321 	 * Note: Group records for G/GSR query responses MUST be sent
3322 	 * in their own packet.
3323 	 */
3324 	m0 = ifq->ifq_tail;
3325 	if (!is_group_query &&
3326 	    m0 != NULL &&
3327 	    (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
3328 	    (m0->m_pkthdr.len + minrec0len) <
3329 	    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3330 		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3331 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3332 		m = m0;
3333 		IGMP_PRINTF(("%s: use existing packet\n", __func__));
3334 	} else {
3335 		if (IF_QFULL(ifq)) {
3336 			os_log_error(OS_LOG_DEFAULT,
3337 			    "%s: outbound queue full on %s\n", __func__, if_name(ifp));
3338 			return -ENOMEM;
3339 		}
3340 		m = NULL;
3341 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3342 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3343 		if (!is_state_change && !is_group_query) {
3344 			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3345 			if (m) {
3346 				m->m_data += IGMP_LEADINGSPACE;
3347 			}
3348 		}
3349 		if (m == NULL) {
3350 			m = m_gethdr(M_DONTWAIT, MT_DATA);
3351 			if (m) {
3352 				MH_ALIGN(m, IGMP_LEADINGSPACE);
3353 			}
3354 		}
3355 		if (m == NULL) {
3356 			return -ENOMEM;
3357 		}
3358 
3359 		igmp_save_context(m, ifp);
3360 
3361 		IGMP_PRINTF(("%s: allocated first packet\n", __func__));
3362 	}
3363 
3364 	/*
3365 	 * Append group record.
3366 	 * If we have sources, we don't know how many yet.
3367 	 */
3368 	ig.ig_type = (u_char)type;
3369 	ig.ig_datalen = 0;
3370 	ig.ig_numsrc = 0;
3371 	ig.ig_group = inm->inm_addr;
3372 	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3373 		if (m != m0) {
3374 			m_freem(m);
3375 		}
3376 		os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed\n", __func__);
3377 		return -ENOMEM;
3378 	}
3379 	nbytes += sizeof(struct igmp_grouprec);
3380 
3381 	/*
3382 	 * Append as many sources as will fit in the first packet.
3383 	 * If we are appending to a new packet, the chain allocation
3384 	 * may potentially use clusters; use m_getptr() in this case.
3385 	 * If we are appending to an existing packet, we need to obtain
3386 	 * a pointer to the group record after m_append(), in case a new
3387 	 * mbuf was allocated.
3388 	 * Only append sources which are in-mode at t1. If we are
3389 	 * transitioning to MCAST_UNDEFINED state on the group, do not
3390 	 * include source entries.
3391 	 * Only report recorded sources in our filter set when responding
3392 	 * to a group-source query.
3393 	 */
3394 	if (record_has_sources) {
3395 		if (m == m0) {
3396 			md = m_last(m);
3397 			pig = (struct igmp_grouprec *)(void *)
3398 			    (mtod(md, uint8_t *) + md->m_len - nbytes);
3399 		} else {
3400 			md = m_getptr(m, 0, &off);
3401 			pig = (struct igmp_grouprec *)(void *)
3402 			    (mtod(md, uint8_t *) + off);
3403 		}
3404 		msrcs = 0;
3405 		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
3406 #ifdef IGMP_DEBUG
3407 			char buf[MAX_IPv4_STR_LEN];
3408 
3409 			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3410 			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3411 #endif
3412 			now = ims_get_mode(inm, ims, 1);
3413 			IGMP_PRINTF(("%s: node is %d\n", __func__, now));
3414 			if ((now != mode) ||
3415 			    (now == mode && mode == MCAST_UNDEFINED)) {
3416 				IGMP_PRINTF(("%s: skip node\n", __func__));
3417 				continue;
3418 			}
3419 			if (is_source_query && ims->ims_stp == 0) {
3420 				IGMP_PRINTF(("%s: skip unrecorded node\n",
3421 				    __func__));
3422 				continue;
3423 			}
3424 			IGMP_PRINTF(("%s: append node\n", __func__));
3425 			naddr = htonl(ims->ims_haddr);
3426 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3427 				if (m != m0) {
3428 					m_freem(m);
3429 				}
3430 				os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed\n",
3431 				    __func__);
3432 				return -ENOMEM;
3433 			}
3434 			nbytes += sizeof(in_addr_t);
3435 			++msrcs;
3436 			if (msrcs == m0srcs) {
3437 				break;
3438 			}
3439 		}
3440 		IGMP_PRINTF(("%s: msrcs is %d this packet\n", __func__,
3441 		    msrcs));
3442 		ig_numsrc = htons(msrcs);
3443 		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3444 		nbytes += (msrcs * sizeof(in_addr_t));
3445 	}
3446 
3447 	if (is_source_query && msrcs == 0) {
3448 		IGMP_PRINTF(("%s: no recorded sources to report\n", __func__));
3449 		if (m != m0) {
3450 			m_freem(m);
3451 		}
3452 		return 0;
3453 	}
3454 
3455 	/*
3456 	 * We are good to go with first packet.
3457 	 */
3458 	if (m != m0) {
3459 		IGMP_PRINTF(("%s: enqueueing first packet\n", __func__));
3460 		m->m_pkthdr.vt_nrecs = 1;
3461 		IF_ENQUEUE(ifq, m);
3462 	} else {
3463 		m->m_pkthdr.vt_nrecs++;
3464 	}
3465 	/*
3466 	 * No further work needed if no source list in packet(s).
3467 	 */
3468 	if (!record_has_sources) {
3469 		return nbytes;
3470 	}
3471 
3472 	/*
3473 	 * Whilst sources remain to be announced, we need to allocate
3474 	 * a new packet and fill out as many sources as will fit.
3475 	 * Always try for a cluster first.
3476 	 */
3477 	while (nims != NULL) {
3478 		if (IF_QFULL(ifq)) {
3479 			os_log_error(OS_LOG_DEFAULT, "%s: outbound queue full on %s\n",
3480 			    __func__, if_name(ifp));
3481 			return -ENOMEM;
3482 		}
3483 		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3484 		if (m) {
3485 			m->m_data += IGMP_LEADINGSPACE;
3486 		}
3487 		if (m == NULL) {
3488 			m = m_gethdr(M_DONTWAIT, MT_DATA);
3489 			if (m) {
3490 				MH_ALIGN(m, IGMP_LEADINGSPACE);
3491 			}
3492 		}
3493 		if (m == NULL) {
3494 			return -ENOMEM;
3495 		}
3496 		igmp_save_context(m, ifp);
3497 		md = m_getptr(m, 0, &off);
3498 		pig = (struct igmp_grouprec *)(void *)
3499 		    (mtod(md, uint8_t *) + off);
3500 		IGMP_PRINTF(("%s: allocated next packet\n", __func__));
3501 
3502 		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3503 			if (m != m0) {
3504 				m_freem(m);
3505 			}
3506 			os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed\n",
3507 			    __func__);
3508 			return -ENOMEM;
3509 		}
3510 		m->m_pkthdr.vt_nrecs = 1;
3511 		nbytes += sizeof(struct igmp_grouprec);
3512 
3513 		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3514 		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3515 
3516 		msrcs = 0;
3517 		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3518 #ifdef IGMP_DEBUG
3519 			char buf[MAX_IPv4_STR_LEN];
3520 
3521 			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3522 			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3523 #endif
3524 			now = ims_get_mode(inm, ims, 1);
3525 			if ((now != mode) ||
3526 			    (now == mode && mode == MCAST_UNDEFINED)) {
3527 				IGMP_PRINTF(("%s: skip node\n", __func__));
3528 				continue;
3529 			}
3530 			if (is_source_query && ims->ims_stp == 0) {
3531 				IGMP_PRINTF(("%s: skip unrecorded node\n",
3532 				    __func__));
3533 				continue;
3534 			}
3535 			IGMP_PRINTF(("%s: append node\n", __func__));
3536 			naddr = htonl(ims->ims_haddr);
3537 			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3538 				if (m != m0) {
3539 					m_freem(m);
3540 				}
3541 				os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed",
3542 				    __func__);
3543 				return -ENOMEM;
3544 			}
3545 			++msrcs;
3546 			if (msrcs == m0srcs) {
3547 				break;
3548 			}
3549 		}
3550 		ig_numsrc = htons(msrcs);
3551 		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3552 		nbytes += (msrcs * sizeof(in_addr_t));
3553 
3554 		IGMP_PRINTF(("%s: enqueueing next packet\n", __func__));
3555 		IF_ENQUEUE(ifq, m);
3556 	}
3557 
3558 	return nbytes;
3559 }
3560 
/*
 * Marker for which record passes have been completed.
 * The enumerator values are chosen to match the per-source filter
 * modes kept on each ip_msource node, so a mode can simply be cast
 * to this type.
 */
typedef enum {
	REC_NONE  = 0,                          /* MCAST_UNDEFINED */
	REC_ALLOW = 1 << 0,                     /* MCAST_INCLUDE */
	REC_BLOCK = 1 << 1,                     /* MCAST_EXCLUDE */
	REC_FULL  = (REC_ALLOW | REC_BLOCK)     /* both passes done */
} rectype_t;
3572 
3573 /*
3574  * Enqueue an IGMPv3 filter list change to the given output queue.
3575  *
3576  * Source list filter state is held in an RB-tree. When the filter list
3577  * for a group is changed without changing its mode, we need to compute
3578  * the deltas between T0 and T1 for each source in the filter set,
3579  * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3580  *
3581  * As we may potentially queue two record types, and the entire R-B tree
3582  * needs to be walked at once, we break this out into its own function
3583  * so we can generate a tightly packed queue of packets.
3584  *
3585  * XXX This could be written to only use one tree walk, although that makes
3586  * serializing into the mbuf chains a bit harder. For now we do two walks
3587  * which makes things easier on us, and it may or may not be harder on
3588  * the L2 cache.
3589  *
3590  * If successful the size of all data appended to the queue is returned,
3591  * otherwise an error code less than zero is returned, or zero if
3592  * no record(s) were appended.
3593  */
3594 static int
igmp_v3_enqueue_filter_change(struct ifqueue * ifq,struct in_multi * inm)3595 igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
3596 {
3597 	static const int MINRECLEN =
3598 	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
3599 	struct ifnet            *ifp;
3600 	struct igmp_grouprec     ig;
3601 	struct igmp_grouprec    *pig;
3602 	struct ip_msource       *ims, *nims;
3603 	mbuf_ref_t               m0, m, md;
3604 	in_addr_t                naddr;
3605 	int                      m0srcs, nbytes, npbytes, off, schanged;
3606 	uint16_t                 rsrcs;
3607 	int                      nallow, nblock;
3608 	uint16_t                 mode;
3609 	uint8_t                  now, then;
3610 	rectype_t                crt, drt, nrt;
3611 	u_int16_t                ig_numsrc;
3612 
3613 	INM_LOCK_ASSERT_HELD(inm);
3614 
3615 	if (inm->inm_nsrc == 0 ||
3616 	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0)) {
3617 		return 0;
3618 	}
3619 
3620 	ifp = inm->inm_ifp;                     /* interface */
3621 	mode = inm->inm_st[1].iss_fmode;        /* filter mode at t1 */
3622 	crt = REC_NONE; /* current group record type */
3623 	drt = REC_NONE; /* mask of completed group record types */
3624 	nrt = REC_NONE; /* record type for current node */
3625 	m0srcs = 0;     /* # source which will fit in current mbuf chain */
3626 	nbytes = 0;     /* # of bytes appended to group's state-change queue */
3627 	npbytes = 0;    /* # of bytes appended this packet */
3628 	rsrcs = 0;      /* # sources encoded in current record */
3629 	schanged = 0;   /* # nodes encoded in overall filter change */
3630 	nallow = 0;     /* # of source entries in ALLOW_NEW */
3631 	nblock = 0;     /* # of source entries in BLOCK_OLD */
3632 	nims = NULL;    /* next tree node pointer */
3633 
3634 	/*
3635 	 * For each possible filter record mode.
3636 	 * The first kind of source we encounter tells us which
3637 	 * is the first kind of record we start appending.
3638 	 * If a node transitioned to UNDEFINED at t1, its mode is treated
3639 	 * as the inverse of the group's filter mode.
3640 	 */
3641 	while (drt != REC_FULL) {
3642 		do {
3643 			m0 = ifq->ifq_tail;
3644 			if (m0 != NULL &&
3645 			    (m0->m_pkthdr.vt_nrecs + 1 <=
3646 			    IGMP_V3_REPORT_MAXRECS) &&
3647 			    (m0->m_pkthdr.len + MINRECLEN) <
3648 			    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3649 				m = m0;
3650 				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3651 				    sizeof(struct igmp_grouprec)) /
3652 				    sizeof(in_addr_t);
3653 				IGMP_PRINTF(("%s: use previous packet\n",
3654 				    __func__));
3655 			} else {
3656 				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3657 				if (m) {
3658 					m->m_data += IGMP_LEADINGSPACE;
3659 				}
3660 				if (m == NULL) {
3661 					m = m_gethdr(M_DONTWAIT, MT_DATA);
3662 					if (m) {
3663 						MH_ALIGN(m, IGMP_LEADINGSPACE);
3664 					}
3665 				}
3666 				if (m == NULL) {
3667 					os_log_error(OS_LOG_DEFAULT, "%s: m_get*() failed",
3668 					    __func__);
3669 					return -ENOMEM;
3670 				}
3671 				m->m_pkthdr.vt_nrecs = 0;
3672 				igmp_save_context(m, ifp);
3673 				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3674 				    sizeof(struct igmp_grouprec)) /
3675 				    sizeof(in_addr_t);
3676 				npbytes = 0;
3677 				IGMP_PRINTF(("%s: allocated new packet\n",
3678 				    __func__));
3679 			}
3680 			/*
3681 			 * Append the IGMP group record header to the
3682 			 * current packet's data area.
3683 			 * Recalculate pointer to free space for next
3684 			 * group record, in case m_append() allocated
3685 			 * a new mbuf or cluster.
3686 			 */
3687 			memset(&ig, 0, sizeof(ig));
3688 			ig.ig_group = inm->inm_addr;
3689 			if (!m_append(m, sizeof(ig), (void *)&ig)) {
3690 				if (m != m0) {
3691 					m_freem(m);
3692 				}
3693 				os_log_error(OS_LOG_DEFAULT,
3694 				    "%s: m_append() failed\n",
3695 				    __func__);
3696 				return -ENOMEM;
3697 			}
3698 			npbytes += sizeof(struct igmp_grouprec);
3699 			if (m != m0) {
3700 				/* new packet; offset in c hain */
3701 				md = m_getptr(m, npbytes -
3702 				    sizeof(struct igmp_grouprec), &off);
3703 				pig = (struct igmp_grouprec *)(void *)(mtod(md,
3704 				    uint8_t *) + off);
3705 			} else {
3706 				/* current packet; offset from last append */
3707 				md = m_last(m);
3708 				pig = (struct igmp_grouprec *)(void *)(mtod(md,
3709 				    uint8_t *) + md->m_len -
3710 				    sizeof(struct igmp_grouprec));
3711 			}
3712 			/*
3713 			 * Begin walking the tree for this record type
3714 			 * pass, or continue from where we left off
3715 			 * previously if we had to allocate a new packet.
3716 			 * Only report deltas in-mode at t1.
3717 			 * We need not report included sources as allowed
3718 			 * if we are in inclusive mode on the group,
3719 			 * however the converse is not true.
3720 			 */
3721 			rsrcs = 0;
3722 			if (nims == NULL) {
3723 				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
3724 			}
3725 			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3726 #ifdef IGMP_DEBUG
3727 				char buf[MAX_IPv4_STR_LEN];
3728 
3729 				inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3730 				IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3731 #endif
3732 				now = ims_get_mode(inm, ims, 1);
3733 				then = ims_get_mode(inm, ims, 0);
3734 				IGMP_PRINTF(("%s: mode: t0 %d, t1 %d\n",
3735 				    __func__, then, now));
3736 				if (now == then) {
3737 					IGMP_PRINTF(("%s: skip unchanged\n",
3738 					    __func__));
3739 					continue;
3740 				}
3741 				if (mode == MCAST_EXCLUDE &&
3742 				    now == MCAST_INCLUDE) {
3743 					IGMP_PRINTF(("%s: skip IN src on EX "
3744 					    "group\n", __func__));
3745 					continue;
3746 				}
3747 				nrt = (rectype_t)now;
3748 				if (nrt == REC_NONE) {
3749 					nrt = (rectype_t)(~mode & REC_FULL);
3750 				}
3751 				if (schanged++ == 0) {
3752 					crt = nrt;
3753 				} else if (crt != nrt) {
3754 					continue;
3755 				}
3756 				naddr = htonl(ims->ims_haddr);
3757 				if (!m_append(m, sizeof(in_addr_t),
3758 				    (void *)&naddr)) {
3759 					if (m != m0) {
3760 						m_freem(m);
3761 					}
3762 					os_log_error(OS_LOG_DEFAULT, "%s: m_append() failed\n",
3763 					    __func__);
3764 					return -ENOMEM;
3765 				}
3766 				nallow += !!(crt == REC_ALLOW);
3767 				nblock += !!(crt == REC_BLOCK);
3768 				if (++rsrcs == m0srcs) {
3769 					break;
3770 				}
3771 			}
3772 			/*
3773 			 * If we did not append any tree nodes on this
3774 			 * pass, back out of allocations.
3775 			 */
3776 			if (rsrcs == 0) {
3777 				npbytes -= sizeof(struct igmp_grouprec);
3778 				if (m != m0) {
3779 					IGMP_PRINTF(("%s: m_free(m)\n",
3780 					    __func__));
3781 					m_freem(m);
3782 				} else {
3783 					IGMP_PRINTF(("%s: m_adj(m, -ig)\n",
3784 					    __func__));
3785 					m_adj(m, -((int)sizeof(
3786 						    struct igmp_grouprec)));
3787 				}
3788 				continue;
3789 			}
3790 			npbytes += (rsrcs * sizeof(in_addr_t));
3791 			if (crt == REC_ALLOW) {
3792 				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
3793 			} else if (crt == REC_BLOCK) {
3794 				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
3795 			}
3796 			ig_numsrc = htons(rsrcs);
3797 			bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3798 			/*
3799 			 * Count the new group record, and enqueue this
3800 			 * packet if it wasn't already queued.
3801 			 */
3802 			m->m_pkthdr.vt_nrecs++;
3803 			if (m != m0) {
3804 				IF_ENQUEUE(ifq, m);
3805 			}
3806 			nbytes += npbytes;
3807 		} while (nims != NULL);
3808 		drt |= crt;
3809 		crt = (~crt & REC_FULL);
3810 	}
3811 
3812 	IGMP_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
3813 	    nallow, nblock));
3814 
3815 	return nbytes;
3816 }
3817 
3818 static int
igmp_v3_merge_state_changes(struct in_multi * inm,struct ifqueue * ifscq)3819 igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
3820 {
3821 	struct ifqueue  *gq;
3822 	mbuf_ref_t       m;             /* pending state-change */
3823 	mbuf_ref_t       m0;            /* copy of pending state-change */
3824 	mbuf_ref_t       mt;            /* last state-change in packet */
3825 	mbuf_ref_t       n;;
3826 	int              docopy, domerge;
3827 	u_int            recslen;
3828 
3829 	INM_LOCK_ASSERT_HELD(inm);
3830 
3831 	docopy = 0;
3832 	domerge = 0;
3833 	recslen = 0;
3834 
3835 	/*
3836 	 * If there are further pending retransmissions, make a writable
3837 	 * copy of each queued state-change message before merging.
3838 	 */
3839 	if (inm->inm_scrv > 0) {
3840 		docopy = 1;
3841 	}
3842 
3843 	gq = &inm->inm_scq;
3844 #ifdef IGMP_DEBUG
3845 	if (gq->ifq_head == NULL) {
3846 		IGMP_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
3847 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
3848 	}
3849 #endif
3850 
3851 	/*
3852 	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
3853 	 * packet might not always be at the head of the ifqueue.
3854 	 */
3855 	m = gq->ifq_head;
3856 	while (m != NULL) {
3857 		/*
3858 		 * Only merge the report into the current packet if
3859 		 * there is sufficient space to do so; an IGMPv3 report
3860 		 * packet may only contain 65,535 group records.
3861 		 * Always use a simple mbuf chain concatentation to do this,
3862 		 * as large state changes for single groups may have
3863 		 * allocated clusters.
3864 		 */
3865 		domerge = 0;
3866 		mt = ifscq->ifq_tail;
3867 		if (mt != NULL) {
3868 			recslen = m_length(m);
3869 
3870 			if ((mt->m_pkthdr.vt_nrecs +
3871 			    m->m_pkthdr.vt_nrecs <=
3872 			    IGMP_V3_REPORT_MAXRECS) &&
3873 			    (mt->m_pkthdr.len + recslen <=
3874 			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE))) {
3875 				domerge = 1;
3876 			}
3877 		}
3878 
3879 		if (!domerge && IF_QFULL(gq)) {
3880 			os_log_error(OS_LOG_DEFAULT,
3881 			    "%s: outbound queue full on %s\n",
3882 			    __func__, if_name(inm->inm_ifp));
3883 			n = m->m_nextpkt;
3884 			if (!docopy) {
3885 				IF_REMQUEUE(gq, m);
3886 				m_freem(m);
3887 			}
3888 			m = n;
3889 			continue;
3890 		}
3891 
3892 		if (!docopy) {
3893 			IGMP_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
3894 			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
3895 			n = m->m_nextpkt;
3896 			IF_REMQUEUE(gq, m);
3897 			m0 = m;
3898 			m = n;
3899 		} else {
3900 			IGMP_PRINTF(("%s: copying 0x%llx\n", __func__,
3901 			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
3902 			m0 = m_dup(m, M_NOWAIT);
3903 			if (m0 == NULL) {
3904 				return ENOMEM;
3905 			}
3906 			m0->m_nextpkt = NULL;
3907 			m = m->m_nextpkt;
3908 		}
3909 
3910 		if (!domerge) {
3911 			IGMP_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n",
3912 			    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
3913 			    (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
3914 			IF_ENQUEUE(ifscq, m0);
3915 		} else {
3916 			struct mbuf *mtl;       /* last mbuf of packet mt */
3917 
3918 			IGMP_PRINTF(("%s: merging 0x%llx with ifscq tail "
3919 			    "0x%llx)\n", __func__,
3920 			    (uint64_t)VM_KERNEL_ADDRPERM(m0),
3921 			    (uint64_t)VM_KERNEL_ADDRPERM(mt)));
3922 
3923 			mtl = m_last(mt);
3924 			m0->m_flags &= ~M_PKTHDR;
3925 			mt->m_pkthdr.len += recslen;
3926 			mt->m_pkthdr.vt_nrecs +=
3927 			    m0->m_pkthdr.vt_nrecs;
3928 
3929 			mtl->m_next = m0;
3930 		}
3931 	}
3932 
3933 	return 0;
3934 }
3935 
3936 /*
3937  * Respond to a pending IGMPv3 General Query.
3938  */
3939 static uint32_t
igmp_v3_dispatch_general_query(struct igmp_ifinfo * igi)3940 igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
3941 {
3942 	struct ifnet            *ifp;
3943 	struct in_multi         *inm;
3944 	struct in_multistep     step;
3945 	int                      retval, loop;
3946 
3947 	IGI_LOCK_ASSERT_HELD(igi);
3948 
3949 	VERIFY(igi->igi_version == IGMP_VERSION_3);
3950 
3951 	ifp = igi->igi_ifp;
3952 	IGI_UNLOCK(igi);
3953 
3954 	in_multihead_lock_shared();
3955 	IN_FIRST_MULTI(step, inm);
3956 	while (inm != NULL) {
3957 		INM_LOCK(inm);
3958 		if (inm->inm_ifp != ifp) {
3959 			goto next;
3960 		}
3961 
3962 		switch (inm->inm_state) {
3963 		case IGMP_NOT_MEMBER:
3964 		case IGMP_SILENT_MEMBER:
3965 			break;
3966 		case IGMP_REPORTING_MEMBER:
3967 		case IGMP_IDLE_MEMBER:
3968 		case IGMP_LAZY_MEMBER:
3969 		case IGMP_SLEEPING_MEMBER:
3970 		case IGMP_AWAKENING_MEMBER:
3971 			inm->inm_state = IGMP_REPORTING_MEMBER;
3972 			IGI_LOCK(igi);
3973 			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
3974 			    inm, 0, 0, 0);
3975 			IGI_UNLOCK(igi);
3976 			IGMP_PRINTF(("%s: enqueue record = %d\n",
3977 			    __func__, retval));
3978 			break;
3979 		case IGMP_G_QUERY_PENDING_MEMBER:
3980 		case IGMP_SG_QUERY_PENDING_MEMBER:
3981 		case IGMP_LEAVING_MEMBER:
3982 			break;
3983 		}
3984 next:
3985 		INM_UNLOCK(inm);
3986 		IN_NEXT_MULTI(step, inm);
3987 	}
3988 	in_multihead_lock_done();
3989 
3990 	IGI_LOCK(igi);
3991 	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
3992 	igmp_dispatch_queue(igi, &igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
3993 	    loop);
3994 	IGI_LOCK_ASSERT_HELD(igi);
3995 	/*
3996 	 * Slew transmission of bursts over 1 second intervals.
3997 	 */
3998 	if (igi->igi_gq.ifq_head != NULL) {
3999 		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
4000 			IGMP_RESPONSE_BURST_INTERVAL);
4001 	}
4002 
4003 	return igi->igi_v3_timer;
4004 }
4005 
4006 /*
4007  * Transmit the next pending IGMP message in the output queue.
4008  *
4009  * Must not be called with inm_lock or igi_lock held.
4010  */
4011 static void
igmp_sendpkt(struct mbuf * m)4012 igmp_sendpkt(struct mbuf *m)
4013 {
4014 	struct ip_moptions      *imo;
4015 	struct mbuf             *ipopts, *m0;
4016 	int                     error;
4017 	struct route            ro;
4018 	struct ifnet            *ifp;
4019 
4020 	IGMP_PRINTF(("%s: transmit 0x%llx\n", __func__,
4021 	    (uint64_t)VM_KERNEL_ADDRPERM(m)));
4022 
4023 	ifp = igmp_restore_context(m);
4024 	/*
4025 	 * Check if the ifnet is still attached.
4026 	 */
4027 	if (ifp == NULL || !ifnet_is_fully_attached(ifp)) {
4028 		os_log_error(OS_LOG_DEFAULT, "%s: dropped 0x%llx as interface went away\n",
4029 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m));
4030 		m_freem(m);
4031 		OSAddAtomic(1, &ipstat.ips_noroute);
4032 		return;
4033 	}
4034 
4035 	ipopts = igmp_sendra ? m_raopt : NULL;
4036 
4037 	imo = ip_allocmoptions(Z_WAITOK);
4038 	if (imo == NULL) {
4039 		m_freem(m);
4040 		return;
4041 	}
4042 
4043 	imo->imo_multicast_ttl  = 1;
4044 	imo->imo_multicast_vif  = -1;
4045 	imo->imo_multicast_loop = 0;
4046 
4047 	/*
4048 	 * If the user requested that IGMP traffic be explicitly
4049 	 * redirected to the loopback interface (e.g. they are running a
4050 	 * MANET interface and the routing protocol needs to see the
4051 	 * updates), handle this now.
4052 	 */
4053 	if (m->m_flags & M_IGMP_LOOP) {
4054 		imo->imo_multicast_ifp = lo_ifp;
4055 	} else {
4056 		imo->imo_multicast_ifp = ifp;
4057 	}
4058 
4059 	if (m->m_flags & M_IGMPV2) {
4060 		m0 = m;
4061 	} else {
4062 		m0 = igmp_v3_encap_report(ifp, m);
4063 		if (m0 == NULL) {
4064 			/*
4065 			 * If igmp_v3_encap_report() failed, then M_PREPEND()
4066 			 * already freed the original mbuf chain.
4067 			 * This means that we don't have to m_freem(m) here.
4068 			 */
4069 			os_log_error(OS_LOG_DEFAULT, "%s: dropped 0x%llx\n", __func__,
4070 			    (uint64_t)VM_KERNEL_ADDRPERM(m));
4071 			IMO_REMREF(imo);
4072 			os_atomic_inc(&ipstat.ips_odropped, relaxed);
4073 			return;
4074 		}
4075 	}
4076 
4077 	igmp_scrub_context(m0);
4078 	m->m_flags &= ~(M_PROTOFLAGS | M_IGMP_LOOP);
4079 	m0->m_pkthdr.rcvif = lo_ifp;
4080 
4081 	if (ifp->if_eflags & IFEF_TXSTART) {
4082 		/*
4083 		 * Use control service class if the interface supports
4084 		 * transmit-start model.
4085 		 */
4086 		(void) m_set_service_class(m0, MBUF_SC_CTL);
4087 	}
4088 	bzero(&ro, sizeof(ro));
4089 	error = ip_output(m0, ipopts, &ro, 0, imo, NULL);
4090 	ROUTE_RELEASE(&ro);
4091 
4092 	IMO_REMREF(imo);
4093 
4094 	if (error) {
4095 		os_log_error(OS_LOG_DEFAULT, "%s: ip_output(0x%llx) = %d\n", __func__,
4096 		    (uint64_t)VM_KERNEL_ADDRPERM(m0), error);
4097 		return;
4098 	}
4099 
4100 	IGMPSTAT_INC(igps_snd_reports);
4101 	OIGMPSTAT_INC(igps_snd_reports);
4102 }
4103 /*
4104  * Encapsulate an IGMPv3 report.
4105  *
4106  * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
4107  * chain has already had its IP/IGMPv3 header prepended. In this case
4108  * the function will not attempt to prepend; the lengths and checksums
4109  * will however be re-computed.
4110  *
4111  * Returns a pointer to the new mbuf chain head, or NULL if the
4112  * allocation failed.
4113  */
4114 static struct mbuf *
igmp_v3_encap_report(struct ifnet * ifp,struct mbuf * m)4115 igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
4116 {
4117 	struct igmp_report      *igmp;
4118 	struct ip               *ip;
4119 	unsigned int             hdrlen, igmpreclen;
4120 
4121 	VERIFY((m->m_flags & M_PKTHDR));
4122 
4123 	igmpreclen = m_length(m);
4124 	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);
4125 
4126 	if (m->m_flags & M_IGMPV3_HDR) {
4127 		igmpreclen -= hdrlen;
4128 	} else {
4129 		M_PREPEND(m, hdrlen, M_DONTWAIT, 1);
4130 		if (m == NULL) {
4131 			return NULL;
4132 		}
4133 		m->m_flags |= M_IGMPV3_HDR;
4134 	}
4135 	if (hdrlen + igmpreclen > USHRT_MAX) {
4136 		os_log_error(OS_LOG_DEFAULT, "%s: invalid length %d\n",
4137 		    __func__, hdrlen + igmpreclen);
4138 		m_freem(m);
4139 		return NULL;
4140 	}
4141 
4142 
4143 	IGMP_PRINTF(("%s: igmpreclen is %d\n", __func__, igmpreclen));
4144 
4145 	m->m_data += sizeof(struct ip);
4146 	m->m_len -= sizeof(struct ip);
4147 
4148 	igmp = mtod(m, struct igmp_report *);
4149 	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
4150 	igmp->ir_rsv1 = 0;
4151 	igmp->ir_rsv2 = 0;
4152 	igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
4153 	igmp->ir_cksum = 0;
4154 	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
4155 	m->m_pkthdr.vt_nrecs = 0;
4156 
4157 	m->m_data -= sizeof(struct ip);
4158 	m->m_len += sizeof(struct ip);
4159 
4160 	ip = mtod(m, struct ip *);
4161 	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
4162 	ip->ip_len = (u_short)(hdrlen + igmpreclen);
4163 	ip->ip_off = IP_DF;
4164 	ip->ip_p = IPPROTO_IGMP;
4165 	ip->ip_sum = 0;
4166 
4167 	ip->ip_src.s_addr = INADDR_ANY;
4168 
4169 	if (m->m_flags & M_IGMP_LOOP) {
4170 		struct in_ifaddr *ia;
4171 
4172 		IFP_TO_IA(ifp, ia);
4173 		if (ia != NULL) {
4174 			IFA_LOCK(&ia->ia_ifa);
4175 			ip->ip_src = ia->ia_addr.sin_addr;
4176 			IFA_UNLOCK(&ia->ia_ifa);
4177 			ifa_remref(&ia->ia_ifa);
4178 		}
4179 	}
4180 
4181 	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);
4182 
4183 	return m;
4184 }
4185 
#ifdef IGMP_DEBUG
/*
 * Map an IGMPv3 group record type to a short name for debug output.
 * Any value that is not one of the six record types handled below
 * yields "unknown".
 */
static const char *
igmp_rec_type_to_str(const int type)
{
	if (type == IGMP_CHANGE_TO_EXCLUDE_MODE) {
		return "TO_EX";
	}
	if (type == IGMP_CHANGE_TO_INCLUDE_MODE) {
		return "TO_IN";
	}
	if (type == IGMP_MODE_IS_EXCLUDE) {
		return "MODE_EX";
	}
	if (type == IGMP_MODE_IS_INCLUDE) {
		return "MODE_IN";
	}
	if (type == IGMP_ALLOW_NEW_SOURCES) {
		return "ALLOW_NEW";
	}
	if (type == IGMP_BLOCK_OLD_SOURCES) {
		return "BLOCK_OLD";
	}
	return "unknown";
}
#endif
4210 void
igmp_init(struct protosw * pp,struct domain * dp)4211 igmp_init(struct protosw *pp, struct domain *dp)
4212 {
4213 #pragma unused(dp)
4214 	static int igmp_initialized = 0;
4215 
4216 	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);
4217 
4218 	if (!os_atomic_cmpxchg(&igmp_initialized, 0, 1, relaxed)) {
4219 		return;
4220 	}
4221 	os_log(OS_LOG_DEFAULT, "%s: initializing\n", __func__);
4222 	igmp_timers_are_running = 0;
4223 	LIST_INIT(&igi_head);
4224 	m_raopt = igmp_ra_alloc();
4225 }
4226