1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*-
29 * Copyright (c) 2007-2009 Bruce Simpson.
30 * Copyright (c) 1988 Stephen Deering.
31 * Copyright (c) 1992, 1993
32 * The Regents of the University of California. All rights reserved.
33 *
34 * This code is derived from software contributed to Berkeley by
35 * Stephen Deering of Stanford University.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)igmp.c 8.1 (Berkeley) 7/19/93
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 /*
75 * Internet Group Management Protocol (IGMP) routines.
76 * [RFC1112, RFC2236, RFC3376]
77 *
78 * Written by Steve Deering, Stanford, May 1988.
79 * Modified by Rosen Sharma, Stanford, Aug 1994.
80 * Modified by Bill Fenner, Xerox PARC, Feb 1995.
81 * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
82 * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
83 *
84 * MULTICAST Revision: 3.5.1.4
85 */
86
87 #include <sys/cdefs.h>
88
89 #include <sys/param.h>
90 #include <sys/systm.h>
91 #include <sys/malloc.h>
92 #include <sys/mbuf.h>
93 #include <sys/socket.h>
94 #include <sys/protosw.h>
95 #include <sys/kernel.h>
96 #include <sys/sysctl.h>
97 #include <sys/mcache.h>
98
99 #include <libkern/libkern.h>
100 #include <kern/zalloc.h>
101
102 #include <net/if.h>
103 #include <net/route.h>
104
105 #include <netinet/in.h>
106 #include <netinet/in_var.h>
107 #include <netinet/in_systm.h>
108 #include <netinet/ip.h>
109 #include <netinet/ip_var.h>
110 #include <netinet/igmp.h>
111 #include <netinet/igmp_var.h>
112 #include <netinet/kpi_ipfilter_var.h>
113
114 #if SKYWALK
115 #include <skywalk/core/skywalk_var.h>
116 #endif /* SKYWALK */
117
118 SLIST_HEAD(igmp_inm_relhead, in_multi);
119
120 static void igi_initvar(struct igmp_ifinfo *, struct ifnet *, int);
121 static struct igmp_ifinfo *igi_alloc(zalloc_flags_t);
122 static void igi_free(struct igmp_ifinfo *);
123 static void igi_delete(const struct ifnet *, struct igmp_inm_relhead *);
124 static void igmp_dispatch_queue(struct igmp_ifinfo *, struct ifqueue *,
125 int, const int);
126 static void igmp_final_leave(struct in_multi *, struct igmp_ifinfo *,
127 struct igmp_tparams *);
128 static int igmp_handle_state_change(struct in_multi *,
129 struct igmp_ifinfo *, struct igmp_tparams *);
130 static int igmp_initial_join(struct in_multi *, struct igmp_ifinfo *,
131 struct igmp_tparams *);
132 static int igmp_input_v1_query(struct ifnet *, const struct ip *,
133 const struct igmp *);
134 static int igmp_input_v2_query(struct ifnet *, const struct ip *,
135 const struct igmp *);
136 static int igmp_input_v3_query(struct ifnet *, const struct ip *,
137 /*const*/ struct igmpv3 *);
138 static int igmp_input_v3_group_query(struct in_multi *,
139 int, /*const*/ struct igmpv3 *);
140 static int igmp_input_v1_report(struct ifnet *, struct mbuf *,
141 /*const*/ struct ip *, /*const*/ struct igmp *);
142 static int igmp_input_v2_report(struct ifnet *, struct mbuf *,
143 /*const*/ struct ip *, /*const*/ struct igmp *);
144 static void igmp_sendpkt(struct mbuf *);
145 static __inline__ int igmp_isgroupreported(const struct in_addr);
146 static struct mbuf *igmp_ra_alloc(void);
147 #ifdef IGMP_DEBUG
148 static const char *igmp_rec_type_to_str(const int);
149 #endif
150 static uint32_t igmp_set_version(struct igmp_ifinfo *, const int);
151 static void igmp_flush_relq(struct igmp_ifinfo *,
152 struct igmp_inm_relhead *);
153 static int igmp_v1v2_queue_report(struct in_multi *, const int);
154 static void igmp_v1v2_process_group_timer(struct in_multi *, const int);
155 static void igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
156 static uint32_t igmp_v2_update_group(struct in_multi *, const int);
157 static void igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
158 static uint32_t igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
159 static struct mbuf *
160 igmp_v3_encap_report(struct ifnet *, struct mbuf *);
161 static int igmp_v3_enqueue_group_record(struct ifqueue *,
162 struct in_multi *, const int, const int, const int);
163 static int igmp_v3_enqueue_filter_change(struct ifqueue *,
164 struct in_multi *);
165 static void igmp_v3_process_group_timers(struct igmp_ifinfo *,
166 struct ifqueue *, struct ifqueue *, struct in_multi *,
167 const unsigned int);
168 static int igmp_v3_merge_state_changes(struct in_multi *,
169 struct ifqueue *);
170 static void igmp_v3_suppress_group_record(struct in_multi *);
171 static int sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS;
172 static int sysctl_igmp_gsr SYSCTL_HANDLER_ARGS;
173 static int sysctl_igmp_default_version SYSCTL_HANDLER_ARGS;
174
175 static int igmp_timeout_run; /* IGMP timer is scheduled to run */
176 static void igmp_timeout(void *);
177 static void igmp_sched_timeout(bool);
178
179 static struct mbuf *m_raopt; /* Router Alert option */
180
181 static int querier_present_timers_running; /* IGMPv1/v2 older version
182 * querier present */
183 static int interface_timers_running; /* IGMPv3 general
184 * query response */
185 static int state_change_timers_running; /* IGMPv3 state-change
186 * retransmit */
187 static int current_state_timers_running; /* IGMPv1/v2 host
188 * report; IGMPv3 g/sg
189 * query response */
190
191 /*
192 * Subsystem lock macros.
193 */
194 #define IGMP_LOCK() \
195 lck_mtx_lock(&igmp_mtx)
196 #define IGMP_LOCK_ASSERT_HELD() \
197 LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_OWNED)
198 #define IGMP_LOCK_ASSERT_NOTHELD() \
199 LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED)
200 #define IGMP_UNLOCK() \
201 lck_mtx_unlock(&igmp_mtx)
202
203 static LIST_HEAD(, igmp_ifinfo) igi_head;
204 static struct igmpstat_v3 igmpstat_v3 = {
205 .igps_version = IGPS_VERSION_3,
206 .igps_len = sizeof(struct igmpstat_v3),
207 };
208 static struct igmpstat igmpstat; /* old IGMPv2 stats structure */
209 static struct timeval igmp_gsrdelay = {.tv_sec = 10, .tv_usec = 0};
210
211 static int igmp_recvifkludge = 1;
212 static int igmp_sendra = 1;
213 static int igmp_sendlocal = 1;
214 static int igmp_v1enable = 1;
215 static int igmp_v2enable = 1;
216 static int igmp_legacysupp = 0;
217 static int igmp_default_version = IGMP_VERSION_3;
218
219 SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
220 &igmpstat, igmpstat, "");
221 SYSCTL_STRUCT(_net_inet_igmp, OID_AUTO, v3stats,
222 CTLFLAG_RD | CTLFLAG_LOCKED, &igmpstat_v3, igmpstat_v3, "");
223 SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW | CTLFLAG_LOCKED,
224 &igmp_recvifkludge, 0,
225 "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
226 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW | CTLFLAG_LOCKED,
227 &igmp_sendra, 0,
228 "Send IP Router Alert option in IGMPv2/v3 messages");
229 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW | CTLFLAG_LOCKED,
230 &igmp_sendlocal, 0,
231 "Send IGMP membership reports for 224.0.0.0/24 groups");
232 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
233 &igmp_v1enable, 0,
234 "Enable backwards compatibility with IGMPv1");
235 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW | CTLFLAG_LOCKED,
236 &igmp_v2enable, 0,
237 "Enable backwards compatibility with IGMPv2");
238 SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW | CTLFLAG_LOCKED,
239 &igmp_legacysupp, 0,
240 "Allow v1/v2 reports to suppress v3 group responses");
241 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
242 CTLTYPE_INT | CTLFLAG_RW,
243 &igmp_default_version, 0, sysctl_igmp_default_version, "I",
244 "Default version of IGMP to run on each interface");
245 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
246 CTLTYPE_INT | CTLFLAG_RW,
247 &igmp_gsrdelay.tv_sec, 0, sysctl_igmp_gsr, "I",
248 "Rate limit for IGMPv3 Group-and-Source queries in seconds");
249 #ifdef IGMP_DEBUG
250 int igmp_debug = 0;
251 SYSCTL_INT(_net_inet_igmp, OID_AUTO,
252 debug, CTLFLAG_RW | CTLFLAG_LOCKED, &igmp_debug, 0, "");
253 #endif
254
255 SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
256 sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
257
258 /* Lock group and attribute for igmp_mtx */
259 static LCK_ATTR_DECLARE(igmp_mtx_attr, 0, 0);
260 static LCK_GRP_DECLARE(igmp_mtx_grp, "igmp_mtx");
261
262 /*
263 * Locking and reference counting:
264 *
265 * igmp_mtx mainly protects igi_head. In cases where both igmp_mtx and
266 * in_multihead_lock must be held, the former must be acquired first in order
267 * to maintain lock ordering. It is not a requirement that igmp_mtx be
268 * acquired first before in_multihead_lock, but in case both must be acquired
269 * in succession, the correct lock ordering must be followed.
270 *
271 * Instead of walking the if_multiaddrs list at the interface and returning
272 * the ifma_protospec value of a matching entry, we search the global list
273 * of in_multi records and find it that way; this is done with in_multihead
274 * lock held. Doing so avoids the race condition issues that many other BSDs
275 * suffer from (therefore in our implementation, ifma_protospec will never be
276 * NULL for as long as the in_multi is valid.)
277 *
278 * The above creates a requirement for the in_multi to stay in in_multihead
 * list even after the final IGMP leave (in IGMPv3 mode) until it no longer
 * needs to be retransmitted (this is not required for IGMPv1/v2.) In order to handle
281 * this, the request and reference counts of the in_multi are bumped up when
282 * the state changes to IGMP_LEAVING_MEMBER, and later dropped in the timeout
283 * handler. Each in_multi holds a reference to the underlying igmp_ifinfo.
284 *
 * Thus, the permitted lock order is:
286 *
287 * igmp_mtx, in_multihead_lock, inm_lock, igi_lock
288 *
289 * Any may be taken independently, but if any are held at the same time,
290 * the above lock order must be followed.
291 */
292 static LCK_MTX_DECLARE_ATTR(igmp_mtx, &igmp_mtx_grp, &igmp_mtx_attr);
293 static int igmp_timers_are_running;
294
295 #define IGMP_ADD_DETACHED_INM(_head, _inm) { \
296 SLIST_INSERT_HEAD(_head, _inm, inm_dtle); \
297 }
298
299 #define IGMP_REMOVE_DETACHED_INM(_head) { \
300 struct in_multi *_inm, *_inm_tmp; \
301 SLIST_FOREACH_SAFE(_inm, _head, inm_dtle, _inm_tmp) { \
302 SLIST_REMOVE(_head, _inm, in_multi, inm_dtle); \
303 INM_REMREF(_inm); \
304 } \
305 VERIFY(SLIST_EMPTY(_head)); \
306 }
307
308 static ZONE_DEFINE(igi_zone, "igmp_ifinfo",
309 sizeof(struct igmp_ifinfo), ZC_ZFREE_CLEARMEM);
310
311 /* Store IGMPv3 record count in the module private scratch space */
312 #define vt_nrecs pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]
313
314 static __inline void
igmp_save_context(struct mbuf * m,struct ifnet * ifp)315 igmp_save_context(struct mbuf *m, struct ifnet *ifp)
316 {
317 m->m_pkthdr.rcvif = ifp;
318 }
319
320 static __inline void
igmp_scrub_context(struct mbuf * m)321 igmp_scrub_context(struct mbuf *m)
322 {
323 m->m_pkthdr.rcvif = NULL;
324 }
325
326 #ifdef IGMP_DEBUG
327 static __inline const char *
inet_ntop_haddr(in_addr_t haddr,char * buf,socklen_t size)328 inet_ntop_haddr(in_addr_t haddr, char *buf, socklen_t size)
329 {
330 struct in_addr ia;
331
332 ia.s_addr = htonl(haddr);
333 return inet_ntop(AF_INET, &ia, buf, size);
334 }
335 #endif
336
337 /*
338 * Restore context from a queued IGMP output chain.
339 * Return saved ifp.
340 */
341 static __inline struct ifnet *
igmp_restore_context(struct mbuf * m)342 igmp_restore_context(struct mbuf *m)
343 {
344 return m->m_pkthdr.rcvif;
345 }
346
347 /*
348 * Retrieve or set default IGMP version.
349 */
350 static int
351 sysctl_igmp_default_version SYSCTL_HANDLER_ARGS
352 {
353 #pragma unused(oidp, arg2)
354 int error;
355 int new;
356
357 IGMP_LOCK();
358
359 error = SYSCTL_OUT(req, arg1, sizeof(int));
360 if (error || !req->newptr) {
361 goto out_locked;
362 }
363
364 new = igmp_default_version;
365
366 error = SYSCTL_IN(req, &new, sizeof(int));
367 if (error) {
368 goto out_locked;
369 }
370
371 if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
372 error = EINVAL;
373 goto out_locked;
374 }
375
376 IGMP_PRINTF(("%s: change igmp_default_version from %d to %d\n",
377 __func__, igmp_default_version, new));
378
379 igmp_default_version = new;
380
381 out_locked:
382 IGMP_UNLOCK();
383 return error;
384 }
385
386 /*
387 * Retrieve or set threshold between group-source queries in seconds.
388 *
389 */
390 static int
391 sysctl_igmp_gsr SYSCTL_HANDLER_ARGS
392 {
393 #pragma unused(arg1, arg2)
394 int error;
395 int i;
396
397 IGMP_LOCK();
398
399 i = (int)igmp_gsrdelay.tv_sec;
400
401 error = sysctl_handle_int(oidp, &i, 0, req);
402 if (error || !req->newptr) {
403 goto out_locked;
404 }
405
406 if (i < -1 || i >= 60) {
407 error = EINVAL;
408 goto out_locked;
409 }
410
411 igmp_gsrdelay.tv_sec = i;
412
413 out_locked:
414 IGMP_UNLOCK();
415 return error;
416 }
417
418 /*
419 * Expose struct igmp_ifinfo to userland, keyed by ifindex.
420 * For use by ifmcstat(8).
421 *
422 */
423 static int
424 sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS
425 {
426 #pragma unused(oidp)
427 int *name;
428 int error;
429 u_int namelen;
430 struct ifnet *ifp;
431 struct igmp_ifinfo *igi;
432 struct igmp_ifinfo_u igi_u;
433
434 name = (int *)arg1;
435 namelen = arg2;
436
437 if (req->newptr != USER_ADDR_NULL) {
438 return EPERM;
439 }
440
441 if (namelen != 1) {
442 return EINVAL;
443 }
444
445 IGMP_LOCK();
446
447 if (name[0] <= 0 || name[0] > (u_int)if_index) {
448 error = ENOENT;
449 goto out_locked;
450 }
451
452 error = ENOENT;
453
454 ifnet_head_lock_shared();
455 ifp = ifindex2ifnet[name[0]];
456 ifnet_head_done();
457 if (ifp == NULL) {
458 goto out_locked;
459 }
460
461 bzero(&igi_u, sizeof(igi_u));
462
463 LIST_FOREACH(igi, &igi_head, igi_link) {
464 IGI_LOCK(igi);
465 if (ifp != igi->igi_ifp) {
466 IGI_UNLOCK(igi);
467 continue;
468 }
469 igi_u.igi_ifindex = igi->igi_ifp->if_index;
470 igi_u.igi_version = igi->igi_version;
471 igi_u.igi_v1_timer = igi->igi_v1_timer;
472 igi_u.igi_v2_timer = igi->igi_v2_timer;
473 igi_u.igi_v3_timer = igi->igi_v3_timer;
474 igi_u.igi_flags = igi->igi_flags;
475 igi_u.igi_rv = igi->igi_rv;
476 igi_u.igi_qi = igi->igi_qi;
477 igi_u.igi_qri = igi->igi_qri;
478 igi_u.igi_uri = igi->igi_uri;
479 IGI_UNLOCK(igi);
480
481 error = SYSCTL_OUT(req, &igi_u, sizeof(igi_u));
482 break;
483 }
484
485 out_locked:
486 IGMP_UNLOCK();
487 return error;
488 }
489
/*
 * Dispatch an entire queue of pending packet chains.
 *
 * 'igi' may be NULL; when non-NULL it must be passed locked, and the
 * igi lock is dropped around each igmp_sendpkt() call and re-taken
 * afterwards, so igi state may change between iterations. 'limit'
 * bounds the number of packets sent per call; 'loop' marks packets
 * for loopback via M_IGMP_LOOP.
 *
 * Must not be called with inm_lock held.
 */
static void
igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit,
    const int loop)
{
	struct mbuf *m;
	struct ip *ip;

	if (igi != NULL) {
		IGI_LOCK_ASSERT_HELD(igi);
	}

#if SKYWALK
	/*
	 * Since this function is called holding the igi lock, we need to ensure we
	 * don't enter the driver directly because a deadlock can happen if another
	 * thread holding the workloop lock tries to acquire the igi lock at
	 * the same time.
	 */
	sk_protect_t protect = sk_async_transmit_protect();
#endif /* SKYWALK */

	for (;;) {
		IF_DEQUEUE(ifq, m);
		if (m == NULL) {
			break;
		}
		IGMP_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifq),
		    (uint64_t)VM_KERNEL_ADDRPERM(m)));
		/* NOTE(review): ip is assigned but not otherwise read here. */
		ip = mtod(m, struct ip *);
		if (loop) {
			m->m_flags |= M_IGMP_LOOP;
		}
		/*
		 * Drop the igi lock across the transmit path to preserve
		 * lock ordering; re-acquire before touching igi again.
		 */
		if (igi != NULL) {
			IGI_UNLOCK(igi);
		}
		igmp_sendpkt(m);
		if (igi != NULL) {
			IGI_LOCK(igi);
		}
		/* Stop once 'limit' packets have been dispatched. */
		if (--limit == 0) {
			break;
		}
	}

#if SKYWALK
	sk_async_transmit_unprotect(protect);
#endif /* SKYWALK */

	if (igi != NULL) {
		IGI_LOCK_ASSERT_HELD(igi);
	}
}
548
549 /*
550 * Filter outgoing IGMP report state by group.
551 *
552 * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
553 * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
554 * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
555 * this may break certain IGMP snooping switches which rely on the old
556 * report behaviour.
557 *
558 * Return zero if the given group is one for which IGMP reports
559 * should be suppressed, or non-zero if reports should be issued.
560 */
561
562 static __inline__
563 int
igmp_isgroupreported(const struct in_addr addr)564 igmp_isgroupreported(const struct in_addr addr)
565 {
566 if (in_allhosts(addr) ||
567 ((!igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) {
568 return 0;
569 }
570
571 return 1;
572 }
573
574 /*
575 * Construct a Router Alert option to use in outgoing packets.
576 */
577 static struct mbuf *
igmp_ra_alloc(void)578 igmp_ra_alloc(void)
579 {
580 struct mbuf *m;
581 struct ipoption *p;
582
583 MGET(m, M_WAITOK, MT_DATA);
584 p = mtod(m, struct ipoption *);
585 p->ipopt_dst.s_addr = INADDR_ANY;
586 p->ipopt_list[0] = (char)IPOPT_RA; /* Router Alert Option */
587 p->ipopt_list[1] = 0x04; /* 4 bytes long */
588 p->ipopt_list[2] = IPOPT_EOL; /* End of IP option list */
589 p->ipopt_list[3] = 0x00; /* pad byte */
590 m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
591
592 return m;
593 }
594
595 /*
596 * Attach IGMP when PF_INET is attached to an interface.
597 */
struct igmp_ifinfo *
igmp_domifattach(struct ifnet *ifp, zalloc_flags_t how)
{
	struct igmp_ifinfo *igi;

	IGMP_PRINTF(("%s: called for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));

	/* Allocation may fail depending on 'how' (zalloc flags). */
	igi = igi_alloc(how);
	if (igi == NULL) {
		return NULL;
	}

	IGMP_LOCK();

	IGI_LOCK(igi);
	igi_initvar(igi, ifp, 0);
	igi->igi_debug |= IFD_ATTACHED;
	/* Two references are taken: one for the list, one for the caller. */
	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
	IGI_ADDREF_LOCKED(igi); /* hold a reference for caller */
	IGI_UNLOCK(igi);
	/* Mark the igi silent if the interface is not multicast-capable. */
	ifnet_lock_shared(ifp);
	igmp_initsilent(ifp, igi);
	ifnet_lock_done(ifp);

	LIST_INSERT_HEAD(&igi_head, igi, igi_link);

	IGMP_UNLOCK();

	IGMP_PRINTF(("%s: allocate igmp_ifinfo for ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));

	return igi;
}
632
633 /*
634 * Attach IGMP when PF_INET is reattached to an interface. Caller is
635 * expected to have an outstanding reference to the igi.
636 */
void
igmp_domifreattach(struct igmp_ifinfo *igi)
{
	struct ifnet *ifp;

	IGMP_LOCK();

	IGI_LOCK(igi);
	/* Must not already be on igi_head; caller holds a ref from before. */
	VERIFY(!(igi->igi_debug & IFD_ATTACHED));
	ifp = igi->igi_ifp;
	VERIFY(ifp != NULL);
	/* Re-initialize, preserving igi_relinmhead (reattach == 1). */
	igi_initvar(igi, ifp, 1);
	igi->igi_debug |= IFD_ATTACHED;
	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
	IGI_UNLOCK(igi);
	/* Mark the igi silent if the interface is not multicast-capable. */
	ifnet_lock_shared(ifp);
	igmp_initsilent(ifp, igi);
	ifnet_lock_done(ifp);

	LIST_INSERT_HEAD(&igi_head, igi, igi_link);

	IGMP_UNLOCK();

	IGMP_PRINTF(("%s: reattached igmp_ifinfo for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));
}
663
664 /*
665 * Hook for domifdetach.
666 */
void
igmp_domifdetach(struct ifnet *ifp)
{
	/* Local list collecting in_multi records to release unlocked. */
	SLIST_HEAD(, in_multi) inm_dthead;

	SLIST_INIT(&inm_dthead);

	IGMP_PRINTF(("%s: called for ifp 0x%llx(%s%d)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name, ifp->if_unit));

	IGMP_LOCK();
	igi_delete(ifp, (struct igmp_inm_relhead *)&inm_dthead);
	IGMP_UNLOCK();

	/* Now that we've dropped all locks, release detached records. */
	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
}
684
685 /*
686 * Called at interface detach time. Note that we only flush all deferred
687 * responses and record releases; all remaining inm records and their source
688 * entries related to this interface are left intact, in order to handle
689 * the reattach case.
690 */
static void
igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead)
{
	struct igmp_ifinfo *igi, *tigi;

	IGMP_LOCK_ASSERT_HELD();

	LIST_FOREACH_SAFE(igi, &igi_head, igi_link, tigi) {
		IGI_LOCK(igi);
		if (igi->igi_ifp == ifp) {
			/*
			 * Free deferred General Query responses.
			 */
			IF_DRAIN(&igi->igi_gq);
			IF_DRAIN(&igi->igi_v2q);
			/* Move pending in_multi releases onto caller's list. */
			igmp_flush_relq(igi, inm_dthead);
			VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
			igi->igi_debug &= ~IFD_ATTACHED;
			IGI_UNLOCK(igi);

			LIST_REMOVE(igi, igi_link);
			IGI_REMREF(igi); /* release igi_head reference */
			return;
		}
		IGI_UNLOCK(igi);
	}
	/* Every attached ifp must have a matching igi on igi_head. */
	panic("%s: igmp_ifinfo not found for ifp %p(%s)", __func__,
	    ifp, ifp->if_xname);
}
720
721 __private_extern__ void
igmp_initsilent(struct ifnet * ifp,struct igmp_ifinfo * igi)722 igmp_initsilent(struct ifnet *ifp, struct igmp_ifinfo *igi)
723 {
724 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
725
726 IGI_LOCK_ASSERT_NOTHELD(igi);
727 IGI_LOCK(igi);
728 if (!(ifp->if_flags & IFF_MULTICAST)) {
729 igi->igi_flags |= IGIF_SILENT;
730 } else {
731 igi->igi_flags &= ~IGIF_SILENT;
732 }
733 IGI_UNLOCK(igi);
734 }
735
/*
 * Initialize (or re-initialize) the per-interface IGMP state in an igi.
 * 'reattach' non-zero preserves the existing igi_relinmhead list.
 * Caller holds the igi lock.
 */
static void
igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach)
{
	IGI_LOCK_ASSERT_HELD(igi);

	igi->igi_ifp = ifp;
	igi->igi_version = igmp_default_version;
	igi->igi_flags = 0;
	igi->igi_rv = IGMP_RV_INIT;     /* robustness variable */
	igi->igi_qi = IGMP_QI_INIT;     /* query interval */
	igi->igi_qri = IGMP_QRI_INIT;   /* query response interval */
	igi->igi_uri = IGMP_URI_INIT;   /* unsolicited report interval */

	if (!reattach) {
		SLIST_INIT(&igi->igi_relinmhead);
	}

	/*
	 * Responses to general queries are subject to bounds.
	 */
	igi->igi_gq.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
	igi->igi_v2q.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
}
759
760 static struct igmp_ifinfo *
igi_alloc(zalloc_flags_t how)761 igi_alloc(zalloc_flags_t how)
762 {
763 struct igmp_ifinfo *igi = zalloc_flags(igi_zone, how | Z_ZERO);
764 if (igi != NULL) {
765 lck_mtx_init(&igi->igi_lock, &igmp_mtx_grp, &igmp_mtx_attr);
766 igi->igi_debug |= IFD_ALLOC;
767 }
768 return igi;
769 }
770
/*
 * Release an igmp_ifinfo back to igi_zone. Panics if the igi is still
 * attached, still has an ifp, was not allocated by igi_alloc(), or has
 * outstanding references.
 */
static void
igi_free(struct igmp_ifinfo *igi)
{
	IGI_LOCK(igi);
	if (igi->igi_debug & IFD_ATTACHED) {
		panic("%s: attached igi=%p is being freed", __func__, igi);
		/* NOTREACHED */
	} else if (igi->igi_ifp != NULL) {
		panic("%s: ifp not NULL for igi=%p", __func__, igi);
		/* NOTREACHED */
	} else if (!(igi->igi_debug & IFD_ALLOC)) {
		panic("%s: igi %p cannot be freed", __func__, igi);
		/* NOTREACHED */
	} else if (igi->igi_refcnt != 0) {
		panic("%s: non-zero refcnt igi=%p", __func__, igi);
		/* NOTREACHED */
	}
	igi->igi_debug &= ~IFD_ALLOC;
	IGI_UNLOCK(igi);

	lck_mtx_destroy(&igi->igi_lock, &igmp_mtx_grp);
	zfree(igi_zone, igi);
}
794
795 void
igi_addref(struct igmp_ifinfo * igi,int locked)796 igi_addref(struct igmp_ifinfo *igi, int locked)
797 {
798 if (!locked) {
799 IGI_LOCK_SPIN(igi);
800 } else {
801 IGI_LOCK_ASSERT_HELD(igi);
802 }
803
804 if (++igi->igi_refcnt == 0) {
805 panic("%s: igi=%p wraparound refcnt", __func__, igi);
806 /* NOTREACHED */
807 }
808 if (!locked) {
809 IGI_UNLOCK(igi);
810 }
811 }
812
/*
 * Drop a reference on an igmp_ifinfo. When the last reference goes,
 * drain its queues, flush pending in_multi releases, and free it.
 */
void
igi_remref(struct igmp_ifinfo *igi)
{
	SLIST_HEAD(, in_multi) inm_dthead;
	struct ifnet *ifp;

	IGI_LOCK_SPIN(igi);

	if (igi->igi_refcnt == 0) {
		panic("%s: igi=%p negative refcnt", __func__, igi);
		/* NOTREACHED */
	}

	--igi->igi_refcnt;
	if (igi->igi_refcnt > 0) {
		IGI_UNLOCK(igi);
		return;
	}

	/* Last reference dropped: tear down under the igi lock. */
	ifp = igi->igi_ifp;
	igi->igi_ifp = NULL;
	IF_DRAIN(&igi->igi_gq);
	IF_DRAIN(&igi->igi_v2q);
	SLIST_INIT(&inm_dthead);
	igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
	VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
	IGI_UNLOCK(igi);

	/* Now that we've dropped all locks, release detached records. */
	IGMP_REMOVE_DETACHED_INM(&inm_dthead);

	IGMP_PRINTF(("%s: freeing igmp_ifinfo for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	igi_free(igi);
}
849
850 /*
851 * Process a received IGMPv1 query.
852 * Return non-zero if the message should be dropped.
853 */
static int
igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
	struct igmp_ifinfo *igi;
	struct in_multi *inm;
	struct in_multistep step;
	/* Timer requests accumulate here; scheduled once at 'done'. */
	struct igmp_tparams itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	/*
	 * IGMPv1 Host Membership Queries SHOULD always be addressed to
	 * 224.0.0.1. They are always treated as General Queries.
	 * igmp_group is always ignored. Do not drop it as a userland
	 * daemon may wish to see it.
	 */
	if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
		IGMPSTAT_INC(igps_rcv_badqueries);
		OIGMPSTAT_INC(igps_rcv_badqueries);
		goto done;
	}
	IGMPSTAT_INC(igps_rcv_gen_queries);

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v1 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}
	/*
	 * Switch to IGMPv1 host compatibility mode.
	 */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_1);
	IGI_UNLOCK(igi);

	IGMP_PRINTF(("%s: process v1 query on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	/*
	 * Start the timers in all of our group records
	 * for the interface on which the query arrived,
	 * except those which are already running.
	 */
	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		/* Skip groups on other interfaces or with a timer running. */
		if (inm->inm_ifp != ifp || inm->inm_timer != 0) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/* Arm a randomized report delay within the v1/v2 bound. */
			inm->inm_state = IGMP_REPORTING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
			itp.cst = 1;
			break;
		case IGMP_LEAVING_MEMBER:
			break;
		}
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();
done:
	igmp_set_timeout(&itp);

	return 0;
}
939
940 /*
941 * Process a received IGMPv2 general or group-specific query.
942 */
static int
igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
	struct igmp_ifinfo *igi;
	struct in_multi *inm;
	int is_general_query;
	uint16_t timer;
	/* Timer requests accumulate here; scheduled once at 'done'. */
	struct igmp_tparams itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	/*
	 * Validate address fields upfront.
	 */
	if (in_nullhost(igmp->igmp_group)) {
		/*
		 * IGMPv2 General Query.
		 * If this was not sent to the all-hosts group, ignore it.
		 */
		if (!in_allhosts(ip->ip_dst)) {
			goto done;
		}
		IGMPSTAT_INC(igps_rcv_gen_queries);
		is_general_query = 1;
	} else {
		/* IGMPv2 Group-Specific Query. */
		IGMPSTAT_INC(igps_rcv_group_queries);
	}

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v2 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}
	/*
	 * Ignore v2 query if in v1 Compatibility Mode.
	 */
	if (igi->igi_version == IGMP_VERSION_1) {
		IGI_UNLOCK(igi);
		goto done;
	}
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_2);
	IGI_UNLOCK(igi);

	/* Convert the advertised max response time; floor at one unit. */
	timer = igmp->igmp_code / IGMP_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	if (is_general_query) {
		struct in_multistep step;

		IGMP_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		/*
		 * For each reporting group joined on this
		 * interface, kick the report timer.
		 */
		in_multihead_lock_shared();
		IN_FIRST_MULTI(step, inm);
		while (inm != NULL) {
			INM_LOCK(inm);
			if (inm->inm_ifp == ifp) {
				itp.cst += igmp_v2_update_group(inm, timer);
			}
			INM_UNLOCK(inm);
			IN_NEXT_MULTI(step, inm);
		}
		in_multihead_lock_done();
	} else {
		/*
		 * Group-specific IGMPv2 query, we need only
		 * look up the single group to process it.
		 */
		in_multihead_lock_shared();
		IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
		in_multihead_lock_done();
		if (inm != NULL) {
			INM_LOCK(inm);
			IGMP_INET_PRINTF(igmp->igmp_group,
			    ("process v2 query %s on ifp 0x%llx(%s)\n",
			    _igmp_inet_buf,
			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
			itp.cst = igmp_v2_update_group(inm, timer);
			INM_UNLOCK(inm);
			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
		}
	}
done:
	igmp_set_timeout(&itp);

	return 0;
}
1045
1046 /*
1047 * Update the report timer on a group in response to an IGMPv2 query.
1048 *
1049 * If we are becoming the reporting member for this group, start the timer.
1050 * If we already are the reporting member for this group, and timer is
1051 * below the threshold, reset it.
1052 *
1053 * We may be updating the group for the first time since we switched
1054 * to IGMPv3. If we are, then we must clear any recorded source lists,
1055 * and transition to REPORTING state; the group timer is overloaded
1056 * for group and group-source query responses.
1057 *
1058 * Unlike IGMPv3, the delay per group should be jittered
1059 * to avoid bursts of IGMPv2 reports.
1060 */
static uint32_t
igmp_v2_update_group(struct in_multi *inm, const int timer)
{
	IGMP_INET_PRINTF(inm->inm_addr, ("%s: %s/%s timer=%d\n",
	    __func__, _igmp_inet_buf, if_name(inm->inm_ifp),
	    timer));

	INM_LOCK_ASSERT_HELD(inm);

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
		break;
	case IGMP_REPORTING_MEMBER:
		if (inm->inm_timer != 0 &&
		    inm->inm_timer <= timer) {
			/*
			 * Our pending report already fires no later than
			 * the querier's max response time; leave it alone.
			 */
			IGMP_PRINTF(("%s: REPORTING and timer running, "
			    "skipping.\n", __func__));
			break;
		}
		OS_FALLTHROUGH;
	case IGMP_SG_QUERY_PENDING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		IGMP_PRINTF(("%s: ->REPORTING\n", __func__));
		inm->inm_state = IGMP_REPORTING_MEMBER;
		/* Jitter the delay to avoid synchronized report bursts. */
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		break;
	case IGMP_SLEEPING_MEMBER:
		IGMP_PRINTF(("%s: ->AWAKENING\n", __func__));
		inm->inm_state = IGMP_AWAKENING_MEMBER;
		break;
	case IGMP_LEAVING_MEMBER:
		break;
	}

	/* Return the (possibly updated) group report timer. */
	return inm->inm_timer;
}
1101
1102 /*
1103 * Process a received IGMPv3 general, group-specific or
1104 * group-and-source-specific query.
1105 * Assumes m has already been pulled up to the full IGMP message length.
1106 * Return 0 if successful, otherwise an appropriate error code is returned.
1107 */
1108 static int
igmp_input_v3_query(struct ifnet * ifp,const struct ip * ip,struct igmpv3 * igmpv3)1109 igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
1110 /*const*/ struct igmpv3 *igmpv3)
1111 {
1112 struct igmp_ifinfo *igi;
1113 struct in_multi *inm;
1114 int is_general_query;
1115 uint32_t maxresp, nsrc, qqi;
1116 uint32_t timer;
1117 uint8_t qrv;
1118 struct igmp_tparams itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };
1119
1120 IGMP_LOCK_ASSERT_NOTHELD();
1121
1122 is_general_query = 0;
1123
1124 IGMP_PRINTF(("%s: process v3 query on ifp 0x%llx(%s)\n", __func__,
1125 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1126
1127 maxresp = igmpv3->igmp_code; /* in 1/10ths of a second */
1128 if (maxresp >= 128) {
1129 maxresp = IGMP_MANT(igmpv3->igmp_code) <<
1130 (IGMP_EXP(igmpv3->igmp_code) + 3);
1131 }
1132
1133 /*
1134 * Robustness must never be less than 2 for on-wire IGMPv3.
1135 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
1136 * an exception for interfaces whose IGMPv3 state changes
1137 * are redirected to loopback (e.g. MANET).
1138 */
1139 qrv = IGMP_QRV(igmpv3->igmp_misc);
1140 if (qrv < 2) {
1141 IGMP_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
1142 qrv, IGMP_RV_INIT));
1143 qrv = IGMP_RV_INIT;
1144 }
1145
1146 qqi = igmpv3->igmp_qqi;
1147 if (qqi >= 128) {
1148 qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
1149 (IGMP_EXP(igmpv3->igmp_qqi) + 3);
1150 }
1151
1152 timer = maxresp / IGMP_TIMER_SCALE;
1153 if (timer == 0) {
1154 timer = 1;
1155 }
1156
1157 nsrc = ntohs(igmpv3->igmp_numsrc);
1158
1159 /*
1160 * Validate address fields and versions upfront before
1161 * accepting v3 query.
1162 */
1163 if (in_nullhost(igmpv3->igmp_group)) {
1164 /*
1165 * IGMPv3 General Query.
1166 *
1167 * General Queries SHOULD be directed to 224.0.0.1.
1168 * A general query with a source list has undefined
1169 * behaviour; discard it.
1170 */
1171 IGMPSTAT_INC(igps_rcv_gen_queries);
1172 if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
1173 IGMPSTAT_INC(igps_rcv_badqueries);
1174 OIGMPSTAT_INC(igps_rcv_badqueries);
1175 goto done;
1176 }
1177 is_general_query = 1;
1178 } else {
1179 /* Group or group-source specific query. */
1180 if (nsrc == 0) {
1181 IGMPSTAT_INC(igps_rcv_group_queries);
1182 } else {
1183 IGMPSTAT_INC(igps_rcv_gsr_queries);
1184 }
1185 }
1186
1187 igi = IGMP_IFINFO(ifp);
1188 VERIFY(igi != NULL);
1189
1190 IGI_LOCK(igi);
1191 if (igi->igi_flags & IGIF_LOOPBACK) {
1192 IGMP_PRINTF(("%s: ignore v3 query on IGIF_LOOPBACK "
1193 "ifp 0x%llx(%s)\n", __func__,
1194 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1195 IGI_UNLOCK(igi);
1196 goto done;
1197 }
1198
1199 /*
1200 * Discard the v3 query if we're in Compatibility Mode.
1201 * The RFC is not obviously worded that hosts need to stay in
1202 * compatibility mode until the Old Version Querier Present
1203 * timer expires.
1204 */
1205 if (igi->igi_version != IGMP_VERSION_3) {
1206 IGMP_PRINTF(("%s: ignore v3 query in v%d mode on "
1207 "ifp 0x%llx(%s)\n", __func__, igi->igi_version,
1208 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1209 IGI_UNLOCK(igi);
1210 goto done;
1211 }
1212
1213 itp.qpt = igmp_set_version(igi, IGMP_VERSION_3);
1214 igi->igi_rv = qrv;
1215 igi->igi_qi = qqi;
1216 igi->igi_qri = MAX(timer, IGMP_QRI_MIN);
1217
1218 IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, igi->igi_rv,
1219 igi->igi_qi, igi->igi_qri));
1220
1221 if (is_general_query) {
1222 /*
1223 * Schedule a current-state report on this ifp for
1224 * all groups, possibly containing source lists.
1225 * If there is a pending General Query response
1226 * scheduled earlier than the selected delay, do
1227 * not schedule any other reports.
1228 * Otherwise, reset the interface timer.
1229 */
1230 IGMP_PRINTF(("%s: process v3 general query on ifp 0x%llx(%s)\n",
1231 __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1232 if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
1233 itp.it = igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
1234 }
1235 IGI_UNLOCK(igi);
1236 } else {
1237 IGI_UNLOCK(igi);
1238 /*
1239 * Group-source-specific queries are throttled on
1240 * a per-group basis to defeat denial-of-service attempts.
1241 * Queries for groups we are not a member of on this
1242 * link are simply ignored.
1243 */
1244 in_multihead_lock_shared();
1245 IN_LOOKUP_MULTI(&igmpv3->igmp_group, ifp, inm);
1246 in_multihead_lock_done();
1247 if (inm == NULL) {
1248 goto done;
1249 }
1250
1251 INM_LOCK(inm);
1252 if (nsrc > 0) {
1253 if (!ratecheck(&inm->inm_lastgsrtv,
1254 &igmp_gsrdelay)) {
1255 IGMP_PRINTF(("%s: GS query throttled.\n",
1256 __func__));
1257 IGMPSTAT_INC(igps_drop_gsr_queries);
1258 INM_UNLOCK(inm);
1259 INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1260 goto done;
1261 }
1262 }
1263 IGMP_INET_PRINTF(igmpv3->igmp_group,
1264 ("process v3 %s query on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1265 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1266 /*
1267 * If there is a pending General Query response
1268 * scheduled sooner than the selected delay, no
1269 * further report need be scheduled.
1270 * Otherwise, prepare to respond to the
1271 * group-specific or group-and-source query.
1272 */
1273 IGI_LOCK(igi);
1274 itp.it = igi->igi_v3_timer;
1275 IGI_UNLOCK(igi);
1276 if (itp.it == 0 || itp.it >= timer) {
1277 (void) igmp_input_v3_group_query(inm, timer, igmpv3);
1278 itp.cst = inm->inm_timer;
1279 }
1280 INM_UNLOCK(inm);
1281 INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1282 }
1283 done:
1284 if (itp.it > 0) {
1285 IGMP_PRINTF(("%s: v3 general query response scheduled in "
1286 "T+%d seconds on ifp 0x%llx(%s)\n", __func__, itp.it,
1287 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1288 }
1289 igmp_set_timeout(&itp);
1290
1291 return 0;
1292 }
1293
1294 /*
1295 * Process a recieved IGMPv3 group-specific or group-and-source-specific
1296 * query.
1297 * Return <0 if any error occured. Currently this is ignored.
1298 */
1299 static int
igmp_input_v3_group_query(struct in_multi * inm,int timer,struct igmpv3 * igmpv3)1300 igmp_input_v3_group_query(struct in_multi *inm,
1301 int timer, /*const*/ struct igmpv3 *igmpv3)
1302 {
1303 int retval;
1304 uint16_t nsrc;
1305
1306 INM_LOCK_ASSERT_HELD(inm);
1307
1308 retval = 0;
1309
1310 switch (inm->inm_state) {
1311 case IGMP_NOT_MEMBER:
1312 case IGMP_SILENT_MEMBER:
1313 case IGMP_SLEEPING_MEMBER:
1314 case IGMP_LAZY_MEMBER:
1315 case IGMP_AWAKENING_MEMBER:
1316 case IGMP_IDLE_MEMBER:
1317 case IGMP_LEAVING_MEMBER:
1318 return retval;
1319 case IGMP_REPORTING_MEMBER:
1320 case IGMP_G_QUERY_PENDING_MEMBER:
1321 case IGMP_SG_QUERY_PENDING_MEMBER:
1322 break;
1323 }
1324
1325 nsrc = ntohs(igmpv3->igmp_numsrc);
1326
1327 /*
1328 * Deal with group-specific queries upfront.
1329 * If any group query is already pending, purge any recorded
1330 * source-list state if it exists, and schedule a query response
1331 * for this group-specific query.
1332 */
1333 if (nsrc == 0) {
1334 if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
1335 inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
1336 inm_clear_recorded(inm);
1337 timer = min(inm->inm_timer, timer);
1338 }
1339 inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
1340 inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1341 return retval;
1342 }
1343
1344 /*
1345 * Deal with the case where a group-and-source-specific query has
1346 * been received but a group-specific query is already pending.
1347 */
1348 if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
1349 timer = min(inm->inm_timer, timer);
1350 inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1351 return retval;
1352 }
1353
1354 /*
1355 * Finally, deal with the case where a group-and-source-specific
1356 * query has been received, where a response to a previous g-s-r
1357 * query exists, or none exists.
1358 * In this case, we need to parse the source-list which the Querier
1359 * has provided us with and check if we have any source list filter
1360 * entries at T1 for these sources. If we do not, there is no need
1361 * schedule a report and the query may be dropped.
1362 * If we do, we must record them and schedule a current-state
1363 * report for those sources.
1364 * FIXME: Handling source lists larger than 1 mbuf requires that
1365 * we pass the mbuf chain pointer down to this function, and use
1366 * m_getptr() to walk the chain.
1367 */
1368 if (inm->inm_nsrc > 0) {
1369 const struct in_addr *ap;
1370 int i, nrecorded;
1371
1372 ap = (const struct in_addr *)(igmpv3 + 1);
1373 nrecorded = 0;
1374 for (i = 0; i < nsrc; i++, ap++) {
1375 retval = inm_record_source(inm, ap->s_addr);
1376 if (retval < 0) {
1377 break;
1378 }
1379 nrecorded += retval;
1380 }
1381 if (nrecorded > 0) {
1382 IGMP_PRINTF(("%s: schedule response to SG query\n",
1383 __func__));
1384 inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
1385 inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1386 }
1387 }
1388
1389 return retval;
1390 }
1391
1392 /*
1393 * Process a received IGMPv1 host membership report.
1394 *
1395 * NOTE: 0.0.0.0 workaround breaks const correctness.
1396 */
1397 static int
igmp_input_v1_report(struct ifnet * ifp,struct mbuf * m,struct ip * ip,struct igmp * igmp)1398 igmp_input_v1_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
1399 /*const*/ struct igmp *igmp)
1400 {
1401 struct in_ifaddr *ia;
1402 struct in_multi *inm;
1403
1404 IGMPSTAT_INC(igps_rcv_reports);
1405 OIGMPSTAT_INC(igps_rcv_reports);
1406
1407 if ((ifp->if_flags & IFF_LOOPBACK) ||
1408 (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1409 return 0;
1410 }
1411
1412 if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr) ||
1413 !in_hosteq(igmp->igmp_group, ip->ip_dst))) {
1414 IGMPSTAT_INC(igps_rcv_badreports);
1415 OIGMPSTAT_INC(igps_rcv_badreports);
1416 return EINVAL;
1417 }
1418
1419 /*
1420 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1421 * Booting clients may use the source address 0.0.0.0. Some
1422 * IGMP daemons may not know how to use IP_RECVIF to determine
1423 * the interface upon which this message was received.
1424 * Replace 0.0.0.0 with the subnet address if told to do so.
1425 */
1426 if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1427 IFP_TO_IA(ifp, ia);
1428 if (ia != NULL) {
1429 IFA_LOCK(&ia->ia_ifa);
1430 ip->ip_src.s_addr = htonl(ia->ia_subnet);
1431 IFA_UNLOCK(&ia->ia_ifa);
1432 IFA_REMREF(&ia->ia_ifa);
1433 }
1434 }
1435
1436 IGMP_INET_PRINTF(igmp->igmp_group,
1437 ("process v1 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1438 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1439
1440 /*
1441 * IGMPv1 report suppression.
1442 * If we are a member of this group, and our membership should be
1443 * reported, stop our group timer and transition to the 'lazy' state.
1444 */
1445 in_multihead_lock_shared();
1446 IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1447 in_multihead_lock_done();
1448 if (inm != NULL) {
1449 struct igmp_ifinfo *igi;
1450
1451 INM_LOCK(inm);
1452
1453 igi = inm->inm_igi;
1454 VERIFY(igi != NULL);
1455
1456 IGMPSTAT_INC(igps_rcv_ourreports);
1457 OIGMPSTAT_INC(igps_rcv_ourreports);
1458
1459 /*
1460 * If we are in IGMPv3 host mode, do not allow the
1461 * other host's IGMPv1 report to suppress our reports
1462 * unless explicitly configured to do so.
1463 */
1464 IGI_LOCK(igi);
1465 if (igi->igi_version == IGMP_VERSION_3) {
1466 if (igmp_legacysupp) {
1467 igmp_v3_suppress_group_record(inm);
1468 }
1469 IGI_UNLOCK(igi);
1470 INM_UNLOCK(inm);
1471 INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1472 return 0;
1473 }
1474
1475 INM_LOCK_ASSERT_HELD(inm);
1476 inm->inm_timer = 0;
1477
1478 switch (inm->inm_state) {
1479 case IGMP_NOT_MEMBER:
1480 case IGMP_SILENT_MEMBER:
1481 break;
1482 case IGMP_IDLE_MEMBER:
1483 case IGMP_LAZY_MEMBER:
1484 case IGMP_AWAKENING_MEMBER:
1485 IGMP_INET_PRINTF(igmp->igmp_group,
1486 ("report suppressed for %s on ifp 0x%llx(%s)\n",
1487 _igmp_inet_buf,
1488 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1489 OS_FALLTHROUGH;
1490 case IGMP_SLEEPING_MEMBER:
1491 inm->inm_state = IGMP_SLEEPING_MEMBER;
1492 break;
1493 case IGMP_REPORTING_MEMBER:
1494 IGMP_INET_PRINTF(igmp->igmp_group,
1495 ("report suppressed for %s on ifp 0x%llx(%s)\n",
1496 _igmp_inet_buf,
1497 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1498 if (igi->igi_version == IGMP_VERSION_1) {
1499 inm->inm_state = IGMP_LAZY_MEMBER;
1500 } else if (igi->igi_version == IGMP_VERSION_2) {
1501 inm->inm_state = IGMP_SLEEPING_MEMBER;
1502 }
1503 break;
1504 case IGMP_G_QUERY_PENDING_MEMBER:
1505 case IGMP_SG_QUERY_PENDING_MEMBER:
1506 case IGMP_LEAVING_MEMBER:
1507 break;
1508 }
1509 IGI_UNLOCK(igi);
1510 INM_UNLOCK(inm);
1511 INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1512 }
1513
1514 return 0;
1515 }
1516
1517 /*
1518 * Process a received IGMPv2 host membership report.
1519 *
1520 * NOTE: 0.0.0.0 workaround breaks const correctness.
1521 */
1522 static int
igmp_input_v2_report(struct ifnet * ifp,struct mbuf * m,struct ip * ip,struct igmp * igmp)1523 igmp_input_v2_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
1524 /*const*/ struct igmp *igmp)
1525 {
1526 struct in_ifaddr *ia;
1527 struct in_multi *inm;
1528
1529 /*
1530 * Make sure we don't hear our own membership report. Fast
1531 * leave requires knowing that we are the only member of a
1532 * group.
1533 */
1534 IFP_TO_IA(ifp, ia);
1535 if (ia != NULL) {
1536 IFA_LOCK(&ia->ia_ifa);
1537 if (in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
1538 IFA_UNLOCK(&ia->ia_ifa);
1539 IFA_REMREF(&ia->ia_ifa);
1540 return 0;
1541 }
1542 IFA_UNLOCK(&ia->ia_ifa);
1543 }
1544
1545 IGMPSTAT_INC(igps_rcv_reports);
1546 OIGMPSTAT_INC(igps_rcv_reports);
1547
1548 if ((ifp->if_flags & IFF_LOOPBACK) ||
1549 (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1550 if (ia != NULL) {
1551 IFA_REMREF(&ia->ia_ifa);
1552 }
1553 return 0;
1554 }
1555
1556 if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
1557 !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
1558 if (ia != NULL) {
1559 IFA_REMREF(&ia->ia_ifa);
1560 }
1561 IGMPSTAT_INC(igps_rcv_badreports);
1562 OIGMPSTAT_INC(igps_rcv_badreports);
1563 return EINVAL;
1564 }
1565
1566 /*
1567 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1568 * Booting clients may use the source address 0.0.0.0. Some
1569 * IGMP daemons may not know how to use IP_RECVIF to determine
1570 * the interface upon which this message was received.
1571 * Replace 0.0.0.0 with the subnet address if told to do so.
1572 */
1573 if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1574 if (ia != NULL) {
1575 IFA_LOCK(&ia->ia_ifa);
1576 ip->ip_src.s_addr = htonl(ia->ia_subnet);
1577 IFA_UNLOCK(&ia->ia_ifa);
1578 }
1579 }
1580 if (ia != NULL) {
1581 IFA_REMREF(&ia->ia_ifa);
1582 }
1583
1584 IGMP_INET_PRINTF(igmp->igmp_group,
1585 ("process v2 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1586 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1587
1588 /*
1589 * IGMPv2 report suppression.
1590 * If we are a member of this group, and our membership should be
1591 * reported, and our group timer is pending or about to be reset,
1592 * stop our group timer by transitioning to the 'lazy' state.
1593 */
1594 in_multihead_lock_shared();
1595 IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1596 in_multihead_lock_done();
1597 if (inm != NULL) {
1598 struct igmp_ifinfo *igi;
1599
1600 INM_LOCK(inm);
1601 igi = inm->inm_igi;
1602 VERIFY(igi != NULL);
1603
1604 IGMPSTAT_INC(igps_rcv_ourreports);
1605 OIGMPSTAT_INC(igps_rcv_ourreports);
1606
1607 /*
1608 * If we are in IGMPv3 host mode, do not allow the
1609 * other host's IGMPv1 report to suppress our reports
1610 * unless explicitly configured to do so.
1611 */
1612 IGI_LOCK(igi);
1613 if (igi->igi_version == IGMP_VERSION_3) {
1614 if (igmp_legacysupp) {
1615 igmp_v3_suppress_group_record(inm);
1616 }
1617 IGI_UNLOCK(igi);
1618 INM_UNLOCK(inm);
1619 INM_REMREF(inm);
1620 return 0;
1621 }
1622
1623 inm->inm_timer = 0;
1624
1625 switch (inm->inm_state) {
1626 case IGMP_NOT_MEMBER:
1627 case IGMP_SILENT_MEMBER:
1628 case IGMP_SLEEPING_MEMBER:
1629 break;
1630 case IGMP_REPORTING_MEMBER:
1631 case IGMP_IDLE_MEMBER:
1632 case IGMP_AWAKENING_MEMBER:
1633 IGMP_INET_PRINTF(igmp->igmp_group,
1634 ("report suppressed for %s on ifp 0x%llx(%s)\n",
1635 _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp),
1636 if_name(ifp)));
1637 OS_FALLTHROUGH;
1638 case IGMP_LAZY_MEMBER:
1639 inm->inm_state = IGMP_LAZY_MEMBER;
1640 break;
1641 case IGMP_G_QUERY_PENDING_MEMBER:
1642 case IGMP_SG_QUERY_PENDING_MEMBER:
1643 case IGMP_LEAVING_MEMBER:
1644 break;
1645 }
1646 IGI_UNLOCK(igi);
1647 INM_UNLOCK(inm);
1648 INM_REMREF(inm);
1649 }
1650
1651 return 0;
1652 }
1653
/*
 * IGMP input path, invoked for IPPROTO_IGMP datagrams; 'off' is the
 * offset of the IGMP header within the mbuf chain.
 *
 * Validates length, checksum and TTL, then dispatches queries and
 * reports to the per-version handlers.  On error the mbuf is freed
 * here; otherwise it is finally handed to rip_input() so that raw
 * IGMP sockets also see the packet.
 */
void
igmp_input(struct mbuf *m, int off)
{
	int iphlen;
	struct ifnet *ifp;
	struct igmp *igmp;
	struct ip *ip;
	int igmplen;
	int minlen;
	int queryver;

	IGMP_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m), off));

	ifp = m->m_pkthdr.rcvif;

	IGMPSTAT_INC(igps_rcv_total);
	OIGMPSTAT_INC(igps_rcv_total);

	/* Expect 32-bit aligned data pointer on strict-align platforms */
	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);

	ip = mtod(m, struct ip *);
	iphlen = off;

	/* By now, ip_len no longer contains the length of IP header */
	igmplen = ip->ip_len;

	/*
	 * Validate lengths.
	 */
	if (igmplen < IGMP_MINLEN) {
		IGMPSTAT_INC(igps_rcv_tooshort);
		OIGMPSTAT_INC(igps_rcv_tooshort);
		m_freem(m);
		return;
	}

	/*
	 * Always pullup to the minimum size for v1/v2 or v3
	 * to amortize calls to m_pulldown().
	 */
	if (igmplen >= IGMP_V3_QUERY_MINLEN) {
		minlen = IGMP_V3_QUERY_MINLEN;
	} else {
		minlen = IGMP_MINLEN;
	}

	/* A bit more expensive than M_STRUCT_GET, but ensures alignment */
	M_STRUCT_GET0(igmp, struct igmp *, m, off, minlen);
	if (igmp == NULL) {
		/* NOTE(review): no m_freem() here — M_STRUCT_GET0 appears
		 * to free the chain on failure; verify against the macro. */
		IGMPSTAT_INC(igps_rcv_tooshort);
		OIGMPSTAT_INC(igps_rcv_tooshort);
		return;
	}
	/* N.B.: we assume the packet was correctly aligned in ip_input. */

	/*
	 * Validate checksum.
	 */
	m->m_data += iphlen;
	m->m_len -= iphlen;
	if (in_cksum(m, igmplen)) {
		IGMPSTAT_INC(igps_rcv_badsum);
		OIGMPSTAT_INC(igps_rcv_badsum);
		m_freem(m);
		return;
	}
	/* Restore the mbuf to cover the IP header again. */
	m->m_data -= iphlen;
	m->m_len += iphlen;

	/*
	 * IGMP control traffic is link-scope, and must have a TTL of 1.
	 * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
	 * probe packets may come from beyond the LAN.
	 */
	if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
		IGMPSTAT_INC(igps_rcv_badttl);
		m_freem(m);
		return;
	}

	switch (igmp->igmp_type) {
	case IGMP_HOST_MEMBERSHIP_QUERY:
		/* v1 and v2 queries share a format; a zero code means v1. */
		if (igmplen == IGMP_MINLEN) {
			if (igmp->igmp_code == 0) {
				queryver = IGMP_VERSION_1;
			} else {
				queryver = IGMP_VERSION_2;
			}
		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
			queryver = IGMP_VERSION_3;
		} else {
			IGMPSTAT_INC(igps_rcv_tooshort);
			OIGMPSTAT_INC(igps_rcv_tooshort);
			m_freem(m);
			return;
		}

		OIGMPSTAT_INC(igps_rcv_queries);

		switch (queryver) {
		case IGMP_VERSION_1:
			IGMPSTAT_INC(igps_rcv_v1v2_queries);
			if (!igmp_v1enable) {
				break;
			}
			if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
				m_freem(m);
				return;
			}
			break;

		case IGMP_VERSION_2:
			IGMPSTAT_INC(igps_rcv_v1v2_queries);
			if (!igmp_v2enable) {
				break;
			}
			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
				m_freem(m);
				return;
			}
			break;

		case IGMP_VERSION_3: {
			struct igmpv3 *igmpv3;
			uint16_t igmpv3len;
			uint16_t srclen;
			int nsrc;

			IGMPSTAT_INC(igps_rcv_v3_queries);
			igmpv3 = (struct igmpv3 *)igmp;
			/*
			 * Validate length based on source count.
			 */
			nsrc = ntohs(igmpv3->igmp_numsrc);
			/*
			 * The max value of nsrc is limited by the
			 * MTU of the network on which the datagram
			 * is received
			 */
			if (nsrc < 0 || nsrc > IGMP_V3_QUERY_MAX_SRCS) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				m_freem(m);
				return;
			}
			srclen = sizeof(struct in_addr) * (uint16_t)nsrc;
			if (igmplen < (IGMP_V3_QUERY_MINLEN + srclen)) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				m_freem(m);
				return;
			}
			igmpv3len = IGMP_V3_QUERY_MINLEN + srclen;
			/*
			 * A bit more expensive than M_STRUCT_GET,
			 * but ensures alignment.
			 */
			M_STRUCT_GET0(igmpv3, struct igmpv3 *, m,
			    off, igmpv3len);
			if (igmpv3 == NULL) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				return;
			}
			/*
			 * N.B.: we assume the packet was correctly
			 * aligned in ip_input.
			 */
			if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
				m_freem(m);
				return;
			}
		}
		break;
		}
		break;

	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
		if (!igmp_v1enable) {
			break;
		}
		if (igmp_input_v1_report(ifp, m, ip, igmp) != 0) {
			m_freem(m);
			return;
		}
		break;

	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
		if (!igmp_v2enable) {
			break;
		}
		if (!ip_checkrouteralert(m)) {
			IGMPSTAT_INC(igps_rcv_nora);
		}
		if (igmp_input_v2_report(ifp, m, ip, igmp) != 0) {
			m_freem(m);
			return;
		}
		break;

	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
		/*
		 * Hosts do not need to process IGMPv3 membership reports,
		 * as report suppression is no longer required.
		 */
		if (!ip_checkrouteralert(m)) {
			IGMPSTAT_INC(igps_rcv_nora);
		}
		break;

	default:
		break;
	}

	IGMP_LOCK_ASSERT_NOTHELD();
	/*
	 * Pass all valid IGMP packets up to any process(es) listening on a
	 * raw IGMP socket.
	 */
	rip_input(m, off);
}
1877
1878 /*
1879 * Schedule IGMP timer based on various parameters; caller must ensure that
1880 * lock ordering is maintained as this routine acquires IGMP global lock.
1881 */
1882 void
igmp_set_timeout(struct igmp_tparams * itp)1883 igmp_set_timeout(struct igmp_tparams *itp)
1884 {
1885 IGMP_LOCK_ASSERT_NOTHELD();
1886 VERIFY(itp != NULL);
1887
1888 if (itp->qpt != 0 || itp->it != 0 || itp->cst != 0 || itp->sct != 0) {
1889 IGMP_LOCK();
1890 if (itp->qpt != 0) {
1891 querier_present_timers_running = 1;
1892 }
1893 if (itp->it != 0) {
1894 interface_timers_running = 1;
1895 }
1896 if (itp->cst != 0) {
1897 current_state_timers_running = 1;
1898 }
1899 if (itp->sct != 0) {
1900 state_change_timers_running = 1;
1901 }
1902 igmp_sched_timeout(itp->fast);
1903 IGMP_UNLOCK();
1904 }
1905 }
1906
/*
 * Like igmp_set_timeout(), but requests the "fast" (immediate) schedule
 * for the IGMP timeout instead of the regular 1-second interval.
 */
void
igmp_set_fast_timeout(struct igmp_tparams *itp)
{
	VERIFY(itp != NULL);
	itp->fast = true;
	igmp_set_timeout(itp);
}
1914
1915 /*
1916 * IGMP timer handler (per 1 second).
1917 */
1918 static void
igmp_timeout(void * arg)1919 igmp_timeout(void *arg)
1920 {
1921 struct ifqueue scq; /* State-change packets */
1922 struct ifqueue qrq; /* Query response packets */
1923 struct ifnet *ifp;
1924 struct igmp_ifinfo *igi;
1925 struct in_multi *inm;
1926 unsigned int loop = 0, uri_sec = 0;
1927 SLIST_HEAD(, in_multi) inm_dthead;
1928 bool fast = arg != NULL;
1929
1930 SLIST_INIT(&inm_dthead);
1931
1932 /*
1933 * Update coarse-grained networking timestamp (in sec.); the idea
1934 * is to piggy-back on the timeout callout to update the counter
1935 * returnable via net_uptime().
1936 */
1937 net_update_uptime();
1938
1939 IGMP_LOCK();
1940
1941 IGMP_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d, fast %d\n", __func__,
1942 querier_present_timers_running, interface_timers_running,
1943 current_state_timers_running, state_change_timers_running,
1944 fast));
1945
1946 if (fast) {
1947 /*
1948 * When running the fast timer, skip processing
1949 * of "querier present" timers since they are
1950 * based on 1-second intervals.
1951 */
1952 goto skip_query_timers;
1953 }
1954 /*
1955 * IGMPv1/v2 querier present timer processing.
1956 */
1957 if (querier_present_timers_running) {
1958 querier_present_timers_running = 0;
1959 LIST_FOREACH(igi, &igi_head, igi_link) {
1960 IGI_LOCK(igi);
1961 igmp_v1v2_process_querier_timers(igi);
1962 if (igi->igi_v1_timer > 0 || igi->igi_v2_timer > 0) {
1963 querier_present_timers_running = 1;
1964 }
1965 IGI_UNLOCK(igi);
1966 }
1967 }
1968
1969 /*
1970 * IGMPv3 General Query response timer processing.
1971 */
1972 if (interface_timers_running) {
1973 IGMP_PRINTF(("%s: interface timers running\n", __func__));
1974 interface_timers_running = 0;
1975 LIST_FOREACH(igi, &igi_head, igi_link) {
1976 IGI_LOCK(igi);
1977 if (igi->igi_version != IGMP_VERSION_3) {
1978 IGI_UNLOCK(igi);
1979 continue;
1980 }
1981 if (igi->igi_v3_timer == 0) {
1982 /* Do nothing. */
1983 } else if (--igi->igi_v3_timer == 0) {
1984 if (igmp_v3_dispatch_general_query(igi) > 0) {
1985 interface_timers_running = 1;
1986 }
1987 } else {
1988 interface_timers_running = 1;
1989 }
1990 IGI_UNLOCK(igi);
1991 }
1992 }
1993
1994 skip_query_timers:
1995 if (!current_state_timers_running &&
1996 !state_change_timers_running) {
1997 goto out_locked;
1998 }
1999
2000 current_state_timers_running = 0;
2001 state_change_timers_running = 0;
2002
2003 memset(&qrq, 0, sizeof(struct ifqueue));
2004 qrq.ifq_maxlen = IGMP_MAX_G_GS_PACKETS;
2005
2006 memset(&scq, 0, sizeof(struct ifqueue));
2007 scq.ifq_maxlen = IGMP_MAX_STATE_CHANGE_PACKETS;
2008
2009 IGMP_PRINTF(("%s: state change timers running\n", __func__));
2010
2011 /*
2012 * IGMPv1/v2/v3 host report and state-change timer processing.
2013 * Note: Processing a v3 group timer may remove a node.
2014 */
2015 LIST_FOREACH(igi, &igi_head, igi_link) {
2016 struct in_multistep step;
2017
2018 IGI_LOCK(igi);
2019 ifp = igi->igi_ifp;
2020 loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
2021 uri_sec = IGMP_RANDOM_DELAY(igi->igi_uri);
2022 IGI_UNLOCK(igi);
2023
2024 in_multihead_lock_shared();
2025 IN_FIRST_MULTI(step, inm);
2026 while (inm != NULL) {
2027 INM_LOCK(inm);
2028 if (inm->inm_ifp != ifp) {
2029 goto next;
2030 }
2031
2032 IGI_LOCK(igi);
2033 switch (igi->igi_version) {
2034 case IGMP_VERSION_1:
2035 case IGMP_VERSION_2:
2036 igmp_v1v2_process_group_timer(inm,
2037 igi->igi_version);
2038 break;
2039 case IGMP_VERSION_3:
2040 igmp_v3_process_group_timers(igi, &qrq,
2041 &scq, inm, uri_sec);
2042 break;
2043 }
2044 IGI_UNLOCK(igi);
2045 next:
2046 INM_UNLOCK(inm);
2047 IN_NEXT_MULTI(step, inm);
2048 }
2049 in_multihead_lock_done();
2050
2051 IGI_LOCK(igi);
2052 if (igi->igi_version == IGMP_VERSION_1 ||
2053 igi->igi_version == IGMP_VERSION_2) {
2054 igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop);
2055 } else if (igi->igi_version == IGMP_VERSION_3) {
2056 IGI_UNLOCK(igi);
2057 igmp_dispatch_queue(NULL, &qrq, 0, loop);
2058 igmp_dispatch_queue(NULL, &scq, 0, loop);
2059 VERIFY(qrq.ifq_len == 0);
2060 VERIFY(scq.ifq_len == 0);
2061 IGI_LOCK(igi);
2062 }
2063 /*
2064 * In case there are still any pending membership reports
2065 * which didn't get drained at version change time.
2066 */
2067 IF_DRAIN(&igi->igi_v2q);
2068 /*
2069 * Release all deferred inm records, and drain any locally
2070 * enqueued packets; do it even if the current IGMP version
2071 * for the link is no longer IGMPv3, in order to handle the
2072 * version change case.
2073 */
2074 igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
2075 VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
2076 IGI_UNLOCK(igi);
2077
2078 IF_DRAIN(&qrq);
2079 IF_DRAIN(&scq);
2080 }
2081
2082 out_locked:
2083 /* re-arm the timer if there's work to do */
2084 igmp_timeout_run = 0;
2085 igmp_sched_timeout(false);
2086 IGMP_UNLOCK();
2087
2088 /* Now that we're dropped all locks, release detached records */
2089 IGMP_REMOVE_DETACHED_INM(&inm_dthead);
2090 }
2091
2092 static void
igmp_sched_timeout(bool fast)2093 igmp_sched_timeout(bool fast)
2094 {
2095 IGMP_LOCK_ASSERT_HELD();
2096
2097 if (!igmp_timeout_run &&
2098 (querier_present_timers_running || current_state_timers_running ||
2099 interface_timers_running || state_change_timers_running)) {
2100 igmp_timeout_run = 1;
2101 int sched_hz = fast ? 0 : hz;
2102 void *arg = fast ? (void *)igmp_sched_timeout : NULL;
2103 timeout(igmp_timeout, arg, sched_hz);
2104 }
2105 }
2106
2107 /*
2108 * Free the in_multi reference(s) for this IGMP lifecycle.
2109 *
2110 * Caller must be holding igi_lock.
2111 */
2112 static void
igmp_flush_relq(struct igmp_ifinfo * igi,struct igmp_inm_relhead * inm_dthead)2113 igmp_flush_relq(struct igmp_ifinfo *igi, struct igmp_inm_relhead *inm_dthead)
2114 {
2115 struct in_multi *inm;
2116
2117 again:
2118 IGI_LOCK_ASSERT_HELD(igi);
2119 inm = SLIST_FIRST(&igi->igi_relinmhead);
2120 if (inm != NULL) {
2121 int lastref;
2122
2123 SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
2124 IGI_UNLOCK(igi);
2125
2126 in_multihead_lock_exclusive();
2127 INM_LOCK(inm);
2128 VERIFY(inm->inm_nrelecnt != 0);
2129 inm->inm_nrelecnt--;
2130 lastref = in_multi_detach(inm);
2131 VERIFY(!lastref || (!(inm->inm_debug & IFD_ATTACHED) &&
2132 inm->inm_reqcnt == 0));
2133 INM_UNLOCK(inm);
2134 in_multihead_lock_done();
2135 /* from igi_relinmhead */
2136 INM_REMREF(inm);
2137 /* from in_multihead list */
2138 if (lastref) {
2139 /*
2140 * Defer releasing our final reference, as we
2141 * are holding the IGMP lock at this point, and
2142 * we could end up with locking issues later on
2143 * (while issuing SIOCDELMULTI) when this is the
2144 * final reference count. Let the caller do it
2145 * when it is safe.
2146 */
2147 IGMP_ADD_DETACHED_INM(inm_dthead, inm);
2148 }
2149 IGI_LOCK(igi);
2150 goto again;
2151 }
2152 }
2153
2154 /*
2155 * Update host report group timer for IGMPv1/v2.
2156 * Will update the global pending timer flags.
2157 */
2158 static void
igmp_v1v2_process_group_timer(struct in_multi * inm,const int igmp_version)2159 igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version)
2160 {
2161 int report_timer_expired;
2162
2163 IGMP_LOCK_ASSERT_HELD();
2164 INM_LOCK_ASSERT_HELD(inm);
2165 IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2166
2167 if (inm->inm_timer == 0) {
2168 report_timer_expired = 0;
2169 } else if (--inm->inm_timer == 0) {
2170 report_timer_expired = 1;
2171 } else {
2172 current_state_timers_running = 1;
2173 /* caller will schedule timer */
2174 return;
2175 }
2176
2177 switch (inm->inm_state) {
2178 case IGMP_NOT_MEMBER:
2179 case IGMP_SILENT_MEMBER:
2180 case IGMP_IDLE_MEMBER:
2181 case IGMP_LAZY_MEMBER:
2182 case IGMP_SLEEPING_MEMBER:
2183 case IGMP_AWAKENING_MEMBER:
2184 break;
2185 case IGMP_REPORTING_MEMBER:
2186 if (report_timer_expired) {
2187 inm->inm_state = IGMP_IDLE_MEMBER;
2188 (void) igmp_v1v2_queue_report(inm,
2189 (igmp_version == IGMP_VERSION_2) ?
2190 IGMP_v2_HOST_MEMBERSHIP_REPORT :
2191 IGMP_v1_HOST_MEMBERSHIP_REPORT);
2192 INM_LOCK_ASSERT_HELD(inm);
2193 IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2194 }
2195 break;
2196 case IGMP_G_QUERY_PENDING_MEMBER:
2197 case IGMP_SG_QUERY_PENDING_MEMBER:
2198 case IGMP_LEAVING_MEMBER:
2199 break;
2200 }
2201 }
2202
2203 /*
2204 * Update a group's timers for IGMPv3.
2205 * Will update the global pending timer flags.
2206 * Note: Unlocked read from igi.
2207 */
/*
 * igi:     interface's IGMP info (must match inm->inm_igi)
 * qrq:     output queue for Current-State (query response) records
 * scq:     output queue for State-Change records
 * inm:     the group membership being processed
 * uri_sec: interval used to re-arm the state-change retransmit timer
 *          (presumably the Unsolicited Report Interval in seconds --
 *          confirm against caller)
 */
static void
igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
    struct ifqueue *qrq, struct ifqueue *scq,
    struct in_multi *inm, const unsigned int uri_sec)
{
	int query_response_timer_expired;
	int state_change_retransmit_timer_expired;

	IGMP_LOCK_ASSERT_HELD();
	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_HELD(igi);
	VERIFY(igi == inm->inm_igi);

	query_response_timer_expired = 0;
	state_change_retransmit_timer_expired = 0;

	/*
	 * During a transition from v1/v2 compatibility mode back to v3,
	 * a group record in REPORTING state may still have its group
	 * timer active. This is a no-op in this function; it is easier
	 * to deal with it here than to complicate the timeout path.
	 */
	/* Tick down the group's query-response timer. */
	if (inm->inm_timer == 0) {
		query_response_timer_expired = 0;
	} else if (--inm->inm_timer == 0) {
		query_response_timer_expired = 1;
	} else {
		current_state_timers_running = 1;
		/* caller will schedule timer */
	}

	/* Tick down the state-change report retransmission timer. */
	if (inm->inm_sctimer == 0) {
		state_change_retransmit_timer_expired = 0;
	} else if (--inm->inm_sctimer == 0) {
		state_change_retransmit_timer_expired = 1;
	} else {
		state_change_timers_running = 1;
		/* caller will schedule timer */
	}

	/* We are in timer callback, so be quick about it. */
	if (!state_change_retransmit_timer_expired &&
	    !query_response_timer_expired) {
		return;
	}

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
	case IGMP_IDLE_MEMBER:
		/* Nothing pending to send for these states. */
		break;
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		/*
		 * Respond to a previously pending Group-Specific
		 * or Group-and-Source-Specific query by enqueueing
		 * the appropriate Current-State report for
		 * immediate transmission.
		 */
		if (query_response_timer_expired) {
			int retval;

			/* Last arg: include recorded sources only for SG. */
			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
			IGMP_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			inm->inm_state = IGMP_REPORTING_MEMBER;
			/* XXX Clear recorded sources for next time. */
			inm_clear_recorded(inm);
		}
		OS_FALLTHROUGH;
	case IGMP_REPORTING_MEMBER:
	case IGMP_LEAVING_MEMBER:
		if (state_change_retransmit_timer_expired) {
			/*
			 * State-change retransmission timer fired.
			 * If there are any further pending retransmissions,
			 * set the global pending state-change flag, and
			 * reset the timer.
			 */
			if (--inm->inm_scrv > 0) {
				inm->inm_sctimer = (uint16_t)uri_sec;
				state_change_timers_running = 1;
				/* caller will schedule timer */
			}
			/*
			 * Retransmit the previously computed state-change
			 * report. If there are no further pending
			 * retransmissions, the mbuf queue will be consumed.
			 * Update T0 state to T1 as we have now sent
			 * a state-change.
			 */
			(void) igmp_v3_merge_state_changes(inm, scq);

			inm_commit(inm);
			IGMP_INET_PRINTF(inm->inm_addr,
			    ("%s: T1 -> T0 for %s/%s\n", __func__,
			    _igmp_inet_buf, if_name(inm->inm_ifp)));

			/*
			 * If we are leaving the group for good, make sure
			 * we release IGMP's reference to it.
			 * This release must be deferred using a SLIST,
			 * as we are called from a loop which traverses
			 * the in_multihead list.
			 */
			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
			    inm->inm_scrv == 0) {
				inm->inm_state = IGMP_NOT_MEMBER;
				/*
				 * A reference has already been held in
				 * igmp_final_leave() for this inm, so
				 * no need to hold another one. We also
				 * bumped up its request count then, so
				 * that it stays in in_multihead. Both
				 * of them will be released when it is
				 * dequeued later on.
				 */
				VERIFY(inm->inm_nrelecnt != 0);
				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
				    inm, inm_nrele);
			}
		}
		break;
	}
}
2337
2338 /*
2339 * Suppress a group's pending response to a group or source/group query.
2340 *
2341 * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
2342 * Do NOT update ST1/ST0 as this operation merely suppresses
2343 * the currently pending group record.
2344 * Do NOT suppress the response to a general query. It is possible but
2345 * it would require adding another state or flag.
2346 */
2347 static void
igmp_v3_suppress_group_record(struct in_multi * inm)2348 igmp_v3_suppress_group_record(struct in_multi *inm)
2349 {
2350 INM_LOCK_ASSERT_HELD(inm);
2351 IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2352
2353 VERIFY(inm->inm_igi->igi_version == IGMP_VERSION_3);
2354
2355 if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER ||
2356 inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER) {
2357 return;
2358 }
2359
2360 if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
2361 inm_clear_recorded(inm);
2362 }
2363
2364 inm->inm_timer = 0;
2365 inm->inm_state = IGMP_REPORTING_MEMBER;
2366 }
2367
2368 /*
2369 * Switch to a different IGMP version on the given interface,
2370 * as per Section 7.2.1.
2371 */
2372 static uint32_t
igmp_set_version(struct igmp_ifinfo * igi,const int igmp_version)2373 igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version)
2374 {
2375 int old_version_timer;
2376
2377 IGI_LOCK_ASSERT_HELD(igi);
2378
2379 IGMP_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__,
2380 igmp_version, (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2381 if_name(igi->igi_ifp)));
2382
2383 if (igmp_version == IGMP_VERSION_1 || igmp_version == IGMP_VERSION_2) {
2384 /*
2385 * Compute the "Older Version Querier Present" timer as per
2386 * Section 8.12, in seconds.
2387 */
2388 old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
2389
2390 if (igmp_version == IGMP_VERSION_1) {
2391 igi->igi_v1_timer = old_version_timer;
2392 igi->igi_v2_timer = 0;
2393 } else if (igmp_version == IGMP_VERSION_2) {
2394 igi->igi_v1_timer = 0;
2395 igi->igi_v2_timer = old_version_timer;
2396 }
2397 }
2398
2399 if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2400 if (igi->igi_version != IGMP_VERSION_2) {
2401 igmp_v3_cancel_link_timers(igi);
2402 igi->igi_version = IGMP_VERSION_2;
2403 }
2404 } else if (igi->igi_v1_timer > 0) {
2405 if (igi->igi_version != IGMP_VERSION_1) {
2406 igmp_v3_cancel_link_timers(igi);
2407 igi->igi_version = IGMP_VERSION_1;
2408 }
2409 }
2410
2411 IGI_LOCK_ASSERT_HELD(igi);
2412
2413 return MAX(igi->igi_v1_timer, igi->igi_v2_timer);
2414 }
2415
2416 /*
2417 * Cancel pending IGMPv3 timers for the given link and all groups
2418 * joined on it; state-change, general-query, and group-query timers.
2419 *
2420 * Only ever called on a transition from v3 to Compatibility mode. Kill
2421 * the timers stone dead (this may be expensive for large N groups), they
2422 * will be restarted if Compatibility Mode deems that they must be due to
2423 * query processing.
2424 */
static void
igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
{
	struct ifnet *ifp;
	struct in_multi *inm;
	struct in_multistep step;

	IGI_LOCK_ASSERT_HELD(igi);

	IGMP_PRINTF(("%s: cancel v3 timers on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), if_name(igi->igi_ifp)));

	/*
	 * Stop the v3 General Query Response on this link stone dead.
	 * If timer is woken up due to interface_timers_running,
	 * the flag will be cleared if there are no pending link timers.
	 */
	igi->igi_v3_timer = 0;

	/*
	 * Now clear the current-state and state-change report timers
	 * for all memberships scoped to this link.
	 */
	ifp = igi->igi_ifp;
	/* Drop igi lock: the global in_multihead walk takes its own locks. */
	IGI_UNLOCK(igi);

	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		/*
		 * Skip memberships on other links.
		 * NOTE(review): this skips only when BOTH the ifp and the
		 * igi differ -- confirm this shouldn't be '||', since
		 * either mismatch alone implies a foreign membership.
		 */
		if (inm->inm_ifp != ifp && inm->inm_igi != igi) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/*
			 * These states are either not relevant in v3 mode,
			 * or are unreported. Do nothing.
			 */
			break;
		case IGMP_LEAVING_MEMBER:
			/*
			 * If we are leaving the group and switching to
			 * compatibility mode, we need to release the final
			 * reference held for issuing the INCLUDE {}, and
			 * transition to REPORTING to ensure the host leave
			 * message is sent upstream to the old querier --
			 * transition to NOT would lose the leave and race.
			 * During igmp_final_leave(), we bumped up both the
			 * request and reference counts. Since we cannot
			 * call in_multi_detach() here, defer this task to
			 * the timer routine.
			 */
			VERIFY(inm->inm_nrelecnt != 0);
			IGI_LOCK(igi);
			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
			IGI_UNLOCK(igi);
			OS_FALLTHROUGH;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
			/* Forget sources recorded for a pending SG query. */
			inm_clear_recorded(inm);
			OS_FALLTHROUGH;
		case IGMP_REPORTING_MEMBER:
			inm->inm_state = IGMP_REPORTING_MEMBER;
			break;
		}
		/*
		 * Always clear state-change and group report timers.
		 * Free any pending IGMPv3 state-change records.
		 */
		inm->inm_sctimer = 0;
		inm->inm_timer = 0;
		IF_DRAIN(&inm->inm_scq);
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();

	/* Reacquire for the caller, who expects the igi lock held on return. */
	IGI_LOCK(igi);
}
2512
2513 /*
2514 * Update the Older Version Querier Present timers for a link.
2515 * See Section 7.2.1 of RFC 3376.
2516 */
static void
igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
{
	IGI_LOCK_ASSERT_HELD(igi);

	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
		/*
		 * IGMPv1 and IGMPv2 Querier Present timers expired.
		 *
		 * Revert to IGMPv3.
		 */
		if (igi->igi_version != IGMP_VERSION_3) {
			IGMP_PRINTF(("%s: transition from v%d -> v%d "
			    "on 0x%llx(%s)\n", __func__,
			    igi->igi_version, IGMP_VERSION_3,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    if_name(igi->igi_ifp)));
			igi->igi_version = IGMP_VERSION_3;
			/* Drop v1/v2 reports queued while in compat mode. */
			IF_DRAIN(&igi->igi_v2q);
		}
	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
		/*
		 * IGMPv1 Querier Present timer expired,
		 * IGMPv2 Querier Present timer running.
		 * If IGMPv2 was disabled since last timeout,
		 * revert to IGMPv3.
		 * If IGMPv2 is enabled, revert to IGMPv2.
		 */
		if (!igmp_v2enable) {
			IGMP_PRINTF(("%s: transition from v%d -> v%d "
			    "on 0x%llx(%s%d)\n", __func__,
			    igi->igi_version, IGMP_VERSION_3,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
			igi->igi_v2_timer = 0;
			igi->igi_version = IGMP_VERSION_3;
			IF_DRAIN(&igi->igi_v2q);
		} else {
			/* One-second tick of the v2 querier-present timer. */
			--igi->igi_v2_timer;
			if (igi->igi_version != IGMP_VERSION_2) {
				IGMP_PRINTF(("%s: transition from v%d -> v%d "
				    "on 0x%llx(%s)\n", __func__,
				    igi->igi_version, IGMP_VERSION_2,
				    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
				    if_name(igi->igi_ifp)));
				/* Discard pending v3 general-query responses. */
				IF_DRAIN(&igi->igi_gq);
				/* NB: drops and reacquires the igi lock. */
				igmp_v3_cancel_link_timers(igi);
				igi->igi_version = IGMP_VERSION_2;
			}
		}
	} else if (igi->igi_v1_timer > 0) {
		/*
		 * IGMPv1 Querier Present timer running.
		 * Stop IGMPv2 timer if running.
		 *
		 * If IGMPv1 was disabled since last timeout,
		 * revert to IGMPv3.
		 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
		 */
		if (!igmp_v1enable) {
			IGMP_PRINTF(("%s: transition from v%d -> v%d "
			    "on 0x%llx(%s%d)\n", __func__,
			    igi->igi_version, IGMP_VERSION_3,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
			igi->igi_v1_timer = 0;
			igi->igi_version = IGMP_VERSION_3;
			IF_DRAIN(&igi->igi_v2q);
		} else {
			/* One-second tick of the v1 querier-present timer. */
			--igi->igi_v1_timer;
		}
		/* v1 presence takes precedence: cancel any v2 timer. */
		if (igi->igi_v2_timer > 0) {
			IGMP_PRINTF(("%s: cancel v2 timer on 0x%llx(%s%d)\n",
			    __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
			igi->igi_v2_timer = 0;
		}
	}
}
2597
2598 /*
2599 * Dispatch an IGMPv1/v2 host report or leave message.
2600 * These are always small enough to fit inside a single mbuf.
2601 */
static int
igmp_v1v2_queue_report(struct in_multi *inm, const int type)
{
	struct ifnet *ifp;
	struct igmp *igmp;
	struct ip *ip;
	struct mbuf *m;
	int error = 0;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_HELD(inm->inm_igi);

	ifp = inm->inm_ifp;

	/* A v1/v2 message always fits in a single mbuf. */
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		return ENOMEM;
	}
	/* Reserve leading space so the IP header can be prepended in place. */
	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));

	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);

	/* Point at the IGMP portion first; the IP header is filled later. */
	m->m_data += sizeof(struct ip);
	m->m_len = sizeof(struct igmp);

	igmp = mtod(m, struct igmp *);
	igmp->igmp_type = (u_char)type;
	igmp->igmp_code = 0;
	igmp->igmp_group = inm->inm_addr;
	igmp->igmp_cksum = 0;
	/* Checksum covers the IGMP header only (m_len == sizeof igmp here). */
	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));

	/* Expose the reserved IP header region again. */
	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	ip = mtod(m, struct ip *);
	ip->ip_tos = 0;
	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
	ip->ip_off = 0;
	ip->ip_p = IPPROTO_IGMP;
	/* Remaining IP fields presumably filled by the send path -- confirm. */
	ip->ip_src.s_addr = INADDR_ANY;

	/* Leave messages go to ALL-ROUTERS; reports go to the group itself. */
	if (type == IGMP_HOST_LEAVE_MESSAGE) {
		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
	} else {
		ip->ip_dst = inm->inm_addr;
	}

	/* Stash the outgoing ifp with the mbuf for the deferred send. */
	igmp_save_context(m, ifp);

	m->m_flags |= M_IGMPV2;
	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK) {
		m->m_flags |= M_IGMP_LOOP;
	}

	/*
	 * Due to the fact that at this point we are possibly holding
	 * in_multihead_lock in shared or exclusive mode, we can't call
	 * igmp_sendpkt() here since that will eventually call ip_output(),
	 * which will try to lock in_multihead_lock and cause a deadlock.
	 * Instead we defer the work to the igmp_timeout() thread, thus
	 * avoiding unlocking in_multihead_lock here.
	 */
	if (IF_QFULL(&inm->inm_igi->igi_v2q)) {
		IGMP_PRINTF(("%s: v1/v2 outbound queue full\n", __func__));
		error = ENOMEM;
		m_freem(m);
	} else {
		IF_ENQUEUE(&inm->inm_igi->igi_v2q, m);
		VERIFY(error == 0);
	}
	return error;
}
2675
2676 /*
2677 * Process a state change from the upper layer for the given IPv4 group.
2678 *
2679 * Each socket holds a reference on the in_multi in its own ip_moptions.
2680 * The socket layer will have made the necessary updates to the group
2681 * state, it is now up to IGMP to issue a state change report if there
2682 * has been any change between T0 (when the last state-change was issued)
2683 * and T1 (now).
2684 *
2685 * We use the IGMPv3 state machine at group level. The IGMP module
2686 * however makes the decision as to which IGMP protocol version to speak.
2687 * A state change *from* INCLUDE {} always means an initial join.
2688 * A state change *to* INCLUDE {} always means a final leave.
2689 *
2690 * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
2691 * save ourselves a bunch of work; any exclusive mode groups need not
2692 * compute source filter lists.
2693 */
2694 int
igmp_change_state(struct in_multi * inm,struct igmp_tparams * itp)2695 igmp_change_state(struct in_multi *inm, struct igmp_tparams *itp)
2696 {
2697 struct igmp_ifinfo *igi;
2698 struct ifnet *ifp;
2699 int error = 0;
2700
2701 VERIFY(itp != NULL);
2702 bzero(itp, sizeof(*itp));
2703
2704 INM_LOCK_ASSERT_HELD(inm);
2705 VERIFY(inm->inm_igi != NULL);
2706 IGI_LOCK_ASSERT_NOTHELD(inm->inm_igi);
2707
2708 /*
2709 * Try to detect if the upper layer just asked us to change state
2710 * for an interface which has now gone away.
2711 */
2712 VERIFY(inm->inm_ifma != NULL);
2713 ifp = inm->inm_ifma->ifma_ifp;
2714 /*
2715 * Sanity check that netinet's notion of ifp is the same as net's.
2716 */
2717 VERIFY(inm->inm_ifp == ifp);
2718
2719 igi = IGMP_IFINFO(ifp);
2720 VERIFY(igi != NULL);
2721
2722 /*
2723 * If we detect a state transition to or from MCAST_UNDEFINED
2724 * for this group, then we are starting or finishing an IGMP
2725 * life cycle for this group.
2726 */
2727 if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
2728 IGMP_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2729 inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode));
2730 if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
2731 IGMP_PRINTF(("%s: initial join\n", __func__));
2732 error = igmp_initial_join(inm, igi, itp);
2733 goto out;
2734 } else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
2735 IGMP_PRINTF(("%s: final leave\n", __func__));
2736 igmp_final_leave(inm, igi, itp);
2737 goto out;
2738 }
2739 } else {
2740 IGMP_PRINTF(("%s: filter set change\n", __func__));
2741 }
2742
2743 error = igmp_handle_state_change(inm, igi, itp);
2744 out:
2745 return error;
2746 }
2747
2748 /*
2749 * Perform the initial join for an IGMP group.
2750 *
2751 * When joining a group:
2752 * If the group should have its IGMP traffic suppressed, do nothing.
2753 * IGMPv1 starts sending IGMPv1 host membership reports.
2754 * IGMPv2 starts sending IGMPv2 host membership reports.
2755 * IGMPv3 will schedule an IGMPv3 state-change report containing the
2756 * initial state of the membership.
2757 */
static int
igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi,
    struct igmp_tparams *itp)
{
	struct ifnet *ifp;
	struct ifqueue *ifq;
	int error, retval, syncstates;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_NOTHELD(igi);
	VERIFY(itp != NULL);

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: initial join %s on ifp 0x%llx(%s)\n", __func__,
	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
	    if_name(inm->inm_ifp)));

	error = 0;
	/* syncstates: commit T1 -> T0 immediately unless v3 defers it. */
	syncstates = 1;

	ifp = inm->inm_ifp;

	IGI_LOCK(igi);
	VERIFY(igi->igi_ifp == ifp);

	/*
	 * Groups joined on loopback or marked as 'not reported',
	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
	 * are never reported in any IGMP protocol exchanges.
	 * All other groups enter the appropriate IGMP state machine
	 * for the version in use on this link.
	 * A link marked as IGIF_SILENT causes IGMP to be completely
	 * disabled for the link.
	 */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (igi->igi_flags & IGIF_SILENT) ||
	    !igmp_isgroupreported(inm->inm_addr)) {
		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
		    __func__));
		inm->inm_state = IGMP_SILENT_MEMBER;
		inm->inm_timer = 0;
	} else {
		/*
		 * Deal with overlapping in_multi lifecycle.
		 * If this group was LEAVING, then make sure
		 * we drop the reference we picked up to keep the
		 * group around for the final INCLUDE {} enqueue.
		 * Since we cannot call in_multi_detach() here,
		 * defer this task to the timer routine.
		 */
		if (igi->igi_version == IGMP_VERSION_3 &&
		    inm->inm_state == IGMP_LEAVING_MEMBER) {
			VERIFY(inm->inm_nrelecnt != 0);
			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
		}

		inm->inm_state = IGMP_REPORTING_MEMBER;

		switch (igi->igi_version) {
		case IGMP_VERSION_1:
		case IGMP_VERSION_2:
			/* v1/v2: queue a report now and go IDLE. */
			inm->inm_state = IGMP_IDLE_MEMBER;
			error = igmp_v1v2_queue_report(inm,
			    (igi->igi_version == IGMP_VERSION_2) ?
			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
			    IGMP_v1_HOST_MEMBERSHIP_REPORT);

			INM_LOCK_ASSERT_HELD(inm);
			IGI_LOCK_ASSERT_HELD(igi);

			if (error == 0) {
				/* Randomized delay before the repeat report. */
				inm->inm_timer =
				    IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
				itp->cst = 1;
			}
			break;

		case IGMP_VERSION_3:
			/*
			 * Defer update of T0 to T1, until the first copy
			 * of the state change has been transmitted.
			 */
			syncstates = 0;

			/*
			 * Immediately enqueue a State-Change Report for
			 * this interface, freeing any previous reports.
			 * Don't kick the timers if there is nothing to do,
			 * or if an error occurred.
			 */
			ifq = &inm->inm_scq;
			IF_DRAIN(ifq);
			retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
			    0, 0);
			itp->cst = (ifq->ifq_len > 0);
			IGMP_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			if (retval <= 0) {
				/* enqueue returns a negated errno on failure */
				error = retval * -1;
				break;
			}

			/*
			 * Schedule transmission of pending state-change
			 * report up to RV times for this link. The timer
			 * will fire at the next igmp_timeout (1 second),
			 * giving us an opportunity to merge the reports.
			 */
			if (igi->igi_flags & IGIF_LOOPBACK) {
				inm->inm_scrv = 1;
			} else {
				VERIFY(igi->igi_rv > 1);
				inm->inm_scrv = (uint16_t)igi->igi_rv;
			}
			inm->inm_sctimer = 1;
			itp->sct = 1;

			error = 0;
			break;
		}
	}
	IGI_UNLOCK(igi);

	/*
	 * Only update the T0 state if state change is atomic,
	 * i.e. we don't need to wait for a timer to fire before we
	 * can consider the state change to have been communicated.
	 */
	if (syncstates) {
		inm_commit(inm);
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 -> T0 for %s/%s\n", __func__,
		    _igmp_inet_buf, if_name(inm->inm_ifp)));
	}

	return error;
}
2895
2896 /*
2897 * Issue an intermediate state change during the IGMP life-cycle.
2898 */
static int
igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi,
    struct igmp_tparams *itp)
{
	struct ifnet *ifp;
	int retval = 0;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_NOTHELD(igi);
	VERIFY(itp != NULL);

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: state change for %s on ifp 0x%llx(%s)\n", __func__,
	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
	    if_name(inm->inm_ifp)));

	ifp = inm->inm_ifp;

	IGI_LOCK(igi);
	VERIFY(igi->igi_ifp == ifp);

	/*
	 * Nothing to report for loopback/silent/unreported groups, or
	 * when the link is not in v3 mode: just commit T1 -> T0.
	 */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (igi->igi_flags & IGIF_SILENT) ||
	    !igmp_isgroupreported(inm->inm_addr) ||
	    (igi->igi_version != IGMP_VERSION_3)) {
		IGI_UNLOCK(igi);
		if (!igmp_isgroupreported(inm->inm_addr)) {
			IGMP_PRINTF(("%s: not kicking state "
			    "machine for silent group\n", __func__));
		}
		IGMP_PRINTF(("%s: nothing to do\n", __func__));
		inm_commit(inm);
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 -> T0 for %s/%s\n", __func__,
		    _igmp_inet_buf, inm->inm_ifp->if_name));
		goto done;
	}

	/* Replace any previously queued state-change report. */
	IF_DRAIN(&inm->inm_scq);

	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
	itp->cst = (inm->inm_scq.ifq_len > 0);
	IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
	if (retval <= 0) {
		IGI_UNLOCK(igi);
		/* enqueue returns a negated errno; convert to positive */
		retval *= -1;
		goto done;
	}
	/*
	 * If record(s) were enqueued, start the state-change
	 * report timer for this group.
	 */
	inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : (uint16_t)igi->igi_rv);
	inm->inm_sctimer = 1;
	itp->sct = 1;
	IGI_UNLOCK(igi);
done:
	return retval;
}
2958
2959 /*
2960 * Perform the final leave for an IGMP group.
2961 *
2962 * When leaving a group:
2963 * IGMPv1 does nothing.
2964 * IGMPv2 sends a host leave message, if and only if we are the reporter.
2965 * IGMPv3 enqueues a state-change report containing a transition
2966 * to INCLUDE {} for immediate transmission.
2967 */
static void
igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi,
    struct igmp_tparams *itp)
{
	/* syncstates: commit T1 -> T0 now unless v3 defers it to the timer. */
	int syncstates = 1;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_NOTHELD(igi);
	VERIFY(itp != NULL);

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: final leave %s on ifp 0x%llx(%s)\n", __func__,
	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
	    if_name(inm->inm_ifp)));

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_LEAVING_MEMBER:
		/* Already leaving or left; do nothing. */
		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
		    __func__));
		break;
	case IGMP_REPORTING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		IGI_LOCK(igi);
		if (igi->igi_version == IGMP_VERSION_2) {
			/* v3-only states must never appear in v2 mode. */
			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
				panic("%s: IGMPv3 state reached, not IGMPv3 "
				    "mode (inm %s, igi %s)", __func__,
				    if_name(inm->inm_ifp),
				    if_name(igi->igi_ifp));
				/* NOTREACHED */
			}
			/* schedule the timer only if the enqueue succeeded */
			itp->cst = (igmp_v1v2_queue_report(inm,
			    IGMP_HOST_LEAVE_MESSAGE) == 0);

			INM_LOCK_ASSERT_HELD(inm);
			IGI_LOCK_ASSERT_HELD(igi);

			inm->inm_state = IGMP_NOT_MEMBER;
		} else if (igi->igi_version == IGMP_VERSION_3) {
			/*
			 * Stop group timer and all pending reports.
			 * Immediately enqueue a state-change report
			 * TO_IN {} to be sent on the next timeout,
			 * giving us an opportunity to merge reports.
			 */
			IF_DRAIN(&inm->inm_scq);
			inm->inm_timer = 0;
			if (igi->igi_flags & IGIF_LOOPBACK) {
				inm->inm_scrv = 1;
			} else {
				inm->inm_scrv = (uint16_t)igi->igi_rv;
			}
			IGMP_INET_PRINTF(inm->inm_addr,
			    ("%s: Leaving %s/%s with %d "
			    "pending retransmissions.\n", __func__,
			    _igmp_inet_buf, if_name(inm->inm_ifp),
			    inm->inm_scrv));
			if (inm->inm_scrv == 0) {
				/* No retransmissions wanted: leave at once. */
				inm->inm_state = IGMP_NOT_MEMBER;
				inm->inm_sctimer = 0;
			} else {
				int retval;
				/*
				 * Stick around in the in_multihead list;
				 * the final detach will be issued by
				 * igmp_v3_process_group_timers() when
				 * the retransmit timer expires.
				 */
				INM_ADDREF_LOCKED(inm);
				VERIFY(inm->inm_debug & IFD_ATTACHED);
				inm->inm_reqcnt++;
				VERIFY(inm->inm_reqcnt >= 1);
				inm->inm_nrelecnt++;
				VERIFY(inm->inm_nrelecnt != 0);

				retval = igmp_v3_enqueue_group_record(
					&inm->inm_scq, inm, 1, 0, 0);
				itp->cst = (inm->inm_scq.ifq_len > 0);
				KASSERT(retval != 0,
				    ("%s: enqueue record = %d\n", __func__,
				    retval));

				inm->inm_state = IGMP_LEAVING_MEMBER;
				inm->inm_sctimer = 1;
				itp->sct = 1;
				/* T0 commit deferred until the report is sent. */
				syncstates = 0;
			}
		}
		IGI_UNLOCK(igi);
		break;
	case IGMP_LAZY_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		/* Our reports are suppressed; do nothing. */
		break;
	}

	if (syncstates) {
		inm_commit(inm);
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 -> T0 for %s/%s\n", __func__,
		    _igmp_inet_buf, if_name(inm->inm_ifp)));
		/* Mark the lifecycle as finished at T1. */
		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 now MCAST_UNDEFINED for %s/%s\n",
		    __func__, _igmp_inet_buf, if_name(inm->inm_ifp)));
	}
}
3083
3084 /*
3085 * Enqueue an IGMPv3 group record to the given output queue.
3086 *
3087 * XXX This function could do with having the allocation code
3088 * split out, and the multiple-tree-walks coalesced into a single
3089 * routine as has been done in igmp_v3_enqueue_filter_change().
3090 *
3091 * If is_state_change is zero, a current-state record is appended.
3092 * If is_state_change is non-zero, a state-change report is appended.
3093 *
3094 * If is_group_query is non-zero, an mbuf packet chain is allocated.
3095 * If is_group_query is zero, and if there is a packet with free space
3096 * at the tail of the queue, it will be appended to providing there
3097 * is enough free space.
3098 * Otherwise a new mbuf packet chain is allocated.
3099 *
3100 * If is_source_query is non-zero, each source is checked to see if
3101 * it was recorded for a Group-Source query, and will be omitted if
3102 * it is not both in-mode and recorded.
3103 *
3104 * The function will attempt to allocate leading space in the packet
3105 * for the IP/IGMP header to be prepended without fragmenting the chain.
3106 *
3107 * If successful the size of all data appended to the queue is returned,
3108 * otherwise an error code less than zero is returned, or zero if
3109 * no record(s) were appended.
3110 */
3111 static int
igmp_v3_enqueue_group_record(struct ifqueue * ifq,struct in_multi * inm,const int is_state_change,const int is_group_query,const int is_source_query)3112 igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
3113 const int is_state_change, const int is_group_query,
3114 const int is_source_query)
3115 {
3116 struct igmp_grouprec ig;
3117 struct igmp_grouprec *pig;
3118 struct ifnet *ifp;
3119 struct ip_msource *ims, *nims;
3120 struct mbuf *m0, *m, *md;
3121 int error, is_filter_list_change;
3122 int minrec0len, m0srcs, nbytes, off;
3123 uint16_t msrcs;
3124 int record_has_sources;
3125 int now;
3126 int type;
3127 in_addr_t naddr;
3128 uint16_t mode;
3129 u_int16_t ig_numsrc;
3130
3131 INM_LOCK_ASSERT_HELD(inm);
3132 IGI_LOCK_ASSERT_HELD(inm->inm_igi);
3133
3134 error = 0;
3135 ifp = inm->inm_ifp;
3136 is_filter_list_change = 0;
3137 m = NULL;
3138 m0 = NULL;
3139 m0srcs = 0;
3140 msrcs = 0;
3141 nbytes = 0;
3142 nims = NULL;
3143 record_has_sources = 1;
3144 pig = NULL;
3145 type = IGMP_DO_NOTHING;
3146 mode = inm->inm_st[1].iss_fmode;
3147
3148 /*
3149 * If we did not transition out of ASM mode during t0->t1,
3150 * and there are no source nodes to process, we can skip
3151 * the generation of source records.
3152 */
3153 if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
3154 inm->inm_nsrc == 0) {
3155 record_has_sources = 0;
3156 }
3157
3158 if (is_state_change) {
3159 /*
3160 * Queue a state change record.
3161 * If the mode did not change, and there are non-ASM
3162 * listeners or source filters present,
3163 * we potentially need to issue two records for the group.
3164 * If we are transitioning to MCAST_UNDEFINED, we need
3165 * not send any sources.
3166 * If there are ASM listeners, and there was no filter
3167 * mode transition of any kind, do nothing.
3168 */
3169 if (mode != inm->inm_st[0].iss_fmode) {
3170 if (mode == MCAST_EXCLUDE) {
3171 IGMP_PRINTF(("%s: change to EXCLUDE\n",
3172 __func__));
3173 type = IGMP_CHANGE_TO_EXCLUDE_MODE;
3174 } else {
3175 IGMP_PRINTF(("%s: change to INCLUDE\n",
3176 __func__));
3177 type = IGMP_CHANGE_TO_INCLUDE_MODE;
3178 if (mode == MCAST_UNDEFINED) {
3179 record_has_sources = 0;
3180 }
3181 }
3182 } else {
3183 if (record_has_sources) {
3184 is_filter_list_change = 1;
3185 } else {
3186 type = IGMP_DO_NOTHING;
3187 }
3188 }
3189 } else {
3190 /*
3191 * Queue a current state record.
3192 */
3193 if (mode == MCAST_EXCLUDE) {
3194 type = IGMP_MODE_IS_EXCLUDE;
3195 } else if (mode == MCAST_INCLUDE) {
3196 type = IGMP_MODE_IS_INCLUDE;
3197 VERIFY(inm->inm_st[1].iss_asm == 0);
3198 }
3199 }
3200
3201 /*
3202 * Generate the filter list changes using a separate function.
3203 */
3204 if (is_filter_list_change) {
3205 return igmp_v3_enqueue_filter_change(ifq, inm);
3206 }
3207
3208 if (type == IGMP_DO_NOTHING) {
3209 IGMP_INET_PRINTF(inm->inm_addr,
3210 ("%s: nothing to do for %s/%s\n",
3211 __func__, _igmp_inet_buf,
3212 if_name(inm->inm_ifp)));
3213 return 0;
3214 }
3215
3216 /*
3217 * If any sources are present, we must be able to fit at least
3218 * one in the trailing space of the tail packet's mbuf,
3219 * ideally more.
3220 */
3221 minrec0len = sizeof(struct igmp_grouprec);
3222 if (record_has_sources) {
3223 minrec0len += sizeof(in_addr_t);
3224 }
3225
3226 IGMP_INET_PRINTF(inm->inm_addr,
3227 ("%s: queueing %s for %s/%s\n", __func__,
3228 igmp_rec_type_to_str(type), _igmp_inet_buf,
3229 if_name(inm->inm_ifp)));
3230
3231 /*
3232 * Check if we have a packet in the tail of the queue for this
3233 * group into which the first group record for this group will fit.
3234 * Otherwise allocate a new packet.
3235 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
3236 * Note: Group records for G/GSR query responses MUST be sent
3237 * in their own packet.
3238 */
3239 m0 = ifq->ifq_tail;
3240 if (!is_group_query &&
3241 m0 != NULL &&
3242 (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
3243 (m0->m_pkthdr.len + minrec0len) <
3244 (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3245 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3246 sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3247 m = m0;
3248 IGMP_PRINTF(("%s: use existing packet\n", __func__));
3249 } else {
3250 if (IF_QFULL(ifq)) {
3251 IGMP_PRINTF(("%s: outbound queue full\n", __func__));
3252 return -ENOMEM;
3253 }
3254 m = NULL;
3255 m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3256 sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3257 if (!is_state_change && !is_group_query) {
3258 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3259 if (m) {
3260 m->m_data += IGMP_LEADINGSPACE;
3261 }
3262 }
3263 if (m == NULL) {
3264 m = m_gethdr(M_DONTWAIT, MT_DATA);
3265 if (m) {
3266 MH_ALIGN(m, IGMP_LEADINGSPACE);
3267 }
3268 }
3269 if (m == NULL) {
3270 return -ENOMEM;
3271 }
3272
3273 igmp_save_context(m, ifp);
3274
3275 IGMP_PRINTF(("%s: allocated first packet\n", __func__));
3276 }
3277
3278 /*
3279 * Append group record.
3280 * If we have sources, we don't know how many yet.
3281 */
3282 ig.ig_type = (u_char)type;
3283 ig.ig_datalen = 0;
3284 ig.ig_numsrc = 0;
3285 ig.ig_group = inm->inm_addr;
3286 if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3287 if (m != m0) {
3288 m_freem(m);
3289 }
3290 IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
3291 return -ENOMEM;
3292 }
3293 nbytes += sizeof(struct igmp_grouprec);
3294
3295 /*
3296 * Append as many sources as will fit in the first packet.
3297 * If we are appending to a new packet, the chain allocation
3298 * may potentially use clusters; use m_getptr() in this case.
3299 * If we are appending to an existing packet, we need to obtain
3300 * a pointer to the group record after m_append(), in case a new
3301 * mbuf was allocated.
3302 * Only append sources which are in-mode at t1. If we are
3303 * transitioning to MCAST_UNDEFINED state on the group, do not
3304 * include source entries.
3305 * Only report recorded sources in our filter set when responding
3306 * to a group-source query.
3307 */
3308 if (record_has_sources) {
3309 if (m == m0) {
3310 md = m_last(m);
3311 pig = (struct igmp_grouprec *)(void *)
3312 (mtod(md, uint8_t *) + md->m_len - nbytes);
3313 } else {
3314 md = m_getptr(m, 0, &off);
3315 pig = (struct igmp_grouprec *)(void *)
3316 (mtod(md, uint8_t *) + off);
3317 }
3318 msrcs = 0;
3319 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
3320 #ifdef IGMP_DEBUG
3321 char buf[MAX_IPv4_STR_LEN];
3322
3323 inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3324 IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3325 #endif
3326 now = ims_get_mode(inm, ims, 1);
3327 IGMP_PRINTF(("%s: node is %d\n", __func__, now));
3328 if ((now != mode) ||
3329 (now == mode && mode == MCAST_UNDEFINED)) {
3330 IGMP_PRINTF(("%s: skip node\n", __func__));
3331 continue;
3332 }
3333 if (is_source_query && ims->ims_stp == 0) {
3334 IGMP_PRINTF(("%s: skip unrecorded node\n",
3335 __func__));
3336 continue;
3337 }
3338 IGMP_PRINTF(("%s: append node\n", __func__));
3339 naddr = htonl(ims->ims_haddr);
3340 if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3341 if (m != m0) {
3342 m_freem(m);
3343 }
3344 IGMP_PRINTF(("%s: m_append() failed.\n",
3345 __func__));
3346 return -ENOMEM;
3347 }
3348 nbytes += sizeof(in_addr_t);
3349 ++msrcs;
3350 if (msrcs == m0srcs) {
3351 break;
3352 }
3353 }
3354 IGMP_PRINTF(("%s: msrcs is %d this packet\n", __func__,
3355 msrcs));
3356 ig_numsrc = htons(msrcs);
3357 bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3358 nbytes += (msrcs * sizeof(in_addr_t));
3359 }
3360
3361 if (is_source_query && msrcs == 0) {
3362 IGMP_PRINTF(("%s: no recorded sources to report\n", __func__));
3363 if (m != m0) {
3364 m_freem(m);
3365 }
3366 return 0;
3367 }
3368
3369 /*
3370 * We are good to go with first packet.
3371 */
3372 if (m != m0) {
3373 IGMP_PRINTF(("%s: enqueueing first packet\n", __func__));
3374 m->m_pkthdr.vt_nrecs = 1;
3375 IF_ENQUEUE(ifq, m);
3376 } else {
3377 m->m_pkthdr.vt_nrecs++;
3378 }
3379 /*
3380 * No further work needed if no source list in packet(s).
3381 */
3382 if (!record_has_sources) {
3383 return nbytes;
3384 }
3385
3386 /*
3387 * Whilst sources remain to be announced, we need to allocate
3388 * a new packet and fill out as many sources as will fit.
3389 * Always try for a cluster first.
3390 */
3391 while (nims != NULL) {
3392 if (IF_QFULL(ifq)) {
3393 IGMP_PRINTF(("%s: outbound queue full\n", __func__));
3394 return -ENOMEM;
3395 }
3396 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3397 if (m) {
3398 m->m_data += IGMP_LEADINGSPACE;
3399 }
3400 if (m == NULL) {
3401 m = m_gethdr(M_DONTWAIT, MT_DATA);
3402 if (m) {
3403 MH_ALIGN(m, IGMP_LEADINGSPACE);
3404 }
3405 }
3406 if (m == NULL) {
3407 return -ENOMEM;
3408 }
3409 igmp_save_context(m, ifp);
3410 md = m_getptr(m, 0, &off);
3411 pig = (struct igmp_grouprec *)(void *)
3412 (mtod(md, uint8_t *) + off);
3413 IGMP_PRINTF(("%s: allocated next packet\n", __func__));
3414
3415 if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3416 if (m != m0) {
3417 m_freem(m);
3418 }
3419 IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
3420 return -ENOMEM;
3421 }
3422 m->m_pkthdr.vt_nrecs = 1;
3423 nbytes += sizeof(struct igmp_grouprec);
3424
3425 m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3426 sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3427
3428 msrcs = 0;
3429 RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3430 #ifdef IGMP_DEBUG
3431 char buf[MAX_IPv4_STR_LEN];
3432
3433 inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3434 IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3435 #endif
3436 now = ims_get_mode(inm, ims, 1);
3437 if ((now != mode) ||
3438 (now == mode && mode == MCAST_UNDEFINED)) {
3439 IGMP_PRINTF(("%s: skip node\n", __func__));
3440 continue;
3441 }
3442 if (is_source_query && ims->ims_stp == 0) {
3443 IGMP_PRINTF(("%s: skip unrecorded node\n",
3444 __func__));
3445 continue;
3446 }
3447 IGMP_PRINTF(("%s: append node\n", __func__));
3448 naddr = htonl(ims->ims_haddr);
3449 if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3450 if (m != m0) {
3451 m_freem(m);
3452 }
3453 IGMP_PRINTF(("%s: m_append() failed.\n",
3454 __func__));
3455 return -ENOMEM;
3456 }
3457 ++msrcs;
3458 if (msrcs == m0srcs) {
3459 break;
3460 }
3461 }
3462 ig_numsrc = htons(msrcs);
3463 bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3464 nbytes += (msrcs * sizeof(in_addr_t));
3465
3466 IGMP_PRINTF(("%s: enqueueing next packet\n", __func__));
3467 IF_ENQUEUE(ifq, m);
3468 }
3469
3470 return nbytes;
3471 }
3472
3473 /*
3474 * Type used to mark record pass completion.
3475 * We exploit the fact we can cast to this easily from the
3476 * current filter modes on each ip_msource node.
3477 */
typedef enum {
	REC_NONE = 0x00,		/* MCAST_UNDEFINED */
	REC_ALLOW = 0x01,		/* MCAST_INCLUDE */
	REC_BLOCK = 0x02,		/* MCAST_EXCLUDE */
	REC_FULL = REC_ALLOW | REC_BLOCK /* both passes completed */
} rectype_t;
3484
3485 /*
3486 * Enqueue an IGMPv3 filter list change to the given output queue.
3487 *
3488 * Source list filter state is held in an RB-tree. When the filter list
3489 * for a group is changed without changing its mode, we need to compute
3490 * the deltas between T0 and T1 for each source in the filter set,
3491 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3492 *
3493 * As we may potentially queue two record types, and the entire R-B tree
3494 * needs to be walked at once, we break this out into its own function
3495 * so we can generate a tightly packed queue of packets.
3496 *
3497 * XXX This could be written to only use one tree walk, although that makes
3498 * serializing into the mbuf chains a bit harder. For now we do two walks
3499 * which makes things easier on us, and it may or may not be harder on
3500 * the L2 cache.
3501 *
3502 * If successful the size of all data appended to the queue is returned,
3503 * otherwise an error code less than zero is returned, or zero if
3504 * no record(s) were appended.
3505 */
static int
igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
{
	static const int MINRECLEN =
	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
	struct ifnet *ifp;
	struct igmp_grouprec ig;	/* template record header */
	struct igmp_grouprec *pig;	/* record header in-place within mbuf */
	struct ip_msource *ims, *nims;
	struct mbuf *m, *m0, *md;
	in_addr_t naddr;
	int m0srcs, nbytes, npbytes, off, schanged;
	uint16_t rsrcs;
	int nallow, nblock;
	uint16_t mode;
	uint8_t now, then;		/* per-source filter mode at t1 / t0 */
	rectype_t crt, drt, nrt;
	u_int16_t ig_numsrc;

	INM_LOCK_ASSERT_HELD(inm);

	/* Nothing to report if no sources, or no ASM-mode transition. */
	if (inm->inm_nsrc == 0 ||
	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0)) {
		return 0;
	}

	ifp = inm->inm_ifp;              /* interface */
	mode = inm->inm_st[1].iss_fmode; /* filter mode at t1 */
	crt = REC_NONE;  /* current group record type */
	drt = REC_NONE;  /* mask of completed group record types */
	nrt = REC_NONE;  /* record type for current node */
	m0srcs = 0;      /* # source which will fit in current mbuf chain */
	nbytes = 0;      /* # of bytes appended to group's state-change queue */
	npbytes = 0;     /* # of bytes appended this packet */
	rsrcs = 0;       /* # sources encoded in current record */
	schanged = 0;    /* # nodes encoded in overall filter change */
	nallow = 0;      /* # of source entries in ALLOW_NEW */
	nblock = 0;      /* # of source entries in BLOCK_OLD */
	nims = NULL;     /* next tree node pointer */

	/*
	 * For each possible filter record mode.
	 * The first kind of source we encounter tells us which
	 * is the first kind of record we start appending.
	 * If a node transitioned to UNDEFINED at t1, its mode is treated
	 * as the inverse of the group's filter mode.
	 */
	while (drt != REC_FULL) {
		do {
			m0 = ifq->ifq_tail;
			if (m0 != NULL &&
			    (m0->m_pkthdr.vt_nrecs + 1 <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (m0->m_pkthdr.len + MINRECLEN) <
			    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
				/* Tail packet has room; append to it. */
				m = m0;
				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
				    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				IGMP_PRINTF(("%s: use previous packet\n",
				    __func__));
			} else {
				/* Need a new packet; try a cluster first. */
				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
				if (m) {
					m->m_data += IGMP_LEADINGSPACE;
				}
				if (m == NULL) {
					m = m_gethdr(M_DONTWAIT, MT_DATA);
					if (m) {
						MH_ALIGN(m, IGMP_LEADINGSPACE);
					}
				}
				if (m == NULL) {
					IGMP_PRINTF(("%s: m_get*() failed\n",
					    __func__));
					return -ENOMEM;
				}
				m->m_pkthdr.vt_nrecs = 0;
				igmp_save_context(m, ifp);
				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
				    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				npbytes = 0;
				IGMP_PRINTF(("%s: allocated new packet\n",
				    __func__));
			}
			/*
			 * Append the IGMP group record header to the
			 * current packet's data area.
			 * Recalculate pointer to free space for next
			 * group record, in case m_append() allocated
			 * a new mbuf or cluster.
			 */
			memset(&ig, 0, sizeof(ig));
			ig.ig_group = inm->inm_addr;
			if (!m_append(m, sizeof(ig), (void *)&ig)) {
				if (m != m0) {
					m_freem(m);
				}
				IGMP_PRINTF(("%s: m_append() failed\n",
				    __func__));
				return -ENOMEM;
			}
			npbytes += sizeof(struct igmp_grouprec);
			if (m != m0) {
				/* new packet; offset in chain */
				md = m_getptr(m, npbytes -
				    sizeof(struct igmp_grouprec), &off);
				pig = (struct igmp_grouprec *)(void *)(mtod(md,
				    uint8_t *) + off);
			} else {
				/* current packet; offset from last append */
				md = m_last(m);
				pig = (struct igmp_grouprec *)(void *)(mtod(md,
				    uint8_t *) + md->m_len -
				    sizeof(struct igmp_grouprec));
			}
			/*
			 * Begin walking the tree for this record type
			 * pass, or continue from where we left off
			 * previously if we had to allocate a new packet.
			 * Only report deltas in-mode at t1.
			 * We need not report included sources as allowed
			 * if we are in inclusive mode on the group,
			 * however the converse is not true.
			 */
			rsrcs = 0;
			if (nims == NULL) {
				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
			}
			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
#ifdef IGMP_DEBUG
				char buf[MAX_IPv4_STR_LEN];

				inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
				IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
#endif
				now = ims_get_mode(inm, ims, 1);
				then = ims_get_mode(inm, ims, 0);
				IGMP_PRINTF(("%s: mode: t0 %d, t1 %d\n",
				    __func__, then, now));
				if (now == then) {
					IGMP_PRINTF(("%s: skip unchanged\n",
					    __func__));
					continue;
				}
				if (mode == MCAST_EXCLUDE &&
				    now == MCAST_INCLUDE) {
					IGMP_PRINTF(("%s: skip IN src on EX "
					    "group\n", __func__));
					continue;
				}
				nrt = (rectype_t)now;
				if (nrt == REC_NONE) {
					/* UNDEFINED at t1: inverse of group mode. */
					nrt = (rectype_t)(~mode & REC_FULL);
				}
				if (schanged++ == 0) {
					/* First changed node fixes this pass's type. */
					crt = nrt;
				} else if (crt != nrt) {
					/* Wrong type for this pass; next pass gets it. */
					continue;
				}
				naddr = htonl(ims->ims_haddr);
				if (!m_append(m, sizeof(in_addr_t),
				    (void *)&naddr)) {
					if (m != m0) {
						m_freem(m);
					}
					IGMP_PRINTF(("%s: m_append() failed\n",
					    __func__));
					return -ENOMEM;
				}
				nallow += !!(crt == REC_ALLOW);
				nblock += !!(crt == REC_BLOCK);
				if (++rsrcs == m0srcs) {
					break;
				}
			}
			/*
			 * If we did not append any tree nodes on this
			 * pass, back out of allocations.
			 */
			if (rsrcs == 0) {
				npbytes -= sizeof(struct igmp_grouprec);
				if (m != m0) {
					IGMP_PRINTF(("%s: m_free(m)\n",
					    __func__));
					m_freem(m);
				} else {
					IGMP_PRINTF(("%s: m_adj(m, -ig)\n",
					    __func__));
					m_adj(m, -((int)sizeof(
						struct igmp_grouprec)));
				}
				continue;
			}
			npbytes += (rsrcs * sizeof(in_addr_t));
			if (crt == REC_ALLOW) {
				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
			} else if (crt == REC_BLOCK) {
				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
			}
			/* Patch source count into record (unaligned-safe). */
			ig_numsrc = htons(rsrcs);
			bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
			/*
			 * Count the new group record, and enqueue this
			 * packet if it wasn't already queued.
			 */
			m->m_pkthdr.vt_nrecs++;
			if (m != m0) {
				IF_ENQUEUE(ifq, m);
			}
			nbytes += npbytes;
		} while (nims != NULL);
		/* Mark this record type done; flip to the other type. */
		drt |= crt;
		crt = (~crt & REC_FULL);
	}

	IGMP_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
	    nallow, nblock));

	return nbytes;
}
3728
/*
 * Merge the group's pending state-change queue into the interface-scope
 * state-change queue (ifscq), coalescing records into existing packets
 * where they fit. If further retransmissions are pending (inm_scrv > 0),
 * work on writable copies so the originals stay queued for retransmit.
 *
 * Returns 0 on success, or ENOMEM if a copy could not be made.
 */
static int
igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
{
	struct ifqueue *gq;
	struct mbuf *m;   /* pending state-change */
	struct mbuf *m0;  /* copy of pending state-change */
	struct mbuf *mt;  /* last state-change in packet */
	struct mbuf *n;
	int docopy, domerge;
	u_int recslen;

	INM_LOCK_ASSERT_HELD(inm);

	docopy = 0;
	domerge = 0;
	recslen = 0;

	/*
	 * If there are further pending retransmissions, make a writable
	 * copy of each queued state-change message before merging.
	 */
	if (inm->inm_scrv > 0) {
		docopy = 1;
	}

	gq = &inm->inm_scq;
#ifdef IGMP_DEBUG
	if (gq->ifq_head == NULL) {
		IGMP_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
	}
#endif

	/*
	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
	 * packet might not always be at the head of the ifqueue.
	 */
	m = gq->ifq_head;
	while (m != NULL) {
		/*
		 * Only merge the report into the current packet if
		 * there is sufficient space to do so; an IGMPv3 report
		 * packet may only contain 65,535 group records.
		 * Always use a simple mbuf chain concatenation to do this,
		 * as large state changes for single groups may have
		 * allocated clusters.
		 */
		domerge = 0;
		mt = ifscq->ifq_tail;
		if (mt != NULL) {
			recslen = m_length(m);

			if ((mt->m_pkthdr.vt_nrecs +
			    m->m_pkthdr.vt_nrecs <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (mt->m_pkthdr.len + recslen <=
			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE))) {
				domerge = 1;
			}
		}

		/*
		 * NOTE(review): this tests fullness of the group's own
		 * queue (gq), not the destination ifscq — matches the
		 * upstream code, but verify the intent.
		 */
		if (!domerge && IF_QFULL(gq)) {
			IGMP_PRINTF(("%s: outbound queue full, skipping whole "
			    "packet 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			if (!docopy) {
				IF_REMQUEUE(gq, m);
				m_freem(m);
			}
			m = n;
			continue;
		}

		if (!docopy) {
			/* Move the packet itself; it leaves gq for good. */
			IGMP_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			IF_REMQUEUE(gq, m);
			m0 = m;
			m = n;
		} else {
			/* Leave the original on gq for retransmission. */
			IGMP_PRINTF(("%s: copying 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			m0 = m_dup(m, M_NOWAIT);
			if (m0 == NULL) {
				return ENOMEM;
			}
			m0->m_nextpkt = NULL;
			m = m->m_nextpkt;
		}

		if (!domerge) {
			/* No room to coalesce; enqueue as its own packet. */
			IGMP_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n",
			    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
			IF_ENQUEUE(ifscq, m0);
		} else {
			struct mbuf *mtl; /* last mbuf of packet mt */

			IGMP_PRINTF(("%s: merging 0x%llx with ifscq tail "
			    "0x%llx)\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(mt)));

			/* Concatenate m0's chain onto mt; m0 loses pkthdr. */
			mtl = m_last(mt);
			m0->m_flags &= ~M_PKTHDR;
			mt->m_pkthdr.len += recslen;
			mt->m_pkthdr.vt_nrecs +=
			    m0->m_pkthdr.vt_nrecs;

			mtl->m_next = m0;
		}
	}

	return 0;
}
3846
3847 /*
3848 * Respond to a pending IGMPv3 General Query.
3849 */
/*
 * Walks all memberships on the interface, enqueues current-state records
 * for reporting members, transmits a burst, and returns the updated
 * igi_v3_timer value (non-zero if more bursts remain to be slewed out).
 * Called with igi lock held; returns with it held.
 */
static uint32_t
igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
{
	struct ifnet *ifp;
	struct in_multi *inm;
	struct in_multistep step;
	int retval, loop;

	IGI_LOCK_ASSERT_HELD(igi);

	VERIFY(igi->igi_version == IGMP_VERSION_3);

	ifp = igi->igi_ifp;
	/* Drop igi lock before taking the global multicast address lock. */
	IGI_UNLOCK(igi);

	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		if (inm->inm_ifp != ifp) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			/* Nothing to report for these states. */
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			inm->inm_state = IGMP_REPORTING_MEMBER;
			IGI_LOCK(igi);
			/* Current-state record: all query flags zero. */
			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
			    inm, 0, 0, 0);
			IGI_UNLOCK(igi);
			IGMP_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			/* A more specific response is already pending. */
			break;
		}
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();

	IGI_LOCK(igi);
	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
	igmp_dispatch_queue(igi, &igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
	    loop);
	IGI_LOCK_ASSERT_HELD(igi);
	/*
	 * Slew transmission of bursts over 1 second intervals.
	 */
	if (igi->igi_gq.ifq_head != NULL) {
		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
			IGMP_RESPONSE_BURST_INTERVAL);
	}

	return igi->igi_v3_timer;
}
3916
3917 /*
3918 * Transmit the next pending IGMP message in the output queue.
3919 *
3920 * Must not be called with inm_lock or igi_lock held.
3921 */
static void
igmp_sendpkt(struct mbuf *m)
{
	struct ip_moptions *imo;
	struct mbuf *ipopts, *m0;
	int error;
	struct route ro;
	struct ifnet *ifp;

	IGMP_PRINTF(("%s: transmit 0x%llx\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m)));

	/* Recover the ifp stashed by igmp_save_context() at enqueue time. */
	ifp = igmp_restore_context(m);
	/*
	 * Check if the ifnet is still attached.
	 */
	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
		IGMP_PRINTF(("%s: dropped 0x%llx as ifp went away.\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m)));
		m_freem(m);
		OSAddAtomic(1, &ipstat.ips_noroute);
		return;
	}

	/* Router Alert option, if the sysctl says to send it. */
	ipopts = igmp_sendra ? m_raopt : NULL;

	imo = ip_allocmoptions(Z_WAITOK);
	if (imo == NULL) {
		m_freem(m);
		return;
	}

	imo->imo_multicast_ttl = 1;
	imo->imo_multicast_vif = -1;
	imo->imo_multicast_loop = 0;

	/*
	 * If the user requested that IGMP traffic be explicitly
	 * redirected to the loopback interface (e.g. they are running a
	 * MANET interface and the routing protocol needs to see the
	 * updates), handle this now.
	 */
	if (m->m_flags & M_IGMP_LOOP) {
		imo->imo_multicast_ifp = lo_ifp;
	} else {
		imo->imo_multicast_ifp = ifp;
	}

	if (m->m_flags & M_IGMPV2) {
		/* IGMPv1/v2 messages already carry their IP header. */
		m0 = m;
	} else {
		/* IGMPv3: prepend IP + report header now. */
		m0 = igmp_v3_encap_report(ifp, m);
		if (m0 == NULL) {
			/*
			 * If igmp_v3_encap_report() failed, then M_PREPEND()
			 * already freed the original mbuf chain.
			 * This means that we don't have to m_freem(m) here.
			 */
			IGMP_PRINTF(("%s: dropped 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			IMO_REMREF(imo);
			atomic_add_32(&ipstat.ips_odropped, 1);
			return;
		}
	}

	igmp_scrub_context(m0);
	/*
	 * NOTE(review): flags are cleared on m, not m0; after v3
	 * encapsulation m0 is the new chain head. Matches the upstream
	 * code, but verify m vs. m0 is intentional here.
	 */
	m->m_flags &= ~(M_PROTOFLAGS | M_IGMP_LOOP);
	m0->m_pkthdr.rcvif = lo_ifp;

	if (ifp->if_eflags & IFEF_TXSTART) {
		/*
		 * Use control service class if the interface supports
		 * transmit-start model.
		 */
		(void) m_set_service_class(m0, MBUF_SC_CTL);
	}
	bzero(&ro, sizeof(ro));
	error = ip_output(m0, ipopts, &ro, 0, imo, NULL);
	ROUTE_RELEASE(&ro);

	IMO_REMREF(imo);

	if (error) {
		IGMP_PRINTF(("%s: ip_output(0x%llx) = %d\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(m0), error));
		return;
	}

	IGMPSTAT_INC(igps_snd_reports);
	OIGMPSTAT_INC(igps_snd_reports);
}
4014 /*
4015 * Encapsulate an IGMPv3 report.
4016 *
4017 * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
4018 * chain has already had its IP/IGMPv3 header prepended. In this case
4019 * the function will not attempt to prepend; the lengths and checksums
4020 * will however be re-computed.
4021 *
4022 * Returns a pointer to the new mbuf chain head, or NULL if the
4023 * allocation failed.
4024 */
static struct mbuf *
igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
{
	struct igmp_report *igmp;
	struct ip *ip;
	unsigned int hdrlen, igmpreclen;

	VERIFY((m->m_flags & M_PKTHDR));

	/* Length of the group records payload (before any prepend). */
	igmpreclen = m_length(m);
	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);

	if (m->m_flags & M_IGMPV3_HDR) {
		/* Header already present; exclude it from the record length. */
		igmpreclen -= hdrlen;
	} else {
		M_PREPEND(m, hdrlen, M_DONTWAIT, 1);
		if (m == NULL) {
			/* M_PREPEND freed the chain on failure. */
			return NULL;
		}
		m->m_flags |= M_IGMPV3_HDR;
	}
	/* ip_len is 16 bits; refuse packets that cannot be represented. */
	if (hdrlen + igmpreclen > USHRT_MAX) {
		IGMP_PRINTF(("%s: invalid length %d\n", __func__, hdrlen + igmpreclen));
		m_freem(m);
		return NULL;
	}


	IGMP_PRINTF(("%s: igmpreclen is %d\n", __func__, igmpreclen));

	/* Temporarily step over the IP header to fill in the IGMP header. */
	m->m_data += sizeof(struct ip);
	m->m_len -= sizeof(struct ip);

	igmp = mtod(m, struct igmp_report *);
	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
	igmp->ir_rsv1 = 0;
	igmp->ir_rsv2 = 0;
	igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
	igmp->ir_cksum = 0;
	/* Checksum covers the IGMP header plus all group records. */
	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
	m->m_pkthdr.vt_nrecs = 0;

	/* Step back to expose the IP header again. */
	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	ip = mtod(m, struct ip *);
	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
	ip->ip_len = (u_short)(hdrlen + igmpreclen);
	ip->ip_off = IP_DF;
	ip->ip_p = IPPROTO_IGMP;
	ip->ip_sum = 0;

	ip->ip_src.s_addr = INADDR_ANY;

	if (m->m_flags & M_IGMP_LOOP) {
		struct in_ifaddr *ia;

		/* Loopback path: use the interface's primary address. */
		IFP_TO_IA(ifp, ia);
		if (ia != NULL) {
			IFA_LOCK(&ia->ia_ifa);
			ip->ip_src = ia->ia_addr.sin_addr;
			IFA_UNLOCK(&ia->ia_ifa);
			IFA_REMREF(&ia->ia_ifa);
		}
	}

	/* IGMPv3 reports always go to 224.0.0.22. */
	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);

	return m;
}
4095
4096 #ifdef IGMP_DEBUG
4097 static const char *
igmp_rec_type_to_str(const int type)4098 igmp_rec_type_to_str(const int type)
4099 {
4100 switch (type) {
4101 case IGMP_CHANGE_TO_EXCLUDE_MODE:
4102 return "TO_EX";
4103 case IGMP_CHANGE_TO_INCLUDE_MODE:
4104 return "TO_IN";
4105 case IGMP_MODE_IS_EXCLUDE:
4106 return "MODE_EX";
4107 case IGMP_MODE_IS_INCLUDE:
4108 return "MODE_IN";
4109 case IGMP_ALLOW_NEW_SOURCES:
4110 return "ALLOW_NEW";
4111 case IGMP_BLOCK_OLD_SOURCES:
4112 return "BLOCK_OLD";
4113 default:
4114 break;
4115 }
4116 return "unknown";
4117 }
4118 #endif
4119
4120 void
igmp_init(struct protosw * pp,struct domain * dp)4121 igmp_init(struct protosw *pp, struct domain *dp)
4122 {
4123 #pragma unused(dp)
4124 static int igmp_initialized = 0;
4125
4126 VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);
4127
4128 if (igmp_initialized) {
4129 return;
4130 }
4131 igmp_initialized = 1;
4132
4133 IGMP_PRINTF(("%s: initializing\n", __func__));
4134
4135 igmp_timers_are_running = 0;
4136
4137 LIST_INIT(&igi_head);
4138 m_raopt = igmp_ra_alloc();
4139 }
4140