1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*-
29 * Copyright (c) 2007-2009 Bruce Simpson.
30 * Copyright (c) 1988 Stephen Deering.
31 * Copyright (c) 1992, 1993
32 * The Regents of the University of California. All rights reserved.
33 *
34 * This code is derived from software contributed to Berkeley by
35 * Stephen Deering of Stanford University.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)igmp.c 8.1 (Berkeley) 7/19/93
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 /*
75 * Internet Group Management Protocol (IGMP) routines.
76 * [RFC1112, RFC2236, RFC3376]
77 *
78 * Written by Steve Deering, Stanford, May 1988.
79 * Modified by Rosen Sharma, Stanford, Aug 1994.
80 * Modified by Bill Fenner, Xerox PARC, Feb 1995.
81 * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
82 * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
83 *
84 * MULTICAST Revision: 3.5.1.4
85 */
86
87 #include <sys/cdefs.h>
88
89 #include <sys/param.h>
90 #include <sys/systm.h>
91 #include <sys/malloc.h>
92 #include <sys/mbuf.h>
93 #include <sys/socket.h>
94 #include <sys/protosw.h>
95 #include <sys/kernel.h>
96 #include <sys/sysctl.h>
97 #include <sys/mcache.h>
98
99 #include <libkern/libkern.h>
100 #include <kern/zalloc.h>
101
102 #include <net/if.h>
103 #include <net/route.h>
104
105 #include <netinet/in.h>
106 #include <netinet/in_var.h>
107 #include <netinet/in_systm.h>
108 #include <netinet/ip.h>
109 #include <netinet/ip_var.h>
110 #include <netinet/igmp.h>
111 #include <netinet/igmp_var.h>
112 #include <netinet/kpi_ipfilter_var.h>
113
114 #if SKYWALK
115 #include <skywalk/core/skywalk_var.h>
116 #endif /* SKYWALK */
117
118 SLIST_HEAD(igmp_inm_relhead, in_multi);
119
120 static void igi_initvar(struct igmp_ifinfo *, struct ifnet *, int);
121 static struct igmp_ifinfo *igi_alloc(zalloc_flags_t);
122 static void igi_free(struct igmp_ifinfo *);
123 static void igi_delete(const struct ifnet *, struct igmp_inm_relhead *);
124 static void igmp_dispatch_queue(struct igmp_ifinfo *, struct ifqueue *,
125 int, const int);
126 static void igmp_final_leave(struct in_multi *, struct igmp_ifinfo *,
127 struct igmp_tparams *);
128 static int igmp_handle_state_change(struct in_multi *,
129 struct igmp_ifinfo *, struct igmp_tparams *);
130 static int igmp_initial_join(struct in_multi *, struct igmp_ifinfo *,
131 struct igmp_tparams *);
132 static int igmp_input_v1_query(struct ifnet *, const struct ip *,
133 const struct igmp *);
134 static int igmp_input_v2_query(struct ifnet *, const struct ip *,
135 const struct igmp *);
136 static int igmp_input_v3_query(struct ifnet *, const struct ip *,
137 /*const*/ struct igmpv3 *);
138 static int igmp_input_v3_group_query(struct in_multi *,
139 int, /*const*/ struct igmpv3 *);
140 static int igmp_input_v1_report(struct ifnet *, struct mbuf *,
141 /*const*/ struct ip *, /*const*/ struct igmp *);
142 static int igmp_input_v2_report(struct ifnet *, struct mbuf *,
143 /*const*/ struct ip *, /*const*/ struct igmp *);
144 static void igmp_sendpkt(struct mbuf *);
145 static __inline__ int igmp_isgroupreported(const struct in_addr);
146 static struct mbuf *igmp_ra_alloc(void);
147 #ifdef IGMP_DEBUG
148 static const char *igmp_rec_type_to_str(const int);
149 #endif
150 static uint32_t igmp_set_version(struct igmp_ifinfo *, const int);
151 static void igmp_flush_relq(struct igmp_ifinfo *,
152 struct igmp_inm_relhead *);
153 static int igmp_v1v2_queue_report(struct in_multi *, const int);
154 static void igmp_v1v2_process_group_timer(struct in_multi *, const int);
155 static void igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
156 static uint32_t igmp_v2_update_group(struct in_multi *, const int);
157 static void igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
158 static uint32_t igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
159 static struct mbuf *
160 igmp_v3_encap_report(struct ifnet *, struct mbuf *);
161 static int igmp_v3_enqueue_group_record(struct ifqueue *,
162 struct in_multi *, const int, const int, const int);
163 static int igmp_v3_enqueue_filter_change(struct ifqueue *,
164 struct in_multi *);
165 static void igmp_v3_process_group_timers(struct igmp_ifinfo *,
166 struct ifqueue *, struct ifqueue *, struct in_multi *,
167 const unsigned int);
168 static int igmp_v3_merge_state_changes(struct in_multi *,
169 struct ifqueue *);
170 static void igmp_v3_suppress_group_record(struct in_multi *);
171 static int sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS;
172 static int sysctl_igmp_gsr SYSCTL_HANDLER_ARGS;
173 static int sysctl_igmp_default_version SYSCTL_HANDLER_ARGS;
174
175 static int igmp_timeout_run; /* IGMP timer is scheduled to run */
176 static void igmp_timeout(void *);
177 static void igmp_sched_timeout(bool);
178
179 static struct mbuf *m_raopt; /* Router Alert option */
180
181 static int querier_present_timers_running; /* IGMPv1/v2 older version
182 * querier present */
183 static int interface_timers_running; /* IGMPv3 general
184 * query response */
185 static int state_change_timers_running; /* IGMPv3 state-change
186 * retransmit */
187 static int current_state_timers_running; /* IGMPv1/v2 host
188 * report; IGMPv3 g/sg
189 * query response */
190
191 /*
192 * Subsystem lock macros.
193 */
194 #define IGMP_LOCK() \
195 lck_mtx_lock(&igmp_mtx)
196 #define IGMP_LOCK_ASSERT_HELD() \
197 LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_OWNED)
198 #define IGMP_LOCK_ASSERT_NOTHELD() \
199 LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED)
200 #define IGMP_UNLOCK() \
201 lck_mtx_unlock(&igmp_mtx)
202
203 static LIST_HEAD(, igmp_ifinfo) igi_head;
204 static struct igmpstat_v3 igmpstat_v3 = {
205 .igps_version = IGPS_VERSION_3,
206 .igps_len = sizeof(struct igmpstat_v3),
207 };
208 static struct igmpstat igmpstat; /* old IGMPv2 stats structure */
209 static struct timeval igmp_gsrdelay = {.tv_sec = 10, .tv_usec = 0};
210
211 static int igmp_recvifkludge = 1;
212 static int igmp_sendra = 1;
213 static int igmp_sendlocal = 1;
214 static int igmp_v1enable = 1;
215 static int igmp_v2enable = 1;
216 static int igmp_legacysupp = 0;
217 static int igmp_default_version = IGMP_VERSION_3;
218
219 SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
220 &igmpstat, igmpstat, "");
221 SYSCTL_STRUCT(_net_inet_igmp, OID_AUTO, v3stats,
222 CTLFLAG_RD | CTLFLAG_LOCKED, &igmpstat_v3, igmpstat_v3, "");
223 SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW | CTLFLAG_LOCKED,
224 &igmp_recvifkludge, 0,
225 "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
226 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW | CTLFLAG_LOCKED,
227 &igmp_sendra, 0,
228 "Send IP Router Alert option in IGMPv2/v3 messages");
229 SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW | CTLFLAG_LOCKED,
230 &igmp_sendlocal, 0,
231 "Send IGMP membership reports for 224.0.0.0/24 groups");
232 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
233 &igmp_v1enable, 0,
234 "Enable backwards compatibility with IGMPv1");
235 SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW | CTLFLAG_LOCKED,
236 &igmp_v2enable, 0,
237 "Enable backwards compatibility with IGMPv2");
238 SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW | CTLFLAG_LOCKED,
239 &igmp_legacysupp, 0,
240 "Allow v1/v2 reports to suppress v3 group responses");
241 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
242 CTLTYPE_INT | CTLFLAG_RW,
243 &igmp_default_version, 0, sysctl_igmp_default_version, "I",
244 "Default version of IGMP to run on each interface");
245 SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
246 CTLTYPE_INT | CTLFLAG_RW,
247 &igmp_gsrdelay.tv_sec, 0, sysctl_igmp_gsr, "I",
248 "Rate limit for IGMPv3 Group-and-Source queries in seconds");
249 #ifdef IGMP_DEBUG
250 int igmp_debug = 0;
251 SYSCTL_INT(_net_inet_igmp, OID_AUTO,
252 debug, CTLFLAG_RW | CTLFLAG_LOCKED, &igmp_debug, 0, "");
253 #endif
254
255 SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
256 sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
257
258 /* Lock group and attribute for igmp_mtx */
259 static LCK_ATTR_DECLARE(igmp_mtx_attr, 0, 0);
260 static LCK_GRP_DECLARE(igmp_mtx_grp, "igmp_mtx");
261
262 /*
263 * Locking and reference counting:
264 *
265 * igmp_mtx mainly protects igi_head. In cases where both igmp_mtx and
266 * in_multihead_lock must be held, the former must be acquired first in order
267 * to maintain lock ordering. It is not a requirement that igmp_mtx be
268 * acquired first before in_multihead_lock, but in case both must be acquired
269 * in succession, the correct lock ordering must be followed.
270 *
271 * Instead of walking the if_multiaddrs list at the interface and returning
272 * the ifma_protospec value of a matching entry, we search the global list
273 * of in_multi records and find it that way; this is done with in_multihead
274 * lock held. Doing so avoids the race condition issues that many other BSDs
275 * suffer from (therefore in our implementation, ifma_protospec will never be
276 * NULL for as long as the in_multi is valid.)
277 *
 * The above creates a requirement for the in_multi to stay in the in_multihead
 * list even after the final IGMP leave (in IGMPv3 mode) until its state-change
 * report no longer needs to be retransmitted (this is not required for
 * IGMPv1/v2.) In order to handle
281 * this, the request and reference counts of the in_multi are bumped up when
282 * the state changes to IGMP_LEAVING_MEMBER, and later dropped in the timeout
283 * handler. Each in_multi holds a reference to the underlying igmp_ifinfo.
284 *
 * Thus, the permitted lock order is:
286 *
287 * igmp_mtx, in_multihead_lock, inm_lock, igi_lock
288 *
289 * Any may be taken independently, but if any are held at the same time,
290 * the above lock order must be followed.
291 */
292 static LCK_MTX_DECLARE_ATTR(igmp_mtx, &igmp_mtx_grp, &igmp_mtx_attr);
293 static int igmp_timers_are_running;
294
295 #define IGMP_ADD_DETACHED_INM(_head, _inm) { \
296 SLIST_INSERT_HEAD(_head, _inm, inm_dtle); \
297 }
298
299 #define IGMP_REMOVE_DETACHED_INM(_head) { \
300 struct in_multi *_inm, *_inm_tmp; \
301 SLIST_FOREACH_SAFE(_inm, _head, inm_dtle, _inm_tmp) { \
302 SLIST_REMOVE(_head, _inm, in_multi, inm_dtle); \
303 INM_REMREF(_inm); \
304 } \
305 VERIFY(SLIST_EMPTY(_head)); \
306 }
307
308 static KALLOC_TYPE_DEFINE(igi_zone, struct igmp_ifinfo, NET_KT_DEFAULT);
309
310 /* Store IGMPv3 record count in the module private scratch space */
311 #define vt_nrecs pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]
312
static __inline void
igmp_save_context(struct mbuf *m, struct ifnet *ifp)
{
	/*
	 * Stash the outgoing interface in the packet header's rcvif
	 * field while the IGMP packet sits on an output queue; it is
	 * read back by igmp_restore_context() and cleared by
	 * igmp_scrub_context() before transmission.
	 */
	m->m_pkthdr.rcvif = ifp;
}
318
static __inline void
igmp_scrub_context(struct mbuf *m)
{
	/* Clear the ifp stashed in rcvif by igmp_save_context(). */
	m->m_pkthdr.rcvif = NULL;
}
324
325 #ifdef IGMP_DEBUG
static __inline const char *
inet_ntop_haddr(in_addr_t haddr, char *buf, socklen_t size)
{
	/*
	 * Debug helper: format an IPv4 address given in host byte
	 * order into buf, returning the same string inet_ntop() does.
	 */
	struct in_addr in = { .s_addr = htonl(haddr) };

	return inet_ntop(AF_INET, &in, buf, size);
}
334 #endif
335
336 /*
337 * Restore context from a queued IGMP output chain.
338 * Return saved ifp.
339 */
static __inline struct ifnet *
igmp_restore_context(struct mbuf *m)
{
	/* Paired with igmp_save_context(); rcvif holds the saved ifp. */
	return m->m_pkthdr.rcvif;
}
345
346 /*
347 * Retrieve or set default IGMP version.
348 */
349 static int
350 sysctl_igmp_default_version SYSCTL_HANDLER_ARGS
351 {
352 #pragma unused(oidp, arg2)
353 int error;
354 int new;
355
356 IGMP_LOCK();
357
358 error = SYSCTL_OUT(req, arg1, sizeof(int));
359 if (error || !req->newptr) {
360 goto out_locked;
361 }
362
363 new = igmp_default_version;
364
365 error = SYSCTL_IN(req, &new, sizeof(int));
366 if (error) {
367 goto out_locked;
368 }
369
370 if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
371 error = EINVAL;
372 goto out_locked;
373 }
374
375 IGMP_PRINTF(("%s: change igmp_default_version from %d to %d\n",
376 __func__, igmp_default_version, new));
377
378 igmp_default_version = new;
379
380 out_locked:
381 IGMP_UNLOCK();
382 return error;
383 }
384
385 /*
386 * Retrieve or set threshold between group-source queries in seconds.
387 *
388 */
389 static int
390 sysctl_igmp_gsr SYSCTL_HANDLER_ARGS
391 {
392 #pragma unused(arg1, arg2)
393 int error;
394 int i;
395
396 IGMP_LOCK();
397
398 i = (int)igmp_gsrdelay.tv_sec;
399
400 error = sysctl_handle_int(oidp, &i, 0, req);
401 if (error || !req->newptr) {
402 goto out_locked;
403 }
404
405 if (i < -1 || i >= 60) {
406 error = EINVAL;
407 goto out_locked;
408 }
409
410 igmp_gsrdelay.tv_sec = i;
411
412 out_locked:
413 IGMP_UNLOCK();
414 return error;
415 }
416
417 /*
418 * Expose struct igmp_ifinfo to userland, keyed by ifindex.
419 * For use by ifmcstat(8).
420 *
421 */
static int
sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int *name;
	int error;
	u_int namelen;
	struct ifnet *ifp;
	struct igmp_ifinfo *igi;
	struct igmp_ifinfo_u igi_u;	/* snapshot copied out to userland */

	name = (int *)arg1;
	namelen = arg2;

	/* This node is read-only; reject any attempt to set it. */
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	/* Exactly one name component: the interface index. */
	if (namelen != 1) {
		return EINVAL;
	}

	IGMP_LOCK();

	if (name[0] <= 0 || name[0] > (u_int)if_index) {
		error = ENOENT;
		goto out_locked;
	}

	error = ENOENT;

	/* Map the index to an ifnet under the ifnet head lock. */
	ifnet_head_lock_shared();
	ifp = ifindex2ifnet[name[0]];
	ifnet_head_done();
	if (ifp == NULL) {
		goto out_locked;
	}

	bzero(&igi_u, sizeof(igi_u));

	/*
	 * Find the igmp_ifinfo for this interface and snapshot its
	 * fields under igi_lock; the copyout happens after the lock
	 * is dropped.
	 */
	LIST_FOREACH(igi, &igi_head, igi_link) {
		IGI_LOCK(igi);
		if (ifp != igi->igi_ifp) {
			IGI_UNLOCK(igi);
			continue;
		}
		igi_u.igi_ifindex = igi->igi_ifp->if_index;
		igi_u.igi_version = igi->igi_version;
		igi_u.igi_v1_timer = igi->igi_v1_timer;
		igi_u.igi_v2_timer = igi->igi_v2_timer;
		igi_u.igi_v3_timer = igi->igi_v3_timer;
		igi_u.igi_flags = igi->igi_flags;
		igi_u.igi_rv = igi->igi_rv;
		igi_u.igi_qi = igi->igi_qi;
		igi_u.igi_qri = igi->igi_qri;
		igi_u.igi_uri = igi->igi_uri;
		IGI_UNLOCK(igi);

		error = SYSCTL_OUT(req, &igi_u, sizeof(igi_u));
		break;
	}

out_locked:
	IGMP_UNLOCK();
	return error;
}
488
489 /*
490 * Dispatch an entire queue of pending packet chains
491 *
492 * Must not be called with inm_lock held.
493 */
static void
igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit,
    const int loop)
{
	struct mbuf *m;
	struct ip *ip;

	/* igi may be NULL for queues not associated with an interface. */
	if (igi != NULL) {
		IGI_LOCK_ASSERT_HELD(igi);
	}

#if SKYWALK
	/*
	 * Since this function is called holding the igi lock, we need to ensure we
	 * don't enter the driver directly because a deadlock can happen if another
	 * thread holding the workloop lock tries to acquire the igi lock at
	 * the same time.
	 */
	sk_protect_t protect = sk_async_transmit_protect();
#endif /* SKYWALK */

	for (;;) {
		IF_DEQUEUE(ifq, m);
		if (m == NULL) {
			break;
		}
		IGMP_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifq),
		    (uint64_t)VM_KERNEL_ADDRPERM(m)));
		ip = mtod(m, struct ip *);
		if (loop) {
			/* Ask the output path to loop this packet back locally. */
			m->m_flags |= M_IGMP_LOOP;
		}
		/*
		 * Drop igi_lock around igmp_sendpkt() so the send path
		 * never runs with it held; reacquire afterwards.
		 */
		if (igi != NULL) {
			IGI_UNLOCK(igi);
		}
		igmp_sendpkt(m);
		if (igi != NULL) {
			IGI_LOCK(igi);
		}
		/* Dispatch at most 'limit' packets per call. */
		if (--limit == 0) {
			break;
		}
	}

#if SKYWALK
	sk_async_transmit_unprotect(protect);
#endif /* SKYWALK */

	/* Exit with the same lock state we were entered with. */
	if (igi != NULL) {
		IGI_LOCK_ASSERT_HELD(igi);
	}
}
547
548 /*
549 * Filter outgoing IGMP report state by group.
550 *
551 * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
552 * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
553 * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
554 * this may break certain IGMP snooping switches which rely on the old
555 * report behaviour.
556 *
557 * Return zero if the given group is one for which IGMP reports
558 * should be suppressed, or non-zero if reports should be issued.
559 */
560
561 static __inline__
562 int
igmp_isgroupreported(const struct in_addr addr)563 igmp_isgroupreported(const struct in_addr addr)
564 {
565 if (in_allhosts(addr) ||
566 ((!igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) {
567 return 0;
568 }
569
570 return 1;
571 }
572
573 /*
574 * Construct a Router Alert option to use in outgoing packets.
575 */
static struct mbuf *
igmp_ra_alloc(void)
{
	struct mbuf *m;
	struct ipoption *p;

	/* M_WAITOK allocation; no NULL check, so it is assumed to
	 * block rather than fail here. */
	MGET(m, M_WAITOK, MT_DATA);
	p = mtod(m, struct ipoption *);
	p->ipopt_dst.s_addr = INADDR_ANY;
	p->ipopt_list[0] = (char)IPOPT_RA; /* Router Alert Option */
	p->ipopt_list[1] = 0x04; /* 4 bytes long */
	p->ipopt_list[2] = IPOPT_EOL; /* End of IP option list */
	p->ipopt_list[3] = 0x00; /* pad byte */
	/* Length covers the dst placeholder plus the 4-byte option. */
	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];

	return m;
}
593
594 /*
595 * Attach IGMP when PF_INET is attached to an interface.
596 */
struct igmp_ifinfo *
igmp_domifattach(struct ifnet *ifp, zalloc_flags_t how)
{
	struct igmp_ifinfo *igi;

	IGMP_PRINTF(("%s: called for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));

	igi = igi_alloc(how);
	if (igi == NULL) {
		return NULL;
	}

	IGMP_LOCK();

	/* Initialize and take two references: one for igi_head, one
	 * for the caller. */
	IGI_LOCK(igi);
	igi_initvar(igi, ifp, 0);
	igi->igi_debug |= IFD_ATTACHED;
	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
	IGI_ADDREF_LOCKED(igi); /* hold a reference for caller */
	IGI_UNLOCK(igi);
	ifnet_lock_shared(ifp);
	/* Mark non-multicast interfaces silent (needs ifnet lock). */
	igmp_initsilent(ifp, igi);
	ifnet_lock_done(ifp);

	LIST_INSERT_HEAD(&igi_head, igi, igi_link);

	IGMP_UNLOCK();

	IGMP_PRINTF(("%s: allocate igmp_ifinfo for ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));

	return igi;
}
631
632 /*
633 * Attach IGMP when PF_INET is reattached to an interface. Caller is
634 * expected to have an outstanding reference to the igi.
635 */
void
igmp_domifreattach(struct igmp_ifinfo *igi)
{
	struct ifnet *ifp;

	IGMP_LOCK();

	/* Must have been detached earlier; the ifp pointer survives
	 * detach (see igi_delete), so reuse it. */
	IGI_LOCK(igi);
	VERIFY(!(igi->igi_debug & IFD_ATTACHED));
	ifp = igi->igi_ifp;
	VERIFY(ifp != NULL);
	igi_initvar(igi, ifp, 1);
	igi->igi_debug |= IFD_ATTACHED;
	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
	IGI_UNLOCK(igi);
	ifnet_lock_shared(ifp);
	/* Mark non-multicast interfaces silent (needs ifnet lock). */
	igmp_initsilent(ifp, igi);
	ifnet_lock_done(ifp);

	LIST_INSERT_HEAD(&igi_head, igi, igi_link);

	IGMP_UNLOCK();

	IGMP_PRINTF(("%s: reattached igmp_ifinfo for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));
}
662
663 /*
664 * Hook for domifdetach.
665 */
void
igmp_domifdetach(struct ifnet *ifp)
{
	/* Collects in_multi records to be released once locks drop. */
	SLIST_HEAD(, in_multi) inm_dthead;

	SLIST_INIT(&inm_dthead);

	IGMP_PRINTF(("%s: called for ifp 0x%llx(%s%d)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name, ifp->if_unit));

	IGMP_LOCK();
	igi_delete(ifp, (struct igmp_inm_relhead *)&inm_dthead);
	IGMP_UNLOCK();

	/* Now that we've dropped all locks, release detached records */
	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
}
683
684 /*
685 * Called at interface detach time. Note that we only flush all deferred
686 * responses and record releases; all remaining inm records and their source
687 * entries related to this interface are left intact, in order to handle
688 * the reattach case.
689 */
static void
igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead)
{
	struct igmp_ifinfo *igi, *tigi;

	IGMP_LOCK_ASSERT_HELD();

	LIST_FOREACH_SAFE(igi, &igi_head, igi_link, tigi) {
		IGI_LOCK(igi);
		if (igi->igi_ifp == ifp) {
			/*
			 * Free deferred General Query responses.
			 */
			IF_DRAIN(&igi->igi_gq);
			IF_DRAIN(&igi->igi_v2q);
			/* Hand pending releases to the caller's list. */
			igmp_flush_relq(igi, inm_dthead);
			VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
			igi->igi_debug &= ~IFD_ATTACHED;
			IGI_UNLOCK(igi);

			LIST_REMOVE(igi, igi_link);
			IGI_REMREF(igi); /* release igi_head reference */
			return;
		}
		IGI_UNLOCK(igi);
	}
	/* Every PF_INET-attached interface must have an igmp_ifinfo. */
	panic("%s: igmp_ifinfo not found for ifp %p(%s)", __func__,
	    ifp, ifp->if_xname);
}
719
720 __private_extern__ void
igmp_initsilent(struct ifnet * ifp,struct igmp_ifinfo * igi)721 igmp_initsilent(struct ifnet *ifp, struct igmp_ifinfo *igi)
722 {
723 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
724
725 IGI_LOCK_ASSERT_NOTHELD(igi);
726 IGI_LOCK(igi);
727 if (!(ifp->if_flags & IFF_MULTICAST)) {
728 igi->igi_flags |= IGIF_SILENT;
729 } else {
730 igi->igi_flags &= ~IGIF_SILENT;
731 }
732 IGI_UNLOCK(igi);
733 }
734
static void
igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach)
{
	IGI_LOCK_ASSERT_HELD(igi);

	/* Reset per-interface state to protocol defaults. */
	igi->igi_ifp = ifp;
	igi->igi_version = igmp_default_version;
	igi->igi_flags = 0;
	igi->igi_rv = IGMP_RV_INIT;	/* robustness variable */
	igi->igi_qi = IGMP_QI_INIT;	/* query interval */
	igi->igi_qri = IGMP_QRI_INIT;	/* query response interval */
	igi->igi_uri = IGMP_URI_INIT;	/* unsolicited report interval */

	/*
	 * On reattach the release list may still hold records from the
	 * previous attach, so only initialize it the first time.
	 */
	if (!reattach) {
		SLIST_INIT(&igi->igi_relinmhead);
	}

	/*
	 * Responses to general queries are subject to bounds.
	 */
	igi->igi_gq.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
	igi->igi_v2q.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
}
758
759 static struct igmp_ifinfo *
igi_alloc(zalloc_flags_t how)760 igi_alloc(zalloc_flags_t how)
761 {
762 struct igmp_ifinfo *igi = zalloc_flags(igi_zone, how | Z_ZERO);
763 if (igi != NULL) {
764 lck_mtx_init(&igi->igi_lock, &igmp_mtx_grp, &igmp_mtx_attr);
765 igi->igi_debug |= IFD_ALLOC;
766 }
767 return igi;
768 }
769
770 static void
igi_free(struct igmp_ifinfo * igi)771 igi_free(struct igmp_ifinfo *igi)
772 {
773 IGI_LOCK(igi);
774 if (igi->igi_debug & IFD_ATTACHED) {
775 panic("%s: attached igi=%p is being freed", __func__, igi);
776 /* NOTREACHED */
777 } else if (igi->igi_ifp != NULL) {
778 panic("%s: ifp not NULL for igi=%p", __func__, igi);
779 /* NOTREACHED */
780 } else if (!(igi->igi_debug & IFD_ALLOC)) {
781 panic("%s: igi %p cannot be freed", __func__, igi);
782 /* NOTREACHED */
783 } else if (igi->igi_refcnt != 0) {
784 panic("%s: non-zero refcnt igi=%p", __func__, igi);
785 /* NOTREACHED */
786 }
787 igi->igi_debug &= ~IFD_ALLOC;
788 IGI_UNLOCK(igi);
789
790 lck_mtx_destroy(&igi->igi_lock, &igmp_mtx_grp);
791 zfree(igi_zone, igi);
792 }
793
void
igi_addref(struct igmp_ifinfo *igi, int locked)
{
	/* Take igi_lock unless the caller already holds it. */
	if (!locked) {
		IGI_LOCK_SPIN(igi);
	} else {
		IGI_LOCK_ASSERT_HELD(igi);
	}

	/* A post-increment result of zero means the count wrapped. */
	if (++igi->igi_refcnt == 0) {
		panic("%s: igi=%p wraparound refcnt", __func__, igi);
		/* NOTREACHED */
	}
	if (!locked) {
		IGI_UNLOCK(igi);
	}
}
811
void
igi_remref(struct igmp_ifinfo *igi)
{
	/* Collects in_multi records to be released once locks drop. */
	SLIST_HEAD(, in_multi) inm_dthead;
	struct ifnet *ifp;

	IGI_LOCK_SPIN(igi);

	if (igi->igi_refcnt == 0) {
		panic("%s: igi=%p negative refcnt", __func__, igi);
		/* NOTREACHED */
	}

	--igi->igi_refcnt;
	if (igi->igi_refcnt > 0) {
		IGI_UNLOCK(igi);
		return;
	}

	/*
	 * Final reference dropped: drain the pending response queues,
	 * flush the release list, then free the structure itself.
	 */
	ifp = igi->igi_ifp;
	igi->igi_ifp = NULL;
	IF_DRAIN(&igi->igi_gq);
	IF_DRAIN(&igi->igi_v2q);
	SLIST_INIT(&inm_dthead);
	igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
	VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
	IGI_UNLOCK(igi);

	/* Now that we've dropped all locks, release detached records */
	IGMP_REMOVE_DETACHED_INM(&inm_dthead);

	IGMP_PRINTF(("%s: freeing igmp_ifinfo for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	igi_free(igi);
}
848
849 /*
850 * Process a received IGMPv1 query.
851 * Return non-zero if the message should be dropped.
852 */
static int
igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
	struct igmp_ifinfo *igi;
	struct in_multi *inm;
	struct in_multistep step;
	struct igmp_tparams itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	/*
	 * IGMPv1 Host Membership Queries SHOULD always be addressed to
	 * 224.0.0.1. They are always treated as General Queries.
	 * igmp_group is always ignored. Do not drop it as a userland
	 * daemon may wish to see it.
	 */
	if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
		IGMPSTAT_INC(igps_rcv_badqueries);
		OIGMPSTAT_INC(igps_rcv_badqueries);
		goto done;
	}
	IGMPSTAT_INC(igps_rcv_gen_queries);

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v1 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}
	/*
	 * Switch to IGMPv1 host compatibility mode.
	 */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_1);
	IGI_UNLOCK(igi);

	IGMP_PRINTF(("%s: process v1 query on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	/*
	 * Start the timers in all of our group records
	 * for the interface on which the query arrived,
	 * except those which are already running.
	 */
	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		/* Skip other interfaces and already-armed timers. */
		if (inm->inm_ifp != ifp || inm->inm_timer != 0) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/* Arm a randomized report delay for this group. */
			inm->inm_state = IGMP_REPORTING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
			itp.cst = 1;	/* a current-state timer is running */
			break;
		case IGMP_LEAVING_MEMBER:
			break;
		}
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();
done:
	igmp_set_timeout(&itp);

	return 0;
}
938
939 /*
940 * Process a received IGMPv2 general or group-specific query.
941 */
static int
igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
	struct igmp_ifinfo *igi;
	struct in_multi *inm;
	int is_general_query;
	uint16_t timer;
	struct igmp_tparams itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	/*
	 * Validate address fields upfront.
	 */
	if (in_nullhost(igmp->igmp_group)) {
		/*
		 * IGMPv2 General Query.
		 * If this was not sent to the all-hosts group, ignore it.
		 */
		if (!in_allhosts(ip->ip_dst)) {
			goto done;
		}
		IGMPSTAT_INC(igps_rcv_gen_queries);
		is_general_query = 1;
	} else {
		/* IGMPv2 Group-Specific Query. */
		IGMPSTAT_INC(igps_rcv_group_queries);
	}

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v2 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}
	/*
	 * Ignore v2 query if in v1 Compatibility Mode.
	 */
	if (igi->igi_version == IGMP_VERSION_1) {
		IGI_UNLOCK(igi);
		goto done;
	}
	/* Switch to (or stay in) IGMPv2 compatibility mode. */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_2);
	IGI_UNLOCK(igi);

	/* Scale the Max Resp Time code into timer units; never zero. */
	timer = igmp->igmp_code / IGMP_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	if (is_general_query) {
		struct in_multistep step;

		IGMP_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		/*
		 * For each reporting group joined on this
		 * interface, kick the report timer.
		 */
		in_multihead_lock_shared();
		IN_FIRST_MULTI(step, inm);
		while (inm != NULL) {
			INM_LOCK(inm);
			if (inm->inm_ifp == ifp) {
				itp.cst += igmp_v2_update_group(inm, timer);
			}
			INM_UNLOCK(inm);
			IN_NEXT_MULTI(step, inm);
		}
		in_multihead_lock_done();
	} else {
		/*
		 * Group-specific IGMPv2 query, we need only
		 * look up the single group to process it.
		 */
		in_multihead_lock_shared();
		IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
		in_multihead_lock_done();
		if (inm != NULL) {
			INM_LOCK(inm);
			IGMP_INET_PRINTF(igmp->igmp_group,
			    ("process v2 query %s on ifp 0x%llx(%s)\n",
			    _igmp_inet_buf,
			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
			itp.cst = igmp_v2_update_group(inm, timer);
			INM_UNLOCK(inm);
			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
		}
	}
done:
	igmp_set_timeout(&itp);

	return 0;
}
1044
1045 /*
1046 * Update the report timer on a group in response to an IGMPv2 query.
1047 *
1048 * If we are becoming the reporting member for this group, start the timer.
1049 * If we already are the reporting member for this group, and timer is
1050 * below the threshold, reset it.
1051 *
1052 * We may be updating the group for the first time since we switched
1053 * to IGMPv3. If we are, then we must clear any recorded source lists,
1054 * and transition to REPORTING state; the group timer is overloaded
1055 * for group and group-source query responses.
1056 *
1057 * Unlike IGMPv3, the delay per group should be jittered
1058 * to avoid bursts of IGMPv2 reports.
1059 */
1060 static uint32_t
igmp_v2_update_group(struct in_multi * inm,const int timer)1061 igmp_v2_update_group(struct in_multi *inm, const int timer)
1062 {
1063 IGMP_INET_PRINTF(inm->inm_addr, ("%s: %s/%s timer=%d\n",
1064 __func__, _igmp_inet_buf, if_name(inm->inm_ifp),
1065 timer));
1066
1067 INM_LOCK_ASSERT_HELD(inm);
1068
1069 switch (inm->inm_state) {
1070 case IGMP_NOT_MEMBER:
1071 case IGMP_SILENT_MEMBER:
1072 break;
1073 case IGMP_REPORTING_MEMBER:
1074 if (inm->inm_timer != 0 &&
1075 inm->inm_timer <= timer) {
1076 IGMP_PRINTF(("%s: REPORTING and timer running, "
1077 "skipping.\n", __func__));
1078 break;
1079 }
1080 OS_FALLTHROUGH;
1081 case IGMP_SG_QUERY_PENDING_MEMBER:
1082 case IGMP_G_QUERY_PENDING_MEMBER:
1083 case IGMP_IDLE_MEMBER:
1084 case IGMP_LAZY_MEMBER:
1085 case IGMP_AWAKENING_MEMBER:
1086 IGMP_PRINTF(("%s: ->REPORTING\n", __func__));
1087 inm->inm_state = IGMP_REPORTING_MEMBER;
1088 inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1089 break;
1090 case IGMP_SLEEPING_MEMBER:
1091 IGMP_PRINTF(("%s: ->AWAKENING\n", __func__));
1092 inm->inm_state = IGMP_AWAKENING_MEMBER;
1093 break;
1094 case IGMP_LEAVING_MEMBER:
1095 break;
1096 }
1097
1098 return inm->inm_timer;
1099 }
1100
1101 /*
1102 * Process a received IGMPv3 general, group-specific or
1103 * group-and-source-specific query.
1104 * Assumes m has already been pulled up to the full IGMP message length.
1105 * Return 0 if successful, otherwise an appropriate error code is returned.
1106 */
static int
igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
    /*const*/ struct igmpv3 *igmpv3)
{
	struct igmp_ifinfo *igi;
	struct in_multi *inm;
	int is_general_query;
	uint32_t maxresp, nsrc, qqi;
	uint32_t timer;
	uint8_t qrv;
	/* Timer work accumulated here is committed once, at 'done'. */
	struct igmp_tparams itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	IGMP_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	IGMP_PRINTF(("%s: process v3 query on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	maxresp = igmpv3->igmp_code;    /* in 1/10ths of a second */
	/*
	 * Codes >= 128 are in mantissa/exponent (floating-point) form;
	 * decode to the equivalent linear value.
	 */
	if (maxresp >= 128) {
		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
		    (IGMP_EXP(igmpv3->igmp_code) + 3);
	}

	/*
	 * Robustness must never be less than 2 for on-wire IGMPv3.
	 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
	 * an exception for interfaces whose IGMPv3 state changes
	 * are redirected to loopback (e.g. MANET).
	 */
	qrv = IGMP_QRV(igmpv3->igmp_misc);
	if (qrv < 2) {
		IGMP_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
		    qrv, IGMP_RV_INIT));
		qrv = IGMP_RV_INIT;
	}

	/* QQI uses the same exponential encoding as the max-response code. */
	qqi = igmpv3->igmp_qqi;
	if (qqi >= 128) {
		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
		    (IGMP_EXP(igmpv3->igmp_qqi) + 3);
	}

	/* Convert 1/10 s units to timer ticks; clamp to at least one. */
	timer = maxresp / IGMP_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Validate address fields and versions upfront before
	 * accepting v3 query.
	 */
	if (in_nullhost(igmpv3->igmp_group)) {
		/*
		 * IGMPv3 General Query.
		 *
		 * General Queries SHOULD be directed to 224.0.0.1.
		 * A general query with a source list has undefined
		 * behaviour; discard it.
		 */
		IGMPSTAT_INC(igps_rcv_gen_queries);
		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
			IGMPSTAT_INC(igps_rcv_badqueries);
			OIGMPSTAT_INC(igps_rcv_badqueries);
			goto done;
		}
		is_general_query = 1;
	} else {
		/* Group or group-source specific query. */
		if (nsrc == 0) {
			IGMPSTAT_INC(igps_rcv_group_queries);
		} else {
			IGMPSTAT_INC(igps_rcv_gsr_queries);
		}
	}

	igi = IGMP_IFINFO(ifp);
	VERIFY(igi != NULL);

	IGI_LOCK(igi);
	if (igi->igi_flags & IGIF_LOOPBACK) {
		IGMP_PRINTF(("%s: ignore v3 query on IGIF_LOOPBACK "
		    "ifp 0x%llx(%s)\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}

	/*
	 * Discard the v3 query if we're in Compatibility Mode.
	 * The RFC is not obviously worded that hosts need to stay in
	 * compatibility mode until the Old Version Querier Present
	 * timer expires.
	 */
	if (igi->igi_version != IGMP_VERSION_3) {
		IGMP_PRINTF(("%s: ignore v3 query in v%d mode on "
		    "ifp 0x%llx(%s)\n", __func__, igi->igi_version,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		IGI_UNLOCK(igi);
		goto done;
	}

	/* Adopt the querier's advertised robustness, QQI and QRI. */
	itp.qpt = igmp_set_version(igi, IGMP_VERSION_3);
	igi->igi_rv = qrv;
	igi->igi_qi = qqi;
	igi->igi_qri = MAX(timer, IGMP_QRI_MIN);

	IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, igi->igi_rv,
	    igi->igi_qi, igi->igi_qri));

	if (is_general_query) {
		/*
		 * Schedule a current-state report on this ifp for
		 * all groups, possibly containing source lists.
		 * If there is a pending General Query response
		 * scheduled earlier than the selected delay, do
		 * not schedule any other reports.
		 * Otherwise, reset the interface timer.
		 */
		IGMP_PRINTF(("%s: process v3 general query on ifp 0x%llx(%s)\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
			itp.it = igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
		}
		IGI_UNLOCK(igi);
	} else {
		IGI_UNLOCK(igi);
		/*
		 * Group-source-specific queries are throttled on
		 * a per-group basis to defeat denial-of-service attempts.
		 * Queries for groups we are not a member of on this
		 * link are simply ignored.
		 */
		in_multihead_lock_shared();
		IN_LOOKUP_MULTI(&igmpv3->igmp_group, ifp, inm);
		in_multihead_lock_done();
		if (inm == NULL) {
			goto done;
		}

		INM_LOCK(inm);
		if (nsrc > 0) {
			/* Rate-limit group-and-source queries per group. */
			if (!ratecheck(&inm->inm_lastgsrtv,
			    &igmp_gsrdelay)) {
				IGMP_PRINTF(("%s: GS query throttled.\n",
				    __func__));
				IGMPSTAT_INC(igps_drop_gsr_queries);
				INM_UNLOCK(inm);
				INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
				goto done;
			}
		}
		IGMP_INET_PRINTF(igmpv3->igmp_group,
		    ("process v3 %s query on ifp 0x%llx(%s)\n", _igmp_inet_buf,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		/*
		 * If there is a pending General Query response
		 * scheduled sooner than the selected delay, no
		 * further report need be scheduled.
		 * Otherwise, prepare to respond to the
		 * group-specific or group-and-source query.
		 */
		IGI_LOCK(igi);
		itp.it = igi->igi_v3_timer;
		IGI_UNLOCK(igi);
		if (itp.it == 0 || itp.it >= timer) {
			(void) igmp_input_v3_group_query(inm, timer, igmpv3);
			itp.cst = inm->inm_timer;
		}
		INM_UNLOCK(inm);
		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
	}
done:
	if (itp.it > 0) {
		IGMP_PRINTF(("%s: v3 general query response scheduled in "
		    "T+%d seconds on ifp 0x%llx(%s)\n", __func__, itp.it,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	}
	/* Commit any timers scheduled above to the global timeout machinery. */
	igmp_set_timeout(&itp);

	return 0;
}
1292
1293 /*
1294 * Process a recieved IGMPv3 group-specific or group-and-source-specific
1295 * query.
1296 * Return <0 if any error occured. Currently this is ignored.
1297 */
static int
igmp_input_v3_group_query(struct in_multi *inm,
    int timer, /*const*/ struct igmpv3 *igmpv3)
{
	int retval;
	uint16_t nsrc;

	INM_LOCK_ASSERT_HELD(inm);

	retval = 0;

	/* Only reporting/pending states require a response; bail otherwise. */
	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_LEAVING_MEMBER:
		return retval;
	case IGMP_REPORTING_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		break;
	}

	nsrc = ntohs(igmpv3->igmp_numsrc);

	/*
	 * Deal with group-specific queries upfront.
	 * If any group query is already pending, purge any recorded
	 * source-list state if it exists, and schedule a query response
	 * for this group-specific query.
	 */
	if (nsrc == 0) {
		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
			inm_clear_recorded(inm);
			/* Keep whichever deadline is sooner. */
			timer = min(inm->inm_timer, timer);
		}
		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		return retval;
	}

	/*
	 * Deal with the case where a group-and-source-specific query has
	 * been received but a group-specific query is already pending.
	 */
	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
		timer = min(inm->inm_timer, timer);
		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		return retval;
	}

	/*
	 * Finally, deal with the case where a group-and-source-specific
	 * query has been received, where a response to a previous g-s-r
	 * query exists, or none exists.
	 * In this case, we need to parse the source-list which the Querier
	 * has provided us with and check if we have any source list filter
	 * entries at T1 for these sources. If we do not, there is no need
	 * schedule a report and the query may be dropped.
	 * If we do, we must record them and schedule a current-state
	 * report for those sources.
	 * FIXME: Handling source lists larger than 1 mbuf requires that
	 * we pass the mbuf chain pointer down to this function, and use
	 * m_getptr() to walk the chain.
	 */
	if (inm->inm_nsrc > 0) {
		const struct in_addr *ap;
		int i, nrecorded;

		/* Source addresses immediately follow the v3 query header. */
		ap = (const struct in_addr *)(igmpv3 + 1);
		nrecorded = 0;
		for (i = 0; i < nsrc; i++, ap++) {
			retval = inm_record_source(inm, ap->s_addr);
			if (retval < 0) {
				break;
			}
			nrecorded += retval;
		}
		/* Only schedule a report if at least one source matched. */
		if (nrecorded > 0) {
			IGMP_PRINTF(("%s: schedule response to SG query\n",
			    __func__));
			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
		}
	}

	return retval;
}
1390
1391 /*
1392 * Process a received IGMPv1 host membership report.
1393 *
1394 * NOTE: 0.0.0.0 workaround breaks const correctness.
1395 */
1396 static int
igmp_input_v1_report(struct ifnet * ifp,struct mbuf * m,struct ip * ip,struct igmp * igmp)1397 igmp_input_v1_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
1398 /*const*/ struct igmp *igmp)
1399 {
1400 struct in_ifaddr *ia;
1401 struct in_multi *inm;
1402
1403 IGMPSTAT_INC(igps_rcv_reports);
1404 OIGMPSTAT_INC(igps_rcv_reports);
1405
1406 if ((ifp->if_flags & IFF_LOOPBACK) ||
1407 (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1408 return 0;
1409 }
1410
1411 if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr) ||
1412 !in_hosteq(igmp->igmp_group, ip->ip_dst))) {
1413 IGMPSTAT_INC(igps_rcv_badreports);
1414 OIGMPSTAT_INC(igps_rcv_badreports);
1415 return EINVAL;
1416 }
1417
1418 /*
1419 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1420 * Booting clients may use the source address 0.0.0.0. Some
1421 * IGMP daemons may not know how to use IP_RECVIF to determine
1422 * the interface upon which this message was received.
1423 * Replace 0.0.0.0 with the subnet address if told to do so.
1424 */
1425 if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1426 IFP_TO_IA(ifp, ia);
1427 if (ia != NULL) {
1428 IFA_LOCK(&ia->ia_ifa);
1429 ip->ip_src.s_addr = htonl(ia->ia_subnet);
1430 IFA_UNLOCK(&ia->ia_ifa);
1431 IFA_REMREF(&ia->ia_ifa);
1432 }
1433 }
1434
1435 IGMP_INET_PRINTF(igmp->igmp_group,
1436 ("process v1 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
1437 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1438
1439 /*
1440 * IGMPv1 report suppression.
1441 * If we are a member of this group, and our membership should be
1442 * reported, stop our group timer and transition to the 'lazy' state.
1443 */
1444 in_multihead_lock_shared();
1445 IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
1446 in_multihead_lock_done();
1447 if (inm != NULL) {
1448 struct igmp_ifinfo *igi;
1449
1450 INM_LOCK(inm);
1451
1452 igi = inm->inm_igi;
1453 VERIFY(igi != NULL);
1454
1455 IGMPSTAT_INC(igps_rcv_ourreports);
1456 OIGMPSTAT_INC(igps_rcv_ourreports);
1457
1458 /*
1459 * If we are in IGMPv3 host mode, do not allow the
1460 * other host's IGMPv1 report to suppress our reports
1461 * unless explicitly configured to do so.
1462 */
1463 IGI_LOCK(igi);
1464 if (igi->igi_version == IGMP_VERSION_3) {
1465 if (igmp_legacysupp) {
1466 igmp_v3_suppress_group_record(inm);
1467 }
1468 IGI_UNLOCK(igi);
1469 INM_UNLOCK(inm);
1470 INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1471 return 0;
1472 }
1473
1474 INM_LOCK_ASSERT_HELD(inm);
1475 inm->inm_timer = 0;
1476
1477 switch (inm->inm_state) {
1478 case IGMP_NOT_MEMBER:
1479 case IGMP_SILENT_MEMBER:
1480 break;
1481 case IGMP_IDLE_MEMBER:
1482 case IGMP_LAZY_MEMBER:
1483 case IGMP_AWAKENING_MEMBER:
1484 IGMP_INET_PRINTF(igmp->igmp_group,
1485 ("report suppressed for %s on ifp 0x%llx(%s)\n",
1486 _igmp_inet_buf,
1487 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1488 OS_FALLTHROUGH;
1489 case IGMP_SLEEPING_MEMBER:
1490 inm->inm_state = IGMP_SLEEPING_MEMBER;
1491 break;
1492 case IGMP_REPORTING_MEMBER:
1493 IGMP_INET_PRINTF(igmp->igmp_group,
1494 ("report suppressed for %s on ifp 0x%llx(%s)\n",
1495 _igmp_inet_buf,
1496 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1497 if (igi->igi_version == IGMP_VERSION_1) {
1498 inm->inm_state = IGMP_LAZY_MEMBER;
1499 } else if (igi->igi_version == IGMP_VERSION_2) {
1500 inm->inm_state = IGMP_SLEEPING_MEMBER;
1501 }
1502 break;
1503 case IGMP_G_QUERY_PENDING_MEMBER:
1504 case IGMP_SG_QUERY_PENDING_MEMBER:
1505 case IGMP_LEAVING_MEMBER:
1506 break;
1507 }
1508 IGI_UNLOCK(igi);
1509 INM_UNLOCK(inm);
1510 INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
1511 }
1512
1513 return 0;
1514 }
1515
1516 /*
1517 * Process a received IGMPv2 host membership report.
1518 *
1519 * NOTE: 0.0.0.0 workaround breaks const correctness.
1520 */
static int
igmp_input_v2_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
    /*const*/ struct igmp *igmp)
{
	struct in_ifaddr *ia;
	struct in_multi *inm;

	/*
	 * Make sure we don't hear our own membership report. Fast
	 * leave requires knowing that we are the only member of a
	 * group.
	 */
	IFP_TO_IA(ifp, ia);
	if (ia != NULL) {
		IFA_LOCK(&ia->ia_ifa);
		if (in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
			IFA_UNLOCK(&ia->ia_ifa);
			IFA_REMREF(&ia->ia_ifa);
			return 0;
		}
		IFA_UNLOCK(&ia->ia_ifa);
		/* N.B. ia reference is held until the kludge block below. */
	}

	IGMPSTAT_INC(igps_rcv_reports);
	OIGMPSTAT_INC(igps_rcv_reports);

	/* Ignore our own looped-back reports. */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		return 0;
	}

	/*
	 * The reported group must be multicast and must match the IP
	 * destination of the datagram.
	 */
	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		IGMPSTAT_INC(igps_rcv_badreports);
		OIGMPSTAT_INC(igps_rcv_badreports);
		return EINVAL;
	}

	/*
	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
	 * Booting clients may use the source address 0.0.0.0. Some
	 * IGMP daemons may not know how to use IP_RECVIF to determine
	 * the interface upon which this message was received.
	 * Replace 0.0.0.0 with the subnet address if told to do so.
	 */
	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
		if (ia != NULL) {
			IFA_LOCK(&ia->ia_ifa);
			ip->ip_src.s_addr = htonl(ia->ia_subnet);
			IFA_UNLOCK(&ia->ia_ifa);
		}
	}
	if (ia != NULL) {
		IFA_REMREF(&ia->ia_ifa);
	}

	IGMP_INET_PRINTF(igmp->igmp_group,
	    ("process v2 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	/*
	 * IGMPv2 report suppression.
	 * If we are a member of this group, and our membership should be
	 * reported, and our group timer is pending or about to be reset,
	 * stop our group timer by transitioning to the 'lazy' state.
	 */
	in_multihead_lock_shared();
	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
	in_multihead_lock_done();
	if (inm != NULL) {
		struct igmp_ifinfo *igi;

		INM_LOCK(inm);
		igi = inm->inm_igi;
		VERIFY(igi != NULL);

		IGMPSTAT_INC(igps_rcv_ourreports);
		OIGMPSTAT_INC(igps_rcv_ourreports);

		/*
		 * If we are in IGMPv3 host mode, do not allow the
		 * other host's IGMPv2 report to suppress our reports
		 * unless explicitly configured to do so.
		 */
		IGI_LOCK(igi);
		if (igi->igi_version == IGMP_VERSION_3) {
			if (igmp_legacysupp) {
				igmp_v3_suppress_group_record(inm);
			}
			IGI_UNLOCK(igi);
			INM_UNLOCK(inm);
			INM_REMREF(inm);
			return 0;
		}

		/* Another member reported; cancel our pending report. */
		inm->inm_timer = 0;

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
		case IGMP_SLEEPING_MEMBER:
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			IGMP_INET_PRINTF(igmp->igmp_group,
			    ("report suppressed for %s on ifp 0x%llx(%s)\n",
			    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp),
			    if_name(ifp)));
			OS_FALLTHROUGH;
		case IGMP_LAZY_MEMBER:
			inm->inm_state = IGMP_LAZY_MEMBER;
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			break;
		}
		IGI_UNLOCK(igi);
		INM_UNLOCK(inm);
		INM_REMREF(inm);
	}

	return 0;
}
1652
void
igmp_input(struct mbuf *m, int off)
{
	int iphlen;
	struct ifnet *ifp;
	struct igmp *igmp;
	struct ip *ip;
	int igmplen;
	int minlen;
	int queryver;

	IGMP_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m), off));

	ifp = m->m_pkthdr.rcvif;

	IGMPSTAT_INC(igps_rcv_total);
	OIGMPSTAT_INC(igps_rcv_total);

	/* Expect 32-bit aligned data pointer on strict-align platforms */
	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);

	ip = mtod(m, struct ip *);
	iphlen = off;

	/* By now, ip_len no longer contains the length of IP header */
	igmplen = ip->ip_len;

	/*
	 * Validate lengths.
	 */
	if (igmplen < IGMP_MINLEN) {
		IGMPSTAT_INC(igps_rcv_tooshort);
		OIGMPSTAT_INC(igps_rcv_tooshort);
		m_freem(m);
		return;
	}

	/*
	 * Always pullup to the minimum size for v1/v2 or v3
	 * to amortize calls to m_pulldown().
	 */
	if (igmplen >= IGMP_V3_QUERY_MINLEN) {
		minlen = IGMP_V3_QUERY_MINLEN;
	} else {
		minlen = IGMP_MINLEN;
	}

	/* A bit more expensive than M_STRUCT_GET, but ensures alignment */
	M_STRUCT_GET0(igmp, struct igmp *, m, off, minlen);
	if (igmp == NULL) {
		IGMPSTAT_INC(igps_rcv_tooshort);
		OIGMPSTAT_INC(igps_rcv_tooshort);
		/*
		 * NOTE(review): no m_freem() here, unlike the other error
		 * paths — presumably the pullup macro frees the chain on
		 * failure; confirm against M_STRUCT_GET0's definition.
		 */
		return;
	}
	/* N.B.: we assume the packet was correctly aligned in ip_input. */

	/*
	 * Validate checksum.
	 * Temporarily advance m_data past the IP header so in_cksum()
	 * covers exactly the IGMP message, then restore it.
	 */
	m->m_data += iphlen;
	m->m_len -= iphlen;
	if (in_cksum(m, igmplen)) {
		IGMPSTAT_INC(igps_rcv_badsum);
		OIGMPSTAT_INC(igps_rcv_badsum);
		m_freem(m);
		return;
	}
	m->m_data -= iphlen;
	m->m_len += iphlen;

	/*
	 * IGMP control traffic is link-scope, and must have a TTL of 1.
	 * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
	 * probe packets may come from beyond the LAN.
	 */
	if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
		IGMPSTAT_INC(igps_rcv_badttl);
		m_freem(m);
		return;
	}

	switch (igmp->igmp_type) {
	case IGMP_HOST_MEMBERSHIP_QUERY:
		/*
		 * Distinguish query versions: a v1 query is IGMP_MINLEN
		 * with a zero code; v2 has a non-zero code; v3 is longer.
		 */
		if (igmplen == IGMP_MINLEN) {
			if (igmp->igmp_code == 0) {
				queryver = IGMP_VERSION_1;
			} else {
				queryver = IGMP_VERSION_2;
			}
		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
			queryver = IGMP_VERSION_3;
		} else {
			IGMPSTAT_INC(igps_rcv_tooshort);
			OIGMPSTAT_INC(igps_rcv_tooshort);
			m_freem(m);
			return;
		}

		OIGMPSTAT_INC(igps_rcv_queries);

		switch (queryver) {
		case IGMP_VERSION_1:
			IGMPSTAT_INC(igps_rcv_v1v2_queries);
			if (!igmp_v1enable) {
				break;
			}
			if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
				m_freem(m);
				return;
			}
			break;

		case IGMP_VERSION_2:
			IGMPSTAT_INC(igps_rcv_v1v2_queries);
			if (!igmp_v2enable) {
				break;
			}
			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
				m_freem(m);
				return;
			}
			break;

		case IGMP_VERSION_3: {
			struct igmpv3 *igmpv3;
			uint16_t igmpv3len;
			uint16_t srclen;
			int nsrc;

			IGMPSTAT_INC(igps_rcv_v3_queries);
			igmpv3 = (struct igmpv3 *)igmp;
			/*
			 * Validate length based on source count.
			 */
			nsrc = ntohs(igmpv3->igmp_numsrc);
			/*
			 * The max value of nsrc is limited by the
			 * MTU of the network on which the datagram
			 * is received
			 */
			if (nsrc < 0 || nsrc > IGMP_V3_QUERY_MAX_SRCS) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				m_freem(m);
				return;
			}
			srclen = sizeof(struct in_addr) * (uint16_t)nsrc;
			if (igmplen < (IGMP_V3_QUERY_MINLEN + srclen)) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				m_freem(m);
				return;
			}
			igmpv3len = IGMP_V3_QUERY_MINLEN + srclen;
			/*
			 * A bit more expensive than M_STRUCT_GET,
			 * but ensures alignment.
			 */
			M_STRUCT_GET0(igmpv3, struct igmpv3 *, m,
			    off, igmpv3len);
			if (igmpv3 == NULL) {
				IGMPSTAT_INC(igps_rcv_tooshort);
				OIGMPSTAT_INC(igps_rcv_tooshort);
				/* see pullup-failure note above: no m_freem */
				return;
			}
			/*
			 * N.B.: we assume the packet was correctly
			 * aligned in ip_input.
			 */
			if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
				m_freem(m);
				return;
			}
		}
		break;
		}
		break;

	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
		if (!igmp_v1enable) {
			break;
		}
		if (igmp_input_v1_report(ifp, m, ip, igmp) != 0) {
			m_freem(m);
			return;
		}
		break;

	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
		if (!igmp_v2enable) {
			break;
		}
		if (!ip_checkrouteralert(m)) {
			IGMPSTAT_INC(igps_rcv_nora);
		}
		if (igmp_input_v2_report(ifp, m, ip, igmp) != 0) {
			m_freem(m);
			return;
		}
		break;

	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
		/*
		 * Hosts do not need to process IGMPv3 membership reports,
		 * as report suppression is no longer required.
		 */
		if (!ip_checkrouteralert(m)) {
			IGMPSTAT_INC(igps_rcv_nora);
		}
		break;

	default:
		break;
	}

	IGMP_LOCK_ASSERT_NOTHELD();
	/*
	 * Pass all valid IGMP packets up to any process(es) listening on a
	 * raw IGMP socket.
	 */
	rip_input(m, off);
}
1876
1877 /*
1878 * Schedule IGMP timer based on various parameters; caller must ensure that
1879 * lock ordering is maintained as this routine acquires IGMP global lock.
1880 */
1881 void
igmp_set_timeout(struct igmp_tparams * itp)1882 igmp_set_timeout(struct igmp_tparams *itp)
1883 {
1884 IGMP_LOCK_ASSERT_NOTHELD();
1885 VERIFY(itp != NULL);
1886
1887 if (itp->qpt != 0 || itp->it != 0 || itp->cst != 0 || itp->sct != 0) {
1888 IGMP_LOCK();
1889 if (itp->qpt != 0) {
1890 querier_present_timers_running = 1;
1891 }
1892 if (itp->it != 0) {
1893 interface_timers_running = 1;
1894 }
1895 if (itp->cst != 0) {
1896 current_state_timers_running = 1;
1897 }
1898 if (itp->sct != 0) {
1899 state_change_timers_running = 1;
1900 }
1901 igmp_sched_timeout(itp->fast);
1902 IGMP_UNLOCK();
1903 }
1904 }
1905
/*
 * Same as igmp_set_timeout(), but requests the fast (immediate)
 * scheduling path by marking the parameter block before delegating.
 */
void
igmp_set_fast_timeout(struct igmp_tparams *itp)
{
	VERIFY(itp != NULL);
	itp->fast = true;
	igmp_set_timeout(itp);
}
1913
1914 /*
1915 * IGMP timer handler (per 1 second).
1916 */
static void
igmp_timeout(void *arg)
{
	struct ifqueue scq;     /* State-change packets */
	struct ifqueue qrq;     /* Query response packets */
	struct ifnet *ifp;
	struct igmp_ifinfo *igi;
	struct in_multi *inm;
	unsigned int loop = 0, uri_sec = 0;
	/* Records detached under the IGMP lock, released after it drops. */
	SLIST_HEAD(, in_multi) inm_dthead;
	/* A non-NULL arg marks a fast-path kick; see igmp_sched_timeout(). */
	bool fast = arg != NULL;

	SLIST_INIT(&inm_dthead);

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	IGMP_LOCK();

	IGMP_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d, fast %d\n", __func__,
	    querier_present_timers_running, interface_timers_running,
	    current_state_timers_running, state_change_timers_running,
	    fast));

	if (fast) {
		/*
		 * When running the fast timer, skip processing
		 * of "querier present" timers since they are
		 * based on 1-second intervals.
		 */
		goto skip_query_timers;
	}
	/*
	 * IGMPv1/v2 querier present timer processing.
	 */
	if (querier_present_timers_running) {
		querier_present_timers_running = 0;
		LIST_FOREACH(igi, &igi_head, igi_link) {
			IGI_LOCK(igi);
			igmp_v1v2_process_querier_timers(igi);
			/* Keep the flag set while any per-igi timer remains. */
			if (igi->igi_v1_timer > 0 || igi->igi_v2_timer > 0) {
				querier_present_timers_running = 1;
			}
			IGI_UNLOCK(igi);
		}
	}

	/*
	 * IGMPv3 General Query response timer processing.
	 */
	if (interface_timers_running) {
		IGMP_PRINTF(("%s: interface timers running\n", __func__));
		interface_timers_running = 0;
		LIST_FOREACH(igi, &igi_head, igi_link) {
			IGI_LOCK(igi);
			if (igi->igi_version != IGMP_VERSION_3) {
				IGI_UNLOCK(igi);
				continue;
			}
			if (igi->igi_v3_timer == 0) {
				/* Do nothing. */
			} else if (--igi->igi_v3_timer == 0) {
				/* Fired: send the general query response. */
				if (igmp_v3_dispatch_general_query(igi) > 0) {
					interface_timers_running = 1;
				}
			} else {
				/* Still counting down. */
				interface_timers_running = 1;
			}
			IGI_UNLOCK(igi);
		}
	}

skip_query_timers:
	if (!current_state_timers_running &&
	    !state_change_timers_running) {
		goto out_locked;
	}

	current_state_timers_running = 0;
	state_change_timers_running = 0;

	/* Local staging queues, drained per-interface below. */
	memset(&qrq, 0, sizeof(struct ifqueue));
	qrq.ifq_maxlen = IGMP_MAX_G_GS_PACKETS;

	memset(&scq, 0, sizeof(struct ifqueue));
	scq.ifq_maxlen = IGMP_MAX_STATE_CHANGE_PACKETS;

	IGMP_PRINTF(("%s: state change timers running\n", __func__));

	/*
	 * IGMPv1/v2/v3 host report and state-change timer processing.
	 * Note: Processing a v3 group timer may remove a node.
	 */
	LIST_FOREACH(igi, &igi_head, igi_link) {
		struct in_multistep step;

		IGI_LOCK(igi);
		ifp = igi->igi_ifp;
		loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
		uri_sec = IGMP_RANDOM_DELAY(igi->igi_uri);
		IGI_UNLOCK(igi);

		/* Walk every joined group, processing only this ifp's. */
		in_multihead_lock_shared();
		IN_FIRST_MULTI(step, inm);
		while (inm != NULL) {
			INM_LOCK(inm);
			if (inm->inm_ifp != ifp) {
				goto next;
			}

			IGI_LOCK(igi);
			switch (igi->igi_version) {
			case IGMP_VERSION_1:
			case IGMP_VERSION_2:
				igmp_v1v2_process_group_timer(inm,
				    igi->igi_version);
				break;
			case IGMP_VERSION_3:
				igmp_v3_process_group_timers(igi, &qrq,
				    &scq, inm, uri_sec);
				break;
			}
			IGI_UNLOCK(igi);
next:
			INM_UNLOCK(inm);
			IN_NEXT_MULTI(step, inm);
		}
		in_multihead_lock_done();

		IGI_LOCK(igi);
		if (igi->igi_version == IGMP_VERSION_1 ||
		    igi->igi_version == IGMP_VERSION_2) {
			igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop);
		} else if (igi->igi_version == IGMP_VERSION_3) {
			/* Dispatch drops igi lock requirements; unlock first. */
			IGI_UNLOCK(igi);
			igmp_dispatch_queue(NULL, &qrq, 0, loop);
			igmp_dispatch_queue(NULL, &scq, 0, loop);
			VERIFY(qrq.ifq_len == 0);
			VERIFY(scq.ifq_len == 0);
			IGI_LOCK(igi);
		}
		/*
		 * In case there are still any pending membership reports
		 * which didn't get drained at version change time.
		 */
		IF_DRAIN(&igi->igi_v2q);
		/*
		 * Release all deferred inm records, and drain any locally
		 * enqueued packets; do it even if the current IGMP version
		 * for the link is no longer IGMPv3, in order to handle the
		 * version change case.
		 */
		igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
		VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
		IGI_UNLOCK(igi);

		IF_DRAIN(&qrq);
		IF_DRAIN(&scq);
	}

out_locked:
	/* re-arm the timer if there's work to do */
	igmp_timeout_run = 0;
	igmp_sched_timeout(false);
	IGMP_UNLOCK();

	/* Now that we're dropped all locks, release detached records */
	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
}
2090
2091 static void
igmp_sched_timeout(bool fast)2092 igmp_sched_timeout(bool fast)
2093 {
2094 IGMP_LOCK_ASSERT_HELD();
2095
2096 if (!igmp_timeout_run &&
2097 (querier_present_timers_running || current_state_timers_running ||
2098 interface_timers_running || state_change_timers_running)) {
2099 igmp_timeout_run = 1;
2100 int sched_hz = fast ? 0 : hz;
2101 void *arg = fast ? (void *)igmp_sched_timeout : NULL;
2102 timeout(igmp_timeout, arg, sched_hz);
2103 }
2104 }
2105
2106 /*
2107 * Free the in_multi reference(s) for this IGMP lifecycle.
2108 *
2109 * Caller must be holding igi_lock.
2110 */
2111 static void
igmp_flush_relq(struct igmp_ifinfo * igi,struct igmp_inm_relhead * inm_dthead)2112 igmp_flush_relq(struct igmp_ifinfo *igi, struct igmp_inm_relhead *inm_dthead)
2113 {
2114 struct in_multi *inm;
2115
2116 again:
2117 IGI_LOCK_ASSERT_HELD(igi);
2118 inm = SLIST_FIRST(&igi->igi_relinmhead);
2119 if (inm != NULL) {
2120 int lastref;
2121
2122 SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
2123 IGI_UNLOCK(igi);
2124
2125 in_multihead_lock_exclusive();
2126 INM_LOCK(inm);
2127 VERIFY(inm->inm_nrelecnt != 0);
2128 inm->inm_nrelecnt--;
2129 lastref = in_multi_detach(inm);
2130 VERIFY(!lastref || (!(inm->inm_debug & IFD_ATTACHED) &&
2131 inm->inm_reqcnt == 0));
2132 INM_UNLOCK(inm);
2133 in_multihead_lock_done();
2134 /* from igi_relinmhead */
2135 INM_REMREF(inm);
2136 /* from in_multihead list */
2137 if (lastref) {
2138 /*
2139 * Defer releasing our final reference, as we
2140 * are holding the IGMP lock at this point, and
2141 * we could end up with locking issues later on
2142 * (while issuing SIOCDELMULTI) when this is the
2143 * final reference count. Let the caller do it
2144 * when it is safe.
2145 */
2146 IGMP_ADD_DETACHED_INM(inm_dthead, inm);
2147 }
2148 IGI_LOCK(igi);
2149 goto again;
2150 }
2151 }
2152
2153 /*
2154 * Update host report group timer for IGMPv1/v2.
2155 * Will update the global pending timer flags.
2156 */
2157 static void
igmp_v1v2_process_group_timer(struct in_multi * inm,const int igmp_version)2158 igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version)
2159 {
2160 int report_timer_expired;
2161
2162 IGMP_LOCK_ASSERT_HELD();
2163 INM_LOCK_ASSERT_HELD(inm);
2164 IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2165
2166 if (inm->inm_timer == 0) {
2167 report_timer_expired = 0;
2168 } else if (--inm->inm_timer == 0) {
2169 report_timer_expired = 1;
2170 } else {
2171 current_state_timers_running = 1;
2172 /* caller will schedule timer */
2173 return;
2174 }
2175
2176 switch (inm->inm_state) {
2177 case IGMP_NOT_MEMBER:
2178 case IGMP_SILENT_MEMBER:
2179 case IGMP_IDLE_MEMBER:
2180 case IGMP_LAZY_MEMBER:
2181 case IGMP_SLEEPING_MEMBER:
2182 case IGMP_AWAKENING_MEMBER:
2183 break;
2184 case IGMP_REPORTING_MEMBER:
2185 if (report_timer_expired) {
2186 inm->inm_state = IGMP_IDLE_MEMBER;
2187 (void) igmp_v1v2_queue_report(inm,
2188 (igmp_version == IGMP_VERSION_2) ?
2189 IGMP_v2_HOST_MEMBERSHIP_REPORT :
2190 IGMP_v1_HOST_MEMBERSHIP_REPORT);
2191 INM_LOCK_ASSERT_HELD(inm);
2192 IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2193 }
2194 break;
2195 case IGMP_G_QUERY_PENDING_MEMBER:
2196 case IGMP_SG_QUERY_PENDING_MEMBER:
2197 case IGMP_LEAVING_MEMBER:
2198 break;
2199 }
2200 }
2201
/*
 * Update a group's timers for IGMPv3.
 * Will update the global pending timer flags.
 * Note: Unlocked read from igi.
 *
 * 'qrq' receives any Current-State (query response) records that become
 * due; 'scq' receives merged State-Change retransmissions.
 * 'uri_sec' is presumably the Unsolicited Report Interval in seconds
 * (used to re-arm the state-change retransmit timer) — TODO confirm
 * against the caller.
 */
static void
igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
    struct ifqueue *qrq, struct ifqueue *scq,
    struct in_multi *inm, const unsigned int uri_sec)
{
	int query_response_timer_expired;
	int state_change_retransmit_timer_expired;

	IGMP_LOCK_ASSERT_HELD();
	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_HELD(igi);
	VERIFY(igi == inm->inm_igi);

	query_response_timer_expired = 0;
	state_change_retransmit_timer_expired = 0;

	/*
	 * During a transition from v1/v2 compatibility mode back to v3,
	 * a group record in REPORTING state may still have its group
	 * timer active. This is a no-op in this function; it is easier
	 * to deal with it here than to complicate the timeout path.
	 */
	if (inm->inm_timer == 0) {
		query_response_timer_expired = 0;
	} else if (--inm->inm_timer == 0) {
		query_response_timer_expired = 1;
	} else {
		current_state_timers_running = 1;
		/* caller will schedule timer */
	}

	if (inm->inm_sctimer == 0) {
		state_change_retransmit_timer_expired = 0;
	} else if (--inm->inm_sctimer == 0) {
		state_change_retransmit_timer_expired = 1;
	} else {
		state_change_timers_running = 1;
		/* caller will schedule timer */
	}

	/* We are in timer callback, so be quick about it. */
	if (!state_change_retransmit_timer_expired &&
	    !query_response_timer_expired) {
		return;
	}

	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_LAZY_MEMBER:
	case IGMP_AWAKENING_MEMBER:
	case IGMP_IDLE_MEMBER:
		/* Nothing to transmit in these states. */
		break;
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		/*
		 * Respond to a previously pending Group-Specific
		 * or Group-and-Source-Specific query by enqueueing
		 * the appropriate Current-State report for
		 * immediate transmission.
		 */
		if (query_response_timer_expired) {
			int retval;

			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
			IGMP_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			inm->inm_state = IGMP_REPORTING_MEMBER;
			/* XXX Clear recorded sources for next time. */
			inm_clear_recorded(inm);
		}
		OS_FALLTHROUGH;
	case IGMP_REPORTING_MEMBER:
	case IGMP_LEAVING_MEMBER:
		if (state_change_retransmit_timer_expired) {
			/*
			 * State-change retransmission timer fired.
			 * If there are any further pending retransmissions,
			 * set the global pending state-change flag, and
			 * reset the timer.
			 */
			if (--inm->inm_scrv > 0) {
				inm->inm_sctimer = (uint16_t)uri_sec;
				state_change_timers_running = 1;
				/* caller will schedule timer */
			}
			/*
			 * Retransmit the previously computed state-change
			 * report. If there are no further pending
			 * retransmissions, the mbuf queue will be consumed.
			 * Update T0 state to T1 as we have now sent
			 * a state-change.
			 */
			(void) igmp_v3_merge_state_changes(inm, scq);

			inm_commit(inm);
			IGMP_INET_PRINTF(inm->inm_addr,
			    ("%s: T1 -> T0 for %s/%s\n", __func__,
			    _igmp_inet_buf, if_name(inm->inm_ifp)));

			/*
			 * If we are leaving the group for good, make sure
			 * we release IGMP's reference to it.
			 * This release must be deferred using a SLIST,
			 * as we are called from a loop which traverses
			 * the in_multihead list.
			 */
			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
			    inm->inm_scrv == 0) {
				inm->inm_state = IGMP_NOT_MEMBER;
				/*
				 * A reference has already been held in
				 * igmp_final_leave() for this inm, so
				 * no need to hold another one. We also
				 * bumped up its request count then, so
				 * that it stays in in_multihead. Both
				 * of them will be released when it is
				 * dequeued later on.
				 */
				VERIFY(inm->inm_nrelecnt != 0);
				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
				    inm, inm_nrele);
			}
		}
		break;
	}
}
2336
2337 /*
2338 * Suppress a group's pending response to a group or source/group query.
2339 *
2340 * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
2341 * Do NOT update ST1/ST0 as this operation merely suppresses
2342 * the currently pending group record.
2343 * Do NOT suppress the response to a general query. It is possible but
2344 * it would require adding another state or flag.
2345 */
2346 static void
igmp_v3_suppress_group_record(struct in_multi * inm)2347 igmp_v3_suppress_group_record(struct in_multi *inm)
2348 {
2349 INM_LOCK_ASSERT_HELD(inm);
2350 IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2351
2352 VERIFY(inm->inm_igi->igi_version == IGMP_VERSION_3);
2353
2354 if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER ||
2355 inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER) {
2356 return;
2357 }
2358
2359 if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
2360 inm_clear_recorded(inm);
2361 }
2362
2363 inm->inm_timer = 0;
2364 inm->inm_state = IGMP_REPORTING_MEMBER;
2365 }
2366
2367 /*
2368 * Switch to a different IGMP version on the given interface,
2369 * as per Section 7.2.1.
2370 */
2371 static uint32_t
igmp_set_version(struct igmp_ifinfo * igi,const int igmp_version)2372 igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version)
2373 {
2374 int old_version_timer;
2375
2376 IGI_LOCK_ASSERT_HELD(igi);
2377
2378 IGMP_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__,
2379 igmp_version, (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2380 if_name(igi->igi_ifp)));
2381
2382 if (igmp_version == IGMP_VERSION_1 || igmp_version == IGMP_VERSION_2) {
2383 /*
2384 * Compute the "Older Version Querier Present" timer as per
2385 * Section 8.12, in seconds.
2386 */
2387 old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
2388
2389 if (igmp_version == IGMP_VERSION_1) {
2390 igi->igi_v1_timer = old_version_timer;
2391 igi->igi_v2_timer = 0;
2392 } else if (igmp_version == IGMP_VERSION_2) {
2393 igi->igi_v1_timer = 0;
2394 igi->igi_v2_timer = old_version_timer;
2395 }
2396 }
2397
2398 if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2399 if (igi->igi_version != IGMP_VERSION_2) {
2400 igmp_v3_cancel_link_timers(igi);
2401 igi->igi_version = IGMP_VERSION_2;
2402 }
2403 } else if (igi->igi_v1_timer > 0) {
2404 if (igi->igi_version != IGMP_VERSION_1) {
2405 igmp_v3_cancel_link_timers(igi);
2406 igi->igi_version = IGMP_VERSION_1;
2407 }
2408 }
2409
2410 IGI_LOCK_ASSERT_HELD(igi);
2411
2412 return MAX(igi->igi_v1_timer, igi->igi_v2_timer);
2413 }
2414
/*
 * Cancel pending IGMPv3 timers for the given link and all groups
 * joined on it; state-change, general-query, and group-query timers.
 *
 * Only ever called on a transition from v3 to Compatibility mode. Kill
 * the timers stone dead (this may be expensive for large N groups), they
 * will be restarted if Compatibility Mode deems that they must be due to
 * query processing.
 *
 * Entered and exited with igi locked; the lock is dropped while the
 * global in_multihead list is walked.
 */
static void
igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
{
	struct ifnet *ifp;
	struct in_multi *inm;
	struct in_multistep step;

	IGI_LOCK_ASSERT_HELD(igi);

	IGMP_PRINTF(("%s: cancel v3 timers on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), if_name(igi->igi_ifp)));

	/*
	 * Stop the v3 General Query Response on this link stone dead.
	 * If timer is woken up due to interface_timers_running,
	 * the flag will be cleared if there are no pending link timers.
	 */
	igi->igi_v3_timer = 0;

	/*
	 * Now clear the current-state and state-change report timers
	 * for all memberships scoped to this link.
	 */
	ifp = igi->igi_ifp;
	IGI_UNLOCK(igi);

	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		/*
		 * Skip memberships not scoped to this link.
		 * NOTE(review): the '&&' means a membership is processed
		 * if EITHER its ifp or its igi matches; since igi is
		 * per-ifp these normally match together, but '||' (or
		 * just the ifp test) would express the intent more
		 * directly — confirm against upstream.
		 */
		if (inm->inm_ifp != ifp && inm->inm_igi != igi) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/*
			 * These states are either not relevant in v3 mode,
			 * or are unreported. Do nothing.
			 */
			break;
		case IGMP_LEAVING_MEMBER:
			/*
			 * If we are leaving the group and switching to
			 * compatibility mode, we need to release the final
			 * reference held for issuing the INCLUDE {}, and
			 * transition to REPORTING to ensure the host leave
			 * message is sent upstream to the old querier --
			 * transition to NOT would lose the leave and race.
			 * During igmp_final_leave(), we bumped up both the
			 * request and reference counts. Since we cannot
			 * call in_multi_detach() here, defer this task to
			 * the timer routine.
			 */
			VERIFY(inm->inm_nrelecnt != 0);
			IGI_LOCK(igi);
			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
			IGI_UNLOCK(igi);
			OS_FALLTHROUGH;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
			/* Drop sources recorded for a pending G/SG query. */
			inm_clear_recorded(inm);
			OS_FALLTHROUGH;
		case IGMP_REPORTING_MEMBER:
			inm->inm_state = IGMP_REPORTING_MEMBER;
			break;
		}
		/*
		 * Always clear state-change and group report timers.
		 * Free any pending IGMPv3 state-change records.
		 */
		inm->inm_sctimer = 0;
		inm->inm_timer = 0;
		IF_DRAIN(&inm->inm_scq);
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();

	/* Re-acquire for the caller, which expects igi locked on return. */
	IGI_LOCK(igi);
}
2511
/*
 * Update the Older Version Querier Present timers for a link.
 * See Section 7.2.1 of RFC 3376.
 *
 * Decides which IGMP version the link should run based on which
 * older-querier-present timers are still running, and whether v1/v2
 * have been administratively disabled since the last timeout.
 */
static void
igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
{
	IGI_LOCK_ASSERT_HELD(igi);

	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
		/*
		 * IGMPv1 and IGMPv2 Querier Present timers expired.
		 *
		 * Revert to IGMPv3.
		 */
		if (igi->igi_version != IGMP_VERSION_3) {
			IGMP_PRINTF(("%s: transition from v%d -> v%d "
			    "on 0x%llx(%s)\n", __func__,
			    igi->igi_version, IGMP_VERSION_3,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    if_name(igi->igi_ifp)));
			igi->igi_version = IGMP_VERSION_3;
			/* Any queued v1/v2 reports are now obsolete. */
			IF_DRAIN(&igi->igi_v2q);
		}
	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
		/*
		 * IGMPv1 Querier Present timer expired,
		 * IGMPv2 Querier Present timer running.
		 * If IGMPv2 was disabled since last timeout,
		 * revert to IGMPv3.
		 * If IGMPv2 is enabled, revert to IGMPv2.
		 */
		if (!igmp_v2enable) {
			IGMP_PRINTF(("%s: transition from v%d -> v%d "
			    "on 0x%llx(%s%d)\n", __func__,
			    igi->igi_version, IGMP_VERSION_3,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
			igi->igi_v2_timer = 0;
			igi->igi_version = IGMP_VERSION_3;
			IF_DRAIN(&igi->igi_v2q);
		} else {
			/* Tick the v2 querier-present timer down. */
			--igi->igi_v2_timer;
			if (igi->igi_version != IGMP_VERSION_2) {
				IGMP_PRINTF(("%s: transition from v%d -> v%d "
				    "on 0x%llx(%s)\n", __func__,
				    igi->igi_version, IGMP_VERSION_2,
				    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
				    if_name(igi->igi_ifp)));
				/* Pending v3 general-query responses are moot. */
				IF_DRAIN(&igi->igi_gq);
				igmp_v3_cancel_link_timers(igi);
				igi->igi_version = IGMP_VERSION_2;
			}
		}
	} else if (igi->igi_v1_timer > 0) {
		/*
		 * IGMPv1 Querier Present timer running.
		 * Stop IGMPv2 timer if running.
		 *
		 * If IGMPv1 was disabled since last timeout,
		 * revert to IGMPv3.
		 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
		 */
		if (!igmp_v1enable) {
			IGMP_PRINTF(("%s: transition from v%d -> v%d "
			    "on 0x%llx(%s%d)\n", __func__,
			    igi->igi_version, IGMP_VERSION_3,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
			igi->igi_v1_timer = 0;
			igi->igi_version = IGMP_VERSION_3;
			IF_DRAIN(&igi->igi_v2q);
		} else {
			--igi->igi_v1_timer;
		}
		/* A v1 querier supersedes any v2 querier-present timer. */
		if (igi->igi_v2_timer > 0) {
			IGMP_PRINTF(("%s: cancel v2 timer on 0x%llx(%s%d)\n",
			    __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
			    igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
			igi->igi_v2_timer = 0;
		}
	}
}
2596
/*
 * Dispatch an IGMPv1/v2 host report or leave message.
 * These are always small enough to fit inside a single mbuf.
 *
 * The packet is not transmitted here; it is placed on the per-link
 * igi_v2q queue and sent later from the timeout thread (see the
 * deadlock note below).  Returns 0 on success or ENOMEM if no mbuf
 * could be allocated or the outbound queue is full.
 */
static int
igmp_v1v2_queue_report(struct in_multi *inm, const int type)
{
	struct ifnet *ifp;
	struct igmp *igmp;
	struct ip *ip;
	struct mbuf *m;
	int error = 0;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_HELD(inm->inm_igi);

	ifp = inm->inm_ifp;

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		return ENOMEM;
	}
	/* Leave leading space so the IP header can be prepended in place. */
	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));

	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);

	/* Point the data region at the IGMP header for now. */
	m->m_data += sizeof(struct ip);
	m->m_len = sizeof(struct igmp);

	igmp = mtod(m, struct igmp *);
	igmp->igmp_type = (u_char)type;
	igmp->igmp_code = 0;
	igmp->igmp_group = inm->inm_addr;
	/* Checksum over the IGMP message only; field must be zero first. */
	igmp->igmp_cksum = 0;
	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));

	/* Rewind to expose the (still unfilled) IP header area. */
	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	ip = mtod(m, struct ip *);
	ip->ip_tos = 0;
	/* NOTE(review): ip_len left in host byte order here — presumably
	 * the output path expects that; confirm against the sender. */
	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
	ip->ip_off = 0;
	ip->ip_p = IPPROTO_IGMP;
	ip->ip_src.s_addr = INADDR_ANY;

	/* Leave messages go to all-routers; reports go to the group. */
	if (type == IGMP_HOST_LEAVE_MESSAGE) {
		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
	} else {
		ip->ip_dst = inm->inm_addr;
	}

	igmp_save_context(m, ifp);

	m->m_flags |= M_IGMPV2;
	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK) {
		m->m_flags |= M_IGMP_LOOP;
	}

	/*
	 * Due to the fact that at this point we are possibly holding
	 * in_multihead_lock in shared or exclusive mode, we can't call
	 * igmp_sendpkt() here since that will eventually call ip_output(),
	 * which will try to lock in_multihead_lock and cause a deadlock.
	 * Instead we defer the work to the igmp_timeout() thread, thus
	 * avoiding unlocking in_multihead_lock here.
	 */
	if (IF_QFULL(&inm->inm_igi->igi_v2q)) {
		IGMP_PRINTF(("%s: v1/v2 outbound queue full\n", __func__));
		error = ENOMEM;
		m_freem(m);
	} else {
		IF_ENQUEUE(&inm->inm_igi->igi_v2q, m);
		VERIFY(error == 0);
	}
	return error;
}
2674
2675 /*
2676 * Process a state change from the upper layer for the given IPv4 group.
2677 *
2678 * Each socket holds a reference on the in_multi in its own ip_moptions.
2679 * The socket layer will have made the necessary updates to the group
2680 * state, it is now up to IGMP to issue a state change report if there
2681 * has been any change between T0 (when the last state-change was issued)
2682 * and T1 (now).
2683 *
2684 * We use the IGMPv3 state machine at group level. The IGMP module
2685 * however makes the decision as to which IGMP protocol version to speak.
2686 * A state change *from* INCLUDE {} always means an initial join.
2687 * A state change *to* INCLUDE {} always means a final leave.
2688 *
2689 * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
2690 * save ourselves a bunch of work; any exclusive mode groups need not
2691 * compute source filter lists.
2692 */
2693 int
igmp_change_state(struct in_multi * inm,struct igmp_tparams * itp)2694 igmp_change_state(struct in_multi *inm, struct igmp_tparams *itp)
2695 {
2696 struct igmp_ifinfo *igi;
2697 struct ifnet *ifp;
2698 int error = 0;
2699
2700 VERIFY(itp != NULL);
2701 bzero(itp, sizeof(*itp));
2702
2703 INM_LOCK_ASSERT_HELD(inm);
2704 VERIFY(inm->inm_igi != NULL);
2705 IGI_LOCK_ASSERT_NOTHELD(inm->inm_igi);
2706
2707 /*
2708 * Try to detect if the upper layer just asked us to change state
2709 * for an interface which has now gone away.
2710 */
2711 VERIFY(inm->inm_ifma != NULL);
2712 ifp = inm->inm_ifma->ifma_ifp;
2713 /*
2714 * Sanity check that netinet's notion of ifp is the same as net's.
2715 */
2716 VERIFY(inm->inm_ifp == ifp);
2717
2718 igi = IGMP_IFINFO(ifp);
2719 VERIFY(igi != NULL);
2720
2721 /*
2722 * If we detect a state transition to or from MCAST_UNDEFINED
2723 * for this group, then we are starting or finishing an IGMP
2724 * life cycle for this group.
2725 */
2726 if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
2727 IGMP_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2728 inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode));
2729 if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
2730 IGMP_PRINTF(("%s: initial join\n", __func__));
2731 error = igmp_initial_join(inm, igi, itp);
2732 goto out;
2733 } else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
2734 IGMP_PRINTF(("%s: final leave\n", __func__));
2735 igmp_final_leave(inm, igi, itp);
2736 goto out;
2737 }
2738 } else {
2739 IGMP_PRINTF(("%s: filter set change\n", __func__));
2740 }
2741
2742 error = igmp_handle_state_change(inm, igi, itp);
2743 out:
2744 return error;
2745 }
2746
/*
 * Perform the initial join for an IGMP group.
 *
 * When joining a group:
 *  If the group should have its IGMP traffic suppressed, do nothing.
 *  IGMPv1 starts sending IGMPv1 host membership reports.
 *  IGMPv2 starts sending IGMPv2 host membership reports.
 *  IGMPv3 will schedule an IGMPv3 state-change report containing the
 *  initial state of the membership.
 *
 * 'itp' is zeroed by the caller (igmp_change_state()); this function
 * sets itp->cst / itp->sct when the respective timers must be kicked.
 * Returns 0 on success or a positive errno.
 */
static int
igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi,
    struct igmp_tparams *itp)
{
	struct ifnet *ifp;
	struct ifqueue *ifq;
	int error, retval, syncstates;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_NOTHELD(igi);
	VERIFY(itp != NULL);

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: initial join %s on ifp 0x%llx(%s)\n", __func__,
	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
	    if_name(inm->inm_ifp)));

	error = 0;
	/* syncstates: commit T1->T0 immediately unless a timer must fire first. */
	syncstates = 1;

	ifp = inm->inm_ifp;

	IGI_LOCK(igi);
	VERIFY(igi->igi_ifp == ifp);

	/*
	 * Groups joined on loopback or marked as 'not reported',
	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
	 * are never reported in any IGMP protocol exchanges.
	 * All other groups enter the appropriate IGMP state machine
	 * for the version in use on this link.
	 * A link marked as IGIF_SILENT causes IGMP to be completely
	 * disabled for the link.
	 */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (igi->igi_flags & IGIF_SILENT) ||
	    !igmp_isgroupreported(inm->inm_addr)) {
		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
		    __func__));
		inm->inm_state = IGMP_SILENT_MEMBER;
		inm->inm_timer = 0;
	} else {
		/*
		 * Deal with overlapping in_multi lifecycle.
		 * If this group was LEAVING, then make sure
		 * we drop the reference we picked up to keep the
		 * group around for the final INCLUDE {} enqueue.
		 * Since we cannot call in_multi_detach() here,
		 * defer this task to the timer routine.
		 */
		if (igi->igi_version == IGMP_VERSION_3 &&
		    inm->inm_state == IGMP_LEAVING_MEMBER) {
			VERIFY(inm->inm_nrelecnt != 0);
			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
		}

		inm->inm_state = IGMP_REPORTING_MEMBER;

		switch (igi->igi_version) {
		case IGMP_VERSION_1:
		case IGMP_VERSION_2:
			/* Queue a single v1/v2 report and randomize a repeat. */
			inm->inm_state = IGMP_IDLE_MEMBER;
			error = igmp_v1v2_queue_report(inm,
			    (igi->igi_version == IGMP_VERSION_2) ?
			    IGMP_v2_HOST_MEMBERSHIP_REPORT :
			    IGMP_v1_HOST_MEMBERSHIP_REPORT);

			INM_LOCK_ASSERT_HELD(inm);
			IGI_LOCK_ASSERT_HELD(igi);

			if (error == 0) {
				inm->inm_timer =
				    IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
				itp->cst = 1;
			}
			break;

		case IGMP_VERSION_3:
			/*
			 * Defer update of T0 to T1, until the first copy
			 * of the state change has been transmitted.
			 */
			syncstates = 0;

			/*
			 * Immediately enqueue a State-Change Report for
			 * this interface, freeing any previous reports.
			 * Don't kick the timers if there is nothing to do,
			 * or if an error occurred.
			 */
			ifq = &inm->inm_scq;
			IF_DRAIN(ifq);
			retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
			    0, 0);
			itp->cst = (ifq->ifq_len > 0);
			IGMP_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			if (retval <= 0) {
				/* Negative return encodes an errno. */
				error = retval * -1;
				break;
			}

			/*
			 * Schedule transmission of pending state-change
			 * report up to RV times for this link. The timer
			 * will fire at the next igmp_timeout (1 second),
			 * giving us an opportunity to merge the reports.
			 */
			if (igi->igi_flags & IGIF_LOOPBACK) {
				inm->inm_scrv = 1;
			} else {
				VERIFY(igi->igi_rv > 1);
				inm->inm_scrv = (uint16_t)igi->igi_rv;
			}
			inm->inm_sctimer = 1;
			itp->sct = 1;

			error = 0;
			break;
		}
	}
	IGI_UNLOCK(igi);

	/*
	 * Only update the T0 state if state change is atomic,
	 * i.e. we don't need to wait for a timer to fire before we
	 * can consider the state change to have been communicated.
	 */
	if (syncstates) {
		inm_commit(inm);
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 -> T0 for %s/%s\n", __func__,
		    _igmp_inet_buf, if_name(inm->inm_ifp)));
	}

	return error;
}
2894
/*
 * Issue an intermediate state change during the IGMP life-cycle.
 *
 * Enqueues an IGMPv3 state-change record and arms the per-group
 * retransmit timer; for v1/v2, loopback, silent or unreported groups
 * the change is committed immediately with nothing sent.
 * Returns 0 on success or a positive errno (negated enqueue result).
 */
static int
igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi,
    struct igmp_tparams *itp)
{
	struct ifnet *ifp;
	int retval = 0;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_NOTHELD(igi);
	VERIFY(itp != NULL);

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: state change for %s on ifp 0x%llx(%s)\n", __func__,
	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
	    if_name(inm->inm_ifp)));

	ifp = inm->inm_ifp;

	IGI_LOCK(igi);
	VERIFY(igi->igi_ifp == ifp);

	/*
	 * No report is sent for loopback/silent/unreported groups, or
	 * when the link is not running IGMPv3; just commit T1 -> T0.
	 */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (igi->igi_flags & IGIF_SILENT) ||
	    !igmp_isgroupreported(inm->inm_addr) ||
	    (igi->igi_version != IGMP_VERSION_3)) {
		IGI_UNLOCK(igi);
		if (!igmp_isgroupreported(inm->inm_addr)) {
			IGMP_PRINTF(("%s: not kicking state "
			    "machine for silent group\n", __func__));
		}
		IGMP_PRINTF(("%s: nothing to do\n", __func__));
		inm_commit(inm);
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 -> T0 for %s/%s\n", __func__,
		    _igmp_inet_buf, inm->inm_ifp->if_name));
		goto done;
	}

	/* Discard any previously queued, now-stale state-change records. */
	IF_DRAIN(&inm->inm_scq);

	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
	itp->cst = (inm->inm_scq.ifq_len > 0);
	IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
	if (retval <= 0) {
		IGI_UNLOCK(igi);
		/* Negative return encodes an errno; flip the sign. */
		retval *= -1;
		goto done;
	}
	/*
	 * If record(s) were enqueued, start the state-change
	 * report timer for this group.
	 */
	inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : (uint16_t)igi->igi_rv);
	inm->inm_sctimer = 1;
	itp->sct = 1;
	IGI_UNLOCK(igi);
done:
	return retval;
}
2957
/*
 * Perform the final leave for an IGMP group.
 *
 * When leaving a group:
 *  IGMPv1 does nothing.
 *  IGMPv2 sends a host leave message, if and only if we are the reporter.
 *  IGMPv3 enqueues a state-change report containing a transition
 *  to INCLUDE {} for immediate transmission.
 *
 * Sets itp->cst / itp->sct when the respective timers must be kicked.
 */
static void
igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi,
    struct igmp_tparams *itp)
{
	int syncstates = 1;
	bool retried_already = false;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_NOTHELD(igi);
	VERIFY(itp != NULL);

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: final leave %s on ifp 0x%llx(%s)\n", __func__,
	    _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
	    if_name(inm->inm_ifp)));

retry:
	switch (inm->inm_state) {
	case IGMP_NOT_MEMBER:
	case IGMP_SILENT_MEMBER:
	case IGMP_LEAVING_MEMBER:
		/* Already leaving or left; do nothing. */
		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
		    __func__));
		break;
	case IGMP_REPORTING_MEMBER:
	case IGMP_IDLE_MEMBER:
	case IGMP_G_QUERY_PENDING_MEMBER:
	case IGMP_SG_QUERY_PENDING_MEMBER:
		IGI_LOCK(igi);
		if (igi->igi_version == IGMP_VERSION_2) {
			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
				/*
				 * We may be in the process of downgrading to
				 * IGMPv2 but because we just grabbed the
				 * igi_lock we may have lost the race.
				 */
				if (!retried_already) {
					/* Re-read inm_state once, unlocked. */
					IGI_UNLOCK(igi);
					retried_already = true;
					goto retry;
				} else {
					/*
					 * Proceed with leaving the group
					 * as if it were IGMPv2 even though we
					 * may have an inconsistent multicast state.
					 */
				}
			}
			/* schedule timer if enqueue is successful */
			itp->cst = (igmp_v1v2_queue_report(inm,
			    IGMP_HOST_LEAVE_MESSAGE) == 0);

			INM_LOCK_ASSERT_HELD(inm);
			IGI_LOCK_ASSERT_HELD(igi);

			inm->inm_state = IGMP_NOT_MEMBER;
		} else if (igi->igi_version == IGMP_VERSION_3) {
			/*
			 * Stop group timer and all pending reports.
			 * Immediately enqueue a state-change report
			 * TO_IN {} to be sent on the next timeout,
			 * giving us an opportunity to merge reports.
			 */
			IF_DRAIN(&inm->inm_scq);
			inm->inm_timer = 0;
			if (igi->igi_flags & IGIF_LOOPBACK) {
				inm->inm_scrv = 1;
			} else {
				inm->inm_scrv = (uint16_t)igi->igi_rv;
			}
			IGMP_INET_PRINTF(inm->inm_addr,
			    ("%s: Leaving %s/%s with %d "
			    "pending retransmissions.\n", __func__,
			    _igmp_inet_buf, if_name(inm->inm_ifp),
			    inm->inm_scrv));
			if (inm->inm_scrv == 0) {
				/* No retransmissions configured: leave now. */
				inm->inm_state = IGMP_NOT_MEMBER;
				inm->inm_sctimer = 0;
			} else {
				int retval;
				/*
				 * Stick around in the in_multihead list;
				 * the final detach will be issued by
				 * igmp_v3_process_group_timers() when
				 * the retransmit timer expires.
				 */
				INM_ADDREF_LOCKED(inm);
				VERIFY(inm->inm_debug & IFD_ATTACHED);
				inm->inm_reqcnt++;
				VERIFY(inm->inm_reqcnt >= 1);
				inm->inm_nrelecnt++;
				VERIFY(inm->inm_nrelecnt != 0);

				retval = igmp_v3_enqueue_group_record(
				    &inm->inm_scq, inm, 1, 0, 0);
				itp->cst = (inm->inm_scq.ifq_len > 0);
				KASSERT(retval != 0,
				    ("%s: enqueue record = %d\n", __func__,
				    retval));

				inm->inm_state = IGMP_LEAVING_MEMBER;
				inm->inm_sctimer = 1;
				itp->sct = 1;
				/* T0 commit waits for the timer path. */
				syncstates = 0;
			}
		}
		IGI_UNLOCK(igi);
		break;
	case IGMP_LAZY_MEMBER:
	case IGMP_SLEEPING_MEMBER:
	case IGMP_AWAKENING_MEMBER:
		/* Our reports are suppressed; do nothing. */
		break;
	}

	if (syncstates) {
		inm_commit(inm);
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 -> T0 for %s/%s\n", __func__,
		    _igmp_inet_buf, if_name(inm->inm_ifp)));
		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: T1 now MCAST_UNDEFINED for %s/%s\n",
		    __func__, _igmp_inet_buf, if_name(inm->inm_ifp)));
	}
}
3095
3096 /*
3097 * Enqueue an IGMPv3 group record to the given output queue.
3098 *
3099 * XXX This function could do with having the allocation code
3100 * split out, and the multiple-tree-walks coalesced into a single
3101 * routine as has been done in igmp_v3_enqueue_filter_change().
3102 *
3103 * If is_state_change is zero, a current-state record is appended.
3104 * If is_state_change is non-zero, a state-change report is appended.
3105 *
3106 * If is_group_query is non-zero, an mbuf packet chain is allocated.
3107 * If is_group_query is zero, and if there is a packet with free space
3108 * at the tail of the queue, it will be appended to providing there
3109 * is enough free space.
3110 * Otherwise a new mbuf packet chain is allocated.
3111 *
3112 * If is_source_query is non-zero, each source is checked to see if
3113 * it was recorded for a Group-Source query, and will be omitted if
3114 * it is not both in-mode and recorded.
3115 *
3116 * The function will attempt to allocate leading space in the packet
3117 * for the IP/IGMP header to be prepended without fragmenting the chain.
3118 *
3119 * If successful the size of all data appended to the queue is returned,
3120 * otherwise an error code less than zero is returned, or zero if
3121 * no record(s) were appended.
3122 */
static int
igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
    const int is_state_change, const int is_group_query,
    const int is_source_query)
{
	struct igmp_grouprec ig;
	struct igmp_grouprec *pig;
	struct ifnet *ifp;
	struct ip_msource *ims, *nims;
	struct mbuf *m0, *m, *md;
	int error, is_filter_list_change;
	int minrec0len, m0srcs, nbytes, off;
	uint16_t msrcs;
	int record_has_sources;
	int now;
	int type;
	in_addr_t naddr;
	uint16_t mode;
	u_int16_t ig_numsrc;

	INM_LOCK_ASSERT_HELD(inm);
	IGI_LOCK_ASSERT_HELD(inm->inm_igi);

	error = 0;
	ifp = inm->inm_ifp;
	is_filter_list_change = 0;
	m = NULL;
	m0 = NULL;
	m0srcs = 0;		/* # of sources that fit in the current packet */
	msrcs = 0;		/* # of sources appended to the current record */
	nbytes = 0;		/* total bytes appended to the queue */
	nims = NULL;		/* resume pointer for the source-tree walk */
	record_has_sources = 1;
	pig = NULL;		/* points at the group record inside the mbuf */
	type = IGMP_DO_NOTHING;
	mode = inm->inm_st[1].iss_fmode;	/* group filter mode at t1 */

	/*
	 * If we did not transition out of ASM mode during t0->t1,
	 * and there are no source nodes to process, we can skip
	 * the generation of source records.
	 */
	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
	    inm->inm_nsrc == 0) {
		record_has_sources = 0;
	}

	if (is_state_change) {
		/*
		 * Queue a state change record.
		 * If the mode did not change, and there are non-ASM
		 * listeners or source filters present,
		 * we potentially need to issue two records for the group.
		 * If we are transitioning to MCAST_UNDEFINED, we need
		 * not send any sources.
		 * If there are ASM listeners, and there was no filter
		 * mode transition of any kind, do nothing.
		 */
		if (mode != inm->inm_st[0].iss_fmode) {
			if (mode == MCAST_EXCLUDE) {
				IGMP_PRINTF(("%s: change to EXCLUDE\n",
				    __func__));
				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
			} else {
				IGMP_PRINTF(("%s: change to INCLUDE\n",
				    __func__));
				type = IGMP_CHANGE_TO_INCLUDE_MODE;
				if (mode == MCAST_UNDEFINED) {
					/* Going away; TO_IN with no sources. */
					record_has_sources = 0;
				}
			}
		} else {
			if (record_has_sources) {
				/* Same mode: emit ALLOW/BLOCK deltas instead. */
				is_filter_list_change = 1;
			} else {
				type = IGMP_DO_NOTHING;
			}
		}
	} else {
		/*
		 * Queue a current state record.
		 */
		if (mode == MCAST_EXCLUDE) {
			type = IGMP_MODE_IS_EXCLUDE;
		} else if (mode == MCAST_INCLUDE) {
			type = IGMP_MODE_IS_INCLUDE;
			/* INCLUDE mode implies no ASM listeners at t1. */
			VERIFY(inm->inm_st[1].iss_asm == 0);
		}
	}

	/*
	 * Generate the filter list changes using a separate function.
	 */
	if (is_filter_list_change) {
		return igmp_v3_enqueue_filter_change(ifq, inm);
	}

	if (type == IGMP_DO_NOTHING) {
		IGMP_INET_PRINTF(inm->inm_addr,
		    ("%s: nothing to do for %s/%s\n",
		    __func__, _igmp_inet_buf,
		    if_name(inm->inm_ifp)));
		return 0;
	}

	/*
	 * If any sources are present, we must be able to fit at least
	 * one in the trailing space of the tail packet's mbuf,
	 * ideally more.
	 */
	minrec0len = sizeof(struct igmp_grouprec);
	if (record_has_sources) {
		minrec0len += sizeof(in_addr_t);
	}

	IGMP_INET_PRINTF(inm->inm_addr,
	    ("%s: queueing %s for %s/%s\n", __func__,
	    igmp_rec_type_to_str(type), _igmp_inet_buf,
	    if_name(inm->inm_ifp)));

	/*
	 * Check if we have a packet in the tail of the queue for this
	 * group into which the first group record for this group will fit.
	 * Otherwise allocate a new packet.
	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
	 * Note: Group records for G/GSR query responses MUST be sent
	 * in their own packet.
	 */
	m0 = ifq->ifq_tail;
	if (!is_group_query &&
	    m0 != NULL &&
	    (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
	    (m0->m_pkthdr.len + minrec0len) <
	    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
		m = m0;
		IGMP_PRINTF(("%s: use existing packet\n", __func__));
	} else {
		if (IF_QFULL(ifq)) {
			IGMP_PRINTF(("%s: outbound queue full\n", __func__));
			return -ENOMEM;
		}
		m = NULL;
		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
		if (!is_state_change && !is_group_query) {
			/* Try a cluster first for current-state reports. */
			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
			if (m) {
				m->m_data += IGMP_LEADINGSPACE;
			}
		}
		if (m == NULL) {
			m = m_gethdr(M_DONTWAIT, MT_DATA);
			if (m) {
				MH_ALIGN(m, IGMP_LEADINGSPACE);
			}
		}
		if (m == NULL) {
			return -ENOMEM;
		}

		/* Stash the ifp so igmp_sendpkt() can recover it later. */
		igmp_save_context(m, ifp);

		IGMP_PRINTF(("%s: allocated first packet\n", __func__));
	}

	/*
	 * Append group record.
	 * If we have sources, we don't know how many yet.
	 */
	ig.ig_type = (u_char)type;
	ig.ig_datalen = 0;
	ig.ig_numsrc = 0;
	ig.ig_group = inm->inm_addr;
	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
		if (m != m0) {
			m_freem(m);
		}
		IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
		return -ENOMEM;
	}
	nbytes += sizeof(struct igmp_grouprec);

	/*
	 * Append as many sources as will fit in the first packet.
	 * If we are appending to a new packet, the chain allocation
	 * may potentially use clusters; use m_getptr() in this case.
	 * If we are appending to an existing packet, we need to obtain
	 * a pointer to the group record after m_append(), in case a new
	 * mbuf was allocated.
	 * Only append sources which are in-mode at t1. If we are
	 * transitioning to MCAST_UNDEFINED state on the group, do not
	 * include source entries.
	 * Only report recorded sources in our filter set when responding
	 * to a group-source query.
	 */
	if (record_has_sources) {
		if (m == m0) {
			md = m_last(m);
			pig = (struct igmp_grouprec *)(void *)
			    (mtod(md, uint8_t *) + md->m_len - nbytes);
		} else {
			md = m_getptr(m, 0, &off);
			pig = (struct igmp_grouprec *)(void *)
			    (mtod(md, uint8_t *) + off);
		}
		msrcs = 0;
		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
#ifdef IGMP_DEBUG
			char buf[MAX_IPv4_STR_LEN];

			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
#endif
			now = ims_get_mode(inm, ims, 1);
			IGMP_PRINTF(("%s: node is %d\n", __func__, now));
			if ((now != mode) ||
			    (now == mode && mode == MCAST_UNDEFINED)) {
				IGMP_PRINTF(("%s: skip node\n", __func__));
				continue;
			}
			if (is_source_query && ims->ims_stp == 0) {
				IGMP_PRINTF(("%s: skip unrecorded node\n",
				    __func__));
				continue;
			}
			IGMP_PRINTF(("%s: append node\n", __func__));
			/* ims_haddr is host order; wire format is network order. */
			naddr = htonl(ims->ims_haddr);
			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
				if (m != m0) {
					m_freem(m);
				}
				IGMP_PRINTF(("%s: m_append() failed.\n",
				    __func__));
				return -ENOMEM;
			}
			nbytes += sizeof(in_addr_t);
			++msrcs;
			if (msrcs == m0srcs) {
				break;
			}
		}
		IGMP_PRINTF(("%s: msrcs is %d this packet\n", __func__,
		    msrcs));
		/* Patch the source count into the record already in the mbuf. */
		ig_numsrc = htons(msrcs);
		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
		nbytes += (msrcs * sizeof(in_addr_t));
	}

	if (is_source_query && msrcs == 0) {
		IGMP_PRINTF(("%s: no recorded sources to report\n", __func__));
		if (m != m0) {
			m_freem(m);
		}
		return 0;
	}

	/*
	 * We are good to go with first packet.
	 */
	if (m != m0) {
		IGMP_PRINTF(("%s: enqueueing first packet\n", __func__));
		m->m_pkthdr.vt_nrecs = 1;
		IF_ENQUEUE(ifq, m);
	} else {
		m->m_pkthdr.vt_nrecs++;
	}
	/*
	 * No further work needed if no source list in packet(s).
	 */
	if (!record_has_sources) {
		return nbytes;
	}

	/*
	 * Whilst sources remain to be announced, we need to allocate
	 * a new packet and fill out as many sources as will fit.
	 * Always try for a cluster first.
	 * (nims is non-NULL iff the first walk stopped at m0srcs.)
	 */
	while (nims != NULL) {
		if (IF_QFULL(ifq)) {
			IGMP_PRINTF(("%s: outbound queue full\n", __func__));
			return -ENOMEM;
		}
		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
		if (m) {
			m->m_data += IGMP_LEADINGSPACE;
		}
		if (m == NULL) {
			m = m_gethdr(M_DONTWAIT, MT_DATA);
			if (m) {
				MH_ALIGN(m, IGMP_LEADINGSPACE);
			}
		}
		if (m == NULL) {
			return -ENOMEM;
		}
		igmp_save_context(m, ifp);
		md = m_getptr(m, 0, &off);
		pig = (struct igmp_grouprec *)(void *)
		    (mtod(md, uint8_t *) + off);
		IGMP_PRINTF(("%s: allocated next packet\n", __func__));

		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
			if (m != m0) {
				m_freem(m);
			}
			IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
			return -ENOMEM;
		}
		m->m_pkthdr.vt_nrecs = 1;
		nbytes += sizeof(struct igmp_grouprec);

		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);

		msrcs = 0;
		/* Resume the walk where the previous packet left off. */
		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
#ifdef IGMP_DEBUG
			char buf[MAX_IPv4_STR_LEN];

			inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
			IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
#endif
			now = ims_get_mode(inm, ims, 1);
			if ((now != mode) ||
			    (now == mode && mode == MCAST_UNDEFINED)) {
				IGMP_PRINTF(("%s: skip node\n", __func__));
				continue;
			}
			if (is_source_query && ims->ims_stp == 0) {
				IGMP_PRINTF(("%s: skip unrecorded node\n",
				    __func__));
				continue;
			}
			IGMP_PRINTF(("%s: append node\n", __func__));
			naddr = htonl(ims->ims_haddr);
			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
				if (m != m0) {
					m_freem(m);
				}
				IGMP_PRINTF(("%s: m_append() failed.\n",
				    __func__));
				return -ENOMEM;
			}
			++msrcs;
			if (msrcs == m0srcs) {
				break;
			}
		}
		ig_numsrc = htons(msrcs);
		bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
		nbytes += (msrcs * sizeof(in_addr_t));

		IGMP_PRINTF(("%s: enqueueing next packet\n", __func__));
		IF_ENQUEUE(ifq, m);
	}

	return nbytes;
}
3484
3485 /*
3486 * Type used to mark record pass completion.
3487 * We exploit the fact we can cast to this easily from the
3488 * current filter modes on each ip_msource node.
3489 */
typedef enum {
	REC_NONE = 0x00,        /* MCAST_UNDEFINED */
	REC_ALLOW = 0x01,       /* MCAST_INCLUDE */
	REC_BLOCK = 0x02,       /* MCAST_EXCLUDE */
	REC_FULL = REC_ALLOW | REC_BLOCK  /* both record passes completed */
} rectype_t;
3496
3497 /*
3498 * Enqueue an IGMPv3 filter list change to the given output queue.
3499 *
3500 * Source list filter state is held in an RB-tree. When the filter list
3501 * for a group is changed without changing its mode, we need to compute
3502 * the deltas between T0 and T1 for each source in the filter set,
3503 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3504 *
3505 * As we may potentially queue two record types, and the entire R-B tree
3506 * needs to be walked at once, we break this out into its own function
3507 * so we can generate a tightly packed queue of packets.
3508 *
3509 * XXX This could be written to only use one tree walk, although that makes
3510 * serializing into the mbuf chains a bit harder. For now we do two walks
3511 * which makes things easier on us, and it may or may not be harder on
3512 * the L2 cache.
3513 *
3514 * If successful the size of all data appended to the queue is returned,
3515 * otherwise an error code less than zero is returned, or zero if
3516 * no record(s) were appended.
3517 */
static int
igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
{
	/* Smallest useful record: header plus one source address. */
	static const int MINRECLEN =
	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
	struct ifnet *ifp;
	struct igmp_grouprec ig;
	struct igmp_grouprec *pig;
	struct ip_msource *ims, *nims;
	struct mbuf *m, *m0, *md;
	in_addr_t naddr;
	int m0srcs, nbytes, npbytes, off, schanged;
	uint16_t rsrcs;
	int nallow, nblock;
	uint16_t mode;
	uint8_t now, then;
	rectype_t crt, drt, nrt;
	u_int16_t ig_numsrc;

	INM_LOCK_ASSERT_HELD(inm);

	/* Nothing to do if no sources, or if ASM at both t0 and t1. */
	if (inm->inm_nsrc == 0 ||
	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0)) {
		return 0;
	}

	ifp = inm->inm_ifp;                      /* interface */
	mode = inm->inm_st[1].iss_fmode;         /* filter mode at t1 */
	crt = REC_NONE;	/* current group record type */
	drt = REC_NONE;	/* mask of completed group record types */
	nrt = REC_NONE;	/* record type for current node */
	m0srcs = 0;	/* # source which will fit in current mbuf chain */
	nbytes = 0;	/* # of bytes appended to group's state-change queue */
	npbytes = 0;	/* # of bytes appended this packet */
	rsrcs = 0;	/* # sources encoded in current record */
	schanged = 0;	/* # nodes encoded in overall filter change */
	nallow = 0;	/* # of source entries in ALLOW_NEW */
	nblock = 0;	/* # of source entries in BLOCK_OLD */
	nims = NULL;	/* next tree node pointer */

	/*
	 * For each possible filter record mode.
	 * The first kind of source we encounter tells us which
	 * is the first kind of record we start appending.
	 * If a node transitioned to UNDEFINED at t1, its mode is treated
	 * as the inverse of the group's filter mode.
	 */
	while (drt != REC_FULL) {
		do {
			m0 = ifq->ifq_tail;
			if (m0 != NULL &&
			    (m0->m_pkthdr.vt_nrecs + 1 <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (m0->m_pkthdr.len + MINRECLEN) <
			    (ifp->if_mtu - IGMP_LEADINGSPACE)) {
				/* Tail packet has room for another record. */
				m = m0;
				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
				    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				IGMP_PRINTF(("%s: use previous packet\n",
				    __func__));
			} else {
				/* Allocate a new packet; cluster first. */
				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
				if (m) {
					m->m_data += IGMP_LEADINGSPACE;
				}
				if (m == NULL) {
					m = m_gethdr(M_DONTWAIT, MT_DATA);
					if (m) {
						MH_ALIGN(m, IGMP_LEADINGSPACE);
					}
				}
				if (m == NULL) {
					IGMP_PRINTF(("%s: m_get*() failed\n",
					    __func__));
					return -ENOMEM;
				}
				m->m_pkthdr.vt_nrecs = 0;
				igmp_save_context(m, ifp);
				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
				    sizeof(struct igmp_grouprec)) /
				    sizeof(in_addr_t);
				npbytes = 0;
				IGMP_PRINTF(("%s: allocated new packet\n",
				    __func__));
			}
			/*
			 * Append the IGMP group record header to the
			 * current packet's data area.
			 * Recalculate pointer to free space for next
			 * group record, in case m_append() allocated
			 * a new mbuf or cluster.
			 */
			memset(&ig, 0, sizeof(ig));
			ig.ig_group = inm->inm_addr;
			if (!m_append(m, sizeof(ig), (void *)&ig)) {
				if (m != m0) {
					m_freem(m);
				}
				IGMP_PRINTF(("%s: m_append() failed\n",
				    __func__));
				return -ENOMEM;
			}
			npbytes += sizeof(struct igmp_grouprec);
			if (m != m0) {
				/* new packet; offset in chain */
				md = m_getptr(m, npbytes -
				    sizeof(struct igmp_grouprec), &off);
				pig = (struct igmp_grouprec *)(void *)(mtod(md,
				    uint8_t *) + off);
			} else {
				/* current packet; offset from last append */
				md = m_last(m);
				pig = (struct igmp_grouprec *)(void *)(mtod(md,
				    uint8_t *) + md->m_len -
				    sizeof(struct igmp_grouprec));
			}
			/*
			 * Begin walking the tree for this record type
			 * pass, or continue from where we left off
			 * previously if we had to allocate a new packet.
			 * Only report deltas in-mode at t1.
			 * We need not report included sources as allowed
			 * if we are in inclusive mode on the group,
			 * however the converse is not true.
			 */
			rsrcs = 0;
			if (nims == NULL) {
				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
			}
			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
#ifdef IGMP_DEBUG
				char buf[MAX_IPv4_STR_LEN];

				inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
				IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
#endif
				now = ims_get_mode(inm, ims, 1);
				then = ims_get_mode(inm, ims, 0);
				IGMP_PRINTF(("%s: mode: t0 %d, t1 %d\n",
				    __func__, then, now));
				if (now == then) {
					IGMP_PRINTF(("%s: skip unchanged\n",
					    __func__));
					continue;
				}
				if (mode == MCAST_EXCLUDE &&
				    now == MCAST_INCLUDE) {
					IGMP_PRINTF(("%s: skip IN src on EX "
					    "group\n", __func__));
					continue;
				}
				/*
				 * UNDEFINED at t1 maps to the inverse of the
				 * group filter mode (see rectype_t comment).
				 */
				nrt = (rectype_t)now;
				if (nrt == REC_NONE) {
					nrt = (rectype_t)(~mode & REC_FULL);
				}
				if (schanged++ == 0) {
					/* First node fixes this pass's type. */
					crt = nrt;
				} else if (crt != nrt) {
					/* Wrong type for this pass; next pass. */
					continue;
				}
				naddr = htonl(ims->ims_haddr);
				if (!m_append(m, sizeof(in_addr_t),
				    (void *)&naddr)) {
					if (m != m0) {
						m_freem(m);
					}
					IGMP_PRINTF(("%s: m_append() failed\n",
					    __func__));
					return -ENOMEM;
				}
				nallow += !!(crt == REC_ALLOW);
				nblock += !!(crt == REC_BLOCK);
				if (++rsrcs == m0srcs) {
					break;
				}
			}
			/*
			 * If we did not append any tree nodes on this
			 * pass, back out of allocations.
			 */
			if (rsrcs == 0) {
				npbytes -= sizeof(struct igmp_grouprec);
				if (m != m0) {
					IGMP_PRINTF(("%s: m_free(m)\n",
					    __func__));
					m_freem(m);
				} else {
					IGMP_PRINTF(("%s: m_adj(m, -ig)\n",
					    __func__));
					m_adj(m, -((int)sizeof(
						    struct igmp_grouprec)));
				}
				continue;
			}
			npbytes += (rsrcs * sizeof(in_addr_t));
			if (crt == REC_ALLOW) {
				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
			} else if (crt == REC_BLOCK) {
				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
			}
			/* Patch source count into the record in the mbuf. */
			ig_numsrc = htons(rsrcs);
			bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
			/*
			 * Count the new group record, and enqueue this
			 * packet if it wasn't already queued.
			 */
			m->m_pkthdr.vt_nrecs++;
			if (m != m0) {
				IF_ENQUEUE(ifq, m);
			}
			nbytes += npbytes;
		} while (nims != NULL);
		/* Mark this record type done; flip to the other for pass 2. */
		drt |= crt;
		crt = (~crt & REC_FULL);
	}

	IGMP_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
	    nallow, nblock));

	return nbytes;
}
3740
/*
 * Merge the group's pending state-change queue (inm_scq) into the
 * per-interface state-change queue ifscq, coalescing records into the
 * tail packet where limits allow.  Returns 0 on success or ENOMEM
 * (positive, unlike the enqueue routines above) if a copy fails.
 */
static int
igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
{
	struct ifqueue *gq;
	struct mbuf *m;         /* pending state-change */
	struct mbuf *m0;        /* copy of pending state-change */
	struct mbuf *mt;        /* last state-change in packet */
	struct mbuf *n;
	int docopy, domerge;
	u_int recslen;

	INM_LOCK_ASSERT_HELD(inm);

	docopy = 0;
	domerge = 0;
	recslen = 0;

	/*
	 * If there are further pending retransmissions, make a writable
	 * copy of each queued state-change message before merging.
	 */
	if (inm->inm_scrv > 0) {
		docopy = 1;
	}

	gq = &inm->inm_scq;
#ifdef IGMP_DEBUG
	if (gq->ifq_head == NULL) {
		IGMP_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
	}
#endif

	/*
	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
	 * packet might not always be at the head of the ifqueue.
	 */
	m = gq->ifq_head;
	while (m != NULL) {
		/*
		 * Only merge the report into the current packet if
		 * there is sufficient space to do so; an IGMPv3 report
		 * packet may only contain 65,535 group records.
		 * Always use a simple mbuf chain concatenation to do this,
		 * as large state changes for single groups may have
		 * allocated clusters.
		 */
		domerge = 0;
		mt = ifscq->ifq_tail;
		if (mt != NULL) {
			recslen = m_length(m);

			if ((mt->m_pkthdr.vt_nrecs +
			    m->m_pkthdr.vt_nrecs <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (mt->m_pkthdr.len + recslen <=
			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE))) {
				domerge = 1;
			}
		}

		if (!domerge && IF_QFULL(gq)) {
			IGMP_PRINTF(("%s: outbound queue full, skipping whole "
			    "packet 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			if (!docopy) {
				/* No retransmit pending; drop it entirely. */
				IF_REMQUEUE(gq, m);
				m_freem(m);
			}
			m = n;
			continue;
		}

		if (!docopy) {
			/* Move the packet: no retransmissions remain. */
			IGMP_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			IF_REMQUEUE(gq, m);
			m0 = m;
			m = n;
		} else {
			/* Leave the original on gq for later retransmission. */
			IGMP_PRINTF(("%s: copying 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			m0 = m_dup(m, M_NOWAIT);
			if (m0 == NULL) {
				return ENOMEM;
			}
			m0->m_nextpkt = NULL;
			m = m->m_nextpkt;
		}

		if (!domerge) {
			IGMP_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n",
			    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
			IF_ENQUEUE(ifscq, m0);
		} else {
			struct mbuf *mtl;       /* last mbuf of packet mt */

			IGMP_PRINTF(("%s: merging 0x%llx with ifscq tail "
			    "0x%llx)\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(mt)));

			/*
			 * Concatenate m0 onto mt and fold its counts into
			 * mt's pkthdr.  Clearing M_PKTHDR only drops the
			 * header flag; m0's pkthdr fields remain readable
			 * for the vt_nrecs accumulation below.
			 */
			mtl = m_last(mt);
			m0->m_flags &= ~M_PKTHDR;
			mt->m_pkthdr.len += recslen;
			mt->m_pkthdr.vt_nrecs +=
			    m0->m_pkthdr.vt_nrecs;

			mtl->m_next = m0;
		}
	}

	return 0;
}
3858
3859 /*
3860 * Respond to a pending IGMPv3 General Query.
3861 */
static uint32_t
igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
{
	struct ifnet *ifp;
	struct in_multi *inm;
	struct in_multistep step;
	int retval, loop;

	IGI_LOCK_ASSERT_HELD(igi);

	VERIFY(igi->igi_version == IGMP_VERSION_3);

	ifp = igi->igi_ifp;
	/*
	 * Drop igi lock while walking the global multicast list;
	 * it is re-taken per-membership for the enqueue below.
	 */
	IGI_UNLOCK(igi);

	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		/* Only memberships on the queried interface respond. */
		if (inm->inm_ifp != ifp) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			/* Enqueue a current-state record for this group. */
			inm->inm_state = IGMP_REPORTING_MEMBER;
			IGI_LOCK(igi);
			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
			    inm, 0, 0, 0);
			IGI_UNLOCK(igi);
			IGMP_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			break;
		}
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();

	IGI_LOCK(igi);
	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
	/* Send at most IGMP_MAX_RESPONSE_BURST packets this pass. */
	igmp_dispatch_queue(igi, &igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
	    loop);
	IGI_LOCK_ASSERT_HELD(igi);
	/*
	 * Slew transmission of bursts over 1 second intervals.
	 */
	if (igi->igi_gq.ifq_head != NULL) {
		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
			IGMP_RESPONSE_BURST_INTERVAL);
	}

	/* Returns the (possibly re-armed) v3 general query timer. */
	return igi->igi_v3_timer;
}
3928
3929 /*
3930 * Transmit the next pending IGMP message in the output queue.
3931 *
3932 * Must not be called with inm_lock or igi_lock held.
3933 */
static void
igmp_sendpkt(struct mbuf *m)
{
	struct ip_moptions *imo;
	struct mbuf *ipopts, *m0;
	int error;
	struct route ro;
	struct ifnet *ifp;

	IGMP_PRINTF(("%s: transmit 0x%llx\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m)));

	/* Recover the ifp saved by igmp_save_context() at enqueue time. */
	ifp = igmp_restore_context(m);
	/*
	 * Check if the ifnet is still attached.
	 */
	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
		IGMP_PRINTF(("%s: dropped 0x%llx as ifp went away.\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m)));
		m_freem(m);
		OSAddAtomic(1, &ipstat.ips_noroute);
		return;
	}

	/* Router Alert option, unless disabled via igmp_sendra. */
	ipopts = igmp_sendra ? m_raopt : NULL;

	imo = ip_allocmoptions(Z_WAITOK);
	if (imo == NULL) {
		m_freem(m);
		return;
	}

	imo->imo_multicast_ttl = 1;
	imo->imo_multicast_vif = -1;
	imo->imo_multicast_loop = 0;

	/*
	 * If the user requested that IGMP traffic be explicitly
	 * redirected to the loopback interface (e.g. they are running a
	 * MANET interface and the routing protocol needs to see the
	 * updates), handle this now.
	 */
	if (m->m_flags & M_IGMP_LOOP) {
		imo->imo_multicast_ifp = lo_ifp;
	} else {
		imo->imo_multicast_ifp = ifp;
	}

	if (m->m_flags & M_IGMPV2) {
		/* v1/v2 messages already carry their IP+IGMP header. */
		m0 = m;
	} else {
		/* v3: prepend IP header + report header. */
		m0 = igmp_v3_encap_report(ifp, m);
		if (m0 == NULL) {
			/*
			 * If igmp_v3_encap_report() failed, then M_PREPEND()
			 * already freed the original mbuf chain.
			 * This means that we don't have to m_freem(m) here.
			 */
			IGMP_PRINTF(("%s: dropped 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			IMO_REMREF(imo);
			atomic_add_32(&ipstat.ips_odropped, 1);
			return;
		}
	}

	igmp_scrub_context(m0);
	/*
	 * NOTE(review): flags are cleared on `m`, not `m0`.  The two are
	 * identical for M_IGMPV2; in the v3 path m0 may be a new chain
	 * head from M_PREPEND, leaving the head's flags untouched —
	 * presumably intentional/benign, but verify against upstream.
	 */
	m->m_flags &= ~(M_PROTOFLAGS | M_IGMP_LOOP);
	m0->m_pkthdr.rcvif = lo_ifp;

	if (ifp->if_eflags & IFEF_TXSTART) {
		/*
		 * Use control service class if the interface supports
		 * transmit-start model.
		 */
		(void) m_set_service_class(m0, MBUF_SC_CTL);
	}
	bzero(&ro, sizeof(ro));
	error = ip_output(m0, ipopts, &ro, 0, imo, NULL);
	ROUTE_RELEASE(&ro);

	IMO_REMREF(imo);

	if (error) {
		IGMP_PRINTF(("%s: ip_output(0x%llx) = %d\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(m0), error));
		return;
	}

	IGMPSTAT_INC(igps_snd_reports);
	OIGMPSTAT_INC(igps_snd_reports);
}
4026 /*
4027 * Encapsulate an IGMPv3 report.
4028 *
4029 * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
4030 * chain has already had its IP/IGMPv3 header prepended. In this case
4031 * the function will not attempt to prepend; the lengths and checksums
4032 * will however be re-computed.
4033 *
4034 * Returns a pointer to the new mbuf chain head, or NULL if the
4035 * allocation failed.
4036 */
static struct mbuf *
igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
{
	struct igmp_report *igmp;
	struct ip *ip;
	unsigned int hdrlen, igmpreclen;

	VERIFY((m->m_flags & M_PKTHDR));

	/* Length of the group records (header excluded below if present). */
	igmpreclen = m_length(m);
	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);

	if (m->m_flags & M_IGMPV3_HDR) {
		/* Header already prepended; just recompute fields. */
		igmpreclen -= hdrlen;
	} else {
		M_PREPEND(m, hdrlen, M_DONTWAIT, 1);
		if (m == NULL) {
			/* M_PREPEND freed the chain on failure. */
			return NULL;
		}
		m->m_flags |= M_IGMPV3_HDR;
	}
	/* ip_len is u_short; refuse reports that would overflow it. */
	if (hdrlen + igmpreclen > USHRT_MAX) {
		IGMP_PRINTF(("%s: invalid length %d\n", __func__, hdrlen + igmpreclen));
		m_freem(m);
		return NULL;
	}


	IGMP_PRINTF(("%s: igmpreclen is %d\n", __func__, igmpreclen));

	/* Temporarily step over the IP header to fill in the IGMP report. */
	m->m_data += sizeof(struct ip);
	m->m_len -= sizeof(struct ip);

	igmp = mtod(m, struct igmp_report *);
	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
	igmp->ir_rsv1 = 0;
	igmp->ir_rsv2 = 0;
	igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
	igmp->ir_cksum = 0;
	/* Checksum covers the report header plus all group records. */
	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
	m->m_pkthdr.vt_nrecs = 0;

	/* Step back to expose the IP header again. */
	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	ip = mtod(m, struct ip *);
	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
	ip->ip_len = (u_short)(hdrlen + igmpreclen);
	ip->ip_off = IP_DF;
	ip->ip_p = IPPROTO_IGMP;
	ip->ip_sum = 0;

	ip->ip_src.s_addr = INADDR_ANY;

	if (m->m_flags & M_IGMP_LOOP) {
		/* Loopback delivery: use the interface's primary address. */
		struct in_ifaddr *ia;

		IFP_TO_IA(ifp, ia);
		if (ia != NULL) {
			IFA_LOCK(&ia->ia_ifa);
			ip->ip_src = ia->ia_addr.sin_addr;
			IFA_UNLOCK(&ia->ia_ifa);
			IFA_REMREF(&ia->ia_ifa);
		}
	}

	/* v3 reports always go to 224.0.0.22 (all IGMPv3 routers). */
	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);

	return m;
}
4107
#ifdef IGMP_DEBUG
/*
 * Map an IGMPv3 group record type to a short human-readable string
 * for debug logging.  Unrecognized values yield "unknown".
 */
static const char *
igmp_rec_type_to_str(const int type)
{
	static const struct {
		int rt_type;            /* IGMPv3 record type constant */
		const char *rt_name;    /* short debug label */
	} rec_names[] = {
		{ IGMP_CHANGE_TO_EXCLUDE_MODE, "TO_EX" },
		{ IGMP_CHANGE_TO_INCLUDE_MODE, "TO_IN" },
		{ IGMP_MODE_IS_EXCLUDE, "MODE_EX" },
		{ IGMP_MODE_IS_INCLUDE, "MODE_IN" },
		{ IGMP_ALLOW_NEW_SOURCES, "ALLOW_NEW" },
		{ IGMP_BLOCK_OLD_SOURCES, "BLOCK_OLD" },
	};
	size_t i;

	for (i = 0; i < sizeof(rec_names) / sizeof(rec_names[0]); i++) {
		if (rec_names[i].rt_type == type) {
			return rec_names[i].rt_name;
		}
	}
	return "unknown";
}
#endif
4131
/*
 * One-time protocol initialization, invoked via the protosw attach
 * path.  Idempotent: subsequent calls return immediately.
 */
void
igmp_init(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
	/* Guard against repeated attachment; init must run exactly once. */
	static int igmp_initialized = 0;

	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);

	if (igmp_initialized) {
		return;
	}
	igmp_initialized = 1;

	IGMP_PRINTF(("%s: initializing\n", __func__));

	igmp_timers_are_running = 0;

	LIST_INIT(&igi_head);
	/* Pre-build the Router Alert IP option mbuf used on every send. */
	m_raopt = igmp_ra_alloc();
}
4152