/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*-
 * Copyright (c) 2007-2009 Bruce Simpson.
 * Copyright (c) 1988 Stephen Deering.
 * Copyright (c) 1992, 1993
 *      The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Stephen Deering of Stanford University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)igmp.c      8.1 (Berkeley) 7/19/93
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * Internet Group Management Protocol (IGMP) routines.
 * [RFC1112, RFC2236, RFC3376]
 *
 * Written by Steve Deering, Stanford, May 1988.
 * Modified by Rosen Sharma, Stanford, Aug 1994.
 * Modified by Bill Fenner, Xerox PARC, Feb 1995.
 * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
 * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
 *
 * MULTICAST Revision: 3.5.1.4
 */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/mcache.h>

#include <libkern/libkern.h>
#include <kern/zalloc.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/igmp.h>
#include <netinet/igmp_var.h>
#include <netinet/kpi_ipfilter_var.h>

#if SKYWALK
#include <skywalk/core/skywalk_var.h>
#endif /* SKYWALK */

SLIST_HEAD(igmp_inm_relhead, in_multi);

static void igi_initvar(struct igmp_ifinfo *, struct ifnet *, int);
static struct igmp_ifinfo *igi_alloc(zalloc_flags_t);
static void igi_free(struct igmp_ifinfo *);
static void igi_delete(const struct ifnet *, struct igmp_inm_relhead *);
static void igmp_dispatch_queue(struct igmp_ifinfo *, struct ifqueue *,
    int, const int);
static void igmp_final_leave(struct in_multi *, struct igmp_ifinfo *,
    struct igmp_tparams *);
static int igmp_handle_state_change(struct in_multi *,
    struct igmp_ifinfo *, struct igmp_tparams *);
static int igmp_initial_join(struct in_multi *, struct igmp_ifinfo *,
    struct igmp_tparams *);
static int igmp_input_v1_query(struct ifnet *, const struct ip *,
    const struct igmp *);
static int igmp_input_v2_query(struct ifnet *, const struct ip *,
    const struct igmp *);
static int igmp_input_v3_query(struct ifnet *, const struct ip *,
    /*const*/ struct igmpv3 *);
static int igmp_input_v3_group_query(struct in_multi *,
    int, /*const*/ struct igmpv3 *);
static int igmp_input_v1_report(struct ifnet *, struct mbuf *,
    /*const*/ struct ip *, /*const*/ struct igmp *);
static int igmp_input_v2_report(struct ifnet *, struct mbuf *,
    /*const*/ struct ip *, /*const*/ struct igmp *);
static void igmp_sendpkt(struct mbuf *);
static __inline__ int igmp_isgroupreported(const struct in_addr);
static struct mbuf *igmp_ra_alloc(void);
#ifdef IGMP_DEBUG
static const char *igmp_rec_type_to_str(const int);
#endif
static uint32_t igmp_set_version(struct igmp_ifinfo *, const int);
static void igmp_flush_relq(struct igmp_ifinfo *,
    struct igmp_inm_relhead *);
static int igmp_v1v2_queue_report(struct in_multi *, const int);
static void igmp_v1v2_process_group_timer(struct in_multi *, const int);
static void igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
static uint32_t igmp_v2_update_group(struct in_multi *, const int);
static void igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
static uint32_t igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
static struct mbuf *
    igmp_v3_encap_report(struct ifnet *, struct mbuf *);
static int igmp_v3_enqueue_group_record(struct ifqueue *,
    struct in_multi *, const int, const int, const int);
static int igmp_v3_enqueue_filter_change(struct ifqueue *,
    struct in_multi *);
static void igmp_v3_process_group_timers(struct igmp_ifinfo *,
    struct ifqueue *, struct ifqueue *, struct in_multi *,
    const unsigned int);
static int igmp_v3_merge_state_changes(struct in_multi *,
    struct ifqueue *);
static void igmp_v3_suppress_group_record(struct in_multi *);
static int sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS;
static int sysctl_igmp_gsr SYSCTL_HANDLER_ARGS;
static int sysctl_igmp_default_version SYSCTL_HANDLER_ARGS;

static const uint32_t igmp_timeout_delay = 1000; /* in milliseconds */
static const uint32_t igmp_timeout_leeway = 500; /* in milliseconds */
static bool igmp_timeout_run;           /* IGMP timer is scheduled to run */
static bool igmp_fast_timeout_run;      /* IGMP fast timer is scheduled to run */
static void igmp_timeout(thread_call_param_t, thread_call_param_t);
static void igmp_sched_timeout(void);
static void igmp_sched_fast_timeout(void);

static struct mbuf *m_raopt;            /* Router Alert option */

static int querier_present_timers_running;      /* IGMPv1/v2 older version
                                                 * querier present */
static int interface_timers_running;            /* IGMPv3 general
                                                 * query response */
static int state_change_timers_running;         /* IGMPv3 state-change
                                                 * retransmit */
static int current_state_timers_running;        /* IGMPv1/v2 host
                                                 * report; IGMPv3 g/sg
                                                 * query response */

/*
 * Subsystem lock macros.
 */
#define IGMP_LOCK() \
    lck_mtx_lock(&igmp_mtx)
#define IGMP_LOCK_ASSERT_HELD() \
    LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_OWNED)
#define IGMP_LOCK_ASSERT_NOTHELD() \
    LCK_MTX_ASSERT(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED)
#define IGMP_UNLOCK() \
    lck_mtx_unlock(&igmp_mtx)

static LIST_HEAD(, igmp_ifinfo) igi_head;
static struct igmpstat_v3 igmpstat_v3 = {
    .igps_version = IGPS_VERSION_3,
    .igps_len = sizeof(struct igmpstat_v3),
};
static struct igmpstat igmpstat; /* old IGMPv2 stats structure */
static struct timeval igmp_gsrdelay = {.tv_sec = 10, .tv_usec = 0};

static int igmp_recvifkludge = 1;
static int igmp_sendra = 1;
static int igmp_sendlocal = 1;
static int igmp_v1enable = 1;
static int igmp_v2enable = 1;
static int igmp_legacysupp = 0;
static int igmp_default_version = IGMP_VERSION_3;

SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &igmpstat, igmpstat, "");
SYSCTL_STRUCT(_net_inet_igmp, OID_AUTO, v3stats,
    CTLFLAG_RD | CTLFLAG_LOCKED, &igmpstat_v3, igmpstat_v3, "");
SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW | CTLFLAG_LOCKED,
    &igmp_recvifkludge, 0,
    "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW | CTLFLAG_LOCKED,
    &igmp_sendra, 0,
    "Send IP Router Alert option in IGMPv2/v3 messages");
SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW | CTLFLAG_LOCKED,
    &igmp_sendlocal, 0,
    "Send IGMP membership reports for 224.0.0.0/24 groups");
SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
    &igmp_v1enable, 0,
    "Enable backwards compatibility with IGMPv1");
SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW | CTLFLAG_LOCKED,
    &igmp_v2enable, 0,
    "Enable backwards compatibility with IGMPv2");
SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW | CTLFLAG_LOCKED,
    &igmp_legacysupp, 0,
    "Allow v1/v2 reports to suppress v3 group responses");
SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
    CTLTYPE_INT | CTLFLAG_RW,
    &igmp_default_version, 0, sysctl_igmp_default_version, "I",
    "Default version of IGMP to run on each interface");
SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
    CTLTYPE_INT | CTLFLAG_RW,
    &igmp_gsrdelay.tv_sec, 0, sysctl_igmp_gsr, "I",
    "Rate limit for IGMPv3 Group-and-Source queries in seconds");
#ifdef IGMP_DEBUG
int igmp_debug = 0;
SYSCTL_INT(_net_inet_igmp, OID_AUTO,
    debug, CTLFLAG_RW | CTLFLAG_LOCKED, &igmp_debug, 0, "");
#endif

SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
    sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");

/* Lock group and attribute for igmp_mtx */
static LCK_ATTR_DECLARE(igmp_mtx_attr, 0, 0);
static LCK_GRP_DECLARE(igmp_mtx_grp, "igmp_mtx");

/*
 * Locking and reference counting:
 *
 * igmp_mtx mainly protects igi_head. In cases where both igmp_mtx and
 * in_multihead_lock must be held, the former must be acquired first in order
 * to maintain lock ordering. It is not a requirement that igmp_mtx be
 * acquired first before in_multihead_lock, but in case both must be acquired
 * in succession, the correct lock ordering must be followed.
 *
 * Instead of walking the if_multiaddrs list at the interface and returning
 * the ifma_protospec value of a matching entry, we search the global list
 * of in_multi records and find it that way; this is done with in_multihead
 * lock held. Doing so avoids the race condition issues that many other BSDs
 * suffer from (therefore in our implementation, ifma_protospec will never be
 * NULL for as long as the in_multi is valid.)
 * The above creates a requirement for the in_multi to stay in the
 * in_multihead list even after the final IGMP leave (in IGMPv3 mode), until
 * its state change no longer needs to be retransmitted (this is not
 * required for IGMPv1/v2.) In order to handle this, the request and
 * reference counts of the in_multi are bumped up when the state changes to
 * IGMP_LEAVING_MEMBER, and later dropped in the timeout handler. Each
 * in_multi holds a reference to the underlying igmp_ifinfo.
 *
 * Thus, the permitted lock order is:
 *
 * igmp_mtx, in_multihead_lock, inm_lock, igi_lock
 *
 * Any may be taken independently, but if any are held at the same time,
 * the above lock order must be followed.
 */
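/*
 * For example, a hypothetical code path that needed all four locks at once
 * would acquire them strictly in the order above (releasing in reverse
 * order is typical, though not required):
 *
 *      IGMP_LOCK();
 *      in_multihead_lock_shared();
 *      INM_LOCK(inm);
 *      IGI_LOCK(igi);
 *      ...
 *      IGI_UNLOCK(igi);
 *      INM_UNLOCK(inm);
 *      in_multihead_lock_done();
 *      IGMP_UNLOCK();
 */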
static LCK_MTX_DECLARE_ATTR(igmp_mtx, &igmp_mtx_grp, &igmp_mtx_attr);
static int igmp_timers_are_running;

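/*
 * Helper macros for deferred release of detached in_multi records: records
 * are first collected on a caller-local list, and the final INM_REMREF on
 * each one is issued only after all subsystem locks have been dropped (see
 * igmp_domifdetach() and igi_remref() below).
 */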
#define IGMP_ADD_DETACHED_INM(_head, _inm) {                            \
    SLIST_INSERT_HEAD(_head, _inm, inm_dtle);                           \
}

#define IGMP_REMOVE_DETACHED_INM(_head) {                               \
    struct in_multi *_inm, *_inm_tmp;                                   \
    SLIST_FOREACH_SAFE(_inm, _head, inm_dtle, _inm_tmp) {               \
        SLIST_REMOVE(_head, _inm, in_multi, inm_dtle);                  \
        INM_REMREF(_inm);                                               \
    }                                                                   \
    VERIFY(SLIST_EMPTY(_head));                                         \
}

static KALLOC_TYPE_DEFINE(igi_zone, struct igmp_ifinfo, NET_KT_DEFAULT);

/* Store IGMPv3 record count in the module private scratch space */
#define vt_nrecs pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]

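/*
 * Save the transmit interface of a pending IGMP packet by stashing it in
 * the mbuf's rcvif field; it is recovered by igmp_restore_context() when
 * the queued packet is finally dispatched.
 */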
static __inline void
igmp_save_context(struct mbuf *m, struct ifnet *ifp)
{
    m->m_pkthdr.rcvif = ifp;
}

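/*
 * Clear any interface context previously saved by igmp_save_context().
 */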
static __inline void
igmp_scrub_context(struct mbuf *m)
{
    m->m_pkthdr.rcvif = NULL;
}

#ifdef IGMP_DEBUG
static __inline const char *
inet_ntop_haddr(in_addr_t haddr, char *buf, socklen_t size)
{
    struct in_addr ia;

    ia.s_addr = htonl(haddr);
    return inet_ntop(AF_INET, &ia, buf, size);
}
#endif

/*
 * Restore context from a queued IGMP output chain.
 * Return saved ifp.
 */
static __inline struct ifnet *
igmp_restore_context(struct mbuf *m)
{
    return m->m_pkthdr.rcvif;
}

/*
 * Retrieve or set default IGMP version.
 */
static int
sysctl_igmp_default_version SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
    int error;
    int new;

    IGMP_LOCK();

    error = SYSCTL_OUT(req, arg1, sizeof(int));
    if (error || !req->newptr) {
        goto out_locked;
    }

    new = igmp_default_version;

    error = SYSCTL_IN(req, &new, sizeof(int));
    if (error) {
        goto out_locked;
    }

    if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
        error = EINVAL;
        goto out_locked;
    }

    IGMP_PRINTF(("%s: change igmp_default_version from %d to %d\n",
        __func__, igmp_default_version, new));

    igmp_default_version = new;

out_locked:
    IGMP_UNLOCK();
    return error;
}

/*
 * Retrieve or set threshold between group-source queries in seconds.
 */
static int
sysctl_igmp_gsr SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    int error;
    int i;

    IGMP_LOCK();

    i = (int)igmp_gsrdelay.tv_sec;

    error = sysctl_handle_int(oidp, &i, 0, req);
    if (error || !req->newptr) {
        goto out_locked;
    }

    if (i < -1 || i >= 60) {
        error = EINVAL;
        goto out_locked;
    }

    igmp_gsrdelay.tv_sec = i;

out_locked:
    IGMP_UNLOCK();
    return error;
}

/*
 * Expose struct igmp_ifinfo to userland, keyed by ifindex.
 * For use by ifmcstat(8).
 */
static int
sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
    int *name;
    int error;
    u_int namelen;
    struct ifnet *ifp;
    struct igmp_ifinfo *igi;
    struct igmp_ifinfo_u igi_u;

    name = (int *)arg1;
    namelen = arg2;

    if (req->newptr != USER_ADDR_NULL) {
        return EPERM;
    }

    if (namelen != 1) {
        return EINVAL;
    }

    IGMP_LOCK();

    if (name[0] <= 0 || name[0] > (u_int)if_index) {
        error = ENOENT;
        goto out_locked;
    }

    error = ENOENT;

    ifnet_head_lock_shared();
    ifp = ifindex2ifnet[name[0]];
    ifnet_head_done();
    if (ifp == NULL) {
        goto out_locked;
    }

    bzero(&igi_u, sizeof(igi_u));

    LIST_FOREACH(igi, &igi_head, igi_link) {
        IGI_LOCK(igi);
        if (ifp != igi->igi_ifp) {
            IGI_UNLOCK(igi);
            continue;
        }
        igi_u.igi_ifindex = igi->igi_ifp->if_index;
        igi_u.igi_version = igi->igi_version;
        igi_u.igi_v1_timer = igi->igi_v1_timer;
        igi_u.igi_v2_timer = igi->igi_v2_timer;
        igi_u.igi_v3_timer = igi->igi_v3_timer;
        igi_u.igi_flags = igi->igi_flags;
        igi_u.igi_rv = igi->igi_rv;
        igi_u.igi_qi = igi->igi_qi;
        igi_u.igi_qri = igi->igi_qri;
        IGI_UNLOCK(igi);

        error = SYSCTL_OUT(req, &igi_u, sizeof(igi_u));
        break;
    }

out_locked:
    IGMP_UNLOCK();
    return error;
}

/*
 * Dispatch an entire queue of pending packet chains.
 *
 * Must not be called with inm_lock held.
 */
static void
igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit,
    const int loop)
{
    struct mbuf *m;
    struct ip *ip;

    if (igi != NULL) {
        IGI_LOCK_ASSERT_HELD(igi);
    }

#if SKYWALK
    /*
     * Since this function is called holding the igi lock, we need to ensure we
     * don't enter the driver directly because a deadlock can happen if another
     * thread holding the workloop lock tries to acquire the igi lock at
     * the same time.
     */
    sk_protect_t protect = sk_async_transmit_protect();
#endif /* SKYWALK */

    for (;;) {
        IF_DEQUEUE(ifq, m);
        if (m == NULL) {
            break;
        }
        IGMP_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
            (uint64_t)VM_KERNEL_ADDRPERM(ifq),
            (uint64_t)VM_KERNEL_ADDRPERM(m)));
        ip = mtod(m, struct ip *);
        if (loop) {
            m->m_flags |= M_IGMP_LOOP;
        }
        if (igi != NULL) {
            IGI_UNLOCK(igi);
        }
        igmp_sendpkt(m);
        if (igi != NULL) {
            IGI_LOCK(igi);
        }
        if (--limit == 0) {
            break;
        }
    }

#if SKYWALK
    sk_async_transmit_unprotect(protect);
#endif /* SKYWALK */

    if (igi != NULL) {
        IGI_LOCK_ASSERT_HELD(igi);
    }
}

/*
 * Filter outgoing IGMP report state by group.
 *
 * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
 * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
 * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
 * this may break certain IGMP snooping switches which rely on the old
 * report behaviour.
 *
 * Return zero if the given group is one for which IGMP reports
 * should be suppressed, or non-zero if reports should be issued.
 */
static __inline__
int
igmp_isgroupreported(const struct in_addr addr)
{
    if (in_allhosts(addr) ||
        ((!igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) {
        return 0;
    }

    return 1;
}

/*
 * Construct a Router Alert option to use in outgoing packets.
 */
static struct mbuf *
igmp_ra_alloc(void)
{
    struct mbuf *m;
    struct ipoption *p;

    MGET(m, M_WAITOK, MT_DATA);
    p = mtod(m, struct ipoption *);
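    /*
     * Build the RFC 2113 Router Alert option: type 0x94 (copied flag set,
     * option number 20), length 4, and a two-octet value of zero. Since
     * IPOPT_EOL is 0, the EOL and pad bytes below double as the option's
     * zero value on the wire.
     */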
    p->ipopt_dst.s_addr = INADDR_ANY;
    p->ipopt_list[0] = (char)IPOPT_RA;  /* Router Alert Option */
    p->ipopt_list[1] = 0x04;            /* 4 bytes long */
    p->ipopt_list[2] = IPOPT_EOL;       /* End of IP option list */
    p->ipopt_list[3] = 0x00;            /* pad byte */
    m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];

    return m;
}

/*
 * Attach IGMP when PF_INET is attached to an interface.
 */
struct igmp_ifinfo *
igmp_domifattach(struct ifnet *ifp, zalloc_flags_t how)
{
    struct igmp_ifinfo *igi;

    IGMP_PRINTF(("%s: called for ifp 0x%llx(%s)\n",
        __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));

    igi = igi_alloc(how);
    if (igi == NULL) {
        return NULL;
    }

    IGMP_LOCK();

    IGI_LOCK(igi);
    igi_initvar(igi, ifp, 0);
    igi->igi_debug |= IFD_ATTACHED;
    IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
    IGI_ADDREF_LOCKED(igi); /* hold a reference for caller */
    IGI_UNLOCK(igi);
    ifnet_lock_shared(ifp);
    igmp_initsilent(ifp, igi);
    ifnet_lock_done(ifp);

    LIST_INSERT_HEAD(&igi_head, igi, igi_link);

    IGMP_UNLOCK();

    IGMP_PRINTF(("%s: allocate igmp_ifinfo for ifp 0x%llx(%s)\n", __func__,
        (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));

    return igi;
}

/*
 * Attach IGMP when PF_INET is reattached to an interface. Caller is
 * expected to have an outstanding reference to the igi.
 */
void
igmp_domifreattach(struct igmp_ifinfo *igi)
{
    struct ifnet *ifp;

    IGMP_LOCK();

    IGI_LOCK(igi);
    VERIFY(!(igi->igi_debug & IFD_ATTACHED));
    ifp = igi->igi_ifp;
    VERIFY(ifp != NULL);
    igi_initvar(igi, ifp, 1);
    igi->igi_debug |= IFD_ATTACHED;
    IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
    IGI_UNLOCK(igi);
    ifnet_lock_shared(ifp);
    igmp_initsilent(ifp, igi);
    ifnet_lock_done(ifp);

    LIST_INSERT_HEAD(&igi_head, igi, igi_link);

    IGMP_UNLOCK();

    IGMP_PRINTF(("%s: reattached igmp_ifinfo for ifp 0x%llx(%s)\n",
        __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name));
}

/*
 * Hook for domifdetach.
 */
void
igmp_domifdetach(struct ifnet *ifp)
{
    SLIST_HEAD(, in_multi) inm_dthead;

    SLIST_INIT(&inm_dthead);

    IGMP_PRINTF(("%s: called for ifp 0x%llx(%s%d)\n", __func__,
        (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name, ifp->if_unit));

    IGMP_LOCK();
    igi_delete(ifp, (struct igmp_inm_relhead *)&inm_dthead);
    IGMP_UNLOCK();

    /* Now that we've dropped all locks, release detached records */
    IGMP_REMOVE_DETACHED_INM(&inm_dthead);
}

/*
 * Called at interface detach time. Note that we only flush all deferred
 * responses and record releases; all remaining inm records and their source
 * entries related to this interface are left intact, in order to handle
 * the reattach case.
 */
static void
igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead)
{
    struct igmp_ifinfo *igi, *tigi;

    IGMP_LOCK_ASSERT_HELD();

    LIST_FOREACH_SAFE(igi, &igi_head, igi_link, tigi) {
        IGI_LOCK(igi);
        if (igi->igi_ifp == ifp) {
            /*
             * Free deferred General Query responses.
             */
            IF_DRAIN(&igi->igi_gq);
            IF_DRAIN(&igi->igi_v2q);
            igmp_flush_relq(igi, inm_dthead);
            VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
            igi->igi_debug &= ~IFD_ATTACHED;
            IGI_UNLOCK(igi);

            LIST_REMOVE(igi, igi_link);
            IGI_REMREF(igi); /* release igi_head reference */
            return;
        }
        IGI_UNLOCK(igi);
    }
    panic("%s: igmp_ifinfo not found for ifp %p(%s)", __func__,
        ifp, ifp->if_xname);
}

__private_extern__ void
igmp_initsilent(struct ifnet *ifp, struct igmp_ifinfo *igi)
{
    ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

    IGI_LOCK_ASSERT_NOTHELD(igi);
    IGI_LOCK(igi);
    if (!(ifp->if_flags & IFF_MULTICAST)) {
        igi->igi_flags |= IGIF_SILENT;
    } else {
        igi->igi_flags &= ~IGIF_SILENT;
    }
    IGI_UNLOCK(igi);
}

static void
igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach)
{
    IGI_LOCK_ASSERT_HELD(igi);

    igi->igi_ifp = ifp;
    igi->igi_version = igmp_default_version;
    igi->igi_flags = 0;
    igi->igi_rv = IGMP_RV_INIT;
    igi->igi_qi = IGMP_QI_INIT;
    igi->igi_qri = IGMP_QRI_INIT;
    igi->igi_uri = IGMP_URI_INIT;

    if (!reattach) {
        SLIST_INIT(&igi->igi_relinmhead);
    }

    /*
     * Responses to general queries are subject to bounds.
     */
    igi->igi_gq.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
    igi->igi_v2q.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
}

static struct igmp_ifinfo *
igi_alloc(zalloc_flags_t how)
{
    struct igmp_ifinfo *igi = zalloc_flags(igi_zone, how | Z_ZERO);
    if (igi != NULL) {
        lck_mtx_init(&igi->igi_lock, &igmp_mtx_grp, &igmp_mtx_attr);
        igi->igi_debug |= IFD_ALLOC;
    }
    return igi;
}

static void
igi_free(struct igmp_ifinfo *igi)
{
    IGI_LOCK(igi);
    if (igi->igi_debug & IFD_ATTACHED) {
        panic("%s: attached igi=%p is being freed", __func__, igi);
        /* NOTREACHED */
    } else if (igi->igi_ifp != NULL) {
        panic("%s: ifp not NULL for igi=%p", __func__, igi);
        /* NOTREACHED */
    } else if (!(igi->igi_debug & IFD_ALLOC)) {
        panic("%s: igi %p cannot be freed", __func__, igi);
        /* NOTREACHED */
    } else if (igi->igi_refcnt != 0) {
        panic("%s: non-zero refcnt igi=%p", __func__, igi);
        /* NOTREACHED */
    }
    igi->igi_debug &= ~IFD_ALLOC;
    IGI_UNLOCK(igi);

    lck_mtx_destroy(&igi->igi_lock, &igmp_mtx_grp);
    zfree(igi_zone, igi);
}

void
igi_addref(struct igmp_ifinfo *igi, int locked)
{
    if (!locked) {
        IGI_LOCK_SPIN(igi);
    } else {
        IGI_LOCK_ASSERT_HELD(igi);
    }

    if (++igi->igi_refcnt == 0) {
        panic("%s: igi=%p wraparound refcnt", __func__, igi);
        /* NOTREACHED */
    }
    if (!locked) {
        IGI_UNLOCK(igi);
    }
}

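/*
 * Drop a reference to the igmp_ifinfo; the final release drains any
 * pending queues, releases detached in_multi records once all locks have
 * been dropped, and frees the structure.
 */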
void
igi_remref(struct igmp_ifinfo *igi)
{
    SLIST_HEAD(, in_multi) inm_dthead;
    struct ifnet *ifp;

    IGI_LOCK_SPIN(igi);

    if (igi->igi_refcnt == 0) {
        panic("%s: igi=%p negative refcnt", __func__, igi);
        /* NOTREACHED */
    }

    --igi->igi_refcnt;
    if (igi->igi_refcnt > 0) {
        IGI_UNLOCK(igi);
        return;
    }

    ifp = igi->igi_ifp;
    igi->igi_ifp = NULL;
    IF_DRAIN(&igi->igi_gq);
    IF_DRAIN(&igi->igi_v2q);
    SLIST_INIT(&inm_dthead);
    igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
    VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
    IGI_UNLOCK(igi);

    /* Now that we've dropped all locks, release detached records */
    IGMP_REMOVE_DETACHED_INM(&inm_dthead);

    IGMP_PRINTF(("%s: freeing igmp_ifinfo for ifp 0x%llx(%s)\n",
        __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

    igi_free(igi);
}

/*
 * Process a received IGMPv1 query.
 * Return non-zero if the message should be dropped.
 */
static int
igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
    struct igmp_ifinfo *igi;
    struct in_multi *inm;
    struct in_multistep step;
    struct igmp_tparams itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

    IGMP_LOCK_ASSERT_NOTHELD();

    /*
     * IGMPv1 Host Membership Queries SHOULD always be addressed to
     * 224.0.0.1. They are always treated as General Queries.
     * igmp_group is always ignored. Do not drop it as a userland
     * daemon may wish to see it.
     */
    if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
        IGMPSTAT_INC(igps_rcv_badqueries);
        OIGMPSTAT_INC(igps_rcv_badqueries);
        goto done;
    }
    IGMPSTAT_INC(igps_rcv_gen_queries);

    igi = IGMP_IFINFO(ifp);
    VERIFY(igi != NULL);

    IGI_LOCK(igi);
    if (igi->igi_flags & IGIF_LOOPBACK) {
        IGMP_PRINTF(("%s: ignore v1 query on IGIF_LOOPBACK "
            "ifp 0x%llx(%s)\n", __func__,
            (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
        IGI_UNLOCK(igi);
        goto done;
    }
    /*
     * Switch to IGMPv1 host compatibility mode.
     */
    itp.qpt = igmp_set_version(igi, IGMP_VERSION_1);
    IGI_UNLOCK(igi);

    IGMP_PRINTF(("%s: process v1 query on ifp 0x%llx(%s)\n", __func__,
        (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

    /*
     * Start the timers in all of our group records
     * for the interface on which the query arrived,
     * except those which are already running.
     */
    in_multihead_lock_shared();
    IN_FIRST_MULTI(step, inm);
    while (inm != NULL) {
        INM_LOCK(inm);
        if (inm->inm_ifp != ifp || inm->inm_timer != 0) {
            goto next;
        }

        switch (inm->inm_state) {
        case IGMP_NOT_MEMBER:
        case IGMP_SILENT_MEMBER:
            break;
        case IGMP_G_QUERY_PENDING_MEMBER:
        case IGMP_SG_QUERY_PENDING_MEMBER:
        case IGMP_REPORTING_MEMBER:
        case IGMP_IDLE_MEMBER:
        case IGMP_LAZY_MEMBER:
        case IGMP_SLEEPING_MEMBER:
        case IGMP_AWAKENING_MEMBER:
            inm->inm_state = IGMP_REPORTING_MEMBER;
            inm->inm_timer = IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
            itp.cst = 1;
            break;
        case IGMP_LEAVING_MEMBER:
            break;
        }
next:
        INM_UNLOCK(inm);
        IN_NEXT_MULTI(step, inm);
    }
    in_multihead_lock_done();
done:
    igmp_set_timeout(&itp);

    return 0;
}

/*
 * Process a received IGMPv2 general or group-specific query.
 */
static int
igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
    const struct igmp *igmp)
{
    struct igmp_ifinfo *igi;
    struct in_multi *inm;
    int is_general_query;
    uint16_t timer;
    struct igmp_tparams itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

    IGMP_LOCK_ASSERT_NOTHELD();

    is_general_query = 0;

    /*
     * Validate address fields upfront.
     */
    if (in_nullhost(igmp->igmp_group)) {
        /*
         * IGMPv2 General Query.
         * If this was not sent to the all-hosts group, ignore it.
         */
        if (!in_allhosts(ip->ip_dst)) {
            goto done;
        }
        IGMPSTAT_INC(igps_rcv_gen_queries);
        is_general_query = 1;
    } else {
        /* IGMPv2 Group-Specific Query. */
        IGMPSTAT_INC(igps_rcv_group_queries);
    }

    igi = IGMP_IFINFO(ifp);
    VERIFY(igi != NULL);

    IGI_LOCK(igi);
    if (igi->igi_flags & IGIF_LOOPBACK) {
        IGMP_PRINTF(("%s: ignore v2 query on IGIF_LOOPBACK "
            "ifp 0x%llx(%s)\n", __func__,
            (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
        IGI_UNLOCK(igi);
        goto done;
    }
    /*
     * Ignore v2 query if in v1 Compatibility Mode.
     */
    if (igi->igi_version == IGMP_VERSION_1) {
        IGI_UNLOCK(igi);
        goto done;
    }
    itp.qpt = igmp_set_version(igi, IGMP_VERSION_2);
    IGI_UNLOCK(igi);

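    /*
     * The IGMPv2 Max Response Time (igmp_code) is in units of a tenth of
     * a second (RFC 2236); convert it to seconds, with a floor of one.
     */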
    timer = igmp->igmp_code / IGMP_TIMER_SCALE;
    if (timer == 0) {
        timer = 1;
    }

    if (is_general_query) {
        struct in_multistep step;

        IGMP_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n",
            __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
        /*
         * For each reporting group joined on this
         * interface, kick the report timer.
         */
        in_multihead_lock_shared();
        IN_FIRST_MULTI(step, inm);
        while (inm != NULL) {
            INM_LOCK(inm);
            if (inm->inm_ifp == ifp) {
                itp.cst += igmp_v2_update_group(inm, timer);
            }
            INM_UNLOCK(inm);
            IN_NEXT_MULTI(step, inm);
        }
        in_multihead_lock_done();
    } else {
        /*
         * Group-specific IGMPv2 query, we need only
         * look up the single group to process it.
         */
        in_multihead_lock_shared();
        IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
        in_multihead_lock_done();
        if (inm != NULL) {
            INM_LOCK(inm);
            IGMP_INET_PRINTF(igmp->igmp_group,
                ("process v2 query %s on ifp 0x%llx(%s)\n",
                _igmp_inet_buf,
                (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
            itp.cst = igmp_v2_update_group(inm, timer);
            INM_UNLOCK(inm);
            INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
        }
    }
done:
    igmp_set_timeout(&itp);

    return 0;
}

/*
 * Update the report timer on a group in response to an IGMPv2 query.
 *
 * If we are becoming the reporting member for this group, start the timer.
 * If we already are the reporting member for this group, and timer is
 * below the threshold, reset it.
 *
 * We may be updating the group for the first time since we switched
 * to IGMPv3. If we are, then we must clear any recorded source lists,
 * and transition to REPORTING state; the group timer is overloaded
 * for group and group-source query responses.
 *
 * Unlike IGMPv3, the delay per group should be jittered
 * to avoid bursts of IGMPv2 reports.
 */
static uint32_t
igmp_v2_update_group(struct in_multi *inm, const int timer)
{
    IGMP_INET_PRINTF(inm->inm_addr, ("%s: %s/%s timer=%d\n",
        __func__, _igmp_inet_buf, if_name(inm->inm_ifp),
        timer));

    INM_LOCK_ASSERT_HELD(inm);

    switch (inm->inm_state) {
    case IGMP_NOT_MEMBER:
    case IGMP_SILENT_MEMBER:
        break;
    case IGMP_REPORTING_MEMBER:
        if (inm->inm_timer != 0 &&
            inm->inm_timer <= timer) {
            IGMP_PRINTF(("%s: REPORTING and timer running, "
                "skipping.\n", __func__));
            break;
        }
        OS_FALLTHROUGH;
    case IGMP_SG_QUERY_PENDING_MEMBER:
    case IGMP_G_QUERY_PENDING_MEMBER:
    case IGMP_IDLE_MEMBER:
    case IGMP_LAZY_MEMBER:
    case IGMP_AWAKENING_MEMBER:
        IGMP_PRINTF(("%s: ->REPORTING\n", __func__));
        inm->inm_state = IGMP_REPORTING_MEMBER;
        inm->inm_timer = IGMP_RANDOM_DELAY(timer);
        break;
    case IGMP_SLEEPING_MEMBER:
        IGMP_PRINTF(("%s: ->AWAKENING\n", __func__));
        inm->inm_state = IGMP_AWAKENING_MEMBER;
        break;
    case IGMP_LEAVING_MEMBER:
        break;
    }

    return inm->inm_timer;
}

/*
 * Process a received IGMPv3 general, group-specific or
 * group-and-source-specific query.
 * Assumes m has already been pulled up to the full IGMP message length.
 * Return 0 if successful, otherwise an appropriate error code is returned.
 */
static int
igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
    /*const*/ struct igmpv3 *igmpv3)
{
    struct igmp_ifinfo *igi;
    struct in_multi *inm;
    int is_general_query;
    uint32_t maxresp, nsrc, qqi;
    uint32_t timer;
    uint8_t qrv;
    struct igmp_tparams itp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

    IGMP_LOCK_ASSERT_NOTHELD();

    is_general_query = 0;

    IGMP_PRINTF(("%s: process v3 query on ifp 0x%llx(%s)\n", __func__,
        (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

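    /*
     * Decode the Max Resp Code (RFC 3376, Section 4.1.1): values below
     * 128 are taken literally, in tenths of a second; values of 128 or
     * above use a floating-point encoding with a 3-bit exponent and a
     * 4-bit mantissa.
     */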
    maxresp = igmpv3->igmp_code; /* in 1/10ths of a second */
    if (maxresp >= 128) {
        maxresp = IGMP_MANT(igmpv3->igmp_code) <<
            (IGMP_EXP(igmpv3->igmp_code) + 3);
    }

    /*
     * Robustness must never be less than 2 for on-wire IGMPv3.
     * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
     * an exception for interfaces whose IGMPv3 state changes
     * are redirected to loopback (e.g. MANET).
     */
    qrv = IGMP_QRV(igmpv3->igmp_misc);
    if (qrv < 2) {
        IGMP_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
            qrv, IGMP_RV_INIT));
        qrv = IGMP_RV_INIT;
    }

    qqi = igmpv3->igmp_qqi;
    if (qqi >= 128) {
        qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
            (IGMP_EXP(igmpv3->igmp_qqi) + 3);
    }

    timer = maxresp / IGMP_TIMER_SCALE;
    if (timer == 0) {
        timer = 1;
    }

    nsrc = ntohs(igmpv3->igmp_numsrc);

    /*
     * Validate address fields and versions upfront before
     * accepting v3 query.
     */
    if (in_nullhost(igmpv3->igmp_group)) {
        /*
         * IGMPv3 General Query.
         *
         * General Queries SHOULD be directed to 224.0.0.1.
         * A general query with a source list has undefined
         * behaviour; discard it.
         */
        IGMPSTAT_INC(igps_rcv_gen_queries);
        if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
            IGMPSTAT_INC(igps_rcv_badqueries);
            OIGMPSTAT_INC(igps_rcv_badqueries);
            goto done;
        }
        is_general_query = 1;
    } else {
        /* Group or group-source specific query. */
        if (nsrc == 0) {
            IGMPSTAT_INC(igps_rcv_group_queries);
        } else {
            IGMPSTAT_INC(igps_rcv_gsr_queries);
        }
    }

    igi = IGMP_IFINFO(ifp);
    VERIFY(igi != NULL);

    IGI_LOCK(igi);
    if (igi->igi_flags & IGIF_LOOPBACK) {
        IGMP_PRINTF(("%s: ignore v3 query on IGIF_LOOPBACK "
            "ifp 0x%llx(%s)\n", __func__,
            (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
        IGI_UNLOCK(igi);
        goto done;
    }

    /*
     * Discard the v3 query if we're in Compatibility Mode.
     * The RFC is not obviously worded that hosts need to stay in
     * compatibility mode until the Old Version Querier Present
     * timer expires.
     */
    if (igi->igi_version != IGMP_VERSION_3) {
        IGMP_PRINTF(("%s: ignore v3 query in v%d mode on "
            "ifp 0x%llx(%s)\n", __func__, igi->igi_version,
            (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
        IGI_UNLOCK(igi);
        goto done;
    }

    itp.qpt = igmp_set_version(igi, IGMP_VERSION_3);
    igi->igi_rv = qrv;
    igi->igi_qi = qqi;
    igi->igi_qri = MAX(timer, IGMP_QRI_MIN);

    IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, igi->igi_rv,
        igi->igi_qi, igi->igi_qri));

    if (is_general_query) {
        /*
         * Schedule a current-state report on this ifp for
         * all groups, possibly containing source lists.
         * If there is a pending General Query response
         * scheduled earlier than the selected delay, do
         * not schedule any other reports.
         * Otherwise, reset the interface timer.
         */
        IGMP_PRINTF(("%s: process v3 general query on ifp 0x%llx(%s)\n",
            __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
        if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
            itp.it = igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
        }
        IGI_UNLOCK(igi);
    } else {
        IGI_UNLOCK(igi);
        /*
         * Group-source-specific queries are throttled on
         * a per-group basis to defeat denial-of-service attempts.
         * Queries for groups we are not a member of on this
         * link are simply ignored.
         */
        in_multihead_lock_shared();
        IN_LOOKUP_MULTI(&igmpv3->igmp_group, ifp, inm);
        in_multihead_lock_done();
        if (inm == NULL) {
            goto done;
        }

        INM_LOCK(inm);
        if (nsrc > 0) {
            if (!ratecheck(&inm->inm_lastgsrtv,
                &igmp_gsrdelay)) {
                IGMP_PRINTF(("%s: GS query throttled.\n",
                    __func__));
                IGMPSTAT_INC(igps_drop_gsr_queries);
                INM_UNLOCK(inm);
                INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
                goto done;
            }
        }
        IGMP_INET_PRINTF(igmpv3->igmp_group,
            ("process v3 %s query on ifp 0x%llx(%s)\n", _igmp_inet_buf,
            (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
        /*
         * If there is a pending General Query response
         * scheduled sooner than the selected delay, no
         * further report need be scheduled.
         * Otherwise, prepare to respond to the
         * group-specific or group-and-source query.
         */
        IGI_LOCK(igi);
        itp.it = igi->igi_v3_timer;
        IGI_UNLOCK(igi);
        if (itp.it == 0 || itp.it >= timer) {
            (void) igmp_input_v3_group_query(inm, timer, igmpv3);
            itp.cst = inm->inm_timer;
        }
        INM_UNLOCK(inm);
        INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
    }
done:
    if (itp.it > 0) {
        IGMP_PRINTF(("%s: v3 general query response scheduled in "
            "T+%d seconds on ifp 0x%llx(%s)\n", __func__, itp.it,
            (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
    }
    igmp_set_timeout(&itp);

    return 0;
}

/*
 * Process a received IGMPv3 group-specific or group-and-source-specific
 * query.
 * Return <0 if any error occurred. Currently this is ignored.
 */
static int
igmp_input_v3_group_query(struct in_multi *inm,
    int timer, /*const*/ struct igmpv3 *igmpv3)
{
    int retval;
    uint16_t nsrc;

    INM_LOCK_ASSERT_HELD(inm);

    retval = 0;

    switch (inm->inm_state) {
    case IGMP_NOT_MEMBER:
    case IGMP_SILENT_MEMBER:
    case IGMP_SLEEPING_MEMBER:
    case IGMP_LAZY_MEMBER:
    case IGMP_AWAKENING_MEMBER:
    case IGMP_IDLE_MEMBER:
    case IGMP_LEAVING_MEMBER:
        return retval;
    case IGMP_REPORTING_MEMBER:
    case IGMP_G_QUERY_PENDING_MEMBER:
    case IGMP_SG_QUERY_PENDING_MEMBER:
        break;
    }

    nsrc = ntohs(igmpv3->igmp_numsrc);

    /*
     * Deal with group-specific queries upfront.
     * If any group query is already pending, purge any recorded
     * source-list state if it exists, and schedule a query response
     * for this group-specific query.
     */
    if (nsrc == 0) {
        if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
            inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
            inm_clear_recorded(inm);
            timer = min(inm->inm_timer, timer);
        }
        inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
        inm->inm_timer = IGMP_RANDOM_DELAY(timer);
        return retval;
    }

    /*
     * Deal with the case where a group-and-source-specific query has
     * been received but a group-specific query is already pending.
     */
    if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
        timer = min(inm->inm_timer, timer);
        inm->inm_timer = IGMP_RANDOM_DELAY(timer);
        return retval;
    }

    /*
     * Finally, deal with the case where a group-and-source-specific
     * query has been received, where a response to a previous g-s-r
     * query exists, or none exists.
     * In this case, we need to parse the source-list which the Querier
     * has provided us with and check if we have any source list filter
     * entries at T1 for these sources. If we do not, there is no need
     * to schedule a report and the query may be dropped.
     * If we do, we must record them and schedule a current-state
     * report for those sources.
     * FIXME: Handling source lists larger than 1 mbuf requires that
     * we pass the mbuf chain pointer down to this function, and use
     * m_getptr() to walk the chain.
     */
    if (inm->inm_nsrc > 0) {
        const struct in_addr *ap;
        int i, nrecorded;

        ap = (const struct in_addr *)(igmpv3 + 1);
        nrecorded = 0;
        for (i = 0; i < nsrc; i++, ap++) {
            retval = inm_record_source(inm, ap->s_addr);
            if (retval < 0) {
                break;
            }
            nrecorded += retval;
        }
        if (nrecorded > 0) {
            IGMP_PRINTF(("%s: schedule response to SG query\n",
                __func__));
            inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
            inm->inm_timer = IGMP_RANDOM_DELAY(timer);
        }
    }

    return retval;
}

/*
 * Process a received IGMPv1 host membership report.
 *
 * NOTE: 0.0.0.0 workaround breaks const correctness.
 */
static int
igmp_input_v1_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
    /*const*/ struct igmp *igmp)
{
    struct in_ifaddr *ia;
    struct in_multi *inm;

    IGMPSTAT_INC(igps_rcv_reports);
    OIGMPSTAT_INC(igps_rcv_reports);

    if ((ifp->if_flags & IFF_LOOPBACK) ||
        (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
        return 0;
    }

    if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
        !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
        IGMPSTAT_INC(igps_rcv_badreports);
        OIGMPSTAT_INC(igps_rcv_badreports);
        return EINVAL;
    }

    /*
     * RFC 3376, Section 4.2.13, 9.2, 9.3:
     * Booting clients may use the source address 0.0.0.0. Some
     * IGMP daemons may not know how to use IP_RECVIF to determine
     * the interface upon which this message was received.
     * Replace 0.0.0.0 with the subnet address if told to do so.
     */
    if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
        IFP_TO_IA(ifp, ia);
        if (ia != NULL) {
            IFA_LOCK(&ia->ia_ifa);
            ip->ip_src.s_addr = htonl(ia->ia_subnet);
            IFA_UNLOCK(&ia->ia_ifa);
            IFA_REMREF(&ia->ia_ifa);
        }
    }

    IGMP_INET_PRINTF(igmp->igmp_group,
        ("process v1 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
        (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

    /*
     * IGMPv1 report suppression.
     * If we are a member of this group, and our membership should be
     * reported, stop our group timer and transition to the 'lazy' state.
     */
    in_multihead_lock_shared();
    IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
    in_multihead_lock_done();
    if (inm != NULL) {
        struct igmp_ifinfo *igi;

        INM_LOCK(inm);

        igi = inm->inm_igi;
        VERIFY(igi != NULL);

        IGMPSTAT_INC(igps_rcv_ourreports);
        OIGMPSTAT_INC(igps_rcv_ourreports);

        /*
         * If we are in IGMPv3 host mode, do not allow the
         * other host's IGMPv1 report to suppress our reports
         * unless explicitly configured to do so.
         */
        IGI_LOCK(igi);
        if (igi->igi_version == IGMP_VERSION_3) {
            if (igmp_legacysupp) {
                igmp_v3_suppress_group_record(inm);
            }
            IGI_UNLOCK(igi);
            INM_UNLOCK(inm);
            INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
            return 0;
        }

        INM_LOCK_ASSERT_HELD(inm);
        inm->inm_timer = 0;

        switch (inm->inm_state) {
        case IGMP_NOT_MEMBER:
        case IGMP_SILENT_MEMBER:
            break;
        case IGMP_IDLE_MEMBER:
        case IGMP_LAZY_MEMBER:
        case IGMP_AWAKENING_MEMBER:
            IGMP_INET_PRINTF(igmp->igmp_group,
                ("report suppressed for %s on ifp 0x%llx(%s)\n",
                _igmp_inet_buf,
                (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
            OS_FALLTHROUGH;
        case IGMP_SLEEPING_MEMBER:
            inm->inm_state = IGMP_SLEEPING_MEMBER;
            break;
        case IGMP_REPORTING_MEMBER:
            IGMP_INET_PRINTF(igmp->igmp_group,
                ("report suppressed for %s on ifp 0x%llx(%s)\n",
                _igmp_inet_buf,
                (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
            if (igi->igi_version == IGMP_VERSION_1) {
                inm->inm_state = IGMP_LAZY_MEMBER;
            } else if (igi->igi_version == IGMP_VERSION_2) {
                inm->inm_state = IGMP_SLEEPING_MEMBER;
            }
            break;
        case IGMP_G_QUERY_PENDING_MEMBER:
        case IGMP_SG_QUERY_PENDING_MEMBER:
        case IGMP_LEAVING_MEMBER:
            break;
        }
        IGI_UNLOCK(igi);
        INM_UNLOCK(inm);
        INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
    }

    return 0;
}

/*
 * Process a received IGMPv2 host membership report.
 *
 * NOTE: 0.0.0.0 workaround breaks const correctness.
 */
static int
igmp_input_v2_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip,
    /*const*/ struct igmp *igmp)
{
    struct in_ifaddr *ia;
    struct in_multi *inm;

    /*
     * Make sure we don't hear our own membership report. Fast
     * leave requires knowing that we are the only member of a
     * group.
     */
    IFP_TO_IA(ifp, ia);
    if (ia != NULL) {
        IFA_LOCK(&ia->ia_ifa);
        if (in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
            IFA_UNLOCK(&ia->ia_ifa);
            IFA_REMREF(&ia->ia_ifa);
            return 0;
        }
        IFA_UNLOCK(&ia->ia_ifa);
    }

    IGMPSTAT_INC(igps_rcv_reports);
    OIGMPSTAT_INC(igps_rcv_reports);

    if ((ifp->if_flags & IFF_LOOPBACK) ||
        (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
        if (ia != NULL) {
            IFA_REMREF(&ia->ia_ifa);
        }
        return 0;
    }

    if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
        !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
        if (ia != NULL) {
            IFA_REMREF(&ia->ia_ifa);
        }
        IGMPSTAT_INC(igps_rcv_badreports);
        OIGMPSTAT_INC(igps_rcv_badreports);
        return EINVAL;
    }

    /*
     * RFC 3376, Section 4.2.13, 9.2, 9.3:
     * Booting clients may use the source address 0.0.0.0. Some
     * IGMP daemons may not know how to use IP_RECVIF to determine
     * the interface upon which this message was received.
     * Replace 0.0.0.0 with the subnet address if told to do so.
     */
    if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
        if (ia != NULL) {
            IFA_LOCK(&ia->ia_ifa);
            ip->ip_src.s_addr = htonl(ia->ia_subnet);
            IFA_UNLOCK(&ia->ia_ifa);
        }
    }
    if (ia != NULL) {
        IFA_REMREF(&ia->ia_ifa);
    }

    IGMP_INET_PRINTF(igmp->igmp_group,
        ("process v2 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf,
        (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

    /*
     * IGMPv2 report suppression.
     * If we are a member of this group, and our membership should be
     * reported, and our group timer is pending or about to be reset,
     * stop our group timer by transitioning to the 'lazy' state.
     */
    in_multihead_lock_shared();
    IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
    in_multihead_lock_done();
    if (inm != NULL) {
        struct igmp_ifinfo *igi;

        INM_LOCK(inm);
        igi = inm->inm_igi;
        VERIFY(igi != NULL);

        IGMPSTAT_INC(igps_rcv_ourreports);
        OIGMPSTAT_INC(igps_rcv_ourreports);

        /*
         * If we are in IGMPv3 host mode, do not allow the
         * other host's IGMPv2 report to suppress our reports
         * unless explicitly configured to do so.
         */
        IGI_LOCK(igi);
        if (igi->igi_version == IGMP_VERSION_3) {
            if (igmp_legacysupp) {
                igmp_v3_suppress_group_record(inm);
            }
            IGI_UNLOCK(igi);
            INM_UNLOCK(inm);
            INM_REMREF(inm);
            return 0;
        }

        inm->inm_timer = 0;

        switch (inm->inm_state) {
        case IGMP_NOT_MEMBER:
        case IGMP_SILENT_MEMBER:
        case IGMP_SLEEPING_MEMBER:
            break;
        case IGMP_REPORTING_MEMBER:
        case IGMP_IDLE_MEMBER:
        case IGMP_AWAKENING_MEMBER:
            IGMP_INET_PRINTF(igmp->igmp_group,
                ("report suppressed for %s on ifp 0x%llx(%s)\n",
                _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp),
                if_name(ifp)));
            OS_FALLTHROUGH;
        case IGMP_LAZY_MEMBER:
            inm->inm_state = IGMP_LAZY_MEMBER;
            break;
        case IGMP_G_QUERY_PENDING_MEMBER:
        case IGMP_SG_QUERY_PENDING_MEMBER:
        case IGMP_LEAVING_MEMBER:
            break;
        }
        IGI_UNLOCK(igi);
        INM_UNLOCK(inm);
        INM_REMREF(inm);
    }

    return 0;
}

void
igmp_input(struct mbuf *m, int off)
{
    int iphlen;
    struct ifnet *ifp;
    struct igmp *igmp;
    struct ip *ip;
    int igmplen;
    int minlen;
    int queryver;

    IGMP_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__,
        (uint64_t)VM_KERNEL_ADDRPERM(m), off));

    ifp = m->m_pkthdr.rcvif;

    IGMPSTAT_INC(igps_rcv_total);
    OIGMPSTAT_INC(igps_rcv_total);

    /* Expect 32-bit aligned data pointer on strict-align platforms */
    MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);

    ip = mtod(m, struct ip *);
    iphlen = off;

    /* By now, ip_len no longer contains the length of the IP header */
    igmplen = ip->ip_len;

    /*
     * Validate lengths.
     */
    if (igmplen < IGMP_MINLEN) {
        IGMPSTAT_INC(igps_rcv_tooshort);
        OIGMPSTAT_INC(igps_rcv_tooshort);
        m_freem(m);
        return;
    }

    /*
     * Always pullup to the minimum size for v1/v2 or v3
     * to amortize calls to m_pulldown().
     */
    if (igmplen >= IGMP_V3_QUERY_MINLEN) {
        minlen = IGMP_V3_QUERY_MINLEN;
    } else {
        minlen = IGMP_MINLEN;
    }

    /* A bit more expensive than M_STRUCT_GET, but ensures alignment */
    M_STRUCT_GET0(igmp, struct igmp *, m, off, minlen);
    if (igmp == NULL) {
        IGMPSTAT_INC(igps_rcv_tooshort);
        OIGMPSTAT_INC(igps_rcv_tooshort);
        return;
    }
    /* N.B.: we assume the packet was correctly aligned in ip_input. */

    /*
     * Validate checksum.
     */
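    /*
     * Temporarily advance the mbuf data pointer past the IP header so
     * that in_cksum() covers only the IGMP message, then restore it.
     */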
    m->m_data += iphlen;
    m->m_len -= iphlen;
    if (in_cksum(m, igmplen)) {
        IGMPSTAT_INC(igps_rcv_badsum);
        OIGMPSTAT_INC(igps_rcv_badsum);
        m_freem(m);
        return;
    }
    m->m_data -= iphlen;
    m->m_len += iphlen;

    /*
     * IGMP control traffic is link-scope, and must have a TTL of 1.
     * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
     * probe packets may come from beyond the LAN.
     */
    if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
        IGMPSTAT_INC(igps_rcv_badttl);
        m_freem(m);
        return;
    }

    switch (igmp->igmp_type) {
    case IGMP_HOST_MEMBERSHIP_QUERY:
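        /*
         * Disambiguate the query version by size and Max Resp Code
         * (cf. RFC 3376, Section 7.1): an 8-octet query with a zero
         * code is IGMPv1, an 8-octet query with a non-zero code is
         * IGMPv2, and a query of 12 octets or more is IGMPv3.
         */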
        if (igmplen == IGMP_MINLEN) {
            if (igmp->igmp_code == 0) {
                queryver = IGMP_VERSION_1;
            } else {
                queryver = IGMP_VERSION_2;
            }
        } else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
            queryver = IGMP_VERSION_3;
        } else {
            IGMPSTAT_INC(igps_rcv_tooshort);
            OIGMPSTAT_INC(igps_rcv_tooshort);
            m_freem(m);
            return;
        }

        OIGMPSTAT_INC(igps_rcv_queries);

        switch (queryver) {
        case IGMP_VERSION_1:
            IGMPSTAT_INC(igps_rcv_v1v2_queries);
            if (!igmp_v1enable) {
                break;
            }
            if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
                m_freem(m);
                return;
            }
            break;

        case IGMP_VERSION_2:
            IGMPSTAT_INC(igps_rcv_v1v2_queries);
            if (!igmp_v2enable) {
                break;
            }
            if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
                m_freem(m);
                return;
            }
            break;

        case IGMP_VERSION_3: {
            struct igmpv3 *igmpv3;
            uint16_t igmpv3len;
            uint16_t srclen;
            int nsrc;

            IGMPSTAT_INC(igps_rcv_v3_queries);
            igmpv3 = (struct igmpv3 *)igmp;
            /*
             * Validate length based on source count.
             */
            nsrc = ntohs(igmpv3->igmp_numsrc);
            /*
             * The max value of nsrc is limited by the
             * MTU of the network on which the datagram
             * is received.
             */
            if (nsrc < 0 || nsrc > IGMP_V3_QUERY_MAX_SRCS) {
                IGMPSTAT_INC(igps_rcv_tooshort);
                OIGMPSTAT_INC(igps_rcv_tooshort);
                m_freem(m);
                return;
            }
            srclen = sizeof(struct in_addr) * (uint16_t)nsrc;
            if (igmplen < (IGMP_V3_QUERY_MINLEN + srclen)) {
                IGMPSTAT_INC(igps_rcv_tooshort);
                OIGMPSTAT_INC(igps_rcv_tooshort);
                m_freem(m);
                return;
            }
            igmpv3len = IGMP_V3_QUERY_MINLEN + srclen;
            /*
             * A bit more expensive than M_STRUCT_GET,
             * but ensures alignment.
             */
            M_STRUCT_GET0(igmpv3, struct igmpv3 *, m,
                off, igmpv3len);
            if (igmpv3 == NULL) {
                IGMPSTAT_INC(igps_rcv_tooshort);
                OIGMPSTAT_INC(igps_rcv_tooshort);
                return;
            }
            /*
             * N.B.: we assume the packet was correctly
             * aligned in ip_input.
             */
            if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
                m_freem(m);
                return;
            }
        }
        break;
        }
        break;

    case IGMP_v1_HOST_MEMBERSHIP_REPORT:
        if (!igmp_v1enable) {
            break;
        }
        if (igmp_input_v1_report(ifp, m, ip, igmp) != 0) {
            m_freem(m);
            return;
        }
        break;

    case IGMP_v2_HOST_MEMBERSHIP_REPORT:
        if (!igmp_v2enable) {
            break;
        }
        if (!ip_checkrouteralert(m)) {
            IGMPSTAT_INC(igps_rcv_nora);
        }
        if (igmp_input_v2_report(ifp, m, ip, igmp) != 0) {
            m_freem(m);
            return;
        }
        break;

    case IGMP_v3_HOST_MEMBERSHIP_REPORT:
        /*
         * Hosts do not need to process IGMPv3 membership reports,
         * as report suppression is no longer required.
         */
        if (!ip_checkrouteralert(m)) {
            IGMPSTAT_INC(igps_rcv_nora);
        }
        break;

    default:
        break;
    }

    IGMP_LOCK_ASSERT_NOTHELD();
    /*
     * Pass all valid IGMP packets up to any process(es) listening on a
     * raw IGMP socket.
     */
    rip_input(m, off);
}

1881 /*
1882 * Schedule the IGMP timer based on various parameters; the caller must ensure
1883 * that lock ordering is maintained, as this routine acquires the IGMP global lock.
1884 */
1885 void
1886 igmp_set_timeout(struct igmp_tparams *itp)
1887 {
1888 IGMP_LOCK_ASSERT_NOTHELD();
1889 VERIFY(itp != NULL);
1890
1891 if (itp->qpt != 0 || itp->it != 0 || itp->cst != 0 || itp->sct != 0) {
1892 IGMP_LOCK();
1893 if (itp->qpt != 0) {
1894 querier_present_timers_running = 1;
1895 }
1896 if (itp->it != 0) {
1897 interface_timers_running = 1;
1898 }
1899 if (itp->cst != 0) {
1900 current_state_timers_running = 1;
1901 }
1902 if (itp->sct != 0) {
1903 state_change_timers_running = 1;
1904 }
1905 if (itp->fast) {
1906 igmp_sched_fast_timeout();
1907 } else {
1908 igmp_sched_timeout();
1909 }
1910 IGMP_UNLOCK();
1911 }
1912 }
1913
1914 void
1915 igmp_set_fast_timeout(struct igmp_tparams *itp)
1916 {
1917 VERIFY(itp != NULL);
1918 itp->fast = true;
1919 igmp_set_timeout(itp);
1920 }
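
/*
 * Usage sketch (hypothetical caller, not part of this file): a path
 * that has just enqueued a state-change report would typically ask
 * for the current-state and state-change timers and let
 * igmp_set_timeout() take care of the locking and scheduling:
 *
 *	struct igmp_tparams itp;
 *	bzero(&itp, sizeof(itp));
 *	itp.cst = 1;
 *	itp.sct = 1;
 *	igmp_set_timeout(&itp);
 */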
1921
1922 /*
1923 * IGMP timer handler (per 1 second).
1924 */
1925 static void
1926 igmp_timeout(thread_call_param_t arg0, thread_call_param_t arg1 __unused)
1927 {
1928 struct ifqueue scq; /* State-change packets */
1929 struct ifqueue qrq; /* Query response packets */
1930 struct ifnet *ifp;
1931 struct igmp_ifinfo *igi;
1932 struct in_multi *inm;
1933 unsigned int loop = 0, uri_sec = 0;
1934 SLIST_HEAD(, in_multi) inm_dthead;
1935 bool fast = arg0 != NULL;
1936
1937 SLIST_INIT(&inm_dthead);
1938
1939 /*
1940 * Update coarse-grained networking timestamp (in sec.); the idea
1941 * is to piggy-back on the timeout callout to update the counter
1942 * returnable via net_uptime().
1943 */
1944 net_update_uptime();
1945
1946 IGMP_LOCK();
1947
1948 IGMP_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d, fast %d\n", __func__,
1949 querier_present_timers_running, interface_timers_running,
1950 current_state_timers_running, state_change_timers_running,
1951 fast));
1952
1953 if (fast) {
1954 /*
1955 * When running the fast timer, skip processing
1956 * of "querier present" timers since they are
1957 * based on 1-second intervals.
1958 */
1959 goto skip_query_timers;
1960 }
1961 /*
1962 * IGMPv1/v2 querier present timer processing.
1963 */
1964 if (querier_present_timers_running) {
1965 querier_present_timers_running = 0;
1966 LIST_FOREACH(igi, &igi_head, igi_link) {
1967 IGI_LOCK(igi);
1968 igmp_v1v2_process_querier_timers(igi);
1969 if (igi->igi_v1_timer > 0 || igi->igi_v2_timer > 0) {
1970 querier_present_timers_running = 1;
1971 }
1972 IGI_UNLOCK(igi);
1973 }
1974 }
1975
1976 /*
1977 * IGMPv3 General Query response timer processing.
1978 */
1979 if (interface_timers_running) {
1980 IGMP_PRINTF(("%s: interface timers running\n", __func__));
1981 interface_timers_running = 0;
1982 LIST_FOREACH(igi, &igi_head, igi_link) {
1983 IGI_LOCK(igi);
1984 if (igi->igi_version != IGMP_VERSION_3) {
1985 IGI_UNLOCK(igi);
1986 continue;
1987 }
1988 if (igi->igi_v3_timer == 0) {
1989 /* Do nothing. */
1990 } else if (--igi->igi_v3_timer == 0) {
1991 if (igmp_v3_dispatch_general_query(igi) > 0) {
1992 interface_timers_running = 1;
1993 }
1994 } else {
1995 interface_timers_running = 1;
1996 }
1997 IGI_UNLOCK(igi);
1998 }
1999 }
2000
2001 skip_query_timers:
2002 if (!current_state_timers_running &&
2003 !state_change_timers_running) {
2004 goto out_locked;
2005 }
2006
2007 current_state_timers_running = 0;
2008 state_change_timers_running = 0;
2009
2010 memset(&qrq, 0, sizeof(struct ifqueue));
2011 qrq.ifq_maxlen = IGMP_MAX_G_GS_PACKETS;
2012
2013 memset(&scq, 0, sizeof(struct ifqueue));
2014 scq.ifq_maxlen = IGMP_MAX_STATE_CHANGE_PACKETS;
2015
2016 IGMP_PRINTF(("%s: state change timers running\n", __func__));
2017
2018 /*
2019 * IGMPv1/v2/v3 host report and state-change timer processing.
2020 * Note: Processing a v3 group timer may remove a node.
2021 */
2022 LIST_FOREACH(igi, &igi_head, igi_link) {
2023 struct in_multistep step;
2024
2025 IGI_LOCK(igi);
2026 ifp = igi->igi_ifp;
2027 loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
2028 uri_sec = IGMP_RANDOM_DELAY(igi->igi_uri);
2029 IGI_UNLOCK(igi);
2030
2031 in_multihead_lock_shared();
2032 IN_FIRST_MULTI(step, inm);
2033 while (inm != NULL) {
2034 INM_LOCK(inm);
2035 if (inm->inm_ifp != ifp) {
2036 goto next;
2037 }
2038
2039 IGI_LOCK(igi);
2040 switch (igi->igi_version) {
2041 case IGMP_VERSION_1:
2042 case IGMP_VERSION_2:
2043 igmp_v1v2_process_group_timer(inm,
2044 igi->igi_version);
2045 break;
2046 case IGMP_VERSION_3:
2047 igmp_v3_process_group_timers(igi, &qrq,
2048 &scq, inm, uri_sec);
2049 break;
2050 }
2051 IGI_UNLOCK(igi);
2052 next:
2053 INM_UNLOCK(inm);
2054 IN_NEXT_MULTI(step, inm);
2055 }
2056 in_multihead_lock_done();
2057
2058 IGI_LOCK(igi);
2059 if (igi->igi_version == IGMP_VERSION_1 ||
2060 igi->igi_version == IGMP_VERSION_2) {
2061 igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop);
2062 } else if (igi->igi_version == IGMP_VERSION_3) {
2063 IGI_UNLOCK(igi);
2064 igmp_dispatch_queue(NULL, &qrq, 0, loop);
2065 igmp_dispatch_queue(NULL, &scq, 0, loop);
2066 VERIFY(qrq.ifq_len == 0);
2067 VERIFY(scq.ifq_len == 0);
2068 IGI_LOCK(igi);
2069 }
2070 /*
2071 * In case there are still any pending membership reports
2072 * which didn't get drained at version change time.
2073 */
2074 IF_DRAIN(&igi->igi_v2q);
2075 /*
2076 * Release all deferred inm records, and drain any locally
2077 * enqueued packets; do it even if the current IGMP version
2078 * for the link is no longer IGMPv3, in order to handle the
2079 * version change case.
2080 */
2081 igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
2082 VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
2083 IGI_UNLOCK(igi);
2084
2085 IF_DRAIN(&qrq);
2086 IF_DRAIN(&scq);
2087 }
2088
2089 out_locked:
2090 /* re-arm the timer if there's work to do */
2091 if (fast) {
2092 igmp_fast_timeout_run = false;
2093 } else {
2094 igmp_timeout_run = false;
2095 }
2096 igmp_sched_timeout();
2097 IGMP_UNLOCK();
2098
2099 /* Now that we've dropped all locks, release detached records */
2100 IGMP_REMOVE_DETACHED_INM(&inm_dthead);
2101 }
2102
2103 static void
2104 igmp_sched_timeout(void)
2105 {
2106 static thread_call_t igmp_timeout_tcall;
2107 uint64_t deadline = 0, leeway = 0;
2108
2109 IGMP_LOCK_ASSERT_HELD();
2110 if (igmp_timeout_tcall == NULL) {
2111 igmp_timeout_tcall =
2112 thread_call_allocate_with_options(igmp_timeout,
2113 NULL,
2114 THREAD_CALL_PRIORITY_KERNEL,
2115 THREAD_CALL_OPTIONS_ONCE);
2116 }
2117 if (!igmp_timeout_run &&
2118 (querier_present_timers_running || current_state_timers_running ||
2119 interface_timers_running || state_change_timers_running)) {
2120 igmp_timeout_run = true;
2121 clock_interval_to_deadline(igmp_timeout_delay, NSEC_PER_MSEC,
2122 &deadline);
2123 clock_interval_to_absolutetime_interval(igmp_timeout_leeway,
2124 NSEC_PER_MSEC, &leeway);
2125 thread_call_enter_delayed_with_leeway(igmp_timeout_tcall, NULL,
2126 deadline, leeway,
2127 THREAD_CALL_DELAY_LEEWAY);
2128 }
2129 }
2130
2131 static void
2132 igmp_sched_fast_timeout(void)
2133 {
2134 static thread_call_t igmp_fast_timeout_tcall;
2135
2136 IGMP_LOCK_ASSERT_HELD();
2137 if (igmp_fast_timeout_tcall == NULL) {
2138 igmp_fast_timeout_tcall =
2139 thread_call_allocate_with_options(igmp_timeout,
2140 igmp_sched_fast_timeout,
2141 THREAD_CALL_PRIORITY_KERNEL,
2142 THREAD_CALL_OPTIONS_ONCE);
2143 }
2144 if (!igmp_fast_timeout_run &&
2145 (current_state_timers_running || state_change_timers_running)) {
2146 igmp_fast_timeout_run = true;
2147 thread_call_enter(igmp_fast_timeout_tcall);
2148 }
2149 }
2150
2151 /*
2152 * Free the in_multi reference(s) for this IGMP lifecycle.
2153 *
2154 * Caller must be holding igi_lock.
2155 */
2156 static void
2157 igmp_flush_relq(struct igmp_ifinfo *igi, struct igmp_inm_relhead *inm_dthead)
2158 {
2159 struct in_multi *inm;
2160
2161 again:
2162 IGI_LOCK_ASSERT_HELD(igi);
2163 inm = SLIST_FIRST(&igi->igi_relinmhead);
2164 if (inm != NULL) {
2165 int lastref;
2166
2167 SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
2168 IGI_UNLOCK(igi);
2169
2170 in_multihead_lock_exclusive();
2171 INM_LOCK(inm);
2172 VERIFY(inm->inm_nrelecnt != 0);
2173 inm->inm_nrelecnt--;
2174 lastref = in_multi_detach(inm);
2175 VERIFY(!lastref || (!(inm->inm_debug & IFD_ATTACHED) &&
2176 inm->inm_reqcnt == 0));
2177 INM_UNLOCK(inm);
2178 in_multihead_lock_done();
2179 /* from igi_relinmhead */
2180 INM_REMREF(inm);
2181 /* from in_multihead list */
2182 if (lastref) {
2183 /*
2184 * Defer releasing our final reference, as we
2185 * are holding the IGMP lock at this point, and
2186 * we could end up with locking issues later on
2187 * (while issuing SIOCDELMULTI) when this is the
2188 * final reference count. Let the caller do it
2189 * when it is safe.
2190 */
2191 IGMP_ADD_DETACHED_INM(inm_dthead, inm);
2192 }
2193 IGI_LOCK(igi);
2194 goto again;
2195 }
2196 }
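
/*
 * Usage sketch (mirroring igmp_timeout() above): the caller owns the
 * deferred-release list and must drain it only after dropping every
 * IGMP lock:
 *
 *	SLIST_HEAD(, in_multi) inm_dthead;
 *	SLIST_INIT(&inm_dthead);
 *	IGI_LOCK(igi);
 *	igmp_flush_relq(igi, (struct igmp_inm_relhead *)&inm_dthead);
 *	IGI_UNLOCK(igi);
 *	...
 *	IGMP_REMOVE_DETACHED_INM(&inm_dthead);
 */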
2197
2198 /*
2199 * Update host report group timer for IGMPv1/v2.
2200 * Will update the global pending timer flags.
2201 */
2202 static void
2203 igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version)
2204 {
2205 int report_timer_expired;
2206
2207 IGMP_LOCK_ASSERT_HELD();
2208 INM_LOCK_ASSERT_HELD(inm);
2209 IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2210
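/*
 * Timer idiom used throughout this file: a value of 0 means the
 * timer is disarmed; a decrement that reaches 0 means it has just
 * fired; any other value means it is still running, so the global
 * "timers running" flag must be set again for the next tick.
 */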
2211 if (inm->inm_timer == 0) {
2212 report_timer_expired = 0;
2213 } else if (--inm->inm_timer == 0) {
2214 report_timer_expired = 1;
2215 } else {
2216 current_state_timers_running = 1;
2217 /* caller will schedule timer */
2218 return;
2219 }
2220
2221 switch (inm->inm_state) {
2222 case IGMP_NOT_MEMBER:
2223 case IGMP_SILENT_MEMBER:
2224 case IGMP_IDLE_MEMBER:
2225 case IGMP_LAZY_MEMBER:
2226 case IGMP_SLEEPING_MEMBER:
2227 case IGMP_AWAKENING_MEMBER:
2228 break;
2229 case IGMP_REPORTING_MEMBER:
2230 if (report_timer_expired) {
2231 inm->inm_state = IGMP_IDLE_MEMBER;
2232 (void) igmp_v1v2_queue_report(inm,
2233 (igmp_version == IGMP_VERSION_2) ?
2234 IGMP_v2_HOST_MEMBERSHIP_REPORT :
2235 IGMP_v1_HOST_MEMBERSHIP_REPORT);
2236 INM_LOCK_ASSERT_HELD(inm);
2237 IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2238 }
2239 break;
2240 case IGMP_G_QUERY_PENDING_MEMBER:
2241 case IGMP_SG_QUERY_PENDING_MEMBER:
2242 case IGMP_LEAVING_MEMBER:
2243 break;
2244 }
2245 }
2246
2247 /*
2248 * Update a group's timers for IGMPv3.
2249 * Will update the global pending timer flags.
2250 * Note: Unlocked read from igi.
2251 */
2252 static void
2253 igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
2254 struct ifqueue *qrq, struct ifqueue *scq,
2255 struct in_multi *inm, const unsigned int uri_sec)
2256 {
2257 int query_response_timer_expired;
2258 int state_change_retransmit_timer_expired;
2259
2260 IGMP_LOCK_ASSERT_HELD();
2261 INM_LOCK_ASSERT_HELD(inm);
2262 IGI_LOCK_ASSERT_HELD(igi);
2263 VERIFY(igi == inm->inm_igi);
2264
2265 query_response_timer_expired = 0;
2266 state_change_retransmit_timer_expired = 0;
2267
2268 /*
2269 * During a transition from v1/v2 compatibility mode back to v3,
2270 * a group record in REPORTING state may still have its group
2271 * timer active. This is a no-op in this function; it is easier
2272 * to deal with it here than to complicate the timeout path.
2273 */
2274 if (inm->inm_timer == 0) {
2275 query_response_timer_expired = 0;
2276 } else if (--inm->inm_timer == 0) {
2277 query_response_timer_expired = 1;
2278 } else {
2279 current_state_timers_running = 1;
2280 /* caller will schedule timer */
2281 }
2282
2283 if (inm->inm_sctimer == 0) {
2284 state_change_retransmit_timer_expired = 0;
2285 } else if (--inm->inm_sctimer == 0) {
2286 state_change_retransmit_timer_expired = 1;
2287 } else {
2288 state_change_timers_running = 1;
2289 /* caller will schedule timer */
2290 }
2291
2292 /* We are in timer callback, so be quick about it. */
2293 if (!state_change_retransmit_timer_expired &&
2294 !query_response_timer_expired) {
2295 return;
2296 }
2297
2298 switch (inm->inm_state) {
2299 case IGMP_NOT_MEMBER:
2300 case IGMP_SILENT_MEMBER:
2301 case IGMP_SLEEPING_MEMBER:
2302 case IGMP_LAZY_MEMBER:
2303 case IGMP_AWAKENING_MEMBER:
2304 case IGMP_IDLE_MEMBER:
2305 break;
2306 case IGMP_G_QUERY_PENDING_MEMBER:
2307 case IGMP_SG_QUERY_PENDING_MEMBER:
2308 /*
2309 * Respond to a previously pending Group-Specific
2310 * or Group-and-Source-Specific query by enqueueing
2311 * the appropriate Current-State report for
2312 * immediate transmission.
2313 */
2314 if (query_response_timer_expired) {
2315 int retval;
2316
2317 retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
2318 (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
2319 IGMP_PRINTF(("%s: enqueue record = %d\n",
2320 __func__, retval));
2321 inm->inm_state = IGMP_REPORTING_MEMBER;
2322 /* XXX Clear recorded sources for next time. */
2323 inm_clear_recorded(inm);
2324 }
2325 OS_FALLTHROUGH;
2326 case IGMP_REPORTING_MEMBER:
2327 case IGMP_LEAVING_MEMBER:
2328 if (state_change_retransmit_timer_expired) {
2329 /*
2330 * State-change retransmission timer fired.
2331 * If there are any further pending retransmissions,
2332 * set the global pending state-change flag, and
2333 * reset the timer.
2334 */
2335 if (--inm->inm_scrv > 0) {
2336 inm->inm_sctimer = (uint16_t)uri_sec;
2337 state_change_timers_running = 1;
2338 /* caller will schedule timer */
2339 }
2340 /*
2341 * Retransmit the previously computed state-change
2342 * report. If there are no further pending
2343 * retransmissions, the mbuf queue will be consumed.
2344 * Update T0 state to T1 as we have now sent
2345 * a state-change.
2346 */
2347 (void) igmp_v3_merge_state_changes(inm, scq);
2348
2349 inm_commit(inm);
2350 IGMP_INET_PRINTF(inm->inm_addr,
2351 ("%s: T1 -> T0 for %s/%s\n", __func__,
2352 _igmp_inet_buf, if_name(inm->inm_ifp)));
2353
2354 /*
2355 * If we are leaving the group for good, make sure
2356 * we release IGMP's reference to it.
2357 * This release must be deferred using a SLIST,
2358 * as we are called from a loop which traverses
2359 * the in_multihead list.
2360 */
2361 if (inm->inm_state == IGMP_LEAVING_MEMBER &&
2362 inm->inm_scrv == 0) {
2363 inm->inm_state = IGMP_NOT_MEMBER;
2364 /*
2365 * A reference has already been held in
2366 * igmp_final_leave() for this inm, so
2367 * no need to hold another one. We also
2368 * bumped up its request count then, so
2369 * that it stays in in_multihead. Both
2370 * of them will be released when it is
2371 * dequeued later on.
2372 */
2373 VERIFY(inm->inm_nrelecnt != 0);
2374 SLIST_INSERT_HEAD(&igi->igi_relinmhead,
2375 inm, inm_nrele);
2376 }
2377 }
2378 break;
2379 }
2380 }
2381
2382 /*
2383 * Suppress a group's pending response to a group or source/group query.
2384 *
2385 * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
2386 * Do NOT update ST1/ST0 as this operation merely suppresses
2387 * the currently pending group record.
2388 * Do NOT suppress the response to a general query. It is possible but
2389 * it would require adding another state or flag.
2390 */
2391 static void
2392 igmp_v3_suppress_group_record(struct in_multi *inm)
2393 {
2394 INM_LOCK_ASSERT_HELD(inm);
2395 IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2396
2397 VERIFY(inm->inm_igi->igi_version == IGMP_VERSION_3);
2398
2399 if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER &&
2400 inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER) {
2401 return;
2402 }
2403
2404 if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
2405 inm_clear_recorded(inm);
2406 }
2407
2408 inm->inm_timer = 0;
2409 inm->inm_state = IGMP_REPORTING_MEMBER;
2410 }
2411
2412 /*
2413 * Switch to a different IGMP version on the given interface,
2414 * as per Section 7.2.1 of RFC 3376.
2415 */
2416 static uint32_t
2417 igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version)
2418 {
2419 int old_version_timer;
2420
2421 IGI_LOCK_ASSERT_HELD(igi);
2422
2423 IGMP_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__,
2424 igmp_version, (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2425 if_name(igi->igi_ifp)));
2426
2427 if (igmp_version == IGMP_VERSION_1 || igmp_version == IGMP_VERSION_2) {
2428 /*
2429 * Compute the "Older Version Querier Present" timer as per
2430 * Section 8.12, in seconds.
2431 */
2432 old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
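/*
 * e.g. with the RFC 3376 defaults (RV = 2, QI = 125s, QRI = 10s)
 * this works out to 2 * 125 + 10 = 260 seconds.
 */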
2433
2434 if (igmp_version == IGMP_VERSION_1) {
2435 igi->igi_v1_timer = old_version_timer;
2436 igi->igi_v2_timer = 0;
2437 } else if (igmp_version == IGMP_VERSION_2) {
2438 igi->igi_v1_timer = 0;
2439 igi->igi_v2_timer = old_version_timer;
2440 }
2441 }
2442
2443 if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2444 if (igi->igi_version != IGMP_VERSION_2) {
2445 igmp_v3_cancel_link_timers(igi);
2446 igi->igi_version = IGMP_VERSION_2;
2447 }
2448 } else if (igi->igi_v1_timer > 0) {
2449 if (igi->igi_version != IGMP_VERSION_1) {
2450 igmp_v3_cancel_link_timers(igi);
2451 igi->igi_version = IGMP_VERSION_1;
2452 }
2453 }
2454
2455 IGI_LOCK_ASSERT_HELD(igi);
2456
2457 return MAX(igi->igi_v1_timer, igi->igi_v2_timer);
2458 }
2459
2460 /*
2461 * Cancel pending IGMPv3 timers for the given link and all groups
2462 * joined on it; state-change, general-query, and group-query timers.
2463 *
2464 * Only ever called on a transition from v3 to Compatibility mode. Kill
2465 * the timers stone dead (this may be expensive for large N groups); they
2466 * will be restarted if Compatibility Mode deems that they must be, due to
2467 * query processing.
2468 */
2469 static void
2470 igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
2471 {
2472 struct ifnet *ifp;
2473 struct in_multi *inm;
2474 struct in_multistep step;
2475
2476 IGI_LOCK_ASSERT_HELD(igi);
2477
2478 IGMP_PRINTF(("%s: cancel v3 timers on ifp 0x%llx(%s)\n", __func__,
2479 (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), if_name(igi->igi_ifp)));
2480
2481 /*
2482 * Stop the v3 General Query Response on this link stone dead.
2483 * If timer is woken up due to interface_timers_running,
2484 * the flag will be cleared if there are no pending link timers.
2485 */
2486 igi->igi_v3_timer = 0;
2487
2488 /*
2489 * Now clear the current-state and state-change report timers
2490 * for all memberships scoped to this link.
2491 */
2492 ifp = igi->igi_ifp;
2493 IGI_UNLOCK(igi);
2494
2495 in_multihead_lock_shared();
2496 IN_FIRST_MULTI(step, inm);
2497 while (inm != NULL) {
2498 INM_LOCK(inm);
2499 if (inm->inm_ifp != ifp && inm->inm_igi != igi) {
2500 goto next;
2501 }
2502
2503 switch (inm->inm_state) {
2504 case IGMP_NOT_MEMBER:
2505 case IGMP_SILENT_MEMBER:
2506 case IGMP_IDLE_MEMBER:
2507 case IGMP_LAZY_MEMBER:
2508 case IGMP_SLEEPING_MEMBER:
2509 case IGMP_AWAKENING_MEMBER:
2510 /*
2511 * These states are either not relevant in v3 mode,
2512 * or are unreported. Do nothing.
2513 */
2514 break;
2515 case IGMP_LEAVING_MEMBER:
2516 /*
2517 * If we are leaving the group and switching to
2518 * compatibility mode, we need to release the final
2519 * reference held for issuing the INCLUDE {}, and
2520 * transition to REPORTING to ensure the host leave
2521 * message is sent upstream to the old querier --
2522 * transition to NOT would lose the leave and race.
2523 * During igmp_final_leave(), we bumped up both the
2524 * request and reference counts. Since we cannot
2525 * call in_multi_detach() here, defer this task to
2526 * the timer routine.
2527 */
2528 VERIFY(inm->inm_nrelecnt != 0);
2529 IGI_LOCK(igi);
2530 SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
2531 IGI_UNLOCK(igi);
2532 OS_FALLTHROUGH;
2533 case IGMP_G_QUERY_PENDING_MEMBER:
2534 case IGMP_SG_QUERY_PENDING_MEMBER:
2535 inm_clear_recorded(inm);
2536 OS_FALLTHROUGH;
2537 case IGMP_REPORTING_MEMBER:
2538 inm->inm_state = IGMP_REPORTING_MEMBER;
2539 break;
2540 }
2541 /*
2542 * Always clear state-change and group report timers.
2543 * Free any pending IGMPv3 state-change records.
2544 */
2545 inm->inm_sctimer = 0;
2546 inm->inm_timer = 0;
2547 IF_DRAIN(&inm->inm_scq);
2548 next:
2549 INM_UNLOCK(inm);
2550 IN_NEXT_MULTI(step, inm);
2551 }
2552 in_multihead_lock_done();
2553
2554 IGI_LOCK(igi);
2555 }
2556
2557 /*
2558 * Update the Older Version Querier Present timers for a link.
2559 * See Section 7.2.1 of RFC 3376.
2560 */
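/*
 * In summary, assuming igmp_v1enable and igmp_v2enable are both set:
 *
 *	v1 timer	v2 timer	resulting compatibility mode
 *	   0		   0		IGMPv3
 *	   0		  >0		IGMPv2
 *	  >0		  any		IGMPv1 (v2 timer is stopped)
 */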
2561 static void
2562 igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
2563 {
2564 IGI_LOCK_ASSERT_HELD(igi);
2565
2566 if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
2567 /*
2568 * IGMPv1 and IGMPv2 Querier Present timers expired.
2569 *
2570 * Revert to IGMPv3.
2571 */
2572 if (igi->igi_version != IGMP_VERSION_3) {
2573 IGMP_PRINTF(("%s: transition from v%d -> v%d "
2574 "on 0x%llx(%s)\n", __func__,
2575 igi->igi_version, IGMP_VERSION_3,
2576 (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2577 if_name(igi->igi_ifp)));
2578 igi->igi_version = IGMP_VERSION_3;
2579 IF_DRAIN(&igi->igi_v2q);
2580 }
2581 } else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2582 /*
2583 * IGMPv1 Querier Present timer expired,
2584 * IGMPv2 Querier Present timer running.
2585 * If IGMPv2 was disabled since last timeout,
2586 * revert to IGMPv3.
2587 * If IGMPv2 is enabled, revert to IGMPv2.
2588 */
2589 if (!igmp_v2enable) {
2590 IGMP_PRINTF(("%s: transition from v%d -> v%d "
2591 "on 0x%llx(%s%d)\n", __func__,
2592 igi->igi_version, IGMP_VERSION_3,
2593 (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2594 igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
2595 igi->igi_v2_timer = 0;
2596 igi->igi_version = IGMP_VERSION_3;
2597 IF_DRAIN(&igi->igi_v2q);
2598 } else {
2599 --igi->igi_v2_timer;
2600 if (igi->igi_version != IGMP_VERSION_2) {
2601 IGMP_PRINTF(("%s: transition from v%d -> v%d "
2602 "on 0x%llx(%s)\n", __func__,
2603 igi->igi_version, IGMP_VERSION_2,
2604 (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2605 if_name(igi->igi_ifp)));
2606 IF_DRAIN(&igi->igi_gq);
2607 igmp_v3_cancel_link_timers(igi);
2608 igi->igi_version = IGMP_VERSION_2;
2609 }
2610 }
2611 } else if (igi->igi_v1_timer > 0) {
2612 /*
2613 * IGMPv1 Querier Present timer running.
2614 * Stop IGMPv2 timer if running.
2615 *
2616 * If IGMPv1 was disabled since last timeout,
2617 * revert to IGMPv3.
2618 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
2619 */
2620 if (!igmp_v1enable) {
2621 IGMP_PRINTF(("%s: transition from v%d -> v%d "
2622 "on 0x%llx(%s%d)\n", __func__,
2623 igi->igi_version, IGMP_VERSION_3,
2624 (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2625 igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
2626 igi->igi_v1_timer = 0;
2627 igi->igi_version = IGMP_VERSION_3;
2628 IF_DRAIN(&igi->igi_v2q);
2629 } else {
2630 --igi->igi_v1_timer;
2631 }
2632 if (igi->igi_v2_timer > 0) {
2633 IGMP_PRINTF(("%s: cancel v2 timer on 0x%llx(%s%d)\n",
2634 __func__,
2635 (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp),
2636 igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
2637 igi->igi_v2_timer = 0;
2638 }
2639 }
2640 }
2641
2642 /*
2643 * Dispatch an IGMPv1/v2 host report or leave message.
2644 * These are always small enough to fit inside a single mbuf.
2645 */
2646 static int
2647 igmp_v1v2_queue_report(struct in_multi *inm, const int type)
2648 {
2649 struct ifnet *ifp;
2650 struct igmp *igmp;
2651 struct ip *ip;
2652 struct mbuf *m;
2653 int error = 0;
2654
2655 INM_LOCK_ASSERT_HELD(inm);
2656 IGI_LOCK_ASSERT_HELD(inm->inm_igi);
2657
2658 ifp = inm->inm_ifp;
2659
2660 MGETHDR(m, M_DONTWAIT, MT_DATA);
2661 if (m == NULL) {
2662 return ENOMEM;
2663 }
2664 MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
2665
2666 m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);
2667
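/*
 * Temporarily advance m_data past the space reserved for the IP
 * header, so that in_cksum() below covers only the 8-byte IGMP
 * message; the pointer is backed up afterwards to fill in the
 * IP header.
 */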
2668 m->m_data += sizeof(struct ip);
2669 m->m_len = sizeof(struct igmp);
2670
2671 igmp = mtod(m, struct igmp *);
2672 igmp->igmp_type = (u_char)type;
2673 igmp->igmp_code = 0;
2674 igmp->igmp_group = inm->inm_addr;
2675 igmp->igmp_cksum = 0;
2676 igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));
2677
2678 m->m_data -= sizeof(struct ip);
2679 m->m_len += sizeof(struct ip);
2680
2681 ip = mtod(m, struct ip *);
2682 ip->ip_tos = 0;
2683 ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
2684 ip->ip_off = 0;
2685 ip->ip_p = IPPROTO_IGMP;
2686 ip->ip_src.s_addr = INADDR_ANY;
2687
2688 if (type == IGMP_HOST_LEAVE_MESSAGE) {
2689 ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
2690 } else {
2691 ip->ip_dst = inm->inm_addr;
2692 }
2693
2694 igmp_save_context(m, ifp);
2695
2696 m->m_flags |= M_IGMPV2;
2697 if (inm->inm_igi->igi_flags & IGIF_LOOPBACK) {
2698 m->m_flags |= M_IGMP_LOOP;
2699 }
2700
2701 /*
2702 * Due to the fact that at this point we are possibly holding
2703 * in_multihead_lock in shared or exclusive mode, we can't call
2704 * igmp_sendpkt() here since that will eventually call ip_output(),
2705 * which will try to lock in_multihead_lock and cause a deadlock.
2706 * Instead we defer the work to the igmp_timeout() thread, thus
2707 * avoiding unlocking in_multihead_lock here.
2708 */
2709 if (IF_QFULL(&inm->inm_igi->igi_v2q)) {
2710 IGMP_PRINTF(("%s: v1/v2 outbound queue full\n", __func__));
2711 error = ENOMEM;
2712 m_freem(m);
2713 } else {
2714 IF_ENQUEUE(&inm->inm_igi->igi_v2q, m);
2715 VERIFY(error == 0);
2716 }
2717 return error;
2718 }
2719
2720 /*
2721 * Process a state change from the upper layer for the given IPv4 group.
2722 *
2723 * Each socket holds a reference on the in_multi in its own ip_moptions.
2724 * The socket layer will have made the necessary updates to the group
2725 * state, it is now up to IGMP to issue a state change report if there
2726 * has been any change between T0 (when the last state-change was issued)
2727 * and T1 (now).
2728 *
2729 * We use the IGMPv3 state machine at group level. The IGMP module
2730 * however makes the decision as to which IGMP protocol version to speak.
2731 * A state change *from* INCLUDE {} always means an initial join.
2732 * A state change *to* INCLUDE {} always means a final leave.
2733 *
2734 * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
2735 * save ourselves a bunch of work; any exclusive mode groups need not
2736 * compute source filter lists.
2737 */
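/*
 * Example (illustrative): a first ASM join of 239.1.1.1 moves the
 * group from T0 = MCAST_UNDEFINED to T1 = MCAST_EXCLUDE {}, which is
 * classified below as an initial join; dropping the last membership
 * moves T1 back to MCAST_UNDEFINED, a final leave.
 */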
2738 int
2739 igmp_change_state(struct in_multi *inm, struct igmp_tparams *itp)
2740 {
2741 struct igmp_ifinfo *igi;
2742 struct ifnet *ifp;
2743 int error = 0;
2744
2745 VERIFY(itp != NULL);
2746 bzero(itp, sizeof(*itp));
2747
2748 INM_LOCK_ASSERT_HELD(inm);
2749 VERIFY(inm->inm_igi != NULL);
2750 IGI_LOCK_ASSERT_NOTHELD(inm->inm_igi);
2751
2752 /*
2753 * Try to detect if the upper layer just asked us to change state
2754 * for an interface which has now gone away.
2755 */
2756 VERIFY(inm->inm_ifma != NULL);
2757 ifp = inm->inm_ifma->ifma_ifp;
2758 /*
2759 * Sanity check that netinet's notion of ifp is the same as net's.
2760 */
2761 VERIFY(inm->inm_ifp == ifp);
2762
2763 igi = IGMP_IFINFO(ifp);
2764 VERIFY(igi != NULL);
2765
2766 /*
2767 * If we detect a state transition to or from MCAST_UNDEFINED
2768 * for this group, then we are starting or finishing an IGMP
2769 * life cycle for this group.
2770 */
2771 if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
2772 IGMP_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2773 inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode));
2774 if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
2775 IGMP_PRINTF(("%s: initial join\n", __func__));
2776 error = igmp_initial_join(inm, igi, itp);
2777 goto out;
2778 } else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
2779 IGMP_PRINTF(("%s: final leave\n", __func__));
2780 igmp_final_leave(inm, igi, itp);
2781 goto out;
2782 }
2783 } else {
2784 IGMP_PRINTF(("%s: filter set change\n", __func__));
2785 }
2786
2787 error = igmp_handle_state_change(inm, igi, itp);
2788 out:
2789 return error;
2790 }
2791
2792 /*
2793 * Perform the initial join for an IGMP group.
2794 *
2795 * When joining a group:
2796 * If the group should have its IGMP traffic suppressed, do nothing.
2797 * IGMPv1 starts sending IGMPv1 host membership reports.
2798 * IGMPv2 starts sending IGMPv2 host membership reports.
2799 * IGMPv3 will schedule an IGMPv3 state-change report containing the
2800 * initial state of the membership.
2801 */
2802 static int
2803 igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi,
2804 struct igmp_tparams *itp)
2805 {
2806 struct ifnet *ifp;
2807 struct ifqueue *ifq;
2808 int error, retval, syncstates;
2809
2810 INM_LOCK_ASSERT_HELD(inm);
2811 IGI_LOCK_ASSERT_NOTHELD(igi);
2812 VERIFY(itp != NULL);
2813
2814 IGMP_INET_PRINTF(inm->inm_addr,
2815 ("%s: initial join %s on ifp 0x%llx(%s)\n", __func__,
2816 _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
2817 if_name(inm->inm_ifp)));
2818
2819 error = 0;
2820 syncstates = 1;
2821
2822 ifp = inm->inm_ifp;
2823
2824 IGI_LOCK(igi);
2825 VERIFY(igi->igi_ifp == ifp);
2826
2827 /*
2828 * Groups joined on loopback or marked as 'not reported',
2829 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
2830 * are never reported in any IGMP protocol exchanges.
2831 * All other groups enter the appropriate IGMP state machine
2832 * for the version in use on this link.
2833 * A link marked as IGIF_SILENT causes IGMP to be completely
2834 * disabled for the link.
2835 */
2836 if ((ifp->if_flags & IFF_LOOPBACK) ||
2837 (igi->igi_flags & IGIF_SILENT) ||
2838 !igmp_isgroupreported(inm->inm_addr)) {
2839 IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
2840 __func__));
2841 inm->inm_state = IGMP_SILENT_MEMBER;
2842 inm->inm_timer = 0;
2843 } else {
2844 /*
2845 * Deal with overlapping in_multi lifecycle.
2846 * If this group was LEAVING, then make sure
2847 * we drop the reference we picked up to keep the
2848 * group around for the final INCLUDE {} enqueue.
2849 * Since we cannot call in_multi_detach() here,
2850 * defer this task to the timer routine.
2851 */
2852 if (igi->igi_version == IGMP_VERSION_3 &&
2853 inm->inm_state == IGMP_LEAVING_MEMBER) {
2854 VERIFY(inm->inm_nrelecnt != 0);
2855 SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
2856 }
2857
2858 inm->inm_state = IGMP_REPORTING_MEMBER;
2859
2860 switch (igi->igi_version) {
2861 case IGMP_VERSION_1:
2862 case IGMP_VERSION_2:
2863 inm->inm_state = IGMP_IDLE_MEMBER;
2864 error = igmp_v1v2_queue_report(inm,
2865 (igi->igi_version == IGMP_VERSION_2) ?
2866 IGMP_v2_HOST_MEMBERSHIP_REPORT :
2867 IGMP_v1_HOST_MEMBERSHIP_REPORT);
2868
2869 INM_LOCK_ASSERT_HELD(inm);
2870 IGI_LOCK_ASSERT_HELD(igi);
2871
2872 if (error == 0) {
2873 inm->inm_timer =
2874 IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI);
2875 itp->cst = 1;
2876 }
2877 break;
2878
2879 case IGMP_VERSION_3:
2880 /*
2881 * Defer update of T0 to T1, until the first copy
2882 * of the state change has been transmitted.
2883 */
2884 syncstates = 0;
2885
2886 /*
2887 * Immediately enqueue a State-Change Report for
2888 * this interface, freeing any previous reports.
2889 * Don't kick the timers if there is nothing to do,
2890 * or if an error occurred.
2891 */
2892 ifq = &inm->inm_scq;
2893 IF_DRAIN(ifq);
2894 retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
2895 0, 0);
2896 itp->cst = (ifq->ifq_len > 0);
2897 IGMP_PRINTF(("%s: enqueue record = %d\n",
2898 __func__, retval));
2899 if (retval <= 0) {
2900 error = retval * -1;
2901 break;
2902 }
2903
2904 /*
2905 * Schedule transmission of pending state-change
2906 * report up to RV times for this link. The timer
2907 * will fire at the next igmp_timeout (1 second),
2908 * giving us an opportunity to merge the reports.
2909 */
2910 if (igi->igi_flags & IGIF_LOOPBACK) {
2911 inm->inm_scrv = 1;
2912 } else {
2913 VERIFY(igi->igi_rv > 1);
2914 inm->inm_scrv = (uint16_t)igi->igi_rv;
2915 }
2916 inm->inm_sctimer = 1;
2917 itp->sct = 1;
2918
2919 error = 0;
2920 break;
2921 }
2922 }
2923 IGI_UNLOCK(igi);
2924
2925 /*
2926 * Only update the T0 state if state change is atomic,
2927 * i.e. we don't need to wait for a timer to fire before we
2928 * can consider the state change to have been communicated.
2929 */
2930 if (syncstates) {
2931 inm_commit(inm);
2932 IGMP_INET_PRINTF(inm->inm_addr,
2933 ("%s: T1 -> T0 for %s/%s\n", __func__,
2934 _igmp_inet_buf, if_name(inm->inm_ifp)));
2935 }
2936
2937 return error;
2938 }
2939
2940 /*
2941 * Issue an intermediate state change during the IGMP life-cycle.
2942 */
2943 static int
2944 igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi,
2945 struct igmp_tparams *itp)
2946 {
2947 struct ifnet *ifp;
2948 int retval = 0;
2949
2950 INM_LOCK_ASSERT_HELD(inm);
2951 IGI_LOCK_ASSERT_NOTHELD(igi);
2952 VERIFY(itp != NULL);
2953
2954 IGMP_INET_PRINTF(inm->inm_addr,
2955 ("%s: state change for %s on ifp 0x%llx(%s)\n", __func__,
2956 _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
2957 if_name(inm->inm_ifp)));
2958
2959 ifp = inm->inm_ifp;
2960
2961 IGI_LOCK(igi);
2962 VERIFY(igi->igi_ifp == ifp);
2963
2964 if ((ifp->if_flags & IFF_LOOPBACK) ||
2965 (igi->igi_flags & IGIF_SILENT) ||
2966 !igmp_isgroupreported(inm->inm_addr) ||
2967 (igi->igi_version != IGMP_VERSION_3)) {
2968 IGI_UNLOCK(igi);
2969 if (!igmp_isgroupreported(inm->inm_addr)) {
2970 IGMP_PRINTF(("%s: not kicking state "
2971 "machine for silent group\n", __func__));
2972 }
2973 IGMP_PRINTF(("%s: nothing to do\n", __func__));
2974 inm_commit(inm);
2975 IGMP_INET_PRINTF(inm->inm_addr,
2976 ("%s: T1 -> T0 for %s/%s\n", __func__,
2977 _igmp_inet_buf, inm->inm_ifp->if_name));
2978 goto done;
2979 }
2980
2981 IF_DRAIN(&inm->inm_scq);
2982
2983 retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
2984 itp->cst = (inm->inm_scq.ifq_len > 0);
2985 IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
2986 if (retval <= 0) {
2987 IGI_UNLOCK(igi);
2988 retval *= -1;
2989 goto done;
2990 }
2991 /*
2992 * If record(s) were enqueued, start the state-change
2993 * report timer for this group.
2994 */
2995 inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : (uint16_t)igi->igi_rv);
2996 inm->inm_sctimer = 1;
2997 itp->sct = 1;
2998 IGI_UNLOCK(igi);
2999 done:
3000 return retval;
3001 }
3002
3003 /*
3004 * Perform the final leave for an IGMP group.
3005 *
3006 * When leaving a group:
3007 * IGMPv1 does nothing.
3008 * IGMPv2 sends a host leave message, if and only if we are the reporter.
3009 * IGMPv3 enqueues a state-change report containing a transition
3010 * to INCLUDE {} for immediate transmission.
3011 */
3012 static void
3013 igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi,
3014 struct igmp_tparams *itp)
3015 {
3016 int syncstates = 1;
3017 bool retried_already = false;
3018
3019 INM_LOCK_ASSERT_HELD(inm);
3020 IGI_LOCK_ASSERT_NOTHELD(igi);
3021 VERIFY(itp != NULL);
3022
3023 IGMP_INET_PRINTF(inm->inm_addr,
3024 ("%s: final leave %s on ifp 0x%llx(%s)\n", __func__,
3025 _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp),
3026 if_name(inm->inm_ifp)));
3027
3028 retry:
3029 switch (inm->inm_state) {
3030 case IGMP_NOT_MEMBER:
3031 case IGMP_SILENT_MEMBER:
3032 case IGMP_LEAVING_MEMBER:
3033 /* Already leaving or left; do nothing. */
3034 IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
3035 __func__));
3036 break;
3037 case IGMP_REPORTING_MEMBER:
3038 case IGMP_IDLE_MEMBER:
3039 case IGMP_G_QUERY_PENDING_MEMBER:
3040 case IGMP_SG_QUERY_PENDING_MEMBER:
3041 IGI_LOCK(igi);
3042 if (igi->igi_version == IGMP_VERSION_2) {
3043 if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
3044 inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
3045 /*
3046 * We may be in the process of downgrading to
3047 * IGMPv2 but because we just grabbed the
3048 * igi_lock we may have lost the race.
3049 */
3050 if (!retried_already) {
3051 IGI_UNLOCK(igi);
3052 retried_already = true;
3053 goto retry;
3054 } else {
3055 /*
3056 * Proceed with leaving the group
3057 * as if it were IGMPv2 even though we
3058 * may have an inconsistent multicast state.
3059 */
3060 }
3061 }
3062 /* schedule timer if enqueue is successful */
3063 itp->cst = (igmp_v1v2_queue_report(inm,
3064 IGMP_HOST_LEAVE_MESSAGE) == 0);
3065
3066 INM_LOCK_ASSERT_HELD(inm);
3067 IGI_LOCK_ASSERT_HELD(igi);
3068
3069 inm->inm_state = IGMP_NOT_MEMBER;
3070 } else if (igi->igi_version == IGMP_VERSION_3) {
3071 /*
3072 * Stop group timer and all pending reports.
3073 * Immediately enqueue a state-change report
3074 * TO_IN {} to be sent on the next timeout,
3075 * giving us an opportunity to merge reports.
3076 */
3077 IF_DRAIN(&inm->inm_scq);
3078 inm->inm_timer = 0;
3079 if (igi->igi_flags & IGIF_LOOPBACK) {
3080 inm->inm_scrv = 1;
3081 } else {
3082 inm->inm_scrv = (uint16_t)igi->igi_rv;
3083 }
3084 IGMP_INET_PRINTF(inm->inm_addr,
3085 ("%s: Leaving %s/%s with %d "
3086 "pending retransmissions.\n", __func__,
3087 _igmp_inet_buf, if_name(inm->inm_ifp),
3088 inm->inm_scrv));
3089 if (inm->inm_scrv == 0) {
3090 inm->inm_state = IGMP_NOT_MEMBER;
3091 inm->inm_sctimer = 0;
3092 } else {
3093 int retval;
3094 /*
3095 * Stick around in the in_multihead list;
3096 * the final detach will be issued by
3097 * igmp_v3_process_group_timers() when
3098 * the retransmit timer expires.
3099 */
3100 INM_ADDREF_LOCKED(inm);
3101 VERIFY(inm->inm_debug & IFD_ATTACHED);
3102 inm->inm_reqcnt++;
3103 VERIFY(inm->inm_reqcnt >= 1);
3104 inm->inm_nrelecnt++;
3105 VERIFY(inm->inm_nrelecnt != 0);
3106
3107 retval = igmp_v3_enqueue_group_record(
3108 &inm->inm_scq, inm, 1, 0, 0);
3109 itp->cst = (inm->inm_scq.ifq_len > 0);
3110 KASSERT(retval != 0,
3111 ("%s: enqueue record = %d\n", __func__,
3112 retval));
3113
3114 inm->inm_state = IGMP_LEAVING_MEMBER;
3115 inm->inm_sctimer = 1;
3116 itp->sct = 1;
3117 syncstates = 0;
3118 }
3119 }
3120 IGI_UNLOCK(igi);
3121 break;
3122 case IGMP_LAZY_MEMBER:
3123 case IGMP_SLEEPING_MEMBER:
3124 case IGMP_AWAKENING_MEMBER:
3125 /* Our reports are suppressed; do nothing. */
3126 break;
3127 }
3128
3129 if (syncstates) {
3130 inm_commit(inm);
3131 IGMP_INET_PRINTF(inm->inm_addr,
3132 ("%s: T1 -> T0 for %s/%s\n", __func__,
3133 _igmp_inet_buf, if_name(inm->inm_ifp)));
3134 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
3135 IGMP_INET_PRINTF(inm->inm_addr,
3136 ("%s: T1 now MCAST_UNDEFINED for %s/%s\n",
3137 __func__, _igmp_inet_buf, if_name(inm->inm_ifp)));
3138 }
3139 }
3140
3141 /*
3142 * Enqueue an IGMPv3 group record to the given output queue.
3143 *
3144 * XXX This function could do with having the allocation code
3145 * split out, and the multiple-tree-walks coalesced into a single
3146 * routine as has been done in igmp_v3_enqueue_filter_change().
3147 *
3148 * If is_state_change is zero, a current-state record is appended.
3149 * If is_state_change is non-zero, a state-change report is appended.
3150 *
3151 * If is_group_query is non-zero, an mbuf packet chain is allocated.
3152 * If is_group_query is zero, and there is a packet with enough free
3153 * space at the tail of the queue, the record will be appended to
3154 * that packet.
3155 * Otherwise a new mbuf packet chain is allocated.
3156 *
3157 * If is_source_query is non-zero, each source is checked to see if
3158 * it was recorded for a Group-Source query, and will be omitted if
3159 * it is not both in-mode and recorded.
3160 *
3161 * The function will attempt to allocate leading space in the packet
3162 * for the IP/IGMP header to be prepended without fragmenting the chain.
3163 *
3164 * If successful the size of all data appended to the queue is returned,
3165 * otherwise an error code less than zero is returned, or zero if
3166 * no record(s) were appended.
3167 */
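/*
 * Return value convention, as relied upon by callers in this file:
 *
 *	> 0	size in bytes of data appended; arm the report timer
 *	== 0	nothing to send
 *	< 0	negated errno (e.g. -ENOMEM); callers negate it back
 */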
3168 static int
3169 igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
3170 const int is_state_change, const int is_group_query,
3171 const int is_source_query)
3172 {
3173 struct igmp_grouprec ig;
3174 struct igmp_grouprec *pig;
3175 struct ifnet *ifp;
3176 struct ip_msource *ims, *nims;
3177 struct mbuf *m0, *m, *md;
3178 int error, is_filter_list_change;
3179 int minrec0len, m0srcs, nbytes, off;
3180 uint16_t msrcs;
3181 int record_has_sources;
3182 int now;
3183 int type;
3184 in_addr_t naddr;
3185 uint16_t mode;
3186 u_int16_t ig_numsrc;
3187
3188 INM_LOCK_ASSERT_HELD(inm);
3189 IGI_LOCK_ASSERT_HELD(inm->inm_igi);
3190
3191 error = 0;
3192 ifp = inm->inm_ifp;
3193 is_filter_list_change = 0;
3194 m = NULL;
3195 m0 = NULL;
3196 m0srcs = 0;
3197 msrcs = 0;
3198 nbytes = 0;
3199 nims = NULL;
3200 record_has_sources = 1;
3201 pig = NULL;
3202 type = IGMP_DO_NOTHING;
3203 mode = inm->inm_st[1].iss_fmode;
3204
3205 /*
3206 * If we did not transition out of ASM mode during t0->t1,
3207 * and there are no source nodes to process, we can skip
3208 * the generation of source records.
3209 */
3210 if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
3211 inm->inm_nsrc == 0) {
3212 record_has_sources = 0;
3213 }
3214
3215 if (is_state_change) {
3216 /*
3217 * Queue a state change record.
3218 * If the mode did not change, and there are non-ASM
3219 * listeners or source filters present,
3220 * we potentially need to issue two records for the group.
3221 * If we are transitioning to MCAST_UNDEFINED, we need
3222 * not send any sources.
3223 * If there are ASM listeners, and there was no filter
3224 * mode transition of any kind, do nothing.
3225 */
3226 if (mode != inm->inm_st[0].iss_fmode) {
3227 if (mode == MCAST_EXCLUDE) {
3228 IGMP_PRINTF(("%s: change to EXCLUDE\n",
3229 __func__));
3230 type = IGMP_CHANGE_TO_EXCLUDE_MODE;
3231 } else {
3232 IGMP_PRINTF(("%s: change to INCLUDE\n",
3233 __func__));
3234 type = IGMP_CHANGE_TO_INCLUDE_MODE;
3235 if (mode == MCAST_UNDEFINED) {
3236 record_has_sources = 0;
3237 }
3238 }
3239 } else {
3240 if (record_has_sources) {
3241 is_filter_list_change = 1;
3242 } else {
3243 type = IGMP_DO_NOTHING;
3244 }
3245 }
3246 } else {
3247 /*
3248 * Queue a current state record.
3249 */
3250 if (mode == MCAST_EXCLUDE) {
3251 type = IGMP_MODE_IS_EXCLUDE;
3252 } else if (mode == MCAST_INCLUDE) {
3253 type = IGMP_MODE_IS_INCLUDE;
3254 VERIFY(inm->inm_st[1].iss_asm == 0);
3255 }
3256 }
3257
3258 /*
3259 * Generate the filter list changes using a separate function.
3260 */
3261 if (is_filter_list_change) {
3262 return igmp_v3_enqueue_filter_change(ifq, inm);
3263 }
3264
3265 if (type == IGMP_DO_NOTHING) {
3266 IGMP_INET_PRINTF(inm->inm_addr,
3267 ("%s: nothing to do for %s/%s\n",
3268 __func__, _igmp_inet_buf,
3269 if_name(inm->inm_ifp)));
3270 return 0;
3271 }
3272
3273 /*
3274 * If any sources are present, we must be able to fit at least
3275 * one in the trailing space of the tail packet's mbuf,
3276 * ideally more.
3277 */
3278 minrec0len = sizeof(struct igmp_grouprec);
3279 if (record_has_sources) {
3280 minrec0len += sizeof(in_addr_t);
3281 }
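/*
 * i.e. the 8-byte fixed group record header, plus 4 bytes for one
 * IPv4 source when sources are present: a 12-byte minimum record.
 */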
3282
3283 IGMP_INET_PRINTF(inm->inm_addr,
3284 ("%s: queueing %s for %s/%s\n", __func__,
3285 igmp_rec_type_to_str(type), _igmp_inet_buf,
3286 if_name(inm->inm_ifp)));
3287
3288 /*
3289 * Check if we have a packet in the tail of the queue for this
3290 * group into which the first group record for this group will fit.
3291 * Otherwise allocate a new packet.
3292 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
3293 * Note: Group records for G/GSR query responses MUST be sent
3294 * in their own packet.
3295 */
3296 m0 = ifq->ifq_tail;
3297 if (!is_group_query &&
3298 m0 != NULL &&
3299 (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
3300 (m0->m_pkthdr.len + minrec0len) <
3301 (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3302 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3303 sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3304 m = m0;
3305 IGMP_PRINTF(("%s: use existing packet\n", __func__));
3306 } else {
3307 if (IF_QFULL(ifq)) {
3308 IGMP_PRINTF(("%s: outbound queue full\n", __func__));
3309 return -ENOMEM;
3310 }
3311 m = NULL;
3312 m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3313 sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
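/*
 * e.g. assuming a 1500-byte MTU and the 32-byte leading space
 * (IP header + Router Alert + report header), a fresh packet
 * has room for (1500 - 32 - 8) / 4 = 365 sources in its first
 * group record.
 */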
3314 if (!is_state_change && !is_group_query) {
3315 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3316 if (m) {
3317 m->m_data += IGMP_LEADINGSPACE;
3318 }
3319 }
3320 if (m == NULL) {
3321 m = m_gethdr(M_DONTWAIT, MT_DATA);
3322 if (m) {
3323 MH_ALIGN(m, IGMP_LEADINGSPACE);
3324 }
3325 }
3326 if (m == NULL) {
3327 return -ENOMEM;
3328 }
3329
3330 igmp_save_context(m, ifp);
3331
3332 IGMP_PRINTF(("%s: allocated first packet\n", __func__));
3333 }
3334
3335 /*
3336 * Append group record.
3337 * If we have sources, we don't know how many yet.
3338 */
3339 ig.ig_type = (u_char)type;
3340 ig.ig_datalen = 0;
3341 ig.ig_numsrc = 0;
3342 ig.ig_group = inm->inm_addr;
3343 if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3344 if (m != m0) {
3345 m_freem(m);
3346 }
3347 IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
3348 return -ENOMEM;
3349 }
3350 nbytes += sizeof(struct igmp_grouprec);
3351
3352 /*
3353 * Append as many sources as will fit in the first packet.
3354 * If we are appending to a new packet, the chain allocation
3355 * may potentially use clusters; use m_getptr() in this case.
3356 * If we are appending to an existing packet, we need to obtain
3357 * a pointer to the group record after m_append(), in case a new
3358 * mbuf was allocated.
3359 * Only append sources which are in-mode at t1. If we are
3360 * transitioning to MCAST_UNDEFINED state on the group, do not
3361 * include source entries.
3362 * Only report recorded sources in our filter set when responding
3363 * to a group-source query.
3364 */
3365 if (record_has_sources) {
3366 if (m == m0) {
3367 md = m_last(m);
3368 pig = (struct igmp_grouprec *)(void *)
3369 (mtod(md, uint8_t *) + md->m_len - nbytes);
3370 } else {
3371 md = m_getptr(m, 0, &off);
3372 pig = (struct igmp_grouprec *)(void *)
3373 (mtod(md, uint8_t *) + off);
3374 }
3375 msrcs = 0;
3376 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
3377 #ifdef IGMP_DEBUG
3378 char buf[MAX_IPv4_STR_LEN];
3379
3380 inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3381 IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3382 #endif
3383 now = ims_get_mode(inm, ims, 1);
3384 IGMP_PRINTF(("%s: node is %d\n", __func__, now));
3385 if ((now != mode) ||
3386 (now == mode && mode == MCAST_UNDEFINED)) {
3387 IGMP_PRINTF(("%s: skip node\n", __func__));
3388 continue;
3389 }
3390 if (is_source_query && ims->ims_stp == 0) {
3391 IGMP_PRINTF(("%s: skip unrecorded node\n",
3392 __func__));
3393 continue;
3394 }
3395 IGMP_PRINTF(("%s: append node\n", __func__));
3396 naddr = htonl(ims->ims_haddr);
3397 if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3398 if (m != m0) {
3399 m_freem(m);
3400 }
3401 IGMP_PRINTF(("%s: m_append() failed.\n",
3402 __func__));
3403 return -ENOMEM;
3404 }
3405 nbytes += sizeof(in_addr_t);
3406 ++msrcs;
3407 if (msrcs == m0srcs) {
3408 break;
3409 }
3410 }
3411 IGMP_PRINTF(("%s: msrcs is %d this packet\n", __func__,
3412 msrcs));
3413 ig_numsrc = htons(msrcs);
3414 bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3415 nbytes += (msrcs * sizeof(in_addr_t));
3416 }
3417
3418 if (is_source_query && msrcs == 0) {
3419 IGMP_PRINTF(("%s: no recorded sources to report\n", __func__));
3420 if (m != m0) {
3421 m_freem(m);
3422 }
3423 return 0;
3424 }
3425
3426 /*
3427 * We are good to go with first packet.
3428 */
3429 if (m != m0) {
3430 IGMP_PRINTF(("%s: enqueueing first packet\n", __func__));
3431 m->m_pkthdr.vt_nrecs = 1;
3432 IF_ENQUEUE(ifq, m);
3433 } else {
3434 m->m_pkthdr.vt_nrecs++;
3435 }
3436 /*
3437 * No further work needed if no source list in packet(s).
3438 */
3439 if (!record_has_sources) {
3440 return nbytes;
3441 }
3442
3443 /*
3444 * Whilst sources remain to be announced, we need to allocate
3445 * a new packet and fill out as many sources as will fit.
3446 * Always try for a cluster first.
3447 */
3448 while (nims != NULL) {
3449 if (IF_QFULL(ifq)) {
3450 IGMP_PRINTF(("%s: outbound queue full\n", __func__));
3451 return -ENOMEM;
3452 }
3453 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3454 if (m) {
3455 m->m_data += IGMP_LEADINGSPACE;
3456 }
3457 if (m == NULL) {
3458 m = m_gethdr(M_DONTWAIT, MT_DATA);
3459 if (m) {
3460 MH_ALIGN(m, IGMP_LEADINGSPACE);
3461 }
3462 }
3463 if (m == NULL) {
3464 return -ENOMEM;
3465 }
3466 igmp_save_context(m, ifp);
3467 md = m_getptr(m, 0, &off);
3468 pig = (struct igmp_grouprec *)(void *)
3469 (mtod(md, uint8_t *) + off);
3470 IGMP_PRINTF(("%s: allocated next packet\n", __func__));
3471
3472 if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
3473 if (m != m0) {
3474 m_freem(m);
3475 }
3476 IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
3477 return -ENOMEM;
3478 }
3479 m->m_pkthdr.vt_nrecs = 1;
3480 nbytes += sizeof(struct igmp_grouprec);
3481
3482 m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3483 sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
3484
3485 msrcs = 0;
3486 RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3487 #ifdef IGMP_DEBUG
3488 char buf[MAX_IPv4_STR_LEN];
3489
3490 inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3491 IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3492 #endif
3493 now = ims_get_mode(inm, ims, 1);
3494 if ((now != mode) ||
3495 (now == mode && mode == MCAST_UNDEFINED)) {
3496 IGMP_PRINTF(("%s: skip node\n", __func__));
3497 continue;
3498 }
3499 if (is_source_query && ims->ims_stp == 0) {
3500 IGMP_PRINTF(("%s: skip unrecorded node\n",
3501 __func__));
3502 continue;
3503 }
3504 IGMP_PRINTF(("%s: append node\n", __func__));
3505 naddr = htonl(ims->ims_haddr);
3506 if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
3507 if (m != m0) {
3508 m_freem(m);
3509 }
3510 IGMP_PRINTF(("%s: m_append() failed.\n",
3511 __func__));
3512 return -ENOMEM;
3513 }
3514 ++msrcs;
3515 if (msrcs == m0srcs) {
3516 break;
3517 }
3518 }
3519 ig_numsrc = htons(msrcs);
3520 bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
3521 nbytes += (msrcs * sizeof(in_addr_t));
3522
3523 IGMP_PRINTF(("%s: enqueueing next packet\n", __func__));
3524 IF_ENQUEUE(ifq, m);
3525 }
3526
3527 return nbytes;
3528 }
3529
3530 /*
3531 * Type used to mark record pass completion.
3532 * We exploit the fact we can cast to this easily from the
3533 * current filter modes on each ip_msource node.
3534 */
3535 typedef enum {
3536 REC_NONE = 0x00, /* MCAST_UNDEFINED */
3537 REC_ALLOW = 0x01, /* MCAST_INCLUDE */
3538 REC_BLOCK = 0x02, /* MCAST_EXCLUDE */
3539 REC_FULL = REC_ALLOW | REC_BLOCK
3540 } rectype_t;
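
/*
 * The cast relies on the UAPI filter mode values MCAST_UNDEFINED == 0,
 * MCAST_INCLUDE == 1 and MCAST_EXCLUDE == 2 lining up with REC_NONE,
 * REC_ALLOW and REC_BLOCK respectively.
 */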
3541
3542 /*
3543 * Enqueue an IGMPv3 filter list change to the given output queue.
3544 *
3545 * Source list filter state is held in an RB-tree. When the filter list
3546 * for a group is changed without changing its mode, we need to compute
3547 * the deltas between T0 and T1 for each source in the filter set,
3548 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3549 *
3550 * As we may potentially queue two record types, and the entire R-B tree
3551 * needs to be walked at once, we break this out into its own function
3552 * so we can generate a tightly packed queue of packets.
3553 *
3554 * XXX This could be written to only use one tree walk, although that makes
3555 * serializing into the mbuf chains a bit harder. For now we do two walks
3556 * which makes things easier on us, and it may or may not be harder on
3557 * the L2 cache.
3558 *
3559 * If successful the size of all data appended to the queue is returned,
3560 * otherwise an error code less than zero is returned, or zero if
3561 * no record(s) were appended.
3562 */
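/*
 * Example (illustrative): a group in EXCLUDE mode whose filter set
 * moves from EXCLUDE {S1, S2} at t0 to EXCLUDE {S2, S3} at t1 yields
 * two records: ALLOW_NEW_SOURCES {S1} and BLOCK_OLD_SOURCES {S3}.
 */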
3563 static int
3564 igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
3565 {
3566 static const int MINRECLEN =
3567 sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
3568 struct ifnet *ifp;
3569 struct igmp_grouprec ig;
3570 struct igmp_grouprec *pig;
3571 struct ip_msource *ims, *nims;
3572 struct mbuf *m, *m0, *md;
3573 in_addr_t naddr;
3574 int m0srcs, nbytes, npbytes, off, schanged;
3575 uint16_t rsrcs;
3576 int nallow, nblock;
3577 uint16_t mode;
3578 uint8_t now, then;
3579 rectype_t crt, drt, nrt;
3580 u_int16_t ig_numsrc;
3581
3582 INM_LOCK_ASSERT_HELD(inm);
3583
3584 if (inm->inm_nsrc == 0 ||
3585 (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0)) {
3586 return 0;
3587 }
3588
3589 ifp = inm->inm_ifp; /* interface */
3590 mode = inm->inm_st[1].iss_fmode; /* filter mode at t1 */
3591 crt = REC_NONE; /* current group record type */
3592 drt = REC_NONE; /* mask of completed group record types */
3593 nrt = REC_NONE; /* record type for current node */
3594 m0srcs = 0; /* # sources which will fit in current mbuf chain */
3595 nbytes = 0; /* # of bytes appended to group's state-change queue */
3596 npbytes = 0; /* # of bytes appended this packet */
3597 rsrcs = 0; /* # sources encoded in current record */
3598 schanged = 0; /* # nodes encoded in overall filter change */
3599 nallow = 0; /* # of source entries in ALLOW_NEW */
3600 nblock = 0; /* # of source entries in BLOCK_OLD */
3601 nims = NULL; /* next tree node pointer */
3602
3603 /*
3604 * For each possible filter record mode.
3605 * The first kind of source we encounter tells us which
3606 * is the first kind of record we start appending.
3607 * If a node transitioned to UNDEFINED at t1, its mode is treated
3608 * as the inverse of the group's filter mode.
3609 */
3610 while (drt != REC_FULL) {
3611 do {
3612 m0 = ifq->ifq_tail;
3613 if (m0 != NULL &&
3614 (m0->m_pkthdr.vt_nrecs + 1 <=
3615 IGMP_V3_REPORT_MAXRECS) &&
3616 (m0->m_pkthdr.len + MINRECLEN) <
3617 (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3618 m = m0;
3619 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3620 sizeof(struct igmp_grouprec)) /
3621 sizeof(in_addr_t);
3622 IGMP_PRINTF(("%s: use previous packet\n",
3623 __func__));
3624 } else {
3625 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3626 if (m) {
3627 m->m_data += IGMP_LEADINGSPACE;
3628 }
3629 if (m == NULL) {
3630 m = m_gethdr(M_DONTWAIT, MT_DATA);
3631 if (m) {
3632 MH_ALIGN(m, IGMP_LEADINGSPACE);
3633 }
3634 }
3635 if (m == NULL) {
3636 IGMP_PRINTF(("%s: m_get*() failed\n",
3637 __func__));
3638 return -ENOMEM;
3639 }
3640 m->m_pkthdr.vt_nrecs = 0;
3641 igmp_save_context(m, ifp);
3642 m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3643 sizeof(struct igmp_grouprec)) /
3644 sizeof(in_addr_t);
3645 npbytes = 0;
3646 IGMP_PRINTF(("%s: allocated new packet\n",
3647 __func__));
3648 }
3649 /*
3650 * Append the IGMP group record header to the
3651 * current packet's data area.
3652 * Recalculate pointer to free space for next
3653 * group record, in case m_append() allocated
3654 * a new mbuf or cluster.
3655 */
3656 memset(&ig, 0, sizeof(ig));
3657 ig.ig_group = inm->inm_addr;
3658 if (!m_append(m, sizeof(ig), (void *)&ig)) {
3659 if (m != m0) {
3660 m_freem(m);
3661 }
3662 IGMP_PRINTF(("%s: m_append() failed\n",
3663 __func__));
3664 return -ENOMEM;
3665 }
3666 npbytes += sizeof(struct igmp_grouprec);
3667 if (m != m0) {
3668 /* new packet; offset in chain */
3669 md = m_getptr(m, npbytes -
3670 sizeof(struct igmp_grouprec), &off);
3671 pig = (struct igmp_grouprec *)(void *)(mtod(md,
3672 uint8_t *) + off);
3673 } else {
3674 /* current packet; offset from last append */
3675 md = m_last(m);
3676 pig = (struct igmp_grouprec *)(void *)(mtod(md,
3677 uint8_t *) + md->m_len -
3678 sizeof(struct igmp_grouprec));
3679 }
3680 /*
3681 * Begin walking the tree for this record type
3682 * pass, or continue from where we left off
3683 * previously if we had to allocate a new packet.
3684 * Only report deltas in-mode at t1.
3685 * We need not report included sources as allowed
3686 * if we are in inclusive mode on the group,
3687 * however the converse is not true.
3688 */
3689 rsrcs = 0;
3690 if (nims == NULL) {
3691 nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
3692 }
3693 RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3694 #ifdef IGMP_DEBUG
3695 char buf[MAX_IPv4_STR_LEN];
3696
3697 inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf));
3698 IGMP_PRINTF(("%s: visit node %s\n", __func__, buf));
3699 #endif
3700 now = ims_get_mode(inm, ims, 1);
3701 then = ims_get_mode(inm, ims, 0);
3702 IGMP_PRINTF(("%s: mode: t0 %d, t1 %d\n",
3703 __func__, then, now));
3704 if (now == then) {
3705 IGMP_PRINTF(("%s: skip unchanged\n",
3706 __func__));
3707 continue;
3708 }
3709 if (mode == MCAST_EXCLUDE &&
3710 now == MCAST_INCLUDE) {
3711 IGMP_PRINTF(("%s: skip IN src on EX "
3712 "group\n", __func__));
3713 continue;
3714 }
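				/*
				 * Derive the record type from the mode at
				 * t1: MCAST_INCLUDE maps onto REC_ALLOW and
				 * MCAST_EXCLUDE onto REC_BLOCK, while an
				 * UNDEFINED node takes the inverse of the
				 * group's filter mode, as noted above.
				 */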
				nrt = (rectype_t)now;
				if (nrt == REC_NONE) {
					nrt = (rectype_t)(~mode & REC_FULL);
				}
				if (schanged++ == 0) {
					crt = nrt;
				} else if (crt != nrt) {
					continue;
				}
				naddr = htonl(ims->ims_haddr);
				if (!m_append(m, sizeof(in_addr_t),
				    (void *)&naddr)) {
					if (m != m0) {
						m_freem(m);
					}
					IGMP_PRINTF(("%s: m_append() failed\n",
					    __func__));
					return -ENOMEM;
				}
				nallow += !!(crt == REC_ALLOW);
				nblock += !!(crt == REC_BLOCK);
				if (++rsrcs == m0srcs) {
					break;
				}
			}
			/*
			 * If we did not append any tree nodes on this
			 * pass, back out of allocations.
			 */
			if (rsrcs == 0) {
				npbytes -= sizeof(struct igmp_grouprec);
				if (m != m0) {
					IGMP_PRINTF(("%s: m_free(m)\n",
					    __func__));
					m_freem(m);
				} else {
					IGMP_PRINTF(("%s: m_adj(m, -ig)\n",
					    __func__));
					m_adj(m, -((int)sizeof(
						struct igmp_grouprec)));
				}
				continue;
			}
			npbytes += (rsrcs * sizeof(in_addr_t));
			if (crt == REC_ALLOW) {
				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
			} else if (crt == REC_BLOCK) {
				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
			}
			ig_numsrc = htons(rsrcs);
			bcopy(&ig_numsrc, &pig->ig_numsrc, sizeof(ig_numsrc));
			/*
			 * Count the new group record, and enqueue this
			 * packet if it wasn't already queued.
			 */
			m->m_pkthdr.vt_nrecs++;
			if (m != m0) {
				IF_ENQUEUE(ifq, m);
			}
			nbytes += npbytes;
		} while (nims != NULL);
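		/*
		 * Mark the current record type as done, and flip to the
		 * remaining one (ALLOW <-> BLOCK); once both bits are set
		 * in drt, REC_FULL is reached and the outer loop exits.
		 */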
		drt |= crt;
		crt = (~crt & REC_FULL);
	}

	IGMP_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
	    nallow, nblock));

	return nbytes;
}

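/*
 * Merge this group's pending state-change queue into the interface's
 * state-change queue, coalescing records into the tail packet where the
 * MTU and record-count limits allow.
 */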
static int
igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
{
	struct ifqueue *gq;
	struct mbuf *m;         /* pending state-change */
	struct mbuf *m0;        /* copy of pending state-change */
	struct mbuf *mt;        /* last state-change in packet */
	struct mbuf *n;
	int docopy, domerge;
	u_int recslen;

	INM_LOCK_ASSERT_HELD(inm);

	docopy = 0;
	domerge = 0;
	recslen = 0;

	/*
	 * If there are further pending retransmissions, make a writable
	 * copy of each queued state-change message before merging.
	 */
	if (inm->inm_scrv > 0) {
		docopy = 1;
	}
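	/*
	 * When no retransmissions remain, packets are moved off the
	 * per-group queue outright; otherwise the originals are left
	 * queued for retransmission and writable duplicates are merged.
	 */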

	gq = &inm->inm_scq;
#ifdef IGMP_DEBUG
	if (gq->ifq_head == NULL) {
		IGMP_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
	}
#endif

	/*
	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
	 * packet might not always be at the head of the ifqueue.
	 */
	m = gq->ifq_head;
	while (m != NULL) {
		/*
		 * Only merge the report into the current packet if
		 * there is sufficient space to do so; an IGMPv3 report
		 * packet may only contain 65,535 group records.
		 * Always use a simple mbuf chain concatenation to do this,
		 * as large state changes for single groups may have
		 * allocated clusters.
		 */
		domerge = 0;
		mt = ifscq->ifq_tail;
		if (mt != NULL) {
			recslen = m_length(m);

			if ((mt->m_pkthdr.vt_nrecs +
			    m->m_pkthdr.vt_nrecs <=
			    IGMP_V3_REPORT_MAXRECS) &&
			    (mt->m_pkthdr.len + recslen <=
			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE))) {
				domerge = 1;
			}
		}

		if (!domerge && IF_QFULL(gq)) {
			IGMP_PRINTF(("%s: outbound queue full, skipping whole "
			    "packet 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			if (!docopy) {
				IF_REMQUEUE(gq, m);
				m_freem(m);
			}
			m = n;
			continue;
		}

		if (!docopy) {
			IGMP_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			IF_REMQUEUE(gq, m);
			m0 = m;
			m = n;
		} else {
			IGMP_PRINTF(("%s: copying 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			m0 = m_dup(m, M_NOWAIT);
			if (m0 == NULL) {
				return ENOMEM;
			}
			m0->m_nextpkt = NULL;
			m = m->m_nextpkt;
		}

		if (!domerge) {
			IGMP_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx\n",
			    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
			IF_ENQUEUE(ifscq, m0);
		} else {
			struct mbuf *mtl;       /* last mbuf of packet mt */

			IGMP_PRINTF(("%s: merging 0x%llx with ifscq tail "
			    "0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(mt)));

			mtl = m_last(mt);
			m0->m_flags &= ~M_PKTHDR;
			mt->m_pkthdr.len += recslen;
			mt->m_pkthdr.vt_nrecs +=
			    m0->m_pkthdr.vt_nrecs;

			mtl->m_next = m0;
		}
	}

	return 0;
}

/*
 * Respond to a pending IGMPv3 General Query.
 */
static uint32_t
igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
{
	struct ifnet *ifp;
	struct in_multi *inm;
	struct in_multistep step;
	int retval, loop;

	IGI_LOCK_ASSERT_HELD(igi);

	VERIFY(igi->igi_version == IGMP_VERSION_3);

	ifp = igi->igi_ifp;
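	/*
	 * The igi lock is not held across the walk of the global
	 * membership list below; it is re-taken briefly around each
	 * per-group enqueue.
	 */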
	IGI_UNLOCK(igi);

	in_multihead_lock_shared();
	IN_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		INM_LOCK(inm);
		if (inm->inm_ifp != ifp) {
			goto next;
		}

		switch (inm->inm_state) {
		case IGMP_NOT_MEMBER:
		case IGMP_SILENT_MEMBER:
			break;
		case IGMP_REPORTING_MEMBER:
		case IGMP_IDLE_MEMBER:
		case IGMP_LAZY_MEMBER:
		case IGMP_SLEEPING_MEMBER:
		case IGMP_AWAKENING_MEMBER:
			inm->inm_state = IGMP_REPORTING_MEMBER;
			IGI_LOCK(igi);
			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
			    inm, 0, 0, 0);
			IGI_UNLOCK(igi);
			IGMP_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			break;
		case IGMP_G_QUERY_PENDING_MEMBER:
		case IGMP_SG_QUERY_PENDING_MEMBER:
		case IGMP_LEAVING_MEMBER:
			break;
		}
next:
		INM_UNLOCK(inm);
		IN_NEXT_MULTI(step, inm);
	}
	in_multihead_lock_done();

	IGI_LOCK(igi);
	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
	igmp_dispatch_queue(igi, &igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
	    loop);
	IGI_LOCK_ASSERT_HELD(igi);
	/*
	 * Slew transmission of bursts over 1 second intervals.
	 */
	if (igi->igi_gq.ifq_head != NULL) {
		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
			IGMP_RESPONSE_BURST_INTERVAL);
	}

	return igi->igi_v3_timer;
}

/*
 * Transmit the next pending IGMP message in the output queue.
 *
 * Must not be called with inm_lock or igi_lock held.
 */
static void
igmp_sendpkt(struct mbuf *m)
{
	struct ip_moptions *imo;
	struct mbuf *ipopts, *m0;
	int error;
	struct route ro;
	struct ifnet *ifp;

	IGMP_PRINTF(("%s: transmit 0x%llx\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m)));

	ifp = igmp_restore_context(m);
	/*
	 * Check if the ifnet is still attached.
	 */
	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
		IGMP_PRINTF(("%s: dropped 0x%llx as ifp went away.\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m)));
		m_freem(m);
		OSAddAtomic(1, &ipstat.ips_noroute);
		return;
	}

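	/*
	 * If the igmp_sendra sysctl is enabled, pass the preallocated
	 * Router Alert option (m_raopt) to ip_output() below; RFC 2236
	 * and RFC 3376 require the option on transmitted IGMP messages.
	 */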
	ipopts = igmp_sendra ? m_raopt : NULL;

	imo = ip_allocmoptions(Z_WAITOK);
	if (imo == NULL) {
		m_freem(m);
		return;
	}

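	/*
	 * IGMP messages are sent with TTL 1, are not looped back to the
	 * local socket layer, and use no multicast routing vif.
	 */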
	imo->imo_multicast_ttl = 1;
	imo->imo_multicast_vif = -1;
	imo->imo_multicast_loop = 0;

	/*
	 * If the user requested that IGMP traffic be explicitly
	 * redirected to the loopback interface (e.g. they are running a
	 * MANET interface and the routing protocol needs to see the
	 * updates), handle this now.
	 */
	if (m->m_flags & M_IGMP_LOOP) {
		imo->imo_multicast_ifp = lo_ifp;
	} else {
		imo->imo_multicast_ifp = ifp;
	}

	if (m->m_flags & M_IGMPV2) {
		m0 = m;
	} else {
		m0 = igmp_v3_encap_report(ifp, m);
		if (m0 == NULL) {
			/*
			 * If igmp_v3_encap_report() failed, then M_PREPEND()
			 * already freed the original mbuf chain.
			 * This means that we don't have to m_freem(m) here.
			 */
			IGMP_PRINTF(("%s: dropped 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			IMO_REMREF(imo);
			os_atomic_inc(&ipstat.ips_odropped, relaxed);
			return;
		}
	}

	igmp_scrub_context(m0);
	m->m_flags &= ~(M_PROTOFLAGS | M_IGMP_LOOP);
	m0->m_pkthdr.rcvif = lo_ifp;

	if (ifp->if_eflags & IFEF_TXSTART) {
		/*
		 * Use control service class if the interface supports
		 * transmit-start model.
		 */
		(void) m_set_service_class(m0, MBUF_SC_CTL);
	}
	bzero(&ro, sizeof(ro));
	error = ip_output(m0, ipopts, &ro, 0, imo, NULL);
	ROUTE_RELEASE(&ro);

	IMO_REMREF(imo);

	if (error) {
		IGMP_PRINTF(("%s: ip_output(0x%llx) = %d\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(m0), error));
		return;
	}

	IGMPSTAT_INC(igps_snd_reports);
	OIGMPSTAT_INC(igps_snd_reports);
}

/*
 * Encapsulate an IGMPv3 report.
 *
 * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
 * chain has already had its IP/IGMPv3 header prepended. In this case
 * the function will not attempt to prepend; the lengths and checksums
 * will however be re-computed.
 *
 * Returns a pointer to the new mbuf chain head, or NULL if the
 * allocation failed.
 */
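/*
 * Resulting layout (sketch; the Router Alert option, when enabled, is
 * inserted separately by ip_output()):
 *
 *   struct ip           20 bytes  IP header, IPPROTO_IGMP
 *   struct igmp_report   8 bytes  type 0x22, reserved, checksum, numgrps
 *   group records       variable  vt_nrecs records built by the enqueue
 *                                 routines above
 */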
static struct mbuf *
igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
{
	struct igmp_report *igmp;
	struct ip *ip;
	unsigned int hdrlen, igmpreclen;

	VERIFY((m->m_flags & M_PKTHDR));

	igmpreclen = m_length(m);
	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);

	if (m->m_flags & M_IGMPV3_HDR) {
		igmpreclen -= hdrlen;
	} else {
		M_PREPEND(m, hdrlen, M_DONTWAIT, 1);
		if (m == NULL) {
			return NULL;
		}
		m->m_flags |= M_IGMPV3_HDR;
	}
	if (hdrlen + igmpreclen > USHRT_MAX) {
		IGMP_PRINTF(("%s: invalid length %d\n", __func__,
		    hdrlen + igmpreclen));
		m_freem(m);
		return NULL;
	}

	IGMP_PRINTF(("%s: igmpreclen is %d\n", __func__, igmpreclen));

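	/*
	 * Temporarily step past the IP header so that in_cksum() below
	 * covers only the IGMPv3 report; the offsets are restored once
	 * ir_cksum has been computed.
	 */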
	m->m_data += sizeof(struct ip);
	m->m_len -= sizeof(struct ip);

	igmp = mtod(m, struct igmp_report *);
	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
	igmp->ir_rsv1 = 0;
	igmp->ir_rsv2 = 0;
	igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
	igmp->ir_cksum = 0;
	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
	m->m_pkthdr.vt_nrecs = 0;

	m->m_data -= sizeof(struct ip);
	m->m_len += sizeof(struct ip);

	ip = mtod(m, struct ip *);
	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
	ip->ip_len = (u_short)(hdrlen + igmpreclen);
	ip->ip_off = IP_DF;
	ip->ip_p = IPPROTO_IGMP;
	ip->ip_sum = 0;

	ip->ip_src.s_addr = INADDR_ANY;

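	/*
	 * For reports redirected to loopback, use the interface's primary
	 * address as the source so that observers (e.g. a routing daemon)
	 * see a valid sender rather than INADDR_ANY.
	 */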
	if (m->m_flags & M_IGMP_LOOP) {
		struct in_ifaddr *ia;

		IFP_TO_IA(ifp, ia);
		if (ia != NULL) {
			IFA_LOCK(&ia->ia_ifa);
			ip->ip_src = ia->ia_addr.sin_addr;
			IFA_UNLOCK(&ia->ia_ifa);
			IFA_REMREF(&ia->ia_ifa);
		}
	}

	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);

	return m;
}

#ifdef IGMP_DEBUG
static const char *
igmp_rec_type_to_str(const int type)
{
	switch (type) {
	case IGMP_CHANGE_TO_EXCLUDE_MODE:
		return "TO_EX";
	case IGMP_CHANGE_TO_INCLUDE_MODE:
		return "TO_IN";
	case IGMP_MODE_IS_EXCLUDE:
		return "MODE_EX";
	case IGMP_MODE_IS_INCLUDE:
		return "MODE_IN";
	case IGMP_ALLOW_NEW_SOURCES:
		return "ALLOW_NEW";
	case IGMP_BLOCK_OLD_SOURCES:
		return "BLOCK_OLD";
	default:
		break;
	}
	return "unknown";
}
#endif

void
igmp_init(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
	static int igmp_initialized = 0;

	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);

	if (igmp_initialized) {
		return;
	}
	igmp_initialized = 1;

	IGMP_PRINTF(("%s: initializing\n", __func__));

	igmp_timers_are_running = 0;

	LIST_INIT(&igi_head);
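	/*
	 * Preallocate the Router Alert option mbuf used by igmp_sendpkt();
	 * it is kept for the lifetime of the system.
	 */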
	m_raopt = igmp_ra_alloc();
}