/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*-
 * Copyright (c) 2009 Bruce Simpson.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 Stephen Deering.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Stephen Deering of Stanford University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mcache.h>

#include <dev/random/randomdev.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/mld6.h>
#include <netinet6/mld6_var.h>

/* Lock group and attribute for mld_mtx */
static LCK_ATTR_DECLARE(mld_mtx_attr, 0, 0);
static LCK_GRP_DECLARE(mld_mtx_grp, "mld_mtx");

/*
 * Locking and reference counting:
 *
 * mld_mtx mainly protects mli_head.  In cases where both mld_mtx and
 * in6_multihead_lock must be held, the former must be acquired first in order
 * to maintain lock ordering.  It is not a requirement that mld_mtx be
 * acquired first before in6_multihead_lock, but in case both must be acquired
 * in succession, the correct lock ordering must be followed.
 *
 * Instead of walking the if_multiaddrs list at the interface and returning
 * the ifma_protospec value of a matching entry, we search the global list
 * of in6_multi records and find it that way; this is done with in6_multihead
 * lock held.  Doing so avoids the race condition issues that many other BSDs
 * suffer from (therefore in our implementation, ifma_protospec will never be
 * NULL for as long as the in6_multi is valid.)
 *
 * The above creates a requirement for the in6_multi to stay in in6_multihead
 * list even after the final MLD leave (in MLDv2 mode) until it no longer
 * needs to be retransmitted (this is not required for MLDv1).  In order to handle
 * this, the request and reference counts of the in6_multi are bumped up when
 * the state changes to MLD_LEAVING_MEMBER, and later dropped in the timeout
 * handler.  Each in6_multi holds a reference to the underlying mld_ifinfo.
 *
 * Thus, the permitted lock order is:
 *
 *	mld_mtx, in6_multihead_lock, inm6_lock, mli_lock
 *
 * Any may be taken independently, but if any are held at the same time,
 * the above lock order must be followed.
 */
static LCK_MTX_DECLARE_ATTR(mld_mtx, &mld_mtx_grp, &mld_mtx_attr);
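
/*
 * Illustrative sketch of the permitted lock ordering described above
 * (hypothetical call site, not part of the original code; MLD_LOCK()
 * and friends are the macros defined below):
 *
 *	MLD_LOCK();                      // mld_mtx is outermost
 *	in6_multihead_lock_shared();     // then the global in6_multi list lock
 *	IN6M_LOCK(inm);                  // then the per-group lock
 *	MLI_LOCK(mli);                   // mli_lock is innermost
 *	...
 *	MLI_UNLOCK(mli);
 *	IN6M_UNLOCK(inm);
 *	in6_multihead_lock_done();
 *	MLD_UNLOCK();
 */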

SLIST_HEAD(mld_in6m_relhead, in6_multi);

static void     mli_initvar(struct mld_ifinfo *, struct ifnet *, int);
static struct mld_ifinfo *mli_alloc(zalloc_flags_t);
static void     mli_free(struct mld_ifinfo *);
static void     mli_delete(const struct ifnet *, struct mld_in6m_relhead *);
static void     mld_dispatch_packet(struct mbuf *);
static void     mld_final_leave(struct in6_multi *, struct mld_ifinfo *,
    struct mld_tparams *);
static int      mld_handle_state_change(struct in6_multi *, struct mld_ifinfo *,
    struct mld_tparams *);
static int      mld_initial_join(struct in6_multi *, struct mld_ifinfo *,
    struct mld_tparams *, const int);
#ifdef MLD_DEBUG
static const char *     mld_rec_type_to_str(const int);
#endif
static uint32_t mld_set_version(struct mld_ifinfo *, const int);
static void     mld_flush_relq(struct mld_ifinfo *, struct mld_in6m_relhead *);
static void     mld_dispatch_queue_locked(struct mld_ifinfo *, struct ifqueue *, int);
static int      mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
    /*const*/ struct mld_hdr *);
static int      mld_v1_input_report(struct ifnet *, struct mbuf *,
    const struct ip6_hdr *, /*const*/ struct mld_hdr *);
static void     mld_v1_process_group_timer(struct in6_multi *, const int);
static void     mld_v1_process_querier_timers(struct mld_ifinfo *);
static int      mld_v1_transmit_report(struct in6_multi *, const uint8_t);
static uint32_t mld_v1_update_group(struct in6_multi *, const int);
static void     mld_v2_cancel_link_timers(struct mld_ifinfo *);
static uint32_t mld_v2_dispatch_general_query(struct mld_ifinfo *);
static struct mbuf *
mld_v2_encap_report(struct ifnet *, struct mbuf *);
static int      mld_v2_enqueue_filter_change(struct ifqueue *,
    struct in6_multi *);
static int      mld_v2_enqueue_group_record(struct ifqueue *,
    struct in6_multi *, const int, const int, const int,
    const int);
static int      mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
    struct mbuf *, const int, const int);
static int      mld_v2_merge_state_changes(struct in6_multi *,
    struct ifqueue *);
static void     mld_v2_process_group_timers(struct mld_ifinfo *,
    struct ifqueue *, struct ifqueue *,
    struct in6_multi *, const int);
static int      mld_v2_process_group_query(struct in6_multi *,
    int, struct mbuf *, const int);
static int      sysctl_mld_gsr SYSCTL_HANDLER_ARGS;
static int      sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS;
static int      sysctl_mld_v2enable SYSCTL_HANDLER_ARGS;

static const uint32_t mld_timeout_delay = 1000; /* in milliseconds */
static const uint32_t mld_timeout_leeway = 500; /* in milliseconds */
static bool mld_timeout_run;             /* MLD timer is scheduled to run */
static bool mld_fast_timeout_run;        /* MLD fast timer is scheduled to run */
static void mld_timeout(thread_call_param_t, thread_call_param_t);
static void mld_sched_timeout(void);
static void mld_sched_fast_timeout(void);

/*
 * Normative references: RFC 2710, RFC 3590, RFC 3810.
 */
static struct timeval mld_gsrdelay = {.tv_sec = 10, .tv_usec = 0};
static LIST_HEAD(, mld_ifinfo) mli_head;

static int querier_present_timers_running6;
static int interface_timers_running6;
static int state_change_timers_running6;
static int current_state_timers_running6;

static unsigned int mld_mli_list_genid;
/*
 * Subsystem lock macros.
 */
#define MLD_LOCK()                      \
	lck_mtx_lock(&mld_mtx)
#define MLD_LOCK_ASSERT_HELD()          \
	LCK_MTX_ASSERT(&mld_mtx, LCK_MTX_ASSERT_OWNED)
#define MLD_LOCK_ASSERT_NOTHELD()       \
	LCK_MTX_ASSERT(&mld_mtx, LCK_MTX_ASSERT_NOTOWNED)
#define MLD_UNLOCK()                    \
	lck_mtx_unlock(&mld_mtx)

#define MLD_ADD_DETACHED_IN6M(_head, _in6m) {                           \
	SLIST_INSERT_HEAD(_head, _in6m, in6m_dtle);                     \
}

#define MLD_REMOVE_DETACHED_IN6M(_head) {                               \
	struct in6_multi *_in6m, *_inm_tmp;                             \
	SLIST_FOREACH_SAFE(_in6m, _head, in6m_dtle, _inm_tmp) {         \
	        SLIST_REMOVE(_head, _in6m, in6_multi, in6m_dtle);       \
	        IN6M_REMREF(_in6m);                                     \
	}                                                               \
	VERIFY(SLIST_EMPTY(_head));                                     \
}
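
/*
 * Sketch of the intended use of the detached-record list (mirrors
 * mld_domifdetach() below; illustrative only): build a local list head,
 * hand it to code that runs under mld_mtx, and drain it only once every
 * lock has been dropped:
 *
 *	SLIST_HEAD(, in6_multi) in6m_dthead;
 *	SLIST_INIT(&in6m_dthead);
 *	MLD_LOCK();
 *	mli_delete(ifp, (struct mld_in6m_relhead *)&in6m_dthead);
 *	MLD_UNLOCK();
 *	MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
 */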

static KALLOC_TYPE_DEFINE(mli_zone, struct mld_ifinfo, NET_KT_DEFAULT);

SYSCTL_DECL(_net_inet6);        /* Note: Not in any common header. */

SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "IPv6 Multicast Listener Discovery");
SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &mld_gsrdelay.tv_sec, 0, sysctl_mld_gsr, "I",
    "Rate limit for MLDv2 Group-and-Source queries in seconds");

SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
    sysctl_mld_ifinfo, "Per-interface MLDv2 state");

static int      mld_v1enable = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mld_v1enable, 0, "Enable fallback to MLDv1");

static int      mld_v2enable = 1;
SYSCTL_PROC(_net_inet6_mld, OID_AUTO, v2enable,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &mld_v2enable, 0, sysctl_mld_v2enable, "I",
    "Enable MLDv2 (debug purposes only)");

static int      mld_use_allow = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");

#ifdef MLD_DEBUG
int mld_debug = 0;
SYSCTL_INT(_net_inet6_mld, OID_AUTO,
    debug, CTLFLAG_RW | CTLFLAG_LOCKED, &mld_debug, 0, "");
#endif
/*
 * Packed Router Alert option structure declaration.
 */
struct mld_raopt {
	struct ip6_hbh          hbh;
	struct ip6_opt          pad;
	struct ip6_opt_router   ra;
} __packed;

/*
 * Router Alert hop-by-hop option header.
 */
static struct mld_raopt mld_ra = {
	.hbh = { .ip6h_nxt = 0, .ip6h_len = 0 },
	.pad = { .ip6o_type = IP6OPT_PADN, .ip6o_len = 0 },
	.ra = {
		.ip6or_type = (u_int8_t)IP6OPT_ROUTER_ALERT,
		.ip6or_len = (u_int8_t)(IP6OPT_RTALERT_LEN - 2),
		.ip6or_value =  {((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
			         (IP6OPT_RTALERT_MLD & 0xFF) }
	}
};
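
/*
 * Resulting wire layout of the option block above (8 octets total,
 * satisfying the hop-by-hop header's 8-octet length requirement;
 * illustrative decoding, assuming IP6OPT_ROUTER_ALERT == 0x05 and
 * IP6OPT_RTALERT_MLD == 0):
 *
 *	nxt | len=0 | PadN | len=0 | type=0x05 | len=2 | value=0x0000
 *
 * The next-header field is left zero here; it is filled in when the
 * header chain is constructed at output time.
 */
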
static struct ip6_pktopts mld_po;

/* Store MLDv2 record count in the module private scratch space */
#define vt_nrecs        pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]

static __inline void
mld_save_context(struct mbuf *m, struct ifnet *ifp)
{
	m->m_pkthdr.rcvif = ifp;
}

static __inline void
mld_scrub_context(struct mbuf *m)
{
	m->m_pkthdr.rcvif = NULL;
}

/*
 * Restore context from a queued output chain.
 * Return saved ifp.
 */
static __inline struct ifnet *
mld_restore_context(struct mbuf *m)
{
	return m->m_pkthdr.rcvif;
}
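
/*
 * Illustrative usage of the three helpers above (a sketch of the output
 * path's pattern, not a verbatim excerpt):
 *
 *	mld_save_context(m, ifp);          // stash ifp before enqueueing m
 *	IF_ENQUEUE(ifq, m);
 *	...
 *	ifp = mld_restore_context(m);      // recover ifp at dispatch time
 *	mld_scrub_context(m);              // clear it before ip6_output()
 */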

/*
 * Retrieve or set threshold between group-source queries in seconds.
 */
static int
sysctl_mld_gsr SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error;
	int i;

	MLD_LOCK();

	i = (int)mld_gsrdelay.tv_sec;

	error = sysctl_handle_int(oidp, &i, 0, req);
	if (error || !req->newptr) {
		goto out_locked;
	}

	if (i < -1 || i >= 60) {
		error = EINVAL;
		goto out_locked;
	}

	mld_gsrdelay.tv_sec = i;

out_locked:
	MLD_UNLOCK();
	return error;
}
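
/*
 * Example (hypothetical userland invocation via sysctl(8)):
 *
 *	# sysctl net.inet6.mld.gsrdelay=20
 *
 * The handler above accepts values in the range -1..59 and rejects
 * anything else with EINVAL.
 */
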
/*
 * Expose struct mld_ifinfo to userland, keyed by ifindex.
 * For use by ifmcstat(8).
 */
static int
sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
	int                     *name;
	int                      error;
	u_int                    namelen;
	struct ifnet            *ifp;
	struct mld_ifinfo       *mli;
	struct mld_ifinfo_u     mli_u;

	name = (int *)arg1;
	namelen = arg2;

	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	if (namelen != 1) {
		return EINVAL;
	}

	MLD_LOCK();

	if (name[0] <= 0 || name[0] > (u_int)if_index) {
		error = ENOENT;
		goto out_locked;
	}

	error = ENOENT;

	ifnet_head_lock_shared();
	ifp = ifindex2ifnet[name[0]];
	ifnet_head_done();
	if (ifp == NULL) {
		goto out_locked;
	}

	bzero(&mli_u, sizeof(mli_u));

	LIST_FOREACH(mli, &mli_head, mli_link) {
		MLI_LOCK(mli);
		if (ifp != mli->mli_ifp) {
			MLI_UNLOCK(mli);
			continue;
		}

		mli_u.mli_ifindex = mli->mli_ifp->if_index;
		mli_u.mli_version = mli->mli_version;
		mli_u.mli_v1_timer = mli->mli_v1_timer;
		mli_u.mli_v2_timer = mli->mli_v2_timer;
		mli_u.mli_flags = mli->mli_flags;
		mli_u.mli_rv = mli->mli_rv;
		mli_u.mli_qi = mli->mli_qi;
		mli_u.mli_qri = mli->mli_qri;
		mli_u.mli_uri = mli->mli_uri;
		MLI_UNLOCK(mli);

		error = SYSCTL_OUT(req, &mli_u, sizeof(mli_u));
		break;
	}

out_locked:
	MLD_UNLOCK();
	return error;
}

static int
sysctl_mld_v2enable SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error;
	int i;
	struct mld_ifinfo *mli;
	struct mld_tparams mtp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	MLD_LOCK();

	i = mld_v2enable;

	error = sysctl_handle_int(oidp, &i, 0, req);
	if (error || !req->newptr) {
		goto out_locked;
	}

	if (i < 0 || i > 1) {
		error = EINVAL;
		goto out_locked;
	}

	mld_v2enable = i;
	/*
	 * If we enabled v2, the state transition will take care of upgrading
	 * the MLD version back to v2. Otherwise, we have to explicitly
	 * downgrade. Note that this functionality is to be used for debugging.
	 */
	if (mld_v2enable == 1) {
		goto out_locked;
	}

	LIST_FOREACH(mli, &mli_head, mli_link) {
		MLI_LOCK(mli);
		if (mld_set_version(mli, MLD_VERSION_1) > 0) {
			mtp.qpt = 1;
		}
		MLI_UNLOCK(mli);
	}

out_locked:
	MLD_UNLOCK();

	mld_set_timeout(&mtp);

	return error;
}

/*
 * Dispatch an entire queue of pending packet chains.
 *
 * Must not be called with in6m_lock held.
 * XXX This routine unlocks the MLD global lock and also the mli lock.
 * Make sure that the calling routine takes a reference on the mli
 * before calling this routine.
 * Also, if we are traversing mli_head, remember to check the mli list
 * generation count and restart the loop if the generation count has
 * changed.
 */
static void
mld_dispatch_queue_locked(struct mld_ifinfo *mli, struct ifqueue *ifq, int limit)
{
	struct mbuf *m;

	MLD_LOCK_ASSERT_HELD();

	if (mli != NULL) {
		MLI_LOCK_ASSERT_HELD(mli);
	}

	for (;;) {
		IF_DEQUEUE(ifq, m);
		if (m == NULL) {
			break;
		}
		MLD_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifq),
		    (uint64_t)VM_KERNEL_ADDRPERM(m)));

		if (mli != NULL) {
			MLI_UNLOCK(mli);
		}
		MLD_UNLOCK();

		mld_dispatch_packet(m);

		MLD_LOCK();
		if (mli != NULL) {
			MLI_LOCK(mli);
		}

		if (--limit == 0) {
			break;
		}
	}

	if (mli != NULL) {
		MLI_LOCK_ASSERT_HELD(mli);
	}
}

/*
 * Filter outgoing MLD report state by group.
 *
 * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
 * and node-local addresses. However, kernel and socket consumers
 * always embed the KAME scope ID in the address provided, so strip it
 * when performing comparison.
 * Note: This is not the same as the *multicast* scope.
 *
 * Return zero if the given group is one for which MLD reports
 * should be suppressed, or non-zero if reports should be issued.
 */
static __inline__ int
mld_is_addr_reported(const struct in6_addr *addr)
{
	VERIFY(IN6_IS_ADDR_MULTICAST(addr));

	if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL) {
		return 0;
	}

	if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL && !IN6_IS_ADDR_UNICAST_BASED_MULTICAST(addr)) {
		struct in6_addr tmp = *addr;
		in6_clearscope(&tmp);
		if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes)) {
			return 0;
		}
	}

	return 1;
}
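
/*
 * Examples (illustrative): reports are suppressed for ff01::1 (node-local
 * scope) and for the all-nodes group ff02::1, but are issued for other
 * link-local groups such as the solicited-node address ff02::1:ff00:1.
 */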

/*
 * Attach MLD when PF_INET6 is attached to an interface.
 */
struct mld_ifinfo *
mld_domifattach(struct ifnet *ifp, zalloc_flags_t how)
{
	struct mld_ifinfo *mli;

	MLD_PRINTF(("%s: called for ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	mli = mli_alloc(how);
	if (mli == NULL) {
		return NULL;
	}

	MLD_LOCK();

	MLI_LOCK(mli);
	mli_initvar(mli, ifp, 0);
	mli->mli_debug |= IFD_ATTACHED;
	MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
	MLI_ADDREF_LOCKED(mli); /* hold a reference for caller */
	MLI_UNLOCK(mli);
	ifnet_lock_shared(ifp);
	mld6_initsilent(ifp, mli);
	ifnet_lock_done(ifp);

	LIST_INSERT_HEAD(&mli_head, mli, mli_link);
	mld_mli_list_genid++;

	MLD_UNLOCK();

	MLD_PRINTF(("%s: allocate mld_ifinfo for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	return mli;
}

/*
 * Attach MLD when PF_INET6 is reattached to an interface.  Caller is
 * expected to have an outstanding reference to the mli.
 */
void
mld_domifreattach(struct mld_ifinfo *mli)
{
	struct ifnet *ifp;

	MLD_LOCK();

	MLI_LOCK(mli);
	VERIFY(!(mli->mli_debug & IFD_ATTACHED));
	ifp = mli->mli_ifp;
	VERIFY(ifp != NULL);
	mli_initvar(mli, ifp, 1);
	mli->mli_debug |= IFD_ATTACHED;
	MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
	MLI_UNLOCK(mli);
	ifnet_lock_shared(ifp);
	mld6_initsilent(ifp, mli);
	ifnet_lock_done(ifp);

	LIST_INSERT_HEAD(&mli_head, mli, mli_link);
	mld_mli_list_genid++;

	MLD_UNLOCK();

	MLD_PRINTF(("%s: reattached mld_ifinfo for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
}

/*
 * Hook for domifdetach.
 */
void
mld_domifdetach(struct ifnet *ifp)
{
	SLIST_HEAD(, in6_multi) in6m_dthead;

	SLIST_INIT(&in6m_dthead);

	MLD_PRINTF(("%s: called for ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	MLD_LOCK();
	mli_delete(ifp, (struct mld_in6m_relhead *)&in6m_dthead);
	MLD_UNLOCK();

	/* Now that we've dropped all locks, release detached records */
	MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
}

/*
 * Called at interface detach time.  Note that we only flush all deferred
 * responses and record releases; all remaining inm records and their source
 * entries related to this interface are left intact, in order to handle
 * the reattach case.
 */
static void
mli_delete(const struct ifnet *ifp, struct mld_in6m_relhead *in6m_dthead)
{
	struct mld_ifinfo *mli, *tmli;

	MLD_LOCK_ASSERT_HELD();

	LIST_FOREACH_SAFE(mli, &mli_head, mli_link, tmli) {
		MLI_LOCK(mli);
		if (mli->mli_ifp == ifp) {
			/*
			 * Free deferred General Query responses.
			 */
			IF_DRAIN(&mli->mli_gq);
			IF_DRAIN(&mli->mli_v1q);
			mld_flush_relq(mli, in6m_dthead);
			VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
			mli->mli_debug &= ~IFD_ATTACHED;
			MLI_UNLOCK(mli);

			LIST_REMOVE(mli, mli_link);
			MLI_REMREF(mli); /* release mli_head reference */
			mld_mli_list_genid++;
			return;
		}
		MLI_UNLOCK(mli);
	}
	panic("%s: mld_ifinfo not found for ifp %p(%s)", __func__,
	    ifp, ifp->if_xname);
}

__private_extern__ void
mld6_initsilent(struct ifnet *ifp, struct mld_ifinfo *mli)
{
	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	MLI_LOCK_ASSERT_NOTHELD(mli);
	MLI_LOCK(mli);
	if (!(ifp->if_flags & IFF_MULTICAST) &&
	    (ifp->if_eflags & (IFEF_IPV6_ND6ALT | IFEF_LOCALNET_PRIVATE))) {
		mli->mli_flags |= MLIF_SILENT;
	} else {
		mli->mli_flags &= ~MLIF_SILENT;
	}
	MLI_UNLOCK(mli);
}

static void
mli_initvar(struct mld_ifinfo *mli, struct ifnet *ifp, int reattach)
{
	MLI_LOCK_ASSERT_HELD(mli);

	mli->mli_ifp = ifp;
	if (mld_v2enable) {
		mli->mli_version = MLD_VERSION_2;
	} else {
		mli->mli_version = MLD_VERSION_1;
	}
	mli->mli_flags = 0;
	mli->mli_rv = MLD_RV_INIT;
	mli->mli_qi = MLD_QI_INIT;
	mli->mli_qri = MLD_QRI_INIT;
	mli->mli_uri = MLD_URI_INIT;

	if (mld_use_allow) {
		mli->mli_flags |= MLIF_USEALLOW;
	}
	if (!reattach) {
		SLIST_INIT(&mli->mli_relinmhead);
	}

	/*
	 * Responses to general queries are subject to bounds.
	 */
	mli->mli_gq.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
	mli->mli_v1q.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
}

static struct mld_ifinfo *
mli_alloc(zalloc_flags_t how)
{
	struct mld_ifinfo *mli = zalloc_flags(mli_zone, how | Z_ZERO);
	if (mli != NULL) {
		lck_mtx_init(&mli->mli_lock, &mld_mtx_grp, &mld_mtx_attr);
		mli->mli_debug |= IFD_ALLOC;
	}
	return mli;
}

static void
mli_free(struct mld_ifinfo *mli)
{
	MLI_LOCK(mli);
	if (mli->mli_debug & IFD_ATTACHED) {
		panic("%s: attached mli=%p is being freed", __func__, mli);
		/* NOTREACHED */
	} else if (mli->mli_ifp != NULL) {
		panic("%s: ifp not NULL for mli=%p", __func__, mli);
		/* NOTREACHED */
	} else if (!(mli->mli_debug & IFD_ALLOC)) {
		panic("%s: mli %p cannot be freed", __func__, mli);
		/* NOTREACHED */
	} else if (mli->mli_refcnt != 0) {
		panic("%s: non-zero refcnt mli=%p", __func__, mli);
		/* NOTREACHED */
	}
	mli->mli_debug &= ~IFD_ALLOC;
	MLI_UNLOCK(mli);

	lck_mtx_destroy(&mli->mli_lock, &mld_mtx_grp);
	zfree(mli_zone, mli);
}

void
mli_addref(struct mld_ifinfo *mli, int locked)
{
	if (!locked) {
		MLI_LOCK_SPIN(mli);
	} else {
		MLI_LOCK_ASSERT_HELD(mli);
	}

	if (++mli->mli_refcnt == 0) {
		panic("%s: mli=%p wraparound refcnt", __func__, mli);
		/* NOTREACHED */
	}
	if (!locked) {
		MLI_UNLOCK(mli);
	}
}

void
mli_remref(struct mld_ifinfo *mli)
{
	SLIST_HEAD(, in6_multi) in6m_dthead;
	struct ifnet *ifp;

	MLI_LOCK_SPIN(mli);

	if (mli->mli_refcnt == 0) {
		panic("%s: mli=%p negative refcnt", __func__, mli);
		/* NOTREACHED */
	}

	--mli->mli_refcnt;
	if (mli->mli_refcnt > 0) {
		MLI_UNLOCK(mli);
		return;
	}

	ifp = mli->mli_ifp;
	mli->mli_ifp = NULL;
	IF_DRAIN(&mli->mli_gq);
	IF_DRAIN(&mli->mli_v1q);
	SLIST_INIT(&in6m_dthead);
	mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
	VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
	MLI_UNLOCK(mli);

	/* Now that we've dropped all locks, release detached records */
	MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);

	MLD_PRINTF(("%s: freeing mld_ifinfo for ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	mli_free(mli);
}

/*
 * Process a received MLDv1 general or address-specific query.
 * Assumes that the query header has been pulled up to sizeof(mld_hdr).
 *
 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
 * mld_addr. This is OK as we own the mbuf chain.
 */
static int
mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
    /*const*/ struct mld_hdr *mld)
{
	struct mld_ifinfo       *mli;
	struct in6_multi        *inm;
	int                      err = 0, is_general_query;
	uint16_t                 timer;
	struct mld_tparams       mtp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	MLD_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	if (!mld_v1enable) {
		MLD_PRINTF(("%s: ignore v1 query %s on ifp 0x%llx(%s)\n",
		    __func__, ip6_sprintf(&mld->mld_addr),
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		goto done;
	}

	/*
	 * RFC3810 Section 6.2: MLD queries must originate from
	 * a router's link-local address.
	 */
	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
		MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n",
		    __func__, ip6_sprintf(&ip6->ip6_src),
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		goto done;
	}

	/*
	 * Do address field validation upfront before we accept
	 * the query.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
		/*
		 * MLDv1 General Query.
		 * If this was not sent to the all-nodes group, ignore it.
		 */
		struct in6_addr          dst;

		dst = ip6->ip6_dst;
		in6_clearscope(&dst);
		if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) {
			err = EINVAL;
			goto done;
		}
		is_general_query = 1;
	} else {
		/*
		 * Embed scope ID of receiving interface in MLD query for
		 * lookup whilst we don't hold other locks.
		 */
		(void)in6_setscope(&mld->mld_addr, ifp, NULL);
	}

	/*
	 * Switch to MLDv1 host compatibility mode.
	 */
	mli = MLD_IFINFO(ifp);
	VERIFY(mli != NULL);

	MLI_LOCK(mli);
	mtp.qpt = mld_set_version(mli, MLD_VERSION_1);
	MLI_UNLOCK(mli);

	timer = ntohs(mld->mld_maxdelay) / MLD_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	if (is_general_query) {
		struct in6_multistep step;

		MLD_PRINTF(("%s: process v1 general query on ifp 0x%llx(%s)\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		/*
		 * For each reporting group joined on this
		 * interface, kick the report timer.
		 */
		in6_multihead_lock_shared();
		IN6_FIRST_MULTI(step, inm);
		while (inm != NULL) {
			IN6M_LOCK(inm);
			if (inm->in6m_ifp == ifp) {
				mtp.cst += mld_v1_update_group(inm, timer);
			}
			IN6M_UNLOCK(inm);
			IN6_NEXT_MULTI(step, inm);
		}
		in6_multihead_lock_done();
	} else {
		/*
		 * MLDv1 Group-Specific Query.
		 * If this is a group-specific MLDv1 query, we need only
		 * look up the single group to process it.
		 */
		in6_multihead_lock_shared();
		IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
		in6_multihead_lock_done();

		if (inm != NULL) {
			IN6M_LOCK(inm);
			MLD_PRINTF(("%s: process v1 query %s on "
			    "ifp 0x%llx(%s)\n", __func__,
			    ip6_sprintf(&mld->mld_addr),
			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
			mtp.cst = mld_v1_update_group(inm, timer);
			IN6M_UNLOCK(inm);
			IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
		}
		/* XXX Clear embedded scope ID as userland won't expect it. */
		in6_clearscope(&mld->mld_addr);
	}
done:
	mld_set_timeout(&mtp);

	return err;
}

/*
 * Update the report timer on a group in response to an MLDv1 query.
 *
 * If we are becoming the reporting member for this group, start the timer.
 * If we already are the reporting member for this group, and timer is
 * below the threshold, reset it.
 *
 * We may be updating the group for the first time since we switched
 * to MLDv2. If we are, then we must clear any recorded source lists,
 * and transition to REPORTING state; the group timer is overloaded
 * for group and group-source query responses.
 *
 * Unlike MLDv2, the delay per group should be jittered
 * to avoid bursts of MLDv1 reports.
 */
static uint32_t
mld_v1_update_group(struct in6_multi *inm, const int timer)
{
	IN6M_LOCK_ASSERT_HELD(inm);

	MLD_PRINTF(("%s: %s/%s timer=%d\n", __func__,
	    ip6_sprintf(&inm->in6m_addr),
	    if_name(inm->in6m_ifp), timer));

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
		break;
	case MLD_REPORTING_MEMBER:
		if (inm->in6m_timer != 0 &&
		    inm->in6m_timer <= timer) {
			MLD_PRINTF(("%s: REPORTING and timer running, "
			    "skipping.\n", __func__));
			break;
		}
		OS_FALLTHROUGH;
	case MLD_SG_QUERY_PENDING_MEMBER:
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_IDLE_MEMBER:
	case MLD_LAZY_MEMBER:
	case MLD_AWAKENING_MEMBER:
		MLD_PRINTF(("%s: ->REPORTING\n", __func__));
		inm->in6m_state = MLD_REPORTING_MEMBER;
		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
		break;
	case MLD_SLEEPING_MEMBER:
		MLD_PRINTF(("%s: ->AWAKENING\n", __func__));
		inm->in6m_state = MLD_AWAKENING_MEMBER;
		break;
	case MLD_LEAVING_MEMBER:
		break;
	}

	return inm->in6m_timer;
}

/*
 * Process a received MLDv2 general, group-specific or
 * group-and-source-specific query.
 *
 * Assumes that the query header has been pulled up to sizeof(mldv2_query).
 *
 * Return 0 if successful, otherwise an appropriate error code is returned.
 */
static int
mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
    struct mbuf *m, const int off, const int icmp6len)
{
	struct mld_ifinfo       *mli;
	struct mldv2_query      *mld;
	struct in6_multi        *inm;
	uint32_t                 maxdelay, nsrc, qqi, timer;
	int                      err = 0, is_general_query;
	uint8_t                  qrv;
	struct mld_tparams       mtp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

	MLD_LOCK_ASSERT_NOTHELD();

	is_general_query = 0;

	if (!mld_v2enable) {
		MLD_PRINTF(("%s: ignore v2 query %s on ifp 0x%llx(%s)\n",
		    __func__, ip6_sprintf(&ip6->ip6_src),
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		goto done;
	}

	/*
	 * RFC3810 Section 6.2: MLD queries must originate from
	 * a router's link-local address.
	 */
	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
		MLD_PRINTF(("%s: ignore v2 query src %s on ifp 0x%llx(%s)\n",
		    __func__, ip6_sprintf(&ip6->ip6_src),
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		goto done;
	}

	MLD_PRINTF(("%s: input v2 query on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);

	maxdelay = ntohs(mld->mld_maxdelay);    /* in milliseconds */
	if (maxdelay > SHRT_MAX) {
		maxdelay = (MLD_MRC_MANT((uint16_t)maxdelay) | 0x1000) <<
		    (MLD_MRC_EXP((uint16_t)maxdelay) + 3);
	}
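	/*
	 * Worked example (illustrative, assuming MLD_MRC_MANT()/MLD_MRC_EXP()
	 * extract the 12-bit mantissa and 3-bit exponent of an RFC 3810
	 * section 5.1.3 Maximum Response Code): for maxdelay = 0x8400,
	 * mant = 0x400 and exp = 0, so the decoded delay becomes
	 * (0x400 | 0x1000) << 3 = 40960 ms.
	 */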
	timer = maxdelay / MLD_TIMER_SCALE;
	if (timer == 0) {
		timer = 1;
	}

	qrv = MLD_QRV(mld->mld_misc);
	if (qrv < 2) {
		MLD_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
		    qrv, MLD_RV_INIT));
		qrv = MLD_RV_INIT;
	}

	qqi = mld->mld_qqi;
	if (qqi >= 128) {
		qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
		    (MLD_QQIC_EXP(mld->mld_qqi) + 3);
	}

	nsrc = ntohs(mld->mld_numsrc);
	if (nsrc > MLD_MAX_GS_SOURCES) {
		err = EMSGSIZE;
		goto done;
	}
	if (icmp6len < sizeof(struct mldv2_query) +
	    (nsrc * sizeof(struct in6_addr))) {
		err = EMSGSIZE;
		goto done;
	}

	/*
	 * Do further input validation upfront to avoid resetting timers
	 * should we need to discard this query.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
		/*
		 * A general query with a source list has undefined
		 * behaviour; discard it.
		 */
		if (nsrc > 0) {
			err = EINVAL;
			goto done;
		}
		is_general_query = 1;
	} else {
		/*
		 * Embed scope ID of receiving interface in MLD query for
		 * lookup whilst we don't hold other locks (due to KAME
		 * locking lameness). We own this mbuf chain just now.
		 */
		(void)in6_setscope(&mld->mld_addr, ifp, NULL);
	}

	mli = MLD_IFINFO(ifp);
	VERIFY(mli != NULL);

	MLI_LOCK(mli);
	/*
	 * Discard the v2 query if we're in Compatibility Mode.
	 * The RFC is pretty clear that hosts need to stay in MLDv1 mode
	 * until the Old Version Querier Present timer expires.
	 */
	if (mli->mli_version != MLD_VERSION_2) {
		MLI_UNLOCK(mli);
		goto done;
	}

	mtp.qpt = mld_set_version(mli, MLD_VERSION_2);
	mli->mli_rv = qrv;
	mli->mli_qi = qqi;
	mli->mli_qri = MAX(timer, MLD_QRI_MIN);

	MLD_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, mli->mli_rv,
	    mli->mli_qi, mli->mli_qri));

	if (is_general_query) {
		/*
		 * MLDv2 General Query.
		 *
		 * Schedule a current-state report on this ifp for
		 * all groups, possibly containing source lists.
		 *
		 * If there is a pending General Query response
		 * scheduled earlier than the selected delay, do
		 * not schedule any other reports.
		 * Otherwise, reset the interface timer.
		 */
		MLD_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
			mtp.it = mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
		}
		MLI_UNLOCK(mli);
	} else {
		MLI_UNLOCK(mli);
		/*
		 * MLDv2 Group-specific or Group-and-source-specific Query.
		 *
		 * Group-source-specific queries are throttled on
		 * a per-group basis to defeat denial-of-service attempts.
		 * Queries for groups we are not a member of on this
		 * link are simply ignored.
		 */
		in6_multihead_lock_shared();
		IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
		in6_multihead_lock_done();
		if (inm == NULL) {
			goto done;
		}

		IN6M_LOCK(inm);
		if (nsrc > 0) {
			if (!ratecheck(&inm->in6m_lastgsrtv,
			    &mld_gsrdelay)) {
				MLD_PRINTF(("%s: GS query throttled.\n",
				    __func__));
				IN6M_UNLOCK(inm);
				IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
				goto done;
			}
		}
		MLD_PRINTF(("%s: process v2 group query on ifp 0x%llx(%s)\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		/*
		 * If there is a pending General Query response
		 * scheduled sooner than the selected delay, no
		 * further report need be scheduled.
		 * Otherwise, prepare to respond to the
		 * group-specific or group-and-source query.
		 */
		MLI_LOCK(mli);
		mtp.it = mli->mli_v2_timer;
		MLI_UNLOCK(mli);
		if (mtp.it == 0 || mtp.it >= timer) {
			(void) mld_v2_process_group_query(inm, timer, m, off);
			mtp.cst = inm->in6m_timer;
		}
		IN6M_UNLOCK(inm);
		IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
		/* XXX Clear embedded scope ID as userland won't expect it. */
		in6_clearscope(&mld->mld_addr);
	}
done:
	if (mtp.it > 0) {
		MLD_PRINTF(("%s: v2 general query response scheduled in "
		    "T+%d seconds on ifp 0x%llx(%s)\n", __func__, mtp.it,
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	}
	mld_set_timeout(&mtp);

	return err;
}

/*
 * Process a received MLDv2 group-specific or group-and-source-specific
 * query.
 * Return <0 if any error occurred. Currently this is ignored.
 */
static int
mld_v2_process_group_query(struct in6_multi *inm, int timer, struct mbuf *m0,
    const int off)
{
	struct mldv2_query      *mld;
	int                      retval;
	uint16_t                 nsrc;

	IN6M_LOCK_ASSERT_HELD(inm);

	retval = 0;
	mld = (struct mldv2_query *)(mtod(m0, uint8_t *) + off);

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
	case MLD_SLEEPING_MEMBER:
	case MLD_LAZY_MEMBER:
	case MLD_AWAKENING_MEMBER:
	case MLD_IDLE_MEMBER:
	case MLD_LEAVING_MEMBER:
		return retval;
	case MLD_REPORTING_MEMBER:
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_SG_QUERY_PENDING_MEMBER:
		break;
	}

	nsrc = ntohs(mld->mld_numsrc);

	/*
	 * Deal with group-specific queries upfront.
	 * If any group query is already pending, purge any recorded
	 * source-list state if it exists, and schedule a query response
	 * for this group-specific query.
	 */
	if (nsrc == 0) {
		if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
		    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
			in6m_clear_recorded(inm);
			timer = min(inm->in6m_timer, timer);
		}
		inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
		return retval;
	}

	/*
	 * Deal with the case where a group-and-source-specific query has
	 * been received but a group-specific query is already pending.
	 */
	if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
		timer = min(inm->in6m_timer, timer);
		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
		return retval;
	}

	/*
	 * Finally, deal with the case where a group-and-source-specific
	 * query has been received, where a response to a previous g-s-r
	 * query exists, or none exists.
	 * In this case, we need to parse the source-list which the Querier
	 * has provided us with and check if we have any source list filter
 * entries at T1 for these sources. If we do not, there is no need to
 * schedule a report and the query may be dropped.
 * If we do, we must record them and schedule a current-state
 * report for those sources.
 */
	if (inm->in6m_nsrc > 0) {
		struct mbuf             *m;
		struct in6_addr          addr;
		int                      i, nrecorded;
		int                      soff;

		m = m0;
		soff = off + sizeof(struct mldv2_query);
		nrecorded = 0;
		for (i = 0; i < nsrc; i++) {
			m_copydata(m, soff, sizeof(addr), &addr);
			retval = in6m_record_source(inm, &addr);
			if (retval < 0) {
				break;
			}
			nrecorded += retval;
			soff += sizeof(struct in6_addr);

			while (m && (soff >= m->m_len)) {
				soff -= m->m_len;
				m = m->m_next;
			}

			/* should not be possible: */
			if (m == NULL) {
				break;
			}
		}
		if (nrecorded > 0) {
			MLD_PRINTF(("%s: schedule response to SG query\n",
			    __func__));
			inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
			inm->in6m_timer = MLD_RANDOM_DELAY(timer);
		}
	}

	return retval;
}

/*
 * Process a received MLDv1 host membership report.
 * Assumes mld points to mld_hdr in pulled up mbuf chain.
 *
 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
 * mld_addr. This is OK as we own the mbuf chain.
 */
static int
mld_v1_input_report(struct ifnet *ifp, struct mbuf *m,
    const struct ip6_hdr *ip6, /*const*/ struct mld_hdr *mld)
{
	struct in6_addr          src, dst;
	struct in6_ifaddr       *ia;
	struct in6_multi        *inm;

	if (!mld_v1enable) {
		MLD_PRINTF(("%s: ignore v1 report %s on ifp 0x%llx(%s)\n",
		    __func__, ip6_sprintf(&mld->mld_addr),
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		return 0;
	}

	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
		return 0;
	}

	/*
	 * MLDv1 reports must originate from a host's link-local address,
	 * or the unspecified address (when booting).
	 */
	src = ip6->ip6_src;
	in6_clearscope(&src);
	if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
		MLD_PRINTF(("%s: ignore v1 report src %s on ifp 0x%llx(%s)\n",
		    __func__, ip6_sprintf(&ip6->ip6_src),
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		return EINVAL;
	}

	/*
	 * RFC2710 Section 4: MLDv1 reports must pertain to a multicast
	 * group, and must be directed to the group itself.
	 */
	dst = ip6->ip6_dst;
	in6_clearscope(&dst);
	if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) ||
	    !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
		MLD_PRINTF(("%s: ignore v1 report dst %s on ifp 0x%llx(%s)\n",
		    __func__, ip6_sprintf(&ip6->ip6_dst),
		    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
		return EINVAL;
	}

	/*
	 * Make sure we don't hear our own membership report, as fast
	 * leave requires knowing that we are the only member of a
	 * group. Assume we used the link-local address if available,
	 * otherwise look for ::.
	 *
	 * XXX Note that scope ID comparison is needed for the address
	 * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
	 * performed for the on-wire address.
	 */
	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
	if (ia != NULL) {
		IFA_LOCK(&ia->ia_ifa);
		if ((IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia)))) {
			IFA_UNLOCK(&ia->ia_ifa);
			IFA_REMREF(&ia->ia_ifa);
			return 0;
		}
		IFA_UNLOCK(&ia->ia_ifa);
		IFA_REMREF(&ia->ia_ifa);
	} else if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
		return 0;
	}

	MLD_PRINTF(("%s: process v1 report %s on ifp 0x%llx(%s)\n",
	    __func__, ip6_sprintf(&mld->mld_addr),
	    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

	/*
	 * Embed scope ID of receiving interface in MLD query for lookup
	 * whilst we don't hold other locks (due to KAME locking lameness).
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
		(void)in6_setscope(&mld->mld_addr, ifp, NULL);
	}

	/*
	 * MLDv1 report suppression.
	 * If we are a member of this group, and our membership should be
	 * reported, and our group timer is pending or about to be reset,
	 * stop our group timer by transitioning to the 'lazy' state.
	 */
	in6_multihead_lock_shared();
	IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
	in6_multihead_lock_done();

	if (inm != NULL) {
		struct mld_ifinfo *mli;

		IN6M_LOCK(inm);
		mli = inm->in6m_mli;
		VERIFY(mli != NULL);

		MLI_LOCK(mli);
		/*
		 * If we are in MLDv2 host mode, do not allow the
		 * other host's MLDv1 report to suppress our reports.
		 */
		if (mli->mli_version == MLD_VERSION_2) {
			MLI_UNLOCK(mli);
			IN6M_UNLOCK(inm);
			IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
			goto out;
		}
		MLI_UNLOCK(mli);

		inm->in6m_timer = 0;

		switch (inm->in6m_state) {
		case MLD_NOT_MEMBER:
		case MLD_SILENT_MEMBER:
		case MLD_SLEEPING_MEMBER:
			break;
		case MLD_REPORTING_MEMBER:
		case MLD_IDLE_MEMBER:
		case MLD_AWAKENING_MEMBER:
			MLD_PRINTF(("%s: report suppressed for %s on "
			    "ifp 0x%llx(%s)\n", __func__,
			    ip6_sprintf(&mld->mld_addr),
			    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
			OS_FALLTHROUGH;
		case MLD_LAZY_MEMBER:
			inm->in6m_state = MLD_LAZY_MEMBER;
			break;
		case MLD_G_QUERY_PENDING_MEMBER:
		case MLD_SG_QUERY_PENDING_MEMBER:
		case MLD_LEAVING_MEMBER:
			break;
		}
		IN6M_UNLOCK(inm);
		IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
	}

out:
	/* XXX Clear embedded scope ID as userland won't expect it. */
	in6_clearscope(&mld->mld_addr);

	return 0;
}

/*
 * MLD input path.
 *
 * Assume query messages which fit in a single ICMPv6 message header
 * have been pulled up.
 * Assume that userland will want to see the message, even if it
 * otherwise fails kernel input validation; do not free it.
 * Pullup may however free the mbuf chain m if it fails.
 *
 * Return IPPROTO_DONE if we freed m. Otherwise, return 0.
 */
int
mld_input(struct mbuf *m, int off, int icmp6len)
{
	struct ifnet    *ifp = NULL;
	struct ip6_hdr  *ip6 = NULL;
	struct mld_hdr  *mld = NULL;
	int              mldlen = 0;

	MLD_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m), off));

	ifp = m->m_pkthdr.rcvif;

	/* Pullup to appropriate size. */
	mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
	if (mld->mld_type == MLD_LISTENER_QUERY &&
	    icmp6len >= sizeof(struct mldv2_query)) {
		mldlen = sizeof(struct mldv2_query);
	} else {
		mldlen = sizeof(struct mld_hdr);
	}
	/* Check that the mldv2_query/mld_hdr fits in the first mbuf. */
	IP6_EXTHDR_CHECK(m, off, mldlen, return IPPROTO_DONE);
	IP6_EXTHDR_GET(mld, struct mld_hdr *, m, off, mldlen);
	if (mld == NULL) {
		icmp6stat.icp6s_badlen++;
		return IPPROTO_DONE;
	}
	ip6 = mtod(m, struct ip6_hdr *);

	/*
	 * Userland needs to see all of this traffic for implementing
	 * the endpoint discovery portion of multicast routing.
	 */
	switch (mld->mld_type) {
	case MLD_LISTENER_QUERY:
		icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
		if (icmp6len == sizeof(struct mld_hdr)) {
			if (mld_v1_input_query(ifp, ip6, mld) != 0) {
				return 0;
			}
		} else if (icmp6len >= sizeof(struct mldv2_query)) {
			if (mld_v2_input_query(ifp, ip6, m, off,
			    icmp6len) != 0) {
				return 0;
			}
		}
		break;
	case MLD_LISTENER_REPORT:
		icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
		if (mld_v1_input_report(ifp, m, ip6, mld) != 0) {
			return 0;
		}
		break;
	case MLDV2_LISTENER_REPORT:
		icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
		break;
	case MLD_LISTENER_DONE:
		icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
		break;
	default:
		break;
	}

	return 0;
}

/*
 * Schedule MLD timer based on various parameters; caller must ensure that
 * lock ordering is maintained as this routine acquires MLD global lock.
 */
void
mld_set_timeout(struct mld_tparams *mtp)
{
	MLD_LOCK_ASSERT_NOTHELD();
	VERIFY(mtp != NULL);

	if (mtp->qpt != 0 || mtp->it != 0 || mtp->cst != 0 || mtp->sct != 0) {
		MLD_LOCK();
		if (mtp->qpt != 0) {
			querier_present_timers_running6 = 1;
		}
		if (mtp->it != 0) {
			interface_timers_running6 = 1;
		}
		if (mtp->cst != 0) {
			current_state_timers_running6 = 1;
		}
		if (mtp->sct != 0) {
			state_change_timers_running6 = 1;
		}
		if (mtp->fast) {
			mld_sched_fast_timeout();
		} else {
			mld_sched_timeout();
		}
		MLD_UNLOCK();
	}
}
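
/*
 * Sketch of the calling convention used throughout this file
 * (illustrative; mirrors mld_v1_input_query() above): callers collect
 * pending timer state in a local mld_tparams and schedule once, with no
 * MLD locks held:
 *
 *	struct mld_tparams mtp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };
 *	...
 *	mtp.cst = mld_v1_update_group(inm, timer);
 *	...
 *	mld_set_timeout(&mtp);
 */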

void
mld_set_fast_timeout(struct mld_tparams *mtp)
{
	VERIFY(mtp != NULL);
	mtp->fast = true;
	mld_set_timeout(mtp);
}

/*
 * MLD6 timer handler (per 1 second).
 */
static void
mld_timeout(thread_call_param_t arg0, thread_call_param_t arg1 __unused)
{
	struct ifqueue           scq;   /* State-change packets */
	struct ifqueue           qrq;   /* Query response packets */
	struct ifnet            *ifp;
	struct mld_ifinfo       *mli;
	struct in6_multi        *inm;
	int                      uri_sec = 0;
	unsigned int genid = mld_mli_list_genid;
	bool                     fast = arg0 != NULL;

	SLIST_HEAD(, in6_multi) in6m_dthead;

	SLIST_INIT(&in6m_dthead);

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the timeout callout to update the counter
	 * returnable via net_uptime().
	 */
	net_update_uptime();

	MLD_LOCK();

	MLD_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d, fast %d\n", __func__,
	    querier_present_timers_running6, interface_timers_running6,
	    current_state_timers_running6, state_change_timers_running6, fast));

	if (fast) {
		/*
		 * When running the fast timer, skip processing
		 * of "querier present" timers since they are
		 * based on 1-second intervals.
		 */
		goto skip_query_timers;
	}
	/*
	 * MLDv1 querier present timer processing.
	 */
	if (querier_present_timers_running6) {
		querier_present_timers_running6 = 0;
		LIST_FOREACH(mli, &mli_head, mli_link) {
			MLI_LOCK(mli);
			mld_v1_process_querier_timers(mli);
			if (mli->mli_v1_timer > 0) {
				querier_present_timers_running6 = 1;
			}
			MLI_UNLOCK(mli);
		}
	}

	/*
	 * MLDv2 General Query response timer processing.
	 */
	if (interface_timers_running6) {
		MLD_PRINTF(("%s: interface timers running\n", __func__));
		interface_timers_running6 = 0;
		mli = LIST_FIRST(&mli_head);

		while (mli != NULL) {
			if (mli->mli_flags & MLIF_PROCESSED) {
				mli = LIST_NEXT(mli, mli_link);
				continue;
			}

			MLI_LOCK(mli);
			if (mli->mli_version != MLD_VERSION_2) {
				MLI_UNLOCK(mli);
				mli = LIST_NEXT(mli, mli_link);
				continue;
			}
			/*
			 * XXX The logic below ends up calling
			 * mld_dispatch_packet which can unlock mli
			 * and the global MLD lock.
1697 			 * Therefore grab a reference on MLI and also
1698 			 * check the generation count to see if we should
1699 			 * iterate the list again.
1700 			 */
1701 			MLI_ADDREF_LOCKED(mli);
1702 
1703 			if (mli->mli_v2_timer == 0) {
1704 				/* Do nothing. */
1705 			} else if (--mli->mli_v2_timer == 0) {
1706 				if (mld_v2_dispatch_general_query(mli) > 0) {
1707 					interface_timers_running6 = 1;
1708 				}
1709 			} else {
1710 				interface_timers_running6 = 1;
1711 			}
1712 			mli->mli_flags |= MLIF_PROCESSED;
1713 			MLI_UNLOCK(mli);
1714 			MLI_REMREF(mli);
1715 
1716 			if (genid != mld_mli_list_genid) {
1717 				MLD_PRINTF(("%s: MLD information list changed "
1718 				    "in the middle of iteration! Restart iteration.\n",
1719 				    __func__));
1720 				mli = LIST_FIRST(&mli_head);
1721 				genid = mld_mli_list_genid;
1722 			} else {
1723 				mli = LIST_NEXT(mli, mli_link);
1724 			}
1725 		}
1726 
1727 		LIST_FOREACH(mli, &mli_head, mli_link)
1728 		mli->mli_flags &= ~MLIF_PROCESSED;
1729 	}
1730 
1731 skip_query_timers:
1732 	if (!current_state_timers_running6 &&
1733 	    !state_change_timers_running6) {
1734 		goto out_locked;
1735 	}
1736 
1737 	current_state_timers_running6 = 0;
1738 	state_change_timers_running6 = 0;
1739 
1740 	MLD_PRINTF(("%s: state change timers running\n", __func__));
1741 
1742 	memset(&qrq, 0, sizeof(struct ifqueue));
1743 	qrq.ifq_maxlen = MLD_MAX_G_GS_PACKETS;
1744 
1745 	memset(&scq, 0, sizeof(struct ifqueue));
1746 	scq.ifq_maxlen = MLD_MAX_STATE_CHANGE_PACKETS;
1747 
1748 	/*
1749 	 * MLD host report and state-change timer processing.
1750 	 * Note: Processing a v2 group timer may remove a node.
1751 	 */
1752 	mli = LIST_FIRST(&mli_head);
1753 
1754 	while (mli != NULL) {
1755 		struct in6_multistep step;
1756 
1757 		if (mli->mli_flags & MLIF_PROCESSED) {
1758 			mli = LIST_NEXT(mli, mli_link);
1759 			continue;
1760 		}
1761 
1762 		MLI_LOCK(mli);
1763 		ifp = mli->mli_ifp;
1764 		uri_sec = MLD_RANDOM_DELAY(mli->mli_uri);
1765 		MLI_UNLOCK(mli);
1766 
1767 		in6_multihead_lock_shared();
1768 		IN6_FIRST_MULTI(step, inm);
1769 		while (inm != NULL) {
1770 			IN6M_LOCK(inm);
1771 			if (inm->in6m_ifp != ifp) {
1772 				goto next;
1773 			}
1774 
1775 			MLI_LOCK(mli);
1776 			switch (mli->mli_version) {
1777 			case MLD_VERSION_1:
1778 				mld_v1_process_group_timer(inm,
1779 				    mli->mli_version);
1780 				break;
1781 			case MLD_VERSION_2:
1782 				mld_v2_process_group_timers(mli, &qrq,
1783 				    &scq, inm, uri_sec);
1784 				break;
1785 			}
1786 			MLI_UNLOCK(mli);
1787 next:
1788 			IN6M_UNLOCK(inm);
1789 			IN6_NEXT_MULTI(step, inm);
1790 		}
1791 		in6_multihead_lock_done();
1792 
1793 		/*
1794 		 * XXX The logic below ends up calling
1795 		 * mld_dispatch_packet which can unlock mli
1796 		 * and the global MLD lock.
1797 		 * Therefore grab a reference on MLI and also
1798 		 * check the generation count to see if we should
1799 		 * iterate the list again.
1800 		 */
1801 		MLI_LOCK(mli);
1802 		MLI_ADDREF_LOCKED(mli);
1803 		if (mli->mli_version == MLD_VERSION_1) {
1804 			mld_dispatch_queue_locked(mli, &mli->mli_v1q, 0);
1805 		} else if (mli->mli_version == MLD_VERSION_2) {
1806 			MLI_UNLOCK(mli);
1807 			mld_dispatch_queue_locked(NULL, &qrq, 0);
1808 			mld_dispatch_queue_locked(NULL, &scq, 0);
1809 			VERIFY(qrq.ifq_len == 0);
1810 			VERIFY(scq.ifq_len == 0);
1811 			MLI_LOCK(mli);
1812 		}
1813 		/*
1814 		 * In case there are still any pending membership reports
1815 		 * which didn't get drained at version change time.
1816 		 */
1817 		IF_DRAIN(&mli->mli_v1q);
1818 		/*
1819 		 * Release all deferred inm records, and drain any locally
1820 		 * enqueued packets; do it even if the current MLD version
1821 		 * for the link is no longer MLDv2, in order to handle the
1822 		 * version change case.
1823 		 */
1824 		mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
1825 		VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
1826 		mli->mli_flags |= MLIF_PROCESSED;
1827 		MLI_UNLOCK(mli);
1828 		MLI_REMREF(mli);
1829 
1830 		IF_DRAIN(&qrq);
1831 		IF_DRAIN(&scq);
1832 
1833 		if (genid != mld_mli_list_genid) {
1834 			MLD_PRINTF(("%s: MLD information list changed "
1835 			    "in the middle of iteration! Restart iteration.\n",
1836 			    __func__));
1837 			mli = LIST_FIRST(&mli_head);
1838 			genid = mld_mli_list_genid;
1839 		} else {
1840 			mli = LIST_NEXT(mli, mli_link);
1841 		}
1842 	}
1843 
1844 	LIST_FOREACH(mli, &mli_head, mli_link)
1845 	mli->mli_flags &= ~MLIF_PROCESSED;
1846 
1847 out_locked:
1848 	/* re-arm the timer if there's work to do */
1849 	if (fast) {
1850 		mld_fast_timeout_run = false;
1851 	} else {
1852 		mld_timeout_run = false;
1853 	}
1854 	mld_sched_timeout();
1855 	MLD_UNLOCK();
1856 
1857 	/* Now that we've dropped all locks, release detached records */
1858 	MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
1859 }
1860 
1861 static void
1862 mld_sched_timeout(void)
1863 {
1864 	static thread_call_t mld_timeout_tcall;
1865 	uint64_t deadline = 0, leeway = 0;
1866 
1867 	MLD_LOCK_ASSERT_HELD();
1868 	if (mld_timeout_tcall == NULL) {
1869 		mld_timeout_tcall =
1870 		    thread_call_allocate_with_options(mld_timeout,
1871 		    NULL,
1872 		    THREAD_CALL_PRIORITY_KERNEL,
1873 		    THREAD_CALL_OPTIONS_ONCE);
1874 	}
1875 
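	/*
	 * Arm a one-shot wakeup mld_timeout_delay milliseconds from now,
	 * with mld_timeout_leeway milliseconds of leeway so the scheduler
	 * may coalesce this wakeup with other pending timers.
	 */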
1876 	if (!mld_timeout_run &&
1877 	    (querier_present_timers_running6 || current_state_timers_running6 ||
1878 	    interface_timers_running6 || state_change_timers_running6)) {
1879 		mld_timeout_run = true;
1880 		clock_interval_to_deadline(mld_timeout_delay, NSEC_PER_MSEC,
1881 		    &deadline);
1882 		clock_interval_to_absolutetime_interval(mld_timeout_leeway,
1883 		    NSEC_PER_MSEC, &leeway);
1884 		thread_call_enter_delayed_with_leeway(mld_timeout_tcall, NULL,
1885 		    deadline, leeway,
1886 		    THREAD_CALL_DELAY_LEEWAY);
1887 	}
1888 }
1889 
1890 static void
1891 mld_sched_fast_timeout(void)
1892 {
1893 	static thread_call_t mld_fast_timeout_tcall;
1894 
1895 	MLD_LOCK_ASSERT_HELD();
1896 	if (mld_fast_timeout_tcall == NULL) {
1897 		mld_fast_timeout_tcall =
1898 		    thread_call_allocate_with_options(mld_timeout,
1899 		    mld_sched_fast_timeout,
1900 		    THREAD_CALL_PRIORITY_KERNEL,
1901 		    THREAD_CALL_OPTIONS_ONCE);
1902 	}
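	/*
	 * Note: mld_sched_fast_timeout itself is passed as param0 of the
	 * thread call above; mld_timeout() receives it as arg0 and uses
	 * its non-NULL value to distinguish a fast timeout from the
	 * regular 1-second timeout, whose param0 is NULL.
	 */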
1903 	if (!mld_fast_timeout_run &&
1904 	    (current_state_timers_running6 || state_change_timers_running6)) {
1905 		mld_fast_timeout_run = true;
1906 		thread_call_enter(mld_fast_timeout_tcall);
1907 	}
1908 }
1909 
1910 /*
1911  * Free the in6_multi reference(s) for this MLD lifecycle.
1912  *
1913  * Caller must be holding mli_lock.
1914  */
1915 static void
1916 mld_flush_relq(struct mld_ifinfo *mli, struct mld_in6m_relhead *in6m_dthead)
1917 {
1918 	struct in6_multi *inm;
1919 
1920 again:
1921 	MLI_LOCK_ASSERT_HELD(mli);
1922 	inm = SLIST_FIRST(&mli->mli_relinmhead);
1923 	if (inm != NULL) {
1924 		int lastref;
1925 
1926 		SLIST_REMOVE_HEAD(&mli->mli_relinmhead, in6m_nrele);
1927 		MLI_UNLOCK(mli);
1928 
1929 		in6_multihead_lock_exclusive();
1930 		IN6M_LOCK(inm);
1931 		VERIFY(inm->in6m_nrelecnt != 0);
1932 		inm->in6m_nrelecnt--;
1933 		lastref = in6_multi_detach(inm);
1934 		VERIFY(!lastref || (!(inm->in6m_debug & IFD_ATTACHED) &&
1935 		    inm->in6m_reqcnt == 0));
1936 		IN6M_UNLOCK(inm);
1937 		in6_multihead_lock_done();
1938 		/* from mli_relinmhead */
1939 		IN6M_REMREF(inm);
1940 		/* from in6_multihead_list */
1941 		if (lastref) {
1942 			/*
1943 			 * Defer releasing our final reference, as we
1944 			 * are holding the MLD lock at this point, and
1945 			 * we could end up with locking issues later on
1946 			 * (while issuing SIOCDELMULTI) when this is the
1947 			 * final reference count.  Let the caller do it
1948 			 * when it is safe.
1949 			 */
1950 			MLD_ADD_DETACHED_IN6M(in6m_dthead, inm);
1951 		}
1952 		MLI_LOCK(mli);
1953 		goto again;
1954 	}
1955 }
1956 
1957 /*
1958  * Update host report group timer.
1959  * Will update the global pending timer flags.
1960  */
1961 static void
1962 mld_v1_process_group_timer(struct in6_multi *inm, const int mld_version)
1963 {
1964 #pragma unused(mld_version)
1965 	int report_timer_expired;
1966 
1967 	MLD_LOCK_ASSERT_HELD();
1968 	IN6M_LOCK_ASSERT_HELD(inm);
1969 	MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
1970 
1971 	if (inm->in6m_timer == 0) {
1972 		report_timer_expired = 0;
1973 	} else if (--inm->in6m_timer == 0) {
1974 		report_timer_expired = 1;
1975 	} else {
1976 		current_state_timers_running6 = 1;
1977 		/* caller will schedule timer */
1978 		return;
1979 	}
1980 
1981 	switch (inm->in6m_state) {
1982 	case MLD_NOT_MEMBER:
1983 	case MLD_SILENT_MEMBER:
1984 	case MLD_IDLE_MEMBER:
1985 	case MLD_LAZY_MEMBER:
1986 	case MLD_SLEEPING_MEMBER:
1987 	case MLD_AWAKENING_MEMBER:
1988 		break;
1989 	case MLD_REPORTING_MEMBER:
1990 		if (report_timer_expired) {
1991 			inm->in6m_state = MLD_IDLE_MEMBER;
1992 			(void) mld_v1_transmit_report(inm,
1993 			    MLD_LISTENER_REPORT);
1994 			IN6M_LOCK_ASSERT_HELD(inm);
1995 			MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
1996 		}
1997 		break;
1998 	case MLD_G_QUERY_PENDING_MEMBER:
1999 	case MLD_SG_QUERY_PENDING_MEMBER:
2000 	case MLD_LEAVING_MEMBER:
2001 		break;
2002 	}
2003 }
2004 
2005 /*
2006  * Update a group's timers for MLDv2.
2007  * Will update the global pending timer flags.
2008  * Note: Unlocked read from mli.
2009  */
2010 static void
2011 mld_v2_process_group_timers(struct mld_ifinfo *mli,
2012     struct ifqueue *qrq, struct ifqueue *scq,
2013     struct in6_multi *inm, const int uri_sec)
2014 {
2015 	int query_response_timer_expired;
2016 	int state_change_retransmit_timer_expired;
2017 
2018 	MLD_LOCK_ASSERT_HELD();
2019 	IN6M_LOCK_ASSERT_HELD(inm);
2020 	MLI_LOCK_ASSERT_HELD(mli);
2021 	VERIFY(mli == inm->in6m_mli);
2022 
2023 	query_response_timer_expired = 0;
2024 	state_change_retransmit_timer_expired = 0;
2025 
2026 	/*
2027 	 * During a transition from compatibility mode back to MLDv2,
2028 	 * a group record in REPORTING state may still have its group
2029 	 * timer active. This is a no-op in this function; it is easier
2030 	 * to deal with it here than to complicate the timeout path.
2031 	 */
2032 	if (inm->in6m_timer == 0) {
2033 		query_response_timer_expired = 0;
2034 	} else if (--inm->in6m_timer == 0) {
2035 		query_response_timer_expired = 1;
2036 	} else {
2037 		current_state_timers_running6 = 1;
2038 		/* caller will schedule timer */
2039 	}
2040 
2041 	if (inm->in6m_sctimer == 0) {
2042 		state_change_retransmit_timer_expired = 0;
2043 	} else if (--inm->in6m_sctimer == 0) {
2044 		state_change_retransmit_timer_expired = 1;
2045 	} else {
2046 		state_change_timers_running6 = 1;
2047 		/* caller will schedule timer */
2048 	}
2049 
2050 	/* We are in timer callback, so be quick about it. */
2051 	if (!state_change_retransmit_timer_expired &&
2052 	    !query_response_timer_expired) {
2053 		return;
2054 	}
2055 
2056 	switch (inm->in6m_state) {
2057 	case MLD_NOT_MEMBER:
2058 	case MLD_SILENT_MEMBER:
2059 	case MLD_SLEEPING_MEMBER:
2060 	case MLD_LAZY_MEMBER:
2061 	case MLD_AWAKENING_MEMBER:
2062 	case MLD_IDLE_MEMBER:
2063 		break;
2064 	case MLD_G_QUERY_PENDING_MEMBER:
2065 	case MLD_SG_QUERY_PENDING_MEMBER:
2066 		/*
2067 		 * Respond to a previously pending Group-Specific
2068 		 * or Group-and-Source-Specific query by enqueueing
2069 		 * the appropriate Current-State report for
2070 		 * immediate transmission.
2071 		 */
2072 		if (query_response_timer_expired) {
2073 			int retval;
2074 
2075 			retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
2076 			    (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER),
2077 			    0);
2078 			MLD_PRINTF(("%s: enqueue record = %d\n",
2079 			    __func__, retval));
2080 			inm->in6m_state = MLD_REPORTING_MEMBER;
2081 			in6m_clear_recorded(inm);
2082 		}
2083 		OS_FALLTHROUGH;
2084 	case MLD_REPORTING_MEMBER:
2085 	case MLD_LEAVING_MEMBER:
2086 		if (state_change_retransmit_timer_expired) {
2087 			/*
2088 			 * State-change retransmission timer fired.
2089 			 * If there are any further pending retransmissions,
2090 			 * set the global pending state-change flag, and
2091 			 * reset the timer.
2092 			 */
2093 			if (--inm->in6m_scrv > 0) {
2094 				inm->in6m_sctimer = (uint16_t)uri_sec;
2095 				state_change_timers_running6 = 1;
2096 				/* caller will schedule timer */
2097 			}
2098 			/*
2099 			 * Retransmit the previously computed state-change
2100 			 * report. If there are no further pending
2101 			 * retransmissions, the mbuf queue will be consumed.
2102 			 * Update T0 state to T1 as we have now sent
2103 			 * a state-change.
2104 			 */
2105 			(void) mld_v2_merge_state_changes(inm, scq);
2106 
2107 			in6m_commit(inm);
2108 			MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2109 			    ip6_sprintf(&inm->in6m_addr),
2110 			    if_name(inm->in6m_ifp)));
2111 
2112 			/*
2113 			 * If we are leaving the group for good, make sure
2114 			 * we release MLD's reference to it.
2115 			 * This release must be deferred using a SLIST,
2116 			 * as we are called from a loop which traverses
2117 			 * the in_ifmultiaddr TAILQ.
2118 			 */
2119 			if (inm->in6m_state == MLD_LEAVING_MEMBER &&
2120 			    inm->in6m_scrv == 0) {
2121 				inm->in6m_state = MLD_NOT_MEMBER;
2122 				/*
2123 				 * A reference has already been held in
2124 				 * mld_final_leave() for this inm, so
2125 				 * no need to hold another one.  We also
2126 				 * bumped up its request count then, so
2127 				 * that it stays in in6_multihead.  Both
2128 				 * of them will be released when it is
2129 				 * dequeued later on.
2130 				 */
2131 				VERIFY(inm->in6m_nrelecnt != 0);
2132 				SLIST_INSERT_HEAD(&mli->mli_relinmhead,
2133 				    inm, in6m_nrele);
2134 			}
2135 		}
2136 		break;
2137 	}
2138 }
2139 
2140 /*
2141  * Switch to a different version on the given interface,
2142  * as per Section 9.12.
2143  */
2144 static uint32_t
2145 mld_set_version(struct mld_ifinfo *mli, const int mld_version)
2146 {
2147 	int old_version_timer;
2148 
2149 	MLI_LOCK_ASSERT_HELD(mli);
2150 
2151 	MLD_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__,
2152 	    mld_version, (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp),
2153 	    if_name(mli->mli_ifp)));
2154 
2155 	if (mld_version == MLD_VERSION_1) {
2156 		/*
2157 		 * Compute the "Older Version Querier Present" timer as per
2158 		 * Section 9.12, in seconds.
2159 		 */
2160 		old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
2161 		mli->mli_v1_timer = old_version_timer;
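		/*
		 * For example, with the RFC 3810 defaults -- a Robustness
		 * Variable of 2, a Query Interval of 125 seconds and a
		 * Query Response Interval of 10 seconds -- this yields
		 * (2 * 125) + 10 = 260 seconds before reverting to MLDv2.
		 */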
2162 	}
2163 
2164 	if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
2165 		mli->mli_version = MLD_VERSION_1;
2166 		mld_v2_cancel_link_timers(mli);
2167 	}
2168 
2169 	MLI_LOCK_ASSERT_HELD(mli);
2170 
2171 	return mli->mli_v1_timer;
2172 }
2173 
2174 /*
2175  * Cancel pending MLDv2 timers for the given link and all groups
2176  * joined on it; state-change, general-query, and group-query timers.
2177  *
2178  * Only ever called on a transition from v2 to Compatibility mode. Kill
2179  * the timers stone dead (this may be expensive for large N groups); they
2180  * will be restarted if Compatibility Mode deems that they must be, due to
2181  * query processing.
2182  */
2183 static void
2184 mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
2185 {
2186 	struct ifnet            *ifp;
2187 	struct in6_multi        *inm;
2188 	struct in6_multistep    step;
2189 
2190 	MLI_LOCK_ASSERT_HELD(mli);
2191 
2192 	MLD_PRINTF(("%s: cancel v2 timers on ifp 0x%llx(%s)\n", __func__,
2193 	    (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp), if_name(mli->mli_ifp)));
2194 
2195 	/*
2196 	 * Stop the v2 General Query Response timer on this link stone dead.
2197 	 * If the timeout is woken up due to interface_timers_running6,
2198 	 * the flag will be cleared if there are no pending link timers.
2199 	 */
2200 	mli->mli_v2_timer = 0;
2201 
2202 	/*
2203 	 * Now clear the current-state and state-change report timers
2204 	 * for all memberships scoped to this link.
2205 	 */
2206 	ifp = mli->mli_ifp;
2207 	MLI_UNLOCK(mli);
2208 
2209 	in6_multihead_lock_shared();
2210 	IN6_FIRST_MULTI(step, inm);
2211 	while (inm != NULL) {
2212 		IN6M_LOCK(inm);
2213 		if (inm->in6m_ifp != ifp) {
2214 			goto next;
2215 		}
2216 
2217 		switch (inm->in6m_state) {
2218 		case MLD_NOT_MEMBER:
2219 		case MLD_SILENT_MEMBER:
2220 		case MLD_IDLE_MEMBER:
2221 		case MLD_LAZY_MEMBER:
2222 		case MLD_SLEEPING_MEMBER:
2223 		case MLD_AWAKENING_MEMBER:
2224 			/*
2225 			 * These states are either not relevant in v2 mode,
2226 			 * or are unreported. Do nothing.
2227 			 */
2228 			break;
2229 		case MLD_LEAVING_MEMBER:
2230 			/*
2231 			 * If we are leaving the group and switching
2232 			 * version, we need to release the final
2233 			 * reference held for issuing the INCLUDE {}.
2234 			 * During mld_final_leave(), we bumped up both the
2235 			 * request and reference counts.  Since we cannot
2236 			 * call in6_multi_detach() here, defer this task to
2237 			 * the timer routine.
2238 			 */
2239 			VERIFY(inm->in6m_nrelecnt != 0);
2240 			MLI_LOCK(mli);
2241 			SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
2242 			    in6m_nrele);
2243 			MLI_UNLOCK(mli);
2244 			OS_FALLTHROUGH;
2245 		case MLD_G_QUERY_PENDING_MEMBER:
2246 		case MLD_SG_QUERY_PENDING_MEMBER:
2247 			in6m_clear_recorded(inm);
2248 			OS_FALLTHROUGH;
2249 		case MLD_REPORTING_MEMBER:
2250 			inm->in6m_state = MLD_REPORTING_MEMBER;
2251 			break;
2252 		}
2253 		/*
2254 		 * Always clear state-change and group report timers.
2255 		 * Free any pending MLDv2 state-change records.
2256 		 */
2257 		inm->in6m_sctimer = 0;
2258 		inm->in6m_timer = 0;
2259 		IF_DRAIN(&inm->in6m_scq);
2260 next:
2261 		IN6M_UNLOCK(inm);
2262 		IN6_NEXT_MULTI(step, inm);
2263 	}
2264 	in6_multihead_lock_done();
2265 
2266 	MLI_LOCK(mli);
2267 }
2268 
2269 /*
2270  * Update the Older Version Querier Present timers for a link.
2271  * See Section 9.12 of RFC 3810.
2272  */
2273 static void
2274 mld_v1_process_querier_timers(struct mld_ifinfo *mli)
2275 {
2276 	MLI_LOCK_ASSERT_HELD(mli);
2277 
2278 	if (mld_v2enable && mli->mli_version != MLD_VERSION_2 &&
2279 	    --mli->mli_v1_timer == 0) {
2280 		/*
2281 		 * MLDv1 Querier Present timer expired; revert to MLDv2.
2282 		 */
2283 		MLD_PRINTF(("%s: transition from v%d -> v%d on 0x%llx(%s)\n",
2284 		    __func__, mli->mli_version, MLD_VERSION_2,
2285 		    (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp),
2286 		    if_name(mli->mli_ifp)));
2287 		mli->mli_version = MLD_VERSION_2;
2288 	}
2289 }
2290 
2291 /*
2292  * Transmit an MLDv1 report immediately.
2293  */
2294 static int
2295 mld_v1_transmit_report(struct in6_multi *in6m, const uint8_t type)
2296 {
2297 	struct ifnet            *ifp;
2298 	struct in6_ifaddr       *ia;
2299 	struct ip6_hdr          *ip6;
2300 	struct mbuf             *mh, *md;
2301 	struct mld_hdr          *mld;
2302 	int                     error = 0;
2303 
2304 	IN6M_LOCK_ASSERT_HELD(in6m);
2305 	MLI_LOCK_ASSERT_HELD(in6m->in6m_mli);
2306 
2307 	ifp = in6m->in6m_ifp;
2308 	/* ia may be NULL if link-local address is tentative. */
2309 	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
2310 
2311 	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
2312 	if (mh == NULL) {
2313 		if (ia != NULL) {
2314 			IFA_REMREF(&ia->ia_ifa);
2315 		}
2316 		return ENOMEM;
2317 	}
2318 	MGET(md, M_DONTWAIT, MT_DATA);
2319 	if (md == NULL) {
2320 		m_free(mh);
2321 		if (ia != NULL) {
2322 			IFA_REMREF(&ia->ia_ifa);
2323 		}
2324 		return ENOMEM;
2325 	}
2326 	mh->m_next = md;
2327 
2328 	/*
2329 	 * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
2330 	 * that ether_output() does not need to allocate another mbuf
2331 	 * for the header in the most common case.
2332 	 */
2333 	MH_ALIGN(mh, sizeof(struct ip6_hdr));
2334 	mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
2335 	mh->m_len = sizeof(struct ip6_hdr);
2336 
2337 	ip6 = mtod(mh, struct ip6_hdr *);
2338 	ip6->ip6_flow = 0;
2339 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
2340 	ip6->ip6_vfc |= IPV6_VERSION;
2341 	ip6->ip6_nxt = IPPROTO_ICMPV6;
2342 	if (ia != NULL) {
2343 		IFA_LOCK(&ia->ia_ifa);
2344 	}
2345 	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
2346 	ip6_output_setsrcifscope(mh, IFSCOPE_NONE, ia);
2347 	if (ia != NULL) {
2348 		IFA_UNLOCK(&ia->ia_ifa);
2349 		IFA_REMREF(&ia->ia_ifa);
2350 		ia = NULL;
2351 	}
2352 	ip6->ip6_dst = in6m->in6m_addr;
2353 	ip6_output_setdstifscope(mh, in6m->ifscope, NULL);
2354 
2355 	md->m_len = sizeof(struct mld_hdr);
2356 	mld = mtod(md, struct mld_hdr *);
2357 	mld->mld_type = type;
2358 	mld->mld_code = 0;
2359 	mld->mld_cksum = 0;
2360 	mld->mld_maxdelay = 0;
2361 	mld->mld_reserved = 0;
2362 	mld->mld_addr = in6m->in6m_addr;
2363 	in6_clearscope(&mld->mld_addr);
2364 	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
2365 	    sizeof(struct ip6_hdr), sizeof(struct mld_hdr));
2366 
2367 	mld_save_context(mh, ifp);
2368 	mh->m_flags |= M_MLDV1;
2369 
2370 	/*
2371 	 * Due to the fact that at this point we are possibly holding
2372 	 * in6_multihead_lock in shared or exclusive mode, we can't call
2373 	 * mld_dispatch_packet() here since that will eventually call
2374 	 * ip6_output(), which will try to lock in6_multihead_lock and cause
2375 	 * a deadlock.
2376 	 * Instead we defer the work to the mld_timeout() thread, thus
2377 	 * avoiding unlocking in6_multihead_lock here.
2378 	 */
2379 	if (IF_QFULL(&in6m->in6m_mli->mli_v1q)) {
2380 		MLD_PRINTF(("%s: v1 outbound queue full\n", __func__));
2381 		error = ENOMEM;
2382 		m_freem(mh);
2383 	} else {
2384 		IF_ENQUEUE(&in6m->in6m_mli->mli_v1q, mh);
2385 		VERIFY(error == 0);
2386 	}
2387 
2388 	return error;
2389 }
2390 
2391 /*
2392  * Process a state change from the upper layer for the given IPv6 group.
2393  *
2394  * Each socket holds a reference on the in6_multi in its own ip6_moptions.
2395  * The socket layer will have made the necessary updates to the group
2396  * state; it is now up to MLD to issue a state change report if there
2397  * has been any change between T0 (when the last state-change was issued)
2398  * and T1 (now).
2399  *
2400  * We use the MLDv2 state machine at group level. The MLD module,
2401  * however, makes the decision as to which MLD protocol version to speak.
2402  * A state change *from* INCLUDE {} always means an initial join.
2403  * A state change *to* INCLUDE {} always means a final leave.
2404  *
2405  * If delay is non-zero, and the state change is an initial multicast
2406  * join, the state change report will be delayed by 'delay' ticks
2407  * in units of seconds if MLDv1 is active on the link; otherwise
2408  * the initial MLDv2 state change report will be delayed by whichever
2409  * is sooner, a pending state-change timer or delay itself.
2410  */
2411 int
2412 mld_change_state(struct in6_multi *inm, struct mld_tparams *mtp,
2413     const int delay)
2414 {
2415 	struct mld_ifinfo *mli;
2416 	struct ifnet *ifp;
2417 	int error = 0;
2418 
2419 	VERIFY(mtp != NULL);
2420 	bzero(mtp, sizeof(*mtp));
2421 
2422 	IN6M_LOCK_ASSERT_HELD(inm);
2423 	VERIFY(inm->in6m_mli != NULL);
2424 	MLI_LOCK_ASSERT_NOTHELD(inm->in6m_mli);
2425 
2426 	/*
2427 	 * Try to detect if the upper layer just asked us to change state
2428 	 * for an interface which has now gone away.
2429 	 */
2430 	VERIFY(inm->in6m_ifma != NULL);
2431 	ifp = inm->in6m_ifma->ifma_ifp;
2432 	/*
2433 	 * Sanity check that netinet6's notion of ifp is the same as net's.
2434 	 */
2435 	VERIFY(inm->in6m_ifp == ifp);
2436 
2437 	mli = MLD_IFINFO(ifp);
2438 	VERIFY(mli != NULL);
2439 
2440 	/*
2441 	 * If we detect a state transition to or from MCAST_UNDEFINED
2442 	 * for this group, then we are starting or finishing an MLD
2443 	 * life cycle for this group.
2444 	 */
2445 	if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) {
2446 		MLD_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2447 		    inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode));
2448 		if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) {
2449 			MLD_PRINTF(("%s: initial join\n", __func__));
2450 			error = mld_initial_join(inm, mli, mtp, delay);
2451 			goto out;
2452 		} else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) {
2453 			MLD_PRINTF(("%s: final leave\n", __func__));
2454 			mld_final_leave(inm, mli, mtp);
2455 			goto out;
2456 		}
2457 	} else {
2458 		MLD_PRINTF(("%s: filter set change\n", __func__));
2459 	}
2460 
2461 	error = mld_handle_state_change(inm, mli, mtp);
2462 out:
2463 	return error;
2464 }
2465 
2466 /*
2467  * Perform the initial join for an MLD group.
2468  *
2469  * When joining a group:
2470  *  If the group should have its MLD traffic suppressed, do nothing.
2471  *  MLDv1 starts sending MLDv1 host membership reports.
2472  *  MLDv2 will schedule an MLDv2 state-change report containing the
2473  *  initial state of the membership.
2474  *
2475  * If the delay argument is non-zero, then we must delay sending the
2476  * initial state change for delay ticks (in units of seconds).
2477  */
2478 static int
2479 mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
2480     struct mld_tparams *mtp, const int delay)
2481 {
2482 	struct ifnet            *ifp;
2483 	struct ifqueue          *ifq;
2484 	int                      error, retval, syncstates;
2485 	int                      odelay;
2486 
2487 	IN6M_LOCK_ASSERT_HELD(inm);
2488 	MLI_LOCK_ASSERT_NOTHELD(mli);
2489 	VERIFY(mtp != NULL);
2490 
2491 	MLD_PRINTF(("%s: initial join %s on ifp 0x%llx(%s)\n",
2492 	    __func__, ip6_sprintf(&inm->in6m_addr),
2493 	    (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
2494 	    if_name(inm->in6m_ifp)));
2495 
2496 	error = 0;
2497 	syncstates = 1;
2498 
2499 	ifp = inm->in6m_ifp;
2500 
2501 	MLI_LOCK(mli);
2502 	VERIFY(mli->mli_ifp == ifp);
2503 
2504 	/*
2505 	 * Avoid MLD if the group is:
2506 	 * 1. Joined on loopback, OR
2507 	 * 2. On a link that is marked MLIF_SILENT, OR
2508 	 * 3. rdar://problem/19227650 Link-local scoped and
2509 	 *    on a cellular interface, OR
2510 	 * 4. A type that should not be reported (node-local
2511 	 *    or all-nodes link-local multicast).
2512 	 * All other groups enter the appropriate state machine
2513 	 * for the version in use on this link.
2514 	 */
2515 	if ((ifp->if_flags & IFF_LOOPBACK) ||
2516 	    (mli->mli_flags & MLIF_SILENT) ||
2517 	    (IFNET_IS_CELLULAR(ifp) &&
2518 	    (IN6_IS_ADDR_MC_LINKLOCAL(&inm->in6m_addr) || IN6_IS_ADDR_MC_UNICAST_BASED_LINKLOCAL(&inm->in6m_addr))) ||
2519 	    !mld_is_addr_reported(&inm->in6m_addr)) {
2520 		MLD_PRINTF(("%s: not kicking state machine for silent group\n",
2521 		    __func__));
2522 		inm->in6m_state = MLD_SILENT_MEMBER;
2523 		inm->in6m_timer = 0;
2524 	} else {
2525 		/*
2526 		 * Deal with overlapping in6_multi lifecycle.
2527 		 * If this group was LEAVING, then make sure
2528 		 * we drop the reference we picked up to keep the
2529 		 * group around for the final INCLUDE {} enqueue.
2530 		 * Since we cannot call in6_multi_detach() here,
2531 		 * defer this task to the timer routine.
2532 		 */
2533 		if (mli->mli_version == MLD_VERSION_2 &&
2534 		    inm->in6m_state == MLD_LEAVING_MEMBER) {
2535 			VERIFY(inm->in6m_nrelecnt != 0);
2536 			SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
2537 			    in6m_nrele);
2538 		}
2539 
2540 		inm->in6m_state = MLD_REPORTING_MEMBER;
2541 
2542 		switch (mli->mli_version) {
2543 		case MLD_VERSION_1:
2544 			/*
2545 			 * If a delay was provided, only use it if
2546 			 * it is greater than the delay normally
2547 			 * used for an MLDv1 state change report,
2548 			 * and delay sending the initial MLDv1 report
2549 			 * by not transitioning to the IDLE state.
2550 			 */
2551 			odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI);
2552 			if (delay) {
2553 				inm->in6m_timer = max(delay, odelay);
2554 				mtp->cst = 1;
2555 			} else {
2556 				inm->in6m_state = MLD_IDLE_MEMBER;
2557 				error = mld_v1_transmit_report(inm,
2558 				    MLD_LISTENER_REPORT);
2559 
2560 				IN6M_LOCK_ASSERT_HELD(inm);
2561 				MLI_LOCK_ASSERT_HELD(mli);
2562 
2563 				if (error == 0) {
2564 					inm->in6m_timer = odelay;
2565 					mtp->cst = 1;
2566 				}
2567 			}
2568 			break;
2569 
2570 		case MLD_VERSION_2:
2571 			/*
2572 			 * Defer update of T0 to T1, until the first copy
2573 			 * of the state change has been transmitted.
2574 			 */
2575 			syncstates = 0;
2576 
2577 			/*
2578 			 * Immediately enqueue a State-Change Report for
2579 			 * this interface, freeing any previous reports.
2580 			 * Don't kick the timers if there is nothing to do,
2581 			 * or if an error occurred.
2582 			 */
2583 			ifq = &inm->in6m_scq;
2584 			IF_DRAIN(ifq);
2585 			retval = mld_v2_enqueue_group_record(ifq, inm, 1,
2586 			    0, 0, (mli->mli_flags & MLIF_USEALLOW));
2587 			mtp->cst = (ifq->ifq_len > 0);
2588 			MLD_PRINTF(("%s: enqueue record = %d\n",
2589 			    __func__, retval));
2590 			if (retval <= 0) {
2591 				error = retval * -1;
2592 				break;
2593 			}
2594 
2595 			/*
2596 			 * Schedule transmission of pending state-change
2597 			 * report up to RV times for this link. The timer
2598 			 * will fire at the next mld_timeout (1 second),
2599 			 * giving us an opportunity to merge the reports.
2600 			 *
2601 			 * If a delay was provided to this function, only
2602 			 * use this delay if sooner than the existing one.
2603 			 */
2604 			VERIFY(mli->mli_rv > 1);
2605 			inm->in6m_scrv = (uint16_t)mli->mli_rv;
2606 			if (delay) {
2607 				if (inm->in6m_sctimer > 1) {
2608 					inm->in6m_sctimer =
2609 					    MIN(inm->in6m_sctimer, (uint16_t)delay);
2610 				} else {
2611 					inm->in6m_sctimer = (uint16_t)delay;
2612 				}
2613 			} else {
2614 				inm->in6m_sctimer = 1;
2615 			}
2616 			mtp->sct = 1;
2617 			error = 0;
2618 			break;
2619 		}
2620 	}
2621 	MLI_UNLOCK(mli);
2622 
2623 	/*
2624 	 * Only update the T0 state if state change is atomic,
2625 	 * i.e. we don't need to wait for a timer to fire before we
2626 	 * can consider the state change to have been communicated.
2627 	 */
2628 	if (syncstates) {
2629 		in6m_commit(inm);
2630 		MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2631 		    ip6_sprintf(&inm->in6m_addr),
2632 		    if_name(inm->in6m_ifp)));
2633 	}
2634 
2635 	return error;
2636 }
2637 
2638 /*
2639  * Issue an intermediate state change during the life-cycle.
2640  */
2641 static int
2642 mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli,
2643     struct mld_tparams *mtp)
2644 {
2645 	struct ifnet            *ifp;
2646 	int                      retval = 0;
2647 
2648 	IN6M_LOCK_ASSERT_HELD(inm);
2649 	MLI_LOCK_ASSERT_NOTHELD(mli);
2650 	VERIFY(mtp != NULL);
2651 
2652 	MLD_PRINTF(("%s: state change for %s on ifp 0x%llx(%s)\n",
2653 	    __func__, ip6_sprintf(&inm->in6m_addr),
2654 	    (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
2655 	    if_name(inm->in6m_ifp)));
2656 
2657 	ifp = inm->in6m_ifp;
2658 
2659 	MLI_LOCK(mli);
2660 	VERIFY(mli->mli_ifp == ifp);
2661 
2662 	if ((ifp->if_flags & IFF_LOOPBACK) ||
2663 	    (mli->mli_flags & MLIF_SILENT) ||
2664 	    !mld_is_addr_reported(&inm->in6m_addr) ||
2665 	    (mli->mli_version != MLD_VERSION_2)) {
2666 		MLI_UNLOCK(mli);
2667 		if (!mld_is_addr_reported(&inm->in6m_addr)) {
2668 			MLD_PRINTF(("%s: not kicking state machine for silent "
2669 			    "group\n", __func__));
2670 		}
2671 		MLD_PRINTF(("%s: nothing to do\n", __func__));
2672 		in6m_commit(inm);
2673 		MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2674 		    ip6_sprintf(&inm->in6m_addr),
2675 		    if_name(inm->in6m_ifp)));
2676 		goto done;
2677 	}
2678 
2679 	IF_DRAIN(&inm->in6m_scq);
2680 
2681 	retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
2682 	    (mli->mli_flags & MLIF_USEALLOW));
2683 	mtp->cst = (inm->in6m_scq.ifq_len > 0);
2684 	MLD_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
2685 	if (retval <= 0) {
2686 		MLI_UNLOCK(mli);
2687 		retval *= -1;
2688 		goto done;
2689 	} else {
2690 		retval = 0;
2691 	}
2692 
2693 	/*
2694 	 * If record(s) were enqueued, start the state-change
2695 	 * report timer for this group.
2696 	 */
2697 	inm->in6m_scrv = (uint16_t)mli->mli_rv;
2698 	inm->in6m_sctimer = 1;
2699 	mtp->sct = 1;
2700 	MLI_UNLOCK(mli);
2701 
2702 done:
2703 	return retval;
2704 }
2705 
2706 /*
2707  * Perform the final leave for a multicast address.
2708  *
2709  * When leaving a group:
2710  *  MLDv1 sends a DONE message, if and only if we are the reporter.
2711  *  MLDv2 enqueues a state-change report containing a transition
2712  *  to INCLUDE {} for immediate transmission.
2713  */
2714 static void
2715 mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli,
2716     struct mld_tparams *mtp)
2717 {
2718 	int syncstates = 1;
2719 
2720 	IN6M_LOCK_ASSERT_HELD(inm);
2721 	MLI_LOCK_ASSERT_NOTHELD(mli);
2722 	VERIFY(mtp != NULL);
2723 
2724 	MLD_PRINTF(("%s: final leave %s on ifp 0x%llx(%s)\n",
2725 	    __func__, ip6_sprintf(&inm->in6m_addr),
2726 	    (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
2727 	    if_name(inm->in6m_ifp)));
2728 
2729 	switch (inm->in6m_state) {
2730 	case MLD_NOT_MEMBER:
2731 	case MLD_SILENT_MEMBER:
2732 	case MLD_LEAVING_MEMBER:
2733 		/* Already leaving or left; do nothing. */
2734 		MLD_PRINTF(("%s: not kicking state machine for silent group\n",
2735 		    __func__));
2736 		break;
2737 	case MLD_REPORTING_MEMBER:
2738 	case MLD_IDLE_MEMBER:
2739 	case MLD_G_QUERY_PENDING_MEMBER:
2740 	case MLD_SG_QUERY_PENDING_MEMBER:
2741 		MLI_LOCK(mli);
2742 		if (mli->mli_version == MLD_VERSION_1) {
2743 			if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
2744 			    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
2745 				panic("%s: MLDv2 state reached, not MLDv2 "
2746 				    "mode\n", __func__);
2747 				/* NOTREACHED */
2748 			}
2749 			/* schedule timer if enqueue is successful */
2750 			mtp->cst = (mld_v1_transmit_report(inm,
2751 			    MLD_LISTENER_DONE) == 0);
2752 
2753 			IN6M_LOCK_ASSERT_HELD(inm);
2754 			MLI_LOCK_ASSERT_HELD(mli);
2755 
2756 			inm->in6m_state = MLD_NOT_MEMBER;
2757 		} else if (mli->mli_version == MLD_VERSION_2) {
2758 			/*
2759 			 * Stop group timer and all pending reports.
2760 			 * Immediately enqueue a state-change report
2761 			 * TO_IN {} to be sent on the next timeout,
2762 			 * giving us an opportunity to merge reports.
2763 			 */
2764 			IF_DRAIN(&inm->in6m_scq);
2765 			inm->in6m_timer = 0;
2766 			inm->in6m_scrv = (uint16_t)mli->mli_rv;
2767 			MLD_PRINTF(("%s: Leaving %s/%s with %d "
2768 			    "pending retransmissions.\n", __func__,
2769 			    ip6_sprintf(&inm->in6m_addr),
2770 			    if_name(inm->in6m_ifp),
2771 			    inm->in6m_scrv));
2772 			if (inm->in6m_scrv == 0) {
2773 				inm->in6m_state = MLD_NOT_MEMBER;
2774 				inm->in6m_sctimer = 0;
2775 			} else {
2776 				int retval;
2777 				/*
2778 				 * Stick around in the in6_multihead list;
2779 				 * the final detach will be issued by
2780 				 * mld_v2_process_group_timers() when
2781 				 * the retransmit timer expires.
2782 				 */
2783 				IN6M_ADDREF_LOCKED(inm);
2784 				VERIFY(inm->in6m_debug & IFD_ATTACHED);
2785 				inm->in6m_reqcnt++;
2786 				VERIFY(inm->in6m_reqcnt >= 1);
2787 				inm->in6m_nrelecnt++;
2788 				VERIFY(inm->in6m_nrelecnt != 0);
2789 
2790 				retval = mld_v2_enqueue_group_record(
2791 					&inm->in6m_scq, inm, 1, 0, 0,
2792 					(mli->mli_flags & MLIF_USEALLOW));
2793 				mtp->cst = (inm->in6m_scq.ifq_len > 0);
2794 				KASSERT(retval != 0,
2795 				    ("%s: enqueue record = %d\n", __func__,
2796 				    retval));
2797 
2798 				inm->in6m_state = MLD_LEAVING_MEMBER;
2799 				inm->in6m_sctimer = 1;
2800 				mtp->sct = 1;
2801 				syncstates = 0;
2802 			}
2803 		}
2804 		MLI_UNLOCK(mli);
2805 		break;
2806 	case MLD_LAZY_MEMBER:
2807 	case MLD_SLEEPING_MEMBER:
2808 	case MLD_AWAKENING_MEMBER:
2809 		/* Our reports are suppressed; do nothing. */
2810 		break;
2811 	}
2812 
2813 	if (syncstates) {
2814 		in6m_commit(inm);
2815 		MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2816 		    ip6_sprintf(&inm->in6m_addr),
2817 		    if_name(inm->in6m_ifp)));
2818 		inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
2819 		MLD_PRINTF(("%s: T1 now MCAST_UNDEFINED for 0x%llx/%s\n",
2820 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(&inm->in6m_addr),
2821 		    if_name(inm->in6m_ifp)));
2822 	}
2823 }
2824 
2825 /*
2826  * Enqueue an MLDv2 group record to the given output queue.
2827  *
2828  * If is_state_change is zero, a current-state record is appended.
2829  * If is_state_change is non-zero, a state-change report is appended.
2830  *
2831  * If is_group_query is non-zero, an mbuf packet chain is allocated.
2832  * If is_group_query is zero, and if there is a packet with free space
2833  * at the tail of the queue, the record will be appended to that
2834  * packet.
2835  * Otherwise a new mbuf packet chain is allocated.
2836  *
2837  * If is_source_query is non-zero, each source is checked to see if
2838  * it was recorded for a Group-Source query, and will be omitted if
2839  * it is not both in-mode and recorded.
2840  *
2841  * If use_block_allow is non-zero, state change reports for initial join
2842  * and final leave, on an inclusive mode group with a source list, will be
2843  * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively.
2844  *
2845  * The function will attempt to allocate leading space in the packet
2846  * for the IPv6+ICMP headers to be prepended without fragmenting the chain.
2847  *
2848  * If successful the size of all data appended to the queue is returned,
2849  * otherwise an error code less than zero is returned, or zero if
2850  * no record(s) were appended.
2851  */
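/*
 * Illustrative call shapes, as used elsewhere in this file:
 *
 *	// Current-state response to a group or group-source query:
 *	mld_v2_enqueue_group_record(qrq, inm, 0, 1, is_sg_query, 0);
 *
 *	// State-change record for a join, leave or intermediate change:
 *	mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
 *	    (mli->mli_flags & MLIF_USEALLOW));
 */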
2852 static int
2853 mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
2854     const int is_state_change, const int is_group_query,
2855     const int is_source_query, const int use_block_allow)
2856 {
2857 	struct mldv2_record      mr;
2858 	struct mldv2_record     *pmr;
2859 	struct ifnet            *ifp;
2860 	struct ip6_msource      *ims, *nims;
2861 	struct mbuf             *m0, *m, *md;
2862 	int                      error, is_filter_list_change;
2863 	int                      minrec0len, m0srcs, msrcs, nbytes, off;
2864 	int                      record_has_sources;
2865 	int                      now;
2866 	uint8_t                  type;
2867 	uint8_t                  mode;
2868 
2869 	IN6M_LOCK_ASSERT_HELD(inm);
2870 	MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
2871 
2872 	error = 0;
2873 	ifp = inm->in6m_ifp;
2874 	is_filter_list_change = 0;
2875 	m = NULL;
2876 	m0 = NULL;
2877 	m0srcs = 0;
2878 	msrcs = 0;
2879 	nbytes = 0;
2880 	nims = NULL;
2881 	record_has_sources = 1;
2882 	pmr = NULL;
2883 	type = MLD_DO_NOTHING;
2884 	mode = (uint8_t)inm->in6m_st[1].iss_fmode;
2885 
2886 	/*
2887 	 * If we did not transition out of ASM mode during t0->t1,
2888 	 * and there are no source nodes to process, we can skip
2889 	 * the generation of source records.
2890 	 */
2891 	if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 &&
2892 	    inm->in6m_nsrc == 0) {
2893 		record_has_sources = 0;
2894 	}
2895 
2896 	if (is_state_change) {
2897 		/*
2898 		 * Queue a state change record.
2899 		 * If the mode did not change, and there are non-ASM
2900 		 * listeners or source filters present,
2901 		 * we potentially need to issue two records for the group.
2902 		 * If there are ASM listeners, and there was no filter
2903 		 * mode transition of any kind, do nothing.
2904 		 *
2905 		 * If we are transitioning to MCAST_UNDEFINED, we need
2906 		 * not send any sources. A transition to/from this state is
2907 		 * considered inclusive with some special treatment.
2908 		 *
2909 		 * If we are rewriting initial joins/leaves to use
2910 		 * ALLOW/BLOCK, and the group's membership is inclusive,
2911 		 * we need to send sources in all cases.
2912 		 */
2913 		if (mode != inm->in6m_st[0].iss_fmode) {
2914 			if (mode == MCAST_EXCLUDE) {
2915 				MLD_PRINTF(("%s: change to EXCLUDE\n",
2916 				    __func__));
2917 				type = MLD_CHANGE_TO_EXCLUDE_MODE;
2918 			} else {
2919 				MLD_PRINTF(("%s: change to INCLUDE\n",
2920 				    __func__));
2921 				if (use_block_allow) {
2922 					/*
2923 					 * XXX
2924 					 * Here we're interested in state
2925 					 * edges either direction between
2926 					 * MCAST_UNDEFINED and MCAST_INCLUDE.
2927 					 * Perhaps we should just check
2928 					 * the group state, rather than
2929 					 * the filter mode.
2930 					 */
2931 					if (mode == MCAST_UNDEFINED) {
2932 						type = MLD_BLOCK_OLD_SOURCES;
2933 					} else {
2934 						type = MLD_ALLOW_NEW_SOURCES;
2935 					}
2936 				} else {
2937 					type = MLD_CHANGE_TO_INCLUDE_MODE;
2938 					if (mode == MCAST_UNDEFINED) {
2939 						record_has_sources = 0;
2940 					}
2941 				}
2942 			}
2943 		} else {
2944 			if (record_has_sources) {
2945 				is_filter_list_change = 1;
2946 			} else {
2947 				type = MLD_DO_NOTHING;
2948 			}
2949 		}
2950 	} else {
2951 		/*
2952 		 * Queue a current state record.
2953 		 */
2954 		if (mode == MCAST_EXCLUDE) {
2955 			type = MLD_MODE_IS_EXCLUDE;
2956 		} else if (mode == MCAST_INCLUDE) {
2957 			type = MLD_MODE_IS_INCLUDE;
2958 			VERIFY(inm->in6m_st[1].iss_asm == 0);
2959 		}
2960 	}
2961 
2962 	/*
2963 	 * Generate the filter list changes using a separate function.
2964 	 */
2965 	if (is_filter_list_change) {
2966 		return mld_v2_enqueue_filter_change(ifq, inm);
2967 	}
2968 
2969 	if (type == MLD_DO_NOTHING) {
2970 		MLD_PRINTF(("%s: nothing to do for %s/%s\n",
2971 		    __func__, ip6_sprintf(&inm->in6m_addr),
2972 		    if_name(inm->in6m_ifp)));
2973 		return 0;
2974 	}
2975 
2976 	/*
2977 	 * If any sources are present, we must be able to fit at least
2978 	 * one in the trailing space of the tail packet's mbuf,
2979 	 * ideally more.
2980 	 */
2981 	minrec0len = sizeof(struct mldv2_record);
2982 	if (record_has_sources) {
2983 		minrec0len += sizeof(struct in6_addr);
2984 	}
2985 	MLD_PRINTF(("%s: queueing %s for %s/%s\n", __func__,
2986 	    mld_rec_type_to_str(type),
2987 	    ip6_sprintf(&inm->in6m_addr),
2988 	    if_name(inm->in6m_ifp)));
2989 
2990 	/*
2991 	 * Check if we have a packet in the tail of the queue for this
2992 	 * group into which the first group record for this group will fit.
2993 	 * Otherwise allocate a new packet.
2994 	 * Always allocate leading space for IP6+RA+ICMPV6+REPORT.
2995 	 * Note: Group records for G/GSR query responses MUST be sent
2996 	 * in their own packet.
2997 	 */
2998 	m0 = ifq->ifq_tail;
2999 	if (!is_group_query &&
3000 	    m0 != NULL &&
3001 	    (m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
3002 	    (m0->m_pkthdr.len + minrec0len) <
3003 	    (ifp->if_mtu - MLD_MTUSPACE)) {
3004 		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3005 		    sizeof(struct mldv2_record)) /
3006 		    sizeof(struct in6_addr);
3007 		m = m0;
3008 		MLD_PRINTF(("%s: use existing packet\n", __func__));
3009 	} else {
3010 		if (IF_QFULL(ifq)) {
3011 			MLD_PRINTF(("%s: outbound queue full\n", __func__));
3012 			return -ENOMEM;
3013 		}
3014 		m = NULL;
3015 		m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
3016 		    sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
3017 		if (!is_state_change && !is_group_query) {
3018 			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3019 		}
3020 		if (m == NULL) {
3021 			m = m_gethdr(M_DONTWAIT, MT_DATA);
3022 		}
3023 		if (m == NULL) {
3024 			return -ENOMEM;
3025 		}
3026 
3027 		mld_save_context(m, ifp);
3028 
3029 		MLD_PRINTF(("%s: allocated first packet\n", __func__));
3030 	}
3031 
3032 	/*
3033 	 * Append group record.
3034 	 * If we have sources, we don't know how many yet.
3035 	 */
3036 	mr.mr_type = type;
3037 	mr.mr_datalen = 0;
3038 	mr.mr_numsrc = 0;
3039 	mr.mr_addr = inm->in6m_addr;
3040 	in6_clearscope(&mr.mr_addr);
3041 	if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
3042 		if (m != m0) {
3043 			m_freem(m);
3044 		}
3045 		MLD_PRINTF(("%s: m_append() failed.\n", __func__));
3046 		return -ENOMEM;
3047 	}
3048 	nbytes += sizeof(struct mldv2_record);
3049 
3050 	/*
3051 	 * Append as many sources as will fit in the first packet.
3052 	 * If we are appending to a new packet, the chain allocation
3053 	 * may potentially use clusters; use m_getptr() in this case.
3054 	 * If we are appending to an existing packet, we need to obtain
3055 	 * a pointer to the group record after m_append(), in case a new
3056 	 * mbuf was allocated.
3057 	 *
3058 	 * Only append sources which are in-mode at t1. If we are
3059 	 * transitioning to MCAST_UNDEFINED state on the group, and
3060 	 * use_block_allow is zero, do not include source entries.
3061 	 * Otherwise, we need to include this source in the report.
3062 	 *
3063 	 * Only report recorded sources in our filter set when responding
3064 	 * to a group-source query.
3065 	 */
3066 	if (record_has_sources) {
3067 		if (m == m0) {
3068 			md = m_last(m);
3069 			pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
3070 			    md->m_len - nbytes);
3071 		} else {
3072 			md = m_getptr(m, 0, &off);
3073 			pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
3074 			    off);
3075 		}
3076 		msrcs = 0;
3077 		RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs,
3078 		    nims) {
3079 			MLD_PRINTF(("%s: visit node %s\n", __func__,
3080 			    ip6_sprintf(&ims->im6s_addr)));
3081 			now = im6s_get_mode(inm, ims, 1);
3082 			MLD_PRINTF(("%s: node is %d\n", __func__, now));
3083 			if ((now != mode) ||
3084 			    (now == mode &&
3085 			    (!use_block_allow && mode == MCAST_UNDEFINED))) {
3086 				MLD_PRINTF(("%s: skip node\n", __func__));
3087 				continue;
3088 			}
3089 			if (is_source_query && ims->im6s_stp == 0) {
3090 				MLD_PRINTF(("%s: skip unrecorded node\n",
3091 				    __func__));
3092 				continue;
3093 			}
3094 			MLD_PRINTF(("%s: append node\n", __func__));
3095 			if (!m_append(m, sizeof(struct in6_addr),
3096 			    (void *)&ims->im6s_addr)) {
3097 				if (m != m0) {
3098 					m_freem(m);
3099 				}
3100 				MLD_PRINTF(("%s: m_append() failed.\n",
3101 				    __func__));
3102 				return -ENOMEM;
3103 			}
3104 			nbytes += sizeof(struct in6_addr);
3105 			++msrcs;
3106 			if (msrcs == m0srcs) {
3107 				break;
3108 			}
3109 		}
3110 		MLD_PRINTF(("%s: msrcs is %d this packet\n", __func__,
3111 		    msrcs));
3112 		pmr->mr_numsrc = htons((uint16_t)msrcs);
3113 		nbytes += (msrcs * sizeof(struct in6_addr));
3114 	}
3115 
3116 	if (is_source_query && msrcs == 0) {
3117 		MLD_PRINTF(("%s: no recorded sources to report\n", __func__));
3118 		if (m != m0) {
3119 			m_freem(m);
3120 		}
3121 		return 0;
3122 	}
3123 
3124 	/*
3125 	 * We are good to go with first packet.
3126 	 */
3127 	if (m != m0) {
3128 		MLD_PRINTF(("%s: enqueueing first packet\n", __func__));
3129 		m->m_pkthdr.vt_nrecs = 1;
3130 		IF_ENQUEUE(ifq, m);
3131 	} else {
3132 		m->m_pkthdr.vt_nrecs++;
3133 	}
3134 	/*
3135 	 * No further work needed if no source list in packet(s).
3136 	 */
3137 	if (!record_has_sources) {
3138 		return nbytes;
3139 	}
3140 
3141 	/*
3142 	 * Whilst sources remain to be announced, we need to allocate
3143 	 * a new packet and fill out as many sources as will fit.
3144 	 * Always try for a cluster first.
3145 	 */
3146 	while (nims != NULL) {
3147 		if (IF_QFULL(ifq)) {
3148 			MLD_PRINTF(("%s: outbound queue full\n", __func__));
3149 			return -ENOMEM;
3150 		}
3151 		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3152 		if (m == NULL) {
3153 			m = m_gethdr(M_DONTWAIT, MT_DATA);
3154 		}
3155 		if (m == NULL) {
3156 			return -ENOMEM;
3157 		}
3158 		mld_save_context(m, ifp);
3159 		md = m_getptr(m, 0, &off);
3160 		pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off);
3161 		MLD_PRINTF(("%s: allocated next packet\n", __func__));
3162 
3163 		if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
3164 			if (m != m0) {
3165 				m_freem(m);
3166 			}
3167 			MLD_PRINTF(("%s: m_append() failed.\n", __func__));
3168 			return -ENOMEM;
3169 		}
3170 		m->m_pkthdr.vt_nrecs = 1;
3171 		nbytes += sizeof(struct mldv2_record);
3172 
3173 		m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
3174 		    sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
3175 
3176 		msrcs = 0;
3177 		RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
3178 			MLD_PRINTF(("%s: visit node %s\n",
3179 			    __func__, ip6_sprintf(&ims->im6s_addr)));
3180 			now = im6s_get_mode(inm, ims, 1);
3181 			if ((now != mode) ||
3182 			    (now == mode &&
3183 			    (!use_block_allow && mode == MCAST_UNDEFINED))) {
3184 				MLD_PRINTF(("%s: skip node\n", __func__));
3185 				continue;
3186 			}
3187 			if (is_source_query && ims->im6s_stp == 0) {
3188 				MLD_PRINTF(("%s: skip unrecorded node\n",
3189 				    __func__));
3190 				continue;
3191 			}
3192 			MLD_PRINTF(("%s: append node\n", __func__));
3193 			if (!m_append(m, sizeof(struct in6_addr),
3194 			    (void *)&ims->im6s_addr)) {
3195 				if (m != m0) {
3196 					m_freem(m);
3197 				}
3198 				MLD_PRINTF(("%s: m_append() failed.\n",
3199 				    __func__));
3200 				return -ENOMEM;
3201 			}
3202 			++msrcs;
3203 			if (msrcs == m0srcs) {
3204 				break;
3205 			}
3206 		}
3207 		pmr->mr_numsrc = htons((uint16_t)msrcs);
3208 		nbytes += (msrcs * sizeof(struct in6_addr));
3209 
3210 		MLD_PRINTF(("%s: enqueueing next packet\n", __func__));
3211 		IF_ENQUEUE(ifq, m);
3212 	}
3213 
3214 	return nbytes;
3215 }
3216 
3217 /*
3218  * Type used to mark record pass completion.
3219  * We exploit the fact we can cast to this easily from the
3220  * current filter modes on each ip_msource node.
3221  */
3222 typedef enum {
3223 	REC_NONE = 0x00,        /* MCAST_UNDEFINED */
3224 	REC_ALLOW = 0x01,       /* MCAST_INCLUDE */
3225 	REC_BLOCK = 0x02,       /* MCAST_EXCLUDE */
3226 	REC_FULL = REC_ALLOW | REC_BLOCK
3227 } rectype_t;
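
/*
 * For example, (rectype_t)MCAST_INCLUDE equals REC_ALLOW and
 * (rectype_t)MCAST_EXCLUDE equals REC_BLOCK, per the mapping above.
 */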
3228 
3229 /*
3230  * Enqueue an MLDv2 filter list change to the given output queue.
3231  *
3232  * Source list filter state is held in an RB-tree. When the filter list
3233  * for a group is changed without changing its mode, we need to compute
3234  * the deltas between T0 and T1 for each source in the filter set,
3235  * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3236  *
3237  * As we may potentially queue two record types, and the entire R-B tree
3238  * needs to be walked at once, we break this out into its own function
3239  * so we can generate a tightly packed queue of packets.
3240  *
3241  * XXX This could be written to only use one tree walk, although that makes
3242  * serializing into the mbuf chains a bit harder. For now we do two walks
3243  * which makes things easier on us, and it may or may not be harder on
3244  * the L2 cache.
3245  *
3246  * If successful the size of all data appended to the queue is returned,
3247  * otherwise an error code less than zero is returned, or zero if
3248  * no record(s) were appended.
3249  */
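/*
 * Worked example (per the RFC 3810 delta semantics): for a group in
 * INCLUDE mode whose source list changes from {A, B} at t0 to {B, C}
 * at t1, the two walks below emit an ALLOW_NEW_SOURCES record listing
 * C and a BLOCK_OLD_SOURCES record listing A; B is unchanged and is
 * skipped on both passes.
 */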
3250 static int
3251 mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
3252 {
3253 	static const int MINRECLEN =
3254 	    sizeof(struct mldv2_record) + sizeof(struct in6_addr);
3255 	struct ifnet            *ifp;
3256 	struct mldv2_record      mr;
3257 	struct mldv2_record     *pmr;
3258 	struct ip6_msource      *ims, *nims;
3259 	struct mbuf             *m, *m0, *md;
3260 	int                      m0srcs, nbytes, npbytes, off, rsrcs, schanged;
3261 	int                      nallow, nblock;
3262 	uint8_t                  mode, now, then;
3263 	rectype_t                crt, drt, nrt;
3264 
3265 	IN6M_LOCK_ASSERT_HELD(inm);
3266 
3267 	if (inm->in6m_nsrc == 0 ||
3268 	    (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0)) {
3269 		return 0;
3270 	}
3271 
3272 	ifp = inm->in6m_ifp;                    /* interface */
3273 	mode = (uint8_t)inm->in6m_st[1].iss_fmode;       /* filter mode at t1 */
3274 	crt = REC_NONE; /* current group record type */
3275 	drt = REC_NONE; /* mask of completed group record types */
3276 	nrt = REC_NONE; /* record type for current node */
3277 	m0srcs = 0;     /* # source which will fit in current mbuf chain */
3278 	npbytes = 0;    /* # of bytes appended this packet */
3279 	nbytes = 0;     /* # of bytes appended to group's state-change queue */
3280 	rsrcs = 0;      /* # sources encoded in current record */
3281 	schanged = 0;   /* # nodes encoded in overall filter change */
3282 	nallow = 0;     /* # of source entries in ALLOW_NEW */
3283 	nblock = 0;     /* # of source entries in BLOCK_OLD */
3284 	nims = NULL;    /* next tree node pointer */
3285 
3286 	/*
3287 	 * For each possible filter record mode.
3288 	 * The first kind of source we encounter tells us which
3289 	 * is the first kind of record we start appending.
3290 	 * If a node transitioned to UNDEFINED at t1, its mode is treated
3291 	 * as the inverse of the group's filter mode.
3292 	 */
3293 	while (drt != REC_FULL) {
3294 		do {
3295 			m0 = ifq->ifq_tail;
3296 			if (m0 != NULL &&
3297 			    (m0->m_pkthdr.vt_nrecs + 1 <=
3298 			    MLD_V2_REPORT_MAXRECS) &&
3299 			    (m0->m_pkthdr.len + MINRECLEN) <
3300 			    (ifp->if_mtu - MLD_MTUSPACE)) {
3301 				m = m0;
3302 				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3303 				    sizeof(struct mldv2_record)) /
3304 				    sizeof(struct in6_addr);
3305 				MLD_PRINTF(("%s: use previous packet\n",
3306 				    __func__));
3307 			} else {
3308 				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3309 				if (m == NULL) {
3310 					m = m_gethdr(M_DONTWAIT, MT_DATA);
3311 				}
3312 				if (m == NULL) {
3313 					MLD_PRINTF(("%s: m_get*() failed\n",
3314 					    __func__));
3315 					return -ENOMEM;
3316 				}
3317 				m->m_pkthdr.vt_nrecs = 0;
3318 				mld_save_context(m, ifp);
3319 				m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
3320 				    sizeof(struct mldv2_record)) /
3321 				    sizeof(struct in6_addr);
3322 				npbytes = 0;
3323 				MLD_PRINTF(("%s: allocated new packet\n",
3324 				    __func__));
3325 			}
3326 			/*
3327 			 * Append the MLD group record header to the
3328 			 * current packet's data area.
3329 			 * Recalculate pointer to free space for next
3330 			 * group record, in case m_append() allocated
3331 			 * a new mbuf or cluster.
3332 			 */
3333 			memset(&mr, 0, sizeof(mr));
3334 			mr.mr_addr = inm->in6m_addr;
3335 			in6_clearscope(&mr.mr_addr);
3336 			if (!m_append(m, sizeof(mr), (void *)&mr)) {
3337 				if (m != m0) {
3338 					m_freem(m);
3339 				}
3340 				MLD_PRINTF(("%s: m_append() failed\n",
3341 				    __func__));
3342 				return -ENOMEM;
3343 			}
3344 			npbytes += sizeof(struct mldv2_record);
3345 			if (m != m0) {
3346 				/* new packet; offset in chain */
3347 				md = m_getptr(m, npbytes -
3348 				    sizeof(struct mldv2_record), &off);
3349 				pmr = (struct mldv2_record *)(mtod(md,
3350 				    uint8_t *) + off);
3351 			} else {
3352 				/* current packet; offset from last append */
3353 				md = m_last(m);
3354 				pmr = (struct mldv2_record *)(mtod(md,
3355 				    uint8_t *) + md->m_len -
3356 				    sizeof(struct mldv2_record));
3357 			}
3358 			/*
3359 			 * Begin walking the tree for this record type
3360 			 * pass, or continue from where we left off
3361 			 * previously if we had to allocate a new packet.
3362 			 * Only report deltas in-mode at t1.
3363 			 * We need not report included sources as allowed
3364 			 * if we are in inclusive mode on the group,
3365 			 * however the converse is not true.
3366 			 */
3367 			rsrcs = 0;
3368 			if (nims == NULL) {
3369 				nims = RB_MIN(ip6_msource_tree,
3370 				    &inm->in6m_srcs);
3371 			}
3372 			RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
3373 				MLD_PRINTF(("%s: visit node %s\n", __func__,
3374 				    ip6_sprintf(&ims->im6s_addr)));
3375 				now = im6s_get_mode(inm, ims, 1);
3376 				then = im6s_get_mode(inm, ims, 0);
3377 				MLD_PRINTF(("%s: mode: t0 %d, t1 %d\n",
3378 				    __func__, then, now));
3379 				if (now == then) {
3380 					MLD_PRINTF(("%s: skip unchanged\n",
3381 					    __func__));
3382 					continue;
3383 				}
3384 				if (mode == MCAST_EXCLUDE &&
3385 				    now == MCAST_INCLUDE) {
3386 					MLD_PRINTF(("%s: skip IN src on EX "
3387 					    "group\n", __func__));
3388 					continue;
3389 				}
3390 				nrt = (rectype_t)now;
3391 				if (nrt == REC_NONE) {
3392 					nrt = (rectype_t)(~mode & REC_FULL);
3393 				}
3394 				if (schanged++ == 0) {
3395 					crt = nrt;
3396 				} else if (crt != nrt) {
3397 					continue;
3398 				}
3399 				if (!m_append(m, sizeof(struct in6_addr),
3400 				    (void *)&ims->im6s_addr)) {
3401 					if (m != m0) {
3402 						m_freem(m);
3403 					}
3404 					MLD_PRINTF(("%s: m_append() failed\n",
3405 					    __func__));
3406 					return -ENOMEM;
3407 				}
3408 				nallow += !!(crt == REC_ALLOW);
3409 				nblock += !!(crt == REC_BLOCK);
3410 				if (++rsrcs == m0srcs) {
3411 					break;
3412 				}
3413 			}
3414 			/*
3415 			 * If we did not append any tree nodes on this
3416 			 * pass, back out of allocations.
3417 			 */
3418 			if (rsrcs == 0) {
3419 				npbytes -= sizeof(struct mldv2_record);
3420 				if (m != m0) {
3421 					MLD_PRINTF(("%s: m_free(m)\n",
3422 					    __func__));
3423 					m_freem(m);
3424 				} else {
3425 					MLD_PRINTF(("%s: m_adj(m, -mr)\n",
3426 					    __func__));
3427 					m_adj(m, -((int)sizeof(
3428 						    struct mldv2_record)));
3429 				}
3430 				continue;
3431 			}
3432 			npbytes += (rsrcs * sizeof(struct in6_addr));
3433 			if (crt == REC_ALLOW) {
3434 				pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
3435 			} else if (crt == REC_BLOCK) {
3436 				pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
3437 			}
3438 			pmr->mr_numsrc = htons((uint16_t)rsrcs);
3439 			/*
3440 			 * Count the new group record, and enqueue this
3441 			 * packet if it wasn't already queued.
3442 			 */
3443 			m->m_pkthdr.vt_nrecs++;
3444 			if (m != m0) {
3445 				IF_ENQUEUE(ifq, m);
3446 			}
3447 			nbytes += npbytes;
3448 		} while (nims != NULL);
3449 		drt |= crt;
3450 		crt = (~crt & REC_FULL);
3451 	}
3452 
3453 	MLD_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
3454 	    nallow, nblock));
3455 
3456 	return nbytes;
3457 }
3458 
3459 static int
3460 mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
3461 {
3462 	struct ifqueue  *gq;
3463 	struct mbuf     *m;             /* pending state-change */
3464 	struct mbuf     *m0;            /* copy of pending state-change */
3465 	struct mbuf     *mt;            /* last state-change in packet */
3466 	struct mbuf     *n;
3467 	int              docopy, domerge;
3468 	u_int            recslen;
3469 
3470 	IN6M_LOCK_ASSERT_HELD(inm);
3471 
3472 	docopy = 0;
3473 	domerge = 0;
3474 	recslen = 0;
3475 
3476 	/*
3477 	 * If there are further pending retransmissions, make a writable
3478 	 * copy of each queued state-change message before merging.
3479 	 */
3480 	if (inm->in6m_scrv > 0) {
3481 		docopy = 1;
3482 	}
3483 
3484 	gq = &inm->in6m_scq;
3485 #ifdef MLD_DEBUG
3486 	if (gq->ifq_head == NULL) {
3487 		MLD_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
3488 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
3489 	}
3490 #endif
3491 
3492 	/*
3493 	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
3494 	 * packet might not always be at the head of the ifqueue.
3495 	 */
3496 	m = gq->ifq_head;
3497 	while (m != NULL) {
3498 		/*
3499 		 * Only merge the report into the current packet if
3500 		 * there is sufficient space to do so; an MLDv2 report
3501 		 * packet may only contain 65,535 group records.
3502 	 * Always use a simple mbuf chain concatenation to do this,
3503 		 * as large state changes for single groups may have
3504 		 * allocated clusters.
3505 		 */
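		/*
		 * For illustration: with a 1500-byte MTU, the length test
		 * below permits a merge only while
		 *
		 *	mt->m_pkthdr.len + m_length(m) <= 1500 - MLD_MTUSPACE
		 *
		 * so the combined report still fits in one unfragmented
		 * packet; the record-count test likewise keeps vt_nrecs
		 * within MLD_V2_REPORT_MAXRECS (65,535, the width of the
		 * report's 16-bit record count field).
		 */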
3506 		domerge = 0;
3507 		mt = ifscq->ifq_tail;
3508 		if (mt != NULL) {
3509 			recslen = m_length(m);
3510 
3511 			if ((mt->m_pkthdr.vt_nrecs +
3512 			    m->m_pkthdr.vt_nrecs <=
3513 			    MLD_V2_REPORT_MAXRECS) &&
3514 			    (mt->m_pkthdr.len + recslen <=
3515 			    (inm->in6m_ifp->if_mtu - MLD_MTUSPACE))) {
3516 				domerge = 1;
3517 			}
3518 		}
3519 
3520 		if (!domerge && IF_QFULL(gq)) {
3521 			MLD_PRINTF(("%s: outbound queue full, skipping whole "
3522 			    "packet 0x%llx\n", __func__,
3523 			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
3524 			n = m->m_nextpkt;
3525 			if (!docopy) {
3526 				IF_REMQUEUE(gq, m);
3527 				m_freem(m);
3528 			}
3529 			m = n;
3530 			continue;
3531 		}
3532 
3533 		if (!docopy) {
3534 			MLD_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
3535 			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
3536 			n = m->m_nextpkt;
3537 			IF_REMQUEUE(gq, m);
3538 			m0 = m;
3539 			m = n;
3540 		} else {
3541 			MLD_PRINTF(("%s: copying 0x%llx\n", __func__,
3542 			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
3543 			m0 = m_dup(m, M_NOWAIT);
3544 			if (m0 == NULL) {
3545 				return ENOMEM;
3546 			}
3547 			m0->m_nextpkt = NULL;
3548 			m = m->m_nextpkt;
3549 		}
3550 
3551 		if (!domerge) {
3552 			MLD_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx\n",
3553 			    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
3554 			    (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
3555 			IF_ENQUEUE(ifscq, m0);
3556 		} else {
3557 			struct mbuf *mtl;       /* last mbuf of packet mt */
3558 
3559 			MLD_PRINTF(("%s: merging 0x%llx with ifscq tail "
3560 			    "0x%llx\n", __func__,
3561 			    (uint64_t)VM_KERNEL_ADDRPERM(m0),
3562 			    (uint64_t)VM_KERNEL_ADDRPERM(mt)));
3563 
3564 			mtl = m_last(mt);
3565 			m0->m_flags &= ~M_PKTHDR;
3566 			mt->m_pkthdr.len += recslen;
3567 			mt->m_pkthdr.vt_nrecs +=
3568 			    m0->m_pkthdr.vt_nrecs;
3569 
3570 			mtl->m_next = m0;
3571 		}
3572 	}
3573 
3574 	return 0;
3575 }
3576 
3577 /*
3578  * Respond to a pending MLDv2 General Query.
3579  */
3580 static uint32_t
3581 mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
3582 {
3583 	struct ifnet            *ifp;
3584 	struct in6_multi        *inm;
3585 	struct in6_multistep    step;
3586 	int                      retval;
3587 
3588 	MLI_LOCK_ASSERT_HELD(mli);
3589 
3590 	VERIFY(mli->mli_version == MLD_VERSION_2);
3591 
3592 	ifp = mli->mli_ifp;
3593 	MLI_UNLOCK(mli);
3594 
3595 	in6_multihead_lock_shared();
3596 	IN6_FIRST_MULTI(step, inm);
3597 	while (inm != NULL) {
3598 		IN6M_LOCK(inm);
3599 		if (inm->in6m_ifp != ifp) {
3600 			goto next;
3601 		}
3602 
3603 		switch (inm->in6m_state) {
3604 		case MLD_NOT_MEMBER:
3605 		case MLD_SILENT_MEMBER:
3606 			break;
3607 		case MLD_REPORTING_MEMBER:
3608 		case MLD_IDLE_MEMBER:
3609 		case MLD_LAZY_MEMBER:
3610 		case MLD_SLEEPING_MEMBER:
3611 		case MLD_AWAKENING_MEMBER:
3612 			inm->in6m_state = MLD_REPORTING_MEMBER;
3613 			MLI_LOCK(mli);
3614 			retval = mld_v2_enqueue_group_record(&mli->mli_gq,
3615 			    inm, 0, 0, 0, 0);
3616 			MLI_UNLOCK(mli);
3617 			MLD_PRINTF(("%s: enqueue record = %d\n",
3618 			    __func__, retval));
3619 			break;
3620 		case MLD_G_QUERY_PENDING_MEMBER:
3621 		case MLD_SG_QUERY_PENDING_MEMBER:
3622 		case MLD_LEAVING_MEMBER:
3623 			break;
3624 		}
3625 next:
3626 		IN6M_UNLOCK(inm);
3627 		IN6_NEXT_MULTI(step, inm);
3628 	}
3629 	in6_multihead_lock_done();
3630 
3631 	MLI_LOCK(mli);
3632 	mld_dispatch_queue_locked(mli, &mli->mli_gq, MLD_MAX_RESPONSE_BURST);
3633 	MLI_LOCK_ASSERT_HELD(mli);
3634 
3635 	/*
3636 	 * Slew transmission of bursts over 1 second intervals.
3637 	 */
3638 	if (mli->mli_gq.ifq_head != NULL) {
3639 		mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
3640 			MLD_RESPONSE_BURST_INTERVAL);
3641 	}
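	/*
	 * mld_dispatch_queue_locked() sends at most MLD_MAX_RESPONSE_BURST
	 * packets per call, so anything still queued on mli_gq at this
	 * point drains on subsequent timer firings; a large general-query
	 * response is thus paced out in bursts rather than sent at once.
	 */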
3642 
3643 	return mli->mli_v2_timer;
3644 }
3645 
3646 /*
3647  * Transmit the next pending message in the output queue.
3648  *
3649  * Must not be called with in6m_lock or mli_lock held.
3650  */
3651 static void
3652 mld_dispatch_packet(struct mbuf *m)
3653 {
3654 	struct ip6_moptions     *im6o;
3655 	struct ifnet            *ifp;
3656 	struct ifnet            *oifp = NULL;
3657 	struct mbuf             *m0;
3658 	struct mbuf             *md;
3659 	struct ip6_hdr          *ip6;
3660 	struct mld_hdr          *mld;
3661 	int                      error;
3662 	int                      off;
3663 	int                      type;
3664 
3665 	MLD_PRINTF(("%s: transmit 0x%llx\n", __func__,
3666 	    (uint64_t)VM_KERNEL_ADDRPERM(m)));
3667 
3668 	/*
3669 	 * Check if the ifnet is still attached.
3670 	 */
3671 	ifp = mld_restore_context(m);
3672 	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
3673 		MLD_PRINTF(("%s: dropped 0x%llx as ifindex %u went away.\n",
3674 		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m),
3675 		    (u_int)if_index));
3676 		m_freem(m);
3677 		ip6stat.ip6s_noroute++;
3678 		return;
3679 	}
3680 
3681 	im6o = ip6_allocmoptions(Z_WAITOK);
3682 	if (im6o == NULL) {
3683 		m_freem(m);
3684 		return;
3685 	}
3686 
3687 	im6o->im6o_multicast_hlim = 1;
3688 	im6o->im6o_multicast_loop = 0;
3689 	im6o->im6o_multicast_ifp = ifp;
3690 
3691 	if (m->m_flags & M_MLDV1) {
3692 		m0 = m;
3693 	} else {
3694 		m0 = mld_v2_encap_report(ifp, m);
3695 		if (m0 == NULL) {
3696 			MLD_PRINTF(("%s: dropped 0x%llx\n", __func__,
3697 			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
3698 			/*
3699 			 * mld_v2_encap_report() has already freed our mbuf.
3700 			 */
3701 			IM6O_REMREF(im6o);
3702 			ip6stat.ip6s_odropped++;
3703 			return;
3704 		}
3705 	}
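	/*
	 * Note that the M_MLDV1 case performs no encapsulation: as the
	 * mtod() below implies, MLDv1 messages are queued with a complete
	 * IPv6 header already attached, whereas MLDv2 reports are queued
	 * as bare group-record chains and gain their headers here.
	 */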
3706 
3707 	mld_scrub_context(m0);
3708 	m->m_flags &= ~(M_PROTOFLAGS);
3709 	m0->m_pkthdr.rcvif = lo_ifp;
3710 
3711 	ip6 = mtod(m0, struct ip6_hdr *);
3712 	(void)in6_setscope(&ip6->ip6_dst, ifp, NULL);
3713 	ip6_output_setdstifscope(m0, ifp->if_index, NULL);
3714 	/*
3715 	 * Retrieve the ICMPv6 type before handoff to ip6_output(),
3716 	 * so we can bump the stats.
3717 	 */
3718 	md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
3719 	mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off);
3720 	type = mld->mld_type;
3721 
3722 	if (ifp->if_eflags & IFEF_TXSTART) {
3723 		/*
3724 		 * Use control service class if the outgoing
3725 		 * interface supports transmit-start model.
3726 		 */
3727 		(void) m_set_service_class(m0, MBUF_SC_CTL);
3728 	}
3729 
3730 	error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, im6o,
3731 	    &oifp, NULL);
3732 
3733 	IM6O_REMREF(im6o);
3734 
3735 	if (error) {
3736 		MLD_PRINTF(("%s: ip6_output(0x%llx) = %d\n", __func__,
3737 		    (uint64_t)VM_KERNEL_ADDRPERM(m0), error));
3738 		if (oifp != NULL) {
3739 			ifnet_release(oifp);
3740 		}
3741 		return;
3742 	}
3743 
3744 	icmp6stat.icp6s_outhist[type]++;
3745 	if (oifp != NULL) {
3746 		icmp6_ifstat_inc(oifp, ifs6_out_msg);
3747 		switch (type) {
3748 		case MLD_LISTENER_REPORT:
3749 		case MLDV2_LISTENER_REPORT:
3750 			icmp6_ifstat_inc(oifp, ifs6_out_mldreport);
3751 			break;
3752 		case MLD_LISTENER_DONE:
3753 			icmp6_ifstat_inc(oifp, ifs6_out_mlddone);
3754 			break;
3755 		}
3756 		ifnet_release(oifp);
3757 	}
3758 }
3759 
3760 /*
3761  * Encapsulate an MLDv2 report.
3762  *
3763  * KAME IPv6 requires that hop-by-hop options be passed separately,
3764  * and that the IPv6 header be prepended in a separate mbuf.
3765  *
3766  * Returns a pointer to the new mbuf chain head, or NULL if the
3767  * allocation failed.
3768  */
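/*
 * A sketch of the chain built below:
 *
 *	mh: [ ip6_hdr | mldv2_report header ] --> m: [ group records ... ]
 *
 * mld_v2_numrecs is copied from m->m_pkthdr.vt_nrecs before m's M_PKTHDR
 * flag is cleared, and the ICMPv6 checksum is computed over the report
 * header plus all appended records.
 */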
3769 static struct mbuf *
3770 mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
3771 {
3772 	struct mbuf             *mh;
3773 	struct mldv2_report     *mld;
3774 	struct ip6_hdr          *ip6;
3775 	struct in6_ifaddr       *ia;
3776 	int                      mldreclen;
3777 
3778 	VERIFY(m->m_flags & M_PKTHDR);
3779 
3780 	/*
3781 	 * RFC3590: OK to send as :: or tentative during DAD.
3782 	 */
3783 	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
3784 	if (ia == NULL) {
3785 		MLD_PRINTF(("%s: warning: ia is NULL\n", __func__));
3786 	}
3787 
3788 	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3789 	if (mh == NULL) {
3790 		if (ia != NULL) {
3791 			IFA_REMREF(&ia->ia_ifa);
3792 		}
3793 		m_freem(m);
3794 		return NULL;
3795 	}
3796 	MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
3797 
3798 	mldreclen = m_length(m);
3799 	MLD_PRINTF(("%s: mldreclen is %d\n", __func__, mldreclen));
3800 
3801 	mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
3802 	mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
3803 	    sizeof(struct mldv2_report) + mldreclen;
3804 
3805 	ip6 = mtod(mh, struct ip6_hdr *);
3806 	ip6->ip6_flow = 0;
3807 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
3808 	ip6->ip6_vfc |= IPV6_VERSION;
3809 	ip6->ip6_nxt = IPPROTO_ICMPV6;
3810 	if (ia != NULL) {
3811 		IFA_LOCK(&ia->ia_ifa);
3812 	}
3813 	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
3814 	ip6_output_setsrcifscope(mh, IFSCOPE_NONE, ia);
3815 
3816 	if (ia != NULL) {
3817 		IFA_UNLOCK(&ia->ia_ifa);
3818 		IFA_REMREF(&ia->ia_ifa);
3819 		ia = NULL;
3820 	}
3821 	ip6->ip6_dst = in6addr_linklocal_allv2routers;
3822 	ip6_output_setdstifscope(mh, ifp->if_index, NULL);
3823 	/* scope ID will be set in netisr */
3824 
3825 	mld = (struct mldv2_report *)(ip6 + 1);
3826 	mld->mld_type = MLDV2_LISTENER_REPORT;
3827 	mld->mld_code = 0;
3828 	mld->mld_cksum = 0;
3829 	mld->mld_v2_reserved = 0;
3830 	mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs);
3831 	m->m_pkthdr.vt_nrecs = 0;
3832 	m->m_flags &= ~M_PKTHDR;
3833 
3834 	mh->m_next = m;
3835 	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
3836 	    sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
3837 	return mh;
3838 }
3839 
3840 #ifdef MLD_DEBUG
3841 static const char *
3842 mld_rec_type_to_str(const int type)
3843 {
3844 	switch (type) {
3845 	case MLD_CHANGE_TO_EXCLUDE_MODE:
3846 		return "TO_EX";
3847 	case MLD_CHANGE_TO_INCLUDE_MODE:
3848 		return "TO_IN";
3849 	case MLD_MODE_IS_EXCLUDE:
3850 		return "MODE_EX";
3851 	case MLD_MODE_IS_INCLUDE:
3852 		return "MODE_IN";
3853 	case MLD_ALLOW_NEW_SOURCES:
3854 		return "ALLOW_NEW";
3855 	case MLD_BLOCK_OLD_SOURCES:
3856 		return "BLOCK_OLD";
3857 	default:
3858 		break;
3859 	}
3860 	return "unknown";
3861 }
3862 #endif
3863 
3864 void
3865 mld_init(void)
3866 {
3867 	MLD_PRINTF(("%s: initializing\n", __func__));
3868 
3869 	ip6_initpktopts(&mld_po);
3870 	mld_po.ip6po_hlim = 1;
3871 	mld_po.ip6po_hbh = &mld_ra.hbh;
3872 	mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
3873 	mld_po.ip6po_flags = IP6PO_DONTFRAG;
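	/*
	 * The options above give every outgoing MLD message a hop limit
	 * of 1, the IPv6 Router Alert hop-by-hop option (RFC 2711, which
	 * MLD requires), no preference for temporary source addresses,
	 * and no fragmentation.
	 */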
3874 	LIST_INIT(&mli_head);
3875 }
3876