/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*-
 * Copyright (c) 2009 Bruce Simpson.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 Stephen Deering.
 * Copyright (c) 1992, 1993
 *    The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Stephen Deering of Stanford University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mcache.h>

#include <dev/random/randomdev.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/mld6.h>
#include <netinet6/mld6_var.h>

/* Lock group and attribute for mld_mtx */
static LCK_ATTR_DECLARE(mld_mtx_attr, 0, 0);
static LCK_GRP_DECLARE(mld_mtx_grp, "mld_mtx");

/*
 * Locking and reference counting:
 *
 * mld_mtx mainly protects mli_head. In cases where both mld_mtx and
 * in6_multihead_lock must be held, the former must be acquired first in order
 * to maintain lock ordering. It is not a requirement that mld_mtx be
 * acquired first before in6_multihead_lock, but in case both must be acquired
 * in succession, the correct lock ordering must be followed.
 *
 * Instead of walking the if_multiaddrs list at the interface and returning
 * the ifma_protospec value of a matching entry, we search the global list
 * of in6_multi records and find it that way; this is done with in6_multihead
 * lock held. Doing so avoids the race condition issues that many other BSDs
 * suffer from (therefore in our implementation, ifma_protospec will never be
 * NULL for as long as the in6_multi is valid.)
 *
 * The above creates a requirement for the in6_multi to stay in in6_multihead
 * list even after the final MLD leave (in MLDv2 mode) until it no longer needs
 * to be retransmitted (this is not required for MLDv1.) In order to handle
 * this, the request and reference counts of the in6_multi are bumped up when
 * the state changes to MLD_LEAVING_MEMBER, and later dropped in the timeout
 * handler. Each in6_multi holds a reference to the underlying mld_ifinfo.
 *
 * Thus, the permitted lock order is:
 *
 *	mld_mtx, in6_multihead_lock, inm6_lock, mli_lock
 *
 * Any may be taken independently, but if any are held at the same time,
 * the above lock order must be followed.
 */
static LCK_MTX_DECLARE_ATTR(mld_mtx, &mld_mtx_grp, &mld_mtx_attr);
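
/*
 * Illustrative sketch (not compiled): a hypothetical caller that needed
 * all four locks at once would take them in the order documented above.
 * The function name is made up; the lock macros are the real ones from
 * this file, in6_var.h and mld6_var.h.
 *
 *	static void
 *	mld_lock_order_example(struct in6_multi *inm, struct mld_ifinfo *mli)
 *	{
 *		MLD_LOCK();                     // 1. global MLD lock
 *		in6_multihead_lock_shared();    // 2. global in6_multi list
 *		IN6M_LOCK(inm);                 // 3. per-group lock
 *		MLI_LOCK(mli);                  // 4. per-interface MLD info
 *		// ... work ...
 *		MLI_UNLOCK(mli);
 *		IN6M_UNLOCK(inm);
 *		in6_multihead_lock_done();
 *		MLD_UNLOCK();
 *	}
 */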

SLIST_HEAD(mld_in6m_relhead, in6_multi);

static void mli_initvar(struct mld_ifinfo *, struct ifnet *, int);
static struct mld_ifinfo *mli_alloc(zalloc_flags_t);
static void mli_free(struct mld_ifinfo *);
static void mli_delete(const struct ifnet *, struct mld_in6m_relhead *);
static void mld_dispatch_packet(struct mbuf *);
static void mld_final_leave(struct in6_multi *, struct mld_ifinfo *,
    struct mld_tparams *);
static int mld_handle_state_change(struct in6_multi *, struct mld_ifinfo *,
    struct mld_tparams *);
static int mld_initial_join(struct in6_multi *, struct mld_ifinfo *,
    struct mld_tparams *, const int);
#ifdef MLD_DEBUG
static const char * mld_rec_type_to_str(const int);
#endif
static uint32_t mld_set_version(struct mld_ifinfo *, const int);
static void mld_append_relq(struct mld_ifinfo *, struct in6_multi *);
static void mld_flush_relq(struct mld_ifinfo *, struct mld_in6m_relhead *);
static void mld_dispatch_queue_locked(struct mld_ifinfo *, struct ifqueue *, int);
static int mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
    /*const*/ struct mld_hdr *);
static int mld_v1_input_report(struct ifnet *, struct mbuf *,
    const struct ip6_hdr *, /*const*/ struct mld_hdr *);
static void mld_v1_process_group_timer(struct in6_multi *, const int);
static void mld_v1_process_querier_timers(struct mld_ifinfo *);
static int mld_v1_transmit_report(struct in6_multi *, const uint8_t);
static uint32_t mld_v1_update_group(struct in6_multi *, const int);
static void mld_v2_cancel_link_timers(struct mld_ifinfo *);
static uint32_t mld_v2_dispatch_general_query(struct mld_ifinfo *);
static struct mbuf *
    mld_v2_encap_report(struct ifnet *, struct mbuf *);
static int mld_v2_enqueue_filter_change(struct ifqueue *,
    struct in6_multi *);
static int mld_v2_enqueue_group_record(struct ifqueue *,
    struct in6_multi *, const int, const int, const int,
    const int);
static int mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
    struct mbuf *, const int, const int);
static int mld_v2_merge_state_changes(struct in6_multi *,
    struct ifqueue *);
static void mld_v2_process_group_timers(struct mld_ifinfo *,
    struct ifqueue *, struct ifqueue *,
    struct in6_multi *, const int);
static int mld_v2_process_group_query(struct in6_multi *,
    int, struct mbuf *, const int);
static int sysctl_mld_gsr SYSCTL_HANDLER_ARGS;
static int sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS;
static int sysctl_mld_v2enable SYSCTL_HANDLER_ARGS;

static const uint32_t mld_timeout_delay = 1000; /* in milliseconds */
static const uint32_t mld_timeout_leeway = 500; /* in milliseconds */
static bool mld_timeout_run; /* MLD timer is scheduled to run */
static bool mld_fast_timeout_run; /* MLD fast timer is scheduled to run */
static void mld_timeout(thread_call_param_t, thread_call_param_t);
static void mld_sched_timeout(void);
static void mld_sched_fast_timeout(void);

/*
 * Normative references: RFC 2710, RFC 3590, RFC 3810.
 */
static struct timeval mld_gsrdelay = {.tv_sec = 10, .tv_usec = 0};
static LIST_HEAD(, mld_ifinfo) mli_head;

static int querier_present_timers_running6;
static int interface_timers_running6;
static int state_change_timers_running6;
static int current_state_timers_running6;

static unsigned int mld_mli_list_genid;
/*
 * Subsystem lock macros.
 */
#define MLD_LOCK() \
        lck_mtx_lock(&mld_mtx)
#define MLD_LOCK_ASSERT_HELD() \
        LCK_MTX_ASSERT(&mld_mtx, LCK_MTX_ASSERT_OWNED)
#define MLD_LOCK_ASSERT_NOTHELD() \
        LCK_MTX_ASSERT(&mld_mtx, LCK_MTX_ASSERT_NOTOWNED)
#define MLD_UNLOCK() \
        lck_mtx_unlock(&mld_mtx)

#define MLD_ADD_DETACHED_IN6M(_head, _in6m) { \
        SLIST_INSERT_HEAD(_head, _in6m, in6m_dtle); \
}

#define MLD_REMOVE_DETACHED_IN6M(_head) { \
        struct in6_multi *_in6m, *_inm_tmp; \
        SLIST_FOREACH_SAFE(_in6m, _head, in6m_dtle, _inm_tmp) { \
                SLIST_REMOVE(_head, _in6m, in6_multi, in6m_dtle); \
                IN6M_REMREF(_in6m); \
        } \
        VERIFY(SLIST_EMPTY(_head)); \
}
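
/*
 * Usage sketch for the macros above, mirroring mld_domifdetach() and
 * mli_remref() below: detached in6_multi records are collected on a
 * caller-local list while the MLD locks are held, and the deferred
 * IN6M_REMREF()s run only after every lock has been dropped.
 *
 *	SLIST_HEAD(, in6_multi) in6m_dthead;
 *	SLIST_INIT(&in6m_dthead);
 *	MLD_LOCK();
 *	// ... detach records onto &in6m_dthead ...
 *	MLD_UNLOCK();
 *	MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
 */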

static KALLOC_TYPE_DEFINE(mli_zone, struct mld_ifinfo, NET_KT_DEFAULT);

SYSCTL_DECL(_net_inet6); /* Note: Not in any common header. */

SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "IPv6 Multicast Listener Discovery");
SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &mld_gsrdelay.tv_sec, 0, sysctl_mld_gsr, "I",
    "Rate limit for MLDv2 Group-and-Source queries in seconds");

SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
    sysctl_mld_ifinfo, "Per-interface MLDv2 state");

static int mld_v1enable = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mld_v1enable, 0, "Enable fallback to MLDv1");

static int mld_v2enable = 1;
SYSCTL_PROC(_net_inet6_mld, OID_AUTO, v2enable,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &mld_v2enable, 0, sysctl_mld_v2enable, "I",
    "Enable MLDv2 (debug purposes only)");

static int mld_use_allow = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");

#ifdef MLD_DEBUG
int mld_debug = 0;
SYSCTL_INT(_net_inet6_mld, OID_AUTO,
    debug, CTLFLAG_RW | CTLFLAG_LOCKED, &mld_debug, 0, "");
#endif
/*
 * Packed Router Alert option structure declaration.
 */
struct mld_raopt {
        struct ip6_hbh hbh;
        struct ip6_opt pad;
        struct ip6_opt_router ra;
} __packed;

/*
 * Router Alert hop-by-hop option header.
 */
static struct mld_raopt mld_ra = {
        .hbh = { .ip6h_nxt = 0, .ip6h_len = 0 },
        .pad = { .ip6o_type = IP6OPT_PADN, .ip6o_len = 0 },
        .ra = {
                .ip6or_type = (u_int8_t)IP6OPT_ROUTER_ALERT,
                .ip6or_len = (u_int8_t)(IP6OPT_RTALERT_LEN - 2),
                .ip6or_value = {((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
                                (IP6OPT_RTALERT_MLD & 0xFF) }
        }
};
static struct ip6_pktopts mld_po;
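
/*
 * For reference, the packed mld_raopt above encodes one 8-octet
 * hop-by-hop options block on the wire (per RFC 2711): the ip6_hbh
 * next-header/length pair (ip6h_len 0 = one 8-octet unit), a 2-byte
 * PadN option, then the Router Alert option itself: type 0x05, data
 * length 2, and the 16-bit value IP6OPT_RTALERT_MLD (0) split into
 * two bytes by the initializer.
 */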

/* Store MLDv2 record count in the module private scratch space */
#define vt_nrecs pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]

static __inline void
mld_save_context(struct mbuf *m, struct ifnet *ifp)
{
        m->m_pkthdr.rcvif = ifp;
}

static __inline void
mld_scrub_context(struct mbuf *m)
{
        m->m_pkthdr.rcvif = NULL;
}

/*
 * Restore context from a queued output chain.
 * Return saved ifp.
 */
static __inline struct ifnet *
mld_restore_context(struct mbuf *m)
{
        return m->m_pkthdr.rcvif;
}

/*
 * Retrieve or set threshold between group-source queries in seconds.
 */
static int
sysctl_mld_gsr SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
        int error;
        int i;

        MLD_LOCK();

        i = (int)mld_gsrdelay.tv_sec;

        error = sysctl_handle_int(oidp, &i, 0, req);
        if (error || !req->newptr) {
                goto out_locked;
        }

        if (i < -1 || i >= 60) {
                error = EINVAL;
                goto out_locked;
        }

        mld_gsrdelay.tv_sec = i;

out_locked:
        MLD_UNLOCK();
        return error;
}
/*
 * Expose struct mld_ifinfo to userland, keyed by ifindex.
 * For use by ifmcstat(8).
 */
static int
sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp)
        int *name;
        int error;
        u_int namelen;
        struct ifnet *ifp;
        struct mld_ifinfo *mli;
        struct mld_ifinfo_u mli_u;

        name = (int *)arg1;
        namelen = arg2;

        if (req->newptr != USER_ADDR_NULL) {
                return EPERM;
        }

        if (namelen != 1) {
                return EINVAL;
        }

        MLD_LOCK();

        if (name[0] <= 0 || name[0] > (u_int)if_index) {
                error = ENOENT;
                goto out_locked;
        }

        error = ENOENT;

        ifnet_head_lock_shared();
        ifp = ifindex2ifnet[name[0]];
        ifnet_head_done();
        if (ifp == NULL) {
                goto out_locked;
        }

        bzero(&mli_u, sizeof(mli_u));

        LIST_FOREACH(mli, &mli_head, mli_link) {
                MLI_LOCK(mli);
                if (ifp != mli->mli_ifp) {
                        MLI_UNLOCK(mli);
                        continue;
                }

                mli_u.mli_ifindex = mli->mli_ifp->if_index;
                mli_u.mli_version = mli->mli_version;
                mli_u.mli_v1_timer = mli->mli_v1_timer;
                mli_u.mli_v2_timer = mli->mli_v2_timer;
                mli_u.mli_flags = mli->mli_flags;
                mli_u.mli_rv = mli->mli_rv;
                mli_u.mli_qi = mli->mli_qi;
                mli_u.mli_qri = mli->mli_qri;
                mli_u.mli_uri = mli->mli_uri;
                MLI_UNLOCK(mli);

                error = SYSCTL_OUT(req, &mli_u, sizeof(mli_u));
                break;
        }

out_locked:
        MLD_UNLOCK();
        return error;
}

static int
sysctl_mld_v2enable SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
        int error;
        int i;
        struct mld_ifinfo *mli;
        struct mld_tparams mtp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

        MLD_LOCK();

        i = mld_v2enable;

        error = sysctl_handle_int(oidp, &i, 0, req);
        if (error || !req->newptr) {
                goto out_locked;
        }

        if (i < 0 || i > 1) {
                error = EINVAL;
                goto out_locked;
        }

        mld_v2enable = i;
        /*
         * If we enabled v2, the state transition will take care of upgrading
         * the MLD version back to v2. Otherwise, we have to explicitly
         * downgrade. Note that this functionality is to be used for debugging.
         */
        if (mld_v2enable == 1) {
                goto out_locked;
        }

        LIST_FOREACH(mli, &mli_head, mli_link) {
                MLI_LOCK(mli);
                if (mld_set_version(mli, MLD_VERSION_1) > 0) {
                        mtp.qpt = 1;
                }
                MLI_UNLOCK(mli);
        }

out_locked:
        MLD_UNLOCK();

        mld_set_timeout(&mtp);

        return error;
}

/*
 * Dispatch an entire queue of pending packet chains.
 *
 * Must not be called with in6m_lock held.
 * XXX This routine unlocks the MLD global lock and also the mli lock.
 * Make sure that the calling routine takes a reference on the mli
 * before calling this routine.
 * Also, if we are traversing mli_head, remember to check the mli list
 * generation count and restart the loop if it has changed.
 */
static void
mld_dispatch_queue_locked(struct mld_ifinfo *mli, struct ifqueue *ifq, int limit)
{
        struct mbuf *m;

        MLD_LOCK_ASSERT_HELD();

        if (mli != NULL) {
                MLI_LOCK_ASSERT_HELD(mli);
        }

        for (;;) {
                IF_DEQUEUE(ifq, m);
                if (m == NULL) {
                        break;
                }
                MLD_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
                    (uint64_t)VM_KERNEL_ADDRPERM(ifq),
                    (uint64_t)VM_KERNEL_ADDRPERM(m)));

                if (mli != NULL) {
                        MLI_UNLOCK(mli);
                }
                MLD_UNLOCK();

                mld_dispatch_packet(m);

                MLD_LOCK();
                if (mli != NULL) {
                        MLI_LOCK(mli);
                }

                if (--limit == 0) {
                        break;
                }
        }

        if (mli != NULL) {
                MLI_LOCK_ASSERT_HELD(mli);
        }
}

/*
 * Filter outgoing MLD report state by group.
 *
 * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
 * and node-local addresses. However, kernel and socket consumers
 * always embed the KAME scope ID in the address provided, so strip it
 * when performing comparison.
 * Note: This is not the same as the *multicast* scope.
 *
 * Return zero if the given group is one for which MLD reports
 * should be suppressed, or non-zero if reports should be issued.
 */
static __inline__ int
mld_is_addr_reported(const struct in6_addr *addr)
{
        VERIFY(IN6_IS_ADDR_MULTICAST(addr));

        if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL) {
                return 0;
        }

        if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
            !IN6_IS_ADDR_UNICAST_BASED_MULTICAST(addr)) {
                struct in6_addr tmp = *addr;
                in6_clearscope(&tmp);
                if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes)) {
                        return 0;
                }
        }

        return 1;
}

/*
 * Attach MLD when PF_INET6 is attached to an interface.
 */
struct mld_ifinfo *
mld_domifattach(struct ifnet *ifp, zalloc_flags_t how)
{
        struct mld_ifinfo *mli;

        MLD_PRINTF(("%s: called for ifp 0x%llx(%s)\n", __func__,
            (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

        mli = mli_alloc(how);
        if (mli == NULL) {
                return NULL;
        }

        MLD_LOCK();

        MLI_LOCK(mli);
        mli_initvar(mli, ifp, 0);
        mli->mli_debug |= IFD_ATTACHED;
        MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
        MLI_ADDREF_LOCKED(mli); /* hold a reference for caller */
        MLI_UNLOCK(mli);
        ifnet_lock_shared(ifp);
        mld6_initsilent(ifp, mli);
        ifnet_lock_done(ifp);

        LIST_INSERT_HEAD(&mli_head, mli, mli_link);
        mld_mli_list_genid++;

        MLD_UNLOCK();

        MLD_PRINTF(("%s: allocate mld_ifinfo for ifp 0x%llx(%s)\n",
            __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

        return mli;
}

/*
 * Attach MLD when PF_INET6 is reattached to an interface. Caller is
 * expected to have an outstanding reference to the mli.
 */
void
mld_domifreattach(struct mld_ifinfo *mli)
{
        struct ifnet *ifp;

        MLD_LOCK();

        MLI_LOCK(mli);
        VERIFY(!(mli->mli_debug & IFD_ATTACHED));
        ifp = mli->mli_ifp;
        VERIFY(ifp != NULL);
        mli_initvar(mli, ifp, 1);
        mli->mli_debug |= IFD_ATTACHED;
        MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
        MLI_UNLOCK(mli);
        ifnet_lock_shared(ifp);
        mld6_initsilent(ifp, mli);
        ifnet_lock_done(ifp);

        LIST_INSERT_HEAD(&mli_head, mli, mli_link);
        mld_mli_list_genid++;

        MLD_UNLOCK();

        MLD_PRINTF(("%s: reattached mld_ifinfo for ifp 0x%llx(%s)\n",
            __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
}

/*
 * Hook for domifdetach.
 */
void
mld_domifdetach(struct ifnet *ifp)
{
        SLIST_HEAD(, in6_multi) in6m_dthead;

        SLIST_INIT(&in6m_dthead);

        MLD_PRINTF(("%s: called for ifp 0x%llx(%s)\n", __func__,
            (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

        MLD_LOCK();
        mli_delete(ifp, (struct mld_in6m_relhead *)&in6m_dthead);
        MLD_UNLOCK();

        /* Now that we've dropped all locks, release detached records */
        MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
}

/*
 * Called at interface detach time. Note that we only flush all deferred
 * responses and record releases; all remaining inm records and their source
 * entries related to this interface are left intact, in order to handle
 * the reattach case.
 */
static void
mli_delete(const struct ifnet *ifp, struct mld_in6m_relhead *in6m_dthead)
{
        struct mld_ifinfo *mli, *tmli;

        MLD_LOCK_ASSERT_HELD();

        LIST_FOREACH_SAFE(mli, &mli_head, mli_link, tmli) {
                MLI_LOCK(mli);
                if (mli->mli_ifp == ifp) {
                        /*
                         * Free deferred General Query responses.
                         */
                        IF_DRAIN(&mli->mli_gq);
                        IF_DRAIN(&mli->mli_v1q);
                        mld_flush_relq(mli, in6m_dthead);
                        mli->mli_debug &= ~IFD_ATTACHED;
                        MLI_UNLOCK(mli);

                        LIST_REMOVE(mli, mli_link);
                        MLI_REMREF(mli); /* release mli_head reference */
                        mld_mli_list_genid++;
                        return;
                }
                MLI_UNLOCK(mli);
        }
        panic("%s: mld_ifinfo not found for ifp %p(%s)", __func__,
            ifp, ifp->if_xname);
}

__private_extern__ void
mld6_initsilent(struct ifnet *ifp, struct mld_ifinfo *mli)
{
        ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

        MLI_LOCK_ASSERT_NOTHELD(mli);
        MLI_LOCK(mli);
        if (!(ifp->if_flags & IFF_MULTICAST) &&
            (ifp->if_eflags & (IFEF_IPV6_ND6ALT | IFEF_LOCALNET_PRIVATE))) {
                mli->mli_flags |= MLIF_SILENT;
        } else {
                mli->mli_flags &= ~MLIF_SILENT;
        }
        MLI_UNLOCK(mli);
}

static void
mli_initvar(struct mld_ifinfo *mli, struct ifnet *ifp, int reattach)
{
        MLI_LOCK_ASSERT_HELD(mli);

        mli->mli_ifp = ifp;
        if (mld_v2enable) {
                mli->mli_version = MLD_VERSION_2;
        } else {
                mli->mli_version = MLD_VERSION_1;
        }
        mli->mli_flags = 0;
        mli->mli_rv = MLD_RV_INIT;
        mli->mli_qi = MLD_QI_INIT;
        mli->mli_qri = MLD_QRI_INIT;
        mli->mli_uri = MLD_URI_INIT;

        if (mld_use_allow) {
                mli->mli_flags |= MLIF_USEALLOW;
        }
        if (!reattach) {
                SLIST_INIT(&mli->mli_relinmhead);
        }

        /*
         * Responses to general queries are subject to bounds.
         */
        mli->mli_gq.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
        mli->mli_v1q.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
}

static struct mld_ifinfo *
mli_alloc(zalloc_flags_t how)
{
        struct mld_ifinfo *mli = zalloc_flags(mli_zone, how | Z_ZERO);
        if (mli != NULL) {
                lck_mtx_init(&mli->mli_lock, &mld_mtx_grp, &mld_mtx_attr);
                mli->mli_debug |= IFD_ALLOC;
        }
        return mli;
}

static void
mli_free(struct mld_ifinfo *mli)
{
        MLI_LOCK(mli);
        if (mli->mli_debug & IFD_ATTACHED) {
                panic("%s: attached mli=%p is being freed", __func__, mli);
                /* NOTREACHED */
        } else if (mli->mli_ifp != NULL) {
                panic("%s: ifp not NULL for mli=%p", __func__, mli);
                /* NOTREACHED */
        } else if (!(mli->mli_debug & IFD_ALLOC)) {
                panic("%s: mli %p cannot be freed", __func__, mli);
                /* NOTREACHED */
        } else if (mli->mli_refcnt != 0) {
                panic("%s: non-zero refcnt mli=%p", __func__, mli);
                /* NOTREACHED */
        }
        mli->mli_debug &= ~IFD_ALLOC;
        MLI_UNLOCK(mli);

        lck_mtx_destroy(&mli->mli_lock, &mld_mtx_grp);
        zfree(mli_zone, mli);
}

void
mli_addref(struct mld_ifinfo *mli, int locked)
{
        if (!locked) {
                MLI_LOCK_SPIN(mli);
        } else {
                MLI_LOCK_ASSERT_HELD(mli);
        }

        if (++mli->mli_refcnt == 0) {
                panic("%s: mli=%p wraparound refcnt", __func__, mli);
                /* NOTREACHED */
        }
        if (!locked) {
                MLI_UNLOCK(mli);
        }
}

void
mli_remref(struct mld_ifinfo *mli)
{
        SLIST_HEAD(, in6_multi) in6m_dthead;
        struct ifnet *ifp;

        MLI_LOCK_SPIN(mli);

        if (mli->mli_refcnt == 0) {
                panic("%s: mli=%p negative refcnt", __func__, mli);
                /* NOTREACHED */
        }

        --mli->mli_refcnt;
        if (mli->mli_refcnt > 0) {
                MLI_UNLOCK(mli);
                return;
        }

        ifp = mli->mli_ifp;
        mli->mli_ifp = NULL;
        IF_DRAIN(&mli->mli_gq);
        IF_DRAIN(&mli->mli_v1q);
        SLIST_INIT(&in6m_dthead);
        mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
        MLI_UNLOCK(mli);

        /* Now that we've dropped all locks, release detached records */
        MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);

        MLD_PRINTF(("%s: freeing mld_ifinfo for ifp 0x%llx(%s)\n",
            __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

        mli_free(mli);
}

/*
 * Process a received MLDv1 general or address-specific query.
 * Assumes that the query header has been pulled up to sizeof(mld_hdr).
 *
 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
 * mld_addr. This is OK as we own the mbuf chain.
 */
static int
mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
    /*const*/ struct mld_hdr *mld)
{
        struct mld_ifinfo *mli;
        struct in6_multi *inm;
        int err = 0, is_general_query;
        uint16_t timer;
        struct mld_tparams mtp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

        MLD_LOCK_ASSERT_NOTHELD();

        is_general_query = 0;

        if (!mld_v1enable) {
                MLD_PRINTF(("%s: ignore v1 query %s on ifp 0x%llx(%s)\n",
                    __func__, ip6_sprintf(&mld->mld_addr),
                    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
                goto done;
        }

        /*
         * RFC3810 Section 6.2: MLD queries must originate from
         * a router's link-local address.
         */
        if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
                MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n",
                    __func__, ip6_sprintf(&ip6->ip6_src),
                    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
                goto done;
        }

        /*
         * Do address field validation upfront before we accept
         * the query.
         */
        if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
                /*
                 * MLDv1 General Query.
                 * If this was not sent to the all-nodes group, ignore it.
                 */
                struct in6_addr dst;

                dst = ip6->ip6_dst;
                in6_clearscope(&dst);
                if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) {
                        err = EINVAL;
                        goto done;
                }
                is_general_query = 1;
        } else {
                /*
                 * Embed scope ID of receiving interface in MLD query for
                 * lookup whilst we don't hold other locks.
                 */
                (void)in6_setscope(&mld->mld_addr, ifp, NULL);
        }

        /*
         * Switch to MLDv1 host compatibility mode.
         */
        mli = MLD_IFINFO(ifp);
        VERIFY(mli != NULL);

        MLI_LOCK(mli);
        mtp.qpt = mld_set_version(mli, MLD_VERSION_1);
        MLI_UNLOCK(mli);

        timer = ntohs(mld->mld_maxdelay) / MLD_TIMER_SCALE;
        if (timer == 0) {
                timer = 1;
        }

        if (is_general_query) {
                struct in6_multistep step;

                MLD_PRINTF(("%s: process v1 general query on ifp 0x%llx(%s)\n",
                    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
                /*
                 * For each reporting group joined on this
                 * interface, kick the report timer.
                 */
                in6_multihead_lock_shared();
                IN6_FIRST_MULTI(step, inm);
                while (inm != NULL) {
                        IN6M_LOCK(inm);
                        if (inm->in6m_ifp == ifp) {
                                mtp.cst += mld_v1_update_group(inm, timer);
                        }
                        IN6M_UNLOCK(inm);
                        IN6_NEXT_MULTI(step, inm);
                }
                in6_multihead_lock_done();
        } else {
                /*
                 * MLDv1 Group-Specific Query.
                 * If this is a group-specific MLDv1 query, we need only
                 * look up the single group to process it.
                 */
                in6_multihead_lock_shared();
                IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
                in6_multihead_lock_done();

                if (inm != NULL) {
                        IN6M_LOCK(inm);
                        MLD_PRINTF(("%s: process v1 query %s on "
                            "ifp 0x%llx(%s)\n", __func__,
                            ip6_sprintf(&mld->mld_addr),
                            (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
                        mtp.cst = mld_v1_update_group(inm, timer);
                        IN6M_UNLOCK(inm);
                        IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
                }
                /* XXX Clear embedded scope ID as userland won't expect it. */
                in6_clearscope(&mld->mld_addr);
        }
done:
        mld_set_timeout(&mtp);

        return err;
}

/*
 * Update the report timer on a group in response to an MLDv1 query.
 *
 * If we are becoming the reporting member for this group, start the timer.
 * If we already are the reporting member for this group, and timer is
 * below the threshold, reset it.
 *
 * We may be updating the group for the first time since we switched
 * to MLDv2. If we are, then we must clear any recorded source lists,
 * and transition to REPORTING state; the group timer is overloaded
 * for group and group-source query responses.
 *
 * Unlike MLDv2, the delay per group should be jittered
 * to avoid bursts of MLDv1 reports.
 */
static uint32_t
mld_v1_update_group(struct in6_multi *inm, const int timer)
{
        IN6M_LOCK_ASSERT_HELD(inm);

        MLD_PRINTF(("%s: %s/%s timer=%d\n", __func__,
            ip6_sprintf(&inm->in6m_addr),
            if_name(inm->in6m_ifp), timer));

        switch (inm->in6m_state) {
        case MLD_NOT_MEMBER:
        case MLD_SILENT_MEMBER:
                break;
        case MLD_REPORTING_MEMBER:
                if (inm->in6m_timer != 0 &&
                    inm->in6m_timer <= timer) {
                        MLD_PRINTF(("%s: REPORTING and timer running, "
                            "skipping.\n", __func__));
                        break;
                }
                OS_FALLTHROUGH;
        case MLD_SG_QUERY_PENDING_MEMBER:
        case MLD_G_QUERY_PENDING_MEMBER:
        case MLD_IDLE_MEMBER:
        case MLD_LAZY_MEMBER:
        case MLD_AWAKENING_MEMBER:
                MLD_PRINTF(("%s: ->REPORTING\n", __func__));
                inm->in6m_state = MLD_REPORTING_MEMBER;
                inm->in6m_timer = MLD_RANDOM_DELAY(timer);
                break;
        case MLD_SLEEPING_MEMBER:
                MLD_PRINTF(("%s: ->AWAKENING\n", __func__));
                inm->in6m_state = MLD_AWAKENING_MEMBER;
                break;
        case MLD_LEAVING_MEMBER:
                break;
        }

        return inm->in6m_timer;
}

/*
 * Process a received MLDv2 general, group-specific or
 * group-and-source-specific query.
 *
 * Assumes that the query header has been pulled up to sizeof(mldv2_query).
 *
 * Return 0 if successful, otherwise an appropriate error code is returned.
 */
static int
mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
    struct mbuf *m, const int off, const int icmp6len)
{
        struct mld_ifinfo *mli;
        struct mldv2_query *mld;
        struct in6_multi *inm;
        uint32_t maxdelay, nsrc, qqi, timer;
        int err = 0, is_general_query;
        uint8_t qrv;
        struct mld_tparams mtp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };

        MLD_LOCK_ASSERT_NOTHELD();

        is_general_query = 0;

        if (!mld_v2enable) {
                MLD_PRINTF(("%s: ignore v2 query %s on ifp 0x%llx(%s)\n",
                    __func__, ip6_sprintf(&ip6->ip6_src),
                    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
                goto done;
        }

        /*
         * RFC3810 Section 6.2: MLD queries must originate from
         * a router's link-local address.
         */
        if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
1064 MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n",
                    __func__, ip6_sprintf(&ip6->ip6_src),
                    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
                goto done;
        }

        MLD_PRINTF(("%s: input v2 query on ifp 0x%llx(%s)\n", __func__,
            (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

        mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);

        maxdelay = ntohs(mld->mld_maxdelay); /* in 1/10ths of a second */
        if (maxdelay > SHRT_MAX) {
                maxdelay = (MLD_MRC_MANT((uint16_t)maxdelay) | 0x1000) <<
                    (MLD_MRC_EXP((uint16_t)maxdelay) + 3);
        }
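        /*
         * Worked example of the mantissa/exponent decode above (RFC 3810
         * Maximum Response Code): for an on-wire value of 0x9400, the
         * exponent field is 1 and the mantissa is 0x400, so the decoded
         * delay is (0x400 | 0x1000) << (1 + 3) = 0x14000 (81920).  The
         * QQI field below uses an analogous 8-bit encoding.
         */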
        timer = maxdelay / MLD_TIMER_SCALE;
        if (timer == 0) {
                timer = 1;
        }

        qrv = MLD_QRV(mld->mld_misc);
        if (qrv < 2) {
                MLD_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
                    qrv, MLD_RV_INIT));
                qrv = MLD_RV_INIT;
        }

        qqi = mld->mld_qqi;
        if (qqi >= 128) {
                qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
                    (MLD_QQIC_EXP(mld->mld_qqi) + 3);
        }

        nsrc = ntohs(mld->mld_numsrc);
        if (nsrc > MLD_MAX_GS_SOURCES) {
                err = EMSGSIZE;
                goto done;
        }
        if (icmp6len < sizeof(struct mldv2_query) +
            (nsrc * sizeof(struct in6_addr))) {
                err = EMSGSIZE;
                goto done;
        }

        /*
         * Do further input validation upfront to avoid resetting timers
         * should we need to discard this query.
         */
        if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
                /*
                 * A general query with a source list has undefined
                 * behaviour; discard it.
                 */
                if (nsrc > 0) {
                        err = EINVAL;
                        goto done;
                }
                is_general_query = 1;
        } else {
                /*
                 * Embed scope ID of receiving interface in MLD query for
                 * lookup whilst we don't hold other locks (due to KAME
                 * locking lameness). We own this mbuf chain just now.
                 */
                (void)in6_setscope(&mld->mld_addr, ifp, NULL);
        }

        mli = MLD_IFINFO(ifp);
        VERIFY(mli != NULL);

        MLI_LOCK(mli);
        /*
         * Discard the v2 query if we're in Compatibility Mode.
         * The RFC is pretty clear that hosts need to stay in MLDv1 mode
         * until the Old Version Querier Present timer expires.
         */
        if (mli->mli_version != MLD_VERSION_2) {
                MLI_UNLOCK(mli);
                goto done;
        }

        mtp.qpt = mld_set_version(mli, MLD_VERSION_2);
        mli->mli_rv = qrv;
        mli->mli_qi = qqi;
        mli->mli_qri = MAX(timer, MLD_QRI_MIN);

        MLD_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, mli->mli_rv,
            mli->mli_qi, mli->mli_qri));

        if (is_general_query) {
                /*
                 * MLDv2 General Query.
                 *
                 * Schedule a current-state report on this ifp for
                 * all groups, possibly containing source lists.
                 *
                 * If there is a pending General Query response
                 * scheduled earlier than the selected delay, do
                 * not schedule any other reports.
                 * Otherwise, reset the interface timer.
                 */
                MLD_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n",
                    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
                if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
                        mtp.it = mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
                }
                MLI_UNLOCK(mli);
        } else {
                MLI_UNLOCK(mli);
                /*
                 * MLDv2 Group-specific or Group-and-source-specific Query.
                 *
                 * Group-source-specific queries are throttled on
                 * a per-group basis to defeat denial-of-service attempts.
                 * Queries for groups we are not a member of on this
                 * link are simply ignored.
                 */
                in6_multihead_lock_shared();
                IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
                in6_multihead_lock_done();
                if (inm == NULL) {
                        goto done;
                }

                IN6M_LOCK(inm);
                if (nsrc > 0) {
                        if (!ratecheck(&inm->in6m_lastgsrtv,
                            &mld_gsrdelay)) {
                                MLD_PRINTF(("%s: GS query throttled.\n",
                                    __func__));
                                IN6M_UNLOCK(inm);
                                IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
                                goto done;
                        }
                }
                MLD_PRINTF(("%s: process v2 group query on ifp 0x%llx(%s)\n",
                    __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
                /*
                 * If there is a pending General Query response
                 * scheduled sooner than the selected delay, no
                 * further report need be scheduled.
                 * Otherwise, prepare to respond to the
                 * group-specific or group-and-source query.
                 */
                MLI_LOCK(mli);
                mtp.it = mli->mli_v2_timer;
                MLI_UNLOCK(mli);
                if (mtp.it == 0 || mtp.it >= timer) {
                        (void) mld_v2_process_group_query(inm, timer, m, off);
                        mtp.cst = inm->in6m_timer;
                }
                IN6M_UNLOCK(inm);
                IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
                /* XXX Clear embedded scope ID as userland won't expect it. */
                in6_clearscope(&mld->mld_addr);
        }
done:
        if (mtp.it > 0) {
                MLD_PRINTF(("%s: v2 general query response scheduled in "
                    "T+%d seconds on ifp 0x%llx(%s)\n", __func__, mtp.it,
                    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
        }
        mld_set_timeout(&mtp);

        return err;
}

/*
 * Process a received MLDv2 group-specific or group-and-source-specific
 * query.
 * Return <0 if any error occurred. Currently this is ignored.
 */
static int
mld_v2_process_group_query(struct in6_multi *inm, int timer, struct mbuf *m0,
    const int off)
{
        struct mldv2_query *mld;
        int retval;
        uint16_t nsrc;

        IN6M_LOCK_ASSERT_HELD(inm);

        retval = 0;
        mld = (struct mldv2_query *)(mtod(m0, uint8_t *) + off);

        switch (inm->in6m_state) {
        case MLD_NOT_MEMBER:
        case MLD_SILENT_MEMBER:
        case MLD_SLEEPING_MEMBER:
        case MLD_LAZY_MEMBER:
        case MLD_AWAKENING_MEMBER:
        case MLD_IDLE_MEMBER:
        case MLD_LEAVING_MEMBER:
                return retval;
        case MLD_REPORTING_MEMBER:
        case MLD_G_QUERY_PENDING_MEMBER:
        case MLD_SG_QUERY_PENDING_MEMBER:
                break;
        }

        nsrc = ntohs(mld->mld_numsrc);

        /*
         * Deal with group-specific queries upfront.
         * If any group query is already pending, purge any recorded
         * source-list state if it exists, and schedule a query response
         * for this group-specific query.
         */
        if (nsrc == 0) {
                if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
                    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
                        in6m_clear_recorded(inm);
                        timer = min(inm->in6m_timer, timer);
                }
                inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
                inm->in6m_timer = MLD_RANDOM_DELAY(timer);
                return retval;
        }

        /*
         * Deal with the case where a group-and-source-specific query has
         * been received but a group-specific query is already pending.
         */
        if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
                timer = min(inm->in6m_timer, timer);
                inm->in6m_timer = MLD_RANDOM_DELAY(timer);
                return retval;
        }

        /*
         * Finally, deal with the case where a group-and-source-specific
         * query has been received, where a response to a previous g-s-r
         * query exists, or none exists.
         * In this case, we need to parse the source-list which the Querier
         * has provided us with and check if we have any source list filter
         * entries at T1 for these sources. If we do not, there is no need
         * to schedule a report and the query may be dropped.
         * If we do, we must record them and schedule a current-state
         * report for those sources.
         */
        if (inm->in6m_nsrc > 0) {
                struct mbuf *m;
                struct in6_addr addr;
                int i, nrecorded;
                int soff;

                m = m0;
                soff = off + sizeof(struct mldv2_query);
                nrecorded = 0;
                for (i = 0; i < nsrc; i++) {
                        m_copydata(m, soff, sizeof(addr), &addr);
                        retval = in6m_record_source(inm, &addr);
                        if (retval < 0) {
                                break;
                        }
                        nrecorded += retval;
                        soff += sizeof(struct in6_addr);

                        while (m && (soff >= m->m_len)) {
                                soff -= m->m_len;
                                m = m->m_next;
                        }

                        /* should not be possible: */
                        if (m == NULL) {
                                break;
                        }
                }
                if (nrecorded > 0) {
                        MLD_PRINTF(("%s: schedule response to SG query\n",
                            __func__));
                        inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
                        inm->in6m_timer = MLD_RANDOM_DELAY(timer);
                }
        }

        return retval;
}

/*
 * Process a received MLDv1 host membership report.
 * Assumes mld points to mld_hdr in pulled up mbuf chain.
 *
 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
 * mld_addr. This is OK as we own the mbuf chain.
 */
static int
mld_v1_input_report(struct ifnet *ifp, struct mbuf *m,
    const struct ip6_hdr *ip6, /*const*/ struct mld_hdr *mld)
{
        struct in6_addr src, dst;
        struct in6_ifaddr *ia;
        struct in6_multi *inm;

        if (!mld_v1enable) {
                MLD_PRINTF(("%s: ignore v1 report %s on ifp 0x%llx(%s)\n",
                    __func__, ip6_sprintf(&mld->mld_addr),
                    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
                return 0;
        }

        if ((ifp->if_flags & IFF_LOOPBACK) ||
            (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
                return 0;
        }

        /*
         * MLDv1 reports must originate from a host's link-local address,
         * or the unspecified address (when booting).
         */
        src = ip6->ip6_src;
        in6_clearscope(&src);
        if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
1378 MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n",
                    __func__, ip6_sprintf(&ip6->ip6_src),
                    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
                return EINVAL;
        }

        /*
         * RFC2710 Section 4: MLDv1 reports must pertain to a multicast
         * group, and must be directed to the group itself.
         */
        dst = ip6->ip6_dst;
        in6_clearscope(&dst);
        if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) ||
            !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
1392 MLD_PRINTF(("%s: ignore v1 query dst %s on ifp 0x%llx(%s)\n",
                    __func__, ip6_sprintf(&ip6->ip6_dst),
                    (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
                return EINVAL;
        }

        /*
         * Make sure we don't hear our own membership report, as fast
         * leave requires knowing that we are the only member of a
         * group. Assume we used the link-local address if available,
         * otherwise look for ::.
         *
         * XXX Note that scope ID comparison is needed for the address
         * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
         * performed for the on-wire address.
         */
        ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
        if (ia != NULL) {
                IFA_LOCK(&ia->ia_ifa);
                if ((IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia)))) {
                        IFA_UNLOCK(&ia->ia_ifa);
                        IFA_REMREF(&ia->ia_ifa);
                        return 0;
                }
                IFA_UNLOCK(&ia->ia_ifa);
                IFA_REMREF(&ia->ia_ifa);
        } else if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
                return 0;
        }

        MLD_PRINTF(("%s: process v1 report %s on ifp 0x%llx(%s)\n",
            __func__, ip6_sprintf(&mld->mld_addr),
            (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));

        /*
         * Embed scope ID of receiving interface in MLD query for lookup
         * whilst we don't hold other locks (due to KAME locking lameness).
         */
        if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
                (void)in6_setscope(&mld->mld_addr, ifp, NULL);
        }

        /*
         * MLDv1 report suppression.
         * If we are a member of this group, and our membership should be
         * reported, and our group timer is pending or about to be reset,
         * stop our group timer by transitioning to the 'lazy' state.
         */
        in6_multihead_lock_shared();
        IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
        in6_multihead_lock_done();

        if (inm != NULL) {
                struct mld_ifinfo *mli;

                IN6M_LOCK(inm);
                mli = inm->in6m_mli;
                VERIFY(mli != NULL);

                MLI_LOCK(mli);
                /*
                 * If we are in MLDv2 host mode, do not allow the
                 * other host's MLDv1 report to suppress our reports.
                 */
                if (mli->mli_version == MLD_VERSION_2) {
                        MLI_UNLOCK(mli);
                        IN6M_UNLOCK(inm);
                        IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
                        goto out;
                }
                MLI_UNLOCK(mli);

                inm->in6m_timer = 0;

                switch (inm->in6m_state) {
                case MLD_NOT_MEMBER:
                case MLD_SILENT_MEMBER:
                case MLD_SLEEPING_MEMBER:
                        break;
                case MLD_REPORTING_MEMBER:
                case MLD_IDLE_MEMBER:
                case MLD_AWAKENING_MEMBER:
                        MLD_PRINTF(("%s: report suppressed for %s on "
                            "ifp 0x%llx(%s)\n", __func__,
                            ip6_sprintf(&mld->mld_addr),
                            (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
                        OS_FALLTHROUGH;
                case MLD_LAZY_MEMBER:
                        inm->in6m_state = MLD_LAZY_MEMBER;
                        break;
                case MLD_G_QUERY_PENDING_MEMBER:
                case MLD_SG_QUERY_PENDING_MEMBER:
                case MLD_LEAVING_MEMBER:
                        break;
                }
                IN6M_UNLOCK(inm);
                IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
        }

out:
        /* XXX Clear embedded scope ID as userland won't expect it. */
        in6_clearscope(&mld->mld_addr);

        return 0;
}

/*
 * MLD input path.
 *
 * Assume query messages which fit in a single ICMPv6 message header
 * have been pulled up.
 * Assume that userland will want to see the message, even if it
 * otherwise fails kernel input validation; do not free it.
 * Pullup may however free the mbuf chain m if it fails.
 *
 * Return IPPROTO_DONE if we freed m. Otherwise, return 0.
 */
int
mld_input(struct mbuf *m, int off, int icmp6len)
{
        struct ifnet *ifp = NULL;
        struct ip6_hdr *ip6 = NULL;
        struct mld_hdr *mld = NULL;
        int mldlen = 0;

        MLD_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__,
            (uint64_t)VM_KERNEL_ADDRPERM(m), off));

        ifp = m->m_pkthdr.rcvif;

        /* Pullup to appropriate size. */
        mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
        if (mld->mld_type == MLD_LISTENER_QUERY &&
            icmp6len >= sizeof(struct mldv2_query)) {
                mldlen = sizeof(struct mldv2_query);
        } else {
                mldlen = sizeof(struct mld_hdr);
        }
        /* Check if mldv2_query/mld_hdr fits in the first mbuf. */
        IP6_EXTHDR_CHECK(m, off, mldlen, return IPPROTO_DONE);
        IP6_EXTHDR_GET(mld, struct mld_hdr *, m, off, mldlen);
        if (mld == NULL) {
                icmp6stat.icp6s_badlen++;
                return IPPROTO_DONE;
        }
        ip6 = mtod(m, struct ip6_hdr *);

        /*
         * Userland needs to see all of this traffic for implementing
         * the endpoint discovery portion of multicast routing.
         */
        switch (mld->mld_type) {
        case MLD_LISTENER_QUERY:
                icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
                if (icmp6len == sizeof(struct mld_hdr)) {
                        if (mld_v1_input_query(ifp, ip6, mld) != 0) {
                                return 0;
                        }
                } else if (icmp6len >= sizeof(struct mldv2_query)) {
                        if (mld_v2_input_query(ifp, ip6, m, off,
                            icmp6len) != 0) {
                                return 0;
                        }
                }
                break;
        case MLD_LISTENER_REPORT:
                icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
                if (mld_v1_input_report(ifp, m, ip6, mld) != 0) {
                        return 0;
                }
                break;
        case MLDV2_LISTENER_REPORT:
                icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
                break;
        case MLD_LISTENER_DONE:
                icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
                break;
        default:
                break;
        }

        return 0;
}

/*
 * Schedule MLD timer based on various parameters; caller must ensure that
 * lock ordering is maintained as this routine acquires MLD global lock.
 */
void
mld_set_timeout(struct mld_tparams *mtp)
{
        MLD_LOCK_ASSERT_NOTHELD();
        VERIFY(mtp != NULL);

        if (mtp->qpt != 0 || mtp->it != 0 || mtp->cst != 0 || mtp->sct != 0) {
                MLD_LOCK();
                if (mtp->qpt != 0) {
                        querier_present_timers_running6 = 1;
                }
                if (mtp->it != 0) {
                        interface_timers_running6 = 1;
                }
                if (mtp->cst != 0) {
                        current_state_timers_running6 = 1;
                }
                if (mtp->sct != 0) {
                        state_change_timers_running6 = 1;
                }
                if (mtp->fast) {
                        mld_sched_fast_timeout();
                } else {
                        mld_sched_timeout();
                }
                MLD_UNLOCK();
        }
}

void
mld_set_fast_timeout(struct mld_tparams *mtp)
{
        VERIFY(mtp != NULL);
        mtp->fast = true;
        mld_set_timeout(mtp);
}
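
/*
 * Typical caller pattern for the two functions above (a sketch; compare
 * real callers such as sysctl_mld_v2enable() and mld_v1_input_query()):
 * fill in a struct mld_tparams on the stack while holding the relevant
 * locks, then call mld_set_timeout() only after those locks have been
 * dropped, since it takes mld_mtx itself.
 *
 *	struct mld_tparams mtp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };
 *	// ... under IN6M_LOCK():
 *	mtp.cst = mld_v1_update_group(inm, timer);
 *	// ... after unlocking:
 *	mld_set_timeout(&mtp);  // acquires mld_mtx internally
 */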

/*
 * MLD6 timer handler (per 1 second).
 */
static void
mld_timeout(thread_call_param_t arg0, thread_call_param_t arg1 __unused)
{
        struct ifqueue scq;     /* State-change packets */
        struct ifqueue qrq;     /* Query response packets */
        struct ifnet *ifp;
        struct mld_ifinfo *mli;
        struct in6_multi *inm;
        int uri_sec = 0;
        unsigned int genid = mld_mli_list_genid;
        bool fast = arg0 != NULL;

        SLIST_HEAD(, in6_multi) in6m_dthead;

        SLIST_INIT(&in6m_dthead);

        /*
         * Update coarse-grained networking timestamp (in sec.); the idea
         * is to piggy-back on the timeout callout to update the counter
         * returnable via net_uptime().
         */
        net_update_uptime();

        MLD_LOCK();

        MLD_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d, fast %d\n", __func__,
            querier_present_timers_running6, interface_timers_running6,
            current_state_timers_running6, state_change_timers_running6, fast));

        if (fast) {
                /*
                 * When running the fast timer, skip processing
                 * of "querier present" timers since they are
                 * based on 1-second intervals.
                 */
                goto skip_query_timers;
        }
        /*
         * MLDv1 querier present timer processing.
         */
        if (querier_present_timers_running6) {
                querier_present_timers_running6 = 0;
                LIST_FOREACH(mli, &mli_head, mli_link) {
                        MLI_LOCK(mli);
                        mld_v1_process_querier_timers(mli);
                        if (mli->mli_v1_timer > 0) {
                                querier_present_timers_running6 = 1;
                        }
                        MLI_UNLOCK(mli);
                }
        }

        /*
         * MLDv2 General Query response timer processing.
         */
        if (interface_timers_running6) {
                MLD_PRINTF(("%s: interface timers running\n", __func__));
                interface_timers_running6 = 0;
                mli = LIST_FIRST(&mli_head);

                while (mli != NULL) {
                        if (mli->mli_flags & MLIF_PROCESSED) {
                                mli = LIST_NEXT(mli, mli_link);
                                continue;
                        }

                        MLI_LOCK(mli);
                        if (mli->mli_version != MLD_VERSION_2) {
                                MLI_UNLOCK(mli);
                                mli = LIST_NEXT(mli, mli_link);
                                continue;
                        }
                        /*
                         * XXX The logic below ends up calling
                         * mld_dispatch_packet which can unlock mli
                         * and the global MLD lock.
                         * Therefore grab a reference on MLI and also
                         * check for generation count to see if we should
                         * iterate the list again.
                         */
                        MLI_ADDREF_LOCKED(mli);

                        if (mli->mli_v2_timer == 0) {
                                /* Do nothing. */
                        } else if (--mli->mli_v2_timer == 0) {
                                if (mld_v2_dispatch_general_query(mli) > 0) {
                                        interface_timers_running6 = 1;
                                }
                        } else {
                                interface_timers_running6 = 1;
                        }
                        mli->mli_flags |= MLIF_PROCESSED;
                        MLI_UNLOCK(mli);
                        MLI_REMREF(mli);

                        if (genid != mld_mli_list_genid) {
                                MLD_PRINTF(("%s: MLD information list changed "
                                    "in the middle of iteration! Restart iteration.\n",
                                    __func__));
                                mli = LIST_FIRST(&mli_head);
                                genid = mld_mli_list_genid;
                        } else {
                                mli = LIST_NEXT(mli, mli_link);
                        }
                }

                LIST_FOREACH(mli, &mli_head, mli_link)
                        mli->mli_flags &= ~MLIF_PROCESSED;
        }

skip_query_timers:
        if (!current_state_timers_running6 &&
            !state_change_timers_running6) {
                goto out_locked;
        }

        current_state_timers_running6 = 0;
        state_change_timers_running6 = 0;

        MLD_PRINTF(("%s: state change timers running\n", __func__));

        memset(&qrq, 0, sizeof(struct ifqueue));
        qrq.ifq_maxlen = MLD_MAX_G_GS_PACKETS;

        memset(&scq, 0, sizeof(struct ifqueue));
        scq.ifq_maxlen = MLD_MAX_STATE_CHANGE_PACKETS;

        /*
         * MLD host report and state-change timer processing.
         * Note: Processing a v2 group timer may remove a node.
         */
        mli = LIST_FIRST(&mli_head);

        while (mli != NULL) {
                struct in6_multistep step;

                if (mli->mli_flags & MLIF_PROCESSED) {
                        mli = LIST_NEXT(mli, mli_link);
                        continue;
                }

                MLI_LOCK(mli);
                ifp = mli->mli_ifp;
                uri_sec = MLD_RANDOM_DELAY(mli->mli_uri);
                MLI_UNLOCK(mli);

                in6_multihead_lock_shared();
                IN6_FIRST_MULTI(step, inm);
                while (inm != NULL) {
                        IN6M_LOCK(inm);
                        if (inm->in6m_ifp != ifp) {
                                goto next;
                        }

                        MLI_LOCK(mli);
                        switch (mli->mli_version) {
                        case MLD_VERSION_1:
                                mld_v1_process_group_timer(inm,
                                    mli->mli_version);
                                break;
                        case MLD_VERSION_2:
                                mld_v2_process_group_timers(mli, &qrq,
                                    &scq, inm, uri_sec);
                                break;
                        }
                        MLI_UNLOCK(mli);
next:
                        IN6M_UNLOCK(inm);
                        IN6_NEXT_MULTI(step, inm);
                }
                in6_multihead_lock_done();

                /*
                 * XXX The logic below ends up calling
                 * mld_dispatch_packet which can unlock mli
                 * and the global MLD lock.
                 * Therefore grab a reference on MLI and also
                 * check for generation count to see if we should
                 * iterate the list again.
                 */
                MLI_LOCK(mli);
                MLI_ADDREF_LOCKED(mli);
                if (mli->mli_version == MLD_VERSION_1) {
                        mld_dispatch_queue_locked(mli, &mli->mli_v1q, 0);
                } else if (mli->mli_version == MLD_VERSION_2) {
                        MLI_UNLOCK(mli);
                        mld_dispatch_queue_locked(NULL, &qrq, 0);
                        mld_dispatch_queue_locked(NULL, &scq, 0);
                        VERIFY(qrq.ifq_len == 0);
                        VERIFY(scq.ifq_len == 0);
                        MLI_LOCK(mli);
                }
                /*
                 * In case there are still any pending membership reports
                 * which didn't get drained at version change time.
                 */
                IF_DRAIN(&mli->mli_v1q);
                /*
                 * Release all deferred inm records, and drain any locally
                 * enqueued packets; do it even if the current MLD version
                 * for the link is no longer MLDv2, in order to handle the
                 * version change case.
                 */
                mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
                mli->mli_flags |= MLIF_PROCESSED;
                MLI_UNLOCK(mli);
                MLI_REMREF(mli);

                IF_DRAIN(&qrq);
                IF_DRAIN(&scq);

                if (genid != mld_mli_list_genid) {
                        MLD_PRINTF(("%s: MLD information list changed "
                            "in the middle of iteration! Restart iteration.\n",
                            __func__));
                        mli = LIST_FIRST(&mli_head);
                        genid = mld_mli_list_genid;
                } else {
                        mli = LIST_NEXT(mli, mli_link);
                }
        }

        LIST_FOREACH(mli, &mli_head, mli_link)
                mli->mli_flags &= ~MLIF_PROCESSED;

out_locked:
        /* re-arm the timer if there's work to do */
        if (fast) {
                mld_fast_timeout_run = false;
        } else {
                mld_timeout_run = false;
        }
        mld_sched_timeout();
        MLD_UNLOCK();

	/* Now that we've dropped all locks, release detached records */
	MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
}

static void
mld_sched_timeout(void)
{
	static thread_call_t mld_timeout_tcall;
	uint64_t deadline = 0, leeway = 0;

	MLD_LOCK_ASSERT_HELD();
	if (mld_timeout_tcall == NULL) {
		mld_timeout_tcall =
		    thread_call_allocate_with_options(mld_timeout,
		    NULL,
		    THREAD_CALL_PRIORITY_KERNEL,
		    THREAD_CALL_OPTIONS_ONCE);
	}

	if (!mld_timeout_run &&
	    (querier_present_timers_running6 || current_state_timers_running6 ||
	    interface_timers_running6 || state_change_timers_running6)) {
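		/*
		 * mld_timeout_delay and mld_timeout_leeway are expressed
		 * in milliseconds (hence the NSEC_PER_MSEC scale below).
		 */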
		mld_timeout_run = true;
		clock_interval_to_deadline(mld_timeout_delay, NSEC_PER_MSEC,
		    &deadline);
		clock_interval_to_absolutetime_interval(mld_timeout_leeway,
		    NSEC_PER_MSEC, &leeway);
		thread_call_enter_delayed_with_leeway(mld_timeout_tcall, NULL,
		    deadline, leeway,
		    THREAD_CALL_DELAY_LEEWAY);
	}
}

static void
mld_sched_fast_timeout(void)
{
	static thread_call_t mld_fast_timeout_tcall;

	MLD_LOCK_ASSERT_HELD();
	if (mld_fast_timeout_tcall == NULL) {
		mld_fast_timeout_tcall =
		    thread_call_allocate_with_options(mld_timeout,
		    mld_sched_fast_timeout,
		    THREAD_CALL_PRIORITY_KERNEL,
		    THREAD_CALL_OPTIONS_ONCE);
	}
	if (!mld_fast_timeout_run &&
	    (current_state_timers_running6 || state_change_timers_running6)) {
		mld_fast_timeout_run = true;
		thread_call_enter(mld_fast_timeout_tcall);
	}
}

/*
 * Appends an in6_multi to the list to be released later.
 *
 * Caller must be holding mli_lock.
 */
static void
mld_append_relq(struct mld_ifinfo *mli, struct in6_multi *inm)
{
	MLI_LOCK_ASSERT_HELD(mli);
	MLD_PRINTF(("%s: adding inm %llx on relq ifp 0x%llx(%s)\n",
	    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm),
	    (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp),
	    if_name(mli->mli_ifp)));
	SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm, in6m_nrele);
}

/*
 * Free the in6_multi reference(s) for this MLD lifecycle.
 *
 * Caller must be holding mli_lock.
 */
static void
mld_flush_relq(struct mld_ifinfo *mli, struct mld_in6m_relhead *in6m_dthead)
{
	struct in6_multi *inm;
	SLIST_HEAD(, in6_multi) temp_relinmhead;

	/*
	 * Before dropping the mli_lock, copy all the items in the
	 * release list to a temporary list to prevent other threads
	 * from changing mli_relinmhead while we are traversing it.
	 */
	MLI_LOCK_ASSERT_HELD(mli);
	SLIST_INIT(&temp_relinmhead);
	while ((inm = SLIST_FIRST(&mli->mli_relinmhead)) != NULL) {
		SLIST_REMOVE_HEAD(&mli->mli_relinmhead, in6m_nrele);
		SLIST_INSERT_HEAD(&temp_relinmhead, inm, in6m_nrele);
	}
	MLI_UNLOCK(mli);
	in6_multihead_lock_exclusive();
	while ((inm = SLIST_FIRST(&temp_relinmhead)) != NULL) {
		int lastref;

		SLIST_REMOVE_HEAD(&temp_relinmhead, in6m_nrele);
		IN6M_LOCK(inm);
		VERIFY(inm->in6m_nrelecnt != 0);
		inm->in6m_nrelecnt--;
		lastref = in6_multi_detach(inm);
		VERIFY(!lastref || (!(inm->in6m_debug & IFD_ATTACHED) &&
		    inm->in6m_reqcnt == 0));
		IN6M_UNLOCK(inm);
		/* from mli_relinmhead */
		IN6M_REMREF(inm);
		/* from in6_multihead_list */
		if (lastref) {
			/*
			 * Defer releasing our final reference, as we
			 * are holding the MLD lock at this point, and
			 * we could end up with locking issues later on
			 * (while issuing SIOCDELMULTI) when this is the
			 * final reference count. Let the caller do it
			 * when it is safe.
			 */
			MLD_ADD_DETACHED_IN6M(in6m_dthead, inm);
		}
	}
	in6_multihead_lock_done();
	MLI_LOCK(mli);
}

/*
 * Update host report group timer.
 * Will update the global pending timer flags.
 */
static void
mld_v1_process_group_timer(struct in6_multi *inm, const int mld_version)
{
#pragma unused(mld_version)
	int report_timer_expired;

	MLD_LOCK_ASSERT_HELD();
	IN6M_LOCK_ASSERT_HELD(inm);
	MLI_LOCK_ASSERT_HELD(inm->in6m_mli);

	if (inm->in6m_timer == 0) {
		report_timer_expired = 0;
	} else if (--inm->in6m_timer == 0) {
		report_timer_expired = 1;
	} else {
		current_state_timers_running6 = 1;
		/* caller will schedule timer */
		return;
	}

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
	case MLD_IDLE_MEMBER:
	case MLD_LAZY_MEMBER:
	case MLD_SLEEPING_MEMBER:
	case MLD_AWAKENING_MEMBER:
		break;
	case MLD_REPORTING_MEMBER:
		if (report_timer_expired) {
			inm->in6m_state = MLD_IDLE_MEMBER;
			(void) mld_v1_transmit_report(inm,
			    MLD_LISTENER_REPORT);
			IN6M_LOCK_ASSERT_HELD(inm);
			MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
		}
		break;
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_SG_QUERY_PENDING_MEMBER:
	case MLD_LEAVING_MEMBER:
		break;
	}
}

/*
 * Update a group's timers for MLDv2.
 * Will update the global pending timer flags.
 * Note: Unlocked read from mli.
 */
static void
mld_v2_process_group_timers(struct mld_ifinfo *mli,
    struct ifqueue *qrq, struct ifqueue *scq,
    struct in6_multi *inm, const int uri_sec)
{
	int query_response_timer_expired;
	int state_change_retransmit_timer_expired;

	MLD_LOCK_ASSERT_HELD();
	IN6M_LOCK_ASSERT_HELD(inm);
	MLI_LOCK_ASSERT_HELD(mli);
	VERIFY(mli == inm->in6m_mli);

	query_response_timer_expired = 0;
	state_change_retransmit_timer_expired = 0;

	/*
	 * During a transition from compatibility mode back to MLDv2,
	 * a group record in REPORTING state may still have its group
	 * timer active. This is a no-op in this function; it is easier
	 * to deal with it here than to complicate the timeout path.
	 */
	if (inm->in6m_timer == 0) {
		query_response_timer_expired = 0;
	} else if (--inm->in6m_timer == 0) {
		query_response_timer_expired = 1;
	} else {
		current_state_timers_running6 = 1;
		/* caller will schedule timer */
	}

	if (inm->in6m_sctimer == 0) {
		state_change_retransmit_timer_expired = 0;
	} else if (--inm->in6m_sctimer == 0) {
		state_change_retransmit_timer_expired = 1;
	} else {
		state_change_timers_running6 = 1;
		/* caller will schedule timer */
	}

	/* We are in timer callback, so be quick about it. */
	if (!state_change_retransmit_timer_expired &&
	    !query_response_timer_expired) {
		return;
	}

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
	case MLD_SLEEPING_MEMBER:
	case MLD_LAZY_MEMBER:
	case MLD_AWAKENING_MEMBER:
	case MLD_IDLE_MEMBER:
		break;
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_SG_QUERY_PENDING_MEMBER:
		/*
		 * Respond to a previously pending Group-Specific
		 * or Group-and-Source-Specific query by enqueueing
		 * the appropriate Current-State report for
		 * immediate transmission.
		 */
		if (query_response_timer_expired) {
			int retval;

			retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
			    (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER),
			    0);
			MLD_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			inm->in6m_state = MLD_REPORTING_MEMBER;
			in6m_clear_recorded(inm);
		}
		OS_FALLTHROUGH;
	case MLD_REPORTING_MEMBER:
	case MLD_LEAVING_MEMBER:
		if (state_change_retransmit_timer_expired) {
			/*
			 * State-change retransmission timer fired.
			 * If there are any further pending retransmissions,
			 * set the global pending state-change flag, and
			 * reset the timer.
			 */
			if (--inm->in6m_scrv > 0) {
				inm->in6m_sctimer = (uint16_t)uri_sec;
				state_change_timers_running6 = 1;
				/* caller will schedule timer */
			}
			/*
			 * Retransmit the previously computed state-change
			 * report. If there are no further pending
			 * retransmissions, the mbuf queue will be consumed.
			 * Update T0 state to T1 as we have now sent
			 * a state-change.
			 */
			(void) mld_v2_merge_state_changes(inm, scq);

			in6m_commit(inm);
			MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
			    ip6_sprintf(&inm->in6m_addr),
			    if_name(inm->in6m_ifp)));

			/*
			 * If we are leaving the group for good, make sure
			 * we release MLD's reference to it.
			 * This release must be deferred using a SLIST,
			 * as we are called from a loop which traverses
			 * the in6_multihead list.
			 */
			if (inm->in6m_state == MLD_LEAVING_MEMBER &&
			    inm->in6m_scrv == 0) {
				inm->in6m_state = MLD_NOT_MEMBER;
				/*
				 * A reference has already been held in
				 * mld_final_leave() for this inm, so
				 * no need to hold another one. We also
				 * bumped up its request count then, so
				 * that it stays in in6_multihead. Both
				 * of them will be released when it is
				 * dequeued later on.
				 */
				VERIFY(inm->in6m_nrelecnt != 0);
				mld_append_relq(mli, inm);
			}
		}
		break;
	}
}

/*
 * Switch to a different version on the given interface,
 * as per Section 9.12.
 */
static uint32_t
mld_set_version(struct mld_ifinfo *mli, const int mld_version)
{
	int old_version_timer;

	MLI_LOCK_ASSERT_HELD(mli);

	MLD_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__,
	    mld_version, (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp),
	    if_name(mli->mli_ifp)));

	if (mld_version == MLD_VERSION_1) {
		/*
		 * Compute the "Older Version Querier Present" timer as per
		 * Section 9.12, in seconds.
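		 * With the RFC 3810 defaults (RV=2, QI=125s, QRI=10s),
		 * this works out to 260 seconds.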
		 */
		old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
		mli->mli_v1_timer = old_version_timer;
	}

	if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
		mli->mli_version = MLD_VERSION_1;
		mld_v2_cancel_link_timers(mli);
	}

	MLI_LOCK_ASSERT_HELD(mli);

	return mli->mli_v1_timer;
}

/*
 * Cancel pending MLDv2 timers for the given link and all groups
 * joined on it: state-change, general-query, and group-query timers.
 *
 * Only ever called on a transition from v2 to Compatibility mode. Kill
 * the timers stone dead (this may be expensive for large numbers of
 * groups); they will be restarted if Compatibility Mode deems that they
 * must be, due to query processing.
 */
static void
mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
{
	struct ifnet *ifp;
	struct in6_multi *inm;
	struct in6_multistep step;

	MLI_LOCK_ASSERT_HELD(mli);

	MLD_PRINTF(("%s: cancel v2 timers on ifp 0x%llx(%s)\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp), if_name(mli->mli_ifp)));

	/*
	 * Stop the v2 General Query Response on this link stone dead.
	 * If timer is woken up due to interface_timers_running6,
	 * the flag will be cleared if there are no pending link timers.
	 */
	mli->mli_v2_timer = 0;

	/*
	 * Now clear the current-state and state-change report timers
	 * for all memberships scoped to this link.
	 */
	ifp = mli->mli_ifp;
	MLI_UNLOCK(mli);

	in6_multihead_lock_shared();
	IN6_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		IN6M_LOCK(inm);
		if (inm->in6m_ifp != ifp) {
			goto next;
		}

		switch (inm->in6m_state) {
		case MLD_NOT_MEMBER:
		case MLD_SILENT_MEMBER:
		case MLD_IDLE_MEMBER:
		case MLD_LAZY_MEMBER:
		case MLD_SLEEPING_MEMBER:
		case MLD_AWAKENING_MEMBER:
			/*
			 * These states are either not relevant in v2 mode,
			 * or are unreported. Do nothing.
			 */
			break;
		case MLD_LEAVING_MEMBER:
			/*
			 * If we are leaving the group and switching
			 * version, we need to release the final
			 * reference held for issuing the INCLUDE {}.
			 * During mld_final_leave(), we bumped up both the
			 * request and reference counts. Since we cannot
			 * call in6_multi_detach() here, defer this task to
			 * the timer routine.
			 */
			VERIFY(inm->in6m_nrelecnt != 0);
			MLI_LOCK(mli);
			mld_append_relq(mli, inm);
			MLI_UNLOCK(mli);
			OS_FALLTHROUGH;
		case MLD_G_QUERY_PENDING_MEMBER:
		case MLD_SG_QUERY_PENDING_MEMBER:
			in6m_clear_recorded(inm);
			OS_FALLTHROUGH;
		case MLD_REPORTING_MEMBER:
			inm->in6m_state = MLD_REPORTING_MEMBER;
			break;
		}
		/*
		 * Always clear state-change and group report timers.
		 * Free any pending MLDv2 state-change records.
		 */
		inm->in6m_sctimer = 0;
		inm->in6m_timer = 0;
		IF_DRAIN(&inm->in6m_scq);
next:
		IN6M_UNLOCK(inm);
		IN6_NEXT_MULTI(step, inm);
	}
	in6_multihead_lock_done();

	MLI_LOCK(mli);
}

/*
 * Update the Older Version Querier Present timers for a link.
 * See Section 9.12 of RFC 3810.
 */
static void
mld_v1_process_querier_timers(struct mld_ifinfo *mli)
{
	MLI_LOCK_ASSERT_HELD(mli);

	if (mld_v2enable && mli->mli_version != MLD_VERSION_2 &&
	    --mli->mli_v1_timer == 0) {
		/*
		 * MLDv1 Querier Present timer expired; revert to MLDv2.
		 */
		MLD_PRINTF(("%s: transition from v%d -> v%d on 0x%llx(%s)\n",
		    __func__, mli->mli_version, MLD_VERSION_2,
		    (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp),
		    if_name(mli->mli_ifp)));
		mli->mli_version = MLD_VERSION_2;
	}
}

/*
 * Transmit an MLDv1 report immediately.
 */
static int
mld_v1_transmit_report(struct in6_multi *in6m, const uint8_t type)
{
	struct ifnet *ifp;
	struct in6_ifaddr *ia;
	struct ip6_hdr *ip6;
	struct mbuf *mh, *md;
	struct mld_hdr *mld;
	int error = 0;

	IN6M_LOCK_ASSERT_HELD(in6m);
	MLI_LOCK_ASSERT_HELD(in6m->in6m_mli);

	ifp = in6m->in6m_ifp;
	/* ia may be NULL if link-local address is tentative. */
	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);

	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
	if (mh == NULL) {
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		return ENOMEM;
	}
	MGET(md, M_DONTWAIT, MT_DATA);
	if (md == NULL) {
		m_free(mh);
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		return ENOMEM;
	}
	mh->m_next = md;

	/*
	 * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
	 * that ether_output() does not need to allocate another mbuf
	 * for the header in the most common case.
	 */
	MH_ALIGN(mh, sizeof(struct ip6_hdr));
	mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
	mh->m_len = sizeof(struct ip6_hdr);

	ip6 = mtod(mh, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	if (ia != NULL) {
		IFA_LOCK(&ia->ia_ifa);
	}
	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
	ip6_output_setsrcifscope(mh, IFSCOPE_NONE, ia);
	if (ia != NULL) {
		IFA_UNLOCK(&ia->ia_ifa);
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}
	ip6->ip6_dst = in6m->in6m_addr;
	ip6_output_setdstifscope(mh, in6m->ifscope, NULL);

	md->m_len = sizeof(struct mld_hdr);
	mld = mtod(md, struct mld_hdr *);
	mld->mld_type = type;
	mld->mld_code = 0;
	mld->mld_cksum = 0;
	mld->mld_maxdelay = 0;
	mld->mld_reserved = 0;
	mld->mld_addr = in6m->in6m_addr;
	in6_clearscope(&mld->mld_addr);
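	/*
	 * ICMPv6 checksum is computed over the MLD header plus the
	 * IPv6 pseudo-header (src/dst taken from the header above).
	 */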
	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
	    sizeof(struct ip6_hdr), sizeof(struct mld_hdr));

	mld_save_context(mh, ifp);
	mh->m_flags |= M_MLDV1;

	/*
	 * Because we may be holding in6_multihead_lock here, in shared or
	 * exclusive mode, we can't call mld_dispatch_packet() directly:
	 * it eventually calls ip6_output(), which will try to lock
	 * in6_multihead_lock and cause a deadlock.
	 * Instead, defer the work to the mld_timeout() thread, thus
	 * avoiding the need to unlock in6_multihead_lock here.
	 */
	if (IF_QFULL(&in6m->in6m_mli->mli_v1q)) {
		MLD_PRINTF(("%s: v1 outbound queue full\n", __func__));
		error = ENOMEM;
		m_freem(mh);
	} else {
		IF_ENQUEUE(&in6m->in6m_mli->mli_v1q, mh);
		VERIFY(error == 0);
	}

	return error;
}

/*
 * Process a state change from the upper layer for the given IPv6 group.
 *
 * Each socket holds a reference on the in6_multi in its own ip_moptions.
 * The socket layer will have made the necessary updates to the group
 * state; it is now up to MLD to issue a state change report if there
 * has been any change between T0 (when the last state-change was issued)
 * and T1 (now).
 *
 * We use the MLDv2 state machine at group level. The MLD module,
 * however, makes the decision as to which MLD protocol version to speak.
 * A state change *from* INCLUDE {} always means an initial join.
 * A state change *to* INCLUDE {} always means a final leave.
 *
 * If delay is non-zero, and the state change is an initial multicast
 * join, the state change report will be delayed by 'delay' ticks
 * in units of seconds if MLDv1 is active on the link; otherwise
 * the initial MLDv2 state change report will be delayed by whichever
 * is sooner, a pending state-change timer or delay itself.
 */
int
mld_change_state(struct in6_multi *inm, struct mld_tparams *mtp,
    const int delay)
{
	struct mld_ifinfo *mli;
	struct ifnet *ifp;
	int error = 0;

	VERIFY(mtp != NULL);
	bzero(mtp, sizeof(*mtp));

	IN6M_LOCK_ASSERT_HELD(inm);
	VERIFY(inm->in6m_mli != NULL);
	MLI_LOCK_ASSERT_NOTHELD(inm->in6m_mli);

	/*
	 * Try to detect if the upper layer just asked us to change state
	 * for an interface which has now gone away.
	 */
	VERIFY(inm->in6m_ifma != NULL);
	ifp = inm->in6m_ifma->ifma_ifp;
	/*
	 * Sanity check that netinet6's notion of ifp is the same as net's.
	 */
	VERIFY(inm->in6m_ifp == ifp);

	mli = MLD_IFINFO(ifp);
	VERIFY(mli != NULL);

	/*
	 * If we detect a state transition to or from MCAST_UNDEFINED
	 * for this group, then we are starting or finishing an MLD
	 * life cycle for this group.
	 */
	if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) {
		MLD_PRINTF(("%s: inm transition %d -> %d\n", __func__,
		    inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode));
		if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) {
			MLD_PRINTF(("%s: initial join\n", __func__));
			error = mld_initial_join(inm, mli, mtp, delay);
			goto out;
		} else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) {
			MLD_PRINTF(("%s: final leave\n", __func__));
			mld_final_leave(inm, mli, mtp);
			goto out;
		}
	} else {
		MLD_PRINTF(("%s: filter set change\n", __func__));
	}

	error = mld_handle_state_change(inm, mli, mtp);
out:
	return error;
}

/*
 * Perform the initial join for an MLD group.
 *
 * When joining a group:
 * If the group should have its MLD traffic suppressed, do nothing.
 * MLDv1 starts sending MLDv1 host membership reports.
 * MLDv2 will schedule an MLDv2 state-change report containing the
 * initial state of the membership.
 *
 * If the delay argument is non-zero, then we must delay sending the
 * initial state change for delay ticks (in units of seconds).
 */
static int
mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
    struct mld_tparams *mtp, const int delay)
{
	struct ifnet *ifp;
	struct ifqueue *ifq;
	int error, retval, syncstates;
	int odelay;

	IN6M_LOCK_ASSERT_HELD(inm);
	MLI_LOCK_ASSERT_NOTHELD(mli);
	VERIFY(mtp != NULL);

	MLD_PRINTF(("%s: initial join %s on ifp 0x%llx(%s)\n",
	    __func__, ip6_sprintf(&inm->in6m_addr),
	    (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
	    if_name(inm->in6m_ifp)));

	error = 0;
	syncstates = 1;

	ifp = inm->in6m_ifp;

	MLI_LOCK(mli);
	VERIFY(mli->mli_ifp == ifp);

	/*
	 * Avoid MLD if the group is:
	 * 1. Joined on loopback, OR
	 * 2. On a link that is marked MLIF_SILENT, OR
	 * 3. rdar://problem/19227650 Link-local scoped and
	 *    on a cellular interface, OR
	 * 4. Of a type that should not be reported (node-local
	 *    or all-nodes link-local multicast).
	 * All other groups enter the appropriate state machine
	 * for the version in use on this link.
	 */
	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (mli->mli_flags & MLIF_SILENT) ||
	    (IFNET_IS_CELLULAR(ifp) &&
	    (IN6_IS_ADDR_MC_LINKLOCAL(&inm->in6m_addr) ||
	    IN6_IS_ADDR_MC_UNICAST_BASED_LINKLOCAL(&inm->in6m_addr))) ||
	    !mld_is_addr_reported(&inm->in6m_addr)) {
		MLD_PRINTF(("%s: not kicking state machine for silent group\n",
		    __func__));
		inm->in6m_state = MLD_SILENT_MEMBER;
		inm->in6m_timer = 0;
	} else {
		/*
		 * Deal with overlapping in6_multi lifecycle.
		 * If this group was LEAVING, then make sure
		 * we drop the reference we picked up to keep the
		 * group around for the final INCLUDE {} enqueue.
		 * Since we cannot call in6_multi_detach() here,
		 * defer this task to the timer routine.
		 */
		if (mli->mli_version == MLD_VERSION_2 &&
		    inm->in6m_state == MLD_LEAVING_MEMBER) {
			VERIFY(inm->in6m_nrelecnt != 0);
			mld_append_relq(mli, inm);
		}

		inm->in6m_state = MLD_REPORTING_MEMBER;

		switch (mli->mli_version) {
		case MLD_VERSION_1:
			/*
			 * If a delay was provided, only use it if
			 * it is greater than the delay normally
			 * used for an MLDv1 state change report,
			 * and delay sending the initial MLDv1 report
			 * by not transitioning to the IDLE state.
			 */
			odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI);
			if (delay) {
				inm->in6m_timer = max(delay, odelay);
				mtp->cst = 1;
			} else {
				inm->in6m_state = MLD_IDLE_MEMBER;
				error = mld_v1_transmit_report(inm,
				    MLD_LISTENER_REPORT);

				IN6M_LOCK_ASSERT_HELD(inm);
				MLI_LOCK_ASSERT_HELD(mli);

				if (error == 0) {
					inm->in6m_timer = odelay;
					mtp->cst = 1;
				}
			}
			break;

		case MLD_VERSION_2:
			/*
			 * Defer update of T0 to T1, until the first copy
			 * of the state change has been transmitted.
			 */
			syncstates = 0;

			/*
			 * Immediately enqueue a State-Change Report for
			 * this interface, freeing any previous reports.
			 * Don't kick the timers if there is nothing to do,
			 * or if an error occurred.
			 */
			ifq = &inm->in6m_scq;
			IF_DRAIN(ifq);
			retval = mld_v2_enqueue_group_record(ifq, inm, 1,
			    0, 0, (mli->mli_flags & MLIF_USEALLOW));
			mtp->cst = (ifq->ifq_len > 0);
			MLD_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			if (retval <= 0) {
				error = retval * -1;
				break;
			}

			/*
			 * Schedule transmission of pending state-change
			 * report up to RV times for this link. The timer
			 * will fire at the next mld_timeout (1 second),
			 * giving us an opportunity to merge the reports.
			 *
			 * If a delay was provided to this function, only
			 * use this delay if sooner than the existing one.
			 */
			VERIFY(mli->mli_rv > 1);
			inm->in6m_scrv = (uint16_t)mli->mli_rv;
			if (delay) {
				if (inm->in6m_sctimer > 1) {
					inm->in6m_sctimer =
					    MIN(inm->in6m_sctimer, (uint16_t)delay);
				} else {
					inm->in6m_sctimer = (uint16_t)delay;
				}
			} else {
				inm->in6m_sctimer = 1;
			}
			mtp->sct = 1;
			error = 0;
			break;
		}
	}
	MLI_UNLOCK(mli);

	/*
	 * Only update the T0 state if state change is atomic,
	 * i.e. we don't need to wait for a timer to fire before we
	 * can consider the state change to have been communicated.
	 */
	if (syncstates) {
		in6m_commit(inm);
		MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
		    ip6_sprintf(&inm->in6m_addr),
		    if_name(inm->in6m_ifp)));
	}

	return error;
}

/*
 * Issue an intermediate state change during the life-cycle.
 */
static int
mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli,
    struct mld_tparams *mtp)
{
	struct ifnet *ifp;
	int retval = 0;

	IN6M_LOCK_ASSERT_HELD(inm);
	MLI_LOCK_ASSERT_NOTHELD(mli);
	VERIFY(mtp != NULL);

	MLD_PRINTF(("%s: state change for %s on ifp 0x%llx(%s)\n",
	    __func__, ip6_sprintf(&inm->in6m_addr),
	    (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
	    if_name(inm->in6m_ifp)));

	ifp = inm->in6m_ifp;

	MLI_LOCK(mli);
	VERIFY(mli->mli_ifp == ifp);

	if ((ifp->if_flags & IFF_LOOPBACK) ||
	    (mli->mli_flags & MLIF_SILENT) ||
	    !mld_is_addr_reported(&inm->in6m_addr) ||
	    (mli->mli_version != MLD_VERSION_2)) {
		MLI_UNLOCK(mli);
		if (!mld_is_addr_reported(&inm->in6m_addr)) {
			MLD_PRINTF(("%s: not kicking state machine for silent "
			    "group\n", __func__));
		}
		MLD_PRINTF(("%s: nothing to do\n", __func__));
		in6m_commit(inm);
		MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
		    ip6_sprintf(&inm->in6m_addr),
		    if_name(inm->in6m_ifp)));
		goto done;
	}

	IF_DRAIN(&inm->in6m_scq);

	retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
	    (mli->mli_flags & MLIF_USEALLOW));
	mtp->cst = (inm->in6m_scq.ifq_len > 0);
	MLD_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
	if (retval <= 0) {
		MLI_UNLOCK(mli);
		retval *= -1;
		goto done;
	} else {
		retval = 0;
	}

	/*
	 * If record(s) were enqueued, start the state-change
	 * report timer for this group.
	 */
	inm->in6m_scrv = (uint16_t)mli->mli_rv;
	inm->in6m_sctimer = 1;
	mtp->sct = 1;
	MLI_UNLOCK(mli);

done:
	return retval;
}

/*
 * Perform the final leave for a multicast address.
 *
 * When leaving a group:
 * MLDv1 sends a DONE message, if and only if we are the reporter.
 * MLDv2 enqueues a state-change report containing a transition
 * to INCLUDE {} for immediate transmission.
 */
static void
mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli,
    struct mld_tparams *mtp)
{
	int syncstates = 1;

	IN6M_LOCK_ASSERT_HELD(inm);
	MLI_LOCK_ASSERT_NOTHELD(mli);
	VERIFY(mtp != NULL);

	MLD_PRINTF(("%s: final leave %s on ifp 0x%llx(%s)\n",
	    __func__, ip6_sprintf(&inm->in6m_addr),
	    (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
	    if_name(inm->in6m_ifp)));

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
	case MLD_LEAVING_MEMBER:
		/* Already leaving or left; do nothing. */
		MLD_PRINTF(("%s: not kicking state machine for silent group\n",
		    __func__));
		break;
	case MLD_REPORTING_MEMBER:
	case MLD_IDLE_MEMBER:
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_SG_QUERY_PENDING_MEMBER:
		MLI_LOCK(mli);
		if (mli->mli_version == MLD_VERSION_1) {
			if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
			    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
				panic("%s: MLDv2 state reached, not MLDv2 "
				    "mode\n", __func__);
				/* NOTREACHED */
			}
			/* schedule timer if enqueue is successful */
			mtp->cst = (mld_v1_transmit_report(inm,
			    MLD_LISTENER_DONE) == 0);

			IN6M_LOCK_ASSERT_HELD(inm);
			MLI_LOCK_ASSERT_HELD(mli);

			inm->in6m_state = MLD_NOT_MEMBER;
		} else if (mli->mli_version == MLD_VERSION_2) {
			/*
			 * Stop group timer and all pending reports.
			 * Immediately enqueue a state-change report
			 * TO_IN {} to be sent on the next timeout,
			 * giving us an opportunity to merge reports.
			 */
			IF_DRAIN(&inm->in6m_scq);
			inm->in6m_timer = 0;
			inm->in6m_scrv = (uint16_t)mli->mli_rv;
			MLD_PRINTF(("%s: Leaving %s/%s with %d "
			    "pending retransmissions.\n", __func__,
			    ip6_sprintf(&inm->in6m_addr),
			    if_name(inm->in6m_ifp),
			    inm->in6m_scrv));
			if (inm->in6m_scrv == 0) {
				inm->in6m_state = MLD_NOT_MEMBER;
				inm->in6m_sctimer = 0;
			} else {
				int retval;
				/*
				 * Stick around in the in6_multihead list;
				 * the final detach will be issued by
				 * mld_v2_process_group_timers() when
				 * the retransmit timer expires.
				 */
				IN6M_ADDREF_LOCKED(inm);
				VERIFY(inm->in6m_debug & IFD_ATTACHED);
				inm->in6m_reqcnt++;
				VERIFY(inm->in6m_reqcnt >= 1);
				inm->in6m_nrelecnt++;
				VERIFY(inm->in6m_nrelecnt != 0);

				retval = mld_v2_enqueue_group_record(
					&inm->in6m_scq, inm, 1, 0, 0,
					(mli->mli_flags & MLIF_USEALLOW));
				mtp->cst = (inm->in6m_scq.ifq_len > 0);
				KASSERT(retval != 0,
				    ("%s: enqueue record = %d\n", __func__,
				    retval));

				inm->in6m_state = MLD_LEAVING_MEMBER;
				inm->in6m_sctimer = 1;
				mtp->sct = 1;
				syncstates = 0;
			}
		}
		MLI_UNLOCK(mli);
		break;
	case MLD_LAZY_MEMBER:
	case MLD_SLEEPING_MEMBER:
	case MLD_AWAKENING_MEMBER:
		/* Our reports are suppressed; do nothing. */
		break;
	}

	if (syncstates) {
		in6m_commit(inm);
		MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
		    ip6_sprintf(&inm->in6m_addr),
		    if_name(inm->in6m_ifp)));
		inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
		MLD_PRINTF(("%s: T1 now MCAST_UNDEFINED for 0x%llx/%s\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(&inm->in6m_addr),
		    if_name(inm->in6m_ifp)));
	}
}

/*
 * Enqueue an MLDv2 group record to the given output queue.
 *
 * If is_state_change is zero, a current-state record is appended.
 * If is_state_change is non-zero, a state-change report is appended.
 *
 * If is_group_query is non-zero, an mbuf packet chain is allocated.
 * If is_group_query is zero, and if there is a packet with free space
 * at the tail of the queue, the record will be appended to it,
 * provided there is enough free space.
 * Otherwise a new mbuf packet chain is allocated.
 *
 * If is_source_query is non-zero, each source is checked to see if
 * it was recorded for a Group-Source query, and will be omitted if
 * it is not both in-mode and recorded.
 *
 * If use_block_allow is non-zero, state change reports for initial join
 * and final leave, on an inclusive mode group with a source list, will be
 * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively.
 *
 * The function will attempt to allocate leading space in the packet
 * for the IPv6+ICMP headers to be prepended without fragmenting the chain.
 *
 * If successful the size of all data appended to the queue is returned,
 * otherwise an error code less than zero is returned, or zero if
 * no record(s) were appended.
 */
static int
mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
    const int is_state_change, const int is_group_query,
    const int is_source_query, const int use_block_allow)
{
	struct mldv2_record mr;
	struct mldv2_record *pmr;
	struct ifnet *ifp;
	struct ip6_msource *ims, *nims;
	struct mbuf *m0, *m, *md;
	int error, is_filter_list_change;
	int minrec0len, m0srcs, msrcs, nbytes, off;
	int record_has_sources;
	int now;
	uint8_t type;
	uint8_t mode;

	IN6M_LOCK_ASSERT_HELD(inm);
	MLI_LOCK_ASSERT_HELD(inm->in6m_mli);

	error = 0;
	ifp = inm->in6m_ifp;
	is_filter_list_change = 0;
	m = NULL;
	m0 = NULL;
	m0srcs = 0;
	msrcs = 0;
	nbytes = 0;
	nims = NULL;
	record_has_sources = 1;
	pmr = NULL;
	type = MLD_DO_NOTHING;
	mode = (uint8_t)inm->in6m_st[1].iss_fmode;

	/*
	 * If we did not transition out of ASM mode during t0->t1,
	 * and there are no source nodes to process, we can skip
	 * the generation of source records.
	 */
	if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 &&
	    inm->in6m_nsrc == 0) {
		record_has_sources = 0;
	}

	if (is_state_change) {
		/*
		 * Queue a state change record.
		 * If the mode did not change, and there are non-ASM
		 * listeners or source filters present,
		 * we potentially need to issue two records for the group.
		 * If there are ASM listeners, and there was no filter
		 * mode transition of any kind, do nothing.
		 *
		 * If we are transitioning to MCAST_UNDEFINED, we need
		 * not send any sources. A transition to/from this state is
		 * considered inclusive with some special treatment.
		 *
		 * If we are rewriting initial joins/leaves to use
		 * ALLOW/BLOCK, and the group's membership is inclusive,
		 * we need to send sources in all cases.
		 */
		if (mode != inm->in6m_st[0].iss_fmode) {
			if (mode == MCAST_EXCLUDE) {
				MLD_PRINTF(("%s: change to EXCLUDE\n",
				    __func__));
				type = MLD_CHANGE_TO_EXCLUDE_MODE;
			} else {
				MLD_PRINTF(("%s: change to INCLUDE\n",
				    __func__));
				if (use_block_allow) {
					/*
					 * XXX
					 * Here we're interested in state
					 * edges either direction between
					 * MCAST_UNDEFINED and MCAST_INCLUDE.
					 * Perhaps we should just check
					 * the group state, rather than
					 * the filter mode.
					 */
					if (mode == MCAST_UNDEFINED) {
						type = MLD_BLOCK_OLD_SOURCES;
					} else {
						type = MLD_ALLOW_NEW_SOURCES;
					}
				} else {
					type = MLD_CHANGE_TO_INCLUDE_MODE;
					if (mode == MCAST_UNDEFINED) {
						record_has_sources = 0;
					}
				}
			}
		} else {
			if (record_has_sources) {
				is_filter_list_change = 1;
			} else {
				type = MLD_DO_NOTHING;
			}
		}
	} else {
		/*
		 * Queue a current state record.
		 */
		if (mode == MCAST_EXCLUDE) {
			type = MLD_MODE_IS_EXCLUDE;
		} else if (mode == MCAST_INCLUDE) {
			type = MLD_MODE_IS_INCLUDE;
			VERIFY(inm->in6m_st[1].iss_asm == 0);
		}
	}

	/*
	 * Generate the filter list changes using a separate function.
	 */
	if (is_filter_list_change) {
		return mld_v2_enqueue_filter_change(ifq, inm);
	}

	if (type == MLD_DO_NOTHING) {
		MLD_PRINTF(("%s: nothing to do for %s/%s\n",
		    __func__, ip6_sprintf(&inm->in6m_addr),
		    if_name(inm->in6m_ifp)));
		return 0;
	}

	/*
	 * If any sources are present, we must be able to fit at least
	 * one in the trailing space of the tail packet's mbuf,
	 * ideally more.
	 */
	minrec0len = sizeof(struct mldv2_record);
	if (record_has_sources) {
		minrec0len += sizeof(struct in6_addr);
	}
	MLD_PRINTF(("%s: queueing %s for %s/%s\n", __func__,
	    mld_rec_type_to_str(type),
	    ip6_sprintf(&inm->in6m_addr),
	    if_name(inm->in6m_ifp)));

	/*
	 * Check if we have a packet in the tail of the queue for this
	 * group into which the first group record for this group will fit.
	 * Otherwise allocate a new packet.
	 * Always allocate leading space for IP6+RA+ICMPV6+REPORT.
	 * Note: Group records for G/GSR query responses MUST be sent
	 * in their own packet.
	 */
	m0 = ifq->ifq_tail;
	if (!is_group_query &&
	    m0 != NULL &&
	    (m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
	    (m0->m_pkthdr.len + minrec0len) <
	    (ifp->if_mtu - MLD_MTUSPACE)) {
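		/*
		 * The tail packet has room: compute how many additional
		 * source addresses still fit below the link MTU.
		 */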
		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
		    sizeof(struct mldv2_record)) /
		    sizeof(struct in6_addr);
		m = m0;
		MLD_PRINTF(("%s: use existing packet\n", __func__));
	} else {
		if (IF_QFULL(ifq)) {
			MLD_PRINTF(("%s: outbound queue full\n", __func__));
			return -ENOMEM;
		}
		m = NULL;
		m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
		    sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
		if (!is_state_change && !is_group_query) {
			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
		}
		if (m == NULL) {
			m = m_gethdr(M_DONTWAIT, MT_DATA);
		}
		if (m == NULL) {
			return -ENOMEM;
		}

		mld_save_context(m, ifp);

		MLD_PRINTF(("%s: allocated first packet\n", __func__));
	}

	/*
	 * Append group record.
	 * If we have sources, we don't know how many yet.
	 */
	mr.mr_type = type;
	mr.mr_datalen = 0;
	mr.mr_numsrc = 0;
	mr.mr_addr = inm->in6m_addr;
	in6_clearscope(&mr.mr_addr);
	if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
		if (m != m0) {
			m_freem(m);
		}
		MLD_PRINTF(("%s: m_append() failed.\n", __func__));
		return -ENOMEM;
	}
	nbytes += sizeof(struct mldv2_record);

	/*
	 * Append as many sources as will fit in the first packet.
	 * If we are appending to a new packet, the chain allocation
	 * may potentially use clusters; use m_getptr() in this case.
	 * If we are appending to an existing packet, we need to obtain
	 * a pointer to the group record after m_append(), in case a new
	 * mbuf was allocated.
	 *
	 * Only append sources which are in-mode at t1. If we are
	 * transitioning to MCAST_UNDEFINED state on the group, and
	 * use_block_allow is zero, do not include source entries.
	 * Otherwise, we need to include this source in the report.
	 *
	 * Only report recorded sources in our filter set when responding
	 * to a group-source query.
	 */
	if (record_has_sources) {
		if (m == m0) {
			md = m_last(m);
			pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
			    md->m_len - nbytes);
		} else {
			md = m_getptr(m, 0, &off);
			pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
			    off);
		}
		msrcs = 0;
		RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs,
		    nims) {
			MLD_PRINTF(("%s: visit node %s\n", __func__,
			    ip6_sprintf(&ims->im6s_addr)));
			now = im6s_get_mode(inm, ims, 1);
			MLD_PRINTF(("%s: node is %d\n", __func__, now));
			if ((now != mode) ||
			    (now == mode &&
			    (!use_block_allow && mode == MCAST_UNDEFINED))) {
				MLD_PRINTF(("%s: skip node\n", __func__));
				continue;
			}
			if (is_source_query && ims->im6s_stp == 0) {
				MLD_PRINTF(("%s: skip unrecorded node\n",
				    __func__));
				continue;
			}
			MLD_PRINTF(("%s: append node\n", __func__));
			if (!m_append(m, sizeof(struct in6_addr),
			    (void *)&ims->im6s_addr)) {
				if (m != m0) {
					m_freem(m);
				}
				MLD_PRINTF(("%s: m_append() failed.\n",
				    __func__));
				return -ENOMEM;
			}
			nbytes += sizeof(struct in6_addr);
			++msrcs;
			if (msrcs == m0srcs) {
				break;
			}
		}
		MLD_PRINTF(("%s: msrcs is %d this packet\n", __func__,
		    msrcs));
		pmr->mr_numsrc = htons((uint16_t)msrcs);
		nbytes += (msrcs * sizeof(struct in6_addr));
	}

	if (is_source_query && msrcs == 0) {
		MLD_PRINTF(("%s: no recorded sources to report\n", __func__));
		if (m != m0) {
			m_freem(m);
		}
		return 0;
	}

	/*
	 * We are good to go with first packet.
	 */
	if (m != m0) {
		MLD_PRINTF(("%s: enqueueing first packet\n", __func__));
		m->m_pkthdr.vt_nrecs = 1;
		IF_ENQUEUE(ifq, m);
	} else {
		m->m_pkthdr.vt_nrecs++;
	}
	/*
	 * No further work needed if no source list in packet(s).
	 */
	if (!record_has_sources) {
		return nbytes;
	}

	/*
	 * Whilst sources remain to be announced, we need to allocate
	 * a new packet and fill out as many sources as will fit.
	 * Always try for a cluster first.
	 */
	while (nims != NULL) {
		if (IF_QFULL(ifq)) {
			MLD_PRINTF(("%s: outbound queue full\n", __func__));
			return -ENOMEM;
		}
		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
		if (m == NULL) {
			m = m_gethdr(M_DONTWAIT, MT_DATA);
		}
		if (m == NULL) {
			return -ENOMEM;
		}
		mld_save_context(m, ifp);
		md = m_getptr(m, 0, &off);
		pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off);
		MLD_PRINTF(("%s: allocated next packet\n", __func__));

		if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
			if (m != m0) {
				m_freem(m);
			}
			MLD_PRINTF(("%s: m_append() failed.\n", __func__));
			return -ENOMEM;
		}
		m->m_pkthdr.vt_nrecs = 1;
		nbytes += sizeof(struct mldv2_record);

		m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
		    sizeof(struct mldv2_record)) / sizeof(struct in6_addr);

		msrcs = 0;
		RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
			MLD_PRINTF(("%s: visit node %s\n",
			    __func__, ip6_sprintf(&ims->im6s_addr)));
			now = im6s_get_mode(inm, ims, 1);
			if ((now != mode) ||
			    (now == mode &&
			    (!use_block_allow && mode == MCAST_UNDEFINED))) {
				MLD_PRINTF(("%s: skip node\n", __func__));
				continue;
			}
			if (is_source_query && ims->im6s_stp == 0) {
				MLD_PRINTF(("%s: skip unrecorded node\n",
				    __func__));
				continue;
			}
			MLD_PRINTF(("%s: append node\n", __func__));
			if (!m_append(m, sizeof(struct in6_addr),
			    (void *)&ims->im6s_addr)) {
				if (m != m0) {
					m_freem(m);
				}
				MLD_PRINTF(("%s: m_append() failed.\n",
				    __func__));
				return -ENOMEM;
			}
			++msrcs;
			if (msrcs == m0srcs) {
				break;
			}
		}
		pmr->mr_numsrc = htons((uint16_t)msrcs);
		nbytes += (msrcs * sizeof(struct in6_addr));

		MLD_PRINTF(("%s: enqueueing next packet\n", __func__));
		IF_ENQUEUE(ifq, m);
	}

	return nbytes;
}

/*
 * Type used to mark record pass completion.
 * We exploit the fact we can cast to this easily from the
 * current filter modes on each ip_msource node.
 */
typedef enum {
	REC_NONE = 0x00,        /* MCAST_UNDEFINED */
	REC_ALLOW = 0x01,       /* MCAST_INCLUDE */
	REC_BLOCK = 0x02,       /* MCAST_EXCLUDE */
	REC_FULL = REC_ALLOW | REC_BLOCK
} rectype_t;

/*
 * Enqueue an MLDv2 filter list change to the given output queue.
 *
 * Source list filter state is held in an RB-tree. When the filter list
 * for a group is changed without changing its mode, we need to compute
 * the deltas between T0 and T1 for each source in the filter set,
 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
 *
 * As we may potentially queue two record types, and the entire R-B tree
 * needs to be walked at once, we break this out into its own function
 * so we can generate a tightly packed queue of packets.
 *
 * XXX This could be written to only use one tree walk, although that makes
 * serializing into the mbuf chains a bit harder. For now we do two walks
 * which makes things easier on us, and it may or may not be harder on
 * the L2 cache.
 *
 * If successful the size of all data appended to the queue is returned,
 * otherwise an error code less than zero is returned, or zero if
 * no record(s) were appended.
 */
static int
mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
{
	static const int MINRECLEN =
	    sizeof(struct mldv2_record) + sizeof(struct in6_addr);
	struct ifnet *ifp;
	struct mldv2_record mr;
	struct mldv2_record *pmr;
	struct ip6_msource *ims, *nims;
	struct mbuf *m, *m0, *md;
	int m0srcs, nbytes, npbytes, off, rsrcs, schanged;
	int nallow, nblock;
	uint8_t mode, now, then;
	rectype_t crt, drt, nrt;

	IN6M_LOCK_ASSERT_HELD(inm);

	if (inm->in6m_nsrc == 0 ||
	    (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0)) {
		return 0;
	}

	ifp = inm->in6m_ifp;                            /* interface */
	mode = (uint8_t)inm->in6m_st[1].iss_fmode;      /* filter mode at t1 */
	crt = REC_NONE; /* current group record type */
	drt = REC_NONE; /* mask of completed group record types */
	nrt = REC_NONE; /* record type for current node */
	m0srcs = 0;     /* # source which will fit in current mbuf chain */
	npbytes = 0;    /* # of bytes appended this packet */
	nbytes = 0;     /* # of bytes appended to group's state-change queue */
	rsrcs = 0;      /* # sources encoded in current record */
	schanged = 0;   /* # nodes encoded in overall filter change */
	nallow = 0;     /* # of source entries in ALLOW_NEW */
	nblock = 0;     /* # of source entries in BLOCK_OLD */
	nims = NULL;    /* next tree node pointer */

	/*
	 * For each possible filter record mode.
	 * The first kind of source we encounter tells us which
	 * is the first kind of record we start appending.
	 * If a node transitioned to UNDEFINED at t1, its mode is treated
	 * as the inverse of the group's filter mode.
	 */
	while (drt != REC_FULL) {
		do {
			m0 = ifq->ifq_tail;
			if (m0 != NULL &&
			    (m0->m_pkthdr.vt_nrecs + 1 <=
			    MLD_V2_REPORT_MAXRECS) &&
			    (m0->m_pkthdr.len + MINRECLEN) <
			    (ifp->if_mtu - MLD_MTUSPACE)) {
				m = m0;
				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
				    sizeof(struct mldv2_record)) /
				    sizeof(struct in6_addr);
				MLD_PRINTF(("%s: use previous packet\n",
				    __func__));
			} else {
				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
				if (m == NULL) {
					m = m_gethdr(M_DONTWAIT, MT_DATA);
				}
				if (m == NULL) {
					MLD_PRINTF(("%s: m_get*() failed\n",
					    __func__));
					return -ENOMEM;
				}
				m->m_pkthdr.vt_nrecs = 0;
				mld_save_context(m, ifp);
				m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
				    sizeof(struct mldv2_record)) /
				    sizeof(struct in6_addr);
				npbytes = 0;
				MLD_PRINTF(("%s: allocated new packet\n",
				    __func__));
			}
			/*
			 * Append the MLD group record header to the
			 * current packet's data area.
			 * Recalculate pointer to free space for next
			 * group record, in case m_append() allocated
			 * a new mbuf or cluster.
			 */
			memset(&mr, 0, sizeof(mr));
			mr.mr_addr = inm->in6m_addr;
			in6_clearscope(&mr.mr_addr);
			if (!m_append(m, sizeof(mr), (void *)&mr)) {
				if (m != m0) {
					m_freem(m);
				}
				MLD_PRINTF(("%s: m_append() failed\n",
				    __func__));
				return -ENOMEM;
			}
			npbytes += sizeof(struct mldv2_record);
			if (m != m0) {
				/* new packet; offset in chain */
				md = m_getptr(m, npbytes -
				    sizeof(struct mldv2_record), &off);
				pmr = (struct mldv2_record *)(mtod(md,
				    uint8_t *) + off);
			} else {
				/* current packet; offset from last append */
				md = m_last(m);
				pmr = (struct mldv2_record *)(mtod(md,
				    uint8_t *) + md->m_len -
				    sizeof(struct mldv2_record));
			}
			/*
			 * Begin walking the tree for this record type
			 * pass, or continue from where we left off
			 * previously if we had to allocate a new packet.
			 * Only report deltas in-mode at t1.
			 * We need not report included sources as allowed
			 * if we are in inclusive mode on the group,
			 * however the converse is not true.
			 */
			rsrcs = 0;
			if (nims == NULL) {
				nims = RB_MIN(ip6_msource_tree,
				    &inm->in6m_srcs);
			}
			RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
				MLD_PRINTF(("%s: visit node %s\n", __func__,
				    ip6_sprintf(&ims->im6s_addr)));
				now = im6s_get_mode(inm, ims, 1);
				then = im6s_get_mode(inm, ims, 0);
				MLD_PRINTF(("%s: mode: t0 %d, t1 %d\n",
				    __func__, then, now));
				if (now == then) {
					MLD_PRINTF(("%s: skip unchanged\n",
					    __func__));
					continue;
				}
				if (mode == MCAST_EXCLUDE &&
				    now == MCAST_INCLUDE) {
					MLD_PRINTF(("%s: skip IN src on EX "
					    "group\n", __func__));
					continue;
				}
				nrt = (rectype_t)now;
				if (nrt == REC_NONE) {
					nrt = (rectype_t)(~mode & REC_FULL);
				}
				if (schanged++ == 0) {
					crt = nrt;
				} else if (crt != nrt) {
					continue;
				}
				if (!m_append(m, sizeof(struct in6_addr),
				    (void *)&ims->im6s_addr)) {
					if (m != m0) {
						m_freem(m);
					}
					MLD_PRINTF(("%s: m_append() failed\n",
					    __func__));
					return -ENOMEM;
				}
				nallow += !!(crt == REC_ALLOW);
				nblock += !!(crt == REC_BLOCK);
				if (++rsrcs == m0srcs) {
					break;
				}
			}
			/*
			 * If we did not append any tree nodes on this
			 * pass, back out of allocations.
			 */
			if (rsrcs == 0) {
				npbytes -= sizeof(struct mldv2_record);
				if (m != m0) {
					MLD_PRINTF(("%s: m_free(m)\n",
					    __func__));
					m_freem(m);
				} else {
					MLD_PRINTF(("%s: m_adj(m, -mr)\n",
					    __func__));
					m_adj(m, -((int)sizeof(
					    struct mldv2_record)));
				}
				continue;
			}
			npbytes += (rsrcs * sizeof(struct in6_addr));
			if (crt == REC_ALLOW) {
				pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
			} else if (crt == REC_BLOCK) {
				pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
			}
			pmr->mr_numsrc = htons((uint16_t)rsrcs);
			/*
			 * Count the new group record, and enqueue this
			 * packet if it wasn't already queued.
			 */
			m->m_pkthdr.vt_nrecs++;
			if (m != m0) {
				IF_ENQUEUE(ifq, m);
			}
			nbytes += npbytes;
		} while (nims != NULL);
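		/*
		 * Mark the record type just emitted as done, and flip
		 * crt to the remaining record type for the next pass.
		 */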
		drt |= crt;
		crt = (~crt & REC_FULL);
	}

	MLD_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
	    nallow, nblock));

	return nbytes;
}

static int
mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
{
	struct ifqueue *gq;
	struct mbuf *m;         /* pending state-change */
	struct mbuf *m0;        /* copy of pending state-change */
	struct mbuf *mt;        /* last state-change in packet */
	struct mbuf *n;
	int docopy, domerge;
	u_int recslen;

	IN6M_LOCK_ASSERT_HELD(inm);

	docopy = 0;
	domerge = 0;
	recslen = 0;

	/*
	 * If there are further pending retransmissions, make a writable
	 * copy of each queued state-change message before merging.
	 */
	if (inm->in6m_scrv > 0) {
		docopy = 1;
	}

	gq = &inm->in6m_scq;
#ifdef MLD_DEBUG
	if (gq->ifq_head == NULL) {
		MLD_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
	}
#endif

	/*
	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
	 * packet might not always be at the head of the ifqueue.
	 */
	m = gq->ifq_head;
	while (m != NULL) {
		/*
		 * Only merge the report into the current packet if
		 * there is sufficient space to do so; an MLDv2 report
		 * packet may only contain 65,535 group records.
		 * Always use a simple mbuf chain concatenation to do this,
		 * as large state changes for single groups may have
		 * allocated clusters.
		 */
		domerge = 0;
		mt = ifscq->ifq_tail;
		if (mt != NULL) {
			recslen = m_length(m);

			if ((mt->m_pkthdr.vt_nrecs +
			    m->m_pkthdr.vt_nrecs <=
			    MLD_V2_REPORT_MAXRECS) &&
			    (mt->m_pkthdr.len + recslen <=
			    (inm->in6m_ifp->if_mtu - MLD_MTUSPACE))) {
				domerge = 1;
			}
		}

		if (!domerge && IF_QFULL(gq)) {
			MLD_PRINTF(("%s: outbound queue full, skipping whole "
			    "packet 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			if (!docopy) {
				IF_REMQUEUE(gq, m);
				m_freem(m);
			}
			m = n;
			continue;
		}

		if (!docopy) {
			MLD_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			IF_REMQUEUE(gq, m);
			m0 = m;
			m = n;
		} else {
			MLD_PRINTF(("%s: copying 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			m0 = m_dup(m, M_NOWAIT);
			if (m0 == NULL) {
				return ENOMEM;
			}
			m0->m_nextpkt = NULL;
			m = m->m_nextpkt;
		}

		if (!domerge) {
3570 MLD_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n",
3571 __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
3572 (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
3573 IF_ENQUEUE(ifscq, m0);
3574 } else {
3575 struct mbuf *mtl; /* last mbuf of packet mt */
3576
3577 MLD_PRINTF(("%s: merging 0x%llx with ifscq tail "
3578 "0x%llx)\n", __func__,
3579 (uint64_t)VM_KERNEL_ADDRPERM(m0),
3580 (uint64_t)VM_KERNEL_ADDRPERM(mt)));

			mtl = m_last(mt);
			m0->m_flags &= ~M_PKTHDR;
			mt->m_pkthdr.len += recslen;
			mt->m_pkthdr.vt_nrecs +=
			    m0->m_pkthdr.vt_nrecs;

			mtl->m_next = m0;
		}
	}

	return 0;
}

/*
 * Respond to a pending MLDv2 General Query.
 */
static uint32_t
mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
{
	struct ifnet *ifp;
	struct in6_multi *inm;
	struct in6_multistep step;
	int retval;

	MLI_LOCK_ASSERT_HELD(mli);

	VERIFY(mli->mli_version == MLD_VERSION_2);

	ifp = mli->mli_ifp;
	MLI_UNLOCK(mli);

	in6_multihead_lock_shared();
	IN6_FIRST_MULTI(step, inm);
	while (inm != NULL) {
		IN6M_LOCK(inm);
		if (inm->in6m_ifp != ifp) {
			goto next;
		}

		switch (inm->in6m_state) {
		case MLD_NOT_MEMBER:
		case MLD_SILENT_MEMBER:
			break;
		case MLD_REPORTING_MEMBER:
		case MLD_IDLE_MEMBER:
		case MLD_LAZY_MEMBER:
		case MLD_SLEEPING_MEMBER:
		case MLD_AWAKENING_MEMBER:
			inm->in6m_state = MLD_REPORTING_MEMBER;
			MLI_LOCK(mli);
			retval = mld_v2_enqueue_group_record(&mli->mli_gq,
			    inm, 0, 0, 0, 0);
			MLI_UNLOCK(mli);
			MLD_PRINTF(("%s: enqueue record = %d\n",
			    __func__, retval));
			break;
		case MLD_G_QUERY_PENDING_MEMBER:
		case MLD_SG_QUERY_PENDING_MEMBER:
		case MLD_LEAVING_MEMBER:
			break;
		}
next:
		IN6M_UNLOCK(inm);
		IN6_NEXT_MULTI(step, inm);
	}
	in6_multihead_lock_done();

	MLI_LOCK(mli);
	mld_dispatch_queue_locked(mli, &mli->mli_gq, MLD_MAX_RESPONSE_BURST);
	MLI_LOCK_ASSERT_HELD(mli);

	/*
	 * Slew transmission of bursts over 1-second intervals.
	 */
	if (mli->mli_gq.ifq_head != NULL) {
		mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
			MLD_RESPONSE_BURST_INTERVAL);
	}

	return mli->mli_v2_timer;
}

/*
 * Transmit the next pending message in the output queue.
 *
 * Must not be called with in6m_lock or mli_lock held.
3668 */
static void
mld_dispatch_packet(struct mbuf *m)
{
	struct ip6_moptions *im6o;
	struct ifnet *ifp;
	struct ifnet *oifp = NULL;
	struct mbuf *m0;
	struct mbuf *md;
	struct ip6_hdr *ip6;
	struct mld_hdr *mld;
	int error;
	int off;
	int type;

	MLD_PRINTF(("%s: transmit 0x%llx\n", __func__,
	    (uint64_t)VM_KERNEL_ADDRPERM(m)));

	/*
	 * Check if the ifnet is still attached.
	 */
	ifp = mld_restore_context(m);
	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
		MLD_PRINTF(("%s: dropped 0x%llx as ifindex %u went away.\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m),
		    (u_int)if_index));
		m_freem(m);
		ip6stat.ip6s_noroute++;
		return;
	}

	im6o = ip6_allocmoptions(Z_WAITOK);
	if (im6o == NULL) {
		m_freem(m);
		return;
	}

	im6o->im6o_multicast_hlim = 1;
	im6o->im6o_multicast_loop = 0;
	im6o->im6o_multicast_ifp = ifp;

	if (m->m_flags & M_MLDV1) {
		m0 = m;
	} else {
		m0 = mld_v2_encap_report(ifp, m);
		if (m0 == NULL) {
			MLD_PRINTF(("%s: dropped 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			/*
			 * mld_v2_encap_report() has already freed our mbuf.
			 */
			IM6O_REMREF(im6o);
			ip6stat.ip6s_odropped++;
			return;
		}
	}

	mld_scrub_context(m0);
	m->m_flags &= ~(M_PROTOFLAGS);
	m0->m_pkthdr.rcvif = lo_ifp;

	ip6 = mtod(m0, struct ip6_hdr *);
	(void)in6_setscope(&ip6->ip6_dst, ifp, NULL);
	ip6_output_setdstifscope(m0, ifp->if_index, NULL);
	/*
	 * Retrieve the ICMPv6 type before handoff to ip6_output(),
	 * so we can bump the stats.
	 */
	md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
	mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off);
	type = mld->mld_type;

	if (ifp->if_eflags & IFEF_TXSTART) {
		/*
		 * Use control service class if the outgoing
		 * interface supports transmit-start model.
		 */
		(void) m_set_service_class(m0, MBUF_SC_CTL);
	}

	error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, im6o,
	    &oifp, NULL);

	IM6O_REMREF(im6o);

	if (error) {
		MLD_PRINTF(("%s: ip6_output(0x%llx) = %d\n", __func__,
		    (uint64_t)VM_KERNEL_ADDRPERM(m0), error));
		if (oifp != NULL) {
			ifnet_release(oifp);
		}
		return;
	}

	icmp6stat.icp6s_outhist[type]++;
	if (oifp != NULL) {
		icmp6_ifstat_inc(oifp, ifs6_out_msg);
		switch (type) {
		case MLD_LISTENER_REPORT:
		case MLDV2_LISTENER_REPORT:
			icmp6_ifstat_inc(oifp, ifs6_out_mldreport);
			break;
		case MLD_LISTENER_DONE:
			icmp6_ifstat_inc(oifp, ifs6_out_mlddone);
			break;
		}
		ifnet_release(oifp);
	}
}

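/*
 * Illustrative sketch, compiled out; not part of the original source.
 * mld_dispatch_packet() records the ICMPv6 type before calling
 * ip6_output(), because the mbuf chain is consumed by that call.  The
 * m_getptr() idiom used above locates the byte at a fixed offset into
 * the chain; generalized:
 */
#if 0
static uint8_t
mld_icmp6_type_sketch(struct mbuf *m0)
{
	struct mbuf *md;
	int off;

	/*
	 * Find the mbuf and intra-mbuf offset of the first byte past
	 * the IPv6 header; for MLD traffic that byte is the ICMPv6
	 * type field.
	 */
	md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
	return *(mtod(md, uint8_t *) + off);
}
#endif
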
/*
 * Encapsulate an MLDv2 report.
 *
 * KAME IPv6 requires that hop-by-hop options be passed separately,
 * and that the IPv6 header be prepended in a separate mbuf.
 *
 * Returns a pointer to the new mbuf chain head, or NULL if the
 * allocation failed.
 */
static struct mbuf *
mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
{
	struct mbuf *mh;
	struct mldv2_report *mld;
	struct ip6_hdr *ip6;
	struct in6_ifaddr *ia;
	int mldreclen;

	VERIFY(m->m_flags & M_PKTHDR);

	/*
	 * RFC3590: OK to send as :: or tentative during DAD.
	 */
	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
	if (ia == NULL) {
		MLD_PRINTF(("%s: warning: ia is NULL\n", __func__));
	}

	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
	if (mh == NULL) {
		if (ia != NULL) {
			IFA_REMREF(&ia->ia_ifa);
		}
		m_freem(m);
		return NULL;
	}
	MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));

	mldreclen = m_length(m);
	MLD_PRINTF(("%s: mldreclen is %d\n", __func__, mldreclen));

	mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
	mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
	    sizeof(struct mldv2_report) + mldreclen;

	ip6 = mtod(mh, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	if (ia != NULL) {
		IFA_LOCK(&ia->ia_ifa);
	}
	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
	ip6_output_setsrcifscope(mh, IFSCOPE_NONE, ia);

	if (ia != NULL) {
		IFA_UNLOCK(&ia->ia_ifa);
		IFA_REMREF(&ia->ia_ifa);
		ia = NULL;
	}
	ip6->ip6_dst = in6addr_linklocal_allv2routers;
	ip6_output_setdstifscope(mh, ifp->if_index, NULL);
	/* scope ID will be set in netisr */

	mld = (struct mldv2_report *)(ip6 + 1);
	mld->mld_type = MLDV2_LISTENER_REPORT;
	mld->mld_code = 0;
	mld->mld_cksum = 0;
	mld->mld_v2_reserved = 0;
	mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs);
	m->m_pkthdr.vt_nrecs = 0;
	m->m_flags &= ~M_PKTHDR;

	mh->m_next = m;
	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
	    sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
	return mh;
}

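/*
 * Illustrative note, not part of the original source: after
 * mld_v2_encap_report() succeeds, the outbound chain looks like
 *
 *   mh:          [ struct ip6_hdr | struct mldv2_report ]
 *   mh->m_next:  m (the group records; M_PKTHDR cleared and
 *                vt_nrecs moved into mld_v2_numrecs)
 *
 * and mld_cksum covers everything after the IPv6 header, i.e.
 * sizeof(struct mldv2_report) + mldreclen bytes.
 */
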
#ifdef MLD_DEBUG
static const char *
mld_rec_type_to_str(const int type)
{
	switch (type) {
	case MLD_CHANGE_TO_EXCLUDE_MODE:
		return "TO_EX";
	case MLD_CHANGE_TO_INCLUDE_MODE:
		return "TO_IN";
	case MLD_MODE_IS_EXCLUDE:
		return "MODE_EX";
	case MLD_MODE_IS_INCLUDE:
		return "MODE_IN";
	case MLD_ALLOW_NEW_SOURCES:
		return "ALLOW_NEW";
	case MLD_BLOCK_OLD_SOURCES:
		return "BLOCK_OLD";
	default:
		break;
	}
	return "unknown";
}
#endif

void
mld_init(void)
{
	MLD_PRINTF(("%s: initializing\n", __func__));

	ip6_initpktopts(&mld_po);
	mld_po.ip6po_hlim = 1;
	mld_po.ip6po_hbh = &mld_ra.hbh;
	mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
	mld_po.ip6po_flags = IP6PO_DONTFRAG;
	LIST_INIT(&mli_head);
}

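/*
 * Illustrative note, not part of the original source: mld_po is the
 * shared set of packet options passed to ip6_output() by
 * mld_dispatch_packet() above, so every MLD message leaves with hop
 * limit 1, the Router Alert hop-by-hop option (mld_ra.hbh), no
 * preference for temporary source addresses, and IP6PO_DONTFRAG set,
 * matching the transmission rules in RFC 2710 and RFC 3810.
 */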