1 /*
2 * Copyright (c) 2000-2023 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $FreeBSD: src/sys/netinet6/frag6.c,v 1.2.2.5 2001/07/03 11:01:50 ume Exp $ */
30 /* $KAME: frag6.c,v 1.31 2001/05/17 13:45:34 jinmei Exp $ */
31
32 /*
33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the project nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/malloc.h>
64 #include <sys/mcache.h>
65 #include <sys/mbuf.h>
66 #include <sys/domain.h>
67 #include <sys/protosw.h>
68 #include <sys/socket.h>
69 #include <sys/errno.h>
70 #include <sys/time.h>
71 #include <sys/kernel.h>
72 #include <sys/syslog.h>
73 #include <kern/queue.h>
74 #include <kern/locks.h>
75
76 #include <net/if.h>
77 #include <net/route.h>
78
79 #include <netinet/in.h>
80 #include <netinet/in_var.h>
81 #include <netinet/ip.h>
82 #include <netinet/ip_var.h>
83 #include <netinet/ip6.h>
84 #include <netinet6/ip6_var.h>
85 #include <netinet/icmp6.h>
86
87 #include <net/net_osdep.h>
88 #include <dev/random/randomdev.h>
89
/*
 * Define this to get correct behavior for per-interface statistics.
 */
93 #define IN6_IFSTAT_STRICT
/*
 * One queued fragment of a packet that is being reassembled.
 *
 * Entries are linked through ip6af_down/ip6af_up into a circular list that
 * frag6_input() keeps ordered by ip6af_off.  The list sentinel is the owning
 * struct ip6q itself, cast to struct ip6asfrag *.
 * NOTE(review): that cast presumably relies on struct ip6q beginning with
 * matching down/up pointers -- confirm against the struct ip6q definition
 * before reordering the members below.
 */
struct ip6asfrag {
	struct ip6asfrag *ip6af_down;   /* next entry in the list */
	struct ip6asfrag *ip6af_up;     /* previous entry in the list */
	struct mbuf *ip6af_m;           /* mbuf chain holding this fragment */
	int ip6af_offset;               /* offset in ip6af_m to next header */
	int ip6af_frglen;               /* fragmentable part length */
	int ip6af_off;                  /* fragment offset */
	u_int16_t ip6af_mff;            /* more fragment bit in frag off */
};
103
/* Accessor for the mbuf that carries a queued fragment's data. */
#define IP6_REASS_MBUF(ip6af) ((ip6af)->ip6af_m)

/*
 * Queue-head type (struct fq6_head) for the temporary mbuf lists this file
 * uses to defer ICMPv6 errors and mbuf frees.
 */
MBUFQ_HEAD(fq6_head);
107
/* Per-mbuf context: carries the ICMPv6 parameter-problem pointer value. */
static void frag6_save_context(struct mbuf *, int);
static void frag6_scrub_context(struct mbuf *);
static int frag6_restore_context(struct mbuf *);

/* Delivery of deferred ICMPv6 errors; called without ip6qlock held. */
static void frag6_icmp6_paramprob_error(struct fq6_head *);
static void frag6_icmp6_timeex_error(struct fq6_head *);

/* Fragment list and reassembly queue manipulation. */
static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *);
static void frag6_deq(struct ip6asfrag *);
static void frag6_insque(struct ip6q *, struct ip6q *);
static void frag6_remque(struct ip6q *);
static void frag6_purgef(struct ip6q *, struct fq6_head *, struct fq6_head *);
static void frag6_freef(struct ip6q *, struct fq6_head *, struct fq6_head *);

/* Purge timer. */
static int frag6_timeout_run;           /* frag6 timer is scheduled to run */
static void frag6_timeout(void *);
static void frag6_sched_timeout(void);

/* Allocation helpers for queue heads and fragment entries. */
static struct ip6q *ip6q_alloc(int);
static void ip6q_free(struct ip6q *);
static void ip6q_updateparams(void);
static struct ip6asfrag *ip6af_alloc(int);
static void ip6af_free(struct ip6asfrag *);
131
/* Mutex (and its lock group) guarding the reassembly state declared below. */
static LCK_GRP_DECLARE(ip6qlock_grp, "ip6qlock");
static LCK_MTX_DECLARE(ip6qlock, &ip6qlock_grp);

/*
 * IPv6 fragment reassembly queues (protected by ip6qlock).
 * frag6_init() sets the default values of ip6_maxfragpackets and
 * ip6_maxfrags.
 */
static struct ip6q ip6q;                /* ip6 reassembly queues */
static int ip6_maxfragpackets;          /* max packets in reass queues */
static u_int32_t frag6_nfragpackets;    /* # of packets in reass queues */
static int ip6_maxfrags;                /* max fragments in reass queues */
static u_int32_t frag6_nfrags;          /* # of fragments in reass queues */
static u_int32_t ip6q_limit;            /* ip6q allocation limit */
static u_int32_t ip6q_count;            /* current # of allocated ip6q's */
static u_int32_t ip6af_limit;           /* ip6asfrag allocation limit */
static u_int32_t ip6af_count;           /* current # of allocated ip6asfrag's */
145
/* Handlers for the two writable limits; their bodies are not in this view. */
static int sysctl_maxfragpackets SYSCTL_HANDLER_ARGS;
static int sysctl_maxfrags SYSCTL_HANDLER_ARGS;

SYSCTL_DECL(_net_inet6_ip6);

/* Read-write: limit on reassembly queue entries (ip6_maxfragpackets). */
SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfragpackets, 0,
    sysctl_maxfragpackets, "I",
    "Maximum number of IPv6 fragment reassembly queue entries");

/* Read-only: current number of reassembly queue entries. */
SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, fragpackets,
    CTLFLAG_RD | CTLFLAG_LOCKED, &frag6_nfragpackets, 0,
    "Current number of IPv6 fragment reassembly queue entries");

/* Read-write: limit on queued fragments (ip6_maxfrags). */
SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfrags, 0,
    sysctl_maxfrags, "I", "Maximum number of IPv6 fragments allowed");
163
164 /*
165 * Initialise reassembly queue and fragment identifier.
166 */
167 void
frag6_init(void)168 frag6_init(void)
169 {
170 /* ip6q_alloc() uses mbufs for IPv6 fragment queue structures */
171 _CASSERT(sizeof(struct ip6q) <= _MLEN);
172 /* ip6af_alloc() uses mbufs for IPv6 fragment queue structures */
173 _CASSERT(sizeof(struct ip6asfrag) <= _MLEN);
174
175 lck_mtx_lock(&ip6qlock);
176 /* Initialize IPv6 reassembly queue. */
177 ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q;
178
179 /* same limits as IPv4 */
180 ip6_maxfragpackets = nmbclusters / 32;
181 ip6_maxfrags = ip6_maxfragpackets * 2;
182 ip6q_updateparams();
183 lck_mtx_unlock(&ip6qlock);
184 }
185
186 static void
frag6_save_context(struct mbuf * m,int val)187 frag6_save_context(struct mbuf *m, int val)
188 {
189 m->m_pkthdr.pkt_hdr = (void *)(uintptr_t)val;
190 }
191
192 static void
frag6_scrub_context(struct mbuf * m)193 frag6_scrub_context(struct mbuf *m)
194 {
195 m->m_pkthdr.pkt_hdr = NULL;
196 }
197
198 static int
frag6_restore_context(struct mbuf * m)199 frag6_restore_context(struct mbuf *m)
200 {
201 return (int)m->m_pkthdr.pkt_hdr;
202 }
203
204 /*
205 * Send any deferred ICMP param problem error messages; caller must not be
206 * holding ip6qlock and is expected to have saved the per-packet parameter
207 * value via frag6_save_context().
208 */
209 static void
frag6_icmp6_paramprob_error(struct fq6_head * diq6)210 frag6_icmp6_paramprob_error(struct fq6_head *diq6)
211 {
212 LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED);
213
214 if (!MBUFQ_EMPTY(diq6)) {
215 struct mbuf *merr, *merr_tmp;
216 int param;
217 MBUFQ_FOREACH_SAFE(merr, diq6, merr_tmp) {
218 MBUFQ_REMOVE(diq6, merr);
219 MBUFQ_NEXT(merr) = NULL;
220 param = frag6_restore_context(merr);
221 frag6_scrub_context(merr);
222 icmp6_error(merr, ICMP6_PARAM_PROB,
223 ICMP6_PARAMPROB_HEADER, param);
224 }
225 }
226 }
227
228 /*
229 * Send any deferred ICMP time exceeded error messages;
230 * caller must not be holding ip6qlock.
231 */
232 static void
frag6_icmp6_timeex_error(struct fq6_head * diq6)233 frag6_icmp6_timeex_error(struct fq6_head *diq6)
234 {
235 LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED);
236
237 if (!MBUFQ_EMPTY(diq6)) {
238 struct mbuf *m, *m_tmp;
239 MBUFQ_FOREACH_SAFE(m, diq6, m_tmp) {
240 MBUFQ_REMOVE(diq6, m);
241 MBUFQ_NEXT(m) = NULL;
242 icmp6_error_flag(m, ICMP6_TIME_EXCEEDED,
243 ICMP6_TIME_EXCEED_REASSEMBLY, 0, 0);
244 }
245 }
246 }
247
248 /*
 * In RFC2460, the fragmentation and reassembly rules do not agree with each
 * other in terms of next header field handling in the fragment header.
 * While the sender will use the same value for all of the fragmented packets,
 * the receiver is advised not to check the consistency.
253 *
254 * fragment rule (p20):
255 * (2) A Fragment header containing:
256 * The Next Header value that identifies the first header of
257 * the Fragmentable Part of the original packet.
258 * -> next header field is same for all fragments
259 *
260 * reassembly rule (p21):
261 * The Next Header field of the last header of the Unfragmentable
262 * Part is obtained from the Next Header field of the first
263 * fragment's Fragment header.
264 * -> should grab it from the first fragment only
265 *
 * The following note also contradicts the fragment rule - no one is going to
 * send different fragments with different next header fields.
268 *
269 * additional note (p22):
270 * The Next Header values in the Fragment headers of different
271 * fragments of the same original packet may differ. Only the value
272 * from the Offset zero fragment packet is used for reassembly.
273 * -> should grab it from the first fragment only
274 *
275 * There is no explicit reason given in the RFC. Historical reason maybe?
276 */
277 /*
278 * Fragment input
279 */
280 int
frag6_input(struct mbuf ** mp,int * offp,int proto)281 frag6_input(struct mbuf **mp, int *offp, int proto)
282 {
283 #pragma unused(proto)
284 struct mbuf *m = *mp, *t = NULL;
285 struct ip6_hdr *ip6 = NULL;
286 struct ip6_frag *ip6f = NULL;
287 struct ip6q *q6 = NULL;
288 struct ip6asfrag *af6 = NULL, *ip6af = NULL, *af6dwn = NULL;
289 int offset = *offp, i = 0, next = 0;
290 u_int8_t nxt = 0;
291 int first_frag = 0;
292 int fragoff = 0, frgpartlen = 0; /* must be larger than u_int16_t */
293 struct ifnet *dstifp = NULL;
294 u_int8_t ecn = 0, ecn0 = 0;
295 uint32_t csum = 0, csum_flags = 0;
296 struct fq6_head diq6 = {};
297 int locked = 0;
298 boolean_t drop_fragq = FALSE;
299 int local_ip6q_unfrglen;
300 u_int8_t local_ip6q_nxt;
301
302 VERIFY(m->m_flags & M_PKTHDR);
303
304 MBUFQ_INIT(&diq6); /* for deferred ICMP param problem errors */
305
306 /* Expect 32-bit aligned data pointer on strict-align platforms */
307 MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
308
309 IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), goto done);
310 ip6 = mtod(m, struct ip6_hdr *);
311 ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
312
313 #ifdef IN6_IFSTAT_STRICT
314 /* find the destination interface of the packet. */
315 if (m->m_pkthdr.pkt_flags & PKTF_IFAINFO) {
316 uint32_t idx;
317
318 if (ip6_getdstifaddr_info(m, &idx, NULL) == 0) {
319 if (idx > 0 && idx <= if_index) {
320 ifnet_head_lock_shared();
321 dstifp = ifindex2ifnet[idx];
322 ifnet_head_done();
323 }
324 }
325 }
326 #endif /* IN6_IFSTAT_STRICT */
327
328 /* we are violating the spec, this may not be the dst interface */
329 if (dstifp == NULL) {
330 dstifp = m->m_pkthdr.rcvif;
331 }
332
333 /* jumbo payload can't contain a fragment header */
334 if (ip6->ip6_plen == 0) {
335 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
336 in6_ifstat_inc(dstifp, ifs6_reass_fail);
337 m = NULL;
338 goto done;
339 }
340
341 /*
342 * check whether fragment packet's fragment length is
343 * multiple of 8 octets.
344 * sizeof(struct ip6_frag) == 8
345 * sizeof(struct ip6_hdr) = 40
346 */
347 if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
348 (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
349 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
350 offsetof(struct ip6_hdr, ip6_plen));
351 in6_ifstat_inc(dstifp, ifs6_reass_fail);
352 m = NULL;
353 goto done;
354 }
355
356 /* If ip6_maxfragpackets or ip6_maxfrags is 0, never accept fragments */
357 if (ip6_maxfragpackets == 0 || ip6_maxfrags == 0) {
358 ip6stat.ip6s_fragments++;
359 ip6stat.ip6s_fragdropped++;
360 in6_ifstat_inc(dstifp, ifs6_reass_fail);
361 m_freem(m);
362 m = NULL;
363 goto done;
364 }
365
366 /* offset now points to data portion */
367 offset += sizeof(struct ip6_frag);
368
369 /*
370 * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0)
371 * upfront, unrelated to any reassembly. Just skip the fragment header.
372 */
373 if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
374 /*
375 * Mark packet as reassembled.
376 * In ICMPv6 processing, we drop certain
377 * NDP messages that are not expected to
378 * have fragment header based on recommendations
379 * against security vulnerability as described in
380 * RFC 6980.
381 * Treat atomic fragments as re-assembled packets as well.
382 */
383 m->m_pkthdr.pkt_flags |= PKTF_REASSEMBLED;
384 ip6stat.ip6s_atmfrag_rcvd++;
385 in6_ifstat_inc(dstifp, ifs6_atmfrag_rcvd);
386 *mp = m;
387 *offp = offset;
388 return ip6f->ip6f_nxt;
389 }
390
391 /*
392 * Leverage partial checksum offload for simple UDP/IP fragments,
393 * as that is the most common case.
394 *
395 * Perform 1's complement adjustment of octets that got included/
396 * excluded in the hardware-calculated checksum value. Also take
397 * care of any trailing bytes and subtract out their partial sum.
398 */
399 if (ip6f->ip6f_nxt == IPPROTO_UDP &&
400 offset == (sizeof(*ip6) + sizeof(*ip6f)) &&
401 (m->m_pkthdr.csum_flags &
402 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
403 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
404 uint32_t start = m->m_pkthdr.csum_rx_start;
405 uint32_t ip_len = (sizeof(*ip6) + ntohs(ip6->ip6_plen));
406 int32_t trailer = (m_pktlen(m) - ip_len);
407 uint32_t swbytes = (uint32_t)trailer;
408
409 csum = m->m_pkthdr.csum_rx_val;
410
411 ASSERT(trailer >= 0);
412 if (start != offset || trailer != 0) {
413 uint16_t s = 0, d = 0;
414
415 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
416 s = ip6->ip6_src.s6_addr16[1];
417 ip6->ip6_src.s6_addr16[1] = 0;
418 }
419 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
420 d = ip6->ip6_dst.s6_addr16[1];
421 ip6->ip6_dst.s6_addr16[1] = 0;
422 }
423
424 /* callee folds in sum */
425 csum = m_adj_sum16(m, start, offset,
426 (ip_len - offset), csum);
427 if (offset > start) {
428 swbytes += (offset - start);
429 } else {
430 swbytes += (start - offset);
431 }
432
433 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
434 ip6->ip6_src.s6_addr16[1] = s;
435 }
436 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
437 ip6->ip6_dst.s6_addr16[1] = d;
438 }
439 }
440 csum_flags = m->m_pkthdr.csum_flags;
441
442 if (swbytes != 0) {
443 udp_in6_cksum_stats(swbytes);
444 }
445 if (trailer != 0) {
446 m_adj(m, -trailer);
447 }
448 } else {
449 csum = 0;
450 csum_flags = 0;
451 }
452
453 /* Invalidate checksum */
454 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
455
456 ip6stat.ip6s_fragments++;
457 in6_ifstat_inc(dstifp, ifs6_reass_reqd);
458
459 lck_mtx_lock(&ip6qlock);
460 locked = 1;
461
462 for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next) {
463 if (ip6f->ip6f_ident == q6->ip6q_ident &&
464 in6_are_addr_equal_scoped(&ip6->ip6_src, &q6->ip6q_src, ip6_input_getsrcifscope(m), q6->ip6q_src_ifscope) &&
465 in6_are_addr_equal_scoped(&ip6->ip6_dst, &q6->ip6q_dst, ip6_input_getdstifscope(m), q6->ip6q_dst_ifscope)) {
466 break;
467 }
468 }
469
470 if (q6 == &ip6q) {
471 /*
472 * Create a reassembly queue as this is the first fragment to
473 * arrive.
474 * By first frag, we don't mean the one with offset 0, but
475 * any of the fragments of the fragmented packet that has
476 * reached us first.
477 */
478 first_frag = 1;
479
480 q6 = ip6q_alloc(M_DONTWAIT);
481 if (q6 == NULL) {
482 goto dropfrag;
483 }
484
485 frag6_insque(q6, &ip6q);
486 frag6_nfragpackets++;
487
488 /* ip6q_nxt will be filled afterwards, from 1st fragment */
489 q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6;
490 #ifdef notyet
491 q6->ip6q_nxtp = (u_char *)nxtp;
492 #endif
493 q6->ip6q_ident = ip6f->ip6f_ident;
494 q6->ip6q_ttl = IPV6_FRAGTTL;
495 q6->ip6q_src = ip6->ip6_src;
496 q6->ip6q_dst = ip6->ip6_dst;
497 q6->ip6q_dst_ifscope = IN6_IS_SCOPE_EMBED(&q6->ip6q_dst) ? ip6_input_getdstifscope(m) : IFSCOPE_NONE;
498 q6->ip6q_src_ifscope = IN6_IS_SCOPE_EMBED(&q6->ip6q_src) ? ip6_input_getsrcifscope(m) : IFSCOPE_NONE;
499 q6->ip6q_ecn =
500 (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
501 q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */
502
503 q6->ip6q_nfrag = 0;
504 q6->ip6q_flags = 0;
505
506 /*
507 * If the first fragment has valid checksum offload
508 * info, the rest of fragments are eligible as well.
509 */
510 if (csum_flags != 0) {
511 q6->ip6q_csum = csum;
512 q6->ip6q_csum_flags = csum_flags;
513 }
514 }
515
516 if (q6->ip6q_flags & IP6QF_DIRTY) {
517 goto dropfrag;
518 }
519
520 local_ip6q_unfrglen = q6->ip6q_unfrglen;
521 local_ip6q_nxt = q6->ip6q_nxt;
522
523 /*
524 * If it's the 1st fragment, record the length of the
525 * unfragmentable part and the next header of the fragment header.
526 * Assume the first fragement to arrive will be correct.
527 * We do not have any duplicate checks here yet so another packet
528 * with fragoff == 0 could come and overwrite the ip6q_unfrglen
529 * and worse, the next header, at any time.
530 */
531 fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
532 if (fragoff == 0 && local_ip6q_unfrglen == -1) {
533 local_ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
534 sizeof(struct ip6_frag);
535 local_ip6q_nxt = ip6f->ip6f_nxt;
536 /* XXX ECN? */
537 }
538
539 /*
540 * Check that the reassembled packet would not exceed 65535 bytes
541 * in size.
542 * If it would exceed, discard the fragment and return an ICMP error.
543 */
544 frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
545 if (local_ip6q_unfrglen >= 0) {
546 /* The 1st fragment has already arrived. */
547 if (local_ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
548 lck_mtx_unlock(&ip6qlock);
549 locked = 0;
550 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
551 offset - sizeof(struct ip6_frag) +
552 offsetof(struct ip6_frag, ip6f_offlg));
553 m = NULL;
554 goto done;
555 }
556 } else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
557 lck_mtx_unlock(&ip6qlock);
558 locked = 0;
559 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
560 offset - sizeof(struct ip6_frag) +
561 offsetof(struct ip6_frag, ip6f_offlg));
562 m = NULL;
563 goto done;
564 }
565 /*
566 * If it's the first fragment, do the above check for each
567 * fragment already stored in the reassembly queue.
568 */
569 if (fragoff == 0) {
570 /*
571 * https://tools.ietf.org/html/rfc8200#page-20
572 * If the first fragment does not include all headers through an
573 * Upper-Layer header, then that fragment should be discarded and
574 * an ICMP Parameter Problem, Code 3, message should be sent to
575 * the source of the fragment, with the Pointer field set to zero.
576 */
577 if (!ip6_pkt_has_ulp(m)) {
578 lck_mtx_unlock(&ip6qlock);
579 locked = 0;
580 icmp6_error(m, ICMP6_PARAM_PROB,
581 ICMP6_PARAMPROB_FIRSTFRAG_INCOMP_HDR, 0);
582 m = NULL;
583 goto done;
584 }
585 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
586 af6 = af6dwn) {
587 af6dwn = af6->ip6af_down;
588
589 if (local_ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
590 IPV6_MAXPACKET) {
591 struct mbuf *merr = IP6_REASS_MBUF(af6);
592 struct ip6_hdr *ip6err;
593 int erroff = af6->ip6af_offset;
594
595 /* dequeue the fragment. */
596 frag6_deq(af6);
597 ip6af_free(af6);
598
599 /* adjust pointer. */
600 ip6err = mtod(merr, struct ip6_hdr *);
601
602 /*
603 * Restore source and destination addresses
604 * in the erroneous IPv6 header.
605 */
606 ip6err->ip6_src = q6->ip6q_src;
607 ip6err->ip6_dst = q6->ip6q_dst;
608 ip6_output_setdstifscope(m, q6->ip6q_dst_ifscope, NULL);
609 ip6_output_setsrcifscope(m, q6->ip6q_src_ifscope, NULL);
610 frag6_save_context(merr,
611 erroff - sizeof(struct ip6_frag) +
612 offsetof(struct ip6_frag, ip6f_offlg));
613
614 MBUFQ_ENQUEUE(&diq6, merr);
615 }
616 }
617 }
618
619 ip6af = ip6af_alloc(M_DONTWAIT);
620 if (ip6af == NULL) {
621 goto dropfrag;
622 }
623
624 ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
625 ip6af->ip6af_off = fragoff;
626 ip6af->ip6af_frglen = frgpartlen;
627 ip6af->ip6af_offset = offset;
628 IP6_REASS_MBUF(ip6af) = m;
629
630 if (first_frag) {
631 af6 = (struct ip6asfrag *)q6;
632 goto insert;
633 }
634
635 /*
636 * Handle ECN by comparing this segment with the first one;
637 * if CE is set, do not lose CE.
638 * drop if CE and not-ECT are mixed for the same packet.
639 */
640 ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
641 ecn0 = q6->ip6q_ecn;
642 if (ecn == IPTOS_ECN_CE) {
643 if (ecn0 == IPTOS_ECN_NOTECT) {
644 ip6af_free(ip6af);
645 goto dropfrag;
646 }
647 if (ecn0 != IPTOS_ECN_CE) {
648 q6->ip6q_ecn = IPTOS_ECN_CE;
649 }
650 }
651 if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
652 ip6af_free(ip6af);
653 goto dropfrag;
654 }
655
656 /*
657 * Find a segment which begins after this one does.
658 */
659 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
660 af6 = af6->ip6af_down) {
661 if (af6->ip6af_off > ip6af->ip6af_off) {
662 break;
663 }
664 }
665
666 /*
667 * As per RFC 8200 reassembly rules, we MUST drop the entire
668 * chain of fragments for a packet to be assembled, if we receive
669 * any overlapping fragments.
670 * https://tools.ietf.org/html/rfc8200#page-20
671 *
672 * To avoid more conditional code, just reuse frag6_freef and defer
673 * its call to post fragment insertion in the queue.
674 */
675 if (af6->ip6af_up != (struct ip6asfrag *)q6) {
676 if (af6->ip6af_up->ip6af_off == ip6af->ip6af_off) {
677 if (af6->ip6af_up->ip6af_frglen != ip6af->ip6af_frglen) {
678 drop_fragq = TRUE;
679 } else {
680 /*
681 * XXX Ideally we should be comparing the entire
682 * packet here but for now just use off and fraglen
683 * to ignore a duplicate fragment.
684 */
685 ip6af_free(ip6af);
686 goto dropfrag;
687 }
688 } else {
689 i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
690 - ip6af->ip6af_off;
691 if (i > 0) {
692 drop_fragq = TRUE;
693 }
694 }
695 }
696
697 if (af6 != (struct ip6asfrag *)q6) {
698 /*
699 * Given that we break when af6->ip6af_off > ip6af->ip6af_off,
700 * we shouldn't need a check for duplicate fragment here.
701 * For now just assert.
702 */
703 VERIFY(af6->ip6af_off != ip6af->ip6af_off);
704 i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
705 if (i > 0) {
706 drop_fragq = TRUE;
707 }
708 }
709
710 /*
711 * If this fragment contains similar checksum offload info
712 * as that of the existing ones, accumulate checksum. Otherwise,
713 * invalidate checksum offload info for the entire datagram.
714 */
715 if (csum_flags != 0 && csum_flags == q6->ip6q_csum_flags) {
716 q6->ip6q_csum += csum;
717 } else if (q6->ip6q_csum_flags != 0) {
718 q6->ip6q_csum_flags = 0;
719 }
720
721 insert:
722 /*
723 * Stick new segment in its place;
724 * check for complete reassembly.
725 * Move to front of packet queue, as we are
726 * the most recently active fragmented packet.
727 */
728 frag6_enq(ip6af, af6->ip6af_up);
729 frag6_nfrags++;
730 q6->ip6q_nfrag++;
731
732 /*
733 * This holds true, when we receive overlapping fragments.
734 * We must silently drop all the fragments we have received
735 * so far.
736 * Also mark q6 as dirty, so as to not add any new fragments to it.
737 * Make sure even q6 marked dirty is kept till timer expires for
738 * reassembly and when that happens, silenty get rid of q6
739 */
740 if (drop_fragq) {
741 struct fq6_head dfq6 = {0};
742 MBUFQ_INIT(&dfq6); /* for deferred frees */
743 q6->ip6q_flags |= IP6QF_DIRTY;
744 /* Purge all the fragments but do not free q6 */
745 frag6_purgef(q6, &dfq6, NULL);
746 af6 = NULL;
747
748 /* free fragments that need to be freed */
749 if (!MBUFQ_EMPTY(&dfq6)) {
750 MBUFQ_DRAIN(&dfq6);
751 }
752 VERIFY(MBUFQ_EMPTY(&dfq6));
753 /*
754 * Just in case the above logic got anything added
755 * to diq6, drain it.
756 * Please note that these mbufs are not present in the
757 * fragment queue and are added to diq6 for sending
758 * ICMPv6 error.
759 * Given that the current fragment was an overlapping
760 * fragment and the RFC requires us to not send any
761 * ICMPv6 errors while purging the entire queue.
762 * Just empty it out.
763 */
764 if (!MBUFQ_EMPTY(&diq6)) {
765 MBUFQ_DRAIN(&diq6);
766 }
767 VERIFY(MBUFQ_EMPTY(&diq6));
768 /*
769 * MBUFQ_DRAIN would have drained all the mbufs
770 * in the fragment queue.
771 * This shouldn't be needed as we are returning IPPROTO_DONE
772 * from here but change the passed mbuf pointer to NULL.
773 */
774 *mp = NULL;
775 lck_mtx_unlock(&ip6qlock);
776 return IPPROTO_DONE;
777 }
778
779 /*
780 * We're keeping the fragment.
781 */
782 q6->ip6q_unfrglen = local_ip6q_unfrglen;
783 q6->ip6q_nxt = local_ip6q_nxt;
784
785 next = 0;
786 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
787 af6 = af6->ip6af_down) {
788 if (af6->ip6af_off != next) {
789 lck_mtx_unlock(&ip6qlock);
790 locked = 0;
791 m = NULL;
792 goto done;
793 }
794 next += af6->ip6af_frglen;
795 }
796 if (af6->ip6af_up->ip6af_mff) {
797 lck_mtx_unlock(&ip6qlock);
798 locked = 0;
799 m = NULL;
800 goto done;
801 }
802
803 /*
804 * Reassembly is complete; concatenate fragments.
805 */
806 ip6af = q6->ip6q_down;
807 t = m = IP6_REASS_MBUF(ip6af);
808 af6 = ip6af->ip6af_down;
809 frag6_deq(ip6af);
810 while (af6 != (struct ip6asfrag *)q6) {
811 af6dwn = af6->ip6af_down;
812 frag6_deq(af6);
813 while (t->m_next) {
814 t = t->m_next;
815 }
816 t->m_next = IP6_REASS_MBUF(af6);
817 m_adj(t->m_next, af6->ip6af_offset);
818 ip6af_free(af6);
819 af6 = af6dwn;
820 }
821
822 /*
823 * Store partial hardware checksum info from the fragment queue;
824 * the receive start offset is set to 40 bytes (see code at the
825 * top of this routine.)
826 */
827 if (q6->ip6q_csum_flags != 0) {
828 csum = q6->ip6q_csum;
829
830 ADDCARRY(csum);
831
832 m->m_pkthdr.csum_rx_val = (u_int16_t)csum;
833 m->m_pkthdr.csum_rx_start = sizeof(struct ip6_hdr);
834 m->m_pkthdr.csum_flags = q6->ip6q_csum_flags;
835 } else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
836 (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
837 /* loopback checksums are always OK */
838 m->m_pkthdr.csum_data = 0xffff;
839 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
840 }
841
842 /* adjust offset to point where the original next header starts */
843 offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
844 ip6af_free(ip6af);
845 ip6 = mtod(m, struct ip6_hdr *);
846 ip6->ip6_plen = htons((uint16_t)(next + offset - sizeof(struct ip6_hdr)));
847 ip6->ip6_src = q6->ip6q_src;
848 ip6->ip6_dst = q6->ip6q_dst;
849 ip6_output_setdstifscope(m, q6->ip6q_dst_ifscope, NULL);
850 ip6_output_setsrcifscope(m, q6->ip6q_src_ifscope, NULL);
851 if (q6->ip6q_ecn == IPTOS_ECN_CE) {
852 ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
853 }
854
855 nxt = q6->ip6q_nxt;
856 #ifdef notyet
857 *q6->ip6q_nxtp = (u_char)(nxt & 0xff);
858 #endif
859
860 /* Delete frag6 header */
861 if (m->m_len >= offset + sizeof(struct ip6_frag)) {
862 /* This is the only possible case with !PULLDOWN_TEST */
863 ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
864 offset);
865 m->m_data += sizeof(struct ip6_frag);
866 m->m_len -= sizeof(struct ip6_frag);
867 } else {
868 /* this comes with no copy if the boundary is on cluster */
869 if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
870 frag6_remque(q6);
871 frag6_nfragpackets--;
872 frag6_nfrags -= q6->ip6q_nfrag;
873 ip6q_free(q6);
874 goto dropfrag;
875 }
876 m_adj(t, sizeof(struct ip6_frag));
877 m_cat(m, t);
878 }
879
880 /*
881 * Store NXT to the original.
882 */
883 {
884 char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */
885 *prvnxtp = nxt;
886 }
887
888 frag6_remque(q6);
889 frag6_nfragpackets--;
890 frag6_nfrags -= q6->ip6q_nfrag;
891 ip6q_free(q6);
892
893 if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
894 m_fixhdr(m);
895 /*
896 * Mark packet as reassembled
897 * In ICMPv6 processing, we drop certain
898 * NDP messages that are not expected to
899 * have fragment header based on recommendations
900 * against security vulnerability as described in
901 * RFC 6980.
902 */
903 m->m_pkthdr.pkt_flags |= PKTF_REASSEMBLED;
904 }
905 ip6stat.ip6s_reassembled++;
906
907 /*
908 * Tell launch routine the next header
909 */
910 *mp = m;
911 *offp = offset;
912
913 /* arm the purge timer if not already and if there's work to do */
914 frag6_sched_timeout();
915 lck_mtx_unlock(&ip6qlock);
916 in6_ifstat_inc(dstifp, ifs6_reass_ok);
917 frag6_icmp6_paramprob_error(&diq6);
918 VERIFY(MBUFQ_EMPTY(&diq6));
919 return nxt;
920
921 done:
922 VERIFY(m == NULL);
923 *mp = m;
924 if (!locked) {
925 if (frag6_nfragpackets == 0) {
926 frag6_icmp6_paramprob_error(&diq6);
927 VERIFY(MBUFQ_EMPTY(&diq6));
928 return IPPROTO_DONE;
929 }
930 lck_mtx_lock(&ip6qlock);
931 }
932 /* arm the purge timer if not already and if there's work to do */
933 frag6_sched_timeout();
934 lck_mtx_unlock(&ip6qlock);
935 frag6_icmp6_paramprob_error(&diq6);
936 VERIFY(MBUFQ_EMPTY(&diq6));
937 return IPPROTO_DONE;
938
939 dropfrag:
940 ip6stat.ip6s_fragdropped++;
941 /* arm the purge timer if not already and if there's work to do */
942 frag6_sched_timeout();
943 lck_mtx_unlock(&ip6qlock);
944 in6_ifstat_inc(dstifp, ifs6_reass_fail);
945 m_freem(m);
946 *mp = NULL;
947 frag6_icmp6_paramprob_error(&diq6);
948 VERIFY(MBUFQ_EMPTY(&diq6));
949 return IPPROTO_DONE;
950 }
951
952 /*
953 * This routine removes the enqueued frames from the passed fragment
954 * header and enqueues those to dfq6 which is an out-arg for the dequeued
955 * fragments.
956 * If the caller also provides diq6, this routine also enqueues the 0 offset
957 * fragment to that list as it potentially gets used by the caller
958 * to prepare the relevant ICMPv6 error message (time exceeded or
959 * param problem).
960 * It leaves the fragment header object (q6) intact.
961 */
962 static void
frag6_purgef(struct ip6q * q6,struct fq6_head * dfq6,struct fq6_head * diq6)963 frag6_purgef(struct ip6q *q6, struct fq6_head *dfq6, struct fq6_head *diq6)
964 {
965 struct ip6asfrag *af6 = NULL;
966 struct ip6asfrag *down6 = NULL;
967
968 LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
969
970 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
971 af6 = down6) {
972 struct mbuf *m = IP6_REASS_MBUF(af6);
973
974 down6 = af6->ip6af_down;
975 frag6_deq(af6);
976
977 /*
978 * If caller wants to generate ICMP time-exceeded,
979 * as indicated by the argument diq6, return it for
980 * the first fragment and add others to the fragment
981 * free queue.
982 */
983 if (af6->ip6af_off == 0 && diq6 != NULL) {
984 struct ip6_hdr *ip6;
985
986 /* adjust pointer */
987 ip6 = mtod(m, struct ip6_hdr *);
988
989 /* restore source and destination addresses */
990 ip6->ip6_src = q6->ip6q_src;
991 ip6->ip6_dst = q6->ip6q_dst;
992 ip6_output_setdstifscope(m, q6->ip6q_dst_ifscope, NULL);
993 ip6_output_setsrcifscope(m, q6->ip6q_src_ifscope, NULL);
994 MBUFQ_ENQUEUE(diq6, m);
995 } else {
996 MBUFQ_ENQUEUE(dfq6, m);
997 }
998 ip6af_free(af6);
999 }
1000 }
1001
1002 /*
1003 * This routine removes the enqueued frames from the passed fragment
1004 * header and enqueues those to dfq6 which is an out-arg for the dequeued
1005 * fragments.
1006 * If the caller also provides diq6, this routine also enqueues the 0 offset
1007 * fragment to that list as it potentially gets used by the caller
1008 * to prepare the relevant ICMPv6 error message (time exceeded or
1009 * param problem).
1010 * It also remove the fragment header object from the queue and frees it.
1011 */
1012 static void
frag6_freef(struct ip6q * q6,struct fq6_head * dfq6,struct fq6_head * diq6)1013 frag6_freef(struct ip6q *q6, struct fq6_head *dfq6, struct fq6_head *diq6)
1014 {
1015 frag6_purgef(q6, dfq6, diq6);
1016 frag6_remque(q6);
1017 frag6_nfragpackets--;
1018 frag6_nfrags -= q6->ip6q_nfrag;
1019 ip6q_free(q6);
1020 }
1021
1022 /*
1023 * Put an ip fragment on a reassembly chain.
1024 * Like insque, but pointers in middle of structure.
1025 */
1026 void
frag6_enq(struct ip6asfrag * af6,struct ip6asfrag * up6)1027 frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
1028 {
1029 LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
1030
1031 af6->ip6af_up = up6;
1032 af6->ip6af_down = up6->ip6af_down;
1033 up6->ip6af_down->ip6af_up = af6;
1034 up6->ip6af_down = af6;
1035 }
1036
1037 /*
1038 * To frag6_enq as remque is to insque.
1039 */
1040 void
frag6_deq(struct ip6asfrag * af6)1041 frag6_deq(struct ip6asfrag *af6)
1042 {
1043 LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
1044
1045 af6->ip6af_up->ip6af_down = af6->ip6af_down;
1046 af6->ip6af_down->ip6af_up = af6->ip6af_up;
1047 }
1048
1049 void
frag6_insque(struct ip6q * new,struct ip6q * old)1050 frag6_insque(struct ip6q *new, struct ip6q *old)
1051 {
1052 LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
1053
1054 new->ip6q_prev = old;
1055 new->ip6q_next = old->ip6q_next;
1056 old->ip6q_next->ip6q_prev = new;
1057 old->ip6q_next = new;
1058 }
1059
1060 void
frag6_remque(struct ip6q * p6)1061 frag6_remque(struct ip6q *p6)
1062 {
1063 LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
1064
1065 p6->ip6q_prev->ip6q_next = p6->ip6q_next;
1066 p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
1067 }
1068
1069 /*
1070 * IPv6 reassembling timer processing;
1071 * if a timer expires on a reassembly
1072 * queue, discard it.
1073 */
1074 static void
frag6_timeout(void * arg)1075 frag6_timeout(void *arg)
1076 {
1077 #pragma unused(arg)
1078 struct fq6_head dfq6, diq6;
1079 struct fq6_head *diq6_tmp = NULL;
1080 struct ip6q *q6;
1081
1082 MBUFQ_INIT(&dfq6); /* for deferred frees */
1083 MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */
1084
1085 /*
1086 * Update coarse-grained networking timestamp (in sec.); the idea
1087 * is to piggy-back on the timeout callout to update the counter
1088 * returnable via net_uptime().
1089 */
1090 net_update_uptime();
1091
1092 lck_mtx_lock(&ip6qlock);
1093 q6 = ip6q.ip6q_next;
1094 if (q6) {
1095 while (q6 != &ip6q) {
1096 --q6->ip6q_ttl;
1097 q6 = q6->ip6q_next;
1098 if (q6->ip6q_prev->ip6q_ttl == 0) {
1099 ip6stat.ip6s_fragtimeout++;
1100 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
1101 /*
1102 * Avoid sending ICMPv6 Time Exceeded for fragment headers
1103 * that are marked dirty.
1104 */
1105 diq6_tmp = (q6->ip6q_prev->ip6q_flags & IP6QF_DIRTY) ?
1106 NULL : &diq6;
1107 frag6_freef(q6->ip6q_prev, &dfq6, diq6_tmp);
1108 }
1109 }
1110 }
1111 /*
1112 * If we are over the maximum number of fragments
1113 * (due to the limit being lowered), drain off
1114 * enough to get down to the new limit.
1115 */
1116 if (ip6_maxfragpackets >= 0) {
1117 while (frag6_nfragpackets > (unsigned)ip6_maxfragpackets &&
1118 ip6q.ip6q_prev) {
1119 ip6stat.ip6s_fragoverflow++;
1120 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
1121 /*
1122 * Avoid sending ICMPv6 Time Exceeded for fragment headers
1123 * that are marked dirty.
1124 */
1125 diq6_tmp = (ip6q.ip6q_prev->ip6q_flags & IP6QF_DIRTY) ?
1126 NULL : &diq6;
1127 frag6_freef(ip6q.ip6q_prev, &dfq6, diq6_tmp);
1128 }
1129 }
1130 /* re-arm the purge timer if there's work to do */
1131 frag6_timeout_run = 0;
1132 frag6_sched_timeout();
1133 lck_mtx_unlock(&ip6qlock);
1134
1135 /* free fragments that need to be freed */
1136 if (!MBUFQ_EMPTY(&dfq6)) {
1137 MBUFQ_DRAIN(&dfq6);
1138 }
1139
1140 frag6_icmp6_timeex_error(&diq6);
1141
1142 VERIFY(MBUFQ_EMPTY(&dfq6));
1143 VERIFY(MBUFQ_EMPTY(&diq6));
1144 }
1145
1146 static void
frag6_sched_timeout(void)1147 frag6_sched_timeout(void)
1148 {
1149 LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
1150
1151 if (!frag6_timeout_run && frag6_nfragpackets > 0) {
1152 frag6_timeout_run = 1;
1153 timeout(frag6_timeout, NULL, hz);
1154 }
1155 }
1156
1157 /*
1158 * Drain off all datagram fragments.
1159 */
1160 void
frag6_drain(void)1161 frag6_drain(void)
1162 {
1163 struct fq6_head dfq6, diq6;
1164 struct fq6_head *diq6_tmp = NULL;
1165
1166 MBUFQ_INIT(&dfq6); /* for deferred frees */
1167 MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */
1168
1169 lck_mtx_lock(&ip6qlock);
1170 while (ip6q.ip6q_next != &ip6q) {
1171 ip6stat.ip6s_fragdropped++;
1172 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
1173 /*
1174 * Avoid sending ICMPv6 Time Exceeded for fragment headers
1175 * that are marked dirty.
1176 */
1177 diq6_tmp = (ip6q.ip6q_next->ip6q_flags & IP6QF_DIRTY) ?
1178 NULL : &diq6;
1179 frag6_freef(ip6q.ip6q_next, &dfq6, diq6_tmp);
1180 }
1181 lck_mtx_unlock(&ip6qlock);
1182
1183 /* free fragments that need to be freed */
1184 if (!MBUFQ_EMPTY(&dfq6)) {
1185 MBUFQ_DRAIN(&dfq6);
1186 }
1187
1188 frag6_icmp6_timeex_error(&diq6);
1189
1190 VERIFY(MBUFQ_EMPTY(&dfq6));
1191 VERIFY(MBUFQ_EMPTY(&diq6));
1192 }
1193
1194 static struct ip6q *
ip6q_alloc(int how)1195 ip6q_alloc(int how)
1196 {
1197 struct mbuf *t;
1198 struct ip6q *q6;
1199
1200 /*
1201 * See comments in ip6q_updateparams(). Keep the count separate
1202 * from frag6_nfragpackets since the latter represents the elements
1203 * already in the reassembly queues.
1204 */
1205 if (ip6q_limit > 0 && ip6q_count > ip6q_limit) {
1206 return NULL;
1207 }
1208
1209 t = m_get(how, MT_FTABLE);
1210 if (t != NULL) {
1211 atomic_add_32(&ip6q_count, 1);
1212 q6 = mtod(t, struct ip6q *);
1213 bzero(q6, sizeof(*q6));
1214 } else {
1215 q6 = NULL;
1216 }
1217 return q6;
1218 }
1219
1220 static void
ip6q_free(struct ip6q * q6)1221 ip6q_free(struct ip6q *q6)
1222 {
1223 (void) m_free(dtom(q6));
1224 atomic_add_32(&ip6q_count, -1);
1225 }
1226
1227 static struct ip6asfrag *
ip6af_alloc(int how)1228 ip6af_alloc(int how)
1229 {
1230 struct mbuf *t;
1231 struct ip6asfrag *af6;
1232
1233 /*
1234 * See comments in ip6q_updateparams(). Keep the count separate
1235 * from frag6_nfrags since the latter represents the elements
1236 * already in the reassembly queues.
1237 */
1238 if (ip6af_limit > 0 && ip6af_count > ip6af_limit) {
1239 return NULL;
1240 }
1241
1242 t = m_get(how, MT_FTABLE);
1243 if (t != NULL) {
1244 atomic_add_32(&ip6af_count, 1);
1245 af6 = mtod(t, struct ip6asfrag *);
1246 bzero(af6, sizeof(*af6));
1247 } else {
1248 af6 = NULL;
1249 }
1250 return af6;
1251 }
1252
1253 static void
ip6af_free(struct ip6asfrag * af6)1254 ip6af_free(struct ip6asfrag *af6)
1255 {
1256 (void) m_free(dtom(af6));
1257 atomic_add_32(&ip6af_count, -1);
1258 }
1259
1260 static void
ip6q_updateparams(void)1261 ip6q_updateparams(void)
1262 {
1263 LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
1264 /*
1265 * -1 for unlimited allocation.
1266 */
1267 if (ip6_maxfragpackets < 0) {
1268 ip6q_limit = 0;
1269 }
1270 if (ip6_maxfrags < 0) {
1271 ip6af_limit = 0;
1272 }
1273 /*
1274 * Positive number for specific bound.
1275 */
1276 if (ip6_maxfragpackets > 0) {
1277 ip6q_limit = ip6_maxfragpackets;
1278 }
1279 if (ip6_maxfrags > 0) {
1280 ip6af_limit = ip6_maxfrags;
1281 }
1282 /*
1283 * Zero specifies no further fragment queue allocation -- set the
1284 * bound very low, but rely on implementation elsewhere to actually
1285 * prevent allocation and reclaim current queues.
1286 */
1287 if (ip6_maxfragpackets == 0) {
1288 ip6q_limit = 1;
1289 }
1290 if (ip6_maxfrags == 0) {
1291 ip6af_limit = 1;
1292 }
1293 /*
1294 * Arm the purge timer if not already and if there's work to do
1295 */
1296 frag6_sched_timeout();
1297 }
1298
1299 static int
1300 sysctl_maxfragpackets SYSCTL_HANDLER_ARGS
1301 {
1302 #pragma unused(arg1, arg2)
1303 int error, i;
1304
1305 lck_mtx_lock(&ip6qlock);
1306 i = ip6_maxfragpackets;
1307 error = sysctl_handle_int(oidp, &i, 0, req);
1308 if (error || req->newptr == USER_ADDR_NULL) {
1309 goto done;
1310 }
1311 /* impose bounds */
1312 if (i < -1 || i > (nmbclusters / 4)) {
1313 error = EINVAL;
1314 goto done;
1315 }
1316 ip6_maxfragpackets = i;
1317 ip6q_updateparams();
1318 done:
1319 lck_mtx_unlock(&ip6qlock);
1320 return error;
1321 }
1322
1323 static int
1324 sysctl_maxfrags SYSCTL_HANDLER_ARGS
1325 {
1326 #pragma unused(arg1, arg2)
1327 int error, i;
1328
1329 lck_mtx_lock(&ip6qlock);
1330 i = ip6_maxfrags;
1331 error = sysctl_handle_int(oidp, &i, 0, req);
1332 if (error || req->newptr == USER_ADDR_NULL) {
1333 goto done;
1334 }
1335 /* impose bounds */
1336 if (i < -1 || i > (nmbclusters / 4)) {
1337 error = EINVAL;
1338 goto done;
1339 }
1340 ip6_maxfrags = i;
1341 ip6q_updateparams(); /* see if we need to arm timer */
1342 done:
1343 lck_mtx_unlock(&ip6qlock);
1344 return error;
1345 }
1346