1 /*
2 * Copyright (c) 2007-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */
30 /* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */
31
32 /*
33 * Copyright 2001 Niels Provos <[email protected]>
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
46 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
47 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
48 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
49 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
50 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
51 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
52 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
53 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
54 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
55 */
56
57 #include <sys/param.h>
58 #include <sys/systm.h>
59 #include <sys/mbuf.h>
60 #include <sys/filio.h>
61 #include <sys/fcntl.h>
62 #include <sys/socket.h>
63 #include <sys/kernel.h>
64 #include <sys/time.h>
65 #include <sys/random.h>
66 #include <sys/mcache.h>
67
68 #include <net/if.h>
69 #include <net/if_types.h>
70 #include <net/bpf.h>
71 #include <net/route.h>
72 #include <net/if_pflog.h>
73
74 #include <netinet/in.h>
75 #include <netinet/in_var.h>
76 #include <netinet/in_systm.h>
77 #include <netinet/ip.h>
78 #include <netinet/ip_var.h>
79 #include <netinet/tcp.h>
80 #include <netinet/tcp_seq.h>
81 #include <netinet/tcp_fsm.h>
82 #include <netinet/udp.h>
83 #include <netinet/ip_icmp.h>
84
85 #include <netinet/ip6.h>
86 #include <netinet6/ip6_var.h>
87
88 #include <net/pfvar.h>
89 #include <net/droptap.h>
90
/*
 * One buffered fragment: the fragment's mbuf chain plus a pointer to its
 * IPv4/IPv6 header (which lives inside the mbuf data).  Used by the
 * buffering reassembly paths (pf_reassemble / pf_reassemble6).
 */
struct pf_frent {
	LIST_ENTRY(pf_frent) fr_next;   /* linkage in pf_fragment.fr_queue */
	struct mbuf *fr_m;              /* mbuf holding this fragment */
#define fr_ip   fr_u.fru_ipv4
#define fr_ip6  fr_u.fru_ipv6
	union {
		struct ip *fru_ipv4;            /* IPv4 header within fr_m */
		struct ip6_hdr *fru_ipv6;       /* IPv6 header within fr_m */
	} fr_u;
	struct ip6_frag fr_ip6f_opt;    /* copy of the IPv6 fragment header */
	uint16_t fr_ip6f_hlen;  /* total header length */
	uint16_t fr_ip6f_extoff;        /* last extension header offset or 0 */
};
104
/*
 * One cached byte range [fr_off, fr_end) of a datagram, for the
 * non-buffering fragment cache (pf_fragcache): records which parts of
 * the datagram have already been seen/passed.
 */
struct pf_frcache {
	LIST_ENTRY(pf_frcache) fr_next; /* linkage in pf_fragment.fr_cache */
	uint16_t fr_off;        /* range start, bytes from datagram origin */
	uint16_t fr_end;        /* range end, exclusive */
};
110
/* pf_fragment.fr_flags values */
#define PFFRAG_SEENLAST 0x0001  /* Seen the last fragment for this datagram */
#define PFFRAG_NOBUFFER 0x0002  /* Non-buffering fragment cache */
#define PFFRAG_DROP     0x0004  /* Drop all fragments of this datagram */
#define BUFFER_FRAGMENTS(fr)    (!((fr)->fr_flags & PFFRAG_NOBUFFER))
115
/*
 * Per-datagram reassembly descriptor, shared by the buffering (fr_queue)
 * and non-buffering (fr_cache) code paths; PFFRAG_NOBUFFER in fr_flags
 * selects which union member is live.
 */
struct pf_fragment {
	RB_ENTRY(pf_fragment) fr_entry;         /* linkage in lookup tree */
	TAILQ_ENTRY(pf_fragment) frag_next;     /* linkage in LRU queue */
	struct pf_addr fr_srcx;         /* source address */
	struct pf_addr fr_dstx;         /* destination address */
	u_int8_t fr_p;          /* protocol of this fragment */
	u_int8_t fr_flags;      /* status flags (PFFRAG_*) */
	u_int16_t fr_max;       /* fragment data max */
#define fr_id fr_uid.fru_id4
#define fr_id6 fr_uid.fru_id6
	union {
		u_int16_t fru_id4;      /* IPv4 id (kept in network order) */
		u_int32_t fru_id6;      /* IPv6 frag ident (network order) */
	} fr_uid;
	int fr_af;              /* address family: AF_INET or AF_INET6 */
	u_int32_t fr_timeout;   /* last-use time from pf_time_second() */
#define fr_queue fr_u.fru_queue
#define fr_cache fr_u.fru_cache
	union {
		LIST_HEAD(pf_fragq, pf_frent) fru_queue;        /* buffering */
		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;     /* non-buf */
	} fr_u;
	uint32_t fr_csum_flags;         /* checksum flags */
	uint32_t fr_csum;               /* partial checksum value */
	uint16_t fr_ip6_maxlen;         /* maximum length of a single fragment in IPv6 */
};
142
/* LRU queues: most recently used descriptors are kept at the head. */
static TAILQ_HEAD(pf_fragqueue, pf_fragment)    pf_fragqueue;
static TAILQ_HEAD(pf_cachequeue, pf_fragment)   pf_cachequeue;

static __inline int  pf_frag_compare(struct pf_fragment *,
    struct pf_fragment *);
/* Red-black lookup trees, ordered by pf_frag_compare(). */
static RB_HEAD(pf_frag_tree, pf_fragment)       pf_frag_tree, pf_cache_tree;
RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry,
    pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

/* Private prototypes */
static void pf_ip6hdr2key(struct pf_fragment *, struct ip6_hdr *,
    struct ip6_frag *);
static void pf_ip2key(struct pf_fragment *, struct ip *);
static void pf_remove_fragment(struct pf_fragment *);
static void pf_flush_fragments(void);
static void pf_free_fragment(struct pf_fragment *);
static struct pf_fragment *pf_find_fragment_by_key(struct pf_fragment *,
    struct pf_frag_tree *);
static __inline struct pf_fragment *
pf_find_fragment_by_ipv4_header(struct ip *, struct pf_frag_tree *);
static struct mbuf *pf_reassemble(struct mbuf *, struct pf_fragment **,
    struct pf_frent *, int);
static struct mbuf *pf_fragcache(struct mbuf **, struct ip *,
    struct pf_fragment **, int, int, int *);
static int pf_normalize_tcpopt(struct pf_rule *, int, struct pfi_kif *,
    struct pf_pdesc *, pbuf_t *, struct tcphdr *, int, int *);
static __inline struct pf_fragment *
pf_find_fragment_by_ipv6_header(struct ip6_hdr *, struct ip6_frag *,
    struct pf_frag_tree *);
static struct mbuf *pf_reassemble6(struct mbuf **, struct pf_fragment **,
    struct pf_frent *, int);
static struct mbuf *pf_frag6cache(struct mbuf **, struct ip6_hdr*,
    struct ip6_frag *, struct pf_fragment **, int, int, int, int *);

/*
 * Debug logging, active when pf_status.debug >= PF_DEBUG_MISC.
 * Call with doubled parentheses: DPFPRINTF(("fmt", args)).
 */
#define DPFPRINTF(x) do {                               \
	if (pf_status.debug >= PF_DEBUG_MISC) {         \
	        printf("%s: ", __func__);               \
	        printf x ;                              \
	}                                               \
} while (0)

/* Globals */
struct pool pf_frent_pl, pf_frag_pl;
static struct pool pf_cache_pl, pf_cent_pl;
struct pool pf_state_scrub_pl;

/* Entry counters for the frent and cache-range pools. */
static int pf_nfrents, pf_ncache;
191
192 void
pf_normalize_init(void)193 pf_normalize_init(void)
194 {
195 pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
196 NULL);
197 pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
198 NULL);
199 pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
200 "pffrcache", NULL);
201 pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
202 NULL);
203 pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
204 "pfstscr", NULL);
205
206 pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
207 pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
208 pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
209 pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
210
211 TAILQ_INIT(&pf_fragqueue);
212 TAILQ_INIT(&pf_cachequeue);
213 }
214
#if 0
/*
 * Teardown counterpart to pf_normalize_init(); compiled out (#if 0),
 * apparently because these pools are never torn down at runtime --
 * TODO confirm before enabling.
 */
void
pf_normalize_destroy(void)
{
	pool_destroy(&pf_state_scrub_pl);
	pool_destroy(&pf_cent_pl);
	pool_destroy(&pf_cache_pl);
	pool_destroy(&pf_frag_pl);
	pool_destroy(&pf_frent_pl);
}
#endif
226
227 int
pf_normalize_isempty(void)228 pf_normalize_isempty(void)
229 {
230 return TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue);
231 }
232
233 static __inline int
pf_frag_compare(struct pf_fragment * a,struct pf_fragment * b)234 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
235 {
236 int diff;
237
238 if ((diff = a->fr_af - b->fr_af)) {
239 return diff;
240 } else if ((diff = a->fr_p - b->fr_p)) {
241 return diff;
242 } else {
243 struct pf_addr *sa = &a->fr_srcx;
244 struct pf_addr *sb = &b->fr_srcx;
245 struct pf_addr *da = &a->fr_dstx;
246 struct pf_addr *db = &b->fr_dstx;
247
248 switch (a->fr_af) {
249 #ifdef INET
250 case AF_INET:
251 if ((diff = a->fr_id - b->fr_id)) {
252 return diff;
253 } else if (sa->v4addr.s_addr < sb->v4addr.s_addr) {
254 return -1;
255 } else if (sa->v4addr.s_addr > sb->v4addr.s_addr) {
256 return 1;
257 } else if (da->v4addr.s_addr < db->v4addr.s_addr) {
258 return -1;
259 } else if (da->v4addr.s_addr > db->v4addr.s_addr) {
260 return 1;
261 }
262 break;
263 #endif
264 case AF_INET6:
265 if ((diff = a->fr_id6 - b->fr_id6)) {
266 return diff;
267 } else if (sa->addr32[3] < sb->addr32[3]) {
268 return -1;
269 } else if (sa->addr32[3] > sb->addr32[3]) {
270 return 1;
271 } else if (sa->addr32[2] < sb->addr32[2]) {
272 return -1;
273 } else if (sa->addr32[2] > sb->addr32[2]) {
274 return 1;
275 } else if (sa->addr32[1] < sb->addr32[1]) {
276 return -1;
277 } else if (sa->addr32[1] > sb->addr32[1]) {
278 return 1;
279 } else if (sa->addr32[0] < sb->addr32[0]) {
280 return -1;
281 } else if (sa->addr32[0] > sb->addr32[0]) {
282 return 1;
283 } else if (da->addr32[3] < db->addr32[3]) {
284 return -1;
285 } else if (da->addr32[3] > db->addr32[3]) {
286 return 1;
287 } else if (da->addr32[2] < db->addr32[2]) {
288 return -1;
289 } else if (da->addr32[2] > db->addr32[2]) {
290 return 1;
291 } else if (da->addr32[1] < db->addr32[1]) {
292 return -1;
293 } else if (da->addr32[1] > db->addr32[1]) {
294 return 1;
295 } else if (da->addr32[0] < db->addr32[0]) {
296 return -1;
297 } else if (da->addr32[0] > db->addr32[0]) {
298 return 1;
299 }
300 break;
301 default:
302 VERIFY(!0 && "only IPv4 and IPv6 supported!");
303 break;
304 }
305 }
306 return 0;
307 }
308
/*
 * Expire fragment state older than the PFTM_FRAG timeout from both the
 * buffering queue and the non-buffering cache.  Both queues are kept in
 * LRU order (head = most recent), so we trim from the tail and stop at
 * the first entry that is still fresh.
 */
void
pf_purge_expired_fragments(void)
{
	struct pf_fragment *frag;
	/* Entries last touched at or before 'expire' are stale. */
	u_int32_t expire = pf_time_second() -
	    pf_default_rule.timeout[PFTM_FRAG];

	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		VERIFY(BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire) {
			/* Everything ahead of this entry is newer; done. */
			break;
		}

		switch (frag->fr_af) {
		case AF_INET:
			DPFPRINTF(("expiring IPv4 %d(0x%llx) from queue.\n",
			    ntohs(frag->fr_id),
			    (uint64_t)VM_KERNEL_ADDRHASH(frag)));
			break;
		case AF_INET6:
			DPFPRINTF(("expiring IPv6 %d(0x%llx) from queue.\n",
			    ntohl(frag->fr_id6),
			    (uint64_t)VM_KERNEL_ADDRHASH(frag)));
			break;
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported");
			break;
		}
		pf_free_fragment(frag);
	}

	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
		VERIFY(!BUFFER_FRAGMENTS(frag));
		if (frag->fr_timeout > expire) {
			break;
		}

		switch (frag->fr_af) {
		case AF_INET:
			DPFPRINTF(("expiring IPv4 %d(0x%llx) from cache.\n",
			    ntohs(frag->fr_id),
			    (uint64_t)VM_KERNEL_ADDRHASH(frag)));
			break;
		case AF_INET6:
			DPFPRINTF(("expiring IPv6 %d(0x%llx) from cache.\n",
			    ntohl(frag->fr_id6),
			    (uint64_t)VM_KERNEL_ADDRHASH(frag)));
			break;
		default:
			VERIFY(0 && "only IPv4 and IPv6 supported");
			break;
		}
		pf_free_fragment(frag);
		/*
		 * 'frag' was freed above; its pointer value is only
		 * compared (never dereferenced) to assert that the
		 * loop made progress.
		 */
		VERIFY(TAILQ_EMPTY(&pf_cachequeue) ||
		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
	}
}
366
367 /*
368 * Try to flush old fragments to make space for new ones
369 */
370
371 static void
pf_flush_fragments(void)372 pf_flush_fragments(void)
373 {
374 struct pf_fragment *frag;
375 int goal;
376
377 goal = pf_nfrents * 9 / 10;
378 DPFPRINTF(("trying to free > %d frents\n",
379 pf_nfrents - goal));
380 while (goal < pf_nfrents) {
381 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
382 if (frag == NULL) {
383 break;
384 }
385 pf_free_fragment(frag);
386 }
387
388
389 goal = pf_ncache * 9 / 10;
390 DPFPRINTF(("trying to free > %d cache entries\n",
391 pf_ncache - goal));
392 while (goal < pf_ncache) {
393 frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
394 if (frag == NULL) {
395 break;
396 }
397 pf_free_fragment(frag);
398 }
399 }
400
401 /* Frees the fragments and all associated entries */
402
403 static void
pf_free_fragment(struct pf_fragment * frag)404 pf_free_fragment(struct pf_fragment *frag)
405 {
406 struct pf_frent *frent;
407 struct pf_frcache *frcache;
408
409 /* Free all fragments */
410 if (BUFFER_FRAGMENTS(frag)) {
411 for (frent = LIST_FIRST(&frag->fr_queue); frent;
412 frent = LIST_FIRST(&frag->fr_queue)) {
413 LIST_REMOVE(frent, fr_next);
414
415 m_freem(frent->fr_m);
416 pool_put(&pf_frent_pl, frent);
417 pf_nfrents--;
418 }
419 } else {
420 for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
421 frcache = LIST_FIRST(&frag->fr_cache)) {
422 LIST_REMOVE(frcache, fr_next);
423
424 VERIFY(LIST_EMPTY(&frag->fr_cache) ||
425 LIST_FIRST(&frag->fr_cache)->fr_off >
426 frcache->fr_end);
427
428 pool_put(&pf_cent_pl, frcache);
429 pf_ncache--;
430 }
431 }
432
433 pf_remove_fragment(frag);
434 }
435
436 static void
pf_ip6hdr2key(struct pf_fragment * key,struct ip6_hdr * ip6,struct ip6_frag * fh)437 pf_ip6hdr2key(struct pf_fragment *key, struct ip6_hdr *ip6,
438 struct ip6_frag *fh)
439 {
440 key->fr_p = fh->ip6f_nxt;
441 key->fr_id6 = fh->ip6f_ident;
442 key->fr_af = AF_INET6;
443 key->fr_srcx.v6addr = ip6->ip6_src;
444 key->fr_dstx.v6addr = ip6->ip6_dst;
445 }
446
447 static void
pf_ip2key(struct pf_fragment * key,struct ip * ip)448 pf_ip2key(struct pf_fragment *key, struct ip *ip)
449 {
450 key->fr_p = ip->ip_p;
451 key->fr_id = ip->ip_id;
452 key->fr_af = AF_INET;
453 key->fr_srcx.v4addr.s_addr = ip->ip_src.s_addr;
454 key->fr_dstx.v4addr.s_addr = ip->ip_dst.s_addr;
455 }
456
457 static struct pf_fragment *
pf_find_fragment_by_key(struct pf_fragment * key,struct pf_frag_tree * tree)458 pf_find_fragment_by_key(struct pf_fragment *key, struct pf_frag_tree *tree)
459 {
460 struct pf_fragment *frag;
461
462 frag = RB_FIND(pf_frag_tree, tree, key);
463 if (frag != NULL) {
464 /* XXX Are we sure we want to update the timeout? */
465 frag->fr_timeout = pf_time_second();
466 if (BUFFER_FRAGMENTS(frag)) {
467 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
468 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
469 } else {
470 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
471 TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
472 }
473 }
474
475 return frag;
476 }
477
478 static __attribute__((noinline)) struct pf_fragment *
pf_find_fragment_by_ipv4_header(struct ip * ip,struct pf_frag_tree * tree)479 pf_find_fragment_by_ipv4_header(struct ip *ip, struct pf_frag_tree *tree)
480 {
481 struct pf_fragment key;
482 pf_ip2key(&key, ip);
483 return pf_find_fragment_by_key(&key, tree);
484 }
485
486 /* Removes a fragment from the fragment queue and frees the fragment */
487 static void
pf_remove_fragment(struct pf_fragment * frag)488 pf_remove_fragment(struct pf_fragment *frag)
489 {
490 if (BUFFER_FRAGMENTS(frag)) {
491 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
492 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
493 pool_put(&pf_frag_pl, frag);
494 } else {
495 RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
496 TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
497 pool_put(&pf_cache_pl, frag);
498 }
499 }
500
/* Byte offset of a queued fragment within its datagram. */
#define FR_IP_OFF(fr)   ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
/*
 * Buffering IPv4 reassembly.
 *
 * Inserts 'frent' (whose mbuf is 'm0') into the offset-sorted fragment
 * queue of '*frag', creating the descriptor on the first fragment seen
 * for the datagram.  Overlap with already-queued fragments is trimmed.
 * 'mff' is non-zero when the IP "more fragments" bit was set.
 *
 * Returns the fully reassembled datagram once all data up to the last
 * fragment has been collected (and clears '*frag'); returns NULL while
 * the datagram is incomplete (fragment queued) or when this fragment
 * had to be dropped.
 */
static struct mbuf *
pf_reassemble(struct mbuf *m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf *m = m0, *m2;
	struct pf_frent *frea, *next;
	struct pf_frent *frep = NULL;
	struct ip *ip = frent->fr_ip;
	uint32_t hlen = ip->ip_hl << 2;
	u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_int16_t fr_max = ip_len + off; /* one past this fragment's data */
	uint32_t csum, csum_flags;

	VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));

	/*
	 * Leverage partial checksum offload for IP fragments.  Narrow down
	 * the scope to cover only UDP without IP options, as that is the
	 * most common case.
	 *
	 * Perform 1's complement adjustment of octets that got included/
	 * excluded in the hardware-calculated checksum value.  Ignore cases
	 * where the value includes the entire IPv4 header span, as the sum
	 * for those octets would already be 0 by the time we get here; IP
	 * has already performed its header checksum validation.  Also take
	 * care of any trailing bytes and subtract out their partial sum.
	 */
	if (ip->ip_p == IPPROTO_UDP && hlen == sizeof(struct ip) &&
	    (m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t start = m->m_pkthdr.csum_rx_start;
		int32_t trailer = (m_pktlen(m) - ntohs(ip->ip_len));
		uint32_t swbytes = (uint32_t)trailer;

		csum = m->m_pkthdr.csum_rx_val;

		ASSERT(trailer >= 0);
		if ((start != 0 && start != hlen) || trailer != 0) {
			/*
			 * m_adj_sum16() reads ip_len/ip_off from the
			 * header; temporarily put them in network order
			 * on little-endian only when the hardware sum
			 * started before the payload.
			 */
#if BYTE_ORDER != BIG_ENDIAN
			if (start < hlen) {
				HTONS(ip->ip_len);
				HTONS(ip->ip_off);
			}
#endif /* BYTE_ORDER != BIG_ENDIAN */
			/* callee folds in sum */
			csum = m_adj_sum16(m, start, hlen,
			    (ip->ip_len - hlen), csum);
			if (hlen > start) {
				swbytes += (hlen - start);
			} else {
				swbytes += (start - hlen);
			}
#if BYTE_ORDER != BIG_ENDIAN
			if (start < hlen) {
				NTOHS(ip->ip_off);
				NTOHS(ip->ip_len);
			}
#endif /* BYTE_ORDER != BIG_ENDIAN */
		}
		csum_flags = m->m_pkthdr.csum_flags;

		if (swbytes != 0) {
			udp_in_cksum_stats(swbytes);
		}
		if (trailer != 0) {
			m_adj(m, -trailer);
		}
	} else {
		csum = 0;
		csum_flags = 0;
	}

	/* Invalidate checksum */
	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			/* Out of descriptors: reclaim space, retry once. */
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL) {
				goto drop_fragment;
			}
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET;
		(*frag)->fr_srcx.v4addr = frent->fr_ip->ip_src;
		(*frag)->fr_dstx.v4addr = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = pf_time_second();
		/*
		 * NOTE(review): fr_csum_flags is only written when
		 * csum_flags != 0 -- this presumes pool memory is zeroed
		 * (it is read at the accumulate step below); verify.
		 */
		if (csum_flags != 0) {
			(*frag)->fr_csum_flags = csum_flags;
			(*frag)->fr_csum = csum;
		}
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * If this fragment contains similar checksum offload info
	 * as that of the existing ones, accumulate checksum.  Otherwise,
	 * invalidate checksum offload info for the entire datagram.
	 */
	if (csum_flags != 0 && csum_flags == (*frag)->fr_csum_flags) {
		(*frag)->fr_csum += csum;
	} else if ((*frag)->fr_csum_flags != 0) {
		(*frag)->fr_csum_flags = 0;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off) {
			break;
		}
		frep = frea;
	}

	VERIFY(frep != NULL || frea != NULL);

	/* Trim the front of 'frent' where it overlaps its predecessor. */
	if (frep != NULL &&
	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	    4 > off) {
		u_int16_t precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len) {
			/* Entirely contained in the previous fragment. */
			goto drop_fragment;
		}
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len);
	}

	/* Trim or drop queued successors that overlap the tail of 'frent'. */
	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	    frea = next) {
		u_int16_t aftercut;

		aftercut = ip_len + off - FR_IP_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
		    * 4) {
			/* Partial overlap: shave the front of 'frea'. */
			frea->fr_ip->ip_len =
			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max) {
		(*frag)->fr_max = fr_max;
	}
	/* This is the last segment */
	if (!mff) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}

	if (frep == NULL) {
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	} else {
		LIST_INSERT_AFTER(frep, frent, fr_next);
	}

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) {
		return NULL;
	}

	/* Check if we have all the data */
	off = 0;        /* reused: accumulates total payload length */
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off)) {
			/* Hole detected: keep waiting for fragments. */
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return NULL;
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max) {
		return NULL;
	}

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	VERIFY(frent != NULL);
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		/* Reassembled datagram would exceed the IPv4 maximum. */
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return NULL;
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	/* Concatenate the remaining fragments' mbufs in order. */
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_cat(m, m2);
	}

	ip->ip_src = (*frag)->fr_srcx.v4addr;
	ip->ip_dst = (*frag)->fr_dstx.v4addr;

	/* Restore accumulated checksum metadata for the whole datagram. */
	if ((*frag)->fr_csum_flags != 0) {
		csum = (*frag)->fr_csum;

		ADDCARRY(csum);

		m->m_pkthdr.csum_rx_val = csum;
		m->m_pkthdr.csum_rx_start = sizeof(struct ip);
		m->m_pkthdr.csum_flags = (*frag)->fr_csum_flags;
	} else if ((m->m_pkthdr.rcvif != NULL &&
	    m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
		/* loopback checksums are always OK */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags =
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
		    CSUM_IP_CHECKED | CSUM_IP_VALID;
	}

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	/* Re-attach the IP header that was stripped on entry. */
	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	m->m_len += hlen;
	m->m_data -= hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next) {
			plen += m2->m_len;
		}
		m->m_pkthdr.len = plen;
	}

	DPFPRINTF(("complete: 0x%llx(%d)\n",
	    (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip->ip_len)));
	return m;

drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	m_drop(m, DROPTAP_FLAG_DIR_IN, DROP_REASON_PF_BAD_FRAGMENT, NULL, 0);
	return NULL;
}
802
/*
 * Non-buffering IPv4 fragment cache.
 *
 * Tracks which byte ranges of a fragmented datagram have already been
 * passed, trimming out of 'm' any overlap with previously-seen data so
 * a later fragment can never rewrite bytes that were already forwarded.
 * With 'drop' set, overlapping fragments instead mark the whole
 * datagram PFFRAG_DROP.  Returns the (possibly trimmed) mbuf to
 * forward, or NULL when the fragment was dropped; sets '*nomem' when an
 * allocation failed.
 */
static __attribute__((noinline)) struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
    int drop, int *nomem)
{
	struct mbuf *__single m = *m0;
	struct pf_frcache *__single frp, *__single fra, *__single cur = NULL;
	int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
	/* The shift into 16 bits discards the IP flag bits (they fall off
	 * the top), leaving the fragment offset in bytes. */
	u_int16_t off = ntohs(h->ip_off) << 3;
	u_int16_t fr_max = ip_len + off;
	int hosed = 0;

	VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			/* Out of descriptors: reclaim space, retry once. */
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL) {
				goto no_mem;
			}
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET;
		(*frag)->fr_srcx.v4addr = h->ip_src;
		(*frag)->fr_dstx.v4addr = h->ip_dst;
		(*frag)->fr_p = h->ip_p;
		(*frag)->fr_id = h->ip_id;
		(*frag)->fr_timeout = pf_time_second();

		cur->fr_off = off;
		cur->fr_end = fr_max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off,
		    fr_max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 * After the loop, frp is the last range starting at or before
	 * 'off' (or NULL) and fra the first range starting after it.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off) {
			break;
		}
		frp = fra;
	}

	VERIFY(frp != NULL || fra != NULL);

	if (frp != NULL) {
		int precut;

		/* Overlap between the predecessor range and this fragment. */
		precut = frp->fr_end - off;
		if (precut >= ip_len) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
			frp->fr_end = fr_max;
		} else if (precut > 0) {
			/*
			 * The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
			    fr_max));

			off += precut;
			fr_max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = fr_max;

			if (!drop) {
				/*
				 * XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				/* Split the IP header off into *m0 ... */
				*m0 = m_copym(m, 0, h->ip_hl << 2, M_NOWAIT);
				if (*m0 == NULL) {
					goto no_mem;
				}
				VERIFY((*m0)->m_next == NULL);
				/* ... cut the overlap plus the old header
				 * from the payload ... */
				m_adj(m, precut + (h->ip_hl << 2));
				/* ... and glue the header back on. */
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next) {
						plen += t->m_len;
					}
					m->m_pkthdr.len = plen;
				}


				h = mtod(m, struct ip *);


				VERIFY((int)m->m_len ==
				    ntohs(h->ip_len) - precut);
				h->ip_off = htons(ntohs(h->ip_off) +
				    (precut >> 3));
				h->ip_len = htons(ntohs(h->ip_len) - precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
			    fr_max));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL) {
				goto no_mem;
			}
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	if (fra != NULL) {
		int aftercut;
		int merge = 0;

		/* Overlap between this fragment and the successor range. */
		aftercut = fr_max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
			    h->ip_id, off, fr_max, fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
			    h->ip_id, aftercut, off, fr_max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			fr_max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next) {
						plen += t->m_len;
					}
					m->m_pkthdr.len = plen;
				}
				h = mtod(m, struct ip *);
				VERIFY((int)m->m_len ==
				    ntohs(h->ip_len) - aftercut);
				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
			    h->ip_id, -aftercut, off, fr_max, fra->fr_off,
			    fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL) {
				goto no_mem;
			}
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}


		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, cur->fr_off, cur->fr_end, off,
				    fr_max, fra->fr_off, fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				pf_ncache--;
				cur = NULL;
			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				VERIFY(cur == NULL);
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, frp->fr_off, frp->fr_end, off,
				    fr_max, fra->fr_off, fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				pf_ncache--;
				frp = NULL;
			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}


pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max) {
		(*frag)->fr_max = fr_max;
	}

	/* This is the last segment */
	if (!mff) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
		    (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return m;

no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}

	m_drop(m, DROPTAP_FLAG_DIR_IN,
	    DROP_REASON_PF_MEM_ALLOC, NULL, 0);
	return NULL;

drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0) {
			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
			    h->ip_id));
		}
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_drop(m, DROPTAP_FLAG_DIR_IN,
	    DROP_REASON_PF_BAD_FRAGMENT, NULL, 0);
	return NULL;
}
1114
/*
 * Host-byte-order accessors for a pf_frent's IPv6 fragment fields:
 * FR_IP6_OFF() yields the fragment's byte offset within the original
 * datagram (the three low flag bits of ip6f_offlg are masked off), and
 * FR_IP6_PLEN() the IPv6 payload length carried by that fragment.
 */
#define FR_IP6_OFF(fr) \
	(ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
#define FR_IP6_PLEN(fr)	(ntohs((fr)->fr_ip6->ip6_plen))
/*
 * Insert IPv6 fragment 'frent' (whose data lives in *m0) into the
 * buffering reassembly queue '*frag', creating the queue on first use.
 * 'mff' is the More-Fragments flag of this fragment.
 *
 * Returns the fully reassembled packet -- with a PF_REASS m_tag
 * attached carrying the values needed for later refragmentation --
 * once every byte of the datagram has arrived.  Returns NULL while
 * data is still missing (the mbuf is then held in the queue) or when
 * this fragment is dropped (the mbuf is then freed via m_drop()).
 */
struct mbuf *
pf_reassemble6(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf *__single m, *__single m2;
	struct pf_frent *__single frea, *__single frep, *__single next;
	struct ip6_hdr *__single ip6;
	struct ip6_frag *__single ip6f;
	int plen, off, fr_max, pktlen;
	uint32_t uoff, csum, csum_flags;

	VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));
	m = *m0;
	frep = NULL;
	ip6 = frent->fr_ip6;
	ip6f = &frent->fr_ip6f_opt;
	off = FR_IP6_OFF(frent);	/* byte offset of this fragment's data */
	uoff = frent->fr_ip6f_hlen;	/* start of payload within the mbuf */
	plen = FR_IP6_PLEN(frent);
	/* one past the last datagram byte this fragment contributes */
	fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof(*ip6));
	pktlen = plen + sizeof(*ip6);

	DPFPRINTF(("0x%llx IPv6 frag plen %u off %u fr_ip6f_hlen %u "
	    "fr_max %u m_len %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off,
	    frent->fr_ip6f_hlen, fr_max, m->m_len));

	/*
	 * Leverage partial checksum offload for simple UDP/IP fragments,
	 * as that is the most common case.
	 *
	 * Perform 1's complement adjustment of octets that got included/
	 * excluded in the hardware-calculated checksum value. Also take
	 * care of any trailing bytes and subtract out their partial sum.
	 */
	if (ip6f->ip6f_nxt == IPPROTO_UDP &&
	    uoff == (sizeof(*ip6) + sizeof(*ip6f)) &&
	    (m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
		uint32_t start = m->m_pkthdr.csum_rx_start;
		uint32_t ip_len = (sizeof(*ip6) + ntohs(ip6->ip6_plen));
		int32_t trailer = (m_pktlen(m) - ip_len);
		uint32_t swbytes = (uint32_t)trailer;

		csum = m->m_pkthdr.csum_rx_val;

		ASSERT(trailer >= 0);
		if (start != uoff || trailer != 0) {
			uint16_t s = 0, d = 0;

			/*
			 * Temporarily clear embedded scope IDs so they do
			 * not perturb the checksum adjustment below; they
			 * are restored once m_adj_sum16() is done.
			 */
			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
				s = ip6->ip6_src.s6_addr16[1];
				ip6->ip6_src.s6_addr16[1] = 0;
			}
			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
				d = ip6->ip6_dst.s6_addr16[1];
				ip6->ip6_dst.s6_addr16[1] = 0;
			}

			/* callee folds in sum */
			csum = m_adj_sum16(m, start, uoff,
			    (ip_len - uoff), csum);
			if (uoff > start) {
				swbytes += (uoff - start);
			} else {
				swbytes += (start - uoff);
			}

			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
				ip6->ip6_src.s6_addr16[1] = s;
			}
			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
				ip6->ip6_dst.s6_addr16[1] = d;
			}
		}
		csum_flags = m->m_pkthdr.csum_flags;

		if (swbytes != 0) {
			udp_in6_cksum_stats(swbytes);
		}
		if (trailer != 0) {
			/* trim bytes beyond the declared IPv6 payload */
			m_adj(m, -trailer);
		}
	} else {
		/* not a simple UDP fragment; no checksum accumulation */
		csum = 0;
		csum_flags = 0;
	}

	/* Invalidate checksum */
	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;

	/* strip off headers up to the fragment payload */
	m->m_data += frent->fr_ip6f_hlen;
	m->m_len -= frent->fr_ip6f_hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (*frag == NULL) {
			/* reclaim stale fragments, then retry once */
			pf_flush_fragments();
			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (*frag == NULL) {
				goto drop_fragment;
			}
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_ip6_maxlen = pktlen;
		(*frag)->fr_af = AF_INET6;
		(*frag)->fr_srcx.v6addr = frent->fr_ip6->ip6_src;
		(*frag)->fr_dstx.v6addr = frent->fr_ip6->ip6_dst;
		(*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt;
		(*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident;
		(*frag)->fr_timeout = pf_time_second();
		if (csum_flags != 0) {
			(*frag)->fr_csum_flags = csum_flags;
			(*frag)->fr_csum = csum;
		}
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/* Remember maximum fragment len for refragmentation */
	if (pktlen > (*frag)->fr_ip6_maxlen) {
		(*frag)->fr_ip6_maxlen = pktlen;
	}
	/*
	 * If this fragment contains similar checksum offload info
	 * as that of the existing ones, accumulate checksum. Otherwise,
	 * invalidate checksum offload info for the entire datagram.
	 */
	if (csum_flags != 0 && csum_flags == (*frag)->fr_csum_flags) {
		(*frag)->fr_csum += csum;
	} else if ((*frag)->fr_csum_flags != 0) {
		(*frag)->fr_csum_flags = 0;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP6_OFF(frea) > off) {
			break;
		}
		frep = frea;	/* last entry at or before 'off' */
	}

	VERIFY(frep != NULL || frea != NULL);

	/* Trim the front of this fragment if it overlaps its predecessor. */
	if (frep != NULL &&
	    FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off) {
		u_int16_t precut;

		precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) -
		    frep->fr_ip6f_hlen - off;
		if (precut >= plen) {
			/* entirely contained in the predecessor: duplicate */
			goto drop_fragment;
		}
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		frent->fr_ip6f_opt.ip6f_offlg =
		    htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) +
		    (precut >> 3));
		off = FR_IP6_OFF(frent);
		plen -= precut;
		ip6->ip6_plen = htons(plen);
	}

	/* Trim or discard queued successors that this fragment overlaps. */
	for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) {
		u_int16_t aftercut;

		aftercut = plen + off - FR_IP6_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) {
			/* partial overlap: chop the successor's front */
			frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) -
			    aftercut);
			frea->fr_ip6f_opt.ip6f_offlg =
			    htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		pool_put(&pf_frent_pl, frea);
		pf_nfrents--;
	}

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max) {
		(*frag)->fr_max = fr_max;
	}
	/* This is the last segment */
	if (!mff) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}

	/* Keep the queue sorted by fragment offset. */
	if (frep == NULL) {
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	} else {
		LIST_INSERT_AFTER(frep, frent, fr_next);
	}

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) {
		return NULL;
	}

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);
		/*
		 * NOTE(review): this uses frent->fr_ip6f_hlen (the fragment
		 * just inserted), not frep->fr_ip6f_hlen, for every queue
		 * entry -- presumably all fragments of one datagram carry
		 * the same per-fragment header length; confirm.
		 */
		off += FR_IP6_PLEN(frep) - (frent->fr_ip6f_hlen - sizeof *ip6);
		DPFPRINTF(("frep at %d, next %d, max %d\n",
		    off, next == NULL ? -1 : FR_IP6_OFF(next),
		    (*frag)->fr_max));
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP6_OFF(next) != off)) {
			/* a hole remains before the next queued fragment */
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP6_OFF(next),
			    (*frag)->fr_max));
			return NULL;
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max) {
		return NULL;
	}

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	VERIFY(frent != NULL);
	if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) {
		/* reassembled datagram would exceed the maximum size */
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return NULL;
	}

	ASSERT(*frag != NULL);
	ASSERT(frent != NULL);
	next = LIST_NEXT(frent, fr_next);
	if (next == NULL) {
		/* only one queued entry: not a real fragment train */
		DPFPRINTF(("drop: atomic fragment\n"));
		pf_free_fragment(*frag);
		*frag = NULL;
		return NULL;
	}

	/* retrieve the values to be filled in to reassembled tag */
	uint16_t hdrlen, unfragpartlen, extoff, maxlen;
	uint32_t id;

	/* Get total extension header length from the first fragment */
	hdrlen = frent->fr_ip6f_hlen - sizeof(struct ip6_frag);
	/*
	 * Get total extension header length of per-fragment headers from the
	 * subsequent fragment.
	 */
	unfragpartlen = next->fr_ip6f_hlen - sizeof(struct ip6_frag);
	extoff = frent->fr_ip6f_extoff;
	maxlen = (*frag)->fr_ip6_maxlen;
	id = (*frag)->fr_id6;

	/* Rebuild the first fragment's IPv6 header in place. */
	ip6 = frent->fr_ip6;
	ip6->ip6_nxt = (*frag)->fr_p;
	ip6->ip6_plen = htons(off);
	ip6->ip6_src = (*frag)->fr_srcx.v6addr;
	ip6->ip6_dst = (*frag)->fr_dstx.v6addr;

	if ((*frag)->fr_csum_flags != 0) {
		/* every fragment contributed a sum; publish the fold */
		csum = (*frag)->fr_csum;

		ADDCARRY(csum);

		m->m_pkthdr.csum_rx_val = csum;
		m->m_pkthdr.csum_rx_start = sizeof(struct ip6_hdr);
		m->m_pkthdr.csum_flags = (*frag)->fr_csum_flags;
	} else if ((m->m_pkthdr.rcvif != NULL &&
	    m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
		/* loopback checksums are always OK */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	}

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	/* Expose the IPv6 header again in front of the first payload. */
	m = frent->fr_m;
	m->m_len += sizeof(struct ip6_hdr);
	m->m_data -= sizeof(struct ip6_hdr);
	memmove(m_mtod_current(m), ip6, sizeof(struct ip6_hdr));

	/* Concatenate the remaining fragments; free their descriptors. */
	next = LIST_NEXT(frent, fr_next);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	for (frent = next; next != NULL; frent = next) {
		m2 = frent->fr_m;

		m_cat(m, m2);
		next = LIST_NEXT(frent, fr_next);
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
	}

	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		/* recompute pkthdr length over the concatenated chain */
		int len = 0;
		for (m2 = m; m2; m2 = m2->m_next) {
			len += m2->m_len;
		}
		m->m_pkthdr.len = len;
	}

	DPFPRINTF(("complete: 0x%llx ip6_plen %d m_pkthdr.len %d\n",
	    (uint64_t)VM_KERNEL_ADDRHASH(m), ntohs(ip6->ip6_plen),
	    m->m_pkthdr.len));

	/* Add the reassembled tag */
	struct m_tag *mtag;
	struct pf_fragment_tag *ftag;
	mtag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS,
	    sizeof(*ftag), M_NOWAIT, m);
	if (mtag == NULL) {
		/* XXX: add stats */
		m_drop(m, DROPTAP_FLAG_DIR_IN, DROP_REASON_PF_MEM_ALLOC, NULL, 0);
		return NULL;
	}
	ftag = (struct pf_fragment_tag *)mtag->m_tag_data;
	ftag->ft_hdrlen = hdrlen;
	ftag->ft_unfragpartlen = unfragpartlen;
	ftag->ft_extoff = extoff;
	ftag->ft_maxlen = maxlen;
	ftag->ft_id = id;
	m_tag_prepend(m, mtag);

	struct pf_mtag *pftag = pf_get_mtag(m);
	ASSERT(pftag != NULL);
	pftag->pftag_flags |= PF_TAG_REASSEMBLED;
	return m;

drop_fragment:
	/* Oops - fail safe - drop packet */
	pool_put(&pf_frent_pl, frent);
	--pf_nfrents;
	m_drop(m, DROPTAP_FLAG_DIR_IN, DROP_REASON_PF_BAD_FRAGMENT, NULL, 0);
	return NULL;
}
1481
/*
 * Non-buffering IPv6 fragment cache (used by the scrub "fragment crop"
 * and "fragment drop-ovl" rule modes).  Fragments are forwarded as they
 * arrive; only the byte ranges already seen are remembered in
 * (*frag)->fr_cache, so data overlapping an earlier fragment is trimmed
 * from -- or, with 'drop' set, causes the drop of -- later fragments.
 *
 * 'hlen' is the offset from the start of the packet to the fragment
 * payload, 'mff' the More-Fragments flag, and '*nomem' is set when an
 * allocation failure (rather than a bad fragment) caused a NULL return.
 * Returns the (possibly trimmed) mbuf to forward, or NULL when the
 * fragment was dropped (the mbuf is then freed via m_drop()).
 */
static __attribute__((noinline)) struct mbuf *
pf_frag6cache(struct mbuf **m0, struct ip6_hdr *h, struct ip6_frag *fh,
    struct pf_fragment **frag, int hlen, int mff, int drop, int *nomem)
{
	struct mbuf *__single m = *m0;
	u_int16_t plen, off, fr_max;
	struct pf_frcache *__single frp, *__single fra, *__single cur = NULL;
	int hosed = 0;

	VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
	m = *m0;
	off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
	plen = ntohs(h->ip6_plen) - (hlen - sizeof *h);

	/*
	 * Apple Modification: dieter@apple.com. The hlen, being passed
	 * into this function Includes all the headers associated with
	 * the packet, and may include routing headers, so to get to
	 * the data payload as stored in the original IPv6 header we need
	 * to subtract al those headers and the IP header.
	 *
	 * The 'max' local variable should also contain the offset from the start
	 * of the reassembled packet to the octet just past the end of the octets
	 * in the current fragment where:
	 * - 'off' is the offset from the start of the reassembled packet to the
	 *   first octet in the fragment,
	 * - 'plen' is the length of the "payload data length" Excluding all the
	 *   IPv6 headers of the fragment.
	 * - 'hlen' is computed in pf_normalize_ip6() as the offset from the start
	 *   of the IPv6 packet to the beginning of the data.
	 */
	fr_max = off + plen;

	DPFPRINTF(("0x%llx plen %u off %u fr_max %u\n",
	    (uint64_t)VM_KERNEL_ADDRHASH(m), plen, off, fr_max));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
		if (*frag == NULL) {
			/* reclaim stale fragments, then retry once */
			pf_flush_fragments();
			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
			if (*frag == NULL) {
				goto no_mem;
			}
		}

		/* Get an entry for the queue */
		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
		if (cur == NULL) {
			pool_put(&pf_cache_pl, *frag);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_af = AF_INET6;
		(*frag)->fr_srcx.v6addr = h->ip6_src;
		(*frag)->fr_dstx.v6addr = h->ip6_dst;
		(*frag)->fr_p = fh->ip6f_nxt;
		(*frag)->fr_id6 = fh->ip6f_ident;
		(*frag)->fr_timeout = pf_time_second();

		/* the first cache entry covers exactly this fragment */
		cur->fr_off = off;
		cur->fr_end = fr_max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident),
		    off, fr_max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off) {
			break;
		}
		frp = fra;	/* last cached range at or before 'off' */
	}

	VERIFY(frp != NULL || fra != NULL);

	/* Handle overlap with the preceding cached range. */
	if (frp != NULL) {
		int precut;

		precut = frp->fr_end - off;
		if (precut >= plen) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
			    off, fr_max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
			    off, fr_max));
			frp->fr_end = fr_max;
		} else if (precut > 0) {
			/* The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), precut, frp->fr_off,
			    frp->fr_end, off, fr_max));

			off += precut;
			fr_max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = fr_max;

			if (!drop) {
				/* XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				*m0 = m_copym(m, 0, hlen, M_NOWAIT);
				if (*m0 == NULL) {
					goto no_mem;
				}
				VERIFY((*m0)->m_next == NULL);
				/* drop 'precut' payload bytes, reattach hdrs */
				m_adj(m, precut + hlen);
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					/* recompute pkthdr len after trim */
					int pktlen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next) {
						pktlen += t->m_len;
					}
					m->m_pkthdr.len = pktlen;
				}

				h = mtod(m, struct ip6_hdr *);

				VERIFY((int)m->m_len ==
				    ntohs(h->ip6_plen) - precut);
				/* Enforce 8 byte boundaries */
				fh->ip6f_offlg &= ~IP6F_OFF_MASK;
				fh->ip6f_offlg |=
				    htons(ntohs(fh->ip6f_offlg & IP6F_OFF_MASK)
				    + (precut >> 3));
				h->ip6_plen = htons(ntohs(h->ip6_plen) -
				    precut);
			} else {
				/* drop-ovl mode: mark for late drop */
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
			    ntohl(fh->ip6f_ident), -precut, frp->fr_off,
			    frp->fr_end, off, fr_max));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL) {
				goto no_mem;
			}
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	/* Handle overlap with the following cached range. */
	if (fra != NULL) {
		int aftercut;
		int merge = 0;

		aftercut = fr_max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), aftercut, off, fr_max,
			    fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			fr_max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					/* recompute pkthdr len after trim */
					int pktlen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next) {
						pktlen += t->m_len;
					}
					m->m_pkthdr.len = pktlen;
				}
				h = mtod(m, struct ip6_hdr *);
				VERIFY((int)m->m_len ==
				    ntohs(h->ip6_plen) - aftercut);
				h->ip6_plen =
				    htons(ntohs(h->ip6_plen) - aftercut);
			} else {
				/* drop-ovl mode: mark for late drop */
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
			    ntohl(fh->ip6f_ident), -aftercut, off, fr_max,
			    fra->fr_off, fra->fr_end));

			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
			if (cur == NULL) {
				goto no_mem;
			}
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = fr_max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}

		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("frag6cache[%u]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    ntohl(fh->ip6f_ident), cur->fr_off,
				    cur->fr_end, off, fr_max, fra->fr_off,
				    fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				pool_put(&pf_cent_pl, cur);
				pf_ncache--;
				cur = NULL;
			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				VERIFY(cur == NULL);
				DPFPRINTF(("frag6cache[%u]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    ntohl(fh->ip6f_ident), frp->fr_off,
				    frp->fr_end, off, fr_max, fra->fr_off,
				    fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				pool_put(&pf_cent_pl, frp);
				pf_ncache--;
				frp = NULL;
			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}

pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < fr_max) {
		(*frag)->fr_max = fr_max;
	}

	/* This is the last segment */
	if (!mff) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
		    ntohl(fh->ip6f_ident), (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return m;

no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}

	m_drop(m, DROPTAP_FLAG_DIR_IN, DROP_REASON_PF_MEM_ALLOC, NULL, 0);
	return NULL;

drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL) {
		(*frag)->fr_flags |= PFFRAG_SEENLAST;
	}

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0) {
			DPFPRINTF(("frag6cache[%u]: dropping overall fragment\n",
			    ntohl(fh->ip6f_ident)));
		}
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_drop(m, DROPTAP_FLAG_DIR_IN, DROP_REASON_PF_BAD_FRAGMENT, NULL, 0);
	return NULL;
}
1817
1818 int
pf_refragment6(struct ifnet * ifp,pbuf_t ** pbufp,struct pf_fragment_tag * ftag)1819 pf_refragment6(struct ifnet *ifp, pbuf_t **pbufp, struct pf_fragment_tag *ftag)
1820 {
1821 struct mbuf *__single m;
1822 uint32_t frag_id;
1823 uint16_t hdrlen, extoff, maxlen, unfragpartlen;
1824 uint8_t proto;
1825 int error, action;
1826 uint8_t *__single lexthdrsp;
1827 struct route_in6 ip6route;
1828 struct route_in6 *__single ro;
1829 struct sockaddr_in6 *__single dst;
1830 struct ip6_hdr *__single hdr;
1831 struct m_tag *__single tag;
1832
1833 if (pbufp == NULL || !pbuf_is_valid(*pbufp) || ftag == NULL) {
1834 panic("pf_route6: invalid parameters");
1835 /* NOT REACHED */
1836 }
1837 m = pbuf_to_mbuf(*pbufp, FALSE);
1838 hdr = mtod(m, struct ip6_hdr *);
1839 hdrlen = ftag->ft_hdrlen - sizeof(struct ip6_hdr);
1840 extoff = ftag->ft_extoff;
1841 maxlen = ftag->ft_maxlen;
1842 frag_id = ftag->ft_id;
1843 unfragpartlen = ftag->ft_unfragpartlen;
1844 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS);
1845 m_tag_delete(m, tag);
1846 ftag = NULL;
1847 tag = NULL;
1848 pf_find_mtag(m)->pftag_flags &= ~PF_TAG_REASSEMBLED;
1849 ro = &ip6route;
1850 bzero((struct route_in6 *__bidi_indexable)ro, sizeof(*ro));
1851 dst = (struct sockaddr_in6 *)&ro->ro_dst;
1852 dst->sin6_family = AF_INET6;
1853 dst->sin6_len = sizeof(*dst);
1854 dst->sin6_addr = hdr->ip6_dst;
1855
1856 if (extoff) {
1857 int off;
1858 struct mbuf *mexthdr;
1859
1860 /* Use protocol from next field of last extension header */
1861 mexthdr = m_getptr(m, extoff +
1862 offsetof(struct ip6_ext, ip6e_nxt), &off);
1863 ASSERT(mexthdr != NULL);
1864 lexthdrsp = (mtod(mexthdr, uint8_t *) + off);
1865 proto = *lexthdrsp;
1866 if (proto == IPPROTO_DSTOPTS) {
1867 struct ip6_ext ext;
1868 if (!pf_pull_hdr(*pbufp, off, &ext, sizeof(ext), sizeof(ext), NULL,
1869 NULL, AF_INET6)) {
1870 DPFPRINTF(("pkt too short"));
1871 action = PF_DROP;
1872 goto done;
1873 }
1874 proto = ext.ip6e_nxt;
1875 }
1876 } else {
1877 lexthdrsp = NULL;
1878 proto = hdr->ip6_nxt;
1879 }
1880
1881 /*
1882 * The MTU must be a multiple of 8 bytes, or we risk doing the
1883 * fragmentation wrong.
1884 */
1885 maxlen = maxlen & ~7;
1886
1887 error = ip6_do_fragmentation(&m, hdrlen, NULL, unfragpartlen,
1888 hdr, lexthdrsp, maxlen, proto, frag_id);
1889
1890 if (error == 0) {
1891 /*
1892 * PF_TAG_REFRAGMENTED flag set to indicate ip6_forward()
1893 * and pf_route6() that the mbuf contains a chain of fragments.
1894 */
1895 pf_find_mtag(m)->pftag_flags |= PF_TAG_REFRAGMENTED;
1896 action = PF_PASS;
1897 pbuf_init_mbuf(*pbufp, m, ifp);
1898 } else {
1899 DPFPRINTF(("refragment error %d", error));
1900 action = PF_DROP;
1901 goto done;
1902 }
1903 done:
1904 return action;
1905 }
1906
1907 int
pf_normalize_ip(pbuf_t * pbuf,int dir,struct pfi_kif * kif,u_short * reason,struct pf_pdesc * pd)1908 pf_normalize_ip(pbuf_t *pbuf, int dir, struct pfi_kif *kif, u_short *reason,
1909 struct pf_pdesc *pd)
1910 {
1911 struct mbuf *__single m;
1912 struct pf_rule *__single r;
1913 struct pf_frent *__single frent;
1914 struct pf_fragment *__single frag = NULL;
1915 struct ip *__single h = pbuf->pb_data;
1916 int mff = (ntohs(h->ip_off) & IP_MF);
1917 int hlen = h->ip_hl << 2;
1918 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1919 u_int16_t fr_max;
1920 int ip_len;
1921 int ip_off;
1922 int asd = 0;
1923 struct pf_ruleset *__single ruleset = NULL;
1924 struct ifnet *__single ifp = pbuf->pb_ifp;
1925 uint64_t ipid_salt = (uint64_t)pbuf_get_packet_buffer_address(pbuf);
1926
1927 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1928 while (r != NULL) {
1929 r->evaluations++;
1930 if (pfi_kif_match(r->kif, kif) == r->ifnot) {
1931 r = r->skip[PF_SKIP_IFP].ptr;
1932 } else if (r->direction && r->direction != dir) {
1933 r = r->skip[PF_SKIP_DIR].ptr;
1934 } else if (r->af && r->af != AF_INET) {
1935 r = r->skip[PF_SKIP_AF].ptr;
1936 } else if (r->proto && r->proto != h->ip_p) {
1937 r = r->skip[PF_SKIP_PROTO].ptr;
1938 } else if (PF_MISMATCHAW(&r->src.addr,
1939 (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
1940 r->src.neg, kif)) {
1941 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1942 } else if (PF_MISMATCHAW(&r->dst.addr,
1943 (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
1944 r->dst.neg, NULL)) {
1945 r = r->skip[PF_SKIP_DST_ADDR].ptr;
1946 } else {
1947 if (r->anchor == NULL) {
1948 break;
1949 } else {
1950 pf_step_into_anchor(&asd, &ruleset,
1951 PF_RULESET_SCRUB, &r, NULL, NULL);
1952 }
1953 }
1954 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
1955 PF_RULESET_SCRUB, &r, NULL, NULL)) {
1956 break;
1957 }
1958 }
1959
1960 if (r == NULL || r->action == PF_NOSCRUB) {
1961 return PF_PASS;
1962 } else {
1963 r->packets[dir == PF_OUT]++;
1964 r->bytes[dir == PF_OUT] += pd->tot_len;
1965 }
1966
1967 /* Check for illegal packets */
1968 if (hlen < (int)sizeof(struct ip)) {
1969 goto drop;
1970 }
1971
1972 if (hlen > ntohs(h->ip_len)) {
1973 goto drop;
1974 }
1975
1976 /* Clear IP_DF if the rule uses the no-df option */
1977 if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
1978 u_int16_t ipoff = h->ip_off;
1979
1980 h->ip_off &= htons(~IP_DF);
1981 h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
1982 }
1983
1984 /* We will need other tests here */
1985 if (!fragoff && !mff) {
1986 goto no_fragment;
1987 }
1988
1989 /*
1990 * We're dealing with a fragment now. Don't allow fragments
1991 * with IP_DF to enter the cache. If the flag was cleared by
1992 * no-df above, fine. Otherwise drop it.
1993 */
1994 if (h->ip_off & htons(IP_DF)) {
1995 DPFPRINTF(("IP_DF\n"));
1996 goto bad;
1997 }
1998
1999 ip_len = ntohs(h->ip_len) - hlen;
2000 ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
2001
2002 /* All fragments are 8 byte aligned */
2003 if (mff && (ip_len & 0x7)) {
2004 DPFPRINTF(("mff and %d\n", ip_len));
2005 goto bad;
2006 }
2007
2008 /* Respect maximum length */
2009 if (fragoff + ip_len > IP_MAXPACKET) {
2010 DPFPRINTF(("max packet %d\n", fragoff + ip_len));
2011 goto bad;
2012 }
2013 fr_max = fragoff + ip_len;
2014
2015 if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) {
2016 /* Fully buffer all of the fragments */
2017
2018 frag = pf_find_fragment_by_ipv4_header(h, &pf_frag_tree);
2019 /* Check if we saw the last fragment already */
2020 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
2021 fr_max > frag->fr_max) {
2022 goto bad;
2023 }
2024
2025 if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
2026 REASON_SET(reason, PFRES_MEMORY);
2027 return PF_DROP;
2028 }
2029
2030 VERIFY(!pbuf_is_valid(pbuf));
2031
2032 /* Restore iph pointer after pbuf_to_mbuf() */
2033 h = mtod(m, struct ip *);
2034
2035 /* Get an entry for the fragment queue */
2036 frent = pool_get(&pf_frent_pl, PR_NOWAIT);
2037 if (frent == NULL) {
2038 REASON_SET(reason, PFRES_MEMORY);
2039 m_drop(m, DROPTAP_FLAG_DIR_IN, DROP_REASON_PF_MEM_ALLOC, NULL, 0);
2040 return PF_DROP;
2041 }
2042 pf_nfrents++;
2043 frent->fr_ip = h;
2044 frent->fr_m = m;
2045
2046 /* Might return a completely reassembled mbuf, or NULL */
2047 DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id),
2048 fragoff, fr_max));
2049 m = pf_reassemble(m, &frag, frent, mff);
2050
2051 if (m == NULL) {
2052 return PF_DROP;
2053 }
2054
2055 VERIFY(m->m_flags & M_PKTHDR);
2056 pbuf_init_mbuf(pbuf, m, ifp);
2057
2058 /* use mtag from concatenated mbuf chain */
2059 pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
2060 #if 0
2061 // SCW: This check is superfluous
2062 #if DIAGNOSTIC
2063 if (pd->pf_mtag == NULL) {
2064 printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
2065 if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
2066 m_freem(m);
2067 m = NULL;
2068 goto no_mem;
2069 }
2070 }
2071 #endif
2072 #endif
2073
2074 h = mtod(m, struct ip *);
2075
2076 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) {
2077 goto drop;
2078 }
2079 } else {
2080 /* non-buffering fragment cache (drops or masks overlaps) */
2081 int nomem = 0;
2082
2083 if (dir == PF_OUT && (pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
2084 /*
2085 * Already passed the fragment cache in the
2086 * input direction. If we continued, it would
2087 * appear to be a dup and would be dropped.
2088 */
2089 goto fragment_pass;
2090 }
2091
2092 frag = pf_find_fragment_by_ipv4_header(h, &pf_cache_tree);
2093
2094 /* Check if we saw the last fragment already */
2095 if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
2096 fr_max > frag->fr_max) {
2097 if (r->rule_flag & PFRULE_FRAGDROP) {
2098 frag->fr_flags |= PFFRAG_DROP;
2099 }
2100 goto bad;
2101 }
2102
2103 if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
2104 REASON_SET(reason, PFRES_MEMORY);
2105 goto bad;
2106 }
2107
2108 VERIFY(!pbuf_is_valid(pbuf));
2109
2110 /* Restore iph pointer after pbuf_to_mbuf() */
2111 h = mtod(m, struct ip *);
2112
2113 m = pf_fragcache(&m, h, &frag, mff,
2114 (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
2115 if (m == NULL) {
2116 // Note: pf_fragcache() has already m_freem'd the mbuf
2117 if (nomem) {
2118 goto no_mem;
2119 }
2120 goto drop;
2121 }
2122
2123 VERIFY(m->m_flags & M_PKTHDR);
2124 pbuf_init_mbuf(pbuf, m, ifp);
2125
2126 /* use mtag from copied and trimmed mbuf chain */
2127 pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
2128 #if 0
2129 // SCW: This check is superfluous
2130 #if DIAGNOSTIC
2131 if (pd->pf_mtag == NULL) {
2132 printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
2133 if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
2134 m_freem(m);
2135 m = NULL;
2136 goto no_mem;
2137 }
2138 }
2139 #endif
2140 #endif
2141 if (dir == PF_IN) {
2142 pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
2143 }
2144
2145 if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) {
2146 goto drop;
2147 }
2148
2149 goto fragment_pass;
2150 }
2151
2152 no_fragment:
2153 /* At this point, only IP_DF is allowed in ip_off */
2154 if (h->ip_off & ~htons(IP_DF)) {
2155 u_int16_t ipoff = h->ip_off;
2156
2157 h->ip_off &= htons(IP_DF);
2158 h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
2159 }
2160
2161 /* Enforce a minimum ttl, may cause endless packet loops */
2162 if (r->min_ttl && h->ip_ttl < r->min_ttl) {
2163 u_int16_t ip_ttl = h->ip_ttl;
2164
2165 h->ip_ttl = r->min_ttl;
2166 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
2167 }
2168 if (r->rule_flag & PFRULE_RANDOMID) {
2169 u_int16_t oip_id = h->ip_id;
2170
2171 if (rfc6864 && IP_OFF_IS_ATOMIC(ntohs(h->ip_off))) {
2172 h->ip_id = 0;
2173 } else {
2174 h->ip_id = ip_randomid(ipid_salt);
2175 }
2176 h->ip_sum = pf_cksum_fixup(h->ip_sum, oip_id, h->ip_id, 0);
2177 }
2178 if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) {
2179 pd->flags |= PFDESC_IP_REAS;
2180 }
2181
2182 return PF_PASS;
2183
2184 fragment_pass:
2185 /* Enforce a minimum ttl, may cause endless packet loops */
2186 if (r->min_ttl && h->ip_ttl < r->min_ttl) {
2187 u_int16_t ip_ttl = h->ip_ttl;
2188
2189 h->ip_ttl = r->min_ttl;
2190 h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
2191 }
2192 if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) {
2193 pd->flags |= PFDESC_IP_REAS;
2194 }
2195 return PF_PASS;
2196
2197 no_mem:
2198 REASON_SET(reason, PFRES_MEMORY);
2199 if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
2200 PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r,
2201 NULL, NULL, pd);
2202 }
2203 return PF_DROP;
2204
2205 drop:
2206 REASON_SET(reason, PFRES_NORM);
2207 if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
2208 PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r,
2209 NULL, NULL, pd);
2210 }
2211 return PF_DROP;
2212
2213 bad:
2214 DPFPRINTF(("dropping bad IPv4 fragment\n"));
2215
2216 /* Free associated fragments */
2217 if (frag != NULL) {
2218 pf_free_fragment(frag);
2219 }
2220
2221 REASON_SET(reason, PFRES_FRAG);
2222 if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
2223 PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r, NULL, NULL, pd);
2224 }
2225
2226 return PF_DROP;
2227 }
2228
2229 static __attribute__((noinline)) struct pf_fragment *
pf_find_fragment_by_ipv6_header(struct ip6_hdr * ip6,struct ip6_frag * fh,struct pf_frag_tree * tree)2230 pf_find_fragment_by_ipv6_header(struct ip6_hdr *ip6, struct ip6_frag *fh,
2231 struct pf_frag_tree *tree)
2232 {
2233 struct pf_fragment key;
2234 pf_ip6hdr2key(&key, ip6, fh);
2235 return pf_find_fragment_by_key(&key, tree);
2236 }
2237
/*
 * Normalize ("scrub") an IPv6 packet before state/rule processing.
 *
 * Walks the active SCRUB ruleset for a matching rule; when one matches,
 * validates the extension-header chain (routing headers, hop-by-hop
 * options including the jumbo payload option), enforces a minimum hop
 * limit, and handles fragments either by full reassembly (default) or
 * via the non-buffering fragment cache (PFRULE_FRAGCROP/FRAGDROP).
 *
 * Returns PF_PASS (possibly after replacing the pbuf contents with a
 * reassembled packet) or PF_DROP with *reason set.
 */
int
pf_normalize_ip6(pbuf_t *pbuf, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
	struct mbuf *__single m = NULL;
	struct pf_rule *__single r;
	struct ip6_hdr *__single h = pbuf->pb_data;
	int extoff;
	int off;
	struct ip6_ext ext;
	struct ip6_opt opt;
	struct ip6_opt_jumbo jumbo;
	int optend;
	int ooff;
	struct ip6_frag frag;
	u_int32_t jumbolen = 0, plen;
	u_int16_t fragoff = 0;
	u_int8_t proto;
	int terminal;
	struct pf_frent *__single frent;
	struct pf_fragment *__single pff = NULL;
	int mff = 0, rh_cnt = 0;
	u_int16_t fr_max;
	int asd = 0;
	struct pf_ruleset *__single ruleset = NULL;
	struct ifnet *__single ifp = pbuf->pb_ifp;

	/*
	 * Find the first matching scrub rule, using the precomputed
	 * skip steps to jump over rules that cannot match.
	 */
	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != dir) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != AF_INET6) {
			r = r->skip[PF_SKIP_AF].ptr;
		}
#if 0 /* header chain! */
		else if (r->proto && r->proto != h->ip6_nxt) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		}
#endif
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)(void *)&h->ip6_src, AF_INET6,
		    r->src.neg, kif)) {
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		} else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)(void *)&h->ip6_dst, AF_INET6,
		    r->dst.neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		} else {
			if (r->anchor == NULL) {
				break;
			} else {
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_SCRUB, &r, NULL, NULL);
			}
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_SCRUB, &r, NULL, NULL)) {
			break;
		}
	}

	if (r == NULL || r->action == PF_NOSCRUB) {
		/* No scrub rule applies; leave the packet untouched. */
		return PF_PASS;
	} else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets: larger than max payload + header */
	if ((uint32_t)(sizeof(struct ip6_hdr) + IPV6_MAXPACKET) <
	    pbuf->pb_packet_len) {
		goto drop;
	}

	/*
	 * Walk the extension-header chain until a terminal (upper-layer)
	 * header or a fragment header is found.
	 */
	extoff = 0;
	off = sizeof(struct ip6_hdr);
	proto = h->ip6_nxt;
	terminal = 0;
	do {
		pd->proto = proto;
		if (proto == IPPROTO_FRAGMENT) {
			goto fragment;
		}
		if (!pf_pull_hdr(pbuf, off, &ext, sizeof(ext), sizeof(ext), NULL,
		    NULL, AF_INET6)) {
			goto shortpkt;
		}
		switch (proto) {
		case IPPROTO_AH:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			extoff = off;
			/*
			 * <[email protected]>
			 * Multiple routing headers not allowed.
			 * Routing header type zero considered harmful.
			 */
			if (proto == IPPROTO_ROUTING) {
				struct ip6_rthdr rh = {0};
				if (!pf_pull_hdr(pbuf, off, &rh, sizeof(rh), sizeof(rh), NULL, NULL, AF_INET6)) {
					goto shortpkt;
				}
				if (rh_cnt++) {
					goto drop;
				}
				if (rh.ip6r_type == IPV6_RTHDR_TYPE_0) {
					goto drop;
				}
			} else if (proto == IPPROTO_AH) {
				/* AH length is in 32-bit words, minus 2 */
				off += (ext.ip6e_len + 2) * 4;
			} else {
				/* other extension headers: 8-byte units */
				off += (ext.ip6e_len + 1) * 8;
			}
			proto = ext.ip6e_nxt;
			break;
		case IPPROTO_HOPOPTS:
			/* Scan hop-by-hop options, validating each TLV. */
			extoff = off;
			optend = off + (ext.ip6e_len + 1) * 8;
			ooff = off + sizeof(ext);
			do {
				if (!pf_pull_hdr(pbuf, ooff, &opt.ip6o_type, sizeof(opt.ip6o_type),
				    sizeof(opt.ip6o_type), NULL, NULL,
				    AF_INET6)) {
					goto shortpkt;
				}
				if (opt.ip6o_type == IP6OPT_PAD1) {
					/* Pad1 has no length byte. */
					ooff++;
					continue;
				}
				if (!pf_pull_hdr(pbuf, ooff, &opt, sizeof(opt), sizeof(opt),
				    NULL, NULL, AF_INET6)) {
					goto shortpkt;
				}
				if ((ooff + (int) sizeof(opt) + opt.ip6o_len) >
				    optend) {
					/* Option runs past the header end. */
					goto drop;
				}
				switch (opt.ip6o_type) {
				case IP6OPT_JUMBO:
					/*
					 * Jumbo payload is only legal when
					 * ip6_plen is zero, must exceed the
					 * normal max, and must match the
					 * actual packet length.
					 */
					if (h->ip6_plen != 0) {
						goto drop;
					}
					if (!pf_pull_hdr(pbuf, ooff, &jumbo,
					    sizeof(jumbo), sizeof(jumbo), NULL, NULL,
					    AF_INET6)) {
						goto shortpkt;
					}
					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
					    sizeof(jumbolen));
					jumbolen = ntohl(jumbolen);
					if (jumbolen <= IPV6_MAXPACKET) {
						goto drop;
					}
					if ((sizeof(struct ip6_hdr) +
					    jumbolen) != pbuf->pb_packet_len) {
						goto drop;
					}
					break;
				default:
					break;
				}
				ooff += sizeof(opt) + opt.ip6o_len;
			} while (ooff < optend);

			off = optend;
			proto = ext.ip6e_nxt;
			break;
		default:
			terminal = 1;
			break;
		}
	} while (!terminal);

	/* jumbo payload option must be present, or plen > 0 */
	if (ntohs(h->ip6_plen) == 0) {
		plen = jumbolen;
	} else {
		plen = ntohs(h->ip6_plen);
	}
	if (plen == 0) {
		goto drop;
	}
	if ((uint32_t)(sizeof(struct ip6_hdr) + plen) > pbuf->pb_packet_len) {
		goto shortpkt;
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl) {
		h->ip6_hlim = r->min_ttl;
	}

	return PF_PASS;

fragment:
	plen = ntohs(h->ip6_plen);
	/* Jumbo payload packets cannot be fragmented */
	if (plen == 0 || jumbolen) {
		goto drop;
	}

	if (!pf_pull_hdr(pbuf, off, &frag, sizeof(frag), sizeof(frag), NULL, NULL, AF_INET6)) {
		goto shortpkt;
	}
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	pd->proto = frag.ip6f_nxt;
	mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG);
	off += sizeof(frag);
	if (fragoff + (plen - off) > IPV6_MAXPACKET) {
		/* Reassembled packet would exceed the IPv6 maximum. */
		goto badfrag;
	}

	fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr));
// XXX SCW: mbuf-specific
//	DPFPRINTF(("0x%llx IPv6 frag plen %u mff %d off %u fragoff %u "
//	    "fr_max %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, mff, off,
//	    fragoff, fr_max));

	if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */
		pd->flags |= PFDESC_IP_REAS;

		pff = pf_find_fragment_by_ipv6_header(h, &frag,
		    &pf_frag_tree);

		/* Check if we saw the last fragment already */
		if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > pff->fr_max) {
			goto badfrag;
		}

		/* Reassembly works on mbufs; detach one from the pbuf. */
		if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		/* Restore iph pointer after pbuf_to_mbuf() */
		h = mtod(m, struct ip6_hdr *);

		/* Get an entry for the fragment queue */
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		pf_nfrents++;
		frent->fr_ip6 = h;
		frent->fr_m = m;
		frent->fr_ip6f_opt = frag;
		frent->fr_ip6f_extoff = extoff;
		frent->fr_ip6f_hlen = off;
		/* account for 2nd Destination Options header if present */
		if (pd->proto == IPPROTO_DSTOPTS) {
			if (!pf_pull_hdr(pbuf, off, &ext, sizeof(ext), sizeof(ext), NULL,
			    NULL, AF_INET6)) {
				goto shortpkt;
			}
			frent->fr_ip6f_hlen += (ext.ip6e_len + 1) * 8;
		}

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
		    ntohl(frag.ip6f_ident), fragoff, fr_max));
		m = pf_reassemble6(&m, &pff, frent, mff);

		if (m == NULL) {
			return PF_DROP;
		}

		/* Re-wrap the (possibly reassembled) mbuf in the pbuf. */
		pbuf_init_mbuf(pbuf, m, ifp);
		h = pbuf->pb_data;

		if (pff != NULL && (pff->fr_flags & PFFRAG_DROP)) {
			goto drop;
		}
	} else if (dir == PF_IN ||
	    !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
		/* non-buffering fragment cache (overlaps: see RFC 5722) */
		int nomem = 0;

		pff = pf_find_fragment_by_ipv6_header(h, &frag,
		    &pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
		    fr_max > pff->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP) {
				pff->fr_flags |= PFFRAG_DROP;
			}
			goto badfrag;
		}

		if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
			goto no_mem;
		}

		/* Restore iph pointer after pbuf_to_mbuf() */
		h = mtod(m, struct ip6_hdr *);

		m = pf_frag6cache(&m, h, &frag, &pff, off, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			// Note: pf_frag6cache() has already m_freem'd the mbuf
			if (nomem) {
				goto no_mem;
			}
			goto drop;
		}

		pbuf_init_mbuf(pbuf, m, ifp);
		pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
		h = pbuf->pb_data;

		if (dir == PF_IN) {
			/* Mark so the outbound pass skips the cache. */
			pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
		}

		if (pff != NULL && (pff->fr_flags & PFFRAG_DROP)) {
			goto drop;
		}
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl) {
		h->ip6_hlim = r->min_ttl;
	}
	return PF_PASS;

no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	goto dropout;

shortpkt:
	REASON_SET(reason, PFRES_SHORT);
	goto dropout;

drop:
	REASON_SET(reason, PFRES_NORM);
	goto dropout;

badfrag:
	DPFPRINTF(("dropping bad IPv6 fragment\n"));
	REASON_SET(reason, PFRES_FRAG);
	goto dropout;

dropout:
	/* Common drop path: tear down any fragment queue and log. */
	if (pff != NULL) {
		pf_free_fragment(pff);
	}
	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
		PFLOG_PACKET(kif, h, pbuf, AF_INET6, dir, *reason, r, NULL, NULL, pd);
	}
	return PF_DROP;
}
2595
2596 int
pf_normalize_tcp(int dir,struct pfi_kif * kif,pbuf_t * pbuf,int ipoff,int off,void * h,struct pf_pdesc * pd)2597 pf_normalize_tcp(int dir, struct pfi_kif *kif, pbuf_t *pbuf, int ipoff,
2598 int off, void *h, struct pf_pdesc *pd)
2599 {
2600 #pragma unused(ipoff, h)
2601 struct pf_rule *__single r, *__single rm = NULL;
2602 struct tcphdr *__single th = pf_pd_get_hdr_tcp(pd);
2603 int rewrite = 0;
2604 int asd = 0;
2605 u_short reason;
2606 u_int8_t flags;
2607 sa_family_t af = pd->af;
2608 struct pf_ruleset *__single ruleset = NULL;
2609 union pf_state_xport sxport, dxport;
2610
2611 sxport.port = th->th_sport;
2612 dxport.port = th->th_dport;
2613
2614 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
2615 while (r != NULL) {
2616 r->evaluations++;
2617 if (pfi_kif_match(r->kif, kif) == r->ifnot) {
2618 r = r->skip[PF_SKIP_IFP].ptr;
2619 } else if (r->direction && r->direction != dir) {
2620 r = r->skip[PF_SKIP_DIR].ptr;
2621 } else if (r->af && r->af != af) {
2622 r = r->skip[PF_SKIP_AF].ptr;
2623 } else if (r->proto && r->proto != pd->proto) {
2624 r = r->skip[PF_SKIP_PROTO].ptr;
2625 } else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
2626 r->src.neg, kif)) {
2627 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2628 } else if (r->src.xport.range.op &&
2629 !pf_match_xport(r->src.xport.range.op, r->proto_variant,
2630 &r->src.xport, &sxport)) {
2631 r = r->skip[PF_SKIP_SRC_PORT].ptr;
2632 } else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
2633 r->dst.neg, NULL)) {
2634 r = r->skip[PF_SKIP_DST_ADDR].ptr;
2635 } else if (r->dst.xport.range.op &&
2636 !pf_match_xport(r->dst.xport.range.op, r->proto_variant,
2637 &r->dst.xport, &dxport)) {
2638 r = r->skip[PF_SKIP_DST_PORT].ptr;
2639 } else if (r->os_fingerprint != PF_OSFP_ANY &&
2640 !pf_osfp_match(pf_osfp_fingerprint(pd, pbuf, off, th),
2641 r->os_fingerprint)) {
2642 r = TAILQ_NEXT(r, entries);
2643 } else {
2644 if (r->anchor == NULL) {
2645 rm = r;
2646 break;
2647 } else {
2648 pf_step_into_anchor(&asd, &ruleset,
2649 PF_RULESET_SCRUB, &r, NULL, NULL);
2650 }
2651 }
2652 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
2653 PF_RULESET_SCRUB, &r, NULL, NULL)) {
2654 break;
2655 }
2656 }
2657
2658 if (rm == NULL || rm->action == PF_NOSCRUB) {
2659 return PF_PASS;
2660 } else {
2661 r->packets[dir == PF_OUT]++;
2662 r->bytes[dir == PF_OUT] += pd->tot_len;
2663 }
2664
2665 if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) {
2666 pd->flags |= PFDESC_TCP_NORM;
2667 }
2668
2669 flags = th->th_flags;
2670 if (flags & TH_SYN) {
2671 /* Illegal packet */
2672 if (flags & TH_RST) {
2673 goto tcp_drop;
2674 }
2675
2676 if (flags & TH_FIN) {
2677 flags &= ~TH_FIN;
2678 }
2679 } else {
2680 /* Illegal packet */
2681 if (!(flags & (TH_ACK | TH_RST))) {
2682 goto tcp_drop;
2683 }
2684 }
2685
2686 if (!(flags & TH_ACK)) {
2687 /* These flags are only valid if ACK is set */
2688 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) {
2689 goto tcp_drop;
2690 }
2691 }
2692
2693 /* Check for illegal header length */
2694 if (th->th_off < (sizeof(struct tcphdr) >> 2)) {
2695 goto tcp_drop;
2696 }
2697
2698 /* If flags changed, or reserved data set, then adjust */
2699 if (flags != th->th_flags || th->th_x2 != 0) {
2700 u_int16_t ov, nv;
2701 // Explicit __bidi_indexable is to avoid a warning false positive (rdar://119193012)
2702 uint8_t *__bidi_indexable th_iter = (uint8_t * __bidi_indexable)(struct tcphdr *__bidi_indexable) th;
2703
2704 ov = *(u_int16_t *)(void *)(th_iter + offsetof(struct tcphdr, th_ack) + sizeof(th->th_ack));
2705 th->th_flags = flags;
2706 th->th_x2 = 0;
2707 nv = *(u_int16_t *)(void *)(th_iter + offsetof(struct tcphdr, th_ack) + sizeof(th->th_ack));
2708
2709 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
2710 rewrite = 1;
2711 }
2712
2713 /* Remove urgent pointer, if TH_URG is not set */
2714 if (!(flags & TH_URG) && th->th_urp) {
2715 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
2716 th->th_urp = 0;
2717 rewrite = 1;
2718 }
2719
2720 /* copy back packet headers if we sanitized */
2721 /* Process options */
2722 if (r->max_mss) {
2723 int rv = pf_normalize_tcpopt(r, dir, kif, pd, pbuf, th, off,
2724 &rewrite);
2725 if (rv == PF_DROP) {
2726 return rv;
2727 }
2728 pbuf = pd->mp;
2729 }
2730
2731 if (rewrite) {
2732 if (pf_lazy_makewritable(pd, pbuf,
2733 off + sizeof(*th)) == NULL) {
2734 REASON_SET(&reason, PFRES_MEMORY);
2735 if (r->log) {
2736 PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason,
2737 r, 0, 0, pd);
2738 }
2739 return PF_DROP;
2740 }
2741
2742 pbuf_copy_back(pbuf, off, sizeof(*th), th, sizeof(*th));
2743 }
2744
2745 return PF_PASS;
2746
2747 tcp_drop:
2748 REASON_SET(&reason, PFRES_NORM);
2749 if (rm != NULL && r->log) {
2750 PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, r, NULL, NULL, pd);
2751 }
2752 return PF_DROP;
2753 }
2754
/*
 * Allocate and initialize the per-peer scrub state for a new TCP
 * connection.  Records the initial TTL/hop-limit, and -- on SYN
 * segments carrying a timestamp option -- seeds the timestamp
 * modulation state used later for PAWS enforcement.
 *
 * Returns 0 on success, 1 if the scrub-state pool allocation fails.
 */
int
pf_normalize_tcp_init(pbuf_t *pbuf, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
{
#pragma unused(dst)
	u_int32_t tsval, tsecr;
	u_int8_t hdr[60];	/* max TCP header incl. options (60 bytes) */

	VERIFY(src->scrub == NULL);

	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	if (src->scrub == NULL) {
		return 1;
	}
	bzero(src->scrub, sizeof(*src->scrub));

	/* Record the initial TTL/hop limit for later enforcement. */
	switch (pd->af) {
#if INET
	case AF_INET: {
		struct ip *__single h = pbuf->pb_data;
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#endif /* INET */
	case AF_INET6: {
		struct ip6_hdr *__single h = pbuf->pb_data;
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
	}


	/*
	 * All normalizations below are only begun if we see the start of
	 * the connections. They must all set an enabled bit in pfss_flags
	 */
	if ((th->th_flags & TH_SYN) == 0) {
		return 0;
	}


	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(pbuf, off, hdr, sizeof(hdr), th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		u_int8_t *opt = hdr + sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				/* single-byte options */
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					src->scrub->pfss_ts_mod =
					    htonl(random());

					/* note PFSS_PAWS not set yet */
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* fall through to advance past the option */
				OS_FALLTHROUGH;
			default:
				/* MAX(..., 2) guards against len-0 options */
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return 0;
}
2836
2837 void
pf_normalize_tcp_cleanup(struct pf_state * state)2838 pf_normalize_tcp_cleanup(struct pf_state *state)
2839 {
2840 if (state->src.scrub) {
2841 pool_put(&pf_state_scrub_pl, state->src.scrub);
2842 }
2843 if (state->dst.scrub) {
2844 pool_put(&pf_state_scrub_pl, state->dst.scrub);
2845 }
2846
2847 /* Someday... flush the TCP segment reassembly descriptors. */
2848 }
2849
2850 int
pf_normalize_tcp_stateful(pbuf_t * pbuf,int off,struct pf_pdesc * pd,u_short * reason,struct tcphdr * th,struct pf_state * state,struct pf_state_peer * src,struct pf_state_peer * dst,int * writeback)2851 pf_normalize_tcp_stateful(pbuf_t *pbuf, int off, struct pf_pdesc *pd,
2852 u_short *reason, struct tcphdr *th, struct pf_state *state,
2853 struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
2854 {
2855 struct timeval uptime;
2856 u_int32_t tsval = 0, tsecr = 0;
2857 u_int tsval_from_last;
2858 u_int8_t hdr[60];
2859 u_int8_t *opt;
2860 int copyback = 0;
2861 int got_ts = 0;
2862
2863 VERIFY(src->scrub || dst->scrub);
2864
2865 /*
2866 * Enforce the minimum TTL seen for this connection. Negate a common
2867 * technique to evade an intrusion detection system and confuse
2868 * firewall state code.
2869 */
2870 switch (pd->af) {
2871 #if INET
2872 case AF_INET: {
2873 if (src->scrub) {
2874 struct ip *__single h = pbuf->pb_data;
2875 if (h->ip_ttl > src->scrub->pfss_ttl) {
2876 src->scrub->pfss_ttl = h->ip_ttl;
2877 }
2878 h->ip_ttl = src->scrub->pfss_ttl;
2879 }
2880 break;
2881 }
2882 #endif /* INET */
2883 case AF_INET6: {
2884 if (src->scrub) {
2885 struct ip6_hdr *__single h = pbuf->pb_data;
2886 if (h->ip6_hlim > src->scrub->pfss_ttl) {
2887 src->scrub->pfss_ttl = h->ip6_hlim;
2888 }
2889 h->ip6_hlim = src->scrub->pfss_ttl;
2890 }
2891 break;
2892 }
2893 }
2894
2895 if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
2896 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
2897 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
2898 pf_pull_hdr(pbuf, off, hdr, sizeof(hdr), th->th_off << 2, NULL, NULL, pd->af)) {
2899 /* Diddle with TCP options */
2900 int hlen;
2901 opt = hdr + sizeof(struct tcphdr);
2902 hlen = (th->th_off << 2) - sizeof(struct tcphdr);
2903 while (hlen >= TCPOLEN_TIMESTAMP) {
2904 switch (*opt) {
2905 case TCPOPT_EOL: /* FALLTHROUGH */
2906 case TCPOPT_NOP:
2907 opt++;
2908 hlen--;
2909 break;
2910 case TCPOPT_TIMESTAMP:
2911 /*
2912 * Modulate the timestamps. Can be used for
2913 * NAT detection, OS uptime determination or
2914 * reboot detection.
2915 */
2916
2917 if (got_ts) {
2918 /* Huh? Multiple timestamps!? */
2919 if (pf_status.debug >= PF_DEBUG_MISC) {
2920 DPFPRINTF(("multiple TS??"));
2921 pf_print_state(state);
2922 printf("\n");
2923 }
2924 REASON_SET(reason, PFRES_TS);
2925 return PF_DROP;
2926 }
2927 if (opt[1] >= TCPOLEN_TIMESTAMP) {
2928 memcpy(&tsval, &opt[2],
2929 sizeof(u_int32_t));
2930 if (tsval && src->scrub &&
2931 (src->scrub->pfss_flags &
2932 PFSS_TIMESTAMP)) {
2933 tsval = ntohl(tsval);
2934 pf_change_a(&opt[2],
2935 &th->th_sum,
2936 htonl(tsval +
2937 src->scrub->pfss_ts_mod),
2938 0);
2939 copyback = 1;
2940 }
2941
2942 /* Modulate TS reply iff valid (!0) */
2943 memcpy(&tsecr, &opt[6],
2944 sizeof(u_int32_t));
2945 if (tsecr && dst->scrub &&
2946 (dst->scrub->pfss_flags &
2947 PFSS_TIMESTAMP)) {
2948 tsecr = ntohl(tsecr)
2949 - dst->scrub->pfss_ts_mod;
2950 pf_change_a(&opt[6],
2951 &th->th_sum, htonl(tsecr),
2952 0);
2953 copyback = 1;
2954 }
2955 got_ts = 1;
2956 }
2957 OS_FALLTHROUGH;
2958 default:
2959 hlen -= MAX(opt[1], 2);
2960 opt += MAX(opt[1], 2);
2961 break;
2962 }
2963 }
2964 if (copyback) {
2965 /* Copyback the options, caller copys back header */
2966 int optoff = off + sizeof(*th);
2967 int optlen = (th->th_off << 2) - sizeof(*th);
2968 if (pf_lazy_makewritable(pd, pbuf, optoff + optlen) ==
2969 NULL) {
2970 REASON_SET(reason, PFRES_MEMORY);
2971 return PF_DROP;
2972 }
2973 *writeback = optoff + optlen;
2974 pbuf_copy_back(pbuf, optoff, optlen, hdr + sizeof(*th), sizeof(hdr) - sizeof(*th));
2975 }
2976 }
2977
2978
2979 /*
2980 * Must invalidate PAWS checks on connections idle for too long.
2981 * The fastest allowed timestamp clock is 1ms. That turns out to
2982 * be about 24 days before it wraps. XXX Right now our lowerbound
2983 * TS echo check only works for the first 12 days of a connection
2984 * when the TS has exhausted half its 32bit space
2985 */
2986 #define TS_MAX_IDLE (24*24*60*60)
2987 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */
2988
2989 getmicrouptime(&uptime);
2990 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
2991 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
2992 pf_time_second() - state->creation > TS_MAX_CONN)) {
2993 if (pf_status.debug >= PF_DEBUG_MISC) {
2994 DPFPRINTF(("src idled out of PAWS\n"));
2995 pf_print_state(state);
2996 printf("\n");
2997 }
2998 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
2999 | PFSS_PAWS_IDLED;
3000 }
3001 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
3002 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
3003 if (pf_status.debug >= PF_DEBUG_MISC) {
3004 DPFPRINTF(("dst idled out of PAWS\n"));
3005 pf_print_state(state);
3006 printf("\n");
3007 }
3008 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
3009 | PFSS_PAWS_IDLED;
3010 }
3011
3012 if (got_ts && src->scrub && dst->scrub &&
3013 (src->scrub->pfss_flags & PFSS_PAWS) &&
3014 (dst->scrub->pfss_flags & PFSS_PAWS)) {
3015 /*
3016 * Validate that the timestamps are "in-window".
3017 * RFC1323 describes TCP Timestamp options that allow
3018 * measurement of RTT (round trip time) and PAWS
3019 * (protection against wrapped sequence numbers). PAWS
3020 * gives us a set of rules for rejecting packets on
3021 * long fat pipes (packets that were somehow delayed
3022 * in transit longer than the time it took to send the
3023 * full TCP sequence space of 4Gb). We can use these
3024 * rules and infer a few others that will let us treat
3025 * the 32bit timestamp and the 32bit echoed timestamp
3026 * as sequence numbers to prevent a blind attacker from
3027 * inserting packets into a connection.
3028 *
3029 * RFC1323 tells us:
3030 * - The timestamp on this packet must be greater than
3031 * or equal to the last value echoed by the other
3032 * endpoint. The RFC says those will be discarded
3033 * since it is a dup that has already been acked.
3034 * This gives us a lowerbound on the timestamp.
3035 * timestamp >= other last echoed timestamp
3036 * - The timestamp will be less than or equal to
3037 * the last timestamp plus the time between the
3038 * last packet and now. The RFC defines the max
3039 * clock rate as 1ms. We will allow clocks to be
3040 * up to 10% fast and will allow a total difference
3041 * or 30 seconds due to a route change. And this
3042 * gives us an upperbound on the timestamp.
3043 * timestamp <= last timestamp + max ticks
3044 * We have to be careful here. Windows will send an
3045 * initial timestamp of zero and then initialize it
3046 * to a random value after the 3whs; presumably to
3047 * avoid a DoS by having to call an expensive RNG
3048 * during a SYN flood. Proof MS has at least one
3049 * good security geek.
3050 *
3051 * - The TCP timestamp option must also echo the other
3052 * endpoints timestamp. The timestamp echoed is the
3053 * one carried on the earliest unacknowledged segment
3054 * on the left edge of the sequence window. The RFC
3055 * states that the host will reject any echoed
3056 * timestamps that were larger than any ever sent.
3057 * This gives us an upperbound on the TS echo.
3058 * tescr <= largest_tsval
3059 * - The lowerbound on the TS echo is a little more
3060 * tricky to determine. The other endpoint's echoed
3061 * values will not decrease. But there may be
3062 * network conditions that re-order packets and
3063 * cause our view of them to decrease. For now the
3064 * only lowerbound we can safely determine is that
3065 * the TS echo will never be less than the original
3066 * TS. XXX There is probably a better lowerbound.
3067 * Remove TS_MAX_CONN with better lowerbound check.
3068 * tescr >= other original TS
3069 *
3070 * It is also important to note that the fastest
3071 * timestamp clock of 1ms will wrap its 32bit space in
3072 * 24 days. So we just disable TS checking after 24
3073 * days of idle time. We actually must use a 12d
3074 * connection limit until we can come up with a better
3075 * lowerbound to the TS echo check.
3076 */
3077 struct timeval delta_ts;
3078 int ts_fudge;
3079
3080
3081 /*
3082 * PFTM_TS_DIFF is how many seconds of leeway to allow
3083 * a host's timestamp. This can happen if the previous
3084 * packet got delayed in transit for much longer than
3085 * this packet.
3086 */
3087 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) {
3088 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
3089 }
3090
3091
3092 /* Calculate max ticks since the last timestamp */
3093 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */
3094 #define TS_MICROSECS 1000000 /* microseconds per second */
3095 timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
3096 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
3097 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS / TS_MAXFREQ);
3098
3099
3100 if ((src->state >= TCPS_ESTABLISHED &&
3101 dst->state >= TCPS_ESTABLISHED) &&
3102 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
3103 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
3104 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
3105 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
3106 /*
3107 * Bad RFC1323 implementation or an insertion attack.
3108 *
3109 * - Solaris 2.6 and 2.7 are known to send another ACK
3110 * after the FIN,FIN|ACK,ACK closing that carries
3111 * an old timestamp.
3112 */
3113
3114 DPFPRINTF(("Timestamp failed %c%c%c%c\n",
3115 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
3116 SEQ_GT(tsval, src->scrub->pfss_tsval +
3117 tsval_from_last) ? '1' : ' ',
3118 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
3119 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
3120 DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u "
3121 "idle: %lus %ums\n",
3122 tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
3123 delta_ts.tv_usec / 1000));
3124 DPFPRINTF((" src->tsval: %u tsecr: %u\n",
3125 src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
3126 DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u\n",
3127 dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr,
3128 dst->scrub->pfss_tsval0));
3129 if (pf_status.debug >= PF_DEBUG_MISC) {
3130 pf_print_state(state);
3131 pf_print_flags(th->th_flags);
3132 printf("\n");
3133 }
3134 REASON_SET(reason, PFRES_TS);
3135 return PF_DROP;
3136 }
3137
3138 /* XXX I'd really like to require tsecr but it's optional */
3139 } else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
3140 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
3141 || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
3142 src->scrub && dst->scrub &&
3143 (src->scrub->pfss_flags & PFSS_PAWS) &&
3144 (dst->scrub->pfss_flags & PFSS_PAWS)) {
3145 /*
3146 * Didn't send a timestamp. Timestamps aren't really useful
3147 * when:
3148 * - connection opening or closing (often not even sent).
3149 * but we must not let an attacker to put a FIN on a
3150 * data packet to sneak it through our ESTABLISHED check.
3151 * - on a TCP reset. RFC suggests not even looking at TS.
3152 * - on an empty ACK. The TS will not be echoed so it will
3153 * probably not help keep the RTT calculation in sync and
3154 * there isn't as much danger when the sequence numbers
3155 * got wrapped. So some stacks don't include TS on empty
3156 * ACKs :-(
3157 *
3158 * To minimize the disruption to mostly RFC1323 conformant
3159 * stacks, we will only require timestamps on data packets.
3160 *
3161 * And what do ya know, we cannot require timestamps on data
3162 * packets. There appear to be devices that do legitimate
3163 * TCP connection hijacking. There are HTTP devices that allow
3164 * a 3whs (with timestamps) and then buffer the HTTP request.
3165 * If the intermediate device has the HTTP response cache, it
3166 * will spoof the response but not bother timestamping its
3167 * packets. So we can look for the presence of a timestamp in
3168 * the first data packet and if there, require it in all future
3169 * packets.
3170 */
3171
3172 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
3173 /*
3174 * Hey! Someone tried to sneak a packet in. Or the
3175 * stack changed its RFC1323 behavior?!?!
3176 */
3177 if (pf_status.debug >= PF_DEBUG_MISC) {
3178 DPFPRINTF(("Did not receive expected RFC1323 "
3179 "timestamp\n"));
3180 pf_print_state(state);
3181 pf_print_flags(th->th_flags);
3182 printf("\n");
3183 }
3184 REASON_SET(reason, PFRES_TS);
3185 return PF_DROP;
3186 }
3187 }
3188
3189
3190 /*
3191 * We will note if a host sends his data packets with or without
3192 * timestamps. And require all data packets to contain a timestamp
3193 * if the first does. PAWS implicitly requires that all data packets be
3194 * timestamped. But I think there are middle-man devices that hijack
3195 * TCP streams immediately after the 3whs and don't timestamp their
3196 * packets (seen in a WWW accelerator or cache).
3197 */
3198 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
3199 (PFSS_TIMESTAMP | PFSS_DATA_TS | PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
3200 if (got_ts) {
3201 src->scrub->pfss_flags |= PFSS_DATA_TS;
3202 } else {
3203 src->scrub->pfss_flags |= PFSS_DATA_NOTS;
3204 if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
3205 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
3206 /* Don't warn if other host rejected RFC1323 */
3207 DPFPRINTF(("Broken RFC1323 stack did not "
3208 "timestamp data packet. Disabled PAWS "
3209 "security.\n"));
3210 pf_print_state(state);
3211 pf_print_flags(th->th_flags);
3212 printf("\n");
3213 }
3214 }
3215 }
3216
3217
3218 /*
3219 * Update PAWS values
3220 */
3221 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
3222 (PFSS_PAWS_IDLED | PFSS_TIMESTAMP))) {
3223 getmicrouptime(&src->scrub->pfss_last);
3224 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
3225 (src->scrub->pfss_flags & PFSS_PAWS) == 0) {
3226 src->scrub->pfss_tsval = tsval;
3227 }
3228
3229 if (tsecr) {
3230 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
3231 (src->scrub->pfss_flags & PFSS_PAWS) == 0) {
3232 src->scrub->pfss_tsecr = tsecr;
3233 }
3234
3235 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
3236 (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
3237 src->scrub->pfss_tsval0 == 0)) {
3238 /* tsval0 MUST be the lowest timestamp */
3239 src->scrub->pfss_tsval0 = tsval;
3240 }
3241
3242 /* Only fully initialized after a TS gets echoed */
3243 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) {
3244 src->scrub->pfss_flags |= PFSS_PAWS;
3245 }
3246 }
3247 }
3248
3249 /* I have a dream.... TCP segment reassembly.... */
3250 return 0;
3251 }
3252
3253 static __attribute__((noinline)) int
pf_normalize_tcpopt(struct pf_rule * r,int dir,struct pfi_kif * kif,struct pf_pdesc * pd,pbuf_t * pbuf,struct tcphdr * th,int off,int * rewrptr)3254 pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif,
3255 struct pf_pdesc *pd, pbuf_t *pbuf, struct tcphdr *th, int off,
3256 int *rewrptr)
3257 {
3258 #pragma unused(dir, kif)
3259 sa_family_t af = pd->af;
3260 u_int16_t *mss;
3261 int thoff;
3262 int opt, cnt, optlen = 0;
3263 int rewrite = 0;
3264 u_char opts[MAX_TCPOPTLEN];
3265 u_char *optp = opts;
3266
3267 thoff = th->th_off << 2;
3268 cnt = thoff - sizeof(struct tcphdr);
3269
3270 if (cnt > 0 && !pf_pull_hdr(pbuf, off + sizeof(*th), opts, sizeof(opts), cnt,
3271 NULL, NULL, af)) {
3272 return PF_DROP;
3273 }
3274
3275 for (; cnt > 0; cnt -= optlen, optp += optlen) {
3276 opt = optp[0];
3277 if (opt == TCPOPT_EOL) {
3278 break;
3279 }
3280 if (opt == TCPOPT_NOP) {
3281 optlen = 1;
3282 } else {
3283 if (cnt < 2) {
3284 break;
3285 }
3286 optlen = optp[1];
3287 if (optlen < 2 || optlen > cnt) {
3288 break;
3289 }
3290 }
3291 switch (opt) {
3292 case TCPOPT_MAXSEG:
3293 mss = (u_int16_t *)(void *)(optp + 2);
3294 if ((ntohs(*mss)) > r->max_mss) {
3295 /*
3296 * <[email protected]>
3297 * Only do the TCP checksum fixup if delayed
3298 * checksum calculation will not be performed.
3299 */
3300 if (pbuf->pb_ifp ||
3301 !(*pbuf->pb_csum_flags & CSUM_TCP)) {
3302 th->th_sum = pf_cksum_fixup(th->th_sum,
3303 *mss, htons(r->max_mss), 0);
3304 }
3305 *mss = htons(r->max_mss);
3306 rewrite = 1;
3307 }
3308 break;
3309 default:
3310 break;
3311 }
3312 }
3313
3314 if (rewrite) {
3315 u_short reason;
3316
3317 VERIFY(pbuf == pd->mp);
3318
3319 if (pf_lazy_makewritable(pd, pd->mp,
3320 off + sizeof(*th) + thoff) == NULL) {
3321 REASON_SET(&reason, PFRES_MEMORY);
3322 if (r->log) {
3323 PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason,
3324 r, 0, 0, pd);
3325 }
3326 return PF_DROP;
3327 }
3328
3329 *rewrptr = 1;
3330 pbuf_copy_back(pd->mp, off + sizeof(*th), thoff - sizeof(*th), opts, sizeof(opts));
3331 }
3332
3333 return PF_PASS;
3334 }
3335