xref: /xnu-12377.61.12/bsd/net/pf_norm.c (revision 4d495c6e23c53686cf65f45067f79024cf5dcee8)
1 /*
2  * Copyright (c) 2007-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*	$apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */
30 /*	$OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */
31 
32 /*
33  * Copyright 2001 Niels Provos <[email protected]>
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
46  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
47  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
48  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
49  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
50  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
51  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
52  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
53  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
54  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
55  */
56 
57 #include <sys/param.h>
58 #include <sys/systm.h>
59 #include <sys/mbuf.h>
60 #include <sys/filio.h>
61 #include <sys/fcntl.h>
62 #include <sys/socket.h>
63 #include <sys/kernel.h>
64 #include <sys/time.h>
65 #include <sys/random.h>
66 #include <sys/mcache.h>
67 
68 #include <net/if.h>
69 #include <net/if_types.h>
70 #include <net/bpf.h>
71 #include <net/route.h>
72 #include <net/if_pflog.h>
73 
74 #include <netinet/in.h>
75 #include <netinet/in_var.h>
76 #include <netinet/in_systm.h>
77 #include <netinet/ip.h>
78 #include <netinet/ip_var.h>
79 #include <netinet/tcp.h>
80 #include <netinet/tcp_seq.h>
81 #include <netinet/tcp_fsm.h>
82 #include <netinet/udp.h>
83 #include <netinet/ip_icmp.h>
84 
85 #include <netinet/ip6.h>
86 #include <netinet6/ip6_var.h>
87 
88 #include <net/pfvar.h>
89 #include <net/droptap.h>
90 
91 struct pf_frent {
92 	LIST_ENTRY(pf_frent)    fr_next;
93 	struct mbuf             *fr_m;
94 #define fr_ip           fr_u.fru_ipv4
95 #define fr_ip6          fr_u.fru_ipv6
96 	union {
97 		struct ip       *fru_ipv4;
98 		struct ip6_hdr  *fru_ipv6;
99 	} fr_u;
100 	struct ip6_frag         fr_ip6f_opt;
101 	uint16_t                fr_ip6f_hlen;   /* total header length */
102 	uint16_t                fr_ip6f_extoff; /* last extension header offset or 0 */
103 };
104 
105 struct pf_frcache {
106 	LIST_ENTRY(pf_frcache) fr_next;
107 	uint16_t        fr_off;
108 	uint16_t        fr_end;
109 };
110 
111 #define PFFRAG_SEENLAST 0x0001          /* Seen the last fragment for this packet */
112 #define PFFRAG_NOBUFFER 0x0002          /* Non-buffering fragment cache */
113 #define PFFRAG_DROP     0x0004          /* Drop all fragments */
114 #define BUFFER_FRAGMENTS(fr)    (!((fr)->fr_flags & PFFRAG_NOBUFFER))
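/*
 * Two tracking modes are used below: buffering entries keep the actual
 * fragment mbufs queued on pf_fragqueue/pf_frag_tree until pf_reassemble()
 * or pf_reassemble6() can rebuild the datagram, while PFFRAG_NOBUFFER
 * entries on pf_cachequeue/pf_cache_tree only record the byte ranges that
 * have already been passed (pf_fragcache()/pf_frag6cache()).
 * BUFFER_FRAGMENTS(fr) is therefore false for cache entries.
 */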
115 
116 struct pf_fragment {
117 	RB_ENTRY(pf_fragment) fr_entry;
118 	TAILQ_ENTRY(pf_fragment) frag_next;
119 	struct pf_addr  fr_srcx;
120 	struct pf_addr  fr_dstx;
121 	u_int8_t        fr_p;           /* protocol of this fragment */
122 	u_int8_t        fr_flags;       /* status flags */
123 	u_int16_t       fr_max;         /* fragment data max */
124 #define fr_id           fr_uid.fru_id4
125 #define fr_id6          fr_uid.fru_id6
126 	union {
127 		u_int16_t       fru_id4;
128 		u_int32_t       fru_id6;
129 	} fr_uid;
130 	int             fr_af;
131 	u_int32_t       fr_timeout;
132 #define fr_queue        fr_u.fru_queue
133 #define fr_cache        fr_u.fru_cache
134 	union {
135 		LIST_HEAD(pf_fragq, pf_frent) fru_queue;        /* buffering */
136 		LIST_HEAD(pf_cacheq, pf_frcache) fru_cache;     /* non-buf */
137 	} fr_u;
138 	uint32_t        fr_csum_flags;  /* checksum flags */
139 	uint32_t        fr_csum;        /* partial checksum value */
140 	uint16_t        fr_ip6_maxlen;  /* maximum length of a single fragment in IPv6 */
141 };
142 
143 static TAILQ_HEAD(pf_fragqueue, pf_fragment)    pf_fragqueue;
144 static TAILQ_HEAD(pf_cachequeue, pf_fragment)   pf_cachequeue;
145 
146 static __inline int  pf_frag_compare(struct pf_fragment *,
147     struct pf_fragment *);
148 static RB_HEAD(pf_frag_tree, pf_fragment)       pf_frag_tree, pf_cache_tree;
149 RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry,
150     pf_frag_compare);
151 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
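/*
 * Both red-black trees use pf_frag_compare() as their ordering function;
 * lookups are keyed on (fr_af, fr_p, fr_id/fr_id6, fr_srcx, fr_dstx), with
 * the key filled in by pf_ip2key() for IPv4 and pf_ip6hdr2key() for IPv6.
 */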
152 
153 /* Private prototypes */
154 static void pf_ip6hdr2key(struct pf_fragment *, struct ip6_hdr *,
155     struct ip6_frag *);
156 static void pf_ip2key(struct pf_fragment *, struct ip *);
157 static void pf_remove_fragment(struct pf_fragment *);
158 static void pf_flush_fragments(void);
159 static void pf_free_fragment(struct pf_fragment *);
160 static struct pf_fragment *pf_find_fragment_by_key(struct pf_fragment *,
161     struct pf_frag_tree *);
162 static __inline struct pf_fragment *
163 pf_find_fragment_by_ipv4_header(struct ip *, struct pf_frag_tree *);
164 static struct mbuf *pf_reassemble(struct mbuf *, struct pf_fragment **,
165     struct pf_frent *, int);
166 static struct mbuf *pf_fragcache(struct mbuf **, struct ip *,
167     struct pf_fragment **, int, int, int *);
168 static int pf_normalize_tcpopt(struct pf_rule *, int, struct pfi_kif *,
169     struct pf_pdesc *, pbuf_t *, struct tcphdr *, int, int *);
170 static __inline struct pf_fragment *
171 pf_find_fragment_by_ipv6_header(struct ip6_hdr *, struct ip6_frag *,
172     struct pf_frag_tree *);
173 static struct mbuf *pf_reassemble6(struct mbuf **, struct pf_fragment **,
174     struct pf_frent *, int);
175 static struct mbuf *pf_frag6cache(struct mbuf **, struct ip6_hdr*,
176     struct ip6_frag *, struct pf_fragment **, int, int, int, int *);
177 
178 #define DPFPRINTF(x) do {                               \
179 	if (pf_status.debug >= PF_DEBUG_MISC) {         \
180 	        printf("%s: ", __func__);               \
181 	        printf x ;                              \
182 	}                                               \
183 } while (0)
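/*
 * Example invocation (as used throughout this file): the argument list is
 * parenthesized twice because the macro expands a single printf-style
 * argument pack, and it only prints when pf_status.debug is at least
 * PF_DEBUG_MISC, e.g.
 *
 *	DPFPRINTF(("missing fragment at %d, max %d\n", off, (*frag)->fr_max));
 */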
184 
185 /* Globals */
186 struct pool              pf_frent_pl, pf_frag_pl;
187 static struct pool       pf_cache_pl, pf_cent_pl;
188 struct pool              pf_state_scrub_pl;
189 
190 static int               pf_nfrents, pf_ncache;
191 
192 void
193 pf_normalize_init(void)
194 {
195 	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
196 	    NULL);
197 	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
198 	    NULL);
199 	pool_init(&pf_cache_pl, sizeof(struct pf_fragment), 0, 0, 0,
200 	    "pffrcache", NULL);
201 	pool_init(&pf_cent_pl, sizeof(struct pf_frcache), 0, 0, 0, "pffrcent",
202 	    NULL);
203 	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
204 	    "pfstscr", NULL);
205 
206 	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
207 	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
208 	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
209 	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
210 
211 	TAILQ_INIT(&pf_fragqueue);
212 	TAILQ_INIT(&pf_cachequeue);
213 }
214 
215 #if 0
216 void
217 pf_normalize_destroy(void)
218 {
219 	pool_destroy(&pf_state_scrub_pl);
220 	pool_destroy(&pf_cent_pl);
221 	pool_destroy(&pf_cache_pl);
222 	pool_destroy(&pf_frag_pl);
223 	pool_destroy(&pf_frent_pl);
224 }
225 #endif
226 
227 int
228 pf_normalize_isempty(void)
229 {
230 	return TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue);
231 }
232 
233 static __inline int
234 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
235 {
236 	int     diff;
237 
238 	if ((diff = a->fr_af - b->fr_af)) {
239 		return diff;
240 	} else if ((diff = a->fr_p - b->fr_p)) {
241 		return diff;
242 	} else {
243 		struct pf_addr *sa = &a->fr_srcx;
244 		struct pf_addr *sb = &b->fr_srcx;
245 		struct pf_addr *da = &a->fr_dstx;
246 		struct pf_addr *db = &b->fr_dstx;
247 
248 		switch (a->fr_af) {
249 #ifdef INET
250 		case AF_INET:
251 			if ((diff = a->fr_id - b->fr_id)) {
252 				return diff;
253 			} else if (sa->v4addr.s_addr < sb->v4addr.s_addr) {
254 				return -1;
255 			} else if (sa->v4addr.s_addr > sb->v4addr.s_addr) {
256 				return 1;
257 			} else if (da->v4addr.s_addr < db->v4addr.s_addr) {
258 				return -1;
259 			} else if (da->v4addr.s_addr > db->v4addr.s_addr) {
260 				return 1;
261 			}
262 			break;
263 #endif
264 		case AF_INET6:
265 			if ((diff = a->fr_id6 - b->fr_id6)) {
266 				return diff;
267 			} else if (sa->addr32[3] < sb->addr32[3]) {
268 				return -1;
269 			} else if (sa->addr32[3] > sb->addr32[3]) {
270 				return 1;
271 			} else if (sa->addr32[2] < sb->addr32[2]) {
272 				return -1;
273 			} else if (sa->addr32[2] > sb->addr32[2]) {
274 				return 1;
275 			} else if (sa->addr32[1] < sb->addr32[1]) {
276 				return -1;
277 			} else if (sa->addr32[1] > sb->addr32[1]) {
278 				return 1;
279 			} else if (sa->addr32[0] < sb->addr32[0]) {
280 				return -1;
281 			} else if (sa->addr32[0] > sb->addr32[0]) {
282 				return 1;
283 			} else if (da->addr32[3] < db->addr32[3]) {
284 				return -1;
285 			} else if (da->addr32[3] > db->addr32[3]) {
286 				return 1;
287 			} else if (da->addr32[2] < db->addr32[2]) {
288 				return -1;
289 			} else if (da->addr32[2] > db->addr32[2]) {
290 				return 1;
291 			} else if (da->addr32[1] < db->addr32[1]) {
292 				return -1;
293 			} else if (da->addr32[1] > db->addr32[1]) {
294 				return 1;
295 			} else if (da->addr32[0] < db->addr32[0]) {
296 				return -1;
297 			} else if (da->addr32[0] > db->addr32[0]) {
298 				return 1;
299 			}
300 			break;
301 		default:
302 			VERIFY(0 && "only IPv4 and IPv6 supported!");
303 			break;
304 		}
305 	}
306 	return 0;
307 }
308 
309 void
310 pf_purge_expired_fragments(void)
311 {
312 	struct pf_fragment *frag;
313 	u_int32_t expire = pf_time_second() -
314 	    pf_default_rule.timeout[PFTM_FRAG];
315 
316 	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
317 		VERIFY(BUFFER_FRAGMENTS(frag));
318 		if (frag->fr_timeout > expire) {
319 			break;
320 		}
321 
322 		switch (frag->fr_af) {
323 		case AF_INET:
324 			DPFPRINTF(("expiring IPv4 %d(0x%llx) from queue.\n",
325 			    ntohs(frag->fr_id),
326 			    (uint64_t)VM_KERNEL_ADDRHASH(frag)));
327 			break;
328 		case AF_INET6:
329 			DPFPRINTF(("expiring IPv6 %d(0x%llx) from queue.\n",
330 			    ntohl(frag->fr_id6),
331 			    (uint64_t)VM_KERNEL_ADDRHASH(frag)));
332 			break;
333 		default:
334 			VERIFY(0 && "only IPv4 and IPv6 supported");
335 			break;
336 		}
337 		pf_free_fragment(frag);
338 	}
339 
340 	while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) {
341 		VERIFY(!BUFFER_FRAGMENTS(frag));
342 		if (frag->fr_timeout > expire) {
343 			break;
344 		}
345 
346 		switch (frag->fr_af) {
347 		case AF_INET:
348 			DPFPRINTF(("expiring IPv4 %d(0x%llx) from cache.\n",
349 			    ntohs(frag->fr_id),
350 			    (uint64_t)VM_KERNEL_ADDRHASH(frag)));
351 			break;
352 		case AF_INET6:
353 			DPFPRINTF(("expiring IPv6 %d(0x%llx) from cache.\n",
354 			    ntohl(frag->fr_id6),
355 			    (uint64_t)VM_KERNEL_ADDRHASH(frag)));
356 			break;
357 		default:
358 			VERIFY(0 && "only IPv4 and IPv6 supported");
359 			break;
360 		}
361 		pf_free_fragment(frag);
362 		VERIFY(TAILQ_EMPTY(&pf_cachequeue) ||
363 		    TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag);
364 	}
365 }
366 
367 /*
368  * Try to flush old fragments to make space for new ones
369  */
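/*
 * A rough sketch of the policy below: each pass computes a goal of 90% of
 * the current entry count and evicts the oldest entries until the count
 * drops to that goal; e.g. with pf_nfrents == 200, fragments are freed
 * until no more than 180 remain (pf_free_fragment() decrements the
 * counters as it goes).
 */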
370 
371 static void
372 pf_flush_fragments(void)
373 {
374 	struct pf_fragment      *frag;
375 	int                      goal;
376 
377 	goal = pf_nfrents * 9 / 10;
378 	DPFPRINTF(("trying to free > %d frents\n",
379 	    pf_nfrents - goal));
380 	while (goal < pf_nfrents) {
381 		frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue);
382 		if (frag == NULL) {
383 			break;
384 		}
385 		pf_free_fragment(frag);
386 	}
387 
388 
389 	goal = pf_ncache * 9 / 10;
390 	DPFPRINTF(("trying to free > %d cache entries\n",
391 	    pf_ncache - goal));
392 	while (goal < pf_ncache) {
393 		frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue);
394 		if (frag == NULL) {
395 			break;
396 		}
397 		pf_free_fragment(frag);
398 	}
399 }
400 
401 /* Frees the fragments and all associated entries */
402 
403 static void
404 pf_free_fragment(struct pf_fragment *frag)
405 {
406 	struct pf_frent         *frent;
407 	struct pf_frcache       *frcache;
408 
409 	/* Free all fragments */
410 	if (BUFFER_FRAGMENTS(frag)) {
411 		for (frent = LIST_FIRST(&frag->fr_queue); frent;
412 		    frent = LIST_FIRST(&frag->fr_queue)) {
413 			LIST_REMOVE(frent, fr_next);
414 
415 			m_freem(frent->fr_m);
416 			pool_put(&pf_frent_pl, frent);
417 			pf_nfrents--;
418 		}
419 	} else {
420 		for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
421 		    frcache = LIST_FIRST(&frag->fr_cache)) {
422 			LIST_REMOVE(frcache, fr_next);
423 
424 			VERIFY(LIST_EMPTY(&frag->fr_cache) ||
425 			    LIST_FIRST(&frag->fr_cache)->fr_off >
426 			    frcache->fr_end);
427 
428 			pool_put(&pf_cent_pl, frcache);
429 			pf_ncache--;
430 		}
431 	}
432 
433 	pf_remove_fragment(frag);
434 }
435 
436 static void
437 pf_ip6hdr2key(struct pf_fragment *key, struct ip6_hdr *ip6,
438     struct ip6_frag *fh)
439 {
440 	key->fr_p = fh->ip6f_nxt;
441 	key->fr_id6 = fh->ip6f_ident;
442 	key->fr_af = AF_INET6;
443 	key->fr_srcx.v6addr = ip6->ip6_src;
444 	key->fr_dstx.v6addr = ip6->ip6_dst;
445 }
446 
447 static void
448 pf_ip2key(struct pf_fragment *key, struct ip *ip)
449 {
450 	key->fr_p = ip->ip_p;
451 	key->fr_id = ip->ip_id;
452 	key->fr_af = AF_INET;
453 	key->fr_srcx.v4addr.s_addr = ip->ip_src.s_addr;
454 	key->fr_dstx.v4addr.s_addr = ip->ip_dst.s_addr;
455 }
456 
457 static struct pf_fragment *
458 pf_find_fragment_by_key(struct pf_fragment *key, struct pf_frag_tree *tree)
459 {
460 	struct pf_fragment *frag;
461 
462 	frag = RB_FIND(pf_frag_tree, tree, key);
463 	if (frag != NULL) {
464 		/* XXX Are we sure we want to update the timeout? */
465 		frag->fr_timeout = pf_time_second();
466 		if (BUFFER_FRAGMENTS(frag)) {
467 			TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
468 			TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
469 		} else {
470 			TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
471 			TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next);
472 		}
473 	}
474 
475 	return frag;
476 }
477 
478 static __attribute__((noinline)) struct pf_fragment *
479 pf_find_fragment_by_ipv4_header(struct ip *ip, struct pf_frag_tree *tree)
480 {
481 	struct pf_fragment key;
482 	pf_ip2key(&key, ip);
483 	return pf_find_fragment_by_key(&key, tree);
484 }
485 
486 /* Removes a fragment from the fragment queue and frees the fragment */
487 static void
488 pf_remove_fragment(struct pf_fragment *frag)
489 {
490 	if (BUFFER_FRAGMENTS(frag)) {
491 		RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
492 		TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
493 		pool_put(&pf_frag_pl, frag);
494 	} else {
495 		RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag);
496 		TAILQ_REMOVE(&pf_cachequeue, frag, frag_next);
497 		pool_put(&pf_cache_pl, frag);
498 	}
499 }
500 
501 #define FR_IP_OFF(fr)   ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
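/*
 * FR_IP_OFF() converts the 13-bit IPv4 fragment offset (stored in 8-byte
 * units) into a byte offset; e.g. a fragment whose masked offset field is
 * 185 starts at byte 185 * 8 == 1480 of the reassembled payload.
 */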
502 static struct mbuf *
503 pf_reassemble(struct mbuf *m0, struct pf_fragment **frag,
504     struct pf_frent *frent, int mff)
505 {
506 	struct mbuf     *m = m0, *m2;
507 	struct pf_frent *frea, *next;
508 	struct pf_frent *frep = NULL;
509 	struct ip       *ip = frent->fr_ip;
510 	uint32_t         hlen = ip->ip_hl << 2;
511 	u_int16_t        off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
512 	u_int16_t        ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
513 	u_int16_t        fr_max = ip_len + off;
514 	uint32_t         csum, csum_flags;
515 
516 	VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));
517 
518 	/*
519 	 * Leverage partial checksum offload for IP fragments.  Narrow down
520 	 * the scope to cover only UDP without IP options, as that is the
521 	 * most common case.
522 	 *
523 	 * Perform 1's complement adjustment of octets that got included/
524 	 * excluded in the hardware-calculated checksum value.  Ignore cases
525 	 * where the value includes the entire IPv4 header span, as the sum
526 	 * for those octets would already be 0 by the time we get here; IP
527 	 * has already performed its header checksum validation.  Also take
528 	 * care of any trailing bytes and subtract out their partial sum.
529 	 */
530 	if (ip->ip_p == IPPROTO_UDP && hlen == sizeof(struct ip) &&
531 	    (m->m_pkthdr.csum_flags &
532 	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
533 	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
534 		uint32_t start = m->m_pkthdr.csum_rx_start;
535 		int32_t trailer = (m_pktlen(m) - ntohs(ip->ip_len));
536 		uint32_t swbytes = (uint32_t)trailer;
537 
538 		csum = m->m_pkthdr.csum_rx_val;
539 
540 		ASSERT(trailer >= 0);
541 		if ((start != 0 && start != hlen) || trailer != 0) {
542 #if BYTE_ORDER != BIG_ENDIAN
543 			if (start < hlen) {
544 				HTONS(ip->ip_len);
545 				HTONS(ip->ip_off);
546 			}
547 #endif /* BYTE_ORDER != BIG_ENDIAN */
548 			/* callee folds in sum */
549 			csum = m_adj_sum16(m, start, hlen,
550 			    (ip->ip_len - hlen), csum);
551 			if (hlen > start) {
552 				swbytes += (hlen - start);
553 			} else {
554 				swbytes += (start - hlen);
555 			}
556 #if BYTE_ORDER != BIG_ENDIAN
557 			if (start < hlen) {
558 				NTOHS(ip->ip_off);
559 				NTOHS(ip->ip_len);
560 			}
561 #endif /* BYTE_ORDER != BIG_ENDIAN */
562 		}
563 		csum_flags = m->m_pkthdr.csum_flags;
564 
565 		if (swbytes != 0) {
566 			udp_in_cksum_stats(swbytes);
567 		}
568 		if (trailer != 0) {
569 			m_adj(m, -trailer);
570 		}
571 	} else {
572 		csum = 0;
573 		csum_flags = 0;
574 	}
575 
576 	/* Invalidate checksum */
577 	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
578 
579 	/* Strip off ip header */
580 	m->m_data += hlen;
581 	m->m_len -= hlen;
582 
583 	/* Create a new reassembly queue for this packet */
584 	if (*frag == NULL) {
585 		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
586 		if (*frag == NULL) {
587 			pf_flush_fragments();
588 			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
589 			if (*frag == NULL) {
590 				goto drop_fragment;
591 			}
592 		}
593 
594 		(*frag)->fr_flags = 0;
595 		(*frag)->fr_max = 0;
596 		(*frag)->fr_af = AF_INET;
597 		(*frag)->fr_srcx.v4addr = frent->fr_ip->ip_src;
598 		(*frag)->fr_dstx.v4addr = frent->fr_ip->ip_dst;
599 		(*frag)->fr_p = frent->fr_ip->ip_p;
600 		(*frag)->fr_id = frent->fr_ip->ip_id;
601 		(*frag)->fr_timeout = pf_time_second();
602 		if (csum_flags != 0) {
603 			(*frag)->fr_csum_flags = csum_flags;
604 			(*frag)->fr_csum = csum;
605 		}
606 		LIST_INIT(&(*frag)->fr_queue);
607 
608 		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
609 		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
610 
611 		/* We do not have a previous fragment */
612 		frep = NULL;
613 		goto insert;
614 	}
615 
616 	/*
617 	 * If this fragment contains similar checksum offload info
618 	 * as that of the existing ones, accumulate checksum.  Otherwise,
619 	 * invalidate checksum offload info for the entire datagram.
620 	 */
621 	if (csum_flags != 0 && csum_flags == (*frag)->fr_csum_flags) {
622 		(*frag)->fr_csum += csum;
623 	} else if ((*frag)->fr_csum_flags != 0) {
624 		(*frag)->fr_csum_flags = 0;
625 	}
626 
627 	/*
628 	 * Find a fragment after the current one:
629 	 *  - off contains the real shifted offset.
630 	 */
631 	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
632 		if (FR_IP_OFF(frea) > off) {
633 			break;
634 		}
635 		frep = frea;
636 	}
637 
638 	VERIFY(frep != NULL || frea != NULL);
639 
640 	if (frep != NULL &&
641 	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
642 	    4 > off) {
643 		u_int16_t       precut;
644 
645 		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
646 		    frep->fr_ip->ip_hl * 4 - off;
647 		if (precut >= ip_len) {
648 			goto drop_fragment;
649 		}
650 		m_adj(frent->fr_m, precut);
651 		DPFPRINTF(("overlap -%d\n", precut));
652 		/* Enforce 8 byte boundaries */
653 		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
654 		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
655 		ip_len -= precut;
656 		ip->ip_len = htons(ip_len);
657 	}
658 
659 	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
660 	    frea = next) {
661 		u_int16_t       aftercut;
662 
663 		aftercut = ip_len + off - FR_IP_OFF(frea);
664 		DPFPRINTF(("adjust overlap %d\n", aftercut));
665 		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
666 		    * 4) {
667 			frea->fr_ip->ip_len =
668 			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
669 			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
670 			    (aftercut >> 3));
671 			m_adj(frea->fr_m, aftercut);
672 			break;
673 		}
674 
675 		/* This fragment is completely overlapped, lose it */
676 		next = LIST_NEXT(frea, fr_next);
677 		m_freem(frea->fr_m);
678 		LIST_REMOVE(frea, fr_next);
679 		pool_put(&pf_frent_pl, frea);
680 		pf_nfrents--;
681 	}
682 
683 insert:
684 	/* Update maximum data size */
685 	if ((*frag)->fr_max < fr_max) {
686 		(*frag)->fr_max = fr_max;
687 	}
688 	/* This is the last segment */
689 	if (!mff) {
690 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
691 	}
692 
693 	if (frep == NULL) {
694 		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
695 	} else {
696 		LIST_INSERT_AFTER(frep, frent, fr_next);
697 	}
698 
699 	/* Check if we are completely reassembled */
700 	if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) {
701 		return NULL;
702 	}
703 
704 	/* Check if we have all the data */
705 	off = 0;
706 	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
707 		next = LIST_NEXT(frep, fr_next);
708 
709 		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
710 		if (off < (*frag)->fr_max &&
711 		    (next == NULL || FR_IP_OFF(next) != off)) {
712 			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
713 			    off, next == NULL ? -1 : FR_IP_OFF(next),
714 			    (*frag)->fr_max));
715 			return NULL;
716 		}
717 	}
718 	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
719 	if (off < (*frag)->fr_max) {
720 		return NULL;
721 	}
722 
723 	/* We have all the data */
724 	frent = LIST_FIRST(&(*frag)->fr_queue);
725 	VERIFY(frent != NULL);
726 	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
727 		DPFPRINTF(("drop: too big: %d\n", off));
728 		pf_free_fragment(*frag);
729 		*frag = NULL;
730 		return NULL;
731 	}
732 	next = LIST_NEXT(frent, fr_next);
733 
734 	/* Magic from ip_input */
735 	ip = frent->fr_ip;
736 	m = frent->fr_m;
737 	m2 = m->m_next;
738 	m->m_next = NULL;
739 	m_cat(m, m2);
740 	pool_put(&pf_frent_pl, frent);
741 	pf_nfrents--;
742 	for (frent = next; frent != NULL; frent = next) {
743 		next = LIST_NEXT(frent, fr_next);
744 
745 		m2 = frent->fr_m;
746 		pool_put(&pf_frent_pl, frent);
747 		pf_nfrents--;
748 		m_cat(m, m2);
749 	}
750 
751 	ip->ip_src = (*frag)->fr_srcx.v4addr;
752 	ip->ip_dst = (*frag)->fr_dstx.v4addr;
753 
754 	if ((*frag)->fr_csum_flags != 0) {
755 		csum = (*frag)->fr_csum;
756 
757 		ADDCARRY(csum);
758 
759 		m->m_pkthdr.csum_rx_val = csum;
760 		m->m_pkthdr.csum_rx_start = sizeof(struct ip);
761 		m->m_pkthdr.csum_flags = (*frag)->fr_csum_flags;
762 	} else if ((m->m_pkthdr.rcvif != NULL &&
763 	    m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
764 	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
765 		/* loopback checksums are always OK */
766 		m->m_pkthdr.csum_data = 0xffff;
767 		m->m_pkthdr.csum_flags =
768 		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
769 		    CSUM_IP_CHECKED | CSUM_IP_VALID;
770 	}
771 
772 	/* Remove from fragment queue */
773 	pf_remove_fragment(*frag);
774 	*frag = NULL;
775 
776 	hlen = ip->ip_hl << 2;
777 	ip->ip_len = htons(off + hlen);
778 	m->m_len += hlen;
779 	m->m_data -= hlen;
780 
781 	/* some debugging cruft by sklower, below, will go away soon */
782 	/* XXX this should be done elsewhere */
783 	if (m->m_flags & M_PKTHDR) {
784 		int plen = 0;
785 		for (m2 = m; m2; m2 = m2->m_next) {
786 			plen += m2->m_len;
787 		}
788 		m->m_pkthdr.len = plen;
789 	}
790 
791 	DPFPRINTF(("complete: 0x%llx(%d)\n",
792 	    (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip->ip_len)));
793 	return m;
794 
795 drop_fragment:
796 	/* Oops - fail safe - drop packet */
797 	pool_put(&pf_frent_pl, frent);
798 	pf_nfrents--;
799 	m_drop(m, DROPTAP_FLAG_DIR_IN, DROP_REASON_PF_BAD_FRAGMENT, NULL, 0);
800 	return NULL;
801 }
802 
803 static __attribute__((noinline)) struct mbuf *
804 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
805     int drop, int *nomem)
806 {
807 	struct mbuf             *__single m = *m0;
808 	struct pf_frcache       *__single frp, *__single fra, *__single cur = NULL;
809 	int                      ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
810 	u_int16_t                off = ntohs(h->ip_off) << 3;
811 	u_int16_t                fr_max = ip_len + off;
812 	int                      hosed = 0;
813 
814 	VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
815 
816 	/* Create a new range queue for this packet */
817 	if (*frag == NULL) {
818 		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
819 		if (*frag == NULL) {
820 			pf_flush_fragments();
821 			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
822 			if (*frag == NULL) {
823 				goto no_mem;
824 			}
825 		}
826 
827 		/* Get an entry for the queue */
828 		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
829 		if (cur == NULL) {
830 			pool_put(&pf_cache_pl, *frag);
831 			*frag = NULL;
832 			goto no_mem;
833 		}
834 		pf_ncache++;
835 
836 		(*frag)->fr_flags = PFFRAG_NOBUFFER;
837 		(*frag)->fr_max = 0;
838 		(*frag)->fr_af = AF_INET;
839 		(*frag)->fr_srcx.v4addr = h->ip_src;
840 		(*frag)->fr_dstx.v4addr = h->ip_dst;
841 		(*frag)->fr_p = h->ip_p;
842 		(*frag)->fr_id = h->ip_id;
843 		(*frag)->fr_timeout = pf_time_second();
844 
845 		cur->fr_off = off;
846 		cur->fr_end = fr_max;
847 		LIST_INIT(&(*frag)->fr_cache);
848 		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
849 
850 		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
851 		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
852 
853 		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off,
854 		    fr_max));
855 
856 		goto pass;
857 	}
858 
859 	/*
860 	 * Find a fragment after the current one:
861 	 *  - off contains the real shifted offset.
862 	 */
863 	frp = NULL;
864 	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
865 		if (fra->fr_off > off) {
866 			break;
867 		}
868 		frp = fra;
869 	}
870 
871 	VERIFY(frp != NULL || fra != NULL);
872 
873 	if (frp != NULL) {
874 		int     precut;
875 
876 		precut = frp->fr_end - off;
877 		if (precut >= ip_len) {
878 			/* Fragment is entirely a duplicate */
879 			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
880 			    h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
881 			goto drop_fragment;
882 		}
883 		if (precut == 0) {
884 			/* They are adjacent.  Fixup cache entry */
885 			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
886 			    h->ip_id, frp->fr_off, frp->fr_end, off, fr_max));
887 			frp->fr_end = fr_max;
888 		} else if (precut > 0) {
889 			/*
890 			 * The first part of this payload overlaps with a
891 			 * fragment that has already been passed.
892 			 * Need to trim off the first part of the payload.
893 			 * But to do so easily, we need to create another
894 			 * mbuf to throw the original header into.
895 			 */
896 
897 			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
898 			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
899 			    fr_max));
900 
901 			off += precut;
902 			fr_max -= precut;
903 			/* Update the previous frag to encompass this one */
904 			frp->fr_end = fr_max;
905 
906 			if (!drop) {
907 				/*
908 				 * XXX Optimization opportunity
909 				 * This is a very heavy way to trim the payload.
910 				 * we could do it much faster by diddling mbuf
911 				 * We could do it much faster by diddling mbuf
912 				 * than this mbuf magic.  For my next trick,
913 				 * I'll pull a rabbit out of my laptop.
914 				 */
915 				*m0 = m_copym(m, 0, h->ip_hl << 2, M_NOWAIT);
916 				if (*m0 == NULL) {
917 					goto no_mem;
918 				}
919 				VERIFY((*m0)->m_next == NULL);
920 				m_adj(m, precut + (h->ip_hl << 2));
921 				m_cat(*m0, m);
922 				m = *m0;
923 				if (m->m_flags & M_PKTHDR) {
924 					int plen = 0;
925 					struct mbuf *t;
926 					for (t = m; t; t = t->m_next) {
927 						plen += t->m_len;
928 					}
929 					m->m_pkthdr.len = plen;
930 				}
931 
932 
933 				h = mtod(m, struct ip *);
934 
935 
936 				VERIFY((int)m->m_len ==
937 				    ntohs(h->ip_len) - precut);
938 				h->ip_off = htons(ntohs(h->ip_off) +
939 				    (precut >> 3));
940 				h->ip_len = htons(ntohs(h->ip_len) - precut);
941 			} else {
942 				hosed++;
943 			}
944 		} else {
945 			/* There is a gap between fragments */
946 
947 			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
948 			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
949 			    fr_max));
950 
951 			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
952 			if (cur == NULL) {
953 				goto no_mem;
954 			}
955 			pf_ncache++;
956 
957 			cur->fr_off = off;
958 			cur->fr_end = fr_max;
959 			LIST_INSERT_AFTER(frp, cur, fr_next);
960 		}
961 	}
962 
963 	if (fra != NULL) {
964 		int     aftercut;
965 		int     merge = 0;
966 
967 		aftercut = fr_max - fra->fr_off;
968 		if (aftercut == 0) {
969 			/* Adjacent fragments */
970 			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
971 			    h->ip_id, off, fr_max, fra->fr_off, fra->fr_end));
972 			fra->fr_off = off;
973 			merge = 1;
974 		} else if (aftercut > 0) {
975 			/* Need to chop off the tail of this fragment */
976 			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
977 			    h->ip_id, aftercut, off, fr_max, fra->fr_off,
978 			    fra->fr_end));
979 			fra->fr_off = off;
980 			fr_max -= aftercut;
981 
982 			merge = 1;
983 
984 			if (!drop) {
985 				m_adj(m, -aftercut);
986 				if (m->m_flags & M_PKTHDR) {
987 					int plen = 0;
988 					struct mbuf *t;
989 					for (t = m; t; t = t->m_next) {
990 						plen += t->m_len;
991 					}
992 					m->m_pkthdr.len = plen;
993 				}
994 				h = mtod(m, struct ip *);
995 				VERIFY((int)m->m_len ==
996 				    ntohs(h->ip_len) - aftercut);
997 				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
998 			} else {
999 				hosed++;
1000 			}
1001 		} else if (frp == NULL) {
1002 			/* There is a gap between fragments */
1003 			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
1004 			    h->ip_id, -aftercut, off, fr_max, fra->fr_off,
1005 			    fra->fr_end));
1006 
1007 			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1008 			if (cur == NULL) {
1009 				goto no_mem;
1010 			}
1011 			pf_ncache++;
1012 
1013 			cur->fr_off = off;
1014 			cur->fr_end = fr_max;
1015 			LIST_INSERT_BEFORE(fra, cur, fr_next);
1016 		}
1017 
1018 
1019 		/* Need to glue together two separate fragment descriptors */
1020 		if (merge) {
1021 			if (cur && fra->fr_off <= cur->fr_end) {
1022 				/* Need to merge in a previous 'cur' */
1023 				DPFPRINTF(("fragcache[%d]: adjacent(merge "
1024 				    "%d-%d) %d-%d (%d-%d)\n",
1025 				    h->ip_id, cur->fr_off, cur->fr_end, off,
1026 				    fr_max, fra->fr_off, fra->fr_end));
1027 				fra->fr_off = cur->fr_off;
1028 				LIST_REMOVE(cur, fr_next);
1029 				pool_put(&pf_cent_pl, cur);
1030 				pf_ncache--;
1031 				cur = NULL;
1032 			} else if (frp && fra->fr_off <= frp->fr_end) {
1033 				/* Need to merge in a modified 'frp' */
1034 				VERIFY(cur == NULL);
1035 				DPFPRINTF(("fragcache[%d]: adjacent(merge "
1036 				    "%d-%d) %d-%d (%d-%d)\n",
1037 				    h->ip_id, frp->fr_off, frp->fr_end, off,
1038 				    fr_max, fra->fr_off, fra->fr_end));
1039 				fra->fr_off = frp->fr_off;
1040 				LIST_REMOVE(frp, fr_next);
1041 				pool_put(&pf_cent_pl, frp);
1042 				pf_ncache--;
1043 				frp = NULL;
1044 			}
1045 		}
1046 	}
1047 
1048 	if (hosed) {
1049 		/*
1050 		 * We must keep tracking the overall fragment even when
1051 		 * we're going to drop it anyway so that we know when to
1052 		 * free the overall descriptor.  Thus we drop the frag late.
1053 		 */
1054 		goto drop_fragment;
1055 	}
1056 
1057 
1058 pass:
1059 	/* Update maximum data size */
1060 	if ((*frag)->fr_max < fr_max) {
1061 		(*frag)->fr_max = fr_max;
1062 	}
1063 
1064 	/* This is the last segment */
1065 	if (!mff) {
1066 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
1067 	}
1068 
1069 	/* Check if we are completely reassembled */
1070 	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
1071 	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
1072 	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
1073 		/* Remove from fragment queue */
1074 		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
1075 		    (*frag)->fr_max));
1076 		pf_free_fragment(*frag);
1077 		*frag = NULL;
1078 	}
1079 
1080 	return m;
1081 
1082 no_mem:
1083 	*nomem = 1;
1084 
1085 	/* Still need to pay attention to !IP_MF */
1086 	if (!mff && *frag != NULL) {
1087 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
1088 	}
1089 
1090 	m_drop(m, DROPTAP_FLAG_DIR_IN,
1091 	    DROP_REASON_PF_MEM_ALLOC, NULL, 0);
1092 	return NULL;
1093 
1094 drop_fragment:
1095 
1096 	/* Still need to pay attention to !IP_MF */
1097 	if (!mff && *frag != NULL) {
1098 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
1099 	}
1100 
1101 	if (drop) {
1102 		/* This fragment has been deemed bad.  Don't reass */
1103 		if (((*frag)->fr_flags & PFFRAG_DROP) == 0) {
1104 			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
1105 			    h->ip_id));
1106 		}
1107 		(*frag)->fr_flags |= PFFRAG_DROP;
1108 	}
1109 
1110 	m_drop(m, DROPTAP_FLAG_DIR_IN,
1111 	    DROP_REASON_PF_BAD_FRAGMENT, NULL, 0);
1112 	return NULL;
1113 }
1114 
1115 #define FR_IP6_OFF(fr) \
1116 	(ntohs((fr)->fr_ip6f_opt.ip6f_offlg & IP6F_OFF_MASK))
1117 #define FR_IP6_PLEN(fr) (ntohs((fr)->fr_ip6->ip6_plen))
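/*
 * Unlike FR_IP_OFF() above, no extra shift is needed here: the IPv6
 * fragment offset occupies the high-order 13 bits of ip6f_offlg, so
 * masking with IP6F_OFF_MASK and converting to host order already yields
 * the offset in bytes; e.g. a masked, host-order value of 0x05a8 means the
 * fragment starts at byte 1448 (181 eight-byte units) of the payload.
 */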
1118 struct mbuf *
1119 pf_reassemble6(struct mbuf **m0, struct pf_fragment **frag,
1120     struct pf_frent *frent, int mff)
1121 {
1122 	struct mbuf *__single m, *__single m2;
1123 	struct pf_frent *__single frea, *__single frep, *__single next;
1124 	struct ip6_hdr *__single ip6;
1125 	struct ip6_frag *__single ip6f;
1126 	int plen, off, fr_max, pktlen;
1127 	uint32_t uoff, csum, csum_flags;
1128 
1129 	VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag));
1130 	m = *m0;
1131 	frep = NULL;
1132 	ip6 = frent->fr_ip6;
1133 	ip6f = &frent->fr_ip6f_opt;
1134 	off = FR_IP6_OFF(frent);
1135 	uoff = frent->fr_ip6f_hlen;
1136 	plen = FR_IP6_PLEN(frent);
1137 	fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof(*ip6));
1138 	pktlen = plen + sizeof(*ip6);
1139 
1140 	DPFPRINTF(("0x%llx IPv6 frag plen %u off %u fr_ip6f_hlen %u "
1141 	    "fr_max %u m_len %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off,
1142 	    frent->fr_ip6f_hlen, fr_max, m->m_len));
1143 
1144 	/*
1145 	 * Leverage partial checksum offload for simple UDP/IP fragments,
1146 	 * as that is the most common case.
1147 	 *
1148 	 * Perform 1's complement adjustment of octets that got included/
1149 	 * excluded in the hardware-calculated checksum value.  Also take
1150 	 * care of any trailing bytes and subtract out their partial sum.
1151 	 */
1152 	if (ip6f->ip6f_nxt == IPPROTO_UDP &&
1153 	    uoff == (sizeof(*ip6) + sizeof(*ip6f)) &&
1154 	    (m->m_pkthdr.csum_flags &
1155 	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
1156 	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
1157 		uint32_t start = m->m_pkthdr.csum_rx_start;
1158 		uint32_t ip_len = (sizeof(*ip6) + ntohs(ip6->ip6_plen));
1159 		int32_t trailer = (m_pktlen(m) - ip_len);
1160 		uint32_t swbytes = (uint32_t)trailer;
1161 
1162 		csum = m->m_pkthdr.csum_rx_val;
1163 
1164 		ASSERT(trailer >= 0);
1165 		if (start != uoff || trailer != 0) {
1166 			uint16_t s = 0, d = 0;
1167 
1168 			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
1169 				s = ip6->ip6_src.s6_addr16[1];
1170 				ip6->ip6_src.s6_addr16[1] = 0;
1171 			}
1172 			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
1173 				d = ip6->ip6_dst.s6_addr16[1];
1174 				ip6->ip6_dst.s6_addr16[1] = 0;
1175 			}
1176 
1177 			/* callee folds in sum */
1178 			csum = m_adj_sum16(m, start, uoff,
1179 			    (ip_len - uoff), csum);
1180 			if (uoff > start) {
1181 				swbytes += (uoff - start);
1182 			} else {
1183 				swbytes += (start - uoff);
1184 			}
1185 
1186 			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
1187 				ip6->ip6_src.s6_addr16[1] = s;
1188 			}
1189 			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
1190 				ip6->ip6_dst.s6_addr16[1] = d;
1191 			}
1192 		}
1193 		csum_flags = m->m_pkthdr.csum_flags;
1194 
1195 		if (swbytes != 0) {
1196 			udp_in6_cksum_stats(swbytes);
1197 		}
1198 		if (trailer != 0) {
1199 			m_adj(m, -trailer);
1200 		}
1201 	} else {
1202 		csum = 0;
1203 		csum_flags = 0;
1204 	}
1205 
1206 	/* Invalidate checksum */
1207 	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
1208 
1209 	/* strip off headers up to the fragment payload */
1210 	m->m_data += frent->fr_ip6f_hlen;
1211 	m->m_len -= frent->fr_ip6f_hlen;
1212 
1213 	/* Create a new reassembly queue for this packet */
1214 	if (*frag == NULL) {
1215 		*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
1216 		if (*frag == NULL) {
1217 			pf_flush_fragments();
1218 			*frag = pool_get(&pf_frag_pl, PR_NOWAIT);
1219 			if (*frag == NULL) {
1220 				goto drop_fragment;
1221 			}
1222 		}
1223 
1224 		(*frag)->fr_flags = 0;
1225 		(*frag)->fr_max = 0;
1226 		(*frag)->fr_ip6_maxlen = pktlen;
1227 		(*frag)->fr_af = AF_INET6;
1228 		(*frag)->fr_srcx.v6addr = frent->fr_ip6->ip6_src;
1229 		(*frag)->fr_dstx.v6addr = frent->fr_ip6->ip6_dst;
1230 		(*frag)->fr_p = frent->fr_ip6f_opt.ip6f_nxt;
1231 		(*frag)->fr_id6 = frent->fr_ip6f_opt.ip6f_ident;
1232 		(*frag)->fr_timeout = pf_time_second();
1233 		if (csum_flags != 0) {
1234 			(*frag)->fr_csum_flags = csum_flags;
1235 			(*frag)->fr_csum = csum;
1236 		}
1237 		LIST_INIT(&(*frag)->fr_queue);
1238 
1239 		RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag);
1240 		TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next);
1241 
1242 		/* We do not have a previous fragment */
1243 		frep = NULL;
1244 		goto insert;
1245 	}
1246 
1247 	/* Remember maximum fragment len for refragmentation */
1248 	if (pktlen > (*frag)->fr_ip6_maxlen) {
1249 		(*frag)->fr_ip6_maxlen = pktlen;
1250 	}
1251 	/*
1252 	 * If this fragment contains similar checksum offload info
1253 	 * as that of the existing ones, accumulate checksum.  Otherwise,
1254 	 * invalidate checksum offload info for the entire datagram.
1255 	 */
1256 	if (csum_flags != 0 && csum_flags == (*frag)->fr_csum_flags) {
1257 		(*frag)->fr_csum += csum;
1258 	} else if ((*frag)->fr_csum_flags != 0) {
1259 		(*frag)->fr_csum_flags = 0;
1260 	}
1261 
1262 	/*
1263 	 * Find a fragment after the current one:
1264 	 *  - off contains the real shifted offset.
1265 	 */
1266 	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
1267 		if (FR_IP6_OFF(frea) > off) {
1268 			break;
1269 		}
1270 		frep = frea;
1271 	}
1272 
1273 	VERIFY(frep != NULL || frea != NULL);
1274 
1275 	if (frep != NULL &&
1276 	    FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) - frep->fr_ip6f_hlen > off) {
1277 		u_int16_t precut;
1278 
1279 		precut = FR_IP6_OFF(frep) + FR_IP6_PLEN(frep) -
1280 		    frep->fr_ip6f_hlen - off;
1281 		if (precut >= plen) {
1282 			goto drop_fragment;
1283 		}
1284 		m_adj(frent->fr_m, precut);
1285 		DPFPRINTF(("overlap -%d\n", precut));
1286 		/* Enforce 8 byte boundaries */
1287 		frent->fr_ip6f_opt.ip6f_offlg =
1288 		    htons(ntohs(frent->fr_ip6f_opt.ip6f_offlg) +
1289 		    (precut >> 3));
1290 		off = FR_IP6_OFF(frent);
1291 		plen -= precut;
1292 		ip6->ip6_plen = htons(plen);
1293 	}
1294 
1295 	for (; frea != NULL && plen + off > FR_IP6_OFF(frea); frea = next) {
1296 		u_int16_t       aftercut;
1297 
1298 		aftercut = plen + off - FR_IP6_OFF(frea);
1299 		DPFPRINTF(("adjust overlap %d\n", aftercut));
1300 		if (aftercut < FR_IP6_PLEN(frea) - frea->fr_ip6f_hlen) {
1301 			frea->fr_ip6->ip6_plen = htons(FR_IP6_PLEN(frea) -
1302 			    aftercut);
1303 			frea->fr_ip6f_opt.ip6f_offlg =
1304 			    htons(ntohs(frea->fr_ip6f_opt.ip6f_offlg) +
1305 			    (aftercut >> 3));
1306 			m_adj(frea->fr_m, aftercut);
1307 			break;
1308 		}
1309 
1310 		/* This fragment is completely overlapped, lose it */
1311 		next = LIST_NEXT(frea, fr_next);
1312 		m_freem(frea->fr_m);
1313 		LIST_REMOVE(frea, fr_next);
1314 		pool_put(&pf_frent_pl, frea);
1315 		pf_nfrents--;
1316 	}
1317 
1318 insert:
1319 	/* Update maximum data size */
1320 	if ((*frag)->fr_max < fr_max) {
1321 		(*frag)->fr_max = fr_max;
1322 	}
1323 	/* This is the last segment */
1324 	if (!mff) {
1325 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
1326 	}
1327 
1328 	if (frep == NULL) {
1329 		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
1330 	} else {
1331 		LIST_INSERT_AFTER(frep, frent, fr_next);
1332 	}
1333 
1334 	/* Check if we are completely reassembled */
1335 	if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) {
1336 		return NULL;
1337 	}
1338 
1339 	/* Check if we have all the data */
1340 	off = 0;
1341 	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
1342 		next = LIST_NEXT(frep, fr_next);
1343 		off += FR_IP6_PLEN(frep) - (frent->fr_ip6f_hlen - sizeof *ip6);
1344 		DPFPRINTF(("frep at %d, next %d, max %d\n",
1345 		    off, next == NULL ? -1 : FR_IP6_OFF(next),
1346 		    (*frag)->fr_max));
1347 		if (off < (*frag)->fr_max &&
1348 		    (next == NULL || FR_IP6_OFF(next) != off)) {
1349 			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
1350 			    off, next == NULL ? -1 : FR_IP6_OFF(next),
1351 			    (*frag)->fr_max));
1352 			return NULL;
1353 		}
1354 	}
1355 	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
1356 	if (off < (*frag)->fr_max) {
1357 		return NULL;
1358 	}
1359 
1360 	/* We have all the data */
1361 	frent = LIST_FIRST(&(*frag)->fr_queue);
1362 	VERIFY(frent != NULL);
1363 	if (frent->fr_ip6f_hlen + off > IP_MAXPACKET) {
1364 		DPFPRINTF(("drop: too big: %d\n", off));
1365 		pf_free_fragment(*frag);
1366 		*frag = NULL;
1367 		return NULL;
1368 	}
1369 
1370 	ASSERT(*frag != NULL);
1371 	ASSERT(frent != NULL);
1372 	next = LIST_NEXT(frent, fr_next);
1373 	if (next == NULL) {
1374 		DPFPRINTF(("drop: atomic fragment\n"));
1375 		pf_free_fragment(*frag);
1376 		*frag = NULL;
1377 		return NULL;
1378 	}
1379 
1380 	/* retrieve the values to be filled into the reassembled tag */
1381 	uint16_t hdrlen, unfragpartlen, extoff, maxlen;
1382 	uint32_t id;
1383 
1384 	/* Get total extension header length from the first fragment */
1385 	hdrlen = frent->fr_ip6f_hlen - sizeof(struct ip6_frag);
1386 	/*
1387 	 * Get total extension header length of per-fragment headers from the
1388 	 * subsequent fragment.
1389 	 */
1390 	unfragpartlen = next->fr_ip6f_hlen - sizeof(struct ip6_frag);
1391 	extoff = frent->fr_ip6f_extoff;
1392 	maxlen = (*frag)->fr_ip6_maxlen;
1393 	id = (*frag)->fr_id6;
1394 
1395 	ip6 = frent->fr_ip6;
1396 	ip6->ip6_nxt = (*frag)->fr_p;
1397 	ip6->ip6_plen = htons(off);
1398 	ip6->ip6_src = (*frag)->fr_srcx.v6addr;
1399 	ip6->ip6_dst = (*frag)->fr_dstx.v6addr;
1400 
1401 	if ((*frag)->fr_csum_flags != 0) {
1402 		csum = (*frag)->fr_csum;
1403 
1404 		ADDCARRY(csum);
1405 
1406 		m->m_pkthdr.csum_rx_val = csum;
1407 		m->m_pkthdr.csum_rx_start = sizeof(struct ip6_hdr);
1408 		m->m_pkthdr.csum_flags = (*frag)->fr_csum_flags;
1409 	} else if ((m->m_pkthdr.rcvif != NULL &&
1410 	    m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
1411 	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1412 		/* loopback checksums are always OK */
1413 		m->m_pkthdr.csum_data = 0xffff;
1414 		m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1415 	}
1416 
1417 	/* Remove from fragment queue */
1418 	pf_remove_fragment(*frag);
1419 	*frag = NULL;
1420 
1421 	m = frent->fr_m;
1422 	m->m_len += sizeof(struct ip6_hdr);
1423 	m->m_data -= sizeof(struct ip6_hdr);
1424 	memmove(m_mtod_current(m), ip6, sizeof(struct ip6_hdr));
1425 
1426 	next = LIST_NEXT(frent, fr_next);
1427 	pool_put(&pf_frent_pl, frent);
1428 	pf_nfrents--;
1429 	for (frent = next; next != NULL; frent = next) {
1430 		m2 = frent->fr_m;
1431 
1432 		m_cat(m, m2);
1433 		next = LIST_NEXT(frent, fr_next);
1434 		pool_put(&pf_frent_pl, frent);
1435 		pf_nfrents--;
1436 	}
1437 
1438 	/* XXX this should be done elsewhere */
1439 	if (m->m_flags & M_PKTHDR) {
1440 		int len = 0;
1441 		for (m2 = m; m2; m2 = m2->m_next) {
1442 			len += m2->m_len;
1443 		}
1444 		m->m_pkthdr.len = len;
1445 	}
1446 
1447 	DPFPRINTF(("complete: 0x%llx ip6_plen %d m_pkthdr.len %d\n",
1448 	    (uint64_t)VM_KERNEL_ADDRHASH(m), ntohs(ip6->ip6_plen),
1449 	    m->m_pkthdr.len));
1450 
1451 	/* Add the reassembled tag */
1452 	struct m_tag *mtag;
1453 	struct pf_fragment_tag *ftag;
1454 	mtag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS,
1455 	    sizeof(*ftag), M_NOWAIT, m);
1456 	if (mtag == NULL) {
1457 		/* XXX: add stats */
1458 		m_drop(m, DROPTAP_FLAG_DIR_IN, DROP_REASON_PF_MEM_ALLOC, NULL, 0);
1459 		return NULL;
1460 	}
1461 	ftag = (struct pf_fragment_tag *)mtag->m_tag_data;
1462 	ftag->ft_hdrlen = hdrlen;
1463 	ftag->ft_unfragpartlen = unfragpartlen;
1464 	ftag->ft_extoff = extoff;
1465 	ftag->ft_maxlen = maxlen;
1466 	ftag->ft_id = id;
1467 	m_tag_prepend(m, mtag);
1468 
1469 	struct pf_mtag *pftag = pf_get_mtag(m);
1470 	ASSERT(pftag != NULL);
1471 	pftag->pftag_flags |= PF_TAG_REASSEMBLED;
1472 	return m;
1473 
1474 drop_fragment:
1475 	/* Oops - fail safe - drop packet */
1476 	pool_put(&pf_frent_pl, frent);
1477 	--pf_nfrents;
1478 	m_drop(m, DROPTAP_FLAG_DIR_IN, DROP_REASON_PF_BAD_FRAGMENT, NULL, 0);
1479 	return NULL;
1480 }
1481 
1482 static __attribute__((noinline)) struct mbuf *
1483 pf_frag6cache(struct mbuf **m0, struct ip6_hdr *h, struct ip6_frag *fh,
1484     struct pf_fragment **frag, int hlen, int mff, int drop, int *nomem)
1485 {
1486 	struct mbuf *__single m = *m0;
1487 	u_int16_t plen, off, fr_max;
1488 	struct pf_frcache *__single frp, *__single fra, *__single cur = NULL;
1489 	int hosed = 0;
1490 
1491 	VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag));
1492 	m = *m0;
1493 	off = ntohs(fh->ip6f_offlg & IP6F_OFF_MASK);
1494 	plen = ntohs(h->ip6_plen) - (hlen - sizeof *h);
1495 
1496 	/*
1497 	 * Apple Modification: [email protected]. The hlen being passed
1498 	 * into this function includes all the headers associated with
1499 	 * the packet, and may include routing headers, so to get to
1500 	 * the data payload as stored in the original IPv6 header we need
1501 	 * to subtract all those headers and the IP header.
1502 	 *
1503 	 * The 'fr_max' local variable should also contain the offset from the start
1504 	 * of the reassembled packet to the octet just past the end of the octets
1505 	 * in the current fragment, where:
1506 	 * - 'off' is the offset from the start of the reassembled packet to the
1507 	 *   first octet in the fragment,
1508 	 * - 'plen' is the "payload data length" excluding all the
1509 	 *   IPv6 headers of the fragment.
1510 	 * - 'hlen' is computed in pf_normalize_ip6() as the offset from the start
1511 	 *   of the IPv6 packet to the beginning of the data.
1512 	 */
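	/*
	 * Worked example (illustrative): when the unfragmentable part is just
	 * the 40-byte IPv6 header plus the 8-byte fragment header, hlen == 48,
	 * so plen == ntohs(ip6_plen) - 8 and fr_max == off + plen points one
	 * past the last payload octet carried by this fragment.
	 */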
1513 	fr_max = off + plen;
1514 
1515 	DPFPRINTF(("0x%llx plen %u off %u fr_max %u\n",
1516 	    (uint64_t)VM_KERNEL_ADDRHASH(m), plen, off, fr_max));
1517 
1518 	/* Create a new range queue for this packet */
1519 	if (*frag == NULL) {
1520 		*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
1521 		if (*frag == NULL) {
1522 			pf_flush_fragments();
1523 			*frag = pool_get(&pf_cache_pl, PR_NOWAIT);
1524 			if (*frag == NULL) {
1525 				goto no_mem;
1526 			}
1527 		}
1528 
1529 		/* Get an entry for the queue */
1530 		cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1531 		if (cur == NULL) {
1532 			pool_put(&pf_cache_pl, *frag);
1533 			*frag = NULL;
1534 			goto no_mem;
1535 		}
1536 		pf_ncache++;
1537 
1538 		(*frag)->fr_flags = PFFRAG_NOBUFFER;
1539 		(*frag)->fr_max = 0;
1540 		(*frag)->fr_af = AF_INET6;
1541 		(*frag)->fr_srcx.v6addr = h->ip6_src;
1542 		(*frag)->fr_dstx.v6addr = h->ip6_dst;
1543 		(*frag)->fr_p = fh->ip6f_nxt;
1544 		(*frag)->fr_id6 = fh->ip6f_ident;
1545 		(*frag)->fr_timeout = pf_time_second();
1546 
1547 		cur->fr_off = off;
1548 		cur->fr_end = fr_max;
1549 		LIST_INIT(&(*frag)->fr_cache);
1550 		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);
1551 
1552 		RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag);
1553 		TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next);
1554 
1555 		DPFPRINTF(("frag6cache[%d]: new %d-%d\n", ntohl(fh->ip6f_ident),
1556 		    off, fr_max));
1557 
1558 		goto pass;
1559 	}
1560 
1561 	/*
1562 	 * Find a fragment after the current one:
1563 	 *  - off contains the real shifted offset.
1564 	 */
1565 	frp = NULL;
1566 	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
1567 		if (fra->fr_off > off) {
1568 			break;
1569 		}
1570 		frp = fra;
1571 	}
1572 
1573 	VERIFY(frp != NULL || fra != NULL);
1574 
1575 	if (frp != NULL) {
1576 		int precut;
1577 
1578 		precut = frp->fr_end - off;
1579 		if (precut >= plen) {
1580 			/* Fragment is entirely a duplicate */
1581 			DPFPRINTF(("frag6cache[%u]: dead (%d-%d) %d-%d\n",
1582 			    ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
1583 			    off, fr_max));
1584 			goto drop_fragment;
1585 		}
1586 		if (precut == 0) {
1587 			/* They are adjacent.  Fixup cache entry */
1588 			DPFPRINTF(("frag6cache[%u]: adjacent (%d-%d) %d-%d\n",
1589 			    ntohl(fh->ip6f_ident), frp->fr_off, frp->fr_end,
1590 			    off, fr_max));
1591 			frp->fr_end = fr_max;
1592 		} else if (precut > 0) {
1593 			/* The first part of this payload overlaps with a
1594 			 * fragment that has already been passed.
1595 			 * Need to trim off the first part of the payload.
1596 			 * But to do so easily, we need to create another
1597 			 * mbuf to throw the original header into.
1598 			 */
1599 
1600 			DPFPRINTF(("frag6cache[%u]: chop %d (%d-%d) %d-%d\n",
1601 			    ntohl(fh->ip6f_ident), precut, frp->fr_off,
1602 			    frp->fr_end, off, fr_max));
1603 
1604 			off += precut;
1605 			fr_max -= precut;
1606 			/* Update the previous frag to encompass this one */
1607 			frp->fr_end = fr_max;
1608 
1609 			if (!drop) {
1610 				/* XXX Optimization opportunity
1611 				 * This is a very heavy way to trim the payload.
1612 				 * We could do it much faster by diddling mbuf
1613 				 * internals but that would be even less legible
1614 				 * than this mbuf magic.  For my next trick,
1615 				 * I'll pull a rabbit out of my laptop.
1616 				 */
1617 				*m0 = m_copym(m, 0, hlen, M_NOWAIT);
1618 				if (*m0 == NULL) {
1619 					goto no_mem;
1620 				}
1621 				VERIFY((*m0)->m_next == NULL);
1622 				m_adj(m, precut + hlen);
1623 				m_cat(*m0, m);
1624 				m = *m0;
1625 				if (m->m_flags & M_PKTHDR) {
1626 					int pktlen = 0;
1627 					struct mbuf *t;
1628 					for (t = m; t; t = t->m_next) {
1629 						pktlen += t->m_len;
1630 					}
1631 					m->m_pkthdr.len = pktlen;
1632 				}
1633 
1634 				h = mtod(m, struct ip6_hdr *);
1635 
1636 				VERIFY((int)m->m_len ==
1637 				    ntohs(h->ip6_plen) - precut);
1638 				fh->ip6f_offlg &= ~IP6F_OFF_MASK;
1639 				fh->ip6f_offlg |=
1640 				    htons(ntohs(fh->ip6f_offlg & IP6F_OFF_MASK)
1641 				    + (precut >> 3));
1642 				h->ip6_plen = htons(ntohs(h->ip6_plen) -
1643 				    precut);
1644 			} else {
1645 				hosed++;
1646 			}
1647 		} else {
1648 			/* There is a gap between fragments */
1649 
1650 			DPFPRINTF(("frag6cache[%u]: gap %d (%d-%d) %d-%d\n",
1651 			    ntohl(fh->ip6f_ident), -precut, frp->fr_off,
1652 			    frp->fr_end, off, fr_max));
1653 
1654 			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1655 			if (cur == NULL) {
1656 				goto no_mem;
1657 			}
1658 			pf_ncache++;
1659 
1660 			cur->fr_off = off;
1661 			cur->fr_end = fr_max;
1662 			LIST_INSERT_AFTER(frp, cur, fr_next);
1663 		}
1664 	}
1665 
1666 	if (fra != NULL) {
1667 		int     aftercut;
1668 		int     merge = 0;
1669 
1670 		aftercut = fr_max - fra->fr_off;
1671 		if (aftercut == 0) {
1672 			/* Adjacent fragments */
1673 			DPFPRINTF(("frag6cache[%u]: adjacent %d-%d (%d-%d)\n",
1674 			    ntohl(fh->ip6f_ident), off, fr_max, fra->fr_off,
1675 			    fra->fr_end));
1676 			fra->fr_off = off;
1677 			merge = 1;
1678 		} else if (aftercut > 0) {
1679 			/* Need to chop off the tail of this fragment */
1680 			DPFPRINTF(("frag6cache[%u]: chop %d %d-%d (%d-%d)\n",
1681 			    ntohl(fh->ip6f_ident), aftercut, off, fr_max,
1682 			    fra->fr_off, fra->fr_end));
1683 			fra->fr_off = off;
1684 			fr_max -= aftercut;
1685 
1686 			merge = 1;
1687 
1688 			if (!drop) {
1689 				m_adj(m, -aftercut);
1690 				if (m->m_flags & M_PKTHDR) {
1691 					int pktlen = 0;
1692 					struct mbuf *t;
1693 					for (t = m; t; t = t->m_next) {
1694 						pktlen += t->m_len;
1695 					}
1696 					m->m_pkthdr.len = pktlen;
1697 				}
1698 				h = mtod(m, struct ip6_hdr *);
1699 				VERIFY((int)m->m_len ==
1700 				    ntohs(h->ip6_plen) - aftercut);
1701 				h->ip6_plen =
1702 				    htons(ntohs(h->ip6_plen) - aftercut);
1703 			} else {
1704 				hosed++;
1705 			}
1706 		} else if (frp == NULL) {
1707 			/* There is a gap between fragments */
1708 			DPFPRINTF(("frag6cache[%u]: gap %d %d-%d (%d-%d)\n",
1709 			    ntohl(fh->ip6f_ident), -aftercut, off, fr_max,
1710 			    fra->fr_off, fra->fr_end));
1711 
1712 			cur = pool_get(&pf_cent_pl, PR_NOWAIT);
1713 			if (cur == NULL) {
1714 				goto no_mem;
1715 			}
1716 			pf_ncache++;
1717 
1718 			cur->fr_off = off;
1719 			cur->fr_end = fr_max;
1720 			LIST_INSERT_BEFORE(fra, cur, fr_next);
1721 		}
1722 
1723 		/* Need to glue together two separate fragment descriptors */
1724 		if (merge) {
1725 			if (cur && fra->fr_off <= cur->fr_end) {
1726 				/* Need to merge in a previous 'cur' */
1727 				DPFPRINTF(("frag6cache[%u]: adjacent(merge "
1728 				    "%d-%d) %d-%d (%d-%d)\n",
1729 				    ntohl(fh->ip6f_ident), cur->fr_off,
1730 				    cur->fr_end, off, fr_max, fra->fr_off,
1731 				    fra->fr_end));
1732 				fra->fr_off = cur->fr_off;
1733 				LIST_REMOVE(cur, fr_next);
1734 				pool_put(&pf_cent_pl, cur);
1735 				pf_ncache--;
1736 				cur = NULL;
1737 			} else if (frp && fra->fr_off <= frp->fr_end) {
1738 				/* Need to merge in a modified 'frp' */
1739 				VERIFY(cur == NULL);
1740 				DPFPRINTF(("frag6cache[%u]: adjacent(merge "
1741 				    "%d-%d) %d-%d (%d-%d)\n",
1742 				    ntohl(fh->ip6f_ident), frp->fr_off,
1743 				    frp->fr_end, off, fr_max, fra->fr_off,
1744 				    fra->fr_end));
1745 				fra->fr_off = frp->fr_off;
1746 				LIST_REMOVE(frp, fr_next);
1747 				pool_put(&pf_cent_pl, frp);
1748 				pf_ncache--;
1749 				frp = NULL;
1750 			}
1751 		}
1752 	}
1753 
1754 	if (hosed) {
1755 		/*
1756 		 * We must keep tracking the overall fragment even when
1757 		 * we're going to drop it anyway so that we know when to
1758 		 * free the overall descriptor.  Thus we drop the frag late.
1759 		 */
1760 		goto drop_fragment;
1761 	}
1762 
1763 pass:
1764 	/* Update maximum data size */
1765 	if ((*frag)->fr_max < fr_max) {
1766 		(*frag)->fr_max = fr_max;
1767 	}
1768 
1769 	/* This is the last segment */
1770 	if (!mff) {
1771 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
1772 	}
1773 
1774 	/* Check if we are completely reassembled */
1775 	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
1776 	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
1777 	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
1778 		/* Remove from fragment queue */
1779 		DPFPRINTF(("frag6cache[%u]: done 0-%d\n",
1780 		    ntohl(fh->ip6f_ident), (*frag)->fr_max));
1781 		pf_free_fragment(*frag);
1782 		*frag = NULL;
1783 	}
1784 
1785 	return m;
1786 
1787 no_mem:
1788 	*nomem = 1;
1789 
1790 	/* Still need to pay attention to !IP_MF */
1791 	if (!mff && *frag != NULL) {
1792 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
1793 	}
1794 
1795 	m_drop(m, DROPTAP_FLAG_DIR_IN, DROP_REASON_PF_MEM_ALLOC, NULL, 0);
1796 	return NULL;
1797 
1798 drop_fragment:
1799 
1800 	/* Still need to pay attention to !IP_MF */
1801 	if (!mff && *frag != NULL) {
1802 		(*frag)->fr_flags |= PFFRAG_SEENLAST;
1803 	}
1804 
1805 	if (drop) {
1806 		/* This fragment has been deemed bad.  Don't reass */
1807 		if (((*frag)->fr_flags & PFFRAG_DROP) == 0) {
1808 			DPFPRINTF(("frag6cache[%u]: dropping overall fragment\n",
1809 			    ntohl(fh->ip6f_ident)));
1810 		}
1811 		(*frag)->fr_flags |= PFFRAG_DROP;
1812 	}
1813 
1814 	m_drop(m, DROPTAP_FLAG_DIR_IN, DROP_REASON_PF_BAD_FRAGMENT, NULL, 0);
1815 	return NULL;
1816 }
1817 
1818 int
1819 pf_refragment6(struct ifnet *ifp, pbuf_t **pbufp, struct pf_fragment_tag *ftag)
1820 {
1821 	struct mbuf        *__single m;
1822 	uint32_t           frag_id;
1823 	uint16_t           hdrlen, extoff, maxlen, unfragpartlen;
1824 	uint8_t            proto;
1825 	int                error, action;
1826 	uint8_t            *__single lexthdrsp;
1827 	struct route_in6   ip6route;
1828 	struct route_in6   *__single ro;
1829 	struct sockaddr_in6     *__single dst;
1830 	struct ip6_hdr *__single hdr;
1831 	struct m_tag *__single tag;
1832 
1833 	if (pbufp == NULL || !pbuf_is_valid(*pbufp) || ftag == NULL) {
1834 		panic("pf_route6: invalid parameters");
1835 		/* NOT REACHED */
1836 	}
1837 	m = pbuf_to_mbuf(*pbufp, FALSE);
1838 	hdr = mtod(m, struct ip6_hdr *);
1839 	hdrlen = ftag->ft_hdrlen - sizeof(struct ip6_hdr);
1840 	extoff = ftag->ft_extoff;
1841 	maxlen = ftag->ft_maxlen;
1842 	frag_id = ftag->ft_id;
1843 	unfragpartlen = ftag->ft_unfragpartlen;
1844 	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS);
1845 	m_tag_delete(m, tag);
1846 	ftag = NULL;
1847 	tag = NULL;
1848 	pf_find_mtag(m)->pftag_flags &= ~PF_TAG_REASSEMBLED;
1849 	ro = &ip6route;
1850 	bzero((struct route_in6 *__bidi_indexable)ro, sizeof(*ro));
1851 	dst = (struct sockaddr_in6 *)&ro->ro_dst;
1852 	dst->sin6_family = AF_INET6;
1853 	dst->sin6_len = sizeof(*dst);
1854 	dst->sin6_addr = hdr->ip6_dst;
1855 
1856 	if (extoff) {
1857 		int off;
1858 		struct mbuf *mexthdr;
1859 
1860 		/* Use protocol from next field of last extension header */
1861 		mexthdr = m_getptr(m, extoff +
1862 		    offsetof(struct ip6_ext, ip6e_nxt), &off);
1863 		ASSERT(mexthdr != NULL);
1864 		lexthdrsp = (mtod(mexthdr, uint8_t *) + off);
1865 		proto = *lexthdrsp;
1866 		if (proto == IPPROTO_DSTOPTS) {
1867 			struct ip6_ext ext;
1868 			if (!pf_pull_hdr(*pbufp, off, &ext, sizeof(ext), sizeof(ext), NULL,
1869 			    NULL, AF_INET6)) {
1870 				DPFPRINTF(("pkt too short"));
1871 				action = PF_DROP;
1872 				goto done;
1873 			}
1874 			proto = ext.ip6e_nxt;
1875 		}
1876 	} else {
1877 		lexthdrsp = NULL;
1878 		proto = hdr->ip6_nxt;
1879 	}
1880 
1881 	/*
1882 	 * The MTU must be a multiple of 8 bytes, or we risk doing the
1883 	 * fragmentation wrong.
1884 	 */
1885 	maxlen = maxlen & ~7;
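	/*
	 * Illustrative note (editor's addition): clearing the low three bits
	 * rounds the fragment size down to a multiple of 8, e.g. a 1500-byte
	 * limit becomes 1496.  The IPv6 fragment offset field counts in
	 * 8-byte units, so every non-final fragment must carry a multiple of
	 * 8 bytes of payload.
	 */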
1886 
1887 	error = ip6_do_fragmentation(&m, hdrlen, NULL, unfragpartlen,
1888 	    hdr, lexthdrsp, maxlen, proto, frag_id);
1889 
1890 	if (error == 0) {
1891 		/*
1892 		 * Set the PF_TAG_REFRAGMENTED flag to indicate to ip6_forward()
1893 		 * and pf_route6() that the mbuf contains a chain of fragments.
1894 		 */
1895 		pf_find_mtag(m)->pftag_flags |= PF_TAG_REFRAGMENTED;
1896 		action = PF_PASS;
1897 		pbuf_init_mbuf(*pbufp, m, ifp);
1898 	} else {
1899 		DPFPRINTF(("refragment error %d", error));
1900 		action = PF_DROP;
1901 		goto done;
1902 	}
1903 done:
1904 	return action;
1905 }
1906 
1907 int
1908 pf_normalize_ip(pbuf_t *pbuf, int dir, struct pfi_kif *kif, u_short *reason,
1909     struct pf_pdesc *pd)
1910 {
1911 	struct mbuf             *__single m;
1912 	struct pf_rule          *__single r;
1913 	struct pf_frent         *__single frent;
1914 	struct pf_fragment      *__single frag = NULL;
1915 	struct ip               *__single h = pbuf->pb_data;
1916 	int                      mff = (ntohs(h->ip_off) & IP_MF);
1917 	int                      hlen = h->ip_hl << 2;
1918 	u_int16_t                fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
1919 	u_int16_t                fr_max;
1920 	int                      ip_len;
1921 	int                      ip_off;
1922 	int                      asd = 0;
1923 	struct pf_ruleset       *__single ruleset = NULL;
1924 	struct ifnet            *__single ifp = pbuf->pb_ifp;
1925 	uint64_t                ipid_salt = (uint64_t)pbuf_get_packet_buffer_address(pbuf);
1926 
1927 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
1928 	while (r != NULL) {
1929 		r->evaluations++;
1930 		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
1931 			r = r->skip[PF_SKIP_IFP].ptr;
1932 		} else if (r->direction && r->direction != dir) {
1933 			r = r->skip[PF_SKIP_DIR].ptr;
1934 		} else if (r->af && r->af != AF_INET) {
1935 			r = r->skip[PF_SKIP_AF].ptr;
1936 		} else if (r->proto && r->proto != h->ip_p) {
1937 			r = r->skip[PF_SKIP_PROTO].ptr;
1938 		} else if (PF_MISMATCHAW(&r->src.addr,
1939 		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
1940 		    r->src.neg, kif)) {
1941 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
1942 		} else if (PF_MISMATCHAW(&r->dst.addr,
1943 		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
1944 		    r->dst.neg, NULL)) {
1945 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
1946 		} else {
1947 			if (r->anchor == NULL) {
1948 				break;
1949 			} else {
1950 				pf_step_into_anchor(&asd, &ruleset,
1951 				    PF_RULESET_SCRUB, &r, NULL, NULL);
1952 			}
1953 		}
1954 		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
1955 		    PF_RULESET_SCRUB, &r, NULL, NULL)) {
1956 			break;
1957 		}
1958 	}
1959 
1960 	if (r == NULL || r->action == PF_NOSCRUB) {
1961 		return PF_PASS;
1962 	} else {
1963 		r->packets[dir == PF_OUT]++;
1964 		r->bytes[dir == PF_OUT] += pd->tot_len;
1965 	}
1966 
1967 	/* Check for illegal packets */
1968 	if (hlen < (int)sizeof(struct ip)) {
1969 		goto drop;
1970 	}
1971 
1972 	if (hlen > ntohs(h->ip_len)) {
1973 		goto drop;
1974 	}
1975 
1976 	/* Clear IP_DF if the rule uses the no-df option */
1977 	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
1978 		u_int16_t ipoff = h->ip_off;
1979 
1980 		h->ip_off &= htons(~IP_DF);
1981 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
1982 	}
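	/*
	 * Illustrative sketch (editor's addition, not part of pf_norm.c):
	 * the pf_cksum_fixup() calls in this function patch the IP header
	 * checksum incrementally when a 16-bit field changes, in the style
	 * of RFC 1624 (HC' = ~(~HC + ~m + m')).  The helper name below is
	 * hypothetical; values are in network byte order.
	 */
#if 0	/* example only, never compiled */
	static u_int16_t
	example_cksum_adjust16(u_int16_t cksum, u_int16_t old_val, u_int16_t new_val)
	{
		u_int32_t l;

		l = (u_int16_t)~cksum + (u_int16_t)~old_val + new_val;
		l = (l & 0xffff) + (l >> 16);	/* fold the carries back in */
		l = (l & 0xffff) + (l >> 16);
		return (u_int16_t)~l;
	}
#endif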
1983 
1984 	/* We will need other tests here */
1985 	if (!fragoff && !mff) {
1986 		goto no_fragment;
1987 	}
1988 
1989 	/*
1990 	 * We're dealing with a fragment now. Don't allow fragments
1991 	 * with IP_DF to enter the cache. If the flag was cleared by
1992 	 * no-df above, fine. Otherwise drop it.
1993 	 */
1994 	if (h->ip_off & htons(IP_DF)) {
1995 		DPFPRINTF(("IP_DF\n"));
1996 		goto bad;
1997 	}
1998 
1999 	ip_len = ntohs(h->ip_len) - hlen;
2000 	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
2001 
2002 	/* All fragments are 8 byte aligned */
2003 	if (mff && (ip_len & 0x7)) {
2004 		DPFPRINTF(("mff and %d\n", ip_len));
2005 		goto bad;
2006 	}
2007 
2008 	/* Respect maximum length */
2009 	if (fragoff + ip_len > IP_MAXPACKET) {
2010 		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
2011 		goto bad;
2012 	}
2013 	fr_max = fragoff + ip_len;
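	/*
	 * Illustrative note (editor's addition): ip_off carries the fragment
	 * offset in 8-byte units in its low 13 bits, so fragoff above is the
	 * byte offset of this fragment's payload.  For a typical 1500-byte
	 * MTU train the first fragment has fragoff 0 and ip_len 1480
	 * (fr_max 1480); the second carries an offset field of 185, i.e.
	 * fragoff 1480, and so on.
	 */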
2014 
2015 	if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) {
2016 		/* Fully buffer all of the fragments */
2017 
2018 		frag = pf_find_fragment_by_ipv4_header(h, &pf_frag_tree);
2019 		/* Check if we saw the last fragment already */
2020 		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
2021 		    fr_max > frag->fr_max) {
2022 			goto bad;
2023 		}
2024 
2025 		if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
2026 			REASON_SET(reason, PFRES_MEMORY);
2027 			return PF_DROP;
2028 		}
2029 
2030 		VERIFY(!pbuf_is_valid(pbuf));
2031 
2032 		/* Restore iph pointer after pbuf_to_mbuf() */
2033 		h = mtod(m, struct ip *);
2034 
2035 		/* Get an entry for the fragment queue */
2036 		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
2037 		if (frent == NULL) {
2038 			REASON_SET(reason, PFRES_MEMORY);
2039 			m_drop(m, DROPTAP_FLAG_DIR_IN, DROP_REASON_PF_MEM_ALLOC, NULL, 0);
2040 			return PF_DROP;
2041 		}
2042 		pf_nfrents++;
2043 		frent->fr_ip = h;
2044 		frent->fr_m = m;
2045 
2046 		/* Might return a completely reassembled mbuf, or NULL */
2047 		DPFPRINTF(("reass IPv4 frag %d @ %d-%d\n", ntohs(h->ip_id),
2048 		    fragoff, fr_max));
2049 		m = pf_reassemble(m, &frag, frent, mff);
2050 
2051 		if (m == NULL) {
2052 			return PF_DROP;
2053 		}
2054 
2055 		VERIFY(m->m_flags & M_PKTHDR);
2056 		pbuf_init_mbuf(pbuf, m, ifp);
2057 
2058 		/* use mtag from concatenated mbuf chain */
2059 		pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
2060 #if 0
2061 // SCW: This check is superfluous
2062 #if DIAGNOSTIC
2063 		if (pd->pf_mtag == NULL) {
2064 			printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
2065 			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
2066 				m_freem(m);
2067 				m = NULL;
2068 				goto no_mem;
2069 			}
2070 		}
2071 #endif
2072 #endif
2073 
2074 		h = mtod(m, struct ip *);
2075 
2076 		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) {
2077 			goto drop;
2078 		}
2079 	} else {
2080 		/* non-buffering fragment cache (drops or masks overlaps) */
2081 		int     nomem = 0;
2082 
2083 		if (dir == PF_OUT && (pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
2084 			/*
2085 			 * Already passed the fragment cache in the
2086 			 * input direction.  If we continued, it would
2087 			 * appear to be a dup and would be dropped.
2088 			 */
2089 			goto fragment_pass;
2090 		}
2091 
2092 		frag = pf_find_fragment_by_ipv4_header(h, &pf_cache_tree);
2093 
2094 		/* Check if we saw the last fragment already */
2095 		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
2096 		    fr_max > frag->fr_max) {
2097 			if (r->rule_flag & PFRULE_FRAGDROP) {
2098 				frag->fr_flags |= PFFRAG_DROP;
2099 			}
2100 			goto bad;
2101 		}
2102 
2103 		if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
2104 			REASON_SET(reason, PFRES_MEMORY);
2105 			goto bad;
2106 		}
2107 
2108 		VERIFY(!pbuf_is_valid(pbuf));
2109 
2110 		/* Restore iph pointer after pbuf_to_mbuf() */
2111 		h = mtod(m, struct ip *);
2112 
2113 		m = pf_fragcache(&m, h, &frag, mff,
2114 		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
2115 		if (m == NULL) {
2116 			// Note: pf_fragcache() has already m_freem'd the mbuf
2117 			if (nomem) {
2118 				goto no_mem;
2119 			}
2120 			goto drop;
2121 		}
2122 
2123 		VERIFY(m->m_flags & M_PKTHDR);
2124 		pbuf_init_mbuf(pbuf, m, ifp);
2125 
2126 		/* use mtag from copied and trimmed mbuf chain */
2127 		pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
2128 #if 0
2129 // SCW: This check is superfluous
2130 #if DIAGNOSTIC
2131 		if (pd->pf_mtag == NULL) {
2132 			printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
2133 			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
2134 				m_freem(m);
2135 				m = NULL;
2136 				goto no_mem;
2137 			}
2138 		}
2139 #endif
2140 #endif
2141 		if (dir == PF_IN) {
2142 			pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
2143 		}
2144 
2145 		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) {
2146 			goto drop;
2147 		}
2148 
2149 		goto fragment_pass;
2150 	}
2151 
2152 no_fragment:
2153 	/* At this point, only IP_DF is allowed in ip_off */
2154 	if (h->ip_off & ~htons(IP_DF)) {
2155 		u_int16_t ipoff = h->ip_off;
2156 
2157 		h->ip_off &= htons(IP_DF);
2158 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0);
2159 	}
2160 
2161 	/* Enforce a minimum ttl, may cause endless packet loops */
2162 	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
2163 		u_int16_t ip_ttl = h->ip_ttl;
2164 
2165 		h->ip_ttl = r->min_ttl;
2166 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
2167 	}
2168 	if (r->rule_flag & PFRULE_RANDOMID) {
2169 		u_int16_t oip_id = h->ip_id;
2170 
2171 		if (rfc6864 && IP_OFF_IS_ATOMIC(ntohs(h->ip_off))) {
2172 			h->ip_id = 0;
2173 		} else {
2174 			h->ip_id = ip_randomid(ipid_salt);
2175 		}
2176 		h->ip_sum = pf_cksum_fixup(h->ip_sum, oip_id, h->ip_id, 0);
2177 	}
2178 	if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) {
2179 		pd->flags |= PFDESC_IP_REAS;
2180 	}
2181 
2182 	return PF_PASS;
2183 
2184 fragment_pass:
2185 	/* Enforce a minimum ttl, may cause endless packet loops */
2186 	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
2187 		u_int16_t ip_ttl = h->ip_ttl;
2188 
2189 		h->ip_ttl = r->min_ttl;
2190 		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
2191 	}
2192 	if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) {
2193 		pd->flags |= PFDESC_IP_REAS;
2194 	}
2195 	return PF_PASS;
2196 
2197 no_mem:
2198 	REASON_SET(reason, PFRES_MEMORY);
2199 	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
2200 		PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r,
2201 		    NULL, NULL, pd);
2202 	}
2203 	return PF_DROP;
2204 
2205 drop:
2206 	REASON_SET(reason, PFRES_NORM);
2207 	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
2208 		PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r,
2209 		    NULL, NULL, pd);
2210 	}
2211 	return PF_DROP;
2212 
2213 bad:
2214 	DPFPRINTF(("dropping bad IPv4 fragment\n"));
2215 
2216 	/* Free associated fragments */
2217 	if (frag != NULL) {
2218 		pf_free_fragment(frag);
2219 	}
2220 
2221 	REASON_SET(reason, PFRES_FRAG);
2222 	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
2223 		PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, *reason, r, NULL, NULL, pd);
2224 	}
2225 
2226 	return PF_DROP;
2227 }
2228 
2229 static __attribute__((noinline)) struct pf_fragment *
2230 pf_find_fragment_by_ipv6_header(struct ip6_hdr *ip6, struct ip6_frag *fh,
2231     struct pf_frag_tree *tree)
2232 {
2233 	struct pf_fragment key;
2234 	pf_ip6hdr2key(&key, ip6, fh);
2235 	return pf_find_fragment_by_key(&key, tree);
2236 }
2237 
2238 int
2239 pf_normalize_ip6(pbuf_t *pbuf, int dir, struct pfi_kif *kif,
2240     u_short *reason, struct pf_pdesc *pd)
2241 {
2242 	struct mbuf             *__single m = NULL;
2243 	struct pf_rule          *__single r;
2244 	struct ip6_hdr          *__single h = pbuf->pb_data;
2245 	int                      extoff;
2246 	int                      off;
2247 	struct ip6_ext           ext;
2248 	struct ip6_opt           opt;
2249 	struct ip6_opt_jumbo     jumbo;
2250 	int                      optend;
2251 	int                      ooff;
2252 	struct ip6_frag          frag;
2253 	u_int32_t                jumbolen = 0, plen;
2254 	u_int16_t                fragoff = 0;
2255 	u_int8_t                 proto;
2256 	int                      terminal;
2257 	struct pf_frent         *__single frent;
2258 	struct pf_fragment      *__single pff = NULL;
2259 	int                      mff = 0, rh_cnt = 0;
2260 	u_int16_t                fr_max;
2261 	int                      asd = 0;
2262 	struct pf_ruleset       *__single ruleset = NULL;
2263 	struct ifnet            *__single ifp = pbuf->pb_ifp;
2264 
2265 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
2266 	while (r != NULL) {
2267 		r->evaluations++;
2268 		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
2269 			r = r->skip[PF_SKIP_IFP].ptr;
2270 		} else if (r->direction && r->direction != dir) {
2271 			r = r->skip[PF_SKIP_DIR].ptr;
2272 		} else if (r->af && r->af != AF_INET6) {
2273 			r = r->skip[PF_SKIP_AF].ptr;
2274 		}
2275 #if 0 /* header chain! */
2276 		else if (r->proto && r->proto != h->ip6_nxt) {
2277 			r = r->skip[PF_SKIP_PROTO].ptr;
2278 		}
2279 #endif
2280 		else if (PF_MISMATCHAW(&r->src.addr,
2281 		    (struct pf_addr *)(void *)&h->ip6_src, AF_INET6,
2282 		    r->src.neg, kif)) {
2283 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2284 		} else if (PF_MISMATCHAW(&r->dst.addr,
2285 		    (struct pf_addr *)(void *)&h->ip6_dst, AF_INET6,
2286 		    r->dst.neg, NULL)) {
2287 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2288 		} else {
2289 			if (r->anchor == NULL) {
2290 				break;
2291 			} else {
2292 				pf_step_into_anchor(&asd, &ruleset,
2293 				    PF_RULESET_SCRUB, &r, NULL, NULL);
2294 			}
2295 		}
2296 		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
2297 		    PF_RULESET_SCRUB, &r, NULL, NULL)) {
2298 			break;
2299 		}
2300 	}
2301 
2302 	if (r == NULL || r->action == PF_NOSCRUB) {
2303 		return PF_PASS;
2304 	} else {
2305 		r->packets[dir == PF_OUT]++;
2306 		r->bytes[dir == PF_OUT] += pd->tot_len;
2307 	}
2308 
2309 	/* Check for illegal packets */
2310 	if ((uint32_t)(sizeof(struct ip6_hdr) + IPV6_MAXPACKET) <
2311 	    pbuf->pb_packet_len) {
2312 		goto drop;
2313 	}
2314 
2315 	extoff = 0;
2316 	off = sizeof(struct ip6_hdr);
2317 	proto = h->ip6_nxt;
2318 	terminal = 0;
2319 	do {
2320 		pd->proto = proto;
2321 		if (proto == IPPROTO_FRAGMENT) {
2322 			goto fragment;
2323 		}
2324 		if (!pf_pull_hdr(pbuf, off, &ext, sizeof(ext), sizeof(ext), NULL,
2325 		    NULL, AF_INET6)) {
2326 			goto shortpkt;
2327 		}
2328 		switch (proto) {
2329 		case IPPROTO_AH:
2330 		case IPPROTO_ROUTING:
2331 		case IPPROTO_DSTOPTS:
2332 			extoff = off;
2333 			/*
2334 			 * <[email protected]>
2335 			 * Multiple routing headers not allowed.
2336 			 * Routing header type zero considered harmful.
2337 			 */
2338 			if (proto == IPPROTO_ROUTING) {
2339 				struct ip6_rthdr rh = {0};
2340 				if (!pf_pull_hdr(pbuf, off, &rh, sizeof(rh), sizeof(rh), NULL, NULL, AF_INET6)) {
2341 					goto shortpkt;
2342 				}
2343 				if (rh_cnt++) {
2344 					goto drop;
2345 				}
2346 				if (rh.ip6r_type == IPV6_RTHDR_TYPE_0) {
2347 					goto drop;
2348 				}
2349 			} else if (proto == IPPROTO_AH) {
2350 				off += (ext.ip6e_len + 2) * 4;
2351 			} else {
2352 				off += (ext.ip6e_len + 1) * 8;
2353 			}
2354 			proto = ext.ip6e_nxt;
2355 			break;
2356 		case IPPROTO_HOPOPTS:
2357 			extoff = off;
2358 			optend = off + (ext.ip6e_len + 1) * 8;
2359 			ooff = off + sizeof(ext);
2360 			do {
2361 				if (!pf_pull_hdr(pbuf, ooff, &opt.ip6o_type, sizeof(opt.ip6o_type),
2362 				    sizeof(opt.ip6o_type), NULL, NULL,
2363 				    AF_INET6)) {
2364 					goto shortpkt;
2365 				}
2366 				if (opt.ip6o_type == IP6OPT_PAD1) {
2367 					ooff++;
2368 					continue;
2369 				}
2370 				if (!pf_pull_hdr(pbuf, ooff, &opt, sizeof(opt), sizeof(opt),
2371 				    NULL, NULL, AF_INET6)) {
2372 					goto shortpkt;
2373 				}
2374 				if ((ooff + (int) sizeof(opt) + opt.ip6o_len) >
2375 				    optend) {
2376 					goto drop;
2377 				}
2378 				switch (opt.ip6o_type) {
2379 				case IP6OPT_JUMBO:
2380 					if (h->ip6_plen != 0) {
2381 						goto drop;
2382 					}
2383 					if (!pf_pull_hdr(pbuf, ooff, &jumbo,
2384 					    sizeof(jumbo), sizeof(jumbo), NULL, NULL,
2385 					    AF_INET6)) {
2386 						goto shortpkt;
2387 					}
2388 					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
2389 					    sizeof(jumbolen));
2390 					jumbolen = ntohl(jumbolen);
2391 					if (jumbolen <= IPV6_MAXPACKET) {
2392 						goto drop;
2393 					}
2394 					if ((sizeof(struct ip6_hdr) +
2395 					    jumbolen) != pbuf->pb_packet_len) {
2396 						goto drop;
2397 					}
2398 					break;
2399 				default:
2400 					break;
2401 				}
2402 				ooff += sizeof(opt) + opt.ip6o_len;
2403 			} while (ooff < optend);
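			/*
			 * Illustrative note (editor's addition): for a jumbo
			 * payload option to be accepted above, ip6_plen must
			 * be zero, the 32-bit jumbo length must exceed
			 * IPV6_MAXPACKET (65535), and it must match the actual
			 * packet length; anything else is dropped.
			 */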
2404 
2405 			off = optend;
2406 			proto = ext.ip6e_nxt;
2407 			break;
2408 		default:
2409 			terminal = 1;
2410 			break;
2411 		}
2412 	} while (!terminal);
2413 
2414 	/* jumbo payload option must be present, or plen > 0 */
2415 	if (ntohs(h->ip6_plen) == 0) {
2416 		plen = jumbolen;
2417 	} else {
2418 		plen = ntohs(h->ip6_plen);
2419 	}
2420 	if (plen == 0) {
2421 		goto drop;
2422 	}
2423 	if ((uint32_t)(sizeof(struct ip6_hdr) + plen) > pbuf->pb_packet_len) {
2424 		goto shortpkt;
2425 	}
2426 
2427 	/* Enforce a minimum ttl, may cause endless packet loops */
2428 	if (r->min_ttl && h->ip6_hlim < r->min_ttl) {
2429 		h->ip6_hlim = r->min_ttl;
2430 	}
2431 
2432 	return PF_PASS;
2433 
2434 fragment:
2435 	plen = ntohs(h->ip6_plen);
2436 	/* Jumbo payload packets cannot be fragmented */
2437 	if (plen == 0 || jumbolen) {
2438 		goto drop;
2439 	}
2440 
2441 	if (!pf_pull_hdr(pbuf, off, &frag, sizeof(frag), sizeof(frag), NULL, NULL, AF_INET6)) {
2442 		goto shortpkt;
2443 	}
2444 	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
2445 	pd->proto = frag.ip6f_nxt;
2446 	mff = ntohs(frag.ip6f_offlg & IP6F_MORE_FRAG);
2447 	off += sizeof(frag);
2448 	if (fragoff + (plen - off) > IPV6_MAXPACKET) {
2449 		goto badfrag;
2450 	}
2451 
2452 	fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr));
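	/*
	 * Illustrative note (editor's addition): plen is the IPv6 payload
	 * length and off is the byte offset just past the fragment header,
	 * so (plen - (off - sizeof(struct ip6_hdr))) is the number of
	 * payload bytes this fragment actually carries; fr_max is then the
	 * end of this fragment's data within the reassembled packet.
	 */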
2453 // XXX SCW: mbuf-specific
2454 //	DPFPRINTF(("0x%llx IPv6 frag plen %u mff %d off %u fragoff %u "
2455 //	    "fr_max %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, mff, off,
2456 //	    fragoff, fr_max));
2457 
2458 	if ((r->rule_flag & (PFRULE_FRAGCROP | PFRULE_FRAGDROP)) == 0) {
2459 		/* Fully buffer all of the fragments */
2460 		pd->flags |= PFDESC_IP_REAS;
2461 
2462 		pff = pf_find_fragment_by_ipv6_header(h, &frag,
2463 		    &pf_frag_tree);
2464 
2465 		/* Check if we saw the last fragment already */
2466 		if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2467 		    fr_max > pff->fr_max) {
2468 			goto badfrag;
2469 		}
2470 
2471 		if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
2472 			REASON_SET(reason, PFRES_MEMORY);
2473 			return PF_DROP;
2474 		}
2475 
2476 		/* Restore iph pointer after pbuf_to_mbuf() */
2477 		h = mtod(m, struct ip6_hdr *);
2478 
2479 		/* Get an entry for the fragment queue */
2480 		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
2481 		if (frent == NULL) {
2482 			REASON_SET(reason, PFRES_MEMORY);
2483 			return PF_DROP;
2484 		}
2485 
2486 		pf_nfrents++;
2487 		frent->fr_ip6 = h;
2488 		frent->fr_m = m;
2489 		frent->fr_ip6f_opt = frag;
2490 		frent->fr_ip6f_extoff = extoff;
2491 		frent->fr_ip6f_hlen = off;
2492 		/* account for 2nd Destination Options header if present */
2493 		if (pd->proto == IPPROTO_DSTOPTS) {
2494 			if (!pf_pull_hdr(pbuf, off, &ext, sizeof(ext), sizeof(ext), NULL,
2495 			    NULL, AF_INET6)) {
2496 				goto shortpkt;
2497 			}
2498 			frent->fr_ip6f_hlen += (ext.ip6e_len + 1) * 8;
2499 		}
2500 
2501 		/* Might return a completely reassembled mbuf, or NULL */
2502 		DPFPRINTF(("reass IPv6 frag %d @ %d-%d\n",
2503 		    ntohl(frag.ip6f_ident), fragoff, fr_max));
2504 		m = pf_reassemble6(&m, &pff, frent, mff);
2505 
2506 		if (m == NULL) {
2507 			return PF_DROP;
2508 		}
2509 
2510 		pbuf_init_mbuf(pbuf, m, ifp);
2511 		h = pbuf->pb_data;
2512 
2513 		if (pff != NULL && (pff->fr_flags & PFFRAG_DROP)) {
2514 			goto drop;
2515 		}
2516 	} else if (dir == PF_IN ||
2517 	    !(pd->pf_mtag->pftag_flags & PF_TAG_FRAGCACHE)) {
2518 		/* non-buffering fragment cache (overlaps: see RFC 5722) */
2519 		int nomem = 0;
2520 
2521 		pff = pf_find_fragment_by_ipv6_header(h, &frag,
2522 		    &pf_cache_tree);
2523 
2524 		/* Check if we saw the last fragment already */
2525 		if (pff != NULL && (pff->fr_flags & PFFRAG_SEENLAST) &&
2526 		    fr_max > pff->fr_max) {
2527 			if (r->rule_flag & PFRULE_FRAGDROP) {
2528 				pff->fr_flags |= PFFRAG_DROP;
2529 			}
2530 			goto badfrag;
2531 		}
2532 
2533 		if ((m = pbuf_to_mbuf(pbuf, TRUE)) == NULL) {
2534 			goto no_mem;
2535 		}
2536 
2537 		/* Restore iph pointer after pbuf_to_mbuf() */
2538 		h = mtod(m, struct ip6_hdr *);
2539 
2540 		m = pf_frag6cache(&m, h, &frag, &pff, off, mff,
2541 		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
2542 		if (m == NULL) {
2543 			// Note: pf_frag6cache() has already m_freem'd the mbuf
2544 			if (nomem) {
2545 				goto no_mem;
2546 			}
2547 			goto drop;
2548 		}
2549 
2550 		pbuf_init_mbuf(pbuf, m, ifp);
2551 		pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
2552 		h = pbuf->pb_data;
2553 
2554 		if (dir == PF_IN) {
2555 			pd->pf_mtag->pftag_flags |= PF_TAG_FRAGCACHE;
2556 		}
2557 
2558 		if (pff != NULL && (pff->fr_flags & PFFRAG_DROP)) {
2559 			goto drop;
2560 		}
2561 	}
2562 
2563 	/* Enforce a minimum ttl, may cause endless packet loops */
2564 	if (r->min_ttl && h->ip6_hlim < r->min_ttl) {
2565 		h->ip6_hlim = r->min_ttl;
2566 	}
2567 	return PF_PASS;
2568 
2569 no_mem:
2570 	REASON_SET(reason, PFRES_MEMORY);
2571 	goto dropout;
2572 
2573 shortpkt:
2574 	REASON_SET(reason, PFRES_SHORT);
2575 	goto dropout;
2576 
2577 drop:
2578 	REASON_SET(reason, PFRES_NORM);
2579 	goto dropout;
2580 
2581 badfrag:
2582 	DPFPRINTF(("dropping bad IPv6 fragment\n"));
2583 	REASON_SET(reason, PFRES_FRAG);
2584 	goto dropout;
2585 
2586 dropout:
2587 	if (pff != NULL) {
2588 		pf_free_fragment(pff);
2589 	}
2590 	if (r != NULL && r->log && pbuf_is_valid(pbuf)) {
2591 		PFLOG_PACKET(kif, h, pbuf, AF_INET6, dir, *reason, r, NULL, NULL, pd);
2592 	}
2593 	return PF_DROP;
2594 }
2595 
2596 int
2597 pf_normalize_tcp(int dir, struct pfi_kif *kif, pbuf_t *pbuf, int ipoff,
2598     int off, void *h, struct pf_pdesc *pd)
2599 {
2600 #pragma unused(ipoff, h)
2601 	struct pf_rule  *__single r, *__single rm = NULL;
2602 	struct tcphdr   *__single th = pf_pd_get_hdr_tcp(pd);
2603 	int              rewrite = 0;
2604 	int              asd = 0;
2605 	u_short          reason;
2606 	u_int8_t         flags;
2607 	sa_family_t      af = pd->af;
2608 	struct pf_ruleset *__single ruleset = NULL;
2609 	union pf_state_xport sxport, dxport;
2610 
2611 	sxport.port = th->th_sport;
2612 	dxport.port = th->th_dport;
2613 
2614 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
2615 	while (r != NULL) {
2616 		r->evaluations++;
2617 		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
2618 			r = r->skip[PF_SKIP_IFP].ptr;
2619 		} else if (r->direction && r->direction != dir) {
2620 			r = r->skip[PF_SKIP_DIR].ptr;
2621 		} else if (r->af && r->af != af) {
2622 			r = r->skip[PF_SKIP_AF].ptr;
2623 		} else if (r->proto && r->proto != pd->proto) {
2624 			r = r->skip[PF_SKIP_PROTO].ptr;
2625 		} else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
2626 		    r->src.neg, kif)) {
2627 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
2628 		} else if (r->src.xport.range.op &&
2629 		    !pf_match_xport(r->src.xport.range.op, r->proto_variant,
2630 		    &r->src.xport, &sxport)) {
2631 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
2632 		} else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
2633 		    r->dst.neg, NULL)) {
2634 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
2635 		} else if (r->dst.xport.range.op &&
2636 		    !pf_match_xport(r->dst.xport.range.op, r->proto_variant,
2637 		    &r->dst.xport, &dxport)) {
2638 			r = r->skip[PF_SKIP_DST_PORT].ptr;
2639 		} else if (r->os_fingerprint != PF_OSFP_ANY &&
2640 		    !pf_osfp_match(pf_osfp_fingerprint(pd, pbuf, off, th),
2641 		    r->os_fingerprint)) {
2642 			r = TAILQ_NEXT(r, entries);
2643 		} else {
2644 			if (r->anchor == NULL) {
2645 				rm = r;
2646 				break;
2647 			} else {
2648 				pf_step_into_anchor(&asd, &ruleset,
2649 				    PF_RULESET_SCRUB, &r, NULL, NULL);
2650 			}
2651 		}
2652 		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
2653 		    PF_RULESET_SCRUB, &r, NULL, NULL)) {
2654 			break;
2655 		}
2656 	}
2657 
2658 	if (rm == NULL || rm->action == PF_NOSCRUB) {
2659 		return PF_PASS;
2660 	} else {
2661 		r->packets[dir == PF_OUT]++;
2662 		r->bytes[dir == PF_OUT] += pd->tot_len;
2663 	}
2664 
2665 	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) {
2666 		pd->flags |= PFDESC_TCP_NORM;
2667 	}
2668 
2669 	flags = th->th_flags;
2670 	if (flags & TH_SYN) {
2671 		/* Illegal packet */
2672 		if (flags & TH_RST) {
2673 			goto tcp_drop;
2674 		}
2675 
2676 		if (flags & TH_FIN) {
2677 			flags &= ~TH_FIN;
2678 		}
2679 	} else {
2680 		/* Illegal packet */
2681 		if (!(flags & (TH_ACK | TH_RST))) {
2682 			goto tcp_drop;
2683 		}
2684 	}
2685 
2686 	if (!(flags & TH_ACK)) {
2687 		/* These flags are only valid if ACK is set */
2688 		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) {
2689 			goto tcp_drop;
2690 		}
2691 	}
2692 
2693 	/* Check for illegal header length */
2694 	if (th->th_off < (sizeof(struct tcphdr) >> 2)) {
2695 		goto tcp_drop;
2696 	}
2697 
2698 	/* If flags changed, or reserved data set, then adjust */
2699 	if (flags != th->th_flags || th->th_x2 != 0) {
2700 		u_int16_t       ov, nv;
2701 		// Explicit __bidi_indexable is to avoid a warning false positive (rdar://119193012)
2702 		uint8_t *__bidi_indexable th_iter = (uint8_t * __bidi_indexable)(struct tcphdr *__bidi_indexable) th;
2703 
2704 		ov = *(u_int16_t *)(void *)(th_iter + offsetof(struct tcphdr, th_ack) + sizeof(th->th_ack));
2705 		th->th_flags = flags;
2706 		th->th_x2 = 0;
2707 		nv = *(u_int16_t *)(void *)(th_iter + offsetof(struct tcphdr, th_ack) + sizeof(th->th_ack));
2708 
2709 		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
2710 		rewrite = 1;
2711 	}
2712 
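	/*
	 * Illustrative note (editor's addition): ov and nv above capture the
	 * 16-bit word at TCP header offset 12 (th_off, th_x2 and th_flags)
	 * before and after sanitizing, so the checksum can be patched
	 * incrementally instead of being recomputed over the whole segment.
	 */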
2713 	/* Remove urgent pointer, if TH_URG is not set */
2714 	if (!(flags & TH_URG) && th->th_urp) {
2715 		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
2716 		th->th_urp = 0;
2717 		rewrite = 1;
2718 	}
2719 
2720 	/* copy back packet headers if we sanitized */
2721 	/* Process options */
2722 	if (r->max_mss) {
2723 		int rv = pf_normalize_tcpopt(r, dir, kif, pd, pbuf, th, off,
2724 		    &rewrite);
2725 		if (rv == PF_DROP) {
2726 			return rv;
2727 		}
2728 		pbuf = pd->mp;
2729 	}
2730 
2731 	if (rewrite) {
2732 		if (pf_lazy_makewritable(pd, pbuf,
2733 		    off + sizeof(*th)) == NULL) {
2734 			REASON_SET(&reason, PFRES_MEMORY);
2735 			if (r->log) {
2736 				PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason,
2737 				    r, 0, 0, pd);
2738 			}
2739 			return PF_DROP;
2740 		}
2741 
2742 		pbuf_copy_back(pbuf, off, sizeof(*th), th, sizeof(*th));
2743 	}
2744 
2745 	return PF_PASS;
2746 
2747 tcp_drop:
2748 	REASON_SET(&reason, PFRES_NORM);
2749 	if (rm != NULL && r->log) {
2750 		PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, r, NULL, NULL, pd);
2751 	}
2752 	return PF_DROP;
2753 }
2754 
2755 int
2756 pf_normalize_tcp_init(pbuf_t *pbuf, int off, struct pf_pdesc *pd,
2757     struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
2758 {
2759 #pragma unused(dst)
2760 	u_int32_t tsval, tsecr;
2761 	u_int8_t hdr[60];
2762 
2763 	VERIFY(src->scrub == NULL);
2764 
2765 	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
2766 	if (src->scrub == NULL) {
2767 		return 1;
2768 	}
2769 	bzero(src->scrub, sizeof(*src->scrub));
2770 
2771 	switch (pd->af) {
2772 #if INET
2773 	case AF_INET: {
2774 		struct ip *__single h = pbuf->pb_data;
2775 		src->scrub->pfss_ttl = h->ip_ttl;
2776 		break;
2777 	}
2778 #endif /* INET */
2779 	case AF_INET6: {
2780 		struct ip6_hdr *__single h = pbuf->pb_data;
2781 		src->scrub->pfss_ttl = h->ip6_hlim;
2782 		break;
2783 	}
2784 	}
2785 
2786 
2787 	/*
2788 	 * All normalizations below are only begun if we see the start of
2789 	 * the connection.  They must all set an enabled bit in pfss_flags
2790 	 */
2791 	if ((th->th_flags & TH_SYN) == 0) {
2792 		return 0;
2793 	}
2794 
2795 
2796 	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
2797 	    pf_pull_hdr(pbuf, off, hdr, sizeof(hdr), th->th_off << 2, NULL, NULL, pd->af)) {
2798 		/* Diddle with TCP options */
2799 		int hlen = (th->th_off << 2) - sizeof(struct tcphdr);
2800 		u_int8_t *opt = hdr + sizeof(struct tcphdr);
2801 		while (hlen >= TCPOLEN_TIMESTAMP) {
2802 			switch (*opt) {
2803 			case TCPOPT_EOL:        /* FALLTHROUGH */
2804 			case TCPOPT_NOP:
2805 				opt++;
2806 				hlen--;
2807 				break;
2808 			case TCPOPT_TIMESTAMP:
2809 				if (opt[1] >= TCPOLEN_TIMESTAMP) {
2810 					src->scrub->pfss_flags |=
2811 					    PFSS_TIMESTAMP;
2812 					src->scrub->pfss_ts_mod =
2813 					    htonl(random());
2814 
2815 					/* note PFSS_PAWS not set yet */
2816 					memcpy(&tsval, &opt[2],
2817 					    sizeof(u_int32_t));
2818 					memcpy(&tsecr, &opt[6],
2819 					    sizeof(u_int32_t));
2820 					src->scrub->pfss_tsval0 = ntohl(tsval);
2821 					src->scrub->pfss_tsval = ntohl(tsval);
2822 					src->scrub->pfss_tsecr = ntohl(tsecr);
2823 					getmicrouptime(&src->scrub->pfss_last);
2824 				}
2825 				OS_FALLTHROUGH;
2826 			default:
2827 				hlen -= MAX(opt[1], 2);
2828 				opt += MAX(opt[1], 2);
2829 				break;
2830 			}
2831 		}
2832 	}
2833 
2834 	return 0;
2835 }
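/*
 * Editor's note (not part of the original source): pfss_ts_mod, initialized
 * above to a random per-connection value, is the offset that
 * pf_normalize_tcp_stateful() adds to every outgoing TCP timestamp value and
 * subtracts from the echoed reply, hiding the host's real timestamp clock
 * (and thus its uptime) from the peer.
 */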
2836 
2837 void
2838 pf_normalize_tcp_cleanup(struct pf_state *state)
2839 {
2840 	if (state->src.scrub) {
2841 		pool_put(&pf_state_scrub_pl, state->src.scrub);
2842 	}
2843 	if (state->dst.scrub) {
2844 		pool_put(&pf_state_scrub_pl, state->dst.scrub);
2845 	}
2846 
2847 	/* Someday... flush the TCP segment reassembly descriptors. */
2848 }
2849 
2850 int
2851 pf_normalize_tcp_stateful(pbuf_t *pbuf, int off, struct pf_pdesc *pd,
2852     u_short *reason, struct tcphdr *th, struct pf_state *state,
2853     struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
2854 {
2855 	struct timeval uptime;
2856 	u_int32_t tsval = 0, tsecr = 0;
2857 	u_int tsval_from_last;
2858 	u_int8_t hdr[60];
2859 	u_int8_t *opt;
2860 	int copyback = 0;
2861 	int got_ts = 0;
2862 
2863 	VERIFY(src->scrub || dst->scrub);
2864 
2865 	/*
2866 	 * Enforce the minimum TTL seen for this connection.  Negate a common
2867 	 * technique to evade an intrusion detection system and confuse
2868 	 * firewall state code.
2869 	 */
2870 	switch (pd->af) {
2871 #if INET
2872 	case AF_INET: {
2873 		if (src->scrub) {
2874 			struct ip *__single h = pbuf->pb_data;
2875 			if (h->ip_ttl > src->scrub->pfss_ttl) {
2876 				src->scrub->pfss_ttl = h->ip_ttl;
2877 			}
2878 			h->ip_ttl = src->scrub->pfss_ttl;
2879 		}
2880 		break;
2881 	}
2882 #endif /* INET */
2883 	case AF_INET6: {
2884 		if (src->scrub) {
2885 			struct ip6_hdr *__single h = pbuf->pb_data;
2886 			if (h->ip6_hlim > src->scrub->pfss_ttl) {
2887 				src->scrub->pfss_ttl = h->ip6_hlim;
2888 			}
2889 			h->ip6_hlim = src->scrub->pfss_ttl;
2890 		}
2891 		break;
2892 	}
2893 	}
2894 
2895 	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
2896 	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
2897 	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
2898 	    pf_pull_hdr(pbuf, off, hdr, sizeof(hdr), th->th_off << 2, NULL, NULL, pd->af)) {
2899 		/* Diddle with TCP options */
2900 		int hlen;
2901 		opt = hdr + sizeof(struct tcphdr);
2902 		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
2903 		while (hlen >= TCPOLEN_TIMESTAMP) {
2904 			switch (*opt) {
2905 			case TCPOPT_EOL:        /* FALLTHROUGH */
2906 			case TCPOPT_NOP:
2907 				opt++;
2908 				hlen--;
2909 				break;
2910 			case TCPOPT_TIMESTAMP:
2911 				/*
2912 				 * Modulate the timestamps.  Can be used for
2913 				 * NAT detection, OS uptime determination or
2914 				 * reboot detection.
2915 				 */
2916 
2917 				if (got_ts) {
2918 					/* Huh?  Multiple timestamps!? */
2919 					if (pf_status.debug >= PF_DEBUG_MISC) {
2920 						DPFPRINTF(("multiple TS??"));
2921 						pf_print_state(state);
2922 						printf("\n");
2923 					}
2924 					REASON_SET(reason, PFRES_TS);
2925 					return PF_DROP;
2926 				}
2927 				if (opt[1] >= TCPOLEN_TIMESTAMP) {
2928 					memcpy(&tsval, &opt[2],
2929 					    sizeof(u_int32_t));
2930 					if (tsval && src->scrub &&
2931 					    (src->scrub->pfss_flags &
2932 					    PFSS_TIMESTAMP)) {
2933 						tsval = ntohl(tsval);
2934 						pf_change_a(&opt[2],
2935 						    &th->th_sum,
2936 						    htonl(tsval +
2937 						    src->scrub->pfss_ts_mod),
2938 						    0);
2939 						copyback = 1;
2940 					}
2941 
2942 					/* Modulate TS reply iff valid (!0) */
2943 					memcpy(&tsecr, &opt[6],
2944 					    sizeof(u_int32_t));
2945 					if (tsecr && dst->scrub &&
2946 					    (dst->scrub->pfss_flags &
2947 					    PFSS_TIMESTAMP)) {
2948 						tsecr = ntohl(tsecr)
2949 						    - dst->scrub->pfss_ts_mod;
2950 						pf_change_a(&opt[6],
2951 						    &th->th_sum, htonl(tsecr),
2952 						    0);
2953 						copyback = 1;
2954 					}
2955 					got_ts = 1;
2956 				}
2957 				OS_FALLTHROUGH;
2958 			default:
2959 				hlen -= MAX(opt[1], 2);
2960 				opt += MAX(opt[1], 2);
2961 				break;
2962 			}
2963 		}
2964 		if (copyback) {
2965 			/* Copy back the options; the caller copies back the header */
2966 			int optoff = off + sizeof(*th);
2967 			int optlen = (th->th_off << 2) - sizeof(*th);
2968 			if (pf_lazy_makewritable(pd, pbuf, optoff + optlen) ==
2969 			    NULL) {
2970 				REASON_SET(reason, PFRES_MEMORY);
2971 				return PF_DROP;
2972 			}
2973 			*writeback = optoff + optlen;
2974 			pbuf_copy_back(pbuf, optoff, optlen, hdr + sizeof(*th), sizeof(hdr) - sizeof(*th));
2975 		}
2976 	}
2977 
2978 
2979 	/*
2980 	 * Must invalidate PAWS checks on connections idle for too long.
2981 	 * The fastest allowed timestamp clock is 1ms.  That turns out to
2982 	 * be about 24 days before it wraps.  XXX Right now our lowerbound
2983 	 * TS echo check only works for the first 12 days of a connection
2984 	 * when the TS has exhausted half its 32bit space
2985 	 */
2986 #define TS_MAX_IDLE     (24*24*60*60)
2987 #define TS_MAX_CONN     (12*24*60*60)   /* XXX remove when better tsecr check */
2988 
2989 	getmicrouptime(&uptime);
2990 	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
2991 	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
2992 	    pf_time_second() - state->creation > TS_MAX_CONN)) {
2993 		if (pf_status.debug >= PF_DEBUG_MISC) {
2994 			DPFPRINTF(("src idled out of PAWS\n"));
2995 			pf_print_state(state);
2996 			printf("\n");
2997 		}
2998 		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
2999 		    | PFSS_PAWS_IDLED;
3000 	}
3001 	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
3002 	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
3003 		if (pf_status.debug >= PF_DEBUG_MISC) {
3004 			DPFPRINTF(("dst idled out of PAWS\n"));
3005 			pf_print_state(state);
3006 			printf("\n");
3007 		}
3008 		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
3009 		    | PFSS_PAWS_IDLED;
3010 	}
3011 
3012 	if (got_ts && src->scrub && dst->scrub &&
3013 	    (src->scrub->pfss_flags & PFSS_PAWS) &&
3014 	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
3015 		/*
3016 		 * Validate that the timestamps are "in-window".
3017 		 * RFC1323 describes TCP Timestamp options that allow
3018 		 * measurement of RTT (round trip time) and PAWS
3019 		 * (protection against wrapped sequence numbers).  PAWS
3020 		 * gives us a set of rules for rejecting packets on
3021 		 * long fat pipes (packets that were somehow delayed
3022 		 * in transit longer than the time it took to send the
3023 		 * full TCP sequence space of 4Gb).  We can use these
3024 		 * rules and infer a few others that will let us treat
3025 		 * the 32bit timestamp and the 32bit echoed timestamp
3026 		 * as sequence numbers to prevent a blind attacker from
3027 		 * inserting packets into a connection.
3028 		 *
3029 		 * RFC1323 tells us:
3030 		 *  - The timestamp on this packet must be greater than
3031 		 *    or equal to the last value echoed by the other
3032 		 *    endpoint.  The RFC says those will be discarded
3033 		 *    since it is a dup that has already been acked.
3034 		 *    This gives us a lowerbound on the timestamp.
3035 		 *        timestamp >= other last echoed timestamp
3036 		 *  - The timestamp will be less than or equal to
3037 		 *    the last timestamp plus the time between the
3038 		 *    last packet and now.  The RFC defines the max
3039 		 *    clock rate as 1ms.  We will allow clocks to be
3040 		 *    up to 10% fast and will allow a total difference
3041 		 *    of 30 seconds due to a route change.  And this
3042 		 *    gives us an upperbound on the timestamp.
3043 		 *        timestamp <= last timestamp + max ticks
3044 		 *    We have to be careful here.  Windows will send an
3045 		 *    initial timestamp of zero and then initialize it
3046 		 *    to a random value after the 3whs; presumably to
3047 		 *    avoid a DoS by having to call an expensive RNG
3048 		 *    during a SYN flood.  Proof MS has at least one
3049 		 *    good security geek.
3050 		 *
3051 		 *  - The TCP timestamp option must also echo the other
3052 		 *    endpoints timestamp.  The timestamp echoed is the
3053 		 *    one carried on the earliest unacknowledged segment
3054 		 *    on the left edge of the sequence window.  The RFC
3055 		 *    states that the host will reject any echoed
3056 		 *    timestamps that were larger than any ever sent.
3057 		 *    This gives us an upperbound on the TS echo.
3058 		 *        tescr <= largest_tsval
3059 		 *  - The lowerbound on the TS echo is a little more
3060 		 *    tricky to determine.  The other endpoint's echoed
3061 		 *    values will not decrease.  But there may be
3062 		 *    network conditions that re-order packets and
3063 		 *    cause our view of them to decrease.  For now the
3064 		 *    only lowerbound we can safely determine is that
3065 		 *    the TS echo will never be less than the original
3066 		 *    TS.  XXX There is probably a better lowerbound.
3067 		 *    Remove TS_MAX_CONN with better lowerbound check.
3068 		 *        tescr >= other original TS
3069 		 *
3070 		 * It is also important to note that the fastest
3071 		 * timestamp clock of 1ms will wrap its 32bit space in
3072 		 * 24 days.  So we just disable TS checking after 24
3073 		 * days of idle time.  We actually must use a 12d
3074 		 * connection limit until we can come up with a better
3075 		 * lowerbound to the TS echo check.
3076 		 */
3077 		struct timeval delta_ts;
3078 		int ts_fudge;
3079 
3080 
3081 		/*
3082 		 * PFTM_TS_DIFF is how many seconds of leeway to allow
3083 		 * a host's timestamp.  This can happen if the previous
3084 		 * packet got delayed in transit for much longer than
3085 		 * this packet.
3086 		 */
3087 		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) {
3088 			ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];
3089 		}
3090 
3091 
3092 		/* Calculate max ticks since the last timestamp */
3093 #define TS_MAXFREQ      1100            /* RFC max TS freq of 1Khz + 10% skew */
3094 #define TS_MICROSECS    1000000         /* microseconds per second */
3095 		timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
3096 		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
3097 		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS / TS_MAXFREQ);
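		/*
		 * Worked example (editor's addition): with TS_MAXFREQ of
		 * 1100 ticks/s, a connection idle for 10 seconds and a
		 * ts_fudge of 30 seconds allow the peer's timestamp to have
		 * advanced by at most (10 + 30) * 1100 = 44000 ticks before
		 * the upper-bound check below fires.
		 */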
3098 
3099 
3100 		if ((src->state >= TCPS_ESTABLISHED &&
3101 		    dst->state >= TCPS_ESTABLISHED) &&
3102 		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
3103 		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
3104 		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
3105 		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
3106 			/*
3107 			 * Bad RFC1323 implementation or an insertion attack.
3108 			 *
3109 			 * - Solaris 2.6 and 2.7 are known to send another ACK
3110 			 *   after the FIN,FIN|ACK,ACK closing that carries
3111 			 *   an old timestamp.
3112 			 */
3113 
3114 			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
3115 			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
3116 			    SEQ_GT(tsval, src->scrub->pfss_tsval +
3117 			    tsval_from_last) ? '1' : ' ',
3118 			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
3119 			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '));
3120 			DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
3121 			    "idle: %lus %ums\n",
3122 			    tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
3123 			    delta_ts.tv_usec / 1000));
3124 			DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
3125 			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
3126 			DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u\n",
3127 			    dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr,
3128 			    dst->scrub->pfss_tsval0));
3129 			if (pf_status.debug >= PF_DEBUG_MISC) {
3130 				pf_print_state(state);
3131 				pf_print_flags(th->th_flags);
3132 				printf("\n");
3133 			}
3134 			REASON_SET(reason, PFRES_TS);
3135 			return PF_DROP;
3136 		}
3137 
3138 		/* XXX I'd really like to require tsecr but it's optional */
3139 	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
3140 	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
3141 	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
3142 	    src->scrub && dst->scrub &&
3143 	    (src->scrub->pfss_flags & PFSS_PAWS) &&
3144 	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
3145 		/*
3146 		 * Didn't send a timestamp.  Timestamps aren't really useful
3147 		 * when:
3148 		 *  - connection opening or closing (often not even sent).
3149 		 *    but we must not let an attacker put a FIN on a
3150 		 *    data packet to sneak it through our ESTABLISHED check.
3151 		 *  - on a TCP reset.  RFC suggests not even looking at TS.
3152 		 *  - on an empty ACK.  The TS will not be echoed so it will
3153 		 *    probably not help keep the RTT calculation in sync and
3154 		 *    there isn't as much danger when the sequence numbers
3155 		 *    got wrapped.  So some stacks don't include TS on empty
3156 		 *    ACKs :-(
3157 		 *
3158 		 * To minimize the disruption to mostly RFC1323 conformant
3159 		 * stacks, we will only require timestamps on data packets.
3160 		 *
3161 		 * And what do ya know, we cannot require timestamps on data
3162 		 * packets.  There appear to be devices that do legitimate
3163 		 * TCP connection hijacking.  There are HTTP devices that allow
3164 		 * a 3whs (with timestamps) and then buffer the HTTP request.
3165 		 * If the intermediate device has the HTTP response cache, it
3166 		 * will spoof the response but not bother timestamping its
3167 		 * packets.  So we can look for the presence of a timestamp in
3168 		 * the first data packet and if there, require it in all future
3169 		 * packets.
3170 		 */
3171 
3172 		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
3173 			/*
3174 			 * Hey!  Someone tried to sneak a packet in.  Or the
3175 			 * stack changed its RFC1323 behavior?!?!
3176 			 */
3177 			if (pf_status.debug >= PF_DEBUG_MISC) {
3178 				DPFPRINTF(("Did not receive expected RFC1323 "
3179 				    "timestamp\n"));
3180 				pf_print_state(state);
3181 				pf_print_flags(th->th_flags);
3182 				printf("\n");
3183 			}
3184 			REASON_SET(reason, PFRES_TS);
3185 			return PF_DROP;
3186 		}
3187 	}
3188 
3189 
3190 	/*
3191 	 * We will note if a host sends his data packets with or without
3192 	 * timestamps.  And require all data packets to contain a timestamp
3193 	 * if the first does.  PAWS implicitly requires that all data packets be
3194 	 * timestamped.  But I think there are middle-man devices that hijack
3195 	 * TCP streams immediately after the 3whs and don't timestamp their
3196 	 * packets (seen in a WWW accelerator or cache).
3197 	 */
3198 	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
3199 	    (PFSS_TIMESTAMP | PFSS_DATA_TS | PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
3200 		if (got_ts) {
3201 			src->scrub->pfss_flags |= PFSS_DATA_TS;
3202 		} else {
3203 			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
3204 			if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
3205 			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
3206 				/* Don't warn if other host rejected RFC1323 */
3207 				DPFPRINTF(("Broken RFC1323 stack did not "
3208 				    "timestamp data packet. Disabled PAWS "
3209 				    "security.\n"));
3210 				pf_print_state(state);
3211 				pf_print_flags(th->th_flags);
3212 				printf("\n");
3213 			}
3214 		}
3215 	}
3216 
3217 
3218 	/*
3219 	 * Update PAWS values
3220 	 */
3221 	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
3222 	    (PFSS_PAWS_IDLED | PFSS_TIMESTAMP))) {
3223 		getmicrouptime(&src->scrub->pfss_last);
3224 		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
3225 		    (src->scrub->pfss_flags & PFSS_PAWS) == 0) {
3226 			src->scrub->pfss_tsval = tsval;
3227 		}
3228 
3229 		if (tsecr) {
3230 			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
3231 			    (src->scrub->pfss_flags & PFSS_PAWS) == 0) {
3232 				src->scrub->pfss_tsecr = tsecr;
3233 			}
3234 
3235 			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
3236 			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
3237 			    src->scrub->pfss_tsval0 == 0)) {
3238 				/* tsval0 MUST be the lowest timestamp */
3239 				src->scrub->pfss_tsval0 = tsval;
3240 			}
3241 
3242 			/* Only fully initialized after a TS gets echoed */
3243 			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) {
3244 				src->scrub->pfss_flags |= PFSS_PAWS;
3245 			}
3246 		}
3247 	}
3248 
3249 	/* I have a dream....  TCP segment reassembly.... */
3250 	return 0;
3251 }
3252 
3253 static __attribute__((noinline)) int
3254 pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif,
3255     struct pf_pdesc *pd, pbuf_t *pbuf, struct tcphdr *th, int off,
3256     int *rewrptr)
3257 {
3258 #pragma unused(dir, kif)
3259 	sa_family_t af = pd->af;
3260 	u_int16_t       *mss;
3261 	int             thoff;
3262 	int             opt, cnt, optlen = 0;
3263 	int             rewrite = 0;
3264 	u_char          opts[MAX_TCPOPTLEN];
3265 	u_char          *optp = opts;
3266 
3267 	thoff = th->th_off << 2;
3268 	cnt = thoff - sizeof(struct tcphdr);
3269 
3270 	if (cnt > 0 && !pf_pull_hdr(pbuf, off + sizeof(*th), opts, sizeof(opts), cnt,
3271 	    NULL, NULL, af)) {
3272 		return PF_DROP;
3273 	}
3274 
3275 	for (; cnt > 0; cnt -= optlen, optp += optlen) {
3276 		opt = optp[0];
3277 		if (opt == TCPOPT_EOL) {
3278 			break;
3279 		}
3280 		if (opt == TCPOPT_NOP) {
3281 			optlen = 1;
3282 		} else {
3283 			if (cnt < 2) {
3284 				break;
3285 			}
3286 			optlen = optp[1];
3287 			if (optlen < 2 || optlen > cnt) {
3288 				break;
3289 			}
3290 		}
3291 		switch (opt) {
3292 		case TCPOPT_MAXSEG:
3293 			mss = (u_int16_t *)(void *)(optp + 2);
3294 			if ((ntohs(*mss)) > r->max_mss) {
3295 				/*
3296 				 * <[email protected]>
3297 				 *  Only do the TCP checksum fixup if delayed
3298 				 * checksum calculation will not be performed.
3299 				 */
3300 				if (pbuf->pb_ifp ||
3301 				    !(*pbuf->pb_csum_flags & CSUM_TCP)) {
3302 					th->th_sum = pf_cksum_fixup(th->th_sum,
3303 					    *mss, htons(r->max_mss), 0);
3304 				}
3305 				*mss = htons(r->max_mss);
3306 				rewrite = 1;
3307 			}
3308 			break;
3309 		default:
3310 			break;
3311 		}
3312 	}
3313 
3314 	if (rewrite) {
3315 		u_short reason;
3316 
3317 		VERIFY(pbuf == pd->mp);
3318 
3319 		if (pf_lazy_makewritable(pd, pd->mp,
3320 		    off + sizeof(*th) + thoff) == NULL) {
3321 			REASON_SET(&reason, PFRES_MEMORY);
3322 			if (r->log) {
3323 				PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason,
3324 				    r, 0, 0, pd);
3325 			}
3326 			return PF_DROP;
3327 		}
3328 
3329 		*rewrptr = 1;
3330 		pbuf_copy_back(pd->mp, off + sizeof(*th), thoff - sizeof(*th), opts, sizeof(opts));
3331 	}
3332 
3333 	return PF_PASS;
3334 }
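/*
 * Editor's note (not part of the original source): with a rule such as
 * "scrub max-mss 1440", a SYN advertising MSS 1460 has its option rewritten
 * to 1440 by the loop above and, when no delayed checksum offload applies,
 * th_sum is fixed up incrementally for the changed 16-bit value.
 */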
3335