/* xref: /xnu-8020.121.3/bsd/net/pf.c (revision fdd8201d7b966f0c3ea610489d29bd841d358941) */
1 /*
2  * Copyright (c) 2007-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*	$apfw: git commit 6602420f2f101b74305cd78f7cd9e0c8fdedae97 $ */
30 /*	$OpenBSD: pf.c,v 1.567 2008/02/20 23:40:13 henning Exp $ */
31 
32 /*
33  * Copyright (c) 2001 Daniel Hartmeier
34  * Copyright (c) 2002 - 2013 Henning Brauer
35  * NAT64 - Copyright (c) 2010 Viagenie Inc. (http://www.viagenie.ca)
36  * All rights reserved.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  *
42  *    - Redistributions of source code must retain the above copyright
43  *      notice, this list of conditions and the following disclaimer.
44  *    - Redistributions in binary form must reproduce the above
45  *      copyright notice, this list of conditions and the following
46  *      disclaimer in the documentation and/or other materials provided
47  *      with the distribution.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
50  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
51  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
52  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
53  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
54  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
55  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
56  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
57  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
59  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
60  * POSSIBILITY OF SUCH DAMAGE.
61  *
62  * Effort sponsored in part by the Defense Advanced Research Projects
63  * Agency (DARPA) and Air Force Research Laboratory, Air Force
64  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
65  *
66  */
67 
68 #include <machine/endian.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/filio.h>
72 #include <sys/socket.h>
73 #include <sys/socketvar.h>
74 #include <sys/kernel.h>
75 #include <sys/time.h>
76 #include <sys/proc.h>
77 #include <sys/random.h>
78 #include <sys/mcache.h>
79 #include <sys/protosw.h>
80 
81 #include <libkern/crypto/md5.h>
82 #include <libkern/libkern.h>
83 
84 #include <mach/thread_act.h>
85 
86 #include <net/if.h>
87 #include <net/if_types.h>
88 #include <net/bpf.h>
89 #include <net/route.h>
90 #include <net/dlil.h>
91 
92 #include <netinet/in.h>
93 #include <netinet/in_var.h>
94 #include <netinet/in_systm.h>
95 #include <netinet/ip.h>
96 #include <netinet/ip_var.h>
97 #include <netinet/tcp.h>
98 #include <netinet/tcp_seq.h>
99 #include <netinet/udp.h>
100 #include <netinet/ip_icmp.h>
101 #include <netinet/in_pcb.h>
102 #include <netinet/tcp_timer.h>
103 #include <netinet/tcp_var.h>
104 #include <netinet/tcp_fsm.h>
105 #include <netinet/udp_var.h>
106 #include <netinet/icmp_var.h>
107 #include <net/if_ether.h>
108 #include <net/ethernet.h>
109 #include <net/flowhash.h>
110 #include <net/nat464_utils.h>
111 #include <net/pfvar.h>
112 #include <net/if_pflog.h>
113 
114 #if NPFSYNC
115 #include <net/if_pfsync.h>
116 #endif /* NPFSYNC */
117 
118 #include <netinet/ip6.h>
119 #include <netinet6/in6_pcb.h>
120 #include <netinet6/ip6_var.h>
121 #include <netinet/icmp6.h>
122 #include <netinet6/nd6.h>
123 
124 #if DUMMYNET
125 #include <netinet/ip_dummynet.h>
126 #endif /* DUMMYNET */
127 
128 /*
129  * For RandomULong(), to get a 32 bits random value
130  * Note that random() returns a 31 bits value, see rdar://11159750
131  */
132 #include <dev/random/randomdev.h>
133 
/*
 * Debug printf: emits via printf only when pf's global debug level is at
 * least "n".  "x" is a parenthesized printf argument list, e.g.
 * DPFPRINTF(PF_DEBUG_MISC, ("msg %d\n", v)).  Expands to a void expression
 * so it is usable wherever a statement or expression is expected.
 */
#define DPFPRINTF(n, x) (pf_status.debug >= (n) ? printf x : ((void)0))

/*
 * On Mac OS X, the rtableid value is treated as the interface scope
 * value that is equivalent to the interface index used for scoped
 * routing.  A valid scope value is anything but IFSCOPE_NONE (0),
 * as per definition of ifindex which is a positive, non-zero number.
 * The other BSDs treat a negative rtableid value as invalid, hence
 * the test against INT_MAX to handle userland apps which initialize
 * the field with a negative number.
 */
#define PF_RTABLEID_IS_VALID(r) \
	((r) > IFSCOPE_NONE && (r) <= INT_MAX)
147 
/*
 * Global variables
 */
/* pf's main mutex and the perimeter rwlock taken around it */
static LCK_GRP_DECLARE(pf_lock_grp, "pf");
LCK_MTX_DECLARE(pf_lock, &pf_lock_grp);

static LCK_GRP_DECLARE(pf_perim_lock_grp, "pf_perim");
LCK_RW_DECLARE(pf_perim_lock, &pf_perim_lock_grp);

/* state tables: red-black trees keyed on the lan/ext and ext/gwy sides */
struct pf_state_tree_lan_ext     pf_statetbl_lan_ext;
struct pf_state_tree_ext_gwy     pf_statetbl_ext_gwy;

/* scratch pool-address list built up during a ruleset transaction */
struct pf_palist         pf_pabuf;
/* global pf status and statistics, exported to userland via ioctl */
struct pf_status         pf_status;

/* ticket guarding updates of pf_pabuf */
u_int32_t                ticket_pabuf;

/* secret state used by pf_tcp_iss() when generating TCP initial seq nums */
static MD5_CTX           pf_tcp_secret_ctx;
static u_char            pf_tcp_secret[16];
static int               pf_tcp_secret_init;
static int               pf_tcp_iss_off;

/* explicit stack for iterative traversal of nested anchors (depth <= 64) */
static struct pf_anchor_stackframe {
	struct pf_ruleset                       *rs;
	struct pf_rule                          *r;
	struct pf_anchor_node                   *parent;
	struct pf_anchor                        *child;
} pf_anchor_stack[64];

/* allocation pools backing pf's core object types */
struct pool              pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
struct pool              pf_state_pl, pf_state_key_pl;
180 
/*
 * Generic hook list: callbacks registered with hook_establish() and run
 * later via hook_runloop().
 */
typedef void (*hook_fn_t)(void *);

struct hook_desc {
	TAILQ_ENTRY(hook_desc) hd_list; /* list linkage */
	hook_fn_t hd_fn;                /* callback */
	void *hd_arg;                   /* opaque argument handed to hd_fn */
};

/*
 * Flags for hook_runloop().
 * NOTE(review): semantics inferred from the names (remove from list, free
 * descriptor, abort without invoking) — confirm against hook_runloop()'s
 * definition elsewhere in this file.
 */
#define HOOK_REMOVE     0x01
#define HOOK_FREE       0x02
#define HOOK_ABORT      0x04

static void             *hook_establish(struct hook_desc_head *, int,
    hook_fn_t, void *);
static void             hook_runloop(struct hook_desc_head *, int flags);
196 
/* pool backing per-application state objects (PPTP/GRE/IKE helpers) */
struct pool              pf_app_state_pl;

/* Forward declarations for pf's internal helpers, defined later in file. */
static void              pf_print_addr(struct pf_addr *addr, sa_family_t af);
static void              pf_print_sk_host(struct pf_state_host *, u_int8_t, int,
    u_int8_t);

static void              pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);

/* rate-limit thresholds (used for source-tracking limits) */
static void              pf_init_threshold(struct pf_threshold *, u_int32_t,
    u_int32_t);
static void              pf_add_threshold(struct pf_threshold *);
static int               pf_check_threshold(struct pf_threshold *);

/* packet rewriting: address/port/checksum updates for NAT, incl. AF-NAT */
static void              pf_change_ap(int, pbuf_t *, struct pf_addr *,
    u_int16_t *, u_int16_t *, u_int16_t *,
    struct pf_addr *, u_int16_t, u_int8_t, sa_family_t,
    sa_family_t, int);
static int               pf_modulate_sack(pbuf_t *, int, struct pf_pdesc *,
    struct tcphdr *, struct pf_state_peer *);
static void              pf_change_a6(struct pf_addr *, u_int16_t *,
    struct pf_addr *, u_int8_t);
static void pf_change_addr(struct pf_addr *a, u_int16_t *c, struct pf_addr *an,
    u_int8_t u, sa_family_t af, sa_family_t afn);
static void              pf_change_icmp(struct pf_addr *, u_int16_t *,
    struct pf_addr *, struct pf_addr *, u_int16_t,
    u_int16_t *, u_int16_t *, u_int16_t *,
    u_int16_t *, u_int8_t, sa_family_t);
/* response generation (RST/ICMP errors for blocked traffic) */
static void              pf_send_tcp(const struct pf_rule *, sa_family_t,
    const struct pf_addr *, const struct pf_addr *,
    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
    u_int16_t, struct ether_header *, struct ifnet *);
static void              pf_send_icmp(pbuf_t *, u_int8_t, u_int8_t,
    sa_family_t, struct pf_rule *);
/* translation rule matching/selection */
static struct pf_rule   *pf_match_translation(struct pf_pdesc *, pbuf_t *,
    int, int, struct pfi_kif *, struct pf_addr *,
    union pf_state_xport *, struct pf_addr *,
    union pf_state_xport *, int);
static struct pf_rule   *pf_get_translation_aux(struct pf_pdesc *,
    pbuf_t *, int, int, struct pfi_kif *,
    struct pf_src_node **, struct pf_addr *,
    union pf_state_xport *, struct pf_addr *,
    union pf_state_xport *, union pf_state_xport *
#if SKYWALK
    , netns_token *
#endif
    );
static void              pf_attach_state(struct pf_state_key *,
    struct pf_state *, int);
static void              pf_detach_state(struct pf_state *, int);
static u_int32_t         pf_tcp_iss(struct pf_pdesc *);
/* per-ruleset/per-protocol test entry points */
static int               pf_test_rule(struct pf_rule **, struct pf_state **,
    int, struct pfi_kif *, pbuf_t *, int,
    void *, struct pf_pdesc *, struct pf_rule **,
    struct pf_ruleset **, struct ifqueue *);
#if DUMMYNET
static int               pf_test_dummynet(struct pf_rule **, int,
    struct pfi_kif *, pbuf_t **,
    struct pf_pdesc *, struct ip_fw_args *);
#endif /* DUMMYNET */
static int               pf_test_fragment(struct pf_rule **, int,
    struct pfi_kif *, pbuf_t *, void *,
    struct pf_pdesc *, struct pf_rule **,
    struct pf_ruleset **);
static int               pf_test_state_tcp(struct pf_state **, int,
    struct pfi_kif *, pbuf_t *, int,
    void *, struct pf_pdesc *, u_short *);
static int               pf_test_state_udp(struct pf_state **, int,
    struct pfi_kif *, pbuf_t *, int,
    void *, struct pf_pdesc *, u_short *);
static int               pf_test_state_icmp(struct pf_state **, int,
    struct pfi_kif *, pbuf_t *, int,
    void *, struct pf_pdesc *, u_short *);
static int               pf_test_state_other(struct pf_state **, int,
    struct pfi_kif *, struct pf_pdesc *);
static int               pf_match_tag(struct pf_rule *,
    struct pf_mtag *, int *);
/* address-pool hashing and NAT address/port selection */
static void              pf_hash(struct pf_addr *, struct pf_addr *,
    struct pf_poolhashkey *, sa_family_t);
static int               pf_map_addr(u_int8_t, struct pf_rule *,
    struct pf_addr *, struct pf_addr *,
    struct pf_addr *, struct pf_src_node **);
static int               pf_get_sport(struct pf_pdesc *, struct pfi_kif *,
    struct pf_rule *, struct pf_addr *,
    union pf_state_xport *, struct pf_addr *,
    union pf_state_xport *, struct pf_addr *,
    union pf_state_xport *, struct pf_src_node **
#if SKYWALK
    , netns_token *
#endif
    );
/* route-to / reply-to forwarding paths */
static void              pf_route(pbuf_t **, struct pf_rule *, int,
    struct ifnet *, struct pf_state *,
    struct pf_pdesc *);
static void              pf_route6(pbuf_t **, struct pf_rule *, int,
    struct ifnet *, struct pf_state *,
    struct pf_pdesc *);
/* TCP option extraction and MSS computation */
static u_int8_t          pf_get_wscale(pbuf_t *, int, u_int16_t,
    sa_family_t);
static u_int16_t         pf_get_mss(pbuf_t *, int, u_int16_t,
    sa_family_t);
static u_int16_t         pf_calc_mss(struct pf_addr *, sa_family_t,
    u_int16_t);
static void              pf_set_rt_ifp(struct pf_state *,
    struct pf_addr *, sa_family_t af);
static int               pf_check_proto_cksum(pbuf_t *, int, int,
    u_int8_t, sa_family_t);
static int               pf_addr_wrap_neq(struct pf_addr_wrap *,
    struct pf_addr_wrap *);
static struct pf_state  *pf_find_state(struct pfi_kif *,
    struct pf_state_key_cmp *, u_int);
static int               pf_src_connlimit(struct pf_state **);
static void              pf_stateins_err(const char *, struct pf_state *,
    struct pfi_kif *);
static int               pf_check_congestion(struct ifqueue *);

#if 0
static const char *pf_pptp_ctrl_type_name(u_int16_t code);
#endif
/* application-layer helpers: PPTP, GREv1, IKE, ESP */
static void             pf_pptp_handler(struct pf_state *, int, int,
    struct pf_pdesc *, struct pfi_kif *);
static void             pf_pptp_unlink(struct pf_state *);
static void             pf_grev1_unlink(struct pf_state *);
static int              pf_test_state_grev1(struct pf_state **, int,
    struct pfi_kif *, int, struct pf_pdesc *);
static int              pf_ike_compare(struct pf_app_state *,
    struct pf_app_state *);
static int              pf_test_state_esp(struct pf_state **, int,
    struct pfi_kif *, int, struct pf_pdesc *);
/* top-level per-family packet test entry points */
static int pf_test6(int, struct ifnet *, pbuf_t **, struct ether_header *,
    struct ip_fw_args *);
#if INET
static int pf_test(int, struct ifnet *, pbuf_t **,
    struct ether_header *, struct ip_fw_args *);
#endif /* INET */
331 
332 
/* pools owned by pf_table.c / pf_norm.c but limited from here */
extern struct pool pfr_ktable_pl;
extern struct pool pfr_kentry_pl;
extern int path_mtu_discovery;

/* per-type high-water allocation limits, indexed by PF_LIMIT_* */
struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ .pp = &pf_state_pl, .limit = PFSTATE_HIWAT },
	{ .pp = &pf_app_state_pl, .limit = PFAPPSTATE_HIWAT },
	{ .pp = &pf_src_tree_pl, .limit = PFSNODE_HIWAT },
	{ .pp = &pf_frent_pl, .limit = PFFRAG_FRENT_HIWAT },
	{ .pp = &pfr_ktable_pl, .limit = PFR_KTABLE_HIWAT },
	{ .pp = &pfr_kentry_pl, .limit = PFR_KENTRY_HIWAT },
};

#if defined(SKYWALK) && defined(XNU_TARGET_OS_OSX)
/*
 * NOTE(review): presumably the set of anchors permitted to coexist with
 * Skywalk's compatibility netfilter path — confirm against the code that
 * consumes this table.
 */
const char *compatible_anchors[] = {
	"com.apple.internet-sharing",
	"com.apple/250.ApplicationFirewall",
	"com.apple/200.AirDrop"
};
#endif // SKYWALK && defined(XNU_TARGET_OS_OSX)
352 #endif // SKYWALK && defined(XNU_TARGET_OS_OSX)
353 
354 void *
pf_lazy_makewritable(struct pf_pdesc * pd,pbuf_t * pbuf,int len)355 pf_lazy_makewritable(struct pf_pdesc *pd, pbuf_t *pbuf, int len)
356 {
357 	void *p;
358 
359 	if (pd->lmw < 0) {
360 		return NULL;
361 	}
362 
363 	VERIFY(pbuf == pd->mp);
364 
365 	p = pbuf->pb_data;
366 	if (len > pd->lmw) {
367 		if ((p = pbuf_ensure_writable(pbuf, len)) == NULL) {
368 			len = -1;
369 		}
370 		pd->lmw = len;
371 		if (len >= 0) {
372 			pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
373 
374 			switch (pd->af) {
375 			case AF_INET: {
376 				struct ip *h = p;
377 				pd->src = (struct pf_addr *)(uintptr_t)&h->ip_src;
378 				pd->dst = (struct pf_addr *)(uintptr_t)&h->ip_dst;
379 				pd->ip_sum = &h->ip_sum;
380 				break;
381 			}
382 			case AF_INET6: {
383 				struct ip6_hdr *h = p;
384 				pd->src = (struct pf_addr *)(uintptr_t)&h->ip6_src;
385 				pd->dst = (struct pf_addr *)(uintptr_t)&h->ip6_dst;
386 				break;
387 			}
388 			}
389 		}
390 	}
391 
392 	return len < 0 ? NULL : p;
393 }
394 
395 static const int *
pf_state_lookup_aux(struct pf_state ** state,struct pfi_kif * kif,int direction,int * action)396 pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif,
397     int direction, int *action)
398 {
399 	if (*state == NULL || (*state)->timeout == PFTM_PURGE) {
400 		*action = PF_DROP;
401 		return action;
402 	}
403 
404 	if (direction == PF_OUT &&
405 	    (((*state)->rule.ptr->rt == PF_ROUTETO &&
406 	    (*state)->rule.ptr->direction == PF_OUT) ||
407 	    ((*state)->rule.ptr->rt == PF_REPLYTO &&
408 	    (*state)->rule.ptr->direction == PF_IN)) &&
409 	    (*state)->rt_kif != NULL && (*state)->rt_kif != kif) {
410 		*action = PF_PASS;
411 		return action;
412 	}
413 
414 	return 0;
415 }
416 
/*
 * Look up the state for the current packet and copy its flow id into the
 * packet descriptor when the packet does not already carry one.  Expects
 * the locals kif, key, direction, state and pd to be in scope, and returns
 * from the enclosing function with the verdict chosen by
 * pf_state_lookup_aux() when the state is missing or unusable.
 */
#define STATE_LOOKUP()                                                   \
	do {                                                             \
	        int action;                                              \
	        *state = pf_find_state(kif, &key, direction);            \
	        if (*state != NULL && pd != NULL &&                      \
	            !(pd->pktflags & PKTF_FLOW_ID)) {                    \
	                pd->flowsrc = (*state)->state_key->flowsrc;      \
	                pd->flowhash = (*state)->state_key->flowhash;    \
	                if (pd->flowhash != 0) {                         \
	                        pd->pktflags |= PKTF_FLOW_ID;            \
	                        pd->pktflags &= ~PKTF_FLOW_ADV;          \
	                }                                                \
	        }                                                        \
	        if (pf_state_lookup_aux(state, kif, direction, &action)) \
	                return (action);                                 \
	} while (0)

/*
 * True when the state key carries different addresses on its lan and gwy
 * sides, i.e. address translation is in effect.  Only the first 32-bit
 * word needs checking for IPv4; all four words are checked for IPv6.
 */
#define STATE_ADDR_TRANSLATE(sk)                                        \
	(sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] ||         \
	((sk)->af_lan == AF_INET6 &&                                    \
	((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] ||        \
	(sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] ||         \
	(sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3]))

/* translation in effect: address family, address, or port differs */
#define STATE_TRANSLATE(sk)                                             \
	((sk)->af_lan != (sk)->af_gwy ||                                \
	STATE_ADDR_TRANSLATE(sk) ||                                     \
	(sk)->lan.xport.port != (sk)->gwy.xport.port)

/* GRE/PPTP variant: the call id plays the role of the port */
#define STATE_GRE_TRANSLATE(sk)                                         \
	(STATE_ADDR_TRANSLATE(sk) ||                                    \
	(sk)->lan.xport.call_id != (sk)->gwy.xport.call_id)

/* interface a new state is bound to: kif if the rule is if-bound, else all */
#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all

/*
 * Bump the state counters on the rule (and the anchor / NAT rule when
 * present) that created state "s".  VERIFYs catch counter overflow.
 */
#define STATE_INC_COUNTERS(s)                                   \
	do {                                                    \
	        s->rule.ptr->states++;                          \
	        VERIFY(s->rule.ptr->states != 0);               \
	        if (s->anchor.ptr != NULL) {                    \
	                s->anchor.ptr->states++;                \
	                VERIFY(s->anchor.ptr->states != 0);     \
	        }                                               \
	        if (s->nat_rule.ptr != NULL) {                  \
	                s->nat_rule.ptr->states++;              \
	                VERIFY(s->nat_rule.ptr->states != 0);   \
	        }                                               \
	} while (0)

/* inverse of STATE_INC_COUNTERS; VERIFYs catch counter underflow */
#define STATE_DEC_COUNTERS(s)                                   \
	do {                                                    \
	        if (s->nat_rule.ptr != NULL) {                  \
	                VERIFY(s->nat_rule.ptr->states > 0);    \
	                s->nat_rule.ptr->states--;              \
	        }                                               \
	        if (s->anchor.ptr != NULL) {                    \
	                VERIFY(s->anchor.ptr->states > 0);      \
	                s->anchor.ptr->states--;                \
	        }                                               \
	        VERIFY(s->rule.ptr->states > 0);                \
	        s->rule.ptr->states--;                          \
	} while (0)
480 
/* comparators for the red-black trees instantiated below */
static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static __inline int pf_state_compare_lan_ext(struct pf_state_key *,
    struct pf_state_key *);
static __inline int pf_state_compare_ext_gwy(struct pf_state_key *,
    struct pf_state_key *);
static __inline int pf_state_compare_id(struct pf_state *,
    struct pf_state *);

/* source-tracking nodes, keyed by (rule, af, address) */
struct pf_src_tree tree_src_tracking;

/* states by unique id, plus the global insertion-ordered state list */
struct pf_state_tree_id tree_id;
struct pf_state_queue state_list;

/* generate the RB-tree implementations (<sys/tree.h>) */
RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree_lan_ext, pf_state_key,
    entry_lan_ext, pf_state_compare_lan_ext);
RB_GENERATE(pf_state_tree_ext_gwy, pf_state_key,
    entry_ext_gwy, pf_state_compare_ext_gwy);
RB_GENERATE(pf_state_tree_id, pf_state,
    entry_id, pf_state_compare_id);
501 
/*
 * Flags telling pf_detach_state() to skip removal from one of the two
 * state trees (NOTE(review): inferred from the names matching the two
 * tables above — confirm against pf_detach_state()'s definition).
 */
#define PF_DT_SKIP_LANEXT       0x01
#define PF_DT_SKIP_EXTGWY       0x02

/* PPTP control-connection constants (RFC 2637) */
static const u_int16_t PF_PPTP_PORT = 1723;
static const u_int32_t PF_PPTP_MAGIC_NUMBER = 0x1A2B3C4D;

/* header common to every PPTP control packet; all fields network order */
struct pf_pptp_hdr {
	u_int16_t       length;
	u_int16_t       type;
	u_int32_t       magic;
};

struct pf_pptp_ctrl_hdr {
	u_int16_t       type;
	u_int16_t       reserved_0;
};

/* placeholder for a control message body of unknown type */
struct pf_pptp_ctrl_generic {
	u_int16_t       data[0];
};

/* PPTP control message bodies, one struct per RFC 2637 message type. */
#define PF_PPTP_CTRL_TYPE_START_REQ     1
struct pf_pptp_ctrl_start_req {
	u_int16_t       protocol_version;
	u_int16_t       reserved_1;
	u_int32_t       framing_capabilities;
	u_int32_t       bearer_capabilities;
	u_int16_t       maximum_channels;
	u_int16_t       firmware_revision;
	u_int8_t        host_name[64];
	u_int8_t        vendor_string[64];
};

#define PF_PPTP_CTRL_TYPE_START_RPY     2
struct pf_pptp_ctrl_start_rpy {
	u_int16_t       protocol_version;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int32_t       framing_capabilities;
	u_int32_t       bearer_capabilities;
	u_int16_t       maximum_channels;
	u_int16_t       firmware_revision;
	u_int8_t        host_name[64];
	u_int8_t        vendor_string[64];
};

#define PF_PPTP_CTRL_TYPE_STOP_REQ      3
struct pf_pptp_ctrl_stop_req {
	u_int8_t        reason;
	u_int8_t        reserved_1;
	u_int16_t       reserved_2;
};

#define PF_PPTP_CTRL_TYPE_STOP_RPY      4
struct pf_pptp_ctrl_stop_rpy {
	u_int8_t        reason;
	u_int8_t        error_code;
	u_int16_t       reserved_1;
};

#define PF_PPTP_CTRL_TYPE_ECHO_REQ      5
struct pf_pptp_ctrl_echo_req {
	u_int32_t       identifier;
};

#define PF_PPTP_CTRL_TYPE_ECHO_RPY      6
struct pf_pptp_ctrl_echo_rpy {
	u_int32_t       identifier;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int16_t       reserved_1;
};

#define PF_PPTP_CTRL_TYPE_CALL_OUT_REQ  7
struct pf_pptp_ctrl_call_out_req {
	u_int16_t       call_id;
	u_int16_t       call_sernum;
	u_int32_t       min_bps;
	u_int32_t       bearer_type;
	u_int32_t       framing_type;
	u_int16_t       rxwindow_size;
	u_int16_t       proc_delay;
	u_int8_t        phone_num[64];
	u_int8_t        sub_addr[64];
};

#define PF_PPTP_CTRL_TYPE_CALL_OUT_RPY  8
struct pf_pptp_ctrl_call_out_rpy {
	u_int16_t       call_id;
	u_int16_t       peer_call_id;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int16_t       cause_code;
	u_int32_t       connect_speed;
	u_int16_t       rxwindow_size;
	u_int16_t       proc_delay;
	u_int32_t       phy_channel_id;
};

#define PF_PPTP_CTRL_TYPE_CALL_IN_1ST   9
struct pf_pptp_ctrl_call_in_1st {
	u_int16_t       call_id;
	u_int16_t       call_sernum;
	u_int32_t       bearer_type;
	u_int32_t       phy_channel_id;
	u_int16_t       dialed_number_len;
	u_int16_t       dialing_number_len;
	u_int8_t        dialed_num[64];
	u_int8_t        dialing_num[64];
	u_int8_t        sub_addr[64];
};

#define PF_PPTP_CTRL_TYPE_CALL_IN_2ND   10
struct pf_pptp_ctrl_call_in_2nd {
	u_int16_t       call_id;
	u_int16_t       peer_call_id;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int16_t       rxwindow_size;
	u_int16_t       txdelay;
	u_int16_t       reserved_1;
};

#define PF_PPTP_CTRL_TYPE_CALL_IN_3RD   11
struct pf_pptp_ctrl_call_in_3rd {
	u_int16_t       call_id;
	u_int16_t       reserved_1;
	u_int32_t       connect_speed;
	u_int16_t       rxwindow_size;
	u_int16_t       txdelay;
	u_int32_t       framing_type;
};

#define PF_PPTP_CTRL_TYPE_CALL_CLR      12
struct pf_pptp_ctrl_call_clr {
	u_int16_t       call_id;
	u_int16_t       reserved_1;
};

#define PF_PPTP_CTRL_TYPE_CALL_DISC     13
struct pf_pptp_ctrl_call_disc {
	u_int16_t       call_id;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int16_t       cause_code;
	u_int16_t       reserved_1;
	u_int8_t        statistics[128];
};

#define PF_PPTP_CTRL_TYPE_ERROR 14
struct pf_pptp_ctrl_error {
	u_int16_t       peer_call_id;
	u_int16_t       reserved_1;
	u_int32_t       crc_errors;
	u_int32_t       fr_errors;
	u_int32_t       hw_errors;
	u_int32_t       buf_errors;
	u_int32_t       tim_errors;
	u_int32_t       align_errors;
};

#define PF_PPTP_CTRL_TYPE_SET_LINKINFO  15
struct pf_pptp_ctrl_set_linkinfo {
	u_int16_t       peer_call_id;
	u_int16_t       reserved_1;
	u_int32_t       tx_accm;
	u_int32_t       rx_accm;
};

/* smallest parseable control packet: outer header + control header */
static const size_t PF_PPTP_CTRL_MSG_MINSIZE =
    sizeof(struct pf_pptp_hdr) + sizeof(struct pf_pptp_ctrl_hdr);

/* overlay of all possible control message bodies */
union pf_pptp_ctrl_msg_union {
	struct pf_pptp_ctrl_start_req           start_req;
	struct pf_pptp_ctrl_start_rpy           start_rpy;
	struct pf_pptp_ctrl_stop_req            stop_req;
	struct pf_pptp_ctrl_stop_rpy            stop_rpy;
	struct pf_pptp_ctrl_echo_req            echo_req;
	struct pf_pptp_ctrl_echo_rpy            echo_rpy;
	struct pf_pptp_ctrl_call_out_req        call_out_req;
	struct pf_pptp_ctrl_call_out_rpy        call_out_rpy;
	struct pf_pptp_ctrl_call_in_1st         call_in_1st;
	struct pf_pptp_ctrl_call_in_2nd         call_in_2nd;
	struct pf_pptp_ctrl_call_in_3rd         call_in_3rd;
	struct pf_pptp_ctrl_call_clr            call_clr;
	struct pf_pptp_ctrl_call_disc           call_disc;
	struct pf_pptp_ctrl_error               error;
	struct pf_pptp_ctrl_set_linkinfo        set_linkinfo;
	u_int8_t                                data[0];
};

/* a complete PPTP control message as it appears on the wire */
struct pf_pptp_ctrl_msg {
	struct pf_pptp_hdr              hdr;
	struct pf_pptp_ctrl_hdr         ctrl;
	union pf_pptp_ctrl_msg_union    msg;
};

/* enhanced GRE (version 1) constants used by the PPTP data channel */
#define PF_GRE_FLAG_CHECKSUM_PRESENT    0x8000
#define PF_GRE_FLAG_VERSION_MASK                0x0007
#define PF_GRE_PPP_ETHERTYPE                    0x880B

/* fixed part of the enhanced GRE header; seqno/ackno are optional */
struct pf_grev1_hdr {
	u_int16_t flags;
	u_int16_t protocol_type;
	u_int16_t payload_length;
	u_int16_t call_id;
	/*
	 *  u_int32_t seqno;
	 *  u_int32_t ackno;
	 */
};

/* ISAKMP/IKE over UDP port 500 */
static const u_int16_t PF_IKE_PORT = 500;

/* ISAKMP header layout shared by IKEv1 and IKEv2 */
struct pf_ike_hdr {
	u_int64_t initiator_cookie, responder_cookie;
	u_int8_t next_payload, version, exchange_type, flags;
	u_int32_t message_id, length;
};

#define PF_IKE_PACKET_MINSIZE   (sizeof (struct pf_ike_hdr))

/* exchange_type values */
#define PF_IKEv1_EXCHTYPE_BASE                           1
#define PF_IKEv1_EXCHTYPE_ID_PROTECT             2
#define PF_IKEv1_EXCHTYPE_AUTH_ONLY                      3
#define PF_IKEv1_EXCHTYPE_AGGRESSIVE             4
#define PF_IKEv1_EXCHTYPE_INFORMATIONAL          5
#define PF_IKEv2_EXCHTYPE_SA_INIT                       34
#define PF_IKEv2_EXCHTYPE_AUTH                          35
#define PF_IKEv2_EXCHTYPE_CREATE_CHILD_SA       36
#define PF_IKEv2_EXCHTYPE_INFORMATIONAL         37

/* flags field bits */
#define PF_IKEv1_FLAG_E         0x01
#define PF_IKEv1_FLAG_C         0x02
#define PF_IKEv1_FLAG_A         0x04
#define PF_IKEv2_FLAG_I         0x08
#define PF_IKEv2_FLAG_V         0x10
#define PF_IKEv2_FLAG_R         0x20

/* IPsec ESP header: states are matched on the SPI */
struct pf_esp_hdr {
	u_int32_t spi;
	u_int32_t seqno;
	u_int8_t payload[];
};
746 
747 static __inline int
pf_addr_compare(struct pf_addr * a,struct pf_addr * b,sa_family_t af)748 pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
749 {
750 	switch (af) {
751 #ifdef INET
752 	case AF_INET:
753 		if (a->addr32[0] > b->addr32[0]) {
754 			return 1;
755 		}
756 		if (a->addr32[0] < b->addr32[0]) {
757 			return -1;
758 		}
759 		break;
760 #endif /* INET */
761 	case AF_INET6:
762 		if (a->addr32[3] > b->addr32[3]) {
763 			return 1;
764 		}
765 		if (a->addr32[3] < b->addr32[3]) {
766 			return -1;
767 		}
768 		if (a->addr32[2] > b->addr32[2]) {
769 			return 1;
770 		}
771 		if (a->addr32[2] < b->addr32[2]) {
772 			return -1;
773 		}
774 		if (a->addr32[1] > b->addr32[1]) {
775 			return 1;
776 		}
777 		if (a->addr32[1] < b->addr32[1]) {
778 			return -1;
779 		}
780 		if (a->addr32[0] > b->addr32[0]) {
781 			return 1;
782 		}
783 		if (a->addr32[0] < b->addr32[0]) {
784 			return -1;
785 		}
786 		break;
787 	}
788 	return 0;
789 }
790 
791 static __inline int
pf_src_compare(struct pf_src_node * a,struct pf_src_node * b)792 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
793 {
794 	int     diff;
795 
796 	if (a->rule.ptr > b->rule.ptr) {
797 		return 1;
798 	}
799 	if (a->rule.ptr < b->rule.ptr) {
800 		return -1;
801 	}
802 	if ((diff = a->af - b->af) != 0) {
803 		return diff;
804 	}
805 	if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0) {
806 		return diff;
807 	}
808 	return 0;
809 }
810 
/*
 * Comparator for the (lan, ext_lan) state tree.  Orders keys by protocol
 * and lan-side address family first, then by the protocol-specific
 * port/call-id/SPI fields, then by addresses, and finally by any
 * application-level comparator attached to both keys.  For UDP, the key's
 * proto_variant selects the endpoint-filtering mode, which controls how
 * much of the external endpoint participates in the comparison.
 * Returns <0, 0 or >0.
 */
static __inline int
pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
{
	int     diff;
	int     extfilter;

	if ((diff = a->proto - b->proto) != 0) {
		return diff;
	}
	if ((diff = a->af_lan - b->af_lan) != 0) {
		return diff;
	}

	/* default: full address+port match required on the external side */
	extfilter = PF_EXTFILTER_APD;

	switch (a->proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		/* ICMP keys carry their id in the lan port slot only */
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_TCP:
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		if ((diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_UDP:
		/* proto_variant encodes the UDP endpoint-filtering mode */
		if ((diff = a->proto_variant - b->proto_variant)) {
			return diff;
		}
		extfilter = a->proto_variant;
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		/* external port matters only under strict filtering modes */
		if ((extfilter < PF_EXTFILTER_AD) &&
		    (diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_GRE:
		/* PPTP-style GRE: the call id acts as the port */
		if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
		    a->proto_variant == b->proto_variant) {
			if (!!(diff = a->ext_lan.xport.call_id -
			    b->ext_lan.xport.call_id)) {
				return diff;
			}
		}
		break;

	case IPPROTO_ESP:
		/* ESP keys are distinguished by their SPI */
		if (!!(diff = a->ext_lan.xport.spi - b->ext_lan.xport.spi)) {
			return diff;
		}
		break;

	default:
		break;
	}

	switch (a->af_lan) {
#if INET
	case AF_INET:
		if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr,
		    a->af_lan)) != 0) {
			return diff;
		}

		/* external address ignored under endpoint-independent mode */
		if (extfilter < PF_EXTFILTER_EI) {
			if ((diff = pf_addr_compare(&a->ext_lan.addr,
			    &b->ext_lan.addr,
			    a->af_lan)) != 0) {
				return diff;
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr,
		    a->af_lan)) != 0) {
			return diff;
		}

		if (extfilter < PF_EXTFILTER_EI ||
		    !PF_AZERO(&b->ext_lan.addr, AF_INET6)) {
			if ((diff = pf_addr_compare(&a->ext_lan.addr,
			    &b->ext_lan.addr,
			    a->af_lan)) != 0) {
				return diff;
			}
		}
		break;
	}

	if (a->app_state && b->app_state) {
		if (a->app_state->compare_lan_ext &&
		    b->app_state->compare_lan_ext) {
			/* order by comparator identity first, then its verdict */
			diff = (const char *)b->app_state->compare_lan_ext -
			    (const char *)a->app_state->compare_lan_ext;
			if (diff != 0) {
				return diff;
			}
			diff = a->app_state->compare_lan_ext(a->app_state,
			    b->app_state);
			if (diff != 0) {
				return diff;
			}
		}
	}

	return 0;
}
929 
/*
 * RB-tree comparator for the ext/gwy state table (pf_statetbl_ext_gwy);
 * the gateway-side mirror of pf_state_compare_lan_ext().  Orders keys
 * by protocol, gateway-side address family, transport identifiers,
 * addresses, and any attached application-level comparator.
 */
static __inline int
pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
{
	int     diff;
	int     extfilter;

	if ((diff = a->proto - b->proto) != 0) {
		return diff;
	}

	if ((diff = a->af_gwy - b->af_gwy) != 0) {
		return diff;
	}

	/* Default: strictest filter — address, port and protocol all match. */
	extfilter = PF_EXTFILTER_APD;

	switch (a->proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		/* For ICMP the port slot of the union carries the id. */
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_TCP:
		if ((diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0) {
			return diff;
		}
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_UDP:
		/*
		 * For UDP, proto_variant carries the endpoint-filtering
		 * mode; it both participates in the key and relaxes which
		 * external-endpoint fields are compared below.
		 */
		if ((diff = a->proto_variant - b->proto_variant)) {
			return diff;
		}
		extfilter = a->proto_variant;
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		/* External port matters only for the stricter filter modes. */
		if ((extfilter < PF_EXTFILTER_AD) &&
		    (diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_GRE:
		/* Only PPTP-variant GRE keys carry a call id to compare. */
		if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
		    a->proto_variant == b->proto_variant) {
			if (!!(diff = a->gwy.xport.call_id -
			    b->gwy.xport.call_id)) {
				return diff;
			}
		}
		break;

	case IPPROTO_ESP:
		if (!!(diff = a->gwy.xport.spi - b->gwy.xport.spi)) {
			return diff;
		}
		break;

	default:
		break;
	}

	switch (a->af_gwy) {
#if INET
	case AF_INET:
		if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr,
		    a->af_gwy)) != 0) {
			return diff;
		}

		/* Endpoint-independent filtering ignores the ext address. */
		if (extfilter < PF_EXTFILTER_EI) {
			if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr,
			    a->af_gwy)) != 0) {
				return diff;
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr,
		    a->af_gwy)) != 0) {
			return diff;
		}

		/*
		 * For v6 the ext address is also compared when the lookup
		 * key (b) supplies a non-zero one, even in EI mode.
		 */
		if (extfilter < PF_EXTFILTER_EI ||
		    !PF_AZERO(&b->ext_gwy.addr, AF_INET6)) {
			if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr,
			    a->af_gwy)) != 0) {
				return diff;
			}
		}
		break;
	}

	if (a->app_state && b->app_state) {
		if (a->app_state->compare_ext_gwy &&
		    b->app_state->compare_ext_gwy) {
			/* Order first by comparator identity ... */
			diff = (const char *)b->app_state->compare_ext_gwy -
			    (const char *)a->app_state->compare_ext_gwy;
			if (diff != 0) {
				return diff;
			}
			/* ... then by the app-specific comparison itself. */
			diff = a->app_state->compare_ext_gwy(a->app_state,
			    b->app_state);
			if (diff != 0) {
				return diff;
			}
		}
	}

	return 0;
}
1047 
1048 static __inline int
pf_state_compare_id(struct pf_state * a,struct pf_state * b)1049 pf_state_compare_id(struct pf_state *a, struct pf_state *b)
1050 {
1051 	if (a->id > b->id) {
1052 		return 1;
1053 	}
1054 	if (a->id < b->id) {
1055 		return -1;
1056 	}
1057 	if (a->creatorid > b->creatorid) {
1058 		return 1;
1059 	}
1060 	if (a->creatorid < b->creatorid) {
1061 		return -1;
1062 	}
1063 
1064 	return 0;
1065 }
1066 
1067 void
pf_addrcpy(struct pf_addr * dst,struct pf_addr * src,sa_family_t af)1068 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
1069 {
1070 	switch (af) {
1071 #if INET
1072 	case AF_INET:
1073 		dst->addr32[0] = src->addr32[0];
1074 		break;
1075 #endif /* INET */
1076 	case AF_INET6:
1077 		dst->addr32[0] = src->addr32[0];
1078 		dst->addr32[1] = src->addr32[1];
1079 		dst->addr32[2] = src->addr32[2];
1080 		dst->addr32[3] = src->addr32[3];
1081 		break;
1082 	}
1083 }
1084 
1085 struct pf_state *
pf_find_state_byid(struct pf_state_cmp * key)1086 pf_find_state_byid(struct pf_state_cmp *key)
1087 {
1088 	pf_status.fcounters[FCNT_STATE_SEARCH]++;
1089 
1090 	return RB_FIND(pf_state_tree_id, &tree_id,
1091 	           (struct pf_state *)(void *)key);
1092 }
1093 
/*
 * Look up the state for a packet travelling in direction `dir`,
 * restricted to states bound to `kif` or floating (pfi_all).
 * Returns the first matching state, or NULL.
 */
static struct pf_state *
pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir)
{
	struct pf_state_key     *sk = NULL;
	struct pf_state         *s;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	switch (dir) {
	case PF_OUT:
		/* Outbound packets are keyed on the lan/ext tree. */
		sk = RB_FIND(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
		    (struct pf_state_key *)key);
		break;
	case PF_IN:
		sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
		    (struct pf_state_key *)key);
		/*
		 * NAT64 is done only on input; for packets coming in
		 * from the LAN side we also need to look up the lan_ext
		 * tree.  A hit whose address families are equal is not
		 * a NAT64 state, so it is rejected here.
		 */
		if (sk == NULL) {
			sk = RB_FIND(pf_state_tree_lan_ext,
			    &pf_statetbl_lan_ext,
			    (struct pf_state_key *)key);
			if (sk && sk->af_lan == sk->af_gwy) {
				sk = NULL;
			}
		}
		break;
	default:
		panic("pf_find_state");
	}

	/* list is sorted, if-bound states before floating ones */
	if (sk != NULL) {
		TAILQ_FOREACH(s, &sk->states, next)
		if (s->kif == pfi_all || s->kif == kif) {
			return s;
		}
	}

	return NULL;
}
1137 
/*
 * Like pf_find_state(), but without interface binding: returns the
 * first state attached to the matching key regardless of kif.  When
 * `more` is non-NULL it is additionally incremented by the number of
 * states sharing that key (caller is expected to have initialized it).
 */
struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key     *sk = NULL;
	struct pf_state         *s, *ret = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	switch (dir) {
	case PF_OUT:
		sk = RB_FIND(pf_state_tree_lan_ext,
		    &pf_statetbl_lan_ext, (struct pf_state_key *)key);
		break;
	case PF_IN:
		sk = RB_FIND(pf_state_tree_ext_gwy,
		    &pf_statetbl_ext_gwy, (struct pf_state_key *)key);
		/*
		 * NAT64 is done only on input; for packets coming in
		 * from the LAN side we also need to look up the lan_ext
		 * tree.  This second lookup is done only when NAT64 is
		 * actually configured, and a hit with equal address
		 * families is rejected as not being a NAT64 state.
		 */
		if ((sk == NULL) && pf_nat64_configured) {
			sk = RB_FIND(pf_state_tree_lan_ext,
			    &pf_statetbl_lan_ext,
			    (struct pf_state_key *)key);
			if (sk && sk->af_lan == sk->af_gwy) {
				sk = NULL;
			}
		}
		break;
	default:
		panic("pf_find_state_all");
	}

	if (sk != NULL) {
		ret = TAILQ_FIRST(&sk->states);
		if (more == NULL) {
			return ret;
		}

		/* Count every state attached to this key. */
		TAILQ_FOREACH(s, &sk->states, next)
		(*more)++;
	}

	return ret;
}
1183 
1184 static void
pf_init_threshold(struct pf_threshold * threshold,u_int32_t limit,u_int32_t seconds)1185 pf_init_threshold(struct pf_threshold *threshold,
1186     u_int32_t limit, u_int32_t seconds)
1187 {
1188 	threshold->limit = limit * PF_THRESHOLD_MULT;
1189 	threshold->seconds = seconds;
1190 	threshold->count = 0;
1191 	threshold->last = pf_time_second();
1192 }
1193 
1194 static void
pf_add_threshold(struct pf_threshold * threshold)1195 pf_add_threshold(struct pf_threshold *threshold)
1196 {
1197 	u_int32_t t = pf_time_second(), diff = t - threshold->last;
1198 
1199 	if (diff >= threshold->seconds) {
1200 		threshold->count = 0;
1201 	} else {
1202 		threshold->count -= threshold->count * diff /
1203 		    threshold->seconds;
1204 	}
1205 	threshold->count += PF_THRESHOLD_MULT;
1206 	threshold->last = t;
1207 }
1208 
1209 static int
pf_check_threshold(struct pf_threshold * threshold)1210 pf_check_threshold(struct pf_threshold *threshold)
1211 {
1212 	return threshold->count > threshold->limit;
1213 }
1214 
/*
 * Account a new TCP connection against the state's source node and
 * enforce the rule's max-src-conn / max-src-conn-rate limits.
 * Returns 0 when within limits.  On violation: optionally inserts the
 * offending source address into the rule's overload table, optionally
 * flushes other states from that source, marks this state for purge,
 * and returns 1.
 */
static int
pf_src_connlimit(struct pf_state **state)
{
	int bad = 0;
	(*state)->src_node->conn++;
	VERIFY((*state)->src_node->conn != 0);
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&(*state)->src_node->conn_rate);

	/* Absolute cap on concurrent connections from this source. */
	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn <
	    (*state)->src_node->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	/* Rate cap (connections per configured window). */
	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad) {
		return 0;
	}

	if ((*state)->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t       killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf_src_connlimit: blocking address ");
			pf_print_host(&(*state)->src_node->addr, 0,
			    (*state)->state_key->af_lan);
		}

		/* Build a host entry for the offending source address. */
		bzero(&p, sizeof(p));
		p.pfra_af = (*state)->state_key->af_lan;
		switch ((*state)->state_key->af_lan) {
#if INET
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = (*state)->src_node->addr.v4addr;
			break;
#endif /* INET */
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = (*state)->src_node->addr.v6addr;
			break;
		}

		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
		    &p, pf_calendar_time_second());

		/* kill existing states if that's required. */
		if ((*state)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->state_key;
				/*
				 * Kill states from this source.  (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af_lan ==
				    (*state)->state_key->af_lan &&
				    (((*state)->state_key->direction ==
				    PF_OUT &&
				    PF_AEQ(&(*state)->src_node->addr,
				    &sk->lan.addr, sk->af_lan)) ||
				    ((*state)->state_key->direction == PF_IN &&
				    PF_AEQ(&(*state)->src_node->addr,
				    &sk->ext_lan.addr, sk->af_lan))) &&
				    ((*state)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*state)->rule.ptr == st->rule.ptr)) {
					/* Mark for the purge thread; not freed here. */
					st->timeout = PFTM_PURGE;
					st->src.state = st->dst.state =
					    TCPS_CLOSED;
					killed++;
				}
			}
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf(", %u states killed", killed);
			}
		}
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("\n");
		}
	}

	/* kill this state */
	(*state)->timeout = PFTM_PURGE;
	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
	return 1;
}
1315 
/*
 * Find or create the source-tracking node for `src`/`af`, leaving it in
 * *sn.  Nodes are keyed per rule only when the rule uses rule-level
 * source tracking or sticky addresses; otherwise they are global
 * (rule.ptr == NULL).  Returns 0 on success, -1 when allocation fails,
 * the rule's max-src-nodes cap is hit, or an existing node is already
 * at the rule's max-src-states limit.
 */
int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    struct pf_addr *src, sa_family_t af)
{
	struct pf_src_node      k;

	if (*sn == NULL) {
		/* Build a lookup key and search the tracking tree first. */
		k.af = af;
		PF_ACPY(&k.addr, src, af);
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR) {
			k.rule.ptr = rule;
		} else {
			k.rule.ptr = NULL;
		}
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		/* Not found: allocate, unless the node cap is reached. */
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes) {
			(*sn) = pool_get(&pf_src_tree_pl, PR_WAITOK);
		} else {
			pf_status.lcounters[LCNT_SRCNODES]++;
		}
		if ((*sn) == NULL) {
			return -1;
		}
		bzero(*sn, sizeof(struct pf_src_node));

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->af = af;
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR) {
			(*sn)->rule.ptr = rule;
		} else {
			(*sn)->rule.ptr = NULL;
		}
		PF_ACPY(&(*sn)->addr, src, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			/* Duplicate key: should not happen after the FIND above. */
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				printf("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			return -1;
		}
		(*sn)->creation = pf_time_second();
		(*sn)->ruletype = rule->action;
		if ((*sn)->rule.ptr != NULL) {
			(*sn)->rule.ptr->src_nodes++;
		}
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		/* Existing node: enforce the per-source state limit. */
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return -1;
		}
	}
	return 0;
}
1384 
1385 static void
pf_stateins_err(const char * tree,struct pf_state * s,struct pfi_kif * kif)1386 pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif)
1387 {
1388 	struct pf_state_key     *sk = s->state_key;
1389 
1390 	if (pf_status.debug >= PF_DEBUG_MISC) {
1391 		printf("pf: state insert failed: %s %s ", tree, kif->pfik_name);
1392 		switch (sk->proto) {
1393 		case IPPROTO_TCP:
1394 			printf("TCP");
1395 			break;
1396 		case IPPROTO_UDP:
1397 			printf("UDP");
1398 			break;
1399 		case IPPROTO_ICMP:
1400 			printf("ICMP4");
1401 			break;
1402 		case IPPROTO_ICMPV6:
1403 			printf("ICMP6");
1404 			break;
1405 		default:
1406 			printf("PROTO=%u", sk->proto);
1407 			break;
1408 		}
1409 		printf(" lan: ");
1410 		pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto,
1411 		    sk->proto_variant);
1412 		printf(" gwy: ");
1413 		pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto,
1414 		    sk->proto_variant);
1415 		printf(" ext_lan: ");
1416 		pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto,
1417 		    sk->proto_variant);
1418 		printf(" ext_gwy: ");
1419 		pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto,
1420 		    sk->proto_variant);
1421 		if (s->sync_flags & PFSTATE_FROMSYNC) {
1422 			printf(" (from sync)");
1423 		}
1424 		printf("\n");
1425 	}
1426 }
1427 
/*
 * Insert a fully-built state into the three state trees (lan_ext,
 * ext_gwy, id) and the global state list, binding it to `kif`.
 * If an equal key already exists, the state is attached to the
 * existing key instead — unless a state with the same kif is already
 * attached, which is a collision and fails.  Returns 0 on success,
 * -1 on collision (the state is detached from its key first).
 */
int
pf_insert_state(struct pfi_kif *kif, struct pf_state *s)
{
	struct pf_state_key     *cur;
	struct pf_state         *sp;

	VERIFY(s->state_key != NULL);
	s->kif = kif;

	if ((cur = RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
	    s->state_key)) != NULL) {
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(sp, &cur->states, next)
		if (sp->kif == kif) {           /* collision! */
			pf_stateins_err("tree_lan_ext", s, kif);
			pf_detach_state(s,
			    PF_DT_SKIP_LANEXT | PF_DT_SKIP_EXTGWY);
			return -1;
		}
		/* Re-home the state onto the already-inserted key. */
		pf_detach_state(s, PF_DT_SKIP_LANEXT | PF_DT_SKIP_EXTGWY);
		pf_attach_state(cur, s, kif == pfi_all ? 1 : 0);
	}

	/* if cur != NULL, we already found a state key and attached to it */
	if (cur == NULL && (cur = RB_INSERT(pf_state_tree_ext_gwy,
	    &pf_statetbl_ext_gwy, s->state_key)) != NULL) {
		/* must not happen. we must have found the sk above! */
		pf_stateins_err("tree_ext_gwy", s, kif);
		pf_detach_state(s, PF_DT_SKIP_EXTGWY);
		return -1;
	}

	/* Assign a fresh (id, creatorid) pair unless synced in. */
	if (s->id == 0 && s->creatorid == 0) {
		s->id = htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
	}
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    be64toh(s->id), ntohl(s->creatorid));
			if (s->sync_flags & PFSTATE_FROMSYNC) {
				printf(" (from sync)");
			}
			printf("\n");
		}
		pf_detach_state(s, 0);
		return -1;
	}
	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	VERIFY(pf_status.states != 0);
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
#if NPFSYNC
	pfsync_insert_state(s);
#endif
	return 0;
}
1487 
/*
 * Continuation body of the pf purge thread: runs (nominally) once per
 * second under pf_lock, expiring a fraction of the state table each
 * pass and fragments/source nodes every PFTM_INTERVAL passes.  When pf
 * is stopped it purges everything, then sleeps without a timeout until
 * woken.  Never returns normally — it re-enters itself via tsleep0().
 */
static int
pf_purge_thread_cont(int err)
{
#pragma unused(err)
	static u_int32_t nloops = 0;
	int t = 1;      /* 1 second */

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the periodic timeout callout to update
	 * the counter returnable via net_uptime().
	 */
	net_update_uptime();

	lck_rw_lock_shared(&pf_perim_lock);
	lck_mtx_lock(&pf_lock);

	/* purge everything if not running */
	if (!pf_status.running) {
		pf_purge_expired_states(pf_status.states);
		pf_purge_expired_fragments();
		pf_purge_expired_src_nodes();

		/* terminate thread (we don't currently do this) */
		if (pf_purge_thread == NULL) {
			lck_mtx_unlock(&pf_lock);
			lck_rw_done(&pf_perim_lock);

			thread_deallocate(current_thread());
			thread_terminate(current_thread());
			/* NOTREACHED */
			return 0;
		} else {
			/* if there's nothing left, sleep w/o timeout */
			if (pf_status.states == 0 &&
			    pf_normalize_isempty() &&
			    RB_EMPTY(&tree_src_tracking)) {
				nloops = 0;
				t = 0;
			}
			goto done;
		}
	}

	/* process a fraction of the state table every second */
	pf_purge_expired_states(1 + (pf_status.states
	    / pf_default_rule.timeout[PFTM_INTERVAL]));

	/* purge other expired types every PFTM_INTERVAL seconds */
	if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_fragments();
		pf_purge_expired_src_nodes();
		nloops = 0;
	}
done:
	lck_mtx_unlock(&pf_lock);
	lck_rw_done(&pf_perim_lock);

	/* Re-arm: sleep t seconds (or forever when t == 0), then re-enter. */
	(void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge_cont",
	    t * hz, pf_purge_thread_cont);
	/* NOTREACHED */
	VERIFY(0);

	return 0;
}
1553 
/*
 * Entry point of the pf purge thread: immediately parks on tsleep0()
 * with pf_purge_thread_cont as the continuation, which then drives all
 * periodic purge work.  Control must never come back here.
 */
void
pf_purge_thread_fn(void *v, wait_result_t w)
{
#pragma unused(v, w)
	(void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge", 0,
	    pf_purge_thread_cont);
	/*
	 * tsleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	VERIFY(0);
}
1566 
/*
 * Compute the absolute time (in seconds, pf_time_second() scale) at
 * which `state` expires.  States marked PFTM_PURGE expire immediately.
 * When adaptive timeouts are configured (adaptive.start/end), the
 * timeout shrinks linearly as the state count climbs from `start`
 * toward `end`, reaching zero (immediate expiry) at or beyond `end`.
 * Caller must hold pf_lock.
 */
u_int64_t
pf_state_expires(const struct pf_state *state)
{
	u_int32_t       t;
	u_int32_t       start;
	u_int32_t       end;
	u_int32_t       states;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE) {
		return pf_time_second();
	}

	VERIFY(state->timeout != PFTM_UNLINKED);
	VERIFY(state->timeout < PFTM_MAX);
	/* Per-rule timeout overrides the default when set. */
	t = state->rule.ptr->timeout[state->timeout];
	if (!t) {
		t = pf_default_rule.timeout[state->timeout];
	}
	/* Rule-local adaptive settings take precedence over globals. */
	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = state->rule.ptr->states;
	} else {
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states < end) {
			/*
			 * Linear scale-down of t between start and end.
			 * NOTE(review): t * (end - states) is 32-bit
			 * arithmetic; presumably configured timeouts are
			 * small enough not to overflow — confirm.
			 */
			return state->expire + t * (end - states) /
			       (end - start);
		} else {
			return pf_time_second();
		}
	}
	return state->expire + t;
}
1607 
/*
 * Walk the source-tracking tree and free every node that has no
 * remaining states and whose expiry time has passed.  Also drops the
 * node's reference on its rule, freeing the rule when it is otherwise
 * unreferenced.  Caller must hold pf_lock.
 */
void
pf_purge_expired_src_nodes(void)
{
	struct pf_src_node              *cur, *next;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		/* Fetch the successor first: cur may be removed below. */
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states <= 0 && cur->expire <= pf_time_second()) {
			if (cur->rule.ptr != NULL) {
				cur->rule.ptr->src_nodes--;
				/* Last reference on a dangling rule: free it. */
				if (cur->rule.ptr->states <= 0 &&
				    cur->rule.ptr->max_src_nodes <= 0) {
					pf_rm_rule(NULL, cur->rule.ptr);
				}
			}
			RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
			pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
			pf_status.src_nodes--;
			pool_put(&pf_src_tree_pl, cur);
		}
	}
}
1633 
/*
 * Detach a state from its source node(s): drop the connection count
 * (for established TCP) and the state count, arming the node's expiry
 * timer when its last state goes away.  The node itself is freed later
 * by pf_purge_expired_src_nodes().  Caller must hold pf_lock.
 */
void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t t;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	if (s->src_node != NULL) {
		if (s->src.tcp_est) {
			VERIFY(s->src_node->conn > 0);
			--s->src_node->conn;
		}
		VERIFY(s->src_node->states > 0);
		if (--s->src_node->states <= 0) {
			/* Last state gone: schedule the node for expiry. */
			t = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!t) {
				t = pf_default_rule.timeout[PFTM_SRC_NODE];
			}
			s->src_node->expire = pf_time_second() + t;
		}
	}
	/* The NAT source node may be distinct from the source node. */
	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
		VERIFY(s->nat_src_node->states > 0);
		if (--s->nat_src_node->states <= 0) {
			t = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!t) {
				t = pf_default_rule.timeout[PFTM_SRC_NODE];
			}
			s->nat_src_node->expire = pf_time_second() + t;
		}
	}
	s->src_node = s->nat_src_node = NULL;
}
1667 
/*
 * Unlink a state from the id tree, its source nodes, and its state
 * key, marking it PFTM_UNLINKED; the memory is reclaimed later by
 * pf_free_state().  For synproxy destination states, a RST|ACK is
 * sent to tear down the half-open server-side connection.
 * Caller must hold pf_lock.
 */
void
pf_unlink_state(struct pf_state *cur)
{
	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		pf_send_tcp(cur->rule.ptr, cur->state_key->af_lan,
		    &cur->state_key->ext_lan.addr, &cur->state_key->lan.addr,
		    cur->state_key->ext_lan.xport.port,
		    cur->state_key->lan.xport.port,
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST | TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
	}

	/* Run (and free) any unlink hooks registered on this state. */
	hook_runloop(&cur->unlink_hooks, HOOK_REMOVE | HOOK_FREE);
	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFSYNC
	/* Only advertise deletion of states we created ourselves. */
	if (cur->creatorid == pf_status.hostid) {
		pfsync_delete_state(cur);
	}
#endif
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur, 0);
}
1693 
/*
 * Free an already-unlinked state (timeout must be PFTM_UNLINKED):
 * drops the state's references on its rule, NAT rule, and anchor
 * (freeing dangling rules), cleans up TCP normalization data, and
 * returns the state to its pool.  A state that pfsync is currently
 * using as a bulk-transfer cursor is left alone for a later pass.
 * Callers should be at splpf and hold the write_lock on
 * pf_consistency_lock.
 */
void
pf_free_state(struct pf_state *cur)
{
	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
#if NPFSYNC
	if (pfsyncif != NULL &&
	    (pfsyncif->sc_bulk_send_next == cur ||
	    pfsyncif->sc_bulk_terminator == cur)) {
		return;
	}
#endif
	VERIFY(cur->timeout == PFTM_UNLINKED);
	VERIFY(cur->rule.ptr->states > 0);
	if (--cur->rule.ptr->states <= 0 &&
	    cur->rule.ptr->src_nodes <= 0) {
		pf_rm_rule(NULL, cur->rule.ptr);
	}
	if (cur->nat_rule.ptr != NULL) {
		VERIFY(cur->nat_rule.ptr->states > 0);
		if (--cur->nat_rule.ptr->states <= 0 &&
		    cur->nat_rule.ptr->src_nodes <= 0) {
			pf_rm_rule(NULL, cur->nat_rule.ptr);
		}
	}
	if (cur->anchor.ptr != NULL) {
		VERIFY(cur->anchor.ptr->states > 0);
		if (--cur->anchor.ptr->states <= 0) {
			pf_rm_rule(NULL, cur->anchor.ptr);
		}
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	TAILQ_REMOVE(&state_list, cur, entry_list);
	if (cur->tag) {
		pf_tag_unref(cur->tag);
	}
#if SKYWALK
	netns_release(&cur->nstoken);
#endif
	pool_put(&pf_state_pl, cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	VERIFY(pf_status.states > 0);
	pf_status.states--;
}
1740 
/*
 * Examine up to `maxcheck` states from the global state list, freeing
 * those that are unlinked or past their expiry.  A static cursor
 * persists across calls so successive passes cover the whole table
 * incrementally; it wraps to the list head when it falls off the end.
 * Caller must hold pf_lock.
 */
void
pf_purge_expired_states(u_int32_t maxcheck)
{
	static struct pf_state  *cur = NULL;
	struct pf_state         *next;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	while (maxcheck--) {
		/* wrap to start of list when we hit the end */
		if (cur == NULL) {
			cur = TAILQ_FIRST(&state_list);
			if (cur == NULL) {
				break;  /* list empty */
			}
		}

		/* get next state, as cur may get deleted */
		next = TAILQ_NEXT(cur, entry_list);

		if (cur->timeout == PFTM_UNLINKED) {
			pf_free_state(cur);
		} else if (pf_state_expires(cur) <= pf_time_second()) {
			/* unlink and free expired state */
			pf_unlink_state(cur);
			pf_free_state(cur);
		}
		cur = next;
	}
}
1771 
1772 int
pf_tbladdr_setup(struct pf_ruleset * rs,struct pf_addr_wrap * aw)1773 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1774 {
1775 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1776 
1777 	if (aw->type != PF_ADDR_TABLE) {
1778 		return 0;
1779 	}
1780 	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL) {
1781 		return 1;
1782 	}
1783 	return 0;
1784 }
1785 
1786 void
pf_tbladdr_remove(struct pf_addr_wrap * aw)1787 pf_tbladdr_remove(struct pf_addr_wrap *aw)
1788 {
1789 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1790 
1791 	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) {
1792 		return;
1793 	}
1794 	pfr_detach_table(aw->p.tbl);
1795 	aw->p.tbl = NULL;
1796 }
1797 
1798 void
pf_tbladdr_copyout(struct pf_addr_wrap * aw)1799 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1800 {
1801 	struct pfr_ktable *kt = aw->p.tbl;
1802 
1803 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1804 
1805 	if (aw->type != PF_ADDR_TABLE || kt == NULL) {
1806 		return;
1807 	}
1808 	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) {
1809 		kt = kt->pfrkt_root;
1810 	}
1811 	aw->p.tbl = NULL;
1812 	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1813 	    kt->pfrkt_cnt : -1;
1814 }
1815 
/*
 * Print an address in human-readable form for kernel debug output:
 * dotted quad for v4; hex groups with a "::"-style compressed zero run
 * for v6.
 *
 * NOTE(review): the v6 zero-run scan below looks fragile — `curend` is
 * never reset between runs, a run starting at word 0 is skipped by the
 * `if (curstart)` test, and a trailing zero run is never folded into
 * max.  Presumably acceptable for debug output; confirm against
 * RFC 5952 before relying on exact formatting.
 */
static void
pf_print_addr(struct pf_addr *addr, sa_family_t af)
{
	switch (af) {
#if INET
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		printf("%u.%u.%u.%u", (a >> 24) & 255, (a >> 16) & 255,
		    (a >> 8) & 255, a & 255);
		break;
	}
#endif /* INET */
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart = 255, curend = 0,
		    maxstart = 0, maxend = 0;
		/* First pass: find the longest run of zero 16-bit words. */
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255) {
					curstart = i;
				} else {
					curend = i;
				}
			} else {
				if (curstart) {
					if ((curend - curstart) >
					    (maxend - maxstart)) {
						maxstart = curstart;
						maxend = curend;
						curstart = 255;
					}
				}
			}
		}
		/* Second pass: print, compressing the [maxstart,maxend] run. */
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (maxend != 7) {
					if (i == maxstart) {
						printf(":");
					}
				} else {
					if (i == maxend) {
						printf(":");
					}
				}
			} else {
				b = ntohs(addr->addr16[i]);
				printf("%x", b);
				if (i < 7) {
					printf(":");
				}
			}
		}
		break;
	}
	}
}
1873 
1874 static void
pf_print_sk_host(struct pf_state_host * sh,sa_family_t af,int proto,u_int8_t proto_variant)1875 pf_print_sk_host(struct pf_state_host *sh, sa_family_t af, int proto,
1876     u_int8_t proto_variant)
1877 {
1878 	pf_print_addr(&sh->addr, af);
1879 
1880 	switch (proto) {
1881 	case IPPROTO_ESP:
1882 		if (sh->xport.spi) {
1883 			printf("[%08x]", ntohl(sh->xport.spi));
1884 		}
1885 		break;
1886 
1887 	case IPPROTO_GRE:
1888 		if (proto_variant == PF_GRE_PPTP_VARIANT) {
1889 			printf("[%u]", ntohs(sh->xport.call_id));
1890 		}
1891 		break;
1892 
1893 	case IPPROTO_TCP:
1894 	case IPPROTO_UDP:
1895 		printf("[%u]", ntohs(sh->xport.port));
1896 		break;
1897 
1898 	default:
1899 		break;
1900 	}
1901 }
1902 
1903 static void
pf_print_host(struct pf_addr * addr,u_int16_t p,sa_family_t af)1904 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1905 {
1906 	pf_print_addr(addr, af);
1907 	if (p) {
1908 		printf("[%u]", ntohs(p));
1909 	}
1910 }
1911 
/*
 * Print a one-line debug summary of a state: protocol, the four
 * key endpoints (lan, gwy, ext_lan, ext_gwy), both peers' sequence
 * windows, and the src/dst connection-tracking states.
 */
void
pf_print_state(struct pf_state *s)
{
	struct pf_state_key *sk = s->state_key;
	switch (sk->proto) {
	case IPPROTO_ESP:
		printf("ESP ");
		break;
	case IPPROTO_GRE:
		printf("GRE%u ", sk->proto_variant);
		break;
	case IPPROTO_TCP:
		printf("TCP ");
		break;
	case IPPROTO_UDP:
		printf("UDP ");
		break;
	case IPPROTO_ICMP:
		printf("ICMP ");
		break;
	case IPPROTO_ICMPV6:
		printf("ICMPV6 ");
		break;
	default:
		printf("%u ", sk->proto);
		break;
	}
	pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto, sk->proto_variant);
	printf(" ");
	pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto, sk->proto_variant);
	printf(" ");
	pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto,
	    sk->proto_variant);
	printf(" ");
	pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto,
	    sk->proto_variant);
	/* Source peer's sequence-tracking window. */
	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
	/* Window scaling is only meaningful when both peers advertised it. */
	if (s->src.wscale && s->dst.wscale) {
		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
	}
	printf("]");
	/* Destination peer's sequence-tracking window. */
	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
	if (s->src.wscale && s->dst.wscale) {
		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
	}
	printf("]");
	printf(" %u:%u", s->src.state, s->dst.state);
}
1962 
1963 void
pf_print_flags(u_int8_t f)1964 pf_print_flags(u_int8_t f)
1965 {
1966 	if (f) {
1967 		printf(" ");
1968 	}
1969 	if (f & TH_FIN) {
1970 		printf("F");
1971 	}
1972 	if (f & TH_SYN) {
1973 		printf("S");
1974 	}
1975 	if (f & TH_RST) {
1976 		printf("R");
1977 	}
1978 	if (f & TH_PUSH) {
1979 		printf("P");
1980 	}
1981 	if (f & TH_ACK) {
1982 		printf("A");
1983 	}
1984 	if (f & TH_URG) {
1985 		printf("U");
1986 	}
1987 	if (f & TH_ECE) {
1988 		printf("E");
1989 	}
1990 	if (f & TH_CWR) {
1991 		printf("W");
1992 	}
1993 }
1994 
/*
 * Advance every rule recorded in head[i] up to (but not including) the
 * current rule, pointing each one's skip[i] shortcut at `cur`.  This is
 * a helper for pf_calc_skip_steps() below and deliberately relies on
 * that function's local variables `head` and `cur`.
 */
#define PF_SET_SKIP_STEPS(i)                                    \
	do {                                                    \
	        while (head[i] != cur) {                        \
	                head[i]->skip[i].ptr = cur;             \
	                head[i] = TAILQ_NEXT(head[i], entries); \
	        }                                               \
	} while (0)
2002 
/*
 * Compute the "skip steps" for a rule list: for each PF_SKIP_* match
 * criterion, every rule gets a pointer to the next rule that differs in
 * that criterion, so rule evaluation can jump over runs of rules that
 * would all fail for the same reason.  head[i] tracks the first rule of
 * the current run for criterion i; PF_SET_SKIP_STEPS(i) closes a run.
 */
void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i) {
		head[i] = cur;
	}
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) {
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		}
		if (cur->direction != prev->direction) {
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		}
		if (cur->af != prev->af) {
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		}
		if (cur->proto != prev->proto) {
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		}
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) {
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		}
		/* Source "port": layout of the xport union depends on proto. */
		{
			union pf_rule_xport *cx = &cur->src.xport;
			union pf_rule_xport *px = &prev->src.xport;

			switch (cur->proto) {
			case IPPROTO_GRE:
			case IPPROTO_ESP:
				/* GRE/ESP have no source port; always end the run. */
				PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
				break;
			default:
				if (prev->proto == IPPROTO_GRE ||
				    prev->proto == IPPROTO_ESP ||
				    cx->range.op != px->range.op ||
				    cx->range.port[0] != px->range.port[0] ||
				    cx->range.port[1] != px->range.port[1]) {
					PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
				}
				break;
			}
		}
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) {
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		}
		/* Destination "port": GRE compares call id, ESP compares SPI. */
		{
			union pf_rule_xport *cx = &cur->dst.xport;
			union pf_rule_xport *px = &prev->dst.xport;

			switch (cur->proto) {
			case IPPROTO_GRE:
				if (cur->proto != prev->proto ||
				    cx->call_id != px->call_id) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			case IPPROTO_ESP:
				if (cur->proto != prev->proto ||
				    cx->spi != px->spi) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			default:
				if (prev->proto == IPPROTO_GRE ||
				    prev->proto == IPPROTO_ESP ||
				    cx->range.op != px->range.op ||
				    cx->range.port[0] != px->range.port[0] ||
				    cx->range.port[1] != px->range.port[1]) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			}
		}

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	/* cur == NULL: terminate every open run at the end of the list. */
	for (i = 0; i < PF_SKIP_COUNT; ++i) {
		PF_SET_SKIP_STEPS(i);
	}
}
2091 
2092 u_int32_t
pf_calc_state_key_flowhash(struct pf_state_key * sk)2093 pf_calc_state_key_flowhash(struct pf_state_key *sk)
2094 {
2095 	struct pf_flowhash_key fh __attribute__((aligned(8)));
2096 	uint32_t flowhash = 0;
2097 
2098 	bzero(&fh, sizeof(fh));
2099 	if (PF_ALEQ(&sk->lan.addr, &sk->ext_lan.addr, sk->af_lan)) {
2100 		bcopy(&sk->lan.addr, &fh.ap1.addr, sizeof(fh.ap1.addr));
2101 		bcopy(&sk->ext_lan.addr, &fh.ap2.addr, sizeof(fh.ap2.addr));
2102 	} else {
2103 		bcopy(&sk->ext_lan.addr, &fh.ap1.addr, sizeof(fh.ap1.addr));
2104 		bcopy(&sk->lan.addr, &fh.ap2.addr, sizeof(fh.ap2.addr));
2105 	}
2106 	if (sk->lan.xport.spi <= sk->ext_lan.xport.spi) {
2107 		fh.ap1.xport.spi = sk->lan.xport.spi;
2108 		fh.ap2.xport.spi = sk->ext_lan.xport.spi;
2109 	} else {
2110 		fh.ap1.xport.spi = sk->ext_lan.xport.spi;
2111 		fh.ap2.xport.spi = sk->lan.xport.spi;
2112 	}
2113 	fh.af = sk->af_lan;
2114 	fh.proto = sk->proto;
2115 
2116 try_again:
2117 	flowhash = net_flowhash(&fh, sizeof(fh), pf_hash_seed);
2118 	if (flowhash == 0) {
2119 		/* try to get a non-zero flowhash */
2120 		pf_hash_seed = RandomULong();
2121 		goto try_again;
2122 	}
2123 
2124 	return flowhash;
2125 }
2126 
2127 static int
pf_addr_wrap_neq(struct pf_addr_wrap * aw1,struct pf_addr_wrap * aw2)2128 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
2129 {
2130 	if (aw1->type != aw2->type) {
2131 		return 1;
2132 	}
2133 	switch (aw1->type) {
2134 	case PF_ADDR_ADDRMASK:
2135 	case PF_ADDR_RANGE:
2136 		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0)) {
2137 			return 1;
2138 		}
2139 		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) {
2140 			return 1;
2141 		}
2142 		return 0;
2143 	case PF_ADDR_DYNIFTL:
2144 		return aw1->p.dyn == NULL || aw2->p.dyn == NULL ||
2145 		       aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt;
2146 	case PF_ADDR_NOROUTE:
2147 	case PF_ADDR_URPFFAILED:
2148 		return 0;
2149 	case PF_ADDR_TABLE:
2150 		return aw1->p.tbl != aw2->p.tbl;
2151 	case PF_ADDR_RTLABEL:
2152 		return aw1->v.rtlabel != aw2->v.rtlabel;
2153 	default:
2154 		printf("invalid address type: %d\n", aw1->type);
2155 		return 1;
2156 	}
2157 }
2158 
/*
 * Incrementally adjust a 16-bit ones-complement checksum for a 16-bit
 * word changing from `old` to `new` (RFC 1624-style update).
 * `udp` should be non-zero for UDP -- presumably to preserve UDP's
 * special zero-checksum encoding; the details live in
 * nat464_cksum_fixup() (TODO confirm there).
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	return nat464_cksum_fixup(cksum, old, new, udp);
}
2164 
2165 /*
2166  * change ip address & port
2167  * dir	: packet direction
2168  * a	: address to be changed
2169  * p	: port to be changed
2170  * ic	: ip header checksum
2171  * pc	: protocol checksum
2172  * an	: new ip address
2173  * pn	: new port
2174  * u	: should be 1 if UDP packet else 0
2175  * af	: address family of the packet
2176  * afn	: address family of the new address
2177  * ua	: should be 1 if ip address needs to be updated in the packet else
2178  *	  only the checksum is recalculated & updated.
2179  */
/*
 * Implementation notes (parameters are documented in the block comment
 * above): every 16-bit word of the address that changes, plus the port,
 * is folded into the affected checksums with pf_cksum_fixup().  In the
 * cross-family (NAT64/NAT46) cases, address words that appear or vanish
 * are fixed up against zero.
 */
static __attribute__((noinline)) void
pf_change_ap(int dir, pbuf_t *pbuf, struct pf_addr *a, u_int16_t *p,
    u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn,
    u_int8_t u, sa_family_t af, sa_family_t afn, int ua)
{
	struct pf_addr  ao;
	u_int16_t       po = *p;

	PF_ACPY(&ao, a, af);
	if (ua) {
		PF_ACPY(a, an, afn);
	}

	/* Install the new port; checksum fixups below account for it. */
	*p = pn;

	switch (af) {
#if INET
	case AF_INET:
		switch (afn) {
		case AF_INET:
			/* v4 -> v4: the IP header checksum covers the address. */
			*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
			    ao.addr16[0], an->addr16[0], 0),
			    ao.addr16[1], an->addr16[1], 0);
			/* NOTE(review): redundant -- *p was already set to pn above. */
			*p = pn;
			/*
			 * If the packet is originated from an ALG on the NAT gateway
			 * (source address is loopback or local), in which case the
			 * TCP/UDP checksum field contains the pseudo header checksum
			 * that's not yet complemented.
			 * In that case we do not need to fixup the checksum for port
			 * translation as the pseudo header checksum doesn't include ports.
			 *
			 * A packet generated locally will have UDP/TCP CSUM flag
			 * set (gets set in protocol output).
			 *
			 * It should be noted that the fixup doesn't do anything if the
			 * checksum is 0.
			 */
			if (dir == PF_OUT && pbuf != NULL &&
			    (*pbuf->pb_csum_flags & (CSUM_TCP | CSUM_UDP))) {
				/* Pseudo-header checksum does not include ports */
				*pc = ~pf_cksum_fixup(pf_cksum_fixup(~*pc,
				    ao.addr16[0], an->addr16[0], u),
				    ao.addr16[1], an->addr16[1], u);
			} else {
				*pc =
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					    *pc, ao.addr16[0], an->addr16[0], u),
				    ao.addr16[1], an->addr16[1], u),
				    po, pn, u);
			}
			break;
		case AF_INET6:
			/* v4 -> v6 (NAT46): words 2..7 are new, fixed up against 0. */
			/* NOTE(review): redundant -- *p was already set to pn above. */
			*p = pn;
			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(

					    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
					    ao.addr16[0], an->addr16[0], u),
					    ao.addr16[1], an->addr16[1], u),
					    0, an->addr16[2], u),
					    0, an->addr16[3], u),
				    0, an->addr16[4], u),
				    0, an->addr16[5], u),
				    0, an->addr16[6], u),
			    0, an->addr16[7], u),
			    po, pn, u);
			break;
		}
		break;
#endif /* INET */
	case AF_INET6:
		switch (afn) {
		case AF_INET6:
			/*
			 * If the packet is originated from an ALG on the NAT gateway
			 * (source address is loopback or local), in which case the
			 * TCP/UDP checksum field contains the pseudo header checksum
			 * that's not yet complemented.
			 * A packet generated locally
			 * will have UDP/TCP CSUM flag set (gets set in protocol
			 * output).
			 */
			if (dir == PF_OUT && pbuf != NULL &&
			    (*pbuf->pb_csum_flags & (CSUM_TCPIPV6 |
			    CSUM_UDPIPV6))) {
				/* Pseudo-header checksum does not include ports */
				*pc =
				    ~pf_cksum_fixup(pf_cksum_fixup(
					    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
						    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
							    ~*pc,
							    ao.addr16[0], an->addr16[0], u),
						    ao.addr16[1], an->addr16[1], u),
						    ao.addr16[2], an->addr16[2], u),
						    ao.addr16[3], an->addr16[3], u),
					    ao.addr16[4], an->addr16[4], u),
					    ao.addr16[5], an->addr16[5], u),
					    ao.addr16[6], an->addr16[6], u),
				    ao.addr16[7], an->addr16[7], u);
			} else {
				*pc =
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
						    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
							    *pc,
							    ao.addr16[0], an->addr16[0], u),
						    ao.addr16[1], an->addr16[1], u),
						    ao.addr16[2], an->addr16[2], u),
						    ao.addr16[3], an->addr16[3], u),
					    ao.addr16[4], an->addr16[4], u),
					    ao.addr16[5], an->addr16[5], u),
					    ao.addr16[6], an->addr16[6], u),
				    ao.addr16[7], an->addr16[7], u),
				    po, pn, u);
			}
			break;
#ifdef INET
		case AF_INET:
			/* v6 -> v4 (NAT64): old words 2..7 vanish, fixed up against 0. */
			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
					    ao.addr16[0], an->addr16[0], u),
					    ao.addr16[1], an->addr16[1], u),
					    ao.addr16[2], 0, u),
					    ao.addr16[3], 0, u),
				    ao.addr16[4], 0, u),
				    ao.addr16[5], 0, u),
				    ao.addr16[6], 0, u),
			    ao.addr16[7], 0, u),
			    po, pn, u);
			break;
#endif /* INET */
		}
		break;
	}
}
2317 
2318 
2319 /* Changes a u_int32_t.  Uses a void * so there are no align restrictions */
2320 void
pf_change_a(void * a,u_int16_t * c,u_int32_t an,u_int8_t u)2321 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
2322 {
2323 	u_int32_t       ao;
2324 
2325 	memcpy(&ao, a, sizeof(ao));
2326 	memcpy(a, &an, sizeof(u_int32_t));
2327 	*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
2328 	    ao % 65536, an % 65536, u);
2329 }
2330 
2331 static __attribute__((noinline)) void
pf_change_a6(struct pf_addr * a,u_int16_t * c,struct pf_addr * an,u_int8_t u)2332 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
2333 {
2334 	struct pf_addr  ao;
2335 
2336 	PF_ACPY(&ao, a, AF_INET6);
2337 	PF_ACPY(a, an, AF_INET6);
2338 
2339 	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2340 		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2341 			    pf_cksum_fixup(pf_cksum_fixup(*c,
2342 			    ao.addr16[0], an->addr16[0], u),
2343 			    ao.addr16[1], an->addr16[1], u),
2344 			    ao.addr16[2], an->addr16[2], u),
2345 		    ao.addr16[3], an->addr16[3], u),
2346 		    ao.addr16[4], an->addr16[4], u),
2347 		    ao.addr16[5], an->addr16[5], u),
2348 	    ao.addr16[6], an->addr16[6], u),
2349 	    ao.addr16[7], an->addr16[7], u);
2350 }
2351 
/*
 * Change an address and patch checksum `c`, supporting cross-family
 * (af -> afn) rewrites for NAT64/NAT46.
 *
 * NOTE: `ao` (the original address) is captured only when af != afn;
 * that is safe because the same-family branches delegate to
 * pf_change_a()/pf_change_a6(), which take their own copies.
 */
static __attribute__((noinline)) void
pf_change_addr(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u,
    sa_family_t af, sa_family_t afn)
{
	struct pf_addr  ao;

	if (af != afn) {
		/* Save the original, then install the new address. */
		PF_ACPY(&ao, a, af);
		PF_ACPY(a, an, afn);
	}

	switch (af) {
	case AF_INET:
		switch (afn) {
		case AF_INET:
			pf_change_a(a, c, an->v4addr.s_addr, u);
			break;
		case AF_INET6:
			/* v4 -> v6: new words 2..7 are fixed up against 0. */
			*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					    pf_cksum_fixup(pf_cksum_fixup(*c,
					    ao.addr16[0], an->addr16[0], u),
					    ao.addr16[1], an->addr16[1], u),
					    0, an->addr16[2], u),
				    0, an->addr16[3], u),
				    0, an->addr16[4], u),
				    0, an->addr16[5], u),
			    0, an->addr16[6], u),
			    0, an->addr16[7], u);
			break;
		}
		break;
	case AF_INET6:
		switch (afn) {
		case AF_INET:
			/* v6 -> v4: old words 2..7 vanish, fixed up against 0. */
			*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					    pf_cksum_fixup(pf_cksum_fixup(*c,
					    ao.addr16[0], an->addr16[0], u),
					    ao.addr16[1], an->addr16[1], u),
					    ao.addr16[2], 0, u),
				    ao.addr16[3], 0, u),
				    ao.addr16[4], 0, u),
				    ao.addr16[5], 0, u),
			    ao.addr16[6], 0, u),
			    ao.addr16[7], 0, u);
			break;
		case AF_INET6:
			pf_change_a6(a, c, an, u);
			break;
		}
		break;
	}
}
2406 
/*
 * Rewrite the address/port carried inside an ICMP error packet and fix
 * every checksum that depends on them, in three stages: the quoted
 * (inner) transport port, the inner IP address, and the outer IP
 * address.
 *
 * ia/ip : inner (quoted) address and port; ip may be NULL
 * oa    : outer header address
 * na/np : replacement address and port
 * pc    : inner protocol checksum (may be NULL, e.g. for UDP cksum 0)
 * h2c   : inner IP header checksum
 * ic    : ICMP/ICMPv6 checksum
 * hc    : outer IP header checksum
 * u     : non-zero for UDP (see pf_cksum_fixup)
 */
static __attribute__((noinline)) void
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
{
	struct pf_addr  oia, ooa;

	PF_ACPY(&oia, ia, af);
	PF_ACPY(&ooa, oa, af);

	/* Change inner protocol port, fix inner protocol checksum. */
	if (ip != NULL) {
		u_int16_t       oip = *ip;
		u_int32_t       opc = 0;

		if (pc != NULL) {
			opc = *pc;
		}
		*ip = np;
		if (pc != NULL) {
			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
		}
		/* The ICMP checksum covers the quoted payload too. */
		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
		if (pc != NULL) {
			/* ...including the inner checksum field we just changed. */
			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
		}
	}
	/* Change inner ip address, fix inner ip and icmp checksums. */
	PF_ACPY(ia, na, af);
	switch (af) {
#if INET
	case AF_INET: {
		u_int32_t        oh2c = *h2c;

		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
		break;
	}
#endif /* INET */
	case AF_INET6:
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(*ic,
				    oia.addr16[0], ia->addr16[0], u),
				    oia.addr16[1], ia->addr16[1], u),
				    oia.addr16[2], ia->addr16[2], u),
			    oia.addr16[3], ia->addr16[3], u),
			    oia.addr16[4], ia->addr16[4], u),
			    oia.addr16[5], ia->addr16[5], u),
		    oia.addr16[6], ia->addr16[6], u),
		    oia.addr16[7], ia->addr16[7], u);
		break;
	}
	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
	PF_ACPY(oa, na, af);
	switch (af) {
#if INET
	case AF_INET:
		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
		    ooa.addr16[0], oa->addr16[0], 0),
		    ooa.addr16[1], oa->addr16[1], 0);
		break;
#endif /* INET */
	case AF_INET6:
		/* No IPv6 header checksum: the outer address only affects *ic. */
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(*ic,
				    ooa.addr16[0], oa->addr16[0], u),
				    ooa.addr16[1], oa->addr16[1], u),
				    ooa.addr16[2], oa->addr16[2], u),
			    ooa.addr16[3], oa->addr16[3], u),
			    ooa.addr16[4], oa->addr16[4], u),
			    ooa.addr16[5], oa->addr16[5], u),
		    ooa.addr16[6], oa->addr16[6], u),
		    ooa.addr16[7], oa->addr16[7], u);
		break;
	}
}
2490 
2491 
2492 /*
2493  * Need to modulate the sequence numbers in the TCP SACK option
2494  * (credits to Krzysztof Pfaff for report and patch)
2495  */
2496 static __attribute__((noinline)) int
pf_modulate_sack(pbuf_t * pbuf,int off,struct pf_pdesc * pd,struct tcphdr * th,struct pf_state_peer * dst)2497 pf_modulate_sack(pbuf_t *pbuf, int off, struct pf_pdesc *pd,
2498     struct tcphdr *th, struct pf_state_peer *dst)
2499 {
2500 	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
2501 	u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
2502 	int copyback = 0, i, olen;
2503 	struct sackblk sack;
2504 
2505 #define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
2506 	if (hlen < TCPOLEN_SACKLEN ||
2507 	    !pf_pull_hdr(pbuf, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) {
2508 		return 0;
2509 	}
2510 
2511 	while (hlen >= TCPOLEN_SACKLEN) {
2512 		olen = opt[1];
2513 		switch (*opt) {
2514 		case TCPOPT_EOL:        /* FALLTHROUGH */
2515 		case TCPOPT_NOP:
2516 			opt++;
2517 			hlen--;
2518 			break;
2519 		case TCPOPT_SACK:
2520 			if (olen > hlen) {
2521 				olen = hlen;
2522 			}
2523 			if (olen >= TCPOLEN_SACKLEN) {
2524 				for (i = 2; i + TCPOLEN_SACK <= olen;
2525 				    i += TCPOLEN_SACK) {
2526 					memcpy(&sack, &opt[i], sizeof(sack));
2527 					pf_change_a(&sack.start, &th->th_sum,
2528 					    htonl(ntohl(sack.start) -
2529 					    dst->seqdiff), 0);
2530 					pf_change_a(&sack.end, &th->th_sum,
2531 					    htonl(ntohl(sack.end) -
2532 					    dst->seqdiff), 0);
2533 					memcpy(&opt[i], &sack, sizeof(sack));
2534 				}
2535 				copyback = off + sizeof(*th) + thoptlen;
2536 			}
2537 			OS_FALLTHROUGH;
2538 		default:
2539 			if (olen < 2) {
2540 				olen = 2;
2541 			}
2542 			hlen -= olen;
2543 			opt += olen;
2544 		}
2545 	}
2546 
2547 	if (copyback) {
2548 		if (pf_lazy_makewritable(pd, pbuf, copyback) == NULL) {
2549 			return -1;
2550 		}
2551 		pbuf_copy_back(pbuf, off + sizeof(*th), thoptlen, opts);
2552 	}
2553 	return copyback;
2554 }
2555 
2556 /*
2557  * XXX
2558  *
2559  * The following functions (pf_send_tcp and pf_send_icmp) are somewhat
2560  * special in that they originate "spurious" packets rather than
2561  * filter/NAT existing packets. As such, they're not a great fit for
2562  * the 'pbuf' shim, which assumes the underlying packet buffers are
2563  * allocated elsewhere.
2564  *
2565  * Since these functions are rarely used, we'll carry on allocating mbufs
2566  * and passing them to the IP stack for eventual routing.
2567  */
2568 static __attribute__((noinline)) void
pf_send_tcp(const struct pf_rule * r,sa_family_t af,const struct pf_addr * saddr,const struct pf_addr * daddr,u_int16_t sport,u_int16_t dport,u_int32_t seq,u_int32_t ack,u_int8_t flags,u_int16_t win,u_int16_t mss,u_int8_t ttl,int tag,u_int16_t rtag,struct ether_header * eh,struct ifnet * ifp)2569 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
2570     const struct pf_addr *saddr, const struct pf_addr *daddr,
2571     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
2572     u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
2573     u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
2574 {
2575 #pragma unused(eh, ifp)
2576 	struct mbuf     *m;
2577 	int              len, tlen;
2578 #if INET
2579 	struct ip       *h = NULL;
2580 #endif /* INET */
2581 	struct ip6_hdr  *h6 = NULL;
2582 	struct tcphdr   *th = NULL;
2583 	char            *opt;
2584 	struct pf_mtag  *pf_mtag;
2585 
2586 	/* maximum segment size tcp option */
2587 	tlen = sizeof(struct tcphdr);
2588 	if (mss) {
2589 		tlen += 4;
2590 	}
2591 
2592 	switch (af) {
2593 #if INET
2594 	case AF_INET:
2595 		len = sizeof(struct ip) + tlen;
2596 		break;
2597 #endif /* INET */
2598 	case AF_INET6:
2599 		len = sizeof(struct ip6_hdr) + tlen;
2600 		break;
2601 	default:
2602 		panic("pf_send_tcp: not AF_INET or AF_INET6!");
2603 		return;
2604 	}
2605 
2606 	/* create outgoing mbuf */
2607 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
2608 	if (m == NULL) {
2609 		return;
2610 	}
2611 
2612 	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
2613 		return;
2614 	}
2615 
2616 	if (tag) {
2617 		pf_mtag->pftag_flags |= PF_TAG_GENERATED;
2618 	}
2619 	pf_mtag->pftag_tag = rtag;
2620 
2621 	if (r != NULL && PF_RTABLEID_IS_VALID(r->rtableid)) {
2622 		pf_mtag->pftag_rtableid = r->rtableid;
2623 	}
2624 
2625 #if PF_ECN
2626 	/* add hints for ecn */
2627 	pf_mtag->pftag_hdr = mtod(m, struct ip *);
2628 	/* record address family */
2629 	pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6);
2630 	switch (af) {
2631 #if INET
2632 	case AF_INET:
2633 		pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
2634 		break;
2635 #endif /* INET */
2636 	case AF_INET6:
2637 		pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
2638 		break;
2639 	}
2640 #endif /* PF_ECN */
2641 
2642 	/* indicate this is TCP */
2643 	m->m_pkthdr.pkt_proto = IPPROTO_TCP;
2644 
2645 	/* Make sure headers are 32-bit aligned */
2646 	m->m_data += max_linkhdr;
2647 	m->m_pkthdr.len = m->m_len = len;
2648 	m->m_pkthdr.rcvif = NULL;
2649 	bzero(m->m_data, len);
2650 	switch (af) {
2651 #if INET
2652 	case AF_INET:
2653 		h = mtod(m, struct ip *);
2654 
2655 		/* IP header fields included in the TCP checksum */
2656 		h->ip_p = IPPROTO_TCP;
2657 		h->ip_len = htons(tlen);
2658 		h->ip_src.s_addr = saddr->v4addr.s_addr;
2659 		h->ip_dst.s_addr = daddr->v4addr.s_addr;
2660 
2661 		th = (struct tcphdr *)(void *)((caddr_t)h + sizeof(struct ip));
2662 		break;
2663 #endif /* INET */
2664 	case AF_INET6:
2665 		h6 = mtod(m, struct ip6_hdr *);
2666 
2667 		/* IP header fields included in the TCP checksum */
2668 		h6->ip6_nxt = IPPROTO_TCP;
2669 		h6->ip6_plen = htons(tlen);
2670 		memcpy(&h6->ip6_src, &saddr->v6addr, sizeof(struct in6_addr));
2671 		memcpy(&h6->ip6_dst, &daddr->v6addr, sizeof(struct in6_addr));
2672 
2673 		th = (struct tcphdr *)(void *)
2674 		    ((caddr_t)h6 + sizeof(struct ip6_hdr));
2675 		break;
2676 	}
2677 
2678 	/* TCP header */
2679 	th->th_sport = sport;
2680 	th->th_dport = dport;
2681 	th->th_seq = htonl(seq);
2682 	th->th_ack = htonl(ack);
2683 	th->th_off = tlen >> 2;
2684 	th->th_flags = flags;
2685 	th->th_win = htons(win);
2686 
2687 	if (mss) {
2688 		opt = (char *)(th + 1);
2689 		opt[0] = TCPOPT_MAXSEG;
2690 		opt[1] = 4;
2691 #if BYTE_ORDER != BIG_ENDIAN
2692 		HTONS(mss);
2693 #endif
2694 		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
2695 	}
2696 
2697 	switch (af) {
2698 #if INET
2699 	case AF_INET: {
2700 		struct route ro;
2701 
2702 		/* TCP checksum */
2703 		th->th_sum = in_cksum(m, len);
2704 
2705 		/* Finish the IP header */
2706 		h->ip_v = 4;
2707 		h->ip_hl = sizeof(*h) >> 2;
2708 		h->ip_tos = IPTOS_LOWDELAY;
2709 		/*
2710 		 * ip_output() expects ip_len and ip_off to be in host order.
2711 		 */
2712 		h->ip_len = len;
2713 		h->ip_off = (path_mtu_discovery ? IP_DF : 0);
2714 		h->ip_ttl = ttl ? ttl : ip_defttl;
2715 		h->ip_sum = 0;
2716 
2717 		bzero(&ro, sizeof(ro));
2718 		ip_output(m, NULL, &ro, 0, NULL, NULL);
2719 		ROUTE_RELEASE(&ro);
2720 		break;
2721 	}
2722 #endif /* INET */
2723 	case AF_INET6: {
2724 		struct route_in6 ro6;
2725 
2726 		/* TCP checksum */
2727 		th->th_sum = in6_cksum(m, IPPROTO_TCP,
2728 		    sizeof(struct ip6_hdr), tlen);
2729 
2730 		h6->ip6_vfc |= IPV6_VERSION;
2731 		h6->ip6_hlim = IPV6_DEFHLIM;
2732 
2733 		ip6_output_setsrcifscope(m, IFSCOPE_UNKNOWN, NULL);
2734 		ip6_output_setdstifscope(m, IFSCOPE_UNKNOWN, NULL);
2735 		bzero(&ro6, sizeof(ro6));
2736 		ip6_output(m, NULL, &ro6, 0, NULL, NULL, NULL);
2737 		ROUTE_RELEASE(&ro6);
2738 		break;
2739 	}
2740 	}
2741 }
2742 
2743 static __attribute__((noinline)) void
pf_send_icmp(pbuf_t * pbuf,u_int8_t type,u_int8_t code,sa_family_t af,struct pf_rule * r)2744 pf_send_icmp(pbuf_t *pbuf, u_int8_t type, u_int8_t code, sa_family_t af,
2745     struct pf_rule *r)
2746 {
2747 	struct mbuf     *m0;
2748 	struct pf_mtag  *pf_mtag;
2749 
2750 	m0 = pbuf_clone_to_mbuf(pbuf);
2751 	if (m0 == NULL) {
2752 		return;
2753 	}
2754 
2755 	if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
2756 		return;
2757 	}
2758 
2759 	pf_mtag->pftag_flags |= PF_TAG_GENERATED;
2760 
2761 	if (PF_RTABLEID_IS_VALID(r->rtableid)) {
2762 		pf_mtag->pftag_rtableid = r->rtableid;
2763 	}
2764 
2765 #if PF_ECN
2766 	/* add hints for ecn */
2767 	pf_mtag->pftag_hdr = mtod(m0, struct ip *);
2768 	/* record address family */
2769 	pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6);
2770 	switch (af) {
2771 #if INET
2772 	case AF_INET:
2773 		pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
2774 		m0->m_pkthdr.pkt_proto = IPPROTO_ICMP;
2775 		break;
2776 #endif /* INET */
2777 	case AF_INET6:
2778 		pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
2779 		m0->m_pkthdr.pkt_proto = IPPROTO_ICMPV6;
2780 		break;
2781 	}
2782 #endif /* PF_ECN */
2783 
2784 	switch (af) {
2785 #if INET
2786 	case AF_INET:
2787 		icmp_error(m0, type, code, 0, 0);
2788 		break;
2789 #endif /* INET */
2790 	case AF_INET6:
2791 		icmp6_error(m0, type, code, 0);
2792 		break;
2793 	}
2794 }
2795 
2796 /*
2797  * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
2798  * If n is 0, they match if they are equal. If n is != 0, they match if they
2799  * are different.
2800  */
2801 int
pf_match_addr(u_int8_t n,struct pf_addr * a,struct pf_addr * m,struct pf_addr * b,sa_family_t af)2802 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
2803     struct pf_addr *b, sa_family_t af)
2804 {
2805 	int     match = 0;
2806 
2807 	switch (af) {
2808 #if INET
2809 	case AF_INET:
2810 		if ((a->addr32[0] & m->addr32[0]) ==
2811 		    (b->addr32[0] & m->addr32[0])) {
2812 			match++;
2813 		}
2814 		break;
2815 #endif /* INET */
2816 	case AF_INET6:
2817 		if (((a->addr32[0] & m->addr32[0]) ==
2818 		    (b->addr32[0] & m->addr32[0])) &&
2819 		    ((a->addr32[1] & m->addr32[1]) ==
2820 		    (b->addr32[1] & m->addr32[1])) &&
2821 		    ((a->addr32[2] & m->addr32[2]) ==
2822 		    (b->addr32[2] & m->addr32[2])) &&
2823 		    ((a->addr32[3] & m->addr32[3]) ==
2824 		    (b->addr32[3] & m->addr32[3]))) {
2825 			match++;
2826 		}
2827 		break;
2828 	}
2829 	if (match) {
2830 		if (n) {
2831 			return 0;
2832 		} else {
2833 			return 1;
2834 		}
2835 	} else {
2836 		if (n) {
2837 			return 1;
2838 		} else {
2839 			return 0;
2840 		}
2841 	}
2842 }
2843 
2844 /*
2845  * Return 1 if b <= a <= e, otherwise return 0.
2846  */
2847 int
pf_match_addr_range(struct pf_addr * b,struct pf_addr * e,struct pf_addr * a,sa_family_t af)2848 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
2849     struct pf_addr *a, sa_family_t af)
2850 {
2851 	switch (af) {
2852 #if INET
2853 	case AF_INET:
2854 		if ((a->addr32[0] < b->addr32[0]) ||
2855 		    (a->addr32[0] > e->addr32[0])) {
2856 			return 0;
2857 		}
2858 		break;
2859 #endif /* INET */
2860 	case AF_INET6: {
2861 		int     i;
2862 
2863 		/* check a >= b */
2864 		for (i = 0; i < 4; ++i) {
2865 			if (a->addr32[i] > b->addr32[i]) {
2866 				break;
2867 			} else if (a->addr32[i] < b->addr32[i]) {
2868 				return 0;
2869 			}
2870 		}
2871 		/* check a <= e */
2872 		for (i = 0; i < 4; ++i) {
2873 			if (a->addr32[i] < e->addr32[i]) {
2874 				break;
2875 			} else if (a->addr32[i] > e->addr32[i]) {
2876 				return 0;
2877 			}
2878 		}
2879 		break;
2880 	}
2881 	}
2882 	return 1;
2883 }
2884 
2885 int
pf_match(u_int8_t op,u_int32_t a1,u_int32_t a2,u_int32_t p)2886 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2887 {
2888 	switch (op) {
2889 	case PF_OP_IRG:
2890 		return (p > a1) && (p < a2);
2891 	case PF_OP_XRG:
2892 		return (p < a1) || (p > a2);
2893 	case PF_OP_RRG:
2894 		return (p >= a1) && (p <= a2);
2895 	case PF_OP_EQ:
2896 		return p == a1;
2897 	case PF_OP_NE:
2898 		return p != a1;
2899 	case PF_OP_LT:
2900 		return p < a1;
2901 	case PF_OP_LE:
2902 		return p <= a1;
2903 	case PF_OP_GT:
2904 		return p > a1;
2905 	case PF_OP_GE:
2906 		return p >= a1;
2907 	}
2908 	return 0; /* never reached */
2909 }
2910 
/*
 * Port comparison wrapper: converts the rule bounds and the packet
 * port from network to host byte order before applying pf_match().
 * NTOHS mutates its argument in place, which is safe here because
 * a1/a2/p are by-value copies.
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
#if BYTE_ORDER != BIG_ENDIAN
	NTOHS(a1);
	NTOHS(a2);
	NTOHS(p);
#endif
	return pf_match(op, a1, a2, p);
}
2921 
2922 int
pf_match_xport(u_int8_t proto,u_int8_t proto_variant,union pf_rule_xport * rx,union pf_state_xport * sx)2923 pf_match_xport(u_int8_t proto, u_int8_t proto_variant, union pf_rule_xport *rx,
2924     union pf_state_xport *sx)
2925 {
2926 	int d = !0;
2927 
2928 	if (sx) {
2929 		switch (proto) {
2930 		case IPPROTO_GRE:
2931 			if (proto_variant == PF_GRE_PPTP_VARIANT) {
2932 				d = (rx->call_id == sx->call_id);
2933 			}
2934 			break;
2935 
2936 		case IPPROTO_ESP:
2937 			d = (rx->spi == sx->spi);
2938 			break;
2939 
2940 		case IPPROTO_TCP:
2941 		case IPPROTO_UDP:
2942 		case IPPROTO_ICMP:
2943 		case IPPROTO_ICMPV6:
2944 			if (rx->range.op) {
2945 				d = pf_match_port(rx->range.op,
2946 				    rx->range.port[0], rx->range.port[1],
2947 				    sx->port);
2948 			}
2949 			break;
2950 
2951 		default:
2952 			break;
2953 		}
2954 	}
2955 
2956 	return d;
2957 }
2958 
2959 int
pf_match_uid(u_int8_t op,uid_t a1,uid_t a2,uid_t u)2960 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
2961 {
2962 	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) {
2963 		return 0;
2964 	}
2965 	return pf_match(op, a1, a2, u);
2966 }
2967 
2968 int
pf_match_gid(u_int8_t op,gid_t a1,gid_t a2,gid_t g)2969 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
2970 {
2971 	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) {
2972 		return 0;
2973 	}
2974 	return pf_match(op, a1, a2, g);
2975 }
2976 
2977 static int
pf_match_tag(struct pf_rule * r,struct pf_mtag * pf_mtag,int * tag)2978 pf_match_tag(struct pf_rule *r, struct pf_mtag *pf_mtag,
2979     int *tag)
2980 {
2981 	if (*tag == -1) {
2982 		*tag = pf_mtag->pftag_tag;
2983 	}
2984 
2985 	return (!r->match_tag_not && r->match_tag == *tag) ||
2986 	       (r->match_tag_not && r->match_tag != *tag);
2987 }
2988 
2989 int
pf_tag_packet(pbuf_t * pbuf,struct pf_mtag * pf_mtag,int tag,unsigned int rtableid,struct pf_pdesc * pd)2990 pf_tag_packet(pbuf_t *pbuf, struct pf_mtag *pf_mtag, int tag,
2991     unsigned int rtableid, struct pf_pdesc *pd)
2992 {
2993 	if (tag <= 0 && !PF_RTABLEID_IS_VALID(rtableid) &&
2994 	    (pd == NULL || !(pd->pktflags & PKTF_FLOW_ID))) {
2995 		return 0;
2996 	}
2997 
2998 	if (pf_mtag == NULL && (pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
2999 		return 1;
3000 	}
3001 
3002 	if (tag > 0) {
3003 		pf_mtag->pftag_tag = tag;
3004 	}
3005 	if (PF_RTABLEID_IS_VALID(rtableid)) {
3006 		pf_mtag->pftag_rtableid = rtableid;
3007 	}
3008 	if (pd != NULL && (pd->pktflags & PKTF_FLOW_ID)) {
3009 		*pbuf->pb_flowsrc = pd->flowsrc;
3010 		*pbuf->pb_flowid = pd->flowhash;
3011 		*pbuf->pb_flags |= pd->pktflags;
3012 		*pbuf->pb_proto = pd->proto;
3013 	}
3014 
3015 	return 0;
3016 }
3017 
/*
 * Descend into an anchor rule's sub-ruleset during rule evaluation.
 * Pushes the current (ruleset, rule) pair onto the global
 * pf_anchor_stack and redirects *rs / *r to the anchor's active
 * ruleset — or, for a wildcard anchor ("name/*"), to its first child's
 * ruleset.  On stack overflow the anchor is skipped entirely and
 * evaluation continues with the next rule.
 */
void
pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
    struct pf_rule **r, struct pf_rule **a, int *match)
{
	struct pf_anchor_stackframe     *f;

	(*r)->anchor->match = 0;
	if (match) {
		*match = 0;
	}
	/* refuse to recurse deeper than the fixed-size anchor stack */
	if (*depth >= (int)sizeof(pf_anchor_stack) /
	    (int)sizeof(pf_anchor_stack[0])) {
		printf("pf_step_into_anchor: stack overflow\n");
		*r = TAILQ_NEXT(*r, entries);
		return;
	} else if (*depth == 0 && a != NULL) {
		/* remember the outermost anchor rule for the caller */
		*a = *r;
	}
	f = pf_anchor_stack + (*depth)++;
	f->rs = *rs;
	f->r = *r;
	if ((*r)->anchor_wildcard) {
		/* wildcard anchor: iterate children, starting at the first */
		f->parent = &(*r)->anchor->children;
		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
		    NULL) {
			/* no children: caller will unwind via step_out */
			*r = NULL;
			return;
		}
		*rs = &f->child->ruleset;
	} else {
		f->parent = NULL;
		f->child = NULL;
		*rs = &(*r)->anchor->ruleset;
	}
	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
}
3054 
/*
 * Unwind the anchor stack after the rules of an anchor (or one of a
 * wildcard anchor's children) have been evaluated.  For wildcard
 * anchors the next child ruleset is tried before popping the frame.
 * Frames are popped until a rule remains to evaluate (*r != NULL) or
 * the stack is empty.  Returns the popped anchor rule's "quick" flag
 * when that anchor matched, so the caller can stop evaluation early.
 */
int
pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
    struct pf_rule **r, struct pf_rule **a, int *match)
{
	struct pf_anchor_stackframe     *f;
	int quick = 0;

	do {
		if (*depth <= 0) {
			break;
		}
		f = pf_anchor_stack + *depth - 1;
		if (f->parent != NULL && f->child != NULL) {
			/* wildcard anchor: record a child match, then
			 * advance to the next child ruleset */
			if (f->child->match ||
			    (match != NULL && *match)) {
				f->r->anchor->match = 1;
				if (match) {
					*match = 0;
				}
			}
			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
			if (f->child != NULL) {
				*rs = &f->child->ruleset;
				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
				if (*r == NULL) {
					/* child ruleset empty: keep going */
					continue;
				} else {
					break;
				}
			}
		}
		/* all children done (or plain anchor): pop the frame */
		(*depth)--;
		if (*depth == 0 && a != NULL) {
			*a = NULL;
		}
		*rs = f->rs;
		if (f->r->anchor->match || (match != NULL && *match)) {
			quick = f->r->quick;
		}
		*r = TAILQ_NEXT(f->r, entries);
	} while (*r == NULL);

	return quick;
}
3099 
3100 void
pf_poolmask(struct pf_addr * naddr,struct pf_addr * raddr,struct pf_addr * rmask,struct pf_addr * saddr,sa_family_t af)3101 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
3102     struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
3103 {
3104 	switch (af) {
3105 #if INET
3106 	case AF_INET:
3107 		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3108 		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
3109 		break;
3110 #endif /* INET */
3111 	case AF_INET6:
3112 		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3113 		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
3114 		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
3115 		    ((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]);
3116 		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
3117 		    ((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]);
3118 		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
3119 		    ((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]);
3120 		break;
3121 	}
3122 }
3123 
3124 void
pf_addr_inc(struct pf_addr * addr,sa_family_t af)3125 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
3126 {
3127 	switch (af) {
3128 #if INET
3129 	case AF_INET:
3130 		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
3131 		break;
3132 #endif /* INET */
3133 	case AF_INET6:
3134 		if (addr->addr32[3] == 0xffffffff) {
3135 			addr->addr32[3] = 0;
3136 			if (addr->addr32[2] == 0xffffffff) {
3137 				addr->addr32[2] = 0;
3138 				if (addr->addr32[1] == 0xffffffff) {
3139 					addr->addr32[1] = 0;
3140 					addr->addr32[0] =
3141 					    htonl(ntohl(addr->addr32[0]) + 1);
3142 				} else {
3143 					addr->addr32[1] =
3144 					    htonl(ntohl(addr->addr32[1]) + 1);
3145 				}
3146 			} else {
3147 				addr->addr32[2] =
3148 				    htonl(ntohl(addr->addr32[2]) + 1);
3149 			}
3150 		} else {
3151 			addr->addr32[3] =
3152 			    htonl(ntohl(addr->addr32[3]) + 1);
3153 		}
3154 		break;
3155 	}
3156 }
3157 
/*
 * 96-bit mixing step used by pf_hash() below: repeatedly subtracts and
 * xor-shifts the three accumulators so that input bits spread across
 * all of a, b and c.
 */
#define mix(a, b, c) \
	do {                                    \
	        a -= b; a -= c; a ^= (c >> 13); \
	        b -= c; b -= a; b ^= (a << 8);  \
	        c -= a; c -= b; c ^= (b >> 13); \
	        a -= b; a -= c; a ^= (c >> 12); \
	        b -= c; b -= a; b ^= (a << 16); \
	        c -= a; c -= b; c ^= (b >> 5);  \
	        a -= b; a -= c; a ^= (c >> 3);  \
	        b -= c; b -= a; b ^= (a << 10); \
	        c -= a; c -= b; c ^= (b >> 15); \
	} while (0)
3170 
3171 /*
3172  * hash function based on bridge_hash in if_bridge.c
3173  */
3174 static void
pf_hash(struct pf_addr * inaddr,struct pf_addr * hash,struct pf_poolhashkey * key,sa_family_t af)3175 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
3176     struct pf_poolhashkey *key, sa_family_t af)
3177 {
3178 	u_int32_t       a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
3179 
3180 	switch (af) {
3181 #if INET
3182 	case AF_INET:
3183 		a += inaddr->addr32[0];
3184 		b += key->key32[1];
3185 		mix(a, b, c);
3186 		hash->addr32[0] = c + key->key32[2];
3187 		break;
3188 #endif /* INET */
3189 	case AF_INET6:
3190 		a += inaddr->addr32[0];
3191 		b += inaddr->addr32[2];
3192 		mix(a, b, c);
3193 		hash->addr32[0] = c;
3194 		a += inaddr->addr32[1];
3195 		b += inaddr->addr32[3];
3196 		c += key->key32[1];
3197 		mix(a, b, c);
3198 		hash->addr32[1] = c;
3199 		a += inaddr->addr32[2];
3200 		b += inaddr->addr32[1];
3201 		c += key->key32[2];
3202 		mix(a, b, c);
3203 		hash->addr32[2] = c;
3204 		a += inaddr->addr32[3];
3205 		b += inaddr->addr32[0];
3206 		c += key->key32[3];
3207 		mix(a, b, c);
3208 		hash->addr32[3] = c;
3209 		break;
3210 	}
3211 }
3212 
/*
 * Select the translated address "naddr" from rule "r"'s address pool,
 * honoring the pool type (none / bitmask / random / source-hash /
 * round-robin) and sticky-address source tracking.  "init_addr", when
 * non-NULL, records the first address handed out so callers iterating
 * the pool (pf_get_sport) can detect when they have wrapped around.
 * "*sn" is the source node used for sticky addresses; if one exists
 * with a recorded address, that address is reused directly.
 * Returns 0 on success with naddr filled in, 1 when no usable address
 * is available.
 */
static __attribute__((noinline)) int
pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
{
	unsigned char            hash[16];
	struct pf_pool          *rpool = &r->rpool;
	struct pf_addr          *raddr = &rpool->cur->addr.v.a.addr;
	struct pf_addr          *rmask = &rpool->cur->addr.v.a.mask;
	struct pf_pooladdr      *acur = rpool->cur;   /* round-robin start */
	struct pf_src_node       k;

	/* sticky address: reuse the mapping recorded for this source */
	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		k.af = af;
		PF_ACPY(&k.addr, saddr, af);
		if (r->rule_flag & PFRULE_RULESRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR) {
			k.rule.ptr = r;
		} else {
			k.rule.ptr = NULL;
		}
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, rpool->af)) {
			PF_ACPY(naddr, &(*sn)->raddr, rpool->af);
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf_map_addr: src tracking maps ");
				pf_print_host(&k.addr, 0, af);
				printf(" to ");
				pf_print_host(naddr, 0, rpool->af);
				printf("\n");
			}
			return 0;
		}
	}

	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
		return 1;
	}
	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
		/* pool entry tracks an interface's current address(es) */
		if (rpool->cur->addr.p.dyn == NULL) {
			return 1;
		}
		switch (rpool->af) {
#if INET
		case AF_INET:
			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN) {
				return 1;
			}
			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
			break;
#endif /* INET */
		case AF_INET6:
			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN) {
				return 1;
			}
			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
			break;
		}
	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
		/* tables only make sense with round-robin selection */
		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
			return 1; /* unsupported */
		}
	} else {
		raddr = &rpool->cur->addr.v.a.addr;
		rmask = &rpool->cur->addr.v.a.mask;
	}

	switch (rpool->opts & PF_POOL_TYPEMASK) {
	case PF_POOL_NONE:
		PF_ACPY(naddr, raddr, rpool->af);
		break;
	case PF_POOL_BITMASK:
		ASSERT(af == rpool->af);
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		break;
	case PF_POOL_RANDOM:
		if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) {
			/* first call: seed the counter randomly */
			switch (af) {
#if INET
			case AF_INET:
				rpool->counter.addr32[0] = htonl(random());
				break;
#endif /* INET */
			case AF_INET6:
				/*
				 * Only randomize words not fully covered
				 * by the network mask.
				 */
				if (rmask->addr32[3] != 0xffffffff) {
					rpool->counter.addr32[3] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[2] != 0xffffffff) {
					rpool->counter.addr32[2] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[1] != 0xffffffff) {
					rpool->counter.addr32[1] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[0] != 0xffffffff) {
					rpool->counter.addr32[0] =
					    RandomULong();
				}
				break;
			}
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter,
			    rpool->af);
			PF_ACPY(init_addr, naddr, rpool->af);
		} else {
			/* later calls: walk sequentially from the seed */
			PF_AINC(&rpool->counter, rpool->af);
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter,
			    rpool->af);
		}
		break;
	case PF_POOL_SRCHASH:
		ASSERT(af == rpool->af);
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		/* keyed hash of the source picks the pool address */
		pf_hash(saddr, (struct pf_addr *)(void *)&hash,
		    &rpool->key, af);
		PF_POOLMASK(naddr, raddr, rmask,
		    (struct pf_addr *)(void *)&hash, af);
		break;
	case PF_POOL_ROUNDROBIN:
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				goto get_addr;
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			if (rpool->cur->addr.p.dyn != NULL &&
			    !pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				goto get_addr;
			}
		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter,
		    rpool->af)) {
			goto get_addr;
		}

try_next:
		/* current entry exhausted: advance, wrapping to the head */
		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) {
			rpool->cur = TAILQ_FIRST(&rpool->list);
		}
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				/* table contains no address of type
				 * 'rpool->af' */
				if (rpool->cur != acur) {
					goto try_next;
				}
				/* looped back to the starting entry */
				return 1;
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			rpool->tblidx = -1;
			if (rpool->cur->addr.p.dyn == NULL) {
				return 1;
			}
			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				/* table contains no address of type
				 * 'rpool->af' */
				if (rpool->cur != acur) {
					goto try_next;
				}
				return 1;
			}
		} else {
			raddr = &rpool->cur->addr.v.a.addr;
			rmask = &rpool->cur->addr.v.a.mask;
			PF_ACPY(&rpool->counter, raddr, rpool->af);
		}

get_addr:
		PF_ACPY(naddr, &rpool->counter, rpool->af);
		if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) {
			PF_ACPY(init_addr, naddr, rpool->af);
		}
		PF_AINC(&rpool->counter, rpool->af);
		break;
	}
	if (*sn != NULL) {
		/* remember the mapping for sticky-address reuse */
		PF_ACPY(&(*sn)->raddr, naddr, rpool->af);
	}

	if (pf_status.debug >= PF_DEBUG_MISC &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		printf("pf_map_addr: selected address ");
		pf_print_host(naddr, 0, rpool->af);
		printf("\n");
	}

	return 0;
}
3422 
/*
 * Choose the translated source address (via pf_map_addr) and a free
 * translated source port for a NAT rule.  UDP IKE traffic keeps its
 * port; existing UDP/TCP states from the same rule and source can be
 * reused so one internal endpoint keeps one external binding.
 * Otherwise a port in [low, high] is probed (random start, then linear
 * search both directions) against the state table; when the whole range
 * is busy the pool is advanced (random/round-robin only) until the pool
 * wraps back to the first address.  Returns 0 with naddr/nxport filled
 * in, 1 when no address/port combination is available.
 */
static __attribute__((noinline)) int
pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r,
    struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, struct pf_addr *naddr,
    union pf_state_xport *nxport, struct pf_src_node **sn
#if SKYWALK
    , netns_token *pnstoken
#endif
    )
{
#pragma unused(kif)
	struct pf_state_key_cmp key;
	struct pf_addr          init_addr;
	unsigned int cut;               /* random starting port */
	sa_family_t af = pd->af;
	u_int8_t proto = pd->proto;
	unsigned int low = r->rpool.proxy_port[0];
	unsigned int high = r->rpool.proxy_port[1];

	bzero(&init_addr, sizeof(init_addr));
	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) {
		return 1;
	}

	if (proto == IPPROTO_ICMP) {
		/* ICMP "ports" are query ids; use the full id space */
		low = 1;
		high = 65535;
	}

	if (!nxport) {
		return 0; /* No output necessary. */
	}
	/*--- Special mapping rules for UDP ---*/
	if (proto == IPPROTO_UDP) {
		/*--- Never float IKE source port ---*/
		if (ntohs(sxport->port) == PF_IKE_PORT) {
			nxport->port = sxport->port;
			return 0;
		}

		/*--- Apply exterior mapping options ---*/
		/* endpoint-independent mapping: reuse an existing state's
		 * external port for the same internal (addr, port) */
		if (r->extmap > PF_EXTMAP_APD) {
			struct pf_state *s;

			TAILQ_FOREACH(s, &state_list, entry_list) {
				struct pf_state_key *sk = s->state_key;
				if (!sk) {
					continue;
				}
				if (s->nat_rule.ptr != r) {
					continue;
				}
				if (sk->proto != IPPROTO_UDP ||
				    sk->af_lan != af) {
					continue;
				}
				if (sk->lan.xport.port != sxport->port) {
					continue;
				}
				if (PF_ANEQ(&sk->lan.addr, saddr, af)) {
					continue;
				}
				if (r->extmap < PF_EXTMAP_EI &&
				    PF_ANEQ(&sk->ext_lan.addr, daddr, af)) {
					continue;
				}

#if SKYWALK
				if (netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, sxport->port,
				    NETNS_PF, NULL) != 0) {
					return 1;
				}
#endif
				nxport->port = sk->gwy.xport.port;
				return 0;
			}
		}
	} else if (proto == IPPROTO_TCP) {
		struct pf_state* s;
		/*
		 * APPLE MODIFICATION: <rdar://problem/6546358>
		 * Fix allows....NAT to use a single binding for TCP session
		 * with same source IP and source port
		 */
		TAILQ_FOREACH(s, &state_list, entry_list) {
			struct pf_state_key* sk = s->state_key;
			if (!sk) {
				continue;
			}
			if (s->nat_rule.ptr != r) {
				continue;
			}
			if (sk->proto != IPPROTO_TCP || sk->af_lan != af) {
				continue;
			}
			if (sk->lan.xport.port != sxport->port) {
				continue;
			}
			if (!(PF_AEQ(&sk->lan.addr, saddr, af))) {
				continue;
			}
#if SKYWALK
			if (netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, sxport->port,
			    NETNS_PF, NULL) != 0) {
				return 1;
			}
#endif
			nxport->port = sk->gwy.xport.port;
			return 0;
		}
	}
	do {
		key.af_gwy = af;
		key.proto = proto;
		PF_ACPY(&key.ext_gwy.addr, daddr, key.af_gwy);
		PF_ACPY(&key.gwy.addr, naddr, key.af_gwy);
		switch (proto) {
		case IPPROTO_UDP:
			key.proto_variant = r->extfilter;
			break;
		default:
			key.proto_variant = 0;
			break;
		}
		if (dxport) {
			key.ext_gwy.xport = *dxport;
		} else {
			memset(&key.ext_gwy.xport, 0,
			    sizeof(key.ext_gwy.xport));
		}
		/*
		 * port search; start random, step;
		 * similar 2 portloop in in_pcbbind
		 */
		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
		    proto == IPPROTO_ICMP)) {
			/* non-port protocols: no port to allocate, just
			 * make sure the state is unique */
			if (dxport) {
				key.gwy.xport = *dxport;
			} else {
				memset(&key.gwy.xport, 0,
				    sizeof(key.gwy.xport));
			}
#if SKYWALK
			/* Nothing to do: netns handles TCP/UDP only */
#endif
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
				return 0;
			}
		} else if (low == 0 && high == 0) {
			/* no proxy range configured: keep original port */
			key.gwy.xport = *nxport;
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
			    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
			    || netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, nxport->port,
			    NETNS_PF, NULL) == 0)
#endif
			    ) {
				return 0;
			}
		} else if (low == high) {
			/* single-port range */
			key.gwy.xport.port = htons(low);
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
			    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
			    || netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, htons(low),
			    NETNS_PF, NULL) == 0)
#endif
			    ) {
				nxport->port = htons(low);
				return 0;
			}
		} else {
			unsigned int tmp;
			if (low > high) {
				tmp = low;
				low = high;
				high = tmp;
			}
			/* low < high */
			/* NOTE(review): htonl() on the random value only
			 * permutes bytes before the modulo — harmless but
			 * odd; result is still in [low, high]. */
			cut = htonl(random()) % (1 + high - low) + low;
			/* low <= cut <= high */
			for (tmp = cut; tmp <= high; ++(tmp)) {
				key.gwy.xport.port = htons(tmp);
				if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
				    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
				    || netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, htons(tmp),
				    NETNS_PF, NULL) == 0)
#endif
				    ) {
					nxport->port = htons(tmp);
					return 0;
				}
			}
			/* NOTE(review): tmp is unsigned; if low could be 0
			 * this loop would wrap past zero — presumably
			 * proxy ranges start at >= 1; confirm. */
			for (tmp = cut - 1; tmp >= low; --(tmp)) {
				key.gwy.xport.port = htons(tmp);
				if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
				    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
				    || netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, htons(tmp),
				    NETNS_PF, NULL) == 0)
#endif
				    ) {
					nxport->port = htons(tmp);
					return 0;
				}
			}
		}

		/* range exhausted on this address: try the next pool
		 * address where the pool type allows it */
		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
		case PF_POOL_RANDOM:
		case PF_POOL_ROUNDROBIN:
			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) {
				return 1;
			}
			break;
		case PF_POOL_NONE:
		case PF_POOL_SRCHASH:
		case PF_POOL_BITMASK:
		default:
			return 1;
		}
	} while (!PF_AEQ(&init_addr, naddr, af));

	return 1;                                     /* none available */
}
3655 
3656 static __attribute__((noinline)) struct pf_rule *
pf_match_translation(struct pf_pdesc * pd,pbuf_t * pbuf,int off,int direction,struct pfi_kif * kif,struct pf_addr * saddr,union pf_state_xport * sxport,struct pf_addr * daddr,union pf_state_xport * dxport,int rs_num)3657 pf_match_translation(struct pf_pdesc *pd, pbuf_t *pbuf, int off,
3658     int direction, struct pfi_kif *kif, struct pf_addr *saddr,
3659     union pf_state_xport *sxport, struct pf_addr *daddr,
3660     union pf_state_xport *dxport, int rs_num)
3661 {
3662 	struct pf_rule          *r, *rm = NULL;
3663 	struct pf_ruleset       *ruleset = NULL;
3664 	int                      tag = -1;
3665 	unsigned int             rtableid = IFSCOPE_NONE;
3666 	int                      asd = 0;
3667 
3668 	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
3669 	while (r && rm == NULL) {
3670 		struct pf_rule_addr     *src = NULL, *dst = NULL;
3671 		struct pf_addr_wrap     *xdst = NULL;
3672 		struct pf_addr_wrap     *xsrc = NULL;
3673 		union pf_rule_xport     rdrxport;
3674 
3675 		if (r->action == PF_BINAT && direction == PF_IN) {
3676 			src = &r->dst;
3677 			if (r->rpool.cur != NULL) {
3678 				xdst = &r->rpool.cur->addr;
3679 			}
3680 		} else if (r->action == PF_RDR && direction == PF_OUT) {
3681 			dst = &r->src;
3682 			src = &r->dst;
3683 			if (r->rpool.cur != NULL) {
3684 				rdrxport.range.op = PF_OP_EQ;
3685 				rdrxport.range.port[0] =
3686 				    htons(r->rpool.proxy_port[0]);
3687 				xsrc = &r->rpool.cur->addr;
3688 			}
3689 		} else {
3690 			src = &r->src;
3691 			dst = &r->dst;
3692 		}
3693 
3694 		r->evaluations++;
3695 		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
3696 			r = r->skip[PF_SKIP_IFP].ptr;
3697 		} else if (r->direction && r->direction != direction) {
3698 			r = r->skip[PF_SKIP_DIR].ptr;
3699 		} else if (r->af && r->af != pd->af) {
3700 			r = r->skip[PF_SKIP_AF].ptr;
3701 		} else if (r->proto && r->proto != pd->proto) {
3702 			r = r->skip[PF_SKIP_PROTO].ptr;
3703 		} else if (xsrc && PF_MISMATCHAW(xsrc, saddr, pd->af, 0, NULL)) {
3704 			r = TAILQ_NEXT(r, entries);
3705 		} else if (!xsrc && PF_MISMATCHAW(&src->addr, saddr, pd->af,
3706 		    src->neg, kif)) {
3707 			r = TAILQ_NEXT(r, entries);
3708 		} else if (xsrc && (!rdrxport.range.port[0] ||
3709 		    !pf_match_xport(r->proto, r->proto_variant, &rdrxport,
3710 		    sxport))) {
3711 			r = TAILQ_NEXT(r, entries);
3712 		} else if (!xsrc && !pf_match_xport(r->proto,
3713 		    r->proto_variant, &src->xport, sxport)) {
3714 			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
3715 			    PF_SKIP_DST_PORT].ptr;
3716 		} else if (dst != NULL &&
3717 		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) {
3718 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
3719 		} else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
3720 		    0, NULL)) {
3721 			r = TAILQ_NEXT(r, entries);
3722 		} else if (dst && !pf_match_xport(r->proto, r->proto_variant,
3723 		    &dst->xport, dxport)) {
3724 			r = r->skip[PF_SKIP_DST_PORT].ptr;
3725 		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
3726 			r = TAILQ_NEXT(r, entries);
3727 		} else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
3728 		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, pbuf,
3729 		    off, pd->hdr.tcp), r->os_fingerprint))) {
3730 			r = TAILQ_NEXT(r, entries);
3731 		} else {
3732 			if (r->tag) {
3733 				tag = r->tag;
3734 			}
3735 			if (PF_RTABLEID_IS_VALID(r->rtableid)) {
3736 				rtableid = r->rtableid;
3737 			}
3738 			if (r->anchor == NULL) {
3739 				rm = r;
3740 			} else {
3741 				pf_step_into_anchor(&asd, &ruleset, rs_num,
3742 				    &r, NULL, NULL);
3743 			}
3744 		}
3745 		if (r == NULL) {
3746 			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
3747 			    NULL, NULL);
3748 		}
3749 	}
3750 	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, NULL)) {
3751 		return NULL;
3752 	}
3753 	if (rm != NULL && (rm->action == PF_NONAT ||
3754 	    rm->action == PF_NORDR || rm->action == PF_NOBINAT ||
3755 	    rm->action == PF_NONAT64)) {
3756 		return NULL;
3757 	}
3758 	return rm;
3759 }
3760 
3761 /*
3762  * Get address translation information for NAT/BINAT/RDR
3763  * pd		: pf packet descriptor
3764  * pbuf		: pbuf holding the packet
3765  * off		: offset to protocol header
3766  * direction	: direction of packet
3767  * kif		: pf interface info obtained from the packet's recv interface
3768  * sn		: source node pointer (output)
3769  * saddr	: packet source address
3770  * sxport	: packet source port
3771  * daddr	: packet destination address
3772  * dxport	: packet destination port
3773  * nsxport	: translated source port (output)
3774  *
3775  * Translated source & destination address are updated in pd->nsaddr &
3776  * pd->ndaddr
3777  */
static __attribute__((noinline)) struct pf_rule *
pf_get_translation_aux(struct pf_pdesc *pd, pbuf_t *pbuf, int off,
    int direction, struct pfi_kif *kif, struct pf_src_node **sn,
    struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, union pf_state_xport *nsxport
#if SKYWALK
    , netns_token *pnstoken
#endif
    )
{
	struct pf_rule  *r = NULL;
	pd->naf = pd->af;

	/* ruleset precedence differs by direction:
	 * out: BINAT, then RDR, then NAT; in: RDR, then BINAT */
	if (direction == PF_OUT) {
		r = pf_match_translation(pd, pbuf, off, direction, kif, saddr,
		    sxport, daddr, dxport, PF_RULESET_BINAT);
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_RDR);
		}
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_NAT);
		}
	} else {
		r = pf_match_translation(pd, pbuf, off, direction, kif, saddr,
		    sxport, daddr, dxport, PF_RULESET_RDR);
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_BINAT);
		}
	}

	if (r != NULL) {
		/* translated addresses are produced into pd->naddr/ndaddr,
		 * starting from the untranslated packet addresses */
		struct pf_addr *nsaddr = &pd->naddr;
		struct pf_addr *ndaddr = &pd->ndaddr;

		*nsaddr = *saddr;
		*ndaddr = *daddr;

		switch (r->action) {
		case PF_NONAT:
		case PF_NONAT64:
		case PF_NOBINAT:
		case PF_NORDR:
			/* explicit "no translation" rule */
			return NULL;
		case PF_NAT:
		case PF_NAT64:
			/*
			 * we do NAT64 on incoming path and we call ip_input
			 * which asserts receive interface to be not NULL.
			 * The below check is to prevent NAT64 action on any
			 * packet generated by local entity using synthesized
			 * IPv6 address.
			 */
			if ((r->action == PF_NAT64) && (direction == PF_OUT)) {
				return NULL;
			}

			/* allocate translated source address + port */
			if (pf_get_sport(pd, kif, r, saddr, sxport, daddr,
			    dxport, nsaddr, nsxport, sn
#if SKYWALK
			    , pnstoken
#endif
			    )) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: NAT proxy port allocation "
				    "(%u-%u) failed\n",
				    r->rpool.proxy_port[0],
				    r->rpool.proxy_port[1]));
				return NULL;
			}
			/*
			 * For NAT64 the destination IPv4 address is derived
			 * from the last 32 bits of synthesized IPv6 address
			 */
			if (r->action == PF_NAT64) {
				ndaddr->v4addr.s_addr = daddr->addr32[3];
				pd->naf = AF_INET;
			}
			break;
		case PF_BINAT:
			/* bidirectional 1:1 mapping under the pool mask */
			switch (direction) {
			case PF_OUT:
				if (r->rpool.cur->addr.type ==
				    PF_ADDR_DYNIFTL) {
					if (r->rpool.cur->addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr4,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask4,
						    saddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr6,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask6,
						    saddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(nsaddr,
					    &r->rpool.cur->addr.v.a.addr,
					    &r->rpool.cur->addr.v.a.mask,
					    saddr, pd->af);
				}
				break;
			case PF_IN:
				/* reverse direction: rewrite destination
				 * back to the internal address */
				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
					if (r->src.addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->src.addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(ndaddr,
						    &r->src.addr.p.dyn->
						    pfid_addr4,
						    &r->src.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->src.addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(ndaddr,
						    &r->src.addr.p.dyn->
						    pfid_addr6,
						    &r->src.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(ndaddr,
					    &r->src.addr.v.a.addr,
					    &r->src.addr.v.a.mask, daddr,
					    pd->af);
				}
				break;
			}
			break;
		case PF_RDR: {
			switch (direction) {
			case PF_OUT:
				/* replies leaving: restore original dest */
				if (r->dst.addr.type == PF_ADDR_DYNIFTL) {
					if (r->dst.addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->dst.addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->dst.addr.p.dyn->
						    pfid_addr4,
						    &r->dst.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->dst.addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->dst.addr.p.dyn->
						    pfid_addr6,
						    &r->dst.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(nsaddr,
					    &r->dst.addr.v.a.addr,
					    &r->dst.addr.v.a.mask,
					    daddr, pd->af);
				}
				if (nsxport && r->dst.xport.range.port[0]) {
					nsxport->port =
					    r->dst.xport.range.port[0];
				}
				break;
			case PF_IN:
				/* redirect: pick the target address from
				 * the rule's pool */
				if (pf_map_addr(pd->af, r, saddr,
				    ndaddr, NULL, sn)) {
					return NULL;
				}
				if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
				    PF_POOL_BITMASK) {
					PF_POOLMASK(ndaddr, ndaddr,
					    &r->rpool.cur->addr.v.a.mask, daddr,
					    pd->af);
				}

				if (nsxport && dxport) {
					if (r->rpool.proxy_port[1]) {
						/* map the destination port
						 * into the proxy range,
						 * preserving the offset */
						u_int32_t       tmp_nport;

						tmp_nport =
						    ((ntohs(dxport->port) -
						    ntohs(r->dst.xport.range.
						    port[0])) %
						    (r->rpool.proxy_port[1] -
						    r->rpool.proxy_port[0] +
						    1)) + r->rpool.proxy_port[0];

						/* wrap around if necessary */
						if (tmp_nport > 65535) {
							tmp_nport -= 65535;
						}
						nsxport->port =
						    htons((u_int16_t)tmp_nport);
					} else if (r->rpool.proxy_port[0]) {
						/* single fixed target port */
						nsxport->port = htons(r->rpool.
						    proxy_port[0]);
					}
				}
				break;
			}
			break;
		}
		default:
			return NULL;
		}
	}

	return r;
}
4036 
/*
 * Find out whether a local socket matches the packet described by `pd'
 * and record the owning uid/gid in pd->lookup via the
 * in(6)_pcblookup_hash_exists() helpers.
 *
 * Returns 1 when a matching PCB exists, -1 on failure or when the
 * protocol is neither TCP nor UDP.
 */
int
pf_socket_lookup(int direction, struct pf_pdesc *pd)
{
	struct pf_addr          *saddr, *daddr;
	u_int16_t                sport, dport;
	struct inpcbinfo        *pi;
	int                     inp = 0;

	if (pd == NULL) {
		return -1;
	}
	/* Reset lookup results to "unknown" before attempting a match. */
	pd->lookup.uid = UID_MAX;
	pd->lookup.gid = GID_MAX;
	pd->lookup.pid = NO_PID;

	switch (pd->proto) {
	case IPPROTO_TCP:
		if (pd->hdr.tcp == NULL) {
			return -1;
		}
		sport = pd->hdr.tcp->th_sport;
		dport = pd->hdr.tcp->th_dport;
		pi = &tcbinfo;
		break;
	case IPPROTO_UDP:
		if (pd->hdr.udp == NULL) {
			return -1;
		}
		sport = pd->hdr.udp->uh_sport;
		dport = pd->hdr.udp->uh_dport;
		pi = &udbinfo;
		break;
	default:
		/* only TCP and UDP sockets can be looked up */
		return -1;
	}
	if (direction == PF_IN) {
		saddr = pd->src;
		daddr = pd->dst;
	} else {
		u_int16_t       p;

		/*
		 * Outbound: swap the tuple so that (saddr, sport) always
		 * names the foreign endpoint and (daddr, dport) the local
		 * socket, matching the pcblookup argument order below.
		 */
		p = sport;
		sport = dport;
		dport = p;
		saddr = pd->dst;
		daddr = pd->src;
	}
	switch (pd->af) {
#if INET
	case AF_INET:
		inp = in_pcblookup_hash_exists(pi, saddr->v4addr, sport, daddr->v4addr, dport,
		    0, &pd->lookup.uid, &pd->lookup.gid, NULL);
		if (inp == 0) {
			struct in6_addr s6, d6;

			/*
			 * No exact IPv4 match: retry against IPv6 sockets
			 * using the IPv4-mapped form ::ffff:a.b.c.d.
			 */
			memset(&s6, 0, sizeof(s6));
			s6.s6_addr16[5] = htons(0xffff);
			memcpy(&s6.s6_addr32[3], &saddr->v4addr,
			    sizeof(saddr->v4addr));

			memset(&d6, 0, sizeof(d6));
			d6.s6_addr16[5] = htons(0xffff);
			memcpy(&d6.s6_addr32[3], &daddr->v4addr,
			    sizeof(daddr->v4addr));

			inp = in6_pcblookup_hash_exists(pi, &s6, sport, IFSCOPE_NONE,
			    &d6, dport, IFSCOPE_NONE, 0, &pd->lookup.uid, &pd->lookup.gid, NULL, false);
			if (inp == 0) {
				/* Fall back to wildcard (listening) sockets. */
				inp = in_pcblookup_hash_exists(pi, saddr->v4addr, sport,
				    daddr->v4addr, dport, INPLOOKUP_WILDCARD, &pd->lookup.uid, &pd->lookup.gid, NULL);
				if (inp == 0) {
					/* ... and wildcard IPv4-mapped IPv6 sockets. */
					inp = in6_pcblookup_hash_exists(pi, &s6, sport, IFSCOPE_NONE,
					    &d6, dport, IFSCOPE_NONE, INPLOOKUP_WILDCARD,
					    &pd->lookup.uid, &pd->lookup.gid, NULL, false);
					if (inp == 0) {
						return -1;
					}
				}
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		inp = in6_pcblookup_hash_exists(pi, &saddr->v6addr, sport, IFSCOPE_UNKNOWN, &daddr->v6addr,
		    dport, IFSCOPE_UNKNOWN, 0, &pd->lookup.uid, &pd->lookup.gid, NULL, false);
		if (inp == 0) {
			/* Retry with wildcard match for listening sockets. */
			inp = in6_pcblookup_hash_exists(pi, &saddr->v6addr, sport, IFSCOPE_UNKNOWN,
			    &daddr->v6addr, dport, IFSCOPE_UNKNOWN, INPLOOKUP_WILDCARD,
			    &pd->lookup.uid, &pd->lookup.gid, NULL, false);
			if (inp == 0) {
				return -1;
			}
		}
		break;

	default:
		return -1;
	}

	return 1;
}
4138 
/*
 * Walk the TCP options of the header at `off' and return the advertised
 * window-scale value, capped at TCP_MAX_WINSHIFT and tagged with
 * PF_WSCALE_FLAG to indicate the option was seen.  Returns 0 when the
 * header carries no options or cannot be pulled up.
 */
static __attribute__((noinline)) u_int8_t
pf_get_wscale(pbuf_t *pbuf, int off, u_int16_t th_off, sa_family_t af)
{
	int              hlen;
	u_int8_t         hdr[60];       /* max TCP header incl. options */
	u_int8_t        *opt, optlen;
	u_int8_t         wscale = 0;

	hlen = th_off << 2;             /* hlen <= sizeof (hdr) */
	if (hlen <= (int)sizeof(struct tcphdr)) {
		return 0;               /* no options present */
	}
	if (!pf_pull_hdr(pbuf, off, hdr, hlen, NULL, NULL, af)) {
		return 0;
	}
	opt = hdr + sizeof(struct tcphdr);
	hlen -= sizeof(struct tcphdr);
	/* WINDOW option is 3 bytes (kind, len, shift). */
	while (hlen >= 3) {
		switch (*opt) {
		case TCPOPT_EOL:
		case TCPOPT_NOP:
			/* single-byte options */
			++opt;
			--hlen;
			break;
		case TCPOPT_WINDOW:
			wscale = opt[2];
			if (wscale > TCP_MAX_WINSHIFT) {
				wscale = TCP_MAX_WINSHIFT;
			}
			wscale |= PF_WSCALE_FLAG;
			/* fall through to advance past the option */
			OS_FALLTHROUGH;
		default:
			optlen = opt[1];
			if (optlen < 2) {
				optlen = 2;     /* guard against malformed length */
			}
			hlen -= optlen;
			opt += optlen;
			break;
		}
	}
	return wscale;
}
4182 
/*
 * Extract the MSS option from the TCP header at `off'.  Returns the
 * advertised MSS, tcp_mssdflt when the option is absent, or 0 when
 * there are no options at all or the header cannot be pulled up.
 */
static __attribute__((noinline)) u_int16_t
pf_get_mss(pbuf_t *pbuf, int off, u_int16_t th_off, sa_family_t af)
{
	int              hlen;
	u_int8_t         hdr[60];       /* max TCP header incl. options */
	u_int8_t        *opt, optlen;
	u_int16_t        mss = tcp_mssdflt;

	hlen = th_off << 2;     /* hlen <= sizeof (hdr) */
	if (hlen <= (int)sizeof(struct tcphdr)) {
		return 0;       /* no options present */
	}
	if (!pf_pull_hdr(pbuf, off, hdr, hlen, NULL, NULL, af)) {
		return 0;
	}
	opt = hdr + sizeof(struct tcphdr);
	hlen -= sizeof(struct tcphdr);
	while (hlen >= TCPOLEN_MAXSEG) {
		switch (*opt) {
		case TCPOPT_EOL:
		case TCPOPT_NOP:
			/* single-byte options */
			++opt;
			--hlen;
			break;
		case TCPOPT_MAXSEG:
			/* 16-bit MSS value follows kind/len bytes */
			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
#if BYTE_ORDER != BIG_ENDIAN
			NTOHS(mss);
#endif
			/* fall through to advance past the option */
			OS_FALLTHROUGH;
		default:
			optlen = opt[1];
			if (optlen < 2) {
				optlen = 2;     /* guard against malformed length */
			}
			hlen -= optlen;
			opt += optlen;
			break;
		}
	}
	return mss;
}
4225 
4226 static __attribute__((noinline)) u_int16_t
pf_calc_mss(struct pf_addr * addr,sa_family_t af,u_int16_t offer)4227 pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
4228 {
4229 #if INET
4230 	struct sockaddr_in      *dst;
4231 	struct route             ro;
4232 #endif /* INET */
4233 	struct sockaddr_in6     *dst6;
4234 	struct route_in6         ro6;
4235 	struct rtentry          *rt = NULL;
4236 	int                      hlen;
4237 	u_int16_t                mss = tcp_mssdflt;
4238 
4239 	switch (af) {
4240 #if INET
4241 	case AF_INET:
4242 		hlen = sizeof(struct ip);
4243 		bzero(&ro, sizeof(ro));
4244 		dst = (struct sockaddr_in *)(void *)&ro.ro_dst;
4245 		dst->sin_family = AF_INET;
4246 		dst->sin_len = sizeof(*dst);
4247 		dst->sin_addr = addr->v4addr;
4248 		rtalloc(&ro);
4249 		rt = ro.ro_rt;
4250 		break;
4251 #endif /* INET */
4252 	case AF_INET6:
4253 		hlen = sizeof(struct ip6_hdr);
4254 		bzero(&ro6, sizeof(ro6));
4255 		dst6 = (struct sockaddr_in6 *)(void *)&ro6.ro_dst;
4256 		dst6->sin6_family = AF_INET6;
4257 		dst6->sin6_len = sizeof(*dst6);
4258 		dst6->sin6_addr = addr->v6addr;
4259 		rtalloc((struct route *)&ro);
4260 		rt = ro6.ro_rt;
4261 		break;
4262 	default:
4263 		panic("pf_calc_mss: not AF_INET or AF_INET6!");
4264 		return 0;
4265 	}
4266 
4267 	if (rt && rt->rt_ifp) {
4268 		/* This is relevant only for PF SYN Proxy */
4269 		int interface_mtu = rt->rt_ifp->if_mtu;
4270 
4271 		if (af == AF_INET &&
4272 		    INTF_ADJUST_MTU_FOR_CLAT46(rt->rt_ifp)) {
4273 			interface_mtu = IN6_LINKMTU(rt->rt_ifp);
4274 			/* Further adjust the size for CLAT46 expansion */
4275 			interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
4276 		}
4277 		mss = interface_mtu - hlen - sizeof(struct tcphdr);
4278 		mss = max(tcp_mssdflt, mss);
4279 		rtfree(rt);
4280 	}
4281 	mss = min(mss, offer);
4282 	mss = max(mss, 64);             /* sanity - at least max opt space */
4283 	return mss;
4284 }
4285 
4286 static void
pf_set_rt_ifp(struct pf_state * s,struct pf_addr * saddr,sa_family_t af)4287 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af)
4288 {
4289 	struct pf_rule *r = s->rule.ptr;
4290 
4291 	s->rt_kif = NULL;
4292 
4293 	if (!r->rt || r->rt == PF_FASTROUTE) {
4294 		return;
4295 	}
4296 	if ((af == AF_INET) || (af == AF_INET6)) {
4297 		pf_map_addr(af, r, saddr, &s->rt_addr, NULL,
4298 		    &s->nat_src_node);
4299 		s->rt_kif = r->rpool.cur->kif;
4300 	}
4301 
4302 	return;
4303 }
4304 
4305 static void
pf_attach_state(struct pf_state_key * sk,struct pf_state * s,int tail)4306 pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail)
4307 {
4308 	s->state_key = sk;
4309 	sk->refcnt++;
4310 
4311 	/* list is sorted, if-bound states before floating */
4312 	if (tail) {
4313 		TAILQ_INSERT_TAIL(&sk->states, s, next);
4314 	} else {
4315 		TAILQ_INSERT_HEAD(&sk->states, s, next);
4316 	}
4317 }
4318 
4319 static void
pf_detach_state(struct pf_state * s,int flags)4320 pf_detach_state(struct pf_state *s, int flags)
4321 {
4322 	struct pf_state_key     *sk = s->state_key;
4323 
4324 	if (sk == NULL) {
4325 		return;
4326 	}
4327 
4328 	s->state_key = NULL;
4329 	TAILQ_REMOVE(&sk->states, s, next);
4330 	if (--sk->refcnt == 0) {
4331 		if (!(flags & PF_DT_SKIP_EXTGWY)) {
4332 			RB_REMOVE(pf_state_tree_ext_gwy,
4333 			    &pf_statetbl_ext_gwy, sk);
4334 		}
4335 		if (!(flags & PF_DT_SKIP_LANEXT)) {
4336 			RB_REMOVE(pf_state_tree_lan_ext,
4337 			    &pf_statetbl_lan_ext, sk);
4338 		}
4339 		if (sk->app_state) {
4340 			pool_put(&pf_app_state_pl, sk->app_state);
4341 		}
4342 		pool_put(&pf_state_key_pl, sk);
4343 	}
4344 }
4345 
4346 struct pf_state_key *
pf_alloc_state_key(struct pf_state * s,struct pf_state_key * psk)4347 pf_alloc_state_key(struct pf_state *s, struct pf_state_key *psk)
4348 {
4349 	struct pf_state_key     *sk;
4350 
4351 	if ((sk = pool_get(&pf_state_key_pl, PR_WAITOK)) == NULL) {
4352 		return NULL;
4353 	}
4354 	bzero(sk, sizeof(*sk));
4355 	TAILQ_INIT(&sk->states);
4356 	pf_attach_state(sk, s, 0);
4357 
4358 	/* initialize state key from psk, if provided */
4359 	if (psk != NULL) {
4360 		bcopy(&psk->lan, &sk->lan, sizeof(sk->lan));
4361 		bcopy(&psk->gwy, &sk->gwy, sizeof(sk->gwy));
4362 		bcopy(&psk->ext_lan, &sk->ext_lan, sizeof(sk->ext_lan));
4363 		bcopy(&psk->ext_gwy, &sk->ext_gwy, sizeof(sk->ext_gwy));
4364 		sk->af_lan = psk->af_lan;
4365 		sk->af_gwy = psk->af_gwy;
4366 		sk->proto = psk->proto;
4367 		sk->direction = psk->direction;
4368 		sk->proto_variant = psk->proto_variant;
4369 		VERIFY(psk->app_state == NULL);
4370 		sk->flowsrc = psk->flowsrc;
4371 		sk->flowhash = psk->flowhash;
4372 		/* don't touch tree entries, states and refcnt on sk */
4373 	}
4374 
4375 	return sk;
4376 }
4377 
/*
 * Generate an initial sequence number for a SYN-proxied TCP connection.
 * A lazily-initialized random secret is mixed (via MD5) with the
 * connection 4-tuple; a random value and a monotonically advancing
 * offset are then added so successive ISNs keep moving forward.
 */
static __attribute__((noinline)) u_int32_t
pf_tcp_iss(struct pf_pdesc *pd)
{
	MD5_CTX ctx;
	u_int32_t digest[4];

	/* One-time setup: seed the shared context with the secret. */
	if (pf_tcp_secret_init == 0) {
		read_frandom(pf_tcp_secret, sizeof(pf_tcp_secret));
		MD5Init(&pf_tcp_secret_ctx);
		MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
		    sizeof(pf_tcp_secret));
		pf_tcp_secret_init = 1;
	}
	/* Copy the pre-seeded context so the secret is hashed only once. */
	ctx = pf_tcp_secret_ctx;

	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
	if (pd->af == AF_INET6) {
		MD5Update(&ctx, (char *)&pd->src->v6addr, sizeof(struct in6_addr));
		MD5Update(&ctx, (char *)&pd->dst->v6addr, sizeof(struct in6_addr));
	} else {
		MD5Update(&ctx, (char *)&pd->src->v4addr, sizeof(struct in_addr));
		MD5Update(&ctx, (char *)&pd->dst->v4addr, sizeof(struct in_addr));
	}
	MD5Final((u_char *)digest, &ctx);
	pf_tcp_iss_off += 4096;
	return digest[0] + random() + pf_tcp_iss_off;
}
4406 
4407 /*
4408  * This routine is called to perform address family translation on the
4409  * inner IP header (that may come as payload) of an ICMP(v4addr/6) error
4410  * response.
4411  */
static __attribute__((noinline)) int
pf_change_icmp_af(pbuf_t *pbuf, int off,
    struct pf_pdesc *pd, struct pf_pdesc *pd2, struct pf_addr *src,
    struct pf_addr *dst, sa_family_t af, sa_family_t naf)
{
	struct ip               *ip4 = NULL;
	struct ip6_hdr          *ip6 = NULL;
	void                    *hdr;
	int                      hlen, olen;
	uint64_t                ipid_salt = (uint64_t)pbuf_get_packet_buffer_address(pbuf);

	/* Only genuine v4 <-> v6 translations are handled. */
	if (af == naf || (af != AF_INET && af != AF_INET6) ||
	    (naf != AF_INET && naf != AF_INET6)) {
		return -1;
	}

	/* old header */
	olen = pd2->off - off;
	/* new header */
	hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);

	/* Modify the pbuf to accommodate the new header */
	hdr = pbuf_resize_segment(pbuf, off, olen, hlen);
	if (hdr == NULL) {
		return -1;
	}

	/* translate inner ip/ip6 header */
	switch (naf) {
	case AF_INET:
		ip4 = hdr;
		bzero(ip4, sizeof(*ip4));
		ip4->ip_v   = IPVERSION;
		ip4->ip_hl  = sizeof(*ip4) >> 2;
		ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - olen);
		ip4->ip_id  = rfc6864 ? 0 : htons(ip_randomid(ipid_salt));
		ip4->ip_off = htons(IP_DF);
		ip4->ip_ttl = pd2->ttl;
		/* the inner ICMPv6 payload becomes ICMPv4 */
		if (pd2->proto == IPPROTO_ICMPV6) {
			ip4->ip_p = IPPROTO_ICMP;
		} else {
			ip4->ip_p = pd2->proto;
		}
		ip4->ip_src = src->v4addr;
		ip4->ip_dst = dst->v4addr;
		/* header checksum over the freshly built header */
		ip4->ip_sum = pbuf_inet_cksum(pbuf, 0, 0, ip4->ip_hl << 2);
		break;
	case AF_INET6:
		ip6 = hdr;
		bzero(ip6, sizeof(*ip6));
		ip6->ip6_vfc  = IPV6_VERSION;
		ip6->ip6_plen = htons(pd2->tot_len - olen);
		/* the inner ICMPv4 payload becomes ICMPv6 */
		if (pd2->proto == IPPROTO_ICMP) {
			ip6->ip6_nxt = IPPROTO_ICMPV6;
		} else {
			ip6->ip6_nxt = pd2->proto;
		}
		/* clamp the hop limit into a sane range */
		if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM) {
			ip6->ip6_hlim = IPV6_DEFHLIM;
		} else {
			ip6->ip6_hlim = pd2->ttl;
		}
		ip6->ip6_src  = src->v6addr;
		ip6->ip6_dst  = dst->v6addr;
		break;
	}

	/* adjust payload offset and total packet length */
	pd2->off += hlen - olen;
	pd->tot_len += hlen - olen;

	return 0;
}
4485 
4486 #define PTR_IP(field)   ((int32_t)offsetof(struct ip, field))
4487 #define PTR_IP6(field)  ((int32_t)offsetof(struct ip6_hdr, field))
4488 
/*
 * Rewrite an ICMP header in place for address family translation.
 * `af' is the family being translated INTO:
 *  - AF_INET:  `arg' points at an ICMPv6 header whose type/code/mtu/pptr
 *    are mapped to their ICMPv4 equivalents.
 *  - AF_INET6: `arg' points at an ICMPv4 header, mapped to ICMPv6.
 * The result is stored back through the same pointer (the relevant
 * field offsets of the two layouts line up).  Returns 0 on success,
 * -1 when the type/code combination has no translation.
 */
static __attribute__((noinline)) int
pf_translate_icmp_af(int af, void *arg)
{
	struct icmp             *icmp4;
	struct icmp6_hdr        *icmp6;
	u_int32_t                mtu;
	int32_t                  ptr = -1;
	u_int8_t                 type;
	u_int8_t                 code;

	switch (af) {
	case AF_INET:
		/* ICMPv6 -> ICMPv4 */
		icmp6 = arg;
		type  = icmp6->icmp6_type;
		code  = icmp6->icmp6_code;
		mtu   = ntohl(icmp6->icmp6_mtu);

		switch (type) {
		case ICMP6_ECHO_REQUEST:
			type = ICMP_ECHO;
			break;
		case ICMP6_ECHO_REPLY:
			type = ICMP_ECHOREPLY;
			break;
		case ICMP6_DST_UNREACH:
			type = ICMP_UNREACH;
			switch (code) {
			case ICMP6_DST_UNREACH_NOROUTE:
			case ICMP6_DST_UNREACH_BEYONDSCOPE:
			case ICMP6_DST_UNREACH_ADDR:
				code = ICMP_UNREACH_HOST;
				break;
			case ICMP6_DST_UNREACH_ADMIN:
				code = ICMP_UNREACH_HOST_PROHIB;
				break;
			case ICMP6_DST_UNREACH_NOPORT:
				code = ICMP_UNREACH_PORT;
				break;
			default:
				return -1;
			}
			break;
		case ICMP6_PACKET_TOO_BIG:
			type = ICMP_UNREACH;
			code = ICMP_UNREACH_NEEDFRAG;
			/* account for the 20-byte IPv6/IPv4 header delta */
			mtu -= 20;
			break;
		case ICMP6_TIME_EXCEEDED:
			type = ICMP_TIMXCEED;
			break;
		case ICMP6_PARAM_PROB:
			switch (code) {
			case ICMP6_PARAMPROB_HEADER:
				type = ICMP_PARAMPROB;
				code = ICMP_PARAMPROB_ERRATPTR;
				ptr  = ntohl(icmp6->icmp6_pptr);

				/* map the problem pointer onto the
				 * corresponding IPv4 header field */
				if (ptr == PTR_IP6(ip6_vfc)) {
					; /* preserve */
				} else if (ptr == PTR_IP6(ip6_vfc) + 1) {
					ptr = PTR_IP(ip_tos);
				} else if (ptr == PTR_IP6(ip6_plen) ||
				    ptr == PTR_IP6(ip6_plen) + 1) {
					ptr = PTR_IP(ip_len);
				} else if (ptr == PTR_IP6(ip6_nxt)) {
					ptr = PTR_IP(ip_p);
				} else if (ptr == PTR_IP6(ip6_hlim)) {
					ptr = PTR_IP(ip_ttl);
				} else if (ptr >= PTR_IP6(ip6_src) &&
				    ptr < PTR_IP6(ip6_dst)) {
					ptr = PTR_IP(ip_src);
				} else if (ptr >= PTR_IP6(ip6_dst) &&
				    ptr < (int32_t)sizeof(struct ip6_hdr)) {
					ptr = PTR_IP(ip_dst);
				} else {
					return -1;
				}
				break;
			case ICMP6_PARAMPROB_NEXTHEADER:
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_PROTOCOL;
				break;
			default:
				return -1;
			}
			break;
		default:
			return -1;
		}
		/* write the translated fields back in place */
		icmp6->icmp6_type = type;
		icmp6->icmp6_code = code;
		/* aligns well with a icmpv4 nextmtu */
		icmp6->icmp6_mtu = htonl(mtu);
		/* icmpv4 pptr is a one most significant byte */
		if (ptr >= 0) {
			icmp6->icmp6_pptr = htonl(ptr << 24);
		}
		break;

	case AF_INET6:
		/* ICMPv4 -> ICMPv6 */
		icmp4 = arg;
		type  = icmp4->icmp_type;
		code  = icmp4->icmp_code;
		mtu   = ntohs(icmp4->icmp_nextmtu);

		switch (type) {
		case ICMP_ECHO:
			type = ICMP6_ECHO_REQUEST;
			break;
		case ICMP_ECHOREPLY:
			type = ICMP6_ECHO_REPLY;
			break;
		case ICMP_UNREACH:
			type = ICMP6_DST_UNREACH;
			switch (code) {
			case ICMP_UNREACH_NET:
			case ICMP_UNREACH_HOST:
			case ICMP_UNREACH_NET_UNKNOWN:
			case ICMP_UNREACH_HOST_UNKNOWN:
			case ICMP_UNREACH_ISOLATED:
			case ICMP_UNREACH_TOSNET:
			case ICMP_UNREACH_TOSHOST:
				code = ICMP6_DST_UNREACH_NOROUTE;
				break;
			case ICMP_UNREACH_PORT:
				code = ICMP6_DST_UNREACH_NOPORT;
				break;
			case ICMP_UNREACH_NET_PROHIB:
			case ICMP_UNREACH_HOST_PROHIB:
			case ICMP_UNREACH_FILTER_PROHIB:
			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
				code = ICMP6_DST_UNREACH_ADMIN;
				break;
			case ICMP_UNREACH_PROTOCOL:
				type = ICMP6_PARAM_PROB;
				code = ICMP6_PARAMPROB_NEXTHEADER;
				ptr  = offsetof(struct ip6_hdr, ip6_nxt);
				break;
			case ICMP_UNREACH_NEEDFRAG:
				type = ICMP6_PACKET_TOO_BIG;
				code = 0;
				/* account for the 20-byte header delta */
				mtu += 20;
				break;
			default:
				return -1;
			}
			break;
		case ICMP_TIMXCEED:
			type = ICMP6_TIME_EXCEEDED;
			break;
		case ICMP_PARAMPROB:
			type = ICMP6_PARAM_PROB;
			switch (code) {
			case ICMP_PARAMPROB_ERRATPTR:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			case ICMP_PARAMPROB_LENGTH:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			default:
				return -1;
			}

			/* map the problem pointer onto the corresponding
			 * IPv6 header field */
			ptr = icmp4->icmp_pptr;
			if (ptr == 0 || ptr == PTR_IP(ip_tos)) {
				; /* preserve */
			} else if (ptr == PTR_IP(ip_len) ||
			    ptr == PTR_IP(ip_len) + 1) {
				ptr = PTR_IP6(ip6_plen);
			} else if (ptr == PTR_IP(ip_ttl)) {
				ptr = PTR_IP6(ip6_hlim);
			} else if (ptr == PTR_IP(ip_p)) {
				ptr = PTR_IP6(ip6_nxt);
			} else if (ptr >= PTR_IP(ip_src) &&
			    ptr < PTR_IP(ip_dst)) {
				ptr = PTR_IP6(ip6_src);
			} else if (ptr >= PTR_IP(ip_dst) &&
			    ptr < (int32_t)sizeof(struct ip)) {
				ptr = PTR_IP6(ip6_dst);
			} else {
				return -1;
			}
			break;
		default:
			return -1;
		}
		/* write the translated fields back in place */
		icmp4->icmp_type = type;
		icmp4->icmp_code = code;
		icmp4->icmp_nextmtu = htons(mtu);
		if (ptr >= 0) {
			icmp4->icmp_void = htonl(ptr);
		}
		break;
	}

	return 0;
}
4686 
/*
 * NAT64: replace the IPv6 header of the packet with a synthesized IPv4
 * header built from the translated addresses in pd->naddr/pd->ndaddr,
 * fix up checksums, and re-inject the result via ip_input().
 * Note: frees pbuf if PF_NAT64 is returned.
 */
static __attribute__((noinline)) int
pf_nat64_ipv6(pbuf_t *pbuf, int off, struct pf_pdesc *pd)
{
	struct ip               *ip4;
	struct mbuf *m;

	/*
	 * ip_input asserts for rcvif to be not NULL
	 * That may not be true for two corner cases
	 * 1. If for some reason a local app sends DNS
	 * AAAA query to local host
	 * 2. If IPv6 stack in kernel internally generates a
	 * message destined for a synthesized IPv6 end-point.
	 */
	if (pbuf->pb_ifp == NULL) {
		return PF_DROP;
	}

	/* Replace the leading `off' bytes (v6 headers) with a v4 header. */
	ip4 = (struct ip *)pbuf_resize_segment(pbuf, 0, off, sizeof(*ip4));
	if (ip4 == NULL) {
		return PF_DROP;
	}

	ip4->ip_v   = 4;
	ip4->ip_hl  = 5;
	/* NOTE(review): masking the 8-bit tos with htonl(0x0ff00000) looks
	 * suspect — confirm the intended traffic-class extraction. */
	ip4->ip_tos = pd->tos & htonl(0x0ff00000);
	ip4->ip_len = htons(sizeof(*ip4) + (pd->tot_len - off));
	ip4->ip_id  = 0;
	ip4->ip_off = htons(IP_DF);
	ip4->ip_ttl = pd->ttl;
	ip4->ip_p   = pd->proto;
	ip4->ip_sum = 0;
	ip4->ip_src = pd->naddr.v4addr;
	ip4->ip_dst = pd->ndaddr.v4addr;
	ip4->ip_sum = pbuf_inet_cksum(pbuf, 0, 0, ip4->ip_hl << 2);

	/* recalculate icmp checksums */
	if (pd->proto == IPPROTO_ICMP) {
		struct icmp *icmp;
		int hlen = sizeof(*ip4);

		icmp = (struct icmp *)pbuf_contig_segment(pbuf, hlen,
		    ICMP_MINLEN);
		if (icmp == NULL) {
			return PF_DROP;
		}

		icmp->icmp_cksum = 0;
		icmp->icmp_cksum = pbuf_inet_cksum(pbuf, 0, hlen,
		    ntohs(ip4->ip_len) - hlen);
	}

	/* Hand the translated packet back to the IPv4 input path. */
	if ((m = pbuf_to_mbuf(pbuf, TRUE)) != NULL) {
		ip_input(m);
	}

	return PF_NAT64;
}
4746 
/*
 * NAT46: replace the IPv4 header of the packet with a synthesized IPv6
 * header built from pd->naddr/pd->ndaddr, fix up transport checksums,
 * and re-inject the result via ip6_input().
 * Note: frees pbuf if PF_NAT64 is returned.
 */
static __attribute__((noinline)) int
pf_nat64_ipv4(pbuf_t *pbuf, int off, struct pf_pdesc *pd)
{
	struct ip6_hdr          *ip6;
	struct mbuf *m;

	/* ip6_input requires a receive interface (see pf_nat64_ipv6). */
	if (pbuf->pb_ifp == NULL) {
		return PF_DROP;
	}

	/* Replace the leading `off' bytes (v4 header) with a v6 header. */
	ip6 = (struct ip6_hdr *)pbuf_resize_segment(pbuf, 0, off, sizeof(*ip6));
	if (ip6 == NULL) {
		return PF_DROP;
	}

	/* version 6 with the original tos in the traffic-class bits */
	ip6->ip6_vfc  = htonl((6 << 28) | (pd->tos << 20));
	ip6->ip6_plen = htons(pd->tot_len - off);
	ip6->ip6_nxt  = pd->proto;
	ip6->ip6_hlim = pd->ttl;
	ip6->ip6_src = pd->naddr.v6addr;
	ip6->ip6_dst = pd->ndaddr.v6addr;

	/* recalculate icmp6 checksums */
	if (pd->proto == IPPROTO_ICMPV6) {
		struct icmp6_hdr *icmp6;
		int hlen = sizeof(*ip6);

		icmp6 = (struct icmp6_hdr *)pbuf_contig_segment(pbuf, hlen,
		    sizeof(*icmp6));
		if (icmp6 == NULL) {
			return PF_DROP;
		}

		icmp6->icmp6_cksum = 0;
		icmp6->icmp6_cksum = pbuf_inet6_cksum(pbuf,
		    IPPROTO_ICMPV6, hlen,
		    ntohs(ip6->ip6_plen));
	} else if (pd->proto == IPPROTO_UDP) {
		struct udphdr *uh;
		int hlen = sizeof(*ip6);

		uh = (struct udphdr *)pbuf_contig_segment(pbuf, hlen,
		    sizeof(*uh));
		if (uh == NULL) {
			return PF_DROP;
		}

		/* a zero UDP checksum is legal in IPv4 but not in IPv6 */
		if (uh->uh_sum == 0) {
			uh->uh_sum = pbuf_inet6_cksum(pbuf, IPPROTO_UDP,
			    hlen, ntohs(ip6->ip6_plen));
		}
	}

	/* Hand the translated packet back to the IPv6 input path. */
	if ((m = pbuf_to_mbuf(pbuf, TRUE)) != NULL) {
		ip6_input(m);
	}

	return PF_NAT64;
}
4806 
4807 static __attribute__((noinline)) int
pf_test_rule(struct pf_rule ** rm,struct pf_state ** sm,int direction,struct pfi_kif * kif,pbuf_t * pbuf,int off,void * h,struct pf_pdesc * pd,struct pf_rule ** am,struct pf_ruleset ** rsm,struct ifqueue * ifq)4808 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
4809     struct pfi_kif *kif, pbuf_t *pbuf, int off, void *h,
4810     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
4811     struct ifqueue *ifq)
4812 {
4813 #pragma unused(h)
4814 	struct pf_rule          *nr = NULL;
4815 	struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
4816 	sa_family_t              af = pd->af;
4817 	struct pf_rule          *r, *a = NULL;
4818 	struct pf_ruleset       *ruleset = NULL;
4819 	struct pf_src_node      *nsn = NULL;
4820 	struct tcphdr           *th = pd->hdr.tcp;
4821 	struct udphdr           *uh = pd->hdr.udp;
4822 	u_short                  reason;
4823 	int                      rewrite = 0, hdrlen = 0;
4824 	int                      tag = -1;
4825 	unsigned int             rtableid = IFSCOPE_NONE;
4826 	int                      asd = 0;
4827 	int                      match = 0;
4828 	int                      state_icmp = 0;
4829 	u_int16_t                mss = tcp_mssdflt;
4830 	u_int8_t                 icmptype = 0, icmpcode = 0;
4831 #if SKYWALK
4832 	netns_token              nstoken = NULL;
4833 #endif
4834 
4835 	struct pf_grev1_hdr     *grev1 = pd->hdr.grev1;
4836 	union pf_state_xport bxport, bdxport, nxport, sxport, dxport;
4837 	struct pf_state_key      psk;
4838 
4839 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
4840 
4841 	if (direction == PF_IN && pf_check_congestion(ifq)) {
4842 		REASON_SET(&reason, PFRES_CONGEST);
4843 		return PF_DROP;
4844 	}
4845 
4846 	hdrlen = 0;
4847 	sxport.spi = 0;
4848 	dxport.spi = 0;
4849 	nxport.spi = 0;
4850 
4851 	switch (pd->proto) {
4852 	case IPPROTO_TCP:
4853 		sxport.port = th->th_sport;
4854 		dxport.port = th->th_dport;
4855 		hdrlen = sizeof(*th);
4856 		break;
4857 	case IPPROTO_UDP:
4858 		sxport.port = uh->uh_sport;
4859 		dxport.port = uh->uh_dport;
4860 		hdrlen = sizeof(*uh);
4861 		break;
4862 #if INET
4863 	case IPPROTO_ICMP:
4864 		if (pd->af != AF_INET) {
4865 			break;
4866 		}
4867 		sxport.port = dxport.port = pd->hdr.icmp->icmp_id;
4868 		hdrlen = ICMP_MINLEN;
4869 		icmptype = pd->hdr.icmp->icmp_type;
4870 		icmpcode = pd->hdr.icmp->icmp_code;
4871 
4872 		if (ICMP_ERRORTYPE(icmptype)) {
4873 			state_icmp++;
4874 		}
4875 		break;
4876 #endif /* INET */
4877 	case IPPROTO_ICMPV6:
4878 		if (pd->af != AF_INET6) {
4879 			break;
4880 		}
4881 		sxport.port = dxport.port = pd->hdr.icmp6->icmp6_id;
4882 		hdrlen = sizeof(*pd->hdr.icmp6);
4883 		icmptype = pd->hdr.icmp6->icmp6_type;
4884 		icmpcode = pd->hdr.icmp6->icmp6_code;
4885 
4886 		if (ICMP6_ERRORTYPE(icmptype)) {
4887 			state_icmp++;
4888 		}
4889 		break;
4890 	case IPPROTO_GRE:
4891 		if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
4892 			sxport.call_id = dxport.call_id =
4893 			    pd->hdr.grev1->call_id;
4894 			hdrlen = sizeof(*pd->hdr.grev1);
4895 		}
4896 		break;
4897 	case IPPROTO_ESP:
4898 		sxport.spi = 0;
4899 		dxport.spi = pd->hdr.esp->spi;
4900 		hdrlen = sizeof(*pd->hdr.esp);
4901 		break;
4902 	}
4903 
4904 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
4905 
4906 	bxport = sxport;
4907 	bdxport = dxport;
4908 
4909 	if (direction == PF_OUT) {
4910 		nxport = sxport;
4911 	} else {
4912 		nxport = dxport;
4913 	}
4914 
4915 	/* check packet for BINAT/NAT/RDR */
4916 	if ((nr = pf_get_translation_aux(pd, pbuf, off, direction, kif, &nsn,
4917 	    saddr, &sxport, daddr, &dxport, &nxport
4918 #if SKYWALK
4919 	    , &nstoken
4920 #endif
4921 	    )) != NULL) {
4922 		int ua;
4923 		u_int16_t dport;
4924 
4925 		if (pd->af != pd->naf) {
4926 			ua = 0;
4927 		} else {
4928 			ua = 1;
4929 		}
4930 
4931 		PF_ACPY(&pd->baddr, saddr, af);
4932 		PF_ACPY(&pd->bdaddr, daddr, af);
4933 
4934 		switch (pd->proto) {
4935 		case IPPROTO_TCP:
4936 			if (pd->af != pd->naf ||
4937 			    PF_ANEQ(saddr, &pd->naddr, pd->af)) {
4938 				pf_change_ap(direction, pd->mp, saddr,
4939 				    &th->th_sport, pd->ip_sum, &th->th_sum,
4940 				    &pd->naddr, nxport.port, 0, af,
4941 				    pd->naf, ua);
4942 				sxport.port = th->th_sport;
4943 			}
4944 
4945 			if (pd->af != pd->naf ||
4946 			    PF_ANEQ(daddr, &pd->ndaddr, pd->af) ||
4947 			    (nr && (nr->action == PF_RDR) &&
4948 			    (th->th_dport != nxport.port))) {
4949 				if (nr && nr->action == PF_RDR) {
4950 					dport = nxport.port;
4951 				} else {
4952 					dport = th->th_dport;
4953 				}
4954 				pf_change_ap(direction, pd->mp, daddr,
4955 				    &th->th_dport, pd->ip_sum,
4956 				    &th->th_sum, &pd->ndaddr,
4957 				    dport, 0, af, pd->naf, ua);
4958 				dxport.port = th->th_dport;
4959 			}
4960 			rewrite++;
4961 			break;
4962 
4963 		case IPPROTO_UDP:
4964 			if (pd->af != pd->naf ||
4965 			    PF_ANEQ(saddr, &pd->naddr, pd->af)) {
4966 				pf_change_ap(direction, pd->mp, saddr,
4967 				    &uh->uh_sport, pd->ip_sum,
4968 				    &uh->uh_sum, &pd->naddr,
4969 				    nxport.port, 1, af, pd->naf, ua);
4970 				sxport.port = uh->uh_sport;
4971 			}
4972 
4973 			if (pd->af != pd->naf ||
4974 			    PF_ANEQ(daddr, &pd->ndaddr, pd->af) ||
4975 			    (nr && (nr->action == PF_RDR) &&
4976 			    (uh->uh_dport != nxport.port))) {
4977 				if (nr && nr->action == PF_RDR) {
4978 					dport = nxport.port;
4979 				} else {
4980 					dport = uh->uh_dport;
4981 				}
4982 				pf_change_ap(direction, pd->mp, daddr,
4983 				    &uh->uh_dport, pd->ip_sum,
4984 				    &uh->uh_sum, &pd->ndaddr,
4985 				    dport, 0, af, pd->naf, ua);
4986 				dxport.port = uh->uh_dport;
4987 			}
4988 			rewrite++;
4989 			break;
4990 #if INET
4991 		case IPPROTO_ICMP:
4992 			if (pd->af != AF_INET) {
4993 				break;
4994 			}
4995 			/*
4996 			 * TODO:
4997 			 * pd->af != pd->naf not handled yet here and would be
4998 			 * needed for NAT46 needed to support XLAT.
4999 			 * Will cross the bridge when it comes.
5000 			 */
5001 			if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5002 				pf_change_a(&saddr->v4addr.s_addr, pd->ip_sum,
5003 				    pd->naddr.v4addr.s_addr, 0);
5004 				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
5005 					pd->hdr.icmp->icmp_cksum, sxport.port,
5006 					nxport.port, 0);
5007 				pd->hdr.icmp->icmp_id = nxport.port;
5008 			}
5009 
5010 			if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5011 				pf_change_a(&daddr->v4addr.s_addr, pd->ip_sum,
5012 				    pd->ndaddr.v4addr.s_addr, 0);
5013 			}
5014 			++rewrite;
5015 			break;
5016 #endif /* INET */
5017 		case IPPROTO_ICMPV6:
5018 			if (pd->af != AF_INET6) {
5019 				break;
5020 			}
5021 
5022 			if (pd->af != pd->naf ||
5023 			    PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5024 				pf_change_addr(saddr,
5025 				    &pd->hdr.icmp6->icmp6_cksum,
5026 				    &pd->naddr, 0, pd->af, pd->naf);
5027 			}
5028 
5029 			if (pd->af != pd->naf ||
5030 			    PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5031 				pf_change_addr(daddr,
5032 				    &pd->hdr.icmp6->icmp6_cksum,
5033 				    &pd->ndaddr, 0, pd->af, pd->naf);
5034 			}
5035 
5036 			if (pd->af != pd->naf) {
5037 				if (pf_translate_icmp_af(AF_INET,
5038 				    pd->hdr.icmp6)) {
5039 					return PF_DROP;
5040 				}
5041 				pd->proto = IPPROTO_ICMP;
5042 			}
5043 			rewrite++;
5044 			break;
5045 		case IPPROTO_GRE:
5046 			if ((direction == PF_IN) &&
5047 			    (pd->proto_variant == PF_GRE_PPTP_VARIANT)) {
5048 				grev1->call_id = nxport.call_id;
5049 			}
5050 
5051 			switch (pd->af) {
5052 #if INET
5053 			case AF_INET:
5054 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5055 					pf_change_a(&saddr->v4addr.s_addr,
5056 					    pd->ip_sum,
5057 					    pd->naddr.v4addr.s_addr, 0);
5058 				}
5059 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5060 					pf_change_a(&daddr->v4addr.s_addr,
5061 					    pd->ip_sum,
5062 					    pd->ndaddr.v4addr.s_addr, 0);
5063 				}
5064 				break;
5065 #endif /* INET */
5066 			case AF_INET6:
5067 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5068 					PF_ACPY(saddr, &pd->naddr, AF_INET6);
5069 				}
5070 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5071 					PF_ACPY(daddr, &pd->ndaddr, AF_INET6);
5072 				}
5073 				break;
5074 			}
5075 			++rewrite;
5076 			break;
5077 		case IPPROTO_ESP:
5078 			if (direction == PF_OUT) {
5079 				bxport.spi = 0;
5080 			}
5081 
5082 			switch (pd->af) {
5083 #if INET
5084 			case AF_INET:
5085 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5086 					pf_change_a(&saddr->v4addr.s_addr,
5087 					    pd->ip_sum, pd->naddr.v4addr.s_addr, 0);
5088 				}
5089 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5090 					pf_change_a(&daddr->v4addr.s_addr,
5091 					    pd->ip_sum,
5092 					    pd->ndaddr.v4addr.s_addr, 0);
5093 				}
5094 				break;
5095 #endif /* INET */
5096 			case AF_INET6:
5097 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5098 					PF_ACPY(saddr, &pd->naddr, AF_INET6);
5099 				}
5100 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5101 					PF_ACPY(daddr, &pd->ndaddr, AF_INET6);
5102 				}
5103 				break;
5104 			}
5105 			break;
5106 		default:
5107 			switch (pd->af) {
5108 #if INET
5109 			case AF_INET:
5110 				if ((pd->naf != AF_INET) ||
5111 				    (PF_ANEQ(saddr, &pd->naddr, pd->af))) {
5112 					pf_change_addr(saddr, pd->ip_sum,
5113 					    &pd->naddr, 0, af, pd->naf);
5114 				}
5115 
5116 				if ((pd->naf != AF_INET) ||
5117 				    (PF_ANEQ(daddr, &pd->ndaddr, pd->af))) {
5118 					pf_change_addr(daddr, pd->ip_sum,
5119 					    &pd->ndaddr, 0, af, pd->naf);
5120 				}
5121 				break;
5122 #endif /* INET */
5123 			case AF_INET6:
5124 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5125 					PF_ACPY(saddr, &pd->naddr, af);
5126 				}
5127 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5128 					PF_ACPY(daddr, &pd->ndaddr, af);
5129 				}
5130 				break;
5131 			}
5132 			break;
5133 		}
5134 
5135 		if (nr->natpass) {
5136 			r = NULL;
5137 		}
5138 		pd->nat_rule = nr;
5139 		pd->af = pd->naf;
5140 	} else {
5141 #if SKYWALK
5142 		VERIFY(!NETNS_TOKEN_VALID(&nstoken));
5143 #endif
5144 	}
5145 
5146 	if (nr && nr->tag > 0) {
5147 		tag = nr->tag;
5148 	}
5149 
5150 	while (r != NULL) {
5151 		r->evaluations++;
5152 		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
5153 			r = r->skip[PF_SKIP_IFP].ptr;
5154 		} else if (r->direction && r->direction != direction) {
5155 			r = r->skip[PF_SKIP_DIR].ptr;
5156 		} else if (r->af && r->af != pd->af) {
5157 			r = r->skip[PF_SKIP_AF].ptr;
5158 		} else if (r->proto && r->proto != pd->proto) {
5159 			r = r->skip[PF_SKIP_PROTO].ptr;
5160 		} else if (PF_MISMATCHAW(&r->src.addr, saddr, pd->af,
5161 		    r->src.neg, kif)) {
5162 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
5163 		}
5164 		/* tcp/udp only. port_op always 0 in other cases */
5165 		else if (r->proto == pd->proto &&
5166 		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
5167 		    r->src.xport.range.op &&
5168 		    !pf_match_port(r->src.xport.range.op,
5169 		    r->src.xport.range.port[0], r->src.xport.range.port[1],
5170 		    th->th_sport)) {
5171 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
5172 		} else if (PF_MISMATCHAW(&r->dst.addr, daddr, pd->af,
5173 		    r->dst.neg, NULL)) {
5174 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
5175 		}
5176 		/* tcp/udp only. port_op always 0 in other cases */
5177 		else if (r->proto == pd->proto &&
5178 		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
5179 		    r->dst.xport.range.op &&
5180 		    !pf_match_port(r->dst.xport.range.op,
5181 		    r->dst.xport.range.port[0], r->dst.xport.range.port[1],
5182 		    th->th_dport)) {
5183 			r = r->skip[PF_SKIP_DST_PORT].ptr;
5184 		}
5185 		/* icmp only. type always 0 in other cases */
5186 		else if (r->type && r->type != icmptype + 1) {
5187 			r = TAILQ_NEXT(r, entries);
5188 		}
5189 		/* icmp only. type always 0 in other cases */
5190 		else if (r->code && r->code != icmpcode + 1) {
5191 			r = TAILQ_NEXT(r, entries);
5192 		} else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
5193 		    !(r->tos & pd->tos)) {
5194 			r = TAILQ_NEXT(r, entries);
5195 		} else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
5196 		    !(r->tos & (pd->tos & DSCP_MASK))) {
5197 			r = TAILQ_NEXT(r, entries);
5198 		} else if ((r->rule_flag & PFRULE_SC) && r->tos &&
5199 		    ((r->tos & SCIDX_MASK) != pd->sc)) {
5200 			r = TAILQ_NEXT(r, entries);
5201 		} else if (r->rule_flag & PFRULE_FRAGMENT) {
5202 			r = TAILQ_NEXT(r, entries);
5203 		} else if (pd->proto == IPPROTO_TCP &&
5204 		    (r->flagset & th->th_flags) != r->flags) {
5205 			r = TAILQ_NEXT(r, entries);
5206 		}
5207 		/* tcp/udp only. uid.op always 0 in other cases */
5208 		else if (r->uid.op && (pd->lookup.done || ((void)(pd->lookup.done =
5209 		    pf_socket_lookup(direction, pd)), 1)) &&
5210 		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
5211 		    pd->lookup.uid)) {
5212 			r = TAILQ_NEXT(r, entries);
5213 		}
5214 		/* tcp/udp only. gid.op always 0 in other cases */
5215 		else if (r->gid.op && (pd->lookup.done || ((void)(pd->lookup.done =
5216 		    pf_socket_lookup(direction, pd)), 1)) &&
5217 		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
5218 		    pd->lookup.gid)) {
5219 			r = TAILQ_NEXT(r, entries);
5220 		} else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
5221 			r = TAILQ_NEXT(r, entries);
5222 		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
5223 			r = TAILQ_NEXT(r, entries);
5224 		} else if (r->os_fingerprint != PF_OSFP_ANY &&
5225 		    (pd->proto != IPPROTO_TCP || !pf_osfp_match(
5226 			    pf_osfp_fingerprint(pd, pbuf, off, th),
5227 			    r->os_fingerprint))) {
5228 			r = TAILQ_NEXT(r, entries);
5229 		} else {
5230 			if (r->tag) {
5231 				tag = r->tag;
5232 			}
5233 			if (PF_RTABLEID_IS_VALID(r->rtableid)) {
5234 				rtableid = r->rtableid;
5235 			}
5236 			if (r->anchor == NULL) {
5237 				match = 1;
5238 				*rm = r;
5239 				*am = a;
5240 				*rsm = ruleset;
5241 				if ((*rm)->quick) {
5242 					break;
5243 				}
5244 				r = TAILQ_NEXT(r, entries);
5245 			} else {
5246 				pf_step_into_anchor(&asd, &ruleset,
5247 				    PF_RULESET_FILTER, &r, &a, &match);
5248 			}
5249 		}
5250 		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
5251 		    PF_RULESET_FILTER, &r, &a, &match)) {
5252 			break;
5253 		}
5254 	}
5255 	r = *rm;
5256 	a = *am;
5257 	ruleset = *rsm;
5258 
5259 	REASON_SET(&reason, PFRES_MATCH);
5260 
5261 	if (r->log || (nr != NULL && nr->log)) {
5262 		if (rewrite > 0) {
5263 			if (rewrite < off + hdrlen) {
5264 				rewrite = off + hdrlen;
5265 			}
5266 
5267 			if (pf_lazy_makewritable(pd, pbuf, rewrite) == NULL) {
5268 				REASON_SET(&reason, PFRES_MEMORY);
5269 #if SKYWALK
5270 				netns_release(&nstoken);
5271 #endif
5272 				return PF_DROP;
5273 			}
5274 
5275 			pbuf_copy_back(pbuf, off, hdrlen, pd->hdr.any);
5276 		}
5277 		PFLOG_PACKET(kif, h, pbuf, pd->af, direction, reason,
5278 		    r->log ? r : nr, a, ruleset, pd);
5279 	}
5280 
5281 	if ((r->action == PF_DROP) &&
5282 	    ((r->rule_flag & PFRULE_RETURNRST) ||
5283 	    (r->rule_flag & PFRULE_RETURNICMP) ||
5284 	    (r->rule_flag & PFRULE_RETURN))) {
5285 		/* undo NAT changes, if they have taken place */
5286 		/* XXX For NAT64 we are not reverting the changes */
5287 		if (nr != NULL && nr->action != PF_NAT64) {
5288 			if (direction == PF_OUT) {
5289 				pd->af = af;
5290 				switch (pd->proto) {
5291 				case IPPROTO_TCP:
5292 					pf_change_ap(direction, pd->mp, saddr,
5293 					    &th->th_sport, pd->ip_sum,
5294 					    &th->th_sum, &pd->baddr,
5295 					    bxport.port, 0, af, pd->af, 1);
5296 					sxport.port = th->th_sport;
5297 					rewrite++;
5298 					break;
5299 				case IPPROTO_UDP:
5300 					pf_change_ap(direction, pd->mp, saddr,
5301 					    &pd->hdr.udp->uh_sport, pd->ip_sum,
5302 					    &pd->hdr.udp->uh_sum, &pd->baddr,
5303 					    bxport.port, 1, af, pd->af, 1);
5304 					sxport.port = pd->hdr.udp->uh_sport;
5305 					rewrite++;
5306 					break;
5307 				case IPPROTO_ICMP:
5308 				case IPPROTO_ICMPV6:
5309 					/* nothing! */
5310 					break;
5311 				case IPPROTO_GRE:
5312 					PF_ACPY(&pd->baddr, saddr, af);
5313 					++rewrite;
5314 					switch (af) {
5315 #if INET
5316 					case AF_INET:
5317 						pf_change_a(&saddr->v4addr.s_addr,
5318 						    pd->ip_sum,
5319 						    pd->baddr.v4addr.s_addr, 0);
5320 						break;
5321 #endif /* INET */
5322 					case AF_INET6:
5323 						PF_ACPY(saddr, &pd->baddr,
5324 						    AF_INET6);
5325 						break;
5326 					}
5327 					break;
5328 				case IPPROTO_ESP:
5329 					PF_ACPY(&pd->baddr, saddr, af);
5330 					switch (af) {
5331 #if INET
5332 					case AF_INET:
5333 						pf_change_a(&saddr->v4addr.s_addr,
5334 						    pd->ip_sum,
5335 						    pd->baddr.v4addr.s_addr, 0);
5336 						break;
5337 #endif /* INET */
5338 					case AF_INET6:
5339 						PF_ACPY(saddr, &pd->baddr,
5340 						    AF_INET6);
5341 						break;
5342 					}
5343 					break;
5344 				default:
5345 					switch (af) {
5346 					case AF_INET:
5347 						pf_change_a(&saddr->v4addr.s_addr,
5348 						    pd->ip_sum,
5349 						    pd->baddr.v4addr.s_addr, 0);
5350 						break;
5351 					case AF_INET6:
5352 						PF_ACPY(saddr, &pd->baddr, af);
5353 						break;
5354 					}
5355 				}
5356 			} else {
5357 				switch (pd->proto) {
5358 				case IPPROTO_TCP:
5359 					pf_change_ap(direction, pd->mp, daddr,
5360 					    &th->th_dport, pd->ip_sum,
5361 					    &th->th_sum, &pd->bdaddr,
5362 					    bdxport.port, 0, af, pd->af, 1);
5363 					dxport.port = th->th_dport;
5364 					rewrite++;
5365 					break;
5366 				case IPPROTO_UDP:
5367 					pf_change_ap(direction, pd->mp, daddr,
5368 					    &pd->hdr.udp->uh_dport, pd->ip_sum,
5369 					    &pd->hdr.udp->uh_sum, &pd->bdaddr,
5370 					    bdxport.port, 1, af, pd->af, 1);
5371 					dxport.port = pd->hdr.udp->uh_dport;
5372 					rewrite++;
5373 					break;
5374 				case IPPROTO_ICMP:
5375 				case IPPROTO_ICMPV6:
5376 					/* nothing! */
5377 					break;
5378 				case IPPROTO_GRE:
5379 					if (pd->proto_variant ==
5380 					    PF_GRE_PPTP_VARIANT) {
5381 						grev1->call_id =
5382 						    bdxport.call_id;
5383 					}
5384 					++rewrite;
5385 					switch (af) {
5386 #if INET
5387 					case AF_INET:
5388 						pf_change_a(&daddr->v4addr.s_addr,
5389 						    pd->ip_sum,
5390 						    pd->bdaddr.v4addr.s_addr, 0);
5391 						break;
5392 #endif /* INET */
5393 					case AF_INET6:
5394 						PF_ACPY(daddr, &pd->bdaddr,
5395 						    AF_INET6);
5396 						break;
5397 					}
5398 					break;
5399 				case IPPROTO_ESP:
5400 					switch (af) {
5401 #if INET
5402 					case AF_INET:
5403 						pf_change_a(&daddr->v4addr.s_addr,
5404 						    pd->ip_sum,
5405 						    pd->bdaddr.v4addr.s_addr, 0);
5406 						break;
5407 #endif /* INET */
5408 					case AF_INET6:
5409 						PF_ACPY(daddr, &pd->bdaddr,
5410 						    AF_INET6);
5411 						break;
5412 					}
5413 					break;
5414 				default:
5415 					switch (af) {
5416 					case AF_INET:
5417 						pf_change_a(&daddr->v4addr.s_addr,
5418 						    pd->ip_sum,
5419 						    pd->bdaddr.v4addr.s_addr, 0);
5420 						break;
5421 					case AF_INET6:
5422 						PF_ACPY(daddr, &pd->bdaddr, af);
5423 						break;
5424 					}
5425 				}
5426 			}
5427 		}
5428 		if (pd->proto == IPPROTO_TCP &&
5429 		    ((r->rule_flag & PFRULE_RETURNRST) ||
5430 		    (r->rule_flag & PFRULE_RETURN)) &&
5431 		    !(th->th_flags & TH_RST)) {
5432 			u_int32_t        ack = ntohl(th->th_seq) + pd->p_len;
5433 			int              len = 0;
5434 			struct ip       *h4;
5435 			struct ip6_hdr  *h6;
5436 
5437 			switch (pd->af) {
5438 			case AF_INET:
5439 				h4 = pbuf->pb_data;
5440 				len = ntohs(h4->ip_len) - off;
5441 				break;
5442 			case AF_INET6:
5443 				h6 = pbuf->pb_data;
5444 				len = ntohs(h6->ip6_plen) -
5445 				    (off - sizeof(*h6));
5446 				break;
5447 			}
5448 
5449 			if (pf_check_proto_cksum(pbuf, off, len, IPPROTO_TCP,
5450 			    pd->af)) {
5451 				REASON_SET(&reason, PFRES_PROTCKSUM);
5452 			} else {
5453 				if (th->th_flags & TH_SYN) {
5454 					ack++;
5455 				}
5456 				if (th->th_flags & TH_FIN) {
5457 					ack++;
5458 				}
5459 				pf_send_tcp(r, pd->af, pd->dst,
5460 				    pd->src, th->th_dport, th->th_sport,
5461 				    ntohl(th->th_ack), ack, TH_RST | TH_ACK, 0, 0,
5462 				    r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
5463 			}
5464 		} else if (pd->proto != IPPROTO_ICMP && pd->af == AF_INET &&
5465 		    pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
5466 		    r->return_icmp) {
5467 			pf_send_icmp(pbuf, r->return_icmp >> 8,
5468 			    r->return_icmp & 255, pd->af, r);
5469 		} else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
5470 		    pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
5471 		    r->return_icmp6) {
5472 			pf_send_icmp(pbuf, r->return_icmp6 >> 8,
5473 			    r->return_icmp6 & 255, pd->af, r);
5474 		}
5475 	}
5476 
5477 	if (r->action == PF_DROP) {
5478 #if SKYWALK
5479 		netns_release(&nstoken);
5480 #endif
5481 		return PF_DROP;
5482 	}
5483 
5484 	/* prepare state key, for flowhash and/or the state (if created) */
5485 	bzero(&psk, sizeof(psk));
5486 	psk.proto = pd->proto;
5487 	psk.direction = direction;
5488 	if (pd->proto == IPPROTO_UDP) {
5489 		if (ntohs(pd->hdr.udp->uh_sport) == PF_IKE_PORT &&
5490 		    ntohs(pd->hdr.udp->uh_dport) == PF_IKE_PORT) {
5491 			psk.proto_variant = PF_EXTFILTER_APD;
5492 		} else {
5493 			psk.proto_variant = nr ? nr->extfilter : r->extfilter;
5494 			if (psk.proto_variant < PF_EXTFILTER_APD) {
5495 				psk.proto_variant = PF_EXTFILTER_APD;
5496 			}
5497 		}
5498 	} else if (pd->proto == IPPROTO_GRE) {
5499 		psk.proto_variant = pd->proto_variant;
5500 	}
5501 	if (direction == PF_OUT) {
5502 		psk.af_gwy = af;
5503 		PF_ACPY(&psk.gwy.addr, saddr, af);
5504 		PF_ACPY(&psk.ext_gwy.addr, daddr, af);
5505 		switch (pd->proto) {
5506 		case IPPROTO_ESP:
5507 			psk.gwy.xport.spi = 0;
5508 			psk.ext_gwy.xport.spi = pd->hdr.esp->spi;
5509 			break;
5510 		case IPPROTO_ICMP:
5511 		case IPPROTO_ICMPV6:
5512 			/*
5513 			 * NAT64 requires protocol translation  between ICMPv4
5514 			 * and ICMPv6. TCP and UDP do not require protocol
5515 			 * translation. To avoid adding complexity just to
5516 			 * handle ICMP(v4addr/v6addr), we always lookup  for
5517 			 * proto = IPPROTO_ICMP on both LAN and WAN side
5518 			 */
5519 			psk.proto = IPPROTO_ICMP;
5520 			psk.gwy.xport.port = nxport.port;
5521 			psk.ext_gwy.xport.spi = 0;
5522 			break;
5523 		default:
5524 			psk.gwy.xport = sxport;
5525 			psk.ext_gwy.xport = dxport;
5526 			break;
5527 		}
5528 		psk.af_lan = af;
5529 		if (nr != NULL) {
5530 			PF_ACPY(&psk.lan.addr, &pd->baddr, af);
5531 			psk.lan.xport = bxport;
5532 			PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af);
5533 			psk.ext_lan.xport = bdxport;
5534 		} else {
5535 			PF_ACPY(&psk.lan.addr, &psk.gwy.addr, af);
5536 			psk.lan.xport = psk.gwy.xport;
5537 			PF_ACPY(&psk.ext_lan.addr, &psk.ext_gwy.addr, af);
5538 			psk.ext_lan.xport = psk.ext_gwy.xport;
5539 		}
5540 	} else {
5541 		psk.af_lan = af;
5542 		if (nr && nr->action == PF_NAT64) {
5543 			PF_ACPY(&psk.lan.addr, &pd->baddr, af);
5544 			PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af);
5545 		} else {
5546 			PF_ACPY(&psk.lan.addr, daddr, af);
5547 			PF_ACPY(&psk.ext_lan.addr, saddr, af);
5548 		}
5549 		switch (pd->proto) {
5550 		case IPPROTO_ICMP:
5551 		case IPPROTO_ICMPV6:
5552 			/*
5553 			 * NAT64 requires protocol translation  between ICMPv4
5554 			 * and ICMPv6. TCP and UDP do not require protocol
5555 			 * translation. To avoid adding complexity just to
5556 			 * handle ICMP(v4addr/v6addr), we always lookup  for
5557 			 * proto = IPPROTO_ICMP on both LAN and WAN side
5558 			 */
5559 			psk.proto = IPPROTO_ICMP;
5560 			if (nr && nr->action == PF_NAT64) {
5561 				psk.lan.xport = bxport;
5562 				psk.ext_lan.xport = bxport;
5563 			} else {
5564 				psk.lan.xport = nxport;
5565 				psk.ext_lan.xport.spi = 0;
5566 			}
5567 			break;
5568 		case IPPROTO_ESP:
5569 			psk.ext_lan.xport.spi = 0;
5570 			psk.lan.xport.spi = pd->hdr.esp->spi;
5571 			break;
5572 		default:
5573 			if (nr != NULL) {
5574 				if (nr->action == PF_NAT64) {
5575 					psk.lan.xport = bxport;
5576 					psk.ext_lan.xport = bdxport;
5577 				} else {
5578 					psk.lan.xport = dxport;
5579 					psk.ext_lan.xport = sxport;
5580 				}
5581 			} else {
5582 				psk.lan.xport = dxport;
5583 				psk.ext_lan.xport = sxport;
5584 			}
5585 			break;
5586 		}
5587 		psk.af_gwy = pd->naf;
5588 		if (nr != NULL) {
5589 			if (nr->action == PF_NAT64) {
5590 				PF_ACPY(&psk.gwy.addr, &pd->naddr, pd->naf);
5591 				PF_ACPY(&psk.ext_gwy.addr, &pd->ndaddr,
5592 				    pd->naf);
5593 				if ((pd->proto == IPPROTO_ICMPV6) ||
5594 				    (pd->proto == IPPROTO_ICMP)) {
5595 					psk.gwy.xport = nxport;
5596 					psk.ext_gwy.xport = nxport;
5597 				} else {
5598 					psk.gwy.xport = sxport;
5599 					psk.ext_gwy.xport = dxport;
5600 				}
5601 			} else {
5602 				PF_ACPY(&psk.gwy.addr, &pd->bdaddr, af);
5603 				psk.gwy.xport = bdxport;
5604 				PF_ACPY(&psk.ext_gwy.addr, saddr, af);
5605 				psk.ext_gwy.xport = sxport;
5606 			}
5607 		} else {
5608 			PF_ACPY(&psk.gwy.addr, &psk.lan.addr, af);
5609 			psk.gwy.xport = psk.lan.xport;
5610 			PF_ACPY(&psk.ext_gwy.addr, &psk.ext_lan.addr, af);
5611 			psk.ext_gwy.xport = psk.ext_lan.xport;
5612 		}
5613 	}
5614 	if (pd->pktflags & PKTF_FLOW_ID) {
5615 		/* flow hash was already computed outside of PF */
5616 		psk.flowsrc = pd->flowsrc;
5617 		psk.flowhash = pd->flowhash;
5618 	} else {
5619 		/* compute flow hash and store it in state key */
5620 		psk.flowsrc = FLOWSRC_PF;
5621 		psk.flowhash = pf_calc_state_key_flowhash(&psk);
5622 		pd->flowsrc = psk.flowsrc;
5623 		pd->flowhash = psk.flowhash;
5624 		pd->pktflags |= PKTF_FLOW_ID;
5625 		pd->pktflags &= ~PKTF_FLOW_ADV;
5626 	}
5627 
5628 	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, pd)) {
5629 		REASON_SET(&reason, PFRES_MEMORY);
5630 #if SKYWALK
5631 		netns_release(&nstoken);
5632 #endif
5633 		return PF_DROP;
5634 	}
5635 
5636 	if (!state_icmp && (r->keep_state || nr != NULL ||
5637 	    (pd->flags & PFDESC_TCP_NORM))) {
5638 		/* create new state */
5639 		struct pf_state *s = NULL;
5640 		struct pf_state_key *sk = NULL;
5641 		struct pf_src_node *sn = NULL;
5642 		struct pf_ike_hdr ike;
5643 
5644 		if (pd->proto == IPPROTO_UDP) {
5645 			size_t plen = pbuf->pb_packet_len - off - sizeof(*uh);
5646 
5647 			if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
5648 			    ntohs(uh->uh_dport) == PF_IKE_PORT &&
5649 			    plen >= PF_IKE_PACKET_MINSIZE) {
5650 				if (plen > PF_IKE_PACKET_MINSIZE) {
5651 					plen = PF_IKE_PACKET_MINSIZE;
5652 				}
5653 				pbuf_copy_data(pbuf, off + sizeof(*uh), plen,
5654 				    &ike);
5655 			}
5656 		}
5657 
5658 		if (nr != NULL && pd->proto == IPPROTO_ESP &&
5659 		    direction == PF_OUT) {
5660 			struct pf_state_key_cmp sk0;
5661 			struct pf_state *s0;
5662 
5663 			/*
5664 			 * <[email protected]>
5665 			 * This squelches state creation if the external
5666 			 * address matches an existing incomplete state with a
5667 			 * different internal address.  Only one 'blocking'
5668 			 * partial state is allowed for each external address.
5669 			 */
5670 #if SKYWALK
5671 			/*
5672 			 * XXXSCW:
5673 			 *
5674 			 * It's not clear how this impacts netns. The original
5675 			 * state will hold the port reservation token but what
5676 			 * happens to other "Cone NAT" states when the first is
5677 			 * torn down?
5678 			 */
5679 #endif
5680 			memset(&sk0, 0, sizeof(sk0));
5681 			sk0.af_gwy = pd->af;
5682 			sk0.proto = IPPROTO_ESP;
5683 			PF_ACPY(&sk0.gwy.addr, saddr, sk0.af_gwy);
5684 			PF_ACPY(&sk0.ext_gwy.addr, daddr, sk0.af_gwy);
5685 			s0 = pf_find_state(kif, &sk0, PF_IN);
5686 
5687 			if (s0 && PF_ANEQ(&s0->state_key->lan.addr,
5688 			    pd->src, pd->af)) {
5689 				nsn = 0;
5690 				goto cleanup;
5691 			}
5692 		}
5693 
5694 		/* check maximums */
5695 		if (r->max_states && (r->states >= r->max_states)) {
5696 			pf_status.lcounters[LCNT_STATES]++;
5697 			REASON_SET(&reason, PFRES_MAXSTATES);
5698 			goto cleanup;
5699 		}
5700 		/* src node for filter rule */
5701 		if ((r->rule_flag & PFRULE_SRCTRACK ||
5702 		    r->rpool.opts & PF_POOL_STICKYADDR) &&
5703 		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
5704 			REASON_SET(&reason, PFRES_SRCLIMIT);
5705 			goto cleanup;
5706 		}
5707 		/* src node for translation rule */
5708 		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
5709 		    ((direction == PF_OUT &&
5710 		    nr->action != PF_RDR &&
5711 		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
5712 		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
5713 			REASON_SET(&reason, PFRES_SRCLIMIT);
5714 			goto cleanup;
5715 		}
5716 		s = pool_get(&pf_state_pl, PR_WAITOK);
5717 		if (s == NULL) {
5718 			REASON_SET(&reason, PFRES_MEMORY);
5719 cleanup:
5720 			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
5721 				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
5722 				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
5723 				pf_status.src_nodes--;
5724 				pool_put(&pf_src_tree_pl, sn);
5725 			}
5726 			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
5727 			    nsn->expire == 0) {
5728 				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
5729 				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
5730 				pf_status.src_nodes--;
5731 				pool_put(&pf_src_tree_pl, nsn);
5732 			}
5733 			if (sk != NULL) {
5734 				if (sk->app_state) {
5735 					pool_put(&pf_app_state_pl,
5736 					    sk->app_state);
5737 				}
5738 				pool_put(&pf_state_key_pl, sk);
5739 			}
5740 #if SKYWALK
5741 			netns_release(&nstoken);
5742 #endif
5743 			return PF_DROP;
5744 		}
5745 		bzero(s, sizeof(*s));
5746 		TAILQ_INIT(&s->unlink_hooks);
5747 		s->rule.ptr = r;
5748 		s->nat_rule.ptr = nr;
5749 		s->anchor.ptr = a;
5750 		STATE_INC_COUNTERS(s);
5751 		s->allow_opts = r->allow_opts;
5752 		s->log = r->log & PF_LOG_ALL;
5753 		if (nr != NULL) {
5754 			s->log |= nr->log & PF_LOG_ALL;
5755 		}
5756 		switch (pd->proto) {
5757 		case IPPROTO_TCP:
5758 			s->src.seqlo = ntohl(th->th_seq);
5759 			s->src.seqhi = s->src.seqlo + pd->p_len + 1;
5760 			if ((th->th_flags & (TH_SYN | TH_ACK)) ==
5761 			    TH_SYN && r->keep_state == PF_STATE_MODULATE) {
5762 				/* Generate sequence number modulator */
5763 				if ((s->src.seqdiff = pf_tcp_iss(pd) -
5764 				    s->src.seqlo) == 0) {
5765 					s->src.seqdiff = 1;
5766 				}
5767 				pf_change_a(&th->th_seq, &th->th_sum,
5768 				    htonl(s->src.seqlo + s->src.seqdiff), 0);
5769 				rewrite = off + sizeof(*th);
5770 			} else {
5771 				s->src.seqdiff = 0;
5772 			}
5773 			if (th->th_flags & TH_SYN) {
5774 				s->src.seqhi++;
5775 				s->src.wscale = pf_get_wscale(pbuf, off,
5776 				    th->th_off, af);
5777 			}
5778 			s->src.max_win = MAX(ntohs(th->th_win), 1);
5779 			if (s->src.wscale & PF_WSCALE_MASK) {
5780 				/* Remove scale factor from initial window */
5781 				int win = s->src.max_win;
5782 				win += 1 << (s->src.wscale & PF_WSCALE_MASK);
5783 				s->src.max_win = (win - 1) >>
5784 				    (s->src.wscale & PF_WSCALE_MASK);
5785 			}
5786 			if (th->th_flags & TH_FIN) {
5787 				s->src.seqhi++;
5788 			}
5789 			s->dst.seqhi = 1;
5790 			s->dst.max_win = 1;
5791 			s->src.state = TCPS_SYN_SENT;
5792 			s->dst.state = TCPS_CLOSED;
5793 			s->timeout = PFTM_TCP_FIRST_PACKET;
5794 			break;
5795 		case IPPROTO_UDP:
5796 			s->src.state = PFUDPS_SINGLE;
5797 			s->dst.state = PFUDPS_NO_TRAFFIC;
5798 			s->timeout = PFTM_UDP_FIRST_PACKET;
5799 			break;
5800 		case IPPROTO_ICMP:
5801 		case IPPROTO_ICMPV6:
5802 			s->timeout = PFTM_ICMP_FIRST_PACKET;
5803 			break;
5804 		case IPPROTO_GRE:
5805 			s->src.state = PFGRE1S_INITIATING;
5806 			s->dst.state = PFGRE1S_NO_TRAFFIC;
5807 			s->timeout = PFTM_GREv1_INITIATING;
5808 			break;
5809 		case IPPROTO_ESP:
5810 			s->src.state = PFESPS_INITIATING;
5811 			s->dst.state = PFESPS_NO_TRAFFIC;
5812 			s->timeout = PFTM_ESP_FIRST_PACKET;
5813 			break;
5814 		default:
5815 			s->src.state = PFOTHERS_SINGLE;
5816 			s->dst.state = PFOTHERS_NO_TRAFFIC;
5817 			s->timeout = PFTM_OTHER_FIRST_PACKET;
5818 		}
5819 
5820 		s->creation = pf_time_second();
5821 		s->expire = pf_time_second();
5822 
5823 		if (sn != NULL) {
5824 			s->src_node = sn;
5825 			s->src_node->states++;
5826 			VERIFY(s->src_node->states != 0);
5827 		}
5828 		if (nsn != NULL) {
5829 			PF_ACPY(&nsn->raddr, &pd->naddr, af);
5830 			s->nat_src_node = nsn;
5831 			s->nat_src_node->states++;
5832 			VERIFY(s->nat_src_node->states != 0);
5833 		}
5834 		if (pd->proto == IPPROTO_TCP) {
5835 			if ((pd->flags & PFDESC_TCP_NORM) &&
5836 			    pf_normalize_tcp_init(pbuf, off, pd, th, &s->src,
5837 			    &s->dst)) {
5838 				REASON_SET(&reason, PFRES_MEMORY);
5839 				pf_src_tree_remove_state(s);
5840 				STATE_DEC_COUNTERS(s);
5841 #if SKYWALK
5842 				netns_release(&nstoken);
5843 #endif
5844 				pool_put(&pf_state_pl, s);
5845 				return PF_DROP;
5846 			}
5847 			if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
5848 			    pf_normalize_tcp_stateful(pbuf, off, pd, &reason,
5849 			    th, s, &s->src, &s->dst, &rewrite)) {
5850 				/* This really shouldn't happen!!! */
5851 				DPFPRINTF(PF_DEBUG_URGENT,
5852 				    ("pf_normalize_tcp_stateful failed on "
5853 				    "first pkt"));
5854 #if SKYWALK
5855 				netns_release(&nstoken);
5856 #endif
5857 				pf_normalize_tcp_cleanup(s);
5858 				pf_src_tree_remove_state(s);
5859 				STATE_DEC_COUNTERS(s);
5860 				pool_put(&pf_state_pl, s);
5861 				return PF_DROP;
5862 			}
5863 		}
5864 
5865 		/* allocate state key and import values from psk */
5866 		if ((sk = pf_alloc_state_key(s, &psk)) == NULL) {
5867 			REASON_SET(&reason, PFRES_MEMORY);
5868 			/*
5869 			 * XXXSCW: This will leak the freshly-allocated
5870 			 * state structure 's'. Although it should
5871 			 * eventually be aged-out and removed.
5872 			 */
5873 			goto cleanup;
5874 		}
5875 
5876 		pf_set_rt_ifp(s, saddr, af);    /* needs s->state_key set */
5877 
5878 		pbuf = pd->mp; // XXXSCW: Why?
5879 
5880 		if (sk->app_state == 0) {
5881 			switch (pd->proto) {
5882 			case IPPROTO_TCP: {
5883 				u_int16_t dport = (direction == PF_OUT) ?
5884 				    sk->ext_gwy.xport.port : sk->gwy.xport.port;
5885 
5886 				if (nr != NULL &&
5887 				    ntohs(dport) == PF_PPTP_PORT) {
5888 					struct pf_app_state *as;
5889 
5890 					as = pool_get(&pf_app_state_pl,
5891 					    PR_WAITOK);
5892 					if (!as) {
5893 						REASON_SET(&reason,
5894 						    PFRES_MEMORY);
5895 						goto cleanup;
5896 					}
5897 
5898 					bzero(as, sizeof(*as));
5899 					as->handler = pf_pptp_handler;
5900 					as->compare_lan_ext = 0;
5901 					as->compare_ext_gwy = 0;
5902 					as->u.pptp.grev1_state = 0;
5903 					sk->app_state = as;
5904 					(void) hook_establish(&s->unlink_hooks,
5905 					    0, (hook_fn_t) pf_pptp_unlink, s);
5906 				}
5907 				break;
5908 			}
5909 
5910 			case IPPROTO_UDP: {
5911 				if (nr != NULL &&
5912 				    ntohs(uh->uh_sport) == PF_IKE_PORT &&
5913 				    ntohs(uh->uh_dport) == PF_IKE_PORT) {
5914 					struct pf_app_state *as;
5915 
5916 					as = pool_get(&pf_app_state_pl,
5917 					    PR_WAITOK);
5918 					if (!as) {
5919 						REASON_SET(&reason,
5920 						    PFRES_MEMORY);
5921 						goto cleanup;
5922 					}
5923 
5924 					bzero(as, sizeof(*as));
5925 					as->compare_lan_ext = pf_ike_compare;
5926 					as->compare_ext_gwy = pf_ike_compare;
5927 					as->u.ike.cookie = ike.initiator_cookie;
5928 					sk->app_state = as;
5929 				}
5930 				break;
5931 			}
5932 
5933 			default:
5934 				break;
5935 			}
5936 		}
5937 
5938 		if (pf_insert_state(BOUND_IFACE(r, kif), s)) {
5939 			if (pd->proto == IPPROTO_TCP) {
5940 				pf_normalize_tcp_cleanup(s);
5941 			}
5942 			REASON_SET(&reason, PFRES_STATEINS);
5943 			pf_src_tree_remove_state(s);
5944 			STATE_DEC_COUNTERS(s);
5945 #if SKYWALK
5946 			netns_release(&nstoken);
5947 #endif
5948 			pool_put(&pf_state_pl, s);
5949 			return PF_DROP;
5950 		} else {
5951 #if SKYWALK
5952 			s->nstoken = nstoken;
5953 			nstoken = NULL;
5954 #endif
5955 			*sm = s;
5956 		}
5957 		if (tag > 0) {
5958 			pf_tag_ref(tag);
5959 			s->tag = tag;
5960 		}
5961 		if (pd->proto == IPPROTO_TCP &&
5962 		    (th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN &&
5963 		    r->keep_state == PF_STATE_SYNPROXY) {
5964 			int ua = (sk->af_lan == sk->af_gwy) ? 1 : 0;
5965 			s->src.state = PF_TCPS_PROXY_SRC;
5966 			if (nr != NULL) {
5967 				if (direction == PF_OUT) {
5968 					pf_change_ap(direction, pd->mp, saddr,
5969 					    &th->th_sport, pd->ip_sum,
5970 					    &th->th_sum, &pd->baddr,
5971 					    bxport.port, 0, af, pd->af, ua);
5972 					sxport.port = th->th_sport;
5973 				} else {
5974 					pf_change_ap(direction, pd->mp, daddr,
5975 					    &th->th_dport, pd->ip_sum,
5976 					    &th->th_sum, &pd->baddr,
5977 					    bxport.port, 0, af, pd->af, ua);
5978 					sxport.port = th->th_dport;
5979 				}
5980 			}
5981 			s->src.seqhi = htonl(random());
5982 			/* Find mss option */
5983 			mss = pf_get_mss(pbuf, off, th->th_off, af);
5984 			mss = pf_calc_mss(saddr, af, mss);
5985 			mss = pf_calc_mss(daddr, af, mss);
5986 			s->src.mss = mss;
5987 			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
5988 			    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
5989 			    TH_SYN | TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
5990 			REASON_SET(&reason, PFRES_SYNPROXY);
5991 			return PF_SYNPROXY_DROP;
5992 		}
5993 
5994 		if (sk->app_state && sk->app_state->handler) {
5995 			int offx = off;
5996 
5997 			switch (pd->proto) {
5998 			case IPPROTO_TCP:
5999 				offx += th->th_off << 2;
6000 				break;
6001 			case IPPROTO_UDP:
6002 				offx += pd->hdr.udp->uh_ulen << 2;
6003 				break;
6004 			default:
6005 				/* ALG handlers only apply to TCP and UDP rules */
6006 				break;
6007 			}
6008 
6009 			if (offx > off) {
6010 				sk->app_state->handler(s, direction, offx,
6011 				    pd, kif);
6012 				if (pd->lmw < 0) {
6013 					REASON_SET(&reason, PFRES_MEMORY);
6014 					return PF_DROP;
6015 				}
6016 				pbuf = pd->mp;  // XXXSCW: Why?
6017 			}
6018 		}
6019 	}
6020 #if SKYWALK
6021 	else {
6022 		netns_release(&nstoken);
6023 	}
6024 #endif
6025 
6026 	/* copy back packet headers if we performed NAT operations */
6027 	if (rewrite) {
6028 		if (rewrite < off + hdrlen) {
6029 			rewrite = off + hdrlen;
6030 		}
6031 
6032 		if (pf_lazy_makewritable(pd, pd->mp, rewrite) == NULL) {
6033 			REASON_SET(&reason, PFRES_MEMORY);
6034 			return PF_DROP;
6035 		}
6036 
6037 		pbuf_copy_back(pbuf, off, hdrlen, pd->hdr.any);
6038 		if (af == AF_INET6 && pd->naf == AF_INET) {
6039 			return pf_nat64_ipv6(pbuf, off, pd);
6040 		} else if (af == AF_INET && pd->naf == AF_INET6) {
6041 			return pf_nat64_ipv4(pbuf, off, pd);
6042 		}
6043 	}
6044 
6045 	return PF_PASS;
6046 }
6047 
/*
 * Global enable flag, FALSE by default.
 * NOTE(review): only the definition is visible here; the name suggests
 * "network link conditioner" — confirm against the code that sets/reads it.
 */
boolean_t is_nlc_enabled_glb = FALSE;
6049 
6050 static inline boolean_t
pf_is_dummynet_enabled(void)6051 pf_is_dummynet_enabled(void)
6052 {
6053 #if DUMMYNET
6054 	if (__probable(!PF_IS_ENABLED)) {
6055 		return FALSE;
6056 	}
6057 
6058 	if (__probable(!DUMMYNET_LOADED)) {
6059 		return FALSE;
6060 	}
6061 
6062 	if (__probable(TAILQ_EMPTY(pf_main_ruleset.
6063 	    rules[PF_RULESET_DUMMYNET].active.ptr))) {
6064 		return FALSE;
6065 	}
6066 
6067 	return TRUE;
6068 #else
6069 	return FALSE;
6070 #endif /* DUMMYNET */
6071 }
6072 
#if DUMMYNET
/*
 * When pf_test_dummynet() returns PF_PASS, the rule matching parameter "rm"
 * remains unchanged, meaning the packet did not match a dummynet rule.
 * When the packet does match a dummynet rule, pf_test_dummynet() returns
 * PF_PASS and zeroes out *pbuf0, as the packet is effectively siphoned
 * out by dummynet (the caller must not touch it further).
 *
 * Called with pf_lock held.  "fwa" carries the state of a previous
 * traversal (fwa_pf_rule) so that a packet re-injected by dummynet
 * resumes rule matching *after* the rule that already queued it.
 */
static __attribute__((noinline)) int
pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif,
    pbuf_t **pbuf0, struct pf_pdesc *pd, struct ip_fw_args *fwa)
{
	pbuf_t                  *pbuf = *pbuf0;
	struct pf_rule          *am = NULL;
	struct pf_ruleset       *rsm = NULL;
	struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
	sa_family_t              af = pd->af;
	struct pf_rule          *r, *a = NULL;
	struct pf_ruleset       *ruleset = NULL;
	struct tcphdr           *th = pd->hdr.tcp;
	u_short                  reason;
	int                      hdrlen = 0;
	int                      tag = -1;
	unsigned int             rtableid = IFSCOPE_NONE;
	int                      asd = 0;
	int                      match = 0;
	u_int8_t                 icmptype = 0, icmpcode = 0;
	struct ip_fw_args       dnflow;
	struct pf_rule          *prev_matching_rule = fwa ? fwa->fwa_pf_rule : NULL;
	int                     found_prev_rule = (prev_matching_rule) ? 0 : 1;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	if (!pf_is_dummynet_enabled()) {
		return PF_PASS;
	}

	bzero(&dnflow, sizeof(dnflow));

	hdrlen = 0;

	/* Fragments don't have protocol headers */
	if (!(pd->flags & PFDESC_IP_FRAG)) {
		switch (pd->proto) {
		case IPPROTO_TCP:
			dnflow.fwa_id.flags = pd->hdr.tcp->th_flags;
			dnflow.fwa_id.dst_port = ntohs(pd->hdr.tcp->th_dport);
			dnflow.fwa_id.src_port = ntohs(pd->hdr.tcp->th_sport);
			hdrlen = sizeof(*th);
			break;
		case IPPROTO_UDP:
			dnflow.fwa_id.dst_port = ntohs(pd->hdr.udp->uh_dport);
			dnflow.fwa_id.src_port = ntohs(pd->hdr.udp->uh_sport);
			hdrlen = sizeof(*pd->hdr.udp);
			break;
#if INET
		case IPPROTO_ICMP:
			if (af != AF_INET) {
				break;
			}
			hdrlen = ICMP_MINLEN;
			icmptype = pd->hdr.icmp->icmp_type;
			icmpcode = pd->hdr.icmp->icmp_code;
			break;
#endif /* INET */
		case IPPROTO_ICMPV6:
			if (af != AF_INET6) {
				break;
			}
			hdrlen = sizeof(*pd->hdr.icmp6);
			icmptype = pd->hdr.icmp6->icmp6_type;
			icmpcode = pd->hdr.icmp6->icmp6_code;
			break;
		case IPPROTO_GRE:
			if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
				hdrlen = sizeof(*pd->hdr.grev1);
			}
			break;
		case IPPROTO_ESP:
			hdrlen = sizeof(*pd->hdr.esp);
			break;
		}
	}

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_DUMMYNET].active.ptr);

	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != direction) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != af) {
			r = r->skip[PF_SKIP_AF].ptr;
		} else if (r->proto && r->proto != pd->proto) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		} else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
		    r->src.neg, kif)) {
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		}
		/* tcp/udp only. port_op always 0 in other cases */
		else if (r->proto == pd->proto &&
		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    ((r->src.xport.range.op &&
		    !pf_match_port(r->src.xport.range.op,
		    r->src.xport.range.port[0], r->src.xport.range.port[1],
		    th->th_sport))))) {
			r = r->skip[PF_SKIP_SRC_PORT].ptr;
		} else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
		    r->dst.neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		}
		/* tcp/udp only. port_op always 0 in other cases */
		else if (r->proto == pd->proto &&
		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
		    r->dst.xport.range.op &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    !pf_match_port(r->dst.xport.range.op,
		    r->dst.xport.range.port[0], r->dst.xport.range.port[1],
		    th->th_dport))) {
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		}
		/* icmp only. type always 0 in other cases */
		else if (r->type &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    r->type != icmptype + 1)) {
			r = TAILQ_NEXT(r, entries);
		}
		/* icmp only. type always 0 in other cases */
		else if (r->code &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    r->code != icmpcode + 1)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->tos && !(r->tos == pd->tos)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->rule_flag & PFRULE_FRAGMENT) {
			r = TAILQ_NEXT(r, entries);
		} else if (pd->proto == IPPROTO_TCP &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    (r->flagset & th->th_flags) != r->flags)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
			r = TAILQ_NEXT(r, entries);
		} else {
			/*
			 * Need to go past the previous dummynet matching rule
			 */
			if (r->anchor == NULL) {
				if (found_prev_rule) {
					if (r->tag) {
						tag = r->tag;
					}
					if (PF_RTABLEID_IS_VALID(r->rtableid)) {
						rtableid = r->rtableid;
					}
					match = 1;
					*rm = r;
					am = a;
					rsm = ruleset;
					if ((*rm)->quick) {
						break;
					}
				} else if (r == prev_matching_rule) {
					found_prev_rule = 1;
				}
				r = TAILQ_NEXT(r, entries);
			} else {
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_DUMMYNET, &r, &a, &match);
			}
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_DUMMYNET, &r, &a, &match)) {
			break;
		}
	}
	r = *rm;
	a = am;
	ruleset = rsm;

	if (!match) {
		return PF_PASS;
	}

	REASON_SET(&reason, PFRES_DUMMYNET);

	if (r->log) {
		PFLOG_PACKET(kif, h, pbuf, af, direction, reason, r,
		    a, ruleset, pd);
	}

	if (r->action == PF_NODUMMYNET) {
		int dirndx = (direction == PF_OUT);

		r->packets[dirndx]++;
		r->bytes[dirndx] += pd->tot_len;

		return PF_PASS;
	}
	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, pd)) {
		REASON_SET(&reason, PFRES_MEMORY);

		return PF_DROP;
	}

	if (r->dnpipe && ip_dn_io_ptr != NULL) {
		struct mbuf *m;
		int dirndx = (direction == PF_OUT);

		r->packets[dirndx]++;
		r->bytes[dirndx] += pd->tot_len;

		dnflow.fwa_cookie = r->dnpipe;
		dnflow.fwa_pf_rule = r;
		dnflow.fwa_id.proto = pd->proto;
		dnflow.fwa_flags = r->dntype;
		switch (af) {
		case AF_INET:
			dnflow.fwa_id.addr_type = 4;
			dnflow.fwa_id.src_ip = ntohl(saddr->v4addr.s_addr);
			dnflow.fwa_id.dst_ip = ntohl(daddr->v4addr.s_addr);
			break;
		case AF_INET6:
			dnflow.fwa_id.addr_type = 6;
			dnflow.fwa_id.src_ip6 = saddr->v6addr;
			/*
			 * Fix: the destination flow-id address was copied
			 * from saddr, so IPv6 dummynet flows were keyed on
			 * the source address twice (the IPv4 arm above
			 * correctly uses daddr).
			 */
			dnflow.fwa_id.dst_ip6 = daddr->v6addr;
			break;
		}

		if (fwa != NULL) {
			dnflow.fwa_oif = fwa->fwa_oif;
			dnflow.fwa_oflags = fwa->fwa_oflags;
			/*
			 * Note that fwa_ro, fwa_dst and fwa_ipoa are
			 * actually in a union so the following does work
			 * for both IPv4 and IPv6
			 */
			dnflow.fwa_ro = fwa->fwa_ro;
			dnflow.fwa_dst = fwa->fwa_dst;
			dnflow.fwa_ipoa = fwa->fwa_ipoa;
			dnflow.fwa_ro6_pmtu = fwa->fwa_ro6_pmtu;
			dnflow.fwa_origifp = fwa->fwa_origifp;
			dnflow.fwa_mtu = fwa->fwa_mtu;
			dnflow.fwa_unfragpartlen = fwa->fwa_unfragpartlen;
			dnflow.fwa_exthdrs = fwa->fwa_exthdrs;
		}

		if (af == AF_INET) {
			/* dummynet expects host byte order in the IP header */
			struct ip *iphdr = pbuf->pb_data;
			NTOHS(iphdr->ip_len);
			NTOHS(iphdr->ip_off);
		}
		/*
		 * Don't need to unlock pf_lock as NET_THREAD_HELD_PF
		 * allows for recursive behavior
		 */
		m = pbuf_to_mbuf(pbuf, TRUE);
		if (m != NULL) {
			ip_dn_io_ptr(m,
			    dnflow.fwa_cookie, (af == AF_INET) ?
			    ((direction == PF_IN) ? DN_TO_IP_IN : DN_TO_IP_OUT) :
			    ((direction == PF_IN) ? DN_TO_IP6_IN : DN_TO_IP6_OUT),
			    &dnflow);
		}

		/*
		 * The packet is siphoned out by dummynet so return a NULL
		 * pbuf so the caller can still return success.
		 */
		*pbuf0 = NULL;

		return PF_PASS;
	}

	return PF_PASS;
}
#endif /* DUMMYNET */
6353 
/*
 * Match an IP fragment (no transport header available) against the filter
 * ruleset.  Uses the precomputed skip-steps for fast mismatch, and skips
 * any rule that needs transport-layer information (ports, TCP flags,
 * ICMP type/code, OS fingerprint).  Returns PF_PASS or PF_DROP; the
 * matching rule/anchor/ruleset are reported through rm/am/rsm.
 *
 * NOTE(review): no "!match" bail-out here — this relies on the implicit
 * default rule always matching, so *rm is assumed non-NULL after the loop.
 */
static __attribute__((noinline)) int
pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, void *h, struct pf_pdesc *pd, struct pf_rule **am,
    struct pf_ruleset **rsm)
{
#pragma unused(h)
	struct pf_rule          *r, *a = NULL;
	struct pf_ruleset       *ruleset = NULL;
	sa_family_t              af = pd->af;
	u_short                  reason;
	int                      tag = -1;
	int                      asd = 0;	/* anchor stack depth */
	int                      match = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		/* Skip-steps jump over runs of rules sharing the same field. */
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != direction) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != af) {
			r = r->skip[PF_SKIP_AF].ptr;
		} else if (r->proto && r->proto != pd->proto) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		} else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif)) {
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		} else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		} else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
		    !(r->tos & pd->tos)) {
			r = TAILQ_NEXT(r, entries);
		} else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
		    !(r->tos & (pd->tos & DSCP_MASK))) {
			r = TAILQ_NEXT(r, entries);
		} else if ((r->rule_flag & PFRULE_SC) && r->tos &&
		    ((r->tos & SCIDX_MASK) != pd->sc)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->os_fingerprint != PF_OSFP_ANY) {
			/* OS fingerprinting needs the TCP SYN — not on a fragment */
			r = TAILQ_NEXT(r, entries);
		} else if (pd->proto == IPPROTO_UDP &&
		    (r->src.xport.range.op || r->dst.xport.range.op)) {
			/* Port matching impossible without a transport header */
			r = TAILQ_NEXT(r, entries);
		} else if (pd->proto == IPPROTO_TCP &&
		    (r->src.xport.range.op || r->dst.xport.range.op ||
		    r->flagset)) {
			r = TAILQ_NEXT(r, entries);
		} else if ((pd->proto == IPPROTO_ICMP ||
		    pd->proto == IPPROTO_ICMPV6) &&
		    (r->type || r->code)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
			/* Probabilistic rule: matched only with probability r->prob */
			r = TAILQ_NEXT(r, entries);
		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
			r = TAILQ_NEXT(r, entries);
		} else {
			if (r->anchor == NULL) {
				/* Plain rule: remember it; stop early on "quick" */
				match = 1;
				*rm = r;
				*am = a;
				*rsm = ruleset;
				if ((*rm)->quick) {
					break;
				}
				r = TAILQ_NEXT(r, entries);
			} else {
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_FILTER, &r, &a, &match);
			}
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_FILTER, &r, &a, &match)) {
			break;
		}
	}
	r = *rm;
	a = *am;
	ruleset = *rsm;

	REASON_SET(&reason, PFRES_MATCH);

	if (r->log) {
		PFLOG_PACKET(kif, h, pbuf, af, direction, reason, r, a, ruleset,
		    pd);
	}

	if (r->action != PF_PASS) {
		return PF_DROP;
	}

	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, -1, NULL)) {
		REASON_SET(&reason, PFRES_MEMORY);
		return PF_DROP;
	}

	return PF_PASS;
}
6453 
6454 static __attribute__((noinline)) void
pf_pptp_handler(struct pf_state * s,int direction,int off,struct pf_pdesc * pd,struct pfi_kif * kif)6455 pf_pptp_handler(struct pf_state *s, int direction, int off,
6456     struct pf_pdesc *pd, struct pfi_kif *kif)
6457 {
6458 #pragma unused(direction)
6459 	struct tcphdr *th;
6460 	struct pf_pptp_state *pptps;
6461 	struct pf_pptp_ctrl_msg cm;
6462 	size_t plen, tlen;
6463 	struct pf_state *gs;
6464 	u_int16_t ct;
6465 	u_int16_t *pac_call_id;
6466 	u_int16_t *pns_call_id;
6467 	u_int16_t *spoof_call_id;
6468 	u_int8_t *pac_state;
6469 	u_int8_t *pns_state;
6470 	enum { PF_PPTP_PASS, PF_PPTP_INSERT_GRE, PF_PPTP_REMOVE_GRE } op;
6471 	pbuf_t *pbuf;
6472 	struct pf_state_key *sk;
6473 	struct pf_state_key *gsk;
6474 	struct pf_app_state *gas;
6475 
6476 	sk = s->state_key;
6477 	pptps = &sk->app_state->u.pptp;
6478 	gs = pptps->grev1_state;
6479 
6480 	if (gs) {
6481 		gs->expire = pf_time_second();
6482 	}
6483 
6484 	pbuf = pd->mp;
6485 	plen = min(sizeof(cm), pbuf->pb_packet_len - off);
6486 	if (plen < PF_PPTP_CTRL_MSG_MINSIZE) {
6487 		return;
6488 	}
6489 	tlen = plen - PF_PPTP_CTRL_MSG_MINSIZE;
6490 	pbuf_copy_data(pbuf, off, plen, &cm);
6491 
6492 	if (ntohl(cm.hdr.magic) != PF_PPTP_MAGIC_NUMBER) {
6493 		return;
6494 	}
6495 	if (ntohs(cm.hdr.type) != 1) {
6496 		return;
6497 	}
6498 
6499 #define TYPE_LEN_CHECK(_type, _name)                            \
6500 	case PF_PPTP_CTRL_TYPE_##_type:                         \
6501 	        if (tlen < sizeof(struct pf_pptp_ctrl_##_name)) \
6502 	                return;                                 \
6503 	        break;
6504 
6505 	switch (cm.ctrl.type) {
6506 		TYPE_LEN_CHECK(START_REQ, start_req);
6507 		TYPE_LEN_CHECK(START_RPY, start_rpy);
6508 		TYPE_LEN_CHECK(STOP_REQ, stop_req);
6509 		TYPE_LEN_CHECK(STOP_RPY, stop_rpy);
6510 		TYPE_LEN_CHECK(ECHO_REQ, echo_req);
6511 		TYPE_LEN_CHECK(ECHO_RPY, echo_rpy);
6512 		TYPE_LEN_CHECK(CALL_OUT_REQ, call_out_req);
6513 		TYPE_LEN_CHECK(CALL_OUT_RPY, call_out_rpy);
6514 		TYPE_LEN_CHECK(CALL_IN_1ST, call_in_1st);
6515 		TYPE_LEN_CHECK(CALL_IN_2ND, call_in_2nd);
6516 		TYPE_LEN_CHECK(CALL_IN_3RD, call_in_3rd);
6517 		TYPE_LEN_CHECK(CALL_CLR, call_clr);
6518 		TYPE_LEN_CHECK(CALL_DISC, call_disc);
6519 		TYPE_LEN_CHECK(ERROR, error);
6520 		TYPE_LEN_CHECK(SET_LINKINFO, set_linkinfo);
6521 	default:
6522 		return;
6523 	}
6524 #undef TYPE_LEN_CHECK
6525 
6526 	if (!gs) {
6527 		gs = pool_get(&pf_state_pl, PR_WAITOK);
6528 		if (!gs) {
6529 			return;
6530 		}
6531 
6532 		memcpy(gs, s, sizeof(*gs));
6533 
6534 		memset(&gs->entry_id, 0, sizeof(gs->entry_id));
6535 		memset(&gs->entry_list, 0, sizeof(gs->entry_list));
6536 
6537 		TAILQ_INIT(&gs->unlink_hooks);
6538 		gs->rt_kif = NULL;
6539 		gs->creation = 0;
6540 		gs->pfsync_time = 0;
6541 		gs->packets[0] = gs->packets[1] = 0;
6542 		gs->bytes[0] = gs->bytes[1] = 0;
6543 		gs->timeout = PFTM_UNLINKED;
6544 		gs->id = gs->creatorid = 0;
6545 		gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
6546 		gs->src.scrub = gs->dst.scrub = 0;
6547 
6548 		gas = pool_get(&pf_app_state_pl, PR_NOWAIT);
6549 		if (!gas) {
6550 			pool_put(&pf_state_pl, gs);
6551 			return;
6552 		}
6553 
6554 		gsk = pf_alloc_state_key(gs, NULL);
6555 		if (!gsk) {
6556 			pool_put(&pf_app_state_pl, gas);
6557 			pool_put(&pf_state_pl, gs);
6558 			return;
6559 		}
6560 
6561 		memcpy(&gsk->lan, &sk->lan, sizeof(gsk->lan));
6562 		memcpy(&gsk->gwy, &sk->gwy, sizeof(gsk->gwy));
6563 		memcpy(&gsk->ext_lan, &sk->ext_lan, sizeof(gsk->ext_lan));
6564 		memcpy(&gsk->ext_gwy, &sk->ext_gwy, sizeof(gsk->ext_gwy));
6565 		gsk->af_lan = sk->af_lan;
6566 		gsk->af_gwy = sk->af_gwy;
6567 		gsk->proto = IPPROTO_GRE;
6568 		gsk->proto_variant = PF_GRE_PPTP_VARIANT;
6569 		gsk->app_state = gas;
6570 		gsk->lan.xport.call_id = 0;
6571 		gsk->gwy.xport.call_id = 0;
6572 		gsk->ext_lan.xport.call_id = 0;
6573 		gsk->ext_gwy.xport.call_id = 0;
6574 		gsk->flowsrc = FLOWSRC_PF;
6575 		gsk->flowhash = pf_calc_state_key_flowhash(gsk);
6576 		memset(gas, 0, sizeof(*gas));
6577 		gas->u.grev1.pptp_state = s;
6578 		STATE_INC_COUNTERS(gs);
6579 		pptps->grev1_state = gs;
6580 		(void) hook_establish(&gs->unlink_hooks, 0,
6581 		    (hook_fn_t) pf_grev1_unlink, gs);
6582 	} else {
6583 		gsk = gs->state_key;
6584 	}
6585 
6586 	switch (sk->direction) {
6587 	case PF_IN:
6588 		pns_call_id = &gsk->ext_lan.xport.call_id;
6589 		pns_state = &gs->dst.state;
6590 		pac_call_id = &gsk->lan.xport.call_id;
6591 		pac_state = &gs->src.state;
6592 		break;
6593 
6594 	case PF_OUT:
6595 		pns_call_id = &gsk->lan.xport.call_id;
6596 		pns_state = &gs->src.state;
6597 		pac_call_id = &gsk->ext_lan.xport.call_id;
6598 		pac_state = &gs->dst.state;
6599 		break;
6600 
6601 	default:
6602 		DPFPRINTF(PF_DEBUG_URGENT,
6603 		    ("pf_pptp_handler: bad directional!\n"));
6604 		return;
6605 	}
6606 
6607 	spoof_call_id = 0;
6608 	op = PF_PPTP_PASS;
6609 
6610 	ct = ntohs(cm.ctrl.type);
6611 
6612 	switch (ct) {
6613 	case PF_PPTP_CTRL_TYPE_CALL_OUT_REQ:
6614 		*pns_call_id = cm.msg.call_out_req.call_id;
6615 		*pns_state = PFGRE1S_INITIATING;
6616 		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6617 			spoof_call_id = &cm.msg.call_out_req.call_id;
6618 		}
6619 		break;
6620 
6621 	case PF_PPTP_CTRL_TYPE_CALL_OUT_RPY:
6622 		*pac_call_id = cm.msg.call_out_rpy.call_id;
6623 		if (s->nat_rule.ptr) {
6624 			spoof_call_id =
6625 			    (pac_call_id == &gsk->lan.xport.call_id) ?
6626 			    &cm.msg.call_out_rpy.call_id :
6627 			    &cm.msg.call_out_rpy.peer_call_id;
6628 		}
6629 		if (gs->timeout == PFTM_UNLINKED) {
6630 			*pac_state = PFGRE1S_INITIATING;
6631 			op = PF_PPTP_INSERT_GRE;
6632 		}
6633 		break;
6634 
6635 	case PF_PPTP_CTRL_TYPE_CALL_IN_1ST:
6636 		*pns_call_id = cm.msg.call_in_1st.call_id;
6637 		*pns_state = PFGRE1S_INITIATING;
6638 		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6639 			spoof_call_id = &cm.msg.call_in_1st.call_id;
6640 		}
6641 		break;
6642 
6643 	case PF_PPTP_CTRL_TYPE_CALL_IN_2ND:
6644 		*pac_call_id = cm.msg.call_in_2nd.call_id;
6645 		*pac_state = PFGRE1S_INITIATING;
6646 		if (s->nat_rule.ptr) {
6647 			spoof_call_id =
6648 			    (pac_call_id == &gsk->lan.xport.call_id) ?
6649 			    &cm.msg.call_in_2nd.call_id :
6650 			    &cm.msg.call_in_2nd.peer_call_id;
6651 		}
6652 		break;
6653 
6654 	case PF_PPTP_CTRL_TYPE_CALL_IN_3RD:
6655 		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6656 			spoof_call_id = &cm.msg.call_in_3rd.call_id;
6657 		}
6658 		if (cm.msg.call_in_3rd.call_id != *pns_call_id) {
6659 			break;
6660 		}
6661 		if (gs->timeout == PFTM_UNLINKED) {
6662 			op = PF_PPTP_INSERT_GRE;
6663 		}
6664 		break;
6665 
6666 	case PF_PPTP_CTRL_TYPE_CALL_CLR:
6667 		if (cm.msg.call_clr.call_id != *pns_call_id) {
6668 			op = PF_PPTP_REMOVE_GRE;
6669 		}
6670 		break;
6671 
6672 	case PF_PPTP_CTRL_TYPE_CALL_DISC:
6673 		if (cm.msg.call_clr.call_id != *pac_call_id) {
6674 			op = PF_PPTP_REMOVE_GRE;
6675 		}
6676 		break;
6677 
6678 	case PF_PPTP_CTRL_TYPE_ERROR:
6679 		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6680 			spoof_call_id = &cm.msg.error.peer_call_id;
6681 		}
6682 		break;
6683 
6684 	case PF_PPTP_CTRL_TYPE_SET_LINKINFO:
6685 		if (s->nat_rule.ptr && pac_call_id == &gsk->lan.xport.call_id) {
6686 			spoof_call_id = &cm.msg.set_linkinfo.peer_call_id;
6687 		}
6688 		break;
6689 
6690 	default:
6691 		op = PF_PPTP_PASS;
6692 		break;
6693 	}
6694 
6695 	if (!gsk->gwy.xport.call_id && gsk->lan.xport.call_id) {
6696 		gsk->gwy.xport.call_id = gsk->lan.xport.call_id;
6697 		if (spoof_call_id) {
6698 			u_int16_t call_id = 0;
6699 			int n = 0;
6700 			struct pf_state_key_cmp key;
6701 
6702 			key.af_gwy = gsk->af_gwy;
6703 			key.proto = IPPROTO_GRE;
6704 			key.proto_variant = PF_GRE_PPTP_VARIANT;
6705 			PF_ACPY(&key.gwy.addr, &gsk->gwy.addr, key.af_gwy);
6706 			PF_ACPY(&key.ext_gwy.addr, &gsk->ext_gwy.addr, key.af_gwy);
6707 			key.gwy.xport.call_id = gsk->gwy.xport.call_id;
6708 			key.ext_gwy.xport.call_id = gsk->ext_gwy.xport.call_id;
6709 			do {
6710 				call_id = htonl(random());
6711 			} while (!call_id);
6712 
6713 			while (pf_find_state_all(&key, PF_IN, 0)) {
6714 				call_id = ntohs(call_id);
6715 				--call_id;
6716 				if (--call_id == 0) {
6717 					call_id = 0xffff;
6718 				}
6719 				call_id = htons(call_id);
6720 
6721 				key.gwy.xport.call_id = call_id;
6722 
6723 				if (++n > 65535) {
6724 					DPFPRINTF(PF_DEBUG_URGENT,
6725 					    ("pf_pptp_handler: failed to spoof "
6726 					    "call id\n"));
6727 					key.gwy.xport.call_id = 0;
6728 					break;
6729 				}
6730 			}
6731 
6732 			gsk->gwy.xport.call_id = call_id;
6733 		}
6734 	}
6735 
6736 	th = pd->hdr.tcp;
6737 
6738 	if (spoof_call_id && gsk->lan.xport.call_id != gsk->gwy.xport.call_id) {
6739 		if (*spoof_call_id == gsk->gwy.xport.call_id) {
6740 			*spoof_call_id = gsk->lan.xport.call_id;
6741 			th->th_sum = pf_cksum_fixup(th->th_sum,
6742 			    gsk->gwy.xport.call_id, gsk->lan.xport.call_id, 0);
6743 		} else {
6744 			*spoof_call_id = gsk->gwy.xport.call_id;
6745 			th->th_sum = pf_cksum_fixup(th->th_sum,
6746 			    gsk->lan.xport.call_id, gsk->gwy.xport.call_id, 0);
6747 		}
6748 
6749 		if (pf_lazy_makewritable(pd, pbuf, off + plen) == NULL) {
6750 			pptps->grev1_state = NULL;
6751 			STATE_DEC_COUNTERS(gs);
6752 			pool_put(&pf_state_pl, gs);
6753 			return;
6754 		}
6755 		pbuf_copy_back(pbuf, off, plen, &cm);
6756 	}
6757 
6758 	switch (op) {
6759 	case PF_PPTP_REMOVE_GRE:
6760 		gs->timeout = PFTM_PURGE;
6761 		gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
6762 		gsk->lan.xport.call_id = 0;
6763 		gsk->gwy.xport.call_id = 0;
6764 		gsk->ext_lan.xport.call_id = 0;
6765 		gsk->ext_gwy.xport.call_id = 0;
6766 		gs->id = gs->creatorid = 0;
6767 		break;
6768 
6769 	case PF_PPTP_INSERT_GRE:
6770 		gs->creation = pf_time_second();
6771 		gs->expire = pf_time_second();
6772 		gs->timeout = PFTM_TCP_ESTABLISHED;
6773 		if (gs->src_node != NULL) {
6774 			++gs->src_node->states;
6775 			VERIFY(gs->src_node->states != 0);
6776 		}
6777 		if (gs->nat_src_node != NULL) {
6778 			++gs->nat_src_node->states;
6779 			VERIFY(gs->nat_src_node->states != 0);
6780 		}
6781 		pf_set_rt_ifp(gs, &sk->lan.addr, sk->af_lan);
6782 		if (pf_insert_state(BOUND_IFACE(s->rule.ptr, kif), gs)) {
6783 			/*
6784 			 * <[email protected]>
6785 			 * FIX ME: insertion can fail when multiple PNS
6786 			 * behind the same NAT open calls to the same PAC
6787 			 * simultaneously because spoofed call ID numbers
6788 			 * are chosen before states are inserted.  This is
6789 			 * hard to fix and happens infrequently enough that
6790 			 * users will normally try again and this ALG will
6791 			 * succeed.  Failures are expected to be rare enough
6792 			 * that fixing this is a low priority.
6793 			 */
6794 			pptps->grev1_state = NULL;
6795 			pd->lmw = -1;   /* Force PF_DROP on PFRES_MEMORY */
6796 			pf_src_tree_remove_state(gs);
6797 			STATE_DEC_COUNTERS(gs);
6798 			pool_put(&pf_state_pl, gs);
6799 			DPFPRINTF(PF_DEBUG_URGENT, ("pf_pptp_handler: error "
6800 			    "inserting GREv1 state.\n"));
6801 		}
6802 		break;
6803 
6804 	default:
6805 		break;
6806 	}
6807 }
6808 
6809 static __attribute__((noinline)) void
pf_pptp_unlink(struct pf_state * s)6810 pf_pptp_unlink(struct pf_state *s)
6811 {
6812 	struct pf_app_state *as = s->state_key->app_state;
6813 	struct pf_state *grev1s = as->u.pptp.grev1_state;
6814 
6815 	if (grev1s) {
6816 		struct pf_app_state *gas = grev1s->state_key->app_state;
6817 
6818 		if (grev1s->timeout < PFTM_MAX) {
6819 			grev1s->timeout = PFTM_PURGE;
6820 		}
6821 		gas->u.grev1.pptp_state = NULL;
6822 		as->u.pptp.grev1_state = NULL;
6823 	}
6824 }
6825 
6826 static __attribute__((noinline)) void
pf_grev1_unlink(struct pf_state * s)6827 pf_grev1_unlink(struct pf_state *s)
6828 {
6829 	struct pf_app_state *as = s->state_key->app_state;
6830 	struct pf_state *pptps = as->u.grev1.pptp_state;
6831 
6832 	if (pptps) {
6833 		struct pf_app_state *pas = pptps->state_key->app_state;
6834 
6835 		pas->u.pptp.grev1_state = NULL;
6836 		as->u.grev1.pptp_state = NULL;
6837 	}
6838 }
6839 
6840 static int
pf_ike_compare(struct pf_app_state * a,struct pf_app_state * b)6841 pf_ike_compare(struct pf_app_state *a, struct pf_app_state *b)
6842 {
6843 	int64_t d = a->u.ike.cookie - b->u.ike.cookie;
6844 	return (d > 0) ? 1 : ((d < 0) ? -1 : 0);
6845 }
6846 
6847 static int
pf_do_nat64(struct pf_state_key * sk,struct pf_pdesc * pd,pbuf_t * pbuf,int off)6848 pf_do_nat64(struct pf_state_key *sk, struct pf_pdesc *pd, pbuf_t *pbuf,
6849     int off)
6850 {
6851 	if (pd->af == AF_INET) {
6852 		if (pd->af != sk->af_lan) {
6853 			pd->ndaddr = sk->lan.addr;
6854 			pd->naddr = sk->ext_lan.addr;
6855 		} else {
6856 			pd->naddr = sk->gwy.addr;
6857 			pd->ndaddr = sk->ext_gwy.addr;
6858 		}
6859 		return pf_nat64_ipv4(pbuf, off, pd);
6860 	} else if (pd->af == AF_INET6) {
6861 		if (pd->af != sk->af_lan) {
6862 			pd->ndaddr = sk->lan.addr;
6863 			pd->naddr = sk->ext_lan.addr;
6864 		} else {
6865 			pd->naddr = sk->gwy.addr;
6866 			pd->ndaddr = sk->ext_gwy.addr;
6867 		}
6868 		return pf_nat64_ipv6(pbuf, off, pd);
6869 	}
6870 	return PF_DROP;
6871 }
6872 
6873 static __attribute__((noinline)) int
pf_test_state_tcp(struct pf_state ** state,int direction,struct pfi_kif * kif,pbuf_t * pbuf,int off,void * h,struct pf_pdesc * pd,u_short * reason)6874 pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
6875     pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd,
6876     u_short *reason)
6877 {
6878 #pragma unused(h)
6879 	struct pf_state_key_cmp  key;
6880 	struct tcphdr           *th = pd->hdr.tcp;
6881 	u_int16_t                win = ntohs(th->th_win);
6882 	u_int32_t                ack, end, seq, orig_seq;
6883 	u_int8_t                 sws, dws;
6884 	int                      ackskew;
6885 	int                      copyback = 0;
6886 	struct pf_state_peer    *src, *dst;
6887 	struct pf_state_key     *sk;
6888 
6889 	key.app_state = 0;
6890 	key.proto = IPPROTO_TCP;
6891 	key.af_lan = key.af_gwy = pd->af;
6892 
6893 	/*
6894 	 * For NAT64 the first time rule search and state creation
6895 	 * is done on the incoming side only.
6896 	 * Once the state gets created, NAT64's LAN side (ipv6) will
6897 	 * not be able to find the state in ext-gwy tree as that normally
6898 	 * is intended to be looked up for incoming traffic from the
6899 	 * WAN side.
6900 	 * Therefore to handle NAT64 case we init keys here for both
6901 	 * lan-ext as well as ext-gwy trees.
6902 	 * In the state lookup we attempt a lookup on both trees if
6903 	 * first one does not return any result and return a match if
6904 	 * the match state's was created by NAT64 rule.
6905 	 */
6906 	PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
6907 	PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
6908 	key.ext_gwy.xport.port = th->th_sport;
6909 	key.gwy.xport.port = th->th_dport;
6910 
6911 	PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
6912 	PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
6913 	key.lan.xport.port = th->th_sport;
6914 	key.ext_lan.xport.port = th->th_dport;
6915 
6916 	STATE_LOOKUP();
6917 
6918 	sk = (*state)->state_key;
6919 	/*
6920 	 * In case of NAT64 the translation is first applied on the LAN
6921 	 * side. Therefore for stack's address family comparison
6922 	 * we use sk->af_lan.
6923 	 */
6924 	if ((direction == sk->direction) && (pd->af == sk->af_lan)) {
6925 		src = &(*state)->src;
6926 		dst = &(*state)->dst;
6927 	} else {
6928 		src = &(*state)->dst;
6929 		dst = &(*state)->src;
6930 	}
6931 
6932 	if (src->state == PF_TCPS_PROXY_SRC) {
6933 		if (direction != sk->direction) {
6934 			REASON_SET(reason, PFRES_SYNPROXY);
6935 			return PF_SYNPROXY_DROP;
6936 		}
6937 		if (th->th_flags & TH_SYN) {
6938 			if (ntohl(th->th_seq) != src->seqlo) {
6939 				REASON_SET(reason, PFRES_SYNPROXY);
6940 				return PF_DROP;
6941 			}
6942 			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
6943 			    pd->src, th->th_dport, th->th_sport,
6944 			    src->seqhi, ntohl(th->th_seq) + 1,
6945 			    TH_SYN | TH_ACK, 0, src->mss, 0, 1,
6946 			    0, NULL, NULL);
6947 			REASON_SET(reason, PFRES_SYNPROXY);
6948 			return PF_SYNPROXY_DROP;
6949 		} else if (!(th->th_flags & TH_ACK) ||
6950 		    (ntohl(th->th_ack) != src->seqhi + 1) ||
6951 		    (ntohl(th->th_seq) != src->seqlo + 1)) {
6952 			REASON_SET(reason, PFRES_SYNPROXY);
6953 			return PF_DROP;
6954 		} else if ((*state)->src_node != NULL &&
6955 		    pf_src_connlimit(state)) {
6956 			REASON_SET(reason, PFRES_SRCLIMIT);
6957 			return PF_DROP;
6958 		} else {
6959 			src->state = PF_TCPS_PROXY_DST;
6960 		}
6961 	}
6962 	if (src->state == PF_TCPS_PROXY_DST) {
6963 		struct pf_state_host *psrc, *pdst;
6964 
6965 		if (direction == PF_OUT) {
6966 			psrc = &sk->gwy;
6967 			pdst = &sk->ext_gwy;
6968 		} else {
6969 			psrc = &sk->ext_lan;
6970 			pdst = &sk->lan;
6971 		}
6972 		if (direction == sk->direction) {
6973 			if (((th->th_flags & (TH_SYN | TH_ACK)) != TH_ACK) ||
6974 			    (ntohl(th->th_ack) != src->seqhi + 1) ||
6975 			    (ntohl(th->th_seq) != src->seqlo + 1)) {
6976 				REASON_SET(reason, PFRES_SYNPROXY);
6977 				return PF_DROP;
6978 			}
6979 			src->max_win = MAX(ntohs(th->th_win), 1);
6980 			if (dst->seqhi == 1) {
6981 				dst->seqhi = htonl(random());
6982 			}
6983 			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
6984 			    &pdst->addr, psrc->xport.port, pdst->xport.port,
6985 			    dst->seqhi, 0, TH_SYN, 0,
6986 			    src->mss, 0, 0, (*state)->tag, NULL, NULL);
6987 			REASON_SET(reason, PFRES_SYNPROXY);
6988 			return PF_SYNPROXY_DROP;
6989 		} else if (((th->th_flags & (TH_SYN | TH_ACK)) !=
6990 		    (TH_SYN | TH_ACK)) ||
6991 		    (ntohl(th->th_ack) != dst->seqhi + 1)) {
6992 			REASON_SET(reason, PFRES_SYNPROXY);
6993 			return PF_DROP;
6994 		} else {
6995 			dst->max_win = MAX(ntohs(th->th_win), 1);
6996 			dst->seqlo = ntohl(th->th_seq);
6997 			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
6998 			    pd->src, th->th_dport, th->th_sport,
6999 			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
7000 			    TH_ACK, src->max_win, 0, 0, 0,
7001 			    (*state)->tag, NULL, NULL);
7002 			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
7003 			    &pdst->addr, psrc->xport.port, pdst->xport.port,
7004 			    src->seqhi + 1, src->seqlo + 1,
7005 			    TH_ACK, dst->max_win, 0, 0, 1,
7006 			    0, NULL, NULL);
7007 			src->seqdiff = dst->seqhi -
7008 			    src->seqlo;
7009 			dst->seqdiff = src->seqhi -
7010 			    dst->seqlo;
7011 			src->seqhi = src->seqlo +
7012 			    dst->max_win;
7013 			dst->seqhi = dst->seqlo +
7014 			    src->max_win;
7015 			src->wscale = dst->wscale = 0;
7016 			src->state = dst->state =
7017 			    TCPS_ESTABLISHED;
7018 			REASON_SET(reason, PFRES_SYNPROXY);
7019 			return PF_SYNPROXY_DROP;
7020 		}
7021 	}
7022 
7023 	if (((th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN) &&
7024 	    dst->state >= TCPS_FIN_WAIT_2 &&
7025 	    src->state >= TCPS_FIN_WAIT_2) {
7026 		if (pf_status.debug >= PF_DEBUG_MISC) {
7027 			printf("pf: state reuse ");
7028 			pf_print_state(*state);
7029 			pf_print_flags(th->th_flags);
7030 			printf("\n");
7031 		}
7032 		/* XXX make sure it's the same direction ?? */
7033 		src->state = dst->state = TCPS_CLOSED;
7034 		pf_unlink_state(*state);
7035 		*state = NULL;
7036 		return PF_DROP;
7037 	}
7038 
7039 	if ((th->th_flags & TH_SYN) == 0) {
7040 		sws = (src->wscale & PF_WSCALE_FLAG) ?
7041 		    (src->wscale & PF_WSCALE_MASK) : TCP_MAX_WINSHIFT;
7042 		dws = (dst->wscale & PF_WSCALE_FLAG) ?
7043 		    (dst->wscale & PF_WSCALE_MASK) : TCP_MAX_WINSHIFT;
7044 	} else {
7045 		sws = dws = 0;
7046 	}
7047 
7048 	/*
7049 	 * Sequence tracking algorithm from Guido van Rooij's paper:
7050 	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
7051 	 *	tcp_filtering.ps
7052 	 */
7053 
7054 	orig_seq = seq = ntohl(th->th_seq);
7055 	if (src->seqlo == 0) {
7056 		/* First packet from this end. Set its state */
7057 
7058 		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
7059 		    src->scrub == NULL) {
7060 			if (pf_normalize_tcp_init(pbuf, off, pd, th, src, dst)) {
7061 				REASON_SET(reason, PFRES_MEMORY);
7062 				return PF_DROP;
7063 			}
7064 		}
7065 
7066 		/* Deferred generation of sequence number modulator */
7067 		if (dst->seqdiff && !src->seqdiff) {
7068 			/* use random iss for the TCP server */
7069 			while ((src->seqdiff = random() - seq) == 0) {
7070 				;
7071 			}
7072 			ack = ntohl(th->th_ack) - dst->seqdiff;
7073 			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
7074 			    src->seqdiff), 0);
7075 			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
7076 			copyback = off + sizeof(*th);
7077 		} else {
7078 			ack = ntohl(th->th_ack);
7079 		}
7080 
7081 		end = seq + pd->p_len;
7082 		if (th->th_flags & TH_SYN) {
7083 			end++;
7084 			if (dst->wscale & PF_WSCALE_FLAG) {
7085 				src->wscale = pf_get_wscale(pbuf, off,
7086 				    th->th_off, pd->af);
7087 				if (src->wscale & PF_WSCALE_FLAG) {
7088 					/*
7089 					 * Remove scale factor from initial
7090 					 * window
7091 					 */
7092 					sws = src->wscale & PF_WSCALE_MASK;
7093 					win = ((u_int32_t)win + (1 << sws) - 1)
7094 					    >> sws;
7095 					dws = dst->wscale & PF_WSCALE_MASK;
7096 				} else {
7097 					/*
7098 					 * Window scale negotiation has failed,
7099 					 * therefore we must restore the window
7100 					 * scale in the state record that we
7101 					 * optimistically removed in
7102 					 * pf_test_rule().  Care is required to
7103 					 * prevent arithmetic overflow from
7104 					 * zeroing the window when it's
7105 					 * truncated down to 16-bits.
7106 					 */
7107 					u_int32_t max_win = dst->max_win;
7108 					max_win <<=
7109 					    dst->wscale & PF_WSCALE_MASK;
7110 					dst->max_win = MIN(0xffff, max_win);
7111 					/* in case of a retrans SYN|ACK */
7112 					dst->wscale = 0;
7113 				}
7114 			}
7115 		}
7116 		if (th->th_flags & TH_FIN) {
7117 			end++;
7118 		}
7119 
7120 		src->seqlo = seq;
7121 		if (src->state < TCPS_SYN_SENT) {
7122 			src->state = TCPS_SYN_SENT;
7123 		}
7124 
7125 		/*
7126 		 * May need to slide the window (seqhi may have been set by
7127 		 * the crappy stack check or if we picked up the connection
7128 		 * after establishment)
7129 		 */
7130 		if (src->seqhi == 1 ||
7131 		    SEQ_GEQ(end + MAX(1, (u_int32_t)dst->max_win << dws),
7132 		    src->seqhi)) {
7133 			src->seqhi = end + MAX(1, (u_int32_t)dst->max_win << dws);
7134 		}
7135 		if (win > src->max_win) {
7136 			src->max_win = win;
7137 		}
7138 	} else {
7139 		ack = ntohl(th->th_ack) - dst->seqdiff;
7140 		if (src->seqdiff) {
7141 			/* Modulate sequence numbers */
7142 			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
7143 			    src->seqdiff), 0);
7144 			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
7145 			copyback = off + sizeof(*th);
7146 		}
7147 		end = seq + pd->p_len;
7148 		if (th->th_flags & TH_SYN) {
7149 			end++;
7150 		}
7151 		if (th->th_flags & TH_FIN) {
7152 			end++;
7153 		}
7154 	}
7155 
7156 	if ((th->th_flags & TH_ACK) == 0) {
7157 		/* Let it pass through the ack skew check */
7158 		ack = dst->seqlo;
7159 	} else if ((ack == 0 &&
7160 	    (th->th_flags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) ||
7161 	    /* broken tcp stacks do not set ack */
7162 	    (dst->state < TCPS_SYN_SENT)) {
7163 		/*
7164 		 * Many stacks (ours included) will set the ACK number in an
7165 		 * FIN|ACK if the SYN times out -- no sequence to ACK.
7166 		 */
7167 		ack = dst->seqlo;
7168 	}
7169 
7170 	if (seq == end) {
7171 		/* Ease sequencing restrictions on no data packets */
7172 		seq = src->seqlo;
7173 		end = seq;
7174 	}
7175 
7176 	ackskew = dst->seqlo - ack;
7177 
7178 
7179 	/*
7180 	 * Need to demodulate the sequence numbers in any TCP SACK options
7181 	 * (Selective ACK). We could optionally validate the SACK values
7182 	 * against the current ACK window, either forwards or backwards, but
7183 	 * I'm not confident that SACK has been implemented properly
7184 	 * everywhere. It wouldn't surprise me if several stacks accidently
7185 	 * SACK too far backwards of previously ACKed data. There really aren't
7186 	 * any security implications of bad SACKing unless the target stack
7187 	 * doesn't validate the option length correctly. Someone trying to
7188 	 * spoof into a TCP connection won't bother blindly sending SACK
7189 	 * options anyway.
7190 	 */
7191 	if (dst->seqdiff && (th->th_off << 2) > (int)sizeof(struct tcphdr)) {
7192 		copyback = pf_modulate_sack(pbuf, off, pd, th, dst);
7193 		if (copyback == -1) {
7194 			REASON_SET(reason, PFRES_MEMORY);
7195 			return PF_DROP;
7196 		}
7197 
7198 		pbuf = pd->mp;  // XXXSCW: Why?
7199 	}
7200 
7201 
7202 #define MAXACKWINDOW (0xffff + 1500)    /* 1500 is an arbitrary fudge factor */
7203 	if (SEQ_GEQ(src->seqhi, end) &&
7204 	    /* Last octet inside other's window space */
7205 	    SEQ_GEQ(seq, src->seqlo - ((u_int32_t)dst->max_win << dws)) &&
7206 	    /* Retrans: not more than one window back */
7207 	    (ackskew >= -MAXACKWINDOW) &&
7208 	    /* Acking not more than one reassembled fragment backwards */
7209 	    (ackskew <= (MAXACKWINDOW << sws)) &&
7210 	    /* Acking not more than one window forward */
7211 	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
7212 	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
7213 	    (pd->flags & PFDESC_IP_REAS) == 0)) {
7214 		/* Require an exact/+1 sequence match on resets when possible */
7215 
7216 		if (dst->scrub || src->scrub) {
7217 			if (pf_normalize_tcp_stateful(pbuf, off, pd, reason, th,
7218 			    *state, src, dst, &copyback)) {
7219 				return PF_DROP;
7220 			}
7221 
7222 			pbuf = pd->mp;  // XXXSCW: Why?
7223 		}
7224 
7225 		/* update max window */
7226 		if (src->max_win < win) {
7227 			src->max_win = win;
7228 		}
7229 		/* synchronize sequencing */
7230 		if (SEQ_GT(end, src->seqlo)) {
7231 			src->seqlo = end;
7232 		}
7233 		/* slide the window of what the other end can send */
7234 		if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi)) {
7235 			dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
7236 		}
7237 
7238 		/* update states */
7239 		if (th->th_flags & TH_SYN) {
7240 			if (src->state < TCPS_SYN_SENT) {
7241 				src->state = TCPS_SYN_SENT;
7242 			}
7243 		}
7244 		if (th->th_flags & TH_FIN) {
7245 			if (src->state < TCPS_CLOSING) {
7246 				src->state = TCPS_CLOSING;
7247 			}
7248 		}
7249 		if (th->th_flags & TH_ACK) {
7250 			if (dst->state == TCPS_SYN_SENT) {
7251 				dst->state = TCPS_ESTABLISHED;
7252 				if (src->state == TCPS_ESTABLISHED &&
7253 				    (*state)->src_node != NULL &&
7254 				    pf_src_connlimit(state)) {
7255 					REASON_SET(reason, PFRES_SRCLIMIT);
7256 					return PF_DROP;
7257 				}
7258 			} else if (dst->state == TCPS_CLOSING) {
7259 				dst->state = TCPS_FIN_WAIT_2;
7260 			}
7261 		}
7262 		if (th->th_flags & TH_RST) {
7263 			src->state = dst->state = TCPS_TIME_WAIT;
7264 		}
7265 
7266 		/* update expire time */
7267 		(*state)->expire = pf_time_second();
7268 		if (src->state >= TCPS_FIN_WAIT_2 &&
7269 		    dst->state >= TCPS_FIN_WAIT_2) {
7270 			(*state)->timeout = PFTM_TCP_CLOSED;
7271 		} else if (src->state >= TCPS_CLOSING &&
7272 		    dst->state >= TCPS_CLOSING) {
7273 			(*state)->timeout = PFTM_TCP_FIN_WAIT;
7274 		} else if (src->state < TCPS_ESTABLISHED ||
7275 		    dst->state < TCPS_ESTABLISHED) {
7276 			(*state)->timeout = PFTM_TCP_OPENING;
7277 		} else if (src->state >= TCPS_CLOSING ||
7278 		    dst->state >= TCPS_CLOSING) {
7279 			(*state)->timeout = PFTM_TCP_CLOSING;
7280 		} else {
7281 			(*state)->timeout = PFTM_TCP_ESTABLISHED;
7282 		}
7283 
7284 		/* Fall through to PASS packet */
7285 	} else if ((dst->state < TCPS_SYN_SENT ||
7286 	    dst->state >= TCPS_FIN_WAIT_2 || src->state >= TCPS_FIN_WAIT_2) &&
7287 	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
7288 	    /* Within a window forward of the originating packet */
7289 	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
7290 		/* Within a window backward of the originating packet */
7291 
7292 		/*
7293 		 * This currently handles three situations:
7294 		 *  1) Stupid stacks will shotgun SYNs before their peer
7295 		 *     replies.
7296 		 *  2) When PF catches an already established stream (the
7297 		 *     firewall rebooted, the state table was flushed, routes
7298 		 *     changed...)
7299 		 *  3) Packets get funky immediately after the connection
7300 		 *     closes (this should catch Solaris spurious ACK|FINs
7301 		 *     that web servers like to spew after a close)
7302 		 *
7303 		 * This must be a little more careful than the above code
7304 		 * since packet floods will also be caught here. We don't
7305 		 * update the TTL here to mitigate the damage of a packet
7306 		 * flood and so the same code can handle awkward establishment
7307 		 * and a loosened connection close.
7308 		 * In the establishment case, a correct peer response will
7309 		 * validate the connection, go through the normal state code
7310 		 * and keep updating the state TTL.
7311 		 */
7312 
7313 		if (pf_status.debug >= PF_DEBUG_MISC) {
7314 			printf("pf: loose state match: ");
7315 			pf_print_state(*state);
7316 			pf_print_flags(th->th_flags);
7317 			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
7318 			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
7319 			    pd->p_len, ackskew, (*state)->packets[0],
7320 			    (*state)->packets[1],
7321 			    direction == PF_IN ? "in" : "out",
7322 			    direction == sk->direction ?
7323 			    "fwd" : "rev");
7324 		}
7325 
7326 		if (dst->scrub || src->scrub) {
7327 			if (pf_normalize_tcp_stateful(pbuf, off, pd, reason, th,
7328 			    *state, src, dst, &copyback)) {
7329 				return PF_DROP;
7330 			}
7331 			pbuf = pd->mp;  // XXXSCW: Why?
7332 		}
7333 
7334 		/* update max window */
7335 		if (src->max_win < win) {
7336 			src->max_win = win;
7337 		}
7338 		/* synchronize sequencing */
7339 		if (SEQ_GT(end, src->seqlo)) {
7340 			src->seqlo = end;
7341 		}
7342 		/* slide the window of what the other end can send */
7343 		if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi)) {
7344 			dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
7345 		}
7346 
7347 		/*
7348 		 * Cannot set dst->seqhi here since this could be a shotgunned
7349 		 * SYN and not an already established connection.
7350 		 */
7351 
7352 		if (th->th_flags & TH_FIN) {
7353 			if (src->state < TCPS_CLOSING) {
7354 				src->state = TCPS_CLOSING;
7355 			}
7356 		}
7357 		if (th->th_flags & TH_RST) {
7358 			src->state = dst->state = TCPS_TIME_WAIT;
7359 		}
7360 
7361 		/* Fall through to PASS packet */
7362 	} else {
7363 		if (dst->state == TCPS_SYN_SENT &&
7364 		    src->state == TCPS_SYN_SENT) {
7365 			/* Send RST for state mismatches during handshake */
7366 			if (!(th->th_flags & TH_RST)) {
7367 				pf_send_tcp((*state)->rule.ptr, pd->af,
7368 				    pd->dst, pd->src, th->th_dport,
7369 				    th->th_sport, ntohl(th->th_ack), 0,
7370 				    TH_RST, 0, 0,
7371 				    (*state)->rule.ptr->return_ttl, 1, 0,
7372 				    pd->eh, kif->pfik_ifp);
7373 			}
7374 			src->seqlo = 0;
7375 			src->seqhi = 1;
7376 			src->max_win = 1;
7377 		} else if (pf_status.debug >= PF_DEBUG_MISC) {
7378 			printf("pf: BAD state: ");
7379 			pf_print_state(*state);
7380 			pf_print_flags(th->th_flags);
7381 			printf("\n   seq=%u (%u) ack=%u len=%u ackskew=%d "
7382 			    "sws=%u dws=%u pkts=%llu:%llu dir=%s,%s\n",
7383 			    seq, orig_seq, ack, pd->p_len, ackskew,
7384 			    (unsigned int)sws, (unsigned int)dws,
7385 			    (*state)->packets[0], (*state)->packets[1],
7386 			    direction == PF_IN ? "in" : "out",
7387 			    direction == sk->direction ?
7388 			    "fwd" : "rev");
7389 			printf("pf: State failure on: %c %c %c %c | %c %c\n",
7390 			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
7391 			    SEQ_GEQ(seq,
7392 			    src->seqlo - ((u_int32_t)dst->max_win << dws)) ?
7393 			    ' ': '2',
7394 			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
7395 			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
7396 			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
7397 			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
7398 		}
7399 		REASON_SET(reason, PFRES_BADSTATE);
7400 		return PF_DROP;
7401 	}
7402 
7403 	/* Any packets which have gotten here are to be passed */
7404 
7405 	if (sk->app_state &&
7406 	    sk->app_state->handler) {
7407 		sk->app_state->handler(*state, direction,
7408 		    off + (th->th_off << 2), pd, kif);
7409 		if (pd->lmw < 0) {
7410 			REASON_SET(reason, PFRES_MEMORY);
7411 			return PF_DROP;
7412 		}
7413 		pbuf = pd->mp;  // XXXSCW: Why?
7414 	}
7415 
7416 	/* translate source/destination address, if necessary */
7417 	if (STATE_TRANSLATE(sk)) {
7418 		pd->naf = (pd->af == sk->af_lan) ? sk->af_gwy : sk->af_lan;
7419 
7420 		if (direction == PF_OUT) {
7421 			pf_change_ap(direction, pd->mp, pd->src, &th->th_sport,
7422 			    pd->ip_sum, &th->th_sum, &sk->gwy.addr,
7423 			    sk->gwy.xport.port, 0, pd->af, pd->naf, 1);
7424 		} else {
7425 			if (pd->af != pd->naf) {
7426 				if (pd->af == sk->af_gwy) {
7427 					pf_change_ap(direction, pd->mp, pd->dst,
7428 					    &th->th_dport, pd->ip_sum,
7429 					    &th->th_sum, &sk->lan.addr,
7430 					    sk->lan.xport.port, 0,
7431 					    pd->af, pd->naf, 0);
7432 
7433 					pf_change_ap(direction, pd->mp, pd->src,
7434 					    &th->th_sport, pd->ip_sum,
7435 					    &th->th_sum, &sk->ext_lan.addr,
7436 					    th->th_sport, 0, pd->af,
7437 					    pd->naf, 0);
7438 				} else {
7439 					pf_change_ap(direction, pd->mp, pd->dst,
7440 					    &th->th_dport, pd->ip_sum,
7441 					    &th->th_sum, &sk->ext_gwy.addr,
7442 					    th->th_dport, 0, pd->af,
7443 					    pd->naf, 0);
7444 
7445 					pf_change_ap(direction, pd->mp, pd->src,
7446 					    &th->th_sport, pd->ip_sum,
7447 					    &th->th_sum, &sk->gwy.addr,
7448 					    sk->gwy.xport.port, 0, pd->af,
7449 					    pd->naf, 0);
7450 				}
7451 			} else {
7452 				pf_change_ap(direction, pd->mp, pd->dst,
7453 				    &th->th_dport, pd->ip_sum,
7454 				    &th->th_sum, &sk->lan.addr,
7455 				    sk->lan.xport.port, 0, pd->af,
7456 				    pd->naf, 1);
7457 			}
7458 		}
7459 
7460 		copyback = off + sizeof(*th);
7461 	}
7462 
7463 	if (copyback) {
7464 		if (pf_lazy_makewritable(pd, pbuf, copyback) == NULL) {
7465 			REASON_SET(reason, PFRES_MEMORY);
7466 			return PF_DROP;
7467 		}
7468 
7469 		/* Copyback sequence modulation or stateful scrub changes */
7470 		pbuf_copy_back(pbuf, off, sizeof(*th), th);
7471 
7472 		if (sk->af_lan != sk->af_gwy) {
7473 			return pf_do_nat64(sk, pd, pbuf, off);
7474 		}
7475 	}
7476 	return PF_PASS;
7477 }
7478 
/*
 * pf_test_state_udp: match a UDP packet against the pf state table and,
 * on a hit, refresh the state (UDP "connection" tracking + expiry) and
 * apply any address/port translation recorded in the state key,
 * including cross-family (NAT64) translation.
 *
 * Parameters:
 *   state     - out: on success points at the matched pf_state.
 *   direction - PF_IN or PF_OUT relative to the interface.
 *   kif       - pf interface the packet was seen on.
 *   pbuf      - packet buffer.
 *   off       - byte offset of the UDP header within the packet.
 *   h         - unused (kept for signature parity with the other
 *               pf_test_state_* handlers).
 *   pd        - packet descriptor (parsed header pointers, addresses,
 *               address family, flow metadata).
 *   reason    - out: drop reason code when PF_DROP is returned.
 *
 * Returns PF_PASS, PF_DROP, or whatever pf_state_lookup_aux() /
 * pf_do_nat64() decide for the lookup-miss and NAT64 paths.
 */
static __attribute__((noinline)) int
pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd, u_short *reason)
{
#pragma unused(h)
	struct pf_state_peer    *src, *dst;
	struct pf_state_key_cmp  key;
	struct pf_state_key     *sk;
	struct udphdr           *uh = pd->hdr.udp;
	struct pf_app_state as;
	int action, extfilter;
	/* Start with the strictest external filter (address+port+direction). */
	key.app_state = 0;
	key.proto_variant = PF_EXTFILTER_APD;

	key.proto = IPPROTO_UDP;
	key.af_lan = key.af_gwy = pd->af;

	/*
	 * For NAT64 the first time rule search and state creation
	 * is done on the incoming side only.
	 * Once the state gets created, NAT64's LAN side (ipv6) will
	 * not be able to find the state in ext-gwy tree as that normally
	 * is intended to be looked up for incoming traffic from the
	 * WAN side.
	 * Therefore to handle NAT64 case we init keys here for both
	 * lan-ext as well as ext-gwy trees.
	 * In the state lookup we attempt a lookup on both trees if
	 * first one does not return any result and return a match if
	 * the match state's was created by NAT64 rule.
	 */
	PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
	PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
	key.ext_gwy.xport.port = uh->uh_sport;
	key.gwy.xport.port = uh->uh_dport;

	PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
	PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
	key.lan.xport.port = uh->uh_sport;
	key.ext_lan.xport.port = uh->uh_dport;

	/*
	 * IKE special case (both ports are the IKE port): key the state
	 * lookup on the IKE initiator cookie via an app_state comparator so
	 * that concurrent IKE exchanges between the same address pair are
	 * tracked as distinct states.
	 */
	if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
	    ntohs(uh->uh_dport) == PF_IKE_PORT) {
		struct pf_ike_hdr ike;
		/* Payload bytes available after the UDP header. */
		size_t plen = pbuf->pb_packet_len - off - sizeof(*uh);
		if (plen < PF_IKE_PACKET_MINSIZE) {
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: IKE message too small.\n"));
			return PF_DROP;
		}

		/* Copy at most one pf_ike_hdr's worth of payload. */
		if (plen > sizeof(ike)) {
			plen = sizeof(ike);
		}
		pbuf_copy_data(pbuf, off + sizeof(*uh), plen, &ike);

		if (ike.initiator_cookie) {
			key.app_state = &as;
			as.compare_lan_ext = pf_ike_compare;
			as.compare_ext_gwy = pf_ike_compare;
			as.u.ike.cookie = ike.initiator_cookie;
		} else {
			/*
			 * <http://tools.ietf.org/html/\
			 *    draft-ietf-ipsec-nat-t-ike-01>
			 * Support non-standard NAT-T implementations that
			 * push the ESP packet over the top of the IKE packet.
			 * Do not drop packet.
			 */
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: IKE initiator cookie = 0.\n"));
		}
	}

	*state = pf_find_state(kif, &key, direction);

	/*
	 * On a miss (and when not cookie-keyed), retry with progressively
	 * looser external-endpoint matching: address+direction, then
	 * endpoint-independent.
	 */
	if (!key.app_state && *state == 0) {
		key.proto_variant = PF_EXTFILTER_AD;
		*state = pf_find_state(kif, &key, direction);
	}

	if (!key.app_state && *state == 0) {
		key.proto_variant = PF_EXTFILTER_EI;
		*state = pf_find_state(kif, &key, direction);
	}

	/* similar to STATE_LOOKUP() */
	/* Propagate the state's flow id onto the packet if it has none yet. */
	if (*state != NULL && pd != NULL && !(pd->pktflags & PKTF_FLOW_ID)) {
		pd->flowsrc = (*state)->state_key->flowsrc;
		pd->flowhash = (*state)->state_key->flowhash;
		if (pd->flowhash != 0) {
			pd->pktflags |= PKTF_FLOW_ID;
			pd->pktflags &= ~PKTF_FLOW_ADV;
		}
	}

	if (pf_state_lookup_aux(state, kif, direction, &action)) {
		return action;
	}

	sk = (*state)->state_key;

	/*
	 * In case of NAT64 the translation is first applied on the LAN
	 * side. Therefore for stack's address family comparison
	 * we use sk->af_lan.
	 */
	if ((direction == sk->direction) && (pd->af == sk->af_lan)) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	/* SINGLE = one direction seen; MULTIPLE = both directions seen. */
	if (src->state < PFUDPS_SINGLE) {
		src->state = PFUDPS_SINGLE;
	}
	if (dst->state == PFUDPS_SINGLE) {
		dst->state = PFUDPS_MULTIPLE;
	}

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) {
		(*state)->timeout = PFTM_UDP_MULTIPLE;
	} else {
		(*state)->timeout = PFTM_UDP_SINGLE;
	}

	/*
	 * For states created with a loosened external filter, record the
	 * external endpoint actually observed (port always; address only
	 * for the loosest, endpoint-independent, variant).
	 */
	extfilter = sk->proto_variant;
	if (extfilter > PF_EXTFILTER_APD) {
		if (direction == PF_OUT) {
			sk->ext_lan.xport.port = key.ext_lan.xport.port;
			if (extfilter > PF_EXTFILTER_AD) {
				PF_ACPY(&sk->ext_lan.addr, &key.ext_lan.addr,
				    key.af_lan);
			}
		} else {
			sk->ext_gwy.xport.port = key.ext_gwy.xport.port;
			if (extfilter > PF_EXTFILTER_AD) {
				PF_ACPY(&sk->ext_gwy.addr, &key.ext_gwy.addr,
				    key.af_gwy);
			}
		}
	}

	/*
	 * Run the application-level helper, if any; it may reallocate the
	 * writable buffer (pd->lmw < 0 signals an allocation failure).
	 */
	if (sk->app_state && sk->app_state->handler) {
		sk->app_state->handler(*state, direction, off + uh->uh_ulen,
		    pd, kif);
		if (pd->lmw < 0) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}
		pbuf = pd->mp;  // XXXSCW: Why?
	}

	/* translate source/destination address, if necessary */
	if (STATE_TRANSLATE(sk)) {
		/* Need the UDP header writable to rewrite ports/checksum. */
		if (pf_lazy_makewritable(pd, pbuf, off + sizeof(*uh)) == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		/* Post-translation family: the "other" side's family. */
		pd->naf = (pd->af == sk->af_lan) ? sk->af_gwy : sk->af_lan;

		if (direction == PF_OUT) {
			pf_change_ap(direction, pd->mp, pd->src, &uh->uh_sport,
			    pd->ip_sum, &uh->uh_sum, &sk->gwy.addr,
			    sk->gwy.xport.port, 1, pd->af, pd->naf, 1);
		} else {
			/*
			 * Inbound with a family change (NAT64): rewrite both
			 * endpoints toward the state's addresses for the
			 * destination family.
			 */
			if (pd->af != pd->naf) {
				if (pd->af == sk->af_gwy) {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &uh->uh_dport, pd->ip_sum,
					    &uh->uh_sum, &sk->lan.addr,
					    sk->lan.xport.port, 1,
					    pd->af, pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &uh->uh_sport, pd->ip_sum,
					    &uh->uh_sum, &sk->ext_lan.addr,
					    uh->uh_sport, 1, pd->af,
					    pd->naf, 0);
				} else {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &uh->uh_dport, pd->ip_sum,
					    &uh->uh_sum, &sk->ext_gwy.addr,
					    uh->uh_dport, 1, pd->af,
					    pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &uh->uh_sport, pd->ip_sum,
					    &uh->uh_sum, &sk->gwy.addr,
					    sk->gwy.xport.port, 1, pd->af,
					    pd->naf, 0);
				}
			} else {
				/* Plain inbound NAT: restore the LAN endpoint. */
				pf_change_ap(direction, pd->mp, pd->dst,
				    &uh->uh_dport, pd->ip_sum,
				    &uh->uh_sum, &sk->lan.addr,
				    sk->lan.xport.port, 1,
				    pd->af, pd->naf, 1);
			}
		}

		/* Write the rewritten UDP header back into the packet. */
		pbuf_copy_back(pbuf, off, sizeof(*uh), uh);
		/* Cross-family state: finish with full NAT64 IP translation. */
		if (sk->af_lan != sk->af_gwy) {
			return pf_do_nat64(sk, pd, pbuf, off);
		}
	}
	return PF_PASS;
}
7692 
7693 static __attribute__((noinline)) int
pf_test_state_icmp(struct pf_state ** state,int direction,struct pfi_kif * kif,pbuf_t * pbuf,int off,void * h,struct pf_pdesc * pd,u_short * reason)7694 pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
7695     pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd, u_short *reason)
7696 {
7697 #pragma unused(h)
7698 	struct pf_addr  *saddr = pd->src, *daddr = pd->dst;
7699 	struct in_addr  srcv4_inaddr = saddr->v4addr;
7700 	u_int16_t        icmpid = 0, *icmpsum = NULL;
7701 	u_int8_t         icmptype = 0;
7702 	int              state_icmp = 0;
7703 	struct pf_state_key_cmp key;
7704 	struct pf_state_key     *sk;
7705 
7706 	struct pf_app_state as;
7707 	key.app_state = 0;
7708 
7709 	pd->off = off;
7710 
7711 	switch (pd->proto) {
7712 #if INET
7713 	case IPPROTO_ICMP:
7714 		icmptype = pd->hdr.icmp->icmp_type;
7715 		icmpid = pd->hdr.icmp->icmp_id;
7716 		icmpsum = &pd->hdr.icmp->icmp_cksum;
7717 
7718 		if (ICMP_ERRORTYPE(icmptype)) {
7719 			state_icmp++;
7720 		}
7721 		break;
7722 #endif /* INET */
7723 	case IPPROTO_ICMPV6:
7724 		icmptype = pd->hdr.icmp6->icmp6_type;
7725 		icmpid = pd->hdr.icmp6->icmp6_id;
7726 		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
7727 
7728 		if (ICMP6_ERRORTYPE(icmptype)) {
7729 			state_icmp++;
7730 		}
7731 		break;
7732 	}
7733 
7734 	if (!state_icmp) {
7735 		/*
7736 		 * ICMP query/reply message not related to a TCP/UDP packet.
7737 		 * Search for an ICMP state.
7738 		 */
7739 		/*
7740 		 * NAT64 requires protocol translation  between ICMPv4
7741 		 * and ICMPv6. TCP and UDP do not require protocol
7742 		 * translation. To avoid adding complexity just to
7743 		 * handle ICMP(v4addr/v6addr), we always lookup  for
7744 		 * proto = IPPROTO_ICMP on both LAN and WAN side
7745 		 */
7746 		key.proto = IPPROTO_ICMP;
7747 		key.af_lan = key.af_gwy = pd->af;
7748 
7749 		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
7750 		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
7751 		key.ext_gwy.xport.port = 0;
7752 		key.gwy.xport.port = icmpid;
7753 
7754 		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
7755 		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
7756 		key.lan.xport.port = icmpid;
7757 		key.ext_lan.xport.port = 0;
7758 
7759 		STATE_LOOKUP();
7760 
7761 		sk = (*state)->state_key;
7762 		(*state)->expire = pf_time_second();
7763 		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
7764 
7765 		/* translate source/destination address, if necessary */
7766 		if (STATE_TRANSLATE(sk)) {
7767 			pd->naf = (pd->af == sk->af_lan) ?
7768 			    sk->af_gwy : sk->af_lan;
7769 			if (direction == PF_OUT) {
7770 				switch (pd->af) {
7771 #if INET
7772 				case AF_INET:
7773 					pf_change_a(&saddr->v4addr.s_addr,
7774 					    pd->ip_sum,
7775 					    sk->gwy.addr.v4addr.s_addr, 0);
7776 					pd->hdr.icmp->icmp_cksum =
7777 					    pf_cksum_fixup(
7778 						pd->hdr.icmp->icmp_cksum, icmpid,
7779 						sk->gwy.xport.port, 0);
7780 					pd->hdr.icmp->icmp_id =
7781 					    sk->gwy.xport.port;
7782 					if (pf_lazy_makewritable(pd, pbuf,
7783 					    off + ICMP_MINLEN) == NULL) {
7784 						return PF_DROP;
7785 					}
7786 					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
7787 					    pd->hdr.icmp);
7788 					break;
7789 #endif /* INET */
7790 				case AF_INET6:
7791 					pf_change_a6(saddr,
7792 					    &pd->hdr.icmp6->icmp6_cksum,
7793 					    &sk->gwy.addr, 0);
7794 					if (pf_lazy_makewritable(pd, pbuf,
7795 					    off + sizeof(struct icmp6_hdr)) ==
7796 					    NULL) {
7797 						return PF_DROP;
7798 					}
7799 					pbuf_copy_back(pbuf, off,
7800 					    sizeof(struct icmp6_hdr),
7801 					    pd->hdr.icmp6);
7802 					break;
7803 				}
7804 			} else {
7805 				switch (pd->af) {
7806 #if INET
7807 				case AF_INET:
7808 					if (pd->naf != AF_INET) {
7809 						if (pf_translate_icmp_af(
7810 							    AF_INET6, pd->hdr.icmp)) {
7811 							return PF_DROP;
7812 						}
7813 
7814 						pd->proto = IPPROTO_ICMPV6;
7815 					} else {
7816 						pf_change_a(&daddr->v4addr.s_addr,
7817 						    pd->ip_sum,
7818 						    sk->lan.addr.v4addr.s_addr, 0);
7819 
7820 						pd->hdr.icmp->icmp_cksum =
7821 						    pf_cksum_fixup(
7822 							pd->hdr.icmp->icmp_cksum,
7823 							icmpid, sk->lan.xport.port, 0);
7824 
7825 						pd->hdr.icmp->icmp_id =
7826 						    sk->lan.xport.port;
7827 					}
7828 
7829 					if (pf_lazy_makewritable(pd, pbuf,
7830 					    off + ICMP_MINLEN) == NULL) {
7831 						return PF_DROP;
7832 					}
7833 					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
7834 					    pd->hdr.icmp);
7835 					if (sk->af_lan != sk->af_gwy) {
7836 						return pf_do_nat64(sk, pd,
7837 						           pbuf, off);
7838 					}
7839 					break;
7840 #endif /* INET */
7841 				case AF_INET6:
7842 					if (pd->naf != AF_INET6) {
7843 						if (pf_translate_icmp_af(
7844 							    AF_INET, pd->hdr.icmp6)) {
7845 							return PF_DROP;
7846 						}
7847 
7848 						pd->proto = IPPROTO_ICMP;
7849 					} else {
7850 						pf_change_a6(daddr,
7851 						    &pd->hdr.icmp6->icmp6_cksum,
7852 						    &sk->lan.addr, 0);
7853 					}
7854 					if (pf_lazy_makewritable(pd, pbuf,
7855 					    off + sizeof(struct icmp6_hdr)) ==
7856 					    NULL) {
7857 						return PF_DROP;
7858 					}
7859 					pbuf_copy_back(pbuf, off,
7860 					    sizeof(struct icmp6_hdr),
7861 					    pd->hdr.icmp6);
7862 					if (sk->af_lan != sk->af_gwy) {
7863 						return pf_do_nat64(sk, pd,
7864 						           pbuf, off);
7865 					}
7866 					break;
7867 				}
7868 			}
7869 		}
7870 
7871 		return PF_PASS;
7872 	} else {
7873 		/*
7874 		 * ICMP error message in response to a TCP/UDP packet.
7875 		 * Extract the inner TCP/UDP header and search for that state.
7876 		 */
7877 		struct pf_pdesc pd2; /* For inner (original) header */
7878 #if INET
7879 		struct ip       h2;
7880 #endif /* INET */
7881 		struct ip6_hdr  h2_6;
7882 		int             terminal = 0;
7883 		int             ipoff2 = 0;
7884 		int             off2 = 0;
7885 
7886 		memset(&pd2, 0, sizeof(pd2));
7887 
7888 		pd2.af = pd->af;
7889 		switch (pd->af) {
7890 #if INET
7891 		case AF_INET:
7892 			/* offset of h2 in mbuf chain */
7893 			ipoff2 = off + ICMP_MINLEN;
7894 
7895 			if (!pf_pull_hdr(pbuf, ipoff2, &h2, sizeof(h2),
7896 			    NULL, reason, pd2.af)) {
7897 				DPFPRINTF(PF_DEBUG_MISC,
7898 				    ("pf: ICMP error message too short "
7899 				    "(ip)\n"));
7900 				return PF_DROP;
7901 			}
7902 			/*
7903 			 * ICMP error messages don't refer to non-first
7904 			 * fragments
7905 			 */
7906 			if (h2.ip_off & htons(IP_OFFMASK)) {
7907 				REASON_SET(reason, PFRES_FRAG);
7908 				return PF_DROP;
7909 			}
7910 
7911 			/* offset of protocol header that follows h2 */
7912 			off2 = ipoff2 + (h2.ip_hl << 2);
7913 			/* TODO */
7914 			pd2.off = ipoff2 + (h2.ip_hl << 2);
7915 
7916 			pd2.proto = h2.ip_p;
7917 			pd2.src = (struct pf_addr *)&h2.ip_src;
7918 			pd2.dst = (struct pf_addr *)&h2.ip_dst;
7919 			pd2.ip_sum = &h2.ip_sum;
7920 			break;
7921 #endif /* INET */
7922 		case AF_INET6:
7923 			ipoff2 = off + sizeof(struct icmp6_hdr);
7924 
7925 			if (!pf_pull_hdr(pbuf, ipoff2, &h2_6, sizeof(h2_6),
7926 			    NULL, reason, pd2.af)) {
7927 				DPFPRINTF(PF_DEBUG_MISC,
7928 				    ("pf: ICMP error message too short "
7929 				    "(ip6)\n"));
7930 				return PF_DROP;
7931 			}
7932 			pd2.proto = h2_6.ip6_nxt;
7933 			pd2.src = (struct pf_addr *)(uintptr_t)&h2_6.ip6_src;
7934 			pd2.dst = (struct pf_addr *)(uintptr_t)&h2_6.ip6_dst;
7935 			pd2.ip_sum = NULL;
7936 			off2 = ipoff2 + sizeof(h2_6);
7937 			do {
7938 				switch (pd2.proto) {
7939 				case IPPROTO_FRAGMENT:
7940 					/*
7941 					 * ICMPv6 error messages for
7942 					 * non-first fragments
7943 					 */
7944 					REASON_SET(reason, PFRES_FRAG);
7945 					return PF_DROP;
7946 				case IPPROTO_AH:
7947 				case IPPROTO_HOPOPTS:
7948 				case IPPROTO_ROUTING:
7949 				case IPPROTO_DSTOPTS: {
7950 					/* get next header and header length */
7951 					struct ip6_ext opt6;
7952 
7953 					if (!pf_pull_hdr(pbuf, off2, &opt6,
7954 					    sizeof(opt6), NULL, reason,
7955 					    pd2.af)) {
7956 						DPFPRINTF(PF_DEBUG_MISC,
7957 						    ("pf: ICMPv6 short opt\n"));
7958 						return PF_DROP;
7959 					}
7960 					if (pd2.proto == IPPROTO_AH) {
7961 						off2 += (opt6.ip6e_len + 2) * 4;
7962 					} else {
7963 						off2 += (opt6.ip6e_len + 1) * 8;
7964 					}
7965 					pd2.proto = opt6.ip6e_nxt;
7966 					/* goto the next header */
7967 					break;
7968 				}
7969 				default:
7970 					terminal++;
7971 					break;
7972 				}
7973 			} while (!terminal);
7974 			/* TODO */
7975 			pd2.off = ipoff2;
7976 			break;
7977 		}
7978 
7979 		switch (pd2.proto) {
7980 		case IPPROTO_TCP: {
7981 			struct tcphdr            th;
7982 			u_int32_t                seq;
7983 			struct pf_state_peer    *src, *dst;
7984 			u_int8_t                 dws;
7985 			int                      copyback = 0;
7986 
7987 			/*
7988 			 * Only the first 8 bytes of the TCP header can be
7989 			 * expected. Don't access any TCP header fields after
7990 			 * th_seq, an ackskew test is not possible.
7991 			 */
7992 			if (!pf_pull_hdr(pbuf, off2, &th, 8, NULL, reason,
7993 			    pd2.af)) {
7994 				DPFPRINTF(PF_DEBUG_MISC,
7995 				    ("pf: ICMP error message too short "
7996 				    "(tcp)\n"));
7997 				return PF_DROP;
7998 			}
7999 
8000 			key.proto = IPPROTO_TCP;
8001 			key.af_gwy = pd2.af;
8002 			PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8003 			PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8004 			key.ext_gwy.xport.port = th.th_dport;
8005 			key.gwy.xport.port = th.th_sport;
8006 
8007 			key.af_lan = pd2.af;
8008 			PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8009 			PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8010 			key.lan.xport.port = th.th_dport;
8011 			key.ext_lan.xport.port = th.th_sport;
8012 
8013 			STATE_LOOKUP();
8014 
8015 			sk = (*state)->state_key;
8016 			if ((direction == sk->direction) &&
8017 			    ((sk->af_lan == sk->af_gwy) ||
8018 			    (pd2.af == sk->af_lan))) {
8019 				src = &(*state)->dst;
8020 				dst = &(*state)->src;
8021 			} else {
8022 				src = &(*state)->src;
8023 				dst = &(*state)->dst;
8024 			}
8025 
8026 			if (src->wscale && (dst->wscale & PF_WSCALE_FLAG)) {
8027 				dws = dst->wscale & PF_WSCALE_MASK;
8028 			} else {
8029 				dws = TCP_MAX_WINSHIFT;
8030 			}
8031 
8032 			/* Demodulate sequence number */
8033 			seq = ntohl(th.th_seq) - src->seqdiff;
8034 			if (src->seqdiff) {
8035 				pf_change_a(&th.th_seq, icmpsum,
8036 				    htonl(seq), 0);
8037 				copyback = 1;
8038 			}
8039 
8040 			if (!SEQ_GEQ(src->seqhi, seq) ||
8041 			    !SEQ_GEQ(seq,
8042 			    src->seqlo - ((u_int32_t)dst->max_win << dws))) {
8043 				if (pf_status.debug >= PF_DEBUG_MISC) {
8044 					printf("pf: BAD ICMP %d:%d ",
8045 					    icmptype, pd->hdr.icmp->icmp_code);
8046 					pf_print_host(pd->src, 0, pd->af);
8047 					printf(" -> ");
8048 					pf_print_host(pd->dst, 0, pd->af);
8049 					printf(" state: ");
8050 					pf_print_state(*state);
8051 					printf(" seq=%u\n", seq);
8052 				}
8053 				REASON_SET(reason, PFRES_BADSTATE);
8054 				return PF_DROP;
8055 			}
8056 
8057 			pd->naf = pd2.naf = (pd2.af == sk->af_lan) ?
8058 			    sk->af_gwy : sk->af_lan;
8059 
8060 			if (STATE_TRANSLATE(sk)) {
8061 				/* NAT64 case */
8062 				if (sk->af_lan != sk->af_gwy) {
8063 					struct pf_state_host *saddr2, *daddr2;
8064 
8065 					if (pd2.naf == sk->af_lan) {
8066 						saddr2 = &sk->lan;
8067 						daddr2 = &sk->ext_lan;
8068 					} else {
8069 						saddr2 = &sk->ext_gwy;
8070 						daddr2 = &sk->gwy;
8071 					}
8072 
8073 					/* translate ICMP message types and codes */
8074 					if (pf_translate_icmp_af(pd->naf,
8075 					    pd->hdr.icmp)) {
8076 						return PF_DROP;
8077 					}
8078 
8079 					if (pf_lazy_makewritable(pd, pbuf,
8080 					    off2 + 8) == NULL) {
8081 						return PF_DROP;
8082 					}
8083 
8084 					pbuf_copy_back(pbuf, pd->off,
8085 					    sizeof(struct icmp6_hdr),
8086 					    pd->hdr.icmp6);
8087 
8088 					/*
8089 					 * translate inner ip header within the
8090 					 * ICMP message
8091 					 */
8092 					if (pf_change_icmp_af(pbuf, ipoff2, pd,
8093 					    &pd2, &saddr2->addr, &daddr2->addr,
8094 					    pd->af, pd->naf)) {
8095 						return PF_DROP;
8096 					}
8097 
8098 					if (pd->naf == AF_INET) {
8099 						pd->proto = IPPROTO_ICMP;
8100 					} else {
8101 						pd->proto = IPPROTO_ICMPV6;
8102 					}
8103 
8104 					/*
8105 					 * translate inner tcp header within
8106 					 * the ICMP message
8107 					 */
8108 					pf_change_ap(direction, NULL, pd2.src,
8109 					    &th.th_sport, pd2.ip_sum,
8110 					    &th.th_sum, &daddr2->addr,
8111 					    saddr2->xport.port, 0, pd2.af,
8112 					    pd2.naf, 0);
8113 
8114 					pf_change_ap(direction, NULL, pd2.dst,
8115 					    &th.th_dport, pd2.ip_sum,
8116 					    &th.th_sum, &saddr2->addr,
8117 					    daddr2->xport.port, 0, pd2.af,
8118 					    pd2.naf, 0);
8119 
8120 					pbuf_copy_back(pbuf, pd2.off, 8, &th);
8121 
8122 					/* translate outer ip header */
8123 					PF_ACPY(&pd->naddr, &daddr2->addr,
8124 					    pd->naf);
8125 					PF_ACPY(&pd->ndaddr, &saddr2->addr,
8126 					    pd->naf);
8127 					if (pd->af == AF_INET) {
8128 						memcpy(&pd->naddr.addr32[3],
8129 						    &srcv4_inaddr,
8130 						    sizeof(pd->naddr.addr32[3]));
8131 						return pf_nat64_ipv4(pbuf, off,
8132 						           pd);
8133 					} else {
8134 						return pf_nat64_ipv6(pbuf, off,
8135 						           pd);
8136 					}
8137 				}
8138 				if (direction == PF_IN) {
8139 					pf_change_icmp(pd2.src, &th.th_sport,
8140 					    daddr, &sk->lan.addr,
8141 					    sk->lan.xport.port, NULL,
8142 					    pd2.ip_sum, icmpsum,
8143 					    pd->ip_sum, 0, pd2.af);
8144 				} else {
8145 					pf_change_icmp(pd2.dst, &th.th_dport,
8146 					    saddr, &sk->gwy.addr,
8147 					    sk->gwy.xport.port, NULL,
8148 					    pd2.ip_sum, icmpsum,
8149 					    pd->ip_sum, 0, pd2.af);
8150 				}
8151 				copyback = 1;
8152 			}
8153 
8154 			if (copyback) {
8155 				if (pf_lazy_makewritable(pd, pbuf, off2 + 8) ==
8156 				    NULL) {
8157 					return PF_DROP;
8158 				}
8159 				switch (pd2.af) {
8160 #if INET
8161 				case AF_INET:
8162 					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
8163 					    pd->hdr.icmp);
8164 					pbuf_copy_back(pbuf, ipoff2, sizeof(h2),
8165 					    &h2);
8166 					break;
8167 #endif /* INET */
8168 				case AF_INET6:
8169 					pbuf_copy_back(pbuf, off,
8170 					    sizeof(struct icmp6_hdr),
8171 					    pd->hdr.icmp6);
8172 					pbuf_copy_back(pbuf, ipoff2,
8173 					    sizeof(h2_6), &h2_6);
8174 					break;
8175 				}
8176 				pbuf_copy_back(pbuf, off2, 8, &th);
8177 			}
8178 
8179 			return PF_PASS;
8180 		}
8181 		case IPPROTO_UDP: {
8182 			struct udphdr uh;
8183 			int dx, action;
8184 			if (!pf_pull_hdr(pbuf, off2, &uh, sizeof(uh),
8185 			    NULL, reason, pd2.af)) {
8186 				DPFPRINTF(PF_DEBUG_MISC,
8187 				    ("pf: ICMP error message too short "
8188 				    "(udp)\n"));
8189 				return PF_DROP;
8190 			}
8191 
8192 			key.af_gwy = pd2.af;
8193 			PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8194 			PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8195 			key.ext_gwy.xport.port = uh.uh_dport;
8196 			key.gwy.xport.port = uh.uh_sport;
8197 
8198 			key.af_lan = pd2.af;
8199 			PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8200 			PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8201 			key.lan.xport.port = uh.uh_dport;
8202 			key.ext_lan.xport.port = uh.uh_sport;
8203 
8204 			key.proto = IPPROTO_UDP;
8205 			key.proto_variant = PF_EXTFILTER_APD;
8206 			dx = direction;
8207 
8208 			if (ntohs(uh.uh_sport) == PF_IKE_PORT &&
8209 			    ntohs(uh.uh_dport) == PF_IKE_PORT) {
8210 				struct pf_ike_hdr ike;
8211 				size_t plen = pbuf->pb_packet_len - off2 -
8212 				    sizeof(uh);
8213 				if (direction == PF_IN &&
8214 				    plen < 8 /* PF_IKE_PACKET_MINSIZE */) {
8215 					DPFPRINTF(PF_DEBUG_MISC, ("pf: "
8216 					    "ICMP error, embedded IKE message "
8217 					    "too small.\n"));
8218 					return PF_DROP;
8219 				}
8220 
8221 				if (plen > sizeof(ike)) {
8222 					plen = sizeof(ike);
8223 				}
8224 				pbuf_copy_data(pbuf, off + sizeof(uh), plen,
8225 				    &ike);
8226 
8227 				key.app_state = &as;
8228 				as.compare_lan_ext = pf_ike_compare;
8229 				as.compare_ext_gwy = pf_ike_compare;
8230 				as.u.ike.cookie = ike.initiator_cookie;
8231 			}
8232 
8233 			*state = pf_find_state(kif, &key, dx);
8234 
8235 			if (key.app_state && *state == 0) {
8236 				key.app_state = 0;
8237 				*state = pf_find_state(kif, &key, dx);
8238 			}
8239 
8240 			if (*state == 0) {
8241 				key.proto_variant = PF_EXTFILTER_AD;
8242 				*state = pf_find_state(kif, &key, dx);
8243 			}
8244 
8245 			if (*state == 0) {
8246 				key.proto_variant = PF_EXTFILTER_EI;
8247 				*state = pf_find_state(kif, &key, dx);
8248 			}
8249 
8250 			/* similar to STATE_LOOKUP() */
8251 			if (*state != NULL && pd != NULL &&
8252 			    !(pd->pktflags & PKTF_FLOW_ID)) {
8253 				pd->flowsrc = (*state)->state_key->flowsrc;
8254 				pd->flowhash = (*state)->state_key->flowhash;
8255 				if (pd->flowhash != 0) {
8256 					pd->pktflags |= PKTF_FLOW_ID;
8257 					pd->pktflags &= ~PKTF_FLOW_ADV;
8258 				}
8259 			}
8260 
8261 			if (pf_state_lookup_aux(state, kif, direction, &action)) {
8262 				return action;
8263 			}
8264 
8265 			sk = (*state)->state_key;
8266 			pd->naf = pd2.naf = (pd2.af == sk->af_lan) ?
8267 			    sk->af_gwy : sk->af_lan;
8268 
8269 			if (STATE_TRANSLATE(sk)) {
8270 				/* NAT64 case */
8271 				if (sk->af_lan != sk->af_gwy) {
8272 					struct pf_state_host *saddr2, *daddr2;
8273 
8274 					if (pd2.naf == sk->af_lan) {
8275 						saddr2 = &sk->lan;
8276 						daddr2 = &sk->ext_lan;
8277 					} else {
8278 						saddr2 = &sk->ext_gwy;
8279 						daddr2 = &sk->gwy;
8280 					}
8281 
8282 					/* translate ICMP message */
8283 					if (pf_translate_icmp_af(pd->naf,
8284 					    pd->hdr.icmp)) {
8285 						return PF_DROP;
8286 					}
8287 					if (pf_lazy_makewritable(pd, pbuf,
8288 					    off2 + 8) == NULL) {
8289 						return PF_DROP;
8290 					}
8291 
8292 					pbuf_copy_back(pbuf, pd->off,
8293 					    sizeof(struct icmp6_hdr),
8294 					    pd->hdr.icmp6);
8295 
8296 					/*
8297 					 * translate inner ip header within the
8298 					 * ICMP message
8299 					 */
8300 					if (pf_change_icmp_af(pbuf, ipoff2, pd,
8301 					    &pd2, &saddr2->addr, &daddr2->addr,
8302 					    pd->af, pd->naf)) {
8303 						return PF_DROP;
8304 					}
8305 
8306 					if (pd->naf == AF_INET) {
8307 						pd->proto = IPPROTO_ICMP;
8308 					} else {
8309 						pd->proto = IPPROTO_ICMPV6;
8310 					}
8311 
8312 					/*
8313 					 * translate inner udp header within
8314 					 * the ICMP message
8315 					 */
8316 					pf_change_ap(direction, NULL, pd2.src,
8317 					    &uh.uh_sport, pd2.ip_sum,
8318 					    &uh.uh_sum, &daddr2->addr,
8319 					    saddr2->xport.port, 0, pd2.af,
8320 					    pd2.naf, 0);
8321 
8322 					pf_change_ap(direction, NULL, pd2.dst,
8323 					    &uh.uh_dport, pd2.ip_sum,
8324 					    &uh.uh_sum, &saddr2->addr,
8325 					    daddr2->xport.port, 0, pd2.af,
8326 					    pd2.naf, 0);
8327 
8328 					pbuf_copy_back(pbuf, pd2.off,
8329 					    sizeof(uh), &uh);
8330 
8331 					/* translate outer ip header */
8332 					PF_ACPY(&pd->naddr, &daddr2->addr,
8333 					    pd->naf);
8334 					PF_ACPY(&pd->ndaddr, &saddr2->addr,
8335 					    pd->naf);
8336 					if (pd->af == AF_INET) {
8337 						memcpy(&pd->naddr.addr32[3],
8338 						    &srcv4_inaddr,
8339 						    sizeof(pd->naddr.addr32[3]));
8340 						return pf_nat64_ipv4(pbuf, off,
8341 						           pd);
8342 					} else {
8343 						return pf_nat64_ipv6(pbuf, off,
8344 						           pd);
8345 					}
8346 				}
8347 				if (direction == PF_IN) {
8348 					pf_change_icmp(pd2.src, &uh.uh_sport,
8349 					    daddr, &sk->lan.addr,
8350 					    sk->lan.xport.port, &uh.uh_sum,
8351 					    pd2.ip_sum, icmpsum,
8352 					    pd->ip_sum, 1, pd2.af);
8353 				} else {
8354 					pf_change_icmp(pd2.dst, &uh.uh_dport,
8355 					    saddr, &sk->gwy.addr,
8356 					    sk->gwy.xport.port, &uh.uh_sum,
8357 					    pd2.ip_sum, icmpsum,
8358 					    pd->ip_sum, 1, pd2.af);
8359 				}
8360 				if (pf_lazy_makewritable(pd, pbuf,
8361 				    off2 + sizeof(uh)) == NULL) {
8362 					return PF_DROP;
8363 				}
8364 				switch (pd2.af) {
8365 #if INET
8366 				case AF_INET:
8367 					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
8368 					    pd->hdr.icmp);
8369 					pbuf_copy_back(pbuf, ipoff2,
8370 					    sizeof(h2), &h2);
8371 					break;
8372 #endif /* INET */
8373 				case AF_INET6:
8374 					pbuf_copy_back(pbuf, off,
8375 					    sizeof(struct icmp6_hdr),
8376 					    pd->hdr.icmp6);
8377 					pbuf_copy_back(pbuf, ipoff2,
8378 					    sizeof(h2_6), &h2_6);
8379 					break;
8380 				}
8381 				pbuf_copy_back(pbuf, off2, sizeof(uh), &uh);
8382 			}
8383 
8384 			return PF_PASS;
8385 		}
8386 #if INET
8387 		case IPPROTO_ICMP: {
8388 			struct icmp             iih;
8389 
8390 			if (!pf_pull_hdr(pbuf, off2, &iih, ICMP_MINLEN,
8391 			    NULL, reason, pd2.af)) {
8392 				DPFPRINTF(PF_DEBUG_MISC,
8393 				    ("pf: ICMP error message too short i"
8394 				    "(icmp)\n"));
8395 				return PF_DROP;
8396 			}
8397 
8398 			key.proto = IPPROTO_ICMP;
8399 			if (direction == PF_IN) {
8400 				key.af_gwy = pd2.af;
8401 				PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8402 				PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8403 				key.ext_gwy.xport.port = 0;
8404 				key.gwy.xport.port = iih.icmp_id;
8405 			} else {
8406 				key.af_lan = pd2.af;
8407 				PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8408 				PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8409 				key.lan.xport.port = iih.icmp_id;
8410 				key.ext_lan.xport.port = 0;
8411 			}
8412 
8413 			STATE_LOOKUP();
8414 
8415 			sk = (*state)->state_key;
8416 			if (STATE_TRANSLATE(sk)) {
8417 				if (direction == PF_IN) {
8418 					pf_change_icmp(pd2.src, &iih.icmp_id,
8419 					    daddr, &sk->lan.addr,
8420 					    sk->lan.xport.port, NULL,
8421 					    pd2.ip_sum, icmpsum,
8422 					    pd->ip_sum, 0, AF_INET);
8423 				} else {
8424 					pf_change_icmp(pd2.dst, &iih.icmp_id,
8425 					    saddr, &sk->gwy.addr,
8426 					    sk->gwy.xport.port, NULL,
8427 					    pd2.ip_sum, icmpsum,
8428 					    pd->ip_sum, 0, AF_INET);
8429 				}
8430 				if (pf_lazy_makewritable(pd, pbuf,
8431 				    off2 + ICMP_MINLEN) == NULL) {
8432 					return PF_DROP;
8433 				}
8434 				pbuf_copy_back(pbuf, off, ICMP_MINLEN,
8435 				    pd->hdr.icmp);
8436 				pbuf_copy_back(pbuf, ipoff2, sizeof(h2), &h2);
8437 				pbuf_copy_back(pbuf, off2, ICMP_MINLEN, &iih);
8438 			}
8439 
8440 			return PF_PASS;
8441 		}
8442 #endif /* INET */
8443 		case IPPROTO_ICMPV6: {
8444 			struct icmp6_hdr        iih;
8445 
8446 			if (!pf_pull_hdr(pbuf, off2, &iih,
8447 			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
8448 				DPFPRINTF(PF_DEBUG_MISC,
8449 				    ("pf: ICMP error message too short "
8450 				    "(icmp6)\n"));
8451 				return PF_DROP;
8452 			}
8453 
8454 			key.proto = IPPROTO_ICMPV6;
8455 			if (direction == PF_IN) {
8456 				key.af_gwy = pd2.af;
8457 				PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8458 				PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8459 				key.ext_gwy.xport.port = 0;
8460 				key.gwy.xport.port = iih.icmp6_id;
8461 			} else {
8462 				key.af_lan = pd2.af;
8463 				PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8464 				PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8465 				key.lan.xport.port = iih.icmp6_id;
8466 				key.ext_lan.xport.port = 0;
8467 			}
8468 
8469 			STATE_LOOKUP();
8470 
8471 			sk = (*state)->state_key;
8472 			if (STATE_TRANSLATE(sk)) {
8473 				if (direction == PF_IN) {
8474 					pf_change_icmp(pd2.src, &iih.icmp6_id,
8475 					    daddr, &sk->lan.addr,
8476 					    sk->lan.xport.port, NULL,
8477 					    pd2.ip_sum, icmpsum,
8478 					    pd->ip_sum, 0, AF_INET6);
8479 				} else {
8480 					pf_change_icmp(pd2.dst, &iih.icmp6_id,
8481 					    saddr, &sk->gwy.addr,
8482 					    sk->gwy.xport.port, NULL,
8483 					    pd2.ip_sum, icmpsum,
8484 					    pd->ip_sum, 0, AF_INET6);
8485 				}
8486 				if (pf_lazy_makewritable(pd, pbuf, off2 +
8487 				    sizeof(struct icmp6_hdr)) == NULL) {
8488 					return PF_DROP;
8489 				}
8490 				pbuf_copy_back(pbuf, off,
8491 				    sizeof(struct icmp6_hdr), pd->hdr.icmp6);
8492 				pbuf_copy_back(pbuf, ipoff2, sizeof(h2_6),
8493 				    &h2_6);
8494 				pbuf_copy_back(pbuf, off2,
8495 				    sizeof(struct icmp6_hdr), &iih);
8496 			}
8497 
8498 			return PF_PASS;
8499 		}
8500 		default: {
8501 			key.proto = pd2.proto;
8502 			if (direction == PF_IN) {
8503 				key.af_gwy = pd2.af;
8504 				PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8505 				PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8506 				key.ext_gwy.xport.port = 0;
8507 				key.gwy.xport.port = 0;
8508 			} else {
8509 				key.af_lan = pd2.af;
8510 				PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8511 				PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8512 				key.lan.xport.port = 0;
8513 				key.ext_lan.xport.port = 0;
8514 			}
8515 
8516 			STATE_LOOKUP();
8517 
8518 			sk = (*state)->state_key;
8519 			if (STATE_TRANSLATE(sk)) {
8520 				if (direction == PF_IN) {
8521 					pf_change_icmp(pd2.src, NULL, daddr,
8522 					    &sk->lan.addr, 0, NULL,
8523 					    pd2.ip_sum, icmpsum,
8524 					    pd->ip_sum, 0, pd2.af);
8525 				} else {
8526 					pf_change_icmp(pd2.dst, NULL, saddr,
8527 					    &sk->gwy.addr, 0, NULL,
8528 					    pd2.ip_sum, icmpsum,
8529 					    pd->ip_sum, 0, pd2.af);
8530 				}
8531 				switch (pd2.af) {
8532 #if INET
8533 				case AF_INET:
8534 					if (pf_lazy_makewritable(pd, pbuf,
8535 					    ipoff2 + sizeof(h2)) == NULL) {
8536 						return PF_DROP;
8537 					}
8538 					/*
8539 					 * <XXXSCW>
8540 					 * Xnu was missing the following...
8541 					 */
8542 					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
8543 					    pd->hdr.icmp);
8544 					pbuf_copy_back(pbuf, ipoff2,
8545 					    sizeof(h2), &h2);
8546 					break;
8547 					/*
8548 					 * </XXXSCW>
8549 					 */
8550 #endif /* INET */
8551 				case AF_INET6:
8552 					if (pf_lazy_makewritable(pd, pbuf,
8553 					    ipoff2 + sizeof(h2_6)) == NULL) {
8554 						return PF_DROP;
8555 					}
8556 					pbuf_copy_back(pbuf, off,
8557 					    sizeof(struct icmp6_hdr),
8558 					    pd->hdr.icmp6);
8559 					pbuf_copy_back(pbuf, ipoff2,
8560 					    sizeof(h2_6), &h2_6);
8561 					break;
8562 				}
8563 			}
8564 
8565 			return PF_PASS;
8566 		}
8567 		}
8568 	}
8569 }
8570 
/*
 * Track a GREv1 (PPTP data channel) packet against an existing state
 * entry.  The state is looked up by address pair and GRE call-id, the
 * INITIATING/ESTABLISHED state machine and expiry timers are advanced,
 * and when the state carries a translation the outer address (and, on
 * inbound packets, the call-id) is rewritten and copied back into the
 * packet.  Returns PF_PASS or PF_DROP.
 */
static __attribute__((noinline)) int
pf_test_state_grev1(struct pf_state **state, int direction,
    struct pfi_kif *kif, int off, struct pf_pdesc *pd)
{
	struct pf_state_peer *src;
	struct pf_state_peer *dst;
	struct pf_state_key_cmp key = {};
	struct pf_grev1_hdr *grev1 = pd->hdr.grev1;

	key.app_state = 0;
	key.proto = IPPROTO_GRE;
	key.proto_variant = PF_GRE_PPTP_VARIANT;
	if (direction == PF_IN) {
		/* inbound: key on the gateway-side call-id */
		key.af_gwy = pd->af;
		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
		key.gwy.xport.call_id = grev1->call_id;
	} else {
		/* outbound: key on the external (ext_lan) call-id */
		key.af_lan = pd->af;
		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
		key.ext_lan.xport.call_id = grev1->call_id;
	}

	/*
	 * Locates *state from "key"; the dereferences below assume the
	 * macro does not fall through when no state matches.
	 */
	STATE_LOOKUP();

	/* orient src/dst relative to the direction the state was created in */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFGRE1S_INITIATING) {
		src->state = PFGRE1S_INITIATING;
	}

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state >= PFGRE1S_INITIATING &&
	    dst->state >= PFGRE1S_INITIATING) {
		/*
		 * NOTE(review): a timeout of PFTM_TCP_ESTABLISHED is left
		 * untouched -- presumably set elsewhere for the companion
		 * PPTP control connection; TODO confirm.
		 */
		if ((*state)->timeout != PFTM_TCP_ESTABLISHED) {
			(*state)->timeout = PFTM_GREv1_ESTABLISHED;
		}
		src->state = PFGRE1S_ESTABLISHED;
		dst->state = PFGRE1S_ESTABLISHED;
	} else {
		(*state)->timeout = PFTM_GREv1_INITIATING;
	}

	/* keep the parent PPTP app state alive as long as data flows */
	if ((*state)->state_key->app_state) {
		(*state)->state_key->app_state->u.grev1.pptp_state->expire =
		    pf_time_second();
	}

	/* translate source/destination address, if necessary */
	if (STATE_GRE_TRANSLATE((*state)->state_key)) {
		if (direction == PF_OUT) {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->src->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->gwy.addr.v4addr.s_addr, 0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->src, &(*state)->state_key->gwy.addr,
				    pd->af);
				break;
			}
		} else {
			/* inbound: restore the LAN-side call-id as well */
			grev1->call_id = (*state)->state_key->lan.xport.call_id;

			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->dst->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->lan.addr.v4addr.s_addr, 0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->dst, &(*state)->state_key->lan.addr,
				    pd->af);
				break;
			}
		}

		/* make the GRE header writable, then write it back */
		if (pf_lazy_makewritable(pd, pd->mp, off + sizeof(*grev1)) ==
		    NULL) {
			return PF_DROP;
		}
		pbuf_copy_back(pd->mp, off, sizeof(*grev1), grev1);
	}

	return PF_PASS;
}
8671 
/*
 * Track an ESP packet against existing state, keyed by SPI.  When no
 * exact match exists, search for a "blocking" state entered with a zero
 * SPI and re-key it to the SPI carried in this packet (see inline
 * comment below).  Advances the ESP INITIATING/ESTABLISHED state
 * machine, refreshes expiry timers, and rewrites the outer address when
 * the state carries a translation.  Returns PF_PASS, PF_DROP, or the
 * action chosen by pf_state_lookup_aux().
 */
static __attribute__((noinline)) int
pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif,
    int off, struct pf_pdesc *pd)
{
#pragma unused(off)
	struct pf_state_peer *src;
	struct pf_state_peer *dst;
	struct pf_state_key_cmp key;
	struct pf_esp_hdr *esp = pd->hdr.esp;
	int action;

	memset(&key, 0, sizeof(key));
	key.proto = IPPROTO_ESP;
	if (direction == PF_IN) {
		key.af_gwy = pd->af;
		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
		key.gwy.xport.spi = esp->spi;
	} else {
		key.af_lan = pd->af;
		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
		key.ext_lan.xport.spi = esp->spi;
	}

	*state = pf_find_state(kif, &key, direction);

	if (*state == 0) {
		struct pf_state *s;

		/*
		 * <[email protected]>
		 * No matching state.  Look for a blocking state.  If we find
		 * one, then use that state and move it so that it's keyed to
		 * the SPI in the current packet.
		 */
		if (direction == PF_IN) {
			key.gwy.xport.spi = 0;

			s = pf_find_state(kif, &key, direction);
			if (s) {
				struct pf_state_key *sk = s->state_key;

				/* re-key to the SPI seen in this packet */
				RB_REMOVE(pf_state_tree_ext_gwy,
				    &pf_statetbl_ext_gwy, sk);
				sk->lan.xport.spi = sk->gwy.xport.spi =
				    esp->spi;

				/*
				 * Re-insert; a non-NULL return means another
				 * state already owns this key, so detach this
				 * one instead of adopting it.
				 */
				if (RB_INSERT(pf_state_tree_ext_gwy,
				    &pf_statetbl_ext_gwy, sk)) {
					pf_detach_state(s, PF_DT_SKIP_EXTGWY);
				} else {
					*state = s;
				}
			}
		} else {
			key.ext_lan.xport.spi = 0;

			s = pf_find_state(kif, &key, direction);
			if (s) {
				struct pf_state_key *sk = s->state_key;

				/* re-key to the SPI seen in this packet */
				RB_REMOVE(pf_state_tree_lan_ext,
				    &pf_statetbl_lan_ext, sk);
				sk->ext_lan.xport.spi = esp->spi;

				/* as above: a duplicate key means detach */
				if (RB_INSERT(pf_state_tree_lan_ext,
				    &pf_statetbl_lan_ext, sk)) {
					pf_detach_state(s, PF_DT_SKIP_LANEXT);
				} else {
					*state = s;
				}
			}
		}

		/*
		 * A blocking state was found but could not be re-keyed:
		 * tear it down completely and drop the packet.
		 */
		if (s) {
			if (*state == 0) {
#if NPFSYNC
				if (s->creatorid == pf_status.hostid) {
					pfsync_delete_state(s);
				}
#endif
				s->timeout = PFTM_UNLINKED;
				hook_runloop(&s->unlink_hooks,
				    HOOK_REMOVE | HOOK_FREE);
				pf_src_tree_remove_state(s);
				pf_free_state(s);
				return PF_DROP;
			}
		}
	}

	/* similar to STATE_LOOKUP(): tag the packet with the state's flow id */
	if (*state != NULL && pd != NULL && !(pd->pktflags & PKTF_FLOW_ID)) {
		pd->flowsrc = (*state)->state_key->flowsrc;
		pd->flowhash = (*state)->state_key->flowhash;
		if (pd->flowhash != 0) {
			pd->pktflags |= PKTF_FLOW_ID;
			pd->pktflags &= ~PKTF_FLOW_ADV;
		}
	}

	if (pf_state_lookup_aux(state, kif, direction, &action)) {
		return action;
	}

	/* orient src/dst relative to the direction the state was created in */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFESPS_INITIATING) {
		src->state = PFESPS_INITIATING;
	}

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state >= PFESPS_INITIATING &&
	    dst->state >= PFESPS_INITIATING) {
		(*state)->timeout = PFTM_ESP_ESTABLISHED;
		src->state = PFESPS_ESTABLISHED;
		dst->state = PFESPS_ESTABLISHED;
	} else {
		(*state)->timeout = PFTM_ESP_INITIATING;
	}
	/* translate source/destination address, if necessary */
	if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
		if (direction == PF_OUT) {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->src->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->gwy.addr.v4addr.s_addr, 0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->src, &(*state)->state_key->gwy.addr,
				    pd->af);
				break;
			}
		} else {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->dst->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->lan.addr.v4addr.s_addr, 0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->dst, &(*state)->state_key->lan.addr,
				    pd->af);
				break;
			}
		}
	}

	return PF_PASS;
}
8836 
8837 static __attribute__((noinline)) int
pf_test_state_other(struct pf_state ** state,int direction,struct pfi_kif * kif,struct pf_pdesc * pd)8838 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
8839     struct pf_pdesc *pd)
8840 {
8841 	struct pf_state_peer    *src, *dst;
8842 	struct pf_state_key_cmp  key = {};
8843 
8844 	key.app_state = 0;
8845 	key.proto = pd->proto;
8846 	if (direction == PF_IN) {
8847 		key.af_gwy = pd->af;
8848 		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
8849 		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
8850 		key.ext_gwy.xport.port = 0;
8851 		key.gwy.xport.port = 0;
8852 	} else {
8853 		key.af_lan = pd->af;
8854 		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
8855 		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
8856 		key.lan.xport.port = 0;
8857 		key.ext_lan.xport.port = 0;
8858 	}
8859 
8860 	STATE_LOOKUP();
8861 
8862 	if (direction == (*state)->state_key->direction) {
8863 		src = &(*state)->src;
8864 		dst = &(*state)->dst;
8865 	} else {
8866 		src = &(*state)->dst;
8867 		dst = &(*state)->src;
8868 	}
8869 
8870 	/* update states */
8871 	if (src->state < PFOTHERS_SINGLE) {
8872 		src->state = PFOTHERS_SINGLE;
8873 	}
8874 	if (dst->state == PFOTHERS_SINGLE) {
8875 		dst->state = PFOTHERS_MULTIPLE;
8876 	}
8877 
8878 	/* update expire time */
8879 	(*state)->expire = pf_time_second();
8880 	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) {
8881 		(*state)->timeout = PFTM_OTHER_MULTIPLE;
8882 	} else {
8883 		(*state)->timeout = PFTM_OTHER_SINGLE;
8884 	}
8885 
8886 	/* translate source/destination address, if necessary */
8887 	if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
8888 		if (direction == PF_OUT) {
8889 			switch (pd->af) {
8890 #if INET
8891 			case AF_INET:
8892 				pf_change_a(&pd->src->v4addr.s_addr,
8893 				    pd->ip_sum,
8894 				    (*state)->state_key->gwy.addr.v4addr.s_addr,
8895 				    0);
8896 				break;
8897 #endif /* INET */
8898 			case AF_INET6:
8899 				PF_ACPY(pd->src,
8900 				    &(*state)->state_key->gwy.addr, pd->af);
8901 				break;
8902 			}
8903 		} else {
8904 			switch (pd->af) {
8905 #if INET
8906 			case AF_INET:
8907 				pf_change_a(&pd->dst->v4addr.s_addr,
8908 				    pd->ip_sum,
8909 				    (*state)->state_key->lan.addr.v4addr.s_addr,
8910 				    0);
8911 				break;
8912 #endif /* INET */
8913 			case AF_INET6:
8914 				PF_ACPY(pd->dst,
8915 				    &(*state)->state_key->lan.addr, pd->af);
8916 				break;
8917 			}
8918 		}
8919 	}
8920 
8921 	return PF_PASS;
8922 }
8923 
8924 /*
8925  * ipoff and off are measured from the start of the mbuf chain.
8926  * h must be at "ipoff" on the mbuf chain.
8927  */
8928 void *
pf_pull_hdr(pbuf_t * pbuf,int off,void * p,int len,u_short * actionp,u_short * reasonp,sa_family_t af)8929 pf_pull_hdr(pbuf_t *pbuf, int off, void *p, int len,
8930     u_short *actionp, u_short *reasonp, sa_family_t af)
8931 {
8932 	switch (af) {
8933 #if INET
8934 	case AF_INET: {
8935 		struct ip       *h = pbuf->pb_data;
8936 		u_int16_t        fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
8937 
8938 		if (fragoff) {
8939 			if (fragoff >= len) {
8940 				ACTION_SET(actionp, PF_PASS);
8941 			} else {
8942 				ACTION_SET(actionp, PF_DROP);
8943 				REASON_SET(reasonp, PFRES_FRAG);
8944 			}
8945 			return NULL;
8946 		}
8947 		if (pbuf->pb_packet_len < (unsigned)(off + len) ||
8948 		    ntohs(h->ip_len) < off + len) {
8949 			ACTION_SET(actionp, PF_DROP);
8950 			REASON_SET(reasonp, PFRES_SHORT);
8951 			return NULL;
8952 		}
8953 		break;
8954 	}
8955 #endif /* INET */
8956 	case AF_INET6: {
8957 		struct ip6_hdr  *h = pbuf->pb_data;
8958 
8959 		if (pbuf->pb_packet_len < (unsigned)(off + len) ||
8960 		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
8961 		    (unsigned)(off + len)) {
8962 			ACTION_SET(actionp, PF_DROP);
8963 			REASON_SET(reasonp, PFRES_SHORT);
8964 			return NULL;
8965 		}
8966 		break;
8967 	}
8968 	}
8969 	pbuf_copy_data(pbuf, off, len, p);
8970 	return p;
8971 }
8972 
8973 int
pf_routable(struct pf_addr * addr,sa_family_t af,struct pfi_kif * kif)8974 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
8975 {
8976 #pragma unused(kif)
8977 	struct sockaddr_in      *dst;
8978 	int                      ret = 1;
8979 	struct sockaddr_in6     *dst6;
8980 	struct route_in6         ro;
8981 
8982 	bzero(&ro, sizeof(ro));
8983 	switch (af) {
8984 	case AF_INET:
8985 		dst = satosin(&ro.ro_dst);
8986 		dst->sin_family = AF_INET;
8987 		dst->sin_len = sizeof(*dst);
8988 		dst->sin_addr = addr->v4addr;
8989 		break;
8990 	case AF_INET6:
8991 		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
8992 		dst6->sin6_family = AF_INET6;
8993 		dst6->sin6_len = sizeof(*dst6);
8994 		dst6->sin6_addr = addr->v6addr;
8995 		break;
8996 	default:
8997 		return 0;
8998 	}
8999 
9000 	/* XXX: IFT_ENC is not currently used by anything*/
9001 	/* Skip checks for ipsec interfaces */
9002 	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) {
9003 		goto out;
9004 	}
9005 
9006 	/* XXX: what is the point of this? */
9007 	rtalloc((struct route *)&ro);
9008 
9009 out:
9010 	ROUTE_RELEASE(&ro);
9011 	return ret;
9012 }
9013 
9014 int
pf_rtlabel_match(struct pf_addr * addr,sa_family_t af,struct pf_addr_wrap * aw)9015 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
9016 {
9017 #pragma unused(aw)
9018 	struct sockaddr_in      *dst;
9019 	struct sockaddr_in6     *dst6;
9020 	struct route_in6         ro;
9021 	int                      ret = 0;
9022 
9023 	bzero(&ro, sizeof(ro));
9024 	switch (af) {
9025 	case AF_INET:
9026 		dst = satosin(&ro.ro_dst);
9027 		dst->sin_family = AF_INET;
9028 		dst->sin_len = sizeof(*dst);
9029 		dst->sin_addr = addr->v4addr;
9030 		break;
9031 	case AF_INET6:
9032 		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
9033 		dst6->sin6_family = AF_INET6;
9034 		dst6->sin6_len = sizeof(*dst6);
9035 		dst6->sin6_addr = addr->v6addr;
9036 		break;
9037 	default:
9038 		return 0;
9039 	}
9040 
9041 	/* XXX: what is the point of this? */
9042 	rtalloc((struct route *)&ro);
9043 
9044 	ROUTE_RELEASE(&ro);
9045 
9046 	return ret;
9047 }
9048 
#if INET
/*
 * pf_route: forward an IPv4 packet out an alternate interface, as
 * directed by a route-to/reply-to/dup-to rule (r->rt) or fastroute.
 *
 * On entry *pbufp is the packet being filtered.  Except for the
 * PF_DUPTO case (which operates on a clone and leaves the original in
 * *pbufp), the packet is consumed: *pbufp is set to NULL and the
 * converted mbuf chain is either transmitted (ifnet_output(), possibly
 * after ip_fragment()) or freed via the "bad" path.
 *
 * "dir" is the direction of the triggering pf_test() call; "oifp" the
 * interface the packet was seen on; "s" an optional state entry whose
 * rt_addr/rt_kif override the rule pool; "pd" supplies the pf mtag
 * holding the routing-loop counter.
 */
static __attribute__((noinline)) void
pf_route(pbuf_t **pbufp, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
#pragma unused(pd)
	struct mbuf             *m0, *m1;
	struct route             iproute;
	struct route            *ro = &iproute;
	struct sockaddr_in      *dst;
	struct ip               *ip;
	struct ifnet            *ifp = NULL;
	struct pf_addr           naddr;
	struct pf_src_node      *sn = NULL;
	int                      error = 0;
	uint32_t                 sw_csum;
	int                      interface_mtu = 0;
	bzero(&iproute, sizeof(iproute));

	if (pbufp == NULL || !pbuf_is_valid(*pbufp) || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL) {
		panic("pf_route: invalid parameters");
	}

	/* Routing-loop guard: drop after the packet was routed 4 times. */
	if (pd->pf_mtag->pftag_routed++ > 3) {
		pbuf_destroy(*pbufp);
		*pbufp = NULL;
		m0 = NULL;
		goto bad;
	}

	/*
	 * Since this is something of an edge case and may involve the
	 * host stack (for routing, at least for now), we convert the
	 * incoming pbuf into an mbuf.
	 */
	if (r->rt == PF_DUPTO) {
		/* dup-to: route a copy, original stays in *pbufp. */
		m0 = pbuf_clone_to_mbuf(*pbufp);
	} else if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
		/* reply-to applies only against the rule's direction. */
		return;
	} else {
		/* We're going to consume this packet */
		m0 = pbuf_to_mbuf(*pbufp, TRUE);
		*pbufp = NULL;
	}

	if (m0 == NULL) {
		goto bad;
	}

	/* We now have the packet in an mbuf (m0) */

	if (m0->m_len < (int)sizeof(struct ip)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route: packet length < sizeof (struct ip)\n"));
		goto bad;
	}

	ip = mtod(m0, struct ip *);

	/* Default next hop: the packet's own destination. */
	dst = satosin((void *)&ro->ro_dst);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;

	if (r->rt == PF_FASTROUTE) {
		/* fastroute: plain routing-table lookup. */
		rtalloc(ro);
		if (ro->ro_rt == NULL) {
			ipstat.ips_noroute++;
			goto bad;
		}

		ifp = ro->ro_rt->rt_ifp;
		RT_LOCK(ro->ro_rt);
		ro->ro_rt->rt_use++;

		if (ro->ro_rt->rt_flags & RTF_GATEWAY) {
			dst = satosin((void *)ro->ro_rt->rt_gateway);
		}
		RT_UNLOCK(ro->ro_rt);
	} else {
		/* route-to/reply-to/dup-to: next hop from the rule pool. */
		if (TAILQ_EMPTY(&r->rpool.list)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
			goto bad;
		}
		if (s == NULL) {
			/* Stateless: pick a pool address now. */
			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
			    &naddr, NULL, &sn);
			if (!PF_AZERO(&naddr, AF_INET)) {
				dst->sin_addr.s_addr = naddr.v4addr.s_addr;
			}
			ifp = r->rpool.cur->kif ?
			    r->rpool.cur->kif->pfik_ifp : NULL;
		} else {
			/* Stateful: reuse the address bound to the state. */
			if (!PF_AZERO(&s->rt_addr, AF_INET)) {
				dst->sin_addr.s_addr =
				    s->rt_addr.v4addr.s_addr;
			}
			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
		}
	}
	if (ifp == NULL) {
		goto bad;
	}

	/* Leaving via a different interface: re-run the filter outbound. */
	if (oifp != ifp) {
		if (pf_test_mbuf(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			goto bad;
		} else if (m0 == NULL) {
			/* Filter consumed the packet. */
			goto done;
		}
		if (m0->m_len < (int)sizeof(struct ip)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: packet length < sizeof (struct ip)\n"));
			goto bad;
		}
		ip = mtod(m0, struct ip *);
	}

	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
	ip_output_checksum(ifp, m0, ((ip->ip_hl) << 2), ntohs(ip->ip_len),
	    &sw_csum);

	interface_mtu = ifp->if_mtu;

	if (INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
		interface_mtu = IN6_LINKMTU(ifp);
		/* Further adjust the size for CLAT46 expansion */
		interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
	}

	/* Fits the MTU, or hardware can TSO/fragment: send as one unit. */
	if (ntohs(ip->ip_len) <= interface_mtu || TSO_IPV4_OK(ifp, m0) ||
	    (!(ip->ip_off & htons(IP_DF)) &&
	    (ifp->if_hwassist & CSUM_FRAGMENT))) {
		ip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP) {
			/* Header checksum must be computed in software. */
			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
			sw_csum &= ~CSUM_DELAY_IP;
			m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP;
		}
		error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt, sintosa(dst));
		goto done;
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 * Balk when DF bit is set or the interface didn't support TSO.
	 */
	if ((ip->ip_off & htons(IP_DF)) ||
	    (m0->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) {
		ipstat.ips_cantfrag++;
		if (r->rt != PF_DUPTO) {
			/* Path-MTU discovery: tell sender to fragment. */
			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
			    interface_mtu);
			goto done;
		} else {
			goto bad;
		}
	}

	m1 = m0;

	/* PR-8933605: send ip_len,ip_off to ip_fragment in host byte order */
#if BYTE_ORDER != BIG_ENDIAN
	NTOHS(ip->ip_off);
	NTOHS(ip->ip_len);
#endif
	error = ip_fragment(m0, ifp, interface_mtu, sw_csum);

	if (error) {
		/* ip_fragment() disposed of the chain on failure. */
		m0 = NULL;
		goto bad;
	}

	/* Send each fragment; after a failure, free the remaining ones. */
	for (m0 = m1; m0; m0 = m1) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = 0;
		if (error == 0) {
			error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt,
			    sintosa(dst));
		} else {
			m_freem(m0);
		}
	}

	if (error == 0) {
		ipstat.ips_fragmented++;
	}

done:
	ROUTE_RELEASE(&iproute);
	return;

bad:
	if (m0) {
		m_freem(m0);
	}
	goto done;
}
#endif /* INET */
9251 
/*
 * pf_route6: IPv6 counterpart of pf_route() — forward a packet out an
 * alternate interface per a route-to/reply-to/dup-to rule or fastroute.
 *
 * Same ownership contract as pf_route(): PF_DUPTO routes a clone and
 * leaves the original in *pbufp; all other cases consume the packet
 * (*pbufp becomes NULL) and either transmit the converted mbuf via
 * ip6_output()/nd6_output() or free it on the "bad" path.
 */
static __attribute__((noinline)) void
pf_route6(pbuf_t **pbufp, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
#pragma unused(pd)
	struct mbuf             *m0;
	struct route_in6         ip6route;
	struct route_in6        *ro;
	struct sockaddr_in6     *dst;
	struct ip6_hdr          *ip6;
	struct ifnet            *ifp = NULL;
	struct pf_addr           naddr;
	struct pf_src_node      *sn = NULL;
	int                      error = 0;
	struct pf_mtag          *pf_mtag;

	if (pbufp == NULL || !pbuf_is_valid(*pbufp) || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL) {
		panic("pf_route6: invalid parameters");
	}

	/* Routing-loop guard: drop after the packet was routed 4 times. */
	if (pd->pf_mtag->pftag_routed++ > 3) {
		pbuf_destroy(*pbufp);
		*pbufp = NULL;
		m0 = NULL;
		goto bad;
	}

	/*
	 * Since this is something of an edge case and may involve the
	 * host stack (for routing, at least for now), we convert the
	 * incoming pbuf into an mbuf.
	 */
	if (r->rt == PF_DUPTO) {
		/* dup-to: route a copy, original stays in *pbufp. */
		m0 = pbuf_clone_to_mbuf(*pbufp);
	} else if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
		/* reply-to applies only against the rule's direction. */
		return;
	} else {
		/* We're about to consume this packet */
		m0 = pbuf_to_mbuf(*pbufp, TRUE);
		*pbufp = NULL;
	}

	if (m0 == NULL) {
		goto bad;
	}

	if (m0->m_len < (int)sizeof(struct ip6_hdr)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: m0->m_len < sizeof (struct ip6_hdr)\n"));
		goto bad;
	}
	ip6 = mtod(m0, struct ip6_hdr *);

	/* Default next hop: the packet's own destination. */
	ro = &ip6route;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = (struct sockaddr_in6 *)&ro->ro_dst;
	dst->sin6_family = AF_INET6;
	dst->sin6_len = sizeof(*dst);
	dst->sin6_addr = ip6->ip6_dst;

	/* Cheat. XXX why only in the v6addr case??? */
	if (r->rt == PF_FASTROUTE) {
		/*
		 * Mark as pf-generated so the packet is not filtered
		 * again, then hand it straight to the IPv6 stack.
		 */
		pf_mtag = pf_get_mtag(m0);
		ASSERT(pf_mtag != NULL);
		pf_mtag->pftag_flags |= PF_TAG_GENERATED;
		ip6_output_setsrcifscope(m0, oifp->if_index, NULL);
		ip6_output_setdstifscope(m0, oifp->if_index, NULL);
		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
		return;
	}

	if (TAILQ_EMPTY(&r->rpool.list)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
		goto bad;
	}
	if (s == NULL) {
		/* Stateless: pick a pool address now. */
		pf_map_addr(AF_INET6, r, (struct pf_addr *)(uintptr_t)&ip6->ip6_src,
		    &naddr, NULL, &sn);
		if (!PF_AZERO(&naddr, AF_INET6)) {
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &naddr, AF_INET6);
		}
		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
	} else {
		/* Stateful: reuse the address bound to the state. */
		if (!PF_AZERO(&s->rt_addr, AF_INET6)) {
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &s->rt_addr, AF_INET6);
		}
		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
	}
	if (ifp == NULL) {
		goto bad;
	}

	/* Leaving via a different interface: re-run the filter outbound. */
	if (oifp != ifp) {
		if (pf_test6_mbuf(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			goto bad;
		} else if (m0 == NULL) {
			/* Filter consumed the packet. */
			goto done;
		}
		if (m0->m_len < (int)sizeof(struct ip6_hdr)) {
			DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6: m0->m_len "
			    "< sizeof (struct ip6_hdr)\n"));
			goto bad;
		}
		pf_mtag = pf_get_mtag(m0);
		/*
		 * send refragmented packets.
		 */
		if ((pf_mtag->pftag_flags & PF_TAG_REFRAGMENTED) != 0) {
			pf_mtag->pftag_flags &= ~PF_TAG_REFRAGMENTED;
			/*
			 * nd6_output() frees packet chain in both success and
			 * failure cases.
			 */
			error = nd6_output(ifp, ifp, m0, dst, NULL, NULL);
			m0 = NULL;
			if (error) {
				DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6:"
				    "dropped refragmented packet\n"));
			}
			goto done;
		}
		ip6 = mtod(m0, struct ip6_hdr *);
	}

	/*
	 * If the packet is too large for the outgoing interface,
	 * send back an icmp6 error.
	 */
	if (in6_embedded_scope && IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) {
		/* Embed the outgoing interface scope in the address. */
		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
	}
	if ((unsigned)m0->m_pkthdr.len <= ifp->if_mtu) {
		error = nd6_output(ifp, ifp, m0, dst, NULL, NULL);
	} else {
		in6_ifstat_inc(ifp, ifs6_in_toobig);
		if (r->rt != PF_DUPTO) {
			/* icmp6_error() consumes m0. */
			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
		} else {
			goto bad;
		}
	}

done:
	return;

bad:
	if (m0) {
		m_freem(m0);
		m0 = NULL;
	}
	goto done;
}
9408 
9409 
9410 /*
9411  * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
9412  *   off is the offset where the protocol header starts
9413  *   len is the total length of protocol header plus payload
9414  * returns 0 when the checksum is valid, otherwise returns 1.
9415  */
9416 static int
pf_check_proto_cksum(pbuf_t * pbuf,int off,int len,u_int8_t p,sa_family_t af)9417 pf_check_proto_cksum(pbuf_t *pbuf, int off, int len, u_int8_t p,
9418     sa_family_t af)
9419 {
9420 	u_int16_t sum;
9421 
9422 	switch (p) {
9423 	case IPPROTO_TCP:
9424 	case IPPROTO_UDP:
9425 		/*
9426 		 * Optimize for the common case; if the hardware calculated
9427 		 * value doesn't include pseudo-header checksum, or if it
9428 		 * is partially-computed (only 16-bit summation), do it in
9429 		 * software below.
9430 		 */
9431 		if ((*pbuf->pb_csum_flags &
9432 		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
9433 		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) &&
9434 		    (*pbuf->pb_csum_data ^ 0xffff) == 0) {
9435 			return 0;
9436 		}
9437 		break;
9438 	case IPPROTO_ICMP:
9439 	case IPPROTO_ICMPV6:
9440 		break;
9441 	default:
9442 		return 1;
9443 	}
9444 	if (off < (int)sizeof(struct ip) || len < (int)sizeof(struct udphdr)) {
9445 		return 1;
9446 	}
9447 	if (pbuf->pb_packet_len < (unsigned)(off + len)) {
9448 		return 1;
9449 	}
9450 	switch (af) {
9451 #if INET
9452 	case AF_INET:
9453 		if (p == IPPROTO_ICMP) {
9454 			if (pbuf->pb_contig_len < (unsigned)off) {
9455 				return 1;
9456 			}
9457 			sum = pbuf_inet_cksum(pbuf, 0, off, len);
9458 		} else {
9459 			if (pbuf->pb_contig_len < (int)sizeof(struct ip)) {
9460 				return 1;
9461 			}
9462 			sum = pbuf_inet_cksum(pbuf, p, off, len);
9463 		}
9464 		break;
9465 #endif /* INET */
9466 	case AF_INET6:
9467 		if (pbuf->pb_contig_len < (int)sizeof(struct ip6_hdr)) {
9468 			return 1;
9469 		}
9470 		sum = pbuf_inet6_cksum(pbuf, p, off, len);
9471 		break;
9472 	default:
9473 		return 1;
9474 	}
9475 	if (sum) {
9476 		switch (p) {
9477 		case IPPROTO_TCP:
9478 			tcpstat.tcps_rcvbadsum++;
9479 			break;
9480 		case IPPROTO_UDP:
9481 			udpstat.udps_badsum++;
9482 			break;
9483 		case IPPROTO_ICMP:
9484 			icmpstat.icps_checksum++;
9485 			break;
9486 		case IPPROTO_ICMPV6:
9487 			icmp6stat.icp6s_checksum++;
9488 			break;
9489 		}
9490 		return 1;
9491 	}
9492 	return 0;
9493 }
9494 
#if INET
/*
 * Re-sync cached packet state after a helper may have replaced the
 * packet: pf_normalize_*()/pf_test_state_*() can substitute pd.mp, in
 * which case the local pbuf pointer, the IPv4 header pointer "h" and
 * the pf mtag must all be refreshed before further use.
 */
#define PF_APPLE_UPDATE_PDESC_IPv4()                            \
	do {                                                    \
	        if (pbuf && pd.mp && pbuf != pd.mp) {           \
	                pbuf = pd.mp;                           \
	                h = pbuf->pb_data;                      \
	                pd.pf_mtag = pf_get_mtag_pbuf(pbuf);            \
	        }                                               \
	} while (0)
9504 
9505 int
pf_test_mbuf(int dir,struct ifnet * ifp,struct mbuf ** m0,struct ether_header * eh,struct ip_fw_args * fwa)9506 pf_test_mbuf(int dir, struct ifnet *ifp, struct mbuf **m0,
9507     struct ether_header *eh, struct ip_fw_args *fwa)
9508 {
9509 	pbuf_t pbuf_store, *pbuf;
9510 	int rv;
9511 
9512 	pbuf_init_mbuf(&pbuf_store, *m0, (*m0)->m_pkthdr.rcvif);
9513 	pbuf = &pbuf_store;
9514 
9515 	rv = pf_test(dir, ifp, &pbuf, eh, fwa);
9516 
9517 	if (pbuf_is_valid(pbuf)) {
9518 		*m0 = pbuf->pb_mbuf;
9519 		pbuf->pb_mbuf = NULL;
9520 		pbuf_destroy(pbuf);
9521 	} else {
9522 		*m0 = NULL;
9523 	}
9524 
9525 	return rv;
9526 }
9527 
/*
 * pf_test: main IPv4 packet-filter entry point.
 *
 * Runs the packet in *pbufp through normalization/reassembly, state
 * lookup and (when no state matches) rule evaluation, then applies
 * logging, statistics, tagging and optional policy routing.
 *
 * Returns a PF_* verdict (PF_PASS, PF_DROP, ...).  *pbufp may be
 * replaced (reassembly) or set to NULL when the packet was consumed
 * (drop, dummynet, NAT64 translation, route-to/synproxy).
 * Must be called with the pf lock held.
 */
static __attribute__((noinline)) int
pf_test(int dir, struct ifnet *ifp, pbuf_t **pbufp,
    struct ether_header *eh, struct ip_fw_args *fwa)
{
#if !DUMMYNET
#pragma unused(fwa)
#endif
	struct pfi_kif          *kif;
	u_short                  action = PF_PASS, reason = 0, log = 0;
	pbuf_t                  *pbuf = *pbufp;
	struct ip               *h = 0;
	struct pf_rule          *a = NULL, *r = &pf_default_rule, *tr, *nr;
	struct pf_state         *s = NULL;
	struct pf_state_key     *sk = NULL;
	struct pf_ruleset       *ruleset = NULL;
	struct pf_pdesc          pd;
	int                      off, dirndx, pqid = 0;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	/* Filter disabled: pass everything untouched. */
	if (!pf_status.running) {
		return PF_PASS;
	}

	memset(&pd, 0, sizeof(pd));

	if ((pd.pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test: pf_get_mtag_pbuf returned NULL\n"));
		return PF_DROP;
	}

	/* Packets pf itself generated are never re-filtered. */
	if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED) {
		return PF_PASS;
	}

	kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test: kif == NULL, if_name %s\n", ifp->if_name));
		return PF_DROP;
	}
	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
		/* Interface marked "set skip on ...". */
		return PF_PASS;
	}

	if (pbuf->pb_packet_len < (int)sizeof(*h)) {
		REASON_SET(&reason, PFRES_SHORT);
		return PF_DROP;
	}

	/* initialize enough of pd for the done label */
	h = pbuf->pb_data;
	pd.mp = pbuf;
	pd.lmw = 0;
	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
	pd.src = (struct pf_addr *)&h->ip_src;
	pd.dst = (struct pf_addr *)&h->ip_dst;
	PF_ACPY(&pd.baddr, pd.src, AF_INET);
	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET);
	pd.ip_sum = &h->ip_sum;
	pd.proto = h->ip_p;
	pd.proto_variant = 0;
	pd.af = AF_INET;
	pd.tos = h->ip_tos;
	pd.ttl = h->ip_ttl;
	pd.tot_len = ntohs(h->ip_len);
	pd.eh = eh;

#if DUMMYNET
	/* Re-injected from dummynet: normalization already happened. */
	if (fwa != NULL && fwa->fwa_pf_rule != NULL) {
		goto nonormalize;
	}
#endif /* DUMMYNET */

	/* We do IP header normalization and packet reassembly here */
	action = pf_normalize_ip(pbuf, dir, kif, &reason, &pd);
	if (action != PF_PASS || pd.lmw < 0) {
		action = PF_DROP;
		goto done;
	}

#if DUMMYNET
nonormalize:
#endif /* DUMMYNET */
	/* pf_normalize can mess with pb_data */
	h = pbuf->pb_data;

	off = h->ip_hl << 2;
	if (off < (int)sizeof(*h)) {
		/* Header-length field smaller than the fixed header. */
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/* Re-derive pd from the (possibly reassembled) header. */
	pd.src = (struct pf_addr *)&h->ip_src;
	pd.dst = (struct pf_addr *)&h->ip_dst;
	PF_ACPY(&pd.baddr, pd.src, AF_INET);
	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET);
	pd.ip_sum = &h->ip_sum;
	pd.proto = h->ip_p;
	pd.proto_variant = 0;
	pd.mp = pbuf;
	pd.lmw = 0;
	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
	pd.af = AF_INET;
	pd.tos = h->ip_tos;
	pd.ttl = h->ip_ttl;
	pd.sc = MBUF_SCIDX(pbuf_get_service_class(pbuf));
	pd.tot_len = ntohs(h->ip_len);
	pd.eh = eh;

	/* Carry the packet's flow identity into the descriptor. */
	if (*pbuf->pb_flags & PKTF_FLOW_ID) {
		pd.flowsrc = *pbuf->pb_flowsrc;
		pd.flowhash = *pbuf->pb_flowid;
		pd.pktflags = *pbuf->pb_flags & PKTF_FLOW_MASK;
	}

	/* handle fragments that didn't get reassembled by normalization */
	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
		pd.flags |= PFDESC_IP_FRAG;
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_fragment(&r, dir, kif, pbuf, h,
		    &pd, &a, &ruleset);
		goto done;
	}

	switch (h->ip_p) {
	case IPPROTO_TCP: {
		struct tcphdr   th;
		pd.hdr.tcp = &th;
		if (!pf_pull_hdr(pbuf, off, &th, sizeof(th),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
		/* Pure ACK (no payload): eligible for the priority queue. */
		if ((th.th_flags & TH_ACK) && pd.p_len == 0) {
			pqid = 1;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_normalize_tcp(dir, kif, pbuf, 0, off, h, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv4();
		if (action == PF_DROP) {
			goto done;
		}
		if (th.th_sport == 0 || th.th_dport == 0) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_INVPORT);
			goto done;
		}
		action = pf_test_state_tcp(&s, dir, kif, pbuf, off, h, &pd,
		    &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv4();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			/* No state matched: evaluate the ruleset. */
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr   uh;

		pd.hdr.udp = &uh;
		if (!pf_pull_hdr(pbuf, off, &uh, sizeof(uh),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		if (uh.uh_sport == 0 || uh.uh_dport == 0) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_INVPORT);
			goto done;
		}
		/* UDP length must be sane w.r.t. the actual packet. */
		if (ntohs(uh.uh_ulen) > pbuf->pb_packet_len - off ||
		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_SHORT);
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_udp(&s, dir, kif, pbuf, off, h, &pd,
		    &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv4();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_ICMP: {
		struct icmp     ih;

		pd.hdr.icmp = &ih;
		if (!pf_pull_hdr(pbuf, off, &ih, ICMP_MINLEN,
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_icmp(&s, dir, kif, pbuf, off, h, &pd,
		    &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv4();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_ESP: {
		struct pf_esp_hdr       esp;

		pd.hdr.esp = &esp;
		if (!pf_pull_hdr(pbuf, off, &esp, sizeof(esp), &action, &reason,
		    AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_esp(&s, dir, kif, off, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv4();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_GRE: {
		struct pf_grev1_hdr     grev1;
		pd.hdr.grev1 = &grev1;
		if (!pf_pull_hdr(pbuf, off, &grev1, sizeof(grev1), &action,
		    &reason, AF_INET)) {
			log = (action != PF_PASS);
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		/* GREv1/PPTP gets dedicated state tracking. */
		if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
		    ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
			if (ntohs(grev1.payload_length) >
			    pbuf->pb_packet_len - off) {
				action = PF_DROP;
				REASON_SET(&reason, PFRES_SHORT);
				goto done;
			}
			pd.proto_variant = PF_GRE_PPTP_VARIANT;
			action = pf_test_state_grev1(&s, dir, kif, off, &pd);
			if (pd.lmw < 0) {
				goto done;
			}
			PF_APPLE_UPDATE_PDESC_IPv4();
			if (action == PF_PASS) {
#if NPFSYNC
				pfsync_update_state(s);
#endif /* NPFSYNC */
				r = s->rule.ptr;
				a = s->anchor.ptr;
				log = s->log;
				break;
			} else if (s == NULL) {
				action = pf_test_rule(&r, &s, dir, kif, pbuf,
				    off, h, &pd, &a, &ruleset, NULL);
				if (action == PF_PASS) {
					break;
				}
			}
		}

		/* not GREv1/PPTP, so treat as ordinary GRE... */
		OS_FALLTHROUGH;
	}

	default:
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_other(&s, dir, kif, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv4();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif, pbuf, off, h,
			    &pd, &a, &ruleset, NULL);
		}
		break;
	}

done:
	if (action == PF_NAT64) {
		/* Translated to IPv6: the original packet is gone. */
		*pbufp = NULL;
		return action;
	}

	*pbufp = pd.mp;
	PF_APPLE_UPDATE_PDESC_IPv4();

	if (action != PF_DROP) {
		/* IP options are dropped unless the rule allows them. */
		if (action == PF_PASS && h->ip_hl > 5 &&
		    !((s && s->allow_opts) || r->allow_opts)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_IPOPTIONS);
			log = 1;
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: dropping packet with ip options [hlen=%u]\n",
			    (unsigned int) h->ip_hl));
		}

		if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) ||
		    (pd.pktflags & PKTF_FLOW_ID)) {
			(void) pf_tag_packet(pbuf, pd.pf_mtag, s ? s->tag : 0,
			    r->rtableid, &pd);
		}

		if (action == PF_PASS) {
#if PF_ECN
			/* add hints for ecn */
			pd.pf_mtag->pftag_hdr = h;
			/* record address family */
			pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET6;
			pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
#endif /* PF_ECN */
			/* record protocol */
			*pbuf->pb_proto = pd.proto;

			/*
			 * connections redirected to loopback should not match sockets
			 * bound specifically to loopback due to security implications,
			 * see tcp_input() and in_pcblookup_listen().
			 */
			if (dir == PF_IN && (pd.proto == IPPROTO_TCP ||
			    pd.proto == IPPROTO_UDP) && s != NULL &&
			    s->nat_rule.ptr != NULL &&
			    (s->nat_rule.ptr->action == PF_RDR ||
			    s->nat_rule.ptr->action == PF_BINAT) &&
			    (ntohl(pd.dst->v4addr.s_addr) >> IN_CLASSA_NSHIFT)
			    == IN_LOOPBACKNET) {
				pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;
			}
		}
	}

	if (log) {
		struct pf_rule *lr;

		/* Prefer the NAT rule for logging when it asks for it. */
		if (s != NULL && s->nat_rule.ptr != NULL &&
		    s->nat_rule.ptr->log & PF_LOG_ALL) {
			lr = s->nat_rule.ptr;
		} else {
			lr = r;
		}
		PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, lr, a, ruleset,
		    &pd);
	}

	/* Interface counters: [af==inet][out?][blocked?] */
	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;

	if (action == PF_PASS || r->action == PF_DROP) {
		dirndx = (dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd.tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd.tot_len;
		}
		if (s != NULL) {
			sk = s->state_key;
			if (s->nat_rule.ptr != NULL) {
				s->nat_rule.ptr->packets[dirndx]++;
				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
			}
			if (s->src_node != NULL) {
				s->src_node->packets[dirndx]++;
				s->src_node->bytes[dirndx] += pd.tot_len;
			}
			if (s->nat_src_node != NULL) {
				s->nat_src_node->packets[dirndx]++;
				s->nat_src_node->bytes[dirndx] += pd.tot_len;
			}
			dirndx = (dir == sk->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd.tot_len;
		}
		tr = r;
		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
		if (nr != NULL) {
			struct pf_addr *x;
			/*
			 * XXX: we need to make sure that the addresses
			 * passed to pfr_update_stats() are the same than
			 * the addresses used during matching (pfr_match)
			 */
			if (r == &pf_default_rule) {
				tr = nr;
				x = (sk == NULL || sk->direction == dir) ?
				    &pd.baddr : &pd.naddr;
			} else {
				x = (sk == NULL || sk->direction == dir) ?
				    &pd.naddr : &pd.baddr;
			}
			if (x == &pd.baddr || s == NULL) {
				/* we need to change the address */
				if (dir == PF_OUT) {
					pd.src = x;
				} else {
					pd.dst = x;
				}
			}
		}
		if (tr->src.addr.type == PF_ADDR_TABLE) {
			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ?
			    pd.src : pd.dst, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->src.neg);
		}
		if (tr->dst.addr.type == PF_ADDR_TABLE) {
			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->dst.neg);
		}
	}

	VERIFY(pbuf == NULL || pd.mp == NULL || pd.mp == pbuf);

	if (*pbufp) {
		if (pd.lmw < 0) {
			REASON_SET(&reason, PFRES_MEMORY);
			action = PF_DROP;
		}

		if (action == PF_DROP) {
			pbuf_destroy(*pbufp);
			*pbufp = NULL;
			return PF_DROP;
		}

		*pbufp = pbuf;
	}

	if (action == PF_SYNPROXY_DROP) {
		/* synproxy absorbed the packet; caller sees a pass. */
		pbuf_destroy(*pbufp);
		*pbufp = NULL;
		action = PF_PASS;
	} else if (r->rt) {
		/* pf_route can free the pbuf causing *pbufp to become NULL */
		pf_route(pbufp, r, dir, kif->pfik_ifp, s, &pd);
	}

	return action;
}
10092 #endif /* INET */
10093 
10094 #define PF_APPLE_UPDATE_PDESC_IPv6()                            \
10095 	do {                                                    \
10096 	        if (pbuf && pd.mp && pbuf != pd.mp) {           \
10097 	                pbuf = pd.mp;                           \
10098 	        }                                               \
10099 	        h = pbuf->pb_data;                              \
10100 	} while (0)
10101 
10102 int
pf_test6_mbuf(int dir,struct ifnet * ifp,struct mbuf ** m0,struct ether_header * eh,struct ip_fw_args * fwa)10103 pf_test6_mbuf(int dir, struct ifnet *ifp, struct mbuf **m0,
10104     struct ether_header *eh, struct ip_fw_args *fwa)
10105 {
10106 	pbuf_t pbuf_store, *pbuf;
10107 	int rv;
10108 
10109 	pbuf_init_mbuf(&pbuf_store, *m0, (*m0)->m_pkthdr.rcvif);
10110 	pbuf = &pbuf_store;
10111 
10112 	rv = pf_test6(dir, ifp, &pbuf, eh, fwa);
10113 
10114 	if (pbuf_is_valid(pbuf)) {
10115 		*m0 = pbuf->pb_mbuf;
10116 		pbuf->pb_mbuf = NULL;
10117 		pbuf_destroy(pbuf);
10118 	} else {
10119 		*m0 = NULL;
10120 	}
10121 
10122 	return rv;
10123 }
10124 
10125 static __attribute__((noinline)) int
pf_test6(int dir,struct ifnet * ifp,pbuf_t ** pbufp,struct ether_header * eh,struct ip_fw_args * fwa)10126 pf_test6(int dir, struct ifnet *ifp, pbuf_t **pbufp,
10127     struct ether_header *eh, struct ip_fw_args *fwa)
10128 {
10129 #if !DUMMYNET
10130 #pragma unused(fwa)
10131 #endif
10132 	struct pfi_kif          *kif;
10133 	u_short                  action = PF_PASS, reason = 0, log = 0;
10134 	pbuf_t                  *pbuf = *pbufp;
10135 	struct ip6_hdr          *h;
10136 	struct pf_rule          *a = NULL, *r = &pf_default_rule, *tr, *nr;
10137 	struct pf_state         *s = NULL;
10138 	struct pf_state_key     *sk = NULL;
10139 	struct pf_ruleset       *ruleset = NULL;
10140 	struct pf_pdesc          pd;
10141 	int                      off, terminal = 0, dirndx, rh_cnt = 0;
10142 	u_int8_t                 nxt;
10143 	boolean_t                fwd = FALSE;
10144 
10145 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
10146 
10147 	ASSERT(ifp != NULL);
10148 	if ((dir == PF_OUT) && (pbuf->pb_ifp) && (ifp != pbuf->pb_ifp)) {
10149 		fwd = TRUE;
10150 	}
10151 
10152 	if (!pf_status.running) {
10153 		return PF_PASS;
10154 	}
10155 
10156 	memset(&pd, 0, sizeof(pd));
10157 
10158 	if ((pd.pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
10159 		DPFPRINTF(PF_DEBUG_URGENT,
10160 		    ("pf_test6: pf_get_mtag_pbuf returned NULL\n"));
10161 		return PF_DROP;
10162 	}
10163 
10164 	if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED) {
10165 		return PF_PASS;
10166 	}
10167 
10168 	kif = (struct pfi_kif *)ifp->if_pf_kif;
10169 
10170 	if (kif == NULL) {
10171 		DPFPRINTF(PF_DEBUG_URGENT,
10172 		    ("pf_test6: kif == NULL, if_name %s\n", ifp->if_name));
10173 		return PF_DROP;
10174 	}
10175 	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
10176 		return PF_PASS;
10177 	}
10178 
10179 	if (pbuf->pb_packet_len < (int)sizeof(*h)) {
10180 		REASON_SET(&reason, PFRES_SHORT);
10181 		return PF_DROP;
10182 	}
10183 
10184 	h = pbuf->pb_data;
10185 	nxt = h->ip6_nxt;
10186 	off = ((caddr_t)h - (caddr_t)pbuf->pb_data) + sizeof(struct ip6_hdr);
10187 	pd.mp = pbuf;
10188 	pd.lmw = 0;
10189 	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
10190 	pd.src = (struct pf_addr *)(uintptr_t)&h->ip6_src;
10191 	pd.dst = (struct pf_addr *)(uintptr_t)&h->ip6_dst;
10192 	PF_ACPY(&pd.baddr, pd.src, AF_INET6);
10193 	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6);
10194 	pd.ip_sum = NULL;
10195 	pd.af = AF_INET6;
10196 	pd.proto = nxt;
10197 	pd.proto_variant = 0;
10198 	pd.tos = 0;
10199 	pd.ttl = h->ip6_hlim;
10200 	pd.sc = MBUF_SCIDX(pbuf_get_service_class(pbuf));
10201 	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
10202 	pd.eh = eh;
10203 
10204 	if (*pbuf->pb_flags & PKTF_FLOW_ID) {
10205 		pd.flowsrc = *pbuf->pb_flowsrc;
10206 		pd.flowhash = *pbuf->pb_flowid;
10207 		pd.pktflags = (*pbuf->pb_flags & PKTF_FLOW_MASK);
10208 	}
10209 
10210 #if DUMMYNET
10211 	if (fwa != NULL && fwa->fwa_pf_rule != NULL) {
10212 		goto nonormalize;
10213 	}
10214 #endif /* DUMMYNET */
10215 
10216 	/* We do IP header normalization and packet reassembly here */
10217 	action = pf_normalize_ip6(pbuf, dir, kif, &reason, &pd);
10218 	if (action != PF_PASS || pd.lmw < 0) {
10219 		action = PF_DROP;
10220 		goto done;
10221 	}
10222 
10223 #if DUMMYNET
10224 nonormalize:
10225 #endif /* DUMMYNET */
10226 	h = pbuf->pb_data;
10227 
10228 	/*
10229 	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
10230 	 * will do something bad, so drop the packet for now.
10231 	 */
10232 	if (htons(h->ip6_plen) == 0) {
10233 		action = PF_DROP;
10234 		REASON_SET(&reason, PFRES_NORM);        /*XXX*/
10235 		goto done;
10236 	}
10237 	pd.src = (struct pf_addr *)(uintptr_t)&h->ip6_src;
10238 	pd.dst = (struct pf_addr *)(uintptr_t)&h->ip6_dst;
10239 	PF_ACPY(&pd.baddr, pd.src, AF_INET6);
10240 	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6);
10241 	pd.ip_sum = NULL;
10242 	pd.af = AF_INET6;
10243 	pd.tos = 0;
10244 	pd.ttl = h->ip6_hlim;
10245 	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
10246 	pd.eh = eh;
10247 
10248 	off = ((caddr_t)h - (caddr_t)pbuf->pb_data) + sizeof(struct ip6_hdr);
10249 	pd.proto = h->ip6_nxt;
10250 	pd.proto_variant = 0;
10251 	pd.mp = pbuf;
10252 	pd.lmw = 0;
10253 	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
10254 
10255 	do {
10256 		switch (pd.proto) {
10257 		case IPPROTO_FRAGMENT: {
10258 			struct ip6_frag ip6f;
10259 
10260 			pd.flags |= PFDESC_IP_FRAG;
10261 			if (!pf_pull_hdr(pbuf, off, &ip6f, sizeof ip6f, NULL,
10262 			    &reason, pd.af)) {
10263 				DPFPRINTF(PF_DEBUG_MISC,
10264 				    ("pf: IPv6 short fragment header\n"));
10265 				action = PF_DROP;
10266 				REASON_SET(&reason, PFRES_SHORT);
10267 				log = 1;
10268 				goto done;
10269 			}
10270 			pd.proto = ip6f.ip6f_nxt;
10271 #if DUMMYNET
10272 			/* Traffic goes through dummynet first */
10273 			action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd,
10274 			    fwa);
10275 			if (action == PF_DROP || pbuf == NULL) {
10276 				*pbufp = NULL;
10277 				return action;
10278 			}
10279 #endif /* DUMMYNET */
10280 			action = pf_test_fragment(&r, dir, kif, pbuf, h, &pd,
10281 			    &a, &ruleset);
10282 			if (action == PF_DROP) {
10283 				REASON_SET(&reason, PFRES_FRAG);
10284 				log = 1;
10285 			}
10286 			goto done;
10287 		}
10288 		case IPPROTO_ROUTING:
10289 			++rh_cnt;
10290 			OS_FALLTHROUGH;
10291 
10292 		case IPPROTO_AH:
10293 		case IPPROTO_HOPOPTS:
10294 		case IPPROTO_DSTOPTS: {
10295 			/* get next header and header length */
10296 			struct ip6_ext  opt6;
10297 
10298 			if (!pf_pull_hdr(pbuf, off, &opt6, sizeof(opt6),
10299 			    NULL, &reason, pd.af)) {
10300 				DPFPRINTF(PF_DEBUG_MISC,
10301 				    ("pf: IPv6 short opt\n"));
10302 				action = PF_DROP;
10303 				log = 1;
10304 				goto done;
10305 			}
10306 			if (pd.proto == IPPROTO_AH) {
10307 				off += (opt6.ip6e_len + 2) * 4;
10308 			} else {
10309 				off += (opt6.ip6e_len + 1) * 8;
10310 			}
10311 			pd.proto = opt6.ip6e_nxt;
10312 			/* goto the next header */
10313 			break;
10314 		}
10315 		default:
10316 			terminal++;
10317 			break;
10318 		}
10319 	} while (!terminal);
10320 
10321 
10322 	switch (pd.proto) {
10323 	case IPPROTO_TCP: {
10324 		struct tcphdr   th;
10325 
10326 		pd.hdr.tcp = &th;
10327 		if (!pf_pull_hdr(pbuf, off, &th, sizeof(th),
10328 		    &action, &reason, AF_INET6)) {
10329 			log = action != PF_PASS;
10330 			goto done;
10331 		}
10332 		pd.p_len = pd.tot_len - off - (th.th_off << 2);
10333 #if DUMMYNET
10334 		/* Traffic goes through dummynet first */
10335 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10336 		if (action == PF_DROP || pbuf == NULL) {
10337 			*pbufp = NULL;
10338 			return action;
10339 		}
10340 #endif /* DUMMYNET */
10341 		action = pf_normalize_tcp(dir, kif, pbuf, 0, off, h, &pd);
10342 		if (pd.lmw < 0) {
10343 			goto done;
10344 		}
10345 		PF_APPLE_UPDATE_PDESC_IPv6();
10346 		if (action == PF_DROP) {
10347 			goto done;
10348 		}
10349 		if (th.th_sport == 0 || th.th_dport == 0) {
10350 			action = PF_DROP;
10351 			REASON_SET(&reason, PFRES_INVPORT);
10352 			goto done;
10353 		}
10354 		action = pf_test_state_tcp(&s, dir, kif, pbuf, off, h, &pd,
10355 		    &reason);
10356 		if (action == PF_NAT64) {
10357 			goto done;
10358 		}
10359 		if (pd.lmw < 0) {
10360 			goto done;
10361 		}
10362 		PF_APPLE_UPDATE_PDESC_IPv6();
10363 		if (action == PF_PASS) {
10364 #if NPFSYNC
10365 			pfsync_update_state(s);
10366 #endif /* NPFSYNC */
10367 			r = s->rule.ptr;
10368 			a = s->anchor.ptr;
10369 			log = s->log;
10370 		} else if (s == NULL) {
10371 			action = pf_test_rule(&r, &s, dir, kif,
10372 			    pbuf, off, h, &pd, &a, &ruleset, NULL);
10373 		}
10374 		break;
10375 	}
10376 
10377 	case IPPROTO_UDP: {
10378 		struct udphdr   uh;
10379 
10380 		pd.hdr.udp = &uh;
10381 		if (!pf_pull_hdr(pbuf, off, &uh, sizeof(uh),
10382 		    &action, &reason, AF_INET6)) {
10383 			log = action != PF_PASS;
10384 			goto done;
10385 		}
10386 		if (uh.uh_sport == 0 || uh.uh_dport == 0) {
10387 			action = PF_DROP;
10388 			REASON_SET(&reason, PFRES_INVPORT);
10389 			goto done;
10390 		}
10391 		if (ntohs(uh.uh_ulen) > pbuf->pb_packet_len - off ||
10392 		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
10393 			action = PF_DROP;
10394 			REASON_SET(&reason, PFRES_SHORT);
10395 			goto done;
10396 		}
10397 #if DUMMYNET
10398 		/* Traffic goes through dummynet first */
10399 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10400 		if (action == PF_DROP || pbuf == NULL) {
10401 			*pbufp = NULL;
10402 			return action;
10403 		}
10404 #endif /* DUMMYNET */
10405 		action = pf_test_state_udp(&s, dir, kif, pbuf, off, h, &pd,
10406 		    &reason);
10407 		if (action == PF_NAT64) {
10408 			goto done;
10409 		}
10410 		if (pd.lmw < 0) {
10411 			goto done;
10412 		}
10413 		PF_APPLE_UPDATE_PDESC_IPv6();
10414 		if (action == PF_PASS) {
10415 #if NPFSYNC
10416 			pfsync_update_state(s);
10417 #endif /* NPFSYNC */
10418 			r = s->rule.ptr;
10419 			a = s->anchor.ptr;
10420 			log = s->log;
10421 		} else if (s == NULL) {
10422 			action = pf_test_rule(&r, &s, dir, kif,
10423 			    pbuf, off, h, &pd, &a, &ruleset, NULL);
10424 		}
10425 		break;
10426 	}
10427 
10428 	case IPPROTO_ICMPV6: {
10429 		struct icmp6_hdr        ih;
10430 
10431 		pd.hdr.icmp6 = &ih;
10432 		if (!pf_pull_hdr(pbuf, off, &ih, sizeof(ih),
10433 		    &action, &reason, AF_INET6)) {
10434 			log = action != PF_PASS;
10435 			goto done;
10436 		}
10437 #if DUMMYNET
10438 		/* Traffic goes through dummynet first */
10439 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10440 		if (action == PF_DROP || pbuf == NULL) {
10441 			*pbufp = NULL;
10442 			return action;
10443 		}
10444 #endif /* DUMMYNET */
10445 		action = pf_test_state_icmp(&s, dir, kif,
10446 		    pbuf, off, h, &pd, &reason);
10447 		if (action == PF_NAT64) {
10448 			goto done;
10449 		}
10450 		if (pd.lmw < 0) {
10451 			goto done;
10452 		}
10453 		PF_APPLE_UPDATE_PDESC_IPv6();
10454 		if (action == PF_PASS) {
10455 #if NPFSYNC
10456 			pfsync_update_state(s);
10457 #endif /* NPFSYNC */
10458 			r = s->rule.ptr;
10459 			a = s->anchor.ptr;
10460 			log = s->log;
10461 		} else if (s == NULL) {
10462 			action = pf_test_rule(&r, &s, dir, kif,
10463 			    pbuf, off, h, &pd, &a, &ruleset, NULL);
10464 		}
10465 		break;
10466 	}
10467 
10468 	case IPPROTO_ESP: {
10469 		struct pf_esp_hdr       esp;
10470 
10471 		pd.hdr.esp = &esp;
10472 		if (!pf_pull_hdr(pbuf, off, &esp, sizeof(esp), &action,
10473 		    &reason, AF_INET6)) {
10474 			log = action != PF_PASS;
10475 			goto done;
10476 		}
10477 #if DUMMYNET
10478 		/* Traffic goes through dummynet first */
10479 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10480 		if (action == PF_DROP || pbuf == NULL) {
10481 			*pbufp = NULL;
10482 			return action;
10483 		}
10484 #endif /* DUMMYNET */
10485 		action = pf_test_state_esp(&s, dir, kif, off, &pd);
10486 		if (pd.lmw < 0) {
10487 			goto done;
10488 		}
10489 		PF_APPLE_UPDATE_PDESC_IPv6();
10490 		if (action == PF_PASS) {
10491 #if NPFSYNC
10492 			pfsync_update_state(s);
10493 #endif /* NPFSYNC */
10494 			r = s->rule.ptr;
10495 			a = s->anchor.ptr;
10496 			log = s->log;
10497 		} else if (s == NULL) {
10498 			action = pf_test_rule(&r, &s, dir, kif,
10499 			    pbuf, off, h, &pd, &a, &ruleset, NULL);
10500 		}
10501 		break;
10502 	}
10503 
10504 	case IPPROTO_GRE: {
10505 		struct pf_grev1_hdr     grev1;
10506 
10507 		pd.hdr.grev1 = &grev1;
10508 		if (!pf_pull_hdr(pbuf, off, &grev1, sizeof(grev1), &action,
10509 		    &reason, AF_INET6)) {
10510 			log = (action != PF_PASS);
10511 			goto done;
10512 		}
10513 #if DUMMYNET
10514 		/* Traffic goes through dummynet first */
10515 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10516 		if (action == PF_DROP || pbuf == NULL) {
10517 			*pbufp = NULL;
10518 			return action;
10519 		}
10520 #endif /* DUMMYNET */
10521 		if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
10522 		    ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
10523 			if (ntohs(grev1.payload_length) >
10524 			    pbuf->pb_packet_len - off) {
10525 				action = PF_DROP;
10526 				REASON_SET(&reason, PFRES_SHORT);
10527 				goto done;
10528 			}
10529 			action = pf_test_state_grev1(&s, dir, kif, off, &pd);
10530 			if (pd.lmw < 0) {
10531 				goto done;
10532 			}
10533 			PF_APPLE_UPDATE_PDESC_IPv6();
10534 			if (action == PF_PASS) {
10535 #if NPFSYNC
10536 				pfsync_update_state(s);
10537 #endif /* NPFSYNC */
10538 				r = s->rule.ptr;
10539 				a = s->anchor.ptr;
10540 				log = s->log;
10541 				break;
10542 			} else if (s == NULL) {
10543 				action = pf_test_rule(&r, &s, dir, kif, pbuf,
10544 				    off, h, &pd, &a, &ruleset, NULL);
10545 				if (action == PF_PASS) {
10546 					break;
10547 				}
10548 			}
10549 		}
10550 
10551 		/* not GREv1/PPTP, so treat as ordinary GRE... */
10552 		OS_FALLTHROUGH; /* XXX is this correct? */
10553 	}
10554 
10555 	default:
10556 #if DUMMYNET
10557 		/* Traffic goes through dummynet first */
10558 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10559 		if (action == PF_DROP || pbuf == NULL) {
10560 			*pbufp = NULL;
10561 			return action;
10562 		}
10563 #endif /* DUMMYNET */
10564 		action = pf_test_state_other(&s, dir, kif, &pd);
10565 		if (pd.lmw < 0) {
10566 			goto done;
10567 		}
10568 		PF_APPLE_UPDATE_PDESC_IPv6();
10569 		if (action == PF_PASS) {
10570 #if NPFSYNC
10571 			pfsync_update_state(s);
10572 #endif /* NPFSYNC */
10573 			r = s->rule.ptr;
10574 			a = s->anchor.ptr;
10575 			log = s->log;
10576 		} else if (s == NULL) {
10577 			action = pf_test_rule(&r, &s, dir, kif, pbuf, off, h,
10578 			    &pd, &a, &ruleset, NULL);
10579 		}
10580 		break;
10581 	}
10582 
10583 done:
10584 	if (action == PF_NAT64) {
10585 		*pbufp = NULL;
10586 		return action;
10587 	}
10588 
10589 	*pbufp = pd.mp;
10590 	PF_APPLE_UPDATE_PDESC_IPv6();
10591 
10592 	/* handle dangerous IPv6 extension headers. */
10593 	if (action != PF_DROP) {
10594 		if (action == PF_PASS && rh_cnt &&
10595 		    !((s && s->allow_opts) || r->allow_opts)) {
10596 			action = PF_DROP;
10597 			REASON_SET(&reason, PFRES_IPOPTIONS);
10598 			log = 1;
10599 			DPFPRINTF(PF_DEBUG_MISC,
10600 			    ("pf: dropping packet with dangerous v6addr headers\n"));
10601 		}
10602 
10603 		if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) ||
10604 		    (pd.pktflags & PKTF_FLOW_ID)) {
10605 			(void) pf_tag_packet(pbuf, pd.pf_mtag, s ? s->tag : 0,
10606 			    r->rtableid, &pd);
10607 		}
10608 
10609 		if (action == PF_PASS) {
10610 #if PF_ECN
10611 			/* add hints for ecn */
10612 			pd.pf_mtag->pftag_hdr = h;
10613 			/* record address family */
10614 			pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET;
10615 			pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
10616 #endif /* PF_ECN */
10617 			/* record protocol */
10618 			*pbuf->pb_proto = pd.proto;
10619 			if (dir == PF_IN && (pd.proto == IPPROTO_TCP ||
10620 			    pd.proto == IPPROTO_UDP) && s != NULL &&
10621 			    s->nat_rule.ptr != NULL &&
10622 			    (s->nat_rule.ptr->action == PF_RDR ||
10623 			    s->nat_rule.ptr->action == PF_BINAT) &&
10624 			    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6addr)) {
10625 				pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;
10626 			}
10627 		}
10628 	}
10629 
10630 
10631 	if (log) {
10632 		struct pf_rule *lr;
10633 
10634 		if (s != NULL && s->nat_rule.ptr != NULL &&
10635 		    s->nat_rule.ptr->log & PF_LOG_ALL) {
10636 			lr = s->nat_rule.ptr;
10637 		} else {
10638 			lr = r;
10639 		}
10640 		PFLOG_PACKET(kif, h, pbuf, AF_INET6, dir, reason, lr, a, ruleset,
10641 		    &pd);
10642 	}
10643 
10644 	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
10645 	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
10646 
10647 	if (action == PF_PASS || r->action == PF_DROP) {
10648 		dirndx = (dir == PF_OUT);
10649 		r->packets[dirndx]++;
10650 		r->bytes[dirndx] += pd.tot_len;
10651 		if (a != NULL) {
10652 			a->packets[dirndx]++;
10653 			a->bytes[dirndx] += pd.tot_len;
10654 		}
10655 		if (s != NULL) {
10656 			sk = s->state_key;
10657 			if (s->nat_rule.ptr != NULL) {
10658 				s->nat_rule.ptr->packets[dirndx]++;
10659 				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
10660 			}
10661 			if (s->src_node != NULL) {
10662 				s->src_node->packets[dirndx]++;
10663 				s->src_node->bytes[dirndx] += pd.tot_len;
10664 			}
10665 			if (s->nat_src_node != NULL) {
10666 				s->nat_src_node->packets[dirndx]++;
10667 				s->nat_src_node->bytes[dirndx] += pd.tot_len;
10668 			}
10669 			dirndx = (dir == sk->direction) ? 0 : 1;
10670 			s->packets[dirndx]++;
10671 			s->bytes[dirndx] += pd.tot_len;
10672 		}
10673 		tr = r;
10674 		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
10675 		if (nr != NULL) {
10676 			struct pf_addr *x;
10677 			/*
10678 			 * XXX: we need to make sure that the addresses
10679 			 * passed to pfr_update_stats() are the same than
10680 			 * the addresses used during matching (pfr_match)
10681 			 */
10682 			if (r == &pf_default_rule) {
10683 				tr = nr;
10684 				x = (s == NULL || sk->direction == dir) ?
10685 				    &pd.baddr : &pd.naddr;
10686 			} else {
10687 				x = (s == NULL || sk->direction == dir) ?
10688 				    &pd.naddr : &pd.baddr;
10689 			}
10690 			if (x == &pd.baddr || s == NULL) {
10691 				if (dir == PF_OUT) {
10692 					pd.src = x;
10693 				} else {
10694 					pd.dst = x;
10695 				}
10696 			}
10697 		}
10698 		if (tr->src.addr.type == PF_ADDR_TABLE) {
10699 			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
10700 			    sk->direction == dir) ? pd.src : pd.dst, pd.af,
10701 			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
10702 			    tr->src.neg);
10703 		}
10704 		if (tr->dst.addr.type == PF_ADDR_TABLE) {
10705 			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
10706 			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
10707 			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
10708 			    tr->dst.neg);
10709 		}
10710 	}
10711 
10712 	VERIFY(pbuf == NULL || pd.mp == NULL || pd.mp == pbuf);
10713 
10714 	if (*pbufp) {
10715 		if (pd.lmw < 0) {
10716 			REASON_SET(&reason, PFRES_MEMORY);
10717 			action = PF_DROP;
10718 		}
10719 
10720 		if (action == PF_DROP) {
10721 			pbuf_destroy(*pbufp);
10722 			*pbufp = NULL;
10723 			return PF_DROP;
10724 		}
10725 
10726 		*pbufp = pbuf;
10727 	}
10728 
10729 	if (action == PF_SYNPROXY_DROP) {
10730 		pbuf_destroy(*pbufp);
10731 		*pbufp = NULL;
10732 		action = PF_PASS;
10733 	} else if (r->rt) {
10734 		/* pf_route6 can free the mbuf causing *pbufp to become NULL */
10735 		pf_route6(pbufp, r, dir, kif->pfik_ifp, s, &pd);
10736 	}
10737 
10738 	/* if reassembled packet passed, create new fragments */
10739 	struct pf_fragment_tag *ftag = NULL;
10740 	if ((action == PF_PASS) && (*pbufp != NULL) && (fwd) &&
10741 	    ((ftag = pf_find_fragment_tag_pbuf(*pbufp)) != NULL)) {
10742 		action = pf_refragment6(ifp, pbufp, ftag);
10743 	}
10744 	return action;
10745 }
10746 
10747 static int
pf_check_congestion(struct ifqueue * ifq)10748 pf_check_congestion(struct ifqueue *ifq)
10749 {
10750 #pragma unused(ifq)
10751 	return 0;
10752 }
10753 
10754 void
pool_init(struct pool * pp,size_t size,unsigned int align,unsigned int ioff,int flags,const char * wchan,void * palloc)10755 pool_init(struct pool *pp, size_t size, unsigned int align, unsigned int ioff,
10756     int flags, const char *wchan, void *palloc)
10757 {
10758 #pragma unused(align, ioff, flags, palloc)
10759 	bzero(pp, sizeof(*pp));
10760 	pp->pool_zone = zone_create(wchan, size,
10761 	    ZC_PGZ_USE_GUARDS | ZC_ZFREE_CLEARMEM);
10762 	pp->pool_hiwat = pp->pool_limit = (unsigned int)-1;
10763 	pp->pool_name = wchan;
10764 }
10765 
10766 /* Zones cannot be currently destroyed */
10767 void
pool_destroy(struct pool * pp)10768 pool_destroy(struct pool *pp)
10769 {
10770 #pragma unused(pp)
10771 }
10772 
10773 void
pool_sethiwat(struct pool * pp,int n)10774 pool_sethiwat(struct pool *pp, int n)
10775 {
10776 	pp->pool_hiwat = n;     /* Currently unused */
10777 }
10778 
10779 void
pool_sethardlimit(struct pool * pp,int n,const char * warnmess,int ratecap)10780 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
10781 {
10782 #pragma unused(warnmess, ratecap)
10783 	pp->pool_limit = n;
10784 }
10785 
10786 void *
pool_get(struct pool * pp,int flags)10787 pool_get(struct pool *pp, int flags)
10788 {
10789 	void *buf;
10790 
10791 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
10792 
10793 	if (pp->pool_count > pp->pool_limit) {
10794 		DPFPRINTF(PF_DEBUG_NOISY,
10795 		    ("pf: pool %s hard limit reached (%d)\n",
10796 		    pp->pool_name != NULL ? pp->pool_name : "unknown",
10797 		    pp->pool_limit));
10798 		pp->pool_fails++;
10799 		return NULL;
10800 	}
10801 
10802 	buf = zalloc_flags(pp->pool_zone,
10803 	    (flags & PR_WAITOK) ? Z_WAITOK : Z_NOWAIT);
10804 	if (buf != NULL) {
10805 		pp->pool_count++;
10806 		VERIFY(pp->pool_count != 0);
10807 	}
10808 	return buf;
10809 }
10810 
10811 void
pool_put(struct pool * pp,void * v)10812 pool_put(struct pool *pp, void *v)
10813 {
10814 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
10815 
10816 	zfree(pp->pool_zone, v);
10817 	VERIFY(pp->pool_count != 0);
10818 	pp->pool_count--;
10819 }
10820 
10821 struct pf_mtag *
pf_find_mtag_pbuf(pbuf_t * pbuf)10822 pf_find_mtag_pbuf(pbuf_t *pbuf)
10823 {
10824 	return pbuf->pb_pftag;
10825 }
10826 
10827 struct pf_mtag *
pf_find_mtag(struct mbuf * m)10828 pf_find_mtag(struct mbuf *m)
10829 {
10830 	return m_pftag(m);
10831 }
10832 
10833 struct pf_mtag *
pf_get_mtag(struct mbuf * m)10834 pf_get_mtag(struct mbuf *m)
10835 {
10836 	return pf_find_mtag(m);
10837 }
10838 
10839 struct pf_mtag *
pf_get_mtag_pbuf(pbuf_t * pbuf)10840 pf_get_mtag_pbuf(pbuf_t *pbuf)
10841 {
10842 	return pf_find_mtag_pbuf(pbuf);
10843 }
10844 
10845 struct pf_fragment_tag *
pf_copy_fragment_tag(struct mbuf * m,struct pf_fragment_tag * ftag,int how)10846 pf_copy_fragment_tag(struct mbuf *m, struct pf_fragment_tag *ftag, int how)
10847 {
10848 	struct m_tag *tag;
10849 	struct pf_mtag *pftag = pf_find_mtag(m);
10850 
10851 	tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS,
10852 	    sizeof(*ftag), how, m);
10853 	if (tag == NULL) {
10854 		return NULL;
10855 	} else {
10856 		m_tag_prepend(m, tag);
10857 		tag = tag + 1;
10858 	}
10859 	bcopy(ftag, tag, sizeof(*ftag));
10860 	pftag->pftag_flags |= PF_TAG_REASSEMBLED;
10861 	return (struct pf_fragment_tag *)tag;
10862 }
10863 
10864 struct pf_fragment_tag *
pf_find_fragment_tag(struct mbuf * m)10865 pf_find_fragment_tag(struct mbuf *m)
10866 {
10867 	struct m_tag *tag;
10868 	struct pf_fragment_tag *ftag;
10869 	struct pf_mtag *pftag = pf_find_mtag(m);
10870 
10871 	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS,
10872 	    NULL);
10873 	VERIFY((tag == NULL) || (pftag->pftag_flags & PF_TAG_REASSEMBLED));
10874 	if (tag != NULL) {
10875 		tag = tag + 1;
10876 	}
10877 	ftag = (struct pf_fragment_tag *)tag;
10878 	return ftag;
10879 }
10880 
10881 struct pf_fragment_tag *
pf_find_fragment_tag_pbuf(pbuf_t * pbuf)10882 pf_find_fragment_tag_pbuf(pbuf_t *pbuf)
10883 {
10884 	struct pf_mtag *mtag = pf_find_mtag_pbuf(pbuf);
10885 
10886 	return (mtag->pftag_flags & PF_TAG_REASSEMBLED) ?
10887 	       pbuf->pb_pf_fragtag : NULL;
10888 }
10889 
10890 uint64_t
pf_time_second(void)10891 pf_time_second(void)
10892 {
10893 	struct timeval t;
10894 
10895 	microuptime(&t);
10896 	return t.tv_sec;
10897 }
10898 
10899 uint64_t
pf_calendar_time_second(void)10900 pf_calendar_time_second(void)
10901 {
10902 	struct timeval t;
10903 
10904 	getmicrotime(&t);
10905 	return t.tv_sec;
10906 }
10907 
10908 static void *
hook_establish(struct hook_desc_head * head,int tail,hook_fn_t fn,void * arg)10909 hook_establish(struct hook_desc_head *head, int tail, hook_fn_t fn, void *arg)
10910 {
10911 	struct hook_desc *hd;
10912 
10913 	hd = kalloc_type(struct hook_desc, Z_WAITOK | Z_NOFAIL);
10914 
10915 	hd->hd_fn = fn;
10916 	hd->hd_arg = arg;
10917 	if (tail) {
10918 		TAILQ_INSERT_TAIL(head, hd, hd_list);
10919 	} else {
10920 		TAILQ_INSERT_HEAD(head, hd, hd_list);
10921 	}
10922 
10923 	return hd;
10924 }
10925 
10926 static void
hook_runloop(struct hook_desc_head * head,int flags)10927 hook_runloop(struct hook_desc_head *head, int flags)
10928 {
10929 	struct hook_desc *hd;
10930 
10931 	if (!(flags & HOOK_REMOVE)) {
10932 		if (!(flags & HOOK_ABORT)) {
10933 			TAILQ_FOREACH(hd, head, hd_list)
10934 			hd->hd_fn(hd->hd_arg);
10935 		}
10936 	} else {
10937 		while (!!(hd = TAILQ_FIRST(head))) {
10938 			TAILQ_REMOVE(head, hd, hd_list);
10939 			if (!(flags & HOOK_ABORT)) {
10940 				hd->hd_fn(hd->hd_arg);
10941 			}
10942 			if (flags & HOOK_FREE) {
10943 				kfree_type(struct hook_desc, hd);
10944 			}
10945 		}
10946 	}
10947 }
10948 
10949 #if defined(SKYWALK) && defined(XNU_TARGET_OS_OSX)
10950 static bool
pf_check_compatible_anchor(const char * anchor_path)10951 pf_check_compatible_anchor(const char *anchor_path)
10952 {
10953 	// Whitelist reserved anchor
10954 	if (strncmp(anchor_path, PF_RESERVED_ANCHOR, MAXPATHLEN) == 0) {
10955 		return true;
10956 	}
10957 
10958 	// Whitelist com.apple anchor
10959 	if (strncmp(anchor_path, "com.apple", MAXPATHLEN) == 0) {
10960 		return true;
10961 	}
10962 
10963 	for (int i = 0; i < sizeof(compatible_anchors) / sizeof(compatible_anchors[0]); i++) {
10964 		const char *ptr = strnstr(anchor_path, compatible_anchors[i], MAXPATHLEN);
10965 		if (ptr != NULL && ptr == anchor_path) {
10966 			return true;
10967 		}
10968 	}
10969 
10970 	return false;
10971 }
10972 
10973 bool
pf_check_compatible_rules(void)10974 pf_check_compatible_rules(void)
10975 {
10976 	struct pf_anchor *anchor = NULL;
10977 	struct pf_rule *rule = NULL;
10978 
10979 	// Check whitelisted anchors
10980 	RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) {
10981 		if (!pf_check_compatible_anchor(anchor->path)) {
10982 			if (pf_status.debug >= PF_DEBUG_MISC) {
10983 				printf("pf anchor %s not compatible\n", anchor->path);
10984 			}
10985 			return false;
10986 		}
10987 	}
10988 
10989 	// Check rules in main ruleset
10990 	for (int i = PF_RULESET_SCRUB; i < PF_RULESET_MAX; i++) {
10991 		TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr, entries) {
10992 			if (rule->anchor == NULL) {
10993 				if (pf_status.debug >= PF_DEBUG_MISC) {
10994 					printf("main ruleset contains rules\n");
10995 				}
10996 				return false;
10997 			}
10998 		}
10999 	}
11000 
11001 	return true;
11002 }
11003 #endif // SKYWALK && defined(XNU_TARGET_OS_OSX)
11004