xref: /xnu-8792.81.2/bsd/net/pf.c (revision 19c3b8c28c31cb8130e034cfb5df6bf9ba342d90)
1 /*
2  * Copyright (c) 2007-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*	$apfw: git commit 6602420f2f101b74305cd78f7cd9e0c8fdedae97 $ */
30 /*	$OpenBSD: pf.c,v 1.567 2008/02/20 23:40:13 henning Exp $ */
31 
32 /*
33  * Copyright (c) 2001 Daniel Hartmeier
34  * Copyright (c) 2002 - 2013 Henning Brauer
35  * NAT64 - Copyright (c) 2010 Viagenie Inc. (http://www.viagenie.ca)
36  * All rights reserved.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  *
42  *    - Redistributions of source code must retain the above copyright
43  *      notice, this list of conditions and the following disclaimer.
44  *    - Redistributions in binary form must reproduce the above
45  *      copyright notice, this list of conditions and the following
46  *      disclaimer in the documentation and/or other materials provided
47  *      with the distribution.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
50  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
51  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
52  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
53  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
54  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
55  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
56  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
57  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
59  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
60  * POSSIBILITY OF SUCH DAMAGE.
61  *
62  * Effort sponsored in part by the Defense Advanced Research Projects
63  * Agency (DARPA) and Air Force Research Laboratory, Air Force
64  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
65  *
66  */
67 
68 #include <machine/endian.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/filio.h>
72 #include <sys/socket.h>
73 #include <sys/socketvar.h>
74 #include <sys/kernel.h>
75 #include <sys/time.h>
76 #include <sys/proc.h>
77 #include <sys/random.h>
78 #include <sys/mcache.h>
79 #include <sys/protosw.h>
80 
81 #include <libkern/crypto/md5.h>
82 #include <libkern/libkern.h>
83 
84 #include <mach/thread_act.h>
85 
86 #include <net/if.h>
87 #include <net/if_types.h>
88 #include <net/bpf.h>
89 #include <net/route.h>
90 #include <net/dlil.h>
91 
92 #include <netinet/in.h>
93 #include <netinet/in_var.h>
94 #include <netinet/in_systm.h>
95 #include <netinet/ip.h>
96 #include <netinet/ip_var.h>
97 #include <netinet/tcp.h>
98 #include <netinet/tcp_seq.h>
99 #include <netinet/udp.h>
100 #include <netinet/ip_icmp.h>
101 #include <netinet/in_pcb.h>
102 #include <netinet/tcp_timer.h>
103 #include <netinet/tcp_var.h>
104 #include <netinet/tcp_fsm.h>
105 #include <netinet/udp_var.h>
106 #include <netinet/icmp_var.h>
107 #include <net/if_ether.h>
108 #include <net/ethernet.h>
109 #include <net/flowhash.h>
110 #include <net/nat464_utils.h>
111 #include <net/pfvar.h>
112 #include <net/if_pflog.h>
113 
114 #if NPFSYNC
115 #include <net/if_pfsync.h>
116 #endif /* NPFSYNC */
117 
118 #include <netinet/ip6.h>
119 #include <netinet6/in6_pcb.h>
120 #include <netinet6/ip6_var.h>
121 #include <netinet/icmp6.h>
122 #include <netinet6/nd6.h>
123 
124 #if DUMMYNET
125 #include <netinet/ip_dummynet.h>
126 #endif /* DUMMYNET */
127 
128 #if SKYWALK
129 #include <skywalk/namespace/flowidns.h>
130 #endif /* SKYWALK */
131 
132 /*
133  * For RandomULong(), to get a 32 bits random value
134  * Note that random() returns a 31 bits value, see rdar://11159750
135  */
136 #include <dev/random/randomdev.h>
137 
138 #define DPFPRINTF(n, x) (pf_status.debug >= (n) ? printf x : ((void)0))
139 
140 /*
141  * On Mac OS X, the rtableid value is treated as the interface scope
142  * value that is equivalent to the interface index used for scoped
143  * routing.  A valid scope value is anything but IFSCOPE_NONE (0),
144  * as per definition of ifindex which is a positive, non-zero number.
145  * The other BSDs treat a negative rtableid value as invalid, hence
146  * the test against INT_MAX to handle userland apps which initialize
147  * the field with a negative number.
148  */
149 #define PF_RTABLEID_IS_VALID(r) \
150 	((r) > IFSCOPE_NONE && (r) <= INT_MAX)
151 
152 /*
153  * Global variables
154  */
155 static LCK_GRP_DECLARE(pf_lock_grp, "pf");
156 LCK_MTX_DECLARE(pf_lock, &pf_lock_grp);
157 
158 static LCK_GRP_DECLARE(pf_perim_lock_grp, "pf_perim");
159 LCK_RW_DECLARE(pf_perim_lock, &pf_perim_lock_grp);
160 
161 /* state tables */
162 struct pf_state_tree_lan_ext     pf_statetbl_lan_ext;
163 struct pf_state_tree_ext_gwy     pf_statetbl_ext_gwy;
164 
165 struct pf_palist         pf_pabuf;
166 struct pf_status         pf_status;
167 
168 u_int32_t                ticket_pabuf;
169 
170 static MD5_CTX           pf_tcp_secret_ctx;
171 static u_char            pf_tcp_secret[16];
172 static int               pf_tcp_secret_init;
173 static int               pf_tcp_iss_off;
174 
/*
 * Explicit stack for walking nested rule anchors without kernel-stack
 * recursion; depth is capped at 64 frames.
 */
static struct pf_anchor_stackframe {
	struct pf_ruleset                       *rs;     /* ruleset being evaluated */
	struct pf_rule                          *r;      /* current rule in that ruleset */
	struct pf_anchor_node                   *parent; /* anchor list the child came from */
	struct pf_anchor                        *child;  /* anchor currently descended into */
} pf_anchor_stack[64];
181 
182 struct pool              pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
183 struct pool              pf_state_pl, pf_state_key_pl;
184 
/* Callback signature for deferred hooks run by hook_runloop(). */
typedef void (*hook_fn_t)(void *);

/* One registered hook: callback plus opaque argument, linked on a tailq. */
struct hook_desc {
	TAILQ_ENTRY(hook_desc) hd_list; /* list linkage */
	hook_fn_t hd_fn;                /* function to invoke */
	void *hd_arg;                   /* argument passed to hd_fn */
};

/* Flag bits for hook_runloop(); exact semantics are defined there. */
#define HOOK_REMOVE     0x01
#define HOOK_FREE       0x02
#define HOOK_ABORT      0x04
196 
197 static void             *hook_establish(struct hook_desc_head *, int,
198     hook_fn_t, void *);
199 static void             hook_runloop(struct hook_desc_head *, int flags);
200 
201 struct pool              pf_app_state_pl;
202 static void              pf_print_addr(struct pf_addr *addr, sa_family_t af);
203 static void              pf_print_sk_host(struct pf_state_host *, u_int8_t, int,
204     u_int8_t);
205 
206 static void              pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
207 
208 static void              pf_init_threshold(struct pf_threshold *, u_int32_t,
209     u_int32_t);
210 static void              pf_add_threshold(struct pf_threshold *);
211 static int               pf_check_threshold(struct pf_threshold *);
212 
213 static void              pf_change_ap(int, pbuf_t *, struct pf_addr *,
214     u_int16_t *, u_int16_t *, u_int16_t *,
215     struct pf_addr *, u_int16_t, u_int8_t, sa_family_t,
216     sa_family_t, int);
217 static int               pf_modulate_sack(pbuf_t *, int, struct pf_pdesc *,
218     struct tcphdr *, struct pf_state_peer *);
219 static void              pf_change_a6(struct pf_addr *, u_int16_t *,
220     struct pf_addr *, u_int8_t);
221 static void pf_change_addr(struct pf_addr *a, u_int16_t *c, struct pf_addr *an,
222     u_int8_t u, sa_family_t af, sa_family_t afn);
223 static void              pf_change_icmp(struct pf_addr *, u_int16_t *,
224     struct pf_addr *, struct pf_addr *, u_int16_t,
225     u_int16_t *, u_int16_t *, u_int16_t *,
226     u_int16_t *, u_int8_t, sa_family_t);
227 static void              pf_send_tcp(const struct pf_rule *, sa_family_t,
228     const struct pf_addr *, const struct pf_addr *,
229     u_int16_t, u_int16_t, u_int32_t, u_int32_t,
230     u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
231     u_int16_t, struct ether_header *, struct ifnet *);
232 static void              pf_send_icmp(pbuf_t *, u_int8_t, u_int8_t,
233     sa_family_t, struct pf_rule *);
234 static struct pf_rule   *pf_match_translation(struct pf_pdesc *, pbuf_t *,
235     int, int, struct pfi_kif *, struct pf_addr *,
236     union pf_state_xport *, struct pf_addr *,
237     union pf_state_xport *, int);
238 static struct pf_rule   *pf_get_translation_aux(struct pf_pdesc *,
239     pbuf_t *, int, int, struct pfi_kif *,
240     struct pf_src_node **, struct pf_addr *,
241     union pf_state_xport *, struct pf_addr *,
242     union pf_state_xport *, union pf_state_xport *
243 #if SKYWALK
244     , netns_token *
245 #endif
246     );
247 static void              pf_attach_state(struct pf_state_key *,
248     struct pf_state *, int);
249 static u_int32_t         pf_tcp_iss(struct pf_pdesc *);
250 static int               pf_test_rule(struct pf_rule **, struct pf_state **,
251     int, struct pfi_kif *, pbuf_t *, int,
252     void *, struct pf_pdesc *, struct pf_rule **,
253     struct pf_ruleset **, struct ifqueue *);
254 #if DUMMYNET
255 static int               pf_test_dummynet(struct pf_rule **, int,
256     struct pfi_kif *, pbuf_t **,
257     struct pf_pdesc *, struct ip_fw_args *);
258 #endif /* DUMMYNET */
259 static int               pf_test_fragment(struct pf_rule **, int,
260     struct pfi_kif *, pbuf_t *, void *,
261     struct pf_pdesc *, struct pf_rule **,
262     struct pf_ruleset **);
263 static int               pf_test_state_tcp(struct pf_state **, int,
264     struct pfi_kif *, pbuf_t *, int,
265     void *, struct pf_pdesc *, u_short *);
266 static int               pf_test_state_udp(struct pf_state **, int,
267     struct pfi_kif *, pbuf_t *, int,
268     void *, struct pf_pdesc *, u_short *);
269 static int               pf_test_state_icmp(struct pf_state **, int,
270     struct pfi_kif *, pbuf_t *, int,
271     void *, struct pf_pdesc *, u_short *);
272 static int               pf_test_state_other(struct pf_state **, int,
273     struct pfi_kif *, struct pf_pdesc *);
274 static int               pf_match_tag(struct pf_rule *,
275     struct pf_mtag *, int *);
276 static void              pf_hash(struct pf_addr *, struct pf_addr *,
277     struct pf_poolhashkey *, sa_family_t);
278 static int               pf_map_addr(u_int8_t, struct pf_rule *,
279     struct pf_addr *, struct pf_addr *,
280     struct pf_addr *, struct pf_src_node **);
281 static int               pf_get_sport(struct pf_pdesc *, struct pfi_kif *,
282     struct pf_rule *, struct pf_addr *,
283     union pf_state_xport *, struct pf_addr *,
284     union pf_state_xport *, struct pf_addr *,
285     union pf_state_xport *, struct pf_src_node **
286 #if SKYWALK
287     , netns_token *
288 #endif
289     );
290 static void              pf_route(pbuf_t **, struct pf_rule *, int,
291     struct ifnet *, struct pf_state *,
292     struct pf_pdesc *);
293 static void              pf_route6(pbuf_t **, struct pf_rule *, int,
294     struct ifnet *, struct pf_state *,
295     struct pf_pdesc *);
296 static u_int8_t          pf_get_wscale(pbuf_t *, int, u_int16_t,
297     sa_family_t);
298 static u_int16_t         pf_get_mss(pbuf_t *, int, u_int16_t,
299     sa_family_t);
300 static u_int16_t         pf_calc_mss(struct pf_addr *, sa_family_t,
301     u_int16_t);
302 static void              pf_set_rt_ifp(struct pf_state *,
303     struct pf_addr *, sa_family_t af);
304 static int               pf_check_proto_cksum(pbuf_t *, int, int,
305     u_int8_t, sa_family_t);
306 static int               pf_addr_wrap_neq(struct pf_addr_wrap *,
307     struct pf_addr_wrap *);
308 static struct pf_state  *pf_find_state(struct pfi_kif *,
309     struct pf_state_key_cmp *, u_int);
310 static int               pf_src_connlimit(struct pf_state **);
311 static void              pf_stateins_err(const char *, struct pf_state *,
312     struct pfi_kif *);
313 static int               pf_check_congestion(struct ifqueue *);
314 
315 #if 0
316 static const char *pf_pptp_ctrl_type_name(u_int16_t code);
317 #endif
318 static void             pf_pptp_handler(struct pf_state *, int, int,
319     struct pf_pdesc *, struct pfi_kif *);
320 static void             pf_pptp_unlink(struct pf_state *);
321 static void             pf_grev1_unlink(struct pf_state *);
322 static int              pf_test_state_grev1(struct pf_state **, int,
323     struct pfi_kif *, int, struct pf_pdesc *);
324 static int              pf_ike_compare(struct pf_app_state *,
325     struct pf_app_state *);
326 static int              pf_test_state_esp(struct pf_state **, int,
327     struct pfi_kif *, int, struct pf_pdesc *);
328 static int pf_test6(int, struct ifnet *, pbuf_t **, struct ether_header *,
329     struct ip_fw_args *);
330 #if INET
331 static int pf_test(int, struct ifnet *, pbuf_t **,
332     struct ether_header *, struct ip_fw_args *);
333 #endif /* INET */
334 
335 
336 extern struct pool pfr_ktable_pl;
337 extern struct pool pfr_kentry_pl;
338 extern int path_mtu_discovery;
339 
/*
 * Hard caps for the pf memory pools.  The array is sized by PF_LIMIT_MAX,
 * so entry order must match the PF_LIMIT_* index constants —
 * NOTE(review): verify ordering against the PF_LIMIT_* definitions in
 * pfvar.h before reordering.
 */
struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ .pp = &pf_state_pl, .limit = PFSTATE_HIWAT },
	{ .pp = &pf_app_state_pl, .limit = PFAPPSTATE_HIWAT },
	{ .pp = &pf_src_tree_pl, .limit = PFSNODE_HIWAT },
	{ .pp = &pf_frent_pl, .limit = PFFRAG_FRENT_HIWAT },
	{ .pp = &pfr_ktable_pl, .limit = PFR_KTABLE_HIWAT },
	{ .pp = &pfr_kentry_pl, .limit = PFR_KENTRY_HIWAT },
};
348 
#if SKYWALK && defined(XNU_TARGET_OS_OSX)
/*
 * First-party anchor names special-cased on macOS when Skywalk is built
 * in.  NOTE(review): the consumers of this list are outside this chunk —
 * confirm intended semantics (presumably anchors considered compatible
 * with Skywalk flows) at the use sites.
 */
const char *compatible_anchors[] = {
	"com.apple.internet-sharing",
	"com.apple/250.ApplicationFirewall",
	"com.apple/200.AirDrop"
};
#endif // SKYWALK && defined(XNU_TARGET_OS_OSX)
356 
357 void *
pf_lazy_makewritable(struct pf_pdesc * pd,pbuf_t * pbuf,int len)358 pf_lazy_makewritable(struct pf_pdesc *pd, pbuf_t *pbuf, int len)
359 {
360 	void *p;
361 
362 	if (pd->lmw < 0) {
363 		return NULL;
364 	}
365 
366 	VERIFY(pbuf == pd->mp);
367 
368 	p = pbuf->pb_data;
369 	if (len > pd->lmw) {
370 		if ((p = pbuf_ensure_writable(pbuf, len)) == NULL) {
371 			len = -1;
372 		}
373 		pd->lmw = len;
374 		if (len >= 0) {
375 			pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
376 
377 			switch (pd->af) {
378 			case AF_INET: {
379 				struct ip *h = p;
380 				pd->src = (struct pf_addr *)(uintptr_t)&h->ip_src;
381 				pd->dst = (struct pf_addr *)(uintptr_t)&h->ip_dst;
382 				pd->ip_sum = &h->ip_sum;
383 				break;
384 			}
385 			case AF_INET6: {
386 				struct ip6_hdr *h = p;
387 				pd->src = (struct pf_addr *)(uintptr_t)&h->ip6_src;
388 				pd->dst = (struct pf_addr *)(uintptr_t)&h->ip6_dst;
389 				break;
390 			}
391 			}
392 		}
393 	}
394 
395 	return len < 0 ? NULL : p;
396 }
397 
398 static const int *
pf_state_lookup_aux(struct pf_state ** state,struct pfi_kif * kif,int direction,int * action)399 pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif,
400     int direction, int *action)
401 {
402 	if (*state == NULL || (*state)->timeout == PFTM_PURGE) {
403 		*action = PF_DROP;
404 		return action;
405 	}
406 
407 	if (direction == PF_OUT &&
408 	    (((*state)->rule.ptr->rt == PF_ROUTETO &&
409 	    (*state)->rule.ptr->direction == PF_OUT) ||
410 	    ((*state)->rule.ptr->rt == PF_REPLYTO &&
411 	    (*state)->rule.ptr->direction == PF_IN)) &&
412 	    (*state)->rt_kif != NULL && (*state)->rt_kif != kif) {
413 		*action = PF_PASS;
414 		return action;
415 	}
416 
417 	return 0;
418 }
419 
/*
 * Find the state matching `key' on `kif' in `direction' and, on a hit,
 * copy the state's flow ID into the packet descriptor unless the packet
 * already carries one.  If pf_state_lookup_aux() decides the packet must
 * be dropped or passed immediately, return that action from the caller.
 * Expects `state', `kif', `key', `direction' and `pd' in scope.
 */
#define STATE_LOOKUP()                                                   \
	do {                                                             \
	        int action;                                              \
	        *state = pf_find_state(kif, &key, direction);            \
	        if (*state != NULL && pd != NULL &&                      \
	            !(pd->pktflags & PKTF_FLOW_ID)) {                    \
	                pd->flowsrc = (*state)->state_key->flowsrc;      \
	                pd->flowhash = (*state)->state_key->flowhash;    \
	                if (pd->flowhash != 0) {                         \
	                        pd->pktflags |= PKTF_FLOW_ID;            \
	                        pd->pktflags &= ~PKTF_FLOW_ADV;          \
	                }                                                \
	        }                                                        \
	        if (pf_state_lookup_aux(state, kif, direction, &action)) \
	                return (action);                                 \
	} while (0)
436 
437 /*
438  * This macro resets the flowID information in a packet descriptor which was
439  * copied in from a PF state. This should be used after a protocol state lookup
440  * finds a matching PF state, but then decides to not use it for various
441  * reasons.
442  */
443 #define PD_CLEAR_STATE_FLOWID(_pd)                                       \
444 	do {                                                             \
445 	        if (__improbable(((_pd)->pktflags & PKTF_FLOW_ID) &&     \
446 	            ((_pd)->flowsrc == FLOWSRC_PF))) {                   \
447 	                (_pd)->flowhash = 0;                             \
448 	                (_pd)->flowsrc = 0;                              \
449 	                (_pd)->pktflags &= ~PKTF_FLOW_ID;                \
450 	        }                                                        \
451                                                                          \
452 	} while (0)
453 
/*
 * True when the state key's lan and gwy addresses differ (i.e. address
 * translation is in effect).  For AF_INET6 all four 32-bit words are
 * compared; otherwise only word 0.  The expansion is now fully
 * parenthesized: the previous bare `a || b' form would mis-bind under a
 * higher-precedence neighbor (e.g. !STATE_ADDR_TRANSLATE(sk) negated only
 * the first operand).  All visible uses are unaffected by the change.
 */
#define STATE_ADDR_TRANSLATE(sk)                                        \
	((sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] ||        \
	((sk)->af_lan == AF_INET6 &&                                    \
	((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] ||        \
	(sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] ||         \
	(sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3])))

/* True when any translation (family, address, or port) applies. */
#define STATE_TRANSLATE(sk)                                             \
	((sk)->af_lan != (sk)->af_gwy ||                                \
	STATE_ADDR_TRANSLATE(sk) ||                                     \
	(sk)->lan.xport.port != (sk)->gwy.xport.port)

/* GRE/PPTP variant: call IDs play the role of ports. */
#define STATE_GRE_TRANSLATE(sk)                                         \
	(STATE_ADDR_TRANSLATE(sk) ||                                    \
	(sk)->lan.xport.call_id != (sk)->gwy.xport.call_id)
469 
/*
 * Interface a new state is bound to: the packet's kif for interface-bound
 * rules, the pfi_all wildcard otherwise.  The expansion is wrapped in
 * parentheses so the conditional operator cannot capture neighboring
 * operands at expansion sites (previous form was a macro-hygiene hazard).
 */
#define BOUND_IFACE(r, k) \
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all)
472 
/*
 * Bump the per-rule state counters for a newly created state `s' on its
 * matching rule and, when present, its anchor and NAT rules.  The VERIFYs
 * catch counter wrap, which would corrupt accounting.
 */
#define STATE_INC_COUNTERS(s)                                   \
	do {                                                    \
	        s->rule.ptr->states++;                          \
	        VERIFY(s->rule.ptr->states != 0);               \
	        if (s->anchor.ptr != NULL) {                    \
	                s->anchor.ptr->states++;                \
	                VERIFY(s->anchor.ptr->states != 0);     \
	        }                                               \
	        if (s->nat_rule.ptr != NULL) {                  \
	                s->nat_rule.ptr->states++;              \
	                VERIFY(s->nat_rule.ptr->states != 0);   \
	        }                                               \
	} while (0)

/*
 * Inverse of STATE_INC_COUNTERS, performed in reverse order; the VERIFYs
 * catch underflow (removing a state that was never counted).
 */
#define STATE_DEC_COUNTERS(s)                                   \
	do {                                                    \
	        if (s->nat_rule.ptr != NULL) {                  \
	                VERIFY(s->nat_rule.ptr->states > 0);    \
	                s->nat_rule.ptr->states--;              \
	        }                                               \
	        if (s->anchor.ptr != NULL) {                    \
	                VERIFY(s->anchor.ptr->states > 0);      \
	                s->anchor.ptr->states--;                \
	        }                                               \
	        VERIFY(s->rule.ptr->states > 0);                \
	        s->rule.ptr->states--;                          \
	} while (0)
500 
501 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
502 static __inline int pf_state_compare_lan_ext(struct pf_state_key *,
503     struct pf_state_key *);
504 static __inline int pf_state_compare_ext_gwy(struct pf_state_key *,
505     struct pf_state_key *);
506 static __inline int pf_state_compare_id(struct pf_state *,
507     struct pf_state *);
508 
509 struct pf_src_tree tree_src_tracking;
510 
511 struct pf_state_tree_id tree_id;
512 struct pf_state_queue state_list;
513 
514 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
515 RB_GENERATE(pf_state_tree_lan_ext, pf_state_key,
516     entry_lan_ext, pf_state_compare_lan_ext);
517 RB_GENERATE(pf_state_tree_ext_gwy, pf_state_key,
518     entry_ext_gwy, pf_state_compare_ext_gwy);
519 RB_GENERATE(pf_state_tree_id, pf_state,
520     entry_id, pf_state_compare_id);
521 
522 #define PF_DT_SKIP_LANEXT       0x01
523 #define PF_DT_SKIP_EXTGWY       0x02
524 
/* PPTP control connection: well-known TCP port and protocol magic cookie. */
static const u_int16_t PF_PPTP_PORT = 1723;
static const u_int32_t PF_PPTP_MAGIC_NUMBER = 0x1A2B3C4D;

/* Common PPTP message header (wire format, network byte order on the wire). */
struct pf_pptp_hdr {
	u_int16_t       length;  /* total message length */
	u_int16_t       type;    /* PPTP message type */
	u_int32_t       magic;   /* must equal PF_PPTP_MAGIC_NUMBER */
};

/* Control-message sub-header following pf_pptp_hdr. */
struct pf_pptp_ctrl_hdr {
	u_int16_t       type;       /* PF_PPTP_CTRL_TYPE_* */
	u_int16_t       reserved_0;
};

/* Generic view of a control-message body as raw 16-bit words. */
struct pf_pptp_ctrl_generic {
	u_int16_t       data[0]; /* zero-length array (GNU extension) */
};
541 };
542 
/*
 * PPTP control-message bodies.  These mirror the on-the-wire layouts of
 * the PPTP control messages (RFC 2637 format, as the field names and
 * type codes below indicate); do not reorder or repack fields.
 */

/* Start-Control-Connection-Request */
#define PF_PPTP_CTRL_TYPE_START_REQ     1
struct pf_pptp_ctrl_start_req {
	u_int16_t       protocol_version;
	u_int16_t       reserved_1;
	u_int32_t       framing_capabilities;
	u_int32_t       bearer_capabilities;
	u_int16_t       maximum_channels;
	u_int16_t       firmware_revision;
	u_int8_t        host_name[64];
	u_int8_t        vendor_string[64];
};

/* Start-Control-Connection-Reply */
#define PF_PPTP_CTRL_TYPE_START_RPY     2
struct pf_pptp_ctrl_start_rpy {
	u_int16_t       protocol_version;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int32_t       framing_capabilities;
	u_int32_t       bearer_capabilities;
	u_int16_t       maximum_channels;
	u_int16_t       firmware_revision;
	u_int8_t        host_name[64];
	u_int8_t        vendor_string[64];
};

/* Stop-Control-Connection-Request */
#define PF_PPTP_CTRL_TYPE_STOP_REQ      3
struct pf_pptp_ctrl_stop_req {
	u_int8_t        reason;
	u_int8_t        reserved_1;
	u_int16_t       reserved_2;
};

/* Stop-Control-Connection-Reply */
#define PF_PPTP_CTRL_TYPE_STOP_RPY      4
struct pf_pptp_ctrl_stop_rpy {
	u_int8_t        reason;
	u_int8_t        error_code;
	u_int16_t       reserved_1;
};

/* Echo-Request */
#define PF_PPTP_CTRL_TYPE_ECHO_REQ      5
struct pf_pptp_ctrl_echo_req {
	u_int32_t       identifier;
};

/* Echo-Reply */
#define PF_PPTP_CTRL_TYPE_ECHO_RPY      6
struct pf_pptp_ctrl_echo_rpy {
	u_int32_t       identifier;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int16_t       reserved_1;
};

/* Outgoing-Call-Request */
#define PF_PPTP_CTRL_TYPE_CALL_OUT_REQ  7
struct pf_pptp_ctrl_call_out_req {
	u_int16_t       call_id;      /* caller's call ID (rewritten by NAT) */
	u_int16_t       call_sernum;
	u_int32_t       min_bps;
	u_int32_t       bearer_type;
	u_int32_t       framing_type;
	u_int16_t       rxwindow_size;
	u_int16_t       proc_delay;
	u_int8_t        phone_num[64];
	u_int8_t        sub_addr[64];
};

/* Outgoing-Call-Reply */
#define PF_PPTP_CTRL_TYPE_CALL_OUT_RPY  8
struct pf_pptp_ctrl_call_out_rpy {
	u_int16_t       call_id;
	u_int16_t       peer_call_id;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int16_t       cause_code;
	u_int32_t       connect_speed;
	u_int16_t       rxwindow_size;
	u_int16_t       proc_delay;
	u_int32_t       phy_channel_id;
};

/* Incoming-Call-Request */
#define PF_PPTP_CTRL_TYPE_CALL_IN_1ST   9
struct pf_pptp_ctrl_call_in_1st {
	u_int16_t       call_id;
	u_int16_t       call_sernum;
	u_int32_t       bearer_type;
	u_int32_t       phy_channel_id;
	u_int16_t       dialed_number_len;
	u_int16_t       dialing_number_len;
	u_int8_t        dialed_num[64];
	u_int8_t        dialing_num[64];
	u_int8_t        sub_addr[64];
};

/* Incoming-Call-Reply */
#define PF_PPTP_CTRL_TYPE_CALL_IN_2ND   10
struct pf_pptp_ctrl_call_in_2nd {
	u_int16_t       call_id;
	u_int16_t       peer_call_id;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int16_t       rxwindow_size;
	u_int16_t       txdelay;
	u_int16_t       reserved_1;
};

/* Incoming-Call-Connected */
#define PF_PPTP_CTRL_TYPE_CALL_IN_3RD   11
struct pf_pptp_ctrl_call_in_3rd {
	u_int16_t       call_id;
	u_int16_t       reserved_1;
	u_int32_t       connect_speed;
	u_int16_t       rxwindow_size;
	u_int16_t       txdelay;
	u_int32_t       framing_type;
};

/* Call-Clear-Request */
#define PF_PPTP_CTRL_TYPE_CALL_CLR      12
struct pf_pptp_ctrl_call_clr {
	u_int16_t       call_id;
	u_int16_t       reserved_1;
};

/* Call-Disconnect-Notify */
#define PF_PPTP_CTRL_TYPE_CALL_DISC     13
struct pf_pptp_ctrl_call_disc {
	u_int16_t       call_id;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int16_t       cause_code;
	u_int16_t       reserved_1;
	u_int8_t        statistics[128];
};

/* WAN-Error-Notify */
#define PF_PPTP_CTRL_TYPE_ERROR 14
struct pf_pptp_ctrl_error {
	u_int16_t       peer_call_id;
	u_int16_t       reserved_1;
	u_int32_t       crc_errors;
	u_int32_t       fr_errors;
	u_int32_t       hw_errors;
	u_int32_t       buf_errors;
	u_int32_t       tim_errors;
	u_int32_t       align_errors;
};

/* Set-Link-Info */
#define PF_PPTP_CTRL_TYPE_SET_LINKINFO  15
struct pf_pptp_ctrl_set_linkinfo {
	u_int16_t       peer_call_id;
	u_int16_t       reserved_1;
	u_int32_t       tx_accm;
	u_int32_t       rx_accm;
};

/* Smallest parseable control message: outer header plus control sub-header. */
static const size_t PF_PPTP_CTRL_MSG_MINSIZE =
    sizeof(struct pf_pptp_hdr) + sizeof(struct pf_pptp_ctrl_hdr);

/* Overlay of every control-message body, plus a raw-byte view. */
union pf_pptp_ctrl_msg_union {
	struct pf_pptp_ctrl_start_req           start_req;
	struct pf_pptp_ctrl_start_rpy           start_rpy;
	struct pf_pptp_ctrl_stop_req            stop_req;
	struct pf_pptp_ctrl_stop_rpy            stop_rpy;
	struct pf_pptp_ctrl_echo_req            echo_req;
	struct pf_pptp_ctrl_echo_rpy            echo_rpy;
	struct pf_pptp_ctrl_call_out_req        call_out_req;
	struct pf_pptp_ctrl_call_out_rpy        call_out_rpy;
	struct pf_pptp_ctrl_call_in_1st         call_in_1st;
	struct pf_pptp_ctrl_call_in_2nd         call_in_2nd;
	struct pf_pptp_ctrl_call_in_3rd         call_in_3rd;
	struct pf_pptp_ctrl_call_clr            call_clr;
	struct pf_pptp_ctrl_call_disc           call_disc;
	struct pf_pptp_ctrl_error                       error;
	struct pf_pptp_ctrl_set_linkinfo        set_linkinfo;
	u_int8_t                                                        data[0];
};

/* Complete control message as it appears in the TCP stream. */
struct pf_pptp_ctrl_msg {
	struct pf_pptp_hdr                              hdr;
	struct pf_pptp_ctrl_hdr                 ctrl;
	union pf_pptp_ctrl_msg_union    msg;
};
718 
/* GRE header flag bits and the PPP-over-GRE ethertype used by PPTP. */
#define PF_GRE_FLAG_CHECKSUM_PRESENT    0x8000
#define PF_GRE_FLAG_VERSION_MASK                0x0007
#define PF_GRE_PPP_ETHERTYPE                    0x880B

/*
 * Enhanced GRE (version 1) header as used by PPTP data sessions.  Only the
 * fixed leading fields are declared; the optional seqno/ackno words noted
 * below follow on the wire when the corresponding flag bits are set.
 */
struct pf_grev1_hdr {
	u_int16_t flags;
	u_int16_t protocol_type;   /* PF_GRE_PPP_ETHERTYPE for PPTP */
	u_int16_t payload_length;
	u_int16_t call_id;         /* session demux key, rewritten by NAT */
	/*
	 *  u_int32_t seqno;
	 *  u_int32_t ackno;
	 */
};
733 
/* IKE (ISAKMP) well-known UDP port. */
static const u_int16_t PF_IKE_PORT = 500;

/* Fixed ISAKMP/IKE header shared by IKEv1 and IKEv2. */
struct pf_ike_hdr {
	u_int64_t initiator_cookie, responder_cookie; /* SPIs identifying the exchange */
	u_int8_t next_payload, version, exchange_type, flags;
	u_int32_t message_id, length;
};

/* A packet shorter than the fixed header cannot be IKE. */
#define PF_IKE_PACKET_MINSIZE   (sizeof (struct pf_ike_hdr))

/* Exchange-type codes, IKEv1 then IKEv2. */
#define PF_IKEv1_EXCHTYPE_BASE                           1
#define PF_IKEv1_EXCHTYPE_ID_PROTECT             2
#define PF_IKEv1_EXCHTYPE_AUTH_ONLY                      3
#define PF_IKEv1_EXCHTYPE_AGGRESSIVE             4
#define PF_IKEv1_EXCHTYPE_INFORMATIONAL          5
#define PF_IKEv2_EXCHTYPE_SA_INIT                       34
#define PF_IKEv2_EXCHTYPE_AUTH                          35
#define PF_IKEv2_EXCHTYPE_CREATE_CHILD_SA       36
#define PF_IKEv2_EXCHTYPE_INFORMATIONAL         37

/* Header flag bits (v1: Encrypt/Commit/Auth-only; v2: Initiator/Version/Response). */
#define PF_IKEv1_FLAG_E         0x01
#define PF_IKEv1_FLAG_C         0x02
#define PF_IKEv1_FLAG_A         0x04
#define PF_IKEv2_FLAG_I         0x08
#define PF_IKEv2_FLAG_V         0x10
#define PF_IKEv2_FLAG_R         0x20

/* Leading fields of an ESP packet; payload is the encrypted remainder. */
struct pf_esp_hdr {
	u_int32_t spi;
	u_int32_t seqno;
	u_int8_t payload[];
};
766 
767 static __inline int
pf_addr_compare(struct pf_addr * a,struct pf_addr * b,sa_family_t af)768 pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
769 {
770 	switch (af) {
771 #ifdef INET
772 	case AF_INET:
773 		if (a->addr32[0] > b->addr32[0]) {
774 			return 1;
775 		}
776 		if (a->addr32[0] < b->addr32[0]) {
777 			return -1;
778 		}
779 		break;
780 #endif /* INET */
781 	case AF_INET6:
782 		if (a->addr32[3] > b->addr32[3]) {
783 			return 1;
784 		}
785 		if (a->addr32[3] < b->addr32[3]) {
786 			return -1;
787 		}
788 		if (a->addr32[2] > b->addr32[2]) {
789 			return 1;
790 		}
791 		if (a->addr32[2] < b->addr32[2]) {
792 			return -1;
793 		}
794 		if (a->addr32[1] > b->addr32[1]) {
795 			return 1;
796 		}
797 		if (a->addr32[1] < b->addr32[1]) {
798 			return -1;
799 		}
800 		if (a->addr32[0] > b->addr32[0]) {
801 			return 1;
802 		}
803 		if (a->addr32[0] < b->addr32[0]) {
804 			return -1;
805 		}
806 		break;
807 	}
808 	return 0;
809 }
810 
811 static __inline int
pf_src_compare(struct pf_src_node * a,struct pf_src_node * b)812 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
813 {
814 	int     diff;
815 
816 	if (a->rule.ptr > b->rule.ptr) {
817 		return 1;
818 	}
819 	if (a->rule.ptr < b->rule.ptr) {
820 		return -1;
821 	}
822 	if ((diff = a->af - b->af) != 0) {
823 		return diff;
824 	}
825 	if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0) {
826 		return diff;
827 	}
828 	return 0;
829 }
830 
/*
 * RB-tree comparator for the lan/ext_lan state table.  Keys are ordered
 * by protocol, LAN-side address family, protocol-specific ports/ids, the
 * LAN and (conditionally) external addresses, and finally the per-app
 * state comparator.  The comparison order must stay stable: changing it
 * would corrupt the existing tree ordering.
 */
static __inline int
pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
{
	int     diff;
	int     extfilter;

	if ((diff = a->proto - b->proto) != 0) {
		return diff;
	}
	if ((diff = a->af_lan - b->af_lan) != 0) {
		return diff;
	}

	/* strictest level by default; UDP may relax it below */
	extfilter = PF_EXTFILTER_APD;

	switch (a->proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		/* for ICMP the "port" slot carries the echo id */
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_TCP:
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		if ((diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_UDP:
		/*
		 * For UDP, proto_variant encodes the endpoint-filtering
		 * level (PF_EXTFILTER_*); looser levels progressively drop
		 * the external port and address from the key comparison.
		 */
		if ((diff = a->proto_variant - b->proto_variant)) {
			return diff;
		}
		extfilter = a->proto_variant;
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		if ((extfilter < PF_EXTFILTER_AD) &&
		    (diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_GRE:
		/* PPTP-style GRE keys carry a call id instead of ports */
		if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
		    a->proto_variant == b->proto_variant) {
			if (!!(diff = a->ext_lan.xport.call_id -
			    b->ext_lan.xport.call_id)) {
				return diff;
			}
		}
		break;

	case IPPROTO_ESP:
		/* ESP keys are distinguished by SPI */
		if (!!(diff = a->ext_lan.xport.spi - b->ext_lan.xport.spi)) {
			return diff;
		}
		break;

	default:
		break;
	}

	switch (a->af_lan) {
#if INET
	case AF_INET:
		if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr,
		    a->af_lan)) != 0) {
			return diff;
		}

		/* endpoint-independent (EI) filtering ignores the ext addr */
		if (extfilter < PF_EXTFILTER_EI) {
			if ((diff = pf_addr_compare(&a->ext_lan.addr,
			    &b->ext_lan.addr,
			    a->af_lan)) != 0) {
				return diff;
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr,
		    a->af_lan)) != 0) {
			return diff;
		}

		/*
		 * NOTE(review): for v6 a zeroed ext addr in the search key
		 * (b) appears to act as a wildcard under EI filtering —
		 * the addresses are compared unless the filter is EI-or-
		 * looser AND b's ext addr is all-zero.  Confirm intent.
		 */
		if (extfilter < PF_EXTFILTER_EI ||
		    !PF_AZERO(&b->ext_lan.addr, AF_INET6)) {
			if ((diff = pf_addr_compare(&a->ext_lan.addr,
			    &b->ext_lan.addr,
			    a->af_lan)) != 0) {
				return diff;
			}
		}
		break;
	}

	if (a->app_state && b->app_state) {
		if (a->app_state->compare_lan_ext &&
		    b->app_state->compare_lan_ext) {
			/*
			 * Order first by comparator identity (pointer
			 * difference — upstream idiom), then by the app
			 * comparator itself.
			 */
			diff = (const char *)b->app_state->compare_lan_ext -
			    (const char *)a->app_state->compare_lan_ext;
			if (diff != 0) {
				return diff;
			}
			diff = a->app_state->compare_lan_ext(a->app_state,
			    b->app_state);
			if (diff != 0) {
				return diff;
			}
		}
	}

	return 0;
}
949 
/*
 * RB-tree comparator for the ext/gwy state table — the mirror image of
 * pf_state_compare_lan_ext() for the gateway side.  Ordering: protocol,
 * gateway-side address family, protocol-specific ports/ids, addresses,
 * then the per-app comparator.  Order must remain stable.
 */
static __inline int
pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
{
	int     diff;
	int     extfilter;

	if ((diff = a->proto - b->proto) != 0) {
		return diff;
	}

	if ((diff = a->af_gwy - b->af_gwy) != 0) {
		return diff;
	}

	/* strictest level by default; UDP may relax it below */
	extfilter = PF_EXTFILTER_APD;

	switch (a->proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		/* for ICMP the "port" slot carries the echo id */
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_TCP:
		if ((diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0) {
			return diff;
		}
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_UDP:
		/*
		 * proto_variant encodes the endpoint-filtering level
		 * (PF_EXTFILTER_*); looser levels drop the external port
		 * and address from the comparison.
		 */
		if ((diff = a->proto_variant - b->proto_variant)) {
			return diff;
		}
		extfilter = a->proto_variant;
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		if ((extfilter < PF_EXTFILTER_AD) &&
		    (diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_GRE:
		/* PPTP-style GRE keys carry a call id instead of ports */
		if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
		    a->proto_variant == b->proto_variant) {
			if (!!(diff = a->gwy.xport.call_id -
			    b->gwy.xport.call_id)) {
				return diff;
			}
		}
		break;

	case IPPROTO_ESP:
		/* ESP keys are distinguished by SPI */
		if (!!(diff = a->gwy.xport.spi - b->gwy.xport.spi)) {
			return diff;
		}
		break;

	default:
		break;
	}

	switch (a->af_gwy) {
#if INET
	case AF_INET:
		if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr,
		    a->af_gwy)) != 0) {
			return diff;
		}

		/* endpoint-independent (EI) filtering ignores the ext addr */
		if (extfilter < PF_EXTFILTER_EI) {
			if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr,
			    a->af_gwy)) != 0) {
				return diff;
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr,
		    a->af_gwy)) != 0) {
			return diff;
		}

		/*
		 * NOTE(review): zeroed v6 ext addr in the search key (b)
		 * appears to act as a wildcard under EI filtering; see the
		 * matching comment in pf_state_compare_lan_ext().
		 */
		if (extfilter < PF_EXTFILTER_EI ||
		    !PF_AZERO(&b->ext_gwy.addr, AF_INET6)) {
			if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr,
			    a->af_gwy)) != 0) {
				return diff;
			}
		}
		break;
	}

	if (a->app_state && b->app_state) {
		if (a->app_state->compare_ext_gwy &&
		    b->app_state->compare_ext_gwy) {
			/* order by comparator identity, then invoke it */
			diff = (const char *)b->app_state->compare_ext_gwy -
			    (const char *)a->app_state->compare_ext_gwy;
			if (diff != 0) {
				return diff;
			}
			diff = a->app_state->compare_ext_gwy(a->app_state,
			    b->app_state);
			if (diff != 0) {
				return diff;
			}
		}
	}

	return 0;
}
1067 
1068 static __inline int
pf_state_compare_id(struct pf_state * a,struct pf_state * b)1069 pf_state_compare_id(struct pf_state *a, struct pf_state *b)
1070 {
1071 	if (a->id > b->id) {
1072 		return 1;
1073 	}
1074 	if (a->id < b->id) {
1075 		return -1;
1076 	}
1077 	if (a->creatorid > b->creatorid) {
1078 		return 1;
1079 	}
1080 	if (a->creatorid < b->creatorid) {
1081 		return -1;
1082 	}
1083 
1084 	return 0;
1085 }
1086 
1087 void
pf_addrcpy(struct pf_addr * dst,struct pf_addr * src,sa_family_t af)1088 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
1089 {
1090 	switch (af) {
1091 #if INET
1092 	case AF_INET:
1093 		dst->addr32[0] = src->addr32[0];
1094 		break;
1095 #endif /* INET */
1096 	case AF_INET6:
1097 		dst->addr32[0] = src->addr32[0];
1098 		dst->addr32[1] = src->addr32[1];
1099 		dst->addr32[2] = src->addr32[2];
1100 		dst->addr32[3] = src->addr32[3];
1101 		break;
1102 	}
1103 }
1104 
1105 struct pf_state *
pf_find_state_byid(struct pf_state_cmp * key)1106 pf_find_state_byid(struct pf_state_cmp *key)
1107 {
1108 	pf_status.fcounters[FCNT_STATE_SEARCH]++;
1109 
1110 	return RB_FIND(pf_state_tree_id, &tree_id,
1111 	           (struct pf_state *)(void *)key);
1112 }
1113 
/*
 * Look up the state matching `key' for traffic in direction `dir' on
 * interface `kif'.  Outbound lookups use the lan/ext tree, inbound ones
 * the ext/gwy tree with a NAT64 fallback (see below).  Among states
 * sharing a key, an exact interface match or a floating (pfi_all) state
 * is returned.
 */
static struct pf_state *
pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir)
{
	struct pf_state_key     *sk = NULL;
	struct pf_state         *s;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	switch (dir) {
	case PF_OUT:
		sk = RB_FIND(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
		    (struct pf_state_key *)key);
		break;
	case PF_IN:
		sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
		    (struct pf_state_key *)key);
		/*
		 * NAT64 is done only on input; for packets coming in from
		 * the LAN side we need to look up the lan_ext tree.  A hit
		 * whose families match is not a NAT64 state, so discard it.
		 *
		 * NOTE(review): unlike pf_find_state_all(), this fallback
		 * is not gated on pf_nat64_configured — confirm whether
		 * that asymmetry is intended.
		 */
		if (sk == NULL) {
			sk = RB_FIND(pf_state_tree_lan_ext,
			    &pf_statetbl_lan_ext,
			    (struct pf_state_key *)key);
			if (sk && sk->af_lan == sk->af_gwy) {
				sk = NULL;
			}
		}
		break;
	default:
		panic("pf_find_state");
	}

	/* list is sorted, if-bound states before floating ones */
	if (sk != NULL) {
		TAILQ_FOREACH(s, &sk->states, next)
		if (s->kif == pfi_all || s->kif == kif) {
			return s;
		}
	}

	return NULL;
}
1157 
1158 struct pf_state *
pf_find_state_all(struct pf_state_key_cmp * key,u_int dir,int * more)1159 pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
1160 {
1161 	struct pf_state_key     *sk = NULL;
1162 	struct pf_state         *s, *ret = NULL;
1163 
1164 	pf_status.fcounters[FCNT_STATE_SEARCH]++;
1165 
1166 	switch (dir) {
1167 	case PF_OUT:
1168 		sk = RB_FIND(pf_state_tree_lan_ext,
1169 		    &pf_statetbl_lan_ext, (struct pf_state_key *)key);
1170 		break;
1171 	case PF_IN:
1172 		sk = RB_FIND(pf_state_tree_ext_gwy,
1173 		    &pf_statetbl_ext_gwy, (struct pf_state_key *)key);
1174 		/*
1175 		 * NAT64 is done only on input, for packets coming in from
1176 		 * from the LAN side, need to lookup the lan_ext tree.
1177 		 */
1178 		if ((sk == NULL) && pf_nat64_configured) {
1179 			sk = RB_FIND(pf_state_tree_lan_ext,
1180 			    &pf_statetbl_lan_ext,
1181 			    (struct pf_state_key *)key);
1182 			if (sk && sk->af_lan == sk->af_gwy) {
1183 				sk = NULL;
1184 			}
1185 		}
1186 		break;
1187 	default:
1188 		panic("pf_find_state_all");
1189 	}
1190 
1191 	if (sk != NULL) {
1192 		ret = TAILQ_FIRST(&sk->states);
1193 		if (more == NULL) {
1194 			return ret;
1195 		}
1196 
1197 		TAILQ_FOREACH(s, &sk->states, next)
1198 		(*more)++;
1199 	}
1200 
1201 	return ret;
1202 }
1203 
1204 static void
pf_init_threshold(struct pf_threshold * threshold,u_int32_t limit,u_int32_t seconds)1205 pf_init_threshold(struct pf_threshold *threshold,
1206     u_int32_t limit, u_int32_t seconds)
1207 {
1208 	threshold->limit = limit * PF_THRESHOLD_MULT;
1209 	threshold->seconds = seconds;
1210 	threshold->count = 0;
1211 	threshold->last = pf_time_second();
1212 }
1213 
1214 static void
pf_add_threshold(struct pf_threshold * threshold)1215 pf_add_threshold(struct pf_threshold *threshold)
1216 {
1217 	u_int32_t t = pf_time_second(), diff = t - threshold->last;
1218 
1219 	if (diff >= threshold->seconds) {
1220 		threshold->count = 0;
1221 	} else {
1222 		threshold->count -= threshold->count * diff /
1223 		    threshold->seconds;
1224 	}
1225 	threshold->count += PF_THRESHOLD_MULT;
1226 	threshold->last = t;
1227 }
1228 
1229 static int
pf_check_threshold(struct pf_threshold * threshold)1230 pf_check_threshold(struct pf_threshold *threshold)
1231 {
1232 	return threshold->count > threshold->limit;
1233 }
1234 
/*
 * Account a new TCP connection against the state's source node and
 * enforce the rule's max_src_conn / max_src_conn_rate limits.  On a
 * violation the offending source may be inserted into the rule's
 * overload table and, if the rule requests it, matching states are
 * flushed.  Returns 1 when the limit was exceeded (and *state has been
 * marked for purge), 0 otherwise.
 */
static int
pf_src_connlimit(struct pf_state **state)
{
	int bad = 0;
	(*state)->src_node->conn++;
	VERIFY((*state)->src_node->conn != 0);
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&(*state)->src_node->conn_rate);

	/* absolute cap on simultaneous connections from this source */
	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn <
	    (*state)->src_node->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	/* rate cap (connections per window) from this source */
	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad) {
		return 0;
	}

	if ((*state)->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t       killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf_src_connlimit: blocking address ");
			pf_print_host(&(*state)->src_node->addr, 0,
			    (*state)->state_key->af_lan);
		}

		/* build a host entry for the offender's LAN-side family */
		bzero(&p, sizeof(p));
		p.pfra_af = (*state)->state_key->af_lan;
		switch ((*state)->state_key->af_lan) {
#if INET
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = (*state)->src_node->addr.v4addr;
			break;
#endif /* INET */
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = (*state)->src_node->addr.v6addr;
			break;
		}

		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
		    &p, pf_calendar_time_second());

		/* kill existing states if that's required. */
		if ((*state)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->state_key;
				/*
				 * Kill states from this source.  (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set.)  The address to match against depends
				 * on the state's direction: lan for outbound,
				 * ext_lan for inbound.
				 */
				if (sk->af_lan ==
				    (*state)->state_key->af_lan &&
				    (((*state)->state_key->direction ==
				    PF_OUT &&
				    PF_AEQ(&(*state)->src_node->addr,
				    &sk->lan.addr, sk->af_lan)) ||
				    ((*state)->state_key->direction == PF_IN &&
				    PF_AEQ(&(*state)->src_node->addr,
				    &sk->ext_lan.addr, sk->af_lan))) &&
				    ((*state)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*state)->rule.ptr == st->rule.ptr)) {
					/* mark for purge on the next sweep */
					st->timeout = PFTM_PURGE;
					st->src.state = st->dst.state =
					    TCPS_CLOSED;
					killed++;
				}
			}
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf(", %u states killed", killed);
			}
		}
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("\n");
		}
	}

	/* kill this state */
	(*state)->timeout = PFTM_PURGE;
	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
	return 1;
}
1335 
/*
 * Find or create the source-tracking node for `src'/`af' under `rule'.
 * On entry *sn may already point at a node (in which case only the
 * max_src_states limit is checked); otherwise the tree is searched and
 * a new node is allocated and inserted on a miss.  Returns 0 with *sn
 * set on success, -1 on allocation failure, insert collision, or when
 * the rule's node/state limits are exceeded.
 */
int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    struct pf_addr *src, sa_family_t af)
{
	struct pf_src_node      k;

	if (*sn == NULL) {
		/* build a search key; rule-bound only for sticky/tracking */
		k.af = af;
		PF_ACPY(&k.addr, src, af);
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR) {
			k.rule.ptr = rule;
		} else {
			k.rule.ptr = NULL;
		}
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		/* miss: allocate a fresh node unless the rule is at cap */
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes) {
			(*sn) = pool_get(&pf_src_tree_pl, PR_WAITOK);
		} else {
			pf_status.lcounters[LCNT_SRCNODES]++;
		}
		if ((*sn) == NULL) {
			return -1;
		}
		bzero(*sn, sizeof(struct pf_src_node));

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		/* mirror the key fields used by RB_FIND above */
		(*sn)->af = af;
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR) {
			(*sn)->rule.ptr = rule;
		} else {
			(*sn)->rule.ptr = NULL;
		}
		PF_ACPY(&(*sn)->addr, src, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			/* duplicate key raced in; give the node back */
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				printf("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			*sn = NULL; /* signal the caller that no additional cleanup is needed */
			return -1;
		}
		(*sn)->creation = pf_time_second();
		(*sn)->ruletype = rule->action;
		if ((*sn)->rule.ptr != NULL) {
			(*sn)->rule.ptr->src_nodes++;
		}
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		/* existing node: enforce the per-source state cap */
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return -1;
		}
	}
	return 0;
}
1405 
1406 static void
pf_stateins_err(const char * tree,struct pf_state * s,struct pfi_kif * kif)1407 pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif)
1408 {
1409 	struct pf_state_key     *sk = s->state_key;
1410 
1411 	if (pf_status.debug >= PF_DEBUG_MISC) {
1412 		printf("pf: state insert failed: %s %s ", tree, kif->pfik_name);
1413 		switch (sk->proto) {
1414 		case IPPROTO_TCP:
1415 			printf("TCP");
1416 			break;
1417 		case IPPROTO_UDP:
1418 			printf("UDP");
1419 			break;
1420 		case IPPROTO_ICMP:
1421 			printf("ICMP4");
1422 			break;
1423 		case IPPROTO_ICMPV6:
1424 			printf("ICMP6");
1425 			break;
1426 		default:
1427 			printf("PROTO=%u", sk->proto);
1428 			break;
1429 		}
1430 		printf(" lan: ");
1431 		pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto,
1432 		    sk->proto_variant);
1433 		printf(" gwy: ");
1434 		pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto,
1435 		    sk->proto_variant);
1436 		printf(" ext_lan: ");
1437 		pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto,
1438 		    sk->proto_variant);
1439 		printf(" ext_gwy: ");
1440 		pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto,
1441 		    sk->proto_variant);
1442 		if (s->sync_flags & PFSTATE_FROMSYNC) {
1443 			printf(" (from sync)");
1444 		}
1445 		printf("\n");
1446 	}
1447 }
1448 
/*
 * Insert a fully-built state into the three state trees (lan_ext,
 * ext_gwy, id) and the global state list, binding it to `kif'.  A key
 * collision in lan_ext is tolerated when no state on that key shares
 * the interface: the new state is re-attached to the existing key.
 * Returns 0 on success, -1 on any collision (the state is detached
 * before returning so the caller can free it).
 */
int
pf_insert_state(struct pfi_kif *kif, struct pf_state *s)
{
	struct pf_state_key     *cur;
	struct pf_state         *sp;

	VERIFY(s->state_key != NULL);
	s->kif = kif;

	if ((cur = RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
	    s->state_key)) != NULL) {
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(sp, &cur->states, next)
		if (sp->kif == kif) {           /* collision! */
			pf_stateins_err("tree_lan_ext", s, kif);
			pf_detach_state(s,
			    PF_DT_SKIP_LANEXT | PF_DT_SKIP_EXTGWY);
			return -1;
		}
		/* share the existing key; floating states sort first */
		pf_detach_state(s, PF_DT_SKIP_LANEXT | PF_DT_SKIP_EXTGWY);
		pf_attach_state(cur, s, kif == pfi_all ? 1 : 0);
	}

	/* if cur != NULL, we already found a state key and attached to it */
	if (cur == NULL && (cur = RB_INSERT(pf_state_tree_ext_gwy,
	    &pf_statetbl_ext_gwy, s->state_key)) != NULL) {
		/* must not happen. we must have found the sk above! */
		pf_stateins_err("tree_ext_gwy", s, kif);
		pf_detach_state(s, PF_DT_SKIP_EXTGWY);
		return -1;
	}

	/* assign a fresh (id, creatorid) unless pfsync provided one */
	if (s->id == 0 && s->creatorid == 0) {
		s->id = htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
	}
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    be64toh(s->id), ntohl(s->creatorid));
			if (s->sync_flags & PFSTATE_FROMSYNC) {
				printf(" (from sync)");
			}
			printf("\n");
		}
		pf_detach_state(s, 0);
		return -1;
	}
	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	VERIFY(pf_status.states != 0);
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
#if NPFSYNC
	pfsync_insert_state(s);
#endif
	return 0;
}
1508 
/*
 * Continuation body of the pf purge thread, re-entered roughly once a
 * second via tsleep0().  Incrementally expires states every pass and
 * fragments/source nodes every PFTM_INTERVAL passes; when pf is not
 * running it drains everything and then sleeps without timeout until
 * woken.  Never returns normally — it always re-arms itself through
 * tsleep0() with itself as the continuation.
 */
static int
pf_purge_thread_cont(int err)
{
#pragma unused(err)
	/* persists across continuations: passes since the last full sweep */
	static u_int32_t nloops = 0;
	int t = 1;      /* 1 second */

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the periodic timeout callout to update
	 * the counter returnable via net_uptime().
	 */
	net_update_uptime();

	lck_rw_lock_shared(&pf_perim_lock);
	lck_mtx_lock(&pf_lock);

	/* purge everything if not running */
	if (!pf_status.running) {
		pf_purge_expired_states(pf_status.states);
		pf_purge_expired_fragments();
		pf_purge_expired_src_nodes();

		/* terminate thread (we don't currently do this) */
		if (pf_purge_thread == NULL) {
			lck_mtx_unlock(&pf_lock);
			lck_rw_done(&pf_perim_lock);

			thread_deallocate(current_thread());
			thread_terminate(current_thread());
			/* NOTREACHED */
			return 0;
		} else {
			/* if there's nothing left, sleep w/o timeout */
			if (pf_status.states == 0 &&
			    pf_normalize_isempty() &&
			    RB_EMPTY(&tree_src_tracking)) {
				nloops = 0;
				t = 0;
			}
			goto done;
		}
	}

	/* process a fraction of the state table every second */
	pf_purge_expired_states(1 + (pf_status.states
	    / pf_default_rule.timeout[PFTM_INTERVAL]));

	/* purge other expired types every PFTM_INTERVAL seconds */
	if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_fragments();
		pf_purge_expired_src_nodes();
		nloops = 0;
	}
done:
	lck_mtx_unlock(&pf_lock);
	lck_rw_done(&pf_perim_lock);

	/* re-arm: t == 0 sleeps indefinitely, otherwise t seconds */
	(void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge_cont",
	    t * hz, pf_purge_thread_cont);
	/* NOTREACHED */
	VERIFY(0);

	return 0;
}
1574 
/*
 * Entry point of the pf purge kernel thread: immediately parks in
 * tsleep0() with pf_purge_thread_cont() as the continuation, which
 * then drives all subsequent periodic work.
 */
void
pf_purge_thread_fn(void *v, wait_result_t w)
{
#pragma unused(v, w)
	(void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge", 0,
	    pf_purge_thread_cont);
	/*
	 * tsleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	VERIFY(0);
}
1587 
/*
 * Compute the absolute time (seconds) at which `state' expires.  Uses
 * the rule's timeout for the state's current stage, falling back to the
 * default rule.  When adaptive timeouts are configured (start/end
 * thresholds on state count), the timeout is scaled down linearly as
 * the state count approaches `end'; at or beyond `end' the state is
 * already expired.  PFTM_PURGE states expire immediately.
 */
u_int64_t
pf_state_expires(const struct pf_state *state)
{
	u_int32_t       t;
	u_int32_t       start;
	u_int32_t       end;
	u_int32_t       states;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE) {
		return pf_time_second();
	}

	VERIFY(state->timeout != PFTM_UNLINKED);
	VERIFY(state->timeout < PFTM_MAX);
	t = state->rule.ptr->timeout[state->timeout];
	if (!t) {
		t = pf_default_rule.timeout[state->timeout];
	}
	/* adaptive scaling: rule-local thresholds win over global ones */
	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = state->rule.ptr->states;
	} else {
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states < end) {
			/* scale t by (end - states) / (end - start) */
			return state->expire + t * (end - states) /
			       (end - start);
		} else {
			/* at or past the hard end: expire now */
			return pf_time_second();
		}
	}
	return state->expire + t;
}
1628 
/*
 * Walk the source-tracking tree and free every node that no longer has
 * states and whose idle-expiry time has passed.  Dropping a node also
 * drops its reference on the owning rule, which may itself be reaped
 * once unreferenced.  Must be called with the pf lock held.
 */
void
pf_purge_expired_src_nodes(void)
{
	struct pf_src_node              *cur, *next;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	/* fetch the successor before possibly removing cur */
	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states <= 0 && cur->expire <= pf_time_second()) {
			if (cur->rule.ptr != NULL) {
				cur->rule.ptr->src_nodes--;
				/* reap the rule once fully unreferenced */
				if (cur->rule.ptr->states <= 0 &&
				    cur->rule.ptr->max_src_nodes <= 0) {
					pf_rm_rule(NULL, cur->rule.ptr);
				}
			}
			RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
			pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
			pf_status.src_nodes--;
			pool_put(&pf_src_tree_pl, cur);
		}
	}
}
1654 
1655 void
pf_src_tree_remove_state(struct pf_state * s)1656 pf_src_tree_remove_state(struct pf_state *s)
1657 {
1658 	u_int32_t t;
1659 
1660 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1661 
1662 	if (s->src_node != NULL) {
1663 		if (s->src.tcp_est) {
1664 			VERIFY(s->src_node->conn > 0);
1665 			--s->src_node->conn;
1666 		}
1667 		VERIFY(s->src_node->states > 0);
1668 		if (--s->src_node->states <= 0) {
1669 			t = s->rule.ptr->timeout[PFTM_SRC_NODE];
1670 			if (!t) {
1671 				t = pf_default_rule.timeout[PFTM_SRC_NODE];
1672 			}
1673 			s->src_node->expire = pf_time_second() + t;
1674 		}
1675 	}
1676 	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1677 		VERIFY(s->nat_src_node->states > 0);
1678 		if (--s->nat_src_node->states <= 0) {
1679 			t = s->rule.ptr->timeout[PFTM_SRC_NODE];
1680 			if (!t) {
1681 				t = pf_default_rule.timeout[PFTM_SRC_NODE];
1682 			}
1683 			s->nat_src_node->expire = pf_time_second() + t;
1684 		}
1685 	}
1686 	s->src_node = s->nat_src_node = NULL;
1687 }
1688 
/*
 * Unlink `cur' from the id tree, its source nodes and its state key,
 * and mark it PFTM_UNLINKED (actual freeing happens later in
 * pf_free_state()).  For a state still in the SYN-proxy destination
 * stage, a RST|ACK is sent to the LAN peer to tear the half-open
 * connection down.  Must hold the pf lock.
 */
void
pf_unlink_state(struct pf_state *cur)
{
	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		/* src = ext_lan peer, dst = lan peer; seqhi/seqlo+1 close
		 * out the proxied handshake */
		pf_send_tcp(cur->rule.ptr, cur->state_key->af_lan,
		    &cur->state_key->ext_lan.addr, &cur->state_key->lan.addr,
		    cur->state_key->ext_lan.xport.port,
		    cur->state_key->lan.xport.port,
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST | TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
	}

	/* run and free any registered unlink hooks before removal */
	hook_runloop(&cur->unlink_hooks, HOOK_REMOVE | HOOK_FREE);
	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFSYNC
	/* only the creating host announces the deletion */
	if (cur->creatorid == pf_status.hostid) {
		pfsync_delete_state(cur);
	}
#endif
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur, 0);
}
1714 
/*
 * Free a state previously detached by pf_unlink_state(): drop the
 * references it holds on its filter, NAT and anchor rules (reaping any
 * rule left fully unreferenced), remove it from the global list and
 * return it to the pool.  Deferred while pfsync is mid-bulk-send using
 * this state as a cursor.  Callers should be at splpf and hold the
 * write_lock on pf_consistency_lock.
 */
void
pf_free_state(struct pf_state *cur)
{
	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
#if NPFSYNC
	/* still referenced as a pfsync bulk cursor: try again later */
	if (pfsyncif != NULL &&
	    (pfsyncif->sc_bulk_send_next == cur ||
	    pfsyncif->sc_bulk_terminator == cur)) {
		return;
	}
#endif
	VERIFY(cur->timeout == PFTM_UNLINKED);
	VERIFY(cur->rule.ptr->states > 0);
	if (--cur->rule.ptr->states <= 0 &&
	    cur->rule.ptr->src_nodes <= 0) {
		pf_rm_rule(NULL, cur->rule.ptr);
	}
	if (cur->nat_rule.ptr != NULL) {
		VERIFY(cur->nat_rule.ptr->states > 0);
		if (--cur->nat_rule.ptr->states <= 0 &&
		    cur->nat_rule.ptr->src_nodes <= 0) {
			pf_rm_rule(NULL, cur->nat_rule.ptr);
		}
	}
	if (cur->anchor.ptr != NULL) {
		VERIFY(cur->anchor.ptr->states > 0);
		if (--cur->anchor.ptr->states <= 0) {
			pf_rm_rule(NULL, cur->anchor.ptr);
		}
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	TAILQ_REMOVE(&state_list, cur, entry_list);
	if (cur->tag) {
		pf_tag_unref(cur->tag);
	}
#if SKYWALK
	netns_release(&cur->nstoken);
#endif
	pool_put(&pf_state_pl, cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	VERIFY(pf_status.states > 0);
	pf_status.states--;
}
1761 
/*
 * Examine up to `maxcheck' states from the global state list, freeing
 * any that are unlinked or past their expiry time.  The cursor `cur'
 * is static so successive calls resume the scan where the previous one
 * stopped, wrapping to the list head at the end — the purge thread
 * relies on this to sweep the whole table once per PFTM_INTERVAL.
 * Must hold the pf lock.
 */
void
pf_purge_expired_states(u_int32_t maxcheck)
{
	static struct pf_state  *cur = NULL;
	struct pf_state         *next;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	while (maxcheck--) {
		/* wrap to start of list when we hit the end */
		if (cur == NULL) {
			cur = TAILQ_FIRST(&state_list);
			if (cur == NULL) {
				break;  /* list empty */
			}
		}

		/* get next state, as cur may get deleted */
		next = TAILQ_NEXT(cur, entry_list);

		if (cur->timeout == PFTM_UNLINKED) {
			/* already unlinked earlier; just release it */
			pf_free_state(cur);
		} else if (pf_state_expires(cur) <= pf_time_second()) {
			/* unlink and free expired state */
			pf_unlink_state(cur);
			pf_free_state(cur);
		}
		cur = next;
	}
}
1792 
1793 int
pf_tbladdr_setup(struct pf_ruleset * rs,struct pf_addr_wrap * aw)1794 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1795 {
1796 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1797 
1798 	if (aw->type != PF_ADDR_TABLE) {
1799 		return 0;
1800 	}
1801 	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL) {
1802 		return 1;
1803 	}
1804 	return 0;
1805 }
1806 
1807 void
pf_tbladdr_remove(struct pf_addr_wrap * aw)1808 pf_tbladdr_remove(struct pf_addr_wrap *aw)
1809 {
1810 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1811 
1812 	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) {
1813 		return;
1814 	}
1815 	pfr_detach_table(aw->p.tbl);
1816 	aw->p.tbl = NULL;
1817 }
1818 
1819 void
pf_tbladdr_copyout(struct pf_addr_wrap * aw)1820 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1821 {
1822 	struct pfr_ktable *kt = aw->p.tbl;
1823 
1824 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1825 
1826 	if (aw->type != PF_ADDR_TABLE || kt == NULL) {
1827 		return;
1828 	}
1829 	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) {
1830 		kt = kt->pfrkt_root;
1831 	}
1832 	aw->p.tbl = NULL;
1833 	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1834 	    kt->pfrkt_cnt : -1;
1835 }
1836 
1837 static void
pf_print_addr(struct pf_addr * addr,sa_family_t af)1838 pf_print_addr(struct pf_addr *addr, sa_family_t af)
1839 {
1840 	switch (af) {
1841 #if INET
1842 	case AF_INET: {
1843 		u_int32_t a = ntohl(addr->addr32[0]);
1844 		printf("%u.%u.%u.%u", (a >> 24) & 255, (a >> 16) & 255,
1845 		    (a >> 8) & 255, a & 255);
1846 		break;
1847 	}
1848 #endif /* INET */
1849 	case AF_INET6: {
1850 		u_int16_t b;
1851 		u_int8_t i, curstart = 255, curend = 0,
1852 		    maxstart = 0, maxend = 0;
1853 		for (i = 0; i < 8; i++) {
1854 			if (!addr->addr16[i]) {
1855 				if (curstart == 255) {
1856 					curstart = i;
1857 				} else {
1858 					curend = i;
1859 				}
1860 			} else {
1861 				if (curstart) {
1862 					if ((curend - curstart) >
1863 					    (maxend - maxstart)) {
1864 						maxstart = curstart;
1865 						maxend = curend;
1866 						curstart = 255;
1867 					}
1868 				}
1869 			}
1870 		}
1871 		for (i = 0; i < 8; i++) {
1872 			if (i >= maxstart && i <= maxend) {
1873 				if (maxend != 7) {
1874 					if (i == maxstart) {
1875 						printf(":");
1876 					}
1877 				} else {
1878 					if (i == maxend) {
1879 						printf(":");
1880 					}
1881 				}
1882 			} else {
1883 				b = ntohs(addr->addr16[i]);
1884 				printf("%x", b);
1885 				if (i < 7) {
1886 					printf(":");
1887 				}
1888 			}
1889 		}
1890 		break;
1891 	}
1892 	}
1893 }
1894 
1895 static void
pf_print_sk_host(struct pf_state_host * sh,sa_family_t af,int proto,u_int8_t proto_variant)1896 pf_print_sk_host(struct pf_state_host *sh, sa_family_t af, int proto,
1897     u_int8_t proto_variant)
1898 {
1899 	pf_print_addr(&sh->addr, af);
1900 
1901 	switch (proto) {
1902 	case IPPROTO_ESP:
1903 		if (sh->xport.spi) {
1904 			printf("[%08x]", ntohl(sh->xport.spi));
1905 		}
1906 		break;
1907 
1908 	case IPPROTO_GRE:
1909 		if (proto_variant == PF_GRE_PPTP_VARIANT) {
1910 			printf("[%u]", ntohs(sh->xport.call_id));
1911 		}
1912 		break;
1913 
1914 	case IPPROTO_TCP:
1915 	case IPPROTO_UDP:
1916 		printf("[%u]", ntohs(sh->xport.port));
1917 		break;
1918 
1919 	default:
1920 		break;
1921 	}
1922 }
1923 
/* Debug helper: print an address and, when nonzero, its port. */
static void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	pf_print_addr(addr, af);
	if (p != 0) {
		printf("[%u]", ntohs(p));
	}
}
1932 
/*
 * Dump a state entry to the console: the protocol name, the four
 * state-key endpoints (lan, gwy, ext_lan, ext_gwy), both peers'
 * sequence-tracking windows, and the src:dst connection states.
 */
void
pf_print_state(struct pf_state *s)
{
	struct pf_state_key *sk = s->state_key;
	switch (sk->proto) {
	case IPPROTO_ESP:
		printf("ESP ");
		break;
	case IPPROTO_GRE:
		printf("GRE%u ", sk->proto_variant);
		break;
	case IPPROTO_TCP:
		printf("TCP ");
		break;
	case IPPROTO_UDP:
		printf("UDP ");
		break;
	case IPPROTO_ICMP:
		printf("ICMP ");
		break;
	case IPPROTO_ICMPV6:
		printf("ICMPV6 ");
		break;
	default:
		/* unknown protocol: print its raw number */
		printf("%u ", sk->proto);
		break;
	}
	/* ext_lan shares af_lan; ext_gwy shares af_gwy (NAT64 may differ) */
	pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto, sk->proto_variant);
	printf(" ");
	pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto, sk->proto_variant);
	printf(" ");
	pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto,
	    sk->proto_variant);
	printf(" ");
	pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto,
	    sk->proto_variant);
	/* source peer's sequence window */
	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
	/* window scaling is only in effect when both peers negotiated it */
	if (s->src.wscale && s->dst.wscale) {
		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
	}
	printf("]");
	/* destination peer's sequence window */
	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
	if (s->src.wscale && s->dst.wscale) {
		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
	}
	printf("]");
	printf(" %u:%u", s->src.state, s->dst.state);
}
1983 
1984 void
pf_print_flags(u_int8_t f)1985 pf_print_flags(u_int8_t f)
1986 {
1987 	if (f) {
1988 		printf(" ");
1989 	}
1990 	if (f & TH_FIN) {
1991 		printf("F");
1992 	}
1993 	if (f & TH_SYN) {
1994 		printf("S");
1995 	}
1996 	if (f & TH_RST) {
1997 		printf("R");
1998 	}
1999 	if (f & TH_PUSH) {
2000 		printf("P");
2001 	}
2002 	if (f & TH_ACK) {
2003 		printf("A");
2004 	}
2005 	if (f & TH_URG) {
2006 		printf("U");
2007 	}
2008 	if (f & TH_ECE) {
2009 		printf("E");
2010 	}
2011 	if (f & TH_CWR) {
2012 		printf("W");
2013 	}
2014 }
2015 
/*
 * Close out a run of rules that matched identically on criterion `i':
 * every rule between head[i] and the current rule `cur' gets its
 * skip[i] pointer aimed at `cur', so rule evaluation can jump the
 * whole run after one failed comparison.  Relies on the `head' array
 * and `cur' locals of pf_calc_skip_steps() below.
 */
#define PF_SET_SKIP_STEPS(i)                                    \
	do {                                                    \
	        while (head[i] != cur) {                        \
	                head[i]->skip[i].ptr = cur;             \
	                head[i] = TAILQ_NEXT(head[i], entries); \
	        }                                               \
	} while (0)
2023 
/*
 * Pre-compute the "skip step" pointers for a rule queue.  For each of
 * the PF_SKIP_COUNT match criteria (interface, direction, address
 * family, protocol, src/dst address, src/dst port) every rule learns
 * the next rule that differs in that criterion, letting the evaluation
 * loop skip runs of rules after a single failed comparison.
 */
void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	/* every criterion's current run starts at the first rule */
	for (i = 0; i < PF_SKIP_COUNT; ++i) {
		head[i] = cur;
	}
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) {
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		}
		if (cur->direction != prev->direction) {
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		}
		if (cur->af != prev->af) {
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		}
		if (cur->proto != prev->proto) {
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		}
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) {
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		}
		{
			union pf_rule_xport *cx = &cur->src.xport;
			union pf_rule_xport *px = &prev->src.xport;

			switch (cur->proto) {
			case IPPROTO_GRE:
			case IPPROTO_ESP:
				/*
				 * GRE/ESP rules don't use source port
				 * ranges; always end the run here.
				 */
				PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
				break;
			default:
				/*
				 * A transition from GRE/ESP also ends the
				 * run, since the xport union's meaning
				 * changed.
				 */
				if (prev->proto == IPPROTO_GRE ||
				    prev->proto == IPPROTO_ESP ||
				    cx->range.op != px->range.op ||
				    cx->range.port[0] != px->range.port[0] ||
				    cx->range.port[1] != px->range.port[1]) {
					PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
				}
				break;
			}
		}
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) {
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		}
		{
			union pf_rule_xport *cx = &cur->dst.xport;
			union pf_rule_xport *px = &prev->dst.xport;

			switch (cur->proto) {
			case IPPROTO_GRE:
				/* dst side compares the PPTP call-id */
				if (cur->proto != prev->proto ||
				    cx->call_id != px->call_id) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			case IPPROTO_ESP:
				/* dst side compares the SPI */
				if (cur->proto != prev->proto ||
				    cx->spi != px->spi) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			default:
				if (prev->proto == IPPROTO_GRE ||
				    prev->proto == IPPROTO_ESP ||
				    cx->range.op != px->range.op ||
				    cx->range.port[0] != px->range.port[0] ||
				    cx->range.port[1] != px->range.port[1]) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			}
		}

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	/* terminate all still-open runs at the end of the queue (cur == NULL) */
	for (i = 0; i < PF_SKIP_COUNT; ++i) {
		PF_SET_SKIP_STEPS(i);
	}
}
2112 
/*
 * Derive a non-zero 32-bit flow identifier for a state key.  The two
 * endpoints are placed into the hash input in a canonical order so
 * that both directions of the same flow map to the same value.
 */
u_int32_t
pf_calc_state_key_flowhash(struct pf_state_key *sk)
{
#if SKYWALK
	uint32_t flowid;
	struct flowidns_flow_key fk;

	VERIFY(sk->flowsrc == FLOWSRC_PF);
	bzero(&fk, sizeof(fk));
	_CASSERT(sizeof(sk->lan.addr) == sizeof(fk.ffk_laddr));
	_CASSERT(sizeof(sk->ext_lan.addr) == sizeof(fk.ffk_laddr));
	bcopy(&sk->lan.addr, &fk.ffk_laddr, sizeof(fk.ffk_laddr));
	bcopy(&sk->ext_lan.addr, &fk.ffk_raddr, sizeof(fk.ffk_raddr));
	fk.ffk_af = sk->af_lan;
	fk.ffk_proto = sk->proto;

	switch (sk->proto) {
	case IPPROTO_ESP:
	case IPPROTO_AH:
		fk.ffk_spi = sk->lan.xport.spi;
		break;
	default:
		/*
		 * NOTE(review): .spi here appears to be used only as a
		 * 32-bit view of the xport union to pick a canonical
		 * port ordering — confirm against pf_state_xport.
		 */
		if (sk->lan.xport.spi <= sk->ext_lan.xport.spi) {
			fk.ffk_lport = sk->lan.xport.port;
			fk.ffk_rport = sk->ext_lan.xport.port;
		} else {
			fk.ffk_lport = sk->ext_lan.xport.port;
			fk.ffk_rport = sk->lan.xport.port;
		}
		break;
	}

	flowidns_allocate_flowid(FLOWIDNS_DOMAIN_PF, &fk, &flowid);
	return flowid;

#else /* !SKYWALK */

	struct pf_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	bzero(&fh, sizeof(fh));
	/* order the endpoints: numerically lower address goes first */
	if (PF_ALEQ(&sk->lan.addr, &sk->ext_lan.addr, sk->af_lan)) {
		bcopy(&sk->lan.addr, &fh.ap1.addr, sizeof(fh.ap1.addr));
		bcopy(&sk->ext_lan.addr, &fh.ap2.addr, sizeof(fh.ap2.addr));
	} else {
		bcopy(&sk->ext_lan.addr, &fh.ap1.addr, sizeof(fh.ap1.addr));
		bcopy(&sk->lan.addr, &fh.ap2.addr, sizeof(fh.ap2.addr));
	}
	/* likewise order the transport identifiers */
	if (sk->lan.xport.spi <= sk->ext_lan.xport.spi) {
		fh.ap1.xport.spi = sk->lan.xport.spi;
		fh.ap2.xport.spi = sk->ext_lan.xport.spi;
	} else {
		fh.ap1.xport.spi = sk->ext_lan.xport.spi;
		fh.ap2.xport.spi = sk->lan.xport.spi;
	}
	fh.af = sk->af_lan;
	fh.proto = sk->proto;

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), pf_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		pf_hash_seed = RandomULong();
		goto try_again;
	}

	return flowhash;

#endif /* !SKYWALK */
}
2183 
2184 static int
pf_addr_wrap_neq(struct pf_addr_wrap * aw1,struct pf_addr_wrap * aw2)2185 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
2186 {
2187 	if (aw1->type != aw2->type) {
2188 		return 1;
2189 	}
2190 	switch (aw1->type) {
2191 	case PF_ADDR_ADDRMASK:
2192 	case PF_ADDR_RANGE:
2193 		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0)) {
2194 			return 1;
2195 		}
2196 		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) {
2197 			return 1;
2198 		}
2199 		return 0;
2200 	case PF_ADDR_DYNIFTL:
2201 		return aw1->p.dyn == NULL || aw2->p.dyn == NULL ||
2202 		       aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt;
2203 	case PF_ADDR_NOROUTE:
2204 	case PF_ADDR_URPFFAILED:
2205 		return 0;
2206 	case PF_ADDR_TABLE:
2207 		return aw1->p.tbl != aw2->p.tbl;
2208 	case PF_ADDR_RTLABEL:
2209 		return aw1->v.rtlabel != aw2->v.rtlabel;
2210 	default:
2211 		printf("invalid address type: %d\n", aw1->type);
2212 		return 1;
2213 	}
2214 }
2215 
/*
 * Incrementally adjust a 16-bit Internet checksum for a single 16-bit
 * field change (old -> new); `udp' is non-zero for UDP packets.
 * Thin wrapper around the shared NAT46/64 helper.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	return nat464_cksum_fixup(cksum, old, new, udp);
}
2221 
/*
 * change ip address & port
 * dir	: packet direction
 * a	: address to be changed
 * p	: port to be changed
 * ic	: ip header checksum
 * pc	: protocol checksum
 * an	: new ip address
 * pn	: new port
 * u	: should be 1 if UDP packet else 0
 * af	: address family of the packet
 * afn	: address family of the new address
 * ua	: should be 1 if ip address needs to be updated in the packet else
 *	  only the checksum is recalculated & updated.
 *
 * Handles same-family rewrites as well as NAT46/NAT64 cross-family
 * rewrites, updating the checksums incrementally (RFC 1624 style)
 * rather than recomputing them.
 */
static __attribute__((noinline)) void
pf_change_ap(int dir, pbuf_t *pbuf, struct pf_addr *a, u_int16_t *p,
    u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn,
    u_int8_t u, sa_family_t af, sa_family_t afn, int ua)
{
	struct pf_addr  ao;
	u_int16_t       po = *p;

	/* keep the old address around for the checksum deltas below */
	PF_ACPY(&ao, a, af);
	if (ua) {
		PF_ACPY(a, an, afn);
	}

	*p = pn;

	switch (af) {
#if INET
	case AF_INET:
		switch (afn) {
		case AF_INET:
			/* v4 header checksum covers the two address words */
			*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
			    ao.addr16[0], an->addr16[0], 0),
			    ao.addr16[1], an->addr16[1], 0);
			*p = pn;	/* already assigned above; harmless repeat */
			/*
			 * If the packet is originated from an ALG on the NAT gateway
			 * (source address is loopback or local), in which case the
			 * TCP/UDP checksum field contains the pseudo header checksum
			 * that's not yet complemented.
			 * In that case we do not need to fixup the checksum for port
			 * translation as the pseudo header checksum doesn't include ports.
			 *
			 * A packet generated locally will have UDP/TCP CSUM flag
			 * set (gets set in protocol output).
			 *
			 * It should be noted that the fixup doesn't do anything if the
			 * checksum is 0.
			 */
			if (dir == PF_OUT && pbuf != NULL &&
			    (*pbuf->pb_csum_flags & (CSUM_TCP | CSUM_UDP))) {
				/* Pseudo-header checksum does not include ports */
				*pc = ~pf_cksum_fixup(pf_cksum_fixup(~*pc,
				    ao.addr16[0], an->addr16[0], u),
				    ao.addr16[1], an->addr16[1], u);
			} else {
				*pc =
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					    *pc, ao.addr16[0], an->addr16[0], u),
				    ao.addr16[1], an->addr16[1], u),
				    po, pn, u);
			}
			break;
		case AF_INET6:
			/* NAT46: v6 words 2..7 enter the sum as 0 -> new */
			*p = pn;
			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(

					    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
					    ao.addr16[0], an->addr16[0], u),
					    ao.addr16[1], an->addr16[1], u),
					    0, an->addr16[2], u),
					    0, an->addr16[3], u),
				    0, an->addr16[4], u),
				    0, an->addr16[5], u),
				    0, an->addr16[6], u),
			    0, an->addr16[7], u),
			    po, pn, u);
			break;
		}
		break;
#endif /* INET */
	case AF_INET6:
		switch (afn) {
		case AF_INET6:
			/*
			 * If the packet is originated from an ALG on the NAT gateway
			 * (source address is loopback or local), in which case the
			 * TCP/UDP checksum field contains the pseudo header checksum
			 * that's not yet complemented.
			 * A packet generated locally
			 * will have UDP/TCP CSUM flag set (gets set in protocol
			 * output).
			 */
			if (dir == PF_OUT && pbuf != NULL &&
			    (*pbuf->pb_csum_flags & (CSUM_TCPIPV6 |
			    CSUM_UDPIPV6))) {
				/* Pseudo-header checksum does not include ports */
				*pc =
				    ~pf_cksum_fixup(pf_cksum_fixup(
					    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
						    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
							    ~*pc,
							    ao.addr16[0], an->addr16[0], u),
						    ao.addr16[1], an->addr16[1], u),
						    ao.addr16[2], an->addr16[2], u),
						    ao.addr16[3], an->addr16[3], u),
					    ao.addr16[4], an->addr16[4], u),
					    ao.addr16[5], an->addr16[5], u),
					    ao.addr16[6], an->addr16[6], u),
				    ao.addr16[7], an->addr16[7], u);
			} else {
				*pc =
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
						    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
							    *pc,
							    ao.addr16[0], an->addr16[0], u),
						    ao.addr16[1], an->addr16[1], u),
						    ao.addr16[2], an->addr16[2], u),
						    ao.addr16[3], an->addr16[3], u),
					    ao.addr16[4], an->addr16[4], u),
					    ao.addr16[5], an->addr16[5], u),
					    ao.addr16[6], an->addr16[6], u),
				    ao.addr16[7], an->addr16[7], u),
				    po, pn, u);
			}
			break;
#ifdef INET
		case AF_INET:
			/* NAT64: old v6 words 2..7 leave the sum as old -> 0 */
			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
					    ao.addr16[0], an->addr16[0], u),
					    ao.addr16[1], an->addr16[1], u),
					    ao.addr16[2], 0, u),
					    ao.addr16[3], 0, u),
				    ao.addr16[4], 0, u),
				    ao.addr16[5], 0, u),
				    ao.addr16[6], 0, u),
			    ao.addr16[7], 0, u),
			    po, pn, u);
			break;
#endif /* INET */
		}
		break;
	}
}
2374 
2375 
2376 /* Changes a u_int32_t.  Uses a void * so there are no align restrictions */
2377 void
pf_change_a(void * a,u_int16_t * c,u_int32_t an,u_int8_t u)2378 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
2379 {
2380 	u_int32_t       ao;
2381 
2382 	memcpy(&ao, a, sizeof(ao));
2383 	memcpy(a, &an, sizeof(u_int32_t));
2384 	*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
2385 	    ao % 65536, an % 65536, u);
2386 }
2387 
/*
 * Rewrite an IPv6 address in place and incrementally fix the protocol
 * checksum `c', one 16-bit word at a time.  `u' is 1 for UDP, else 0
 * (see the pf_change_ap() block comment).
 */
static __attribute__((noinline)) void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr  ao;

	/* save the old address; the deltas below need both old and new */
	PF_ACPY(&ao, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(*c,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u),
			    ao.addr16[2], an->addr16[2], u),
		    ao.addr16[3], an->addr16[3], u),
		    ao.addr16[4], an->addr16[4], u),
		    ao.addr16[5], an->addr16[5], u),
	    ao.addr16[6], an->addr16[6], u),
	    ao.addr16[7], an->addr16[7], u);
}
2408 
/*
 * Rewrite an address, possibly across families (NAT46/NAT64), fixing
 * checksum `c' incrementally.  Same-family cases delegate to
 * pf_change_a()/pf_change_a6() and pass `an' directly; `ao' is only
 * initialized — and only read — in the cross-family branches.
 */
static __attribute__((noinline)) void
pf_change_addr(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u,
    sa_family_t af, sa_family_t afn)
{
	struct pf_addr  ao;

	if (af != afn) {
		/* cross-family: remember the old address, install the new */
		PF_ACPY(&ao, a, af);
		PF_ACPY(a, an, afn);
	}

	switch (af) {
	case AF_INET:
		switch (afn) {
		case AF_INET:
			pf_change_a(a, c, an->v4addr.s_addr, u);
			break;
		case AF_INET6:
			/* v4 -> v6: new upper words enter the sum as 0 -> new */
			*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					    pf_cksum_fixup(pf_cksum_fixup(*c,
					    ao.addr16[0], an->addr16[0], u),
					    ao.addr16[1], an->addr16[1], u),
					    0, an->addr16[2], u),
				    0, an->addr16[3], u),
				    0, an->addr16[4], u),
				    0, an->addr16[5], u),
			    0, an->addr16[6], u),
			    0, an->addr16[7], u);
			break;
		}
		break;
	case AF_INET6:
		switch (afn) {
		case AF_INET:
			/* v6 -> v4: old upper words leave the sum as old -> 0 */
			*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					    pf_cksum_fixup(pf_cksum_fixup(*c,
					    ao.addr16[0], an->addr16[0], u),
					    ao.addr16[1], an->addr16[1], u),
					    ao.addr16[2], 0, u),
				    ao.addr16[3], 0, u),
				    ao.addr16[4], 0, u),
				    ao.addr16[5], 0, u),
			    ao.addr16[6], 0, u),
			    ao.addr16[7], 0, u);
			break;
		case AF_INET6:
			pf_change_a6(a, c, an, u);
			break;
		}
		break;
	}
}
2463 
/*
 * Rewrite the addresses/port embedded in an ICMP error (the quoted
 * inner packet) and the outer header, fixing every affected checksum
 * incrementally.
 *
 * ia/ip : inner (quoted) address and port to rewrite
 * oa    : outer header address to rewrite
 * na/np : new address and new port
 * pc    : inner protocol (TCP/UDP) checksum, may be NULL
 * h2c   : inner IP header checksum
 * ic    : ICMP/ICMPv6 checksum
 * hc    : outer IP header checksum
 * u     : 1 for UDP, else 0; af : address family
 */
static __attribute__((noinline)) void
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
{
	struct pf_addr  oia, ooa;

	PF_ACPY(&oia, ia, af);
	PF_ACPY(&ooa, oa, af);

	/* Change inner protocol port, fix inner protocol checksum. */
	if (ip != NULL) {
		u_int16_t       oip = *ip;
		u_int32_t       opc = 0;

		if (pc != NULL) {
			opc = *pc;
		}
		*ip = np;
		if (pc != NULL) {
			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
		}
		/* the quoted bytes are covered by the ICMP checksum too */
		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
		if (pc != NULL) {
			/* the inner checksum field itself changed as well */
			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
		}
	}
	/* Change inner ip address, fix inner ip and icmp checksums. */
	PF_ACPY(ia, na, af);
	switch (af) {
#if INET
	case AF_INET: {
		u_int32_t        oh2c = *h2c;

		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
		break;
	}
#endif /* INET */
	case AF_INET6:
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(*ic,
				    oia.addr16[0], ia->addr16[0], u),
				    oia.addr16[1], ia->addr16[1], u),
				    oia.addr16[2], ia->addr16[2], u),
			    oia.addr16[3], ia->addr16[3], u),
			    oia.addr16[4], ia->addr16[4], u),
			    oia.addr16[5], ia->addr16[5], u),
		    oia.addr16[6], ia->addr16[6], u),
		    oia.addr16[7], ia->addr16[7], u);
		break;
	}
	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
	PF_ACPY(oa, na, af);
	switch (af) {
#if INET
	case AF_INET:
		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
		    ooa.addr16[0], oa->addr16[0], 0),
		    ooa.addr16[1], oa->addr16[1], 0);
		break;
#endif /* INET */
	case AF_INET6:
		/* v6 has no header checksum; the ICMPv6 sum covers the header */
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(*ic,
				    ooa.addr16[0], oa->addr16[0], u),
				    ooa.addr16[1], oa->addr16[1], u),
				    ooa.addr16[2], oa->addr16[2], u),
			    ooa.addr16[3], oa->addr16[3], u),
			    ooa.addr16[4], oa->addr16[4], u),
			    ooa.addr16[5], oa->addr16[5], u),
		    ooa.addr16[6], oa->addr16[6], u),
		    ooa.addr16[7], oa->addr16[7], u);
		break;
	}
}
2547 
2548 
/*
 * Need to modulate the sequence numbers in the TCP SACK option
 * (credits to Krzysztof Pfaff for report and patch)
 *
 * Walks the TCP option area, subtracting the peer's sequence-number
 * modulator from every SACK block edge, and copies the patched
 * options back into the packet.  Returns 0 when no SACK option was
 * touched, -1 if the packet couldn't be made writable, otherwise the
 * (non-zero) copyback length.
 */
static __attribute__((noinline)) int
pf_modulate_sack(pbuf_t *pbuf, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *dst)
{
	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
	u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
	int copyback = 0, i, olen;
	struct sackblk sack;

	/* a SACK option needs kind + len + at least one 8-byte block */
#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
	if (hlen < TCPOLEN_SACKLEN ||
	    !pf_pull_hdr(pbuf, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) {
		return 0;
	}

	while (hlen >= TCPOLEN_SACKLEN) {
		olen = opt[1];
		switch (*opt) {
		case TCPOPT_EOL:        /* FALLTHROUGH */
		case TCPOPT_NOP:
			/* single-byte options have no length field */
			opt++;
			hlen--;
			break;
		case TCPOPT_SACK:
			/* clamp a lying length field to what's actually left */
			if (olen > hlen) {
				olen = hlen;
			}
			if (olen >= TCPOLEN_SACKLEN) {
				for (i = 2; i + TCPOLEN_SACK <= olen;
				    i += TCPOLEN_SACK) {
					/* memcpy: the option data is unaligned */
					memcpy(&sack, &opt[i], sizeof(sack));
					pf_change_a(&sack.start, &th->th_sum,
					    htonl(ntohl(sack.start) -
					    dst->seqdiff), 0);
					pf_change_a(&sack.end, &th->th_sum,
					    htonl(ntohl(sack.end) -
					    dst->seqdiff), 0);
					memcpy(&opt[i], &sack, sizeof(sack));
				}
				copyback = off + sizeof(*th) + thoptlen;
			}
			OS_FALLTHROUGH;
		default:
			/* guarantee forward progress on malformed lengths */
			if (olen < 2) {
				olen = 2;
			}
			hlen -= olen;
			opt += olen;
		}
	}

	if (copyback) {
		if (pf_lazy_makewritable(pd, pbuf, copyback) == NULL) {
			return -1;
		}
		pbuf_copy_back(pbuf, off + sizeof(*th), thoptlen, opts);
	}
	return copyback;
}
2612 
2613 /*
2614  * XXX
2615  *
2616  * The following functions (pf_send_tcp and pf_send_icmp) are somewhat
2617  * special in that they originate "spurious" packets rather than
2618  * filter/NAT existing packets. As such, they're not a great fit for
2619  * the 'pbuf' shim, which assumes the underlying packet buffers are
2620  * allocated elsewhere.
2621  *
2622  * Since these functions are rarely used, we'll carry on allocating mbufs
2623  * and passing them to the IP stack for eventual routing.
2624  */
2625 static __attribute__((noinline)) void
pf_send_tcp(const struct pf_rule * r,sa_family_t af,const struct pf_addr * saddr,const struct pf_addr * daddr,u_int16_t sport,u_int16_t dport,u_int32_t seq,u_int32_t ack,u_int8_t flags,u_int16_t win,u_int16_t mss,u_int8_t ttl,int tag,u_int16_t rtag,struct ether_header * eh,struct ifnet * ifp)2626 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
2627     const struct pf_addr *saddr, const struct pf_addr *daddr,
2628     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
2629     u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
2630     u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
2631 {
2632 #pragma unused(eh, ifp)
2633 	struct mbuf     *m;
2634 	int              len, tlen;
2635 #if INET
2636 	struct ip       *h = NULL;
2637 #endif /* INET */
2638 	struct ip6_hdr  *h6 = NULL;
2639 	struct tcphdr   *th = NULL;
2640 	char            *opt;
2641 	struct pf_mtag  *pf_mtag;
2642 
2643 	/* maximum segment size tcp option */
2644 	tlen = sizeof(struct tcphdr);
2645 	if (mss) {
2646 		tlen += 4;
2647 	}
2648 
2649 	switch (af) {
2650 #if INET
2651 	case AF_INET:
2652 		len = sizeof(struct ip) + tlen;
2653 		break;
2654 #endif /* INET */
2655 	case AF_INET6:
2656 		len = sizeof(struct ip6_hdr) + tlen;
2657 		break;
2658 	default:
2659 		panic("pf_send_tcp: not AF_INET or AF_INET6!");
2660 		return;
2661 	}
2662 
2663 	/* create outgoing mbuf */
2664 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
2665 	if (m == NULL) {
2666 		return;
2667 	}
2668 
2669 	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
2670 		return;
2671 	}
2672 
2673 	if (tag) {
2674 		pf_mtag->pftag_flags |= PF_TAG_GENERATED;
2675 	}
2676 	pf_mtag->pftag_tag = rtag;
2677 
2678 	if (r != NULL && PF_RTABLEID_IS_VALID(r->rtableid)) {
2679 		pf_mtag->pftag_rtableid = r->rtableid;
2680 	}
2681 
2682 #if PF_ECN
2683 	/* add hints for ecn */
2684 	pf_mtag->pftag_hdr = mtod(m, struct ip *);
2685 	/* record address family */
2686 	pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6);
2687 	switch (af) {
2688 #if INET
2689 	case AF_INET:
2690 		pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
2691 		break;
2692 #endif /* INET */
2693 	case AF_INET6:
2694 		pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
2695 		break;
2696 	}
2697 #endif /* PF_ECN */
2698 
2699 	/* indicate this is TCP */
2700 	m->m_pkthdr.pkt_proto = IPPROTO_TCP;
2701 
2702 	/* Make sure headers are 32-bit aligned */
2703 	m->m_data += max_linkhdr;
2704 	m->m_pkthdr.len = m->m_len = len;
2705 	m->m_pkthdr.rcvif = NULL;
2706 	bzero(m->m_data, len);
2707 	switch (af) {
2708 #if INET
2709 	case AF_INET:
2710 		h = mtod(m, struct ip *);
2711 
2712 		/* IP header fields included in the TCP checksum */
2713 		h->ip_p = IPPROTO_TCP;
2714 		h->ip_len = htons(tlen);
2715 		h->ip_src.s_addr = saddr->v4addr.s_addr;
2716 		h->ip_dst.s_addr = daddr->v4addr.s_addr;
2717 
2718 		th = (struct tcphdr *)(void *)((caddr_t)h + sizeof(struct ip));
2719 		break;
2720 #endif /* INET */
2721 	case AF_INET6:
2722 		h6 = mtod(m, struct ip6_hdr *);
2723 
2724 		/* IP header fields included in the TCP checksum */
2725 		h6->ip6_nxt = IPPROTO_TCP;
2726 		h6->ip6_plen = htons(tlen);
2727 		memcpy(&h6->ip6_src, &saddr->v6addr, sizeof(struct in6_addr));
2728 		memcpy(&h6->ip6_dst, &daddr->v6addr, sizeof(struct in6_addr));
2729 
2730 		th = (struct tcphdr *)(void *)
2731 		    ((caddr_t)h6 + sizeof(struct ip6_hdr));
2732 		break;
2733 	}
2734 
2735 	/* TCP header */
2736 	th->th_sport = sport;
2737 	th->th_dport = dport;
2738 	th->th_seq = htonl(seq);
2739 	th->th_ack = htonl(ack);
2740 	th->th_off = tlen >> 2;
2741 	th->th_flags = flags;
2742 	th->th_win = htons(win);
2743 
2744 	if (mss) {
2745 		opt = (char *)(th + 1);
2746 		opt[0] = TCPOPT_MAXSEG;
2747 		opt[1] = 4;
2748 #if BYTE_ORDER != BIG_ENDIAN
2749 		HTONS(mss);
2750 #endif
2751 		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
2752 	}
2753 
2754 	switch (af) {
2755 #if INET
2756 	case AF_INET: {
2757 		struct route ro;
2758 
2759 		/* TCP checksum */
2760 		th->th_sum = in_cksum(m, len);
2761 
2762 		/* Finish the IP header */
2763 		h->ip_v = 4;
2764 		h->ip_hl = sizeof(*h) >> 2;
2765 		h->ip_tos = IPTOS_LOWDELAY;
2766 		/*
2767 		 * ip_output() expects ip_len and ip_off to be in host order.
2768 		 */
2769 		h->ip_len = len;
2770 		h->ip_off = (path_mtu_discovery ? IP_DF : 0);
2771 		h->ip_ttl = ttl ? ttl : ip_defttl;
2772 		h->ip_sum = 0;
2773 
2774 		bzero(&ro, sizeof(ro));
2775 		ip_output(m, NULL, &ro, 0, NULL, NULL);
2776 		ROUTE_RELEASE(&ro);
2777 		break;
2778 	}
2779 #endif /* INET */
2780 	case AF_INET6: {
2781 		struct route_in6 ro6;
2782 
2783 		/* TCP checksum */
2784 		th->th_sum = in6_cksum(m, IPPROTO_TCP,
2785 		    sizeof(struct ip6_hdr), tlen);
2786 
2787 		h6->ip6_vfc |= IPV6_VERSION;
2788 		h6->ip6_hlim = IPV6_DEFHLIM;
2789 
2790 		ip6_output_setsrcifscope(m, IFSCOPE_UNKNOWN, NULL);
2791 		ip6_output_setdstifscope(m, IFSCOPE_UNKNOWN, NULL);
2792 		bzero(&ro6, sizeof(ro6));
2793 		ip6_output(m, NULL, &ro6, 0, NULL, NULL, NULL);
2794 		ROUTE_RELEASE(&ro6);
2795 		break;
2796 	}
2797 	}
2798 }
2799 
2800 static __attribute__((noinline)) void
pf_send_icmp(pbuf_t * pbuf,u_int8_t type,u_int8_t code,sa_family_t af,struct pf_rule * r)2801 pf_send_icmp(pbuf_t *pbuf, u_int8_t type, u_int8_t code, sa_family_t af,
2802     struct pf_rule *r)
2803 {
2804 	struct mbuf     *m0;
2805 	struct pf_mtag  *pf_mtag;
2806 
2807 	m0 = pbuf_clone_to_mbuf(pbuf);
2808 	if (m0 == NULL) {
2809 		return;
2810 	}
2811 
2812 	if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
2813 		return;
2814 	}
2815 
2816 	pf_mtag->pftag_flags |= PF_TAG_GENERATED;
2817 
2818 	if (PF_RTABLEID_IS_VALID(r->rtableid)) {
2819 		pf_mtag->pftag_rtableid = r->rtableid;
2820 	}
2821 
2822 #if PF_ECN
2823 	/* add hints for ecn */
2824 	pf_mtag->pftag_hdr = mtod(m0, struct ip *);
2825 	/* record address family */
2826 	pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6);
2827 	switch (af) {
2828 #if INET
2829 	case AF_INET:
2830 		pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
2831 		m0->m_pkthdr.pkt_proto = IPPROTO_ICMP;
2832 		break;
2833 #endif /* INET */
2834 	case AF_INET6:
2835 		pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
2836 		m0->m_pkthdr.pkt_proto = IPPROTO_ICMPV6;
2837 		break;
2838 	}
2839 #endif /* PF_ECN */
2840 
2841 	switch (af) {
2842 #if INET
2843 	case AF_INET:
2844 		icmp_error(m0, type, code, 0, 0);
2845 		break;
2846 #endif /* INET */
2847 	case AF_INET6:
2848 		icmp6_error(m0, type, code, 0);
2849 		break;
2850 	}
2851 }
2852 
2853 /*
2854  * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
2855  * If n is 0, they match if they are equal. If n is != 0, they match if they
2856  * are different.
2857  */
2858 int
pf_match_addr(u_int8_t n,struct pf_addr * a,struct pf_addr * m,struct pf_addr * b,sa_family_t af)2859 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
2860     struct pf_addr *b, sa_family_t af)
2861 {
2862 	int     match = 0;
2863 
2864 	switch (af) {
2865 #if INET
2866 	case AF_INET:
2867 		if ((a->addr32[0] & m->addr32[0]) ==
2868 		    (b->addr32[0] & m->addr32[0])) {
2869 			match++;
2870 		}
2871 		break;
2872 #endif /* INET */
2873 	case AF_INET6:
2874 		if (((a->addr32[0] & m->addr32[0]) ==
2875 		    (b->addr32[0] & m->addr32[0])) &&
2876 		    ((a->addr32[1] & m->addr32[1]) ==
2877 		    (b->addr32[1] & m->addr32[1])) &&
2878 		    ((a->addr32[2] & m->addr32[2]) ==
2879 		    (b->addr32[2] & m->addr32[2])) &&
2880 		    ((a->addr32[3] & m->addr32[3]) ==
2881 		    (b->addr32[3] & m->addr32[3]))) {
2882 			match++;
2883 		}
2884 		break;
2885 	}
2886 	if (match) {
2887 		if (n) {
2888 			return 0;
2889 		} else {
2890 			return 1;
2891 		}
2892 	} else {
2893 		if (n) {
2894 			return 1;
2895 		} else {
2896 			return 0;
2897 		}
2898 	}
2899 }
2900 
2901 /*
2902  * Return 1 if b <= a <= e, otherwise return 0.
2903  */
2904 int
pf_match_addr_range(struct pf_addr * b,struct pf_addr * e,struct pf_addr * a,sa_family_t af)2905 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
2906     struct pf_addr *a, sa_family_t af)
2907 {
2908 	switch (af) {
2909 #if INET
2910 	case AF_INET:
2911 		if ((a->addr32[0] < b->addr32[0]) ||
2912 		    (a->addr32[0] > e->addr32[0])) {
2913 			return 0;
2914 		}
2915 		break;
2916 #endif /* INET */
2917 	case AF_INET6: {
2918 		int     i;
2919 
2920 		/* check a >= b */
2921 		for (i = 0; i < 4; ++i) {
2922 			if (a->addr32[i] > b->addr32[i]) {
2923 				break;
2924 			} else if (a->addr32[i] < b->addr32[i]) {
2925 				return 0;
2926 			}
2927 		}
2928 		/* check a <= e */
2929 		for (i = 0; i < 4; ++i) {
2930 			if (a->addr32[i] < e->addr32[i]) {
2931 				break;
2932 			} else if (a->addr32[i] > e->addr32[i]) {
2933 				return 0;
2934 			}
2935 		}
2936 		break;
2937 	}
2938 	}
2939 	return 1;
2940 }
2941 
2942 int
pf_match(u_int8_t op,u_int32_t a1,u_int32_t a2,u_int32_t p)2943 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2944 {
2945 	switch (op) {
2946 	case PF_OP_IRG:
2947 		return (p > a1) && (p < a2);
2948 	case PF_OP_XRG:
2949 		return (p < a1) || (p > a2);
2950 	case PF_OP_RRG:
2951 		return (p >= a1) && (p <= a2);
2952 	case PF_OP_EQ:
2953 		return p == a1;
2954 	case PF_OP_NE:
2955 		return p != a1;
2956 	case PF_OP_LT:
2957 		return p < a1;
2958 	case PF_OP_LE:
2959 		return p <= a1;
2960 	case PF_OP_GT:
2961 		return p > a1;
2962 	case PF_OP_GE:
2963 		return p >= a1;
2964 	}
2965 	return 0; /* never reached */
2966 }
2967 
2968 int
pf_match_port(u_int8_t op,u_int16_t a1,u_int16_t a2,u_int16_t p)2969 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
2970 {
2971 #if BYTE_ORDER != BIG_ENDIAN
2972 	NTOHS(a1);
2973 	NTOHS(a2);
2974 	NTOHS(p);
2975 #endif
2976 	return pf_match(op, a1, a2, p);
2977 }
2978 
2979 int
pf_match_xport(u_int8_t proto,u_int8_t proto_variant,union pf_rule_xport * rx,union pf_state_xport * sx)2980 pf_match_xport(u_int8_t proto, u_int8_t proto_variant, union pf_rule_xport *rx,
2981     union pf_state_xport *sx)
2982 {
2983 	int d = !0;
2984 
2985 	if (sx) {
2986 		switch (proto) {
2987 		case IPPROTO_GRE:
2988 			if (proto_variant == PF_GRE_PPTP_VARIANT) {
2989 				d = (rx->call_id == sx->call_id);
2990 			}
2991 			break;
2992 
2993 		case IPPROTO_ESP:
2994 			d = (rx->spi == sx->spi);
2995 			break;
2996 
2997 		case IPPROTO_TCP:
2998 		case IPPROTO_UDP:
2999 		case IPPROTO_ICMP:
3000 		case IPPROTO_ICMPV6:
3001 			if (rx->range.op) {
3002 				d = pf_match_port(rx->range.op,
3003 				    rx->range.port[0], rx->range.port[1],
3004 				    sx->port);
3005 			}
3006 			break;
3007 
3008 		default:
3009 			break;
3010 		}
3011 	}
3012 
3013 	return d;
3014 }
3015 
3016 int
pf_match_uid(u_int8_t op,uid_t a1,uid_t a2,uid_t u)3017 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
3018 {
3019 	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) {
3020 		return 0;
3021 	}
3022 	return pf_match(op, a1, a2, u);
3023 }
3024 
3025 int
pf_match_gid(u_int8_t op,gid_t a1,gid_t a2,gid_t g)3026 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
3027 {
3028 	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) {
3029 		return 0;
3030 	}
3031 	return pf_match(op, a1, a2, g);
3032 }
3033 
3034 static int
pf_match_tag(struct pf_rule * r,struct pf_mtag * pf_mtag,int * tag)3035 pf_match_tag(struct pf_rule *r, struct pf_mtag *pf_mtag,
3036     int *tag)
3037 {
3038 	if (*tag == -1) {
3039 		*tag = pf_mtag->pftag_tag;
3040 	}
3041 
3042 	return (!r->match_tag_not && r->match_tag == *tag) ||
3043 	       (r->match_tag_not && r->match_tag != *tag);
3044 }
3045 
3046 int
pf_tag_packet(pbuf_t * pbuf,struct pf_mtag * pf_mtag,int tag,unsigned int rtableid,struct pf_pdesc * pd)3047 pf_tag_packet(pbuf_t *pbuf, struct pf_mtag *pf_mtag, int tag,
3048     unsigned int rtableid, struct pf_pdesc *pd)
3049 {
3050 	if (tag <= 0 && !PF_RTABLEID_IS_VALID(rtableid) &&
3051 	    (pd == NULL || !(pd->pktflags & PKTF_FLOW_ID))) {
3052 		return 0;
3053 	}
3054 
3055 	if (pf_mtag == NULL && (pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
3056 		return 1;
3057 	}
3058 
3059 	if (tag > 0) {
3060 		pf_mtag->pftag_tag = tag;
3061 	}
3062 	if (PF_RTABLEID_IS_VALID(rtableid)) {
3063 		pf_mtag->pftag_rtableid = rtableid;
3064 	}
3065 	if (pd != NULL && (pd->pktflags & PKTF_FLOW_ID)) {
3066 		*pbuf->pb_flowsrc = pd->flowsrc;
3067 		*pbuf->pb_flowid = pd->flowhash;
3068 		*pbuf->pb_flags |= pd->pktflags;
3069 		*pbuf->pb_proto = pd->proto;
3070 	}
3071 
3072 	return 0;
3073 }
3074 
/*
 * Push a frame onto the global pf_anchor_stack and descend into the
 * anchor attached to rule *r.  On return, *rs and *r point at the
 * (first) child ruleset and its first active rule of type 'n'.  For
 * wildcard anchors the frame records the RB-tree iteration position so
 * pf_step_out_of_anchor() can visit the remaining children.  If the
 * stack is full, the anchor is skipped (*r advances past it).  'a'
 * (may be NULL) tracks the top-level anchor rule; 'match' (may be
 * NULL) is reset for the new scope.
 */
void
pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
    struct pf_rule **r, struct pf_rule **a, int *match)
{
	struct pf_anchor_stackframe     *f;

	(*r)->anchor->match = 0;
	if (match) {
		*match = 0;
	}
	/* refuse to recurse deeper than the fixed-size anchor stack */
	if (*depth >= (int)sizeof(pf_anchor_stack) /
	    (int)sizeof(pf_anchor_stack[0])) {
		printf("pf_step_into_anchor: stack overflow\n");
		*r = TAILQ_NEXT(*r, entries);
		return;
	} else if (*depth == 0 && a != NULL) {
		/* remember the outermost anchor rule for the caller */
		*a = *r;
	}
	f = pf_anchor_stack + (*depth)++;
	f->rs = *rs;
	f->r = *r;
	if ((*r)->anchor_wildcard) {
		/* iterate over all children, starting at the RB minimum */
		f->parent = &(*r)->anchor->children;
		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
		    NULL) {
			*r = NULL;
			return;
		}
		*rs = &f->child->ruleset;
	} else {
		/* non-wildcard anchor: a single named ruleset */
		f->parent = NULL;
		f->child = NULL;
		*rs = &(*r)->anchor->ruleset;
	}
	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
}
3111 
/*
 * Counterpart of pf_step_into_anchor(): when the rules of the current
 * child ruleset are exhausted (*r == NULL), advance to the next
 * wildcard child if any, otherwise pop the stack frame and resume at
 * the rule following the anchor rule in the parent ruleset.  A child
 * match is propagated to the anchor rule.  Returns the popped anchor
 * rule's 'quick' flag when the anchor matched, so the caller can stop
 * further rule evaluation; 0 otherwise.
 */
int
pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
    struct pf_rule **r, struct pf_rule **a, int *match)
{
	struct pf_anchor_stackframe     *f;
	int quick = 0;

	do {
		if (*depth <= 0) {
			break;
		}
		f = pf_anchor_stack + *depth - 1;
		if (f->parent != NULL && f->child != NULL) {
			/* wildcard anchor: record the child's match ... */
			if (f->child->match ||
			    (match != NULL && *match)) {
				f->r->anchor->match = 1;
				if (match) {
					*match = 0;
				}
			}
			/* ... then move to the next child ruleset, if any */
			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
			if (f->child != NULL) {
				*rs = &f->child->ruleset;
				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
				if (*r == NULL) {
					continue;
				} else {
					break;
				}
			}
		}
		/* no more children: pop the frame, back to the parent */
		(*depth)--;
		if (*depth == 0 && a != NULL) {
			*a = NULL;
		}
		*rs = f->rs;
		if (f->r->anchor->match || (match != NULL && *match)) {
			quick = f->r->quick;
		}
		*r = TAILQ_NEXT(f->r, entries);
	} while (*r == NULL);

	return quick;
}
3156 
3157 void
pf_poolmask(struct pf_addr * naddr,struct pf_addr * raddr,struct pf_addr * rmask,struct pf_addr * saddr,sa_family_t af)3158 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
3159     struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
3160 {
3161 	switch (af) {
3162 #if INET
3163 	case AF_INET:
3164 		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3165 		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
3166 		break;
3167 #endif /* INET */
3168 	case AF_INET6:
3169 		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3170 		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
3171 		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
3172 		    ((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]);
3173 		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
3174 		    ((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]);
3175 		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
3176 		    ((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]);
3177 		break;
3178 	}
3179 }
3180 
3181 void
pf_addr_inc(struct pf_addr * addr,sa_family_t af)3182 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
3183 {
3184 	switch (af) {
3185 #if INET
3186 	case AF_INET:
3187 		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
3188 		break;
3189 #endif /* INET */
3190 	case AF_INET6:
3191 		if (addr->addr32[3] == 0xffffffff) {
3192 			addr->addr32[3] = 0;
3193 			if (addr->addr32[2] == 0xffffffff) {
3194 				addr->addr32[2] = 0;
3195 				if (addr->addr32[1] == 0xffffffff) {
3196 					addr->addr32[1] = 0;
3197 					addr->addr32[0] =
3198 					    htonl(ntohl(addr->addr32[0]) + 1);
3199 				} else {
3200 					addr->addr32[1] =
3201 					    htonl(ntohl(addr->addr32[1]) + 1);
3202 				}
3203 			} else {
3204 				addr->addr32[2] =
3205 				    htonl(ntohl(addr->addr32[2]) + 1);
3206 			}
3207 		} else {
3208 			addr->addr32[3] =
3209 			    htonl(ntohl(addr->addr32[3]) + 1);
3210 		}
3211 		break;
3212 	}
3213 }
3214 
/*
 * One round of Bob Jenkins-style 96-bit mixing over three 32-bit
 * lanes; used by pf_hash() below.  Multi-evaluation macro: arguments
 * must be plain lvalues.
 */
#define mix(a, b, c) \
	do {                                    \
	        a -= b; a -= c; a ^= (c >> 13); \
	        b -= c; b -= a; b ^= (a << 8);  \
	        c -= a; c -= b; c ^= (b >> 13); \
	        a -= b; a -= c; a ^= (c >> 12); \
	        b -= c; b -= a; b ^= (a << 16); \
	        c -= a; c -= b; c ^= (b >> 5);  \
	        a -= b; a -= c; a ^= (c >> 3);  \
	        b -= c; b -= a; b ^= (a << 10); \
	        c -= a; c -= b; c ^= (b >> 15); \
	} while (0)

/*
 * hash function based on bridge_hash in if_bridge.c
 *
 * Hashes 'inaddr' keyed by 'key' into 'hash': one 32-bit word of
 * output for v4, four words for v6.  Deterministic for a given
 * (inaddr, key, af); used for the source-hash pool policy.
 */
static void
pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
    struct pf_poolhashkey *key, sa_family_t af)
{
	/* golden-ratio initializers, as in the original Jenkins hash */
	u_int32_t       a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];

	switch (af) {
#if INET
	case AF_INET:
		a += inaddr->addr32[0];
		b += key->key32[1];
		mix(a, b, c);
		hash->addr32[0] = c + key->key32[2];
		break;
#endif /* INET */
	case AF_INET6:
		a += inaddr->addr32[0];
		b += inaddr->addr32[2];
		mix(a, b, c);
		hash->addr32[0] = c;
		a += inaddr->addr32[1];
		b += inaddr->addr32[3];
		c += key->key32[1];
		mix(a, b, c);
		hash->addr32[1] = c;
		a += inaddr->addr32[2];
		b += inaddr->addr32[1];
		c += key->key32[2];
		mix(a, b, c);
		hash->addr32[2] = c;
		a += inaddr->addr32[3];
		b += inaddr->addr32[0];
		c += key->key32[3];
		mix(a, b, c);
		hash->addr32[3] = c;
		break;
	}
}
3269 
/*
 * Select the translation address for rule 'r' and source 'saddr',
 * writing the result to 'naddr'.
 *
 * af        : address family of the packet's source address
 * r         : matched NAT/RDR rule whose address pool is consulted
 * saddr     : packet source address (input to bitmask/srchash pools)
 * naddr     : output - the chosen translation address
 * init_addr : for random/round-robin pools, records the first address
 *             tried so callers (e.g. pf_get_sport) can detect when the
 *             pool has been exhausted
 * sn        : source node for sticky-address tracking; looked up here
 *             and, when found with a prior mapping, reused; updated
 *             with the chosen address on the way out
 *
 * Returns 0 on success, 1 when no usable address could be selected.
 *
 * NOTE(review): 'af' and 'rpool->af' are intermixed deliberately in
 * places, presumably because the pool family can differ from the
 * packet family (NAT64) — confirm before changing.
 */
static __attribute__((noinline)) int
pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
{
	unsigned char            hash[16];
	struct pf_pool          *rpool = &r->rpool;
	struct pf_addr          *raddr = &rpool->cur->addr.v.a.addr;
	struct pf_addr          *rmask = &rpool->cur->addr.v.a.mask;
	struct pf_pooladdr      *acur = rpool->cur;
	struct pf_src_node       k;

	/*
	 * Sticky address: if a source node already maps this source to a
	 * translation address, reuse that address.
	 */
	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		k.af = af;
		PF_ACPY(&k.addr, saddr, af);
		if (r->rule_flag & PFRULE_RULESRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR) {
			k.rule.ptr = r;
		} else {
			k.rule.ptr = NULL;
		}
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, rpool->af)) {
			PF_ACPY(naddr, &(*sn)->raddr, rpool->af);
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf_map_addr: src tracking maps ");
				pf_print_host(&k.addr, 0, af);
				printf(" to ");
				pf_print_host(naddr, 0, rpool->af);
				printf("\n");
			}
			return 0;
		}
	}

	/* Resolve the current pool entry into an address/mask pair. */
	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
		return 1;
	}
	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
		/* dynamic interface address: take the tracked addr/mask */
		if (rpool->cur->addr.p.dyn == NULL) {
			return 1;
		}
		switch (rpool->af) {
#if INET
		case AF_INET:
			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN) {
				return 1;
			}
			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
			break;
#endif /* INET */
		case AF_INET6:
			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN) {
				return 1;
			}
			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
			break;
		}
	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
		/* tables are only usable with the round-robin policy */
		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
			return 1; /* unsupported */
		}
	} else {
		raddr = &rpool->cur->addr.v.a.addr;
		rmask = &rpool->cur->addr.v.a.mask;
	}

	/* Apply the pool's address-selection policy. */
	switch (rpool->opts & PF_POOL_TYPEMASK) {
	case PF_POOL_NONE:
		PF_ACPY(naddr, raddr, rpool->af);
		break;
	case PF_POOL_BITMASK:
		/* merge pool address and source address under the mask */
		ASSERT(af == rpool->af);
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		break;
	case PF_POOL_RANDOM:
		if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) {
			/* first attempt: pick a random starting counter */
			switch (af) {
#if INET
			case AF_INET:
				rpool->counter.addr32[0] = htonl(random());
				break;
#endif /* INET */
			case AF_INET6:
				/* randomize only words not fixed by the mask */
				if (rmask->addr32[3] != 0xffffffff) {
					rpool->counter.addr32[3] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[2] != 0xffffffff) {
					rpool->counter.addr32[2] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[1] != 0xffffffff) {
					rpool->counter.addr32[1] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[0] != 0xffffffff) {
					rpool->counter.addr32[0] =
					    RandomULong();
				}
				break;
			}
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter,
			    rpool->af);
			PF_ACPY(init_addr, naddr, rpool->af);
		} else {
			/* subsequent attempts step the counter linearly */
			PF_AINC(&rpool->counter, rpool->af);
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter,
			    rpool->af);
		}
		break;
	case PF_POOL_SRCHASH:
		/* address derived from a keyed hash of the source */
		ASSERT(af == rpool->af);
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		pf_hash(saddr, (struct pf_addr *)(void *)&hash,
		    &rpool->key, af);
		PF_POOLMASK(naddr, raddr, rmask,
		    (struct pf_addr *)(void *)&hash, af);
		break;
	case PF_POOL_ROUNDROBIN:
		/* try the current entry first ... */
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				goto get_addr;
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			if (rpool->cur->addr.p.dyn != NULL &&
			    !pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				goto get_addr;
			}
		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter,
		    rpool->af)) {
			goto get_addr;
		}

try_next:
		/* ... then advance to the next entry, wrapping around */
		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) {
			rpool->cur = TAILQ_FIRST(&rpool->list);
		}
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				/* table contains no address of type
				 * 'rpool->af' */
				if (rpool->cur != acur) {
					goto try_next;
				}
				/* looped through the whole pool: give up */
				return 1;
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			rpool->tblidx = -1;
			if (rpool->cur->addr.p.dyn == NULL) {
				return 1;
			}
			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				/* table contains no address of type
				 * 'rpool->af' */
				if (rpool->cur != acur) {
					goto try_next;
				}
				return 1;
			}
		} else {
			raddr = &rpool->cur->addr.v.a.addr;
			rmask = &rpool->cur->addr.v.a.mask;
			PF_ACPY(&rpool->counter, raddr, rpool->af);
		}

get_addr:
		PF_ACPY(naddr, &rpool->counter, rpool->af);
		if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) {
			PF_ACPY(init_addr, naddr, rpool->af);
		}
		PF_AINC(&rpool->counter, rpool->af);
		break;
	}
	/* remember the mapping on the source node for sticky-address */
	if (*sn != NULL) {
		PF_ACPY(&(*sn)->raddr, naddr, rpool->af);
	}

	if (pf_status.debug >= PF_DEBUG_MISC &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		printf("pf_map_addr: selected address ");
		pf_print_host(naddr, 0, rpool->af);
		printf("\n");
	}

	return 0;
}
3479 
/*
 * Select a translated source address (via pf_map_addr()) and, when
 * 'nxport' is non-NULL, a free proxy source port for a new NAT
 * binding.  For UDP (per r->extmap policy) and TCP, an existing
 * state's gateway port is reused when the same source ip/port already
 * has a binding under this rule.  Otherwise a free port is searched in
 * [low, high] (random start, linear probe), retrying with the next
 * pool address for random/round-robin pools until the pool wraps back
 * to the first address tried.
 *
 * Returns 0 on success (naddr/nxport filled in), 1 on failure (no
 * address or no free port available).
 *
 * SKYWALK builds additionally reserve the chosen TCP/UDP port in the
 * network namespace via netns_reserve(); '*pnstoken' receives the
 * reservation token.
 */
static __attribute__((noinline)) int
pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r,
    struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, struct pf_addr *naddr,
    union pf_state_xport *nxport, struct pf_src_node **sn
#if SKYWALK
    , netns_token *pnstoken
#endif
    )
{
#pragma unused(kif)
	struct pf_state_key_cmp key;
	struct pf_addr          init_addr;
	unsigned int cut;
	sa_family_t af = pd->af;
	u_int8_t proto = pd->proto;
	unsigned int low = r->rpool.proxy_port[0];
	unsigned int high = r->rpool.proxy_port[1];

	bzero(&init_addr, sizeof(init_addr));
	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) {
		return 1;
	}

	/* ICMP "ports" (echo ids) may use the full 16-bit range */
	if (proto == IPPROTO_ICMP) {
		low = 1;
		high = 65535;
	}

	if (!nxport) {
		return 0; /* No output necessary. */
	}
	/*--- Special mapping rules for UDP ---*/
	if (proto == IPPROTO_UDP) {
		/*--- Never float IKE source port ---*/
		if (ntohs(sxport->port) == PF_IKE_PORT) {
			nxport->port = sxport->port;
			return 0;
		}

		/*--- Apply exterior mapping options ---*/
		if (r->extmap > PF_EXTMAP_APD) {
			/* reuse an existing binding for this lan ip/port */
			struct pf_state *s;

			TAILQ_FOREACH(s, &state_list, entry_list) {
				struct pf_state_key *sk = s->state_key;
				if (!sk) {
					continue;
				}
				if (s->nat_rule.ptr != r) {
					continue;
				}
				if (sk->proto != IPPROTO_UDP ||
				    sk->af_lan != af) {
					continue;
				}
				if (sk->lan.xport.port != sxport->port) {
					continue;
				}
				if (PF_ANEQ(&sk->lan.addr, saddr, af)) {
					continue;
				}
				/* endpoint-independent mapping ignores dst */
				if (r->extmap < PF_EXTMAP_EI &&
				    PF_ANEQ(&sk->ext_lan.addr, daddr, af)) {
					continue;
				}

#if SKYWALK
				if (netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, sxport->port,
				    NETNS_PF, NULL) != 0) {
					return 1;
				}
#endif
				nxport->port = sk->gwy.xport.port;
				return 0;
			}
		}
	} else if (proto == IPPROTO_TCP) {
		struct pf_state* s;
		/*
		 * APPLE MODIFICATION: <rdar://problem/6546358>
		 * Fix allows....NAT to use a single binding for TCP session
		 * with same source IP and source port
		 */
		TAILQ_FOREACH(s, &state_list, entry_list) {
			struct pf_state_key* sk = s->state_key;
			if (!sk) {
				continue;
			}
			if (s->nat_rule.ptr != r) {
				continue;
			}
			if (sk->proto != IPPROTO_TCP || sk->af_lan != af) {
				continue;
			}
			if (sk->lan.xport.port != sxport->port) {
				continue;
			}
			if (!(PF_AEQ(&sk->lan.addr, saddr, af))) {
				continue;
			}
#if SKYWALK
			if (netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, sxport->port,
			    NETNS_PF, NULL) != 0) {
				return 1;
			}
#endif
			nxport->port = sk->gwy.xport.port;
			return 0;
		}
	}
	/*
	 * No reusable binding: probe for a gateway port that is not in
	 * use by any existing state, retrying with further pool
	 * addresses until the pool wraps around to init_addr.
	 */
	do {
		key.af_gwy = af;
		key.proto = proto;
		PF_ACPY(&key.ext_gwy.addr, daddr, key.af_gwy);
		PF_ACPY(&key.gwy.addr, naddr, key.af_gwy);
		switch (proto) {
		case IPPROTO_UDP:
			key.proto_variant = r->extfilter;
			break;
		default:
			key.proto_variant = 0;
			break;
		}
		if (dxport) {
			key.ext_gwy.xport = *dxport;
		} else {
			memset(&key.ext_gwy.xport, 0,
			    sizeof(key.ext_gwy.xport));
		}
		/*
		 * port search; start random, step;
		 * similar 2 portloop in in_pcbbind
		 */
		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
		    proto == IPPROTO_ICMP)) {
			/* portless protocols: only the address must be free */
			if (dxport) {
				key.gwy.xport = *dxport;
			} else {
				memset(&key.gwy.xport, 0,
				    sizeof(key.gwy.xport));
			}
#if SKYWALK
			/* Nothing to do: netns handles TCP/UDP only */
#endif
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
				return 0;
			}
		} else if (low == 0 && high == 0) {
			/* no proxy range configured: keep the original port */
			key.gwy.xport = *nxport;
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
			    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
			    || netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, nxport->port,
			    NETNS_PF, NULL) == 0)
#endif
			    ) {
				return 0;
			}
		} else if (low == high) {
			/* single-port proxy range */
			key.gwy.xport.port = htons(low);
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
			    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
			    || netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, htons(low),
			    NETNS_PF, NULL) == 0)
#endif
			    ) {
				nxport->port = htons(low);
				return 0;
			}
		} else {
			unsigned int tmp;
			if (low > high) {
				tmp = low;
				low = high;
				high = tmp;
			}
			/* low < high */
			cut = htonl(random()) % (1 + high - low) + low;
			/* low <= cut <= high */
			for (tmp = cut; tmp <= high; ++(tmp)) {
				key.gwy.xport.port = htons(tmp);
				if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
				    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
				    || netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, htons(tmp),
				    NETNS_PF, NULL) == 0)
#endif
				    ) {
					nxport->port = htons(tmp);
					return 0;
				}
			}
			/* wrap back and search below the random start */
			for (tmp = cut - 1; tmp >= low; --(tmp)) {
				key.gwy.xport.port = htons(tmp);
				if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
				    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
				    || netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, htons(tmp),
				    NETNS_PF, NULL) == 0)
#endif
				    ) {
					nxport->port = htons(tmp);
					return 0;
				}
			}
		}

		/* all ports busy on this address: try the next pool address */
		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
		case PF_POOL_RANDOM:
		case PF_POOL_ROUNDROBIN:
			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) {
				return 1;
			}
			break;
		case PF_POOL_NONE:
		case PF_POOL_SRCHASH:
		case PF_POOL_BITMASK:
		default:
			return 1;
		}
	} while (!PF_AEQ(&init_addr, naddr, af));

	return 1;                                     /* none available */
}
3712 
/*
 * Walk the active translation ruleset 'rs_num' (NAT/BINAT/RDR) and
 * return the first rule matching the packet described by pd and the
 * given addresses/ports, or NULL if none matches (or if the matching
 * rule is a "no nat/binat/rdr" rule).  For inbound BINAT and outbound
 * RDR the rule is evaluated in reverse: the rule's translation side is
 * matched against the packet's source.  As a side effect the packet is
 * tagged (pf_tag_packet) with any tag/rtableid collected while
 * matching.  The per-rule skip-step pointers are used to jump over
 * rules that cannot match; anchors are traversed via
 * pf_step_into_anchor()/pf_step_out_of_anchor().
 */
static __attribute__((noinline)) struct pf_rule *
pf_match_translation(struct pf_pdesc *pd, pbuf_t *pbuf, int off,
    int direction, struct pfi_kif *kif, struct pf_addr *saddr,
    union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, int rs_num)
{
	struct pf_rule          *r, *rm = NULL;
	struct pf_ruleset       *ruleset = NULL;
	int                      tag = -1;
	unsigned int             rtableid = IFSCOPE_NONE;
	int                      asd = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
	while (r && rm == NULL) {
		struct pf_rule_addr     *src = NULL, *dst = NULL;
		struct pf_addr_wrap     *xdst = NULL;
		struct pf_addr_wrap     *xsrc = NULL;
		union pf_rule_xport     rdrxport;

		/*
		 * Pick which side of the rule to compare against the
		 * packet, depending on rule action and direction.
		 */
		if (r->action == PF_BINAT && direction == PF_IN) {
			src = &r->dst;
			if (r->rpool.cur != NULL) {
				xdst = &r->rpool.cur->addr;
			}
		} else if (r->action == PF_RDR && direction == PF_OUT) {
			dst = &r->src;
			src = &r->dst;
			if (r->rpool.cur != NULL) {
				/* outbound RDR matches the proxy port */
				rdrxport.range.op = PF_OP_EQ;
				rdrxport.range.port[0] =
				    htons(r->rpool.proxy_port[0]);
				xsrc = &r->rpool.cur->addr;
			}
		} else {
			src = &r->src;
			dst = &r->dst;
		}

		r->evaluations++;
		/* each failed criterion jumps via its skip-step pointer */
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != direction) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != pd->af) {
			r = r->skip[PF_SKIP_AF].ptr;
		} else if (r->proto && r->proto != pd->proto) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		} else if (xsrc && PF_MISMATCHAW(xsrc, saddr, pd->af, 0, NULL)) {
			r = TAILQ_NEXT(r, entries);
		} else if (!xsrc && PF_MISMATCHAW(&src->addr, saddr, pd->af,
		    src->neg, kif)) {
			r = TAILQ_NEXT(r, entries);
		} else if (xsrc && (!rdrxport.range.port[0] ||
		    !pf_match_xport(r->proto, r->proto_variant, &rdrxport,
		    sxport))) {
			r = TAILQ_NEXT(r, entries);
		} else if (!xsrc && !pf_match_xport(r->proto,
		    r->proto_variant, &src->xport, sxport)) {
			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
			    PF_SKIP_DST_PORT].ptr;
		} else if (dst != NULL &&
		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		} else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
		    0, NULL)) {
			r = TAILQ_NEXT(r, entries);
		} else if (dst && !pf_match_xport(r->proto, r->proto_variant,
		    &dst->xport, dxport)) {
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, pbuf,
		    off, pd->hdr.tcp), r->os_fingerprint))) {
			r = TAILQ_NEXT(r, entries);
		} else {
			/* all criteria matched */
			if (r->tag) {
				tag = r->tag;
			}
			if (PF_RTABLEID_IS_VALID(r->rtableid)) {
				rtableid = r->rtableid;
			}
			if (r->anchor == NULL) {
				rm = r;
			} else {
				/* anchor rule: descend into its ruleset */
				pf_step_into_anchor(&asd, &ruleset, rs_num,
				    &r, NULL, NULL);
			}
		}
		if (r == NULL) {
			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
			    NULL, NULL);
		}
	}
	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, NULL)) {
		return NULL;
	}
	/* "no nat/rdr/binat" rules suppress translation */
	if (rm != NULL && (rm->action == PF_NONAT ||
	    rm->action == PF_NORDR || rm->action == PF_NOBINAT ||
	    rm->action == PF_NONAT64)) {
		return NULL;
	}
	return rm;
}
3817 
3818 /*
3819  * Get address translation information for NAT/BINAT/RDR
3820  * pd		: pf packet descriptor
3821  * pbuf		: pbuf holding the packet
3822  * off		: offset to protocol header
3823  * direction	: direction of packet
3824  * kif		: pf interface info obtained from the packet's recv interface
3825  * sn		: source node pointer (output)
3826  * saddr	: packet source address
3827  * sxport	: packet source port
3828  * daddr	: packet destination address
3829  * dxport	: packet destination port
3830  * nsxport	: translated source port (output)
3831  *
3832  * Translated source & destination address are updated in pd->nsaddr &
3833  * pd->ndaddr
3834  */
3835 static __attribute__((noinline)) struct pf_rule *
pf_get_translation_aux(struct pf_pdesc * pd,pbuf_t * pbuf,int off,int direction,struct pfi_kif * kif,struct pf_src_node ** sn,struct pf_addr * saddr,union pf_state_xport * sxport,struct pf_addr * daddr,union pf_state_xport * dxport,union pf_state_xport * nsxport,netns_token * pnstoken)3836 pf_get_translation_aux(struct pf_pdesc *pd, pbuf_t *pbuf, int off,
3837     int direction, struct pfi_kif *kif, struct pf_src_node **sn,
3838     struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
3839     union pf_state_xport *dxport, union pf_state_xport *nsxport
3840 #if SKYWALK
3841     , netns_token *pnstoken
3842 #endif
3843     )
3844 {
3845 	struct pf_rule  *r = NULL;
3846 	pd->naf = pd->af;
3847 
3848 	if (direction == PF_OUT) {
3849 		r = pf_match_translation(pd, pbuf, off, direction, kif, saddr,
3850 		    sxport, daddr, dxport, PF_RULESET_BINAT);
3851 		if (r == NULL) {
3852 			r = pf_match_translation(pd, pbuf, off, direction, kif,
3853 			    saddr, sxport, daddr, dxport, PF_RULESET_RDR);
3854 		}
3855 		if (r == NULL) {
3856 			r = pf_match_translation(pd, pbuf, off, direction, kif,
3857 			    saddr, sxport, daddr, dxport, PF_RULESET_NAT);
3858 		}
3859 	} else {
3860 		r = pf_match_translation(pd, pbuf, off, direction, kif, saddr,
3861 		    sxport, daddr, dxport, PF_RULESET_RDR);
3862 		if (r == NULL) {
3863 			r = pf_match_translation(pd, pbuf, off, direction, kif,
3864 			    saddr, sxport, daddr, dxport, PF_RULESET_BINAT);
3865 		}
3866 	}
3867 
3868 	if (r != NULL) {
3869 		struct pf_addr *nsaddr = &pd->naddr;
3870 		struct pf_addr *ndaddr = &pd->ndaddr;
3871 
3872 		*nsaddr = *saddr;
3873 		*ndaddr = *daddr;
3874 
3875 		switch (r->action) {
3876 		case PF_NONAT:
3877 		case PF_NONAT64:
3878 		case PF_NOBINAT:
3879 		case PF_NORDR:
3880 			return NULL;
3881 		case PF_NAT:
3882 		case PF_NAT64:
3883 			/*
3884 			 * we do NAT64 on incoming path and we call ip_input
3885 			 * which asserts receive interface to be not NULL.
3886 			 * The below check is to prevent NAT64 action on any
3887 			 * packet generated by local entity using synthesized
3888 			 * IPv6 address.
3889 			 */
3890 			if ((r->action == PF_NAT64) && (direction == PF_OUT)) {
3891 				return NULL;
3892 			}
3893 
3894 			if (pf_get_sport(pd, kif, r, saddr, sxport, daddr,
3895 			    dxport, nsaddr, nsxport, sn
3896 #if SKYWALK
3897 			    , pnstoken
3898 #endif
3899 			    )) {
3900 				DPFPRINTF(PF_DEBUG_MISC,
3901 				    ("pf: NAT proxy port allocation "
3902 				    "(%u-%u) failed\n",
3903 				    r->rpool.proxy_port[0],
3904 				    r->rpool.proxy_port[1]));
3905 				return NULL;
3906 			}
3907 			/*
3908 			 * For NAT64 the destination IPv4 address is derived
3909 			 * from the last 32 bits of synthesized IPv6 address
3910 			 */
3911 			if (r->action == PF_NAT64) {
3912 				ndaddr->v4addr.s_addr = daddr->addr32[3];
3913 				pd->naf = AF_INET;
3914 			}
3915 			break;
3916 		case PF_BINAT:
3917 			switch (direction) {
3918 			case PF_OUT:
3919 				if (r->rpool.cur->addr.type ==
3920 				    PF_ADDR_DYNIFTL) {
3921 					if (r->rpool.cur->addr.p.dyn == NULL) {
3922 						return NULL;
3923 					}
3924 					switch (pd->af) {
3925 #if INET
3926 					case AF_INET:
3927 						if (r->rpool.cur->addr.p.dyn->
3928 						    pfid_acnt4 < 1) {
3929 							return NULL;
3930 						}
3931 						PF_POOLMASK(nsaddr,
3932 						    &r->rpool.cur->addr.p.dyn->
3933 						    pfid_addr4,
3934 						    &r->rpool.cur->addr.p.dyn->
3935 						    pfid_mask4,
3936 						    saddr, AF_INET);
3937 						break;
3938 #endif /* INET */
3939 					case AF_INET6:
3940 						if (r->rpool.cur->addr.p.dyn->
3941 						    pfid_acnt6 < 1) {
3942 							return NULL;
3943 						}
3944 						PF_POOLMASK(nsaddr,
3945 						    &r->rpool.cur->addr.p.dyn->
3946 						    pfid_addr6,
3947 						    &r->rpool.cur->addr.p.dyn->
3948 						    pfid_mask6,
3949 						    saddr, AF_INET6);
3950 						break;
3951 					}
3952 				} else {
3953 					PF_POOLMASK(nsaddr,
3954 					    &r->rpool.cur->addr.v.a.addr,
3955 					    &r->rpool.cur->addr.v.a.mask,
3956 					    saddr, pd->af);
3957 				}
3958 				break;
3959 			case PF_IN:
3960 				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
3961 					if (r->src.addr.p.dyn == NULL) {
3962 						return NULL;
3963 					}
3964 					switch (pd->af) {
3965 #if INET
3966 					case AF_INET:
3967 						if (r->src.addr.p.dyn->
3968 						    pfid_acnt4 < 1) {
3969 							return NULL;
3970 						}
3971 						PF_POOLMASK(ndaddr,
3972 						    &r->src.addr.p.dyn->
3973 						    pfid_addr4,
3974 						    &r->src.addr.p.dyn->
3975 						    pfid_mask4,
3976 						    daddr, AF_INET);
3977 						break;
3978 #endif /* INET */
3979 					case AF_INET6:
3980 						if (r->src.addr.p.dyn->
3981 						    pfid_acnt6 < 1) {
3982 							return NULL;
3983 						}
3984 						PF_POOLMASK(ndaddr,
3985 						    &r->src.addr.p.dyn->
3986 						    pfid_addr6,
3987 						    &r->src.addr.p.dyn->
3988 						    pfid_mask6,
3989 						    daddr, AF_INET6);
3990 						break;
3991 					}
3992 				} else {
3993 					PF_POOLMASK(ndaddr,
3994 					    &r->src.addr.v.a.addr,
3995 					    &r->src.addr.v.a.mask, daddr,
3996 					    pd->af);
3997 				}
3998 				break;
3999 			}
4000 			break;
4001 		case PF_RDR: {
4002 			switch (direction) {
4003 			case PF_OUT:
4004 				if (r->dst.addr.type == PF_ADDR_DYNIFTL) {
4005 					if (r->dst.addr.p.dyn == NULL) {
4006 						return NULL;
4007 					}
4008 					switch (pd->af) {
4009 #if INET
4010 					case AF_INET:
4011 						if (r->dst.addr.p.dyn->
4012 						    pfid_acnt4 < 1) {
4013 							return NULL;
4014 						}
4015 						PF_POOLMASK(nsaddr,
4016 						    &r->dst.addr.p.dyn->
4017 						    pfid_addr4,
4018 						    &r->dst.addr.p.dyn->
4019 						    pfid_mask4,
4020 						    daddr, AF_INET);
4021 						break;
4022 #endif /* INET */
4023 					case AF_INET6:
4024 						if (r->dst.addr.p.dyn->
4025 						    pfid_acnt6 < 1) {
4026 							return NULL;
4027 						}
4028 						PF_POOLMASK(nsaddr,
4029 						    &r->dst.addr.p.dyn->
4030 						    pfid_addr6,
4031 						    &r->dst.addr.p.dyn->
4032 						    pfid_mask6,
4033 						    daddr, AF_INET6);
4034 						break;
4035 					}
4036 				} else {
4037 					PF_POOLMASK(nsaddr,
4038 					    &r->dst.addr.v.a.addr,
4039 					    &r->dst.addr.v.a.mask,
4040 					    daddr, pd->af);
4041 				}
4042 				if (nsxport && r->dst.xport.range.port[0]) {
4043 					nsxport->port =
4044 					    r->dst.xport.range.port[0];
4045 				}
4046 				break;
4047 			case PF_IN:
4048 				if (pf_map_addr(pd->af, r, saddr,
4049 				    ndaddr, NULL, sn)) {
4050 					return NULL;
4051 				}
4052 				if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
4053 				    PF_POOL_BITMASK) {
4054 					PF_POOLMASK(ndaddr, ndaddr,
4055 					    &r->rpool.cur->addr.v.a.mask, daddr,
4056 					    pd->af);
4057 				}
4058 
4059 				if (nsxport && dxport) {
4060 					if (r->rpool.proxy_port[1]) {
4061 						u_int32_t       tmp_nport;
4062 
4063 						tmp_nport =
4064 						    ((ntohs(dxport->port) -
4065 						    ntohs(r->dst.xport.range.
4066 						    port[0])) %
4067 						    (r->rpool.proxy_port[1] -
4068 						    r->rpool.proxy_port[0] +
4069 						    1)) + r->rpool.proxy_port[0];
4070 
4071 						/* wrap around if necessary */
4072 						if (tmp_nport > 65535) {
4073 							tmp_nport -= 65535;
4074 						}
4075 						nsxport->port =
4076 						    htons((u_int16_t)tmp_nport);
4077 					} else if (r->rpool.proxy_port[0]) {
4078 						nsxport->port = htons(r->rpool.
4079 						    proxy_port[0]);
4080 					}
4081 				}
4082 				break;
4083 			}
4084 			break;
4085 		}
4086 		default:
4087 			return NULL;
4088 		}
4089 	}
4090 
4091 	return r;
4092 }
4093 
/*
 * Resolve the packet described by "pd" to a local socket (PCB) and
 * record the owning uid/gid in pd->lookup, so rules can match on
 * socket ownership.  Only TCP and UDP are supported.
 *
 * Returns 1 when a matching PCB was found, -1 otherwise (missing
 * descriptor/header, unsupported protocol, or no socket match).
 */
int
pf_socket_lookup(int direction, struct pf_pdesc *pd)
{
	struct pf_addr          *saddr, *daddr;
	u_int16_t                sport, dport;
	struct inpcbinfo        *pi;
	int                     inp = 0;

	if (pd == NULL) {
		return -1;
	}
	/* defaults reported when no PCB is found */
	pd->lookup.uid = UID_MAX;
	pd->lookup.gid = GID_MAX;
	pd->lookup.pid = NO_PID;

	/* select ports and the protocol's PCB table */
	switch (pd->proto) {
	case IPPROTO_TCP:
		if (pd->hdr.tcp == NULL) {
			return -1;
		}
		sport = pd->hdr.tcp->th_sport;
		dport = pd->hdr.tcp->th_dport;
		pi = &tcbinfo;
		break;
	case IPPROTO_UDP:
		if (pd->hdr.udp == NULL) {
			return -1;
		}
		sport = pd->hdr.udp->uh_sport;
		dport = pd->hdr.udp->uh_dport;
		pi = &udbinfo;
		break;
	default:
		return -1;
	}
	if (direction == PF_IN) {
		saddr = pd->src;
		daddr = pd->dst;
	} else {
		/*
		 * Outbound: the PCB hash is keyed with the local end as
		 * the "source", so swap ports and addresses first.
		 */
		u_int16_t       p;

		p = sport;
		sport = dport;
		dport = p;
		saddr = pd->dst;
		daddr = pd->src;
	}
	switch (pd->af) {
#if INET
	case AF_INET:
		/* exact 4-tuple match first */
		inp = in_pcblookup_hash_exists(pi, saddr->v4addr, sport, daddr->v4addr, dport,
		    0, &pd->lookup.uid, &pd->lookup.gid, NULL);
		if (inp == 0) {
			struct in6_addr s6, d6;

			/* retry as IPv4-mapped IPv6 (::ffff:a.b.c.d) endpoints */
			memset(&s6, 0, sizeof(s6));
			s6.s6_addr16[5] = htons(0xffff);
			memcpy(&s6.s6_addr32[3], &saddr->v4addr,
			    sizeof(saddr->v4addr));

			memset(&d6, 0, sizeof(d6));
			d6.s6_addr16[5] = htons(0xffff);
			memcpy(&d6.s6_addr32[3], &daddr->v4addr,
			    sizeof(daddr->v4addr));

			inp = in6_pcblookup_hash_exists(pi, &s6, sport, IFSCOPE_NONE,
			    &d6, dport, IFSCOPE_NONE, 0, &pd->lookup.uid, &pd->lookup.gid, NULL, false);
			if (inp == 0) {
				/* finally fall back to wildcard (listening) sockets */
				inp = in_pcblookup_hash_exists(pi, saddr->v4addr, sport,
				    daddr->v4addr, dport, INPLOOKUP_WILDCARD, &pd->lookup.uid, &pd->lookup.gid, NULL);
				if (inp == 0) {
					inp = in6_pcblookup_hash_exists(pi, &s6, sport, IFSCOPE_NONE,
					    &d6, dport, IFSCOPE_NONE, INPLOOKUP_WILDCARD,
					    &pd->lookup.uid, &pd->lookup.gid, NULL, false);
					if (inp == 0) {
						return -1;
					}
				}
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		/* exact match first, then wildcard (listening) sockets */
		inp = in6_pcblookup_hash_exists(pi, &saddr->v6addr, sport, IFSCOPE_UNKNOWN, &daddr->v6addr,
		    dport, IFSCOPE_UNKNOWN, 0, &pd->lookup.uid, &pd->lookup.gid, NULL, false);
		if (inp == 0) {
			inp = in6_pcblookup_hash_exists(pi, &saddr->v6addr, sport, IFSCOPE_UNKNOWN,
			    &daddr->v6addr, dport, IFSCOPE_UNKNOWN, INPLOOKUP_WILDCARD,
			    &pd->lookup.uid, &pd->lookup.gid, NULL, false);
			if (inp == 0) {
				return -1;
			}
		}
		break;

	default:
		return -1;
	}

	return 1;
}
4195 
4196 static __attribute__((noinline)) u_int8_t
pf_get_wscale(pbuf_t * pbuf,int off,u_int16_t th_off,sa_family_t af)4197 pf_get_wscale(pbuf_t *pbuf, int off, u_int16_t th_off, sa_family_t af)
4198 {
4199 	int              hlen;
4200 	u_int8_t         hdr[60];
4201 	u_int8_t        *opt, optlen;
4202 	u_int8_t         wscale = 0;
4203 
4204 	hlen = th_off << 2;             /* hlen <= sizeof (hdr) */
4205 	if (hlen <= (int)sizeof(struct tcphdr)) {
4206 		return 0;
4207 	}
4208 	if (!pf_pull_hdr(pbuf, off, hdr, hlen, NULL, NULL, af)) {
4209 		return 0;
4210 	}
4211 	opt = hdr + sizeof(struct tcphdr);
4212 	hlen -= sizeof(struct tcphdr);
4213 	while (hlen >= 3) {
4214 		switch (*opt) {
4215 		case TCPOPT_EOL:
4216 		case TCPOPT_NOP:
4217 			++opt;
4218 			--hlen;
4219 			break;
4220 		case TCPOPT_WINDOW:
4221 			wscale = opt[2];
4222 			if (wscale > TCP_MAX_WINSHIFT) {
4223 				wscale = TCP_MAX_WINSHIFT;
4224 			}
4225 			wscale |= PF_WSCALE_FLAG;
4226 			OS_FALLTHROUGH;
4227 		default:
4228 			optlen = opt[1];
4229 			if (optlen < 2) {
4230 				optlen = 2;
4231 			}
4232 			hlen -= optlen;
4233 			opt += optlen;
4234 			break;
4235 		}
4236 	}
4237 	return wscale;
4238 }
4239 
4240 static __attribute__((noinline)) u_int16_t
pf_get_mss(pbuf_t * pbuf,int off,u_int16_t th_off,sa_family_t af)4241 pf_get_mss(pbuf_t *pbuf, int off, u_int16_t th_off, sa_family_t af)
4242 {
4243 	int              hlen;
4244 	u_int8_t         hdr[60];
4245 	u_int8_t        *opt, optlen;
4246 	u_int16_t        mss = tcp_mssdflt;
4247 
4248 	hlen = th_off << 2;     /* hlen <= sizeof (hdr) */
4249 	if (hlen <= (int)sizeof(struct tcphdr)) {
4250 		return 0;
4251 	}
4252 	if (!pf_pull_hdr(pbuf, off, hdr, hlen, NULL, NULL, af)) {
4253 		return 0;
4254 	}
4255 	opt = hdr + sizeof(struct tcphdr);
4256 	hlen -= sizeof(struct tcphdr);
4257 	while (hlen >= TCPOLEN_MAXSEG) {
4258 		switch (*opt) {
4259 		case TCPOPT_EOL:
4260 		case TCPOPT_NOP:
4261 			++opt;
4262 			--hlen;
4263 			break;
4264 		case TCPOPT_MAXSEG:
4265 			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
4266 #if BYTE_ORDER != BIG_ENDIAN
4267 			NTOHS(mss);
4268 #endif
4269 			OS_FALLTHROUGH;
4270 		default:
4271 			optlen = opt[1];
4272 			if (optlen < 2) {
4273 				optlen = 2;
4274 			}
4275 			hlen -= optlen;
4276 			opt += optlen;
4277 			break;
4278 		}
4279 	}
4280 	return mss;
4281 }
4282 
4283 static __attribute__((noinline)) u_int16_t
pf_calc_mss(struct pf_addr * addr,sa_family_t af,u_int16_t offer)4284 pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
4285 {
4286 #if INET
4287 	struct sockaddr_in      *dst;
4288 	struct route             ro;
4289 #endif /* INET */
4290 	struct sockaddr_in6     *dst6;
4291 	struct route_in6         ro6;
4292 	struct rtentry          *rt = NULL;
4293 	int                      hlen;
4294 	u_int16_t                mss = tcp_mssdflt;
4295 
4296 	switch (af) {
4297 #if INET
4298 	case AF_INET:
4299 		hlen = sizeof(struct ip);
4300 		bzero(&ro, sizeof(ro));
4301 		dst = (struct sockaddr_in *)(void *)&ro.ro_dst;
4302 		dst->sin_family = AF_INET;
4303 		dst->sin_len = sizeof(*dst);
4304 		dst->sin_addr = addr->v4addr;
4305 		rtalloc(&ro);
4306 		rt = ro.ro_rt;
4307 		break;
4308 #endif /* INET */
4309 	case AF_INET6:
4310 		hlen = sizeof(struct ip6_hdr);
4311 		bzero(&ro6, sizeof(ro6));
4312 		dst6 = (struct sockaddr_in6 *)(void *)&ro6.ro_dst;
4313 		dst6->sin6_family = AF_INET6;
4314 		dst6->sin6_len = sizeof(*dst6);
4315 		dst6->sin6_addr = addr->v6addr;
4316 		rtalloc((struct route *)&ro);
4317 		rt = ro6.ro_rt;
4318 		break;
4319 	default:
4320 		panic("pf_calc_mss: not AF_INET or AF_INET6!");
4321 		return 0;
4322 	}
4323 
4324 	if (rt && rt->rt_ifp) {
4325 		/* This is relevant only for PF SYN Proxy */
4326 		int interface_mtu = rt->rt_ifp->if_mtu;
4327 
4328 		if (af == AF_INET &&
4329 		    INTF_ADJUST_MTU_FOR_CLAT46(rt->rt_ifp)) {
4330 			interface_mtu = IN6_LINKMTU(rt->rt_ifp);
4331 			/* Further adjust the size for CLAT46 expansion */
4332 			interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
4333 		}
4334 		mss = interface_mtu - hlen - sizeof(struct tcphdr);
4335 		mss = max(tcp_mssdflt, mss);
4336 		rtfree(rt);
4337 	}
4338 	mss = min(mss, offer);
4339 	mss = max(mss, 64);             /* sanity - at least max opt space */
4340 	return mss;
4341 }
4342 
4343 static void
pf_set_rt_ifp(struct pf_state * s,struct pf_addr * saddr,sa_family_t af)4344 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af)
4345 {
4346 	struct pf_rule *r = s->rule.ptr;
4347 
4348 	s->rt_kif = NULL;
4349 
4350 	if (!r->rt || r->rt == PF_FASTROUTE) {
4351 		return;
4352 	}
4353 	if ((af == AF_INET) || (af == AF_INET6)) {
4354 		pf_map_addr(af, r, saddr, &s->rt_addr, NULL,
4355 		    &s->nat_src_node);
4356 		s->rt_kif = r->rpool.cur->kif;
4357 	}
4358 
4359 	return;
4360 }
4361 
4362 static void
pf_attach_state(struct pf_state_key * sk,struct pf_state * s,int tail)4363 pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail)
4364 {
4365 	s->state_key = sk;
4366 	sk->refcnt++;
4367 
4368 	/* list is sorted, if-bound states before floating */
4369 	if (tail) {
4370 		TAILQ_INSERT_TAIL(&sk->states, s, next);
4371 	} else {
4372 		TAILQ_INSERT_HEAD(&sk->states, s, next);
4373 	}
4374 }
4375 
/*
 * Release the flow ID held by a state key, but only when PF itself
 * allocated it (FLOWSRC_PF); inherited flow IDs are left alone.
 */
static void
pf_state_key_release_flowid(struct pf_state_key *sk)
{
#pragma unused (sk)
#if SKYWALK
	if (sk->flowsrc != FLOWSRC_PF || sk->flowhash == 0) {
		return;
	}
	flowidns_release_flowid(sk->flowhash);
	sk->flowhash = 0;
	sk->flowsrc = 0;
#endif /* SKYWALK */
}
4388 
/*
 * Unlink state "s" from its state key.  When the key's reference
 * count drops to zero, the key itself is torn down: removed from the
 * global ext_gwy/lan_ext lookup trees (unless the caller signals via
 * PF_DT_SKIP_* flags that it already did so), its app state and
 * PF-owned flow ID released, and the key returned to its pool.
 */
void
pf_detach_state(struct pf_state *s, int flags)
{
	struct pf_state_key     *sk = s->state_key;

	if (sk == NULL) {
		return;
	}

	s->state_key = NULL;
	TAILQ_REMOVE(&sk->states, s, next);
	if (--sk->refcnt == 0) {
		/* last reference: drop the key from the lookup trees */
		if (!(flags & PF_DT_SKIP_EXTGWY)) {
			RB_REMOVE(pf_state_tree_ext_gwy,
			    &pf_statetbl_ext_gwy, sk);
		}
		if (!(flags & PF_DT_SKIP_LANEXT)) {
			RB_REMOVE(pf_state_tree_lan_ext,
			    &pf_statetbl_lan_ext, sk);
		}
		if (sk->app_state) {
			pool_put(&pf_app_state_pl, sk->app_state);
		}
		pf_state_key_release_flowid(sk);
		pool_put(&pf_state_key_pl, sk);
	}
}
4416 
4417 struct pf_state_key *
pf_alloc_state_key(struct pf_state * s,struct pf_state_key * psk)4418 pf_alloc_state_key(struct pf_state *s, struct pf_state_key *psk)
4419 {
4420 	struct pf_state_key     *sk;
4421 
4422 	if ((sk = pool_get(&pf_state_key_pl, PR_WAITOK)) == NULL) {
4423 		return NULL;
4424 	}
4425 	bzero(sk, sizeof(*sk));
4426 	TAILQ_INIT(&sk->states);
4427 	pf_attach_state(sk, s, 0);
4428 
4429 	/* initialize state key from psk, if provided */
4430 	if (psk != NULL) {
4431 		bcopy(&psk->lan, &sk->lan, sizeof(sk->lan));
4432 		bcopy(&psk->gwy, &sk->gwy, sizeof(sk->gwy));
4433 		bcopy(&psk->ext_lan, &sk->ext_lan, sizeof(sk->ext_lan));
4434 		bcopy(&psk->ext_gwy, &sk->ext_gwy, sizeof(sk->ext_gwy));
4435 		sk->af_lan = psk->af_lan;
4436 		sk->af_gwy = psk->af_gwy;
4437 		sk->proto = psk->proto;
4438 		sk->direction = psk->direction;
4439 		sk->proto_variant = psk->proto_variant;
4440 		VERIFY(psk->app_state == NULL);
4441 		ASSERT(psk->flowsrc != FLOWSRC_PF);
4442 		sk->flowsrc = psk->flowsrc;
4443 		sk->flowhash = psk->flowhash;
4444 		/* don't touch tree entries, states and refcnt on sk */
4445 	}
4446 
4447 	if (sk->flowhash == 0) {
4448 		ASSERT(sk->flowsrc == 0);
4449 		sk->flowsrc = FLOWSRC_PF;
4450 		sk->flowhash = pf_calc_state_key_flowhash(sk);
4451 	}
4452 
4453 	return sk;
4454 }
4455 
4456 static __attribute__((noinline)) u_int32_t
pf_tcp_iss(struct pf_pdesc * pd)4457 pf_tcp_iss(struct pf_pdesc *pd)
4458 {
4459 	MD5_CTX ctx;
4460 	u_int32_t digest[4];
4461 
4462 	if (pf_tcp_secret_init == 0) {
4463 		read_frandom(pf_tcp_secret, sizeof(pf_tcp_secret));
4464 		MD5Init(&pf_tcp_secret_ctx);
4465 		MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
4466 		    sizeof(pf_tcp_secret));
4467 		pf_tcp_secret_init = 1;
4468 	}
4469 	ctx = pf_tcp_secret_ctx;
4470 
4471 	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
4472 	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
4473 	if (pd->af == AF_INET6) {
4474 		MD5Update(&ctx, (char *)&pd->src->v6addr, sizeof(struct in6_addr));
4475 		MD5Update(&ctx, (char *)&pd->dst->v6addr, sizeof(struct in6_addr));
4476 	} else {
4477 		MD5Update(&ctx, (char *)&pd->src->v4addr, sizeof(struct in_addr));
4478 		MD5Update(&ctx, (char *)&pd->dst->v4addr, sizeof(struct in_addr));
4479 	}
4480 	MD5Final((u_char *)digest, &ctx);
4481 	pf_tcp_iss_off += 4096;
4482 	return digest[0] + random() + pf_tcp_iss_off;
4483 }
4484 
/*
 * This routine is called to perform address family translation on the
 * inner IP header (that may come as payload) of an ICMP(v4/v6) error
 * response: the embedded header is replaced in place by one of the
 * other family, and the payload offsets in pd/pd2 are adjusted to
 * account for the size difference.  Returns 0 on success, -1 on bad
 * family arguments or pbuf resize failure.
 */
static __attribute__((noinline)) int
pf_change_icmp_af(pbuf_t *pbuf, int off,
    struct pf_pdesc *pd, struct pf_pdesc *pd2, struct pf_addr *src,
    struct pf_addr *dst, sa_family_t af, sa_family_t naf)
{
	struct ip               *ip4 = NULL;
	struct ip6_hdr          *ip6 = NULL;
	void                    *hdr;
	int                      hlen, olen;
	uint64_t                ipid_salt = (uint64_t)pbuf_get_packet_buffer_address(pbuf);

	/* must be a genuine v4<->v6 translation */
	if (af == naf || (af != AF_INET && af != AF_INET6) ||
	    (naf != AF_INET && naf != AF_INET6)) {
		return -1;
	}

	/* old header */
	olen = pd2->off - off;
	/* new header */
	hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);

	/* Modify the pbuf to accommodate the new header */
	hdr = pbuf_resize_segment(pbuf, off, olen, hlen);
	if (hdr == NULL) {
		return -1;
	}

	/* translate inner ip/ip6 header */
	switch (naf) {
	case AF_INET:
		ip4 = hdr;
		bzero(ip4, sizeof(*ip4));
		ip4->ip_v   = IPVERSION;
		ip4->ip_hl  = sizeof(*ip4) >> 2;
		ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - olen);
		ip4->ip_id  = rfc6864 ? 0 : htons(ip_randomid(ipid_salt));
		ip4->ip_off = htons(IP_DF);
		ip4->ip_ttl = pd2->ttl;
		/* the inner ICMP header changes family along with the IP header */
		if (pd2->proto == IPPROTO_ICMPV6) {
			ip4->ip_p = IPPROTO_ICMP;
		} else {
			ip4->ip_p = pd2->proto;
		}
		ip4->ip_src = src->v4addr;
		ip4->ip_dst = dst->v4addr;
		ip4->ip_sum = pbuf_inet_cksum(pbuf, 0, 0, ip4->ip_hl << 2);
		break;
	case AF_INET6:
		ip6 = hdr;
		bzero(ip6, sizeof(*ip6));
		ip6->ip6_vfc  = IPV6_VERSION;
		ip6->ip6_plen = htons(pd2->tot_len - olen);
		/* the inner ICMP header changes family along with the IP header */
		if (pd2->proto == IPPROTO_ICMP) {
			ip6->ip6_nxt = IPPROTO_ICMPV6;
		} else {
			ip6->ip6_nxt = pd2->proto;
		}
		/* clamp hop limit to the IPv6 default when 0 or out of range */
		if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM) {
			ip6->ip6_hlim = IPV6_DEFHLIM;
		} else {
			ip6->ip6_hlim = pd2->ttl;
		}
		ip6->ip6_src  = src->v6addr;
		ip6->ip6_dst  = dst->v6addr;
		break;
	}

	/* adjust payload offset and total packet length */
	pd2->off += hlen - olen;
	pd->tot_len += hlen - olen;

	return 0;
}
4563 
4564 #define PTR_IP(field)   ((int32_t)offsetof(struct ip, field))
4565 #define PTR_IP6(field)  ((int32_t)offsetof(struct ip6_hdr, field))
4566 
/*
 * Translate an ICMP header between ICMPv4 and ICMPv6 in place.
 * "af" is the family being translated INTO: AF_INET means "arg"
 * points at an ICMPv6 header whose type/code/mtu/pptr fields are
 * rewritten with their ICMPv4 equivalents; AF_INET6 is the reverse.
 * The MTU in Packet-Too-Big / Need-Frag is adjusted by 20 bytes for
 * the IPv4/IPv6 header size difference, and param-problem pointers
 * are remapped between the two header layouts.
 *
 * Returns 0 on success, -1 when the type/code/pointer has no
 * equivalent in the target family (caller should drop).
 */
static __attribute__((noinline)) int
pf_translate_icmp_af(int af, void *arg)
{
	struct icmp             *icmp4;
	struct icmp6_hdr        *icmp6;
	u_int32_t                mtu;
	int32_t                  ptr = -1;
	u_int8_t                 type;
	u_int8_t                 code;

	switch (af) {
	case AF_INET:
		/* ICMPv6 -> ICMPv4 */
		icmp6 = arg;
		type  = icmp6->icmp6_type;
		code  = icmp6->icmp6_code;
		mtu   = ntohl(icmp6->icmp6_mtu);

		switch (type) {
		case ICMP6_ECHO_REQUEST:
			type = ICMP_ECHO;
			break;
		case ICMP6_ECHO_REPLY:
			type = ICMP_ECHOREPLY;
			break;
		case ICMP6_DST_UNREACH:
			type = ICMP_UNREACH;
			switch (code) {
			case ICMP6_DST_UNREACH_NOROUTE:
			case ICMP6_DST_UNREACH_BEYONDSCOPE:
			case ICMP6_DST_UNREACH_ADDR:
				code = ICMP_UNREACH_HOST;
				break;
			case ICMP6_DST_UNREACH_ADMIN:
				code = ICMP_UNREACH_HOST_PROHIB;
				break;
			case ICMP6_DST_UNREACH_NOPORT:
				code = ICMP_UNREACH_PORT;
				break;
			default:
				return -1;
			}
			break;
		case ICMP6_PACKET_TOO_BIG:
			type = ICMP_UNREACH;
			code = ICMP_UNREACH_NEEDFRAG;
			/* IPv4 header is 20 bytes smaller than IPv6 */
			mtu -= 20;
			break;
		case ICMP6_TIME_EXCEEDED:
			type = ICMP_TIMXCEED;
			break;
		case ICMP6_PARAM_PROB:
			switch (code) {
			case ICMP6_PARAMPROB_HEADER:
				type = ICMP_PARAMPROB;
				code = ICMP_PARAMPROB_ERRATPTR;
				ptr  = ntohl(icmp6->icmp6_pptr);

				/* remap the offending-byte offset to the v4 header layout */
				if (ptr == PTR_IP6(ip6_vfc)) {
					; /* preserve */
				} else if (ptr == PTR_IP6(ip6_vfc) + 1) {
					ptr = PTR_IP(ip_tos);
				} else if (ptr == PTR_IP6(ip6_plen) ||
				    ptr == PTR_IP6(ip6_plen) + 1) {
					ptr = PTR_IP(ip_len);
				} else if (ptr == PTR_IP6(ip6_nxt)) {
					ptr = PTR_IP(ip_p);
				} else if (ptr == PTR_IP6(ip6_hlim)) {
					ptr = PTR_IP(ip_ttl);
				} else if (ptr >= PTR_IP6(ip6_src) &&
				    ptr < PTR_IP6(ip6_dst)) {
					ptr = PTR_IP(ip_src);
				} else if (ptr >= PTR_IP6(ip6_dst) &&
				    ptr < (int32_t)sizeof(struct ip6_hdr)) {
					ptr = PTR_IP(ip_dst);
				} else {
					return -1;
				}
				break;
			case ICMP6_PARAMPROB_NEXTHEADER:
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_PROTOCOL;
				break;
			default:
				return -1;
			}
			break;
		default:
			return -1;
		}
		icmp6->icmp6_type = type;
		icmp6->icmp6_code = code;
		/* aligns well with a icmpv4 nextmtu */
		icmp6->icmp6_mtu = htonl(mtu);
		/* icmpv4 pptr is a one most significant byte */
		if (ptr >= 0) {
			icmp6->icmp6_pptr = htonl(ptr << 24);
		}
		break;

	case AF_INET6:
		/* ICMPv4 -> ICMPv6 */
		icmp4 = arg;
		type  = icmp4->icmp_type;
		code  = icmp4->icmp_code;
		mtu   = ntohs(icmp4->icmp_nextmtu);

		switch (type) {
		case ICMP_ECHO:
			type = ICMP6_ECHO_REQUEST;
			break;
		case ICMP_ECHOREPLY:
			type = ICMP6_ECHO_REPLY;
			break;
		case ICMP_UNREACH:
			type = ICMP6_DST_UNREACH;
			switch (code) {
			case ICMP_UNREACH_NET:
			case ICMP_UNREACH_HOST:
			case ICMP_UNREACH_NET_UNKNOWN:
			case ICMP_UNREACH_HOST_UNKNOWN:
			case ICMP_UNREACH_ISOLATED:
			case ICMP_UNREACH_TOSNET:
			case ICMP_UNREACH_TOSHOST:
				code = ICMP6_DST_UNREACH_NOROUTE;
				break;
			case ICMP_UNREACH_PORT:
				code = ICMP6_DST_UNREACH_NOPORT;
				break;
			case ICMP_UNREACH_NET_PROHIB:
			case ICMP_UNREACH_HOST_PROHIB:
			case ICMP_UNREACH_FILTER_PROHIB:
			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
				code = ICMP6_DST_UNREACH_ADMIN;
				break;
			case ICMP_UNREACH_PROTOCOL:
				type = ICMP6_PARAM_PROB;
				code = ICMP6_PARAMPROB_NEXTHEADER;
				ptr  = offsetof(struct ip6_hdr, ip6_nxt);
				break;
			case ICMP_UNREACH_NEEDFRAG:
				type = ICMP6_PACKET_TOO_BIG;
				code = 0;
				/* IPv6 header is 20 bytes larger than IPv4 */
				mtu += 20;
				break;
			default:
				return -1;
			}
			break;
		case ICMP_TIMXCEED:
			type = ICMP6_TIME_EXCEEDED;
			break;
		case ICMP_PARAMPROB:
			type = ICMP6_PARAM_PROB;
			switch (code) {
			case ICMP_PARAMPROB_ERRATPTR:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			case ICMP_PARAMPROB_LENGTH:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			default:
				return -1;
			}

			/* remap the offending-byte offset to the v6 header layout */
			ptr = icmp4->icmp_pptr;
			if (ptr == 0 || ptr == PTR_IP(ip_tos)) {
				; /* preserve */
			} else if (ptr == PTR_IP(ip_len) ||
			    ptr == PTR_IP(ip_len) + 1) {
				ptr = PTR_IP6(ip6_plen);
			} else if (ptr == PTR_IP(ip_ttl)) {
				ptr = PTR_IP6(ip6_hlim);
			} else if (ptr == PTR_IP(ip_p)) {
				ptr = PTR_IP6(ip6_nxt);
			} else if (ptr >= PTR_IP(ip_src) &&
			    ptr < PTR_IP(ip_dst)) {
				ptr = PTR_IP6(ip6_src);
			} else if (ptr >= PTR_IP(ip_dst) &&
			    ptr < (int32_t)sizeof(struct ip)) {
				ptr = PTR_IP6(ip6_dst);
			} else {
				return -1;
			}
			break;
		default:
			return -1;
		}
		icmp4->icmp_type = type;
		icmp4->icmp_code = code;
		icmp4->icmp_nextmtu = htons(mtu);
		if (ptr >= 0) {
			icmp4->icmp_void = htonl(ptr);
		}
		break;
	}

	return 0;
}
4764 
/* Note: frees pbuf if PF_NAT64 is returned */
/*
 * NAT64: rewrite an IPv6 packet into IPv4 using the already-translated
 * addresses in pd->naddr/pd->ndaddr, then re-inject it via ip_input().
 * Returns PF_NAT64 on success (the pbuf has been consumed) or PF_DROP
 * on failure (pbuf untouched or unusable).
 */
static __attribute__((noinline)) int
pf_nat64_ipv6(pbuf_t *pbuf, int off, struct pf_pdesc *pd)
{
	struct ip               *ip4;
	struct mbuf *m;

	/*
	 * ip_input asserts for rcvif to be not NULL
	 * That may not be true for two corner cases
	 * 1. If for some reason a local app sends DNS
	 * AAAA query to local host
	 * 2. If IPv6 stack in kernel internally generates a
	 * message destined for a synthesized IPv6 end-point.
	 */
	if (pbuf->pb_ifp == NULL) {
		return PF_DROP;
	}

	/* replace the first "off" bytes (v6 header + ext headers) with a v4 header */
	ip4 = (struct ip *)pbuf_resize_segment(pbuf, 0, off, sizeof(*ip4));
	if (ip4 == NULL) {
		return PF_DROP;
	}

	ip4->ip_v   = 4;
	ip4->ip_hl  = 5;
	/*
	 * NOTE(review): masking the 8-bit pd->tos with htonl(0x0ff00000)
	 * looks suspect (byte-order-dependent result) -- confirm the
	 * intended traffic-class -> TOS mapping.
	 */
	ip4->ip_tos = pd->tos & htonl(0x0ff00000);
	ip4->ip_len = htons(sizeof(*ip4) + (pd->tot_len - off));
	ip4->ip_id  = 0;
	ip4->ip_off = htons(IP_DF);
	ip4->ip_ttl = pd->ttl;
	ip4->ip_p   = pd->proto;
	ip4->ip_sum = 0;
	ip4->ip_src = pd->naddr.v4addr;
	ip4->ip_dst = pd->ndaddr.v4addr;
	ip4->ip_sum = pbuf_inet_cksum(pbuf, 0, 0, ip4->ip_hl << 2);

	/* recalculate icmp checksums */
	if (pd->proto == IPPROTO_ICMP) {
		struct icmp *icmp;
		int hlen = sizeof(*ip4);

		icmp = (struct icmp *)pbuf_contig_segment(pbuf, hlen,
		    ICMP_MINLEN);
		if (icmp == NULL) {
			return PF_DROP;
		}

		icmp->icmp_cksum = 0;
		icmp->icmp_cksum = pbuf_inet_cksum(pbuf, 0, hlen,
		    ntohs(ip4->ip_len) - hlen);
	}

	/* hand the translated packet back to the IPv4 input path */
	if ((m = pbuf_to_mbuf(pbuf, TRUE)) != NULL) {
		ip_input(m);
	}

	return PF_NAT64;
}
4824 
/*
 * NAT46: rewrite an IPv4 packet into IPv6 using the already-translated
 * addresses in pd->naddr/pd->ndaddr, then re-inject it via ip6_input().
 * Returns PF_NAT64 on success (the pbuf has been consumed) or PF_DROP
 * on failure.  Note: frees pbuf if PF_NAT64 is returned.
 */
static __attribute__((noinline)) int
pf_nat64_ipv4(pbuf_t *pbuf, int off, struct pf_pdesc *pd)
{
	struct ip6_hdr          *ip6;
	struct mbuf *m;

	/* ip6_input asserts a non-NULL receive interface; see pf_nat64_ipv6 */
	if (pbuf->pb_ifp == NULL) {
		return PF_DROP;
	}

	/* replace the first "off" bytes (the v4 header) with a v6 header */
	ip6 = (struct ip6_hdr *)pbuf_resize_segment(pbuf, 0, off, sizeof(*ip6));
	if (ip6 == NULL) {
		return PF_DROP;
	}

	ip6->ip6_vfc  = htonl((6 << 28) | (pd->tos << 20));
	ip6->ip6_plen = htons(pd->tot_len - off);
	ip6->ip6_nxt  = pd->proto;
	ip6->ip6_hlim = pd->ttl;
	ip6->ip6_src = pd->naddr.v6addr;
	ip6->ip6_dst = pd->ndaddr.v6addr;

	/* recalculate icmp6 checksums */
	if (pd->proto == IPPROTO_ICMPV6) {
		struct icmp6_hdr *icmp6;
		int hlen = sizeof(*ip6);

		icmp6 = (struct icmp6_hdr *)pbuf_contig_segment(pbuf, hlen,
		    sizeof(*icmp6));
		if (icmp6 == NULL) {
			return PF_DROP;
		}

		icmp6->icmp6_cksum = 0;
		icmp6->icmp6_cksum = pbuf_inet6_cksum(pbuf,
		    IPPROTO_ICMPV6, hlen,
		    ntohs(ip6->ip6_plen));
	} else if (pd->proto == IPPROTO_UDP) {
		struct udphdr *uh;
		int hlen = sizeof(*ip6);

		uh = (struct udphdr *)pbuf_contig_segment(pbuf, hlen,
		    sizeof(*uh));
		if (uh == NULL) {
			return PF_DROP;
		}

		/* a v4 UDP checksum may be absent (0); v6 requires one */
		if (uh->uh_sum == 0) {
			uh->uh_sum = pbuf_inet6_cksum(pbuf, IPPROTO_UDP,
			    hlen, ntohs(ip6->ip6_plen));
		}
	}

	/* hand the translated packet back to the IPv6 input path */
	if ((m = pbuf_to_mbuf(pbuf, TRUE)) != NULL) {
		ip6_input(m);
	}

	return PF_NAT64;
}
4884 
4885 static __attribute__((noinline)) int
pf_test_rule(struct pf_rule ** rm,struct pf_state ** sm,int direction,struct pfi_kif * kif,pbuf_t * pbuf,int off,void * h,struct pf_pdesc * pd,struct pf_rule ** am,struct pf_ruleset ** rsm,struct ifqueue * ifq)4886 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
4887     struct pfi_kif *kif, pbuf_t *pbuf, int off, void *h,
4888     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
4889     struct ifqueue *ifq)
4890 {
4891 #pragma unused(h)
4892 	struct pf_rule          *nr = NULL;
4893 	struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
4894 	sa_family_t              af = pd->af;
4895 	struct pf_rule          *r, *a = NULL;
4896 	struct pf_ruleset       *ruleset = NULL;
4897 	struct pf_src_node      *nsn = NULL;
4898 	struct tcphdr           *th = pd->hdr.tcp;
4899 	struct udphdr           *uh = pd->hdr.udp;
4900 	u_short                  reason;
4901 	int                      rewrite = 0, hdrlen = 0;
4902 	int                      tag = -1;
4903 	unsigned int             rtableid = IFSCOPE_NONE;
4904 	int                      asd = 0;
4905 	int                      match = 0;
4906 	int                      state_icmp = 0;
4907 	u_int16_t                mss = tcp_mssdflt;
4908 	u_int8_t                 icmptype = 0, icmpcode = 0;
4909 #if SKYWALK
4910 	netns_token              nstoken = NULL;
4911 #endif
4912 
4913 	struct pf_grev1_hdr     *grev1 = pd->hdr.grev1;
4914 	union pf_state_xport bxport, bdxport, nxport, sxport, dxport;
4915 	struct pf_state_key      psk;
4916 
4917 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
4918 
4919 	PD_CLEAR_STATE_FLOWID(pd);
4920 
4921 	if (direction == PF_IN && pf_check_congestion(ifq)) {
4922 		REASON_SET(&reason, PFRES_CONGEST);
4923 		return PF_DROP;
4924 	}
4925 
4926 	hdrlen = 0;
4927 	sxport.spi = 0;
4928 	dxport.spi = 0;
4929 	nxport.spi = 0;
4930 
4931 	switch (pd->proto) {
4932 	case IPPROTO_TCP:
4933 		sxport.port = th->th_sport;
4934 		dxport.port = th->th_dport;
4935 		hdrlen = sizeof(*th);
4936 		break;
4937 	case IPPROTO_UDP:
4938 		sxport.port = uh->uh_sport;
4939 		dxport.port = uh->uh_dport;
4940 		hdrlen = sizeof(*uh);
4941 		break;
4942 #if INET
4943 	case IPPROTO_ICMP:
4944 		if (pd->af != AF_INET) {
4945 			break;
4946 		}
4947 		sxport.port = dxport.port = pd->hdr.icmp->icmp_id;
4948 		hdrlen = ICMP_MINLEN;
4949 		icmptype = pd->hdr.icmp->icmp_type;
4950 		icmpcode = pd->hdr.icmp->icmp_code;
4951 
4952 		if (ICMP_ERRORTYPE(icmptype)) {
4953 			state_icmp++;
4954 		}
4955 		break;
4956 #endif /* INET */
4957 	case IPPROTO_ICMPV6:
4958 		if (pd->af != AF_INET6) {
4959 			break;
4960 		}
4961 		sxport.port = dxport.port = pd->hdr.icmp6->icmp6_id;
4962 		hdrlen = sizeof(*pd->hdr.icmp6);
4963 		icmptype = pd->hdr.icmp6->icmp6_type;
4964 		icmpcode = pd->hdr.icmp6->icmp6_code;
4965 
4966 		if (ICMP6_ERRORTYPE(icmptype)) {
4967 			state_icmp++;
4968 		}
4969 		break;
4970 	case IPPROTO_GRE:
4971 		if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
4972 			sxport.call_id = dxport.call_id =
4973 			    pd->hdr.grev1->call_id;
4974 			hdrlen = sizeof(*pd->hdr.grev1);
4975 		}
4976 		break;
4977 	case IPPROTO_ESP:
4978 		sxport.spi = 0;
4979 		dxport.spi = pd->hdr.esp->spi;
4980 		hdrlen = sizeof(*pd->hdr.esp);
4981 		break;
4982 	}
4983 
4984 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
4985 
4986 	bxport = sxport;
4987 	bdxport = dxport;
4988 
4989 	if (direction == PF_OUT) {
4990 		nxport = sxport;
4991 	} else {
4992 		nxport = dxport;
4993 	}
4994 
4995 	/* check packet for BINAT/NAT/RDR */
4996 	if ((nr = pf_get_translation_aux(pd, pbuf, off, direction, kif, &nsn,
4997 	    saddr, &sxport, daddr, &dxport, &nxport
4998 #if SKYWALK
4999 	    , &nstoken
5000 #endif
5001 	    )) != NULL) {
5002 		int ua;
5003 		u_int16_t dport;
5004 
5005 		if (pd->af != pd->naf) {
5006 			ua = 0;
5007 		} else {
5008 			ua = 1;
5009 		}
5010 
5011 		PF_ACPY(&pd->baddr, saddr, af);
5012 		PF_ACPY(&pd->bdaddr, daddr, af);
5013 
5014 		switch (pd->proto) {
5015 		case IPPROTO_TCP:
5016 			if (pd->af != pd->naf ||
5017 			    PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5018 				pf_change_ap(direction, pd->mp, saddr,
5019 				    &th->th_sport, pd->ip_sum, &th->th_sum,
5020 				    &pd->naddr, nxport.port, 0, af,
5021 				    pd->naf, ua);
5022 				sxport.port = th->th_sport;
5023 			}
5024 
5025 			if (pd->af != pd->naf ||
5026 			    PF_ANEQ(daddr, &pd->ndaddr, pd->af) ||
5027 			    (nr && (nr->action == PF_RDR) &&
5028 			    (th->th_dport != nxport.port))) {
5029 				if (nr && nr->action == PF_RDR) {
5030 					dport = nxport.port;
5031 				} else {
5032 					dport = th->th_dport;
5033 				}
5034 				pf_change_ap(direction, pd->mp, daddr,
5035 				    &th->th_dport, pd->ip_sum,
5036 				    &th->th_sum, &pd->ndaddr,
5037 				    dport, 0, af, pd->naf, ua);
5038 				dxport.port = th->th_dport;
5039 			}
5040 			rewrite++;
5041 			break;
5042 
5043 		case IPPROTO_UDP:
5044 			if (pd->af != pd->naf ||
5045 			    PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5046 				pf_change_ap(direction, pd->mp, saddr,
5047 				    &uh->uh_sport, pd->ip_sum,
5048 				    &uh->uh_sum, &pd->naddr,
5049 				    nxport.port, 1, af, pd->naf, ua);
5050 				sxport.port = uh->uh_sport;
5051 			}
5052 
5053 			if (pd->af != pd->naf ||
5054 			    PF_ANEQ(daddr, &pd->ndaddr, pd->af) ||
5055 			    (nr && (nr->action == PF_RDR) &&
5056 			    (uh->uh_dport != nxport.port))) {
5057 				if (nr && nr->action == PF_RDR) {
5058 					dport = nxport.port;
5059 				} else {
5060 					dport = uh->uh_dport;
5061 				}
5062 				pf_change_ap(direction, pd->mp, daddr,
5063 				    &uh->uh_dport, pd->ip_sum,
5064 				    &uh->uh_sum, &pd->ndaddr,
5065 				    dport, 0, af, pd->naf, ua);
5066 				dxport.port = uh->uh_dport;
5067 			}
5068 			rewrite++;
5069 			break;
5070 #if INET
5071 		case IPPROTO_ICMP:
5072 			if (pd->af != AF_INET) {
5073 				break;
5074 			}
5075 			/*
5076 			 * TODO:
5077 			 * pd->af != pd->naf not handled yet here and would be
5078 			 * needed for NAT46 needed to support XLAT.
5079 			 * Will cross the bridge when it comes.
5080 			 */
5081 			if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5082 				pf_change_a(&saddr->v4addr.s_addr, pd->ip_sum,
5083 				    pd->naddr.v4addr.s_addr, 0);
5084 				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
5085 					pd->hdr.icmp->icmp_cksum, sxport.port,
5086 					nxport.port, 0);
5087 				pd->hdr.icmp->icmp_id = nxport.port;
5088 			}
5089 
5090 			if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5091 				pf_change_a(&daddr->v4addr.s_addr, pd->ip_sum,
5092 				    pd->ndaddr.v4addr.s_addr, 0);
5093 			}
5094 			++rewrite;
5095 			break;
5096 #endif /* INET */
5097 		case IPPROTO_ICMPV6:
5098 			if (pd->af != AF_INET6) {
5099 				break;
5100 			}
5101 
5102 			if (pd->af != pd->naf ||
5103 			    PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5104 				pf_change_addr(saddr,
5105 				    &pd->hdr.icmp6->icmp6_cksum,
5106 				    &pd->naddr, 0, pd->af, pd->naf);
5107 			}
5108 
5109 			if (pd->af != pd->naf ||
5110 			    PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5111 				pf_change_addr(daddr,
5112 				    &pd->hdr.icmp6->icmp6_cksum,
5113 				    &pd->ndaddr, 0, pd->af, pd->naf);
5114 			}
5115 
5116 			if (pd->af != pd->naf) {
5117 				if (pf_translate_icmp_af(AF_INET,
5118 				    pd->hdr.icmp6)) {
5119 					return PF_DROP;
5120 				}
5121 				pd->proto = IPPROTO_ICMP;
5122 			}
5123 			rewrite++;
5124 			break;
5125 		case IPPROTO_GRE:
5126 			if ((direction == PF_IN) &&
5127 			    (pd->proto_variant == PF_GRE_PPTP_VARIANT)) {
5128 				grev1->call_id = nxport.call_id;
5129 			}
5130 
5131 			switch (pd->af) {
5132 #if INET
5133 			case AF_INET:
5134 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5135 					pf_change_a(&saddr->v4addr.s_addr,
5136 					    pd->ip_sum,
5137 					    pd->naddr.v4addr.s_addr, 0);
5138 				}
5139 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5140 					pf_change_a(&daddr->v4addr.s_addr,
5141 					    pd->ip_sum,
5142 					    pd->ndaddr.v4addr.s_addr, 0);
5143 				}
5144 				break;
5145 #endif /* INET */
5146 			case AF_INET6:
5147 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5148 					PF_ACPY(saddr, &pd->naddr, AF_INET6);
5149 				}
5150 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5151 					PF_ACPY(daddr, &pd->ndaddr, AF_INET6);
5152 				}
5153 				break;
5154 			}
5155 			++rewrite;
5156 			break;
5157 		case IPPROTO_ESP:
5158 			if (direction == PF_OUT) {
5159 				bxport.spi = 0;
5160 			}
5161 
5162 			switch (pd->af) {
5163 #if INET
5164 			case AF_INET:
5165 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5166 					pf_change_a(&saddr->v4addr.s_addr,
5167 					    pd->ip_sum, pd->naddr.v4addr.s_addr, 0);
5168 				}
5169 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5170 					pf_change_a(&daddr->v4addr.s_addr,
5171 					    pd->ip_sum,
5172 					    pd->ndaddr.v4addr.s_addr, 0);
5173 				}
5174 				break;
5175 #endif /* INET */
5176 			case AF_INET6:
5177 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5178 					PF_ACPY(saddr, &pd->naddr, AF_INET6);
5179 				}
5180 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5181 					PF_ACPY(daddr, &pd->ndaddr, AF_INET6);
5182 				}
5183 				break;
5184 			}
5185 			break;
5186 		default:
5187 			switch (pd->af) {
5188 #if INET
5189 			case AF_INET:
5190 				if ((pd->naf != AF_INET) ||
5191 				    (PF_ANEQ(saddr, &pd->naddr, pd->af))) {
5192 					pf_change_addr(saddr, pd->ip_sum,
5193 					    &pd->naddr, 0, af, pd->naf);
5194 				}
5195 
5196 				if ((pd->naf != AF_INET) ||
5197 				    (PF_ANEQ(daddr, &pd->ndaddr, pd->af))) {
5198 					pf_change_addr(daddr, pd->ip_sum,
5199 					    &pd->ndaddr, 0, af, pd->naf);
5200 				}
5201 				break;
5202 #endif /* INET */
5203 			case AF_INET6:
5204 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5205 					PF_ACPY(saddr, &pd->naddr, af);
5206 				}
5207 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5208 					PF_ACPY(daddr, &pd->ndaddr, af);
5209 				}
5210 				break;
5211 			}
5212 			break;
5213 		}
5214 
5215 		if (nr->natpass) {
5216 			r = NULL;
5217 		}
5218 		pd->nat_rule = nr;
5219 		pd->af = pd->naf;
5220 	} else {
5221 #if SKYWALK
5222 		VERIFY(!NETNS_TOKEN_VALID(&nstoken));
5223 #endif
5224 	}
5225 
5226 	if (nr && nr->tag > 0) {
5227 		tag = nr->tag;
5228 	}
5229 
5230 	while (r != NULL) {
5231 		r->evaluations++;
5232 		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
5233 			r = r->skip[PF_SKIP_IFP].ptr;
5234 		} else if (r->direction && r->direction != direction) {
5235 			r = r->skip[PF_SKIP_DIR].ptr;
5236 		} else if (r->af && r->af != pd->af) {
5237 			r = r->skip[PF_SKIP_AF].ptr;
5238 		} else if (r->proto && r->proto != pd->proto) {
5239 			r = r->skip[PF_SKIP_PROTO].ptr;
5240 		} else if (PF_MISMATCHAW(&r->src.addr, saddr, pd->af,
5241 		    r->src.neg, kif)) {
5242 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
5243 		}
5244 		/* tcp/udp only. port_op always 0 in other cases */
5245 		else if (r->proto == pd->proto &&
5246 		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
5247 		    r->src.xport.range.op &&
5248 		    !pf_match_port(r->src.xport.range.op,
5249 		    r->src.xport.range.port[0], r->src.xport.range.port[1],
5250 		    th->th_sport)) {
5251 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
5252 		} else if (PF_MISMATCHAW(&r->dst.addr, daddr, pd->af,
5253 		    r->dst.neg, NULL)) {
5254 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
5255 		}
5256 		/* tcp/udp only. port_op always 0 in other cases */
5257 		else if (r->proto == pd->proto &&
5258 		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
5259 		    r->dst.xport.range.op &&
5260 		    !pf_match_port(r->dst.xport.range.op,
5261 		    r->dst.xport.range.port[0], r->dst.xport.range.port[1],
5262 		    th->th_dport)) {
5263 			r = r->skip[PF_SKIP_DST_PORT].ptr;
5264 		}
5265 		/* icmp only. type always 0 in other cases */
5266 		else if (r->type && r->type != icmptype + 1) {
5267 			r = TAILQ_NEXT(r, entries);
5268 		}
5269 		/* icmp only. type always 0 in other cases */
5270 		else if (r->code && r->code != icmpcode + 1) {
5271 			r = TAILQ_NEXT(r, entries);
5272 		} else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
5273 		    !(r->tos & pd->tos)) {
5274 			r = TAILQ_NEXT(r, entries);
5275 		} else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
5276 		    !(r->tos & (pd->tos & DSCP_MASK))) {
5277 			r = TAILQ_NEXT(r, entries);
5278 		} else if ((r->rule_flag & PFRULE_SC) && r->tos &&
5279 		    ((r->tos & SCIDX_MASK) != pd->sc)) {
5280 			r = TAILQ_NEXT(r, entries);
5281 		} else if (r->rule_flag & PFRULE_FRAGMENT) {
5282 			r = TAILQ_NEXT(r, entries);
5283 		} else if (pd->proto == IPPROTO_TCP &&
5284 		    (r->flagset & th->th_flags) != r->flags) {
5285 			r = TAILQ_NEXT(r, entries);
5286 		}
5287 		/* tcp/udp only. uid.op always 0 in other cases */
5288 		else if (r->uid.op && (pd->lookup.done || ((void)(pd->lookup.done =
5289 		    pf_socket_lookup(direction, pd)), 1)) &&
5290 		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
5291 		    pd->lookup.uid)) {
5292 			r = TAILQ_NEXT(r, entries);
5293 		}
5294 		/* tcp/udp only. gid.op always 0 in other cases */
5295 		else if (r->gid.op && (pd->lookup.done || ((void)(pd->lookup.done =
5296 		    pf_socket_lookup(direction, pd)), 1)) &&
5297 		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
5298 		    pd->lookup.gid)) {
5299 			r = TAILQ_NEXT(r, entries);
5300 		} else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
5301 			r = TAILQ_NEXT(r, entries);
5302 		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
5303 			r = TAILQ_NEXT(r, entries);
5304 		} else if (r->os_fingerprint != PF_OSFP_ANY &&
5305 		    (pd->proto != IPPROTO_TCP || !pf_osfp_match(
5306 			    pf_osfp_fingerprint(pd, pbuf, off, th),
5307 			    r->os_fingerprint))) {
5308 			r = TAILQ_NEXT(r, entries);
5309 		} else {
5310 			if (r->tag) {
5311 				tag = r->tag;
5312 			}
5313 			if (PF_RTABLEID_IS_VALID(r->rtableid)) {
5314 				rtableid = r->rtableid;
5315 			}
5316 			if (r->anchor == NULL) {
5317 				match = 1;
5318 				*rm = r;
5319 				*am = a;
5320 				*rsm = ruleset;
5321 				if ((*rm)->quick) {
5322 					break;
5323 				}
5324 				r = TAILQ_NEXT(r, entries);
5325 			} else {
5326 				pf_step_into_anchor(&asd, &ruleset,
5327 				    PF_RULESET_FILTER, &r, &a, &match);
5328 			}
5329 		}
5330 		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
5331 		    PF_RULESET_FILTER, &r, &a, &match)) {
5332 			break;
5333 		}
5334 	}
5335 	r = *rm;
5336 	a = *am;
5337 	ruleset = *rsm;
5338 
5339 	REASON_SET(&reason, PFRES_MATCH);
5340 
5341 	if (r->log || (nr != NULL && nr->log)) {
5342 		if (rewrite > 0) {
5343 			if (rewrite < off + hdrlen) {
5344 				rewrite = off + hdrlen;
5345 			}
5346 
5347 			if (pf_lazy_makewritable(pd, pbuf, rewrite) == NULL) {
5348 				REASON_SET(&reason, PFRES_MEMORY);
5349 #if SKYWALK
5350 				netns_release(&nstoken);
5351 #endif
5352 				return PF_DROP;
5353 			}
5354 
5355 			pbuf_copy_back(pbuf, off, hdrlen, pd->hdr.any);
5356 		}
5357 		PFLOG_PACKET(kif, h, pbuf, pd->af, direction, reason,
5358 		    r->log ? r : nr, a, ruleset, pd);
5359 	}
5360 
5361 	if ((r->action == PF_DROP) &&
5362 	    ((r->rule_flag & PFRULE_RETURNRST) ||
5363 	    (r->rule_flag & PFRULE_RETURNICMP) ||
5364 	    (r->rule_flag & PFRULE_RETURN))) {
5365 		/* undo NAT changes, if they have taken place */
5366 		/* XXX For NAT64 we are not reverting the changes */
5367 		if (nr != NULL && nr->action != PF_NAT64) {
5368 			if (direction == PF_OUT) {
5369 				pd->af = af;
5370 				switch (pd->proto) {
5371 				case IPPROTO_TCP:
5372 					pf_change_ap(direction, pd->mp, saddr,
5373 					    &th->th_sport, pd->ip_sum,
5374 					    &th->th_sum, &pd->baddr,
5375 					    bxport.port, 0, af, pd->af, 1);
5376 					sxport.port = th->th_sport;
5377 					rewrite++;
5378 					break;
5379 				case IPPROTO_UDP:
5380 					pf_change_ap(direction, pd->mp, saddr,
5381 					    &pd->hdr.udp->uh_sport, pd->ip_sum,
5382 					    &pd->hdr.udp->uh_sum, &pd->baddr,
5383 					    bxport.port, 1, af, pd->af, 1);
5384 					sxport.port = pd->hdr.udp->uh_sport;
5385 					rewrite++;
5386 					break;
5387 				case IPPROTO_ICMP:
5388 				case IPPROTO_ICMPV6:
5389 					/* nothing! */
5390 					break;
5391 				case IPPROTO_GRE:
5392 					PF_ACPY(&pd->baddr, saddr, af);
5393 					++rewrite;
5394 					switch (af) {
5395 #if INET
5396 					case AF_INET:
5397 						pf_change_a(&saddr->v4addr.s_addr,
5398 						    pd->ip_sum,
5399 						    pd->baddr.v4addr.s_addr, 0);
5400 						break;
5401 #endif /* INET */
5402 					case AF_INET6:
5403 						PF_ACPY(saddr, &pd->baddr,
5404 						    AF_INET6);
5405 						break;
5406 					}
5407 					break;
5408 				case IPPROTO_ESP:
5409 					PF_ACPY(&pd->baddr, saddr, af);
5410 					switch (af) {
5411 #if INET
5412 					case AF_INET:
5413 						pf_change_a(&saddr->v4addr.s_addr,
5414 						    pd->ip_sum,
5415 						    pd->baddr.v4addr.s_addr, 0);
5416 						break;
5417 #endif /* INET */
5418 					case AF_INET6:
5419 						PF_ACPY(saddr, &pd->baddr,
5420 						    AF_INET6);
5421 						break;
5422 					}
5423 					break;
5424 				default:
5425 					switch (af) {
5426 					case AF_INET:
5427 						pf_change_a(&saddr->v4addr.s_addr,
5428 						    pd->ip_sum,
5429 						    pd->baddr.v4addr.s_addr, 0);
5430 						break;
5431 					case AF_INET6:
5432 						PF_ACPY(saddr, &pd->baddr, af);
5433 						break;
5434 					}
5435 				}
5436 			} else {
5437 				switch (pd->proto) {
5438 				case IPPROTO_TCP:
5439 					pf_change_ap(direction, pd->mp, daddr,
5440 					    &th->th_dport, pd->ip_sum,
5441 					    &th->th_sum, &pd->bdaddr,
5442 					    bdxport.port, 0, af, pd->af, 1);
5443 					dxport.port = th->th_dport;
5444 					rewrite++;
5445 					break;
5446 				case IPPROTO_UDP:
5447 					pf_change_ap(direction, pd->mp, daddr,
5448 					    &pd->hdr.udp->uh_dport, pd->ip_sum,
5449 					    &pd->hdr.udp->uh_sum, &pd->bdaddr,
5450 					    bdxport.port, 1, af, pd->af, 1);
5451 					dxport.port = pd->hdr.udp->uh_dport;
5452 					rewrite++;
5453 					break;
5454 				case IPPROTO_ICMP:
5455 				case IPPROTO_ICMPV6:
5456 					/* nothing! */
5457 					break;
5458 				case IPPROTO_GRE:
5459 					if (pd->proto_variant ==
5460 					    PF_GRE_PPTP_VARIANT) {
5461 						grev1->call_id =
5462 						    bdxport.call_id;
5463 					}
5464 					++rewrite;
5465 					switch (af) {
5466 #if INET
5467 					case AF_INET:
5468 						pf_change_a(&daddr->v4addr.s_addr,
5469 						    pd->ip_sum,
5470 						    pd->bdaddr.v4addr.s_addr, 0);
5471 						break;
5472 #endif /* INET */
5473 					case AF_INET6:
5474 						PF_ACPY(daddr, &pd->bdaddr,
5475 						    AF_INET6);
5476 						break;
5477 					}
5478 					break;
5479 				case IPPROTO_ESP:
5480 					switch (af) {
5481 #if INET
5482 					case AF_INET:
5483 						pf_change_a(&daddr->v4addr.s_addr,
5484 						    pd->ip_sum,
5485 						    pd->bdaddr.v4addr.s_addr, 0);
5486 						break;
5487 #endif /* INET */
5488 					case AF_INET6:
5489 						PF_ACPY(daddr, &pd->bdaddr,
5490 						    AF_INET6);
5491 						break;
5492 					}
5493 					break;
5494 				default:
5495 					switch (af) {
5496 					case AF_INET:
5497 						pf_change_a(&daddr->v4addr.s_addr,
5498 						    pd->ip_sum,
5499 						    pd->bdaddr.v4addr.s_addr, 0);
5500 						break;
5501 					case AF_INET6:
5502 						PF_ACPY(daddr, &pd->bdaddr, af);
5503 						break;
5504 					}
5505 				}
5506 			}
5507 		}
5508 		if (pd->proto == IPPROTO_TCP &&
5509 		    ((r->rule_flag & PFRULE_RETURNRST) ||
5510 		    (r->rule_flag & PFRULE_RETURN)) &&
5511 		    !(th->th_flags & TH_RST)) {
5512 			u_int32_t        ack = ntohl(th->th_seq) + pd->p_len;
5513 			int              len = 0;
5514 			struct ip       *h4;
5515 			struct ip6_hdr  *h6;
5516 
5517 			switch (pd->af) {
5518 			case AF_INET:
5519 				h4 = pbuf->pb_data;
5520 				len = ntohs(h4->ip_len) - off;
5521 				break;
5522 			case AF_INET6:
5523 				h6 = pbuf->pb_data;
5524 				len = ntohs(h6->ip6_plen) -
5525 				    (off - sizeof(*h6));
5526 				break;
5527 			}
5528 
5529 			if (pf_check_proto_cksum(pbuf, off, len, IPPROTO_TCP,
5530 			    pd->af)) {
5531 				REASON_SET(&reason, PFRES_PROTCKSUM);
5532 			} else {
5533 				if (th->th_flags & TH_SYN) {
5534 					ack++;
5535 				}
5536 				if (th->th_flags & TH_FIN) {
5537 					ack++;
5538 				}
5539 				pf_send_tcp(r, pd->af, pd->dst,
5540 				    pd->src, th->th_dport, th->th_sport,
5541 				    ntohl(th->th_ack), ack, TH_RST | TH_ACK, 0, 0,
5542 				    r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
5543 			}
5544 		} else if (pd->proto != IPPROTO_ICMP && pd->af == AF_INET &&
5545 		    pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
5546 		    r->return_icmp) {
5547 			pf_send_icmp(pbuf, r->return_icmp >> 8,
5548 			    r->return_icmp & 255, pd->af, r);
5549 		} else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
5550 		    pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
5551 		    r->return_icmp6) {
5552 			pf_send_icmp(pbuf, r->return_icmp6 >> 8,
5553 			    r->return_icmp6 & 255, pd->af, r);
5554 		}
5555 	}
5556 
5557 	if (r->action == PF_DROP) {
5558 #if SKYWALK
5559 		netns_release(&nstoken);
5560 #endif
5561 		return PF_DROP;
5562 	}
5563 
5564 	/* prepare state key, for flowhash and/or the state (if created) */
5565 	bzero(&psk, sizeof(psk));
5566 	psk.proto = pd->proto;
5567 	psk.direction = direction;
5568 	if (pd->proto == IPPROTO_UDP) {
5569 		if (ntohs(pd->hdr.udp->uh_sport) == PF_IKE_PORT &&
5570 		    ntohs(pd->hdr.udp->uh_dport) == PF_IKE_PORT) {
5571 			psk.proto_variant = PF_EXTFILTER_APD;
5572 		} else {
5573 			psk.proto_variant = nr ? nr->extfilter : r->extfilter;
5574 			if (psk.proto_variant < PF_EXTFILTER_APD) {
5575 				psk.proto_variant = PF_EXTFILTER_APD;
5576 			}
5577 		}
5578 	} else if (pd->proto == IPPROTO_GRE) {
5579 		psk.proto_variant = pd->proto_variant;
5580 	}
5581 	if (direction == PF_OUT) {
5582 		psk.af_gwy = af;
5583 		PF_ACPY(&psk.gwy.addr, saddr, af);
5584 		PF_ACPY(&psk.ext_gwy.addr, daddr, af);
5585 		switch (pd->proto) {
5586 		case IPPROTO_ESP:
5587 			psk.gwy.xport.spi = 0;
5588 			psk.ext_gwy.xport.spi = pd->hdr.esp->spi;
5589 			break;
5590 		case IPPROTO_ICMP:
5591 		case IPPROTO_ICMPV6:
5592 			/*
5593 			 * NAT64 requires protocol translation  between ICMPv4
5594 			 * and ICMPv6. TCP and UDP do not require protocol
5595 			 * translation. To avoid adding complexity just to
5596 			 * handle ICMP(v4addr/v6addr), we always lookup  for
5597 			 * proto = IPPROTO_ICMP on both LAN and WAN side
5598 			 */
5599 			psk.proto = IPPROTO_ICMP;
5600 			psk.gwy.xport.port = nxport.port;
5601 			psk.ext_gwy.xport.spi = 0;
5602 			break;
5603 		default:
5604 			psk.gwy.xport = sxport;
5605 			psk.ext_gwy.xport = dxport;
5606 			break;
5607 		}
5608 		psk.af_lan = af;
5609 		if (nr != NULL) {
5610 			PF_ACPY(&psk.lan.addr, &pd->baddr, af);
5611 			psk.lan.xport = bxport;
5612 			PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af);
5613 			psk.ext_lan.xport = bdxport;
5614 		} else {
5615 			PF_ACPY(&psk.lan.addr, &psk.gwy.addr, af);
5616 			psk.lan.xport = psk.gwy.xport;
5617 			PF_ACPY(&psk.ext_lan.addr, &psk.ext_gwy.addr, af);
5618 			psk.ext_lan.xport = psk.ext_gwy.xport;
5619 		}
5620 	} else {
5621 		psk.af_lan = af;
5622 		if (nr && nr->action == PF_NAT64) {
5623 			PF_ACPY(&psk.lan.addr, &pd->baddr, af);
5624 			PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af);
5625 		} else {
5626 			PF_ACPY(&psk.lan.addr, daddr, af);
5627 			PF_ACPY(&psk.ext_lan.addr, saddr, af);
5628 		}
5629 		switch (pd->proto) {
5630 		case IPPROTO_ICMP:
5631 		case IPPROTO_ICMPV6:
5632 			/*
5633 			 * NAT64 requires protocol translation  between ICMPv4
5634 			 * and ICMPv6. TCP and UDP do not require protocol
5635 			 * translation. To avoid adding complexity just to
5636 			 * handle ICMP(v4addr/v6addr), we always lookup  for
5637 			 * proto = IPPROTO_ICMP on both LAN and WAN side
5638 			 */
5639 			psk.proto = IPPROTO_ICMP;
5640 			if (nr && nr->action == PF_NAT64) {
5641 				psk.lan.xport = bxport;
5642 				psk.ext_lan.xport = bxport;
5643 			} else {
5644 				psk.lan.xport = nxport;
5645 				psk.ext_lan.xport.spi = 0;
5646 			}
5647 			break;
5648 		case IPPROTO_ESP:
5649 			psk.ext_lan.xport.spi = 0;
5650 			psk.lan.xport.spi = pd->hdr.esp->spi;
5651 			break;
5652 		default:
5653 			if (nr != NULL) {
5654 				if (nr->action == PF_NAT64) {
5655 					psk.lan.xport = bxport;
5656 					psk.ext_lan.xport = bdxport;
5657 				} else {
5658 					psk.lan.xport = dxport;
5659 					psk.ext_lan.xport = sxport;
5660 				}
5661 			} else {
5662 				psk.lan.xport = dxport;
5663 				psk.ext_lan.xport = sxport;
5664 			}
5665 			break;
5666 		}
5667 		psk.af_gwy = pd->naf;
5668 		if (nr != NULL) {
5669 			if (nr->action == PF_NAT64) {
5670 				PF_ACPY(&psk.gwy.addr, &pd->naddr, pd->naf);
5671 				PF_ACPY(&psk.ext_gwy.addr, &pd->ndaddr,
5672 				    pd->naf);
5673 				if ((pd->proto == IPPROTO_ICMPV6) ||
5674 				    (pd->proto == IPPROTO_ICMP)) {
5675 					psk.gwy.xport = nxport;
5676 					psk.ext_gwy.xport = nxport;
5677 				} else {
5678 					psk.gwy.xport = sxport;
5679 					psk.ext_gwy.xport = dxport;
5680 				}
5681 			} else {
5682 				PF_ACPY(&psk.gwy.addr, &pd->bdaddr, af);
5683 				psk.gwy.xport = bdxport;
5684 				PF_ACPY(&psk.ext_gwy.addr, saddr, af);
5685 				psk.ext_gwy.xport = sxport;
5686 			}
5687 		} else {
5688 			PF_ACPY(&psk.gwy.addr, &psk.lan.addr, af);
5689 			psk.gwy.xport = psk.lan.xport;
5690 			PF_ACPY(&psk.ext_gwy.addr, &psk.ext_lan.addr, af);
5691 			psk.ext_gwy.xport = psk.ext_lan.xport;
5692 		}
5693 	}
5694 	if (pd->pktflags & PKTF_FLOW_ID) {
5695 		/* flow hash was already computed outside of PF */
5696 		psk.flowsrc = pd->flowsrc;
5697 		psk.flowhash = pd->flowhash;
5698 	} else {
5699 		/*
5700 		 * Allocation of flow identifier is deferred until a PF state
5701 		 * creation is needed for this flow.
5702 		 */
5703 		pd->pktflags &= ~PKTF_FLOW_ADV;
5704 		pd->flowhash = 0;
5705 	}
5706 
5707 	if (__improbable(pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, pd))) {
5708 		REASON_SET(&reason, PFRES_MEMORY);
5709 #if SKYWALK
5710 		netns_release(&nstoken);
5711 #endif
5712 		return PF_DROP;
5713 	}
5714 
5715 	if (!state_icmp && (r->keep_state || nr != NULL ||
5716 	    (pd->flags & PFDESC_TCP_NORM))) {
5717 		/* create new state */
5718 		struct pf_state *s = NULL;
5719 		struct pf_state_key *sk = NULL;
5720 		struct pf_src_node *sn = NULL;
5721 		struct pf_ike_hdr ike;
5722 
5723 		if (pd->proto == IPPROTO_UDP) {
5724 			size_t plen = pbuf->pb_packet_len - off - sizeof(*uh);
5725 
5726 			if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
5727 			    ntohs(uh->uh_dport) == PF_IKE_PORT &&
5728 			    plen >= PF_IKE_PACKET_MINSIZE) {
5729 				if (plen > PF_IKE_PACKET_MINSIZE) {
5730 					plen = PF_IKE_PACKET_MINSIZE;
5731 				}
5732 				pbuf_copy_data(pbuf, off + sizeof(*uh), plen,
5733 				    &ike);
5734 			}
5735 		}
5736 
5737 		if (nr != NULL && pd->proto == IPPROTO_ESP &&
5738 		    direction == PF_OUT) {
5739 			struct pf_state_key_cmp sk0;
5740 			struct pf_state *s0;
5741 
5742 			/*
5743 			 * <[email protected]>
5744 			 * This squelches state creation if the external
5745 			 * address matches an existing incomplete state with a
5746 			 * different internal address.  Only one 'blocking'
5747 			 * partial state is allowed for each external address.
5748 			 */
5749 #if SKYWALK
5750 			/*
5751 			 * XXXSCW:
5752 			 *
5753 			 * It's not clear how this impacts netns. The original
5754 			 * state will hold the port reservation token but what
5755 			 * happens to other "Cone NAT" states when the first is
5756 			 * torn down?
5757 			 */
5758 #endif
5759 			memset(&sk0, 0, sizeof(sk0));
5760 			sk0.af_gwy = pd->af;
5761 			sk0.proto = IPPROTO_ESP;
5762 			PF_ACPY(&sk0.gwy.addr, saddr, sk0.af_gwy);
5763 			PF_ACPY(&sk0.ext_gwy.addr, daddr, sk0.af_gwy);
5764 			s0 = pf_find_state(kif, &sk0, PF_IN);
5765 
5766 			if (s0 && PF_ANEQ(&s0->state_key->lan.addr,
5767 			    pd->src, pd->af)) {
5768 				nsn = 0;
5769 				goto cleanup;
5770 			}
5771 		}
5772 
5773 		/* check maximums */
5774 		if (r->max_states && (r->states >= r->max_states)) {
5775 			pf_status.lcounters[LCNT_STATES]++;
5776 			REASON_SET(&reason, PFRES_MAXSTATES);
5777 			goto cleanup;
5778 		}
5779 		/* src node for filter rule */
5780 		if ((r->rule_flag & PFRULE_SRCTRACK ||
5781 		    r->rpool.opts & PF_POOL_STICKYADDR) &&
5782 		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
5783 			REASON_SET(&reason, PFRES_SRCLIMIT);
5784 			goto cleanup;
5785 		}
5786 		/* src node for translation rule */
5787 		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
5788 		    ((direction == PF_OUT &&
5789 		    nr->action != PF_RDR &&
5790 		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
5791 		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
5792 			REASON_SET(&reason, PFRES_SRCLIMIT);
5793 			goto cleanup;
5794 		}
5795 		s = pool_get(&pf_state_pl, PR_WAITOK);
5796 		if (s == NULL) {
5797 			REASON_SET(&reason, PFRES_MEMORY);
5798 cleanup:
5799 			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
5800 				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
5801 				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
5802 				pf_status.src_nodes--;
5803 				pool_put(&pf_src_tree_pl, sn);
5804 			}
5805 			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
5806 			    nsn->expire == 0) {
5807 				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
5808 				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
5809 				pf_status.src_nodes--;
5810 				pool_put(&pf_src_tree_pl, nsn);
5811 			}
5812 			if (s != NULL) {
5813 				pf_detach_state(s, 0);
5814 			} else if (sk != NULL) {
5815 				if (sk->app_state) {
5816 					pool_put(&pf_app_state_pl,
5817 					    sk->app_state);
5818 				}
5819 				pf_state_key_release_flowid(sk);
5820 				pool_put(&pf_state_key_pl, sk);
5821 			}
5822 #if SKYWALK
5823 			netns_release(&nstoken);
5824 #endif
5825 			return PF_DROP;
5826 		}
5827 		bzero(s, sizeof(*s));
5828 		TAILQ_INIT(&s->unlink_hooks);
5829 		s->rule.ptr = r;
5830 		s->nat_rule.ptr = nr;
5831 		s->anchor.ptr = a;
5832 		STATE_INC_COUNTERS(s);
5833 		s->allow_opts = r->allow_opts;
5834 		s->log = r->log & PF_LOG_ALL;
5835 		if (nr != NULL) {
5836 			s->log |= nr->log & PF_LOG_ALL;
5837 		}
5838 		switch (pd->proto) {
5839 		case IPPROTO_TCP:
5840 			s->src.seqlo = ntohl(th->th_seq);
5841 			s->src.seqhi = s->src.seqlo + pd->p_len + 1;
5842 			if ((th->th_flags & (TH_SYN | TH_ACK)) ==
5843 			    TH_SYN && r->keep_state == PF_STATE_MODULATE) {
5844 				/* Generate sequence number modulator */
5845 				if ((s->src.seqdiff = pf_tcp_iss(pd) -
5846 				    s->src.seqlo) == 0) {
5847 					s->src.seqdiff = 1;
5848 				}
5849 				pf_change_a(&th->th_seq, &th->th_sum,
5850 				    htonl(s->src.seqlo + s->src.seqdiff), 0);
5851 				rewrite = off + sizeof(*th);
5852 			} else {
5853 				s->src.seqdiff = 0;
5854 			}
5855 			if (th->th_flags & TH_SYN) {
5856 				s->src.seqhi++;
5857 				s->src.wscale = pf_get_wscale(pbuf, off,
5858 				    th->th_off, af);
5859 			}
5860 			s->src.max_win = MAX(ntohs(th->th_win), 1);
5861 			if (s->src.wscale & PF_WSCALE_MASK) {
5862 				/* Remove scale factor from initial window */
5863 				int win = s->src.max_win;
5864 				win += 1 << (s->src.wscale & PF_WSCALE_MASK);
5865 				s->src.max_win = (win - 1) >>
5866 				    (s->src.wscale & PF_WSCALE_MASK);
5867 			}
5868 			if (th->th_flags & TH_FIN) {
5869 				s->src.seqhi++;
5870 			}
5871 			s->dst.seqhi = 1;
5872 			s->dst.max_win = 1;
5873 			s->src.state = TCPS_SYN_SENT;
5874 			s->dst.state = TCPS_CLOSED;
5875 			s->timeout = PFTM_TCP_FIRST_PACKET;
5876 			break;
5877 		case IPPROTO_UDP:
5878 			s->src.state = PFUDPS_SINGLE;
5879 			s->dst.state = PFUDPS_NO_TRAFFIC;
5880 			s->timeout = PFTM_UDP_FIRST_PACKET;
5881 			break;
5882 		case IPPROTO_ICMP:
5883 		case IPPROTO_ICMPV6:
5884 			s->timeout = PFTM_ICMP_FIRST_PACKET;
5885 			break;
5886 		case IPPROTO_GRE:
5887 			s->src.state = PFGRE1S_INITIATING;
5888 			s->dst.state = PFGRE1S_NO_TRAFFIC;
5889 			s->timeout = PFTM_GREv1_INITIATING;
5890 			break;
5891 		case IPPROTO_ESP:
5892 			s->src.state = PFESPS_INITIATING;
5893 			s->dst.state = PFESPS_NO_TRAFFIC;
5894 			s->timeout = PFTM_ESP_FIRST_PACKET;
5895 			break;
5896 		default:
5897 			s->src.state = PFOTHERS_SINGLE;
5898 			s->dst.state = PFOTHERS_NO_TRAFFIC;
5899 			s->timeout = PFTM_OTHER_FIRST_PACKET;
5900 		}
5901 
5902 		s->creation = pf_time_second();
5903 		s->expire = pf_time_second();
5904 
5905 		if (sn != NULL) {
5906 			s->src_node = sn;
5907 			s->src_node->states++;
5908 			VERIFY(s->src_node->states != 0);
5909 		}
5910 		if (nsn != NULL) {
5911 			PF_ACPY(&nsn->raddr, &pd->naddr, af);
5912 			s->nat_src_node = nsn;
5913 			s->nat_src_node->states++;
5914 			VERIFY(s->nat_src_node->states != 0);
5915 		}
5916 		if (pd->proto == IPPROTO_TCP) {
5917 			if ((pd->flags & PFDESC_TCP_NORM) &&
5918 			    pf_normalize_tcp_init(pbuf, off, pd, th, &s->src,
5919 			    &s->dst)) {
5920 				REASON_SET(&reason, PFRES_MEMORY);
5921 				pf_src_tree_remove_state(s);
5922 				STATE_DEC_COUNTERS(s);
5923 #if SKYWALK
5924 				netns_release(&nstoken);
5925 #endif
5926 				pool_put(&pf_state_pl, s);
5927 				return PF_DROP;
5928 			}
5929 			if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
5930 			    pf_normalize_tcp_stateful(pbuf, off, pd, &reason,
5931 			    th, s, &s->src, &s->dst, &rewrite)) {
5932 				/* This really shouldn't happen!!! */
5933 				DPFPRINTF(PF_DEBUG_URGENT,
5934 				    ("pf_normalize_tcp_stateful failed on "
5935 				    "first pkt"));
5936 #if SKYWALK
5937 				netns_release(&nstoken);
5938 #endif
5939 				pf_normalize_tcp_cleanup(s);
5940 				pf_src_tree_remove_state(s);
5941 				STATE_DEC_COUNTERS(s);
5942 				pool_put(&pf_state_pl, s);
5943 				return PF_DROP;
5944 			}
5945 		}
5946 
5947 		/* allocate state key and import values from psk */
5948 		if (__improbable((sk = pf_alloc_state_key(s, &psk)) == NULL)) {
5949 			REASON_SET(&reason, PFRES_MEMORY);
5950 			/*
5951 			 * XXXSCW: This will leak the freshly-allocated
5952 			 * state structure 's'. Although it should
5953 			 * eventually be aged-out and removed.
5954 			 */
5955 			goto cleanup;
5956 		}
5957 
5958 		if (pd->flowhash == 0) {
5959 			ASSERT(sk->flowhash != 0);
5960 			ASSERT(sk->flowsrc != 0);
5961 			pd->flowsrc = sk->flowsrc;
5962 			pd->flowhash = sk->flowhash;
5963 			pd->pktflags |= PKTF_FLOW_ID;
5964 			pd->pktflags &= ~PKTF_FLOW_ADV;
5965 			if (__improbable(pf_tag_packet(pbuf, pd->pf_mtag,
5966 			    tag, rtableid, pd))) {
5967 				/*
5968 				 * this shouldn't fail as the packet tag has
5969 				 * already been allocated.
5970 				 */
5971 				panic_plain("pf_tag_packet failed");
5972 			}
5973 		}
5974 
5975 		pf_set_rt_ifp(s, saddr, af);    /* needs s->state_key set */
5976 
5977 		pbuf = pd->mp; // XXXSCW: Why?
5978 
5979 		if (sk->app_state == 0) {
5980 			switch (pd->proto) {
5981 			case IPPROTO_TCP: {
5982 				u_int16_t dport = (direction == PF_OUT) ?
5983 				    sk->ext_gwy.xport.port : sk->gwy.xport.port;
5984 
5985 				if (nr != NULL &&
5986 				    ntohs(dport) == PF_PPTP_PORT) {
5987 					struct pf_app_state *as;
5988 
5989 					as = pool_get(&pf_app_state_pl,
5990 					    PR_WAITOK);
5991 					if (!as) {
5992 						REASON_SET(&reason,
5993 						    PFRES_MEMORY);
5994 						goto cleanup;
5995 					}
5996 
5997 					bzero(as, sizeof(*as));
5998 					as->handler = pf_pptp_handler;
5999 					as->compare_lan_ext = 0;
6000 					as->compare_ext_gwy = 0;
6001 					as->u.pptp.grev1_state = 0;
6002 					sk->app_state = as;
6003 					(void) hook_establish(&s->unlink_hooks,
6004 					    0, (hook_fn_t) pf_pptp_unlink, s);
6005 				}
6006 				break;
6007 			}
6008 
6009 			case IPPROTO_UDP: {
6010 				if (nr != NULL &&
6011 				    ntohs(uh->uh_sport) == PF_IKE_PORT &&
6012 				    ntohs(uh->uh_dport) == PF_IKE_PORT) {
6013 					struct pf_app_state *as;
6014 
6015 					as = pool_get(&pf_app_state_pl,
6016 					    PR_WAITOK);
6017 					if (!as) {
6018 						REASON_SET(&reason,
6019 						    PFRES_MEMORY);
6020 						goto cleanup;
6021 					}
6022 
6023 					bzero(as, sizeof(*as));
6024 					as->compare_lan_ext = pf_ike_compare;
6025 					as->compare_ext_gwy = pf_ike_compare;
6026 					as->u.ike.cookie = ike.initiator_cookie;
6027 					sk->app_state = as;
6028 				}
6029 				break;
6030 			}
6031 
6032 			default:
6033 				break;
6034 			}
6035 		}
6036 
6037 		if (__improbable(pf_insert_state(BOUND_IFACE(r, kif), s))) {
6038 			if (pd->proto == IPPROTO_TCP) {
6039 				pf_normalize_tcp_cleanup(s);
6040 			}
6041 			REASON_SET(&reason, PFRES_STATEINS);
6042 			pf_src_tree_remove_state(s);
6043 			STATE_DEC_COUNTERS(s);
6044 #if SKYWALK
6045 			netns_release(&nstoken);
6046 #endif
6047 			pool_put(&pf_state_pl, s);
6048 			return PF_DROP;
6049 		} else {
6050 #if SKYWALK
6051 			s->nstoken = nstoken;
6052 			nstoken = NULL;
6053 #endif
6054 			*sm = s;
6055 		}
6056 		if (tag > 0) {
6057 			pf_tag_ref(tag);
6058 			s->tag = tag;
6059 		}
6060 		if (pd->proto == IPPROTO_TCP &&
6061 		    (th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN &&
6062 		    r->keep_state == PF_STATE_SYNPROXY) {
6063 			int ua = (sk->af_lan == sk->af_gwy) ? 1 : 0;
6064 			s->src.state = PF_TCPS_PROXY_SRC;
6065 			if (nr != NULL) {
6066 				if (direction == PF_OUT) {
6067 					pf_change_ap(direction, pd->mp, saddr,
6068 					    &th->th_sport, pd->ip_sum,
6069 					    &th->th_sum, &pd->baddr,
6070 					    bxport.port, 0, af, pd->af, ua);
6071 					sxport.port = th->th_sport;
6072 				} else {
6073 					pf_change_ap(direction, pd->mp, daddr,
6074 					    &th->th_dport, pd->ip_sum,
6075 					    &th->th_sum, &pd->baddr,
6076 					    bxport.port, 0, af, pd->af, ua);
6077 					sxport.port = th->th_dport;
6078 				}
6079 			}
6080 			s->src.seqhi = htonl(random());
6081 			/* Find mss option */
6082 			mss = pf_get_mss(pbuf, off, th->th_off, af);
6083 			mss = pf_calc_mss(saddr, af, mss);
6084 			mss = pf_calc_mss(daddr, af, mss);
6085 			s->src.mss = mss;
6086 			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
6087 			    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
6088 			    TH_SYN | TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
6089 			REASON_SET(&reason, PFRES_SYNPROXY);
6090 			return PF_SYNPROXY_DROP;
6091 		}
6092 
6093 		if (sk->app_state && sk->app_state->handler) {
6094 			int offx = off;
6095 
6096 			switch (pd->proto) {
6097 			case IPPROTO_TCP:
6098 				offx += th->th_off << 2;
6099 				break;
6100 			case IPPROTO_UDP:
6101 				offx += pd->hdr.udp->uh_ulen << 2;
6102 				break;
6103 			default:
6104 				/* ALG handlers only apply to TCP and UDP rules */
6105 				break;
6106 			}
6107 
6108 			if (offx > off) {
6109 				sk->app_state->handler(s, direction, offx,
6110 				    pd, kif);
6111 				if (pd->lmw < 0) {
6112 					REASON_SET(&reason, PFRES_MEMORY);
6113 					return PF_DROP;
6114 				}
6115 				pbuf = pd->mp;  // XXXSCW: Why?
6116 			}
6117 		}
6118 	}
6119 #if SKYWALK
6120 	else {
6121 		netns_release(&nstoken);
6122 	}
6123 #endif
6124 
6125 	/* copy back packet headers if we performed NAT operations */
6126 	if (rewrite) {
6127 		if (rewrite < off + hdrlen) {
6128 			rewrite = off + hdrlen;
6129 		}
6130 
6131 		if (pf_lazy_makewritable(pd, pd->mp, rewrite) == NULL) {
6132 			REASON_SET(&reason, PFRES_MEMORY);
6133 			return PF_DROP;
6134 		}
6135 
6136 		pbuf_copy_back(pbuf, off, hdrlen, pd->hdr.any);
6137 		if (af == AF_INET6 && pd->naf == AF_INET) {
6138 			return pf_nat64_ipv6(pbuf, off, pd);
6139 		} else if (af == AF_INET && pd->naf == AF_INET6) {
6140 			return pf_nat64_ipv4(pbuf, off, pd);
6141 		}
6142 	}
6143 
6144 	return PF_PASS;
6145 }
6146 
/*
 * NOTE(review): global enable flag; not read or written anywhere in this
 * chunk, so its exact semantics (presumably "network link conditioner"
 * enabled) cannot be confirmed here — verify against its writers/readers.
 */
boolean_t is_nlc_enabled_glb = FALSE;
6148 
6149 static inline boolean_t
pf_is_dummynet_enabled(void)6150 pf_is_dummynet_enabled(void)
6151 {
6152 #if DUMMYNET
6153 	if (__probable(!PF_IS_ENABLED)) {
6154 		return FALSE;
6155 	}
6156 
6157 	if (__probable(!DUMMYNET_LOADED)) {
6158 		return FALSE;
6159 	}
6160 
6161 	if (__probable(TAILQ_EMPTY(pf_main_ruleset.
6162 	    rules[PF_RULESET_DUMMYNET].active.ptr))) {
6163 		return FALSE;
6164 	}
6165 
6166 	return TRUE;
6167 #else
6168 	return FALSE;
6169 #endif /* DUMMYNET */
6170 }
6171 
6172 #if DUMMYNET
6173 /*
6174  * When pf_test_dummynet() returns PF_PASS, the rule matching parameter "rm"
6175  * remains unchanged, meaning the packet did not match a dummynet rule.
6176  * when the packet does match a dummynet rule, pf_test_dummynet() returns
6177  * PF_PASS and zero out the mbuf rule as the packet is effectively siphoned
6178  * out by dummynet.
6179  */
6180 static __attribute__((noinline)) int
pf_test_dummynet(struct pf_rule ** rm,int direction,struct pfi_kif * kif,pbuf_t ** pbuf0,struct pf_pdesc * pd,struct ip_fw_args * fwa)6181 pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif,
6182     pbuf_t **pbuf0, struct pf_pdesc *pd, struct ip_fw_args *fwa)
6183 {
6184 	pbuf_t                  *pbuf = *pbuf0;
6185 	struct pf_rule          *am = NULL;
6186 	struct pf_ruleset       *rsm = NULL;
6187 	struct pf_addr          *saddr = pd->src, *daddr = pd->dst;
6188 	sa_family_t              af = pd->af;
6189 	struct pf_rule          *r, *a = NULL;
6190 	struct pf_ruleset       *ruleset = NULL;
6191 	struct tcphdr           *th = pd->hdr.tcp;
6192 	u_short                  reason;
6193 	int                      hdrlen = 0;
6194 	int                      tag = -1;
6195 	unsigned int             rtableid = IFSCOPE_NONE;
6196 	int                      asd = 0;
6197 	int                      match = 0;
6198 	u_int8_t                 icmptype = 0, icmpcode = 0;
6199 	struct ip_fw_args       dnflow;
6200 	struct pf_rule          *prev_matching_rule = fwa ? fwa->fwa_pf_rule : NULL;
6201 	int                     found_prev_rule = (prev_matching_rule) ? 0 : 1;
6202 
6203 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
6204 
6205 	if (!pf_is_dummynet_enabled()) {
6206 		return PF_PASS;
6207 	}
6208 
6209 	if (kif->pfik_ifp->if_xflags & IFXF_NO_TRAFFIC_SHAPING) {
6210 		return PF_PASS;
6211 	}
6212 
6213 	bzero(&dnflow, sizeof(dnflow));
6214 
6215 	hdrlen = 0;
6216 
6217 	/* Fragments don't gave protocol headers */
6218 	if (!(pd->flags & PFDESC_IP_FRAG)) {
6219 		switch (pd->proto) {
6220 		case IPPROTO_TCP:
6221 			dnflow.fwa_id.flags = pd->hdr.tcp->th_flags;
6222 			dnflow.fwa_id.dst_port = ntohs(pd->hdr.tcp->th_dport);
6223 			dnflow.fwa_id.src_port = ntohs(pd->hdr.tcp->th_sport);
6224 			hdrlen = sizeof(*th);
6225 			break;
6226 		case IPPROTO_UDP:
6227 			dnflow.fwa_id.dst_port = ntohs(pd->hdr.udp->uh_dport);
6228 			dnflow.fwa_id.src_port = ntohs(pd->hdr.udp->uh_sport);
6229 			hdrlen = sizeof(*pd->hdr.udp);
6230 			break;
6231 #if INET
6232 		case IPPROTO_ICMP:
6233 			if (af != AF_INET) {
6234 				break;
6235 			}
6236 			hdrlen = ICMP_MINLEN;
6237 			icmptype = pd->hdr.icmp->icmp_type;
6238 			icmpcode = pd->hdr.icmp->icmp_code;
6239 			break;
6240 #endif /* INET */
6241 		case IPPROTO_ICMPV6:
6242 			if (af != AF_INET6) {
6243 				break;
6244 			}
6245 			hdrlen = sizeof(*pd->hdr.icmp6);
6246 			icmptype = pd->hdr.icmp6->icmp6_type;
6247 			icmpcode = pd->hdr.icmp6->icmp6_code;
6248 			break;
6249 		case IPPROTO_GRE:
6250 			if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
6251 				hdrlen = sizeof(*pd->hdr.grev1);
6252 			}
6253 			break;
6254 		case IPPROTO_ESP:
6255 			hdrlen = sizeof(*pd->hdr.esp);
6256 			break;
6257 		}
6258 	}
6259 
6260 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_DUMMYNET].active.ptr);
6261 
6262 	while (r != NULL) {
6263 		r->evaluations++;
6264 		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
6265 			r = r->skip[PF_SKIP_IFP].ptr;
6266 		} else if (r->direction && r->direction != direction) {
6267 			r = r->skip[PF_SKIP_DIR].ptr;
6268 		} else if (r->af && r->af != af) {
6269 			r = r->skip[PF_SKIP_AF].ptr;
6270 		} else if (r->proto && r->proto != pd->proto) {
6271 			r = r->skip[PF_SKIP_PROTO].ptr;
6272 		} else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
6273 		    r->src.neg, kif)) {
6274 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
6275 		}
6276 		/* tcp/udp only. port_op always 0 in other cases */
6277 		else if (r->proto == pd->proto &&
6278 		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
6279 		    ((pd->flags & PFDESC_IP_FRAG) ||
6280 		    ((r->src.xport.range.op &&
6281 		    !pf_match_port(r->src.xport.range.op,
6282 		    r->src.xport.range.port[0], r->src.xport.range.port[1],
6283 		    th->th_sport))))) {
6284 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
6285 		} else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
6286 		    r->dst.neg, NULL)) {
6287 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
6288 		}
6289 		/* tcp/udp only. port_op always 0 in other cases */
6290 		else if (r->proto == pd->proto &&
6291 		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
6292 		    r->dst.xport.range.op &&
6293 		    ((pd->flags & PFDESC_IP_FRAG) ||
6294 		    !pf_match_port(r->dst.xport.range.op,
6295 		    r->dst.xport.range.port[0], r->dst.xport.range.port[1],
6296 		    th->th_dport))) {
6297 			r = r->skip[PF_SKIP_DST_PORT].ptr;
6298 		}
6299 		/* icmp only. type always 0 in other cases */
6300 		else if (r->type &&
6301 		    ((pd->flags & PFDESC_IP_FRAG) ||
6302 		    r->type != icmptype + 1)) {
6303 			r = TAILQ_NEXT(r, entries);
6304 		}
6305 		/* icmp only. type always 0 in other cases */
6306 		else if (r->code &&
6307 		    ((pd->flags & PFDESC_IP_FRAG) ||
6308 		    r->code != icmpcode + 1)) {
6309 			r = TAILQ_NEXT(r, entries);
6310 		} else if (r->tos && !(r->tos == pd->tos)) {
6311 			r = TAILQ_NEXT(r, entries);
6312 		} else if (r->rule_flag & PFRULE_FRAGMENT) {
6313 			r = TAILQ_NEXT(r, entries);
6314 		} else if (pd->proto == IPPROTO_TCP &&
6315 		    ((pd->flags & PFDESC_IP_FRAG) ||
6316 		    (r->flagset & th->th_flags) != r->flags)) {
6317 			r = TAILQ_NEXT(r, entries);
6318 		} else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
6319 			r = TAILQ_NEXT(r, entries);
6320 		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
6321 			r = TAILQ_NEXT(r, entries);
6322 		} else {
6323 			/*
6324 			 * Need to go past the previous dummynet matching rule
6325 			 */
6326 			if (r->anchor == NULL) {
6327 				if (found_prev_rule) {
6328 					if (r->tag) {
6329 						tag = r->tag;
6330 					}
6331 					if (PF_RTABLEID_IS_VALID(r->rtableid)) {
6332 						rtableid = r->rtableid;
6333 					}
6334 					match = 1;
6335 					*rm = r;
6336 					am = a;
6337 					rsm = ruleset;
6338 					if ((*rm)->quick) {
6339 						break;
6340 					}
6341 				} else if (r == prev_matching_rule) {
6342 					found_prev_rule = 1;
6343 				}
6344 				r = TAILQ_NEXT(r, entries);
6345 			} else {
6346 				pf_step_into_anchor(&asd, &ruleset,
6347 				    PF_RULESET_DUMMYNET, &r, &a, &match);
6348 			}
6349 		}
6350 		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
6351 		    PF_RULESET_DUMMYNET, &r, &a, &match)) {
6352 			break;
6353 		}
6354 	}
6355 	r = *rm;
6356 	a = am;
6357 	ruleset = rsm;
6358 
6359 	if (!match) {
6360 		return PF_PASS;
6361 	}
6362 
6363 	REASON_SET(&reason, PFRES_DUMMYNET);
6364 
6365 	if (r->log) {
6366 		PFLOG_PACKET(kif, h, pbuf, af, direction, reason, r,
6367 		    a, ruleset, pd);
6368 	}
6369 
6370 	if (r->action == PF_NODUMMYNET) {
6371 		int dirndx = (direction == PF_OUT);
6372 
6373 		r->packets[dirndx]++;
6374 		r->bytes[dirndx] += pd->tot_len;
6375 
6376 		return PF_PASS;
6377 	}
6378 	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, pd)) {
6379 		REASON_SET(&reason, PFRES_MEMORY);
6380 
6381 		return PF_DROP;
6382 	}
6383 
6384 	if (r->dnpipe && ip_dn_io_ptr != NULL) {
6385 		struct mbuf *m;
6386 		int dirndx = (direction == PF_OUT);
6387 
6388 		r->packets[dirndx]++;
6389 		r->bytes[dirndx] += pd->tot_len;
6390 
6391 		dnflow.fwa_cookie = r->dnpipe;
6392 		dnflow.fwa_pf_rule = r;
6393 		dnflow.fwa_id.proto = pd->proto;
6394 		dnflow.fwa_flags = r->dntype;
6395 		switch (af) {
6396 		case AF_INET:
6397 			dnflow.fwa_id.addr_type = 4;
6398 			dnflow.fwa_id.src_ip = ntohl(saddr->v4addr.s_addr);
6399 			dnflow.fwa_id.dst_ip = ntohl(daddr->v4addr.s_addr);
6400 			break;
6401 		case AF_INET6:
6402 			dnflow.fwa_id.addr_type = 6;
6403 			dnflow.fwa_id.src_ip6 = saddr->v6addr;
6404 			dnflow.fwa_id.dst_ip6 = saddr->v6addr;
6405 			break;
6406 		}
6407 
6408 		if (fwa != NULL) {
6409 			dnflow.fwa_oif = fwa->fwa_oif;
6410 			dnflow.fwa_oflags = fwa->fwa_oflags;
6411 			/*
6412 			 * Note that fwa_ro, fwa_dst and fwa_ipoa are
6413 			 * actually in a union so the following does work
6414 			 * for both IPv4 and IPv6
6415 			 */
6416 			dnflow.fwa_ro = fwa->fwa_ro;
6417 			dnflow.fwa_dst = fwa->fwa_dst;
6418 			dnflow.fwa_ipoa = fwa->fwa_ipoa;
6419 			dnflow.fwa_ro6_pmtu = fwa->fwa_ro6_pmtu;
6420 			dnflow.fwa_origifp = fwa->fwa_origifp;
6421 			dnflow.fwa_mtu = fwa->fwa_mtu;
6422 			dnflow.fwa_unfragpartlen = fwa->fwa_unfragpartlen;
6423 			dnflow.fwa_exthdrs = fwa->fwa_exthdrs;
6424 		}
6425 
6426 		if (af == AF_INET) {
6427 			struct ip *iphdr = pbuf->pb_data;
6428 			NTOHS(iphdr->ip_len);
6429 			NTOHS(iphdr->ip_off);
6430 		}
6431 		/*
6432 		 * Don't need to unlock pf_lock as NET_THREAD_HELD_PF
6433 		 * allows for recursive behavior
6434 		 */
6435 		m = pbuf_to_mbuf(pbuf, TRUE);
6436 		if (m != NULL) {
6437 			ip_dn_io_ptr(m,
6438 			    dnflow.fwa_cookie, (af == AF_INET) ?
6439 			    ((direction == PF_IN) ? DN_TO_IP_IN : DN_TO_IP_OUT) :
6440 			    ((direction == PF_IN) ? DN_TO_IP6_IN : DN_TO_IP6_OUT),
6441 			    &dnflow);
6442 		}
6443 
6444 		/*
6445 		 * The packet is siphoned out by dummynet so return a NULL
6446 		 * pbuf so the caller can still return success.
6447 		 */
6448 		*pbuf0 = NULL;
6449 
6450 		return PF_PASS;
6451 	}
6452 
6453 	return PF_PASS;
6454 }
6455 #endif /* DUMMYNET */
6456 
/*
 * Evaluate the active filter ruleset against an IP fragment, i.e. a
 * packet for which no transport header is available.  Rules that rely
 * on protocol-header data (port ranges, TCP flag sets, ICMP type/code,
 * OS fingerprints) are skipped, since a fragment cannot satisfy them.
 * On a match, *rm, *am and *rsm receive the matching rule, anchor rule
 * and ruleset.  Returns PF_PASS or PF_DROP.
 */
static __attribute__((noinline)) int
pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, void *h, struct pf_pdesc *pd, struct pf_rule **am,
    struct pf_ruleset **rsm)
{
#pragma unused(h)
	struct pf_rule          *r, *a = NULL;
	struct pf_ruleset       *ruleset = NULL;
	sa_family_t              af = pd->af;
	u_short                  reason;
	int                      tag = -1;
	int                      asd = 0;
	int                      match = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		/* skip[] pointers jump over runs of rules that share a
		 * criterion which just failed to match */
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != direction) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != af) {
			r = r->skip[PF_SKIP_AF].ptr;
		} else if (r->proto && r->proto != pd->proto) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		} else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif)) {
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		} else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		} else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
		    !(r->tos & pd->tos)) {
			r = TAILQ_NEXT(r, entries);
		} else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
		    !(r->tos & (pd->tos & DSCP_MASK))) {
			r = TAILQ_NEXT(r, entries);
		} else if ((r->rule_flag & PFRULE_SC) && r->tos &&
		    ((r->tos & SCIDX_MASK) != pd->sc)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->os_fingerprint != PF_OSFP_ANY) {
			/* OS fingerprinting needs the TCP header — skip */
			r = TAILQ_NEXT(r, entries);
		} else if (pd->proto == IPPROTO_UDP &&
		    (r->src.xport.range.op || r->dst.xport.range.op)) {
			r = TAILQ_NEXT(r, entries);
		} else if (pd->proto == IPPROTO_TCP &&
		    (r->src.xport.range.op || r->dst.xport.range.op ||
		    r->flagset)) {
			r = TAILQ_NEXT(r, entries);
		} else if ((pd->proto == IPPROTO_ICMP ||
		    pd->proto == IPPROTO_ICMPV6) &&
		    (r->type || r->code)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
			r = TAILQ_NEXT(r, entries);
		} else {
			if (r->anchor == NULL) {
				/* plain rule matched; remember it and keep
				 * going unless it is a "quick" rule */
				match = 1;
				*rm = r;
				*am = a;
				*rsm = ruleset;
				if ((*rm)->quick) {
					break;
				}
				r = TAILQ_NEXT(r, entries);
			} else {
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_FILTER, &r, &a, &match);
			}
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_FILTER, &r, &a, &match)) {
			break;
		}
	}
	/*
	 * NOTE(review): *rm is dereferenced unconditionally below, which
	 * assumes some rule (presumably the default rule) always matches —
	 * confirm against the ruleset initialization.
	 */
	r = *rm;
	a = *am;
	ruleset = *rsm;

	REASON_SET(&reason, PFRES_MATCH);

	if (r->log) {
		PFLOG_PACKET(kif, h, pbuf, af, direction, reason, r, a, ruleset,
		    pd);
	}

	if (r->action != PF_PASS) {
		return PF_DROP;
	}

	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, -1, NULL)) {
		REASON_SET(&reason, PFRES_MEMORY);
		return PF_DROP;
	}

	return PF_PASS;
}
6556 
/*
 * PPTP application-layer handler, invoked for the TCP/1723 control
 * connection of a PPTP session.  It parses PPTP control messages,
 * tracks the PNS/PAC call IDs, maintains a companion GREv1 state
 * ("gs") for the tunneled data, rewrites (spoofs) call IDs when the
 * control connection is NATed, and inserts or tears down the GREv1
 * state as calls are established or cleared.
 */
static __attribute__((noinline)) void
pf_pptp_handler(struct pf_state *s, int direction, int off,
    struct pf_pdesc *pd, struct pfi_kif *kif)
{
#pragma unused(direction)
	struct tcphdr *th;
	struct pf_pptp_state *pptps;
	struct pf_pptp_ctrl_msg cm;
	size_t plen, tlen;
	struct pf_state *gs;                    /* companion GREv1 state */
	u_int16_t ct;                           /* control message type */
	u_int16_t *pac_call_id;
	u_int16_t *pns_call_id;
	u_int16_t *spoof_call_id;               /* field in cm to rewrite, if any */
	u_int8_t *pac_state;
	u_int8_t *pns_state;
	enum { PF_PPTP_PASS, PF_PPTP_INSERT_GRE, PF_PPTP_REMOVE_GRE } op;
	pbuf_t *pbuf;
	struct pf_state_key *sk;
	struct pf_state_key *gsk;
	struct pf_app_state *gas;

	sk = s->state_key;
	pptps = &sk->app_state->u.pptp;
	gs = pptps->grev1_state;

	/* keep the GREv1 companion state alive while control traffic flows */
	if (gs) {
		gs->expire = pf_time_second();
	}

	pbuf = pd->mp;
	plen = min(sizeof(cm), pbuf->pb_packet_len - off);
	if (plen < PF_PPTP_CTRL_MSG_MINSIZE) {
		return;
	}
	tlen = plen - PF_PPTP_CTRL_MSG_MINSIZE;
	pbuf_copy_data(pbuf, off, plen, &cm);

	/* validate the PPTP control-message header */
	if (ntohl(cm.hdr.magic) != PF_PPTP_MAGIC_NUMBER) {
		return;
	}
	if (ntohs(cm.hdr.type) != 1) {
		return;
	}

	/* per-message-type minimum-length check; bail on short or unknown
	 * messages */
#define TYPE_LEN_CHECK(_type, _name)                            \
	case PF_PPTP_CTRL_TYPE_##_type:                         \
	        if (tlen < sizeof(struct pf_pptp_ctrl_##_name)) \
	                return;                                 \
	        break;

	switch (cm.ctrl.type) {
		TYPE_LEN_CHECK(START_REQ, start_req);
		TYPE_LEN_CHECK(START_RPY, start_rpy);
		TYPE_LEN_CHECK(STOP_REQ, stop_req);
		TYPE_LEN_CHECK(STOP_RPY, stop_rpy);
		TYPE_LEN_CHECK(ECHO_REQ, echo_req);
		TYPE_LEN_CHECK(ECHO_RPY, echo_rpy);
		TYPE_LEN_CHECK(CALL_OUT_REQ, call_out_req);
		TYPE_LEN_CHECK(CALL_OUT_RPY, call_out_rpy);
		TYPE_LEN_CHECK(CALL_IN_1ST, call_in_1st);
		TYPE_LEN_CHECK(CALL_IN_2ND, call_in_2nd);
		TYPE_LEN_CHECK(CALL_IN_3RD, call_in_3rd);
		TYPE_LEN_CHECK(CALL_CLR, call_clr);
		TYPE_LEN_CHECK(CALL_DISC, call_disc);
		TYPE_LEN_CHECK(ERROR, error);
		TYPE_LEN_CHECK(SET_LINKINFO, set_linkinfo);
	default:
		return;
	}
#undef TYPE_LEN_CHECK

	if (!gs) {
		/*
		 * No GREv1 companion state yet: clone the control state,
		 * reset per-state bookkeeping, and give it a GREv1 state
		 * key copied from the control connection's addresses.
		 */
		gs = pool_get(&pf_state_pl, PR_WAITOK);
		if (!gs) {
			return;
		}

		memcpy(gs, s, sizeof(*gs));

		memset(&gs->entry_id, 0, sizeof(gs->entry_id));
		memset(&gs->entry_list, 0, sizeof(gs->entry_list));

		TAILQ_INIT(&gs->unlink_hooks);
		gs->rt_kif = NULL;
		gs->creation = 0;
		gs->pfsync_time = 0;
		gs->packets[0] = gs->packets[1] = 0;
		gs->bytes[0] = gs->bytes[1] = 0;
		gs->timeout = PFTM_UNLINKED;
		gs->id = gs->creatorid = 0;
		gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
		gs->src.scrub = gs->dst.scrub = 0;

		gas = pool_get(&pf_app_state_pl, PR_NOWAIT);
		if (!gas) {
			pool_put(&pf_state_pl, gs);
			return;
		}

		gsk = pf_alloc_state_key(gs, NULL);
		if (!gsk) {
			pool_put(&pf_app_state_pl, gas);
			pool_put(&pf_state_pl, gs);
			return;
		}

		memcpy(&gsk->lan, &sk->lan, sizeof(gsk->lan));
		memcpy(&gsk->gwy, &sk->gwy, sizeof(gsk->gwy));
		memcpy(&gsk->ext_lan, &sk->ext_lan, sizeof(gsk->ext_lan));
		memcpy(&gsk->ext_gwy, &sk->ext_gwy, sizeof(gsk->ext_gwy));
		gsk->af_lan = sk->af_lan;
		gsk->af_gwy = sk->af_gwy;
		gsk->proto = IPPROTO_GRE;
		gsk->proto_variant = PF_GRE_PPTP_VARIANT;
		gsk->app_state = gas;
		/* call IDs are filled in as control messages are seen */
		gsk->lan.xport.call_id = 0;
		gsk->gwy.xport.call_id = 0;
		gsk->ext_lan.xport.call_id = 0;
		gsk->ext_gwy.xport.call_id = 0;
		ASSERT(gsk->flowsrc == FLOWSRC_PF);
		ASSERT(gsk->flowhash != 0);
		memset(gas, 0, sizeof(*gas));
		gas->u.grev1.pptp_state = s;
		STATE_INC_COUNTERS(gs);
		pptps->grev1_state = gs;
		/* unlink hook severs the control<->GRE back-pointers */
		(void) hook_establish(&gs->unlink_hooks, 0,
		    (hook_fn_t) pf_grev1_unlink, gs);
	} else {
		gsk = gs->state_key;
	}

	/*
	 * Map the PNS (PPTP Network Server) and PAC (PPTP Access
	 * Concentrator) call-ID/state slots according to which side
	 * initiated the control connection.
	 */
	switch (sk->direction) {
	case PF_IN:
		pns_call_id = &gsk->ext_lan.xport.call_id;
		pns_state = &gs->dst.state;
		pac_call_id = &gsk->lan.xport.call_id;
		pac_state = &gs->src.state;
		break;

	case PF_OUT:
		pns_call_id = &gsk->lan.xport.call_id;
		pns_state = &gs->src.state;
		pac_call_id = &gsk->ext_lan.xport.call_id;
		pac_state = &gs->dst.state;
		break;

	default:
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_pptp_handler: bad directional!\n"));
		return;
	}

	spoof_call_id = 0;
	op = PF_PPTP_PASS;

	ct = ntohs(cm.ctrl.type);

	/*
	 * Track call IDs per message type.  spoof_call_id is set to the
	 * message field that must be rewritten when the LAN-side call ID
	 * is NATed; op schedules GREv1 state insertion/removal.
	 */
	switch (ct) {
	case PF_PPTP_CTRL_TYPE_CALL_OUT_REQ:
		*pns_call_id = cm.msg.call_out_req.call_id;
		*pns_state = PFGRE1S_INITIATING;
		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
			spoof_call_id = &cm.msg.call_out_req.call_id;
		}
		break;

	case PF_PPTP_CTRL_TYPE_CALL_OUT_RPY:
		*pac_call_id = cm.msg.call_out_rpy.call_id;
		if (s->nat_rule.ptr) {
			spoof_call_id =
			    (pac_call_id == &gsk->lan.xport.call_id) ?
			    &cm.msg.call_out_rpy.call_id :
			    &cm.msg.call_out_rpy.peer_call_id;
		}
		if (gs->timeout == PFTM_UNLINKED) {
			*pac_state = PFGRE1S_INITIATING;
			op = PF_PPTP_INSERT_GRE;
		}
		break;

	case PF_PPTP_CTRL_TYPE_CALL_IN_1ST:
		*pns_call_id = cm.msg.call_in_1st.call_id;
		*pns_state = PFGRE1S_INITIATING;
		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
			spoof_call_id = &cm.msg.call_in_1st.call_id;
		}
		break;

	case PF_PPTP_CTRL_TYPE_CALL_IN_2ND:
		*pac_call_id = cm.msg.call_in_2nd.call_id;
		*pac_state = PFGRE1S_INITIATING;
		if (s->nat_rule.ptr) {
			spoof_call_id =
			    (pac_call_id == &gsk->lan.xport.call_id) ?
			    &cm.msg.call_in_2nd.call_id :
			    &cm.msg.call_in_2nd.peer_call_id;
		}
		break;

	case PF_PPTP_CTRL_TYPE_CALL_IN_3RD:
		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
			spoof_call_id = &cm.msg.call_in_3rd.call_id;
		}
		if (cm.msg.call_in_3rd.call_id != *pns_call_id) {
			break;
		}
		if (gs->timeout == PFTM_UNLINKED) {
			op = PF_PPTP_INSERT_GRE;
		}
		break;

	case PF_PPTP_CTRL_TYPE_CALL_CLR:
		/*
		 * NOTE(review): removal on call-ID *mismatch* looks
		 * inverted relative to CALL_DISC below — confirm intended.
		 */
		if (cm.msg.call_clr.call_id != *pns_call_id) {
			op = PF_PPTP_REMOVE_GRE;
		}
		break;

	case PF_PPTP_CTRL_TYPE_CALL_DISC:
		if (cm.msg.call_clr.call_id != *pac_call_id) {
			op = PF_PPTP_REMOVE_GRE;
		}
		break;

	case PF_PPTP_CTRL_TYPE_ERROR:
		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
			spoof_call_id = &cm.msg.error.peer_call_id;
		}
		break;

	case PF_PPTP_CTRL_TYPE_SET_LINKINFO:
		if (s->nat_rule.ptr && pac_call_id == &gsk->lan.xport.call_id) {
			spoof_call_id = &cm.msg.set_linkinfo.peer_call_id;
		}
		break;

	default:
		op = PF_PPTP_PASS;
		break;
	}

	/*
	 * First time the LAN-side call ID is learned: pick a gateway-side
	 * call ID.  When spoofing, search for one not already in use by
	 * another GREv1 state.
	 */
	if (!gsk->gwy.xport.call_id && gsk->lan.xport.call_id) {
		gsk->gwy.xport.call_id = gsk->lan.xport.call_id;
		if (spoof_call_id) {
			u_int16_t call_id = 0;
			int n = 0;
			struct pf_state_key_cmp key;

			key.af_gwy = gsk->af_gwy;
			key.proto = IPPROTO_GRE;
			key.proto_variant = PF_GRE_PPTP_VARIANT;
			PF_ACPY(&key.gwy.addr, &gsk->gwy.addr, key.af_gwy);
			PF_ACPY(&key.ext_gwy.addr, &gsk->ext_gwy.addr, key.af_gwy);
			key.gwy.xport.call_id = gsk->gwy.xport.call_id;
			key.ext_gwy.xport.call_id = gsk->ext_gwy.xport.call_id;
			/*
			 * NOTE(review): htonl() result truncated into a
			 * u_int16_t — presumably only the low 16 random bits
			 * matter; confirm htons() was not intended.
			 */
			do {
				call_id = htonl(random());
			} while (!call_id);

			/* linear probe for an unused call ID */
			while (pf_find_state_all(&key, PF_IN, 0)) {
				call_id = ntohs(call_id);
				/*
				 * NOTE(review): call_id is decremented twice
				 * per collision ("--call_id" then
				 * "--call_id == 0") — confirm intended.
				 */
				--call_id;
				if (--call_id == 0) {
					call_id = 0xffff;
				}
				call_id = htons(call_id);

				key.gwy.xport.call_id = call_id;

				if (++n > 65535) {
					DPFPRINTF(PF_DEBUG_URGENT,
					    ("pf_pptp_handler: failed to spoof "
					    "call id\n"));
					key.gwy.xport.call_id = 0;
					break;
				}
			}

			gsk->gwy.xport.call_id = call_id;
		}
	}

	th = pd->hdr.tcp;

	/*
	 * Rewrite the call ID in the control message and patch the TCP
	 * checksum, then copy the modified message back into the packet.
	 */
	if (spoof_call_id && gsk->lan.xport.call_id != gsk->gwy.xport.call_id) {
		if (*spoof_call_id == gsk->gwy.xport.call_id) {
			*spoof_call_id = gsk->lan.xport.call_id;
			th->th_sum = pf_cksum_fixup(th->th_sum,
			    gsk->gwy.xport.call_id, gsk->lan.xport.call_id, 0);
		} else {
			*spoof_call_id = gsk->gwy.xport.call_id;
			th->th_sum = pf_cksum_fixup(th->th_sum,
			    gsk->lan.xport.call_id, gsk->gwy.xport.call_id, 0);
		}

		if (pf_lazy_makewritable(pd, pbuf, off + plen) == NULL) {
			pptps->grev1_state = NULL;
			STATE_DEC_COUNTERS(gs);
			pool_put(&pf_state_pl, gs);
			return;
		}
		pbuf_copy_back(pbuf, off, plen, &cm);
	}

	switch (op) {
	case PF_PPTP_REMOVE_GRE:
		/* expire the GREv1 state and forget all call IDs */
		gs->timeout = PFTM_PURGE;
		gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
		gsk->lan.xport.call_id = 0;
		gsk->gwy.xport.call_id = 0;
		gsk->ext_lan.xport.call_id = 0;
		gsk->ext_gwy.xport.call_id = 0;
		gs->id = gs->creatorid = 0;
		break;

	case PF_PPTP_INSERT_GRE:
		gs->creation = pf_time_second();
		gs->expire = pf_time_second();
		gs->timeout = PFTM_TCP_ESTABLISHED;
		if (gs->src_node != NULL) {
			++gs->src_node->states;
			VERIFY(gs->src_node->states != 0);
		}
		if (gs->nat_src_node != NULL) {
			++gs->nat_src_node->states;
			VERIFY(gs->nat_src_node->states != 0);
		}
		pf_set_rt_ifp(gs, &sk->lan.addr, sk->af_lan);
		if (pf_insert_state(BOUND_IFACE(s->rule.ptr, kif), gs)) {
			/*
			 * <[email protected]>
			 * FIX ME: insertion can fail when multiple PNS
			 * behind the same NAT open calls to the same PAC
			 * simultaneously because spoofed call ID numbers
			 * are chosen before states are inserted.  This is
			 * hard to fix and happens infrequently enough that
			 * users will normally try again and this ALG will
			 * succeed.  Failures are expected to be rare enough
			 * that fixing this is a low priority.
			 */
			pptps->grev1_state = NULL;
			pd->lmw = -1;   /* Force PF_DROP on PFRES_MEMORY */
			pf_src_tree_remove_state(gs);
			STATE_DEC_COUNTERS(gs);
			pool_put(&pf_state_pl, gs);
			DPFPRINTF(PF_DEBUG_URGENT, ("pf_pptp_handler: error "
			    "inserting GREv1 state.\n"));
		}
		break;

	default:
		break;
	}
}
6911 
6912 static __attribute__((noinline)) void
pf_pptp_unlink(struct pf_state * s)6913 pf_pptp_unlink(struct pf_state *s)
6914 {
6915 	struct pf_app_state *as = s->state_key->app_state;
6916 	struct pf_state *grev1s = as->u.pptp.grev1_state;
6917 
6918 	if (grev1s) {
6919 		struct pf_app_state *gas = grev1s->state_key->app_state;
6920 
6921 		if (grev1s->timeout < PFTM_MAX) {
6922 			grev1s->timeout = PFTM_PURGE;
6923 		}
6924 		gas->u.grev1.pptp_state = NULL;
6925 		as->u.pptp.grev1_state = NULL;
6926 	}
6927 }
6928 
6929 static __attribute__((noinline)) void
pf_grev1_unlink(struct pf_state * s)6930 pf_grev1_unlink(struct pf_state *s)
6931 {
6932 	struct pf_app_state *as = s->state_key->app_state;
6933 	struct pf_state *pptps = as->u.grev1.pptp_state;
6934 
6935 	if (pptps) {
6936 		struct pf_app_state *pas = pptps->state_key->app_state;
6937 
6938 		pas->u.pptp.grev1_state = NULL;
6939 		as->u.grev1.pptp_state = NULL;
6940 	}
6941 }
6942 
6943 static int
pf_ike_compare(struct pf_app_state * a,struct pf_app_state * b)6944 pf_ike_compare(struct pf_app_state *a, struct pf_app_state *b)
6945 {
6946 	int64_t d = a->u.ike.cookie - b->u.ike.cookie;
6947 	return (d > 0) ? 1 : ((d < 0) ? -1 : 0);
6948 }
6949 
6950 static int
pf_do_nat64(struct pf_state_key * sk,struct pf_pdesc * pd,pbuf_t * pbuf,int off)6951 pf_do_nat64(struct pf_state_key *sk, struct pf_pdesc *pd, pbuf_t *pbuf,
6952     int off)
6953 {
6954 	if (pd->af == AF_INET) {
6955 		if (pd->af != sk->af_lan) {
6956 			pd->ndaddr = sk->lan.addr;
6957 			pd->naddr = sk->ext_lan.addr;
6958 		} else {
6959 			pd->naddr = sk->gwy.addr;
6960 			pd->ndaddr = sk->ext_gwy.addr;
6961 		}
6962 		return pf_nat64_ipv4(pbuf, off, pd);
6963 	} else if (pd->af == AF_INET6) {
6964 		if (pd->af != sk->af_lan) {
6965 			pd->ndaddr = sk->lan.addr;
6966 			pd->naddr = sk->ext_lan.addr;
6967 		} else {
6968 			pd->naddr = sk->gwy.addr;
6969 			pd->ndaddr = sk->ext_gwy.addr;
6970 		}
6971 		return pf_nat64_ipv6(pbuf, off, pd);
6972 	}
6973 	return PF_DROP;
6974 }
6975 
/*
 * Stateful inspection for a TCP segment.
 *
 * Looks up the pf state for the segment (initializing keys for both the
 * lan-ext and ext-gwy trees so NAT64 states can be found from either
 * side), runs the synproxy handshake logic when the state is in a proxy
 * phase, validates the segment against the sequence-window tracking
 * algorithm, updates the two peers' TCP state machines and the state
 * expiry/timeout, and applies address/port translation if the state
 * requires it.
 *
 * Returns PF_PASS, PF_DROP, PF_SYNPROXY_DROP, or the result of
 * pf_do_nat64() when translating between address families.  On DROP,
 * *reason is set to the PFRES_* cause.
 */
static __attribute__((noinline)) int
pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd,
    u_short *reason)
{
#pragma unused(h)
	struct pf_state_key_cmp  key;
	struct tcphdr           *th = pd->hdr.tcp;
	u_int16_t                win = ntohs(th->th_win);
	u_int32_t                ack, end, seq, orig_seq;
	u_int8_t                 sws, dws;      /* src/dst window-scale shift counts */
	int                      ackskew;
	int                      copyback = 0;  /* nonzero: header bytes to copy back */
	struct pf_state_peer    *src, *dst;
	struct pf_state_key     *sk;

	key.app_state = 0;
	key.proto = IPPROTO_TCP;
	key.af_lan = key.af_gwy = pd->af;

	/*
	 * For NAT64 the first time rule search and state creation
	 * is done on the incoming side only.
	 * Once the state gets created, NAT64's LAN side (ipv6) will
	 * not be able to find the state in ext-gwy tree as that normally
	 * is intended to be looked up for incoming traffic from the
	 * WAN side.
	 * Therefore to handle NAT64 case we init keys here for both
	 * lan-ext as well as ext-gwy trees.
	 * In the state lookup we attempt a lookup on both trees if
	 * first one does not return any result and return a match if
	 * the match state's was created by NAT64 rule.
	 */
	PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
	PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
	key.ext_gwy.xport.port = th->th_sport;
	key.gwy.xport.port = th->th_dport;

	PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
	PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
	key.lan.xport.port = th->th_sport;
	key.ext_lan.xport.port = th->th_dport;

	STATE_LOOKUP();

	sk = (*state)->state_key;
	/*
	 * In case of NAT64 the translation is first applied on the LAN
	 * side. Therefore for stack's address family comparison
	 * we use sk->af_lan.
	 */
	if ((direction == sk->direction) && (pd->af == sk->af_lan)) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		/* reply direction (or NAT64 GWY-side): swap peer roles */
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/*
	 * Synproxy phase 1: we are still completing the 3-way handshake
	 * with the original initiator; nothing is forwarded yet.
	 */
	if (src->state == PF_TCPS_PROXY_SRC) {
		if (direction != sk->direction) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		}
		if (th->th_flags & TH_SYN) {
			if (ntohl(th->th_seq) != src->seqlo) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return PF_DROP;
			}
			/* (re)answer the client's SYN with our proxy SYN|ACK */
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    src->seqhi, ntohl(th->th_seq) + 1,
			    TH_SYN | TH_ACK, 0, src->mss, 0, 1,
			    0, NULL, NULL);
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		} else if (!(th->th_flags & TH_ACK) ||
		    (ntohl(th->th_ack) != src->seqhi + 1) ||
		    (ntohl(th->th_seq) != src->seqlo + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_DROP;
		} else if ((*state)->src_node != NULL &&
		    pf_src_connlimit(state)) {
			REASON_SET(reason, PFRES_SRCLIMIT);
			return PF_DROP;
		} else {
			/* client handshake complete; now talk to the server */
			src->state = PF_TCPS_PROXY_DST;
		}
	}
	/*
	 * Synproxy phase 2: handshake with the real destination, then
	 * splice the two half-connections by installing seqdiff
	 * modulators.
	 */
	if (src->state == PF_TCPS_PROXY_DST) {
		struct pf_state_host *psrc, *pdst;

		if (direction == PF_OUT) {
			psrc = &sk->gwy;
			pdst = &sk->ext_gwy;
		} else {
			psrc = &sk->ext_lan;
			pdst = &sk->lan;
		}
		if (direction == sk->direction) {
			if (((th->th_flags & (TH_SYN | TH_ACK)) != TH_ACK) ||
			    (ntohl(th->th_ack) != src->seqhi + 1) ||
			    (ntohl(th->th_seq) != src->seqlo + 1)) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return PF_DROP;
			}
			src->max_win = MAX(ntohs(th->th_win), 1);
			if (dst->seqhi == 1) {
				dst->seqhi = htonl(random());
			}
			/* our SYN towards the real destination */
			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
			    &pdst->addr, psrc->xport.port, pdst->xport.port,
			    dst->seqhi, 0, TH_SYN, 0,
			    src->mss, 0, 0, (*state)->tag, NULL, NULL);
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		} else if (((th->th_flags & (TH_SYN | TH_ACK)) !=
		    (TH_SYN | TH_ACK)) ||
		    (ntohl(th->th_ack) != dst->seqhi + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_DROP;
		} else {
			dst->max_win = MAX(ntohs(th->th_win), 1);
			dst->seqlo = ntohl(th->th_seq);
			/* final ACK to the destination ... */
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
			    TH_ACK, src->max_win, 0, 0, 0,
			    (*state)->tag, NULL, NULL);
			/* ... and to the original initiator */
			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
			    &pdst->addr, psrc->xport.port, pdst->xport.port,
			    src->seqhi + 1, src->seqlo + 1,
			    TH_ACK, dst->max_win, 0, 0, 1,
			    0, NULL, NULL);
			/*
			 * Splice: translate between the two independent
			 * ISNs from here on.
			 */
			src->seqdiff = dst->seqhi -
			    src->seqlo;
			dst->seqdiff = src->seqhi -
			    dst->seqlo;
			src->seqhi = src->seqlo +
			    dst->max_win;
			dst->seqhi = dst->seqlo +
			    src->max_win;
			src->wscale = dst->wscale = 0;
			src->state = dst->state =
			    TCPS_ESTABLISHED;
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		}
	}

	/*
	 * A fresh SYN on a fully closed/closing state: tear the old state
	 * down so a new one can be created for the reused 4-tuple.
	 */
	if (((th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN) &&
	    dst->state >= TCPS_FIN_WAIT_2 &&
	    src->state >= TCPS_FIN_WAIT_2) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state reuse ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf("\n");
		}
		/* XXX make sure it's the same direction ?? */
		src->state = dst->state = TCPS_CLOSED;
		pf_unlink_state(*state);
		*state = NULL;
		return PF_DROP;
	}

	/*
	 * Effective window-scale shifts: only valid once the handshake
	 * negotiated scaling; a SYN itself is never scaled.  Peers with
	 * no negotiated scale get the maximum shift so the window checks
	 * below stay permissive.
	 */
	if ((th->th_flags & TH_SYN) == 0) {
		sws = (src->wscale & PF_WSCALE_FLAG) ?
		    (src->wscale & PF_WSCALE_MASK) : TCP_MAX_WINSHIFT;
		dws = (dst->wscale & PF_WSCALE_FLAG) ?
		    (dst->wscale & PF_WSCALE_MASK) : TCP_MAX_WINSHIFT;
	} else {
		sws = dws = 0;
	}

	/*
	 * Sequence tracking algorithm from Guido van Rooij's paper:
	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
	 *	tcp_filtering.ps
	 */

	orig_seq = seq = ntohl(th->th_seq);
	if (src->seqlo == 0) {
		/* First packet from this end. Set its state */

		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
		    src->scrub == NULL) {
			if (pf_normalize_tcp_init(pbuf, off, pd, th, src, dst)) {
				REASON_SET(reason, PFRES_MEMORY);
				return PF_DROP;
			}
		}

		/* Deferred generation of sequence number modulator */
		if (dst->seqdiff && !src->seqdiff) {
			/* use random iss for the TCP server */
			while ((src->seqdiff = random() - seq) == 0) {
				;
			}
			ack = ntohl(th->th_ack) - dst->seqdiff;
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			copyback = off + sizeof(*th);
		} else {
			ack = ntohl(th->th_ack);
		}

		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
			if (dst->wscale & PF_WSCALE_FLAG) {
				src->wscale = pf_get_wscale(pbuf, off,
				    th->th_off, pd->af);
				if (src->wscale & PF_WSCALE_FLAG) {
					/*
					 * Remove scale factor from initial
					 * window
					 */
					sws = src->wscale & PF_WSCALE_MASK;
					win = ((u_int32_t)win + (1 << sws) - 1)
					    >> sws;
					dws = dst->wscale & PF_WSCALE_MASK;
				} else {
					/*
					 * Window scale negotiation has failed,
					 * therefore we must restore the window
					 * scale in the state record that we
					 * optimistically removed in
					 * pf_test_rule().  Care is required to
					 * prevent arithmetic overflow from
					 * zeroing the window when it's
					 * truncated down to 16-bits.
					 */
					u_int32_t max_win = dst->max_win;
					max_win <<=
					    dst->wscale & PF_WSCALE_MASK;
					dst->max_win = MIN(0xffff, max_win);
					/* in case of a retrans SYN|ACK */
					dst->wscale = 0;
				}
			}
		}
		if (th->th_flags & TH_FIN) {
			end++;
		}

		src->seqlo = seq;
		if (src->state < TCPS_SYN_SENT) {
			src->state = TCPS_SYN_SENT;
		}

		/*
		 * May need to slide the window (seqhi may have been set by
		 * the crappy stack check or if we picked up the connection
		 * after establishment)
		 */
		if (src->seqhi == 1 ||
		    SEQ_GEQ(end + MAX(1, (u_int32_t)dst->max_win << dws),
		    src->seqhi)) {
			src->seqhi = end + MAX(1, (u_int32_t)dst->max_win << dws);
		}
		if (win > src->max_win) {
			src->max_win = win;
		}
	} else {
		ack = ntohl(th->th_ack) - dst->seqdiff;
		if (src->seqdiff) {
			/* Modulate sequence numbers */
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			copyback = off + sizeof(*th);
		}
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
		}
		if (th->th_flags & TH_FIN) {
			end++;
		}
	}

	if ((th->th_flags & TH_ACK) == 0) {
		/* Let it pass through the ack skew check */
		ack = dst->seqlo;
	} else if ((ack == 0 &&
	    (th->th_flags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) ||
	    /* broken tcp stacks do not set ack */
	    (dst->state < TCPS_SYN_SENT)) {
		/*
		 * Many stacks (ours included) will set the ACK number in an
		 * FIN|ACK if the SYN times out -- no sequence to ACK.
		 */
		ack = dst->seqlo;
	}

	if (seq == end) {
		/* Ease sequencing restrictions on no data packets */
		seq = src->seqlo;
		end = seq;
	}

	ackskew = dst->seqlo - ack;


	/*
	 * Need to demodulate the sequence numbers in any TCP SACK options
	 * (Selective ACK). We could optionally validate the SACK values
	 * against the current ACK window, either forwards or backwards, but
	 * I'm not confident that SACK has been implemented properly
	 * everywhere. It wouldn't surprise me if several stacks accidently
	 * SACK too far backwards of previously ACKed data. There really aren't
	 * any security implications of bad SACKing unless the target stack
	 * doesn't validate the option length correctly. Someone trying to
	 * spoof into a TCP connection won't bother blindly sending SACK
	 * options anyway.
	 */
	if (dst->seqdiff && (th->th_off << 2) > (int)sizeof(struct tcphdr)) {
		copyback = pf_modulate_sack(pbuf, off, pd, th, dst);
		if (copyback == -1) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		pbuf = pd->mp;  // XXXSCW: Why?
	}


#define MAXACKWINDOW (0xffff + 1500)    /* 1500 is an arbitrary fudge factor */
	if (SEQ_GEQ(src->seqhi, end) &&
	    /* Last octet inside other's window space */
	    SEQ_GEQ(seq, src->seqlo - ((u_int32_t)dst->max_win << dws)) &&
	    /* Retrans: not more than one window back */
	    (ackskew >= -MAXACKWINDOW) &&
	    /* Acking not more than one reassembled fragment backwards */
	    (ackskew <= (MAXACKWINDOW << sws)) &&
	    /* Acking not more than one window forward */
	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
	    (pd->flags & PFDESC_IP_REAS) == 0)) {
		/* Require an exact/+1 sequence match on resets when possible */

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(pbuf, off, pd, reason, th,
			    *state, src, dst, &copyback)) {
				return PF_DROP;
			}

			pbuf = pd->mp;  // XXXSCW: Why?
		}

		/* update max window */
		if (src->max_win < win) {
			src->max_win = win;
		}
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo)) {
			src->seqlo = end;
		}
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi)) {
			dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
		}

		/* update states */
		if (th->th_flags & TH_SYN) {
			if (src->state < TCPS_SYN_SENT) {
				src->state = TCPS_SYN_SENT;
			}
		}
		if (th->th_flags & TH_FIN) {
			if (src->state < TCPS_CLOSING) {
				src->state = TCPS_CLOSING;
			}
		}
		if (th->th_flags & TH_ACK) {
			if (dst->state == TCPS_SYN_SENT) {
				dst->state = TCPS_ESTABLISHED;
				if (src->state == TCPS_ESTABLISHED &&
				    (*state)->src_node != NULL &&
				    pf_src_connlimit(state)) {
					REASON_SET(reason, PFRES_SRCLIMIT);
					return PF_DROP;
				}
			} else if (dst->state == TCPS_CLOSING) {
				dst->state = TCPS_FIN_WAIT_2;
			}
		}
		if (th->th_flags & TH_RST) {
			src->state = dst->state = TCPS_TIME_WAIT;
		}

		/* update expire time */
		(*state)->expire = pf_time_second();
		if (src->state >= TCPS_FIN_WAIT_2 &&
		    dst->state >= TCPS_FIN_WAIT_2) {
			(*state)->timeout = PFTM_TCP_CLOSED;
		} else if (src->state >= TCPS_CLOSING &&
		    dst->state >= TCPS_CLOSING) {
			(*state)->timeout = PFTM_TCP_FIN_WAIT;
		} else if (src->state < TCPS_ESTABLISHED ||
		    dst->state < TCPS_ESTABLISHED) {
			(*state)->timeout = PFTM_TCP_OPENING;
		} else if (src->state >= TCPS_CLOSING ||
		    dst->state >= TCPS_CLOSING) {
			(*state)->timeout = PFTM_TCP_CLOSING;
		} else {
			(*state)->timeout = PFTM_TCP_ESTABLISHED;
		}

		/* Fall through to PASS packet */
	} else if ((dst->state < TCPS_SYN_SENT ||
	    dst->state >= TCPS_FIN_WAIT_2 || src->state >= TCPS_FIN_WAIT_2) &&
	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
	    /* Within a window forward of the originating packet */
	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
		/* Within a window backward of the originating packet */

		/*
		 * This currently handles three situations:
		 *  1) Stupid stacks will shotgun SYNs before their peer
		 *     replies.
		 *  2) When PF catches an already established stream (the
		 *     firewall rebooted, the state table was flushed, routes
		 *     changed...)
		 *  3) Packets get funky immediately after the connection
		 *     closes (this should catch Solaris spurious ACK|FINs
		 *     that web servers like to spew after a close)
		 *
		 * This must be a little more careful than the above code
		 * since packet floods will also be caught here. We don't
		 * update the TTL here to mitigate the damage of a packet
		 * flood and so the same code can handle awkward establishment
		 * and a loosened connection close.
		 * In the establishment case, a correct peer response will
		 * validate the connection, go through the normal state code
		 * and keep updating the state TTL.
		 */

		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: loose state match: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
			    pd->p_len, ackskew, (*state)->packets[0],
			    (*state)->packets[1],
			    direction == PF_IN ? "in" : "out",
			    direction == sk->direction ?
			    "fwd" : "rev");
		}

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(pbuf, off, pd, reason, th,
			    *state, src, dst, &copyback)) {
				return PF_DROP;
			}
			pbuf = pd->mp;  // XXXSCW: Why?
		}

		/* update max window */
		if (src->max_win < win) {
			src->max_win = win;
		}
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo)) {
			src->seqlo = end;
		}
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi)) {
			dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
		}

		/*
		 * Cannot set dst->seqhi here since this could be a shotgunned
		 * SYN and not an already established connection.
		 */

		if (th->th_flags & TH_FIN) {
			if (src->state < TCPS_CLOSING) {
				src->state = TCPS_CLOSING;
			}
		}
		if (th->th_flags & TH_RST) {
			src->state = dst->state = TCPS_TIME_WAIT;
		}

		/* Fall through to PASS packet */
	} else {
		/* Segment failed both the strict and loose checks: drop. */
		if (dst->state == TCPS_SYN_SENT &&
		    src->state == TCPS_SYN_SENT) {
			/* Send RST for state mismatches during handshake */
			if (!(th->th_flags & TH_RST)) {
				pf_send_tcp((*state)->rule.ptr, pd->af,
				    pd->dst, pd->src, th->th_dport,
				    th->th_sport, ntohl(th->th_ack), 0,
				    TH_RST, 0, 0,
				    (*state)->rule.ptr->return_ttl, 1, 0,
				    pd->eh, kif->pfik_ifp);
			}
			src->seqlo = 0;
			src->seqhi = 1;
			src->max_win = 1;
		} else if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: BAD state: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf("\n   seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "sws=%u dws=%u pkts=%llu:%llu dir=%s,%s\n",
			    seq, orig_seq, ack, pd->p_len, ackskew,
			    (unsigned int)sws, (unsigned int)dws,
			    (*state)->packets[0], (*state)->packets[1],
			    direction == PF_IN ? "in" : "out",
			    direction == sk->direction ?
			    "fwd" : "rev");
			printf("pf: State failure on: %c %c %c %c | %c %c\n",
			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
			    SEQ_GEQ(seq,
			    src->seqlo - ((u_int32_t)dst->max_win << dws)) ?
			    ' ': '2',
			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
		}
		REASON_SET(reason, PFRES_BADSTATE);
		return PF_DROP;
	}

	/* Any packets which have gotten here are to be passed */

	/* Run the application-layer handler (e.g. protocol helpers), if any */
	if (sk->app_state &&
	    sk->app_state->handler) {
		sk->app_state->handler(*state, direction,
		    off + (th->th_off << 2), pd, kif);
		if (pd->lmw < 0) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}
		pbuf = pd->mp;  // XXXSCW: Why?
	}

	/* translate source/destination address, if necessary */
	if (STATE_TRANSLATE(sk)) {
		pd->naf = (pd->af == sk->af_lan) ? sk->af_gwy : sk->af_lan;

		if (direction == PF_OUT) {
			pf_change_ap(direction, pd->mp, pd->src, &th->th_sport,
			    pd->ip_sum, &th->th_sum, &sk->gwy.addr,
			    sk->gwy.xport.port, 0, pd->af, pd->naf, 1);
		} else {
			/*
			 * Inbound: NAT64 (af != naf) rewrites both addresses;
			 * plain NAT rewrites only the destination.
			 */
			if (pd->af != pd->naf) {
				if (pd->af == sk->af_gwy) {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &th->th_dport, pd->ip_sum,
					    &th->th_sum, &sk->lan.addr,
					    sk->lan.xport.port, 0,
					    pd->af, pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &th->th_sport, pd->ip_sum,
					    &th->th_sum, &sk->ext_lan.addr,
					    th->th_sport, 0, pd->af,
					    pd->naf, 0);
				} else {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &th->th_dport, pd->ip_sum,
					    &th->th_sum, &sk->ext_gwy.addr,
					    th->th_dport, 0, pd->af,
					    pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &th->th_sport, pd->ip_sum,
					    &th->th_sum, &sk->gwy.addr,
					    sk->gwy.xport.port, 0, pd->af,
					    pd->naf, 0);
				}
			} else {
				pf_change_ap(direction, pd->mp, pd->dst,
				    &th->th_dport, pd->ip_sum,
				    &th->th_sum, &sk->lan.addr,
				    sk->lan.xport.port, 0, pd->af,
				    pd->naf, 1);
			}
		}

		copyback = off + sizeof(*th);
	}

	if (copyback) {
		if (pf_lazy_makewritable(pd, pbuf, copyback) == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		/* Copyback sequence modulation or stateful scrub changes */
		pbuf_copy_back(pbuf, off, sizeof(*th), th);

		/* address families differ: finish with NAT64 translation */
		if (sk->af_lan != sk->af_gwy) {
			return pf_do_nat64(sk, pd, pbuf, off);
		}
	}
	return PF_PASS;
}
7581 
/*
 * Stateful inspection for a UDP datagram.
 *
 * Looks up the pf state (trying the progressively looser PF_EXTFILTER_*
 * proto variants, and special-casing IKE traffic on port 500 so states
 * can be matched per initiator cookie), updates the single/multiple
 * peer state machine and the expiry timer, widens stored external
 * endpoints for loose extfilter modes, runs any app-state handler, and
 * applies address/port translation if required.
 *
 * Returns PF_PASS, PF_DROP, or the result of pf_do_nat64() when the
 * state translates between address families.  On DROP, *reason is set
 * to the PFRES_* cause.
 */
static __attribute__((noinline)) int
pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd, u_short *reason)
{
#pragma unused(h)
	struct pf_state_peer    *src, *dst;
	struct pf_state_key_cmp  key;
	struct pf_state_key     *sk;
	struct udphdr           *uh = pd->hdr.udp;
	struct pf_app_state as;
	int action, extfilter;
	key.app_state = 0;
	key.proto_variant = PF_EXTFILTER_APD;   /* strictest variant first */

	key.proto = IPPROTO_UDP;
	key.af_lan = key.af_gwy = pd->af;

	/*
	 * For NAT64 the first time rule search and state creation
	 * is done on the incoming side only.
	 * Once the state gets created, NAT64's LAN side (ipv6) will
	 * not be able to find the state in ext-gwy tree as that normally
	 * is intended to be looked up for incoming traffic from the
	 * WAN side.
	 * Therefore to handle NAT64 case we init keys here for both
	 * lan-ext as well as ext-gwy trees.
	 * In the state lookup we attempt a lookup on both trees if
	 * first one does not return any result and return a match if
	 * the match state's was created by NAT64 rule.
	 */
	PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
	PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
	key.ext_gwy.xport.port = uh->uh_sport;
	key.gwy.xport.port = uh->uh_dport;

	PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
	PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
	key.lan.xport.port = uh->uh_sport;
	key.ext_lan.xport.port = uh->uh_dport;

	/*
	 * IKE (port 500 <-> 500): attach an app-state comparator so
	 * multiple IKE exchanges between the same hosts can be told
	 * apart by initiator cookie.
	 */
	if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
	    ntohs(uh->uh_dport) == PF_IKE_PORT) {
		struct pf_ike_hdr ike;
		size_t plen = pbuf->pb_packet_len - off - sizeof(*uh);
		if (plen < PF_IKE_PACKET_MINSIZE) {
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: IKE message too small.\n"));
			return PF_DROP;
		}

		if (plen > sizeof(ike)) {
			plen = sizeof(ike);
		}
		pbuf_copy_data(pbuf, off + sizeof(*uh), plen, &ike);

		if (ike.initiator_cookie) {
			key.app_state = &as;
			as.compare_lan_ext = pf_ike_compare;
			as.compare_ext_gwy = pf_ike_compare;
			as.u.ike.cookie = ike.initiator_cookie;
		} else {
			/*
			 * <http://tools.ietf.org/html/\
			 *    draft-ietf-ipsec-nat-t-ike-01>
			 * Support non-standard NAT-T implementations that
			 * push the ESP packet over the top of the IKE packet.
			 * Do not drop packet.
			 */
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: IKE initiator cookie = 0.\n"));
		}
	}

	*state = pf_find_state(kif, &key, direction);

	/* fall back to progressively looser external-filtering variants */
	if (!key.app_state && *state == 0) {
		key.proto_variant = PF_EXTFILTER_AD;
		*state = pf_find_state(kif, &key, direction);
	}

	if (!key.app_state && *state == 0) {
		key.proto_variant = PF_EXTFILTER_EI;
		*state = pf_find_state(kif, &key, direction);
	}

	/* similar to STATE_LOOKUP() */
	if (*state != NULL && pd != NULL && !(pd->pktflags & PKTF_FLOW_ID)) {
		pd->flowsrc = (*state)->state_key->flowsrc;
		pd->flowhash = (*state)->state_key->flowhash;
		if (pd->flowhash != 0) {
			pd->pktflags |= PKTF_FLOW_ID;
			pd->pktflags &= ~PKTF_FLOW_ADV;
		}
	}

	if (pf_state_lookup_aux(state, kif, direction, &action)) {
		return action;
	}

	sk = (*state)->state_key;

	/*
	 * In case of NAT64 the translation is first applied on the LAN
	 * side. Therefore for stack's address family comparison
	 * we use sk->af_lan.
	 */
	if ((direction == sk->direction) && (pd->af == sk->af_lan)) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFUDPS_SINGLE) {
		src->state = PFUDPS_SINGLE;
	}
	if (dst->state == PFUDPS_SINGLE) {
		dst->state = PFUDPS_MULTIPLE;
	}

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) {
		(*state)->timeout = PFTM_UDP_MULTIPLE;
	} else {
		(*state)->timeout = PFTM_UDP_SINGLE;
	}

	/*
	 * Loose extfilter variants: adopt the external endpoint seen in
	 * this packet (port always; address too for the loosest mode).
	 */
	extfilter = sk->proto_variant;
	if (extfilter > PF_EXTFILTER_APD) {
		if (direction == PF_OUT) {
			sk->ext_lan.xport.port = key.ext_lan.xport.port;
			if (extfilter > PF_EXTFILTER_AD) {
				PF_ACPY(&sk->ext_lan.addr, &key.ext_lan.addr,
				    key.af_lan);
			}
		} else {
			sk->ext_gwy.xport.port = key.ext_gwy.xport.port;
			if (extfilter > PF_EXTFILTER_AD) {
				PF_ACPY(&sk->ext_gwy.addr, &key.ext_gwy.addr,
				    key.af_gwy);
			}
		}
	}

	if (sk->app_state && sk->app_state->handler) {
		/*
		 * NOTE(review): uh_ulen is in network byte order here and is
		 * passed without ntohs() — looks suspicious; confirm the
		 * handler's expectation for this offset argument.
		 */
		sk->app_state->handler(*state, direction, off + uh->uh_ulen,
		    pd, kif);
		if (pd->lmw < 0) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}
		pbuf = pd->mp;  // XXXSCW: Why?
	}

	/* translate source/destination address, if necessary */
	if (STATE_TRANSLATE(sk)) {
		if (pf_lazy_makewritable(pd, pbuf, off + sizeof(*uh)) == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		pd->naf = (pd->af == sk->af_lan) ? sk->af_gwy : sk->af_lan;

		if (direction == PF_OUT) {
			pf_change_ap(direction, pd->mp, pd->src, &uh->uh_sport,
			    pd->ip_sum, &uh->uh_sum, &sk->gwy.addr,
			    sk->gwy.xport.port, 1, pd->af, pd->naf, 1);
		} else {
			/*
			 * Inbound: NAT64 (af != naf) rewrites both addresses;
			 * plain NAT rewrites only the destination.
			 */
			if (pd->af != pd->naf) {
				if (pd->af == sk->af_gwy) {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &uh->uh_dport, pd->ip_sum,
					    &uh->uh_sum, &sk->lan.addr,
					    sk->lan.xport.port, 1,
					    pd->af, pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &uh->uh_sport, pd->ip_sum,
					    &uh->uh_sum, &sk->ext_lan.addr,
					    uh->uh_sport, 1, pd->af,
					    pd->naf, 0);
				} else {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &uh->uh_dport, pd->ip_sum,
					    &uh->uh_sum, &sk->ext_gwy.addr,
					    uh->uh_dport, 1, pd->af,
					    pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &uh->uh_sport, pd->ip_sum,
					    &uh->uh_sum, &sk->gwy.addr,
					    sk->gwy.xport.port, 1, pd->af,
					    pd->naf, 0);
				}
			} else {
				pf_change_ap(direction, pd->mp, pd->dst,
				    &uh->uh_dport, pd->ip_sum,
				    &uh->uh_sum, &sk->lan.addr,
				    sk->lan.xport.port, 1,
				    pd->af, pd->naf, 1);
			}
		}

		pbuf_copy_back(pbuf, off, sizeof(*uh), uh);
		/* address families differ: finish with NAT64 translation */
		if (sk->af_lan != sk->af_gwy) {
			return pf_do_nat64(sk, pd, pbuf, off);
		}
	}
	return PF_PASS;
}
7795 
7796 static u_int32_t
pf_compute_packet_icmp_gencnt(uint32_t af,u_int32_t type,u_int32_t code)7797 pf_compute_packet_icmp_gencnt(uint32_t af, u_int32_t type, u_int32_t code)
7798 {
7799 	if (af == PF_INET) {
7800 		if (type != ICMP_UNREACH && type != ICMP_TIMXCEED) {
7801 			return 0;
7802 		}
7803 	} else {
7804 		if (type != ICMP6_DST_UNREACH && type != ICMP6_PARAM_PROB &&
7805 		    type != ICMP6_TIME_EXCEEDED) {
7806 			return 0;
7807 		}
7808 	}
7809 	return (af << 24) | (type << 16) | (code << 8);
7810 }
7811 
7812 
7813 static __attribute__((noinline)) int
pf_test_state_icmp(struct pf_state ** state,int direction,struct pfi_kif * kif,pbuf_t * pbuf,int off,void * h,struct pf_pdesc * pd,u_short * reason)7814 pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
7815     pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd, u_short *reason)
7816 {
7817 #pragma unused(h)
7818 	struct pf_addr  *saddr = pd->src, *daddr = pd->dst;
7819 	struct in_addr  srcv4_inaddr = saddr->v4addr;
7820 	u_int16_t        icmpid = 0, *icmpsum = NULL;
7821 	u_int8_t         icmptype = 0;
7822 	u_int32_t        icmpcode = 0;
7823 	int              state_icmp = 0;
7824 	struct pf_state_key_cmp key;
7825 	struct pf_state_key     *sk;
7826 
7827 	struct pf_app_state as;
7828 	key.app_state = 0;
7829 
7830 	pd->off = off;
7831 
7832 	switch (pd->proto) {
7833 #if INET
7834 	case IPPROTO_ICMP:
7835 		icmptype = pd->hdr.icmp->icmp_type;
7836 		icmpid = pd->hdr.icmp->icmp_id;
7837 		icmpsum = &pd->hdr.icmp->icmp_cksum;
7838 		icmpcode = pd->hdr.icmp->icmp_code;
7839 
7840 		if (ICMP_ERRORTYPE(icmptype)) {
7841 			state_icmp++;
7842 		}
7843 		break;
7844 #endif /* INET */
7845 	case IPPROTO_ICMPV6:
7846 		icmptype = pd->hdr.icmp6->icmp6_type;
7847 		icmpid = pd->hdr.icmp6->icmp6_id;
7848 		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
7849 		icmpcode = pd->hdr.icmp6->icmp6_code;
7850 
7851 		if (ICMP6_ERRORTYPE(icmptype)) {
7852 			state_icmp++;
7853 		}
7854 		break;
7855 	}
7856 
7857 	if (pbuf != NULL && pbuf->pb_flow_gencnt != NULL &&
7858 	    *pbuf->pb_flow_gencnt == 0) {
7859 		u_int32_t af = pd->proto == IPPROTO_ICMP ? PF_INET : PF_INET6;
7860 		*pbuf->pb_flow_gencnt = pf_compute_packet_icmp_gencnt(af, icmptype, icmpcode);
7861 	}
7862 
7863 	if (!state_icmp) {
7864 		/*
7865 		 * ICMP query/reply message not related to a TCP/UDP packet.
7866 		 * Search for an ICMP state.
7867 		 */
7868 		/*
7869 		 * NAT64 requires protocol translation  between ICMPv4
7870 		 * and ICMPv6. TCP and UDP do not require protocol
7871 		 * translation. To avoid adding complexity just to
7872 		 * handle ICMP(v4addr/v6addr), we always lookup  for
7873 		 * proto = IPPROTO_ICMP on both LAN and WAN side
7874 		 */
7875 		key.proto = IPPROTO_ICMP;
7876 		key.af_lan = key.af_gwy = pd->af;
7877 
7878 		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
7879 		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
7880 		key.ext_gwy.xport.port = 0;
7881 		key.gwy.xport.port = icmpid;
7882 
7883 		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
7884 		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
7885 		key.lan.xport.port = icmpid;
7886 		key.ext_lan.xport.port = 0;
7887 
7888 		STATE_LOOKUP();
7889 
7890 		sk = (*state)->state_key;
7891 		(*state)->expire = pf_time_second();
7892 		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
7893 
7894 		/* translate source/destination address, if necessary */
7895 		if (STATE_TRANSLATE(sk)) {
7896 			pd->naf = (pd->af == sk->af_lan) ?
7897 			    sk->af_gwy : sk->af_lan;
7898 			if (direction == PF_OUT) {
7899 				switch (pd->af) {
7900 #if INET
7901 				case AF_INET:
7902 					pf_change_a(&saddr->v4addr.s_addr,
7903 					    pd->ip_sum,
7904 					    sk->gwy.addr.v4addr.s_addr, 0);
7905 					pd->hdr.icmp->icmp_cksum =
7906 					    pf_cksum_fixup(
7907 						pd->hdr.icmp->icmp_cksum, icmpid,
7908 						sk->gwy.xport.port, 0);
7909 					pd->hdr.icmp->icmp_id =
7910 					    sk->gwy.xport.port;
7911 					if (pf_lazy_makewritable(pd, pbuf,
7912 					    off + ICMP_MINLEN) == NULL) {
7913 						return PF_DROP;
7914 					}
7915 					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
7916 					    pd->hdr.icmp);
7917 					break;
7918 #endif /* INET */
7919 				case AF_INET6:
7920 					pf_change_a6(saddr,
7921 					    &pd->hdr.icmp6->icmp6_cksum,
7922 					    &sk->gwy.addr, 0);
7923 					if (pf_lazy_makewritable(pd, pbuf,
7924 					    off + sizeof(struct icmp6_hdr)) ==
7925 					    NULL) {
7926 						return PF_DROP;
7927 					}
7928 					pbuf_copy_back(pbuf, off,
7929 					    sizeof(struct icmp6_hdr),
7930 					    pd->hdr.icmp6);
7931 					break;
7932 				}
7933 			} else {
7934 				switch (pd->af) {
7935 #if INET
7936 				case AF_INET:
7937 					if (pd->naf != AF_INET) {
7938 						if (pf_translate_icmp_af(
7939 							    AF_INET6, pd->hdr.icmp)) {
7940 							return PF_DROP;
7941 						}
7942 
7943 						pd->proto = IPPROTO_ICMPV6;
7944 					} else {
7945 						pf_change_a(&daddr->v4addr.s_addr,
7946 						    pd->ip_sum,
7947 						    sk->lan.addr.v4addr.s_addr, 0);
7948 
7949 						pd->hdr.icmp->icmp_cksum =
7950 						    pf_cksum_fixup(
7951 							pd->hdr.icmp->icmp_cksum,
7952 							icmpid, sk->lan.xport.port, 0);
7953 
7954 						pd->hdr.icmp->icmp_id =
7955 						    sk->lan.xport.port;
7956 					}
7957 
7958 					if (pf_lazy_makewritable(pd, pbuf,
7959 					    off + ICMP_MINLEN) == NULL) {
7960 						return PF_DROP;
7961 					}
7962 					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
7963 					    pd->hdr.icmp);
7964 					if (sk->af_lan != sk->af_gwy) {
7965 						return pf_do_nat64(sk, pd,
7966 						           pbuf, off);
7967 					}
7968 					break;
7969 #endif /* INET */
7970 				case AF_INET6:
7971 					if (pd->naf != AF_INET6) {
7972 						if (pf_translate_icmp_af(
7973 							    AF_INET, pd->hdr.icmp6)) {
7974 							return PF_DROP;
7975 						}
7976 
7977 						pd->proto = IPPROTO_ICMP;
7978 					} else {
7979 						pf_change_a6(daddr,
7980 						    &pd->hdr.icmp6->icmp6_cksum,
7981 						    &sk->lan.addr, 0);
7982 					}
7983 					if (pf_lazy_makewritable(pd, pbuf,
7984 					    off + sizeof(struct icmp6_hdr)) ==
7985 					    NULL) {
7986 						return PF_DROP;
7987 					}
7988 					pbuf_copy_back(pbuf, off,
7989 					    sizeof(struct icmp6_hdr),
7990 					    pd->hdr.icmp6);
7991 					if (sk->af_lan != sk->af_gwy) {
7992 						return pf_do_nat64(sk, pd,
7993 						           pbuf, off);
7994 					}
7995 					break;
7996 				}
7997 			}
7998 		}
7999 
8000 		return PF_PASS;
8001 	} else {
8002 		/*
8003 		 * ICMP error message in response to a TCP/UDP packet.
8004 		 * Extract the inner TCP/UDP header and search for that state.
8005 		 */
8006 		struct pf_pdesc pd2; /* For inner (original) header */
8007 #if INET
8008 		struct ip       h2;
8009 #endif /* INET */
8010 		struct ip6_hdr  h2_6;
8011 		int             terminal = 0;
8012 		int             ipoff2 = 0;
8013 		int             off2 = 0;
8014 
8015 		memset(&pd2, 0, sizeof(pd2));
8016 
8017 		pd2.af = pd->af;
8018 		switch (pd->af) {
8019 #if INET
8020 		case AF_INET:
8021 			/* offset of h2 in mbuf chain */
8022 			ipoff2 = off + ICMP_MINLEN;
8023 
8024 			if (!pf_pull_hdr(pbuf, ipoff2, &h2, sizeof(h2),
8025 			    NULL, reason, pd2.af)) {
8026 				DPFPRINTF(PF_DEBUG_MISC,
8027 				    ("pf: ICMP error message too short "
8028 				    "(ip)\n"));
8029 				return PF_DROP;
8030 			}
8031 			/*
8032 			 * ICMP error messages don't refer to non-first
8033 			 * fragments
8034 			 */
8035 			if (h2.ip_off & htons(IP_OFFMASK)) {
8036 				REASON_SET(reason, PFRES_FRAG);
8037 				return PF_DROP;
8038 			}
8039 
8040 			/* offset of protocol header that follows h2 */
8041 			off2 = ipoff2 + (h2.ip_hl << 2);
8042 			/* TODO */
8043 			pd2.off = ipoff2 + (h2.ip_hl << 2);
8044 
8045 			pd2.proto = h2.ip_p;
8046 			pd2.src = (struct pf_addr *)&h2.ip_src;
8047 			pd2.dst = (struct pf_addr *)&h2.ip_dst;
8048 			pd2.ip_sum = &h2.ip_sum;
8049 			break;
8050 #endif /* INET */
8051 		case AF_INET6:
8052 			ipoff2 = off + sizeof(struct icmp6_hdr);
8053 
8054 			if (!pf_pull_hdr(pbuf, ipoff2, &h2_6, sizeof(h2_6),
8055 			    NULL, reason, pd2.af)) {
8056 				DPFPRINTF(PF_DEBUG_MISC,
8057 				    ("pf: ICMP error message too short "
8058 				    "(ip6)\n"));
8059 				return PF_DROP;
8060 			}
8061 			pd2.proto = h2_6.ip6_nxt;
8062 			pd2.src = (struct pf_addr *)(uintptr_t)&h2_6.ip6_src;
8063 			pd2.dst = (struct pf_addr *)(uintptr_t)&h2_6.ip6_dst;
8064 			pd2.ip_sum = NULL;
8065 			off2 = ipoff2 + sizeof(h2_6);
8066 			do {
8067 				switch (pd2.proto) {
8068 				case IPPROTO_FRAGMENT:
8069 					/*
8070 					 * ICMPv6 error messages for
8071 					 * non-first fragments
8072 					 */
8073 					REASON_SET(reason, PFRES_FRAG);
8074 					return PF_DROP;
8075 				case IPPROTO_AH:
8076 				case IPPROTO_HOPOPTS:
8077 				case IPPROTO_ROUTING:
8078 				case IPPROTO_DSTOPTS: {
8079 					/* get next header and header length */
8080 					struct ip6_ext opt6;
8081 
8082 					if (!pf_pull_hdr(pbuf, off2, &opt6,
8083 					    sizeof(opt6), NULL, reason,
8084 					    pd2.af)) {
8085 						DPFPRINTF(PF_DEBUG_MISC,
8086 						    ("pf: ICMPv6 short opt\n"));
8087 						return PF_DROP;
8088 					}
8089 					if (pd2.proto == IPPROTO_AH) {
8090 						off2 += (opt6.ip6e_len + 2) * 4;
8091 					} else {
8092 						off2 += (opt6.ip6e_len + 1) * 8;
8093 					}
8094 					pd2.proto = opt6.ip6e_nxt;
8095 					/* goto the next header */
8096 					break;
8097 				}
8098 				default:
8099 					terminal++;
8100 					break;
8101 				}
8102 			} while (!terminal);
8103 			/* TODO */
8104 			pd2.off = ipoff2;
8105 			break;
8106 		}
8107 
8108 		switch (pd2.proto) {
8109 		case IPPROTO_TCP: {
8110 			struct tcphdr            th;
8111 			u_int32_t                seq;
8112 			struct pf_state_peer    *src, *dst;
8113 			u_int8_t                 dws;
8114 			int                      copyback = 0;
8115 
8116 			/*
8117 			 * Only the first 8 bytes of the TCP header can be
8118 			 * expected. Don't access any TCP header fields after
8119 			 * th_seq, an ackskew test is not possible.
8120 			 */
8121 			if (!pf_pull_hdr(pbuf, off2, &th, 8, NULL, reason,
8122 			    pd2.af)) {
8123 				DPFPRINTF(PF_DEBUG_MISC,
8124 				    ("pf: ICMP error message too short "
8125 				    "(tcp)\n"));
8126 				return PF_DROP;
8127 			}
8128 
8129 			key.proto = IPPROTO_TCP;
8130 			key.af_gwy = pd2.af;
8131 			PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8132 			PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8133 			key.ext_gwy.xport.port = th.th_dport;
8134 			key.gwy.xport.port = th.th_sport;
8135 
8136 			key.af_lan = pd2.af;
8137 			PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8138 			PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8139 			key.lan.xport.port = th.th_dport;
8140 			key.ext_lan.xport.port = th.th_sport;
8141 
8142 			STATE_LOOKUP();
8143 
8144 			sk = (*state)->state_key;
8145 			if ((direction == sk->direction) &&
8146 			    ((sk->af_lan == sk->af_gwy) ||
8147 			    (pd2.af == sk->af_lan))) {
8148 				src = &(*state)->dst;
8149 				dst = &(*state)->src;
8150 			} else {
8151 				src = &(*state)->src;
8152 				dst = &(*state)->dst;
8153 			}
8154 
8155 			if (src->wscale && (dst->wscale & PF_WSCALE_FLAG)) {
8156 				dws = dst->wscale & PF_WSCALE_MASK;
8157 			} else {
8158 				dws = TCP_MAX_WINSHIFT;
8159 			}
8160 
8161 			/* Demodulate sequence number */
8162 			seq = ntohl(th.th_seq) - src->seqdiff;
8163 			if (src->seqdiff) {
8164 				pf_change_a(&th.th_seq, icmpsum,
8165 				    htonl(seq), 0);
8166 				copyback = 1;
8167 			}
8168 
8169 			if (!SEQ_GEQ(src->seqhi, seq) ||
8170 			    !SEQ_GEQ(seq,
8171 			    src->seqlo - ((u_int32_t)dst->max_win << dws))) {
8172 				if (pf_status.debug >= PF_DEBUG_MISC) {
8173 					printf("pf: BAD ICMP %d:%d ",
8174 					    icmptype, pd->hdr.icmp->icmp_code);
8175 					pf_print_host(pd->src, 0, pd->af);
8176 					printf(" -> ");
8177 					pf_print_host(pd->dst, 0, pd->af);
8178 					printf(" state: ");
8179 					pf_print_state(*state);
8180 					printf(" seq=%u\n", seq);
8181 				}
8182 				REASON_SET(reason, PFRES_BADSTATE);
8183 				return PF_DROP;
8184 			}
8185 
8186 			pd->naf = pd2.naf = (pd2.af == sk->af_lan) ?
8187 			    sk->af_gwy : sk->af_lan;
8188 
8189 			if (STATE_TRANSLATE(sk)) {
8190 				/* NAT64 case */
8191 				if (sk->af_lan != sk->af_gwy) {
8192 					struct pf_state_host *saddr2, *daddr2;
8193 
8194 					if (pd2.naf == sk->af_lan) {
8195 						saddr2 = &sk->lan;
8196 						daddr2 = &sk->ext_lan;
8197 					} else {
8198 						saddr2 = &sk->ext_gwy;
8199 						daddr2 = &sk->gwy;
8200 					}
8201 
8202 					/* translate ICMP message types and codes */
8203 					if (pf_translate_icmp_af(pd->naf,
8204 					    pd->hdr.icmp)) {
8205 						return PF_DROP;
8206 					}
8207 
8208 					if (pf_lazy_makewritable(pd, pbuf,
8209 					    off2 + 8) == NULL) {
8210 						return PF_DROP;
8211 					}
8212 
8213 					pbuf_copy_back(pbuf, pd->off,
8214 					    sizeof(struct icmp6_hdr),
8215 					    pd->hdr.icmp6);
8216 
8217 					/*
8218 					 * translate inner ip header within the
8219 					 * ICMP message
8220 					 */
8221 					if (pf_change_icmp_af(pbuf, ipoff2, pd,
8222 					    &pd2, &saddr2->addr, &daddr2->addr,
8223 					    pd->af, pd->naf)) {
8224 						return PF_DROP;
8225 					}
8226 
8227 					if (pd->naf == AF_INET) {
8228 						pd->proto = IPPROTO_ICMP;
8229 					} else {
8230 						pd->proto = IPPROTO_ICMPV6;
8231 					}
8232 
8233 					/*
8234 					 * translate inner tcp header within
8235 					 * the ICMP message
8236 					 */
8237 					pf_change_ap(direction, NULL, pd2.src,
8238 					    &th.th_sport, pd2.ip_sum,
8239 					    &th.th_sum, &daddr2->addr,
8240 					    saddr2->xport.port, 0, pd2.af,
8241 					    pd2.naf, 0);
8242 
8243 					pf_change_ap(direction, NULL, pd2.dst,
8244 					    &th.th_dport, pd2.ip_sum,
8245 					    &th.th_sum, &saddr2->addr,
8246 					    daddr2->xport.port, 0, pd2.af,
8247 					    pd2.naf, 0);
8248 
8249 					pbuf_copy_back(pbuf, pd2.off, 8, &th);
8250 
8251 					/* translate outer ip header */
8252 					PF_ACPY(&pd->naddr, &daddr2->addr,
8253 					    pd->naf);
8254 					PF_ACPY(&pd->ndaddr, &saddr2->addr,
8255 					    pd->naf);
8256 					if (pd->af == AF_INET) {
8257 						memcpy(&pd->naddr.addr32[3],
8258 						    &srcv4_inaddr,
8259 						    sizeof(pd->naddr.addr32[3]));
8260 						return pf_nat64_ipv4(pbuf, off,
8261 						           pd);
8262 					} else {
8263 						return pf_nat64_ipv6(pbuf, off,
8264 						           pd);
8265 					}
8266 				}
8267 				if (direction == PF_IN) {
8268 					pf_change_icmp(pd2.src, &th.th_sport,
8269 					    daddr, &sk->lan.addr,
8270 					    sk->lan.xport.port, NULL,
8271 					    pd2.ip_sum, icmpsum,
8272 					    pd->ip_sum, 0, pd2.af);
8273 				} else {
8274 					pf_change_icmp(pd2.dst, &th.th_dport,
8275 					    saddr, &sk->gwy.addr,
8276 					    sk->gwy.xport.port, NULL,
8277 					    pd2.ip_sum, icmpsum,
8278 					    pd->ip_sum, 0, pd2.af);
8279 				}
8280 				copyback = 1;
8281 			}
8282 
8283 			if (copyback) {
8284 				if (pf_lazy_makewritable(pd, pbuf, off2 + 8) ==
8285 				    NULL) {
8286 					return PF_DROP;
8287 				}
8288 				switch (pd2.af) {
8289 #if INET
8290 				case AF_INET:
8291 					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
8292 					    pd->hdr.icmp);
8293 					pbuf_copy_back(pbuf, ipoff2, sizeof(h2),
8294 					    &h2);
8295 					break;
8296 #endif /* INET */
8297 				case AF_INET6:
8298 					pbuf_copy_back(pbuf, off,
8299 					    sizeof(struct icmp6_hdr),
8300 					    pd->hdr.icmp6);
8301 					pbuf_copy_back(pbuf, ipoff2,
8302 					    sizeof(h2_6), &h2_6);
8303 					break;
8304 				}
8305 				pbuf_copy_back(pbuf, off2, 8, &th);
8306 			}
8307 
8308 			return PF_PASS;
8309 		}
8310 		case IPPROTO_UDP: {
8311 			struct udphdr uh;
8312 			int dx, action;
8313 			if (!pf_pull_hdr(pbuf, off2, &uh, sizeof(uh),
8314 			    NULL, reason, pd2.af)) {
8315 				DPFPRINTF(PF_DEBUG_MISC,
8316 				    ("pf: ICMP error message too short "
8317 				    "(udp)\n"));
8318 				return PF_DROP;
8319 			}
8320 
8321 			key.af_gwy = pd2.af;
8322 			PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8323 			PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8324 			key.ext_gwy.xport.port = uh.uh_dport;
8325 			key.gwy.xport.port = uh.uh_sport;
8326 
8327 			key.af_lan = pd2.af;
8328 			PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8329 			PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8330 			key.lan.xport.port = uh.uh_dport;
8331 			key.ext_lan.xport.port = uh.uh_sport;
8332 
8333 			key.proto = IPPROTO_UDP;
8334 			key.proto_variant = PF_EXTFILTER_APD;
8335 			dx = direction;
8336 
8337 			if (ntohs(uh.uh_sport) == PF_IKE_PORT &&
8338 			    ntohs(uh.uh_dport) == PF_IKE_PORT) {
8339 				struct pf_ike_hdr ike;
8340 				size_t plen = pbuf->pb_packet_len - off2 -
8341 				    sizeof(uh);
8342 				if (direction == PF_IN &&
8343 				    plen < 8 /* PF_IKE_PACKET_MINSIZE */) {
8344 					DPFPRINTF(PF_DEBUG_MISC, ("pf: "
8345 					    "ICMP error, embedded IKE message "
8346 					    "too small.\n"));
8347 					return PF_DROP;
8348 				}
8349 
8350 				if (plen > sizeof(ike)) {
8351 					plen = sizeof(ike);
8352 				}
8353 				pbuf_copy_data(pbuf, off + sizeof(uh), plen,
8354 				    &ike);
8355 
8356 				key.app_state = &as;
8357 				as.compare_lan_ext = pf_ike_compare;
8358 				as.compare_ext_gwy = pf_ike_compare;
8359 				as.u.ike.cookie = ike.initiator_cookie;
8360 			}
8361 
8362 			*state = pf_find_state(kif, &key, dx);
8363 
8364 			if (key.app_state && *state == 0) {
8365 				key.app_state = 0;
8366 				*state = pf_find_state(kif, &key, dx);
8367 			}
8368 
8369 			if (*state == 0) {
8370 				key.proto_variant = PF_EXTFILTER_AD;
8371 				*state = pf_find_state(kif, &key, dx);
8372 			}
8373 
8374 			if (*state == 0) {
8375 				key.proto_variant = PF_EXTFILTER_EI;
8376 				*state = pf_find_state(kif, &key, dx);
8377 			}
8378 
8379 			/* similar to STATE_LOOKUP() */
8380 			if (*state != NULL && pd != NULL &&
8381 			    !(pd->pktflags & PKTF_FLOW_ID)) {
8382 				pd->flowsrc = (*state)->state_key->flowsrc;
8383 				pd->flowhash = (*state)->state_key->flowhash;
8384 				if (pd->flowhash != 0) {
8385 					pd->pktflags |= PKTF_FLOW_ID;
8386 					pd->pktflags &= ~PKTF_FLOW_ADV;
8387 				}
8388 			}
8389 
8390 			if (pf_state_lookup_aux(state, kif, direction, &action)) {
8391 				return action;
8392 			}
8393 
8394 			sk = (*state)->state_key;
8395 			pd->naf = pd2.naf = (pd2.af == sk->af_lan) ?
8396 			    sk->af_gwy : sk->af_lan;
8397 
8398 			if (STATE_TRANSLATE(sk)) {
8399 				/* NAT64 case */
8400 				if (sk->af_lan != sk->af_gwy) {
8401 					struct pf_state_host *saddr2, *daddr2;
8402 
8403 					if (pd2.naf == sk->af_lan) {
8404 						saddr2 = &sk->lan;
8405 						daddr2 = &sk->ext_lan;
8406 					} else {
8407 						saddr2 = &sk->ext_gwy;
8408 						daddr2 = &sk->gwy;
8409 					}
8410 
8411 					/* translate ICMP message */
8412 					if (pf_translate_icmp_af(pd->naf,
8413 					    pd->hdr.icmp)) {
8414 						return PF_DROP;
8415 					}
8416 					if (pf_lazy_makewritable(pd, pbuf,
8417 					    off2 + 8) == NULL) {
8418 						return PF_DROP;
8419 					}
8420 
8421 					pbuf_copy_back(pbuf, pd->off,
8422 					    sizeof(struct icmp6_hdr),
8423 					    pd->hdr.icmp6);
8424 
8425 					/*
8426 					 * translate inner ip header within the
8427 					 * ICMP message
8428 					 */
8429 					if (pf_change_icmp_af(pbuf, ipoff2, pd,
8430 					    &pd2, &saddr2->addr, &daddr2->addr,
8431 					    pd->af, pd->naf)) {
8432 						return PF_DROP;
8433 					}
8434 
8435 					if (pd->naf == AF_INET) {
8436 						pd->proto = IPPROTO_ICMP;
8437 					} else {
8438 						pd->proto = IPPROTO_ICMPV6;
8439 					}
8440 
8441 					/*
8442 					 * translate inner udp header within
8443 					 * the ICMP message
8444 					 */
8445 					pf_change_ap(direction, NULL, pd2.src,
8446 					    &uh.uh_sport, pd2.ip_sum,
8447 					    &uh.uh_sum, &daddr2->addr,
8448 					    saddr2->xport.port, 0, pd2.af,
8449 					    pd2.naf, 0);
8450 
8451 					pf_change_ap(direction, NULL, pd2.dst,
8452 					    &uh.uh_dport, pd2.ip_sum,
8453 					    &uh.uh_sum, &saddr2->addr,
8454 					    daddr2->xport.port, 0, pd2.af,
8455 					    pd2.naf, 0);
8456 
8457 					pbuf_copy_back(pbuf, pd2.off,
8458 					    sizeof(uh), &uh);
8459 
8460 					/* translate outer ip header */
8461 					PF_ACPY(&pd->naddr, &daddr2->addr,
8462 					    pd->naf);
8463 					PF_ACPY(&pd->ndaddr, &saddr2->addr,
8464 					    pd->naf);
8465 					if (pd->af == AF_INET) {
8466 						memcpy(&pd->naddr.addr32[3],
8467 						    &srcv4_inaddr,
8468 						    sizeof(pd->naddr.addr32[3]));
8469 						return pf_nat64_ipv4(pbuf, off,
8470 						           pd);
8471 					} else {
8472 						return pf_nat64_ipv6(pbuf, off,
8473 						           pd);
8474 					}
8475 				}
8476 				if (direction == PF_IN) {
8477 					pf_change_icmp(pd2.src, &uh.uh_sport,
8478 					    daddr, &sk->lan.addr,
8479 					    sk->lan.xport.port, &uh.uh_sum,
8480 					    pd2.ip_sum, icmpsum,
8481 					    pd->ip_sum, 1, pd2.af);
8482 				} else {
8483 					pf_change_icmp(pd2.dst, &uh.uh_dport,
8484 					    saddr, &sk->gwy.addr,
8485 					    sk->gwy.xport.port, &uh.uh_sum,
8486 					    pd2.ip_sum, icmpsum,
8487 					    pd->ip_sum, 1, pd2.af);
8488 				}
8489 				if (pf_lazy_makewritable(pd, pbuf,
8490 				    off2 + sizeof(uh)) == NULL) {
8491 					return PF_DROP;
8492 				}
8493 				switch (pd2.af) {
8494 #if INET
8495 				case AF_INET:
8496 					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
8497 					    pd->hdr.icmp);
8498 					pbuf_copy_back(pbuf, ipoff2,
8499 					    sizeof(h2), &h2);
8500 					break;
8501 #endif /* INET */
8502 				case AF_INET6:
8503 					pbuf_copy_back(pbuf, off,
8504 					    sizeof(struct icmp6_hdr),
8505 					    pd->hdr.icmp6);
8506 					pbuf_copy_back(pbuf, ipoff2,
8507 					    sizeof(h2_6), &h2_6);
8508 					break;
8509 				}
8510 				pbuf_copy_back(pbuf, off2, sizeof(uh), &uh);
8511 			}
8512 
8513 			return PF_PASS;
8514 		}
8515 #if INET
8516 		case IPPROTO_ICMP: {
8517 			struct icmp             iih;
8518 
8519 			if (!pf_pull_hdr(pbuf, off2, &iih, ICMP_MINLEN,
8520 			    NULL, reason, pd2.af)) {
8521 				DPFPRINTF(PF_DEBUG_MISC,
8522 				    ("pf: ICMP error message too short i"
8523 				    "(icmp)\n"));
8524 				return PF_DROP;
8525 			}
8526 
8527 			key.proto = IPPROTO_ICMP;
8528 			if (direction == PF_IN) {
8529 				key.af_gwy = pd2.af;
8530 				PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8531 				PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8532 				key.ext_gwy.xport.port = 0;
8533 				key.gwy.xport.port = iih.icmp_id;
8534 			} else {
8535 				key.af_lan = pd2.af;
8536 				PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8537 				PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8538 				key.lan.xport.port = iih.icmp_id;
8539 				key.ext_lan.xport.port = 0;
8540 			}
8541 
8542 			STATE_LOOKUP();
8543 
8544 			sk = (*state)->state_key;
8545 			if (STATE_TRANSLATE(sk)) {
8546 				if (direction == PF_IN) {
8547 					pf_change_icmp(pd2.src, &iih.icmp_id,
8548 					    daddr, &sk->lan.addr,
8549 					    sk->lan.xport.port, NULL,
8550 					    pd2.ip_sum, icmpsum,
8551 					    pd->ip_sum, 0, AF_INET);
8552 				} else {
8553 					pf_change_icmp(pd2.dst, &iih.icmp_id,
8554 					    saddr, &sk->gwy.addr,
8555 					    sk->gwy.xport.port, NULL,
8556 					    pd2.ip_sum, icmpsum,
8557 					    pd->ip_sum, 0, AF_INET);
8558 				}
8559 				if (pf_lazy_makewritable(pd, pbuf,
8560 				    off2 + ICMP_MINLEN) == NULL) {
8561 					return PF_DROP;
8562 				}
8563 				pbuf_copy_back(pbuf, off, ICMP_MINLEN,
8564 				    pd->hdr.icmp);
8565 				pbuf_copy_back(pbuf, ipoff2, sizeof(h2), &h2);
8566 				pbuf_copy_back(pbuf, off2, ICMP_MINLEN, &iih);
8567 			}
8568 
8569 			return PF_PASS;
8570 		}
8571 #endif /* INET */
8572 		case IPPROTO_ICMPV6: {
8573 			struct icmp6_hdr        iih;
8574 
8575 			if (!pf_pull_hdr(pbuf, off2, &iih,
8576 			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
8577 				DPFPRINTF(PF_DEBUG_MISC,
8578 				    ("pf: ICMP error message too short "
8579 				    "(icmp6)\n"));
8580 				return PF_DROP;
8581 			}
8582 
8583 			key.proto = IPPROTO_ICMPV6;
8584 			if (direction == PF_IN) {
8585 				key.af_gwy = pd2.af;
8586 				PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8587 				PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8588 				key.ext_gwy.xport.port = 0;
8589 				key.gwy.xport.port = iih.icmp6_id;
8590 			} else {
8591 				key.af_lan = pd2.af;
8592 				PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8593 				PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8594 				key.lan.xport.port = iih.icmp6_id;
8595 				key.ext_lan.xport.port = 0;
8596 			}
8597 
8598 			STATE_LOOKUP();
8599 
8600 			sk = (*state)->state_key;
8601 			if (STATE_TRANSLATE(sk)) {
8602 				if (direction == PF_IN) {
8603 					pf_change_icmp(pd2.src, &iih.icmp6_id,
8604 					    daddr, &sk->lan.addr,
8605 					    sk->lan.xport.port, NULL,
8606 					    pd2.ip_sum, icmpsum,
8607 					    pd->ip_sum, 0, AF_INET6);
8608 				} else {
8609 					pf_change_icmp(pd2.dst, &iih.icmp6_id,
8610 					    saddr, &sk->gwy.addr,
8611 					    sk->gwy.xport.port, NULL,
8612 					    pd2.ip_sum, icmpsum,
8613 					    pd->ip_sum, 0, AF_INET6);
8614 				}
8615 				if (pf_lazy_makewritable(pd, pbuf, off2 +
8616 				    sizeof(struct icmp6_hdr)) == NULL) {
8617 					return PF_DROP;
8618 				}
8619 				pbuf_copy_back(pbuf, off,
8620 				    sizeof(struct icmp6_hdr), pd->hdr.icmp6);
8621 				pbuf_copy_back(pbuf, ipoff2, sizeof(h2_6),
8622 				    &h2_6);
8623 				pbuf_copy_back(pbuf, off2,
8624 				    sizeof(struct icmp6_hdr), &iih);
8625 			}
8626 
8627 			return PF_PASS;
8628 		}
8629 		default: {
8630 			key.proto = pd2.proto;
8631 			if (direction == PF_IN) {
8632 				key.af_gwy = pd2.af;
8633 				PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8634 				PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8635 				key.ext_gwy.xport.port = 0;
8636 				key.gwy.xport.port = 0;
8637 			} else {
8638 				key.af_lan = pd2.af;
8639 				PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8640 				PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8641 				key.lan.xport.port = 0;
8642 				key.ext_lan.xport.port = 0;
8643 			}
8644 
8645 			STATE_LOOKUP();
8646 
8647 			sk = (*state)->state_key;
8648 			if (STATE_TRANSLATE(sk)) {
8649 				if (direction == PF_IN) {
8650 					pf_change_icmp(pd2.src, NULL, daddr,
8651 					    &sk->lan.addr, 0, NULL,
8652 					    pd2.ip_sum, icmpsum,
8653 					    pd->ip_sum, 0, pd2.af);
8654 				} else {
8655 					pf_change_icmp(pd2.dst, NULL, saddr,
8656 					    &sk->gwy.addr, 0, NULL,
8657 					    pd2.ip_sum, icmpsum,
8658 					    pd->ip_sum, 0, pd2.af);
8659 				}
8660 				switch (pd2.af) {
8661 #if INET
8662 				case AF_INET:
8663 					if (pf_lazy_makewritable(pd, pbuf,
8664 					    ipoff2 + sizeof(h2)) == NULL) {
8665 						return PF_DROP;
8666 					}
8667 					/*
8668 					 * <XXXSCW>
8669 					 * Xnu was missing the following...
8670 					 */
8671 					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
8672 					    pd->hdr.icmp);
8673 					pbuf_copy_back(pbuf, ipoff2,
8674 					    sizeof(h2), &h2);
8675 					break;
8676 					/*
8677 					 * </XXXSCW>
8678 					 */
8679 #endif /* INET */
8680 				case AF_INET6:
8681 					if (pf_lazy_makewritable(pd, pbuf,
8682 					    ipoff2 + sizeof(h2_6)) == NULL) {
8683 						return PF_DROP;
8684 					}
8685 					pbuf_copy_back(pbuf, off,
8686 					    sizeof(struct icmp6_hdr),
8687 					    pd->hdr.icmp6);
8688 					pbuf_copy_back(pbuf, ipoff2,
8689 					    sizeof(h2_6), &h2_6);
8690 					break;
8691 				}
8692 			}
8693 
8694 			return PF_PASS;
8695 		}
8696 		}
8697 	}
8698 }
8699 
8700 static __attribute__((noinline)) int
pf_test_state_grev1(struct pf_state ** state,int direction,struct pfi_kif * kif,int off,struct pf_pdesc * pd)8701 pf_test_state_grev1(struct pf_state **state, int direction,
8702     struct pfi_kif *kif, int off, struct pf_pdesc *pd)
8703 {
8704 	struct pf_state_peer *src;
8705 	struct pf_state_peer *dst;
8706 	struct pf_state_key_cmp key = {};
8707 	struct pf_grev1_hdr *grev1 = pd->hdr.grev1;
8708 
8709 	key.app_state = 0;
8710 	key.proto = IPPROTO_GRE;
8711 	key.proto_variant = PF_GRE_PPTP_VARIANT;
8712 	if (direction == PF_IN) {
8713 		key.af_gwy = pd->af;
8714 		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
8715 		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
8716 		key.gwy.xport.call_id = grev1->call_id;
8717 	} else {
8718 		key.af_lan = pd->af;
8719 		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
8720 		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
8721 		key.ext_lan.xport.call_id = grev1->call_id;
8722 	}
8723 
8724 	STATE_LOOKUP();
8725 
8726 	if (direction == (*state)->state_key->direction) {
8727 		src = &(*state)->src;
8728 		dst = &(*state)->dst;
8729 	} else {
8730 		src = &(*state)->dst;
8731 		dst = &(*state)->src;
8732 	}
8733 
8734 	/* update states */
8735 	if (src->state < PFGRE1S_INITIATING) {
8736 		src->state = PFGRE1S_INITIATING;
8737 	}
8738 
8739 	/* update expire time */
8740 	(*state)->expire = pf_time_second();
8741 	if (src->state >= PFGRE1S_INITIATING &&
8742 	    dst->state >= PFGRE1S_INITIATING) {
8743 		if ((*state)->timeout != PFTM_TCP_ESTABLISHED) {
8744 			(*state)->timeout = PFTM_GREv1_ESTABLISHED;
8745 		}
8746 		src->state = PFGRE1S_ESTABLISHED;
8747 		dst->state = PFGRE1S_ESTABLISHED;
8748 	} else {
8749 		(*state)->timeout = PFTM_GREv1_INITIATING;
8750 	}
8751 
8752 	if ((*state)->state_key->app_state) {
8753 		(*state)->state_key->app_state->u.grev1.pptp_state->expire =
8754 		    pf_time_second();
8755 	}
8756 
8757 	/* translate source/destination address, if necessary */
8758 	if (STATE_GRE_TRANSLATE((*state)->state_key)) {
8759 		if (direction == PF_OUT) {
8760 			switch (pd->af) {
8761 #if INET
8762 			case AF_INET:
8763 				pf_change_a(&pd->src->v4addr.s_addr,
8764 				    pd->ip_sum,
8765 				    (*state)->state_key->gwy.addr.v4addr.s_addr, 0);
8766 				break;
8767 #endif /* INET */
8768 			case AF_INET6:
8769 				PF_ACPY(pd->src, &(*state)->state_key->gwy.addr,
8770 				    pd->af);
8771 				break;
8772 			}
8773 		} else {
8774 			grev1->call_id = (*state)->state_key->lan.xport.call_id;
8775 
8776 			switch (pd->af) {
8777 #if INET
8778 			case AF_INET:
8779 				pf_change_a(&pd->dst->v4addr.s_addr,
8780 				    pd->ip_sum,
8781 				    (*state)->state_key->lan.addr.v4addr.s_addr, 0);
8782 				break;
8783 #endif /* INET */
8784 			case AF_INET6:
8785 				PF_ACPY(pd->dst, &(*state)->state_key->lan.addr,
8786 				    pd->af);
8787 				break;
8788 			}
8789 		}
8790 
8791 		if (pf_lazy_makewritable(pd, pd->mp, off + sizeof(*grev1)) ==
8792 		    NULL) {
8793 			return PF_DROP;
8794 		}
8795 		pbuf_copy_back(pd->mp, off, sizeof(*grev1), grev1);
8796 	}
8797 
8798 	return PF_PASS;
8799 }
8800 
/*
 * Track an ESP (IPsec) flow.  ESP carries no ports, so states are keyed
 * on the outer addresses plus the 32-bit SPI from the ESP header.  If no
 * exact match exists, a "blocking" state keyed with SPI 0 (created before
 * the responder's SPI was known) is searched for and, when found, rekeyed
 * in place to this packet's SPI.  On success the state's expiry/timeout
 * is refreshed and NAT address rewriting is applied if the state calls
 * for it.  Returns PF_PASS, PF_DROP, or whatever pf_state_lookup_aux()
 * decides for an unusable state.
 */
static __attribute__((noinline)) int
pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif,
    int off, struct pf_pdesc *pd)
{
#pragma unused(off)
	struct pf_state_peer *src;
	struct pf_state_peer *dst;
	struct pf_state_key_cmp key;
	struct pf_esp_hdr *esp = pd->hdr.esp;
	int action;

	/* Build the lookup key from the packet's addresses and SPI */
	memset(&key, 0, sizeof(key));
	key.proto = IPPROTO_ESP;
	if (direction == PF_IN) {
		key.af_gwy = pd->af;
		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
		key.gwy.xport.spi = esp->spi;
	} else {
		key.af_lan = pd->af;
		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
		key.ext_lan.xport.spi = esp->spi;
	}

	*state = pf_find_state(kif, &key, direction);

	if (*state == 0) {
		struct pf_state *s;

		/*
		 * <[email protected]>
		 * No matching state.  Look for a blocking state.  If we find
		 * one, then use that state and move it so that it's keyed to
		 * the SPI in the current packet.
		 */
		if (direction == PF_IN) {
			/* retry with the wildcard (not-yet-known) SPI */
			key.gwy.xport.spi = 0;

			s = pf_find_state(kif, &key, direction);
			if (s) {
				struct pf_state_key *sk = s->state_key;

				/*
				 * Rekey: the SPI participates in the tree
				 * ordering, so the key must be removed,
				 * updated, and reinserted.
				 */
				RB_REMOVE(pf_state_tree_ext_gwy,
				    &pf_statetbl_ext_gwy, sk);
				sk->lan.xport.spi = sk->gwy.xport.spi =
				    esp->spi;

				/*
				 * RB_INSERT returns non-NULL on collision
				 * with an existing key; in that case detach
				 * rather than leave the tree inconsistent.
				 */
				if (RB_INSERT(pf_state_tree_ext_gwy,
				    &pf_statetbl_ext_gwy, sk)) {
					pf_detach_state(s, PF_DT_SKIP_EXTGWY);
				} else {
					*state = s;
				}
			}
		} else {
			/* outbound: wildcard the external (responder) SPI */
			key.ext_lan.xport.spi = 0;

			s = pf_find_state(kif, &key, direction);
			if (s) {
				struct pf_state_key *sk = s->state_key;

				RB_REMOVE(pf_state_tree_lan_ext,
				    &pf_statetbl_lan_ext, sk);
				sk->ext_lan.xport.spi = esp->spi;

				if (RB_INSERT(pf_state_tree_lan_ext,
				    &pf_statetbl_lan_ext, sk)) {
					pf_detach_state(s, PF_DT_SKIP_LANEXT);
				} else {
					*state = s;
				}
			}
		}

		/*
		 * A blocking state was found but could not be rekeyed
		 * (insert collision above): tear it down completely and
		 * drop the packet.
		 */
		if (s) {
			if (*state == 0) {
#if NPFSYNC
				if (s->creatorid == pf_status.hostid) {
					pfsync_delete_state(s);
				}
#endif
				s->timeout = PFTM_UNLINKED;
				hook_runloop(&s->unlink_hooks,
				    HOOK_REMOVE | HOOK_FREE);
				pf_src_tree_remove_state(s);
				pf_free_state(s);
				return PF_DROP;
			}
		}
	}

	/* similar to STATE_LOOKUP() */
	if (*state != NULL && pd != NULL && !(pd->pktflags & PKTF_FLOW_ID)) {
		/* propagate the state's flow identity onto the packet */
		pd->flowsrc = (*state)->state_key->flowsrc;
		pd->flowhash = (*state)->state_key->flowhash;
		if (pd->flowhash != 0) {
			pd->pktflags |= PKTF_FLOW_ID;
			pd->pktflags &= ~PKTF_FLOW_ADV;
		}
	}

	if (pf_state_lookup_aux(state, kif, direction, &action)) {
		return action;
	}

	/* orient peers relative to the direction the state was created */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFESPS_INITIATING) {
		src->state = PFESPS_INITIATING;
	}

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state >= PFESPS_INITIATING &&
	    dst->state >= PFESPS_INITIATING) {
		/* traffic seen both ways: promote to ESTABLISHED */
		(*state)->timeout = PFTM_ESP_ESTABLISHED;
		src->state = PFESPS_ESTABLISHED;
		dst->state = PFESPS_ESTABLISHED;
	} else {
		(*state)->timeout = PFTM_ESP_INITIATING;
	}
	/* translate source/destination address, if necessary */
	if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
		if (direction == PF_OUT) {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->src->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->gwy.addr.v4addr.s_addr, 0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->src, &(*state)->state_key->gwy.addr,
				    pd->af);
				break;
			}
		} else {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->dst->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->lan.addr.v4addr.s_addr, 0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->dst, &(*state)->state_key->lan.addr,
				    pd->af);
				break;
			}
		}
	}

	return PF_PASS;
}
8965 
/*
 * State tracking for protocols that have no dedicated handler (anything
 * other than TCP/UDP/ICMP/GRE/ESP).  Looks up the state entry for this
 * packet, advances each peer through the SINGLE -> MULTIPLE progression,
 * refreshes the expiry timer, and applies NAT address rewriting when the
 * state calls for it.  Returns PF_PASS on a state match (STATE_LOOKUP()
 * returns from this function itself otherwise).
 */
static __attribute__((noinline)) int
pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
    struct pf_pdesc *pd)
{
	struct pf_state_peer    *src, *dst;
	struct pf_state_key_cmp  key = {};

	/* Build the lookup key; these protocols carry no port numbers. */
	key.app_state = 0;
	key.proto = pd->proto;
	if (direction == PF_IN) {
		key.af_gwy = pd->af;
		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
		key.ext_gwy.xport.port = 0;
		key.gwy.xport.port = 0;
	} else {
		key.af_lan = pd->af;
		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
		key.lan.xport.port = 0;
		key.ext_lan.xport.port = 0;
	}

	/*
	 * NOTE(review): STATE_LOOKUP() is a macro with hidden control flow;
	 * it returns from this function when no matching state is found.
	 */
	STATE_LOOKUP();

	/* Map src/dst peers relative to the state's original direction. */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFOTHERS_SINGLE) {
		src->state = PFOTHERS_SINGLE;
	}
	/* traffic already seen from the other side promotes it to MULTIPLE */
	if (dst->state == PFOTHERS_SINGLE) {
		dst->state = PFOTHERS_MULTIPLE;
	}

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) {
		(*state)->timeout = PFTM_OTHER_MULTIPLE;
	} else {
		(*state)->timeout = PFTM_OTHER_SINGLE;
	}

	/* translate source/destination address, if necessary */
	if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
		if (direction == PF_OUT) {
			/* outbound: rewrite source to the gateway address */
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->src->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->gwy.addr.v4addr.s_addr,
				    0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->src,
				    &(*state)->state_key->gwy.addr, pd->af);
				break;
			}
		} else {
			/* inbound: rewrite destination back to the LAN address */
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->dst->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->lan.addr.v4addr.s_addr,
				    0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->dst,
				    &(*state)->state_key->lan.addr, pd->af);
				break;
			}
		}
	}

	return PF_PASS;
}
9052 
9053 /*
9054  * ipoff and off are measured from the start of the mbuf chain.
9055  * h must be at "ipoff" on the mbuf chain.
9056  */
void *
pf_pull_hdr(pbuf_t *pbuf, int off, void *p, int len,
    u_short *actionp, u_short *reasonp, sa_family_t af)
{
	/*
	 * Copy "len" bytes of header starting at offset "off" into the
	 * caller-supplied buffer "p", after validating that the packet
	 * actually contains them.  On failure, returns NULL and records
	 * the verdict/reason through actionp/reasonp.
	 */
	switch (af) {
#if INET
	case AF_INET: {
		struct ip       *h = pbuf->pb_data;
		u_int16_t        fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

		/* Non-first fragment: the requested header bytes are not here. */
		if (fragoff) {
			if (fragoff >= len) {
				/* fragment lies past the header area; pass it */
				ACTION_SET(actionp, PF_PASS);
			} else {
				/* fragment overlaps the header area; drop it */
				ACTION_SET(actionp, PF_DROP);
				REASON_SET(reasonp, PFRES_FRAG);
			}
			return NULL;
		}
		/* both the buffer and the declared IP length must cover off+len */
		if (pbuf->pb_packet_len < (unsigned)(off + len) ||
		    ntohs(h->ip_len) < off + len) {
			ACTION_SET(actionp, PF_DROP);
			REASON_SET(reasonp, PFRES_SHORT);
			return NULL;
		}
		break;
	}
#endif /* INET */
	case AF_INET6: {
		struct ip6_hdr  *h = pbuf->pb_data;

		/* buffer length and declared payload length must cover off+len */
		if (pbuf->pb_packet_len < (unsigned)(off + len) ||
		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
		    (unsigned)(off + len)) {
			ACTION_SET(actionp, PF_DROP);
			REASON_SET(reasonp, PFRES_SHORT);
			return NULL;
		}
		break;
	}
	}
	/* copy the requested bytes out of the (possibly chained) buffer */
	pbuf_copy_data(pbuf, off, len, p);
	return p;
}
9101 
9102 int
pf_routable(struct pf_addr * addr,sa_family_t af,struct pfi_kif * kif)9103 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
9104 {
9105 #pragma unused(kif)
9106 	struct sockaddr_in      *dst;
9107 	int                      ret = 1;
9108 	struct sockaddr_in6     *dst6;
9109 	struct route_in6         ro;
9110 
9111 	bzero(&ro, sizeof(ro));
9112 	switch (af) {
9113 	case AF_INET:
9114 		dst = satosin(&ro.ro_dst);
9115 		dst->sin_family = AF_INET;
9116 		dst->sin_len = sizeof(*dst);
9117 		dst->sin_addr = addr->v4addr;
9118 		break;
9119 	case AF_INET6:
9120 		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
9121 		dst6->sin6_family = AF_INET6;
9122 		dst6->sin6_len = sizeof(*dst6);
9123 		dst6->sin6_addr = addr->v6addr;
9124 		break;
9125 	default:
9126 		return 0;
9127 	}
9128 
9129 	/* XXX: IFT_ENC is not currently used by anything*/
9130 	/* Skip checks for ipsec interfaces */
9131 	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) {
9132 		goto out;
9133 	}
9134 
9135 	/* XXX: what is the point of this? */
9136 	rtalloc((struct route *)&ro);
9137 
9138 out:
9139 	ROUTE_RELEASE(&ro);
9140 	return ret;
9141 }
9142 
9143 int
pf_rtlabel_match(struct pf_addr * addr,sa_family_t af,struct pf_addr_wrap * aw)9144 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
9145 {
9146 #pragma unused(aw)
9147 	struct sockaddr_in      *dst;
9148 	struct sockaddr_in6     *dst6;
9149 	struct route_in6         ro;
9150 	int                      ret = 0;
9151 
9152 	bzero(&ro, sizeof(ro));
9153 	switch (af) {
9154 	case AF_INET:
9155 		dst = satosin(&ro.ro_dst);
9156 		dst->sin_family = AF_INET;
9157 		dst->sin_len = sizeof(*dst);
9158 		dst->sin_addr = addr->v4addr;
9159 		break;
9160 	case AF_INET6:
9161 		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
9162 		dst6->sin6_family = AF_INET6;
9163 		dst6->sin6_len = sizeof(*dst6);
9164 		dst6->sin6_addr = addr->v6addr;
9165 		break;
9166 	default:
9167 		return 0;
9168 	}
9169 
9170 	/* XXX: what is the point of this? */
9171 	rtalloc((struct route *)&ro);
9172 
9173 	ROUTE_RELEASE(&ro);
9174 
9175 	return ret;
9176 }
9177 
9178 #if INET
/*
 * pf_route: send an IPv4 packet out the interface selected by a
 * route-to / reply-to / dup-to rule (r->rt), bypassing the normal
 * forwarding path.  Consumes *pbufp (setting it to NULL) except in the
 * PF_DUPTO case, where a clone is routed and the original is left for
 * normal delivery.  Handles re-filtering on the new interface, hardware
 * checksum offload decisions, and IP fragmentation when the packet
 * exceeds the outgoing interface MTU.
 */
static __attribute__((noinline)) void
pf_route(pbuf_t **pbufp, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
#pragma unused(pd)
	struct mbuf             *m0, *m1;
	struct route             iproute;
	struct route            *ro = &iproute;
	struct sockaddr_in      *dst;
	struct ip               *ip;
	struct ifnet            *ifp = NULL;
	struct pf_addr           naddr;
	struct pf_src_node      *sn = NULL;
	int                      error = 0;
	uint32_t                 sw_csum;
	int                      interface_mtu = 0;
	bzero(&iproute, sizeof(iproute));

	if (pbufp == NULL || !pbuf_is_valid(*pbufp) || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL) {
		panic("pf_route: invalid parameters");
	}

	/* Guard against route-to loops: drop after too many re-entries. */
	if (pd->pf_mtag->pftag_routed++ > 3) {
		pbuf_destroy(*pbufp);
		*pbufp = NULL;
		m0 = NULL;
		goto bad;
	}

	/*
	 * Since this is something of an edge case and may involve the
	 * host stack (for routing, at least for now), we convert the
	 * incoming pbuf into an mbuf.
	 */
	if (r->rt == PF_DUPTO) {
		/* dup-to: route a copy, keep the original packet */
		m0 = pbuf_clone_to_mbuf(*pbufp);
	} else if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
		/* reply-to only applies to packets opposing the rule direction */
		return;
	} else {
		/* We're going to consume this packet */
		m0 = pbuf_to_mbuf(*pbufp, TRUE);
		*pbufp = NULL;
	}

	if (m0 == NULL) {
		goto bad;
	}

	/* We now have the packet in an mbuf (m0) */

	if (m0->m_len < (int)sizeof(struct ip)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route: packet length < sizeof (struct ip)\n"));
		goto bad;
	}

	ip = mtod(m0, struct ip *);

	/* default next hop: the packet's own destination address */
	dst = satosin((void *)&ro->ro_dst);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;

	if (r->rt == PF_FASTROUTE) {
		/* fastroute: consult the regular routing table */
		rtalloc(ro);
		if (ro->ro_rt == NULL) {
			ipstat.ips_noroute++;
			goto bad;
		}

		ifp = ro->ro_rt->rt_ifp;
		RT_LOCK(ro->ro_rt);
		ro->ro_rt->rt_use++;

		if (ro->ro_rt->rt_flags & RTF_GATEWAY) {
			dst = satosin((void *)ro->ro_rt->rt_gateway);
		}
		RT_UNLOCK(ro->ro_rt);
	} else {
		/* route-to/reply-to: pick next hop from the rule's pool */
		if (TAILQ_EMPTY(&r->rpool.list)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
			goto bad;
		}
		if (s == NULL) {
			/* stateless: map an address from the pool now */
			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
			    &naddr, NULL, &sn);
			if (!PF_AZERO(&naddr, AF_INET)) {
				dst->sin_addr.s_addr = naddr.v4addr.s_addr;
			}
			ifp = r->rpool.cur->kif ?
			    r->rpool.cur->kif->pfik_ifp : NULL;
		} else {
			/* stateful: reuse the next hop recorded in the state */
			if (!PF_AZERO(&s->rt_addr, AF_INET)) {
				dst->sin_addr.s_addr =
				    s->rt_addr.v4addr.s_addr;
			}
			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
		}
	}
	if (ifp == NULL) {
		goto bad;
	}

	/* leaving via a different interface: re-run the filter outbound */
	if (oifp != ifp) {
		if (pf_test_mbuf(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			goto bad;
		} else if (m0 == NULL) {
			goto done;
		}
		if (m0->m_len < (int)sizeof(struct ip)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: packet length < sizeof (struct ip)\n"));
			goto bad;
		}
		ip = mtod(m0, struct ip *);
	}

	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
	ip_output_checksum(ifp, m0, ((ip->ip_hl) << 2), ntohs(ip->ip_len),
	    &sw_csum);

	interface_mtu = ifp->if_mtu;

	if (INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
		interface_mtu = IN6_LINKMTU(ifp);
		/* Further adjust the size for CLAT46 expansion */
		interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
	}

	/* fits the MTU (or TSO / hw fragmentation can handle it): send as-is */
	if (ntohs(ip->ip_len) <= interface_mtu || TSO_IPV4_OK(ifp, m0) ||
	    (!(ip->ip_off & htons(IP_DF)) &&
	    (ifp->if_hwassist & CSUM_FRAGMENT))) {
		ip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP) {
			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
			sw_csum &= ~CSUM_DELAY_IP;
			m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP;
		}
		error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt, sintosa(dst));
		goto done;
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 * Balk when DF bit is set or the interface didn't support TSO.
	 */
	if ((ip->ip_off & htons(IP_DF)) ||
	    (m0->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) {
		ipstat.ips_cantfrag++;
		if (r->rt != PF_DUPTO) {
			/* tell the sender to lower its path MTU */
			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
			    interface_mtu);
			goto done;
		} else {
			goto bad;
		}
	}

	m1 = m0;

	/* PR-8933605: send ip_len,ip_off to ip_fragment in host byte order */
#if BYTE_ORDER != BIG_ENDIAN
	NTOHS(ip->ip_off);
	NTOHS(ip->ip_len);
#endif
	error = ip_fragment(m0, ifp, interface_mtu, sw_csum);

	if (error) {
		/* ip_fragment freed the chain on error */
		m0 = NULL;
		goto bad;
	}

	/* walk the fragment chain, sending each fragment in turn */
	for (m0 = m1; m0; m0 = m1) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = 0;
		if (error == 0) {
			error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt,
			    sintosa(dst));
		} else {
			m_freem(m0);
		}
	}

	if (error == 0) {
		ipstat.ips_fragmented++;
	}

done:
	ROUTE_RELEASE(&iproute);
	return;

bad:
	if (m0) {
		m_freem(m0);
	}
	goto done;
}
9379 #endif /* INET */
9380 
/*
 * pf_route6: IPv6 counterpart of pf_route().  Sends a packet out the
 * interface selected by a route-to / reply-to / dup-to rule, bypassing
 * the normal forwarding path.  Consumes *pbufp (setting it to NULL)
 * except in the PF_DUPTO case, where a clone is routed and the original
 * is left for normal delivery.  Packets larger than the interface MTU
 * are answered with ICMP6_PACKET_TOO_BIG rather than fragmented.
 */
static __attribute__((noinline)) void
pf_route6(pbuf_t **pbufp, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
#pragma unused(pd)
	struct mbuf             *m0;
	struct route_in6         ip6route;
	struct route_in6        *ro;
	struct sockaddr_in6     *dst;
	struct ip6_hdr          *ip6;
	struct ifnet            *ifp = NULL;
	struct pf_addr           naddr;
	struct pf_src_node      *sn = NULL;
	int                      error = 0;
	struct pf_mtag          *pf_mtag;

	if (pbufp == NULL || !pbuf_is_valid(*pbufp) || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL) {
		panic("pf_route6: invalid parameters");
	}

	/* Guard against route-to loops: drop after too many re-entries. */
	if (pd->pf_mtag->pftag_routed++ > 3) {
		pbuf_destroy(*pbufp);
		*pbufp = NULL;
		m0 = NULL;
		goto bad;
	}

	/*
	 * Since this is something of an edge case and may involve the
	 * host stack (for routing, at least for now), we convert the
	 * incoming pbuf into an mbuf.
	 */
	if (r->rt == PF_DUPTO) {
		/* dup-to: route a copy, keep the original packet */
		m0 = pbuf_clone_to_mbuf(*pbufp);
	} else if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
		/* reply-to only applies to packets opposing the rule direction */
		return;
	} else {
		/* We're about to consume this packet */
		m0 = pbuf_to_mbuf(*pbufp, TRUE);
		*pbufp = NULL;
	}

	if (m0 == NULL) {
		goto bad;
	}

	if (m0->m_len < (int)sizeof(struct ip6_hdr)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: m0->m_len < sizeof (struct ip6_hdr)\n"));
		goto bad;
	}
	ip6 = mtod(m0, struct ip6_hdr *);

	/* default next hop: the packet's own destination address */
	ro = &ip6route;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = (struct sockaddr_in6 *)&ro->ro_dst;
	dst->sin6_family = AF_INET6;
	dst->sin6_len = sizeof(*dst);
	dst->sin6_addr = ip6->ip6_dst;

	/* Cheat. XXX why only in the v6addr case??? */
	if (r->rt == PF_FASTROUTE) {
		/*
		 * fastroute: mark as pf-generated (so it is not filtered
		 * again) and hand it straight to ip6_output().
		 */
		pf_mtag = pf_get_mtag(m0);
		ASSERT(pf_mtag != NULL);
		pf_mtag->pftag_flags |= PF_TAG_GENERATED;
		ip6_output_setsrcifscope(m0, oifp->if_index, NULL);
		ip6_output_setdstifscope(m0, oifp->if_index, NULL);
		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
		return;
	}

	if (TAILQ_EMPTY(&r->rpool.list)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
		goto bad;
	}
	if (s == NULL) {
		/* stateless: map a next-hop address from the rule's pool */
		pf_map_addr(AF_INET6, r, (struct pf_addr *)(uintptr_t)&ip6->ip6_src,
		    &naddr, NULL, &sn);
		if (!PF_AZERO(&naddr, AF_INET6)) {
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &naddr, AF_INET6);
		}
		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
	} else {
		/* stateful: reuse the next hop recorded in the state */
		if (!PF_AZERO(&s->rt_addr, AF_INET6)) {
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &s->rt_addr, AF_INET6);
		}
		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
	}
	if (ifp == NULL) {
		goto bad;
	}

	/* leaving via a different interface: re-run the filter outbound */
	if (oifp != ifp) {
		if (pf_test6_mbuf(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			goto bad;
		} else if (m0 == NULL) {
			goto done;
		}
		if (m0->m_len < (int)sizeof(struct ip6_hdr)) {
			DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6: m0->m_len "
			    "< sizeof (struct ip6_hdr)\n"));
			goto bad;
		}
		pf_mtag = pf_get_mtag(m0);
		/*
		 * send refragmented packets.
		 */
		if ((pf_mtag->pftag_flags & PF_TAG_REFRAGMENTED) != 0) {
			pf_mtag->pftag_flags &= ~PF_TAG_REFRAGMENTED;
			/*
			 * nd6_output() frees packet chain in both success and
			 * failure cases.
			 */
			error = nd6_output(ifp, ifp, m0, dst, NULL, NULL);
			m0 = NULL;
			if (error) {
				DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6:"
				    "dropped refragmented packet\n"));
			}
			goto done;
		}
		ip6 = mtod(m0, struct ip6_hdr *);
	}

	/*
	 * If the packet is too large for the outgoing interface,
	 * send back an icmp6 error.
	 */
	if (in6_embedded_scope && IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) {
		/* embed the zone/interface index in scoped addresses */
		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
	}
	if ((unsigned)m0->m_pkthdr.len <= ifp->if_mtu) {
		error = nd6_output(ifp, ifp, m0, dst, NULL, NULL);
	} else {
		in6_ifstat_inc(ifp, ifs6_in_toobig);
		if (r->rt != PF_DUPTO) {
			/* IPv6 routers never fragment: report path MTU */
			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
		} else {
			goto bad;
		}
	}

done:
	return;

bad:
	if (m0) {
		m_freem(m0);
		m0 = NULL;
	}
	goto done;
}
9537 
9538 
9539 /*
9540  * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
9541  *   off is the offset where the protocol header starts
9542  *   len is the total length of protocol header plus payload
9543  * returns 0 when the checksum is valid, otherwise returns 1.
9544  */
9545 static int
pf_check_proto_cksum(pbuf_t * pbuf,int off,int len,u_int8_t p,sa_family_t af)9546 pf_check_proto_cksum(pbuf_t *pbuf, int off, int len, u_int8_t p,
9547     sa_family_t af)
9548 {
9549 	u_int16_t sum;
9550 
9551 	switch (p) {
9552 	case IPPROTO_TCP:
9553 	case IPPROTO_UDP:
9554 		/*
9555 		 * Optimize for the common case; if the hardware calculated
9556 		 * value doesn't include pseudo-header checksum, or if it
9557 		 * is partially-computed (only 16-bit summation), do it in
9558 		 * software below.
9559 		 */
9560 		if ((*pbuf->pb_csum_flags &
9561 		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
9562 		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) &&
9563 		    (*pbuf->pb_csum_data ^ 0xffff) == 0) {
9564 			return 0;
9565 		}
9566 		break;
9567 	case IPPROTO_ICMP:
9568 	case IPPROTO_ICMPV6:
9569 		break;
9570 	default:
9571 		return 1;
9572 	}
9573 	if (off < (int)sizeof(struct ip) || len < (int)sizeof(struct udphdr)) {
9574 		return 1;
9575 	}
9576 	if (pbuf->pb_packet_len < (unsigned)(off + len)) {
9577 		return 1;
9578 	}
9579 	switch (af) {
9580 #if INET
9581 	case AF_INET:
9582 		if (p == IPPROTO_ICMP) {
9583 			if (pbuf->pb_contig_len < (unsigned)off) {
9584 				return 1;
9585 			}
9586 			sum = pbuf_inet_cksum(pbuf, 0, off, len);
9587 		} else {
9588 			if (pbuf->pb_contig_len < (int)sizeof(struct ip)) {
9589 				return 1;
9590 			}
9591 			sum = pbuf_inet_cksum(pbuf, p, off, len);
9592 		}
9593 		break;
9594 #endif /* INET */
9595 	case AF_INET6:
9596 		if (pbuf->pb_contig_len < (int)sizeof(struct ip6_hdr)) {
9597 			return 1;
9598 		}
9599 		sum = pbuf_inet6_cksum(pbuf, p, off, len);
9600 		break;
9601 	default:
9602 		return 1;
9603 	}
9604 	if (sum) {
9605 		switch (p) {
9606 		case IPPROTO_TCP:
9607 			tcpstat.tcps_rcvbadsum++;
9608 			break;
9609 		case IPPROTO_UDP:
9610 			udpstat.udps_badsum++;
9611 			break;
9612 		case IPPROTO_ICMP:
9613 			icmpstat.icps_checksum++;
9614 			break;
9615 		case IPPROTO_ICMPV6:
9616 			icmp6stat.icp6s_checksum++;
9617 			break;
9618 		}
9619 		return 1;
9620 	}
9621 	return 0;
9622 }
9623 
9624 #if INET
/*
 * Re-sync local state after a helper may have replaced the working
 * packet buffer: when pd.mp no longer matches the local pbuf, refresh
 * pbuf, the cached IPv4 header pointer (h) and the pf mtag so they all
 * reference the current buffer.  Relies on locals pbuf/h/pd in scope.
 */
#define PF_APPLE_UPDATE_PDESC_IPv4()                            \
	do {                                                    \
	        if (pbuf && pd.mp && pbuf != pd.mp) {           \
	                pbuf = pd.mp;                           \
	                h = pbuf->pb_data;                      \
	                pd.pf_mtag = pf_get_mtag_pbuf(pbuf);            \
	        }                                               \
	} while (0)
9633 
9634 int
pf_test_mbuf(int dir,struct ifnet * ifp,struct mbuf ** m0,struct ether_header * eh,struct ip_fw_args * fwa)9635 pf_test_mbuf(int dir, struct ifnet *ifp, struct mbuf **m0,
9636     struct ether_header *eh, struct ip_fw_args *fwa)
9637 {
9638 	pbuf_t pbuf_store, *pbuf;
9639 	int rv;
9640 
9641 	pbuf_init_mbuf(&pbuf_store, *m0, (*m0)->m_pkthdr.rcvif);
9642 	pbuf = &pbuf_store;
9643 
9644 	rv = pf_test(dir, ifp, &pbuf, eh, fwa);
9645 
9646 	if (pbuf_is_valid(pbuf)) {
9647 		*m0 = pbuf->pb_mbuf;
9648 		pbuf->pb_mbuf = NULL;
9649 		pbuf_destroy(pbuf);
9650 	} else {
9651 		*m0 = NULL;
9652 	}
9653 
9654 	return rv;
9655 }
9656 
9657 static __attribute__((noinline)) int
pf_test(int dir,struct ifnet * ifp,pbuf_t ** pbufp,struct ether_header * eh,struct ip_fw_args * fwa)9658 pf_test(int dir, struct ifnet *ifp, pbuf_t **pbufp,
9659     struct ether_header *eh, struct ip_fw_args *fwa)
9660 {
9661 #if !DUMMYNET
9662 #pragma unused(fwa)
9663 #endif
9664 	struct pfi_kif          *kif;
9665 	u_short                  action = PF_PASS, reason = 0, log = 0;
9666 	pbuf_t                  *pbuf = *pbufp;
9667 	struct ip               *h = 0;
9668 	struct pf_rule          *a = NULL, *r = &pf_default_rule, *tr, *nr;
9669 	struct pf_state         *s = NULL;
9670 	struct pf_state_key     *sk = NULL;
9671 	struct pf_ruleset       *ruleset = NULL;
9672 	struct pf_pdesc          pd;
9673 	int                      off, dirndx, pqid = 0;
9674 
9675 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
9676 
9677 	if (!pf_status.running) {
9678 		return PF_PASS;
9679 	}
9680 
9681 	memset(&pd, 0, sizeof(pd));
9682 
9683 	if ((pd.pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
9684 		DPFPRINTF(PF_DEBUG_URGENT,
9685 		    ("pf_test: pf_get_mtag_pbuf returned NULL\n"));
9686 		return PF_DROP;
9687 	}
9688 
9689 	if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED) {
9690 		return PF_PASS;
9691 	}
9692 
9693 	kif = (struct pfi_kif *)ifp->if_pf_kif;
9694 
9695 	if (kif == NULL) {
9696 		DPFPRINTF(PF_DEBUG_URGENT,
9697 		    ("pf_test: kif == NULL, if_name %s\n", ifp->if_name));
9698 		return PF_DROP;
9699 	}
9700 	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
9701 		return PF_PASS;
9702 	}
9703 
9704 	if (pbuf->pb_packet_len < (int)sizeof(*h)) {
9705 		REASON_SET(&reason, PFRES_SHORT);
9706 		return PF_DROP;
9707 	}
9708 
9709 	/* initialize enough of pd for the done label */
9710 	h = pbuf->pb_data;
9711 	pd.mp = pbuf;
9712 	pd.lmw = 0;
9713 	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
9714 	pd.src = (struct pf_addr *)&h->ip_src;
9715 	pd.dst = (struct pf_addr *)&h->ip_dst;
9716 	PF_ACPY(&pd.baddr, pd.src, AF_INET);
9717 	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET);
9718 	pd.ip_sum = &h->ip_sum;
9719 	pd.proto = h->ip_p;
9720 	pd.proto_variant = 0;
9721 	pd.af = AF_INET;
9722 	pd.tos = h->ip_tos;
9723 	pd.ttl = h->ip_ttl;
9724 	pd.tot_len = ntohs(h->ip_len);
9725 	pd.eh = eh;
9726 
9727 #if DUMMYNET
9728 	if (fwa != NULL && fwa->fwa_pf_rule != NULL) {
9729 		goto nonormalize;
9730 	}
9731 #endif /* DUMMYNET */
9732 
9733 	/* We do IP header normalization and packet reassembly here */
9734 	action = pf_normalize_ip(pbuf, dir, kif, &reason, &pd);
9735 	if (action != PF_PASS || pd.lmw < 0) {
9736 		action = PF_DROP;
9737 		goto done;
9738 	}
9739 
9740 #if DUMMYNET
9741 nonormalize:
9742 #endif /* DUMMYNET */
9743 	/* pf_normalize can mess with pb_data */
9744 	h = pbuf->pb_data;
9745 
9746 	off = h->ip_hl << 2;
9747 	if (off < (int)sizeof(*h)) {
9748 		action = PF_DROP;
9749 		REASON_SET(&reason, PFRES_SHORT);
9750 		log = 1;
9751 		goto done;
9752 	}
9753 
9754 	pd.src = (struct pf_addr *)&h->ip_src;
9755 	pd.dst = (struct pf_addr *)&h->ip_dst;
9756 	PF_ACPY(&pd.baddr, pd.src, AF_INET);
9757 	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET);
9758 	pd.ip_sum = &h->ip_sum;
9759 	pd.proto = h->ip_p;
9760 	pd.proto_variant = 0;
9761 	pd.mp = pbuf;
9762 	pd.lmw = 0;
9763 	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
9764 	pd.af = AF_INET;
9765 	pd.tos = h->ip_tos;
9766 	pd.ttl = h->ip_ttl;
9767 	pd.sc = MBUF_SCIDX(pbuf_get_service_class(pbuf));
9768 	pd.tot_len = ntohs(h->ip_len);
9769 	pd.eh = eh;
9770 
9771 	if (*pbuf->pb_flags & PKTF_FLOW_ID) {
9772 		pd.flowsrc = *pbuf->pb_flowsrc;
9773 		pd.flowhash = *pbuf->pb_flowid;
9774 		pd.pktflags = *pbuf->pb_flags & PKTF_FLOW_MASK;
9775 	}
9776 
9777 	/* handle fragments that didn't get reassembled by normalization */
9778 	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
9779 		pd.flags |= PFDESC_IP_FRAG;
9780 #if DUMMYNET
9781 		/* Traffic goes through dummynet first */
9782 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9783 		if (action == PF_DROP || pbuf == NULL) {
9784 			*pbufp = NULL;
9785 			return action;
9786 		}
9787 #endif /* DUMMYNET */
9788 		action = pf_test_fragment(&r, dir, kif, pbuf, h,
9789 		    &pd, &a, &ruleset);
9790 		goto done;
9791 	}
9792 
9793 	switch (h->ip_p) {
9794 	case IPPROTO_TCP: {
9795 		struct tcphdr   th;
9796 		pd.hdr.tcp = &th;
9797 		if (!pf_pull_hdr(pbuf, off, &th, sizeof(th),
9798 		    &action, &reason, AF_INET)) {
9799 			log = action != PF_PASS;
9800 			goto done;
9801 		}
9802 		pd.p_len = pd.tot_len - off - (th.th_off << 2);
9803 		if ((th.th_flags & TH_ACK) && pd.p_len == 0) {
9804 			pqid = 1;
9805 		}
9806 #if DUMMYNET
9807 		/* Traffic goes through dummynet first */
9808 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9809 		if (action == PF_DROP || pbuf == NULL) {
9810 			*pbufp = NULL;
9811 			return action;
9812 		}
9813 #endif /* DUMMYNET */
9814 		action = pf_normalize_tcp(dir, kif, pbuf, 0, off, h, &pd);
9815 		if (pd.lmw < 0) {
9816 			goto done;
9817 		}
9818 		PF_APPLE_UPDATE_PDESC_IPv4();
9819 		if (action == PF_DROP) {
9820 			goto done;
9821 		}
9822 		if (th.th_sport == 0 || th.th_dport == 0) {
9823 			action = PF_DROP;
9824 			REASON_SET(&reason, PFRES_INVPORT);
9825 			goto done;
9826 		}
9827 		action = pf_test_state_tcp(&s, dir, kif, pbuf, off, h, &pd,
9828 		    &reason);
9829 		if (action == PF_NAT64) {
9830 			goto done;
9831 		}
9832 		if (pd.lmw < 0) {
9833 			goto done;
9834 		}
9835 		PF_APPLE_UPDATE_PDESC_IPv4();
9836 		if (action == PF_PASS) {
9837 #if NPFSYNC
9838 			pfsync_update_state(s);
9839 #endif /* NPFSYNC */
9840 			r = s->rule.ptr;
9841 			a = s->anchor.ptr;
9842 			log = s->log;
9843 		} else if (s == NULL) {
9844 			action = pf_test_rule(&r, &s, dir, kif,
9845 			    pbuf, off, h, &pd, &a, &ruleset, NULL);
9846 		}
9847 		break;
9848 	}
9849 
9850 	case IPPROTO_UDP: {
9851 		struct udphdr   uh;
9852 
9853 		pd.hdr.udp = &uh;
9854 		if (!pf_pull_hdr(pbuf, off, &uh, sizeof(uh),
9855 		    &action, &reason, AF_INET)) {
9856 			log = action != PF_PASS;
9857 			goto done;
9858 		}
9859 		if (uh.uh_sport == 0 || uh.uh_dport == 0) {
9860 			action = PF_DROP;
9861 			REASON_SET(&reason, PFRES_INVPORT);
9862 			goto done;
9863 		}
9864 		if (ntohs(uh.uh_ulen) > pbuf->pb_packet_len - off ||
9865 		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
9866 			action = PF_DROP;
9867 			REASON_SET(&reason, PFRES_SHORT);
9868 			goto done;
9869 		}
9870 #if DUMMYNET
9871 		/* Traffic goes through dummynet first */
9872 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9873 		if (action == PF_DROP || pbuf == NULL) {
9874 			*pbufp = NULL;
9875 			return action;
9876 		}
9877 #endif /* DUMMYNET */
9878 		action = pf_test_state_udp(&s, dir, kif, pbuf, off, h, &pd,
9879 		    &reason);
9880 		if (action == PF_NAT64) {
9881 			goto done;
9882 		}
9883 		if (pd.lmw < 0) {
9884 			goto done;
9885 		}
9886 		PF_APPLE_UPDATE_PDESC_IPv4();
9887 		if (action == PF_PASS) {
9888 #if NPFSYNC
9889 			pfsync_update_state(s);
9890 #endif /* NPFSYNC */
9891 			r = s->rule.ptr;
9892 			a = s->anchor.ptr;
9893 			log = s->log;
9894 		} else if (s == NULL) {
9895 			action = pf_test_rule(&r, &s, dir, kif,
9896 			    pbuf, off, h, &pd, &a, &ruleset, NULL);
9897 		}
9898 		break;
9899 	}
9900 
9901 	case IPPROTO_ICMP: {
9902 		struct icmp     ih;
9903 
9904 		pd.hdr.icmp = &ih;
9905 		if (!pf_pull_hdr(pbuf, off, &ih, ICMP_MINLEN,
9906 		    &action, &reason, AF_INET)) {
9907 			log = action != PF_PASS;
9908 			goto done;
9909 		}
9910 #if DUMMYNET
9911 		/* Traffic goes through dummynet first */
9912 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9913 		if (action == PF_DROP || pbuf == NULL) {
9914 			*pbufp = NULL;
9915 			return action;
9916 		}
9917 #endif /* DUMMYNET */
9918 		action = pf_test_state_icmp(&s, dir, kif, pbuf, off, h, &pd,
9919 		    &reason);
9920 
9921 		if (action == PF_NAT64) {
9922 			goto done;
9923 		}
9924 		if (pd.lmw < 0) {
9925 			goto done;
9926 		}
9927 		PF_APPLE_UPDATE_PDESC_IPv4();
9928 		if (action == PF_PASS) {
9929 #if NPFSYNC
9930 			pfsync_update_state(s);
9931 #endif /* NPFSYNC */
9932 			r = s->rule.ptr;
9933 			a = s->anchor.ptr;
9934 			log = s->log;
9935 		} else if (s == NULL) {
9936 			action = pf_test_rule(&r, &s, dir, kif,
9937 			    pbuf, off, h, &pd, &a, &ruleset, NULL);
9938 		}
9939 		break;
9940 	}
9941 
9942 	case IPPROTO_ESP: {
9943 		struct pf_esp_hdr       esp;
9944 
9945 		pd.hdr.esp = &esp;
9946 		if (!pf_pull_hdr(pbuf, off, &esp, sizeof(esp), &action, &reason,
9947 		    AF_INET)) {
9948 			log = action != PF_PASS;
9949 			goto done;
9950 		}
9951 #if DUMMYNET
9952 		/* Traffic goes through dummynet first */
9953 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9954 		if (action == PF_DROP || pbuf == NULL) {
9955 			*pbufp = NULL;
9956 			return action;
9957 		}
9958 #endif /* DUMMYNET */
9959 		action = pf_test_state_esp(&s, dir, kif, off, &pd);
9960 		if (pd.lmw < 0) {
9961 			goto done;
9962 		}
9963 		PF_APPLE_UPDATE_PDESC_IPv4();
9964 		if (action == PF_PASS) {
9965 #if NPFSYNC
9966 			pfsync_update_state(s);
9967 #endif /* NPFSYNC */
9968 			r = s->rule.ptr;
9969 			a = s->anchor.ptr;
9970 			log = s->log;
9971 		} else if (s == NULL) {
9972 			action = pf_test_rule(&r, &s, dir, kif,
9973 			    pbuf, off, h, &pd, &a, &ruleset, NULL);
9974 		}
9975 		break;
9976 	}
9977 
9978 	case IPPROTO_GRE: {
9979 		struct pf_grev1_hdr     grev1;
9980 		pd.hdr.grev1 = &grev1;
9981 		if (!pf_pull_hdr(pbuf, off, &grev1, sizeof(grev1), &action,
9982 		    &reason, AF_INET)) {
9983 			log = (action != PF_PASS);
9984 			goto done;
9985 		}
9986 #if DUMMYNET
9987 		/* Traffic goes through dummynet first */
9988 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9989 		if (action == PF_DROP || pbuf == NULL) {
9990 			*pbufp = NULL;
9991 			return action;
9992 		}
9993 #endif /* DUMMYNET */
9994 		if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
9995 		    ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
9996 			if (ntohs(grev1.payload_length) >
9997 			    pbuf->pb_packet_len - off) {
9998 				action = PF_DROP;
9999 				REASON_SET(&reason, PFRES_SHORT);
10000 				goto done;
10001 			}
10002 			pd.proto_variant = PF_GRE_PPTP_VARIANT;
10003 			action = pf_test_state_grev1(&s, dir, kif, off, &pd);
10004 			if (pd.lmw < 0) {
10005 				goto done;
10006 			}
10007 			PF_APPLE_UPDATE_PDESC_IPv4();
10008 			if (action == PF_PASS) {
10009 #if NPFSYNC
10010 				pfsync_update_state(s);
10011 #endif /* NPFSYNC */
10012 				r = s->rule.ptr;
10013 				a = s->anchor.ptr;
10014 				log = s->log;
10015 				break;
10016 			} else if (s == NULL) {
10017 				action = pf_test_rule(&r, &s, dir, kif, pbuf,
10018 				    off, h, &pd, &a, &ruleset, NULL);
10019 				if (action == PF_PASS) {
10020 					break;
10021 				}
10022 			}
10023 		}
10024 
10025 		/* not GREv1/PPTP, so treat as ordinary GRE... */
10026 		OS_FALLTHROUGH;
10027 	}
10028 
10029 	default:
10030 #if DUMMYNET
10031 		/* Traffic goes through dummynet first */
10032 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10033 		if (action == PF_DROP || pbuf == NULL) {
10034 			*pbufp = NULL;
10035 			return action;
10036 		}
10037 #endif /* DUMMYNET */
10038 		action = pf_test_state_other(&s, dir, kif, &pd);
10039 		if (pd.lmw < 0) {
10040 			goto done;
10041 		}
10042 		PF_APPLE_UPDATE_PDESC_IPv4();
10043 		if (action == PF_PASS) {
10044 #if NPFSYNC
10045 			pfsync_update_state(s);
10046 #endif /* NPFSYNC */
10047 			r = s->rule.ptr;
10048 			a = s->anchor.ptr;
10049 			log = s->log;
10050 		} else if (s == NULL) {
10051 			action = pf_test_rule(&r, &s, dir, kif, pbuf, off, h,
10052 			    &pd, &a, &ruleset, NULL);
10053 		}
10054 		break;
10055 	}
10056 
10057 done:
10058 	if (action == PF_NAT64) {
10059 		*pbufp = NULL;
10060 		return action;
10061 	}
10062 
10063 	*pbufp = pd.mp;
10064 	PF_APPLE_UPDATE_PDESC_IPv4();
10065 
10066 	if (action != PF_DROP) {
10067 		if (action == PF_PASS && h->ip_hl > 5 &&
10068 		    !((s && s->allow_opts) || r->allow_opts)) {
10069 			action = PF_DROP;
10070 			REASON_SET(&reason, PFRES_IPOPTIONS);
10071 			log = 1;
10072 			DPFPRINTF(PF_DEBUG_MISC,
10073 			    ("pf: dropping packet with ip options [hlen=%u]\n",
10074 			    (unsigned int) h->ip_hl));
10075 		}
10076 
10077 		if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) ||
10078 		    (pd.pktflags & PKTF_FLOW_ID)) {
10079 			(void) pf_tag_packet(pbuf, pd.pf_mtag, s ? s->tag : 0,
10080 			    r->rtableid, &pd);
10081 		}
10082 
10083 		if (action == PF_PASS) {
10084 #if PF_ECN
10085 			/* add hints for ecn */
10086 			pd.pf_mtag->pftag_hdr = h;
10087 			/* record address family */
10088 			pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET6;
10089 			pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
10090 #endif /* PF_ECN */
10091 			/* record protocol */
10092 			*pbuf->pb_proto = pd.proto;
10093 
10094 			/*
10095 			 * connections redirected to loopback should not match sockets
10096 			 * bound specifically to loopback due to security implications,
10097 			 * see tcp_input() and in_pcblookup_listen().
10098 			 */
10099 			if (dir == PF_IN && (pd.proto == IPPROTO_TCP ||
10100 			    pd.proto == IPPROTO_UDP) && s != NULL &&
10101 			    s->nat_rule.ptr != NULL &&
10102 			    (s->nat_rule.ptr->action == PF_RDR ||
10103 			    s->nat_rule.ptr->action == PF_BINAT) &&
10104 			    (ntohl(pd.dst->v4addr.s_addr) >> IN_CLASSA_NSHIFT)
10105 			    == IN_LOOPBACKNET) {
10106 				pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;
10107 			}
10108 		}
10109 	}
10110 
10111 	if (log) {
10112 		struct pf_rule *lr;
10113 
10114 		if (s != NULL && s->nat_rule.ptr != NULL &&
10115 		    s->nat_rule.ptr->log & PF_LOG_ALL) {
10116 			lr = s->nat_rule.ptr;
10117 		} else {
10118 			lr = r;
10119 		}
10120 		PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, lr, a, ruleset,
10121 		    &pd);
10122 	}
10123 
10124 	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
10125 	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
10126 
10127 	if (action == PF_PASS || r->action == PF_DROP) {
10128 		dirndx = (dir == PF_OUT);
10129 		r->packets[dirndx]++;
10130 		r->bytes[dirndx] += pd.tot_len;
10131 		if (a != NULL) {
10132 			a->packets[dirndx]++;
10133 			a->bytes[dirndx] += pd.tot_len;
10134 		}
10135 		if (s != NULL) {
10136 			sk = s->state_key;
10137 			if (s->nat_rule.ptr != NULL) {
10138 				s->nat_rule.ptr->packets[dirndx]++;
10139 				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
10140 			}
10141 			if (s->src_node != NULL) {
10142 				s->src_node->packets[dirndx]++;
10143 				s->src_node->bytes[dirndx] += pd.tot_len;
10144 			}
10145 			if (s->nat_src_node != NULL) {
10146 				s->nat_src_node->packets[dirndx]++;
10147 				s->nat_src_node->bytes[dirndx] += pd.tot_len;
10148 			}
10149 			dirndx = (dir == sk->direction) ? 0 : 1;
10150 			s->packets[dirndx]++;
10151 			s->bytes[dirndx] += pd.tot_len;
10152 		}
10153 		tr = r;
10154 		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
10155 		if (nr != NULL) {
10156 			struct pf_addr *x;
10157 			/*
10158 			 * XXX: we need to make sure that the addresses
10159 			 * passed to pfr_update_stats() are the same than
10160 			 * the addresses used during matching (pfr_match)
10161 			 */
10162 			if (r == &pf_default_rule) {
10163 				tr = nr;
10164 				x = (sk == NULL || sk->direction == dir) ?
10165 				    &pd.baddr : &pd.naddr;
10166 			} else {
10167 				x = (sk == NULL || sk->direction == dir) ?
10168 				    &pd.naddr : &pd.baddr;
10169 			}
10170 			if (x == &pd.baddr || s == NULL) {
10171 				/* we need to change the address */
10172 				if (dir == PF_OUT) {
10173 					pd.src = x;
10174 				} else {
10175 					pd.dst = x;
10176 				}
10177 			}
10178 		}
10179 		if (tr->src.addr.type == PF_ADDR_TABLE) {
10180 			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
10181 			    sk->direction == dir) ?
10182 			    pd.src : pd.dst, pd.af,
10183 			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
10184 			    tr->src.neg);
10185 		}
10186 		if (tr->dst.addr.type == PF_ADDR_TABLE) {
10187 			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
10188 			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
10189 			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
10190 			    tr->dst.neg);
10191 		}
10192 	}
10193 
10194 	VERIFY(pbuf == NULL || pd.mp == NULL || pd.mp == pbuf);
10195 
10196 	if (*pbufp) {
10197 		if (pd.lmw < 0) {
10198 			REASON_SET(&reason, PFRES_MEMORY);
10199 			action = PF_DROP;
10200 		}
10201 
10202 		if (action == PF_DROP) {
10203 			pbuf_destroy(*pbufp);
10204 			*pbufp = NULL;
10205 			return PF_DROP;
10206 		}
10207 
10208 		*pbufp = pbuf;
10209 	}
10210 
10211 	if (action == PF_SYNPROXY_DROP) {
10212 		pbuf_destroy(*pbufp);
10213 		*pbufp = NULL;
10214 		action = PF_PASS;
10215 	} else if (r->rt) {
10216 		/* pf_route can free the pbuf causing *pbufp to become NULL */
10217 		pf_route(pbufp, r, dir, kif->pfik_ifp, s, &pd);
10218 	}
10219 
10220 	return action;
10221 }
10222 #endif /* INET */
10223 
/*
 * Re-sync the local packet pointers after a helper (normalization,
 * state tracking) may have swapped the working buffer: if pd.mp was
 * replaced, adopt it as the current pbuf, and re-derive the IPv6
 * header pointer from the (possibly new) buffer's data.  Relies on
 * locals `pbuf`, `pd` and `h` being in scope at the expansion site.
 */
#define PF_APPLE_UPDATE_PDESC_IPv6()                            \
	do {                                                    \
	        if (pbuf && pd.mp && pbuf != pd.mp) {           \
	                pbuf = pd.mp;                           \
	        }                                               \
	        h = pbuf->pb_data;                              \
	} while (0)
10231 
10232 int
pf_test6_mbuf(int dir,struct ifnet * ifp,struct mbuf ** m0,struct ether_header * eh,struct ip_fw_args * fwa)10233 pf_test6_mbuf(int dir, struct ifnet *ifp, struct mbuf **m0,
10234     struct ether_header *eh, struct ip_fw_args *fwa)
10235 {
10236 	pbuf_t pbuf_store, *pbuf;
10237 	int rv;
10238 
10239 	pbuf_init_mbuf(&pbuf_store, *m0, (*m0)->m_pkthdr.rcvif);
10240 	pbuf = &pbuf_store;
10241 
10242 	rv = pf_test6(dir, ifp, &pbuf, eh, fwa);
10243 
10244 	if (pbuf_is_valid(pbuf)) {
10245 		*m0 = pbuf->pb_mbuf;
10246 		pbuf->pb_mbuf = NULL;
10247 		pbuf_destroy(pbuf);
10248 	} else {
10249 		*m0 = NULL;
10250 	}
10251 
10252 	return rv;
10253 }
10254 
/*
 * pf_test6: the main PF inspection entry point for IPv6 packets.
 *
 * dir   - PF_IN or PF_OUT relative to 'ifp'.
 * ifp   - interface the packet is crossing.
 * pbufp - in/out packet.  On return it may point at a different pbuf
 *         (reassembly), or be set to NULL when the packet was consumed
 *         (drop, NAT64, synproxy, or route-to).
 * eh    - ethernet header, may be NULL.
 * fwa   - dummynet arguments; only used when DUMMYNET is compiled in.
 *
 * Returns a PF_* action.  Must be called with pf_lock held.
 */
static __attribute__((noinline)) int
pf_test6(int dir, struct ifnet *ifp, pbuf_t **pbufp,
    struct ether_header *eh, struct ip_fw_args *fwa)
{
#if !DUMMYNET
#pragma unused(fwa)
#endif
	struct pfi_kif          *kif;
	u_short                  action = PF_PASS, reason = 0, log = 0;
	pbuf_t                  *pbuf = *pbufp;
	struct ip6_hdr          *h;
	struct pf_rule          *a = NULL, *r = &pf_default_rule, *tr, *nr;
	struct pf_state         *s = NULL;
	struct pf_state_key     *sk = NULL;
	struct pf_ruleset       *ruleset = NULL;
	struct pf_pdesc          pd;
	int                      off, terminal = 0, dirndx, rh_cnt = 0;
	u_int8_t                 nxt;
	boolean_t                fwd = FALSE;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	ASSERT(ifp != NULL);
	/*
	 * An outbound packet whose receive interface differs from the
	 * output interface is being forwarded; remember that so a
	 * reassembled packet can be re-fragmented on the way out.
	 */
	if ((dir == PF_OUT) && (pbuf->pb_ifp) && (ifp != pbuf->pb_ifp)) {
		fwd = TRUE;
	}

	/* Early exits: PF disabled, PF-generated packet, skipped interface. */
	if (!pf_status.running) {
		return PF_PASS;
	}

	memset(&pd, 0, sizeof(pd));

	if ((pd.pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test6: pf_get_mtag_pbuf returned NULL\n"));
		return PF_DROP;
	}

	/* Packets PF itself generated must not be re-filtered. */
	if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED) {
		return PF_PASS;
	}

	kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test6: kif == NULL, if_name %s\n", ifp->if_name));
		return PF_DROP;
	}
	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
		return PF_PASS;
	}

	/* Must at least hold a full IPv6 header. */
	if (pbuf->pb_packet_len < (int)sizeof(*h)) {
		REASON_SET(&reason, PFRES_SHORT);
		return PF_DROP;
	}

	/* Build the packet descriptor from the IPv6 header. */
	h = pbuf->pb_data;
	nxt = h->ip6_nxt;
	off = ((caddr_t)h - (caddr_t)pbuf->pb_data) + sizeof(struct ip6_hdr);
	pd.mp = pbuf;
	pd.lmw = 0;
	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
	pd.src = (struct pf_addr *)(uintptr_t)&h->ip6_src;
	pd.dst = (struct pf_addr *)(uintptr_t)&h->ip6_dst;
	PF_ACPY(&pd.baddr, pd.src, AF_INET6);
	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6);
	pd.ip_sum = NULL;
	pd.af = AF_INET6;
	pd.proto = nxt;
	pd.proto_variant = 0;
	pd.tos = 0;
	pd.ttl = h->ip6_hlim;
	pd.sc = MBUF_SCIDX(pbuf_get_service_class(pbuf));
	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
	pd.eh = eh;

	if (*pbuf->pb_flags & PKTF_FLOW_ID) {
		pd.flowsrc = *pbuf->pb_flowsrc;
		pd.flowhash = *pbuf->pb_flowid;
		pd.pktflags = (*pbuf->pb_flags & PKTF_FLOW_MASK);
	}

#if DUMMYNET
	/* A packet re-injected by dummynet was already normalized. */
	if (fwa != NULL && fwa->fwa_pf_rule != NULL) {
		goto nonormalize;
	}
#endif /* DUMMYNET */

	/* We do IP header normalization and packet reassembly here */
	action = pf_normalize_ip6(pbuf, dir, kif, &reason, &pd);
	if (action != PF_PASS || pd.lmw < 0) {
		action = PF_DROP;
		goto done;
	}

#if DUMMYNET
nonormalize:
#endif /* DUMMYNET */
	/* Normalization may have replaced the buffer; re-derive everything. */
	h = pbuf->pb_data;

	/*
	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
	 * will do something bad, so drop the packet for now.
	 */
	/* (htons/ntohs are interchangeable for a compare against zero) */
	if (htons(h->ip6_plen) == 0) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_NORM);        /*XXX*/
		goto done;
	}
	pd.src = (struct pf_addr *)(uintptr_t)&h->ip6_src;
	pd.dst = (struct pf_addr *)(uintptr_t)&h->ip6_dst;
	PF_ACPY(&pd.baddr, pd.src, AF_INET6);
	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6);
	pd.ip_sum = NULL;
	pd.af = AF_INET6;
	pd.tos = 0;
	pd.ttl = h->ip6_hlim;
	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
	pd.eh = eh;

	off = ((caddr_t)h - (caddr_t)pbuf->pb_data) + sizeof(struct ip6_hdr);
	pd.proto = h->ip6_nxt;
	pd.proto_variant = 0;
	pd.mp = pbuf;
	pd.lmw = 0;
	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);

	/*
	 * Walk the IPv6 extension-header chain until an upper-layer
	 * protocol (terminal header) is reached, advancing 'off' and
	 * counting routing headers (rh_cnt) so they can be rejected
	 * below unless allow-opts is set.
	 */
	do {
		switch (pd.proto) {
		case IPPROTO_FRAGMENT: {
			struct ip6_frag ip6f;

			pd.flags |= PFDESC_IP_FRAG;
			if (!pf_pull_hdr(pbuf, off, &ip6f, sizeof ip6f, NULL,
			    &reason, pd.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 short fragment header\n"));
				action = PF_DROP;
				REASON_SET(&reason, PFRES_SHORT);
				log = 1;
				goto done;
			}
			pd.proto = ip6f.ip6f_nxt;
#if DUMMYNET
			/* Traffic goes through dummynet first */
			action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd,
			    fwa);
			if (action == PF_DROP || pbuf == NULL) {
				*pbufp = NULL;
				return action;
			}
#endif /* DUMMYNET */
			/* Fragments are handled by the fragment ruleset. */
			action = pf_test_fragment(&r, dir, kif, pbuf, h, &pd,
			    &a, &ruleset);
			if (action == PF_DROP) {
				REASON_SET(&reason, PFRES_FRAG);
				log = 1;
			}
			goto done;
		}
		case IPPROTO_ROUTING:
			++rh_cnt;
			OS_FALLTHROUGH;

		case IPPROTO_AH:
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct ip6_ext  opt6;

			if (!pf_pull_hdr(pbuf, off, &opt6, sizeof(opt6),
			    NULL, &reason, pd.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 short opt\n"));
				action = PF_DROP;
				log = 1;
				goto done;
			}
			/* AH length is in 32-bit words, others in 8-byte units */
			if (pd.proto == IPPROTO_AH) {
				off += (opt6.ip6e_len + 2) * 4;
			} else {
				off += (opt6.ip6e_len + 1) * 8;
			}
			pd.proto = opt6.ip6e_nxt;
			/* goto the next header */
			break;
		}
		default:
			terminal++;
			break;
		}
	} while (!terminal);


	/* Dispatch to the per-protocol state engines. */
	switch (pd.proto) {
	case IPPROTO_TCP: {
		struct tcphdr   th;

		pd.hdr.tcp = &th;
		if (!pf_pull_hdr(pbuf, off, &th, sizeof(th),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_normalize_tcp(dir, kif, pbuf, 0, off, h, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_DROP) {
			goto done;
		}
		if (th.th_sport == 0 || th.th_dport == 0) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_INVPORT);
			goto done;
		}
		action = pf_test_state_tcp(&s, dir, kif, pbuf, off, h, &pd,
		    &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr   uh;

		pd.hdr.udp = &uh;
		if (!pf_pull_hdr(pbuf, off, &uh, sizeof(uh),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		if (uh.uh_sport == 0 || uh.uh_dport == 0) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_INVPORT);
			goto done;
		}
		/* UDP length must fit in the packet and cover the header. */
		if (ntohs(uh.uh_ulen) > pbuf->pb_packet_len - off ||
		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_SHORT);
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_udp(&s, dir, kif, pbuf, off, h, &pd,
		    &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_ICMPV6: {
		struct icmp6_hdr        ih;

		pd.hdr.icmp6 = &ih;
		if (!pf_pull_hdr(pbuf, off, &ih, sizeof(ih),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_icmp(&s, dir, kif,
		    pbuf, off, h, &pd, &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_ESP: {
		struct pf_esp_hdr       esp;

		pd.hdr.esp = &esp;
		if (!pf_pull_hdr(pbuf, off, &esp, sizeof(esp), &action,
		    &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_esp(&s, dir, kif, off, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_GRE: {
		struct pf_grev1_hdr     grev1;

		pd.hdr.grev1 = &grev1;
		if (!pf_pull_hdr(pbuf, off, &grev1, sizeof(grev1), &action,
		    &reason, AF_INET6)) {
			log = (action != PF_PASS);
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		/* GREv1 carrying PPP (PPTP data channel) gets its own states. */
		if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
		    ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
			if (ntohs(grev1.payload_length) >
			    pbuf->pb_packet_len - off) {
				action = PF_DROP;
				REASON_SET(&reason, PFRES_SHORT);
				goto done;
			}
			action = pf_test_state_grev1(&s, dir, kif, off, &pd);
			if (pd.lmw < 0) {
				goto done;
			}
			PF_APPLE_UPDATE_PDESC_IPv6();
			if (action == PF_PASS) {
#if NPFSYNC
				pfsync_update_state(s);
#endif /* NPFSYNC */
				r = s->rule.ptr;
				a = s->anchor.ptr;
				log = s->log;
				break;
			} else if (s == NULL) {
				action = pf_test_rule(&r, &s, dir, kif, pbuf,
				    off, h, &pd, &a, &ruleset, NULL);
				if (action == PF_PASS) {
					break;
				}
			}
		}

		/* not GREv1/PPTP, so treat as ordinary GRE... */
		OS_FALLTHROUGH; /* XXX is this correct? */
	}

	default:
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_other(&s, dir, kif, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif, pbuf, off, h,
			    &pd, &a, &ruleset, NULL);
		}
		break;
	}

done:
	/* NAT64'd packets were re-injected as IPv4; nothing left to do here. */
	if (action == PF_NAT64) {
		*pbufp = NULL;
		return action;
	}

	*pbufp = pd.mp;
	PF_APPLE_UPDATE_PDESC_IPv6();

	/* handle dangerous IPv6 extension headers. */
	if (action != PF_DROP) {
		if (action == PF_PASS && rh_cnt &&
		    !((s && s->allow_opts) || r->allow_opts)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_IPOPTIONS);
			log = 1;
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: dropping packet with dangerous v6addr headers\n"));
		}

		if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) ||
		    (pd.pktflags & PKTF_FLOW_ID)) {
			(void) pf_tag_packet(pbuf, pd.pf_mtag, s ? s->tag : 0,
			    r->rtableid, &pd);
		}

		if (action == PF_PASS) {
#if PF_ECN
			/* add hints for ecn */
			pd.pf_mtag->pftag_hdr = h;
			/* record address family */
			pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET;
			pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
#endif /* PF_ECN */
			/* record protocol */
			*pbuf->pb_proto = pd.proto;
			/*
			 * Mark inbound TCP/UDP redirected to loopback so the
			 * stack can distinguish them from connections bound
			 * specifically to loopback (security implications in
			 * tcp_input()/in_pcblookup_listen()).
			 */
			if (dir == PF_IN && (pd.proto == IPPROTO_TCP ||
			    pd.proto == IPPROTO_UDP) && s != NULL &&
			    s->nat_rule.ptr != NULL &&
			    (s->nat_rule.ptr->action == PF_RDR ||
			    s->nat_rule.ptr->action == PF_BINAT) &&
			    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6addr)) {
				pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;
			}
		}
	}


	if (log) {
		struct pf_rule *lr;

		/* prefer the NAT rule for logging when it logs everything */
		if (s != NULL && s->nat_rule.ptr != NULL &&
		    s->nat_rule.ptr->log & PF_LOG_ALL) {
			lr = s->nat_rule.ptr;
		} else {
			lr = r;
		}
		PFLOG_PACKET(kif, h, pbuf, AF_INET6, dir, reason, lr, a, ruleset,
		    &pd);
	}

	/* interface counters: [1] == IPv6 bucket */
	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;

	/* rule / anchor / state / source-node / table statistics */
	if (action == PF_PASS || r->action == PF_DROP) {
		dirndx = (dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd.tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd.tot_len;
		}
		if (s != NULL) {
			sk = s->state_key;
			if (s->nat_rule.ptr != NULL) {
				s->nat_rule.ptr->packets[dirndx]++;
				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
			}
			if (s->src_node != NULL) {
				s->src_node->packets[dirndx]++;
				s->src_node->bytes[dirndx] += pd.tot_len;
			}
			if (s->nat_src_node != NULL) {
				s->nat_src_node->packets[dirndx]++;
				s->nat_src_node->bytes[dirndx] += pd.tot_len;
			}
			dirndx = (dir == sk->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd.tot_len;
		}
		tr = r;
		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
		if (nr != NULL) {
			struct pf_addr *x;
			/*
			 * XXX: we need to make sure that the addresses
			 * passed to pfr_update_stats() are the same than
			 * the addresses used during matching (pfr_match)
			 */
			if (r == &pf_default_rule) {
				tr = nr;
				x = (s == NULL || sk->direction == dir) ?
				    &pd.baddr : &pd.naddr;
			} else {
				x = (s == NULL || sk->direction == dir) ?
				    &pd.naddr : &pd.baddr;
			}
			if (x == &pd.baddr || s == NULL) {
				if (dir == PF_OUT) {
					pd.src = x;
				} else {
					pd.dst = x;
				}
			}
		}
		if (tr->src.addr.type == PF_ADDR_TABLE) {
			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ? pd.src : pd.dst, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->src.neg);
		}
		if (tr->dst.addr.type == PF_ADDR_TABLE) {
			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->dst.neg);
		}
	}

	VERIFY(pbuf == NULL || pd.mp == NULL || pd.mp == pbuf);

	if (*pbufp) {
		if (pd.lmw < 0) {
			REASON_SET(&reason, PFRES_MEMORY);
			action = PF_DROP;
		}

		/* drops consume the packet */
		if (action == PF_DROP) {
			pbuf_destroy(*pbufp);
			*pbufp = NULL;
			return PF_DROP;
		}

		*pbufp = pbuf;
	}

	if (action == PF_SYNPROXY_DROP) {
		pbuf_destroy(*pbufp);
		*pbufp = NULL;
		action = PF_PASS;
	} else if (r->rt) {
		/* pf_route6 can free the mbuf causing *pbufp to become NULL */
		pf_route6(pbufp, r, dir, kif->pfik_ifp, s, &pd);
	}

	/* if reassembled packet passed, create new fragments */
	struct pf_fragment_tag *ftag = NULL;
	if ((action == PF_PASS) && (*pbufp != NULL) && (fwd) &&
	    ((ftag = pf_find_fragment_tag_pbuf(*pbufp)) != NULL)) {
		action = pf_refragment6(ifp, pbufp, ftag);
	}
	return action;
}
10876 
/*
 * Congestion feedback is not implemented on this platform; always
 * report "no congestion".
 */
static int
pf_check_congestion(struct ifqueue *ifq)
{
	(void)ifq;
	return 0;
}
10883 
10884 void
pool_init(struct pool * pp,size_t size,unsigned int align,unsigned int ioff,int flags,const char * wchan,void * palloc)10885 pool_init(struct pool *pp, size_t size, unsigned int align, unsigned int ioff,
10886     int flags, const char *wchan, void *palloc)
10887 {
10888 #pragma unused(align, ioff, flags, palloc)
10889 	bzero(pp, sizeof(*pp));
10890 	pp->pool_zone = zone_create(wchan, size,
10891 	    ZC_PGZ_USE_GUARDS | ZC_ZFREE_CLEARMEM);
10892 	pp->pool_hiwat = pp->pool_limit = (unsigned int)-1;
10893 	pp->pool_name = wchan;
10894 }
10895 
/* Zones cannot be currently destroyed */
/*
 * Tear down a pool created by pool_init().  Intentionally a no-op:
 * the backing zone persists for the lifetime of the kernel.
 */
void
pool_destroy(struct pool *pp)
{
#pragma unused(pp)
}
10902 
/*
 * Record a high-water mark for the pool.  The value is stored but not
 * acted upon by this implementation.
 */
void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pool_hiwat = n;     /* Currently unused */
}
10908 
/*
 * Set the hard allocation limit for the pool; pool_get() fails once
 * the outstanding count exceeds it.  The warning message and rate cap
 * are accepted for API compatibility but ignored here.
 */
void
pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
{
#pragma unused(warnmess, ratecap)
	pp->pool_limit = n;
}
10915 
/*
 * Allocate one element from the pool.  Enforces the pool's hard limit
 * and tracks the outstanding-element count.  Returns NULL when the
 * limit has been exceeded or when the zone allocator fails (only
 * possible without PR_WAITOK).  Must be called with pf_lock held.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *buf;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * NOTE(review): '>' lets pool_count briefly reach pool_limit + 1;
	 * presumably acceptable since the default limit is UINT_MAX —
	 * confirm before tightening to '>='.
	 */
	if (pp->pool_count > pp->pool_limit) {
		DPFPRINTF(PF_DEBUG_NOISY,
		    ("pf: pool %s hard limit reached (%d)\n",
		    pp->pool_name != NULL ? pp->pool_name : "unknown",
		    pp->pool_limit));
		pp->pool_fails++;
		return NULL;
	}

	buf = zalloc_flags(pp->pool_zone,
	    (flags & PR_WAITOK) ? Z_WAITOK : Z_NOWAIT);
	if (buf != NULL) {
		pp->pool_count++;
		/* counter overflow would wrap to 0 */
		VERIFY(pp->pool_count != 0);
	}
	return buf;
}
10940 
/*
 * Return an element to the pool and decrement the outstanding count.
 * The VERIFY catches an unbalanced put (count underflow).  Must be
 * called with pf_lock held.
 */
void
pool_put(struct pool *pp, void *v)
{
	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	zfree(pp->pool_zone, v);
	VERIFY(pp->pool_count != 0);
	pp->pool_count--;
}
10950 
10951 struct pf_mtag *
pf_find_mtag_pbuf(pbuf_t * pbuf)10952 pf_find_mtag_pbuf(pbuf_t *pbuf)
10953 {
10954 	return pbuf->pb_pftag;
10955 }
10956 
/* Return the PF metadata tag attached to an mbuf (may be NULL). */
struct pf_mtag *
pf_find_mtag(struct mbuf *m)
{
	struct pf_mtag *mtag = m_pftag(m);

	return mtag;
}
10962 
/*
 * Fetch the PF metadata tag for an mbuf.  Kept as a separate entry
 * point for API compatibility; simply defers to pf_find_mtag().
 */
struct pf_mtag *
pf_get_mtag(struct mbuf *m)
{
	struct pf_mtag *mtag = pf_find_mtag(m);

	return mtag;
}
10968 
10969 struct pf_mtag *
pf_get_mtag_pbuf(pbuf_t * pbuf)10970 pf_get_mtag_pbuf(pbuf_t *pbuf)
10971 {
10972 	return pf_find_mtag_pbuf(pbuf);
10973 }
10974 
10975 struct pf_fragment_tag *
pf_copy_fragment_tag(struct mbuf * m,struct pf_fragment_tag * ftag,int how)10976 pf_copy_fragment_tag(struct mbuf *m, struct pf_fragment_tag *ftag, int how)
10977 {
10978 	struct m_tag *tag;
10979 	struct pf_mtag *pftag = pf_find_mtag(m);
10980 
10981 	tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS,
10982 	    sizeof(*ftag), how, m);
10983 	if (tag == NULL) {
10984 		return NULL;
10985 	} else {
10986 		m_tag_prepend(m, tag);
10987 		tag = tag + 1;
10988 	}
10989 	bcopy(ftag, tag, sizeof(*ftag));
10990 	pftag->pftag_flags |= PF_TAG_REASSEMBLED;
10991 	return (struct pf_fragment_tag *)tag;
10992 }
10993 
10994 struct pf_fragment_tag *
pf_find_fragment_tag(struct mbuf * m)10995 pf_find_fragment_tag(struct mbuf *m)
10996 {
10997 	struct m_tag *tag;
10998 	struct pf_fragment_tag *ftag;
10999 	struct pf_mtag *pftag = pf_find_mtag(m);
11000 
11001 	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS,
11002 	    NULL);
11003 	VERIFY((tag == NULL) || (pftag->pftag_flags & PF_TAG_REASSEMBLED));
11004 	if (tag != NULL) {
11005 		tag = tag + 1;
11006 	}
11007 	ftag = (struct pf_fragment_tag *)tag;
11008 	return ftag;
11009 }
11010 
11011 struct pf_fragment_tag *
pf_find_fragment_tag_pbuf(pbuf_t * pbuf)11012 pf_find_fragment_tag_pbuf(pbuf_t *pbuf)
11013 {
11014 	struct pf_mtag *mtag = pf_find_mtag_pbuf(pbuf);
11015 
11016 	return (mtag->pftag_flags & PF_TAG_REASSEMBLED) ?
11017 	       pbuf->pb_pf_fragtag : NULL;
11018 }
11019 
11020 uint64_t
pf_time_second(void)11021 pf_time_second(void)
11022 {
11023 	struct timeval t;
11024 
11025 	microuptime(&t);
11026 	return t.tv_sec;
11027 }
11028 
11029 uint64_t
pf_calendar_time_second(void)11030 pf_calendar_time_second(void)
11031 {
11032 	struct timeval t;
11033 
11034 	getmicrotime(&t);
11035 	return t.tv_sec;
11036 }
11037 
11038 static void *
hook_establish(struct hook_desc_head * head,int tail,hook_fn_t fn,void * arg)11039 hook_establish(struct hook_desc_head *head, int tail, hook_fn_t fn, void *arg)
11040 {
11041 	struct hook_desc *hd;
11042 
11043 	hd = kalloc_type(struct hook_desc, Z_WAITOK | Z_NOFAIL);
11044 
11045 	hd->hd_fn = fn;
11046 	hd->hd_arg = arg;
11047 	if (tail) {
11048 		TAILQ_INSERT_TAIL(head, hd, hd_list);
11049 	} else {
11050 		TAILQ_INSERT_HEAD(head, hd, hd_list);
11051 	}
11052 
11053 	return hd;
11054 }
11055 
11056 static void
hook_runloop(struct hook_desc_head * head,int flags)11057 hook_runloop(struct hook_desc_head *head, int flags)
11058 {
11059 	struct hook_desc *hd;
11060 
11061 	if (!(flags & HOOK_REMOVE)) {
11062 		if (!(flags & HOOK_ABORT)) {
11063 			TAILQ_FOREACH(hd, head, hd_list)
11064 			hd->hd_fn(hd->hd_arg);
11065 		}
11066 	} else {
11067 		while (!!(hd = TAILQ_FIRST(head))) {
11068 			TAILQ_REMOVE(head, hd, hd_list);
11069 			if (!(flags & HOOK_ABORT)) {
11070 				hd->hd_fn(hd->hd_arg);
11071 			}
11072 			if (flags & HOOK_FREE) {
11073 				kfree_type(struct hook_desc, hd);
11074 			}
11075 		}
11076 	}
11077 }
11078 
11079 #if SKYWALK && defined(XNU_TARGET_OS_OSX)
11080 static bool
pf_check_compatible_anchor(const char * anchor_path)11081 pf_check_compatible_anchor(const char *anchor_path)
11082 {
11083 	// Whitelist reserved anchor
11084 	if (strncmp(anchor_path, PF_RESERVED_ANCHOR, MAXPATHLEN) == 0) {
11085 		return true;
11086 	}
11087 
11088 	// Whitelist com.apple anchor
11089 	if (strncmp(anchor_path, "com.apple", MAXPATHLEN) == 0) {
11090 		return true;
11091 	}
11092 
11093 	for (int i = 0; i < sizeof(compatible_anchors) / sizeof(compatible_anchors[0]); i++) {
11094 		const char *ptr = strnstr(anchor_path, compatible_anchors[i], MAXPATHLEN);
11095 		if (ptr != NULL && ptr == anchor_path) {
11096 			return true;
11097 		}
11098 	}
11099 
11100 	return false;
11101 }
11102 
/*
 * Report whether the loaded PF configuration is "compatible": every
 * anchor path is whitelisted by pf_check_compatible_anchor(), and the
 * main ruleset contains only anchor references (no plain rules).
 * Returns false (with a debug message at PF_DEBUG_MISC) on the first
 * violation found.
 */
bool
pf_check_compatible_rules(void)
{
	struct pf_anchor *anchor = NULL;
	struct pf_rule *rule = NULL;

	// Check whitelisted anchors
	RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) {
		if (!pf_check_compatible_anchor(anchor->path)) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf anchor %s not compatible\n", anchor->path);
			}
			return false;
		}
	}

	// Check rules in main ruleset
	for (int i = PF_RULESET_SCRUB; i < PF_RULESET_MAX; i++) {
		TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr, entries) {
			/* a NULL anchor means a concrete rule, not an anchor call */
			if (rule->anchor == NULL) {
				if (pf_status.debug >= PF_DEBUG_MISC) {
					printf("main ruleset contains rules\n");
				}
				return false;
			}
		}
	}

	return true;
}
11133 #endif // SKYWALK && defined(XNU_TARGET_OS_OSX)
11134