xref: /xnu-11215.1.10/bsd/net/pf.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 /*
2  * Copyright (c) 2007-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*	$apfw: git commit 6602420f2f101b74305cd78f7cd9e0c8fdedae97 $ */
30 /*	$OpenBSD: pf.c,v 1.567 2008/02/20 23:40:13 henning Exp $ */
31 
32 /*
33  * Copyright (c) 2001 Daniel Hartmeier
34  * Copyright (c) 2002 - 2013 Henning Brauer
35  * NAT64 - Copyright (c) 2010 Viagenie Inc. (http://www.viagenie.ca)
36  * All rights reserved.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  *
42  *    - Redistributions of source code must retain the above copyright
43  *      notice, this list of conditions and the following disclaimer.
44  *    - Redistributions in binary form must reproduce the above
45  *      copyright notice, this list of conditions and the following
46  *      disclaimer in the documentation and/or other materials provided
47  *      with the distribution.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
50  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
51  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
52  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
53  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
54  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
55  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
56  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
57  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
59  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
60  * POSSIBILITY OF SUCH DAMAGE.
61  *
62  * Effort sponsored in part by the Defense Advanced Research Projects
63  * Agency (DARPA) and Air Force Research Laboratory, Air Force
64  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
65  *
66  */
67 
68 #include <machine/endian.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/filio.h>
72 #include <sys/socket.h>
73 #include <sys/socketvar.h>
74 #include <sys/kernel.h>
75 #include <sys/time.h>
76 #include <sys/proc.h>
77 #include <sys/random.h>
78 #include <sys/mcache.h>
79 #include <sys/protosw.h>
80 
81 #include <libkern/crypto/md5.h>
82 #include <libkern/libkern.h>
83 
84 #include <mach/thread_act.h>
85 
86 #include <net/if.h>
87 #include <net/if_types.h>
88 #include <net/bpf.h>
89 #include <net/route.h>
90 #include <net/dlil.h>
91 
92 #include <netinet/in.h>
93 #include <netinet/in_var.h>
94 #include <netinet/in_systm.h>
95 #include <netinet/ip.h>
96 #include <netinet/ip_var.h>
97 #include <netinet/tcp.h>
98 #include <netinet/tcp_seq.h>
99 #include <netinet/udp.h>
100 #include <netinet/ip_icmp.h>
101 #include <netinet/in_pcb.h>
102 #include <netinet/tcp_timer.h>
103 #include <netinet/tcp_var.h>
104 #include <netinet/tcp_fsm.h>
105 #include <netinet/udp_var.h>
106 #include <netinet/icmp_var.h>
107 #include <net/if_ether.h>
108 #include <net/ethernet.h>
109 #include <net/flowhash.h>
110 #include <net/nat464_utils.h>
111 #include <net/pfvar.h>
112 #include <net/if_pflog.h>
113 
114 #if NPFSYNC
115 #include <net/if_pfsync.h>
116 #endif /* NPFSYNC */
117 
118 #include <netinet/ip6.h>
119 #include <netinet6/in6_pcb.h>
120 #include <netinet6/ip6_var.h>
121 #include <netinet/icmp6.h>
122 #include <netinet6/nd6.h>
123 
124 #if DUMMYNET
125 #include <netinet/ip_dummynet.h>
126 #endif /* DUMMYNET */
127 
128 #if SKYWALK
129 #include <skywalk/namespace/flowidns.h>
130 #endif /* SKYWALK */
131 
132 /*
133  * For RandomULong(), to get a 32 bits random value
134  * Note that random() returns a 31 bits value, see rdar://11159750
135  */
136 #include <dev/random/randomdev.h>
137 
138 #define DPFPRINTF(n, x) (pf_status.debug >= (n) ? printf x : ((void)0))
139 
140 /*
141  * On Mac OS X, the rtableid value is treated as the interface scope
142  * value that is equivalent to the interface index used for scoped
143  * routing.  A valid scope value is anything but IFSCOPE_NONE (0),
144  * as per definition of ifindex which is a positive, non-zero number.
145  * The other BSDs treat a negative rtableid value as invalid, hence
146  * the test against INT_MAX to handle userland apps which initialize
147  * the field with a negative number.
148  */
149 #define PF_RTABLEID_IS_VALID(r) \
150 	((r) > IFSCOPE_NONE && (r) <= INT_MAX)
151 
152 /*
153  * Global variables
154  */
155 static LCK_GRP_DECLARE(pf_lock_grp, "pf");
156 LCK_MTX_DECLARE(pf_lock, &pf_lock_grp);
157 
158 static LCK_GRP_DECLARE(pf_perim_lock_grp, "pf_perim");
159 LCK_RW_DECLARE(pf_perim_lock, &pf_perim_lock_grp);
160 
161 /* state tables */
162 struct pf_state_tree_lan_ext     pf_statetbl_lan_ext;
163 struct pf_state_tree_ext_gwy     pf_statetbl_ext_gwy;
164 static uint32_t pf_state_tree_ext_gwy_nat64_cnt = 0;
165 
166 struct pf_palist         pf_pabuf;
167 struct pf_status         pf_status;
168 
169 u_int32_t                ticket_pabuf;
170 
171 static MD5_CTX           pf_tcp_secret_ctx;
172 static u_char            pf_tcp_secret[16];
173 static int               pf_tcp_secret_init;
174 static int               pf_tcp_iss_off;
175 
176 static struct pf_anchor_stackframe {
177 	struct pf_ruleset                       *rs;
178 	struct pf_rule                          *r;
179 	struct pf_anchor_node                   *parent;
180 	struct pf_anchor                        *child;
181 } pf_anchor_stack[64];
182 
183 struct pool              pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
184 struct pool              pf_state_pl, pf_state_key_pl;
185 
186 typedef void (*hook_fn_t)(void *);
187 
188 struct hook_desc {
189 	TAILQ_ENTRY(hook_desc) hd_list;
190 	hook_fn_t hd_fn;
191 	void *hd_arg;
192 };
193 
194 #define HOOK_REMOVE     0x01
195 #define HOOK_FREE       0x02
196 #define HOOK_ABORT      0x04
197 
198 static void             *hook_establish(struct hook_desc_head *, int,
199     hook_fn_t, void *);
200 static void             hook_runloop(struct hook_desc_head *, int flags);
201 
202 struct pool              pf_app_state_pl;
203 static void              pf_print_addr(struct pf_addr *addr, sa_family_t af);
204 static void              pf_print_sk_host(struct pf_state_host *, u_int8_t, int,
205     u_int8_t);
206 
207 static void              pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
208 
209 static void              pf_init_threshold(struct pf_threshold *, u_int32_t,
210     u_int32_t);
211 static void              pf_add_threshold(struct pf_threshold *);
212 static int               pf_check_threshold(struct pf_threshold *);
213 
214 static void              pf_change_ap(int, pbuf_t *, struct pf_addr *,
215     u_int16_t *, u_int16_t *, u_int16_t *,
216     struct pf_addr *, u_int16_t, u_int8_t, sa_family_t,
217     sa_family_t, int);
218 static int               pf_modulate_sack(pbuf_t *, int, struct pf_pdesc *,
219     struct tcphdr *, struct pf_state_peer *);
220 static void              pf_change_a6(struct pf_addr *, u_int16_t *,
221     struct pf_addr *, u_int8_t);
222 static void pf_change_addr(struct pf_addr *a, u_int16_t *c, struct pf_addr *an,
223     u_int8_t u, sa_family_t af, sa_family_t afn);
224 static void              pf_change_icmp(struct pf_addr *, u_int16_t *,
225     struct pf_addr *, struct pf_addr *, u_int16_t,
226     u_int16_t *, u_int16_t *, u_int16_t *,
227     u_int16_t *, u_int8_t, sa_family_t);
228 static void              pf_send_tcp(const struct pf_rule *, sa_family_t,
229     const struct pf_addr *, const struct pf_addr *,
230     u_int16_t, u_int16_t, u_int32_t, u_int32_t,
231     u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
232     u_int16_t, struct ether_header *, struct ifnet *);
233 static void              pf_send_icmp(pbuf_t *, u_int8_t, u_int8_t,
234     sa_family_t, struct pf_rule *);
235 static struct pf_rule   *pf_match_translation(struct pf_pdesc *, pbuf_t *,
236     int, int, struct pfi_kif *, struct pf_addr *,
237     union pf_state_xport *, struct pf_addr *,
238     union pf_state_xport *, int);
239 static struct pf_rule   *pf_get_translation_aux(struct pf_pdesc *,
240     pbuf_t *, int, int, struct pfi_kif *,
241     struct pf_src_node **, struct pf_addr *,
242     union pf_state_xport *, struct pf_addr *,
243     union pf_state_xport *, union pf_state_xport *
244 #if SKYWALK
245     , netns_token *
246 #endif
247     );
248 static void              pf_attach_state(struct pf_state_key *,
249     struct pf_state *, int);
250 static u_int32_t         pf_tcp_iss(struct pf_pdesc *);
251 static int               pf_test_rule(struct pf_rule **, struct pf_state **,
252     int, struct pfi_kif *, pbuf_t *, int,
253     void *, struct pf_pdesc *, struct pf_rule **,
254     struct pf_ruleset **, struct ifqueue *);
255 #if DUMMYNET
256 static int               pf_test_dummynet(struct pf_rule **, int,
257     struct pfi_kif *, pbuf_t **,
258     struct pf_pdesc *, struct ip_fw_args *);
259 #endif /* DUMMYNET */
260 static int               pf_test_fragment(struct pf_rule **, int,
261     struct pfi_kif *, pbuf_t *, void *,
262     struct pf_pdesc *, struct pf_rule **,
263     struct pf_ruleset **);
264 static int               pf_test_state_tcp(struct pf_state **, int,
265     struct pfi_kif *, pbuf_t *, int,
266     void *, struct pf_pdesc *, u_short *);
267 static int               pf_test_state_udp(struct pf_state **, int,
268     struct pfi_kif *, pbuf_t *, int,
269     void *, struct pf_pdesc *, u_short *);
270 static int               pf_test_state_icmp(struct pf_state **, int,
271     struct pfi_kif *, pbuf_t *, int,
272     void *, struct pf_pdesc *, u_short *);
273 static int               pf_test_state_other(struct pf_state **, int,
274     struct pfi_kif *, struct pf_pdesc *);
275 static int               pf_match_tag(struct pf_rule *,
276     struct pf_mtag *, int *);
277 static void              pf_hash(struct pf_addr *, struct pf_addr *,
278     struct pf_poolhashkey *, sa_family_t);
279 static int               pf_map_addr(u_int8_t, struct pf_rule *,
280     struct pf_addr *, struct pf_addr *,
281     struct pf_addr *, struct pf_src_node **);
282 static int               pf_get_sport(struct pf_pdesc *, struct pfi_kif *,
283     struct pf_rule *, struct pf_addr *,
284     union pf_state_xport *, struct pf_addr *,
285     union pf_state_xport *, struct pf_addr *,
286     union pf_state_xport *, struct pf_src_node **
287 #if SKYWALK
288     , netns_token *
289 #endif
290     );
291 static void              pf_route(pbuf_t **, struct pf_rule *, int,
292     struct ifnet *, struct pf_state *,
293     struct pf_pdesc *);
294 static void              pf_route6(pbuf_t **, struct pf_rule *, int,
295     struct ifnet *, struct pf_state *,
296     struct pf_pdesc *);
297 static u_int8_t          pf_get_wscale(pbuf_t *, int, u_int16_t,
298     sa_family_t);
299 static u_int16_t         pf_get_mss(pbuf_t *, int, u_int16_t,
300     sa_family_t);
301 static u_int16_t         pf_calc_mss(struct pf_addr *, sa_family_t,
302     u_int16_t);
303 static void              pf_set_rt_ifp(struct pf_state *,
304     struct pf_addr *, sa_family_t af);
305 static int               pf_check_proto_cksum(pbuf_t *, int, int,
306     u_int8_t, sa_family_t);
307 static int               pf_addr_wrap_neq(struct pf_addr_wrap *,
308     struct pf_addr_wrap *);
309 static struct pf_state  *pf_find_state(struct pfi_kif *,
310     struct pf_state_key_cmp *, u_int);
311 static int               pf_src_connlimit(struct pf_state **);
312 static void              pf_stateins_err(const char *, struct pf_state *,
313     struct pfi_kif *);
314 static int               pf_check_congestion(struct ifqueue *);
315 
316 #if 0
317 static const char *pf_pptp_ctrl_type_name(u_int16_t code);
318 #endif
319 static void             pf_pptp_handler(struct pf_state *, int, int,
320     struct pf_pdesc *, struct pfi_kif *);
321 static void             pf_pptp_unlink(struct pf_state *);
322 static void             pf_grev1_unlink(struct pf_state *);
323 static int              pf_test_state_grev1(struct pf_state **, int,
324     struct pfi_kif *, int, struct pf_pdesc *);
325 static int              pf_ike_compare(struct pf_app_state *,
326     struct pf_app_state *);
327 static int              pf_test_state_esp(struct pf_state **, int,
328     struct pfi_kif *, int, struct pf_pdesc *);
329 static int pf_test6(int, struct ifnet *, pbuf_t **, struct ether_header *,
330     struct ip_fw_args *);
331 #if INET
332 static int pf_test(int, struct ifnet *, pbuf_t **,
333     struct ether_header *, struct ip_fw_args *);
334 #endif /* INET */
335 
336 
337 extern struct pool pfr_ktable_pl;
338 extern struct pool pfr_kentry_pl;
339 extern int path_mtu_discovery;
340 
341 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
342 	{ .pp = &pf_state_pl, .limit = PFSTATE_HIWAT },
343 	{ .pp = &pf_app_state_pl, .limit = PFAPPSTATE_HIWAT },
344 	{ .pp = &pf_src_tree_pl, .limit = PFSNODE_HIWAT },
345 	{ .pp = &pf_frent_pl, .limit = PFFRAG_FRENT_HIWAT },
346 	{ .pp = &pfr_ktable_pl, .limit = PFR_KTABLE_HIWAT },
347 	{ .pp = &pfr_kentry_pl, .limit = PFR_KENTRY_HIWAT },
348 };
349 
350 #if SKYWALK
351 const char *compatible_anchors[] = {
352 	"com.apple.internet-sharing",
353 	"com.apple/250.ApplicationFirewall",
354 	"com.apple/200.AirDrop"
355 };
356 #endif // SKYWALK
357 
/*
 * Lazily make the first "len" bytes of the packet in "pbuf" writable and
 * return a pointer to the (possibly relocated) packet data, or NULL on
 * failure.
 *
 * pd->lmw caches the number of bytes already made writable for this packet
 * descriptor; a negative value records a prior failure, in which case all
 * subsequent calls fail immediately.  The copy/relocation is only performed
 * when the caller asks for more bytes than previously guaranteed.
 *
 * Because pbuf_ensure_writable() may move the packet data to a new buffer,
 * the cached pointers held in the packet descriptor (pf_mtag, src, dst,
 * ip_sum) would otherwise dangle into the old buffer; they are re-derived
 * from the new data pointer here.
 */
void *
pf_lazy_makewritable(struct pf_pdesc *pd, pbuf_t *pbuf, int len)
{
	void *__single p;

	/* A previous attempt failed; refuse further writes to this packet. */
	if (pd->lmw < 0) {
		return NULL;
	}

	/* The descriptor must still refer to the pbuf we are modifying. */
	VERIFY(pbuf == pd->mp);

	p = pbuf->pb_data;
	if (len > pd->lmw) {
		if ((p = pbuf_ensure_writable(pbuf, len)) == NULL) {
			len = -1;       /* record failure in pd->lmw below */
		}
		pd->lmw = len;
		if (len >= 0) {
			/* Data may have moved: refresh cached pointers. */
			pd->pf_mtag = pf_find_mtag_pbuf(pbuf);

			switch (pd->af) {
			case AF_INET: {
				struct ip *__single h = p;
				pd->src = (struct pf_addr *)(void *)&h->ip_src;
				pd->dst = (struct pf_addr *)(void *)&h->ip_dst;
				pd->ip_sum = &h->ip_sum;
				break;
			}
			case AF_INET6: {
				struct ip6_hdr *__single h = p;
				pd->src = (struct pf_addr *)(void *)&h->ip6_src;
				pd->dst = (struct pf_addr *)(void *)&h->ip6_dst;
				break;
			}
			}
		}
	}

	return len < 0 ? NULL : p;
}
398 
399 static const int *
pf_state_lookup_aux(struct pf_state ** state,struct pfi_kif * kif,int direction,int * action)400 pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif,
401     int direction, int *action)
402 {
403 	if (*state == NULL || (*state)->timeout == PFTM_PURGE) {
404 		*action = PF_DROP;
405 		return action;
406 	}
407 
408 	if (direction == PF_OUT &&
409 	    (((*state)->rule.ptr->rt == PF_ROUTETO &&
410 	    (*state)->rule.ptr->direction == PF_OUT) ||
411 	    ((*state)->rule.ptr->rt == PF_REPLYTO &&
412 	    (*state)->rule.ptr->direction == PF_IN)) &&
413 	    (*state)->rt_kif != NULL && (*state)->rt_kif != kif) {
414 		*action = PF_PASS;
415 		return action;
416 	}
417 
418 	return 0;
419 }
420 
/*
 * Non-hygienic helper macro: expects locals named "state", "kif", "key",
 * "direction" and "pd" in the calling function.  Looks up the state entry
 * matching "key"; if one is found and the packet descriptor carries no
 * flow ID yet, the state's flow source/hash are copied into the descriptor.
 * If pf_state_lookup_aux() reaches a verdict (drop a missing/purged state,
 * or pass an outbound packet bound to another interface), the CALLER
 * returns that verdict immediately.
 */
#define STATE_LOOKUP()                                                   \
	do {                                                             \
	        int action;                                              \
	        *state = pf_find_state(kif, &key, direction);            \
	        if (*state != NULL && pd != NULL &&                      \
	            !(pd->pktflags & PKTF_FLOW_ID)) {                    \
	                pd->flowsrc = (*state)->state_key->flowsrc;      \
	                pd->flowhash = (*state)->state_key->flowhash;    \
	                if (pd->flowhash != 0) {                         \
	                        pd->pktflags |= PKTF_FLOW_ID;            \
	                        pd->pktflags &= ~PKTF_FLOW_ADV;          \
	                }                                                \
	        }                                                        \
	        if (pf_state_lookup_aux(state, kif, direction, &action)) \
	                return (action);                                 \
	} while (0)
437 
438 /*
439  * This macro resets the flowID information in a packet descriptor which was
440  * copied in from a PF state. This should be used after a protocol state lookup
441  * finds a matching PF state, but then decides to not use it for various
442  * reasons.
443  */
444 #define PD_CLEAR_STATE_FLOWID(_pd)                                       \
445 	do {                                                             \
446 	        if (__improbable(((_pd)->pktflags & PKTF_FLOW_ID) &&     \
447 	            ((_pd)->flowsrc == FLOWSRC_PF))) {                   \
448 	                (_pd)->flowhash = 0;                             \
449 	                (_pd)->flowsrc = 0;                              \
450 	                (_pd)->pktflags &= ~PKTF_FLOW_ID;                \
451 	        }                                                        \
452                                                                          \
453 	} while (0)
454 
455 #define STATE_ADDR_TRANSLATE(sk)                                        \
456 	(sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] ||         \
457 	((sk)->af_lan == AF_INET6 &&                                    \
458 	((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] ||        \
459 	(sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] ||         \
460 	(sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3]))
461 
462 #define STATE_TRANSLATE(sk)                                             \
463 	((sk)->af_lan != (sk)->af_gwy ||                                \
464 	STATE_ADDR_TRANSLATE(sk) ||                                     \
465 	(sk)->lan.xport.port != (sk)->gwy.xport.port)
466 
467 #define STATE_GRE_TRANSLATE(sk)                                         \
468 	(STATE_ADDR_TRANSLATE(sk) ||                                    \
469 	(sk)->lan.xport.call_id != (sk)->gwy.xport.call_id)
470 
/*
 * Interface a new state is bound to: rules flagged PFRULE_IFBOUND bind
 * their states to the interface the packet was seen on ("k"); all other
 * rules create floating states attached to pfi_all.
 *
 * The replacement list is wrapped in parentheses so the conditional
 * expression composes safely when the macro is used inside a larger
 * expression (unparenthesized ?: is an operator-precedence hazard).
 */
#define BOUND_IFACE(r, k) \
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all)
473 
474 #define STATE_INC_COUNTERS(s)                                   \
475 	do {                                                    \
476 	        s->rule.ptr->states++;                          \
477 	        VERIFY(s->rule.ptr->states != 0);               \
478 	        if (s->anchor.ptr != NULL) {                    \
479 	                s->anchor.ptr->states++;                \
480 	                VERIFY(s->anchor.ptr->states != 0);     \
481 	        }                                               \
482 	        if (s->nat_rule.ptr != NULL) {                  \
483 	                s->nat_rule.ptr->states++;              \
484 	                VERIFY(s->nat_rule.ptr->states != 0);   \
485 	        }                                               \
486 	} while (0)
487 
488 #define STATE_DEC_COUNTERS(s)                                   \
489 	do {                                                    \
490 	        if (s->nat_rule.ptr != NULL) {                  \
491 	                VERIFY(s->nat_rule.ptr->states > 0);    \
492 	                s->nat_rule.ptr->states--;              \
493 	        }                                               \
494 	        if (s->anchor.ptr != NULL) {                    \
495 	                VERIFY(s->anchor.ptr->states > 0);      \
496 	                s->anchor.ptr->states--;                \
497 	        }                                               \
498 	        VERIFY(s->rule.ptr->states > 0);                \
499 	        s->rule.ptr->states--;                          \
500 	} while (0)
501 
502 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
503 static __inline int pf_state_compare_lan_ext(struct pf_state_key *,
504     struct pf_state_key *);
505 static __inline int pf_state_compare_ext_gwy(struct pf_state_key *,
506     struct pf_state_key *);
507 static __inline int pf_state_compare_id(struct pf_state *,
508     struct pf_state *);
509 
510 struct pf_src_tree tree_src_tracking;
511 
512 struct pf_state_tree_id tree_id;
513 struct pf_state_queue state_list;
514 
515 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
516 RB_GENERATE(pf_state_tree_lan_ext, pf_state_key,
517     entry_lan_ext, pf_state_compare_lan_ext);
518 RB_GENERATE(pf_state_tree_ext_gwy, pf_state_key,
519     entry_ext_gwy, pf_state_compare_ext_gwy);
520 RB_GENERATE(pf_state_tree_id, pf_state,
521     entry_id, pf_state_compare_id);
522 
523 #define PF_DT_SKIP_LANEXT       0x01
524 #define PF_DT_SKIP_EXTGWY       0x02
525 
526 static const u_int16_t PF_PPTP_PORT = 1723;
527 static const u_int32_t PF_PPTP_MAGIC_NUMBER = 0x1A2B3C4D;
528 
529 struct pf_pptp_hdr {
530 	u_int16_t       length;
531 	u_int16_t       type;
532 	u_int32_t       magic;
533 };
534 
535 struct pf_pptp_ctrl_hdr {
536 	u_int16_t       type;
537 	u_int16_t       reserved_0;
538 };
539 
540 struct pf_pptp_ctrl_generic {
541 	u_int16_t       data[0];
542 };
543 
544 #define PF_PPTP_CTRL_TYPE_START_REQ     1
545 struct pf_pptp_ctrl_start_req {
546 	u_int16_t       protocol_version;
547 	u_int16_t       reserved_1;
548 	u_int32_t       framing_capabilities;
549 	u_int32_t       bearer_capabilities;
550 	u_int16_t       maximum_channels;
551 	u_int16_t       firmware_revision;
552 	u_int8_t        host_name[64];
553 	u_int8_t        vendor_string[64];
554 };
555 
556 #define PF_PPTP_CTRL_TYPE_START_RPY     2
557 struct pf_pptp_ctrl_start_rpy {
558 	u_int16_t       protocol_version;
559 	u_int8_t        result_code;
560 	u_int8_t        error_code;
561 	u_int32_t       framing_capabilities;
562 	u_int32_t       bearer_capabilities;
563 	u_int16_t       maximum_channels;
564 	u_int16_t       firmware_revision;
565 	u_int8_t        host_name[64];
566 	u_int8_t        vendor_string[64];
567 };
568 
569 #define PF_PPTP_CTRL_TYPE_STOP_REQ      3
570 struct pf_pptp_ctrl_stop_req {
571 	u_int8_t        reason;
572 	u_int8_t        reserved_1;
573 	u_int16_t       reserved_2;
574 };
575 
576 #define PF_PPTP_CTRL_TYPE_STOP_RPY      4
577 struct pf_pptp_ctrl_stop_rpy {
578 	u_int8_t        reason;
579 	u_int8_t        error_code;
580 	u_int16_t       reserved_1;
581 };
582 
583 #define PF_PPTP_CTRL_TYPE_ECHO_REQ      5
584 struct pf_pptp_ctrl_echo_req {
585 	u_int32_t       identifier;
586 };
587 
588 #define PF_PPTP_CTRL_TYPE_ECHO_RPY      6
589 struct pf_pptp_ctrl_echo_rpy {
590 	u_int32_t       identifier;
591 	u_int8_t        result_code;
592 	u_int8_t        error_code;
593 	u_int16_t       reserved_1;
594 };
595 
596 #define PF_PPTP_CTRL_TYPE_CALL_OUT_REQ  7
597 struct pf_pptp_ctrl_call_out_req {
598 	u_int16_t       call_id;
599 	u_int16_t       call_sernum;
600 	u_int32_t       min_bps;
601 	u_int32_t       bearer_type;
602 	u_int32_t       framing_type;
603 	u_int16_t       rxwindow_size;
604 	u_int16_t       proc_delay;
605 	u_int8_t        phone_num[64];
606 	u_int8_t        sub_addr[64];
607 };
608 
609 #define PF_PPTP_CTRL_TYPE_CALL_OUT_RPY  8
610 struct pf_pptp_ctrl_call_out_rpy {
611 	u_int16_t       call_id;
612 	u_int16_t       peer_call_id;
613 	u_int8_t        result_code;
614 	u_int8_t        error_code;
615 	u_int16_t       cause_code;
616 	u_int32_t       connect_speed;
617 	u_int16_t       rxwindow_size;
618 	u_int16_t       proc_delay;
619 	u_int32_t       phy_channel_id;
620 };
621 
622 #define PF_PPTP_CTRL_TYPE_CALL_IN_1ST   9
623 struct pf_pptp_ctrl_call_in_1st {
624 	u_int16_t       call_id;
625 	u_int16_t       call_sernum;
626 	u_int32_t       bearer_type;
627 	u_int32_t       phy_channel_id;
628 	u_int16_t       dialed_number_len;
629 	u_int16_t       dialing_number_len;
630 	u_int8_t        dialed_num[64];
631 	u_int8_t        dialing_num[64];
632 	u_int8_t        sub_addr[64];
633 };
634 
635 #define PF_PPTP_CTRL_TYPE_CALL_IN_2ND   10
636 struct pf_pptp_ctrl_call_in_2nd {
637 	u_int16_t       call_id;
638 	u_int16_t       peer_call_id;
639 	u_int8_t        result_code;
640 	u_int8_t        error_code;
641 	u_int16_t       rxwindow_size;
642 	u_int16_t       txdelay;
643 	u_int16_t       reserved_1;
644 };
645 
646 #define PF_PPTP_CTRL_TYPE_CALL_IN_3RD   11
647 struct pf_pptp_ctrl_call_in_3rd {
648 	u_int16_t       call_id;
649 	u_int16_t       reserved_1;
650 	u_int32_t       connect_speed;
651 	u_int16_t       rxwindow_size;
652 	u_int16_t       txdelay;
653 	u_int32_t       framing_type;
654 };
655 
656 #define PF_PPTP_CTRL_TYPE_CALL_CLR      12
657 struct pf_pptp_ctrl_call_clr {
658 	u_int16_t       call_id;
659 	u_int16_t       reserved_1;
660 };
661 
662 #define PF_PPTP_CTRL_TYPE_CALL_DISC     13
663 struct pf_pptp_ctrl_call_disc {
664 	u_int16_t       call_id;
665 	u_int8_t        result_code;
666 	u_int8_t        error_code;
667 	u_int16_t       cause_code;
668 	u_int16_t       reserved_1;
669 	u_int8_t        statistics[128];
670 };
671 
672 #define PF_PPTP_CTRL_TYPE_ERROR 14
673 struct pf_pptp_ctrl_error {
674 	u_int16_t       peer_call_id;
675 	u_int16_t       reserved_1;
676 	u_int32_t       crc_errors;
677 	u_int32_t       fr_errors;
678 	u_int32_t       hw_errors;
679 	u_int32_t       buf_errors;
680 	u_int32_t       tim_errors;
681 	u_int32_t       align_errors;
682 };
683 
684 #define PF_PPTP_CTRL_TYPE_SET_LINKINFO  15
685 struct pf_pptp_ctrl_set_linkinfo {
686 	u_int16_t       peer_call_id;
687 	u_int16_t       reserved_1;
688 	u_int32_t       tx_accm;
689 	u_int32_t       rx_accm;
690 };
691 
692 static const size_t PF_PPTP_CTRL_MSG_MINSIZE =
693     sizeof(struct pf_pptp_hdr) + sizeof(struct pf_pptp_ctrl_hdr);
694 
695 union pf_pptp_ctrl_msg_union {
696 	struct pf_pptp_ctrl_start_req           start_req;
697 	struct pf_pptp_ctrl_start_rpy           start_rpy;
698 	struct pf_pptp_ctrl_stop_req            stop_req;
699 	struct pf_pptp_ctrl_stop_rpy            stop_rpy;
700 	struct pf_pptp_ctrl_echo_req            echo_req;
701 	struct pf_pptp_ctrl_echo_rpy            echo_rpy;
702 	struct pf_pptp_ctrl_call_out_req        call_out_req;
703 	struct pf_pptp_ctrl_call_out_rpy        call_out_rpy;
704 	struct pf_pptp_ctrl_call_in_1st         call_in_1st;
705 	struct pf_pptp_ctrl_call_in_2nd         call_in_2nd;
706 	struct pf_pptp_ctrl_call_in_3rd         call_in_3rd;
707 	struct pf_pptp_ctrl_call_clr            call_clr;
708 	struct pf_pptp_ctrl_call_disc           call_disc;
709 	struct pf_pptp_ctrl_error                       error;
710 	struct pf_pptp_ctrl_set_linkinfo        set_linkinfo;
711 	u_int8_t                                                        data[0];
712 };
713 
714 struct pf_pptp_ctrl_msg {
715 	struct pf_pptp_hdr                              hdr;
716 	struct pf_pptp_ctrl_hdr                 ctrl;
717 	union pf_pptp_ctrl_msg_union    msg;
718 };
719 
720 #define PF_GRE_FLAG_CHECKSUM_PRESENT    0x8000
721 #define PF_GRE_FLAG_VERSION_MASK                0x0007
722 #define PF_GRE_PPP_ETHERTYPE                    0x880B
723 
724 static const u_int16_t PF_IKE_PORT = 500;
725 
/*
 * Fixed-size IKE (ISAKMP) header, used when inspecting traffic to/from
 * UDP port 500 (PF_IKE_PORT above).  Layout mirrors the on-the-wire
 * ISAKMP header; the cookies (IKEv2: SPIs) are treated as opaque 64-bit
 * values.  Exchange-type and flag values for v1/v2 are defined below.
 */
struct pf_ike_hdr {
	u_int64_t initiator_cookie, responder_cookie;
	u_int8_t next_payload, version, exchange_type, flags;
	u_int32_t message_id, length;
};
731 
732 #define PF_IKE_PACKET_MINSIZE   (sizeof (struct pf_ike_hdr))
733 
734 #define PF_IKEv1_EXCHTYPE_BASE                           1
735 #define PF_IKEv1_EXCHTYPE_ID_PROTECT             2
736 #define PF_IKEv1_EXCHTYPE_AUTH_ONLY                      3
737 #define PF_IKEv1_EXCHTYPE_AGGRESSIVE             4
738 #define PF_IKEv1_EXCHTYPE_INFORMATIONAL          5
739 #define PF_IKEv2_EXCHTYPE_SA_INIT                       34
740 #define PF_IKEv2_EXCHTYPE_AUTH                          35
741 #define PF_IKEv2_EXCHTYPE_CREATE_CHILD_SA       36
742 #define PF_IKEv2_EXCHTYPE_INFORMATIONAL         37
743 
744 #define PF_IKEv1_FLAG_E         0x01
745 #define PF_IKEv1_FLAG_C         0x02
746 #define PF_IKEv1_FLAG_A         0x04
747 #define PF_IKEv2_FLAG_I         0x08
748 #define PF_IKEv2_FLAG_V         0x10
749 #define PF_IKEv2_FLAG_R         0x20
750 
751 
752 static __inline int
pf_addr_compare(struct pf_addr * a,struct pf_addr * b,sa_family_t af)753 pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
754 {
755 	switch (af) {
756 #ifdef INET
757 	case AF_INET:
758 		if (a->addr32[0] > b->addr32[0]) {
759 			return 1;
760 		}
761 		if (a->addr32[0] < b->addr32[0]) {
762 			return -1;
763 		}
764 		break;
765 #endif /* INET */
766 	case AF_INET6:
767 		if (a->addr32[3] > b->addr32[3]) {
768 			return 1;
769 		}
770 		if (a->addr32[3] < b->addr32[3]) {
771 			return -1;
772 		}
773 		if (a->addr32[2] > b->addr32[2]) {
774 			return 1;
775 		}
776 		if (a->addr32[2] < b->addr32[2]) {
777 			return -1;
778 		}
779 		if (a->addr32[1] > b->addr32[1]) {
780 			return 1;
781 		}
782 		if (a->addr32[1] < b->addr32[1]) {
783 			return -1;
784 		}
785 		if (a->addr32[0] > b->addr32[0]) {
786 			return 1;
787 		}
788 		if (a->addr32[0] < b->addr32[0]) {
789 			return -1;
790 		}
791 		break;
792 	}
793 	return 0;
794 }
795 
796 static __inline int
pf_src_compare(struct pf_src_node * a,struct pf_src_node * b)797 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
798 {
799 	int     diff;
800 
801 	if (a->rule.ptr > b->rule.ptr) {
802 		return 1;
803 	}
804 	if (a->rule.ptr < b->rule.ptr) {
805 		return -1;
806 	}
807 	if ((diff = a->af - b->af) != 0) {
808 		return diff;
809 	}
810 	if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0) {
811 		return diff;
812 	}
813 	return 0;
814 }
815 
/*
 * RB-tree comparator for the lan/ext state table.  Keys are ordered by
 * protocol, LAN-side address family, protocol-specific transport
 * identifiers, then addresses.  For UDP the key's proto_variant carries
 * the PF_EXTFILTER_* mode, which controls how much of the external
 * endpoint takes part in the comparison.  Returns <0, 0 or >0.
 */
static __inline int
pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
{
	int     diff;
	int     extfilter;

	if ((diff = a->proto - b->proto) != 0) {
		return diff;
	}
	if ((diff = a->af_lan - b->af_lan) != 0) {
		return diff;
	}

	/* strictest filter mode unless the UDP case overrides it below */
	extfilter = PF_EXTFILTER_APD;

	switch (a->proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		/* only the LAN-side identifier is compared for ICMP */
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_TCP:
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		if ((diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_UDP:
		/* proto_variant holds the extension-filter mode for UDP */
		if ((diff = a->proto_variant - b->proto_variant)) {
			return diff;
		}
		extfilter = a->proto_variant;
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		/* looser filter modes (>= PF_EXTFILTER_AD) ignore the ext port */
		if ((extfilter < PF_EXTFILTER_AD) &&
		    (diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_GRE:
		/* PPTP-variant GRE keys are distinguished by call id */
		if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
		    a->proto_variant == b->proto_variant) {
			if (!!(diff = a->ext_lan.xport.call_id -
			    b->ext_lan.xport.call_id)) {
				return diff;
			}
		}
		break;

	case IPPROTO_ESP:
		/* ESP keys are distinguished by SPI */
		if (!!(diff = a->ext_lan.xport.spi - b->ext_lan.xport.spi)) {
			return diff;
		}
		break;

	default:
		break;
	}

	switch (a->af_lan) {
#if INET
	case AF_INET:
		if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr,
		    a->af_lan)) != 0) {
			return diff;
		}

		/* PF_EXTFILTER_EI and looser skip the external address */
		if (extfilter < PF_EXTFILTER_EI) {
			if ((diff = pf_addr_compare(&a->ext_lan.addr,
			    &b->ext_lan.addr,
			    a->af_lan)) != 0) {
				return diff;
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr,
		    a->af_lan)) != 0) {
			return diff;
		}

		/*
		 * Compare the external address unless the filter mode is
		 * endpoint-independent AND the lookup key (b) left it zeroed.
		 */
		if (extfilter < PF_EXTFILTER_EI ||
		    !PF_AZERO(&b->ext_lan.addr, AF_INET6)) {
			if ((diff = pf_addr_compare(&a->ext_lan.addr,
			    &b->ext_lan.addr,
			    a->af_lan)) != 0) {
				return diff;
			}
		}
		break;
	}

	if (a->app_state && b->app_state) {
		if (a->app_state->compare_lan_ext &&
		    b->app_state->compare_lan_ext) {
			/* order first by comparator identity, then by its verdict */
			diff = (const char *)b->app_state->compare_lan_ext -
			    (const char *)a->app_state->compare_lan_ext;
			if (diff != 0) {
				return diff;
			}
			diff = a->app_state->compare_lan_ext(a->app_state,
			    b->app_state);
			if (diff != 0) {
				return diff;
			}
		}
	}

	return 0;
}
934 
/*
 * RB-tree comparator for the ext/gwy state table.  Keys are ordered by
 * protocol, gateway-side address family, NAT64-ness, protocol-specific
 * transport identifiers, then addresses.  Returns <0, 0 or >0.
 */
static __inline int
pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
{
	int     diff;
	int     extfilter;
	int     a_nat64, b_nat64;

	if ((diff = a->proto - b->proto) != 0) {
		return diff;
	}

	if ((diff = a->af_gwy - b->af_gwy) != 0) {
		return diff;
	}

	/*
	 * Keep NAT64 keys (IPv6 on the LAN side, IPv4 on the gateway side)
	 * apart from plain keys so both can coexist in the tree.
	 */
	a_nat64 = (a->af_lan == PF_INET6 && a->af_gwy == PF_INET) ? 1 : 0;
	b_nat64 = (b->af_lan == PF_INET6 && b->af_gwy == PF_INET) ? 1 : 0;
	if ((diff = a_nat64 - b_nat64) != 0) {
		return diff;
	}

	/* strictest filter mode unless the UDP case overrides it below */
	extfilter = PF_EXTFILTER_APD;

	switch (a->proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		/* only the gateway-side identifier is compared for ICMP */
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_TCP:
		if ((diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0) {
			return diff;
		}
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_UDP:
		/* proto_variant holds the extension-filter mode for UDP */
		if ((diff = a->proto_variant - b->proto_variant)) {
			return diff;
		}
		extfilter = a->proto_variant;
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		/* looser filter modes (>= PF_EXTFILTER_AD) ignore the ext port */
		if ((extfilter < PF_EXTFILTER_AD) &&
		    (diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_GRE:
		/* PPTP-variant GRE keys are distinguished by call id */
		if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
		    a->proto_variant == b->proto_variant) {
			if (!!(diff = a->gwy.xport.call_id -
			    b->gwy.xport.call_id)) {
				return diff;
			}
		}
		break;

	case IPPROTO_ESP:
		/* ESP keys are distinguished by SPI */
		if (!!(diff = a->gwy.xport.spi - b->gwy.xport.spi)) {
			return diff;
		}
		break;

	default:
		break;
	}

	switch (a->af_gwy) {
#if INET
	case AF_INET:
		if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr,
		    a->af_gwy)) != 0) {
			return diff;
		}

		/* PF_EXTFILTER_EI and looser skip the external address */
		if (extfilter < PF_EXTFILTER_EI) {
			if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr,
			    a->af_gwy)) != 0) {
				return diff;
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr,
		    a->af_gwy)) != 0) {
			return diff;
		}

		/*
		 * Compare the external address unless the filter mode is
		 * endpoint-independent AND the lookup key (b) left it zeroed.
		 */
		if (extfilter < PF_EXTFILTER_EI ||
		    !PF_AZERO(&b->ext_gwy.addr, AF_INET6)) {
			if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr,
			    a->af_gwy)) != 0) {
				return diff;
			}
		}
		break;
	}

	if (a->app_state && b->app_state) {
		if (a->app_state->compare_ext_gwy &&
		    b->app_state->compare_ext_gwy) {
			/* order first by comparator identity, then by its verdict */
			diff = (const char *)b->app_state->compare_ext_gwy -
			    (const char *)a->app_state->compare_ext_gwy;
			if (diff != 0) {
				return diff;
			}
			diff = a->app_state->compare_ext_gwy(a->app_state,
			    b->app_state);
			if (diff != 0) {
				return diff;
			}
		}
	}

	return 0;
}
1059 
1060 static __inline int
pf_state_compare_id(struct pf_state * a,struct pf_state * b)1061 pf_state_compare_id(struct pf_state *a, struct pf_state *b)
1062 {
1063 	if (a->id > b->id) {
1064 		return 1;
1065 	}
1066 	if (a->id < b->id) {
1067 		return -1;
1068 	}
1069 	if (a->creatorid > b->creatorid) {
1070 		return 1;
1071 	}
1072 	if (a->creatorid < b->creatorid) {
1073 		return -1;
1074 	}
1075 
1076 	return 0;
1077 }
1078 
1079 void
pf_addrcpy(struct pf_addr * dst,struct pf_addr * src,sa_family_t af)1080 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
1081 {
1082 	switch (af) {
1083 #if INET
1084 	case AF_INET:
1085 		memcpy(&dst->v4addr, &src->v4addr, sizeof(src->v4addr));
1086 		break;
1087 #endif /* INET */
1088 	case AF_INET6:
1089 		memcpy(&dst->v6addr, &src->v6addr, sizeof(src->v6addr));
1090 		break;
1091 	}
1092 }
1093 
1094 struct pf_state *
pf_find_state_byid(struct pf_state_cmp * key)1095 pf_find_state_byid(struct pf_state_cmp *key)
1096 {
1097 	pf_status.fcounters[FCNT_STATE_SEARCH]++;
1098 
1099 	return RB_FIND(pf_state_tree_id, &tree_id,
1100 	           (struct pf_state *)(void *)key);
1101 }
1102 
/*
 * Look up the state matching 'key' for a packet seen on interface
 * 'kif'.  dir selects the tree: PF_OUT searches the lan/ext table,
 * PF_IN the ext/gwy table with additional NAT64 handling (see the
 * comment below).  Returns the first matching state bound to 'kif'
 * (or a floating one), or NULL.
 */
static struct pf_state *
pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir)
{
	struct pf_state_key     *sk = NULL;
	struct pf_state         *s;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	switch (dir) {
	case PF_OUT:
		sk = RB_FIND(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
		    (struct pf_state_key *)key);

		break;
	case PF_IN:

		/*
		 * Generally, a packet can match to
		 * at most 1 state in the GWY table, with the sole exception
		 * of NAT64, where a packet can match with at most 2 states
		 * on the GWY table. This is because, unlike NAT44 or NAT66,
		 * NAT64 forward translation is done on the input, not output.
		 * This means a forwarded packet could cause PF to generate 2 states
		 * on both input and output.
		 *
		 * NAT64 reverse translation is done on input. If a packet
		 * matches NAT64 state on the GWY table, prioritize it
		 * over any IPv4 state on the GWY table.
		 */
		if (pf_state_tree_ext_gwy_nat64_cnt > 0 &&
		    key->af_lan == PF_INET && key->af_gwy == PF_INET) {
			/* temporarily rewrite the key so the NAT64 keys sort-match */
			key->af_lan = PF_INET6;
			sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
			    (struct pf_state_key *) key);
			key->af_lan = PF_INET;
		}

		if (sk == NULL) {
			sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
			    (struct pf_state_key *)key);
		}
		/*
		 * NAT64 is done only on input, for packets coming in from
		 * from the LAN side, need to lookup the lan_ext tree.
		 */
		if (sk == NULL) {
			sk = RB_FIND(pf_state_tree_lan_ext,
			    &pf_statetbl_lan_ext,
			    (struct pf_state_key *)key);
			/* only NAT64 keys (af_lan != af_gwy) are valid here */
			if (sk && sk->af_lan == sk->af_gwy) {
				sk = NULL;
			}
		}
		break;
	default:
		panic("pf_find_state");
	}

	/* list is sorted, if-bound states before floating ones */
	if (sk != NULL) {
		TAILQ_FOREACH(s, &sk->states, next)
		if (s->kif == pfi_all || s->kif == kif) {
			return s;
		}
	}

	return NULL;
}
1171 
/*
 * Interface-agnostic state lookup: returns the first state attached to
 * the matching key regardless of its bound kif.  If 'more' is non-NULL
 * it is incremented once per state attached to the key (including the
 * one returned).
 */
struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key     *sk = NULL;
	struct pf_state         *s, *ret = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	switch (dir) {
	case PF_OUT:
		sk = RB_FIND(pf_state_tree_lan_ext,
		    &pf_statetbl_lan_ext, (struct pf_state_key *)key);
		break;
	case PF_IN:
		sk = RB_FIND(pf_state_tree_ext_gwy,
		    &pf_statetbl_ext_gwy, (struct pf_state_key *)key);
		/*
		 * NAT64 is done only on input, for packets coming in from
		 * from the LAN side, need to lookup the lan_ext tree.
		 */
		if ((sk == NULL) && pf_nat64_configured) {
			sk = RB_FIND(pf_state_tree_lan_ext,
			    &pf_statetbl_lan_ext,
			    (struct pf_state_key *)key);
			/* only NAT64 keys (af_lan != af_gwy) are valid here */
			if (sk && sk->af_lan == sk->af_gwy) {
				sk = NULL;
			}
		}
		break;
	default:
		panic("pf_find_state_all");
	}

	if (sk != NULL) {
		ret = TAILQ_FIRST(&sk->states);
		if (more == NULL) {
			return ret;
		}

		TAILQ_FOREACH(s, &sk->states, next)
		(*more)++;
	}

	return ret;
}
1217 
1218 static void
pf_init_threshold(struct pf_threshold * threshold,u_int32_t limit,u_int32_t seconds)1219 pf_init_threshold(struct pf_threshold *threshold,
1220     u_int32_t limit, u_int32_t seconds)
1221 {
1222 	threshold->limit = limit * PF_THRESHOLD_MULT;
1223 	threshold->seconds = seconds;
1224 	threshold->count = 0;
1225 	threshold->last = pf_time_second();
1226 }
1227 
1228 static void
pf_add_threshold(struct pf_threshold * threshold)1229 pf_add_threshold(struct pf_threshold *threshold)
1230 {
1231 	u_int32_t t = pf_time_second(), diff = t - threshold->last;
1232 
1233 	if (diff >= threshold->seconds) {
1234 		threshold->count = 0;
1235 	} else {
1236 		threshold->count -= threshold->count * diff /
1237 		    threshold->seconds;
1238 	}
1239 	threshold->count += PF_THRESHOLD_MULT;
1240 	threshold->last = t;
1241 }
1242 
1243 static int
pf_check_threshold(struct pf_threshold * threshold)1244 pf_check_threshold(struct pf_threshold *threshold)
1245 {
1246 	return threshold->count > threshold->limit;
1247 }
1248 
/*
 * Account a newly established connection against (*state)'s source node
 * and enforce the rule's source-connection limits (max_src_conn and
 * max_src_conn_rate).  When a limit is exceeded, the offending address
 * may be inserted into the rule's overload table and, if the rule's
 * flush flags ask for it, other states from that source are marked for
 * purging; the current state is always killed in that case.
 * Returns 0 when within limits, 1 when the state was scheduled to die.
 */
static int
pf_src_connlimit(struct pf_state **state)
{
	int bad = 0;
	(*state)->src_node->conn++;
	VERIFY((*state)->src_node->conn != 0);
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&(*state)->src_node->conn_rate);

	/* absolute cap on simultaneous connections from this source */
	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn <
	    (*state)->src_node->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	/* rate limit on new connections from this source */
	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad) {
		return 0;
	}

	if ((*state)->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t       killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf_src_connlimit: blocking address ");
			pf_print_host(&(*state)->src_node->addr, 0,
			    (*state)->state_key->af_lan);
		}

		/* build a host-width table entry for the offending address */
		bzero(&p, sizeof(p));
		p.pfra_af = (*state)->state_key->af_lan;
		switch ((*state)->state_key->af_lan) {
#if INET
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = (*state)->src_node->addr.v4addr;
			break;
#endif /* INET */
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = (*state)->src_node->addr.v6addr;
			break;
		}

		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
		    &p, pf_calendar_time_second());

		/* kill existing states if that's required. */
		if ((*state)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->state_key;
				/*
				 * Kill states from this source.  (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af_lan ==
				    (*state)->state_key->af_lan &&
				    (((*state)->state_key->direction ==
				    PF_OUT &&
				    PF_AEQ(&(*state)->src_node->addr,
				    &sk->lan.addr, sk->af_lan)) ||
				    ((*state)->state_key->direction == PF_IN &&
				    PF_AEQ(&(*state)->src_node->addr,
				    &sk->ext_lan.addr, sk->af_lan))) &&
				    ((*state)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*state)->rule.ptr == st->rule.ptr)) {
					/* mark for collection by the purge thread */
					st->timeout = PFTM_PURGE;
					st->src.state = st->dst.state =
					    TCPS_CLOSED;
					killed++;
				}
			}
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf(", %u states killed", killed);
			}
		}
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("\n");
		}
	}

	/* kill this state */
	(*state)->timeout = PFTM_PURGE;
	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
	return 1;
}
1349 
/*
 * Find or create the source-tracking node for address 'src'/'af' under
 * 'rule'.  If *sn is already non-NULL no lookup is performed.  Returns
 * 0 with *sn set on success; -1 when the rule's max_src_nodes limit
 * prevented allocation, when the RB insert collided, or when an
 * existing node is already at the rule's max_src_states limit.
 */
int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    struct pf_addr *src, sa_family_t af)
{
	struct pf_src_node      k;

	if (*sn == NULL) {
		/* build the lookup key; nodes are rule-bound only for
		 * rule-level tracking or sticky-address pools */
		k.af = af;
		PF_ACPY(&k.addr, src, af);
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR) {
			k.rule.ptr = rule;
		} else {
			k.rule.ptr = NULL;
		}
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		/* not found: allocate, unless the per-rule node limit is hit */
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes) {
			(*sn) = pool_get(&pf_src_tree_pl, PR_WAITOK);
		} else {
			pf_status.lcounters[LCNT_SRCNODES]++;
		}
		if ((*sn) == NULL) {
			return -1;
		}
		bzero(*sn, sizeof(struct pf_src_node));

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->af = af;
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR) {
			(*sn)->rule.ptr = rule;
		} else {
			(*sn)->rule.ptr = NULL;
		}
		PF_ACPY(&(*sn)->addr, src, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			/* should not happen: the RB_FIND above missed */
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				printf("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			*sn = NULL; /* signal the caller that no additional cleanup is needed */
			return -1;
		}
		(*sn)->creation = pf_time_second();
		(*sn)->ruletype = rule->action;
		if ((*sn)->rule.ptr != NULL) {
			(*sn)->rule.ptr->src_nodes++;
		}
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		/* reusing an existing node: enforce max_src_states */
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return -1;
		}
	}
	return 0;
}
1419 
1420 static void
pf_stateins_err(const char * tree,struct pf_state * s,struct pfi_kif * kif)1421 pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif)
1422 {
1423 	struct pf_state_key     *sk = s->state_key;
1424 
1425 	if (pf_status.debug >= PF_DEBUG_MISC) {
1426 		printf("pf: state insert failed: %s %s ", tree, kif->pfik_name);
1427 		switch (sk->proto) {
1428 		case IPPROTO_TCP:
1429 			printf("TCP");
1430 			break;
1431 		case IPPROTO_UDP:
1432 			printf("UDP");
1433 			break;
1434 		case IPPROTO_ICMP:
1435 			printf("ICMP4");
1436 			break;
1437 		case IPPROTO_ICMPV6:
1438 			printf("ICMP6");
1439 			break;
1440 		default:
1441 			printf("PROTO=%u", sk->proto);
1442 			break;
1443 		}
1444 		printf(" lan: ");
1445 		pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto,
1446 		    sk->proto_variant);
1447 		printf(" gwy: ");
1448 		pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto,
1449 		    sk->proto_variant);
1450 		printf(" ext_lan: ");
1451 		pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto,
1452 		    sk->proto_variant);
1453 		printf(" ext_gwy: ");
1454 		pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto,
1455 		    sk->proto_variant);
1456 		if (s->sync_flags & PFSTATE_FROMSYNC) {
1457 			printf(" (from sync)");
1458 		}
1459 		printf("\n");
1460 	}
1461 }
1462 
1463 static __inline struct pf_state_key *
pf_insert_state_key_ext_gwy(struct pf_state_key * psk)1464 pf_insert_state_key_ext_gwy(struct pf_state_key *psk)
1465 {
1466 	struct pf_state_key * ret = RB_INSERT(pf_state_tree_ext_gwy,
1467 	    &pf_statetbl_ext_gwy, psk);
1468 	if (!ret && psk->af_lan == PF_INET6 &&
1469 	    psk->af_gwy == PF_INET) {
1470 		pf_state_tree_ext_gwy_nat64_cnt++;
1471 	}
1472 	return ret;
1473 }
1474 
1475 static __inline struct pf_state_key *
pf_remove_state_key_ext_gwy(struct pf_state_key * psk)1476 pf_remove_state_key_ext_gwy(struct pf_state_key *psk)
1477 {
1478 	struct pf_state_key * ret = RB_REMOVE(pf_state_tree_ext_gwy,
1479 	    &pf_statetbl_ext_gwy, psk);
1480 	if (ret && psk->af_lan == PF_INET6 &&
1481 	    psk->af_gwy == PF_INET) {
1482 		pf_state_tree_ext_gwy_nat64_cnt--;
1483 	}
1484 	return ret;
1485 }
1486 
/*
 * Link state 's' into the lookup trees (lan/ext and ext/gwy), the id
 * tree and the global state list, binding it to interface 'kif'.  If a
 * state key identical to s's already exists in the lan/ext tree, 's' is
 * attached to that key instead -- unless a state for the same kif is
 * already attached, which is a genuine collision.  Returns 0 on
 * success, -1 on failure (the state is detached before returning).
 */
int
pf_insert_state(struct pfi_kif *kif, struct pf_state *s)
{
	struct pf_state_key     *cur;
	struct pf_state         *sp;

	VERIFY(s->state_key != NULL);
	s->kif = kif;

	if ((cur = RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
	    s->state_key)) != NULL) {
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(sp, &cur->states, next)
		if (sp->kif == kif) {           /* collision! */
			pf_stateins_err("tree_lan_ext", s, kif);
			pf_detach_state(s,
			    PF_DT_SKIP_LANEXT | PF_DT_SKIP_EXTGWY);
			return -1;
		}
		pf_detach_state(s, PF_DT_SKIP_LANEXT | PF_DT_SKIP_EXTGWY);
		pf_attach_state(cur, s, kif == pfi_all ? 1 : 0);
	}

	/* if cur != NULL, we already found a state key and attached to it */
	if (cur == NULL &&
	    (cur = pf_insert_state_key_ext_gwy(s->state_key)) != NULL) {
		/* must not happen. we must have found the sk above! */
		pf_stateins_err("tree_ext_gwy", s, kif);
		pf_detach_state(s, PF_DT_SKIP_EXTGWY);
		return -1;
	}

	/* assign a fresh id unless one came in (e.g. via pfsync) */
	if (s->id == 0 && s->creatorid == 0) {
		s->id = htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
	}
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    be64toh(s->id), ntohl(s->creatorid));
			if (s->sync_flags & PFSTATE_FROMSYNC) {
				printf(" (from sync)");
			}
			printf("\n");
		}
		pf_detach_state(s, 0);
		return -1;
	}
	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	VERIFY(pf_status.states != 0);
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
#if NPFSYNC
	pfsync_insert_state(s);
#endif
	return 0;
}
1546 
/*
 * Continuation of the pf purge thread.  Runs with a (nominal) one
 * second period: each pass expires a slice of the state table, and
 * every PFTM_INTERVAL passes also purges expired fragments and source
 * nodes.  When pf is not running, everything is purged at once and the
 * thread then either terminates (if pf_purge_thread was cleared) or
 * sleeps without a timeout until woken.  Re-arms itself via tsleep0()
 * and never returns normally.
 */
static int
pf_purge_thread_cont(int err)
{
#pragma unused(err)
	static u_int32_t nloops = 0;
	int t = 1;      /* 1 second */

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the periodic timeout callout to update
	 * the counter returnable via net_uptime().
	 */
	net_update_uptime();

	lck_rw_lock_shared(&pf_perim_lock);
	lck_mtx_lock(&pf_lock);

	/* purge everything if not running */
	if (!pf_status.running) {
		pf_purge_expired_states(pf_status.states);
		pf_purge_expired_fragments();
		pf_purge_expired_src_nodes();

		/* terminate thread (we don't currently do this) */
		if (pf_purge_thread == NULL) {
			lck_mtx_unlock(&pf_lock);
			lck_rw_done(&pf_perim_lock);

			thread_deallocate(current_thread());
			thread_terminate(current_thread());
			/* NOTREACHED */
			return 0;
		} else {
			/* if there's nothing left, sleep w/o timeout */
			if (pf_status.states == 0 &&
			    pf_normalize_isempty() &&
			    RB_EMPTY(&tree_src_tracking)) {
				nloops = 0;
				t = 0;
			}
			goto done;
		}
	}

	/* process a fraction of the state table every second */
	pf_purge_expired_states(1 + (pf_status.states
	    / pf_default_rule.timeout[PFTM_INTERVAL]));

	/* purge other expired types every PFTM_INTERVAL seconds */
	if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_fragments();
		pf_purge_expired_src_nodes();
		nloops = 0;
	}
done:
	lck_mtx_unlock(&pf_lock);
	lck_rw_done(&pf_perim_lock);

	/* park until the next period (or indefinitely if t == 0) */
	(void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge_cont",
	    t * hz, pf_purge_thread_cont);
	/* NOTREACHED */
	VERIFY(0);

	return 0;
}
1612 
/*
 * Entry point of the pf purge thread: immediately parks in tsleep0()
 * with pf_purge_thread_cont as the continuation, which then drives the
 * periodic purge cycle.
 */
void
pf_purge_thread_fn(void *v, wait_result_t w)
{
#pragma unused(v, w)
	(void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge", 0,
	    pf_purge_thread_cont);
	/*
	 * tsleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	VERIFY(0);
}
1625 
1626 u_int64_t
pf_state_expires(const struct pf_state * state)1627 pf_state_expires(const struct pf_state *state)
1628 {
1629 	u_int32_t       t;
1630 	u_int32_t       start;
1631 	u_int32_t       end;
1632 	u_int32_t       states;
1633 
1634 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1635 
1636 	/* handle all PFTM_* > PFTM_MAX here */
1637 	if (state->timeout == PFTM_PURGE) {
1638 		return pf_time_second();
1639 	}
1640 
1641 	VERIFY(state->timeout != PFTM_UNLINKED);
1642 	VERIFY(state->timeout < PFTM_MAX);
1643 	t = state->rule.ptr->timeout[state->timeout];
1644 	if (!t) {
1645 		t = pf_default_rule.timeout[state->timeout];
1646 	}
1647 	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
1648 	if (start) {
1649 		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
1650 		states = state->rule.ptr->states;
1651 	} else {
1652 		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1653 		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1654 		states = pf_status.states;
1655 	}
1656 	if (end && states > start && start < end) {
1657 		if (states < end) {
1658 			return state->expire + t * (end - states) /
1659 			       (end - start);
1660 		} else {
1661 			return pf_time_second();
1662 		}
1663 	}
1664 	return state->expire + t;
1665 }
1666 
/*
 * Sweep the source-tracking tree and free every node that has no
 * attached states and whose expiry time has passed.  A rule that was
 * kept alive only by such a node (no states, no node limit) is removed
 * along with it.  Caller must hold pf_lock.
 */
void
pf_purge_expired_src_nodes(void)
{
	struct pf_src_node              *cur, *next;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	/* fetch the successor first: cur may be removed below */
	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states <= 0 && cur->expire <= pf_time_second()) {
			if (cur->rule.ptr != NULL) {
				cur->rule.ptr->src_nodes--;
				if (cur->rule.ptr->states <= 0 &&
				    cur->rule.ptr->max_src_nodes <= 0) {
					pf_rm_rule(NULL, cur->rule.ptr);
				}
			}
			RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
			pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
			pf_status.src_nodes--;
			pool_put(&pf_src_tree_pl, cur);
		}
	}
}
1692 
/*
 * Detach state 's' from its source node and NAT source node, dropping
 * the established-connection count when applicable.  When a node's
 * state count reaches zero it is stamped with a PFTM_SRC_NODE expiry so
 * the purge thread can collect it later.  Caller must hold pf_lock.
 */
void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t t;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	if (s->src_node != NULL) {
		if (s->src.tcp_est) {
			/* this state was counted in pf_src_connlimit() */
			VERIFY(s->src_node->conn > 0);
			--s->src_node->conn;
		}
		VERIFY(s->src_node->states > 0);
		if (--s->src_node->states <= 0) {
			t = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!t) {
				t = pf_default_rule.timeout[PFTM_SRC_NODE];
			}
			s->src_node->expire = pf_time_second() + t;
		}
	}
	/* the NAT node may be the same node; only handle it once */
	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
		VERIFY(s->nat_src_node->states > 0);
		if (--s->nat_src_node->states <= 0) {
			t = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!t) {
				t = pf_default_rule.timeout[PFTM_SRC_NODE];
			}
			s->nat_src_node->expire = pf_time_second() + t;
		}
	}
	s->src_node = s->nat_src_node = NULL;
}
1726 
/*
 * Take state 'cur' out of service: run its unlink hooks, remove it from
 * the id tree and the lookup trees, detach it from its source nodes and
 * mark it PFTM_UNLINKED.  The memory itself is released later by
 * pf_free_state().  A state still in the TCP proxy handshake gets a
 * RST|ACK sent toward the LAN endpoint first.  Caller must hold pf_lock.
 */
void
pf_unlink_state(struct pf_state *cur)
{
	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		pf_send_tcp(cur->rule.ptr, cur->state_key->af_lan,
		    &cur->state_key->ext_lan.addr, &cur->state_key->lan.addr,
		    cur->state_key->ext_lan.xport.port,
		    cur->state_key->lan.xport.port,
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST | TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
	}

	hook_runloop(&cur->unlink_hooks, HOOK_REMOVE | HOOK_FREE);
	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFSYNC
	/* only announce deletion of states we created ourselves */
	if (cur->creatorid == pf_status.hostid) {
		pfsync_delete_state(cur);
	}
#endif
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur, 0);
}
1752 
/* callers should be at splpf and hold the
 * write_lock on pf_consistency_lock */
/*
 * Release an already-unlinked state: drop its rule, NAT-rule and anchor
 * references (removing any rule kept alive solely by this state), undo
 * TCP-normalization bookkeeping, and return the memory to the pool.
 * Bails out early (without freeing) when pfsync is still using the
 * state as a bulk-transfer cursor.  Caller must hold pf_lock.
 */
void
pf_free_state(struct pf_state *cur)
{
	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
#if NPFSYNC
	if (pfsyncif != NULL &&
	    (pfsyncif->sc_bulk_send_next == cur ||
	    pfsyncif->sc_bulk_terminator == cur)) {
		return;
	}
#endif
	/* pf_unlink_state() must have run first */
	VERIFY(cur->timeout == PFTM_UNLINKED);
	VERIFY(cur->rule.ptr->states > 0);
	if (--cur->rule.ptr->states <= 0 &&
	    cur->rule.ptr->src_nodes <= 0) {
		pf_rm_rule(NULL, cur->rule.ptr);
	}
	if (cur->nat_rule.ptr != NULL) {
		VERIFY(cur->nat_rule.ptr->states > 0);
		if (--cur->nat_rule.ptr->states <= 0 &&
		    cur->nat_rule.ptr->src_nodes <= 0) {
			pf_rm_rule(NULL, cur->nat_rule.ptr);
		}
	}
	if (cur->anchor.ptr != NULL) {
		VERIFY(cur->anchor.ptr->states > 0);
		if (--cur->anchor.ptr->states <= 0) {
			pf_rm_rule(NULL, cur->anchor.ptr);
		}
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	TAILQ_REMOVE(&state_list, cur, entry_list);
	if (cur->tag) {
		pf_tag_unref(cur->tag);
	}
#if SKYWALK
	netns_release(&cur->nstoken);
#endif
	pool_put(&pf_state_pl, cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	VERIFY(pf_status.states > 0);
	pf_status.states--;
}
1799 
/*
 * Examine up to 'maxcheck' states for expiry, resuming where the
 * previous call left off: the cursor is static, so repeated calls from
 * the purge thread sweep the whole state list incrementally.  Already
 * unlinked states are freed; expired ones are unlinked and then freed.
 * Caller must hold pf_lock.
 */
void
pf_purge_expired_states(u_int32_t maxcheck)
{
	static struct pf_state  *cur = NULL;
	struct pf_state         *next;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	while (maxcheck--) {
		/* wrap to start of list when we hit the end */
		if (cur == NULL) {
			cur = TAILQ_FIRST(&state_list);
			if (cur == NULL) {
				break;  /* list empty */
			}
		}

		/* get next state, as cur may get deleted */
		next = TAILQ_NEXT(cur, entry_list);

		if (cur->timeout == PFTM_UNLINKED) {
			pf_free_state(cur);
		} else if (pf_state_expires(cur) <= pf_time_second()) {
			/* unlink and free expired state */
			pf_unlink_state(cur);
			pf_free_state(cur);
		}
		cur = next;
	}
}
1830 
1831 int
pf_tbladdr_setup(struct pf_ruleset * rs,struct pf_addr_wrap * aw)1832 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1833 {
1834 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1835 
1836 	if (aw->type != PF_ADDR_TABLE) {
1837 		return 0;
1838 	}
1839 	if ((aw->p.tbl = pfr_attach_table(rs, __unsafe_null_terminated_from_indexable(aw->v.tblname))) == NULL) {
1840 		return 1;
1841 	}
1842 	return 0;
1843 }
1844 
1845 void
pf_tbladdr_remove(struct pf_addr_wrap * aw)1846 pf_tbladdr_remove(struct pf_addr_wrap *aw)
1847 {
1848 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1849 
1850 	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) {
1851 		return;
1852 	}
1853 	pfr_detach_table(aw->p.tbl);
1854 	aw->p.tbl = NULL;
1855 }
1856 
1857 void
pf_tbladdr_copyout(struct pf_addr_wrap * aw)1858 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1859 {
1860 	struct pfr_ktable *kt = aw->p.tbl;
1861 
1862 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1863 
1864 	if (aw->type != PF_ADDR_TABLE || kt == NULL) {
1865 		return;
1866 	}
1867 	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) {
1868 		kt = kt->pfrkt_root;
1869 	}
1870 	aw->p.tbl = NULL;
1871 	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1872 	    kt->pfrkt_cnt : -1;
1873 }
1874 
/*
 * Print an address in human-readable form for the given family:
 * dotted quad for AF_INET, hex groups with attempted '::' zero-run
 * compression for AF_INET6.  Unknown families print nothing.
 */
static void
pf_print_addr(struct pf_addr *addr, sa_family_t af)
{
	switch (af) {
#if INET
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		printf("%u.%u.%u.%u", (a >> 24) & 255, (a >> 16) & 255,
		    (a >> 8) & 255, a & 255);
		break;
	}
#endif /* INET */
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart = 255, curend = 0,
		    maxstart = 0, maxend = 0;
		/*
		 * First pass: locate the longest run of all-zero 16-bit
		 * groups (curstart == 255 means "no run in progress").
		 * NOTE(review): inherited quirks -- a run starting at
		 * group 0 never updates maxstart/maxend because
		 * 'if (curstart)' is false for index 0, and a run still
		 * open when the loop ends is not considered either.
		 * Preserved as-is; output is used for debug logging only.
		 */
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255) {
					curstart = i;
				} else {
					curend = i;
				}
			} else {
				if (curstart) {
					if ((curend - curstart) >
					    (maxend - maxstart)) {
						maxstart = curstart;
						maxend = curend;
						curstart = 255;
					}
				}
			}
		}
		/*
		 * Second pass: print each group, emitting ':' at the
		 * boundaries of the compressed run.
		 */
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (maxend != 7) {
					if (i == maxstart) {
						printf(":");
					}
				} else {
					if (i == maxend) {
						printf(":");
					}
				}
			} else {
				b = ntohs(addr->addr16[i]);
				printf("%x", b);
				if (i < 7) {
					printf(":");
				}
			}
		}
		break;
	}
	}
}
1932 
1933 static void
pf_print_sk_host(struct pf_state_host * sh,sa_family_t af,int proto,u_int8_t proto_variant)1934 pf_print_sk_host(struct pf_state_host *sh, sa_family_t af, int proto,
1935     u_int8_t proto_variant)
1936 {
1937 	pf_print_addr(&sh->addr, af);
1938 
1939 	switch (proto) {
1940 	case IPPROTO_ESP:
1941 		if (sh->xport.spi) {
1942 			printf("[%08x]", ntohl(sh->xport.spi));
1943 		}
1944 		break;
1945 
1946 	case IPPROTO_GRE:
1947 		if (proto_variant == PF_GRE_PPTP_VARIANT) {
1948 			printf("[%u]", ntohs(sh->xport.call_id));
1949 		}
1950 		break;
1951 
1952 	case IPPROTO_TCP:
1953 	case IPPROTO_UDP:
1954 		printf("[%u]", ntohs(sh->xport.port));
1955 		break;
1956 
1957 	default:
1958 		break;
1959 	}
1960 }
1961 
/* Print an address and, when non-zero, its port in brackets. */
static void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	pf_print_addr(addr, af);
	if (p == 0) {
		return;
	}
	printf("[%u]", ntohs(p));
}
1970 
1971 void
pf_print_state(struct pf_state * s)1972 pf_print_state(struct pf_state *s)
1973 {
1974 	struct pf_state_key *sk = s->state_key;
1975 	switch (sk->proto) {
1976 	case IPPROTO_ESP:
1977 		printf("ESP ");
1978 		break;
1979 	case IPPROTO_GRE:
1980 		printf("GRE%u ", sk->proto_variant);
1981 		break;
1982 	case IPPROTO_TCP:
1983 		printf("TCP ");
1984 		break;
1985 	case IPPROTO_UDP:
1986 		printf("UDP ");
1987 		break;
1988 	case IPPROTO_ICMP:
1989 		printf("ICMP ");
1990 		break;
1991 	case IPPROTO_ICMPV6:
1992 		printf("ICMPV6 ");
1993 		break;
1994 	default:
1995 		printf("%u ", sk->proto);
1996 		break;
1997 	}
1998 	pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto, sk->proto_variant);
1999 	printf(" ");
2000 	pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto, sk->proto_variant);
2001 	printf(" ");
2002 	pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto,
2003 	    sk->proto_variant);
2004 	printf(" ");
2005 	pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto,
2006 	    sk->proto_variant);
2007 	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
2008 	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
2009 	if (s->src.wscale && s->dst.wscale) {
2010 		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
2011 	}
2012 	printf("]");
2013 	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
2014 	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
2015 	if (s->src.wscale && s->dst.wscale) {
2016 		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
2017 	}
2018 	printf("]");
2019 	printf(" %u:%u", s->src.state, s->dst.state);
2020 }
2021 
2022 void
pf_print_flags(u_int8_t f)2023 pf_print_flags(u_int8_t f)
2024 {
2025 	if (f) {
2026 		printf(" ");
2027 	}
2028 	if (f & TH_FIN) {
2029 		printf("F");
2030 	}
2031 	if (f & TH_SYN) {
2032 		printf("S");
2033 	}
2034 	if (f & TH_RST) {
2035 		printf("R");
2036 	}
2037 	if (f & TH_PUSH) {
2038 		printf("P");
2039 	}
2040 	if (f & TH_ACK) {
2041 		printf("A");
2042 	}
2043 	if (f & TH_URG) {
2044 		printf("U");
2045 	}
2046 	if (f & TH_ECE) {
2047 		printf("E");
2048 	}
2049 	if (f & TH_CWR) {
2050 		printf("W");
2051 	}
2052 }
2053 
/*
 * Resolve pending skip pointers for criterion `i`: every rule from
 * head[i] up to (but not including) `cur` gets skip[i] pointed at `cur`,
 * and head[i] is advanced to `cur`.  Used only by pf_calc_skip_steps()
 * below, which provides the `head[]` and `cur` locals it relies on.
 */
#define PF_SET_SKIP_STEPS(i)                                    \
	do {                                                    \
	        while (head[i] != cur) {                        \
	                head[i]->skip[i].ptr = cur;             \
	                head[i] = TAILQ_NEXT(head[i], entries); \
	        }                                               \
	} while (0)
2061 
/*
 * Compute the skip steps for a rule queue.  For each PF_SKIP_* criterion,
 * a rule's skip pointer is set to the first later rule that differs in
 * that criterion, so rule evaluation can jump over runs of consecutive
 * rules that would all fail a match for the same reason.
 */
void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	/* head[i]: first rule whose skip[i] pointer is not yet resolved */
	for (i = 0; i < PF_SKIP_COUNT; ++i) {
		head[i] = cur;
	}
	while (cur != NULL) {
		/* whenever a criterion changes, resolve pointers up to here */
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) {
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		}
		if (cur->direction != prev->direction) {
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		}
		if (cur->af != prev->af) {
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		}
		if (cur->proto != prev->proto) {
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		}
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) {
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		}
		{
			union pf_rule_xport *cx = &cur->src.xport;
			union pf_rule_xport *px = &prev->src.xport;

			switch (cur->proto) {
			case IPPROTO_GRE:
			case IPPROTO_ESP:
				/* GRE/ESP rules never share a src-port run */
				PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
				break;
			default:
				if (prev->proto == IPPROTO_GRE ||
				    prev->proto == IPPROTO_ESP ||
				    cx->range.op != px->range.op ||
				    cx->range.port[0] != px->range.port[0] ||
				    cx->range.port[1] != px->range.port[1]) {
					PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
				}
				break;
			}
		}
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) {
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		}
		{
			union pf_rule_xport *cx = &cur->dst.xport;
			union pf_rule_xport *px = &prev->dst.xport;

			switch (cur->proto) {
			case IPPROTO_GRE:
				/* dst "port" is the GRE call-id */
				if (cur->proto != prev->proto ||
				    cx->call_id != px->call_id) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			case IPPROTO_ESP:
				/* dst "port" is the ESP SPI */
				if (cur->proto != prev->proto ||
				    cx->spi != px->spi) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			default:
				if (prev->proto == IPPROTO_GRE ||
				    prev->proto == IPPROTO_ESP ||
				    cx->range.op != px->range.op ||
				    cx->range.port[0] != px->range.port[0] ||
				    cx->range.port[1] != px->range.port[1]) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			}
		}

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	/* flush: point all remaining unresolved pointers at the list end */
	for (i = 0; i < PF_SKIP_COUNT; ++i) {
		PF_SET_SKIP_STEPS(i);
	}
}
2150 
/*
 * Derive a non-zero 32-bit flow identifier for a state key.  The inputs
 * are canonicalized (smaller address/port first) so both directions of a
 * connection hash to the same flow id.
 */
u_int32_t
pf_calc_state_key_flowhash(struct pf_state_key *sk)
{
#if SKYWALK
	uint32_t flowid;
	struct flowidns_flow_key fk;

	VERIFY(sk->flowsrc == FLOWSRC_PF);
	bzero(&fk, sizeof(fk));
	_CASSERT(sizeof(sk->lan.addr) == sizeof(fk.ffk_laddr));
	_CASSERT(sizeof(sk->ext_lan.addr) == sizeof(fk.ffk_laddr));
	bcopy(&sk->lan.addr, &fk.ffk_laddr, sizeof(fk.ffk_laddr));
	bcopy(&sk->ext_lan.addr, &fk.ffk_raddr, sizeof(fk.ffk_raddr));
	fk.ffk_af = sk->af_lan;
	fk.ffk_proto = sk->proto;

	switch (sk->proto) {
	case IPPROTO_ESP:
	case IPPROTO_AH:
		fk.ffk_spi = sk->lan.xport.spi;
		break;
	default:
		/*
		 * Order the two ports so the id is direction-independent.
		 * NOTE(review): this compares the 32-bit spi view of the
		 * xport union to order the 16-bit ports; presumably the
		 * bytes beyond the port are zero here -- confirm.
		 */
		if (sk->lan.xport.spi <= sk->ext_lan.xport.spi) {
			fk.ffk_lport = sk->lan.xport.port;
			fk.ffk_rport = sk->ext_lan.xport.port;
		} else {
			fk.ffk_lport = sk->ext_lan.xport.port;
			fk.ffk_rport = sk->lan.xport.port;
		}
		break;
	}

	flowidns_allocate_flowid(FLOWIDNS_DOMAIN_PF, &fk, &flowid);
	return flowid;

#else /* !SKYWALK */

	struct pf_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	bzero(&fh, sizeof(fh));
	/* canonicalize: the smaller address/spi goes into ap1 */
	if (PF_ALEQ(&sk->lan.addr, &sk->ext_lan.addr, sk->af_lan)) {
		bcopy(&sk->lan.addr, &fh.ap1.addr, sizeof(fh.ap1.addr));
		bcopy(&sk->ext_lan.addr, &fh.ap2.addr, sizeof(fh.ap2.addr));
	} else {
		bcopy(&sk->ext_lan.addr, &fh.ap1.addr, sizeof(fh.ap1.addr));
		bcopy(&sk->lan.addr, &fh.ap2.addr, sizeof(fh.ap2.addr));
	}
	if (sk->lan.xport.spi <= sk->ext_lan.xport.spi) {
		fh.ap1.xport.spi = sk->lan.xport.spi;
		fh.ap2.xport.spi = sk->ext_lan.xport.spi;
	} else {
		fh.ap1.xport.spi = sk->ext_lan.xport.spi;
		fh.ap2.xport.spi = sk->lan.xport.spi;
	}
	fh.af = sk->af_lan;
	fh.proto = sk->proto;

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), pf_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		pf_hash_seed = RandomULong();
		goto try_again;
	}

	return flowhash;

#endif /* !SKYWALK */
}
2221 
2222 static int
pf_addr_wrap_neq(struct pf_addr_wrap * aw1,struct pf_addr_wrap * aw2)2223 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
2224 {
2225 	if (aw1->type != aw2->type) {
2226 		return 1;
2227 	}
2228 	switch (aw1->type) {
2229 	case PF_ADDR_ADDRMASK:
2230 	case PF_ADDR_RANGE:
2231 		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) {
2232 			return 1;
2233 		}
2234 		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) {
2235 			return 1;
2236 		}
2237 		return 0;
2238 	case PF_ADDR_DYNIFTL:
2239 		return aw1->p.dyn == NULL || aw2->p.dyn == NULL ||
2240 		       aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt;
2241 	case PF_ADDR_NOROUTE:
2242 	case PF_ADDR_URPFFAILED:
2243 		return 0;
2244 	case PF_ADDR_TABLE:
2245 		return aw1->p.tbl != aw2->p.tbl;
2246 	case PF_ADDR_RTLABEL:
2247 		return aw1->v.rtlabel != aw2->v.rtlabel;
2248 	default:
2249 		printf("invalid address type: %d\n", aw1->type);
2250 		return 1;
2251 	}
2252 }
2253 
/*
 * Incrementally adjust a 16-bit Internet checksum after one 16-bit
 * field changed from `old` to `new`.  `udp` should be non-zero for UDP,
 * whose checksum value 0 means "no checksum" and needs special care
 * (NOTE(review): the exact zero-handling lives in nat464_cksum_fixup --
 * confirm there).  Thin wrapper around the shared NAT46/64 helper.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	return nat464_cksum_fixup(cksum, old, new, udp);
}
2259 
2260 /*
2261  * change ip address & port
2262  * dir	: packet direction
2263  * a	: address to be changed
2264  * p	: port to be changed
2265  * ic	: ip header checksum
2266  * pc	: protocol checksum
2267  * an	: new ip address
2268  * pn	: new port
2269  * u	: should be 1 if UDP packet else 0
2270  * af	: address family of the packet
2271  * afn	: address family of the new address
2272  * ua	: should be 1 if ip address needs to be updated in the packet else
2273  *	  only the checksum is recalculated & updated.
2274  */
static __attribute__((noinline)) void
pf_change_ap(int dir, pbuf_t *pbuf, struct pf_addr *a, u_int16_t *p,
    u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn,
    u_int8_t u, sa_family_t af, sa_family_t afn, int ua)
{
	struct pf_addr  ao;
	u_int16_t       po = *p;

	/* remember the old address; it drives the checksum deltas below */
	PF_ACPY(&ao, a, af);
	if (ua) {
		PF_ACPY(a, an, afn);
	}

	*p = pn;

	switch (af) {
#if INET
	case AF_INET:
		switch (afn) {
		case AF_INET:
			/* v4 -> v4: two 16-bit words feed the IP checksum */
			*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
			    ao.addr16[0], an->addr16[0], 0),
			    ao.addr16[1], an->addr16[1], 0);
			*p = pn;        /* redundant; already stored above */
			/*
			 * If the packet is originated from an ALG on the NAT gateway
			 * (source address is loopback or local), in which case the
			 * TCP/UDP checksum field contains the pseudo header checksum
			 * that's not yet complemented.
			 * In that case we do not need to fixup the checksum for port
			 * translation as the pseudo header checksum doesn't include ports.
			 *
			 * A packet generated locally will have UDP/TCP CSUM flag
			 * set (gets set in protocol output).
			 *
			 * It should be noted that the fixup doesn't do anything if the
			 * checksum is 0.
			 */
			if (dir == PF_OUT && pbuf != NULL &&
			    (*pbuf->pb_csum_flags & (CSUM_TCP | CSUM_UDP))) {
				/* Pseudo-header checksum does not include ports */
				*pc = ~pf_cksum_fixup(pf_cksum_fixup(~*pc,
				    ao.addr16[0], an->addr16[0], u),
				    ao.addr16[1], an->addr16[1], u);
			} else {
				/* fix address words and the port change */
				*pc =
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					    *pc, ao.addr16[0], an->addr16[0], u),
				    ao.addr16[1], an->addr16[1], u),
				    po, pn, u);
			}
			break;
		case AF_INET6:
			/* NAT46: v6 words 2-7 appear where v4 had none (old = 0) */
			*p = pn;
			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(

					    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
					    ao.addr16[0], an->addr16[0], u),
					    ao.addr16[1], an->addr16[1], u),
					    0, an->addr16[2], u),
					    0, an->addr16[3], u),
				    0, an->addr16[4], u),
				    0, an->addr16[5], u),
				    0, an->addr16[6], u),
			    0, an->addr16[7], u),
			    po, pn, u);
			break;
		}
		break;
#endif /* INET */
	case AF_INET6:
		switch (afn) {
		case AF_INET6:
			/*
			 * If the packet is originated from an ALG on the NAT gateway
			 * (source address is loopback or local), in which case the
			 * TCP/UDP checksum field contains the pseudo header checksum
			 * that's not yet complemented.
			 * A packet generated locally
			 * will have UDP/TCP CSUM flag set (gets set in protocol
			 * output).
			 */
			if (dir == PF_OUT && pbuf != NULL &&
			    (*pbuf->pb_csum_flags & (CSUM_TCPIPV6 |
			    CSUM_UDPIPV6))) {
				/* Pseudo-header checksum does not include ports */
				*pc =
				    ~pf_cksum_fixup(pf_cksum_fixup(
					    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
						    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
							    ~*pc,
							    ao.addr16[0], an->addr16[0], u),
						    ao.addr16[1], an->addr16[1], u),
						    ao.addr16[2], an->addr16[2], u),
						    ao.addr16[3], an->addr16[3], u),
					    ao.addr16[4], an->addr16[4], u),
					    ao.addr16[5], an->addr16[5], u),
					    ao.addr16[6], an->addr16[6], u),
				    ao.addr16[7], an->addr16[7], u);
			} else {
				/* all eight address words plus the port change */
				*pc =
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
						    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
							    *pc,
							    ao.addr16[0], an->addr16[0], u),
						    ao.addr16[1], an->addr16[1], u),
						    ao.addr16[2], an->addr16[2], u),
						    ao.addr16[3], an->addr16[3], u),
					    ao.addr16[4], an->addr16[4], u),
					    ao.addr16[5], an->addr16[5], u),
					    ao.addr16[6], an->addr16[6], u),
				    ao.addr16[7], an->addr16[7], u),
				    po, pn, u);
			}
			break;
#ifdef INET
		case AF_INET:
			/* NAT64: v6 words 2-7 vanish (new = 0) */
			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
					    ao.addr16[0], an->addr16[0], u),
					    ao.addr16[1], an->addr16[1], u),
					    ao.addr16[2], 0, u),
					    ao.addr16[3], 0, u),
				    ao.addr16[4], 0, u),
				    ao.addr16[5], 0, u),
				    ao.addr16[6], 0, u),
			    ao.addr16[7], 0, u),
			    po, pn, u);
			break;
#endif /* INET */
		}
		break;
	}
}
2412 
2413 
2414 /* Changes a u_int32_t.  Uses a void * so there are no align restrictions */
2415 void
pf_change_a(void * a,u_int16_t * c,u_int32_t an,u_int8_t u)2416 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
2417 {
2418 	u_int32_t       ao;
2419 
2420 	memcpy(&ao, (uint32_t *)a, sizeof(ao));
2421 	memcpy((uint32_t *)a, &an, sizeof(u_int32_t));
2422 	*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
2423 	    ao % 65536, an % 65536, u);
2424 }
2425 
2426 static __attribute__((noinline)) void
pf_change_a6(struct pf_addr * a,u_int16_t * c,struct pf_addr * an,u_int8_t u)2427 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
2428 {
2429 	struct pf_addr  ao;
2430 
2431 	PF_ACPY(&ao, a, AF_INET6);
2432 	PF_ACPY(a, an, AF_INET6);
2433 
2434 	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2435 		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2436 			    pf_cksum_fixup(pf_cksum_fixup(*c,
2437 			    ao.addr16[0], an->addr16[0], u),
2438 			    ao.addr16[1], an->addr16[1], u),
2439 			    ao.addr16[2], an->addr16[2], u),
2440 		    ao.addr16[3], an->addr16[3], u),
2441 		    ao.addr16[4], an->addr16[4], u),
2442 		    ao.addr16[5], an->addr16[5], u),
2443 	    ao.addr16[6], an->addr16[6], u),
2444 	    ao.addr16[7], an->addr16[7], u);
2445 }
2446 
2447 static __attribute__((noinline)) void
pf_change_addr(struct pf_addr * a,u_int16_t * c,struct pf_addr * an,u_int8_t u,sa_family_t af,sa_family_t afn)2448 pf_change_addr(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u,
2449     sa_family_t af, sa_family_t afn)
2450 {
2451 	struct pf_addr  ao;
2452 
2453 	if (af != afn) {
2454 		PF_ACPY(&ao, a, af);
2455 		PF_ACPY(a, an, afn);
2456 	}
2457 
2458 	switch (af) {
2459 	case AF_INET:
2460 		switch (afn) {
2461 		case AF_INET:
2462 			pf_change_a(a, c, an->v4addr.s_addr, u);
2463 			break;
2464 		case AF_INET6:
2465 			*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2466 				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2467 					    pf_cksum_fixup(pf_cksum_fixup(*c,
2468 					    ao.addr16[0], an->addr16[0], u),
2469 					    ao.addr16[1], an->addr16[1], u),
2470 					    0, an->addr16[2], u),
2471 				    0, an->addr16[3], u),
2472 				    0, an->addr16[4], u),
2473 				    0, an->addr16[5], u),
2474 			    0, an->addr16[6], u),
2475 			    0, an->addr16[7], u);
2476 			break;
2477 		}
2478 		break;
2479 	case AF_INET6:
2480 		switch (afn) {
2481 		case AF_INET:
2482 			*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2483 				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2484 					    pf_cksum_fixup(pf_cksum_fixup(*c,
2485 					    ao.addr16[0], an->addr16[0], u),
2486 					    ao.addr16[1], an->addr16[1], u),
2487 					    ao.addr16[2], 0, u),
2488 				    ao.addr16[3], 0, u),
2489 				    ao.addr16[4], 0, u),
2490 				    ao.addr16[5], 0, u),
2491 			    ao.addr16[6], 0, u),
2492 			    ao.addr16[7], 0, u);
2493 			break;
2494 		case AF_INET6:
2495 			pf_change_a6(a, c, an, u);
2496 			break;
2497 		}
2498 		break;
2499 	}
2500 }
2501 
/*
 * Rewrite the addresses/port embedded in an ICMP error message after NAT.
 * `ia`/`ip`: inner (quoted) address and port; `oa`: outer IP address;
 * `na`/`np`: the new address/port.  All affected checksums are fixed up
 * incrementally: inner protocol (`pc`, may be NULL), inner IP (`h2c`),
 * ICMP (`ic`) and outer IP (`hc`).  `u` is non-zero for UDP.
 */
static __attribute__((noinline)) void
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
{
	struct pf_addr  oia, ooa;

	PF_ACPY(&oia, ia, af);
	PF_ACPY(&ooa, oa, af);

	/* Change inner protocol port, fix inner protocol checksum. */
	if (ip != NULL) {
		u_int16_t       oip = *ip;
		u_int32_t       opc = 0;

		if (pc != NULL) {
			opc = *pc;
		}
		*ip = np;
		if (pc != NULL) {
			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
		}
		/* the quoted port is part of the ICMP payload checksum */
		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
		if (pc != NULL) {
			/* and so is the quoted protocol checksum itself */
			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
		}
	}
	/* Change inner ip address, fix inner ip and icmp checksums. */
	PF_ACPY(ia, na, af);
	switch (af) {
#if INET
	case AF_INET: {
		u_int32_t        oh2c = *h2c;

		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		/* inner IP checksum changed, which also feeds the ICMP sum */
		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
		break;
	}
#endif /* INET */
	case AF_INET6:
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(*ic,
				    oia.addr16[0], ia->addr16[0], u),
				    oia.addr16[1], ia->addr16[1], u),
				    oia.addr16[2], ia->addr16[2], u),
			    oia.addr16[3], ia->addr16[3], u),
			    oia.addr16[4], ia->addr16[4], u),
			    oia.addr16[5], ia->addr16[5], u),
		    oia.addr16[6], ia->addr16[6], u),
		    oia.addr16[7], ia->addr16[7], u);
		break;
	}
	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
	PF_ACPY(oa, na, af);
	switch (af) {
#if INET
	case AF_INET:
		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
		    ooa.addr16[0], oa->addr16[0], 0),
		    ooa.addr16[1], oa->addr16[1], 0);
		break;
#endif /* INET */
	case AF_INET6:
		/* v6: outer address is in the ICMPv6 pseudo-header */
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(*ic,
				    ooa.addr16[0], oa->addr16[0], u),
				    ooa.addr16[1], oa->addr16[1], u),
				    ooa.addr16[2], oa->addr16[2], u),
			    ooa.addr16[3], oa->addr16[3], u),
			    ooa.addr16[4], oa->addr16[4], u),
			    ooa.addr16[5], oa->addr16[5], u),
		    ooa.addr16[6], oa->addr16[6], u),
		    ooa.addr16[7], oa->addr16[7], u);
		break;
	}
}
2585 
2586 
2587 /*
2588  * Need to modulate the sequence numbers in the TCP SACK option
2589  * (credits to Krzysztof Pfaff for report and patch)
2590  */
static __attribute__((noinline)) int
pf_modulate_sack(pbuf_t *pbuf, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *dst)
{
	/* length of the TCP option area beyond the fixed header */
	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
	u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
	int copyback = 0, i, olen;
	struct sackblk sack;

#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
	/* nothing to do unless the options can hold at least one SACK block */
	if (hlen < TCPOLEN_SACKLEN ||
	    !pf_pull_hdr(pbuf, off + sizeof(*th), opts, sizeof(opts), hlen, NULL, NULL, pd->af)) {
		return 0;
	}

	/* walk the option list, rewriting every SACK block in place */
	while (hlen >= TCPOLEN_SACKLEN) {
		olen = opt[1];
		switch (*opt) {
		case TCPOPT_EOL:        /* FALLTHROUGH */
		case TCPOPT_NOP:
			/* single-byte options */
			opt++;
			hlen--;
			break;
		case TCPOPT_SACK:
			if (olen > hlen) {
				olen = hlen;
			}
			if (olen >= TCPOLEN_SACKLEN) {
				for (i = 2; i + TCPOLEN_SACK <= olen;
				    i += TCPOLEN_SACK) {
					memcpy(&sack, &opt[i], sizeof(sack));
					/*
					 * Undo the sequence modulation on
					 * both edges; pf_change_a also fixes
					 * th_sum incrementally.
					 */
					pf_change_a(&sack.start, &th->th_sum,
					    htonl(ntohl(sack.start) -
					    dst->seqdiff), 0);
					pf_change_a(&sack.end, &th->th_sum,
					    htonl(ntohl(sack.end) -
					    dst->seqdiff), 0);
					memcpy(&opt[i], &sack, sizeof(sack));
				}
				/* byte offset up to which we must copy back */
				copyback = off + sizeof(*th) + thoptlen;
			}
			OS_FALLTHROUGH;
		default:
			if (olen < 2) {
				olen = 2;
			}
			hlen -= olen;
			opt += olen;
		}
	}

	if (copyback) {
		if (pf_lazy_makewritable(pd, pbuf, copyback) == NULL) {
			return -1;
		}
		pbuf_copy_back(pbuf, off + sizeof(*th), thoptlen, opts, sizeof(opts));
	}
	/* >0: options rewritten; 0: no SACK found; -1: makewritable failed */
	return copyback;
}
2650 
2651 /*
2652  * XXX
2653  *
2654  * The following functions (pf_send_tcp and pf_send_icmp) are somewhat
2655  * special in that they originate "spurious" packets rather than
2656  * filter/NAT existing packets. As such, they're not a great fit for
2657  * the 'pbuf' shim, which assumes the underlying packet buffers are
2658  * allocated elsewhere.
2659  *
2660  * Since these functions are rarely used, we'll carry on allocating mbufs
2661  * and passing them to the IP stack for eventual routing.
2662  */
2663 static __attribute__((noinline)) void
pf_send_tcp(const struct pf_rule * r,sa_family_t af,const struct pf_addr * saddr,const struct pf_addr * daddr,u_int16_t sport,u_int16_t dport,u_int32_t seq,u_int32_t ack,u_int8_t flags,u_int16_t win,u_int16_t mss,u_int8_t ttl,int tag,u_int16_t rtag,struct ether_header * eh,struct ifnet * ifp)2664 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
2665     const struct pf_addr *saddr, const struct pf_addr *daddr,
2666     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
2667     u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
2668     u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
2669 {
2670 #pragma unused(eh, ifp)
2671 	struct mbuf     *m;
2672 	int              len, tlen;
2673 #if INET
2674 	struct ip       *h = NULL;
2675 #endif /* INET */
2676 	struct ip6_hdr  *h6 = NULL;
2677 	struct tcphdr   *th = NULL;
2678 	char            *opt;
2679 	struct pf_mtag  *pf_mtag;
2680 
2681 	/* maximum segment size tcp option */
2682 	tlen = sizeof(struct tcphdr);
2683 	if (mss) {
2684 		tlen += 4;
2685 	}
2686 
2687 	switch (af) {
2688 #if INET
2689 	case AF_INET:
2690 		len = sizeof(struct ip) + tlen;
2691 		break;
2692 #endif /* INET */
2693 	case AF_INET6:
2694 		len = sizeof(struct ip6_hdr) + tlen;
2695 		break;
2696 	default:
2697 		panic("pf_send_tcp: not AF_INET or AF_INET6!");
2698 		return;
2699 	}
2700 
2701 	/* create outgoing mbuf */
2702 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
2703 	if (m == NULL) {
2704 		return;
2705 	}
2706 
2707 	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
2708 		return;
2709 	}
2710 
2711 	if (tag) {
2712 		pf_mtag->pftag_flags |= PF_TAG_GENERATED;
2713 	}
2714 	pf_mtag->pftag_tag = rtag;
2715 
2716 	if (r != NULL && PF_RTABLEID_IS_VALID(r->rtableid)) {
2717 		pf_mtag->pftag_rtableid = r->rtableid;
2718 	}
2719 
2720 #if PF_ECN
2721 	/* add hints for ecn */
2722 	pf_mtag->pftag_hdr = mtod(m, struct ip *);
2723 	/* record address family */
2724 	pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6);
2725 	switch (af) {
2726 #if INET
2727 	case AF_INET:
2728 		pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
2729 		break;
2730 #endif /* INET */
2731 	case AF_INET6:
2732 		pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
2733 		break;
2734 	}
2735 #endif /* PF_ECN */
2736 
2737 	/* indicate this is TCP */
2738 	m->m_pkthdr.pkt_proto = IPPROTO_TCP;
2739 
2740 	/* Make sure headers are 32-bit aligned */
2741 	m->m_data += max_linkhdr;
2742 	m->m_pkthdr.len = m->m_len = len;
2743 	m->m_pkthdr.rcvif = NULL;
2744 	bzero(m_mtod_current(m), len);
2745 	switch (af) {
2746 #if INET
2747 	case AF_INET:
2748 		h = mtod(m, struct ip *);
2749 
2750 		/* IP header fields included in the TCP checksum */
2751 		h->ip_p = IPPROTO_TCP;
2752 		h->ip_len = htons(tlen);
2753 		h->ip_src.s_addr = saddr->v4addr.s_addr;
2754 		h->ip_dst.s_addr = daddr->v4addr.s_addr;
2755 
2756 		th = (struct tcphdr *)(void *)((caddr_t)h + sizeof(struct ip));
2757 		break;
2758 #endif /* INET */
2759 	case AF_INET6:
2760 		h6 = mtod(m, struct ip6_hdr *);
2761 
2762 		/* IP header fields included in the TCP checksum */
2763 		h6->ip6_nxt = IPPROTO_TCP;
2764 		h6->ip6_plen = htons(tlen);
2765 		memcpy((void *)&h6->ip6_src, &saddr->v6addr, sizeof(struct in6_addr));
2766 		memcpy((void *)&h6->ip6_dst, &daddr->v6addr, sizeof(struct in6_addr));
2767 
2768 		th = (struct tcphdr *)(void *)
2769 		    ((caddr_t)h6 + sizeof(struct ip6_hdr));
2770 		break;
2771 	}
2772 
2773 	/* TCP header */
2774 	th->th_sport = sport;
2775 	th->th_dport = dport;
2776 	th->th_seq = htonl(seq);
2777 	th->th_ack = htonl(ack);
2778 	th->th_off = tlen >> 2;
2779 	th->th_flags = flags;
2780 	th->th_win = htons(win);
2781 
2782 	if (mss) {
2783 		opt = (char *)(th + 1);
2784 		opt[0] = TCPOPT_MAXSEG;
2785 		opt[1] = 4;
2786 #if BYTE_ORDER != BIG_ENDIAN
2787 		HTONS(mss);
2788 #endif
2789 		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
2790 	}
2791 
2792 	switch (af) {
2793 #if INET
2794 	case AF_INET: {
2795 		struct route ro;
2796 
2797 		/* TCP checksum */
2798 		th->th_sum = in_cksum(m, len);
2799 
2800 		/* Finish the IP header */
2801 		h->ip_v = 4;
2802 		h->ip_hl = sizeof(*h) >> 2;
2803 		h->ip_tos = IPTOS_LOWDELAY;
2804 		/*
2805 		 * ip_output() expects ip_len and ip_off to be in host order.
2806 		 */
2807 		h->ip_len = len;
2808 		h->ip_off = (path_mtu_discovery ? IP_DF : 0);
2809 		h->ip_ttl = ttl ? ttl : ip_defttl;
2810 		h->ip_sum = 0;
2811 
2812 		bzero(&ro, sizeof(ro));
2813 		ip_output(m, NULL, &ro, 0, NULL, NULL);
2814 		ROUTE_RELEASE(&ro);
2815 		break;
2816 	}
2817 #endif /* INET */
2818 	case AF_INET6: {
2819 		struct route_in6 ro6;
2820 
2821 		/* TCP checksum */
2822 		th->th_sum = in6_cksum(m, IPPROTO_TCP,
2823 		    sizeof(struct ip6_hdr), tlen);
2824 
2825 		h6->ip6_vfc |= IPV6_VERSION;
2826 		h6->ip6_hlim = IPV6_DEFHLIM;
2827 
2828 		ip6_output_setsrcifscope(m, IFSCOPE_UNKNOWN, NULL);
2829 		ip6_output_setdstifscope(m, IFSCOPE_UNKNOWN, NULL);
2830 		bzero(&ro6, sizeof(ro6));
2831 		ip6_output(m, NULL, &ro6, 0, NULL, NULL, NULL);
2832 		ROUTE_RELEASE(&ro6);
2833 		break;
2834 	}
2835 	}
2836 }
2837 
2838 static __attribute__((noinline)) void
pf_send_icmp(pbuf_t * pbuf,u_int8_t type,u_int8_t code,sa_family_t af,struct pf_rule * r)2839 pf_send_icmp(pbuf_t *pbuf, u_int8_t type, u_int8_t code, sa_family_t af,
2840     struct pf_rule *r)
2841 {
2842 	struct mbuf     *m0;
2843 	struct pf_mtag  *pf_mtag;
2844 
2845 	m0 = pbuf_clone_to_mbuf(pbuf);
2846 	if (m0 == NULL) {
2847 		return;
2848 	}
2849 
2850 	if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
2851 		return;
2852 	}
2853 
2854 	pf_mtag->pftag_flags |= PF_TAG_GENERATED;
2855 
2856 	if (PF_RTABLEID_IS_VALID(r->rtableid)) {
2857 		pf_mtag->pftag_rtableid = r->rtableid;
2858 	}
2859 
2860 #if PF_ECN
2861 	/* add hints for ecn */
2862 	pf_mtag->pftag_hdr = mtod(m0, struct ip *);
2863 	/* record address family */
2864 	pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6);
2865 	switch (af) {
2866 #if INET
2867 	case AF_INET:
2868 		pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
2869 		m0->m_pkthdr.pkt_proto = IPPROTO_ICMP;
2870 		break;
2871 #endif /* INET */
2872 	case AF_INET6:
2873 		pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
2874 		m0->m_pkthdr.pkt_proto = IPPROTO_ICMPV6;
2875 		break;
2876 	}
2877 #endif /* PF_ECN */
2878 
2879 	switch (af) {
2880 #if INET
2881 	case AF_INET:
2882 		icmp_error(m0, type, code, 0, 0);
2883 		break;
2884 #endif /* INET */
2885 	case AF_INET6:
2886 		icmp6_error(m0, type, code, 0);
2887 		break;
2888 	}
2889 }
2890 
2891 /*
2892  * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
2893  * If n is 0, they match if they are equal. If n is != 0, they match if they
2894  * are different.
2895  */
2896 int
pf_match_addr(u_int8_t n,struct pf_addr * a,struct pf_addr * m,struct pf_addr * b,sa_family_t af)2897 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
2898     struct pf_addr *b, sa_family_t af)
2899 {
2900 	int     match = 0;
2901 
2902 	switch (af) {
2903 #if INET
2904 	case AF_INET:
2905 		if ((a->addr32[0] & m->addr32[0]) ==
2906 		    (b->addr32[0] & m->addr32[0])) {
2907 			match++;
2908 		}
2909 		break;
2910 #endif /* INET */
2911 	case AF_INET6:
2912 		if (((a->addr32[0] & m->addr32[0]) ==
2913 		    (b->addr32[0] & m->addr32[0])) &&
2914 		    ((a->addr32[1] & m->addr32[1]) ==
2915 		    (b->addr32[1] & m->addr32[1])) &&
2916 		    ((a->addr32[2] & m->addr32[2]) ==
2917 		    (b->addr32[2] & m->addr32[2])) &&
2918 		    ((a->addr32[3] & m->addr32[3]) ==
2919 		    (b->addr32[3] & m->addr32[3]))) {
2920 			match++;
2921 		}
2922 		break;
2923 	}
2924 	if (match) {
2925 		if (n) {
2926 			return 0;
2927 		} else {
2928 			return 1;
2929 		}
2930 	} else {
2931 		if (n) {
2932 			return 1;
2933 		} else {
2934 			return 0;
2935 		}
2936 	}
2937 }
2938 
2939 /*
2940  * Return 1 if b <= a <= e, otherwise return 0.
2941  */
2942 int
pf_match_addr_range(struct pf_addr * b,struct pf_addr * e,struct pf_addr * a,sa_family_t af)2943 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
2944     struct pf_addr *a, sa_family_t af)
2945 {
2946 	switch (af) {
2947 #if INET
2948 	case AF_INET:
2949 		if ((a->addr32[0] < b->addr32[0]) ||
2950 		    (a->addr32[0] > e->addr32[0])) {
2951 			return 0;
2952 		}
2953 		break;
2954 #endif /* INET */
2955 	case AF_INET6: {
2956 		int     i;
2957 
2958 		/* check a >= b */
2959 		for (i = 0; i < 4; ++i) {
2960 			if (a->addr32[i] > b->addr32[i]) {
2961 				break;
2962 			} else if (a->addr32[i] < b->addr32[i]) {
2963 				return 0;
2964 			}
2965 		}
2966 		/* check a <= e */
2967 		for (i = 0; i < 4; ++i) {
2968 			if (a->addr32[i] < e->addr32[i]) {
2969 				break;
2970 			} else if (a->addr32[i] > e->addr32[i]) {
2971 				return 0;
2972 			}
2973 		}
2974 		break;
2975 	}
2976 	}
2977 	return 1;
2978 }
2979 
2980 int
pf_match(u_int8_t op,u_int32_t a1,u_int32_t a2,u_int32_t p)2981 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2982 {
2983 	switch (op) {
2984 	case PF_OP_IRG:
2985 		return (p > a1) && (p < a2);
2986 	case PF_OP_XRG:
2987 		return (p < a1) || (p > a2);
2988 	case PF_OP_RRG:
2989 		return (p >= a1) && (p <= a2);
2990 	case PF_OP_EQ:
2991 		return p == a1;
2992 	case PF_OP_NE:
2993 		return p != a1;
2994 	case PF_OP_LT:
2995 		return p < a1;
2996 	case PF_OP_LE:
2997 		return p <= a1;
2998 	case PF_OP_GT:
2999 		return p > a1;
3000 	case PF_OP_GE:
3001 		return p >= a1;
3002 	}
3003 	return 0; /* never reached */
3004 }
3005 
3006 int
pf_match_port(u_int8_t op,u_int16_t a1,u_int16_t a2,u_int16_t p)3007 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
3008 {
3009 #if BYTE_ORDER != BIG_ENDIAN
3010 	NTOHS(a1);
3011 	NTOHS(a2);
3012 	NTOHS(p);
3013 #endif
3014 	return pf_match(op, a1, a2, p);
3015 }
3016 
3017 int
pf_match_xport(u_int8_t proto,u_int8_t proto_variant,union pf_rule_xport * rx,union pf_state_xport * sx)3018 pf_match_xport(u_int8_t proto, u_int8_t proto_variant, union pf_rule_xport *rx,
3019     union pf_state_xport *sx)
3020 {
3021 	int d = !0;
3022 
3023 	if (sx) {
3024 		switch (proto) {
3025 		case IPPROTO_GRE:
3026 			if (proto_variant == PF_GRE_PPTP_VARIANT) {
3027 				d = (rx->call_id == sx->call_id);
3028 			}
3029 			break;
3030 
3031 		case IPPROTO_ESP:
3032 			d = (rx->spi == sx->spi);
3033 			break;
3034 
3035 		case IPPROTO_TCP:
3036 		case IPPROTO_UDP:
3037 		case IPPROTO_ICMP:
3038 		case IPPROTO_ICMPV6:
3039 			if (rx->range.op) {
3040 				d = pf_match_port(rx->range.op,
3041 				    rx->range.port[0], rx->range.port[1],
3042 				    sx->port);
3043 			}
3044 			break;
3045 
3046 		default:
3047 			break;
3048 		}
3049 	}
3050 
3051 	return d;
3052 }
3053 
3054 int
pf_match_uid(u_int8_t op,uid_t a1,uid_t a2,uid_t u)3055 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
3056 {
3057 	if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) {
3058 		return 0;
3059 	}
3060 	return pf_match(op, a1, a2, u);
3061 }
3062 
3063 int
pf_match_gid(u_int8_t op,gid_t a1,gid_t a2,gid_t g)3064 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
3065 {
3066 	if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) {
3067 		return 0;
3068 	}
3069 	return pf_match(op, a1, a2, g);
3070 }
3071 
3072 static int
pf_match_tag(struct pf_rule * r,struct pf_mtag * pf_mtag,int * tag)3073 pf_match_tag(struct pf_rule *r, struct pf_mtag *pf_mtag,
3074     int *tag)
3075 {
3076 	if (*tag == -1) {
3077 		*tag = pf_mtag->pftag_tag;
3078 	}
3079 
3080 	return (!r->match_tag_not && r->match_tag == *tag) ||
3081 	       (r->match_tag_not && r->match_tag != *tag);
3082 }
3083 
/*
 * Record pf metadata on a packet: an optional tag, an optional routing
 * table id, and — when the packet descriptor carries PKTF_FLOW_ID — the
 * flow identification fields.  Returns 0 on success, 1 if a pf mbuf tag
 * could not be attached.
 */
int
pf_tag_packet(pbuf_t *pbuf, struct pf_mtag *pf_mtag, int tag,
    unsigned int rtableid, struct pf_pdesc *pd)
{
	/* Fast path: nothing at all to record on this packet. */
	if (tag <= 0 && !PF_RTABLEID_IS_VALID(rtableid) &&
	    (pd == NULL || !(pd->pktflags & PKTF_FLOW_ID))) {
		return 0;
	}

	/* Lazily attach the pf metadata tag if the caller did not pass one. */
	if (pf_mtag == NULL && (pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
		return 1;
	}

	if (tag > 0) {
		pf_mtag->pftag_tag = tag;
	}
	if (PF_RTABLEID_IS_VALID(rtableid)) {
		pf_mtag->pftag_rtableid = rtableid;
	}
	if (pd != NULL && (pd->pktflags & PKTF_FLOW_ID)) {
		/* Propagate flow identification into the packet buffer. */
		*pbuf->pb_flowsrc = pd->flowsrc;
		*pbuf->pb_flowid = pd->flowhash;
		*pbuf->pb_flags |= pd->pktflags;
		*pbuf->pb_proto = pd->proto;
	}

	return 0;
}
3112 
/*
 * Descend into the anchor attached to rule *r during ruleset evaluation:
 * push the current (ruleset, rule) pair onto pf_anchor_stack, clear the
 * anchor's match flag, and point *rs / *r at the anchor's ruleset and its
 * first rule.  For wildcard anchors the walk starts at the first child
 * anchor.  If the fixed-size anchor stack would overflow, the anchor is
 * skipped and *r advances to the next rule instead.
 */
void
pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
    struct pf_rule **r, struct pf_rule **a, int *match)
{
	struct pf_anchor_stackframe     *f;

	(*r)->anchor->match = 0;
	if (match) {
		*match = 0;
	}
	/* Refuse to recurse beyond the fixed-size anchor stack. */
	if (*depth >= (int)sizeof(pf_anchor_stack) /
	    (int)sizeof(pf_anchor_stack[0])) {
		printf("pf_step_into_anchor: stack overflow\n");
		*r = TAILQ_NEXT(*r, entries);
		return;
	} else if (*depth == 0 && a != NULL) {
		/* Remember the top-level anchor rule for the caller. */
		*a = *r;
	}
	f = pf_anchor_stack + (*depth)++;
	f->rs = *rs;
	f->r = *r;
	if ((*r)->anchor_wildcard) {
		/* Wildcard anchor: iterate over all child anchors in order. */
		f->parent = &(*r)->anchor->children;
		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
		    NULL) {
			*r = NULL;
			return;
		}
		*rs = &f->child->ruleset;
	} else {
		f->parent = NULL;
		f->child = NULL;
		*rs = &(*r)->anchor->ruleset;
	}
	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
}
3149 
/*
 * Pop anchor stack frames when an anchor's ruleset is exhausted.  For a
 * wildcard anchor, first advance to the next child anchor; only when all
 * children are done is the frame popped and evaluation resumed at the rule
 * following the anchor rule.  The anchor's match result is propagated
 * upward, and the matched anchor rule's 'quick' flag is returned
 * (0 otherwise).
 */
int
pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
    struct pf_rule **r, struct pf_rule **a, int *match)
{
	struct pf_anchor_stackframe     *f;
	int quick = 0;

	do {
		if (*depth <= 0) {
			break;
		}
		f = pf_anchor_stack + *depth - 1;
		if (f->parent != NULL && f->child != NULL) {
			/* Wildcard anchor: try the next child ruleset. */
			if (f->child->match ||
			    (match != NULL && *match)) {
				f->r->anchor->match = 1;
				if (match) {
					*match = 0;
				}
			}
			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
			if (f->child != NULL) {
				*rs = &f->child->ruleset;
				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
				if (*r == NULL) {
					/* Empty child; keep advancing. */
					continue;
				} else {
					break;
				}
			}
		}
		/* All children done (or plain anchor): pop the frame. */
		(*depth)--;
		if (*depth == 0 && a != NULL) {
			*a = NULL;
		}
		*rs = f->rs;
		if (f->r->anchor->match || (match != NULL && *match)) {
			quick = f->r->quick;
		}
		*r = TAILQ_NEXT(f->r, entries);
	} while (*r == NULL);

	return quick;
}
3194 
3195 void
pf_poolmask(struct pf_addr * naddr,struct pf_addr * raddr,struct pf_addr * rmask,struct pf_addr * saddr,sa_family_t af)3196 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
3197     struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
3198 {
3199 	switch (af) {
3200 #if INET
3201 	case AF_INET:
3202 		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3203 		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
3204 		break;
3205 #endif /* INET */
3206 	case AF_INET6:
3207 		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3208 		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
3209 		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
3210 		    ((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]);
3211 		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
3212 		    ((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]);
3213 		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
3214 		    ((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]);
3215 		break;
3216 	}
3217 }
3218 
3219 void
pf_addr_inc(struct pf_addr * addr,sa_family_t af)3220 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
3221 {
3222 	switch (af) {
3223 #if INET
3224 	case AF_INET:
3225 		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
3226 		break;
3227 #endif /* INET */
3228 	case AF_INET6:
3229 		if (addr->addr32[3] == 0xffffffff) {
3230 			addr->addr32[3] = 0;
3231 			if (addr->addr32[2] == 0xffffffff) {
3232 				addr->addr32[2] = 0;
3233 				if (addr->addr32[1] == 0xffffffff) {
3234 					addr->addr32[1] = 0;
3235 					addr->addr32[0] =
3236 					    htonl(ntohl(addr->addr32[0]) + 1);
3237 				} else {
3238 					addr->addr32[1] =
3239 					    htonl(ntohl(addr->addr32[1]) + 1);
3240 				}
3241 			} else {
3242 				addr->addr32[2] =
3243 				    htonl(ntohl(addr->addr32[2]) + 1);
3244 			}
3245 		} else {
3246 			addr->addr32[3] =
3247 			    htonl(ntohl(addr->addr32[3]) + 1);
3248 		}
3249 		break;
3250 	}
3251 }
3252 
/*
 * One round of a Bob Jenkins style 3-word mixer: stirs the three 32-bit
 * accumulators so every input bit influences every output bit.  The body
 * is wrapped in do { } while (0) so it behaves as a single statement;
 * note the arguments are evaluated many times, so pass only plain lvalues.
 */
#define mix(a, b, c) \
	do {                                    \
	        a -= b; a -= c; a ^= (c >> 13); \
	        b -= c; b -= a; b ^= (a << 8);  \
	        c -= a; c -= b; c ^= (b >> 13); \
	        a -= b; a -= c; a ^= (c >> 12); \
	        b -= c; b -= a; b ^= (a << 16); \
	        c -= a; c -= b; c ^= (b >> 5);  \
	        a -= b; a -= c; a ^= (c >> 3);  \
	        b -= c; b -= a; b ^= (a << 10); \
	        c -= a; c -= b; c ^= (b >> 15); \
	} while (0)
3265 
3266 /*
3267  * hash function based on bridge_hash in if_bridge.c
3268  */
3269 static void
pf_hash(struct pf_addr * inaddr,struct pf_addr * hash,struct pf_poolhashkey * key,sa_family_t af)3270 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
3271     struct pf_poolhashkey *key, sa_family_t af)
3272 {
3273 	u_int32_t       a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
3274 
3275 	switch (af) {
3276 #if INET
3277 	case AF_INET:
3278 		a += inaddr->addr32[0];
3279 		b += key->key32[1];
3280 		mix(a, b, c);
3281 		hash->addr32[0] = c + key->key32[2];
3282 		break;
3283 #endif /* INET */
3284 	case AF_INET6:
3285 		a += inaddr->addr32[0];
3286 		b += inaddr->addr32[2];
3287 		mix(a, b, c);
3288 		hash->addr32[0] = c;
3289 		a += inaddr->addr32[1];
3290 		b += inaddr->addr32[3];
3291 		c += key->key32[1];
3292 		mix(a, b, c);
3293 		hash->addr32[1] = c;
3294 		a += inaddr->addr32[2];
3295 		b += inaddr->addr32[1];
3296 		c += key->key32[2];
3297 		mix(a, b, c);
3298 		hash->addr32[2] = c;
3299 		a += inaddr->addr32[3];
3300 		b += inaddr->addr32[0];
3301 		c += key->key32[3];
3302 		mix(a, b, c);
3303 		hash->addr32[3] = c;
3304 		break;
3305 	}
3306 }
3307 
/*
 * Select a translation address from rule r's address pool and copy it to
 * naddr.
 *
 * af        : address family of the packet's source address
 * r         : rule whose rpool supplies candidate addresses
 * saddr     : packet source address (input for bitmask/source-hash pools)
 * naddr     : selected translation address (output)
 * init_addr : first address tried (in/out; lets callers detect when a
 *             random/round-robin pool has wrapped around)
 * sn        : source node for sticky-address tracking (in/out)
 *
 * Returns 0 on success, 1 if no usable address could be selected.
 */
static __attribute__((noinline)) int
pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
{
	unsigned char            hash[16];
	struct pf_pool          *__single rpool = &r->rpool;
	struct pf_addr          *__single raddr = &rpool->cur->addr.v.a.addr;
	struct pf_addr          *__single rmask = &rpool->cur->addr.v.a.mask;
	struct pf_pooladdr      *__single acur = rpool->cur;
	struct pf_src_node       k;

	/*
	 * Sticky address: if source tracking already recorded a mapping for
	 * this source address, reuse it instead of picking a new one.
	 */
	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		k.af = af;
		PF_ACPY(&k.addr, saddr, af);
		if (r->rule_flag & PFRULE_RULESRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR) {
			k.rule.ptr = r;
		} else {
			k.rule.ptr = NULL;
		}
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, rpool->af)) {
			PF_ACPY(naddr, &(*sn)->raddr, rpool->af);
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf_map_addr: src tracking maps ");
				pf_print_host(&k.addr, 0, af);
				printf(" to ");
				pf_print_host(naddr, 0, rpool->af);
				printf("\n");
			}
			return 0;
		}
	}

	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
		return 1;
	}
	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
		/* Dynamic interface address: use the interface's addr/mask. */
		if (rpool->cur->addr.p.dyn == NULL) {
			return 1;
		}
		switch (rpool->af) {
#if INET
		case AF_INET:
			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN) {
				return 1;
			}
			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
			break;
#endif /* INET */
		case AF_INET6:
			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN) {
				return 1;
			}
			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
			break;
		}
	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
		/* Tables only work with round-robin pools. */
		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
			return 1; /* unsupported */
		}
	} else {
		raddr = &rpool->cur->addr.v.a.addr;
		rmask = &rpool->cur->addr.v.a.mask;
	}

	switch (rpool->opts & PF_POOL_TYPEMASK) {
	case PF_POOL_NONE:
		PF_ACPY(naddr, raddr, rpool->af);
		break;
	case PF_POOL_BITMASK:
		ASSERT(af == rpool->af);
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		break;
	case PF_POOL_RANDOM:
		if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) {
			/*
			 * First pick: randomize only the host bits the mask
			 * leaves open, then remember the result in init_addr
			 * so the caller can detect wrap-around.
			 */
			switch (af) {
#if INET
			case AF_INET:
				rpool->counter.addr32[0] = htonl(random());
				break;
#endif /* INET */
			case AF_INET6:
				if (rmask->addr32[3] != 0xffffffff) {
					rpool->counter.addr32[3] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[2] != 0xffffffff) {
					rpool->counter.addr32[2] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[1] != 0xffffffff) {
					rpool->counter.addr32[1] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[0] != 0xffffffff) {
					rpool->counter.addr32[0] =
					    RandomULong();
				}
				break;
			}
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter,
			    rpool->af);
			PF_ACPY(init_addr, naddr, rpool->af);
		} else {
			/* Subsequent picks step sequentially from counter. */
			PF_AINC(&rpool->counter, rpool->af);
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter,
			    rpool->af);
		}
		break;
	case PF_POOL_SRCHASH:
		ASSERT(af == rpool->af);
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		pf_hash(saddr, (struct pf_addr *)(void *)&hash,
		    &rpool->key, af);
		PF_POOLMASK(naddr, raddr, rmask,
		    (struct pf_addr *)(void *)&hash, af);
		break;
	case PF_POOL_ROUNDROBIN:
		/* Try to continue from the pool's current position first. */
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				goto get_addr;
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			if (rpool->cur->addr.p.dyn != NULL &&
			    !pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				goto get_addr;
			}
		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter,
		    rpool->af)) {
			goto get_addr;
		}

try_next:
		/* Current entry exhausted; advance (wrapping) to the next. */
		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) {
			rpool->cur = TAILQ_FIRST(&rpool->list);
		}
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				/* table contains no address of type
				 * 'rpool->af' */
				if (rpool->cur != acur) {
					goto try_next;
				}
				return 1;
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			rpool->tblidx = -1;
			if (rpool->cur->addr.p.dyn == NULL) {
				return 1;
			}
			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				/* table contains no address of type
				 * 'rpool->af' */
				if (rpool->cur != acur) {
					goto try_next;
				}
				return 1;
			}
		} else {
			raddr = &rpool->cur->addr.v.a.addr;
			rmask = &rpool->cur->addr.v.a.mask;
			PF_ACPY(&rpool->counter, raddr, rpool->af);
		}

get_addr:
		PF_ACPY(naddr, &rpool->counter, rpool->af);
		if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) {
			PF_ACPY(init_addr, naddr, rpool->af);
		}
		PF_AINC(&rpool->counter, rpool->af);
		break;
	}
	/* Remember the choice so sticky-address lookups can reuse it. */
	if (*sn != NULL) {
		PF_ACPY(&(*sn)->raddr, naddr, rpool->af);
	}

	if (pf_status.debug >= PF_DEBUG_MISC &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		printf("pf_map_addr: selected address ");
		pf_print_host(naddr, 0, rpool->af);
		printf("\n");
	}

	return 0;
}
3517 
/*
 * Choose a translated source address (via pf_map_addr) and, when nxport
 * is given, a translated source port that does not collide with existing
 * pf state.  UDP and TCP first try to reuse an existing binding for the
 * same internal address/port so related sessions share one mapping.
 * Returns 0 on success, 1 if no free address/port combination exists.
 */
static __attribute__((noinline)) int
pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r,
    struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, struct pf_addr *naddr,
    union pf_state_xport *nxport, struct pf_src_node **sn
#if SKYWALK
    , netns_token *pnstoken
#endif
    )
{
#pragma unused(kif)
	struct pf_state_key_cmp key;
	struct pf_addr          init_addr;
	unsigned int cut;
	sa_family_t af = pd->af;
	u_int8_t proto = pd->proto;
	unsigned int low = r->rpool.proxy_port[0];
	unsigned int high = r->rpool.proxy_port[1];

	bzero(&init_addr, sizeof(init_addr));
	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) {
		return 1;
	}

	/* For ICMP the "port" is the echo id; any non-zero value will do. */
	if (proto == IPPROTO_ICMP) {
		low = 1;
		high = 65535;
	}

	if (!nxport) {
		return 0; /* No output necessary. */
	}
	/*--- Special mapping rules for UDP ---*/
	if (proto == IPPROTO_UDP) {
		/*--- Never float IKE source port ---*/
		if (ntohs(sxport->port) == PF_IKE_PORT) {
			nxport->port = sxport->port;
			return 0;
		}

		/*--- Apply exterior mapping options ---*/
		if (r->extmap > PF_EXTMAP_APD) {
			struct pf_state *s;

			/* Reuse an existing binding for this lan addr/port. */
			TAILQ_FOREACH(s, &state_list, entry_list) {
				struct pf_state_key *sk = s->state_key;
				if (!sk) {
					continue;
				}
				if (s->nat_rule.ptr != r) {
					continue;
				}
				if (sk->proto != IPPROTO_UDP ||
				    sk->af_lan != af) {
					continue;
				}
				if (sk->lan.xport.port != sxport->port) {
					continue;
				}
				if (PF_ANEQ(&sk->lan.addr, saddr, af)) {
					continue;
				}
				if (r->extmap < PF_EXTMAP_EI &&
				    PF_ANEQ(&sk->ext_lan.addr, daddr, af)) {
					continue;
				}

#if SKYWALK
				if (netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, sxport->port,
				    NETNS_PF, NULL) != 0) {
					return 1;
				}
#endif
				nxport->port = sk->gwy.xport.port;
				return 0;
			}
		}
	} else if (proto == IPPROTO_TCP) {
		struct pf_state* s;
		/*
		 * APPLE MODIFICATION: <rdar://problem/6546358>
		 * Fix allows....NAT to use a single binding for TCP session
		 * with same source IP and source port
		 */
		TAILQ_FOREACH(s, &state_list, entry_list) {
			struct pf_state_key* sk = s->state_key;
			if (!sk) {
				continue;
			}
			if (s->nat_rule.ptr != r) {
				continue;
			}
			if (sk->proto != IPPROTO_TCP || sk->af_lan != af) {
				continue;
			}
			if (sk->lan.xport.port != sxport->port) {
				continue;
			}
			if (!(PF_AEQ(&sk->lan.addr, saddr, af))) {
				continue;
			}
#if SKYWALK
			if (netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, sxport->port,
			    NETNS_PF, NULL) != 0) {
				return 1;
			}
#endif
			nxport->port = sk->gwy.xport.port;
			return 0;
		}
	}
	/*
	 * Probe candidate (address, port) pairs against the state table
	 * until a free one is found, moving to another pool address when
	 * the current address has no free port.
	 */
	do {
		key.af_gwy = af;
		key.proto = proto;
		PF_ACPY(&key.ext_gwy.addr, daddr, key.af_gwy);
		PF_ACPY(&key.gwy.addr, naddr, key.af_gwy);
		switch (proto) {
		case IPPROTO_UDP:
			key.proto_variant = r->extfilter;
			break;
		default:
			key.proto_variant = 0;
			break;
		}
		if (dxport) {
			key.ext_gwy.xport = *dxport;
		} else {
			memset(&key.ext_gwy.xport, 0,
			    sizeof(key.ext_gwy.xport));
		}
		/*
		 * port search; start random, step;
		 * similar 2 portloop in in_pcbbind
		 */
		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
		    proto == IPPROTO_ICMP)) {
			/* Portless protocol: only the address must be free. */
			if (dxport) {
				key.gwy.xport = *dxport;
			} else {
				memset(&key.gwy.xport, 0,
				    sizeof(key.gwy.xport));
			}
#if SKYWALK
			/* Nothing to do: netns handles TCP/UDP only */
#endif
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
				return 0;
			}
		} else if (low == 0 && high == 0) {
			/* No proxy port range: keep the original port. */
			key.gwy.xport = *nxport;
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
			    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
			    || netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, nxport->port,
			    NETNS_PF, NULL) == 0)
#endif
			    ) {
				return 0;
			}
		} else if (low == high) {
			/* Single-port range: only one candidate to try. */
			key.gwy.xport.port = htons(low);
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
			    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
			    || netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, htons(low),
			    NETNS_PF, NULL) == 0)
#endif
			    ) {
				nxport->port = htons(low);
				return 0;
			}
		} else {
			unsigned int tmp;
			if (low > high) {
				tmp = low;
				low = high;
				high = tmp;
			}
			/* low < high */
			cut = htonl(random()) % (1 + high - low) + low;
			/* low <= cut <= high */
			/* Scan upward from the random starting point... */
			for (tmp = cut; tmp <= high; ++(tmp)) {
				key.gwy.xport.port = htons(tmp);
				if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
				    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
				    || netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, htons(tmp),
				    NETNS_PF, NULL) == 0)
#endif
				    ) {
					nxport->port = htons(tmp);
					return 0;
				}
			}
			/* ...then downward from just below it. */
			for (tmp = cut - 1; tmp >= low; --(tmp)) {
				key.gwy.xport.port = htons(tmp);
				if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
				    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
				    || netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, htons(tmp),
				    NETNS_PF, NULL) == 0)
#endif
				    ) {
					nxport->port = htons(tmp);
					return 0;
				}
			}
		}

		/*
		 * Every port on this address is taken; only pool types that
		 * can yield another address may continue the search.
		 */
		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
		case PF_POOL_RANDOM:
		case PF_POOL_ROUNDROBIN:
			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) {
				return 1;
			}
			break;
		case PF_POOL_NONE:
		case PF_POOL_SRCHASH:
		case PF_POOL_BITMASK:
		default:
			return 1;
		}
	} while (!PF_AEQ(&init_addr, naddr, af));

	return 1;                                     /* none available */
}
3750 
/*
 * Walk the active ruleset rs_num (NAT/BINAT/RDR) and return the first
 * translation rule matching the packet described by pd, or NULL.  BINAT
 * rules are matched reversed on inbound packets and RDR rules reversed on
 * outbound packets, so the translated side is compared against the rule's
 * pool address.  Any tag/rtableid gathered while matching is applied to
 * the packet; "no nat/binat/rdr" rules match but suppress translation.
 */
static __attribute__((noinline)) struct pf_rule *
pf_match_translation(struct pf_pdesc *pd, pbuf_t *pbuf, int off,
    int direction, struct pfi_kif *kif, struct pf_addr *saddr,
    union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, int rs_num)
{
	struct pf_rule          *__single r, *__single rm = NULL;
	struct pf_ruleset       *__single ruleset = NULL;
	int                      tag = -1;
	unsigned int             rtableid = IFSCOPE_NONE;
	int                      asd = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
	while (r && rm == NULL) {
		struct pf_rule_addr     *src = NULL, *dst = NULL;
		struct pf_addr_wrap     *xdst = NULL;
		struct pf_addr_wrap     *xsrc = NULL;
		union pf_rule_xport     rdrxport;

		/* Pick which rule side compares against the packet. */
		if (r->action == PF_BINAT && direction == PF_IN) {
			src = &r->dst;
			if (r->rpool.cur != NULL) {
				xdst = &r->rpool.cur->addr;
			}
		} else if (r->action == PF_RDR && direction == PF_OUT) {
			dst = &r->src;
			src = &r->dst;
			if (r->rpool.cur != NULL) {
				rdrxport.range.op = PF_OP_EQ;
				rdrxport.range.port[0] =
				    htons(r->rpool.proxy_port[0]);
				xsrc = &r->rpool.cur->addr;
			}
		} else {
			src = &r->src;
			dst = &r->dst;
		}

		r->evaluations++;
		/* Use the skip lists to jump over rules that cannot match. */
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != direction) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != pd->af) {
			r = r->skip[PF_SKIP_AF].ptr;
		} else if (r->proto && r->proto != pd->proto) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		} else if (xsrc && PF_MISMATCHAW(xsrc, saddr, pd->af, 0, NULL)) {
			r = TAILQ_NEXT(r, entries);
		} else if (!xsrc && PF_MISMATCHAW(&src->addr, saddr, pd->af,
		    src->neg, kif)) {
			r = TAILQ_NEXT(r, entries);
		} else if (xsrc && (!rdrxport.range.port[0] ||
		    !pf_match_xport(r->proto, r->proto_variant, &rdrxport,
		    sxport))) {
			r = TAILQ_NEXT(r, entries);
		} else if (!xsrc && !pf_match_xport(r->proto,
		    r->proto_variant, &src->xport, sxport)) {
			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
			    PF_SKIP_DST_PORT].ptr;
		} else if (dst != NULL &&
		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		} else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
		    0, NULL)) {
			r = TAILQ_NEXT(r, entries);
		} else if (dst && !pf_match_xport(r->proto, r->proto_variant,
		    &dst->xport, dxport)) {
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, pbuf,
		    off, pf_pd_get_hdr_tcp(pd)), r->os_fingerprint))) {
			r = TAILQ_NEXT(r, entries);
		} else {
			/* Rule matched: latch tag/rtableid, recurse anchors. */
			if (r->tag) {
				tag = r->tag;
			}
			if (PF_RTABLEID_IS_VALID(r->rtableid)) {
				rtableid = r->rtableid;
			}
			if (r->anchor == NULL) {
				rm = r;
			} else {
				pf_step_into_anchor(&asd, &ruleset, rs_num,
				    &r, NULL, NULL);
			}
		}
		if (r == NULL) {
			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
			    NULL, NULL);
		}
	}
	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, NULL)) {
		return NULL;
	}
	/* A matched "no nat/binat/rdr" rule means: do not translate. */
	if (rm != NULL && (rm->action == PF_NONAT ||
	    rm->action == PF_NORDR || rm->action == PF_NOBINAT ||
	    rm->action == PF_NONAT64)) {
		return NULL;
	}
	return rm;
}
3855 
3856 /*
3857  * Get address translation information for NAT/BINAT/RDR
3858  * pd		: pf packet descriptor
3859  * pbuf		: pbuf holding the packet
3860  * off		: offset to protocol header
3861  * direction	: direction of packet
3862  * kif		: pf interface info obtained from the packet's recv interface
3863  * sn		: source node pointer (output)
3864  * saddr	: packet source address
3865  * sxport	: packet source port
3866  * daddr	: packet destination address
3867  * dxport	: packet destination port
3868  * nsxport	: translated source port (output)
3869  *
3870  * Translated source & destination address are updated in pd->nsaddr &
3871  * pd->ndaddr
3872  */
static __attribute__((noinline)) struct pf_rule *
pf_get_translation_aux(struct pf_pdesc *pd, pbuf_t *pbuf, int off,
    int direction, struct pfi_kif *kif, struct pf_src_node **sn,
    struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, union pf_state_xport *nsxport
#if SKYWALK
    , netns_token *pnstoken
#endif
    )
{
	struct pf_rule  *r = NULL;
	/* Until a NAT64 rule says otherwise, the translated AF is the input AF. */
	pd->naf = pd->af;

	/*
	 * Search the translation rulesets.  Outbound packets try
	 * BINAT -> RDR -> NAT; inbound packets try RDR -> BINAT.
	 * The first matching rule wins.
	 */
	if (direction == PF_OUT) {
		r = pf_match_translation(pd, pbuf, off, direction, kif, saddr,
		    sxport, daddr, dxport, PF_RULESET_BINAT);
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_RDR);
		}
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_NAT);
		}
	} else {
		r = pf_match_translation(pd, pbuf, off, direction, kif, saddr,
		    sxport, daddr, dxport, PF_RULESET_RDR);
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_BINAT);
		}
	}

	if (r != NULL) {
		/* Start from the untranslated addresses; rules below rewrite them. */
		struct pf_addr *nsaddr = &pd->naddr;
		struct pf_addr *ndaddr = &pd->ndaddr;

		PF_ACPY(nsaddr, saddr, pd->af);
		PF_ACPY(ndaddr, daddr, pd->af);

		switch (r->action) {
		case PF_NONAT:
		case PF_NONAT64:
		case PF_NOBINAT:
		case PF_NORDR:
			/* Explicit "no translation" rule matched. */
			return NULL;
		case PF_NAT:
		case PF_NAT64:
			/*
			 * we do NAT64 on incoming path and we call ip_input
			 * which asserts receive interface to be not NULL.
			 * The below check is to prevent NAT64 action on any
			 * packet generated by local entity using synthesized
			 * IPv6 address.
			 */
			if ((r->action == PF_NAT64) && (direction == PF_OUT)) {
				return NULL;
			}

			/* Allocate a proxy source port for the translation. */
			if (pf_get_sport(pd, kif, r, saddr, sxport, daddr,
			    dxport, nsaddr, nsxport, sn
#if SKYWALK
			    , pnstoken
#endif
			    )) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: NAT proxy port allocation "
				    "(%u-%u) failed\n",
				    r->rpool.proxy_port[0],
				    r->rpool.proxy_port[1]));
				return NULL;
			}
			/*
			 * For NAT64 the destination IPv4 address is derived
			 * from the last 32 bits of synthesized IPv6 address
			 */
			if (r->action == PF_NAT64) {
				ndaddr->v4addr.s_addr = daddr->addr32[3];
				pd->naf = AF_INET;
			}
			break;
		case PF_BINAT:
			/*
			 * Bidirectional NAT: outbound rewrites the source,
			 * inbound rewrites the destination (using the rule's
			 * src as the mapping target).
			 */
			switch (direction) {
			case PF_OUT:
				if (r->rpool.cur->addr.type ==
				    PF_ADDR_DYNIFTL) {
					/* Dynamic interface address: must be resolved. */
					if (r->rpool.cur->addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr4,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask4,
						    saddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr6,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask6,
						    saddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(nsaddr,
					    &r->rpool.cur->addr.v.a.addr,
					    &r->rpool.cur->addr.v.a.mask,
					    saddr, pd->af);
				}
				break;
			case PF_IN:
				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
					if (r->src.addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->src.addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(ndaddr,
						    &r->src.addr.p.dyn->
						    pfid_addr4,
						    &r->src.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->src.addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(ndaddr,
						    &r->src.addr.p.dyn->
						    pfid_addr6,
						    &r->src.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(ndaddr,
					    &r->src.addr.v.a.addr,
					    &r->src.addr.v.a.mask, daddr,
					    pd->af);
				}
				break;
			}
			break;
		case PF_RDR: {
			/*
			 * Redirection: outbound replies are rewritten using
			 * the rule's dst; inbound packets are mapped onto a
			 * pool address (and optionally a new port).
			 */
			switch (direction) {
			case PF_OUT:
				if (r->dst.addr.type == PF_ADDR_DYNIFTL) {
					if (r->dst.addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->dst.addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->dst.addr.p.dyn->
						    pfid_addr4,
						    &r->dst.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->dst.addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->dst.addr.p.dyn->
						    pfid_addr6,
						    &r->dst.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(nsaddr,
					    &r->dst.addr.v.a.addr,
					    &r->dst.addr.v.a.mask,
					    daddr, pd->af);
				}
				if (nsxport && r->dst.xport.range.port[0]) {
					nsxport->port =
					    r->dst.xport.range.port[0];
				}
				break;
			case PF_IN:
				/* Pick a pool destination address for the redirect. */
				if (pf_map_addr(pd->af, r, saddr,
				    ndaddr, NULL, sn)) {
					return NULL;
				}
				if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
				    PF_POOL_BITMASK) {
					PF_POOLMASK(ndaddr, ndaddr,
					    &r->rpool.cur->addr.v.a.mask, daddr,
					    pd->af);
				}

				if (nsxport && dxport) {
					if (r->rpool.proxy_port[1]) {
						/*
						 * Port range: map the offset of the
						 * original dst port into the proxy
						 * range, modulo the range size.
						 */
						u_int32_t       tmp_nport;

						tmp_nport =
						    ((ntohs(dxport->port) -
						    ntohs(r->dst.xport.range.
						    port[0])) %
						    (r->rpool.proxy_port[1] -
						    r->rpool.proxy_port[0] +
						    1)) + r->rpool.proxy_port[0];

						/* wrap around if necessary */
						if (tmp_nport > 65535) {
							tmp_nport -= 65535;
						}
						nsxport->port =
						    htons((u_int16_t)tmp_nport);
					} else if (r->rpool.proxy_port[0]) {
						/* Single proxy port configured. */
						nsxport->port = htons(r->rpool.
						    proxy_port[0]);
					}
				}
				break;
			}
			break;
		}
		default:
			return NULL;
		}
	}

	return r;
}
4131 
/*
 * Find the local socket that matches this packet's 4-tuple and record
 * its credentials (uid/gid) in pd->lookup.  Returns 1 when a matching
 * pcb exists, -1 otherwise (or on unsupported protocol/family).
 */
int
pf_socket_lookup(int direction, struct pf_pdesc *pd)
{
	struct pf_addr          *__single saddr, *__single daddr;
	u_int16_t                sport, dport;
	struct inpcbinfo        *__single pi;
	int                     inp = 0;

	if (pd == NULL) {
		return -1;
	}
	/* Reset lookup results to "unknown" before probing. */
	pd->lookup.uid = UID_MAX;
	pd->lookup.gid = GID_MAX;
	pd->lookup.pid = NO_PID;

	/* Only TCP and UDP have pcb tables to consult. */
	switch (pd->proto) {
	case IPPROTO_TCP:
		if (pf_pd_get_hdr_tcp(pd) == NULL) {
			return -1;
		}
		sport = pf_pd_get_hdr_tcp(pd)->th_sport;
		dport = pf_pd_get_hdr_tcp(pd)->th_dport;
		pi = &tcbinfo;
		break;
	case IPPROTO_UDP:
		if (pf_pd_get_hdr_udp(pd) == NULL) {
			return -1;
		}
		sport = pf_pd_get_hdr_udp(pd)->uh_sport;
		dport = pf_pd_get_hdr_udp(pd)->uh_dport;
		pi = &udbinfo;
		break;
	default:
		return -1;
	}
	/*
	 * pcb lookups are keyed from the local socket's perspective, so
	 * for outbound packets swap the tuple before searching.
	 */
	if (direction == PF_IN) {
		saddr = pd->src;
		daddr = pd->dst;
	} else {
		u_int16_t       p;

		p = sport;
		sport = dport;
		dport = p;
		saddr = pd->dst;
		daddr = pd->src;
	}
	switch (pd->af) {
#if INET
	case AF_INET:
		/*
		 * Try, in order: exact v4 match, exact v4-mapped-v6 match,
		 * then the same two with wildcard (listening) sockets.
		 */
		inp = in_pcblookup_hash_exists(pi, saddr->v4addr, sport, daddr->v4addr, dport,
		    0, &pd->lookup.uid, &pd->lookup.gid, NULL);
		if (inp == 0) {
			struct in6_addr s6, d6;

			/* Build ::ffff:a.b.c.d mapped forms of both addresses. */
			memset(&s6, 0, sizeof(s6));
			s6.s6_addr16[5] = htons(0xffff);
			memcpy(&s6.s6_addr32[3], &saddr->v4addr,
			    sizeof(saddr->v4addr));

			memset(&d6, 0, sizeof(d6));
			d6.s6_addr16[5] = htons(0xffff);
			memcpy(&d6.s6_addr32[3], &daddr->v4addr,
			    sizeof(daddr->v4addr));

			inp = in6_pcblookup_hash_exists(pi, &s6, sport, IFSCOPE_NONE,
			    &d6, dport, IFSCOPE_NONE, 0, &pd->lookup.uid, &pd->lookup.gid, NULL, false);
			if (inp == 0) {
				inp = in_pcblookup_hash_exists(pi, saddr->v4addr, sport,
				    daddr->v4addr, dport, INPLOOKUP_WILDCARD, &pd->lookup.uid, &pd->lookup.gid, NULL);
				if (inp == 0) {
					inp = in6_pcblookup_hash_exists(pi, &s6, sport, IFSCOPE_NONE,
					    &d6, dport, IFSCOPE_NONE, INPLOOKUP_WILDCARD,
					    &pd->lookup.uid, &pd->lookup.gid, NULL, false);
					if (inp == 0) {
						return -1;
					}
				}
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		/* Exact match first, then wildcard (listening) sockets. */
		inp = in6_pcblookup_hash_exists(pi, &saddr->v6addr, sport, IFSCOPE_UNKNOWN, &daddr->v6addr,
		    dport, IFSCOPE_UNKNOWN, 0, &pd->lookup.uid, &pd->lookup.gid, NULL, false);
		if (inp == 0) {
			inp = in6_pcblookup_hash_exists(pi, &saddr->v6addr, sport, IFSCOPE_UNKNOWN,
			    &daddr->v6addr, dport, IFSCOPE_UNKNOWN, INPLOOKUP_WILDCARD,
			    &pd->lookup.uid, &pd->lookup.gid, NULL, false);
			if (inp == 0) {
				return -1;
			}
		}
		break;

	default:
		return -1;
	}

	return 1;
}
4233 
4234 static __attribute__((noinline)) u_int8_t
pf_get_wscale(pbuf_t * pbuf,int off,u_int16_t th_off,sa_family_t af)4235 pf_get_wscale(pbuf_t *pbuf, int off, u_int16_t th_off, sa_family_t af)
4236 {
4237 	int              hlen;
4238 	u_int8_t         hdr[60];
4239 	u_int8_t        *opt, optlen;
4240 	u_int8_t         wscale = 0;
4241 
4242 	hlen = th_off << 2;             /* hlen <= sizeof (hdr) */
4243 	if (hlen <= (int)sizeof(struct tcphdr)) {
4244 		return 0;
4245 	}
4246 	if (!pf_pull_hdr(pbuf, off, hdr, sizeof(hdr), hlen, NULL, NULL, af)) {
4247 		return 0;
4248 	}
4249 	opt = hdr + sizeof(struct tcphdr);
4250 	hlen -= sizeof(struct tcphdr);
4251 	while (hlen >= 3) {
4252 		switch (*opt) {
4253 		case TCPOPT_EOL:
4254 		case TCPOPT_NOP:
4255 			++opt;
4256 			--hlen;
4257 			break;
4258 		case TCPOPT_WINDOW:
4259 			wscale = opt[2];
4260 			if (wscale > TCP_MAX_WINSHIFT) {
4261 				wscale = TCP_MAX_WINSHIFT;
4262 			}
4263 			wscale |= PF_WSCALE_FLAG;
4264 			OS_FALLTHROUGH;
4265 		default:
4266 			optlen = opt[1];
4267 			if (optlen < 2) {
4268 				optlen = 2;
4269 			}
4270 			hlen -= optlen;
4271 			opt += optlen;
4272 			break;
4273 		}
4274 	}
4275 	return wscale;
4276 }
4277 
4278 static __attribute__((noinline)) u_int16_t
pf_get_mss(pbuf_t * pbuf,int off,u_int16_t th_off,sa_family_t af)4279 pf_get_mss(pbuf_t *pbuf, int off, u_int16_t th_off, sa_family_t af)
4280 {
4281 	int              hlen;
4282 	u_int8_t         hdr[60];
4283 	u_int8_t        *opt, optlen;
4284 	u_int16_t        mss = tcp_mssdflt;
4285 
4286 	hlen = th_off << 2;     /* hlen <= sizeof (hdr) */
4287 	if (hlen <= (int)sizeof(struct tcphdr)) {
4288 		return 0;
4289 	}
4290 	if (!pf_pull_hdr(pbuf, off, hdr, sizeof(hdr), hlen, NULL, NULL, af)) {
4291 		return 0;
4292 	}
4293 	opt = hdr + sizeof(struct tcphdr);
4294 	hlen -= sizeof(struct tcphdr);
4295 	while (hlen >= TCPOLEN_MAXSEG) {
4296 		switch (*opt) {
4297 		case TCPOPT_EOL:
4298 		case TCPOPT_NOP:
4299 			++opt;
4300 			--hlen;
4301 			break;
4302 		case TCPOPT_MAXSEG:
4303 			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
4304 #if BYTE_ORDER != BIG_ENDIAN
4305 			NTOHS(mss);
4306 #endif
4307 			OS_FALLTHROUGH;
4308 		default:
4309 			optlen = opt[1];
4310 			if (optlen < 2) {
4311 				optlen = 2;
4312 			}
4313 			hlen -= optlen;
4314 			opt += optlen;
4315 			break;
4316 		}
4317 	}
4318 	return mss;
4319 }
4320 
4321 static __attribute__((noinline)) u_int16_t
pf_calc_mss(struct pf_addr * addr,sa_family_t af,u_int16_t offer)4322 pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
4323 {
4324 #if INET
4325 	struct sockaddr_in      *dst;
4326 	struct route             ro;
4327 #endif /* INET */
4328 	struct sockaddr_in6     *dst6;
4329 	struct route_in6         ro6;
4330 	struct rtentry          *rt = NULL;
4331 	int                      hlen;
4332 	u_int16_t                mss = tcp_mssdflt;
4333 
4334 	switch (af) {
4335 #if INET
4336 	case AF_INET:
4337 		hlen = sizeof(struct ip);
4338 		bzero(&ro, sizeof(ro));
4339 		dst = (struct sockaddr_in *)(void *)&ro.ro_dst;
4340 		dst->sin_family = AF_INET;
4341 		dst->sin_len = sizeof(*dst);
4342 		dst->sin_addr = addr->v4addr;
4343 		rtalloc(&ro);
4344 		rt = ro.ro_rt;
4345 		break;
4346 #endif /* INET */
4347 	case AF_INET6:
4348 		hlen = sizeof(struct ip6_hdr);
4349 		bzero(&ro6, sizeof(ro6));
4350 		dst6 = (struct sockaddr_in6 *)(void *)&ro6.ro_dst;
4351 		dst6->sin6_family = AF_INET6;
4352 		dst6->sin6_len = sizeof(*dst6);
4353 		dst6->sin6_addr = addr->v6addr;
4354 		rtalloc((struct route *)&ro);
4355 		rt = ro6.ro_rt;
4356 		break;
4357 	default:
4358 		panic("pf_calc_mss: not AF_INET or AF_INET6!");
4359 		return 0;
4360 	}
4361 
4362 	if (rt && rt->rt_ifp) {
4363 		/* This is relevant only for PF SYN Proxy */
4364 		int interface_mtu = rt->rt_ifp->if_mtu;
4365 
4366 		if (af == AF_INET &&
4367 		    INTF_ADJUST_MTU_FOR_CLAT46(rt->rt_ifp)) {
4368 			interface_mtu = IN6_LINKMTU(rt->rt_ifp);
4369 			/* Further adjust the size for CLAT46 expansion */
4370 			interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
4371 		}
4372 		mss = interface_mtu - hlen - sizeof(struct tcphdr);
4373 		mss = max(tcp_mssdflt, mss);
4374 		rtfree(rt);
4375 	}
4376 	mss = min(mss, offer);
4377 	mss = max(mss, 64);             /* sanity - at least max opt space */
4378 	return mss;
4379 }
4380 
4381 static void
pf_set_rt_ifp(struct pf_state * s,struct pf_addr * saddr,sa_family_t af)4382 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af)
4383 {
4384 	struct pf_rule *r = s->rule.ptr;
4385 
4386 	s->rt_kif = NULL;
4387 
4388 	if (!r->rt || r->rt == PF_FASTROUTE) {
4389 		return;
4390 	}
4391 	if ((af == AF_INET) || (af == AF_INET6)) {
4392 		pf_map_addr(af, r, saddr, &s->rt_addr, NULL,
4393 		    &s->nat_src_node);
4394 		s->rt_kif = r->rpool.cur->kif;
4395 	}
4396 
4397 	return;
4398 }
4399 
4400 static void
pf_attach_state(struct pf_state_key * sk,struct pf_state * s,int tail)4401 pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail)
4402 {
4403 	s->state_key = sk;
4404 	sk->refcnt++;
4405 
4406 	/* list is sorted, if-bound states before floating */
4407 	if (tail) {
4408 		TAILQ_INSERT_TAIL(&sk->states, s, next);
4409 	} else {
4410 		TAILQ_INSERT_HEAD(&sk->states, s, next);
4411 	}
4412 }
4413 
/*
 * Return the state key's flow ID to the flow-ID namespace, but only if
 * PF itself allocated it (flowsrc == FLOWSRC_PF).  No-op without SKYWALK.
 */
static void
pf_state_key_release_flowid(struct pf_state_key *sk)
{
#pragma unused (sk)
#if SKYWALK
	if ((sk->flowsrc == FLOWSRC_PF) && (sk->flowhash != 0)) {
		flowidns_release_flowid(sk->flowhash);
		/* Clear both fields so a later release is a no-op. */
		sk->flowhash = 0;
		sk->flowsrc = 0;
	}
#endif /* SKYWALK */
}
4426 
/*
 * Unlink state 's' from its state key and drop the key's reference.
 * When the last reference goes away, the key is removed from the lookup
 * trees (unless the caller asked to skip one via 'flags') and freed
 * together with its app state and flow ID.
 */
void
pf_detach_state(struct pf_state *s, int flags)
{
	struct pf_state_key     *sk = s->state_key;

	if (sk == NULL) {
		return;
	}

	s->state_key = NULL;
	TAILQ_REMOVE(&sk->states, s, next);
	if (--sk->refcnt == 0) {
		/* Last reference: tear the key down. */
		if (!(flags & PF_DT_SKIP_EXTGWY)) {
			pf_remove_state_key_ext_gwy(sk);
		}
		if (!(flags & PF_DT_SKIP_LANEXT)) {
			RB_REMOVE(pf_state_tree_lan_ext,
			    &pf_statetbl_lan_ext, sk);
		}
		if (sk->app_state) {
			pool_put(&pf_app_state_pl, sk->app_state);
		}
		pf_state_key_release_flowid(sk);
		pool_put(&pf_state_key_pl, sk);
	}
}
4453 
/*
 * Allocate a new state key, attach state 's' to it (at the head of the
 * key's state list), and optionally seed its addresses/ports/metadata
 * from the template 'psk'.  Guarantees the returned key has a flowhash,
 * allocating one from PF if the template did not carry one.
 * Returns NULL only on allocation failure.
 */
struct pf_state_key *
pf_alloc_state_key(struct pf_state *s, struct pf_state_key *psk)
{
	struct pf_state_key     *__single sk;

	if ((sk = pool_get(&pf_state_key_pl, PR_WAITOK)) == NULL) {
		return NULL;
	}
	bzero(sk, sizeof(*sk));
	TAILQ_INIT(&sk->states);
	pf_attach_state(sk, s, 0);

	/* initialize state key from psk, if provided */
	if (psk != NULL) {
		bcopy(&psk->lan, &sk->lan, sizeof(sk->lan));
		bcopy(&psk->gwy, &sk->gwy, sizeof(sk->gwy));
		bcopy(&psk->ext_lan, &sk->ext_lan, sizeof(sk->ext_lan));
		bcopy(&psk->ext_gwy, &sk->ext_gwy, sizeof(sk->ext_gwy));
		sk->af_lan = psk->af_lan;
		sk->af_gwy = psk->af_gwy;
		sk->proto = psk->proto;
		sk->direction = psk->direction;
		sk->proto_variant = psk->proto_variant;
		VERIFY(psk->app_state == NULL);
		/* a template must never carry a PF-owned flow ID */
		ASSERT(psk->flowsrc != FLOWSRC_PF);
		sk->flowsrc = psk->flowsrc;
		sk->flowhash = psk->flowhash;
		/* don't touch tree entries, states and refcnt on sk */
	}

	if (sk->flowhash == 0) {
		ASSERT(sk->flowsrc == 0);
		sk->flowsrc = FLOWSRC_PF;
		sk->flowhash = pf_calc_state_key_flowhash(sk);
	}

	return sk;
}
4492 
4493 static __attribute__((noinline)) u_int32_t
pf_tcp_iss(struct pf_pdesc * pd)4494 pf_tcp_iss(struct pf_pdesc *pd)
4495 {
4496 	MD5_CTX ctx;
4497 	u_int32_t digest[4];
4498 
4499 	if (pf_tcp_secret_init == 0) {
4500 		read_frandom(pf_tcp_secret, sizeof(pf_tcp_secret));
4501 		MD5Init(&pf_tcp_secret_ctx);
4502 		MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
4503 		    sizeof(pf_tcp_secret));
4504 		pf_tcp_secret_init = 1;
4505 	}
4506 	ctx = pf_tcp_secret_ctx;
4507 
4508 	MD5Update(&ctx, (char *)&pf_pd_get_hdr_tcp(pd)->th_sport, sizeof(u_short));
4509 	MD5Update(&ctx, (char *)&pf_pd_get_hdr_tcp(pd)->th_dport, sizeof(u_short));
4510 	if (pd->af == AF_INET6) {
4511 		MD5Update(&ctx, (char *)&pd->src->v6addr, sizeof(struct in6_addr));
4512 		MD5Update(&ctx, (char *)&pd->dst->v6addr, sizeof(struct in6_addr));
4513 	} else {
4514 		MD5Update(&ctx, (char *)&pd->src->v4addr, sizeof(struct in_addr));
4515 		MD5Update(&ctx, (char *)&pd->dst->v4addr, sizeof(struct in_addr));
4516 	}
4517 	MD5Final((u_char *)digest, &ctx);
4518 	pf_tcp_iss_off += 4096;
4519 	return digest[0] + random() + pf_tcp_iss_off;
4520 }
4521 
4522 /*
4523  * This routine is called to perform address family translation on the
4524  * inner IP header (that may come as payload) of an ICMP(v4addr/6) error
4525  * response.
4526  */
static __attribute__((noinline)) int
pf_change_icmp_af(pbuf_t *pbuf, int off,
    struct pf_pdesc *pd, struct pf_pdesc *pd2, struct pf_addr *src,
    struct pf_addr *dst, sa_family_t af, sa_family_t naf)
{
	struct ip               *__single ip4 = NULL;
	struct ip6_hdr          *__single ip6 = NULL;
	void                    *__single hdr;
	int                      hlen, olen;
	uint64_t                ipid_salt = (uint64_t)pbuf_get_packet_buffer_address(pbuf);

	/* Only a genuine v4<->v6 translation makes sense here. */
	if (af == naf || (af != AF_INET && af != AF_INET6) ||
	    (naf != AF_INET && naf != AF_INET6)) {
		return -1;
	}

	/* old header */
	olen = pd2->off - off;
	/* new header */
	hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);

	/* Modify the pbuf to accommodate the new header */
	hdr = pbuf_resize_segment(pbuf, off, olen, hlen);
	if (hdr == NULL) {
		return -1;
	}

	/* translate inner ip/ip6 header */
	switch (naf) {
	case AF_INET:
		/* Build a fresh IPv4 header in place of the old IPv6 one. */
		ip4 = hdr;
		bzero(ip4, sizeof(*ip4));
		ip4->ip_v   = IPVERSION;
		ip4->ip_hl  = sizeof(*ip4) >> 2;
		ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - olen);
		ip4->ip_id  = rfc6864 ? 0 : htons(ip_randomid(ipid_salt));
		ip4->ip_off = htons(IP_DF);
		ip4->ip_ttl = pd2->ttl;
		/* the inner ICMP type flips along with the outer family */
		if (pd2->proto == IPPROTO_ICMPV6) {
			ip4->ip_p = IPPROTO_ICMP;
		} else {
			ip4->ip_p = pd2->proto;
		}
		ip4->ip_src = src->v4addr;
		ip4->ip_dst = dst->v4addr;
		ip4->ip_sum = pbuf_inet_cksum(pbuf, 0, 0, ip4->ip_hl << 2);
		break;
	case AF_INET6:
		/* Build a fresh IPv6 header in place of the old IPv4 one. */
		ip6 = hdr;
		bzero(ip6, sizeof(*ip6));
		ip6->ip6_vfc  = IPV6_VERSION;
		ip6->ip6_plen = htons(pd2->tot_len - olen);
		if (pd2->proto == IPPROTO_ICMP) {
			ip6->ip6_nxt = IPPROTO_ICMPV6;
		} else {
			ip6->ip6_nxt = pd2->proto;
		}
		/* clamp a zero or oversized hop limit to the default */
		if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM) {
			ip6->ip6_hlim = IPV6_DEFHLIM;
		} else {
			ip6->ip6_hlim = pd2->ttl;
		}
		ip6->ip6_src  = src->v6addr;
		ip6->ip6_dst  = dst->v6addr;
		break;
	}

	/* adjust payload offset and total packet length */
	pd2->off += hlen - olen;
	pd->tot_len += hlen - olen;

	return 0;
}
4600 
/* Byte offset of a field within the IPv4/IPv6 header (for ICMP pptr mapping). */
#define PTR_IP(field)   ((int32_t)offsetof(struct ip, field))
#define PTR_IP6(field)  ((int32_t)offsetof(struct ip6_hdr, field))
4603 
/*
 * Translate an ICMP header between families in place: 'af' is the
 * TARGET family, so AF_INET means 'arg' currently holds an ICMPv6
 * header being converted to ICMPv4 semantics, and vice versa.
 * Type, code, MTU and pointer fields are remapped; returns 0 on
 * success, -1 for messages with no equivalent in the other family.
 */
static __attribute__((noinline)) int
pf_translate_icmp_af(int af, void *arg)
{
	struct icmp             *__single icmp4;
	struct icmp6_hdr        *__single icmp6;
	u_int32_t                mtu;
	int32_t                  ptr = -1;
	u_int8_t                 type;
	u_int8_t                 code;

	switch (af) {
	case AF_INET:
		/* ICMPv6 -> ICMPv4 */
		icmp6 = (struct icmp6_hdr * __single)arg;
		type  = icmp6->icmp6_type;
		code  = icmp6->icmp6_code;
		mtu   = ntohl(icmp6->icmp6_mtu);

		switch (type) {
		case ICMP6_ECHO_REQUEST:
			type = ICMP_ECHO;
			break;
		case ICMP6_ECHO_REPLY:
			type = ICMP_ECHOREPLY;
			break;
		case ICMP6_DST_UNREACH:
			type = ICMP_UNREACH;
			switch (code) {
			case ICMP6_DST_UNREACH_NOROUTE:
			case ICMP6_DST_UNREACH_BEYONDSCOPE:
			case ICMP6_DST_UNREACH_ADDR:
				code = ICMP_UNREACH_HOST;
				break;
			case ICMP6_DST_UNREACH_ADMIN:
				code = ICMP_UNREACH_HOST_PROHIB;
				break;
			case ICMP6_DST_UNREACH_NOPORT:
				code = ICMP_UNREACH_PORT;
				break;
			default:
				return -1;
			}
			break;
		case ICMP6_PACKET_TOO_BIG:
			type = ICMP_UNREACH;
			code = ICMP_UNREACH_NEEDFRAG;
			/* IPv6 header is 20 bytes larger than IPv4's */
			mtu -= 20;
			break;
		case ICMP6_TIME_EXCEEDED:
			type = ICMP_TIMXCEED;
			break;
		case ICMP6_PARAM_PROB:
			switch (code) {
			case ICMP6_PARAMPROB_HEADER:
				type = ICMP_PARAMPROB;
				code = ICMP_PARAMPROB_ERRATPTR;
				ptr  = ntohl(icmp6->icmp6_pptr);

				/* remap the offending-field offset onto the v4 header */
				if (ptr == PTR_IP6(ip6_vfc)) {
					; /* preserve */
				} else if (ptr == PTR_IP6(ip6_vfc) + 1) {
					ptr = PTR_IP(ip_tos);
				} else if (ptr == PTR_IP6(ip6_plen) ||
				    ptr == PTR_IP6(ip6_plen) + 1) {
					ptr = PTR_IP(ip_len);
				} else if (ptr == PTR_IP6(ip6_nxt)) {
					ptr = PTR_IP(ip_p);
				} else if (ptr == PTR_IP6(ip6_hlim)) {
					ptr = PTR_IP(ip_ttl);
				} else if (ptr >= PTR_IP6(ip6_src) &&
				    ptr < PTR_IP6(ip6_dst)) {
					ptr = PTR_IP(ip_src);
				} else if (ptr >= PTR_IP6(ip6_dst) &&
				    ptr < (int32_t)sizeof(struct ip6_hdr)) {
					ptr = PTR_IP(ip_dst);
				} else {
					return -1;
				}
				break;
			case ICMP6_PARAMPROB_NEXTHEADER:
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_PROTOCOL;
				break;
			default:
				return -1;
			}
			break;
		default:
			return -1;
		}
		/* write translated fields back into the (reused) header */
		icmp6->icmp6_type = type;
		icmp6->icmp6_code = code;
		/* aligns well with a icmpv4 nextmtu */
		icmp6->icmp6_mtu = htonl(mtu);
		/* icmpv4 pptr is a one most significant byte */
		if (ptr >= 0) {
			icmp6->icmp6_pptr = htonl(ptr << 24);
		}
		break;

	case AF_INET6:
		/* ICMPv4 -> ICMPv6 */
		icmp4 = (struct icmp* __single)arg;
		type  = icmp4->icmp_type;
		code  = icmp4->icmp_code;
		mtu   = ntohs(icmp4->icmp_nextmtu);

		switch (type) {
		case ICMP_ECHO:
			type = ICMP6_ECHO_REQUEST;
			break;
		case ICMP_ECHOREPLY:
			type = ICMP6_ECHO_REPLY;
			break;
		case ICMP_UNREACH:
			type = ICMP6_DST_UNREACH;
			switch (code) {
			case ICMP_UNREACH_NET:
			case ICMP_UNREACH_HOST:
			case ICMP_UNREACH_NET_UNKNOWN:
			case ICMP_UNREACH_HOST_UNKNOWN:
			case ICMP_UNREACH_ISOLATED:
			case ICMP_UNREACH_TOSNET:
			case ICMP_UNREACH_TOSHOST:
				code = ICMP6_DST_UNREACH_NOROUTE;
				break;
			case ICMP_UNREACH_PORT:
				code = ICMP6_DST_UNREACH_NOPORT;
				break;
			case ICMP_UNREACH_NET_PROHIB:
			case ICMP_UNREACH_HOST_PROHIB:
			case ICMP_UNREACH_FILTER_PROHIB:
			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
				code = ICMP6_DST_UNREACH_ADMIN;
				break;
			case ICMP_UNREACH_PROTOCOL:
				type = ICMP6_PARAM_PROB;
				code = ICMP6_PARAMPROB_NEXTHEADER;
				ptr  = offsetof(struct ip6_hdr, ip6_nxt);
				break;
			case ICMP_UNREACH_NEEDFRAG:
				type = ICMP6_PACKET_TOO_BIG;
				code = 0;
				/* IPv6 header is 20 bytes larger than IPv4's */
				mtu += 20;
				break;
			default:
				return -1;
			}
			break;
		case ICMP_TIMXCEED:
			type = ICMP6_TIME_EXCEEDED;
			break;
		case ICMP_PARAMPROB:
			type = ICMP6_PARAM_PROB;
			switch (code) {
			case ICMP_PARAMPROB_ERRATPTR:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			case ICMP_PARAMPROB_LENGTH:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			default:
				return -1;
			}

			/* remap the offending-field offset onto the v6 header */
			ptr = icmp4->icmp_pptr;
			if (ptr == 0 || ptr == PTR_IP(ip_tos)) {
				; /* preserve */
			} else if (ptr == PTR_IP(ip_len) ||
			    ptr == PTR_IP(ip_len) + 1) {
				ptr = PTR_IP6(ip6_plen);
			} else if (ptr == PTR_IP(ip_ttl)) {
				ptr = PTR_IP6(ip6_hlim);
			} else if (ptr == PTR_IP(ip_p)) {
				ptr = PTR_IP6(ip6_nxt);
			} else if (ptr >= PTR_IP(ip_src) &&
			    ptr < PTR_IP(ip_dst)) {
				ptr = PTR_IP6(ip6_src);
			} else if (ptr >= PTR_IP(ip_dst) &&
			    ptr < (int32_t)sizeof(struct ip)) {
				ptr = PTR_IP6(ip6_dst);
			} else {
				return -1;
			}
			break;
		default:
			return -1;
		}
		/* write translated fields back into the (reused) header */
		icmp4->icmp_type = type;
		icmp4->icmp_code = code;
		icmp4->icmp_nextmtu = htons(mtu);
		if (ptr >= 0) {
			icmp4->icmp_void = htonl(ptr);
		}
		break;
	}

	return 0;
}
4801 
/*
 * NAT64, IPv6 -> IPv4 direction: strip the IPv6 header, synthesize an
 * IPv4 header from pd->naddr/pd->ndaddr, fix up checksums, and re-inject
 * the packet through ip_input().
 * Note: frees pbuf if PF_NAT64 is returned.
 */
static __attribute__((noinline)) int
pf_nat64_ipv6(pbuf_t *pbuf, int off, struct pf_pdesc *pd)
{
	struct ip               *ip4;
	struct mbuf *m;

	/*
	 * ip_input asserts for rcvif to be not NULL
	 * That may not be true for two corner cases
	 * 1. If for some reason a local app sends DNS
	 * AAAA query to local host
	 * 2. If IPv6 stack in kernel internally generates a
	 * message destined for a synthesized IPv6 end-point.
	 */
	if (pbuf->pb_ifp == NULL) {
		return PF_DROP;
	}

	/* Replace the first 'off' bytes (v6 header + exts) with a v4 header. */
	ip4 = (struct ip *)pbuf_resize_segment(pbuf, 0, off, sizeof(*ip4));
	if (ip4 == NULL) {
		return PF_DROP;
	}

	ip4->ip_v   = 4;
	ip4->ip_hl  = 5;
	/*
	 * NOTE(review): masking the (presumably 8-bit) pd->tos with
	 * htonl(0x0ff00000) looks questionable -- on either byte order the
	 * result differs from a plain traffic-class copy; verify intent.
	 */
	ip4->ip_tos = pd->tos & htonl(0x0ff00000);
	ip4->ip_len = htons(sizeof(*ip4) + (pd->tot_len - off));
	ip4->ip_id  = 0;
	ip4->ip_off = htons(IP_DF);
	ip4->ip_ttl = pd->ttl;
	ip4->ip_p   = pd->proto;
	ip4->ip_sum = 0;
	ip4->ip_src = pd->naddr.v4addr;
	ip4->ip_dst = pd->ndaddr.v4addr;
	ip4->ip_sum = pbuf_inet_cksum(pbuf, 0, 0, ip4->ip_hl << 2);

	/* recalculate icmp checksums */
	if (pd->proto == IPPROTO_ICMP) {
		struct icmp *icmp;
		int hlen = sizeof(*ip4);

		icmp = (struct icmp *)pbuf_contig_segment(pbuf, hlen,
		    ICMP_MINLEN);
		if (icmp == NULL) {
			return PF_DROP;
		}

		icmp->icmp_cksum = 0;
		icmp->icmp_cksum = pbuf_inet_cksum(pbuf, 0, hlen,
		    ntohs(ip4->ip_len) - hlen);
	}

	/* Hand the translated packet back to the IPv4 input path. */
	if ((m = pbuf_to_mbuf(pbuf, TRUE)) != NULL) {
		ip_input(m);
	}

	return PF_NAT64;
}
4861 
/*
 * NAT64, IPv4 -> IPv6 direction: strip the IPv4 header, synthesize an
 * IPv6 header from pd->naddr/pd->ndaddr, fix up checksums, and re-inject
 * the packet through ip6_input().  Frees pbuf when PF_NAT64 is returned.
 */
static __attribute__((noinline)) int
pf_nat64_ipv4(pbuf_t *pbuf, int off, struct pf_pdesc *pd)
{
	struct ip6_hdr          *ip6;
	struct mbuf *m;

	/* ip6_input requires a receive interface (see pf_nat64_ipv6). */
	if (pbuf->pb_ifp == NULL) {
		return PF_DROP;
	}

	/* Replace the first 'off' bytes (v4 header + options) with a v6 header. */
	ip6 = (struct ip6_hdr *)pbuf_resize_segment(pbuf, 0, off, sizeof(*ip6));
	if (ip6 == NULL) {
		return PF_DROP;
	}

	/*
	 * NOTE(review): ip6_vfc is a single byte in the BSD ip6_hdr, so
	 * assigning htonl((6 << 28) | (pd->tos << 20)) truncates; confirm
	 * whether ip6_flow was intended here.
	 */
	ip6->ip6_vfc  = htonl((6 << 28) | (pd->tos << 20));
	ip6->ip6_plen = htons(pd->tot_len - off);
	ip6->ip6_nxt  = pd->proto;
	ip6->ip6_hlim = pd->ttl;
	ip6->ip6_src = pd->naddr.v6addr;
	ip6->ip6_dst = pd->ndaddr.v6addr;

	/* recalculate icmp6 checksums */
	if (pd->proto == IPPROTO_ICMPV6) {
		struct icmp6_hdr *icmp6;
		int hlen = sizeof(*ip6);

		icmp6 = (struct icmp6_hdr *)pbuf_contig_segment(pbuf, hlen,
		    sizeof(*icmp6));
		if (icmp6 == NULL) {
			return PF_DROP;
		}

		icmp6->icmp6_cksum = 0;
		icmp6->icmp6_cksum = pbuf_inet6_cksum(pbuf,
		    IPPROTO_ICMPV6, hlen,
		    ntohs(ip6->ip6_plen));
	} else if (pd->proto == IPPROTO_UDP) {
		struct udphdr *uh;
		int hlen = sizeof(*ip6);

		uh = (struct udphdr *)pbuf_contig_segment(pbuf, hlen,
		    sizeof(*uh));
		if (uh == NULL) {
			return PF_DROP;
		}

		/* UDP checksum is optional over IPv4 but mandatory over IPv6 */
		if (uh->uh_sum == 0) {
			uh->uh_sum = pbuf_inet6_cksum(pbuf, IPPROTO_UDP,
			    hlen, ntohs(ip6->ip6_plen));
		}
	}

	/* Hand the translated packet back to the IPv6 input path. */
	if ((m = pbuf_to_mbuf(pbuf, TRUE)) != NULL) {
		ip6_input(m);
	}

	return PF_NAT64;
}
4921 
4922 static __attribute__((noinline)) int
pf_test_rule(struct pf_rule ** rm,struct pf_state ** sm,int direction,struct pfi_kif * kif,pbuf_t * pbuf,int off,void * h,struct pf_pdesc * pd,struct pf_rule ** am,struct pf_ruleset ** rsm,struct ifqueue * ifq)4923 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
4924     struct pfi_kif *kif, pbuf_t *pbuf, int off, void *h,
4925     struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
4926     struct ifqueue *ifq)
4927 {
4928 #pragma unused(h)
4929 	struct pf_rule          *__single nr = NULL;
4930 	struct pf_addr          *__single saddr = pd->src, *__single daddr = pd->dst;
4931 	sa_family_t              af = pd->af;
4932 	struct pf_rule          *__single r, *__single a = NULL;
4933 	struct pf_ruleset       *__single ruleset = NULL;
4934 	struct pf_src_node      *__single nsn = NULL;
4935 	struct tcphdr           *__single th = pf_pd_get_hdr_tcp(pd);
4936 	struct udphdr           *__single uh = pf_pd_get_hdr_udp(pd);
4937 	u_short                  reason;
4938 	int                      rewrite = 0, hdrlen = 0;
4939 	int                      tag = -1;
4940 	unsigned int             rtableid = IFSCOPE_NONE;
4941 	int                      asd = 0;
4942 	int                      match = 0;
4943 	int                      state_icmp = 0;
4944 	u_int16_t                mss = tcp_mssdflt;
4945 	u_int8_t                 icmptype = 0, icmpcode = 0;
4946 #if SKYWALK
4947 	struct ns_token *__single nstoken = NULL;
4948 #endif
4949 
4950 	struct pf_grev1_hdr     *__single grev1 = pf_pd_get_hdr_grev1(pd);
4951 	union pf_state_xport bxport, bdxport, nxport, sxport, dxport;
4952 	struct pf_state_key      psk;
4953 
4954 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
4955 
4956 	PD_CLEAR_STATE_FLOWID(pd);
4957 
4958 	if (direction == PF_IN && pf_check_congestion(ifq)) {
4959 		REASON_SET(&reason, PFRES_CONGEST);
4960 		return PF_DROP;
4961 	}
4962 
4963 	hdrlen = 0;
4964 	sxport.spi = 0;
4965 	dxport.spi = 0;
4966 	nxport.spi = 0;
4967 
4968 	switch (pd->proto) {
4969 	case IPPROTO_TCP:
4970 		sxport.port = th->th_sport;
4971 		dxport.port = th->th_dport;
4972 		hdrlen = sizeof(*th);
4973 		break;
4974 	case IPPROTO_UDP:
4975 		sxport.port = uh->uh_sport;
4976 		dxport.port = uh->uh_dport;
4977 		hdrlen = sizeof(*uh);
4978 		break;
4979 #if INET
4980 	case IPPROTO_ICMP:
4981 		if (pd->af != AF_INET) {
4982 			break;
4983 		}
4984 		sxport.port = dxport.port = pf_pd_get_hdr_icmp(pd)->icmp_id;
4985 		hdrlen = ICMP_MINLEN;
4986 		icmptype = pf_pd_get_hdr_icmp(pd)->icmp_type;
4987 		icmpcode = pf_pd_get_hdr_icmp(pd)->icmp_code;
4988 
4989 		if (ICMP_ERRORTYPE(icmptype)) {
4990 			state_icmp++;
4991 		}
4992 		break;
4993 #endif /* INET */
4994 	case IPPROTO_ICMPV6:
4995 		if (pd->af != AF_INET6) {
4996 			break;
4997 		}
4998 		sxport.port = dxport.port = pf_pd_get_hdr_icmp6(pd)->icmp6_id;
4999 		hdrlen = sizeof(*pf_pd_get_hdr_icmp6(pd));
5000 		icmptype = pf_pd_get_hdr_icmp6(pd)->icmp6_type;
5001 		icmpcode = pf_pd_get_hdr_icmp6(pd)->icmp6_code;
5002 
5003 		if (ICMP6_ERRORTYPE(icmptype)) {
5004 			state_icmp++;
5005 		}
5006 		break;
5007 	case IPPROTO_GRE:
5008 		if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
5009 			sxport.call_id = dxport.call_id =
5010 			    pf_pd_get_hdr_grev1(pd)->call_id;
5011 			hdrlen = sizeof(*pf_pd_get_hdr_grev1(pd));
5012 		}
5013 		break;
5014 	case IPPROTO_ESP:
5015 		sxport.spi = 0;
5016 		dxport.spi = pf_pd_get_hdr_esp(pd)->spi;
5017 		hdrlen = sizeof(*pf_pd_get_hdr_esp(pd));
5018 		break;
5019 	}
5020 
5021 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
5022 
5023 	bxport = sxport;
5024 	bdxport = dxport;
5025 
5026 	if (direction == PF_OUT) {
5027 		nxport = sxport;
5028 	} else {
5029 		nxport = dxport;
5030 	}
5031 
5032 	/* check packet for BINAT/NAT/RDR */
5033 	if ((nr = pf_get_translation_aux(pd, pbuf, off, direction, kif, &nsn,
5034 	    saddr, &sxport, daddr, &dxport, &nxport
5035 #if SKYWALK
5036 	    , &nstoken
5037 #endif
5038 	    )) != NULL) {
5039 		int ua;
5040 		u_int16_t dport;
5041 
5042 		if (pd->af != pd->naf) {
5043 			ua = 0;
5044 		} else {
5045 			ua = 1;
5046 		}
5047 
5048 		PF_ACPY(&pd->baddr, saddr, af);
5049 		PF_ACPY(&pd->bdaddr, daddr, af);
5050 
5051 		switch (pd->proto) {
5052 		case IPPROTO_TCP:
5053 			if (pd->af != pd->naf ||
5054 			    PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5055 				pf_change_ap(direction, pd->mp, saddr,
5056 				    &th->th_sport, pd->ip_sum, &th->th_sum,
5057 				    &pd->naddr, nxport.port, 0, af,
5058 				    pd->naf, ua);
5059 				sxport.port = th->th_sport;
5060 			}
5061 
5062 			if (pd->af != pd->naf ||
5063 			    PF_ANEQ(daddr, &pd->ndaddr, pd->af) ||
5064 			    (nr && (nr->action == PF_RDR) &&
5065 			    (th->th_dport != nxport.port))) {
5066 				if (nr && nr->action == PF_RDR) {
5067 					dport = nxport.port;
5068 				} else {
5069 					dport = th->th_dport;
5070 				}
5071 				pf_change_ap(direction, pd->mp, daddr,
5072 				    &th->th_dport, pd->ip_sum,
5073 				    &th->th_sum, &pd->ndaddr,
5074 				    dport, 0, af, pd->naf, ua);
5075 				dxport.port = th->th_dport;
5076 			}
5077 			rewrite++;
5078 			break;
5079 
5080 		case IPPROTO_UDP:
5081 			if (pd->af != pd->naf ||
5082 			    PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5083 				pf_change_ap(direction, pd->mp, saddr,
5084 				    &uh->uh_sport, pd->ip_sum,
5085 				    &uh->uh_sum, &pd->naddr,
5086 				    nxport.port, 1, af, pd->naf, ua);
5087 				sxport.port = uh->uh_sport;
5088 			}
5089 
5090 			if (pd->af != pd->naf ||
5091 			    PF_ANEQ(daddr, &pd->ndaddr, pd->af) ||
5092 			    (nr && (nr->action == PF_RDR) &&
5093 			    (uh->uh_dport != nxport.port))) {
5094 				if (nr && nr->action == PF_RDR) {
5095 					dport = nxport.port;
5096 				} else {
5097 					dport = uh->uh_dport;
5098 				}
5099 				pf_change_ap(direction, pd->mp, daddr,
5100 				    &uh->uh_dport, pd->ip_sum,
5101 				    &uh->uh_sum, &pd->ndaddr,
5102 				    dport, 0, af, pd->naf, ua);
5103 				dxport.port = uh->uh_dport;
5104 			}
5105 			rewrite++;
5106 			break;
5107 #if INET
5108 		case IPPROTO_ICMP:
5109 			if (pd->af != AF_INET) {
5110 				break;
5111 			}
5112 			/*
5113 			 * TODO:
5114 			 * pd->af != pd->naf not handled yet here and would be
5115 			 * needed for NAT46 needed to support XLAT.
5116 			 * Will cross the bridge when it comes.
5117 			 */
5118 			if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5119 				pf_change_a(&saddr->v4addr.s_addr, pd->ip_sum,
5120 				    pd->naddr.v4addr.s_addr, 0);
5121 				pf_pd_get_hdr_icmp(pd)->icmp_cksum = pf_cksum_fixup(
5122 					pf_pd_get_hdr_icmp(pd)->icmp_cksum, sxport.port,
5123 					nxport.port, 0);
5124 				pf_pd_get_hdr_icmp(pd)->icmp_id = nxport.port;
5125 			}
5126 
5127 			if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5128 				pf_change_a(&daddr->v4addr.s_addr, pd->ip_sum,
5129 				    pd->ndaddr.v4addr.s_addr, 0);
5130 			}
5131 			++rewrite;
5132 			break;
5133 #endif /* INET */
5134 		case IPPROTO_ICMPV6:
5135 			if (pd->af != AF_INET6) {
5136 				break;
5137 			}
5138 
5139 			if (pd->af != pd->naf ||
5140 			    PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5141 				pf_change_addr(saddr,
5142 				    &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum,
5143 				    &pd->naddr, 0, pd->af, pd->naf);
5144 			}
5145 
5146 			if (pd->af != pd->naf ||
5147 			    PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5148 				pf_change_addr(daddr,
5149 				    &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum,
5150 				    &pd->ndaddr, 0, pd->af, pd->naf);
5151 			}
5152 
5153 			if (pd->af != pd->naf) {
5154 				if (pf_translate_icmp_af(AF_INET,
5155 				    pf_pd_get_hdr_icmp6(pd))) {
5156 					return PF_DROP;
5157 				}
5158 				pd->proto = IPPROTO_ICMP;
5159 			}
5160 			rewrite++;
5161 			break;
5162 		case IPPROTO_GRE:
5163 			if ((direction == PF_IN) &&
5164 			    (pd->proto_variant == PF_GRE_PPTP_VARIANT)) {
5165 				grev1->call_id = nxport.call_id;
5166 			}
5167 
5168 			switch (pd->af) {
5169 #if INET
5170 			case AF_INET:
5171 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5172 					pf_change_a(&saddr->v4addr.s_addr,
5173 					    pd->ip_sum,
5174 					    pd->naddr.v4addr.s_addr, 0);
5175 				}
5176 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5177 					pf_change_a(&daddr->v4addr.s_addr,
5178 					    pd->ip_sum,
5179 					    pd->ndaddr.v4addr.s_addr, 0);
5180 				}
5181 				break;
5182 #endif /* INET */
5183 			case AF_INET6:
5184 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5185 					PF_ACPY(saddr, &pd->naddr, AF_INET6);
5186 				}
5187 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5188 					PF_ACPY(daddr, &pd->ndaddr, AF_INET6);
5189 				}
5190 				break;
5191 			}
5192 			++rewrite;
5193 			break;
5194 		case IPPROTO_ESP:
5195 			if (direction == PF_OUT) {
5196 				bxport.spi = 0;
5197 			}
5198 
5199 			switch (pd->af) {
5200 #if INET
5201 			case AF_INET:
5202 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5203 					pf_change_a(&saddr->v4addr.s_addr,
5204 					    pd->ip_sum, pd->naddr.v4addr.s_addr, 0);
5205 				}
5206 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5207 					pf_change_a(&daddr->v4addr.s_addr,
5208 					    pd->ip_sum,
5209 					    pd->ndaddr.v4addr.s_addr, 0);
5210 				}
5211 				break;
5212 #endif /* INET */
5213 			case AF_INET6:
5214 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5215 					PF_ACPY(saddr, &pd->naddr, AF_INET6);
5216 				}
5217 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5218 					PF_ACPY(daddr, &pd->ndaddr, AF_INET6);
5219 				}
5220 				break;
5221 			}
5222 			break;
5223 		default:
5224 			switch (pd->af) {
5225 #if INET
5226 			case AF_INET:
5227 				if ((pd->naf != AF_INET) ||
5228 				    (PF_ANEQ(saddr, &pd->naddr, pd->af))) {
5229 					pf_change_addr(saddr, pd->ip_sum,
5230 					    &pd->naddr, 0, af, pd->naf);
5231 				}
5232 
5233 				if ((pd->naf != AF_INET) ||
5234 				    (PF_ANEQ(daddr, &pd->ndaddr, pd->af))) {
5235 					pf_change_addr(daddr, pd->ip_sum,
5236 					    &pd->ndaddr, 0, af, pd->naf);
5237 				}
5238 				break;
5239 #endif /* INET */
5240 			case AF_INET6:
5241 				if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5242 					PF_ACPY(saddr, &pd->naddr, af);
5243 				}
5244 				if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5245 					PF_ACPY(daddr, &pd->ndaddr, af);
5246 				}
5247 				break;
5248 			}
5249 			break;
5250 		}
5251 
5252 		if (nr->natpass) {
5253 			r = NULL;
5254 		}
5255 		pd->nat_rule = nr;
5256 		pd->af = pd->naf;
5257 	} else {
5258 #if SKYWALK
5259 		VERIFY(!NETNS_TOKEN_VALID(&nstoken));
5260 #endif
5261 	}
5262 
5263 	if (nr && nr->tag > 0) {
5264 		tag = nr->tag;
5265 	}
5266 
5267 	while (r != NULL) {
5268 		r->evaluations++;
5269 		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
5270 			r = r->skip[PF_SKIP_IFP].ptr;
5271 		} else if (r->direction && r->direction != direction) {
5272 			r = r->skip[PF_SKIP_DIR].ptr;
5273 		} else if (r->af && r->af != pd->af) {
5274 			r = r->skip[PF_SKIP_AF].ptr;
5275 		} else if (r->proto && r->proto != pd->proto) {
5276 			r = r->skip[PF_SKIP_PROTO].ptr;
5277 		} else if (PF_MISMATCHAW(&r->src.addr, saddr, pd->af,
5278 		    r->src.neg, kif)) {
5279 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
5280 		}
5281 		/* tcp/udp only. port_op always 0 in other cases */
5282 		else if (r->proto == pd->proto &&
5283 		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
5284 		    r->src.xport.range.op &&
5285 		    !pf_match_port(r->src.xport.range.op,
5286 		    r->src.xport.range.port[0], r->src.xport.range.port[1],
5287 		    th->th_sport)) {
5288 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
5289 		} else if (PF_MISMATCHAW(&r->dst.addr, daddr, pd->af,
5290 		    r->dst.neg, NULL)) {
5291 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
5292 		}
5293 		/* tcp/udp only. port_op always 0 in other cases */
5294 		else if (r->proto == pd->proto &&
5295 		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
5296 		    r->dst.xport.range.op &&
5297 		    !pf_match_port(r->dst.xport.range.op,
5298 		    r->dst.xport.range.port[0], r->dst.xport.range.port[1],
5299 		    th->th_dport)) {
5300 			r = r->skip[PF_SKIP_DST_PORT].ptr;
5301 		}
5302 		/* icmp only. type always 0 in other cases */
5303 		else if (r->type && r->type != icmptype + 1) {
5304 			r = TAILQ_NEXT(r, entries);
5305 		}
5306 		/* icmp only. type always 0 in other cases */
5307 		else if (r->code && r->code != icmpcode + 1) {
5308 			r = TAILQ_NEXT(r, entries);
5309 		} else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
5310 		    !(r->tos & pd->tos)) {
5311 			r = TAILQ_NEXT(r, entries);
5312 		} else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
5313 		    !(r->tos & (pd->tos & DSCP_MASK))) {
5314 			r = TAILQ_NEXT(r, entries);
5315 		} else if ((r->rule_flag & PFRULE_SC) && r->tos &&
5316 		    ((r->tos & SCIDX_MASK) != pd->sc)) {
5317 			r = TAILQ_NEXT(r, entries);
5318 		} else if (r->rule_flag & PFRULE_FRAGMENT) {
5319 			r = TAILQ_NEXT(r, entries);
5320 		} else if (pd->proto == IPPROTO_TCP &&
5321 		    (r->flagset & th->th_flags) != r->flags) {
5322 			r = TAILQ_NEXT(r, entries);
5323 		}
5324 		/* tcp/udp only. uid.op always 0 in other cases */
5325 		else if (r->uid.op && (pd->lookup.done || ((void)(pd->lookup.done =
5326 		    pf_socket_lookup(direction, pd)), 1)) &&
5327 		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
5328 		    pd->lookup.uid)) {
5329 			r = TAILQ_NEXT(r, entries);
5330 		}
5331 		/* tcp/udp only. gid.op always 0 in other cases */
5332 		else if (r->gid.op && (pd->lookup.done || ((void)(pd->lookup.done =
5333 		    pf_socket_lookup(direction, pd)), 1)) &&
5334 		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
5335 		    pd->lookup.gid)) {
5336 			r = TAILQ_NEXT(r, entries);
5337 		} else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
5338 			r = TAILQ_NEXT(r, entries);
5339 		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
5340 			r = TAILQ_NEXT(r, entries);
5341 		} else if (r->os_fingerprint != PF_OSFP_ANY &&
5342 		    (pd->proto != IPPROTO_TCP || !pf_osfp_match(
5343 			    pf_osfp_fingerprint(pd, pbuf, off, th),
5344 			    r->os_fingerprint))) {
5345 			r = TAILQ_NEXT(r, entries);
5346 		} else {
5347 			if (r->tag) {
5348 				tag = r->tag;
5349 			}
5350 			if (PF_RTABLEID_IS_VALID(r->rtableid)) {
5351 				rtableid = r->rtableid;
5352 			}
5353 			if (r->anchor == NULL) {
5354 				match = 1;
5355 				*rm = r;
5356 				*am = a;
5357 				*rsm = ruleset;
5358 				if ((*rm)->quick) {
5359 					break;
5360 				}
5361 				r = TAILQ_NEXT(r, entries);
5362 			} else {
5363 				pf_step_into_anchor(&asd, &ruleset,
5364 				    PF_RULESET_FILTER, &r, &a, &match);
5365 			}
5366 		}
5367 		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
5368 		    PF_RULESET_FILTER, &r, &a, &match)) {
5369 			break;
5370 		}
5371 	}
5372 	r = *rm;
5373 	a = *am;
5374 	ruleset = *rsm;
5375 
5376 	REASON_SET(&reason, PFRES_MATCH);
5377 
5378 	if (r->log || (nr != NULL && nr->log)) {
5379 		if (rewrite > 0) {
5380 			if (rewrite < off + pd->hdrlen) {
5381 				rewrite = off + pd->hdrlen;
5382 			}
5383 
5384 			if (pf_lazy_makewritable(pd, pbuf, rewrite) == NULL) {
5385 				REASON_SET(&reason, PFRES_MEMORY);
5386 #if SKYWALK
5387 				netns_release(&nstoken);
5388 #endif
5389 				return PF_DROP;
5390 			}
5391 			pbuf_copy_back(pbuf, off, pd->hdrlen, pf_pd_get_hdr_ptr_any(pd), pd->hdrlen);
5392 		}
5393 		PFLOG_PACKET(kif, h, pbuf, pd->af, direction, reason,
5394 		    r->log ? r : nr, a, ruleset, pd);
5395 	}
5396 
5397 	if ((r->action == PF_DROP) &&
5398 	    ((r->rule_flag & PFRULE_RETURNRST) ||
5399 	    (r->rule_flag & PFRULE_RETURNICMP) ||
5400 	    (r->rule_flag & PFRULE_RETURN))) {
5401 		/* undo NAT changes, if they have taken place */
5402 		/* XXX For NAT64 we are not reverting the changes */
5403 		if (nr != NULL && nr->action != PF_NAT64) {
5404 			if (direction == PF_OUT) {
5405 				pd->af = af;
5406 				switch (pd->proto) {
5407 				case IPPROTO_TCP:
5408 					pf_change_ap(direction, pd->mp, saddr,
5409 					    &th->th_sport, pd->ip_sum,
5410 					    &th->th_sum, &pd->baddr,
5411 					    bxport.port, 0, af, pd->af, 1);
5412 					sxport.port = th->th_sport;
5413 					rewrite++;
5414 					break;
5415 				case IPPROTO_UDP:
5416 					pf_change_ap(direction, pd->mp, saddr,
5417 					    &pf_pd_get_hdr_udp(pd)->uh_sport, pd->ip_sum,
5418 					    &pf_pd_get_hdr_udp(pd)->uh_sum, &pd->baddr,
5419 					    bxport.port, 1, af, pd->af, 1);
5420 					sxport.port = pf_pd_get_hdr_udp(pd)->uh_sport;
5421 					rewrite++;
5422 					break;
5423 				case IPPROTO_ICMP:
5424 				case IPPROTO_ICMPV6:
5425 					/* nothing! */
5426 					break;
5427 				case IPPROTO_GRE:
5428 					PF_ACPY(&pd->baddr, saddr, af);
5429 					++rewrite;
5430 					switch (af) {
5431 #if INET
5432 					case AF_INET:
5433 						pf_change_a(&saddr->v4addr.s_addr,
5434 						    pd->ip_sum,
5435 						    pd->baddr.v4addr.s_addr, 0);
5436 						break;
5437 #endif /* INET */
5438 					case AF_INET6:
5439 						PF_ACPY(saddr, &pd->baddr,
5440 						    AF_INET6);
5441 						break;
5442 					}
5443 					break;
5444 				case IPPROTO_ESP:
5445 					PF_ACPY(&pd->baddr, saddr, af);
5446 					switch (af) {
5447 #if INET
5448 					case AF_INET:
5449 						pf_change_a(&saddr->v4addr.s_addr,
5450 						    pd->ip_sum,
5451 						    pd->baddr.v4addr.s_addr, 0);
5452 						break;
5453 #endif /* INET */
5454 					case AF_INET6:
5455 						PF_ACPY(saddr, &pd->baddr,
5456 						    AF_INET6);
5457 						break;
5458 					}
5459 					break;
5460 				default:
5461 					switch (af) {
5462 					case AF_INET:
5463 						pf_change_a(&saddr->v4addr.s_addr,
5464 						    pd->ip_sum,
5465 						    pd->baddr.v4addr.s_addr, 0);
5466 						break;
5467 					case AF_INET6:
5468 						PF_ACPY(saddr, &pd->baddr, af);
5469 						break;
5470 					}
5471 				}
5472 			} else {
5473 				switch (pd->proto) {
5474 				case IPPROTO_TCP:
5475 					pf_change_ap(direction, pd->mp, daddr,
5476 					    &th->th_dport, pd->ip_sum,
5477 					    &th->th_sum, &pd->bdaddr,
5478 					    bdxport.port, 0, af, pd->af, 1);
5479 					dxport.port = th->th_dport;
5480 					rewrite++;
5481 					break;
5482 				case IPPROTO_UDP:
5483 					pf_change_ap(direction, pd->mp, daddr,
5484 					    &pf_pd_get_hdr_udp(pd)->uh_dport, pd->ip_sum,
5485 					    &pf_pd_get_hdr_udp(pd)->uh_sum, &pd->bdaddr,
5486 					    bdxport.port, 1, af, pd->af, 1);
5487 					dxport.port = pf_pd_get_hdr_udp(pd)->uh_dport;
5488 					rewrite++;
5489 					break;
5490 				case IPPROTO_ICMP:
5491 				case IPPROTO_ICMPV6:
5492 					/* nothing! */
5493 					break;
5494 				case IPPROTO_GRE:
5495 					if (pd->proto_variant ==
5496 					    PF_GRE_PPTP_VARIANT) {
5497 						grev1->call_id =
5498 						    bdxport.call_id;
5499 					}
5500 					++rewrite;
5501 					switch (af) {
5502 #if INET
5503 					case AF_INET:
5504 						pf_change_a(&daddr->v4addr.s_addr,
5505 						    pd->ip_sum,
5506 						    pd->bdaddr.v4addr.s_addr, 0);
5507 						break;
5508 #endif /* INET */
5509 					case AF_INET6:
5510 						PF_ACPY(daddr, &pd->bdaddr,
5511 						    AF_INET6);
5512 						break;
5513 					}
5514 					break;
5515 				case IPPROTO_ESP:
5516 					switch (af) {
5517 #if INET
5518 					case AF_INET:
5519 						pf_change_a(&daddr->v4addr.s_addr,
5520 						    pd->ip_sum,
5521 						    pd->bdaddr.v4addr.s_addr, 0);
5522 						break;
5523 #endif /* INET */
5524 					case AF_INET6:
5525 						PF_ACPY(daddr, &pd->bdaddr,
5526 						    AF_INET6);
5527 						break;
5528 					}
5529 					break;
5530 				default:
5531 					switch (af) {
5532 					case AF_INET:
5533 						pf_change_a(&daddr->v4addr.s_addr,
5534 						    pd->ip_sum,
5535 						    pd->bdaddr.v4addr.s_addr, 0);
5536 						break;
5537 					case AF_INET6:
5538 						PF_ACPY(daddr, &pd->bdaddr, af);
5539 						break;
5540 					}
5541 				}
5542 			}
5543 		}
5544 		if (pd->proto == IPPROTO_TCP &&
5545 		    ((r->rule_flag & PFRULE_RETURNRST) ||
5546 		    (r->rule_flag & PFRULE_RETURN)) &&
5547 		    !(th->th_flags & TH_RST)) {
5548 			u_int32_t        ack = ntohl(th->th_seq) + pd->p_len;
5549 			int              len = 0;
5550 			struct ip       *__single h4;
5551 			struct ip6_hdr  *__single h6;
5552 
5553 			switch (pd->af) {
5554 			case AF_INET:
5555 				h4 = pbuf->pb_data;
5556 				len = ntohs(h4->ip_len) - off;
5557 				break;
5558 			case AF_INET6:
5559 				h6 = pbuf->pb_data;
5560 				len = ntohs(h6->ip6_plen) -
5561 				    (off - sizeof(*h6));
5562 				break;
5563 			}
5564 
5565 			if (pf_check_proto_cksum(pbuf, off, len, IPPROTO_TCP,
5566 			    pd->af)) {
5567 				REASON_SET(&reason, PFRES_PROTCKSUM);
5568 			} else {
5569 				if (th->th_flags & TH_SYN) {
5570 					ack++;
5571 				}
5572 				if (th->th_flags & TH_FIN) {
5573 					ack++;
5574 				}
5575 				pf_send_tcp(r, pd->af, pd->dst,
5576 				    pd->src, th->th_dport, th->th_sport,
5577 				    ntohl(th->th_ack), ack, TH_RST | TH_ACK, 0, 0,
5578 				    r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
5579 			}
5580 		} else if (pd->proto != IPPROTO_ICMP && pd->af == AF_INET &&
5581 		    pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
5582 		    r->return_icmp) {
5583 			pf_send_icmp(pbuf, r->return_icmp >> 8,
5584 			    r->return_icmp & 255, pd->af, r);
5585 		} else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
5586 		    pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
5587 		    r->return_icmp6) {
5588 			pf_send_icmp(pbuf, r->return_icmp6 >> 8,
5589 			    r->return_icmp6 & 255, pd->af, r);
5590 		}
5591 	}
5592 
5593 	if (r->action == PF_DROP) {
5594 #if SKYWALK
5595 		netns_release(&nstoken);
5596 #endif
5597 		return PF_DROP;
5598 	}
5599 
5600 	/* prepare state key, for flowhash and/or the state (if created) */
5601 	bzero(&psk, sizeof(psk));
5602 	psk.proto = pd->proto;
5603 	psk.direction = direction;
5604 	if (pd->proto == IPPROTO_UDP) {
5605 		if (ntohs(pf_pd_get_hdr_udp(pd)->uh_sport) == PF_IKE_PORT &&
5606 		    ntohs(pf_pd_get_hdr_udp(pd)->uh_dport) == PF_IKE_PORT) {
5607 			psk.proto_variant = PF_EXTFILTER_APD;
5608 		} else {
5609 			psk.proto_variant = nr ? nr->extfilter : r->extfilter;
5610 			if (psk.proto_variant < PF_EXTFILTER_APD) {
5611 				psk.proto_variant = PF_EXTFILTER_APD;
5612 			}
5613 		}
5614 	} else if (pd->proto == IPPROTO_GRE) {
5615 		psk.proto_variant = pd->proto_variant;
5616 	}
5617 	if (direction == PF_OUT) {
5618 		psk.af_gwy = af;
5619 		PF_ACPY(&psk.gwy.addr, saddr, af);
5620 		PF_ACPY(&psk.ext_gwy.addr, daddr, af);
5621 		switch (pd->proto) {
5622 		case IPPROTO_ESP:
5623 			psk.gwy.xport.spi = 0;
5624 			psk.ext_gwy.xport.spi = pf_pd_get_hdr_esp(pd)->spi;
5625 			break;
5626 		case IPPROTO_ICMP:
5627 		case IPPROTO_ICMPV6:
5628 			/*
5629 			 * NAT64 requires protocol translation  between ICMPv4
5630 			 * and ICMPv6. TCP and UDP do not require protocol
5631 			 * translation. To avoid adding complexity just to
5632 			 * handle ICMP(v4addr/v6addr), we always lookup  for
5633 			 * proto = IPPROTO_ICMP on both LAN and WAN side
5634 			 */
5635 			psk.proto = IPPROTO_ICMP;
5636 			psk.gwy.xport.port = nxport.port;
5637 			psk.ext_gwy.xport.spi = 0;
5638 			break;
5639 		default:
5640 			psk.gwy.xport = sxport;
5641 			psk.ext_gwy.xport = dxport;
5642 			break;
5643 		}
5644 		psk.af_lan = af;
5645 		if (nr != NULL) {
5646 			PF_ACPY(&psk.lan.addr, &pd->baddr, af);
5647 			psk.lan.xport = bxport;
5648 			PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af);
5649 			psk.ext_lan.xport = bdxport;
5650 		} else {
5651 			PF_ACPY(&psk.lan.addr, &psk.gwy.addr, af);
5652 			psk.lan.xport = psk.gwy.xport;
5653 			PF_ACPY(&psk.ext_lan.addr, &psk.ext_gwy.addr, af);
5654 			psk.ext_lan.xport = psk.ext_gwy.xport;
5655 		}
5656 	} else {
5657 		psk.af_lan = af;
5658 		if (nr && nr->action == PF_NAT64) {
5659 			PF_ACPY(&psk.lan.addr, &pd->baddr, af);
5660 			PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af);
5661 		} else {
5662 			PF_ACPY(&psk.lan.addr, daddr, af);
5663 			PF_ACPY(&psk.ext_lan.addr, saddr, af);
5664 		}
5665 		switch (pd->proto) {
5666 		case IPPROTO_ICMP:
5667 		case IPPROTO_ICMPV6:
5668 			/*
5669 			 * NAT64 requires protocol translation  between ICMPv4
5670 			 * and ICMPv6. TCP and UDP do not require protocol
5671 			 * translation. To avoid adding complexity just to
5672 			 * handle ICMP(v4addr/v6addr), we always lookup  for
5673 			 * proto = IPPROTO_ICMP on both LAN and WAN side
5674 			 */
5675 			psk.proto = IPPROTO_ICMP;
5676 			if (nr && nr->action == PF_NAT64) {
5677 				psk.lan.xport = bxport;
5678 				psk.ext_lan.xport = bxport;
5679 			} else {
5680 				psk.lan.xport = nxport;
5681 				psk.ext_lan.xport.spi = 0;
5682 			}
5683 			break;
5684 		case IPPROTO_ESP:
5685 			psk.ext_lan.xport.spi = 0;
5686 			psk.lan.xport.spi = pf_pd_get_hdr_esp(pd)->spi;
5687 			break;
5688 		default:
5689 			if (nr != NULL) {
5690 				if (nr->action == PF_NAT64) {
5691 					psk.lan.xport = bxport;
5692 					psk.ext_lan.xport = bdxport;
5693 				} else {
5694 					psk.lan.xport = dxport;
5695 					psk.ext_lan.xport = sxport;
5696 				}
5697 			} else {
5698 				psk.lan.xport = dxport;
5699 				psk.ext_lan.xport = sxport;
5700 			}
5701 			break;
5702 		}
5703 		psk.af_gwy = pd->naf;
5704 		if (nr != NULL) {
5705 			if (nr->action == PF_NAT64) {
5706 				PF_ACPY(&psk.gwy.addr, &pd->naddr, pd->naf);
5707 				PF_ACPY(&psk.ext_gwy.addr, &pd->ndaddr,
5708 				    pd->naf);
5709 				if ((pd->proto == IPPROTO_ICMPV6) ||
5710 				    (pd->proto == IPPROTO_ICMP)) {
5711 					psk.gwy.xport = nxport;
5712 					psk.ext_gwy.xport = nxport;
5713 				} else {
5714 					psk.gwy.xport = sxport;
5715 					psk.ext_gwy.xport = dxport;
5716 				}
5717 			} else {
5718 				PF_ACPY(&psk.gwy.addr, &pd->bdaddr, af);
5719 				psk.gwy.xport = bdxport;
5720 				PF_ACPY(&psk.ext_gwy.addr, saddr, af);
5721 				psk.ext_gwy.xport = sxport;
5722 			}
5723 		} else {
5724 			PF_ACPY(&psk.gwy.addr, &psk.lan.addr, af);
5725 			psk.gwy.xport = psk.lan.xport;
5726 			PF_ACPY(&psk.ext_gwy.addr, &psk.ext_lan.addr, af);
5727 			psk.ext_gwy.xport = psk.ext_lan.xport;
5728 		}
5729 	}
5730 	if (pd->pktflags & PKTF_FLOW_ID) {
5731 		/* flow hash was already computed outside of PF */
5732 		psk.flowsrc = pd->flowsrc;
5733 		psk.flowhash = pd->flowhash;
5734 	} else {
5735 		/*
5736 		 * Allocation of flow identifier is deferred until a PF state
5737 		 * creation is needed for this flow.
5738 		 */
5739 		pd->pktflags &= ~PKTF_FLOW_ADV;
5740 		pd->flowhash = 0;
5741 	}
5742 
5743 	if (__improbable(pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, pd))) {
5744 		REASON_SET(&reason, PFRES_MEMORY);
5745 #if SKYWALK
5746 		netns_release(&nstoken);
5747 #endif
5748 		return PF_DROP;
5749 	}
5750 
5751 	if (!state_icmp && (r->keep_state || nr != NULL ||
5752 	    (pd->flags & PFDESC_TCP_NORM))) {
5753 		/* create new state */
5754 		struct pf_state *__single s = NULL;
5755 		struct pf_state_key *__single sk = NULL;
5756 		struct pf_src_node *__single sn = NULL;
5757 		struct pf_ike_hdr ike;
5758 
5759 		if (pd->proto == IPPROTO_UDP) {
5760 			size_t plen = pbuf->pb_packet_len - off - sizeof(*uh);
5761 
5762 			if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
5763 			    ntohs(uh->uh_dport) == PF_IKE_PORT &&
5764 			    plen >= PF_IKE_PACKET_MINSIZE) {
5765 				if (plen > PF_IKE_PACKET_MINSIZE) {
5766 					plen = PF_IKE_PACKET_MINSIZE;
5767 				}
5768 				pbuf_copy_data(pbuf, off + sizeof(*uh), plen,
5769 				    &ike, sizeof(ike));
5770 			}
5771 		}
5772 
5773 		if (nr != NULL && pd->proto == IPPROTO_ESP &&
5774 		    direction == PF_OUT) {
5775 			struct pf_state_key_cmp sk0;
5776 			struct pf_state *s0;
5777 
5778 			/*
5779 			 * <[email protected]>
5780 			 * This squelches state creation if the external
5781 			 * address matches an existing incomplete state with a
5782 			 * different internal address.  Only one 'blocking'
5783 			 * partial state is allowed for each external address.
5784 			 */
5785 #if SKYWALK
5786 			/*
5787 			 * XXXSCW:
5788 			 *
5789 			 * It's not clear how this impacts netns. The original
5790 			 * state will hold the port reservation token but what
5791 			 * happens to other "Cone NAT" states when the first is
5792 			 * torn down?
5793 			 */
5794 #endif
5795 			memset(&sk0, 0, sizeof(sk0));
5796 			sk0.af_gwy = pd->af;
5797 			sk0.proto = IPPROTO_ESP;
5798 			PF_ACPY(&sk0.gwy.addr, saddr, sk0.af_gwy);
5799 			PF_ACPY(&sk0.ext_gwy.addr, daddr, sk0.af_gwy);
5800 			s0 = pf_find_state(kif, &sk0, PF_IN);
5801 
5802 			if (s0 && PF_ANEQ(&s0->state_key->lan.addr,
5803 			    pd->src, pd->af)) {
5804 				nsn = 0;
5805 				goto cleanup;
5806 			}
5807 		}
5808 
5809 		/* check maximums */
5810 		if (r->max_states && (r->states >= r->max_states)) {
5811 			pf_status.lcounters[LCNT_STATES]++;
5812 			REASON_SET(&reason, PFRES_MAXSTATES);
5813 			goto cleanup;
5814 		}
5815 		/* src node for filter rule */
5816 		if ((r->rule_flag & PFRULE_SRCTRACK ||
5817 		    r->rpool.opts & PF_POOL_STICKYADDR) &&
5818 		    pf_insert_src_node(&sn, r, saddr, af) != 0) {
5819 			REASON_SET(&reason, PFRES_SRCLIMIT);
5820 			goto cleanup;
5821 		}
5822 		/* src node for translation rule */
5823 		if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
5824 		    ((direction == PF_OUT &&
5825 		    nr->action != PF_RDR &&
5826 		    pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
5827 		    (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
5828 			REASON_SET(&reason, PFRES_SRCLIMIT);
5829 			goto cleanup;
5830 		}
5831 		s = pool_get(&pf_state_pl, PR_WAITOK);
5832 		if (s == NULL) {
5833 			REASON_SET(&reason, PFRES_MEMORY);
5834 cleanup:
5835 			if (sn != NULL && sn->states == 0 && sn->expire == 0) {
5836 				RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
5837 				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
5838 				pf_status.src_nodes--;
5839 				pool_put(&pf_src_tree_pl, sn);
5840 			}
5841 			if (nsn != sn && nsn != NULL && nsn->states == 0 &&
5842 			    nsn->expire == 0) {
5843 				RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
5844 				pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
5845 				pf_status.src_nodes--;
5846 				pool_put(&pf_src_tree_pl, nsn);
5847 			}
5848 			if (s != NULL) {
5849 				pf_detach_state(s, 0);
5850 			} else if (sk != NULL) {
5851 				if (sk->app_state) {
5852 					pool_put(&pf_app_state_pl,
5853 					    sk->app_state);
5854 				}
5855 				pf_state_key_release_flowid(sk);
5856 				pool_put(&pf_state_key_pl, sk);
5857 			}
5858 #if SKYWALK
5859 			netns_release(&nstoken);
5860 #endif
5861 			return PF_DROP;
5862 		}
5863 		bzero(s, sizeof(*s));
5864 		TAILQ_INIT(&s->unlink_hooks);
5865 		s->rule.ptr = r;
5866 		s->nat_rule.ptr = nr;
5867 		s->anchor.ptr = a;
5868 		STATE_INC_COUNTERS(s);
5869 		s->allow_opts = r->allow_opts;
5870 		s->log = r->log & PF_LOG_ALL;
5871 		if (nr != NULL) {
5872 			s->log |= nr->log & PF_LOG_ALL;
5873 		}
5874 		switch (pd->proto) {
5875 		case IPPROTO_TCP:
5876 			s->src.seqlo = ntohl(th->th_seq);
5877 			s->src.seqhi = s->src.seqlo + pd->p_len + 1;
5878 			if ((th->th_flags & (TH_SYN | TH_ACK)) ==
5879 			    TH_SYN && r->keep_state == PF_STATE_MODULATE) {
5880 				/* Generate sequence number modulator */
5881 				if ((s->src.seqdiff = pf_tcp_iss(pd) -
5882 				    s->src.seqlo) == 0) {
5883 					s->src.seqdiff = 1;
5884 				}
5885 				pf_change_a(&th->th_seq, &th->th_sum,
5886 				    htonl(s->src.seqlo + s->src.seqdiff), 0);
5887 				rewrite = off + sizeof(*th);
5888 			} else {
5889 				s->src.seqdiff = 0;
5890 			}
5891 			if (th->th_flags & TH_SYN) {
5892 				s->src.seqhi++;
5893 				s->src.wscale = pf_get_wscale(pbuf, off,
5894 				    th->th_off, af);
5895 			}
5896 			s->src.max_win = MAX(ntohs(th->th_win), 1);
5897 			if (s->src.wscale & PF_WSCALE_MASK) {
5898 				/* Remove scale factor from initial window */
5899 				int win = s->src.max_win;
5900 				win += 1 << (s->src.wscale & PF_WSCALE_MASK);
5901 				s->src.max_win = (win - 1) >>
5902 				    (s->src.wscale & PF_WSCALE_MASK);
5903 			}
5904 			if (th->th_flags & TH_FIN) {
5905 				s->src.seqhi++;
5906 			}
5907 			s->dst.seqhi = 1;
5908 			s->dst.max_win = 1;
5909 			s->src.state = TCPS_SYN_SENT;
5910 			s->dst.state = TCPS_CLOSED;
5911 			s->timeout = PFTM_TCP_FIRST_PACKET;
5912 			break;
5913 		case IPPROTO_UDP:
5914 			s->src.state = PFUDPS_SINGLE;
5915 			s->dst.state = PFUDPS_NO_TRAFFIC;
5916 			s->timeout = PFTM_UDP_FIRST_PACKET;
5917 			break;
5918 		case IPPROTO_ICMP:
5919 		case IPPROTO_ICMPV6:
5920 			s->timeout = PFTM_ICMP_FIRST_PACKET;
5921 			break;
5922 		case IPPROTO_GRE:
5923 			s->src.state = PFGRE1S_INITIATING;
5924 			s->dst.state = PFGRE1S_NO_TRAFFIC;
5925 			s->timeout = PFTM_GREv1_INITIATING;
5926 			break;
5927 		case IPPROTO_ESP:
5928 			s->src.state = PFESPS_INITIATING;
5929 			s->dst.state = PFESPS_NO_TRAFFIC;
5930 			s->timeout = PFTM_ESP_FIRST_PACKET;
5931 			break;
5932 		default:
5933 			s->src.state = PFOTHERS_SINGLE;
5934 			s->dst.state = PFOTHERS_NO_TRAFFIC;
5935 			s->timeout = PFTM_OTHER_FIRST_PACKET;
5936 		}
5937 
5938 		s->creation = pf_time_second();
5939 		s->expire = pf_time_second();
5940 
5941 		if (sn != NULL) {
5942 			s->src_node = sn;
5943 			s->src_node->states++;
5944 			VERIFY(s->src_node->states != 0);
5945 		}
5946 		if (nsn != NULL) {
5947 			PF_ACPY(&nsn->raddr, &pd->naddr, af);
5948 			s->nat_src_node = nsn;
5949 			s->nat_src_node->states++;
5950 			VERIFY(s->nat_src_node->states != 0);
5951 		}
5952 		if (pd->proto == IPPROTO_TCP) {
5953 			if ((pd->flags & PFDESC_TCP_NORM) &&
5954 			    pf_normalize_tcp_init(pbuf, off, pd, th, &s->src,
5955 			    &s->dst)) {
5956 				REASON_SET(&reason, PFRES_MEMORY);
5957 				pf_src_tree_remove_state(s);
5958 				STATE_DEC_COUNTERS(s);
5959 #if SKYWALK
5960 				netns_release(&nstoken);
5961 #endif
5962 				pool_put(&pf_state_pl, s);
5963 				return PF_DROP;
5964 			}
5965 			if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
5966 			    pf_normalize_tcp_stateful(pbuf, off, pd, &reason,
5967 			    th, s, &s->src, &s->dst, &rewrite)) {
5968 				/* This really shouldn't happen!!! */
5969 				DPFPRINTF(PF_DEBUG_URGENT,
5970 				    ("pf_normalize_tcp_stateful failed on "
5971 				    "first pkt"));
5972 #if SKYWALK
5973 				netns_release(&nstoken);
5974 #endif
5975 				pf_normalize_tcp_cleanup(s);
5976 				pf_src_tree_remove_state(s);
5977 				STATE_DEC_COUNTERS(s);
5978 				pool_put(&pf_state_pl, s);
5979 				return PF_DROP;
5980 			}
5981 		}
5982 
5983 		/* allocate state key and import values from psk */
5984 		if (__improbable((sk = pf_alloc_state_key(s, &psk)) == NULL)) {
5985 			REASON_SET(&reason, PFRES_MEMORY);
5986 			/*
5987 			 * XXXSCW: This will leak the freshly-allocated
5988 			 * state structure 's'. Although it should
5989 			 * eventually be aged-out and removed.
5990 			 */
5991 			goto cleanup;
5992 		}
5993 
5994 		if (pd->flowhash == 0) {
5995 			ASSERT(sk->flowhash != 0);
5996 			ASSERT(sk->flowsrc != 0);
5997 			pd->flowsrc = sk->flowsrc;
5998 			pd->flowhash = sk->flowhash;
5999 			pd->pktflags |= PKTF_FLOW_ID;
6000 			pd->pktflags &= ~PKTF_FLOW_ADV;
6001 			if (__improbable(pf_tag_packet(pbuf, pd->pf_mtag,
6002 			    tag, rtableid, pd))) {
6003 				/*
6004 				 * this shouldn't fail as the packet tag has
6005 				 * already been allocated.
6006 				 */
6007 				panic_plain("pf_tag_packet failed");
6008 			}
6009 		}
6010 
6011 		pf_set_rt_ifp(s, saddr, af);    /* needs s->state_key set */
6012 
6013 		pbuf = pd->mp; // XXXSCW: Why?
6014 
6015 		if (sk->app_state == 0) {
6016 			switch (pd->proto) {
6017 			case IPPROTO_TCP: {
6018 				u_int16_t dport = (direction == PF_OUT) ?
6019 				    sk->ext_gwy.xport.port : sk->gwy.xport.port;
6020 
6021 				if (nr != NULL &&
6022 				    ntohs(dport) == PF_PPTP_PORT) {
6023 					struct pf_app_state *__single as;
6024 
6025 					as = pool_get(&pf_app_state_pl,
6026 					    PR_WAITOK);
6027 					if (!as) {
6028 						REASON_SET(&reason,
6029 						    PFRES_MEMORY);
6030 						goto cleanup;
6031 					}
6032 
6033 					bzero(as, sizeof(*as));
6034 					as->handler = pf_pptp_handler;
6035 					as->compare_lan_ext = 0;
6036 					as->compare_ext_gwy = 0;
6037 					as->u.pptp.grev1_state = 0;
6038 					sk->app_state = as;
6039 					(void) hook_establish(&s->unlink_hooks,
6040 					    0, (hook_fn_t) pf_pptp_unlink, s);
6041 				}
6042 				break;
6043 			}
6044 
6045 			case IPPROTO_UDP: {
6046 				if (nr != NULL &&
6047 				    ntohs(uh->uh_sport) == PF_IKE_PORT &&
6048 				    ntohs(uh->uh_dport) == PF_IKE_PORT) {
6049 					struct pf_app_state *__single as;
6050 
6051 					as = pool_get(&pf_app_state_pl,
6052 					    PR_WAITOK);
6053 					if (!as) {
6054 						REASON_SET(&reason,
6055 						    PFRES_MEMORY);
6056 						goto cleanup;
6057 					}
6058 
6059 					bzero(as, sizeof(*as));
6060 					as->compare_lan_ext = pf_ike_compare;
6061 					as->compare_ext_gwy = pf_ike_compare;
6062 					as->u.ike.cookie = ike.initiator_cookie;
6063 					sk->app_state = as;
6064 				}
6065 				break;
6066 			}
6067 
6068 			default:
6069 				break;
6070 			}
6071 		}
6072 
6073 		if (__improbable(pf_insert_state(BOUND_IFACE(r, kif), s))) {
6074 			if (pd->proto == IPPROTO_TCP) {
6075 				pf_normalize_tcp_cleanup(s);
6076 			}
6077 			REASON_SET(&reason, PFRES_STATEINS);
6078 			pf_src_tree_remove_state(s);
6079 			STATE_DEC_COUNTERS(s);
6080 #if SKYWALK
6081 			netns_release(&nstoken);
6082 #endif
6083 			pool_put(&pf_state_pl, s);
6084 			return PF_DROP;
6085 		} else {
6086 #if SKYWALK
6087 			s->nstoken = nstoken;
6088 			nstoken = NULL;
6089 #endif
6090 			*sm = s;
6091 		}
6092 		if (tag > 0) {
6093 			pf_tag_ref(tag);
6094 			s->tag = tag;
6095 		}
6096 		if (pd->proto == IPPROTO_TCP &&
6097 		    (th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN &&
6098 		    r->keep_state == PF_STATE_SYNPROXY) {
6099 			int ua = (sk->af_lan == sk->af_gwy) ? 1 : 0;
6100 			s->src.state = PF_TCPS_PROXY_SRC;
6101 			if (nr != NULL) {
6102 				if (direction == PF_OUT) {
6103 					pf_change_ap(direction, pd->mp, saddr,
6104 					    &th->th_sport, pd->ip_sum,
6105 					    &th->th_sum, &pd->baddr,
6106 					    bxport.port, 0, af, pd->af, ua);
6107 					sxport.port = th->th_sport;
6108 				} else {
6109 					pf_change_ap(direction, pd->mp, daddr,
6110 					    &th->th_dport, pd->ip_sum,
6111 					    &th->th_sum, &pd->baddr,
6112 					    bxport.port, 0, af, pd->af, ua);
6113 					sxport.port = th->th_dport;
6114 				}
6115 			}
6116 			s->src.seqhi = htonl(random());
6117 			/* Find mss option */
6118 			mss = pf_get_mss(pbuf, off, th->th_off, af);
6119 			mss = pf_calc_mss(saddr, af, mss);
6120 			mss = pf_calc_mss(daddr, af, mss);
6121 			s->src.mss = mss;
6122 			pf_send_tcp(r, af, daddr, saddr, th->th_dport,
6123 			    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
6124 			    TH_SYN | TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
6125 			REASON_SET(&reason, PFRES_SYNPROXY);
6126 			return PF_SYNPROXY_DROP;
6127 		}
6128 
6129 		if (sk->app_state && sk->app_state->handler) {
6130 			int offx = off;
6131 
6132 			switch (pd->proto) {
6133 			case IPPROTO_TCP:
6134 				offx += th->th_off << 2;
6135 				break;
6136 			case IPPROTO_UDP:
6137 				offx += pf_pd_get_hdr_udp(pd)->uh_ulen << 2;
6138 				break;
6139 			default:
6140 				/* ALG handlers only apply to TCP and UDP rules */
6141 				break;
6142 			}
6143 
6144 			if (offx > off) {
6145 				sk->app_state->handler(s, direction, offx,
6146 				    pd, kif);
6147 				if (pd->lmw < 0) {
6148 					REASON_SET(&reason, PFRES_MEMORY);
6149 					return PF_DROP;
6150 				}
6151 				pbuf = pd->mp;  // XXXSCW: Why?
6152 			}
6153 		}
6154 	}
6155 #if SKYWALK
6156 	else {
6157 		netns_release(&nstoken);
6158 	}
6159 #endif
6160 
6161 	/* copy back packet headers if we performed NAT operations */
6162 	if (rewrite) {
6163 		if (rewrite < off + pd->hdrlen) {
6164 			rewrite = off + pd->hdrlen;
6165 		}
6166 
6167 		if (pf_lazy_makewritable(pd, pd->mp, rewrite) == NULL) {
6168 			REASON_SET(&reason, PFRES_MEMORY);
6169 			return PF_DROP;
6170 		}
6171 
6172 		pbuf_copy_back(pbuf, off, hdrlen, pf_pd_get_hdr_ptr_any(pd), pd->hdrlen);
6173 		if (af == AF_INET6 && pd->naf == AF_INET) {
6174 			return pf_nat64_ipv6(pbuf, off, pd);
6175 		} else if (af == AF_INET && pd->naf == AF_INET6) {
6176 			return pf_nat64_ipv4(pbuf, off, pd);
6177 		}
6178 	}
6179 
6180 	return PF_PASS;
6181 }
6182 
/*
 * NOTE(review): global enable flag, defaulting to disabled.  The meaning
 * of "nlc" is not derivable from this portion of the file — confirm
 * against the code that reads/sets this flag elsewhere.
 */
boolean_t is_nlc_enabled_glb = FALSE;
6184 
6185 static inline boolean_t
pf_is_dummynet_enabled(void)6186 pf_is_dummynet_enabled(void)
6187 {
6188 #if DUMMYNET
6189 	if (__probable(!PF_IS_ENABLED)) {
6190 		return FALSE;
6191 	}
6192 
6193 	if (__probable(!DUMMYNET_LOADED)) {
6194 		return FALSE;
6195 	}
6196 
6197 	if (__probable(TAILQ_EMPTY(pf_main_ruleset.
6198 	    rules[PF_RULESET_DUMMYNET].active.ptr))) {
6199 		return FALSE;
6200 	}
6201 
6202 	return TRUE;
6203 #else
6204 	return FALSE;
6205 #endif /* DUMMYNET */
6206 }
6207 
6208 #if DUMMYNET
6209 /*
6210  * When pf_test_dummynet() returns PF_PASS, the rule matching parameter "rm"
6211  * remains unchanged, meaning the packet did not match a dummynet rule.
6212  * when the packet does match a dummynet rule, pf_test_dummynet() returns
6213  * PF_PASS and zero out the mbuf rule as the packet is effectively siphoned
6214  * out by dummynet.
6215  */
6216 static __attribute__((noinline)) int
pf_test_dummynet(struct pf_rule ** rm,int direction,struct pfi_kif * kif,pbuf_t ** pbuf0,struct pf_pdesc * pd,struct ip_fw_args * fwa)6217 pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif,
6218     pbuf_t **pbuf0, struct pf_pdesc *pd, struct ip_fw_args *fwa)
6219 {
6220 	pbuf_t                  *__single pbuf = *pbuf0;
6221 	struct pf_rule          *__single am = NULL;
6222 	struct pf_ruleset       *__single rsm = NULL;
6223 	struct pf_addr          *__single saddr = pd->src, *__single daddr = pd->dst;
6224 	sa_family_t              af = pd->af;
6225 	struct pf_rule          *__single r, *__single a = NULL;
6226 	struct pf_ruleset       *__single ruleset = NULL;
6227 	struct tcphdr           *__single th = pf_pd_get_hdr_tcp(pd);
6228 	u_short                  reason;
6229 	int                      hdrlen = 0;
6230 	int                      tag = -1;
6231 	unsigned int             rtableid = IFSCOPE_NONE;
6232 	int                      asd = 0;
6233 	int                      match = 0;
6234 	u_int8_t                 icmptype = 0, icmpcode = 0;
6235 	struct ip_fw_args       dnflow;
6236 	struct pf_rule          *__single prev_matching_rule = fwa ? fwa->fwa_pf_rule : NULL;
6237 	int                     found_prev_rule = (prev_matching_rule) ? 0 : 1;
6238 
6239 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
6240 
6241 	if (!pf_is_dummynet_enabled()) {
6242 		return PF_PASS;
6243 	}
6244 
6245 	if (kif->pfik_ifp->if_xflags & IFXF_NO_TRAFFIC_SHAPING) {
6246 		return PF_PASS;
6247 	}
6248 
6249 	bzero(&dnflow, sizeof(dnflow));
6250 
6251 	hdrlen = 0;
6252 
6253 	/* Fragments don't gave protocol headers */
6254 	if (!(pd->flags & PFDESC_IP_FRAG)) {
6255 		switch (pd->proto) {
6256 		case IPPROTO_TCP:
6257 			dnflow.fwa_id.flags = pf_pd_get_hdr_tcp(pd)->th_flags;
6258 			dnflow.fwa_id.dst_port = ntohs(pf_pd_get_hdr_tcp(pd)->th_dport);
6259 			dnflow.fwa_id.src_port = ntohs(pf_pd_get_hdr_tcp(pd)->th_sport);
6260 			hdrlen = sizeof(*th);
6261 			break;
6262 		case IPPROTO_UDP:
6263 			dnflow.fwa_id.dst_port = ntohs(pf_pd_get_hdr_udp(pd)->uh_dport);
6264 			dnflow.fwa_id.src_port = ntohs(pf_pd_get_hdr_udp(pd)->uh_sport);
6265 			hdrlen = sizeof(*pf_pd_get_hdr_udp(pd));
6266 			break;
6267 #if INET
6268 		case IPPROTO_ICMP:
6269 			if (af != AF_INET) {
6270 				break;
6271 			}
6272 			hdrlen = ICMP_MINLEN;
6273 			icmptype = pf_pd_get_hdr_icmp(pd)->icmp_type;
6274 			icmpcode = pf_pd_get_hdr_icmp(pd)->icmp_code;
6275 			break;
6276 #endif /* INET */
6277 		case IPPROTO_ICMPV6:
6278 			if (af != AF_INET6) {
6279 				break;
6280 			}
6281 			hdrlen = sizeof(*pf_pd_get_hdr_icmp6(pd));
6282 			icmptype = pf_pd_get_hdr_icmp6(pd)->icmp6_type;
6283 			icmpcode = pf_pd_get_hdr_icmp6(pd)->icmp6_code;
6284 			break;
6285 		case IPPROTO_GRE:
6286 			if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
6287 				hdrlen = sizeof(*pf_pd_get_hdr_grev1(pd));
6288 			}
6289 			break;
6290 		case IPPROTO_ESP:
6291 			hdrlen = sizeof(*pf_pd_get_hdr_esp(pd));
6292 			break;
6293 		}
6294 	}
6295 
6296 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_DUMMYNET].active.ptr);
6297 
6298 	while (r != NULL) {
6299 		r->evaluations++;
6300 		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
6301 			r = r->skip[PF_SKIP_IFP].ptr;
6302 		} else if (r->direction && r->direction != direction) {
6303 			r = r->skip[PF_SKIP_DIR].ptr;
6304 		} else if (r->af && r->af != af) {
6305 			r = r->skip[PF_SKIP_AF].ptr;
6306 		} else if (r->proto && r->proto != pd->proto) {
6307 			r = r->skip[PF_SKIP_PROTO].ptr;
6308 		} else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
6309 		    r->src.neg, kif)) {
6310 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
6311 		}
6312 		/* tcp/udp only. port_op always 0 in other cases */
6313 		else if (r->proto == pd->proto &&
6314 		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
6315 		    ((pd->flags & PFDESC_IP_FRAG) ||
6316 		    ((r->src.xport.range.op &&
6317 		    !pf_match_port(r->src.xport.range.op,
6318 		    r->src.xport.range.port[0], r->src.xport.range.port[1],
6319 		    th->th_sport))))) {
6320 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
6321 		} else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
6322 		    r->dst.neg, NULL)) {
6323 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
6324 		}
6325 		/* tcp/udp only. port_op always 0 in other cases */
6326 		else if (r->proto == pd->proto &&
6327 		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
6328 		    r->dst.xport.range.op &&
6329 		    ((pd->flags & PFDESC_IP_FRAG) ||
6330 		    !pf_match_port(r->dst.xport.range.op,
6331 		    r->dst.xport.range.port[0], r->dst.xport.range.port[1],
6332 		    th->th_dport))) {
6333 			r = r->skip[PF_SKIP_DST_PORT].ptr;
6334 		}
6335 		/* icmp only. type always 0 in other cases */
6336 		else if (r->type &&
6337 		    ((pd->flags & PFDESC_IP_FRAG) ||
6338 		    r->type != icmptype + 1)) {
6339 			r = TAILQ_NEXT(r, entries);
6340 		}
6341 		/* icmp only. type always 0 in other cases */
6342 		else if (r->code &&
6343 		    ((pd->flags & PFDESC_IP_FRAG) ||
6344 		    r->code != icmpcode + 1)) {
6345 			r = TAILQ_NEXT(r, entries);
6346 		} else if (r->tos && !(r->tos == pd->tos)) {
6347 			r = TAILQ_NEXT(r, entries);
6348 		} else if (r->rule_flag & PFRULE_FRAGMENT) {
6349 			r = TAILQ_NEXT(r, entries);
6350 		} else if (pd->proto == IPPROTO_TCP &&
6351 		    ((pd->flags & PFDESC_IP_FRAG) ||
6352 		    (r->flagset & th->th_flags) != r->flags)) {
6353 			r = TAILQ_NEXT(r, entries);
6354 		} else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
6355 			r = TAILQ_NEXT(r, entries);
6356 		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
6357 			r = TAILQ_NEXT(r, entries);
6358 		} else {
6359 			/*
6360 			 * Need to go past the previous dummynet matching rule
6361 			 */
6362 			if (r->anchor == NULL) {
6363 				if (found_prev_rule) {
6364 					if (r->tag) {
6365 						tag = r->tag;
6366 					}
6367 					if (PF_RTABLEID_IS_VALID(r->rtableid)) {
6368 						rtableid = r->rtableid;
6369 					}
6370 					match = 1;
6371 					*rm = r;
6372 					am = a;
6373 					rsm = ruleset;
6374 					if ((*rm)->quick) {
6375 						break;
6376 					}
6377 				} else if (r == prev_matching_rule) {
6378 					found_prev_rule = 1;
6379 				}
6380 				r = TAILQ_NEXT(r, entries);
6381 			} else {
6382 				pf_step_into_anchor(&asd, &ruleset,
6383 				    PF_RULESET_DUMMYNET, &r, &a, &match);
6384 			}
6385 		}
6386 		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
6387 		    PF_RULESET_DUMMYNET, &r, &a, &match)) {
6388 			break;
6389 		}
6390 	}
6391 	r = *rm;
6392 	a = am;
6393 	ruleset = rsm;
6394 
6395 	if (!match) {
6396 		return PF_PASS;
6397 	}
6398 
6399 	REASON_SET(&reason, PFRES_DUMMYNET);
6400 
6401 	if (r->log) {
6402 		PFLOG_PACKET(kif, h, pbuf, af, direction, reason, r,
6403 		    a, ruleset, pd);
6404 	}
6405 
6406 	if (r->action == PF_NODUMMYNET) {
6407 		int dirndx = (direction == PF_OUT);
6408 
6409 		r->packets[dirndx]++;
6410 		r->bytes[dirndx] += pd->tot_len;
6411 
6412 		return PF_PASS;
6413 	}
6414 	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, pd)) {
6415 		REASON_SET(&reason, PFRES_MEMORY);
6416 
6417 		return PF_DROP;
6418 	}
6419 
6420 	if (r->dnpipe && ip_dn_io_ptr != NULL) {
6421 		struct mbuf *m;
6422 		int dirndx = (direction == PF_OUT);
6423 
6424 		r->packets[dirndx]++;
6425 		r->bytes[dirndx] += pd->tot_len;
6426 
6427 		dnflow.fwa_cookie = r->dnpipe;
6428 		dnflow.fwa_pf_rule = r;
6429 		dnflow.fwa_id.proto = pd->proto;
6430 		dnflow.fwa_flags = r->dntype;
6431 		switch (af) {
6432 		case AF_INET:
6433 			dnflow.fwa_id.addr_type = 4;
6434 			dnflow.fwa_id.src_ip = ntohl(saddr->v4addr.s_addr);
6435 			dnflow.fwa_id.dst_ip = ntohl(daddr->v4addr.s_addr);
6436 			break;
6437 		case AF_INET6:
6438 			dnflow.fwa_id.addr_type = 6;
6439 			dnflow.fwa_id.src_ip6 = saddr->v6addr;
6440 			dnflow.fwa_id.dst_ip6 = saddr->v6addr;
6441 			break;
6442 		}
6443 
6444 		if (fwa != NULL) {
6445 			dnflow.fwa_oif = fwa->fwa_oif;
6446 			dnflow.fwa_oflags = fwa->fwa_oflags;
6447 			/*
6448 			 * Note that fwa_ro, fwa_dst and fwa_ipoa are
6449 			 * actually in a union so the following does work
6450 			 * for both IPv4 and IPv6
6451 			 */
6452 			dnflow.fwa_ro = fwa->fwa_ro;
6453 			dnflow.fwa_dst = fwa->fwa_dst;
6454 			dnflow.fwa_ipoa = fwa->fwa_ipoa;
6455 			dnflow.fwa_ro6_pmtu = fwa->fwa_ro6_pmtu;
6456 			dnflow.fwa_origifp = fwa->fwa_origifp;
6457 			dnflow.fwa_mtu = fwa->fwa_mtu;
6458 			dnflow.fwa_unfragpartlen = fwa->fwa_unfragpartlen;
6459 			dnflow.fwa_exthdrs = fwa->fwa_exthdrs;
6460 		}
6461 
6462 		if (af == AF_INET) {
6463 			struct ip *__single iphdr = pbuf->pb_data;
6464 			NTOHS(iphdr->ip_len);
6465 			NTOHS(iphdr->ip_off);
6466 		}
6467 		/*
6468 		 * Don't need to unlock pf_lock as NET_THREAD_HELD_PF
6469 		 * allows for recursive behavior
6470 		 */
6471 		m = pbuf_to_mbuf(pbuf, TRUE);
6472 		if (m != NULL) {
6473 			ip_dn_io_ptr(m,
6474 			    dnflow.fwa_cookie, (af == AF_INET) ?
6475 			    ((direction == PF_IN) ? DN_TO_IP_IN : DN_TO_IP_OUT) :
6476 			    ((direction == PF_IN) ? DN_TO_IP6_IN : DN_TO_IP6_OUT),
6477 			    &dnflow);
6478 		}
6479 
6480 		/*
6481 		 * The packet is siphoned out by dummynet so return a NULL
6482 		 * pbuf so the caller can still return success.
6483 		 */
6484 		*pbuf0 = NULL;
6485 
6486 		return PF_PASS;
6487 	}
6488 
6489 	return PF_PASS;
6490 }
6491 #endif /* DUMMYNET */
6492 
/*
 * Run the filter ruleset against an IP fragment, for which no transport
 * protocol header is available.  Rules that need transport-level data
 * (ports, TCP flags, ICMP type/code, OS fingerprints) cannot match a
 * fragment and are skipped.  On a match, *rm/*am/*rsm receive the rule,
 * its anchor rule, and the ruleset.  Returns PF_PASS or PF_DROP.
 */
static __attribute__((noinline)) int
pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, void *h, struct pf_pdesc *pd, struct pf_rule **am,
    struct pf_ruleset **rsm)
{
#pragma unused(h)
	struct pf_rule          *__single r, *__single a = NULL;
	struct pf_ruleset       *__single ruleset = NULL;
	sa_family_t              af = pd->af;
	u_short                  reason;
	int                      tag = -1;
	int                      asd = 0;
	int                      match = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		/*
		 * Precomputed skip-steps jump over runs of rules that share
		 * the same non-matching interface/direction/af/proto/address.
		 */
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != direction) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != af) {
			r = r->skip[PF_SKIP_AF].ptr;
		} else if (r->proto && r->proto != pd->proto) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		} else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif)) {
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		} else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		/* TOS / DSCP / service-class matching (Apple extensions). */
		} else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
		    !(r->tos & pd->tos)) {
			r = TAILQ_NEXT(r, entries);
		} else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
		    !(r->tos & (pd->tos & DSCP_MASK))) {
			r = TAILQ_NEXT(r, entries);
		} else if ((r->rule_flag & PFRULE_SC) && r->tos &&
		    ((r->tos & SCIDX_MASK) != pd->sc)) {
			r = TAILQ_NEXT(r, entries);
		/*
		 * Everything below needs transport-layer information that a
		 * fragment cannot provide, so such rules never match here.
		 */
		} else if (r->os_fingerprint != PF_OSFP_ANY) {
			r = TAILQ_NEXT(r, entries);
		} else if (pd->proto == IPPROTO_UDP &&
		    (r->src.xport.range.op || r->dst.xport.range.op)) {
			r = TAILQ_NEXT(r, entries);
		} else if (pd->proto == IPPROTO_TCP &&
		    (r->src.xport.range.op || r->dst.xport.range.op ||
		    r->flagset)) {
			r = TAILQ_NEXT(r, entries);
		} else if ((pd->proto == IPPROTO_ICMP ||
		    pd->proto == IPPROTO_ICMPV6) &&
		    (r->type || r->code)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
			r = TAILQ_NEXT(r, entries);
		} else {
			if (r->anchor == NULL) {
				/* Plain rule matched; "quick" ends the walk. */
				match = 1;
				*rm = r;
				*am = a;
				*rsm = ruleset;
				if ((*rm)->quick) {
					break;
				}
				r = TAILQ_NEXT(r, entries);
			} else {
				/* Descend into the anchor's sub-ruleset. */
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_FILTER, &r, &a, &match);
			}
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_FILTER, &r, &a, &match)) {
			break;
		}
	}
	/*
	 * NOTE(review): r is dereferenced unconditionally below; this relies
	 * on the ruleset always yielding a match (e.g. an implicit default
	 * pass rule) — confirm against the ruleset setup.
	 */
	r = *rm;
	a = *am;
	ruleset = *rsm;

	REASON_SET(&reason, PFRES_MATCH);

	if (r->log) {
		PFLOG_PACKET(kif, h, pbuf, af, direction, reason, r, a, ruleset,
		    pd);
	}

	if (r->action != PF_PASS) {
		return PF_DROP;
	}

	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, -1, NULL)) {
		REASON_SET(&reason, PFRES_MEMORY);
		return PF_DROP;
	}

	return PF_PASS;
}
6592 
/*
 * ALG handler for PPTP (TCP port 1723) control connections.
 *
 * Parses the PPTP control message in the TCP payload at offset 'off',
 * tracks the PNS/PAC call IDs exchanged during call setup/teardown,
 * spoofs call IDs when the control connection is NATed, and creates,
 * updates, or tears down the companion GREv1 state used for the
 * tunnelled data traffic.  On failure to insert the GREv1 state, the
 * packet is forced to drop via pd->lmw = -1.
 */
static __attribute__((noinline)) void
pf_pptp_handler(struct pf_state *s, int direction, int off,
    struct pf_pdesc *pd, struct pfi_kif *kif)
{
#pragma unused(direction)
	struct tcphdr *__single th;
	struct pf_pptp_state *__single pptps;
	struct pf_pptp_ctrl_msg cm;
	size_t plen, tlen;
	struct pf_state *__single gs;
	u_int16_t ct;
	u_int16_t *__single pac_call_id;
	u_int16_t *__single pns_call_id;
	u_int16_t *__single spoof_call_id;
	u_int8_t *__single pac_state;
	u_int8_t *__single pns_state;
	enum { PF_PPTP_PASS, PF_PPTP_INSERT_GRE, PF_PPTP_REMOVE_GRE } op;
	pbuf_t *__single pbuf;
	struct pf_state_key *__single sk;
	struct pf_state_key *__single gsk;
	struct pf_app_state *__single gas;

	sk = s->state_key;
	pptps = &sk->app_state->u.pptp;
	gs = pptps->grev1_state;

	/* Any control traffic refreshes the companion GREv1 state. */
	if (gs) {
		gs->expire = pf_time_second();
	}

	pbuf = pd->mp;
	/* Copy out at most one control message; ignore runt payloads. */
	plen = min(sizeof(cm), pbuf->pb_packet_len - off);
	if (plen < PF_PPTP_CTRL_MSG_MINSIZE) {
		return;
	}
	tlen = plen - PF_PPTP_CTRL_MSG_MINSIZE;
	pbuf_copy_data(pbuf, off, plen, &cm, sizeof(cm));

	/* Only well-formed PPTP control messages (magic, type 1) matter. */
	if (ntohl(cm.hdr.magic) != PF_PPTP_MAGIC_NUMBER) {
		return;
	}
	if (ntohs(cm.hdr.type) != 1) {
		return;
	}

/* Reject messages whose payload is shorter than their type requires. */
#define TYPE_LEN_CHECK(_type, _name)                            \
	case PF_PPTP_CTRL_TYPE_##_type:                         \
	        if (tlen < sizeof(struct pf_pptp_ctrl_##_name)) \
	                return;                                 \
	        break;

	switch (cm.ctrl.type) {
		TYPE_LEN_CHECK(START_REQ, start_req);
		TYPE_LEN_CHECK(START_RPY, start_rpy);
		TYPE_LEN_CHECK(STOP_REQ, stop_req);
		TYPE_LEN_CHECK(STOP_RPY, stop_rpy);
		TYPE_LEN_CHECK(ECHO_REQ, echo_req);
		TYPE_LEN_CHECK(ECHO_RPY, echo_rpy);
		TYPE_LEN_CHECK(CALL_OUT_REQ, call_out_req);
		TYPE_LEN_CHECK(CALL_OUT_RPY, call_out_rpy);
		TYPE_LEN_CHECK(CALL_IN_1ST, call_in_1st);
		TYPE_LEN_CHECK(CALL_IN_2ND, call_in_2nd);
		TYPE_LEN_CHECK(CALL_IN_3RD, call_in_3rd);
		TYPE_LEN_CHECK(CALL_CLR, call_clr);
		TYPE_LEN_CHECK(CALL_DISC, call_disc);
		TYPE_LEN_CHECK(ERROR, error);
		TYPE_LEN_CHECK(SET_LINKINFO, set_linkinfo);
	default:
		return;
	}
#undef TYPE_LEN_CHECK

	if (!gs) {
		/*
		 * First relevant control message: fabricate a GREv1 data
		 * state cloned from the TCP control state, with its own
		 * state key and app state, linked to this state in both
		 * directions.  It stays PFTM_UNLINKED until a call is
		 * actually established (PF_PPTP_INSERT_GRE below).
		 */
		gs = pool_get(&pf_state_pl, PR_WAITOK);
		if (!gs) {
			return;
		}

		memcpy(gs, s, sizeof(*gs));

		memset(&gs->entry_id, 0, sizeof(gs->entry_id));
		memset(&gs->entry_list, 0, sizeof(gs->entry_list));

		TAILQ_INIT(&gs->unlink_hooks);
		gs->rt_kif = NULL;
		gs->creation = 0;
		gs->pfsync_time = 0;
		gs->packets[0] = gs->packets[1] = 0;
		gs->bytes[0] = gs->bytes[1] = 0;
		gs->timeout = PFTM_UNLINKED;
		gs->id = gs->creatorid = 0;
		gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
		gs->src.scrub = gs->dst.scrub = 0;

		gas = pool_get(&pf_app_state_pl, PR_NOWAIT);
		if (!gas) {
			pool_put(&pf_state_pl, gs);
			return;
		}

		gsk = pf_alloc_state_key(gs, NULL);
		if (!gsk) {
			pool_put(&pf_app_state_pl, gas);
			pool_put(&pf_state_pl, gs);
			return;
		}

		/* GRE key mirrors the TCP key's addresses, with GRE proto. */
		memcpy(&gsk->lan, &sk->lan, sizeof(gsk->lan));
		memcpy(&gsk->gwy, &sk->gwy, sizeof(gsk->gwy));
		memcpy(&gsk->ext_lan, &sk->ext_lan, sizeof(gsk->ext_lan));
		memcpy(&gsk->ext_gwy, &sk->ext_gwy, sizeof(gsk->ext_gwy));
		gsk->af_lan = sk->af_lan;
		gsk->af_gwy = sk->af_gwy;
		gsk->proto = IPPROTO_GRE;
		gsk->proto_variant = PF_GRE_PPTP_VARIANT;
		gsk->app_state = gas;
		gsk->lan.xport.call_id = 0;
		gsk->gwy.xport.call_id = 0;
		gsk->ext_lan.xport.call_id = 0;
		gsk->ext_gwy.xport.call_id = 0;
		ASSERT(gsk->flowsrc == FLOWSRC_PF);
		ASSERT(gsk->flowhash != 0);
		memset(gas, 0, sizeof(*gas));
		gas->u.grev1.pptp_state = s;
		STATE_INC_COUNTERS(gs);
		pptps->grev1_state = gs;
		(void) hook_establish(&gs->unlink_hooks, 0,
		    (hook_fn_t) pf_grev1_unlink, gs);
	} else {
		gsk = gs->state_key;
	}

	/*
	 * Map the control connection's direction onto the PNS (PPTP
	 * Network Server) and PAC (PPTP Access Concentrator) roles: which
	 * endpoint's call ID and GRE peer state is which.
	 */
	switch (sk->direction) {
	case PF_IN:
		pns_call_id = &gsk->ext_lan.xport.call_id;
		pns_state = &gs->dst.state;
		pac_call_id = &gsk->lan.xport.call_id;
		pac_state = &gs->src.state;
		break;

	case PF_OUT:
		pns_call_id = &gsk->lan.xport.call_id;
		pns_state = &gs->src.state;
		pac_call_id = &gsk->ext_lan.xport.call_id;
		pac_state = &gs->dst.state;
		break;

	default:
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_pptp_handler: bad directional!\n"));
		return;
	}

	spoof_call_id = 0;
	op = PF_PPTP_PASS;

	ct = ntohs(cm.ctrl.type);

	/*
	 * Per-message handling: record call IDs, decide whether the
	 * message's call-id field must be spoofed (NAT case), and whether
	 * the GREv1 state should be inserted or removed.
	 */
	switch (ct) {
	case PF_PPTP_CTRL_TYPE_CALL_OUT_REQ:
		*pns_call_id = cm.msg.call_out_req.call_id;
		*pns_state = PFGRE1S_INITIATING;
		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
			spoof_call_id = &cm.msg.call_out_req.call_id;
		}
		break;

	case PF_PPTP_CTRL_TYPE_CALL_OUT_RPY:
		*pac_call_id = cm.msg.call_out_rpy.call_id;
		if (s->nat_rule.ptr) {
			spoof_call_id =
			    (pac_call_id == &gsk->lan.xport.call_id) ?
			    &cm.msg.call_out_rpy.call_id :
			    &cm.msg.call_out_rpy.peer_call_id;
		}
		if (gs->timeout == PFTM_UNLINKED) {
			*pac_state = PFGRE1S_INITIATING;
			op = PF_PPTP_INSERT_GRE;
		}
		break;

	case PF_PPTP_CTRL_TYPE_CALL_IN_1ST:
		*pns_call_id = cm.msg.call_in_1st.call_id;
		*pns_state = PFGRE1S_INITIATING;
		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
			spoof_call_id = &cm.msg.call_in_1st.call_id;
		}
		break;

	case PF_PPTP_CTRL_TYPE_CALL_IN_2ND:
		*pac_call_id = cm.msg.call_in_2nd.call_id;
		*pac_state = PFGRE1S_INITIATING;
		if (s->nat_rule.ptr) {
			spoof_call_id =
			    (pac_call_id == &gsk->lan.xport.call_id) ?
			    &cm.msg.call_in_2nd.call_id :
			    &cm.msg.call_in_2nd.peer_call_id;
		}
		break;

	case PF_PPTP_CTRL_TYPE_CALL_IN_3RD:
		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
			spoof_call_id = &cm.msg.call_in_3rd.call_id;
		}
		if (cm.msg.call_in_3rd.call_id != *pns_call_id) {
			break;
		}
		if (gs->timeout == PFTM_UNLINKED) {
			op = PF_PPTP_INSERT_GRE;
		}
		break;

	case PF_PPTP_CTRL_TYPE_CALL_CLR:
		/*
		 * NOTE(review): removal on call-id MISMATCH (!=) looks
		 * inverted at first glance — confirm against RFC 2637
		 * call-clear semantics before changing.
		 */
		if (cm.msg.call_clr.call_id != *pns_call_id) {
			op = PF_PPTP_REMOVE_GRE;
		}
		break;

	case PF_PPTP_CTRL_TYPE_CALL_DISC:
		/*
		 * NOTE(review): reads the call_clr overlay for a CALL_DISC
		 * message — presumably the call_id fields alias; verify the
		 * struct layouts.
		 */
		if (cm.msg.call_clr.call_id != *pac_call_id) {
			op = PF_PPTP_REMOVE_GRE;
		}
		break;

	case PF_PPTP_CTRL_TYPE_ERROR:
		if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
			spoof_call_id = &cm.msg.error.peer_call_id;
		}
		break;

	case PF_PPTP_CTRL_TYPE_SET_LINKINFO:
		if (s->nat_rule.ptr && pac_call_id == &gsk->lan.xport.call_id) {
			spoof_call_id = &cm.msg.set_linkinfo.peer_call_id;
		}
		break;

	default:
		op = PF_PPTP_PASS;
		break;
	}

	/*
	 * First time the LAN-side call ID is learned: pick the gateway-side
	 * (spoofed) call ID.  When NATing, choose a random call ID that
	 * does not collide with any existing GREv1 state.
	 */
	if (!gsk->gwy.xport.call_id && gsk->lan.xport.call_id) {
		gsk->gwy.xport.call_id = gsk->lan.xport.call_id;
		if (spoof_call_id) {
			u_int16_t call_id = 0;
			int n = 0;
			struct pf_state_key_cmp key;

			key.af_gwy = gsk->af_gwy;
			key.proto = IPPROTO_GRE;
			key.proto_variant = PF_GRE_PPTP_VARIANT;
			PF_ACPY(&key.gwy.addr, &gsk->gwy.addr, key.af_gwy);
			PF_ACPY(&key.ext_gwy.addr, &gsk->ext_gwy.addr, key.af_gwy);
			key.gwy.xport.call_id = gsk->gwy.xport.call_id;
			key.ext_gwy.xport.call_id = gsk->ext_gwy.xport.call_id;
			do {
				/*
				 * NOTE(review): htonl() of random() assigned
				 * to a 16-bit variable keeps only the low 16
				 * bits; htons() looks intended — confirm.
				 */
				call_id = htonl(random());
			} while (!call_id);

			while (pf_find_state_all(&key, PF_IN, 0)) {
				call_id = ntohs(call_id);
				--call_id;
				/*
				 * NOTE(review): call_id is decremented twice
				 * per collision (above and here) — verify this
				 * is intentional.
				 */
				if (--call_id == 0) {
					call_id = 0xffff;
				}
				call_id = htons(call_id);

				key.gwy.xport.call_id = call_id;

				/* Give up after exhausting the call-id space. */
				if (++n > 65535) {
					DPFPRINTF(PF_DEBUG_URGENT,
					    ("pf_pptp_handler: failed to spoof "
					    "call id\n"));
					key.gwy.xport.call_id = 0;
					break;
				}
			}

			gsk->gwy.xport.call_id = call_id;
		}
	}

	th = pf_pd_get_hdr_tcp(pd);

	/*
	 * Rewrite the call-id field inside the control message (and fix the
	 * TCP checksum incrementally), translating between the LAN-side and
	 * gateway-side call IDs, then copy the message back into the packet.
	 */
	if (spoof_call_id && gsk->lan.xport.call_id != gsk->gwy.xport.call_id) {
		if (*spoof_call_id == gsk->gwy.xport.call_id) {
			*spoof_call_id = gsk->lan.xport.call_id;
			th->th_sum = pf_cksum_fixup(th->th_sum,
			    gsk->gwy.xport.call_id, gsk->lan.xport.call_id, 0);
		} else {
			*spoof_call_id = gsk->gwy.xport.call_id;
			th->th_sum = pf_cksum_fixup(th->th_sum,
			    gsk->lan.xport.call_id, gsk->gwy.xport.call_id, 0);
		}

		if (pf_lazy_makewritable(pd, pbuf, off + plen) == NULL) {
			/* Cannot rewrite: drop the fabricated GREv1 state. */
			pptps->grev1_state = NULL;
			STATE_DEC_COUNTERS(gs);
			pool_put(&pf_state_pl, gs);
			return;
		}
		pbuf_copy_back(pbuf, off, plen, &cm, sizeof(cm));
	}

	switch (op) {
	case PF_PPTP_REMOVE_GRE:
		/* Call torn down: expire the GREv1 state and clear its key. */
		gs->timeout = PFTM_PURGE;
		gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
		gsk->lan.xport.call_id = 0;
		gsk->gwy.xport.call_id = 0;
		gsk->ext_lan.xport.call_id = 0;
		gsk->ext_gwy.xport.call_id = 0;
		gs->id = gs->creatorid = 0;
		break;

	case PF_PPTP_INSERT_GRE:
		/* Call established: activate and insert the GREv1 state. */
		gs->creation = pf_time_second();
		gs->expire = pf_time_second();
		gs->timeout = PFTM_TCP_ESTABLISHED;
		if (gs->src_node != NULL) {
			++gs->src_node->states;
			VERIFY(gs->src_node->states != 0);
		}
		if (gs->nat_src_node != NULL) {
			++gs->nat_src_node->states;
			VERIFY(gs->nat_src_node->states != 0);
		}
		pf_set_rt_ifp(gs, &sk->lan.addr, sk->af_lan);
		if (pf_insert_state(BOUND_IFACE(s->rule.ptr, kif), gs)) {
			/*
			 * <[email protected]>
			 * FIX ME: insertion can fail when multiple PNS
			 * behind the same NAT open calls to the same PAC
			 * simultaneously because spoofed call ID numbers
			 * are chosen before states are inserted.  This is
			 * hard to fix and happens infrequently enough that
			 * users will normally try again and this ALG will
			 * succeed.  Failures are expected to be rare enough
			 * that fixing this is a low priority.
			 */
			pptps->grev1_state = NULL;
			pd->lmw = -1;   /* Force PF_DROP on PFRES_MEMORY */
			pf_src_tree_remove_state(gs);
			STATE_DEC_COUNTERS(gs);
			pool_put(&pf_state_pl, gs);
			DPFPRINTF(PF_DEBUG_URGENT, ("pf_pptp_handler: error "
			    "inserting GREv1 state.\n"));
		}
		break;

	default:
		break;
	}
}
6947 
6948 static __attribute__((noinline)) void
pf_pptp_unlink(struct pf_state * s)6949 pf_pptp_unlink(struct pf_state *s)
6950 {
6951 	struct pf_app_state *as = s->state_key->app_state;
6952 	struct pf_state *grev1s = as->u.pptp.grev1_state;
6953 
6954 	if (grev1s) {
6955 		struct pf_app_state *gas = grev1s->state_key->app_state;
6956 
6957 		if (grev1s->timeout < PFTM_MAX) {
6958 			grev1s->timeout = PFTM_PURGE;
6959 		}
6960 		gas->u.grev1.pptp_state = NULL;
6961 		as->u.pptp.grev1_state = NULL;
6962 	}
6963 }
6964 
6965 static __attribute__((noinline)) void
pf_grev1_unlink(struct pf_state * s)6966 pf_grev1_unlink(struct pf_state *s)
6967 {
6968 	struct pf_app_state *as = s->state_key->app_state;
6969 	struct pf_state *pptps = as->u.grev1.pptp_state;
6970 
6971 	if (pptps) {
6972 		struct pf_app_state *pas = pptps->state_key->app_state;
6973 
6974 		pas->u.pptp.grev1_state = NULL;
6975 		as->u.grev1.pptp_state = NULL;
6976 	}
6977 }
6978 
6979 static int
pf_ike_compare(struct pf_app_state * a,struct pf_app_state * b)6980 pf_ike_compare(struct pf_app_state *a, struct pf_app_state *b)
6981 {
6982 	int64_t d = a->u.ike.cookie - b->u.ike.cookie;
6983 	return (d > 0) ? 1 : ((d < 0) ? -1 : 0);
6984 }
6985 
6986 static int
pf_do_nat64(struct pf_state_key * sk,struct pf_pdesc * pd,pbuf_t * pbuf,int off)6987 pf_do_nat64(struct pf_state_key *sk, struct pf_pdesc *pd, pbuf_t *pbuf,
6988     int off)
6989 {
6990 	if (pd->af == AF_INET) {
6991 		if (pd->af != sk->af_lan) {
6992 			pd->ndaddr = sk->lan.addr;
6993 			pd->naddr = sk->ext_lan.addr;
6994 		} else {
6995 			pd->naddr = sk->gwy.addr;
6996 			pd->ndaddr = sk->ext_gwy.addr;
6997 		}
6998 		return pf_nat64_ipv4(pbuf, off, pd);
6999 	} else if (pd->af == AF_INET6) {
7000 		if (pd->af != sk->af_lan) {
7001 			pd->ndaddr = sk->lan.addr;
7002 			pd->naddr = sk->ext_lan.addr;
7003 		} else {
7004 			pd->naddr = sk->gwy.addr;
7005 			pd->ndaddr = sk->ext_gwy.addr;
7006 		}
7007 		return pf_nat64_ipv6(pbuf, off, pd);
7008 	}
7009 	return PF_DROP;
7010 }
7011 
/*
 * Stateful tracking for a TCP packet against an existing state entry.
 *
 * Looks up the state matching the packet in 'pd' (trying both the
 * lan-ext and ext-gwy keys to cover NAT64-created states), runs the
 * synproxy handshake logic if the state is in a proxy phase, applies
 * the sequence-window tracking algorithm, invokes any application
 * handler (ALG), and performs NAT/NAT64 address rewriting.
 *
 * Returns PF_PASS, PF_DROP (with *reason set), or PF_SYNPROXY_DROP
 * (packet consumed by the synproxy handshake).  May rewrite the packet
 * in place (sequence modulation, SACK demodulation, translation).
 */
static __attribute__((noinline)) int
pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd,
    u_short *reason)
{
#pragma unused(h)
	struct pf_state_key_cmp  key;
	struct tcphdr           *__single th = pf_pd_get_hdr_tcp(pd);
	u_int16_t                win = ntohs(th->th_win);
	u_int32_t                ack, end, seq, orig_seq;
	u_int8_t                 sws, dws;
	int                      ackskew;
	int                      copyback = 0;
	struct pf_state_peer    *src, *dst;
	struct pf_state_key     *sk;

	key.app_state = 0;
	key.proto = IPPROTO_TCP;
	key.af_lan = key.af_gwy = pd->af;

	/*
	 * For NAT64 the first time rule search and state creation
	 * is done on the incoming side only.
	 * Once the state gets created, NAT64's LAN side (ipv6) will
	 * not be able to find the state in ext-gwy tree as that normally
	 * is intended to be looked up for incoming traffic from the
	 * WAN side.
	 * Therefore to handle NAT64 case we init keys here for both
	 * lan-ext as well as ext-gwy trees.
	 * In the state lookup we attempt a lookup on both trees if
	 * first one does not return any result and return a match if
	 * the match state's was created by NAT64 rule.
	 */
	PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
	PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
	key.ext_gwy.xport.port = th->th_sport;
	key.gwy.xport.port = th->th_dport;

	PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
	PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
	key.lan.xport.port = th->th_sport;
	key.ext_lan.xport.port = th->th_dport;

	STATE_LOOKUP();

	sk = (*state)->state_key;
	/*
	 * In case of NAT64 the translation is first applied on the LAN
	 * side. Therefore for stack's address family comparison
	 * we use sk->af_lan.
	 */
	if ((direction == sk->direction) && (pd->af == sk->af_lan)) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/*
	 * Synproxy, first leg: complete the 3-way handshake with the
	 * connection initiator ourselves before ever contacting the
	 * destination.
	 */
	if (src->state == PF_TCPS_PROXY_SRC) {
		if (direction != sk->direction) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		}
		if (th->th_flags & TH_SYN) {
			if (ntohl(th->th_seq) != src->seqlo) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return PF_DROP;
			}
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    src->seqhi, ntohl(th->th_seq) + 1,
			    TH_SYN | TH_ACK, 0, src->mss, 0, 1,
			    0, NULL, NULL);
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		} else if (!(th->th_flags & TH_ACK) ||
		    (ntohl(th->th_ack) != src->seqhi + 1) ||
		    (ntohl(th->th_seq) != src->seqlo + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_DROP;
		} else if ((*state)->src_node != NULL &&
		    pf_src_connlimit(state)) {
			REASON_SET(reason, PFRES_SRCLIMIT);
			return PF_DROP;
		} else {
			src->state = PF_TCPS_PROXY_DST;
		}
	}
	/*
	 * Synproxy, second leg: perform our own handshake with the
	 * destination, then ACK both sides and record the sequence
	 * offsets (seqdiff) needed to splice the two half-connections.
	 */
	if (src->state == PF_TCPS_PROXY_DST) {
		struct pf_state_host *psrc, *pdst;

		if (direction == PF_OUT) {
			psrc = &sk->gwy;
			pdst = &sk->ext_gwy;
		} else {
			psrc = &sk->ext_lan;
			pdst = &sk->lan;
		}
		if (direction == sk->direction) {
			if (((th->th_flags & (TH_SYN | TH_ACK)) != TH_ACK) ||
			    (ntohl(th->th_ack) != src->seqhi + 1) ||
			    (ntohl(th->th_seq) != src->seqlo + 1)) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return PF_DROP;
			}
			src->max_win = MAX(ntohs(th->th_win), 1);
			if (dst->seqhi == 1) {
				dst->seqhi = htonl(random());
			}
			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
			    &pdst->addr, psrc->xport.port, pdst->xport.port,
			    dst->seqhi, 0, TH_SYN, 0,
			    src->mss, 0, 0, (*state)->tag, NULL, NULL);
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		} else if (((th->th_flags & (TH_SYN | TH_ACK)) !=
		    (TH_SYN | TH_ACK)) ||
		    (ntohl(th->th_ack) != dst->seqhi + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_DROP;
		} else {
			dst->max_win = MAX(ntohs(th->th_win), 1);
			dst->seqlo = ntohl(th->th_seq);
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
			    TH_ACK, src->max_win, 0, 0, 0,
			    (*state)->tag, NULL, NULL);
			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
			    &pdst->addr, psrc->xport.port, pdst->xport.port,
			    src->seqhi + 1, src->seqlo + 1,
			    TH_ACK, dst->max_win, 0, 0, 1,
			    0, NULL, NULL);
			src->seqdiff = dst->seqhi -
			    src->seqlo;
			dst->seqdiff = src->seqhi -
			    dst->seqlo;
			src->seqhi = src->seqlo +
			    dst->max_win;
			dst->seqhi = dst->seqlo +
			    src->max_win;
			src->wscale = dst->wscale = 0;
			src->state = dst->state =
			    TCPS_ESTABLISHED;
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		}
	}

	/*
	 * A pure SYN on a connection both peers have already shut down:
	 * the old state is stale.  Unlink it and drop this packet so a
	 * retransmitted SYN can establish a fresh state.
	 */
	if (((th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN) &&
	    dst->state >= TCPS_FIN_WAIT_2 &&
	    src->state >= TCPS_FIN_WAIT_2) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state reuse ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf("\n");
		}
		/* XXX make sure it's the same direction ?? */
		src->state = dst->state = TCPS_CLOSED;
		pf_unlink_state(*state);
		*state = NULL;
		return PF_DROP;
	}

	/*
	 * Window-scale shifts: never in effect on a SYN segment; when a
	 * peer did not negotiate scaling, assume the maximum shift so
	 * the window checks below stay permissive.
	 */
	if ((th->th_flags & TH_SYN) == 0) {
		sws = (src->wscale & PF_WSCALE_FLAG) ?
		    (src->wscale & PF_WSCALE_MASK) : TCP_MAX_WINSHIFT;
		dws = (dst->wscale & PF_WSCALE_FLAG) ?
		    (dst->wscale & PF_WSCALE_MASK) : TCP_MAX_WINSHIFT;
	} else {
		sws = dws = 0;
	}

	/*
	 * Sequence tracking algorithm from Guido van Rooij's paper:
	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
	 *	tcp_filtering.ps
	 */

	orig_seq = seq = ntohl(th->th_seq);
	if (src->seqlo == 0) {
		/* First packet from this end. Set its state */

		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
		    src->scrub == NULL) {
			if (pf_normalize_tcp_init(pbuf, off, pd, th, src, dst)) {
				REASON_SET(reason, PFRES_MEMORY);
				return PF_DROP;
			}
		}

		/* Deferred generation of sequence number modulator */
		if (dst->seqdiff && !src->seqdiff) {
			/* use random iss for the TCP server */
			while ((src->seqdiff = random() - seq) == 0) {
				;
			}
			ack = ntohl(th->th_ack) - dst->seqdiff;
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			copyback = off + sizeof(*th);
		} else {
			ack = ntohl(th->th_ack);
		}

		/* 'end' is one past the last sequence octet this segment
		 * occupies; SYN and FIN each consume one sequence number. */
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
			if (dst->wscale & PF_WSCALE_FLAG) {
				src->wscale = pf_get_wscale(pbuf, off,
				    th->th_off, pd->af);
				if (src->wscale & PF_WSCALE_FLAG) {
					/*
					 * Remove scale factor from initial
					 * window
					 */
					sws = src->wscale & PF_WSCALE_MASK;
					win = ((u_int32_t)win + (1 << sws) - 1)
					    >> sws;
					dws = dst->wscale & PF_WSCALE_MASK;
				} else {
					/*
					 * Window scale negotiation has failed,
					 * therefore we must restore the window
					 * scale in the state record that we
					 * optimistically removed in
					 * pf_test_rule().  Care is required to
					 * prevent arithmetic overflow from
					 * zeroing the window when it's
					 * truncated down to 16-bits.
					 */
					u_int32_t max_win = dst->max_win;
					max_win <<=
					    dst->wscale & PF_WSCALE_MASK;
					dst->max_win = MIN(0xffff, max_win);
					/* in case of a retrans SYN|ACK */
					dst->wscale = 0;
				}
			}
		}
		if (th->th_flags & TH_FIN) {
			end++;
		}

		src->seqlo = seq;
		if (src->state < TCPS_SYN_SENT) {
			src->state = TCPS_SYN_SENT;
		}

		/*
		 * May need to slide the window (seqhi may have been set by
		 * the crappy stack check or if we picked up the connection
		 * after establishment)
		 */
		if (src->seqhi == 1 ||
		    SEQ_GEQ(end + MAX(1, (u_int32_t)dst->max_win << dws),
		    src->seqhi)) {
			src->seqhi = end + MAX(1, (u_int32_t)dst->max_win << dws);
		}
		if (win > src->max_win) {
			src->max_win = win;
		}
	} else {
		ack = ntohl(th->th_ack) - dst->seqdiff;
		if (src->seqdiff) {
			/* Modulate sequence numbers */
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			copyback = off + sizeof(*th);
		}
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
		}
		if (th->th_flags & TH_FIN) {
			end++;
		}
	}

	if ((th->th_flags & TH_ACK) == 0) {
		/* Let it pass through the ack skew check */
		ack = dst->seqlo;
	} else if ((ack == 0 &&
	    (th->th_flags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) ||
	    /* broken tcp stacks do not set ack */
	    (dst->state < TCPS_SYN_SENT)) {
		/*
		 * Many stacks (ours included) will set the ACK number in an
		 * FIN|ACK if the SYN times out -- no sequence to ACK.
		 */
		ack = dst->seqlo;
	}

	if (seq == end) {
		/* Ease sequencing restrictions on no data packets */
		seq = src->seqlo;
		end = seq;
	}

	ackskew = dst->seqlo - ack;


	/*
	 * Need to demodulate the sequence numbers in any TCP SACK options
	 * (Selective ACK). We could optionally validate the SACK values
	 * against the current ACK window, either forwards or backwards, but
	 * I'm not confident that SACK has been implemented properly
	 * everywhere. It wouldn't surprise me if several stacks accidently
	 * SACK too far backwards of previously ACKed data. There really aren't
	 * any security implications of bad SACKing unless the target stack
	 * doesn't validate the option length correctly. Someone trying to
	 * spoof into a TCP connection won't bother blindly sending SACK
	 * options anyway.
	 */
	if (dst->seqdiff && (th->th_off << 2) > (int)sizeof(struct tcphdr)) {
		copyback = pf_modulate_sack(pbuf, off, pd, th, dst);
		if (copyback == -1) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		pbuf = pd->mp;  // XXXSCW: Why?
	}


#define MAXACKWINDOW (0xffff + 1500)    /* 1500 is an arbitrary fudge factor */
	if (SEQ_GEQ(src->seqhi, end) &&
	    /* Last octet inside other's window space */
	    SEQ_GEQ(seq, src->seqlo - ((u_int32_t)dst->max_win << dws)) &&
	    /* Retrans: not more than one window back */
	    (ackskew >= -MAXACKWINDOW) &&
	    /* Acking not more than one reassembled fragment backwards */
	    (ackskew <= (MAXACKWINDOW << sws)) &&
	    /* Acking not more than one window forward */
	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
	    (pd->flags & PFDESC_IP_REAS) == 0)) {
		/* Require an exact/+1 sequence match on resets when possible */

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(pbuf, off, pd, reason, th,
			    *state, src, dst, &copyback)) {
				return PF_DROP;
			}

			pbuf = pd->mp;  // XXXSCW: Why?
		}

		/* update max window */
		if (src->max_win < win) {
			src->max_win = win;
		}
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo)) {
			src->seqlo = end;
		}
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi)) {
			dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
		}

		/* update states */
		if (th->th_flags & TH_SYN) {
			if (src->state < TCPS_SYN_SENT) {
				src->state = TCPS_SYN_SENT;
			}
		}
		if (th->th_flags & TH_FIN) {
			if (src->state < TCPS_CLOSING) {
				src->state = TCPS_CLOSING;
			}
		}
		if (th->th_flags & TH_ACK) {
			if (dst->state == TCPS_SYN_SENT) {
				dst->state = TCPS_ESTABLISHED;
				if (src->state == TCPS_ESTABLISHED &&
				    (*state)->src_node != NULL &&
				    pf_src_connlimit(state)) {
					REASON_SET(reason, PFRES_SRCLIMIT);
					return PF_DROP;
				}
			} else if (dst->state == TCPS_CLOSING) {
				dst->state = TCPS_FIN_WAIT_2;
			}
		}
		if (th->th_flags & TH_RST) {
			src->state = dst->state = TCPS_TIME_WAIT;
		}

		/* update expire time */
		(*state)->expire = pf_time_second();
		if (src->state >= TCPS_FIN_WAIT_2 &&
		    dst->state >= TCPS_FIN_WAIT_2) {
			(*state)->timeout = PFTM_TCP_CLOSED;
		} else if (src->state >= TCPS_CLOSING &&
		    dst->state >= TCPS_CLOSING) {
			(*state)->timeout = PFTM_TCP_FIN_WAIT;
		} else if (src->state < TCPS_ESTABLISHED ||
		    dst->state < TCPS_ESTABLISHED) {
			(*state)->timeout = PFTM_TCP_OPENING;
		} else if (src->state >= TCPS_CLOSING ||
		    dst->state >= TCPS_CLOSING) {
			(*state)->timeout = PFTM_TCP_CLOSING;
		} else {
			(*state)->timeout = PFTM_TCP_ESTABLISHED;
		}

		/* Fall through to PASS packet */
	} else if ((dst->state < TCPS_SYN_SENT ||
	    dst->state >= TCPS_FIN_WAIT_2 || src->state >= TCPS_FIN_WAIT_2) &&
	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
	    /* Within a window forward of the originating packet */
	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
		/* Within a window backward of the originating packet */

		/*
		 * This currently handles three situations:
		 *  1) Stupid stacks will shotgun SYNs before their peer
		 *     replies.
		 *  2) When PF catches an already established stream (the
		 *     firewall rebooted, the state table was flushed, routes
		 *     changed...)
		 *  3) Packets get funky immediately after the connection
		 *     closes (this should catch Solaris spurious ACK|FINs
		 *     that web servers like to spew after a close)
		 *
		 * This must be a little more careful than the above code
		 * since packet floods will also be caught here. We don't
		 * update the TTL here to mitigate the damage of a packet
		 * flood and so the same code can handle awkward establishment
		 * and a loosened connection close.
		 * In the establishment case, a correct peer response will
		 * validate the connection, go through the normal state code
		 * and keep updating the state TTL.
		 */

		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: loose state match: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
			    pd->p_len, ackskew, (*state)->packets[0],
			    (*state)->packets[1],
			    direction == PF_IN ? "in" : "out",
			    direction == sk->direction ?
			    "fwd" : "rev");
		}

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(pbuf, off, pd, reason, th,
			    *state, src, dst, &copyback)) {
				return PF_DROP;
			}
			pbuf = pd->mp;  // XXXSCW: Why?
		}

		/* update max window */
		if (src->max_win < win) {
			src->max_win = win;
		}
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo)) {
			src->seqlo = end;
		}
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi)) {
			dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
		}

		/*
		 * Cannot set dst->seqhi here since this could be a shotgunned
		 * SYN and not an already established connection.
		 */

		if (th->th_flags & TH_FIN) {
			if (src->state < TCPS_CLOSING) {
				src->state = TCPS_CLOSING;
			}
		}
		if (th->th_flags & TH_RST) {
			src->state = dst->state = TCPS_TIME_WAIT;
		}

		/* Fall through to PASS packet */
	} else {
		if (dst->state == TCPS_SYN_SENT &&
		    src->state == TCPS_SYN_SENT) {
			/* Send RST for state mismatches during handshake */
			if (!(th->th_flags & TH_RST)) {
				pf_send_tcp((*state)->rule.ptr, pd->af,
				    pd->dst, pd->src, th->th_dport,
				    th->th_sport, ntohl(th->th_ack), 0,
				    TH_RST, 0, 0,
				    (*state)->rule.ptr->return_ttl, 1, 0,
				    pd->eh, kif->pfik_ifp);
			}
			src->seqlo = 0;
			src->seqhi = 1;
			src->max_win = 1;
		} else if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: BAD state: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf("\n   seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "sws=%u dws=%u pkts=%llu:%llu dir=%s,%s\n",
			    seq, orig_seq, ack, pd->p_len, ackskew,
			    (unsigned int)sws, (unsigned int)dws,
			    (*state)->packets[0], (*state)->packets[1],
			    direction == PF_IN ? "in" : "out",
			    direction == sk->direction ?
			    "fwd" : "rev");
			printf("pf: State failure on: %c %c %c %c | %c %c\n",
			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
			    SEQ_GEQ(seq,
			    src->seqlo - ((u_int32_t)dst->max_win << dws)) ?
			    ' ': '2',
			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
		}
		REASON_SET(reason, PFRES_BADSTATE);
		return PF_DROP;
	}

	/* Any packets which have gotten here are to be passed */

	/* Hand the payload to the application handler (ALG), if any. */
	if (sk->app_state &&
	    sk->app_state->handler) {
		sk->app_state->handler(*state, direction,
		    off + (th->th_off << 2), pd, kif);
		if (pd->lmw < 0) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}
		pbuf = pd->mp;  // XXXSCW: Why?
	}

	/* translate source/destination address, if necessary */
	if (STATE_TRANSLATE(sk)) {
		pd->naf = (pd->af == sk->af_lan) ? sk->af_gwy : sk->af_lan;

		if (direction == PF_OUT) {
			pf_change_ap(direction, pd->mp, pd->src, &th->th_sport,
			    pd->ip_sum, &th->th_sum, &sk->gwy.addr,
			    sk->gwy.xport.port, 0, pd->af, pd->naf, 1);
		} else {
			/* NAT64: both endpoints are rewritten. */
			if (pd->af != pd->naf) {
				if (pd->af == sk->af_gwy) {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &th->th_dport, pd->ip_sum,
					    &th->th_sum, &sk->lan.addr,
					    sk->lan.xport.port, 0,
					    pd->af, pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &th->th_sport, pd->ip_sum,
					    &th->th_sum, &sk->ext_lan.addr,
					    th->th_sport, 0, pd->af,
					    pd->naf, 0);
				} else {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &th->th_dport, pd->ip_sum,
					    &th->th_sum, &sk->ext_gwy.addr,
					    th->th_dport, 0, pd->af,
					    pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &th->th_sport, pd->ip_sum,
					    &th->th_sum, &sk->gwy.addr,
					    sk->gwy.xport.port, 0, pd->af,
					    pd->naf, 0);
				}
			} else {
				pf_change_ap(direction, pd->mp, pd->dst,
				    &th->th_dport, pd->ip_sum,
				    &th->th_sum, &sk->lan.addr,
				    sk->lan.xport.port, 0, pd->af,
				    pd->naf, 1);
			}
		}

		copyback = off + sizeof(*th);
	}

	if (copyback) {
		if (pf_lazy_makewritable(pd, pbuf, copyback) == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		/* Copyback sequence modulation or stateful scrub changes */
		pbuf_copy_back(pbuf, off, sizeof(*th), th, sizeof(*th));

		/* Address families differ: finish with NAT64 translation. */
		if (sk->af_lan != sk->af_gwy) {
			return pf_do_nat64(sk, pd, pbuf, off);
		}
	}
	return PF_PASS;
}
7617 
/*
 * Stateful tracking for a UDP packet against an existing state entry.
 *
 * Looks up the state for the packet in 'pd', retrying with
 * progressively looser endpoint-filtering variants (APD -> AD -> EI).
 * IKE traffic (both ports PF_IKE_PORT) is additionally keyed on the
 * IKE initiator cookie via a temporary app state.  Updates the UDP
 * pseudo-states and expiry, invokes any application handler, and
 * performs NAT/NAT64 address rewriting.
 *
 * Returns PF_PASS or PF_DROP (with *reason set); may rewrite the
 * packet in place.
 */
static __attribute__((noinline)) int
pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd, u_short *reason)
{
#pragma unused(h)
	struct pf_state_peer    *__single src, *__single dst;
	struct pf_state_key_cmp  key;
	struct pf_state_key     *__single sk;
	struct udphdr           *__single uh = pf_pd_get_hdr_udp(pd);
	/* On-stack app state used to carry the IKE cookie into the lookup. */
	struct pf_app_state as;
	int action, extfilter;
	key.app_state = 0;
	key.proto_variant = PF_EXTFILTER_APD;

	key.proto = IPPROTO_UDP;
	key.af_lan = key.af_gwy = pd->af;

	/*
	 * For NAT64 the first time rule search and state creation
	 * is done on the incoming side only.
	 * Once the state gets created, NAT64's LAN side (ipv6) will
	 * not be able to find the state in ext-gwy tree as that normally
	 * is intended to be looked up for incoming traffic from the
	 * WAN side.
	 * Therefore to handle NAT64 case we init keys here for both
	 * lan-ext as well as ext-gwy trees.
	 * In the state lookup we attempt a lookup on both trees if
	 * first one does not return any result and return a match if
	 * the match state's was created by NAT64 rule.
	 */
	PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
	PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
	key.ext_gwy.xport.port = uh->uh_sport;
	key.gwy.xport.port = uh->uh_dport;

	PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
	PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
	key.lan.xport.port = uh->uh_sport;
	key.ext_lan.xport.port = uh->uh_dport;

	/*
	 * IKE on both ports: additionally key the state lookup on the
	 * IKE initiator cookie carried in the payload.
	 */
	if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
	    ntohs(uh->uh_dport) == PF_IKE_PORT) {
		struct pf_ike_hdr ike;
		size_t plen = pbuf->pb_packet_len - off - sizeof(*uh);
		if (plen < PF_IKE_PACKET_MINSIZE) {
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: IKE message too small.\n"));
			return PF_DROP;
		}

		if (plen > sizeof(ike)) {
			plen = sizeof(ike);
		}
		pbuf_copy_data(pbuf, off + sizeof(*uh), plen, &ike, sizeof(ike));

		if (ike.initiator_cookie) {
			key.app_state = &as;
			as.compare_lan_ext = pf_ike_compare;
			as.compare_ext_gwy = pf_ike_compare;
			as.u.ike.cookie = ike.initiator_cookie;
		} else {
			/*
			 * <http://tools.ietf.org/html/\
			 *    draft-ietf-ipsec-nat-t-ike-01>
			 * Support non-standard NAT-T implementations that
			 * push the ESP packet over the top of the IKE packet.
			 * Do not drop packet.
			 */
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: IKE initiator cookie = 0.\n"));
		}
	}

	/* Try progressively looser endpoint-filtering variants. */
	*state = pf_find_state(kif, &key, direction);

	if (!key.app_state && *state == 0) {
		key.proto_variant = PF_EXTFILTER_AD;
		*state = pf_find_state(kif, &key, direction);
	}

	if (!key.app_state && *state == 0) {
		key.proto_variant = PF_EXTFILTER_EI;
		*state = pf_find_state(kif, &key, direction);
	}

	/* similar to STATE_LOOKUP() */
	if (*state != NULL && pd != NULL && !(pd->pktflags & PKTF_FLOW_ID)) {
		pd->flowsrc = (*state)->state_key->flowsrc;
		pd->flowhash = (*state)->state_key->flowhash;
		if (pd->flowhash != 0) {
			pd->pktflags |= PKTF_FLOW_ID;
			pd->pktflags &= ~PKTF_FLOW_ADV;
		}
	}

	if (pf_state_lookup_aux(state, kif, direction, &action)) {
		return action;
	}

	sk = (*state)->state_key;

	/*
	 * In case of NAT64 the translation is first applied on the LAN
	 * side. Therefore for stack's address family comparison
	 * we use sk->af_lan.
	 */
	if ((direction == sk->direction) && (pd->af == sk->af_lan)) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFUDPS_SINGLE) {
		src->state = PFUDPS_SINGLE;
	}
	if (dst->state == PFUDPS_SINGLE) {
		dst->state = PFUDPS_MULTIPLE;
	}

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) {
		(*state)->timeout = PFTM_UDP_MULTIPLE;
	} else {
		(*state)->timeout = PFTM_UDP_SINGLE;
	}

	/*
	 * For the looser filtering variants, record the external
	 * endpoint (port, and for EI also the address) now that we
	 * have seen it.
	 */
	extfilter = sk->proto_variant;
	if (extfilter > PF_EXTFILTER_APD) {
		if (direction == PF_OUT) {
			sk->ext_lan.xport.port = key.ext_lan.xport.port;
			if (extfilter > PF_EXTFILTER_AD) {
				PF_ACPY(&sk->ext_lan.addr, &key.ext_lan.addr,
				    key.af_lan);
			}
		} else {
			sk->ext_gwy.xport.port = key.ext_gwy.xport.port;
			if (extfilter > PF_EXTFILTER_AD) {
				PF_ACPY(&sk->ext_gwy.addr, &key.ext_gwy.addr,
				    key.af_gwy);
			}
		}
	}

	if (sk->app_state && sk->app_state->handler) {
		/* NOTE(review): uh_ulen is in network byte order; an
		 * ntohs() appears to be missing here — confirm intent. */
		sk->app_state->handler(*state, direction, off + uh->uh_ulen,
		    pd, kif);
		if (pd->lmw < 0) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}
		pbuf = pd->mp;  // XXXSCW: Why?
	}

	/* translate source/destination address, if necessary */
	if (STATE_TRANSLATE(sk)) {
		if (pf_lazy_makewritable(pd, pbuf, off + sizeof(*uh)) == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		pd->naf = (pd->af == sk->af_lan) ? sk->af_gwy : sk->af_lan;

		if (direction == PF_OUT) {
			pf_change_ap(direction, pd->mp, pd->src, &uh->uh_sport,
			    pd->ip_sum, &uh->uh_sum, &sk->gwy.addr,
			    sk->gwy.xport.port, 1, pd->af, pd->naf, 1);
		} else {
			/* NAT64: both endpoints are rewritten. */
			if (pd->af != pd->naf) {
				if (pd->af == sk->af_gwy) {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &uh->uh_dport, pd->ip_sum,
					    &uh->uh_sum, &sk->lan.addr,
					    sk->lan.xport.port, 1,
					    pd->af, pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &uh->uh_sport, pd->ip_sum,
					    &uh->uh_sum, &sk->ext_lan.addr,
					    uh->uh_sport, 1, pd->af,
					    pd->naf, 0);
				} else {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &uh->uh_dport, pd->ip_sum,
					    &uh->uh_sum, &sk->ext_gwy.addr,
					    uh->uh_dport, 1, pd->af,
					    pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &uh->uh_sport, pd->ip_sum,
					    &uh->uh_sum, &sk->gwy.addr,
					    sk->gwy.xport.port, 1, pd->af,
					    pd->naf, 0);
				}
			} else {
				pf_change_ap(direction, pd->mp, pd->dst,
				    &uh->uh_dport, pd->ip_sum,
				    &uh->uh_sum, &sk->lan.addr,
				    sk->lan.xport.port, 1,
				    pd->af, pd->naf, 1);
			}
		}

		pbuf_copy_back(pbuf, off, sizeof(*uh), uh, sizeof(*uh));
		/* Address families differ: finish with NAT64 translation. */
		if (sk->af_lan != sk->af_gwy) {
			return pf_do_nat64(sk, pd, pbuf, off);
		}
	}
	return PF_PASS;
}
7831 
7832 static u_int32_t
pf_compute_packet_icmp_gencnt(uint32_t af,u_int32_t type,u_int32_t code)7833 pf_compute_packet_icmp_gencnt(uint32_t af, u_int32_t type, u_int32_t code)
7834 {
7835 	if (af == PF_INET) {
7836 		if (type != ICMP_UNREACH && type != ICMP_TIMXCEED) {
7837 			return 0;
7838 		}
7839 	} else {
7840 		if (type != ICMP6_DST_UNREACH && type != ICMP6_PARAM_PROB &&
7841 		    type != ICMP6_TIME_EXCEEDED) {
7842 			return 0;
7843 		}
7844 	}
7845 	return (af << 24) | (type << 16) | (code << 8);
7846 }
7847 
7848 
/*
 * pf_test_state_icmp: look up and translate an ICMP/ICMPv6 packet against
 * the PF state table.
 *
 * Two distinct paths:
 *  - Query/reply messages (state_icmp == 0): look up an ICMP state keyed
 *    by the ICMP id and, when the state carries a translation, rewrite
 *    the outer addresses/id — including full ICMP<->ICMPv6 protocol
 *    translation when the state's LAN and GWY sides use different address
 *    families (NAT64).
 *  - Error messages (state_icmp != 0): pull up the embedded (inner) IP
 *    header plus the first bytes of the embedded transport header, look
 *    up the state of the original flow the error refers to, and rewrite
 *    both the outer packet and the embedded headers.
 *
 * Returns PF_PASS or PF_DROP (possibly via the pf_do_nat64/pf_nat64_*
 * helpers); on a match *state points at the state; *reason is set on
 * drops that have a classified cause.  `h` is unused.
 */
static __attribute__((noinline)) int
pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd, u_short *reason)
{
#pragma unused(h)
	struct pf_addr  *__single saddr = pd->src, *__single daddr = pd->dst;
	/* v4 source saved before any rewrite; reused below to rebuild
	 * naddr.addr32[3] for the NAT64 v4 output path */
	struct in_addr  srcv4_inaddr = saddr->v4addr;
	u_int16_t        icmpid = 0, *__single icmpsum = NULL;
	u_int8_t         icmptype = 0;
	u_int32_t        icmpcode = 0;
	int              state_icmp = 0;
	struct pf_state_key_cmp key;
	struct pf_state_key     *__single sk;

	struct pf_app_state as;
	key.app_state = 0;

	pd->off = off;

	/* Extract type/id/cksum/code from whichever ICMP flavor is present. */
	switch (pd->proto) {
#if INET
	case IPPROTO_ICMP:
		icmptype = pf_pd_get_hdr_icmp(pd)->icmp_type;
		icmpid = pf_pd_get_hdr_icmp(pd)->icmp_id;
		icmpsum = &pf_pd_get_hdr_icmp(pd)->icmp_cksum;
		icmpcode = pf_pd_get_hdr_icmp(pd)->icmp_code;

		/* error types embed the offending packet; handled in the
		 * else-branch below */
		if (ICMP_ERRORTYPE(icmptype)) {
			state_icmp++;
		}
		break;
#endif /* INET */
	case IPPROTO_ICMPV6:
		icmptype = pf_pd_get_hdr_icmp6(pd)->icmp6_type;
		icmpid = pf_pd_get_hdr_icmp6(pd)->icmp6_id;
		icmpsum = &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum;
		icmpcode = pf_pd_get_hdr_icmp6(pd)->icmp6_code;

		if (ICMP6_ERRORTYPE(icmptype)) {
			state_icmp++;
		}
		break;
	}

	/* Tag the packet's flow gencnt once, derived from (af, type, code);
	 * pf_compute_packet_icmp_gencnt() yields 0 for untracked types. */
	if (pbuf != NULL && pbuf->pb_flow_gencnt != NULL &&
	    *pbuf->pb_flow_gencnt == 0) {
		u_int32_t af = pd->proto == IPPROTO_ICMP ? PF_INET : PF_INET6;
		*pbuf->pb_flow_gencnt = pf_compute_packet_icmp_gencnt(af, icmptype, icmpcode);
	}

	if (!state_icmp) {
		/*
		 * ICMP query/reply message not related to a TCP/UDP packet.
		 * Search for an ICMP state.
		 */
		/*
		 * NAT64 requires protocol translation  between ICMPv4
		 * and ICMPv6. TCP and UDP do not require protocol
		 * translation. To avoid adding complexity just to
		 * handle ICMP(v4addr/v6addr), we always lookup  for
		 * proto = IPPROTO_ICMP on both LAN and WAN side
		 */
		key.proto = IPPROTO_ICMP;
		key.af_lan = key.af_gwy = pd->af;

		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
		key.ext_gwy.xport.port = 0;
		key.gwy.xport.port = icmpid;

		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
		key.lan.xport.port = icmpid;
		key.ext_lan.xport.port = 0;

		/* presumably returns from this function on lookup failure —
		 * macro defined elsewhere; confirm against its definition */
		STATE_LOOKUP();

		sk = (*state)->state_key;
		(*state)->expire = pf_time_second();
		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;

		/* translate source/destination address, if necessary */
		if (STATE_TRANSLATE(sk)) {
			/* post-translation AF; differs from pd->af only for
			 * NAT64 states (af_lan != af_gwy) */
			pd->naf = (pd->af == sk->af_lan) ?
			    sk->af_gwy : sk->af_lan;
			if (direction == PF_OUT) {
				switch (pd->af) {
#if INET
				case AF_INET:
					pf_change_a(&saddr->v4addr.s_addr,
					    pd->ip_sum,
					    sk->gwy.addr.v4addr.s_addr, 0);
					pf_pd_get_hdr_icmp(pd)->icmp_cksum =
					    pf_cksum_fixup(
						pf_pd_get_hdr_icmp(pd)->icmp_cksum, icmpid,
						sk->gwy.xport.port, 0);
					pf_pd_get_hdr_icmp(pd)->icmp_id =
					    sk->gwy.xport.port;
					if (pf_lazy_makewritable(pd, pbuf,
					    off + ICMP_MINLEN) == NULL) {
						return PF_DROP;
					}
					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
					    pf_pd_get_hdr_ptr_icmp(pd), sizeof(struct icmp));
					break;
#endif /* INET */
				case AF_INET6:
					pf_change_a6(saddr,
					    &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum,
					    &sk->gwy.addr, 0);
					if (pf_lazy_makewritable(pd, pbuf,
					    off + sizeof(struct icmp6_hdr)) ==
					    NULL) {
						return PF_DROP;
					}
					pbuf_copy_back(pbuf, off,
					    sizeof(struct icmp6_hdr),
					    pf_pd_get_hdr_ptr_icmp6(pd), sizeof(struct icmp6_hdr));
					break;
				}
			} else {
				/* inbound: may require full ICMPv4<->ICMPv6
				 * protocol translation (NAT64) before the
				 * address/id rewrite */
				switch (pd->af) {
#if INET
				case AF_INET:
					if (pd->naf != AF_INET) {
						if (pf_translate_icmp_af(
							    AF_INET6, pf_pd_get_hdr_icmp(pd))) {
							return PF_DROP;
						}

						pd->proto = IPPROTO_ICMPV6;
					} else {
						pf_change_a(&daddr->v4addr.s_addr,
						    pd->ip_sum,
						    sk->lan.addr.v4addr.s_addr, 0);

						pf_pd_get_hdr_icmp(pd)->icmp_cksum =
						    pf_cksum_fixup(
							pf_pd_get_hdr_icmp(pd)->icmp_cksum,
							icmpid, sk->lan.xport.port, 0);

						pf_pd_get_hdr_icmp(pd)->icmp_id =
						    sk->lan.xport.port;
					}

					if (pf_lazy_makewritable(pd, pbuf,
					    off + ICMP_MINLEN) == NULL) {
						return PF_DROP;
					}
					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
					    pf_pd_get_hdr_ptr_icmp(pd), sizeof(struct icmp));
					if (sk->af_lan != sk->af_gwy) {
						return pf_do_nat64(sk, pd,
						           pbuf, off);
					}
					break;
#endif /* INET */
				case AF_INET6:
					if (pd->naf != AF_INET6) {
						if (pf_translate_icmp_af(
							    AF_INET, pf_pd_get_hdr_icmp6(pd))) {
							return PF_DROP;
						}

						pd->proto = IPPROTO_ICMP;
					} else {
						pf_change_a6(daddr,
						    &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum,
						    &sk->lan.addr, 0);
					}
					if (pf_lazy_makewritable(pd, pbuf,
					    off + sizeof(struct icmp6_hdr)) ==
					    NULL) {
						return PF_DROP;
					}
					pbuf_copy_back(pbuf, off,
					    sizeof(struct icmp6_hdr),
					    pf_pd_get_hdr_ptr_icmp6(pd), sizeof(struct icmp6_hdr));
					if (sk->af_lan != sk->af_gwy) {
						return pf_do_nat64(sk, pd,
						           pbuf, off);
					}
					break;
				}
			}
		}

		return PF_PASS;
	} else {
		/*
		 * ICMP error message in response to a TCP/UDP packet.
		 * Extract the inner TCP/UDP header and search for that state.
		 */
		struct pf_pdesc pd2; /* For inner (original) header */
#if INET
		struct ip       h2;
#endif /* INET */
		struct ip6_hdr  h2_6;
		int             terminal = 0;
		int             ipoff2 = 0;  /* offset of embedded IP header */
		int             off2 = 0;    /* offset of embedded transport header */

		memset(&pd2, 0, sizeof(pd2));

		pd2.af = pd->af;
		switch (pd->af) {
#if INET
		case AF_INET:
			/* offset of h2 in mbuf chain */
			ipoff2 = off + ICMP_MINLEN;

			if (!pf_pull_hdr(pbuf, ipoff2, &h2, sizeof(h2), sizeof(h2),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: ICMP error message too short "
				    "(ip)\n"));
				return PF_DROP;
			}
			/*
			 * ICMP error messages don't refer to non-first
			 * fragments
			 */
			if (h2.ip_off & htons(IP_OFFMASK)) {
				REASON_SET(reason, PFRES_FRAG);
				return PF_DROP;
			}

			/* offset of protocol header that follows h2 */
			off2 = ipoff2 + (h2.ip_hl << 2);
			/* TODO */
			pd2.off = ipoff2 + (h2.ip_hl << 2);

			pd2.proto = h2.ip_p;
			pd2.src = (struct pf_addr *)&h2.ip_src;
			pd2.dst = (struct pf_addr *)&h2.ip_dst;
			pd2.ip_sum = &h2.ip_sum;
			break;
#endif /* INET */
		case AF_INET6:
			ipoff2 = off + sizeof(struct icmp6_hdr);

			if (!pf_pull_hdr(pbuf, ipoff2, &h2_6, sizeof(h2_6), sizeof(h2_6),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: ICMP error message too short "
				    "(ip6)\n"));
				return PF_DROP;
			}
			pd2.proto = h2_6.ip6_nxt;
			pd2.src = (struct pf_addr *)(void *)&h2_6.ip6_src;
			pd2.dst = (struct pf_addr *)(void *)&h2_6.ip6_dst;
			pd2.ip_sum = NULL;
			off2 = ipoff2 + sizeof(h2_6);
			/* walk the embedded v6 extension-header chain until a
			 * transport protocol (or fragment -> drop) is found */
			do {
				switch (pd2.proto) {
				case IPPROTO_FRAGMENT:
					/*
					 * ICMPv6 error messages for
					 * non-first fragments
					 */
					REASON_SET(reason, PFRES_FRAG);
					return PF_DROP;
				case IPPROTO_AH:
				case IPPROTO_HOPOPTS:
				case IPPROTO_ROUTING:
				case IPPROTO_DSTOPTS: {
					/* get next header and header length */
					struct ip6_ext opt6;

					if (!pf_pull_hdr(pbuf, off2, &opt6, sizeof(opt6),
					    sizeof(opt6), NULL, reason,
					    pd2.af)) {
						DPFPRINTF(PF_DEBUG_MISC,
						    ("pf: ICMPv6 short opt\n"));
						return PF_DROP;
					}
					/* AH length is in 4-byte units, the
					 * other options in 8-byte units */
					if (pd2.proto == IPPROTO_AH) {
						off2 += (opt6.ip6e_len + 2) * 4;
					} else {
						off2 += (opt6.ip6e_len + 1) * 8;
					}
					pd2.proto = opt6.ip6e_nxt;
					/* goto the next header */
					break;
				}
				default:
					terminal++;
					break;
				}
			} while (!terminal);
			/* TODO */
			pd2.off = ipoff2;
			break;
		}

		switch (pd2.proto) {
		case IPPROTO_TCP: {
			struct tcphdr            th;
			u_int32_t                seq;
			struct pf_state_peer    *src, *dst;
			u_int8_t                 dws;
			int                      copyback = 0;

			/*
			 * Only the first 8 bytes of the TCP header can be
			 * expected. Don't access any TCP header fields after
			 * th_seq, an ackskew test is not possible.
			 */
			if (!pf_pull_hdr(pbuf, off2, &th, sizeof(th), 8, NULL, reason,
			    pd2.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: ICMP error message too short "
				    "(tcp)\n"));
				return PF_DROP;
			}

			/* the embedded packet is the *original* direction, so
			 * its src/dst are swapped relative to the state key */
			key.proto = IPPROTO_TCP;
			key.af_gwy = pd2.af;
			PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
			PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
			key.ext_gwy.xport.port = th.th_dport;
			key.gwy.xport.port = th.th_sport;

			key.af_lan = pd2.af;
			PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
			PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
			key.lan.xport.port = th.th_dport;
			key.ext_lan.xport.port = th.th_sport;

			STATE_LOOKUP();

			sk = (*state)->state_key;
			if ((direction == sk->direction) &&
			    ((sk->af_lan == sk->af_gwy) ||
			    (pd2.af == sk->af_lan))) {
				src = &(*state)->dst;
				dst = &(*state)->src;
			} else {
				src = &(*state)->src;
				dst = &(*state)->dst;
			}

			if (src->wscale && (dst->wscale & PF_WSCALE_FLAG)) {
				dws = dst->wscale & PF_WSCALE_MASK;
			} else {
				dws = TCP_MAX_WINSHIFT;
			}

			/* Demodulate sequence number */
			seq = ntohl(th.th_seq) - src->seqdiff;
			if (src->seqdiff) {
				pf_change_a(&th.th_seq, icmpsum,
				    htonl(seq), 0);
				copyback = 1;
			}

			/* the quoted sequence number must fall inside the
			 * window [seqlo - max_win << dws, seqhi] */
			if (!SEQ_GEQ(src->seqhi, seq) ||
			    !SEQ_GEQ(seq,
			    src->seqlo - ((u_int32_t)dst->max_win << dws))) {
				if (pf_status.debug >= PF_DEBUG_MISC) {
					printf("pf: BAD ICMP %d:%d ",
					    icmptype, pf_pd_get_hdr_icmp(pd)->icmp_code);
					pf_print_host(pd->src, 0, pd->af);
					printf(" -> ");
					pf_print_host(pd->dst, 0, pd->af);
					printf(" state: ");
					pf_print_state(*state);
					printf(" seq=%u\n", seq);
				}
				REASON_SET(reason, PFRES_BADSTATE);
				return PF_DROP;
			}

			/* post-translation AF; differs only for NAT64 */
			pd->naf = pd2.naf = (pd2.af == sk->af_lan) ?
			    sk->af_gwy : sk->af_lan;

			if (STATE_TRANSLATE(sk)) {
				/* NAT64 case */
				if (sk->af_lan != sk->af_gwy) {
					struct pf_state_host *saddr2, *daddr2;

					if (pd2.naf == sk->af_lan) {
						saddr2 = &sk->lan;
						daddr2 = &sk->ext_lan;
					} else {
						saddr2 = &sk->ext_gwy;
						daddr2 = &sk->gwy;
					}

					/* translate ICMP message types and codes */
					if (pf_translate_icmp_af(pd->naf,
					    pf_pd_get_hdr_icmp(pd))) {
						return PF_DROP;
					}

					if (pf_lazy_makewritable(pd, pbuf,
					    off2 + 8) == NULL) {
						return PF_DROP;
					}

					pbuf_copy_back(pbuf, pd->off,
					    sizeof(struct icmp6_hdr),
					    pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);

					/*
					 * translate inner ip header within the
					 * ICMP message
					 */
					if (pf_change_icmp_af(pbuf, ipoff2, pd,
					    &pd2, &saddr2->addr, &daddr2->addr,
					    pd->af, pd->naf)) {
						return PF_DROP;
					}

					if (pd->naf == AF_INET) {
						pd->proto = IPPROTO_ICMP;
					} else {
						pd->proto = IPPROTO_ICMPV6;
					}

					/*
					 * translate inner tcp header within
					 * the ICMP message
					 */
					pf_change_ap(direction, NULL, pd2.src,
					    &th.th_sport, pd2.ip_sum,
					    &th.th_sum, &daddr2->addr,
					    saddr2->xport.port, 0, pd2.af,
					    pd2.naf, 0);

					pf_change_ap(direction, NULL, pd2.dst,
					    &th.th_dport, pd2.ip_sum,
					    &th.th_sum, &saddr2->addr,
					    daddr2->xport.port, 0, pd2.af,
					    pd2.naf, 0);

					pbuf_copy_back(pbuf, pd2.off, 8, &th, sizeof(th));

					/* translate outer ip header */
					PF_ACPY(&pd->naddr, &daddr2->addr,
					    pd->naf);
					PF_ACPY(&pd->ndaddr, &saddr2->addr,
					    pd->naf);
					if (pd->af == AF_INET) {
						/* restore the saved v4 source
						 * into the low 32 bits of the
						 * synthesized v6 address */
						memcpy(&pd->naddr.addr32[3],
						    &srcv4_inaddr,
						    sizeof(pd->naddr.addr32[3]));
						return pf_nat64_ipv4(pbuf, off,
						           pd);
					} else {
						return pf_nat64_ipv6(pbuf, off,
						           pd);
					}
				}
				if (direction == PF_IN) {
					pf_change_icmp(pd2.src, &th.th_sport,
					    daddr, &sk->lan.addr,
					    sk->lan.xport.port, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, pd2.af);
				} else {
					pf_change_icmp(pd2.dst, &th.th_dport,
					    saddr, &sk->gwy.addr,
					    sk->gwy.xport.port, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, pd2.af);
				}
				copyback = 1;
			}

			if (copyback) {
				if (pf_lazy_makewritable(pd, pbuf, off2 + 8) ==
				    NULL) {
					return PF_DROP;
				}
				switch (pd2.af) {
#if INET
				case AF_INET:
					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
					    pf_pd_get_hdr_ptr_icmp(pd), pd->hdrmaxlen);
					pbuf_copy_back(pbuf, ipoff2, sizeof(h2),
					    &h2, sizeof(h2));
					break;
#endif /* INET */
				case AF_INET6:
					pbuf_copy_back(pbuf, off,
					    sizeof(struct icmp6_hdr),
					    pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);
					pbuf_copy_back(pbuf, ipoff2,
					    sizeof(h2_6), &h2_6, sizeof(h2_6));
					break;
				}
				pbuf_copy_back(pbuf, off2, 8, &th, sizeof(th));
			}

			return PF_PASS;
		}
		case IPPROTO_UDP: {
			struct udphdr uh;
			int dx, action;
			if (!pf_pull_hdr(pbuf, off2, &uh, sizeof(uh), sizeof(uh),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: ICMP error message too short "
				    "(udp)\n"));
				return PF_DROP;
			}

			key.af_gwy = pd2.af;
			PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
			PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
			key.ext_gwy.xport.port = uh.uh_dport;
			key.gwy.xport.port = uh.uh_sport;

			key.af_lan = pd2.af;
			PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
			PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
			key.lan.xport.port = uh.uh_dport;
			key.ext_lan.xport.port = uh.uh_sport;

			key.proto = IPPROTO_UDP;
			key.proto_variant = PF_EXTFILTER_APD;
			dx = direction;

			/* IKE (port 500) flows are matched on the initiator
			 * cookie via an app_state comparator */
			if (ntohs(uh.uh_sport) == PF_IKE_PORT &&
			    ntohs(uh.uh_dport) == PF_IKE_PORT) {
				struct pf_ike_hdr ike;
				size_t plen = pbuf->pb_packet_len - off2 -
				    sizeof(uh);
				if (direction == PF_IN &&
				    plen < 8 /* PF_IKE_PACKET_MINSIZE */) {
					DPFPRINTF(PF_DEBUG_MISC, ("pf: "
					    "ICMP error, embedded IKE message "
					    "too small.\n"));
					return PF_DROP;
				}

				if (plen > sizeof(ike)) {
					plen = sizeof(ike);
				}
				/*
				 * NOTE(review): plen is computed relative to
				 * off2 (the embedded UDP header) but the copy
				 * below reads from off + sizeof(uh), i.e.
				 * relative to the outer ICMP header. These
				 * offsets differ; confirm which source offset
				 * is intended.
				 */
				pbuf_copy_data(pbuf, off + sizeof(uh), plen,
				    &ike, sizeof(ike));

				key.app_state = &as;
				as.compare_lan_ext = pf_ike_compare;
				as.compare_ext_gwy = pf_ike_compare;
				as.u.ike.cookie = ike.initiator_cookie;
			}

			/* retry the lookup with progressively looser
			 * ext-filter variants: APD -> AD -> EI */
			*state = pf_find_state(kif, &key, dx);

			if (key.app_state && *state == 0) {
				key.app_state = 0;
				*state = pf_find_state(kif, &key, dx);
			}

			if (*state == 0) {
				key.proto_variant = PF_EXTFILTER_AD;
				*state = pf_find_state(kif, &key, dx);
			}

			if (*state == 0) {
				key.proto_variant = PF_EXTFILTER_EI;
				*state = pf_find_state(kif, &key, dx);
			}

			/* similar to STATE_LOOKUP() */
			if (*state != NULL && pd != NULL &&
			    !(pd->pktflags & PKTF_FLOW_ID)) {
				pd->flowsrc = (*state)->state_key->flowsrc;
				pd->flowhash = (*state)->state_key->flowhash;
				if (pd->flowhash != 0) {
					pd->pktflags |= PKTF_FLOW_ID;
					pd->pktflags &= ~PKTF_FLOW_ADV;
				}
			}

			if (pf_state_lookup_aux(state, kif, direction, &action)) {
				return action;
			}

			sk = (*state)->state_key;
			pd->naf = pd2.naf = (pd2.af == sk->af_lan) ?
			    sk->af_gwy : sk->af_lan;

			if (STATE_TRANSLATE(sk)) {
				/* NAT64 case */
				if (sk->af_lan != sk->af_gwy) {
					struct pf_state_host *saddr2, *daddr2;

					if (pd2.naf == sk->af_lan) {
						saddr2 = &sk->lan;
						daddr2 = &sk->ext_lan;
					} else {
						saddr2 = &sk->ext_gwy;
						daddr2 = &sk->gwy;
					}

					/* translate ICMP message */
					if (pf_translate_icmp_af(pd->naf,
					    pf_pd_get_hdr_icmp(pd))) {
						return PF_DROP;
					}
					if (pf_lazy_makewritable(pd, pbuf,
					    off2 + 8) == NULL) {
						return PF_DROP;
					}

					pbuf_copy_back(pbuf, pd->off,
					    sizeof(struct icmp6_hdr),
					    pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);

					/*
					 * translate inner ip header within the
					 * ICMP message
					 */
					if (pf_change_icmp_af(pbuf, ipoff2, pd,
					    &pd2, &saddr2->addr, &daddr2->addr,
					    pd->af, pd->naf)) {
						return PF_DROP;
					}

					if (pd->naf == AF_INET) {
						pd->proto = IPPROTO_ICMP;
					} else {
						pd->proto = IPPROTO_ICMPV6;
					}

					/*
					 * translate inner udp header within
					 * the ICMP message
					 */
					pf_change_ap(direction, NULL, pd2.src,
					    &uh.uh_sport, pd2.ip_sum,
					    &uh.uh_sum, &daddr2->addr,
					    saddr2->xport.port, 0, pd2.af,
					    pd2.naf, 0);

					pf_change_ap(direction, NULL, pd2.dst,
					    &uh.uh_dport, pd2.ip_sum,
					    &uh.uh_sum, &saddr2->addr,
					    daddr2->xport.port, 0, pd2.af,
					    pd2.naf, 0);

					pbuf_copy_back(pbuf, pd2.off,
					    sizeof(uh), &uh, sizeof(uh));

					/* translate outer ip header */
					PF_ACPY(&pd->naddr, &daddr2->addr,
					    pd->naf);
					PF_ACPY(&pd->ndaddr, &saddr2->addr,
					    pd->naf);
					if (pd->af == AF_INET) {
						memcpy(&pd->naddr.addr32[3],
						    &srcv4_inaddr,
						    sizeof(pd->naddr.addr32[3]));
						return pf_nat64_ipv4(pbuf, off,
						           pd);
					} else {
						return pf_nat64_ipv6(pbuf, off,
						           pd);
					}
				}
				if (direction == PF_IN) {
					pf_change_icmp(pd2.src, &uh.uh_sport,
					    daddr, &sk->lan.addr,
					    sk->lan.xport.port, &uh.uh_sum,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 1, pd2.af);
				} else {
					pf_change_icmp(pd2.dst, &uh.uh_dport,
					    saddr, &sk->gwy.addr,
					    sk->gwy.xport.port, &uh.uh_sum,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 1, pd2.af);
				}
				if (pf_lazy_makewritable(pd, pbuf,
				    off2 + sizeof(uh)) == NULL) {
					return PF_DROP;
				}
				switch (pd2.af) {
#if INET
				case AF_INET:
					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
					    pf_pd_get_hdr_ptr_icmp(pd), pd->hdrmaxlen);
					pbuf_copy_back(pbuf, ipoff2,
					    sizeof(h2), &h2, sizeof(h2));
					break;
#endif /* INET */
				case AF_INET6:
					pbuf_copy_back(pbuf, off,
					    sizeof(struct icmp6_hdr),
					    pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);
					pbuf_copy_back(pbuf, ipoff2,
					    sizeof(h2_6), &h2_6, sizeof(h2_6));
					break;
				}
				pbuf_copy_back(pbuf, off2, sizeof(uh), &uh, sizeof(uh));
			}

			return PF_PASS;
		}
#if INET
		case IPPROTO_ICMP: {
			/* ICMP error quoting an ICMP query: key on icmp_id */
			struct icmp             iih;

			if (!pf_pull_hdr(pbuf, off2, &iih, sizeof(iih), ICMP_MINLEN,
			    NULL, reason, pd2.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: ICMP error message too short i"
				    "(icmp)\n"));
				return PF_DROP;
			}

			key.proto = IPPROTO_ICMP;
			if (direction == PF_IN) {
				key.af_gwy = pd2.af;
				PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
				PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
				key.ext_gwy.xport.port = 0;
				key.gwy.xport.port = iih.icmp_id;
			} else {
				key.af_lan = pd2.af;
				PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
				PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
				key.lan.xport.port = iih.icmp_id;
				key.ext_lan.xport.port = 0;
			}

			STATE_LOOKUP();

			sk = (*state)->state_key;
			if (STATE_TRANSLATE(sk)) {
				if (direction == PF_IN) {
					pf_change_icmp(pd2.src, &iih.icmp_id,
					    daddr, &sk->lan.addr,
					    sk->lan.xport.port, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, AF_INET);
				} else {
					pf_change_icmp(pd2.dst, &iih.icmp_id,
					    saddr, &sk->gwy.addr,
					    sk->gwy.xport.port, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, AF_INET);
				}
				if (pf_lazy_makewritable(pd, pbuf,
				    off2 + ICMP_MINLEN) == NULL) {
					return PF_DROP;
				}
				pbuf_copy_back(pbuf, off, ICMP_MINLEN,
				    pf_pd_get_hdr_ptr_icmp(pd), pd->hdrmaxlen);
				pbuf_copy_back(pbuf, ipoff2, sizeof(h2), &h2, sizeof(h2));
				pbuf_copy_back(pbuf, off2, ICMP_MINLEN, &iih, sizeof(iih));
			}

			return PF_PASS;
		}
#endif /* INET */
		case IPPROTO_ICMPV6: {
			/* ICMPv6 error quoting an ICMPv6 query */
			struct icmp6_hdr        iih;

			if (!pf_pull_hdr(pbuf, off2, &iih, sizeof(iih),
			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: ICMP error message too short "
				    "(icmp6)\n"));
				return PF_DROP;
			}

			key.proto = IPPROTO_ICMPV6;
			if (direction == PF_IN) {
				key.af_gwy = pd2.af;
				PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
				PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
				key.ext_gwy.xport.port = 0;
				key.gwy.xport.port = iih.icmp6_id;
			} else {
				key.af_lan = pd2.af;
				PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
				PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
				key.lan.xport.port = iih.icmp6_id;
				key.ext_lan.xport.port = 0;
			}

			STATE_LOOKUP();

			sk = (*state)->state_key;
			if (STATE_TRANSLATE(sk)) {
				if (direction == PF_IN) {
					pf_change_icmp(pd2.src, &iih.icmp6_id,
					    daddr, &sk->lan.addr,
					    sk->lan.xport.port, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, AF_INET6);
				} else {
					pf_change_icmp(pd2.dst, &iih.icmp6_id,
					    saddr, &sk->gwy.addr,
					    sk->gwy.xport.port, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, AF_INET6);
				}
				if (pf_lazy_makewritable(pd, pbuf, off2 +
				    sizeof(struct icmp6_hdr)) == NULL) {
					return PF_DROP;
				}
				pbuf_copy_back(pbuf, off,
				    sizeof(struct icmp6_hdr),
				    pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);
				pbuf_copy_back(pbuf, ipoff2, sizeof(h2_6),
				    &h2_6, sizeof(h2_6));
				pbuf_copy_back(pbuf, off2,
				    sizeof(struct icmp6_hdr), &iih, sizeof(iih));
			}

			return PF_PASS;
		}
		default: {
			/* any other embedded protocol: key on addresses only */
			key.proto = pd2.proto;
			if (direction == PF_IN) {
				key.af_gwy = pd2.af;
				PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
				PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
				key.ext_gwy.xport.port = 0;
				key.gwy.xport.port = 0;
			} else {
				key.af_lan = pd2.af;
				PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
				PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
				key.lan.xport.port = 0;
				key.ext_lan.xport.port = 0;
			}

			STATE_LOOKUP();

			sk = (*state)->state_key;
			if (STATE_TRANSLATE(sk)) {
				if (direction == PF_IN) {
					pf_change_icmp(pd2.src, NULL, daddr,
					    &sk->lan.addr, 0, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, pd2.af);
				} else {
					pf_change_icmp(pd2.dst, NULL, saddr,
					    &sk->gwy.addr, 0, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, pd2.af);
				}
				switch (pd2.af) {
#if INET
				case AF_INET:
					if (pf_lazy_makewritable(pd, pbuf,
					    ipoff2 + sizeof(h2)) == NULL) {
						return PF_DROP;
					}
					/*
					 * <XXXSCW>
					 * Xnu was missing the following...
					 */
					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
					    pf_pd_get_hdr_ptr_icmp(pd), pd->hdrmaxlen);
					pbuf_copy_back(pbuf, ipoff2,
					    sizeof(h2), &h2, sizeof(h2));
					break;
					/*
					 * </XXXSCW>
					 */
#endif /* INET */
				case AF_INET6:
					if (pf_lazy_makewritable(pd, pbuf,
					    ipoff2 + sizeof(h2_6)) == NULL) {
						return PF_DROP;
					}
					pbuf_copy_back(pbuf, off,
					    sizeof(struct icmp6_hdr),
					    pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);
					pbuf_copy_back(pbuf, ipoff2,
					    sizeof(h2_6), &h2_6, sizeof(h2_6));
					break;
				}
			}

			return PF_PASS;
		}
		}
	}
}
8736 
8737 static __attribute__((noinline)) int
pf_test_state_grev1(struct pf_state ** state,int direction,struct pfi_kif * kif,int off,struct pf_pdesc * pd)8738 pf_test_state_grev1(struct pf_state **state, int direction,
8739     struct pfi_kif *kif, int off, struct pf_pdesc *pd)
8740 {
8741 	struct pf_state_peer *__single src;
8742 	struct pf_state_peer *__single dst;
8743 	struct pf_state_key_cmp key = {};
8744 	struct pf_grev1_hdr *__single grev1 = pf_pd_get_hdr_grev1(pd);
8745 
8746 	key.app_state = 0;
8747 	key.proto = IPPROTO_GRE;
8748 	key.proto_variant = PF_GRE_PPTP_VARIANT;
8749 	if (direction == PF_IN) {
8750 		key.af_gwy = pd->af;
8751 		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
8752 		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
8753 		key.gwy.xport.call_id = grev1->call_id;
8754 	} else {
8755 		key.af_lan = pd->af;
8756 		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
8757 		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
8758 		key.ext_lan.xport.call_id = grev1->call_id;
8759 	}
8760 
8761 	STATE_LOOKUP();
8762 
8763 	if (direction == (*state)->state_key->direction) {
8764 		src = &(*state)->src;
8765 		dst = &(*state)->dst;
8766 	} else {
8767 		src = &(*state)->dst;
8768 		dst = &(*state)->src;
8769 	}
8770 
8771 	/* update states */
8772 	if (src->state < PFGRE1S_INITIATING) {
8773 		src->state = PFGRE1S_INITIATING;
8774 	}
8775 
8776 	/* update expire time */
8777 	(*state)->expire = pf_time_second();
8778 	if (src->state >= PFGRE1S_INITIATING &&
8779 	    dst->state >= PFGRE1S_INITIATING) {
8780 		if ((*state)->timeout != PFTM_TCP_ESTABLISHED) {
8781 			(*state)->timeout = PFTM_GREv1_ESTABLISHED;
8782 		}
8783 		src->state = PFGRE1S_ESTABLISHED;
8784 		dst->state = PFGRE1S_ESTABLISHED;
8785 	} else {
8786 		(*state)->timeout = PFTM_GREv1_INITIATING;
8787 	}
8788 
8789 	if ((*state)->state_key->app_state) {
8790 		(*state)->state_key->app_state->u.grev1.pptp_state->expire =
8791 		    pf_time_second();
8792 	}
8793 
8794 	/* translate source/destination address, if necessary */
8795 	if (STATE_GRE_TRANSLATE((*state)->state_key)) {
8796 		if (direction == PF_OUT) {
8797 			switch (pd->af) {
8798 #if INET
8799 			case AF_INET:
8800 				pf_change_a(&pd->src->v4addr.s_addr,
8801 				    pd->ip_sum,
8802 				    (*state)->state_key->gwy.addr.v4addr.s_addr, 0);
8803 				break;
8804 #endif /* INET */
8805 			case AF_INET6:
8806 				PF_ACPY(pd->src, &(*state)->state_key->gwy.addr,
8807 				    pd->af);
8808 				break;
8809 			}
8810 		} else {
8811 			grev1->call_id = (*state)->state_key->lan.xport.call_id;
8812 
8813 			switch (pd->af) {
8814 #if INET
8815 			case AF_INET:
8816 				pf_change_a(&pd->dst->v4addr.s_addr,
8817 				    pd->ip_sum,
8818 				    (*state)->state_key->lan.addr.v4addr.s_addr, 0);
8819 				break;
8820 #endif /* INET */
8821 			case AF_INET6:
8822 				PF_ACPY(pd->dst, &(*state)->state_key->lan.addr,
8823 				    pd->af);
8824 				break;
8825 			}
8826 		}
8827 
8828 		if (pf_lazy_makewritable(pd, pd->mp, off + sizeof(*grev1)) ==
8829 		    NULL) {
8830 			return PF_DROP;
8831 		}
8832 		pbuf_copy_back(pd->mp, off, sizeof(*grev1), grev1, sizeof(*grev1));
8833 	}
8834 
8835 	return PF_PASS;
8836 }
8837 
/*
 * State tracking for ESP (IPsec) packets.  ESP carries no ports, so
 * states are keyed on the 32-bit SPI from the ESP header instead of a
 * transport port.  Returns PF_PASS if the packet matches (or can be
 * re-keyed onto) an existing state, PF_DROP otherwise.
 */
static __attribute__((noinline)) int
pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif,
    int off, struct pf_pdesc *pd)
{
#pragma unused(off)
	struct pf_state_peer *__single src;
	struct pf_state_peer *__single dst;
	struct pf_state_key_cmp key;
	struct pf_esp_hdr *__single esp = pf_pd_get_hdr_esp(pd);
	int action;

	/* Build the lookup key from the packet addresses and its SPI. */
	memset(&key, 0, sizeof(key));
	key.proto = IPPROTO_ESP;
	if (direction == PF_IN) {
		key.af_gwy = pd->af;
		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
		key.gwy.xport.spi = esp->spi;
	} else {
		key.af_lan = pd->af;
		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
		key.ext_lan.xport.spi = esp->spi;
	}

	*state = pf_find_state(kif, &key, direction);

	if (*state == 0) {
		struct pf_state *s;

		/*
		 * <[email protected]>
		 * No matching state.  Look for a blocking state.  If we find
		 * one, then use that state and move it so that it's keyed to
		 * the SPI in the current packet.
		 */
		if (direction == PF_IN) {
			/* A blocking state is keyed with SPI 0. */
			key.gwy.xport.spi = 0;

			s = pf_find_state(kif, &key, direction);
			if (s) {
				struct pf_state_key *sk = s->state_key;

				/* Re-key: remove, set the real SPI, re-insert. */
				pf_remove_state_key_ext_gwy(sk);
				sk->lan.xport.spi = sk->gwy.xport.spi =
				    esp->spi;

				if (pf_insert_state_key_ext_gwy(sk)) {
					/* Insert collision: detach; freed below. */
					pf_detach_state(s, PF_DT_SKIP_EXTGWY);
				} else {
					*state = s;
				}
			}
		} else {
			key.ext_lan.xport.spi = 0;

			s = pf_find_state(kif, &key, direction);
			if (s) {
				struct pf_state_key *sk = s->state_key;

				/* Re-key the lan/ext tree entry to the real SPI. */
				RB_REMOVE(pf_state_tree_lan_ext,
				    &pf_statetbl_lan_ext, sk);
				sk->ext_lan.xport.spi = esp->spi;

				if (RB_INSERT(pf_state_tree_lan_ext,
				    &pf_statetbl_lan_ext, sk)) {
					/* Insert collision: detach; freed below. */
					pf_detach_state(s, PF_DT_SKIP_LANEXT);
				} else {
					*state = s;
				}
			}
		}

		if (s) {
			if (*state == 0) {
				/* Re-keying failed; tear the state down. */
#if NPFSYNC
				if (s->creatorid == pf_status.hostid) {
					pfsync_delete_state(s);
				}
#endif
				s->timeout = PFTM_UNLINKED;
				hook_runloop(&s->unlink_hooks,
				    HOOK_REMOVE | HOOK_FREE);
				pf_src_tree_remove_state(s);
				pf_free_state(s);
				return PF_DROP;
			}
		}
	}

	/* similar to STATE_LOOKUP() */
	if (*state != NULL && pd != NULL && !(pd->pktflags & PKTF_FLOW_ID)) {
		/* Tag the packet with the state's flow identity. */
		pd->flowsrc = (*state)->state_key->flowsrc;
		pd->flowhash = (*state)->state_key->flowhash;
		if (pd->flowhash != 0) {
			pd->pktflags |= PKTF_FLOW_ID;
			pd->pktflags &= ~PKTF_FLOW_ADV;
		}
	}

	if (pf_state_lookup_aux(state, kif, direction, &action)) {
		return action;
	}

	/* Orient src/dst peers relative to the state's recorded direction. */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFESPS_INITIATING) {
		src->state = PFESPS_INITIATING;
	}

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state >= PFESPS_INITIATING &&
	    dst->state >= PFESPS_INITIATING) {
		/* Traffic seen from both peers: promote to established. */
		(*state)->timeout = PFTM_ESP_ESTABLISHED;
		src->state = PFESPS_ESTABLISHED;
		dst->state = PFESPS_ESTABLISHED;
	} else {
		(*state)->timeout = PFTM_ESP_INITIATING;
	}
	/* translate source/destination address, if necessary */
	if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
		if (direction == PF_OUT) {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->src->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->gwy.addr.v4addr.s_addr, 0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->src, &(*state)->state_key->gwy.addr,
				    pd->af);
				break;
			}
		} else {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->dst->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->lan.addr.v4addr.s_addr, 0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->dst, &(*state)->state_key->lan.addr,
				    pd->af);
				break;
			}
		}
	}

	return PF_PASS;
}
9000 
/*
 * State tracking for protocols without dedicated handlers (anything
 * other than TCP/UDP/ICMP/ESP/GRE).  States are keyed on addresses
 * only; the port fields are zeroed.
 */
static __attribute__((noinline)) int
pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
    struct pf_pdesc *pd)
{
	struct pf_state_peer    *src, *dst;
	struct pf_state_key_cmp  key = {};

	/* Build an address-only key (no transport ports for "other"). */
	key.app_state = 0;
	key.proto = pd->proto;
	if (direction == PF_IN) {
		key.af_gwy = pd->af;
		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
		key.ext_gwy.xport.port = 0;
		key.gwy.xport.port = 0;
	} else {
		key.af_lan = pd->af;
		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
		key.lan.xport.port = 0;
		key.ext_lan.xport.port = 0;
	}

	/* Finds *state; returns from this function on lookup failure. */
	STATE_LOOKUP();

	/* Orient src/dst peers relative to the state's recorded direction. */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFOTHERS_SINGLE) {
		src->state = PFOTHERS_SINGLE;
	}
	if (dst->state == PFOTHERS_SINGLE) {
		dst->state = PFOTHERS_MULTIPLE;
	}

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) {
		(*state)->timeout = PFTM_OTHER_MULTIPLE;
	} else {
		(*state)->timeout = PFTM_OTHER_SINGLE;
	}

	/* translate source/destination address, if necessary */
	if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
		if (direction == PF_OUT) {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->src->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->gwy.addr.v4addr.s_addr,
				    0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->src,
				    &(*state)->state_key->gwy.addr, pd->af);
				break;
			}
		} else {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->dst->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->lan.addr.v4addr.s_addr,
				    0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->dst,
				    &(*state)->state_key->lan.addr, pd->af);
				break;
			}
		}
	}

	return PF_PASS;
}
9087 
/*
 * Pull "copylen" bytes of protocol header at offset "off" out of the
 * packet into the caller-supplied buffer "p" (capacity "p_buflen").
 *
 * ipoff and off are measured from the start of the mbuf chain.
 * h must be at "ipoff" on the mbuf chain.
 *
 * Returns "p" on success.  On failure returns NULL and sets *actionp
 * (PF_PASS for a non-first IPv4 fragment that cannot contain the
 * wanted header, PF_DROP otherwise) and *reasonp.
 */
void *
pf_pull_hdr(pbuf_t *pbuf, int off, void *__sized_by(p_buflen)p, int p_buflen, int copylen,
    u_short *actionp, u_short *reasonp, sa_family_t af)
{
	switch (af) {
#if INET
	case AF_INET: {
		struct ip       *__single h = pbuf->pb_data;
		u_int16_t        fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

		/* Non-first fragment: the header we want is in the first one. */
		if (fragoff) {
			if (fragoff >= copylen) {
				ACTION_SET(actionp, PF_PASS);
			} else {
				ACTION_SET(actionp, PF_DROP);
				REASON_SET(reasonp, PFRES_FRAG);
			}
			return NULL;
		}
		/* Both the actual data and the declared ip_len must cover it. */
		if (pbuf->pb_packet_len < (unsigned)(off + copylen) ||
		    ntohs(h->ip_len) < off + copylen) {
			ACTION_SET(actionp, PF_DROP);
			REASON_SET(reasonp, PFRES_SHORT);
			return NULL;
		}
		break;
	}
#endif /* INET */
	case AF_INET6: {
		struct ip6_hdr  *__single h = pbuf->pb_data;

		/* Same check against the IPv6 payload length. */
		if (pbuf->pb_packet_len < (unsigned)(off + copylen) ||
		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
		    (unsigned)(off + copylen)) {
			ACTION_SET(actionp, PF_DROP);
			REASON_SET(reasonp, PFRES_SHORT);
			return NULL;
		}
		break;
	}
	}
	pbuf_copy_data(pbuf, off, copylen, p, p_buflen);
	return p;
}
9136 
/*
 * Check whether "addr" is routable for address family "af".
 * NOTE: ret is initialized to 1 and never cleared, so this returns 1
 * for any supported family (0 only for an unknown af); per the XXX
 * comments below, the rtalloc() result is not actually consulted.
 */
int
pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
{
#pragma unused(kif)
	struct sockaddr_in      *dst;
	int                      ret = 1;
	struct sockaddr_in6     *dst6;
	struct route_in6         ro;

	/* Build a destination sockaddr for the route lookup. */
	bzero(&ro, sizeof(ro));
	switch (af) {
	case AF_INET:
		dst = satosin(&ro.ro_dst);
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = addr->v4addr;
		break;
	case AF_INET6:
		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof(*dst6);
		dst6->sin6_addr = addr->v6addr;
		break;
	default:
		/* Unsupported family: not routable. */
		return 0;
	}

	/* XXX: IFT_ENC is not currently used by anything*/
	/* Skip checks for ipsec interfaces */
	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) {
		goto out;
	}

	/* XXX: what is the point of this? */
	rtalloc((struct route *)&ro);

out:
	ROUTE_RELEASE(&ro);
	return ret;
}
9177 
9178 int
pf_rtlabel_match(struct pf_addr * addr,sa_family_t af,struct pf_addr_wrap * aw)9179 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
9180 {
9181 #pragma unused(aw)
9182 	struct sockaddr_in      *dst;
9183 	struct sockaddr_in6     *dst6;
9184 	struct route_in6         ro;
9185 	int                      ret = 0;
9186 
9187 	bzero(&ro, sizeof(ro));
9188 	switch (af) {
9189 	case AF_INET:
9190 		dst = satosin(&ro.ro_dst);
9191 		dst->sin_family = AF_INET;
9192 		dst->sin_len = sizeof(*dst);
9193 		dst->sin_addr = addr->v4addr;
9194 		break;
9195 	case AF_INET6:
9196 		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
9197 		dst6->sin6_family = AF_INET6;
9198 		dst6->sin6_len = sizeof(*dst6);
9199 		dst6->sin6_addr = addr->v6addr;
9200 		break;
9201 	default:
9202 		return 0;
9203 	}
9204 
9205 	/* XXX: what is the point of this? */
9206 	rtalloc((struct route *)&ro);
9207 
9208 	ROUTE_RELEASE(&ro);
9209 
9210 	return ret;
9211 }
9212 
9213 #if INET
/*
 * Apply a route-to / reply-to / dup-to / fastroute rule to an IPv4
 * packet: pick the outgoing interface and gateway, re-run pf on the
 * new interface if it changed, then transmit (fragmenting if needed).
 * Consumes *pbufp except in the dup-to and early-return cases.
 */
static __attribute__((noinline)) void
pf_route(pbuf_t **pbufp, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
#pragma unused(pd)
	struct mbuf             *__single m0, *__single m1;
	struct route             iproute;
	struct route            *__single ro = &iproute;
	struct sockaddr_in      *__single dst;
	struct ip               *__single ip;
	struct ifnet            *__single ifp = NULL;
	struct pf_addr           naddr;
	struct pf_src_node      *__single sn = NULL;
	int                      error = 0;
	uint32_t                 sw_csum;
	int                      interface_mtu = 0;
	bzero(&iproute, sizeof(iproute));

	if (pbufp == NULL || !pbuf_is_valid(*pbufp) || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL) {
		panic("pf_route: invalid parameters");
	}

	/* Guard against routing loops: give up after a few re-routes. */
	if (pd->pf_mtag->pftag_routed++ > 3) {
		pbuf_destroy(*pbufp);
		*pbufp = NULL;
		m0 = NULL;
		goto bad;
	}

	/*
	 * Since this is something of an edge case and may involve the
	 * host stack (for routing, at least for now), we convert the
	 * incoming pbuf into an mbuf.
	 */
	if (r->rt == PF_DUPTO) {
		/* dup-to: send a copy, the original continues untouched. */
		m0 = pbuf_clone_to_mbuf(*pbufp);
	} else if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
		/* reply-to only applies against the rule's direction. */
		return;
	} else {
		/* We're going to consume this packet */
		m0 = pbuf_to_mbuf(*pbufp, TRUE);
		*pbufp = NULL;
	}

	if (m0 == NULL) {
		goto bad;
	}

	/* We now have the packet in an mbuf (m0) */

	if (m0->m_len < (int)sizeof(struct ip)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route: packet length < sizeof (struct ip)\n"));
		goto bad;
	}

	ip = mtod(m0, struct ip *);

	dst = satosin((void *)&ro->ro_dst);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;

	if (r->rt == PF_FASTROUTE) {
		/* fastroute: normal FIB lookup on the packet's destination. */
		rtalloc(ro);
		if (ro->ro_rt == NULL) {
			ipstat.ips_noroute++;
			goto bad;
		}

		ifp = ro->ro_rt->rt_ifp;
		RT_LOCK(ro->ro_rt);
		ro->ro_rt->rt_use++;

		/* Send to the gateway address if the route has one. */
		if (ro->ro_rt->rt_flags & RTF_GATEWAY) {
			dst = satosin((void *)ro->ro_rt->rt_gateway);
		}
		RT_UNLOCK(ro->ro_rt);
	} else {
		/* route-to/reply-to: next hop comes from the rule's pool. */
		if (TAILQ_EMPTY(&r->rpool.list)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
			goto bad;
		}
		if (s == NULL) {
			/* Stateless: pick an address from the pool now. */
			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
			    &naddr, NULL, &sn);
			if (!PF_AZERO(&naddr, AF_INET)) {
				dst->sin_addr.s_addr = naddr.v4addr.s_addr;
			}
			ifp = r->rpool.cur->kif ?
			    r->rpool.cur->kif->pfik_ifp : NULL;
		} else {
			/* Stateful: reuse the next hop recorded in the state. */
			if (!PF_AZERO(&s->rt_addr, AF_INET)) {
				dst->sin_addr.s_addr =
				    s->rt_addr.v4addr.s_addr;
			}
			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
		}
	}
	if (ifp == NULL) {
		goto bad;
	}

	/* Re-filter the packet if it now leaves a different interface. */
	if (oifp != ifp) {
		if (pf_test_mbuf(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			goto bad;
		} else if (m0 == NULL) {
			/* pf consumed the packet (e.g. rewrote/queued it). */
			goto done;
		}
		if (m0->m_len < (int)sizeof(struct ip)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: packet length < sizeof (struct ip)\n"));
			goto bad;
		}
		ip = mtod(m0, struct ip *);
	}

	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
	ip_output_checksum(ifp, m0, ((ip->ip_hl) << 2), ntohs(ip->ip_len),
	    &sw_csum);

	interface_mtu = ifp->if_mtu;

	if (INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
		interface_mtu = IN6_LINKMTU(ifp);
		/* Further adjust the size for CLAT46 expansion */
		interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
	}

	/* Packet fits (or hardware can fragment/TSO): send it whole. */
	if (ntohs(ip->ip_len) <= interface_mtu || TSO_IPV4_OK(ifp, m0) ||
	    (!(ip->ip_off & htons(IP_DF)) &&
	    (ifp->if_hwassist & CSUM_FRAGMENT))) {
		ip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP) {
			/* Finalize the IP header checksum in software. */
			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
			sw_csum &= ~CSUM_DELAY_IP;
			m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP;
		}
		error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt, sintosa(dst));
		goto done;
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 * Balk when DF bit is set or the interface didn't support TSO.
	 */
	if ((ip->ip_off & htons(IP_DF)) ||
	    (m0->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) {
		ipstat.ips_cantfrag++;
		if (r->rt != PF_DUPTO) {
			/* icmp_error() consumes m0. */
			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
			    interface_mtu);
			goto done;
		} else {
			goto bad;
		}
	}

	m1 = m0;

	/* PR-8933605: send ip_len,ip_off to ip_fragment in host byte order */
#if BYTE_ORDER != BIG_ENDIAN
	NTOHS(ip->ip_off);
	NTOHS(ip->ip_len);
#endif
	error = ip_fragment(m0, ifp, interface_mtu, sw_csum);

	if (error) {
		/* ip_fragment() freed the chain on error. */
		m0 = NULL;
		goto bad;
	}

	/* Send each fragment; free the rest once one send fails. */
	for (m0 = m1; m0; m0 = m1) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = 0;
		if (error == 0) {
			error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt,
			    sintosa(dst));
		} else {
			m_freem(m0);
		}
	}

	if (error == 0) {
		ipstat.ips_fragmented++;
	}

done:
	ROUTE_RELEASE(&iproute);
	return;

bad:
	if (m0) {
		m_freem(m0);
	}
	goto done;
}
9414 #endif /* INET */
9415 
/*
 * IPv6 counterpart of pf_route(): apply a route-to / reply-to /
 * dup-to / fastroute rule, re-filter on an interface change, and
 * transmit via nd6_output().  Oversized packets get an ICMPv6
 * "packet too big" instead of fragmentation.
 */
static __attribute__((noinline)) void
pf_route6(pbuf_t **pbufp, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
#pragma unused(pd)
	struct mbuf             *__single m0;
	struct route_in6         ip6route;
	struct route_in6        *__single ro;
	struct sockaddr_in6     *__single dst;
	struct ip6_hdr          *__single ip6;
	struct ifnet            *__single ifp = NULL;
	struct pf_addr           naddr;
	struct pf_src_node      *__single sn = NULL;
	int                      error = 0;
	struct pf_mtag          *__single pf_mtag;

	if (pbufp == NULL || !pbuf_is_valid(*pbufp) || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL) {
		panic("pf_route6: invalid parameters");
	}

	/* Guard against routing loops: give up after a few re-routes. */
	if (pd->pf_mtag->pftag_routed++ > 3) {
		pbuf_destroy(*pbufp);
		*pbufp = NULL;
		m0 = NULL;
		goto bad;
	}

	/*
	 * Since this is something of an edge case and may involve the
	 * host stack (for routing, at least for now), we convert the
	 * incoming pbuf into an mbuf.
	 */
	if (r->rt == PF_DUPTO) {
		/* dup-to: send a copy, the original continues untouched. */
		m0 = pbuf_clone_to_mbuf(*pbufp);
	} else if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
		/* reply-to only applies against the rule's direction. */
		return;
	} else {
		/* We're about to consume this packet */
		m0 = pbuf_to_mbuf(*pbufp, TRUE);
		*pbufp = NULL;
	}

	if (m0 == NULL) {
		goto bad;
	}

	if (m0->m_len < (int)sizeof(struct ip6_hdr)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: m0->m_len < sizeof (struct ip6_hdr)\n"));
		goto bad;
	}
	ip6 = mtod(m0, struct ip6_hdr *);

	ro = &ip6route;
	bzero((void *__bidi_indexable)(struct route_in6 *__bidi_indexable)ro, sizeof(*ro));
	dst = SIN6(&ro->ro_dst);
	dst->sin6_family = AF_INET6;
	dst->sin6_len = sizeof(*dst);
	dst->sin6_addr = ip6->ip6_dst;

	/* Cheat. XXX why only in the v6addr case??? */
	if (r->rt == PF_FASTROUTE) {
		/* Mark as pf-generated and hand off to the normal v6 output path. */
		pf_mtag = pf_get_mtag(m0);
		ASSERT(pf_mtag != NULL);
		pf_mtag->pftag_flags |= PF_TAG_GENERATED;
		ip6_output_setsrcifscope(m0, oifp->if_index, NULL);
		ip6_output_setdstifscope(m0, oifp->if_index, NULL);
		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
		return;
	}

	if (TAILQ_EMPTY(&r->rpool.list)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
		goto bad;
	}
	if (s == NULL) {
		/* Stateless: pick the next hop from the rule's pool now. */
		pf_map_addr(AF_INET6, r, (struct pf_addr *)(void *)&ip6->ip6_src,
		    &naddr, NULL, &sn);
		if (!PF_AZERO(&naddr, AF_INET6)) {
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &naddr, AF_INET6);
		}
		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
	} else {
		/* Stateful: reuse the next hop recorded in the state. */
		if (!PF_AZERO(&s->rt_addr, AF_INET6)) {
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &s->rt_addr, AF_INET6);
		}
		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
	}
	if (ifp == NULL) {
		goto bad;
	}

	/* Re-filter the packet if it now leaves a different interface. */
	if (oifp != ifp) {
		if (pf_test6_mbuf(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			goto bad;
		} else if (m0 == NULL) {
			goto done;
		}
		if (m0->m_len < (int)sizeof(struct ip6_hdr)) {
			DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6: m0->m_len "
			    "< sizeof (struct ip6_hdr)\n"));
			goto bad;
		}
		pf_mtag = pf_get_mtag(m0);
		/*
		 * send refragmented packets.
		 */
		if ((pf_mtag->pftag_flags & PF_TAG_REFRAGMENTED) != 0) {
			pf_mtag->pftag_flags &= ~PF_TAG_REFRAGMENTED;
			/*
			 * nd6_output() frees packet chain in both success and
			 * failure cases.
			 */
			error = nd6_output(ifp, ifp, m0, dst, NULL, NULL);
			m0 = NULL;
			if (error) {
				DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6:"
				    "dropped refragmented packet\n"));
			}
			goto done;
		}
		ip6 = mtod(m0, struct ip6_hdr *);
	}

	/*
	 * If the packet is too large for the outgoing interface,
	 * send back an icmp6 error.
	 */
	if (in6_embedded_scope && IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) {
		/* Embed the outgoing interface scope into the address. */
		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
	}
	if ((unsigned)m0->m_pkthdr.len <= ifp->if_mtu) {
		error = nd6_output(ifp, ifp, m0, dst, NULL, NULL);
	} else {
		in6_ifstat_inc(ifp, ifs6_in_toobig);
		if (r->rt != PF_DUPTO) {
			/* icmp6_error() consumes m0. */
			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
		} else {
			goto bad;
		}
	}

done:
	return;

bad:
	if (m0) {
		m_freem(m0);
		m0 = NULL;
	}
	goto done;
}
9572 
9573 
/*
 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
 *   off is the offset where the protocol header starts
 *   len is the total length of protocol header plus payload
 * returns 0 when the checksum is valid, otherwise returns 1
 * (also 1 for unsupported protocols/families and malformed lengths).
 */
static int
pf_check_proto_cksum(pbuf_t *pbuf, int off, int len, u_int8_t p,
    sa_family_t af)
{
	u_int16_t sum;

	switch (p) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/*
		 * Optimize for the common case; if the hardware calculated
		 * value doesn't include pseudo-header checksum, or if it
		 * is partially-computed (only 16-bit summation), do it in
		 * software below.
		 */
		if ((*pbuf->pb_csum_flags &
		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) &&
		    (*pbuf->pb_csum_data ^ 0xffff) == 0) {
			/* Hardware already verified the full checksum. */
			return 0;
		}
		break;
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		break;
	default:
		/* Unsupported protocol: treat as bad. */
		return 1;
	}
	/* Sanity-check the header offset and minimum span length. */
	if (off < (int)sizeof(struct ip) || len < (int)sizeof(struct udphdr)) {
		return 1;
	}
	if (pbuf->pb_packet_len < (unsigned)(off + len)) {
		return 1;
	}
	switch (af) {
#if INET
	case AF_INET:
		if (p == IPPROTO_ICMP) {
			/* Protocol 0: no pseudo-header in the ICMPv4 sum. */
			if (pbuf->pb_contig_len < (unsigned)off) {
				return 1;
			}
			sum = pbuf_inet_cksum(pbuf, 0, off, len);
		} else {
			if (pbuf->pb_contig_len < (int)sizeof(struct ip)) {
				return 1;
			}
			sum = pbuf_inet_cksum(pbuf, p, off, len);
		}
		break;
#endif /* INET */
	case AF_INET6:
		if (pbuf->pb_contig_len < (int)sizeof(struct ip6_hdr)) {
			return 1;
		}
		sum = pbuf_inet6_cksum(pbuf, p, off, len);
		break;
	default:
		return 1;
	}
	if (sum) {
		/* Non-zero residue means a bad checksum; count it. */
		switch (p) {
		case IPPROTO_TCP:
			tcpstat.tcps_rcvbadsum++;
			break;
		case IPPROTO_UDP:
			udpstat.udps_badsum++;
			break;
		case IPPROTO_ICMP:
			icmpstat.icps_checksum++;
			break;
		case IPPROTO_ICMPV6:
			icmp6stat.icp6s_checksum++;
			break;
		}
		return 1;
	}
	return 0;
}
9658 
#if INET
/*
 * Re-sync pf_test()'s local pbuf/header/mtag pointers after a callee
 * may have replaced pd.mp with a different pbuf (e.g. after a
 * copy-on-write via pf_lazy_makewritable — TODO confirm all callers).
 */
#define PF_APPLE_UPDATE_PDESC_IPv4()                            \
	do {                                                    \
	        if (pbuf && pd.mp && pbuf != pd.mp) {           \
	                pbuf = pd.mp;                           \
	                h = pbuf->pb_data;                      \
	                pd.pf_mtag = pf_get_mtag_pbuf(pbuf);            \
	        }                                               \
	} while (0)
9668 
9669 int
pf_test_mbuf(int dir,struct ifnet * ifp,struct mbuf ** m0,struct ether_header * eh,struct ip_fw_args * fwa)9670 pf_test_mbuf(int dir, struct ifnet *ifp, struct mbuf **m0,
9671     struct ether_header *eh, struct ip_fw_args *fwa)
9672 {
9673 	pbuf_t pbuf_store, *__single pbuf;
9674 	int rv;
9675 
9676 	pbuf_init_mbuf(&pbuf_store, *m0, (*m0)->m_pkthdr.rcvif);
9677 	pbuf = &pbuf_store;
9678 
9679 	rv = pf_test(dir, ifp, &pbuf, eh, fwa);
9680 
9681 	if (pbuf_is_valid(pbuf)) {
9682 		*m0 = pbuf->pb_mbuf;
9683 		pbuf->pb_mbuf = NULL;
9684 		pbuf_destroy(pbuf);
9685 	} else {
9686 		*m0 = NULL;
9687 	}
9688 
9689 	return rv;
9690 }
9691 
9692 static __attribute__((noinline)) int
pf_test(int dir,struct ifnet * ifp,pbuf_t ** pbufp,struct ether_header * eh,struct ip_fw_args * fwa)9693 pf_test(int dir, struct ifnet *ifp, pbuf_t **pbufp,
9694     struct ether_header *eh, struct ip_fw_args *fwa)
9695 {
9696 #if !DUMMYNET
9697 #pragma unused(fwa)
9698 #endif
9699 	struct pfi_kif          *__single kif;
9700 	u_short                  action = PF_PASS, reason = 0, log = 0;
9701 	pbuf_t                  *__single pbuf = *pbufp;
9702 	struct ip               *__single h = 0;
9703 	struct pf_rule          *__single a = NULL, *__single r = &pf_default_rule, *__single tr, *__single nr;
9704 	struct pf_state         *__single s = NULL;
9705 	struct pf_state_key     *__single sk = NULL;
9706 	struct pf_ruleset       *__single ruleset = NULL;
9707 	struct pf_pdesc          pd;
9708 	int                      off, dirndx, pqid = 0;
9709 
9710 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
9711 
9712 	if (!pf_status.running) {
9713 		return PF_PASS;
9714 	}
9715 
9716 	memset(&pd, 0, sizeof(pd));
9717 
9718 	if ((pd.pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
9719 		DPFPRINTF(PF_DEBUG_URGENT,
9720 		    ("pf_test: pf_get_mtag_pbuf returned NULL\n"));
9721 		return PF_DROP;
9722 	}
9723 
9724 	if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED) {
9725 		return PF_PASS;
9726 	}
9727 
9728 	kif = (struct pfi_kif *)ifp->if_pf_kif;
9729 
9730 	if (kif == NULL) {
9731 		DPFPRINTF(PF_DEBUG_URGENT,
9732 		    ("pf_test: kif == NULL, if_name %s\n", ifp->if_name));
9733 		return PF_DROP;
9734 	}
9735 	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
9736 		return PF_PASS;
9737 	}
9738 
9739 	if (pbuf->pb_packet_len < (int)sizeof(*h)) {
9740 		REASON_SET(&reason, PFRES_SHORT);
9741 		return PF_DROP;
9742 	}
9743 
9744 	/* initialize enough of pd for the done label */
9745 	h = pbuf->pb_data;
9746 	pd.mp = pbuf;
9747 	pd.lmw = 0;
9748 	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
9749 	pd.src = (struct pf_addr *)&h->ip_src;
9750 	pd.dst = (struct pf_addr *)&h->ip_dst;
9751 	PF_ACPY(&pd.baddr, pd.src, AF_INET);
9752 	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET);
9753 	pd.ip_sum = &h->ip_sum;
9754 	pd.proto = h->ip_p;
9755 	pd.proto_variant = 0;
9756 	pd.af = AF_INET;
9757 	pd.tos = h->ip_tos;
9758 	pd.ttl = h->ip_ttl;
9759 	pd.tot_len = ntohs(h->ip_len);
9760 	pd.eh = eh;
9761 
9762 #if DUMMYNET
9763 	if (fwa != NULL && fwa->fwa_pf_rule != NULL) {
9764 		goto nonormalize;
9765 	}
9766 #endif /* DUMMYNET */
9767 
9768 	/* We do IP header normalization and packet reassembly here */
9769 	action = pf_normalize_ip(pbuf, dir, kif, &reason, &pd);
9770 	if (action != PF_PASS || pd.lmw < 0) {
9771 		action = PF_DROP;
9772 		goto done;
9773 	}
9774 
9775 #if DUMMYNET
9776 nonormalize:
9777 #endif /* DUMMYNET */
9778 	/* pf_normalize can mess with pb_data */
9779 	h = pbuf->pb_data;
9780 
9781 	off = h->ip_hl << 2;
9782 	if (off < (int)sizeof(*h)) {
9783 		action = PF_DROP;
9784 		REASON_SET(&reason, PFRES_SHORT);
9785 		log = 1;
9786 		goto done;
9787 	}
9788 
9789 	pd.src = (struct pf_addr *)&h->ip_src;
9790 	pd.dst = (struct pf_addr *)&h->ip_dst;
9791 	PF_ACPY(&pd.baddr, pd.src, AF_INET);
9792 	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET);
9793 	pd.ip_sum = &h->ip_sum;
9794 	pd.proto = h->ip_p;
9795 	pd.proto_variant = 0;
9796 	pd.mp = pbuf;
9797 	pd.lmw = 0;
9798 	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
9799 	pd.af = AF_INET;
9800 	pd.tos = h->ip_tos;
9801 	pd.ttl = h->ip_ttl;
9802 	pd.sc = MBUF_SCIDX(pbuf_get_service_class(pbuf));
9803 	pd.tot_len = ntohs(h->ip_len);
9804 	pd.eh = eh;
9805 
9806 	if (*pbuf->pb_flags & PKTF_FLOW_ID) {
9807 		pd.flowsrc = *pbuf->pb_flowsrc;
9808 		pd.flowhash = *pbuf->pb_flowid;
9809 		pd.pktflags = *pbuf->pb_flags & PKTF_FLOW_MASK;
9810 	}
9811 
9812 	/* handle fragments that didn't get reassembled by normalization */
9813 	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
9814 		pd.flags |= PFDESC_IP_FRAG;
9815 #if DUMMYNET
9816 		/* Traffic goes through dummynet first */
9817 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9818 		if (action == PF_DROP || pbuf == NULL) {
9819 			*pbufp = NULL;
9820 			return action;
9821 		}
9822 #endif /* DUMMYNET */
9823 		action = pf_test_fragment(&r, dir, kif, pbuf, h,
9824 		    &pd, &a, &ruleset);
9825 		goto done;
9826 	}
9827 
9828 	switch (h->ip_p) {
9829 	case IPPROTO_TCP: {
9830 		struct tcphdr   th;
9831 		pf_pd_set_hdr_tcp(&pd, &th);
9832 		if (!pf_pull_hdr(pbuf, off, &th, sizeof(th), sizeof(th),
9833 		    &action, &reason, AF_INET)) {
9834 			log = action != PF_PASS;
9835 			goto done;
9836 		}
9837 		pd.p_len = pd.tot_len - off - (th.th_off << 2);
9838 		if ((th.th_flags & TH_ACK) && pd.p_len == 0) {
9839 			pqid = 1;
9840 		}
9841 #if DUMMYNET
9842 		/* Traffic goes through dummynet first */
9843 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9844 		if (action == PF_DROP || pbuf == NULL) {
9845 			*pbufp = NULL;
9846 			return action;
9847 		}
9848 #endif /* DUMMYNET */
9849 		action = pf_normalize_tcp(dir, kif, pbuf, 0, off, h, &pd);
9850 		if (pd.lmw < 0) {
9851 			goto done;
9852 		}
9853 		PF_APPLE_UPDATE_PDESC_IPv4();
9854 		if (action == PF_DROP) {
9855 			goto done;
9856 		}
9857 		if (th.th_sport == 0 || th.th_dport == 0) {
9858 			action = PF_DROP;
9859 			REASON_SET(&reason, PFRES_INVPORT);
9860 			goto done;
9861 		}
9862 		action = pf_test_state_tcp(&s, dir, kif, pbuf, off, h, &pd,
9863 		    &reason);
9864 		if (action == PF_NAT64) {
9865 			goto done;
9866 		}
9867 		if (pd.lmw < 0) {
9868 			goto done;
9869 		}
9870 		PF_APPLE_UPDATE_PDESC_IPv4();
9871 		if (action == PF_PASS) {
9872 #if NPFSYNC
9873 			pfsync_update_state(s);
9874 #endif /* NPFSYNC */
9875 			r = s->rule.ptr;
9876 			a = s->anchor.ptr;
9877 			log = s->log;
9878 		} else if (s == NULL) {
9879 			action = pf_test_rule(&r, &s, dir, kif,
9880 			    pbuf, off, h, &pd, &a, &ruleset, NULL);
9881 		}
9882 		break;
9883 	}
9884 
9885 	case IPPROTO_UDP: {
9886 		struct udphdr   uh;
9887 
9888 		pf_pd_set_hdr_udp(&pd, &uh);
9889 		if (!pf_pull_hdr(pbuf, off, &uh, sizeof(uh), sizeof(uh),
9890 		    &action, &reason, AF_INET)) {
9891 			log = action != PF_PASS;
9892 			goto done;
9893 		}
9894 		if (uh.uh_sport == 0 || uh.uh_dport == 0) {
9895 			action = PF_DROP;
9896 			REASON_SET(&reason, PFRES_INVPORT);
9897 			goto done;
9898 		}
9899 		if (ntohs(uh.uh_ulen) > pbuf->pb_packet_len - off ||
9900 		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
9901 			action = PF_DROP;
9902 			REASON_SET(&reason, PFRES_SHORT);
9903 			goto done;
9904 		}
9905 #if DUMMYNET
9906 		/* Traffic goes through dummynet first */
9907 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9908 		if (action == PF_DROP || pbuf == NULL) {
9909 			*pbufp = NULL;
9910 			return action;
9911 		}
9912 #endif /* DUMMYNET */
9913 		action = pf_test_state_udp(&s, dir, kif, pbuf, off, h, &pd,
9914 		    &reason);
9915 		if (action == PF_NAT64) {
9916 			goto done;
9917 		}
9918 		if (pd.lmw < 0) {
9919 			goto done;
9920 		}
9921 		PF_APPLE_UPDATE_PDESC_IPv4();
9922 		if (action == PF_PASS) {
9923 #if NPFSYNC
9924 			pfsync_update_state(s);
9925 #endif /* NPFSYNC */
9926 			r = s->rule.ptr;
9927 			a = s->anchor.ptr;
9928 			log = s->log;
9929 		} else if (s == NULL) {
9930 			action = pf_test_rule(&r, &s, dir, kif,
9931 			    pbuf, off, h, &pd, &a, &ruleset, NULL);
9932 		}
9933 		break;
9934 	}
9935 
9936 	case IPPROTO_ICMP: {
9937 		struct icmp     ih;
9938 
9939 		pf_pd_set_hdr_icmp(&pd, &ih, ICMP_MINLEN);
9940 		if (!pf_pull_hdr(pbuf, off, &ih, sizeof(ih), ICMP_MINLEN,
9941 		    &action, &reason, AF_INET)) {
9942 			log = action != PF_PASS;
9943 			goto done;
9944 		}
9945 #if DUMMYNET
9946 		/* Traffic goes through dummynet first */
9947 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9948 		if (action == PF_DROP || pbuf == NULL) {
9949 			*pbufp = NULL;
9950 			return action;
9951 		}
9952 #endif /* DUMMYNET */
9953 		action = pf_test_state_icmp(&s, dir, kif, pbuf, off, h, &pd,
9954 		    &reason);
9955 
9956 		if (action == PF_NAT64) {
9957 			goto done;
9958 		}
9959 		if (pd.lmw < 0) {
9960 			goto done;
9961 		}
9962 		PF_APPLE_UPDATE_PDESC_IPv4();
9963 		if (action == PF_PASS) {
9964 #if NPFSYNC
9965 			pfsync_update_state(s);
9966 #endif /* NPFSYNC */
9967 			r = s->rule.ptr;
9968 			a = s->anchor.ptr;
9969 			log = s->log;
9970 		} else if (s == NULL) {
9971 			action = pf_test_rule(&r, &s, dir, kif,
9972 			    pbuf, off, h, &pd, &a, &ruleset, NULL);
9973 		}
9974 		break;
9975 	}
9976 
9977 	case IPPROTO_ESP: {
9978 		struct pf_esp_hdr       esp;
9979 
9980 		pf_pd_set_hdr_esp(&pd, &esp);
9981 		if (!pf_pull_hdr(pbuf, off, &esp, sizeof(esp), sizeof(esp), &action, &reason,
9982 		    AF_INET)) {
9983 			log = action != PF_PASS;
9984 			goto done;
9985 		}
9986 #if DUMMYNET
9987 		/* Traffic goes through dummynet first */
9988 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9989 		if (action == PF_DROP || pbuf == NULL) {
9990 			*pbufp = NULL;
9991 			return action;
9992 		}
9993 #endif /* DUMMYNET */
9994 		action = pf_test_state_esp(&s, dir, kif, off, &pd);
9995 		if (pd.lmw < 0) {
9996 			goto done;
9997 		}
9998 		PF_APPLE_UPDATE_PDESC_IPv4();
9999 		if (action == PF_PASS) {
10000 #if NPFSYNC
10001 			pfsync_update_state(s);
10002 #endif /* NPFSYNC */
10003 			r = s->rule.ptr;
10004 			a = s->anchor.ptr;
10005 			log = s->log;
10006 		} else if (s == NULL) {
10007 			action = pf_test_rule(&r, &s, dir, kif,
10008 			    pbuf, off, h, &pd, &a, &ruleset, NULL);
10009 		}
10010 		break;
10011 	}
10012 
10013 	case IPPROTO_GRE: {
10014 		struct pf_grev1_hdr     grev1;
10015 		pf_pd_set_hdr_grev1(&pd, &grev1);
10016 		if (!pf_pull_hdr(pbuf, off, &grev1, sizeof(grev1), sizeof(grev1), &action,
10017 		    &reason, AF_INET)) {
10018 			log = (action != PF_PASS);
10019 			goto done;
10020 		}
10021 #if DUMMYNET
10022 		/* Traffic goes through dummynet first */
10023 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10024 		if (action == PF_DROP || pbuf == NULL) {
10025 			*pbufp = NULL;
10026 			return action;
10027 		}
10028 #endif /* DUMMYNET */
10029 		if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
10030 		    ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
10031 			if (ntohs(grev1.payload_length) >
10032 			    pbuf->pb_packet_len - off) {
10033 				action = PF_DROP;
10034 				REASON_SET(&reason, PFRES_SHORT);
10035 				goto done;
10036 			}
10037 			pd.proto_variant = PF_GRE_PPTP_VARIANT;
10038 			action = pf_test_state_grev1(&s, dir, kif, off, &pd);
10039 			if (pd.lmw < 0) {
10040 				goto done;
10041 			}
10042 			PF_APPLE_UPDATE_PDESC_IPv4();
10043 			if (action == PF_PASS) {
10044 #if NPFSYNC
10045 				pfsync_update_state(s);
10046 #endif /* NPFSYNC */
10047 				r = s->rule.ptr;
10048 				a = s->anchor.ptr;
10049 				log = s->log;
10050 				break;
10051 			} else if (s == NULL) {
10052 				action = pf_test_rule(&r, &s, dir, kif, pbuf,
10053 				    off, h, &pd, &a, &ruleset, NULL);
10054 				if (action == PF_PASS) {
10055 					break;
10056 				}
10057 			}
10058 		}
10059 
10060 		/* not GREv1/PPTP, so treat as ordinary GRE... */
10061 		OS_FALLTHROUGH;
10062 	}
10063 
10064 	default:
10065 #if DUMMYNET
10066 		/* Traffic goes through dummynet first */
10067 		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10068 		if (action == PF_DROP || pbuf == NULL) {
10069 			*pbufp = NULL;
10070 			return action;
10071 		}
10072 #endif /* DUMMYNET */
10073 		action = pf_test_state_other(&s, dir, kif, &pd);
10074 		if (pd.lmw < 0) {
10075 			goto done;
10076 		}
10077 		PF_APPLE_UPDATE_PDESC_IPv4();
10078 		if (action == PF_PASS) {
10079 #if NPFSYNC
10080 			pfsync_update_state(s);
10081 #endif /* NPFSYNC */
10082 			r = s->rule.ptr;
10083 			a = s->anchor.ptr;
10084 			log = s->log;
10085 		} else if (s == NULL) {
10086 			action = pf_test_rule(&r, &s, dir, kif, pbuf, off, h,
10087 			    &pd, &a, &ruleset, NULL);
10088 		}
10089 		break;
10090 	}
10091 
10092 done:
10093 	if (action == PF_NAT64) {
10094 		*pbufp = NULL;
10095 		return action;
10096 	}
10097 
10098 	*pbufp = pd.mp;
10099 	PF_APPLE_UPDATE_PDESC_IPv4();
10100 
10101 	if (action != PF_DROP) {
10102 		if (action == PF_PASS && h->ip_hl > 5 &&
10103 		    !((s && s->allow_opts) || r->allow_opts)) {
10104 			action = PF_DROP;
10105 			REASON_SET(&reason, PFRES_IPOPTIONS);
10106 			log = 1;
10107 			DPFPRINTF(PF_DEBUG_MISC,
10108 			    ("pf: dropping packet with ip options [hlen=%u]\n",
10109 			    (unsigned int) h->ip_hl));
10110 		}
10111 
10112 		if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) ||
10113 		    (pd.pktflags & PKTF_FLOW_ID)) {
10114 			(void) pf_tag_packet(pbuf, pd.pf_mtag, s ? s->tag : 0,
10115 			    r->rtableid, &pd);
10116 		}
10117 
10118 		if (action == PF_PASS) {
10119 #if PF_ECN
10120 			/* add hints for ecn */
10121 			pd.pf_mtag->pftag_hdr = h;
10122 			/* record address family */
10123 			pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET6;
10124 			pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
10125 #endif /* PF_ECN */
10126 			/* record protocol */
10127 			*pbuf->pb_proto = pd.proto;
10128 
10129 			/*
10130 			 * connections redirected to loopback should not match sockets
10131 			 * bound specifically to loopback due to security implications,
10132 			 * see tcp_input() and in_pcblookup_listen().
10133 			 */
10134 			if (dir == PF_IN && (pd.proto == IPPROTO_TCP ||
10135 			    pd.proto == IPPROTO_UDP) && s != NULL &&
10136 			    s->nat_rule.ptr != NULL &&
10137 			    (s->nat_rule.ptr->action == PF_RDR ||
10138 			    s->nat_rule.ptr->action == PF_BINAT) &&
10139 			    (ntohl(pd.dst->v4addr.s_addr) >> IN_CLASSA_NSHIFT)
10140 			    == IN_LOOPBACKNET) {
10141 				pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;
10142 			}
10143 		}
10144 	}
10145 
10146 	if (log) {
10147 		struct pf_rule *lr;
10148 
10149 		if (s != NULL && s->nat_rule.ptr != NULL &&
10150 		    s->nat_rule.ptr->log & PF_LOG_ALL) {
10151 			lr = s->nat_rule.ptr;
10152 		} else {
10153 			lr = r;
10154 		}
10155 		PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, lr, a, ruleset,
10156 		    &pd);
10157 	}
10158 
10159 	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
10160 	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
10161 
10162 	if (action == PF_PASS || r->action == PF_DROP) {
10163 		dirndx = (dir == PF_OUT);
10164 		r->packets[dirndx]++;
10165 		r->bytes[dirndx] += pd.tot_len;
10166 		if (a != NULL) {
10167 			a->packets[dirndx]++;
10168 			a->bytes[dirndx] += pd.tot_len;
10169 		}
10170 		if (s != NULL) {
10171 			sk = s->state_key;
10172 			if (s->nat_rule.ptr != NULL) {
10173 				s->nat_rule.ptr->packets[dirndx]++;
10174 				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
10175 			}
10176 			if (s->src_node != NULL) {
10177 				s->src_node->packets[dirndx]++;
10178 				s->src_node->bytes[dirndx] += pd.tot_len;
10179 			}
10180 			if (s->nat_src_node != NULL) {
10181 				s->nat_src_node->packets[dirndx]++;
10182 				s->nat_src_node->bytes[dirndx] += pd.tot_len;
10183 			}
10184 			dirndx = (dir == sk->direction) ? 0 : 1;
10185 			s->packets[dirndx]++;
10186 			s->bytes[dirndx] += pd.tot_len;
10187 		}
10188 		tr = r;
10189 		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
10190 		if (nr != NULL) {
10191 			struct pf_addr *x;
10192 			/*
10193 			 * XXX: we need to make sure that the addresses
10194 			 * passed to pfr_update_stats() are the same than
10195 			 * the addresses used during matching (pfr_match)
10196 			 */
10197 			if (r == &pf_default_rule) {
10198 				tr = nr;
10199 				x = (sk == NULL || sk->direction == dir) ?
10200 				    &pd.baddr : &pd.naddr;
10201 			} else {
10202 				x = (sk == NULL || sk->direction == dir) ?
10203 				    &pd.naddr : &pd.baddr;
10204 			}
10205 			if (x == &pd.baddr || s == NULL) {
10206 				/* we need to change the address */
10207 				if (dir == PF_OUT) {
10208 					pd.src = x;
10209 				} else {
10210 					pd.dst = x;
10211 				}
10212 			}
10213 		}
10214 		if (tr->src.addr.type == PF_ADDR_TABLE) {
10215 			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
10216 			    sk->direction == dir) ?
10217 			    pd.src : pd.dst, pd.af,
10218 			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
10219 			    tr->src.neg);
10220 		}
10221 		if (tr->dst.addr.type == PF_ADDR_TABLE) {
10222 			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
10223 			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
10224 			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
10225 			    tr->dst.neg);
10226 		}
10227 	}
10228 
10229 	VERIFY(pbuf == NULL || pd.mp == NULL || pd.mp == pbuf);
10230 
10231 	if (*pbufp) {
10232 		if (pd.lmw < 0) {
10233 			REASON_SET(&reason, PFRES_MEMORY);
10234 			action = PF_DROP;
10235 		}
10236 
10237 		if (action == PF_DROP) {
10238 			pbuf_destroy(*pbufp);
10239 			*pbufp = NULL;
10240 			return PF_DROP;
10241 		}
10242 
10243 		*pbufp = pbuf;
10244 	}
10245 
10246 	if (action == PF_SYNPROXY_DROP) {
10247 		pbuf_destroy(*pbufp);
10248 		*pbufp = NULL;
10249 		action = PF_PASS;
10250 	} else if (r->rt) {
10251 		/* pf_route can free the pbuf causing *pbufp to become NULL */
10252 		pf_route(pbufp, r, dir, kif->pfik_ifp, s, &pd);
10253 	}
10254 
10255 	return action;
10256 }
10257 #endif /* INET */
10258 
/*
 * Re-synchronize the enclosing function's locals after a PF helper may
 * have substituted a new packet buffer: if pd.mp was replaced (e.g. by
 * copy-on-write or reassembly), adopt it as the working `pbuf', and
 * refresh the IPv6 header pointer `h' from the (possibly new) buffer.
 * Operates on the locals `pbuf', `pd' and `h' of the calling function.
 */
#define PF_APPLE_UPDATE_PDESC_IPv6()                            \
	do {                                                    \
	        if (pbuf && pd.mp && pbuf != pd.mp) {           \
	                pbuf = pd.mp;                           \
	        }                                               \
	        h = pbuf->pb_data;                              \
	} while (0)
10266 
10267 int
pf_test6_mbuf(int dir,struct ifnet * ifp,struct mbuf ** m0,struct ether_header * eh,struct ip_fw_args * fwa)10268 pf_test6_mbuf(int dir, struct ifnet *ifp, struct mbuf **m0,
10269     struct ether_header *eh, struct ip_fw_args *fwa)
10270 {
10271 	pbuf_t pbuf_store, *__single pbuf;
10272 	int rv;
10273 
10274 	pbuf_init_mbuf(&pbuf_store, *m0, (*m0)->m_pkthdr.rcvif);
10275 	pbuf = &pbuf_store;
10276 
10277 	rv = pf_test6(dir, ifp, &pbuf, eh, fwa);
10278 
10279 	if (pbuf_is_valid(pbuf)) {
10280 		*m0 = pbuf->pb_mbuf;
10281 		pbuf->pb_mbuf = NULL;
10282 		pbuf_destroy(pbuf);
10283 	} else {
10284 		*m0 = NULL;
10285 	}
10286 
10287 	return rv;
10288 }
10289 
/*
 * pf_test6: main PF entry point for one IPv6 packet.
 *
 * Runs the packet referenced by *pbufp through normalization, the
 * extension-header walk, per-protocol state/rule matching, statistics
 * accounting, optional route-to handling and (for forwarded,
 * reassembled packets) refragmentation.
 *
 * dir    - PF_IN or PF_OUT
 * ifp    - interface the packet is being tested on
 * pbufp  - in/out packet buffer; set to NULL when PF consumes the
 *          packet (drop, dummynet, NAT64, synproxy, route-to)
 * eh     - optional ethernet header (may be NULL)
 * fwa    - dummynet re-injection arguments (only used with DUMMYNET)
 *
 * Returns a PF_* action (PF_PASS, PF_DROP, ...).  Must be called with
 * the PF lock held.
 */
static __attribute__((noinline)) int
pf_test6(int dir, struct ifnet *ifp, pbuf_t **pbufp,
    struct ether_header *eh, struct ip_fw_args *fwa)
{
#if !DUMMYNET
#pragma unused(fwa)
#endif
	struct pfi_kif          *__single kif;
	u_short                  action = PF_PASS, reason = 0, log = 0;
	pbuf_t                  *__single pbuf = *pbufp;
	struct ip6_hdr          *__single h;
	struct pf_rule          *__single a = NULL, *__single r = &pf_default_rule, *__single tr, *__single nr;
	struct pf_state         *__single s = NULL;
	struct pf_state_key     *__single sk = NULL;
	struct pf_ruleset       *__single ruleset = NULL;
	struct pf_pdesc          pd;
	int                      off, terminal = 0, dirndx, rh_cnt = 0;
	u_int8_t                 nxt;
	boolean_t                fwd = FALSE;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	ASSERT(ifp != NULL);
	/*
	 * Forwarded packet: outbound on an interface different from the
	 * one it was received on.  Used at the end to decide whether a
	 * reassembled packet must be refragmented.
	 */
	if ((dir == PF_OUT) && (pbuf->pb_ifp) && (ifp != pbuf->pb_ifp)) {
		fwd = TRUE;
	}

	if (!pf_status.running) {
		return PF_PASS;
	}

	memset(&pd, 0, sizeof(pd));

	if ((pd.pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test6: pf_get_mtag_pbuf returned NULL\n"));
		return PF_DROP;
	}

	/* Packets generated by PF itself are never re-filtered. */
	if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED) {
		return PF_PASS;
	}

	kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test6: kif == NULL, if_name %s\n", ifp->if_name));
		return PF_DROP;
	}
	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
		return PF_PASS;
	}

	if (pbuf->pb_packet_len < (int)sizeof(*h)) {
		REASON_SET(&reason, PFRES_SHORT);
		return PF_DROP;
	}

	/* Initialize the packet descriptor from the IPv6 header. */
	h = pbuf->pb_data;
	nxt = h->ip6_nxt;
	off = ((caddr_t)h - (caddr_t)pbuf->pb_data) + sizeof(struct ip6_hdr);
	pd.mp = pbuf;
	pd.lmw = 0;
	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
	pd.src = (struct pf_addr *)(void *)&h->ip6_src;
	pd.dst = (struct pf_addr *)(void *)&h->ip6_dst;
	PF_ACPY(&pd.baddr, pd.src, AF_INET6);
	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6);
	pd.ip_sum = NULL;
	pd.af = AF_INET6;
	pd.proto = nxt;
	pd.proto_variant = 0;
	pd.tos = 0;
	pd.ttl = h->ip6_hlim;
	pd.sc = MBUF_SCIDX(pbuf_get_service_class(pbuf));
	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
	pd.eh = eh;

	if (*pbuf->pb_flags & PKTF_FLOW_ID) {
		pd.flowsrc = *pbuf->pb_flowsrc;
		pd.flowhash = *pbuf->pb_flowid;
		pd.pktflags = (*pbuf->pb_flags & PKTF_FLOW_MASK);
	}

#if DUMMYNET
	/* Packets re-injected by dummynet skip normalization. */
	if (fwa != NULL && fwa->fwa_pf_rule != NULL) {
		goto nonormalize;
	}
#endif /* DUMMYNET */

	/* We do IP header normalization and packet reassembly here */
	action = pf_normalize_ip6(pbuf, dir, kif, &reason, &pd);
	if (action != PF_PASS || pd.lmw < 0) {
		action = PF_DROP;
		goto done;
	}

#if DUMMYNET
nonormalize:
#endif /* DUMMYNET */
	/* Normalization may have replaced the buffer; re-read the header. */
	h = pbuf->pb_data;

	/*
	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
	 * will do something bad, so drop the packet for now.
	 */
	if (htons(h->ip6_plen) == 0) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_NORM);        /*XXX*/
		goto done;
	}
	/* Re-initialize the descriptor against the normalized packet. */
	pd.src = (struct pf_addr *)(void *)&h->ip6_src;
	pd.dst = (struct pf_addr *)(void *)&h->ip6_dst;
	PF_ACPY(&pd.baddr, pd.src, AF_INET6);
	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6);
	pd.ip_sum = NULL;
	pd.af = AF_INET6;
	pd.tos = 0;
	pd.ttl = h->ip6_hlim;
	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
	pd.eh = eh;

	off = ((caddr_t)h - (caddr_t)pbuf->pb_data) + sizeof(struct ip6_hdr);
	pd.proto = h->ip6_nxt;
	pd.proto_variant = 0;
	pd.mp = pbuf;
	pd.lmw = 0;
	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);

	/*
	 * Walk the IPv6 extension header chain until a terminal protocol
	 * is found, advancing `off' past each option header and counting
	 * routing headers in rh_cnt (checked as "dangerous" below).
	 */
	do {
		switch (pd.proto) {
		case IPPROTO_FRAGMENT: {
			struct ip6_frag ip6f;

			pd.flags |= PFDESC_IP_FRAG;
			if (!pf_pull_hdr(pbuf, off, &ip6f, sizeof ip6f, sizeof ip6f, NULL,
			    &reason, pd.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 short fragment header\n"));
				action = PF_DROP;
				REASON_SET(&reason, PFRES_SHORT);
				log = 1;
				goto done;
			}
			pd.proto = ip6f.ip6f_nxt;
#if DUMMYNET
			/* Traffic goes through dummynet first */
			action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd,
			    fwa);
			if (action == PF_DROP || pbuf == NULL) {
				*pbufp = NULL;
				return action;
			}
#endif /* DUMMYNET */
			/* Fragments are matched against rules only; no state. */
			action = pf_test_fragment(&r, dir, kif, pbuf, h, &pd,
			    &a, &ruleset);
			if (action == PF_DROP) {
				REASON_SET(&reason, PFRES_FRAG);
				log = 1;
			}
			goto done;
		}
		case IPPROTO_ROUTING:
			++rh_cnt;
			OS_FALLTHROUGH;

		case IPPROTO_AH:
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct ip6_ext  opt6;

			if (!pf_pull_hdr(pbuf, off, &opt6, sizeof(opt6), sizeof(opt6),
			    NULL, &reason, pd.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 short opt\n"));
				action = PF_DROP;
				log = 1;
				goto done;
			}
			/* AH length is in 32-bit words, others in 64-bit units. */
			if (pd.proto == IPPROTO_AH) {
				off += (opt6.ip6e_len + 2) * 4;
			} else {
				off += (opt6.ip6e_len + 1) * 8;
			}
			pd.proto = opt6.ip6e_nxt;
			/* goto the next header */
			break;
		}
		default:
			terminal++;
			break;
		}
	} while (!terminal);


	/*
	 * Per-protocol processing: pull the transport header, give
	 * dummynet first crack (DUMMYNET), try to match existing state,
	 * and fall back to a full rule evaluation when no state exists.
	 */
	switch (pd.proto) {
	case IPPROTO_TCP: {
		struct tcphdr   th;

		pf_pd_set_hdr_tcp(&pd, &th);
		if (!pf_pull_hdr(pbuf, off, &th, sizeof(th), sizeof(th),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_normalize_tcp(dir, kif, pbuf, 0, off, h, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_DROP) {
			goto done;
		}
		if (th.th_sport == 0 || th.th_dport == 0) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_INVPORT);
			goto done;
		}
		action = pf_test_state_tcp(&s, dir, kif, pbuf, off, h, &pd,
		    &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr   uh;

		pf_pd_set_hdr_udp(&pd, &uh);
		if (!pf_pull_hdr(pbuf, off, &uh, sizeof(uh), sizeof(uh),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		if (uh.uh_sport == 0 || uh.uh_dport == 0) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_INVPORT);
			goto done;
		}
		/* UDP length must fit in the packet and cover the header. */
		if (ntohs(uh.uh_ulen) > pbuf->pb_packet_len - off ||
		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_SHORT);
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_udp(&s, dir, kif, pbuf, off, h, &pd,
		    &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_ICMPV6: {
		struct icmp6_hdr        ih;

		pf_pd_set_hdr_icmp6(&pd, &ih);
		if (!pf_pull_hdr(pbuf, off, &ih, sizeof(ih), sizeof(ih),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_icmp(&s, dir, kif,
		    pbuf, off, h, &pd, &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_ESP: {
		struct pf_esp_hdr       esp;

		pf_pd_set_hdr_esp(&pd, &esp);
		if (!pf_pull_hdr(pbuf, off, &esp, sizeof(esp), sizeof(esp), &action,
		    &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_esp(&s, dir, kif, off, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_GRE: {
		struct pf_grev1_hdr     grev1;

		pf_pd_set_hdr_grev1(&pd, &grev1);
		if (!pf_pull_hdr(pbuf, off, &grev1, sizeof(grev1), sizeof(grev1), &action,
		    &reason, AF_INET6)) {
			log = (action != PF_PASS);
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		/* GREv1 carrying PPP (PPTP) gets stateful treatment. */
		if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
		    ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
			if (ntohs(grev1.payload_length) >
			    pbuf->pb_packet_len - off) {
				action = PF_DROP;
				REASON_SET(&reason, PFRES_SHORT);
				goto done;
			}
			/*
			 * NOTE(review): unlike the IPv4 path, pd.proto_variant
			 * is not set to PF_GRE_PPTP_VARIANT here — confirm
			 * pf_test_state_grev1() does not depend on it.
			 */
			action = pf_test_state_grev1(&s, dir, kif, off, &pd);
			if (pd.lmw < 0) {
				goto done;
			}
			PF_APPLE_UPDATE_PDESC_IPv6();
			if (action == PF_PASS) {
#if NPFSYNC
				pfsync_update_state(s);
#endif /* NPFSYNC */
				r = s->rule.ptr;
				a = s->anchor.ptr;
				log = s->log;
				break;
			} else if (s == NULL) {
				action = pf_test_rule(&r, &s, dir, kif, pbuf,
				    off, h, &pd, &a, &ruleset, NULL);
				if (action == PF_PASS) {
					break;
				}
			}
		}

		/* not GREv1/PPTP, so treat as ordinary GRE... */
		OS_FALLTHROUGH; /* XXX is this correct? */
	}

	default:
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_other(&s, dir, kif, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif, pbuf, off, h,
			    &pd, &a, &ruleset, NULL);
		}
		break;
	}

done:
	/* NAT64 translation consumed the packet (re-injected as IPv4). */
	if (action == PF_NAT64) {
		*pbufp = NULL;
		return action;
	}

	*pbufp = pd.mp;
	PF_APPLE_UPDATE_PDESC_IPv6();

	/* handle dangerous IPv6 extension headers. */
	if (action != PF_DROP) {
		if (action == PF_PASS && rh_cnt &&
		    !((s && s->allow_opts) || r->allow_opts)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_IPOPTIONS);
			log = 1;
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: dropping packet with dangerous v6addr headers\n"));
		}

		if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) ||
		    (pd.pktflags & PKTF_FLOW_ID)) {
			(void) pf_tag_packet(pbuf, pd.pf_mtag, s ? s->tag : 0,
			    r->rtableid, &pd);
		}

		if (action == PF_PASS) {
#if PF_ECN
			/* add hints for ecn */
			pd.pf_mtag->pftag_hdr = h;
			/* record address family */
			pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET;
			pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
#endif /* PF_ECN */
			/* record protocol */
			*pbuf->pb_proto = pd.proto;
			/*
			 * Mark NAT-redirected-to-loopback connections so the
			 * input path can match them despite the loopback
			 * destination (see the IPv4 twin of this code).
			 */
			if (dir == PF_IN && (pd.proto == IPPROTO_TCP ||
			    pd.proto == IPPROTO_UDP) && s != NULL &&
			    s->nat_rule.ptr != NULL &&
			    (s->nat_rule.ptr->action == PF_RDR ||
			    s->nat_rule.ptr->action == PF_BINAT) &&
			    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6addr)) {
				pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;
			}
		}
	}


	if (log) {
		struct pf_rule *lr;

		/* Prefer the NAT rule as the logged rule when it logs all. */
		if (s != NULL && s->nat_rule.ptr != NULL &&
		    s->nat_rule.ptr->log & PF_LOG_ALL) {
			lr = s->nat_rule.ptr;
		} else {
			lr = r;
		}
		PFLOG_PACKET(kif, h, pbuf, AF_INET6, dir, reason, lr, a, ruleset,
		    &pd);
	}

	/* Interface counters: index [1] is the IPv6 bucket. */
	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;

	/* Rule / state / source-node / table statistics. */
	if (action == PF_PASS || r->action == PF_DROP) {
		dirndx = (dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd.tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd.tot_len;
		}
		if (s != NULL) {
			sk = s->state_key;
			if (s->nat_rule.ptr != NULL) {
				s->nat_rule.ptr->packets[dirndx]++;
				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
			}
			if (s->src_node != NULL) {
				s->src_node->packets[dirndx]++;
				s->src_node->bytes[dirndx] += pd.tot_len;
			}
			if (s->nat_src_node != NULL) {
				s->nat_src_node->packets[dirndx]++;
				s->nat_src_node->bytes[dirndx] += pd.tot_len;
			}
			dirndx = (dir == sk->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd.tot_len;
		}
		tr = r;
		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
		if (nr != NULL) {
			struct pf_addr *x;
			/*
			 * XXX: we need to make sure that the addresses
			 * passed to pfr_update_stats() are the same than
			 * the addresses used during matching (pfr_match)
			 */
			if (r == &pf_default_rule) {
				tr = nr;
				x = (s == NULL || sk->direction == dir) ?
				    &pd.baddr : &pd.naddr;
			} else {
				x = (s == NULL || sk->direction == dir) ?
				    &pd.naddr : &pd.baddr;
			}
			if (x == &pd.baddr || s == NULL) {
				if (dir == PF_OUT) {
					pd.src = x;
				} else {
					pd.dst = x;
				}
			}
		}
		if (tr->src.addr.type == PF_ADDR_TABLE) {
			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ? pd.src : pd.dst, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->src.neg);
		}
		if (tr->dst.addr.type == PF_ADDR_TABLE) {
			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->dst.neg);
		}
	}

	VERIFY(pbuf == NULL || pd.mp == NULL || pd.mp == pbuf);

	if (*pbufp) {
		if (pd.lmw < 0) {
			REASON_SET(&reason, PFRES_MEMORY);
			action = PF_DROP;
		}

		if (action == PF_DROP) {
			pbuf_destroy(*pbufp);
			*pbufp = NULL;
			return PF_DROP;
		}

		*pbufp = pbuf;
	}

	if (action == PF_SYNPROXY_DROP) {
		pbuf_destroy(*pbufp);
		*pbufp = NULL;
		action = PF_PASS;
	} else if (r->rt) {
		/* pf_route6 can free the mbuf causing *pbufp to become NULL */
		pf_route6(pbufp, r, dir, kif->pfik_ifp, s, &pd);
	}

	/* if reassembled packet passed, create new fragments */
	struct pf_fragment_tag *ftag = NULL;
	if ((action == PF_PASS) && (*pbufp != NULL) && (fwd) &&
	    ((ftag = pf_find_fragment_tag_pbuf(*pbufp)) != NULL)) {
		action = pf_refragment6(ifp, pbufp, ftag);
	}
	return action;
}
10911 
/*
 * Stub: interface-queue congestion detection is not implemented on
 * this platform; always report "not congested".
 */
static int
pf_check_congestion(struct ifqueue *ifq)
{
	(void)ifq;              /* unused on this platform */
	return 0;
}
10918 
10919 void
pool_init(struct pool * pp,size_t size,unsigned int align,unsigned int ioff,int flags,const char * wchan,void * palloc)10920 pool_init(struct pool *pp, size_t size, unsigned int align, unsigned int ioff,
10921     int flags, const char *wchan, void *palloc)
10922 {
10923 #pragma unused(align, ioff, flags, palloc)
10924 	bzero(pp, sizeof(*pp));
10925 	pp->pool_zone = zone_create(wchan, size,
10926 	    ZC_PGZ_USE_GUARDS | ZC_ZFREE_CLEARMEM);
10927 	pp->pool_hiwat = pp->pool_limit = (unsigned int)-1;
10928 	pp->pool_name = wchan;
10929 }
10930 
/* No-op: XNU zones cannot currently be destroyed. */
void
pool_destroy(struct pool *pp)
{
	(void)pp;
}
10937 
10938 void
pool_sethiwat(struct pool * pp,int n)10939 pool_sethiwat(struct pool *pp, int n)
10940 {
10941 	pp->pool_hiwat = n;     /* Currently unused */
10942 }
10943 
10944 void
pool_sethardlimit(struct pool * pp,int n,const char * warnmess,int ratecap)10945 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
10946 {
10947 #pragma unused(warnmess, ratecap)
10948 	pp->pool_limit = n;
10949 }
10950 
10951 void *
pool_get(struct pool * pp,int flags)10952 pool_get(struct pool *pp, int flags)
10953 {
10954 	void *buf;
10955 
10956 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
10957 
10958 	if (pp->pool_count > pp->pool_limit) {
10959 		DPFPRINTF(PF_DEBUG_NOISY,
10960 		    ("pf: pool %s hard limit reached (%d)\n",
10961 		    pp->pool_name != NULL ? pp->pool_name : "unknown",
10962 		    pp->pool_limit));
10963 		pp->pool_fails++;
10964 		return NULL;
10965 	}
10966 
10967 	buf = zalloc_flags_buf(pp->pool_zone,
10968 	    (flags & PR_WAITOK) ? Z_WAITOK : Z_NOWAIT);
10969 	if (buf != NULL) {
10970 		pp->pool_count++;
10971 		VERIFY(pp->pool_count != 0);
10972 	}
10973 	return buf;
10974 }
10975 
10976 void
pool_put(struct pool * pp,void * v)10977 pool_put(struct pool *pp, void *v)
10978 {
10979 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
10980 
10981 	zfree(pp->pool_zone, v);
10982 	VERIFY(pp->pool_count != 0);
10983 	pp->pool_count--;
10984 }
10985 
10986 struct pf_mtag *
pf_find_mtag_pbuf(pbuf_t * pbuf)10987 pf_find_mtag_pbuf(pbuf_t *pbuf)
10988 {
10989 	return pbuf->pb_pftag;
10990 }
10991 
/* Fetch the pf metadata tag via the mbuf-layer accessor. */
struct pf_mtag *
pf_find_mtag(struct mbuf *m)
{
	struct pf_mtag *mtag = m_pftag(m);

	return mtag;
}
10997 
/*
 * "Get" is an alias for "find" in this port: no separate creation step
 * happens here, the lookup result is returned as-is.
 */
struct pf_mtag *
pf_get_mtag(struct mbuf *m)
{
	struct pf_mtag *mtag = pf_find_mtag(m);

	return mtag;
}
11003 
11004 struct pf_mtag *
pf_get_mtag_pbuf(pbuf_t * pbuf)11005 pf_get_mtag_pbuf(pbuf_t *pbuf)
11006 {
11007 	return pf_find_mtag_pbuf(pbuf);
11008 }
11009 
/*
 * Copy the reassembly state "ftag" into a KERNEL_TAG_TYPE_PF_REASS m_tag
 * prepended to "m", and mark the packet's pf mtag as reassembled.
 * "how" is the allocation wait flag forwarded to m_tag_create().
 * Returns a pointer to the copy stored inside the new tag, or NULL if
 * the tag could not be allocated (in which case "m" is left unchanged).
 */
struct pf_fragment_tag *
pf_copy_fragment_tag(struct mbuf *m, struct pf_fragment_tag *ftag, int how)
{
	struct m_tag *__single tag;
	struct pf_mtag *__single pftag = pf_find_mtag(m);

	tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS,
	    sizeof(*ftag), how, m);
	if (tag == NULL) {
		return NULL;
	}
	m_tag_prepend(m, tag);
	bcopy(ftag, tag->m_tag_data, sizeof(*ftag));
	/*
	 * NOTE(review): pftag is dereferenced without a NULL check --
	 * assumes pf_find_mtag() cannot fail here; confirm with callers.
	 */
	pftag->pftag_flags |= PF_TAG_REASSEMBLED;
	return (struct pf_fragment_tag *)tag->m_tag_data;
}
11026 
11027 struct pf_fragment_tag *
pf_find_fragment_tag(struct mbuf * m)11028 pf_find_fragment_tag(struct mbuf *m)
11029 {
11030 	struct m_tag *tag;
11031 	struct pf_fragment_tag *ftag = NULL;
11032 	struct pf_mtag *pftag = pf_find_mtag(m);
11033 
11034 	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS);
11035 	VERIFY((tag == NULL) || (pftag->pftag_flags & PF_TAG_REASSEMBLED));
11036 	if (tag != NULL) {
11037 		ftag = (struct pf_fragment_tag *)tag->m_tag_data;
11038 	}
11039 	return ftag;
11040 }
11041 
11042 struct pf_fragment_tag *
pf_find_fragment_tag_pbuf(pbuf_t * pbuf)11043 pf_find_fragment_tag_pbuf(pbuf_t *pbuf)
11044 {
11045 	struct pf_mtag *mtag = pf_find_mtag_pbuf(pbuf);
11046 
11047 	return (mtag->pftag_flags & PF_TAG_REASSEMBLED) ?
11048 	       pbuf->pb_pf_fragtag : NULL;
11049 }
11050 
11051 uint64_t
pf_time_second(void)11052 pf_time_second(void)
11053 {
11054 	struct timeval t;
11055 
11056 	microuptime(&t);
11057 	return t.tv_sec;
11058 }
11059 
11060 uint64_t
pf_calendar_time_second(void)11061 pf_calendar_time_second(void)
11062 {
11063 	struct timeval t;
11064 
11065 	getmicrotime(&t);
11066 	return t.tv_sec;
11067 }
11068 
11069 static void *
hook_establish(struct hook_desc_head * head,int tail,hook_fn_t fn,void * arg)11070 hook_establish(struct hook_desc_head *head, int tail, hook_fn_t fn, void *arg)
11071 {
11072 	struct hook_desc *hd;
11073 
11074 	hd = kalloc_type(struct hook_desc, Z_WAITOK | Z_NOFAIL);
11075 
11076 	hd->hd_fn = fn;
11077 	hd->hd_arg = arg;
11078 	if (tail) {
11079 		TAILQ_INSERT_TAIL(head, hd, hd_list);
11080 	} else {
11081 		TAILQ_INSERT_HEAD(head, hd, hd_list);
11082 	}
11083 
11084 	return hd;
11085 }
11086 
11087 static void
hook_runloop(struct hook_desc_head * head,int flags)11088 hook_runloop(struct hook_desc_head *head, int flags)
11089 {
11090 	struct hook_desc *__single hd;
11091 
11092 	if (!(flags & HOOK_REMOVE)) {
11093 		if (!(flags & HOOK_ABORT)) {
11094 			TAILQ_FOREACH(hd, head, hd_list)
11095 			hd->hd_fn(hd->hd_arg);
11096 		}
11097 	} else {
11098 		while (!!(hd = TAILQ_FIRST(head))) {
11099 			TAILQ_REMOVE(head, hd, hd_list);
11100 			if (!(flags & HOOK_ABORT)) {
11101 				hd->hd_fn(hd->hd_arg);
11102 			}
11103 			if (flags & HOOK_FREE) {
11104 				kfree_type(struct hook_desc, hd);
11105 			}
11106 		}
11107 	}
11108 }
11109 
11110 #if SKYWALK
/*
 * Classify one anchor for compatibility reporting.  Returns 0 when the
 * anchor is system-owned: the reserved anchor, the literal "com.apple"
 * anchor, or any path beginning with an entry of compatible_anchors[].
 * Otherwise the result carries PF_COMPATIBLE_FLAGS_CUSTOM_ANCHORS_PRESENT,
 * plus PF_COMPATIBLE_FLAGS_CUSTOM_RULES_PRESENT when any ruleset from
 * PF_RULESET_SCRUB onward has active rules.
 */
static uint32_t
pf_check_compatible_anchor(struct pf_anchor const * a)
{
	const char *__null_terminated anchor_path = __unsafe_null_terminated_from_indexable(a->path);
	uint32_t result = 0;

	if (strcmp(anchor_path, PF_RESERVED_ANCHOR) == 0) {
		goto done;
	}

	if (strcmp(anchor_path, "com.apple") == 0) {
		goto done;
	}

	/*
	 * NOTE(review): "i" is int while the bound is size_t (unsigned);
	 * harmless for this small array, but an unsigned index would be
	 * cleaner.
	 */
	for (int i = 0; i < sizeof(compatible_anchors) / sizeof(compatible_anchors[0]); i++) {
		const char *__null_terminated ptr = strnstr(anchor_path, compatible_anchors[i], MAXPATHLEN);
		/* ptr == anchor_path means the match is at offset 0, i.e. a prefix. */
		if (ptr != NULL && ptr == anchor_path) {
			goto done;
		}
	}

	result |= PF_COMPATIBLE_FLAGS_CUSTOM_ANCHORS_PRESENT;
	for (int i = PF_RULESET_SCRUB; i < PF_RULESET_MAX; ++i) {
		if (a->ruleset.rules[i].active.rcount != 0) {
			result |= PF_COMPATIBLE_FLAGS_CUSTOM_RULES_PRESENT;
		}
	}
done:
	return result;
}
11141 
11142 uint32_t
pf_check_compatible_rules(void)11143 pf_check_compatible_rules(void)
11144 {
11145 	LCK_RW_ASSERT(&pf_perim_lock, LCK_RW_ASSERT_HELD);
11146 	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
11147 	struct pf_anchor *anchor = NULL;
11148 	struct pf_rule *rule = NULL;
11149 	uint32_t compat_bitmap = 0;
11150 
11151 	if (PF_IS_ENABLED) {
11152 		compat_bitmap |= PF_COMPATIBLE_FLAGS_PF_ENABLED;
11153 	}
11154 
11155 	RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) {
11156 		compat_bitmap |= pf_check_compatible_anchor(anchor);
11157 #define _CHECK_FLAGS    (PF_COMPATIBLE_FLAGS_CUSTOM_ANCHORS_PRESENT | PF_COMPATIBLE_FLAGS_CUSTOM_RULES_PRESENT)
11158 		if ((compat_bitmap & _CHECK_FLAGS) == _CHECK_FLAGS) {
11159 			goto done;
11160 		}
11161 #undef _CHECK_FLAGS
11162 	}
11163 
11164 	for (int i = PF_RULESET_SCRUB; i < PF_RULESET_MAX; i++) {
11165 		TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr, entries) {
11166 			if (rule->anchor == NULL) {
11167 				compat_bitmap |= PF_COMPATIBLE_FLAGS_CUSTOM_RULES_PRESENT;
11168 				goto done;
11169 			}
11170 		}
11171 	}
11172 
11173 done:
11174 	return compat_bitmap;
11175 }
11176 #endif // SKYWALK
11177