1 /*
2 * Copyright (c) 2007-2023 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $apfw: git commit 6602420f2f101b74305cd78f7cd9e0c8fdedae97 $ */
30 /* $OpenBSD: pf.c,v 1.567 2008/02/20 23:40:13 henning Exp $ */
31
32 /*
33 * Copyright (c) 2001 Daniel Hartmeier
34 * Copyright (c) 2002 - 2013 Henning Brauer
35 * NAT64 - Copyright (c) 2010 Viagenie Inc. (http://www.viagenie.ca)
36 * All rights reserved.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 *
42 * - Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * - Redistributions in binary form must reproduce the above
45 * copyright notice, this list of conditions and the following
46 * disclaimer in the documentation and/or other materials provided
47 * with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
50 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
51 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
52 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
53 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
54 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
55 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
56 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
57 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
59 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
60 * POSSIBILITY OF SUCH DAMAGE.
61 *
62 * Effort sponsored in part by the Defense Advanced Research Projects
63 * Agency (DARPA) and Air Force Research Laboratory, Air Force
64 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
65 *
66 */
67
68 #include <machine/endian.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/filio.h>
72 #include <sys/socket.h>
73 #include <sys/socketvar.h>
74 #include <sys/kernel.h>
75 #include <sys/time.h>
76 #include <sys/proc.h>
77 #include <sys/random.h>
78 #include <sys/mcache.h>
79 #include <sys/protosw.h>
80
81 #include <libkern/crypto/md5.h>
82 #include <libkern/libkern.h>
83
84 #include <mach/thread_act.h>
85
86 #include <net/if.h>
87 #include <net/if_types.h>
88 #include <net/bpf.h>
89 #include <net/route.h>
90 #include <net/dlil.h>
91
92 #include <netinet/in.h>
93 #include <netinet/in_var.h>
94 #include <netinet/in_systm.h>
95 #include <netinet/ip.h>
96 #include <netinet/ip_var.h>
97 #include <netinet/tcp.h>
98 #include <netinet/tcp_seq.h>
99 #include <netinet/udp.h>
100 #include <netinet/ip_icmp.h>
101 #include <netinet/in_pcb.h>
102 #include <netinet/tcp_timer.h>
103 #include <netinet/tcp_var.h>
104 #include <netinet/tcp_fsm.h>
105 #include <netinet/udp_var.h>
106 #include <netinet/icmp_var.h>
107 #include <net/if_ether.h>
108 #include <net/ethernet.h>
109 #include <net/flowhash.h>
110 #include <net/nat464_utils.h>
111 #include <net/pfvar.h>
112 #include <net/if_pflog.h>
113
114 #if NPFSYNC
115 #include <net/if_pfsync.h>
116 #endif /* NPFSYNC */
117
118 #include <netinet/ip6.h>
119 #include <netinet6/in6_pcb.h>
120 #include <netinet6/ip6_var.h>
121 #include <netinet/icmp6.h>
122 #include <netinet6/nd6.h>
123
124 #if DUMMYNET
125 #include <netinet/ip_dummynet.h>
126 #endif /* DUMMYNET */
127
128 #if SKYWALK
129 #include <skywalk/namespace/flowidns.h>
130 #endif /* SKYWALK */
131
132 /*
133 * For RandomULong(), to get a 32 bits random value
134 * Note that random() returns a 31 bits value, see rdar://11159750
135 */
136 #include <dev/random/randomdev.h>
137
138 #define DPFPRINTF(n, x) (pf_status.debug >= (n) ? printf x : ((void)0))
139
140 /*
141 * On Mac OS X, the rtableid value is treated as the interface scope
142 * value that is equivalent to the interface index used for scoped
143 * routing. A valid scope value is anything but IFSCOPE_NONE (0),
144 * as per definition of ifindex which is a positive, non-zero number.
145 * The other BSDs treat a negative rtableid value as invalid, hence
146 * the test against INT_MAX to handle userland apps which initialize
147 * the field with a negative number.
148 */
149 #define PF_RTABLEID_IS_VALID(r) \
150 ((r) > IFSCOPE_NONE && (r) <= INT_MAX)
151
152 /*
153 * Global variables
154 */
155 static LCK_GRP_DECLARE(pf_lock_grp, "pf");
156 LCK_MTX_DECLARE(pf_lock, &pf_lock_grp);
157
158 static LCK_GRP_DECLARE(pf_perim_lock_grp, "pf_perim");
159 LCK_RW_DECLARE(pf_perim_lock, &pf_perim_lock_grp);
160
161 /* state tables */
162 struct pf_state_tree_lan_ext pf_statetbl_lan_ext;
163 struct pf_state_tree_ext_gwy pf_statetbl_ext_gwy;
164 static uint32_t pf_state_tree_ext_gwy_nat64_cnt = 0;
165
166 struct pf_palist pf_pabuf;
167 struct pf_status pf_status;
168
169 u_int32_t ticket_pabuf;
170
171 static MD5_CTX pf_tcp_secret_ctx;
172 static u_char pf_tcp_secret[16];
173 static int pf_tcp_secret_init;
174 static int pf_tcp_iss_off;
175
176 static struct pf_anchor_stackframe {
177 struct pf_ruleset *rs;
178 struct pf_rule *r;
179 struct pf_anchor_node *parent;
180 struct pf_anchor *child;
181 } pf_anchor_stack[64];
182
183 struct pool pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
184 struct pool pf_state_pl, pf_state_key_pl;
185
186 typedef void (*hook_fn_t)(void *);
187
188 struct hook_desc {
189 TAILQ_ENTRY(hook_desc) hd_list;
190 hook_fn_t hd_fn;
191 void *hd_arg;
192 };
193
194 #define HOOK_REMOVE 0x01
195 #define HOOK_FREE 0x02
196 #define HOOK_ABORT 0x04
197
198 static void *hook_establish(struct hook_desc_head *, int,
199 hook_fn_t, void *);
200 static void hook_runloop(struct hook_desc_head *, int flags);
201
202 struct pool pf_app_state_pl;
203 static void pf_print_addr(struct pf_addr *addr, sa_family_t af);
204 static void pf_print_sk_host(struct pf_state_host *, u_int8_t, int,
205 u_int8_t);
206
207 static void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
208
209 static void pf_init_threshold(struct pf_threshold *, u_int32_t,
210 u_int32_t);
211 static void pf_add_threshold(struct pf_threshold *);
212 static int pf_check_threshold(struct pf_threshold *);
213
214 static void pf_change_ap(int, pbuf_t *, struct pf_addr *,
215 u_int16_t *, u_int16_t *, u_int16_t *,
216 struct pf_addr *, u_int16_t, u_int8_t, sa_family_t,
217 sa_family_t, int);
218 static int pf_modulate_sack(pbuf_t *, int, struct pf_pdesc *,
219 struct tcphdr *, struct pf_state_peer *);
220 static void pf_change_a6(struct pf_addr *, u_int16_t *,
221 struct pf_addr *, u_int8_t);
222 static void pf_change_addr(struct pf_addr *a, u_int16_t *c, struct pf_addr *an,
223 u_int8_t u, sa_family_t af, sa_family_t afn);
224 static void pf_change_icmp(struct pf_addr *, u_int16_t *,
225 struct pf_addr *, struct pf_addr *, u_int16_t,
226 u_int16_t *, u_int16_t *, u_int16_t *,
227 u_int16_t *, u_int8_t, sa_family_t);
228 static void pf_send_tcp(const struct pf_rule *, sa_family_t,
229 const struct pf_addr *, const struct pf_addr *,
230 u_int16_t, u_int16_t, u_int32_t, u_int32_t,
231 u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
232 u_int16_t, struct ether_header *, struct ifnet *);
233 static void pf_send_icmp(pbuf_t *, u_int8_t, u_int8_t,
234 sa_family_t, struct pf_rule *);
235 static struct pf_rule *pf_match_translation(struct pf_pdesc *, pbuf_t *,
236 int, int, struct pfi_kif *, struct pf_addr *,
237 union pf_state_xport *, struct pf_addr *,
238 union pf_state_xport *, int);
239 static struct pf_rule *pf_get_translation_aux(struct pf_pdesc *,
240 pbuf_t *, int, int, struct pfi_kif *,
241 struct pf_src_node **, struct pf_addr *,
242 union pf_state_xport *, struct pf_addr *,
243 union pf_state_xport *, union pf_state_xport *
244 #if SKYWALK
245 , netns_token *
246 #endif
247 );
248 static void pf_attach_state(struct pf_state_key *,
249 struct pf_state *, int);
250 static u_int32_t pf_tcp_iss(struct pf_pdesc *);
251 static int pf_test_rule(struct pf_rule **, struct pf_state **,
252 int, struct pfi_kif *, pbuf_t *, int,
253 void *, struct pf_pdesc *, struct pf_rule **,
254 struct pf_ruleset **, struct ifqueue *);
255 #if DUMMYNET
256 static int pf_test_dummynet(struct pf_rule **, int,
257 struct pfi_kif *, pbuf_t **,
258 struct pf_pdesc *, struct ip_fw_args *);
259 #endif /* DUMMYNET */
260 static int pf_test_fragment(struct pf_rule **, int,
261 struct pfi_kif *, pbuf_t *, void *,
262 struct pf_pdesc *, struct pf_rule **,
263 struct pf_ruleset **);
264 static int pf_test_state_tcp(struct pf_state **, int,
265 struct pfi_kif *, pbuf_t *, int,
266 void *, struct pf_pdesc *, u_short *);
267 static int pf_test_state_udp(struct pf_state **, int,
268 struct pfi_kif *, pbuf_t *, int,
269 void *, struct pf_pdesc *, u_short *);
270 static int pf_test_state_icmp(struct pf_state **, int,
271 struct pfi_kif *, pbuf_t *, int,
272 void *, struct pf_pdesc *, u_short *);
273 static int pf_test_state_other(struct pf_state **, int,
274 struct pfi_kif *, struct pf_pdesc *);
275 static int pf_match_tag(struct pf_rule *,
276 struct pf_mtag *, int *);
277 static void pf_hash(struct pf_addr *, struct pf_addr *,
278 struct pf_poolhashkey *, sa_family_t);
279 static int pf_map_addr(u_int8_t, struct pf_rule *,
280 struct pf_addr *, struct pf_addr *,
281 struct pf_addr *, struct pf_src_node **);
282 static int pf_get_sport(struct pf_pdesc *, struct pfi_kif *,
283 struct pf_rule *, struct pf_addr *,
284 union pf_state_xport *, struct pf_addr *,
285 union pf_state_xport *, struct pf_addr *,
286 union pf_state_xport *, struct pf_src_node **
287 #if SKYWALK
288 , netns_token *
289 #endif
290 );
291 static void pf_route(pbuf_t **, struct pf_rule *, int,
292 struct ifnet *, struct pf_state *,
293 struct pf_pdesc *);
294 static void pf_route6(pbuf_t **, struct pf_rule *, int,
295 struct ifnet *, struct pf_state *,
296 struct pf_pdesc *);
297 static u_int8_t pf_get_wscale(pbuf_t *, int, u_int16_t,
298 sa_family_t);
299 static u_int16_t pf_get_mss(pbuf_t *, int, u_int16_t,
300 sa_family_t);
301 static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
302 u_int16_t);
303 static void pf_set_rt_ifp(struct pf_state *,
304 struct pf_addr *, sa_family_t af);
305 static int pf_check_proto_cksum(pbuf_t *, int, int,
306 u_int8_t, sa_family_t);
307 static int pf_addr_wrap_neq(struct pf_addr_wrap *,
308 struct pf_addr_wrap *);
309 static struct pf_state *pf_find_state(struct pfi_kif *,
310 struct pf_state_key_cmp *, u_int);
311 static int pf_src_connlimit(struct pf_state **);
312 static void pf_stateins_err(const char *, struct pf_state *,
313 struct pfi_kif *);
314 static int pf_check_congestion(struct ifqueue *);
315
316 #if 0
317 static const char *pf_pptp_ctrl_type_name(u_int16_t code);
318 #endif
319 static void pf_pptp_handler(struct pf_state *, int, int,
320 struct pf_pdesc *, struct pfi_kif *);
321 static void pf_pptp_unlink(struct pf_state *);
322 static void pf_grev1_unlink(struct pf_state *);
323 static int pf_test_state_grev1(struct pf_state **, int,
324 struct pfi_kif *, int, struct pf_pdesc *);
325 static int pf_ike_compare(struct pf_app_state *,
326 struct pf_app_state *);
327 static int pf_test_state_esp(struct pf_state **, int,
328 struct pfi_kif *, int, struct pf_pdesc *);
329 static int pf_test6(int, struct ifnet *, pbuf_t **, struct ether_header *,
330 struct ip_fw_args *);
331 #if INET
332 static int pf_test(int, struct ifnet *, pbuf_t **,
333 struct ether_header *, struct ip_fw_args *);
334 #endif /* INET */
335
336
337 extern struct pool pfr_ktable_pl;
338 extern struct pool pfr_kentry_pl;
339 extern int path_mtu_discovery;
340
341 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
342 { .pp = &pf_state_pl, .limit = PFSTATE_HIWAT },
343 { .pp = &pf_app_state_pl, .limit = PFAPPSTATE_HIWAT },
344 { .pp = &pf_src_tree_pl, .limit = PFSNODE_HIWAT },
345 { .pp = &pf_frent_pl, .limit = PFFRAG_FRENT_HIWAT },
346 { .pp = &pfr_ktable_pl, .limit = PFR_KTABLE_HIWAT },
347 { .pp = &pfr_kentry_pl, .limit = PFR_KENTRY_HIWAT },
348 };
349
350 #if SKYWALK
351 const char *compatible_anchors[] = {
352 "com.apple.internet-sharing",
353 "com.apple/250.ApplicationFirewall",
354 "com.apple/200.AirDrop"
355 };
356 #endif // SKYWALK
357
/*
 * Lazily make the first `len' bytes of the packet writable.
 *
 * pd->lmw caches the number of bytes already made writable for this
 * packet descriptor: a negative value means a previous attempt failed
 * and the descriptor is poisoned; nothing more may be written.  When
 * the buffer is actually modified the backing data may move, so the
 * cached header pointers in `pd' (src/dst/ip_sum and the pf mtag) are
 * re-derived from the new data pointer.
 *
 * Returns a pointer to the (now writable) packet data, or NULL on
 * failure.  `pbuf' must be the same buffer tracked by pd->mp.
 */
void *
pf_lazy_makewritable(struct pf_pdesc *pd, pbuf_t *pbuf, int len)
{
	void *__single p;

	/* A previous attempt already failed; refuse further writes. */
	if (pd->lmw < 0) {
		return NULL;
	}

	VERIFY(pbuf == pd->mp);

	p = pbuf->pb_data;
	if (len > pd->lmw) {
		if ((p = pbuf_ensure_writable(pbuf, len)) == NULL) {
			len = -1;       /* poisons pd->lmw just below */
		}
		pd->lmw = len;
		if (len >= 0) {
			/* Data may have moved: re-cache pointers into it. */
			pd->pf_mtag = pf_find_mtag_pbuf(pbuf);

			switch (pd->af) {
			case AF_INET: {
				struct ip *__single h = p;
				pd->src = (struct pf_addr *)(void *)&h->ip_src;
				pd->dst = (struct pf_addr *)(void *)&h->ip_dst;
				pd->ip_sum = &h->ip_sum;
				break;
			}
			case AF_INET6: {
				/* No ip6 checksum field to re-cache. */
				struct ip6_hdr *__single h = p;
				pd->src = (struct pf_addr *)(void *)&h->ip6_src;
				pd->dst = (struct pf_addr *)(void *)&h->ip6_dst;
				break;
			}
			}
		}
	}

	return len < 0 ? NULL : p;
}
398
399 static const int *
pf_state_lookup_aux(struct pf_state ** state,struct pfi_kif * kif,int direction,int * action)400 pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif,
401 int direction, int *action)
402 {
403 if (*state == NULL || (*state)->timeout == PFTM_PURGE) {
404 *action = PF_DROP;
405 return action;
406 }
407
408 if (direction == PF_OUT &&
409 (((*state)->rule.ptr->rt == PF_ROUTETO &&
410 (*state)->rule.ptr->direction == PF_OUT) ||
411 ((*state)->rule.ptr->rt == PF_REPLYTO &&
412 (*state)->rule.ptr->direction == PF_IN)) &&
413 (*state)->rt_kif != NULL && (*state)->rt_kif != kif) {
414 *action = PF_PASS;
415 return action;
416 }
417
418 return 0;
419 }
420
/*
 * Find the PF state matching <kif, key, direction> and store it in
 * *state.  On a hit, propagate the state's flowID into the packet
 * descriptor unless the packet already carries one.  The state is then
 * validated by pf_state_lookup_aux(); if it vetoes, the enclosing
 * function returns the chosen action (PF_DROP or PF_PASS) immediately.
 * Expects `state', `kif', `key', `direction' and `pd' in caller scope.
 */
#define STATE_LOOKUP()						\
	do {							\
		int action;					\
		*state = pf_find_state(kif, &key, direction);	\
		if (*state != NULL && pd != NULL &&		\
		    !(pd->pktflags & PKTF_FLOW_ID)) {		\
			pd->flowsrc = (*state)->state_key->flowsrc;	\
			pd->flowhash = (*state)->state_key->flowhash;	\
			if (pd->flowhash != 0) {		\
				pd->pktflags |= PKTF_FLOW_ID;	\
				pd->pktflags &= ~PKTF_FLOW_ADV;	\
			}					\
		}						\
		if (pf_state_lookup_aux(state, kif, direction, &action)) \
			return (action);			\
	} while (0)
437
438 /*
439 * This macro resets the flowID information in a packet descriptor which was
440 * copied in from a PF state. This should be used after a protocol state lookup
441 * finds a matching PF state, but then decides to not use it for various
442 * reasons.
443 */
444 #define PD_CLEAR_STATE_FLOWID(_pd) \
445 do { \
446 if (__improbable(((_pd)->pktflags & PKTF_FLOW_ID) && \
447 ((_pd)->flowsrc == FLOWSRC_PF))) { \
448 (_pd)->flowhash = 0; \
449 (_pd)->flowsrc = 0; \
450 (_pd)->pktflags &= ~PKTF_FLOW_ID; \
451 } \
452 \
453 } while (0)
454
/*
 * True when the LAN and gateway addresses of a state key differ, i.e.
 * address rewriting is in effect.  Only the first 32-bit word is
 * significant for IPv4; all four words are compared when the LAN side
 * is IPv6.
 */
#define STATE_ADDR_TRANSLATE(sk) \
	(sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] || \
	((sk)->af_lan == AF_INET6 && \
	((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] || \
	(sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] || \
	(sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3]))

/*
 * True when the state performs any translation: an address-family
 * change (NAT64/NAT46), an address rewrite, or a port rewrite.
 */
#define STATE_TRANSLATE(sk) \
	((sk)->af_lan != (sk)->af_gwy || \
	STATE_ADDR_TRANSLATE(sk) || \
	(sk)->lan.xport.port != (sk)->gwy.xport.port)

/*
 * GRE/PPTP variant of STATE_TRANSLATE: keys on the PPTP call id
 * instead of a transport port.
 */
#define STATE_GRE_TRANSLATE(sk) \
	(STATE_ADDR_TRANSLATE(sk) || \
	(sk)->lan.xport.call_id != (sk)->gwy.xport.call_id)
470
/*
 * Interface a state should be bound to: the packet's interface `k' for
 * rules flagged PFRULE_IFBOUND, otherwise the "any interface"
 * placeholder pfi_all.  The expansion is fully parenthesized so the
 * ternary cannot regroup with neighboring operators at the use site
 * (CERT PRE02-C); the original expansion left the ?: exposed.
 */
#define BOUND_IFACE(r, k) \
	((((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all))
473
/*
 * Account a newly created state `s' against the rule that created it
 * and, when set, against the matching anchor and NAT rules too.  The
 * VERIFYs catch counter wrap-around.
 */
#define STATE_INC_COUNTERS(s)					\
	do {							\
		s->rule.ptr->states++;				\
		VERIFY(s->rule.ptr->states != 0);		\
		if (s->anchor.ptr != NULL) {			\
			s->anchor.ptr->states++;		\
			VERIFY(s->anchor.ptr->states != 0);	\
		}						\
		if (s->nat_rule.ptr != NULL) {			\
			s->nat_rule.ptr->states++;		\
			VERIFY(s->nat_rule.ptr->states != 0);	\
		}						\
	} while (0)
487
/*
 * Undo STATE_INC_COUNTERS when state `s' goes away; decrements run in
 * the reverse order of the increments.  The VERIFYs catch underflow
 * (a state being released more times than it was accounted).
 */
#define STATE_DEC_COUNTERS(s)					\
	do {							\
		if (s->nat_rule.ptr != NULL) {			\
			VERIFY(s->nat_rule.ptr->states > 0);	\
			s->nat_rule.ptr->states--;		\
		}						\
		if (s->anchor.ptr != NULL) {			\
			VERIFY(s->anchor.ptr->states > 0);	\
			s->anchor.ptr->states--;		\
		}						\
		VERIFY(s->rule.ptr->states > 0);		\
		s->rule.ptr->states--;				\
	} while (0)
501
502 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
503 static __inline int pf_state_compare_lan_ext(struct pf_state_key *,
504 struct pf_state_key *);
505 static __inline int pf_state_compare_ext_gwy(struct pf_state_key *,
506 struct pf_state_key *);
507 static __inline int pf_state_compare_id(struct pf_state *,
508 struct pf_state *);
509
510 struct pf_src_tree tree_src_tracking;
511
512 struct pf_state_tree_id tree_id;
513 struct pf_state_queue state_list;
514
515 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
516 RB_GENERATE(pf_state_tree_lan_ext, pf_state_key,
517 entry_lan_ext, pf_state_compare_lan_ext);
518 RB_GENERATE(pf_state_tree_ext_gwy, pf_state_key,
519 entry_ext_gwy, pf_state_compare_ext_gwy);
520 RB_GENERATE(pf_state_tree_id, pf_state,
521 entry_id, pf_state_compare_id);
522
523 #define PF_DT_SKIP_LANEXT 0x01
524 #define PF_DT_SKIP_EXTGWY 0x02
525
526 static const u_int16_t PF_PPTP_PORT = 1723;
527 static const u_int32_t PF_PPTP_MAGIC_NUMBER = 0x1A2B3C4D;
528
529 struct pf_pptp_hdr {
530 u_int16_t length;
531 u_int16_t type;
532 u_int32_t magic;
533 };
534
535 struct pf_pptp_ctrl_hdr {
536 u_int16_t type;
537 u_int16_t reserved_0;
538 };
539
540 struct pf_pptp_ctrl_generic {
541 u_int16_t data[0];
542 };
543
544 #define PF_PPTP_CTRL_TYPE_START_REQ 1
545 struct pf_pptp_ctrl_start_req {
546 u_int16_t protocol_version;
547 u_int16_t reserved_1;
548 u_int32_t framing_capabilities;
549 u_int32_t bearer_capabilities;
550 u_int16_t maximum_channels;
551 u_int16_t firmware_revision;
552 u_int8_t host_name[64];
553 u_int8_t vendor_string[64];
554 };
555
556 #define PF_PPTP_CTRL_TYPE_START_RPY 2
557 struct pf_pptp_ctrl_start_rpy {
558 u_int16_t protocol_version;
559 u_int8_t result_code;
560 u_int8_t error_code;
561 u_int32_t framing_capabilities;
562 u_int32_t bearer_capabilities;
563 u_int16_t maximum_channels;
564 u_int16_t firmware_revision;
565 u_int8_t host_name[64];
566 u_int8_t vendor_string[64];
567 };
568
569 #define PF_PPTP_CTRL_TYPE_STOP_REQ 3
570 struct pf_pptp_ctrl_stop_req {
571 u_int8_t reason;
572 u_int8_t reserved_1;
573 u_int16_t reserved_2;
574 };
575
576 #define PF_PPTP_CTRL_TYPE_STOP_RPY 4
577 struct pf_pptp_ctrl_stop_rpy {
578 u_int8_t reason;
579 u_int8_t error_code;
580 u_int16_t reserved_1;
581 };
582
583 #define PF_PPTP_CTRL_TYPE_ECHO_REQ 5
584 struct pf_pptp_ctrl_echo_req {
585 u_int32_t identifier;
586 };
587
588 #define PF_PPTP_CTRL_TYPE_ECHO_RPY 6
589 struct pf_pptp_ctrl_echo_rpy {
590 u_int32_t identifier;
591 u_int8_t result_code;
592 u_int8_t error_code;
593 u_int16_t reserved_1;
594 };
595
596 #define PF_PPTP_CTRL_TYPE_CALL_OUT_REQ 7
597 struct pf_pptp_ctrl_call_out_req {
598 u_int16_t call_id;
599 u_int16_t call_sernum;
600 u_int32_t min_bps;
601 u_int32_t bearer_type;
602 u_int32_t framing_type;
603 u_int16_t rxwindow_size;
604 u_int16_t proc_delay;
605 u_int8_t phone_num[64];
606 u_int8_t sub_addr[64];
607 };
608
609 #define PF_PPTP_CTRL_TYPE_CALL_OUT_RPY 8
610 struct pf_pptp_ctrl_call_out_rpy {
611 u_int16_t call_id;
612 u_int16_t peer_call_id;
613 u_int8_t result_code;
614 u_int8_t error_code;
615 u_int16_t cause_code;
616 u_int32_t connect_speed;
617 u_int16_t rxwindow_size;
618 u_int16_t proc_delay;
619 u_int32_t phy_channel_id;
620 };
621
622 #define PF_PPTP_CTRL_TYPE_CALL_IN_1ST 9
623 struct pf_pptp_ctrl_call_in_1st {
624 u_int16_t call_id;
625 u_int16_t call_sernum;
626 u_int32_t bearer_type;
627 u_int32_t phy_channel_id;
628 u_int16_t dialed_number_len;
629 u_int16_t dialing_number_len;
630 u_int8_t dialed_num[64];
631 u_int8_t dialing_num[64];
632 u_int8_t sub_addr[64];
633 };
634
635 #define PF_PPTP_CTRL_TYPE_CALL_IN_2ND 10
636 struct pf_pptp_ctrl_call_in_2nd {
637 u_int16_t call_id;
638 u_int16_t peer_call_id;
639 u_int8_t result_code;
640 u_int8_t error_code;
641 u_int16_t rxwindow_size;
642 u_int16_t txdelay;
643 u_int16_t reserved_1;
644 };
645
646 #define PF_PPTP_CTRL_TYPE_CALL_IN_3RD 11
647 struct pf_pptp_ctrl_call_in_3rd {
648 u_int16_t call_id;
649 u_int16_t reserved_1;
650 u_int32_t connect_speed;
651 u_int16_t rxwindow_size;
652 u_int16_t txdelay;
653 u_int32_t framing_type;
654 };
655
656 #define PF_PPTP_CTRL_TYPE_CALL_CLR 12
657 struct pf_pptp_ctrl_call_clr {
658 u_int16_t call_id;
659 u_int16_t reserved_1;
660 };
661
662 #define PF_PPTP_CTRL_TYPE_CALL_DISC 13
663 struct pf_pptp_ctrl_call_disc {
664 u_int16_t call_id;
665 u_int8_t result_code;
666 u_int8_t error_code;
667 u_int16_t cause_code;
668 u_int16_t reserved_1;
669 u_int8_t statistics[128];
670 };
671
672 #define PF_PPTP_CTRL_TYPE_ERROR 14
673 struct pf_pptp_ctrl_error {
674 u_int16_t peer_call_id;
675 u_int16_t reserved_1;
676 u_int32_t crc_errors;
677 u_int32_t fr_errors;
678 u_int32_t hw_errors;
679 u_int32_t buf_errors;
680 u_int32_t tim_errors;
681 u_int32_t align_errors;
682 };
683
684 #define PF_PPTP_CTRL_TYPE_SET_LINKINFO 15
685 struct pf_pptp_ctrl_set_linkinfo {
686 u_int16_t peer_call_id;
687 u_int16_t reserved_1;
688 u_int32_t tx_accm;
689 u_int32_t rx_accm;
690 };
691
692 static const size_t PF_PPTP_CTRL_MSG_MINSIZE =
693 sizeof(struct pf_pptp_hdr) + sizeof(struct pf_pptp_ctrl_hdr);
694
695 union pf_pptp_ctrl_msg_union {
696 struct pf_pptp_ctrl_start_req start_req;
697 struct pf_pptp_ctrl_start_rpy start_rpy;
698 struct pf_pptp_ctrl_stop_req stop_req;
699 struct pf_pptp_ctrl_stop_rpy stop_rpy;
700 struct pf_pptp_ctrl_echo_req echo_req;
701 struct pf_pptp_ctrl_echo_rpy echo_rpy;
702 struct pf_pptp_ctrl_call_out_req call_out_req;
703 struct pf_pptp_ctrl_call_out_rpy call_out_rpy;
704 struct pf_pptp_ctrl_call_in_1st call_in_1st;
705 struct pf_pptp_ctrl_call_in_2nd call_in_2nd;
706 struct pf_pptp_ctrl_call_in_3rd call_in_3rd;
707 struct pf_pptp_ctrl_call_clr call_clr;
708 struct pf_pptp_ctrl_call_disc call_disc;
709 struct pf_pptp_ctrl_error error;
710 struct pf_pptp_ctrl_set_linkinfo set_linkinfo;
711 u_int8_t data[0];
712 };
713
714 struct pf_pptp_ctrl_msg {
715 struct pf_pptp_hdr hdr;
716 struct pf_pptp_ctrl_hdr ctrl;
717 union pf_pptp_ctrl_msg_union msg;
718 };
719
720 #define PF_GRE_FLAG_CHECKSUM_PRESENT 0x8000
721 #define PF_GRE_FLAG_VERSION_MASK 0x0007
722 #define PF_GRE_PPP_ETHERTYPE 0x880B
723
724 static const u_int16_t PF_IKE_PORT = 500;
725
726 struct pf_ike_hdr {
727 u_int64_t initiator_cookie, responder_cookie;
728 u_int8_t next_payload, version, exchange_type, flags;
729 u_int32_t message_id, length;
730 };
731
732 #define PF_IKE_PACKET_MINSIZE (sizeof (struct pf_ike_hdr))
733
734 #define PF_IKEv1_EXCHTYPE_BASE 1
735 #define PF_IKEv1_EXCHTYPE_ID_PROTECT 2
736 #define PF_IKEv1_EXCHTYPE_AUTH_ONLY 3
737 #define PF_IKEv1_EXCHTYPE_AGGRESSIVE 4
738 #define PF_IKEv1_EXCHTYPE_INFORMATIONAL 5
739 #define PF_IKEv2_EXCHTYPE_SA_INIT 34
740 #define PF_IKEv2_EXCHTYPE_AUTH 35
741 #define PF_IKEv2_EXCHTYPE_CREATE_CHILD_SA 36
742 #define PF_IKEv2_EXCHTYPE_INFORMATIONAL 37
743
744 #define PF_IKEv1_FLAG_E 0x01
745 #define PF_IKEv1_FLAG_C 0x02
746 #define PF_IKEv1_FLAG_A 0x04
747 #define PF_IKEv2_FLAG_I 0x08
748 #define PF_IKEv2_FLAG_V 0x10
749 #define PF_IKEv2_FLAG_R 0x20
750
751
752 static __inline int
pf_addr_compare(struct pf_addr * a,struct pf_addr * b,sa_family_t af)753 pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
754 {
755 switch (af) {
756 #ifdef INET
757 case AF_INET:
758 if (a->addr32[0] > b->addr32[0]) {
759 return 1;
760 }
761 if (a->addr32[0] < b->addr32[0]) {
762 return -1;
763 }
764 break;
765 #endif /* INET */
766 case AF_INET6:
767 if (a->addr32[3] > b->addr32[3]) {
768 return 1;
769 }
770 if (a->addr32[3] < b->addr32[3]) {
771 return -1;
772 }
773 if (a->addr32[2] > b->addr32[2]) {
774 return 1;
775 }
776 if (a->addr32[2] < b->addr32[2]) {
777 return -1;
778 }
779 if (a->addr32[1] > b->addr32[1]) {
780 return 1;
781 }
782 if (a->addr32[1] < b->addr32[1]) {
783 return -1;
784 }
785 if (a->addr32[0] > b->addr32[0]) {
786 return 1;
787 }
788 if (a->addr32[0] < b->addr32[0]) {
789 return -1;
790 }
791 break;
792 }
793 return 0;
794 }
795
796 static __inline int
pf_src_compare(struct pf_src_node * a,struct pf_src_node * b)797 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
798 {
799 int diff;
800
801 if (a->rule.ptr > b->rule.ptr) {
802 return 1;
803 }
804 if (a->rule.ptr < b->rule.ptr) {
805 return -1;
806 }
807 if ((diff = a->af - b->af) != 0) {
808 return diff;
809 }
810 if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0) {
811 return diff;
812 }
813 return 0;
814 }
815
/*
 * RB-tree comparator for the lan_ext state table (states keyed by the
 * LAN-side endpoint and the external endpoint as seen from the LAN).
 * Returns <0, 0 or >0 to establish a total order.  For UDP the key
 * loosens with the state's extfilter mode: in looser modes the
 * external port and/or address do not participate in the comparison.
 */
static __inline int
pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
{
	int diff;
	int extfilter;

	/* Primary keys: protocol, then LAN-side address family. */
	if ((diff = a->proto - b->proto) != 0) {
		return diff;
	}
	if ((diff = a->af_lan - b->af_lan) != 0) {
		return diff;
	}

	/* Strictest filtering mode by default; UDP may relax it below. */
	extfilter = PF_EXTFILTER_APD;

	switch (a->proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		/* ICMP keys carry the echo id in the port slot. */
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_TCP:
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		if ((diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_UDP:
		if ((diff = a->proto_variant - b->proto_variant)) {
			return diff;
		}
		/* For UDP the extfilter mode lives in the proto variant. */
		extfilter = a->proto_variant;
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		/* External port only matters in the stricter modes. */
		if ((extfilter < PF_EXTFILTER_AD) &&
		    (diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_GRE:
		/* Only PPTP-variant GRE keys compare the call id. */
		if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
		    a->proto_variant == b->proto_variant) {
			if (!!(diff = a->ext_lan.xport.call_id -
			    b->ext_lan.xport.call_id)) {
				return diff;
			}
		}
		break;

	case IPPROTO_ESP:
		if (!!(diff = a->ext_lan.xport.spi - b->ext_lan.xport.spi)) {
			return diff;
		}
		break;

	default:
		break;
	}

	switch (a->af_lan) {
#if INET
	case AF_INET:
		if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr,
		    a->af_lan)) != 0) {
			return diff;
		}

		if (extfilter < PF_EXTFILTER_EI) {
			if ((diff = pf_addr_compare(&a->ext_lan.addr,
			    &b->ext_lan.addr,
			    a->af_lan)) != 0) {
				return diff;
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr,
		    a->af_lan)) != 0) {
			return diff;
		}

		/*
		 * In looser-than-EI modes, an all-zero external address
		 * in the tree entry `b' acts as a wildcard: the address
		 * is only compared when the entry is non-zero.
		 */
		if (extfilter < PF_EXTFILTER_EI ||
		    !PF_AZERO(&b->ext_lan.addr, AF_INET6)) {
			if ((diff = pf_addr_compare(&a->ext_lan.addr,
			    &b->ext_lan.addr,
			    a->af_lan)) != 0) {
				return diff;
			}
		}
		break;
	}

	if (a->app_state && b->app_state) {
		if (a->app_state->compare_lan_ext &&
		    b->app_state->compare_lan_ext) {
			/*
			 * Order first by comparator identity (pointer
			 * difference), then by the app comparator's own
			 * notion of order.
			 */
			diff = (const char *)b->app_state->compare_lan_ext -
			    (const char *)a->app_state->compare_lan_ext;
			if (diff != 0) {
				return diff;
			}
			diff = a->app_state->compare_lan_ext(a->app_state,
			    b->app_state);
			if (diff != 0) {
				return diff;
			}
		}
	}

	return 0;
}
934
/*
 * RB-tree comparator for the ext_gwy state table (states keyed by the
 * external endpoint and the gateway-side endpoint).  Mirrors
 * pf_state_compare_lan_ext(), with two differences: NAT64 states
 * (IPv6 LAN side, IPv4 gateway side) sort into their own partition,
 * and the key fields are the gwy/ext_gwy endpoints.
 */
static __inline int
pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
{
	int diff;
	int extfilter;
	int a_nat64, b_nat64;

	/* Primary keys: protocol, then gateway-side address family. */
	if ((diff = a->proto - b->proto) != 0) {
		return diff;
	}

	if ((diff = a->af_gwy - b->af_gwy) != 0) {
		return diff;
	}

	/* Keep NAT64 states apart from same-family states. */
	a_nat64 = (a->af_lan == PF_INET6 && a->af_gwy == PF_INET) ? 1 : 0;
	b_nat64 = (b->af_lan == PF_INET6 && b->af_gwy == PF_INET) ? 1 : 0;
	if ((diff = a_nat64 - b_nat64) != 0) {
		return diff;
	}

	/* Strictest filtering mode by default; UDP may relax it below. */
	extfilter = PF_EXTFILTER_APD;

	switch (a->proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		/* ICMP keys carry the echo id in the port slot. */
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_TCP:
		if ((diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0) {
			return diff;
		}
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_UDP:
		if ((diff = a->proto_variant - b->proto_variant)) {
			return diff;
		}
		/* For UDP the extfilter mode lives in the proto variant. */
		extfilter = a->proto_variant;
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		/* External port only matters in the stricter modes. */
		if ((extfilter < PF_EXTFILTER_AD) &&
		    (diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_GRE:
		/* Only PPTP-variant GRE keys compare the call id. */
		if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
		    a->proto_variant == b->proto_variant) {
			if (!!(diff = a->gwy.xport.call_id -
			    b->gwy.xport.call_id)) {
				return diff;
			}
		}
		break;

	case IPPROTO_ESP:
		if (!!(diff = a->gwy.xport.spi - b->gwy.xport.spi)) {
			return diff;
		}
		break;

	default:
		break;
	}

	switch (a->af_gwy) {
#if INET
	case AF_INET:
		if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr,
		    a->af_gwy)) != 0) {
			return diff;
		}

		if (extfilter < PF_EXTFILTER_EI) {
			if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr,
			    a->af_gwy)) != 0) {
				return diff;
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr,
		    a->af_gwy)) != 0) {
			return diff;
		}

		/*
		 * In looser-than-EI modes, an all-zero external address
		 * in the tree entry `b' acts as a wildcard: the address
		 * is only compared when the entry is non-zero.
		 */
		if (extfilter < PF_EXTFILTER_EI ||
		    !PF_AZERO(&b->ext_gwy.addr, AF_INET6)) {
			if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr,
			    a->af_gwy)) != 0) {
				return diff;
			}
		}
		break;
	}

	if (a->app_state && b->app_state) {
		if (a->app_state->compare_ext_gwy &&
		    b->app_state->compare_ext_gwy) {
			/*
			 * Order first by comparator identity (pointer
			 * difference), then by the app comparator's own
			 * notion of order.
			 */
			diff = (const char *)b->app_state->compare_ext_gwy -
			    (const char *)a->app_state->compare_ext_gwy;
			if (diff != 0) {
				return diff;
			}
			diff = a->app_state->compare_ext_gwy(a->app_state,
			    b->app_state);
			if (diff != 0) {
				return diff;
			}
		}
	}

	return 0;
}
1059
1060 static __inline int
pf_state_compare_id(struct pf_state * a,struct pf_state * b)1061 pf_state_compare_id(struct pf_state *a, struct pf_state *b)
1062 {
1063 if (a->id > b->id) {
1064 return 1;
1065 }
1066 if (a->id < b->id) {
1067 return -1;
1068 }
1069 if (a->creatorid > b->creatorid) {
1070 return 1;
1071 }
1072 if (a->creatorid < b->creatorid) {
1073 return -1;
1074 }
1075
1076 return 0;
1077 }
1078
1079 void
pf_addrcpy(struct pf_addr * dst,struct pf_addr * src,sa_family_t af)1080 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
1081 {
1082 switch (af) {
1083 #if INET
1084 case AF_INET:
1085 memcpy(&dst->v4addr, &src->v4addr, sizeof(src->v4addr));
1086 break;
1087 #endif /* INET */
1088 case AF_INET6:
1089 memcpy(&dst->v6addr, &src->v6addr, sizeof(src->v6addr));
1090 break;
1091 }
1092 }
1093
1094 struct pf_state *
pf_find_state_byid(struct pf_state_cmp * key)1095 pf_find_state_byid(struct pf_state_cmp *key)
1096 {
1097 pf_status.fcounters[FCNT_STATE_SEARCH]++;
1098
1099 return RB_FIND(pf_state_tree_id, &tree_id,
1100 (struct pf_state *)(void *)key);
1101 }
1102
/*
 * Look up the state matching "key" for direction "dir".
 *
 * PF_OUT searches the lan/ext tree (key as seen on the LAN side);
 * PF_IN searches the ext/gwy tree (key as seen on the wire), with extra
 * passes to handle NAT64.  Returns the first matching state bound to
 * "kif" or to no particular interface (pfi_all), or NULL.
 */
static struct pf_state *
pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir)
{
	struct pf_state_key *sk = NULL;
	struct pf_state *s;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	switch (dir) {
	case PF_OUT:
		sk = RB_FIND(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
		    (struct pf_state_key *)key);

		break;
	case PF_IN:

		/*
		 * Generally, a packet can match to
		 * at most 1 state in the GWY table, with the sole exception
		 * of NAT64, where a packet can match with at most 2 states
		 * on the GWY table. This is because, unlike NAT44 or NAT66,
		 * NAT64 forward translation is done on the input, not output.
		 * This means a forwarded packet could cause PF to generate 2 states
		 * on both input and output.
		 *
		 * NAT64 reverse translation is done on input. If a packet
		 * matches NAT64 state on the GWY table, prioritize it
		 * over any IPv4 state on the GWY table.
		 */
		if (pf_state_tree_ext_gwy_nat64_cnt > 0 &&
		    key->af_lan == PF_INET && key->af_gwy == PF_INET) {
			/* Temporarily rewrite the key as a NAT64 key (v6 LAN side). */
			key->af_lan = PF_INET6;
			sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
			    (struct pf_state_key *) key);
			key->af_lan = PF_INET;	/* restore the caller's key */
		}

		if (sk == NULL) {
			sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
			    (struct pf_state_key *)key);
		}
		/*
		 * NAT64 is done only on input; for packets coming in from
		 * the LAN side, we need to look up the lan_ext tree.
		 */
		if (sk == NULL) {
			sk = RB_FIND(pf_state_tree_lan_ext,
			    &pf_statetbl_lan_ext,
			    (struct pf_state_key *)key);
			/* Only translating states (af_lan != af_gwy) qualify here. */
			if (sk && sk->af_lan == sk->af_gwy) {
				sk = NULL;
			}
		}
		break;
	default:
		panic("pf_find_state");
	}

	/* list is sorted, if-bound states before floating ones */
	if (sk != NULL) {
		TAILQ_FOREACH(s, &sk->states, next)
		if (s->kif == pfi_all || s->kif == kif) {
			return s;
		}
	}

	return NULL;
}
1171
/*
 * Like pf_find_state(), but ignores interface binding: returns the first
 * state attached to the matching key.  When "more" is non-NULL, every
 * state sharing that key is additionally counted into *more.
 */
struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key *sk = NULL;
	struct pf_state *s, *ret = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	switch (dir) {
	case PF_OUT:
		sk = RB_FIND(pf_state_tree_lan_ext,
		    &pf_statetbl_lan_ext, (struct pf_state_key *)key);
		break;
	case PF_IN:
		sk = RB_FIND(pf_state_tree_ext_gwy,
		    &pf_statetbl_ext_gwy, (struct pf_state_key *)key);
		/*
		 * NAT64 is done only on input; for packets coming in from
		 * the LAN side, we need to look up the lan_ext tree.
		 */
		if ((sk == NULL) && pf_nat64_configured) {
			sk = RB_FIND(pf_state_tree_lan_ext,
			    &pf_statetbl_lan_ext,
			    (struct pf_state_key *)key);
			/* Only translating states (af_lan != af_gwy) qualify here. */
			if (sk && sk->af_lan == sk->af_gwy) {
				sk = NULL;
			}
		}
		break;
	default:
		panic("pf_find_state_all");
	}

	if (sk != NULL) {
		ret = TAILQ_FIRST(&sk->states);
		if (more == NULL) {
			return ret;
		}

		/* Caller asked for the number of states sharing this key. */
		TAILQ_FOREACH(s, &sk->states, next)
		(*more)++;
	}

	return ret;
}
1217
1218 static void
pf_init_threshold(struct pf_threshold * threshold,u_int32_t limit,u_int32_t seconds)1219 pf_init_threshold(struct pf_threshold *threshold,
1220 u_int32_t limit, u_int32_t seconds)
1221 {
1222 threshold->limit = limit * PF_THRESHOLD_MULT;
1223 threshold->seconds = seconds;
1224 threshold->count = 0;
1225 threshold->last = pf_time_second();
1226 }
1227
1228 static void
pf_add_threshold(struct pf_threshold * threshold)1229 pf_add_threshold(struct pf_threshold *threshold)
1230 {
1231 u_int32_t t = pf_time_second(), diff = t - threshold->last;
1232
1233 if (diff >= threshold->seconds) {
1234 threshold->count = 0;
1235 } else {
1236 threshold->count -= threshold->count * diff /
1237 threshold->seconds;
1238 }
1239 threshold->count += PF_THRESHOLD_MULT;
1240 threshold->last = t;
1241 }
1242
1243 static int
pf_check_threshold(struct pf_threshold * threshold)1244 pf_check_threshold(struct pf_threshold *threshold)
1245 {
1246 return threshold->count > threshold->limit;
1247 }
1248
/*
 * Account a new established connection against this state's source node
 * and enforce the rule's max_src_conn / max_src_conn_rate limits.
 * Returns 0 while within limits.  On violation: optionally inserts the
 * offending address into the rule's overload table, optionally flushes
 * existing states from that source, marks the current state for purge,
 * and returns 1.
 */
static int
pf_src_connlimit(struct pf_state **state)
{
	int bad = 0;
	(*state)->src_node->conn++;
	VERIFY((*state)->src_node->conn != 0);
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&(*state)->src_node->conn_rate);

	/* absolute cap on concurrent connections from this source */
	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn <
	    (*state)->src_node->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	/* rate limit (connections per measurement window) */
	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad) {
		return 0;
	}

	if ((*state)->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf_src_connlimit: blocking address ");
			pf_print_host(&(*state)->src_node->addr, 0,
			    (*state)->state_key->af_lan);
		}

		/* build a host-sized table entry for the offender */
		bzero(&p, sizeof(p));
		p.pfra_af = (*state)->state_key->af_lan;
		switch ((*state)->state_key->af_lan) {
#if INET
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = (*state)->src_node->addr.v4addr;
			break;
#endif /* INET */
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = (*state)->src_node->addr.v6addr;
			break;
		}

		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
		    &p, pf_calendar_time_second());

		/* kill existing states if that's required. */
		if ((*state)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->state_key;
				/*
				 * Kill states from this source. (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af_lan ==
				    (*state)->state_key->af_lan &&
				    (((*state)->state_key->direction ==
				    PF_OUT &&
				    PF_AEQ(&(*state)->src_node->addr,
				    &sk->lan.addr, sk->af_lan)) ||
				    ((*state)->state_key->direction == PF_IN &&
				    PF_AEQ(&(*state)->src_node->addr,
				    &sk->ext_lan.addr, sk->af_lan))) &&
				    ((*state)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*state)->rule.ptr == st->rule.ptr)) {
					/* marked; the purge thread frees it */
					st->timeout = PFTM_PURGE;
					st->src.state = st->dst.state =
					    TCPS_CLOSED;
					killed++;
				}
			}
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf(", %u states killed", killed);
			}
		}
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("\n");
		}
	}

	/* kill this state */
	(*state)->timeout = PFTM_PURGE;
	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
	return 1;
}
1349
/*
 * Find or create the source-tracking node for "src"/"af".  If *sn is
 * NULL on entry the tree is searched first (keyed per rule only when
 * the rule does rule-level tracking or sticky-address).  On success *sn
 * points at the node and 0 is returned.  Returns -1 when the node pool
 * or the rule's max_src_nodes limit is exhausted, when the tree insert
 * fails, or when an existing node already carries max_src_states states.
 */
int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    struct pf_addr *src, sa_family_t af)
{
	struct pf_src_node k;

	if (*sn == NULL) {
		/* build a lookup key on the stack */
		k.af = af;
		PF_ACPY(&k.addr, src, af);
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR) {
			k.rule.ptr = rule;
		} else {
			k.rule.ptr = NULL;
		}
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		/* not found: allocate a fresh node unless the rule's cap is hit */
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes) {
			(*sn) = pool_get(&pf_src_tree_pl, PR_WAITOK);
		} else {
			pf_status.lcounters[LCNT_SRCNODES]++;
		}
		if ((*sn) == NULL) {
			return -1;
		}
		bzero(*sn, sizeof(struct pf_src_node));

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->af = af;
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR) {
			(*sn)->rule.ptr = rule;
		} else {
			(*sn)->rule.ptr = NULL;
		}
		PF_ACPY(&(*sn)->addr, src, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			/* a duplicate appeared; undo the allocation */
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				printf("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			*sn = NULL; /* signal the caller that no additional cleanup is needed */
			return -1;
		}
		(*sn)->creation = pf_time_second();
		(*sn)->ruletype = rule->action;
		if ((*sn)->rule.ptr != NULL) {
			(*sn)->rule.ptr->src_nodes++;
		}
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		/* existing node: enforce the per-source state cap */
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return -1;
		}
	}
	return 0;
}
1419
/*
 * Log (at PF_DEBUG_MISC and above) a state-table insertion failure for
 * tree "tree": protocol, all four host/port views of the state key, and
 * whether the state originated from a pfsync peer.
 */
static void
pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif)
{
	struct pf_state_key *sk = s->state_key;

	if (pf_status.debug >= PF_DEBUG_MISC) {
		printf("pf: state insert failed: %s %s ", tree, kif->pfik_name);
		switch (sk->proto) {
		case IPPROTO_TCP:
			printf("TCP");
			break;
		case IPPROTO_UDP:
			printf("UDP");
			break;
		case IPPROTO_ICMP:
			printf("ICMP4");
			break;
		case IPPROTO_ICMPV6:
			printf("ICMP6");
			break;
		default:
			printf("PROTO=%u", sk->proto);
			break;
		}
		/* lan/gwy use the LAN/gateway address family respectively */
		printf(" lan: ");
		pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto,
		    sk->proto_variant);
		printf(" gwy: ");
		pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto,
		    sk->proto_variant);
		printf(" ext_lan: ");
		pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto,
		    sk->proto_variant);
		printf(" ext_gwy: ");
		pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto,
		    sk->proto_variant);
		if (s->sync_flags & PFSTATE_FROMSYNC) {
			printf(" (from sync)");
		}
		printf("\n");
	}
}
1462
1463 static __inline struct pf_state_key *
pf_insert_state_key_ext_gwy(struct pf_state_key * psk)1464 pf_insert_state_key_ext_gwy(struct pf_state_key *psk)
1465 {
1466 struct pf_state_key * ret = RB_INSERT(pf_state_tree_ext_gwy,
1467 &pf_statetbl_ext_gwy, psk);
1468 if (!ret && psk->af_lan == PF_INET6 &&
1469 psk->af_gwy == PF_INET) {
1470 pf_state_tree_ext_gwy_nat64_cnt++;
1471 }
1472 return ret;
1473 }
1474
1475 static __inline struct pf_state_key *
pf_remove_state_key_ext_gwy(struct pf_state_key * psk)1476 pf_remove_state_key_ext_gwy(struct pf_state_key *psk)
1477 {
1478 struct pf_state_key * ret = RB_REMOVE(pf_state_tree_ext_gwy,
1479 &pf_statetbl_ext_gwy, psk);
1480 if (ret && psk->af_lan == PF_INET6 &&
1481 psk->af_gwy == PF_INET) {
1482 pf_state_tree_ext_gwy_nat64_cnt--;
1483 }
1484 return ret;
1485 }
1486
/*
 * Link a freshly created state into all three state trees (lan_ext,
 * ext_gwy, id) and the global state list.  If another state already
 * owns an equal key, the new state is attached to that key instead —
 * unless one bound to the same kif exists, which is a collision and
 * fails.  Returns 0 on success, -1 on failure (the state is detached
 * from whatever was already linked).
 */
int
pf_insert_state(struct pfi_kif *kif, struct pf_state *s)
{
	struct pf_state_key *cur;
	struct pf_state *sp;

	VERIFY(s->state_key != NULL);
	s->kif = kif;

	if ((cur = RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
	    s->state_key)) != NULL) {
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(sp, &cur->states, next)
		if (sp->kif == kif) {   /* collision! */
			pf_stateins_err("tree_lan_ext", s, kif);
			pf_detach_state(s,
			    PF_DT_SKIP_LANEXT | PF_DT_SKIP_EXTGWY);
			return -1;
		}
		/* share the existing key; drop the state's private copy */
		pf_detach_state(s, PF_DT_SKIP_LANEXT | PF_DT_SKIP_EXTGWY);
		pf_attach_state(cur, s, kif == pfi_all ? 1 : 0);
	}

	/* if cur != NULL, we already found a state key and attached to it */
	if (cur == NULL &&
	    (cur = pf_insert_state_key_ext_gwy(s->state_key)) != NULL) {
		/* must not happen. we must have found the sk above! */
		pf_stateins_err("tree_ext_gwy", s, kif);
		pf_detach_state(s, PF_DT_SKIP_EXTGWY);
		return -1;
	}

	/* assign a fresh (id, creatorid) unless one came from pfsync */
	if (s->id == 0 && s->creatorid == 0) {
		s->id = htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
	}
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    be64toh(s->id), ntohl(s->creatorid));
			if (s->sync_flags & PFSTATE_FROMSYNC) {
				printf(" (from sync)");
			}
			printf("\n");
		}
		pf_detach_state(s, 0);
		return -1;
	}
	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	VERIFY(pf_status.states != 0);
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
#if NPFSYNC
	pfsync_insert_state(s);
#endif
	return 0;
}
1546
/*
 * Continuation of the pf purge thread, re-entered by tsleep0() roughly
 * once per second.  Each pass expires a slice of the state table;
 * fragments and source nodes are expired every PFTM_INTERVAL seconds.
 * When pf is stopped everything is purged at once and the thread either
 * terminates (if pf_purge_thread was cleared) or sleeps without timeout
 * until explicitly woken.
 */
static int
pf_purge_thread_cont(int err)
{
#pragma unused(err)
	static u_int32_t nloops = 0;
	int t = 1;      /* 1 second */

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the periodic timeout callout to update
	 * the counter returnable via net_uptime().
	 */
	net_update_uptime();

	lck_rw_lock_shared(&pf_perim_lock);
	lck_mtx_lock(&pf_lock);

	/* purge everything if not running */
	if (!pf_status.running) {
		pf_purge_expired_states(pf_status.states);
		pf_purge_expired_fragments();
		pf_purge_expired_src_nodes();

		/* terminate thread (we don't currently do this) */
		if (pf_purge_thread == NULL) {
			lck_mtx_unlock(&pf_lock);
			lck_rw_done(&pf_perim_lock);

			thread_deallocate(current_thread());
			thread_terminate(current_thread());
			/* NOTREACHED */
			return 0;
		} else {
			/* if there's nothing left, sleep w/o timeout */
			if (pf_status.states == 0 &&
			    pf_normalize_isempty() &&
			    RB_EMPTY(&tree_src_tracking)) {
				nloops = 0;
				t = 0;	/* 0 ticks => sleep until woken */
			}
			goto done;
		}
	}

	/* process a fraction of the state table every second */
	pf_purge_expired_states(1 + (pf_status.states
	    / pf_default_rule.timeout[PFTM_INTERVAL]));

	/* purge other expired types every PFTM_INTERVAL seconds */
	if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_fragments();
		pf_purge_expired_src_nodes();
		nloops = 0;
	}
done:
	lck_mtx_unlock(&pf_lock);
	lck_rw_done(&pf_perim_lock);

	/* re-arm ourselves; tsleep0() resumes in the continuation */
	(void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge_cont",
	    t * hz, pf_purge_thread_cont);
	/* NOTREACHED */
	VERIFY(0);

	return 0;
}
1612
/*
 * Entry point of the pf purge kernel thread: immediately parks in
 * tsleep0() with pf_purge_thread_cont as the continuation; all real
 * work happens there.
 */
void
pf_purge_thread_fn(void *v, wait_result_t w)
{
#pragma unused(v, w)
	(void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge", 0,
	    pf_purge_thread_cont);
	/*
	 * tsleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	VERIFY(0);
}
1625
/*
 * Return the absolute time (in seconds) at which "state" expires.
 * With adaptive timeouts configured, the effective timeout shrinks
 * linearly as the relevant state count moves from the ADAPTIVE_START
 * to the ADAPTIVE_END watermark; at or beyond ADAPTIVE_END the state
 * is expired immediately.
 */
u_int64_t
pf_state_expires(const struct pf_state *state)
{
	u_int32_t t;		/* effective timeout, seconds */
	u_int32_t start;	/* adaptive scaling start watermark */
	u_int32_t end;		/* adaptive scaling end watermark */
	u_int32_t states;	/* state count the scaling is based on */

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE) {
		return pf_time_second();
	}

	VERIFY(state->timeout != PFTM_UNLINKED);
	VERIFY(state->timeout < PFTM_MAX);
	/* rule timeout of 0 means "use the default rule's timeout" */
	t = state->rule.ptr->timeout[state->timeout];
	if (!t) {
		t = pf_default_rule.timeout[state->timeout];
	}
	/* per-rule adaptive settings win over the global defaults */
	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = state->rule.ptr->states;
	} else {
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states < end) {
			/* scale t by the remaining headroom below "end" */
			return state->expire + t * (end - states) /
			       (end - start);
		} else {
			return pf_time_second();
		}
	}
	return state->expire + t;
}
1666
/*
 * Free every source-tracking node that has no states left and whose
 * expiry time has passed, dropping the rule reference each node holds
 * (which may in turn free the rule itself).
 */
void
pf_purge_expired_src_nodes(void)
{
	struct pf_src_node *cur, *next;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	/* fetch the successor before possibly removing cur */
	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states <= 0 && cur->expire <= pf_time_second()) {
			if (cur->rule.ptr != NULL) {
				cur->rule.ptr->src_nodes--;
				/* last reference to an unlimited rule: free it */
				if (cur->rule.ptr->states <= 0 &&
				    cur->rule.ptr->max_src_nodes <= 0) {
					pf_rm_rule(NULL, cur->rule.ptr);
				}
			}
			RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
			pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
			pf_status.src_nodes--;
			pool_put(&pf_src_tree_pl, cur);
		}
	}
}
1692
/*
 * Detach state "s" from its source-tracking node(s): drop the
 * established-connection count and the per-node state counts, and arm
 * the node's PFTM_SRC_NODE expiry timer when its last state goes away.
 */
void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t t;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	if (s->src_node != NULL) {
		/* only established TCP states were counted in ->conn */
		if (s->src.tcp_est) {
			VERIFY(s->src_node->conn > 0);
			--s->src_node->conn;
		}
		VERIFY(s->src_node->states > 0);
		if (--s->src_node->states <= 0) {
			t = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!t) {
				t = pf_default_rule.timeout[PFTM_SRC_NODE];
			}
			s->src_node->expire = pf_time_second() + t;
		}
	}
	/* a NAT source node may be distinct from the plain source node */
	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
		VERIFY(s->nat_src_node->states > 0);
		if (--s->nat_src_node->states <= 0) {
			t = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!t) {
				t = pf_default_rule.timeout[PFTM_SRC_NODE];
			}
			s->nat_src_node->expire = pf_time_second() + t;
		}
	}
	s->src_node = s->nat_src_node = NULL;
}
1726
/*
 * Unlink "cur" from the id tree and its key trees and mark it
 * PFTM_UNLINKED so the purge thread frees it later.  A state that was
 * proxying a TCP handshake (PF_TCPS_PROXY_DST) first tears the client
 * connection down with an RST|ACK.
 */
void
pf_unlink_state(struct pf_state *cur)
{
	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		pf_send_tcp(cur->rule.ptr, cur->state_key->af_lan,
		    &cur->state_key->ext_lan.addr, &cur->state_key->lan.addr,
		    cur->state_key->ext_lan.xport.port,
		    cur->state_key->lan.xport.port,
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST | TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
	}

	/* run (and free) any teardown hooks registered on this state */
	hook_runloop(&cur->unlink_hooks, HOOK_REMOVE | HOOK_FREE);
	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFSYNC
	/* only announce deletion of states we created ourselves */
	if (cur->creatorid == pf_status.hostid) {
		pfsync_delete_state(cur);
	}
#endif
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur, 0);
}
1752
/*
 * Release an already-unlinked state (timeout == PFTM_UNLINKED), dropping
 * its rule/nat-rule/anchor references and returning the memory to the
 * pool.  Deferred while a pfsync bulk transfer still points at it.
 * Callers should be at splpf and hold the write_lock on
 * pf_consistency_lock.
 */
void
pf_free_state(struct pf_state *cur)
{
	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
#if NPFSYNC
	if (pfsyncif != NULL &&
	    (pfsyncif->sc_bulk_send_next == cur ||
	    pfsyncif->sc_bulk_terminator == cur)) {
		return;
	}
#endif
	VERIFY(cur->timeout == PFTM_UNLINKED);
	/* dropping the last reference may free the rule itself */
	VERIFY(cur->rule.ptr->states > 0);
	if (--cur->rule.ptr->states <= 0 &&
	    cur->rule.ptr->src_nodes <= 0) {
		pf_rm_rule(NULL, cur->rule.ptr);
	}
	if (cur->nat_rule.ptr != NULL) {
		VERIFY(cur->nat_rule.ptr->states > 0);
		if (--cur->nat_rule.ptr->states <= 0 &&
		    cur->nat_rule.ptr->src_nodes <= 0) {
			pf_rm_rule(NULL, cur->nat_rule.ptr);
		}
	}
	if (cur->anchor.ptr != NULL) {
		VERIFY(cur->anchor.ptr->states > 0);
		if (--cur->anchor.ptr->states <= 0) {
			pf_rm_rule(NULL, cur->anchor.ptr);
		}
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	TAILQ_REMOVE(&state_list, cur, entry_list);
	if (cur->tag) {
		pf_tag_unref(cur->tag);
	}
#if SKYWALK
	netns_release(&cur->nstoken);
#endif
	pool_put(&pf_state_pl, cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	VERIFY(pf_status.states > 0);
	pf_status.states--;
}
1799
/*
 * Examine up to "maxcheck" states for expiry, resuming where the
 * previous call left off (the cursor is static, so successive calls
 * sweep the whole list incrementally).  Already-unlinked states are
 * freed; expired ones are unlinked and freed.
 */
void
pf_purge_expired_states(u_int32_t maxcheck)
{
	/* persistent cursor into state_list across invocations */
	static struct pf_state *cur = NULL;
	struct pf_state *next;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	while (maxcheck--) {
		/* wrap to start of list when we hit the end */
		if (cur == NULL) {
			cur = TAILQ_FIRST(&state_list);
			if (cur == NULL) {
				break;	/* list empty */
			}
		}

		/* get next state, as cur may get deleted */
		next = TAILQ_NEXT(cur, entry_list);

		if (cur->timeout == PFTM_UNLINKED) {
			pf_free_state(cur);
		} else if (pf_state_expires(cur) <= pf_time_second()) {
			/* unlink and free expired state */
			pf_unlink_state(cur);
			pf_free_state(cur);
		}
		cur = next;
	}
}
1830
1831 int
pf_tbladdr_setup(struct pf_ruleset * rs,struct pf_addr_wrap * aw)1832 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1833 {
1834 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1835
1836 if (aw->type != PF_ADDR_TABLE) {
1837 return 0;
1838 }
1839 if ((aw->p.tbl = pfr_attach_table(rs, __unsafe_null_terminated_from_indexable(aw->v.tblname))) == NULL) {
1840 return 1;
1841 }
1842 return 0;
1843 }
1844
1845 void
pf_tbladdr_remove(struct pf_addr_wrap * aw)1846 pf_tbladdr_remove(struct pf_addr_wrap *aw)
1847 {
1848 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1849
1850 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) {
1851 return;
1852 }
1853 pfr_detach_table(aw->p.tbl);
1854 aw->p.tbl = NULL;
1855 }
1856
1857 void
pf_tbladdr_copyout(struct pf_addr_wrap * aw)1858 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1859 {
1860 struct pfr_ktable *kt = aw->p.tbl;
1861
1862 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1863
1864 if (aw->type != PF_ADDR_TABLE || kt == NULL) {
1865 return;
1866 }
1867 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) {
1868 kt = kt->pfrkt_root;
1869 }
1870 aw->p.tbl = NULL;
1871 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1872 kt->pfrkt_cnt : -1;
1873 }
1874
1875 static void
pf_print_addr(struct pf_addr * addr,sa_family_t af)1876 pf_print_addr(struct pf_addr *addr, sa_family_t af)
1877 {
1878 switch (af) {
1879 #if INET
1880 case AF_INET: {
1881 u_int32_t a = ntohl(addr->addr32[0]);
1882 printf("%u.%u.%u.%u", (a >> 24) & 255, (a >> 16) & 255,
1883 (a >> 8) & 255, a & 255);
1884 break;
1885 }
1886 #endif /* INET */
1887 case AF_INET6: {
1888 u_int16_t b;
1889 u_int8_t i, curstart = 255, curend = 0,
1890 maxstart = 0, maxend = 0;
1891 for (i = 0; i < 8; i++) {
1892 if (!addr->addr16[i]) {
1893 if (curstart == 255) {
1894 curstart = i;
1895 } else {
1896 curend = i;
1897 }
1898 } else {
1899 if (curstart) {
1900 if ((curend - curstart) >
1901 (maxend - maxstart)) {
1902 maxstart = curstart;
1903 maxend = curend;
1904 curstart = 255;
1905 }
1906 }
1907 }
1908 }
1909 for (i = 0; i < 8; i++) {
1910 if (i >= maxstart && i <= maxend) {
1911 if (maxend != 7) {
1912 if (i == maxstart) {
1913 printf(":");
1914 }
1915 } else {
1916 if (i == maxend) {
1917 printf(":");
1918 }
1919 }
1920 } else {
1921 b = ntohs(addr->addr16[i]);
1922 printf("%x", b);
1923 if (i < 7) {
1924 printf(":");
1925 }
1926 }
1927 }
1928 break;
1929 }
1930 }
1931 }
1932
1933 static void
pf_print_sk_host(struct pf_state_host * sh,sa_family_t af,int proto,u_int8_t proto_variant)1934 pf_print_sk_host(struct pf_state_host *sh, sa_family_t af, int proto,
1935 u_int8_t proto_variant)
1936 {
1937 pf_print_addr(&sh->addr, af);
1938
1939 switch (proto) {
1940 case IPPROTO_ESP:
1941 if (sh->xport.spi) {
1942 printf("[%08x]", ntohl(sh->xport.spi));
1943 }
1944 break;
1945
1946 case IPPROTO_GRE:
1947 if (proto_variant == PF_GRE_PPTP_VARIANT) {
1948 printf("[%u]", ntohs(sh->xport.call_id));
1949 }
1950 break;
1951
1952 case IPPROTO_TCP:
1953 case IPPROTO_UDP:
1954 printf("[%u]", ntohs(sh->xport.port));
1955 break;
1956
1957 default:
1958 break;
1959 }
1960 }
1961
1962 static void
pf_print_host(struct pf_addr * addr,u_int16_t p,sa_family_t af)1963 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1964 {
1965 pf_print_addr(addr, af);
1966 if (p) {
1967 printf("[%u]", ntohs(p));
1968 }
1969 }
1970
/*
 * Print a one-line debug summary of a state: protocol, the four
 * host/port views of its key, both directions' sequence-tracking
 * windows, and the src/dst protocol-state numbers.
 */
void
pf_print_state(struct pf_state *s)
{
	struct pf_state_key *sk = s->state_key;
	switch (sk->proto) {
	case IPPROTO_ESP:
		printf("ESP ");
		break;
	case IPPROTO_GRE:
		printf("GRE%u ", sk->proto_variant);
		break;
	case IPPROTO_TCP:
		printf("TCP ");
		break;
	case IPPROTO_UDP:
		printf("UDP ");
		break;
	case IPPROTO_ICMP:
		printf("ICMP ");
		break;
	case IPPROTO_ICMPV6:
		printf("ICMPV6 ");
		break;
	default:
		printf("%u ", sk->proto);
		break;
	}
	/* lan gwy ext_lan ext_gwy, each with its own address family */
	pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto, sk->proto_variant);
	printf(" ");
	pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto, sk->proto_variant);
	printf(" ");
	pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto,
	    sk->proto_variant);
	printf(" ");
	pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto,
	    sk->proto_variant);
	/* source direction's sequence window */
	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
	if (s->src.wscale && s->dst.wscale) {
		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
	}
	printf("]");
	/* destination direction's sequence window */
	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
	if (s->src.wscale && s->dst.wscale) {
		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
	}
	printf("]");
	printf(" %u:%u", s->src.state, s->dst.state);
}
2021
2022 void
pf_print_flags(u_int8_t f)2023 pf_print_flags(u_int8_t f)
2024 {
2025 if (f) {
2026 printf(" ");
2027 }
2028 if (f & TH_FIN) {
2029 printf("F");
2030 }
2031 if (f & TH_SYN) {
2032 printf("S");
2033 }
2034 if (f & TH_RST) {
2035 printf("R");
2036 }
2037 if (f & TH_PUSH) {
2038 printf("P");
2039 }
2040 if (f & TH_ACK) {
2041 printf("A");
2042 }
2043 if (f & TH_URG) {
2044 printf("U");
2045 }
2046 if (f & TH_ECE) {
2047 printf("E");
2048 }
2049 if (f & TH_CWR) {
2050 printf("W");
2051 }
2052 }
2053
/*
 * Advance every rule in head[i] — rules whose skip pointer for match
 * criterion i has not been set yet — to point at "cur", the first rule
 * that differs in that criterion.  Used only by pf_calc_skip_steps()
 * below, which provides the head[] and cur variables.
 */
#define PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)
2061
/*
 * Compute skip-step pointers for every rule in "rules".  For each of
 * the PF_SKIP_COUNT match criteria, a rule's skip pointer names the
 * first later rule that differs in that criterion, letting rule
 * evaluation jump over runs of rules that would fail the same test.
 * head[i] trails behind cur, marking rules whose skip[i] is unset.
 */
void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i) {
		head[i] = cur;
	}
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) {
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		}
		if (cur->direction != prev->direction) {
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		}
		if (cur->af != prev->af) {
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		}
		if (cur->proto != prev->proto) {
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		}
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) {
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		}
		{
			union pf_rule_xport *cx = &cur->src.xport;
			union pf_rule_xport *px = &prev->src.xport;

			switch (cur->proto) {
			case IPPROTO_GRE:
			case IPPROTO_ESP:
				/* no source port concept for GRE/ESP */
				PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
				break;
			default:
				/* differing port ranges (or a protocol switch) break the run */
				if (prev->proto == IPPROTO_GRE ||
				    prev->proto == IPPROTO_ESP ||
				    cx->range.op != px->range.op ||
				    cx->range.port[0] != px->range.port[0] ||
				    cx->range.port[1] != px->range.port[1]) {
					PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
				}
				break;
			}
		}
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) {
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		}
		{
			union pf_rule_xport *cx = &cur->dst.xport;
			union pf_rule_xport *px = &prev->dst.xport;

			switch (cur->proto) {
			case IPPROTO_GRE:
				/* destination "port" is the GRE call id */
				if (cur->proto != prev->proto ||
				    cx->call_id != px->call_id) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			case IPPROTO_ESP:
				/* destination "port" is the ESP SPI */
				if (cur->proto != prev->proto ||
				    cx->spi != px->spi) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			default:
				if (prev->proto == IPPROTO_GRE ||
				    prev->proto == IPPROTO_ESP ||
				    cx->range.op != px->range.op ||
				    cx->range.port[0] != px->range.port[0] ||
				    cx->range.port[1] != px->range.port[1]) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			}
		}

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	/* cur is NULL here: terminate every remaining open run */
	for (i = 0; i < PF_SKIP_COUNT; ++i) {
		PF_SET_SKIP_STEPS(i);
	}
}
2150
/*
 * Derive the 32-bit flow hash/ID for a state key.  The address/port
 * pairs are put into canonical order first so both directions of a
 * connection map to the same value.  With SKYWALK the ID is allocated
 * from the flow-ID namespace; otherwise it is a seeded net_flowhash,
 * reseeded until a non-zero value results.
 */
u_int32_t
pf_calc_state_key_flowhash(struct pf_state_key *sk)
{
#if SKYWALK
	uint32_t flowid;
	struct flowidns_flow_key fk;

	VERIFY(sk->flowsrc == FLOWSRC_PF);
	bzero(&fk, sizeof(fk));
	_CASSERT(sizeof(sk->lan.addr) == sizeof(fk.ffk_laddr));
	_CASSERT(sizeof(sk->ext_lan.addr) == sizeof(fk.ffk_laddr));
	bcopy(&sk->lan.addr, &fk.ffk_laddr, sizeof(fk.ffk_laddr));
	bcopy(&sk->ext_lan.addr, &fk.ffk_raddr, sizeof(fk.ffk_raddr));
	fk.ffk_af = sk->af_lan;
	fk.ffk_proto = sk->proto;

	switch (sk->proto) {
	case IPPROTO_ESP:
	case IPPROTO_AH:
		fk.ffk_spi = sk->lan.xport.spi;
		break;
	default:
		/*
		 * NOTE(review): xport is a union; the .spi member is used
		 * here as a wide comparator over its contents to pick a
		 * canonical (lport, rport) ordering.
		 */
		if (sk->lan.xport.spi <= sk->ext_lan.xport.spi) {
			fk.ffk_lport = sk->lan.xport.port;
			fk.ffk_rport = sk->ext_lan.xport.port;
		} else {
			fk.ffk_lport = sk->ext_lan.xport.port;
			fk.ffk_rport = sk->lan.xport.port;
		}
		break;
	}

	flowidns_allocate_flowid(FLOWIDNS_DOMAIN_PF, &fk, &flowid);
	return flowid;

#else /* !SKYWALK */

	struct pf_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	bzero(&fh, sizeof(fh));
	/* order the two (addr, xport) pairs canonically: smaller first */
	if (PF_ALEQ(&sk->lan.addr, &sk->ext_lan.addr, sk->af_lan)) {
		bcopy(&sk->lan.addr, &fh.ap1.addr, sizeof(fh.ap1.addr));
		bcopy(&sk->ext_lan.addr, &fh.ap2.addr, sizeof(fh.ap2.addr));
	} else {
		bcopy(&sk->ext_lan.addr, &fh.ap1.addr, sizeof(fh.ap1.addr));
		bcopy(&sk->lan.addr, &fh.ap2.addr, sizeof(fh.ap2.addr));
	}
	if (sk->lan.xport.spi <= sk->ext_lan.xport.spi) {
		fh.ap1.xport.spi = sk->lan.xport.spi;
		fh.ap2.xport.spi = sk->ext_lan.xport.spi;
	} else {
		fh.ap1.xport.spi = sk->ext_lan.xport.spi;
		fh.ap2.xport.spi = sk->lan.xport.spi;
	}
	fh.af = sk->af_lan;
	fh.proto = sk->proto;

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), pf_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		pf_hash_seed = RandomULong();
		goto try_again;
	}

	return flowhash;

#endif /* !SKYWALK */
}
2221
2222 static int
pf_addr_wrap_neq(struct pf_addr_wrap * aw1,struct pf_addr_wrap * aw2)2223 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
2224 {
2225 if (aw1->type != aw2->type) {
2226 return 1;
2227 }
2228 switch (aw1->type) {
2229 case PF_ADDR_ADDRMASK:
2230 case PF_ADDR_RANGE:
2231 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) {
2232 return 1;
2233 }
2234 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) {
2235 return 1;
2236 }
2237 return 0;
2238 case PF_ADDR_DYNIFTL:
2239 return aw1->p.dyn == NULL || aw2->p.dyn == NULL ||
2240 aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt;
2241 case PF_ADDR_NOROUTE:
2242 case PF_ADDR_URPFFAILED:
2243 return 0;
2244 case PF_ADDR_TABLE:
2245 return aw1->p.tbl != aw2->p.tbl;
2246 case PF_ADDR_RTLABEL:
2247 return aw1->v.rtlabel != aw2->v.rtlabel;
2248 default:
2249 printf("invalid address type: %d\n", aw1->type);
2250 return 1;
2251 }
2252 }
2253
2254 u_int16_t
pf_cksum_fixup(u_int16_t cksum,u_int16_t old,u_int16_t new,u_int8_t udp)2255 pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
2256 {
2257 return nat464_cksum_fixup(cksum, old, new, udp);
2258 }
2259
2260 /*
2261 * change ip address & port
2262 * dir : packet direction
2263 * a : address to be changed
2264 * p : port to be changed
2265 * ic : ip header checksum
2266 * pc : protocol checksum
2267 * an : new ip address
2268 * pn : new port
2269 * u : should be 1 if UDP packet else 0
2270 * af : address family of the packet
2271 * afn : address family of the new address
2272 * ua : should be 1 if ip address needs to be updated in the packet else
2273 * only the checksum is recalculated & updated.
2274 */
static __attribute__((noinline)) void
pf_change_ap(int dir, pbuf_t *pbuf, struct pf_addr *a, u_int16_t *p,
    u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn,
    u_int8_t u, sa_family_t af, sa_family_t afn, int ua)
{
	struct pf_addr ao;		/* saved original address */
	u_int16_t po = *p;		/* saved original port */

	PF_ACPY(&ao, a, af);
	if (ua) {
		/* caller wants the address rewritten in the packet too */
		PF_ACPY(a, an, afn);
	}

	*p = pn;

	switch (af) {
#if INET
	case AF_INET:
		switch (afn) {
		case AF_INET:
			/* IPv4 header checksum: fold in the two changed words */
			*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
			    ao.addr16[0], an->addr16[0], 0),
			    ao.addr16[1], an->addr16[1], 0);
			*p = pn;
			/*
			 * If the packet is originated from an ALG on the NAT gateway
			 * (source address is loopback or local), in which case the
			 * TCP/UDP checksum field contains the pseudo header checksum
			 * that's not yet complemented.
			 * In that case we do not need to fixup the checksum for port
			 * translation as the pseudo header checksum doesn't include ports.
			 *
			 * A packet generated locally will have UDP/TCP CSUM flag
			 * set (gets set in protocol output).
			 *
			 * It should be noted that the fixup doesn't do anything if the
			 * checksum is 0.
			 */
			if (dir == PF_OUT && pbuf != NULL &&
			    (*pbuf->pb_csum_flags & (CSUM_TCP | CSUM_UDP))) {
				/* Pseudo-header checksum does not include ports */
				*pc = ~pf_cksum_fixup(pf_cksum_fixup(~*pc,
				    ao.addr16[0], an->addr16[0], u),
				    ao.addr16[1], an->addr16[1], u);
			} else {
				/* full checksum: adjust for address and port */
				*pc =
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    *pc, ao.addr16[0], an->addr16[0], u),
				    ao.addr16[1], an->addr16[1], u),
				    po, pn, u);
			}
			break;
		case AF_INET6:
			/* NAT46: old v4 words out, all eight v6 words in */
			*p = pn;
			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(

			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u),
			    0, an->addr16[2], u),
			    0, an->addr16[3], u),
			    0, an->addr16[4], u),
			    0, an->addr16[5], u),
			    0, an->addr16[6], u),
			    0, an->addr16[7], u),
			    po, pn, u);
			break;
		}
		break;
#endif /* INET */
	case AF_INET6:
		switch (afn) {
		case AF_INET6:
			/*
			 * If the packet is originated from an ALG on the NAT gateway
			 * (source address is loopback or local), in which case the
			 * TCP/UDP checksum field contains the pseudo header checksum
			 * that's not yet complemented.
			 * A packet generated locally
			 * will have UDP/TCP CSUM flag set (gets set in protocol
			 * output).
			 */
			if (dir == PF_OUT && pbuf != NULL &&
			    (*pbuf->pb_csum_flags & (CSUM_TCPIPV6 |
			    CSUM_UDPIPV6))) {
				/* Pseudo-header checksum does not include ports */
				*pc =
				    ~pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    ~*pc,
				    ao.addr16[0], an->addr16[0], u),
				    ao.addr16[1], an->addr16[1], u),
				    ao.addr16[2], an->addr16[2], u),
				    ao.addr16[3], an->addr16[3], u),
				    ao.addr16[4], an->addr16[4], u),
				    ao.addr16[5], an->addr16[5], u),
				    ao.addr16[6], an->addr16[6], u),
				    ao.addr16[7], an->addr16[7], u);
			} else {
				/* full checksum: all eight words plus the port */
				*pc =
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    *pc,
				    ao.addr16[0], an->addr16[0], u),
				    ao.addr16[1], an->addr16[1], u),
				    ao.addr16[2], an->addr16[2], u),
				    ao.addr16[3], an->addr16[3], u),
				    ao.addr16[4], an->addr16[4], u),
				    ao.addr16[5], an->addr16[5], u),
				    ao.addr16[6], an->addr16[6], u),
				    ao.addr16[7], an->addr16[7], u),
				    po, pn, u);
			}
			break;
#ifdef INET
		case AF_INET:
			/* NAT64: old v6 words out (upper six become 0), port swapped */
			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u),
			    ao.addr16[2], 0, u),
			    ao.addr16[3], 0, u),
			    ao.addr16[4], 0, u),
			    ao.addr16[5], 0, u),
			    ao.addr16[6], 0, u),
			    ao.addr16[7], 0, u),
			    po, pn, u);
			break;
#endif /* INET */
		}
		break;
	}
}
2412
2413
2414 /* Changes a u_int32_t. Uses a void * so there are no align restrictions */
2415 void
pf_change_a(void * a,u_int16_t * c,u_int32_t an,u_int8_t u)2416 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
2417 {
2418 u_int32_t ao;
2419
2420 memcpy(&ao, (uint32_t *)a, sizeof(ao));
2421 memcpy((uint32_t *)a, &an, sizeof(u_int32_t));
2422 *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
2423 ao % 65536, an % 65536, u);
2424 }
2425
2426 static __attribute__((noinline)) void
pf_change_a6(struct pf_addr * a,u_int16_t * c,struct pf_addr * an,u_int8_t u)2427 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
2428 {
2429 struct pf_addr ao;
2430
2431 PF_ACPY(&ao, a, AF_INET6);
2432 PF_ACPY(a, an, AF_INET6);
2433
2434 *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2435 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2436 pf_cksum_fixup(pf_cksum_fixup(*c,
2437 ao.addr16[0], an->addr16[0], u),
2438 ao.addr16[1], an->addr16[1], u),
2439 ao.addr16[2], an->addr16[2], u),
2440 ao.addr16[3], an->addr16[3], u),
2441 ao.addr16[4], an->addr16[4], u),
2442 ao.addr16[5], an->addr16[5], u),
2443 ao.addr16[6], an->addr16[6], u),
2444 ao.addr16[7], an->addr16[7], u);
2445 }
2446
/*
 * Rewrite address 'a' to 'an' and adjust checksum 'c', supporting both
 * same-family rewrites and IPv4<->IPv6 (NAT64/NAT46) translation.
 * u is 1 for UDP (preserves the "0 == no checksum" rule), else 0.
 */
static __attribute__((noinline)) void
pf_change_addr(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u,
    sa_family_t af, sa_family_t afn)
{
	struct pf_addr ao;

	/*
	 * ao is only initialized (and only needed) on the cross-family
	 * paths; the same-family paths delegate to pf_change_a() /
	 * pf_change_a6(), which save the old value themselves.
	 */
	if (af != afn) {
		PF_ACPY(&ao, a, af);
		PF_ACPY(a, an, afn);
	}

	switch (af) {
	case AF_INET:
		switch (afn) {
		case AF_INET:
			pf_change_a(a, c, an->v4addr.s_addr, u);
			break;
		case AF_INET6:
			/* v4 -> v6: two old words out, eight new words in */
			*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(*c,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u),
			    0, an->addr16[2], u),
			    0, an->addr16[3], u),
			    0, an->addr16[4], u),
			    0, an->addr16[5], u),
			    0, an->addr16[6], u),
			    0, an->addr16[7], u);
			break;
		}
		break;
	case AF_INET6:
		switch (afn) {
		case AF_INET:
			/* v6 -> v4: eight old words out, two new words in */
			*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(*c,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u),
			    ao.addr16[2], 0, u),
			    ao.addr16[3], 0, u),
			    ao.addr16[4], 0, u),
			    ao.addr16[5], 0, u),
			    ao.addr16[6], 0, u),
			    ao.addr16[7], 0, u);
			break;
		case AF_INET6:
			pf_change_a6(a, c, an, u);
			break;
		}
		break;
	}
}
2501
/*
 * Rewrite the addresses/port embedded in an ICMP error packet (both the
 * quoted inner header and the outer header) and repair every affected
 * checksum incrementally.
 * ia/ip : inner (quoted) address and port to rewrite
 * oa    : outer address to rewrite; na/np: new address and port
 * pc    : inner protocol checksum (may be NULL)
 * h2c   : inner IP header checksum; ic: ICMP checksum; hc: outer IP cksum
 * u     : 1 for UDP, else 0; af: address family
 */
static __attribute__((noinline)) void
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
{
	struct pf_addr oia, ooa;	/* saved old inner/outer addresses */

	PF_ACPY(&oia, ia, af);
	PF_ACPY(&ooa, oa, af);

	/* Change inner protocol port, fix inner protocol checksum. */
	if (ip != NULL) {
		u_int16_t oip = *ip;
		u_int32_t opc = 0;

		if (pc != NULL) {
			opc = *pc;
		}
		*ip = np;
		if (pc != NULL) {
			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
		}
		/* the quoted bytes changed, so the ICMP checksum moves too */
		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
		if (pc != NULL) {
			/* the quoted inner checksum field itself changed */
			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
		}
	}
	/* Change inner ip address, fix inner ip and icmp checksums. */
	PF_ACPY(ia, na, af);
	switch (af) {
#if INET
	case AF_INET: {
		u_int32_t oh2c = *h2c;

		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		/* quoted inner IP checksum field changed as well */
		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
		break;
	}
#endif /* INET */
	case AF_INET6:
		/* ICMPv6 checksum covers the quoted inner address directly */
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], u),
		    oia.addr16[1], ia->addr16[1], u),
		    oia.addr16[2], ia->addr16[2], u),
		    oia.addr16[3], ia->addr16[3], u),
		    oia.addr16[4], ia->addr16[4], u),
		    oia.addr16[5], ia->addr16[5], u),
		    oia.addr16[6], ia->addr16[6], u),
		    oia.addr16[7], ia->addr16[7], u);
		break;
	}
	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
	PF_ACPY(oa, na, af);
	switch (af) {
#if INET
	case AF_INET:
		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
		    ooa.addr16[0], oa->addr16[0], 0),
		    ooa.addr16[1], oa->addr16[1], 0);
		break;
#endif /* INET */
	case AF_INET6:
		/* v6 has no header checksum; the pseudo-header feeds ICMPv6 */
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(*ic,
		    ooa.addr16[0], oa->addr16[0], u),
		    ooa.addr16[1], oa->addr16[1], u),
		    ooa.addr16[2], oa->addr16[2], u),
		    ooa.addr16[3], oa->addr16[3], u),
		    ooa.addr16[4], oa->addr16[4], u),
		    ooa.addr16[5], oa->addr16[5], u),
		    ooa.addr16[6], oa->addr16[6], u),
		    ooa.addr16[7], oa->addr16[7], u);
		break;
	}
}
2585
2586
2587 /*
2588 * Need to modulate the sequence numbers in the TCP SACK option
2589 * (credits to Krzysztof Pfaff for report and patch)
2590 */
static __attribute__((noinline)) int
pf_modulate_sack(pbuf_t *pbuf, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *dst)
{
	/* hlen = bytes of TCP options; thoptlen keeps the original count */
	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
	u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
	int copyback = 0, i, olen;
	struct sackblk sack;

#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
	/* nothing to do unless at least one SACK block could be present */
	if (hlen < TCPOLEN_SACKLEN ||
	    !pf_pull_hdr(pbuf, off + sizeof(*th), opts, sizeof(opts), hlen, NULL, NULL, pd->af)) {
		return 0;
	}

	/* walk the option list, rewriting sequence numbers in SACK blocks */
	while (hlen >= TCPOLEN_SACKLEN) {
		olen = opt[1];
		switch (*opt) {
		case TCPOPT_EOL:	/* FALLTHROUGH */
		case TCPOPT_NOP:
			/* single-byte options carry no length field */
			opt++;
			hlen--;
			break;
		case TCPOPT_SACK:
			/* clamp a bogus option length to the remaining bytes */
			if (olen > hlen) {
				olen = hlen;
			}
			if (olen >= TCPOLEN_SACKLEN) {
				for (i = 2; i + TCPOLEN_SACK <= olen;
				    i += TCPOLEN_SACK) {
					memcpy(&sack, &opt[i], sizeof(sack));
					/* shift each edge by the peer's seqdiff,
					 * patching th_sum incrementally */
					pf_change_a(&sack.start, &th->th_sum,
					    htonl(ntohl(sack.start) -
					    dst->seqdiff), 0);
					pf_change_a(&sack.end, &th->th_sum,
					    htonl(ntohl(sack.end) -
					    dst->seqdiff), 0);
					memcpy(&opt[i], &sack, sizeof(sack));
				}
				/* remember how far into the packet we modified */
				copyback = off + sizeof(*th) + thoptlen;
			}
			OS_FALLTHROUGH;
		default:
			/* defend against a zero/short length to guarantee progress */
			if (olen < 2) {
				olen = 2;
			}
			hlen -= olen;
			opt += olen;
		}
	}

	if (copyback) {
		/* ensure the region is writable, then push the edits back */
		if (pf_lazy_makewritable(pd, pbuf, copyback) == NULL) {
			return -1;
		}
		pbuf_copy_back(pbuf, off + sizeof(*th), thoptlen, opts, sizeof(opts));
	}
	return copyback;
}
2650
2651 /*
2652 * XXX
2653 *
2654 * The following functions (pf_send_tcp and pf_send_icmp) are somewhat
2655 * special in that they originate "spurious" packets rather than
2656 * filter/NAT existing packets. As such, they're not a great fit for
2657 * the 'pbuf' shim, which assumes the underlying packet buffers are
2658 * allocated elsewhere.
2659 *
2660 * Since these functions are rarely used, we'll carry on allocating mbufs
2661 * and passing them to the IP stack for eventual routing.
2662 */
2663 static __attribute__((noinline)) void
pf_send_tcp(const struct pf_rule * r,sa_family_t af,const struct pf_addr * saddr,const struct pf_addr * daddr,u_int16_t sport,u_int16_t dport,u_int32_t seq,u_int32_t ack,u_int8_t flags,u_int16_t win,u_int16_t mss,u_int8_t ttl,int tag,u_int16_t rtag,struct ether_header * eh,struct ifnet * ifp)2664 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
2665 const struct pf_addr *saddr, const struct pf_addr *daddr,
2666 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
2667 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
2668 u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
2669 {
2670 #pragma unused(eh, ifp)
2671 struct mbuf *m;
2672 int len, tlen;
2673 #if INET
2674 struct ip *h = NULL;
2675 #endif /* INET */
2676 struct ip6_hdr *h6 = NULL;
2677 struct tcphdr *th = NULL;
2678 char *opt;
2679 struct pf_mtag *pf_mtag;
2680
2681 /* maximum segment size tcp option */
2682 tlen = sizeof(struct tcphdr);
2683 if (mss) {
2684 tlen += 4;
2685 }
2686
2687 switch (af) {
2688 #if INET
2689 case AF_INET:
2690 len = sizeof(struct ip) + tlen;
2691 break;
2692 #endif /* INET */
2693 case AF_INET6:
2694 len = sizeof(struct ip6_hdr) + tlen;
2695 break;
2696 default:
2697 panic("pf_send_tcp: not AF_INET or AF_INET6!");
2698 return;
2699 }
2700
2701 /* create outgoing mbuf */
2702 m = m_gethdr(M_DONTWAIT, MT_HEADER);
2703 if (m == NULL) {
2704 return;
2705 }
2706
2707 if ((pf_mtag = pf_get_mtag(m)) == NULL) {
2708 return;
2709 }
2710
2711 if (tag) {
2712 pf_mtag->pftag_flags |= PF_TAG_GENERATED;
2713 }
2714 pf_mtag->pftag_tag = rtag;
2715
2716 if (r != NULL && PF_RTABLEID_IS_VALID(r->rtableid)) {
2717 pf_mtag->pftag_rtableid = r->rtableid;
2718 }
2719
2720 #if PF_ECN
2721 /* add hints for ecn */
2722 pf_mtag->pftag_hdr = mtod(m, struct ip *);
2723 /* record address family */
2724 pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6);
2725 switch (af) {
2726 #if INET
2727 case AF_INET:
2728 pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
2729 break;
2730 #endif /* INET */
2731 case AF_INET6:
2732 pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
2733 break;
2734 }
2735 #endif /* PF_ECN */
2736
2737 /* indicate this is TCP */
2738 m->m_pkthdr.pkt_proto = IPPROTO_TCP;
2739
2740 /* Make sure headers are 32-bit aligned */
2741 m->m_data += max_linkhdr;
2742 m->m_pkthdr.len = m->m_len = len;
2743 m->m_pkthdr.rcvif = NULL;
2744 bzero(m_mtod_current(m), len);
2745 switch (af) {
2746 #if INET
2747 case AF_INET:
2748 h = mtod(m, struct ip *);
2749
2750 /* IP header fields included in the TCP checksum */
2751 h->ip_p = IPPROTO_TCP;
2752 h->ip_len = htons(tlen);
2753 h->ip_src.s_addr = saddr->v4addr.s_addr;
2754 h->ip_dst.s_addr = daddr->v4addr.s_addr;
2755
2756 th = (struct tcphdr *)(void *)((caddr_t)h + sizeof(struct ip));
2757 break;
2758 #endif /* INET */
2759 case AF_INET6:
2760 h6 = mtod(m, struct ip6_hdr *);
2761
2762 /* IP header fields included in the TCP checksum */
2763 h6->ip6_nxt = IPPROTO_TCP;
2764 h6->ip6_plen = htons(tlen);
2765 memcpy((void *)&h6->ip6_src, &saddr->v6addr, sizeof(struct in6_addr));
2766 memcpy((void *)&h6->ip6_dst, &daddr->v6addr, sizeof(struct in6_addr));
2767
2768 th = (struct tcphdr *)(void *)
2769 ((caddr_t)h6 + sizeof(struct ip6_hdr));
2770 break;
2771 }
2772
2773 /* TCP header */
2774 th->th_sport = sport;
2775 th->th_dport = dport;
2776 th->th_seq = htonl(seq);
2777 th->th_ack = htonl(ack);
2778 th->th_off = tlen >> 2;
2779 th->th_flags = flags;
2780 th->th_win = htons(win);
2781
2782 if (mss) {
2783 opt = (char *)(th + 1);
2784 opt[0] = TCPOPT_MAXSEG;
2785 opt[1] = 4;
2786 #if BYTE_ORDER != BIG_ENDIAN
2787 HTONS(mss);
2788 #endif
2789 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
2790 }
2791
2792 switch (af) {
2793 #if INET
2794 case AF_INET: {
2795 struct route ro;
2796
2797 /* TCP checksum */
2798 th->th_sum = in_cksum(m, len);
2799
2800 /* Finish the IP header */
2801 h->ip_v = 4;
2802 h->ip_hl = sizeof(*h) >> 2;
2803 h->ip_tos = IPTOS_LOWDELAY;
2804 /*
2805 * ip_output() expects ip_len and ip_off to be in host order.
2806 */
2807 h->ip_len = len;
2808 h->ip_off = (path_mtu_discovery ? IP_DF : 0);
2809 h->ip_ttl = ttl ? ttl : ip_defttl;
2810 h->ip_sum = 0;
2811
2812 bzero(&ro, sizeof(ro));
2813 ip_output(m, NULL, &ro, 0, NULL, NULL);
2814 ROUTE_RELEASE(&ro);
2815 break;
2816 }
2817 #endif /* INET */
2818 case AF_INET6: {
2819 struct route_in6 ro6;
2820
2821 /* TCP checksum */
2822 th->th_sum = in6_cksum(m, IPPROTO_TCP,
2823 sizeof(struct ip6_hdr), tlen);
2824
2825 h6->ip6_vfc |= IPV6_VERSION;
2826 h6->ip6_hlim = IPV6_DEFHLIM;
2827
2828 ip6_output_setsrcifscope(m, IFSCOPE_UNKNOWN, NULL);
2829 ip6_output_setdstifscope(m, IFSCOPE_UNKNOWN, NULL);
2830 bzero(&ro6, sizeof(ro6));
2831 ip6_output(m, NULL, &ro6, 0, NULL, NULL, NULL);
2832 ROUTE_RELEASE(&ro6);
2833 break;
2834 }
2835 }
2836 }
2837
2838 static __attribute__((noinline)) void
pf_send_icmp(pbuf_t * pbuf,u_int8_t type,u_int8_t code,sa_family_t af,struct pf_rule * r)2839 pf_send_icmp(pbuf_t *pbuf, u_int8_t type, u_int8_t code, sa_family_t af,
2840 struct pf_rule *r)
2841 {
2842 struct mbuf *m0;
2843 struct pf_mtag *pf_mtag;
2844
2845 m0 = pbuf_clone_to_mbuf(pbuf);
2846 if (m0 == NULL) {
2847 return;
2848 }
2849
2850 if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
2851 return;
2852 }
2853
2854 pf_mtag->pftag_flags |= PF_TAG_GENERATED;
2855
2856 if (PF_RTABLEID_IS_VALID(r->rtableid)) {
2857 pf_mtag->pftag_rtableid = r->rtableid;
2858 }
2859
2860 #if PF_ECN
2861 /* add hints for ecn */
2862 pf_mtag->pftag_hdr = mtod(m0, struct ip *);
2863 /* record address family */
2864 pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6);
2865 switch (af) {
2866 #if INET
2867 case AF_INET:
2868 pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
2869 m0->m_pkthdr.pkt_proto = IPPROTO_ICMP;
2870 break;
2871 #endif /* INET */
2872 case AF_INET6:
2873 pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
2874 m0->m_pkthdr.pkt_proto = IPPROTO_ICMPV6;
2875 break;
2876 }
2877 #endif /* PF_ECN */
2878
2879 switch (af) {
2880 #if INET
2881 case AF_INET:
2882 icmp_error(m0, type, code, 0, 0);
2883 break;
2884 #endif /* INET */
2885 case AF_INET6:
2886 icmp6_error(m0, type, code, 0);
2887 break;
2888 }
2889 }
2890
2891 /*
2892 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
2893 * If n is 0, they match if they are equal. If n is != 0, they match if they
2894 * are different.
2895 */
2896 int
pf_match_addr(u_int8_t n,struct pf_addr * a,struct pf_addr * m,struct pf_addr * b,sa_family_t af)2897 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
2898 struct pf_addr *b, sa_family_t af)
2899 {
2900 int match = 0;
2901
2902 switch (af) {
2903 #if INET
2904 case AF_INET:
2905 if ((a->addr32[0] & m->addr32[0]) ==
2906 (b->addr32[0] & m->addr32[0])) {
2907 match++;
2908 }
2909 break;
2910 #endif /* INET */
2911 case AF_INET6:
2912 if (((a->addr32[0] & m->addr32[0]) ==
2913 (b->addr32[0] & m->addr32[0])) &&
2914 ((a->addr32[1] & m->addr32[1]) ==
2915 (b->addr32[1] & m->addr32[1])) &&
2916 ((a->addr32[2] & m->addr32[2]) ==
2917 (b->addr32[2] & m->addr32[2])) &&
2918 ((a->addr32[3] & m->addr32[3]) ==
2919 (b->addr32[3] & m->addr32[3]))) {
2920 match++;
2921 }
2922 break;
2923 }
2924 if (match) {
2925 if (n) {
2926 return 0;
2927 } else {
2928 return 1;
2929 }
2930 } else {
2931 if (n) {
2932 return 1;
2933 } else {
2934 return 0;
2935 }
2936 }
2937 }
2938
2939 /*
2940 * Return 1 if b <= a <= e, otherwise return 0.
2941 */
2942 int
pf_match_addr_range(struct pf_addr * b,struct pf_addr * e,struct pf_addr * a,sa_family_t af)2943 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
2944 struct pf_addr *a, sa_family_t af)
2945 {
2946 switch (af) {
2947 #if INET
2948 case AF_INET:
2949 if ((a->addr32[0] < b->addr32[0]) ||
2950 (a->addr32[0] > e->addr32[0])) {
2951 return 0;
2952 }
2953 break;
2954 #endif /* INET */
2955 case AF_INET6: {
2956 int i;
2957
2958 /* check a >= b */
2959 for (i = 0; i < 4; ++i) {
2960 if (a->addr32[i] > b->addr32[i]) {
2961 break;
2962 } else if (a->addr32[i] < b->addr32[i]) {
2963 return 0;
2964 }
2965 }
2966 /* check a <= e */
2967 for (i = 0; i < 4; ++i) {
2968 if (a->addr32[i] < e->addr32[i]) {
2969 break;
2970 } else if (a->addr32[i] > e->addr32[i]) {
2971 return 0;
2972 }
2973 }
2974 break;
2975 }
2976 }
2977 return 1;
2978 }
2979
2980 int
pf_match(u_int8_t op,u_int32_t a1,u_int32_t a2,u_int32_t p)2981 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2982 {
2983 switch (op) {
2984 case PF_OP_IRG:
2985 return (p > a1) && (p < a2);
2986 case PF_OP_XRG:
2987 return (p < a1) || (p > a2);
2988 case PF_OP_RRG:
2989 return (p >= a1) && (p <= a2);
2990 case PF_OP_EQ:
2991 return p == a1;
2992 case PF_OP_NE:
2993 return p != a1;
2994 case PF_OP_LT:
2995 return p < a1;
2996 case PF_OP_LE:
2997 return p <= a1;
2998 case PF_OP_GT:
2999 return p > a1;
3000 case PF_OP_GE:
3001 return p >= a1;
3002 }
3003 return 0; /* never reached */
3004 }
3005
3006 int
pf_match_port(u_int8_t op,u_int16_t a1,u_int16_t a2,u_int16_t p)3007 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
3008 {
3009 #if BYTE_ORDER != BIG_ENDIAN
3010 NTOHS(a1);
3011 NTOHS(a2);
3012 NTOHS(p);
3013 #endif
3014 return pf_match(op, a1, a2, p);
3015 }
3016
3017 int
pf_match_xport(u_int8_t proto,u_int8_t proto_variant,union pf_rule_xport * rx,union pf_state_xport * sx)3018 pf_match_xport(u_int8_t proto, u_int8_t proto_variant, union pf_rule_xport *rx,
3019 union pf_state_xport *sx)
3020 {
3021 int d = !0;
3022
3023 if (sx) {
3024 switch (proto) {
3025 case IPPROTO_GRE:
3026 if (proto_variant == PF_GRE_PPTP_VARIANT) {
3027 d = (rx->call_id == sx->call_id);
3028 }
3029 break;
3030
3031 case IPPROTO_ESP:
3032 d = (rx->spi == sx->spi);
3033 break;
3034
3035 case IPPROTO_TCP:
3036 case IPPROTO_UDP:
3037 case IPPROTO_ICMP:
3038 case IPPROTO_ICMPV6:
3039 if (rx->range.op) {
3040 d = pf_match_port(rx->range.op,
3041 rx->range.port[0], rx->range.port[1],
3042 sx->port);
3043 }
3044 break;
3045
3046 default:
3047 break;
3048 }
3049 }
3050
3051 return d;
3052 }
3053
3054 int
pf_match_uid(u_int8_t op,uid_t a1,uid_t a2,uid_t u)3055 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
3056 {
3057 if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) {
3058 return 0;
3059 }
3060 return pf_match(op, a1, a2, u);
3061 }
3062
3063 int
pf_match_gid(u_int8_t op,gid_t a1,gid_t a2,gid_t g)3064 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
3065 {
3066 if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) {
3067 return 0;
3068 }
3069 return pf_match(op, a1, a2, g);
3070 }
3071
3072 static int
pf_match_tag(struct pf_rule * r,struct pf_mtag * pf_mtag,int * tag)3073 pf_match_tag(struct pf_rule *r, struct pf_mtag *pf_mtag,
3074 int *tag)
3075 {
3076 if (*tag == -1) {
3077 *tag = pf_mtag->pftag_tag;
3078 }
3079
3080 return (!r->match_tag_not && r->match_tag == *tag) ||
3081 (r->match_tag_not && r->match_tag != *tag);
3082 }
3083
3084 int
pf_tag_packet(pbuf_t * pbuf,struct pf_mtag * pf_mtag,int tag,unsigned int rtableid,struct pf_pdesc * pd)3085 pf_tag_packet(pbuf_t *pbuf, struct pf_mtag *pf_mtag, int tag,
3086 unsigned int rtableid, struct pf_pdesc *pd)
3087 {
3088 if (tag <= 0 && !PF_RTABLEID_IS_VALID(rtableid) &&
3089 (pd == NULL || !(pd->pktflags & PKTF_FLOW_ID))) {
3090 return 0;
3091 }
3092
3093 if (pf_mtag == NULL && (pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
3094 return 1;
3095 }
3096
3097 if (tag > 0) {
3098 pf_mtag->pftag_tag = tag;
3099 }
3100 if (PF_RTABLEID_IS_VALID(rtableid)) {
3101 pf_mtag->pftag_rtableid = rtableid;
3102 }
3103 if (pd != NULL && (pd->pktflags & PKTF_FLOW_ID)) {
3104 *pbuf->pb_flowsrc = pd->flowsrc;
3105 *pbuf->pb_flowid = pd->flowhash;
3106 *pbuf->pb_flags |= pd->pktflags;
3107 *pbuf->pb_proto = pd->proto;
3108 }
3109
3110 return 0;
3111 }
3112
/*
 * Descend into the anchor referenced by rule *r during rule evaluation.
 * Pushes a frame on the global pf_anchor_stack recording the current
 * ruleset/rule, then points *r at the first rule of the anchor's ruleset
 * (or, for wildcard anchors, the first child's ruleset).  On stack
 * overflow the anchor is skipped entirely.
 */
void
pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
    struct pf_rule **r, struct pf_rule **a, int *match)
{
	struct pf_anchor_stackframe *f;

	(*r)->anchor->match = 0;
	if (match) {
		*match = 0;
	}
	if (*depth >= (int)sizeof(pf_anchor_stack) /
	    (int)sizeof(pf_anchor_stack[0])) {
		/* stack full: skip the anchor, continue with the next rule */
		printf("pf_step_into_anchor: stack overflow\n");
		*r = TAILQ_NEXT(*r, entries);
		return;
	} else if (*depth == 0 && a != NULL) {
		/* remember the outermost anchor rule for the caller */
		*a = *r;
	}
	f = pf_anchor_stack + (*depth)++;
	f->rs = *rs;
	f->r = *r;
	if ((*r)->anchor_wildcard) {
		/* wildcard anchor: iterate over its children, start at min */
		f->parent = &(*r)->anchor->children;
		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
		    NULL) {
			/* no children; caller sees end-of-rules */
			*r = NULL;
			return;
		}
		*rs = &f->child->ruleset;
	} else {
		f->parent = NULL;
		f->child = NULL;
		*rs = &(*r)->anchor->ruleset;
	}
	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
}
3149
/*
 * Pop anchor frames pushed by pf_step_into_anchor() until a next rule is
 * found.  For wildcard anchors this first advances to the next child
 * ruleset; match results are propagated up to the enclosing anchor rule.
 * Returns the 'quick' flag of the matched anchor rule, if any.
 */
int
pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
    struct pf_rule **r, struct pf_rule **a, int *match)
{
	struct pf_anchor_stackframe *f;
	int quick = 0;

	do {
		if (*depth <= 0) {
			break;
		}
		f = pf_anchor_stack + *depth - 1;
		if (f->parent != NULL && f->child != NULL) {
			/* wildcard anchor: record child's result, then
			 * advance to the next child ruleset */
			if (f->child->match ||
			    (match != NULL && *match)) {
				f->r->anchor->match = 1;
				if (match) {
					*match = 0;
				}
			}
			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
			if (f->child != NULL) {
				*rs = &f->child->ruleset;
				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
				if (*r == NULL) {
					/* empty child ruleset: keep popping */
					continue;
				} else {
					break;
				}
			}
		}
		/* done with this frame; resume after the anchor rule */
		(*depth)--;
		if (*depth == 0 && a != NULL) {
			*a = NULL;
		}
		*rs = f->rs;
		if (f->r->anchor->match || (match != NULL && *match)) {
			quick = f->r->quick;
		}
		*r = TAILQ_NEXT(f->r, entries);
	} while (*r == NULL);

	return quick;
}
3194
3195 void
pf_poolmask(struct pf_addr * naddr,struct pf_addr * raddr,struct pf_addr * rmask,struct pf_addr * saddr,sa_family_t af)3196 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
3197 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
3198 {
3199 switch (af) {
3200 #if INET
3201 case AF_INET:
3202 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3203 ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
3204 break;
3205 #endif /* INET */
3206 case AF_INET6:
3207 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3208 ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
3209 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
3210 ((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]);
3211 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
3212 ((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]);
3213 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
3214 ((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]);
3215 break;
3216 }
3217 }
3218
3219 void
pf_addr_inc(struct pf_addr * addr,sa_family_t af)3220 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
3221 {
3222 switch (af) {
3223 #if INET
3224 case AF_INET:
3225 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
3226 break;
3227 #endif /* INET */
3228 case AF_INET6:
3229 if (addr->addr32[3] == 0xffffffff) {
3230 addr->addr32[3] = 0;
3231 if (addr->addr32[2] == 0xffffffff) {
3232 addr->addr32[2] = 0;
3233 if (addr->addr32[1] == 0xffffffff) {
3234 addr->addr32[1] = 0;
3235 addr->addr32[0] =
3236 htonl(ntohl(addr->addr32[0]) + 1);
3237 } else {
3238 addr->addr32[1] =
3239 htonl(ntohl(addr->addr32[1]) + 1);
3240 }
3241 } else {
3242 addr->addr32[2] =
3243 htonl(ntohl(addr->addr32[2]) + 1);
3244 }
3245 } else {
3246 addr->addr32[3] =
3247 htonl(ntohl(addr->addr32[3]) + 1);
3248 }
3249 break;
3250 }
3251 }
3252
/*
 * Bob Jenkins-style 96-bit mixing step: reversibly stirs three 32-bit
 * lanes so each input bit affects the output.  Used by pf_hash() below.
 */
#define mix(a, b, c) \
	do { \
		a -= b; a -= c; a ^= (c >> 13); \
		b -= c; b -= a; b ^= (a << 8); \
		c -= a; c -= b; c ^= (b >> 13); \
		a -= b; a -= c; a ^= (c >> 12); \
		b -= c; b -= a; b ^= (a << 16); \
		c -= a; c -= b; c ^= (b >> 5); \
		a -= b; a -= c; a ^= (c >> 3); \
		b -= c; b -= a; b ^= (a << 10); \
		c -= a; c -= b; c ^= (b >> 15); \
	} while (0)
3265
3266 /*
3267 * hash function based on bridge_hash in if_bridge.c
3268 */
/*
 * Hash an address (keyed by *key) into an address-sized digest; used by
 * the pool address-selection logic.  IPv4 fills one output word, IPv6
 * fills all four via successive mix() rounds.
 */
static void
pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
    struct pf_poolhashkey *key, sa_family_t af)
{
	/* golden-ratio constants seed the first two lanes */
	u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];

	switch (af) {
#if INET
	case AF_INET:
		a += inaddr->addr32[0];
		b += key->key32[1];
		mix(a, b, c);
		hash->addr32[0] = c + key->key32[2];
		break;
#endif /* INET */
	case AF_INET6:
		/* four chained rounds, one output word per round */
		a += inaddr->addr32[0];
		b += inaddr->addr32[2];
		mix(a, b, c);
		hash->addr32[0] = c;
		a += inaddr->addr32[1];
		b += inaddr->addr32[3];
		c += key->key32[1];
		mix(a, b, c);
		hash->addr32[1] = c;
		a += inaddr->addr32[2];
		b += inaddr->addr32[1];
		c += key->key32[2];
		mix(a, b, c);
		hash->addr32[2] = c;
		a += inaddr->addr32[3];
		b += inaddr->addr32[0];
		c += key->key32[3];
		mix(a, b, c);
		hash->addr32[3] = c;
		break;
	}
}
3307
/*
 * Select the translation address for a rule's address pool.
 *
 * af        : family of the packet's source address 'saddr'
 * r         : rule whose rpool supplies the candidate addresses
 * saddr     : packet source address (input for sticky/bitmask/hash pools)
 * naddr     : chosen translation address (output)
 * init_addr : for random/round-robin pools, records the first address handed
 *             out so callers can detect a full cycle of the pool (may be NULL)
 * sn        : source-tracking node, consulted and later updated by the caller
 *             for sticky-address pools
 *
 * Returns 0 when an address was written into 'naddr', 1 on failure.
 *
 * NOTE(review): the pool is copied/compared with rpool->af in several places
 * while 'saddr' is handled with 'af'; these can differ for af-translating
 * (NAT64) rules — confirm against the callers before changing either.
 */
static __attribute__((noinline)) int
pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
{
	unsigned char hash[16];
	struct pf_pool *__single rpool = &r->rpool;
	struct pf_addr *__single raddr = &rpool->cur->addr.v.a.addr;
	struct pf_addr *__single rmask = &rpool->cur->addr.v.a.mask;
	/* remember where the round-robin walk started, to detect wrap-around */
	struct pf_pooladdr *__single acur = rpool->cur;
	struct pf_src_node k;

	/*
	 * Sticky address: if this source was already mapped, reuse the
	 * previously selected address from the source-tracking tree.
	 */
	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		k.af = af;
		PF_ACPY(&k.addr, saddr, af);
		if (r->rule_flag & PFRULE_RULESRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR) {
			k.rule.ptr = r;
		} else {
			k.rule.ptr = NULL;
		}
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, rpool->af)) {
			PF_ACPY(naddr, &(*sn)->raddr, rpool->af);
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf_map_addr: src tracking maps ");
				pf_print_host(&k.addr, 0, af);
				printf(" to ");
				pf_print_host(naddr, 0, rpool->af);
				printf("\n");
			}
			return 0;
		}
	}

	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
		return 1;
	}
	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
		/*
		 * Dynamic interface address: point raddr/rmask at the
		 * interface's current address for the pool's family.  An
		 * empty address list only fails immediately for non-round-
		 * robin pools; round-robin may still advance to another
		 * pool entry below.
		 */
		if (rpool->cur->addr.p.dyn == NULL) {
			return 1;
		}
		switch (rpool->af) {
#if INET
		case AF_INET:
			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN) {
				return 1;
			}
			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
			break;
#endif /* INET */
		case AF_INET6:
			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN) {
				return 1;
			}
			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
			break;
		}
	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
		/* tables are only usable with round-robin pools */
		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
			return 1; /* unsupported */
		}
	} else {
		raddr = &rpool->cur->addr.v.a.addr;
		rmask = &rpool->cur->addr.v.a.mask;
	}

	switch (rpool->opts & PF_POOL_TYPEMASK) {
	case PF_POOL_NONE:
		/* single-address pool: just copy it */
		PF_ACPY(naddr, raddr, rpool->af);
		break;
	case PF_POOL_BITMASK:
		/* net bits from raddr, host bits preserved from saddr */
		ASSERT(af == rpool->af);
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		break;
	case PF_POOL_RANDOM:
		if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) {
			/*
			 * First pick: randomize the counter.  For v6 only
			 * the words not fully fixed by the mask are
			 * randomized, starting at the low-order word.
			 */
			switch (af) {
#if INET
			case AF_INET:
				rpool->counter.addr32[0] = htonl(random());
				break;
#endif /* INET */
			case AF_INET6:
				if (rmask->addr32[3] != 0xffffffff) {
					rpool->counter.addr32[3] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[2] != 0xffffffff) {
					rpool->counter.addr32[2] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[1] != 0xffffffff) {
					rpool->counter.addr32[1] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[0] != 0xffffffff) {
					rpool->counter.addr32[0] =
					    RandomULong();
				}
				break;
			}
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter,
			    rpool->af);
			/* remember the starting point of the cycle */
			PF_ACPY(init_addr, naddr, rpool->af);
		} else {
			/* subsequent picks walk the pool sequentially */
			PF_AINC(&rpool->counter, rpool->af);
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter,
			    rpool->af);
		}
		break;
	case PF_POOL_SRCHASH:
		/* keyed hash of the source address selects the host bits */
		ASSERT(af == rpool->af);
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		pf_hash(saddr, (struct pf_addr *)(void *)&hash,
		    &rpool->key, af);
		PF_POOLMASK(naddr, raddr, rmask,
		    (struct pf_addr *)(void *)&hash, af);
		break;
	case PF_POOL_ROUNDROBIN:
		/* try to continue from the current pool entry first */
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				goto get_addr;
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			if (rpool->cur->addr.p.dyn != NULL &&
			    !pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				goto get_addr;
			}
		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter,
		    rpool->af)) {
			goto get_addr;
		}

try_next:
		/* advance to the next pool entry, wrapping at the end */
		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) {
			rpool->cur = TAILQ_FIRST(&rpool->list);
		}
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				/* table contains no address of type
				 * 'rpool->af' */
				if (rpool->cur != acur) {
					goto try_next;
				}
				/* walked the whole pool without a match */
				return 1;
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			rpool->tblidx = -1;
			if (rpool->cur->addr.p.dyn == NULL) {
				return 1;
			}
			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				/* table contains no address of type
				 * 'rpool->af' */
				if (rpool->cur != acur) {
					goto try_next;
				}
				return 1;
			}
		} else {
			raddr = &rpool->cur->addr.v.a.addr;
			rmask = &rpool->cur->addr.v.a.mask;
			PF_ACPY(&rpool->counter, raddr, rpool->af);
		}

get_addr:
		PF_ACPY(naddr, &rpool->counter, rpool->af);
		if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) {
			PF_ACPY(init_addr, naddr, rpool->af);
		}
		PF_AINC(&rpool->counter, rpool->af);
		break;
	}
	/* record the mapping for sticky-address reuse */
	if (*sn != NULL) {
		PF_ACPY(&(*sn)->raddr, naddr, rpool->af);
	}

	if (pf_status.debug >= PF_DEBUG_MISC &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		printf("pf_map_addr: selected address ");
		pf_print_host(naddr, 0, rpool->af);
		printf("\n");
	}

	return 0;
}
3517
/*
 * Choose the translated source address and source port for a NAT rule.
 *
 * Picks an address via pf_map_addr(), then searches the rule's proxy port
 * range for a gateway-side port that does not collide with an existing
 * state.  UDP IKE traffic and (for TCP/UDP) flows matching an existing
 * state for the same rule reuse the established mapping instead of
 * allocating a new port.
 *
 * Returns 0 on success ('naddr'/'nxport' filled in, and with SKYWALK a
 * netns port reservation held in '*pnstoken'), 1 when no address/port
 * combination is available.
 */
static __attribute__((noinline)) int
pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r,
    struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, struct pf_addr *naddr,
    union pf_state_xport *nxport, struct pf_src_node **sn
#if SKYWALK
    , netns_token *pnstoken
#endif
    )
{
#pragma unused(kif)
	struct pf_state_key_cmp key;
	struct pf_addr init_addr;
	unsigned int cut;	/* random starting point of the port search */
	sa_family_t af = pd->af;
	u_int8_t proto = pd->proto;
	unsigned int low = r->rpool.proxy_port[0];
	unsigned int high = r->rpool.proxy_port[1];

	bzero(&init_addr, sizeof(init_addr));
	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) {
		return 1;
	}

	/* ICMP has no ports; use the full id range instead */
	if (proto == IPPROTO_ICMP) {
		low = 1;
		high = 65535;
	}

	if (!nxport) {
		return 0; /* No output necessary. */
	}
	/*--- Special mapping rules for UDP ---*/
	if (proto == IPPROTO_UDP) {
		/*--- Never float IKE source port ---*/
		if (ntohs(sxport->port) == PF_IKE_PORT) {
			nxport->port = sxport->port;
			return 0;
		}

		/*--- Apply exterior mapping options ---*/
		/* reuse the gateway port of an existing state for the same
		 * internal endpoint (endpoint-independent mapping) */
		if (r->extmap > PF_EXTMAP_APD) {
			struct pf_state *s;

			TAILQ_FOREACH(s, &state_list, entry_list) {
				struct pf_state_key *sk = s->state_key;
				if (!sk) {
					continue;
				}
				if (s->nat_rule.ptr != r) {
					continue;
				}
				if (sk->proto != IPPROTO_UDP ||
				    sk->af_lan != af) {
					continue;
				}
				if (sk->lan.xport.port != sxport->port) {
					continue;
				}
				if (PF_ANEQ(&sk->lan.addr, saddr, af)) {
					continue;
				}
				/* address+port-dependent modes also require
				 * the same external destination address */
				if (r->extmap < PF_EXTMAP_EI &&
				    PF_ANEQ(&sk->ext_lan.addr, daddr, af)) {
					continue;
				}

#if SKYWALK
				if (netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, sxport->port,
				    NETNS_PF, NULL) != 0) {
					return 1;
				}
#endif
				nxport->port = sk->gwy.xport.port;
				return 0;
			}
		}
	} else if (proto == IPPROTO_TCP) {
		struct pf_state* s;
		/*
		 * APPLE MODIFICATION: <rdar://problem/6546358>
		 * Fix allows....NAT to use a single binding for TCP session
		 * with same source IP and source port
		 */
		TAILQ_FOREACH(s, &state_list, entry_list) {
			struct pf_state_key* sk = s->state_key;
			if (!sk) {
				continue;
			}
			if (s->nat_rule.ptr != r) {
				continue;
			}
			if (sk->proto != IPPROTO_TCP || sk->af_lan != af) {
				continue;
			}
			if (sk->lan.xport.port != sxport->port) {
				continue;
			}
			if (!(PF_AEQ(&sk->lan.addr, saddr, af))) {
				continue;
			}
#if SKYWALK
			if (netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, sxport->port,
			    NETNS_PF, NULL) != 0) {
				return 1;
			}
#endif
			nxport->port = sk->gwy.xport.port;
			return 0;
		}
	}
	/* probe candidate gateway ports until one has no conflicting state;
	 * if the whole range is exhausted, move to the next pool address */
	do {
		key.af_gwy = af;
		key.proto = proto;
		PF_ACPY(&key.ext_gwy.addr, daddr, key.af_gwy);
		PF_ACPY(&key.gwy.addr, naddr, key.af_gwy);
		switch (proto) {
		case IPPROTO_UDP:
			key.proto_variant = r->extfilter;
			break;
		default:
			key.proto_variant = 0;
			break;
		}
		if (dxport) {
			key.ext_gwy.xport = *dxport;
		} else {
			memset(&key.ext_gwy.xport, 0,
			    sizeof(key.ext_gwy.xport));
		}
		/*
		 * port search; start random, step;
		 * similar 2 portloop in in_pcbbind
		 */
		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
		    proto == IPPROTO_ICMP)) {
			/* portless protocols: only the address must be free */
			if (dxport) {
				key.gwy.xport = *dxport;
			} else {
				memset(&key.gwy.xport, 0,
				    sizeof(key.gwy.xport));
			}
#if SKYWALK
			/* Nothing to do: netns handles TCP/UDP only */
#endif
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
				return 0;
			}
		} else if (low == 0 && high == 0) {
			/* no proxy range configured: keep the original port */
			key.gwy.xport = *nxport;
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
			    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
			    || netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, nxport->port,
			    NETNS_PF, NULL) == 0)
#endif
			    ) {
				return 0;
			}
		} else if (low == high) {
			/* single-port range */
			key.gwy.xport.port = htons(low);
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
			    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
			    || netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, htons(low),
			    NETNS_PF, NULL) == 0)
#endif
			    ) {
				nxport->port = htons(low);
				return 0;
			}
		} else {
			unsigned int tmp;
			if (low > high) {
				tmp = low;
				low = high;
				high = tmp;
			}
			/* low < high */
			cut = htonl(random()) % (1 + high - low) + low;
			/* low <= cut <= high */
			/* scan upward from the random cut point ... */
			for (tmp = cut; tmp <= high; ++(tmp)) {
				key.gwy.xport.port = htons(tmp);
				if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
				    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
				    || netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, htons(tmp),
				    NETNS_PF, NULL) == 0)
#endif
				    ) {
					nxport->port = htons(tmp);
					return 0;
				}
			}
			/* ... then downward from just below it.
			 * NOTE(review): 'tmp' is unsigned; if a rule could
			 * set low == 0 with high > 0 this loop condition
			 * never becomes false and tmp wraps — verify the
			 * ioctl path rejects such ranges. */
			for (tmp = cut - 1; tmp >= low; --(tmp)) {
				key.gwy.xport.port = htons(tmp);
				if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
				    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
				    || netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, htons(tmp),
				    NETNS_PF, NULL) == 0)
#endif
				    ) {
					nxport->port = htons(tmp);
					return 0;
				}
			}
		}

		/* range exhausted: try another pool address if the pool
		 * type supports cycling */
		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
		case PF_POOL_RANDOM:
		case PF_POOL_ROUNDROBIN:
			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) {
				return 1;
			}
			break;
		case PF_POOL_NONE:
		case PF_POOL_SRCHASH:
		case PF_POOL_BITMASK:
		default:
			return 1;
		}
	} while (!PF_AEQ(&init_addr, naddr, af));

	return 1; /* none available */
}
3750
/*
 * Find the first matching translation rule of the given ruleset class
 * (rs_num: NAT, RDR or BINAT) for the packet described by pd/saddr/daddr.
 *
 * The loop walks the active ruleset using pre-computed skip steps: each
 * failed criterion advances 'r' either by its skip pointer (when the
 * criterion participates in skip-step optimization) or to the next rule.
 * BINAT in, and RDR out, are matched against the rule's translated side
 * (xdst/xsrc) rather than the plain src/dst.
 *
 * Returns the matched rule, or NULL if none matched, tagging failed, or
 * the match was an explicit "no nat/rdr/binat" rule.
 */
static __attribute__((noinline)) struct pf_rule *
pf_match_translation(struct pf_pdesc *pd, pbuf_t *pbuf, int off,
    int direction, struct pfi_kif *kif, struct pf_addr *saddr,
    union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, int rs_num)
{
	struct pf_rule *__single r, *__single rm = NULL;
	struct pf_ruleset *__single ruleset = NULL;
	int tag = -1;
	unsigned int rtableid = IFSCOPE_NONE;
	int asd = 0;	/* anchor stack depth */

	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
	while (r && rm == NULL) {
		struct pf_rule_addr *src = NULL, *dst = NULL;
		struct pf_addr_wrap *xdst = NULL;
		struct pf_addr_wrap *xsrc = NULL;
		union pf_rule_xport rdrxport;

		/* pick which rule fields to compare, per action/direction */
		if (r->action == PF_BINAT && direction == PF_IN) {
			src = &r->dst;
			if (r->rpool.cur != NULL) {
				xdst = &r->rpool.cur->addr;
			}
		} else if (r->action == PF_RDR && direction == PF_OUT) {
			dst = &r->src;
			src = &r->dst;
			if (r->rpool.cur != NULL) {
				rdrxport.range.op = PF_OP_EQ;
				rdrxport.range.port[0] =
				    htons(r->rpool.proxy_port[0]);
				xsrc = &r->rpool.cur->addr;
			}
		} else {
			src = &r->src;
			dst = &r->dst;
		}

		r->evaluations++;
		/* each mismatch advances r; a full match falls to 'else' */
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != direction) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != pd->af) {
			r = r->skip[PF_SKIP_AF].ptr;
		} else if (r->proto && r->proto != pd->proto) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		} else if (xsrc && PF_MISMATCHAW(xsrc, saddr, pd->af, 0, NULL)) {
			r = TAILQ_NEXT(r, entries);
		} else if (!xsrc && PF_MISMATCHAW(&src->addr, saddr, pd->af,
		    src->neg, kif)) {
			r = TAILQ_NEXT(r, entries);
		} else if (xsrc && (!rdrxport.range.port[0] ||
		    !pf_match_xport(r->proto, r->proto_variant, &rdrxport,
		    sxport))) {
			r = TAILQ_NEXT(r, entries);
		} else if (!xsrc && !pf_match_xport(r->proto,
		    r->proto_variant, &src->xport, sxport)) {
			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
			    PF_SKIP_DST_PORT].ptr;
		} else if (dst != NULL &&
		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		} else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
		    0, NULL)) {
			r = TAILQ_NEXT(r, entries);
		} else if (dst && !pf_match_xport(r->proto, r->proto_variant,
		    &dst->xport, dxport)) {
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, pbuf,
		    off, pf_pd_get_hdr_tcp(pd)), r->os_fingerprint))) {
			r = TAILQ_NEXT(r, entries);
		} else {
			/* rule matched: latch tag/rtable, then either take
			 * it or descend into its anchor */
			if (r->tag) {
				tag = r->tag;
			}
			if (PF_RTABLEID_IS_VALID(r->rtableid)) {
				rtableid = r->rtableid;
			}
			if (r->anchor == NULL) {
				rm = r;
			} else {
				pf_step_into_anchor(&asd, &ruleset, rs_num,
				    &r, NULL, NULL);
			}
		}
		/* end of an anchor's rules: resume in the parent ruleset */
		if (r == NULL) {
			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
			    NULL, NULL);
		}
	}
	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, NULL)) {
		return NULL;
	}
	/* "no nat"-style rules match but suppress translation */
	if (rm != NULL && (rm->action == PF_NONAT ||
	    rm->action == PF_NORDR || rm->action == PF_NOBINAT ||
	    rm->action == PF_NONAT64)) {
		return NULL;
	}
	return rm;
}
3855
3856 /*
3857 * Get address translation information for NAT/BINAT/RDR
3858 * pd : pf packet descriptor
3859 * pbuf : pbuf holding the packet
3860 * off : offset to protocol header
3861 * direction : direction of packet
3862 * kif : pf interface info obtained from the packet's recv interface
3863 * sn : source node pointer (output)
3864 * saddr : packet source address
3865 * sxport : packet source port
3866 * daddr : packet destination address
3867 * dxport : packet destination port
3868 * nsxport : translated source port (output)
3869 *
3870 * Translated source & destination address are updated in pd->nsaddr &
3871 * pd->ndaddr
3872 */
static __attribute__((noinline)) struct pf_rule *
pf_get_translation_aux(struct pf_pdesc *pd, pbuf_t *pbuf, int off,
    int direction, struct pfi_kif *kif, struct pf_src_node **sn,
    struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, union pf_state_xport *nsxport
#if SKYWALK
    , netns_token *pnstoken
#endif
    )
{
	struct pf_rule *r = NULL;
	pd->naf = pd->af;	/* default: no address-family translation */

	/*
	 * Rule lookup order depends on direction: outbound prefers
	 * BINAT, then RDR, then NAT; inbound prefers RDR, then BINAT.
	 */
	if (direction == PF_OUT) {
		r = pf_match_translation(pd, pbuf, off, direction, kif, saddr,
		    sxport, daddr, dxport, PF_RULESET_BINAT);
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_RDR);
		}
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_NAT);
		}
	} else {
		r = pf_match_translation(pd, pbuf, off, direction, kif, saddr,
		    sxport, daddr, dxport, PF_RULESET_RDR);
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_BINAT);
		}
	}

	if (r != NULL) {
		struct pf_addr *nsaddr = &pd->naddr;
		struct pf_addr *ndaddr = &pd->ndaddr;

		/* start from the untranslated addresses */
		PF_ACPY(nsaddr, saddr, pd->af);
		PF_ACPY(ndaddr, daddr, pd->af);

		switch (r->action) {
		case PF_NONAT:
		case PF_NONAT64:
		case PF_NOBINAT:
		case PF_NORDR:
			return NULL;
		case PF_NAT:
		case PF_NAT64:
			/*
			 * we do NAT64 on incoming path and we call ip_input
			 * which asserts receive interface to be not NULL.
			 * The below check is to prevent NAT64 action on any
			 * packet generated by local entity using synthesized
			 * IPv6 address.
			 */
			if ((r->action == PF_NAT64) && (direction == PF_OUT)) {
				return NULL;
			}

			/* allocate translated source address + port */
			if (pf_get_sport(pd, kif, r, saddr, sxport, daddr,
			    dxport, nsaddr, nsxport, sn
#if SKYWALK
			    , pnstoken
#endif
			    )) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: NAT proxy port allocation "
				    "(%u-%u) failed\n",
				    r->rpool.proxy_port[0],
				    r->rpool.proxy_port[1]));
				return NULL;
			}
			/*
			 * For NAT64 the destination IPv4 address is derived
			 * from the last 32 bits of synthesized IPv6 address
			 */
			if (r->action == PF_NAT64) {
				ndaddr->v4addr.s_addr = daddr->addr32[3];
				pd->naf = AF_INET;
			}
			break;
		case PF_BINAT:
			/* bidirectional 1:1 mapping; direction picks which
			 * endpoint gets rewritten */
			switch (direction) {
			case PF_OUT:
				if (r->rpool.cur->addr.type ==
				    PF_ADDR_DYNIFTL) {
					if (r->rpool.cur->addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr4,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask4,
						    saddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr6,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask6,
						    saddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(nsaddr,
					    &r->rpool.cur->addr.v.a.addr,
					    &r->rpool.cur->addr.v.a.mask,
					    saddr, pd->af);
				}
				break;
			case PF_IN:
				/* reverse direction: rewrite destination
				 * back to the rule's internal source */
				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
					if (r->src.addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->src.addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(ndaddr,
						    &r->src.addr.p.dyn->
						    pfid_addr4,
						    &r->src.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->src.addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(ndaddr,
						    &r->src.addr.p.dyn->
						    pfid_addr6,
						    &r->src.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(ndaddr,
					    &r->src.addr.v.a.addr,
					    &r->src.addr.v.a.mask, daddr,
					    pd->af);
				}
				break;
			}
			break;
		case PF_RDR: {
			switch (direction) {
			case PF_OUT:
				/* outbound RDR: undo the redirection on the
				 * source side */
				if (r->dst.addr.type == PF_ADDR_DYNIFTL) {
					if (r->dst.addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->dst.addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->dst.addr.p.dyn->
						    pfid_addr4,
						    &r->dst.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->dst.addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->dst.addr.p.dyn->
						    pfid_addr6,
						    &r->dst.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(nsaddr,
					    &r->dst.addr.v.a.addr,
					    &r->dst.addr.v.a.mask,
					    daddr, pd->af);
				}
				if (nsxport && r->dst.xport.range.port[0]) {
					nsxport->port =
					    r->dst.xport.range.port[0];
				}
				break;
			case PF_IN:
				/* inbound RDR: pick the redirect target from
				 * the rule's pool */
				if (pf_map_addr(pd->af, r, saddr,
				    ndaddr, NULL, sn)) {
					return NULL;
				}
				if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
				    PF_POOL_BITMASK) {
					PF_POOLMASK(ndaddr, ndaddr,
					    &r->rpool.cur->addr.v.a.mask, daddr,
					    pd->af);
				}

				if (nsxport && dxport) {
					if (r->rpool.proxy_port[1]) {
						/* map the destination port
						 * into the proxy range,
						 * preserving the offset */
						u_int32_t tmp_nport;

						tmp_nport =
						    ((ntohs(dxport->port) -
						    ntohs(r->dst.xport.range.
						    port[0])) %
						    (r->rpool.proxy_port[1] -
						    r->rpool.proxy_port[0] +
						    1)) + r->rpool.proxy_port[0];

						/* wrap around if necessary */
						if (tmp_nport > 65535) {
							tmp_nport -= 65535;
						}
						nsxport->port =
						    htons((u_int16_t)tmp_nport);
					} else if (r->rpool.proxy_port[0]) {
						nsxport->port = htons(r->rpool.
						    proxy_port[0]);
					}
				}
				break;
			}
			break;
		}
		default:
			return NULL;
		}
	}

	return r;
}
4131
/*
 * Look up the local socket owning this flow and fill pd->lookup with its
 * uid/gid (pid is initialized but not resolved here).
 *
 * Only TCP and UDP are supported.  For inbound packets the flow's
 * (src, dst) is used as-is; for outbound packets the endpoints and ports
 * are swapped so the lookup is always from the remote peer's perspective.
 * IPv4 lookups fall back to the v4-mapped IPv6 PCB hash and then to
 * wildcard (listening-socket) lookups.
 *
 * Returns 1 when a matching PCB was found, -1 otherwise.
 */
int
pf_socket_lookup(int direction, struct pf_pdesc *pd)
{
	struct pf_addr *__single saddr, *__single daddr;
	u_int16_t sport, dport;
	struct inpcbinfo *__single pi;
	int inp = 0;

	if (pd == NULL) {
		return -1;
	}
	pd->lookup.uid = UID_MAX;
	pd->lookup.gid = GID_MAX;
	pd->lookup.pid = NO_PID;

	/* extract the ports and select the protocol's PCB table */
	switch (pd->proto) {
	case IPPROTO_TCP:
		if (pf_pd_get_hdr_tcp(pd) == NULL) {
			return -1;
		}
		sport = pf_pd_get_hdr_tcp(pd)->th_sport;
		dport = pf_pd_get_hdr_tcp(pd)->th_dport;
		pi = &tcbinfo;
		break;
	case IPPROTO_UDP:
		if (pf_pd_get_hdr_udp(pd) == NULL) {
			return -1;
		}
		sport = pf_pd_get_hdr_udp(pd)->uh_sport;
		dport = pf_pd_get_hdr_udp(pd)->uh_dport;
		pi = &udbinfo;
		break;
	default:
		return -1;
	}
	if (direction == PF_IN) {
		saddr = pd->src;
		daddr = pd->dst;
	} else {
		/* outbound: swap so the local socket is the "destination" */
		u_int16_t p;

		p = sport;
		sport = dport;
		dport = p;
		saddr = pd->dst;
		daddr = pd->src;
	}
	switch (pd->af) {
#if INET
	case AF_INET:
		/* exact v4 lookup first */
		inp = in_pcblookup_hash_exists(pi, saddr->v4addr, sport, daddr->v4addr, dport,
		    0, &pd->lookup.uid, &pd->lookup.gid, NULL);
		if (inp == 0) {
			struct in6_addr s6, d6;

			/* build ::ffff:a.b.c.d v4-mapped addresses */
			memset(&s6, 0, sizeof(s6));
			s6.s6_addr16[5] = htons(0xffff);
			memcpy(&s6.s6_addr32[3], &saddr->v4addr,
			    sizeof(saddr->v4addr));

			memset(&d6, 0, sizeof(d6));
			d6.s6_addr16[5] = htons(0xffff);
			memcpy(&d6.s6_addr32[3], &daddr->v4addr,
			    sizeof(daddr->v4addr));

			/* then: mapped-v6 exact, v4 wildcard, mapped-v6
			 * wildcard — stop at the first hit */
			inp = in6_pcblookup_hash_exists(pi, &s6, sport, IFSCOPE_NONE,
			    &d6, dport, IFSCOPE_NONE, 0, &pd->lookup.uid, &pd->lookup.gid, NULL, false);
			if (inp == 0) {
				inp = in_pcblookup_hash_exists(pi, saddr->v4addr, sport,
				    daddr->v4addr, dport, INPLOOKUP_WILDCARD, &pd->lookup.uid, &pd->lookup.gid, NULL);
				if (inp == 0) {
					inp = in6_pcblookup_hash_exists(pi, &s6, sport, IFSCOPE_NONE,
					    &d6, dport, IFSCOPE_NONE, INPLOOKUP_WILDCARD,
					    &pd->lookup.uid, &pd->lookup.gid, NULL, false);
					if (inp == 0) {
						return -1;
					}
				}
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		/* exact v6 lookup, then wildcard */
		inp = in6_pcblookup_hash_exists(pi, &saddr->v6addr, sport, IFSCOPE_UNKNOWN, &daddr->v6addr,
		    dport, IFSCOPE_UNKNOWN, 0, &pd->lookup.uid, &pd->lookup.gid, NULL, false);
		if (inp == 0) {
			inp = in6_pcblookup_hash_exists(pi, &saddr->v6addr, sport, IFSCOPE_UNKNOWN,
			    &daddr->v6addr, dport, IFSCOPE_UNKNOWN, INPLOOKUP_WILDCARD,
			    &pd->lookup.uid, &pd->lookup.gid, NULL, false);
			if (inp == 0) {
				return -1;
			}
		}
		break;

	default:
		return -1;
	}

	return 1;
}
4233
4234 static __attribute__((noinline)) u_int8_t
pf_get_wscale(pbuf_t * pbuf,int off,u_int16_t th_off,sa_family_t af)4235 pf_get_wscale(pbuf_t *pbuf, int off, u_int16_t th_off, sa_family_t af)
4236 {
4237 int hlen;
4238 u_int8_t hdr[60];
4239 u_int8_t *opt, optlen;
4240 u_int8_t wscale = 0;
4241
4242 hlen = th_off << 2; /* hlen <= sizeof (hdr) */
4243 if (hlen <= (int)sizeof(struct tcphdr)) {
4244 return 0;
4245 }
4246 if (!pf_pull_hdr(pbuf, off, hdr, sizeof(hdr), hlen, NULL, NULL, af)) {
4247 return 0;
4248 }
4249 opt = hdr + sizeof(struct tcphdr);
4250 hlen -= sizeof(struct tcphdr);
4251 while (hlen >= 3) {
4252 switch (*opt) {
4253 case TCPOPT_EOL:
4254 case TCPOPT_NOP:
4255 ++opt;
4256 --hlen;
4257 break;
4258 case TCPOPT_WINDOW:
4259 wscale = opt[2];
4260 if (wscale > TCP_MAX_WINSHIFT) {
4261 wscale = TCP_MAX_WINSHIFT;
4262 }
4263 wscale |= PF_WSCALE_FLAG;
4264 OS_FALLTHROUGH;
4265 default:
4266 optlen = opt[1];
4267 if (optlen < 2) {
4268 optlen = 2;
4269 }
4270 hlen -= optlen;
4271 opt += optlen;
4272 break;
4273 }
4274 }
4275 return wscale;
4276 }
4277
4278 static __attribute__((noinline)) u_int16_t
pf_get_mss(pbuf_t * pbuf,int off,u_int16_t th_off,sa_family_t af)4279 pf_get_mss(pbuf_t *pbuf, int off, u_int16_t th_off, sa_family_t af)
4280 {
4281 int hlen;
4282 u_int8_t hdr[60];
4283 u_int8_t *opt, optlen;
4284 u_int16_t mss = tcp_mssdflt;
4285
4286 hlen = th_off << 2; /* hlen <= sizeof (hdr) */
4287 if (hlen <= (int)sizeof(struct tcphdr)) {
4288 return 0;
4289 }
4290 if (!pf_pull_hdr(pbuf, off, hdr, sizeof(hdr), hlen, NULL, NULL, af)) {
4291 return 0;
4292 }
4293 opt = hdr + sizeof(struct tcphdr);
4294 hlen -= sizeof(struct tcphdr);
4295 while (hlen >= TCPOLEN_MAXSEG) {
4296 switch (*opt) {
4297 case TCPOPT_EOL:
4298 case TCPOPT_NOP:
4299 ++opt;
4300 --hlen;
4301 break;
4302 case TCPOPT_MAXSEG:
4303 bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
4304 #if BYTE_ORDER != BIG_ENDIAN
4305 NTOHS(mss);
4306 #endif
4307 OS_FALLTHROUGH;
4308 default:
4309 optlen = opt[1];
4310 if (optlen < 2) {
4311 optlen = 2;
4312 }
4313 hlen -= optlen;
4314 opt += optlen;
4315 break;
4316 }
4317 }
4318 return mss;
4319 }
4320
4321 static __attribute__((noinline)) u_int16_t
pf_calc_mss(struct pf_addr * addr,sa_family_t af,u_int16_t offer)4322 pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
4323 {
4324 #if INET
4325 struct sockaddr_in *dst;
4326 struct route ro;
4327 #endif /* INET */
4328 struct sockaddr_in6 *dst6;
4329 struct route_in6 ro6;
4330 struct rtentry *rt = NULL;
4331 int hlen;
4332 u_int16_t mss = tcp_mssdflt;
4333
4334 switch (af) {
4335 #if INET
4336 case AF_INET:
4337 hlen = sizeof(struct ip);
4338 bzero(&ro, sizeof(ro));
4339 dst = (struct sockaddr_in *)(void *)&ro.ro_dst;
4340 dst->sin_family = AF_INET;
4341 dst->sin_len = sizeof(*dst);
4342 dst->sin_addr = addr->v4addr;
4343 rtalloc(&ro);
4344 rt = ro.ro_rt;
4345 break;
4346 #endif /* INET */
4347 case AF_INET6:
4348 hlen = sizeof(struct ip6_hdr);
4349 bzero(&ro6, sizeof(ro6));
4350 dst6 = (struct sockaddr_in6 *)(void *)&ro6.ro_dst;
4351 dst6->sin6_family = AF_INET6;
4352 dst6->sin6_len = sizeof(*dst6);
4353 dst6->sin6_addr = addr->v6addr;
4354 rtalloc((struct route *)&ro);
4355 rt = ro6.ro_rt;
4356 break;
4357 default:
4358 panic("pf_calc_mss: not AF_INET or AF_INET6!");
4359 return 0;
4360 }
4361
4362 if (rt && rt->rt_ifp) {
4363 /* This is relevant only for PF SYN Proxy */
4364 int interface_mtu = rt->rt_ifp->if_mtu;
4365
4366 if (af == AF_INET &&
4367 INTF_ADJUST_MTU_FOR_CLAT46(rt->rt_ifp)) {
4368 interface_mtu = IN6_LINKMTU(rt->rt_ifp);
4369 /* Further adjust the size for CLAT46 expansion */
4370 interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
4371 }
4372 mss = interface_mtu - hlen - sizeof(struct tcphdr);
4373 mss = max(tcp_mssdflt, mss);
4374 rtfree(rt);
4375 }
4376 mss = min(mss, offer);
4377 mss = max(mss, 64); /* sanity - at least max opt space */
4378 return mss;
4379 }
4380
4381 static void
pf_set_rt_ifp(struct pf_state * s,struct pf_addr * saddr,sa_family_t af)4382 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af)
4383 {
4384 struct pf_rule *r = s->rule.ptr;
4385
4386 s->rt_kif = NULL;
4387
4388 if (!r->rt || r->rt == PF_FASTROUTE) {
4389 return;
4390 }
4391 if ((af == AF_INET) || (af == AF_INET6)) {
4392 pf_map_addr(af, r, saddr, &s->rt_addr, NULL,
4393 &s->nat_src_node);
4394 s->rt_kif = r->rpool.cur->kif;
4395 }
4396
4397 return;
4398 }
4399
4400 static void
pf_attach_state(struct pf_state_key * sk,struct pf_state * s,int tail)4401 pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail)
4402 {
4403 s->state_key = sk;
4404 sk->refcnt++;
4405
4406 /* list is sorted, if-bound states before floating */
4407 if (tail) {
4408 TAILQ_INSERT_TAIL(&sk->states, s, next);
4409 } else {
4410 TAILQ_INSERT_HEAD(&sk->states, s, next);
4411 }
4412 }
4413
/*
 * Return the state key's flow ID to the flow-ID namespace, but only when
 * PF itself allocated it (flowsrc == FLOWSRC_PF); inherited IDs are left
 * for their owner to release.
 */
static void
pf_state_key_release_flowid(struct pf_state_key *sk)
{
#pragma unused (sk)
#if SKYWALK
	if (sk->flowsrc == FLOWSRC_PF && sk->flowhash != 0) {
		flowidns_release_flowid(sk->flowhash);
		sk->flowsrc = 0;
		sk->flowhash = 0;
	}
#endif /* SKYWALK */
}
4426
4427 void
pf_detach_state(struct pf_state * s,int flags)4428 pf_detach_state(struct pf_state *s, int flags)
4429 {
4430 struct pf_state_key *sk = s->state_key;
4431
4432 if (sk == NULL) {
4433 return;
4434 }
4435
4436 s->state_key = NULL;
4437 TAILQ_REMOVE(&sk->states, s, next);
4438 if (--sk->refcnt == 0) {
4439 if (!(flags & PF_DT_SKIP_EXTGWY)) {
4440 pf_remove_state_key_ext_gwy(sk);
4441 }
4442 if (!(flags & PF_DT_SKIP_LANEXT)) {
4443 RB_REMOVE(pf_state_tree_lan_ext,
4444 &pf_statetbl_lan_ext, sk);
4445 }
4446 if (sk->app_state) {
4447 pool_put(&pf_app_state_pl, sk->app_state);
4448 }
4449 pf_state_key_release_flowid(sk);
4450 pool_put(&pf_state_key_pl, sk);
4451 }
4452 }
4453
4454 struct pf_state_key *
pf_alloc_state_key(struct pf_state * s,struct pf_state_key * psk)4455 pf_alloc_state_key(struct pf_state *s, struct pf_state_key *psk)
4456 {
4457 struct pf_state_key *__single sk;
4458
4459 if ((sk = pool_get(&pf_state_key_pl, PR_WAITOK)) == NULL) {
4460 return NULL;
4461 }
4462 bzero(sk, sizeof(*sk));
4463 TAILQ_INIT(&sk->states);
4464 pf_attach_state(sk, s, 0);
4465
4466 /* initialize state key from psk, if provided */
4467 if (psk != NULL) {
4468 bcopy(&psk->lan, &sk->lan, sizeof(sk->lan));
4469 bcopy(&psk->gwy, &sk->gwy, sizeof(sk->gwy));
4470 bcopy(&psk->ext_lan, &sk->ext_lan, sizeof(sk->ext_lan));
4471 bcopy(&psk->ext_gwy, &sk->ext_gwy, sizeof(sk->ext_gwy));
4472 sk->af_lan = psk->af_lan;
4473 sk->af_gwy = psk->af_gwy;
4474 sk->proto = psk->proto;
4475 sk->direction = psk->direction;
4476 sk->proto_variant = psk->proto_variant;
4477 VERIFY(psk->app_state == NULL);
4478 ASSERT(psk->flowsrc != FLOWSRC_PF);
4479 sk->flowsrc = psk->flowsrc;
4480 sk->flowhash = psk->flowhash;
4481 /* don't touch tree entries, states and refcnt on sk */
4482 }
4483
4484 if (sk->flowhash == 0) {
4485 ASSERT(sk->flowsrc == 0);
4486 sk->flowsrc = FLOWSRC_PF;
4487 sk->flowhash = pf_calc_state_key_flowhash(sk);
4488 }
4489
4490 return sk;
4491 }
4492
4493 static __attribute__((noinline)) u_int32_t
pf_tcp_iss(struct pf_pdesc * pd)4494 pf_tcp_iss(struct pf_pdesc *pd)
4495 {
4496 MD5_CTX ctx;
4497 u_int32_t digest[4];
4498
4499 if (pf_tcp_secret_init == 0) {
4500 read_frandom(pf_tcp_secret, sizeof(pf_tcp_secret));
4501 MD5Init(&pf_tcp_secret_ctx);
4502 MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
4503 sizeof(pf_tcp_secret));
4504 pf_tcp_secret_init = 1;
4505 }
4506 ctx = pf_tcp_secret_ctx;
4507
4508 MD5Update(&ctx, (char *)&pf_pd_get_hdr_tcp(pd)->th_sport, sizeof(u_short));
4509 MD5Update(&ctx, (char *)&pf_pd_get_hdr_tcp(pd)->th_dport, sizeof(u_short));
4510 if (pd->af == AF_INET6) {
4511 MD5Update(&ctx, (char *)&pd->src->v6addr, sizeof(struct in6_addr));
4512 MD5Update(&ctx, (char *)&pd->dst->v6addr, sizeof(struct in6_addr));
4513 } else {
4514 MD5Update(&ctx, (char *)&pd->src->v4addr, sizeof(struct in_addr));
4515 MD5Update(&ctx, (char *)&pd->dst->v4addr, sizeof(struct in_addr));
4516 }
4517 MD5Final((u_char *)digest, &ctx);
4518 pf_tcp_iss_off += 4096;
4519 return digest[0] + random() + pf_tcp_iss_off;
4520 }
4521
4522 /*
4523 * This routine is called to perform address family translation on the
4524 * inner IP header (that may come as payload) of an ICMP(v4addr/6) error
4525 * response.
4526 */
4527 static __attribute__((noinline)) int
pf_change_icmp_af(pbuf_t * pbuf,int off,struct pf_pdesc * pd,struct pf_pdesc * pd2,struct pf_addr * src,struct pf_addr * dst,sa_family_t af,sa_family_t naf)4528 pf_change_icmp_af(pbuf_t *pbuf, int off,
4529 struct pf_pdesc *pd, struct pf_pdesc *pd2, struct pf_addr *src,
4530 struct pf_addr *dst, sa_family_t af, sa_family_t naf)
4531 {
4532 struct ip *__single ip4 = NULL;
4533 struct ip6_hdr *__single ip6 = NULL;
4534 void *__single hdr;
4535 int hlen, olen;
4536 uint64_t ipid_salt = (uint64_t)pbuf_get_packet_buffer_address(pbuf);
4537
4538 if (af == naf || (af != AF_INET && af != AF_INET6) ||
4539 (naf != AF_INET && naf != AF_INET6)) {
4540 return -1;
4541 }
4542
4543 /* old header */
4544 olen = pd2->off - off;
4545 /* new header */
4546 hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
4547
4548 /* Modify the pbuf to accommodate the new header */
4549 hdr = pbuf_resize_segment(pbuf, off, olen, hlen);
4550 if (hdr == NULL) {
4551 return -1;
4552 }
4553
4554 /* translate inner ip/ip6 header */
4555 switch (naf) {
4556 case AF_INET:
4557 ip4 = hdr;
4558 bzero(ip4, sizeof(*ip4));
4559 ip4->ip_v = IPVERSION;
4560 ip4->ip_hl = sizeof(*ip4) >> 2;
4561 ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - olen);
4562 ip4->ip_id = rfc6864 ? 0 : htons(ip_randomid(ipid_salt));
4563 ip4->ip_off = htons(IP_DF);
4564 ip4->ip_ttl = pd2->ttl;
4565 if (pd2->proto == IPPROTO_ICMPV6) {
4566 ip4->ip_p = IPPROTO_ICMP;
4567 } else {
4568 ip4->ip_p = pd2->proto;
4569 }
4570 ip4->ip_src = src->v4addr;
4571 ip4->ip_dst = dst->v4addr;
4572 ip4->ip_sum = pbuf_inet_cksum(pbuf, 0, 0, ip4->ip_hl << 2);
4573 break;
4574 case AF_INET6:
4575 ip6 = hdr;
4576 bzero(ip6, sizeof(*ip6));
4577 ip6->ip6_vfc = IPV6_VERSION;
4578 ip6->ip6_plen = htons(pd2->tot_len - olen);
4579 if (pd2->proto == IPPROTO_ICMP) {
4580 ip6->ip6_nxt = IPPROTO_ICMPV6;
4581 } else {
4582 ip6->ip6_nxt = pd2->proto;
4583 }
4584 if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM) {
4585 ip6->ip6_hlim = IPV6_DEFHLIM;
4586 } else {
4587 ip6->ip6_hlim = pd2->ttl;
4588 }
4589 ip6->ip6_src = src->v6addr;
4590 ip6->ip6_dst = dst->v6addr;
4591 break;
4592 }
4593
4594 /* adjust payload offset and total packet length */
4595 pd2->off += hlen - olen;
4596 pd->tot_len += hlen - olen;
4597
4598 return 0;
4599 }
4600
4601 #define PTR_IP(field) ((int32_t)offsetof(struct ip, field))
4602 #define PTR_IP6(field) ((int32_t)offsetof(struct ip6_hdr, field))
4603
/*
 * Rewrite an ICMP header in place between ICMPv4 and ICMPv6 forms for
 * NAT64/NAT46.  'af' is the address family being translated TO:
 * AF_INET means 'arg' points at an icmp6_hdr whose type/code/mtu/pptr
 * fields are rewritten into their ICMPv4 equivalents (the struct
 * layouts overlap, so the result is read back as a struct icmp);
 * AF_INET6 is the reverse direction on a struct icmp.
 *
 * Checksum fixup is the caller's responsibility.  Returns 0 on success,
 * -1 when the type/code combination has no translation (caller drops).
 */
static __attribute__((noinline)) int
pf_translate_icmp_af(int af, void *arg)
{
	struct icmp *__single icmp4;
	struct icmp6_hdr *__single icmp6;
	u_int32_t mtu;
	int32_t ptr = -1;
	u_int8_t type;
	u_int8_t code;

	switch (af) {
	case AF_INET:
		/* ICMPv6 -> ICMPv4 */
		icmp6 = (struct icmp6_hdr * __single)arg;
		type = icmp6->icmp6_type;
		code = icmp6->icmp6_code;
		mtu = ntohl(icmp6->icmp6_mtu);

		switch (type) {
		case ICMP6_ECHO_REQUEST:
			type = ICMP_ECHO;
			break;
		case ICMP6_ECHO_REPLY:
			type = ICMP_ECHOREPLY;
			break;
		case ICMP6_DST_UNREACH:
			type = ICMP_UNREACH;
			switch (code) {
			case ICMP6_DST_UNREACH_NOROUTE:
			case ICMP6_DST_UNREACH_BEYONDSCOPE:
			case ICMP6_DST_UNREACH_ADDR:
				code = ICMP_UNREACH_HOST;
				break;
			case ICMP6_DST_UNREACH_ADMIN:
				code = ICMP_UNREACH_HOST_PROHIB;
				break;
			case ICMP6_DST_UNREACH_NOPORT:
				code = ICMP_UNREACH_PORT;
				break;
			default:
				return -1;
			}
			break;
		case ICMP6_PACKET_TOO_BIG:
			type = ICMP_UNREACH;
			code = ICMP_UNREACH_NEEDFRAG;
			/* v6 MTU -> v4 MTU: the v4 header is 20 bytes smaller */
			mtu -= 20;
			break;
		case ICMP6_TIME_EXCEEDED:
			type = ICMP_TIMXCEED;
			break;
		case ICMP6_PARAM_PROB:
			switch (code) {
			case ICMP6_PARAMPROB_HEADER:
				type = ICMP_PARAMPROB;
				code = ICMP_PARAMPROB_ERRATPTR;
				/* map the problem pointer between header layouts */
				ptr = ntohl(icmp6->icmp6_pptr);

				if (ptr == PTR_IP6(ip6_vfc)) {
					; /* preserve */
				} else if (ptr == PTR_IP6(ip6_vfc) + 1) {
					ptr = PTR_IP(ip_tos);
				} else if (ptr == PTR_IP6(ip6_plen) ||
				    ptr == PTR_IP6(ip6_plen) + 1) {
					ptr = PTR_IP(ip_len);
				} else if (ptr == PTR_IP6(ip6_nxt)) {
					ptr = PTR_IP(ip_p);
				} else if (ptr == PTR_IP6(ip6_hlim)) {
					ptr = PTR_IP(ip_ttl);
				} else if (ptr >= PTR_IP6(ip6_src) &&
				    ptr < PTR_IP6(ip6_dst)) {
					ptr = PTR_IP(ip_src);
				} else if (ptr >= PTR_IP6(ip6_dst) &&
				    ptr < (int32_t)sizeof(struct ip6_hdr)) {
					ptr = PTR_IP(ip_dst);
				} else {
					/* pointer has no v4 equivalent */
					return -1;
				}
				break;
			case ICMP6_PARAMPROB_NEXTHEADER:
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_PROTOCOL;
				break;
			default:
				return -1;
			}
			break;
		default:
			return -1;
		}
		/* write results back; caller re-reads this as struct icmp */
		icmp6->icmp6_type = type;
		icmp6->icmp6_code = code;
		/* aligns well with a icmpv4 nextmtu */
		icmp6->icmp6_mtu = htonl(mtu);
		/* icmpv4 pptr is only the single most-significant byte */
		if (ptr >= 0) {
			icmp6->icmp6_pptr = htonl(ptr << 24);
		}
		break;

	case AF_INET6:
		/* ICMPv4 -> ICMPv6 */
		icmp4 = (struct icmp* __single)arg;
		type = icmp4->icmp_type;
		code = icmp4->icmp_code;
		mtu = ntohs(icmp4->icmp_nextmtu);

		switch (type) {
		case ICMP_ECHO:
			type = ICMP6_ECHO_REQUEST;
			break;
		case ICMP_ECHOREPLY:
			type = ICMP6_ECHO_REPLY;
			break;
		case ICMP_UNREACH:
			type = ICMP6_DST_UNREACH;
			switch (code) {
			case ICMP_UNREACH_NET:
			case ICMP_UNREACH_HOST:
			case ICMP_UNREACH_NET_UNKNOWN:
			case ICMP_UNREACH_HOST_UNKNOWN:
			case ICMP_UNREACH_ISOLATED:
			case ICMP_UNREACH_TOSNET:
			case ICMP_UNREACH_TOSHOST:
				code = ICMP6_DST_UNREACH_NOROUTE;
				break;
			case ICMP_UNREACH_PORT:
				code = ICMP6_DST_UNREACH_NOPORT;
				break;
			case ICMP_UNREACH_NET_PROHIB:
			case ICMP_UNREACH_HOST_PROHIB:
			case ICMP_UNREACH_FILTER_PROHIB:
			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
				code = ICMP6_DST_UNREACH_ADMIN;
				break;
			case ICMP_UNREACH_PROTOCOL:
				/* v6 reports unknown protocol as a param problem */
				type = ICMP6_PARAM_PROB;
				code = ICMP6_PARAMPROB_NEXTHEADER;
				ptr = offsetof(struct ip6_hdr, ip6_nxt);
				break;
			case ICMP_UNREACH_NEEDFRAG:
				type = ICMP6_PACKET_TOO_BIG;
				code = 0;
				/* v4 MTU -> v6 MTU: the v6 header is 20 bytes larger */
				mtu += 20;
				break;
			default:
				return -1;
			}
			break;
		case ICMP_TIMXCEED:
			type = ICMP6_TIME_EXCEEDED;
			break;
		case ICMP_PARAMPROB:
			type = ICMP6_PARAM_PROB;
			switch (code) {
			case ICMP_PARAMPROB_ERRATPTR:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			case ICMP_PARAMPROB_LENGTH:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			default:
				return -1;
			}

			/* map the problem pointer between header layouts */
			ptr = icmp4->icmp_pptr;
			if (ptr == 0 || ptr == PTR_IP(ip_tos)) {
				; /* preserve */
			} else if (ptr == PTR_IP(ip_len) ||
			    ptr == PTR_IP(ip_len) + 1) {
				ptr = PTR_IP6(ip6_plen);
			} else if (ptr == PTR_IP(ip_ttl)) {
				ptr = PTR_IP6(ip6_hlim);
			} else if (ptr == PTR_IP(ip_p)) {
				ptr = PTR_IP6(ip6_nxt);
			} else if (ptr >= PTR_IP(ip_src) &&
			    ptr < PTR_IP(ip_dst)) {
				ptr = PTR_IP6(ip6_src);
			} else if (ptr >= PTR_IP(ip_dst) &&
			    ptr < (int32_t)sizeof(struct ip)) {
				ptr = PTR_IP6(ip6_dst);
			} else {
				/* pointer has no v6 equivalent */
				return -1;
			}
			break;
		default:
			return -1;
		}
		/* write results back; caller re-reads this as icmp6_hdr */
		icmp4->icmp_type = type;
		icmp4->icmp_code = code;
		icmp4->icmp_nextmtu = htons(mtu);
		if (ptr >= 0) {
			icmp4->icmp_void = htonl(ptr);
		}
		break;
	}

	return 0;
}
4801
4802 /* Note: frees pbuf if PF_NAT64 is returned */
4803 static __attribute__((noinline)) int
pf_nat64_ipv6(pbuf_t * pbuf,int off,struct pf_pdesc * pd)4804 pf_nat64_ipv6(pbuf_t *pbuf, int off, struct pf_pdesc *pd)
4805 {
4806 struct ip *ip4;
4807 struct mbuf *m;
4808
4809 /*
4810 * ip_input asserts for rcvif to be not NULL
4811 * That may not be true for two corner cases
4812 * 1. If for some reason a local app sends DNS
4813 * AAAA query to local host
4814 * 2. If IPv6 stack in kernel internally generates a
4815 * message destined for a synthesized IPv6 end-point.
4816 */
4817 if (pbuf->pb_ifp == NULL) {
4818 return PF_DROP;
4819 }
4820
4821 ip4 = (struct ip *)pbuf_resize_segment(pbuf, 0, off, sizeof(*ip4));
4822 if (ip4 == NULL) {
4823 return PF_DROP;
4824 }
4825
4826 ip4->ip_v = 4;
4827 ip4->ip_hl = 5;
4828 ip4->ip_tos = pd->tos & htonl(0x0ff00000);
4829 ip4->ip_len = htons(sizeof(*ip4) + (pd->tot_len - off));
4830 ip4->ip_id = 0;
4831 ip4->ip_off = htons(IP_DF);
4832 ip4->ip_ttl = pd->ttl;
4833 ip4->ip_p = pd->proto;
4834 ip4->ip_sum = 0;
4835 ip4->ip_src = pd->naddr.v4addr;
4836 ip4->ip_dst = pd->ndaddr.v4addr;
4837 ip4->ip_sum = pbuf_inet_cksum(pbuf, 0, 0, ip4->ip_hl << 2);
4838
4839 /* recalculate icmp checksums */
4840 if (pd->proto == IPPROTO_ICMP) {
4841 struct icmp *icmp;
4842 int hlen = sizeof(*ip4);
4843
4844 icmp = (struct icmp *)pbuf_contig_segment(pbuf, hlen,
4845 ICMP_MINLEN);
4846 if (icmp == NULL) {
4847 return PF_DROP;
4848 }
4849
4850 icmp->icmp_cksum = 0;
4851 icmp->icmp_cksum = pbuf_inet_cksum(pbuf, 0, hlen,
4852 ntohs(ip4->ip_len) - hlen);
4853 }
4854
4855 if ((m = pbuf_to_mbuf(pbuf, TRUE)) != NULL) {
4856 ip_input(m);
4857 }
4858
4859 return PF_NAT64;
4860 }
4861
4862 static __attribute__((noinline)) int
pf_nat64_ipv4(pbuf_t * pbuf,int off,struct pf_pdesc * pd)4863 pf_nat64_ipv4(pbuf_t *pbuf, int off, struct pf_pdesc *pd)
4864 {
4865 struct ip6_hdr *ip6;
4866 struct mbuf *m;
4867
4868 if (pbuf->pb_ifp == NULL) {
4869 return PF_DROP;
4870 }
4871
4872 ip6 = (struct ip6_hdr *)pbuf_resize_segment(pbuf, 0, off, sizeof(*ip6));
4873 if (ip6 == NULL) {
4874 return PF_DROP;
4875 }
4876
4877 ip6->ip6_vfc = htonl((6 << 28) | (pd->tos << 20));
4878 ip6->ip6_plen = htons(pd->tot_len - off);
4879 ip6->ip6_nxt = pd->proto;
4880 ip6->ip6_hlim = pd->ttl;
4881 ip6->ip6_src = pd->naddr.v6addr;
4882 ip6->ip6_dst = pd->ndaddr.v6addr;
4883
4884 /* recalculate icmp6 checksums */
4885 if (pd->proto == IPPROTO_ICMPV6) {
4886 struct icmp6_hdr *icmp6;
4887 int hlen = sizeof(*ip6);
4888
4889 icmp6 = (struct icmp6_hdr *)pbuf_contig_segment(pbuf, hlen,
4890 sizeof(*icmp6));
4891 if (icmp6 == NULL) {
4892 return PF_DROP;
4893 }
4894
4895 icmp6->icmp6_cksum = 0;
4896 icmp6->icmp6_cksum = pbuf_inet6_cksum(pbuf,
4897 IPPROTO_ICMPV6, hlen,
4898 ntohs(ip6->ip6_plen));
4899 } else if (pd->proto == IPPROTO_UDP) {
4900 struct udphdr *uh;
4901 int hlen = sizeof(*ip6);
4902
4903 uh = (struct udphdr *)pbuf_contig_segment(pbuf, hlen,
4904 sizeof(*uh));
4905 if (uh == NULL) {
4906 return PF_DROP;
4907 }
4908
4909 if (uh->uh_sum == 0) {
4910 uh->uh_sum = pbuf_inet6_cksum(pbuf, IPPROTO_UDP,
4911 hlen, ntohs(ip6->ip6_plen));
4912 }
4913 }
4914
4915 if ((m = pbuf_to_mbuf(pbuf, TRUE)) != NULL) {
4916 ip6_input(m);
4917 }
4918
4919 return PF_NAT64;
4920 }
4921
4922 static __attribute__((noinline)) int
pf_test_rule(struct pf_rule ** rm,struct pf_state ** sm,int direction,struct pfi_kif * kif,pbuf_t * pbuf,int off,void * h,struct pf_pdesc * pd,struct pf_rule ** am,struct pf_ruleset ** rsm,struct ifqueue * ifq)4923 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
4924 struct pfi_kif *kif, pbuf_t *pbuf, int off, void *h,
4925 struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
4926 struct ifqueue *ifq)
4927 {
4928 #pragma unused(h)
4929 struct pf_rule *__single nr = NULL;
4930 struct pf_addr *__single saddr = pd->src, *__single daddr = pd->dst;
4931 sa_family_t af = pd->af;
4932 struct pf_rule *__single r, *__single a = NULL;
4933 struct pf_ruleset *__single ruleset = NULL;
4934 struct pf_src_node *__single nsn = NULL;
4935 struct tcphdr *__single th = pf_pd_get_hdr_tcp(pd);
4936 struct udphdr *__single uh = pf_pd_get_hdr_udp(pd);
4937 u_short reason;
4938 int rewrite = 0, hdrlen = 0;
4939 int tag = -1;
4940 unsigned int rtableid = IFSCOPE_NONE;
4941 int asd = 0;
4942 int match = 0;
4943 int state_icmp = 0;
4944 u_int16_t mss = tcp_mssdflt;
4945 u_int8_t icmptype = 0, icmpcode = 0;
4946 #if SKYWALK
4947 struct ns_token *__single nstoken = NULL;
4948 #endif
4949
4950 struct pf_grev1_hdr *__single grev1 = pf_pd_get_hdr_grev1(pd);
4951 union pf_state_xport bxport, bdxport, nxport, sxport, dxport;
4952 struct pf_state_key psk;
4953
4954 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
4955
4956 PD_CLEAR_STATE_FLOWID(pd);
4957
4958 if (direction == PF_IN && pf_check_congestion(ifq)) {
4959 REASON_SET(&reason, PFRES_CONGEST);
4960 return PF_DROP;
4961 }
4962
4963 hdrlen = 0;
4964 sxport.spi = 0;
4965 dxport.spi = 0;
4966 nxport.spi = 0;
4967
4968 switch (pd->proto) {
4969 case IPPROTO_TCP:
4970 sxport.port = th->th_sport;
4971 dxport.port = th->th_dport;
4972 hdrlen = sizeof(*th);
4973 break;
4974 case IPPROTO_UDP:
4975 sxport.port = uh->uh_sport;
4976 dxport.port = uh->uh_dport;
4977 hdrlen = sizeof(*uh);
4978 break;
4979 #if INET
4980 case IPPROTO_ICMP:
4981 if (pd->af != AF_INET) {
4982 break;
4983 }
4984 sxport.port = dxport.port = pf_pd_get_hdr_icmp(pd)->icmp_id;
4985 hdrlen = ICMP_MINLEN;
4986 icmptype = pf_pd_get_hdr_icmp(pd)->icmp_type;
4987 icmpcode = pf_pd_get_hdr_icmp(pd)->icmp_code;
4988
4989 if (ICMP_ERRORTYPE(icmptype)) {
4990 state_icmp++;
4991 }
4992 break;
4993 #endif /* INET */
4994 case IPPROTO_ICMPV6:
4995 if (pd->af != AF_INET6) {
4996 break;
4997 }
4998 sxport.port = dxport.port = pf_pd_get_hdr_icmp6(pd)->icmp6_id;
4999 hdrlen = sizeof(*pf_pd_get_hdr_icmp6(pd));
5000 icmptype = pf_pd_get_hdr_icmp6(pd)->icmp6_type;
5001 icmpcode = pf_pd_get_hdr_icmp6(pd)->icmp6_code;
5002
5003 if (ICMP6_ERRORTYPE(icmptype)) {
5004 state_icmp++;
5005 }
5006 break;
5007 case IPPROTO_GRE:
5008 if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
5009 sxport.call_id = dxport.call_id =
5010 pf_pd_get_hdr_grev1(pd)->call_id;
5011 hdrlen = sizeof(*pf_pd_get_hdr_grev1(pd));
5012 }
5013 break;
5014 case IPPROTO_ESP:
5015 sxport.spi = 0;
5016 dxport.spi = pf_pd_get_hdr_esp(pd)->spi;
5017 hdrlen = sizeof(*pf_pd_get_hdr_esp(pd));
5018 break;
5019 }
5020
5021 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
5022
5023 bxport = sxport;
5024 bdxport = dxport;
5025
5026 if (direction == PF_OUT) {
5027 nxport = sxport;
5028 } else {
5029 nxport = dxport;
5030 }
5031
5032 /* check packet for BINAT/NAT/RDR */
5033 if ((nr = pf_get_translation_aux(pd, pbuf, off, direction, kif, &nsn,
5034 saddr, &sxport, daddr, &dxport, &nxport
5035 #if SKYWALK
5036 , &nstoken
5037 #endif
5038 )) != NULL) {
5039 int ua;
5040 u_int16_t dport;
5041
5042 if (pd->af != pd->naf) {
5043 ua = 0;
5044 } else {
5045 ua = 1;
5046 }
5047
5048 PF_ACPY(&pd->baddr, saddr, af);
5049 PF_ACPY(&pd->bdaddr, daddr, af);
5050
5051 switch (pd->proto) {
5052 case IPPROTO_TCP:
5053 if (pd->af != pd->naf ||
5054 PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5055 pf_change_ap(direction, pd->mp, saddr,
5056 &th->th_sport, pd->ip_sum, &th->th_sum,
5057 &pd->naddr, nxport.port, 0, af,
5058 pd->naf, ua);
5059 sxport.port = th->th_sport;
5060 }
5061
5062 if (pd->af != pd->naf ||
5063 PF_ANEQ(daddr, &pd->ndaddr, pd->af) ||
5064 (nr && (nr->action == PF_RDR) &&
5065 (th->th_dport != nxport.port))) {
5066 if (nr && nr->action == PF_RDR) {
5067 dport = nxport.port;
5068 } else {
5069 dport = th->th_dport;
5070 }
5071 pf_change_ap(direction, pd->mp, daddr,
5072 &th->th_dport, pd->ip_sum,
5073 &th->th_sum, &pd->ndaddr,
5074 dport, 0, af, pd->naf, ua);
5075 dxport.port = th->th_dport;
5076 }
5077 rewrite++;
5078 break;
5079
5080 case IPPROTO_UDP:
5081 if (pd->af != pd->naf ||
5082 PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5083 pf_change_ap(direction, pd->mp, saddr,
5084 &uh->uh_sport, pd->ip_sum,
5085 &uh->uh_sum, &pd->naddr,
5086 nxport.port, 1, af, pd->naf, ua);
5087 sxport.port = uh->uh_sport;
5088 }
5089
5090 if (pd->af != pd->naf ||
5091 PF_ANEQ(daddr, &pd->ndaddr, pd->af) ||
5092 (nr && (nr->action == PF_RDR) &&
5093 (uh->uh_dport != nxport.port))) {
5094 if (nr && nr->action == PF_RDR) {
5095 dport = nxport.port;
5096 } else {
5097 dport = uh->uh_dport;
5098 }
5099 pf_change_ap(direction, pd->mp, daddr,
5100 &uh->uh_dport, pd->ip_sum,
5101 &uh->uh_sum, &pd->ndaddr,
5102 dport, 0, af, pd->naf, ua);
5103 dxport.port = uh->uh_dport;
5104 }
5105 rewrite++;
5106 break;
5107 #if INET
5108 case IPPROTO_ICMP:
5109 if (pd->af != AF_INET) {
5110 break;
5111 }
5112 /*
5113 * TODO:
5114 * pd->af != pd->naf not handled yet here and would be
5115 * needed for NAT46 needed to support XLAT.
5116 * Will cross the bridge when it comes.
5117 */
5118 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5119 pf_change_a(&saddr->v4addr.s_addr, pd->ip_sum,
5120 pd->naddr.v4addr.s_addr, 0);
5121 pf_pd_get_hdr_icmp(pd)->icmp_cksum = pf_cksum_fixup(
5122 pf_pd_get_hdr_icmp(pd)->icmp_cksum, sxport.port,
5123 nxport.port, 0);
5124 pf_pd_get_hdr_icmp(pd)->icmp_id = nxport.port;
5125 }
5126
5127 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5128 pf_change_a(&daddr->v4addr.s_addr, pd->ip_sum,
5129 pd->ndaddr.v4addr.s_addr, 0);
5130 }
5131 ++rewrite;
5132 break;
5133 #endif /* INET */
5134 case IPPROTO_ICMPV6:
5135 if (pd->af != AF_INET6) {
5136 break;
5137 }
5138
5139 if (pd->af != pd->naf ||
5140 PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5141 pf_change_addr(saddr,
5142 &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum,
5143 &pd->naddr, 0, pd->af, pd->naf);
5144 }
5145
5146 if (pd->af != pd->naf ||
5147 PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5148 pf_change_addr(daddr,
5149 &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum,
5150 &pd->ndaddr, 0, pd->af, pd->naf);
5151 }
5152
5153 if (pd->af != pd->naf) {
5154 if (pf_translate_icmp_af(AF_INET,
5155 pf_pd_get_hdr_icmp6(pd))) {
5156 return PF_DROP;
5157 }
5158 pd->proto = IPPROTO_ICMP;
5159 }
5160 rewrite++;
5161 break;
5162 case IPPROTO_GRE:
5163 if ((direction == PF_IN) &&
5164 (pd->proto_variant == PF_GRE_PPTP_VARIANT)) {
5165 grev1->call_id = nxport.call_id;
5166 }
5167
5168 switch (pd->af) {
5169 #if INET
5170 case AF_INET:
5171 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5172 pf_change_a(&saddr->v4addr.s_addr,
5173 pd->ip_sum,
5174 pd->naddr.v4addr.s_addr, 0);
5175 }
5176 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5177 pf_change_a(&daddr->v4addr.s_addr,
5178 pd->ip_sum,
5179 pd->ndaddr.v4addr.s_addr, 0);
5180 }
5181 break;
5182 #endif /* INET */
5183 case AF_INET6:
5184 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5185 PF_ACPY(saddr, &pd->naddr, AF_INET6);
5186 }
5187 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5188 PF_ACPY(daddr, &pd->ndaddr, AF_INET6);
5189 }
5190 break;
5191 }
5192 ++rewrite;
5193 break;
5194 case IPPROTO_ESP:
5195 if (direction == PF_OUT) {
5196 bxport.spi = 0;
5197 }
5198
5199 switch (pd->af) {
5200 #if INET
5201 case AF_INET:
5202 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5203 pf_change_a(&saddr->v4addr.s_addr,
5204 pd->ip_sum, pd->naddr.v4addr.s_addr, 0);
5205 }
5206 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5207 pf_change_a(&daddr->v4addr.s_addr,
5208 pd->ip_sum,
5209 pd->ndaddr.v4addr.s_addr, 0);
5210 }
5211 break;
5212 #endif /* INET */
5213 case AF_INET6:
5214 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5215 PF_ACPY(saddr, &pd->naddr, AF_INET6);
5216 }
5217 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5218 PF_ACPY(daddr, &pd->ndaddr, AF_INET6);
5219 }
5220 break;
5221 }
5222 break;
5223 default:
5224 switch (pd->af) {
5225 #if INET
5226 case AF_INET:
5227 if ((pd->naf != AF_INET) ||
5228 (PF_ANEQ(saddr, &pd->naddr, pd->af))) {
5229 pf_change_addr(saddr, pd->ip_sum,
5230 &pd->naddr, 0, af, pd->naf);
5231 }
5232
5233 if ((pd->naf != AF_INET) ||
5234 (PF_ANEQ(daddr, &pd->ndaddr, pd->af))) {
5235 pf_change_addr(daddr, pd->ip_sum,
5236 &pd->ndaddr, 0, af, pd->naf);
5237 }
5238 break;
5239 #endif /* INET */
5240 case AF_INET6:
5241 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5242 PF_ACPY(saddr, &pd->naddr, af);
5243 }
5244 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5245 PF_ACPY(daddr, &pd->ndaddr, af);
5246 }
5247 break;
5248 }
5249 break;
5250 }
5251
5252 if (nr->natpass) {
5253 r = NULL;
5254 }
5255 pd->nat_rule = nr;
5256 pd->af = pd->naf;
5257 } else {
5258 #if SKYWALK
5259 VERIFY(!NETNS_TOKEN_VALID(&nstoken));
5260 #endif
5261 }
5262
5263 if (nr && nr->tag > 0) {
5264 tag = nr->tag;
5265 }
5266
5267 while (r != NULL) {
5268 r->evaluations++;
5269 if (pfi_kif_match(r->kif, kif) == r->ifnot) {
5270 r = r->skip[PF_SKIP_IFP].ptr;
5271 } else if (r->direction && r->direction != direction) {
5272 r = r->skip[PF_SKIP_DIR].ptr;
5273 } else if (r->af && r->af != pd->af) {
5274 r = r->skip[PF_SKIP_AF].ptr;
5275 } else if (r->proto && r->proto != pd->proto) {
5276 r = r->skip[PF_SKIP_PROTO].ptr;
5277 } else if (PF_MISMATCHAW(&r->src.addr, saddr, pd->af,
5278 r->src.neg, kif)) {
5279 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
5280 }
5281 /* tcp/udp only. port_op always 0 in other cases */
5282 else if (r->proto == pd->proto &&
5283 (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
5284 r->src.xport.range.op &&
5285 !pf_match_port(r->src.xport.range.op,
5286 r->src.xport.range.port[0], r->src.xport.range.port[1],
5287 th->th_sport)) {
5288 r = r->skip[PF_SKIP_SRC_PORT].ptr;
5289 } else if (PF_MISMATCHAW(&r->dst.addr, daddr, pd->af,
5290 r->dst.neg, NULL)) {
5291 r = r->skip[PF_SKIP_DST_ADDR].ptr;
5292 }
5293 /* tcp/udp only. port_op always 0 in other cases */
5294 else if (r->proto == pd->proto &&
5295 (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
5296 r->dst.xport.range.op &&
5297 !pf_match_port(r->dst.xport.range.op,
5298 r->dst.xport.range.port[0], r->dst.xport.range.port[1],
5299 th->th_dport)) {
5300 r = r->skip[PF_SKIP_DST_PORT].ptr;
5301 }
5302 /* icmp only. type always 0 in other cases */
5303 else if (r->type && r->type != icmptype + 1) {
5304 r = TAILQ_NEXT(r, entries);
5305 }
5306 /* icmp only. type always 0 in other cases */
5307 else if (r->code && r->code != icmpcode + 1) {
5308 r = TAILQ_NEXT(r, entries);
5309 } else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
5310 !(r->tos & pd->tos)) {
5311 r = TAILQ_NEXT(r, entries);
5312 } else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
5313 !(r->tos & (pd->tos & DSCP_MASK))) {
5314 r = TAILQ_NEXT(r, entries);
5315 } else if ((r->rule_flag & PFRULE_SC) && r->tos &&
5316 ((r->tos & SCIDX_MASK) != pd->sc)) {
5317 r = TAILQ_NEXT(r, entries);
5318 } else if (r->rule_flag & PFRULE_FRAGMENT) {
5319 r = TAILQ_NEXT(r, entries);
5320 } else if (pd->proto == IPPROTO_TCP &&
5321 (r->flagset & th->th_flags) != r->flags) {
5322 r = TAILQ_NEXT(r, entries);
5323 }
5324 /* tcp/udp only. uid.op always 0 in other cases */
5325 else if (r->uid.op && (pd->lookup.done || ((void)(pd->lookup.done =
5326 pf_socket_lookup(direction, pd)), 1)) &&
5327 !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
5328 pd->lookup.uid)) {
5329 r = TAILQ_NEXT(r, entries);
5330 }
5331 /* tcp/udp only. gid.op always 0 in other cases */
5332 else if (r->gid.op && (pd->lookup.done || ((void)(pd->lookup.done =
5333 pf_socket_lookup(direction, pd)), 1)) &&
5334 !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
5335 pd->lookup.gid)) {
5336 r = TAILQ_NEXT(r, entries);
5337 } else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
5338 r = TAILQ_NEXT(r, entries);
5339 } else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
5340 r = TAILQ_NEXT(r, entries);
5341 } else if (r->os_fingerprint != PF_OSFP_ANY &&
5342 (pd->proto != IPPROTO_TCP || !pf_osfp_match(
5343 pf_osfp_fingerprint(pd, pbuf, off, th),
5344 r->os_fingerprint))) {
5345 r = TAILQ_NEXT(r, entries);
5346 } else {
5347 if (r->tag) {
5348 tag = r->tag;
5349 }
5350 if (PF_RTABLEID_IS_VALID(r->rtableid)) {
5351 rtableid = r->rtableid;
5352 }
5353 if (r->anchor == NULL) {
5354 match = 1;
5355 *rm = r;
5356 *am = a;
5357 *rsm = ruleset;
5358 if ((*rm)->quick) {
5359 break;
5360 }
5361 r = TAILQ_NEXT(r, entries);
5362 } else {
5363 pf_step_into_anchor(&asd, &ruleset,
5364 PF_RULESET_FILTER, &r, &a, &match);
5365 }
5366 }
5367 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
5368 PF_RULESET_FILTER, &r, &a, &match)) {
5369 break;
5370 }
5371 }
5372 r = *rm;
5373 a = *am;
5374 ruleset = *rsm;
5375
5376 REASON_SET(&reason, PFRES_MATCH);
5377
5378 if (r->log || (nr != NULL && nr->log)) {
5379 if (rewrite > 0) {
5380 if (rewrite < off + pd->hdrlen) {
5381 rewrite = off + pd->hdrlen;
5382 }
5383
5384 if (pf_lazy_makewritable(pd, pbuf, rewrite) == NULL) {
5385 REASON_SET(&reason, PFRES_MEMORY);
5386 #if SKYWALK
5387 netns_release(&nstoken);
5388 #endif
5389 return PF_DROP;
5390 }
5391 pbuf_copy_back(pbuf, off, pd->hdrlen, pf_pd_get_hdr_ptr_any(pd), pd->hdrlen);
5392 }
5393 PFLOG_PACKET(kif, h, pbuf, pd->af, direction, reason,
5394 r->log ? r : nr, a, ruleset, pd);
5395 }
5396
5397 if ((r->action == PF_DROP) &&
5398 ((r->rule_flag & PFRULE_RETURNRST) ||
5399 (r->rule_flag & PFRULE_RETURNICMP) ||
5400 (r->rule_flag & PFRULE_RETURN))) {
5401 /* undo NAT changes, if they have taken place */
5402 /* XXX For NAT64 we are not reverting the changes */
5403 if (nr != NULL && nr->action != PF_NAT64) {
5404 if (direction == PF_OUT) {
5405 pd->af = af;
5406 switch (pd->proto) {
5407 case IPPROTO_TCP:
5408 pf_change_ap(direction, pd->mp, saddr,
5409 &th->th_sport, pd->ip_sum,
5410 &th->th_sum, &pd->baddr,
5411 bxport.port, 0, af, pd->af, 1);
5412 sxport.port = th->th_sport;
5413 rewrite++;
5414 break;
5415 case IPPROTO_UDP:
5416 pf_change_ap(direction, pd->mp, saddr,
5417 &pf_pd_get_hdr_udp(pd)->uh_sport, pd->ip_sum,
5418 &pf_pd_get_hdr_udp(pd)->uh_sum, &pd->baddr,
5419 bxport.port, 1, af, pd->af, 1);
5420 sxport.port = pf_pd_get_hdr_udp(pd)->uh_sport;
5421 rewrite++;
5422 break;
5423 case IPPROTO_ICMP:
5424 case IPPROTO_ICMPV6:
5425 /* nothing! */
5426 break;
5427 case IPPROTO_GRE:
5428 PF_ACPY(&pd->baddr, saddr, af);
5429 ++rewrite;
5430 switch (af) {
5431 #if INET
5432 case AF_INET:
5433 pf_change_a(&saddr->v4addr.s_addr,
5434 pd->ip_sum,
5435 pd->baddr.v4addr.s_addr, 0);
5436 break;
5437 #endif /* INET */
5438 case AF_INET6:
5439 PF_ACPY(saddr, &pd->baddr,
5440 AF_INET6);
5441 break;
5442 }
5443 break;
5444 case IPPROTO_ESP:
5445 PF_ACPY(&pd->baddr, saddr, af);
5446 switch (af) {
5447 #if INET
5448 case AF_INET:
5449 pf_change_a(&saddr->v4addr.s_addr,
5450 pd->ip_sum,
5451 pd->baddr.v4addr.s_addr, 0);
5452 break;
5453 #endif /* INET */
5454 case AF_INET6:
5455 PF_ACPY(saddr, &pd->baddr,
5456 AF_INET6);
5457 break;
5458 }
5459 break;
5460 default:
5461 switch (af) {
5462 case AF_INET:
5463 pf_change_a(&saddr->v4addr.s_addr,
5464 pd->ip_sum,
5465 pd->baddr.v4addr.s_addr, 0);
5466 break;
5467 case AF_INET6:
5468 PF_ACPY(saddr, &pd->baddr, af);
5469 break;
5470 }
5471 }
5472 } else {
5473 switch (pd->proto) {
5474 case IPPROTO_TCP:
5475 pf_change_ap(direction, pd->mp, daddr,
5476 &th->th_dport, pd->ip_sum,
5477 &th->th_sum, &pd->bdaddr,
5478 bdxport.port, 0, af, pd->af, 1);
5479 dxport.port = th->th_dport;
5480 rewrite++;
5481 break;
5482 case IPPROTO_UDP:
5483 pf_change_ap(direction, pd->mp, daddr,
5484 &pf_pd_get_hdr_udp(pd)->uh_dport, pd->ip_sum,
5485 &pf_pd_get_hdr_udp(pd)->uh_sum, &pd->bdaddr,
5486 bdxport.port, 1, af, pd->af, 1);
5487 dxport.port = pf_pd_get_hdr_udp(pd)->uh_dport;
5488 rewrite++;
5489 break;
5490 case IPPROTO_ICMP:
5491 case IPPROTO_ICMPV6:
5492 /* nothing! */
5493 break;
5494 case IPPROTO_GRE:
5495 if (pd->proto_variant ==
5496 PF_GRE_PPTP_VARIANT) {
5497 grev1->call_id =
5498 bdxport.call_id;
5499 }
5500 ++rewrite;
5501 switch (af) {
5502 #if INET
5503 case AF_INET:
5504 pf_change_a(&daddr->v4addr.s_addr,
5505 pd->ip_sum,
5506 pd->bdaddr.v4addr.s_addr, 0);
5507 break;
5508 #endif /* INET */
5509 case AF_INET6:
5510 PF_ACPY(daddr, &pd->bdaddr,
5511 AF_INET6);
5512 break;
5513 }
5514 break;
5515 case IPPROTO_ESP:
5516 switch (af) {
5517 #if INET
5518 case AF_INET:
5519 pf_change_a(&daddr->v4addr.s_addr,
5520 pd->ip_sum,
5521 pd->bdaddr.v4addr.s_addr, 0);
5522 break;
5523 #endif /* INET */
5524 case AF_INET6:
5525 PF_ACPY(daddr, &pd->bdaddr,
5526 AF_INET6);
5527 break;
5528 }
5529 break;
5530 default:
5531 switch (af) {
5532 case AF_INET:
5533 pf_change_a(&daddr->v4addr.s_addr,
5534 pd->ip_sum,
5535 pd->bdaddr.v4addr.s_addr, 0);
5536 break;
5537 case AF_INET6:
5538 PF_ACPY(daddr, &pd->bdaddr, af);
5539 break;
5540 }
5541 }
5542 }
5543 }
5544 if (pd->proto == IPPROTO_TCP &&
5545 ((r->rule_flag & PFRULE_RETURNRST) ||
5546 (r->rule_flag & PFRULE_RETURN)) &&
5547 !(th->th_flags & TH_RST)) {
5548 u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
5549 int len = 0;
5550 struct ip *__single h4;
5551 struct ip6_hdr *__single h6;
5552
5553 switch (pd->af) {
5554 case AF_INET:
5555 h4 = pbuf->pb_data;
5556 len = ntohs(h4->ip_len) - off;
5557 break;
5558 case AF_INET6:
5559 h6 = pbuf->pb_data;
5560 len = ntohs(h6->ip6_plen) -
5561 (off - sizeof(*h6));
5562 break;
5563 }
5564
5565 if (pf_check_proto_cksum(pbuf, off, len, IPPROTO_TCP,
5566 pd->af)) {
5567 REASON_SET(&reason, PFRES_PROTCKSUM);
5568 } else {
5569 if (th->th_flags & TH_SYN) {
5570 ack++;
5571 }
5572 if (th->th_flags & TH_FIN) {
5573 ack++;
5574 }
5575 pf_send_tcp(r, pd->af, pd->dst,
5576 pd->src, th->th_dport, th->th_sport,
5577 ntohl(th->th_ack), ack, TH_RST | TH_ACK, 0, 0,
5578 r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
5579 }
5580 } else if (pd->proto != IPPROTO_ICMP && pd->af == AF_INET &&
5581 pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
5582 r->return_icmp) {
5583 pf_send_icmp(pbuf, r->return_icmp >> 8,
5584 r->return_icmp & 255, pd->af, r);
5585 } else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
5586 pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
5587 r->return_icmp6) {
5588 pf_send_icmp(pbuf, r->return_icmp6 >> 8,
5589 r->return_icmp6 & 255, pd->af, r);
5590 }
5591 }
5592
5593 if (r->action == PF_DROP) {
5594 #if SKYWALK
5595 netns_release(&nstoken);
5596 #endif
5597 return PF_DROP;
5598 }
5599
5600 /* prepare state key, for flowhash and/or the state (if created) */
5601 bzero(&psk, sizeof(psk));
5602 psk.proto = pd->proto;
5603 psk.direction = direction;
5604 if (pd->proto == IPPROTO_UDP) {
5605 if (ntohs(pf_pd_get_hdr_udp(pd)->uh_sport) == PF_IKE_PORT &&
5606 ntohs(pf_pd_get_hdr_udp(pd)->uh_dport) == PF_IKE_PORT) {
5607 psk.proto_variant = PF_EXTFILTER_APD;
5608 } else {
5609 psk.proto_variant = nr ? nr->extfilter : r->extfilter;
5610 if (psk.proto_variant < PF_EXTFILTER_APD) {
5611 psk.proto_variant = PF_EXTFILTER_APD;
5612 }
5613 }
5614 } else if (pd->proto == IPPROTO_GRE) {
5615 psk.proto_variant = pd->proto_variant;
5616 }
5617 if (direction == PF_OUT) {
5618 psk.af_gwy = af;
5619 PF_ACPY(&psk.gwy.addr, saddr, af);
5620 PF_ACPY(&psk.ext_gwy.addr, daddr, af);
5621 switch (pd->proto) {
5622 case IPPROTO_ESP:
5623 psk.gwy.xport.spi = 0;
5624 psk.ext_gwy.xport.spi = pf_pd_get_hdr_esp(pd)->spi;
5625 break;
5626 case IPPROTO_ICMP:
5627 case IPPROTO_ICMPV6:
5628 /*
5629 * NAT64 requires protocol translation between ICMPv4
5630 * and ICMPv6. TCP and UDP do not require protocol
5631 * translation. To avoid adding complexity just to
5632 * handle ICMP(v4addr/v6addr), we always lookup for
5633 * proto = IPPROTO_ICMP on both LAN and WAN side
5634 */
5635 psk.proto = IPPROTO_ICMP;
5636 psk.gwy.xport.port = nxport.port;
5637 psk.ext_gwy.xport.spi = 0;
5638 break;
5639 default:
5640 psk.gwy.xport = sxport;
5641 psk.ext_gwy.xport = dxport;
5642 break;
5643 }
5644 psk.af_lan = af;
5645 if (nr != NULL) {
5646 PF_ACPY(&psk.lan.addr, &pd->baddr, af);
5647 psk.lan.xport = bxport;
5648 PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af);
5649 psk.ext_lan.xport = bdxport;
5650 } else {
5651 PF_ACPY(&psk.lan.addr, &psk.gwy.addr, af);
5652 psk.lan.xport = psk.gwy.xport;
5653 PF_ACPY(&psk.ext_lan.addr, &psk.ext_gwy.addr, af);
5654 psk.ext_lan.xport = psk.ext_gwy.xport;
5655 }
5656 } else {
5657 psk.af_lan = af;
5658 if (nr && nr->action == PF_NAT64) {
5659 PF_ACPY(&psk.lan.addr, &pd->baddr, af);
5660 PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af);
5661 } else {
5662 PF_ACPY(&psk.lan.addr, daddr, af);
5663 PF_ACPY(&psk.ext_lan.addr, saddr, af);
5664 }
5665 switch (pd->proto) {
5666 case IPPROTO_ICMP:
5667 case IPPROTO_ICMPV6:
5668 /*
5669 * NAT64 requires protocol translation between ICMPv4
5670 * and ICMPv6. TCP and UDP do not require protocol
5671 * translation. To avoid adding complexity just to
5672 * handle ICMP(v4addr/v6addr), we always lookup for
5673 * proto = IPPROTO_ICMP on both LAN and WAN side
5674 */
5675 psk.proto = IPPROTO_ICMP;
5676 if (nr && nr->action == PF_NAT64) {
5677 psk.lan.xport = bxport;
5678 psk.ext_lan.xport = bxport;
5679 } else {
5680 psk.lan.xport = nxport;
5681 psk.ext_lan.xport.spi = 0;
5682 }
5683 break;
5684 case IPPROTO_ESP:
5685 psk.ext_lan.xport.spi = 0;
5686 psk.lan.xport.spi = pf_pd_get_hdr_esp(pd)->spi;
5687 break;
5688 default:
5689 if (nr != NULL) {
5690 if (nr->action == PF_NAT64) {
5691 psk.lan.xport = bxport;
5692 psk.ext_lan.xport = bdxport;
5693 } else {
5694 psk.lan.xport = dxport;
5695 psk.ext_lan.xport = sxport;
5696 }
5697 } else {
5698 psk.lan.xport = dxport;
5699 psk.ext_lan.xport = sxport;
5700 }
5701 break;
5702 }
5703 psk.af_gwy = pd->naf;
5704 if (nr != NULL) {
5705 if (nr->action == PF_NAT64) {
5706 PF_ACPY(&psk.gwy.addr, &pd->naddr, pd->naf);
5707 PF_ACPY(&psk.ext_gwy.addr, &pd->ndaddr,
5708 pd->naf);
5709 if ((pd->proto == IPPROTO_ICMPV6) ||
5710 (pd->proto == IPPROTO_ICMP)) {
5711 psk.gwy.xport = nxport;
5712 psk.ext_gwy.xport = nxport;
5713 } else {
5714 psk.gwy.xport = sxport;
5715 psk.ext_gwy.xport = dxport;
5716 }
5717 } else {
5718 PF_ACPY(&psk.gwy.addr, &pd->bdaddr, af);
5719 psk.gwy.xport = bdxport;
5720 PF_ACPY(&psk.ext_gwy.addr, saddr, af);
5721 psk.ext_gwy.xport = sxport;
5722 }
5723 } else {
5724 PF_ACPY(&psk.gwy.addr, &psk.lan.addr, af);
5725 psk.gwy.xport = psk.lan.xport;
5726 PF_ACPY(&psk.ext_gwy.addr, &psk.ext_lan.addr, af);
5727 psk.ext_gwy.xport = psk.ext_lan.xport;
5728 }
5729 }
5730 if (pd->pktflags & PKTF_FLOW_ID) {
5731 /* flow hash was already computed outside of PF */
5732 psk.flowsrc = pd->flowsrc;
5733 psk.flowhash = pd->flowhash;
5734 } else {
5735 /*
5736 * Allocation of flow identifier is deferred until a PF state
5737 * creation is needed for this flow.
5738 */
5739 pd->pktflags &= ~PKTF_FLOW_ADV;
5740 pd->flowhash = 0;
5741 }
5742
5743 if (__improbable(pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, pd))) {
5744 REASON_SET(&reason, PFRES_MEMORY);
5745 #if SKYWALK
5746 netns_release(&nstoken);
5747 #endif
5748 return PF_DROP;
5749 }
5750
5751 if (!state_icmp && (r->keep_state || nr != NULL ||
5752 (pd->flags & PFDESC_TCP_NORM))) {
5753 /* create new state */
5754 struct pf_state *__single s = NULL;
5755 struct pf_state_key *__single sk = NULL;
5756 struct pf_src_node *__single sn = NULL;
5757 struct pf_ike_hdr ike;
5758
5759 if (pd->proto == IPPROTO_UDP) {
5760 size_t plen = pbuf->pb_packet_len - off - sizeof(*uh);
5761
5762 if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
5763 ntohs(uh->uh_dport) == PF_IKE_PORT &&
5764 plen >= PF_IKE_PACKET_MINSIZE) {
5765 if (plen > PF_IKE_PACKET_MINSIZE) {
5766 plen = PF_IKE_PACKET_MINSIZE;
5767 }
5768 pbuf_copy_data(pbuf, off + sizeof(*uh), plen,
5769 &ike, sizeof(ike));
5770 }
5771 }
5772
5773 if (nr != NULL && pd->proto == IPPROTO_ESP &&
5774 direction == PF_OUT) {
5775 struct pf_state_key_cmp sk0;
5776 struct pf_state *s0;
5777
5778 /*
5779 * <[email protected]>
5780 * This squelches state creation if the external
5781 * address matches an existing incomplete state with a
5782 * different internal address. Only one 'blocking'
5783 * partial state is allowed for each external address.
5784 */
5785 #if SKYWALK
5786 /*
5787 * XXXSCW:
5788 *
5789 * It's not clear how this impacts netns. The original
5790 * state will hold the port reservation token but what
5791 * happens to other "Cone NAT" states when the first is
5792 * torn down?
5793 */
5794 #endif
5795 memset(&sk0, 0, sizeof(sk0));
5796 sk0.af_gwy = pd->af;
5797 sk0.proto = IPPROTO_ESP;
5798 PF_ACPY(&sk0.gwy.addr, saddr, sk0.af_gwy);
5799 PF_ACPY(&sk0.ext_gwy.addr, daddr, sk0.af_gwy);
5800 s0 = pf_find_state(kif, &sk0, PF_IN);
5801
5802 if (s0 && PF_ANEQ(&s0->state_key->lan.addr,
5803 pd->src, pd->af)) {
5804 nsn = 0;
5805 goto cleanup;
5806 }
5807 }
5808
5809 /* check maximums */
5810 if (r->max_states && (r->states >= r->max_states)) {
5811 pf_status.lcounters[LCNT_STATES]++;
5812 REASON_SET(&reason, PFRES_MAXSTATES);
5813 goto cleanup;
5814 }
5815 /* src node for filter rule */
5816 if ((r->rule_flag & PFRULE_SRCTRACK ||
5817 r->rpool.opts & PF_POOL_STICKYADDR) &&
5818 pf_insert_src_node(&sn, r, saddr, af) != 0) {
5819 REASON_SET(&reason, PFRES_SRCLIMIT);
5820 goto cleanup;
5821 }
5822 /* src node for translation rule */
5823 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
5824 ((direction == PF_OUT &&
5825 nr->action != PF_RDR &&
5826 pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
5827 (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
5828 REASON_SET(&reason, PFRES_SRCLIMIT);
5829 goto cleanup;
5830 }
5831 s = pool_get(&pf_state_pl, PR_WAITOK);
5832 if (s == NULL) {
5833 REASON_SET(&reason, PFRES_MEMORY);
5834 cleanup:
5835 if (sn != NULL && sn->states == 0 && sn->expire == 0) {
5836 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
5837 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
5838 pf_status.src_nodes--;
5839 pool_put(&pf_src_tree_pl, sn);
5840 }
5841 if (nsn != sn && nsn != NULL && nsn->states == 0 &&
5842 nsn->expire == 0) {
5843 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
5844 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
5845 pf_status.src_nodes--;
5846 pool_put(&pf_src_tree_pl, nsn);
5847 }
5848 if (s != NULL) {
5849 pf_detach_state(s, 0);
5850 } else if (sk != NULL) {
5851 if (sk->app_state) {
5852 pool_put(&pf_app_state_pl,
5853 sk->app_state);
5854 }
5855 pf_state_key_release_flowid(sk);
5856 pool_put(&pf_state_key_pl, sk);
5857 }
5858 #if SKYWALK
5859 netns_release(&nstoken);
5860 #endif
5861 return PF_DROP;
5862 }
5863 bzero(s, sizeof(*s));
5864 TAILQ_INIT(&s->unlink_hooks);
5865 s->rule.ptr = r;
5866 s->nat_rule.ptr = nr;
5867 s->anchor.ptr = a;
5868 STATE_INC_COUNTERS(s);
5869 s->allow_opts = r->allow_opts;
5870 s->log = r->log & PF_LOG_ALL;
5871 if (nr != NULL) {
5872 s->log |= nr->log & PF_LOG_ALL;
5873 }
5874 switch (pd->proto) {
5875 case IPPROTO_TCP:
5876 s->src.seqlo = ntohl(th->th_seq);
5877 s->src.seqhi = s->src.seqlo + pd->p_len + 1;
5878 if ((th->th_flags & (TH_SYN | TH_ACK)) ==
5879 TH_SYN && r->keep_state == PF_STATE_MODULATE) {
5880 /* Generate sequence number modulator */
5881 if ((s->src.seqdiff = pf_tcp_iss(pd) -
5882 s->src.seqlo) == 0) {
5883 s->src.seqdiff = 1;
5884 }
5885 pf_change_a(&th->th_seq, &th->th_sum,
5886 htonl(s->src.seqlo + s->src.seqdiff), 0);
5887 rewrite = off + sizeof(*th);
5888 } else {
5889 s->src.seqdiff = 0;
5890 }
5891 if (th->th_flags & TH_SYN) {
5892 s->src.seqhi++;
5893 s->src.wscale = pf_get_wscale(pbuf, off,
5894 th->th_off, af);
5895 }
5896 s->src.max_win = MAX(ntohs(th->th_win), 1);
5897 if (s->src.wscale & PF_WSCALE_MASK) {
5898 /* Remove scale factor from initial window */
5899 int win = s->src.max_win;
5900 win += 1 << (s->src.wscale & PF_WSCALE_MASK);
5901 s->src.max_win = (win - 1) >>
5902 (s->src.wscale & PF_WSCALE_MASK);
5903 }
5904 if (th->th_flags & TH_FIN) {
5905 s->src.seqhi++;
5906 }
5907 s->dst.seqhi = 1;
5908 s->dst.max_win = 1;
5909 s->src.state = TCPS_SYN_SENT;
5910 s->dst.state = TCPS_CLOSED;
5911 s->timeout = PFTM_TCP_FIRST_PACKET;
5912 break;
5913 case IPPROTO_UDP:
5914 s->src.state = PFUDPS_SINGLE;
5915 s->dst.state = PFUDPS_NO_TRAFFIC;
5916 s->timeout = PFTM_UDP_FIRST_PACKET;
5917 break;
5918 case IPPROTO_ICMP:
5919 case IPPROTO_ICMPV6:
5920 s->timeout = PFTM_ICMP_FIRST_PACKET;
5921 break;
5922 case IPPROTO_GRE:
5923 s->src.state = PFGRE1S_INITIATING;
5924 s->dst.state = PFGRE1S_NO_TRAFFIC;
5925 s->timeout = PFTM_GREv1_INITIATING;
5926 break;
5927 case IPPROTO_ESP:
5928 s->src.state = PFESPS_INITIATING;
5929 s->dst.state = PFESPS_NO_TRAFFIC;
5930 s->timeout = PFTM_ESP_FIRST_PACKET;
5931 break;
5932 default:
5933 s->src.state = PFOTHERS_SINGLE;
5934 s->dst.state = PFOTHERS_NO_TRAFFIC;
5935 s->timeout = PFTM_OTHER_FIRST_PACKET;
5936 }
5937
5938 s->creation = pf_time_second();
5939 s->expire = pf_time_second();
5940
5941 if (sn != NULL) {
5942 s->src_node = sn;
5943 s->src_node->states++;
5944 VERIFY(s->src_node->states != 0);
5945 }
5946 if (nsn != NULL) {
5947 PF_ACPY(&nsn->raddr, &pd->naddr, af);
5948 s->nat_src_node = nsn;
5949 s->nat_src_node->states++;
5950 VERIFY(s->nat_src_node->states != 0);
5951 }
5952 if (pd->proto == IPPROTO_TCP) {
5953 if ((pd->flags & PFDESC_TCP_NORM) &&
5954 pf_normalize_tcp_init(pbuf, off, pd, th, &s->src,
5955 &s->dst)) {
5956 REASON_SET(&reason, PFRES_MEMORY);
5957 pf_src_tree_remove_state(s);
5958 STATE_DEC_COUNTERS(s);
5959 #if SKYWALK
5960 netns_release(&nstoken);
5961 #endif
5962 pool_put(&pf_state_pl, s);
5963 return PF_DROP;
5964 }
5965 if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
5966 pf_normalize_tcp_stateful(pbuf, off, pd, &reason,
5967 th, s, &s->src, &s->dst, &rewrite)) {
5968 /* This really shouldn't happen!!! */
5969 DPFPRINTF(PF_DEBUG_URGENT,
5970 ("pf_normalize_tcp_stateful failed on "
5971 "first pkt"));
5972 #if SKYWALK
5973 netns_release(&nstoken);
5974 #endif
5975 pf_normalize_tcp_cleanup(s);
5976 pf_src_tree_remove_state(s);
5977 STATE_DEC_COUNTERS(s);
5978 pool_put(&pf_state_pl, s);
5979 return PF_DROP;
5980 }
5981 }
5982
5983 /* allocate state key and import values from psk */
5984 if (__improbable((sk = pf_alloc_state_key(s, &psk)) == NULL)) {
5985 REASON_SET(&reason, PFRES_MEMORY);
5986 /*
5987 * XXXSCW: This will leak the freshly-allocated
5988 * state structure 's'. Although it should
5989 * eventually be aged-out and removed.
5990 */
5991 goto cleanup;
5992 }
5993
5994 if (pd->flowhash == 0) {
5995 ASSERT(sk->flowhash != 0);
5996 ASSERT(sk->flowsrc != 0);
5997 pd->flowsrc = sk->flowsrc;
5998 pd->flowhash = sk->flowhash;
5999 pd->pktflags |= PKTF_FLOW_ID;
6000 pd->pktflags &= ~PKTF_FLOW_ADV;
6001 if (__improbable(pf_tag_packet(pbuf, pd->pf_mtag,
6002 tag, rtableid, pd))) {
6003 /*
6004 * this shouldn't fail as the packet tag has
6005 * already been allocated.
6006 */
6007 panic_plain("pf_tag_packet failed");
6008 }
6009 }
6010
6011 pf_set_rt_ifp(s, saddr, af); /* needs s->state_key set */
6012
6013 pbuf = pd->mp; // XXXSCW: Why?
6014
6015 if (sk->app_state == 0) {
6016 switch (pd->proto) {
6017 case IPPROTO_TCP: {
6018 u_int16_t dport = (direction == PF_OUT) ?
6019 sk->ext_gwy.xport.port : sk->gwy.xport.port;
6020
6021 if (nr != NULL &&
6022 ntohs(dport) == PF_PPTP_PORT) {
6023 struct pf_app_state *__single as;
6024
6025 as = pool_get(&pf_app_state_pl,
6026 PR_WAITOK);
6027 if (!as) {
6028 REASON_SET(&reason,
6029 PFRES_MEMORY);
6030 goto cleanup;
6031 }
6032
6033 bzero(as, sizeof(*as));
6034 as->handler = pf_pptp_handler;
6035 as->compare_lan_ext = 0;
6036 as->compare_ext_gwy = 0;
6037 as->u.pptp.grev1_state = 0;
6038 sk->app_state = as;
6039 (void) hook_establish(&s->unlink_hooks,
6040 0, (hook_fn_t) pf_pptp_unlink, s);
6041 }
6042 break;
6043 }
6044
6045 case IPPROTO_UDP: {
6046 if (nr != NULL &&
6047 ntohs(uh->uh_sport) == PF_IKE_PORT &&
6048 ntohs(uh->uh_dport) == PF_IKE_PORT) {
6049 struct pf_app_state *__single as;
6050
6051 as = pool_get(&pf_app_state_pl,
6052 PR_WAITOK);
6053 if (!as) {
6054 REASON_SET(&reason,
6055 PFRES_MEMORY);
6056 goto cleanup;
6057 }
6058
6059 bzero(as, sizeof(*as));
6060 as->compare_lan_ext = pf_ike_compare;
6061 as->compare_ext_gwy = pf_ike_compare;
6062 as->u.ike.cookie = ike.initiator_cookie;
6063 sk->app_state = as;
6064 }
6065 break;
6066 }
6067
6068 default:
6069 break;
6070 }
6071 }
6072
6073 if (__improbable(pf_insert_state(BOUND_IFACE(r, kif), s))) {
6074 if (pd->proto == IPPROTO_TCP) {
6075 pf_normalize_tcp_cleanup(s);
6076 }
6077 REASON_SET(&reason, PFRES_STATEINS);
6078 pf_src_tree_remove_state(s);
6079 STATE_DEC_COUNTERS(s);
6080 #if SKYWALK
6081 netns_release(&nstoken);
6082 #endif
6083 pool_put(&pf_state_pl, s);
6084 return PF_DROP;
6085 } else {
6086 #if SKYWALK
6087 s->nstoken = nstoken;
6088 nstoken = NULL;
6089 #endif
6090 *sm = s;
6091 }
6092 if (tag > 0) {
6093 pf_tag_ref(tag);
6094 s->tag = tag;
6095 }
6096 if (pd->proto == IPPROTO_TCP &&
6097 (th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN &&
6098 r->keep_state == PF_STATE_SYNPROXY) {
6099 int ua = (sk->af_lan == sk->af_gwy) ? 1 : 0;
6100 s->src.state = PF_TCPS_PROXY_SRC;
6101 if (nr != NULL) {
6102 if (direction == PF_OUT) {
6103 pf_change_ap(direction, pd->mp, saddr,
6104 &th->th_sport, pd->ip_sum,
6105 &th->th_sum, &pd->baddr,
6106 bxport.port, 0, af, pd->af, ua);
6107 sxport.port = th->th_sport;
6108 } else {
6109 pf_change_ap(direction, pd->mp, daddr,
6110 &th->th_dport, pd->ip_sum,
6111 &th->th_sum, &pd->baddr,
6112 bxport.port, 0, af, pd->af, ua);
6113 sxport.port = th->th_dport;
6114 }
6115 }
6116 s->src.seqhi = htonl(random());
6117 /* Find mss option */
6118 mss = pf_get_mss(pbuf, off, th->th_off, af);
6119 mss = pf_calc_mss(saddr, af, mss);
6120 mss = pf_calc_mss(daddr, af, mss);
6121 s->src.mss = mss;
6122 pf_send_tcp(r, af, daddr, saddr, th->th_dport,
6123 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
6124 TH_SYN | TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
6125 REASON_SET(&reason, PFRES_SYNPROXY);
6126 return PF_SYNPROXY_DROP;
6127 }
6128
6129 if (sk->app_state && sk->app_state->handler) {
6130 int offx = off;
6131
6132 switch (pd->proto) {
6133 case IPPROTO_TCP:
6134 offx += th->th_off << 2;
6135 break;
6136 case IPPROTO_UDP:
6137 offx += pf_pd_get_hdr_udp(pd)->uh_ulen << 2;
6138 break;
6139 default:
6140 /* ALG handlers only apply to TCP and UDP rules */
6141 break;
6142 }
6143
6144 if (offx > off) {
6145 sk->app_state->handler(s, direction, offx,
6146 pd, kif);
6147 if (pd->lmw < 0) {
6148 REASON_SET(&reason, PFRES_MEMORY);
6149 return PF_DROP;
6150 }
6151 pbuf = pd->mp; // XXXSCW: Why?
6152 }
6153 }
6154 }
6155 #if SKYWALK
6156 else {
6157 netns_release(&nstoken);
6158 }
6159 #endif
6160
6161 /* copy back packet headers if we performed NAT operations */
6162 if (rewrite) {
6163 if (rewrite < off + pd->hdrlen) {
6164 rewrite = off + pd->hdrlen;
6165 }
6166
6167 if (pf_lazy_makewritable(pd, pd->mp, rewrite) == NULL) {
6168 REASON_SET(&reason, PFRES_MEMORY);
6169 return PF_DROP;
6170 }
6171
6172 pbuf_copy_back(pbuf, off, hdrlen, pf_pd_get_hdr_ptr_any(pd), pd->hdrlen);
6173 if (af == AF_INET6 && pd->naf == AF_INET) {
6174 return pf_nat64_ipv6(pbuf, off, pd);
6175 } else if (af == AF_INET && pd->naf == AF_INET6) {
6176 return pf_nat64_ipv4(pbuf, off, pd);
6177 }
6178 }
6179
6180 return PF_PASS;
6181 }
6182
6183 boolean_t is_nlc_enabled_glb = FALSE;
6184
6185 static inline boolean_t
pf_is_dummynet_enabled(void)6186 pf_is_dummynet_enabled(void)
6187 {
6188 #if DUMMYNET
6189 if (__probable(!PF_IS_ENABLED)) {
6190 return FALSE;
6191 }
6192
6193 if (__probable(!DUMMYNET_LOADED)) {
6194 return FALSE;
6195 }
6196
6197 if (__probable(TAILQ_EMPTY(pf_main_ruleset.
6198 rules[PF_RULESET_DUMMYNET].active.ptr))) {
6199 return FALSE;
6200 }
6201
6202 return TRUE;
6203 #else
6204 return FALSE;
6205 #endif /* DUMMYNET */
6206 }
6207
6208 #if DUMMYNET
/*
 * When pf_test_dummynet() returns PF_PASS without the packet having matched
 * a dummynet rule, the rule-matching parameter "rm" remains unchanged.
 * When the packet does match a dummynet rule, pf_test_dummynet() still
 * returns PF_PASS but zeroes out the caller's pbuf pointer, because the
 * packet has effectively been siphoned off by dummynet.
 */
6216 static __attribute__((noinline)) int
pf_test_dummynet(struct pf_rule ** rm,int direction,struct pfi_kif * kif,pbuf_t ** pbuf0,struct pf_pdesc * pd,struct ip_fw_args * fwa)6217 pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif,
6218 pbuf_t **pbuf0, struct pf_pdesc *pd, struct ip_fw_args *fwa)
6219 {
6220 pbuf_t *__single pbuf = *pbuf0;
6221 struct pf_rule *__single am = NULL;
6222 struct pf_ruleset *__single rsm = NULL;
6223 struct pf_addr *__single saddr = pd->src, *__single daddr = pd->dst;
6224 sa_family_t af = pd->af;
6225 struct pf_rule *__single r, *__single a = NULL;
6226 struct pf_ruleset *__single ruleset = NULL;
6227 struct tcphdr *__single th = pf_pd_get_hdr_tcp(pd);
6228 u_short reason;
6229 int hdrlen = 0;
6230 int tag = -1;
6231 unsigned int rtableid = IFSCOPE_NONE;
6232 int asd = 0;
6233 int match = 0;
6234 u_int8_t icmptype = 0, icmpcode = 0;
6235 struct ip_fw_args dnflow;
6236 struct pf_rule *__single prev_matching_rule = fwa ? fwa->fwa_pf_rule : NULL;
6237 int found_prev_rule = (prev_matching_rule) ? 0 : 1;
6238
6239 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
6240
6241 if (!pf_is_dummynet_enabled()) {
6242 return PF_PASS;
6243 }
6244
6245 if (kif->pfik_ifp->if_xflags & IFXF_NO_TRAFFIC_SHAPING) {
6246 return PF_PASS;
6247 }
6248
6249 bzero(&dnflow, sizeof(dnflow));
6250
6251 hdrlen = 0;
6252
6253 /* Fragments don't gave protocol headers */
6254 if (!(pd->flags & PFDESC_IP_FRAG)) {
6255 switch (pd->proto) {
6256 case IPPROTO_TCP:
6257 dnflow.fwa_id.flags = pf_pd_get_hdr_tcp(pd)->th_flags;
6258 dnflow.fwa_id.dst_port = ntohs(pf_pd_get_hdr_tcp(pd)->th_dport);
6259 dnflow.fwa_id.src_port = ntohs(pf_pd_get_hdr_tcp(pd)->th_sport);
6260 hdrlen = sizeof(*th);
6261 break;
6262 case IPPROTO_UDP:
6263 dnflow.fwa_id.dst_port = ntohs(pf_pd_get_hdr_udp(pd)->uh_dport);
6264 dnflow.fwa_id.src_port = ntohs(pf_pd_get_hdr_udp(pd)->uh_sport);
6265 hdrlen = sizeof(*pf_pd_get_hdr_udp(pd));
6266 break;
6267 #if INET
6268 case IPPROTO_ICMP:
6269 if (af != AF_INET) {
6270 break;
6271 }
6272 hdrlen = ICMP_MINLEN;
6273 icmptype = pf_pd_get_hdr_icmp(pd)->icmp_type;
6274 icmpcode = pf_pd_get_hdr_icmp(pd)->icmp_code;
6275 break;
6276 #endif /* INET */
6277 case IPPROTO_ICMPV6:
6278 if (af != AF_INET6) {
6279 break;
6280 }
6281 hdrlen = sizeof(*pf_pd_get_hdr_icmp6(pd));
6282 icmptype = pf_pd_get_hdr_icmp6(pd)->icmp6_type;
6283 icmpcode = pf_pd_get_hdr_icmp6(pd)->icmp6_code;
6284 break;
6285 case IPPROTO_GRE:
6286 if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
6287 hdrlen = sizeof(*pf_pd_get_hdr_grev1(pd));
6288 }
6289 break;
6290 case IPPROTO_ESP:
6291 hdrlen = sizeof(*pf_pd_get_hdr_esp(pd));
6292 break;
6293 }
6294 }
6295
6296 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_DUMMYNET].active.ptr);
6297
6298 while (r != NULL) {
6299 r->evaluations++;
6300 if (pfi_kif_match(r->kif, kif) == r->ifnot) {
6301 r = r->skip[PF_SKIP_IFP].ptr;
6302 } else if (r->direction && r->direction != direction) {
6303 r = r->skip[PF_SKIP_DIR].ptr;
6304 } else if (r->af && r->af != af) {
6305 r = r->skip[PF_SKIP_AF].ptr;
6306 } else if (r->proto && r->proto != pd->proto) {
6307 r = r->skip[PF_SKIP_PROTO].ptr;
6308 } else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
6309 r->src.neg, kif)) {
6310 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
6311 }
6312 /* tcp/udp only. port_op always 0 in other cases */
6313 else if (r->proto == pd->proto &&
6314 (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
6315 ((pd->flags & PFDESC_IP_FRAG) ||
6316 ((r->src.xport.range.op &&
6317 !pf_match_port(r->src.xport.range.op,
6318 r->src.xport.range.port[0], r->src.xport.range.port[1],
6319 th->th_sport))))) {
6320 r = r->skip[PF_SKIP_SRC_PORT].ptr;
6321 } else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
6322 r->dst.neg, NULL)) {
6323 r = r->skip[PF_SKIP_DST_ADDR].ptr;
6324 }
6325 /* tcp/udp only. port_op always 0 in other cases */
6326 else if (r->proto == pd->proto &&
6327 (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
6328 r->dst.xport.range.op &&
6329 ((pd->flags & PFDESC_IP_FRAG) ||
6330 !pf_match_port(r->dst.xport.range.op,
6331 r->dst.xport.range.port[0], r->dst.xport.range.port[1],
6332 th->th_dport))) {
6333 r = r->skip[PF_SKIP_DST_PORT].ptr;
6334 }
6335 /* icmp only. type always 0 in other cases */
6336 else if (r->type &&
6337 ((pd->flags & PFDESC_IP_FRAG) ||
6338 r->type != icmptype + 1)) {
6339 r = TAILQ_NEXT(r, entries);
6340 }
6341 /* icmp only. type always 0 in other cases */
6342 else if (r->code &&
6343 ((pd->flags & PFDESC_IP_FRAG) ||
6344 r->code != icmpcode + 1)) {
6345 r = TAILQ_NEXT(r, entries);
6346 } else if (r->tos && !(r->tos == pd->tos)) {
6347 r = TAILQ_NEXT(r, entries);
6348 } else if (r->rule_flag & PFRULE_FRAGMENT) {
6349 r = TAILQ_NEXT(r, entries);
6350 } else if (pd->proto == IPPROTO_TCP &&
6351 ((pd->flags & PFDESC_IP_FRAG) ||
6352 (r->flagset & th->th_flags) != r->flags)) {
6353 r = TAILQ_NEXT(r, entries);
6354 } else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
6355 r = TAILQ_NEXT(r, entries);
6356 } else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
6357 r = TAILQ_NEXT(r, entries);
6358 } else {
6359 /*
6360 * Need to go past the previous dummynet matching rule
6361 */
6362 if (r->anchor == NULL) {
6363 if (found_prev_rule) {
6364 if (r->tag) {
6365 tag = r->tag;
6366 }
6367 if (PF_RTABLEID_IS_VALID(r->rtableid)) {
6368 rtableid = r->rtableid;
6369 }
6370 match = 1;
6371 *rm = r;
6372 am = a;
6373 rsm = ruleset;
6374 if ((*rm)->quick) {
6375 break;
6376 }
6377 } else if (r == prev_matching_rule) {
6378 found_prev_rule = 1;
6379 }
6380 r = TAILQ_NEXT(r, entries);
6381 } else {
6382 pf_step_into_anchor(&asd, &ruleset,
6383 PF_RULESET_DUMMYNET, &r, &a, &match);
6384 }
6385 }
6386 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
6387 PF_RULESET_DUMMYNET, &r, &a, &match)) {
6388 break;
6389 }
6390 }
6391 r = *rm;
6392 a = am;
6393 ruleset = rsm;
6394
6395 if (!match) {
6396 return PF_PASS;
6397 }
6398
6399 REASON_SET(&reason, PFRES_DUMMYNET);
6400
6401 if (r->log) {
6402 PFLOG_PACKET(kif, h, pbuf, af, direction, reason, r,
6403 a, ruleset, pd);
6404 }
6405
6406 if (r->action == PF_NODUMMYNET) {
6407 int dirndx = (direction == PF_OUT);
6408
6409 r->packets[dirndx]++;
6410 r->bytes[dirndx] += pd->tot_len;
6411
6412 return PF_PASS;
6413 }
6414 if (pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, pd)) {
6415 REASON_SET(&reason, PFRES_MEMORY);
6416
6417 return PF_DROP;
6418 }
6419
6420 if (r->dnpipe && ip_dn_io_ptr != NULL) {
6421 struct mbuf *m;
6422 int dirndx = (direction == PF_OUT);
6423
6424 r->packets[dirndx]++;
6425 r->bytes[dirndx] += pd->tot_len;
6426
6427 dnflow.fwa_cookie = r->dnpipe;
6428 dnflow.fwa_pf_rule = r;
6429 dnflow.fwa_id.proto = pd->proto;
6430 dnflow.fwa_flags = r->dntype;
6431 switch (af) {
6432 case AF_INET:
6433 dnflow.fwa_id.addr_type = 4;
6434 dnflow.fwa_id.src_ip = ntohl(saddr->v4addr.s_addr);
6435 dnflow.fwa_id.dst_ip = ntohl(daddr->v4addr.s_addr);
6436 break;
6437 case AF_INET6:
6438 dnflow.fwa_id.addr_type = 6;
6439 dnflow.fwa_id.src_ip6 = saddr->v6addr;
6440 dnflow.fwa_id.dst_ip6 = saddr->v6addr;
6441 break;
6442 }
6443
6444 if (fwa != NULL) {
6445 dnflow.fwa_oif = fwa->fwa_oif;
6446 dnflow.fwa_oflags = fwa->fwa_oflags;
6447 /*
6448 * Note that fwa_ro, fwa_dst and fwa_ipoa are
6449 * actually in a union so the following does work
6450 * for both IPv4 and IPv6
6451 */
6452 dnflow.fwa_ro = fwa->fwa_ro;
6453 dnflow.fwa_dst = fwa->fwa_dst;
6454 dnflow.fwa_ipoa = fwa->fwa_ipoa;
6455 dnflow.fwa_ro6_pmtu = fwa->fwa_ro6_pmtu;
6456 dnflow.fwa_origifp = fwa->fwa_origifp;
6457 dnflow.fwa_mtu = fwa->fwa_mtu;
6458 dnflow.fwa_unfragpartlen = fwa->fwa_unfragpartlen;
6459 dnflow.fwa_exthdrs = fwa->fwa_exthdrs;
6460 }
6461
6462 if (af == AF_INET) {
6463 struct ip *__single iphdr = pbuf->pb_data;
6464 NTOHS(iphdr->ip_len);
6465 NTOHS(iphdr->ip_off);
6466 }
6467 /*
6468 * Don't need to unlock pf_lock as NET_THREAD_HELD_PF
6469 * allows for recursive behavior
6470 */
6471 m = pbuf_to_mbuf(pbuf, TRUE);
6472 if (m != NULL) {
6473 ip_dn_io_ptr(m,
6474 dnflow.fwa_cookie, (af == AF_INET) ?
6475 ((direction == PF_IN) ? DN_TO_IP_IN : DN_TO_IP_OUT) :
6476 ((direction == PF_IN) ? DN_TO_IP6_IN : DN_TO_IP6_OUT),
6477 &dnflow);
6478 }
6479
6480 /*
6481 * The packet is siphoned out by dummynet so return a NULL
6482 * pbuf so the caller can still return success.
6483 */
6484 *pbuf0 = NULL;
6485
6486 return PF_PASS;
6487 }
6488
6489 return PF_PASS;
6490 }
6491 #endif /* DUMMYNET */
6492
6493 static __attribute__((noinline)) int
pf_test_fragment(struct pf_rule ** rm,int direction,struct pfi_kif * kif,pbuf_t * pbuf,void * h,struct pf_pdesc * pd,struct pf_rule ** am,struct pf_ruleset ** rsm)6494 pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
6495 pbuf_t *pbuf, void *h, struct pf_pdesc *pd, struct pf_rule **am,
6496 struct pf_ruleset **rsm)
6497 {
6498 #pragma unused(h)
6499 struct pf_rule *__single r, *__single a = NULL;
6500 struct pf_ruleset *__single ruleset = NULL;
6501 sa_family_t af = pd->af;
6502 u_short reason;
6503 int tag = -1;
6504 int asd = 0;
6505 int match = 0;
6506
6507 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
6508 while (r != NULL) {
6509 r->evaluations++;
6510 if (pfi_kif_match(r->kif, kif) == r->ifnot) {
6511 r = r->skip[PF_SKIP_IFP].ptr;
6512 } else if (r->direction && r->direction != direction) {
6513 r = r->skip[PF_SKIP_DIR].ptr;
6514 } else if (r->af && r->af != af) {
6515 r = r->skip[PF_SKIP_AF].ptr;
6516 } else if (r->proto && r->proto != pd->proto) {
6517 r = r->skip[PF_SKIP_PROTO].ptr;
6518 } else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
6519 r->src.neg, kif)) {
6520 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
6521 } else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
6522 r->dst.neg, NULL)) {
6523 r = r->skip[PF_SKIP_DST_ADDR].ptr;
6524 } else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
6525 !(r->tos & pd->tos)) {
6526 r = TAILQ_NEXT(r, entries);
6527 } else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
6528 !(r->tos & (pd->tos & DSCP_MASK))) {
6529 r = TAILQ_NEXT(r, entries);
6530 } else if ((r->rule_flag & PFRULE_SC) && r->tos &&
6531 ((r->tos & SCIDX_MASK) != pd->sc)) {
6532 r = TAILQ_NEXT(r, entries);
6533 } else if (r->os_fingerprint != PF_OSFP_ANY) {
6534 r = TAILQ_NEXT(r, entries);
6535 } else if (pd->proto == IPPROTO_UDP &&
6536 (r->src.xport.range.op || r->dst.xport.range.op)) {
6537 r = TAILQ_NEXT(r, entries);
6538 } else if (pd->proto == IPPROTO_TCP &&
6539 (r->src.xport.range.op || r->dst.xport.range.op ||
6540 r->flagset)) {
6541 r = TAILQ_NEXT(r, entries);
6542 } else if ((pd->proto == IPPROTO_ICMP ||
6543 pd->proto == IPPROTO_ICMPV6) &&
6544 (r->type || r->code)) {
6545 r = TAILQ_NEXT(r, entries);
6546 } else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
6547 r = TAILQ_NEXT(r, entries);
6548 } else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
6549 r = TAILQ_NEXT(r, entries);
6550 } else {
6551 if (r->anchor == NULL) {
6552 match = 1;
6553 *rm = r;
6554 *am = a;
6555 *rsm = ruleset;
6556 if ((*rm)->quick) {
6557 break;
6558 }
6559 r = TAILQ_NEXT(r, entries);
6560 } else {
6561 pf_step_into_anchor(&asd, &ruleset,
6562 PF_RULESET_FILTER, &r, &a, &match);
6563 }
6564 }
6565 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
6566 PF_RULESET_FILTER, &r, &a, &match)) {
6567 break;
6568 }
6569 }
6570 r = *rm;
6571 a = *am;
6572 ruleset = *rsm;
6573
6574 REASON_SET(&reason, PFRES_MATCH);
6575
6576 if (r->log) {
6577 PFLOG_PACKET(kif, h, pbuf, af, direction, reason, r, a, ruleset,
6578 pd);
6579 }
6580
6581 if (r->action != PF_PASS) {
6582 return PF_DROP;
6583 }
6584
6585 if (pf_tag_packet(pbuf, pd->pf_mtag, tag, -1, NULL)) {
6586 REASON_SET(&reason, PFRES_MEMORY);
6587 return PF_DROP;
6588 }
6589
6590 return PF_PASS;
6591 }
6592
6593 static __attribute__((noinline)) void
pf_pptp_handler(struct pf_state * s,int direction,int off,struct pf_pdesc * pd,struct pfi_kif * kif)6594 pf_pptp_handler(struct pf_state *s, int direction, int off,
6595 struct pf_pdesc *pd, struct pfi_kif *kif)
6596 {
6597 #pragma unused(direction)
6598 struct tcphdr *__single th;
6599 struct pf_pptp_state *__single pptps;
6600 struct pf_pptp_ctrl_msg cm;
6601 size_t plen, tlen;
6602 struct pf_state *__single gs;
6603 u_int16_t ct;
6604 u_int16_t *__single pac_call_id;
6605 u_int16_t *__single pns_call_id;
6606 u_int16_t *__single spoof_call_id;
6607 u_int8_t *__single pac_state;
6608 u_int8_t *__single pns_state;
6609 enum { PF_PPTP_PASS, PF_PPTP_INSERT_GRE, PF_PPTP_REMOVE_GRE } op;
6610 pbuf_t *__single pbuf;
6611 struct pf_state_key *__single sk;
6612 struct pf_state_key *__single gsk;
6613 struct pf_app_state *__single gas;
6614
6615 sk = s->state_key;
6616 pptps = &sk->app_state->u.pptp;
6617 gs = pptps->grev1_state;
6618
6619 if (gs) {
6620 gs->expire = pf_time_second();
6621 }
6622
6623 pbuf = pd->mp;
6624 plen = min(sizeof(cm), pbuf->pb_packet_len - off);
6625 if (plen < PF_PPTP_CTRL_MSG_MINSIZE) {
6626 return;
6627 }
6628 tlen = plen - PF_PPTP_CTRL_MSG_MINSIZE;
6629 pbuf_copy_data(pbuf, off, plen, &cm, sizeof(cm));
6630
6631 if (ntohl(cm.hdr.magic) != PF_PPTP_MAGIC_NUMBER) {
6632 return;
6633 }
6634 if (ntohs(cm.hdr.type) != 1) {
6635 return;
6636 }
6637
6638 #define TYPE_LEN_CHECK(_type, _name) \
6639 case PF_PPTP_CTRL_TYPE_##_type: \
6640 if (tlen < sizeof(struct pf_pptp_ctrl_##_name)) \
6641 return; \
6642 break;
6643
6644 switch (cm.ctrl.type) {
6645 TYPE_LEN_CHECK(START_REQ, start_req);
6646 TYPE_LEN_CHECK(START_RPY, start_rpy);
6647 TYPE_LEN_CHECK(STOP_REQ, stop_req);
6648 TYPE_LEN_CHECK(STOP_RPY, stop_rpy);
6649 TYPE_LEN_CHECK(ECHO_REQ, echo_req);
6650 TYPE_LEN_CHECK(ECHO_RPY, echo_rpy);
6651 TYPE_LEN_CHECK(CALL_OUT_REQ, call_out_req);
6652 TYPE_LEN_CHECK(CALL_OUT_RPY, call_out_rpy);
6653 TYPE_LEN_CHECK(CALL_IN_1ST, call_in_1st);
6654 TYPE_LEN_CHECK(CALL_IN_2ND, call_in_2nd);
6655 TYPE_LEN_CHECK(CALL_IN_3RD, call_in_3rd);
6656 TYPE_LEN_CHECK(CALL_CLR, call_clr);
6657 TYPE_LEN_CHECK(CALL_DISC, call_disc);
6658 TYPE_LEN_CHECK(ERROR, error);
6659 TYPE_LEN_CHECK(SET_LINKINFO, set_linkinfo);
6660 default:
6661 return;
6662 }
6663 #undef TYPE_LEN_CHECK
6664
6665 if (!gs) {
6666 gs = pool_get(&pf_state_pl, PR_WAITOK);
6667 if (!gs) {
6668 return;
6669 }
6670
6671 memcpy(gs, s, sizeof(*gs));
6672
6673 memset(&gs->entry_id, 0, sizeof(gs->entry_id));
6674 memset(&gs->entry_list, 0, sizeof(gs->entry_list));
6675
6676 TAILQ_INIT(&gs->unlink_hooks);
6677 gs->rt_kif = NULL;
6678 gs->creation = 0;
6679 gs->pfsync_time = 0;
6680 gs->packets[0] = gs->packets[1] = 0;
6681 gs->bytes[0] = gs->bytes[1] = 0;
6682 gs->timeout = PFTM_UNLINKED;
6683 gs->id = gs->creatorid = 0;
6684 gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
6685 gs->src.scrub = gs->dst.scrub = 0;
6686
6687 gas = pool_get(&pf_app_state_pl, PR_NOWAIT);
6688 if (!gas) {
6689 pool_put(&pf_state_pl, gs);
6690 return;
6691 }
6692
6693 gsk = pf_alloc_state_key(gs, NULL);
6694 if (!gsk) {
6695 pool_put(&pf_app_state_pl, gas);
6696 pool_put(&pf_state_pl, gs);
6697 return;
6698 }
6699
6700 memcpy(&gsk->lan, &sk->lan, sizeof(gsk->lan));
6701 memcpy(&gsk->gwy, &sk->gwy, sizeof(gsk->gwy));
6702 memcpy(&gsk->ext_lan, &sk->ext_lan, sizeof(gsk->ext_lan));
6703 memcpy(&gsk->ext_gwy, &sk->ext_gwy, sizeof(gsk->ext_gwy));
6704 gsk->af_lan = sk->af_lan;
6705 gsk->af_gwy = sk->af_gwy;
6706 gsk->proto = IPPROTO_GRE;
6707 gsk->proto_variant = PF_GRE_PPTP_VARIANT;
6708 gsk->app_state = gas;
6709 gsk->lan.xport.call_id = 0;
6710 gsk->gwy.xport.call_id = 0;
6711 gsk->ext_lan.xport.call_id = 0;
6712 gsk->ext_gwy.xport.call_id = 0;
6713 ASSERT(gsk->flowsrc == FLOWSRC_PF);
6714 ASSERT(gsk->flowhash != 0);
6715 memset(gas, 0, sizeof(*gas));
6716 gas->u.grev1.pptp_state = s;
6717 STATE_INC_COUNTERS(gs);
6718 pptps->grev1_state = gs;
6719 (void) hook_establish(&gs->unlink_hooks, 0,
6720 (hook_fn_t) pf_grev1_unlink, gs);
6721 } else {
6722 gsk = gs->state_key;
6723 }
6724
6725 switch (sk->direction) {
6726 case PF_IN:
6727 pns_call_id = &gsk->ext_lan.xport.call_id;
6728 pns_state = &gs->dst.state;
6729 pac_call_id = &gsk->lan.xport.call_id;
6730 pac_state = &gs->src.state;
6731 break;
6732
6733 case PF_OUT:
6734 pns_call_id = &gsk->lan.xport.call_id;
6735 pns_state = &gs->src.state;
6736 pac_call_id = &gsk->ext_lan.xport.call_id;
6737 pac_state = &gs->dst.state;
6738 break;
6739
6740 default:
6741 DPFPRINTF(PF_DEBUG_URGENT,
6742 ("pf_pptp_handler: bad directional!\n"));
6743 return;
6744 }
6745
6746 spoof_call_id = 0;
6747 op = PF_PPTP_PASS;
6748
6749 ct = ntohs(cm.ctrl.type);
6750
6751 switch (ct) {
6752 case PF_PPTP_CTRL_TYPE_CALL_OUT_REQ:
6753 *pns_call_id = cm.msg.call_out_req.call_id;
6754 *pns_state = PFGRE1S_INITIATING;
6755 if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6756 spoof_call_id = &cm.msg.call_out_req.call_id;
6757 }
6758 break;
6759
6760 case PF_PPTP_CTRL_TYPE_CALL_OUT_RPY:
6761 *pac_call_id = cm.msg.call_out_rpy.call_id;
6762 if (s->nat_rule.ptr) {
6763 spoof_call_id =
6764 (pac_call_id == &gsk->lan.xport.call_id) ?
6765 &cm.msg.call_out_rpy.call_id :
6766 &cm.msg.call_out_rpy.peer_call_id;
6767 }
6768 if (gs->timeout == PFTM_UNLINKED) {
6769 *pac_state = PFGRE1S_INITIATING;
6770 op = PF_PPTP_INSERT_GRE;
6771 }
6772 break;
6773
6774 case PF_PPTP_CTRL_TYPE_CALL_IN_1ST:
6775 *pns_call_id = cm.msg.call_in_1st.call_id;
6776 *pns_state = PFGRE1S_INITIATING;
6777 if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6778 spoof_call_id = &cm.msg.call_in_1st.call_id;
6779 }
6780 break;
6781
6782 case PF_PPTP_CTRL_TYPE_CALL_IN_2ND:
6783 *pac_call_id = cm.msg.call_in_2nd.call_id;
6784 *pac_state = PFGRE1S_INITIATING;
6785 if (s->nat_rule.ptr) {
6786 spoof_call_id =
6787 (pac_call_id == &gsk->lan.xport.call_id) ?
6788 &cm.msg.call_in_2nd.call_id :
6789 &cm.msg.call_in_2nd.peer_call_id;
6790 }
6791 break;
6792
6793 case PF_PPTP_CTRL_TYPE_CALL_IN_3RD:
6794 if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6795 spoof_call_id = &cm.msg.call_in_3rd.call_id;
6796 }
6797 if (cm.msg.call_in_3rd.call_id != *pns_call_id) {
6798 break;
6799 }
6800 if (gs->timeout == PFTM_UNLINKED) {
6801 op = PF_PPTP_INSERT_GRE;
6802 }
6803 break;
6804
6805 case PF_PPTP_CTRL_TYPE_CALL_CLR:
6806 if (cm.msg.call_clr.call_id != *pns_call_id) {
6807 op = PF_PPTP_REMOVE_GRE;
6808 }
6809 break;
6810
6811 case PF_PPTP_CTRL_TYPE_CALL_DISC:
6812 if (cm.msg.call_clr.call_id != *pac_call_id) {
6813 op = PF_PPTP_REMOVE_GRE;
6814 }
6815 break;
6816
6817 case PF_PPTP_CTRL_TYPE_ERROR:
6818 if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6819 spoof_call_id = &cm.msg.error.peer_call_id;
6820 }
6821 break;
6822
6823 case PF_PPTP_CTRL_TYPE_SET_LINKINFO:
6824 if (s->nat_rule.ptr && pac_call_id == &gsk->lan.xport.call_id) {
6825 spoof_call_id = &cm.msg.set_linkinfo.peer_call_id;
6826 }
6827 break;
6828
6829 default:
6830 op = PF_PPTP_PASS;
6831 break;
6832 }
6833
6834 if (!gsk->gwy.xport.call_id && gsk->lan.xport.call_id) {
6835 gsk->gwy.xport.call_id = gsk->lan.xport.call_id;
6836 if (spoof_call_id) {
6837 u_int16_t call_id = 0;
6838 int n = 0;
6839 struct pf_state_key_cmp key;
6840
6841 key.af_gwy = gsk->af_gwy;
6842 key.proto = IPPROTO_GRE;
6843 key.proto_variant = PF_GRE_PPTP_VARIANT;
6844 PF_ACPY(&key.gwy.addr, &gsk->gwy.addr, key.af_gwy);
6845 PF_ACPY(&key.ext_gwy.addr, &gsk->ext_gwy.addr, key.af_gwy);
6846 key.gwy.xport.call_id = gsk->gwy.xport.call_id;
6847 key.ext_gwy.xport.call_id = gsk->ext_gwy.xport.call_id;
6848 do {
6849 call_id = htonl(random());
6850 } while (!call_id);
6851
6852 while (pf_find_state_all(&key, PF_IN, 0)) {
6853 call_id = ntohs(call_id);
6854 --call_id;
6855 if (--call_id == 0) {
6856 call_id = 0xffff;
6857 }
6858 call_id = htons(call_id);
6859
6860 key.gwy.xport.call_id = call_id;
6861
6862 if (++n > 65535) {
6863 DPFPRINTF(PF_DEBUG_URGENT,
6864 ("pf_pptp_handler: failed to spoof "
6865 "call id\n"));
6866 key.gwy.xport.call_id = 0;
6867 break;
6868 }
6869 }
6870
6871 gsk->gwy.xport.call_id = call_id;
6872 }
6873 }
6874
6875 th = pf_pd_get_hdr_tcp(pd);
6876
6877 if (spoof_call_id && gsk->lan.xport.call_id != gsk->gwy.xport.call_id) {
6878 if (*spoof_call_id == gsk->gwy.xport.call_id) {
6879 *spoof_call_id = gsk->lan.xport.call_id;
6880 th->th_sum = pf_cksum_fixup(th->th_sum,
6881 gsk->gwy.xport.call_id, gsk->lan.xport.call_id, 0);
6882 } else {
6883 *spoof_call_id = gsk->gwy.xport.call_id;
6884 th->th_sum = pf_cksum_fixup(th->th_sum,
6885 gsk->lan.xport.call_id, gsk->gwy.xport.call_id, 0);
6886 }
6887
6888 if (pf_lazy_makewritable(pd, pbuf, off + plen) == NULL) {
6889 pptps->grev1_state = NULL;
6890 STATE_DEC_COUNTERS(gs);
6891 pool_put(&pf_state_pl, gs);
6892 return;
6893 }
6894 pbuf_copy_back(pbuf, off, plen, &cm, sizeof(cm));
6895 }
6896
6897 switch (op) {
6898 case PF_PPTP_REMOVE_GRE:
6899 gs->timeout = PFTM_PURGE;
6900 gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
6901 gsk->lan.xport.call_id = 0;
6902 gsk->gwy.xport.call_id = 0;
6903 gsk->ext_lan.xport.call_id = 0;
6904 gsk->ext_gwy.xport.call_id = 0;
6905 gs->id = gs->creatorid = 0;
6906 break;
6907
6908 case PF_PPTP_INSERT_GRE:
6909 gs->creation = pf_time_second();
6910 gs->expire = pf_time_second();
6911 gs->timeout = PFTM_TCP_ESTABLISHED;
6912 if (gs->src_node != NULL) {
6913 ++gs->src_node->states;
6914 VERIFY(gs->src_node->states != 0);
6915 }
6916 if (gs->nat_src_node != NULL) {
6917 ++gs->nat_src_node->states;
6918 VERIFY(gs->nat_src_node->states != 0);
6919 }
6920 pf_set_rt_ifp(gs, &sk->lan.addr, sk->af_lan);
6921 if (pf_insert_state(BOUND_IFACE(s->rule.ptr, kif), gs)) {
6922 /*
6923 * <[email protected]>
6924 * FIX ME: insertion can fail when multiple PNS
6925 * behind the same NAT open calls to the same PAC
6926 * simultaneously because spoofed call ID numbers
6927 * are chosen before states are inserted. This is
6928 * hard to fix and happens infrequently enough that
6929 * users will normally try again and this ALG will
6930 * succeed. Failures are expected to be rare enough
6931 * that fixing this is a low priority.
6932 */
6933 pptps->grev1_state = NULL;
6934 pd->lmw = -1; /* Force PF_DROP on PFRES_MEMORY */
6935 pf_src_tree_remove_state(gs);
6936 STATE_DEC_COUNTERS(gs);
6937 pool_put(&pf_state_pl, gs);
6938 DPFPRINTF(PF_DEBUG_URGENT, ("pf_pptp_handler: error "
6939 "inserting GREv1 state.\n"));
6940 }
6941 break;
6942
6943 default:
6944 break;
6945 }
6946 }
6947
6948 static __attribute__((noinline)) void
pf_pptp_unlink(struct pf_state * s)6949 pf_pptp_unlink(struct pf_state *s)
6950 {
6951 struct pf_app_state *as = s->state_key->app_state;
6952 struct pf_state *grev1s = as->u.pptp.grev1_state;
6953
6954 if (grev1s) {
6955 struct pf_app_state *gas = grev1s->state_key->app_state;
6956
6957 if (grev1s->timeout < PFTM_MAX) {
6958 grev1s->timeout = PFTM_PURGE;
6959 }
6960 gas->u.grev1.pptp_state = NULL;
6961 as->u.pptp.grev1_state = NULL;
6962 }
6963 }
6964
6965 static __attribute__((noinline)) void
pf_grev1_unlink(struct pf_state * s)6966 pf_grev1_unlink(struct pf_state *s)
6967 {
6968 struct pf_app_state *as = s->state_key->app_state;
6969 struct pf_state *pptps = as->u.grev1.pptp_state;
6970
6971 if (pptps) {
6972 struct pf_app_state *pas = pptps->state_key->app_state;
6973
6974 pas->u.pptp.grev1_state = NULL;
6975 as->u.grev1.pptp_state = NULL;
6976 }
6977 }
6978
6979 static int
pf_ike_compare(struct pf_app_state * a,struct pf_app_state * b)6980 pf_ike_compare(struct pf_app_state *a, struct pf_app_state *b)
6981 {
6982 int64_t d = a->u.ike.cookie - b->u.ike.cookie;
6983 return (d > 0) ? 1 : ((d < 0) ? -1 : 0);
6984 }
6985
6986 static int
pf_do_nat64(struct pf_state_key * sk,struct pf_pdesc * pd,pbuf_t * pbuf,int off)6987 pf_do_nat64(struct pf_state_key *sk, struct pf_pdesc *pd, pbuf_t *pbuf,
6988 int off)
6989 {
6990 if (pd->af == AF_INET) {
6991 if (pd->af != sk->af_lan) {
6992 pd->ndaddr = sk->lan.addr;
6993 pd->naddr = sk->ext_lan.addr;
6994 } else {
6995 pd->naddr = sk->gwy.addr;
6996 pd->ndaddr = sk->ext_gwy.addr;
6997 }
6998 return pf_nat64_ipv4(pbuf, off, pd);
6999 } else if (pd->af == AF_INET6) {
7000 if (pd->af != sk->af_lan) {
7001 pd->ndaddr = sk->lan.addr;
7002 pd->naddr = sk->ext_lan.addr;
7003 } else {
7004 pd->naddr = sk->gwy.addr;
7005 pd->ndaddr = sk->ext_gwy.addr;
7006 }
7007 return pf_nat64_ipv6(pbuf, off, pd);
7008 }
7009 return PF_DROP;
7010 }
7011
/*
 * Match a TCP packet against the state table and enforce stateful TCP
 * tracking (sequence-window validation per Guido van Rooij's algorithm,
 * SYN-proxy handshake completion, stateful scrubbing, sequence-number
 * modulation, and NAT/NAT64 header translation).
 *
 * On success *state points at the matched state and PF_PASS is
 * returned; otherwise PF_DROP or PF_SYNPROXY_DROP, with *reason set.
 */
static __attribute__((noinline)) int
pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd,
    u_short *reason)
{
#pragma unused(h)
	struct pf_state_key_cmp key;
	struct tcphdr *__single th = pf_pd_get_hdr_tcp(pd);
	u_int16_t win = ntohs(th->th_win);
	u_int32_t ack, end, seq, orig_seq;
	u_int8_t sws, dws;              /* window-scale shifts: src / dst */
	int ackskew;
	int copyback = 0;               /* nonzero: header region to write back */
	struct pf_state_peer *src, *dst;
	struct pf_state_key *sk;

	key.app_state = 0;
	key.proto = IPPROTO_TCP;
	key.af_lan = key.af_gwy = pd->af;

	/*
	 * For NAT64 the first time rule search and state creation
	 * is done on the incoming side only.
	 * Once the state gets created, NAT64's LAN side (ipv6) will
	 * not be able to find the state in ext-gwy tree as that normally
	 * is intended to be looked up for incoming traffic from the
	 * WAN side.
	 * Therefore to handle NAT64 case we init keys here for both
	 * lan-ext as well as ext-gwy trees.
	 * In the state lookup we attempt a lookup on both trees if
	 * first one does not return any result and return a match if
	 * the match state's was created by NAT64 rule.
	 */
	PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
	PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
	key.ext_gwy.xport.port = th->th_sport;
	key.gwy.xport.port = th->th_dport;

	PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
	PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
	key.lan.xport.port = th->th_sport;
	key.ext_lan.xport.port = th->th_dport;

	/* Macro: resolves *state from 'key'; returns from here on a miss. */
	STATE_LOOKUP();

	sk = (*state)->state_key;
	/*
	 * In case of NAT64 the translation is first applied on the LAN
	 * side. Therefore for stack's address family comparison
	 * we use sk->af_lan.
	 */
	if ((direction == sk->direction) && (pd->af == sk->af_lan)) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* SYN proxy, phase 1: complete the handshake with the initiator. */
	if (src->state == PF_TCPS_PROXY_SRC) {
		if (direction != sk->direction) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		}
		if (th->th_flags & TH_SYN) {
			if (ntohl(th->th_seq) != src->seqlo) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return PF_DROP;
			}
			/* Answer the (retransmitted) SYN ourselves. */
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    src->seqhi, ntohl(th->th_seq) + 1,
			    TH_SYN | TH_ACK, 0, src->mss, 0, 1,
			    0, NULL, NULL);
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		} else if (!(th->th_flags & TH_ACK) ||
		    (ntohl(th->th_ack) != src->seqhi + 1) ||
		    (ntohl(th->th_seq) != src->seqlo + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_DROP;
		} else if ((*state)->src_node != NULL &&
		    pf_src_connlimit(state)) {
			REASON_SET(reason, PFRES_SRCLIMIT);
			return PF_DROP;
		} else {
			/* Initiator's handshake done; now dial the server. */
			src->state = PF_TCPS_PROXY_DST;
		}
	}
	/* SYN proxy, phase 2: handshake with the destination, then splice. */
	if (src->state == PF_TCPS_PROXY_DST) {
		struct pf_state_host *psrc, *pdst;

		if (direction == PF_OUT) {
			psrc = &sk->gwy;
			pdst = &sk->ext_gwy;
		} else {
			psrc = &sk->ext_lan;
			pdst = &sk->lan;
		}
		if (direction == sk->direction) {
			if (((th->th_flags & (TH_SYN | TH_ACK)) != TH_ACK) ||
			    (ntohl(th->th_ack) != src->seqhi + 1) ||
			    (ntohl(th->th_seq) != src->seqlo + 1)) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return PF_DROP;
			}
			src->max_win = MAX(ntohs(th->th_win), 1);
			if (dst->seqhi == 1) {
				dst->seqhi = htonl(random());
			}
			/* Open our own SYN toward the real destination. */
			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
			    &pdst->addr, psrc->xport.port, pdst->xport.port,
			    dst->seqhi, 0, TH_SYN, 0,
			    src->mss, 0, 0, (*state)->tag, NULL, NULL);
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		} else if (((th->th_flags & (TH_SYN | TH_ACK)) !=
		    (TH_SYN | TH_ACK)) ||
		    (ntohl(th->th_ack) != dst->seqhi + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_DROP;
		} else {
			/*
			 * Destination answered: ACK both sides and set up
			 * sequence-number modulation to bridge the two
			 * independently negotiated sequence spaces.
			 */
			dst->max_win = MAX(ntohs(th->th_win), 1);
			dst->seqlo = ntohl(th->th_seq);
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
			    TH_ACK, src->max_win, 0, 0, 0,
			    (*state)->tag, NULL, NULL);
			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
			    &pdst->addr, psrc->xport.port, pdst->xport.port,
			    src->seqhi + 1, src->seqlo + 1,
			    TH_ACK, dst->max_win, 0, 0, 1,
			    0, NULL, NULL);
			src->seqdiff = dst->seqhi -
			    src->seqlo;
			dst->seqdiff = src->seqhi -
			    dst->seqlo;
			src->seqhi = src->seqlo +
			    dst->max_win;
			dst->seqhi = dst->seqlo +
			    src->max_win;
			src->wscale = dst->wscale = 0;
			src->state = dst->state =
			    TCPS_ESTABLISHED;
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		}
	}

	/* A fresh SYN on a mostly-closed state: let the peer start over. */
	if (((th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN) &&
	    dst->state >= TCPS_FIN_WAIT_2 &&
	    src->state >= TCPS_FIN_WAIT_2) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state reuse ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf("\n");
		}
		/* XXX make sure it's the same direction ?? */
		src->state = dst->state = TCPS_CLOSED;
		pf_unlink_state(*state);
		*state = NULL;
		return PF_DROP;
	}

	/* Window scaling only applies once negotiated (never on SYNs). */
	if ((th->th_flags & TH_SYN) == 0) {
		sws = (src->wscale & PF_WSCALE_FLAG) ?
		    (src->wscale & PF_WSCALE_MASK) : TCP_MAX_WINSHIFT;
		dws = (dst->wscale & PF_WSCALE_FLAG) ?
		    (dst->wscale & PF_WSCALE_MASK) : TCP_MAX_WINSHIFT;
	} else {
		sws = dws = 0;
	}

	/*
	 * Sequence tracking algorithm from Guido van Rooij's paper:
	 * http://www.madison-gurkha.com/publications/tcp_filtering/
	 * tcp_filtering.ps
	 */

	orig_seq = seq = ntohl(th->th_seq);
	if (src->seqlo == 0) {
		/* First packet from this end. Set its state */

		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
		    src->scrub == NULL) {
			if (pf_normalize_tcp_init(pbuf, off, pd, th, src, dst)) {
				REASON_SET(reason, PFRES_MEMORY);
				return PF_DROP;
			}
		}

		/* Deferred generation of sequence number modulator */
		if (dst->seqdiff && !src->seqdiff) {
			/* use random iss for the TCP server */
			while ((src->seqdiff = random() - seq) == 0) {
				;
			}
			ack = ntohl(th->th_ack) - dst->seqdiff;
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			copyback = off + sizeof(*th);
		} else {
			ack = ntohl(th->th_ack);
		}

		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
			if (dst->wscale & PF_WSCALE_FLAG) {
				src->wscale = pf_get_wscale(pbuf, off,
				    th->th_off, pd->af);
				if (src->wscale & PF_WSCALE_FLAG) {
					/*
					 * Remove scale factor from initial
					 * window
					 */
					sws = src->wscale & PF_WSCALE_MASK;
					win = ((u_int32_t)win + (1 << sws) - 1)
					    >> sws;
					dws = dst->wscale & PF_WSCALE_MASK;
				} else {
					/*
					 * Window scale negotiation has failed,
					 * therefore we must restore the window
					 * scale in the state record that we
					 * optimistically removed in
					 * pf_test_rule(). Care is required to
					 * prevent arithmetic overflow from
					 * zeroing the window when it's
					 * truncated down to 16-bits.
					 */
					u_int32_t max_win = dst->max_win;
					max_win <<=
					    dst->wscale & PF_WSCALE_MASK;
					dst->max_win = MIN(0xffff, max_win);
					/* in case of a retrans SYN|ACK */
					dst->wscale = 0;
				}
			}
		}
		if (th->th_flags & TH_FIN) {
			end++;
		}

		src->seqlo = seq;
		if (src->state < TCPS_SYN_SENT) {
			src->state = TCPS_SYN_SENT;
		}

		/*
		 * May need to slide the window (seqhi may have been set by
		 * the crappy stack check or if we picked up the connection
		 * after establishment)
		 */
		if (src->seqhi == 1 ||
		    SEQ_GEQ(end + MAX(1, (u_int32_t)dst->max_win << dws),
		    src->seqhi)) {
			src->seqhi = end + MAX(1, (u_int32_t)dst->max_win << dws);
		}
		if (win > src->max_win) {
			src->max_win = win;
		}
	} else {
		ack = ntohl(th->th_ack) - dst->seqdiff;
		if (src->seqdiff) {
			/* Modulate sequence numbers */
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			copyback = off + sizeof(*th);
		}
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
		}
		if (th->th_flags & TH_FIN) {
			end++;
		}
	}

	if ((th->th_flags & TH_ACK) == 0) {
		/* Let it pass through the ack skew check */
		ack = dst->seqlo;
	} else if ((ack == 0 &&
	    (th->th_flags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) ||
	    /* broken tcp stacks do not set ack */
	    (dst->state < TCPS_SYN_SENT)) {
		/*
		 * Many stacks (ours included) will set the ACK number in an
		 * FIN|ACK if the SYN times out -- no sequence to ACK.
		 */
		ack = dst->seqlo;
	}

	if (seq == end) {
		/* Ease sequencing restrictions on no data packets */
		seq = src->seqlo;
		end = seq;
	}

	ackskew = dst->seqlo - ack;


	/*
	 * Need to demodulate the sequence numbers in any TCP SACK options
	 * (Selective ACK). We could optionally validate the SACK values
	 * against the current ACK window, either forwards or backwards, but
	 * I'm not confident that SACK has been implemented properly
	 * everywhere. It wouldn't surprise me if several stacks accidently
	 * SACK too far backwards of previously ACKed data. There really aren't
	 * any security implications of bad SACKing unless the target stack
	 * doesn't validate the option length correctly. Someone trying to
	 * spoof into a TCP connection won't bother blindly sending SACK
	 * options anyway.
	 */
	if (dst->seqdiff && (th->th_off << 2) > (int)sizeof(struct tcphdr)) {
		copyback = pf_modulate_sack(pbuf, off, pd, th, dst);
		if (copyback == -1) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		pbuf = pd->mp; // XXXSCW: Why?
	}


#define MAXACKWINDOW (0xffff + 1500)    /* 1500 is an arbitrary fudge factor */
	if (SEQ_GEQ(src->seqhi, end) &&
	    /* Last octet inside other's window space */
	    SEQ_GEQ(seq, src->seqlo - ((u_int32_t)dst->max_win << dws)) &&
	    /* Retrans: not more than one window back */
	    (ackskew >= -MAXACKWINDOW) &&
	    /* Acking not more than one reassembled fragment backwards */
	    (ackskew <= (MAXACKWINDOW << sws)) &&
	    /* Acking not more than one window forward */
	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
	    (pd->flags & PFDESC_IP_REAS) == 0)) {
		/* Require an exact/+1 sequence match on resets when possible */

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(pbuf, off, pd, reason, th,
			    *state, src, dst, &copyback)) {
				return PF_DROP;
			}

			pbuf = pd->mp; // XXXSCW: Why?
		}

		/* update max window */
		if (src->max_win < win) {
			src->max_win = win;
		}
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo)) {
			src->seqlo = end;
		}
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi)) {
			dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
		}

		/* update states */
		if (th->th_flags & TH_SYN) {
			if (src->state < TCPS_SYN_SENT) {
				src->state = TCPS_SYN_SENT;
			}
		}
		if (th->th_flags & TH_FIN) {
			if (src->state < TCPS_CLOSING) {
				src->state = TCPS_CLOSING;
			}
		}
		if (th->th_flags & TH_ACK) {
			if (dst->state == TCPS_SYN_SENT) {
				dst->state = TCPS_ESTABLISHED;
				if (src->state == TCPS_ESTABLISHED &&
				    (*state)->src_node != NULL &&
				    pf_src_connlimit(state)) {
					REASON_SET(reason, PFRES_SRCLIMIT);
					return PF_DROP;
				}
			} else if (dst->state == TCPS_CLOSING) {
				dst->state = TCPS_FIN_WAIT_2;
			}
		}
		if (th->th_flags & TH_RST) {
			src->state = dst->state = TCPS_TIME_WAIT;
		}

		/* update expire time */
		(*state)->expire = pf_time_second();
		if (src->state >= TCPS_FIN_WAIT_2 &&
		    dst->state >= TCPS_FIN_WAIT_2) {
			(*state)->timeout = PFTM_TCP_CLOSED;
		} else if (src->state >= TCPS_CLOSING &&
		    dst->state >= TCPS_CLOSING) {
			(*state)->timeout = PFTM_TCP_FIN_WAIT;
		} else if (src->state < TCPS_ESTABLISHED ||
		    dst->state < TCPS_ESTABLISHED) {
			(*state)->timeout = PFTM_TCP_OPENING;
		} else if (src->state >= TCPS_CLOSING ||
		    dst->state >= TCPS_CLOSING) {
			(*state)->timeout = PFTM_TCP_CLOSING;
		} else {
			(*state)->timeout = PFTM_TCP_ESTABLISHED;
		}

		/* Fall through to PASS packet */
	} else if ((dst->state < TCPS_SYN_SENT ||
	    dst->state >= TCPS_FIN_WAIT_2 || src->state >= TCPS_FIN_WAIT_2) &&
	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
	    /* Within a window forward of the originating packet */
	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
		/* Within a window backward of the originating packet */

		/*
		 * This currently handles three situations:
		 *  1) Stupid stacks will shotgun SYNs before their peer
		 *     replies.
		 *  2) When PF catches an already established stream (the
		 *     firewall rebooted, the state table was flushed, routes
		 *     changed...)
		 *  3) Packets get funky immediately after the connection
		 *     closes (this should catch Solaris spurious ACK|FINs
		 *     that web servers like to spew after a close)
		 *
		 * This must be a little more careful than the above code
		 * since packet floods will also be caught here. We don't
		 * update the TTL here to mitigate the damage of a packet
		 * flood and so the same code can handle awkward establishment
		 * and a loosened connection close.
		 * In the establishment case, a correct peer response will
		 * validate the connection, go through the normal state code
		 * and keep updating the state TTL.
		 */

		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: loose state match: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
			    pd->p_len, ackskew, (*state)->packets[0],
			    (*state)->packets[1],
			    direction == PF_IN ? "in" : "out",
			    direction == sk->direction ?
			    "fwd" : "rev");
		}

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(pbuf, off, pd, reason, th,
			    *state, src, dst, &copyback)) {
				return PF_DROP;
			}
			pbuf = pd->mp; // XXXSCW: Why?
		}

		/* update max window */
		if (src->max_win < win) {
			src->max_win = win;
		}
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo)) {
			src->seqlo = end;
		}
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi)) {
			dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
		}

		/*
		 * Cannot set dst->seqhi here since this could be a shotgunned
		 * SYN and not an already established connection.
		 */

		if (th->th_flags & TH_FIN) {
			if (src->state < TCPS_CLOSING) {
				src->state = TCPS_CLOSING;
			}
		}
		if (th->th_flags & TH_RST) {
			src->state = dst->state = TCPS_TIME_WAIT;
		}

		/* Fall through to PASS packet */
	} else {
		if (dst->state == TCPS_SYN_SENT &&
		    src->state == TCPS_SYN_SENT) {
			/* Send RST for state mismatches during handshake */
			if (!(th->th_flags & TH_RST)) {
				pf_send_tcp((*state)->rule.ptr, pd->af,
				    pd->dst, pd->src, th->th_dport,
				    th->th_sport, ntohl(th->th_ack), 0,
				    TH_RST, 0, 0,
				    (*state)->rule.ptr->return_ttl, 1, 0,
				    pd->eh, kif->pfik_ifp);
			}
			src->seqlo = 0;
			src->seqhi = 1;
			src->max_win = 1;
		} else if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: BAD state: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf("\n seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "sws=%u dws=%u pkts=%llu:%llu dir=%s,%s\n",
			    seq, orig_seq, ack, pd->p_len, ackskew,
			    (unsigned int)sws, (unsigned int)dws,
			    (*state)->packets[0], (*state)->packets[1],
			    direction == PF_IN ? "in" : "out",
			    direction == sk->direction ?
			    "fwd" : "rev");
			printf("pf: State failure on: %c %c %c %c | %c %c\n",
			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
			    SEQ_GEQ(seq,
			    src->seqlo - ((u_int32_t)dst->max_win << dws)) ?
			    ' ': '2',
			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
		}
		REASON_SET(reason, PFRES_BADSTATE);
		return PF_DROP;
	}

	/* Any packets which have gotten here are to be passed */

	/* Give the application-layer handler (e.g. PPTP ALG) a look. */
	if (sk->app_state &&
	    sk->app_state->handler) {
		sk->app_state->handler(*state, direction,
		    off + (th->th_off << 2), pd, kif);
		if (pd->lmw < 0) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}
		pbuf = pd->mp; // XXXSCW: Why?
	}

	/* translate source/destination address, if necessary */
	if (STATE_TRANSLATE(sk)) {
		pd->naf = (pd->af == sk->af_lan) ? sk->af_gwy : sk->af_lan;

		if (direction == PF_OUT) {
			pf_change_ap(direction, pd->mp, pd->src, &th->th_sport,
			    pd->ip_sum, &th->th_sum, &sk->gwy.addr,
			    sk->gwy.xport.port, 0, pd->af, pd->naf, 1);
		} else {
			/* NAT64: both addresses change families inbound. */
			if (pd->af != pd->naf) {
				if (pd->af == sk->af_gwy) {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &th->th_dport, pd->ip_sum,
					    &th->th_sum, &sk->lan.addr,
					    sk->lan.xport.port, 0,
					    pd->af, pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &th->th_sport, pd->ip_sum,
					    &th->th_sum, &sk->ext_lan.addr,
					    th->th_sport, 0, pd->af,
					    pd->naf, 0);
				} else {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &th->th_dport, pd->ip_sum,
					    &th->th_sum, &sk->ext_gwy.addr,
					    th->th_dport, 0, pd->af,
					    pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &th->th_sport, pd->ip_sum,
					    &th->th_sum, &sk->gwy.addr,
					    sk->gwy.xport.port, 0, pd->af,
					    pd->naf, 0);
				}
			} else {
				pf_change_ap(direction, pd->mp, pd->dst,
				    &th->th_dport, pd->ip_sum,
				    &th->th_sum, &sk->lan.addr,
				    sk->lan.xport.port, 0, pd->af,
				    pd->naf, 1);
			}
		}

		copyback = off + sizeof(*th);
	}

	if (copyback) {
		if (pf_lazy_makewritable(pd, pbuf, copyback) == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		/* Copyback sequence modulation or stateful scrub changes */
		pbuf_copy_back(pbuf, off, sizeof(*th), th, sizeof(*th));

		if (sk->af_lan != sk->af_gwy) {
			return pf_do_nat64(sk, pd, pbuf, off);
		}
	}
	return PF_PASS;
}
7617
/*
 * pf_test_state_udp: match a UDP packet against the state table and, when a
 * state exists, refresh its peer states/timers and apply any NAT (or NAT64)
 * rewrite recorded in the state key.
 *
 * Returns PF_PASS when the packet matches a state (after translation, if
 * any), PF_DROP on failure (with *reason set), or delegates the final
 * verdict to pf_state_lookup_aux()/pf_do_nat64().  'h' is unused; 'off' is
 * the offset of the UDP header within 'pbuf'.
 */
static __attribute__((noinline)) int
pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd, u_short *reason)
{
#pragma unused(h)
	struct pf_state_peer *__single src, *__single dst;
	struct pf_state_key_cmp key;
	struct pf_state_key *__single sk;
	struct udphdr *__single uh = pf_pd_get_hdr_udp(pd);
	struct pf_app_state as;
	int action, extfilter;
	key.app_state = 0;
	/* Start with the strictest external filter; relaxed below on miss. */
	key.proto_variant = PF_EXTFILTER_APD;

	key.proto = IPPROTO_UDP;
	key.af_lan = key.af_gwy = pd->af;

	/*
	 * For NAT64 the first time rule search and state creation
	 * is done on the incoming side only.
	 * Once the state gets created, NAT64's LAN side (ipv6) will
	 * not be able to find the state in ext-gwy tree as that normally
	 * is intended to be looked up for incoming traffic from the
	 * WAN side.
	 * Therefore to handle NAT64 case we init keys here for both
	 * lan-ext as well as ext-gwy trees.
	 * In the state lookup we attempt a lookup on both trees if
	 * first one does not return any result and return a match if
	 * the match state's was created by NAT64 rule.
	 */
	PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
	PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
	key.ext_gwy.xport.port = uh->uh_sport;
	key.gwy.xport.port = uh->uh_dport;

	PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
	PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
	key.lan.xport.port = uh->uh_sport;
	key.ext_lan.xport.port = uh->uh_dport;

	/*
	 * IKE (UDP 500 <-> 500): additionally key the state on the IKE
	 * initiator cookie so concurrent exchanges between the same pair of
	 * hosts map to distinct states.
	 */
	if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
	    ntohs(uh->uh_dport) == PF_IKE_PORT) {
		struct pf_ike_hdr ike;
		/*
		 * NOTE(review): size_t subtraction -- if off + sizeof(*uh)
		 * could ever exceed pb_packet_len this would wrap to a huge
		 * value and slip past the MINSIZE check below; presumably
		 * earlier header parsing guarantees it cannot.  Confirm.
		 */
		size_t plen = pbuf->pb_packet_len - off - sizeof(*uh);
		if (plen < PF_IKE_PACKET_MINSIZE) {
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: IKE message too small.\n"));
			return PF_DROP;
		}

		if (plen > sizeof(ike)) {
			plen = sizeof(ike);
		}
		pbuf_copy_data(pbuf, off + sizeof(*uh), plen, &ike, sizeof(ike));

		if (ike.initiator_cookie) {
			key.app_state = &as;
			as.compare_lan_ext = pf_ike_compare;
			as.compare_ext_gwy = pf_ike_compare;
			as.u.ike.cookie = ike.initiator_cookie;
		} else {
			/*
			 * <http://tools.ietf.org/html/\
			 * draft-ietf-ipsec-nat-t-ike-01>
			 * Support non-standard NAT-T implementations that
			 * push the ESP packet over the top of the IKE packet.
			 * Do not drop packet.
			 */
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: IKE initiator cookie = 0.\n"));
		}
	}

	*state = pf_find_state(kif, &key, direction);

	/*
	 * Miss with the strict filter: retry with progressively looser
	 * external-endpoint filters (address+port -> address -> endpoint-
	 * independent).  App-state (IKE) keyed lookups stay exact.
	 */
	if (!key.app_state && *state == 0) {
		key.proto_variant = PF_EXTFILTER_AD;
		*state = pf_find_state(kif, &key, direction);
	}

	if (!key.app_state && *state == 0) {
		key.proto_variant = PF_EXTFILTER_EI;
		*state = pf_find_state(kif, &key, direction);
	}

	/* similar to STATE_LOOKUP() */
	if (*state != NULL && pd != NULL && !(pd->pktflags & PKTF_FLOW_ID)) {
		/* Propagate the state's flow id onto the packet descriptor. */
		pd->flowsrc = (*state)->state_key->flowsrc;
		pd->flowhash = (*state)->state_key->flowhash;
		if (pd->flowhash != 0) {
			pd->pktflags |= PKTF_FLOW_ID;
			pd->pktflags &= ~PKTF_FLOW_ADV;
		}
	}

	/* May replace or reject the looked-up state; returns final action. */
	if (pf_state_lookup_aux(state, kif, direction, &action)) {
		return action;
	}

	sk = (*state)->state_key;

	/*
	 * In case of NAT64 the translation is first applied on the LAN
	 * side. Therefore for stack's address family comparison
	 * we use sk->af_lan.
	 */
	if ((direction == sk->direction) && (pd->af == sk->af_lan)) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFUDPS_SINGLE) {
		src->state = PFUDPS_SINGLE;
	}
	if (dst->state == PFUDPS_SINGLE) {
		dst->state = PFUDPS_MULTIPLE;
	}

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) {
		(*state)->timeout = PFTM_UDP_MULTIPLE;
	} else {
		(*state)->timeout = PFTM_UDP_SINGLE;
	}

	/*
	 * A loose proto_variant means the external endpoint was wildcarded
	 * at state-creation time; latch the now-known external port (and,
	 * for endpoint-independent filtering, the address too) into the key.
	 */
	extfilter = sk->proto_variant;
	if (extfilter > PF_EXTFILTER_APD) {
		if (direction == PF_OUT) {
			sk->ext_lan.xport.port = key.ext_lan.xport.port;
			if (extfilter > PF_EXTFILTER_AD) {
				PF_ACPY(&sk->ext_lan.addr, &key.ext_lan.addr,
				    key.af_lan);
			}
		} else {
			sk->ext_gwy.xport.port = key.ext_gwy.xport.port;
			if (extfilter > PF_EXTFILTER_AD) {
				PF_ACPY(&sk->ext_gwy.addr, &key.ext_gwy.addr,
				    key.af_gwy);
			}
		}
	}

	/*
	 * Application-level helper (e.g. PPTP/IKE) may inspect or rewrite
	 * the payload; a negative pd->lmw signals an mbuf/cluster failure.
	 * NOTE(review): uh->uh_ulen is network byte order here, so
	 * off + uh->uh_ulen looks suspect -- matches upstream; verify
	 * against the handler's expectations before changing.
	 */
	if (sk->app_state && sk->app_state->handler) {
		sk->app_state->handler(*state, direction, off + uh->uh_ulen,
		    pd, kif);
		if (pd->lmw < 0) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}
		pbuf = pd->mp;          // XXXSCW: Why?
	}

	/* translate source/destination address, if necessary */
	if (STATE_TRANSLATE(sk)) {
		if (pf_lazy_makewritable(pd, pbuf, off + sizeof(*uh)) == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		/* Post-translation family: the "other" side for NAT64. */
		pd->naf = (pd->af == sk->af_lan) ? sk->af_gwy : sk->af_lan;

		if (direction == PF_OUT) {
			pf_change_ap(direction, pd->mp, pd->src, &uh->uh_sport,
			    pd->ip_sum, &uh->uh_sum, &sk->gwy.addr,
			    sk->gwy.xport.port, 1, pd->af, pd->naf, 1);
		} else {
			if (pd->af != pd->naf) {
				/*
				 * NAT64: rewrite both addresses; which key
				 * halves apply depends on which family the
				 * packet arrived with.
				 */
				if (pd->af == sk->af_gwy) {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &uh->uh_dport, pd->ip_sum,
					    &uh->uh_sum, &sk->lan.addr,
					    sk->lan.xport.port, 1,
					    pd->af, pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &uh->uh_sport, pd->ip_sum,
					    &uh->uh_sum, &sk->ext_lan.addr,
					    uh->uh_sport, 1, pd->af,
					    pd->naf, 0);
				} else {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &uh->uh_dport, pd->ip_sum,
					    &uh->uh_sum, &sk->ext_gwy.addr,
					    uh->uh_dport, 1, pd->af,
					    pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &uh->uh_sport, pd->ip_sum,
					    &uh->uh_sum, &sk->gwy.addr,
					    sk->gwy.xport.port, 1, pd->af,
					    pd->naf, 0);
				}
			} else {
				/* Plain inbound NAT: restore LAN endpoint. */
				pf_change_ap(direction, pd->mp, pd->dst,
				    &uh->uh_dport, pd->ip_sum,
				    &uh->uh_sum, &sk->lan.addr,
				    sk->lan.xport.port, 1,
				    pd->af, pd->naf, 1);
			}
		}

		pbuf_copy_back(pbuf, off, sizeof(*uh), uh, sizeof(*uh));
		/* Families differ => NAT64 must rewrite the IP header too. */
		if (sk->af_lan != sk->af_gwy) {
			return pf_do_nat64(sk, pd, pbuf, off);
		}
	}
	return PF_PASS;
}
7831
/*
 * Build a flow generation count for an ICMP/ICMPv6 packet from its family,
 * type and code, packed as (af << 24) | (type << 16) | (code << 8).
 * Only error messages that matter for flow accounting contribute:
 * v4 unreachable/time-exceeded, v6 unreachable/param-problem/time-exceeded.
 * Every other message type yields 0.  'af' selects the v4 branch when it
 * equals PF_INET; any other value is treated as v6.
 */
static u_int32_t
pf_compute_packet_icmp_gencnt(uint32_t af, u_int32_t type, u_int32_t code)
{
	int relevant;

	if (af == PF_INET) {
		relevant = (type == ICMP_UNREACH || type == ICMP_TIMXCEED);
	} else {
		relevant = (type == ICMP6_DST_UNREACH ||
		    type == ICMP6_PARAM_PROB || type == ICMP6_TIME_EXCEEDED);
	}

	if (!relevant) {
		return 0;
	}
	return (af << 24) | (type << 16) | (code << 8);
}
7847
7848
/*
 * pf_test_state_icmp: match an ICMP/ICMPv6 packet against the state table.
 *
 * Two distinct paths:
 *  - query/reply messages (state_icmp == 0): the icmp id plays the role of
 *    a port in the state key; translation rewrites the outer address and
 *    icmp id, with ICMP<->ICMPv6 conversion when the state is NAT64.
 *  - error messages: the embedded (original) IP + transport header is
 *    pulled out and the state lookup/translation is performed on that
 *    inner header, via a second descriptor 'pd2'.
 *
 * Returns PF_PASS / PF_DROP (with *reason set), or the result of the NAT64
 * rewriters when address-family translation applies.  'h' is unused.
 */
static __attribute__((noinline)) int
pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd, u_short *reason)
{
#pragma unused(h)
	struct pf_addr *__single saddr = pd->src, *__single daddr = pd->dst;
	/* Saved before any rewrite: NAT64 embeds it in the v6-mapped source. */
	struct in_addr srcv4_inaddr = saddr->v4addr;
	u_int16_t icmpid = 0, *__single icmpsum = NULL;
	u_int8_t icmptype = 0;
	u_int32_t icmpcode = 0;
	int state_icmp = 0;
	struct pf_state_key_cmp key;
	struct pf_state_key *__single sk;

	struct pf_app_state as;
	key.app_state = 0;

	pd->off = off;

	/*
	 * Extract type/id/checksum pointer/code for either family and decide
	 * whether this is an error message (state_icmp != 0).
	 */
	switch (pd->proto) {
#if INET
	case IPPROTO_ICMP:
		icmptype = pf_pd_get_hdr_icmp(pd)->icmp_type;
		icmpid = pf_pd_get_hdr_icmp(pd)->icmp_id;
		icmpsum = &pf_pd_get_hdr_icmp(pd)->icmp_cksum;
		icmpcode = pf_pd_get_hdr_icmp(pd)->icmp_code;

		if (ICMP_ERRORTYPE(icmptype)) {
			state_icmp++;
		}
		break;
#endif /* INET */
	case IPPROTO_ICMPV6:
		icmptype = pf_pd_get_hdr_icmp6(pd)->icmp6_type;
		icmpid = pf_pd_get_hdr_icmp6(pd)->icmp6_id;
		icmpsum = &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum;
		icmpcode = pf_pd_get_hdr_icmp6(pd)->icmp6_code;

		if (ICMP6_ERRORTYPE(icmptype)) {
			state_icmp++;
		}
		break;
	}

	/* Stamp a flow generation count on the packet if not already set. */
	if (pbuf != NULL && pbuf->pb_flow_gencnt != NULL &&
	    *pbuf->pb_flow_gencnt == 0) {
		u_int32_t af = pd->proto == IPPROTO_ICMP ? PF_INET : PF_INET6;
		*pbuf->pb_flow_gencnt = pf_compute_packet_icmp_gencnt(af, icmptype, icmpcode);
	}

	if (!state_icmp) {
		/*
		 * ICMP query/reply message not related to a TCP/UDP packet.
		 * Search for an ICMP state.
		 */
		/*
		 * NAT64 requires protocol translation between ICMPv4
		 * and ICMPv6. TCP and UDP do not require protocol
		 * translation. To avoid adding complexity just to
		 * handle ICMP(v4addr/v6addr), we always lookup for
		 * proto = IPPROTO_ICMP on both LAN and WAN side
		 */
		key.proto = IPPROTO_ICMP;
		key.af_lan = key.af_gwy = pd->af;

		/* The icmp id takes the place of a port in the key. */
		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
		key.ext_gwy.xport.port = 0;
		key.gwy.xport.port = icmpid;

		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
		key.lan.xport.port = icmpid;
		key.ext_lan.xport.port = 0;

		STATE_LOOKUP();

		sk = (*state)->state_key;
		(*state)->expire = pf_time_second();
		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;

		/* translate source/destination address, if necessary */
		if (STATE_TRANSLATE(sk)) {
			pd->naf = (pd->af == sk->af_lan) ?
			    sk->af_gwy : sk->af_lan;
			if (direction == PF_OUT) {
				/* Outbound: rewrite source to gateway side. */
				switch (pd->af) {
#if INET
				case AF_INET:
					pf_change_a(&saddr->v4addr.s_addr,
					    pd->ip_sum,
					    sk->gwy.addr.v4addr.s_addr, 0);
					pf_pd_get_hdr_icmp(pd)->icmp_cksum =
					    pf_cksum_fixup(
						pf_pd_get_hdr_icmp(pd)->icmp_cksum, icmpid,
						sk->gwy.xport.port, 0);
					pf_pd_get_hdr_icmp(pd)->icmp_id =
					    sk->gwy.xport.port;
					if (pf_lazy_makewritable(pd, pbuf,
					    off + ICMP_MINLEN) == NULL) {
						return PF_DROP;
					}
					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
					    pf_pd_get_hdr_ptr_icmp(pd), sizeof(struct icmp));
					break;
#endif /* INET */
				case AF_INET6:
					pf_change_a6(saddr,
					    &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum,
					    &sk->gwy.addr, 0);
					if (pf_lazy_makewritable(pd, pbuf,
					    off + sizeof(struct icmp6_hdr)) ==
					    NULL) {
						return PF_DROP;
					}
					pbuf_copy_back(pbuf, off,
					    sizeof(struct icmp6_hdr),
					    pf_pd_get_hdr_ptr_icmp6(pd), sizeof(struct icmp6_hdr));
					break;
				}
			} else {
				/*
				 * Inbound: restore LAN destination; if the
				 * state is NAT64 (pd->naf differs), first
				 * convert the ICMP header to the other
				 * family and let pf_do_nat64() finish.
				 */
				switch (pd->af) {
#if INET
				case AF_INET:
					if (pd->naf != AF_INET) {
						if (pf_translate_icmp_af(
							    AF_INET6, pf_pd_get_hdr_icmp(pd))) {
							return PF_DROP;
						}

						pd->proto = IPPROTO_ICMPV6;
					} else {
						pf_change_a(&daddr->v4addr.s_addr,
						    pd->ip_sum,
						    sk->lan.addr.v4addr.s_addr, 0);

						pf_pd_get_hdr_icmp(pd)->icmp_cksum =
						    pf_cksum_fixup(
							pf_pd_get_hdr_icmp(pd)->icmp_cksum,
							icmpid, sk->lan.xport.port, 0);

						pf_pd_get_hdr_icmp(pd)->icmp_id =
						    sk->lan.xport.port;
					}

					if (pf_lazy_makewritable(pd, pbuf,
					    off + ICMP_MINLEN) == NULL) {
						return PF_DROP;
					}
					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
					    pf_pd_get_hdr_ptr_icmp(pd), sizeof(struct icmp));
					if (sk->af_lan != sk->af_gwy) {
						return pf_do_nat64(sk, pd,
						           pbuf, off);
					}
					break;
#endif /* INET */
				case AF_INET6:
					if (pd->naf != AF_INET6) {
						if (pf_translate_icmp_af(
							    AF_INET, pf_pd_get_hdr_icmp6(pd))) {
							return PF_DROP;
						}

						pd->proto = IPPROTO_ICMP;
					} else {
						pf_change_a6(daddr,
						    &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum,
						    &sk->lan.addr, 0);
					}
					if (pf_lazy_makewritable(pd, pbuf,
					    off + sizeof(struct icmp6_hdr)) ==
					    NULL) {
						return PF_DROP;
					}
					pbuf_copy_back(pbuf, off,
					    sizeof(struct icmp6_hdr),
					    pf_pd_get_hdr_ptr_icmp6(pd), sizeof(struct icmp6_hdr));
					if (sk->af_lan != sk->af_gwy) {
						return pf_do_nat64(sk, pd,
						           pbuf, off);
					}
					break;
				}
			}
		}

		return PF_PASS;
	} else {
		/*
		 * ICMP error message in response to a TCP/UDP packet.
		 * Extract the inner TCP/UDP header and search for that state.
		 */
		struct pf_pdesc pd2;            /* For inner (original) header */
#if INET
		struct ip h2;
#endif /* INET */
		struct ip6_hdr h2_6;
		int terminal = 0;
		int ipoff2 = 0;                 /* offset of inner IP header */
		int off2 = 0;                   /* offset of inner transport hdr */

		memset(&pd2, 0, sizeof(pd2));

		/* Parse the embedded IP/IPv6 header into pd2/h2/h2_6. */
		pd2.af = pd->af;
		switch (pd->af) {
#if INET
		case AF_INET:
			/* offset of h2 in mbuf chain */
			ipoff2 = off + ICMP_MINLEN;

			if (!pf_pull_hdr(pbuf, ipoff2, &h2, sizeof(h2), sizeof(h2),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: ICMP error message too short "
				    "(ip)\n"));
				return PF_DROP;
			}
			/*
			 * ICMP error messages don't refer to non-first
			 * fragments
			 */
			if (h2.ip_off & htons(IP_OFFMASK)) {
				REASON_SET(reason, PFRES_FRAG);
				return PF_DROP;
			}

			/* offset of protocol header that follows h2 */
			off2 = ipoff2 + (h2.ip_hl << 2);
			/* TODO */
			pd2.off = ipoff2 + (h2.ip_hl << 2);

			pd2.proto = h2.ip_p;
			pd2.src = (struct pf_addr *)&h2.ip_src;
			pd2.dst = (struct pf_addr *)&h2.ip_dst;
			pd2.ip_sum = &h2.ip_sum;
			break;
#endif /* INET */
		case AF_INET6:
			ipoff2 = off + sizeof(struct icmp6_hdr);

			if (!pf_pull_hdr(pbuf, ipoff2, &h2_6, sizeof(h2_6), sizeof(h2_6),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: ICMP error message too short "
				    "(ip6)\n"));
				return PF_DROP;
			}
			pd2.proto = h2_6.ip6_nxt;
			pd2.src = (struct pf_addr *)(void *)&h2_6.ip6_src;
			pd2.dst = (struct pf_addr *)(void *)&h2_6.ip6_dst;
			pd2.ip_sum = NULL;
			off2 = ipoff2 + sizeof(h2_6);
			/*
			 * Walk the embedded extension-header chain until a
			 * transport protocol (or an unhandled header) is hit.
			 */
			do {
				switch (pd2.proto) {
				case IPPROTO_FRAGMENT:
					/*
					 * ICMPv6 error messages for
					 * non-first fragments
					 */
					REASON_SET(reason, PFRES_FRAG);
					return PF_DROP;
				case IPPROTO_AH:
				case IPPROTO_HOPOPTS:
				case IPPROTO_ROUTING:
				case IPPROTO_DSTOPTS: {
					/* get next header and header length */
					struct ip6_ext opt6;

					if (!pf_pull_hdr(pbuf, off2, &opt6, sizeof(opt6),
					    sizeof(opt6), NULL, reason,
					    pd2.af)) {
						DPFPRINTF(PF_DEBUG_MISC,
						    ("pf: ICMPv6 short opt\n"));
						return PF_DROP;
					}
					/* AH length is in 4-byte units, others in 8. */
					if (pd2.proto == IPPROTO_AH) {
						off2 += (opt6.ip6e_len + 2) * 4;
					} else {
						off2 += (opt6.ip6e_len + 1) * 8;
					}
					pd2.proto = opt6.ip6e_nxt;
					/* goto the next header */
					break;
				}
				default:
					terminal++;
					break;
				}
			} while (!terminal);
			/* TODO */
			pd2.off = ipoff2;
			break;
		}

		/* Dispatch on the embedded transport protocol. */
		switch (pd2.proto) {
		case IPPROTO_TCP: {
			struct tcphdr th;
			u_int32_t seq;
			struct pf_state_peer *src, *dst;
			u_int8_t dws;
			int copyback = 0;

			/*
			 * Only the first 8 bytes of the TCP header can be
			 * expected. Don't access any TCP header fields after
			 * th_seq, an ackskew test is not possible.
			 */
			if (!pf_pull_hdr(pbuf, off2, &th, sizeof(th), 8, NULL, reason,
			    pd2.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: ICMP error message too short "
				    "(tcp)\n"));
				return PF_DROP;
			}

			/*
			 * Inner src/dst are swapped relative to the original
			 * flow: the error quotes the packet we sent.
			 */
			key.proto = IPPROTO_TCP;
			key.af_gwy = pd2.af;
			PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
			PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
			key.ext_gwy.xport.port = th.th_dport;
			key.gwy.xport.port = th.th_sport;

			key.af_lan = pd2.af;
			PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
			PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
			key.lan.xport.port = th.th_dport;
			key.ext_lan.xport.port = th.th_sport;

			STATE_LOOKUP();

			sk = (*state)->state_key;
			if ((direction == sk->direction) &&
			    ((sk->af_lan == sk->af_gwy) ||
			    (pd2.af == sk->af_lan))) {
				src = &(*state)->dst;
				dst = &(*state)->src;
			} else {
				src = &(*state)->src;
				dst = &(*state)->dst;
			}

			if (src->wscale && (dst->wscale & PF_WSCALE_FLAG)) {
				dws = dst->wscale & PF_WSCALE_MASK;
			} else {
				dws = TCP_MAX_WINSHIFT;
			}

			/* Demodulate sequence number */
			seq = ntohl(th.th_seq) - src->seqdiff;
			if (src->seqdiff) {
				pf_change_a(&th.th_seq, icmpsum,
				    htonl(seq), 0);
				copyback = 1;
			}

			/*
			 * Sanity: the quoted sequence must fall inside the
			 * sender's window, else treat the error as forged.
			 */
			if (!SEQ_GEQ(src->seqhi, seq) ||
			    !SEQ_GEQ(seq,
			    src->seqlo - ((u_int32_t)dst->max_win << dws))) {
				if (pf_status.debug >= PF_DEBUG_MISC) {
					printf("pf: BAD ICMP %d:%d ",
					    icmptype, pf_pd_get_hdr_icmp(pd)->icmp_code);
					pf_print_host(pd->src, 0, pd->af);
					printf(" -> ");
					pf_print_host(pd->dst, 0, pd->af);
					printf(" state: ");
					pf_print_state(*state);
					printf(" seq=%u\n", seq);
				}
				REASON_SET(reason, PFRES_BADSTATE);
				return PF_DROP;
			}

			pd->naf = pd2.naf = (pd2.af == sk->af_lan) ?
			    sk->af_gwy : sk->af_lan;

			if (STATE_TRANSLATE(sk)) {
				/* NAT64 case */
				if (sk->af_lan != sk->af_gwy) {
					struct pf_state_host *saddr2, *daddr2;

					if (pd2.naf == sk->af_lan) {
						saddr2 = &sk->lan;
						daddr2 = &sk->ext_lan;
					} else {
						saddr2 = &sk->ext_gwy;
						daddr2 = &sk->gwy;
					}

					/* translate ICMP message types and codes */
					if (pf_translate_icmp_af(pd->naf,
					    pf_pd_get_hdr_icmp(pd))) {
						return PF_DROP;
					}

					if (pf_lazy_makewritable(pd, pbuf,
					    off2 + 8) == NULL) {
						return PF_DROP;
					}

					pbuf_copy_back(pbuf, pd->off,
					    sizeof(struct icmp6_hdr),
					    pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);

					/*
					 * translate inner ip header within the
					 * ICMP message
					 */
					if (pf_change_icmp_af(pbuf, ipoff2, pd,
					    &pd2, &saddr2->addr, &daddr2->addr,
					    pd->af, pd->naf)) {
						return PF_DROP;
					}

					if (pd->naf == AF_INET) {
						pd->proto = IPPROTO_ICMP;
					} else {
						pd->proto = IPPROTO_ICMPV6;
					}

					/*
					 * translate inner tcp header within
					 * the ICMP message
					 */
					pf_change_ap(direction, NULL, pd2.src,
					    &th.th_sport, pd2.ip_sum,
					    &th.th_sum, &daddr2->addr,
					    saddr2->xport.port, 0, pd2.af,
					    pd2.naf, 0);

					pf_change_ap(direction, NULL, pd2.dst,
					    &th.th_dport, pd2.ip_sum,
					    &th.th_sum, &saddr2->addr,
					    daddr2->xport.port, 0, pd2.af,
					    pd2.naf, 0);

					pbuf_copy_back(pbuf, pd2.off, 8, &th, sizeof(th));

					/* translate outer ip header */
					PF_ACPY(&pd->naddr, &daddr2->addr,
					    pd->naf);
					PF_ACPY(&pd->ndaddr, &saddr2->addr,
					    pd->naf);
					if (pd->af == AF_INET) {
						/*
						 * Embed the original v4 source
						 * in the low 32 bits of the
						 * synthesized v6 address.
						 */
						memcpy(&pd->naddr.addr32[3],
						    &srcv4_inaddr,
						    sizeof(pd->naddr.addr32[3]));
						return pf_nat64_ipv4(pbuf, off,
						           pd);
					} else {
						return pf_nat64_ipv6(pbuf, off,
						           pd);
					}
				}
				/* Same-family NAT: rewrite inner + outer. */
				if (direction == PF_IN) {
					pf_change_icmp(pd2.src, &th.th_sport,
					    daddr, &sk->lan.addr,
					    sk->lan.xport.port, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, pd2.af);
				} else {
					pf_change_icmp(pd2.dst, &th.th_dport,
					    saddr, &sk->gwy.addr,
					    sk->gwy.xport.port, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, pd2.af);
				}
				copyback = 1;
			}

			if (copyback) {
				if (pf_lazy_makewritable(pd, pbuf, off2 + 8) ==
				    NULL) {
					return PF_DROP;
				}
				switch (pd2.af) {
#if INET
				case AF_INET:
					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
					    pf_pd_get_hdr_ptr_icmp(pd), pd->hdrmaxlen);
					pbuf_copy_back(pbuf, ipoff2, sizeof(h2),
					    &h2, sizeof(h2));
					break;
#endif /* INET */
				case AF_INET6:
					pbuf_copy_back(pbuf, off,
					    sizeof(struct icmp6_hdr),
					    pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);
					pbuf_copy_back(pbuf, ipoff2,
					    sizeof(h2_6), &h2_6, sizeof(h2_6));
					break;
				}
				pbuf_copy_back(pbuf, off2, 8, &th, sizeof(th));
			}

			return PF_PASS;
		}
		case IPPROTO_UDP: {
			struct udphdr uh;
			int dx, action;
			if (!pf_pull_hdr(pbuf, off2, &uh, sizeof(uh), sizeof(uh),
			    NULL, reason, pd2.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: ICMP error message too short "
				    "(udp)\n"));
				return PF_DROP;
			}

			/* As for TCP: quoted src/dst are our original dst/src. */
			key.af_gwy = pd2.af;
			PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
			PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
			key.ext_gwy.xport.port = uh.uh_dport;
			key.gwy.xport.port = uh.uh_sport;

			key.af_lan = pd2.af;
			PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
			PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
			key.lan.xport.port = uh.uh_dport;
			key.ext_lan.xport.port = uh.uh_sport;

			key.proto = IPPROTO_UDP;
			key.proto_variant = PF_EXTFILTER_APD;
			dx = direction;

			/* Embedded IKE: key on the initiator cookie too. */
			if (ntohs(uh.uh_sport) == PF_IKE_PORT &&
			    ntohs(uh.uh_dport) == PF_IKE_PORT) {
				struct pf_ike_hdr ike;
				size_t plen = pbuf->pb_packet_len - off2 -
				    sizeof(uh);
				if (direction == PF_IN &&
				    plen < 8 /* PF_IKE_PACKET_MINSIZE */) {
					DPFPRINTF(PF_DEBUG_MISC, ("pf: "
					    "ICMP error, embedded IKE message "
					    "too small.\n"));
					return PF_DROP;
				}

				if (plen > sizeof(ike)) {
					plen = sizeof(ike);
				}
				/*
				 * NOTE(review): copy offset uses 'off' while
				 * plen was computed from 'off2' -- looks
				 * inconsistent; matches upstream, confirm
				 * before changing.
				 */
				pbuf_copy_data(pbuf, off + sizeof(uh), plen,
				    &ike, sizeof(ike));

				key.app_state = &as;
				as.compare_lan_ext = pf_ike_compare;
				as.compare_ext_gwy = pf_ike_compare;
				as.u.ike.cookie = ike.initiator_cookie;
			}

			*state = pf_find_state(kif, &key, dx);

			/* Retry without the IKE cookie, then with looser filters. */
			if (key.app_state && *state == 0) {
				key.app_state = 0;
				*state = pf_find_state(kif, &key, dx);
			}

			if (*state == 0) {
				key.proto_variant = PF_EXTFILTER_AD;
				*state = pf_find_state(kif, &key, dx);
			}

			if (*state == 0) {
				key.proto_variant = PF_EXTFILTER_EI;
				*state = pf_find_state(kif, &key, dx);
			}

			/* similar to STATE_LOOKUP() */
			if (*state != NULL && pd != NULL &&
			    !(pd->pktflags & PKTF_FLOW_ID)) {
				pd->flowsrc = (*state)->state_key->flowsrc;
				pd->flowhash = (*state)->state_key->flowhash;
				if (pd->flowhash != 0) {
					pd->pktflags |= PKTF_FLOW_ID;
					pd->pktflags &= ~PKTF_FLOW_ADV;
				}
			}

			if (pf_state_lookup_aux(state, kif, direction, &action)) {
				return action;
			}

			sk = (*state)->state_key;
			pd->naf = pd2.naf = (pd2.af == sk->af_lan) ?
			    sk->af_gwy : sk->af_lan;

			if (STATE_TRANSLATE(sk)) {
				/* NAT64 case */
				if (sk->af_lan != sk->af_gwy) {
					struct pf_state_host *saddr2, *daddr2;

					if (pd2.naf == sk->af_lan) {
						saddr2 = &sk->lan;
						daddr2 = &sk->ext_lan;
					} else {
						saddr2 = &sk->ext_gwy;
						daddr2 = &sk->gwy;
					}

					/* translate ICMP message */
					if (pf_translate_icmp_af(pd->naf,
					    pf_pd_get_hdr_icmp(pd))) {
						return PF_DROP;
					}
					if (pf_lazy_makewritable(pd, pbuf,
					    off2 + 8) == NULL) {
						return PF_DROP;
					}

					pbuf_copy_back(pbuf, pd->off,
					    sizeof(struct icmp6_hdr),
					    pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);

					/*
					 * translate inner ip header within the
					 * ICMP message
					 */
					if (pf_change_icmp_af(pbuf, ipoff2, pd,
					    &pd2, &saddr2->addr, &daddr2->addr,
					    pd->af, pd->naf)) {
						return PF_DROP;
					}

					if (pd->naf == AF_INET) {
						pd->proto = IPPROTO_ICMP;
					} else {
						pd->proto = IPPROTO_ICMPV6;
					}

					/*
					 * translate inner udp header within
					 * the ICMP message
					 */
					pf_change_ap(direction, NULL, pd2.src,
					    &uh.uh_sport, pd2.ip_sum,
					    &uh.uh_sum, &daddr2->addr,
					    saddr2->xport.port, 0, pd2.af,
					    pd2.naf, 0);

					pf_change_ap(direction, NULL, pd2.dst,
					    &uh.uh_dport, pd2.ip_sum,
					    &uh.uh_sum, &saddr2->addr,
					    daddr2->xport.port, 0, pd2.af,
					    pd2.naf, 0);

					pbuf_copy_back(pbuf, pd2.off,
					    sizeof(uh), &uh, sizeof(uh));

					/* translate outer ip header */
					PF_ACPY(&pd->naddr, &daddr2->addr,
					    pd->naf);
					PF_ACPY(&pd->ndaddr, &saddr2->addr,
					    pd->naf);
					if (pd->af == AF_INET) {
						memcpy(&pd->naddr.addr32[3],
						    &srcv4_inaddr,
						    sizeof(pd->naddr.addr32[3]));
						return pf_nat64_ipv4(pbuf, off,
						           pd);
					} else {
						return pf_nat64_ipv6(pbuf, off,
						           pd);
					}
				}
				/* Same-family NAT: rewrite inner + outer. */
				if (direction == PF_IN) {
					pf_change_icmp(pd2.src, &uh.uh_sport,
					    daddr, &sk->lan.addr,
					    sk->lan.xport.port, &uh.uh_sum,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 1, pd2.af);
				} else {
					pf_change_icmp(pd2.dst, &uh.uh_dport,
					    saddr, &sk->gwy.addr,
					    sk->gwy.xport.port, &uh.uh_sum,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 1, pd2.af);
				}
				if (pf_lazy_makewritable(pd, pbuf,
				    off2 + sizeof(uh)) == NULL) {
					return PF_DROP;
				}
				switch (pd2.af) {
#if INET
				case AF_INET:
					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
					    pf_pd_get_hdr_ptr_icmp(pd), pd->hdrmaxlen);
					pbuf_copy_back(pbuf, ipoff2,
					    sizeof(h2), &h2, sizeof(h2));
					break;
#endif /* INET */
				case AF_INET6:
					pbuf_copy_back(pbuf, off,
					    sizeof(struct icmp6_hdr),
					    pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);
					pbuf_copy_back(pbuf, ipoff2,
					    sizeof(h2_6), &h2_6, sizeof(h2_6));
					break;
				}
				pbuf_copy_back(pbuf, off2, sizeof(uh), &uh, sizeof(uh));
			}

			return PF_PASS;
		}
#if INET
		case IPPROTO_ICMP: {
			struct icmp iih;

			if (!pf_pull_hdr(pbuf, off2, &iih, sizeof(iih), ICMP_MINLEN,
			    NULL, reason, pd2.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: ICMP error message too short i"
				    "(icmp)\n"));
				return PF_DROP;
			}

			/* Inner icmp id is the key "port"; direction picks tree. */
			key.proto = IPPROTO_ICMP;
			if (direction == PF_IN) {
				key.af_gwy = pd2.af;
				PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
				PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
				key.ext_gwy.xport.port = 0;
				key.gwy.xport.port = iih.icmp_id;
			} else {
				key.af_lan = pd2.af;
				PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
				PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
				key.lan.xport.port = iih.icmp_id;
				key.ext_lan.xport.port = 0;
			}

			STATE_LOOKUP();

			sk = (*state)->state_key;
			if (STATE_TRANSLATE(sk)) {
				if (direction == PF_IN) {
					pf_change_icmp(pd2.src, &iih.icmp_id,
					    daddr, &sk->lan.addr,
					    sk->lan.xport.port, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, AF_INET);
				} else {
					pf_change_icmp(pd2.dst, &iih.icmp_id,
					    saddr, &sk->gwy.addr,
					    sk->gwy.xport.port, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, AF_INET);
				}
				if (pf_lazy_makewritable(pd, pbuf,
				    off2 + ICMP_MINLEN) == NULL) {
					return PF_DROP;
				}
				pbuf_copy_back(pbuf, off, ICMP_MINLEN,
				    pf_pd_get_hdr_ptr_icmp(pd), pd->hdrmaxlen);
				pbuf_copy_back(pbuf, ipoff2, sizeof(h2), &h2, sizeof(h2));
				pbuf_copy_back(pbuf, off2, ICMP_MINLEN, &iih, sizeof(iih));
			}

			return PF_PASS;
		}
#endif /* INET */
		case IPPROTO_ICMPV6: {
			struct icmp6_hdr iih;

			if (!pf_pull_hdr(pbuf, off2, &iih, sizeof(iih),
			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: ICMP error message too short "
				    "(icmp6)\n"));
				return PF_DROP;
			}

			key.proto = IPPROTO_ICMPV6;
			if (direction == PF_IN) {
				key.af_gwy = pd2.af;
				PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
				PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
				key.ext_gwy.xport.port = 0;
				key.gwy.xport.port = iih.icmp6_id;
			} else {
				key.af_lan = pd2.af;
				PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
				PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
				key.lan.xport.port = iih.icmp6_id;
				key.ext_lan.xport.port = 0;
			}

			STATE_LOOKUP();

			sk = (*state)->state_key;
			if (STATE_TRANSLATE(sk)) {
				if (direction == PF_IN) {
					pf_change_icmp(pd2.src, &iih.icmp6_id,
					    daddr, &sk->lan.addr,
					    sk->lan.xport.port, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, AF_INET6);
				} else {
					pf_change_icmp(pd2.dst, &iih.icmp6_id,
					    saddr, &sk->gwy.addr,
					    sk->gwy.xport.port, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, AF_INET6);
				}
				if (pf_lazy_makewritable(pd, pbuf, off2 +
				    sizeof(struct icmp6_hdr)) == NULL) {
					return PF_DROP;
				}
				pbuf_copy_back(pbuf, off,
				    sizeof(struct icmp6_hdr),
				    pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);
				pbuf_copy_back(pbuf, ipoff2, sizeof(h2_6),
				    &h2_6, sizeof(h2_6));
				pbuf_copy_back(pbuf, off2,
				    sizeof(struct icmp6_hdr), &iih, sizeof(iih));
			}

			return PF_PASS;
		}
		default: {
			/*
			 * Any other embedded protocol: match on addresses
			 * only (no ports available).
			 */
			key.proto = pd2.proto;
			if (direction == PF_IN) {
				key.af_gwy = pd2.af;
				PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
				PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
				key.ext_gwy.xport.port = 0;
				key.gwy.xport.port = 0;
			} else {
				key.af_lan = pd2.af;
				PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
				PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
				key.lan.xport.port = 0;
				key.ext_lan.xport.port = 0;
			}

			STATE_LOOKUP();

			sk = (*state)->state_key;
			if (STATE_TRANSLATE(sk)) {
				if (direction == PF_IN) {
					pf_change_icmp(pd2.src, NULL, daddr,
					    &sk->lan.addr, 0, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, pd2.af);
				} else {
					pf_change_icmp(pd2.dst, NULL, saddr,
					    &sk->gwy.addr, 0, NULL,
					    pd2.ip_sum, icmpsum,
					    pd->ip_sum, 0, pd2.af);
				}
				switch (pd2.af) {
#if INET
				case AF_INET:
					if (pf_lazy_makewritable(pd, pbuf,
					    ipoff2 + sizeof(h2)) == NULL) {
						return PF_DROP;
					}
					/*
					 * <XXXSCW>
					 * Xnu was missing the following...
					 */
					pbuf_copy_back(pbuf, off, ICMP_MINLEN,
					    pf_pd_get_hdr_ptr_icmp(pd), pd->hdrmaxlen);
					pbuf_copy_back(pbuf, ipoff2,
					    sizeof(h2), &h2, sizeof(h2));
					break;
					/*
					 * </XXXSCW>
					 */
#endif /* INET */
				case AF_INET6:
					if (pf_lazy_makewritable(pd, pbuf,
					    ipoff2 + sizeof(h2_6)) == NULL) {
						return PF_DROP;
					}
					pbuf_copy_back(pbuf, off,
					    sizeof(struct icmp6_hdr),
					    pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);
					pbuf_copy_back(pbuf, ipoff2,
					    sizeof(h2_6), &h2_6, sizeof(h2_6));
					break;
				}
			}

			return PF_PASS;
		}
		}
	}
}
8736
/*
 * pf_test_state_grev1: match a GREv1 (PPTP-style) packet against the state
 * table, keyed by the GRE call id, and apply any recorded NAT rewrite.
 *
 * Inbound packets are looked up in the ext-gwy tree by their call id;
 * outbound in the lan-ext tree.  Returns PF_PASS on a match (after
 * translation, if any) or PF_DROP on failure.
 */
static __attribute__((noinline)) int
pf_test_state_grev1(struct pf_state **state, int direction,
    struct pfi_kif *kif, int off, struct pf_pdesc *pd)
{
	struct pf_state_peer *__single src;
	struct pf_state_peer *__single dst;
	struct pf_state_key_cmp key = {};
	struct pf_grev1_hdr *__single grev1 = pf_pd_get_hdr_grev1(pd);

	key.app_state = 0;
	key.proto = IPPROTO_GRE;
	key.proto_variant = PF_GRE_PPTP_VARIANT;
	if (direction == PF_IN) {
		key.af_gwy = pd->af;
		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
		key.gwy.xport.call_id = grev1->call_id;
	} else {
		key.af_lan = pd->af;
		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
		key.ext_lan.xport.call_id = grev1->call_id;
	}

	STATE_LOOKUP();

	/* Orient src/dst peers to the packet's direction. */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFGRE1S_INITIATING) {
		src->state = PFGRE1S_INITIATING;
	}

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state >= PFGRE1S_INITIATING &&
	    dst->state >= PFGRE1S_INITIATING) {
		/*
		 * Both peers seen: promote to established; do not shorten a
		 * timeout already pinned by the PPTP app state.
		 */
		if ((*state)->timeout != PFTM_TCP_ESTABLISHED) {
			(*state)->timeout = PFTM_GREv1_ESTABLISHED;
		}
		src->state = PFGRE1S_ESTABLISHED;
		dst->state = PFGRE1S_ESTABLISHED;
	} else {
		(*state)->timeout = PFTM_GREv1_INITIATING;
	}

	/* Keep the controlling PPTP (TCP) app state alive too. */
	if ((*state)->state_key->app_state) {
		(*state)->state_key->app_state->u.grev1.pptp_state->expire =
		    pf_time_second();
	}

	/* translate source/destination address, if necessary */
	if (STATE_GRE_TRANSLATE((*state)->state_key)) {
		if (direction == PF_OUT) {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->src->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->gwy.addr.v4addr.s_addr, 0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->src, &(*state)->state_key->gwy.addr,
				    pd->af);
				break;
			}
		} else {
			/* Inbound: also restore the LAN-side call id. */
			grev1->call_id = (*state)->state_key->lan.xport.call_id;

			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->dst->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->lan.addr.v4addr.s_addr, 0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->dst, &(*state)->state_key->lan.addr,
				    pd->af);
				break;
			}
		}

		if (pf_lazy_makewritable(pd, pd->mp, off + sizeof(*grev1)) ==
		    NULL) {
			return PF_DROP;
		}
		pbuf_copy_back(pd->mp, off, sizeof(*grev1), grev1, sizeof(*grev1));
	}

	return PF_PASS;
}
8837
/*
 * pf_test_state_esp: match an ESP (IPsec) packet against the state table
 * and advance the ESP state machine for the matched entry.
 *
 * The state is keyed on the outer addresses plus the ESP SPI.  If no
 * exact match exists, a "blocking" state keyed with SPI 0 is searched
 * and, when found, re-keyed in place to this packet's SPI.
 *
 * Returns PF_PASS when the packet belongs to a valid state (after any
 * required NAT rewrite of the outer addresses), PF_DROP otherwise.
 * On success *state points at the matched state.
 */
static __attribute__((noinline)) int
pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif,
    int off, struct pf_pdesc *pd)
{
#pragma unused(off)
	struct pf_state_peer *__single src;
	struct pf_state_peer *__single dst;
	struct pf_state_key_cmp key;
	struct pf_esp_hdr *__single esp = pf_pd_get_hdr_esp(pd);
	int action;

	/* Build the lookup key: outer addresses + SPI, oriented by direction */
	memset(&key, 0, sizeof(key));
	key.proto = IPPROTO_ESP;
	if (direction == PF_IN) {
		key.af_gwy = pd->af;
		PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
		PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
		key.gwy.xport.spi = esp->spi;
	} else {
		key.af_lan = pd->af;
		PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
		PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
		key.ext_lan.xport.spi = esp->spi;
	}

	*state = pf_find_state(kif, &key, direction);

	if (*state == 0) {
		struct pf_state *s;

		/*
		 * <[email protected]>
		 * No matching state. Look for a blocking state. If we find
		 * one, then use that state and move it so that it's keyed to
		 * the SPI in the current packet.
		 */
		if (direction == PF_IN) {
			key.gwy.xport.spi = 0;

			s = pf_find_state(kif, &key, direction);
			if (s) {
				struct pf_state_key *sk = s->state_key;

				/* Re-key the ext_gwy tree entry to this SPI */
				pf_remove_state_key_ext_gwy(sk);
				sk->lan.xport.spi = sk->gwy.xport.spi =
				    esp->spi;

				if (pf_insert_state_key_ext_gwy(sk)) {
					/* insert collision: detach; state torn down below */
					pf_detach_state(s, PF_DT_SKIP_EXTGWY);
				} else {
					*state = s;
				}
			}
		} else {
			key.ext_lan.xport.spi = 0;

			s = pf_find_state(kif, &key, direction);
			if (s) {
				struct pf_state_key *sk = s->state_key;

				/* Re-key the lan_ext tree entry to this SPI */
				RB_REMOVE(pf_state_tree_lan_ext,
				    &pf_statetbl_lan_ext, sk);
				sk->ext_lan.xport.spi = esp->spi;

				if (RB_INSERT(pf_state_tree_lan_ext,
				    &pf_statetbl_lan_ext, sk)) {
					/* insert collision: detach; state torn down below */
					pf_detach_state(s, PF_DT_SKIP_LANEXT);
				} else {
					*state = s;
				}
			}
		}

		if (s) {
			if (*state == 0) {
				/* Re-insert failed above: unlink and free the state */
#if NPFSYNC
				if (s->creatorid == pf_status.hostid) {
					pfsync_delete_state(s);
				}
#endif
				s->timeout = PFTM_UNLINKED;
				hook_runloop(&s->unlink_hooks,
				    HOOK_REMOVE | HOOK_FREE);
				pf_src_tree_remove_state(s);
				pf_free_state(s);
				return PF_DROP;
			}
		}
	}

	/* similar to STATE_LOOKUP(): tag the packet with the state's flow id */
	if (*state != NULL && pd != NULL && !(pd->pktflags & PKTF_FLOW_ID)) {
		pd->flowsrc = (*state)->state_key->flowsrc;
		pd->flowhash = (*state)->state_key->flowhash;
		if (pd->flowhash != 0) {
			pd->pktflags |= PKTF_FLOW_ID;
			pd->pktflags &= ~PKTF_FLOW_ADV;
		}
	}

	if (pf_state_lookup_aux(state, kif, direction, &action)) {
		return action;
	}

	/* Orient src/dst peers relative to the direction the state was created */
	if (direction == (*state)->state_key->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFESPS_INITIATING) {
		src->state = PFESPS_INITIATING;
	}

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state >= PFESPS_INITIATING &&
	    dst->state >= PFESPS_INITIATING) {
		/* both peers seen: promote to established, longer timeout */
		(*state)->timeout = PFTM_ESP_ESTABLISHED;
		src->state = PFESPS_ESTABLISHED;
		dst->state = PFESPS_ESTABLISHED;
	} else {
		(*state)->timeout = PFTM_ESP_INITIATING;
	}
	/* translate source/destination address, if necessary */
	if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
		if (direction == PF_OUT) {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->src->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->gwy.addr.v4addr.s_addr, 0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->src, &(*state)->state_key->gwy.addr,
				    pd->af);
				break;
			}
		} else {
			switch (pd->af) {
#if INET
			case AF_INET:
				pf_change_a(&pd->dst->v4addr.s_addr,
				    pd->ip_sum,
				    (*state)->state_key->lan.addr.v4addr.s_addr, 0);
				break;
#endif /* INET */
			case AF_INET6:
				PF_ACPY(pd->dst, &(*state)->state_key->lan.addr,
				    pd->af);
				break;
			}
		}
	}

	return PF_PASS;
}
9000
9001 static __attribute__((noinline)) int
pf_test_state_other(struct pf_state ** state,int direction,struct pfi_kif * kif,struct pf_pdesc * pd)9002 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
9003 struct pf_pdesc *pd)
9004 {
9005 struct pf_state_peer *src, *dst;
9006 struct pf_state_key_cmp key = {};
9007
9008 key.app_state = 0;
9009 key.proto = pd->proto;
9010 if (direction == PF_IN) {
9011 key.af_gwy = pd->af;
9012 PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
9013 PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
9014 key.ext_gwy.xport.port = 0;
9015 key.gwy.xport.port = 0;
9016 } else {
9017 key.af_lan = pd->af;
9018 PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
9019 PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
9020 key.lan.xport.port = 0;
9021 key.ext_lan.xport.port = 0;
9022 }
9023
9024 STATE_LOOKUP();
9025
9026 if (direction == (*state)->state_key->direction) {
9027 src = &(*state)->src;
9028 dst = &(*state)->dst;
9029 } else {
9030 src = &(*state)->dst;
9031 dst = &(*state)->src;
9032 }
9033
9034 /* update states */
9035 if (src->state < PFOTHERS_SINGLE) {
9036 src->state = PFOTHERS_SINGLE;
9037 }
9038 if (dst->state == PFOTHERS_SINGLE) {
9039 dst->state = PFOTHERS_MULTIPLE;
9040 }
9041
9042 /* update expire time */
9043 (*state)->expire = pf_time_second();
9044 if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) {
9045 (*state)->timeout = PFTM_OTHER_MULTIPLE;
9046 } else {
9047 (*state)->timeout = PFTM_OTHER_SINGLE;
9048 }
9049
9050 /* translate source/destination address, if necessary */
9051 if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
9052 if (direction == PF_OUT) {
9053 switch (pd->af) {
9054 #if INET
9055 case AF_INET:
9056 pf_change_a(&pd->src->v4addr.s_addr,
9057 pd->ip_sum,
9058 (*state)->state_key->gwy.addr.v4addr.s_addr,
9059 0);
9060 break;
9061 #endif /* INET */
9062 case AF_INET6:
9063 PF_ACPY(pd->src,
9064 &(*state)->state_key->gwy.addr, pd->af);
9065 break;
9066 }
9067 } else {
9068 switch (pd->af) {
9069 #if INET
9070 case AF_INET:
9071 pf_change_a(&pd->dst->v4addr.s_addr,
9072 pd->ip_sum,
9073 (*state)->state_key->lan.addr.v4addr.s_addr,
9074 0);
9075 break;
9076 #endif /* INET */
9077 case AF_INET6:
9078 PF_ACPY(pd->dst,
9079 &(*state)->state_key->lan.addr, pd->af);
9080 break;
9081 }
9082 }
9083 }
9084
9085 return PF_PASS;
9086 }
9087
9088 /*
9089 * ipoff and off are measured from the start of the mbuf chain.
9090 * h must be at "ipoff" on the mbuf chain.
9091 */
9092 void *
pf_pull_hdr(pbuf_t * pbuf,int off,void * __sized_by (p_buflen)p,int p_buflen,int copylen,u_short * actionp,u_short * reasonp,sa_family_t af)9093 pf_pull_hdr(pbuf_t *pbuf, int off, void *__sized_by(p_buflen)p, int p_buflen, int copylen,
9094 u_short *actionp, u_short *reasonp, sa_family_t af)
9095 {
9096 switch (af) {
9097 #if INET
9098 case AF_INET: {
9099 struct ip *__single h = pbuf->pb_data;
9100 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
9101
9102 if (fragoff) {
9103 if (fragoff >= copylen) {
9104 ACTION_SET(actionp, PF_PASS);
9105 } else {
9106 ACTION_SET(actionp, PF_DROP);
9107 REASON_SET(reasonp, PFRES_FRAG);
9108 }
9109 return NULL;
9110 }
9111 if (pbuf->pb_packet_len < (unsigned)(off + copylen) ||
9112 ntohs(h->ip_len) < off + copylen) {
9113 ACTION_SET(actionp, PF_DROP);
9114 REASON_SET(reasonp, PFRES_SHORT);
9115 return NULL;
9116 }
9117 break;
9118 }
9119 #endif /* INET */
9120 case AF_INET6: {
9121 struct ip6_hdr *__single h = pbuf->pb_data;
9122
9123 if (pbuf->pb_packet_len < (unsigned)(off + copylen) ||
9124 (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
9125 (unsigned)(off + copylen)) {
9126 ACTION_SET(actionp, PF_DROP);
9127 REASON_SET(reasonp, PFRES_SHORT);
9128 return NULL;
9129 }
9130 break;
9131 }
9132 }
9133 pbuf_copy_data(pbuf, off, copylen, p, p_buflen);
9134 return p;
9135 }
9136
9137 int
pf_routable(struct pf_addr * addr,sa_family_t af,struct pfi_kif * kif)9138 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
9139 {
9140 #pragma unused(kif)
9141 struct sockaddr_in *dst;
9142 int ret = 1;
9143 struct sockaddr_in6 *dst6;
9144 struct route_in6 ro;
9145
9146 bzero(&ro, sizeof(ro));
9147 switch (af) {
9148 case AF_INET:
9149 dst = satosin(&ro.ro_dst);
9150 dst->sin_family = AF_INET;
9151 dst->sin_len = sizeof(*dst);
9152 dst->sin_addr = addr->v4addr;
9153 break;
9154 case AF_INET6:
9155 dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
9156 dst6->sin6_family = AF_INET6;
9157 dst6->sin6_len = sizeof(*dst6);
9158 dst6->sin6_addr = addr->v6addr;
9159 break;
9160 default:
9161 return 0;
9162 }
9163
9164 /* XXX: IFT_ENC is not currently used by anything*/
9165 /* Skip checks for ipsec interfaces */
9166 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) {
9167 goto out;
9168 }
9169
9170 /* XXX: what is the point of this? */
9171 rtalloc((struct route *)&ro);
9172
9173 out:
9174 ROUTE_RELEASE(&ro);
9175 return ret;
9176 }
9177
9178 int
pf_rtlabel_match(struct pf_addr * addr,sa_family_t af,struct pf_addr_wrap * aw)9179 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
9180 {
9181 #pragma unused(aw)
9182 struct sockaddr_in *dst;
9183 struct sockaddr_in6 *dst6;
9184 struct route_in6 ro;
9185 int ret = 0;
9186
9187 bzero(&ro, sizeof(ro));
9188 switch (af) {
9189 case AF_INET:
9190 dst = satosin(&ro.ro_dst);
9191 dst->sin_family = AF_INET;
9192 dst->sin_len = sizeof(*dst);
9193 dst->sin_addr = addr->v4addr;
9194 break;
9195 case AF_INET6:
9196 dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
9197 dst6->sin6_family = AF_INET6;
9198 dst6->sin6_len = sizeof(*dst6);
9199 dst6->sin6_addr = addr->v6addr;
9200 break;
9201 default:
9202 return 0;
9203 }
9204
9205 /* XXX: what is the point of this? */
9206 rtalloc((struct route *)&ro);
9207
9208 ROUTE_RELEASE(&ro);
9209
9210 return ret;
9211 }
9212
9213 #if INET
9214 static __attribute__((noinline)) void
pf_route(pbuf_t ** pbufp,struct pf_rule * r,int dir,struct ifnet * oifp,struct pf_state * s,struct pf_pdesc * pd)9215 pf_route(pbuf_t **pbufp, struct pf_rule *r, int dir, struct ifnet *oifp,
9216 struct pf_state *s, struct pf_pdesc *pd)
9217 {
9218 #pragma unused(pd)
9219 struct mbuf *__single m0, *__single m1;
9220 struct route iproute;
9221 struct route *__single ro = &iproute;
9222 struct sockaddr_in *__single dst;
9223 struct ip *__single ip;
9224 struct ifnet *__single ifp = NULL;
9225 struct pf_addr naddr;
9226 struct pf_src_node *__single sn = NULL;
9227 int error = 0;
9228 uint32_t sw_csum;
9229 int interface_mtu = 0;
9230 bzero(&iproute, sizeof(iproute));
9231
9232 if (pbufp == NULL || !pbuf_is_valid(*pbufp) || r == NULL ||
9233 (dir != PF_IN && dir != PF_OUT) || oifp == NULL) {
9234 panic("pf_route: invalid parameters");
9235 }
9236
9237 if (pd->pf_mtag->pftag_routed++ > 3) {
9238 pbuf_destroy(*pbufp);
9239 *pbufp = NULL;
9240 m0 = NULL;
9241 goto bad;
9242 }
9243
9244 /*
9245 * Since this is something of an edge case and may involve the
9246 * host stack (for routing, at least for now), we convert the
9247 * incoming pbuf into an mbuf.
9248 */
9249 if (r->rt == PF_DUPTO) {
9250 m0 = pbuf_clone_to_mbuf(*pbufp);
9251 } else if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
9252 return;
9253 } else {
9254 /* We're going to consume this packet */
9255 m0 = pbuf_to_mbuf(*pbufp, TRUE);
9256 *pbufp = NULL;
9257 }
9258
9259 if (m0 == NULL) {
9260 goto bad;
9261 }
9262
9263 /* We now have the packet in an mbuf (m0) */
9264
9265 if (m0->m_len < (int)sizeof(struct ip)) {
9266 DPFPRINTF(PF_DEBUG_URGENT,
9267 ("pf_route: packet length < sizeof (struct ip)\n"));
9268 goto bad;
9269 }
9270
9271 ip = mtod(m0, struct ip *);
9272
9273 dst = satosin((void *)&ro->ro_dst);
9274 dst->sin_family = AF_INET;
9275 dst->sin_len = sizeof(*dst);
9276 dst->sin_addr = ip->ip_dst;
9277
9278 if (r->rt == PF_FASTROUTE) {
9279 rtalloc(ro);
9280 if (ro->ro_rt == NULL) {
9281 ipstat.ips_noroute++;
9282 goto bad;
9283 }
9284
9285 ifp = ro->ro_rt->rt_ifp;
9286 RT_LOCK(ro->ro_rt);
9287 ro->ro_rt->rt_use++;
9288
9289 if (ro->ro_rt->rt_flags & RTF_GATEWAY) {
9290 dst = satosin((void *)ro->ro_rt->rt_gateway);
9291 }
9292 RT_UNLOCK(ro->ro_rt);
9293 } else {
9294 if (TAILQ_EMPTY(&r->rpool.list)) {
9295 DPFPRINTF(PF_DEBUG_URGENT,
9296 ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
9297 goto bad;
9298 }
9299 if (s == NULL) {
9300 pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
9301 &naddr, NULL, &sn);
9302 if (!PF_AZERO(&naddr, AF_INET)) {
9303 dst->sin_addr.s_addr = naddr.v4addr.s_addr;
9304 }
9305 ifp = r->rpool.cur->kif ?
9306 r->rpool.cur->kif->pfik_ifp : NULL;
9307 } else {
9308 if (!PF_AZERO(&s->rt_addr, AF_INET)) {
9309 dst->sin_addr.s_addr =
9310 s->rt_addr.v4addr.s_addr;
9311 }
9312 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
9313 }
9314 }
9315 if (ifp == NULL) {
9316 goto bad;
9317 }
9318
9319 if (oifp != ifp) {
9320 if (pf_test_mbuf(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
9321 goto bad;
9322 } else if (m0 == NULL) {
9323 goto done;
9324 }
9325 if (m0->m_len < (int)sizeof(struct ip)) {
9326 DPFPRINTF(PF_DEBUG_URGENT,
9327 ("pf_route: packet length < sizeof (struct ip)\n"));
9328 goto bad;
9329 }
9330 ip = mtod(m0, struct ip *);
9331 }
9332
9333 /* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
9334 ip_output_checksum(ifp, m0, ((ip->ip_hl) << 2), ntohs(ip->ip_len),
9335 &sw_csum);
9336
9337 interface_mtu = ifp->if_mtu;
9338
9339 if (INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
9340 interface_mtu = IN6_LINKMTU(ifp);
9341 /* Further adjust the size for CLAT46 expansion */
9342 interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
9343 }
9344
9345 if (ntohs(ip->ip_len) <= interface_mtu || TSO_IPV4_OK(ifp, m0) ||
9346 (!(ip->ip_off & htons(IP_DF)) &&
9347 (ifp->if_hwassist & CSUM_FRAGMENT))) {
9348 ip->ip_sum = 0;
9349 if (sw_csum & CSUM_DELAY_IP) {
9350 ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
9351 sw_csum &= ~CSUM_DELAY_IP;
9352 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP;
9353 }
9354 error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt, sintosa(dst));
9355 goto done;
9356 }
9357
9358 /*
9359 * Too large for interface; fragment if possible.
9360 * Must be able to put at least 8 bytes per fragment.
9361 * Balk when DF bit is set or the interface didn't support TSO.
9362 */
9363 if ((ip->ip_off & htons(IP_DF)) ||
9364 (m0->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) {
9365 ipstat.ips_cantfrag++;
9366 if (r->rt != PF_DUPTO) {
9367 icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
9368 interface_mtu);
9369 goto done;
9370 } else {
9371 goto bad;
9372 }
9373 }
9374
9375 m1 = m0;
9376
9377 /* PR-8933605: send ip_len,ip_off to ip_fragment in host byte order */
9378 #if BYTE_ORDER != BIG_ENDIAN
9379 NTOHS(ip->ip_off);
9380 NTOHS(ip->ip_len);
9381 #endif
9382 error = ip_fragment(m0, ifp, interface_mtu, sw_csum);
9383
9384 if (error) {
9385 m0 = NULL;
9386 goto bad;
9387 }
9388
9389 for (m0 = m1; m0; m0 = m1) {
9390 m1 = m0->m_nextpkt;
9391 m0->m_nextpkt = 0;
9392 if (error == 0) {
9393 error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt,
9394 sintosa(dst));
9395 } else {
9396 m_freem(m0);
9397 }
9398 }
9399
9400 if (error == 0) {
9401 ipstat.ips_fragmented++;
9402 }
9403
9404 done:
9405 ROUTE_RELEASE(&iproute);
9406 return;
9407
9408 bad:
9409 if (m0) {
9410 m_freem(m0);
9411 }
9412 goto done;
9413 }
9414 #endif /* INET */
9415
9416 static __attribute__((noinline)) void
pf_route6(pbuf_t ** pbufp,struct pf_rule * r,int dir,struct ifnet * oifp,struct pf_state * s,struct pf_pdesc * pd)9417 pf_route6(pbuf_t **pbufp, struct pf_rule *r, int dir, struct ifnet *oifp,
9418 struct pf_state *s, struct pf_pdesc *pd)
9419 {
9420 #pragma unused(pd)
9421 struct mbuf *__single m0;
9422 struct route_in6 ip6route;
9423 struct route_in6 *__single ro;
9424 struct sockaddr_in6 *__single dst;
9425 struct ip6_hdr *__single ip6;
9426 struct ifnet *__single ifp = NULL;
9427 struct pf_addr naddr;
9428 struct pf_src_node *__single sn = NULL;
9429 int error = 0;
9430 struct pf_mtag *__single pf_mtag;
9431
9432 if (pbufp == NULL || !pbuf_is_valid(*pbufp) || r == NULL ||
9433 (dir != PF_IN && dir != PF_OUT) || oifp == NULL) {
9434 panic("pf_route6: invalid parameters");
9435 }
9436
9437 if (pd->pf_mtag->pftag_routed++ > 3) {
9438 pbuf_destroy(*pbufp);
9439 *pbufp = NULL;
9440 m0 = NULL;
9441 goto bad;
9442 }
9443
9444 /*
9445 * Since this is something of an edge case and may involve the
9446 * host stack (for routing, at least for now), we convert the
9447 * incoming pbuf into an mbuf.
9448 */
9449 if (r->rt == PF_DUPTO) {
9450 m0 = pbuf_clone_to_mbuf(*pbufp);
9451 } else if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
9452 return;
9453 } else {
9454 /* We're about to consume this packet */
9455 m0 = pbuf_to_mbuf(*pbufp, TRUE);
9456 *pbufp = NULL;
9457 }
9458
9459 if (m0 == NULL) {
9460 goto bad;
9461 }
9462
9463 if (m0->m_len < (int)sizeof(struct ip6_hdr)) {
9464 DPFPRINTF(PF_DEBUG_URGENT,
9465 ("pf_route6: m0->m_len < sizeof (struct ip6_hdr)\n"));
9466 goto bad;
9467 }
9468 ip6 = mtod(m0, struct ip6_hdr *);
9469
9470 ro = &ip6route;
9471 bzero((void *__bidi_indexable)(struct route_in6 *__bidi_indexable)ro, sizeof(*ro));
9472 dst = SIN6(&ro->ro_dst);
9473 dst->sin6_family = AF_INET6;
9474 dst->sin6_len = sizeof(*dst);
9475 dst->sin6_addr = ip6->ip6_dst;
9476
9477 /* Cheat. XXX why only in the v6addr case??? */
9478 if (r->rt == PF_FASTROUTE) {
9479 pf_mtag = pf_get_mtag(m0);
9480 ASSERT(pf_mtag != NULL);
9481 pf_mtag->pftag_flags |= PF_TAG_GENERATED;
9482 ip6_output_setsrcifscope(m0, oifp->if_index, NULL);
9483 ip6_output_setdstifscope(m0, oifp->if_index, NULL);
9484 ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
9485 return;
9486 }
9487
9488 if (TAILQ_EMPTY(&r->rpool.list)) {
9489 DPFPRINTF(PF_DEBUG_URGENT,
9490 ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
9491 goto bad;
9492 }
9493 if (s == NULL) {
9494 pf_map_addr(AF_INET6, r, (struct pf_addr *)(void *)&ip6->ip6_src,
9495 &naddr, NULL, &sn);
9496 if (!PF_AZERO(&naddr, AF_INET6)) {
9497 PF_ACPY((struct pf_addr *)&dst->sin6_addr,
9498 &naddr, AF_INET6);
9499 }
9500 ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
9501 } else {
9502 if (!PF_AZERO(&s->rt_addr, AF_INET6)) {
9503 PF_ACPY((struct pf_addr *)&dst->sin6_addr,
9504 &s->rt_addr, AF_INET6);
9505 }
9506 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
9507 }
9508 if (ifp == NULL) {
9509 goto bad;
9510 }
9511
9512 if (oifp != ifp) {
9513 if (pf_test6_mbuf(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
9514 goto bad;
9515 } else if (m0 == NULL) {
9516 goto done;
9517 }
9518 if (m0->m_len < (int)sizeof(struct ip6_hdr)) {
9519 DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6: m0->m_len "
9520 "< sizeof (struct ip6_hdr)\n"));
9521 goto bad;
9522 }
9523 pf_mtag = pf_get_mtag(m0);
9524 /*
9525 * send refragmented packets.
9526 */
9527 if ((pf_mtag->pftag_flags & PF_TAG_REFRAGMENTED) != 0) {
9528 pf_mtag->pftag_flags &= ~PF_TAG_REFRAGMENTED;
9529 /*
9530 * nd6_output() frees packet chain in both success and
9531 * failure cases.
9532 */
9533 error = nd6_output(ifp, ifp, m0, dst, NULL, NULL);
9534 m0 = NULL;
9535 if (error) {
9536 DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6:"
9537 "dropped refragmented packet\n"));
9538 }
9539 goto done;
9540 }
9541 ip6 = mtod(m0, struct ip6_hdr *);
9542 }
9543
9544 /*
9545 * If the packet is too large for the outgoing interface,
9546 * send back an icmp6 error.
9547 */
9548 if (in6_embedded_scope && IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) {
9549 dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
9550 }
9551 if ((unsigned)m0->m_pkthdr.len <= ifp->if_mtu) {
9552 error = nd6_output(ifp, ifp, m0, dst, NULL, NULL);
9553 } else {
9554 in6_ifstat_inc(ifp, ifs6_in_toobig);
9555 if (r->rt != PF_DUPTO) {
9556 icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
9557 } else {
9558 goto bad;
9559 }
9560 }
9561
9562 done:
9563 return;
9564
9565 bad:
9566 if (m0) {
9567 m_freem(m0);
9568 m0 = NULL;
9569 }
9570 goto done;
9571 }
9572
9573
9574 /*
9575 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
9576 * off is the offset where the protocol header starts
9577 * len is the total length of protocol header plus payload
9578 * returns 0 when the checksum is valid, otherwise returns 1.
9579 */
9580 static int
pf_check_proto_cksum(pbuf_t * pbuf,int off,int len,u_int8_t p,sa_family_t af)9581 pf_check_proto_cksum(pbuf_t *pbuf, int off, int len, u_int8_t p,
9582 sa_family_t af)
9583 {
9584 u_int16_t sum;
9585
9586 switch (p) {
9587 case IPPROTO_TCP:
9588 case IPPROTO_UDP:
9589 /*
9590 * Optimize for the common case; if the hardware calculated
9591 * value doesn't include pseudo-header checksum, or if it
9592 * is partially-computed (only 16-bit summation), do it in
9593 * software below.
9594 */
9595 if ((*pbuf->pb_csum_flags &
9596 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
9597 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) &&
9598 (*pbuf->pb_csum_data ^ 0xffff) == 0) {
9599 return 0;
9600 }
9601 break;
9602 case IPPROTO_ICMP:
9603 case IPPROTO_ICMPV6:
9604 break;
9605 default:
9606 return 1;
9607 }
9608 if (off < (int)sizeof(struct ip) || len < (int)sizeof(struct udphdr)) {
9609 return 1;
9610 }
9611 if (pbuf->pb_packet_len < (unsigned)(off + len)) {
9612 return 1;
9613 }
9614 switch (af) {
9615 #if INET
9616 case AF_INET:
9617 if (p == IPPROTO_ICMP) {
9618 if (pbuf->pb_contig_len < (unsigned)off) {
9619 return 1;
9620 }
9621 sum = pbuf_inet_cksum(pbuf, 0, off, len);
9622 } else {
9623 if (pbuf->pb_contig_len < (int)sizeof(struct ip)) {
9624 return 1;
9625 }
9626 sum = pbuf_inet_cksum(pbuf, p, off, len);
9627 }
9628 break;
9629 #endif /* INET */
9630 case AF_INET6:
9631 if (pbuf->pb_contig_len < (int)sizeof(struct ip6_hdr)) {
9632 return 1;
9633 }
9634 sum = pbuf_inet6_cksum(pbuf, p, off, len);
9635 break;
9636 default:
9637 return 1;
9638 }
9639 if (sum) {
9640 switch (p) {
9641 case IPPROTO_TCP:
9642 tcpstat.tcps_rcvbadsum++;
9643 break;
9644 case IPPROTO_UDP:
9645 udpstat.udps_badsum++;
9646 break;
9647 case IPPROTO_ICMP:
9648 icmpstat.icps_checksum++;
9649 break;
9650 case IPPROTO_ICMPV6:
9651 icmp6stat.icp6s_checksum++;
9652 break;
9653 }
9654 return 1;
9655 }
9656 return 0;
9657 }
9658
9659 #if INET
/*
 * A helper may have re-pointed the working packet (pd.mp, e.g. after
 * pf_lazy_makewritable()).  Resynchronize the local pbuf, the cached
 * IPv4 header pointer (h) and the pf mtag with the descriptor before
 * continuing.  Expects pbuf, pd and h to be in scope at the call site.
 */
#define PF_APPLE_UPDATE_PDESC_IPv4() \
	do { \
		if (pbuf && pd.mp && pbuf != pd.mp) { \
			pbuf = pd.mp; \
			h = pbuf->pb_data; \
			pd.pf_mtag = pf_get_mtag_pbuf(pbuf); \
		} \
	} while (0)
9668
9669 int
pf_test_mbuf(int dir,struct ifnet * ifp,struct mbuf ** m0,struct ether_header * eh,struct ip_fw_args * fwa)9670 pf_test_mbuf(int dir, struct ifnet *ifp, struct mbuf **m0,
9671 struct ether_header *eh, struct ip_fw_args *fwa)
9672 {
9673 pbuf_t pbuf_store, *__single pbuf;
9674 int rv;
9675
9676 pbuf_init_mbuf(&pbuf_store, *m0, (*m0)->m_pkthdr.rcvif);
9677 pbuf = &pbuf_store;
9678
9679 rv = pf_test(dir, ifp, &pbuf, eh, fwa);
9680
9681 if (pbuf_is_valid(pbuf)) {
9682 *m0 = pbuf->pb_mbuf;
9683 pbuf->pb_mbuf = NULL;
9684 pbuf_destroy(pbuf);
9685 } else {
9686 *m0 = NULL;
9687 }
9688
9689 return rv;
9690 }
9691
9692 static __attribute__((noinline)) int
pf_test(int dir,struct ifnet * ifp,pbuf_t ** pbufp,struct ether_header * eh,struct ip_fw_args * fwa)9693 pf_test(int dir, struct ifnet *ifp, pbuf_t **pbufp,
9694 struct ether_header *eh, struct ip_fw_args *fwa)
9695 {
9696 #if !DUMMYNET
9697 #pragma unused(fwa)
9698 #endif
9699 struct pfi_kif *__single kif;
9700 u_short action = PF_PASS, reason = 0, log = 0;
9701 pbuf_t *__single pbuf = *pbufp;
9702 struct ip *__single h = 0;
9703 struct pf_rule *__single a = NULL, *__single r = &pf_default_rule, *__single tr, *__single nr;
9704 struct pf_state *__single s = NULL;
9705 struct pf_state_key *__single sk = NULL;
9706 struct pf_ruleset *__single ruleset = NULL;
9707 struct pf_pdesc pd;
9708 int off, dirndx, pqid = 0;
9709
9710 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
9711
9712 if (!pf_status.running) {
9713 return PF_PASS;
9714 }
9715
9716 memset(&pd, 0, sizeof(pd));
9717
9718 if ((pd.pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
9719 DPFPRINTF(PF_DEBUG_URGENT,
9720 ("pf_test: pf_get_mtag_pbuf returned NULL\n"));
9721 return PF_DROP;
9722 }
9723
9724 if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED) {
9725 return PF_PASS;
9726 }
9727
9728 kif = (struct pfi_kif *)ifp->if_pf_kif;
9729
9730 if (kif == NULL) {
9731 DPFPRINTF(PF_DEBUG_URGENT,
9732 ("pf_test: kif == NULL, if_name %s\n", ifp->if_name));
9733 return PF_DROP;
9734 }
9735 if (kif->pfik_flags & PFI_IFLAG_SKIP) {
9736 return PF_PASS;
9737 }
9738
9739 if (pbuf->pb_packet_len < (int)sizeof(*h)) {
9740 REASON_SET(&reason, PFRES_SHORT);
9741 return PF_DROP;
9742 }
9743
9744 /* initialize enough of pd for the done label */
9745 h = pbuf->pb_data;
9746 pd.mp = pbuf;
9747 pd.lmw = 0;
9748 pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
9749 pd.src = (struct pf_addr *)&h->ip_src;
9750 pd.dst = (struct pf_addr *)&h->ip_dst;
9751 PF_ACPY(&pd.baddr, pd.src, AF_INET);
9752 PF_ACPY(&pd.bdaddr, pd.dst, AF_INET);
9753 pd.ip_sum = &h->ip_sum;
9754 pd.proto = h->ip_p;
9755 pd.proto_variant = 0;
9756 pd.af = AF_INET;
9757 pd.tos = h->ip_tos;
9758 pd.ttl = h->ip_ttl;
9759 pd.tot_len = ntohs(h->ip_len);
9760 pd.eh = eh;
9761
9762 #if DUMMYNET
9763 if (fwa != NULL && fwa->fwa_pf_rule != NULL) {
9764 goto nonormalize;
9765 }
9766 #endif /* DUMMYNET */
9767
9768 /* We do IP header normalization and packet reassembly here */
9769 action = pf_normalize_ip(pbuf, dir, kif, &reason, &pd);
9770 if (action != PF_PASS || pd.lmw < 0) {
9771 action = PF_DROP;
9772 goto done;
9773 }
9774
9775 #if DUMMYNET
9776 nonormalize:
9777 #endif /* DUMMYNET */
9778 /* pf_normalize can mess with pb_data */
9779 h = pbuf->pb_data;
9780
9781 off = h->ip_hl << 2;
9782 if (off < (int)sizeof(*h)) {
9783 action = PF_DROP;
9784 REASON_SET(&reason, PFRES_SHORT);
9785 log = 1;
9786 goto done;
9787 }
9788
9789 pd.src = (struct pf_addr *)&h->ip_src;
9790 pd.dst = (struct pf_addr *)&h->ip_dst;
9791 PF_ACPY(&pd.baddr, pd.src, AF_INET);
9792 PF_ACPY(&pd.bdaddr, pd.dst, AF_INET);
9793 pd.ip_sum = &h->ip_sum;
9794 pd.proto = h->ip_p;
9795 pd.proto_variant = 0;
9796 pd.mp = pbuf;
9797 pd.lmw = 0;
9798 pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
9799 pd.af = AF_INET;
9800 pd.tos = h->ip_tos;
9801 pd.ttl = h->ip_ttl;
9802 pd.sc = MBUF_SCIDX(pbuf_get_service_class(pbuf));
9803 pd.tot_len = ntohs(h->ip_len);
9804 pd.eh = eh;
9805
9806 if (*pbuf->pb_flags & PKTF_FLOW_ID) {
9807 pd.flowsrc = *pbuf->pb_flowsrc;
9808 pd.flowhash = *pbuf->pb_flowid;
9809 pd.pktflags = *pbuf->pb_flags & PKTF_FLOW_MASK;
9810 }
9811
9812 /* handle fragments that didn't get reassembled by normalization */
9813 if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
9814 pd.flags |= PFDESC_IP_FRAG;
9815 #if DUMMYNET
9816 /* Traffic goes through dummynet first */
9817 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9818 if (action == PF_DROP || pbuf == NULL) {
9819 *pbufp = NULL;
9820 return action;
9821 }
9822 #endif /* DUMMYNET */
9823 action = pf_test_fragment(&r, dir, kif, pbuf, h,
9824 &pd, &a, &ruleset);
9825 goto done;
9826 }
9827
9828 switch (h->ip_p) {
9829 case IPPROTO_TCP: {
9830 struct tcphdr th;
9831 pf_pd_set_hdr_tcp(&pd, &th);
9832 if (!pf_pull_hdr(pbuf, off, &th, sizeof(th), sizeof(th),
9833 &action, &reason, AF_INET)) {
9834 log = action != PF_PASS;
9835 goto done;
9836 }
9837 pd.p_len = pd.tot_len - off - (th.th_off << 2);
9838 if ((th.th_flags & TH_ACK) && pd.p_len == 0) {
9839 pqid = 1;
9840 }
9841 #if DUMMYNET
9842 /* Traffic goes through dummynet first */
9843 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9844 if (action == PF_DROP || pbuf == NULL) {
9845 *pbufp = NULL;
9846 return action;
9847 }
9848 #endif /* DUMMYNET */
9849 action = pf_normalize_tcp(dir, kif, pbuf, 0, off, h, &pd);
9850 if (pd.lmw < 0) {
9851 goto done;
9852 }
9853 PF_APPLE_UPDATE_PDESC_IPv4();
9854 if (action == PF_DROP) {
9855 goto done;
9856 }
9857 if (th.th_sport == 0 || th.th_dport == 0) {
9858 action = PF_DROP;
9859 REASON_SET(&reason, PFRES_INVPORT);
9860 goto done;
9861 }
9862 action = pf_test_state_tcp(&s, dir, kif, pbuf, off, h, &pd,
9863 &reason);
9864 if (action == PF_NAT64) {
9865 goto done;
9866 }
9867 if (pd.lmw < 0) {
9868 goto done;
9869 }
9870 PF_APPLE_UPDATE_PDESC_IPv4();
9871 if (action == PF_PASS) {
9872 #if NPFSYNC
9873 pfsync_update_state(s);
9874 #endif /* NPFSYNC */
9875 r = s->rule.ptr;
9876 a = s->anchor.ptr;
9877 log = s->log;
9878 } else if (s == NULL) {
9879 action = pf_test_rule(&r, &s, dir, kif,
9880 pbuf, off, h, &pd, &a, &ruleset, NULL);
9881 }
9882 break;
9883 }
9884
9885 case IPPROTO_UDP: {
9886 struct udphdr uh;
9887
9888 pf_pd_set_hdr_udp(&pd, &uh);
9889 if (!pf_pull_hdr(pbuf, off, &uh, sizeof(uh), sizeof(uh),
9890 &action, &reason, AF_INET)) {
9891 log = action != PF_PASS;
9892 goto done;
9893 }
9894 if (uh.uh_sport == 0 || uh.uh_dport == 0) {
9895 action = PF_DROP;
9896 REASON_SET(&reason, PFRES_INVPORT);
9897 goto done;
9898 }
9899 if (ntohs(uh.uh_ulen) > pbuf->pb_packet_len - off ||
9900 ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
9901 action = PF_DROP;
9902 REASON_SET(&reason, PFRES_SHORT);
9903 goto done;
9904 }
9905 #if DUMMYNET
9906 /* Traffic goes through dummynet first */
9907 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9908 if (action == PF_DROP || pbuf == NULL) {
9909 *pbufp = NULL;
9910 return action;
9911 }
9912 #endif /* DUMMYNET */
9913 action = pf_test_state_udp(&s, dir, kif, pbuf, off, h, &pd,
9914 &reason);
9915 if (action == PF_NAT64) {
9916 goto done;
9917 }
9918 if (pd.lmw < 0) {
9919 goto done;
9920 }
9921 PF_APPLE_UPDATE_PDESC_IPv4();
9922 if (action == PF_PASS) {
9923 #if NPFSYNC
9924 pfsync_update_state(s);
9925 #endif /* NPFSYNC */
9926 r = s->rule.ptr;
9927 a = s->anchor.ptr;
9928 log = s->log;
9929 } else if (s == NULL) {
9930 action = pf_test_rule(&r, &s, dir, kif,
9931 pbuf, off, h, &pd, &a, &ruleset, NULL);
9932 }
9933 break;
9934 }
9935
9936 case IPPROTO_ICMP: {
9937 struct icmp ih;
9938
9939 pf_pd_set_hdr_icmp(&pd, &ih, ICMP_MINLEN);
9940 if (!pf_pull_hdr(pbuf, off, &ih, sizeof(ih), ICMP_MINLEN,
9941 &action, &reason, AF_INET)) {
9942 log = action != PF_PASS;
9943 goto done;
9944 }
9945 #if DUMMYNET
9946 /* Traffic goes through dummynet first */
9947 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9948 if (action == PF_DROP || pbuf == NULL) {
9949 *pbufp = NULL;
9950 return action;
9951 }
9952 #endif /* DUMMYNET */
9953 action = pf_test_state_icmp(&s, dir, kif, pbuf, off, h, &pd,
9954 &reason);
9955
9956 if (action == PF_NAT64) {
9957 goto done;
9958 }
9959 if (pd.lmw < 0) {
9960 goto done;
9961 }
9962 PF_APPLE_UPDATE_PDESC_IPv4();
9963 if (action == PF_PASS) {
9964 #if NPFSYNC
9965 pfsync_update_state(s);
9966 #endif /* NPFSYNC */
9967 r = s->rule.ptr;
9968 a = s->anchor.ptr;
9969 log = s->log;
9970 } else if (s == NULL) {
9971 action = pf_test_rule(&r, &s, dir, kif,
9972 pbuf, off, h, &pd, &a, &ruleset, NULL);
9973 }
9974 break;
9975 }
9976
9977 case IPPROTO_ESP: {
9978 struct pf_esp_hdr esp;
9979
9980 pf_pd_set_hdr_esp(&pd, &esp);
9981 if (!pf_pull_hdr(pbuf, off, &esp, sizeof(esp), sizeof(esp), &action, &reason,
9982 AF_INET)) {
9983 log = action != PF_PASS;
9984 goto done;
9985 }
9986 #if DUMMYNET
9987 /* Traffic goes through dummynet first */
9988 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9989 if (action == PF_DROP || pbuf == NULL) {
9990 *pbufp = NULL;
9991 return action;
9992 }
9993 #endif /* DUMMYNET */
9994 action = pf_test_state_esp(&s, dir, kif, off, &pd);
9995 if (pd.lmw < 0) {
9996 goto done;
9997 }
9998 PF_APPLE_UPDATE_PDESC_IPv4();
9999 if (action == PF_PASS) {
10000 #if NPFSYNC
10001 pfsync_update_state(s);
10002 #endif /* NPFSYNC */
10003 r = s->rule.ptr;
10004 a = s->anchor.ptr;
10005 log = s->log;
10006 } else if (s == NULL) {
10007 action = pf_test_rule(&r, &s, dir, kif,
10008 pbuf, off, h, &pd, &a, &ruleset, NULL);
10009 }
10010 break;
10011 }
10012
10013 case IPPROTO_GRE: {
10014 struct pf_grev1_hdr grev1;
10015 pf_pd_set_hdr_grev1(&pd, &grev1);
10016 if (!pf_pull_hdr(pbuf, off, &grev1, sizeof(grev1), sizeof(grev1), &action,
10017 &reason, AF_INET)) {
10018 log = (action != PF_PASS);
10019 goto done;
10020 }
10021 #if DUMMYNET
10022 /* Traffic goes through dummynet first */
10023 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10024 if (action == PF_DROP || pbuf == NULL) {
10025 *pbufp = NULL;
10026 return action;
10027 }
10028 #endif /* DUMMYNET */
10029 if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
10030 ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
10031 if (ntohs(grev1.payload_length) >
10032 pbuf->pb_packet_len - off) {
10033 action = PF_DROP;
10034 REASON_SET(&reason, PFRES_SHORT);
10035 goto done;
10036 }
10037 pd.proto_variant = PF_GRE_PPTP_VARIANT;
10038 action = pf_test_state_grev1(&s, dir, kif, off, &pd);
10039 if (pd.lmw < 0) {
10040 goto done;
10041 }
10042 PF_APPLE_UPDATE_PDESC_IPv4();
10043 if (action == PF_PASS) {
10044 #if NPFSYNC
10045 pfsync_update_state(s);
10046 #endif /* NPFSYNC */
10047 r = s->rule.ptr;
10048 a = s->anchor.ptr;
10049 log = s->log;
10050 break;
10051 } else if (s == NULL) {
10052 action = pf_test_rule(&r, &s, dir, kif, pbuf,
10053 off, h, &pd, &a, &ruleset, NULL);
10054 if (action == PF_PASS) {
10055 break;
10056 }
10057 }
10058 }
10059
10060 /* not GREv1/PPTP, so treat as ordinary GRE... */
10061 OS_FALLTHROUGH;
10062 }
10063
10064 default:
10065 #if DUMMYNET
10066 /* Traffic goes through dummynet first */
10067 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10068 if (action == PF_DROP || pbuf == NULL) {
10069 *pbufp = NULL;
10070 return action;
10071 }
10072 #endif /* DUMMYNET */
10073 action = pf_test_state_other(&s, dir, kif, &pd);
10074 if (pd.lmw < 0) {
10075 goto done;
10076 }
10077 PF_APPLE_UPDATE_PDESC_IPv4();
10078 if (action == PF_PASS) {
10079 #if NPFSYNC
10080 pfsync_update_state(s);
10081 #endif /* NPFSYNC */
10082 r = s->rule.ptr;
10083 a = s->anchor.ptr;
10084 log = s->log;
10085 } else if (s == NULL) {
10086 action = pf_test_rule(&r, &s, dir, kif, pbuf, off, h,
10087 &pd, &a, &ruleset, NULL);
10088 }
10089 break;
10090 }
10091
10092 done:
10093 if (action == PF_NAT64) {
10094 *pbufp = NULL;
10095 return action;
10096 }
10097
10098 *pbufp = pd.mp;
10099 PF_APPLE_UPDATE_PDESC_IPv4();
10100
10101 if (action != PF_DROP) {
10102 if (action == PF_PASS && h->ip_hl > 5 &&
10103 !((s && s->allow_opts) || r->allow_opts)) {
10104 action = PF_DROP;
10105 REASON_SET(&reason, PFRES_IPOPTIONS);
10106 log = 1;
10107 DPFPRINTF(PF_DEBUG_MISC,
10108 ("pf: dropping packet with ip options [hlen=%u]\n",
10109 (unsigned int) h->ip_hl));
10110 }
10111
10112 if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) ||
10113 (pd.pktflags & PKTF_FLOW_ID)) {
10114 (void) pf_tag_packet(pbuf, pd.pf_mtag, s ? s->tag : 0,
10115 r->rtableid, &pd);
10116 }
10117
10118 if (action == PF_PASS) {
10119 #if PF_ECN
10120 /* add hints for ecn */
10121 pd.pf_mtag->pftag_hdr = h;
10122 /* record address family */
10123 pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET6;
10124 pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
10125 #endif /* PF_ECN */
10126 /* record protocol */
10127 *pbuf->pb_proto = pd.proto;
10128
10129 /*
10130 * connections redirected to loopback should not match sockets
10131 * bound specifically to loopback due to security implications,
10132 * see tcp_input() and in_pcblookup_listen().
10133 */
10134 if (dir == PF_IN && (pd.proto == IPPROTO_TCP ||
10135 pd.proto == IPPROTO_UDP) && s != NULL &&
10136 s->nat_rule.ptr != NULL &&
10137 (s->nat_rule.ptr->action == PF_RDR ||
10138 s->nat_rule.ptr->action == PF_BINAT) &&
10139 (ntohl(pd.dst->v4addr.s_addr) >> IN_CLASSA_NSHIFT)
10140 == IN_LOOPBACKNET) {
10141 pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;
10142 }
10143 }
10144 }
10145
10146 if (log) {
10147 struct pf_rule *lr;
10148
10149 if (s != NULL && s->nat_rule.ptr != NULL &&
10150 s->nat_rule.ptr->log & PF_LOG_ALL) {
10151 lr = s->nat_rule.ptr;
10152 } else {
10153 lr = r;
10154 }
10155 PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, lr, a, ruleset,
10156 &pd);
10157 }
10158
10159 kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
10160 kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
10161
10162 if (action == PF_PASS || r->action == PF_DROP) {
10163 dirndx = (dir == PF_OUT);
10164 r->packets[dirndx]++;
10165 r->bytes[dirndx] += pd.tot_len;
10166 if (a != NULL) {
10167 a->packets[dirndx]++;
10168 a->bytes[dirndx] += pd.tot_len;
10169 }
10170 if (s != NULL) {
10171 sk = s->state_key;
10172 if (s->nat_rule.ptr != NULL) {
10173 s->nat_rule.ptr->packets[dirndx]++;
10174 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
10175 }
10176 if (s->src_node != NULL) {
10177 s->src_node->packets[dirndx]++;
10178 s->src_node->bytes[dirndx] += pd.tot_len;
10179 }
10180 if (s->nat_src_node != NULL) {
10181 s->nat_src_node->packets[dirndx]++;
10182 s->nat_src_node->bytes[dirndx] += pd.tot_len;
10183 }
10184 dirndx = (dir == sk->direction) ? 0 : 1;
10185 s->packets[dirndx]++;
10186 s->bytes[dirndx] += pd.tot_len;
10187 }
10188 tr = r;
10189 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
10190 if (nr != NULL) {
10191 struct pf_addr *x;
10192 /*
10193 * XXX: we need to make sure that the addresses
10194 * passed to pfr_update_stats() are the same than
10195 * the addresses used during matching (pfr_match)
10196 */
10197 if (r == &pf_default_rule) {
10198 tr = nr;
10199 x = (sk == NULL || sk->direction == dir) ?
10200 &pd.baddr : &pd.naddr;
10201 } else {
10202 x = (sk == NULL || sk->direction == dir) ?
10203 &pd.naddr : &pd.baddr;
10204 }
10205 if (x == &pd.baddr || s == NULL) {
10206 /* we need to change the address */
10207 if (dir == PF_OUT) {
10208 pd.src = x;
10209 } else {
10210 pd.dst = x;
10211 }
10212 }
10213 }
10214 if (tr->src.addr.type == PF_ADDR_TABLE) {
10215 pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
10216 sk->direction == dir) ?
10217 pd.src : pd.dst, pd.af,
10218 pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
10219 tr->src.neg);
10220 }
10221 if (tr->dst.addr.type == PF_ADDR_TABLE) {
10222 pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
10223 sk->direction == dir) ? pd.dst : pd.src, pd.af,
10224 pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
10225 tr->dst.neg);
10226 }
10227 }
10228
10229 VERIFY(pbuf == NULL || pd.mp == NULL || pd.mp == pbuf);
10230
10231 if (*pbufp) {
10232 if (pd.lmw < 0) {
10233 REASON_SET(&reason, PFRES_MEMORY);
10234 action = PF_DROP;
10235 }
10236
10237 if (action == PF_DROP) {
10238 pbuf_destroy(*pbufp);
10239 *pbufp = NULL;
10240 return PF_DROP;
10241 }
10242
10243 *pbufp = pbuf;
10244 }
10245
10246 if (action == PF_SYNPROXY_DROP) {
10247 pbuf_destroy(*pbufp);
10248 *pbufp = NULL;
10249 action = PF_PASS;
10250 } else if (r->rt) {
10251 /* pf_route can free the pbuf causing *pbufp to become NULL */
10252 pf_route(pbufp, r, dir, kif->pfik_ifp, s, &pd);
10253 }
10254
10255 return action;
10256 }
10257 #endif /* INET */
10258
10259 #define PF_APPLE_UPDATE_PDESC_IPv6() \
10260 do { \
10261 if (pbuf && pd.mp && pbuf != pd.mp) { \
10262 pbuf = pd.mp; \
10263 } \
10264 h = pbuf->pb_data; \
10265 } while (0)
10266
10267 int
pf_test6_mbuf(int dir,struct ifnet * ifp,struct mbuf ** m0,struct ether_header * eh,struct ip_fw_args * fwa)10268 pf_test6_mbuf(int dir, struct ifnet *ifp, struct mbuf **m0,
10269 struct ether_header *eh, struct ip_fw_args *fwa)
10270 {
10271 pbuf_t pbuf_store, *__single pbuf;
10272 int rv;
10273
10274 pbuf_init_mbuf(&pbuf_store, *m0, (*m0)->m_pkthdr.rcvif);
10275 pbuf = &pbuf_store;
10276
10277 rv = pf_test6(dir, ifp, &pbuf, eh, fwa);
10278
10279 if (pbuf_is_valid(pbuf)) {
10280 *m0 = pbuf->pb_mbuf;
10281 pbuf->pb_mbuf = NULL;
10282 pbuf_destroy(pbuf);
10283 } else {
10284 *m0 = NULL;
10285 }
10286
10287 return rv;
10288 }
10289
10290 static __attribute__((noinline)) int
pf_test6(int dir,struct ifnet * ifp,pbuf_t ** pbufp,struct ether_header * eh,struct ip_fw_args * fwa)10291 pf_test6(int dir, struct ifnet *ifp, pbuf_t **pbufp,
10292 struct ether_header *eh, struct ip_fw_args *fwa)
10293 {
10294 #if !DUMMYNET
10295 #pragma unused(fwa)
10296 #endif
10297 struct pfi_kif *__single kif;
10298 u_short action = PF_PASS, reason = 0, log = 0;
10299 pbuf_t *__single pbuf = *pbufp;
10300 struct ip6_hdr *__single h;
10301 struct pf_rule *__single a = NULL, *__single r = &pf_default_rule, *__single tr, *__single nr;
10302 struct pf_state *__single s = NULL;
10303 struct pf_state_key *__single sk = NULL;
10304 struct pf_ruleset *__single ruleset = NULL;
10305 struct pf_pdesc pd;
10306 int off, terminal = 0, dirndx, rh_cnt = 0;
10307 u_int8_t nxt;
10308 boolean_t fwd = FALSE;
10309
10310 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
10311
10312 ASSERT(ifp != NULL);
10313 if ((dir == PF_OUT) && (pbuf->pb_ifp) && (ifp != pbuf->pb_ifp)) {
10314 fwd = TRUE;
10315 }
10316
10317 if (!pf_status.running) {
10318 return PF_PASS;
10319 }
10320
10321 memset(&pd, 0, sizeof(pd));
10322
10323 if ((pd.pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
10324 DPFPRINTF(PF_DEBUG_URGENT,
10325 ("pf_test6: pf_get_mtag_pbuf returned NULL\n"));
10326 return PF_DROP;
10327 }
10328
10329 if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED) {
10330 return PF_PASS;
10331 }
10332
10333 kif = (struct pfi_kif *)ifp->if_pf_kif;
10334
10335 if (kif == NULL) {
10336 DPFPRINTF(PF_DEBUG_URGENT,
10337 ("pf_test6: kif == NULL, if_name %s\n", ifp->if_name));
10338 return PF_DROP;
10339 }
10340 if (kif->pfik_flags & PFI_IFLAG_SKIP) {
10341 return PF_PASS;
10342 }
10343
10344 if (pbuf->pb_packet_len < (int)sizeof(*h)) {
10345 REASON_SET(&reason, PFRES_SHORT);
10346 return PF_DROP;
10347 }
10348
10349 h = pbuf->pb_data;
10350 nxt = h->ip6_nxt;
10351 off = ((caddr_t)h - (caddr_t)pbuf->pb_data) + sizeof(struct ip6_hdr);
10352 pd.mp = pbuf;
10353 pd.lmw = 0;
10354 pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
10355 pd.src = (struct pf_addr *)(void *)&h->ip6_src;
10356 pd.dst = (struct pf_addr *)(void *)&h->ip6_dst;
10357 PF_ACPY(&pd.baddr, pd.src, AF_INET6);
10358 PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6);
10359 pd.ip_sum = NULL;
10360 pd.af = AF_INET6;
10361 pd.proto = nxt;
10362 pd.proto_variant = 0;
10363 pd.tos = 0;
10364 pd.ttl = h->ip6_hlim;
10365 pd.sc = MBUF_SCIDX(pbuf_get_service_class(pbuf));
10366 pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
10367 pd.eh = eh;
10368
10369 if (*pbuf->pb_flags & PKTF_FLOW_ID) {
10370 pd.flowsrc = *pbuf->pb_flowsrc;
10371 pd.flowhash = *pbuf->pb_flowid;
10372 pd.pktflags = (*pbuf->pb_flags & PKTF_FLOW_MASK);
10373 }
10374
10375 #if DUMMYNET
10376 if (fwa != NULL && fwa->fwa_pf_rule != NULL) {
10377 goto nonormalize;
10378 }
10379 #endif /* DUMMYNET */
10380
10381 /* We do IP header normalization and packet reassembly here */
10382 action = pf_normalize_ip6(pbuf, dir, kif, &reason, &pd);
10383 if (action != PF_PASS || pd.lmw < 0) {
10384 action = PF_DROP;
10385 goto done;
10386 }
10387
10388 #if DUMMYNET
10389 nonormalize:
10390 #endif /* DUMMYNET */
10391 h = pbuf->pb_data;
10392
10393 /*
10394 * we do not support jumbogram yet. if we keep going, zero ip6_plen
10395 * will do something bad, so drop the packet for now.
10396 */
10397 if (htons(h->ip6_plen) == 0) {
10398 action = PF_DROP;
10399 REASON_SET(&reason, PFRES_NORM); /*XXX*/
10400 goto done;
10401 }
10402 pd.src = (struct pf_addr *)(void *)&h->ip6_src;
10403 pd.dst = (struct pf_addr *)(void *)&h->ip6_dst;
10404 PF_ACPY(&pd.baddr, pd.src, AF_INET6);
10405 PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6);
10406 pd.ip_sum = NULL;
10407 pd.af = AF_INET6;
10408 pd.tos = 0;
10409 pd.ttl = h->ip6_hlim;
10410 pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
10411 pd.eh = eh;
10412
10413 off = ((caddr_t)h - (caddr_t)pbuf->pb_data) + sizeof(struct ip6_hdr);
10414 pd.proto = h->ip6_nxt;
10415 pd.proto_variant = 0;
10416 pd.mp = pbuf;
10417 pd.lmw = 0;
10418 pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
10419
10420 do {
10421 switch (pd.proto) {
10422 case IPPROTO_FRAGMENT: {
10423 struct ip6_frag ip6f;
10424
10425 pd.flags |= PFDESC_IP_FRAG;
10426 if (!pf_pull_hdr(pbuf, off, &ip6f, sizeof ip6f, sizeof ip6f, NULL,
10427 &reason, pd.af)) {
10428 DPFPRINTF(PF_DEBUG_MISC,
10429 ("pf: IPv6 short fragment header\n"));
10430 action = PF_DROP;
10431 REASON_SET(&reason, PFRES_SHORT);
10432 log = 1;
10433 goto done;
10434 }
10435 pd.proto = ip6f.ip6f_nxt;
10436 #if DUMMYNET
10437 /* Traffic goes through dummynet first */
10438 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd,
10439 fwa);
10440 if (action == PF_DROP || pbuf == NULL) {
10441 *pbufp = NULL;
10442 return action;
10443 }
10444 #endif /* DUMMYNET */
10445 action = pf_test_fragment(&r, dir, kif, pbuf, h, &pd,
10446 &a, &ruleset);
10447 if (action == PF_DROP) {
10448 REASON_SET(&reason, PFRES_FRAG);
10449 log = 1;
10450 }
10451 goto done;
10452 }
10453 case IPPROTO_ROUTING:
10454 ++rh_cnt;
10455 OS_FALLTHROUGH;
10456
10457 case IPPROTO_AH:
10458 case IPPROTO_HOPOPTS:
10459 case IPPROTO_DSTOPTS: {
10460 /* get next header and header length */
10461 struct ip6_ext opt6;
10462
10463 if (!pf_pull_hdr(pbuf, off, &opt6, sizeof(opt6), sizeof(opt6),
10464 NULL, &reason, pd.af)) {
10465 DPFPRINTF(PF_DEBUG_MISC,
10466 ("pf: IPv6 short opt\n"));
10467 action = PF_DROP;
10468 log = 1;
10469 goto done;
10470 }
10471 if (pd.proto == IPPROTO_AH) {
10472 off += (opt6.ip6e_len + 2) * 4;
10473 } else {
10474 off += (opt6.ip6e_len + 1) * 8;
10475 }
10476 pd.proto = opt6.ip6e_nxt;
10477 /* goto the next header */
10478 break;
10479 }
10480 default:
10481 terminal++;
10482 break;
10483 }
10484 } while (!terminal);
10485
10486
10487 switch (pd.proto) {
10488 case IPPROTO_TCP: {
10489 struct tcphdr th;
10490
10491 pf_pd_set_hdr_tcp(&pd, &th);
10492 if (!pf_pull_hdr(pbuf, off, &th, sizeof(th), sizeof(th),
10493 &action, &reason, AF_INET6)) {
10494 log = action != PF_PASS;
10495 goto done;
10496 }
10497 pd.p_len = pd.tot_len - off - (th.th_off << 2);
10498 #if DUMMYNET
10499 /* Traffic goes through dummynet first */
10500 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10501 if (action == PF_DROP || pbuf == NULL) {
10502 *pbufp = NULL;
10503 return action;
10504 }
10505 #endif /* DUMMYNET */
10506 action = pf_normalize_tcp(dir, kif, pbuf, 0, off, h, &pd);
10507 if (pd.lmw < 0) {
10508 goto done;
10509 }
10510 PF_APPLE_UPDATE_PDESC_IPv6();
10511 if (action == PF_DROP) {
10512 goto done;
10513 }
10514 if (th.th_sport == 0 || th.th_dport == 0) {
10515 action = PF_DROP;
10516 REASON_SET(&reason, PFRES_INVPORT);
10517 goto done;
10518 }
10519 action = pf_test_state_tcp(&s, dir, kif, pbuf, off, h, &pd,
10520 &reason);
10521 if (action == PF_NAT64) {
10522 goto done;
10523 }
10524 if (pd.lmw < 0) {
10525 goto done;
10526 }
10527 PF_APPLE_UPDATE_PDESC_IPv6();
10528 if (action == PF_PASS) {
10529 #if NPFSYNC
10530 pfsync_update_state(s);
10531 #endif /* NPFSYNC */
10532 r = s->rule.ptr;
10533 a = s->anchor.ptr;
10534 log = s->log;
10535 } else if (s == NULL) {
10536 action = pf_test_rule(&r, &s, dir, kif,
10537 pbuf, off, h, &pd, &a, &ruleset, NULL);
10538 }
10539 break;
10540 }
10541
10542 case IPPROTO_UDP: {
10543 struct udphdr uh;
10544
10545 pf_pd_set_hdr_udp(&pd, &uh);
10546 if (!pf_pull_hdr(pbuf, off, &uh, sizeof(uh), sizeof(uh),
10547 &action, &reason, AF_INET6)) {
10548 log = action != PF_PASS;
10549 goto done;
10550 }
10551 if (uh.uh_sport == 0 || uh.uh_dport == 0) {
10552 action = PF_DROP;
10553 REASON_SET(&reason, PFRES_INVPORT);
10554 goto done;
10555 }
10556 if (ntohs(uh.uh_ulen) > pbuf->pb_packet_len - off ||
10557 ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
10558 action = PF_DROP;
10559 REASON_SET(&reason, PFRES_SHORT);
10560 goto done;
10561 }
10562 #if DUMMYNET
10563 /* Traffic goes through dummynet first */
10564 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10565 if (action == PF_DROP || pbuf == NULL) {
10566 *pbufp = NULL;
10567 return action;
10568 }
10569 #endif /* DUMMYNET */
10570 action = pf_test_state_udp(&s, dir, kif, pbuf, off, h, &pd,
10571 &reason);
10572 if (action == PF_NAT64) {
10573 goto done;
10574 }
10575 if (pd.lmw < 0) {
10576 goto done;
10577 }
10578 PF_APPLE_UPDATE_PDESC_IPv6();
10579 if (action == PF_PASS) {
10580 #if NPFSYNC
10581 pfsync_update_state(s);
10582 #endif /* NPFSYNC */
10583 r = s->rule.ptr;
10584 a = s->anchor.ptr;
10585 log = s->log;
10586 } else if (s == NULL) {
10587 action = pf_test_rule(&r, &s, dir, kif,
10588 pbuf, off, h, &pd, &a, &ruleset, NULL);
10589 }
10590 break;
10591 }
10592
10593 case IPPROTO_ICMPV6: {
10594 struct icmp6_hdr ih;
10595
10596 pf_pd_set_hdr_icmp6(&pd, &ih);
10597 if (!pf_pull_hdr(pbuf, off, &ih, sizeof(ih), sizeof(ih),
10598 &action, &reason, AF_INET6)) {
10599 log = action != PF_PASS;
10600 goto done;
10601 }
10602 #if DUMMYNET
10603 /* Traffic goes through dummynet first */
10604 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10605 if (action == PF_DROP || pbuf == NULL) {
10606 *pbufp = NULL;
10607 return action;
10608 }
10609 #endif /* DUMMYNET */
10610 action = pf_test_state_icmp(&s, dir, kif,
10611 pbuf, off, h, &pd, &reason);
10612 if (action == PF_NAT64) {
10613 goto done;
10614 }
10615 if (pd.lmw < 0) {
10616 goto done;
10617 }
10618 PF_APPLE_UPDATE_PDESC_IPv6();
10619 if (action == PF_PASS) {
10620 #if NPFSYNC
10621 pfsync_update_state(s);
10622 #endif /* NPFSYNC */
10623 r = s->rule.ptr;
10624 a = s->anchor.ptr;
10625 log = s->log;
10626 } else if (s == NULL) {
10627 action = pf_test_rule(&r, &s, dir, kif,
10628 pbuf, off, h, &pd, &a, &ruleset, NULL);
10629 }
10630 break;
10631 }
10632
10633 case IPPROTO_ESP: {
10634 struct pf_esp_hdr esp;
10635
10636 pf_pd_set_hdr_esp(&pd, &esp);
10637 if (!pf_pull_hdr(pbuf, off, &esp, sizeof(esp), sizeof(esp), &action,
10638 &reason, AF_INET6)) {
10639 log = action != PF_PASS;
10640 goto done;
10641 }
10642 #if DUMMYNET
10643 /* Traffic goes through dummynet first */
10644 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10645 if (action == PF_DROP || pbuf == NULL) {
10646 *pbufp = NULL;
10647 return action;
10648 }
10649 #endif /* DUMMYNET */
10650 action = pf_test_state_esp(&s, dir, kif, off, &pd);
10651 if (pd.lmw < 0) {
10652 goto done;
10653 }
10654 PF_APPLE_UPDATE_PDESC_IPv6();
10655 if (action == PF_PASS) {
10656 #if NPFSYNC
10657 pfsync_update_state(s);
10658 #endif /* NPFSYNC */
10659 r = s->rule.ptr;
10660 a = s->anchor.ptr;
10661 log = s->log;
10662 } else if (s == NULL) {
10663 action = pf_test_rule(&r, &s, dir, kif,
10664 pbuf, off, h, &pd, &a, &ruleset, NULL);
10665 }
10666 break;
10667 }
10668
10669 case IPPROTO_GRE: {
10670 struct pf_grev1_hdr grev1;
10671
10672 pf_pd_set_hdr_grev1(&pd, &grev1);
10673 if (!pf_pull_hdr(pbuf, off, &grev1, sizeof(grev1), sizeof(grev1), &action,
10674 &reason, AF_INET6)) {
10675 log = (action != PF_PASS);
10676 goto done;
10677 }
10678 #if DUMMYNET
10679 /* Traffic goes through dummynet first */
10680 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10681 if (action == PF_DROP || pbuf == NULL) {
10682 *pbufp = NULL;
10683 return action;
10684 }
10685 #endif /* DUMMYNET */
10686 if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
10687 ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
10688 if (ntohs(grev1.payload_length) >
10689 pbuf->pb_packet_len - off) {
10690 action = PF_DROP;
10691 REASON_SET(&reason, PFRES_SHORT);
10692 goto done;
10693 }
10694 action = pf_test_state_grev1(&s, dir, kif, off, &pd);
10695 if (pd.lmw < 0) {
10696 goto done;
10697 }
10698 PF_APPLE_UPDATE_PDESC_IPv6();
10699 if (action == PF_PASS) {
10700 #if NPFSYNC
10701 pfsync_update_state(s);
10702 #endif /* NPFSYNC */
10703 r = s->rule.ptr;
10704 a = s->anchor.ptr;
10705 log = s->log;
10706 break;
10707 } else if (s == NULL) {
10708 action = pf_test_rule(&r, &s, dir, kif, pbuf,
10709 off, h, &pd, &a, &ruleset, NULL);
10710 if (action == PF_PASS) {
10711 break;
10712 }
10713 }
10714 }
10715
10716 /* not GREv1/PPTP, so treat as ordinary GRE... */
10717 OS_FALLTHROUGH; /* XXX is this correct? */
10718 }
10719
10720 default:
10721 #if DUMMYNET
10722 /* Traffic goes through dummynet first */
10723 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10724 if (action == PF_DROP || pbuf == NULL) {
10725 *pbufp = NULL;
10726 return action;
10727 }
10728 #endif /* DUMMYNET */
10729 action = pf_test_state_other(&s, dir, kif, &pd);
10730 if (pd.lmw < 0) {
10731 goto done;
10732 }
10733 PF_APPLE_UPDATE_PDESC_IPv6();
10734 if (action == PF_PASS) {
10735 #if NPFSYNC
10736 pfsync_update_state(s);
10737 #endif /* NPFSYNC */
10738 r = s->rule.ptr;
10739 a = s->anchor.ptr;
10740 log = s->log;
10741 } else if (s == NULL) {
10742 action = pf_test_rule(&r, &s, dir, kif, pbuf, off, h,
10743 &pd, &a, &ruleset, NULL);
10744 }
10745 break;
10746 }
10747
10748 done:
10749 if (action == PF_NAT64) {
10750 *pbufp = NULL;
10751 return action;
10752 }
10753
10754 *pbufp = pd.mp;
10755 PF_APPLE_UPDATE_PDESC_IPv6();
10756
10757 /* handle dangerous IPv6 extension headers. */
10758 if (action != PF_DROP) {
10759 if (action == PF_PASS && rh_cnt &&
10760 !((s && s->allow_opts) || r->allow_opts)) {
10761 action = PF_DROP;
10762 REASON_SET(&reason, PFRES_IPOPTIONS);
10763 log = 1;
10764 DPFPRINTF(PF_DEBUG_MISC,
10765 ("pf: dropping packet with dangerous v6addr headers\n"));
10766 }
10767
10768 if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) ||
10769 (pd.pktflags & PKTF_FLOW_ID)) {
10770 (void) pf_tag_packet(pbuf, pd.pf_mtag, s ? s->tag : 0,
10771 r->rtableid, &pd);
10772 }
10773
10774 if (action == PF_PASS) {
10775 #if PF_ECN
10776 /* add hints for ecn */
10777 pd.pf_mtag->pftag_hdr = h;
10778 /* record address family */
10779 pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET;
10780 pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
10781 #endif /* PF_ECN */
10782 /* record protocol */
10783 *pbuf->pb_proto = pd.proto;
10784 if (dir == PF_IN && (pd.proto == IPPROTO_TCP ||
10785 pd.proto == IPPROTO_UDP) && s != NULL &&
10786 s->nat_rule.ptr != NULL &&
10787 (s->nat_rule.ptr->action == PF_RDR ||
10788 s->nat_rule.ptr->action == PF_BINAT) &&
10789 IN6_IS_ADDR_LOOPBACK(&pd.dst->v6addr)) {
10790 pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;
10791 }
10792 }
10793 }
10794
10795
10796 if (log) {
10797 struct pf_rule *lr;
10798
10799 if (s != NULL && s->nat_rule.ptr != NULL &&
10800 s->nat_rule.ptr->log & PF_LOG_ALL) {
10801 lr = s->nat_rule.ptr;
10802 } else {
10803 lr = r;
10804 }
10805 PFLOG_PACKET(kif, h, pbuf, AF_INET6, dir, reason, lr, a, ruleset,
10806 &pd);
10807 }
10808
10809 kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
10810 kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
10811
10812 if (action == PF_PASS || r->action == PF_DROP) {
10813 dirndx = (dir == PF_OUT);
10814 r->packets[dirndx]++;
10815 r->bytes[dirndx] += pd.tot_len;
10816 if (a != NULL) {
10817 a->packets[dirndx]++;
10818 a->bytes[dirndx] += pd.tot_len;
10819 }
10820 if (s != NULL) {
10821 sk = s->state_key;
10822 if (s->nat_rule.ptr != NULL) {
10823 s->nat_rule.ptr->packets[dirndx]++;
10824 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
10825 }
10826 if (s->src_node != NULL) {
10827 s->src_node->packets[dirndx]++;
10828 s->src_node->bytes[dirndx] += pd.tot_len;
10829 }
10830 if (s->nat_src_node != NULL) {
10831 s->nat_src_node->packets[dirndx]++;
10832 s->nat_src_node->bytes[dirndx] += pd.tot_len;
10833 }
10834 dirndx = (dir == sk->direction) ? 0 : 1;
10835 s->packets[dirndx]++;
10836 s->bytes[dirndx] += pd.tot_len;
10837 }
10838 tr = r;
10839 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
10840 if (nr != NULL) {
10841 struct pf_addr *x;
10842 /*
10843 * XXX: we need to make sure that the addresses
10844 * passed to pfr_update_stats() are the same than
10845 * the addresses used during matching (pfr_match)
10846 */
10847 if (r == &pf_default_rule) {
10848 tr = nr;
10849 x = (s == NULL || sk->direction == dir) ?
10850 &pd.baddr : &pd.naddr;
10851 } else {
10852 x = (s == NULL || sk->direction == dir) ?
10853 &pd.naddr : &pd.baddr;
10854 }
10855 if (x == &pd.baddr || s == NULL) {
10856 if (dir == PF_OUT) {
10857 pd.src = x;
10858 } else {
10859 pd.dst = x;
10860 }
10861 }
10862 }
10863 if (tr->src.addr.type == PF_ADDR_TABLE) {
10864 pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
10865 sk->direction == dir) ? pd.src : pd.dst, pd.af,
10866 pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
10867 tr->src.neg);
10868 }
10869 if (tr->dst.addr.type == PF_ADDR_TABLE) {
10870 pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
10871 sk->direction == dir) ? pd.dst : pd.src, pd.af,
10872 pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
10873 tr->dst.neg);
10874 }
10875 }
10876
10877 VERIFY(pbuf == NULL || pd.mp == NULL || pd.mp == pbuf);
10878
10879 if (*pbufp) {
10880 if (pd.lmw < 0) {
10881 REASON_SET(&reason, PFRES_MEMORY);
10882 action = PF_DROP;
10883 }
10884
10885 if (action == PF_DROP) {
10886 pbuf_destroy(*pbufp);
10887 *pbufp = NULL;
10888 return PF_DROP;
10889 }
10890
10891 *pbufp = pbuf;
10892 }
10893
10894 if (action == PF_SYNPROXY_DROP) {
10895 pbuf_destroy(*pbufp);
10896 *pbufp = NULL;
10897 action = PF_PASS;
10898 } else if (r->rt) {
10899 /* pf_route6 can free the mbuf causing *pbufp to become NULL */
10900 pf_route6(pbufp, r, dir, kif->pfik_ifp, s, &pd);
10901 }
10902
10903 /* if reassembled packet passed, create new fragments */
10904 struct pf_fragment_tag *ftag = NULL;
10905 if ((action == PF_PASS) && (*pbufp != NULL) && (fwd) &&
10906 ((ftag = pf_find_fragment_tag_pbuf(*pbufp)) != NULL)) {
10907 action = pf_refragment6(ifp, pbufp, ftag);
10908 }
10909 return action;
10910 }
10911
10912 static int
pf_check_congestion(struct ifqueue * ifq)10913 pf_check_congestion(struct ifqueue *ifq)
10914 {
10915 #pragma unused(ifq)
10916 return 0;
10917 }
10918
10919 void
pool_init(struct pool * pp,size_t size,unsigned int align,unsigned int ioff,int flags,const char * wchan,void * palloc)10920 pool_init(struct pool *pp, size_t size, unsigned int align, unsigned int ioff,
10921 int flags, const char *wchan, void *palloc)
10922 {
10923 #pragma unused(align, ioff, flags, palloc)
10924 bzero(pp, sizeof(*pp));
10925 pp->pool_zone = zone_create(wchan, size,
10926 ZC_PGZ_USE_GUARDS | ZC_ZFREE_CLEARMEM);
10927 pp->pool_hiwat = pp->pool_limit = (unsigned int)-1;
10928 pp->pool_name = wchan;
10929 }
10930
10931 /* Zones cannot be currently destroyed */
10932 void
pool_destroy(struct pool * pp)10933 pool_destroy(struct pool *pp)
10934 {
10935 #pragma unused(pp)
10936 }
10937
10938 void
pool_sethiwat(struct pool * pp,int n)10939 pool_sethiwat(struct pool *pp, int n)
10940 {
10941 pp->pool_hiwat = n; /* Currently unused */
10942 }
10943
10944 void
pool_sethardlimit(struct pool * pp,int n,const char * warnmess,int ratecap)10945 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
10946 {
10947 #pragma unused(warnmess, ratecap)
10948 pp->pool_limit = n;
10949 }
10950
10951 void *
pool_get(struct pool * pp,int flags)10952 pool_get(struct pool *pp, int flags)
10953 {
10954 void *buf;
10955
10956 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
10957
10958 if (pp->pool_count > pp->pool_limit) {
10959 DPFPRINTF(PF_DEBUG_NOISY,
10960 ("pf: pool %s hard limit reached (%d)\n",
10961 pp->pool_name != NULL ? pp->pool_name : "unknown",
10962 pp->pool_limit));
10963 pp->pool_fails++;
10964 return NULL;
10965 }
10966
10967 buf = zalloc_flags_buf(pp->pool_zone,
10968 (flags & PR_WAITOK) ? Z_WAITOK : Z_NOWAIT);
10969 if (buf != NULL) {
10970 pp->pool_count++;
10971 VERIFY(pp->pool_count != 0);
10972 }
10973 return buf;
10974 }
10975
10976 void
pool_put(struct pool * pp,void * v)10977 pool_put(struct pool *pp, void *v)
10978 {
10979 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
10980
10981 zfree(pp->pool_zone, v);
10982 VERIFY(pp->pool_count != 0);
10983 pp->pool_count--;
10984 }
10985
10986 struct pf_mtag *
pf_find_mtag_pbuf(pbuf_t * pbuf)10987 pf_find_mtag_pbuf(pbuf_t *pbuf)
10988 {
10989 return pbuf->pb_pftag;
10990 }
10991
10992 struct pf_mtag *
pf_find_mtag(struct mbuf * m)10993 pf_find_mtag(struct mbuf *m)
10994 {
10995 return m_pftag(m);
10996 }
10997
10998 struct pf_mtag *
pf_get_mtag(struct mbuf * m)10999 pf_get_mtag(struct mbuf *m)
11000 {
11001 return pf_find_mtag(m);
11002 }
11003
11004 struct pf_mtag *
pf_get_mtag_pbuf(pbuf_t * pbuf)11005 pf_get_mtag_pbuf(pbuf_t *pbuf)
11006 {
11007 return pf_find_mtag_pbuf(pbuf);
11008 }
11009
11010 struct pf_fragment_tag *
pf_copy_fragment_tag(struct mbuf * m,struct pf_fragment_tag * ftag,int how)11011 pf_copy_fragment_tag(struct mbuf *m, struct pf_fragment_tag *ftag, int how)
11012 {
11013 struct m_tag *__single tag;
11014 struct pf_mtag *__single pftag = pf_find_mtag(m);
11015
11016 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS,
11017 sizeof(*ftag), how, m);
11018 if (tag == NULL) {
11019 return NULL;
11020 }
11021 m_tag_prepend(m, tag);
11022 bcopy(ftag, tag->m_tag_data, sizeof(*ftag));
11023 pftag->pftag_flags |= PF_TAG_REASSEMBLED;
11024 return (struct pf_fragment_tag *)tag->m_tag_data;
11025 }
11026
11027 struct pf_fragment_tag *
pf_find_fragment_tag(struct mbuf * m)11028 pf_find_fragment_tag(struct mbuf *m)
11029 {
11030 struct m_tag *tag;
11031 struct pf_fragment_tag *ftag = NULL;
11032 struct pf_mtag *pftag = pf_find_mtag(m);
11033
11034 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS);
11035 VERIFY((tag == NULL) || (pftag->pftag_flags & PF_TAG_REASSEMBLED));
11036 if (tag != NULL) {
11037 ftag = (struct pf_fragment_tag *)tag->m_tag_data;
11038 }
11039 return ftag;
11040 }
11041
11042 struct pf_fragment_tag *
pf_find_fragment_tag_pbuf(pbuf_t * pbuf)11043 pf_find_fragment_tag_pbuf(pbuf_t *pbuf)
11044 {
11045 struct pf_mtag *mtag = pf_find_mtag_pbuf(pbuf);
11046
11047 return (mtag->pftag_flags & PF_TAG_REASSEMBLED) ?
11048 pbuf->pb_pf_fragtag : NULL;
11049 }
11050
11051 uint64_t
pf_time_second(void)11052 pf_time_second(void)
11053 {
11054 struct timeval t;
11055
11056 microuptime(&t);
11057 return t.tv_sec;
11058 }
11059
11060 uint64_t
pf_calendar_time_second(void)11061 pf_calendar_time_second(void)
11062 {
11063 struct timeval t;
11064
11065 getmicrotime(&t);
11066 return t.tv_sec;
11067 }
11068
11069 static void *
hook_establish(struct hook_desc_head * head,int tail,hook_fn_t fn,void * arg)11070 hook_establish(struct hook_desc_head *head, int tail, hook_fn_t fn, void *arg)
11071 {
11072 struct hook_desc *hd;
11073
11074 hd = kalloc_type(struct hook_desc, Z_WAITOK | Z_NOFAIL);
11075
11076 hd->hd_fn = fn;
11077 hd->hd_arg = arg;
11078 if (tail) {
11079 TAILQ_INSERT_TAIL(head, hd, hd_list);
11080 } else {
11081 TAILQ_INSERT_HEAD(head, hd, hd_list);
11082 }
11083
11084 return hd;
11085 }
11086
11087 static void
hook_runloop(struct hook_desc_head * head,int flags)11088 hook_runloop(struct hook_desc_head *head, int flags)
11089 {
11090 struct hook_desc *__single hd;
11091
11092 if (!(flags & HOOK_REMOVE)) {
11093 if (!(flags & HOOK_ABORT)) {
11094 TAILQ_FOREACH(hd, head, hd_list)
11095 hd->hd_fn(hd->hd_arg);
11096 }
11097 } else {
11098 while (!!(hd = TAILQ_FIRST(head))) {
11099 TAILQ_REMOVE(head, hd, hd_list);
11100 if (!(flags & HOOK_ABORT)) {
11101 hd->hd_fn(hd->hd_arg);
11102 }
11103 if (flags & HOOK_FREE) {
11104 kfree_type(struct hook_desc, hd);
11105 }
11106 }
11107 }
11108 }
11109
11110 #if SKYWALK
11111 static uint32_t
pf_check_compatible_anchor(struct pf_anchor const * a)11112 pf_check_compatible_anchor(struct pf_anchor const * a)
11113 {
11114 const char *__null_terminated anchor_path = __unsafe_null_terminated_from_indexable(a->path);
11115 uint32_t result = 0;
11116
11117 if (strcmp(anchor_path, PF_RESERVED_ANCHOR) == 0) {
11118 goto done;
11119 }
11120
11121 if (strcmp(anchor_path, "com.apple") == 0) {
11122 goto done;
11123 }
11124
11125 for (int i = 0; i < sizeof(compatible_anchors) / sizeof(compatible_anchors[0]); i++) {
11126 const char *__null_terminated ptr = strnstr(anchor_path, compatible_anchors[i], MAXPATHLEN);
11127 if (ptr != NULL && ptr == anchor_path) {
11128 goto done;
11129 }
11130 }
11131
11132 result |= PF_COMPATIBLE_FLAGS_CUSTOM_ANCHORS_PRESENT;
11133 for (int i = PF_RULESET_SCRUB; i < PF_RULESET_MAX; ++i) {
11134 if (a->ruleset.rules[i].active.rcount != 0) {
11135 result |= PF_COMPATIBLE_FLAGS_CUSTOM_RULES_PRESENT;
11136 }
11137 }
11138 done:
11139 return result;
11140 }
11141
11142 uint32_t
pf_check_compatible_rules(void)11143 pf_check_compatible_rules(void)
11144 {
11145 LCK_RW_ASSERT(&pf_perim_lock, LCK_RW_ASSERT_HELD);
11146 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
11147 struct pf_anchor *anchor = NULL;
11148 struct pf_rule *rule = NULL;
11149 uint32_t compat_bitmap = 0;
11150
11151 if (PF_IS_ENABLED) {
11152 compat_bitmap |= PF_COMPATIBLE_FLAGS_PF_ENABLED;
11153 }
11154
11155 RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) {
11156 compat_bitmap |= pf_check_compatible_anchor(anchor);
11157 #define _CHECK_FLAGS (PF_COMPATIBLE_FLAGS_CUSTOM_ANCHORS_PRESENT | PF_COMPATIBLE_FLAGS_CUSTOM_RULES_PRESENT)
11158 if ((compat_bitmap & _CHECK_FLAGS) == _CHECK_FLAGS) {
11159 goto done;
11160 }
11161 #undef _CHECK_FLAGS
11162 }
11163
11164 for (int i = PF_RULESET_SCRUB; i < PF_RULESET_MAX; i++) {
11165 TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr, entries) {
11166 if (rule->anchor == NULL) {
11167 compat_bitmap |= PF_COMPATIBLE_FLAGS_CUSTOM_RULES_PRESENT;
11168 goto done;
11169 }
11170 }
11171 }
11172
11173 done:
11174 return compat_bitmap;
11175 }
11176 #endif // SKYWALK
11177