1 /*
2 * Copyright (c) 2007-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $apfw: git commit 6602420f2f101b74305cd78f7cd9e0c8fdedae97 $ */
30 /* $OpenBSD: pf.c,v 1.567 2008/02/20 23:40:13 henning Exp $ */
31
32 /*
33 * Copyright (c) 2001 Daniel Hartmeier
34 * Copyright (c) 2002 - 2013 Henning Brauer
35 * NAT64 - Copyright (c) 2010 Viagenie Inc. (http://www.viagenie.ca)
36 * All rights reserved.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 *
42 * - Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * - Redistributions in binary form must reproduce the above
45 * copyright notice, this list of conditions and the following
46 * disclaimer in the documentation and/or other materials provided
47 * with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
50 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
51 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
52 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
53 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
54 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
55 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
56 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
57 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
59 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
60 * POSSIBILITY OF SUCH DAMAGE.
61 *
62 * Effort sponsored in part by the Defense Advanced Research Projects
63 * Agency (DARPA) and Air Force Research Laboratory, Air Force
64 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
65 *
66 */
67
68 #include <machine/endian.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/filio.h>
72 #include <sys/socket.h>
73 #include <sys/socketvar.h>
74 #include <sys/kernel.h>
75 #include <sys/time.h>
76 #include <sys/proc.h>
77 #include <sys/random.h>
78 #include <sys/mcache.h>
79 #include <sys/protosw.h>
80
81 #include <libkern/crypto/md5.h>
82 #include <libkern/libkern.h>
83
84 #include <mach/thread_act.h>
85
86 #include <net/if.h>
87 #include <net/if_types.h>
88 #include <net/bpf.h>
89 #include <net/route.h>
90 #include <net/dlil.h>
91
92 #include <netinet/in.h>
93 #include <netinet/in_var.h>
94 #include <netinet/in_systm.h>
95 #include <netinet/ip.h>
96 #include <netinet/ip_var.h>
97 #include <netinet/tcp.h>
98 #include <netinet/tcp_seq.h>
99 #include <netinet/udp.h>
100 #include <netinet/ip_icmp.h>
101 #include <netinet/in_pcb.h>
102 #include <netinet/tcp_timer.h>
103 #include <netinet/tcp_var.h>
104 #include <netinet/tcp_fsm.h>
105 #include <netinet/udp_var.h>
106 #include <netinet/icmp_var.h>
107 #include <net/if_ether.h>
108 #include <net/ethernet.h>
109 #include <net/flowhash.h>
110 #include <net/nat464_utils.h>
111 #include <net/pfvar.h>
112 #include <net/if_pflog.h>
113
114 #if NPFSYNC
115 #include <net/if_pfsync.h>
116 #endif /* NPFSYNC */
117
118 #include <netinet/ip6.h>
119 #include <netinet6/in6_pcb.h>
120 #include <netinet6/ip6_var.h>
121 #include <netinet/icmp6.h>
122 #include <netinet6/nd6.h>
123
124 #if DUMMYNET
125 #include <netinet/ip_dummynet.h>
126 #endif /* DUMMYNET */
127
128 #if SKYWALK
129 #include <skywalk/namespace/flowidns.h>
130 #endif /* SKYWALK */
131
132 /*
133 * For RandomULong(), to get a 32 bits random value
134 * Note that random() returns a 31 bits value, see rdar://11159750
135 */
136 #include <dev/random/randomdev.h>
137
138 #define DPFPRINTF(n, x) (pf_status.debug >= (n) ? printf x : ((void)0))
139
140 /*
141 * On Mac OS X, the rtableid value is treated as the interface scope
142 * value that is equivalent to the interface index used for scoped
143 * routing. A valid scope value is anything but IFSCOPE_NONE (0),
144 * as per definition of ifindex which is a positive, non-zero number.
145 * The other BSDs treat a negative rtableid value as invalid, hence
146 * the test against INT_MAX to handle userland apps which initialize
147 * the field with a negative number.
148 */
149 #define PF_RTABLEID_IS_VALID(r) \
150 ((r) > IFSCOPE_NONE && (r) <= INT_MAX)
151
152 /*
153 * Global variables
154 */
155 static LCK_GRP_DECLARE(pf_lock_grp, "pf");
156 LCK_MTX_DECLARE(pf_lock, &pf_lock_grp);
157
158 static LCK_GRP_DECLARE(pf_perim_lock_grp, "pf_perim");
159 LCK_RW_DECLARE(pf_perim_lock, &pf_perim_lock_grp);
160
161 /* state tables */
162 struct pf_state_tree_lan_ext pf_statetbl_lan_ext;
163 struct pf_state_tree_ext_gwy pf_statetbl_ext_gwy;
164
165 struct pf_palist pf_pabuf;
166 struct pf_status pf_status;
167
168 u_int32_t ticket_pabuf;
169
170 static MD5_CTX pf_tcp_secret_ctx;
171 static u_char pf_tcp_secret[16];
172 static int pf_tcp_secret_init;
173 static int pf_tcp_iss_off;
174
175 static struct pf_anchor_stackframe {
176 struct pf_ruleset *rs;
177 struct pf_rule *r;
178 struct pf_anchor_node *parent;
179 struct pf_anchor *child;
180 } pf_anchor_stack[64];
181
182 struct pool pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
183 struct pool pf_state_pl, pf_state_key_pl;
184
185 typedef void (*hook_fn_t)(void *);
186
187 struct hook_desc {
188 TAILQ_ENTRY(hook_desc) hd_list;
189 hook_fn_t hd_fn;
190 void *hd_arg;
191 };
192
193 #define HOOK_REMOVE 0x01
194 #define HOOK_FREE 0x02
195 #define HOOK_ABORT 0x04
196
197 static void *hook_establish(struct hook_desc_head *, int,
198 hook_fn_t, void *);
199 static void hook_runloop(struct hook_desc_head *, int flags);
200
201 struct pool pf_app_state_pl;
202 static void pf_print_addr(struct pf_addr *addr, sa_family_t af);
203 static void pf_print_sk_host(struct pf_state_host *, u_int8_t, int,
204 u_int8_t);
205
206 static void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
207
208 static void pf_init_threshold(struct pf_threshold *, u_int32_t,
209 u_int32_t);
210 static void pf_add_threshold(struct pf_threshold *);
211 static int pf_check_threshold(struct pf_threshold *);
212
213 static void pf_change_ap(int, pbuf_t *, struct pf_addr *,
214 u_int16_t *, u_int16_t *, u_int16_t *,
215 struct pf_addr *, u_int16_t, u_int8_t, sa_family_t,
216 sa_family_t, int);
217 static int pf_modulate_sack(pbuf_t *, int, struct pf_pdesc *,
218 struct tcphdr *, struct pf_state_peer *);
219 static void pf_change_a6(struct pf_addr *, u_int16_t *,
220 struct pf_addr *, u_int8_t);
221 static void pf_change_addr(struct pf_addr *a, u_int16_t *c, struct pf_addr *an,
222 u_int8_t u, sa_family_t af, sa_family_t afn);
223 static void pf_change_icmp(struct pf_addr *, u_int16_t *,
224 struct pf_addr *, struct pf_addr *, u_int16_t,
225 u_int16_t *, u_int16_t *, u_int16_t *,
226 u_int16_t *, u_int8_t, sa_family_t);
227 static void pf_send_tcp(const struct pf_rule *, sa_family_t,
228 const struct pf_addr *, const struct pf_addr *,
229 u_int16_t, u_int16_t, u_int32_t, u_int32_t,
230 u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
231 u_int16_t, struct ether_header *, struct ifnet *);
232 static void pf_send_icmp(pbuf_t *, u_int8_t, u_int8_t,
233 sa_family_t, struct pf_rule *);
234 static struct pf_rule *pf_match_translation(struct pf_pdesc *, pbuf_t *,
235 int, int, struct pfi_kif *, struct pf_addr *,
236 union pf_state_xport *, struct pf_addr *,
237 union pf_state_xport *, int);
238 static struct pf_rule *pf_get_translation_aux(struct pf_pdesc *,
239 pbuf_t *, int, int, struct pfi_kif *,
240 struct pf_src_node **, struct pf_addr *,
241 union pf_state_xport *, struct pf_addr *,
242 union pf_state_xport *, union pf_state_xport *
243 #if SKYWALK
244 , netns_token *
245 #endif
246 );
247 static void pf_attach_state(struct pf_state_key *,
248 struct pf_state *, int);
249 static u_int32_t pf_tcp_iss(struct pf_pdesc *);
250 static int pf_test_rule(struct pf_rule **, struct pf_state **,
251 int, struct pfi_kif *, pbuf_t *, int,
252 void *, struct pf_pdesc *, struct pf_rule **,
253 struct pf_ruleset **, struct ifqueue *);
254 #if DUMMYNET
255 static int pf_test_dummynet(struct pf_rule **, int,
256 struct pfi_kif *, pbuf_t **,
257 struct pf_pdesc *, struct ip_fw_args *);
258 #endif /* DUMMYNET */
259 static int pf_test_fragment(struct pf_rule **, int,
260 struct pfi_kif *, pbuf_t *, void *,
261 struct pf_pdesc *, struct pf_rule **,
262 struct pf_ruleset **);
263 static int pf_test_state_tcp(struct pf_state **, int,
264 struct pfi_kif *, pbuf_t *, int,
265 void *, struct pf_pdesc *, u_short *);
266 static int pf_test_state_udp(struct pf_state **, int,
267 struct pfi_kif *, pbuf_t *, int,
268 void *, struct pf_pdesc *, u_short *);
269 static int pf_test_state_icmp(struct pf_state **, int,
270 struct pfi_kif *, pbuf_t *, int,
271 void *, struct pf_pdesc *, u_short *);
272 static int pf_test_state_other(struct pf_state **, int,
273 struct pfi_kif *, struct pf_pdesc *);
274 static int pf_match_tag(struct pf_rule *,
275 struct pf_mtag *, int *);
276 static void pf_hash(struct pf_addr *, struct pf_addr *,
277 struct pf_poolhashkey *, sa_family_t);
278 static int pf_map_addr(u_int8_t, struct pf_rule *,
279 struct pf_addr *, struct pf_addr *,
280 struct pf_addr *, struct pf_src_node **);
281 static int pf_get_sport(struct pf_pdesc *, struct pfi_kif *,
282 struct pf_rule *, struct pf_addr *,
283 union pf_state_xport *, struct pf_addr *,
284 union pf_state_xport *, struct pf_addr *,
285 union pf_state_xport *, struct pf_src_node **
286 #if SKYWALK
287 , netns_token *
288 #endif
289 );
290 static void pf_route(pbuf_t **, struct pf_rule *, int,
291 struct ifnet *, struct pf_state *,
292 struct pf_pdesc *);
293 static void pf_route6(pbuf_t **, struct pf_rule *, int,
294 struct ifnet *, struct pf_state *,
295 struct pf_pdesc *);
296 static u_int8_t pf_get_wscale(pbuf_t *, int, u_int16_t,
297 sa_family_t);
298 static u_int16_t pf_get_mss(pbuf_t *, int, u_int16_t,
299 sa_family_t);
300 static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
301 u_int16_t);
302 static void pf_set_rt_ifp(struct pf_state *,
303 struct pf_addr *, sa_family_t af);
304 static int pf_check_proto_cksum(pbuf_t *, int, int,
305 u_int8_t, sa_family_t);
306 static int pf_addr_wrap_neq(struct pf_addr_wrap *,
307 struct pf_addr_wrap *);
308 static struct pf_state *pf_find_state(struct pfi_kif *,
309 struct pf_state_key_cmp *, u_int);
310 static int pf_src_connlimit(struct pf_state **);
311 static void pf_stateins_err(const char *, struct pf_state *,
312 struct pfi_kif *);
313 static int pf_check_congestion(struct ifqueue *);
314
315 #if 0
316 static const char *pf_pptp_ctrl_type_name(u_int16_t code);
317 #endif
318 static void pf_pptp_handler(struct pf_state *, int, int,
319 struct pf_pdesc *, struct pfi_kif *);
320 static void pf_pptp_unlink(struct pf_state *);
321 static void pf_grev1_unlink(struct pf_state *);
322 static int pf_test_state_grev1(struct pf_state **, int,
323 struct pfi_kif *, int, struct pf_pdesc *);
324 static int pf_ike_compare(struct pf_app_state *,
325 struct pf_app_state *);
326 static int pf_test_state_esp(struct pf_state **, int,
327 struct pfi_kif *, int, struct pf_pdesc *);
328 static int pf_test6(int, struct ifnet *, pbuf_t **, struct ether_header *,
329 struct ip_fw_args *);
330 #if INET
331 static int pf_test(int, struct ifnet *, pbuf_t **,
332 struct ether_header *, struct ip_fw_args *);
333 #endif /* INET */
334
335
336 extern struct pool pfr_ktable_pl;
337 extern struct pool pfr_kentry_pl;
338 extern int path_mtu_discovery;
339
340 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
341 { .pp = &pf_state_pl, .limit = PFSTATE_HIWAT },
342 { .pp = &pf_app_state_pl, .limit = PFAPPSTATE_HIWAT },
343 { .pp = &pf_src_tree_pl, .limit = PFSNODE_HIWAT },
344 { .pp = &pf_frent_pl, .limit = PFFRAG_FRENT_HIWAT },
345 { .pp = &pfr_ktable_pl, .limit = PFR_KTABLE_HIWAT },
346 { .pp = &pfr_kentry_pl, .limit = PFR_KENTRY_HIWAT },
347 };
348
349 #if SKYWALK && defined(XNU_TARGET_OS_OSX)
350 const char *compatible_anchors[] = {
351 "com.apple.internet-sharing",
352 "com.apple/250.ApplicationFirewall",
353 "com.apple/200.AirDrop"
354 };
355 #endif // SKYWALK && defined(XNU_TARGET_OS_OSX)
356
void *
pf_lazy_makewritable(struct pf_pdesc *pd, pbuf_t *pbuf, int len)
{
	void *p;

	/*
	 * Lazily make the first 'len' bytes of the packet writable,
	 * doing the (potentially copying) work at most once per packet
	 * descriptor.  pd->lmw caches how many bytes have already been
	 * made writable, or -1 after a failure.  Returns a pointer to
	 * the (possibly relocated) packet data, or NULL on failure.
	 */

	/* A previous attempt failed (lmw latched to -1); fail fast. */
	if (pd->lmw < 0) {
		return NULL;
	}

	VERIFY(pbuf == pd->mp);

	p = pbuf->pb_data;
	if (len > pd->lmw) {
		/*
		 * Need more writable bytes than previously guaranteed.
		 * On failure 'len' becomes -1 and is latched into
		 * pd->lmw so later calls bail out at the check above.
		 */
		if ((p = pbuf_ensure_writable(pbuf, len)) == NULL) {
			len = -1;
		}
		pd->lmw = len;
		if (len >= 0) {
			/*
			 * The buffer may have been reallocated; refresh
			 * every cached pointer into the packet.
			 */
			pd->pf_mtag = pf_find_mtag_pbuf(pbuf);

			switch (pd->af) {
			case AF_INET: {
				struct ip *h = p;
				pd->src = (struct pf_addr *)(uintptr_t)&h->ip_src;
				pd->dst = (struct pf_addr *)(uintptr_t)&h->ip_dst;
				pd->ip_sum = &h->ip_sum;
				break;
			}
			case AF_INET6: {
				struct ip6_hdr *h = p;
				pd->src = (struct pf_addr *)(uintptr_t)&h->ip6_src;
				pd->dst = (struct pf_addr *)(uintptr_t)&h->ip6_dst;
				/* no ip_sum: IPv6 has no header checksum */
				break;
			}
			}
		}
	}

	return len < 0 ? NULL : p;
}
397
398 static const int *
pf_state_lookup_aux(struct pf_state ** state,struct pfi_kif * kif,int direction,int * action)399 pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif,
400 int direction, int *action)
401 {
402 if (*state == NULL || (*state)->timeout == PFTM_PURGE) {
403 *action = PF_DROP;
404 return action;
405 }
406
407 if (direction == PF_OUT &&
408 (((*state)->rule.ptr->rt == PF_ROUTETO &&
409 (*state)->rule.ptr->direction == PF_OUT) ||
410 ((*state)->rule.ptr->rt == PF_REPLYTO &&
411 (*state)->rule.ptr->direction == PF_IN)) &&
412 (*state)->rt_kif != NULL && (*state)->rt_kif != kif) {
413 *action = PF_PASS;
414 return action;
415 }
416
417 return 0;
418 }
419
/*
 * Look up the PF state for the current packet.  On a hit, the state's
 * flow ID is copied into the packet descriptor (unless the packet
 * already carries one), and pf_state_lookup_aux() may short-circuit
 * the caller with an early return of PF_DROP or PF_PASS.  Expands in
 * functions that provide 'state', 'kif', 'key', 'direction' and 'pd'.
 */
#define STATE_LOOKUP() \
	do { \
		int action; \
		*state = pf_find_state(kif, &key, direction); \
		if (*state != NULL && pd != NULL && \
		    !(pd->pktflags & PKTF_FLOW_ID)) { \
			pd->flowsrc = (*state)->state_key->flowsrc; \
			pd->flowhash = (*state)->state_key->flowhash; \
			if (pd->flowhash != 0) { \
				pd->pktflags |= PKTF_FLOW_ID; \
				pd->pktflags &= ~PKTF_FLOW_ADV; \
			} \
		} \
		if (pf_state_lookup_aux(state, kif, direction, &action)) \
			return (action); \
	} while (0)
436
437 /*
438 * This macro resets the flowID information in a packet descriptor which was
439 * copied in from a PF state. This should be used after a protocol state lookup
440 * finds a matching PF state, but then decides to not use it for various
441 * reasons.
442 */
443 #define PD_CLEAR_STATE_FLOWID(_pd) \
444 do { \
445 if (__improbable(((_pd)->pktflags & PKTF_FLOW_ID) && \
446 ((_pd)->flowsrc == FLOWSRC_PF))) { \
447 (_pd)->flowhash = 0; \
448 (_pd)->flowsrc = 0; \
449 (_pd)->pktflags &= ~PKTF_FLOW_ID; \
450 } \
451 \
452 } while (0)
453
/*
 * True when the state's LAN and gateway addresses differ, i.e. address
 * translation is in effect.  For AF_INET6 all four 32-bit words are
 * compared; for other families the first word suffices.
 */
#define STATE_ADDR_TRANSLATE(sk) \
	(sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] || \
	((sk)->af_lan == AF_INET6 && \
	((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] || \
	(sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] || \
	(sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3]))

/* True when family, address or port differs between LAN and gateway. */
#define STATE_TRANSLATE(sk) \
	((sk)->af_lan != (sk)->af_gwy || \
	STATE_ADDR_TRANSLATE(sk) || \
	(sk)->lan.xport.port != (sk)->gwy.xport.port)

/* GRE states translate on address or PPTP call-id rather than port. */
#define STATE_GRE_TRANSLATE(sk) \
	(STATE_ADDR_TRANSLATE(sk) || \
	(sk)->lan.xport.call_id != (sk)->gwy.xport.call_id)

/* Interface to bind a state to: rule's kif if if-bound, else floating. */
#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all
472
/*
 * Bump the per-rule state counters for a newly inserted state: the
 * matching filter rule, and (when present) its anchor and NAT rule.
 * The VERIFYs catch counter wrap-around.
 */
#define STATE_INC_COUNTERS(s) \
	do { \
		s->rule.ptr->states++; \
		VERIFY(s->rule.ptr->states != 0); \
		if (s->anchor.ptr != NULL) { \
			s->anchor.ptr->states++; \
			VERIFY(s->anchor.ptr->states != 0); \
		} \
		if (s->nat_rule.ptr != NULL) { \
			s->nat_rule.ptr->states++; \
			VERIFY(s->nat_rule.ptr->states != 0); \
		} \
	} while (0)

/*
 * Drop the counters bumped by STATE_INC_COUNTERS, in reverse order.
 * The VERIFYs catch underflow (decrement without a matching increment).
 */
#define STATE_DEC_COUNTERS(s) \
	do { \
		if (s->nat_rule.ptr != NULL) { \
			VERIFY(s->nat_rule.ptr->states > 0); \
			s->nat_rule.ptr->states--; \
		} \
		if (s->anchor.ptr != NULL) { \
			VERIFY(s->anchor.ptr->states > 0); \
			s->anchor.ptr->states--; \
		} \
		VERIFY(s->rule.ptr->states > 0); \
		s->rule.ptr->states--; \
	} while (0)
500
501 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
502 static __inline int pf_state_compare_lan_ext(struct pf_state_key *,
503 struct pf_state_key *);
504 static __inline int pf_state_compare_ext_gwy(struct pf_state_key *,
505 struct pf_state_key *);
506 static __inline int pf_state_compare_id(struct pf_state *,
507 struct pf_state *);
508
509 struct pf_src_tree tree_src_tracking;
510
511 struct pf_state_tree_id tree_id;
512 struct pf_state_queue state_list;
513
514 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
515 RB_GENERATE(pf_state_tree_lan_ext, pf_state_key,
516 entry_lan_ext, pf_state_compare_lan_ext);
517 RB_GENERATE(pf_state_tree_ext_gwy, pf_state_key,
518 entry_ext_gwy, pf_state_compare_ext_gwy);
519 RB_GENERATE(pf_state_tree_id, pf_state,
520 entry_id, pf_state_compare_id);
521
522 #define PF_DT_SKIP_LANEXT 0x01
523 #define PF_DT_SKIP_EXTGWY 0x02
524
525 static const u_int16_t PF_PPTP_PORT = 1723;
526 static const u_int32_t PF_PPTP_MAGIC_NUMBER = 0x1A2B3C4D;
527
528 struct pf_pptp_hdr {
529 u_int16_t length;
530 u_int16_t type;
531 u_int32_t magic;
532 };
533
534 struct pf_pptp_ctrl_hdr {
535 u_int16_t type;
536 u_int16_t reserved_0;
537 };
538
539 struct pf_pptp_ctrl_generic {
540 u_int16_t data[0];
541 };
542
543 #define PF_PPTP_CTRL_TYPE_START_REQ 1
544 struct pf_pptp_ctrl_start_req {
545 u_int16_t protocol_version;
546 u_int16_t reserved_1;
547 u_int32_t framing_capabilities;
548 u_int32_t bearer_capabilities;
549 u_int16_t maximum_channels;
550 u_int16_t firmware_revision;
551 u_int8_t host_name[64];
552 u_int8_t vendor_string[64];
553 };
554
555 #define PF_PPTP_CTRL_TYPE_START_RPY 2
556 struct pf_pptp_ctrl_start_rpy {
557 u_int16_t protocol_version;
558 u_int8_t result_code;
559 u_int8_t error_code;
560 u_int32_t framing_capabilities;
561 u_int32_t bearer_capabilities;
562 u_int16_t maximum_channels;
563 u_int16_t firmware_revision;
564 u_int8_t host_name[64];
565 u_int8_t vendor_string[64];
566 };
567
568 #define PF_PPTP_CTRL_TYPE_STOP_REQ 3
569 struct pf_pptp_ctrl_stop_req {
570 u_int8_t reason;
571 u_int8_t reserved_1;
572 u_int16_t reserved_2;
573 };
574
575 #define PF_PPTP_CTRL_TYPE_STOP_RPY 4
576 struct pf_pptp_ctrl_stop_rpy {
577 u_int8_t reason;
578 u_int8_t error_code;
579 u_int16_t reserved_1;
580 };
581
582 #define PF_PPTP_CTRL_TYPE_ECHO_REQ 5
583 struct pf_pptp_ctrl_echo_req {
584 u_int32_t identifier;
585 };
586
587 #define PF_PPTP_CTRL_TYPE_ECHO_RPY 6
588 struct pf_pptp_ctrl_echo_rpy {
589 u_int32_t identifier;
590 u_int8_t result_code;
591 u_int8_t error_code;
592 u_int16_t reserved_1;
593 };
594
595 #define PF_PPTP_CTRL_TYPE_CALL_OUT_REQ 7
596 struct pf_pptp_ctrl_call_out_req {
597 u_int16_t call_id;
598 u_int16_t call_sernum;
599 u_int32_t min_bps;
600 u_int32_t bearer_type;
601 u_int32_t framing_type;
602 u_int16_t rxwindow_size;
603 u_int16_t proc_delay;
604 u_int8_t phone_num[64];
605 u_int8_t sub_addr[64];
606 };
607
608 #define PF_PPTP_CTRL_TYPE_CALL_OUT_RPY 8
609 struct pf_pptp_ctrl_call_out_rpy {
610 u_int16_t call_id;
611 u_int16_t peer_call_id;
612 u_int8_t result_code;
613 u_int8_t error_code;
614 u_int16_t cause_code;
615 u_int32_t connect_speed;
616 u_int16_t rxwindow_size;
617 u_int16_t proc_delay;
618 u_int32_t phy_channel_id;
619 };
620
621 #define PF_PPTP_CTRL_TYPE_CALL_IN_1ST 9
622 struct pf_pptp_ctrl_call_in_1st {
623 u_int16_t call_id;
624 u_int16_t call_sernum;
625 u_int32_t bearer_type;
626 u_int32_t phy_channel_id;
627 u_int16_t dialed_number_len;
628 u_int16_t dialing_number_len;
629 u_int8_t dialed_num[64];
630 u_int8_t dialing_num[64];
631 u_int8_t sub_addr[64];
632 };
633
634 #define PF_PPTP_CTRL_TYPE_CALL_IN_2ND 10
635 struct pf_pptp_ctrl_call_in_2nd {
636 u_int16_t call_id;
637 u_int16_t peer_call_id;
638 u_int8_t result_code;
639 u_int8_t error_code;
640 u_int16_t rxwindow_size;
641 u_int16_t txdelay;
642 u_int16_t reserved_1;
643 };
644
645 #define PF_PPTP_CTRL_TYPE_CALL_IN_3RD 11
646 struct pf_pptp_ctrl_call_in_3rd {
647 u_int16_t call_id;
648 u_int16_t reserved_1;
649 u_int32_t connect_speed;
650 u_int16_t rxwindow_size;
651 u_int16_t txdelay;
652 u_int32_t framing_type;
653 };
654
655 #define PF_PPTP_CTRL_TYPE_CALL_CLR 12
656 struct pf_pptp_ctrl_call_clr {
657 u_int16_t call_id;
658 u_int16_t reserved_1;
659 };
660
661 #define PF_PPTP_CTRL_TYPE_CALL_DISC 13
662 struct pf_pptp_ctrl_call_disc {
663 u_int16_t call_id;
664 u_int8_t result_code;
665 u_int8_t error_code;
666 u_int16_t cause_code;
667 u_int16_t reserved_1;
668 u_int8_t statistics[128];
669 };
670
671 #define PF_PPTP_CTRL_TYPE_ERROR 14
672 struct pf_pptp_ctrl_error {
673 u_int16_t peer_call_id;
674 u_int16_t reserved_1;
675 u_int32_t crc_errors;
676 u_int32_t fr_errors;
677 u_int32_t hw_errors;
678 u_int32_t buf_errors;
679 u_int32_t tim_errors;
680 u_int32_t align_errors;
681 };
682
683 #define PF_PPTP_CTRL_TYPE_SET_LINKINFO 15
684 struct pf_pptp_ctrl_set_linkinfo {
685 u_int16_t peer_call_id;
686 u_int16_t reserved_1;
687 u_int32_t tx_accm;
688 u_int32_t rx_accm;
689 };
690
691 static const size_t PF_PPTP_CTRL_MSG_MINSIZE =
692 sizeof(struct pf_pptp_hdr) + sizeof(struct pf_pptp_ctrl_hdr);
693
694 union pf_pptp_ctrl_msg_union {
695 struct pf_pptp_ctrl_start_req start_req;
696 struct pf_pptp_ctrl_start_rpy start_rpy;
697 struct pf_pptp_ctrl_stop_req stop_req;
698 struct pf_pptp_ctrl_stop_rpy stop_rpy;
699 struct pf_pptp_ctrl_echo_req echo_req;
700 struct pf_pptp_ctrl_echo_rpy echo_rpy;
701 struct pf_pptp_ctrl_call_out_req call_out_req;
702 struct pf_pptp_ctrl_call_out_rpy call_out_rpy;
703 struct pf_pptp_ctrl_call_in_1st call_in_1st;
704 struct pf_pptp_ctrl_call_in_2nd call_in_2nd;
705 struct pf_pptp_ctrl_call_in_3rd call_in_3rd;
706 struct pf_pptp_ctrl_call_clr call_clr;
707 struct pf_pptp_ctrl_call_disc call_disc;
708 struct pf_pptp_ctrl_error error;
709 struct pf_pptp_ctrl_set_linkinfo set_linkinfo;
710 u_int8_t data[0];
711 };
712
713 struct pf_pptp_ctrl_msg {
714 struct pf_pptp_hdr hdr;
715 struct pf_pptp_ctrl_hdr ctrl;
716 union pf_pptp_ctrl_msg_union msg;
717 };
718
719 #define PF_GRE_FLAG_CHECKSUM_PRESENT 0x8000
720 #define PF_GRE_FLAG_VERSION_MASK 0x0007
721 #define PF_GRE_PPP_ETHERTYPE 0x880B
722
723 struct pf_grev1_hdr {
724 u_int16_t flags;
725 u_int16_t protocol_type;
726 u_int16_t payload_length;
727 u_int16_t call_id;
728 /*
729 * u_int32_t seqno;
730 * u_int32_t ackno;
731 */
732 };
733
734 static const u_int16_t PF_IKE_PORT = 500;
735
736 struct pf_ike_hdr {
737 u_int64_t initiator_cookie, responder_cookie;
738 u_int8_t next_payload, version, exchange_type, flags;
739 u_int32_t message_id, length;
740 };
741
742 #define PF_IKE_PACKET_MINSIZE (sizeof (struct pf_ike_hdr))
743
744 #define PF_IKEv1_EXCHTYPE_BASE 1
745 #define PF_IKEv1_EXCHTYPE_ID_PROTECT 2
746 #define PF_IKEv1_EXCHTYPE_AUTH_ONLY 3
747 #define PF_IKEv1_EXCHTYPE_AGGRESSIVE 4
748 #define PF_IKEv1_EXCHTYPE_INFORMATIONAL 5
749 #define PF_IKEv2_EXCHTYPE_SA_INIT 34
750 #define PF_IKEv2_EXCHTYPE_AUTH 35
751 #define PF_IKEv2_EXCHTYPE_CREATE_CHILD_SA 36
752 #define PF_IKEv2_EXCHTYPE_INFORMATIONAL 37
753
754 #define PF_IKEv1_FLAG_E 0x01
755 #define PF_IKEv1_FLAG_C 0x02
756 #define PF_IKEv1_FLAG_A 0x04
757 #define PF_IKEv2_FLAG_I 0x08
758 #define PF_IKEv2_FLAG_V 0x10
759 #define PF_IKEv2_FLAG_R 0x20
760
761 struct pf_esp_hdr {
762 u_int32_t spi;
763 u_int32_t seqno;
764 u_int8_t payload[];
765 };
766
767 static __inline int
pf_addr_compare(struct pf_addr * a,struct pf_addr * b,sa_family_t af)768 pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
769 {
770 switch (af) {
771 #ifdef INET
772 case AF_INET:
773 if (a->addr32[0] > b->addr32[0]) {
774 return 1;
775 }
776 if (a->addr32[0] < b->addr32[0]) {
777 return -1;
778 }
779 break;
780 #endif /* INET */
781 case AF_INET6:
782 if (a->addr32[3] > b->addr32[3]) {
783 return 1;
784 }
785 if (a->addr32[3] < b->addr32[3]) {
786 return -1;
787 }
788 if (a->addr32[2] > b->addr32[2]) {
789 return 1;
790 }
791 if (a->addr32[2] < b->addr32[2]) {
792 return -1;
793 }
794 if (a->addr32[1] > b->addr32[1]) {
795 return 1;
796 }
797 if (a->addr32[1] < b->addr32[1]) {
798 return -1;
799 }
800 if (a->addr32[0] > b->addr32[0]) {
801 return 1;
802 }
803 if (a->addr32[0] < b->addr32[0]) {
804 return -1;
805 }
806 break;
807 }
808 return 0;
809 }
810
811 static __inline int
pf_src_compare(struct pf_src_node * a,struct pf_src_node * b)812 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
813 {
814 int diff;
815
816 if (a->rule.ptr > b->rule.ptr) {
817 return 1;
818 }
819 if (a->rule.ptr < b->rule.ptr) {
820 return -1;
821 }
822 if ((diff = a->af - b->af) != 0) {
823 return diff;
824 }
825 if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0) {
826 return diff;
827 }
828 return 0;
829 }
830
/*
 * RB-tree comparator for the lan/ext state table (used for outbound
 * state lookups).  Orders keys by protocol, LAN address family,
 * protocol-specific ports/ids, then addresses, then any app-state
 * comparator.  For UDP, proto_variant carries the extfilter mode:
 * modes >= PF_EXTFILTER_AD ignore the external port and modes
 * >= PF_EXTFILTER_EI additionally ignore the external address
 * (except for IPv6 when the search key supplies a non-zero one).
 * The order defined here must stay consistent with how
 * pf_find_state() builds its search keys.
 */
static __inline int
pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
{
	int diff;
	int extfilter;

	if ((diff = a->proto - b->proto) != 0) {
		return diff;
	}
	if ((diff = a->af_lan - b->af_lan) != 0) {
		return diff;
	}

	/* Strictest mode by default; relaxed only for UDP below. */
	extfilter = PF_EXTFILTER_APD;

	switch (a->proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		/* ICMP keys store the id in the port field. */
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_TCP:
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		if ((diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_UDP:
		if ((diff = a->proto_variant - b->proto_variant)) {
			return diff;
		}
		extfilter = a->proto_variant;
		if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
			return diff;
		}
		/* Loose extfilter modes ignore the external port. */
		if ((extfilter < PF_EXTFILTER_AD) &&
		    (diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_GRE:
		/* Only PPTP GREv1 states carry a call-id to compare. */
		if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
		    a->proto_variant == b->proto_variant) {
			if (!!(diff = a->ext_lan.xport.call_id -
			    b->ext_lan.xport.call_id)) {
				return diff;
			}
		}
		break;

	case IPPROTO_ESP:
		/* ESP states key on the SPI. */
		if (!!(diff = a->ext_lan.xport.spi - b->ext_lan.xport.spi)) {
			return diff;
		}
		break;

	default:
		break;
	}

	switch (a->af_lan) {
#if INET
	case AF_INET:
		if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr,
		    a->af_lan)) != 0) {
			return diff;
		}

		if (extfilter < PF_EXTFILTER_EI) {
			if ((diff = pf_addr_compare(&a->ext_lan.addr,
			    &b->ext_lan.addr,
			    a->af_lan)) != 0) {
				return diff;
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr,
		    a->af_lan)) != 0) {
			return diff;
		}

		/*
		 * For IPv6, still compare the external address when the
		 * search key (b) supplies a non-zero one, even in loose
		 * extfilter modes.
		 */
		if (extfilter < PF_EXTFILTER_EI ||
		    !PF_AZERO(&b->ext_lan.addr, AF_INET6)) {
			if ((diff = pf_addr_compare(&a->ext_lan.addr,
			    &b->ext_lan.addr,
			    a->af_lan)) != 0) {
				return diff;
			}
		}
		break;
	}

	if (a->app_state && b->app_state) {
		if (a->app_state->compare_lan_ext &&
		    b->app_state->compare_lan_ext) {
			/* Order by comparator identity first, then its result. */
			diff = (const char *)b->app_state->compare_lan_ext -
			    (const char *)a->app_state->compare_lan_ext;
			if (diff != 0) {
				return diff;
			}
			diff = a->app_state->compare_lan_ext(a->app_state,
			    b->app_state);
			if (diff != 0) {
				return diff;
			}
		}
	}

	return 0;
}
949
/*
 * RB-tree comparator for the ext/gwy state table (used for inbound
 * state lookups).  Mirrors pf_state_compare_lan_ext() but keys on the
 * gateway-side tuple: protocol, gateway address family, ports/ids,
 * addresses, then any app-state comparator.  For UDP, proto_variant
 * carries the extfilter mode that relaxes matching on the external
 * port (>= PF_EXTFILTER_AD) and external address (>= PF_EXTFILTER_EI,
 * except for IPv6 search keys supplying a non-zero address).
 */
static __inline int
pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
{
	int diff;
	int extfilter;

	if ((diff = a->proto - b->proto) != 0) {
		return diff;
	}

	if ((diff = a->af_gwy - b->af_gwy) != 0) {
		return diff;
	}

	/* Strictest mode by default; relaxed only for UDP below. */
	extfilter = PF_EXTFILTER_APD;

	switch (a->proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		/* ICMP keys store the id in the port field. */
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_TCP:
		if ((diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0) {
			return diff;
		}
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_UDP:
		if ((diff = a->proto_variant - b->proto_variant)) {
			return diff;
		}
		extfilter = a->proto_variant;
		if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
			return diff;
		}
		/* Loose extfilter modes ignore the external port. */
		if ((extfilter < PF_EXTFILTER_AD) &&
		    (diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0) {
			return diff;
		}
		break;

	case IPPROTO_GRE:
		/* Only PPTP GREv1 states carry a call-id to compare. */
		if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
		    a->proto_variant == b->proto_variant) {
			if (!!(diff = a->gwy.xport.call_id -
			    b->gwy.xport.call_id)) {
				return diff;
			}
		}
		break;

	case IPPROTO_ESP:
		/* ESP states key on the SPI. */
		if (!!(diff = a->gwy.xport.spi - b->gwy.xport.spi)) {
			return diff;
		}
		break;

	default:
		break;
	}

	switch (a->af_gwy) {
#if INET
	case AF_INET:
		if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr,
		    a->af_gwy)) != 0) {
			return diff;
		}

		if (extfilter < PF_EXTFILTER_EI) {
			if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr,
			    a->af_gwy)) != 0) {
				return diff;
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr,
		    a->af_gwy)) != 0) {
			return diff;
		}

		/*
		 * For IPv6, still compare the external address when the
		 * search key (b) supplies a non-zero one, even in loose
		 * extfilter modes.
		 */
		if (extfilter < PF_EXTFILTER_EI ||
		    !PF_AZERO(&b->ext_gwy.addr, AF_INET6)) {
			if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr,
			    a->af_gwy)) != 0) {
				return diff;
			}
		}
		break;
	}

	if (a->app_state && b->app_state) {
		if (a->app_state->compare_ext_gwy &&
		    b->app_state->compare_ext_gwy) {
			/* Order by comparator identity first, then its result. */
			diff = (const char *)b->app_state->compare_ext_gwy -
			    (const char *)a->app_state->compare_ext_gwy;
			if (diff != 0) {
				return diff;
			}
			diff = a->app_state->compare_ext_gwy(a->app_state,
			    b->app_state);
			if (diff != 0) {
				return diff;
			}
		}
	}

	return 0;
}
1067
1068 static __inline int
pf_state_compare_id(struct pf_state * a,struct pf_state * b)1069 pf_state_compare_id(struct pf_state *a, struct pf_state *b)
1070 {
1071 if (a->id > b->id) {
1072 return 1;
1073 }
1074 if (a->id < b->id) {
1075 return -1;
1076 }
1077 if (a->creatorid > b->creatorid) {
1078 return 1;
1079 }
1080 if (a->creatorid < b->creatorid) {
1081 return -1;
1082 }
1083
1084 return 0;
1085 }
1086
1087 void
pf_addrcpy(struct pf_addr * dst,struct pf_addr * src,sa_family_t af)1088 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
1089 {
1090 switch (af) {
1091 #if INET
1092 case AF_INET:
1093 dst->addr32[0] = src->addr32[0];
1094 break;
1095 #endif /* INET */
1096 case AF_INET6:
1097 dst->addr32[0] = src->addr32[0];
1098 dst->addr32[1] = src->addr32[1];
1099 dst->addr32[2] = src->addr32[2];
1100 dst->addr32[3] = src->addr32[3];
1101 break;
1102 }
1103 }
1104
/*
 * Look up a state by its (id, creatorid) pair in the global id tree.
 * Bumps the state-search counter; returns NULL when not found.
 */
struct pf_state *
pf_find_state_byid(struct pf_state_cmp *key)
{
	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	/* key is layout-compatible with the leading fields of pf_state */
	return RB_FIND(pf_state_tree_id, &tree_id,
	           (struct pf_state *)(void *)key);
}
1113
/*
 * Find the state matching `key` for the given direction: PF_OUT
 * searches the lan_ext tree, PF_IN the ext_gwy tree (with a NAT64
 * fallback into lan_ext).  Returns the first state on the key whose
 * kif matches `kif` or is floating (pfi_all), else NULL.
 */
static struct pf_state *
pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir)
{
	struct pf_state_key *sk = NULL;
	struct pf_state *s;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	switch (dir) {
	case PF_OUT:
		sk = RB_FIND(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
		    (struct pf_state_key *)key);
		break;
	case PF_IN:
		sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
		    (struct pf_state_key *)key);
		/*
		 * NAT64 is done only on input; for packets coming in
		 * from the LAN side, need to lookup the lan_ext tree.
		 */
		if (sk == NULL) {
			sk = RB_FIND(pf_state_tree_lan_ext,
			    &pf_statetbl_lan_ext,
			    (struct pf_state_key *)key);
			/* only accept a translating (NAT64) state here */
			if (sk && sk->af_lan == sk->af_gwy) {
				sk = NULL;
			}
		}
		break;
	default:
		panic("pf_find_state");
	}

	/* list is sorted, if-bound states before floating ones */
	if (sk != NULL) {
		TAILQ_FOREACH(s, &sk->states, next)
		if (s->kif == pfi_all || s->kif == kif) {
			return s;
		}
	}

	return NULL;
}
1157
/*
 * Like pf_find_state(), but ignores interface binding: returns the
 * first state attached to the matching key.  When `more` is non-NULL,
 * additionally adds the number of states on that key to *more.
 */
struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key *sk = NULL;
	struct pf_state *s, *ret = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	switch (dir) {
	case PF_OUT:
		sk = RB_FIND(pf_state_tree_lan_ext,
		    &pf_statetbl_lan_ext, (struct pf_state_key *)key);
		break;
	case PF_IN:
		sk = RB_FIND(pf_state_tree_ext_gwy,
		    &pf_statetbl_ext_gwy, (struct pf_state_key *)key);
		/*
		 * NAT64 is done only on input; for packets coming in
		 * from the LAN side, need to lookup the lan_ext tree.
		 */
		if ((sk == NULL) && pf_nat64_configured) {
			sk = RB_FIND(pf_state_tree_lan_ext,
			    &pf_statetbl_lan_ext,
			    (struct pf_state_key *)key);
			/* only accept a translating (NAT64) state here */
			if (sk && sk->af_lan == sk->af_gwy) {
				sk = NULL;
			}
		}
		break;
	default:
		panic("pf_find_state_all");
	}

	if (sk != NULL) {
		ret = TAILQ_FIRST(&sk->states);
		if (more == NULL) {
			return ret;
		}

		TAILQ_FOREACH(s, &sk->states, next)
		(*more)++;
	}

	return ret;
}
1203
1204 static void
pf_init_threshold(struct pf_threshold * threshold,u_int32_t limit,u_int32_t seconds)1205 pf_init_threshold(struct pf_threshold *threshold,
1206 u_int32_t limit, u_int32_t seconds)
1207 {
1208 threshold->limit = limit * PF_THRESHOLD_MULT;
1209 threshold->seconds = seconds;
1210 threshold->count = 0;
1211 threshold->last = pf_time_second();
1212 }
1213
1214 static void
pf_add_threshold(struct pf_threshold * threshold)1215 pf_add_threshold(struct pf_threshold *threshold)
1216 {
1217 u_int32_t t = pf_time_second(), diff = t - threshold->last;
1218
1219 if (diff >= threshold->seconds) {
1220 threshold->count = 0;
1221 } else {
1222 threshold->count -= threshold->count * diff /
1223 threshold->seconds;
1224 }
1225 threshold->count += PF_THRESHOLD_MULT;
1226 threshold->last = t;
1227 }
1228
1229 static int
pf_check_threshold(struct pf_threshold * threshold)1230 pf_check_threshold(struct pf_threshold *threshold)
1231 {
1232 return threshold->count > threshold->limit;
1233 }
1234
/*
 * Account a newly established connection against the state's source
 * node and enforce the rule's max-src-conn and max-src-conn-rate
 * limits.  On violation, optionally inserts the source address into
 * the rule's overload table and (if the rule says so) flushes
 * matching states, then marks this state for purge.
 * Returns 1 if the state was killed, 0 if within limits.
 */
static int
pf_src_connlimit(struct pf_state **state)
{
	int bad = 0;
	(*state)->src_node->conn++;
	VERIFY((*state)->src_node->conn != 0);
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&(*state)->src_node->conn_rate);

	/* absolute per-source connection count limit */
	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn <
	    (*state)->src_node->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	/* per-source connection rate limit */
	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&(*state)->src_node->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad) {
		return 0;
	}

	if ((*state)->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf_src_connlimit: blocking address ");
			pf_print_host(&(*state)->src_node->addr, 0,
			    (*state)->state_key->af_lan);
		}

		/* insert the offending source address (full host mask) */
		bzero(&p, sizeof(p));
		p.pfra_af = (*state)->state_key->af_lan;
		switch ((*state)->state_key->af_lan) {
#if INET
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = (*state)->src_node->addr.v4addr;
			break;
#endif /* INET */
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = (*state)->src_node->addr.v6addr;
			break;
		}

		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
		    &p, pf_calendar_time_second());

		/* kill existing states if that's required. */
		if ((*state)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->state_key;
				/*
				 * Kill states from this source. (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af_lan ==
				    (*state)->state_key->af_lan &&
				    (((*state)->state_key->direction ==
				    PF_OUT &&
				    PF_AEQ(&(*state)->src_node->addr,
				    &sk->lan.addr, sk->af_lan)) ||
				    ((*state)->state_key->direction == PF_IN &&
				    PF_AEQ(&(*state)->src_node->addr,
				    &sk->ext_lan.addr, sk->af_lan))) &&
				    ((*state)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*state)->rule.ptr == st->rule.ptr)) {
					/* reap on the next purge pass */
					st->timeout = PFTM_PURGE;
					st->src.state = st->dst.state =
					    TCPS_CLOSED;
					killed++;
				}
			}
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf(", %u states killed", killed);
			}
		}
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("\n");
		}
	}

	/* kill this state */
	(*state)->timeout = PFTM_PURGE;
	(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
	return 1;
}
1335
/*
 * Find or create the source-tracking node for src/af under `rule`.
 * If *sn is non-NULL on entry it is reused as-is.  Returns 0 with
 * *sn set on success; -1 when allocation or tree insertion fails or
 * when the rule's max-src-nodes / max-src-states limits are hit.
 */
int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    struct pf_addr *src, sa_family_t af)
{
	struct pf_src_node k;

	if (*sn == NULL) {
		/* build a stack key and search the tracking tree */
		k.af = af;
		PF_ACPY(&k.addr, src, af);
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR) {
			k.rule.ptr = rule;
		} else {
			k.rule.ptr = NULL;
		}
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		/* not found: allocate unless over the node limit */
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes) {
			(*sn) = pool_get(&pf_src_tree_pl, PR_WAITOK);
		} else {
			pf_status.lcounters[LCNT_SRCNODES]++;
		}
		if ((*sn) == NULL) {
			return -1;
		}
		bzero(*sn, sizeof(struct pf_src_node));

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->af = af;
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR) {
			(*sn)->rule.ptr = rule;
		} else {
			(*sn)->rule.ptr = NULL;
		}
		PF_ACPY(&(*sn)->addr, src, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				printf("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			*sn = NULL; /* signal the caller that no additional cleanup is needed */
			return -1;
		}
		(*sn)->creation = pf_time_second();
		(*sn)->ruletype = rule->action;
		if ((*sn)->rule.ptr != NULL) {
			(*sn)->rule.ptr->src_nodes++;
		}
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		/* existing node: enforce the per-source state limit */
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return -1;
		}
	}
	return 0;
}
1405
/*
 * Debug helper: log a state-table insertion failure, printing the
 * offending tree name, interface, protocol, and all four state-key
 * endpoints.  Emits output only at PF_DEBUG_MISC or higher.
 */
static void
pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif)
{
	struct pf_state_key *sk = s->state_key;

	if (pf_status.debug >= PF_DEBUG_MISC) {
		printf("pf: state insert failed: %s %s ", tree, kif->pfik_name);
		switch (sk->proto) {
		case IPPROTO_TCP:
			printf("TCP");
			break;
		case IPPROTO_UDP:
			printf("UDP");
			break;
		case IPPROTO_ICMP:
			printf("ICMP4");
			break;
		case IPPROTO_ICMPV6:
			printf("ICMP6");
			break;
		default:
			printf("PROTO=%u", sk->proto);
			break;
		}
		printf(" lan: ");
		pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto,
		    sk->proto_variant);
		printf(" gwy: ");
		pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto,
		    sk->proto_variant);
		printf(" ext_lan: ");
		pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto,
		    sk->proto_variant);
		printf(" ext_gwy: ");
		pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto,
		    sk->proto_variant);
		if (s->sync_flags & PFSTATE_FROMSYNC) {
			printf(" (from sync)");
		}
		printf("\n");
	}
}
1448
/*
 * Link a fully-built state into the lan_ext, ext_gwy, and id trees.
 * If an equivalent state key already exists, the state is attached
 * to the existing key instead — unless the same kif already has a
 * state there, which is a collision.  Returns 0 on success, -1 on
 * collision or insert failure (the state is detached first).
 */
int
pf_insert_state(struct pfi_kif *kif, struct pf_state *s)
{
	struct pf_state_key *cur;
	struct pf_state *sp;

	VERIFY(s->state_key != NULL);
	s->kif = kif;

	if ((cur = RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
	    s->state_key)) != NULL) {
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(sp, &cur->states, next)
		if (sp->kif == kif) {   /* collision! */
			pf_stateins_err("tree_lan_ext", s, kif);
			pf_detach_state(s,
			    PF_DT_SKIP_LANEXT | PF_DT_SKIP_EXTGWY);
			return -1;
		}
		/* reuse the existing key; drop our private one */
		pf_detach_state(s, PF_DT_SKIP_LANEXT | PF_DT_SKIP_EXTGWY);
		pf_attach_state(cur, s, kif == pfi_all ? 1 : 0);
	}

	/* if cur != NULL, we already found a state key and attached to it */
	if (cur == NULL && (cur = RB_INSERT(pf_state_tree_ext_gwy,
	    &pf_statetbl_ext_gwy, s->state_key)) != NULL) {
		/* must not happen. we must have found the sk above! */
		pf_stateins_err("tree_ext_gwy", s, kif);
		pf_detach_state(s, PF_DT_SKIP_EXTGWY);
		return -1;
	}

	/* assign an id unless one came from a pfsync peer */
	if (s->id == 0 && s->creatorid == 0) {
		s->id = htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
	}
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    be64toh(s->id), ntohl(s->creatorid));
			if (s->sync_flags & PFSTATE_FROMSYNC) {
				printf(" (from sync)");
			}
			printf("\n");
		}
		pf_detach_state(s, 0);
		return -1;
	}
	TAILQ_INSERT_TAIL(&state_list, s, entry_list);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	VERIFY(pf_status.states != 0);
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
#if NPFSYNC
	pfsync_insert_state(s);
#endif
	return 0;
}
1508
/*
 * Continuation for the pf purge thread: runs roughly once a second,
 * expiring a slice of the state table each pass and the fragment /
 * source-node tables every PFTM_INTERVAL seconds.  Reschedules
 * itself via tsleep0() and never returns normally.
 */
static int
pf_purge_thread_cont(int err)
{
#pragma unused(err)
	static u_int32_t nloops = 0;
	int t = 1;      /* 1 second */

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the periodic timeout callout to update
	 * the counter returnable via net_uptime().
	 */
	net_update_uptime();

	lck_rw_lock_shared(&pf_perim_lock);
	lck_mtx_lock(&pf_lock);

	/* purge everything if not running */
	if (!pf_status.running) {
		pf_purge_expired_states(pf_status.states);
		pf_purge_expired_fragments();
		pf_purge_expired_src_nodes();

		/* terminate thread (we don't currently do this) */
		if (pf_purge_thread == NULL) {
			lck_mtx_unlock(&pf_lock);
			lck_rw_done(&pf_perim_lock);

			thread_deallocate(current_thread());
			thread_terminate(current_thread());
			/* NOTREACHED */
			return 0;
		} else {
			/* if there's nothing left, sleep w/o timeout */
			if (pf_status.states == 0 &&
			    pf_normalize_isempty() &&
			    RB_EMPTY(&tree_src_tracking)) {
				nloops = 0;
				t = 0;
			}
			goto done;
		}
	}

	/* process a fraction of the state table every second */
	pf_purge_expired_states(1 + (pf_status.states
	    / pf_default_rule.timeout[PFTM_INTERVAL]));

	/* purge other expired types every PFTM_INTERVAL seconds */
	if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_fragments();
		pf_purge_expired_src_nodes();
		nloops = 0;
	}
done:
	lck_mtx_unlock(&pf_lock);
	lck_rw_done(&pf_perim_lock);

	/* park until the next tick; t == 0 means sleep indefinitely */
	(void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge_cont",
	    t * hz, pf_purge_thread_cont);
	/* NOTREACHED */
	VERIFY(0);

	return 0;
}
1574
/*
 * Entry point for the pf purge thread: immediately parks in tsleep0()
 * with pf_purge_thread_cont() as the continuation.
 */
void
pf_purge_thread_fn(void *v, wait_result_t w)
{
#pragma unused(v, w)
	(void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge", 0,
	    pf_purge_thread_cont);
	/*
	 * tsleep0() shouldn't have returned as PCATCH was not set;
	 * therefore assert in this case.
	 */
	VERIFY(0);
}
1587
1588 u_int64_t
pf_state_expires(const struct pf_state * state)1589 pf_state_expires(const struct pf_state *state)
1590 {
1591 u_int32_t t;
1592 u_int32_t start;
1593 u_int32_t end;
1594 u_int32_t states;
1595
1596 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1597
1598 /* handle all PFTM_* > PFTM_MAX here */
1599 if (state->timeout == PFTM_PURGE) {
1600 return pf_time_second();
1601 }
1602
1603 VERIFY(state->timeout != PFTM_UNLINKED);
1604 VERIFY(state->timeout < PFTM_MAX);
1605 t = state->rule.ptr->timeout[state->timeout];
1606 if (!t) {
1607 t = pf_default_rule.timeout[state->timeout];
1608 }
1609 start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
1610 if (start) {
1611 end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
1612 states = state->rule.ptr->states;
1613 } else {
1614 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1615 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1616 states = pf_status.states;
1617 }
1618 if (end && states > start && start < end) {
1619 if (states < end) {
1620 return state->expire + t * (end - states) /
1621 (end - start);
1622 } else {
1623 return pf_time_second();
1624 }
1625 }
1626 return state->expire + t;
1627 }
1628
/*
 * Walk the source-tracking tree and free nodes that no longer have
 * states and whose expiry time has passed, dropping each node's rule
 * reference (and freeing the rule itself if now unused).
 */
void
pf_purge_expired_src_nodes(void)
{
	struct pf_src_node *cur, *next;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		/* fetch successor first; cur may be removed below */
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states <= 0 && cur->expire <= pf_time_second()) {
			if (cur->rule.ptr != NULL) {
				cur->rule.ptr->src_nodes--;
				if (cur->rule.ptr->states <= 0 &&
				    cur->rule.ptr->max_src_nodes <= 0) {
					pf_rm_rule(NULL, cur->rule.ptr);
				}
			}
			RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
			pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
			pf_status.src_nodes--;
			pool_put(&pf_src_tree_pl, cur);
		}
	}
}
1654
/*
 * Drop a state's references on its source-tracking node(s).  When a
 * node's state count reaches zero it is given an expiry time
 * (PFTM_SRC_NODE from the rule, else the default) so the purge
 * thread can later reclaim it.
 */
void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t t;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	if (s->src_node != NULL) {
		if (s->src.tcp_est) {
			VERIFY(s->src_node->conn > 0);
			--s->src_node->conn;
		}
		VERIFY(s->src_node->states > 0);
		if (--s->src_node->states <= 0) {
			t = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!t) {
				t = pf_default_rule.timeout[PFTM_SRC_NODE];
			}
			s->src_node->expire = pf_time_second() + t;
		}
	}
	/* the NAT node may be the same as src_node; don't release twice */
	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
		VERIFY(s->nat_src_node->states > 0);
		if (--s->nat_src_node->states <= 0) {
			t = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!t) {
				t = pf_default_rule.timeout[PFTM_SRC_NODE];
			}
			s->nat_src_node->expire = pf_time_second() + t;
		}
	}
	s->src_node = s->nat_src_node = NULL;
}
1688
/*
 * Unlink a state from the id tree and its state key, run its unlink
 * hooks, and release its source-node references.  For a pending TCP
 * proxy handshake the peer is first torn down with an RST.  The
 * state memory itself is released later by pf_free_state().
 */
void
pf_unlink_state(struct pf_state *cur)
{
	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		/* proxied handshake never completed: reset the peer */
		pf_send_tcp(cur->rule.ptr, cur->state_key->af_lan,
		    &cur->state_key->ext_lan.addr, &cur->state_key->lan.addr,
		    cur->state_key->ext_lan.xport.port,
		    cur->state_key->lan.xport.port,
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST | TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
	}

	hook_runloop(&cur->unlink_hooks, HOOK_REMOVE | HOOK_FREE);
	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFSYNC
	if (cur->creatorid == pf_status.hostid) {
		pfsync_delete_state(cur);
	}
#endif
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur, 0);
}
1714
/*
 * Callers should be at splpf and hold the write_lock on
 * pf_consistency_lock.  (NOTE(review): the assertion below checks
 * pf_lock — confirm which lock this comment should name.)
 *
 * Release an already-unlinked state: drop rule/NAT-rule/anchor
 * references (freeing rules that become unused), clean up TCP
 * normalization bookkeeping, unref the kif, and free the state.
 */
void
pf_free_state(struct pf_state *cur)
{
	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
#if NPFSYNC
	/* defer while a pfsync bulk transfer still references this state */
	if (pfsyncif != NULL &&
	    (pfsyncif->sc_bulk_send_next == cur ||
	    pfsyncif->sc_bulk_terminator == cur)) {
		return;
	}
#endif
	VERIFY(cur->timeout == PFTM_UNLINKED);
	VERIFY(cur->rule.ptr->states > 0);
	if (--cur->rule.ptr->states <= 0 &&
	    cur->rule.ptr->src_nodes <= 0) {
		pf_rm_rule(NULL, cur->rule.ptr);
	}
	if (cur->nat_rule.ptr != NULL) {
		VERIFY(cur->nat_rule.ptr->states > 0);
		if (--cur->nat_rule.ptr->states <= 0 &&
		    cur->nat_rule.ptr->src_nodes <= 0) {
			pf_rm_rule(NULL, cur->nat_rule.ptr);
		}
	}
	if (cur->anchor.ptr != NULL) {
		VERIFY(cur->anchor.ptr->states > 0);
		if (--cur->anchor.ptr->states <= 0) {
			pf_rm_rule(NULL, cur->anchor.ptr);
		}
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	TAILQ_REMOVE(&state_list, cur, entry_list);
	if (cur->tag) {
		pf_tag_unref(cur->tag);
	}
#if SKYWALK
	netns_release(&cur->nstoken);
#endif
	pool_put(&pf_state_pl, cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	VERIFY(pf_status.states > 0);
	pf_status.states--;
}
1761
/*
 * Scan up to `maxcheck` states from the global state list, freeing
 * any that are already unlinked or past their expiry.  The cursor is
 * static so successive calls resume where the previous one stopped,
 * spreading the work across purge-thread ticks.
 */
void
pf_purge_expired_states(u_int32_t maxcheck)
{
	static struct pf_state *cur = NULL;
	struct pf_state *next;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	while (maxcheck--) {
		/* wrap to start of list when we hit the end */
		if (cur == NULL) {
			cur = TAILQ_FIRST(&state_list);
			if (cur == NULL) {
				break;  /* list empty */
			}
		}

		/* get next state, as cur may get deleted */
		next = TAILQ_NEXT(cur, entry_list);

		if (cur->timeout == PFTM_UNLINKED) {
			pf_free_state(cur);
		} else if (pf_state_expires(cur) <= pf_time_second()) {
			/* unlink and free expired state */
			pf_unlink_state(cur);
			pf_free_state(cur);
		}
		cur = next;
	}
}
1792
1793 int
pf_tbladdr_setup(struct pf_ruleset * rs,struct pf_addr_wrap * aw)1794 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1795 {
1796 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1797
1798 if (aw->type != PF_ADDR_TABLE) {
1799 return 0;
1800 }
1801 if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL) {
1802 return 1;
1803 }
1804 return 0;
1805 }
1806
1807 void
pf_tbladdr_remove(struct pf_addr_wrap * aw)1808 pf_tbladdr_remove(struct pf_addr_wrap *aw)
1809 {
1810 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1811
1812 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) {
1813 return;
1814 }
1815 pfr_detach_table(aw->p.tbl);
1816 aw->p.tbl = NULL;
1817 }
1818
1819 void
pf_tbladdr_copyout(struct pf_addr_wrap * aw)1820 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1821 {
1822 struct pfr_ktable *kt = aw->p.tbl;
1823
1824 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1825
1826 if (aw->type != PF_ADDR_TABLE || kt == NULL) {
1827 return;
1828 }
1829 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) {
1830 kt = kt->pfrkt_root;
1831 }
1832 aw->p.tbl = NULL;
1833 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1834 kt->pfrkt_cnt : -1;
1835 }
1836
/*
 * Print an address in human-readable form: dotted quad for AF_INET,
 * colon-hex with best-effort "::" run compression for AF_INET6.
 */
static void
pf_print_addr(struct pf_addr *addr, sa_family_t af)
{
	switch (af) {
#if INET
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		printf("%u.%u.%u.%u", (a >> 24) & 255, (a >> 16) & 255,
		    (a >> 8) & 255, a & 255);
		break;
	}
#endif /* INET */
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart = 255, curend = 0,
		    maxstart = 0, maxend = 0;
		/* first pass: locate the longest run of zero 16-bit groups */
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255) {
					curstart = i;
				} else {
					curend = i;
				}
			} else {
				/*
				 * NOTE(review): a zero run starting at group 0
				 * fails this `if (curstart)` test and is never
				 * recorded as the max run — confirm whether
				 * that is intended (inherited from OpenBSD).
				 */
				if (curstart) {
					if ((curend - curstart) >
					    (maxend - maxstart)) {
						maxstart = curstart;
						maxend = curend;
						curstart = 255;
					}
				}
			}
		}
		/* second pass: print, compressing the chosen run to "::" */
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (maxend != 7) {
					if (i == maxstart) {
						printf(":");
					}
				} else {
					if (i == maxend) {
						printf(":");
					}
				}
			} else {
				b = ntohs(addr->addr16[i]);
				printf("%x", b);
				if (i < 7) {
					printf(":");
				}
			}
		}
		break;
	}
	}
}
1894
1895 static void
pf_print_sk_host(struct pf_state_host * sh,sa_family_t af,int proto,u_int8_t proto_variant)1896 pf_print_sk_host(struct pf_state_host *sh, sa_family_t af, int proto,
1897 u_int8_t proto_variant)
1898 {
1899 pf_print_addr(&sh->addr, af);
1900
1901 switch (proto) {
1902 case IPPROTO_ESP:
1903 if (sh->xport.spi) {
1904 printf("[%08x]", ntohl(sh->xport.spi));
1905 }
1906 break;
1907
1908 case IPPROTO_GRE:
1909 if (proto_variant == PF_GRE_PPTP_VARIANT) {
1910 printf("[%u]", ntohs(sh->xport.call_id));
1911 }
1912 break;
1913
1914 case IPPROTO_TCP:
1915 case IPPROTO_UDP:
1916 printf("[%u]", ntohs(sh->xport.port));
1917 break;
1918
1919 default:
1920 break;
1921 }
1922 }
1923
1924 static void
pf_print_host(struct pf_addr * addr,u_int16_t p,sa_family_t af)1925 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1926 {
1927 pf_print_addr(addr, af);
1928 if (p) {
1929 printf("[%u]", ntohs(p));
1930 }
1931 }
1932
/*
 * Print a full state entry: protocol, the four state-key endpoints
 * (lan, gwy, ext_lan, ext_gwy), both peers' sequence-tracking
 * windows, and the src/dst connection-tracking states.
 */
void
pf_print_state(struct pf_state *s)
{
	struct pf_state_key *sk = s->state_key;
	switch (sk->proto) {
	case IPPROTO_ESP:
		printf("ESP ");
		break;
	case IPPROTO_GRE:
		printf("GRE%u ", sk->proto_variant);
		break;
	case IPPROTO_TCP:
		printf("TCP ");
		break;
	case IPPROTO_UDP:
		printf("UDP ");
		break;
	case IPPROTO_ICMP:
		printf("ICMP ");
		break;
	case IPPROTO_ICMPV6:
		printf("ICMPV6 ");
		break;
	default:
		printf("%u ", sk->proto);
		break;
	}
	pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto, sk->proto_variant);
	printf(" ");
	pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto, sk->proto_variant);
	printf(" ");
	pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto,
	    sk->proto_variant);
	printf(" ");
	pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto,
	    sk->proto_variant);
	/* source peer sequence-tracking info */
	printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
	    s->src.seqhi, s->src.max_win, s->src.seqdiff);
	if (s->src.wscale && s->dst.wscale) {
		printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
	}
	printf("]");
	/* destination peer sequence-tracking info */
	printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
	    s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
	if (s->src.wscale && s->dst.wscale) {
		printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
	}
	printf("]");
	printf(" %u:%u", s->src.state, s->dst.state);
}
1983
1984 void
pf_print_flags(u_int8_t f)1985 pf_print_flags(u_int8_t f)
1986 {
1987 if (f) {
1988 printf(" ");
1989 }
1990 if (f & TH_FIN) {
1991 printf("F");
1992 }
1993 if (f & TH_SYN) {
1994 printf("S");
1995 }
1996 if (f & TH_RST) {
1997 printf("R");
1998 }
1999 if (f & TH_PUSH) {
2000 printf("P");
2001 }
2002 if (f & TH_ACK) {
2003 printf("A");
2004 }
2005 if (f & TH_URG) {
2006 printf("U");
2007 }
2008 if (f & TH_ECE) {
2009 printf("E");
2010 }
2011 if (f & TH_CWR) {
2012 printf("W");
2013 }
2014 }
2015
/*
 * Advance the skip-step head pointer for criterion `i` up to `cur`,
 * pointing every rule passed over at `cur` as its skip target for
 * that criterion.  Used only by pf_calc_skip_steps() below; expects
 * `head` and `cur` in the caller's scope.
 */
#define PF_SET_SKIP_STEPS(i)                                    \
	do {                                                    \
	        while (head[i] != cur) {                        \
	                head[i]->skip[i].ptr = cur;             \
	                head[i] = TAILQ_NEXT(head[i], entries); \
	        }                                               \
	} while (0)
2023
/*
 * Precompute the skip-step pointers for a rule queue.  For each
 * match criterion (interface, direction, AF, proto, src/dst address
 * and port), a rule's skip pointer names the first later rule that
 * differs in that criterion, letting the evaluator leap over runs of
 * rules that would fail the same test.
 */
void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i) {
		head[i] = cur;
	}
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) {
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		}
		if (cur->direction != prev->direction) {
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		}
		if (cur->af != prev->af) {
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		}
		if (cur->proto != prev->proto) {
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		}
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) {
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		}
		{
			union pf_rule_xport *cx = &cur->src.xport;
			union pf_rule_xport *px = &prev->src.xport;

			switch (cur->proto) {
			case IPPROTO_GRE:
			case IPPROTO_ESP:
				/* no src port concept: always break the run */
				PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
				break;
			default:
				if (prev->proto == IPPROTO_GRE ||
				    prev->proto == IPPROTO_ESP ||
				    cx->range.op != px->range.op ||
				    cx->range.port[0] != px->range.port[0] ||
				    cx->range.port[1] != px->range.port[1]) {
					PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
				}
				break;
			}
		}
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) {
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		}
		{
			union pf_rule_xport *cx = &cur->dst.xport;
			union pf_rule_xport *px = &prev->dst.xport;

			switch (cur->proto) {
			case IPPROTO_GRE:
				/* GRE compares call ids instead of ports */
				if (cur->proto != prev->proto ||
				    cx->call_id != px->call_id) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			case IPPROTO_ESP:
				/* ESP compares SPIs instead of ports */
				if (cur->proto != prev->proto ||
				    cx->spi != px->spi) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			default:
				if (prev->proto == IPPROTO_GRE ||
				    prev->proto == IPPROTO_ESP ||
				    cx->range.op != px->range.op ||
				    cx->range.port[0] != px->range.port[0] ||
				    cx->range.port[1] != px->range.port[1]) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			}
		}

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	/* flush remaining head pointers to the end of the queue */
	for (i = 0; i < PF_SKIP_COUNT; ++i) {
		PF_SET_SKIP_STEPS(i);
	}
}
2112
/*
 * Derive a non-zero flow identifier for a state key.  With SKYWALK
 * the id comes from the flowid namespace allocator; otherwise a
 * net_flowhash over the canonically-ordered address/port pairs is
 * used, reseeding until a non-zero hash results.
 */
u_int32_t
pf_calc_state_key_flowhash(struct pf_state_key *sk)
{
#if SKYWALK
	uint32_t flowid;
	struct flowidns_flow_key fk;

	VERIFY(sk->flowsrc == FLOWSRC_PF);
	bzero(&fk, sizeof(fk));
	_CASSERT(sizeof(sk->lan.addr) == sizeof(fk.ffk_laddr));
	_CASSERT(sizeof(sk->ext_lan.addr) == sizeof(fk.ffk_laddr));
	bcopy(&sk->lan.addr, &fk.ffk_laddr, sizeof(fk.ffk_laddr));
	bcopy(&sk->ext_lan.addr, &fk.ffk_raddr, sizeof(fk.ffk_raddr));
	fk.ffk_af = sk->af_lan;
	fk.ffk_proto = sk->proto;

	switch (sk->proto) {
	case IPPROTO_ESP:
	case IPPROTO_AH:
		fk.ffk_spi = sk->lan.xport.spi;
		break;
	default:
		/* order the ports so both directions hash identically */
		if (sk->lan.xport.spi <= sk->ext_lan.xport.spi) {
			fk.ffk_lport = sk->lan.xport.port;
			fk.ffk_rport = sk->ext_lan.xport.port;
		} else {
			fk.ffk_lport = sk->ext_lan.xport.port;
			fk.ffk_rport = sk->lan.xport.port;
		}
		break;
	}

	flowidns_allocate_flowid(FLOWIDNS_DOMAIN_PF, &fk, &flowid);
	return flowid;

#else /* !SKYWALK */

	struct pf_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	bzero(&fh, sizeof(fh));
	/* canonical ordering: smaller address first */
	if (PF_ALEQ(&sk->lan.addr, &sk->ext_lan.addr, sk->af_lan)) {
		bcopy(&sk->lan.addr, &fh.ap1.addr, sizeof(fh.ap1.addr));
		bcopy(&sk->ext_lan.addr, &fh.ap2.addr, sizeof(fh.ap2.addr));
	} else {
		bcopy(&sk->ext_lan.addr, &fh.ap1.addr, sizeof(fh.ap1.addr));
		bcopy(&sk->lan.addr, &fh.ap2.addr, sizeof(fh.ap2.addr));
	}
	/* canonical ordering: smaller spi/port first */
	if (sk->lan.xport.spi <= sk->ext_lan.xport.spi) {
		fh.ap1.xport.spi = sk->lan.xport.spi;
		fh.ap2.xport.spi = sk->ext_lan.xport.spi;
	} else {
		fh.ap1.xport.spi = sk->ext_lan.xport.spi;
		fh.ap2.xport.spi = sk->lan.xport.spi;
	}
	fh.af = sk->af_lan;
	fh.proto = sk->proto;

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), pf_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		pf_hash_seed = RandomULong();
		goto try_again;
	}

	return flowhash;

#endif /* !SKYWALK */
}
2183
2184 static int
pf_addr_wrap_neq(struct pf_addr_wrap * aw1,struct pf_addr_wrap * aw2)2185 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
2186 {
2187 if (aw1->type != aw2->type) {
2188 return 1;
2189 }
2190 switch (aw1->type) {
2191 case PF_ADDR_ADDRMASK:
2192 case PF_ADDR_RANGE:
2193 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0)) {
2194 return 1;
2195 }
2196 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) {
2197 return 1;
2198 }
2199 return 0;
2200 case PF_ADDR_DYNIFTL:
2201 return aw1->p.dyn == NULL || aw2->p.dyn == NULL ||
2202 aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt;
2203 case PF_ADDR_NOROUTE:
2204 case PF_ADDR_URPFFAILED:
2205 return 0;
2206 case PF_ADDR_TABLE:
2207 return aw1->p.tbl != aw2->p.tbl;
2208 case PF_ADDR_RTLABEL:
2209 return aw1->v.rtlabel != aw2->v.rtlabel;
2210 default:
2211 printf("invalid address type: %d\n", aw1->type);
2212 return 1;
2213 }
2214 }
2215
/*
 * Incrementally adjust an Internet checksum after one 16-bit field
 * changed from `old` to `new`; delegates to the shared NAT46/64
 * helper.  `udp` is non-zero for UDP (zero-checksum semantics are
 * handled by the callee).
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	return nat464_cksum_fixup(cksum, old, new, udp);
}
2221
2222 /*
2223 * change ip address & port
2224 * dir : packet direction
2225 * a : address to be changed
2226 * p : port to be changed
2227 * ic : ip header checksum
2228 * pc : protocol checksum
2229 * an : new ip address
2230 * pn : new port
2231 * u : should be 1 if UDP packet else 0
2232 * af : address family of the packet
2233 * afn : address family of the new address
2234 * ua : should be 1 if ip address needs to be updated in the packet else
2235 * only the checksum is recalculated & updated.
2236 */
static __attribute__((noinline)) void
pf_change_ap(int dir, pbuf_t *pbuf, struct pf_addr *a, u_int16_t *p,
    u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn,
    u_int8_t u, sa_family_t af, sa_family_t afn, int ua)
{
	struct pf_addr ao;
	u_int16_t po = *p;

	/* Snapshot the old address, then install the new address/port. */
	PF_ACPY(&ao, a, af);
	if (ua) {
		PF_ACPY(a, an, afn);
	}

	*p = pn;

	/* Repair checksums incrementally per old/new address family pair. */
	switch (af) {
#if INET
	case AF_INET:
		switch (afn) {
		case AF_INET:
			/* v4 -> v4: IP header checksum covers both address words. */
			*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
			    ao.addr16[0], an->addr16[0], 0),
			    ao.addr16[1], an->addr16[1], 0);
			*p = pn; /* NOTE(review): redundant; already assigned above */
			/*
			 * If the packet is originated from an ALG on the NAT gateway
			 * (source address is loopback or local), in which case the
			 * TCP/UDP checksum field contains the pseudo header checksum
			 * that's not yet complemented.
			 * In that case we do not need to fixup the checksum for port
			 * translation as the pseudo header checksum doesn't include ports.
			 *
			 * A packet generated locally will have UDP/TCP CSUM flag
			 * set (gets set in protocol output).
			 *
			 * It should be noted that the fixup doesn't do anything if the
			 * checksum is 0.
			 */
			if (dir == PF_OUT && pbuf != NULL &&
			    (*pbuf->pb_csum_flags & (CSUM_TCP | CSUM_UDP))) {
				/* Pseudo-header checksum does not include ports */
				*pc = ~pf_cksum_fixup(pf_cksum_fixup(~*pc,
				    ao.addr16[0], an->addr16[0], u),
				    ao.addr16[1], an->addr16[1], u);
			} else {
				/* Fix both address words and the port. */
				*pc =
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					*pc, ao.addr16[0], an->addr16[0], u),
					ao.addr16[1], an->addr16[1], u),
					po, pn, u);
			}
			break;
		case AF_INET6:
			/*
			 * v4 -> v6 (NAT64): the pseudo header grows from two to
			 * eight 16-bit address words; the upper six old words
			 * were absent, hence folded in as 0.
			 */
			*p = pn;
			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(

			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u),
			    0, an->addr16[2], u),
			    0, an->addr16[3], u),
			    0, an->addr16[4], u),
			    0, an->addr16[5], u),
			    0, an->addr16[6], u),
			    0, an->addr16[7], u),
			    po, pn, u);
			break;
		}
		break;
#endif /* INET */
	case AF_INET6:
		switch (afn) {
		case AF_INET6:
			/*
			 * If the packet is originated from an ALG on the NAT gateway
			 * (source address is loopback or local), in which case the
			 * TCP/UDP checksum field contains the pseudo header checksum
			 * that's not yet complemented.
			 * A packet generated locally
			 * will have UDP/TCP CSUM flag set (gets set in protocol
			 * output).
			 */
			if (dir == PF_OUT && pbuf != NULL &&
			    (*pbuf->pb_csum_flags & (CSUM_TCPIPV6 |
			    CSUM_UDPIPV6))) {
				/* Pseudo-header checksum does not include ports */
				*pc =
				    ~pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					~*pc,
					ao.addr16[0], an->addr16[0], u),
					ao.addr16[1], an->addr16[1], u),
					ao.addr16[2], an->addr16[2], u),
					ao.addr16[3], an->addr16[3], u),
					ao.addr16[4], an->addr16[4], u),
					ao.addr16[5], an->addr16[5], u),
					ao.addr16[6], an->addr16[6], u),
					ao.addr16[7], an->addr16[7], u);
			} else {
				/* Fix all eight address words and the port. */
				*pc =
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
				    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
					*pc,
					ao.addr16[0], an->addr16[0], u),
					ao.addr16[1], an->addr16[1], u),
					ao.addr16[2], an->addr16[2], u),
					ao.addr16[3], an->addr16[3], u),
					ao.addr16[4], an->addr16[4], u),
					ao.addr16[5], an->addr16[5], u),
					ao.addr16[6], an->addr16[6], u),
					ao.addr16[7], an->addr16[7], u),
					po, pn, u);
			}
			break;
#ifdef INET
		case AF_INET:
			/*
			 * v6 -> v4 (NAT64): the upper six old address words are
			 * removed from the pseudo header, hence replaced by 0.
			 */
			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u),
			    ao.addr16[2], 0, u),
			    ao.addr16[3], 0, u),
			    ao.addr16[4], 0, u),
			    ao.addr16[5], 0, u),
			    ao.addr16[6], 0, u),
			    ao.addr16[7], 0, u),
			    po, pn, u);
			break;
#endif /* INET */
		}
		break;
	}
}
2374
2375
2376 /* Changes a u_int32_t. Uses a void * so there are no align restrictions */
2377 void
pf_change_a(void * a,u_int16_t * c,u_int32_t an,u_int8_t u)2378 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
2379 {
2380 u_int32_t ao;
2381
2382 memcpy(&ao, a, sizeof(ao));
2383 memcpy(a, &an, sizeof(u_int32_t));
2384 *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
2385 ao % 65536, an % 65536, u);
2386 }
2387
2388 static __attribute__((noinline)) void
pf_change_a6(struct pf_addr * a,u_int16_t * c,struct pf_addr * an,u_int8_t u)2389 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
2390 {
2391 struct pf_addr ao;
2392
2393 PF_ACPY(&ao, a, AF_INET6);
2394 PF_ACPY(a, an, AF_INET6);
2395
2396 *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2397 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2398 pf_cksum_fixup(pf_cksum_fixup(*c,
2399 ao.addr16[0], an->addr16[0], u),
2400 ao.addr16[1], an->addr16[1], u),
2401 ao.addr16[2], an->addr16[2], u),
2402 ao.addr16[3], an->addr16[3], u),
2403 ao.addr16[4], an->addr16[4], u),
2404 ao.addr16[5], an->addr16[5], u),
2405 ao.addr16[6], an->addr16[6], u),
2406 ao.addr16[7], an->addr16[7], u);
2407 }
2408
2409 static __attribute__((noinline)) void
pf_change_addr(struct pf_addr * a,u_int16_t * c,struct pf_addr * an,u_int8_t u,sa_family_t af,sa_family_t afn)2410 pf_change_addr(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u,
2411 sa_family_t af, sa_family_t afn)
2412 {
2413 struct pf_addr ao;
2414
2415 if (af != afn) {
2416 PF_ACPY(&ao, a, af);
2417 PF_ACPY(a, an, afn);
2418 }
2419
2420 switch (af) {
2421 case AF_INET:
2422 switch (afn) {
2423 case AF_INET:
2424 pf_change_a(a, c, an->v4addr.s_addr, u);
2425 break;
2426 case AF_INET6:
2427 *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2428 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2429 pf_cksum_fixup(pf_cksum_fixup(*c,
2430 ao.addr16[0], an->addr16[0], u),
2431 ao.addr16[1], an->addr16[1], u),
2432 0, an->addr16[2], u),
2433 0, an->addr16[3], u),
2434 0, an->addr16[4], u),
2435 0, an->addr16[5], u),
2436 0, an->addr16[6], u),
2437 0, an->addr16[7], u);
2438 break;
2439 }
2440 break;
2441 case AF_INET6:
2442 switch (afn) {
2443 case AF_INET:
2444 *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2445 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2446 pf_cksum_fixup(pf_cksum_fixup(*c,
2447 ao.addr16[0], an->addr16[0], u),
2448 ao.addr16[1], an->addr16[1], u),
2449 ao.addr16[2], 0, u),
2450 ao.addr16[3], 0, u),
2451 ao.addr16[4], 0, u),
2452 ao.addr16[5], 0, u),
2453 ao.addr16[6], 0, u),
2454 ao.addr16[7], 0, u);
2455 break;
2456 case AF_INET6:
2457 pf_change_a6(a, c, an, u);
2458 break;
2459 }
2460 break;
2461 }
2462 }
2463
/*
 * Rewrite the addresses/port embedded in an ICMP error message and
 * incrementally repair every affected checksum.
 *
 *  ia/ip : inner (quoted) packet's address and port to rewrite
 *  oa    : outer IP header's address to rewrite
 *  na/np : new address and new port
 *  pc    : inner protocol (e.g. TCP/UDP) checksum; may be NULL
 *  h2c   : inner IP header checksum (used for IPv4 only)
 *  ic    : ICMP/ICMPv6 checksum
 *  hc    : outer IP header checksum (used for IPv4 only)
 *  u     : 1 for UDP-style checksum handling, else 0
 *  af    : address family of the packet
 */
static __attribute__((noinline)) void
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
{
	struct pf_addr oia, ooa;

	/* Snapshot the old inner and outer addresses. */
	PF_ACPY(&oia, ia, af);
	PF_ACPY(&ooa, oa, af);

	/* Change inner protocol port, fix inner protocol checksum. */
	if (ip != NULL) {
		u_int16_t oip = *ip;
		u_int32_t opc = 0;

		if (pc != NULL) {
			opc = *pc;
		}
		*ip = np;
		if (pc != NULL) {
			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
		}
		/* The ICMP checksum covers the quoted packet bytes, too. */
		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
		if (pc != NULL) {
			/* ...including the quoted packet's own checksum field. */
			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
		}
	}
	/* Change inner ip address, fix inner ip and icmp checksums. */
	PF_ACPY(ia, na, af);
	switch (af) {
#if INET
	case AF_INET: {
		u_int32_t oh2c = *h2c;

		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		/* ICMP checksum also covers the quoted IP header checksum. */
		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
		break;
	}
#endif /* INET */
	case AF_INET6:
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], u),
		    oia.addr16[1], ia->addr16[1], u),
		    oia.addr16[2], ia->addr16[2], u),
		    oia.addr16[3], ia->addr16[3], u),
		    oia.addr16[4], ia->addr16[4], u),
		    oia.addr16[5], ia->addr16[5], u),
		    oia.addr16[6], ia->addr16[6], u),
		    oia.addr16[7], ia->addr16[7], u);
		break;
	}
	/* Change outer ip address, fix outer ip or icmpv6 checksum. */
	PF_ACPY(oa, na, af);
	switch (af) {
#if INET
	case AF_INET:
		*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
		    ooa.addr16[0], oa->addr16[0], 0),
		    ooa.addr16[1], oa->addr16[1], 0);
		break;
#endif /* INET */
	case AF_INET6:
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(*ic,
		    ooa.addr16[0], oa->addr16[0], u),
		    ooa.addr16[1], oa->addr16[1], u),
		    ooa.addr16[2], oa->addr16[2], u),
		    ooa.addr16[3], oa->addr16[3], u),
		    ooa.addr16[4], oa->addr16[4], u),
		    ooa.addr16[5], oa->addr16[5], u),
		    ooa.addr16[6], oa->addr16[6], u),
		    ooa.addr16[7], oa->addr16[7], u);
		break;
	}
}
2547
2548
2549 /*
2550 * Need to modulate the sequence numbers in the TCP SACK option
2551 * (credits to Krzysztof Pfaff for report and patch)
2552 */
/*
 * Walk the TCP options and shift the sequence numbers inside each SACK
 * block down by dst->seqdiff, repairing th_sum incrementally.
 * Returns 0 when no SACK option was rewritten, -1 when the buffer could
 * not be made writable, otherwise the nonzero copy-back length marker.
 */
static __attribute__((noinline)) int
pf_modulate_sack(pbuf_t *pbuf, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *dst)
{
	/* hlen: option bytes after the fixed TCP header; thoptlen keeps
	 * the original option length for the copy-back below. */
	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
	u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
	int copyback = 0, i, olen;
	struct sackblk sack;

#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
	/* Bail out unless the options can hold at least one SACK block. */
	if (hlen < TCPOLEN_SACKLEN ||
	    !pf_pull_hdr(pbuf, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) {
		return 0;
	}

	while (hlen >= TCPOLEN_SACKLEN) {
		olen = opt[1];
		switch (*opt) {
		case TCPOPT_EOL:	/* FALLTHROUGH */
		case TCPOPT_NOP:
			/* Single-byte options carry no length field. */
			opt++;
			hlen--;
			break;
		case TCPOPT_SACK:
			/* Clamp a bogus option length to what remains. */
			if (olen > hlen) {
				olen = hlen;
			}
			if (olen >= TCPOLEN_SACKLEN) {
				/* Rewrite each start/end pair in the option. */
				for (i = 2; i + TCPOLEN_SACK <= olen;
				    i += TCPOLEN_SACK) {
					memcpy(&sack, &opt[i], sizeof(sack));
					pf_change_a(&sack.start, &th->th_sum,
					    htonl(ntohl(sack.start) -
					    dst->seqdiff), 0);
					pf_change_a(&sack.end, &th->th_sum,
					    htonl(ntohl(sack.end) -
					    dst->seqdiff), 0);
					memcpy(&opt[i], &sack, sizeof(sack));
				}
				copyback = off + sizeof(*th) + thoptlen;
			}
			OS_FALLTHROUGH;
		default:
			/* Skip the option; defend against olen < 2 loops. */
			if (olen < 2) {
				olen = 2;
			}
			hlen -= olen;
			opt += olen;
		}
	}

	if (copyback) {
		if (pf_lazy_makewritable(pd, pbuf, copyback) == NULL) {
			return -1;
		}
		/* Write the modified options back into the packet. */
		pbuf_copy_back(pbuf, off + sizeof(*th), thoptlen, opts);
	}
	return copyback;
}
2612
2613 /*
2614 * XXX
2615 *
2616 * The following functions (pf_send_tcp and pf_send_icmp) are somewhat
2617 * special in that they originate "spurious" packets rather than
2618 * filter/NAT existing packets. As such, they're not a great fit for
2619 * the 'pbuf' shim, which assumes the underlying packet buffers are
2620 * allocated elsewhere.
2621 *
2622 * Since these functions are rarely used, we'll carry on allocating mbufs
2623 * and passing them to the IP stack for eventual routing.
2624 */
2625 static __attribute__((noinline)) void
pf_send_tcp(const struct pf_rule * r,sa_family_t af,const struct pf_addr * saddr,const struct pf_addr * daddr,u_int16_t sport,u_int16_t dport,u_int32_t seq,u_int32_t ack,u_int8_t flags,u_int16_t win,u_int16_t mss,u_int8_t ttl,int tag,u_int16_t rtag,struct ether_header * eh,struct ifnet * ifp)2626 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
2627 const struct pf_addr *saddr, const struct pf_addr *daddr,
2628 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
2629 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
2630 u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
2631 {
2632 #pragma unused(eh, ifp)
2633 struct mbuf *m;
2634 int len, tlen;
2635 #if INET
2636 struct ip *h = NULL;
2637 #endif /* INET */
2638 struct ip6_hdr *h6 = NULL;
2639 struct tcphdr *th = NULL;
2640 char *opt;
2641 struct pf_mtag *pf_mtag;
2642
2643 /* maximum segment size tcp option */
2644 tlen = sizeof(struct tcphdr);
2645 if (mss) {
2646 tlen += 4;
2647 }
2648
2649 switch (af) {
2650 #if INET
2651 case AF_INET:
2652 len = sizeof(struct ip) + tlen;
2653 break;
2654 #endif /* INET */
2655 case AF_INET6:
2656 len = sizeof(struct ip6_hdr) + tlen;
2657 break;
2658 default:
2659 panic("pf_send_tcp: not AF_INET or AF_INET6!");
2660 return;
2661 }
2662
2663 /* create outgoing mbuf */
2664 m = m_gethdr(M_DONTWAIT, MT_HEADER);
2665 if (m == NULL) {
2666 return;
2667 }
2668
2669 if ((pf_mtag = pf_get_mtag(m)) == NULL) {
2670 return;
2671 }
2672
2673 if (tag) {
2674 pf_mtag->pftag_flags |= PF_TAG_GENERATED;
2675 }
2676 pf_mtag->pftag_tag = rtag;
2677
2678 if (r != NULL && PF_RTABLEID_IS_VALID(r->rtableid)) {
2679 pf_mtag->pftag_rtableid = r->rtableid;
2680 }
2681
2682 #if PF_ECN
2683 /* add hints for ecn */
2684 pf_mtag->pftag_hdr = mtod(m, struct ip *);
2685 /* record address family */
2686 pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6);
2687 switch (af) {
2688 #if INET
2689 case AF_INET:
2690 pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
2691 break;
2692 #endif /* INET */
2693 case AF_INET6:
2694 pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
2695 break;
2696 }
2697 #endif /* PF_ECN */
2698
2699 /* indicate this is TCP */
2700 m->m_pkthdr.pkt_proto = IPPROTO_TCP;
2701
2702 /* Make sure headers are 32-bit aligned */
2703 m->m_data += max_linkhdr;
2704 m->m_pkthdr.len = m->m_len = len;
2705 m->m_pkthdr.rcvif = NULL;
2706 bzero(m->m_data, len);
2707 switch (af) {
2708 #if INET
2709 case AF_INET:
2710 h = mtod(m, struct ip *);
2711
2712 /* IP header fields included in the TCP checksum */
2713 h->ip_p = IPPROTO_TCP;
2714 h->ip_len = htons(tlen);
2715 h->ip_src.s_addr = saddr->v4addr.s_addr;
2716 h->ip_dst.s_addr = daddr->v4addr.s_addr;
2717
2718 th = (struct tcphdr *)(void *)((caddr_t)h + sizeof(struct ip));
2719 break;
2720 #endif /* INET */
2721 case AF_INET6:
2722 h6 = mtod(m, struct ip6_hdr *);
2723
2724 /* IP header fields included in the TCP checksum */
2725 h6->ip6_nxt = IPPROTO_TCP;
2726 h6->ip6_plen = htons(tlen);
2727 memcpy(&h6->ip6_src, &saddr->v6addr, sizeof(struct in6_addr));
2728 memcpy(&h6->ip6_dst, &daddr->v6addr, sizeof(struct in6_addr));
2729
2730 th = (struct tcphdr *)(void *)
2731 ((caddr_t)h6 + sizeof(struct ip6_hdr));
2732 break;
2733 }
2734
2735 /* TCP header */
2736 th->th_sport = sport;
2737 th->th_dport = dport;
2738 th->th_seq = htonl(seq);
2739 th->th_ack = htonl(ack);
2740 th->th_off = tlen >> 2;
2741 th->th_flags = flags;
2742 th->th_win = htons(win);
2743
2744 if (mss) {
2745 opt = (char *)(th + 1);
2746 opt[0] = TCPOPT_MAXSEG;
2747 opt[1] = 4;
2748 #if BYTE_ORDER != BIG_ENDIAN
2749 HTONS(mss);
2750 #endif
2751 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
2752 }
2753
2754 switch (af) {
2755 #if INET
2756 case AF_INET: {
2757 struct route ro;
2758
2759 /* TCP checksum */
2760 th->th_sum = in_cksum(m, len);
2761
2762 /* Finish the IP header */
2763 h->ip_v = 4;
2764 h->ip_hl = sizeof(*h) >> 2;
2765 h->ip_tos = IPTOS_LOWDELAY;
2766 /*
2767 * ip_output() expects ip_len and ip_off to be in host order.
2768 */
2769 h->ip_len = len;
2770 h->ip_off = (path_mtu_discovery ? IP_DF : 0);
2771 h->ip_ttl = ttl ? ttl : ip_defttl;
2772 h->ip_sum = 0;
2773
2774 bzero(&ro, sizeof(ro));
2775 ip_output(m, NULL, &ro, 0, NULL, NULL);
2776 ROUTE_RELEASE(&ro);
2777 break;
2778 }
2779 #endif /* INET */
2780 case AF_INET6: {
2781 struct route_in6 ro6;
2782
2783 /* TCP checksum */
2784 th->th_sum = in6_cksum(m, IPPROTO_TCP,
2785 sizeof(struct ip6_hdr), tlen);
2786
2787 h6->ip6_vfc |= IPV6_VERSION;
2788 h6->ip6_hlim = IPV6_DEFHLIM;
2789
2790 ip6_output_setsrcifscope(m, IFSCOPE_UNKNOWN, NULL);
2791 ip6_output_setdstifscope(m, IFSCOPE_UNKNOWN, NULL);
2792 bzero(&ro6, sizeof(ro6));
2793 ip6_output(m, NULL, &ro6, 0, NULL, NULL, NULL);
2794 ROUTE_RELEASE(&ro6);
2795 break;
2796 }
2797 }
2798 }
2799
2800 static __attribute__((noinline)) void
pf_send_icmp(pbuf_t * pbuf,u_int8_t type,u_int8_t code,sa_family_t af,struct pf_rule * r)2801 pf_send_icmp(pbuf_t *pbuf, u_int8_t type, u_int8_t code, sa_family_t af,
2802 struct pf_rule *r)
2803 {
2804 struct mbuf *m0;
2805 struct pf_mtag *pf_mtag;
2806
2807 m0 = pbuf_clone_to_mbuf(pbuf);
2808 if (m0 == NULL) {
2809 return;
2810 }
2811
2812 if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
2813 return;
2814 }
2815
2816 pf_mtag->pftag_flags |= PF_TAG_GENERATED;
2817
2818 if (PF_RTABLEID_IS_VALID(r->rtableid)) {
2819 pf_mtag->pftag_rtableid = r->rtableid;
2820 }
2821
2822 #if PF_ECN
2823 /* add hints for ecn */
2824 pf_mtag->pftag_hdr = mtod(m0, struct ip *);
2825 /* record address family */
2826 pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6);
2827 switch (af) {
2828 #if INET
2829 case AF_INET:
2830 pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
2831 m0->m_pkthdr.pkt_proto = IPPROTO_ICMP;
2832 break;
2833 #endif /* INET */
2834 case AF_INET6:
2835 pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
2836 m0->m_pkthdr.pkt_proto = IPPROTO_ICMPV6;
2837 break;
2838 }
2839 #endif /* PF_ECN */
2840
2841 switch (af) {
2842 #if INET
2843 case AF_INET:
2844 icmp_error(m0, type, code, 0, 0);
2845 break;
2846 #endif /* INET */
2847 case AF_INET6:
2848 icmp6_error(m0, type, code, 0);
2849 break;
2850 }
2851 }
2852
2853 /*
2854 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
2855 * If n is 0, they match if they are equal. If n is != 0, they match if they
2856 * are different.
2857 */
2858 int
pf_match_addr(u_int8_t n,struct pf_addr * a,struct pf_addr * m,struct pf_addr * b,sa_family_t af)2859 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
2860 struct pf_addr *b, sa_family_t af)
2861 {
2862 int match = 0;
2863
2864 switch (af) {
2865 #if INET
2866 case AF_INET:
2867 if ((a->addr32[0] & m->addr32[0]) ==
2868 (b->addr32[0] & m->addr32[0])) {
2869 match++;
2870 }
2871 break;
2872 #endif /* INET */
2873 case AF_INET6:
2874 if (((a->addr32[0] & m->addr32[0]) ==
2875 (b->addr32[0] & m->addr32[0])) &&
2876 ((a->addr32[1] & m->addr32[1]) ==
2877 (b->addr32[1] & m->addr32[1])) &&
2878 ((a->addr32[2] & m->addr32[2]) ==
2879 (b->addr32[2] & m->addr32[2])) &&
2880 ((a->addr32[3] & m->addr32[3]) ==
2881 (b->addr32[3] & m->addr32[3]))) {
2882 match++;
2883 }
2884 break;
2885 }
2886 if (match) {
2887 if (n) {
2888 return 0;
2889 } else {
2890 return 1;
2891 }
2892 } else {
2893 if (n) {
2894 return 1;
2895 } else {
2896 return 0;
2897 }
2898 }
2899 }
2900
2901 /*
2902 * Return 1 if b <= a <= e, otherwise return 0.
2903 */
2904 int
pf_match_addr_range(struct pf_addr * b,struct pf_addr * e,struct pf_addr * a,sa_family_t af)2905 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
2906 struct pf_addr *a, sa_family_t af)
2907 {
2908 switch (af) {
2909 #if INET
2910 case AF_INET:
2911 if ((a->addr32[0] < b->addr32[0]) ||
2912 (a->addr32[0] > e->addr32[0])) {
2913 return 0;
2914 }
2915 break;
2916 #endif /* INET */
2917 case AF_INET6: {
2918 int i;
2919
2920 /* check a >= b */
2921 for (i = 0; i < 4; ++i) {
2922 if (a->addr32[i] > b->addr32[i]) {
2923 break;
2924 } else if (a->addr32[i] < b->addr32[i]) {
2925 return 0;
2926 }
2927 }
2928 /* check a <= e */
2929 for (i = 0; i < 4; ++i) {
2930 if (a->addr32[i] < e->addr32[i]) {
2931 break;
2932 } else if (a->addr32[i] > e->addr32[i]) {
2933 return 0;
2934 }
2935 }
2936 break;
2937 }
2938 }
2939 return 1;
2940 }
2941
2942 int
pf_match(u_int8_t op,u_int32_t a1,u_int32_t a2,u_int32_t p)2943 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2944 {
2945 switch (op) {
2946 case PF_OP_IRG:
2947 return (p > a1) && (p < a2);
2948 case PF_OP_XRG:
2949 return (p < a1) || (p > a2);
2950 case PF_OP_RRG:
2951 return (p >= a1) && (p <= a2);
2952 case PF_OP_EQ:
2953 return p == a1;
2954 case PF_OP_NE:
2955 return p != a1;
2956 case PF_OP_LT:
2957 return p < a1;
2958 case PF_OP_LE:
2959 return p <= a1;
2960 case PF_OP_GT:
2961 return p > a1;
2962 case PF_OP_GE:
2963 return p >= a1;
2964 }
2965 return 0; /* never reached */
2966 }
2967
2968 int
pf_match_port(u_int8_t op,u_int16_t a1,u_int16_t a2,u_int16_t p)2969 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
2970 {
2971 #if BYTE_ORDER != BIG_ENDIAN
2972 NTOHS(a1);
2973 NTOHS(a2);
2974 NTOHS(p);
2975 #endif
2976 return pf_match(op, a1, a2, p);
2977 }
2978
2979 int
pf_match_xport(u_int8_t proto,u_int8_t proto_variant,union pf_rule_xport * rx,union pf_state_xport * sx)2980 pf_match_xport(u_int8_t proto, u_int8_t proto_variant, union pf_rule_xport *rx,
2981 union pf_state_xport *sx)
2982 {
2983 int d = !0;
2984
2985 if (sx) {
2986 switch (proto) {
2987 case IPPROTO_GRE:
2988 if (proto_variant == PF_GRE_PPTP_VARIANT) {
2989 d = (rx->call_id == sx->call_id);
2990 }
2991 break;
2992
2993 case IPPROTO_ESP:
2994 d = (rx->spi == sx->spi);
2995 break;
2996
2997 case IPPROTO_TCP:
2998 case IPPROTO_UDP:
2999 case IPPROTO_ICMP:
3000 case IPPROTO_ICMPV6:
3001 if (rx->range.op) {
3002 d = pf_match_port(rx->range.op,
3003 rx->range.port[0], rx->range.port[1],
3004 sx->port);
3005 }
3006 break;
3007
3008 default:
3009 break;
3010 }
3011 }
3012
3013 return d;
3014 }
3015
3016 int
pf_match_uid(u_int8_t op,uid_t a1,uid_t a2,uid_t u)3017 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
3018 {
3019 if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) {
3020 return 0;
3021 }
3022 return pf_match(op, a1, a2, u);
3023 }
3024
3025 int
pf_match_gid(u_int8_t op,gid_t a1,gid_t a2,gid_t g)3026 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
3027 {
3028 if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) {
3029 return 0;
3030 }
3031 return pf_match(op, a1, a2, g);
3032 }
3033
3034 static int
pf_match_tag(struct pf_rule * r,struct pf_mtag * pf_mtag,int * tag)3035 pf_match_tag(struct pf_rule *r, struct pf_mtag *pf_mtag,
3036 int *tag)
3037 {
3038 if (*tag == -1) {
3039 *tag = pf_mtag->pftag_tag;
3040 }
3041
3042 return (!r->match_tag_not && r->match_tag == *tag) ||
3043 (r->match_tag_not && r->match_tag != *tag);
3044 }
3045
3046 int
pf_tag_packet(pbuf_t * pbuf,struct pf_mtag * pf_mtag,int tag,unsigned int rtableid,struct pf_pdesc * pd)3047 pf_tag_packet(pbuf_t *pbuf, struct pf_mtag *pf_mtag, int tag,
3048 unsigned int rtableid, struct pf_pdesc *pd)
3049 {
3050 if (tag <= 0 && !PF_RTABLEID_IS_VALID(rtableid) &&
3051 (pd == NULL || !(pd->pktflags & PKTF_FLOW_ID))) {
3052 return 0;
3053 }
3054
3055 if (pf_mtag == NULL && (pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
3056 return 1;
3057 }
3058
3059 if (tag > 0) {
3060 pf_mtag->pftag_tag = tag;
3061 }
3062 if (PF_RTABLEID_IS_VALID(rtableid)) {
3063 pf_mtag->pftag_rtableid = rtableid;
3064 }
3065 if (pd != NULL && (pd->pktflags & PKTF_FLOW_ID)) {
3066 *pbuf->pb_flowsrc = pd->flowsrc;
3067 *pbuf->pb_flowid = pd->flowhash;
3068 *pbuf->pb_flags |= pd->pktflags;
3069 *pbuf->pb_proto = pd->proto;
3070 }
3071
3072 return 0;
3073 }
3074
/*
 * Descend into the anchor attached to rule *r: push the current
 * ruleset and rule onto pf_anchor_stack, then point *r at the first
 * rule of the anchor's ruleset (or of the first child ruleset for a
 * wildcard anchor).  On stack overflow the anchor is skipped and *r
 * advances past it.  'n' selects the rule list (e.g. filter vs NAT);
 * 'a' tracks the top-level anchor rule; 'match' is reset for the
 * descent.
 */
void
pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
    struct pf_rule **r, struct pf_rule **a, int *match)
{
	struct pf_anchor_stackframe *f;

	(*r)->anchor->match = 0;
	if (match) {
		*match = 0;
	}
	if (*depth >= (int)sizeof(pf_anchor_stack) /
	    (int)sizeof(pf_anchor_stack[0])) {
		/* Stack exhausted: skip the anchor rather than recurse. */
		printf("pf_step_into_anchor: stack overflow\n");
		*r = TAILQ_NEXT(*r, entries);
		return;
	} else if (*depth == 0 && a != NULL) {
		/* Remember the outermost anchor rule. */
		*a = *r;
	}
	f = pf_anchor_stack + (*depth)++;
	f->rs = *rs;
	f->r = *r;
	if ((*r)->anchor_wildcard) {
		/* Wildcard anchor: iterate over all child anchors. */
		f->parent = &(*r)->anchor->children;
		if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
		    NULL) {
			*r = NULL;
			return;
		}
		*rs = &f->child->ruleset;
	} else {
		f->parent = NULL;
		f->child = NULL;
		*rs = &(*r)->anchor->ruleset;
	}
	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
}
3111
/*
 * Pop frames pushed by pf_step_into_anchor() until a next rule is
 * found: first advance through any remaining wildcard child rulesets,
 * otherwise propagate the child/global match flag to the parent anchor
 * and resume after the anchor rule.  Returns the 'quick' setting of a
 * matching anchor rule, 0 otherwise.
 */
int
pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
    struct pf_rule **r, struct pf_rule **a, int *match)
{
	struct pf_anchor_stackframe *f;
	int quick = 0;

	do {
		if (*depth <= 0) {
			break;
		}
		f = pf_anchor_stack + *depth - 1;
		if (f->parent != NULL && f->child != NULL) {
			/* Wildcard anchor: move to the next child ruleset. */
			if (f->child->match ||
			    (match != NULL && *match)) {
				f->r->anchor->match = 1;
				if (match) {
					*match = 0;
				}
			}
			f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
			if (f->child != NULL) {
				*rs = &f->child->ruleset;
				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
				if (*r == NULL) {
					/* Empty child: keep popping/advancing. */
					continue;
				} else {
					break;
				}
			}
		}
		/* Anchor fully processed: pop the frame. */
		(*depth)--;
		if (*depth == 0 && a != NULL) {
			*a = NULL;
		}
		*rs = f->rs;
		if (f->r->anchor->match || (match != NULL && *match)) {
			quick = f->r->quick;
		}
		*r = TAILQ_NEXT(f->r, entries);
	} while (*r == NULL);

	return quick;
}
3156
3157 void
pf_poolmask(struct pf_addr * naddr,struct pf_addr * raddr,struct pf_addr * rmask,struct pf_addr * saddr,sa_family_t af)3158 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
3159 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
3160 {
3161 switch (af) {
3162 #if INET
3163 case AF_INET:
3164 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3165 ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
3166 break;
3167 #endif /* INET */
3168 case AF_INET6:
3169 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3170 ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
3171 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
3172 ((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]);
3173 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
3174 ((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]);
3175 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
3176 ((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]);
3177 break;
3178 }
3179 }
3180
3181 void
pf_addr_inc(struct pf_addr * addr,sa_family_t af)3182 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
3183 {
3184 switch (af) {
3185 #if INET
3186 case AF_INET:
3187 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
3188 break;
3189 #endif /* INET */
3190 case AF_INET6:
3191 if (addr->addr32[3] == 0xffffffff) {
3192 addr->addr32[3] = 0;
3193 if (addr->addr32[2] == 0xffffffff) {
3194 addr->addr32[2] = 0;
3195 if (addr->addr32[1] == 0xffffffff) {
3196 addr->addr32[1] = 0;
3197 addr->addr32[0] =
3198 htonl(ntohl(addr->addr32[0]) + 1);
3199 } else {
3200 addr->addr32[1] =
3201 htonl(ntohl(addr->addr32[1]) + 1);
3202 }
3203 } else {
3204 addr->addr32[2] =
3205 htonl(ntohl(addr->addr32[2]) + 1);
3206 }
3207 } else {
3208 addr->addr32[3] =
3209 htonl(ntohl(addr->addr32[3]) + 1);
3210 }
3211 break;
3212 }
3213 }
3214
/*
 * Bit-mixing step used by pf_hash() below: stirs the three 32-bit
 * values a, b and c into each other through subtraction, XOR and
 * shifts (same construction as bridge_hash in if_bridge.c).
 */
#define mix(a, b, c) \
	do { \
	        a -= b; a -= c; a ^= (c >> 13); \
	        b -= c; b -= a; b ^= (a << 8); \
	        c -= a; c -= b; c ^= (b >> 13); \
	        a -= b; a -= c; a ^= (c >> 12); \
	        b -= c; b -= a; b ^= (a << 16); \
	        c -= a; c -= b; c ^= (b >> 5); \
	        a -= b; a -= c; a ^= (c >> 3); \
	        b -= c; b -= a; b ^= (a << 10); \
	        c -= a; c -= b; c ^= (b >> 15); \
	} while (0)
3227
3228 /*
3229 * hash function based on bridge_hash in if_bridge.c
3230 */
static void
pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
    struct pf_poolhashkey *key, sa_family_t af)
{
	/* 0x9e3779b9 is the initializer used by the bridge_hash scheme. */
	u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];

	switch (af) {
#if INET
	case AF_INET:
		/* One mix round produces the single 32-bit hash word. */
		a += inaddr->addr32[0];
		b += key->key32[1];
		mix(a, b, c);
		hash->addr32[0] = c + key->key32[2];
		break;
#endif /* INET */
	case AF_INET6:
		/* Four chained mix rounds, one per 32-bit output word. */
		a += inaddr->addr32[0];
		b += inaddr->addr32[2];
		mix(a, b, c);
		hash->addr32[0] = c;
		a += inaddr->addr32[1];
		b += inaddr->addr32[3];
		c += key->key32[1];
		mix(a, b, c);
		hash->addr32[1] = c;
		a += inaddr->addr32[2];
		b += inaddr->addr32[1];
		c += key->key32[2];
		mix(a, b, c);
		hash->addr32[2] = c;
		a += inaddr->addr32[3];
		b += inaddr->addr32[0];
		c += key->key32[3];
		mix(a, b, c);
		hash->addr32[3] = c;
		break;
	}
}
3269
/*
 * Select a translation address ("map" saddr into the rule's address pool).
 *
 * af        : address family of the packet's source address (saddr)
 * r         : the matching NAT/RDR/route-to rule whose pool is consulted
 * saddr     : packet source address (hash/bitmask input; sticky-address key)
 * naddr     : (output) selected translated address, in family rpool->af
 * init_addr : first address handed out for this session; used by the
 *             RANDOM/ROUNDROBIN retry loop in pf_get_sport() to detect a
 *             full wrap of the pool.  May be NULL (e.g. RDR inbound).
 * sn        : (in/out) source-tracking node; when sticky-address is set and
 *             a node with a non-zero raddr exists, that address is reused.
 *
 * Returns 0 on success (naddr filled in), 1 when no address is available.
 */
static __attribute__((noinline)) int
pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
{
	unsigned char hash[16];
	struct pf_pool *rpool = &r->rpool;
	struct pf_addr *raddr = &rpool->cur->addr.v.a.addr;
	struct pf_addr *rmask = &rpool->cur->addr.v.a.mask;
	struct pf_pooladdr *acur = rpool->cur;	/* round-robin wrap sentinel */
	struct pf_src_node k;

	/*
	 * Sticky address: if source tracking already mapped this source to a
	 * pool address, reuse it so the host keeps a stable translation.
	 */
	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		k.af = af;
		PF_ACPY(&k.addr, saddr, af);
		if (r->rule_flag & PFRULE_RULESRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR) {
			k.rule.ptr = r;
		} else {
			k.rule.ptr = NULL;
		}
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, rpool->af)) {
			PF_ACPY(naddr, &(*sn)->raddr, rpool->af);
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf_map_addr: src tracking maps ");
				pf_print_host(&k.addr, 0, af);
				printf(" to ");
				pf_print_host(naddr, 0, rpool->af);
				printf("\n");
			}
			return 0;
		}
	}

	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
		return 1;
	}
	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
		/* Dynamic interface address: pick the per-family cached addr. */
		if (rpool->cur->addr.p.dyn == NULL) {
			return 1;
		}
		switch (rpool->af) {
#if INET
		case AF_INET:
			/* No v4 address on the interface and not round-robin
			 * (which could still find one in another pool entry). */
			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN) {
				return 1;
			}
			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
			break;
#endif /* INET */
		case AF_INET6:
			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN) {
				return 1;
			}
			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
			break;
		}
	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
		/* Tables are only iterable; other pool types can't use them. */
		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
			return 1;	/* unsupported */
		}
	} else {
		raddr = &rpool->cur->addr.v.a.addr;
		rmask = &rpool->cur->addr.v.a.mask;
	}

	switch (rpool->opts & PF_POOL_TYPEMASK) {
	case PF_POOL_NONE:
		/* Single address: hand it out verbatim. */
		PF_ACPY(naddr, raddr, rpool->af);
		break;
	case PF_POOL_BITMASK:
		/* Merge the pool's network bits with the host bits of saddr;
		 * only valid when both families match. */
		ASSERT(af == rpool->af);
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		break;
	case PF_POOL_RANDOM:
		if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) {
			/* First pick for this session: seed the counter with
			 * random host bits (only for words not fully masked). */
			switch (af) {
#if INET
			case AF_INET:
				rpool->counter.addr32[0] = htonl(random());
				break;
#endif /* INET */
			case AF_INET6:
				if (rmask->addr32[3] != 0xffffffff) {
					rpool->counter.addr32[3] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[2] != 0xffffffff) {
					rpool->counter.addr32[2] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[1] != 0xffffffff) {
					rpool->counter.addr32[1] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[0] != 0xffffffff) {
					rpool->counter.addr32[0] =
					    RandomULong();
				}
				break;
			}
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter,
			    rpool->af);
			PF_ACPY(init_addr, naddr, rpool->af);
		} else {
			/* Retry: step to the next address in the range so the
			 * caller's wrap-detection against init_addr works. */
			PF_AINC(&rpool->counter, rpool->af);
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter,
			    rpool->af);
		}
		break;
	case PF_POOL_SRCHASH:
		/* Deterministic choice keyed on the source address. */
		ASSERT(af == rpool->af);
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		pf_hash(saddr, (struct pf_addr *)(void *)&hash,
		    &rpool->key, af);
		PF_POOLMASK(naddr, raddr, rmask,
		    (struct pf_addr *)(void *)&hash, af);
		break;
	case PF_POOL_ROUNDROBIN:
		/* First try to continue from the current cursor position. */
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				goto get_addr;
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			/* NOTE(review): this lookup uses 'af' where the table
			 * lookup above uses rpool->af — presumably deliberate
			 * for the NAT64 case; confirm against upstream. */
			if (rpool->cur->addr.p.dyn != NULL &&
			    !pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				goto get_addr;
			}
		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter,
		    rpool->af)) {
			goto get_addr;
		}

try_next:
		/* Advance the cursor to the next pool entry, wrapping to the
		 * head of the list; 'acur' marks where we started so a full
		 * cycle without success returns failure. */
		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) {
			rpool->cur = TAILQ_FIRST(&rpool->list);
		}
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				/* table contains no address of type
				 * 'rpool->af' */
				if (rpool->cur != acur) {
					goto try_next;
				}
				return 1;
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			rpool->tblidx = -1;
			if (rpool->cur->addr.p.dyn == NULL) {
				return 1;
			}
			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				/* table contains no address of type
				 * 'rpool->af' */
				if (rpool->cur != acur) {
					goto try_next;
				}
				return 1;
			}
		} else {
			raddr = &rpool->cur->addr.v.a.addr;
			rmask = &rpool->cur->addr.v.a.mask;
			PF_ACPY(&rpool->counter, raddr, rpool->af);
		}

get_addr:
		PF_ACPY(naddr, &rpool->counter, rpool->af);
		if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) {
			PF_ACPY(init_addr, naddr, rpool->af);
		}
		/* Step the counter so the next call hands out the next addr. */
		PF_AINC(&rpool->counter, rpool->af);
		break;
	}
	/* Record the chosen address for sticky-address reuse. */
	if (*sn != NULL) {
		PF_ACPY(&(*sn)->raddr, naddr, rpool->af);
	}

	if (pf_status.debug >= PF_DEBUG_MISC &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		printf("pf_map_addr: selected address ");
		pf_print_host(naddr, 0, rpool->af);
		printf("\n");
	}

	return 0;
}
3479
/*
 * Choose a translated source address (via pf_map_addr) and a free proxy
 * source port for a NAT'd connection.
 *
 * On success naddr/nxport hold the translation and 0 is returned; returns 1
 * when no address/port combination is available.  Under SKYWALK, any port
 * that is handed out for TCP/UDP is first reserved in the netns namespace
 * via *pnstoken so the port cannot collide with local sockets.
 *
 * Port selection: for each candidate address, try (a) protocols without
 * ports, (b) the unmodified source port when no proxy range is configured,
 * (c) a single fixed port when low == high, or (d) a random starting point
 * inside [low, high], scanning up then down.  If the range is exhausted,
 * ask pf_map_addr() for the next pool address and repeat until the pool
 * wraps back to init_addr.
 */
static __attribute__((noinline)) int
pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r,
    struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, struct pf_addr *naddr,
    union pf_state_xport *nxport, struct pf_src_node **sn
#if SKYWALK
    , netns_token *pnstoken
#endif
    )
{
#pragma unused(kif)
	struct pf_state_key_cmp key;
	struct pf_addr init_addr;
	unsigned int cut;
	sa_family_t af = pd->af;
	u_int8_t proto = pd->proto;
	unsigned int low = r->rpool.proxy_port[0];
	unsigned int high = r->rpool.proxy_port[1];

	bzero(&init_addr, sizeof(init_addr));
	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) {
		return 1;
	}

	/* ICMP "ports" are echo ids; any non-zero value is acceptable. */
	if (proto == IPPROTO_ICMP) {
		low = 1;
		high = 65535;
	}

	if (!nxport) {
		return 0;	/* No output necessary. */
	}
	/*--- Special mapping rules for UDP ---*/
	if (proto == IPPROTO_UDP) {
		/*--- Never float IKE source port ---*/
		if (ntohs(sxport->port) == PF_IKE_PORT) {
			nxport->port = sxport->port;
			return 0;
		}

		/*--- Apply exterior mapping options ---*/
		/* Endpoint-independent style mapping: reuse the gateway port
		 * of an existing state from the same internal addr:port. */
		if (r->extmap > PF_EXTMAP_APD) {
			struct pf_state *s;

			TAILQ_FOREACH(s, &state_list, entry_list) {
				struct pf_state_key *sk = s->state_key;
				if (!sk) {
					continue;
				}
				if (s->nat_rule.ptr != r) {
					continue;
				}
				if (sk->proto != IPPROTO_UDP ||
				    sk->af_lan != af) {
					continue;
				}
				if (sk->lan.xport.port != sxport->port) {
					continue;
				}
				if (PF_ANEQ(&sk->lan.addr, saddr, af)) {
					continue;
				}
				/* Address-dependent mapping also requires the
				 * same external destination address. */
				if (r->extmap < PF_EXTMAP_EI &&
				    PF_ANEQ(&sk->ext_lan.addr, daddr, af)) {
					continue;
				}

#if SKYWALK
				if (netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, sxport->port,
				    NETNS_PF, NULL) != 0) {
					return 1;
				}
#endif
				nxport->port = sk->gwy.xport.port;
				return 0;
			}
		}
	} else if (proto == IPPROTO_TCP) {
		struct pf_state* s;
		/*
		 * APPLE MODIFICATION: <rdar://problem/6546358>
		 * Fix allows....NAT to use a single binding for TCP session
		 * with same source IP and source port
		 */
		TAILQ_FOREACH(s, &state_list, entry_list) {
			struct pf_state_key* sk = s->state_key;
			if (!sk) {
				continue;
			}
			if (s->nat_rule.ptr != r) {
				continue;
			}
			if (sk->proto != IPPROTO_TCP || sk->af_lan != af) {
				continue;
			}
			if (sk->lan.xport.port != sxport->port) {
				continue;
			}
			if (!(PF_AEQ(&sk->lan.addr, saddr, af))) {
				continue;
			}
#if SKYWALK
			if (netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, sxport->port,
			    NETNS_PF, NULL) != 0) {
				return 1;
			}
#endif
			nxport->port = sk->gwy.xport.port;
			return 0;
		}
	}
	/* Generic search: loop over pool addresses until one yields a free
	 * port or the pool wraps back to the first address handed out. */
	do {
		key.af_gwy = af;
		key.proto = proto;
		PF_ACPY(&key.ext_gwy.addr, daddr, key.af_gwy);
		PF_ACPY(&key.gwy.addr, naddr, key.af_gwy);
		switch (proto) {
		case IPPROTO_UDP:
			key.proto_variant = r->extfilter;
			break;
		default:
			key.proto_variant = 0;
			break;
		}
		if (dxport) {
			key.ext_gwy.xport = *dxport;
		} else {
			memset(&key.ext_gwy.xport, 0,
			    sizeof(key.ext_gwy.xport));
		}
		/*
		 * port search; start random, step;
		 * similar 2 portloop in in_pcbbind
		 */
		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
		    proto == IPPROTO_ICMP)) {
			/* Portless protocol: a state collision is the only
			 * thing that can force another pool address. */
			if (dxport) {
				key.gwy.xport = *dxport;
			} else {
				memset(&key.gwy.xport, 0,
				    sizeof(key.gwy.xport));
			}
#if SKYWALK
			/* Nothing to do: netns handles TCP/UDP only */
#endif
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
				return 0;
			}
		} else if (low == 0 && high == 0) {
			/* No proxy range configured: keep the original port. */
			key.gwy.xport = *nxport;
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
			    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
			    || netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, nxport->port,
			    NETNS_PF, NULL) == 0)
#endif
			    ) {
				return 0;
			}
		} else if (low == high) {
			/* Single fixed proxy port. */
			key.gwy.xport.port = htons(low);
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
			    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
			    || netns_reserve(pnstoken, naddr->addr32,
			    NETNS_AF_SIZE(af), proto, htons(low),
			    NETNS_PF, NULL) == 0)
#endif
			    ) {
				nxport->port = htons(low);
				return 0;
			}
		} else {
			unsigned int tmp;
			if (low > high) {
				tmp = low;
				low = high;
				high = tmp;
			}
			/* low < high */
			cut = htonl(random()) % (1 + high - low) + low;
			/* low <= cut <= high */
			/* Scan upward from the random start point ... */
			for (tmp = cut; tmp <= high; ++(tmp)) {
				key.gwy.xport.port = htons(tmp);
				if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
				    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
				    || netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, htons(tmp),
				    NETNS_PF, NULL) == 0)
#endif
				    ) {
					nxport->port = htons(tmp);
					return 0;
				}
			}
			/* ... then downward below it. */
			for (tmp = cut - 1; tmp >= low; --(tmp)) {
				key.gwy.xport.port = htons(tmp);
				if (pf_find_state_all(&key, PF_IN, NULL) == NULL
#if SKYWALK
				    && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
				    || netns_reserve(pnstoken, naddr->addr32,
				    NETNS_AF_SIZE(af), proto, htons(tmp),
				    NETNS_PF, NULL) == 0)
#endif
				    ) {
					nxport->port = htons(tmp);
					return 0;
				}
			}
		}

		/* Range exhausted on this address: only RANDOM/ROUNDROBIN
		 * pools can offer another address to retry with. */
		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
		case PF_POOL_RANDOM:
		case PF_POOL_ROUNDROBIN:
			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) {
				return 1;
			}
			break;
		case PF_POOL_NONE:
		case PF_POOL_SRCHASH:
		case PF_POOL_BITMASK:
		default:
			return 1;
		}
	} while (!PF_AEQ(&init_addr, naddr, af));

	return 1;	/* none available */
}
3712
/*
 * Walk the active ruleset 'rs_num' (NAT, BINAT or RDR) and return the first
 * rule that matches the packet described by pd/saddr/sxport/daddr/dxport,
 * or NULL.  "no nat"/"no rdr"/... matches are also filtered to NULL here.
 *
 * For BINAT inbound and RDR outbound the rule is evaluated in the reverse
 * direction, so src/dst and (for RDR) the redirect target are swapped
 * before matching.  The else-if chain mirrors the skip-step optimization
 * of the main rule evaluation: each failed criterion jumps via the
 * precomputed r->skip[] pointers where possible.
 */
static __attribute__((noinline)) struct pf_rule *
pf_match_translation(struct pf_pdesc *pd, pbuf_t *pbuf, int off,
    int direction, struct pfi_kif *kif, struct pf_addr *saddr,
    union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, int rs_num)
{
	struct pf_rule *r, *rm = NULL;
	struct pf_ruleset *ruleset = NULL;
	int tag = -1;
	unsigned int rtableid = IFSCOPE_NONE;
	int asd = 0;	/* anchor stack depth */

	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
	while (r && rm == NULL) {
		struct pf_rule_addr *src = NULL, *dst = NULL;
		struct pf_addr_wrap *xdst = NULL;
		struct pf_addr_wrap *xsrc = NULL;
		union pf_rule_xport rdrxport;

		if (r->action == PF_BINAT && direction == PF_IN) {
			/* Inbound BINAT: match against the rule's dst and the
			 * translation address (xdst). */
			src = &r->dst;
			if (r->rpool.cur != NULL) {
				xdst = &r->rpool.cur->addr;
			}
		} else if (r->action == PF_RDR && direction == PF_OUT) {
			/* Outbound RDR (reply direction): match the redirect
			 * target address/port (xsrc/rdrxport) as source. */
			dst = &r->src;
			src = &r->dst;
			if (r->rpool.cur != NULL) {
				rdrxport.range.op = PF_OP_EQ;
				rdrxport.range.port[0] =
				    htons(r->rpool.proxy_port[0]);
				xsrc = &r->rpool.cur->addr;
			}
		} else {
			src = &r->src;
			dst = &r->dst;
		}

		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != direction) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != pd->af) {
			r = r->skip[PF_SKIP_AF].ptr;
		} else if (r->proto && r->proto != pd->proto) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		} else if (xsrc && PF_MISMATCHAW(xsrc, saddr, pd->af, 0, NULL)) {
			r = TAILQ_NEXT(r, entries);
		} else if (!xsrc && PF_MISMATCHAW(&src->addr, saddr, pd->af,
		    src->neg, kif)) {
			r = TAILQ_NEXT(r, entries);
		} else if (xsrc && (!rdrxport.range.port[0] ||
		    !pf_match_xport(r->proto, r->proto_variant, &rdrxport,
		    sxport))) {
			r = TAILQ_NEXT(r, entries);
		} else if (!xsrc && !pf_match_xport(r->proto,
		    r->proto_variant, &src->xport, sxport)) {
			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
			    PF_SKIP_DST_PORT].ptr;
		} else if (dst != NULL &&
		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		} else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
		    0, NULL)) {
			r = TAILQ_NEXT(r, entries);
		} else if (dst && !pf_match_xport(r->proto, r->proto_variant,
		    &dst->xport, dxport)) {
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, pbuf,
		    off, pd->hdr.tcp), r->os_fingerprint))) {
			r = TAILQ_NEXT(r, entries);
		} else {
			/* Full match: either take it, or descend into the
			 * anchor's sub-ruleset and keep searching. */
			if (r->tag) {
				tag = r->tag;
			}
			if (PF_RTABLEID_IS_VALID(r->rtableid)) {
				rtableid = r->rtableid;
			}
			if (r->anchor == NULL) {
				rm = r;
			} else {
				pf_step_into_anchor(&asd, &ruleset, rs_num,
				    &r, NULL, NULL);
			}
		}
		if (r == NULL) {
			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
			    NULL, NULL);
		}
	}
	/* Apply any tag/rtable decision even when returning NULL below. */
	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, NULL)) {
		return NULL;
	}
	if (rm != NULL && (rm->action == PF_NONAT ||
	    rm->action == PF_NORDR || rm->action == PF_NOBINAT ||
	    rm->action == PF_NONAT64)) {
		return NULL;
	}
	return rm;
}
3817
3818 /*
3819 * Get address translation information for NAT/BINAT/RDR
3820 * pd : pf packet descriptor
3821 * pbuf : pbuf holding the packet
3822 * off : offset to protocol header
3823 * direction : direction of packet
3824 * kif : pf interface info obtained from the packet's recv interface
3825 * sn : source node pointer (output)
3826 * saddr : packet source address
3827 * sxport : packet source port
3828 * daddr : packet destination address
3829 * dxport : packet destination port
3830 * nsxport : translated source port (output)
3831 *
3832 * Translated source & destination address are updated in pd->nsaddr &
3833 * pd->ndaddr
3834 */
static __attribute__((noinline)) struct pf_rule *
pf_get_translation_aux(struct pf_pdesc *pd, pbuf_t *pbuf, int off,
    int direction, struct pfi_kif *kif, struct pf_src_node **sn,
    struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, union pf_state_xport *nsxport
#if SKYWALK
    , netns_token *pnstoken
#endif
    )
{
	struct pf_rule *r = NULL;
	pd->naf = pd->af;

	/*
	 * Find the applicable translation rule.  Outbound packets consult
	 * BINAT, then RDR, then NAT; inbound packets consult RDR, then
	 * BINAT (NAT only rewrites outbound sources).
	 */
	if (direction == PF_OUT) {
		r = pf_match_translation(pd, pbuf, off, direction, kif, saddr,
		    sxport, daddr, dxport, PF_RULESET_BINAT);
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_RDR);
		}
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_NAT);
		}
	} else {
		r = pf_match_translation(pd, pbuf, off, direction, kif, saddr,
		    sxport, daddr, dxport, PF_RULESET_RDR);
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_BINAT);
		}
	}

	if (r != NULL) {
		struct pf_addr *nsaddr = &pd->naddr;
		struct pf_addr *ndaddr = &pd->ndaddr;

		/* Start from the untranslated addresses; the cases below
		 * overwrite only the side they rewrite. */
		*nsaddr = *saddr;
		*ndaddr = *daddr;

		switch (r->action) {
		case PF_NONAT:
		case PF_NONAT64:
		case PF_NOBINAT:
		case PF_NORDR:
			return NULL;
		case PF_NAT:
		case PF_NAT64:
			/*
			 * we do NAT64 on incoming path and we call ip_input
			 * which asserts receive interface to be not NULL.
			 * The below check is to prevent NAT64 action on any
			 * packet generated by local entity using synthesized
			 * IPv6 address.
			 */
			if ((r->action == PF_NAT64) && (direction == PF_OUT)) {
				return NULL;
			}

			/* Pick translated source address and proxy port. */
			if (pf_get_sport(pd, kif, r, saddr, sxport, daddr,
			    dxport, nsaddr, nsxport, sn
#if SKYWALK
			    , pnstoken
#endif
			    )) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: NAT proxy port allocation "
				    "(%u-%u) failed\n",
				    r->rpool.proxy_port[0],
				    r->rpool.proxy_port[1]));
				return NULL;
			}
			/*
			 * For NAT64 the destination IPv4 address is derived
			 * from the last 32 bits of synthesized IPv6 address
			 */
			if (r->action == PF_NAT64) {
				ndaddr->v4addr.s_addr = daddr->addr32[3];
				pd->naf = AF_INET;
			}
			break;
		case PF_BINAT:
			switch (direction) {
			case PF_OUT:
				/* Rewrite source to the BINAT translation
				 * address (static 1:1 via POOLMASK). */
				if (r->rpool.cur->addr.type ==
				    PF_ADDR_DYNIFTL) {
					if (r->rpool.cur->addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr4,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask4,
						    saddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr6,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask6,
						    saddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(nsaddr,
					    &r->rpool.cur->addr.v.a.addr,
					    &r->rpool.cur->addr.v.a.mask,
					    saddr, pd->af);
				}
				break;
			case PF_IN:
				/* Reverse direction: map destination back to
				 * the rule's internal (src) address. */
				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
					if (r->src.addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->src.addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(ndaddr,
						    &r->src.addr.p.dyn->
						    pfid_addr4,
						    &r->src.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->src.addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(ndaddr,
						    &r->src.addr.p.dyn->
						    pfid_addr6,
						    &r->src.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(ndaddr,
					    &r->src.addr.v.a.addr,
					    &r->src.addr.v.a.mask, daddr,
					    pd->af);
				}
				break;
			}
			break;
		case PF_RDR: {
			switch (direction) {
			case PF_OUT:
				/* Reply direction of an RDR: restore the
				 * original destination into the source. */
				if (r->dst.addr.type == PF_ADDR_DYNIFTL) {
					if (r->dst.addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->dst.addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->dst.addr.p.dyn->
						    pfid_addr4,
						    &r->dst.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->dst.addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->dst.addr.p.dyn->
						    pfid_addr6,
						    &r->dst.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(nsaddr,
					    &r->dst.addr.v.a.addr,
					    &r->dst.addr.v.a.mask,
					    daddr, pd->af);
				}
				if (nsxport && r->dst.xport.range.port[0]) {
					nsxport->port =
					    r->dst.xport.range.port[0];
				}
				break;
			case PF_IN:
				/* Forward direction: redirect destination to
				 * an address from the rule's pool. */
				if (pf_map_addr(pd->af, r, saddr,
				    ndaddr, NULL, sn)) {
					return NULL;
				}
				if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
				    PF_POOL_BITMASK) {
					PF_POOLMASK(ndaddr, ndaddr,
					    &r->rpool.cur->addr.v.a.mask, daddr,
					    pd->af);
				}

				if (nsxport && dxport) {
					if (r->rpool.proxy_port[1]) {
						u_int32_t tmp_nport;

						/* Map the destination port
						 * into the configured proxy
						 * port range, preserving the
						 * offset from the rule's
						 * first dst port. */
						tmp_nport =
						    ((ntohs(dxport->port) -
						    ntohs(r->dst.xport.range.
						    port[0])) %
						    (r->rpool.proxy_port[1] -
						    r->rpool.proxy_port[0] +
						    1)) + r->rpool.proxy_port[0];

						/* wrap around if necessary */
						if (tmp_nport > 65535) {
							tmp_nport -= 65535;
						}
						nsxport->port =
						    htons((u_int16_t)tmp_nport);
					} else if (r->rpool.proxy_port[0]) {
						nsxport->port = htons(r->rpool.
						    proxy_port[0]);
					}
				}
				break;
			}
			break;
		}
		default:
			return NULL;
		}
	}

	return r;
}
4093
/*
 * Resolve the local socket (if any) that owns the TCP/UDP flow described
 * by pd, filling pd->lookup.uid/gid.  Used by rules that filter on socket
 * owner ("user"/"group" criteria).
 *
 * Returns 1 when a matching pcb was found, -1 otherwise (including for
 * protocols other than TCP/UDP).  For inbound packets the packet's
 * src/dst map directly onto the pcb's foreign/local endpoints; for
 * outbound packets they are swapped first.
 *
 * For AF_INET the lookup cascades through four attempts: exact v4 match,
 * v4-mapped-v6 exact match, v4 wildcard, v4-mapped-v6 wildcard.  AF_INET6
 * tries exact then wildcard.
 */
int
pf_socket_lookup(int direction, struct pf_pdesc *pd)
{
	struct pf_addr *saddr, *daddr;
	u_int16_t sport, dport;
	struct inpcbinfo *pi;
	int inp = 0;

	if (pd == NULL) {
		return -1;
	}
	pd->lookup.uid = UID_MAX;
	pd->lookup.gid = GID_MAX;
	pd->lookup.pid = NO_PID;

	switch (pd->proto) {
	case IPPROTO_TCP:
		if (pd->hdr.tcp == NULL) {
			return -1;
		}
		sport = pd->hdr.tcp->th_sport;
		dport = pd->hdr.tcp->th_dport;
		pi = &tcbinfo;
		break;
	case IPPROTO_UDP:
		if (pd->hdr.udp == NULL) {
			return -1;
		}
		sport = pd->hdr.udp->uh_sport;
		dport = pd->hdr.udp->uh_dport;
		pi = &udbinfo;
		break;
	default:
		return -1;
	}
	if (direction == PF_IN) {
		saddr = pd->src;
		daddr = pd->dst;
	} else {
		/* Outbound: the local socket's view is the reverse of the
		 * packet's, so swap ports and addresses. */
		u_int16_t p;

		p = sport;
		sport = dport;
		dport = p;
		saddr = pd->dst;
		daddr = pd->src;
	}
	switch (pd->af) {
#if INET
	case AF_INET:
		inp = in_pcblookup_hash_exists(pi, saddr->v4addr, sport, daddr->v4addr, dport,
		    0, &pd->lookup.uid, &pd->lookup.gid, NULL);
		if (inp == 0) {
			/* Retry as a v4-mapped IPv6 lookup (::ffff:a.b.c.d)
			 * to catch dual-stack sockets. */
			struct in6_addr s6, d6;

			memset(&s6, 0, sizeof(s6));
			s6.s6_addr16[5] = htons(0xffff);
			memcpy(&s6.s6_addr32[3], &saddr->v4addr,
			    sizeof(saddr->v4addr));

			memset(&d6, 0, sizeof(d6));
			d6.s6_addr16[5] = htons(0xffff);
			memcpy(&d6.s6_addr32[3], &daddr->v4addr,
			    sizeof(daddr->v4addr));

			inp = in6_pcblookup_hash_exists(pi, &s6, sport, IFSCOPE_NONE,
			    &d6, dport, IFSCOPE_NONE, 0, &pd->lookup.uid, &pd->lookup.gid, NULL, false);
			if (inp == 0) {
				/* Fall back to wildcard (listening) sockets. */
				inp = in_pcblookup_hash_exists(pi, saddr->v4addr, sport,
				    daddr->v4addr, dport, INPLOOKUP_WILDCARD, &pd->lookup.uid, &pd->lookup.gid, NULL);
				if (inp == 0) {
					inp = in6_pcblookup_hash_exists(pi, &s6, sport, IFSCOPE_NONE,
					    &d6, dport, IFSCOPE_NONE, INPLOOKUP_WILDCARD,
					    &pd->lookup.uid, &pd->lookup.gid, NULL, false);
					if (inp == 0) {
						return -1;
					}
				}
			}
		}
		break;
#endif /* INET */
	case AF_INET6:
		inp = in6_pcblookup_hash_exists(pi, &saddr->v6addr, sport, IFSCOPE_UNKNOWN, &daddr->v6addr,
		    dport, IFSCOPE_UNKNOWN, 0, &pd->lookup.uid, &pd->lookup.gid, NULL, false);
		if (inp == 0) {
			inp = in6_pcblookup_hash_exists(pi, &saddr->v6addr, sport, IFSCOPE_UNKNOWN,
			    &daddr->v6addr, dport, IFSCOPE_UNKNOWN, INPLOOKUP_WILDCARD,
			    &pd->lookup.uid, &pd->lookup.gid, NULL, false);
			if (inp == 0) {
				return -1;
			}
		}
		break;

	default:
		return -1;
	}

	return 1;
}
4195
4196 static __attribute__((noinline)) u_int8_t
pf_get_wscale(pbuf_t * pbuf,int off,u_int16_t th_off,sa_family_t af)4197 pf_get_wscale(pbuf_t *pbuf, int off, u_int16_t th_off, sa_family_t af)
4198 {
4199 int hlen;
4200 u_int8_t hdr[60];
4201 u_int8_t *opt, optlen;
4202 u_int8_t wscale = 0;
4203
4204 hlen = th_off << 2; /* hlen <= sizeof (hdr) */
4205 if (hlen <= (int)sizeof(struct tcphdr)) {
4206 return 0;
4207 }
4208 if (!pf_pull_hdr(pbuf, off, hdr, hlen, NULL, NULL, af)) {
4209 return 0;
4210 }
4211 opt = hdr + sizeof(struct tcphdr);
4212 hlen -= sizeof(struct tcphdr);
4213 while (hlen >= 3) {
4214 switch (*opt) {
4215 case TCPOPT_EOL:
4216 case TCPOPT_NOP:
4217 ++opt;
4218 --hlen;
4219 break;
4220 case TCPOPT_WINDOW:
4221 wscale = opt[2];
4222 if (wscale > TCP_MAX_WINSHIFT) {
4223 wscale = TCP_MAX_WINSHIFT;
4224 }
4225 wscale |= PF_WSCALE_FLAG;
4226 OS_FALLTHROUGH;
4227 default:
4228 optlen = opt[1];
4229 if (optlen < 2) {
4230 optlen = 2;
4231 }
4232 hlen -= optlen;
4233 opt += optlen;
4234 break;
4235 }
4236 }
4237 return wscale;
4238 }
4239
4240 static __attribute__((noinline)) u_int16_t
pf_get_mss(pbuf_t * pbuf,int off,u_int16_t th_off,sa_family_t af)4241 pf_get_mss(pbuf_t *pbuf, int off, u_int16_t th_off, sa_family_t af)
4242 {
4243 int hlen;
4244 u_int8_t hdr[60];
4245 u_int8_t *opt, optlen;
4246 u_int16_t mss = tcp_mssdflt;
4247
4248 hlen = th_off << 2; /* hlen <= sizeof (hdr) */
4249 if (hlen <= (int)sizeof(struct tcphdr)) {
4250 return 0;
4251 }
4252 if (!pf_pull_hdr(pbuf, off, hdr, hlen, NULL, NULL, af)) {
4253 return 0;
4254 }
4255 opt = hdr + sizeof(struct tcphdr);
4256 hlen -= sizeof(struct tcphdr);
4257 while (hlen >= TCPOLEN_MAXSEG) {
4258 switch (*opt) {
4259 case TCPOPT_EOL:
4260 case TCPOPT_NOP:
4261 ++opt;
4262 --hlen;
4263 break;
4264 case TCPOPT_MAXSEG:
4265 bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
4266 #if BYTE_ORDER != BIG_ENDIAN
4267 NTOHS(mss);
4268 #endif
4269 OS_FALLTHROUGH;
4270 default:
4271 optlen = opt[1];
4272 if (optlen < 2) {
4273 optlen = 2;
4274 }
4275 hlen -= optlen;
4276 opt += optlen;
4277 break;
4278 }
4279 }
4280 return mss;
4281 }
4282
4283 static __attribute__((noinline)) u_int16_t
pf_calc_mss(struct pf_addr * addr,sa_family_t af,u_int16_t offer)4284 pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
4285 {
4286 #if INET
4287 struct sockaddr_in *dst;
4288 struct route ro;
4289 #endif /* INET */
4290 struct sockaddr_in6 *dst6;
4291 struct route_in6 ro6;
4292 struct rtentry *rt = NULL;
4293 int hlen;
4294 u_int16_t mss = tcp_mssdflt;
4295
4296 switch (af) {
4297 #if INET
4298 case AF_INET:
4299 hlen = sizeof(struct ip);
4300 bzero(&ro, sizeof(ro));
4301 dst = (struct sockaddr_in *)(void *)&ro.ro_dst;
4302 dst->sin_family = AF_INET;
4303 dst->sin_len = sizeof(*dst);
4304 dst->sin_addr = addr->v4addr;
4305 rtalloc(&ro);
4306 rt = ro.ro_rt;
4307 break;
4308 #endif /* INET */
4309 case AF_INET6:
4310 hlen = sizeof(struct ip6_hdr);
4311 bzero(&ro6, sizeof(ro6));
4312 dst6 = (struct sockaddr_in6 *)(void *)&ro6.ro_dst;
4313 dst6->sin6_family = AF_INET6;
4314 dst6->sin6_len = sizeof(*dst6);
4315 dst6->sin6_addr = addr->v6addr;
4316 rtalloc((struct route *)&ro);
4317 rt = ro6.ro_rt;
4318 break;
4319 default:
4320 panic("pf_calc_mss: not AF_INET or AF_INET6!");
4321 return 0;
4322 }
4323
4324 if (rt && rt->rt_ifp) {
4325 /* This is relevant only for PF SYN Proxy */
4326 int interface_mtu = rt->rt_ifp->if_mtu;
4327
4328 if (af == AF_INET &&
4329 INTF_ADJUST_MTU_FOR_CLAT46(rt->rt_ifp)) {
4330 interface_mtu = IN6_LINKMTU(rt->rt_ifp);
4331 /* Further adjust the size for CLAT46 expansion */
4332 interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
4333 }
4334 mss = interface_mtu - hlen - sizeof(struct tcphdr);
4335 mss = max(tcp_mssdflt, mss);
4336 rtfree(rt);
4337 }
4338 mss = min(mss, offer);
4339 mss = max(mss, 64); /* sanity - at least max opt space */
4340 return mss;
4341 }
4342
4343 static void
pf_set_rt_ifp(struct pf_state * s,struct pf_addr * saddr,sa_family_t af)4344 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af)
4345 {
4346 struct pf_rule *r = s->rule.ptr;
4347
4348 s->rt_kif = NULL;
4349
4350 if (!r->rt || r->rt == PF_FASTROUTE) {
4351 return;
4352 }
4353 if ((af == AF_INET) || (af == AF_INET6)) {
4354 pf_map_addr(af, r, saddr, &s->rt_addr, NULL,
4355 &s->nat_src_node);
4356 s->rt_kif = r->rpool.cur->kif;
4357 }
4358
4359 return;
4360 }
4361
4362 static void
pf_attach_state(struct pf_state_key * sk,struct pf_state * s,int tail)4363 pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail)
4364 {
4365 s->state_key = sk;
4366 sk->refcnt++;
4367
4368 /* list is sorted, if-bound states before floating */
4369 if (tail) {
4370 TAILQ_INSERT_TAIL(&sk->states, s, next);
4371 } else {
4372 TAILQ_INSERT_HEAD(&sk->states, s, next);
4373 }
4374 }
4375
/*
 * Release the flow id held by 'sk' back to the flow-id namespace, but only
 * when PF itself allocated it (FLOWSRC_PF) and one is actually present.
 * No-op on non-SKYWALK builds.
 */
static void
pf_state_key_release_flowid(struct pf_state_key *sk)
{
#pragma unused (sk)
#if SKYWALK
	if (sk->flowhash != 0 && sk->flowsrc == FLOWSRC_PF) {
		flowidns_release_flowid(sk->flowhash);
		sk->flowsrc = 0;
		sk->flowhash = 0;
	}
#endif /* SKYWALK */
}
4388
4389 void
pf_detach_state(struct pf_state * s,int flags)4390 pf_detach_state(struct pf_state *s, int flags)
4391 {
4392 struct pf_state_key *sk = s->state_key;
4393
4394 if (sk == NULL) {
4395 return;
4396 }
4397
4398 s->state_key = NULL;
4399 TAILQ_REMOVE(&sk->states, s, next);
4400 if (--sk->refcnt == 0) {
4401 if (!(flags & PF_DT_SKIP_EXTGWY)) {
4402 RB_REMOVE(pf_state_tree_ext_gwy,
4403 &pf_statetbl_ext_gwy, sk);
4404 }
4405 if (!(flags & PF_DT_SKIP_LANEXT)) {
4406 RB_REMOVE(pf_state_tree_lan_ext,
4407 &pf_statetbl_lan_ext, sk);
4408 }
4409 if (sk->app_state) {
4410 pool_put(&pf_app_state_pl, sk->app_state);
4411 }
4412 pf_state_key_release_flowid(sk);
4413 pool_put(&pf_state_key_pl, sk);
4414 }
4415 }
4416
/*
 * Allocate a new state key, attach state 's' to it (at the head of the
 * key's state list), optionally copy the lookup fields from template
 * 'psk', and ensure the key has a flow id — allocating a PF-owned one
 * when the template did not supply any.
 *
 * Returns NULL only on allocation failure (PR_WAITOK may sleep).
 */
struct pf_state_key *
pf_alloc_state_key(struct pf_state *s, struct pf_state_key *psk)
{
	struct pf_state_key *sk;

	if ((sk = pool_get(&pf_state_key_pl, PR_WAITOK)) == NULL) {
		return NULL;
	}
	bzero(sk, sizeof(*sk));
	TAILQ_INIT(&sk->states);
	pf_attach_state(sk, s, 0);

	/* initialize state key from psk, if provided */
	if (psk != NULL) {
		bcopy(&psk->lan, &sk->lan, sizeof(sk->lan));
		bcopy(&psk->gwy, &sk->gwy, sizeof(sk->gwy));
		bcopy(&psk->ext_lan, &sk->ext_lan, sizeof(sk->ext_lan));
		bcopy(&psk->ext_gwy, &sk->ext_gwy, sizeof(sk->ext_gwy));
		sk->af_lan = psk->af_lan;
		sk->af_gwy = psk->af_gwy;
		sk->proto = psk->proto;
		sk->direction = psk->direction;
		sk->proto_variant = psk->proto_variant;
		VERIFY(psk->app_state == NULL);
		/* a template must never carry a PF-owned flow id; that
		 * would double-free it when both keys are released */
		ASSERT(psk->flowsrc != FLOWSRC_PF);
		sk->flowsrc = psk->flowsrc;
		sk->flowhash = psk->flowhash;
		/* don't touch tree entries, states and refcnt on sk */
	}

	/* No flow id inherited: allocate one owned by PF. */
	if (sk->flowhash == 0) {
		ASSERT(sk->flowsrc == 0);
		sk->flowsrc = FLOWSRC_PF;
		sk->flowhash = pf_calc_state_key_flowhash(sk);
	}

	return sk;
}
4455
/*
 * Generate an initial TCP sequence number for the SYN proxy / modulation:
 * an RFC 1948-style MD5 hash over (secret, sport, dport, src, dst) plus a
 * random component and a monotonically advancing per-call offset.
 *
 * The secret is lazily initialized on first use and its partial MD5
 * context cached so each call only hashes the connection 4-tuple.
 * NOTE(review): initialization is guarded only by pf_tcp_secret_init;
 * presumably callers hold the PF lock — confirm before relying on it.
 */
static __attribute__((noinline)) u_int32_t
pf_tcp_iss(struct pf_pdesc *pd)
{
	MD5_CTX ctx;
	u_int32_t digest[4];

	if (pf_tcp_secret_init == 0) {
		read_frandom(pf_tcp_secret, sizeof(pf_tcp_secret));
		MD5Init(&pf_tcp_secret_ctx);
		MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
		    sizeof(pf_tcp_secret));
		pf_tcp_secret_init = 1;
	}
	/* Start from the cached context that already hashed the secret. */
	ctx = pf_tcp_secret_ctx;

	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
	if (pd->af == AF_INET6) {
		MD5Update(&ctx, (char *)&pd->src->v6addr, sizeof(struct in6_addr));
		MD5Update(&ctx, (char *)&pd->dst->v6addr, sizeof(struct in6_addr));
	} else {
		MD5Update(&ctx, (char *)&pd->src->v4addr, sizeof(struct in_addr));
		MD5Update(&ctx, (char *)&pd->dst->v4addr, sizeof(struct in_addr));
	}
	MD5Final((u_char *)digest, &ctx);
	/* Advance the global offset so successive ISSs keep moving forward
	 * even for identical 4-tuples. */
	pf_tcp_iss_off += 4096;
	return digest[0] + random() + pf_tcp_iss_off;
}
4484
4485 /*
4486 * This routine is called to perform address family translation on the
4487 * inner IP header (that may come as payload) of an ICMP(v4addr/6) error
4488 * response.
4489 */
static __attribute__((noinline)) int
pf_change_icmp_af(pbuf_t *pbuf, int off,
    struct pf_pdesc *pd, struct pf_pdesc *pd2, struct pf_addr *src,
    struct pf_addr *dst, sa_family_t af, sa_family_t naf)
{
	struct ip *ip4 = NULL;
	struct ip6_hdr *ip6 = NULL;
	void *hdr;
	int hlen, olen;
	/* salt for randomized IPv4 IDs, derived from the packet buffer */
	uint64_t ipid_salt = (uint64_t)pbuf_get_packet_buffer_address(pbuf);

	/* only a translation between distinct AF_INET/AF_INET6 is valid */
	if (af == naf || (af != AF_INET && af != AF_INET6) ||
	    (naf != AF_INET && naf != AF_INET6)) {
		return -1;
	}

	/* old (inner) header length: from inner IP header to inner payload */
	olen = pd2->off - off;
	/* new header length in the target family */
	hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);

	/* Modify the pbuf to accommodate the new header */
	hdr = pbuf_resize_segment(pbuf, off, olen, hlen);
	if (hdr == NULL) {
		return -1;
	}

	/* translate inner ip/ip6 header */
	switch (naf) {
	case AF_INET:
		ip4 = hdr;
		bzero(ip4, sizeof(*ip4));
		ip4->ip_v = IPVERSION;
		ip4->ip_hl = sizeof(*ip4) >> 2;
		ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - olen);
		ip4->ip_id = rfc6864 ? 0 : htons(ip_randomid(ipid_salt));
		ip4->ip_off = htons(IP_DF);
		ip4->ip_ttl = pd2->ttl;
		/* the inner ICMPv6 payload becomes ICMP after translation */
		if (pd2->proto == IPPROTO_ICMPV6) {
			ip4->ip_p = IPPROTO_ICMP;
		} else {
			ip4->ip_p = pd2->proto;
		}
		ip4->ip_src = src->v4addr;
		ip4->ip_dst = dst->v4addr;
		/* header checksum over the freshly built IPv4 header */
		ip4->ip_sum = pbuf_inet_cksum(pbuf, 0, 0, ip4->ip_hl << 2);
		break;
	case AF_INET6:
		ip6 = hdr;
		bzero(ip6, sizeof(*ip6));
		ip6->ip6_vfc = IPV6_VERSION;
		ip6->ip6_plen = htons(pd2->tot_len - olen);
		/* the inner ICMP payload becomes ICMPv6 after translation */
		if (pd2->proto == IPPROTO_ICMP) {
			ip6->ip6_nxt = IPPROTO_ICMPV6;
		} else {
			ip6->ip6_nxt = pd2->proto;
		}
		/* clamp a zero or oversized TTL to the IPv6 default hop limit */
		if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM) {
			ip6->ip6_hlim = IPV6_DEFHLIM;
		} else {
			ip6->ip6_hlim = pd2->ttl;
		}
		ip6->ip6_src = src->v6addr;
		ip6->ip6_dst = dst->v6addr;
		break;
	}

	/* adjust payload offset and total packet length */
	pd2->off += hlen - olen;
	pd->tot_len += hlen - olen;

	return 0;
}
4563
4564 #define PTR_IP(field) ((int32_t)offsetof(struct ip, field))
4565 #define PTR_IP6(field) ((int32_t)offsetof(struct ip6_hdr, field))
4566
/*
 * Translate an ICMP <-> ICMPv6 message header in place for NAT64.
 * "af" is the family being translated *to* from the caller's view:
 * AF_INET means "arg" points at an ICMPv6 header to be rewritten as
 * ICMPv4; AF_INET6 means an ICMPv4 header rewritten as ICMPv6.  Type,
 * code, next-hop MTU and the parameter-problem pointer are all mapped
 * to their counterparts.  Returns 0 on success, -1 when the message
 * has no equivalent in the other family (callers drop the packet).
 */
static __attribute__((noinline)) int
pf_translate_icmp_af(int af, void *arg)
{
	struct icmp *icmp4;
	struct icmp6_hdr *icmp6;
	u_int32_t mtu;
	int32_t ptr = -1;
	u_int8_t type;
	u_int8_t code;

	switch (af) {
	case AF_INET:
		/* ICMPv6 -> ICMPv4 */
		icmp6 = arg;
		type = icmp6->icmp6_type;
		code = icmp6->icmp6_code;
		mtu = ntohl(icmp6->icmp6_mtu);

		switch (type) {
		case ICMP6_ECHO_REQUEST:
			type = ICMP_ECHO;
			break;
		case ICMP6_ECHO_REPLY:
			type = ICMP_ECHOREPLY;
			break;
		case ICMP6_DST_UNREACH:
			type = ICMP_UNREACH;
			switch (code) {
			case ICMP6_DST_UNREACH_NOROUTE:
			case ICMP6_DST_UNREACH_BEYONDSCOPE:
			case ICMP6_DST_UNREACH_ADDR:
				code = ICMP_UNREACH_HOST;
				break;
			case ICMP6_DST_UNREACH_ADMIN:
				code = ICMP_UNREACH_HOST_PROHIB;
				break;
			case ICMP6_DST_UNREACH_NOPORT:
				code = ICMP_UNREACH_PORT;
				break;
			default:
				return -1;
			}
			break;
		case ICMP6_PACKET_TOO_BIG:
			type = ICMP_UNREACH;
			code = ICMP_UNREACH_NEEDFRAG;
			/* IPv6 header is 20 bytes larger than IPv4's */
			mtu -= 20;
			break;
		case ICMP6_TIME_EXCEEDED:
			type = ICMP_TIMXCEED;
			break;
		case ICMP6_PARAM_PROB:
			switch (code) {
			case ICMP6_PARAMPROB_HEADER:
				type = ICMP_PARAMPROB;
				code = ICMP_PARAMPROB_ERRATPTR;
				ptr = ntohl(icmp6->icmp6_pptr);

				/*
				 * Map the offset of the offending field
				 * from the IPv6 header onto the
				 * corresponding IPv4 header field.
				 */
				if (ptr == PTR_IP6(ip6_vfc)) {
					; /* preserve */
				} else if (ptr == PTR_IP6(ip6_vfc) + 1) {
					ptr = PTR_IP(ip_tos);
				} else if (ptr == PTR_IP6(ip6_plen) ||
				    ptr == PTR_IP6(ip6_plen) + 1) {
					ptr = PTR_IP(ip_len);
				} else if (ptr == PTR_IP6(ip6_nxt)) {
					ptr = PTR_IP(ip_p);
				} else if (ptr == PTR_IP6(ip6_hlim)) {
					ptr = PTR_IP(ip_ttl);
				} else if (ptr >= PTR_IP6(ip6_src) &&
				    ptr < PTR_IP6(ip6_dst)) {
					ptr = PTR_IP(ip_src);
				} else if (ptr >= PTR_IP6(ip6_dst) &&
				    ptr < (int32_t)sizeof(struct ip6_hdr)) {
					ptr = PTR_IP(ip_dst);
				} else {
					return -1;
				}
				break;
			case ICMP6_PARAMPROB_NEXTHEADER:
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_PROTOCOL;
				break;
			default:
				return -1;
			}
			break;
		default:
			return -1;
		}
		icmp6->icmp6_type = type;
		icmp6->icmp6_code = code;
		/* aligns well with a icmpv4 nextmtu */
		icmp6->icmp6_mtu = htonl(mtu);
		/* icmpv4 pptr is a one most significant byte */
		if (ptr >= 0) {
			icmp6->icmp6_pptr = htonl(ptr << 24);
		}
		break;

	case AF_INET6:
		/* ICMPv4 -> ICMPv6 */
		icmp4 = arg;
		type = icmp4->icmp_type;
		code = icmp4->icmp_code;
		mtu = ntohs(icmp4->icmp_nextmtu);

		switch (type) {
		case ICMP_ECHO:
			type = ICMP6_ECHO_REQUEST;
			break;
		case ICMP_ECHOREPLY:
			type = ICMP6_ECHO_REPLY;
			break;
		case ICMP_UNREACH:
			type = ICMP6_DST_UNREACH;
			switch (code) {
			case ICMP_UNREACH_NET:
			case ICMP_UNREACH_HOST:
			case ICMP_UNREACH_NET_UNKNOWN:
			case ICMP_UNREACH_HOST_UNKNOWN:
			case ICMP_UNREACH_ISOLATED:
			case ICMP_UNREACH_TOSNET:
			case ICMP_UNREACH_TOSHOST:
				code = ICMP6_DST_UNREACH_NOROUTE;
				break;
			case ICMP_UNREACH_PORT:
				code = ICMP6_DST_UNREACH_NOPORT;
				break;
			case ICMP_UNREACH_NET_PROHIB:
			case ICMP_UNREACH_HOST_PROHIB:
			case ICMP_UNREACH_FILTER_PROHIB:
			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
				code = ICMP6_DST_UNREACH_ADMIN;
				break;
			case ICMP_UNREACH_PROTOCOL:
				type = ICMP6_PARAM_PROB;
				code = ICMP6_PARAMPROB_NEXTHEADER;
				ptr = offsetof(struct ip6_hdr, ip6_nxt);
				break;
			case ICMP_UNREACH_NEEDFRAG:
				type = ICMP6_PACKET_TOO_BIG;
				code = 0;
				/* IPv6 header is 20 bytes larger than IPv4's */
				mtu += 20;
				break;
			default:
				return -1;
			}
			break;
		case ICMP_TIMXCEED:
			type = ICMP6_TIME_EXCEEDED;
			break;
		case ICMP_PARAMPROB:
			type = ICMP6_PARAM_PROB;
			switch (code) {
			case ICMP_PARAMPROB_ERRATPTR:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			case ICMP_PARAMPROB_LENGTH:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			default:
				return -1;
			}

			/*
			 * Map the offset of the offending field from the
			 * IPv4 header onto the corresponding IPv6 header
			 * field.
			 */
			ptr = icmp4->icmp_pptr;
			if (ptr == 0 || ptr == PTR_IP(ip_tos)) {
				; /* preserve */
			} else if (ptr == PTR_IP(ip_len) ||
			    ptr == PTR_IP(ip_len) + 1) {
				ptr = PTR_IP6(ip6_plen);
			} else if (ptr == PTR_IP(ip_ttl)) {
				ptr = PTR_IP6(ip6_hlim);
			} else if (ptr == PTR_IP(ip_p)) {
				ptr = PTR_IP6(ip6_nxt);
			} else if (ptr >= PTR_IP(ip_src) &&
			    ptr < PTR_IP(ip_dst)) {
				ptr = PTR_IP6(ip6_src);
			} else if (ptr >= PTR_IP(ip_dst) &&
			    ptr < (int32_t)sizeof(struct ip)) {
				ptr = PTR_IP6(ip6_dst);
			} else {
				return -1;
			}
			break;
		default:
			return -1;
		}
		icmp4->icmp_type = type;
		icmp4->icmp_code = code;
		icmp4->icmp_nextmtu = htons(mtu);
		if (ptr >= 0) {
			icmp4->icmp_void = htonl(ptr);
		}
		break;
	}

	return 0;
}
4764
/*
 * NAT64: replace the IPv6 header at the front of the pbuf with a
 * freshly built IPv4 header (addresses taken from pd->naddr and
 * pd->ndaddr) and re-inject the packet into the IPv4 input path.
 *
 * Note: frees (consumes) pbuf if PF_NAT64 is returned.
 */
static __attribute__((noinline)) int
pf_nat64_ipv6(pbuf_t *pbuf, int off, struct pf_pdesc *pd)
{
	struct ip *ip4;
	struct mbuf *m;

	/*
	 * ip_input asserts for rcvif to be not NULL
	 * That may not be true for two corner cases
	 * 1. If for some reason a local app sends DNS
	 * AAAA query to local host
	 * 2. If IPv6 stack in kernel internally generates a
	 * message destined for a synthesized IPv6 end-point.
	 */
	if (pbuf->pb_ifp == NULL) {
		return PF_DROP;
	}

	/* swap the "off"-byte IPv6 header(s) for a 20-byte IPv4 header */
	ip4 = (struct ip *)pbuf_resize_segment(pbuf, 0, off, sizeof(*ip4));
	if (ip4 == NULL) {
		return PF_DROP;
	}

	ip4->ip_v = 4;
	ip4->ip_hl = 5;
	/*
	 * NOTE(review): masking the 8-bit pd->tos with htonl(0x0ff00000)
	 * (an IPv6 traffic-class mask in network byte order) looks
	 * suspicious; on little-endian this reduces to pd->tos & 0x0f —
	 * confirm the intended traffic-class mapping.
	 */
	ip4->ip_tos = pd->tos & htonl(0x0ff00000);
	ip4->ip_len = htons(sizeof(*ip4) + (pd->tot_len - off));
	ip4->ip_id = 0;
	ip4->ip_off = htons(IP_DF);
	ip4->ip_ttl = pd->ttl;
	ip4->ip_p = pd->proto;
	ip4->ip_sum = 0;
	ip4->ip_src = pd->naddr.v4addr;
	ip4->ip_dst = pd->ndaddr.v4addr;
	ip4->ip_sum = pbuf_inet_cksum(pbuf, 0, 0, ip4->ip_hl << 2);

	/* recalculate icmp checksums */
	if (pd->proto == IPPROTO_ICMP) {
		struct icmp *icmp;
		int hlen = sizeof(*ip4);

		icmp = (struct icmp *)pbuf_contig_segment(pbuf, hlen,
		    ICMP_MINLEN);
		if (icmp == NULL) {
			return PF_DROP;
		}

		icmp->icmp_cksum = 0;
		icmp->icmp_cksum = pbuf_inet_cksum(pbuf, 0, hlen,
		    ntohs(ip4->ip_len) - hlen);
	}

	/* hand the translated packet to the IPv4 input path */
	if ((m = pbuf_to_mbuf(pbuf, TRUE)) != NULL) {
		ip_input(m);
	}

	return PF_NAT64;
}
4824
4825 static __attribute__((noinline)) int
pf_nat64_ipv4(pbuf_t * pbuf,int off,struct pf_pdesc * pd)4826 pf_nat64_ipv4(pbuf_t *pbuf, int off, struct pf_pdesc *pd)
4827 {
4828 struct ip6_hdr *ip6;
4829 struct mbuf *m;
4830
4831 if (pbuf->pb_ifp == NULL) {
4832 return PF_DROP;
4833 }
4834
4835 ip6 = (struct ip6_hdr *)pbuf_resize_segment(pbuf, 0, off, sizeof(*ip6));
4836 if (ip6 == NULL) {
4837 return PF_DROP;
4838 }
4839
4840 ip6->ip6_vfc = htonl((6 << 28) | (pd->tos << 20));
4841 ip6->ip6_plen = htons(pd->tot_len - off);
4842 ip6->ip6_nxt = pd->proto;
4843 ip6->ip6_hlim = pd->ttl;
4844 ip6->ip6_src = pd->naddr.v6addr;
4845 ip6->ip6_dst = pd->ndaddr.v6addr;
4846
4847 /* recalculate icmp6 checksums */
4848 if (pd->proto == IPPROTO_ICMPV6) {
4849 struct icmp6_hdr *icmp6;
4850 int hlen = sizeof(*ip6);
4851
4852 icmp6 = (struct icmp6_hdr *)pbuf_contig_segment(pbuf, hlen,
4853 sizeof(*icmp6));
4854 if (icmp6 == NULL) {
4855 return PF_DROP;
4856 }
4857
4858 icmp6->icmp6_cksum = 0;
4859 icmp6->icmp6_cksum = pbuf_inet6_cksum(pbuf,
4860 IPPROTO_ICMPV6, hlen,
4861 ntohs(ip6->ip6_plen));
4862 } else if (pd->proto == IPPROTO_UDP) {
4863 struct udphdr *uh;
4864 int hlen = sizeof(*ip6);
4865
4866 uh = (struct udphdr *)pbuf_contig_segment(pbuf, hlen,
4867 sizeof(*uh));
4868 if (uh == NULL) {
4869 return PF_DROP;
4870 }
4871
4872 if (uh->uh_sum == 0) {
4873 uh->uh_sum = pbuf_inet6_cksum(pbuf, IPPROTO_UDP,
4874 hlen, ntohs(ip6->ip6_plen));
4875 }
4876 }
4877
4878 if ((m = pbuf_to_mbuf(pbuf, TRUE)) != NULL) {
4879 ip6_input(m);
4880 }
4881
4882 return PF_NAT64;
4883 }
4884
4885 static __attribute__((noinline)) int
pf_test_rule(struct pf_rule ** rm,struct pf_state ** sm,int direction,struct pfi_kif * kif,pbuf_t * pbuf,int off,void * h,struct pf_pdesc * pd,struct pf_rule ** am,struct pf_ruleset ** rsm,struct ifqueue * ifq)4886 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
4887 struct pfi_kif *kif, pbuf_t *pbuf, int off, void *h,
4888 struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
4889 struct ifqueue *ifq)
4890 {
4891 #pragma unused(h)
4892 struct pf_rule *nr = NULL;
4893 struct pf_addr *saddr = pd->src, *daddr = pd->dst;
4894 sa_family_t af = pd->af;
4895 struct pf_rule *r, *a = NULL;
4896 struct pf_ruleset *ruleset = NULL;
4897 struct pf_src_node *nsn = NULL;
4898 struct tcphdr *th = pd->hdr.tcp;
4899 struct udphdr *uh = pd->hdr.udp;
4900 u_short reason;
4901 int rewrite = 0, hdrlen = 0;
4902 int tag = -1;
4903 unsigned int rtableid = IFSCOPE_NONE;
4904 int asd = 0;
4905 int match = 0;
4906 int state_icmp = 0;
4907 u_int16_t mss = tcp_mssdflt;
4908 u_int8_t icmptype = 0, icmpcode = 0;
4909 #if SKYWALK
4910 netns_token nstoken = NULL;
4911 #endif
4912
4913 struct pf_grev1_hdr *grev1 = pd->hdr.grev1;
4914 union pf_state_xport bxport, bdxport, nxport, sxport, dxport;
4915 struct pf_state_key psk;
4916
4917 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
4918
4919 PD_CLEAR_STATE_FLOWID(pd);
4920
4921 if (direction == PF_IN && pf_check_congestion(ifq)) {
4922 REASON_SET(&reason, PFRES_CONGEST);
4923 return PF_DROP;
4924 }
4925
4926 hdrlen = 0;
4927 sxport.spi = 0;
4928 dxport.spi = 0;
4929 nxport.spi = 0;
4930
4931 switch (pd->proto) {
4932 case IPPROTO_TCP:
4933 sxport.port = th->th_sport;
4934 dxport.port = th->th_dport;
4935 hdrlen = sizeof(*th);
4936 break;
4937 case IPPROTO_UDP:
4938 sxport.port = uh->uh_sport;
4939 dxport.port = uh->uh_dport;
4940 hdrlen = sizeof(*uh);
4941 break;
4942 #if INET
4943 case IPPROTO_ICMP:
4944 if (pd->af != AF_INET) {
4945 break;
4946 }
4947 sxport.port = dxport.port = pd->hdr.icmp->icmp_id;
4948 hdrlen = ICMP_MINLEN;
4949 icmptype = pd->hdr.icmp->icmp_type;
4950 icmpcode = pd->hdr.icmp->icmp_code;
4951
4952 if (ICMP_ERRORTYPE(icmptype)) {
4953 state_icmp++;
4954 }
4955 break;
4956 #endif /* INET */
4957 case IPPROTO_ICMPV6:
4958 if (pd->af != AF_INET6) {
4959 break;
4960 }
4961 sxport.port = dxport.port = pd->hdr.icmp6->icmp6_id;
4962 hdrlen = sizeof(*pd->hdr.icmp6);
4963 icmptype = pd->hdr.icmp6->icmp6_type;
4964 icmpcode = pd->hdr.icmp6->icmp6_code;
4965
4966 if (ICMP6_ERRORTYPE(icmptype)) {
4967 state_icmp++;
4968 }
4969 break;
4970 case IPPROTO_GRE:
4971 if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
4972 sxport.call_id = dxport.call_id =
4973 pd->hdr.grev1->call_id;
4974 hdrlen = sizeof(*pd->hdr.grev1);
4975 }
4976 break;
4977 case IPPROTO_ESP:
4978 sxport.spi = 0;
4979 dxport.spi = pd->hdr.esp->spi;
4980 hdrlen = sizeof(*pd->hdr.esp);
4981 break;
4982 }
4983
4984 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
4985
4986 bxport = sxport;
4987 bdxport = dxport;
4988
4989 if (direction == PF_OUT) {
4990 nxport = sxport;
4991 } else {
4992 nxport = dxport;
4993 }
4994
4995 /* check packet for BINAT/NAT/RDR */
4996 if ((nr = pf_get_translation_aux(pd, pbuf, off, direction, kif, &nsn,
4997 saddr, &sxport, daddr, &dxport, &nxport
4998 #if SKYWALK
4999 , &nstoken
5000 #endif
5001 )) != NULL) {
5002 int ua;
5003 u_int16_t dport;
5004
5005 if (pd->af != pd->naf) {
5006 ua = 0;
5007 } else {
5008 ua = 1;
5009 }
5010
5011 PF_ACPY(&pd->baddr, saddr, af);
5012 PF_ACPY(&pd->bdaddr, daddr, af);
5013
5014 switch (pd->proto) {
5015 case IPPROTO_TCP:
5016 if (pd->af != pd->naf ||
5017 PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5018 pf_change_ap(direction, pd->mp, saddr,
5019 &th->th_sport, pd->ip_sum, &th->th_sum,
5020 &pd->naddr, nxport.port, 0, af,
5021 pd->naf, ua);
5022 sxport.port = th->th_sport;
5023 }
5024
5025 if (pd->af != pd->naf ||
5026 PF_ANEQ(daddr, &pd->ndaddr, pd->af) ||
5027 (nr && (nr->action == PF_RDR) &&
5028 (th->th_dport != nxport.port))) {
5029 if (nr && nr->action == PF_RDR) {
5030 dport = nxport.port;
5031 } else {
5032 dport = th->th_dport;
5033 }
5034 pf_change_ap(direction, pd->mp, daddr,
5035 &th->th_dport, pd->ip_sum,
5036 &th->th_sum, &pd->ndaddr,
5037 dport, 0, af, pd->naf, ua);
5038 dxport.port = th->th_dport;
5039 }
5040 rewrite++;
5041 break;
5042
5043 case IPPROTO_UDP:
5044 if (pd->af != pd->naf ||
5045 PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5046 pf_change_ap(direction, pd->mp, saddr,
5047 &uh->uh_sport, pd->ip_sum,
5048 &uh->uh_sum, &pd->naddr,
5049 nxport.port, 1, af, pd->naf, ua);
5050 sxport.port = uh->uh_sport;
5051 }
5052
5053 if (pd->af != pd->naf ||
5054 PF_ANEQ(daddr, &pd->ndaddr, pd->af) ||
5055 (nr && (nr->action == PF_RDR) &&
5056 (uh->uh_dport != nxport.port))) {
5057 if (nr && nr->action == PF_RDR) {
5058 dport = nxport.port;
5059 } else {
5060 dport = uh->uh_dport;
5061 }
5062 pf_change_ap(direction, pd->mp, daddr,
5063 &uh->uh_dport, pd->ip_sum,
5064 &uh->uh_sum, &pd->ndaddr,
5065 dport, 0, af, pd->naf, ua);
5066 dxport.port = uh->uh_dport;
5067 }
5068 rewrite++;
5069 break;
5070 #if INET
5071 case IPPROTO_ICMP:
5072 if (pd->af != AF_INET) {
5073 break;
5074 }
5075 /*
5076 * TODO:
5077 * pd->af != pd->naf not handled yet here and would be
5078 * needed for NAT46 needed to support XLAT.
5079 * Will cross the bridge when it comes.
5080 */
5081 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5082 pf_change_a(&saddr->v4addr.s_addr, pd->ip_sum,
5083 pd->naddr.v4addr.s_addr, 0);
5084 pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
5085 pd->hdr.icmp->icmp_cksum, sxport.port,
5086 nxport.port, 0);
5087 pd->hdr.icmp->icmp_id = nxport.port;
5088 }
5089
5090 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5091 pf_change_a(&daddr->v4addr.s_addr, pd->ip_sum,
5092 pd->ndaddr.v4addr.s_addr, 0);
5093 }
5094 ++rewrite;
5095 break;
5096 #endif /* INET */
5097 case IPPROTO_ICMPV6:
5098 if (pd->af != AF_INET6) {
5099 break;
5100 }
5101
5102 if (pd->af != pd->naf ||
5103 PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5104 pf_change_addr(saddr,
5105 &pd->hdr.icmp6->icmp6_cksum,
5106 &pd->naddr, 0, pd->af, pd->naf);
5107 }
5108
5109 if (pd->af != pd->naf ||
5110 PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5111 pf_change_addr(daddr,
5112 &pd->hdr.icmp6->icmp6_cksum,
5113 &pd->ndaddr, 0, pd->af, pd->naf);
5114 }
5115
5116 if (pd->af != pd->naf) {
5117 if (pf_translate_icmp_af(AF_INET,
5118 pd->hdr.icmp6)) {
5119 return PF_DROP;
5120 }
5121 pd->proto = IPPROTO_ICMP;
5122 }
5123 rewrite++;
5124 break;
5125 case IPPROTO_GRE:
5126 if ((direction == PF_IN) &&
5127 (pd->proto_variant == PF_GRE_PPTP_VARIANT)) {
5128 grev1->call_id = nxport.call_id;
5129 }
5130
5131 switch (pd->af) {
5132 #if INET
5133 case AF_INET:
5134 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5135 pf_change_a(&saddr->v4addr.s_addr,
5136 pd->ip_sum,
5137 pd->naddr.v4addr.s_addr, 0);
5138 }
5139 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5140 pf_change_a(&daddr->v4addr.s_addr,
5141 pd->ip_sum,
5142 pd->ndaddr.v4addr.s_addr, 0);
5143 }
5144 break;
5145 #endif /* INET */
5146 case AF_INET6:
5147 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5148 PF_ACPY(saddr, &pd->naddr, AF_INET6);
5149 }
5150 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5151 PF_ACPY(daddr, &pd->ndaddr, AF_INET6);
5152 }
5153 break;
5154 }
5155 ++rewrite;
5156 break;
5157 case IPPROTO_ESP:
5158 if (direction == PF_OUT) {
5159 bxport.spi = 0;
5160 }
5161
5162 switch (pd->af) {
5163 #if INET
5164 case AF_INET:
5165 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5166 pf_change_a(&saddr->v4addr.s_addr,
5167 pd->ip_sum, pd->naddr.v4addr.s_addr, 0);
5168 }
5169 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5170 pf_change_a(&daddr->v4addr.s_addr,
5171 pd->ip_sum,
5172 pd->ndaddr.v4addr.s_addr, 0);
5173 }
5174 break;
5175 #endif /* INET */
5176 case AF_INET6:
5177 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5178 PF_ACPY(saddr, &pd->naddr, AF_INET6);
5179 }
5180 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5181 PF_ACPY(daddr, &pd->ndaddr, AF_INET6);
5182 }
5183 break;
5184 }
5185 break;
5186 default:
5187 switch (pd->af) {
5188 #if INET
5189 case AF_INET:
5190 if ((pd->naf != AF_INET) ||
5191 (PF_ANEQ(saddr, &pd->naddr, pd->af))) {
5192 pf_change_addr(saddr, pd->ip_sum,
5193 &pd->naddr, 0, af, pd->naf);
5194 }
5195
5196 if ((pd->naf != AF_INET) ||
5197 (PF_ANEQ(daddr, &pd->ndaddr, pd->af))) {
5198 pf_change_addr(daddr, pd->ip_sum,
5199 &pd->ndaddr, 0, af, pd->naf);
5200 }
5201 break;
5202 #endif /* INET */
5203 case AF_INET6:
5204 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5205 PF_ACPY(saddr, &pd->naddr, af);
5206 }
5207 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5208 PF_ACPY(daddr, &pd->ndaddr, af);
5209 }
5210 break;
5211 }
5212 break;
5213 }
5214
5215 if (nr->natpass) {
5216 r = NULL;
5217 }
5218 pd->nat_rule = nr;
5219 pd->af = pd->naf;
5220 } else {
5221 #if SKYWALK
5222 VERIFY(!NETNS_TOKEN_VALID(&nstoken));
5223 #endif
5224 }
5225
5226 if (nr && nr->tag > 0) {
5227 tag = nr->tag;
5228 }
5229
5230 while (r != NULL) {
5231 r->evaluations++;
5232 if (pfi_kif_match(r->kif, kif) == r->ifnot) {
5233 r = r->skip[PF_SKIP_IFP].ptr;
5234 } else if (r->direction && r->direction != direction) {
5235 r = r->skip[PF_SKIP_DIR].ptr;
5236 } else if (r->af && r->af != pd->af) {
5237 r = r->skip[PF_SKIP_AF].ptr;
5238 } else if (r->proto && r->proto != pd->proto) {
5239 r = r->skip[PF_SKIP_PROTO].ptr;
5240 } else if (PF_MISMATCHAW(&r->src.addr, saddr, pd->af,
5241 r->src.neg, kif)) {
5242 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
5243 }
5244 /* tcp/udp only. port_op always 0 in other cases */
5245 else if (r->proto == pd->proto &&
5246 (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
5247 r->src.xport.range.op &&
5248 !pf_match_port(r->src.xport.range.op,
5249 r->src.xport.range.port[0], r->src.xport.range.port[1],
5250 th->th_sport)) {
5251 r = r->skip[PF_SKIP_SRC_PORT].ptr;
5252 } else if (PF_MISMATCHAW(&r->dst.addr, daddr, pd->af,
5253 r->dst.neg, NULL)) {
5254 r = r->skip[PF_SKIP_DST_ADDR].ptr;
5255 }
5256 /* tcp/udp only. port_op always 0 in other cases */
5257 else if (r->proto == pd->proto &&
5258 (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
5259 r->dst.xport.range.op &&
5260 !pf_match_port(r->dst.xport.range.op,
5261 r->dst.xport.range.port[0], r->dst.xport.range.port[1],
5262 th->th_dport)) {
5263 r = r->skip[PF_SKIP_DST_PORT].ptr;
5264 }
5265 /* icmp only. type always 0 in other cases */
5266 else if (r->type && r->type != icmptype + 1) {
5267 r = TAILQ_NEXT(r, entries);
5268 }
5269 /* icmp only. type always 0 in other cases */
5270 else if (r->code && r->code != icmpcode + 1) {
5271 r = TAILQ_NEXT(r, entries);
5272 } else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
5273 !(r->tos & pd->tos)) {
5274 r = TAILQ_NEXT(r, entries);
5275 } else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
5276 !(r->tos & (pd->tos & DSCP_MASK))) {
5277 r = TAILQ_NEXT(r, entries);
5278 } else if ((r->rule_flag & PFRULE_SC) && r->tos &&
5279 ((r->tos & SCIDX_MASK) != pd->sc)) {
5280 r = TAILQ_NEXT(r, entries);
5281 } else if (r->rule_flag & PFRULE_FRAGMENT) {
5282 r = TAILQ_NEXT(r, entries);
5283 } else if (pd->proto == IPPROTO_TCP &&
5284 (r->flagset & th->th_flags) != r->flags) {
5285 r = TAILQ_NEXT(r, entries);
5286 }
5287 /* tcp/udp only. uid.op always 0 in other cases */
5288 else if (r->uid.op && (pd->lookup.done || ((void)(pd->lookup.done =
5289 pf_socket_lookup(direction, pd)), 1)) &&
5290 !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
5291 pd->lookup.uid)) {
5292 r = TAILQ_NEXT(r, entries);
5293 }
5294 /* tcp/udp only. gid.op always 0 in other cases */
5295 else if (r->gid.op && (pd->lookup.done || ((void)(pd->lookup.done =
5296 pf_socket_lookup(direction, pd)), 1)) &&
5297 !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
5298 pd->lookup.gid)) {
5299 r = TAILQ_NEXT(r, entries);
5300 } else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
5301 r = TAILQ_NEXT(r, entries);
5302 } else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
5303 r = TAILQ_NEXT(r, entries);
5304 } else if (r->os_fingerprint != PF_OSFP_ANY &&
5305 (pd->proto != IPPROTO_TCP || !pf_osfp_match(
5306 pf_osfp_fingerprint(pd, pbuf, off, th),
5307 r->os_fingerprint))) {
5308 r = TAILQ_NEXT(r, entries);
5309 } else {
5310 if (r->tag) {
5311 tag = r->tag;
5312 }
5313 if (PF_RTABLEID_IS_VALID(r->rtableid)) {
5314 rtableid = r->rtableid;
5315 }
5316 if (r->anchor == NULL) {
5317 match = 1;
5318 *rm = r;
5319 *am = a;
5320 *rsm = ruleset;
5321 if ((*rm)->quick) {
5322 break;
5323 }
5324 r = TAILQ_NEXT(r, entries);
5325 } else {
5326 pf_step_into_anchor(&asd, &ruleset,
5327 PF_RULESET_FILTER, &r, &a, &match);
5328 }
5329 }
5330 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
5331 PF_RULESET_FILTER, &r, &a, &match)) {
5332 break;
5333 }
5334 }
5335 r = *rm;
5336 a = *am;
5337 ruleset = *rsm;
5338
5339 REASON_SET(&reason, PFRES_MATCH);
5340
5341 if (r->log || (nr != NULL && nr->log)) {
5342 if (rewrite > 0) {
5343 if (rewrite < off + hdrlen) {
5344 rewrite = off + hdrlen;
5345 }
5346
5347 if (pf_lazy_makewritable(pd, pbuf, rewrite) == NULL) {
5348 REASON_SET(&reason, PFRES_MEMORY);
5349 #if SKYWALK
5350 netns_release(&nstoken);
5351 #endif
5352 return PF_DROP;
5353 }
5354
5355 pbuf_copy_back(pbuf, off, hdrlen, pd->hdr.any);
5356 }
5357 PFLOG_PACKET(kif, h, pbuf, pd->af, direction, reason,
5358 r->log ? r : nr, a, ruleset, pd);
5359 }
5360
5361 if ((r->action == PF_DROP) &&
5362 ((r->rule_flag & PFRULE_RETURNRST) ||
5363 (r->rule_flag & PFRULE_RETURNICMP) ||
5364 (r->rule_flag & PFRULE_RETURN))) {
5365 /* undo NAT changes, if they have taken place */
5366 /* XXX For NAT64 we are not reverting the changes */
5367 if (nr != NULL && nr->action != PF_NAT64) {
5368 if (direction == PF_OUT) {
5369 pd->af = af;
5370 switch (pd->proto) {
5371 case IPPROTO_TCP:
5372 pf_change_ap(direction, pd->mp, saddr,
5373 &th->th_sport, pd->ip_sum,
5374 &th->th_sum, &pd->baddr,
5375 bxport.port, 0, af, pd->af, 1);
5376 sxport.port = th->th_sport;
5377 rewrite++;
5378 break;
5379 case IPPROTO_UDP:
5380 pf_change_ap(direction, pd->mp, saddr,
5381 &pd->hdr.udp->uh_sport, pd->ip_sum,
5382 &pd->hdr.udp->uh_sum, &pd->baddr,
5383 bxport.port, 1, af, pd->af, 1);
5384 sxport.port = pd->hdr.udp->uh_sport;
5385 rewrite++;
5386 break;
5387 case IPPROTO_ICMP:
5388 case IPPROTO_ICMPV6:
5389 /* nothing! */
5390 break;
5391 case IPPROTO_GRE:
5392 PF_ACPY(&pd->baddr, saddr, af);
5393 ++rewrite;
5394 switch (af) {
5395 #if INET
5396 case AF_INET:
5397 pf_change_a(&saddr->v4addr.s_addr,
5398 pd->ip_sum,
5399 pd->baddr.v4addr.s_addr, 0);
5400 break;
5401 #endif /* INET */
5402 case AF_INET6:
5403 PF_ACPY(saddr, &pd->baddr,
5404 AF_INET6);
5405 break;
5406 }
5407 break;
5408 case IPPROTO_ESP:
5409 PF_ACPY(&pd->baddr, saddr, af);
5410 switch (af) {
5411 #if INET
5412 case AF_INET:
5413 pf_change_a(&saddr->v4addr.s_addr,
5414 pd->ip_sum,
5415 pd->baddr.v4addr.s_addr, 0);
5416 break;
5417 #endif /* INET */
5418 case AF_INET6:
5419 PF_ACPY(saddr, &pd->baddr,
5420 AF_INET6);
5421 break;
5422 }
5423 break;
5424 default:
5425 switch (af) {
5426 case AF_INET:
5427 pf_change_a(&saddr->v4addr.s_addr,
5428 pd->ip_sum,
5429 pd->baddr.v4addr.s_addr, 0);
5430 break;
5431 case AF_INET6:
5432 PF_ACPY(saddr, &pd->baddr, af);
5433 break;
5434 }
5435 }
5436 } else {
5437 switch (pd->proto) {
5438 case IPPROTO_TCP:
5439 pf_change_ap(direction, pd->mp, daddr,
5440 &th->th_dport, pd->ip_sum,
5441 &th->th_sum, &pd->bdaddr,
5442 bdxport.port, 0, af, pd->af, 1);
5443 dxport.port = th->th_dport;
5444 rewrite++;
5445 break;
5446 case IPPROTO_UDP:
5447 pf_change_ap(direction, pd->mp, daddr,
5448 &pd->hdr.udp->uh_dport, pd->ip_sum,
5449 &pd->hdr.udp->uh_sum, &pd->bdaddr,
5450 bdxport.port, 1, af, pd->af, 1);
5451 dxport.port = pd->hdr.udp->uh_dport;
5452 rewrite++;
5453 break;
5454 case IPPROTO_ICMP:
5455 case IPPROTO_ICMPV6:
5456 /* nothing! */
5457 break;
5458 case IPPROTO_GRE:
5459 if (pd->proto_variant ==
5460 PF_GRE_PPTP_VARIANT) {
5461 grev1->call_id =
5462 bdxport.call_id;
5463 }
5464 ++rewrite;
5465 switch (af) {
5466 #if INET
5467 case AF_INET:
5468 pf_change_a(&daddr->v4addr.s_addr,
5469 pd->ip_sum,
5470 pd->bdaddr.v4addr.s_addr, 0);
5471 break;
5472 #endif /* INET */
5473 case AF_INET6:
5474 PF_ACPY(daddr, &pd->bdaddr,
5475 AF_INET6);
5476 break;
5477 }
5478 break;
5479 case IPPROTO_ESP:
5480 switch (af) {
5481 #if INET
5482 case AF_INET:
5483 pf_change_a(&daddr->v4addr.s_addr,
5484 pd->ip_sum,
5485 pd->bdaddr.v4addr.s_addr, 0);
5486 break;
5487 #endif /* INET */
5488 case AF_INET6:
5489 PF_ACPY(daddr, &pd->bdaddr,
5490 AF_INET6);
5491 break;
5492 }
5493 break;
5494 default:
5495 switch (af) {
5496 case AF_INET:
5497 pf_change_a(&daddr->v4addr.s_addr,
5498 pd->ip_sum,
5499 pd->bdaddr.v4addr.s_addr, 0);
5500 break;
5501 case AF_INET6:
5502 PF_ACPY(daddr, &pd->bdaddr, af);
5503 break;
5504 }
5505 }
5506 }
5507 }
5508 if (pd->proto == IPPROTO_TCP &&
5509 ((r->rule_flag & PFRULE_RETURNRST) ||
5510 (r->rule_flag & PFRULE_RETURN)) &&
5511 !(th->th_flags & TH_RST)) {
5512 u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
5513 int len = 0;
5514 struct ip *h4;
5515 struct ip6_hdr *h6;
5516
5517 switch (pd->af) {
5518 case AF_INET:
5519 h4 = pbuf->pb_data;
5520 len = ntohs(h4->ip_len) - off;
5521 break;
5522 case AF_INET6:
5523 h6 = pbuf->pb_data;
5524 len = ntohs(h6->ip6_plen) -
5525 (off - sizeof(*h6));
5526 break;
5527 }
5528
5529 if (pf_check_proto_cksum(pbuf, off, len, IPPROTO_TCP,
5530 pd->af)) {
5531 REASON_SET(&reason, PFRES_PROTCKSUM);
5532 } else {
5533 if (th->th_flags & TH_SYN) {
5534 ack++;
5535 }
5536 if (th->th_flags & TH_FIN) {
5537 ack++;
5538 }
5539 pf_send_tcp(r, pd->af, pd->dst,
5540 pd->src, th->th_dport, th->th_sport,
5541 ntohl(th->th_ack), ack, TH_RST | TH_ACK, 0, 0,
5542 r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
5543 }
5544 } else if (pd->proto != IPPROTO_ICMP && pd->af == AF_INET &&
5545 pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
5546 r->return_icmp) {
5547 pf_send_icmp(pbuf, r->return_icmp >> 8,
5548 r->return_icmp & 255, pd->af, r);
5549 } else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
5550 pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
5551 r->return_icmp6) {
5552 pf_send_icmp(pbuf, r->return_icmp6 >> 8,
5553 r->return_icmp6 & 255, pd->af, r);
5554 }
5555 }
5556
5557 if (r->action == PF_DROP) {
5558 #if SKYWALK
5559 netns_release(&nstoken);
5560 #endif
5561 return PF_DROP;
5562 }
5563
5564 /* prepare state key, for flowhash and/or the state (if created) */
5565 bzero(&psk, sizeof(psk));
5566 psk.proto = pd->proto;
5567 psk.direction = direction;
5568 if (pd->proto == IPPROTO_UDP) {
5569 if (ntohs(pd->hdr.udp->uh_sport) == PF_IKE_PORT &&
5570 ntohs(pd->hdr.udp->uh_dport) == PF_IKE_PORT) {
5571 psk.proto_variant = PF_EXTFILTER_APD;
5572 } else {
5573 psk.proto_variant = nr ? nr->extfilter : r->extfilter;
5574 if (psk.proto_variant < PF_EXTFILTER_APD) {
5575 psk.proto_variant = PF_EXTFILTER_APD;
5576 }
5577 }
5578 } else if (pd->proto == IPPROTO_GRE) {
5579 psk.proto_variant = pd->proto_variant;
5580 }
5581 if (direction == PF_OUT) {
5582 psk.af_gwy = af;
5583 PF_ACPY(&psk.gwy.addr, saddr, af);
5584 PF_ACPY(&psk.ext_gwy.addr, daddr, af);
5585 switch (pd->proto) {
5586 case IPPROTO_ESP:
5587 psk.gwy.xport.spi = 0;
5588 psk.ext_gwy.xport.spi = pd->hdr.esp->spi;
5589 break;
5590 case IPPROTO_ICMP:
5591 case IPPROTO_ICMPV6:
5592 /*
5593 * NAT64 requires protocol translation between ICMPv4
5594 * and ICMPv6. TCP and UDP do not require protocol
5595 * translation. To avoid adding complexity just to
5596 * handle ICMP(v4addr/v6addr), we always lookup for
5597 * proto = IPPROTO_ICMP on both LAN and WAN side
5598 */
5599 psk.proto = IPPROTO_ICMP;
5600 psk.gwy.xport.port = nxport.port;
5601 psk.ext_gwy.xport.spi = 0;
5602 break;
5603 default:
5604 psk.gwy.xport = sxport;
5605 psk.ext_gwy.xport = dxport;
5606 break;
5607 }
5608 psk.af_lan = af;
5609 if (nr != NULL) {
5610 PF_ACPY(&psk.lan.addr, &pd->baddr, af);
5611 psk.lan.xport = bxport;
5612 PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af);
5613 psk.ext_lan.xport = bdxport;
5614 } else {
5615 PF_ACPY(&psk.lan.addr, &psk.gwy.addr, af);
5616 psk.lan.xport = psk.gwy.xport;
5617 PF_ACPY(&psk.ext_lan.addr, &psk.ext_gwy.addr, af);
5618 psk.ext_lan.xport = psk.ext_gwy.xport;
5619 }
5620 } else {
5621 psk.af_lan = af;
5622 if (nr && nr->action == PF_NAT64) {
5623 PF_ACPY(&psk.lan.addr, &pd->baddr, af);
5624 PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af);
5625 } else {
5626 PF_ACPY(&psk.lan.addr, daddr, af);
5627 PF_ACPY(&psk.ext_lan.addr, saddr, af);
5628 }
5629 switch (pd->proto) {
5630 case IPPROTO_ICMP:
5631 case IPPROTO_ICMPV6:
5632 /*
5633 * NAT64 requires protocol translation between ICMPv4
5634 * and ICMPv6. TCP and UDP do not require protocol
5635 * translation. To avoid adding complexity just to
5636 * handle ICMP(v4addr/v6addr), we always lookup for
5637 * proto = IPPROTO_ICMP on both LAN and WAN side
5638 */
5639 psk.proto = IPPROTO_ICMP;
5640 if (nr && nr->action == PF_NAT64) {
5641 psk.lan.xport = bxport;
5642 psk.ext_lan.xport = bxport;
5643 } else {
5644 psk.lan.xport = nxport;
5645 psk.ext_lan.xport.spi = 0;
5646 }
5647 break;
5648 case IPPROTO_ESP:
5649 psk.ext_lan.xport.spi = 0;
5650 psk.lan.xport.spi = pd->hdr.esp->spi;
5651 break;
5652 default:
5653 if (nr != NULL) {
5654 if (nr->action == PF_NAT64) {
5655 psk.lan.xport = bxport;
5656 psk.ext_lan.xport = bdxport;
5657 } else {
5658 psk.lan.xport = dxport;
5659 psk.ext_lan.xport = sxport;
5660 }
5661 } else {
5662 psk.lan.xport = dxport;
5663 psk.ext_lan.xport = sxport;
5664 }
5665 break;
5666 }
5667 psk.af_gwy = pd->naf;
5668 if (nr != NULL) {
5669 if (nr->action == PF_NAT64) {
5670 PF_ACPY(&psk.gwy.addr, &pd->naddr, pd->naf);
5671 PF_ACPY(&psk.ext_gwy.addr, &pd->ndaddr,
5672 pd->naf);
5673 if ((pd->proto == IPPROTO_ICMPV6) ||
5674 (pd->proto == IPPROTO_ICMP)) {
5675 psk.gwy.xport = nxport;
5676 psk.ext_gwy.xport = nxport;
5677 } else {
5678 psk.gwy.xport = sxport;
5679 psk.ext_gwy.xport = dxport;
5680 }
5681 } else {
5682 PF_ACPY(&psk.gwy.addr, &pd->bdaddr, af);
5683 psk.gwy.xport = bdxport;
5684 PF_ACPY(&psk.ext_gwy.addr, saddr, af);
5685 psk.ext_gwy.xport = sxport;
5686 }
5687 } else {
5688 PF_ACPY(&psk.gwy.addr, &psk.lan.addr, af);
5689 psk.gwy.xport = psk.lan.xport;
5690 PF_ACPY(&psk.ext_gwy.addr, &psk.ext_lan.addr, af);
5691 psk.ext_gwy.xport = psk.ext_lan.xport;
5692 }
5693 }
5694 if (pd->pktflags & PKTF_FLOW_ID) {
5695 /* flow hash was already computed outside of PF */
5696 psk.flowsrc = pd->flowsrc;
5697 psk.flowhash = pd->flowhash;
5698 } else {
5699 /*
5700 * Allocation of flow identifier is deferred until a PF state
5701 * creation is needed for this flow.
5702 */
5703 pd->pktflags &= ~PKTF_FLOW_ADV;
5704 pd->flowhash = 0;
5705 }
5706
5707 if (__improbable(pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, pd))) {
5708 REASON_SET(&reason, PFRES_MEMORY);
5709 #if SKYWALK
5710 netns_release(&nstoken);
5711 #endif
5712 return PF_DROP;
5713 }
5714
5715 if (!state_icmp && (r->keep_state || nr != NULL ||
5716 (pd->flags & PFDESC_TCP_NORM))) {
5717 /* create new state */
5718 struct pf_state *s = NULL;
5719 struct pf_state_key *sk = NULL;
5720 struct pf_src_node *sn = NULL;
5721 struct pf_ike_hdr ike;
5722
5723 if (pd->proto == IPPROTO_UDP) {
5724 size_t plen = pbuf->pb_packet_len - off - sizeof(*uh);
5725
5726 if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
5727 ntohs(uh->uh_dport) == PF_IKE_PORT &&
5728 plen >= PF_IKE_PACKET_MINSIZE) {
5729 if (plen > PF_IKE_PACKET_MINSIZE) {
5730 plen = PF_IKE_PACKET_MINSIZE;
5731 }
5732 pbuf_copy_data(pbuf, off + sizeof(*uh), plen,
5733 &ike);
5734 }
5735 }
5736
5737 if (nr != NULL && pd->proto == IPPROTO_ESP &&
5738 direction == PF_OUT) {
5739 struct pf_state_key_cmp sk0;
5740 struct pf_state *s0;
5741
5742 /*
5743 * <[email protected]>
5744 * This squelches state creation if the external
5745 * address matches an existing incomplete state with a
5746 * different internal address. Only one 'blocking'
5747 * partial state is allowed for each external address.
5748 */
5749 #if SKYWALK
5750 /*
5751 * XXXSCW:
5752 *
5753 * It's not clear how this impacts netns. The original
5754 * state will hold the port reservation token but what
5755 * happens to other "Cone NAT" states when the first is
5756 * torn down?
5757 */
5758 #endif
5759 memset(&sk0, 0, sizeof(sk0));
5760 sk0.af_gwy = pd->af;
5761 sk0.proto = IPPROTO_ESP;
5762 PF_ACPY(&sk0.gwy.addr, saddr, sk0.af_gwy);
5763 PF_ACPY(&sk0.ext_gwy.addr, daddr, sk0.af_gwy);
5764 s0 = pf_find_state(kif, &sk0, PF_IN);
5765
5766 if (s0 && PF_ANEQ(&s0->state_key->lan.addr,
5767 pd->src, pd->af)) {
5768 nsn = 0;
5769 goto cleanup;
5770 }
5771 }
5772
5773 /* check maximums */
5774 if (r->max_states && (r->states >= r->max_states)) {
5775 pf_status.lcounters[LCNT_STATES]++;
5776 REASON_SET(&reason, PFRES_MAXSTATES);
5777 goto cleanup;
5778 }
5779 /* src node for filter rule */
5780 if ((r->rule_flag & PFRULE_SRCTRACK ||
5781 r->rpool.opts & PF_POOL_STICKYADDR) &&
5782 pf_insert_src_node(&sn, r, saddr, af) != 0) {
5783 REASON_SET(&reason, PFRES_SRCLIMIT);
5784 goto cleanup;
5785 }
5786 /* src node for translation rule */
5787 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
5788 ((direction == PF_OUT &&
5789 nr->action != PF_RDR &&
5790 pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
5791 (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
5792 REASON_SET(&reason, PFRES_SRCLIMIT);
5793 goto cleanup;
5794 }
5795 s = pool_get(&pf_state_pl, PR_WAITOK);
5796 if (s == NULL) {
5797 REASON_SET(&reason, PFRES_MEMORY);
5798 cleanup:
5799 if (sn != NULL && sn->states == 0 && sn->expire == 0) {
5800 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
5801 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
5802 pf_status.src_nodes--;
5803 pool_put(&pf_src_tree_pl, sn);
5804 }
5805 if (nsn != sn && nsn != NULL && nsn->states == 0 &&
5806 nsn->expire == 0) {
5807 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
5808 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
5809 pf_status.src_nodes--;
5810 pool_put(&pf_src_tree_pl, nsn);
5811 }
5812 if (s != NULL) {
5813 pf_detach_state(s, 0);
5814 } else if (sk != NULL) {
5815 if (sk->app_state) {
5816 pool_put(&pf_app_state_pl,
5817 sk->app_state);
5818 }
5819 pf_state_key_release_flowid(sk);
5820 pool_put(&pf_state_key_pl, sk);
5821 }
5822 #if SKYWALK
5823 netns_release(&nstoken);
5824 #endif
5825 return PF_DROP;
5826 }
5827 bzero(s, sizeof(*s));
5828 TAILQ_INIT(&s->unlink_hooks);
5829 s->rule.ptr = r;
5830 s->nat_rule.ptr = nr;
5831 s->anchor.ptr = a;
5832 STATE_INC_COUNTERS(s);
5833 s->allow_opts = r->allow_opts;
5834 s->log = r->log & PF_LOG_ALL;
5835 if (nr != NULL) {
5836 s->log |= nr->log & PF_LOG_ALL;
5837 }
5838 switch (pd->proto) {
5839 case IPPROTO_TCP:
5840 s->src.seqlo = ntohl(th->th_seq);
5841 s->src.seqhi = s->src.seqlo + pd->p_len + 1;
5842 if ((th->th_flags & (TH_SYN | TH_ACK)) ==
5843 TH_SYN && r->keep_state == PF_STATE_MODULATE) {
5844 /* Generate sequence number modulator */
5845 if ((s->src.seqdiff = pf_tcp_iss(pd) -
5846 s->src.seqlo) == 0) {
5847 s->src.seqdiff = 1;
5848 }
5849 pf_change_a(&th->th_seq, &th->th_sum,
5850 htonl(s->src.seqlo + s->src.seqdiff), 0);
5851 rewrite = off + sizeof(*th);
5852 } else {
5853 s->src.seqdiff = 0;
5854 }
5855 if (th->th_flags & TH_SYN) {
5856 s->src.seqhi++;
5857 s->src.wscale = pf_get_wscale(pbuf, off,
5858 th->th_off, af);
5859 }
5860 s->src.max_win = MAX(ntohs(th->th_win), 1);
5861 if (s->src.wscale & PF_WSCALE_MASK) {
5862 /* Remove scale factor from initial window */
5863 int win = s->src.max_win;
5864 win += 1 << (s->src.wscale & PF_WSCALE_MASK);
5865 s->src.max_win = (win - 1) >>
5866 (s->src.wscale & PF_WSCALE_MASK);
5867 }
5868 if (th->th_flags & TH_FIN) {
5869 s->src.seqhi++;
5870 }
5871 s->dst.seqhi = 1;
5872 s->dst.max_win = 1;
5873 s->src.state = TCPS_SYN_SENT;
5874 s->dst.state = TCPS_CLOSED;
5875 s->timeout = PFTM_TCP_FIRST_PACKET;
5876 break;
5877 case IPPROTO_UDP:
5878 s->src.state = PFUDPS_SINGLE;
5879 s->dst.state = PFUDPS_NO_TRAFFIC;
5880 s->timeout = PFTM_UDP_FIRST_PACKET;
5881 break;
5882 case IPPROTO_ICMP:
5883 case IPPROTO_ICMPV6:
5884 s->timeout = PFTM_ICMP_FIRST_PACKET;
5885 break;
5886 case IPPROTO_GRE:
5887 s->src.state = PFGRE1S_INITIATING;
5888 s->dst.state = PFGRE1S_NO_TRAFFIC;
5889 s->timeout = PFTM_GREv1_INITIATING;
5890 break;
5891 case IPPROTO_ESP:
5892 s->src.state = PFESPS_INITIATING;
5893 s->dst.state = PFESPS_NO_TRAFFIC;
5894 s->timeout = PFTM_ESP_FIRST_PACKET;
5895 break;
5896 default:
5897 s->src.state = PFOTHERS_SINGLE;
5898 s->dst.state = PFOTHERS_NO_TRAFFIC;
5899 s->timeout = PFTM_OTHER_FIRST_PACKET;
5900 }
5901
5902 s->creation = pf_time_second();
5903 s->expire = pf_time_second();
5904
5905 if (sn != NULL) {
5906 s->src_node = sn;
5907 s->src_node->states++;
5908 VERIFY(s->src_node->states != 0);
5909 }
5910 if (nsn != NULL) {
5911 PF_ACPY(&nsn->raddr, &pd->naddr, af);
5912 s->nat_src_node = nsn;
5913 s->nat_src_node->states++;
5914 VERIFY(s->nat_src_node->states != 0);
5915 }
5916 if (pd->proto == IPPROTO_TCP) {
5917 if ((pd->flags & PFDESC_TCP_NORM) &&
5918 pf_normalize_tcp_init(pbuf, off, pd, th, &s->src,
5919 &s->dst)) {
5920 REASON_SET(&reason, PFRES_MEMORY);
5921 pf_src_tree_remove_state(s);
5922 STATE_DEC_COUNTERS(s);
5923 #if SKYWALK
5924 netns_release(&nstoken);
5925 #endif
5926 pool_put(&pf_state_pl, s);
5927 return PF_DROP;
5928 }
5929 if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
5930 pf_normalize_tcp_stateful(pbuf, off, pd, &reason,
5931 th, s, &s->src, &s->dst, &rewrite)) {
5932 /* This really shouldn't happen!!! */
5933 DPFPRINTF(PF_DEBUG_URGENT,
5934 ("pf_normalize_tcp_stateful failed on "
5935 "first pkt"));
5936 #if SKYWALK
5937 netns_release(&nstoken);
5938 #endif
5939 pf_normalize_tcp_cleanup(s);
5940 pf_src_tree_remove_state(s);
5941 STATE_DEC_COUNTERS(s);
5942 pool_put(&pf_state_pl, s);
5943 return PF_DROP;
5944 }
5945 }
5946
5947 /* allocate state key and import values from psk */
5948 if (__improbable((sk = pf_alloc_state_key(s, &psk)) == NULL)) {
5949 REASON_SET(&reason, PFRES_MEMORY);
5950 /*
5951 * XXXSCW: This will leak the freshly-allocated
5952 * state structure 's'. Although it should
5953 * eventually be aged-out and removed.
5954 */
5955 goto cleanup;
5956 }
5957
5958 if (pd->flowhash == 0) {
5959 ASSERT(sk->flowhash != 0);
5960 ASSERT(sk->flowsrc != 0);
5961 pd->flowsrc = sk->flowsrc;
5962 pd->flowhash = sk->flowhash;
5963 pd->pktflags |= PKTF_FLOW_ID;
5964 pd->pktflags &= ~PKTF_FLOW_ADV;
5965 if (__improbable(pf_tag_packet(pbuf, pd->pf_mtag,
5966 tag, rtableid, pd))) {
5967 /*
5968 * this shouldn't fail as the packet tag has
5969 * already been allocated.
5970 */
5971 panic_plain("pf_tag_packet failed");
5972 }
5973 }
5974
5975 pf_set_rt_ifp(s, saddr, af); /* needs s->state_key set */
5976
5977 pbuf = pd->mp; // XXXSCW: Why?
5978
5979 if (sk->app_state == 0) {
5980 switch (pd->proto) {
5981 case IPPROTO_TCP: {
5982 u_int16_t dport = (direction == PF_OUT) ?
5983 sk->ext_gwy.xport.port : sk->gwy.xport.port;
5984
5985 if (nr != NULL &&
5986 ntohs(dport) == PF_PPTP_PORT) {
5987 struct pf_app_state *as;
5988
5989 as = pool_get(&pf_app_state_pl,
5990 PR_WAITOK);
5991 if (!as) {
5992 REASON_SET(&reason,
5993 PFRES_MEMORY);
5994 goto cleanup;
5995 }
5996
5997 bzero(as, sizeof(*as));
5998 as->handler = pf_pptp_handler;
5999 as->compare_lan_ext = 0;
6000 as->compare_ext_gwy = 0;
6001 as->u.pptp.grev1_state = 0;
6002 sk->app_state = as;
6003 (void) hook_establish(&s->unlink_hooks,
6004 0, (hook_fn_t) pf_pptp_unlink, s);
6005 }
6006 break;
6007 }
6008
6009 case IPPROTO_UDP: {
6010 if (nr != NULL &&
6011 ntohs(uh->uh_sport) == PF_IKE_PORT &&
6012 ntohs(uh->uh_dport) == PF_IKE_PORT) {
6013 struct pf_app_state *as;
6014
6015 as = pool_get(&pf_app_state_pl,
6016 PR_WAITOK);
6017 if (!as) {
6018 REASON_SET(&reason,
6019 PFRES_MEMORY);
6020 goto cleanup;
6021 }
6022
6023 bzero(as, sizeof(*as));
6024 as->compare_lan_ext = pf_ike_compare;
6025 as->compare_ext_gwy = pf_ike_compare;
6026 as->u.ike.cookie = ike.initiator_cookie;
6027 sk->app_state = as;
6028 }
6029 break;
6030 }
6031
6032 default:
6033 break;
6034 }
6035 }
6036
6037 if (__improbable(pf_insert_state(BOUND_IFACE(r, kif), s))) {
6038 if (pd->proto == IPPROTO_TCP) {
6039 pf_normalize_tcp_cleanup(s);
6040 }
6041 REASON_SET(&reason, PFRES_STATEINS);
6042 pf_src_tree_remove_state(s);
6043 STATE_DEC_COUNTERS(s);
6044 #if SKYWALK
6045 netns_release(&nstoken);
6046 #endif
6047 pool_put(&pf_state_pl, s);
6048 return PF_DROP;
6049 } else {
6050 #if SKYWALK
6051 s->nstoken = nstoken;
6052 nstoken = NULL;
6053 #endif
6054 *sm = s;
6055 }
6056 if (tag > 0) {
6057 pf_tag_ref(tag);
6058 s->tag = tag;
6059 }
6060 if (pd->proto == IPPROTO_TCP &&
6061 (th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN &&
6062 r->keep_state == PF_STATE_SYNPROXY) {
6063 int ua = (sk->af_lan == sk->af_gwy) ? 1 : 0;
6064 s->src.state = PF_TCPS_PROXY_SRC;
6065 if (nr != NULL) {
6066 if (direction == PF_OUT) {
6067 pf_change_ap(direction, pd->mp, saddr,
6068 &th->th_sport, pd->ip_sum,
6069 &th->th_sum, &pd->baddr,
6070 bxport.port, 0, af, pd->af, ua);
6071 sxport.port = th->th_sport;
6072 } else {
6073 pf_change_ap(direction, pd->mp, daddr,
6074 &th->th_dport, pd->ip_sum,
6075 &th->th_sum, &pd->baddr,
6076 bxport.port, 0, af, pd->af, ua);
6077 sxport.port = th->th_dport;
6078 }
6079 }
6080 s->src.seqhi = htonl(random());
6081 /* Find mss option */
6082 mss = pf_get_mss(pbuf, off, th->th_off, af);
6083 mss = pf_calc_mss(saddr, af, mss);
6084 mss = pf_calc_mss(daddr, af, mss);
6085 s->src.mss = mss;
6086 pf_send_tcp(r, af, daddr, saddr, th->th_dport,
6087 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
6088 TH_SYN | TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
6089 REASON_SET(&reason, PFRES_SYNPROXY);
6090 return PF_SYNPROXY_DROP;
6091 }
6092
6093 if (sk->app_state && sk->app_state->handler) {
6094 int offx = off;
6095
6096 switch (pd->proto) {
6097 case IPPROTO_TCP:
6098 offx += th->th_off << 2;
6099 break;
6100 case IPPROTO_UDP:
6101 offx += pd->hdr.udp->uh_ulen << 2;
6102 break;
6103 default:
6104 /* ALG handlers only apply to TCP and UDP rules */
6105 break;
6106 }
6107
6108 if (offx > off) {
6109 sk->app_state->handler(s, direction, offx,
6110 pd, kif);
6111 if (pd->lmw < 0) {
6112 REASON_SET(&reason, PFRES_MEMORY);
6113 return PF_DROP;
6114 }
6115 pbuf = pd->mp; // XXXSCW: Why?
6116 }
6117 }
6118 }
6119 #if SKYWALK
6120 else {
6121 netns_release(&nstoken);
6122 }
6123 #endif
6124
6125 /* copy back packet headers if we performed NAT operations */
6126 if (rewrite) {
6127 if (rewrite < off + hdrlen) {
6128 rewrite = off + hdrlen;
6129 }
6130
6131 if (pf_lazy_makewritable(pd, pd->mp, rewrite) == NULL) {
6132 REASON_SET(&reason, PFRES_MEMORY);
6133 return PF_DROP;
6134 }
6135
6136 pbuf_copy_back(pbuf, off, hdrlen, pd->hdr.any);
6137 if (af == AF_INET6 && pd->naf == AF_INET) {
6138 return pf_nat64_ipv6(pbuf, off, pd);
6139 } else if (af == AF_INET && pd->naf == AF_INET6) {
6140 return pf_nat64_ipv4(pbuf, off, pd);
6141 }
6142 }
6143
6144 return PF_PASS;
6145 }
6146
/*
 * Global enable flag consulted by PF fast paths; presumably "NLC" is the
 * Network Link Conditioner -- TODO(review): confirm the expansion and the
 * writer(s) of this flag, which are outside this file chunk.
 */
boolean_t is_nlc_enabled_glb = FALSE;
6148
6149 static inline boolean_t
pf_is_dummynet_enabled(void)6150 pf_is_dummynet_enabled(void)
6151 {
6152 #if DUMMYNET
6153 if (__probable(!PF_IS_ENABLED)) {
6154 return FALSE;
6155 }
6156
6157 if (__probable(!DUMMYNET_LOADED)) {
6158 return FALSE;
6159 }
6160
6161 if (__probable(TAILQ_EMPTY(pf_main_ruleset.
6162 rules[PF_RULESET_DUMMYNET].active.ptr))) {
6163 return FALSE;
6164 }
6165
6166 return TRUE;
6167 #else
6168 return FALSE;
6169 #endif /* DUMMYNET */
6170 }
6171
#if DUMMYNET
/*
 * Match the packet against the active dummynet ruleset and, on a match,
 * hand the packet off to dummynet for shaping.
 *
 * When pf_test_dummynet() returns PF_PASS with "*rm" unchanged, the packet
 * did not match a dummynet rule.  When the packet does match, the function
 * still returns PF_PASS but sets *pbuf0 to NULL, as the packet has been
 * converted to an mbuf and siphoned out by dummynet; the caller must treat
 * a NULL pbuf as "consumed".  Returns PF_DROP only on tagging failure.
 *
 * "fwa", when non-NULL, carries state from a previous dummynet pass so that
 * rule evaluation resumes *after* the previously matched rule
 * (fwa->fwa_pf_rule), allowing a packet to traverse multiple pipes.
 */
static __attribute__((noinline)) int
pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif,
    pbuf_t **pbuf0, struct pf_pdesc *pd, struct ip_fw_args *fwa)
{
	pbuf_t *pbuf = *pbuf0;
	struct pf_rule *am = NULL;
	struct pf_ruleset *rsm = NULL;
	struct pf_addr *saddr = pd->src, *daddr = pd->dst;
	sa_family_t af = pd->af;
	struct pf_rule *r, *a = NULL;
	struct pf_ruleset *ruleset = NULL;
	struct tcphdr *th = pd->hdr.tcp;
	u_short reason;
	int hdrlen = 0;
	int tag = -1;
	unsigned int rtableid = IFSCOPE_NONE;
	int asd = 0;
	int match = 0;
	u_int8_t icmptype = 0, icmpcode = 0;
	struct ip_fw_args dnflow;
	struct pf_rule *prev_matching_rule = fwa ? fwa->fwa_pf_rule : NULL;
	int found_prev_rule = (prev_matching_rule) ? 0 : 1;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	if (!pf_is_dummynet_enabled()) {
		return PF_PASS;
	}

	if (kif->pfik_ifp->if_xflags & IFXF_NO_TRAFFIC_SHAPING) {
		return PF_PASS;
	}

	bzero(&dnflow, sizeof(dnflow));

	hdrlen = 0;

	/* Fragments don't have protocol headers */
	if (!(pd->flags & PFDESC_IP_FRAG)) {
		switch (pd->proto) {
		case IPPROTO_TCP:
			dnflow.fwa_id.flags = pd->hdr.tcp->th_flags;
			dnflow.fwa_id.dst_port = ntohs(pd->hdr.tcp->th_dport);
			dnflow.fwa_id.src_port = ntohs(pd->hdr.tcp->th_sport);
			hdrlen = sizeof(*th);
			break;
		case IPPROTO_UDP:
			dnflow.fwa_id.dst_port = ntohs(pd->hdr.udp->uh_dport);
			dnflow.fwa_id.src_port = ntohs(pd->hdr.udp->uh_sport);
			hdrlen = sizeof(*pd->hdr.udp);
			break;
#if INET
		case IPPROTO_ICMP:
			if (af != AF_INET) {
				break;
			}
			hdrlen = ICMP_MINLEN;
			icmptype = pd->hdr.icmp->icmp_type;
			icmpcode = pd->hdr.icmp->icmp_code;
			break;
#endif /* INET */
		case IPPROTO_ICMPV6:
			if (af != AF_INET6) {
				break;
			}
			hdrlen = sizeof(*pd->hdr.icmp6);
			icmptype = pd->hdr.icmp6->icmp6_type;
			icmpcode = pd->hdr.icmp6->icmp6_code;
			break;
		case IPPROTO_GRE:
			if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
				hdrlen = sizeof(*pd->hdr.grev1);
			}
			break;
		case IPPROTO_ESP:
			hdrlen = sizeof(*pd->hdr.esp);
			break;
		}
	}

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_DUMMYNET].active.ptr);

	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != direction) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != af) {
			r = r->skip[PF_SKIP_AF].ptr;
		} else if (r->proto && r->proto != pd->proto) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		} else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
		    r->src.neg, kif)) {
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		}
		/*
		 * tcp/udp only. port_op always 0 in other cases
		 *
		 * NOTE(review): this clause skips on (IP_FRAG || ...)
		 * unconditionally, whereas the dst-port clause below only
		 * considers IP_FRAG when range.op is set -- the asymmetry
		 * looks unintentional; confirm against upstream before
		 * changing, as altering it changes rule matching.
		 */
		else if (r->proto == pd->proto &&
		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    ((r->src.xport.range.op &&
		    !pf_match_port(r->src.xport.range.op,
		    r->src.xport.range.port[0], r->src.xport.range.port[1],
		    th->th_sport))))) {
			r = r->skip[PF_SKIP_SRC_PORT].ptr;
		} else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
		    r->dst.neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		}
		/* tcp/udp only. port_op always 0 in other cases */
		else if (r->proto == pd->proto &&
		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
		    r->dst.xport.range.op &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    !pf_match_port(r->dst.xport.range.op,
		    r->dst.xport.range.port[0], r->dst.xport.range.port[1],
		    th->th_dport))) {
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		}
		/* icmp only. type always 0 in other cases */
		else if (r->type &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    r->type != icmptype + 1)) {
			r = TAILQ_NEXT(r, entries);
		}
		/* icmp only. type always 0 in other cases */
		else if (r->code &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    r->code != icmpcode + 1)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->tos && !(r->tos == pd->tos)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->rule_flag & PFRULE_FRAGMENT) {
			r = TAILQ_NEXT(r, entries);
		} else if (pd->proto == IPPROTO_TCP &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    (r->flagset & th->th_flags) != r->flags)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
			r = TAILQ_NEXT(r, entries);
		} else {
			/*
			 * Need to go past the previous dummynet matching rule
			 */
			if (r->anchor == NULL) {
				if (found_prev_rule) {
					if (r->tag) {
						tag = r->tag;
					}
					if (PF_RTABLEID_IS_VALID(r->rtableid)) {
						rtableid = r->rtableid;
					}
					match = 1;
					*rm = r;
					am = a;
					rsm = ruleset;
					if ((*rm)->quick) {
						break;
					}
				} else if (r == prev_matching_rule) {
					found_prev_rule = 1;
				}
				r = TAILQ_NEXT(r, entries);
			} else {
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_DUMMYNET, &r, &a, &match);
			}
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_DUMMYNET, &r, &a, &match)) {
			break;
		}
	}
	r = *rm;
	a = am;
	ruleset = rsm;

	if (!match) {
		return PF_PASS;
	}

	REASON_SET(&reason, PFRES_DUMMYNET);

	if (r->log) {
		PFLOG_PACKET(kif, h, pbuf, af, direction, reason, r,
		    a, ruleset, pd);
	}

	if (r->action == PF_NODUMMYNET) {
		/* rule explicitly exempts this packet from shaping */
		int dirndx = (direction == PF_OUT);

		r->packets[dirndx]++;
		r->bytes[dirndx] += pd->tot_len;

		return PF_PASS;
	}
	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, pd)) {
		REASON_SET(&reason, PFRES_MEMORY);

		return PF_DROP;
	}

	if (r->dnpipe && ip_dn_io_ptr != NULL) {
		struct mbuf *m;
		int dirndx = (direction == PF_OUT);

		r->packets[dirndx]++;
		r->bytes[dirndx] += pd->tot_len;

		dnflow.fwa_cookie = r->dnpipe;
		dnflow.fwa_pf_rule = r;
		dnflow.fwa_id.proto = pd->proto;
		dnflow.fwa_flags = r->dntype;
		switch (af) {
		case AF_INET:
			dnflow.fwa_id.addr_type = 4;
			dnflow.fwa_id.src_ip = ntohl(saddr->v4addr.s_addr);
			dnflow.fwa_id.dst_ip = ntohl(daddr->v4addr.s_addr);
			break;
		case AF_INET6:
			dnflow.fwa_id.addr_type = 6;
			dnflow.fwa_id.src_ip6 = saddr->v6addr;
			/*
			 * Fix: was "saddr->v6addr" (copy-paste from the line
			 * above), which made the dummynet IPv6 flow id carry
			 * the source address in both fields; the IPv4 case
			 * above correctly uses daddr.
			 */
			dnflow.fwa_id.dst_ip6 = daddr->v6addr;
			break;
		}

		if (fwa != NULL) {
			dnflow.fwa_oif = fwa->fwa_oif;
			dnflow.fwa_oflags = fwa->fwa_oflags;
			/*
			 * Note that fwa_ro, fwa_dst and fwa_ipoa are
			 * actually in a union so the following does work
			 * for both IPv4 and IPv6
			 */
			dnflow.fwa_ro = fwa->fwa_ro;
			dnflow.fwa_dst = fwa->fwa_dst;
			dnflow.fwa_ipoa = fwa->fwa_ipoa;
			dnflow.fwa_ro6_pmtu = fwa->fwa_ro6_pmtu;
			dnflow.fwa_origifp = fwa->fwa_origifp;
			dnflow.fwa_mtu = fwa->fwa_mtu;
			dnflow.fwa_unfragpartlen = fwa->fwa_unfragpartlen;
			dnflow.fwa_exthdrs = fwa->fwa_exthdrs;
		}

		if (af == AF_INET) {
			/* dummynet expects ip_len/ip_off in host byte order */
			struct ip *iphdr = pbuf->pb_data;
			NTOHS(iphdr->ip_len);
			NTOHS(iphdr->ip_off);
		}
		/*
		 * Don't need to unlock pf_lock as NET_THREAD_HELD_PF
		 * allows for recursive behavior
		 */
		m = pbuf_to_mbuf(pbuf, TRUE);
		if (m != NULL) {
			ip_dn_io_ptr(m,
			    dnflow.fwa_cookie, (af == AF_INET) ?
			    ((direction == PF_IN) ? DN_TO_IP_IN : DN_TO_IP_OUT) :
			    ((direction == PF_IN) ? DN_TO_IP6_IN : DN_TO_IP6_OUT),
			    &dnflow);
		}

		/*
		 * The packet is siphoned out by dummynet so return a NULL
		 * pbuf so the caller can still return success.
		 */
		*pbuf0 = NULL;

		return PF_PASS;
	}

	return PF_PASS;
}
#endif /* DUMMYNET */
6456
/*
 * Evaluate an IP fragment (no L4 header available) against the active
 * FILTER ruleset.  On return, *rm/*am/*rsm hold the matching rule, its
 * anchor rule, and its ruleset.  Returns PF_PASS if the matching rule
 * passes the fragment and it could be tagged, PF_DROP otherwise.
 *
 * NOTE(review): if no rule matches, *rm is read back uninitialized at the
 * bottom -- this presumably relies on the caller's ruleset always
 * containing a catch-all rule; confirm the caller's contract.
 */
static __attribute__((noinline)) int
pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, void *h, struct pf_pdesc *pd, struct pf_rule **am,
    struct pf_ruleset **rsm)
{
#pragma unused(h)
	struct pf_rule *r, *a = NULL;
	struct pf_ruleset *ruleset = NULL;
	sa_family_t af = pd->af;
	u_short reason;
	int tag = -1;
	int asd = 0;		/* anchor stack depth for anchor traversal */
	int match = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		/*
		 * Each clause below either rejects the current rule and
		 * advances r -- via a precomputed skip list where one is
		 * available, otherwise to the next rule -- or falls through
		 * to the final else when the rule matches.  Clause order is
		 * significant and must not be changed.
		 */
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != direction) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != af) {
			r = r->skip[PF_SKIP_AF].ptr;
		} else if (r->proto && r->proto != pd->proto) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		} else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif)) {
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		} else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		} else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
		    !(r->tos & pd->tos)) {
			r = TAILQ_NEXT(r, entries);
		} else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
		    !(r->tos & (pd->tos & DSCP_MASK))) {
			r = TAILQ_NEXT(r, entries);
		} else if ((r->rule_flag & PFRULE_SC) && r->tos &&
		    ((r->tos & SCIDX_MASK) != pd->sc)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->os_fingerprint != PF_OSFP_ANY) {
			/* OS fingerprinting needs a TCP header; skip for fragments */
			r = TAILQ_NEXT(r, entries);
		} else if (pd->proto == IPPROTO_UDP &&
		    (r->src.xport.range.op || r->dst.xport.range.op)) {
			/* port matches are impossible without an L4 header */
			r = TAILQ_NEXT(r, entries);
		} else if (pd->proto == IPPROTO_TCP &&
		    (r->src.xport.range.op || r->dst.xport.range.op ||
		    r->flagset)) {
			/* likewise for TCP ports and flag sets */
			r = TAILQ_NEXT(r, entries);
		} else if ((pd->proto == IPPROTO_ICMP ||
		    pd->proto == IPPROTO_ICMPV6) &&
		    (r->type || r->code)) {
			/* ICMP type/code live past the IP header; unavailable */
			r = TAILQ_NEXT(r, entries);
		} else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
			/* probabilistic rule did not fire this time */
			r = TAILQ_NEXT(r, entries);
		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
			r = TAILQ_NEXT(r, entries);
		} else {
			if (r->anchor == NULL) {
				/* plain rule: record it; stop early on "quick" */
				match = 1;
				*rm = r;
				*am = a;
				*rsm = ruleset;
				if ((*rm)->quick) {
					break;
				}
				r = TAILQ_NEXT(r, entries);
			} else {
				/* descend into the anchor's sub-ruleset */
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_FILTER, &r, &a, &match);
			}
		}
		/* end of a sub-ruleset: pop back to the enclosing anchor */
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_FILTER, &r, &a, &match)) {
			break;
		}
	}
	/* adopt the last (or "quick") matching rule for logging/decision */
	r = *rm;
	a = *am;
	ruleset = *rsm;

	REASON_SET(&reason, PFRES_MATCH);

	if (r->log) {
		PFLOG_PACKET(kif, h, pbuf, af, direction, reason, r, a, ruleset,
		    pd);
	}

	if (r->action != PF_PASS) {
		return PF_DROP;
	}

	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, -1, NULL)) {
		REASON_SET(&reason, PFRES_MEMORY);
		return PF_DROP;
	}

	return PF_PASS;
}
6556
6557 static __attribute__((noinline)) void
pf_pptp_handler(struct pf_state * s,int direction,int off,struct pf_pdesc * pd,struct pfi_kif * kif)6558 pf_pptp_handler(struct pf_state *s, int direction, int off,
6559 struct pf_pdesc *pd, struct pfi_kif *kif)
6560 {
6561 #pragma unused(direction)
6562 struct tcphdr *th;
6563 struct pf_pptp_state *pptps;
6564 struct pf_pptp_ctrl_msg cm;
6565 size_t plen, tlen;
6566 struct pf_state *gs;
6567 u_int16_t ct;
6568 u_int16_t *pac_call_id;
6569 u_int16_t *pns_call_id;
6570 u_int16_t *spoof_call_id;
6571 u_int8_t *pac_state;
6572 u_int8_t *pns_state;
6573 enum { PF_PPTP_PASS, PF_PPTP_INSERT_GRE, PF_PPTP_REMOVE_GRE } op;
6574 pbuf_t *pbuf;
6575 struct pf_state_key *sk;
6576 struct pf_state_key *gsk;
6577 struct pf_app_state *gas;
6578
6579 sk = s->state_key;
6580 pptps = &sk->app_state->u.pptp;
6581 gs = pptps->grev1_state;
6582
6583 if (gs) {
6584 gs->expire = pf_time_second();
6585 }
6586
6587 pbuf = pd->mp;
6588 plen = min(sizeof(cm), pbuf->pb_packet_len - off);
6589 if (plen < PF_PPTP_CTRL_MSG_MINSIZE) {
6590 return;
6591 }
6592 tlen = plen - PF_PPTP_CTRL_MSG_MINSIZE;
6593 pbuf_copy_data(pbuf, off, plen, &cm);
6594
6595 if (ntohl(cm.hdr.magic) != PF_PPTP_MAGIC_NUMBER) {
6596 return;
6597 }
6598 if (ntohs(cm.hdr.type) != 1) {
6599 return;
6600 }
6601
6602 #define TYPE_LEN_CHECK(_type, _name) \
6603 case PF_PPTP_CTRL_TYPE_##_type: \
6604 if (tlen < sizeof(struct pf_pptp_ctrl_##_name)) \
6605 return; \
6606 break;
6607
6608 switch (cm.ctrl.type) {
6609 TYPE_LEN_CHECK(START_REQ, start_req);
6610 TYPE_LEN_CHECK(START_RPY, start_rpy);
6611 TYPE_LEN_CHECK(STOP_REQ, stop_req);
6612 TYPE_LEN_CHECK(STOP_RPY, stop_rpy);
6613 TYPE_LEN_CHECK(ECHO_REQ, echo_req);
6614 TYPE_LEN_CHECK(ECHO_RPY, echo_rpy);
6615 TYPE_LEN_CHECK(CALL_OUT_REQ, call_out_req);
6616 TYPE_LEN_CHECK(CALL_OUT_RPY, call_out_rpy);
6617 TYPE_LEN_CHECK(CALL_IN_1ST, call_in_1st);
6618 TYPE_LEN_CHECK(CALL_IN_2ND, call_in_2nd);
6619 TYPE_LEN_CHECK(CALL_IN_3RD, call_in_3rd);
6620 TYPE_LEN_CHECK(CALL_CLR, call_clr);
6621 TYPE_LEN_CHECK(CALL_DISC, call_disc);
6622 TYPE_LEN_CHECK(ERROR, error);
6623 TYPE_LEN_CHECK(SET_LINKINFO, set_linkinfo);
6624 default:
6625 return;
6626 }
6627 #undef TYPE_LEN_CHECK
6628
6629 if (!gs) {
6630 gs = pool_get(&pf_state_pl, PR_WAITOK);
6631 if (!gs) {
6632 return;
6633 }
6634
6635 memcpy(gs, s, sizeof(*gs));
6636
6637 memset(&gs->entry_id, 0, sizeof(gs->entry_id));
6638 memset(&gs->entry_list, 0, sizeof(gs->entry_list));
6639
6640 TAILQ_INIT(&gs->unlink_hooks);
6641 gs->rt_kif = NULL;
6642 gs->creation = 0;
6643 gs->pfsync_time = 0;
6644 gs->packets[0] = gs->packets[1] = 0;
6645 gs->bytes[0] = gs->bytes[1] = 0;
6646 gs->timeout = PFTM_UNLINKED;
6647 gs->id = gs->creatorid = 0;
6648 gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
6649 gs->src.scrub = gs->dst.scrub = 0;
6650
6651 gas = pool_get(&pf_app_state_pl, PR_NOWAIT);
6652 if (!gas) {
6653 pool_put(&pf_state_pl, gs);
6654 return;
6655 }
6656
6657 gsk = pf_alloc_state_key(gs, NULL);
6658 if (!gsk) {
6659 pool_put(&pf_app_state_pl, gas);
6660 pool_put(&pf_state_pl, gs);
6661 return;
6662 }
6663
6664 memcpy(&gsk->lan, &sk->lan, sizeof(gsk->lan));
6665 memcpy(&gsk->gwy, &sk->gwy, sizeof(gsk->gwy));
6666 memcpy(&gsk->ext_lan, &sk->ext_lan, sizeof(gsk->ext_lan));
6667 memcpy(&gsk->ext_gwy, &sk->ext_gwy, sizeof(gsk->ext_gwy));
6668 gsk->af_lan = sk->af_lan;
6669 gsk->af_gwy = sk->af_gwy;
6670 gsk->proto = IPPROTO_GRE;
6671 gsk->proto_variant = PF_GRE_PPTP_VARIANT;
6672 gsk->app_state = gas;
6673 gsk->lan.xport.call_id = 0;
6674 gsk->gwy.xport.call_id = 0;
6675 gsk->ext_lan.xport.call_id = 0;
6676 gsk->ext_gwy.xport.call_id = 0;
6677 ASSERT(gsk->flowsrc == FLOWSRC_PF);
6678 ASSERT(gsk->flowhash != 0);
6679 memset(gas, 0, sizeof(*gas));
6680 gas->u.grev1.pptp_state = s;
6681 STATE_INC_COUNTERS(gs);
6682 pptps->grev1_state = gs;
6683 (void) hook_establish(&gs->unlink_hooks, 0,
6684 (hook_fn_t) pf_grev1_unlink, gs);
6685 } else {
6686 gsk = gs->state_key;
6687 }
6688
6689 switch (sk->direction) {
6690 case PF_IN:
6691 pns_call_id = &gsk->ext_lan.xport.call_id;
6692 pns_state = &gs->dst.state;
6693 pac_call_id = &gsk->lan.xport.call_id;
6694 pac_state = &gs->src.state;
6695 break;
6696
6697 case PF_OUT:
6698 pns_call_id = &gsk->lan.xport.call_id;
6699 pns_state = &gs->src.state;
6700 pac_call_id = &gsk->ext_lan.xport.call_id;
6701 pac_state = &gs->dst.state;
6702 break;
6703
6704 default:
6705 DPFPRINTF(PF_DEBUG_URGENT,
6706 ("pf_pptp_handler: bad directional!\n"));
6707 return;
6708 }
6709
6710 spoof_call_id = 0;
6711 op = PF_PPTP_PASS;
6712
6713 ct = ntohs(cm.ctrl.type);
6714
6715 switch (ct) {
6716 case PF_PPTP_CTRL_TYPE_CALL_OUT_REQ:
6717 *pns_call_id = cm.msg.call_out_req.call_id;
6718 *pns_state = PFGRE1S_INITIATING;
6719 if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6720 spoof_call_id = &cm.msg.call_out_req.call_id;
6721 }
6722 break;
6723
6724 case PF_PPTP_CTRL_TYPE_CALL_OUT_RPY:
6725 *pac_call_id = cm.msg.call_out_rpy.call_id;
6726 if (s->nat_rule.ptr) {
6727 spoof_call_id =
6728 (pac_call_id == &gsk->lan.xport.call_id) ?
6729 &cm.msg.call_out_rpy.call_id :
6730 &cm.msg.call_out_rpy.peer_call_id;
6731 }
6732 if (gs->timeout == PFTM_UNLINKED) {
6733 *pac_state = PFGRE1S_INITIATING;
6734 op = PF_PPTP_INSERT_GRE;
6735 }
6736 break;
6737
6738 case PF_PPTP_CTRL_TYPE_CALL_IN_1ST:
6739 *pns_call_id = cm.msg.call_in_1st.call_id;
6740 *pns_state = PFGRE1S_INITIATING;
6741 if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6742 spoof_call_id = &cm.msg.call_in_1st.call_id;
6743 }
6744 break;
6745
6746 case PF_PPTP_CTRL_TYPE_CALL_IN_2ND:
6747 *pac_call_id = cm.msg.call_in_2nd.call_id;
6748 *pac_state = PFGRE1S_INITIATING;
6749 if (s->nat_rule.ptr) {
6750 spoof_call_id =
6751 (pac_call_id == &gsk->lan.xport.call_id) ?
6752 &cm.msg.call_in_2nd.call_id :
6753 &cm.msg.call_in_2nd.peer_call_id;
6754 }
6755 break;
6756
6757 case PF_PPTP_CTRL_TYPE_CALL_IN_3RD:
6758 if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6759 spoof_call_id = &cm.msg.call_in_3rd.call_id;
6760 }
6761 if (cm.msg.call_in_3rd.call_id != *pns_call_id) {
6762 break;
6763 }
6764 if (gs->timeout == PFTM_UNLINKED) {
6765 op = PF_PPTP_INSERT_GRE;
6766 }
6767 break;
6768
6769 case PF_PPTP_CTRL_TYPE_CALL_CLR:
6770 if (cm.msg.call_clr.call_id != *pns_call_id) {
6771 op = PF_PPTP_REMOVE_GRE;
6772 }
6773 break;
6774
6775 case PF_PPTP_CTRL_TYPE_CALL_DISC:
6776 if (cm.msg.call_clr.call_id != *pac_call_id) {
6777 op = PF_PPTP_REMOVE_GRE;
6778 }
6779 break;
6780
6781 case PF_PPTP_CTRL_TYPE_ERROR:
6782 if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6783 spoof_call_id = &cm.msg.error.peer_call_id;
6784 }
6785 break;
6786
6787 case PF_PPTP_CTRL_TYPE_SET_LINKINFO:
6788 if (s->nat_rule.ptr && pac_call_id == &gsk->lan.xport.call_id) {
6789 spoof_call_id = &cm.msg.set_linkinfo.peer_call_id;
6790 }
6791 break;
6792
6793 default:
6794 op = PF_PPTP_PASS;
6795 break;
6796 }
6797
6798 if (!gsk->gwy.xport.call_id && gsk->lan.xport.call_id) {
6799 gsk->gwy.xport.call_id = gsk->lan.xport.call_id;
6800 if (spoof_call_id) {
6801 u_int16_t call_id = 0;
6802 int n = 0;
6803 struct pf_state_key_cmp key;
6804
6805 key.af_gwy = gsk->af_gwy;
6806 key.proto = IPPROTO_GRE;
6807 key.proto_variant = PF_GRE_PPTP_VARIANT;
6808 PF_ACPY(&key.gwy.addr, &gsk->gwy.addr, key.af_gwy);
6809 PF_ACPY(&key.ext_gwy.addr, &gsk->ext_gwy.addr, key.af_gwy);
6810 key.gwy.xport.call_id = gsk->gwy.xport.call_id;
6811 key.ext_gwy.xport.call_id = gsk->ext_gwy.xport.call_id;
6812 do {
6813 call_id = htonl(random());
6814 } while (!call_id);
6815
6816 while (pf_find_state_all(&key, PF_IN, 0)) {
6817 call_id = ntohs(call_id);
6818 --call_id;
6819 if (--call_id == 0) {
6820 call_id = 0xffff;
6821 }
6822 call_id = htons(call_id);
6823
6824 key.gwy.xport.call_id = call_id;
6825
6826 if (++n > 65535) {
6827 DPFPRINTF(PF_DEBUG_URGENT,
6828 ("pf_pptp_handler: failed to spoof "
6829 "call id\n"));
6830 key.gwy.xport.call_id = 0;
6831 break;
6832 }
6833 }
6834
6835 gsk->gwy.xport.call_id = call_id;
6836 }
6837 }
6838
6839 th = pd->hdr.tcp;
6840
6841 if (spoof_call_id && gsk->lan.xport.call_id != gsk->gwy.xport.call_id) {
6842 if (*spoof_call_id == gsk->gwy.xport.call_id) {
6843 *spoof_call_id = gsk->lan.xport.call_id;
6844 th->th_sum = pf_cksum_fixup(th->th_sum,
6845 gsk->gwy.xport.call_id, gsk->lan.xport.call_id, 0);
6846 } else {
6847 *spoof_call_id = gsk->gwy.xport.call_id;
6848 th->th_sum = pf_cksum_fixup(th->th_sum,
6849 gsk->lan.xport.call_id, gsk->gwy.xport.call_id, 0);
6850 }
6851
6852 if (pf_lazy_makewritable(pd, pbuf, off + plen) == NULL) {
6853 pptps->grev1_state = NULL;
6854 STATE_DEC_COUNTERS(gs);
6855 pool_put(&pf_state_pl, gs);
6856 return;
6857 }
6858 pbuf_copy_back(pbuf, off, plen, &cm);
6859 }
6860
6861 switch (op) {
6862 case PF_PPTP_REMOVE_GRE:
6863 gs->timeout = PFTM_PURGE;
6864 gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
6865 gsk->lan.xport.call_id = 0;
6866 gsk->gwy.xport.call_id = 0;
6867 gsk->ext_lan.xport.call_id = 0;
6868 gsk->ext_gwy.xport.call_id = 0;
6869 gs->id = gs->creatorid = 0;
6870 break;
6871
6872 case PF_PPTP_INSERT_GRE:
6873 gs->creation = pf_time_second();
6874 gs->expire = pf_time_second();
6875 gs->timeout = PFTM_TCP_ESTABLISHED;
6876 if (gs->src_node != NULL) {
6877 ++gs->src_node->states;
6878 VERIFY(gs->src_node->states != 0);
6879 }
6880 if (gs->nat_src_node != NULL) {
6881 ++gs->nat_src_node->states;
6882 VERIFY(gs->nat_src_node->states != 0);
6883 }
6884 pf_set_rt_ifp(gs, &sk->lan.addr, sk->af_lan);
6885 if (pf_insert_state(BOUND_IFACE(s->rule.ptr, kif), gs)) {
6886 /*
6887 * <[email protected]>
6888 * FIX ME: insertion can fail when multiple PNS
6889 * behind the same NAT open calls to the same PAC
6890 * simultaneously because spoofed call ID numbers
6891 * are chosen before states are inserted. This is
6892 * hard to fix and happens infrequently enough that
6893 * users will normally try again and this ALG will
6894 * succeed. Failures are expected to be rare enough
6895 * that fixing this is a low priority.
6896 */
6897 pptps->grev1_state = NULL;
6898 pd->lmw = -1; /* Force PF_DROP on PFRES_MEMORY */
6899 pf_src_tree_remove_state(gs);
6900 STATE_DEC_COUNTERS(gs);
6901 pool_put(&pf_state_pl, gs);
6902 DPFPRINTF(PF_DEBUG_URGENT, ("pf_pptp_handler: error "
6903 "inserting GREv1 state.\n"));
6904 }
6905 break;
6906
6907 default:
6908 break;
6909 }
6910 }
6911
6912 static __attribute__((noinline)) void
pf_pptp_unlink(struct pf_state * s)6913 pf_pptp_unlink(struct pf_state *s)
6914 {
6915 struct pf_app_state *as = s->state_key->app_state;
6916 struct pf_state *grev1s = as->u.pptp.grev1_state;
6917
6918 if (grev1s) {
6919 struct pf_app_state *gas = grev1s->state_key->app_state;
6920
6921 if (grev1s->timeout < PFTM_MAX) {
6922 grev1s->timeout = PFTM_PURGE;
6923 }
6924 gas->u.grev1.pptp_state = NULL;
6925 as->u.pptp.grev1_state = NULL;
6926 }
6927 }
6928
6929 static __attribute__((noinline)) void
pf_grev1_unlink(struct pf_state * s)6930 pf_grev1_unlink(struct pf_state *s)
6931 {
6932 struct pf_app_state *as = s->state_key->app_state;
6933 struct pf_state *pptps = as->u.grev1.pptp_state;
6934
6935 if (pptps) {
6936 struct pf_app_state *pas = pptps->state_key->app_state;
6937
6938 pas->u.pptp.grev1_state = NULL;
6939 as->u.grev1.pptp_state = NULL;
6940 }
6941 }
6942
6943 static int
pf_ike_compare(struct pf_app_state * a,struct pf_app_state * b)6944 pf_ike_compare(struct pf_app_state *a, struct pf_app_state *b)
6945 {
6946 int64_t d = a->u.ike.cookie - b->u.ike.cookie;
6947 return (d > 0) ? 1 : ((d < 0) ? -1 : 0);
6948 }
6949
6950 static int
pf_do_nat64(struct pf_state_key * sk,struct pf_pdesc * pd,pbuf_t * pbuf,int off)6951 pf_do_nat64(struct pf_state_key *sk, struct pf_pdesc *pd, pbuf_t *pbuf,
6952 int off)
6953 {
6954 if (pd->af == AF_INET) {
6955 if (pd->af != sk->af_lan) {
6956 pd->ndaddr = sk->lan.addr;
6957 pd->naddr = sk->ext_lan.addr;
6958 } else {
6959 pd->naddr = sk->gwy.addr;
6960 pd->ndaddr = sk->ext_gwy.addr;
6961 }
6962 return pf_nat64_ipv4(pbuf, off, pd);
6963 } else if (pd->af == AF_INET6) {
6964 if (pd->af != sk->af_lan) {
6965 pd->ndaddr = sk->lan.addr;
6966 pd->naddr = sk->ext_lan.addr;
6967 } else {
6968 pd->naddr = sk->gwy.addr;
6969 pd->ndaddr = sk->ext_gwy.addr;
6970 }
6971 return pf_nat64_ipv6(pbuf, off, pd);
6972 }
6973 return PF_DROP;
6974 }
6975
/*
 * Stateful inspection of a TCP packet against an existing state entry.
 *
 * Looks up the state for the packet described by pd/th, runs the TCP
 * synproxy handshake if armed, validates sequence/ack numbers with the
 * sliding-window algorithm (van Rooij), advances the per-peer TCP state
 * machine and state timeout, invokes any app-layer handler (ALG), and
 * applies NAT/NAT64 address+port translation.
 *
 * Returns PF_PASS, PF_DROP, or PF_SYNPROXY_DROP; on PF_DROP *reason is
 * set.  *state may be cleared (state reuse) or updated by the lookup.
 */
static __attribute__((noinline)) int
pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd,
    u_short *reason)
{
#pragma unused(h)
	struct pf_state_key_cmp key;
	struct tcphdr *th = pd->hdr.tcp;
	u_int16_t win = ntohs(th->th_win);
	u_int32_t ack, end, seq, orig_seq;
	u_int8_t sws, dws;		/* src/dst window-scale shift counts */
	int ackskew;
	int copyback = 0;		/* >0: header bytes to write back to pbuf */
	struct pf_state_peer *src, *dst;
	struct pf_state_key *sk;

	key.app_state = 0;
	key.proto = IPPROTO_TCP;
	key.af_lan = key.af_gwy = pd->af;

	/*
	 * For NAT64 the first time rule search and state creation
	 * is done on the incoming side only.
	 * Once the state gets created, NAT64's LAN side (ipv6) will
	 * not be able to find the state in ext-gwy tree as that normally
	 * is intended to be looked up for incoming traffic from the
	 * WAN side.
	 * Therefore to handle NAT64 case we init keys here for both
	 * lan-ext as well as ext-gwy trees.
	 * In the state lookup we attempt a lookup on both trees if
	 * first one does not return any result and return a match if
	 * the match state's was created by NAT64 rule.
	 */
	PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
	PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
	key.ext_gwy.xport.port = th->th_sport;
	key.gwy.xport.port = th->th_dport;

	PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
	PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
	key.lan.xport.port = th->th_sport;
	key.ext_lan.xport.port = th->th_dport;

	/* Finds *state or returns with the macro's failure action. */
	STATE_LOOKUP();

	sk = (*state)->state_key;
	/*
	 * In case of NAT64 the translation is first applied on the LAN
	 * side. Therefore for stack's address family comparison
	 * we use sk->af_lan.
	 */
	if ((direction == sk->direction) && (pd->af == sk->af_lan)) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		/* Reply direction: swap peer roles. */
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/*
	 * Synproxy phase 1: we impersonate the server toward the client
	 * until its handshake completes.
	 */
	if (src->state == PF_TCPS_PROXY_SRC) {
		if (direction != sk->direction) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		}
		if (th->th_flags & TH_SYN) {
			if (ntohl(th->th_seq) != src->seqlo) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return PF_DROP;
			}
			/* Answer the client's SYN with our own SYN|ACK. */
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    src->seqhi, ntohl(th->th_seq) + 1,
			    TH_SYN | TH_ACK, 0, src->mss, 0, 1,
			    0, NULL, NULL);
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		} else if (!(th->th_flags & TH_ACK) ||
		    (ntohl(th->th_ack) != src->seqhi + 1) ||
		    (ntohl(th->th_seq) != src->seqlo + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_DROP;
		} else if ((*state)->src_node != NULL &&
		    pf_src_connlimit(state)) {
			REASON_SET(reason, PFRES_SRCLIMIT);
			return PF_DROP;
		} else {
			/* Client handshake done; proceed to server side. */
			src->state = PF_TCPS_PROXY_DST;
		}
	}
	/*
	 * Synproxy phase 2: open the real connection to the server and
	 * splice the two handshakes together.
	 */
	if (src->state == PF_TCPS_PROXY_DST) {
		struct pf_state_host *psrc, *pdst;

		if (direction == PF_OUT) {
			psrc = &sk->gwy;
			pdst = &sk->ext_gwy;
		} else {
			psrc = &sk->ext_lan;
			pdst = &sk->lan;
		}
		if (direction == sk->direction) {
			if (((th->th_flags & (TH_SYN | TH_ACK)) != TH_ACK) ||
			    (ntohl(th->th_ack) != src->seqhi + 1) ||
			    (ntohl(th->th_seq) != src->seqlo + 1)) {
				REASON_SET(reason, PFRES_SYNPROXY);
				return PF_DROP;
			}
			src->max_win = MAX(ntohs(th->th_win), 1);
			if (dst->seqhi == 1) {
				dst->seqhi = htonl(random());
			}
			/* Send our SYN to the real server. */
			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
			    &pdst->addr, psrc->xport.port, pdst->xport.port,
			    dst->seqhi, 0, TH_SYN, 0,
			    src->mss, 0, 0, (*state)->tag, NULL, NULL);
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		} else if (((th->th_flags & (TH_SYN | TH_ACK)) !=
		    (TH_SYN | TH_ACK)) ||
		    (ntohl(th->th_ack) != dst->seqhi + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_DROP;
		} else {
			/*
			 * Server replied SYN|ACK: complete both handshakes
			 * and compute sequence modulators so the two
			 * half-connections line up.
			 */
			dst->max_win = MAX(ntohs(th->th_win), 1);
			dst->seqlo = ntohl(th->th_seq);
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
			    TH_ACK, src->max_win, 0, 0, 0,
			    (*state)->tag, NULL, NULL);
			pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
			    &pdst->addr, psrc->xport.port, pdst->xport.port,
			    src->seqhi + 1, src->seqlo + 1,
			    TH_ACK, dst->max_win, 0, 0, 1,
			    0, NULL, NULL);
			src->seqdiff = dst->seqhi -
			    src->seqlo;
			dst->seqdiff = src->seqhi -
			    dst->seqlo;
			src->seqhi = src->seqlo +
			    dst->max_win;
			dst->seqhi = dst->seqlo +
			    src->max_win;
			src->wscale = dst->wscale = 0;
			src->state = dst->state =
			    TCPS_ESTABLISHED;
			REASON_SET(reason, PFRES_SYNPROXY);
			return PF_SYNPROXY_DROP;
		}
	}

	/*
	 * A fresh SYN on a connection both of whose peers are past
	 * FIN_WAIT_2 means the endpoints are reusing the 4-tuple:
	 * discard the stale state and let the SYN create a new one
	 * on retransmit.
	 */
	if (((th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN) &&
	    dst->state >= TCPS_FIN_WAIT_2 &&
	    src->state >= TCPS_FIN_WAIT_2) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: state reuse ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf("\n");
		}
		/* XXX make sure it's the same direction ?? */
		src->state = dst->state = TCPS_CLOSED;
		pf_unlink_state(*state);
		*state = NULL;
		return PF_DROP;
	}

	if ((th->th_flags & TH_SYN) == 0) {
		sws = (src->wscale & PF_WSCALE_FLAG) ?
		    (src->wscale & PF_WSCALE_MASK) : TCP_MAX_WINSHIFT;
		dws = (dst->wscale & PF_WSCALE_FLAG) ?
		    (dst->wscale & PF_WSCALE_MASK) : TCP_MAX_WINSHIFT;
	} else {
		/* No scaling applies until the handshake negotiates it. */
		sws = dws = 0;
	}

	/*
	 * Sequence tracking algorithm from Guido van Rooij's paper:
	 * http://www.madison-gurkha.com/publications/tcp_filtering/
	 * tcp_filtering.ps
	 */

	orig_seq = seq = ntohl(th->th_seq);
	if (src->seqlo == 0) {
		/* First packet from this end. Set its state */

		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
		    src->scrub == NULL) {
			if (pf_normalize_tcp_init(pbuf, off, pd, th, src, dst)) {
				REASON_SET(reason, PFRES_MEMORY);
				return PF_DROP;
			}
		}

		/* Deferred generation of sequence number modulator */
		if (dst->seqdiff && !src->seqdiff) {
			/* use random iss for the TCP server */
			while ((src->seqdiff = random() - seq) == 0) {
				;
			}
			ack = ntohl(th->th_ack) - dst->seqdiff;
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			copyback = off + sizeof(*th);
		} else {
			ack = ntohl(th->th_ack);
		}

		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;	/* SYN consumes one sequence number */
			if (dst->wscale & PF_WSCALE_FLAG) {
				src->wscale = pf_get_wscale(pbuf, off,
				    th->th_off, pd->af);
				if (src->wscale & PF_WSCALE_FLAG) {
					/*
					 * Remove scale factor from initial
					 * window
					 */
					sws = src->wscale & PF_WSCALE_MASK;
					win = ((u_int32_t)win + (1 << sws) - 1)
					    >> sws;
					dws = dst->wscale & PF_WSCALE_MASK;
				} else {
					/*
					 * Window scale negotiation has failed,
					 * therefore we must restore the window
					 * scale in the state record that we
					 * optimistically removed in
					 * pf_test_rule(). Care is required to
					 * prevent arithmetic overflow from
					 * zeroing the window when it's
					 * truncated down to 16-bits.
					 */
					u_int32_t max_win = dst->max_win;
					max_win <<=
					    dst->wscale & PF_WSCALE_MASK;
					dst->max_win = MIN(0xffff, max_win);
					/* in case of a retrans SYN|ACK */
					dst->wscale = 0;
				}
			}
		}
		if (th->th_flags & TH_FIN) {
			end++;	/* FIN consumes one sequence number */
		}

		src->seqlo = seq;
		if (src->state < TCPS_SYN_SENT) {
			src->state = TCPS_SYN_SENT;
		}

		/*
		 * May need to slide the window (seqhi may have been set by
		 * the crappy stack check or if we picked up the connection
		 * after establishment)
		 */
		if (src->seqhi == 1 ||
		    SEQ_GEQ(end + MAX(1, (u_int32_t)dst->max_win << dws),
		    src->seqhi)) {
			src->seqhi = end + MAX(1, (u_int32_t)dst->max_win << dws);
		}
		if (win > src->max_win) {
			src->max_win = win;
		}
	} else {
		ack = ntohl(th->th_ack) - dst->seqdiff;
		if (src->seqdiff) {
			/* Modulate sequence numbers */
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			copyback = off + sizeof(*th);
		}
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
		}
		if (th->th_flags & TH_FIN) {
			end++;
		}
	}

	if ((th->th_flags & TH_ACK) == 0) {
		/* Let it pass through the ack skew check */
		ack = dst->seqlo;
	} else if ((ack == 0 &&
	    (th->th_flags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) ||
	    /* broken tcp stacks do not set ack */
	    (dst->state < TCPS_SYN_SENT)) {
		/*
		 * Many stacks (ours included) will set the ACK number in an
		 * FIN|ACK if the SYN times out -- no sequence to ACK.
		 */
		ack = dst->seqlo;
	}

	if (seq == end) {
		/* Ease sequencing restrictions on no data packets */
		seq = src->seqlo;
		end = seq;
	}

	ackskew = dst->seqlo - ack;


	/*
	 * Need to demodulate the sequence numbers in any TCP SACK options
	 * (Selective ACK). We could optionally validate the SACK values
	 * against the current ACK window, either forwards or backwards, but
	 * I'm not confident that SACK has been implemented properly
	 * everywhere. It wouldn't surprise me if several stacks accidently
	 * SACK too far backwards of previously ACKed data. There really aren't
	 * any security implications of bad SACKing unless the target stack
	 * doesn't validate the option length correctly. Someone trying to
	 * spoof into a TCP connection won't bother blindly sending SACK
	 * options anyway.
	 */
	if (dst->seqdiff && (th->th_off << 2) > (int)sizeof(struct tcphdr)) {
		copyback = pf_modulate_sack(pbuf, off, pd, th, dst);
		if (copyback == -1) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		pbuf = pd->mp; // XXXSCW: Why?
	}


#define MAXACKWINDOW (0xffff + 1500)    /* 1500 is an arbitrary fudge factor */
	if (SEQ_GEQ(src->seqhi, end) &&
	    /* Last octet inside other's window space */
	    SEQ_GEQ(seq, src->seqlo - ((u_int32_t)dst->max_win << dws)) &&
	    /* Retrans: not more than one window back */
	    (ackskew >= -MAXACKWINDOW) &&
	    /* Acking not more than one reassembled fragment backwards */
	    (ackskew <= (MAXACKWINDOW << sws)) &&
	    /* Acking not more than one window forward */
	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
	    (pd->flags & PFDESC_IP_REAS) == 0)) {
		/* Require an exact/+1 sequence match on resets when possible */

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(pbuf, off, pd, reason, th,
			    *state, src, dst, &copyback)) {
				return PF_DROP;
			}

			pbuf = pd->mp; // XXXSCW: Why?
		}

		/* update max window */
		if (src->max_win < win) {
			src->max_win = win;
		}
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo)) {
			src->seqlo = end;
		}
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi)) {
			dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
		}

		/* update states */
		if (th->th_flags & TH_SYN) {
			if (src->state < TCPS_SYN_SENT) {
				src->state = TCPS_SYN_SENT;
			}
		}
		if (th->th_flags & TH_FIN) {
			if (src->state < TCPS_CLOSING) {
				src->state = TCPS_CLOSING;
			}
		}
		if (th->th_flags & TH_ACK) {
			if (dst->state == TCPS_SYN_SENT) {
				dst->state = TCPS_ESTABLISHED;
				if (src->state == TCPS_ESTABLISHED &&
				    (*state)->src_node != NULL &&
				    pf_src_connlimit(state)) {
					REASON_SET(reason, PFRES_SRCLIMIT);
					return PF_DROP;
				}
			} else if (dst->state == TCPS_CLOSING) {
				dst->state = TCPS_FIN_WAIT_2;
			}
		}
		if (th->th_flags & TH_RST) {
			src->state = dst->state = TCPS_TIME_WAIT;
		}

		/* update expire time */
		(*state)->expire = pf_time_second();
		if (src->state >= TCPS_FIN_WAIT_2 &&
		    dst->state >= TCPS_FIN_WAIT_2) {
			(*state)->timeout = PFTM_TCP_CLOSED;
		} else if (src->state >= TCPS_CLOSING &&
		    dst->state >= TCPS_CLOSING) {
			(*state)->timeout = PFTM_TCP_FIN_WAIT;
		} else if (src->state < TCPS_ESTABLISHED ||
		    dst->state < TCPS_ESTABLISHED) {
			(*state)->timeout = PFTM_TCP_OPENING;
		} else if (src->state >= TCPS_CLOSING ||
		    dst->state >= TCPS_CLOSING) {
			(*state)->timeout = PFTM_TCP_CLOSING;
		} else {
			(*state)->timeout = PFTM_TCP_ESTABLISHED;
		}

		/* Fall through to PASS packet */
	} else if ((dst->state < TCPS_SYN_SENT ||
	    dst->state >= TCPS_FIN_WAIT_2 || src->state >= TCPS_FIN_WAIT_2) &&
	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
	    /* Within a window forward of the originating packet */
	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
		/* Within a window backward of the originating packet */

		/*
		 * This currently handles three situations:
		 *  1) Stupid stacks will shotgun SYNs before their peer
		 *     replies.
		 *  2) When PF catches an already established stream (the
		 *     firewall rebooted, the state table was flushed, routes
		 *     changed...)
		 *  3) Packets get funky immediately after the connection
		 *     closes (this should catch Solaris spurious ACK|FINs
		 *     that web servers like to spew after a close)
		 *
		 * This must be a little more careful than the above code
		 * since packet floods will also be caught here. We don't
		 * update the TTL here to mitigate the damage of a packet
		 * flood and so the same code can handle awkward establishment
		 * and a loosened connection close.
		 * In the establishment case, a correct peer response will
		 * validate the connection, go through the normal state code
		 * and keep updating the state TTL.
		 */

		if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: loose state match: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
			    pd->p_len, ackskew, (*state)->packets[0],
			    (*state)->packets[1],
			    direction == PF_IN ? "in" : "out",
			    direction == sk->direction ?
			    "fwd" : "rev");
		}

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(pbuf, off, pd, reason, th,
			    *state, src, dst, &copyback)) {
				return PF_DROP;
			}
			pbuf = pd->mp; // XXXSCW: Why?
		}

		/* update max window */
		if (src->max_win < win) {
			src->max_win = win;
		}
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo)) {
			src->seqlo = end;
		}
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi)) {
			dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
		}

		/*
		 * Cannot set dst->seqhi here since this could be a shotgunned
		 * SYN and not an already established connection.
		 */

		if (th->th_flags & TH_FIN) {
			if (src->state < TCPS_CLOSING) {
				src->state = TCPS_CLOSING;
			}
		}
		if (th->th_flags & TH_RST) {
			src->state = dst->state = TCPS_TIME_WAIT;
		}

		/* Fall through to PASS packet */
	} else {
		/* Sequence numbers fail both strict and loose checks. */
		if (dst->state == TCPS_SYN_SENT &&
		    src->state == TCPS_SYN_SENT) {
			/* Send RST for state mismatches during handshake */
			if (!(th->th_flags & TH_RST)) {
				pf_send_tcp((*state)->rule.ptr, pd->af,
				    pd->dst, pd->src, th->th_dport,
				    th->th_sport, ntohl(th->th_ack), 0,
				    TH_RST, 0, 0,
				    (*state)->rule.ptr->return_ttl, 1, 0,
				    pd->eh, kif->pfik_ifp);
			}
			src->seqlo = 0;
			src->seqhi = 1;
			src->max_win = 1;
		} else if (pf_status.debug >= PF_DEBUG_MISC) {
			printf("pf: BAD state: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			printf("\n seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "sws=%u dws=%u pkts=%llu:%llu dir=%s,%s\n",
			    seq, orig_seq, ack, pd->p_len, ackskew,
			    (unsigned int)sws, (unsigned int)dws,
			    (*state)->packets[0], (*state)->packets[1],
			    direction == PF_IN ? "in" : "out",
			    direction == sk->direction ?
			    "fwd" : "rev");
			printf("pf: State failure on: %c %c %c %c | %c %c\n",
			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
			    SEQ_GEQ(seq,
			    src->seqlo - ((u_int32_t)dst->max_win << dws)) ?
			    ' ': '2',
			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
		}
		REASON_SET(reason, PFRES_BADSTATE);
		return PF_DROP;
	}

	/* Any packets which have gotten here are to be passed */

	/* Run the app-layer handler (ALG), e.g. the PPTP handler. */
	if (sk->app_state &&
	    sk->app_state->handler) {
		sk->app_state->handler(*state, direction,
		    off + (th->th_off << 2), pd, kif);
		if (pd->lmw < 0) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}
		pbuf = pd->mp; // XXXSCW: Why?
	}

	/* translate source/destination address, if necessary */
	if (STATE_TRANSLATE(sk)) {
		pd->naf = (pd->af == sk->af_lan) ? sk->af_gwy : sk->af_lan;

		if (direction == PF_OUT) {
			pf_change_ap(direction, pd->mp, pd->src, &th->th_sport,
			    pd->ip_sum, &th->th_sum, &sk->gwy.addr,
			    sk->gwy.xport.port, 0, pd->af, pd->naf, 1);
		} else {
			/* Inbound; NAT64 needs both addresses rewritten. */
			if (pd->af != pd->naf) {
				if (pd->af == sk->af_gwy) {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &th->th_dport, pd->ip_sum,
					    &th->th_sum, &sk->lan.addr,
					    sk->lan.xport.port, 0,
					    pd->af, pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &th->th_sport, pd->ip_sum,
					    &th->th_sum, &sk->ext_lan.addr,
					    th->th_sport, 0, pd->af,
					    pd->naf, 0);
				} else {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &th->th_dport, pd->ip_sum,
					    &th->th_sum, &sk->ext_gwy.addr,
					    th->th_dport, 0, pd->af,
					    pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &th->th_sport, pd->ip_sum,
					    &th->th_sum, &sk->gwy.addr,
					    sk->gwy.xport.port, 0, pd->af,
					    pd->naf, 0);
				}
			} else {
				pf_change_ap(direction, pd->mp, pd->dst,
				    &th->th_dport, pd->ip_sum,
				    &th->th_sum, &sk->lan.addr,
				    sk->lan.xport.port, 0, pd->af,
				    pd->naf, 1);
			}
		}

		copyback = off + sizeof(*th);
	}

	if (copyback) {
		if (pf_lazy_makewritable(pd, pbuf, copyback) == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		/* Copyback sequence modulation or stateful scrub changes */
		pbuf_copy_back(pbuf, off, sizeof(*th), th);

		if (sk->af_lan != sk->af_gwy) {
			return pf_do_nat64(sk, pd, pbuf, off);
		}
	}
	return PF_PASS;
}
7581
/*
 * Stateful inspection of a UDP packet against an existing state entry.
 *
 * Builds lookup keys for both trees (NAT64 support), applies the
 * IKE-cookie app-state comparator for IKE/500 traffic, retries the
 * lookup at progressively looser endpoint-filtering variants
 * (APD -> AD -> EI), advances the UDP pseudo-state machine and
 * timeouts, lets any app handler run, and applies NAT/NAT64
 * translation.
 *
 * Returns PF_PASS or PF_DROP (with *reason set), or the action chosen
 * by pf_state_lookup_aux() when no usable state is found.
 */
static __attribute__((noinline)) int
pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd, u_short *reason)
{
#pragma unused(h)
	struct pf_state_peer *src, *dst;
	struct pf_state_key_cmp key;
	struct pf_state_key *sk;
	struct udphdr *uh = pd->hdr.udp;
	struct pf_app_state as;		/* stack-local app state for IKE lookup */
	int action, extfilter;
	key.app_state = 0;
	key.proto_variant = PF_EXTFILTER_APD;	/* strictest variant first */

	key.proto = IPPROTO_UDP;
	key.af_lan = key.af_gwy = pd->af;

	/*
	 * For NAT64 the first time rule search and state creation
	 * is done on the incoming side only.
	 * Once the state gets created, NAT64's LAN side (ipv6) will
	 * not be able to find the state in ext-gwy tree as that normally
	 * is intended to be looked up for incoming traffic from the
	 * WAN side.
	 * Therefore to handle NAT64 case we init keys here for both
	 * lan-ext as well as ext-gwy trees.
	 * In the state lookup we attempt a lookup on both trees if
	 * first one does not return any result and return a match if
	 * the match state's was created by NAT64 rule.
	 */
	PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
	PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
	key.ext_gwy.xport.port = uh->uh_sport;
	key.gwy.xport.port = uh->uh_dport;

	PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
	PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
	key.lan.xport.port = uh->uh_sport;
	key.ext_lan.xport.port = uh->uh_dport;

	/*
	 * IKE (UDP/500): distinguish multiple IKE flows behind the same
	 * NAT by including the initiator cookie in the state key.
	 */
	if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
	    ntohs(uh->uh_dport) == PF_IKE_PORT) {
		struct pf_ike_hdr ike;
		size_t plen = pbuf->pb_packet_len - off - sizeof(*uh);
		if (plen < PF_IKE_PACKET_MINSIZE) {
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: IKE message too small.\n"));
			return PF_DROP;
		}

		if (plen > sizeof(ike)) {
			plen = sizeof(ike);
		}
		pbuf_copy_data(pbuf, off + sizeof(*uh), plen, &ike);

		if (ike.initiator_cookie) {
			key.app_state = &as;
			as.compare_lan_ext = pf_ike_compare;
			as.compare_ext_gwy = pf_ike_compare;
			as.u.ike.cookie = ike.initiator_cookie;
		} else {
			/*
			 * <http://tools.ietf.org/html/\
			 * draft-ietf-ipsec-nat-t-ike-01>
			 * Support non-standard NAT-T implementations that
			 * push the ESP packet over the top of the IKE packet.
			 * Do not drop packet.
			 */
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: IKE initiator cookie = 0.\n"));
		}
	}

	*state = pf_find_state(kif, &key, direction);

	/* Retry with looser endpoint-filtering variants (non-IKE only). */
	if (!key.app_state && *state == 0) {
		key.proto_variant = PF_EXTFILTER_AD;
		*state = pf_find_state(kif, &key, direction);
	}

	if (!key.app_state && *state == 0) {
		key.proto_variant = PF_EXTFILTER_EI;
		*state = pf_find_state(kif, &key, direction);
	}

	/* similar to STATE_LOOKUP() */
	if (*state != NULL && pd != NULL && !(pd->pktflags & PKTF_FLOW_ID)) {
		pd->flowsrc = (*state)->state_key->flowsrc;
		pd->flowhash = (*state)->state_key->flowhash;
		if (pd->flowhash != 0) {
			pd->pktflags |= PKTF_FLOW_ID;
			pd->pktflags &= ~PKTF_FLOW_ADV;
		}
	}

	if (pf_state_lookup_aux(state, kif, direction, &action)) {
		return action;
	}

	sk = (*state)->state_key;

	/*
	 * In case of NAT64 the translation is first applied on the LAN
	 * side. Therefore for stack's address family comparison
	 * we use sk->af_lan.
	 */
	if ((direction == sk->direction) && (pd->af == sk->af_lan)) {
		src = &(*state)->src;
		dst = &(*state)->dst;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
	}

	/* update states */
	if (src->state < PFUDPS_SINGLE) {
		src->state = PFUDPS_SINGLE;
	}
	if (dst->state == PFUDPS_SINGLE) {
		dst->state = PFUDPS_MULTIPLE;
	}

	/* update expire time */
	(*state)->expire = pf_time_second();
	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) {
		(*state)->timeout = PFTM_UDP_MULTIPLE;
	} else {
		(*state)->timeout = PFTM_UDP_SINGLE;
	}

	/*
	 * Looser filtering variants learn the external endpoint from
	 * the first matching packet in each direction.
	 */
	extfilter = sk->proto_variant;
	if (extfilter > PF_EXTFILTER_APD) {
		if (direction == PF_OUT) {
			sk->ext_lan.xport.port = key.ext_lan.xport.port;
			if (extfilter > PF_EXTFILTER_AD) {
				PF_ACPY(&sk->ext_lan.addr, &key.ext_lan.addr,
				    key.af_lan);
			}
		} else {
			sk->ext_gwy.xport.port = key.ext_gwy.xport.port;
			if (extfilter > PF_EXTFILTER_AD) {
				PF_ACPY(&sk->ext_gwy.addr, &key.ext_gwy.addr,
				    key.af_gwy);
			}
		}
	}

	/*
	 * NOTE(review): uh->uh_ulen is in network byte order here; the
	 * offset passed to the handler appears to omit ntohs() — confirm
	 * against the app handlers' expectations.
	 */
	if (sk->app_state && sk->app_state->handler) {
		sk->app_state->handler(*state, direction, off + uh->uh_ulen,
		    pd, kif);
		if (pd->lmw < 0) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}
		pbuf = pd->mp; // XXXSCW: Why?
	}

	/* translate source/destination address, if necessary */
	if (STATE_TRANSLATE(sk)) {
		if (pf_lazy_makewritable(pd, pbuf, off + sizeof(*uh)) == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return PF_DROP;
		}

		pd->naf = (pd->af == sk->af_lan) ? sk->af_gwy : sk->af_lan;

		if (direction == PF_OUT) {
			pf_change_ap(direction, pd->mp, pd->src, &uh->uh_sport,
			    pd->ip_sum, &uh->uh_sum, &sk->gwy.addr,
			    sk->gwy.xport.port, 1, pd->af, pd->naf, 1);
		} else {
			/* Inbound; NAT64 needs both addresses rewritten. */
			if (pd->af != pd->naf) {
				if (pd->af == sk->af_gwy) {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &uh->uh_dport, pd->ip_sum,
					    &uh->uh_sum, &sk->lan.addr,
					    sk->lan.xport.port, 1,
					    pd->af, pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &uh->uh_sport, pd->ip_sum,
					    &uh->uh_sum, &sk->ext_lan.addr,
					    uh->uh_sport, 1, pd->af,
					    pd->naf, 0);
				} else {
					pf_change_ap(direction, pd->mp, pd->dst,
					    &uh->uh_dport, pd->ip_sum,
					    &uh->uh_sum, &sk->ext_gwy.addr,
					    uh->uh_dport, 1, pd->af,
					    pd->naf, 0);

					pf_change_ap(direction, pd->mp, pd->src,
					    &uh->uh_sport, pd->ip_sum,
					    &uh->uh_sum, &sk->gwy.addr,
					    sk->gwy.xport.port, 1, pd->af,
					    pd->naf, 0);
				}
			} else {
				pf_change_ap(direction, pd->mp, pd->dst,
				    &uh->uh_dport, pd->ip_sum,
				    &uh->uh_sum, &sk->lan.addr,
				    sk->lan.xport.port, 1,
				    pd->af, pd->naf, 1);
			}
		}

		pbuf_copy_back(pbuf, off, sizeof(*uh), uh);
		if (sk->af_lan != sk->af_gwy) {
			return pf_do_nat64(sk, pd, pbuf, off);
		}
	}
	return PF_PASS;
}
7795
/*
 * Derive a 32-bit flow "generation count" for an ICMP/ICMPv6 packet.
 *
 * Only ICMP error types of interest (v4: unreachable / time exceeded;
 * v6: destination unreachable / parameter problem / time exceeded)
 * yield a non-zero value; every other type maps to 0.  The non-zero
 * value packs <af, type, code> into one word: af in the top byte,
 * type below it, code below that, low byte zero.
 */
static u_int32_t
pf_compute_packet_icmp_gencnt(uint32_t af, u_int32_t type, u_int32_t code)
{
    int relevant;

    if (af == PF_INET) {
        relevant = (type == ICMP_UNREACH || type == ICMP_TIMXCEED);
    } else {
        relevant = (type == ICMP6_DST_UNREACH ||
            type == ICMP6_PARAM_PROB || type == ICMP6_TIME_EXCEEDED);
    }

    if (!relevant) {
        return 0;
    }

    return (af << 24) | (type << 16) | (code << 8);
}
7811
7812
/*
 * pf_test_state_icmp: match an ICMP/ICMPv6 packet against the state table
 * and apply any address/port (and, for NAT64, protocol) translation.
 *
 * Two distinct paths, selected by whether the ICMP type is an error type
 * (ICMP_ERRORTYPE / ICMP6_ERRORTYPE):
 *  - query/reply messages (echo etc.): looked up as their own state keyed
 *    on the ICMP id, then id/address rewritten in place;
 *  - error messages: the embedded (inner) IP header is pulled out and the
 *    state of the *original* TCP/UDP/ICMP flow it refers to is looked up,
 *    then both the outer and the quoted inner headers are rewritten.
 *
 * Returns PF_PASS, PF_DROP (with *reason set), or whatever the NAT64
 * translation helpers (pf_do_nat64 / pf_nat64_ipv4 / pf_nat64_ipv6) return.
 */
static __attribute__((noinline)) int
pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
    pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd, u_short *reason)
{
#pragma unused(h)
    struct pf_addr *saddr = pd->src, *daddr = pd->dst;
    /* saved v4 source; needed later to rebuild the outer header for NAT64 */
    struct in_addr srcv4_inaddr = saddr->v4addr;
    u_int16_t icmpid = 0, *icmpsum = NULL;
    u_int8_t icmptype = 0;
    u_int32_t icmpcode = 0;
    int state_icmp = 0;     /* non-zero: this is an ICMP *error* message */
    struct pf_state_key_cmp key;
    struct pf_state_key *sk;

    struct pf_app_state as;
    /*
     * NOTE(review): key is only partially initialized below (per lookup
     * case); presumably pf_find_state() compares only the fields that are
     * set for the chosen direction — confirm against its implementation.
     */
    key.app_state = 0;

    pd->off = off;

    /* Extract type/id/checksum/code from whichever ICMP flavor this is. */
    switch (pd->proto) {
#if INET
    case IPPROTO_ICMP:
        icmptype = pd->hdr.icmp->icmp_type;
        icmpid = pd->hdr.icmp->icmp_id;
        icmpsum = &pd->hdr.icmp->icmp_cksum;
        icmpcode = pd->hdr.icmp->icmp_code;

        if (ICMP_ERRORTYPE(icmptype)) {
            state_icmp++;
        }
        break;
#endif /* INET */
    case IPPROTO_ICMPV6:
        icmptype = pd->hdr.icmp6->icmp6_type;
        icmpid = pd->hdr.icmp6->icmp6_id;
        icmpsum = &pd->hdr.icmp6->icmp6_cksum;
        icmpcode = pd->hdr.icmp6->icmp6_code;

        if (ICMP6_ERRORTYPE(icmptype)) {
            state_icmp++;
        }
        break;
    }

    /* Stamp a flow generation count on the packet if none is set yet. */
    if (pbuf != NULL && pbuf->pb_flow_gencnt != NULL &&
        *pbuf->pb_flow_gencnt == 0) {
        u_int32_t af = pd->proto == IPPROTO_ICMP ? PF_INET : PF_INET6;
        *pbuf->pb_flow_gencnt = pf_compute_packet_icmp_gencnt(af, icmptype, icmpcode);
    }

    if (!state_icmp) {
        /*
         * ICMP query/reply message not related to a TCP/UDP packet.
         * Search for an ICMP state.
         */
        /*
         * NAT64 requires protocol translation between ICMPv4
         * and ICMPv6. TCP and UDP do not require protocol
         * translation. To avoid adding complexity just to
         * handle ICMP(v4addr/v6addr), we always lookup for
         * proto = IPPROTO_ICMP on both LAN and WAN side
         */
        key.proto = IPPROTO_ICMP;
        key.af_lan = key.af_gwy = pd->af;

        /* The ICMP id plays the role of the port in the state key. */
        PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
        PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
        key.ext_gwy.xport.port = 0;
        key.gwy.xport.port = icmpid;

        PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
        PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
        key.lan.xport.port = icmpid;
        key.ext_lan.xport.port = 0;

        STATE_LOOKUP();

        sk = (*state)->state_key;
        (*state)->expire = pf_time_second();
        (*state)->timeout = PFTM_ICMP_ERROR_REPLY;

        /* translate source/destination address, if necessary */
        if (STATE_TRANSLATE(sk)) {
            /* new AF differs from pd->af only in the NAT64 case */
            pd->naf = (pd->af == sk->af_lan) ?
                sk->af_gwy : sk->af_lan;
            if (direction == PF_OUT) {
                switch (pd->af) {
#if INET
                case AF_INET:
                    /* rewrite src addr, then fix the ICMP
                     * checksum for the id change */
                    pf_change_a(&saddr->v4addr.s_addr,
                        pd->ip_sum,
                        sk->gwy.addr.v4addr.s_addr, 0);
                    pd->hdr.icmp->icmp_cksum =
                        pf_cksum_fixup(
                            pd->hdr.icmp->icmp_cksum, icmpid,
                            sk->gwy.xport.port, 0);
                    pd->hdr.icmp->icmp_id =
                        sk->gwy.xport.port;
                    if (pf_lazy_makewritable(pd, pbuf,
                        off + ICMP_MINLEN) == NULL) {
                        return PF_DROP;
                    }
                    pbuf_copy_back(pbuf, off, ICMP_MINLEN,
                        pd->hdr.icmp);
                    break;
#endif /* INET */
                case AF_INET6:
                    pf_change_a6(saddr,
                        &pd->hdr.icmp6->icmp6_cksum,
                        &sk->gwy.addr, 0);
                    if (pf_lazy_makewritable(pd, pbuf,
                        off + sizeof(struct icmp6_hdr)) ==
                        NULL) {
                        return PF_DROP;
                    }
                    pbuf_copy_back(pbuf, off,
                        sizeof(struct icmp6_hdr),
                        pd->hdr.icmp6);
                    break;
                }
            } else {
                switch (pd->af) {
#if INET
                case AF_INET:
                    if (pd->naf != AF_INET) {
                        /* NAT64: v4 -> v6 protocol
                         * translation of the header */
                        if (pf_translate_icmp_af(
                            AF_INET6, pd->hdr.icmp)) {
                            return PF_DROP;
                        }

                        pd->proto = IPPROTO_ICMPV6;
                    } else {
                        pf_change_a(&daddr->v4addr.s_addr,
                            pd->ip_sum,
                            sk->lan.addr.v4addr.s_addr, 0);

                        pd->hdr.icmp->icmp_cksum =
                            pf_cksum_fixup(
                            pd->hdr.icmp->icmp_cksum,
                            icmpid, sk->lan.xport.port, 0);

                        pd->hdr.icmp->icmp_id =
                            sk->lan.xport.port;
                    }

                    if (pf_lazy_makewritable(pd, pbuf,
                        off + ICMP_MINLEN) == NULL) {
                        return PF_DROP;
                    }
                    pbuf_copy_back(pbuf, off, ICMP_MINLEN,
                        pd->hdr.icmp);
                    if (sk->af_lan != sk->af_gwy) {
                        return pf_do_nat64(sk, pd,
                            pbuf, off);
                    }
                    break;
#endif /* INET */
                case AF_INET6:
                    if (pd->naf != AF_INET6) {
                        /* NAT64: v6 -> v4 protocol
                         * translation of the header */
                        if (pf_translate_icmp_af(
                            AF_INET, pd->hdr.icmp6)) {
                            return PF_DROP;
                        }

                        pd->proto = IPPROTO_ICMP;
                    } else {
                        pf_change_a6(daddr,
                            &pd->hdr.icmp6->icmp6_cksum,
                            &sk->lan.addr, 0);
                    }
                    if (pf_lazy_makewritable(pd, pbuf,
                        off + sizeof(struct icmp6_hdr)) ==
                        NULL) {
                        return PF_DROP;
                    }
                    pbuf_copy_back(pbuf, off,
                        sizeof(struct icmp6_hdr),
                        pd->hdr.icmp6);
                    if (sk->af_lan != sk->af_gwy) {
                        return pf_do_nat64(sk, pd,
                            pbuf, off);
                    }
                    break;
                }
            }
        }

        return PF_PASS;
    } else {
        /*
         * ICMP error message in response to a TCP/UDP packet.
         * Extract the inner TCP/UDP header and search for that state.
         */
        struct pf_pdesc pd2; /* For inner (original) header */
#if INET
        struct ip h2;
#endif /* INET */
        struct ip6_hdr h2_6;
        int terminal = 0;
        int ipoff2 = 0;     /* offset of inner IP header in pbuf */
        int off2 = 0;       /* offset of inner transport header */

        memset(&pd2, 0, sizeof(pd2));

        pd2.af = pd->af;
        switch (pd->af) {
#if INET
        case AF_INET:
            /* offset of h2 in mbuf chain */
            ipoff2 = off + ICMP_MINLEN;

            if (!pf_pull_hdr(pbuf, ipoff2, &h2, sizeof(h2),
                NULL, reason, pd2.af)) {
                DPFPRINTF(PF_DEBUG_MISC,
                    ("pf: ICMP error message too short "
                    "(ip)\n"));
                return PF_DROP;
            }
            /*
             * ICMP error messages don't refer to non-first
             * fragments
             */
            if (h2.ip_off & htons(IP_OFFMASK)) {
                REASON_SET(reason, PFRES_FRAG);
                return PF_DROP;
            }

            /* offset of protocol header that follows h2 */
            off2 = ipoff2 + (h2.ip_hl << 2);
            /* TODO */
            pd2.off = ipoff2 + (h2.ip_hl << 2);

            pd2.proto = h2.ip_p;
            pd2.src = (struct pf_addr *)&h2.ip_src;
            pd2.dst = (struct pf_addr *)&h2.ip_dst;
            pd2.ip_sum = &h2.ip_sum;
            break;
#endif /* INET */
        case AF_INET6:
            ipoff2 = off + sizeof(struct icmp6_hdr);

            if (!pf_pull_hdr(pbuf, ipoff2, &h2_6, sizeof(h2_6),
                NULL, reason, pd2.af)) {
                DPFPRINTF(PF_DEBUG_MISC,
                    ("pf: ICMP error message too short "
                    "(ip6)\n"));
                return PF_DROP;
            }
            pd2.proto = h2_6.ip6_nxt;
            pd2.src = (struct pf_addr *)(uintptr_t)&h2_6.ip6_src;
            pd2.dst = (struct pf_addr *)(uintptr_t)&h2_6.ip6_dst;
            pd2.ip_sum = NULL;
            off2 = ipoff2 + sizeof(h2_6);
            /* Walk the inner v6 extension-header chain to find
             * the transport header. */
            do {
                switch (pd2.proto) {
                case IPPROTO_FRAGMENT:
                    /*
                     * ICMPv6 error messages for
                     * non-first fragments
                     */
                    REASON_SET(reason, PFRES_FRAG);
                    return PF_DROP;
                case IPPROTO_AH:
                case IPPROTO_HOPOPTS:
                case IPPROTO_ROUTING:
                case IPPROTO_DSTOPTS: {
                    /* get next header and header length */
                    struct ip6_ext opt6;

                    if (!pf_pull_hdr(pbuf, off2, &opt6,
                        sizeof(opt6), NULL, reason,
                        pd2.af)) {
                        DPFPRINTF(PF_DEBUG_MISC,
                            ("pf: ICMPv6 short opt\n"));
                        return PF_DROP;
                    }
                    /* AH length is in 4-byte units (+2),
                     * other options in 8-byte units (+1) */
                    if (pd2.proto == IPPROTO_AH) {
                        off2 += (opt6.ip6e_len + 2) * 4;
                    } else {
                        off2 += (opt6.ip6e_len + 1) * 8;
                    }
                    pd2.proto = opt6.ip6e_nxt;
                    /* goto the next header */
                    break;
                }
                default:
                    terminal++;
                    break;
                }
            } while (!terminal);
            /* TODO */
            pd2.off = ipoff2;
            break;
        }

        /* Dispatch on the quoted (inner) transport protocol. */
        switch (pd2.proto) {
        case IPPROTO_TCP: {
            struct tcphdr th;
            u_int32_t seq;
            struct pf_state_peer *src, *dst;
            u_int8_t dws;
            int copyback = 0;

            /*
             * Only the first 8 bytes of the TCP header can be
             * expected. Don't access any TCP header fields after
             * th_seq, an ackskew test is not possible.
             */
            if (!pf_pull_hdr(pbuf, off2, &th, 8, NULL, reason,
                pd2.af)) {
                DPFPRINTF(PF_DEBUG_MISC,
                    ("pf: ICMP error message too short "
                    "(tcp)\n"));
                return PF_DROP;
            }

            /* Note: inner src/dst are swapped relative to the
             * original flow, hence dst -> ext_gwy/lan below. */
            key.proto = IPPROTO_TCP;
            key.af_gwy = pd2.af;
            PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
            PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
            key.ext_gwy.xport.port = th.th_dport;
            key.gwy.xport.port = th.th_sport;

            key.af_lan = pd2.af;
            PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
            PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
            key.lan.xport.port = th.th_dport;
            key.ext_lan.xport.port = th.th_sport;

            STATE_LOOKUP();

            sk = (*state)->state_key;
            if ((direction == sk->direction) &&
                ((sk->af_lan == sk->af_gwy) ||
                (pd2.af == sk->af_lan))) {
                src = &(*state)->dst;
                dst = &(*state)->src;
            } else {
                src = &(*state)->src;
                dst = &(*state)->dst;
            }

            if (src->wscale && (dst->wscale & PF_WSCALE_FLAG)) {
                dws = dst->wscale & PF_WSCALE_MASK;
            } else {
                dws = TCP_MAX_WINSHIFT;
            }

            /* Demodulate sequence number */
            seq = ntohl(th.th_seq) - src->seqdiff;
            if (src->seqdiff) {
                pf_change_a(&th.th_seq, icmpsum,
                    htonl(seq), 0);
                copyback = 1;
            }

            /* Drop errors quoting a sequence number outside
             * the window the peer could legitimately see. */
            if (!SEQ_GEQ(src->seqhi, seq) ||
                !SEQ_GEQ(seq,
                src->seqlo - ((u_int32_t)dst->max_win << dws))) {
                if (pf_status.debug >= PF_DEBUG_MISC) {
                    printf("pf: BAD ICMP %d:%d ",
                        icmptype, pd->hdr.icmp->icmp_code);
                    pf_print_host(pd->src, 0, pd->af);
                    printf(" -> ");
                    pf_print_host(pd->dst, 0, pd->af);
                    printf(" state: ");
                    pf_print_state(*state);
                    printf(" seq=%u\n", seq);
                }
                REASON_SET(reason, PFRES_BADSTATE);
                return PF_DROP;
            }

            pd->naf = pd2.naf = (pd2.af == sk->af_lan) ?
                sk->af_gwy : sk->af_lan;

            if (STATE_TRANSLATE(sk)) {
                /* NAT64 case */
                if (sk->af_lan != sk->af_gwy) {
                    struct pf_state_host *saddr2, *daddr2;

                    if (pd2.naf == sk->af_lan) {
                        saddr2 = &sk->lan;
                        daddr2 = &sk->ext_lan;
                    } else {
                        saddr2 = &sk->ext_gwy;
                        daddr2 = &sk->gwy;
                    }

                    /* translate ICMP message types and codes */
                    if (pf_translate_icmp_af(pd->naf,
                        pd->hdr.icmp)) {
                        return PF_DROP;
                    }

                    if (pf_lazy_makewritable(pd, pbuf,
                        off2 + 8) == NULL) {
                        return PF_DROP;
                    }

                    pbuf_copy_back(pbuf, pd->off,
                        sizeof(struct icmp6_hdr),
                        pd->hdr.icmp6);

                    /*
                     * translate inner ip header within the
                     * ICMP message
                     */
                    if (pf_change_icmp_af(pbuf, ipoff2, pd,
                        &pd2, &saddr2->addr, &daddr2->addr,
                        pd->af, pd->naf)) {
                        return PF_DROP;
                    }

                    if (pd->naf == AF_INET) {
                        pd->proto = IPPROTO_ICMP;
                    } else {
                        pd->proto = IPPROTO_ICMPV6;
                    }

                    /*
                     * translate inner tcp header within
                     * the ICMP message
                     */
                    pf_change_ap(direction, NULL, pd2.src,
                        &th.th_sport, pd2.ip_sum,
                        &th.th_sum, &daddr2->addr,
                        saddr2->xport.port, 0, pd2.af,
                        pd2.naf, 0);

                    pf_change_ap(direction, NULL, pd2.dst,
                        &th.th_dport, pd2.ip_sum,
                        &th.th_sum, &saddr2->addr,
                        daddr2->xport.port, 0, pd2.af,
                        pd2.naf, 0);

                    pbuf_copy_back(pbuf, pd2.off, 8, &th);

                    /* translate outer ip header */
                    PF_ACPY(&pd->naddr, &daddr2->addr,
                        pd->naf);
                    PF_ACPY(&pd->ndaddr, &saddr2->addr,
                        pd->naf);
                    if (pd->af == AF_INET) {
                        /* restore the original v4 src
                         * saved at function entry */
                        memcpy(&pd->naddr.addr32[3],
                            &srcv4_inaddr,
                            sizeof(pd->naddr.addr32[3]));
                        return pf_nat64_ipv4(pbuf, off,
                            pd);
                    } else {
                        return pf_nat64_ipv6(pbuf, off,
                            pd);
                    }
                }
                if (direction == PF_IN) {
                    pf_change_icmp(pd2.src, &th.th_sport,
                        daddr, &sk->lan.addr,
                        sk->lan.xport.port, NULL,
                        pd2.ip_sum, icmpsum,
                        pd->ip_sum, 0, pd2.af);
                } else {
                    pf_change_icmp(pd2.dst, &th.th_dport,
                        saddr, &sk->gwy.addr,
                        sk->gwy.xport.port, NULL,
                        pd2.ip_sum, icmpsum,
                        pd->ip_sum, 0, pd2.af);
                }
                copyback = 1;
            }

            if (copyback) {
                if (pf_lazy_makewritable(pd, pbuf, off2 + 8) ==
                    NULL) {
                    return PF_DROP;
                }
                switch (pd2.af) {
#if INET
                case AF_INET:
                    pbuf_copy_back(pbuf, off, ICMP_MINLEN,
                        pd->hdr.icmp);
                    pbuf_copy_back(pbuf, ipoff2, sizeof(h2),
                        &h2);
                    break;
#endif /* INET */
                case AF_INET6:
                    pbuf_copy_back(pbuf, off,
                        sizeof(struct icmp6_hdr),
                        pd->hdr.icmp6);
                    pbuf_copy_back(pbuf, ipoff2,
                        sizeof(h2_6), &h2_6);
                    break;
                }
                pbuf_copy_back(pbuf, off2, 8, &th);
            }

            return PF_PASS;
        }
        case IPPROTO_UDP: {
            struct udphdr uh;
            int dx, action;
            if (!pf_pull_hdr(pbuf, off2, &uh, sizeof(uh),
                NULL, reason, pd2.af)) {
                DPFPRINTF(PF_DEBUG_MISC,
                    ("pf: ICMP error message too short "
                    "(udp)\n"));
                return PF_DROP;
            }

            key.af_gwy = pd2.af;
            PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
            PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
            key.ext_gwy.xport.port = uh.uh_dport;
            key.gwy.xport.port = uh.uh_sport;

            key.af_lan = pd2.af;
            PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
            PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
            key.lan.xport.port = uh.uh_dport;
            key.ext_lan.xport.port = uh.uh_sport;

            key.proto = IPPROTO_UDP;
            key.proto_variant = PF_EXTFILTER_APD;
            dx = direction;

            /* IKE flows carry an app_state keyed by the
             * initiator cookie; try that first. */
            if (ntohs(uh.uh_sport) == PF_IKE_PORT &&
                ntohs(uh.uh_dport) == PF_IKE_PORT) {
                struct pf_ike_hdr ike;
                size_t plen = pbuf->pb_packet_len - off2 -
                    sizeof(uh);
                if (direction == PF_IN &&
                    plen < 8 /* PF_IKE_PACKET_MINSIZE */) {
                    DPFPRINTF(PF_DEBUG_MISC, ("pf: "
                        "ICMP error, embedded IKE message "
                        "too small.\n"));
                    return PF_DROP;
                }

                if (plen > sizeof(ike)) {
                    plen = sizeof(ike);
                }
                pbuf_copy_data(pbuf, off + sizeof(uh), plen,
                    &ike);

                key.app_state = &as;
                as.compare_lan_ext = pf_ike_compare;
                as.compare_ext_gwy = pf_ike_compare;
                as.u.ike.cookie = ike.initiator_cookie;
            }

            *state = pf_find_state(kif, &key, dx);

            /* Retry progressively looser lookups: without the
             * app state, then the wider extfilter variants. */
            if (key.app_state && *state == 0) {
                key.app_state = 0;
                *state = pf_find_state(kif, &key, dx);
            }

            if (*state == 0) {
                key.proto_variant = PF_EXTFILTER_AD;
                *state = pf_find_state(kif, &key, dx);
            }

            if (*state == 0) {
                key.proto_variant = PF_EXTFILTER_EI;
                *state = pf_find_state(kif, &key, dx);
            }

            /* similar to STATE_LOOKUP() */
            if (*state != NULL && pd != NULL &&
                !(pd->pktflags & PKTF_FLOW_ID)) {
                pd->flowsrc = (*state)->state_key->flowsrc;
                pd->flowhash = (*state)->state_key->flowhash;
                if (pd->flowhash != 0) {
                    pd->pktflags |= PKTF_FLOW_ID;
                    pd->pktflags &= ~PKTF_FLOW_ADV;
                }
            }

            if (pf_state_lookup_aux(state, kif, direction, &action)) {
                return action;
            }

            sk = (*state)->state_key;
            pd->naf = pd2.naf = (pd2.af == sk->af_lan) ?
                sk->af_gwy : sk->af_lan;

            if (STATE_TRANSLATE(sk)) {
                /* NAT64 case */
                if (sk->af_lan != sk->af_gwy) {
                    struct pf_state_host *saddr2, *daddr2;

                    if (pd2.naf == sk->af_lan) {
                        saddr2 = &sk->lan;
                        daddr2 = &sk->ext_lan;
                    } else {
                        saddr2 = &sk->ext_gwy;
                        daddr2 = &sk->gwy;
                    }

                    /* translate ICMP message */
                    if (pf_translate_icmp_af(pd->naf,
                        pd->hdr.icmp)) {
                        return PF_DROP;
                    }
                    if (pf_lazy_makewritable(pd, pbuf,
                        off2 + 8) == NULL) {
                        return PF_DROP;
                    }

                    pbuf_copy_back(pbuf, pd->off,
                        sizeof(struct icmp6_hdr),
                        pd->hdr.icmp6);

                    /*
                     * translate inner ip header within the
                     * ICMP message
                     */
                    if (pf_change_icmp_af(pbuf, ipoff2, pd,
                        &pd2, &saddr2->addr, &daddr2->addr,
                        pd->af, pd->naf)) {
                        return PF_DROP;
                    }

                    if (pd->naf == AF_INET) {
                        pd->proto = IPPROTO_ICMP;
                    } else {
                        pd->proto = IPPROTO_ICMPV6;
                    }

                    /*
                     * translate inner udp header within
                     * the ICMP message
                     */
                    pf_change_ap(direction, NULL, pd2.src,
                        &uh.uh_sport, pd2.ip_sum,
                        &uh.uh_sum, &daddr2->addr,
                        saddr2->xport.port, 0, pd2.af,
                        pd2.naf, 0);

                    pf_change_ap(direction, NULL, pd2.dst,
                        &uh.uh_dport, pd2.ip_sum,
                        &uh.uh_sum, &saddr2->addr,
                        daddr2->xport.port, 0, pd2.af,
                        pd2.naf, 0);

                    pbuf_copy_back(pbuf, pd2.off,
                        sizeof(uh), &uh);

                    /* translate outer ip header */
                    PF_ACPY(&pd->naddr, &daddr2->addr,
                        pd->naf);
                    PF_ACPY(&pd->ndaddr, &saddr2->addr,
                        pd->naf);
                    if (pd->af == AF_INET) {
                        memcpy(&pd->naddr.addr32[3],
                            &srcv4_inaddr,
                            sizeof(pd->naddr.addr32[3]));
                        return pf_nat64_ipv4(pbuf, off,
                            pd);
                    } else {
                        return pf_nat64_ipv6(pbuf, off,
                            pd);
                    }
                }
                if (direction == PF_IN) {
                    pf_change_icmp(pd2.src, &uh.uh_sport,
                        daddr, &sk->lan.addr,
                        sk->lan.xport.port, &uh.uh_sum,
                        pd2.ip_sum, icmpsum,
                        pd->ip_sum, 1, pd2.af);
                } else {
                    pf_change_icmp(pd2.dst, &uh.uh_dport,
                        saddr, &sk->gwy.addr,
                        sk->gwy.xport.port, &uh.uh_sum,
                        pd2.ip_sum, icmpsum,
                        pd->ip_sum, 1, pd2.af);
                }
                if (pf_lazy_makewritable(pd, pbuf,
                    off2 + sizeof(uh)) == NULL) {
                    return PF_DROP;
                }
                switch (pd2.af) {
#if INET
                case AF_INET:
                    pbuf_copy_back(pbuf, off, ICMP_MINLEN,
                        pd->hdr.icmp);
                    pbuf_copy_back(pbuf, ipoff2,
                        sizeof(h2), &h2);
                    break;
#endif /* INET */
                case AF_INET6:
                    pbuf_copy_back(pbuf, off,
                        sizeof(struct icmp6_hdr),
                        pd->hdr.icmp6);
                    pbuf_copy_back(pbuf, ipoff2,
                        sizeof(h2_6), &h2_6);
                    break;
                }
                pbuf_copy_back(pbuf, off2, sizeof(uh), &uh);
            }

            return PF_PASS;
        }
#if INET
        case IPPROTO_ICMP: {
            struct icmp iih;

            if (!pf_pull_hdr(pbuf, off2, &iih, ICMP_MINLEN,
                NULL, reason, pd2.af)) {
                DPFPRINTF(PF_DEBUG_MISC,
                    ("pf: ICMP error message too short i"
                    "(icmp)\n"));
                return PF_DROP;
            }

            key.proto = IPPROTO_ICMP;
            if (direction == PF_IN) {
                key.af_gwy = pd2.af;
                PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
                PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
                key.ext_gwy.xport.port = 0;
                key.gwy.xport.port = iih.icmp_id;
            } else {
                key.af_lan = pd2.af;
                PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
                PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
                key.lan.xport.port = iih.icmp_id;
                key.ext_lan.xport.port = 0;
            }

            STATE_LOOKUP();

            sk = (*state)->state_key;
            if (STATE_TRANSLATE(sk)) {
                if (direction == PF_IN) {
                    pf_change_icmp(pd2.src, &iih.icmp_id,
                        daddr, &sk->lan.addr,
                        sk->lan.xport.port, NULL,
                        pd2.ip_sum, icmpsum,
                        pd->ip_sum, 0, AF_INET);
                } else {
                    pf_change_icmp(pd2.dst, &iih.icmp_id,
                        saddr, &sk->gwy.addr,
                        sk->gwy.xport.port, NULL,
                        pd2.ip_sum, icmpsum,
                        pd->ip_sum, 0, AF_INET);
                }
                if (pf_lazy_makewritable(pd, pbuf,
                    off2 + ICMP_MINLEN) == NULL) {
                    return PF_DROP;
                }
                pbuf_copy_back(pbuf, off, ICMP_MINLEN,
                    pd->hdr.icmp);
                pbuf_copy_back(pbuf, ipoff2, sizeof(h2), &h2);
                pbuf_copy_back(pbuf, off2, ICMP_MINLEN, &iih);
            }

            return PF_PASS;
        }
#endif /* INET */
        case IPPROTO_ICMPV6: {
            struct icmp6_hdr iih;

            if (!pf_pull_hdr(pbuf, off2, &iih,
                sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
                DPFPRINTF(PF_DEBUG_MISC,
                    ("pf: ICMP error message too short "
                    "(icmp6)\n"));
                return PF_DROP;
            }

            key.proto = IPPROTO_ICMPV6;
            if (direction == PF_IN) {
                key.af_gwy = pd2.af;
                PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
                PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
                key.ext_gwy.xport.port = 0;
                key.gwy.xport.port = iih.icmp6_id;
            } else {
                key.af_lan = pd2.af;
                PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
                PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
                key.lan.xport.port = iih.icmp6_id;
                key.ext_lan.xport.port = 0;
            }

            STATE_LOOKUP();

            sk = (*state)->state_key;
            if (STATE_TRANSLATE(sk)) {
                if (direction == PF_IN) {
                    pf_change_icmp(pd2.src, &iih.icmp6_id,
                        daddr, &sk->lan.addr,
                        sk->lan.xport.port, NULL,
                        pd2.ip_sum, icmpsum,
                        pd->ip_sum, 0, AF_INET6);
                } else {
                    pf_change_icmp(pd2.dst, &iih.icmp6_id,
                        saddr, &sk->gwy.addr,
                        sk->gwy.xport.port, NULL,
                        pd2.ip_sum, icmpsum,
                        pd->ip_sum, 0, AF_INET6);
                }
                if (pf_lazy_makewritable(pd, pbuf, off2 +
                    sizeof(struct icmp6_hdr)) == NULL) {
                    return PF_DROP;
                }
                pbuf_copy_back(pbuf, off,
                    sizeof(struct icmp6_hdr), pd->hdr.icmp6);
                pbuf_copy_back(pbuf, ipoff2, sizeof(h2_6),
                    &h2_6);
                pbuf_copy_back(pbuf, off2,
                    sizeof(struct icmp6_hdr), &iih);
            }

            return PF_PASS;
        }
        default: {
            /* Any other quoted protocol: port-less lookup. */
            key.proto = pd2.proto;
            if (direction == PF_IN) {
                key.af_gwy = pd2.af;
                PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
                PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
                key.ext_gwy.xport.port = 0;
                key.gwy.xport.port = 0;
            } else {
                key.af_lan = pd2.af;
                PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
                PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
                key.lan.xport.port = 0;
                key.ext_lan.xport.port = 0;
            }

            STATE_LOOKUP();

            sk = (*state)->state_key;
            if (STATE_TRANSLATE(sk)) {
                if (direction == PF_IN) {
                    pf_change_icmp(pd2.src, NULL, daddr,
                        &sk->lan.addr, 0, NULL,
                        pd2.ip_sum, icmpsum,
                        pd->ip_sum, 0, pd2.af);
                } else {
                    pf_change_icmp(pd2.dst, NULL, saddr,
                        &sk->gwy.addr, 0, NULL,
                        pd2.ip_sum, icmpsum,
                        pd->ip_sum, 0, pd2.af);
                }
                switch (pd2.af) {
#if INET
                case AF_INET:
                    if (pf_lazy_makewritable(pd, pbuf,
                        ipoff2 + sizeof(h2)) == NULL) {
                        return PF_DROP;
                    }
                    /*
                     * <XXXSCW>
                     * Xnu was missing the following...
                     */
                    pbuf_copy_back(pbuf, off, ICMP_MINLEN,
                        pd->hdr.icmp);
                    pbuf_copy_back(pbuf, ipoff2,
                        sizeof(h2), &h2);
                    break;
                /*
                 * </XXXSCW>
                 */
#endif /* INET */
                case AF_INET6:
                    if (pf_lazy_makewritable(pd, pbuf,
                        ipoff2 + sizeof(h2_6)) == NULL) {
                        return PF_DROP;
                    }
                    pbuf_copy_back(pbuf, off,
                        sizeof(struct icmp6_hdr),
                        pd->hdr.icmp6);
                    pbuf_copy_back(pbuf, ipoff2,
                        sizeof(h2_6), &h2_6);
                    break;
                }
            }

            return PF_PASS;
        }
        }
    }
}
8699
8700 static __attribute__((noinline)) int
pf_test_state_grev1(struct pf_state ** state,int direction,struct pfi_kif * kif,int off,struct pf_pdesc * pd)8701 pf_test_state_grev1(struct pf_state **state, int direction,
8702 struct pfi_kif *kif, int off, struct pf_pdesc *pd)
8703 {
8704 struct pf_state_peer *src;
8705 struct pf_state_peer *dst;
8706 struct pf_state_key_cmp key = {};
8707 struct pf_grev1_hdr *grev1 = pd->hdr.grev1;
8708
8709 key.app_state = 0;
8710 key.proto = IPPROTO_GRE;
8711 key.proto_variant = PF_GRE_PPTP_VARIANT;
8712 if (direction == PF_IN) {
8713 key.af_gwy = pd->af;
8714 PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
8715 PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
8716 key.gwy.xport.call_id = grev1->call_id;
8717 } else {
8718 key.af_lan = pd->af;
8719 PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
8720 PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
8721 key.ext_lan.xport.call_id = grev1->call_id;
8722 }
8723
8724 STATE_LOOKUP();
8725
8726 if (direction == (*state)->state_key->direction) {
8727 src = &(*state)->src;
8728 dst = &(*state)->dst;
8729 } else {
8730 src = &(*state)->dst;
8731 dst = &(*state)->src;
8732 }
8733
8734 /* update states */
8735 if (src->state < PFGRE1S_INITIATING) {
8736 src->state = PFGRE1S_INITIATING;
8737 }
8738
8739 /* update expire time */
8740 (*state)->expire = pf_time_second();
8741 if (src->state >= PFGRE1S_INITIATING &&
8742 dst->state >= PFGRE1S_INITIATING) {
8743 if ((*state)->timeout != PFTM_TCP_ESTABLISHED) {
8744 (*state)->timeout = PFTM_GREv1_ESTABLISHED;
8745 }
8746 src->state = PFGRE1S_ESTABLISHED;
8747 dst->state = PFGRE1S_ESTABLISHED;
8748 } else {
8749 (*state)->timeout = PFTM_GREv1_INITIATING;
8750 }
8751
8752 if ((*state)->state_key->app_state) {
8753 (*state)->state_key->app_state->u.grev1.pptp_state->expire =
8754 pf_time_second();
8755 }
8756
8757 /* translate source/destination address, if necessary */
8758 if (STATE_GRE_TRANSLATE((*state)->state_key)) {
8759 if (direction == PF_OUT) {
8760 switch (pd->af) {
8761 #if INET
8762 case AF_INET:
8763 pf_change_a(&pd->src->v4addr.s_addr,
8764 pd->ip_sum,
8765 (*state)->state_key->gwy.addr.v4addr.s_addr, 0);
8766 break;
8767 #endif /* INET */
8768 case AF_INET6:
8769 PF_ACPY(pd->src, &(*state)->state_key->gwy.addr,
8770 pd->af);
8771 break;
8772 }
8773 } else {
8774 grev1->call_id = (*state)->state_key->lan.xport.call_id;
8775
8776 switch (pd->af) {
8777 #if INET
8778 case AF_INET:
8779 pf_change_a(&pd->dst->v4addr.s_addr,
8780 pd->ip_sum,
8781 (*state)->state_key->lan.addr.v4addr.s_addr, 0);
8782 break;
8783 #endif /* INET */
8784 case AF_INET6:
8785 PF_ACPY(pd->dst, &(*state)->state_key->lan.addr,
8786 pd->af);
8787 break;
8788 }
8789 }
8790
8791 if (pf_lazy_makewritable(pd, pd->mp, off + sizeof(*grev1)) ==
8792 NULL) {
8793 return PF_DROP;
8794 }
8795 pbuf_copy_back(pd->mp, off, sizeof(*grev1), grev1);
8796 }
8797
8798 return PF_PASS;
8799 }
8800
/*
 * pf_test_state_esp: match an ESP packet against the state table (keyed
 * by SPI), advance the ESP state machine, refresh timers, and rewrite
 * the outer addresses when the state is address-translated.
 *
 * If no state matches, a "blocking" state (one created with SPI 0) is
 * searched for and, when found, re-keyed in place to this packet's SPI
 * so the flow is tracked from here on.  Returns PF_PASS or PF_DROP.
 */
static __attribute__((noinline)) int
pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif,
    int off, struct pf_pdesc *pd)
{
#pragma unused(off)
    struct pf_state_peer *src;
    struct pf_state_peer *dst;
    struct pf_state_key_cmp key;
    struct pf_esp_hdr *esp = pd->hdr.esp;
    int action;

    /* Build the lookup key; the SPI stands in for a port. */
    memset(&key, 0, sizeof(key));
    key.proto = IPPROTO_ESP;
    if (direction == PF_IN) {
        key.af_gwy = pd->af;
        PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
        PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
        key.gwy.xport.spi = esp->spi;
    } else {
        key.af_lan = pd->af;
        PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
        PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
        key.ext_lan.xport.spi = esp->spi;
    }

    *state = pf_find_state(kif, &key, direction);

    if (*state == 0) {
        struct pf_state *s;

        /*
         * <[email protected]>
         * No matching state. Look for a blocking state. If we find
         * one, then use that state and move it so that it's keyed to
         * the SPI in the current packet.
         */
        if (direction == PF_IN) {
            key.gwy.xport.spi = 0;

            s = pf_find_state(kif, &key, direction);
            if (s) {
                struct pf_state_key *sk = s->state_key;

                /* Re-key: remove, update SPI, re-insert.
                 * On insert collision the state is detached
                 * from this tree instead of adopted. */
                RB_REMOVE(pf_state_tree_ext_gwy,
                    &pf_statetbl_ext_gwy, sk);
                sk->lan.xport.spi = sk->gwy.xport.spi =
                    esp->spi;

                if (RB_INSERT(pf_state_tree_ext_gwy,
                    &pf_statetbl_ext_gwy, sk)) {
                    pf_detach_state(s, PF_DT_SKIP_EXTGWY);
                } else {
                    *state = s;
                }
            }
        } else {
            key.ext_lan.xport.spi = 0;

            s = pf_find_state(kif, &key, direction);
            if (s) {
                struct pf_state_key *sk = s->state_key;

                RB_REMOVE(pf_state_tree_lan_ext,
                    &pf_statetbl_lan_ext, sk);
                sk->ext_lan.xport.spi = esp->spi;

                if (RB_INSERT(pf_state_tree_lan_ext,
                    &pf_statetbl_lan_ext, sk)) {
                    pf_detach_state(s, PF_DT_SKIP_LANEXT);
                } else {
                    *state = s;
                }
            }
        }

        if (s) {
            if (*state == 0) {
                /* Re-key failed (collision): tear the
                 * orphaned state down and drop. */
#if NPFSYNC
                if (s->creatorid == pf_status.hostid) {
                    pfsync_delete_state(s);
                }
#endif
                s->timeout = PFTM_UNLINKED;
                hook_runloop(&s->unlink_hooks,
                    HOOK_REMOVE | HOOK_FREE);
                pf_src_tree_remove_state(s);
                pf_free_state(s);
                return PF_DROP;
            }
        }
    }

    /* similar to STATE_LOOKUP() */
    if (*state != NULL && pd != NULL && !(pd->pktflags & PKTF_FLOW_ID)) {
        pd->flowsrc = (*state)->state_key->flowsrc;
        pd->flowhash = (*state)->state_key->flowhash;
        if (pd->flowhash != 0) {
            pd->pktflags |= PKTF_FLOW_ID;
            pd->pktflags &= ~PKTF_FLOW_ADV;
        }
    }

    if (pf_state_lookup_aux(state, kif, direction, &action)) {
        return action;
    }

    /* Orient the peers relative to the state's creation direction. */
    if (direction == (*state)->state_key->direction) {
        src = &(*state)->src;
        dst = &(*state)->dst;
    } else {
        src = &(*state)->dst;
        dst = &(*state)->src;
    }

    /* update states */
    if (src->state < PFESPS_INITIATING) {
        src->state = PFESPS_INITIATING;
    }

    /* update expire time */
    (*state)->expire = pf_time_second();
    if (src->state >= PFESPS_INITIATING &&
        dst->state >= PFESPS_INITIATING) {
        (*state)->timeout = PFTM_ESP_ESTABLISHED;
        src->state = PFESPS_ESTABLISHED;
        dst->state = PFESPS_ESTABLISHED;
    } else {
        (*state)->timeout = PFTM_ESP_INITIATING;
    }
    /* translate source/destination address, if necessary */
    if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
        if (direction == PF_OUT) {
            switch (pd->af) {
#if INET
            case AF_INET:
                pf_change_a(&pd->src->v4addr.s_addr,
                    pd->ip_sum,
                    (*state)->state_key->gwy.addr.v4addr.s_addr, 0);
                break;
#endif /* INET */
            case AF_INET6:
                PF_ACPY(pd->src, &(*state)->state_key->gwy.addr,
                    pd->af);
                break;
            }
        } else {
            switch (pd->af) {
#if INET
            case AF_INET:
                pf_change_a(&pd->dst->v4addr.s_addr,
                    pd->ip_sum,
                    (*state)->state_key->lan.addr.v4addr.s_addr, 0);
                break;
#endif /* INET */
            case AF_INET6:
                PF_ACPY(pd->dst, &(*state)->state_key->lan.addr,
                    pd->af);
                break;
            }
        }
    }

    return PF_PASS;
}
8965
8966 static __attribute__((noinline)) int
pf_test_state_other(struct pf_state ** state,int direction,struct pfi_kif * kif,struct pf_pdesc * pd)8967 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
8968 struct pf_pdesc *pd)
8969 {
8970 struct pf_state_peer *src, *dst;
8971 struct pf_state_key_cmp key = {};
8972
8973 key.app_state = 0;
8974 key.proto = pd->proto;
8975 if (direction == PF_IN) {
8976 key.af_gwy = pd->af;
8977 PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
8978 PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
8979 key.ext_gwy.xport.port = 0;
8980 key.gwy.xport.port = 0;
8981 } else {
8982 key.af_lan = pd->af;
8983 PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
8984 PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
8985 key.lan.xport.port = 0;
8986 key.ext_lan.xport.port = 0;
8987 }
8988
8989 STATE_LOOKUP();
8990
8991 if (direction == (*state)->state_key->direction) {
8992 src = &(*state)->src;
8993 dst = &(*state)->dst;
8994 } else {
8995 src = &(*state)->dst;
8996 dst = &(*state)->src;
8997 }
8998
8999 /* update states */
9000 if (src->state < PFOTHERS_SINGLE) {
9001 src->state = PFOTHERS_SINGLE;
9002 }
9003 if (dst->state == PFOTHERS_SINGLE) {
9004 dst->state = PFOTHERS_MULTIPLE;
9005 }
9006
9007 /* update expire time */
9008 (*state)->expire = pf_time_second();
9009 if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) {
9010 (*state)->timeout = PFTM_OTHER_MULTIPLE;
9011 } else {
9012 (*state)->timeout = PFTM_OTHER_SINGLE;
9013 }
9014
9015 /* translate source/destination address, if necessary */
9016 if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
9017 if (direction == PF_OUT) {
9018 switch (pd->af) {
9019 #if INET
9020 case AF_INET:
9021 pf_change_a(&pd->src->v4addr.s_addr,
9022 pd->ip_sum,
9023 (*state)->state_key->gwy.addr.v4addr.s_addr,
9024 0);
9025 break;
9026 #endif /* INET */
9027 case AF_INET6:
9028 PF_ACPY(pd->src,
9029 &(*state)->state_key->gwy.addr, pd->af);
9030 break;
9031 }
9032 } else {
9033 switch (pd->af) {
9034 #if INET
9035 case AF_INET:
9036 pf_change_a(&pd->dst->v4addr.s_addr,
9037 pd->ip_sum,
9038 (*state)->state_key->lan.addr.v4addr.s_addr,
9039 0);
9040 break;
9041 #endif /* INET */
9042 case AF_INET6:
9043 PF_ACPY(pd->dst,
9044 &(*state)->state_key->lan.addr, pd->af);
9045 break;
9046 }
9047 }
9048 }
9049
9050 return PF_PASS;
9051 }
9052
9053 /*
9054 * ipoff and off are measured from the start of the mbuf chain.
9055 * h must be at "ipoff" on the mbuf chain.
9056 */
9057 void *
pf_pull_hdr(pbuf_t * pbuf,int off,void * p,int len,u_short * actionp,u_short * reasonp,sa_family_t af)9058 pf_pull_hdr(pbuf_t *pbuf, int off, void *p, int len,
9059 u_short *actionp, u_short *reasonp, sa_family_t af)
9060 {
9061 switch (af) {
9062 #if INET
9063 case AF_INET: {
9064 struct ip *h = pbuf->pb_data;
9065 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
9066
9067 if (fragoff) {
9068 if (fragoff >= len) {
9069 ACTION_SET(actionp, PF_PASS);
9070 } else {
9071 ACTION_SET(actionp, PF_DROP);
9072 REASON_SET(reasonp, PFRES_FRAG);
9073 }
9074 return NULL;
9075 }
9076 if (pbuf->pb_packet_len < (unsigned)(off + len) ||
9077 ntohs(h->ip_len) < off + len) {
9078 ACTION_SET(actionp, PF_DROP);
9079 REASON_SET(reasonp, PFRES_SHORT);
9080 return NULL;
9081 }
9082 break;
9083 }
9084 #endif /* INET */
9085 case AF_INET6: {
9086 struct ip6_hdr *h = pbuf->pb_data;
9087
9088 if (pbuf->pb_packet_len < (unsigned)(off + len) ||
9089 (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
9090 (unsigned)(off + len)) {
9091 ACTION_SET(actionp, PF_DROP);
9092 REASON_SET(reasonp, PFRES_SHORT);
9093 return NULL;
9094 }
9095 break;
9096 }
9097 }
9098 pbuf_copy_data(pbuf, off, len, p);
9099 return p;
9100 }
9101
9102 int
pf_routable(struct pf_addr * addr,sa_family_t af,struct pfi_kif * kif)9103 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
9104 {
9105 #pragma unused(kif)
9106 struct sockaddr_in *dst;
9107 int ret = 1;
9108 struct sockaddr_in6 *dst6;
9109 struct route_in6 ro;
9110
9111 bzero(&ro, sizeof(ro));
9112 switch (af) {
9113 case AF_INET:
9114 dst = satosin(&ro.ro_dst);
9115 dst->sin_family = AF_INET;
9116 dst->sin_len = sizeof(*dst);
9117 dst->sin_addr = addr->v4addr;
9118 break;
9119 case AF_INET6:
9120 dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
9121 dst6->sin6_family = AF_INET6;
9122 dst6->sin6_len = sizeof(*dst6);
9123 dst6->sin6_addr = addr->v6addr;
9124 break;
9125 default:
9126 return 0;
9127 }
9128
9129 /* XXX: IFT_ENC is not currently used by anything*/
9130 /* Skip checks for ipsec interfaces */
9131 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) {
9132 goto out;
9133 }
9134
9135 /* XXX: what is the point of this? */
9136 rtalloc((struct route *)&ro);
9137
9138 out:
9139 ROUTE_RELEASE(&ro);
9140 return ret;
9141 }
9142
9143 int
pf_rtlabel_match(struct pf_addr * addr,sa_family_t af,struct pf_addr_wrap * aw)9144 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
9145 {
9146 #pragma unused(aw)
9147 struct sockaddr_in *dst;
9148 struct sockaddr_in6 *dst6;
9149 struct route_in6 ro;
9150 int ret = 0;
9151
9152 bzero(&ro, sizeof(ro));
9153 switch (af) {
9154 case AF_INET:
9155 dst = satosin(&ro.ro_dst);
9156 dst->sin_family = AF_INET;
9157 dst->sin_len = sizeof(*dst);
9158 dst->sin_addr = addr->v4addr;
9159 break;
9160 case AF_INET6:
9161 dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
9162 dst6->sin6_family = AF_INET6;
9163 dst6->sin6_len = sizeof(*dst6);
9164 dst6->sin6_addr = addr->v6addr;
9165 break;
9166 default:
9167 return 0;
9168 }
9169
9170 /* XXX: what is the point of this? */
9171 rtalloc((struct route *)&ro);
9172
9173 ROUTE_RELEASE(&ro);
9174
9175 return ret;
9176 }
9177
9178 #if INET
/*
 * Route an IPv4 packet out via the interface selected by rule "r"
 * (route-to / reply-to / dup-to / fastroute), bypassing the normal
 * forwarding path.  Consumes *pbufp except for PF_DUPTO (a clone is
 * routed and the original continues) or when the reply-to direction
 * check bails out early.  Fragments the packet when it exceeds the
 * outgoing interface MTU and fragmentation is permitted.
 */
static __attribute__((noinline)) void
pf_route(pbuf_t **pbufp, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
/* NOTE(review): the unused-pragma looks stale; pd is dereferenced below. */
#pragma unused(pd)
	struct mbuf *m0, *m1;
	struct route iproute;
	struct route *ro = &iproute;
	struct sockaddr_in *dst;
	struct ip *ip;
	struct ifnet *ifp = NULL;
	struct pf_addr naddr;
	struct pf_src_node *sn = NULL;
	int error = 0;
	uint32_t sw_csum;
	int interface_mtu = 0;
	bzero(&iproute, sizeof(iproute));

	if (pbufp == NULL || !pbuf_is_valid(*pbufp) || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL) {
		panic("pf_route: invalid parameters");
	}

	/* guard against routing loops: drop after 4 re-routing passes */
	if (pd->pf_mtag->pftag_routed++ > 3) {
		pbuf_destroy(*pbufp);
		*pbufp = NULL;
		m0 = NULL;
		goto bad;
	}

	/*
	 * Since this is something of an edge case and may involve the
	 * host stack (for routing, at least for now), we convert the
	 * incoming pbuf into an mbuf.
	 */
	if (r->rt == PF_DUPTO) {
		/* dup-to: route a copy, leave the original in place */
		m0 = pbuf_clone_to_mbuf(*pbufp);
	} else if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
		/* reply-to only applies against the rule's direction */
		return;
	} else {
		/* We're going to consume this packet */
		m0 = pbuf_to_mbuf(*pbufp, TRUE);
		*pbufp = NULL;
	}

	if (m0 == NULL) {
		goto bad;
	}

	/* We now have the packet in an mbuf (m0) */

	if (m0->m_len < (int)sizeof(struct ip)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route: packet length < sizeof (struct ip)\n"));
		goto bad;
	}

	ip = mtod(m0, struct ip *);

	/* default next hop: the packet's own destination */
	dst = satosin((void *)&ro->ro_dst);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;

	if (r->rt == PF_FASTROUTE) {
		/* fastroute: ordinary FIB lookup on the destination */
		rtalloc(ro);
		if (ro->ro_rt == NULL) {
			ipstat.ips_noroute++;
			goto bad;
		}

		ifp = ro->ro_rt->rt_ifp;
		RT_LOCK(ro->ro_rt);
		ro->ro_rt->rt_use++;

		if (ro->ro_rt->rt_flags & RTF_GATEWAY) {
			dst = satosin((void *)ro->ro_rt->rt_gateway);
		}
		RT_UNLOCK(ro->ro_rt);
	} else {
		/* route-to/reply-to/dup-to: next hop comes from the pool */
		if (TAILQ_EMPTY(&r->rpool.list)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
			goto bad;
		}
		if (s == NULL) {
			/* stateless: pick a pool address for this packet */
			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
			    &naddr, NULL, &sn);
			if (!PF_AZERO(&naddr, AF_INET)) {
				dst->sin_addr.s_addr = naddr.v4addr.s_addr;
			}
			ifp = r->rpool.cur->kif ?
			    r->rpool.cur->kif->pfik_ifp : NULL;
		} else {
			/* stateful: reuse the next hop chosen at creation */
			if (!PF_AZERO(&s->rt_addr, AF_INET)) {
				dst->sin_addr.s_addr =
				    s->rt_addr.v4addr.s_addr;
			}
			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
		}
	}
	if (ifp == NULL) {
		goto bad;
	}

	/* re-run the filter when leaving via a different interface */
	if (oifp != ifp) {
		if (pf_test_mbuf(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			goto bad;
		} else if (m0 == NULL) {
			/* filter consumed the packet */
			goto done;
		}
		if (m0->m_len < (int)sizeof(struct ip)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: packet length < sizeof (struct ip)\n"));
			goto bad;
		}
		ip = mtod(m0, struct ip *);
	}

	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
	ip_output_checksum(ifp, m0, ((ip->ip_hl) << 2), ntohs(ip->ip_len),
	    &sw_csum);

	interface_mtu = ifp->if_mtu;

	if (INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
		interface_mtu = IN6_LINKMTU(ifp);
		/* Further adjust the size for CLAT46 expansion */
		interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
	}

	/* fits the MTU, TSO applies, or hardware can fragment: send whole */
	if (ntohs(ip->ip_len) <= interface_mtu || TSO_IPV4_OK(ifp, m0) ||
	    (!(ip->ip_off & htons(IP_DF)) &&
	    (ifp->if_hwassist & CSUM_FRAGMENT))) {
		ip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP) {
			/* compute the IP header checksum in software */
			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
			sw_csum &= ~CSUM_DELAY_IP;
			m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP;
		}
		error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt, sintosa(dst));
		goto done;
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 * Balk when DF bit is set or the interface didn't support TSO.
	 */
	if ((ip->ip_off & htons(IP_DF)) ||
	    (m0->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) {
		ipstat.ips_cantfrag++;
		if (r->rt != PF_DUPTO) {
			/* tell the sender to lower its path MTU */
			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
			    interface_mtu);
			goto done;
		} else {
			goto bad;
		}
	}

	m1 = m0;

	/* PR-8933605: send ip_len,ip_off to ip_fragment in host byte order */
#if BYTE_ORDER != BIG_ENDIAN
	NTOHS(ip->ip_off);
	NTOHS(ip->ip_len);
#endif
	error = ip_fragment(m0, ifp, interface_mtu, sw_csum);

	if (error) {
		m0 = NULL;
		goto bad;
	}

	/* transmit the fragment chain; free remaining after any failure */
	for (m0 = m1; m0; m0 = m1) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = 0;
		if (error == 0) {
			error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt,
			    sintosa(dst));
		} else {
			m_freem(m0);
		}
	}

	if (error == 0) {
		ipstat.ips_fragmented++;
	}

done:
	ROUTE_RELEASE(&iproute);
	return;

bad:
	if (m0) {
		m_freem(m0);
	}
	goto done;
}
9379 #endif /* INET */
9380
/*
 * IPv6 counterpart of pf_route(): send the packet via the interface
 * selected by a route-to / reply-to / dup-to / fastroute rule.
 * Consumes *pbufp except for PF_DUPTO (a clone is routed and the
 * original continues) or when the reply-to direction check bails out.
 * Oversized packets are answered with ICMPv6 Packet Too Big instead
 * of being fragmented.
 */
static __attribute__((noinline)) void
pf_route6(pbuf_t **pbufp, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
/* NOTE(review): the unused-pragma looks stale; pd is dereferenced below. */
#pragma unused(pd)
	struct mbuf *m0;
	struct route_in6 ip6route;
	struct route_in6 *ro;
	struct sockaddr_in6 *dst;
	struct ip6_hdr *ip6;
	struct ifnet *ifp = NULL;
	struct pf_addr naddr;
	struct pf_src_node *sn = NULL;
	int error = 0;
	struct pf_mtag *pf_mtag;

	if (pbufp == NULL || !pbuf_is_valid(*pbufp) || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL) {
		panic("pf_route6: invalid parameters");
	}

	/* guard against routing loops: drop after 4 re-routing passes */
	if (pd->pf_mtag->pftag_routed++ > 3) {
		pbuf_destroy(*pbufp);
		*pbufp = NULL;
		m0 = NULL;
		goto bad;
	}

	/*
	 * Since this is something of an edge case and may involve the
	 * host stack (for routing, at least for now), we convert the
	 * incoming pbuf into an mbuf.
	 */
	if (r->rt == PF_DUPTO) {
		/* dup-to: route a copy, leave the original in place */
		m0 = pbuf_clone_to_mbuf(*pbufp);
	} else if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
		/* reply-to only applies against the rule's direction */
		return;
	} else {
		/* We're about to consume this packet */
		m0 = pbuf_to_mbuf(*pbufp, TRUE);
		*pbufp = NULL;
	}

	if (m0 == NULL) {
		goto bad;
	}

	if (m0->m_len < (int)sizeof(struct ip6_hdr)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: m0->m_len < sizeof (struct ip6_hdr)\n"));
		goto bad;
	}
	ip6 = mtod(m0, struct ip6_hdr *);

	/* default next hop: the packet's own destination */
	ro = &ip6route;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = (struct sockaddr_in6 *)&ro->ro_dst;
	dst->sin6_family = AF_INET6;
	dst->sin6_len = sizeof(*dst);
	dst->sin6_addr = ip6->ip6_dst;

	/* Cheat. XXX why only in the v6addr case??? */
	if (r->rt == PF_FASTROUTE) {
		/* mark as pf-generated and hand to the normal IPv6 output */
		pf_mtag = pf_get_mtag(m0);
		ASSERT(pf_mtag != NULL);
		pf_mtag->pftag_flags |= PF_TAG_GENERATED;
		ip6_output_setsrcifscope(m0, oifp->if_index, NULL);
		ip6_output_setdstifscope(m0, oifp->if_index, NULL);
		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
		return;
	}

	if (TAILQ_EMPTY(&r->rpool.list)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
		goto bad;
	}
	if (s == NULL) {
		/* stateless: pick a pool address for this packet */
		pf_map_addr(AF_INET6, r, (struct pf_addr *)(uintptr_t)&ip6->ip6_src,
		    &naddr, NULL, &sn);
		if (!PF_AZERO(&naddr, AF_INET6)) {
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &naddr, AF_INET6);
		}
		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
	} else {
		/* stateful: reuse the next hop chosen at state creation */
		if (!PF_AZERO(&s->rt_addr, AF_INET6)) {
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &s->rt_addr, AF_INET6);
		}
		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
	}
	if (ifp == NULL) {
		goto bad;
	}

	/* re-run the filter when leaving via a different interface */
	if (oifp != ifp) {
		if (pf_test6_mbuf(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			goto bad;
		} else if (m0 == NULL) {
			/* filter consumed the packet */
			goto done;
		}
		if (m0->m_len < (int)sizeof(struct ip6_hdr)) {
			DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6: m0->m_len "
			    "< sizeof (struct ip6_hdr)\n"));
			goto bad;
		}
		pf_mtag = pf_get_mtag(m0);
		/*
		 * NOTE(review): pf_get_mtag() result is dereferenced without
		 * a NULL check here (the PF_FASTROUTE path asserts it) —
		 * confirm it cannot fail for packets that already carry a
		 * pf mtag.
		 */
		/*
		 * send refragmented packets.
		 */
		if ((pf_mtag->pftag_flags & PF_TAG_REFRAGMENTED) != 0) {
			pf_mtag->pftag_flags &= ~PF_TAG_REFRAGMENTED;
			/*
			 * nd6_output() frees packet chain in both success and
			 * failure cases.
			 */
			error = nd6_output(ifp, ifp, m0, dst, NULL, NULL);
			m0 = NULL;
			if (error) {
				DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6:"
				    "dropped refragmented packet\n"));
			}
			goto done;
		}
		ip6 = mtod(m0, struct ip6_hdr *);
	}

	/*
	 * If the packet is too large for the outgoing interface,
	 * send back an icmp6 error.
	 */
	if (in6_embedded_scope && IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) {
		/* embed the outgoing interface scope into the address */
		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
	}
	if ((unsigned)m0->m_pkthdr.len <= ifp->if_mtu) {
		error = nd6_output(ifp, ifp, m0, dst, NULL, NULL);
	} else {
		in6_ifstat_inc(ifp, ifs6_in_toobig);
		if (r->rt != PF_DUPTO) {
			/* tell the sender to lower its path MTU */
			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
		} else {
			goto bad;
		}
	}

done:
	return;

bad:
	if (m0) {
		m_freem(m0);
		m0 = NULL;
	}
	goto done;
}
9537
9538
9539 /*
9540 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
9541 * off is the offset where the protocol header starts
9542 * len is the total length of protocol header plus payload
9543 * returns 0 when the checksum is valid, otherwise returns 1.
9544 */
9545 static int
pf_check_proto_cksum(pbuf_t * pbuf,int off,int len,u_int8_t p,sa_family_t af)9546 pf_check_proto_cksum(pbuf_t *pbuf, int off, int len, u_int8_t p,
9547 sa_family_t af)
9548 {
9549 u_int16_t sum;
9550
9551 switch (p) {
9552 case IPPROTO_TCP:
9553 case IPPROTO_UDP:
9554 /*
9555 * Optimize for the common case; if the hardware calculated
9556 * value doesn't include pseudo-header checksum, or if it
9557 * is partially-computed (only 16-bit summation), do it in
9558 * software below.
9559 */
9560 if ((*pbuf->pb_csum_flags &
9561 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
9562 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) &&
9563 (*pbuf->pb_csum_data ^ 0xffff) == 0) {
9564 return 0;
9565 }
9566 break;
9567 case IPPROTO_ICMP:
9568 case IPPROTO_ICMPV6:
9569 break;
9570 default:
9571 return 1;
9572 }
9573 if (off < (int)sizeof(struct ip) || len < (int)sizeof(struct udphdr)) {
9574 return 1;
9575 }
9576 if (pbuf->pb_packet_len < (unsigned)(off + len)) {
9577 return 1;
9578 }
9579 switch (af) {
9580 #if INET
9581 case AF_INET:
9582 if (p == IPPROTO_ICMP) {
9583 if (pbuf->pb_contig_len < (unsigned)off) {
9584 return 1;
9585 }
9586 sum = pbuf_inet_cksum(pbuf, 0, off, len);
9587 } else {
9588 if (pbuf->pb_contig_len < (int)sizeof(struct ip)) {
9589 return 1;
9590 }
9591 sum = pbuf_inet_cksum(pbuf, p, off, len);
9592 }
9593 break;
9594 #endif /* INET */
9595 case AF_INET6:
9596 if (pbuf->pb_contig_len < (int)sizeof(struct ip6_hdr)) {
9597 return 1;
9598 }
9599 sum = pbuf_inet6_cksum(pbuf, p, off, len);
9600 break;
9601 default:
9602 return 1;
9603 }
9604 if (sum) {
9605 switch (p) {
9606 case IPPROTO_TCP:
9607 tcpstat.tcps_rcvbadsum++;
9608 break;
9609 case IPPROTO_UDP:
9610 udpstat.udps_badsum++;
9611 break;
9612 case IPPROTO_ICMP:
9613 icmpstat.icps_checksum++;
9614 break;
9615 case IPPROTO_ICMPV6:
9616 icmp6stat.icp6s_checksum++;
9617 break;
9618 }
9619 return 1;
9620 }
9621 return 0;
9622 }
9623
9624 #if INET
/*
 * Resynchronize pf_test()'s locals after a helper may have replaced the
 * working packet: if pd.mp points at a different pbuf, adopt it and
 * refresh the cached IP header pointer and pf mtag.
 */
#define PF_APPLE_UPDATE_PDESC_IPv4() \
	do { \
		if (pbuf && pd.mp && pbuf != pd.mp) { \
			pbuf = pd.mp; \
			h = pbuf->pb_data; \
			pd.pf_mtag = pf_get_mtag_pbuf(pbuf); \
		} \
	} while (0)
9633
9634 int
pf_test_mbuf(int dir,struct ifnet * ifp,struct mbuf ** m0,struct ether_header * eh,struct ip_fw_args * fwa)9635 pf_test_mbuf(int dir, struct ifnet *ifp, struct mbuf **m0,
9636 struct ether_header *eh, struct ip_fw_args *fwa)
9637 {
9638 pbuf_t pbuf_store, *pbuf;
9639 int rv;
9640
9641 pbuf_init_mbuf(&pbuf_store, *m0, (*m0)->m_pkthdr.rcvif);
9642 pbuf = &pbuf_store;
9643
9644 rv = pf_test(dir, ifp, &pbuf, eh, fwa);
9645
9646 if (pbuf_is_valid(pbuf)) {
9647 *m0 = pbuf->pb_mbuf;
9648 pbuf->pb_mbuf = NULL;
9649 pbuf_destroy(pbuf);
9650 } else {
9651 *m0 = NULL;
9652 }
9653
9654 return rv;
9655 }
9656
9657 static __attribute__((noinline)) int
pf_test(int dir,struct ifnet * ifp,pbuf_t ** pbufp,struct ether_header * eh,struct ip_fw_args * fwa)9658 pf_test(int dir, struct ifnet *ifp, pbuf_t **pbufp,
9659 struct ether_header *eh, struct ip_fw_args *fwa)
9660 {
9661 #if !DUMMYNET
9662 #pragma unused(fwa)
9663 #endif
9664 struct pfi_kif *kif;
9665 u_short action = PF_PASS, reason = 0, log = 0;
9666 pbuf_t *pbuf = *pbufp;
9667 struct ip *h = 0;
9668 struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr;
9669 struct pf_state *s = NULL;
9670 struct pf_state_key *sk = NULL;
9671 struct pf_ruleset *ruleset = NULL;
9672 struct pf_pdesc pd;
9673 int off, dirndx, pqid = 0;
9674
9675 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
9676
9677 if (!pf_status.running) {
9678 return PF_PASS;
9679 }
9680
9681 memset(&pd, 0, sizeof(pd));
9682
9683 if ((pd.pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
9684 DPFPRINTF(PF_DEBUG_URGENT,
9685 ("pf_test: pf_get_mtag_pbuf returned NULL\n"));
9686 return PF_DROP;
9687 }
9688
9689 if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED) {
9690 return PF_PASS;
9691 }
9692
9693 kif = (struct pfi_kif *)ifp->if_pf_kif;
9694
9695 if (kif == NULL) {
9696 DPFPRINTF(PF_DEBUG_URGENT,
9697 ("pf_test: kif == NULL, if_name %s\n", ifp->if_name));
9698 return PF_DROP;
9699 }
9700 if (kif->pfik_flags & PFI_IFLAG_SKIP) {
9701 return PF_PASS;
9702 }
9703
9704 if (pbuf->pb_packet_len < (int)sizeof(*h)) {
9705 REASON_SET(&reason, PFRES_SHORT);
9706 return PF_DROP;
9707 }
9708
9709 /* initialize enough of pd for the done label */
9710 h = pbuf->pb_data;
9711 pd.mp = pbuf;
9712 pd.lmw = 0;
9713 pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
9714 pd.src = (struct pf_addr *)&h->ip_src;
9715 pd.dst = (struct pf_addr *)&h->ip_dst;
9716 PF_ACPY(&pd.baddr, pd.src, AF_INET);
9717 PF_ACPY(&pd.bdaddr, pd.dst, AF_INET);
9718 pd.ip_sum = &h->ip_sum;
9719 pd.proto = h->ip_p;
9720 pd.proto_variant = 0;
9721 pd.af = AF_INET;
9722 pd.tos = h->ip_tos;
9723 pd.ttl = h->ip_ttl;
9724 pd.tot_len = ntohs(h->ip_len);
9725 pd.eh = eh;
9726
9727 #if DUMMYNET
9728 if (fwa != NULL && fwa->fwa_pf_rule != NULL) {
9729 goto nonormalize;
9730 }
9731 #endif /* DUMMYNET */
9732
9733 /* We do IP header normalization and packet reassembly here */
9734 action = pf_normalize_ip(pbuf, dir, kif, &reason, &pd);
9735 if (action != PF_PASS || pd.lmw < 0) {
9736 action = PF_DROP;
9737 goto done;
9738 }
9739
9740 #if DUMMYNET
9741 nonormalize:
9742 #endif /* DUMMYNET */
9743 /* pf_normalize can mess with pb_data */
9744 h = pbuf->pb_data;
9745
9746 off = h->ip_hl << 2;
9747 if (off < (int)sizeof(*h)) {
9748 action = PF_DROP;
9749 REASON_SET(&reason, PFRES_SHORT);
9750 log = 1;
9751 goto done;
9752 }
9753
9754 pd.src = (struct pf_addr *)&h->ip_src;
9755 pd.dst = (struct pf_addr *)&h->ip_dst;
9756 PF_ACPY(&pd.baddr, pd.src, AF_INET);
9757 PF_ACPY(&pd.bdaddr, pd.dst, AF_INET);
9758 pd.ip_sum = &h->ip_sum;
9759 pd.proto = h->ip_p;
9760 pd.proto_variant = 0;
9761 pd.mp = pbuf;
9762 pd.lmw = 0;
9763 pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
9764 pd.af = AF_INET;
9765 pd.tos = h->ip_tos;
9766 pd.ttl = h->ip_ttl;
9767 pd.sc = MBUF_SCIDX(pbuf_get_service_class(pbuf));
9768 pd.tot_len = ntohs(h->ip_len);
9769 pd.eh = eh;
9770
9771 if (*pbuf->pb_flags & PKTF_FLOW_ID) {
9772 pd.flowsrc = *pbuf->pb_flowsrc;
9773 pd.flowhash = *pbuf->pb_flowid;
9774 pd.pktflags = *pbuf->pb_flags & PKTF_FLOW_MASK;
9775 }
9776
9777 /* handle fragments that didn't get reassembled by normalization */
9778 if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
9779 pd.flags |= PFDESC_IP_FRAG;
9780 #if DUMMYNET
9781 /* Traffic goes through dummynet first */
9782 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9783 if (action == PF_DROP || pbuf == NULL) {
9784 *pbufp = NULL;
9785 return action;
9786 }
9787 #endif /* DUMMYNET */
9788 action = pf_test_fragment(&r, dir, kif, pbuf, h,
9789 &pd, &a, &ruleset);
9790 goto done;
9791 }
9792
9793 switch (h->ip_p) {
9794 case IPPROTO_TCP: {
9795 struct tcphdr th;
9796 pd.hdr.tcp = &th;
9797 if (!pf_pull_hdr(pbuf, off, &th, sizeof(th),
9798 &action, &reason, AF_INET)) {
9799 log = action != PF_PASS;
9800 goto done;
9801 }
9802 pd.p_len = pd.tot_len - off - (th.th_off << 2);
9803 if ((th.th_flags & TH_ACK) && pd.p_len == 0) {
9804 pqid = 1;
9805 }
9806 #if DUMMYNET
9807 /* Traffic goes through dummynet first */
9808 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9809 if (action == PF_DROP || pbuf == NULL) {
9810 *pbufp = NULL;
9811 return action;
9812 }
9813 #endif /* DUMMYNET */
9814 action = pf_normalize_tcp(dir, kif, pbuf, 0, off, h, &pd);
9815 if (pd.lmw < 0) {
9816 goto done;
9817 }
9818 PF_APPLE_UPDATE_PDESC_IPv4();
9819 if (action == PF_DROP) {
9820 goto done;
9821 }
9822 if (th.th_sport == 0 || th.th_dport == 0) {
9823 action = PF_DROP;
9824 REASON_SET(&reason, PFRES_INVPORT);
9825 goto done;
9826 }
9827 action = pf_test_state_tcp(&s, dir, kif, pbuf, off, h, &pd,
9828 &reason);
9829 if (action == PF_NAT64) {
9830 goto done;
9831 }
9832 if (pd.lmw < 0) {
9833 goto done;
9834 }
9835 PF_APPLE_UPDATE_PDESC_IPv4();
9836 if (action == PF_PASS) {
9837 #if NPFSYNC
9838 pfsync_update_state(s);
9839 #endif /* NPFSYNC */
9840 r = s->rule.ptr;
9841 a = s->anchor.ptr;
9842 log = s->log;
9843 } else if (s == NULL) {
9844 action = pf_test_rule(&r, &s, dir, kif,
9845 pbuf, off, h, &pd, &a, &ruleset, NULL);
9846 }
9847 break;
9848 }
9849
9850 case IPPROTO_UDP: {
9851 struct udphdr uh;
9852
9853 pd.hdr.udp = &uh;
9854 if (!pf_pull_hdr(pbuf, off, &uh, sizeof(uh),
9855 &action, &reason, AF_INET)) {
9856 log = action != PF_PASS;
9857 goto done;
9858 }
9859 if (uh.uh_sport == 0 || uh.uh_dport == 0) {
9860 action = PF_DROP;
9861 REASON_SET(&reason, PFRES_INVPORT);
9862 goto done;
9863 }
9864 if (ntohs(uh.uh_ulen) > pbuf->pb_packet_len - off ||
9865 ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
9866 action = PF_DROP;
9867 REASON_SET(&reason, PFRES_SHORT);
9868 goto done;
9869 }
9870 #if DUMMYNET
9871 /* Traffic goes through dummynet first */
9872 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9873 if (action == PF_DROP || pbuf == NULL) {
9874 *pbufp = NULL;
9875 return action;
9876 }
9877 #endif /* DUMMYNET */
9878 action = pf_test_state_udp(&s, dir, kif, pbuf, off, h, &pd,
9879 &reason);
9880 if (action == PF_NAT64) {
9881 goto done;
9882 }
9883 if (pd.lmw < 0) {
9884 goto done;
9885 }
9886 PF_APPLE_UPDATE_PDESC_IPv4();
9887 if (action == PF_PASS) {
9888 #if NPFSYNC
9889 pfsync_update_state(s);
9890 #endif /* NPFSYNC */
9891 r = s->rule.ptr;
9892 a = s->anchor.ptr;
9893 log = s->log;
9894 } else if (s == NULL) {
9895 action = pf_test_rule(&r, &s, dir, kif,
9896 pbuf, off, h, &pd, &a, &ruleset, NULL);
9897 }
9898 break;
9899 }
9900
9901 case IPPROTO_ICMP: {
9902 struct icmp ih;
9903
9904 pd.hdr.icmp = &ih;
9905 if (!pf_pull_hdr(pbuf, off, &ih, ICMP_MINLEN,
9906 &action, &reason, AF_INET)) {
9907 log = action != PF_PASS;
9908 goto done;
9909 }
9910 #if DUMMYNET
9911 /* Traffic goes through dummynet first */
9912 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9913 if (action == PF_DROP || pbuf == NULL) {
9914 *pbufp = NULL;
9915 return action;
9916 }
9917 #endif /* DUMMYNET */
9918 action = pf_test_state_icmp(&s, dir, kif, pbuf, off, h, &pd,
9919 &reason);
9920
9921 if (action == PF_NAT64) {
9922 goto done;
9923 }
9924 if (pd.lmw < 0) {
9925 goto done;
9926 }
9927 PF_APPLE_UPDATE_PDESC_IPv4();
9928 if (action == PF_PASS) {
9929 #if NPFSYNC
9930 pfsync_update_state(s);
9931 #endif /* NPFSYNC */
9932 r = s->rule.ptr;
9933 a = s->anchor.ptr;
9934 log = s->log;
9935 } else if (s == NULL) {
9936 action = pf_test_rule(&r, &s, dir, kif,
9937 pbuf, off, h, &pd, &a, &ruleset, NULL);
9938 }
9939 break;
9940 }
9941
9942 case IPPROTO_ESP: {
9943 struct pf_esp_hdr esp;
9944
9945 pd.hdr.esp = &esp;
9946 if (!pf_pull_hdr(pbuf, off, &esp, sizeof(esp), &action, &reason,
9947 AF_INET)) {
9948 log = action != PF_PASS;
9949 goto done;
9950 }
9951 #if DUMMYNET
9952 /* Traffic goes through dummynet first */
9953 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9954 if (action == PF_DROP || pbuf == NULL) {
9955 *pbufp = NULL;
9956 return action;
9957 }
9958 #endif /* DUMMYNET */
9959 action = pf_test_state_esp(&s, dir, kif, off, &pd);
9960 if (pd.lmw < 0) {
9961 goto done;
9962 }
9963 PF_APPLE_UPDATE_PDESC_IPv4();
9964 if (action == PF_PASS) {
9965 #if NPFSYNC
9966 pfsync_update_state(s);
9967 #endif /* NPFSYNC */
9968 r = s->rule.ptr;
9969 a = s->anchor.ptr;
9970 log = s->log;
9971 } else if (s == NULL) {
9972 action = pf_test_rule(&r, &s, dir, kif,
9973 pbuf, off, h, &pd, &a, &ruleset, NULL);
9974 }
9975 break;
9976 }
9977
9978 case IPPROTO_GRE: {
9979 struct pf_grev1_hdr grev1;
9980 pd.hdr.grev1 = &grev1;
9981 if (!pf_pull_hdr(pbuf, off, &grev1, sizeof(grev1), &action,
9982 &reason, AF_INET)) {
9983 log = (action != PF_PASS);
9984 goto done;
9985 }
9986 #if DUMMYNET
9987 /* Traffic goes through dummynet first */
9988 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9989 if (action == PF_DROP || pbuf == NULL) {
9990 *pbufp = NULL;
9991 return action;
9992 }
9993 #endif /* DUMMYNET */
9994 if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
9995 ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
9996 if (ntohs(grev1.payload_length) >
9997 pbuf->pb_packet_len - off) {
9998 action = PF_DROP;
9999 REASON_SET(&reason, PFRES_SHORT);
10000 goto done;
10001 }
10002 pd.proto_variant = PF_GRE_PPTP_VARIANT;
10003 action = pf_test_state_grev1(&s, dir, kif, off, &pd);
10004 if (pd.lmw < 0) {
10005 goto done;
10006 }
10007 PF_APPLE_UPDATE_PDESC_IPv4();
10008 if (action == PF_PASS) {
10009 #if NPFSYNC
10010 pfsync_update_state(s);
10011 #endif /* NPFSYNC */
10012 r = s->rule.ptr;
10013 a = s->anchor.ptr;
10014 log = s->log;
10015 break;
10016 } else if (s == NULL) {
10017 action = pf_test_rule(&r, &s, dir, kif, pbuf,
10018 off, h, &pd, &a, &ruleset, NULL);
10019 if (action == PF_PASS) {
10020 break;
10021 }
10022 }
10023 }
10024
10025 /* not GREv1/PPTP, so treat as ordinary GRE... */
10026 OS_FALLTHROUGH;
10027 }
10028
10029 default:
10030 #if DUMMYNET
10031 /* Traffic goes through dummynet first */
10032 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10033 if (action == PF_DROP || pbuf == NULL) {
10034 *pbufp = NULL;
10035 return action;
10036 }
10037 #endif /* DUMMYNET */
10038 action = pf_test_state_other(&s, dir, kif, &pd);
10039 if (pd.lmw < 0) {
10040 goto done;
10041 }
10042 PF_APPLE_UPDATE_PDESC_IPv4();
10043 if (action == PF_PASS) {
10044 #if NPFSYNC
10045 pfsync_update_state(s);
10046 #endif /* NPFSYNC */
10047 r = s->rule.ptr;
10048 a = s->anchor.ptr;
10049 log = s->log;
10050 } else if (s == NULL) {
10051 action = pf_test_rule(&r, &s, dir, kif, pbuf, off, h,
10052 &pd, &a, &ruleset, NULL);
10053 }
10054 break;
10055 }
10056
10057 done:
10058 if (action == PF_NAT64) {
10059 *pbufp = NULL;
10060 return action;
10061 }
10062
10063 *pbufp = pd.mp;
10064 PF_APPLE_UPDATE_PDESC_IPv4();
10065
10066 if (action != PF_DROP) {
10067 if (action == PF_PASS && h->ip_hl > 5 &&
10068 !((s && s->allow_opts) || r->allow_opts)) {
10069 action = PF_DROP;
10070 REASON_SET(&reason, PFRES_IPOPTIONS);
10071 log = 1;
10072 DPFPRINTF(PF_DEBUG_MISC,
10073 ("pf: dropping packet with ip options [hlen=%u]\n",
10074 (unsigned int) h->ip_hl));
10075 }
10076
10077 if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) ||
10078 (pd.pktflags & PKTF_FLOW_ID)) {
10079 (void) pf_tag_packet(pbuf, pd.pf_mtag, s ? s->tag : 0,
10080 r->rtableid, &pd);
10081 }
10082
10083 if (action == PF_PASS) {
10084 #if PF_ECN
10085 /* add hints for ecn */
10086 pd.pf_mtag->pftag_hdr = h;
10087 /* record address family */
10088 pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET6;
10089 pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
10090 #endif /* PF_ECN */
10091 /* record protocol */
10092 *pbuf->pb_proto = pd.proto;
10093
10094 /*
10095 * connections redirected to loopback should not match sockets
10096 * bound specifically to loopback due to security implications,
10097 * see tcp_input() and in_pcblookup_listen().
10098 */
10099 if (dir == PF_IN && (pd.proto == IPPROTO_TCP ||
10100 pd.proto == IPPROTO_UDP) && s != NULL &&
10101 s->nat_rule.ptr != NULL &&
10102 (s->nat_rule.ptr->action == PF_RDR ||
10103 s->nat_rule.ptr->action == PF_BINAT) &&
10104 (ntohl(pd.dst->v4addr.s_addr) >> IN_CLASSA_NSHIFT)
10105 == IN_LOOPBACKNET) {
10106 pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;
10107 }
10108 }
10109 }
10110
10111 if (log) {
10112 struct pf_rule *lr;
10113
10114 if (s != NULL && s->nat_rule.ptr != NULL &&
10115 s->nat_rule.ptr->log & PF_LOG_ALL) {
10116 lr = s->nat_rule.ptr;
10117 } else {
10118 lr = r;
10119 }
10120 PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, lr, a, ruleset,
10121 &pd);
10122 }
10123
10124 kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
10125 kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
10126
10127 if (action == PF_PASS || r->action == PF_DROP) {
10128 dirndx = (dir == PF_OUT);
10129 r->packets[dirndx]++;
10130 r->bytes[dirndx] += pd.tot_len;
10131 if (a != NULL) {
10132 a->packets[dirndx]++;
10133 a->bytes[dirndx] += pd.tot_len;
10134 }
10135 if (s != NULL) {
10136 sk = s->state_key;
10137 if (s->nat_rule.ptr != NULL) {
10138 s->nat_rule.ptr->packets[dirndx]++;
10139 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
10140 }
10141 if (s->src_node != NULL) {
10142 s->src_node->packets[dirndx]++;
10143 s->src_node->bytes[dirndx] += pd.tot_len;
10144 }
10145 if (s->nat_src_node != NULL) {
10146 s->nat_src_node->packets[dirndx]++;
10147 s->nat_src_node->bytes[dirndx] += pd.tot_len;
10148 }
10149 dirndx = (dir == sk->direction) ? 0 : 1;
10150 s->packets[dirndx]++;
10151 s->bytes[dirndx] += pd.tot_len;
10152 }
10153 tr = r;
10154 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
10155 if (nr != NULL) {
10156 struct pf_addr *x;
10157 /*
10158 * XXX: we need to make sure that the addresses
10159 * passed to pfr_update_stats() are the same than
10160 * the addresses used during matching (pfr_match)
10161 */
10162 if (r == &pf_default_rule) {
10163 tr = nr;
10164 x = (sk == NULL || sk->direction == dir) ?
10165 &pd.baddr : &pd.naddr;
10166 } else {
10167 x = (sk == NULL || sk->direction == dir) ?
10168 &pd.naddr : &pd.baddr;
10169 }
10170 if (x == &pd.baddr || s == NULL) {
10171 /* we need to change the address */
10172 if (dir == PF_OUT) {
10173 pd.src = x;
10174 } else {
10175 pd.dst = x;
10176 }
10177 }
10178 }
10179 if (tr->src.addr.type == PF_ADDR_TABLE) {
10180 pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
10181 sk->direction == dir) ?
10182 pd.src : pd.dst, pd.af,
10183 pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
10184 tr->src.neg);
10185 }
10186 if (tr->dst.addr.type == PF_ADDR_TABLE) {
10187 pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
10188 sk->direction == dir) ? pd.dst : pd.src, pd.af,
10189 pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
10190 tr->dst.neg);
10191 }
10192 }
10193
10194 VERIFY(pbuf == NULL || pd.mp == NULL || pd.mp == pbuf);
10195
10196 if (*pbufp) {
10197 if (pd.lmw < 0) {
10198 REASON_SET(&reason, PFRES_MEMORY);
10199 action = PF_DROP;
10200 }
10201
10202 if (action == PF_DROP) {
10203 pbuf_destroy(*pbufp);
10204 *pbufp = NULL;
10205 return PF_DROP;
10206 }
10207
10208 *pbufp = pbuf;
10209 }
10210
10211 if (action == PF_SYNPROXY_DROP) {
10212 pbuf_destroy(*pbufp);
10213 *pbufp = NULL;
10214 action = PF_PASS;
10215 } else if (r->rt) {
10216 /* pf_route can free the pbuf causing *pbufp to become NULL */
10217 pf_route(pbufp, r, dir, kif->pfik_ifp, s, &pd);
10218 }
10219
10220 return action;
10221 }
10222 #endif /* INET */
10223
/*
 * Re-sync the local working pointers after a helper may have replaced
 * the packet buffer: pd.mp always tracks the current pbuf, so adopt it
 * if it differs, and refresh the IPv6 header pointer from the
 * (possibly new) packet data.
 */
#define PF_APPLE_UPDATE_PDESC_IPv6() \
	do { \
		if (pbuf && pd.mp && pbuf != pd.mp) { \
			pbuf = pd.mp; \
		} \
		h = pbuf->pb_data; \
	} while (0)
10231
10232 int
pf_test6_mbuf(int dir,struct ifnet * ifp,struct mbuf ** m0,struct ether_header * eh,struct ip_fw_args * fwa)10233 pf_test6_mbuf(int dir, struct ifnet *ifp, struct mbuf **m0,
10234 struct ether_header *eh, struct ip_fw_args *fwa)
10235 {
10236 pbuf_t pbuf_store, *pbuf;
10237 int rv;
10238
10239 pbuf_init_mbuf(&pbuf_store, *m0, (*m0)->m_pkthdr.rcvif);
10240 pbuf = &pbuf_store;
10241
10242 rv = pf_test6(dir, ifp, &pbuf, eh, fwa);
10243
10244 if (pbuf_is_valid(pbuf)) {
10245 *m0 = pbuf->pb_mbuf;
10246 pbuf->pb_mbuf = NULL;
10247 pbuf_destroy(pbuf);
10248 } else {
10249 *m0 = NULL;
10250 }
10251
10252 return rv;
10253 }
10254
/*
 * pf_test6() -- main packet-filter entry point for IPv6 packets.
 *
 * dir    PF_IN or PF_OUT relative to 'ifp'.
 * ifp    interface the packet is traversing.
 * pbufp  in/out packet buffer pointer; the pbuf may be replaced during
 *        processing (normalization/dummynet), and *pbufp is set to
 *        NULL whenever pf consumes, drops, or re-routes the packet.
 * eh     optional ethernet header (may be NULL).
 * fwa    dummynet forwarding args (used only when DUMMYNET is built).
 *
 * Returns a PF_* action (PF_PASS, PF_DROP, ...).  Caller must hold
 * pf_lock (asserted below).
 */
static __attribute__((noinline)) int
pf_test6(int dir, struct ifnet *ifp, pbuf_t **pbufp,
    struct ether_header *eh, struct ip_fw_args *fwa)
{
#if !DUMMYNET
#pragma unused(fwa)
#endif
	struct pfi_kif *kif;
	u_short action = PF_PASS, reason = 0, log = 0;
	pbuf_t *pbuf = *pbufp;
	struct ip6_hdr *h;
	struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr;
	struct pf_state *s = NULL;
	struct pf_state_key *sk = NULL;
	struct pf_ruleset *ruleset = NULL;
	struct pf_pdesc pd;
	int off, terminal = 0, dirndx, rh_cnt = 0;
	u_int8_t nxt;
	boolean_t fwd = FALSE;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	ASSERT(ifp != NULL);
	/*
	 * Outbound on an interface other than the one the pbuf entered on
	 * means the packet is being forwarded; remembered for possible
	 * re-fragmentation at the end.
	 */
	if ((dir == PF_OUT) && (pbuf->pb_ifp) && (ifp != pbuf->pb_ifp)) {
		fwd = TRUE;
	}

	if (!pf_status.running) {
		return PF_PASS;
	}

	memset(&pd, 0, sizeof(pd));

	if ((pd.pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test6: pf_get_mtag_pbuf returned NULL\n"));
		return PF_DROP;
	}

	/* packets generated by pf itself bypass filtering */
	if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED) {
		return PF_PASS;
	}

	kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test6: kif == NULL, if_name %s\n", ifp->if_name));
		return PF_DROP;
	}
	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
		return PF_PASS;
	}

	/* must have at least a full fixed IPv6 header */
	if (pbuf->pb_packet_len < (int)sizeof(*h)) {
		REASON_SET(&reason, PFRES_SHORT);
		return PF_DROP;
	}

	/* initial packet-descriptor setup from the IPv6 header */
	h = pbuf->pb_data;
	nxt = h->ip6_nxt;
	off = ((caddr_t)h - (caddr_t)pbuf->pb_data) + sizeof(struct ip6_hdr);
	pd.mp = pbuf;
	pd.lmw = 0;
	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
	pd.src = (struct pf_addr *)(uintptr_t)&h->ip6_src;
	pd.dst = (struct pf_addr *)(uintptr_t)&h->ip6_dst;
	PF_ACPY(&pd.baddr, pd.src, AF_INET6);
	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6);
	pd.ip_sum = NULL;
	pd.af = AF_INET6;
	pd.proto = nxt;
	pd.proto_variant = 0;
	pd.tos = 0;
	pd.ttl = h->ip6_hlim;
	pd.sc = MBUF_SCIDX(pbuf_get_service_class(pbuf));
	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
	pd.eh = eh;

	/* propagate flow identification, if the packet carries one */
	if (*pbuf->pb_flags & PKTF_FLOW_ID) {
		pd.flowsrc = *pbuf->pb_flowsrc;
		pd.flowhash = *pbuf->pb_flowid;
		pd.pktflags = (*pbuf->pb_flags & PKTF_FLOW_MASK);
	}

#if DUMMYNET
	/* packet re-injected by dummynet: skip normalization this pass */
	if (fwa != NULL && fwa->fwa_pf_rule != NULL) {
		goto nonormalize;
	}
#endif /* DUMMYNET */

	/* We do IP header normalization and packet reassembly here */
	action = pf_normalize_ip6(pbuf, dir, kif, &reason, &pd);
	if (action != PF_PASS || pd.lmw < 0) {
		action = PF_DROP;
		goto done;
	}

#if DUMMYNET
nonormalize:
#endif /* DUMMYNET */
	/* normalization may have replaced the pbuf; re-read the header */
	h = pbuf->pb_data;

	/*
	 * we do not support jumbogram yet. if we keep going, zero ip6_plen
	 * will do something bad, so drop the packet for now.
	 */
	/*
	 * NOTE(review): htons() here works only because 0 is
	 * endian-invariant; ntohs() would be the conventional spelling.
	 */
	if (htons(h->ip6_plen) == 0) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_NORM); /*XXX*/
		goto done;
	}
	/* re-derive all descriptor fields from the (possibly new) header */
	pd.src = (struct pf_addr *)(uintptr_t)&h->ip6_src;
	pd.dst = (struct pf_addr *)(uintptr_t)&h->ip6_dst;
	PF_ACPY(&pd.baddr, pd.src, AF_INET6);
	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6);
	pd.ip_sum = NULL;
	pd.af = AF_INET6;
	pd.tos = 0;
	pd.ttl = h->ip6_hlim;
	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
	pd.eh = eh;

	off = ((caddr_t)h - (caddr_t)pbuf->pb_data) + sizeof(struct ip6_hdr);
	pd.proto = h->ip6_nxt;
	pd.proto_variant = 0;
	pd.mp = pbuf;
	pd.lmw = 0;
	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);

	/*
	 * Walk the extension-header chain until a terminal (upper-layer)
	 * protocol is found, advancing 'off' past each option header.
	 * Routing headers are counted in rh_cnt and penalized later.
	 */
	do {
		switch (pd.proto) {
		case IPPROTO_FRAGMENT: {
			struct ip6_frag ip6f;

			pd.flags |= PFDESC_IP_FRAG;
			if (!pf_pull_hdr(pbuf, off, &ip6f, sizeof ip6f, NULL,
			    &reason, pd.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 short fragment header\n"));
				action = PF_DROP;
				REASON_SET(&reason, PFRES_SHORT);
				log = 1;
				goto done;
			}
			pd.proto = ip6f.ip6f_nxt;
#if DUMMYNET
			/* Traffic goes through dummynet first */
			action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd,
			    fwa);
			if (action == PF_DROP || pbuf == NULL) {
				*pbufp = NULL;
				return action;
			}
#endif /* DUMMYNET */
			/* fragments are handled by the fragment ruleset only */
			action = pf_test_fragment(&r, dir, kif, pbuf, h, &pd,
			    &a, &ruleset);
			if (action == PF_DROP) {
				REASON_SET(&reason, PFRES_FRAG);
				log = 1;
			}
			goto done;
		}
		case IPPROTO_ROUTING:
			++rh_cnt;
			OS_FALLTHROUGH;

		case IPPROTO_AH:
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct ip6_ext opt6;

			if (!pf_pull_hdr(pbuf, off, &opt6, sizeof(opt6),
			    NULL, &reason, pd.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 short opt\n"));
				action = PF_DROP;
				log = 1;
				goto done;
			}
			/* AH length is in 4-byte units, others in 8-byte units */
			if (pd.proto == IPPROTO_AH) {
				off += (opt6.ip6e_len + 2) * 4;
			} else {
				off += (opt6.ip6e_len + 1) * 8;
			}
			pd.proto = opt6.ip6e_nxt;
			/* goto the next header */
			break;
		}
		default:
			terminal++;
			break;
		}
	} while (!terminal);


	/*
	 * Upper-layer dispatch: for each protocol, pull the header, run
	 * dummynet, try the stateful lookup first, and fall back to full
	 * ruleset evaluation (pf_test_rule) when no state matched.
	 */
	switch (pd.proto) {
	case IPPROTO_TCP: {
		struct tcphdr th;

		pd.hdr.tcp = &th;
		if (!pf_pull_hdr(pbuf, off, &th, sizeof(th),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		/* TCP payload length = total - extension headers - TCP header */
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_normalize_tcp(dir, kif, pbuf, 0, off, h, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_DROP) {
			goto done;
		}
		if (th.th_sport == 0 || th.th_dport == 0) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_INVPORT);
			goto done;
		}
		action = pf_test_state_tcp(&s, dir, kif, pbuf, off, h, &pd,
		    &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			/* adopt rule/anchor/log settings from the matched state */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr uh;

		pd.hdr.udp = &uh;
		if (!pf_pull_hdr(pbuf, off, &uh, sizeof(uh),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		if (uh.uh_sport == 0 || uh.uh_dport == 0) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_INVPORT);
			goto done;
		}
		/* UDP length must fit in the packet and cover the header */
		if (ntohs(uh.uh_ulen) > pbuf->pb_packet_len - off ||
		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_SHORT);
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_udp(&s, dir, kif, pbuf, off, h, &pd,
		    &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_ICMPV6: {
		struct icmp6_hdr ih;

		pd.hdr.icmp6 = &ih;
		if (!pf_pull_hdr(pbuf, off, &ih, sizeof(ih),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_icmp(&s, dir, kif,
		    pbuf, off, h, &pd, &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_ESP: {
		struct pf_esp_hdr esp;

		pd.hdr.esp = &esp;
		if (!pf_pull_hdr(pbuf, off, &esp, sizeof(esp), &action,
		    &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_esp(&s, dir, kif, off, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_GRE: {
		struct pf_grev1_hdr grev1;

		pd.hdr.grev1 = &grev1;
		if (!pf_pull_hdr(pbuf, off, &grev1, sizeof(grev1), &action,
		    &reason, AF_INET6)) {
			log = (action != PF_PASS);
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		/* GREv1 carrying PPP (PPTP data channel) is tracked statefully */
		if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
		    ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
			if (ntohs(grev1.payload_length) >
			    pbuf->pb_packet_len - off) {
				action = PF_DROP;
				REASON_SET(&reason, PFRES_SHORT);
				goto done;
			}
			action = pf_test_state_grev1(&s, dir, kif, off, &pd);
			if (pd.lmw < 0) {
				goto done;
			}
			PF_APPLE_UPDATE_PDESC_IPv6();
			if (action == PF_PASS) {
#if NPFSYNC
				pfsync_update_state(s);
#endif /* NPFSYNC */
				r = s->rule.ptr;
				a = s->anchor.ptr;
				log = s->log;
				break;
			} else if (s == NULL) {
				action = pf_test_rule(&r, &s, dir, kif, pbuf,
				    off, h, &pd, &a, &ruleset, NULL);
				if (action == PF_PASS) {
					break;
				}
			}
		}

		/* not GREv1/PPTP, so treat as ordinary GRE... */
		OS_FALLTHROUGH; /* XXX is this correct? */
	}

	default:
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_other(&s, dir, kif, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif, pbuf, off, h,
			    &pd, &a, &ruleset, NULL);
		}
		break;
	}

done:
	/* NAT64 translation re-injects the packet; nothing more to do here */
	if (action == PF_NAT64) {
		*pbufp = NULL;
		return action;
	}

	*pbufp = pd.mp;
	PF_APPLE_UPDATE_PDESC_IPv6();

	/* handle dangerous IPv6 extension headers. */
	if (action != PF_DROP) {
		/* drop routing headers unless the matching rule allows options */
		if (action == PF_PASS && rh_cnt &&
		    !((s && s->allow_opts) || r->allow_opts)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_IPOPTIONS);
			log = 1;
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: dropping packet with dangerous v6addr headers\n"));
		}

		if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) ||
		    (pd.pktflags & PKTF_FLOW_ID)) {
			(void) pf_tag_packet(pbuf, pd.pf_mtag, s ? s->tag : 0,
			    r->rtableid, &pd);
		}

		if (action == PF_PASS) {
#if PF_ECN
			/* add hints for ecn */
			pd.pf_mtag->pftag_hdr = h;
			/* record address family */
			pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET;
			pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
#endif /* PF_ECN */
			/* record protocol */
			*pbuf->pb_proto = pd.proto;
			/*
			 * connections redirected to loopback should not match
			 * sockets bound specifically to loopback, for security
			 * reasons -- flag them for the transport layer.
			 */
			if (dir == PF_IN && (pd.proto == IPPROTO_TCP ||
			    pd.proto == IPPROTO_UDP) && s != NULL &&
			    s->nat_rule.ptr != NULL &&
			    (s->nat_rule.ptr->action == PF_RDR ||
			    s->nat_rule.ptr->action == PF_BINAT) &&
			    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6addr)) {
				pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;
			}
		}
	}


	if (log) {
		struct pf_rule *lr;

		/* log against the NAT rule when it asks for PF_LOG_ALL */
		if (s != NULL && s->nat_rule.ptr != NULL &&
		    s->nat_rule.ptr->log & PF_LOG_ALL) {
			lr = s->nat_rule.ptr;
		} else {
			lr = r;
		}
		PFLOG_PACKET(kif, h, pbuf, AF_INET6, dir, reason, lr, a, ruleset,
		    &pd);
	}

	/* interface counters: index 1 == IPv6 */
	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;

	/* per-rule / per-state / per-src-node accounting */
	if (action == PF_PASS || r->action == PF_DROP) {
		dirndx = (dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd.tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd.tot_len;
		}
		if (s != NULL) {
			sk = s->state_key;
			if (s->nat_rule.ptr != NULL) {
				s->nat_rule.ptr->packets[dirndx]++;
				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
			}
			if (s->src_node != NULL) {
				s->src_node->packets[dirndx]++;
				s->src_node->bytes[dirndx] += pd.tot_len;
			}
			if (s->nat_src_node != NULL) {
				s->nat_src_node->packets[dirndx]++;
				s->nat_src_node->bytes[dirndx] += pd.tot_len;
			}
			dirndx = (dir == sk->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd.tot_len;
		}
		tr = r;
		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
		if (nr != NULL) {
			struct pf_addr *x;
			/*
			 * XXX: we need to make sure that the addresses
			 * passed to pfr_update_stats() are the same than
			 * the addresses used during matching (pfr_match)
			 */
			/*
			 * NOTE(review): the IPv4 path tests (sk == NULL) here;
			 * (s == NULL) is equivalent because sk is only set
			 * when s != NULL, and short-circuit evaluation keeps
			 * the sk dereference safe.
			 */
			if (r == &pf_default_rule) {
				tr = nr;
				x = (s == NULL || sk->direction == dir) ?
				    &pd.baddr : &pd.naddr;
			} else {
				x = (s == NULL || sk->direction == dir) ?
				    &pd.naddr : &pd.baddr;
			}
			if (x == &pd.baddr || s == NULL) {
				/* we need to change the address */
				if (dir == PF_OUT) {
					pd.src = x;
				} else {
					pd.dst = x;
				}
			}
		}
		if (tr->src.addr.type == PF_ADDR_TABLE) {
			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ? pd.src : pd.dst, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->src.neg);
		}
		if (tr->dst.addr.type == PF_ADDR_TABLE) {
			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->dst.neg);
		}
	}

	VERIFY(pbuf == NULL || pd.mp == NULL || pd.mp == pbuf);

	if (*pbufp) {
		if (pd.lmw < 0) {
			REASON_SET(&reason, PFRES_MEMORY);
			action = PF_DROP;
		}

		if (action == PF_DROP) {
			pbuf_destroy(*pbufp);
			*pbufp = NULL;
			return PF_DROP;
		}

		*pbufp = pbuf;
	}

	/*
	 * NOTE(review): if *pbufp were NULL here with PF_SYNPROXY_DROP,
	 * pbuf_destroy() would see NULL -- presumably unreachable since
	 * synproxy keeps the pbuf; confirm against pf_test_state_tcp().
	 */
	if (action == PF_SYNPROXY_DROP) {
		pbuf_destroy(*pbufp);
		*pbufp = NULL;
		action = PF_PASS;
	} else if (r->rt) {
		/* pf_route6 can free the mbuf causing *pbufp to become NULL */
		pf_route6(pbufp, r, dir, kif->pfik_ifp, s, &pd);
	}

	/* if reassembled packet passed, create new fragments */
	struct pf_fragment_tag *ftag = NULL;
	if ((action == PF_PASS) && (*pbufp != NULL) && (fwd) &&
	    ((ftag = pf_find_fragment_tag_pbuf(*pbufp)) != NULL)) {
		action = pf_refragment6(ifp, pbufp, ftag);
	}
	return action;
}
10876
/*
 * Stub: interface-queue congestion checking is not implemented on this
 * platform, so always report "no congestion".
 */
static int
pf_check_congestion(struct ifqueue *ifq)
{
	(void)ifq;
	return 0;
}
10883
10884 void
pool_init(struct pool * pp,size_t size,unsigned int align,unsigned int ioff,int flags,const char * wchan,void * palloc)10885 pool_init(struct pool *pp, size_t size, unsigned int align, unsigned int ioff,
10886 int flags, const char *wchan, void *palloc)
10887 {
10888 #pragma unused(align, ioff, flags, palloc)
10889 bzero(pp, sizeof(*pp));
10890 pp->pool_zone = zone_create(wchan, size,
10891 ZC_PGZ_USE_GUARDS | ZC_ZFREE_CLEARMEM);
10892 pp->pool_hiwat = pp->pool_limit = (unsigned int)-1;
10893 pp->pool_name = wchan;
10894 }
10895
/*
 * No-op: kernel zones cannot currently be destroyed, so the backing
 * pool_zone is intentionally left in place.
 */
void
pool_destroy(struct pool *pp)
{
#pragma unused(pp)
}
10902
/*
 * Record the pool's high-water mark.  The value is stored but not
 * otherwise acted upon in this implementation.
 */
void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pool_hiwat = n;     /* Currently unused */
}
10908
/*
 * Set the pool's hard allocation limit; pool_get() starts failing once
 * pool_count exceeds this value.  warnmess/ratecap are accepted for
 * BSD pool API compatibility but ignored.
 */
void
pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
{
#pragma unused(warnmess, ratecap)
	pp->pool_limit = n;
}
10915
10916 void *
pool_get(struct pool * pp,int flags)10917 pool_get(struct pool *pp, int flags)
10918 {
10919 void *buf;
10920
10921 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
10922
10923 if (pp->pool_count > pp->pool_limit) {
10924 DPFPRINTF(PF_DEBUG_NOISY,
10925 ("pf: pool %s hard limit reached (%d)\n",
10926 pp->pool_name != NULL ? pp->pool_name : "unknown",
10927 pp->pool_limit));
10928 pp->pool_fails++;
10929 return NULL;
10930 }
10931
10932 buf = zalloc_flags(pp->pool_zone,
10933 (flags & PR_WAITOK) ? Z_WAITOK : Z_NOWAIT);
10934 if (buf != NULL) {
10935 pp->pool_count++;
10936 VERIFY(pp->pool_count != 0);
10937 }
10938 return buf;
10939 }
10940
10941 void
pool_put(struct pool * pp,void * v)10942 pool_put(struct pool *pp, void *v)
10943 {
10944 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
10945
10946 zfree(pp->pool_zone, v);
10947 VERIFY(pp->pool_count != 0);
10948 pp->pool_count--;
10949 }
10950
/*
 * Return the pf metadata tag of a pbuf.  The tag is stored directly in
 * the pbuf, so this never allocates.
 */
struct pf_mtag *
pf_find_mtag_pbuf(pbuf_t *pbuf)
{
	return pbuf->pb_pftag;
}
10956
/*
 * Return the pf metadata tag of an mbuf via m_pftag().
 */
struct pf_mtag *
pf_find_mtag(struct mbuf *m)
{
	return m_pftag(m);
}
10962
/*
 * Alias for pf_find_mtag(); retained as a separate entry point for
 * API compatibility.
 */
struct pf_mtag *
pf_get_mtag(struct mbuf *m)
{
	return pf_find_mtag(m);
}
10968
/*
 * Alias for pf_find_mtag_pbuf(); retained as a separate entry point
 * for API compatibility.
 */
struct pf_mtag *
pf_get_mtag_pbuf(pbuf_t *pbuf)
{
	return pf_find_mtag_pbuf(pbuf);
}
10974
/*
 * Attach a copy of 'ftag' to mbuf 'm' as a KERNEL_TAG_TYPE_PF_REASS
 * m_tag and mark the packet's pf mtag as reassembled.
 *
 * 'how' controls blocking behavior of the m_tag allocation.  Returns a
 * pointer to the copied tag payload, or NULL if allocation failed.
 */
struct pf_fragment_tag *
pf_copy_fragment_tag(struct mbuf *m, struct pf_fragment_tag *ftag, int how)
{
	struct m_tag *tag;
	struct pf_mtag *pftag = pf_find_mtag(m);

	tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS,
	    sizeof(*ftag), how, m);
	if (tag == NULL) {
		return NULL;
	} else {
		m_tag_prepend(m, tag);
		/* the tag payload lives immediately after the m_tag header */
		tag = tag + 1;
	}
	bcopy(ftag, tag, sizeof(*ftag));
	pftag->pftag_flags |= PF_TAG_REASSEMBLED;
	return (struct pf_fragment_tag *)tag;
}
10993
/*
 * Look up the reassembly tag on an mbuf.  Returns a pointer to the tag
 * payload, or NULL when no PF_REASS tag is attached.  A tag may only
 * be present when the pf mtag says the packet was reassembled (the
 * VERIFY enforces that invariant).
 */
struct pf_fragment_tag *
pf_find_fragment_tag(struct mbuf *m)
{
	struct m_tag *tag;
	struct pf_fragment_tag *ftag;
	struct pf_mtag *pftag = pf_find_mtag(m);

	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS,
	    NULL);
	VERIFY((tag == NULL) || (pftag->pftag_flags & PF_TAG_REASSEMBLED));
	if (tag != NULL) {
		/* step past the m_tag header to the payload */
		tag = tag + 1;
	}
	ftag = (struct pf_fragment_tag *)tag;
	return ftag;
}
11010
/*
 * pbuf variant: return the attached reassembly tag only when the pf
 * mtag marks the packet as reassembled, otherwise NULL.
 */
struct pf_fragment_tag *
pf_find_fragment_tag_pbuf(pbuf_t *pbuf)
{
	struct pf_mtag *mtag = pf_find_mtag_pbuf(pbuf);

	return (mtag->pftag_flags & PF_TAG_REASSEMBLED) ?
	       pbuf->pb_pf_fragtag : NULL;
}
11019
11020 uint64_t
pf_time_second(void)11021 pf_time_second(void)
11022 {
11023 struct timeval t;
11024
11025 microuptime(&t);
11026 return t.tv_sec;
11027 }
11028
11029 uint64_t
pf_calendar_time_second(void)11030 pf_calendar_time_second(void)
11031 {
11032 struct timeval t;
11033
11034 getmicrotime(&t);
11035 return t.tv_sec;
11036 }
11037
11038 static void *
hook_establish(struct hook_desc_head * head,int tail,hook_fn_t fn,void * arg)11039 hook_establish(struct hook_desc_head *head, int tail, hook_fn_t fn, void *arg)
11040 {
11041 struct hook_desc *hd;
11042
11043 hd = kalloc_type(struct hook_desc, Z_WAITOK | Z_NOFAIL);
11044
11045 hd->hd_fn = fn;
11046 hd->hd_arg = arg;
11047 if (tail) {
11048 TAILQ_INSERT_TAIL(head, hd, hd_list);
11049 } else {
11050 TAILQ_INSERT_HEAD(head, hd, hd_list);
11051 }
11052
11053 return hd;
11054 }
11055
11056 static void
hook_runloop(struct hook_desc_head * head,int flags)11057 hook_runloop(struct hook_desc_head *head, int flags)
11058 {
11059 struct hook_desc *hd;
11060
11061 if (!(flags & HOOK_REMOVE)) {
11062 if (!(flags & HOOK_ABORT)) {
11063 TAILQ_FOREACH(hd, head, hd_list)
11064 hd->hd_fn(hd->hd_arg);
11065 }
11066 } else {
11067 while (!!(hd = TAILQ_FIRST(head))) {
11068 TAILQ_REMOVE(head, hd, hd_list);
11069 if (!(flags & HOOK_ABORT)) {
11070 hd->hd_fn(hd->hd_arg);
11071 }
11072 if (flags & HOOK_FREE) {
11073 kfree_type(struct hook_desc, hd);
11074 }
11075 }
11076 }
11077 }
11078
11079 #if SKYWALK && defined(XNU_TARGET_OS_OSX)
11080 static bool
pf_check_compatible_anchor(const char * anchor_path)11081 pf_check_compatible_anchor(const char *anchor_path)
11082 {
11083 // Whitelist reserved anchor
11084 if (strncmp(anchor_path, PF_RESERVED_ANCHOR, MAXPATHLEN) == 0) {
11085 return true;
11086 }
11087
11088 // Whitelist com.apple anchor
11089 if (strncmp(anchor_path, "com.apple", MAXPATHLEN) == 0) {
11090 return true;
11091 }
11092
11093 for (int i = 0; i < sizeof(compatible_anchors) / sizeof(compatible_anchors[0]); i++) {
11094 const char *ptr = strnstr(anchor_path, compatible_anchors[i], MAXPATHLEN);
11095 if (ptr != NULL && ptr == anchor_path) {
11096 return true;
11097 }
11098 }
11099
11100 return false;
11101 }
11102
/*
 * Return true when the loaded pf configuration is "compatible": every
 * anchor passes pf_check_compatible_anchor() and the main ruleset
 * contains only anchor-container rules (no plain rules of its own).
 * Only built for SKYWALK on macOS targets.
 */
bool
pf_check_compatible_rules(void)
{
	struct pf_anchor *anchor = NULL;
	struct pf_rule *rule = NULL;

	// Check whitelisted anchors
	RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) {
		if (!pf_check_compatible_anchor(anchor->path)) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf anchor %s not compatible\n", anchor->path);
			}
			return false;
		}
	}

	// Check rules in main ruleset
	for (int i = PF_RULESET_SCRUB; i < PF_RULESET_MAX; i++) {
		TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr, entries) {
			// a rule without an anchor is a real filter rule
			if (rule->anchor == NULL) {
				if (pf_status.debug >= PF_DEBUG_MISC) {
					printf("main ruleset contains rules\n");
				}
				return false;
			}
		}
	}

	return true;
}
11133 #endif // SKYWALK && defined(XNU_TARGET_OS_OSX)
11134