1 /*
2 * Copyright (c) 2007-2023 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $apfw: git commit 6602420f2f101b74305cd78f7cd9e0c8fdedae97 $ */
30 /* $OpenBSD: pf.c,v 1.567 2008/02/20 23:40:13 henning Exp $ */
31
32 /*
33 * Copyright (c) 2001 Daniel Hartmeier
34 * Copyright (c) 2002 - 2013 Henning Brauer
35 * NAT64 - Copyright (c) 2010 Viagenie Inc. (http://www.viagenie.ca)
36 * All rights reserved.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 *
42 * - Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * - Redistributions in binary form must reproduce the above
45 * copyright notice, this list of conditions and the following
46 * disclaimer in the documentation and/or other materials provided
47 * with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
50 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
51 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
52 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
53 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
54 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
55 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
56 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
57 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
59 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
60 * POSSIBILITY OF SUCH DAMAGE.
61 *
62 * Effort sponsored in part by the Defense Advanced Research Projects
63 * Agency (DARPA) and Air Force Research Laboratory, Air Force
64 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
65 *
66 */
67
68 #include <machine/endian.h>
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/filio.h>
72 #include <sys/socket.h>
73 #include <sys/socketvar.h>
74 #include <sys/kernel.h>
75 #include <sys/time.h>
76 #include <sys/proc.h>
77 #include <sys/random.h>
78 #include <sys/mcache.h>
79 #include <sys/protosw.h>
80
81 #include <libkern/crypto/md5.h>
82 #include <libkern/libkern.h>
83
84 #include <mach/thread_act.h>
85
86 #include <kern/uipc_domain.h>
87
88 #include <net/droptap.h>
89 #include <net/if.h>
90 #include <net/if_types.h>
91 #include <net/bpf.h>
92 #include <net/route.h>
93 #include <net/dlil.h>
94
95 #include <netinet/in.h>
96 #include <netinet/in_var.h>
97 #include <netinet/in_systm.h>
98 #include <netinet/ip.h>
99 #include <netinet/ip_var.h>
100 #include <netinet/tcp.h>
101 #include <netinet/tcp_seq.h>
102 #include <netinet/udp.h>
103 #include <netinet/ip_icmp.h>
104 #include <netinet/in_pcb.h>
105 #include <netinet/tcp_timer.h>
106 #include <netinet/tcp_var.h>
107 #include <netinet/tcp_fsm.h>
108 #include <netinet/udp_var.h>
109 #include <netinet/icmp_var.h>
110 #include <net/if_ether.h>
111 #include <net/ethernet.h>
112 #include <net/flowhash.h>
113 #include <net/nat464_utils.h>
114 #include <net/pfvar.h>
115 #include <net/if_pflog.h>
116
117 #if NPFSYNC
118 #include <net/if_pfsync.h>
119 #endif /* NPFSYNC */
120
121 #include <netinet/ip6.h>
122 #include <netinet6/in6_pcb.h>
123 #include <netinet6/ip6_var.h>
124 #include <netinet/icmp6.h>
125 #include <netinet6/nd6.h>
126
127 #if DUMMYNET
128 #include <netinet/ip_dummynet.h>
129 #endif /* DUMMYNET */
130
131 #if SKYWALK
132 #include <skywalk/namespace/flowidns.h>
133 #endif /* SKYWALK */
134
/*
 * For RandomULong(), to get a 32-bit random value.
 * Note that random() returns a 31-bit value, see rdar://11159750
 */
139 #include <dev/random/randomdev.h>
140
/*
 * Debug print helper.  `x` must be a complete, parenthesized printf argument
 * list, i.e. DPFPRINTF(level, ("fmt", args...)).  The printf is evaluated
 * only when pf_status.debug is at least `n`; otherwise the macro expands to
 * a void no-op expression.
 */
#define DPFPRINTF(n, x) (pf_status.debug >= (n) ? printf x : ((void)0))
142
/*
 * On Mac OS X, the rtableid value is treated as the interface scope
 * value that is equivalent to the interface index used for scoped
 * routing.  A valid scope value is anything but IFSCOPE_NONE (0),
 * as per definition of ifindex which is a positive, non-zero number.
 * The other BSDs treat a negative rtableid value as invalid, hence
 * the test against INT_MAX to handle userland apps which initialize
 * the field with a negative number.
 *
 * Note that the argument `r` is evaluated twice.
 */
#define PF_RTABLEID_IS_VALID(r) \
	((r) > IFSCOPE_NONE && (r) <= INT_MAX)
154
155 /*
156 * Global variables
157 */
/* Lock group "pf" and the pf_lock mutex that belongs to it. */
static LCK_GRP_DECLARE(pf_lock_grp, "pf");
LCK_MTX_DECLARE(pf_lock, &pf_lock_grp);

/* Lock group "pf_perim" and the pf_perim_lock read/write lock in it. */
static LCK_GRP_DECLARE(pf_perim_lock_grp, "pf_perim");
LCK_RW_DECLARE(pf_perim_lock, &pf_perim_lock_grp);

/*
 * State tables: one tree per comparator (see the RB_GENERATE() lines for
 * pf_state_tree_lan_ext and pf_state_tree_ext_gwy further down this file).
 */
struct pf_state_tree_lan_ext pf_statetbl_lan_ext;
struct pf_state_tree_ext_gwy pf_statetbl_ext_gwy;
/* NOTE(review): presumably the number of NAT64 keys in pf_statetbl_ext_gwy -- confirm at its users. */
static uint32_t pf_state_tree_ext_gwy_nat64_cnt = 0;

struct pf_palist pf_pabuf;
struct pf_status pf_status;

u_int32_t ticket_pabuf;

/* Secret material and bookkeeping, presumably consumed by pf_tcp_iss() -- confirm there. */
static MD5_CTX pf_tcp_secret_ctx;
static u_char pf_tcp_secret[16];
static int pf_tcp_secret_init;
static int pf_tcp_iss_off;

/* Fixed-size (64 frame) stack of anchor evaluation frames. */
static struct pf_anchor_stackframe {
	struct pf_ruleset       *rs;
	struct pf_rule          *r;
	struct pf_anchor_node   *parent;
	struct pf_anchor        *child;
} pf_anchor_stack[64];

/* Allocation pools; several are referenced from pf_pool_limits[] below. */
struct pool pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl;
struct pool pf_state_pl, pf_state_key_pl;
188
/* Signature of a hook callback: receives the opaque argument it was registered with. */
typedef void (*hook_fn_t)(void *);

/* One registered hook: list linkage, the callback, and its argument. */
struct hook_desc {
	TAILQ_ENTRY(hook_desc) hd_list;
	hook_fn_t hd_fn;
	void *hd_arg;
};

/*
 * Flag bits; presumably the `flags` argument of hook_runloop() -- confirm
 * against its definition, which is outside this view.
 */
#define HOOK_REMOVE     0x01
#define HOOK_FREE       0x02
#define HOOK_ABORT      0x04
200
201 static void *hook_establish(struct hook_desc_head *, int,
202 hook_fn_t, void *);
203 static void hook_runloop(struct hook_desc_head *, int flags);
204
205 struct pool pf_app_state_pl;
206 static void pf_print_addr(struct pf_addr *addr, sa_family_t af);
207 static void pf_print_sk_host(struct pf_state_host *, u_int8_t, int,
208 u_int8_t);
209
210 static void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t);
211
212 static void pf_init_threshold(struct pf_threshold *, u_int32_t,
213 u_int32_t);
214 static void pf_add_threshold(struct pf_threshold *);
215 static int pf_check_threshold(struct pf_threshold *);
216
217 static void pf_change_ap(int, pbuf_t *, struct pf_addr *,
218 u_int16_t *, u_int16_t *, u_int16_t *,
219 struct pf_addr *, u_int16_t, u_int8_t, sa_family_t,
220 sa_family_t, int);
221 static int pf_modulate_sack(pbuf_t *, int, struct pf_pdesc *,
222 struct tcphdr *, struct pf_state_peer *);
223 static void pf_change_a6(struct pf_addr *, u_int16_t *,
224 struct pf_addr *, u_int8_t);
225 static void pf_change_addr(struct pf_addr *a, u_int16_t *c, struct pf_addr *an,
226 u_int8_t u, sa_family_t af, sa_family_t afn);
227 static void pf_change_icmp(struct pf_addr *, u_int16_t *,
228 struct pf_addr *, struct pf_addr *, u_int16_t,
229 u_int16_t *, u_int16_t *, u_int16_t *,
230 u_int16_t *, u_int8_t, sa_family_t);
231 static void pf_send_tcp(const struct pf_rule *, sa_family_t,
232 const struct pf_addr *, const struct pf_addr *,
233 u_int16_t, u_int16_t, u_int32_t, u_int32_t,
234 u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
235 u_int16_t, struct ether_header *, struct ifnet *);
236 static void pf_send_icmp(pbuf_t *, u_int8_t, u_int8_t,
237 sa_family_t, struct pf_rule *);
238 static struct pf_rule *pf_match_translation(struct pf_pdesc *, pbuf_t *,
239 int, int, struct pfi_kif *, struct pf_addr *,
240 union pf_state_xport *, struct pf_addr *,
241 union pf_state_xport *, int);
242 static struct pf_rule *pf_get_translation_aux(struct pf_pdesc *,
243 pbuf_t *, int, int, struct pfi_kif *,
244 struct pf_src_node **, struct pf_addr *,
245 union pf_state_xport *, struct pf_addr *,
246 union pf_state_xport *, union pf_state_xport *
247 #if SKYWALK
248 , netns_token *
249 #endif
250 );
251 static void pf_attach_state(struct pf_state_key *,
252 struct pf_state *, int);
253 static u_int32_t pf_tcp_iss(struct pf_pdesc *);
254 static int pf_test_rule(struct pf_rule **, struct pf_state **,
255 int, struct pfi_kif *, pbuf_t *, int,
256 void *, struct pf_pdesc *, struct pf_rule **,
257 struct pf_ruleset **, struct ifqueue *);
258 #if DUMMYNET
259 static int pf_test_dummynet(struct pf_rule **, int,
260 struct pfi_kif *, pbuf_t **,
261 struct pf_pdesc *, struct ip_fw_args *);
262 #endif /* DUMMYNET */
263 static int pf_test_fragment(struct pf_rule **, int,
264 struct pfi_kif *, pbuf_t *, void *,
265 struct pf_pdesc *, struct pf_rule **,
266 struct pf_ruleset **);
267 static int pf_test_state_tcp(struct pf_state **, int,
268 struct pfi_kif *, pbuf_t *, int,
269 void *, struct pf_pdesc *, u_short *);
270 static int pf_test_state_udp(struct pf_state **, int,
271 struct pfi_kif *, pbuf_t *, int,
272 void *, struct pf_pdesc *, u_short *);
273 static int pf_test_state_icmp(struct pf_state **, int,
274 struct pfi_kif *, pbuf_t *, int,
275 void *, struct pf_pdesc *, u_short *);
276 static int pf_test_state_other(struct pf_state **, int,
277 struct pfi_kif *, struct pf_pdesc *);
278 static int pf_match_tag(struct pf_rule *,
279 struct pf_mtag *, int *);
280 static void pf_hash(struct pf_addr *, struct pf_addr *,
281 struct pf_poolhashkey *, sa_family_t);
282 static int pf_map_addr(u_int8_t, struct pf_rule *,
283 struct pf_addr *, struct pf_addr *,
284 struct pf_addr *, struct pf_src_node **);
285 static int pf_get_sport(struct pf_pdesc *, struct pfi_kif *,
286 struct pf_rule *, struct pf_addr *,
287 union pf_state_xport *, struct pf_addr *,
288 union pf_state_xport *, struct pf_addr *,
289 union pf_state_xport *, struct pf_src_node **
290 #if SKYWALK
291 , netns_token *
292 #endif
293 );
294 static void pf_route(pbuf_t **, struct pf_rule *, int,
295 struct ifnet *, struct pf_state *,
296 struct pf_pdesc *);
297 static void pf_route6(pbuf_t **, struct pf_rule *, int,
298 struct ifnet *, struct pf_state *,
299 struct pf_pdesc *);
300 static u_int8_t pf_get_wscale(pbuf_t *, int, u_int16_t,
301 sa_family_t);
302 static u_int16_t pf_get_mss(pbuf_t *, int, u_int16_t,
303 sa_family_t);
304 static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t,
305 u_int16_t);
306 static void pf_set_rt_ifp(struct pf_state *,
307 struct pf_addr *, sa_family_t af);
308 static int pf_check_proto_cksum(pbuf_t *, int, int,
309 u_int8_t, sa_family_t);
310 static int pf_addr_wrap_neq(struct pf_addr_wrap *,
311 struct pf_addr_wrap *);
312 static struct pf_state *pf_find_state(struct pfi_kif *,
313 struct pf_state_key_cmp *, u_int);
314 static int pf_src_connlimit(struct pf_state **);
315 static void pf_stateins_err(const char *, struct pf_state *,
316 struct pfi_kif *);
317 static int pf_check_congestion(struct ifqueue *);
318
319 #if 0
320 static const char *pf_pptp_ctrl_type_name(u_int16_t code);
321 #endif
322 static void pf_pptp_handler(struct pf_state *, int, int,
323 struct pf_pdesc *, struct pfi_kif *);
324 static void pf_pptp_unlink(struct pf_state *);
325 static void pf_grev1_unlink(struct pf_state *);
326 static int pf_test_state_grev1(struct pf_state **, int,
327 struct pfi_kif *, int, struct pf_pdesc *);
328 static int pf_ike_compare(struct pf_app_state *,
329 struct pf_app_state *);
330 static int pf_test_state_esp(struct pf_state **, int,
331 struct pfi_kif *, int, struct pf_pdesc *);
332 static int pf_test6(int, struct ifnet *, pbuf_t **, struct ether_header *,
333 struct ip_fw_args *);
334 #if INET
335 static int pf_test(int, struct ifnet *, pbuf_t **,
336 struct ether_header *, struct ip_fw_args *);
337 #endif /* INET */
338
339
340 extern struct pool pfr_ktable_pl;
341 extern struct pool pfr_kentry_pl;
342 extern int path_mtu_discovery;
343
/*
 * Per-pool limits: each entry pairs an allocation pool with its initial
 * high-water mark.  The array has PF_LIMIT_MAX slots.
 * NOTE(review): entry order presumably has to match the PF_LIMIT_* index
 * values -- confirm against pfvar.h before reordering.
 */
struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ .pp = &pf_state_pl, .limit = PFSTATE_HIWAT },
	{ .pp = &pf_app_state_pl, .limit = PFAPPSTATE_HIWAT },
	{ .pp = &pf_src_tree_pl, .limit = PFSNODE_HIWAT },
	{ .pp = &pf_frent_pl, .limit = PFFRAG_FRENT_HIWAT },
	{ .pp = &pfr_ktable_pl, .limit = PFR_KTABLE_HIWAT },
	{ .pp = &pfr_kentry_pl, .limit = PFR_KENTRY_HIWAT },
};
352
#if SKYWALK
/*
 * Anchor names treated as "compatible" when SKYWALK is configured.
 * NOTE(review): what compatibility means here is decided by the users of
 * this table, which are outside this view.
 */
const char *compatible_anchors[] = {
	"com.apple.internet-sharing",
	"com.apple/250.ApplicationFirewall",
	"com.apple/200.AirDrop"
};
#endif // SKYWALK
360
361 void *
pf_lazy_makewritable(struct pf_pdesc * pd,pbuf_t * pbuf,int len)362 pf_lazy_makewritable(struct pf_pdesc *pd, pbuf_t *pbuf, int len)
363 {
364 void *__single p;
365
366 if (pd->lmw < 0) {
367 return NULL;
368 }
369
370 VERIFY(pbuf == pd->mp);
371
372 p = pbuf->pb_data;
373 if (len > pd->lmw) {
374 if ((p = pbuf_ensure_writable(pbuf, len)) == NULL) {
375 len = -1;
376 }
377 pd->lmw = len;
378 if (len >= 0) {
379 pd->pf_mtag = pf_find_mtag_pbuf(pbuf);
380
381 switch (pd->af) {
382 case AF_INET: {
383 struct ip *__single h = p;
384 pd->src = (struct pf_addr *)(void *)&h->ip_src;
385 pd->dst = (struct pf_addr *)(void *)&h->ip_dst;
386 pd->ip_sum = &h->ip_sum;
387 break;
388 }
389 case AF_INET6: {
390 struct ip6_hdr *__single h = p;
391 pd->src = (struct pf_addr *)(void *)&h->ip6_src;
392 pd->dst = (struct pf_addr *)(void *)&h->ip6_dst;
393 break;
394 }
395 }
396 }
397 }
398
399 return len < 0 ? NULL : p;
400 }
401
402 static const int *
pf_state_lookup_aux(struct pf_state ** state,struct pfi_kif * kif,int direction,int * action)403 pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif,
404 int direction, int *action)
405 {
406 if (*state == NULL || (*state)->timeout == PFTM_PURGE) {
407 *action = PF_DROP;
408 return action;
409 }
410
411 if (direction == PF_OUT &&
412 (((*state)->rule.ptr->rt == PF_ROUTETO &&
413 (*state)->rule.ptr->direction == PF_OUT) ||
414 ((*state)->rule.ptr->rt == PF_REPLYTO &&
415 (*state)->rule.ptr->direction == PF_IN)) &&
416 (*state)->rt_kif != NULL && (*state)->rt_kif != kif) {
417 *action = PF_PASS;
418 return action;
419 }
420
421 return 0;
422 }
423
/*
 * Look up the state for the caller's `key` and leave the calling function
 * early when pf_state_lookup_aux() produces a verdict.
 *
 * This macro is not hygienic.  It expects the following names to be in scope
 * at the expansion site: `state` (struct pf_state **), `kif`, `key`,
 * `direction` and `pd`.  It may execute `return (action)` in the enclosing
 * function, so that function must return an int verdict.
 *
 * When a state is found and the packet descriptor does not yet carry a flow
 * ID (PKTF_FLOW_ID clear), the flow source and hash are copied from the
 * state key; if the copied hash is non-zero PKTF_FLOW_ID is set and
 * PKTF_FLOW_ADV is cleared.
 */
#define STATE_LOOKUP()                                                   \
	do {                                                             \
		int action;                                              \
		*state = pf_find_state(kif, &key, direction);            \
		if (*state != NULL && pd != NULL &&                      \
		    !(pd->pktflags & PKTF_FLOW_ID)) {                    \
			pd->flowsrc = (*state)->state_key->flowsrc;      \
			pd->flowhash = (*state)->state_key->flowhash;    \
			if (pd->flowhash != 0) {                         \
				pd->pktflags |= PKTF_FLOW_ID;            \
				pd->pktflags &= ~PKTF_FLOW_ADV;          \
			}                                                \
		}                                                        \
		if (pf_state_lookup_aux(state, kif, direction, &action)) \
			return (action);                                 \
	} while (0)
440
/*
 * This macro resets the flowID information in a packet descriptor which was
 * copied in from a PF state. This should be used after a protocol state lookup
 * finds a matching PF state, but then decides to not use it for various
 * reasons.
 *
 * Only descriptors that have PKTF_FLOW_ID set and whose flow source is
 * FLOWSRC_PF are touched; for those, flowhash and flowsrc are zeroed and
 * PKTF_FLOW_ID is cleared.
 */
#define PD_CLEAR_STATE_FLOWID(_pd)                                       \
	do {                                                             \
		if (__improbable(((_pd)->pktflags & PKTF_FLOW_ID) &&     \
		    ((_pd)->flowsrc == FLOWSRC_PF))) {                   \
			(_pd)->flowhash = 0;                             \
			(_pd)->flowsrc = 0;                              \
			(_pd)->pktflags &= ~PKTF_FLOW_ID;                \
		}                                                        \
                                                                         \
	} while (0)
457
/*
 * Translation predicates on a state key `sk` (evaluated more than once, so
 * pass an expression without side effects).
 *
 * Every expansion is wrapped in its own parentheses so that the macros
 * compose correctly with any neighbouring operator (`!`, `&&`, `+`, `?:`).
 */

/*
 * True when the LAN and gateway addresses differ.  Word 0 is always
 * compared; words 1..3 are only compared when the LAN side is AF_INET6.
 */
#define STATE_ADDR_TRANSLATE(sk)                                        \
	((sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] ||        \
	((sk)->af_lan == AF_INET6 &&                                    \
	((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] ||        \
	(sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] ||         \
	(sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3])))

/* True when address family, address or port differ between LAN and gateway. */
#define STATE_TRANSLATE(sk)                                             \
	((sk)->af_lan != (sk)->af_gwy ||                                \
	STATE_ADDR_TRANSLATE(sk) ||                                     \
	(sk)->lan.xport.port != (sk)->gwy.xport.port)

/* True when address or GRE call ID differ between LAN and gateway. */
#define STATE_GRE_TRANSLATE(sk)                                         \
	(STATE_ADDR_TRANSLATE(sk) ||                                    \
	(sk)->lan.xport.call_id != (sk)->gwy.xport.call_id)

/* Interface a state of rule `r` binds to: `k` if the rule is if-bound, else pfi_all. */
#define BOUND_IFACE(r, k) \
	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all)
476
/*
 * Bump the `states` reference count of every rule a state points at: the
 * matching rule always, the anchor and NAT rule when set.  Each increment
 * is followed by a VERIFY that the counter did not wrap to zero.
 *
 * `s` is a struct pf_state pointer expression.  It is parenthesized at every
 * use so that arguments such as `*sp` expand correctly, but it is evaluated
 * several times and must not have side effects.
 */
#define STATE_INC_COUNTERS(s)                                   \
	do {                                                    \
		(s)->rule.ptr->states++;                        \
		VERIFY((s)->rule.ptr->states != 0);             \
		if ((s)->anchor.ptr != NULL) {                  \
			(s)->anchor.ptr->states++;              \
			VERIFY((s)->anchor.ptr->states != 0);   \
		}                                               \
		if ((s)->nat_rule.ptr != NULL) {                \
			(s)->nat_rule.ptr->states++;            \
			VERIFY((s)->nat_rule.ptr->states != 0); \
		}                                               \
	} while (0)

/*
 * Inverse of STATE_INC_COUNTERS(): drop the references in the opposite
 * order (NAT rule, anchor, rule), verifying before each decrement that the
 * counter is still positive.  The same rules for `s` apply.
 */
#define STATE_DEC_COUNTERS(s)                                   \
	do {                                                    \
		if ((s)->nat_rule.ptr != NULL) {                \
			VERIFY((s)->nat_rule.ptr->states > 0);  \
			(s)->nat_rule.ptr->states--;            \
		}                                               \
		if ((s)->anchor.ptr != NULL) {                  \
			VERIFY((s)->anchor.ptr->states > 0);    \
			(s)->anchor.ptr->states--;              \
		}                                               \
		VERIFY((s)->rule.ptr->states > 0);              \
		(s)->rule.ptr->states--;                        \
	} while (0)
504
505 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
506 static __inline int pf_state_compare_lan_ext(struct pf_state_key *,
507 struct pf_state_key *);
508 static __inline int pf_state_compare_ext_gwy(struct pf_state_key *,
509 struct pf_state_key *);
510 static __inline int pf_state_compare_id(struct pf_state *,
511 struct pf_state *);
512
/* Tree of source-tracking nodes, ordered by pf_src_compare(). */
struct pf_src_tree tree_src_tracking;

/* Tree of states ordered by pf_state_compare_id(), and the state queue. */
struct pf_state_tree_id tree_id;
struct pf_state_queue state_list;

/*
 * Instantiate the red-black tree code for each tree type, naming the node
 * type, the linkage field inside the node, and the ordering function.
 */
RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree_lan_ext, pf_state_key,
    entry_lan_ext, pf_state_compare_lan_ext);
RB_GENERATE(pf_state_tree_ext_gwy, pf_state_key,
    entry_ext_gwy, pf_state_compare_ext_gwy);
RB_GENERATE(pf_state_tree_id, pf_state,
    entry_id, pf_state_compare_id);
525
/*
 * Flag bits selecting a state table to skip.
 * NOTE(review): the consumer of these flags is outside this view.
 */
#define PF_DT_SKIP_LANEXT       0x01
#define PF_DT_SKIP_EXTGWY       0x02

/*
 * PPTP control connection constants.
 * NOTE(review): presumably the TCP port and the magic cookie carried in
 * pf_pptp_hdr.magic (RFC 2637) -- confirm against pf_pptp_handler().
 */
static const u_int16_t PF_PPTP_PORT = 1723;
static const u_int32_t PF_PPTP_MAGIC_NUMBER = 0x1A2B3C4D;

/* Leading fields of a PPTP message: length, message type, magic. */
struct pf_pptp_hdr {
	u_int16_t       length;
	u_int16_t       type;
	u_int32_t       magic;
};

/* Control-message sub-header; `type` presumably holds a PF_PPTP_CTRL_TYPE_* value. */
struct pf_pptp_ctrl_hdr {
	u_int16_t       type;
	u_int16_t       reserved_0;
};

/* Untyped view of a control message body as 16-bit words (zero-length array). */
struct pf_pptp_ctrl_generic {
	u_int16_t       data[0];
};
546
/*
 * PPTP control message bodies.
 *
 * Each PF_PPTP_CTRL_TYPE_* constant is paired by name with the struct that
 * immediately follows it; all of them are members of
 * union pf_pptp_ctrl_msg_union.
 * NOTE(review): the layouts presumably mirror the on-the-wire formats of
 * RFC 2637 and the constants presumably match pf_pptp_ctrl_hdr.type --
 * confirm before changing any field size or order.
 */
#define PF_PPTP_CTRL_TYPE_START_REQ     1
struct pf_pptp_ctrl_start_req {
	u_int16_t       protocol_version;
	u_int16_t       reserved_1;
	u_int32_t       framing_capabilities;
	u_int32_t       bearer_capabilities;
	u_int16_t       maximum_channels;
	u_int16_t       firmware_revision;
	u_int8_t        host_name[64];
	u_int8_t        vendor_string[64];
};

#define PF_PPTP_CTRL_TYPE_START_RPY     2
struct pf_pptp_ctrl_start_rpy {
	u_int16_t       protocol_version;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int32_t       framing_capabilities;
	u_int32_t       bearer_capabilities;
	u_int16_t       maximum_channels;
	u_int16_t       firmware_revision;
	u_int8_t        host_name[64];
	u_int8_t        vendor_string[64];
};

#define PF_PPTP_CTRL_TYPE_STOP_REQ      3
struct pf_pptp_ctrl_stop_req {
	u_int8_t        reason;
	u_int8_t        reserved_1;
	u_int16_t       reserved_2;
};

#define PF_PPTP_CTRL_TYPE_STOP_RPY      4
struct pf_pptp_ctrl_stop_rpy {
	u_int8_t        reason;
	u_int8_t        error_code;
	u_int16_t       reserved_1;
};

#define PF_PPTP_CTRL_TYPE_ECHO_REQ      5
struct pf_pptp_ctrl_echo_req {
	u_int32_t       identifier;
};

#define PF_PPTP_CTRL_TYPE_ECHO_RPY      6
struct pf_pptp_ctrl_echo_rpy {
	u_int32_t       identifier;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int16_t       reserved_1;
};

#define PF_PPTP_CTRL_TYPE_CALL_OUT_REQ  7
struct pf_pptp_ctrl_call_out_req {
	u_int16_t       call_id;
	u_int16_t       call_sernum;
	u_int32_t       min_bps;
	u_int32_t       bearer_type;
	u_int32_t       framing_type;
	u_int16_t       rxwindow_size;
	u_int16_t       proc_delay;
	u_int8_t        phone_num[64];
	u_int8_t        sub_addr[64];
};

#define PF_PPTP_CTRL_TYPE_CALL_OUT_RPY  8
struct pf_pptp_ctrl_call_out_rpy {
	u_int16_t       call_id;
	u_int16_t       peer_call_id;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int16_t       cause_code;
	u_int32_t       connect_speed;
	u_int16_t       rxwindow_size;
	u_int16_t       proc_delay;
	u_int32_t       phy_channel_id;
};

#define PF_PPTP_CTRL_TYPE_CALL_IN_1ST   9
struct pf_pptp_ctrl_call_in_1st {
	u_int16_t       call_id;
	u_int16_t       call_sernum;
	u_int32_t       bearer_type;
	u_int32_t       phy_channel_id;
	u_int16_t       dialed_number_len;
	u_int16_t       dialing_number_len;
	u_int8_t        dialed_num[64];
	u_int8_t        dialing_num[64];
	u_int8_t        sub_addr[64];
};

#define PF_PPTP_CTRL_TYPE_CALL_IN_2ND   10
struct pf_pptp_ctrl_call_in_2nd {
	u_int16_t       call_id;
	u_int16_t       peer_call_id;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int16_t       rxwindow_size;
	u_int16_t       txdelay;
	u_int16_t       reserved_1;
};

#define PF_PPTP_CTRL_TYPE_CALL_IN_3RD   11
struct pf_pptp_ctrl_call_in_3rd {
	u_int16_t       call_id;
	u_int16_t       reserved_1;
	u_int32_t       connect_speed;
	u_int16_t       rxwindow_size;
	u_int16_t       txdelay;
	u_int32_t       framing_type;
};

#define PF_PPTP_CTRL_TYPE_CALL_CLR      12
struct pf_pptp_ctrl_call_clr {
	u_int16_t       call_id;
	u_int16_t       reserved_1;
};

#define PF_PPTP_CTRL_TYPE_CALL_DISC     13
struct pf_pptp_ctrl_call_disc {
	u_int16_t       call_id;
	u_int8_t        result_code;
	u_int8_t        error_code;
	u_int16_t       cause_code;
	u_int16_t       reserved_1;
	u_int8_t        statistics[128];
};

#define PF_PPTP_CTRL_TYPE_ERROR 14
struct pf_pptp_ctrl_error {
	u_int16_t       peer_call_id;
	u_int16_t       reserved_1;
	u_int32_t       crc_errors;
	u_int32_t       fr_errors;
	u_int32_t       hw_errors;
	u_int32_t       buf_errors;
	u_int32_t       tim_errors;
	u_int32_t       align_errors;
};

#define PF_PPTP_CTRL_TYPE_SET_LINKINFO  15
struct pf_pptp_ctrl_set_linkinfo {
	u_int16_t       peer_call_id;
	u_int16_t       reserved_1;
	u_int32_t       tx_accm;
	u_int32_t       rx_accm;
};
694
/* Size of the two fixed headers that precede every control message body. */
static const size_t PF_PPTP_CTRL_MSG_MINSIZE =
    sizeof(struct pf_pptp_hdr) + sizeof(struct pf_pptp_ctrl_hdr);

/* Overlay of every control message body, plus a raw byte view (`data`). */
union pf_pptp_ctrl_msg_union {
	struct pf_pptp_ctrl_start_req           start_req;
	struct pf_pptp_ctrl_start_rpy           start_rpy;
	struct pf_pptp_ctrl_stop_req            stop_req;
	struct pf_pptp_ctrl_stop_rpy            stop_rpy;
	struct pf_pptp_ctrl_echo_req            echo_req;
	struct pf_pptp_ctrl_echo_rpy            echo_rpy;
	struct pf_pptp_ctrl_call_out_req        call_out_req;
	struct pf_pptp_ctrl_call_out_rpy        call_out_rpy;
	struct pf_pptp_ctrl_call_in_1st         call_in_1st;
	struct pf_pptp_ctrl_call_in_2nd         call_in_2nd;
	struct pf_pptp_ctrl_call_in_3rd         call_in_3rd;
	struct pf_pptp_ctrl_call_clr            call_clr;
	struct pf_pptp_ctrl_call_disc           call_disc;
	struct pf_pptp_ctrl_error               error;
	struct pf_pptp_ctrl_set_linkinfo        set_linkinfo;
	u_int8_t                                data[0];
};

/* A complete control message: common header, control header, then the body. */
struct pf_pptp_ctrl_msg {
	struct pf_pptp_hdr              hdr;
	struct pf_pptp_ctrl_hdr         ctrl;
	union pf_pptp_ctrl_msg_union    msg;
};
722
/*
 * GRE header flag/version masks and the PPP protocol type.
 * NOTE(review): presumably applied to the GRE flags and protocol words by
 * the GRE/PPTP state code -- confirm at the users.
 */
#define PF_GRE_FLAG_CHECKSUM_PRESENT    0x8000
#define PF_GRE_FLAG_VERSION_MASK        0x0007
#define PF_GRE_PPP_ETHERTYPE            0x880B

/* NOTE(review): presumably the UDP port used to recognise IKE traffic. */
static const u_int16_t PF_IKE_PORT = 500;

/* Fixed IKE message header: cookies, payload/version/exchange/flags bytes, id, length. */
struct pf_ike_hdr {
	u_int64_t initiator_cookie, responder_cookie;
	u_int8_t next_payload, version, exchange_type, flags;
	u_int32_t message_id, length;
};

/* Smallest packet that can hold an IKE header. */
#define PF_IKE_PACKET_MINSIZE   (sizeof (struct pf_ike_hdr))

/* Exchange type values, grouped by IKE version (see pf_ike_hdr.exchange_type). */
#define PF_IKEv1_EXCHTYPE_BASE                   1
#define PF_IKEv1_EXCHTYPE_ID_PROTECT             2
#define PF_IKEv1_EXCHTYPE_AUTH_ONLY              3
#define PF_IKEv1_EXCHTYPE_AGGRESSIVE             4
#define PF_IKEv1_EXCHTYPE_INFORMATIONAL          5
#define PF_IKEv2_EXCHTYPE_SA_INIT               34
#define PF_IKEv2_EXCHTYPE_AUTH                  35
#define PF_IKEv2_EXCHTYPE_CREATE_CHILD_SA       36
#define PF_IKEv2_EXCHTYPE_INFORMATIONAL         37

/* Flag bits, grouped by IKE version (see pf_ike_hdr.flags). */
#define PF_IKEv1_FLAG_E         0x01
#define PF_IKEv1_FLAG_C         0x02
#define PF_IKEv1_FLAG_A         0x04
#define PF_IKEv2_FLAG_I         0x08
#define PF_IKEv2_FLAG_V         0x10
#define PF_IKEv2_FLAG_R         0x20
754
755 static __inline int
pf_addr_compare(struct pf_addr * a,struct pf_addr * b,sa_family_t af)756 pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
757 {
758 switch (af) {
759 #ifdef INET
760 case AF_INET:
761 if (a->addr32[0] > b->addr32[0]) {
762 return 1;
763 }
764 if (a->addr32[0] < b->addr32[0]) {
765 return -1;
766 }
767 break;
768 #endif /* INET */
769 case AF_INET6:
770 if (a->addr32[3] > b->addr32[3]) {
771 return 1;
772 }
773 if (a->addr32[3] < b->addr32[3]) {
774 return -1;
775 }
776 if (a->addr32[2] > b->addr32[2]) {
777 return 1;
778 }
779 if (a->addr32[2] < b->addr32[2]) {
780 return -1;
781 }
782 if (a->addr32[1] > b->addr32[1]) {
783 return 1;
784 }
785 if (a->addr32[1] < b->addr32[1]) {
786 return -1;
787 }
788 if (a->addr32[0] > b->addr32[0]) {
789 return 1;
790 }
791 if (a->addr32[0] < b->addr32[0]) {
792 return -1;
793 }
794 break;
795 }
796 return 0;
797 }
798
799 static __inline int
pf_src_compare(struct pf_src_node * a,struct pf_src_node * b)800 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
801 {
802 int diff;
803
804 if (a->rule.ptr > b->rule.ptr) {
805 return 1;
806 }
807 if (a->rule.ptr < b->rule.ptr) {
808 return -1;
809 }
810 if ((diff = a->af - b->af) != 0) {
811 return diff;
812 }
813 if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0) {
814 return diff;
815 }
816 return 0;
817 }
818
819 static __inline int
pf_state_compare_lan_ext(struct pf_state_key * a,struct pf_state_key * b)820 pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b)
821 {
822 int diff;
823 int extfilter;
824
825 if ((diff = a->proto - b->proto) != 0) {
826 return diff;
827 }
828 if ((diff = a->af_lan - b->af_lan) != 0) {
829 return diff;
830 }
831
832 extfilter = PF_EXTFILTER_APD;
833
834 switch (a->proto) {
835 case IPPROTO_ICMP:
836 case IPPROTO_ICMPV6:
837 if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
838 return diff;
839 }
840 break;
841
842 case IPPROTO_TCP:
843 if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
844 return diff;
845 }
846 if ((diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0) {
847 return diff;
848 }
849 break;
850
851 case IPPROTO_UDP:
852 if ((diff = a->proto_variant - b->proto_variant)) {
853 return diff;
854 }
855 extfilter = a->proto_variant;
856 if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) {
857 return diff;
858 }
859 if ((extfilter < PF_EXTFILTER_AD) &&
860 (diff = a->ext_lan.xport.port - b->ext_lan.xport.port) != 0) {
861 return diff;
862 }
863 break;
864
865 case IPPROTO_GRE:
866 if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
867 a->proto_variant == b->proto_variant) {
868 if (!!(diff = a->ext_lan.xport.call_id -
869 b->ext_lan.xport.call_id)) {
870 return diff;
871 }
872 }
873 break;
874
875 case IPPROTO_ESP:
876 if (!!(diff = a->ext_lan.xport.spi - b->ext_lan.xport.spi)) {
877 return diff;
878 }
879 break;
880
881 default:
882 break;
883 }
884
885 switch (a->af_lan) {
886 #if INET
887 case AF_INET:
888 if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr,
889 a->af_lan)) != 0) {
890 return diff;
891 }
892
893 if (extfilter < PF_EXTFILTER_EI) {
894 if ((diff = pf_addr_compare(&a->ext_lan.addr,
895 &b->ext_lan.addr,
896 a->af_lan)) != 0) {
897 return diff;
898 }
899 }
900 break;
901 #endif /* INET */
902 case AF_INET6:
903 if ((diff = pf_addr_compare(&a->lan.addr, &b->lan.addr,
904 a->af_lan)) != 0) {
905 return diff;
906 }
907
908 if (extfilter < PF_EXTFILTER_EI ||
909 !PF_AZERO(&b->ext_lan.addr, AF_INET6)) {
910 if ((diff = pf_addr_compare(&a->ext_lan.addr,
911 &b->ext_lan.addr,
912 a->af_lan)) != 0) {
913 return diff;
914 }
915 }
916 break;
917 }
918
919 if (a->app_state && b->app_state) {
920 if (a->app_state->compare_lan_ext &&
921 b->app_state->compare_lan_ext) {
922 diff = (const char *)b->app_state->compare_lan_ext -
923 (const char *)a->app_state->compare_lan_ext;
924 if (diff != 0) {
925 return diff;
926 }
927 diff = a->app_state->compare_lan_ext(a->app_state,
928 b->app_state);
929 if (diff != 0) {
930 return diff;
931 }
932 }
933 }
934
935 return 0;
936 }
937
938 static __inline int
pf_state_compare_ext_gwy(struct pf_state_key * a,struct pf_state_key * b)939 pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b)
940 {
941 int diff;
942 int extfilter;
943 int a_nat64, b_nat64;
944
945 if ((diff = a->proto - b->proto) != 0) {
946 return diff;
947 }
948
949 if ((diff = a->af_gwy - b->af_gwy) != 0) {
950 return diff;
951 }
952
953 a_nat64 = (a->af_lan == PF_INET6 && a->af_gwy == PF_INET) ? 1 : 0;
954 b_nat64 = (b->af_lan == PF_INET6 && b->af_gwy == PF_INET) ? 1 : 0;
955 if ((diff = a_nat64 - b_nat64) != 0) {
956 return diff;
957 }
958
959 extfilter = PF_EXTFILTER_APD;
960
961 switch (a->proto) {
962 case IPPROTO_ICMP:
963 case IPPROTO_ICMPV6:
964 if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
965 return diff;
966 }
967 break;
968
969 case IPPROTO_TCP:
970 if ((diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0) {
971 return diff;
972 }
973 if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
974 return diff;
975 }
976 break;
977
978 case IPPROTO_UDP:
979 if ((diff = a->proto_variant - b->proto_variant)) {
980 return diff;
981 }
982 extfilter = a->proto_variant;
983 if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) {
984 return diff;
985 }
986 if ((extfilter < PF_EXTFILTER_AD) &&
987 (diff = a->ext_gwy.xport.port - b->ext_gwy.xport.port) != 0) {
988 return diff;
989 }
990 break;
991
992 case IPPROTO_GRE:
993 if (a->proto_variant == PF_GRE_PPTP_VARIANT &&
994 a->proto_variant == b->proto_variant) {
995 if (!!(diff = a->gwy.xport.call_id -
996 b->gwy.xport.call_id)) {
997 return diff;
998 }
999 }
1000 break;
1001
1002 case IPPROTO_ESP:
1003 if (!!(diff = a->gwy.xport.spi - b->gwy.xport.spi)) {
1004 return diff;
1005 }
1006 break;
1007
1008 default:
1009 break;
1010 }
1011
1012 switch (a->af_gwy) {
1013 #if INET
1014 case AF_INET:
1015 if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr,
1016 a->af_gwy)) != 0) {
1017 return diff;
1018 }
1019
1020 if (extfilter < PF_EXTFILTER_EI) {
1021 if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr,
1022 a->af_gwy)) != 0) {
1023 return diff;
1024 }
1025 }
1026 break;
1027 #endif /* INET */
1028 case AF_INET6:
1029 if ((diff = pf_addr_compare(&a->gwy.addr, &b->gwy.addr,
1030 a->af_gwy)) != 0) {
1031 return diff;
1032 }
1033
1034 if (extfilter < PF_EXTFILTER_EI ||
1035 !PF_AZERO(&b->ext_gwy.addr, AF_INET6)) {
1036 if ((diff = pf_addr_compare(&a->ext_gwy.addr, &b->ext_gwy.addr,
1037 a->af_gwy)) != 0) {
1038 return diff;
1039 }
1040 }
1041 break;
1042 }
1043
1044 if (a->app_state && b->app_state) {
1045 if (a->app_state->compare_ext_gwy &&
1046 b->app_state->compare_ext_gwy) {
1047 diff = (const char *)b->app_state->compare_ext_gwy -
1048 (const char *)a->app_state->compare_ext_gwy;
1049 if (diff != 0) {
1050 return diff;
1051 }
1052 diff = a->app_state->compare_ext_gwy(a->app_state,
1053 b->app_state);
1054 if (diff != 0) {
1055 return diff;
1056 }
1057 }
1058 }
1059
1060 return 0;
1061 }
1062
1063 static __inline int
pf_state_compare_id(struct pf_state * a,struct pf_state * b)1064 pf_state_compare_id(struct pf_state *a, struct pf_state *b)
1065 {
1066 if (a->id > b->id) {
1067 return 1;
1068 }
1069 if (a->id < b->id) {
1070 return -1;
1071 }
1072 if (a->creatorid > b->creatorid) {
1073 return 1;
1074 }
1075 if (a->creatorid < b->creatorid) {
1076 return -1;
1077 }
1078
1079 return 0;
1080 }
1081
1082 void
pf_addrcpy(struct pf_addr * dst,struct pf_addr * src,sa_family_t af)1083 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
1084 {
1085 switch (af) {
1086 #if INET
1087 case AF_INET:
1088 memcpy(&dst->v4addr, &src->v4addr, sizeof(src->v4addr));
1089 break;
1090 #endif /* INET */
1091 case AF_INET6:
1092 memcpy(&dst->v6addr, &src->v6addr, sizeof(src->v6addr));
1093 break;
1094 }
1095 }
1096
1097 struct pf_state *
pf_find_state_byid(struct pf_state_cmp * key)1098 pf_find_state_byid(struct pf_state_cmp *key)
1099 {
1100 pf_status.fcounters[FCNT_STATE_SEARCH]++;
1101
1102 return RB_FIND(pf_state_tree_id, &tree_id,
1103 (struct pf_state *)(void *)key);
1104 }
1105
1106 static struct pf_state *
pf_find_state(struct pfi_kif * kif,struct pf_state_key_cmp * key,u_int dir)1107 pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir)
1108 {
1109 struct pf_state_key *sk = NULL;
1110 struct pf_state *s;
1111
1112 pf_status.fcounters[FCNT_STATE_SEARCH]++;
1113
1114 switch (dir) {
1115 case PF_OUT:
1116 sk = RB_FIND(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
1117 (struct pf_state_key *)key);
1118
1119 break;
1120 case PF_IN:
1121
1122 /*
1123 * Generally, a packet can match to
1124 * at most 1 state in the GWY table, with the sole exception
1125 * of NAT64, where a packet can match with at most 2 states
1126 * on the GWY table. This is because, unlike NAT44 or NAT66,
1127 * NAT64 forward translation is done on the input, not output.
1128 * This means a forwarded packet could cause PF to generate 2 states
1129 * on both input and output.
1130 *
1131 * NAT64 reverse translation is done on input. If a packet
1132 * matches NAT64 state on the GWY table, prioritize it
1133 * over any IPv4 state on the GWY table.
1134 */
1135 if (pf_state_tree_ext_gwy_nat64_cnt > 0 &&
1136 key->af_lan == PF_INET && key->af_gwy == PF_INET) {
1137 key->af_lan = PF_INET6;
1138 sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
1139 (struct pf_state_key *) key);
1140 key->af_lan = PF_INET;
1141 }
1142
1143 if (sk == NULL) {
1144 sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy,
1145 (struct pf_state_key *)key);
1146 }
1147 /*
1148 * NAT64 is done only on input, for packets coming in from
1149 * from the LAN side, need to lookup the lan_ext tree.
1150 */
1151 if (sk == NULL) {
1152 sk = RB_FIND(pf_state_tree_lan_ext,
1153 &pf_statetbl_lan_ext,
1154 (struct pf_state_key *)key);
1155 if (sk && sk->af_lan == sk->af_gwy) {
1156 sk = NULL;
1157 }
1158 }
1159 break;
1160 default:
1161 panic("pf_find_state");
1162 }
1163
1164 /* list is sorted, if-bound states before floating ones */
1165 if (sk != NULL) {
1166 TAILQ_FOREACH(s, &sk->states, next)
1167 if (s->kif == pfi_all || s->kif == kif) {
1168 return s;
1169 }
1170 }
1171
1172 return NULL;
1173 }
1174
1175 struct pf_state *
pf_find_state_all(struct pf_state_key_cmp * key,u_int dir,int * more)1176 pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
1177 {
1178 struct pf_state_key *sk = NULL;
1179 struct pf_state *s, *ret = NULL;
1180
1181 pf_status.fcounters[FCNT_STATE_SEARCH]++;
1182
1183 switch (dir) {
1184 case PF_OUT:
1185 sk = RB_FIND(pf_state_tree_lan_ext,
1186 &pf_statetbl_lan_ext, (struct pf_state_key *)key);
1187 break;
1188 case PF_IN:
1189 sk = RB_FIND(pf_state_tree_ext_gwy,
1190 &pf_statetbl_ext_gwy, (struct pf_state_key *)key);
1191 /*
1192 * NAT64 is done only on input, for packets coming in from
1193 * from the LAN side, need to lookup the lan_ext tree.
1194 */
1195 if ((sk == NULL) && pf_nat64_configured) {
1196 sk = RB_FIND(pf_state_tree_lan_ext,
1197 &pf_statetbl_lan_ext,
1198 (struct pf_state_key *)key);
1199 if (sk && sk->af_lan == sk->af_gwy) {
1200 sk = NULL;
1201 }
1202 }
1203 break;
1204 default:
1205 panic("pf_find_state_all");
1206 }
1207
1208 if (sk != NULL) {
1209 ret = TAILQ_FIRST(&sk->states);
1210 if (more == NULL) {
1211 return ret;
1212 }
1213
1214 TAILQ_FOREACH(s, &sk->states, next)
1215 (*more)++;
1216 }
1217
1218 return ret;
1219 }
1220
1221 static void
pf_init_threshold(struct pf_threshold * threshold,u_int32_t limit,u_int32_t seconds)1222 pf_init_threshold(struct pf_threshold *threshold,
1223 u_int32_t limit, u_int32_t seconds)
1224 {
1225 threshold->limit = limit * PF_THRESHOLD_MULT;
1226 threshold->seconds = seconds;
1227 threshold->count = 0;
1228 threshold->last = pf_time_second();
1229 }
1230
1231 static void
pf_add_threshold(struct pf_threshold * threshold)1232 pf_add_threshold(struct pf_threshold *threshold)
1233 {
1234 u_int32_t t = pf_time_second(), diff = t - threshold->last;
1235
1236 if (diff >= threshold->seconds) {
1237 threshold->count = 0;
1238 } else {
1239 threshold->count -= threshold->count * diff /
1240 threshold->seconds;
1241 }
1242 threshold->count += PF_THRESHOLD_MULT;
1243 threshold->last = t;
1244 }
1245
1246 static int
pf_check_threshold(struct pf_threshold * threshold)1247 pf_check_threshold(struct pf_threshold *threshold)
1248 {
1249 return threshold->count > threshold->limit;
1250 }
1251
1252 static int
pf_src_connlimit(struct pf_state ** state)1253 pf_src_connlimit(struct pf_state **state)
1254 {
1255 int bad = 0;
1256 (*state)->src_node->conn++;
1257 VERIFY((*state)->src_node->conn != 0);
1258 (*state)->src.tcp_est = 1;
1259 pf_add_threshold(&(*state)->src_node->conn_rate);
1260
1261 if ((*state)->rule.ptr->max_src_conn &&
1262 (*state)->rule.ptr->max_src_conn <
1263 (*state)->src_node->conn) {
1264 pf_status.lcounters[LCNT_SRCCONN]++;
1265 bad++;
1266 }
1267
1268 if ((*state)->rule.ptr->max_src_conn_rate.limit &&
1269 pf_check_threshold(&(*state)->src_node->conn_rate)) {
1270 pf_status.lcounters[LCNT_SRCCONNRATE]++;
1271 bad++;
1272 }
1273
1274 if (!bad) {
1275 return 0;
1276 }
1277
1278 if ((*state)->rule.ptr->overload_tbl) {
1279 struct pfr_addr p;
1280 u_int32_t killed = 0;
1281
1282 pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
1283 if (pf_status.debug >= PF_DEBUG_MISC) {
1284 printf("pf_src_connlimit: blocking address ");
1285 pf_print_host(&(*state)->src_node->addr, 0,
1286 (*state)->state_key->af_lan);
1287 }
1288
1289 bzero(&p, sizeof(p));
1290 p.pfra_af = (*state)->state_key->af_lan;
1291 switch ((*state)->state_key->af_lan) {
1292 #if INET
1293 case AF_INET:
1294 p.pfra_net = 32;
1295 p.pfra_ip4addr = (*state)->src_node->addr.v4addr;
1296 break;
1297 #endif /* INET */
1298 case AF_INET6:
1299 p.pfra_net = 128;
1300 p.pfra_ip6addr = (*state)->src_node->addr.v6addr;
1301 break;
1302 }
1303
1304 pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
1305 &p, pf_calendar_time_second());
1306
1307 /* kill existing states if that's required. */
1308 if ((*state)->rule.ptr->flush) {
1309 struct pf_state_key *sk;
1310 struct pf_state *st;
1311
1312 pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
1313 RB_FOREACH(st, pf_state_tree_id, &tree_id) {
1314 sk = st->state_key;
1315 /*
1316 * Kill states from this source. (Only those
1317 * from the same rule if PF_FLUSH_GLOBAL is not
1318 * set)
1319 */
1320 if (sk->af_lan ==
1321 (*state)->state_key->af_lan &&
1322 (((*state)->state_key->direction ==
1323 PF_OUT &&
1324 PF_AEQ(&(*state)->src_node->addr,
1325 &sk->lan.addr, sk->af_lan)) ||
1326 ((*state)->state_key->direction == PF_IN &&
1327 PF_AEQ(&(*state)->src_node->addr,
1328 &sk->ext_lan.addr, sk->af_lan))) &&
1329 ((*state)->rule.ptr->flush &
1330 PF_FLUSH_GLOBAL ||
1331 (*state)->rule.ptr == st->rule.ptr)) {
1332 st->timeout = PFTM_PURGE;
1333 st->src.state = st->dst.state =
1334 TCPS_CLOSED;
1335 killed++;
1336 }
1337 }
1338 if (pf_status.debug >= PF_DEBUG_MISC) {
1339 printf(", %u states killed", killed);
1340 }
1341 }
1342 if (pf_status.debug >= PF_DEBUG_MISC) {
1343 printf("\n");
1344 }
1345 }
1346
1347 /* kill this state */
1348 (*state)->timeout = PFTM_PURGE;
1349 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
1350 return 1;
1351 }
1352
1353 int
pf_insert_src_node(struct pf_src_node ** sn,struct pf_rule * rule,struct pf_addr * src,sa_family_t af)1354 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
1355 struct pf_addr *src, sa_family_t af)
1356 {
1357 struct pf_src_node k;
1358
1359 if (*sn == NULL) {
1360 k.af = af;
1361 PF_ACPY(&k.addr, src, af);
1362 if (rule->rule_flag & PFRULE_RULESRCTRACK ||
1363 rule->rpool.opts & PF_POOL_STICKYADDR) {
1364 k.rule.ptr = rule;
1365 } else {
1366 k.rule.ptr = NULL;
1367 }
1368 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
1369 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
1370 }
1371 if (*sn == NULL) {
1372 if (!rule->max_src_nodes ||
1373 rule->src_nodes < rule->max_src_nodes) {
1374 (*sn) = pool_get(&pf_src_tree_pl, PR_WAITOK);
1375 } else {
1376 pf_status.lcounters[LCNT_SRCNODES]++;
1377 }
1378 if ((*sn) == NULL) {
1379 return -1;
1380 }
1381 bzero(*sn, sizeof(struct pf_src_node));
1382
1383 pf_init_threshold(&(*sn)->conn_rate,
1384 rule->max_src_conn_rate.limit,
1385 rule->max_src_conn_rate.seconds);
1386
1387 (*sn)->af = af;
1388 if (rule->rule_flag & PFRULE_RULESRCTRACK ||
1389 rule->rpool.opts & PF_POOL_STICKYADDR) {
1390 (*sn)->rule.ptr = rule;
1391 } else {
1392 (*sn)->rule.ptr = NULL;
1393 }
1394 PF_ACPY(&(*sn)->addr, src, af);
1395 if (RB_INSERT(pf_src_tree,
1396 &tree_src_tracking, *sn) != NULL) {
1397 if (pf_status.debug >= PF_DEBUG_MISC) {
1398 printf("pf: src_tree insert failed: ");
1399 pf_print_host(&(*sn)->addr, 0, af);
1400 printf("\n");
1401 }
1402 pool_put(&pf_src_tree_pl, *sn);
1403 *sn = NULL; /* signal the caller that no additional cleanup is needed */
1404 return -1;
1405 }
1406 (*sn)->creation = pf_time_second();
1407 (*sn)->ruletype = rule->action;
1408 if ((*sn)->rule.ptr != NULL) {
1409 (*sn)->rule.ptr->src_nodes++;
1410 }
1411 pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
1412 pf_status.src_nodes++;
1413 } else {
1414 if (rule->max_src_states &&
1415 (*sn)->states >= rule->max_src_states) {
1416 pf_status.lcounters[LCNT_SRCSTATES]++;
1417 return -1;
1418 }
1419 }
1420 return 0;
1421 }
1422
1423 static void
pf_stateins_err(const char * tree,struct pf_state * s,struct pfi_kif * kif)1424 pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif)
1425 {
1426 struct pf_state_key *sk = s->state_key;
1427
1428 if (pf_status.debug >= PF_DEBUG_MISC) {
1429 printf("pf: state insert failed: %s %s ", tree, kif->pfik_name);
1430 switch (sk->proto) {
1431 case IPPROTO_TCP:
1432 printf("TCP");
1433 break;
1434 case IPPROTO_UDP:
1435 printf("UDP");
1436 break;
1437 case IPPROTO_ICMP:
1438 printf("ICMP4");
1439 break;
1440 case IPPROTO_ICMPV6:
1441 printf("ICMP6");
1442 break;
1443 default:
1444 printf("PROTO=%u", sk->proto);
1445 break;
1446 }
1447 printf(" lan: ");
1448 pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto,
1449 sk->proto_variant);
1450 printf(" gwy: ");
1451 pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto,
1452 sk->proto_variant);
1453 printf(" ext_lan: ");
1454 pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto,
1455 sk->proto_variant);
1456 printf(" ext_gwy: ");
1457 pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto,
1458 sk->proto_variant);
1459 if (s->sync_flags & PFSTATE_FROMSYNC) {
1460 printf(" (from sync)");
1461 }
1462 printf("\n");
1463 }
1464 }
1465
1466 static __inline struct pf_state_key *
pf_insert_state_key_ext_gwy(struct pf_state_key * psk)1467 pf_insert_state_key_ext_gwy(struct pf_state_key *psk)
1468 {
1469 struct pf_state_key * ret = RB_INSERT(pf_state_tree_ext_gwy,
1470 &pf_statetbl_ext_gwy, psk);
1471 if (!ret && psk->af_lan == PF_INET6 &&
1472 psk->af_gwy == PF_INET) {
1473 pf_state_tree_ext_gwy_nat64_cnt++;
1474 }
1475 return ret;
1476 }
1477
1478 static __inline struct pf_state_key *
pf_remove_state_key_ext_gwy(struct pf_state_key * psk)1479 pf_remove_state_key_ext_gwy(struct pf_state_key *psk)
1480 {
1481 struct pf_state_key * ret = RB_REMOVE(pf_state_tree_ext_gwy,
1482 &pf_statetbl_ext_gwy, psk);
1483 if (ret && psk->af_lan == PF_INET6 &&
1484 psk->af_gwy == PF_INET) {
1485 pf_state_tree_ext_gwy_nat64_cnt--;
1486 }
1487 return ret;
1488 }
1489
1490 int
pf_insert_state(struct pfi_kif * kif,struct pf_state * s)1491 pf_insert_state(struct pfi_kif *kif, struct pf_state *s)
1492 {
1493 struct pf_state_key *cur;
1494 struct pf_state *sp;
1495
1496 VERIFY(s->state_key != NULL);
1497 s->kif = kif;
1498
1499 if ((cur = RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext,
1500 s->state_key)) != NULL) {
1501 /* key exists. check for same kif, if none, add to key */
1502 TAILQ_FOREACH(sp, &cur->states, next)
1503 if (sp->kif == kif) { /* collision! */
1504 pf_stateins_err("tree_lan_ext", s, kif);
1505 pf_detach_state(s,
1506 PF_DT_SKIP_LANEXT | PF_DT_SKIP_EXTGWY);
1507 return -1;
1508 }
1509 pf_detach_state(s, PF_DT_SKIP_LANEXT | PF_DT_SKIP_EXTGWY);
1510 pf_attach_state(cur, s, kif == pfi_all ? 1 : 0);
1511 }
1512
1513 /* if cur != NULL, we already found a state key and attached to it */
1514 if (cur == NULL &&
1515 (cur = pf_insert_state_key_ext_gwy(s->state_key)) != NULL) {
1516 /* must not happen. we must have found the sk above! */
1517 pf_stateins_err("tree_ext_gwy", s, kif);
1518 pf_detach_state(s, PF_DT_SKIP_EXTGWY);
1519 return -1;
1520 }
1521
1522 if (s->id == 0 && s->creatorid == 0) {
1523 s->id = htobe64(pf_status.stateid++);
1524 s->creatorid = pf_status.hostid;
1525 }
1526 if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
1527 if (pf_status.debug >= PF_DEBUG_MISC) {
1528 printf("pf: state insert failed: "
1529 "id: %016llx creatorid: %08x",
1530 be64toh(s->id), ntohl(s->creatorid));
1531 if (s->sync_flags & PFSTATE_FROMSYNC) {
1532 printf(" (from sync)");
1533 }
1534 printf("\n");
1535 }
1536 pf_detach_state(s, 0);
1537 return -1;
1538 }
1539 TAILQ_INSERT_TAIL(&state_list, s, entry_list);
1540 pf_status.fcounters[FCNT_STATE_INSERT]++;
1541 pf_status.states++;
1542 VERIFY(pf_status.states != 0);
1543 pfi_kif_ref(kif, PFI_KIF_REF_STATE);
1544 #if NPFSYNC
1545 pfsync_insert_state(s);
1546 #endif
1547 return 0;
1548 }
1549
/*
 * Continuation body of the purge thread.
 *
 * Each pass updates the coarse network uptime, takes pf_perim_lock
 * (shared) and pf_lock, purges expired states / fragments / source nodes,
 * drops the locks and then re-arms itself by calling tsleep0() with this
 * function as the continuation.  The sleep timeout is t * hz; t is 1
 * second normally and 0 (no timeout) when pf is not running and nothing
 * is left to purge.
 *
 * The err argument is unused.  The function is not expected to return
 * through its tail: the trailing VERIFY(0) asserts that tsleep0() never
 * comes back.
 */
static int
pf_purge_thread_cont(int err)
{
#pragma unused(err)
	/* passes since fragments / source nodes were last purged */
	static u_int32_t nloops = 0;
	int t = 1;      /* 1 second */

	/*
	 * Update coarse-grained networking timestamp (in sec.); the idea
	 * is to piggy-back on the periodic timeout callout to update
	 * the counter returnable via net_uptime().
	 */
	net_update_uptime();

	lck_rw_lock_shared(&pf_perim_lock);
	lck_mtx_lock(&pf_lock);

	/* purge everything if not running */
	if (!pf_status.running) {
		pf_purge_expired_states(pf_status.states);
		pf_purge_expired_fragments();
		pf_purge_expired_src_nodes();

		/* terminate thread (we don't currently do this) */
		if (pf_purge_thread == NULL) {
			lck_mtx_unlock(&pf_lock);
			lck_rw_done(&pf_perim_lock);

			thread_deallocate(current_thread());
			thread_terminate(current_thread());
			/* NOTREACHED */
			return 0;
		} else {
			/* if there's nothing left, sleep w/o timeout */
			if (pf_status.states == 0 &&
			    pf_normalize_isempty() &&
			    RB_EMPTY(&tree_src_tracking)) {
				nloops = 0;
				t = 0;
			}
			goto done;
		}
	}

	/* process a fraction of the state table every second */
	pf_purge_expired_states(1 + (pf_status.states
	    / pf_default_rule.timeout[PFTM_INTERVAL]));

	/* purge other expired types every PFTM_INTERVAL seconds */
	if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_fragments();
		pf_purge_expired_src_nodes();
		nloops = 0;
	}
done:
	lck_mtx_unlock(&pf_lock);
	lck_rw_done(&pf_perim_lock);

	/* sleep, resuming in this function again as the continuation */
	(void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge_cont",
	    t * hz, pf_purge_thread_cont);
	/* NOTREACHED */
	VERIFY(0);

	return 0;
}
1615
1616 void
pf_purge_thread_fn(void * v,wait_result_t w)1617 pf_purge_thread_fn(void *v, wait_result_t w)
1618 {
1619 #pragma unused(v, w)
1620 (void) tsleep0(pf_purge_thread_fn, PWAIT, "pf_purge", 0,
1621 pf_purge_thread_cont);
1622 /*
1623 * tsleep0() shouldn't have returned as PCATCH was not set;
1624 * therefore assert in this case.
1625 */
1626 VERIFY(0);
1627 }
1628
1629 u_int64_t
pf_state_expires(const struct pf_state * state)1630 pf_state_expires(const struct pf_state *state)
1631 {
1632 u_int32_t t;
1633 u_int32_t start;
1634 u_int32_t end;
1635 u_int32_t states;
1636
1637 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1638
1639 /* handle all PFTM_* > PFTM_MAX here */
1640 if (state->timeout == PFTM_PURGE) {
1641 return pf_time_second();
1642 }
1643
1644 VERIFY(state->timeout != PFTM_UNLINKED);
1645 VERIFY(state->timeout < PFTM_MAX);
1646 t = state->rule.ptr->timeout[state->timeout];
1647 if (!t) {
1648 t = pf_default_rule.timeout[state->timeout];
1649 }
1650 start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
1651 if (start) {
1652 end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
1653 states = state->rule.ptr->states;
1654 } else {
1655 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
1656 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
1657 states = pf_status.states;
1658 }
1659 if (end && states > start && start < end) {
1660 if (states < end) {
1661 return state->expire + t * (end - states) /
1662 (end - start);
1663 } else {
1664 return pf_time_second();
1665 }
1666 }
1667 return state->expire + t;
1668 }
1669
1670 void
pf_purge_expired_src_nodes(void)1671 pf_purge_expired_src_nodes(void)
1672 {
1673 struct pf_src_node *cur, *next;
1674
1675 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1676
1677 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
1678 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
1679
1680 if (cur->states <= 0 && cur->expire <= pf_time_second()) {
1681 if (cur->rule.ptr != NULL) {
1682 cur->rule.ptr->src_nodes--;
1683 if (cur->rule.ptr->states <= 0 &&
1684 cur->rule.ptr->max_src_nodes <= 0) {
1685 pf_rm_rule(NULL, cur->rule.ptr);
1686 }
1687 }
1688 RB_REMOVE(pf_src_tree, &tree_src_tracking, cur);
1689 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
1690 pf_status.src_nodes--;
1691 pool_put(&pf_src_tree_pl, cur);
1692 }
1693 }
1694 }
1695
1696 void
pf_src_tree_remove_state(struct pf_state * s)1697 pf_src_tree_remove_state(struct pf_state *s)
1698 {
1699 u_int32_t t;
1700
1701 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1702
1703 if (s->src_node != NULL) {
1704 if (s->src.tcp_est) {
1705 VERIFY(s->src_node->conn > 0);
1706 --s->src_node->conn;
1707 }
1708 VERIFY(s->src_node->states > 0);
1709 if (--s->src_node->states <= 0) {
1710 t = s->rule.ptr->timeout[PFTM_SRC_NODE];
1711 if (!t) {
1712 t = pf_default_rule.timeout[PFTM_SRC_NODE];
1713 }
1714 s->src_node->expire = pf_time_second() + t;
1715 }
1716 }
1717 if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
1718 VERIFY(s->nat_src_node->states > 0);
1719 if (--s->nat_src_node->states <= 0) {
1720 t = s->rule.ptr->timeout[PFTM_SRC_NODE];
1721 if (!t) {
1722 t = pf_default_rule.timeout[PFTM_SRC_NODE];
1723 }
1724 s->nat_src_node->expire = pf_time_second() + t;
1725 }
1726 }
1727 s->src_node = s->nat_src_node = NULL;
1728 }
1729
1730 void
pf_unlink_state(struct pf_state * cur)1731 pf_unlink_state(struct pf_state *cur)
1732 {
1733 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1734
1735 if (cur->src.state == PF_TCPS_PROXY_DST) {
1736 pf_send_tcp(cur->rule.ptr, cur->state_key->af_lan,
1737 &cur->state_key->ext_lan.addr, &cur->state_key->lan.addr,
1738 cur->state_key->ext_lan.xport.port,
1739 cur->state_key->lan.xport.port,
1740 cur->src.seqhi, cur->src.seqlo + 1,
1741 TH_RST | TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
1742 }
1743
1744 hook_runloop(&cur->unlink_hooks, HOOK_REMOVE | HOOK_FREE);
1745 RB_REMOVE(pf_state_tree_id, &tree_id, cur);
1746 #if NPFSYNC
1747 if (cur->creatorid == pf_status.hostid) {
1748 pfsync_delete_state(cur);
1749 }
1750 #endif
1751 cur->timeout = PFTM_UNLINKED;
1752 pf_src_tree_remove_state(cur);
1753 pf_detach_state(cur, 0);
1754 }
1755
1756 /* callers should be at splpf and hold the
1757 * write_lock on pf_consistency_lock */
1758 void
pf_free_state(struct pf_state * cur)1759 pf_free_state(struct pf_state *cur)
1760 {
1761 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1762 #if NPFSYNC
1763 if (pfsyncif != NULL &&
1764 (pfsyncif->sc_bulk_send_next == cur ||
1765 pfsyncif->sc_bulk_terminator == cur)) {
1766 return;
1767 }
1768 #endif
1769 VERIFY(cur->timeout == PFTM_UNLINKED);
1770 VERIFY(cur->rule.ptr->states > 0);
1771 if (--cur->rule.ptr->states <= 0 &&
1772 cur->rule.ptr->src_nodes <= 0) {
1773 pf_rm_rule(NULL, cur->rule.ptr);
1774 }
1775 if (cur->nat_rule.ptr != NULL) {
1776 VERIFY(cur->nat_rule.ptr->states > 0);
1777 if (--cur->nat_rule.ptr->states <= 0 &&
1778 cur->nat_rule.ptr->src_nodes <= 0) {
1779 pf_rm_rule(NULL, cur->nat_rule.ptr);
1780 }
1781 }
1782 if (cur->anchor.ptr != NULL) {
1783 VERIFY(cur->anchor.ptr->states > 0);
1784 if (--cur->anchor.ptr->states <= 0) {
1785 pf_rm_rule(NULL, cur->anchor.ptr);
1786 }
1787 }
1788 pf_normalize_tcp_cleanup(cur);
1789 pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
1790 TAILQ_REMOVE(&state_list, cur, entry_list);
1791 if (cur->tag) {
1792 pf_tag_unref(cur->tag);
1793 }
1794 #if SKYWALK
1795 netns_release(&cur->nstoken);
1796 #endif
1797 pool_put(&pf_state_pl, cur);
1798 pf_status.fcounters[FCNT_STATE_REMOVALS]++;
1799 VERIFY(pf_status.states > 0);
1800 pf_status.states--;
1801 }
1802
1803 void
pf_purge_expired_states(u_int32_t maxcheck)1804 pf_purge_expired_states(u_int32_t maxcheck)
1805 {
1806 static struct pf_state *cur = NULL;
1807 struct pf_state *next;
1808
1809 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1810
1811 while (maxcheck--) {
1812 /* wrap to start of list when we hit the end */
1813 if (cur == NULL) {
1814 cur = TAILQ_FIRST(&state_list);
1815 if (cur == NULL) {
1816 break; /* list empty */
1817 }
1818 }
1819
1820 /* get next state, as cur may get deleted */
1821 next = TAILQ_NEXT(cur, entry_list);
1822
1823 if (cur->timeout == PFTM_UNLINKED) {
1824 pf_free_state(cur);
1825 } else if (pf_state_expires(cur) <= pf_time_second()) {
1826 /* unlink and free expired state */
1827 pf_unlink_state(cur);
1828 pf_free_state(cur);
1829 }
1830 cur = next;
1831 }
1832 }
1833
1834 int
pf_tbladdr_setup(struct pf_ruleset * rs,struct pf_addr_wrap * aw)1835 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
1836 {
1837 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1838
1839 if (aw->type != PF_ADDR_TABLE) {
1840 return 0;
1841 }
1842 if ((aw->p.tbl = pfr_attach_table(rs, __unsafe_null_terminated_from_indexable(aw->v.tblname))) == NULL) {
1843 return 1;
1844 }
1845 return 0;
1846 }
1847
1848 void
pf_tbladdr_remove(struct pf_addr_wrap * aw)1849 pf_tbladdr_remove(struct pf_addr_wrap *aw)
1850 {
1851 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1852
1853 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) {
1854 return;
1855 }
1856 pfr_detach_table(aw->p.tbl);
1857 aw->p.tbl = NULL;
1858 }
1859
1860 void
pf_tbladdr_copyout(struct pf_addr_wrap * aw)1861 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
1862 {
1863 struct pfr_ktable *kt = aw->p.tbl;
1864
1865 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
1866
1867 if (aw->type != PF_ADDR_TABLE || kt == NULL) {
1868 return;
1869 }
1870 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) {
1871 kt = kt->pfrkt_root;
1872 }
1873 aw->p.tbl = NULL;
1874 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
1875 kt->pfrkt_cnt : -1;
1876 }
1877
1878 static void
pf_print_addr(struct pf_addr * addr,sa_family_t af)1879 pf_print_addr(struct pf_addr *addr, sa_family_t af)
1880 {
1881 switch (af) {
1882 #if INET
1883 case AF_INET: {
1884 u_int32_t a = ntohl(addr->addr32[0]);
1885 printf("%u.%u.%u.%u", (a >> 24) & 255, (a >> 16) & 255,
1886 (a >> 8) & 255, a & 255);
1887 break;
1888 }
1889 #endif /* INET */
1890 case AF_INET6: {
1891 u_int16_t b;
1892 u_int8_t i, curstart = 255, curend = 0,
1893 maxstart = 0, maxend = 0;
1894 for (i = 0; i < 8; i++) {
1895 if (!addr->addr16[i]) {
1896 if (curstart == 255) {
1897 curstart = i;
1898 } else {
1899 curend = i;
1900 }
1901 } else {
1902 if (curstart) {
1903 if ((curend - curstart) >
1904 (maxend - maxstart)) {
1905 maxstart = curstart;
1906 maxend = curend;
1907 curstart = 255;
1908 }
1909 }
1910 }
1911 }
1912 for (i = 0; i < 8; i++) {
1913 if (i >= maxstart && i <= maxend) {
1914 if (maxend != 7) {
1915 if (i == maxstart) {
1916 printf(":");
1917 }
1918 } else {
1919 if (i == maxend) {
1920 printf(":");
1921 }
1922 }
1923 } else {
1924 b = ntohs(addr->addr16[i]);
1925 printf("%x", b);
1926 if (i < 7) {
1927 printf(":");
1928 }
1929 }
1930 }
1931 break;
1932 }
1933 }
1934 }
1935
1936 static void
pf_print_sk_host(struct pf_state_host * sh,sa_family_t af,int proto,u_int8_t proto_variant)1937 pf_print_sk_host(struct pf_state_host *sh, sa_family_t af, int proto,
1938 u_int8_t proto_variant)
1939 {
1940 pf_print_addr(&sh->addr, af);
1941
1942 switch (proto) {
1943 case IPPROTO_ESP:
1944 if (sh->xport.spi) {
1945 printf("[%08x]", ntohl(sh->xport.spi));
1946 }
1947 break;
1948
1949 case IPPROTO_GRE:
1950 if (proto_variant == PF_GRE_PPTP_VARIANT) {
1951 printf("[%u]", ntohs(sh->xport.call_id));
1952 }
1953 break;
1954
1955 case IPPROTO_TCP:
1956 case IPPROTO_UDP:
1957 printf("[%u]", ntohs(sh->xport.port));
1958 break;
1959
1960 default:
1961 break;
1962 }
1963 }
1964
1965 static void
pf_print_host(struct pf_addr * addr,u_int16_t p,sa_family_t af)1966 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
1967 {
1968 pf_print_addr(addr, af);
1969 if (p) {
1970 printf("[%u]", ntohs(p));
1971 }
1972 }
1973
1974 void
pf_print_state(struct pf_state * s)1975 pf_print_state(struct pf_state *s)
1976 {
1977 struct pf_state_key *sk = s->state_key;
1978 switch (sk->proto) {
1979 case IPPROTO_ESP:
1980 printf("ESP ");
1981 break;
1982 case IPPROTO_GRE:
1983 printf("GRE%u ", sk->proto_variant);
1984 break;
1985 case IPPROTO_TCP:
1986 printf("TCP ");
1987 break;
1988 case IPPROTO_UDP:
1989 printf("UDP ");
1990 break;
1991 case IPPROTO_ICMP:
1992 printf("ICMP ");
1993 break;
1994 case IPPROTO_ICMPV6:
1995 printf("ICMPV6 ");
1996 break;
1997 default:
1998 printf("%u ", sk->proto);
1999 break;
2000 }
2001 pf_print_sk_host(&sk->lan, sk->af_lan, sk->proto, sk->proto_variant);
2002 printf(" ");
2003 pf_print_sk_host(&sk->gwy, sk->af_gwy, sk->proto, sk->proto_variant);
2004 printf(" ");
2005 pf_print_sk_host(&sk->ext_lan, sk->af_lan, sk->proto,
2006 sk->proto_variant);
2007 printf(" ");
2008 pf_print_sk_host(&sk->ext_gwy, sk->af_gwy, sk->proto,
2009 sk->proto_variant);
2010 printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo,
2011 s->src.seqhi, s->src.max_win, s->src.seqdiff);
2012 if (s->src.wscale && s->dst.wscale) {
2013 printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK);
2014 }
2015 printf("]");
2016 printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo,
2017 s->dst.seqhi, s->dst.max_win, s->dst.seqdiff);
2018 if (s->src.wscale && s->dst.wscale) {
2019 printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK);
2020 }
2021 printf("]");
2022 printf(" %u:%u", s->src.state, s->dst.state);
2023 }
2024
2025 void
pf_print_flags(u_int8_t f)2026 pf_print_flags(u_int8_t f)
2027 {
2028 if (f) {
2029 printf(" ");
2030 }
2031 if (f & TH_FIN) {
2032 printf("F");
2033 }
2034 if (f & TH_SYN) {
2035 printf("S");
2036 }
2037 if (f & TH_RST) {
2038 printf("R");
2039 }
2040 if (f & TH_PUSH) {
2041 printf("P");
2042 }
2043 if (f & TH_ACK) {
2044 printf("A");
2045 }
2046 if (f & TH_URG) {
2047 printf("U");
2048 }
2049 if (f & TH_ECE) {
2050 printf("E");
2051 }
2052 if (f & TH_CWR) {
2053 printf("W");
2054 }
2055 }
2056
/*
 * PF_SET_SKIP_STEPS(i): advance head[i] along the rule queue until it
 * reaches cur, setting skip[i].ptr of every rule passed on the way to
 * cur.  Expects `head' (array of rule pointers) and `cur' to be in scope
 * at the point of expansion.
 */
#define PF_SET_SKIP_STEPS(i)                                    \
	do {                                                    \
	        while (head[i] != cur) {                        \
	                head[i]->skip[i].ptr = cur;             \
	                head[i] = TAILQ_NEXT(head[i], entries); \
	        }                                               \
	} while (0)
2064
/*
 * Compute the skip pointers of every rule in a rule queue.
 *
 * For each of the PF_SKIP_COUNT criteria, head[i] trails behind cur and
 * marks the first rule of the current run of rules that agree on that
 * criterion.  Whenever cur differs from the previous rule on criterion i,
 * PF_SET_SKIP_STEPS(i) points skip[i] of all rules in the finished run at
 * cur.  After the walk (cur == NULL) the remaining runs are closed so
 * that their skip pointers are NULL.
 */
void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i) {
		head[i] = cur;
	}
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) {
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		}
		if (cur->direction != prev->direction) {
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		}
		if (cur->af != prev->af) {
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		}
		if (cur->proto != prev->proto) {
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		}
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) {
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		}
		{
			union pf_rule_xport *cx = &cur->src.xport;
			union pf_rule_xport *px = &prev->src.xport;

			switch (cur->proto) {
			case IPPROTO_GRE:
			case IPPROTO_ESP:
				/* GRE/ESP rules always end the source-port run */
				PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
				break;
			default:
				/* compare as a port range unless prev was GRE/ESP */
				if (prev->proto == IPPROTO_GRE ||
				    prev->proto == IPPROTO_ESP ||
				    cx->range.op != px->range.op ||
				    cx->range.port[0] != px->range.port[0] ||
				    cx->range.port[1] != px->range.port[1]) {
					PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
				}
				break;
			}
		}
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) {
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		}
		{
			union pf_rule_xport *cx = &cur->dst.xport;
			union pf_rule_xport *px = &prev->dst.xport;

			/* the destination xport is compared per protocol */
			switch (cur->proto) {
			case IPPROTO_GRE:
				if (cur->proto != prev->proto ||
				    cx->call_id != px->call_id) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			case IPPROTO_ESP:
				if (cur->proto != prev->proto ||
				    cx->spi != px->spi) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			default:
				if (prev->proto == IPPROTO_GRE ||
				    prev->proto == IPPROTO_ESP ||
				    cx->range.op != px->range.op ||
				    cx->range.port[0] != px->range.port[0] ||
				    cx->range.port[1] != px->range.port[1]) {
					PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
				}
				break;
			}
		}

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	/* cur is NULL here: close every run that is still open */
	for (i = 0; i < PF_SKIP_COUNT; ++i) {
		PF_SET_SKIP_STEPS(i);
	}
}
2153
/*
 * Compute a flow identifier for a state key from its LAN-side endpoints
 * (lan / ext_lan addresses and transport ids), address family and
 * protocol.
 *
 * With SKYWALK the id is obtained from flowidns_allocate_flowid() in the
 * FLOWIDNS_DOMAIN_PF domain.  Without SKYWALK it is a net_flowhash() over
 * a key in which the two endpoints are placed in a fixed order, so that
 * the result does not depend on which endpoint is lan and which is
 * ext_lan; the hash is recomputed with a fresh seed until it is non-zero.
 */
u_int32_t
pf_calc_state_key_flowhash(struct pf_state_key *sk)
{
#if SKYWALK
	uint32_t flowid;
	struct flowidns_flow_key fk;

	VERIFY(sk->flowsrc == FLOWSRC_PF);
	bzero(&fk, sizeof(fk));
	/*
	 * NOTE(review): the second assert compares against ffk_laddr although
	 * ext_lan.addr is copied into ffk_raddr below -- confirm whether
	 * ffk_raddr was intended.
	 */
	static_assert(sizeof(sk->lan.addr) == sizeof(fk.ffk_laddr));
	static_assert(sizeof(sk->ext_lan.addr) == sizeof(fk.ffk_laddr));
	bcopy(&sk->lan.addr, &fk.ffk_laddr, sizeof(fk.ffk_laddr));
	bcopy(&sk->ext_lan.addr, &fk.ffk_raddr, sizeof(fk.ffk_raddr));
	fk.ffk_af = sk->af_lan;
	fk.ffk_proto = sk->proto;

	switch (sk->proto) {
	case IPPROTO_ESP:
	case IPPROTO_AH:
		fk.ffk_spi = sk->lan.xport.spi;
		break;
	default:
		/* order the two ports by comparing the xport values */
		if (sk->lan.xport.spi <= sk->ext_lan.xport.spi) {
			fk.ffk_lport = sk->lan.xport.port;
			fk.ffk_rport = sk->ext_lan.xport.port;
		} else {
			fk.ffk_lport = sk->ext_lan.xport.port;
			fk.ffk_rport = sk->lan.xport.port;
		}
		break;
	}

	flowidns_allocate_flowid(FLOWIDNS_DOMAIN_PF, &fk, &flowid);
	return flowid;

#else /* !SKYWALK */

	struct pf_flowhash_key fh __attribute__((aligned(8)));
	uint32_t flowhash = 0;

	bzero(&fh, sizeof(fh));
	/* place the two addresses in a fixed order (see PF_ALEQ) */
	if (PF_ALEQ(&sk->lan.addr, &sk->ext_lan.addr, sk->af_lan)) {
		bcopy(&sk->lan.addr, &fh.ap1.addr, sizeof(fh.ap1.addr));
		bcopy(&sk->ext_lan.addr, &fh.ap2.addr, sizeof(fh.ap2.addr));
	} else {
		bcopy(&sk->ext_lan.addr, &fh.ap1.addr, sizeof(fh.ap1.addr));
		bcopy(&sk->lan.addr, &fh.ap2.addr, sizeof(fh.ap2.addr));
	}
	/* likewise for the transport ids: smaller value goes first */
	if (sk->lan.xport.spi <= sk->ext_lan.xport.spi) {
		fh.ap1.xport.spi = sk->lan.xport.spi;
		fh.ap2.xport.spi = sk->ext_lan.xport.spi;
	} else {
		fh.ap1.xport.spi = sk->ext_lan.xport.spi;
		fh.ap2.xport.spi = sk->lan.xport.spi;
	}
	fh.af = sk->af_lan;
	fh.proto = sk->proto;

try_again:
	flowhash = net_flowhash(&fh, sizeof(fh), pf_hash_seed);
	if (flowhash == 0) {
		/* try to get a non-zero flowhash */
		pf_hash_seed = RandomULong();
		goto try_again;
	}

	return flowhash;

#endif /* !SKYWALK */
}
2224
2225 static int
pf_addr_wrap_neq(struct pf_addr_wrap * aw1,struct pf_addr_wrap * aw2)2226 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
2227 {
2228 if (aw1->type != aw2->type) {
2229 return 1;
2230 }
2231 switch (aw1->type) {
2232 case PF_ADDR_ADDRMASK:
2233 case PF_ADDR_RANGE:
2234 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) {
2235 return 1;
2236 }
2237 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) {
2238 return 1;
2239 }
2240 return 0;
2241 case PF_ADDR_DYNIFTL:
2242 return aw1->p.dyn == NULL || aw2->p.dyn == NULL ||
2243 aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt;
2244 case PF_ADDR_NOROUTE:
2245 case PF_ADDR_URPFFAILED:
2246 return 0;
2247 case PF_ADDR_TABLE:
2248 return aw1->p.tbl != aw2->p.tbl;
2249 case PF_ADDR_RTLABEL:
2250 return aw1->v.rtlabel != aw2->v.rtlabel;
2251 default:
2252 printf("invalid address type: %d\n", aw1->type);
2253 return 1;
2254 }
2255 }
2256
/*
 * Adjust checksum 'cksum' for a 16-bit word changing from 'old' to 'new'.
 * This is a thin wrapper: all arguments, including the 'udp' flag, are
 * passed unchanged to nat464_cksum_fixup() and its result is returned.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int16_t fixed = nat464_cksum_fixup(cksum, old, new, udp);

	return fixed;
}
2262
2263 /*
2264 * change ip address & port
2265 * dir : packet direction
2266 * a : address to be changed
2267 * p : port to be changed
2268 * ic : ip header checksum
2269 * pc : protocol checksum
2270 * an : new ip address
2271 * pn : new port
2272 * u : should be 1 if UDP packet else 0
2273 * af : address family of the packet
2274 * afn : address family of the new address
2275 * ua : should be 1 if ip address needs to be updated in the packet else
2276 * only the checksum is recalculated & updated.
2277 */
2278 static __attribute__((noinline)) void
pf_change_ap(int dir,pbuf_t * pbuf,struct pf_addr * a,u_int16_t * p,u_int16_t * ic,u_int16_t * pc,struct pf_addr * an,u_int16_t pn,u_int8_t u,sa_family_t af,sa_family_t afn,int ua)2279 pf_change_ap(int dir, pbuf_t *pbuf, struct pf_addr *a, u_int16_t *p,
2280 u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn,
2281 u_int8_t u, sa_family_t af, sa_family_t afn, int ua)
2282 {
2283 struct pf_addr ao;
2284 u_int16_t po = *p;
2285
2286 PF_ACPY(&ao, a, af);
2287 if (ua) {
2288 PF_ACPY(a, an, afn);
2289 }
2290
2291 *p = pn;
2292
2293 switch (af) {
2294 #if INET
2295 case AF_INET:
2296 switch (afn) {
2297 case AF_INET:
2298 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
2299 ao.addr16[0], an->addr16[0], 0),
2300 ao.addr16[1], an->addr16[1], 0);
2301 *p = pn;
2302 /*
2303 * If the packet is originated from an ALG on the NAT gateway
2304 * (source address is loopback or local), in which case the
2305 * TCP/UDP checksum field contains the pseudo header checksum
2306 * that's not yet complemented.
2307 * In that case we do not need to fixup the checksum for port
2308 * translation as the pseudo header checksum doesn't include ports.
2309 *
2310 * A packet generated locally will have UDP/TCP CSUM flag
2311 * set (gets set in protocol output).
2312 *
2313 * It should be noted that the fixup doesn't do anything if the
2314 * checksum is 0.
2315 */
2316 if (dir == PF_OUT && pbuf != NULL &&
2317 (*pbuf->pb_csum_flags & (CSUM_TCP | CSUM_UDP))) {
2318 /* Pseudo-header checksum does not include ports */
2319 *pc = ~pf_cksum_fixup(pf_cksum_fixup(~*pc,
2320 ao.addr16[0], an->addr16[0], u),
2321 ao.addr16[1], an->addr16[1], u);
2322 } else {
2323 *pc =
2324 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2325 *pc, ao.addr16[0], an->addr16[0], u),
2326 ao.addr16[1], an->addr16[1], u),
2327 po, pn, u);
2328 }
2329 break;
2330 case AF_INET6:
2331 *p = pn;
2332 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2333 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2334
2335 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
2336 ao.addr16[0], an->addr16[0], u),
2337 ao.addr16[1], an->addr16[1], u),
2338 0, an->addr16[2], u),
2339 0, an->addr16[3], u),
2340 0, an->addr16[4], u),
2341 0, an->addr16[5], u),
2342 0, an->addr16[6], u),
2343 0, an->addr16[7], u),
2344 po, pn, u);
2345 break;
2346 }
2347 break;
2348 #endif /* INET */
2349 case AF_INET6:
2350 switch (afn) {
2351 case AF_INET6:
2352 /*
2353 * If the packet is originated from an ALG on the NAT gateway
2354 * (source address is loopback or local), in which case the
2355 * TCP/UDP checksum field contains the pseudo header checksum
2356 * that's not yet complemented.
2357 * A packet generated locally
2358 * will have UDP/TCP CSUM flag set (gets set in protocol
2359 * output).
2360 */
2361 if (dir == PF_OUT && pbuf != NULL &&
2362 (*pbuf->pb_csum_flags & (CSUM_TCPIPV6 |
2363 CSUM_UDPIPV6))) {
2364 /* Pseudo-header checksum does not include ports */
2365 *pc =
2366 ~pf_cksum_fixup(pf_cksum_fixup(
2367 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2368 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2369 ~*pc,
2370 ao.addr16[0], an->addr16[0], u),
2371 ao.addr16[1], an->addr16[1], u),
2372 ao.addr16[2], an->addr16[2], u),
2373 ao.addr16[3], an->addr16[3], u),
2374 ao.addr16[4], an->addr16[4], u),
2375 ao.addr16[5], an->addr16[5], u),
2376 ao.addr16[6], an->addr16[6], u),
2377 ao.addr16[7], an->addr16[7], u);
2378 } else {
2379 *pc =
2380 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2381 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2382 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2383 *pc,
2384 ao.addr16[0], an->addr16[0], u),
2385 ao.addr16[1], an->addr16[1], u),
2386 ao.addr16[2], an->addr16[2], u),
2387 ao.addr16[3], an->addr16[3], u),
2388 ao.addr16[4], an->addr16[4], u),
2389 ao.addr16[5], an->addr16[5], u),
2390 ao.addr16[6], an->addr16[6], u),
2391 ao.addr16[7], an->addr16[7], u),
2392 po, pn, u);
2393 }
2394 break;
2395 #ifdef INET
2396 case AF_INET:
2397 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2398 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2399 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
2400 ao.addr16[0], an->addr16[0], u),
2401 ao.addr16[1], an->addr16[1], u),
2402 ao.addr16[2], 0, u),
2403 ao.addr16[3], 0, u),
2404 ao.addr16[4], 0, u),
2405 ao.addr16[5], 0, u),
2406 ao.addr16[6], 0, u),
2407 ao.addr16[7], 0, u),
2408 po, pn, u);
2409 break;
2410 #endif /* INET */
2411 }
2412 break;
2413 }
2414 }
2415
2416
/*
 * Changes a u_int32_t and adjusts checksum 'c' for it.
 *
 * 'a' is a void * accessed only through memcpy(), so there are no alignment
 * restrictions on it.  The checksum is fixed up for the upper 16 bits first
 * and the lower 16 bits second; 'u' is handed to pf_cksum_fixup() unchanged.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t prev;
	u_int16_t sum;

	memcpy(&prev, (uint32_t *)a, sizeof(prev));
	memcpy((uint32_t *)a, &an, sizeof(u_int32_t));

	sum = pf_cksum_fixup(*c, (u_int16_t)(prev >> 16),
	    (u_int16_t)(an >> 16), u);
	*c = pf_cksum_fixup(sum, (u_int16_t)(prev & 0xffff),
	    (u_int16_t)(an & 0xffff), u);
}
2428
2429 static __attribute__((noinline)) void
pf_change_a6(struct pf_addr * a,u_int16_t * c,struct pf_addr * an,u_int8_t u)2430 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
2431 {
2432 struct pf_addr ao;
2433
2434 PF_ACPY(&ao, a, AF_INET6);
2435 PF_ACPY(a, an, AF_INET6);
2436
2437 *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2438 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2439 pf_cksum_fixup(pf_cksum_fixup(*c,
2440 ao.addr16[0], an->addr16[0], u),
2441 ao.addr16[1], an->addr16[1], u),
2442 ao.addr16[2], an->addr16[2], u),
2443 ao.addr16[3], an->addr16[3], u),
2444 ao.addr16[4], an->addr16[4], u),
2445 ao.addr16[5], an->addr16[5], u),
2446 ao.addr16[6], an->addr16[6], u),
2447 ao.addr16[7], an->addr16[7], u);
2448 }
2449
2450 static __attribute__((noinline)) void
pf_change_addr(struct pf_addr * a,u_int16_t * c,struct pf_addr * an,u_int8_t u,sa_family_t af,sa_family_t afn)2451 pf_change_addr(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u,
2452 sa_family_t af, sa_family_t afn)
2453 {
2454 struct pf_addr ao;
2455
2456 if (af != afn) {
2457 PF_ACPY(&ao, a, af);
2458 PF_ACPY(a, an, afn);
2459 }
2460
2461 switch (af) {
2462 case AF_INET:
2463 switch (afn) {
2464 case AF_INET:
2465 pf_change_a(a, c, an->v4addr.s_addr, u);
2466 break;
2467 case AF_INET6:
2468 *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2469 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2470 pf_cksum_fixup(pf_cksum_fixup(*c,
2471 ao.addr16[0], an->addr16[0], u),
2472 ao.addr16[1], an->addr16[1], u),
2473 0, an->addr16[2], u),
2474 0, an->addr16[3], u),
2475 0, an->addr16[4], u),
2476 0, an->addr16[5], u),
2477 0, an->addr16[6], u),
2478 0, an->addr16[7], u);
2479 break;
2480 }
2481 break;
2482 case AF_INET6:
2483 switch (afn) {
2484 case AF_INET:
2485 *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2486 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2487 pf_cksum_fixup(pf_cksum_fixup(*c,
2488 ao.addr16[0], an->addr16[0], u),
2489 ao.addr16[1], an->addr16[1], u),
2490 ao.addr16[2], 0, u),
2491 ao.addr16[3], 0, u),
2492 ao.addr16[4], 0, u),
2493 ao.addr16[5], 0, u),
2494 ao.addr16[6], 0, u),
2495 ao.addr16[7], 0, u);
2496 break;
2497 case AF_INET6:
2498 pf_change_a6(a, c, an, u);
2499 break;
2500 }
2501 break;
2502 }
2503 }
2504
2505 static __attribute__((noinline)) void
pf_change_icmp(struct pf_addr * ia,u_int16_t * ip,struct pf_addr * oa,struct pf_addr * na,u_int16_t np,u_int16_t * pc,u_int16_t * h2c,u_int16_t * ic,u_int16_t * hc,u_int8_t u,sa_family_t af)2506 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
2507 struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
2508 u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
2509 {
2510 struct pf_addr oia, ooa;
2511
2512 PF_ACPY(&oia, ia, af);
2513 PF_ACPY(&ooa, oa, af);
2514
2515 /* Change inner protocol port, fix inner protocol checksum. */
2516 if (ip != NULL) {
2517 u_int16_t oip = *ip;
2518 u_int32_t opc = 0;
2519
2520 if (pc != NULL) {
2521 opc = *pc;
2522 }
2523 *ip = np;
2524 if (pc != NULL) {
2525 *pc = pf_cksum_fixup(*pc, oip, *ip, u);
2526 }
2527 *ic = pf_cksum_fixup(*ic, oip, *ip, 0);
2528 if (pc != NULL) {
2529 *ic = pf_cksum_fixup(*ic, opc, *pc, 0);
2530 }
2531 }
2532 /* Change inner ip address, fix inner ip and icmp checksums. */
2533 PF_ACPY(ia, na, af);
2534 switch (af) {
2535 #if INET
2536 case AF_INET: {
2537 u_int32_t oh2c = *h2c;
2538
2539 *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
2540 oia.addr16[0], ia->addr16[0], 0),
2541 oia.addr16[1], ia->addr16[1], 0);
2542 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
2543 oia.addr16[0], ia->addr16[0], 0),
2544 oia.addr16[1], ia->addr16[1], 0);
2545 *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
2546 break;
2547 }
2548 #endif /* INET */
2549 case AF_INET6:
2550 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2551 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2552 pf_cksum_fixup(pf_cksum_fixup(*ic,
2553 oia.addr16[0], ia->addr16[0], u),
2554 oia.addr16[1], ia->addr16[1], u),
2555 oia.addr16[2], ia->addr16[2], u),
2556 oia.addr16[3], ia->addr16[3], u),
2557 oia.addr16[4], ia->addr16[4], u),
2558 oia.addr16[5], ia->addr16[5], u),
2559 oia.addr16[6], ia->addr16[6], u),
2560 oia.addr16[7], ia->addr16[7], u);
2561 break;
2562 }
2563 /* Change outer ip address, fix outer ip or icmpv6 checksum. */
2564 PF_ACPY(oa, na, af);
2565 switch (af) {
2566 #if INET
2567 case AF_INET:
2568 *hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
2569 ooa.addr16[0], oa->addr16[0], 0),
2570 ooa.addr16[1], oa->addr16[1], 0);
2571 break;
2572 #endif /* INET */
2573 case AF_INET6:
2574 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2575 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
2576 pf_cksum_fixup(pf_cksum_fixup(*ic,
2577 ooa.addr16[0], oa->addr16[0], u),
2578 ooa.addr16[1], oa->addr16[1], u),
2579 ooa.addr16[2], oa->addr16[2], u),
2580 ooa.addr16[3], oa->addr16[3], u),
2581 ooa.addr16[4], oa->addr16[4], u),
2582 ooa.addr16[5], oa->addr16[5], u),
2583 ooa.addr16[6], oa->addr16[6], u),
2584 ooa.addr16[7], oa->addr16[7], u);
2585 break;
2586 }
2587 }
2588
2589
2590 /*
2591 * Need to modulate the sequence numbers in the TCP SACK option
2592 * (credits to Krzysztof Pfaff for report and patch)
2593 */
2594 static __attribute__((noinline)) int
pf_modulate_sack(pbuf_t * pbuf,int off,struct pf_pdesc * pd,struct tcphdr * th,struct pf_state_peer * dst)2595 pf_modulate_sack(pbuf_t *pbuf, int off, struct pf_pdesc *pd,
2596 struct tcphdr *th, struct pf_state_peer *dst)
2597 {
2598 int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
2599 u_int8_t opts[MAX_TCPOPTLEN], *opt = opts;
2600 int copyback = 0, i, olen;
2601 struct sackblk sack;
2602
2603 #define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2)
2604 if (hlen < TCPOLEN_SACKLEN ||
2605 !pf_pull_hdr(pbuf, off + sizeof(*th), opts, sizeof(opts), hlen, NULL, NULL, pd->af)) {
2606 return 0;
2607 }
2608
2609 while (hlen >= TCPOLEN_SACKLEN) {
2610 olen = opt[1];
2611 switch (*opt) {
2612 case TCPOPT_EOL: /* FALLTHROUGH */
2613 case TCPOPT_NOP:
2614 opt++;
2615 hlen--;
2616 break;
2617 case TCPOPT_SACK:
2618 if (olen > hlen) {
2619 olen = hlen;
2620 }
2621 if (olen >= TCPOLEN_SACKLEN) {
2622 for (i = 2; i + TCPOLEN_SACK <= olen;
2623 i += TCPOLEN_SACK) {
2624 memcpy(&sack, &opt[i], sizeof(sack));
2625 pf_change_a(&sack.start, &th->th_sum,
2626 htonl(ntohl(sack.start) -
2627 dst->seqdiff), 0);
2628 pf_change_a(&sack.end, &th->th_sum,
2629 htonl(ntohl(sack.end) -
2630 dst->seqdiff), 0);
2631 memcpy(&opt[i], &sack, sizeof(sack));
2632 }
2633 copyback = off + sizeof(*th) + thoptlen;
2634 }
2635 OS_FALLTHROUGH;
2636 default:
2637 if (olen < 2) {
2638 olen = 2;
2639 }
2640 hlen -= olen;
2641 opt += olen;
2642 }
2643 }
2644
2645 if (copyback) {
2646 if (pf_lazy_makewritable(pd, pbuf, copyback) == NULL) {
2647 return -1;
2648 }
2649 pbuf_copy_back(pbuf, off + sizeof(*th), thoptlen, opts, sizeof(opts));
2650 }
2651 return copyback;
2652 }
2653
2654 /*
2655 * XXX
2656 *
2657 * The following functions (pf_send_tcp and pf_send_icmp) are somewhat
2658 * special in that they originate "spurious" packets rather than
2659 * filter/NAT existing packets. As such, they're not a great fit for
2660 * the 'pbuf' shim, which assumes the underlying packet buffers are
2661 * allocated elsewhere.
2662 *
2663 * Since these functions are rarely used, we'll carry on allocating mbufs
2664 * and passing them to the IP stack for eventual routing.
2665 */
2666 static __attribute__((noinline)) void
pf_send_tcp(const struct pf_rule * r,sa_family_t af,const struct pf_addr * saddr,const struct pf_addr * daddr,u_int16_t sport,u_int16_t dport,u_int32_t seq,u_int32_t ack,u_int8_t flags,u_int16_t win,u_int16_t mss,u_int8_t ttl,int tag,u_int16_t rtag,struct ether_header * eh,struct ifnet * ifp)2667 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
2668 const struct pf_addr *saddr, const struct pf_addr *daddr,
2669 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
2670 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
2671 u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
2672 {
2673 #pragma unused(eh, ifp)
2674 struct mbuf *m;
2675 int len, tlen;
2676 #if INET
2677 struct ip *h = NULL;
2678 #endif /* INET */
2679 struct ip6_hdr *h6 = NULL;
2680 struct tcphdr *th = NULL;
2681 char *opt;
2682 struct pf_mtag *pf_mtag;
2683
2684 /* maximum segment size tcp option */
2685 tlen = sizeof(struct tcphdr);
2686 if (mss) {
2687 tlen += 4;
2688 }
2689
2690 switch (af) {
2691 #if INET
2692 case AF_INET:
2693 len = sizeof(struct ip) + tlen;
2694 break;
2695 #endif /* INET */
2696 case AF_INET6:
2697 len = sizeof(struct ip6_hdr) + tlen;
2698 break;
2699 default:
2700 panic("pf_send_tcp: not AF_INET or AF_INET6!");
2701 return;
2702 }
2703
2704 /* create outgoing mbuf */
2705 m = m_gethdr(M_DONTWAIT, MT_HEADER);
2706 if (m == NULL) {
2707 return;
2708 }
2709
2710 if ((pf_mtag = pf_get_mtag(m)) == NULL) {
2711 return;
2712 }
2713
2714 if (tag) {
2715 pf_mtag->pftag_flags |= PF_TAG_GENERATED;
2716 }
2717 pf_mtag->pftag_tag = rtag;
2718
2719 if (r != NULL && PF_RTABLEID_IS_VALID(r->rtableid)) {
2720 pf_mtag->pftag_rtableid = r->rtableid;
2721 }
2722
2723 #if PF_ECN
2724 /* add hints for ecn */
2725 pf_mtag->pftag_hdr = mtod(m, struct ip *);
2726 /* record address family */
2727 pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6);
2728 switch (af) {
2729 #if INET
2730 case AF_INET:
2731 pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
2732 break;
2733 #endif /* INET */
2734 case AF_INET6:
2735 pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
2736 break;
2737 }
2738 #endif /* PF_ECN */
2739
2740 /* indicate this is TCP */
2741 m->m_pkthdr.pkt_proto = IPPROTO_TCP;
2742
2743 /* Make sure headers are 32-bit aligned */
2744 m->m_data += max_linkhdr;
2745 m->m_pkthdr.len = m->m_len = len;
2746 m->m_pkthdr.rcvif = NULL;
2747 bzero(m_mtod_current(m), len);
2748 switch (af) {
2749 #if INET
2750 case AF_INET:
2751 h = mtod(m, struct ip *);
2752
2753 /* IP header fields included in the TCP checksum */
2754 h->ip_p = IPPROTO_TCP;
2755 h->ip_len = htons(tlen);
2756 h->ip_src.s_addr = saddr->v4addr.s_addr;
2757 h->ip_dst.s_addr = daddr->v4addr.s_addr;
2758
2759 th = (struct tcphdr *)(void *)((caddr_t)h + sizeof(struct ip));
2760 break;
2761 #endif /* INET */
2762 case AF_INET6:
2763 h6 = mtod(m, struct ip6_hdr *);
2764
2765 /* IP header fields included in the TCP checksum */
2766 h6->ip6_nxt = IPPROTO_TCP;
2767 h6->ip6_plen = htons(tlen);
2768 memcpy((void *)&h6->ip6_src, &saddr->v6addr, sizeof(struct in6_addr));
2769 memcpy((void *)&h6->ip6_dst, &daddr->v6addr, sizeof(struct in6_addr));
2770
2771 th = (struct tcphdr *)(void *)
2772 ((caddr_t)h6 + sizeof(struct ip6_hdr));
2773 break;
2774 }
2775
2776 /* TCP header */
2777 th->th_sport = sport;
2778 th->th_dport = dport;
2779 th->th_seq = htonl(seq);
2780 th->th_ack = htonl(ack);
2781 th->th_off = tlen >> 2;
2782 th->th_flags = flags;
2783 th->th_win = htons(win);
2784
2785 if (mss) {
2786 opt = (char *)(th + 1);
2787 opt[0] = TCPOPT_MAXSEG;
2788 opt[1] = 4;
2789 #if BYTE_ORDER != BIG_ENDIAN
2790 HTONS(mss);
2791 #endif
2792 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
2793 }
2794
2795 switch (af) {
2796 #if INET
2797 case AF_INET: {
2798 struct route ro;
2799
2800 /* TCP checksum */
2801 th->th_sum = in_cksum(m, len);
2802
2803 /* Finish the IP header */
2804 h->ip_v = 4;
2805 h->ip_hl = sizeof(*h) >> 2;
2806 h->ip_tos = IPTOS_LOWDELAY;
2807 /*
2808 * ip_output() expects ip_len and ip_off to be in host order.
2809 */
2810 h->ip_len = len;
2811 h->ip_off = (path_mtu_discovery ? IP_DF : 0);
2812 h->ip_ttl = ttl ? ttl : ip_defttl;
2813 h->ip_sum = 0;
2814
2815 bzero(&ro, sizeof(ro));
2816 ip_output(m, NULL, &ro, 0, NULL, NULL);
2817 ROUTE_RELEASE(&ro);
2818 break;
2819 }
2820 #endif /* INET */
2821 case AF_INET6: {
2822 struct route_in6 ro6;
2823
2824 /* TCP checksum */
2825 th->th_sum = in6_cksum(m, IPPROTO_TCP,
2826 sizeof(struct ip6_hdr), tlen);
2827
2828 h6->ip6_vfc |= IPV6_VERSION;
2829 h6->ip6_hlim = IPV6_DEFHLIM;
2830
2831 ip6_output_setsrcifscope(m, IFSCOPE_UNKNOWN, NULL);
2832 ip6_output_setdstifscope(m, IFSCOPE_UNKNOWN, NULL);
2833 bzero(&ro6, sizeof(ro6));
2834 ip6_output(m, NULL, &ro6, 0, NULL, NULL, NULL);
2835 ROUTE_RELEASE(&ro6);
2836 break;
2837 }
2838 }
2839 }
2840
2841 static __attribute__((noinline)) void
pf_send_icmp(pbuf_t * pbuf,u_int8_t type,u_int8_t code,sa_family_t af,struct pf_rule * r)2842 pf_send_icmp(pbuf_t *pbuf, u_int8_t type, u_int8_t code, sa_family_t af,
2843 struct pf_rule *r)
2844 {
2845 struct mbuf *m0;
2846 struct pf_mtag *pf_mtag;
2847
2848 m0 = pbuf_clone_to_mbuf(pbuf);
2849 if (m0 == NULL) {
2850 return;
2851 }
2852
2853 if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
2854 return;
2855 }
2856
2857 pf_mtag->pftag_flags |= PF_TAG_GENERATED;
2858
2859 if (PF_RTABLEID_IS_VALID(r->rtableid)) {
2860 pf_mtag->pftag_rtableid = r->rtableid;
2861 }
2862
2863 #if PF_ECN
2864 /* add hints for ecn */
2865 pf_mtag->pftag_hdr = mtod(m0, struct ip *);
2866 /* record address family */
2867 pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6);
2868 switch (af) {
2869 #if INET
2870 case AF_INET:
2871 pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
2872 m0->m_pkthdr.pkt_proto = IPPROTO_ICMP;
2873 break;
2874 #endif /* INET */
2875 case AF_INET6:
2876 pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
2877 m0->m_pkthdr.pkt_proto = IPPROTO_ICMPV6;
2878 break;
2879 }
2880 #endif /* PF_ECN */
2881
2882 switch (af) {
2883 #if INET
2884 case AF_INET:
2885 icmp_error(m0, type, code, 0, 0);
2886 break;
2887 #endif /* INET */
2888 case AF_INET6:
2889 icmp6_error(m0, type, code, 0);
2890 break;
2891 }
2892 }
2893
2894 /*
2895 * Return 1 if the addresses a and b match (with mask m), otherwise return 0.
2896 * If n is 0, they match if they are equal. If n is != 0, they match if they
2897 * are different.
2898 */
2899 int
pf_match_addr(u_int8_t n,struct pf_addr * a,struct pf_addr * m,struct pf_addr * b,sa_family_t af)2900 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
2901 struct pf_addr *b, sa_family_t af)
2902 {
2903 int match = 0;
2904
2905 switch (af) {
2906 #if INET
2907 case AF_INET:
2908 if ((a->addr32[0] & m->addr32[0]) ==
2909 (b->addr32[0] & m->addr32[0])) {
2910 match++;
2911 }
2912 break;
2913 #endif /* INET */
2914 case AF_INET6:
2915 if (((a->addr32[0] & m->addr32[0]) ==
2916 (b->addr32[0] & m->addr32[0])) &&
2917 ((a->addr32[1] & m->addr32[1]) ==
2918 (b->addr32[1] & m->addr32[1])) &&
2919 ((a->addr32[2] & m->addr32[2]) ==
2920 (b->addr32[2] & m->addr32[2])) &&
2921 ((a->addr32[3] & m->addr32[3]) ==
2922 (b->addr32[3] & m->addr32[3]))) {
2923 match++;
2924 }
2925 break;
2926 }
2927 if (match) {
2928 if (n) {
2929 return 0;
2930 } else {
2931 return 1;
2932 }
2933 } else {
2934 if (n) {
2935 return 1;
2936 } else {
2937 return 0;
2938 }
2939 }
2940 }
2941
2942 /*
2943 * Return 1 if b <= a <= e, otherwise return 0.
2944 */
2945 int
pf_match_addr_range(struct pf_addr * b,struct pf_addr * e,struct pf_addr * a,sa_family_t af)2946 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
2947 struct pf_addr *a, sa_family_t af)
2948 {
2949 switch (af) {
2950 #if INET
2951 case AF_INET:
2952 if ((a->addr32[0] < b->addr32[0]) ||
2953 (a->addr32[0] > e->addr32[0])) {
2954 return 0;
2955 }
2956 break;
2957 #endif /* INET */
2958 case AF_INET6: {
2959 int i;
2960
2961 /* check a >= b */
2962 for (i = 0; i < 4; ++i) {
2963 if (a->addr32[i] > b->addr32[i]) {
2964 break;
2965 } else if (a->addr32[i] < b->addr32[i]) {
2966 return 0;
2967 }
2968 }
2969 /* check a <= e */
2970 for (i = 0; i < 4; ++i) {
2971 if (a->addr32[i] < e->addr32[i]) {
2972 break;
2973 } else if (a->addr32[i] > e->addr32[i]) {
2974 return 0;
2975 }
2976 }
2977 break;
2978 }
2979 }
2980 return 1;
2981 }
2982
2983 int
pf_match(u_int8_t op,u_int32_t a1,u_int32_t a2,u_int32_t p)2984 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
2985 {
2986 switch (op) {
2987 case PF_OP_IRG:
2988 return (p > a1) && (p < a2);
2989 case PF_OP_XRG:
2990 return (p < a1) || (p > a2);
2991 case PF_OP_RRG:
2992 return (p >= a1) && (p <= a2);
2993 case PF_OP_EQ:
2994 return p == a1;
2995 case PF_OP_NE:
2996 return p != a1;
2997 case PF_OP_LT:
2998 return p < a1;
2999 case PF_OP_LE:
3000 return p <= a1;
3001 case PF_OP_GT:
3002 return p > a1;
3003 case PF_OP_GE:
3004 return p >= a1;
3005 }
3006 return 0; /* never reached */
3007 }
3008
/*
 * Port flavour of pf_match(): on hosts that are not big-endian the two
 * operands and the port are first converted with NTOHS(), then the
 * comparison is delegated to pf_match().
 */
int
pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
{
	u_int16_t lo = a1;
	u_int16_t hi = a2;
	u_int16_t port = p;

#if BYTE_ORDER != BIG_ENDIAN
	NTOHS(lo);
	NTOHS(hi);
	NTOHS(port);
#endif
	return pf_match(op, lo, hi, port);
}
3019
3020 int
pf_match_xport(u_int8_t proto,u_int8_t proto_variant,union pf_rule_xport * rx,union pf_state_xport * sx)3021 pf_match_xport(u_int8_t proto, u_int8_t proto_variant, union pf_rule_xport *rx,
3022 union pf_state_xport *sx)
3023 {
3024 int d = !0;
3025
3026 if (sx) {
3027 switch (proto) {
3028 case IPPROTO_GRE:
3029 if (proto_variant == PF_GRE_PPTP_VARIANT) {
3030 d = (rx->call_id == sx->call_id);
3031 }
3032 break;
3033
3034 case IPPROTO_ESP:
3035 d = (rx->spi == sx->spi);
3036 break;
3037
3038 case IPPROTO_TCP:
3039 case IPPROTO_UDP:
3040 case IPPROTO_ICMP:
3041 case IPPROTO_ICMPV6:
3042 if (rx->range.op) {
3043 d = pf_match_port(rx->range.op,
3044 rx->range.port[0], rx->range.port[1],
3045 sx->port);
3046 }
3047 break;
3048
3049 default:
3050 break;
3051 }
3052 }
3053
3054 return d;
3055 }
3056
3057 int
pf_match_uid(u_int8_t op,uid_t a1,uid_t a2,uid_t u)3058 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
3059 {
3060 if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) {
3061 return 0;
3062 }
3063 return pf_match(op, a1, a2, u);
3064 }
3065
3066 int
pf_match_gid(u_int8_t op,gid_t a1,gid_t a2,gid_t g)3067 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
3068 {
3069 if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) {
3070 return 0;
3071 }
3072 return pf_match(op, a1, a2, g);
3073 }
3074
3075 static int
pf_match_tag(struct pf_rule * r,struct pf_mtag * pf_mtag,int * tag)3076 pf_match_tag(struct pf_rule *r, struct pf_mtag *pf_mtag,
3077 int *tag)
3078 {
3079 if (*tag == -1) {
3080 *tag = pf_mtag->pftag_tag;
3081 }
3082
3083 return (!r->match_tag_not && r->match_tag == *tag) ||
3084 (r->match_tag_not && r->match_tag != *tag);
3085 }
3086
3087 int
pf_tag_packet(pbuf_t * pbuf,struct pf_mtag * pf_mtag,int tag,unsigned int rtableid,struct pf_pdesc * pd)3088 pf_tag_packet(pbuf_t *pbuf, struct pf_mtag *pf_mtag, int tag,
3089 unsigned int rtableid, struct pf_pdesc *pd)
3090 {
3091 if (tag <= 0 && !PF_RTABLEID_IS_VALID(rtableid) &&
3092 (pd == NULL || !(pd->pktflags & PKTF_FLOW_ID))) {
3093 return 0;
3094 }
3095
3096 if (pf_mtag == NULL && (pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
3097 return 1;
3098 }
3099
3100 if (tag > 0) {
3101 pf_mtag->pftag_tag = tag;
3102 }
3103 if (PF_RTABLEID_IS_VALID(rtableid)) {
3104 pf_mtag->pftag_rtableid = rtableid;
3105 }
3106 if (pd != NULL && (pd->pktflags & PKTF_FLOW_ID)) {
3107 *pbuf->pb_flowsrc = pd->flowsrc;
3108 *pbuf->pb_flowid = pd->flowhash;
3109 *pbuf->pb_flags |= pd->pktflags;
3110 *pbuf->pb_proto = pd->proto;
3111 }
3112
3113 return 0;
3114 }
3115
3116 void
pf_step_into_anchor(int * depth,struct pf_ruleset ** rs,int n,struct pf_rule ** r,struct pf_rule ** a,int * match)3117 pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n,
3118 struct pf_rule **r, struct pf_rule **a, int *match)
3119 {
3120 struct pf_anchor_stackframe *f;
3121
3122 (*r)->anchor->match = 0;
3123 if (match) {
3124 *match = 0;
3125 }
3126 if (*depth >= (int)sizeof(pf_anchor_stack) /
3127 (int)sizeof(pf_anchor_stack[0])) {
3128 printf("pf_step_into_anchor: stack overflow\n");
3129 *r = TAILQ_NEXT(*r, entries);
3130 return;
3131 } else if (*depth == 0 && a != NULL) {
3132 *a = *r;
3133 }
3134 f = pf_anchor_stack + (*depth)++;
3135 f->rs = *rs;
3136 f->r = *r;
3137 if ((*r)->anchor_wildcard) {
3138 f->parent = &(*r)->anchor->children;
3139 if ((f->child = RB_MIN(pf_anchor_node, f->parent)) ==
3140 NULL) {
3141 *r = NULL;
3142 return;
3143 }
3144 *rs = &f->child->ruleset;
3145 } else {
3146 f->parent = NULL;
3147 f->child = NULL;
3148 *rs = &(*r)->anchor->ruleset;
3149 }
3150 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
3151 }
3152
3153 int
pf_step_out_of_anchor(int * depth,struct pf_ruleset ** rs,int n,struct pf_rule ** r,struct pf_rule ** a,int * match)3154 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n,
3155 struct pf_rule **r, struct pf_rule **a, int *match)
3156 {
3157 struct pf_anchor_stackframe *f;
3158 int quick = 0;
3159
3160 do {
3161 if (*depth <= 0) {
3162 break;
3163 }
3164 f = pf_anchor_stack + *depth - 1;
3165 if (f->parent != NULL && f->child != NULL) {
3166 if (f->child->match ||
3167 (match != NULL && *match)) {
3168 f->r->anchor->match = 1;
3169 if (match) {
3170 *match = 0;
3171 }
3172 }
3173 f->child = RB_NEXT(pf_anchor_node, f->parent, f->child);
3174 if (f->child != NULL) {
3175 *rs = &f->child->ruleset;
3176 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
3177 if (*r == NULL) {
3178 continue;
3179 } else {
3180 break;
3181 }
3182 }
3183 }
3184 (*depth)--;
3185 if (*depth == 0 && a != NULL) {
3186 *a = NULL;
3187 }
3188 *rs = f->rs;
3189 if (f->r->anchor->match || (match != NULL && *match)) {
3190 quick = f->r->quick;
3191 }
3192 *r = TAILQ_NEXT(f->r, entries);
3193 } while (*r == NULL);
3194
3195 return quick;
3196 }
3197
3198 void
pf_poolmask(struct pf_addr * naddr,struct pf_addr * raddr,struct pf_addr * rmask,struct pf_addr * saddr,sa_family_t af)3199 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
3200 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
3201 {
3202 switch (af) {
3203 #if INET
3204 case AF_INET:
3205 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3206 ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
3207 break;
3208 #endif /* INET */
3209 case AF_INET6:
3210 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
3211 ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
3212 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
3213 ((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]);
3214 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
3215 ((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]);
3216 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
3217 ((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]);
3218 break;
3219 }
3220 }
3221
3222 void
pf_addr_inc(struct pf_addr * addr,sa_family_t af)3223 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
3224 {
3225 switch (af) {
3226 #if INET
3227 case AF_INET:
3228 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
3229 break;
3230 #endif /* INET */
3231 case AF_INET6:
3232 if (addr->addr32[3] == 0xffffffff) {
3233 addr->addr32[3] = 0;
3234 if (addr->addr32[2] == 0xffffffff) {
3235 addr->addr32[2] = 0;
3236 if (addr->addr32[1] == 0xffffffff) {
3237 addr->addr32[1] = 0;
3238 addr->addr32[0] =
3239 htonl(ntohl(addr->addr32[0]) + 1);
3240 } else {
3241 addr->addr32[1] =
3242 htonl(ntohl(addr->addr32[1]) + 1);
3243 }
3244 } else {
3245 addr->addr32[2] =
3246 htonl(ntohl(addr->addr32[2]) + 1);
3247 }
3248 } else {
3249 addr->addr32[3] =
3250 htonl(ntohl(addr->addr32[3]) + 1);
3251 }
3252 break;
3253 }
3254 }
3255
/*
 * Scramble three 32-bit values in place using nine subtract/subtract/
 * xor-shift steps.  All three arguments must be modifiable lvalues and
 * each one is evaluated several times.
 */
#define mix(a, b, c) \
	do { \
	        (a) -= (b); \
	        (a) -= (c); \
	        (a) ^= ((c) >> 13); \
	        (b) -= (c); \
	        (b) -= (a); \
	        (b) ^= ((a) << 8); \
	        (c) -= (a); \
	        (c) -= (b); \
	        (c) ^= ((b) >> 13); \
	        (a) -= (b); \
	        (a) -= (c); \
	        (a) ^= ((c) >> 12); \
	        (b) -= (c); \
	        (b) -= (a); \
	        (b) ^= ((a) << 16); \
	        (c) -= (a); \
	        (c) -= (b); \
	        (c) ^= ((b) >> 5); \
	        (a) -= (b); \
	        (a) -= (c); \
	        (a) ^= ((c) >> 3); \
	        (b) -= (c); \
	        (b) -= (a); \
	        (b) ^= ((a) << 10); \
	        (c) -= (a); \
	        (c) -= (b); \
	        (c) ^= ((b) >> 15); \
	} while (0)
3268
3269 /*
3270 * hash function based on bridge_hash in if_bridge.c
3271 */
3272 static void
pf_hash(struct pf_addr * inaddr,struct pf_addr * hash,struct pf_poolhashkey * key,sa_family_t af)3273 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
3274 struct pf_poolhashkey *key, sa_family_t af)
3275 {
3276 u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
3277
3278 switch (af) {
3279 #if INET
3280 case AF_INET:
3281 a += inaddr->addr32[0];
3282 b += key->key32[1];
3283 mix(a, b, c);
3284 hash->addr32[0] = c + key->key32[2];
3285 break;
3286 #endif /* INET */
3287 case AF_INET6:
3288 a += inaddr->addr32[0];
3289 b += inaddr->addr32[2];
3290 mix(a, b, c);
3291 hash->addr32[0] = c;
3292 a += inaddr->addr32[1];
3293 b += inaddr->addr32[3];
3294 c += key->key32[1];
3295 mix(a, b, c);
3296 hash->addr32[1] = c;
3297 a += inaddr->addr32[2];
3298 b += inaddr->addr32[1];
3299 c += key->key32[2];
3300 mix(a, b, c);
3301 hash->addr32[2] = c;
3302 a += inaddr->addr32[3];
3303 b += inaddr->addr32[0];
3304 c += key->key32[3];
3305 mix(a, b, c);
3306 hash->addr32[3] = c;
3307 break;
3308 }
3309 }
3310
/*
 * Select a translation address from rule r's address pool.
 *
 * af        : address family of the packet being translated
 * r         : rule whose rpool is consulted (and whose round-robin/random
 *             state, rpool->cur / counter / tblidx, may be advanced)
 * saddr     : packet source address (source-node key, bitmask and
 *             source-hash input)
 * naddr     : output, the selected address
 * init_addr : optional; when non-NULL and still all-zero it receives the
 *             first address chosen by the random / round-robin pools so a
 *             caller can detect that the pool has wrapped
 * sn        : in/out source node; looked up here for sticky-address pools,
 *             and when non-NULL on exit its raddr is set to naddr
 *
 * Returns 0 when naddr has been filled in, 1 when no address could be
 * selected.
 */
static __attribute__((noinline)) int
pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
{
	unsigned char hash[16];
	struct pf_pool *__single rpool = &r->rpool;
	struct pf_addr *__single raddr = &rpool->cur->addr.v.a.addr;
	struct pf_addr *__single rmask = &rpool->cur->addr.v.a.mask;
	/* pool entry we started on; used to detect a full round-robin lap */
	struct pf_pooladdr *__single acur = rpool->cur;
	struct pf_src_node k;

	/*
	 * Sticky-address pools: if the caller has no source node yet, look
	 * one up for this source and reuse the address it recorded.
	 */
	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		k.af = af;
		PF_ACPY(&k.addr, saddr, af);
		/*
		 * PF_POOL_STICKYADDR is already required by the enclosing
		 * test, so this condition holds whenever we get here.
		 */
		if (r->rule_flag & PFRULE_RULESRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR) {
			k.rule.ptr = r;
		} else {
			k.rule.ptr = NULL;
		}
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, rpool->af)) {
			PF_ACPY(naddr, &(*sn)->raddr, rpool->af);
			if (pf_status.debug >= PF_DEBUG_MISC) {
				printf("pf_map_addr: src tracking maps ");
				pf_print_host(&k.addr, 0, af);
				printf(" to ");
				pf_print_host(naddr, 0, rpool->af);
				printf("\n");
			}
			return 0;
		}
	}

	/* Resolve raddr/rmask for the current pool entry by address type. */
	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
		return 1;
	}
	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
		if (rpool->cur->addr.p.dyn == NULL) {
			return 1;
		}
		switch (rpool->af) {
#if INET
		case AF_INET:
			/* no IPv4 address: only round-robin may carry on */
			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN) {
				return 1;
			}
			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
			break;
#endif /* INET */
		case AF_INET6:
			/* no IPv6 address: only round-robin may carry on */
			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN) {
				return 1;
			}
			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
			break;
		}
	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
			return 1; /* unsupported */
		}
	} else {
		raddr = &rpool->cur->addr.v.a.addr;
		rmask = &rpool->cur->addr.v.a.mask;
	}

	switch (rpool->opts & PF_POOL_TYPEMASK) {
	case PF_POOL_NONE:
		PF_ACPY(naddr, raddr, rpool->af);
		break;
	case PF_POOL_BITMASK:
		ASSERT(af == rpool->af);
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		break;
	case PF_POOL_RANDOM:
		if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) {
			/*
			 * First selection for this caller: seed the pool
			 * counter with random bits.
			 */
			switch (af) {
#if INET
			case AF_INET:
				rpool->counter.addr32[0] = htonl(random());
				break;
#endif /* INET */
			case AF_INET6:
				/*
				 * Randomize words from the last one backwards,
				 * stopping at the first word whose mask is
				 * all ones.
				 */
				if (rmask->addr32[3] != 0xffffffff) {
					rpool->counter.addr32[3] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[2] != 0xffffffff) {
					rpool->counter.addr32[2] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[1] != 0xffffffff) {
					rpool->counter.addr32[1] =
					    RandomULong();
				} else {
					break;
				}
				if (rmask->addr32[0] != 0xffffffff) {
					rpool->counter.addr32[0] =
					    RandomULong();
				}
				break;
			}
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter,
			    rpool->af);
			PF_ACPY(init_addr, naddr, rpool->af);
		} else {
			/* subsequent selections step the counter by one */
			PF_AINC(&rpool->counter, rpool->af);
			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter,
			    rpool->af);
		}
		break;
	case PF_POOL_SRCHASH:
		ASSERT(af == rpool->af);
		/*
		 * NOTE(review): naddr is computed twice; the second
		 * PF_POOLMASK, fed with the keyed hash of saddr, presumably
		 * determines the final value -- confirm against the macro.
		 */
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		pf_hash(saddr, (struct pf_addr *)(void *)&hash,
		    &rpool->key, af);
		PF_POOLMASK(naddr, raddr, rmask,
		    (struct pf_addr *)(void *)&hash, af);
		break;
	case PF_POOL_ROUNDROBIN:
		/*
		 * First try to keep using the current pool entry; a zero
		 * return from pfr_pool_get() is treated as "address found".
		 */
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				goto get_addr;
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			if (rpool->cur->addr.p.dyn != NULL &&
			    !pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, af)) {
				goto get_addr;
			}
		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter,
		    rpool->af)) {
			goto get_addr;
		}

try_next:
		/* advance to the next pool entry, wrapping to the first */
		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) {
			rpool->cur = TAILQ_FIRST(&rpool->list);
		}
		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
			rpool->tblidx = -1;
			if (pfr_pool_get(rpool->cur->addr.p.tbl,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				/* table contains no address of type
				 * 'rpool->af' */
				if (rpool->cur != acur) {
					goto try_next;
				}
				/* back at the starting entry: give up */
				return 1;
			}
		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
			rpool->tblidx = -1;
			if (rpool->cur->addr.p.dyn == NULL) {
				return 1;
			}
			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
			    &rpool->tblidx, &rpool->counter,
			    &raddr, &rmask, rpool->af)) {
				/* table contains no address of type
				 * 'rpool->af' */
				if (rpool->cur != acur) {
					goto try_next;
				}
				/* back at the starting entry: give up */
				return 1;
			}
		} else {
			raddr = &rpool->cur->addr.v.a.addr;
			rmask = &rpool->cur->addr.v.a.mask;
			PF_ACPY(&rpool->counter, raddr, rpool->af);
		}

get_addr:
		/* hand out the counter value, then step it for the next call */
		PF_ACPY(naddr, &rpool->counter, rpool->af);
		if (init_addr != NULL && PF_AZERO(init_addr, rpool->af)) {
			PF_ACPY(init_addr, naddr, rpool->af);
		}
		PF_AINC(&rpool->counter, rpool->af);
		break;
	}
	/* remember the choice in the source node, if there is one */
	if (*sn != NULL) {
		PF_ACPY(&(*sn)->raddr, naddr, rpool->af);
	}

	if (pf_status.debug >= PF_DEBUG_MISC &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		printf("pf_map_addr: selected address ");
		pf_print_host(naddr, 0, rpool->af);
		printf("\n");
	}

	return 0;
}
3520
3521 static __attribute__((noinline)) int
pf_get_sport(struct pf_pdesc * pd,struct pfi_kif * kif,struct pf_rule * r,struct pf_addr * saddr,union pf_state_xport * sxport,struct pf_addr * daddr,union pf_state_xport * dxport,struct pf_addr * naddr,union pf_state_xport * nxport,struct pf_src_node ** sn,netns_token * pnstoken)3522 pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r,
3523 struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
3524 union pf_state_xport *dxport, struct pf_addr *naddr,
3525 union pf_state_xport *nxport, struct pf_src_node **sn
3526 #if SKYWALK
3527 , netns_token *pnstoken
3528 #endif
3529 )
3530 {
3531 #pragma unused(kif)
3532 struct pf_state_key_cmp key;
3533 struct pf_addr init_addr;
3534 unsigned int cut;
3535 sa_family_t af = pd->af;
3536 u_int8_t proto = pd->proto;
3537 unsigned int low = r->rpool.proxy_port[0];
3538 unsigned int high = r->rpool.proxy_port[1];
3539
3540 bzero(&init_addr, sizeof(init_addr));
3541 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) {
3542 return 1;
3543 }
3544
3545 if (proto == IPPROTO_ICMP) {
3546 low = 1;
3547 high = 65535;
3548 }
3549
3550 if (!nxport) {
3551 return 0; /* No output necessary. */
3552 }
3553 /*--- Special mapping rules for UDP ---*/
3554 if (proto == IPPROTO_UDP) {
3555 /*--- Never float IKE source port ---*/
3556 if (ntohs(sxport->port) == PF_IKE_PORT) {
3557 nxport->port = sxport->port;
3558 return 0;
3559 }
3560
3561 /*--- Apply exterior mapping options ---*/
3562 if (r->extmap > PF_EXTMAP_APD) {
3563 struct pf_state *s;
3564
3565 TAILQ_FOREACH(s, &state_list, entry_list) {
3566 struct pf_state_key *sk = s->state_key;
3567 if (!sk) {
3568 continue;
3569 }
3570 if (s->nat_rule.ptr != r) {
3571 continue;
3572 }
3573 if (sk->proto != IPPROTO_UDP ||
3574 sk->af_lan != af) {
3575 continue;
3576 }
3577 if (sk->lan.xport.port != sxport->port) {
3578 continue;
3579 }
3580 if (PF_ANEQ(&sk->lan.addr, saddr, af)) {
3581 continue;
3582 }
3583 if (r->extmap < PF_EXTMAP_EI &&
3584 PF_ANEQ(&sk->ext_lan.addr, daddr, af)) {
3585 continue;
3586 }
3587
3588 #if SKYWALK
3589 if (netns_reserve(pnstoken, naddr->addr32,
3590 NETNS_AF_SIZE(af), proto, sxport->port,
3591 NETNS_PF, NULL) != 0) {
3592 return 1;
3593 }
3594 #endif
3595 nxport->port = sk->gwy.xport.port;
3596 return 0;
3597 }
3598 }
3599 } else if (proto == IPPROTO_TCP) {
3600 struct pf_state* s;
3601 /*
3602 * APPLE MODIFICATION: <rdar://problem/6546358>
3603 * Fix allows....NAT to use a single binding for TCP session
3604 * with same source IP and source port
3605 */
3606 TAILQ_FOREACH(s, &state_list, entry_list) {
3607 struct pf_state_key* sk = s->state_key;
3608 if (!sk) {
3609 continue;
3610 }
3611 if (s->nat_rule.ptr != r) {
3612 continue;
3613 }
3614 if (sk->proto != IPPROTO_TCP || sk->af_lan != af) {
3615 continue;
3616 }
3617 if (sk->lan.xport.port != sxport->port) {
3618 continue;
3619 }
3620 if (!(PF_AEQ(&sk->lan.addr, saddr, af))) {
3621 continue;
3622 }
3623 #if SKYWALK
3624 if (netns_reserve(pnstoken, naddr->addr32,
3625 NETNS_AF_SIZE(af), proto, sxport->port,
3626 NETNS_PF, NULL) != 0) {
3627 return 1;
3628 }
3629 #endif
3630 nxport->port = sk->gwy.xport.port;
3631 return 0;
3632 }
3633 }
3634 do {
3635 key.af_gwy = af;
3636 key.proto = proto;
3637 PF_ACPY(&key.ext_gwy.addr, daddr, key.af_gwy);
3638 PF_ACPY(&key.gwy.addr, naddr, key.af_gwy);
3639 switch (proto) {
3640 case IPPROTO_UDP:
3641 key.proto_variant = r->extfilter;
3642 break;
3643 default:
3644 key.proto_variant = 0;
3645 break;
3646 }
3647 if (dxport) {
3648 key.ext_gwy.xport = *dxport;
3649 } else {
3650 memset(&key.ext_gwy.xport, 0,
3651 sizeof(key.ext_gwy.xport));
3652 }
3653 /*
3654 * port search; start random, step;
3655 * similar 2 portloop in in_pcbbind
3656 */
3657 if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
3658 proto == IPPROTO_ICMP)) {
3659 if (dxport) {
3660 key.gwy.xport = *dxport;
3661 } else {
3662 memset(&key.gwy.xport, 0,
3663 sizeof(key.gwy.xport));
3664 }
3665 #if SKYWALK
3666 /* Nothing to do: netns handles TCP/UDP only */
3667 #endif
3668 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
3669 return 0;
3670 }
3671 } else if (low == 0 && high == 0) {
3672 key.gwy.xport = *nxport;
3673 if (pf_find_state_all(&key, PF_IN, NULL) == NULL
3674 #if SKYWALK
3675 && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
3676 || netns_reserve(pnstoken, naddr->addr32,
3677 NETNS_AF_SIZE(af), proto, nxport->port,
3678 NETNS_PF, NULL) == 0)
3679 #endif
3680 ) {
3681 return 0;
3682 }
3683 } else if (low == high) {
3684 key.gwy.xport.port = htons(low);
3685 if (pf_find_state_all(&key, PF_IN, NULL) == NULL
3686 #if SKYWALK
3687 && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
3688 || netns_reserve(pnstoken, naddr->addr32,
3689 NETNS_AF_SIZE(af), proto, htons(low),
3690 NETNS_PF, NULL) == 0)
3691 #endif
3692 ) {
3693 nxport->port = htons(low);
3694 return 0;
3695 }
3696 } else {
3697 unsigned int tmp;
3698 if (low > high) {
3699 tmp = low;
3700 low = high;
3701 high = tmp;
3702 }
3703 /* low < high */
3704 cut = htonl(random()) % (1 + high - low) + low;
3705 /* low <= cut <= high */
3706 for (tmp = cut; tmp <= high; ++(tmp)) {
3707 key.gwy.xport.port = htons(tmp);
3708 if (pf_find_state_all(&key, PF_IN, NULL) == NULL
3709 #if SKYWALK
3710 && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
3711 || netns_reserve(pnstoken, naddr->addr32,
3712 NETNS_AF_SIZE(af), proto, htons(tmp),
3713 NETNS_PF, NULL) == 0)
3714 #endif
3715 ) {
3716 nxport->port = htons(tmp);
3717 return 0;
3718 }
3719 }
3720 for (tmp = cut - 1; tmp >= low; --(tmp)) {
3721 key.gwy.xport.port = htons(tmp);
3722 if (pf_find_state_all(&key, PF_IN, NULL) == NULL
3723 #if SKYWALK
3724 && ((proto != IPPROTO_TCP && proto != IPPROTO_UDP)
3725 || netns_reserve(pnstoken, naddr->addr32,
3726 NETNS_AF_SIZE(af), proto, htons(tmp),
3727 NETNS_PF, NULL) == 0)
3728 #endif
3729 ) {
3730 nxport->port = htons(tmp);
3731 return 0;
3732 }
3733 }
3734 }
3735
3736 switch (r->rpool.opts & PF_POOL_TYPEMASK) {
3737 case PF_POOL_RANDOM:
3738 case PF_POOL_ROUNDROBIN:
3739 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) {
3740 return 1;
3741 }
3742 break;
3743 case PF_POOL_NONE:
3744 case PF_POOL_SRCHASH:
3745 case PF_POOL_BITMASK:
3746 default:
3747 return 1;
3748 }
3749 } while (!PF_AEQ(&init_addr, naddr, af));
3750
3751 return 1; /* none available */
3752 }
3753
/*
 * Find the first translation rule in main ruleset rs_num that matches the
 * packet.
 *
 * pd        : packet descriptor (address family, protocol, tag)
 * pbuf, off : packet buffer and protocol header offset (used for OS
 *             fingerprinting and tagging)
 * direction : PF_IN or PF_OUT
 * kif       : interface the rule's interface criterion is matched against
 * saddr/sxport, daddr/dxport : packet source and destination address/port
 * rs_num    : index of the ruleset to walk
 *
 * Returns the matching rule, or NULL when nothing matched, when tagging the
 * packet failed, or when the matching rule is one of the "no translation"
 * actions (PF_NONAT, PF_NORDR, PF_NOBINAT, PF_NONAT64).
 */
static __attribute__((noinline)) struct pf_rule *
pf_match_translation(struct pf_pdesc *pd, pbuf_t *pbuf, int off,
    int direction, struct pfi_kif *kif, struct pf_addr *saddr,
    union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, int rs_num)
{
	struct pf_rule *__single r, *__single rm = NULL;
	struct pf_ruleset *__single ruleset = NULL;
	int tag = -1;
	unsigned int rtableid = IFSCOPE_NONE;
	int asd = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
	while (r && rm == NULL) {
		struct pf_rule_addr *src = NULL, *dst = NULL;
		struct pf_addr_wrap *xdst = NULL;
		struct pf_addr_wrap *xsrc = NULL;
		/* only initialized (and only read) when xsrc is set below */
		union pf_rule_xport rdrxport;

		/*
		 * Decide which rule fields the packet is compared against:
		 * for the reverse direction of binat/rdr the rule's dst (and
		 * its pool address) take the place of the usual src/dst.
		 */
		if (r->action == PF_BINAT && direction == PF_IN) {
			src = &r->dst;
			if (r->rpool.cur != NULL) {
				xdst = &r->rpool.cur->addr;
			}
		} else if (r->action == PF_RDR && direction == PF_OUT) {
			dst = &r->src;
			src = &r->dst;
			if (r->rpool.cur != NULL) {
				rdrxport.range.op = PF_OP_EQ;
				rdrxport.range.port[0] =
				    htons(r->rpool.proxy_port[0]);
				xsrc = &r->rpool.cur->addr;
			}
		} else {
			src = &r->src;
			dst = &r->dst;
		}

		r->evaluations++;
		/*
		 * Each failed criterion moves on either via the rule's
		 * precomputed skip pointer or to the next rule in the list.
		 */
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != direction) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != pd->af) {
			r = r->skip[PF_SKIP_AF].ptr;
		} else if (r->proto && r->proto != pd->proto) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		} else if (xsrc && PF_MISMATCHAW(xsrc, saddr, pd->af, 0, NULL)) {
			r = TAILQ_NEXT(r, entries);
		} else if (!xsrc && PF_MISMATCHAW(&src->addr, saddr, pd->af,
		    src->neg, kif)) {
			r = TAILQ_NEXT(r, entries);
		} else if (xsrc && (!rdrxport.range.port[0] ||
		    !pf_match_xport(r->proto, r->proto_variant, &rdrxport,
		    sxport))) {
			r = TAILQ_NEXT(r, entries);
		} else if (!xsrc && !pf_match_xport(r->proto,
		    r->proto_variant, &src->xport, sxport)) {
			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
			    PF_SKIP_DST_PORT].ptr;
		} else if (dst != NULL &&
		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		} else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
		    0, NULL)) {
			r = TAILQ_NEXT(r, entries);
		} else if (dst && !pf_match_xport(r->proto, r->proto_variant,
		    &dst->xport, dxport)) {
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, pbuf,
		    off, pf_pd_get_hdr_tcp(pd)), r->os_fingerprint))) {
			r = TAILQ_NEXT(r, entries);
		} else {
			/* every criterion matched */
			if (r->tag) {
				tag = r->tag;
			}
			if (PF_RTABLEID_IS_VALID(r->rtableid)) {
				rtableid = r->rtableid;
			}
			if (r->anchor == NULL) {
				rm = r;
			} else {
				pf_step_into_anchor(&asd, &ruleset, rs_num,
				    &r, NULL, NULL);
			}
		}
		/* ran off the end of a rule list: resume in the parent, if any */
		if (r == NULL) {
			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
			    NULL, NULL);
		}
	}
	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, NULL)) {
		return NULL;
	}
	if (rm != NULL && (rm->action == PF_NONAT ||
	    rm->action == PF_NORDR || rm->action == PF_NOBINAT ||
	    rm->action == PF_NONAT64)) {
		return NULL;
	}
	return rm;
}
3858
/*
 * Get address translation information for NAT/BINAT/RDR
 * pd		: pf packet descriptor
 * pbuf		: pbuf holding the packet
 * off		: offset to protocol header
 * direction	: direction of packet
 * kif		: pf interface info obtained from the packet's recv interface
 * sn		: source node pointer (output)
 * saddr	: packet source address
 * sxport	: packet source port
 * daddr	: packet destination address
 * dxport	: packet destination port
 * nsxport	: translated source port (output)
 * pnstoken	: (SKYWALK only) token handed on to pf_get_sport()
 *
 * Translated source & destination address are updated in pd->naddr &
 * pd->ndaddr; pd->naf is set to the address family after translation
 * (AF_INET for NAT64, otherwise pd->af).
 *
 * Returns the matching translation rule, or NULL when no rule matched, the
 * rule is a no-translation rule, or the translation could not be set up.
 */
static __attribute__((noinline)) struct pf_rule *
pf_get_translation_aux(struct pf_pdesc *pd, pbuf_t *pbuf, int off,
    int direction, struct pfi_kif *kif, struct pf_src_node **sn,
    struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr,
    union pf_state_xport *dxport, union pf_state_xport *nsxport
#if SKYWALK
    , netns_token *pnstoken
#endif
    )
{
	struct pf_rule *r = NULL;
	pd->naf = pd->af;

	/*
	 * Ruleset search order depends on direction:
	 * outbound: BINAT, then RDR, then NAT; inbound: RDR, then BINAT.
	 */
	if (direction == PF_OUT) {
		r = pf_match_translation(pd, pbuf, off, direction, kif, saddr,
		    sxport, daddr, dxport, PF_RULESET_BINAT);
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_RDR);
		}
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_NAT);
		}
	} else {
		r = pf_match_translation(pd, pbuf, off, direction, kif, saddr,
		    sxport, daddr, dxport, PF_RULESET_RDR);
		if (r == NULL) {
			r = pf_match_translation(pd, pbuf, off, direction, kif,
			    saddr, sxport, daddr, dxport, PF_RULESET_BINAT);
		}
	}

	if (r != NULL) {
		struct pf_addr *nsaddr = &pd->naddr;
		struct pf_addr *ndaddr = &pd->ndaddr;

		/* start from the untranslated addresses */
		PF_ACPY(nsaddr, saddr, pd->af);
		PF_ACPY(ndaddr, daddr, pd->af);

		switch (r->action) {
		case PF_NONAT:
		case PF_NONAT64:
		case PF_NOBINAT:
		case PF_NORDR:
			return NULL;
		case PF_NAT:
		case PF_NAT64:
			/*
			 * we do NAT64 on incoming path and we call ip_input
			 * which asserts receive interface to be not NULL.
			 * The below check is to prevent NAT64 action on any
			 * packet generated by local entity using synthesized
			 * IPv6 address.
			 */
			if ((r->action == PF_NAT64) && (direction == PF_OUT)) {
				return NULL;
			}

			if (pf_get_sport(pd, kif, r, saddr, sxport, daddr,
			    dxport, nsaddr, nsxport, sn
#if SKYWALK
			    , pnstoken
#endif
			    )) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: NAT proxy port allocation "
				    "(%u-%u) failed\n",
				    r->rpool.proxy_port[0],
				    r->rpool.proxy_port[1]));
				return NULL;
			}
			/*
			 * For NAT64 the destination IPv4 address is derived
			 * from the last 32 bits of synthesized IPv6 address
			 */
			if (r->action == PF_NAT64) {
				ndaddr->v4addr.s_addr = daddr->addr32[3];
				pd->naf = AF_INET;
			}
			break;
		case PF_BINAT:
			switch (direction) {
			case PF_OUT:
				/* outbound: rewrite source from the pool address */
				if (r->rpool.cur->addr.type ==
				    PF_ADDR_DYNIFTL) {
					if (r->rpool.cur->addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr4,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask4,
						    saddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr6,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask6,
						    saddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(nsaddr,
					    &r->rpool.cur->addr.v.a.addr,
					    &r->rpool.cur->addr.v.a.mask,
					    saddr, pd->af);
				}
				break;
			case PF_IN:
				/* inbound: rewrite destination from the rule's src */
				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
					if (r->src.addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->src.addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(ndaddr,
						    &r->src.addr.p.dyn->
						    pfid_addr4,
						    &r->src.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->src.addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(ndaddr,
						    &r->src.addr.p.dyn->
						    pfid_addr6,
						    &r->src.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(ndaddr,
					    &r->src.addr.v.a.addr,
					    &r->src.addr.v.a.mask, daddr,
					    pd->af);
				}
				break;
			}
			break;
		case PF_RDR: {
			switch (direction) {
			case PF_OUT:
				/* outbound: source is rewritten from the rule's dst */
				if (r->dst.addr.type == PF_ADDR_DYNIFTL) {
					if (r->dst.addr.p.dyn == NULL) {
						return NULL;
					}
					switch (pd->af) {
#if INET
					case AF_INET:
						if (r->dst.addr.p.dyn->
						    pfid_acnt4 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->dst.addr.p.dyn->
						    pfid_addr4,
						    &r->dst.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
					case AF_INET6:
						if (r->dst.addr.p.dyn->
						    pfid_acnt6 < 1) {
							return NULL;
						}
						PF_POOLMASK(nsaddr,
						    &r->dst.addr.p.dyn->
						    pfid_addr6,
						    &r->dst.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
					}
				} else {
					PF_POOLMASK(nsaddr,
					    &r->dst.addr.v.a.addr,
					    &r->dst.addr.v.a.mask,
					    daddr, pd->af);
				}
				if (nsxport && r->dst.xport.range.port[0]) {
					nsxport->port =
					    r->dst.xport.range.port[0];
				}
				break;
			case PF_IN:
				/* inbound: destination comes from the rule's pool */
				if (pf_map_addr(pd->af, r, saddr,
				    ndaddr, NULL, sn)) {
					return NULL;
				}
				if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
				    PF_POOL_BITMASK) {
					PF_POOLMASK(ndaddr, ndaddr,
					    &r->rpool.cur->addr.v.a.mask, daddr,
					    pd->af);
				}

				if (nsxport && dxport) {
					if (r->rpool.proxy_port[1]) {
						u_int32_t tmp_nport;

						/*
						 * Port range: keep the packet's
						 * offset from the rule's first
						 * destination port, modulo the
						 * size of the proxy range.
						 */
						tmp_nport =
						    ((ntohs(dxport->port) -
						    ntohs(r->dst.xport.range.
						    port[0])) %
						    (r->rpool.proxy_port[1] -
						    r->rpool.proxy_port[0] +
						    1)) + r->rpool.proxy_port[0];

						/* wrap around if necessary */
						if (tmp_nport > 65535) {
							tmp_nport -= 65535;
						}
						nsxport->port =
						    htons((u_int16_t)tmp_nport);
					} else if (r->rpool.proxy_port[0]) {
						nsxport->port = htons(r->rpool.
						    proxy_port[0]);
					}
				}
				break;
			}
			break;
		}
		default:
			return NULL;
		}
	}

	return r;
}
4134
4135 int
pf_socket_lookup(int direction,struct pf_pdesc * pd)4136 pf_socket_lookup(int direction, struct pf_pdesc *pd)
4137 {
4138 struct pf_addr *__single saddr, *__single daddr;
4139 u_int16_t sport, dport;
4140 struct inpcbinfo *__single pi;
4141 int inp = 0;
4142
4143 if (pd == NULL) {
4144 return -1;
4145 }
4146 pd->lookup.uid = UID_MAX;
4147 pd->lookup.gid = GID_MAX;
4148 pd->lookup.pid = NO_PID;
4149
4150 switch (pd->proto) {
4151 case IPPROTO_TCP:
4152 if (pf_pd_get_hdr_tcp(pd) == NULL) {
4153 return -1;
4154 }
4155 sport = pf_pd_get_hdr_tcp(pd)->th_sport;
4156 dport = pf_pd_get_hdr_tcp(pd)->th_dport;
4157 pi = &tcbinfo;
4158 break;
4159 case IPPROTO_UDP:
4160 if (pf_pd_get_hdr_udp(pd) == NULL) {
4161 return -1;
4162 }
4163 sport = pf_pd_get_hdr_udp(pd)->uh_sport;
4164 dport = pf_pd_get_hdr_udp(pd)->uh_dport;
4165 pi = &udbinfo;
4166 break;
4167 default:
4168 return -1;
4169 }
4170 if (direction == PF_IN) {
4171 saddr = pd->src;
4172 daddr = pd->dst;
4173 } else {
4174 u_int16_t p;
4175
4176 p = sport;
4177 sport = dport;
4178 dport = p;
4179 saddr = pd->dst;
4180 daddr = pd->src;
4181 }
4182 switch (pd->af) {
4183 #if INET
4184 case AF_INET:
4185 inp = in_pcblookup_hash_exists(pi, saddr->v4addr, sport, daddr->v4addr, dport,
4186 0, &pd->lookup.uid, &pd->lookup.gid, NULL);
4187 if (inp == 0) {
4188 struct in6_addr s6, d6;
4189
4190 memset(&s6, 0, sizeof(s6));
4191 s6.s6_addr16[5] = htons(0xffff);
4192 memcpy(&s6.s6_addr32[3], &saddr->v4addr,
4193 sizeof(saddr->v4addr));
4194
4195 memset(&d6, 0, sizeof(d6));
4196 d6.s6_addr16[5] = htons(0xffff);
4197 memcpy(&d6.s6_addr32[3], &daddr->v4addr,
4198 sizeof(daddr->v4addr));
4199
4200 inp = in6_pcblookup_hash_exists(pi, &s6, sport, IFSCOPE_NONE,
4201 &d6, dport, IFSCOPE_NONE, 0, &pd->lookup.uid, &pd->lookup.gid, NULL, false);
4202 if (inp == 0) {
4203 inp = in_pcblookup_hash_exists(pi, saddr->v4addr, sport,
4204 daddr->v4addr, dport, INPLOOKUP_WILDCARD, &pd->lookup.uid, &pd->lookup.gid, NULL);
4205 if (inp == 0) {
4206 inp = in6_pcblookup_hash_exists(pi, &s6, sport, IFSCOPE_NONE,
4207 &d6, dport, IFSCOPE_NONE, INPLOOKUP_WILDCARD,
4208 &pd->lookup.uid, &pd->lookup.gid, NULL, false);
4209 if (inp == 0) {
4210 return -1;
4211 }
4212 }
4213 }
4214 }
4215 break;
4216 #endif /* INET */
4217 case AF_INET6:
4218 inp = in6_pcblookup_hash_exists(pi, &saddr->v6addr, sport, IFSCOPE_UNKNOWN, &daddr->v6addr,
4219 dport, IFSCOPE_UNKNOWN, 0, &pd->lookup.uid, &pd->lookup.gid, NULL, false);
4220 if (inp == 0) {
4221 inp = in6_pcblookup_hash_exists(pi, &saddr->v6addr, sport, IFSCOPE_UNKNOWN,
4222 &daddr->v6addr, dport, IFSCOPE_UNKNOWN, INPLOOKUP_WILDCARD,
4223 &pd->lookup.uid, &pd->lookup.gid, NULL, false);
4224 if (inp == 0) {
4225 return -1;
4226 }
4227 }
4228 break;
4229
4230 default:
4231 return -1;
4232 }
4233
4234 return 1;
4235 }
4236
4237 static __attribute__((noinline)) u_int8_t
pf_get_wscale(pbuf_t * pbuf,int off,u_int16_t th_off,sa_family_t af)4238 pf_get_wscale(pbuf_t *pbuf, int off, u_int16_t th_off, sa_family_t af)
4239 {
4240 int hlen;
4241 u_int8_t hdr[60];
4242 u_int8_t *opt, optlen;
4243 u_int8_t wscale = 0;
4244
4245 hlen = th_off << 2; /* hlen <= sizeof (hdr) */
4246 if (hlen <= (int)sizeof(struct tcphdr)) {
4247 return 0;
4248 }
4249 if (!pf_pull_hdr(pbuf, off, hdr, sizeof(hdr), hlen, NULL, NULL, af)) {
4250 return 0;
4251 }
4252 opt = hdr + sizeof(struct tcphdr);
4253 hlen -= sizeof(struct tcphdr);
4254 while (hlen >= 3) {
4255 switch (*opt) {
4256 case TCPOPT_EOL:
4257 case TCPOPT_NOP:
4258 ++opt;
4259 --hlen;
4260 break;
4261 case TCPOPT_WINDOW:
4262 wscale = opt[2];
4263 if (wscale > TCP_MAX_WINSHIFT) {
4264 wscale = TCP_MAX_WINSHIFT;
4265 }
4266 wscale |= PF_WSCALE_FLAG;
4267 OS_FALLTHROUGH;
4268 default:
4269 optlen = opt[1];
4270 if (optlen < 2) {
4271 optlen = 2;
4272 }
4273 hlen -= optlen;
4274 opt += optlen;
4275 break;
4276 }
4277 }
4278 return wscale;
4279 }
4280
4281 static __attribute__((noinline)) u_int16_t
pf_get_mss(pbuf_t * pbuf,int off,u_int16_t th_off,sa_family_t af)4282 pf_get_mss(pbuf_t *pbuf, int off, u_int16_t th_off, sa_family_t af)
4283 {
4284 int hlen;
4285 u_int8_t hdr[60];
4286 u_int8_t *opt, optlen;
4287 u_int16_t mss = tcp_mssdflt;
4288
4289 hlen = th_off << 2; /* hlen <= sizeof (hdr) */
4290 if (hlen <= (int)sizeof(struct tcphdr)) {
4291 return 0;
4292 }
4293 if (!pf_pull_hdr(pbuf, off, hdr, sizeof(hdr), hlen, NULL, NULL, af)) {
4294 return 0;
4295 }
4296 opt = hdr + sizeof(struct tcphdr);
4297 hlen -= sizeof(struct tcphdr);
4298 while (hlen >= TCPOLEN_MAXSEG) {
4299 switch (*opt) {
4300 case TCPOPT_EOL:
4301 case TCPOPT_NOP:
4302 ++opt;
4303 --hlen;
4304 break;
4305 case TCPOPT_MAXSEG:
4306 bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
4307 #if BYTE_ORDER != BIG_ENDIAN
4308 NTOHS(mss);
4309 #endif
4310 OS_FALLTHROUGH;
4311 default:
4312 optlen = opt[1];
4313 if (optlen < 2) {
4314 optlen = 2;
4315 }
4316 hlen -= optlen;
4317 opt += optlen;
4318 break;
4319 }
4320 }
4321 return mss;
4322 }
4323
4324 static __attribute__((noinline)) u_int16_t
pf_calc_mss(struct pf_addr * addr,sa_family_t af,u_int16_t offer)4325 pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer)
4326 {
4327 #if INET
4328 struct sockaddr_in *dst;
4329 struct route ro;
4330 #endif /* INET */
4331 struct sockaddr_in6 *dst6;
4332 struct route_in6 ro6;
4333 struct rtentry *rt = NULL;
4334 int hlen;
4335 u_int16_t mss = tcp_mssdflt;
4336
4337 switch (af) {
4338 #if INET
4339 case AF_INET:
4340 hlen = sizeof(struct ip);
4341 bzero(&ro, sizeof(ro));
4342 dst = (struct sockaddr_in *)(void *)&ro.ro_dst;
4343 dst->sin_family = AF_INET;
4344 dst->sin_len = sizeof(*dst);
4345 dst->sin_addr = addr->v4addr;
4346 rtalloc(&ro);
4347 rt = ro.ro_rt;
4348 break;
4349 #endif /* INET */
4350 case AF_INET6:
4351 hlen = sizeof(struct ip6_hdr);
4352 bzero(&ro6, sizeof(ro6));
4353 dst6 = (struct sockaddr_in6 *)(void *)&ro6.ro_dst;
4354 dst6->sin6_family = AF_INET6;
4355 dst6->sin6_len = sizeof(*dst6);
4356 dst6->sin6_addr = addr->v6addr;
4357 rtalloc((struct route *)&ro);
4358 rt = ro6.ro_rt;
4359 break;
4360 default:
4361 panic("pf_calc_mss: not AF_INET or AF_INET6!");
4362 return 0;
4363 }
4364
4365 if (rt && rt->rt_ifp) {
4366 /* This is relevant only for PF SYN Proxy */
4367 int interface_mtu = rt->rt_ifp->if_mtu;
4368
4369 if (af == AF_INET &&
4370 INTF_ADJUST_MTU_FOR_CLAT46(rt->rt_ifp)) {
4371 interface_mtu = IN6_LINKMTU(rt->rt_ifp);
4372 /* Further adjust the size for CLAT46 expansion */
4373 interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
4374 }
4375 mss = interface_mtu - hlen - sizeof(struct tcphdr);
4376 mss = max(tcp_mssdflt, mss);
4377 rtfree(rt);
4378 }
4379 mss = min(mss, offer);
4380 mss = max(mss, 64); /* sanity - at least max opt space */
4381 return mss;
4382 }
4383
4384 static void
pf_set_rt_ifp(struct pf_state * s,struct pf_addr * saddr,sa_family_t af)4385 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af)
4386 {
4387 struct pf_rule *r = s->rule.ptr;
4388
4389 s->rt_kif = NULL;
4390
4391 if (!r->rt || r->rt == PF_FASTROUTE) {
4392 return;
4393 }
4394 if ((af == AF_INET) || (af == AF_INET6)) {
4395 pf_map_addr(af, r, saddr, &s->rt_addr, NULL,
4396 &s->nat_src_node);
4397 s->rt_kif = r->rpool.cur->kif;
4398 }
4399
4400 return;
4401 }
4402
4403 static void
pf_attach_state(struct pf_state_key * sk,struct pf_state * s,int tail)4404 pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail)
4405 {
4406 s->state_key = sk;
4407 sk->refcnt++;
4408
4409 /* list is sorted, if-bound states before floating */
4410 if (tail) {
4411 TAILQ_INSERT_TAIL(&sk->states, s, next);
4412 } else {
4413 TAILQ_INSERT_HEAD(&sk->states, s, next);
4414 }
4415 }
4416
/*
 * Release the flow ID held by a state key (SKYWALK builds only; a no-op
 * otherwise).  Only keys whose flow source is FLOWSRC_PF and whose flowhash
 * is non-zero are released; both fields are cleared afterwards.
 */
static void
pf_state_key_release_flowid(struct pf_state_key *sk)
{
#pragma unused (sk)
#if SKYWALK
	/* nothing to give back unless pf itself holds a flow ID */
	if (sk->flowsrc != FLOWSRC_PF || sk->flowhash == 0) {
		return;
	}
	flowidns_release_flowid(sk->flowhash);
	sk->flowhash = 0;
	sk->flowsrc = 0;
#endif /* SKYWALK */
}
4429
4430 void
pf_detach_state(struct pf_state * s,int flags)4431 pf_detach_state(struct pf_state *s, int flags)
4432 {
4433 struct pf_state_key *sk = s->state_key;
4434
4435 if (sk == NULL) {
4436 return;
4437 }
4438
4439 s->state_key = NULL;
4440 TAILQ_REMOVE(&sk->states, s, next);
4441 if (--sk->refcnt == 0) {
4442 if (!(flags & PF_DT_SKIP_EXTGWY)) {
4443 pf_remove_state_key_ext_gwy(sk);
4444 }
4445 if (!(flags & PF_DT_SKIP_LANEXT)) {
4446 RB_REMOVE(pf_state_tree_lan_ext,
4447 &pf_statetbl_lan_ext, sk);
4448 }
4449 if (sk->app_state) {
4450 pool_put(&pf_app_state_pl, sk->app_state);
4451 }
4452 pf_state_key_release_flowid(sk);
4453 pool_put(&pf_state_key_pl, sk);
4454 }
4455 }
4456
4457 struct pf_state_key *
pf_alloc_state_key(struct pf_state * s,struct pf_state_key * psk)4458 pf_alloc_state_key(struct pf_state *s, struct pf_state_key *psk)
4459 {
4460 struct pf_state_key *__single sk;
4461
4462 if ((sk = pool_get(&pf_state_key_pl, PR_WAITOK)) == NULL) {
4463 return NULL;
4464 }
4465 bzero(sk, sizeof(*sk));
4466 TAILQ_INIT(&sk->states);
4467 pf_attach_state(sk, s, 0);
4468
4469 /* initialize state key from psk, if provided */
4470 if (psk != NULL) {
4471 bcopy(&psk->lan, &sk->lan, sizeof(sk->lan));
4472 bcopy(&psk->gwy, &sk->gwy, sizeof(sk->gwy));
4473 bcopy(&psk->ext_lan, &sk->ext_lan, sizeof(sk->ext_lan));
4474 bcopy(&psk->ext_gwy, &sk->ext_gwy, sizeof(sk->ext_gwy));
4475 sk->af_lan = psk->af_lan;
4476 sk->af_gwy = psk->af_gwy;
4477 sk->proto = psk->proto;
4478 sk->direction = psk->direction;
4479 sk->proto_variant = psk->proto_variant;
4480 VERIFY(psk->app_state == NULL);
4481 ASSERT(psk->flowsrc != FLOWSRC_PF);
4482 sk->flowsrc = psk->flowsrc;
4483 sk->flowhash = psk->flowhash;
4484 /* don't touch tree entries, states and refcnt on sk */
4485 }
4486
4487 if (sk->flowhash == 0) {
4488 ASSERT(sk->flowsrc == 0);
4489 sk->flowsrc = FLOWSRC_PF;
4490 sk->flowhash = pf_calc_state_key_flowhash(sk);
4491 }
4492
4493 return sk;
4494 }
4495
4496 static __attribute__((noinline)) u_int32_t
pf_tcp_iss(struct pf_pdesc * pd)4497 pf_tcp_iss(struct pf_pdesc *pd)
4498 {
4499 MD5_CTX ctx;
4500 u_int32_t digest[4];
4501
4502 if (pf_tcp_secret_init == 0) {
4503 read_frandom(pf_tcp_secret, sizeof(pf_tcp_secret));
4504 MD5Init(&pf_tcp_secret_ctx);
4505 MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret,
4506 sizeof(pf_tcp_secret));
4507 pf_tcp_secret_init = 1;
4508 }
4509 ctx = pf_tcp_secret_ctx;
4510
4511 MD5Update(&ctx, (char *)&pf_pd_get_hdr_tcp(pd)->th_sport, sizeof(u_short));
4512 MD5Update(&ctx, (char *)&pf_pd_get_hdr_tcp(pd)->th_dport, sizeof(u_short));
4513 if (pd->af == AF_INET6) {
4514 MD5Update(&ctx, (char *)&pd->src->v6addr, sizeof(struct in6_addr));
4515 MD5Update(&ctx, (char *)&pd->dst->v6addr, sizeof(struct in6_addr));
4516 } else {
4517 MD5Update(&ctx, (char *)&pd->src->v4addr, sizeof(struct in_addr));
4518 MD5Update(&ctx, (char *)&pd->dst->v4addr, sizeof(struct in_addr));
4519 }
4520 MD5Final((u_char *)digest, &ctx);
4521 pf_tcp_iss_off += 4096;
4522 return digest[0] + random() + pf_tcp_iss_off;
4523 }
4524
4525 /*
4526 * This routine is called to perform address family translation on the
4527 * inner IP header (that may come as payload) of an ICMP(v4addr/6) error
4528 * response.
4529 */
4530 static __attribute__((noinline)) int
pf_change_icmp_af(pbuf_t * pbuf,int off,struct pf_pdesc * pd,struct pf_pdesc * pd2,struct pf_addr * src,struct pf_addr * dst,sa_family_t af,sa_family_t naf)4531 pf_change_icmp_af(pbuf_t *pbuf, int off,
4532 struct pf_pdesc *pd, struct pf_pdesc *pd2, struct pf_addr *src,
4533 struct pf_addr *dst, sa_family_t af, sa_family_t naf)
4534 {
4535 struct ip *__single ip4 = NULL;
4536 struct ip6_hdr *__single ip6 = NULL;
4537 void *__single hdr;
4538 int hlen, olen;
4539 uint64_t ipid_salt = (uint64_t)pbuf_get_packet_buffer_address(pbuf);
4540
4541 if (af == naf || (af != AF_INET && af != AF_INET6) ||
4542 (naf != AF_INET && naf != AF_INET6)) {
4543 return -1;
4544 }
4545
4546 /* old header */
4547 olen = pd2->off - off;
4548 /* new header */
4549 hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
4550
4551 /* Modify the pbuf to accommodate the new header */
4552 hdr = pbuf_resize_segment(pbuf, off, olen, hlen);
4553 if (hdr == NULL) {
4554 return -1;
4555 }
4556
4557 /* translate inner ip/ip6 header */
4558 switch (naf) {
4559 case AF_INET:
4560 ip4 = hdr;
4561 bzero(ip4, sizeof(*ip4));
4562 ip4->ip_v = IPVERSION;
4563 ip4->ip_hl = sizeof(*ip4) >> 2;
4564 ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - olen);
4565 ip4->ip_id = rfc6864 ? 0 : htons(ip_randomid(ipid_salt));
4566 ip4->ip_off = htons(IP_DF);
4567 ip4->ip_ttl = pd2->ttl;
4568 if (pd2->proto == IPPROTO_ICMPV6) {
4569 ip4->ip_p = IPPROTO_ICMP;
4570 } else {
4571 ip4->ip_p = pd2->proto;
4572 }
4573 ip4->ip_src = src->v4addr;
4574 ip4->ip_dst = dst->v4addr;
4575 ip4->ip_sum = pbuf_inet_cksum(pbuf, 0, 0, ip4->ip_hl << 2);
4576 break;
4577 case AF_INET6:
4578 ip6 = hdr;
4579 bzero(ip6, sizeof(*ip6));
4580 ip6->ip6_vfc = IPV6_VERSION;
4581 ip6->ip6_plen = htons(pd2->tot_len - olen);
4582 if (pd2->proto == IPPROTO_ICMP) {
4583 ip6->ip6_nxt = IPPROTO_ICMPV6;
4584 } else {
4585 ip6->ip6_nxt = pd2->proto;
4586 }
4587 if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM) {
4588 ip6->ip6_hlim = IPV6_DEFHLIM;
4589 } else {
4590 ip6->ip6_hlim = pd2->ttl;
4591 }
4592 ip6->ip6_src = src->v6addr;
4593 ip6->ip6_dst = dst->v6addr;
4594 break;
4595 }
4596
4597 /* adjust payload offset and total packet length */
4598 pd2->off += hlen - olen;
4599 pd->tot_len += hlen - olen;
4600
4601 return 0;
4602 }
4603
4604 #define PTR_IP(field) ((int32_t)offsetof(struct ip, field))
4605 #define PTR_IP6(field) ((int32_t)offsetof(struct ip6_hdr, field))
4606
4607 static __attribute__((noinline)) int
pf_translate_icmp_af(int af,void * arg)4608 pf_translate_icmp_af(int af, void *arg)
4609 {
4610 struct icmp *__single icmp4;
4611 struct icmp6_hdr *__single icmp6;
4612 u_int32_t mtu;
4613 int32_t ptr = -1;
4614 u_int8_t type;
4615 u_int8_t code;
4616
4617 switch (af) {
4618 case AF_INET:
4619 icmp6 = (struct icmp6_hdr * __single)arg;
4620 type = icmp6->icmp6_type;
4621 code = icmp6->icmp6_code;
4622 mtu = ntohl(icmp6->icmp6_mtu);
4623
4624 switch (type) {
4625 case ICMP6_ECHO_REQUEST:
4626 type = ICMP_ECHO;
4627 break;
4628 case ICMP6_ECHO_REPLY:
4629 type = ICMP_ECHOREPLY;
4630 break;
4631 case ICMP6_DST_UNREACH:
4632 type = ICMP_UNREACH;
4633 switch (code) {
4634 case ICMP6_DST_UNREACH_NOROUTE:
4635 case ICMP6_DST_UNREACH_BEYONDSCOPE:
4636 case ICMP6_DST_UNREACH_ADDR:
4637 code = ICMP_UNREACH_HOST;
4638 break;
4639 case ICMP6_DST_UNREACH_ADMIN:
4640 code = ICMP_UNREACH_HOST_PROHIB;
4641 break;
4642 case ICMP6_DST_UNREACH_NOPORT:
4643 code = ICMP_UNREACH_PORT;
4644 break;
4645 default:
4646 return -1;
4647 }
4648 break;
4649 case ICMP6_PACKET_TOO_BIG:
4650 type = ICMP_UNREACH;
4651 code = ICMP_UNREACH_NEEDFRAG;
4652 mtu -= 20;
4653 break;
4654 case ICMP6_TIME_EXCEEDED:
4655 type = ICMP_TIMXCEED;
4656 break;
4657 case ICMP6_PARAM_PROB:
4658 switch (code) {
4659 case ICMP6_PARAMPROB_HEADER:
4660 type = ICMP_PARAMPROB;
4661 code = ICMP_PARAMPROB_ERRATPTR;
4662 ptr = ntohl(icmp6->icmp6_pptr);
4663
4664 if (ptr == PTR_IP6(ip6_vfc)) {
4665 ; /* preserve */
4666 } else if (ptr == PTR_IP6(ip6_vfc) + 1) {
4667 ptr = PTR_IP(ip_tos);
4668 } else if (ptr == PTR_IP6(ip6_plen) ||
4669 ptr == PTR_IP6(ip6_plen) + 1) {
4670 ptr = PTR_IP(ip_len);
4671 } else if (ptr == PTR_IP6(ip6_nxt)) {
4672 ptr = PTR_IP(ip_p);
4673 } else if (ptr == PTR_IP6(ip6_hlim)) {
4674 ptr = PTR_IP(ip_ttl);
4675 } else if (ptr >= PTR_IP6(ip6_src) &&
4676 ptr < PTR_IP6(ip6_dst)) {
4677 ptr = PTR_IP(ip_src);
4678 } else if (ptr >= PTR_IP6(ip6_dst) &&
4679 ptr < (int32_t)sizeof(struct ip6_hdr)) {
4680 ptr = PTR_IP(ip_dst);
4681 } else {
4682 return -1;
4683 }
4684 break;
4685 case ICMP6_PARAMPROB_NEXTHEADER:
4686 type = ICMP_UNREACH;
4687 code = ICMP_UNREACH_PROTOCOL;
4688 break;
4689 default:
4690 return -1;
4691 }
4692 break;
4693 default:
4694 return -1;
4695 }
4696 icmp6->icmp6_type = type;
4697 icmp6->icmp6_code = code;
4698 /* aligns well with a icmpv4 nextmtu */
4699 icmp6->icmp6_mtu = htonl(mtu);
4700 /* icmpv4 pptr is a one most significant byte */
4701 if (ptr >= 0) {
4702 icmp6->icmp6_pptr = htonl(ptr << 24);
4703 }
4704 break;
4705
4706 case AF_INET6:
4707 icmp4 = (struct icmp* __single)arg;
4708 type = icmp4->icmp_type;
4709 code = icmp4->icmp_code;
4710 mtu = ntohs(icmp4->icmp_nextmtu);
4711
4712 switch (type) {
4713 case ICMP_ECHO:
4714 type = ICMP6_ECHO_REQUEST;
4715 break;
4716 case ICMP_ECHOREPLY:
4717 type = ICMP6_ECHO_REPLY;
4718 break;
4719 case ICMP_UNREACH:
4720 type = ICMP6_DST_UNREACH;
4721 switch (code) {
4722 case ICMP_UNREACH_NET:
4723 case ICMP_UNREACH_HOST:
4724 case ICMP_UNREACH_NET_UNKNOWN:
4725 case ICMP_UNREACH_HOST_UNKNOWN:
4726 case ICMP_UNREACH_ISOLATED:
4727 case ICMP_UNREACH_TOSNET:
4728 case ICMP_UNREACH_TOSHOST:
4729 code = ICMP6_DST_UNREACH_NOROUTE;
4730 break;
4731 case ICMP_UNREACH_PORT:
4732 code = ICMP6_DST_UNREACH_NOPORT;
4733 break;
4734 case ICMP_UNREACH_NET_PROHIB:
4735 case ICMP_UNREACH_HOST_PROHIB:
4736 case ICMP_UNREACH_FILTER_PROHIB:
4737 case ICMP_UNREACH_PRECEDENCE_CUTOFF:
4738 code = ICMP6_DST_UNREACH_ADMIN;
4739 break;
4740 case ICMP_UNREACH_PROTOCOL:
4741 type = ICMP6_PARAM_PROB;
4742 code = ICMP6_PARAMPROB_NEXTHEADER;
4743 ptr = offsetof(struct ip6_hdr, ip6_nxt);
4744 break;
4745 case ICMP_UNREACH_NEEDFRAG:
4746 type = ICMP6_PACKET_TOO_BIG;
4747 code = 0;
4748 mtu += 20;
4749 break;
4750 default:
4751 return -1;
4752 }
4753 break;
4754 case ICMP_TIMXCEED:
4755 type = ICMP6_TIME_EXCEEDED;
4756 break;
4757 case ICMP_PARAMPROB:
4758 type = ICMP6_PARAM_PROB;
4759 switch (code) {
4760 case ICMP_PARAMPROB_ERRATPTR:
4761 code = ICMP6_PARAMPROB_HEADER;
4762 break;
4763 case ICMP_PARAMPROB_LENGTH:
4764 code = ICMP6_PARAMPROB_HEADER;
4765 break;
4766 default:
4767 return -1;
4768 }
4769
4770 ptr = icmp4->icmp_pptr;
4771 if (ptr == 0 || ptr == PTR_IP(ip_tos)) {
4772 ; /* preserve */
4773 } else if (ptr == PTR_IP(ip_len) ||
4774 ptr == PTR_IP(ip_len) + 1) {
4775 ptr = PTR_IP6(ip6_plen);
4776 } else if (ptr == PTR_IP(ip_ttl)) {
4777 ptr = PTR_IP6(ip6_hlim);
4778 } else if (ptr == PTR_IP(ip_p)) {
4779 ptr = PTR_IP6(ip6_nxt);
4780 } else if (ptr >= PTR_IP(ip_src) &&
4781 ptr < PTR_IP(ip_dst)) {
4782 ptr = PTR_IP6(ip6_src);
4783 } else if (ptr >= PTR_IP(ip_dst) &&
4784 ptr < (int32_t)sizeof(struct ip)) {
4785 ptr = PTR_IP6(ip6_dst);
4786 } else {
4787 return -1;
4788 }
4789 break;
4790 default:
4791 return -1;
4792 }
4793 icmp4->icmp_type = type;
4794 icmp4->icmp_code = code;
4795 icmp4->icmp_nextmtu = htons(mtu);
4796 if (ptr >= 0) {
4797 icmp4->icmp_void = htonl(ptr);
4798 }
4799 break;
4800 }
4801
4802 return 0;
4803 }
4804
4805 /* Note: frees pbuf if PF_NAT64 is returned */
4806 static __attribute__((noinline)) int
pf_nat64_ipv6(pbuf_t * pbuf,int off,struct pf_pdesc * pd)4807 pf_nat64_ipv6(pbuf_t *pbuf, int off, struct pf_pdesc *pd)
4808 {
4809 struct ip *ip4;
4810 struct mbuf *m;
4811
4812 /*
4813 * ip_input asserts for rcvif to be not NULL
4814 * That may not be true for two corner cases
4815 * 1. If for some reason a local app sends DNS
4816 * AAAA query to local host
4817 * 2. If IPv6 stack in kernel internally generates a
4818 * message destined for a synthesized IPv6 end-point.
4819 */
4820 if (pbuf->pb_ifp == NULL) {
4821 return PF_DROP;
4822 }
4823
4824 ip4 = (struct ip *)pbuf_resize_segment(pbuf, 0, off, sizeof(*ip4));
4825 if (ip4 == NULL) {
4826 return PF_DROP;
4827 }
4828
4829 ip4->ip_v = 4;
4830 ip4->ip_hl = 5;
4831 ip4->ip_tos = pd->tos & htonl(0x0ff00000);
4832 ip4->ip_len = htons(sizeof(*ip4) + (pd->tot_len - off));
4833 ip4->ip_id = 0;
4834 ip4->ip_off = htons(IP_DF);
4835 ip4->ip_ttl = pd->ttl;
4836 ip4->ip_p = pd->proto;
4837 ip4->ip_sum = 0;
4838 ip4->ip_src = pd->naddr.v4addr;
4839 ip4->ip_dst = pd->ndaddr.v4addr;
4840 ip4->ip_sum = pbuf_inet_cksum(pbuf, 0, 0, ip4->ip_hl << 2);
4841
4842 /* recalculate icmp checksums */
4843 if (pd->proto == IPPROTO_ICMP) {
4844 struct icmp *icmp;
4845 int hlen = sizeof(*ip4);
4846
4847 icmp = (struct icmp *)pbuf_contig_segment(pbuf, hlen,
4848 ICMP_MINLEN);
4849 if (icmp == NULL) {
4850 return PF_DROP;
4851 }
4852
4853 icmp->icmp_cksum = 0;
4854 icmp->icmp_cksum = pbuf_inet_cksum(pbuf, 0, hlen,
4855 ntohs(ip4->ip_len) - hlen);
4856 }
4857
4858 if ((m = pbuf_to_mbuf(pbuf, TRUE)) != NULL) {
4859 ip_proto_input(AF_INET, m);
4860 }
4861
4862 return PF_NAT64;
4863 }
4864
4865 static __attribute__((noinline)) int
pf_nat64_ipv4(pbuf_t * pbuf,int off,struct pf_pdesc * pd)4866 pf_nat64_ipv4(pbuf_t *pbuf, int off, struct pf_pdesc *pd)
4867 {
4868 struct ip6_hdr *ip6;
4869 struct mbuf *m;
4870
4871 if (pbuf->pb_ifp == NULL) {
4872 return PF_DROP;
4873 }
4874
4875 ip6 = (struct ip6_hdr *)pbuf_resize_segment(pbuf, 0, off, sizeof(*ip6));
4876 if (ip6 == NULL) {
4877 return PF_DROP;
4878 }
4879
4880 ip6->ip6_vfc = htonl((6 << 28) | (pd->tos << 20));
4881 ip6->ip6_plen = htons(pd->tot_len - off);
4882 ip6->ip6_nxt = pd->proto;
4883 ip6->ip6_hlim = pd->ttl;
4884 ip6->ip6_src = pd->naddr.v6addr;
4885 ip6->ip6_dst = pd->ndaddr.v6addr;
4886
4887 /* recalculate icmp6 checksums */
4888 if (pd->proto == IPPROTO_ICMPV6) {
4889 struct icmp6_hdr *icmp6;
4890 int hlen = sizeof(*ip6);
4891
4892 icmp6 = (struct icmp6_hdr *)pbuf_contig_segment(pbuf, hlen,
4893 sizeof(*icmp6));
4894 if (icmp6 == NULL) {
4895 return PF_DROP;
4896 }
4897
4898 icmp6->icmp6_cksum = 0;
4899 icmp6->icmp6_cksum = pbuf_inet6_cksum(pbuf,
4900 IPPROTO_ICMPV6, hlen,
4901 ntohs(ip6->ip6_plen));
4902 } else if (pd->proto == IPPROTO_UDP) {
4903 struct udphdr *uh;
4904 int hlen = sizeof(*ip6);
4905
4906 uh = (struct udphdr *)pbuf_contig_segment(pbuf, hlen,
4907 sizeof(*uh));
4908 if (uh == NULL) {
4909 return PF_DROP;
4910 }
4911
4912 if (uh->uh_sum == 0) {
4913 uh->uh_sum = pbuf_inet6_cksum(pbuf, IPPROTO_UDP,
4914 hlen, ntohs(ip6->ip6_plen));
4915 }
4916 }
4917
4918 if ((m = pbuf_to_mbuf(pbuf, TRUE)) != NULL) {
4919 ip6_input(m);
4920 }
4921
4922 return PF_NAT64;
4923 }
4924
4925 static __attribute__((noinline)) int
pf_test_rule(struct pf_rule ** rm,struct pf_state ** sm,int direction,struct pfi_kif * kif,pbuf_t * pbuf,int off,void * h,struct pf_pdesc * pd,struct pf_rule ** am,struct pf_ruleset ** rsm,struct ifqueue * ifq)4926 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
4927 struct pfi_kif *kif, pbuf_t *pbuf, int off, void *h,
4928 struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm,
4929 struct ifqueue *ifq)
4930 {
4931 #pragma unused(h)
4932 struct pf_rule *__single nr = NULL;
4933 struct pf_addr *__single saddr = pd->src, *__single daddr = pd->dst;
4934 sa_family_t af = pd->af;
4935 struct pf_rule *__single r, *__single a = NULL;
4936 struct pf_ruleset *__single ruleset = NULL;
4937 struct pf_src_node *__single nsn = NULL;
4938 struct tcphdr *__single th = pf_pd_get_hdr_tcp(pd);
4939 struct udphdr *__single uh = pf_pd_get_hdr_udp(pd);
4940 u_short reason;
4941 int rewrite = 0, hdrlen = 0;
4942 int tag = -1;
4943 unsigned int rtableid = IFSCOPE_NONE;
4944 int asd = 0;
4945 int match = 0;
4946 int state_icmp = 0;
4947 u_int16_t mss = tcp_mssdflt;
4948 u_int8_t icmptype = 0, icmpcode = 0;
4949 #if SKYWALK
4950 struct ns_token *__single nstoken = NULL;
4951 #endif
4952
4953 struct pf_grev1_hdr *__single grev1 = pf_pd_get_hdr_grev1(pd);
4954 union pf_state_xport bxport, bdxport, nxport, sxport, dxport;
4955 struct pf_state_key psk;
4956
4957 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
4958
4959 PD_CLEAR_STATE_FLOWID(pd);
4960
4961 if (direction == PF_IN && pf_check_congestion(ifq)) {
4962 REASON_SET(&reason, PFRES_CONGEST);
4963 return PF_DROP;
4964 }
4965
4966 hdrlen = 0;
4967 sxport.spi = 0;
4968 dxport.spi = 0;
4969 nxport.spi = 0;
4970
4971 switch (pd->proto) {
4972 case IPPROTO_TCP:
4973 sxport.port = th->th_sport;
4974 dxport.port = th->th_dport;
4975 hdrlen = sizeof(*th);
4976 break;
4977 case IPPROTO_UDP:
4978 sxport.port = uh->uh_sport;
4979 dxport.port = uh->uh_dport;
4980 hdrlen = sizeof(*uh);
4981 break;
4982 #if INET
4983 case IPPROTO_ICMP:
4984 if (pd->af != AF_INET) {
4985 break;
4986 }
4987 sxport.port = dxport.port = pf_pd_get_hdr_icmp(pd)->icmp_id;
4988 hdrlen = ICMP_MINLEN;
4989 icmptype = pf_pd_get_hdr_icmp(pd)->icmp_type;
4990 icmpcode = pf_pd_get_hdr_icmp(pd)->icmp_code;
4991
4992 if (ICMP_ERRORTYPE(icmptype)) {
4993 state_icmp++;
4994 }
4995 break;
4996 #endif /* INET */
4997 case IPPROTO_ICMPV6:
4998 if (pd->af != AF_INET6) {
4999 break;
5000 }
5001 sxport.port = dxport.port = pf_pd_get_hdr_icmp6(pd)->icmp6_id;
5002 hdrlen = sizeof(*pf_pd_get_hdr_icmp6(pd));
5003 icmptype = pf_pd_get_hdr_icmp6(pd)->icmp6_type;
5004 icmpcode = pf_pd_get_hdr_icmp6(pd)->icmp6_code;
5005
5006 if (ICMP6_ERRORTYPE(icmptype)) {
5007 state_icmp++;
5008 }
5009 break;
5010 case IPPROTO_GRE:
5011 if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
5012 sxport.call_id = dxport.call_id =
5013 pf_pd_get_hdr_grev1(pd)->call_id;
5014 hdrlen = sizeof(*pf_pd_get_hdr_grev1(pd));
5015 }
5016 break;
5017 case IPPROTO_ESP:
5018 sxport.spi = 0;
5019 dxport.spi = pf_pd_get_hdr_esp(pd)->spi;
5020 hdrlen = sizeof(*pf_pd_get_hdr_esp(pd));
5021 break;
5022 }
5023
5024 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
5025
5026 bxport = sxport;
5027 bdxport = dxport;
5028
5029 if (direction == PF_OUT) {
5030 nxport = sxport;
5031 } else {
5032 nxport = dxport;
5033 }
5034
5035 /* check packet for BINAT/NAT/RDR */
5036 if ((nr = pf_get_translation_aux(pd, pbuf, off, direction, kif, &nsn,
5037 saddr, &sxport, daddr, &dxport, &nxport
5038 #if SKYWALK
5039 , &nstoken
5040 #endif
5041 )) != NULL) {
5042 int ua;
5043 u_int16_t dport;
5044
5045 if (pd->af != pd->naf) {
5046 ua = 0;
5047 } else {
5048 ua = 1;
5049 }
5050
5051 PF_ACPY(&pd->baddr, saddr, af);
5052 PF_ACPY(&pd->bdaddr, daddr, af);
5053
5054 switch (pd->proto) {
5055 case IPPROTO_TCP:
5056 if (pd->af != pd->naf ||
5057 PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5058 pf_change_ap(direction, pd->mp, saddr,
5059 &th->th_sport, pd->ip_sum, &th->th_sum,
5060 &pd->naddr, nxport.port, 0, af,
5061 pd->naf, ua);
5062 sxport.port = th->th_sport;
5063 }
5064
5065 if (pd->af != pd->naf ||
5066 PF_ANEQ(daddr, &pd->ndaddr, pd->af) ||
5067 (nr && (nr->action == PF_RDR) &&
5068 (th->th_dport != nxport.port))) {
5069 if (nr && nr->action == PF_RDR) {
5070 dport = nxport.port;
5071 } else {
5072 dport = th->th_dport;
5073 }
5074 pf_change_ap(direction, pd->mp, daddr,
5075 &th->th_dport, pd->ip_sum,
5076 &th->th_sum, &pd->ndaddr,
5077 dport, 0, af, pd->naf, ua);
5078 dxport.port = th->th_dport;
5079 }
5080 rewrite++;
5081 break;
5082
5083 case IPPROTO_UDP:
5084 if (pd->af != pd->naf ||
5085 PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5086 pf_change_ap(direction, pd->mp, saddr,
5087 &uh->uh_sport, pd->ip_sum,
5088 &uh->uh_sum, &pd->naddr,
5089 nxport.port, 1, af, pd->naf, ua);
5090 sxport.port = uh->uh_sport;
5091 }
5092
5093 if (pd->af != pd->naf ||
5094 PF_ANEQ(daddr, &pd->ndaddr, pd->af) ||
5095 (nr && (nr->action == PF_RDR) &&
5096 (uh->uh_dport != nxport.port))) {
5097 if (nr && nr->action == PF_RDR) {
5098 dport = nxport.port;
5099 } else {
5100 dport = uh->uh_dport;
5101 }
5102 pf_change_ap(direction, pd->mp, daddr,
5103 &uh->uh_dport, pd->ip_sum,
5104 &uh->uh_sum, &pd->ndaddr,
5105 dport, 0, af, pd->naf, ua);
5106 dxport.port = uh->uh_dport;
5107 }
5108 rewrite++;
5109 break;
5110 #if INET
5111 case IPPROTO_ICMP:
5112 if (pd->af != AF_INET) {
5113 break;
5114 }
5115 /*
5116 * TODO:
5117 * pd->af != pd->naf not handled yet here and would be
5118 * needed for NAT46 needed to support XLAT.
5119 * Will cross the bridge when it comes.
5120 */
5121 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5122 pf_change_a(&saddr->v4addr.s_addr, pd->ip_sum,
5123 pd->naddr.v4addr.s_addr, 0);
5124 pf_pd_get_hdr_icmp(pd)->icmp_cksum = pf_cksum_fixup(
5125 pf_pd_get_hdr_icmp(pd)->icmp_cksum, sxport.port,
5126 nxport.port, 0);
5127 pf_pd_get_hdr_icmp(pd)->icmp_id = nxport.port;
5128 }
5129
5130 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5131 pf_change_a(&daddr->v4addr.s_addr, pd->ip_sum,
5132 pd->ndaddr.v4addr.s_addr, 0);
5133 }
5134 ++rewrite;
5135 break;
5136 #endif /* INET */
5137 case IPPROTO_ICMPV6:
5138 if (pd->af != AF_INET6) {
5139 break;
5140 }
5141
5142 if (pd->af != pd->naf ||
5143 PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5144 pf_change_addr(saddr,
5145 &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum,
5146 &pd->naddr, 0, pd->af, pd->naf);
5147 }
5148
5149 if (pd->af != pd->naf ||
5150 PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5151 pf_change_addr(daddr,
5152 &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum,
5153 &pd->ndaddr, 0, pd->af, pd->naf);
5154 }
5155
5156 if (pd->af != pd->naf) {
5157 if (pf_translate_icmp_af(AF_INET,
5158 pf_pd_get_hdr_icmp6(pd))) {
5159 return PF_DROP;
5160 }
5161 pd->proto = IPPROTO_ICMP;
5162 }
5163 rewrite++;
5164 break;
5165 case IPPROTO_GRE:
5166 if ((direction == PF_IN) &&
5167 (pd->proto_variant == PF_GRE_PPTP_VARIANT)) {
5168 grev1->call_id = nxport.call_id;
5169 }
5170
5171 switch (pd->af) {
5172 #if INET
5173 case AF_INET:
5174 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5175 pf_change_a(&saddr->v4addr.s_addr,
5176 pd->ip_sum,
5177 pd->naddr.v4addr.s_addr, 0);
5178 }
5179 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5180 pf_change_a(&daddr->v4addr.s_addr,
5181 pd->ip_sum,
5182 pd->ndaddr.v4addr.s_addr, 0);
5183 }
5184 break;
5185 #endif /* INET */
5186 case AF_INET6:
5187 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5188 PF_ACPY(saddr, &pd->naddr, AF_INET6);
5189 }
5190 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5191 PF_ACPY(daddr, &pd->ndaddr, AF_INET6);
5192 }
5193 break;
5194 }
5195 ++rewrite;
5196 break;
5197 case IPPROTO_ESP:
5198 if (direction == PF_OUT) {
5199 bxport.spi = 0;
5200 }
5201
5202 switch (pd->af) {
5203 #if INET
5204 case AF_INET:
5205 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5206 pf_change_a(&saddr->v4addr.s_addr,
5207 pd->ip_sum, pd->naddr.v4addr.s_addr, 0);
5208 }
5209 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5210 pf_change_a(&daddr->v4addr.s_addr,
5211 pd->ip_sum,
5212 pd->ndaddr.v4addr.s_addr, 0);
5213 }
5214 break;
5215 #endif /* INET */
5216 case AF_INET6:
5217 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5218 PF_ACPY(saddr, &pd->naddr, AF_INET6);
5219 }
5220 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5221 PF_ACPY(daddr, &pd->ndaddr, AF_INET6);
5222 }
5223 break;
5224 }
5225 break;
5226 default:
5227 switch (pd->af) {
5228 #if INET
5229 case AF_INET:
5230 if ((pd->naf != AF_INET) ||
5231 (PF_ANEQ(saddr, &pd->naddr, pd->af))) {
5232 pf_change_addr(saddr, pd->ip_sum,
5233 &pd->naddr, 0, af, pd->naf);
5234 }
5235
5236 if ((pd->naf != AF_INET) ||
5237 (PF_ANEQ(daddr, &pd->ndaddr, pd->af))) {
5238 pf_change_addr(daddr, pd->ip_sum,
5239 &pd->ndaddr, 0, af, pd->naf);
5240 }
5241 break;
5242 #endif /* INET */
5243 case AF_INET6:
5244 if (PF_ANEQ(saddr, &pd->naddr, pd->af)) {
5245 PF_ACPY(saddr, &pd->naddr, af);
5246 }
5247 if (PF_ANEQ(daddr, &pd->ndaddr, pd->af)) {
5248 PF_ACPY(daddr, &pd->ndaddr, af);
5249 }
5250 break;
5251 }
5252 break;
5253 }
5254
5255 if (nr->natpass) {
5256 r = NULL;
5257 }
5258 pd->nat_rule = nr;
5259 pd->af = pd->naf;
5260 } else {
5261 #if SKYWALK
5262 VERIFY(!NETNS_TOKEN_VALID(&nstoken));
5263 #endif
5264 }
5265
5266 if (nr && nr->tag > 0) {
5267 tag = nr->tag;
5268 }
5269
5270 while (r != NULL) {
5271 r->evaluations++;
5272 if (pfi_kif_match(r->kif, kif) == r->ifnot) {
5273 r = r->skip[PF_SKIP_IFP].ptr;
5274 } else if (r->direction && r->direction != direction) {
5275 r = r->skip[PF_SKIP_DIR].ptr;
5276 } else if (r->af && r->af != pd->af) {
5277 r = r->skip[PF_SKIP_AF].ptr;
5278 } else if (r->proto && r->proto != pd->proto) {
5279 r = r->skip[PF_SKIP_PROTO].ptr;
5280 } else if (PF_MISMATCHAW(&r->src.addr, saddr, pd->af,
5281 r->src.neg, kif)) {
5282 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
5283 }
5284 /* tcp/udp only. port_op always 0 in other cases */
5285 else if (r->proto == pd->proto &&
5286 (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
5287 r->src.xport.range.op &&
5288 !pf_match_port(r->src.xport.range.op,
5289 r->src.xport.range.port[0], r->src.xport.range.port[1],
5290 th->th_sport)) {
5291 r = r->skip[PF_SKIP_SRC_PORT].ptr;
5292 } else if (PF_MISMATCHAW(&r->dst.addr, daddr, pd->af,
5293 r->dst.neg, NULL)) {
5294 r = r->skip[PF_SKIP_DST_ADDR].ptr;
5295 }
5296 /* tcp/udp only. port_op always 0 in other cases */
5297 else if (r->proto == pd->proto &&
5298 (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
5299 r->dst.xport.range.op &&
5300 !pf_match_port(r->dst.xport.range.op,
5301 r->dst.xport.range.port[0], r->dst.xport.range.port[1],
5302 th->th_dport)) {
5303 r = r->skip[PF_SKIP_DST_PORT].ptr;
5304 }
5305 /* icmp only. type always 0 in other cases */
5306 else if (r->type && r->type != icmptype + 1) {
5307 r = TAILQ_NEXT(r, entries);
5308 }
5309 /* icmp only. type always 0 in other cases */
5310 else if (r->code && r->code != icmpcode + 1) {
5311 r = TAILQ_NEXT(r, entries);
5312 } else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
5313 !(r->tos & pd->tos)) {
5314 r = TAILQ_NEXT(r, entries);
5315 } else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
5316 !(r->tos & (pd->tos & DSCP_MASK))) {
5317 r = TAILQ_NEXT(r, entries);
5318 } else if ((r->rule_flag & PFRULE_SC) && r->tos &&
5319 ((r->tos & SCIDX_MASK) != pd->sc)) {
5320 r = TAILQ_NEXT(r, entries);
5321 } else if (r->rule_flag & PFRULE_FRAGMENT) {
5322 r = TAILQ_NEXT(r, entries);
5323 } else if (pd->proto == IPPROTO_TCP &&
5324 (r->flagset & th->th_flags) != r->flags) {
5325 r = TAILQ_NEXT(r, entries);
5326 }
5327 /* tcp/udp only. uid.op always 0 in other cases */
5328 else if (r->uid.op && (pd->lookup.done || ((void)(pd->lookup.done =
5329 pf_socket_lookup(direction, pd)), 1)) &&
5330 !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
5331 pd->lookup.uid)) {
5332 r = TAILQ_NEXT(r, entries);
5333 }
5334 /* tcp/udp only. gid.op always 0 in other cases */
5335 else if (r->gid.op && (pd->lookup.done || ((void)(pd->lookup.done =
5336 pf_socket_lookup(direction, pd)), 1)) &&
5337 !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
5338 pd->lookup.gid)) {
5339 r = TAILQ_NEXT(r, entries);
5340 } else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
5341 r = TAILQ_NEXT(r, entries);
5342 } else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
5343 r = TAILQ_NEXT(r, entries);
5344 } else if (r->os_fingerprint != PF_OSFP_ANY &&
5345 (pd->proto != IPPROTO_TCP || !pf_osfp_match(
5346 pf_osfp_fingerprint(pd, pbuf, off, th),
5347 r->os_fingerprint))) {
5348 r = TAILQ_NEXT(r, entries);
5349 } else {
5350 if (r->tag) {
5351 tag = r->tag;
5352 }
5353 if (PF_RTABLEID_IS_VALID(r->rtableid)) {
5354 rtableid = r->rtableid;
5355 }
5356 if (r->anchor == NULL) {
5357 match = 1;
5358 *rm = r;
5359 *am = a;
5360 *rsm = ruleset;
5361 if ((*rm)->quick) {
5362 break;
5363 }
5364 r = TAILQ_NEXT(r, entries);
5365 } else {
5366 pf_step_into_anchor(&asd, &ruleset,
5367 PF_RULESET_FILTER, &r, &a, &match);
5368 }
5369 }
5370 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
5371 PF_RULESET_FILTER, &r, &a, &match)) {
5372 break;
5373 }
5374 }
5375 r = *rm;
5376 a = *am;
5377 ruleset = *rsm;
5378
5379 REASON_SET(&reason, PFRES_MATCH);
5380
5381 if (r->log || (nr != NULL && nr->log)) {
5382 if (rewrite > 0) {
5383 if (rewrite < off + pd->hdrlen) {
5384 rewrite = off + pd->hdrlen;
5385 }
5386
5387 if (pf_lazy_makewritable(pd, pbuf, rewrite) == NULL) {
5388 REASON_SET(&reason, PFRES_MEMORY);
5389 #if SKYWALK
5390 netns_release(&nstoken);
5391 #endif
5392 return PF_DROP;
5393 }
5394 pbuf_copy_back(pbuf, off, pd->hdrlen, pf_pd_get_hdr_ptr_any(pd), pd->hdrlen);
5395 }
5396 PFLOG_PACKET(kif, h, pbuf, pd->af, direction, reason,
5397 r->log ? r : nr, a, ruleset, pd);
5398 }
5399
5400 if ((r->action == PF_DROP) &&
5401 ((r->rule_flag & PFRULE_RETURNRST) ||
5402 (r->rule_flag & PFRULE_RETURNICMP) ||
5403 (r->rule_flag & PFRULE_RETURN))) {
5404 /* undo NAT changes, if they have taken place */
5405 /* XXX For NAT64 we are not reverting the changes */
5406 if (nr != NULL && nr->action != PF_NAT64) {
5407 if (direction == PF_OUT) {
5408 pd->af = af;
5409 switch (pd->proto) {
5410 case IPPROTO_TCP:
5411 pf_change_ap(direction, pd->mp, saddr,
5412 &th->th_sport, pd->ip_sum,
5413 &th->th_sum, &pd->baddr,
5414 bxport.port, 0, af, pd->af, 1);
5415 sxport.port = th->th_sport;
5416 rewrite++;
5417 break;
5418 case IPPROTO_UDP:
5419 pf_change_ap(direction, pd->mp, saddr,
5420 &pf_pd_get_hdr_udp(pd)->uh_sport, pd->ip_sum,
5421 &pf_pd_get_hdr_udp(pd)->uh_sum, &pd->baddr,
5422 bxport.port, 1, af, pd->af, 1);
5423 sxport.port = pf_pd_get_hdr_udp(pd)->uh_sport;
5424 rewrite++;
5425 break;
5426 case IPPROTO_ICMP:
5427 case IPPROTO_ICMPV6:
5428 /* nothing! */
5429 break;
5430 case IPPROTO_GRE:
5431 PF_ACPY(&pd->baddr, saddr, af);
5432 ++rewrite;
5433 switch (af) {
5434 #if INET
5435 case AF_INET:
5436 pf_change_a(&saddr->v4addr.s_addr,
5437 pd->ip_sum,
5438 pd->baddr.v4addr.s_addr, 0);
5439 break;
5440 #endif /* INET */
5441 case AF_INET6:
5442 PF_ACPY(saddr, &pd->baddr,
5443 AF_INET6);
5444 break;
5445 }
5446 break;
5447 case IPPROTO_ESP:
5448 PF_ACPY(&pd->baddr, saddr, af);
5449 switch (af) {
5450 #if INET
5451 case AF_INET:
5452 pf_change_a(&saddr->v4addr.s_addr,
5453 pd->ip_sum,
5454 pd->baddr.v4addr.s_addr, 0);
5455 break;
5456 #endif /* INET */
5457 case AF_INET6:
5458 PF_ACPY(saddr, &pd->baddr,
5459 AF_INET6);
5460 break;
5461 }
5462 break;
5463 default:
5464 switch (af) {
5465 case AF_INET:
5466 pf_change_a(&saddr->v4addr.s_addr,
5467 pd->ip_sum,
5468 pd->baddr.v4addr.s_addr, 0);
5469 break;
5470 case AF_INET6:
5471 PF_ACPY(saddr, &pd->baddr, af);
5472 break;
5473 }
5474 }
5475 } else {
5476 switch (pd->proto) {
5477 case IPPROTO_TCP:
5478 pf_change_ap(direction, pd->mp, daddr,
5479 &th->th_dport, pd->ip_sum,
5480 &th->th_sum, &pd->bdaddr,
5481 bdxport.port, 0, af, pd->af, 1);
5482 dxport.port = th->th_dport;
5483 rewrite++;
5484 break;
5485 case IPPROTO_UDP:
5486 pf_change_ap(direction, pd->mp, daddr,
5487 &pf_pd_get_hdr_udp(pd)->uh_dport, pd->ip_sum,
5488 &pf_pd_get_hdr_udp(pd)->uh_sum, &pd->bdaddr,
5489 bdxport.port, 1, af, pd->af, 1);
5490 dxport.port = pf_pd_get_hdr_udp(pd)->uh_dport;
5491 rewrite++;
5492 break;
5493 case IPPROTO_ICMP:
5494 case IPPROTO_ICMPV6:
5495 /* nothing! */
5496 break;
5497 case IPPROTO_GRE:
5498 if (pd->proto_variant ==
5499 PF_GRE_PPTP_VARIANT) {
5500 grev1->call_id =
5501 bdxport.call_id;
5502 }
5503 ++rewrite;
5504 switch (af) {
5505 #if INET
5506 case AF_INET:
5507 pf_change_a(&daddr->v4addr.s_addr,
5508 pd->ip_sum,
5509 pd->bdaddr.v4addr.s_addr, 0);
5510 break;
5511 #endif /* INET */
5512 case AF_INET6:
5513 PF_ACPY(daddr, &pd->bdaddr,
5514 AF_INET6);
5515 break;
5516 }
5517 break;
5518 case IPPROTO_ESP:
5519 switch (af) {
5520 #if INET
5521 case AF_INET:
5522 pf_change_a(&daddr->v4addr.s_addr,
5523 pd->ip_sum,
5524 pd->bdaddr.v4addr.s_addr, 0);
5525 break;
5526 #endif /* INET */
5527 case AF_INET6:
5528 PF_ACPY(daddr, &pd->bdaddr,
5529 AF_INET6);
5530 break;
5531 }
5532 break;
5533 default:
5534 switch (af) {
5535 case AF_INET:
5536 pf_change_a(&daddr->v4addr.s_addr,
5537 pd->ip_sum,
5538 pd->bdaddr.v4addr.s_addr, 0);
5539 break;
5540 case AF_INET6:
5541 PF_ACPY(daddr, &pd->bdaddr, af);
5542 break;
5543 }
5544 }
5545 }
5546 }
5547 if (pd->proto == IPPROTO_TCP &&
5548 ((r->rule_flag & PFRULE_RETURNRST) ||
5549 (r->rule_flag & PFRULE_RETURN)) &&
5550 !(th->th_flags & TH_RST)) {
5551 u_int32_t ack = ntohl(th->th_seq) + pd->p_len;
5552 int len = 0;
5553 struct ip *__single h4;
5554 struct ip6_hdr *__single h6;
5555
5556 switch (pd->af) {
5557 case AF_INET:
5558 h4 = pbuf->pb_data;
5559 len = ntohs(h4->ip_len) - off;
5560 break;
5561 case AF_INET6:
5562 h6 = pbuf->pb_data;
5563 len = ntohs(h6->ip6_plen) -
5564 (off - sizeof(*h6));
5565 break;
5566 }
5567
5568 if (pf_check_proto_cksum(pbuf, off, len, IPPROTO_TCP,
5569 pd->af)) {
5570 REASON_SET(&reason, PFRES_PROTCKSUM);
5571 } else {
5572 if (th->th_flags & TH_SYN) {
5573 ack++;
5574 }
5575 if (th->th_flags & TH_FIN) {
5576 ack++;
5577 }
5578 pf_send_tcp(r, pd->af, pd->dst,
5579 pd->src, th->th_dport, th->th_sport,
5580 ntohl(th->th_ack), ack, TH_RST | TH_ACK, 0, 0,
5581 r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp);
5582 }
5583 } else if (pd->proto != IPPROTO_ICMP && pd->af == AF_INET &&
5584 pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
5585 r->return_icmp) {
5586 pf_send_icmp(pbuf, r->return_icmp >> 8,
5587 r->return_icmp & 255, pd->af, r);
5588 } else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
5589 pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH &&
5590 r->return_icmp6) {
5591 pf_send_icmp(pbuf, r->return_icmp6 >> 8,
5592 r->return_icmp6 & 255, pd->af, r);
5593 }
5594 }
5595
5596 if (r->action == PF_DROP) {
5597 #if SKYWALK
5598 netns_release(&nstoken);
5599 #endif
5600 return PF_DROP;
5601 }
5602
5603 /* prepare state key, for flowhash and/or the state (if created) */
5604 bzero(&psk, sizeof(psk));
5605 psk.proto = pd->proto;
5606 psk.direction = direction;
5607 if (pd->proto == IPPROTO_UDP) {
5608 if (ntohs(pf_pd_get_hdr_udp(pd)->uh_sport) == PF_IKE_PORT &&
5609 ntohs(pf_pd_get_hdr_udp(pd)->uh_dport) == PF_IKE_PORT) {
5610 psk.proto_variant = PF_EXTFILTER_APD;
5611 } else {
5612 psk.proto_variant = nr ? nr->extfilter : r->extfilter;
5613 if (psk.proto_variant < PF_EXTFILTER_APD) {
5614 psk.proto_variant = PF_EXTFILTER_APD;
5615 }
5616 }
5617 } else if (pd->proto == IPPROTO_GRE) {
5618 psk.proto_variant = pd->proto_variant;
5619 }
5620 if (direction == PF_OUT) {
5621 psk.af_gwy = af;
5622 PF_ACPY(&psk.gwy.addr, saddr, af);
5623 PF_ACPY(&psk.ext_gwy.addr, daddr, af);
5624 switch (pd->proto) {
5625 case IPPROTO_ESP:
5626 psk.gwy.xport.spi = 0;
5627 psk.ext_gwy.xport.spi = pf_pd_get_hdr_esp(pd)->spi;
5628 break;
5629 case IPPROTO_ICMP:
5630 case IPPROTO_ICMPV6:
5631 /*
5632 * NAT64 requires protocol translation between ICMPv4
5633 * and ICMPv6. TCP and UDP do not require protocol
5634 * translation. To avoid adding complexity just to
5635 * handle ICMP(v4addr/v6addr), we always lookup for
5636 * proto = IPPROTO_ICMP on both LAN and WAN side
5637 */
5638 psk.proto = IPPROTO_ICMP;
5639 psk.gwy.xport.port = nxport.port;
5640 psk.ext_gwy.xport.spi = 0;
5641 break;
5642 default:
5643 psk.gwy.xport = sxport;
5644 psk.ext_gwy.xport = dxport;
5645 break;
5646 }
5647 psk.af_lan = af;
5648 if (nr != NULL) {
5649 PF_ACPY(&psk.lan.addr, &pd->baddr, af);
5650 psk.lan.xport = bxport;
5651 PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af);
5652 psk.ext_lan.xport = bdxport;
5653 } else {
5654 PF_ACPY(&psk.lan.addr, &psk.gwy.addr, af);
5655 psk.lan.xport = psk.gwy.xport;
5656 PF_ACPY(&psk.ext_lan.addr, &psk.ext_gwy.addr, af);
5657 psk.ext_lan.xport = psk.ext_gwy.xport;
5658 }
5659 } else {
5660 psk.af_lan = af;
5661 if (nr && nr->action == PF_NAT64) {
5662 PF_ACPY(&psk.lan.addr, &pd->baddr, af);
5663 PF_ACPY(&psk.ext_lan.addr, &pd->bdaddr, af);
5664 } else {
5665 PF_ACPY(&psk.lan.addr, daddr, af);
5666 PF_ACPY(&psk.ext_lan.addr, saddr, af);
5667 }
5668 switch (pd->proto) {
5669 case IPPROTO_ICMP:
5670 case IPPROTO_ICMPV6:
5671 /*
5672 * NAT64 requires protocol translation between ICMPv4
5673 * and ICMPv6. TCP and UDP do not require protocol
5674 * translation. To avoid adding complexity just to
5675 * handle ICMP(v4addr/v6addr), we always lookup for
5676 * proto = IPPROTO_ICMP on both LAN and WAN side
5677 */
5678 psk.proto = IPPROTO_ICMP;
5679 if (nr && nr->action == PF_NAT64) {
5680 psk.lan.xport = bxport;
5681 psk.ext_lan.xport = bxport;
5682 } else {
5683 psk.lan.xport = nxport;
5684 psk.ext_lan.xport.spi = 0;
5685 }
5686 break;
5687 case IPPROTO_ESP:
5688 psk.ext_lan.xport.spi = 0;
5689 psk.lan.xport.spi = pf_pd_get_hdr_esp(pd)->spi;
5690 break;
5691 default:
5692 if (nr != NULL) {
5693 if (nr->action == PF_NAT64) {
5694 psk.lan.xport = bxport;
5695 psk.ext_lan.xport = bdxport;
5696 } else {
5697 psk.lan.xport = dxport;
5698 psk.ext_lan.xport = sxport;
5699 }
5700 } else {
5701 psk.lan.xport = dxport;
5702 psk.ext_lan.xport = sxport;
5703 }
5704 break;
5705 }
5706 psk.af_gwy = pd->naf;
5707 if (nr != NULL) {
5708 if (nr->action == PF_NAT64) {
5709 PF_ACPY(&psk.gwy.addr, &pd->naddr, pd->naf);
5710 PF_ACPY(&psk.ext_gwy.addr, &pd->ndaddr,
5711 pd->naf);
5712 if ((pd->proto == IPPROTO_ICMPV6) ||
5713 (pd->proto == IPPROTO_ICMP)) {
5714 psk.gwy.xport = nxport;
5715 psk.ext_gwy.xport = nxport;
5716 } else {
5717 psk.gwy.xport = sxport;
5718 psk.ext_gwy.xport = dxport;
5719 }
5720 } else {
5721 PF_ACPY(&psk.gwy.addr, &pd->bdaddr, af);
5722 psk.gwy.xport = bdxport;
5723 PF_ACPY(&psk.ext_gwy.addr, saddr, af);
5724 psk.ext_gwy.xport = sxport;
5725 }
5726 } else {
5727 PF_ACPY(&psk.gwy.addr, &psk.lan.addr, af);
5728 psk.gwy.xport = psk.lan.xport;
5729 PF_ACPY(&psk.ext_gwy.addr, &psk.ext_lan.addr, af);
5730 psk.ext_gwy.xport = psk.ext_lan.xport;
5731 }
5732 }
5733 if (pd->pktflags & PKTF_FLOW_ID) {
5734 /* flow hash was already computed outside of PF */
5735 psk.flowsrc = pd->flowsrc;
5736 psk.flowhash = pd->flowhash;
5737 } else {
5738 /*
5739 * Allocation of flow identifier is deferred until a PF state
5740 * creation is needed for this flow.
5741 */
5742 pd->pktflags &= ~PKTF_FLOW_ADV;
5743 pd->flowhash = 0;
5744 }
5745
5746 if (__improbable(pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, pd))) {
5747 REASON_SET(&reason, PFRES_MEMORY);
5748 #if SKYWALK
5749 netns_release(&nstoken);
5750 #endif
5751 return PF_DROP;
5752 }
5753
5754 if (!state_icmp && (r->keep_state || nr != NULL ||
5755 (pd->flags & PFDESC_TCP_NORM))) {
5756 /* create new state */
5757 struct pf_state *__single s = NULL;
5758 struct pf_state_key *__single sk = NULL;
5759 struct pf_src_node *__single sn = NULL;
5760 struct pf_ike_hdr ike;
5761
5762 if (pd->proto == IPPROTO_UDP) {
5763 size_t plen = pbuf->pb_packet_len - off - sizeof(*uh);
5764
5765 if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
5766 ntohs(uh->uh_dport) == PF_IKE_PORT &&
5767 plen >= PF_IKE_PACKET_MINSIZE) {
5768 if (plen > PF_IKE_PACKET_MINSIZE) {
5769 plen = PF_IKE_PACKET_MINSIZE;
5770 }
5771 pbuf_copy_data(pbuf, off + sizeof(*uh), plen,
5772 &ike, sizeof(ike));
5773 }
5774 }
5775
5776 if (nr != NULL && pd->proto == IPPROTO_ESP &&
5777 direction == PF_OUT) {
5778 struct pf_state_key_cmp sk0;
5779 struct pf_state *s0;
5780
5781 /*
5782 * <[email protected]>
5783 * This squelches state creation if the external
5784 * address matches an existing incomplete state with a
5785 * different internal address. Only one 'blocking'
5786 * partial state is allowed for each external address.
5787 */
5788 #if SKYWALK
5789 /*
5790 * XXXSCW:
5791 *
5792 * It's not clear how this impacts netns. The original
5793 * state will hold the port reservation token but what
5794 * happens to other "Cone NAT" states when the first is
5795 * torn down?
5796 */
5797 #endif
5798 memset(&sk0, 0, sizeof(sk0));
5799 sk0.af_gwy = pd->af;
5800 sk0.proto = IPPROTO_ESP;
5801 PF_ACPY(&sk0.gwy.addr, saddr, sk0.af_gwy);
5802 PF_ACPY(&sk0.ext_gwy.addr, daddr, sk0.af_gwy);
5803 s0 = pf_find_state(kif, &sk0, PF_IN);
5804
5805 if (s0 && PF_ANEQ(&s0->state_key->lan.addr,
5806 pd->src, pd->af)) {
5807 nsn = 0;
5808 goto cleanup;
5809 }
5810 }
5811
5812 /* check maximums */
5813 if (r->max_states && (r->states >= r->max_states)) {
5814 pf_status.lcounters[LCNT_STATES]++;
5815 REASON_SET(&reason, PFRES_MAXSTATES);
5816 goto cleanup;
5817 }
5818 /* src node for filter rule */
5819 if ((r->rule_flag & PFRULE_SRCTRACK ||
5820 r->rpool.opts & PF_POOL_STICKYADDR) &&
5821 pf_insert_src_node(&sn, r, saddr, af) != 0) {
5822 REASON_SET(&reason, PFRES_SRCLIMIT);
5823 goto cleanup;
5824 }
5825 /* src node for translation rule */
5826 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
5827 ((direction == PF_OUT &&
5828 nr->action != PF_RDR &&
5829 pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) ||
5830 (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) {
5831 REASON_SET(&reason, PFRES_SRCLIMIT);
5832 goto cleanup;
5833 }
5834 s = pool_get(&pf_state_pl, PR_WAITOK);
5835 if (s == NULL) {
5836 REASON_SET(&reason, PFRES_MEMORY);
5837 cleanup:
5838 if (sn != NULL && sn->states == 0 && sn->expire == 0) {
5839 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
5840 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
5841 pf_status.src_nodes--;
5842 pool_put(&pf_src_tree_pl, sn);
5843 }
5844 if (nsn != sn && nsn != NULL && nsn->states == 0 &&
5845 nsn->expire == 0) {
5846 RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn);
5847 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
5848 pf_status.src_nodes--;
5849 pool_put(&pf_src_tree_pl, nsn);
5850 }
5851 if (s != NULL) {
5852 pf_detach_state(s, 0);
5853 } else if (sk != NULL) {
5854 if (sk->app_state) {
5855 pool_put(&pf_app_state_pl,
5856 sk->app_state);
5857 }
5858 pf_state_key_release_flowid(sk);
5859 pool_put(&pf_state_key_pl, sk);
5860 }
5861 #if SKYWALK
5862 netns_release(&nstoken);
5863 #endif
5864 return PF_DROP;
5865 }
5866 bzero(s, sizeof(*s));
5867 TAILQ_INIT(&s->unlink_hooks);
5868 s->rule.ptr = r;
5869 s->nat_rule.ptr = nr;
5870 s->anchor.ptr = a;
5871 STATE_INC_COUNTERS(s);
5872 s->allow_opts = r->allow_opts;
5873 s->log = r->log & PF_LOG_ALL;
5874 if (nr != NULL) {
5875 s->log |= nr->log & PF_LOG_ALL;
5876 }
5877 switch (pd->proto) {
5878 case IPPROTO_TCP:
5879 s->src.seqlo = ntohl(th->th_seq);
5880 s->src.seqhi = s->src.seqlo + pd->p_len + 1;
5881 if ((th->th_flags & (TH_SYN | TH_ACK)) ==
5882 TH_SYN && r->keep_state == PF_STATE_MODULATE) {
5883 /* Generate sequence number modulator */
5884 if ((s->src.seqdiff = pf_tcp_iss(pd) -
5885 s->src.seqlo) == 0) {
5886 s->src.seqdiff = 1;
5887 }
5888 pf_change_a(&th->th_seq, &th->th_sum,
5889 htonl(s->src.seqlo + s->src.seqdiff), 0);
5890 rewrite = off + sizeof(*th);
5891 } else {
5892 s->src.seqdiff = 0;
5893 }
5894 if (th->th_flags & TH_SYN) {
5895 s->src.seqhi++;
5896 s->src.wscale = pf_get_wscale(pbuf, off,
5897 th->th_off, af);
5898 }
5899 s->src.max_win = MAX(ntohs(th->th_win), 1);
5900 if (s->src.wscale & PF_WSCALE_MASK) {
5901 /* Remove scale factor from initial window */
5902 int win = s->src.max_win;
5903 win += 1 << (s->src.wscale & PF_WSCALE_MASK);
5904 s->src.max_win = (win - 1) >>
5905 (s->src.wscale & PF_WSCALE_MASK);
5906 }
5907 if (th->th_flags & TH_FIN) {
5908 s->src.seqhi++;
5909 }
5910 s->dst.seqhi = 1;
5911 s->dst.max_win = 1;
5912 s->src.state = TCPS_SYN_SENT;
5913 s->dst.state = TCPS_CLOSED;
5914 s->timeout = PFTM_TCP_FIRST_PACKET;
5915 break;
5916 case IPPROTO_UDP:
5917 s->src.state = PFUDPS_SINGLE;
5918 s->dst.state = PFUDPS_NO_TRAFFIC;
5919 s->timeout = PFTM_UDP_FIRST_PACKET;
5920 break;
5921 case IPPROTO_ICMP:
5922 case IPPROTO_ICMPV6:
5923 s->timeout = PFTM_ICMP_FIRST_PACKET;
5924 break;
5925 case IPPROTO_GRE:
5926 s->src.state = PFGRE1S_INITIATING;
5927 s->dst.state = PFGRE1S_NO_TRAFFIC;
5928 s->timeout = PFTM_GREv1_INITIATING;
5929 break;
5930 case IPPROTO_ESP:
5931 s->src.state = PFESPS_INITIATING;
5932 s->dst.state = PFESPS_NO_TRAFFIC;
5933 s->timeout = PFTM_ESP_FIRST_PACKET;
5934 break;
5935 default:
5936 s->src.state = PFOTHERS_SINGLE;
5937 s->dst.state = PFOTHERS_NO_TRAFFIC;
5938 s->timeout = PFTM_OTHER_FIRST_PACKET;
5939 }
5940
5941 s->creation = pf_time_second();
5942 s->expire = pf_time_second();
5943
5944 if (sn != NULL) {
5945 s->src_node = sn;
5946 s->src_node->states++;
5947 VERIFY(s->src_node->states != 0);
5948 }
5949 if (nsn != NULL) {
5950 PF_ACPY(&nsn->raddr, &pd->naddr, af);
5951 s->nat_src_node = nsn;
5952 s->nat_src_node->states++;
5953 VERIFY(s->nat_src_node->states != 0);
5954 }
5955 if (pd->proto == IPPROTO_TCP) {
5956 if ((pd->flags & PFDESC_TCP_NORM) &&
5957 pf_normalize_tcp_init(pbuf, off, pd, th, &s->src,
5958 &s->dst)) {
5959 REASON_SET(&reason, PFRES_MEMORY);
5960 pf_src_tree_remove_state(s);
5961 STATE_DEC_COUNTERS(s);
5962 #if SKYWALK
5963 netns_release(&nstoken);
5964 #endif
5965 pool_put(&pf_state_pl, s);
5966 return PF_DROP;
5967 }
5968 if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
5969 pf_normalize_tcp_stateful(pbuf, off, pd, &reason,
5970 th, s, &s->src, &s->dst, &rewrite)) {
5971 /* This really shouldn't happen!!! */
5972 DPFPRINTF(PF_DEBUG_URGENT,
5973 ("pf_normalize_tcp_stateful failed on "
5974 "first pkt"));
5975 #if SKYWALK
5976 netns_release(&nstoken);
5977 #endif
5978 pf_normalize_tcp_cleanup(s);
5979 pf_src_tree_remove_state(s);
5980 STATE_DEC_COUNTERS(s);
5981 pool_put(&pf_state_pl, s);
5982 return PF_DROP;
5983 }
5984 }
5985
5986 /* allocate state key and import values from psk */
5987 if (__improbable((sk = pf_alloc_state_key(s, &psk)) == NULL)) {
5988 REASON_SET(&reason, PFRES_MEMORY);
5989 /*
5990 * XXXSCW: This will leak the freshly-allocated
5991 * state structure 's'. Although it should
5992 * eventually be aged-out and removed.
5993 */
5994 goto cleanup;
5995 }
5996
5997 if (pd->flowhash == 0) {
5998 ASSERT(sk->flowhash != 0);
5999 ASSERT(sk->flowsrc != 0);
6000 pd->flowsrc = sk->flowsrc;
6001 pd->flowhash = sk->flowhash;
6002 pd->pktflags |= PKTF_FLOW_ID;
6003 pd->pktflags &= ~PKTF_FLOW_ADV;
6004 if (__improbable(pf_tag_packet(pbuf, pd->pf_mtag,
6005 tag, rtableid, pd))) {
6006 /*
6007 * this shouldn't fail as the packet tag has
6008 * already been allocated.
6009 */
6010 panic_plain("pf_tag_packet failed");
6011 }
6012 }
6013
6014 pf_set_rt_ifp(s, saddr, af); /* needs s->state_key set */
6015
6016 pbuf = pd->mp; // XXXSCW: Why?
6017
6018 if (sk->app_state == 0) {
6019 switch (pd->proto) {
6020 case IPPROTO_TCP: {
6021 u_int16_t dport = (direction == PF_OUT) ?
6022 sk->ext_gwy.xport.port : sk->gwy.xport.port;
6023
6024 if (nr != NULL &&
6025 ntohs(dport) == PF_PPTP_PORT) {
6026 struct pf_app_state *__single as;
6027
6028 as = pool_get(&pf_app_state_pl,
6029 PR_WAITOK);
6030 if (!as) {
6031 REASON_SET(&reason,
6032 PFRES_MEMORY);
6033 goto cleanup;
6034 }
6035
6036 bzero(as, sizeof(*as));
6037 as->handler = pf_pptp_handler;
6038 as->compare_lan_ext = 0;
6039 as->compare_ext_gwy = 0;
6040 as->u.pptp.grev1_state = 0;
6041 sk->app_state = as;
6042 (void) hook_establish(&s->unlink_hooks,
6043 0, (hook_fn_t) pf_pptp_unlink, s);
6044 }
6045 break;
6046 }
6047
6048 case IPPROTO_UDP: {
6049 if (nr != NULL &&
6050 ntohs(uh->uh_sport) == PF_IKE_PORT &&
6051 ntohs(uh->uh_dport) == PF_IKE_PORT) {
6052 struct pf_app_state *__single as;
6053
6054 as = pool_get(&pf_app_state_pl,
6055 PR_WAITOK);
6056 if (!as) {
6057 REASON_SET(&reason,
6058 PFRES_MEMORY);
6059 goto cleanup;
6060 }
6061
6062 bzero(as, sizeof(*as));
6063 as->compare_lan_ext = pf_ike_compare;
6064 as->compare_ext_gwy = pf_ike_compare;
6065 as->u.ike.cookie = ike.initiator_cookie;
6066 sk->app_state = as;
6067 }
6068 break;
6069 }
6070
6071 default:
6072 break;
6073 }
6074 }
6075
6076 if (__improbable(pf_insert_state(BOUND_IFACE(r, kif), s))) {
6077 if (pd->proto == IPPROTO_TCP) {
6078 pf_normalize_tcp_cleanup(s);
6079 }
6080 REASON_SET(&reason, PFRES_STATEINS);
6081 pf_src_tree_remove_state(s);
6082 STATE_DEC_COUNTERS(s);
6083 #if SKYWALK
6084 netns_release(&nstoken);
6085 #endif
6086 pool_put(&pf_state_pl, s);
6087 return PF_DROP;
6088 } else {
6089 #if SKYWALK
6090 s->nstoken = nstoken;
6091 nstoken = NULL;
6092 #endif
6093 *sm = s;
6094 }
6095 if (tag > 0) {
6096 pf_tag_ref(tag);
6097 s->tag = tag;
6098 }
6099 if (pd->proto == IPPROTO_TCP &&
6100 (th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN &&
6101 r->keep_state == PF_STATE_SYNPROXY) {
6102 int ua = (sk->af_lan == sk->af_gwy) ? 1 : 0;
6103 s->src.state = PF_TCPS_PROXY_SRC;
6104 if (nr != NULL) {
6105 if (direction == PF_OUT) {
6106 pf_change_ap(direction, pd->mp, saddr,
6107 &th->th_sport, pd->ip_sum,
6108 &th->th_sum, &pd->baddr,
6109 bxport.port, 0, af, pd->af, ua);
6110 sxport.port = th->th_sport;
6111 } else {
6112 pf_change_ap(direction, pd->mp, daddr,
6113 &th->th_dport, pd->ip_sum,
6114 &th->th_sum, &pd->baddr,
6115 bxport.port, 0, af, pd->af, ua);
6116 sxport.port = th->th_dport;
6117 }
6118 }
6119 s->src.seqhi = htonl(random());
6120 /* Find mss option */
6121 mss = pf_get_mss(pbuf, off, th->th_off, af);
6122 mss = pf_calc_mss(saddr, af, mss);
6123 mss = pf_calc_mss(daddr, af, mss);
6124 s->src.mss = mss;
6125 pf_send_tcp(r, af, daddr, saddr, th->th_dport,
6126 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
6127 TH_SYN | TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
6128 REASON_SET(&reason, PFRES_SYNPROXY);
6129 return PF_SYNPROXY_DROP;
6130 }
6131
6132 if (sk->app_state && sk->app_state->handler) {
6133 int offx = off;
6134
6135 switch (pd->proto) {
6136 case IPPROTO_TCP:
6137 offx += th->th_off << 2;
6138 break;
6139 case IPPROTO_UDP:
6140 offx += pf_pd_get_hdr_udp(pd)->uh_ulen << 2;
6141 break;
6142 default:
6143 /* ALG handlers only apply to TCP and UDP rules */
6144 break;
6145 }
6146
6147 if (offx > off) {
6148 sk->app_state->handler(s, direction, offx,
6149 pd, kif);
6150 if (pd->lmw < 0) {
6151 REASON_SET(&reason, PFRES_MEMORY);
6152 return PF_DROP;
6153 }
6154 pbuf = pd->mp; // XXXSCW: Why?
6155 }
6156 }
6157 }
6158 #if SKYWALK
6159 else {
6160 netns_release(&nstoken);
6161 }
6162 #endif
6163
6164 /* copy back packet headers if we performed NAT operations */
6165 if (rewrite) {
6166 if (rewrite < off + pd->hdrlen) {
6167 rewrite = off + pd->hdrlen;
6168 }
6169
6170 if (pf_lazy_makewritable(pd, pd->mp, rewrite) == NULL) {
6171 REASON_SET(&reason, PFRES_MEMORY);
6172 return PF_DROP;
6173 }
6174
6175 pbuf_copy_back(pbuf, off, hdrlen, pf_pd_get_hdr_ptr_any(pd), pd->hdrlen);
6176 if (af == AF_INET6 && pd->naf == AF_INET) {
6177 return pf_nat64_ipv6(pbuf, off, pd);
6178 } else if (af == AF_INET && pd->naf == AF_INET6) {
6179 return pf_nat64_ipv4(pbuf, off, pd);
6180 }
6181 }
6182
6183 return PF_PASS;
6184 }
6185
/*
 * Global boolean, FALSE until something outside this part of the file
 * sets it.
 * NOTE(review): the name suggests it tracks whether the Network Link
 * Conditioner (NLC) is enabled -- confirm against the code that writes it.
 */
boolean_t is_nlc_enabled_glb = FALSE;
6187
6188 static inline boolean_t
pf_is_dummynet_enabled(void)6189 pf_is_dummynet_enabled(void)
6190 {
6191 #if DUMMYNET
6192 if (__probable(!PF_IS_ENABLED)) {
6193 return FALSE;
6194 }
6195
6196 if (__probable(!DUMMYNET_LOADED)) {
6197 return FALSE;
6198 }
6199
6200 if (__probable(TAILQ_EMPTY(pf_main_ruleset.
6201 rules[PF_RULESET_DUMMYNET].active.ptr))) {
6202 return FALSE;
6203 }
6204
6205 return TRUE;
6206 #else
6207 return FALSE;
6208 #endif /* DUMMYNET */
6209 }
6210
#if DUMMYNET
/*
 * pf_test_dummynet
 *
 * Evaluate the active PF_RULESET_DUMMYNET rules of the main ruleset against
 * the packet described by "pd" and, if a pipe rule matches, hand the packet
 * over to dummynet.
 *
 * Parameters:
 *   rm        in/out: left untouched when no rule matches; otherwise set to
 *             the matching rule (the last match, or the first "quick" one).
 *   direction PF_IN or PF_OUT.
 *   kif       interface the packet is being processed on.
 *   pbuf0     in/out: the packet; set to NULL once the packet has been
 *             passed to dummynet.
 *   pd        packet descriptor.
 *   fwa       may be NULL.  When non-NULL, fwa->fwa_pf_rule is the dummynet
 *             rule that matched previously: only rules located after it are
 *             considered, and the output/route fields of "fwa" are copied
 *             into the arguments given to dummynet.
 *
 * Return value:
 *   PF_PASS   - dummynet processing is not active, or the interface has
 *               IFXF_NO_TRAFFIC_SHAPING set, or no rule matched ("*rm" is
 *               unchanged in all of these cases);
 *             - the matching rule is a PF_NODUMMYNET rule;
 *             - the packet matched a pipe rule and was siphoned out by
 *               dummynet, in which case "*pbuf0" is set to NULL so the
 *               caller can still return success without touching it.
 *   PF_DROP   the packet could not be tagged (pf_tag_packet() failed).
 *
 * Must be called with pf_lock held.
 */
static __attribute__((noinline)) int
pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif,
    pbuf_t **pbuf0, struct pf_pdesc *pd, struct ip_fw_args *fwa)
{
	pbuf_t *__single pbuf = *pbuf0;
	struct pf_rule *__single am = NULL;
	struct pf_ruleset *__single rsm = NULL;
	struct pf_addr *__single saddr = pd->src, *__single daddr = pd->dst;
	sa_family_t af = pd->af;
	struct pf_rule *__single r, *__single a = NULL;
	struct pf_ruleset *__single ruleset = NULL;
	/*
	 * Port comparisons below read th_sport/th_dport through this pointer
	 * for both TCP and UDP rules.
	 */
	struct tcphdr *__single th = pf_pd_get_hdr_tcp(pd);
	u_short reason;
	int hdrlen = 0;
	int tag = -1;
	unsigned int rtableid = IFSCOPE_NONE;
	int asd = 0;
	int match = 0;
	u_int8_t icmptype = 0, icmpcode = 0;
	struct ip_fw_args dnflow;
	struct pf_rule *__single prev_matching_rule = fwa ? fwa->fwa_pf_rule : NULL;
	/* With no previous rule to get past, matching starts immediately */
	int found_prev_rule = (prev_matching_rule) ? 0 : 1;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	if (!pf_is_dummynet_enabled()) {
		return PF_PASS;
	}

	if (kif->pfik_ifp->if_xflags & IFXF_NO_TRAFFIC_SHAPING) {
		return PF_PASS;
	}

	bzero(&dnflow, sizeof(dnflow));

	hdrlen = 0;

	/* Fragments don't have protocol headers */
	if (!(pd->flags & PFDESC_IP_FRAG)) {
		switch (pd->proto) {
		case IPPROTO_TCP:
			dnflow.fwa_id.flags = pf_pd_get_hdr_tcp(pd)->th_flags;
			dnflow.fwa_id.dst_port = ntohs(pf_pd_get_hdr_tcp(pd)->th_dport);
			dnflow.fwa_id.src_port = ntohs(pf_pd_get_hdr_tcp(pd)->th_sport);
			hdrlen = sizeof(*th);
			break;
		case IPPROTO_UDP:
			dnflow.fwa_id.dst_port = ntohs(pf_pd_get_hdr_udp(pd)->uh_dport);
			dnflow.fwa_id.src_port = ntohs(pf_pd_get_hdr_udp(pd)->uh_sport);
			hdrlen = sizeof(*pf_pd_get_hdr_udp(pd));
			break;
#if INET
		case IPPROTO_ICMP:
			if (af != AF_INET) {
				break;
			}
			hdrlen = ICMP_MINLEN;
			icmptype = pf_pd_get_hdr_icmp(pd)->icmp_type;
			icmpcode = pf_pd_get_hdr_icmp(pd)->icmp_code;
			break;
#endif /* INET */
		case IPPROTO_ICMPV6:
			if (af != AF_INET6) {
				break;
			}
			hdrlen = sizeof(*pf_pd_get_hdr_icmp6(pd));
			icmptype = pf_pd_get_hdr_icmp6(pd)->icmp6_type;
			icmpcode = pf_pd_get_hdr_icmp6(pd)->icmp6_code;
			break;
		case IPPROTO_GRE:
			if (pd->proto_variant == PF_GRE_PPTP_VARIANT) {
				hdrlen = sizeof(*pf_pd_get_hdr_grev1(pd));
			}
			break;
		case IPPROTO_ESP:
			hdrlen = sizeof(*pf_pd_get_hdr_esp(pd));
			break;
		}
	}

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_DUMMYNET].active.ptr);

	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot) {
			r = r->skip[PF_SKIP_IFP].ptr;
		} else if (r->direction && r->direction != direction) {
			r = r->skip[PF_SKIP_DIR].ptr;
		} else if (r->af && r->af != af) {
			r = r->skip[PF_SKIP_AF].ptr;
		} else if (r->proto && r->proto != pd->proto) {
			r = r->skip[PF_SKIP_PROTO].ptr;
		} else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
		    r->src.neg, kif)) {
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		}
		/* tcp/udp only. port_op always 0 in other cases */
		else if (r->proto == pd->proto &&
		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    ((r->src.xport.range.op &&
		    !pf_match_port(r->src.xport.range.op,
		    r->src.xport.range.port[0], r->src.xport.range.port[1],
		    th->th_sport))))) {
			r = r->skip[PF_SKIP_SRC_PORT].ptr;
		} else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
		    r->dst.neg, NULL)) {
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		}
		/* tcp/udp only. port_op always 0 in other cases */
		else if (r->proto == pd->proto &&
		    (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) &&
		    r->dst.xport.range.op &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    !pf_match_port(r->dst.xport.range.op,
		    r->dst.xport.range.port[0], r->dst.xport.range.port[1],
		    th->th_dport))) {
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		}
		/* icmp only. type always 0 in other cases */
		else if (r->type &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    r->type != icmptype + 1)) {
			r = TAILQ_NEXT(r, entries);
		}
		/* icmp only. code always 0 in other cases */
		else if (r->code &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    r->code != icmpcode + 1)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->tos && !(r->tos == pd->tos)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->rule_flag & PFRULE_FRAGMENT) {
			r = TAILQ_NEXT(r, entries);
		} else if (pd->proto == IPPROTO_TCP &&
		    ((pd->flags & PFDESC_IP_FRAG) ||
		    (r->flagset & th->th_flags) != r->flags)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
			r = TAILQ_NEXT(r, entries);
		} else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
			r = TAILQ_NEXT(r, entries);
		} else {
			/*
			 * Need to go past the previous dummynet matching rule
			 */
			if (r->anchor == NULL) {
				if (found_prev_rule) {
					if (r->tag) {
						tag = r->tag;
					}
					if (PF_RTABLEID_IS_VALID(r->rtableid)) {
						rtableid = r->rtableid;
					}
					match = 1;
					*rm = r;
					am = a;
					rsm = ruleset;
					if ((*rm)->quick) {
						break;
					}
				} else if (r == prev_matching_rule) {
					found_prev_rule = 1;
				}
				r = TAILQ_NEXT(r, entries);
			} else {
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_DUMMYNET, &r, &a, &match);
			}
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_DUMMYNET, &r, &a, &match)) {
			break;
		}
	}
	r = *rm;
	a = am;
	ruleset = rsm;

	if (!match) {
		return PF_PASS;
	}

	REASON_SET(&reason, PFRES_DUMMYNET);

	if (r->log) {
		PFLOG_PACKET(kif, h, pbuf, af, direction, reason, r,
		    a, ruleset, pd);
	}

	if (r->action == PF_NODUMMYNET) {
		int dirndx = (direction == PF_OUT);

		r->packets[dirndx]++;
		r->bytes[dirndx] += pd->tot_len;

		return PF_PASS;
	}
	if (pf_tag_packet(pbuf, pd->pf_mtag, tag, rtableid, pd)) {
		REASON_SET(&reason, PFRES_MEMORY);

		return PF_DROP;
	}

	if (r->dnpipe && ip_dn_io_ptr != NULL) {
		struct mbuf *m;
		int dirndx = (direction == PF_OUT);

		r->packets[dirndx]++;
		r->bytes[dirndx] += pd->tot_len;

		dnflow.fwa_cookie = r->dnpipe;
		dnflow.fwa_pf_rule = r;
		dnflow.fwa_id.proto = pd->proto;
		dnflow.fwa_flags = r->dntype;
		switch (af) {
		case AF_INET:
			dnflow.fwa_id.addr_type = 4;
			dnflow.fwa_id.src_ip = ntohl(saddr->v4addr.s_addr);
			dnflow.fwa_id.dst_ip = ntohl(daddr->v4addr.s_addr);
			break;
		case AF_INET6:
			dnflow.fwa_id.addr_type = 6;
			dnflow.fwa_id.src_ip6 = saddr->v6addr;
			/*
			 * The destination must come from daddr, as in the
			 * IPv4 case above; copying saddr here would make
			 * the flow id's source and destination identical.
			 */
			dnflow.fwa_id.dst_ip6 = daddr->v6addr;
			break;
		}

		if (fwa != NULL) {
			dnflow.fwa_oif = fwa->fwa_oif;
			dnflow.fwa_oflags = fwa->fwa_oflags;
			/*
			 * Note that fwa_ro, fwa_dst and fwa_ipoa are
			 * actually in a union so the following does work
			 * for both IPv4 and IPv6
			 */
			dnflow.fwa_ro = fwa->fwa_ro;
			dnflow.fwa_dst = fwa->fwa_dst;
			dnflow.fwa_ipoa = fwa->fwa_ipoa;
			dnflow.fwa_ro6_pmtu = fwa->fwa_ro6_pmtu;
			dnflow.fwa_origifp = fwa->fwa_origifp;
			dnflow.fwa_mtu = fwa->fwa_mtu;
			dnflow.fwa_unfragpartlen = fwa->fwa_unfragpartlen;
			dnflow.fwa_exthdrs = fwa->fwa_exthdrs;
		}

		/* Convert ip_len/ip_off in place before handing the packet off */
		if (af == AF_INET) {
			struct ip *__single iphdr = pbuf->pb_data;
			NTOHS(iphdr->ip_len);
			NTOHS(iphdr->ip_off);
		}
		/*
		 * Don't need to unlock pf_lock as NET_THREAD_HELD_PF
		 * allows for recursive behavior
		 */
		m = pbuf_to_mbuf(pbuf, TRUE);
		if (m != NULL) {
			ip_dn_io_ptr(m,
			    dnflow.fwa_cookie, (af == AF_INET) ?
			    ((direction == PF_IN) ? DN_TO_IP_IN : DN_TO_IP_OUT) :
			    ((direction == PF_IN) ? DN_TO_IP6_IN : DN_TO_IP6_OUT),
			    &dnflow);
		}

		/*
		 * The packet is siphoned out by dummynet so return a NULL
		 * pbuf so the caller can still return success.
		 */
		*pbuf0 = NULL;

		return PF_PASS;
	}

	return PF_PASS;
}
#endif /* DUMMYNET */
6495
6496 static __attribute__((noinline)) int
pf_test_fragment(struct pf_rule ** rm,int direction,struct pfi_kif * kif,pbuf_t * pbuf,void * h,struct pf_pdesc * pd,struct pf_rule ** am,struct pf_ruleset ** rsm)6497 pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
6498 pbuf_t *pbuf, void *h, struct pf_pdesc *pd, struct pf_rule **am,
6499 struct pf_ruleset **rsm)
6500 {
6501 #pragma unused(h)
6502 struct pf_rule *__single r, *__single a = NULL;
6503 struct pf_ruleset *__single ruleset = NULL;
6504 sa_family_t af = pd->af;
6505 u_short reason;
6506 int tag = -1;
6507 int asd = 0;
6508 int match = 0;
6509
6510 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
6511 while (r != NULL) {
6512 r->evaluations++;
6513 if (pfi_kif_match(r->kif, kif) == r->ifnot) {
6514 r = r->skip[PF_SKIP_IFP].ptr;
6515 } else if (r->direction && r->direction != direction) {
6516 r = r->skip[PF_SKIP_DIR].ptr;
6517 } else if (r->af && r->af != af) {
6518 r = r->skip[PF_SKIP_AF].ptr;
6519 } else if (r->proto && r->proto != pd->proto) {
6520 r = r->skip[PF_SKIP_PROTO].ptr;
6521 } else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
6522 r->src.neg, kif)) {
6523 r = r->skip[PF_SKIP_SRC_ADDR].ptr;
6524 } else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
6525 r->dst.neg, NULL)) {
6526 r = r->skip[PF_SKIP_DST_ADDR].ptr;
6527 } else if ((r->rule_flag & PFRULE_TOS) && r->tos &&
6528 !(r->tos & pd->tos)) {
6529 r = TAILQ_NEXT(r, entries);
6530 } else if ((r->rule_flag & PFRULE_DSCP) && r->tos &&
6531 !(r->tos & (pd->tos & DSCP_MASK))) {
6532 r = TAILQ_NEXT(r, entries);
6533 } else if ((r->rule_flag & PFRULE_SC) && r->tos &&
6534 ((r->tos & SCIDX_MASK) != pd->sc)) {
6535 r = TAILQ_NEXT(r, entries);
6536 } else if (r->os_fingerprint != PF_OSFP_ANY) {
6537 r = TAILQ_NEXT(r, entries);
6538 } else if (pd->proto == IPPROTO_UDP &&
6539 (r->src.xport.range.op || r->dst.xport.range.op)) {
6540 r = TAILQ_NEXT(r, entries);
6541 } else if (pd->proto == IPPROTO_TCP &&
6542 (r->src.xport.range.op || r->dst.xport.range.op ||
6543 r->flagset)) {
6544 r = TAILQ_NEXT(r, entries);
6545 } else if ((pd->proto == IPPROTO_ICMP ||
6546 pd->proto == IPPROTO_ICMPV6) &&
6547 (r->type || r->code)) {
6548 r = TAILQ_NEXT(r, entries);
6549 } else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) {
6550 r = TAILQ_NEXT(r, entries);
6551 } else if (r->match_tag && !pf_match_tag(r, pd->pf_mtag, &tag)) {
6552 r = TAILQ_NEXT(r, entries);
6553 } else {
6554 if (r->anchor == NULL) {
6555 match = 1;
6556 *rm = r;
6557 *am = a;
6558 *rsm = ruleset;
6559 if ((*rm)->quick) {
6560 break;
6561 }
6562 r = TAILQ_NEXT(r, entries);
6563 } else {
6564 pf_step_into_anchor(&asd, &ruleset,
6565 PF_RULESET_FILTER, &r, &a, &match);
6566 }
6567 }
6568 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
6569 PF_RULESET_FILTER, &r, &a, &match)) {
6570 break;
6571 }
6572 }
6573 r = *rm;
6574 a = *am;
6575 ruleset = *rsm;
6576
6577 REASON_SET(&reason, PFRES_MATCH);
6578
6579 if (r->log) {
6580 PFLOG_PACKET(kif, h, pbuf, af, direction, reason, r, a, ruleset,
6581 pd);
6582 }
6583
6584 if (r->action != PF_PASS) {
6585 return PF_DROP;
6586 }
6587
6588 if (pf_tag_packet(pbuf, pd->pf_mtag, tag, -1, NULL)) {
6589 REASON_SET(&reason, PFRES_MEMORY);
6590 return PF_DROP;
6591 }
6592
6593 return PF_PASS;
6594 }
6595
6596 static __attribute__((noinline)) void
pf_pptp_handler(struct pf_state * s,int direction,int off,struct pf_pdesc * pd,struct pfi_kif * kif)6597 pf_pptp_handler(struct pf_state *s, int direction, int off,
6598 struct pf_pdesc *pd, struct pfi_kif *kif)
6599 {
6600 #pragma unused(direction)
6601 struct tcphdr *__single th;
6602 struct pf_pptp_state *__single pptps;
6603 struct pf_pptp_ctrl_msg cm;
6604 size_t plen, tlen;
6605 struct pf_state *__single gs;
6606 u_int16_t ct;
6607 u_int16_t *__single pac_call_id;
6608 u_int16_t *__single pns_call_id;
6609 u_int16_t *__single spoof_call_id;
6610 u_int8_t *__single pac_state;
6611 u_int8_t *__single pns_state;
6612 enum { PF_PPTP_PASS, PF_PPTP_INSERT_GRE, PF_PPTP_REMOVE_GRE } op;
6613 pbuf_t *__single pbuf;
6614 struct pf_state_key *__single sk;
6615 struct pf_state_key *__single gsk;
6616 struct pf_app_state *__single gas;
6617
6618 sk = s->state_key;
6619 pptps = &sk->app_state->u.pptp;
6620 gs = pptps->grev1_state;
6621
6622 if (gs) {
6623 gs->expire = pf_time_second();
6624 }
6625
6626 pbuf = pd->mp;
6627 plen = min(sizeof(cm), pbuf->pb_packet_len - off);
6628 if (plen < PF_PPTP_CTRL_MSG_MINSIZE) {
6629 return;
6630 }
6631 tlen = plen - PF_PPTP_CTRL_MSG_MINSIZE;
6632 pbuf_copy_data(pbuf, off, plen, &cm, sizeof(cm));
6633
6634 if (ntohl(cm.hdr.magic) != PF_PPTP_MAGIC_NUMBER) {
6635 return;
6636 }
6637 if (ntohs(cm.hdr.type) != 1) {
6638 return;
6639 }
6640
6641 #define TYPE_LEN_CHECK(_type, _name) \
6642 case PF_PPTP_CTRL_TYPE_##_type: \
6643 if (tlen < sizeof(struct pf_pptp_ctrl_##_name)) \
6644 return; \
6645 break;
6646
6647 switch (cm.ctrl.type) {
6648 TYPE_LEN_CHECK(START_REQ, start_req);
6649 TYPE_LEN_CHECK(START_RPY, start_rpy);
6650 TYPE_LEN_CHECK(STOP_REQ, stop_req);
6651 TYPE_LEN_CHECK(STOP_RPY, stop_rpy);
6652 TYPE_LEN_CHECK(ECHO_REQ, echo_req);
6653 TYPE_LEN_CHECK(ECHO_RPY, echo_rpy);
6654 TYPE_LEN_CHECK(CALL_OUT_REQ, call_out_req);
6655 TYPE_LEN_CHECK(CALL_OUT_RPY, call_out_rpy);
6656 TYPE_LEN_CHECK(CALL_IN_1ST, call_in_1st);
6657 TYPE_LEN_CHECK(CALL_IN_2ND, call_in_2nd);
6658 TYPE_LEN_CHECK(CALL_IN_3RD, call_in_3rd);
6659 TYPE_LEN_CHECK(CALL_CLR, call_clr);
6660 TYPE_LEN_CHECK(CALL_DISC, call_disc);
6661 TYPE_LEN_CHECK(ERROR, error);
6662 TYPE_LEN_CHECK(SET_LINKINFO, set_linkinfo);
6663 default:
6664 return;
6665 }
6666 #undef TYPE_LEN_CHECK
6667
6668 if (!gs) {
6669 gs = pool_get(&pf_state_pl, PR_WAITOK);
6670 if (!gs) {
6671 return;
6672 }
6673
6674 memcpy(gs, s, sizeof(*gs));
6675
6676 memset(&gs->entry_id, 0, sizeof(gs->entry_id));
6677 memset(&gs->entry_list, 0, sizeof(gs->entry_list));
6678
6679 TAILQ_INIT(&gs->unlink_hooks);
6680 gs->rt_kif = NULL;
6681 gs->creation = 0;
6682 gs->pfsync_time = 0;
6683 gs->packets[0] = gs->packets[1] = 0;
6684 gs->bytes[0] = gs->bytes[1] = 0;
6685 gs->timeout = PFTM_UNLINKED;
6686 gs->id = gs->creatorid = 0;
6687 gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
6688 gs->src.scrub = gs->dst.scrub = 0;
6689
6690 gas = pool_get(&pf_app_state_pl, PR_NOWAIT);
6691 if (!gas) {
6692 pool_put(&pf_state_pl, gs);
6693 return;
6694 }
6695
6696 gsk = pf_alloc_state_key(gs, NULL);
6697 if (!gsk) {
6698 pool_put(&pf_app_state_pl, gas);
6699 pool_put(&pf_state_pl, gs);
6700 return;
6701 }
6702
6703 memcpy(&gsk->lan, &sk->lan, sizeof(gsk->lan));
6704 memcpy(&gsk->gwy, &sk->gwy, sizeof(gsk->gwy));
6705 memcpy(&gsk->ext_lan, &sk->ext_lan, sizeof(gsk->ext_lan));
6706 memcpy(&gsk->ext_gwy, &sk->ext_gwy, sizeof(gsk->ext_gwy));
6707 gsk->af_lan = sk->af_lan;
6708 gsk->af_gwy = sk->af_gwy;
6709 gsk->proto = IPPROTO_GRE;
6710 gsk->proto_variant = PF_GRE_PPTP_VARIANT;
6711 gsk->app_state = gas;
6712 gsk->lan.xport.call_id = 0;
6713 gsk->gwy.xport.call_id = 0;
6714 gsk->ext_lan.xport.call_id = 0;
6715 gsk->ext_gwy.xport.call_id = 0;
6716 ASSERT(gsk->flowsrc == FLOWSRC_PF);
6717 ASSERT(gsk->flowhash != 0);
6718 memset(gas, 0, sizeof(*gas));
6719 gas->u.grev1.pptp_state = s;
6720 STATE_INC_COUNTERS(gs);
6721 pptps->grev1_state = gs;
6722 (void) hook_establish(&gs->unlink_hooks, 0,
6723 (hook_fn_t) pf_grev1_unlink, gs);
6724 } else {
6725 gsk = gs->state_key;
6726 }
6727
6728 switch (sk->direction) {
6729 case PF_IN:
6730 pns_call_id = &gsk->ext_lan.xport.call_id;
6731 pns_state = &gs->dst.state;
6732 pac_call_id = &gsk->lan.xport.call_id;
6733 pac_state = &gs->src.state;
6734 break;
6735
6736 case PF_OUT:
6737 pns_call_id = &gsk->lan.xport.call_id;
6738 pns_state = &gs->src.state;
6739 pac_call_id = &gsk->ext_lan.xport.call_id;
6740 pac_state = &gs->dst.state;
6741 break;
6742
6743 default:
6744 DPFPRINTF(PF_DEBUG_URGENT,
6745 ("pf_pptp_handler: bad directional!\n"));
6746 return;
6747 }
6748
6749 spoof_call_id = 0;
6750 op = PF_PPTP_PASS;
6751
6752 ct = ntohs(cm.ctrl.type);
6753
6754 switch (ct) {
6755 case PF_PPTP_CTRL_TYPE_CALL_OUT_REQ:
6756 *pns_call_id = cm.msg.call_out_req.call_id;
6757 *pns_state = PFGRE1S_INITIATING;
6758 if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6759 spoof_call_id = &cm.msg.call_out_req.call_id;
6760 }
6761 break;
6762
6763 case PF_PPTP_CTRL_TYPE_CALL_OUT_RPY:
6764 *pac_call_id = cm.msg.call_out_rpy.call_id;
6765 if (s->nat_rule.ptr) {
6766 spoof_call_id =
6767 (pac_call_id == &gsk->lan.xport.call_id) ?
6768 &cm.msg.call_out_rpy.call_id :
6769 &cm.msg.call_out_rpy.peer_call_id;
6770 }
6771 if (gs->timeout == PFTM_UNLINKED) {
6772 *pac_state = PFGRE1S_INITIATING;
6773 op = PF_PPTP_INSERT_GRE;
6774 }
6775 break;
6776
6777 case PF_PPTP_CTRL_TYPE_CALL_IN_1ST:
6778 *pns_call_id = cm.msg.call_in_1st.call_id;
6779 *pns_state = PFGRE1S_INITIATING;
6780 if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6781 spoof_call_id = &cm.msg.call_in_1st.call_id;
6782 }
6783 break;
6784
6785 case PF_PPTP_CTRL_TYPE_CALL_IN_2ND:
6786 *pac_call_id = cm.msg.call_in_2nd.call_id;
6787 *pac_state = PFGRE1S_INITIATING;
6788 if (s->nat_rule.ptr) {
6789 spoof_call_id =
6790 (pac_call_id == &gsk->lan.xport.call_id) ?
6791 &cm.msg.call_in_2nd.call_id :
6792 &cm.msg.call_in_2nd.peer_call_id;
6793 }
6794 break;
6795
6796 case PF_PPTP_CTRL_TYPE_CALL_IN_3RD:
6797 if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6798 spoof_call_id = &cm.msg.call_in_3rd.call_id;
6799 }
6800 if (cm.msg.call_in_3rd.call_id != *pns_call_id) {
6801 break;
6802 }
6803 if (gs->timeout == PFTM_UNLINKED) {
6804 op = PF_PPTP_INSERT_GRE;
6805 }
6806 break;
6807
6808 case PF_PPTP_CTRL_TYPE_CALL_CLR:
6809 if (cm.msg.call_clr.call_id != *pns_call_id) {
6810 op = PF_PPTP_REMOVE_GRE;
6811 }
6812 break;
6813
6814 case PF_PPTP_CTRL_TYPE_CALL_DISC:
6815 if (cm.msg.call_clr.call_id != *pac_call_id) {
6816 op = PF_PPTP_REMOVE_GRE;
6817 }
6818 break;
6819
6820 case PF_PPTP_CTRL_TYPE_ERROR:
6821 if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) {
6822 spoof_call_id = &cm.msg.error.peer_call_id;
6823 }
6824 break;
6825
6826 case PF_PPTP_CTRL_TYPE_SET_LINKINFO:
6827 if (s->nat_rule.ptr && pac_call_id == &gsk->lan.xport.call_id) {
6828 spoof_call_id = &cm.msg.set_linkinfo.peer_call_id;
6829 }
6830 break;
6831
6832 default:
6833 op = PF_PPTP_PASS;
6834 break;
6835 }
6836
6837 if (!gsk->gwy.xport.call_id && gsk->lan.xport.call_id) {
6838 gsk->gwy.xport.call_id = gsk->lan.xport.call_id;
6839 if (spoof_call_id) {
6840 u_int16_t call_id = 0;
6841 int n = 0;
6842 struct pf_state_key_cmp key;
6843
6844 key.af_gwy = gsk->af_gwy;
6845 key.proto = IPPROTO_GRE;
6846 key.proto_variant = PF_GRE_PPTP_VARIANT;
6847 PF_ACPY(&key.gwy.addr, &gsk->gwy.addr, key.af_gwy);
6848 PF_ACPY(&key.ext_gwy.addr, &gsk->ext_gwy.addr, key.af_gwy);
6849 key.gwy.xport.call_id = gsk->gwy.xport.call_id;
6850 key.ext_gwy.xport.call_id = gsk->ext_gwy.xport.call_id;
6851 do {
6852 call_id = htonl(random());
6853 } while (!call_id);
6854
6855 while (pf_find_state_all(&key, PF_IN, 0)) {
6856 call_id = ntohs(call_id);
6857 --call_id;
6858 if (--call_id == 0) {
6859 call_id = 0xffff;
6860 }
6861 call_id = htons(call_id);
6862
6863 key.gwy.xport.call_id = call_id;
6864
6865 if (++n > 65535) {
6866 DPFPRINTF(PF_DEBUG_URGENT,
6867 ("pf_pptp_handler: failed to spoof "
6868 "call id\n"));
6869 key.gwy.xport.call_id = 0;
6870 break;
6871 }
6872 }
6873
6874 gsk->gwy.xport.call_id = call_id;
6875 }
6876 }
6877
6878 th = pf_pd_get_hdr_tcp(pd);
6879
6880 if (spoof_call_id && gsk->lan.xport.call_id != gsk->gwy.xport.call_id) {
6881 if (*spoof_call_id == gsk->gwy.xport.call_id) {
6882 *spoof_call_id = gsk->lan.xport.call_id;
6883 th->th_sum = pf_cksum_fixup(th->th_sum,
6884 gsk->gwy.xport.call_id, gsk->lan.xport.call_id, 0);
6885 } else {
6886 *spoof_call_id = gsk->gwy.xport.call_id;
6887 th->th_sum = pf_cksum_fixup(th->th_sum,
6888 gsk->lan.xport.call_id, gsk->gwy.xport.call_id, 0);
6889 }
6890
6891 if (pf_lazy_makewritable(pd, pbuf, off + plen) == NULL) {
6892 pptps->grev1_state = NULL;
6893 STATE_DEC_COUNTERS(gs);
6894 pool_put(&pf_state_pl, gs);
6895 return;
6896 }
6897 pbuf_copy_back(pbuf, off, plen, &cm, sizeof(cm));
6898 }
6899
6900 switch (op) {
6901 case PF_PPTP_REMOVE_GRE:
6902 gs->timeout = PFTM_PURGE;
6903 gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC;
6904 gsk->lan.xport.call_id = 0;
6905 gsk->gwy.xport.call_id = 0;
6906 gsk->ext_lan.xport.call_id = 0;
6907 gsk->ext_gwy.xport.call_id = 0;
6908 gs->id = gs->creatorid = 0;
6909 break;
6910
6911 case PF_PPTP_INSERT_GRE:
6912 gs->creation = pf_time_second();
6913 gs->expire = pf_time_second();
6914 gs->timeout = PFTM_TCP_ESTABLISHED;
6915 if (gs->src_node != NULL) {
6916 ++gs->src_node->states;
6917 VERIFY(gs->src_node->states != 0);
6918 }
6919 if (gs->nat_src_node != NULL) {
6920 ++gs->nat_src_node->states;
6921 VERIFY(gs->nat_src_node->states != 0);
6922 }
6923 pf_set_rt_ifp(gs, &sk->lan.addr, sk->af_lan);
6924 if (pf_insert_state(BOUND_IFACE(s->rule.ptr, kif), gs)) {
6925 /*
6926 * <[email protected]>
6927 * FIX ME: insertion can fail when multiple PNS
6928 * behind the same NAT open calls to the same PAC
6929 * simultaneously because spoofed call ID numbers
6930 * are chosen before states are inserted. This is
6931 * hard to fix and happens infrequently enough that
6932 * users will normally try again and this ALG will
6933 * succeed. Failures are expected to be rare enough
6934 * that fixing this is a low priority.
6935 */
6936 pptps->grev1_state = NULL;
6937 pd->lmw = -1; /* Force PF_DROP on PFRES_MEMORY */
6938 pf_src_tree_remove_state(gs);
6939 STATE_DEC_COUNTERS(gs);
6940 pool_put(&pf_state_pl, gs);
6941 DPFPRINTF(PF_DEBUG_URGENT, ("pf_pptp_handler: error "
6942 "inserting GREv1 state.\n"));
6943 }
6944 break;
6945
6946 default:
6947 break;
6948 }
6949 }
6950
6951 static __attribute__((noinline)) void
pf_pptp_unlink(struct pf_state * s)6952 pf_pptp_unlink(struct pf_state *s)
6953 {
6954 struct pf_app_state *as = s->state_key->app_state;
6955 struct pf_state *grev1s = as->u.pptp.grev1_state;
6956
6957 if (grev1s) {
6958 struct pf_app_state *gas = grev1s->state_key->app_state;
6959
6960 if (grev1s->timeout < PFTM_MAX) {
6961 grev1s->timeout = PFTM_PURGE;
6962 }
6963 gas->u.grev1.pptp_state = NULL;
6964 as->u.pptp.grev1_state = NULL;
6965 }
6966 }
6967
6968 static __attribute__((noinline)) void
pf_grev1_unlink(struct pf_state * s)6969 pf_grev1_unlink(struct pf_state *s)
6970 {
6971 struct pf_app_state *as = s->state_key->app_state;
6972 struct pf_state *pptps = as->u.grev1.pptp_state;
6973
6974 if (pptps) {
6975 struct pf_app_state *pas = pptps->state_key->app_state;
6976
6977 pas->u.pptp.grev1_state = NULL;
6978 as->u.grev1.pptp_state = NULL;
6979 }
6980 }
6981
6982 static int
pf_ike_compare(struct pf_app_state * a,struct pf_app_state * b)6983 pf_ike_compare(struct pf_app_state *a, struct pf_app_state *b)
6984 {
6985 int64_t d = a->u.ike.cookie - b->u.ike.cookie;
6986 return (d > 0) ? 1 : ((d < 0) ? -1 : 0);
6987 }
6988
6989 static int
pf_do_nat64(struct pf_state_key * sk,struct pf_pdesc * pd,pbuf_t * pbuf,int off)6990 pf_do_nat64(struct pf_state_key *sk, struct pf_pdesc *pd, pbuf_t *pbuf,
6991 int off)
6992 {
6993 if (pd->af == AF_INET) {
6994 if (pd->af != sk->af_lan) {
6995 pd->ndaddr = sk->lan.addr;
6996 pd->naddr = sk->ext_lan.addr;
6997 } else {
6998 pd->naddr = sk->gwy.addr;
6999 pd->ndaddr = sk->ext_gwy.addr;
7000 }
7001 return pf_nat64_ipv4(pbuf, off, pd);
7002 } else if (pd->af == AF_INET6) {
7003 if (pd->af != sk->af_lan) {
7004 pd->ndaddr = sk->lan.addr;
7005 pd->naddr = sk->ext_lan.addr;
7006 } else {
7007 pd->naddr = sk->gwy.addr;
7008 pd->ndaddr = sk->ext_gwy.addr;
7009 }
7010 return pf_nat64_ipv6(pbuf, off, pd);
7011 }
7012 return PF_DROP;
7013 }
7014
7015 static __attribute__((noinline)) int
pf_test_state_tcp(struct pf_state ** state,int direction,struct pfi_kif * kif,pbuf_t * pbuf,int off,void * h,struct pf_pdesc * pd,u_short * reason)7016 pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
7017 pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd,
7018 u_short *reason)
7019 {
7020 #pragma unused(h)
7021 struct pf_state_key_cmp key;
7022 struct tcphdr *__single th = pf_pd_get_hdr_tcp(pd);
7023 u_int16_t win = ntohs(th->th_win);
7024 u_int32_t ack, end, seq, orig_seq;
7025 u_int8_t sws, dws;
7026 int ackskew;
7027 int copyback = 0;
7028 struct pf_state_peer *src, *dst;
7029 struct pf_state_key *sk;
7030
7031 key.app_state = 0;
7032 key.proto = IPPROTO_TCP;
7033 key.af_lan = key.af_gwy = pd->af;
7034
7035 /*
7036 * For NAT64 the first time rule search and state creation
7037 * is done on the incoming side only.
7038 * Once the state gets created, NAT64's LAN side (ipv6) will
7039 * not be able to find the state in ext-gwy tree as that normally
7040 * is intended to be looked up for incoming traffic from the
7041 * WAN side.
7042 * Therefore to handle NAT64 case we init keys here for both
7043 * lan-ext as well as ext-gwy trees.
7044 * In the state lookup we attempt a lookup on both trees if
7045 * first one does not return any result and return a match if
7046 * the match state's was created by NAT64 rule.
7047 */
7048 PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
7049 PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
7050 key.ext_gwy.xport.port = th->th_sport;
7051 key.gwy.xport.port = th->th_dport;
7052
7053 PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
7054 PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
7055 key.lan.xport.port = th->th_sport;
7056 key.ext_lan.xport.port = th->th_dport;
7057
7058 STATE_LOOKUP();
7059
7060 sk = (*state)->state_key;
7061 /*
7062 * In case of NAT64 the translation is first applied on the LAN
7063 * side. Therefore for stack's address family comparison
7064 * we use sk->af_lan.
7065 */
7066 if ((direction == sk->direction) && (pd->af == sk->af_lan)) {
7067 src = &(*state)->src;
7068 dst = &(*state)->dst;
7069 } else {
7070 src = &(*state)->dst;
7071 dst = &(*state)->src;
7072 }
7073
7074 if (src->state == PF_TCPS_PROXY_SRC) {
7075 if (direction != sk->direction) {
7076 REASON_SET(reason, PFRES_SYNPROXY);
7077 return PF_SYNPROXY_DROP;
7078 }
7079 if (th->th_flags & TH_SYN) {
7080 if (ntohl(th->th_seq) != src->seqlo) {
7081 REASON_SET(reason, PFRES_SYNPROXY);
7082 return PF_DROP;
7083 }
7084 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
7085 pd->src, th->th_dport, th->th_sport,
7086 src->seqhi, ntohl(th->th_seq) + 1,
7087 TH_SYN | TH_ACK, 0, src->mss, 0, 1,
7088 0, NULL, NULL);
7089 REASON_SET(reason, PFRES_SYNPROXY);
7090 return PF_SYNPROXY_DROP;
7091 } else if (!(th->th_flags & TH_ACK) ||
7092 (ntohl(th->th_ack) != src->seqhi + 1) ||
7093 (ntohl(th->th_seq) != src->seqlo + 1)) {
7094 REASON_SET(reason, PFRES_SYNPROXY);
7095 return PF_DROP;
7096 } else if ((*state)->src_node != NULL &&
7097 pf_src_connlimit(state)) {
7098 REASON_SET(reason, PFRES_SRCLIMIT);
7099 return PF_DROP;
7100 } else {
7101 src->state = PF_TCPS_PROXY_DST;
7102 }
7103 }
7104 if (src->state == PF_TCPS_PROXY_DST) {
7105 struct pf_state_host *psrc, *pdst;
7106
7107 if (direction == PF_OUT) {
7108 psrc = &sk->gwy;
7109 pdst = &sk->ext_gwy;
7110 } else {
7111 psrc = &sk->ext_lan;
7112 pdst = &sk->lan;
7113 }
7114 if (direction == sk->direction) {
7115 if (((th->th_flags & (TH_SYN | TH_ACK)) != TH_ACK) ||
7116 (ntohl(th->th_ack) != src->seqhi + 1) ||
7117 (ntohl(th->th_seq) != src->seqlo + 1)) {
7118 REASON_SET(reason, PFRES_SYNPROXY);
7119 return PF_DROP;
7120 }
7121 src->max_win = MAX(ntohs(th->th_win), 1);
7122 if (dst->seqhi == 1) {
7123 dst->seqhi = htonl(random());
7124 }
7125 pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
7126 &pdst->addr, psrc->xport.port, pdst->xport.port,
7127 dst->seqhi, 0, TH_SYN, 0,
7128 src->mss, 0, 0, (*state)->tag, NULL, NULL);
7129 REASON_SET(reason, PFRES_SYNPROXY);
7130 return PF_SYNPROXY_DROP;
7131 } else if (((th->th_flags & (TH_SYN | TH_ACK)) !=
7132 (TH_SYN | TH_ACK)) ||
7133 (ntohl(th->th_ack) != dst->seqhi + 1)) {
7134 REASON_SET(reason, PFRES_SYNPROXY);
7135 return PF_DROP;
7136 } else {
7137 dst->max_win = MAX(ntohs(th->th_win), 1);
7138 dst->seqlo = ntohl(th->th_seq);
7139 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
7140 pd->src, th->th_dport, th->th_sport,
7141 ntohl(th->th_ack), ntohl(th->th_seq) + 1,
7142 TH_ACK, src->max_win, 0, 0, 0,
7143 (*state)->tag, NULL, NULL);
7144 pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr,
7145 &pdst->addr, psrc->xport.port, pdst->xport.port,
7146 src->seqhi + 1, src->seqlo + 1,
7147 TH_ACK, dst->max_win, 0, 0, 1,
7148 0, NULL, NULL);
7149 src->seqdiff = dst->seqhi -
7150 src->seqlo;
7151 dst->seqdiff = src->seqhi -
7152 dst->seqlo;
7153 src->seqhi = src->seqlo +
7154 dst->max_win;
7155 dst->seqhi = dst->seqlo +
7156 src->max_win;
7157 src->wscale = dst->wscale = 0;
7158 src->state = dst->state =
7159 TCPS_ESTABLISHED;
7160 REASON_SET(reason, PFRES_SYNPROXY);
7161 return PF_SYNPROXY_DROP;
7162 }
7163 }
7164
7165 if (((th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN) &&
7166 dst->state >= TCPS_FIN_WAIT_2 &&
7167 src->state >= TCPS_FIN_WAIT_2) {
7168 if (pf_status.debug >= PF_DEBUG_MISC) {
7169 printf("pf: state reuse ");
7170 pf_print_state(*state);
7171 pf_print_flags(th->th_flags);
7172 printf("\n");
7173 }
7174 /* XXX make sure it's the same direction ?? */
7175 src->state = dst->state = TCPS_CLOSED;
7176 pf_unlink_state(*state);
7177 *state = NULL;
7178 return PF_DROP;
7179 }
7180
7181 if ((th->th_flags & TH_SYN) == 0) {
7182 sws = (src->wscale & PF_WSCALE_FLAG) ?
7183 (src->wscale & PF_WSCALE_MASK) : TCP_MAX_WINSHIFT;
7184 dws = (dst->wscale & PF_WSCALE_FLAG) ?
7185 (dst->wscale & PF_WSCALE_MASK) : TCP_MAX_WINSHIFT;
7186 } else {
7187 sws = dws = 0;
7188 }
7189
7190 /*
7191 * Sequence tracking algorithm from Guido van Rooij's paper:
7192 * http://www.madison-gurkha.com/publications/tcp_filtering/
7193 * tcp_filtering.ps
7194 */
7195
7196 orig_seq = seq = ntohl(th->th_seq);
7197 if (src->seqlo == 0) {
7198 /* First packet from this end. Set its state */
7199
7200 if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
7201 src->scrub == NULL) {
7202 if (pf_normalize_tcp_init(pbuf, off, pd, th, src, dst)) {
7203 REASON_SET(reason, PFRES_MEMORY);
7204 return PF_DROP;
7205 }
7206 }
7207
7208 /* Deferred generation of sequence number modulator */
7209 if (dst->seqdiff && !src->seqdiff) {
7210 /* use random iss for the TCP server */
7211 while ((src->seqdiff = random() - seq) == 0) {
7212 ;
7213 }
7214 ack = ntohl(th->th_ack) - dst->seqdiff;
7215 pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
7216 src->seqdiff), 0);
7217 pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
7218 copyback = off + sizeof(*th);
7219 } else {
7220 ack = ntohl(th->th_ack);
7221 }
7222
7223 end = seq + pd->p_len;
7224 if (th->th_flags & TH_SYN) {
7225 end++;
7226 if (dst->wscale & PF_WSCALE_FLAG) {
7227 src->wscale = pf_get_wscale(pbuf, off,
7228 th->th_off, pd->af);
7229 if (src->wscale & PF_WSCALE_FLAG) {
7230 /*
7231 * Remove scale factor from initial
7232 * window
7233 */
7234 sws = src->wscale & PF_WSCALE_MASK;
7235 win = ((u_int32_t)win + (1 << sws) - 1)
7236 >> sws;
7237 dws = dst->wscale & PF_WSCALE_MASK;
7238 } else {
7239 /*
7240 * Window scale negotiation has failed,
7241 * therefore we must restore the window
7242 * scale in the state record that we
7243 * optimistically removed in
7244 * pf_test_rule(). Care is required to
7245 * prevent arithmetic overflow from
7246 * zeroing the window when it's
7247 * truncated down to 16-bits.
7248 */
7249 u_int32_t max_win = dst->max_win;
7250 max_win <<=
7251 dst->wscale & PF_WSCALE_MASK;
7252 dst->max_win = MIN(0xffff, max_win);
7253 /* in case of a retrans SYN|ACK */
7254 dst->wscale = 0;
7255 }
7256 }
7257 }
7258 if (th->th_flags & TH_FIN) {
7259 end++;
7260 }
7261
7262 src->seqlo = seq;
7263 if (src->state < TCPS_SYN_SENT) {
7264 src->state = TCPS_SYN_SENT;
7265 }
7266
7267 /*
7268 * May need to slide the window (seqhi may have been set by
7269 * the crappy stack check or if we picked up the connection
7270 * after establishment)
7271 */
7272 if (src->seqhi == 1 ||
7273 SEQ_GEQ(end + MAX(1, (u_int32_t)dst->max_win << dws),
7274 src->seqhi)) {
7275 src->seqhi = end + MAX(1, (u_int32_t)dst->max_win << dws);
7276 }
7277 if (win > src->max_win) {
7278 src->max_win = win;
7279 }
7280 } else {
7281 ack = ntohl(th->th_ack) - dst->seqdiff;
7282 if (src->seqdiff) {
7283 /* Modulate sequence numbers */
7284 pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
7285 src->seqdiff), 0);
7286 pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
7287 copyback = off + sizeof(*th);
7288 }
7289 end = seq + pd->p_len;
7290 if (th->th_flags & TH_SYN) {
7291 end++;
7292 }
7293 if (th->th_flags & TH_FIN) {
7294 end++;
7295 }
7296 }
7297
7298 if ((th->th_flags & TH_ACK) == 0) {
7299 /* Let it pass through the ack skew check */
7300 ack = dst->seqlo;
7301 } else if ((ack == 0 &&
7302 (th->th_flags & (TH_ACK | TH_RST)) == (TH_ACK | TH_RST)) ||
7303 /* broken tcp stacks do not set ack */
7304 (dst->state < TCPS_SYN_SENT)) {
7305 /*
7306 * Many stacks (ours included) will set the ACK number in an
7307 * FIN|ACK if the SYN times out -- no sequence to ACK.
7308 */
7309 ack = dst->seqlo;
7310 }
7311
7312 if (seq == end) {
7313 /* Ease sequencing restrictions on no data packets */
7314 seq = src->seqlo;
7315 end = seq;
7316 }
7317
7318 ackskew = dst->seqlo - ack;
7319
7320
7321 /*
7322 * Need to demodulate the sequence numbers in any TCP SACK options
7323 * (Selective ACK). We could optionally validate the SACK values
7324 * against the current ACK window, either forwards or backwards, but
7325 * I'm not confident that SACK has been implemented properly
7326 * everywhere. It wouldn't surprise me if several stacks accidently
7327 * SACK too far backwards of previously ACKed data. There really aren't
7328 * any security implications of bad SACKing unless the target stack
7329 * doesn't validate the option length correctly. Someone trying to
7330 * spoof into a TCP connection won't bother blindly sending SACK
7331 * options anyway.
7332 */
7333 if (dst->seqdiff && (th->th_off << 2) > (int)sizeof(struct tcphdr)) {
7334 copyback = pf_modulate_sack(pbuf, off, pd, th, dst);
7335 if (copyback == -1) {
7336 REASON_SET(reason, PFRES_MEMORY);
7337 return PF_DROP;
7338 }
7339
7340 pbuf = pd->mp; // XXXSCW: Why?
7341 }
7342
7343
7344 #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
7345 if (SEQ_GEQ(src->seqhi, end) &&
7346 /* Last octet inside other's window space */
7347 SEQ_GEQ(seq, src->seqlo - ((u_int32_t)dst->max_win << dws)) &&
7348 /* Retrans: not more than one window back */
7349 (ackskew >= -MAXACKWINDOW) &&
7350 /* Acking not more than one reassembled fragment backwards */
7351 (ackskew <= (MAXACKWINDOW << sws)) &&
7352 /* Acking not more than one window forward */
7353 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
7354 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
7355 (pd->flags & PFDESC_IP_REAS) == 0)) {
7356 /* Require an exact/+1 sequence match on resets when possible */
7357
7358 if (dst->scrub || src->scrub) {
7359 if (pf_normalize_tcp_stateful(pbuf, off, pd, reason, th,
7360 *state, src, dst, ©back)) {
7361 return PF_DROP;
7362 }
7363
7364 pbuf = pd->mp; // XXXSCW: Why?
7365 }
7366
7367 /* update max window */
7368 if (src->max_win < win) {
7369 src->max_win = win;
7370 }
7371 /* synchronize sequencing */
7372 if (SEQ_GT(end, src->seqlo)) {
7373 src->seqlo = end;
7374 }
7375 /* slide the window of what the other end can send */
7376 if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi)) {
7377 dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
7378 }
7379
7380 /* update states */
7381 if (th->th_flags & TH_SYN) {
7382 if (src->state < TCPS_SYN_SENT) {
7383 src->state = TCPS_SYN_SENT;
7384 }
7385 }
7386 if (th->th_flags & TH_FIN) {
7387 if (src->state < TCPS_CLOSING) {
7388 src->state = TCPS_CLOSING;
7389 }
7390 }
7391 if (th->th_flags & TH_ACK) {
7392 if (dst->state == TCPS_SYN_SENT) {
7393 dst->state = TCPS_ESTABLISHED;
7394 if (src->state == TCPS_ESTABLISHED &&
7395 (*state)->src_node != NULL &&
7396 pf_src_connlimit(state)) {
7397 REASON_SET(reason, PFRES_SRCLIMIT);
7398 return PF_DROP;
7399 }
7400 } else if (dst->state == TCPS_CLOSING) {
7401 dst->state = TCPS_FIN_WAIT_2;
7402 }
7403 }
7404 if (th->th_flags & TH_RST) {
7405 src->state = dst->state = TCPS_TIME_WAIT;
7406 }
7407
7408 /* update expire time */
7409 (*state)->expire = pf_time_second();
7410 if (src->state >= TCPS_FIN_WAIT_2 &&
7411 dst->state >= TCPS_FIN_WAIT_2) {
7412 (*state)->timeout = PFTM_TCP_CLOSED;
7413 } else if (src->state >= TCPS_CLOSING &&
7414 dst->state >= TCPS_CLOSING) {
7415 (*state)->timeout = PFTM_TCP_FIN_WAIT;
7416 } else if (src->state < TCPS_ESTABLISHED ||
7417 dst->state < TCPS_ESTABLISHED) {
7418 (*state)->timeout = PFTM_TCP_OPENING;
7419 } else if (src->state >= TCPS_CLOSING ||
7420 dst->state >= TCPS_CLOSING) {
7421 (*state)->timeout = PFTM_TCP_CLOSING;
7422 } else {
7423 (*state)->timeout = PFTM_TCP_ESTABLISHED;
7424 }
7425
7426 /* Fall through to PASS packet */
7427 } else if ((dst->state < TCPS_SYN_SENT ||
7428 dst->state >= TCPS_FIN_WAIT_2 || src->state >= TCPS_FIN_WAIT_2) &&
7429 SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
7430 /* Within a window forward of the originating packet */
7431 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
7432 /* Within a window backward of the originating packet */
7433
7434 /*
7435 * This currently handles three situations:
7436 * 1) Stupid stacks will shotgun SYNs before their peer
7437 * replies.
7438 * 2) When PF catches an already established stream (the
7439 * firewall rebooted, the state table was flushed, routes
7440 * changed...)
7441 * 3) Packets get funky immediately after the connection
7442 * closes (this should catch Solaris spurious ACK|FINs
7443 * that web servers like to spew after a close)
7444 *
7445 * This must be a little more careful than the above code
7446 * since packet floods will also be caught here. We don't
7447 * update the TTL here to mitigate the damage of a packet
7448 * flood and so the same code can handle awkward establishment
7449 * and a loosened connection close.
7450 * In the establishment case, a correct peer response will
7451 * validate the connection, go through the normal state code
7452 * and keep updating the state TTL.
7453 */
7454
7455 if (pf_status.debug >= PF_DEBUG_MISC) {
7456 printf("pf: loose state match: ");
7457 pf_print_state(*state);
7458 pf_print_flags(th->th_flags);
7459 printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
7460 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
7461 pd->p_len, ackskew, (*state)->packets[0],
7462 (*state)->packets[1],
7463 direction == PF_IN ? "in" : "out",
7464 direction == sk->direction ?
7465 "fwd" : "rev");
7466 }
7467
7468 if (dst->scrub || src->scrub) {
7469 if (pf_normalize_tcp_stateful(pbuf, off, pd, reason, th,
7470 *state, src, dst, ©back)) {
7471 return PF_DROP;
7472 }
7473 pbuf = pd->mp; // XXXSCW: Why?
7474 }
7475
7476 /* update max window */
7477 if (src->max_win < win) {
7478 src->max_win = win;
7479 }
7480 /* synchronize sequencing */
7481 if (SEQ_GT(end, src->seqlo)) {
7482 src->seqlo = end;
7483 }
7484 /* slide the window of what the other end can send */
7485 if (SEQ_GEQ(ack + ((u_int32_t)win << sws), dst->seqhi)) {
7486 dst->seqhi = ack + MAX(((u_int32_t)win << sws), 1);
7487 }
7488
7489 /*
7490 * Cannot set dst->seqhi here since this could be a shotgunned
7491 * SYN and not an already established connection.
7492 */
7493
7494 if (th->th_flags & TH_FIN) {
7495 if (src->state < TCPS_CLOSING) {
7496 src->state = TCPS_CLOSING;
7497 }
7498 }
7499 if (th->th_flags & TH_RST) {
7500 src->state = dst->state = TCPS_TIME_WAIT;
7501 }
7502
7503 /* Fall through to PASS packet */
7504 } else {
7505 if (dst->state == TCPS_SYN_SENT &&
7506 src->state == TCPS_SYN_SENT) {
7507 /* Send RST for state mismatches during handshake */
7508 if (!(th->th_flags & TH_RST)) {
7509 pf_send_tcp((*state)->rule.ptr, pd->af,
7510 pd->dst, pd->src, th->th_dport,
7511 th->th_sport, ntohl(th->th_ack), 0,
7512 TH_RST, 0, 0,
7513 (*state)->rule.ptr->return_ttl, 1, 0,
7514 pd->eh, kif->pfik_ifp);
7515 }
7516 src->seqlo = 0;
7517 src->seqhi = 1;
7518 src->max_win = 1;
7519 } else if (pf_status.debug >= PF_DEBUG_MISC) {
7520 printf("pf: BAD state: ");
7521 pf_print_state(*state);
7522 pf_print_flags(th->th_flags);
7523 printf("\n seq=%u (%u) ack=%u len=%u ackskew=%d "
7524 "sws=%u dws=%u pkts=%llu:%llu dir=%s,%s\n",
7525 seq, orig_seq, ack, pd->p_len, ackskew,
7526 (unsigned int)sws, (unsigned int)dws,
7527 (*state)->packets[0], (*state)->packets[1],
7528 direction == PF_IN ? "in" : "out",
7529 direction == sk->direction ?
7530 "fwd" : "rev");
7531 printf("pf: State failure on: %c %c %c %c | %c %c\n",
7532 SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
7533 SEQ_GEQ(seq,
7534 src->seqlo - ((u_int32_t)dst->max_win << dws)) ?
7535 ' ': '2',
7536 (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
7537 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
7538 SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
7539 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
7540 }
7541 REASON_SET(reason, PFRES_BADSTATE);
7542 return PF_DROP;
7543 }
7544
7545 /* Any packets which have gotten here are to be passed */
7546
7547 if (sk->app_state &&
7548 sk->app_state->handler) {
7549 sk->app_state->handler(*state, direction,
7550 off + (th->th_off << 2), pd, kif);
7551 if (pd->lmw < 0) {
7552 REASON_SET(reason, PFRES_MEMORY);
7553 return PF_DROP;
7554 }
7555 pbuf = pd->mp; // XXXSCW: Why?
7556 }
7557
7558 /* translate source/destination address, if necessary */
7559 if (STATE_TRANSLATE(sk)) {
7560 pd->naf = (pd->af == sk->af_lan) ? sk->af_gwy : sk->af_lan;
7561
7562 if (direction == PF_OUT) {
7563 pf_change_ap(direction, pd->mp, pd->src, &th->th_sport,
7564 pd->ip_sum, &th->th_sum, &sk->gwy.addr,
7565 sk->gwy.xport.port, 0, pd->af, pd->naf, 1);
7566 } else {
7567 if (pd->af != pd->naf) {
7568 if (pd->af == sk->af_gwy) {
7569 pf_change_ap(direction, pd->mp, pd->dst,
7570 &th->th_dport, pd->ip_sum,
7571 &th->th_sum, &sk->lan.addr,
7572 sk->lan.xport.port, 0,
7573 pd->af, pd->naf, 0);
7574
7575 pf_change_ap(direction, pd->mp, pd->src,
7576 &th->th_sport, pd->ip_sum,
7577 &th->th_sum, &sk->ext_lan.addr,
7578 th->th_sport, 0, pd->af,
7579 pd->naf, 0);
7580 } else {
7581 pf_change_ap(direction, pd->mp, pd->dst,
7582 &th->th_dport, pd->ip_sum,
7583 &th->th_sum, &sk->ext_gwy.addr,
7584 th->th_dport, 0, pd->af,
7585 pd->naf, 0);
7586
7587 pf_change_ap(direction, pd->mp, pd->src,
7588 &th->th_sport, pd->ip_sum,
7589 &th->th_sum, &sk->gwy.addr,
7590 sk->gwy.xport.port, 0, pd->af,
7591 pd->naf, 0);
7592 }
7593 } else {
7594 pf_change_ap(direction, pd->mp, pd->dst,
7595 &th->th_dport, pd->ip_sum,
7596 &th->th_sum, &sk->lan.addr,
7597 sk->lan.xport.port, 0, pd->af,
7598 pd->naf, 1);
7599 }
7600 }
7601
7602 copyback = off + sizeof(*th);
7603 }
7604
7605 if (copyback) {
7606 if (pf_lazy_makewritable(pd, pbuf, copyback) == NULL) {
7607 REASON_SET(reason, PFRES_MEMORY);
7608 return PF_DROP;
7609 }
7610
7611 /* Copyback sequence modulation or stateful scrub changes */
7612 pbuf_copy_back(pbuf, off, sizeof(*th), th, sizeof(*th));
7613
7614 if (sk->af_lan != sk->af_gwy) {
7615 return pf_do_nat64(sk, pd, pbuf, off);
7616 }
7617 }
7618 return PF_PASS;
7619 }
7620
7621 static __attribute__((noinline)) int
pf_test_state_udp(struct pf_state ** state,int direction,struct pfi_kif * kif,pbuf_t * pbuf,int off,void * h,struct pf_pdesc * pd,u_short * reason)7622 pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
7623 pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd, u_short *reason)
7624 {
7625 #pragma unused(h)
7626 struct pf_state_peer *__single src, *__single dst;
7627 struct pf_state_key_cmp key;
7628 struct pf_state_key *__single sk;
7629 struct udphdr *__single uh = pf_pd_get_hdr_udp(pd);
7630 struct pf_app_state as;
7631 int action, extfilter;
7632 key.app_state = 0;
7633 key.proto_variant = PF_EXTFILTER_APD;
7634
7635 key.proto = IPPROTO_UDP;
7636 key.af_lan = key.af_gwy = pd->af;
7637
7638 /*
7639 * For NAT64 the first time rule search and state creation
7640 * is done on the incoming side only.
7641 * Once the state gets created, NAT64's LAN side (ipv6) will
7642 * not be able to find the state in ext-gwy tree as that normally
7643 * is intended to be looked up for incoming traffic from the
7644 * WAN side.
7645 * Therefore to handle NAT64 case we init keys here for both
7646 * lan-ext as well as ext-gwy trees.
7647 * In the state lookup we attempt a lookup on both trees if
7648 * first one does not return any result and return a match if
7649 * the match state's was created by NAT64 rule.
7650 */
7651 PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
7652 PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
7653 key.ext_gwy.xport.port = uh->uh_sport;
7654 key.gwy.xport.port = uh->uh_dport;
7655
7656 PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
7657 PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
7658 key.lan.xport.port = uh->uh_sport;
7659 key.ext_lan.xport.port = uh->uh_dport;
7660
7661 if (ntohs(uh->uh_sport) == PF_IKE_PORT &&
7662 ntohs(uh->uh_dport) == PF_IKE_PORT) {
7663 struct pf_ike_hdr ike;
7664 size_t plen = pbuf->pb_packet_len - off - sizeof(*uh);
7665 if (plen < PF_IKE_PACKET_MINSIZE) {
7666 DPFPRINTF(PF_DEBUG_MISC,
7667 ("pf: IKE message too small.\n"));
7668 return PF_DROP;
7669 }
7670
7671 if (plen > sizeof(ike)) {
7672 plen = sizeof(ike);
7673 }
7674 pbuf_copy_data(pbuf, off + sizeof(*uh), plen, &ike, sizeof(ike));
7675
7676 if (ike.initiator_cookie) {
7677 key.app_state = &as;
7678 as.compare_lan_ext = pf_ike_compare;
7679 as.compare_ext_gwy = pf_ike_compare;
7680 as.u.ike.cookie = ike.initiator_cookie;
7681 } else {
7682 /*
7683 * <http://tools.ietf.org/html/\
7684 * draft-ietf-ipsec-nat-t-ike-01>
7685 * Support non-standard NAT-T implementations that
7686 * push the ESP packet over the top of the IKE packet.
7687 * Do not drop packet.
7688 */
7689 DPFPRINTF(PF_DEBUG_MISC,
7690 ("pf: IKE initiator cookie = 0.\n"));
7691 }
7692 }
7693
7694 *state = pf_find_state(kif, &key, direction);
7695
7696 if (!key.app_state && *state == 0) {
7697 key.proto_variant = PF_EXTFILTER_AD;
7698 *state = pf_find_state(kif, &key, direction);
7699 }
7700
7701 if (!key.app_state && *state == 0) {
7702 key.proto_variant = PF_EXTFILTER_EI;
7703 *state = pf_find_state(kif, &key, direction);
7704 }
7705
7706 /* similar to STATE_LOOKUP() */
7707 if (*state != NULL && pd != NULL && !(pd->pktflags & PKTF_FLOW_ID)) {
7708 pd->flowsrc = (*state)->state_key->flowsrc;
7709 pd->flowhash = (*state)->state_key->flowhash;
7710 if (pd->flowhash != 0) {
7711 pd->pktflags |= PKTF_FLOW_ID;
7712 pd->pktflags &= ~PKTF_FLOW_ADV;
7713 }
7714 }
7715
7716 if (pf_state_lookup_aux(state, kif, direction, &action)) {
7717 return action;
7718 }
7719
7720 sk = (*state)->state_key;
7721
7722 /*
7723 * In case of NAT64 the translation is first applied on the LAN
7724 * side. Therefore for stack's address family comparison
7725 * we use sk->af_lan.
7726 */
7727 if ((direction == sk->direction) && (pd->af == sk->af_lan)) {
7728 src = &(*state)->src;
7729 dst = &(*state)->dst;
7730 } else {
7731 src = &(*state)->dst;
7732 dst = &(*state)->src;
7733 }
7734
7735 /* update states */
7736 if (src->state < PFUDPS_SINGLE) {
7737 src->state = PFUDPS_SINGLE;
7738 }
7739 if (dst->state == PFUDPS_SINGLE) {
7740 dst->state = PFUDPS_MULTIPLE;
7741 }
7742
7743 /* update expire time */
7744 (*state)->expire = pf_time_second();
7745 if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) {
7746 (*state)->timeout = PFTM_UDP_MULTIPLE;
7747 } else {
7748 (*state)->timeout = PFTM_UDP_SINGLE;
7749 }
7750
7751 extfilter = sk->proto_variant;
7752 if (extfilter > PF_EXTFILTER_APD) {
7753 if (direction == PF_OUT) {
7754 sk->ext_lan.xport.port = key.ext_lan.xport.port;
7755 if (extfilter > PF_EXTFILTER_AD) {
7756 PF_ACPY(&sk->ext_lan.addr, &key.ext_lan.addr,
7757 key.af_lan);
7758 }
7759 } else {
7760 sk->ext_gwy.xport.port = key.ext_gwy.xport.port;
7761 if (extfilter > PF_EXTFILTER_AD) {
7762 PF_ACPY(&sk->ext_gwy.addr, &key.ext_gwy.addr,
7763 key.af_gwy);
7764 }
7765 }
7766 }
7767
7768 if (sk->app_state && sk->app_state->handler) {
7769 sk->app_state->handler(*state, direction, off + uh->uh_ulen,
7770 pd, kif);
7771 if (pd->lmw < 0) {
7772 REASON_SET(reason, PFRES_MEMORY);
7773 return PF_DROP;
7774 }
7775 pbuf = pd->mp; // XXXSCW: Why?
7776 }
7777
7778 /* translate source/destination address, if necessary */
7779 if (STATE_TRANSLATE(sk)) {
7780 if (pf_lazy_makewritable(pd, pbuf, off + sizeof(*uh)) == NULL) {
7781 REASON_SET(reason, PFRES_MEMORY);
7782 return PF_DROP;
7783 }
7784
7785 pd->naf = (pd->af == sk->af_lan) ? sk->af_gwy : sk->af_lan;
7786
7787 if (direction == PF_OUT) {
7788 pf_change_ap(direction, pd->mp, pd->src, &uh->uh_sport,
7789 pd->ip_sum, &uh->uh_sum, &sk->gwy.addr,
7790 sk->gwy.xport.port, 1, pd->af, pd->naf, 1);
7791 } else {
7792 if (pd->af != pd->naf) {
7793 if (pd->af == sk->af_gwy) {
7794 pf_change_ap(direction, pd->mp, pd->dst,
7795 &uh->uh_dport, pd->ip_sum,
7796 &uh->uh_sum, &sk->lan.addr,
7797 sk->lan.xport.port, 1,
7798 pd->af, pd->naf, 0);
7799
7800 pf_change_ap(direction, pd->mp, pd->src,
7801 &uh->uh_sport, pd->ip_sum,
7802 &uh->uh_sum, &sk->ext_lan.addr,
7803 uh->uh_sport, 1, pd->af,
7804 pd->naf, 0);
7805 } else {
7806 pf_change_ap(direction, pd->mp, pd->dst,
7807 &uh->uh_dport, pd->ip_sum,
7808 &uh->uh_sum, &sk->ext_gwy.addr,
7809 uh->uh_dport, 1, pd->af,
7810 pd->naf, 0);
7811
7812 pf_change_ap(direction, pd->mp, pd->src,
7813 &uh->uh_sport, pd->ip_sum,
7814 &uh->uh_sum, &sk->gwy.addr,
7815 sk->gwy.xport.port, 1, pd->af,
7816 pd->naf, 0);
7817 }
7818 } else {
7819 pf_change_ap(direction, pd->mp, pd->dst,
7820 &uh->uh_dport, pd->ip_sum,
7821 &uh->uh_sum, &sk->lan.addr,
7822 sk->lan.xport.port, 1,
7823 pd->af, pd->naf, 1);
7824 }
7825 }
7826
7827 pbuf_copy_back(pbuf, off, sizeof(*uh), uh, sizeof(*uh));
7828 if (sk->af_lan != sk->af_gwy) {
7829 return pf_do_nat64(sk, pd, pbuf, off);
7830 }
7831 }
7832 return PF_PASS;
7833 }
7834
/*
 * pf_compute_packet_icmp_gencnt
 *
 * Build the value stored in a packet's flow generation count for an ICMP
 * message.
 *
 * Only error-style messages produce a non-zero value: ICMP_UNREACH and
 * ICMP_TIMXCEED when af is PF_INET; ICMP6_DST_UNREACH, ICMP6_PARAM_PROB and
 * ICMP6_TIME_EXCEEDED for any other af.  Every other type yields 0.
 *
 * The non-zero value packs af into bits 24 and up, type from bit 16 and
 * code from bit 8; the low byte is always zero.
 */
static u_int32_t
pf_compute_packet_icmp_gencnt(uint32_t af, u_int32_t type, u_int32_t code)
{
	if (af == PF_INET) {
		if (type != ICMP_UNREACH && type != ICMP_TIMXCEED) {
			return 0;
		}
	} else {
		/* anything that is not PF_INET is checked as ICMPv6 */
		if (type != ICMP6_DST_UNREACH && type != ICMP6_PARAM_PROB &&
		    type != ICMP6_TIME_EXCEEDED) {
			return 0;
		}
	}
	return (af << 24) | (type << 16) | (code << 8);
}
7850
7851
7852 static __attribute__((noinline)) int
pf_test_state_icmp(struct pf_state ** state,int direction,struct pfi_kif * kif,pbuf_t * pbuf,int off,void * h,struct pf_pdesc * pd,u_short * reason)7853 pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
7854 pbuf_t *pbuf, int off, void *h, struct pf_pdesc *pd, u_short *reason)
7855 {
7856 #pragma unused(h)
7857 struct pf_addr *__single saddr = pd->src, *__single daddr = pd->dst;
7858 struct in_addr srcv4_inaddr = saddr->v4addr;
7859 u_int16_t icmpid = 0, *__single icmpsum = NULL;
7860 u_int8_t icmptype = 0;
7861 u_int32_t icmpcode = 0;
7862 int state_icmp = 0;
7863 struct pf_state_key_cmp key;
7864 struct pf_state_key *__single sk;
7865
7866 struct pf_app_state as;
7867 key.app_state = 0;
7868
7869 pd->off = off;
7870
7871 switch (pd->proto) {
7872 #if INET
7873 case IPPROTO_ICMP:
7874 icmptype = pf_pd_get_hdr_icmp(pd)->icmp_type;
7875 icmpid = pf_pd_get_hdr_icmp(pd)->icmp_id;
7876 icmpsum = &pf_pd_get_hdr_icmp(pd)->icmp_cksum;
7877 icmpcode = pf_pd_get_hdr_icmp(pd)->icmp_code;
7878
7879 if (ICMP_ERRORTYPE(icmptype)) {
7880 state_icmp++;
7881 }
7882 break;
7883 #endif /* INET */
7884 case IPPROTO_ICMPV6:
7885 icmptype = pf_pd_get_hdr_icmp6(pd)->icmp6_type;
7886 icmpid = pf_pd_get_hdr_icmp6(pd)->icmp6_id;
7887 icmpsum = &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum;
7888 icmpcode = pf_pd_get_hdr_icmp6(pd)->icmp6_code;
7889
7890 if (ICMP6_ERRORTYPE(icmptype)) {
7891 state_icmp++;
7892 }
7893 break;
7894 }
7895
7896 if (pbuf != NULL && pbuf->pb_flow_gencnt != NULL &&
7897 *pbuf->pb_flow_gencnt == 0) {
7898 u_int32_t af = pd->proto == IPPROTO_ICMP ? PF_INET : PF_INET6;
7899 *pbuf->pb_flow_gencnt = pf_compute_packet_icmp_gencnt(af, icmptype, icmpcode);
7900 }
7901
7902 if (!state_icmp) {
7903 /*
7904 * ICMP query/reply message not related to a TCP/UDP packet.
7905 * Search for an ICMP state.
7906 */
7907 /*
7908 * NAT64 requires protocol translation between ICMPv4
7909 * and ICMPv6. TCP and UDP do not require protocol
7910 * translation. To avoid adding complexity just to
7911 * handle ICMP(v4addr/v6addr), we always lookup for
7912 * proto = IPPROTO_ICMP on both LAN and WAN side
7913 */
7914 key.proto = IPPROTO_ICMP;
7915 key.af_lan = key.af_gwy = pd->af;
7916
7917 PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
7918 PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
7919 key.ext_gwy.xport.port = 0;
7920 key.gwy.xport.port = icmpid;
7921
7922 PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
7923 PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
7924 key.lan.xport.port = icmpid;
7925 key.ext_lan.xport.port = 0;
7926
7927 STATE_LOOKUP();
7928
7929 sk = (*state)->state_key;
7930 (*state)->expire = pf_time_second();
7931 (*state)->timeout = PFTM_ICMP_ERROR_REPLY;
7932
7933 /* translate source/destination address, if necessary */
7934 if (STATE_TRANSLATE(sk)) {
7935 pd->naf = (pd->af == sk->af_lan) ?
7936 sk->af_gwy : sk->af_lan;
7937 if (direction == PF_OUT) {
7938 switch (pd->af) {
7939 #if INET
7940 case AF_INET:
7941 pf_change_a(&saddr->v4addr.s_addr,
7942 pd->ip_sum,
7943 sk->gwy.addr.v4addr.s_addr, 0);
7944 pf_pd_get_hdr_icmp(pd)->icmp_cksum =
7945 pf_cksum_fixup(
7946 pf_pd_get_hdr_icmp(pd)->icmp_cksum, icmpid,
7947 sk->gwy.xport.port, 0);
7948 pf_pd_get_hdr_icmp(pd)->icmp_id =
7949 sk->gwy.xport.port;
7950 if (pf_lazy_makewritable(pd, pbuf,
7951 off + ICMP_MINLEN) == NULL) {
7952 return PF_DROP;
7953 }
7954 pbuf_copy_back(pbuf, off, ICMP_MINLEN,
7955 pf_pd_get_hdr_ptr_icmp(pd), sizeof(struct icmp));
7956 break;
7957 #endif /* INET */
7958 case AF_INET6:
7959 pf_change_a6(saddr,
7960 &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum,
7961 &sk->gwy.addr, 0);
7962 if (pf_lazy_makewritable(pd, pbuf,
7963 off + sizeof(struct icmp6_hdr)) ==
7964 NULL) {
7965 return PF_DROP;
7966 }
7967 pbuf_copy_back(pbuf, off,
7968 sizeof(struct icmp6_hdr),
7969 pf_pd_get_hdr_ptr_icmp6(pd), sizeof(struct icmp6_hdr));
7970 break;
7971 }
7972 } else {
7973 switch (pd->af) {
7974 #if INET
7975 case AF_INET:
7976 if (pd->naf != AF_INET) {
7977 if (pf_translate_icmp_af(
7978 AF_INET6, pf_pd_get_hdr_icmp(pd))) {
7979 return PF_DROP;
7980 }
7981
7982 pd->proto = IPPROTO_ICMPV6;
7983 } else {
7984 pf_change_a(&daddr->v4addr.s_addr,
7985 pd->ip_sum,
7986 sk->lan.addr.v4addr.s_addr, 0);
7987
7988 pf_pd_get_hdr_icmp(pd)->icmp_cksum =
7989 pf_cksum_fixup(
7990 pf_pd_get_hdr_icmp(pd)->icmp_cksum,
7991 icmpid, sk->lan.xport.port, 0);
7992
7993 pf_pd_get_hdr_icmp(pd)->icmp_id =
7994 sk->lan.xport.port;
7995 }
7996
7997 if (pf_lazy_makewritable(pd, pbuf,
7998 off + ICMP_MINLEN) == NULL) {
7999 return PF_DROP;
8000 }
8001 pbuf_copy_back(pbuf, off, ICMP_MINLEN,
8002 pf_pd_get_hdr_ptr_icmp(pd), sizeof(struct icmp));
8003 if (sk->af_lan != sk->af_gwy) {
8004 return pf_do_nat64(sk, pd,
8005 pbuf, off);
8006 }
8007 break;
8008 #endif /* INET */
8009 case AF_INET6:
8010 if (pd->naf != AF_INET6) {
8011 if (pf_translate_icmp_af(
8012 AF_INET, pf_pd_get_hdr_icmp6(pd))) {
8013 return PF_DROP;
8014 }
8015
8016 pd->proto = IPPROTO_ICMP;
8017 } else {
8018 pf_change_a6(daddr,
8019 &pf_pd_get_hdr_icmp6(pd)->icmp6_cksum,
8020 &sk->lan.addr, 0);
8021 }
8022 if (pf_lazy_makewritable(pd, pbuf,
8023 off + sizeof(struct icmp6_hdr)) ==
8024 NULL) {
8025 return PF_DROP;
8026 }
8027 pbuf_copy_back(pbuf, off,
8028 sizeof(struct icmp6_hdr),
8029 pf_pd_get_hdr_ptr_icmp6(pd), sizeof(struct icmp6_hdr));
8030 if (sk->af_lan != sk->af_gwy) {
8031 return pf_do_nat64(sk, pd,
8032 pbuf, off);
8033 }
8034 break;
8035 }
8036 }
8037 }
8038
8039 return PF_PASS;
8040 } else {
8041 /*
8042 * ICMP error message in response to a TCP/UDP packet.
8043 * Extract the inner TCP/UDP header and search for that state.
8044 */
8045 struct pf_pdesc pd2; /* For inner (original) header */
8046 #if INET
8047 struct ip h2;
8048 #endif /* INET */
8049 struct ip6_hdr h2_6;
8050 int terminal = 0;
8051 int ipoff2 = 0;
8052 int off2 = 0;
8053
8054 memset(&pd2, 0, sizeof(pd2));
8055
8056 pd2.af = pd->af;
8057 switch (pd->af) {
8058 #if INET
8059 case AF_INET:
8060 /* offset of h2 in mbuf chain */
8061 ipoff2 = off + ICMP_MINLEN;
8062
8063 if (!pf_pull_hdr(pbuf, ipoff2, &h2, sizeof(h2), sizeof(h2),
8064 NULL, reason, pd2.af)) {
8065 DPFPRINTF(PF_DEBUG_MISC,
8066 ("pf: ICMP error message too short "
8067 "(ip)\n"));
8068 return PF_DROP;
8069 }
8070 /*
8071 * ICMP error messages don't refer to non-first
8072 * fragments
8073 */
8074 if (h2.ip_off & htons(IP_OFFMASK)) {
8075 REASON_SET(reason, PFRES_FRAG);
8076 return PF_DROP;
8077 }
8078
8079 /* offset of protocol header that follows h2 */
8080 off2 = ipoff2 + (h2.ip_hl << 2);
8081 /* TODO */
8082 pd2.off = ipoff2 + (h2.ip_hl << 2);
8083
8084 pd2.proto = h2.ip_p;
8085 pd2.src = (struct pf_addr *)&h2.ip_src;
8086 pd2.dst = (struct pf_addr *)&h2.ip_dst;
8087 pd2.ip_sum = &h2.ip_sum;
8088 break;
8089 #endif /* INET */
8090 case AF_INET6:
8091 ipoff2 = off + sizeof(struct icmp6_hdr);
8092
8093 if (!pf_pull_hdr(pbuf, ipoff2, &h2_6, sizeof(h2_6), sizeof(h2_6),
8094 NULL, reason, pd2.af)) {
8095 DPFPRINTF(PF_DEBUG_MISC,
8096 ("pf: ICMP error message too short "
8097 "(ip6)\n"));
8098 return PF_DROP;
8099 }
8100 pd2.proto = h2_6.ip6_nxt;
8101 pd2.src = (struct pf_addr *)(void *)&h2_6.ip6_src;
8102 pd2.dst = (struct pf_addr *)(void *)&h2_6.ip6_dst;
8103 pd2.ip_sum = NULL;
8104 off2 = ipoff2 + sizeof(h2_6);
8105 do {
8106 switch (pd2.proto) {
8107 case IPPROTO_FRAGMENT:
8108 /*
8109 * ICMPv6 error messages for
8110 * non-first fragments
8111 */
8112 REASON_SET(reason, PFRES_FRAG);
8113 return PF_DROP;
8114 case IPPROTO_AH:
8115 case IPPROTO_HOPOPTS:
8116 case IPPROTO_ROUTING:
8117 case IPPROTO_DSTOPTS: {
8118 /* get next header and header length */
8119 struct ip6_ext opt6;
8120
8121 if (!pf_pull_hdr(pbuf, off2, &opt6, sizeof(opt6),
8122 sizeof(opt6), NULL, reason,
8123 pd2.af)) {
8124 DPFPRINTF(PF_DEBUG_MISC,
8125 ("pf: ICMPv6 short opt\n"));
8126 return PF_DROP;
8127 }
8128 if (pd2.proto == IPPROTO_AH) {
8129 off2 += (opt6.ip6e_len + 2) * 4;
8130 } else {
8131 off2 += (opt6.ip6e_len + 1) * 8;
8132 }
8133 pd2.proto = opt6.ip6e_nxt;
8134 /* goto the next header */
8135 break;
8136 }
8137 default:
8138 terminal++;
8139 break;
8140 }
8141 } while (!terminal);
8142 /* TODO */
8143 pd2.off = ipoff2;
8144 break;
8145 }
8146
8147 switch (pd2.proto) {
8148 case IPPROTO_TCP: {
8149 struct tcphdr th;
8150 u_int32_t seq;
8151 struct pf_state_peer *src, *dst;
8152 u_int8_t dws;
8153 int copyback = 0;
8154
8155 /*
8156 * Only the first 8 bytes of the TCP header can be
8157 * expected. Don't access any TCP header fields after
8158 * th_seq, an ackskew test is not possible.
8159 */
8160 if (!pf_pull_hdr(pbuf, off2, &th, sizeof(th), 8, NULL, reason,
8161 pd2.af)) {
8162 DPFPRINTF(PF_DEBUG_MISC,
8163 ("pf: ICMP error message too short "
8164 "(tcp)\n"));
8165 return PF_DROP;
8166 }
8167
8168 key.proto = IPPROTO_TCP;
8169 key.af_gwy = pd2.af;
8170 PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8171 PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8172 key.ext_gwy.xport.port = th.th_dport;
8173 key.gwy.xport.port = th.th_sport;
8174
8175 key.af_lan = pd2.af;
8176 PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8177 PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8178 key.lan.xport.port = th.th_dport;
8179 key.ext_lan.xport.port = th.th_sport;
8180
8181 STATE_LOOKUP();
8182
8183 sk = (*state)->state_key;
8184 if ((direction == sk->direction) &&
8185 ((sk->af_lan == sk->af_gwy) ||
8186 (pd2.af == sk->af_lan))) {
8187 src = &(*state)->dst;
8188 dst = &(*state)->src;
8189 } else {
8190 src = &(*state)->src;
8191 dst = &(*state)->dst;
8192 }
8193
8194 if (src->wscale && (dst->wscale & PF_WSCALE_FLAG)) {
8195 dws = dst->wscale & PF_WSCALE_MASK;
8196 } else {
8197 dws = TCP_MAX_WINSHIFT;
8198 }
8199
8200 /* Demodulate sequence number */
8201 seq = ntohl(th.th_seq) - src->seqdiff;
8202 if (src->seqdiff) {
8203 pf_change_a(&th.th_seq, icmpsum,
8204 htonl(seq), 0);
8205 copyback = 1;
8206 }
8207
8208 if (!SEQ_GEQ(src->seqhi, seq) ||
8209 !SEQ_GEQ(seq,
8210 src->seqlo - ((u_int32_t)dst->max_win << dws))) {
8211 if (pf_status.debug >= PF_DEBUG_MISC) {
8212 printf("pf: BAD ICMP %d:%d ",
8213 icmptype, pf_pd_get_hdr_icmp(pd)->icmp_code);
8214 pf_print_host(pd->src, 0, pd->af);
8215 printf(" -> ");
8216 pf_print_host(pd->dst, 0, pd->af);
8217 printf(" state: ");
8218 pf_print_state(*state);
8219 printf(" seq=%u\n", seq);
8220 }
8221 REASON_SET(reason, PFRES_BADSTATE);
8222 return PF_DROP;
8223 }
8224
8225 pd->naf = pd2.naf = (pd2.af == sk->af_lan) ?
8226 sk->af_gwy : sk->af_lan;
8227
8228 if (STATE_TRANSLATE(sk)) {
8229 /* NAT64 case */
8230 if (sk->af_lan != sk->af_gwy) {
8231 struct pf_state_host *saddr2, *daddr2;
8232
8233 if (pd2.naf == sk->af_lan) {
8234 saddr2 = &sk->lan;
8235 daddr2 = &sk->ext_lan;
8236 } else {
8237 saddr2 = &sk->ext_gwy;
8238 daddr2 = &sk->gwy;
8239 }
8240
8241 /* translate ICMP message types and codes */
8242 if (pf_translate_icmp_af(pd->naf,
8243 pf_pd_get_hdr_icmp(pd))) {
8244 return PF_DROP;
8245 }
8246
8247 if (pf_lazy_makewritable(pd, pbuf,
8248 off2 + 8) == NULL) {
8249 return PF_DROP;
8250 }
8251
8252 pbuf_copy_back(pbuf, pd->off,
8253 sizeof(struct icmp6_hdr),
8254 pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);
8255
8256 /*
8257 * translate inner ip header within the
8258 * ICMP message
8259 */
8260 if (pf_change_icmp_af(pbuf, ipoff2, pd,
8261 &pd2, &saddr2->addr, &daddr2->addr,
8262 pd->af, pd->naf)) {
8263 return PF_DROP;
8264 }
8265
8266 if (pd->naf == AF_INET) {
8267 pd->proto = IPPROTO_ICMP;
8268 } else {
8269 pd->proto = IPPROTO_ICMPV6;
8270 }
8271
8272 /*
8273 * translate inner tcp header within
8274 * the ICMP message
8275 */
8276 pf_change_ap(direction, NULL, pd2.src,
8277 &th.th_sport, pd2.ip_sum,
8278 &th.th_sum, &daddr2->addr,
8279 saddr2->xport.port, 0, pd2.af,
8280 pd2.naf, 0);
8281
8282 pf_change_ap(direction, NULL, pd2.dst,
8283 &th.th_dport, pd2.ip_sum,
8284 &th.th_sum, &saddr2->addr,
8285 daddr2->xport.port, 0, pd2.af,
8286 pd2.naf, 0);
8287
8288 pbuf_copy_back(pbuf, pd2.off, 8, &th, sizeof(th));
8289
8290 /* translate outer ip header */
8291 PF_ACPY(&pd->naddr, &daddr2->addr,
8292 pd->naf);
8293 PF_ACPY(&pd->ndaddr, &saddr2->addr,
8294 pd->naf);
8295 if (pd->af == AF_INET) {
8296 memcpy(&pd->naddr.addr32[3],
8297 &srcv4_inaddr,
8298 sizeof(pd->naddr.addr32[3]));
8299 return pf_nat64_ipv4(pbuf, off,
8300 pd);
8301 } else {
8302 return pf_nat64_ipv6(pbuf, off,
8303 pd);
8304 }
8305 }
8306 if (direction == PF_IN) {
8307 pf_change_icmp(pd2.src, &th.th_sport,
8308 daddr, &sk->lan.addr,
8309 sk->lan.xport.port, NULL,
8310 pd2.ip_sum, icmpsum,
8311 pd->ip_sum, 0, pd2.af);
8312 } else {
8313 pf_change_icmp(pd2.dst, &th.th_dport,
8314 saddr, &sk->gwy.addr,
8315 sk->gwy.xport.port, NULL,
8316 pd2.ip_sum, icmpsum,
8317 pd->ip_sum, 0, pd2.af);
8318 }
8319 copyback = 1;
8320 }
8321
8322 if (copyback) {
8323 if (pf_lazy_makewritable(pd, pbuf, off2 + 8) ==
8324 NULL) {
8325 return PF_DROP;
8326 }
8327 switch (pd2.af) {
8328 #if INET
8329 case AF_INET:
8330 pbuf_copy_back(pbuf, off, ICMP_MINLEN,
8331 pf_pd_get_hdr_ptr_icmp(pd), pd->hdrmaxlen);
8332 pbuf_copy_back(pbuf, ipoff2, sizeof(h2),
8333 &h2, sizeof(h2));
8334 break;
8335 #endif /* INET */
8336 case AF_INET6:
8337 pbuf_copy_back(pbuf, off,
8338 sizeof(struct icmp6_hdr),
8339 pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);
8340 pbuf_copy_back(pbuf, ipoff2,
8341 sizeof(h2_6), &h2_6, sizeof(h2_6));
8342 break;
8343 }
8344 pbuf_copy_back(pbuf, off2, 8, &th, sizeof(th));
8345 }
8346
8347 return PF_PASS;
8348 }
8349 case IPPROTO_UDP: {
8350 struct udphdr uh;
8351 int dx, action;
8352 if (!pf_pull_hdr(pbuf, off2, &uh, sizeof(uh), sizeof(uh),
8353 NULL, reason, pd2.af)) {
8354 DPFPRINTF(PF_DEBUG_MISC,
8355 ("pf: ICMP error message too short "
8356 "(udp)\n"));
8357 return PF_DROP;
8358 }
8359
8360 key.af_gwy = pd2.af;
8361 PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8362 PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8363 key.ext_gwy.xport.port = uh.uh_dport;
8364 key.gwy.xport.port = uh.uh_sport;
8365
8366 key.af_lan = pd2.af;
8367 PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8368 PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8369 key.lan.xport.port = uh.uh_dport;
8370 key.ext_lan.xport.port = uh.uh_sport;
8371
8372 key.proto = IPPROTO_UDP;
8373 key.proto_variant = PF_EXTFILTER_APD;
8374 dx = direction;
8375
8376 if (ntohs(uh.uh_sport) == PF_IKE_PORT &&
8377 ntohs(uh.uh_dport) == PF_IKE_PORT) {
8378 struct pf_ike_hdr ike;
8379 size_t plen = pbuf->pb_packet_len - off2 -
8380 sizeof(uh);
8381 if (direction == PF_IN &&
8382 plen < 8 /* PF_IKE_PACKET_MINSIZE */) {
8383 DPFPRINTF(PF_DEBUG_MISC, ("pf: "
8384 "ICMP error, embedded IKE message "
8385 "too small.\n"));
8386 return PF_DROP;
8387 }
8388
8389 if (plen > sizeof(ike)) {
8390 plen = sizeof(ike);
8391 }
8392 pbuf_copy_data(pbuf, off + sizeof(uh), plen,
8393 &ike, sizeof(ike));
8394
8395 key.app_state = &as;
8396 as.compare_lan_ext = pf_ike_compare;
8397 as.compare_ext_gwy = pf_ike_compare;
8398 as.u.ike.cookie = ike.initiator_cookie;
8399 }
8400
8401 *state = pf_find_state(kif, &key, dx);
8402
8403 if (key.app_state && *state == 0) {
8404 key.app_state = 0;
8405 *state = pf_find_state(kif, &key, dx);
8406 }
8407
8408 if (*state == 0) {
8409 key.proto_variant = PF_EXTFILTER_AD;
8410 *state = pf_find_state(kif, &key, dx);
8411 }
8412
8413 if (*state == 0) {
8414 key.proto_variant = PF_EXTFILTER_EI;
8415 *state = pf_find_state(kif, &key, dx);
8416 }
8417
8418 /* similar to STATE_LOOKUP() */
8419 if (*state != NULL && pd != NULL &&
8420 !(pd->pktflags & PKTF_FLOW_ID)) {
8421 pd->flowsrc = (*state)->state_key->flowsrc;
8422 pd->flowhash = (*state)->state_key->flowhash;
8423 if (pd->flowhash != 0) {
8424 pd->pktflags |= PKTF_FLOW_ID;
8425 pd->pktflags &= ~PKTF_FLOW_ADV;
8426 }
8427 }
8428
8429 if (pf_state_lookup_aux(state, kif, direction, &action)) {
8430 return action;
8431 }
8432
8433 sk = (*state)->state_key;
8434 pd->naf = pd2.naf = (pd2.af == sk->af_lan) ?
8435 sk->af_gwy : sk->af_lan;
8436
8437 if (STATE_TRANSLATE(sk)) {
8438 /* NAT64 case */
8439 if (sk->af_lan != sk->af_gwy) {
8440 struct pf_state_host *saddr2, *daddr2;
8441
8442 if (pd2.naf == sk->af_lan) {
8443 saddr2 = &sk->lan;
8444 daddr2 = &sk->ext_lan;
8445 } else {
8446 saddr2 = &sk->ext_gwy;
8447 daddr2 = &sk->gwy;
8448 }
8449
8450 /* translate ICMP message */
8451 if (pf_translate_icmp_af(pd->naf,
8452 pf_pd_get_hdr_icmp(pd))) {
8453 return PF_DROP;
8454 }
8455 if (pf_lazy_makewritable(pd, pbuf,
8456 off2 + 8) == NULL) {
8457 return PF_DROP;
8458 }
8459
8460 pbuf_copy_back(pbuf, pd->off,
8461 sizeof(struct icmp6_hdr),
8462 pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);
8463
8464 /*
8465 * translate inner ip header within the
8466 * ICMP message
8467 */
8468 if (pf_change_icmp_af(pbuf, ipoff2, pd,
8469 &pd2, &saddr2->addr, &daddr2->addr,
8470 pd->af, pd->naf)) {
8471 return PF_DROP;
8472 }
8473
8474 if (pd->naf == AF_INET) {
8475 pd->proto = IPPROTO_ICMP;
8476 } else {
8477 pd->proto = IPPROTO_ICMPV6;
8478 }
8479
8480 /*
8481 * translate inner udp header within
8482 * the ICMP message
8483 */
8484 pf_change_ap(direction, NULL, pd2.src,
8485 &uh.uh_sport, pd2.ip_sum,
8486 &uh.uh_sum, &daddr2->addr,
8487 saddr2->xport.port, 0, pd2.af,
8488 pd2.naf, 0);
8489
8490 pf_change_ap(direction, NULL, pd2.dst,
8491 &uh.uh_dport, pd2.ip_sum,
8492 &uh.uh_sum, &saddr2->addr,
8493 daddr2->xport.port, 0, pd2.af,
8494 pd2.naf, 0);
8495
8496 pbuf_copy_back(pbuf, pd2.off,
8497 sizeof(uh), &uh, sizeof(uh));
8498
8499 /* translate outer ip header */
8500 PF_ACPY(&pd->naddr, &daddr2->addr,
8501 pd->naf);
8502 PF_ACPY(&pd->ndaddr, &saddr2->addr,
8503 pd->naf);
8504 if (pd->af == AF_INET) {
8505 memcpy(&pd->naddr.addr32[3],
8506 &srcv4_inaddr,
8507 sizeof(pd->naddr.addr32[3]));
8508 return pf_nat64_ipv4(pbuf, off,
8509 pd);
8510 } else {
8511 return pf_nat64_ipv6(pbuf, off,
8512 pd);
8513 }
8514 }
8515 if (direction == PF_IN) {
8516 pf_change_icmp(pd2.src, &uh.uh_sport,
8517 daddr, &sk->lan.addr,
8518 sk->lan.xport.port, &uh.uh_sum,
8519 pd2.ip_sum, icmpsum,
8520 pd->ip_sum, 1, pd2.af);
8521 } else {
8522 pf_change_icmp(pd2.dst, &uh.uh_dport,
8523 saddr, &sk->gwy.addr,
8524 sk->gwy.xport.port, &uh.uh_sum,
8525 pd2.ip_sum, icmpsum,
8526 pd->ip_sum, 1, pd2.af);
8527 }
8528 if (pf_lazy_makewritable(pd, pbuf,
8529 off2 + sizeof(uh)) == NULL) {
8530 return PF_DROP;
8531 }
8532 switch (pd2.af) {
8533 #if INET
8534 case AF_INET:
8535 pbuf_copy_back(pbuf, off, ICMP_MINLEN,
8536 pf_pd_get_hdr_ptr_icmp(pd), pd->hdrmaxlen);
8537 pbuf_copy_back(pbuf, ipoff2,
8538 sizeof(h2), &h2, sizeof(h2));
8539 break;
8540 #endif /* INET */
8541 case AF_INET6:
8542 pbuf_copy_back(pbuf, off,
8543 sizeof(struct icmp6_hdr),
8544 pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);
8545 pbuf_copy_back(pbuf, ipoff2,
8546 sizeof(h2_6), &h2_6, sizeof(h2_6));
8547 break;
8548 }
8549 pbuf_copy_back(pbuf, off2, sizeof(uh), &uh, sizeof(uh));
8550 }
8551
8552 return PF_PASS;
8553 }
8554 #if INET
8555 case IPPROTO_ICMP: {
8556 struct icmp iih;
8557
8558 if (!pf_pull_hdr(pbuf, off2, &iih, sizeof(iih), ICMP_MINLEN,
8559 NULL, reason, pd2.af)) {
8560 DPFPRINTF(PF_DEBUG_MISC,
8561 ("pf: ICMP error message too short i"
8562 "(icmp)\n"));
8563 return PF_DROP;
8564 }
8565
8566 key.proto = IPPROTO_ICMP;
8567 if (direction == PF_IN) {
8568 key.af_gwy = pd2.af;
8569 PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8570 PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8571 key.ext_gwy.xport.port = 0;
8572 key.gwy.xport.port = iih.icmp_id;
8573 } else {
8574 key.af_lan = pd2.af;
8575 PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8576 PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8577 key.lan.xport.port = iih.icmp_id;
8578 key.ext_lan.xport.port = 0;
8579 }
8580
8581 STATE_LOOKUP();
8582
8583 sk = (*state)->state_key;
8584 if (STATE_TRANSLATE(sk)) {
8585 if (direction == PF_IN) {
8586 pf_change_icmp(pd2.src, &iih.icmp_id,
8587 daddr, &sk->lan.addr,
8588 sk->lan.xport.port, NULL,
8589 pd2.ip_sum, icmpsum,
8590 pd->ip_sum, 0, AF_INET);
8591 } else {
8592 pf_change_icmp(pd2.dst, &iih.icmp_id,
8593 saddr, &sk->gwy.addr,
8594 sk->gwy.xport.port, NULL,
8595 pd2.ip_sum, icmpsum,
8596 pd->ip_sum, 0, AF_INET);
8597 }
8598 if (pf_lazy_makewritable(pd, pbuf,
8599 off2 + ICMP_MINLEN) == NULL) {
8600 return PF_DROP;
8601 }
8602 pbuf_copy_back(pbuf, off, ICMP_MINLEN,
8603 pf_pd_get_hdr_ptr_icmp(pd), pd->hdrmaxlen);
8604 pbuf_copy_back(pbuf, ipoff2, sizeof(h2), &h2, sizeof(h2));
8605 pbuf_copy_back(pbuf, off2, ICMP_MINLEN, &iih, sizeof(iih));
8606 }
8607
8608 return PF_PASS;
8609 }
8610 #endif /* INET */
8611 case IPPROTO_ICMPV6: {
8612 struct icmp6_hdr iih;
8613
8614 if (!pf_pull_hdr(pbuf, off2, &iih, sizeof(iih),
8615 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
8616 DPFPRINTF(PF_DEBUG_MISC,
8617 ("pf: ICMP error message too short "
8618 "(icmp6)\n"));
8619 return PF_DROP;
8620 }
8621
8622 key.proto = IPPROTO_ICMPV6;
8623 if (direction == PF_IN) {
8624 key.af_gwy = pd2.af;
8625 PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8626 PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8627 key.ext_gwy.xport.port = 0;
8628 key.gwy.xport.port = iih.icmp6_id;
8629 } else {
8630 key.af_lan = pd2.af;
8631 PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8632 PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8633 key.lan.xport.port = iih.icmp6_id;
8634 key.ext_lan.xport.port = 0;
8635 }
8636
8637 STATE_LOOKUP();
8638
8639 sk = (*state)->state_key;
8640 if (STATE_TRANSLATE(sk)) {
8641 if (direction == PF_IN) {
8642 pf_change_icmp(pd2.src, &iih.icmp6_id,
8643 daddr, &sk->lan.addr,
8644 sk->lan.xport.port, NULL,
8645 pd2.ip_sum, icmpsum,
8646 pd->ip_sum, 0, AF_INET6);
8647 } else {
8648 pf_change_icmp(pd2.dst, &iih.icmp6_id,
8649 saddr, &sk->gwy.addr,
8650 sk->gwy.xport.port, NULL,
8651 pd2.ip_sum, icmpsum,
8652 pd->ip_sum, 0, AF_INET6);
8653 }
8654 if (pf_lazy_makewritable(pd, pbuf, off2 +
8655 sizeof(struct icmp6_hdr)) == NULL) {
8656 return PF_DROP;
8657 }
8658 pbuf_copy_back(pbuf, off,
8659 sizeof(struct icmp6_hdr),
8660 pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);
8661 pbuf_copy_back(pbuf, ipoff2, sizeof(h2_6),
8662 &h2_6, sizeof(h2_6));
8663 pbuf_copy_back(pbuf, off2,
8664 sizeof(struct icmp6_hdr), &iih, sizeof(iih));
8665 }
8666
8667 return PF_PASS;
8668 }
8669 default: {
8670 key.proto = pd2.proto;
8671 if (direction == PF_IN) {
8672 key.af_gwy = pd2.af;
8673 PF_ACPY(&key.ext_gwy.addr, pd2.dst, key.af_gwy);
8674 PF_ACPY(&key.gwy.addr, pd2.src, key.af_gwy);
8675 key.ext_gwy.xport.port = 0;
8676 key.gwy.xport.port = 0;
8677 } else {
8678 key.af_lan = pd2.af;
8679 PF_ACPY(&key.lan.addr, pd2.dst, key.af_lan);
8680 PF_ACPY(&key.ext_lan.addr, pd2.src, key.af_lan);
8681 key.lan.xport.port = 0;
8682 key.ext_lan.xport.port = 0;
8683 }
8684
8685 STATE_LOOKUP();
8686
8687 sk = (*state)->state_key;
8688 if (STATE_TRANSLATE(sk)) {
8689 if (direction == PF_IN) {
8690 pf_change_icmp(pd2.src, NULL, daddr,
8691 &sk->lan.addr, 0, NULL,
8692 pd2.ip_sum, icmpsum,
8693 pd->ip_sum, 0, pd2.af);
8694 } else {
8695 pf_change_icmp(pd2.dst, NULL, saddr,
8696 &sk->gwy.addr, 0, NULL,
8697 pd2.ip_sum, icmpsum,
8698 pd->ip_sum, 0, pd2.af);
8699 }
8700 switch (pd2.af) {
8701 #if INET
8702 case AF_INET:
8703 if (pf_lazy_makewritable(pd, pbuf,
8704 ipoff2 + sizeof(h2)) == NULL) {
8705 return PF_DROP;
8706 }
8707 /*
8708 * <XXXSCW>
8709 * Xnu was missing the following...
8710 */
8711 pbuf_copy_back(pbuf, off, ICMP_MINLEN,
8712 pf_pd_get_hdr_ptr_icmp(pd), pd->hdrmaxlen);
8713 pbuf_copy_back(pbuf, ipoff2,
8714 sizeof(h2), &h2, sizeof(h2));
8715 break;
8716 /*
8717 * </XXXSCW>
8718 */
8719 #endif /* INET */
8720 case AF_INET6:
8721 if (pf_lazy_makewritable(pd, pbuf,
8722 ipoff2 + sizeof(h2_6)) == NULL) {
8723 return PF_DROP;
8724 }
8725 pbuf_copy_back(pbuf, off,
8726 sizeof(struct icmp6_hdr),
8727 pf_pd_get_hdr_ptr_icmp6(pd), pd->hdrmaxlen);
8728 pbuf_copy_back(pbuf, ipoff2,
8729 sizeof(h2_6), &h2_6, sizeof(h2_6));
8730 break;
8731 }
8732 }
8733
8734 return PF_PASS;
8735 }
8736 }
8737 }
8738 }
8739
8740 static __attribute__((noinline)) int
pf_test_state_grev1(struct pf_state ** state,int direction,struct pfi_kif * kif,int off,struct pf_pdesc * pd)8741 pf_test_state_grev1(struct pf_state **state, int direction,
8742 struct pfi_kif *kif, int off, struct pf_pdesc *pd)
8743 {
8744 struct pf_state_peer *__single src;
8745 struct pf_state_peer *__single dst;
8746 struct pf_state_key_cmp key = {};
8747 struct pf_grev1_hdr *__single grev1 = pf_pd_get_hdr_grev1(pd);
8748
8749 key.app_state = 0;
8750 key.proto = IPPROTO_GRE;
8751 key.proto_variant = PF_GRE_PPTP_VARIANT;
8752 if (direction == PF_IN) {
8753 key.af_gwy = pd->af;
8754 PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
8755 PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
8756 key.gwy.xport.call_id = grev1->call_id;
8757 } else {
8758 key.af_lan = pd->af;
8759 PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
8760 PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
8761 key.ext_lan.xport.call_id = grev1->call_id;
8762 }
8763
8764 STATE_LOOKUP();
8765
8766 if (direction == (*state)->state_key->direction) {
8767 src = &(*state)->src;
8768 dst = &(*state)->dst;
8769 } else {
8770 src = &(*state)->dst;
8771 dst = &(*state)->src;
8772 }
8773
8774 /* update states */
8775 if (src->state < PFGRE1S_INITIATING) {
8776 src->state = PFGRE1S_INITIATING;
8777 }
8778
8779 /* update expire time */
8780 (*state)->expire = pf_time_second();
8781 if (src->state >= PFGRE1S_INITIATING &&
8782 dst->state >= PFGRE1S_INITIATING) {
8783 if ((*state)->timeout != PFTM_TCP_ESTABLISHED) {
8784 (*state)->timeout = PFTM_GREv1_ESTABLISHED;
8785 }
8786 src->state = PFGRE1S_ESTABLISHED;
8787 dst->state = PFGRE1S_ESTABLISHED;
8788 } else {
8789 (*state)->timeout = PFTM_GREv1_INITIATING;
8790 }
8791
8792 if ((*state)->state_key->app_state) {
8793 (*state)->state_key->app_state->u.grev1.pptp_state->expire =
8794 pf_time_second();
8795 }
8796
8797 /* translate source/destination address, if necessary */
8798 if (STATE_GRE_TRANSLATE((*state)->state_key)) {
8799 if (direction == PF_OUT) {
8800 switch (pd->af) {
8801 #if INET
8802 case AF_INET:
8803 pf_change_a(&pd->src->v4addr.s_addr,
8804 pd->ip_sum,
8805 (*state)->state_key->gwy.addr.v4addr.s_addr, 0);
8806 break;
8807 #endif /* INET */
8808 case AF_INET6:
8809 PF_ACPY(pd->src, &(*state)->state_key->gwy.addr,
8810 pd->af);
8811 break;
8812 }
8813 } else {
8814 grev1->call_id = (*state)->state_key->lan.xport.call_id;
8815
8816 switch (pd->af) {
8817 #if INET
8818 case AF_INET:
8819 pf_change_a(&pd->dst->v4addr.s_addr,
8820 pd->ip_sum,
8821 (*state)->state_key->lan.addr.v4addr.s_addr, 0);
8822 break;
8823 #endif /* INET */
8824 case AF_INET6:
8825 PF_ACPY(pd->dst, &(*state)->state_key->lan.addr,
8826 pd->af);
8827 break;
8828 }
8829 }
8830
8831 if (pf_lazy_makewritable(pd, pd->mp, off + sizeof(*grev1)) ==
8832 NULL) {
8833 return PF_DROP;
8834 }
8835 pbuf_copy_back(pd->mp, off, sizeof(*grev1), grev1, sizeof(*grev1));
8836 }
8837
8838 return PF_PASS;
8839 }
8840
8841 static __attribute__((noinline)) int
pf_test_state_esp(struct pf_state ** state,int direction,struct pfi_kif * kif,int off,struct pf_pdesc * pd)8842 pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif,
8843 int off, struct pf_pdesc *pd)
8844 {
8845 #pragma unused(off)
8846 struct pf_state_peer *__single src;
8847 struct pf_state_peer *__single dst;
8848 struct pf_state_key_cmp key;
8849 struct pf_esp_hdr *__single esp = pf_pd_get_hdr_esp(pd);
8850 int action;
8851
8852 memset(&key, 0, sizeof(key));
8853 key.proto = IPPROTO_ESP;
8854 if (direction == PF_IN) {
8855 key.af_gwy = pd->af;
8856 PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
8857 PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
8858 key.gwy.xport.spi = esp->spi;
8859 } else {
8860 key.af_lan = pd->af;
8861 PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
8862 PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
8863 key.ext_lan.xport.spi = esp->spi;
8864 }
8865
8866 *state = pf_find_state(kif, &key, direction);
8867
8868 if (*state == 0) {
8869 struct pf_state *s;
8870
8871 /*
8872 * <[email protected]>
8873 * No matching state. Look for a blocking state. If we find
8874 * one, then use that state and move it so that it's keyed to
8875 * the SPI in the current packet.
8876 */
8877 if (direction == PF_IN) {
8878 key.gwy.xport.spi = 0;
8879
8880 s = pf_find_state(kif, &key, direction);
8881 if (s) {
8882 struct pf_state_key *sk = s->state_key;
8883
8884 pf_remove_state_key_ext_gwy(sk);
8885 sk->lan.xport.spi = sk->gwy.xport.spi =
8886 esp->spi;
8887
8888 if (pf_insert_state_key_ext_gwy(sk)) {
8889 pf_detach_state(s, PF_DT_SKIP_EXTGWY);
8890 } else {
8891 *state = s;
8892 }
8893 }
8894 } else {
8895 key.ext_lan.xport.spi = 0;
8896
8897 s = pf_find_state(kif, &key, direction);
8898 if (s) {
8899 struct pf_state_key *sk = s->state_key;
8900
8901 RB_REMOVE(pf_state_tree_lan_ext,
8902 &pf_statetbl_lan_ext, sk);
8903 sk->ext_lan.xport.spi = esp->spi;
8904
8905 if (RB_INSERT(pf_state_tree_lan_ext,
8906 &pf_statetbl_lan_ext, sk)) {
8907 pf_detach_state(s, PF_DT_SKIP_LANEXT);
8908 } else {
8909 *state = s;
8910 }
8911 }
8912 }
8913
8914 if (s) {
8915 if (*state == 0) {
8916 #if NPFSYNC
8917 if (s->creatorid == pf_status.hostid) {
8918 pfsync_delete_state(s);
8919 }
8920 #endif
8921 s->timeout = PFTM_UNLINKED;
8922 hook_runloop(&s->unlink_hooks,
8923 HOOK_REMOVE | HOOK_FREE);
8924 pf_src_tree_remove_state(s);
8925 pf_free_state(s);
8926 return PF_DROP;
8927 }
8928 }
8929 }
8930
8931 /* similar to STATE_LOOKUP() */
8932 if (*state != NULL && pd != NULL && !(pd->pktflags & PKTF_FLOW_ID)) {
8933 pd->flowsrc = (*state)->state_key->flowsrc;
8934 pd->flowhash = (*state)->state_key->flowhash;
8935 if (pd->flowhash != 0) {
8936 pd->pktflags |= PKTF_FLOW_ID;
8937 pd->pktflags &= ~PKTF_FLOW_ADV;
8938 }
8939 }
8940
8941 if (pf_state_lookup_aux(state, kif, direction, &action)) {
8942 return action;
8943 }
8944
8945 if (direction == (*state)->state_key->direction) {
8946 src = &(*state)->src;
8947 dst = &(*state)->dst;
8948 } else {
8949 src = &(*state)->dst;
8950 dst = &(*state)->src;
8951 }
8952
8953 /* update states */
8954 if (src->state < PFESPS_INITIATING) {
8955 src->state = PFESPS_INITIATING;
8956 }
8957
8958 /* update expire time */
8959 (*state)->expire = pf_time_second();
8960 if (src->state >= PFESPS_INITIATING &&
8961 dst->state >= PFESPS_INITIATING) {
8962 (*state)->timeout = PFTM_ESP_ESTABLISHED;
8963 src->state = PFESPS_ESTABLISHED;
8964 dst->state = PFESPS_ESTABLISHED;
8965 } else {
8966 (*state)->timeout = PFTM_ESP_INITIATING;
8967 }
8968 /* translate source/destination address, if necessary */
8969 if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
8970 if (direction == PF_OUT) {
8971 switch (pd->af) {
8972 #if INET
8973 case AF_INET:
8974 pf_change_a(&pd->src->v4addr.s_addr,
8975 pd->ip_sum,
8976 (*state)->state_key->gwy.addr.v4addr.s_addr, 0);
8977 break;
8978 #endif /* INET */
8979 case AF_INET6:
8980 PF_ACPY(pd->src, &(*state)->state_key->gwy.addr,
8981 pd->af);
8982 break;
8983 }
8984 } else {
8985 switch (pd->af) {
8986 #if INET
8987 case AF_INET:
8988 pf_change_a(&pd->dst->v4addr.s_addr,
8989 pd->ip_sum,
8990 (*state)->state_key->lan.addr.v4addr.s_addr, 0);
8991 break;
8992 #endif /* INET */
8993 case AF_INET6:
8994 PF_ACPY(pd->dst, &(*state)->state_key->lan.addr,
8995 pd->af);
8996 break;
8997 }
8998 }
8999 }
9000
9001 return PF_PASS;
9002 }
9003
9004 static __attribute__((noinline)) int
pf_test_state_other(struct pf_state ** state,int direction,struct pfi_kif * kif,struct pf_pdesc * pd)9005 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
9006 struct pf_pdesc *pd)
9007 {
9008 struct pf_state_peer *src, *dst;
9009 struct pf_state_key_cmp key = {};
9010
9011 key.app_state = 0;
9012 key.proto = pd->proto;
9013 if (direction == PF_IN) {
9014 key.af_gwy = pd->af;
9015 PF_ACPY(&key.ext_gwy.addr, pd->src, key.af_gwy);
9016 PF_ACPY(&key.gwy.addr, pd->dst, key.af_gwy);
9017 key.ext_gwy.xport.port = 0;
9018 key.gwy.xport.port = 0;
9019 } else {
9020 key.af_lan = pd->af;
9021 PF_ACPY(&key.lan.addr, pd->src, key.af_lan);
9022 PF_ACPY(&key.ext_lan.addr, pd->dst, key.af_lan);
9023 key.lan.xport.port = 0;
9024 key.ext_lan.xport.port = 0;
9025 }
9026
9027 STATE_LOOKUP();
9028
9029 if (direction == (*state)->state_key->direction) {
9030 src = &(*state)->src;
9031 dst = &(*state)->dst;
9032 } else {
9033 src = &(*state)->dst;
9034 dst = &(*state)->src;
9035 }
9036
9037 /* update states */
9038 if (src->state < PFOTHERS_SINGLE) {
9039 src->state = PFOTHERS_SINGLE;
9040 }
9041 if (dst->state == PFOTHERS_SINGLE) {
9042 dst->state = PFOTHERS_MULTIPLE;
9043 }
9044
9045 /* update expire time */
9046 (*state)->expire = pf_time_second();
9047 if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) {
9048 (*state)->timeout = PFTM_OTHER_MULTIPLE;
9049 } else {
9050 (*state)->timeout = PFTM_OTHER_SINGLE;
9051 }
9052
9053 /* translate source/destination address, if necessary */
9054 if (STATE_ADDR_TRANSLATE((*state)->state_key)) {
9055 if (direction == PF_OUT) {
9056 switch (pd->af) {
9057 #if INET
9058 case AF_INET:
9059 pf_change_a(&pd->src->v4addr.s_addr,
9060 pd->ip_sum,
9061 (*state)->state_key->gwy.addr.v4addr.s_addr,
9062 0);
9063 break;
9064 #endif /* INET */
9065 case AF_INET6:
9066 PF_ACPY(pd->src,
9067 &(*state)->state_key->gwy.addr, pd->af);
9068 break;
9069 }
9070 } else {
9071 switch (pd->af) {
9072 #if INET
9073 case AF_INET:
9074 pf_change_a(&pd->dst->v4addr.s_addr,
9075 pd->ip_sum,
9076 (*state)->state_key->lan.addr.v4addr.s_addr,
9077 0);
9078 break;
9079 #endif /* INET */
9080 case AF_INET6:
9081 PF_ACPY(pd->dst,
9082 &(*state)->state_key->lan.addr, pd->af);
9083 break;
9084 }
9085 }
9086 }
9087
9088 return PF_PASS;
9089 }
9090
9091 /*
9092 * ipoff and off are measured from the start of the mbuf chain.
9093 * h must be at "ipoff" on the mbuf chain.
9094 */
9095 void *
pf_pull_hdr(pbuf_t * pbuf,int off,void * __sized_by (p_buflen)p,int p_buflen,int copylen,u_short * actionp,u_short * reasonp,sa_family_t af)9096 pf_pull_hdr(pbuf_t *pbuf, int off, void *__sized_by(p_buflen)p, int p_buflen, int copylen,
9097 u_short *actionp, u_short *reasonp, sa_family_t af)
9098 {
9099 switch (af) {
9100 #if INET
9101 case AF_INET: {
9102 struct ip *__single h = pbuf->pb_data;
9103 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
9104
9105 if (fragoff) {
9106 if (fragoff >= copylen) {
9107 ACTION_SET(actionp, PF_PASS);
9108 } else {
9109 ACTION_SET(actionp, PF_DROP);
9110 REASON_SET(reasonp, PFRES_FRAG);
9111 }
9112 return NULL;
9113 }
9114 if (pbuf->pb_packet_len < (unsigned)(off + copylen) ||
9115 ntohs(h->ip_len) < off + copylen) {
9116 ACTION_SET(actionp, PF_DROP);
9117 REASON_SET(reasonp, PFRES_SHORT);
9118 return NULL;
9119 }
9120 break;
9121 }
9122 #endif /* INET */
9123 case AF_INET6: {
9124 struct ip6_hdr *__single h = pbuf->pb_data;
9125
9126 if (pbuf->pb_packet_len < (unsigned)(off + copylen) ||
9127 (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
9128 (unsigned)(off + copylen)) {
9129 ACTION_SET(actionp, PF_DROP);
9130 REASON_SET(reasonp, PFRES_SHORT);
9131 return NULL;
9132 }
9133 break;
9134 }
9135 }
9136 pbuf_copy_data(pbuf, off, copylen, p, p_buflen);
9137 return p;
9138 }
9139
9140 int
pf_routable(struct pf_addr * addr,sa_family_t af,struct pfi_kif * kif)9141 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
9142 {
9143 #pragma unused(kif)
9144 struct sockaddr_in *dst;
9145 int ret = 1;
9146 struct sockaddr_in6 *dst6;
9147 struct route_in6 ro;
9148
9149 bzero(&ro, sizeof(ro));
9150 switch (af) {
9151 case AF_INET:
9152 dst = satosin(&ro.ro_dst);
9153 dst->sin_family = AF_INET;
9154 dst->sin_len = sizeof(*dst);
9155 dst->sin_addr = addr->v4addr;
9156 break;
9157 case AF_INET6:
9158 dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
9159 dst6->sin6_family = AF_INET6;
9160 dst6->sin6_len = sizeof(*dst6);
9161 dst6->sin6_addr = addr->v6addr;
9162 break;
9163 default:
9164 return 0;
9165 }
9166
9167 /* XXX: IFT_ENC is not currently used by anything*/
9168 /* Skip checks for ipsec interfaces */
9169 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) {
9170 goto out;
9171 }
9172
9173 /* XXX: what is the point of this? */
9174 rtalloc((struct route *)&ro);
9175
9176 out:
9177 ROUTE_RELEASE(&ro);
9178 return ret;
9179 }
9180
9181 int
pf_rtlabel_match(struct pf_addr * addr,sa_family_t af,struct pf_addr_wrap * aw)9182 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
9183 {
9184 #pragma unused(aw)
9185 struct sockaddr_in *dst;
9186 struct sockaddr_in6 *dst6;
9187 struct route_in6 ro;
9188 int ret = 0;
9189
9190 bzero(&ro, sizeof(ro));
9191 switch (af) {
9192 case AF_INET:
9193 dst = satosin(&ro.ro_dst);
9194 dst->sin_family = AF_INET;
9195 dst->sin_len = sizeof(*dst);
9196 dst->sin_addr = addr->v4addr;
9197 break;
9198 case AF_INET6:
9199 dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
9200 dst6->sin6_family = AF_INET6;
9201 dst6->sin6_len = sizeof(*dst6);
9202 dst6->sin6_addr = addr->v6addr;
9203 break;
9204 default:
9205 return 0;
9206 }
9207
9208 /* XXX: what is the point of this? */
9209 rtalloc((struct route *)&ro);
9210
9211 ROUTE_RELEASE(&ro);
9212
9213 return ret;
9214 }
9215
9216 #if INET
9217 static __attribute__((noinline)) void
pf_route(pbuf_t ** pbufp,struct pf_rule * r,int dir,struct ifnet * oifp,struct pf_state * s,struct pf_pdesc * pd)9218 pf_route(pbuf_t **pbufp, struct pf_rule *r, int dir, struct ifnet *oifp,
9219 struct pf_state *s, struct pf_pdesc *pd)
9220 {
9221 #pragma unused(pd)
9222 struct mbuf *__single m0, *__single m1;
9223 struct route iproute;
9224 struct route *__single ro = &iproute;
9225 struct sockaddr_in *__single dst;
9226 struct ip *__single ip;
9227 struct ifnet *__single ifp = NULL;
9228 struct pf_addr naddr;
9229 struct pf_src_node *__single sn = NULL;
9230 int error = 0;
9231 uint32_t sw_csum;
9232 int interface_mtu = 0;
9233 drop_reason_t drop_reason = DROP_REASON_PF_UNSPECIFIED;
9234
9235 bzero(&iproute, sizeof(iproute));
9236
9237 if (pbufp == NULL || !pbuf_is_valid(*pbufp) || r == NULL ||
9238 (dir != PF_IN && dir != PF_OUT) || oifp == NULL) {
9239 panic("pf_route: invalid parameters");
9240 }
9241
9242 if (pd->pf_mtag->pftag_routed++ > 3) {
9243 pbuf_destroy(*pbufp);
9244 *pbufp = NULL;
9245 m0 = NULL;
9246 goto bad;
9247 }
9248
9249 /*
9250 * Since this is something of an edge case and may involve the
9251 * host stack (for routing, at least for now), we convert the
9252 * incoming pbuf into an mbuf.
9253 */
9254 if (r->rt == PF_DUPTO) {
9255 m0 = pbuf_clone_to_mbuf(*pbufp);
9256 } else if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
9257 return;
9258 } else {
9259 /* We're going to consume this packet */
9260 m0 = pbuf_to_mbuf(*pbufp, TRUE);
9261 *pbufp = NULL;
9262 }
9263
9264 if (m0 == NULL) {
9265 goto bad;
9266 }
9267
9268 /* We now have the packet in an mbuf (m0) */
9269
9270 if (m0->m_len < (int)sizeof(struct ip)) {
9271 DPFPRINTF(PF_DEBUG_URGENT,
9272 ("pf_route: packet length < sizeof (struct ip)\n"));
9273 drop_reason = DROP_REASON_PF_UNDERSIZED;
9274 goto bad;
9275 }
9276
9277 ip = mtod(m0, struct ip *);
9278
9279 dst = satosin((void *)&ro->ro_dst);
9280 dst->sin_family = AF_INET;
9281 dst->sin_len = sizeof(*dst);
9282 dst->sin_addr = ip->ip_dst;
9283
9284 if (r->rt == PF_FASTROUTE) {
9285 rtalloc(ro);
9286 if (ro->ro_rt == NULL) {
9287 ipstat.ips_noroute++;
9288 drop_reason = DROP_REASON_PF_NO_ROUTE;
9289 goto bad;
9290 }
9291
9292 ifp = ro->ro_rt->rt_ifp;
9293 RT_LOCK(ro->ro_rt);
9294 ro->ro_rt->rt_use++;
9295
9296 if (ro->ro_rt->rt_flags & RTF_GATEWAY) {
9297 dst = satosin((void *)ro->ro_rt->rt_gateway);
9298 }
9299 RT_UNLOCK(ro->ro_rt);
9300 } else {
9301 if (TAILQ_EMPTY(&r->rpool.list)) {
9302 DPFPRINTF(PF_DEBUG_URGENT,
9303 ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
9304 drop_reason = DROP_REASON_PF_NO_ROUTE;
9305 goto bad;
9306 }
9307 if (s == NULL) {
9308 pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
9309 &naddr, NULL, &sn);
9310 if (!PF_AZERO(&naddr, AF_INET)) {
9311 dst->sin_addr.s_addr = naddr.v4addr.s_addr;
9312 }
9313 ifp = r->rpool.cur->kif ?
9314 r->rpool.cur->kif->pfik_ifp : NULL;
9315 } else {
9316 if (!PF_AZERO(&s->rt_addr, AF_INET)) {
9317 dst->sin_addr.s_addr =
9318 s->rt_addr.v4addr.s_addr;
9319 }
9320 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
9321 }
9322 }
9323 if (ifp == NULL) {
9324 drop_reason = DROP_REASON_PF_NULL_IFP;
9325 goto bad;
9326 }
9327
9328 if (oifp != ifp) {
9329 if (pf_test_mbuf(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
9330 drop_reason = DROP_REASON_PF_DROP;
9331 goto bad;
9332 } else if (m0 == NULL) {
9333 goto done;
9334 }
9335 if (m0->m_len < (int)sizeof(struct ip)) {
9336 DPFPRINTF(PF_DEBUG_URGENT,
9337 ("pf_route: packet length < sizeof (struct ip)\n"));
9338 drop_reason = DROP_REASON_PF_UNDERSIZED;
9339 goto bad;
9340 }
9341 ip = mtod(m0, struct ip *);
9342 }
9343
9344 /* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
9345 ip_output_checksum(ifp, m0, ((ip->ip_hl) << 2), ntohs(ip->ip_len),
9346 &sw_csum);
9347
9348 interface_mtu = ifp->if_mtu;
9349
9350 if (INTF_ADJUST_MTU_FOR_CLAT46(ifp)) {
9351 interface_mtu = IN6_LINKMTU(ifp);
9352 /* Further adjust the size for CLAT46 expansion */
9353 interface_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
9354 }
9355
9356 if (ntohs(ip->ip_len) <= interface_mtu || TSO_IPV4_OK(ifp, m0) ||
9357 (!(ip->ip_off & htons(IP_DF)) &&
9358 (ifp->if_hwassist & CSUM_FRAGMENT))) {
9359 ip->ip_sum = 0;
9360 if (sw_csum & CSUM_DELAY_IP) {
9361 ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
9362 sw_csum &= ~CSUM_DELAY_IP;
9363 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP;
9364 }
9365 error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt, sintosa(dst));
9366 goto done;
9367 }
9368
9369 /*
9370 * Too large for interface; fragment if possible.
9371 * Must be able to put at least 8 bytes per fragment.
9372 * Balk when DF bit is set or the interface didn't support TSO.
9373 */
9374 if ((ip->ip_off & htons(IP_DF)) ||
9375 (m0->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) {
9376 ipstat.ips_cantfrag++;
9377 if (r->rt != PF_DUPTO) {
9378 icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
9379 interface_mtu);
9380 goto done;
9381 } else {
9382 drop_reason = DROP_REASON_PF_NO_TSO;
9383 goto bad;
9384 }
9385 }
9386
9387 m1 = m0;
9388
9389 /* PR-8933605: send ip_len,ip_off to ip_fragment in host byte order */
9390 #if BYTE_ORDER != BIG_ENDIAN
9391 NTOHS(ip->ip_off);
9392 NTOHS(ip->ip_len);
9393 #endif
9394 error = ip_fragment(m0, ifp, interface_mtu, sw_csum);
9395
9396 if (error) {
9397 m0 = NULL;
9398 drop_reason = DROP_REASON_PF_CANNOT_FRAGMENT;
9399 goto bad;
9400 }
9401
9402 for (m0 = m1; m0; m0 = m1) {
9403 m1 = m0->m_nextpkt;
9404 m0->m_nextpkt = 0;
9405 if (error == 0) {
9406 error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt,
9407 sintosa(dst));
9408 } else {
9409 m_freem(m0);
9410 }
9411 }
9412
9413 if (error == 0) {
9414 ipstat.ips_fragmented++;
9415 }
9416
9417 done:
9418 ROUTE_RELEASE(&iproute);
9419 return;
9420
9421 bad:
9422 if (m0) {
9423 m_drop(m0, DROPTAP_FLAG_DIR_IN, drop_reason, NULL, 0);
9424 m0 = NULL;
9425 }
9426 goto done;
9427 }
9428 #endif /* INET */
9429
9430 static __attribute__((noinline)) void
pf_route6(pbuf_t ** pbufp,struct pf_rule * r,int dir,struct ifnet * oifp,struct pf_state * s,struct pf_pdesc * pd)9431 pf_route6(pbuf_t **pbufp, struct pf_rule *r, int dir, struct ifnet *oifp,
9432 struct pf_state *s, struct pf_pdesc *pd)
9433 {
9434 #pragma unused(pd)
9435 struct mbuf *__single m0;
9436 struct route_in6 ip6route;
9437 struct route_in6 *__single ro;
9438 struct sockaddr_in6 *__single dst;
9439 struct ip6_hdr *__single ip6;
9440 struct ifnet *__single ifp = NULL;
9441 struct pf_addr naddr;
9442 struct pf_src_node *__single sn = NULL;
9443 int error = 0;
9444 struct pf_mtag *__single pf_mtag;
9445 drop_reason_t drop_reason = DROP_REASON_PF_UNSPECIFIED;
9446
9447 if (pbufp == NULL || !pbuf_is_valid(*pbufp) || r == NULL ||
9448 (dir != PF_IN && dir != PF_OUT) || oifp == NULL) {
9449 panic("pf_route6: invalid parameters");
9450 }
9451
9452 if (pd->pf_mtag->pftag_routed++ > 3) {
9453 pbuf_destroy(*pbufp);
9454 *pbufp = NULL;
9455 m0 = NULL;
9456 goto bad;
9457 }
9458
9459 /*
9460 * Since this is something of an edge case and may involve the
9461 * host stack (for routing, at least for now), we convert the
9462 * incoming pbuf into an mbuf.
9463 */
9464 if (r->rt == PF_DUPTO) {
9465 m0 = pbuf_clone_to_mbuf(*pbufp);
9466 } else if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
9467 return;
9468 } else {
9469 /* We're about to consume this packet */
9470 m0 = pbuf_to_mbuf(*pbufp, TRUE);
9471 *pbufp = NULL;
9472 }
9473
9474 if (m0 == NULL) {
9475 goto bad;
9476 }
9477
9478 if (m0->m_len < (int)sizeof(struct ip6_hdr)) {
9479 DPFPRINTF(PF_DEBUG_URGENT,
9480 ("pf_route6: m0->m_len < sizeof (struct ip6_hdr)\n"));
9481 drop_reason = DROP_REASON_PF_UNDERSIZED;
9482 goto bad;
9483 }
9484 ip6 = mtod(m0, struct ip6_hdr *);
9485
9486 ro = &ip6route;
9487 bzero((void *__bidi_indexable)(struct route_in6 *__bidi_indexable)ro, sizeof(*ro));
9488 dst = SIN6(&ro->ro_dst);
9489 dst->sin6_family = AF_INET6;
9490 dst->sin6_len = sizeof(*dst);
9491 dst->sin6_addr = ip6->ip6_dst;
9492
9493 /* Cheat. XXX why only in the v6addr case??? */
9494 if (r->rt == PF_FASTROUTE) {
9495 pf_mtag = pf_get_mtag(m0);
9496 ASSERT(pf_mtag != NULL);
9497 pf_mtag->pftag_flags |= PF_TAG_GENERATED;
9498 ip6_output_setsrcifscope(m0, oifp->if_index, NULL);
9499 ip6_output_setdstifscope(m0, oifp->if_index, NULL);
9500 ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
9501 return;
9502 }
9503
9504 if (TAILQ_EMPTY(&r->rpool.list)) {
9505 DPFPRINTF(PF_DEBUG_URGENT,
9506 ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
9507 drop_reason = DROP_REASON_PF_NO_ROUTE;
9508 goto bad;
9509 }
9510 if (s == NULL) {
9511 pf_map_addr(AF_INET6, r, (struct pf_addr *)(void *)&ip6->ip6_src,
9512 &naddr, NULL, &sn);
9513 if (!PF_AZERO(&naddr, AF_INET6)) {
9514 PF_ACPY((struct pf_addr *)&dst->sin6_addr,
9515 &naddr, AF_INET6);
9516 }
9517 ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
9518 } else {
9519 if (!PF_AZERO(&s->rt_addr, AF_INET6)) {
9520 PF_ACPY((struct pf_addr *)&dst->sin6_addr,
9521 &s->rt_addr, AF_INET6);
9522 }
9523 ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
9524 }
9525 if (ifp == NULL) {
9526 drop_reason = DROP_REASON_PF_NULL_IFP;
9527 goto bad;
9528 }
9529
9530 if (oifp != ifp) {
9531 if (pf_test6_mbuf(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
9532 drop_reason = DROP_REASON_PF_DROP;
9533 goto bad;
9534 } else if (m0 == NULL) {
9535 goto done;
9536 }
9537 if (m0->m_len < (int)sizeof(struct ip6_hdr)) {
9538 DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6: m0->m_len "
9539 "< sizeof (struct ip6_hdr)\n"));
9540 drop_reason = DROP_REASON_PF_UNDERSIZED;
9541 goto bad;
9542 }
9543 pf_mtag = pf_get_mtag(m0);
9544 /*
9545 * send refragmented packets.
9546 */
9547 if ((pf_mtag->pftag_flags & PF_TAG_REFRAGMENTED) != 0) {
9548 pf_mtag->pftag_flags &= ~PF_TAG_REFRAGMENTED;
9549 /*
9550 * nd6_output() frees packet chain in both success and
9551 * failure cases.
9552 */
9553 error = nd6_output(ifp, ifp, m0, dst, NULL, NULL);
9554 m0 = NULL;
9555 if (error) {
9556 DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6:"
9557 "dropped refragmented packet\n"));
9558 }
9559 goto done;
9560 }
9561 ip6 = mtod(m0, struct ip6_hdr *);
9562 }
9563
9564 /*
9565 * If the packet is too large for the outgoing interface,
9566 * send back an icmp6 error.
9567 */
9568 if (in6_embedded_scope && IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) {
9569 dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
9570 }
9571 if ((unsigned)m0->m_pkthdr.len <= ifp->if_mtu) {
9572 error = nd6_output(ifp, ifp, m0, dst, NULL, NULL);
9573 } else {
9574 in6_ifstat_inc(ifp, ifs6_in_toobig);
9575 if (r->rt != PF_DUPTO) {
9576 icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
9577 } else {
9578 drop_reason = DROP_REASON_PF_NO_TSO;
9579 goto bad;
9580 }
9581 }
9582
9583 done:
9584 return;
9585
9586 bad:
9587 if (m0) {
9588 m_drop(m0, DROPTAP_FLAG_DIR_IN, drop_reason, NULL, 0);
9589 m0 = NULL;
9590 }
9591 goto done;
9592 }
9593
9594
9595 /*
9596 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
9597 * off is the offset where the protocol header starts
9598 * len is the total length of protocol header plus payload
9599 * returns 0 when the checksum is valid, otherwise returns 1.
9600 */
9601 static int
pf_check_proto_cksum(pbuf_t * pbuf,int off,int len,u_int8_t p,sa_family_t af)9602 pf_check_proto_cksum(pbuf_t *pbuf, int off, int len, u_int8_t p,
9603 sa_family_t af)
9604 {
9605 u_int16_t sum;
9606
9607 switch (p) {
9608 case IPPROTO_TCP:
9609 case IPPROTO_UDP:
9610 /*
9611 * Optimize for the common case; if the hardware calculated
9612 * value doesn't include pseudo-header checksum, or if it
9613 * is partially-computed (only 16-bit summation), do it in
9614 * software below.
9615 */
9616 if ((*pbuf->pb_csum_flags &
9617 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
9618 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) &&
9619 (*pbuf->pb_csum_data ^ 0xffff) == 0) {
9620 return 0;
9621 }
9622 break;
9623 case IPPROTO_ICMP:
9624 case IPPROTO_ICMPV6:
9625 break;
9626 default:
9627 return 1;
9628 }
9629 if (off < (int)sizeof(struct ip) || len < (int)sizeof(struct udphdr)) {
9630 return 1;
9631 }
9632 if (pbuf->pb_packet_len < (unsigned)(off + len)) {
9633 return 1;
9634 }
9635 switch (af) {
9636 #if INET
9637 case AF_INET:
9638 if (p == IPPROTO_ICMP) {
9639 if (pbuf->pb_contig_len < (unsigned)off) {
9640 return 1;
9641 }
9642 sum = pbuf_inet_cksum(pbuf, 0, off, len);
9643 } else {
9644 if (pbuf->pb_contig_len < (int)sizeof(struct ip)) {
9645 return 1;
9646 }
9647 sum = pbuf_inet_cksum(pbuf, p, off, len);
9648 }
9649 break;
9650 #endif /* INET */
9651 case AF_INET6:
9652 if (pbuf->pb_contig_len < (int)sizeof(struct ip6_hdr)) {
9653 return 1;
9654 }
9655 sum = pbuf_inet6_cksum(pbuf, p, off, len);
9656 break;
9657 default:
9658 return 1;
9659 }
9660 if (sum) {
9661 switch (p) {
9662 case IPPROTO_TCP:
9663 tcpstat.tcps_rcvbadsum++;
9664 break;
9665 case IPPROTO_UDP:
9666 udpstat.udps_badsum++;
9667 break;
9668 case IPPROTO_ICMP:
9669 icmpstat.icps_checksum++;
9670 break;
9671 case IPPROTO_ICMPV6:
9672 icmp6stat.icp6s_checksum++;
9673 break;
9674 }
9675 return 1;
9676 }
9677 return 0;
9678 }
9679
9680 #if INET
/*
 * Re-synchronise the caller's locals after a call that may have swapped the
 * packet buffer recorded in pd.mp.  Expects `pbuf`, `h` and `pd` to be
 * variables in the expanding scope.  When pd.mp is non-NULL and differs from
 * a non-NULL pbuf, pbuf is pointed at it, h at its data, and pd.pf_mtag is
 * refreshed from it.
 */
#define PF_APPLE_UPDATE_PDESC_IPv4()                                    \
	do {                                                            \
		if (pbuf != NULL && pd.mp != NULL && pd.mp != pbuf) {   \
			pbuf = pd.mp;                                   \
			h = pbuf->pb_data;                              \
			pd.pf_mtag = pf_get_mtag_pbuf(pbuf);            \
		}                                                       \
	} while (0)
9689
9690 int
pf_test_mbuf(int dir,struct ifnet * ifp,struct mbuf ** m0,struct ether_header * eh,struct ip_fw_args * fwa)9691 pf_test_mbuf(int dir, struct ifnet *ifp, struct mbuf **m0,
9692 struct ether_header *eh, struct ip_fw_args *fwa)
9693 {
9694 pbuf_t pbuf_store, *__single pbuf;
9695 int rv;
9696
9697 pbuf_init_mbuf(&pbuf_store, *m0, (*m0)->m_pkthdr.rcvif);
9698 pbuf = &pbuf_store;
9699
9700 rv = pf_test(dir, ifp, &pbuf, eh, fwa);
9701
9702 if (pbuf_is_valid(pbuf)) {
9703 *m0 = pbuf->pb_mbuf;
9704 pbuf->pb_mbuf = NULL;
9705 pbuf_destroy(pbuf);
9706 } else {
9707 *m0 = NULL;
9708 }
9709
9710 return rv;
9711 }
9712
9713 static __attribute__((noinline)) int
pf_test(int dir,struct ifnet * ifp,pbuf_t ** pbufp,struct ether_header * eh,struct ip_fw_args * fwa)9714 pf_test(int dir, struct ifnet *ifp, pbuf_t **pbufp,
9715 struct ether_header *eh, struct ip_fw_args *fwa)
9716 {
9717 #if !DUMMYNET
9718 #pragma unused(fwa)
9719 #endif
9720 struct pfi_kif *__single kif;
9721 u_short action = PF_PASS, reason = 0, log = 0;
9722 pbuf_t *__single pbuf = *pbufp;
9723 struct ip *__single h = 0;
9724 struct pf_rule *__single a = NULL, *__single r = &pf_default_rule, *__single tr, *__single nr;
9725 struct pf_state *__single s = NULL;
9726 struct pf_state_key *__single sk = NULL;
9727 struct pf_ruleset *__single ruleset = NULL;
9728 struct pf_pdesc pd;
9729 int off, dirndx, pqid = 0;
9730
9731 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
9732
9733 if (!pf_status.running) {
9734 return PF_PASS;
9735 }
9736
9737 memset(&pd, 0, sizeof(pd));
9738
9739 if ((pd.pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
9740 DPFPRINTF(PF_DEBUG_URGENT,
9741 ("pf_test: pf_get_mtag_pbuf returned NULL\n"));
9742 return PF_DROP;
9743 }
9744
9745 if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED) {
9746 return PF_PASS;
9747 }
9748
9749 kif = (struct pfi_kif *)ifp->if_pf_kif;
9750
9751 if (kif == NULL) {
9752 DPFPRINTF(PF_DEBUG_URGENT,
9753 ("pf_test: kif == NULL, if_name %s\n", ifp->if_name));
9754 return PF_DROP;
9755 }
9756 if (kif->pfik_flags & PFI_IFLAG_SKIP) {
9757 return PF_PASS;
9758 }
9759
9760 if (pbuf->pb_packet_len < (int)sizeof(*h)) {
9761 REASON_SET(&reason, PFRES_SHORT);
9762 return PF_DROP;
9763 }
9764
9765 /* initialize enough of pd for the done label */
9766 h = pbuf->pb_data;
9767 pd.mp = pbuf;
9768 pd.lmw = 0;
9769 pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
9770 pd.src = (struct pf_addr *)&h->ip_src;
9771 pd.dst = (struct pf_addr *)&h->ip_dst;
9772 PF_ACPY(&pd.baddr, pd.src, AF_INET);
9773 PF_ACPY(&pd.bdaddr, pd.dst, AF_INET);
9774 pd.ip_sum = &h->ip_sum;
9775 pd.proto = h->ip_p;
9776 pd.proto_variant = 0;
9777 pd.af = AF_INET;
9778 pd.tos = h->ip_tos;
9779 pd.ttl = h->ip_ttl;
9780 pd.tot_len = ntohs(h->ip_len);
9781 pd.eh = eh;
9782
9783 #if DUMMYNET
9784 if (fwa != NULL && fwa->fwa_pf_rule != NULL) {
9785 goto nonormalize;
9786 }
9787 #endif /* DUMMYNET */
9788
9789 /* We do IP header normalization and packet reassembly here */
9790 action = pf_normalize_ip(pbuf, dir, kif, &reason, &pd);
9791 if (action != PF_PASS || pd.lmw < 0) {
9792 action = PF_DROP;
9793 goto done;
9794 }
9795
9796 #if DUMMYNET
9797 nonormalize:
9798 #endif /* DUMMYNET */
9799 /* pf_normalize can mess with pb_data */
9800 h = pbuf->pb_data;
9801
9802 off = h->ip_hl << 2;
9803 if (off < (int)sizeof(*h)) {
9804 action = PF_DROP;
9805 REASON_SET(&reason, PFRES_SHORT);
9806 log = 1;
9807 goto done;
9808 }
9809
9810 pd.src = (struct pf_addr *)&h->ip_src;
9811 pd.dst = (struct pf_addr *)&h->ip_dst;
9812 PF_ACPY(&pd.baddr, pd.src, AF_INET);
9813 PF_ACPY(&pd.bdaddr, pd.dst, AF_INET);
9814 pd.ip_sum = &h->ip_sum;
9815 pd.proto = h->ip_p;
9816 pd.proto_variant = 0;
9817 pd.mp = pbuf;
9818 pd.lmw = 0;
9819 pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
9820 pd.af = AF_INET;
9821 pd.tos = h->ip_tos;
9822 pd.ttl = h->ip_ttl;
9823 pd.sc = MBUF_SCIDX(pbuf_get_service_class(pbuf));
9824 pd.tot_len = ntohs(h->ip_len);
9825 pd.eh = eh;
9826
9827 if (*pbuf->pb_flags & PKTF_FLOW_ID) {
9828 pd.flowsrc = *pbuf->pb_flowsrc;
9829 pd.flowhash = *pbuf->pb_flowid;
9830 pd.pktflags = *pbuf->pb_flags & PKTF_FLOW_MASK;
9831 }
9832
9833 /* handle fragments that didn't get reassembled by normalization */
9834 if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
9835 pd.flags |= PFDESC_IP_FRAG;
9836 #if DUMMYNET
9837 /* Traffic goes through dummynet first */
9838 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9839 if (action == PF_DROP || pbuf == NULL) {
9840 *pbufp = NULL;
9841 return action;
9842 }
9843 #endif /* DUMMYNET */
9844 action = pf_test_fragment(&r, dir, kif, pbuf, h,
9845 &pd, &a, &ruleset);
9846 goto done;
9847 }
9848
9849 switch (h->ip_p) {
9850 case IPPROTO_TCP: {
9851 struct tcphdr th;
9852 pf_pd_set_hdr_tcp(&pd, &th);
9853 if (!pf_pull_hdr(pbuf, off, &th, sizeof(th), sizeof(th),
9854 &action, &reason, AF_INET)) {
9855 log = action != PF_PASS;
9856 goto done;
9857 }
9858 pd.p_len = pd.tot_len - off - (th.th_off << 2);
9859 if ((th.th_flags & TH_ACK) && pd.p_len == 0) {
9860 pqid = 1;
9861 }
9862 #if DUMMYNET
9863 /* Traffic goes through dummynet first */
9864 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9865 if (action == PF_DROP || pbuf == NULL) {
9866 *pbufp = NULL;
9867 return action;
9868 }
9869 #endif /* DUMMYNET */
9870 action = pf_normalize_tcp(dir, kif, pbuf, 0, off, h, &pd);
9871 if (pd.lmw < 0) {
9872 goto done;
9873 }
9874 PF_APPLE_UPDATE_PDESC_IPv4();
9875 if (action == PF_DROP) {
9876 goto done;
9877 }
9878 if (th.th_sport == 0 || th.th_dport == 0) {
9879 action = PF_DROP;
9880 REASON_SET(&reason, PFRES_INVPORT);
9881 goto done;
9882 }
9883 action = pf_test_state_tcp(&s, dir, kif, pbuf, off, h, &pd,
9884 &reason);
9885 if (action == PF_NAT64) {
9886 goto done;
9887 }
9888 if (pd.lmw < 0) {
9889 goto done;
9890 }
9891 PF_APPLE_UPDATE_PDESC_IPv4();
9892 if (action == PF_PASS) {
9893 #if NPFSYNC
9894 pfsync_update_state(s);
9895 #endif /* NPFSYNC */
9896 r = s->rule.ptr;
9897 a = s->anchor.ptr;
9898 log = s->log;
9899 } else if (s == NULL) {
9900 action = pf_test_rule(&r, &s, dir, kif,
9901 pbuf, off, h, &pd, &a, &ruleset, NULL);
9902 }
9903 break;
9904 }
9905
9906 case IPPROTO_UDP: {
9907 struct udphdr uh;
9908
9909 pf_pd_set_hdr_udp(&pd, &uh);
9910 if (!pf_pull_hdr(pbuf, off, &uh, sizeof(uh), sizeof(uh),
9911 &action, &reason, AF_INET)) {
9912 log = action != PF_PASS;
9913 goto done;
9914 }
9915 if (uh.uh_sport == 0 || uh.uh_dport == 0) {
9916 action = PF_DROP;
9917 REASON_SET(&reason, PFRES_INVPORT);
9918 goto done;
9919 }
9920 if (ntohs(uh.uh_ulen) > pbuf->pb_packet_len - off ||
9921 ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
9922 action = PF_DROP;
9923 REASON_SET(&reason, PFRES_SHORT);
9924 goto done;
9925 }
9926 #if DUMMYNET
9927 /* Traffic goes through dummynet first */
9928 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9929 if (action == PF_DROP || pbuf == NULL) {
9930 *pbufp = NULL;
9931 return action;
9932 }
9933 #endif /* DUMMYNET */
9934 action = pf_test_state_udp(&s, dir, kif, pbuf, off, h, &pd,
9935 &reason);
9936 if (action == PF_NAT64) {
9937 goto done;
9938 }
9939 if (pd.lmw < 0) {
9940 goto done;
9941 }
9942 PF_APPLE_UPDATE_PDESC_IPv4();
9943 if (action == PF_PASS) {
9944 #if NPFSYNC
9945 pfsync_update_state(s);
9946 #endif /* NPFSYNC */
9947 r = s->rule.ptr;
9948 a = s->anchor.ptr;
9949 log = s->log;
9950 } else if (s == NULL) {
9951 action = pf_test_rule(&r, &s, dir, kif,
9952 pbuf, off, h, &pd, &a, &ruleset, NULL);
9953 }
9954 break;
9955 }
9956
9957 case IPPROTO_ICMP: {
9958 struct icmp ih;
9959
9960 pf_pd_set_hdr_icmp(&pd, &ih, ICMP_MINLEN);
9961 if (!pf_pull_hdr(pbuf, off, &ih, sizeof(ih), ICMP_MINLEN,
9962 &action, &reason, AF_INET)) {
9963 log = action != PF_PASS;
9964 goto done;
9965 }
9966 #if DUMMYNET
9967 /* Traffic goes through dummynet first */
9968 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
9969 if (action == PF_DROP || pbuf == NULL) {
9970 *pbufp = NULL;
9971 return action;
9972 }
9973 #endif /* DUMMYNET */
9974 action = pf_test_state_icmp(&s, dir, kif, pbuf, off, h, &pd,
9975 &reason);
9976
9977 if (action == PF_NAT64) {
9978 goto done;
9979 }
9980 if (pd.lmw < 0) {
9981 goto done;
9982 }
9983 PF_APPLE_UPDATE_PDESC_IPv4();
9984 if (action == PF_PASS) {
9985 #if NPFSYNC
9986 pfsync_update_state(s);
9987 #endif /* NPFSYNC */
9988 r = s->rule.ptr;
9989 a = s->anchor.ptr;
9990 log = s->log;
9991 } else if (s == NULL) {
9992 action = pf_test_rule(&r, &s, dir, kif,
9993 pbuf, off, h, &pd, &a, &ruleset, NULL);
9994 }
9995 break;
9996 }
9997
9998 case IPPROTO_ESP: {
9999 struct pf_esp_hdr esp;
10000
10001 pf_pd_set_hdr_esp(&pd, &esp);
10002 if (!pf_pull_hdr(pbuf, off, &esp, sizeof(esp), sizeof(esp), &action, &reason,
10003 AF_INET)) {
10004 log = action != PF_PASS;
10005 goto done;
10006 }
10007 #if DUMMYNET
10008 /* Traffic goes through dummynet first */
10009 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10010 if (action == PF_DROP || pbuf == NULL) {
10011 *pbufp = NULL;
10012 return action;
10013 }
10014 #endif /* DUMMYNET */
10015 action = pf_test_state_esp(&s, dir, kif, off, &pd);
10016 if (pd.lmw < 0) {
10017 goto done;
10018 }
10019 PF_APPLE_UPDATE_PDESC_IPv4();
10020 if (action == PF_PASS) {
10021 #if NPFSYNC
10022 pfsync_update_state(s);
10023 #endif /* NPFSYNC */
10024 r = s->rule.ptr;
10025 a = s->anchor.ptr;
10026 log = s->log;
10027 } else if (s == NULL) {
10028 action = pf_test_rule(&r, &s, dir, kif,
10029 pbuf, off, h, &pd, &a, &ruleset, NULL);
10030 }
10031 break;
10032 }
10033
10034 case IPPROTO_GRE: {
10035 struct pf_grev1_hdr grev1;
10036 pf_pd_set_hdr_grev1(&pd, &grev1);
10037 if (!pf_pull_hdr(pbuf, off, &grev1, sizeof(grev1), sizeof(grev1), &action,
10038 &reason, AF_INET)) {
10039 log = (action != PF_PASS);
10040 goto done;
10041 }
10042 #if DUMMYNET
10043 /* Traffic goes through dummynet first */
10044 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10045 if (action == PF_DROP || pbuf == NULL) {
10046 *pbufp = NULL;
10047 return action;
10048 }
10049 #endif /* DUMMYNET */
10050 if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
10051 ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
10052 if (ntohs(grev1.payload_length) >
10053 pbuf->pb_packet_len - off) {
10054 action = PF_DROP;
10055 REASON_SET(&reason, PFRES_SHORT);
10056 goto done;
10057 }
10058 pd.proto_variant = PF_GRE_PPTP_VARIANT;
10059 action = pf_test_state_grev1(&s, dir, kif, off, &pd);
10060 if (pd.lmw < 0) {
10061 goto done;
10062 }
10063 PF_APPLE_UPDATE_PDESC_IPv4();
10064 if (action == PF_PASS) {
10065 #if NPFSYNC
10066 pfsync_update_state(s);
10067 #endif /* NPFSYNC */
10068 r = s->rule.ptr;
10069 a = s->anchor.ptr;
10070 log = s->log;
10071 break;
10072 } else if (s == NULL) {
10073 action = pf_test_rule(&r, &s, dir, kif, pbuf,
10074 off, h, &pd, &a, &ruleset, NULL);
10075 if (action == PF_PASS) {
10076 break;
10077 }
10078 }
10079 }
10080
10081 /* not GREv1/PPTP, so treat as ordinary GRE... */
10082 OS_FALLTHROUGH;
10083 }
10084
10085 default:
10086 #if DUMMYNET
10087 /* Traffic goes through dummynet first */
10088 action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
10089 if (action == PF_DROP || pbuf == NULL) {
10090 *pbufp = NULL;
10091 return action;
10092 }
10093 #endif /* DUMMYNET */
10094 action = pf_test_state_other(&s, dir, kif, &pd);
10095 if (pd.lmw < 0) {
10096 goto done;
10097 }
10098 PF_APPLE_UPDATE_PDESC_IPv4();
10099 if (action == PF_PASS) {
10100 #if NPFSYNC
10101 pfsync_update_state(s);
10102 #endif /* NPFSYNC */
10103 r = s->rule.ptr;
10104 a = s->anchor.ptr;
10105 log = s->log;
10106 } else if (s == NULL) {
10107 action = pf_test_rule(&r, &s, dir, kif, pbuf, off, h,
10108 &pd, &a, &ruleset, NULL);
10109 }
10110 break;
10111 }
10112
10113 done:
10114 if (action == PF_NAT64) {
10115 *pbufp = NULL;
10116 return action;
10117 }
10118
10119 *pbufp = pd.mp;
10120 PF_APPLE_UPDATE_PDESC_IPv4();
10121
10122 if (action != PF_DROP) {
10123 if (action == PF_PASS && h->ip_hl > 5 &&
10124 !((s && s->allow_opts) || r->allow_opts)) {
10125 action = PF_DROP;
10126 REASON_SET(&reason, PFRES_IPOPTIONS);
10127 log = 1;
10128 DPFPRINTF(PF_DEBUG_MISC,
10129 ("pf: dropping packet with ip options [hlen=%u]\n",
10130 (unsigned int) h->ip_hl));
10131 }
10132
10133 if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) ||
10134 (pd.pktflags & PKTF_FLOW_ID)) {
10135 (void) pf_tag_packet(pbuf, pd.pf_mtag, s ? s->tag : 0,
10136 r->rtableid, &pd);
10137 }
10138
10139 if (action == PF_PASS) {
10140 #if PF_ECN
10141 /* add hints for ecn */
10142 pd.pf_mtag->pftag_hdr = h;
10143 /* record address family */
10144 pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET6;
10145 pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET;
10146 #endif /* PF_ECN */
10147 /* record protocol */
10148 *pbuf->pb_proto = pd.proto;
10149
10150 /*
10151 * connections redirected to loopback should not match sockets
10152 * bound specifically to loopback due to security implications,
10153 * see tcp_input() and in_pcblookup_listen().
10154 */
10155 if (dir == PF_IN && (pd.proto == IPPROTO_TCP ||
10156 pd.proto == IPPROTO_UDP) && s != NULL &&
10157 s->nat_rule.ptr != NULL &&
10158 (s->nat_rule.ptr->action == PF_RDR ||
10159 s->nat_rule.ptr->action == PF_BINAT) &&
10160 (ntohl(pd.dst->v4addr.s_addr) >> IN_CLASSA_NSHIFT)
10161 == IN_LOOPBACKNET) {
10162 pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;
10163 }
10164 }
10165 }
10166
10167 if (log) {
10168 struct pf_rule *lr;
10169
10170 if (s != NULL && s->nat_rule.ptr != NULL &&
10171 s->nat_rule.ptr->log & PF_LOG_ALL) {
10172 lr = s->nat_rule.ptr;
10173 } else {
10174 lr = r;
10175 }
10176 PFLOG_PACKET(kif, h, pbuf, AF_INET, dir, reason, lr, a, ruleset,
10177 &pd);
10178 }
10179
10180 kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
10181 kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
10182
10183 if (action == PF_PASS || r->action == PF_DROP) {
10184 dirndx = (dir == PF_OUT);
10185 r->packets[dirndx]++;
10186 r->bytes[dirndx] += pd.tot_len;
10187 if (a != NULL) {
10188 a->packets[dirndx]++;
10189 a->bytes[dirndx] += pd.tot_len;
10190 }
10191 if (s != NULL) {
10192 sk = s->state_key;
10193 if (s->nat_rule.ptr != NULL) {
10194 s->nat_rule.ptr->packets[dirndx]++;
10195 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
10196 }
10197 if (s->src_node != NULL) {
10198 s->src_node->packets[dirndx]++;
10199 s->src_node->bytes[dirndx] += pd.tot_len;
10200 }
10201 if (s->nat_src_node != NULL) {
10202 s->nat_src_node->packets[dirndx]++;
10203 s->nat_src_node->bytes[dirndx] += pd.tot_len;
10204 }
10205 dirndx = (dir == sk->direction) ? 0 : 1;
10206 s->packets[dirndx]++;
10207 s->bytes[dirndx] += pd.tot_len;
10208 }
10209 tr = r;
10210 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
10211 if (nr != NULL) {
10212 struct pf_addr *x;
10213 /*
10214 * XXX: we need to make sure that the addresses
10215 * passed to pfr_update_stats() are the same than
10216 * the addresses used during matching (pfr_match)
10217 */
10218 if (r == &pf_default_rule) {
10219 tr = nr;
10220 x = (sk == NULL || sk->direction == dir) ?
10221 &pd.baddr : &pd.naddr;
10222 } else {
10223 x = (sk == NULL || sk->direction == dir) ?
10224 &pd.naddr : &pd.baddr;
10225 }
10226 if (x == &pd.baddr || s == NULL) {
10227 /* we need to change the address */
10228 if (dir == PF_OUT) {
10229 pd.src = x;
10230 } else {
10231 pd.dst = x;
10232 }
10233 }
10234 }
10235 if (tr->src.addr.type == PF_ADDR_TABLE) {
10236 pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
10237 sk->direction == dir) ?
10238 pd.src : pd.dst, pd.af,
10239 pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
10240 tr->src.neg);
10241 }
10242 if (tr->dst.addr.type == PF_ADDR_TABLE) {
10243 pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
10244 sk->direction == dir) ? pd.dst : pd.src, pd.af,
10245 pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
10246 tr->dst.neg);
10247 }
10248 }
10249
10250 VERIFY(pbuf == NULL || pd.mp == NULL || pd.mp == pbuf);
10251
10252 if (*pbufp) {
10253 if (pd.lmw < 0) {
10254 REASON_SET(&reason, PFRES_MEMORY);
10255 action = PF_DROP;
10256 }
10257
10258 if (action == PF_DROP) {
10259 pbuf_destroy(*pbufp);
10260 *pbufp = NULL;
10261 return PF_DROP;
10262 }
10263
10264 *pbufp = pbuf;
10265 }
10266
10267 if (action == PF_SYNPROXY_DROP) {
10268 pbuf_destroy(*pbufp);
10269 *pbufp = NULL;
10270 action = PF_PASS;
10271 } else if (r->rt) {
10272 /* pf_route can free the pbuf causing *pbufp to become NULL */
10273 pf_route(pbufp, r, dir, kif->pfik_ifp, s, &pd);
10274 }
10275
10276 return action;
10277 }
10278 #endif /* INET */
10279
/*
 * Resynchronize the local packet view of pf_test6() with the packet
 * descriptor: when both pbuf and pd.mp are non-NULL and differ, pd.mp
 * is adopted as pbuf; afterwards the header pointer h is reloaded from
 * pbuf->pb_data.
 *
 * The macro expands in place and relies on the locals pbuf, pd and h
 * of the enclosing function.  h is reloaded unconditionally, so pbuf
 * must be non-NULL wherever the macro is used.
 */
#define PF_APPLE_UPDATE_PDESC_IPv6()                            \
	do {                                                    \
	        if (pbuf && pd.mp && pbuf != pd.mp) {           \
	                pbuf = pd.mp;                           \
	        }                                               \
	        h = pbuf->pb_data;                              \
	} while (0)
10287
10288 int
pf_test6_mbuf(int dir,struct ifnet * ifp,struct mbuf ** m0,struct ether_header * eh,struct ip_fw_args * fwa)10289 pf_test6_mbuf(int dir, struct ifnet *ifp, struct mbuf **m0,
10290 struct ether_header *eh, struct ip_fw_args *fwa)
10291 {
10292 pbuf_t pbuf_store, *__single pbuf;
10293 int rv;
10294
10295 pbuf_init_mbuf(&pbuf_store, *m0, (*m0)->m_pkthdr.rcvif);
10296 pbuf = &pbuf_store;
10297
10298 rv = pf_test6(dir, ifp, &pbuf, eh, fwa);
10299
10300 if (pbuf_is_valid(pbuf)) {
10301 *m0 = pbuf->pb_mbuf;
10302 pbuf->pb_mbuf = NULL;
10303 pbuf_destroy(pbuf);
10304 } else {
10305 *m0 = NULL;
10306 }
10307
10308 return rv;
10309 }
10310
/*
 * pf_test6: run one IPv6 packet through pf and return the verdict.
 *
 *   dir    direction, compared against PF_IN / PF_OUT.
 *   ifp    interface the packet is filtered on; asserted non-NULL.
 *   pbufp  in/out packet.  It is set to NULL when the packet is consumed
 *          here: pf_test_dummynet() returned PF_DROP or cleared the pbuf,
 *          the verdict is PF_NAT64, the packet was dropped (and
 *          destroyed), or the verdict was PF_SYNPROXY_DROP.
 *          pf_route6() and pf_refragment6() also receive pbufp and may
 *          update it.  The early returns taken before any processing
 *          leave *pbufp untouched.
 *   eh     Ethernet header pointer, stored in the descriptor (pd.eh).
 *   fwa    dummynet arguments; unused unless DUMMYNET is configured.
 *
 * The caller must hold pf_lock.
 *
 * Outline:
 *   1. Bail out early (pf not running, missing pf tag, packet generated
 *      by pf, no kif, interface flagged to be skipped, short packet).
 *   2. Fill in the packet descriptor and normalize via pf_normalize_ip6()
 *      (skipped when re-entered with a dummynet rule).
 *   3. Walk the extension header chain; fragments are handed to
 *      pf_test_fragment().
 *   4. Per upper-layer protocol: state lookup, then pf_test_rule() when
 *      no state matched.
 *   5. At "done": routing-header policy, tagging, logging, statistics,
 *      final disposal/routing/refragmentation of the packet.
 *
 * Returns the resulting action (PF_PASS, PF_DROP, PF_NAT64, or whatever
 * the helpers returned).
 */
static __attribute__((noinline)) int
pf_test6(int dir, struct ifnet *ifp, pbuf_t **pbufp,
    struct ether_header *eh, struct ip_fw_args *fwa)
{
#if !DUMMYNET
#pragma unused(fwa)
#endif
	struct pfi_kif *__single kif;
	u_short action = PF_PASS, reason = 0, log = 0;
	pbuf_t *__single pbuf = *pbufp;
	struct ip6_hdr *__single h;
	struct pf_rule *__single a = NULL, *__single r = &pf_default_rule, *__single tr, *__single nr;
	struct pf_state *__single s = NULL;
	struct pf_state_key *__single sk = NULL;
	struct pf_ruleset *__single ruleset = NULL;
	struct pf_pdesc pd;
	int off, terminal = 0, dirndx, rh_cnt = 0;
	u_int8_t nxt;
	boolean_t fwd = FALSE;

	LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);

	ASSERT(ifp != NULL);
	/*
	 * Outbound on an interface other than the one recorded in the pbuf:
	 * remember this as a forwarded packet; the flag gates the
	 * refragmentation step at the end of the function.
	 */
	if ((dir == PF_OUT) && (pbuf->pb_ifp) && (ifp != pbuf->pb_ifp)) {
		fwd = TRUE;
	}

	if (!pf_status.running) {
		return PF_PASS;
	}

	memset(&pd, 0, sizeof(pd));

	if ((pd.pf_mtag = pf_get_mtag_pbuf(pbuf)) == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test6: pf_get_mtag_pbuf returned NULL\n"));
		return PF_DROP;
	}

	/* packets tagged PF_TAG_GENERATED are passed without filtering */
	if (pd.pf_mtag->pftag_flags & PF_TAG_GENERATED) {
		return PF_PASS;
	}

	kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test6: kif == NULL, if_name %s\n", ifp->if_name));
		return PF_DROP;
	}
	if (kif->pfik_flags & PFI_IFLAG_SKIP) {
		return PF_PASS;
	}

	/* need at least a full fixed IPv6 header */
	if (pbuf->pb_packet_len < (int)sizeof(*h)) {
		REASON_SET(&reason, PFRES_SHORT);
		return PF_DROP;
	}

	/* initial descriptor setup from the packet as received */
	h = pbuf->pb_data;
	nxt = h->ip6_nxt;
	off = ((caddr_t)h - (caddr_t)pbuf->pb_data) + sizeof(struct ip6_hdr);
	pd.mp = pbuf;
	pd.lmw = 0;
	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);
	pd.src = (struct pf_addr *)(void *)&h->ip6_src;
	pd.dst = (struct pf_addr *)(void *)&h->ip6_dst;
	PF_ACPY(&pd.baddr, pd.src, AF_INET6);
	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6);
	pd.ip_sum = NULL;
	pd.af = AF_INET6;
	pd.proto = nxt;
	pd.proto_variant = 0;
	pd.tos = 0;
	pd.ttl = h->ip6_hlim;
	pd.sc = MBUF_SCIDX(pbuf_get_service_class(pbuf));
	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
	pd.eh = eh;

	/* carry over flow identification when the packet has it */
	if (*pbuf->pb_flags & PKTF_FLOW_ID) {
		pd.flowsrc = *pbuf->pb_flowsrc;
		pd.flowhash = *pbuf->pb_flowid;
		pd.pktflags = (*pbuf->pb_flags & PKTF_FLOW_MASK);
	}

#if DUMMYNET
	/* a dummynet rule is already attached: skip normalization */
	if (fwa != NULL && fwa->fwa_pf_rule != NULL) {
		goto nonormalize;
	}
#endif /* DUMMYNET */

	/* We do IP header normalization and packet reassembly here */
	action = pf_normalize_ip6(pbuf, dir, kif, &reason, &pd);
	if (action != PF_PASS || pd.lmw < 0) {
		action = PF_DROP;
		goto done;
	}

#if DUMMYNET
nonormalize:
#endif /* DUMMYNET */
	h = pbuf->pb_data;

	/*
	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
	 * will do something bad, so drop the packet for now.
	 */
	if (htons(h->ip6_plen) == 0) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_NORM);        /*XXX*/
		goto done;
	}
	/* refresh the descriptor from the header pointer reloaded above */
	pd.src = (struct pf_addr *)(void *)&h->ip6_src;
	pd.dst = (struct pf_addr *)(void *)&h->ip6_dst;
	PF_ACPY(&pd.baddr, pd.src, AF_INET6);
	PF_ACPY(&pd.bdaddr, pd.dst, AF_INET6);
	pd.ip_sum = NULL;
	pd.af = AF_INET6;
	pd.tos = 0;
	pd.ttl = h->ip6_hlim;
	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
	pd.eh = eh;

	off = ((caddr_t)h - (caddr_t)pbuf->pb_data) + sizeof(struct ip6_hdr);
	pd.proto = h->ip6_nxt;
	pd.proto_variant = 0;
	pd.mp = pbuf;
	pd.lmw = 0;
	pd.pf_mtag = pf_get_mtag_pbuf(pbuf);

	/*
	 * Walk the extension header chain.  On exit pd.proto is the first
	 * protocol that is not one of the extension headers handled below
	 * and off is the offset of its header.  Routing headers are counted
	 * in rh_cnt for the policy check after "done".
	 */
	do {
		switch (pd.proto) {
		case IPPROTO_FRAGMENT: {
			struct ip6_frag ip6f;

			pd.flags |= PFDESC_IP_FRAG;
			if (!pf_pull_hdr(pbuf, off, &ip6f, sizeof ip6f, sizeof ip6f, NULL,
			    &reason, pd.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 short fragment header\n"));
				action = PF_DROP;
				REASON_SET(&reason, PFRES_SHORT);
				log = 1;
				goto done;
			}
			pd.proto = ip6f.ip6f_nxt;
#if DUMMYNET
			/* Traffic goes through dummynet first */
			action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd,
			    fwa);
			if (action == PF_DROP || pbuf == NULL) {
				*pbufp = NULL;
				return action;
			}
#endif /* DUMMYNET */
			action = pf_test_fragment(&r, dir, kif, pbuf, h, &pd,
			    &a, &ruleset);
			if (action == PF_DROP) {
				REASON_SET(&reason, PFRES_FRAG);
				log = 1;
			}
			/* fragments never reach the per-protocol switch below */
			goto done;
		}
		case IPPROTO_ROUTING:
			++rh_cnt;
			OS_FALLTHROUGH;

		case IPPROTO_AH:
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct ip6_ext opt6;

			if (!pf_pull_hdr(pbuf, off, &opt6, sizeof(opt6), sizeof(opt6),
			    NULL, &reason, pd.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 short opt\n"));
				action = PF_DROP;
				log = 1;
				goto done;
			}
			/*
			 * AH length is advanced in 4-byte units (len + 2),
			 * the other extension headers in 8-byte units
			 * (len + 1).
			 */
			if (pd.proto == IPPROTO_AH) {
				off += (opt6.ip6e_len + 2) * 4;
			} else {
				off += (opt6.ip6e_len + 1) * 8;
			}
			pd.proto = opt6.ip6e_nxt;
			/* goto the next header */
			break;
		}
		default:
			terminal++;
			break;
		}
	} while (!terminal);


	/*
	 * Upper-layer protocol handling.  Every case follows the same shape:
	 * pull the header, give dummynet a chance (if configured), look for
	 * an existing state, and fall back to pf_test_rule() when no state
	 * was found (s == NULL).  A negative pd.lmw aborts to "done", where
	 * it is turned into a PFRES_MEMORY drop.
	 */
	switch (pd.proto) {
	case IPPROTO_TCP: {
		struct tcphdr th;

		pf_pd_set_hdr_tcp(&pd, &th);
		if (!pf_pull_hdr(pbuf, off, &th, sizeof(th), sizeof(th),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_normalize_tcp(dir, kif, pbuf, 0, off, h, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_DROP) {
			goto done;
		}
		/* port 0 is rejected in either direction */
		if (th.th_sport == 0 || th.th_dport == 0) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_INVPORT);
			goto done;
		}
		action = pf_test_state_tcp(&s, dir, kif, pbuf, off, h, &pd,
		    &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr uh;

		pf_pd_set_hdr_udp(&pd, &uh);
		if (!pf_pull_hdr(pbuf, off, &uh, sizeof(uh), sizeof(uh),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		if (uh.uh_sport == 0 || uh.uh_dport == 0) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_INVPORT);
			goto done;
		}
		/* UDP length must fit in the packet and cover the header */
		if (ntohs(uh.uh_ulen) > pbuf->pb_packet_len - off ||
		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_SHORT);
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_udp(&s, dir, kif, pbuf, off, h, &pd,
		    &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_ICMPV6: {
		struct icmp6_hdr ih;

		pf_pd_set_hdr_icmp6(&pd, &ih);
		if (!pf_pull_hdr(pbuf, off, &ih, sizeof(ih), sizeof(ih),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_icmp(&s, dir, kif,
		    pbuf, off, h, &pd, &reason);
		if (action == PF_NAT64) {
			goto done;
		}
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_ESP: {
		struct pf_esp_hdr esp;

		pf_pd_set_hdr_esp(&pd, &esp);
		if (!pf_pull_hdr(pbuf, off, &esp, sizeof(esp), sizeof(esp), &action,
		    &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_esp(&s, dir, kif, off, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    pbuf, off, h, &pd, &a, &ruleset, NULL);
		}
		break;
	}

	case IPPROTO_GRE: {
		struct pf_grev1_hdr grev1;

		pf_pd_set_hdr_grev1(&pd, &grev1);
		if (!pf_pull_hdr(pbuf, off, &grev1, sizeof(grev1), sizeof(grev1), &action,
		    &reason, AF_INET6)) {
			log = (action != PF_PASS);
			goto done;
		}
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		/* GRE version 1 carrying PPP gets its own state handling */
		if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 &&
		    ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) {
			if (ntohs(grev1.payload_length) >
			    pbuf->pb_packet_len - off) {
				action = PF_DROP;
				REASON_SET(&reason, PFRES_SHORT);
				goto done;
			}
			/*
			 * NOTE(review): the IPv4 path (pf_test) sets
			 * pd.proto_variant = PF_GRE_PPTP_VARIANT at this
			 * point; here it stays 0 -- confirm whether the
			 * difference is intentional.
			 */
			action = pf_test_state_grev1(&s, dir, kif, off, &pd);
			if (pd.lmw < 0) {
				goto done;
			}
			PF_APPLE_UPDATE_PDESC_IPv6();
			if (action == PF_PASS) {
#if NPFSYNC
				pfsync_update_state(s);
#endif /* NPFSYNC */
				r = s->rule.ptr;
				a = s->anchor.ptr;
				log = s->log;
				break;
			} else if (s == NULL) {
				action = pf_test_rule(&r, &s, dir, kif, pbuf,
				    off, h, &pd, &a, &ruleset, NULL);
				if (action == PF_PASS) {
					break;
				}
			}
		}

		/* not GREv1/PPTP, so treat as ordinary GRE... */
		OS_FALLTHROUGH; /* XXX is this correct? */
	}

	default:
#if DUMMYNET
		/* Traffic goes through dummynet first */
		action = pf_test_dummynet(&r, dir, kif, &pbuf, &pd, fwa);
		if (action == PF_DROP || pbuf == NULL) {
			*pbufp = NULL;
			return action;
		}
#endif /* DUMMYNET */
		action = pf_test_state_other(&s, dir, kif, &pd);
		if (pd.lmw < 0) {
			goto done;
		}
		PF_APPLE_UPDATE_PDESC_IPv6();
		if (action == PF_PASS) {
#if NPFSYNC
			pfsync_update_state(s);
#endif /* NPFSYNC */
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif, pbuf, off, h,
			    &pd, &a, &ruleset, NULL);
		}
		break;
	}

done:
	/* PF_NAT64: the packet is no longer the caller's to use */
	if (action == PF_NAT64) {
		*pbufp = NULL;
		return action;
	}

	*pbufp = pd.mp;
	PF_APPLE_UPDATE_PDESC_IPv6();

	/* handle dangerous IPv6 extension headers. */
	if (action != PF_DROP) {
		/*
		 * A passed packet that carried a routing header is dropped
		 * unless the state or the matching rule allows options.
		 */
		if (action == PF_PASS && rh_cnt &&
		    !((s && s->allow_opts) || r->allow_opts)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_IPOPTIONS);
			log = 1;
			DPFPRINTF(PF_DEBUG_MISC,
			    ("pf: dropping packet with dangerous v6addr headers\n"));
		}

		if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) ||
		    (pd.pktflags & PKTF_FLOW_ID)) {
			(void) pf_tag_packet(pbuf, pd.pf_mtag, s ? s->tag : 0,
			    r->rtableid, &pd);
		}

		if (action == PF_PASS) {
#if PF_ECN
			/* add hints for ecn */
			pd.pf_mtag->pftag_hdr = h;
			/* record address family */
			pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET;
			pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET6;
#endif /* PF_ECN */
			/* record protocol */
			*pbuf->pb_proto = pd.proto;
			/*
			 * Inbound TCP/UDP redirected (rdr/binat) to the
			 * loopback address is flagged in the pf tag.
			 */
			if (dir == PF_IN && (pd.proto == IPPROTO_TCP ||
			    pd.proto == IPPROTO_UDP) && s != NULL &&
			    s->nat_rule.ptr != NULL &&
			    (s->nat_rule.ptr->action == PF_RDR ||
			    s->nat_rule.ptr->action == PF_BINAT) &&
			    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6addr)) {
				pd.pf_mtag->pftag_flags |= PF_TAG_TRANSLATE_LOCALHOST;
			}
		}
	}


	if (log) {
		struct pf_rule *lr;

		/* log against the NAT rule when it asks for PF_LOG_ALL */
		if (s != NULL && s->nat_rule.ptr != NULL &&
		    s->nat_rule.ptr->log & PF_LOG_ALL) {
			lr = s->nat_rule.ptr;
		} else {
			lr = r;
		}
		PFLOG_PACKET(kif, h, pbuf, AF_INET6, dir, reason, lr, a, ruleset,
		    &pd);
	}

	/* per-interface counters: [1] is the slot used by this IPv6 path */
	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;

	/* rule, anchor, state, source-node and table statistics */
	if (action == PF_PASS || r->action == PF_DROP) {
		dirndx = (dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd.tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd.tot_len;
		}
		if (s != NULL) {
			sk = s->state_key;
			if (s->nat_rule.ptr != NULL) {
				s->nat_rule.ptr->packets[dirndx]++;
				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
			}
			if (s->src_node != NULL) {
				s->src_node->packets[dirndx]++;
				s->src_node->bytes[dirndx] += pd.tot_len;
			}
			if (s->nat_src_node != NULL) {
				s->nat_src_node->packets[dirndx]++;
				s->nat_src_node->bytes[dirndx] += pd.tot_len;
			}
			/* state counters are indexed relative to the state's direction */
			dirndx = (dir == sk->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd.tot_len;
		}
		tr = r;
		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
		if (nr != NULL) {
			struct pf_addr *x;
			/*
			 * XXX: we need to make sure that the addresses
			 * passed to pfr_update_stats() are the same than
			 * the addresses used during matching (pfr_match)
			 */
			if (r == &pf_default_rule) {
				tr = nr;
				x = (s == NULL || sk->direction == dir) ?
				    &pd.baddr : &pd.naddr;
			} else {
				x = (s == NULL || sk->direction == dir) ?
				    &pd.naddr : &pd.baddr;
			}
			if (x == &pd.baddr || s == NULL) {
				if (dir == PF_OUT) {
					pd.src = x;
				} else {
					pd.dst = x;
				}
			}
		}
		if (tr->src.addr.type == PF_ADDR_TABLE) {
			pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ? pd.src : pd.dst, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->src.neg);
		}
		if (tr->dst.addr.type == PF_ADDR_TABLE) {
			pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL ||
			    sk->direction == dir) ? pd.dst : pd.src, pd.af,
			    pd.tot_len, dir == PF_OUT, r->action == PF_PASS,
			    tr->dst.neg);
		}
	}

	VERIFY(pbuf == NULL || pd.mp == NULL || pd.mp == pbuf);

	if (*pbufp) {
		/* a negative pd.lmw is reported as a memory failure drop */
		if (pd.lmw < 0) {
			REASON_SET(&reason, PFRES_MEMORY);
			action = PF_DROP;
		}

		/* dropped packets are destroyed here, not by the caller */
		if (action == PF_DROP) {
			pbuf_destroy(*pbufp);
			*pbufp = NULL;
			return PF_DROP;
		}

		*pbufp = pbuf;
	}

	/* PF_SYNPROXY_DROP consumes the packet but is reported as PF_PASS */
	if (action == PF_SYNPROXY_DROP) {
		pbuf_destroy(*pbufp);
		*pbufp = NULL;
		action = PF_PASS;
	} else if (r->rt) {
		/* pf_route6 can free the mbuf causing *pbufp to become NULL */
		pf_route6(pbufp, r, dir, kif->pfik_ifp, s, &pd);
	}

	/* if reassembled packet passed, create new fragments */
	struct pf_fragment_tag *ftag = NULL;
	if ((action == PF_PASS) && (*pbufp != NULL) && (fwd) &&
	    ((ftag = pf_find_fragment_tag_pbuf(*pbufp)) != NULL)) {
		action = pf_refragment6(ifp, pbufp, ftag);
	}
	return action;
}
10932
/*
 * Congestion probe for an interface queue.  This implementation is a
 * stub: ifq is ignored and 0 is always returned.
 */
static int
pf_check_congestion(struct ifqueue *ifq)
{
#pragma unused(ifq)
	return 0;
}
10939
10940 void
pool_init(struct pool * pp,size_t size,unsigned int align,unsigned int ioff,int flags,const char * wchan,void * palloc)10941 pool_init(struct pool *pp, size_t size, unsigned int align, unsigned int ioff,
10942 int flags, const char *wchan, void *palloc)
10943 {
10944 #pragma unused(align, ioff, flags, palloc)
10945 bzero(pp, sizeof(*pp));
10946 pp->pool_zone = zone_create(wchan, size, ZC_ZFREE_CLEARMEM);
10947 pp->pool_hiwat = pp->pool_limit = (unsigned int)-1;
10948 pp->pool_name = wchan;
10949 }
10950
/*
 * Counterpart of pool_init().  Deliberately a no-op: zones cannot
 * currently be destroyed, so pp is left untouched.
 */
void
pool_destroy(struct pool *pp)
{
#pragma unused(pp)
}
10957
/*
 * Store n as the high watermark of pool pp.  The value is only
 * recorded; none of the pool routines visible here consult it.
 */
void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pool_hiwat = n;     /* Currently unused */
}
10963
/*
 * Set the hard limit of pool pp to n.  pool_get() refuses to allocate
 * once the outstanding element count exceeds this value.
 * warnmess and ratecap are accepted but ignored.
 */
void
pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
{
#pragma unused(warnmess, ratecap)
	pp->pool_limit = n;
}
10970
10971 void *
pool_get(struct pool * pp,int flags)10972 pool_get(struct pool *pp, int flags)
10973 {
10974 void *buf;
10975
10976 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
10977
10978 if (pp->pool_count > pp->pool_limit) {
10979 DPFPRINTF(PF_DEBUG_NOISY,
10980 ("pf: pool %s hard limit reached (%d)\n",
10981 pp->pool_name != NULL ? pp->pool_name : "unknown",
10982 pp->pool_limit));
10983 pp->pool_fails++;
10984 return NULL;
10985 }
10986
10987 buf = zalloc_flags_buf(pp->pool_zone,
10988 (flags & PR_WAITOK) ? Z_WAITOK : Z_NOWAIT);
10989 if (buf != NULL) {
10990 pp->pool_count++;
10991 VERIFY(pp->pool_count != 0);
10992 }
10993 return buf;
10994 }
10995
10996 void
pool_put(struct pool * pp,void * v)10997 pool_put(struct pool *pp, void *v)
10998 {
10999 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
11000
11001 zfree(pp->pool_zone, v);
11002 VERIFY(pp->pool_count != 0);
11003 pp->pool_count--;
11004 }
11005
/*
 * Return the pf tag attached to pbuf, i.e. its pb_pftag field.
 * pbuf must not be NULL.
 */
struct pf_mtag *
pf_find_mtag_pbuf(pbuf_t *pbuf)
{
	return pbuf->pb_pftag;
}
11011
/*
 * Return the pf tag of mbuf m, as provided by m_pftag().
 */
struct pf_mtag *
pf_find_mtag(struct mbuf *m)
{
	return m_pftag(m);
}
11017
/*
 * Return the pf tag of mbuf m.  This simply forwards to pf_find_mtag();
 * nothing is allocated here.
 */
struct pf_mtag *
pf_get_mtag(struct mbuf *m)
{
	return pf_find_mtag(m);
}
11023
/*
 * Return the pf tag of pbuf.  This simply forwards to
 * pf_find_mtag_pbuf(); nothing is allocated here.
 */
struct pf_mtag *
pf_get_mtag_pbuf(pbuf_t *pbuf)
{
	return pf_find_mtag_pbuf(pbuf);
}
11029
11030 struct pf_fragment_tag *
pf_copy_fragment_tag(struct mbuf * m,struct pf_fragment_tag * ftag,int how)11031 pf_copy_fragment_tag(struct mbuf *m, struct pf_fragment_tag *ftag, int how)
11032 {
11033 struct m_tag *__single tag;
11034 struct pf_mtag *__single pftag = pf_find_mtag(m);
11035
11036 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS,
11037 sizeof(*ftag), how, m);
11038 if (tag == NULL) {
11039 return NULL;
11040 }
11041 m_tag_prepend(m, tag);
11042 bcopy(ftag, tag->m_tag_data, sizeof(*ftag));
11043 pftag->pftag_flags |= PF_TAG_REASSEMBLED;
11044 return (struct pf_fragment_tag *)tag->m_tag_data;
11045 }
11046
11047 struct pf_fragment_tag *
pf_find_fragment_tag(struct mbuf * m)11048 pf_find_fragment_tag(struct mbuf *m)
11049 {
11050 struct m_tag *tag;
11051 struct pf_fragment_tag *ftag = NULL;
11052 struct pf_mtag *pftag = pf_find_mtag(m);
11053
11054 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF_REASS);
11055 VERIFY((tag == NULL) || (pftag->pftag_flags & PF_TAG_REASSEMBLED));
11056 if (tag != NULL) {
11057 ftag = (struct pf_fragment_tag *)tag->m_tag_data;
11058 }
11059 return ftag;
11060 }
11061
11062 struct pf_fragment_tag *
pf_find_fragment_tag_pbuf(pbuf_t * pbuf)11063 pf_find_fragment_tag_pbuf(pbuf_t *pbuf)
11064 {
11065 struct pf_mtag *mtag = pf_find_mtag_pbuf(pbuf);
11066
11067 return (mtag->pftag_flags & PF_TAG_REASSEMBLED) ?
11068 pbuf->pb_pf_fragtag : NULL;
11069 }
11070
11071 uint64_t
pf_time_second(void)11072 pf_time_second(void)
11073 {
11074 struct timeval t;
11075
11076 microuptime(&t);
11077 return t.tv_sec;
11078 }
11079
11080 uint64_t
pf_calendar_time_second(void)11081 pf_calendar_time_second(void)
11082 {
11083 struct timeval t;
11084
11085 getmicrotime(&t);
11086 return t.tv_sec;
11087 }
11088
11089 static void *
hook_establish(struct hook_desc_head * head,int tail,hook_fn_t fn,void * arg)11090 hook_establish(struct hook_desc_head *head, int tail, hook_fn_t fn, void *arg)
11091 {
11092 struct hook_desc *hd;
11093
11094 hd = kalloc_type(struct hook_desc, Z_WAITOK | Z_NOFAIL);
11095
11096 hd->hd_fn = fn;
11097 hd->hd_arg = arg;
11098 if (tail) {
11099 TAILQ_INSERT_TAIL(head, hd, hd_list);
11100 } else {
11101 TAILQ_INSERT_HEAD(head, hd, hd_list);
11102 }
11103
11104 return hd;
11105 }
11106
11107 static void
hook_runloop(struct hook_desc_head * head,int flags)11108 hook_runloop(struct hook_desc_head *head, int flags)
11109 {
11110 struct hook_desc *__single hd;
11111
11112 if (!(flags & HOOK_REMOVE)) {
11113 if (!(flags & HOOK_ABORT)) {
11114 TAILQ_FOREACH(hd, head, hd_list)
11115 hd->hd_fn(hd->hd_arg);
11116 }
11117 } else {
11118 while (!!(hd = TAILQ_FIRST(head))) {
11119 TAILQ_REMOVE(head, hd, hd_list);
11120 if (!(flags & HOOK_ABORT)) {
11121 hd->hd_fn(hd->hd_arg);
11122 }
11123 if (flags & HOOK_FREE) {
11124 kfree_type(struct hook_desc, hd);
11125 }
11126 }
11127 }
11128 }
11129
11130 #if SKYWALK
11131 static uint32_t
pf_check_compatible_anchor(struct pf_anchor const * a)11132 pf_check_compatible_anchor(struct pf_anchor const * a)
11133 {
11134 const char *__null_terminated anchor_path = __unsafe_null_terminated_from_indexable(a->path);
11135 uint32_t result = 0;
11136
11137 if (strcmp(anchor_path, PF_RESERVED_ANCHOR) == 0) {
11138 goto done;
11139 }
11140
11141 if (strcmp(anchor_path, "com.apple") == 0) {
11142 goto done;
11143 }
11144
11145 for (int i = 0; i < sizeof(compatible_anchors) / sizeof(compatible_anchors[0]); i++) {
11146 const char *__null_terminated ptr = strnstr(anchor_path, compatible_anchors[i], MAXPATHLEN);
11147 if (ptr != NULL && ptr == anchor_path) {
11148 goto done;
11149 }
11150 }
11151
11152 result |= PF_COMPATIBLE_FLAGS_CUSTOM_ANCHORS_PRESENT;
11153 for (int i = PF_RULESET_SCRUB; i < PF_RULESET_MAX; ++i) {
11154 if (a->ruleset.rules[i].active.rcount != 0) {
11155 result |= PF_COMPATIBLE_FLAGS_CUSTOM_RULES_PRESENT;
11156 }
11157 }
11158 done:
11159 return result;
11160 }
11161
11162 uint32_t
pf_check_compatible_rules(void)11163 pf_check_compatible_rules(void)
11164 {
11165 LCK_RW_ASSERT(&pf_perim_lock, LCK_RW_ASSERT_HELD);
11166 LCK_MTX_ASSERT(&pf_lock, LCK_MTX_ASSERT_OWNED);
11167 struct pf_anchor *anchor = NULL;
11168 struct pf_rule *rule = NULL;
11169 uint32_t compat_bitmap = 0;
11170
11171 if (PF_IS_ENABLED) {
11172 compat_bitmap |= PF_COMPATIBLE_FLAGS_PF_ENABLED;
11173 }
11174
11175 RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) {
11176 compat_bitmap |= pf_check_compatible_anchor(anchor);
11177 #define _CHECK_FLAGS (PF_COMPATIBLE_FLAGS_CUSTOM_ANCHORS_PRESENT | PF_COMPATIBLE_FLAGS_CUSTOM_RULES_PRESENT)
11178 if ((compat_bitmap & _CHECK_FLAGS) == _CHECK_FLAGS) {
11179 goto done;
11180 }
11181 #undef _CHECK_FLAGS
11182 }
11183
11184 for (int i = PF_RULESET_SCRUB; i < PF_RULESET_MAX; i++) {
11185 TAILQ_FOREACH(rule, pf_main_ruleset.rules[i].active.ptr, entries) {
11186 if (rule->anchor == NULL) {
11187 compat_bitmap |= PF_COMPATIBLE_FLAGS_CUSTOM_RULES_PRESENT;
11188 goto done;
11189 }
11190 }
11191 }
11192
11193 done:
11194 return compat_bitmap;
11195 }
11196 #endif // SKYWALK
11197