xref: /xnu-11215.41.3/bsd/net/packet_mangler.c (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * Copyright (c) 2015-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. Please obtain a copy of the License at
10  * http://www.opensource.apple.com/apsl/ and read it before using this
11  * file.
12  *
13  * The Original Code and all software distributed under the License are
14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18  * Please see the License for the specific language governing rights and
19  * limitations under the License.
20  *
21  * @APPLE_LICENSE_HEADER_END@
22  */
23 
24 /*
25  * THEORY OF OPERATION
26  *
27  * The packet mangler subsystem provides a limited way for user space
28  * applications to apply certain actions on certain flows.
29  *
30  * A user space application opens a kernel control socket with the name
31  * PACKET_MANGLER_CONTROL_NAME to attach to the packet mangler subsystem.
32  * When connected, a "struct packet_mangler" is created and set as the
33  * "unitinfo" of the corresponding kernel control socket instance.
34  * The connect call for the packet mangler's kernel control socket also
35  * registers IP filters with the cookie set to the packet_mangler instance.
36  * The ip filters are removed when control socket is disconnected.
37  */
38 #include <sys/types.h>
39 #include <sys/kern_control.h>
40 #include <sys/domain.h>
41 #include <sys/protosw.h>
42 #include <sys/syslog.h>
43 
44 #include <kern/locks.h>
45 #include <kern/zalloc.h>
46 #include <kern/debug.h>
47 
48 #include <net/packet_mangler.h>
49 
50 #include <netinet/mptcp.h>
51 #include <netinet/tcp.h>
52 #include <netinet/tcp_var.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip6.h>
55 #include <netinet/kpi_ipfilter.h>
56 #include <string.h>
57 #include <libkern/libkern.h>
58 
59 #include <net/sockaddr_utils.h>
60 
61 #define MAX_PACKET_MANGLER                      1
62 
63 #define PKT_MNGLR_FLG_IPFILTER_ATTACHED         0x00000001
64 
65 SYSCTL_NODE(_net, OID_AUTO, pktmnglr, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "pktmnglr");
66 SYSCTL_INT(_net_pktmnglr, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
67     &pkt_mnglr_log_level, 0, "");
68 /*
69  * The structure packet_mangler represents a user space packet filter
70  * It's created and associated with a kernel control socket instance
71  */
72 struct packet_mangler {
73 	kern_ctl_ref                    pkt_mnglr_kcref;
74 	uint32_t                        pkt_mnglr_kcunit;
75 	uint32_t                        pkt_mnglr_flags;
76 	/* IP filter related params */
77 	ipfilter_t                      pkt_mnglr_ipfref;
78 	ipfilter_t                      pkt_mnglr_ipfrefv6;
79 	struct ipf_filter               pkt_mnglr_ipfilter;
80 
81 	/* Options */
82 	uint8_t                         activate;
83 	Pkt_Mnglr_Flow                  dir;
84 	struct sockaddr_storage         lsaddr;
85 	struct sockaddr_storage         rsaddr;
86 	struct sockaddr_storage         swap_lsaddr;
87 	struct sockaddr_storage         swap_rsaddr;
88 	uint32_t                        ip_action_mask;
89 	uint16_t                        lport;
90 	uint16_t                        rport;
91 	uint32_t                        proto;
92 	uint32_t                        proto_action_mask;
93 };
94 
95 /* Array of all the packet mangler instancesi */
96 struct packet_mangler *packet_manglers[MAX_PACKET_MANGLER];
97 
98 uint32_t pkt_mnglr_active_count = 0;    /* Number of active packet filters */
99 uint32_t pkt_mnglr_close_wait_timeout = 1000; /* in milliseconds */
100 
101 static kern_ctl_ref pkt_mnglr_kctlref = NULL;
102 
103 /* The lock below protects packet_manglers DS, packet_mangler DS */
104 static LCK_GRP_DECLARE(pkt_mnglr_lck_grp, "packet mangler");
105 static LCK_RW_DECLARE(pkt_mnglr_lck_rw, &pkt_mnglr_lck_grp);
106 
107 #define PKT_MNGLR_RW_LCK_MAX    8
108 
109 int pkt_mnglr_rw_nxt_lck = 0;
110 void* pkt_mnglr_rw_lock_history[PKT_MNGLR_RW_LCK_MAX];
111 
112 int pkt_mnglr_rw_nxt_unlck = 0;
113 void* pkt_mnglr_rw_unlock_history[PKT_MNGLR_RW_LCK_MAX];
114 
115 static KALLOC_TYPE_DEFINE(packet_mangler_zone, struct packet_mangler, NET_KT_DEFAULT);
116 
117 /*
118  * For troubleshooting
119  */
120 int pkt_mnglr_log_level = LOG_ERR;
121 int pkt_mnglr_debug = 1;
122 
123 /*
124  * Forward declaration to appease the compiler
125  */
126 static void pkt_mnglr_rw_lock_exclusive(lck_rw_t *);
127 static void pkt_mnglr_rw_unlock_exclusive(lck_rw_t *);
128 static void pkt_mnglr_rw_lock_shared(lck_rw_t *);
129 static void pkt_mnglr_rw_unlock_shared(lck_rw_t *);
130 
131 static errno_t pktmnglr_ipfilter_output(void *cookie, mbuf_t *data,
132     ipf_pktopts_t options);
133 static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data,
134     int offset, u_int8_t protocol);
135 static void pktmnglr_ipfilter_detach(void *cookie);
136 
137 static void chksm_update(mbuf_t data);
138 
139 #define TCP_OPT_MULTIPATH_TCP   30
140 #define MPTCP_SBT_VER_OFFSET    2
141 
142 /*
143  * packet filter global read write lock
144  */
145 
146 static void
pkt_mnglr_rw_lock_exclusive(lck_rw_t * lck)147 pkt_mnglr_rw_lock_exclusive(lck_rw_t *lck)
148 {
149 	void *__single lr_saved;
150 
151 	lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
152 
153 	lck_rw_lock_exclusive(lck);
154 
155 	pkt_mnglr_rw_lock_history[pkt_mnglr_rw_nxt_lck] = lr_saved;
156 	pkt_mnglr_rw_nxt_lck =
157 	    (pkt_mnglr_rw_nxt_lck + 1) % PKT_MNGLR_RW_LCK_MAX;
158 }
159 
160 static void
pkt_mnglr_rw_unlock_exclusive(lck_rw_t * lck)161 pkt_mnglr_rw_unlock_exclusive(lck_rw_t *lck)
162 {
163 	void *__single lr_saved;
164 
165 	lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
166 
167 	lck_rw_unlock_exclusive(lck);
168 
169 	pkt_mnglr_rw_unlock_history[pkt_mnglr_rw_nxt_unlck] =
170 	    lr_saved;
171 	pkt_mnglr_rw_nxt_unlck = (pkt_mnglr_rw_nxt_unlck + 1) % PKT_MNGLR_RW_LCK_MAX;
172 }
173 
174 static void
pkt_mnglr_rw_lock_shared(lck_rw_t * lck)175 pkt_mnglr_rw_lock_shared(lck_rw_t *lck)
176 {
177 	void *__single lr_saved;
178 
179 	lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
180 
181 	lck_rw_lock_shared(lck);
182 
183 	pkt_mnglr_rw_lock_history[pkt_mnglr_rw_nxt_lck] = lr_saved;
184 	pkt_mnglr_rw_nxt_lck = (pkt_mnglr_rw_nxt_lck + 1) % PKT_MNGLR_RW_LCK_MAX;
185 }
186 
187 static void
pkt_mnglr_rw_unlock_shared(lck_rw_t * lck)188 pkt_mnglr_rw_unlock_shared(lck_rw_t *lck)
189 {
190 	void *__single lr_saved;
191 
192 	lr_saved = __unsafe_forge_single(void *, __builtin_return_address(0));
193 
194 	lck_rw_unlock_shared(lck);
195 
196 	pkt_mnglr_rw_unlock_history[pkt_mnglr_rw_nxt_unlck] = lr_saved;
197 	pkt_mnglr_rw_nxt_unlck = (pkt_mnglr_rw_nxt_unlck + 1) % PKT_MNGLR_RW_LCK_MAX;
198 }
199 
200 /*
201  * Packet Mangler's Kernel control socket callbacks
202  */
203 static errno_t
pkt_mnglr_ctl_connect(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)204 pkt_mnglr_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
205     void **unitinfo)
206 {
207 	errno_t error = 0;
208 	struct packet_mangler *p_pkt_mnglr = NULL;
209 
210 	PKT_MNGLR_LOG(LOG_NOTICE, "Connecting packet mangler filter.");
211 
212 	if (sac->sc_unit == 0 || sac->sc_unit > MAX_PACKET_MANGLER) {
213 		PKT_MNGLR_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
214 		error = EINVAL;
215 		goto fail;
216 	}
217 
218 	p_pkt_mnglr = zalloc_flags(packet_mangler_zone,
219 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
220 
221 	pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw);
222 
223 	if (packet_manglers[sac->sc_unit - 1] != NULL) {
224 		PKT_MNGLR_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
225 		error = EADDRINUSE;
226 		pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
227 		goto fail_free;
228 	} else {
229 		/*
230 		 * kernel control socket kcunit numbers start at 1
231 		 */
232 		packet_manglers[sac->sc_unit - 1] = p_pkt_mnglr;
233 
234 		p_pkt_mnglr->pkt_mnglr_kcref = kctlref;
235 		p_pkt_mnglr->pkt_mnglr_kcunit = sac->sc_unit;
236 
237 		pkt_mnglr_active_count++;
238 	}
239 
240 	p_pkt_mnglr->pkt_mnglr_ipfilter.cookie = p_pkt_mnglr;
241 	p_pkt_mnglr->pkt_mnglr_ipfilter.name = "com.apple.pktmnglripfilter";
242 	p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_input = pktmnglr_ipfilter_input;
243 	p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_output = pktmnglr_ipfilter_output;
244 	p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_detach = pktmnglr_ipfilter_detach;
245 	error = ipf_addv4(&(p_pkt_mnglr->pkt_mnglr_ipfilter), &(p_pkt_mnglr->pkt_mnglr_ipfref));
246 	if (error) {
247 		PKT_MNGLR_LOG(LOG_ERR, "Could not register packet mangler's IPv4 Filter");
248 		goto fail_locked;
249 	}
250 	error = ipf_addv6(&(p_pkt_mnglr->pkt_mnglr_ipfilter), &(p_pkt_mnglr->pkt_mnglr_ipfrefv6));
251 	if (error) {
252 		ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfref);
253 		PKT_MNGLR_LOG(LOG_ERR, "Could not register packet mangler's IPv6 Filter");
254 		goto fail_locked;
255 	}
256 
257 	PKT_MNGLR_LOG(LOG_INFO, "Registered packet mangler's IP Filters");
258 	p_pkt_mnglr->pkt_mnglr_flags |= PKT_MNGLR_FLG_IPFILTER_ATTACHED;
259 	pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
260 
261 	if (error) {
262 fail_locked:
263 		pkt_mnglr_active_count--;
264 
265 		packet_manglers[sac->sc_unit - 1] = NULL;
266 		*unitinfo = NULL;
267 
268 		pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
269 
270 fail_free:
271 		zfree(packet_mangler_zone, p_pkt_mnglr);
272 	}
273 
274 fail:
275 	*unitinfo = p_pkt_mnglr;
276 
277 	PKT_MNGLR_LOG(LOG_INFO, "return %d pkt_mnglr_active_count %u kcunit %u",
278 	    error, pkt_mnglr_active_count, sac->sc_unit);
279 
280 	return error;
281 }
282 
283 static errno_t
pkt_mnglr_ctl_disconnect(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo)284 pkt_mnglr_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
285 {
286 #pragma unused(kctlref)
287 	errno_t error = 0;
288 	struct packet_mangler *p_pkt_mnglr;
289 
290 	PKT_MNGLR_LOG(LOG_INFO, "Disconnecting packet mangler kernel control");
291 
292 	if (unitinfo == NULL) {
293 		goto done;
294 	}
295 
296 	if (kcunit > MAX_PACKET_MANGLER) {
297 		PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d)",
298 		    kcunit, MAX_PACKET_MANGLER);
299 		error = EINVAL;
300 		goto done;
301 	}
302 
303 	p_pkt_mnglr = (struct packet_mangler *)unitinfo;
304 
305 	pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw);
306 	if (packet_manglers[kcunit - 1] != p_pkt_mnglr || p_pkt_mnglr->pkt_mnglr_kcunit != kcunit) {
307 		PKT_MNGLR_LOG(LOG_ERR, "bad unit info %u",
308 		    kcunit);
309 		pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
310 		goto done;
311 	}
312 
313 	/*
314 	 * Make filter inactive
315 	 */
316 	packet_manglers[kcunit - 1] = NULL;
317 	pkt_mnglr_active_count--;
318 	if (p_pkt_mnglr->pkt_mnglr_flags & PKT_MNGLR_FLG_IPFILTER_ATTACHED) {
319 		(void) ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfref);
320 		(void) ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfrefv6);
321 	}
322 	pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
323 	zfree(packet_mangler_zone, p_pkt_mnglr);
324 done:
325 	PKT_MNGLR_LOG(LOG_INFO, "return %d pkt_mnglr_active_count %u kcunit %u",
326 	    error, pkt_mnglr_active_count, kcunit);
327 
328 	return error;
329 }
330 
331 static errno_t
pkt_mnglr_ctl_getopt(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo,int opt,void * data,size_t * len)332 pkt_mnglr_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
333     int opt, void *data, size_t *len)
334 {
335 #pragma unused(kctlref, opt)
336 	errno_t error = 0;
337 	struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)unitinfo;
338 
339 	PKT_MNGLR_LOG(LOG_NOTICE, "");
340 
341 	pkt_mnglr_rw_lock_shared(&pkt_mnglr_lck_rw);
342 
343 	if (kcunit > MAX_PACKET_MANGLER || kcunit == 0) {
344 		PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d) || kcunit == 0",
345 		    kcunit, MAX_PACKET_MANGLER);
346 		error = EINVAL;
347 		goto done;
348 	}
349 	if (p_pkt_mnglr != (void *)packet_manglers[kcunit - 1]) {
350 		PKT_MNGLR_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
351 		    kcunit);
352 		error = EINVAL;
353 		goto done;
354 	}
355 	switch (opt) {
356 	case PKT_MNGLR_OPT_PROTO_ACT_MASK:
357 		if (*len < sizeof(uint32_t)) {
358 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK "
359 			    "len too small %lu", *len);
360 			error = EINVAL;
361 			goto done;
362 		}
363 
364 		if (data != NULL) {
365 			*(uint32_t *)data = p_pkt_mnglr->proto_action_mask;
366 		}
367 		break;
368 	case PKT_MNGLR_OPT_IP_ACT_MASK:
369 		if (*len < sizeof(uint32_t)) {
370 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK "
371 			    "len too small %lu", *len);
372 			error = EINVAL;
373 			goto done;
374 		}
375 
376 		if (data != NULL) {
377 			*(uint32_t *)data = p_pkt_mnglr->ip_action_mask;
378 		}
379 		break;
380 	case PKT_MNGLR_OPT_LOCAL_IP:
381 		if (*len < sizeof(struct sockaddr_storage)) {
382 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP "
383 			    "len too small %lu", *len);
384 			error = EINVAL;
385 			goto done;
386 		}
387 
388 		if (data != NULL) {
389 			*(struct sockaddr_storage *)data = p_pkt_mnglr->lsaddr;
390 		}
391 		break;
392 	case PKT_MNGLR_OPT_REMOTE_IP:
393 		if (*len < sizeof(struct sockaddr_storage)) {
394 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP "
395 			    "len too small %lu", *len);
396 			error = EINVAL;
397 			goto done;
398 		}
399 
400 		if (data != NULL) {
401 			*(struct sockaddr_storage *)data = p_pkt_mnglr->rsaddr;
402 		}
403 		break;
404 	case PKT_MNGLR_OPT_LOCAL_PORT:
405 		if (*len < sizeof(uint16_t)) {
406 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT "
407 			    "len too small %lu", *len);
408 			error = EINVAL;
409 			goto done;
410 		}
411 
412 		if (data != NULL) {
413 			*(uint16_t *)data = p_pkt_mnglr->lport;
414 		}
415 		break;
416 	case PKT_MNGLR_OPT_REMOTE_PORT:
417 		if (*len < sizeof(uint16_t)) {
418 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT "
419 			    "len too small %lu", *len);
420 			error = EINVAL;
421 			goto done;
422 		}
423 
424 		if (data != NULL) {
425 			*(uint16_t *)data = p_pkt_mnglr->rport;
426 		}
427 		break;
428 	case PKT_MNGLR_OPT_DIRECTION:
429 		if (*len < sizeof(uint32_t)) {
430 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION "
431 			    "len too small %lu", *len);
432 			error = EINVAL;
433 			goto done;
434 		}
435 		if (data != NULL) {
436 			*(uint32_t *)data = p_pkt_mnglr->dir;
437 		}
438 		break;
439 	case PKT_MNGLR_OPT_PROTOCOL:
440 		if (*len < sizeof(uint32_t)) {
441 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL "
442 			    "len too small %lu", *len);
443 			error = EINVAL;
444 			goto done;
445 		}
446 		if (data != NULL) {
447 			*(uint32_t *)data = p_pkt_mnglr->proto;
448 		}
449 		break;
450 	case PKT_MNGLR_OPT_ACTIVATE:
451 		if (*len < sizeof(uint8_t)) {
452 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE "
453 			    "len too small %lu", *len);
454 			error = EINVAL;
455 			goto done;
456 		}
457 
458 		if (data != NULL) {
459 			*(uint8_t *)data = p_pkt_mnglr->activate;
460 		}
461 		break;
462 	default:
463 		error = ENOPROTOOPT;
464 		break;
465 	}
466 done:
467 	pkt_mnglr_rw_unlock_shared(&pkt_mnglr_lck_rw);
468 
469 	return error;
470 }
471 
472 static errno_t
pkt_mnglr_ctl_setopt(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo,int opt,void * data,size_t len)473 pkt_mnglr_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
474     int opt, void *data, size_t len)
475 {
476 #pragma unused(kctlref, opt)
477 	errno_t error = 0;
478 	struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)unitinfo;
479 
480 	PKT_MNGLR_LOG(LOG_NOTICE, "");
481 
482 	pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw);
483 
484 	if (kcunit > MAX_PACKET_MANGLER) {
485 		PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d)",
486 		    kcunit, MAX_PACKET_MANGLER);
487 		error = EINVAL;
488 		goto done;
489 	}
490 	if (p_pkt_mnglr != (void *)packet_manglers[kcunit - 1]) {
491 		PKT_MNGLR_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
492 		    kcunit);
493 		error = EINVAL;
494 		goto done;
495 	}
496 	switch (opt) {
497 	case PKT_MNGLR_OPT_PROTO_ACT_MASK:
498 		if (len < sizeof(uint32_t)) {
499 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK "
500 			    "len too small %lu", len);
501 			error = EINVAL;
502 			goto done;
503 		}
504 		if (p_pkt_mnglr->proto_action_mask != 0) {
505 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK "
506 			    "already set %u",
507 			    p_pkt_mnglr->proto_action_mask);
508 			error = EINVAL;
509 			goto done;
510 		}
511 		p_pkt_mnglr->proto_action_mask = *(uint32_t *)data;
512 		PKT_MNGLR_LOG(LOG_INFO, "p_pkt_mnglr->proto_action_mask set to :%d", p_pkt_mnglr->proto_action_mask);
513 		break;
514 	case PKT_MNGLR_OPT_IP_ACT_MASK:
515 		if (len < sizeof(uint32_t)) {
516 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK "
517 			    "len too small %lu", len);
518 			error = EINVAL;
519 			goto done;
520 		}
521 		if (p_pkt_mnglr->ip_action_mask != 0) {
522 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK "
523 			    "already set %u",
524 			    p_pkt_mnglr->ip_action_mask);
525 			error = EINVAL;
526 			goto done;
527 		}
528 		p_pkt_mnglr->ip_action_mask = *(uint32_t *)data;
529 		break;
530 	case PKT_MNGLR_OPT_LOCAL_IP:
531 		if (len < sizeof(struct sockaddr_storage)) {
532 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP "
533 			    "len too small %lu", len);
534 			error = EINVAL;
535 			goto done;
536 		}
537 		if (p_pkt_mnglr->lsaddr.ss_family) {
538 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP "
539 			    "already set");
540 			error = EINVAL;
541 			goto done;
542 		}
543 		p_pkt_mnglr->lsaddr = *(struct sockaddr_storage *)data;
544 		break;
545 	case PKT_MNGLR_OPT_REMOTE_IP:
546 		if (len < sizeof(struct sockaddr_storage)) {
547 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP "
548 			    "len too small %lu", len);
549 			error = EINVAL;
550 			goto done;
551 		}
552 		if (p_pkt_mnglr->rsaddr.ss_family) {
553 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP "
554 			    "already set");
555 			error = EINVAL;
556 			goto done;
557 		}
558 
559 		p_pkt_mnglr->rsaddr = *(struct sockaddr_storage *)data;
560 		PKT_MNGLR_LOG(LOG_INFO,
561 		    "Remote IP registered for address family: %d",
562 		    p_pkt_mnglr->rsaddr.ss_family);
563 		break;
564 	case PKT_MNGLR_OPT_LOCAL_PORT:
565 		if (len < sizeof(uint16_t)) {
566 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT "
567 			    "len too small %lu", len);
568 			error = EINVAL;
569 			goto done;
570 		}
571 		if (p_pkt_mnglr->lport != 0) {
572 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT "
573 			    "already set %d",
574 			    p_pkt_mnglr->lport);
575 			error = EINVAL;
576 			goto done;
577 		}
578 		p_pkt_mnglr->lport = *(uint16_t *)data;
579 		break;
580 	case PKT_MNGLR_OPT_REMOTE_PORT:
581 		if (len < sizeof(uint16_t)) {
582 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT "
583 			    "len too small %lu", len);
584 			error = EINVAL;
585 			goto done;
586 		}
587 		if (p_pkt_mnglr->rport != 0) {
588 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT "
589 			    "already set %d",
590 			    p_pkt_mnglr->rport);
591 			error = EINVAL;
592 			goto done;
593 		}
594 		p_pkt_mnglr->rport = *(uint16_t *)data;
595 		break;
596 	case PKT_MNGLR_OPT_DIRECTION:
597 		if (len < sizeof(uint32_t)) {
598 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION "
599 			    "len too small %lu", len);
600 			error = EINVAL;
601 			goto done;
602 		}
603 		if (p_pkt_mnglr->dir != 0) {
604 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION "
605 			    "already set %u",
606 			    p_pkt_mnglr->dir);
607 			error = EINVAL;
608 			goto done;
609 		}
610 		p_pkt_mnglr->dir = *(uint32_t *)data;
611 		break;
612 	case PKT_MNGLR_OPT_PROTOCOL:
613 		if (len < sizeof(uint32_t)) {
614 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL "
615 			    "len too small %lu", len);
616 			error = EINVAL;
617 			goto done;
618 		}
619 		if (p_pkt_mnglr->proto != 0) {
620 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL "
621 			    "already set %u",
622 			    p_pkt_mnglr->proto);
623 			error = EINVAL;
624 			goto done;
625 		}
626 		p_pkt_mnglr->proto = *(uint32_t *)data;
627 		break;
628 	case PKT_MNGLR_OPT_ACTIVATE:
629 		if (len < sizeof(uint8_t)) {
630 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE "
631 			    "len too small %lu", len);
632 			error = EINVAL;
633 			goto done;
634 		}
635 		if (p_pkt_mnglr->activate != 0) {
636 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE "
637 			    "already set %u",
638 			    p_pkt_mnglr->activate);
639 			error = EINVAL;
640 			goto done;
641 		}
642 		p_pkt_mnglr->activate = *(uint8_t *)data;
643 		PKT_MNGLR_LOG(LOG_ERR, "p_pkt_mnglr->activate set to :%d",
644 		    p_pkt_mnglr->activate);
645 		break;
646 	default:
647 		error = ENOPROTOOPT;
648 		break;
649 	}
650 done:
651 	pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
652 
653 	return error;
654 }
655 
656 void
pkt_mnglr_init(void)657 pkt_mnglr_init(void)
658 {
659 	struct kern_ctl_reg kern_ctl;
660 	errno_t error = 0;
661 
662 	PKT_MNGLR_LOG(LOG_NOTICE, "");
663 
664 	/*
665 	 * Compile time verifications
666 	 */
667 	_CASSERT(PKT_MNGLR_MAX_FILTER_COUNT == MAX_PACKET_MANGLER);
668 
669 	/*
670 	 * Register kernel control
671 	 */
672 	bzero(&kern_ctl, sizeof(kern_ctl));
673 	strlcpy(kern_ctl.ctl_name, PACKET_MANGLER_CONTROL_NAME,
674 	    sizeof(kern_ctl.ctl_name));
675 	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
676 	kern_ctl.ctl_connect = pkt_mnglr_ctl_connect;
677 	kern_ctl.ctl_disconnect = pkt_mnglr_ctl_disconnect;
678 	kern_ctl.ctl_getopt = pkt_mnglr_ctl_getopt;
679 	kern_ctl.ctl_setopt = pkt_mnglr_ctl_setopt;
680 	error = ctl_register(&kern_ctl, &pkt_mnglr_kctlref);
681 	if (error != 0) {
682 		PKT_MNGLR_LOG(LOG_ERR, "ctl_register failed: %d", error);
683 	} else {
684 		PKT_MNGLR_LOG(LOG_INFO, "Registered packet mangler kernel control.");
685 	}
686 }
687 
688 static errno_t
pktmnglr_ipfilter_output(void * cookie,mbuf_t * data,ipf_pktopts_t options)689 pktmnglr_ipfilter_output(void *cookie, mbuf_t *data, ipf_pktopts_t options)
690 {
691 	struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)cookie;
692 	struct ip ip;
693 	struct tcphdr tcp;
694 	int optlen = 0;
695 	errno_t error = 0;
696 
697 #pragma unused(tcp, optlen, options)
698 	if (p_pkt_mnglr == NULL) {
699 		goto output_done;
700 	}
701 
702 	if (!p_pkt_mnglr->activate) {
703 		goto output_done;
704 	}
705 
706 	if (p_pkt_mnglr->dir == IN) {
707 		goto output_done;
708 	}
709 
710 	if (data == NULL) {
711 		PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL");
712 		goto output_done;
713 	}
714 
715 	/* Check for IP filter options */
716 	error = mbuf_copydata(*data, 0, sizeof(ip), &ip);
717 	if (error) {
718 		PKT_MNGLR_LOG(LOG_ERR, "Could not make local IP header copy");
719 		goto output_done;
720 	}
721 
722 	if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET6) && (ip.ip_v == 4)) {
723 		goto output_done;
724 	}
725 
726 	if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET) && (ip.ip_v == 6)) {
727 		goto output_done;
728 	}
729 
730 	if (p_pkt_mnglr->lsaddr.ss_family == AF_INET) {
731 		struct sockaddr_in laddr = *SIN(&p_pkt_mnglr->lsaddr);
732 		if (ip.ip_src.s_addr != laddr.sin_addr.s_addr) {
733 			goto output_done;
734 		}
735 	}
736 
737 	if (p_pkt_mnglr->rsaddr.ss_family == AF_INET) {
738 		struct sockaddr_in raddr = *SIN(&p_pkt_mnglr->rsaddr);
739 		if (ip.ip_dst.s_addr != raddr.sin_addr.s_addr) {
740 			goto output_done;
741 		}
742 	}
743 
744 	if (ip.ip_v != 4) {
745 		PKT_MNGLR_LOG(LOG_INFO,
746 		    "%s:%d Not handling IP version %d\n",
747 		    __func__, __LINE__, ip.ip_v);
748 		goto output_done;
749 	}
750 
751 output_done:
752 	/* Not handling output flow */
753 	return 0;
754 }
755 
756 #define TCP_MAX_OPTLEN  40
757 
758 static errno_t
pktmnglr_ipfilter_input(void * cookie,mbuf_t * data,int offset,u_int8_t protocol)759 pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u_int8_t protocol)
760 {
761 	struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)cookie;
762 	struct ip6_hdr ip6;
763 	struct ip ip;
764 	struct tcphdr tcp;
765 	size_t ip_pld_len;
766 	errno_t error = 0;
767 
768 	if (p_pkt_mnglr == NULL) {
769 		PKT_MNGLR_LOG(LOG_ERR, "p_pkt_mnglr is NULL");
770 		goto input_done;
771 	}
772 
773 	if (p_pkt_mnglr->activate == 0) {
774 		PKT_MNGLR_LOG(LOG_INFO, "p_pkt_mnglr not yet activated");
775 		goto input_done;
776 	}
777 
778 	if (p_pkt_mnglr->dir == OUT) {
779 		goto input_done;
780 	}
781 
782 	if (data == NULL) {
783 		PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL");
784 		goto input_done;
785 	}
786 
787 	/* Check for IP filter options */
788 	error = mbuf_copydata(*data, 0, sizeof(ip), &ip);
789 	if (error) {
790 		PKT_MNGLR_LOG(LOG_ERR, "Could not make local IP header copy");
791 		goto input_done;
792 	}
793 
794 	if (ip.ip_v == 6) {
795 		error = mbuf_copydata(*data, 0, sizeof(ip6), &ip6);
796 		if (error) {
797 			PKT_MNGLR_LOG(LOG_ERR, "Could not make local IPv6 header copy");
798 			goto input_done;
799 		}
800 	}
801 
802 	if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET6) && (ip.ip_v == 4)) {
803 		PKT_MNGLR_LOG(LOG_INFO, "Skipping filtering as address family of packet is IPv4 but local "
804 		    "address is set to IPv6");
805 		goto input_done;
806 	}
807 
808 	if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET) && (ip.ip_v == 6)) {
809 		PKT_MNGLR_LOG(LOG_INFO, "Skipping filtering as address family "
810 		    "of packet is IPv6 but local address is set to IPv4");
811 		goto input_done;
812 	}
813 
814 	if (p_pkt_mnglr->lsaddr.ss_family == AF_INET) {
815 		struct sockaddr_in laddr = *SIN(&p_pkt_mnglr->lsaddr);
816 		if (ip.ip_dst.s_addr != laddr.sin_addr.s_addr) {
817 			goto input_done;
818 		}
819 	} else if (p_pkt_mnglr->lsaddr.ss_family == AF_INET6) {
820 		struct sockaddr_in6 laddr = *SIN6(&p_pkt_mnglr->lsaddr);
821 		if (!IN6_ARE_ADDR_EQUAL(&ip6.ip6_dst, &laddr.sin6_addr)) {
822 			goto input_done;
823 		}
824 	}
825 
826 	if (p_pkt_mnglr->rsaddr.ss_family == AF_INET) {
827 		struct sockaddr_in raddr = *SIN(&p_pkt_mnglr->rsaddr);
828 		if (ip.ip_src.s_addr != raddr.sin_addr.s_addr) {
829 			goto input_done;
830 		}
831 		PKT_MNGLR_LOG(LOG_INFO, "Remote IP: %x Source IP: %x in input path",
832 		    raddr.sin_addr.s_addr,
833 		    ip.ip_src.s_addr);
834 	} else if (p_pkt_mnglr->rsaddr.ss_family == AF_INET6) {
835 		struct sockaddr_in6 raddr = *SIN6(&p_pkt_mnglr->rsaddr);
836 		if (!IN6_ARE_ADDR_EQUAL(&ip6.ip6_src, &raddr.sin6_addr)) {
837 			goto input_done;
838 		}
839 	}
840 
841 	if (ip.ip_v == 4) {
842 		ip_pld_len = ntohs(ip.ip_len) - (ip.ip_hl << 2);
843 	} else if (ip.ip_v == 6) {
844 		if (ip6.ip6_nxt != p_pkt_mnglr->proto) {
845 			/* Don't support IPv6 extension headers */
846 			goto input_done;
847 		}
848 		ip_pld_len = ntohs(ip6.ip6_plen);
849 	} else {
850 		goto input_done;
851 	}
852 
853 
854 	if (protocol != p_pkt_mnglr->proto) {
855 		PKT_MNGLR_LOG(LOG_INFO, "Skip: Protocol mismatch");
856 		goto input_done;
857 	}
858 
859 	switch (protocol) {
860 	case IPPROTO_TCP:
861 		if (ip_pld_len < sizeof(tcp)) {
862 			PKT_MNGLR_LOG(LOG_ERR, "IP total len not big enough for TCP: %zu", ip_pld_len);
863 			goto drop_it;
864 		}
865 
866 		error = mbuf_copydata(*data, (size_t)offset, sizeof(tcp), &tcp);
867 		if (error) {
868 			PKT_MNGLR_LOG(LOG_ERR, "Could not make local TCP header copy");
869 			goto input_done;
870 		}
871 
872 		if (p_pkt_mnglr->lport && (p_pkt_mnglr->lport != tcp.th_dport)) {
873 			PKT_MNGLR_LOG(LOG_INFO, "Local port and IP des port do not match");
874 			goto input_done;
875 		}
876 
877 		if (p_pkt_mnglr->rport && (p_pkt_mnglr->rport != tcp.th_sport)) {
878 			PKT_MNGLR_LOG(LOG_INFO, "Remote port and IP src port do not match");
879 			goto input_done;
880 		}
881 		break;
882 	case IPPROTO_UDP:
883 		goto input_done;
884 	case IPPROTO_ICMP:
885 		goto input_done;
886 	case IPPROTO_ICMPV6:
887 		goto input_done;
888 	default:
889 		goto input_done;
890 	}
891 
892 	/* XXX Do IP actions here */
893 	PKT_MNGLR_LOG(LOG_INFO, "Proceeding with packet mangler actions on the packet");
894 
895 	/* Protocol actions */
896 	switch (protocol) {
897 	case IPPROTO_TCP:
898 		if (p_pkt_mnglr->proto_action_mask) {
899 			unsigned char tcp_opt_buf[TCP_MAX_OPTLEN] = {0};
900 			size_t orig_tcp_optlen;
901 			size_t tcp_optlen = 0;
902 			size_t i = 0, off;
903 
904 			off = (tcp.th_off << 2);
905 
906 			if (off < sizeof(struct tcphdr) || off > ip_pld_len) {
907 				PKT_MNGLR_LOG(LOG_ERR, "TCP header offset is wrong: %zu", off);
908 				goto drop_it;
909 			}
910 
911 			tcp_optlen = off - sizeof(struct tcphdr);
912 
913 			PKT_MNGLR_LOG(LOG_INFO, "Packet from F5 is TCP\n");
914 			PKT_MNGLR_LOG(LOG_INFO, "Optlen: %zu\n", tcp_optlen);
915 			orig_tcp_optlen = tcp_optlen;
916 			if (orig_tcp_optlen) {
917 				error = mbuf_copydata(*data, (size_t)offset + sizeof(struct tcphdr), orig_tcp_optlen, tcp_opt_buf);
918 				if (error) {
919 					PKT_MNGLR_LOG(LOG_ERR, "Failed to copy tcp options: error %d offset %d optlen %zu", error, offset, orig_tcp_optlen);
920 					goto input_done;
921 				}
922 			}
923 
924 			while (tcp_optlen > 0) {
925 				if (tcp_opt_buf[i] == 0x1) {
926 					PKT_MNGLR_LOG(LOG_INFO, "Skipping NOP\n");
927 					tcp_optlen--;
928 					i++;
929 					continue;
930 				} else if ((tcp_opt_buf[i] != 0) && (tcp_opt_buf[i] != TCP_OPT_MULTIPATH_TCP)) {
931 					unsigned char optlen;
932 
933 					PKT_MNGLR_LOG(LOG_INFO, "Skipping option %x\n", tcp_opt_buf[i]);
934 
935 					if (tcp_optlen < 2) {
936 						PKT_MNGLR_LOG(LOG_ERR, "Received short TCP option");
937 						goto drop_it;
938 					}
939 
940 					/* Minimum TCP option size is 2 */
941 					optlen = tcp_opt_buf[i + 1];
942 					if (optlen < 2 || optlen > tcp_optlen) {
943 						PKT_MNGLR_LOG(LOG_ERR, "Received suspicious TCP option");
944 						goto drop_it;
945 					}
946 					tcp_optlen -= optlen;
947 					i += optlen;
948 					continue;
949 				} else if (tcp_opt_buf[i] == TCP_OPT_MULTIPATH_TCP) {
950 					size_t j = 0;
951 					unsigned char mptcpoptlen;
952 					uint8_t sbtver;
953 					uint8_t subtype;
954 
955 					if (tcp_optlen < 3) {
956 						PKT_MNGLR_LOG(LOG_ERR, "Received short MPTCP option");
957 						goto drop_it;
958 					}
959 
960 					/* Minimum MPTCP option size is 3 */
961 					mptcpoptlen = tcp_opt_buf[i + 1];
962 					if (mptcpoptlen < 3 || mptcpoptlen > tcp_optlen) {
963 						PKT_MNGLR_LOG(LOG_ERR, "Received suspicious MPTCP option");
964 						goto drop_it;
965 					}
966 
967 					sbtver = tcp_opt_buf[i + MPTCP_SBT_VER_OFFSET];
968 					subtype = sbtver >> 4;
969 
970 					PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP option %x\n", tcp_opt_buf[i]);
971 					PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP subtype %x\n", subtype);
972 					if (subtype == MPO_DSS) {
973 						PKT_MNGLR_LOG(LOG_INFO, "Got DSS option\n");
974 						PKT_MNGLR_LOG(LOG_INFO, "Protocol option mask: %d\n", p_pkt_mnglr->proto_action_mask);
975 						if (p_pkt_mnglr->proto_action_mask &
976 						    PKT_MNGLR_TCP_ACT_DSS_DROP) {
977 							goto drop_it;
978 						}
979 					}
980 
981 					PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP option %x\n", tcp_opt_buf[i]);
982 					for (; j < mptcpoptlen && j < tcp_optlen; j++) {
983 						if (p_pkt_mnglr->proto_action_mask &
984 						    PKT_MNGLR_TCP_ACT_NOP_MPTCP) {
985 							tcp_opt_buf[i + j] = 0x1;
986 						}
987 					}
988 					tcp_optlen -= mptcpoptlen;
989 					i += mptcpoptlen;
990 				} else {
991 					tcp_optlen--;
992 					i++;
993 				}
994 			}
995 
996 			if (orig_tcp_optlen) {
997 				error = mbuf_copyback(*data,
998 				    (size_t)offset + sizeof(struct tcphdr),
999 				    orig_tcp_optlen, tcp_opt_buf, MBUF_WAITOK);
1000 
1001 				if (error) {
1002 					PKT_MNGLR_LOG(LOG_ERR,
1003 					    "Failed to copy tcp options back: error %d offset %d optlen %zu",
1004 					    error, offset, orig_tcp_optlen);
1005 					goto input_done;
1006 				}
1007 			}
1008 		}
1009 		break;
1010 	case IPPROTO_UDP:
1011 		/* Don't handle UDP */
1012 		break;
1013 	case IPPROTO_ICMP:
1014 		break;
1015 	case IPPROTO_ICMPV6:
1016 		break;
1017 	default:
1018 		break;
1019 	}
1020 	chksm_update(*data);
1021 input_done:
1022 	return 0;
1023 
1024 drop_it:
1025 	PKT_MNGLR_LOG(LOG_INFO, "Dropping packet\n");
1026 	mbuf_freem(*data);
1027 	return EJUSTRETURN;
1028 }
1029 
/*
 * IP filter detach callback.  Intentionally does nothing: the instance is
 * released by the control socket disconnect path instead.
 */
static void
pktmnglr_ipfilter_detach(void *cookie)
{
	(void)cookie;
}
1036 
1037 /* XXX Still need to modify this to use mbuf_copy* macros */
1038 static void
chksm_update(mbuf_t data)1039 chksm_update(mbuf_t data)
1040 {
1041 	u_int16_t ip_sum;
1042 	u_int16_t tsum;
1043 	struct tcphdr *tcp;
1044 	errno_t err;
1045 
1046 	unsigned char *ptr = mtod(data, unsigned char *);
1047 	struct ip *ip = (struct ip *)(void *)ptr;
1048 	if (ip->ip_v != 4) {
1049 		return;
1050 	}
1051 
1052 	ip->ip_sum = 0;
1053 	err = mbuf_inet_cksum(data, 0, 0, ip->ip_hl << 2, &ip_sum); // ip sum
1054 	if (err == 0) {
1055 		ip->ip_sum = ip_sum;
1056 	}
1057 	switch (ip->ip_p) {
1058 	case IPPROTO_TCP:
1059 		tcp = (struct tcphdr *)(void *)(ptr + (ip->ip_hl << 2));
1060 		tcp->th_sum = 0;
1061 		err = mbuf_inet_cksum(data, IPPROTO_TCP, ip->ip_hl << 2,
1062 		        ntohs(ip->ip_len) - (ip->ip_hl << 2), &tsum);
1063 		if (err == 0) {
1064 			tcp->th_sum = tsum;
1065 		}
1066 		break;
1067 	case IPPROTO_UDP:
1068 		/* Don't handle UDP */
1069 		break;
1070 	case IPPROTO_ICMP:
1071 		break;
1072 	case IPPROTO_ICMPV6:
1073 		break;
1074 	default:
1075 		break;
1076 	}
1077 
1078 	mbuf_clear_csum_performed(data);
1079 	return;
1080 }
1081