xref: /xnu-10002.81.5/bsd/net/packet_mangler.c (revision 5e3eaea39dcf651e66cb99ba7d70e32cc4a99587)
1 /*
2  * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. Please obtain a copy of the License at
10  * http://www.opensource.apple.com/apsl/ and read it before using this
11  * file.
12  *
13  * The Original Code and all software distributed under the License are
14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18  * Please see the License for the specific language governing rights and
19  * limitations under the License.
20  *
21  * @APPLE_LICENSE_HEADER_END@
22  */
23 
24 /*
25  * THEORY OF OPERATION
26  *
27  * The packet mangler subsystem provides a limited way for user space
28  * applications to apply certain actions on certain flows.
29  *
30  * A user space application opens a kernel control socket with the name
31  * PACKET_MANGLER_CONTROL_NAME to attach to the packet mangler subsystem.
32  * When connected, a "struct packet_mangler" is created and set as the
33  * "unitinfo" of the corresponding kernel control socket instance.
34  * Connect call for packet mangler's kernel control socket also registers
35  * IP filters with the cookie set to the packet_mangler instance.
36  * The ip filters are removed when control socket is disconnected.
37  */
38 #include <sys/types.h>
39 #include <sys/kern_control.h>
40 #include <sys/domain.h>
41 #include <sys/protosw.h>
42 #include <sys/syslog.h>
43 
44 #include <kern/locks.h>
45 #include <kern/zalloc.h>
46 #include <kern/debug.h>
47 
48 #include <net/packet_mangler.h>
49 
50 #include <netinet/mptcp.h>
51 #include <netinet/tcp.h>
52 #include <netinet/tcp_var.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip6.h>
55 #include <netinet/kpi_ipfilter.h>
56 #include <string.h>
57 #include <libkern/libkern.h>
58 
/*
 * Number of slots in packet_manglers[]; the connect callback rejects
 * control unit numbers above this value.
 */
59 #define MAX_PACKET_MANGLER                      1
60 
/*
 * pkt_mnglr_flags bit: set by the connect callback once both the IPv4 and
 * IPv6 filters are registered, and tested by the disconnect callback before
 * it removes them.
 */
61 #define PKT_MNGLR_FLG_IPFILTER_ATTACHED         0x00000001
62 
/*
 * Sysctl node "pktmnglr" under _net, with a read/write integer "log" backed
 * by pkt_mnglr_log_level.
 */
63 SYSCTL_NODE(_net, OID_AUTO, pktmnglr, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "pktmnglr");
64 SYSCTL_INT(_net_pktmnglr, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
65     &pkt_mnglr_log_level, 0, "");
66 /*
67  * The structure packet_mangler represents a user space packet filter
68  * It's created and associated with a kernel control socket instance
69  */
70 struct packet_mangler {
	/* Kernel control reference handed to the connect callback. */
71 	kern_ctl_ref                    pkt_mnglr_kcref;
	/* Control unit number (sac->sc_unit); unit numbers start at 1. */
72 	uint32_t                        pkt_mnglr_kcunit;
	/* PKT_MNGLR_FLG_* bits. */
73 	uint32_t                        pkt_mnglr_flags;
74 	/* IP filter related params */
	/* Handle filled in by ipf_addv4(). */
75 	ipfilter_t                      pkt_mnglr_ipfref;
	/* Handle filled in by ipf_addv6(). */
76 	ipfilter_t                      pkt_mnglr_ipfrefv6;
	/* Filter description (cookie, name, callbacks) passed to both ipf_add calls. */
77 	struct ipf_filter               pkt_mnglr_ipfilter;
78 
79 	/* Options */
	/* Non-zero once activated; the filter callbacks do nothing while this is 0. */
80 	uint8_t                         activate;
	/* Flow direction: the input hook skips OUT, the output hook skips IN. */
81 	Pkt_Mnglr_Flow                  dir;
	/* Local address to match; ss_family == 0 is treated as "not set". */
82 	struct sockaddr_storage         lsaddr;
	/* Remote address to match; ss_family == 0 is treated as "not set". */
83 	struct sockaddr_storage         rsaddr;
	/* NOTE(review): swap_lsaddr/swap_rsaddr are not referenced by the code visible in this file. */
84 	struct sockaddr_storage         swap_lsaddr;
85 	struct sockaddr_storage         swap_rsaddr;
	/* Stored and returned through the socket options; not otherwise consulted in the visible code. */
86 	uint32_t                        ip_action_mask;
	/*
	 * Local/remote TCP ports; 0 means "match any". They are compared
	 * directly against th_dport/th_sport as copied from the packet, so
	 * presumably they must be supplied in network byte order -- confirm
	 * against the user space caller.
	 */
87 	uint16_t                        lport;
88 	uint16_t                        rport;
	/* IP protocol number the input hook compares against its protocol argument. */
89 	uint32_t                        proto;
	/* PKT_MNGLR_TCP_ACT_* bits selecting the TCP option actions of the input hook. */
90 	uint32_t                        proto_action_mask;
91 };
92 
93 /* Array of all the packet mangler instances */
/* Indexed by (control unit - 1); a NULL slot is free. */
94 struct packet_mangler *packet_manglers[MAX_PACKET_MANGLER];
95 
96 uint32_t pkt_mnglr_active_count = 0;    /* Number of active packet filters */
97 uint32_t pkt_mnglr_close_wait_timeout = 1000; /* in milliseconds */
98 
/* Reference filled in by ctl_register() in pkt_mnglr_init(). */
99 static kern_ctl_ref pkt_mnglr_kctlref = NULL;
100 
101 /* The lock below protects packet_manglers DS, packet_mangler DS */
102 static LCK_GRP_DECLARE(pkt_mnglr_lck_grp, "packet mangler");
103 static LCK_RW_DECLARE(pkt_mnglr_lck_rw, &pkt_mnglr_lck_grp);
104 
/* Number of entries in each of the lock/unlock caller history rings below. */
105 #define PKT_MNGLR_RW_LCK_MAX    8
106 
/*
 * Ring of return addresses recorded by the pkt_mnglr_rw_lock_* wrappers;
 * pkt_mnglr_rw_nxt_lck is the next slot to overwrite.
 */
107 int pkt_mnglr_rw_nxt_lck = 0;
108 void* pkt_mnglr_rw_lock_history[PKT_MNGLR_RW_LCK_MAX];
109 
/*
 * Ring of return addresses recorded by the pkt_mnglr_rw_unlock_* wrappers;
 * pkt_mnglr_rw_nxt_unlck is the next slot to overwrite.
 */
110 int pkt_mnglr_rw_nxt_unlck = 0;
111 void* pkt_mnglr_rw_unlock_history[PKT_MNGLR_RW_LCK_MAX];
112 
/* Allocation zone for struct packet_mangler (used by connect/disconnect). */
113 static KALLOC_TYPE_DEFINE(packet_mangler_zone, struct packet_mangler, NET_KT_DEFAULT);
114 
115 /*
116  * For troubleshooting
117  */
/* Exposed read/write through the "log" sysctl declared above. */
118 int pkt_mnglr_log_level = LOG_ERR;
/* NOTE(review): pkt_mnglr_debug is not referenced by the code visible in this file. */
119 int pkt_mnglr_debug = 1;
120 
121 /*
122  * Forward declaration to appease the compiler
123  */
/* Wrappers around the lck_rw_* calls that also record the caller's return address. */
124 static void pkt_mnglr_rw_lock_exclusive(lck_rw_t *);
125 static void pkt_mnglr_rw_unlock_exclusive(lck_rw_t *);
126 static void pkt_mnglr_rw_lock_shared(lck_rw_t *);
127 static void pkt_mnglr_rw_unlock_shared(lck_rw_t *);
128 
/* IP filter callbacks installed by the connect callback. */
129 static errno_t pktmnglr_ipfilter_output(void *cookie, mbuf_t *data,
130     ipf_pktopts_t options);
131 static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data,
132     int offset, u_int8_t protocol);
133 static void pktmnglr_ipfilter_detach(void *cookie);
134 
/* Recomputes the IPv4 header and TCP checksums of a packet. */
135 static void chksm_update(mbuf_t data);
136 
/* TCP option kind that the input hook treats as Multipath TCP. */
137 #define TCP_OPT_MULTIPATH_TCP   30
/*
 * Offset, from the start of an MPTCP option, of the byte whose upper four
 * bits the input hook reads as the MPTCP subtype.
 */
138 #define MPTCP_SBT_VER_OFFSET    2
139 
140 /*
141  * packet filter global read write lock
142  */
143 
144 static void
pkt_mnglr_rw_lock_exclusive(lck_rw_t * lck)145 pkt_mnglr_rw_lock_exclusive(lck_rw_t *lck)
146 {
147 	void *lr_saved;
148 
149 	lr_saved = __builtin_return_address(0);
150 
151 	lck_rw_lock_exclusive(lck);
152 
153 	pkt_mnglr_rw_lock_history[pkt_mnglr_rw_nxt_lck] = lr_saved;
154 	pkt_mnglr_rw_nxt_lck =
155 	    (pkt_mnglr_rw_nxt_lck + 1) % PKT_MNGLR_RW_LCK_MAX;
156 }
157 
158 static void
pkt_mnglr_rw_unlock_exclusive(lck_rw_t * lck)159 pkt_mnglr_rw_unlock_exclusive(lck_rw_t *lck)
160 {
161 	void *lr_saved;
162 
163 	lr_saved = __builtin_return_address(0);
164 
165 	lck_rw_unlock_exclusive(lck);
166 
167 	pkt_mnglr_rw_unlock_history[pkt_mnglr_rw_nxt_unlck] =
168 	    lr_saved;
169 	pkt_mnglr_rw_nxt_unlck = (pkt_mnglr_rw_nxt_unlck + 1) % PKT_MNGLR_RW_LCK_MAX;
170 }
171 
172 static void
pkt_mnglr_rw_lock_shared(lck_rw_t * lck)173 pkt_mnglr_rw_lock_shared(lck_rw_t *lck)
174 {
175 	void *lr_saved;
176 
177 	lr_saved = __builtin_return_address(0);
178 
179 	lck_rw_lock_shared(lck);
180 
181 	pkt_mnglr_rw_lock_history[pkt_mnglr_rw_nxt_lck] = lr_saved;
182 	pkt_mnglr_rw_nxt_lck = (pkt_mnglr_rw_nxt_lck + 1) % PKT_MNGLR_RW_LCK_MAX;
183 }
184 
185 static void
pkt_mnglr_rw_unlock_shared(lck_rw_t * lck)186 pkt_mnglr_rw_unlock_shared(lck_rw_t *lck)
187 {
188 	void *lr_saved;
189 
190 	lr_saved = __builtin_return_address(0);
191 
192 	lck_rw_unlock_shared(lck);
193 
194 	pkt_mnglr_rw_unlock_history[pkt_mnglr_rw_nxt_unlck] = lr_saved;
195 	pkt_mnglr_rw_nxt_unlck = (pkt_mnglr_rw_nxt_unlck + 1) % PKT_MNGLR_RW_LCK_MAX;
196 }
197 
198 /*
199  * Packet Mangler's Kernel control socket callbacks
200  */
201 static errno_t
pkt_mnglr_ctl_connect(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)202 pkt_mnglr_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
203     void **unitinfo)
204 {
205 	errno_t error = 0;
206 	struct packet_mangler *p_pkt_mnglr = NULL;
207 
208 	PKT_MNGLR_LOG(LOG_NOTICE, "Connecting packet mangler filter.");
209 
210 	if (sac->sc_unit == 0 || sac->sc_unit > MAX_PACKET_MANGLER) {
211 		PKT_MNGLR_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
212 		error = EINVAL;
213 		goto fail;
214 	}
215 
216 	p_pkt_mnglr = zalloc_flags(packet_mangler_zone,
217 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
218 
219 	pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw);
220 
221 	if (packet_manglers[sac->sc_unit - 1] != NULL) {
222 		PKT_MNGLR_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
223 		error = EADDRINUSE;
224 		pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
225 		goto fail_free;
226 	} else {
227 		/*
228 		 * kernel control socket kcunit numbers start at 1
229 		 */
230 		packet_manglers[sac->sc_unit - 1] = p_pkt_mnglr;
231 
232 		p_pkt_mnglr->pkt_mnglr_kcref = kctlref;
233 		p_pkt_mnglr->pkt_mnglr_kcunit = sac->sc_unit;
234 
235 		pkt_mnglr_active_count++;
236 	}
237 
238 	p_pkt_mnglr->pkt_mnglr_ipfilter.cookie = p_pkt_mnglr;
239 	p_pkt_mnglr->pkt_mnglr_ipfilter.name = "com.apple.pktmnglripfilter";
240 	p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_input = pktmnglr_ipfilter_input;
241 	p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_output = pktmnglr_ipfilter_output;
242 	p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_detach = pktmnglr_ipfilter_detach;
243 	error = ipf_addv4(&(p_pkt_mnglr->pkt_mnglr_ipfilter), &(p_pkt_mnglr->pkt_mnglr_ipfref));
244 	if (error) {
245 		PKT_MNGLR_LOG(LOG_ERR, "Could not register packet mangler's IPv4 Filter");
246 		goto fail_locked;
247 	}
248 	error = ipf_addv6(&(p_pkt_mnglr->pkt_mnglr_ipfilter), &(p_pkt_mnglr->pkt_mnglr_ipfrefv6));
249 	if (error) {
250 		ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfref);
251 		PKT_MNGLR_LOG(LOG_ERR, "Could not register packet mangler's IPv6 Filter");
252 		goto fail_locked;
253 	}
254 
255 	PKT_MNGLR_LOG(LOG_INFO, "Registered packet mangler's IP Filters");
256 	p_pkt_mnglr->pkt_mnglr_flags |= PKT_MNGLR_FLG_IPFILTER_ATTACHED;
257 	pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
258 
259 	if (error) {
260 fail_locked:
261 		pkt_mnglr_active_count--;
262 
263 		packet_manglers[sac->sc_unit - 1] = NULL;
264 		*unitinfo = NULL;
265 
266 		pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
267 
268 fail_free:
269 		zfree(packet_mangler_zone, p_pkt_mnglr);
270 	}
271 
272 fail:
273 	*unitinfo = p_pkt_mnglr;
274 
275 	PKT_MNGLR_LOG(LOG_INFO, "return %d pkt_mnglr_active_count %u kcunit %u",
276 	    error, pkt_mnglr_active_count, sac->sc_unit);
277 
278 	return error;
279 }
280 
281 static errno_t
pkt_mnglr_ctl_disconnect(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo)282 pkt_mnglr_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
283 {
284 #pragma unused(kctlref)
285 	errno_t error = 0;
286 	struct packet_mangler *p_pkt_mnglr;
287 
288 	PKT_MNGLR_LOG(LOG_INFO, "Disconnecting packet mangler kernel control");
289 
290 	if (unitinfo == NULL) {
291 		goto done;
292 	}
293 
294 	if (kcunit > MAX_PACKET_MANGLER) {
295 		PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d)",
296 		    kcunit, MAX_PACKET_MANGLER);
297 		error = EINVAL;
298 		goto done;
299 	}
300 
301 	p_pkt_mnglr = (struct packet_mangler *)unitinfo;
302 
303 	pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw);
304 	if (packet_manglers[kcunit - 1] != p_pkt_mnglr || p_pkt_mnglr->pkt_mnglr_kcunit != kcunit) {
305 		PKT_MNGLR_LOG(LOG_ERR, "bad unit info %u",
306 		    kcunit);
307 		pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
308 		goto done;
309 	}
310 
311 	/*
312 	 * Make filter inactive
313 	 */
314 	packet_manglers[kcunit - 1] = NULL;
315 	pkt_mnglr_active_count--;
316 	if (p_pkt_mnglr->pkt_mnglr_flags & PKT_MNGLR_FLG_IPFILTER_ATTACHED) {
317 		(void) ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfref);
318 		(void) ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfrefv6);
319 	}
320 	pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
321 	zfree(packet_mangler_zone, p_pkt_mnglr);
322 done:
323 	PKT_MNGLR_LOG(LOG_INFO, "return %d pkt_mnglr_active_count %u kcunit %u",
324 	    error, pkt_mnglr_active_count, kcunit);
325 
326 	return error;
327 }
328 
329 static errno_t
pkt_mnglr_ctl_getopt(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo,int opt,void * data,size_t * len)330 pkt_mnglr_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
331     int opt, void *data, size_t *len)
332 {
333 #pragma unused(kctlref, opt)
334 	errno_t error = 0;
335 	struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)unitinfo;
336 
337 	PKT_MNGLR_LOG(LOG_NOTICE, "");
338 
339 	pkt_mnglr_rw_lock_shared(&pkt_mnglr_lck_rw);
340 
341 	if (kcunit > MAX_PACKET_MANGLER) {
342 		PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d)",
343 		    kcunit, MAX_PACKET_MANGLER);
344 		error = EINVAL;
345 		goto done;
346 	}
347 	if (p_pkt_mnglr != (void *)packet_manglers[kcunit - 1]) {
348 		PKT_MNGLR_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
349 		    kcunit);
350 		error = EINVAL;
351 		goto done;
352 	}
353 	switch (opt) {
354 	case PKT_MNGLR_OPT_PROTO_ACT_MASK:
355 		if (*len < sizeof(uint32_t)) {
356 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK "
357 			    "len too small %lu", *len);
358 			error = EINVAL;
359 			goto done;
360 		}
361 
362 		if (data != NULL) {
363 			*(uint32_t *)data = p_pkt_mnglr->proto_action_mask;
364 		}
365 		break;
366 	case PKT_MNGLR_OPT_IP_ACT_MASK:
367 		if (*len < sizeof(uint32_t)) {
368 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK "
369 			    "len too small %lu", *len);
370 			error = EINVAL;
371 			goto done;
372 		}
373 
374 		if (data != NULL) {
375 			*(uint32_t *)data = p_pkt_mnglr->ip_action_mask;
376 		}
377 		break;
378 	case PKT_MNGLR_OPT_LOCAL_IP:
379 		if (*len < sizeof(struct sockaddr_storage)) {
380 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP "
381 			    "len too small %lu", *len);
382 			error = EINVAL;
383 			goto done;
384 		}
385 
386 		if (data != NULL) {
387 			*(struct sockaddr_storage *)data = p_pkt_mnglr->lsaddr;
388 		}
389 		break;
390 	case PKT_MNGLR_OPT_REMOTE_IP:
391 		if (*len < sizeof(struct sockaddr_storage)) {
392 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP "
393 			    "len too small %lu", *len);
394 			error = EINVAL;
395 			goto done;
396 		}
397 
398 		if (data != NULL) {
399 			*(struct sockaddr_storage *)data = p_pkt_mnglr->rsaddr;
400 		}
401 		break;
402 	case PKT_MNGLR_OPT_LOCAL_PORT:
403 		if (*len < sizeof(uint16_t)) {
404 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT "
405 			    "len too small %lu", *len);
406 			error = EINVAL;
407 			goto done;
408 		}
409 
410 		if (data != NULL) {
411 			*(uint16_t *)data = p_pkt_mnglr->lport;
412 		}
413 		break;
414 	case PKT_MNGLR_OPT_REMOTE_PORT:
415 		if (*len < sizeof(uint16_t)) {
416 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT "
417 			    "len too small %lu", *len);
418 			error = EINVAL;
419 			goto done;
420 		}
421 
422 		if (data != NULL) {
423 			*(uint16_t *)data = p_pkt_mnglr->rport;
424 		}
425 		break;
426 	case PKT_MNGLR_OPT_DIRECTION:
427 		if (*len < sizeof(uint32_t)) {
428 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION "
429 			    "len too small %lu", *len);
430 			error = EINVAL;
431 			goto done;
432 		}
433 		if (data != NULL) {
434 			*(uint32_t *)data = p_pkt_mnglr->dir;
435 		}
436 		break;
437 	case PKT_MNGLR_OPT_PROTOCOL:
438 		if (*len < sizeof(uint32_t)) {
439 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL "
440 			    "len too small %lu", *len);
441 			error = EINVAL;
442 			goto done;
443 		}
444 		if (data != NULL) {
445 			*(uint32_t *)data = p_pkt_mnglr->proto;
446 		}
447 		break;
448 	case PKT_MNGLR_OPT_ACTIVATE:
449 		if (*len < sizeof(uint8_t)) {
450 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE "
451 			    "len too small %lu", *len);
452 			error = EINVAL;
453 			goto done;
454 		}
455 
456 		if (data != NULL) {
457 			*(uint8_t *)data = p_pkt_mnglr->activate;
458 		}
459 		break;
460 	default:
461 		error = ENOPROTOOPT;
462 		break;
463 	}
464 done:
465 	pkt_mnglr_rw_unlock_shared(&pkt_mnglr_lck_rw);
466 
467 	return error;
468 }
469 
470 static errno_t
pkt_mnglr_ctl_setopt(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo,int opt,void * data,size_t len)471 pkt_mnglr_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
472     int opt, void *data, size_t len)
473 {
474 #pragma unused(kctlref, opt)
475 	errno_t error = 0;
476 	struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)unitinfo;
477 
478 	PKT_MNGLR_LOG(LOG_NOTICE, "");
479 
480 	pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw);
481 
482 	if (kcunit > MAX_PACKET_MANGLER) {
483 		PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d)",
484 		    kcunit, MAX_PACKET_MANGLER);
485 		error = EINVAL;
486 		goto done;
487 	}
488 	if (p_pkt_mnglr != (void *)packet_manglers[kcunit - 1]) {
489 		PKT_MNGLR_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
490 		    kcunit);
491 		error = EINVAL;
492 		goto done;
493 	}
494 	switch (opt) {
495 	case PKT_MNGLR_OPT_PROTO_ACT_MASK:
496 		if (len < sizeof(uint32_t)) {
497 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK "
498 			    "len too small %lu", len);
499 			error = EINVAL;
500 			goto done;
501 		}
502 		if (p_pkt_mnglr->proto_action_mask != 0) {
503 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK "
504 			    "already set %u",
505 			    p_pkt_mnglr->proto_action_mask);
506 			error = EINVAL;
507 			goto done;
508 		}
509 		p_pkt_mnglr->proto_action_mask = *(uint32_t *)data;
510 		PKT_MNGLR_LOG(LOG_INFO, "p_pkt_mnglr->proto_action_mask set to :%d", p_pkt_mnglr->proto_action_mask);
511 		break;
512 	case PKT_MNGLR_OPT_IP_ACT_MASK:
513 		if (len < sizeof(uint32_t)) {
514 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK "
515 			    "len too small %lu", len);
516 			error = EINVAL;
517 			goto done;
518 		}
519 		if (p_pkt_mnglr->ip_action_mask != 0) {
520 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK "
521 			    "already set %u",
522 			    p_pkt_mnglr->ip_action_mask);
523 			error = EINVAL;
524 			goto done;
525 		}
526 		p_pkt_mnglr->ip_action_mask = *(uint32_t *)data;
527 		break;
528 	case PKT_MNGLR_OPT_LOCAL_IP:
529 		if (len < sizeof(struct sockaddr_storage)) {
530 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP "
531 			    "len too small %lu", len);
532 			error = EINVAL;
533 			goto done;
534 		}
535 		if (p_pkt_mnglr->lsaddr.ss_family) {
536 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP "
537 			    "already set");
538 			error = EINVAL;
539 			goto done;
540 		}
541 		p_pkt_mnglr->lsaddr = *(struct sockaddr_storage *)data;
542 		break;
543 	case PKT_MNGLR_OPT_REMOTE_IP:
544 		if (len < sizeof(struct sockaddr_storage)) {
545 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP "
546 			    "len too small %lu", len);
547 			error = EINVAL;
548 			goto done;
549 		}
550 		if (p_pkt_mnglr->rsaddr.ss_family) {
551 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP "
552 			    "already set");
553 			error = EINVAL;
554 			goto done;
555 		}
556 
557 		p_pkt_mnglr->rsaddr = *(struct sockaddr_storage *)data;
558 		PKT_MNGLR_LOG(LOG_INFO,
559 		    "Remote IP registered for address family: %d",
560 		    p_pkt_mnglr->rsaddr.ss_family);
561 		break;
562 	case PKT_MNGLR_OPT_LOCAL_PORT:
563 		if (len < sizeof(uint16_t)) {
564 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT "
565 			    "len too small %lu", len);
566 			error = EINVAL;
567 			goto done;
568 		}
569 		if (p_pkt_mnglr->lport != 0) {
570 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT "
571 			    "already set %d",
572 			    p_pkt_mnglr->lport);
573 			error = EINVAL;
574 			goto done;
575 		}
576 		p_pkt_mnglr->lport = *(uint16_t *)data;
577 		break;
578 	case PKT_MNGLR_OPT_REMOTE_PORT:
579 		if (len < sizeof(uint16_t)) {
580 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT "
581 			    "len too small %lu", len);
582 			error = EINVAL;
583 			goto done;
584 		}
585 		if (p_pkt_mnglr->rport != 0) {
586 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT "
587 			    "already set %d",
588 			    p_pkt_mnglr->rport);
589 			error = EINVAL;
590 			goto done;
591 		}
592 		p_pkt_mnglr->rport = *(uint16_t *)data;
593 		break;
594 	case PKT_MNGLR_OPT_DIRECTION:
595 		if (len < sizeof(uint32_t)) {
596 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION "
597 			    "len too small %lu", len);
598 			error = EINVAL;
599 			goto done;
600 		}
601 		if (p_pkt_mnglr->dir != 0) {
602 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION "
603 			    "already set %u",
604 			    p_pkt_mnglr->dir);
605 			error = EINVAL;
606 			goto done;
607 		}
608 		p_pkt_mnglr->dir = *(uint32_t *)data;
609 		break;
610 	case PKT_MNGLR_OPT_PROTOCOL:
611 		if (len < sizeof(uint32_t)) {
612 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL "
613 			    "len too small %lu", len);
614 			error = EINVAL;
615 			goto done;
616 		}
617 		if (p_pkt_mnglr->proto != 0) {
618 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL "
619 			    "already set %u",
620 			    p_pkt_mnglr->proto);
621 			error = EINVAL;
622 			goto done;
623 		}
624 		p_pkt_mnglr->proto = *(uint32_t *)data;
625 		break;
626 	case PKT_MNGLR_OPT_ACTIVATE:
627 		if (len < sizeof(uint8_t)) {
628 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE "
629 			    "len too small %lu", len);
630 			error = EINVAL;
631 			goto done;
632 		}
633 		if (p_pkt_mnglr->activate != 0) {
634 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE "
635 			    "already set %u",
636 			    p_pkt_mnglr->activate);
637 			error = EINVAL;
638 			goto done;
639 		}
640 		p_pkt_mnglr->activate = *(uint8_t *)data;
641 		PKT_MNGLR_LOG(LOG_ERR, "p_pkt_mnglr->activate set to :%d",
642 		    p_pkt_mnglr->activate);
643 		break;
644 	default:
645 		error = ENOPROTOOPT;
646 		break;
647 	}
648 done:
649 	pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
650 
651 	return error;
652 }
653 
654 void
pkt_mnglr_init(void)655 pkt_mnglr_init(void)
656 {
657 	struct kern_ctl_reg kern_ctl;
658 	errno_t error = 0;
659 
660 	PKT_MNGLR_LOG(LOG_NOTICE, "");
661 
662 	/*
663 	 * Compile time verifications
664 	 */
665 	_CASSERT(PKT_MNGLR_MAX_FILTER_COUNT == MAX_PACKET_MANGLER);
666 
667 	/*
668 	 * Register kernel control
669 	 */
670 	bzero(&kern_ctl, sizeof(kern_ctl));
671 	strlcpy(kern_ctl.ctl_name, PACKET_MANGLER_CONTROL_NAME,
672 	    sizeof(kern_ctl.ctl_name));
673 	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
674 	kern_ctl.ctl_connect = pkt_mnglr_ctl_connect;
675 	kern_ctl.ctl_disconnect = pkt_mnglr_ctl_disconnect;
676 	kern_ctl.ctl_getopt = pkt_mnglr_ctl_getopt;
677 	kern_ctl.ctl_setopt = pkt_mnglr_ctl_setopt;
678 	error = ctl_register(&kern_ctl, &pkt_mnglr_kctlref);
679 	if (error != 0) {
680 		PKT_MNGLR_LOG(LOG_ERR, "ctl_register failed: %d", error);
681 	} else {
682 		PKT_MNGLR_LOG(LOG_INFO, "Registered packet mangler kernel control.");
683 	}
684 }
685 
686 static errno_t
pktmnglr_ipfilter_output(void * cookie,mbuf_t * data,ipf_pktopts_t options)687 pktmnglr_ipfilter_output(void *cookie, mbuf_t *data, ipf_pktopts_t options)
688 {
689 	struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)cookie;
690 	struct ip ip;
691 	struct tcphdr tcp;
692 	int optlen = 0;
693 	errno_t error = 0;
694 
695 #pragma unused(tcp, optlen, options)
696 	if (p_pkt_mnglr == NULL) {
697 		goto output_done;
698 	}
699 
700 	if (!p_pkt_mnglr->activate) {
701 		goto output_done;
702 	}
703 
704 	if (p_pkt_mnglr->dir == IN) {
705 		goto output_done;
706 	}
707 
708 	if (data == NULL) {
709 		PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL");
710 		goto output_done;
711 	}
712 
713 	/* Check for IP filter options */
714 	error = mbuf_copydata(*data, 0, sizeof(ip), &ip);
715 	if (error) {
716 		PKT_MNGLR_LOG(LOG_ERR, "Could not make local IP header copy");
717 		goto output_done;
718 	}
719 
720 	if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET6) && (ip.ip_v == 4)) {
721 		goto output_done;
722 	}
723 
724 	if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET) && (ip.ip_v == 6)) {
725 		goto output_done;
726 	}
727 
728 	if (p_pkt_mnglr->lsaddr.ss_family == AF_INET) {
729 		struct sockaddr_in laddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->lsaddr));
730 		if (ip.ip_src.s_addr != laddr.sin_addr.s_addr) {
731 			goto output_done;
732 		}
733 	}
734 
735 	if (p_pkt_mnglr->rsaddr.ss_family == AF_INET) {
736 		struct sockaddr_in raddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->rsaddr));
737 		if (ip.ip_dst.s_addr != raddr.sin_addr.s_addr) {
738 			goto output_done;
739 		}
740 	}
741 
742 	if (ip.ip_v != 4) {
743 		PKT_MNGLR_LOG(LOG_INFO,
744 		    "%s:%d Not handling IP version %d\n",
745 		    __func__, __LINE__, ip.ip_v);
746 		goto output_done;
747 	}
748 
749 output_done:
750 	/* Not handling output flow */
751 	return 0;
752 }
753 
/*
 * Size in bytes of the on-stack TCP option buffer used by
 * pktmnglr_ipfilter_input(). th_off << 2 is at most 60 and sizeof(struct
 * tcphdr) is subtracted from it, so options presumably never exceed 40
 * bytes (assuming a 20-byte struct tcphdr).
 */
754 #define TCP_MAX_OPTLEN  40
755 
756 static errno_t
pktmnglr_ipfilter_input(void * cookie,mbuf_t * data,int offset,u_int8_t protocol)757 pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u_int8_t protocol)
758 {
759 	struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)cookie;
760 	struct ip6_hdr ip6;
761 	struct ip ip;
762 	struct tcphdr tcp;
763 	size_t ip_pld_len;
764 	errno_t error = 0;
765 
766 	if (p_pkt_mnglr == NULL) {
767 		PKT_MNGLR_LOG(LOG_ERR, "p_pkt_mnglr is NULL");
768 		goto input_done;
769 	}
770 
771 	if (p_pkt_mnglr->activate == 0) {
772 		PKT_MNGLR_LOG(LOG_INFO, "p_pkt_mnglr not yet activated");
773 		goto input_done;
774 	}
775 
776 	if (p_pkt_mnglr->dir == OUT) {
777 		goto input_done;
778 	}
779 
780 	if (data == NULL) {
781 		PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL");
782 		goto input_done;
783 	}
784 
785 	/* Check for IP filter options */
786 	error = mbuf_copydata(*data, 0, sizeof(ip), &ip);
787 	if (error) {
788 		PKT_MNGLR_LOG(LOG_ERR, "Could not make local IP header copy");
789 		goto input_done;
790 	}
791 
792 	if (ip.ip_v == 6) {
793 		error = mbuf_copydata(*data, 0, sizeof(ip6), &ip6);
794 		if (error) {
795 			PKT_MNGLR_LOG(LOG_ERR, "Could not make local IPv6 header copy");
796 			goto input_done;
797 		}
798 	}
799 
800 	if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET6) && (ip.ip_v == 4)) {
801 		PKT_MNGLR_LOG(LOG_INFO, "Skipping filtering as address family of packet is IPv4 but local "
802 		    "address is set to IPv6");
803 		goto input_done;
804 	}
805 
806 	if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET) && (ip.ip_v == 6)) {
807 		PKT_MNGLR_LOG(LOG_INFO, "Skipping filtering as address family "
808 		    "of packet is IPv6 but local address is set to IPv4");
809 		goto input_done;
810 	}
811 
812 	if (p_pkt_mnglr->lsaddr.ss_family == AF_INET) {
813 		struct sockaddr_in laddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->lsaddr));
814 		if (ip.ip_dst.s_addr != laddr.sin_addr.s_addr) {
815 			goto input_done;
816 		}
817 	} else if (p_pkt_mnglr->lsaddr.ss_family == AF_INET6) {
818 		struct sockaddr_in6 laddr = *(struct sockaddr_in6 *)(&(p_pkt_mnglr->lsaddr));
819 		if (!IN6_ARE_ADDR_EQUAL(&ip6.ip6_dst, &laddr.sin6_addr)) {
820 			goto input_done;
821 		}
822 	}
823 
824 	if (p_pkt_mnglr->rsaddr.ss_family == AF_INET) {
825 		struct sockaddr_in raddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->rsaddr));
826 		if (ip.ip_src.s_addr != raddr.sin_addr.s_addr) {
827 			goto input_done;
828 		}
829 		PKT_MNGLR_LOG(LOG_INFO, "Remote IP: %x Source IP: %x in input path",
830 		    raddr.sin_addr.s_addr,
831 		    ip.ip_src.s_addr);
832 	} else if (p_pkt_mnglr->rsaddr.ss_family == AF_INET6) {
833 		struct sockaddr_in6 raddr = *(struct sockaddr_in6 *)(&(p_pkt_mnglr->rsaddr));
834 		if (!IN6_ARE_ADDR_EQUAL(&ip6.ip6_src, &raddr.sin6_addr)) {
835 			goto input_done;
836 		}
837 	}
838 
839 	if (ip.ip_v == 4) {
840 		ip_pld_len = ntohs(ip.ip_len) - (ip.ip_hl << 2);
841 	} else if (ip.ip_v == 6) {
842 		if (ip6.ip6_nxt != p_pkt_mnglr->proto) {
843 			/* Don't support IPv6 extension headers */
844 			goto input_done;
845 		}
846 		ip_pld_len = ntohs(ip6.ip6_plen);
847 	} else {
848 		goto input_done;
849 	}
850 
851 
852 	if (protocol != p_pkt_mnglr->proto) {
853 		PKT_MNGLR_LOG(LOG_INFO, "Skip: Protocol mismatch");
854 		goto input_done;
855 	}
856 
857 	switch (protocol) {
858 	case IPPROTO_TCP:
859 		if (ip_pld_len < sizeof(tcp)) {
860 			PKT_MNGLR_LOG(LOG_ERR, "IP total len not big enough for TCP: %zu", ip_pld_len);
861 			goto drop_it;
862 		}
863 
864 		error = mbuf_copydata(*data, (size_t)offset, sizeof(tcp), &tcp);
865 		if (error) {
866 			PKT_MNGLR_LOG(LOG_ERR, "Could not make local TCP header copy");
867 			goto input_done;
868 		}
869 
870 		if (p_pkt_mnglr->lport && (p_pkt_mnglr->lport != tcp.th_dport)) {
871 			PKT_MNGLR_LOG(LOG_INFO, "Local port and IP des port do not match");
872 			goto input_done;
873 		}
874 
875 		if (p_pkt_mnglr->rport && (p_pkt_mnglr->rport != tcp.th_sport)) {
876 			PKT_MNGLR_LOG(LOG_INFO, "Remote port and IP src port do not match");
877 			goto input_done;
878 		}
879 		break;
880 	case IPPROTO_UDP:
881 		goto input_done;
882 	case IPPROTO_ICMP:
883 		goto input_done;
884 	case IPPROTO_ICMPV6:
885 		goto input_done;
886 	default:
887 		goto input_done;
888 	}
889 
890 	/* XXX Do IP actions here */
891 	PKT_MNGLR_LOG(LOG_INFO, "Proceeding with packet mangler actions on the packet");
892 
893 	/* Protocol actions */
894 	switch (protocol) {
895 	case IPPROTO_TCP:
896 		if (p_pkt_mnglr->proto_action_mask) {
897 			unsigned char tcp_opt_buf[TCP_MAX_OPTLEN] = {0};
898 			size_t orig_tcp_optlen;
899 			size_t tcp_optlen = 0;
900 			size_t i = 0, off;
901 
902 			off = (tcp.th_off << 2);
903 
904 			if (off < sizeof(struct tcphdr) || off > ip_pld_len) {
905 				PKT_MNGLR_LOG(LOG_ERR, "TCP header offset is wrong: %zu", off);
906 				goto drop_it;
907 			}
908 
909 			tcp_optlen = off - sizeof(struct tcphdr);
910 
911 			PKT_MNGLR_LOG(LOG_INFO, "Packet from F5 is TCP\n");
912 			PKT_MNGLR_LOG(LOG_INFO, "Optlen: %zu\n", tcp_optlen);
913 			orig_tcp_optlen = tcp_optlen;
914 			if (orig_tcp_optlen) {
915 				error = mbuf_copydata(*data, (size_t)offset + sizeof(struct tcphdr), orig_tcp_optlen, tcp_opt_buf);
916 				if (error) {
917 					PKT_MNGLR_LOG(LOG_ERR, "Failed to copy tcp options: error %d offset %d optlen %zu", error, offset, orig_tcp_optlen);
918 					goto input_done;
919 				}
920 			}
921 
922 			while (tcp_optlen > 0) {
923 				if (tcp_opt_buf[i] == 0x1) {
924 					PKT_MNGLR_LOG(LOG_INFO, "Skipping NOP\n");
925 					tcp_optlen--;
926 					i++;
927 					continue;
928 				} else if ((tcp_opt_buf[i] != 0) && (tcp_opt_buf[i] != TCP_OPT_MULTIPATH_TCP)) {
929 					unsigned char optlen;
930 
931 					PKT_MNGLR_LOG(LOG_INFO, "Skipping option %x\n", tcp_opt_buf[i]);
932 
933 					if (tcp_optlen < 2) {
934 						PKT_MNGLR_LOG(LOG_ERR, "Received short TCP option");
935 						goto drop_it;
936 					}
937 
938 					/* Minimum TCP option size is 2 */
939 					optlen = tcp_opt_buf[i + 1];
940 					if (optlen < 2 || optlen > tcp_optlen) {
941 						PKT_MNGLR_LOG(LOG_ERR, "Received suspicious TCP option");
942 						goto drop_it;
943 					}
944 					tcp_optlen -= optlen;
945 					i += optlen;
946 					continue;
947 				} else if (tcp_opt_buf[i] == TCP_OPT_MULTIPATH_TCP) {
948 					size_t j = 0;
949 					unsigned char mptcpoptlen;
950 					uint8_t sbtver;
951 					uint8_t subtype;
952 
953 					if (tcp_optlen < 3) {
954 						PKT_MNGLR_LOG(LOG_ERR, "Received short MPTCP option");
955 						goto drop_it;
956 					}
957 
958 					/* Minimum MPTCP option size is 3 */
959 					mptcpoptlen = tcp_opt_buf[i + 1];
960 					if (mptcpoptlen < 3 || mptcpoptlen > tcp_optlen) {
961 						PKT_MNGLR_LOG(LOG_ERR, "Received suspicious MPTCP option");
962 						goto drop_it;
963 					}
964 
965 					sbtver = tcp_opt_buf[i + MPTCP_SBT_VER_OFFSET];
966 					subtype = sbtver >> 4;
967 
968 					PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP option %x\n", tcp_opt_buf[i]);
969 					PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP subtype %x\n", subtype);
970 					if (subtype == MPO_DSS) {
971 						PKT_MNGLR_LOG(LOG_INFO, "Got DSS option\n");
972 						PKT_MNGLR_LOG(LOG_INFO, "Protocol option mask: %d\n", p_pkt_mnglr->proto_action_mask);
973 						if (p_pkt_mnglr->proto_action_mask &
974 						    PKT_MNGLR_TCP_ACT_DSS_DROP) {
975 							goto drop_it;
976 						}
977 					}
978 
979 					PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP option %x\n", tcp_opt_buf[i]);
980 					for (; j < mptcpoptlen && j < tcp_optlen; j++) {
981 						if (p_pkt_mnglr->proto_action_mask &
982 						    PKT_MNGLR_TCP_ACT_NOP_MPTCP) {
983 							tcp_opt_buf[i + j] = 0x1;
984 						}
985 					}
986 					tcp_optlen -= mptcpoptlen;
987 					i += mptcpoptlen;
988 				} else {
989 					tcp_optlen--;
990 					i++;
991 				}
992 			}
993 
994 			if (orig_tcp_optlen) {
995 				error = mbuf_copyback(*data,
996 				    (size_t)offset + sizeof(struct tcphdr),
997 				    orig_tcp_optlen, tcp_opt_buf, MBUF_WAITOK);
998 
999 				if (error) {
1000 					PKT_MNGLR_LOG(LOG_ERR,
1001 					    "Failed to copy tcp options back: error %d offset %d optlen %zu",
1002 					    error, offset, orig_tcp_optlen);
1003 					goto input_done;
1004 				}
1005 			}
1006 		}
1007 		break;
1008 	case IPPROTO_UDP:
1009 		/* Don't handle UDP */
1010 		break;
1011 	case IPPROTO_ICMP:
1012 		break;
1013 	case IPPROTO_ICMPV6:
1014 		break;
1015 	default:
1016 		break;
1017 	}
1018 	chksm_update(*data);
1019 input_done:
1020 	return 0;
1021 
1022 drop_it:
1023 	PKT_MNGLR_LOG(LOG_INFO, "Dropping packet\n");
1024 	mbuf_freem(*data);
1025 	return EJUSTRETURN;
1026 }
1027 
/*
 * IP filter detach callback. Intentionally empty: the function body does
 * nothing with the cookie.
 */
static void
pktmnglr_ipfilter_detach(void *cookie)
{
	(void)cookie;
}
1034 
1035 /* XXX Still need to modify this to use mbuf_copy* macros */
1036 static void
chksm_update(mbuf_t data)1037 chksm_update(mbuf_t data)
1038 {
1039 	u_int16_t ip_sum;
1040 	u_int16_t tsum;
1041 	struct tcphdr *tcp;
1042 	errno_t err;
1043 
1044 	unsigned char *ptr = (unsigned char *)mbuf_data(data);
1045 	struct ip *ip = (struct ip *)(void *)ptr;
1046 	if (ip->ip_v != 4) {
1047 		return;
1048 	}
1049 
1050 	ip->ip_sum = 0;
1051 	err = mbuf_inet_cksum(data, 0, 0, ip->ip_hl << 2, &ip_sum); // ip sum
1052 	if (err == 0) {
1053 		ip->ip_sum = ip_sum;
1054 	}
1055 	switch (ip->ip_p) {
1056 	case IPPROTO_TCP:
1057 		tcp = (struct tcphdr *)(void *)(ptr + (ip->ip_hl << 2));
1058 		tcp->th_sum = 0;
1059 		err = mbuf_inet_cksum(data, IPPROTO_TCP, ip->ip_hl << 2,
1060 		        ntohs(ip->ip_len) - (ip->ip_hl << 2), &tsum);
1061 		if (err == 0) {
1062 			tcp->th_sum = tsum;
1063 		}
1064 		break;
1065 	case IPPROTO_UDP:
1066 		/* Don't handle UDP */
1067 		break;
1068 	case IPPROTO_ICMP:
1069 		break;
1070 	case IPPROTO_ICMPV6:
1071 		break;
1072 	default:
1073 		break;
1074 	}
1075 
1076 	mbuf_clear_csum_performed(data);
1077 	return;
1078 }
1079