xref: /xnu-8792.61.2/bsd/net/packet_mangler.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
11  * file.
12  *
13  * The Original Code and all software distributed under the License are
14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18  * Please see the License for the specific language governing rights and
19  * limitations under the License.
20  *
21  * @APPLE_LICENSE_HEADER_END@
22  */
23 
/*
 * THEORY OF OPERATION
 *
 * The packet mangler subsystem provides a limited way for user space
 * applications to apply certain actions on certain flows.
 *
 * A user space application opens a kernel control socket with the name
 * PACKET_MANGLER_CONTROL_NAME to attach to the packet mangler subsystem.
 * When connected, a "struct packet_mangler" is created and set as the
 * "unitinfo" of the corresponding kernel control socket instance.
 * The connect call for the packet mangler's kernel control socket also
 * registers IP filters with the cookie set to the packet_mangler instance.
 * The IP filters are removed when the control socket is disconnected.
 */
38 #include <sys/types.h>
39 #include <sys/kern_control.h>
40 #include <sys/domain.h>
41 #include <sys/protosw.h>
42 #include <sys/syslog.h>
43 
44 #include <kern/locks.h>
45 #include <kern/zalloc.h>
46 #include <kern/debug.h>
47 
48 #include <net/packet_mangler.h>
49 
50 #include <netinet/mptcp.h>
51 #include <netinet/tcp.h>
52 #include <netinet/tcp_var.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip6.h>
55 #include <netinet/kpi_ipfilter.h>
56 #include <string.h>
57 #include <libkern/libkern.h>
58 
/*
 * Number of slots in packet_manglers[]. Kernel control unit numbers above
 * this value are rejected by the control socket callbacks.
 */
#define MAX_PACKET_MANGLER                      1

/* pkt_mnglr_flags bit: set once both the IPv4 and IPv6 filters are registered. */
#define PKT_MNGLR_FLG_IPFILTER_ATTACHED         0x00000001

/* net.pktmnglr sysctl node and its "log" entry, backed by pkt_mnglr_log_level. */
SYSCTL_NODE(_net, OID_AUTO, pktmnglr, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "pktmnglr");
SYSCTL_INT(_net_pktmnglr, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
    &pkt_mnglr_log_level, 0, "");
/*
 * The structure packet_mangler represents a user space packet filter.
 * It is created and associated with a kernel control socket instance.
 *
 * Instances are allocated from packet_mangler_zone in the connect callback
 * and released in the disconnect callback. The option fields are written by
 * the setopt callback and read by the getopt callback and the IP filter
 * callbacks.
 */
struct packet_mangler {
	kern_ctl_ref                    pkt_mnglr_kcref;        /* control reference passed to connect */
	uint32_t                        pkt_mnglr_kcunit;       /* control unit number (1-based) */
	uint32_t                        pkt_mnglr_flags;        /* PKT_MNGLR_FLG_* bits */
	/* IP filter related params */
	ipfilter_t                      pkt_mnglr_ipfref;       /* handle returned by ipf_addv4() */
	ipfilter_t                      pkt_mnglr_ipfrefv6;     /* handle returned by ipf_addv6() */
	struct ipf_filter               pkt_mnglr_ipfilter;     /* callbacks + cookie handed to ipf_add*() */

	/* Options */
	uint8_t                         activate;               /* filters do nothing while this is 0 */
	Pkt_Mnglr_Flow                  dir;                    /* compared against IN / OUT by the filters */
	struct sockaddr_storage         lsaddr;                 /* local address to match; ss_family 0 means unset */
	struct sockaddr_storage         rsaddr;                 /* remote address to match; ss_family 0 means unset */
	struct sockaddr_storage         swap_lsaddr;            /* not referenced in this file's visible code */
	struct sockaddr_storage         swap_rsaddr;            /* not referenced in this file's visible code */
	uint32_t                        ip_action_mask;         /* stored/returned via sockopt only in this file */
	uint16_t                        lport;                  /* compared directly with tcp.th_dport; 0 = any */
	uint16_t                        rport;                  /* compared directly with tcp.th_sport; 0 = any */
	uint32_t                        proto;                  /* IP protocol number the input filter must match */
	uint32_t                        proto_action_mask;      /* PKT_MNGLR_TCP_ACT_* bits applied to TCP options */
};
92 
/* Array of all the packet mangler instances, indexed by (kcunit - 1) */
struct packet_mangler *packet_manglers[MAX_PACKET_MANGLER];

uint32_t pkt_mnglr_active_count = 0;    /* Number of active packet filters */
uint32_t pkt_mnglr_close_wait_timeout = 1000; /* in milliseconds */

/* Reference filled in by ctl_register() in pkt_mnglr_init() */
static kern_ctl_ref pkt_mnglr_kctlref = NULL;

/* The lock below protects packet_manglers DS, packet_mangler DS */
static LCK_GRP_DECLARE(pkt_mnglr_lck_grp, "packet mangler");
static LCK_RW_DECLARE(pkt_mnglr_lck_rw, &pkt_mnglr_lck_grp);

/* Number of entries kept in each of the lock/unlock caller histories below */
#define PKT_MNGLR_RW_LCK_MAX    8

/* Circular history of return addresses of the most recent lock callers */
int pkt_mnglr_rw_nxt_lck = 0;
void* pkt_mnglr_rw_lock_history[PKT_MNGLR_RW_LCK_MAX];

/* Circular history of return addresses of the most recent unlock callers */
int pkt_mnglr_rw_nxt_unlck = 0;
void* pkt_mnglr_rw_unlock_history[PKT_MNGLR_RW_LCK_MAX];

/* Zone from which struct packet_mangler instances are allocated */
static ZONE_DEFINE(packet_mangler_zone, "packet_mangler",
    sizeof(struct packet_mangler), ZC_NONE);

/*
 * For troubleshooting
 */
int pkt_mnglr_log_level = LOG_ERR;      /* exported through the net.pktmnglr.log sysctl */
int pkt_mnglr_debug = 1;
121 
/*
 * Forward declaration to appease the compiler
 */

/* Lock wrappers that also record the caller's return address in a history ring. */
static void pkt_mnglr_rw_lock_exclusive(lck_rw_t *);
static void pkt_mnglr_rw_unlock_exclusive(lck_rw_t *);
static void pkt_mnglr_rw_lock_shared(lck_rw_t *);
static void pkt_mnglr_rw_unlock_shared(lck_rw_t *);

/* IP filter callbacks installed by pkt_mnglr_ctl_connect(). */
static errno_t pktmnglr_ipfilter_output(void *cookie, mbuf_t *data,
    ipf_pktopts_t options);
static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data,
    int offset, u_int8_t protocol);
static void pktmnglr_ipfilter_detach(void *cookie);

/* Recomputes the IPv4 header and TCP checksums of a packet. */
static void chksm_update(mbuf_t data);

/* TCP option kind that the input filter treats as Multipath TCP. */
#define TCP_OPT_MULTIPATH_TCP   30
/* Offset, from the start of an MPTCP option, of the byte whose high nibble is the subtype. */
#define MPTCP_SBT_VER_OFFSET    2
140 
141 /*
142  * packet filter global read write lock
143  */
144 
145 static void
pkt_mnglr_rw_lock_exclusive(lck_rw_t * lck)146 pkt_mnglr_rw_lock_exclusive(lck_rw_t *lck)
147 {
148 	void *lr_saved;
149 
150 	lr_saved = __builtin_return_address(0);
151 
152 	lck_rw_lock_exclusive(lck);
153 
154 	pkt_mnglr_rw_lock_history[pkt_mnglr_rw_nxt_lck] = lr_saved;
155 	pkt_mnglr_rw_nxt_lck =
156 	    (pkt_mnglr_rw_nxt_lck + 1) % PKT_MNGLR_RW_LCK_MAX;
157 }
158 
159 static void
pkt_mnglr_rw_unlock_exclusive(lck_rw_t * lck)160 pkt_mnglr_rw_unlock_exclusive(lck_rw_t *lck)
161 {
162 	void *lr_saved;
163 
164 	lr_saved = __builtin_return_address(0);
165 
166 	lck_rw_unlock_exclusive(lck);
167 
168 	pkt_mnglr_rw_unlock_history[pkt_mnglr_rw_nxt_unlck] =
169 	    lr_saved;
170 	pkt_mnglr_rw_nxt_unlck = (pkt_mnglr_rw_nxt_unlck + 1) % PKT_MNGLR_RW_LCK_MAX;
171 }
172 
173 static void
pkt_mnglr_rw_lock_shared(lck_rw_t * lck)174 pkt_mnglr_rw_lock_shared(lck_rw_t *lck)
175 {
176 	void *lr_saved;
177 
178 	lr_saved = __builtin_return_address(0);
179 
180 	lck_rw_lock_shared(lck);
181 
182 	pkt_mnglr_rw_lock_history[pkt_mnglr_rw_nxt_lck] = lr_saved;
183 	pkt_mnglr_rw_nxt_lck = (pkt_mnglr_rw_nxt_lck + 1) % PKT_MNGLR_RW_LCK_MAX;
184 }
185 
186 static void
pkt_mnglr_rw_unlock_shared(lck_rw_t * lck)187 pkt_mnglr_rw_unlock_shared(lck_rw_t *lck)
188 {
189 	void *lr_saved;
190 
191 	lr_saved = __builtin_return_address(0);
192 
193 	lck_rw_unlock_shared(lck);
194 
195 	pkt_mnglr_rw_unlock_history[pkt_mnglr_rw_nxt_unlck] = lr_saved;
196 	pkt_mnglr_rw_nxt_unlck = (pkt_mnglr_rw_nxt_unlck + 1) % PKT_MNGLR_RW_LCK_MAX;
197 }
198 
199 /*
200  * Packet Mangler's Kernel control socket callbacks
201  */
202 static errno_t
pkt_mnglr_ctl_connect(kern_ctl_ref kctlref,struct sockaddr_ctl * sac,void ** unitinfo)203 pkt_mnglr_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
204     void **unitinfo)
205 {
206 	errno_t error = 0;
207 	struct packet_mangler *p_pkt_mnglr = NULL;
208 
209 	PKT_MNGLR_LOG(LOG_NOTICE, "Connecting packet mangler filter.");
210 
211 	if (sac->sc_unit == 0 || sac->sc_unit > MAX_PACKET_MANGLER) {
212 		PKT_MNGLR_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
213 		error = EINVAL;
214 		goto fail;
215 	}
216 
217 	p_pkt_mnglr = zalloc_flags(packet_mangler_zone,
218 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
219 
220 	pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw);
221 
222 	if (packet_manglers[sac->sc_unit - 1] != NULL) {
223 		PKT_MNGLR_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
224 		error = EADDRINUSE;
225 		pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
226 		goto fail_free;
227 	} else {
228 		/*
229 		 * kernel control socket kcunit numbers start at 1
230 		 */
231 		packet_manglers[sac->sc_unit - 1] = p_pkt_mnglr;
232 
233 		p_pkt_mnglr->pkt_mnglr_kcref = kctlref;
234 		p_pkt_mnglr->pkt_mnglr_kcunit = sac->sc_unit;
235 
236 		pkt_mnglr_active_count++;
237 	}
238 
239 	p_pkt_mnglr->pkt_mnglr_ipfilter.cookie = p_pkt_mnglr;
240 	p_pkt_mnglr->pkt_mnglr_ipfilter.name = "com.apple.pktmnglripfilter";
241 	p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_input = pktmnglr_ipfilter_input;
242 	p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_output = pktmnglr_ipfilter_output;
243 	p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_detach = pktmnglr_ipfilter_detach;
244 	error = ipf_addv4(&(p_pkt_mnglr->pkt_mnglr_ipfilter), &(p_pkt_mnglr->pkt_mnglr_ipfref));
245 	if (error) {
246 		PKT_MNGLR_LOG(LOG_ERR, "Could not register packet mangler's IPv4 Filter");
247 		goto fail_locked;
248 	}
249 	error = ipf_addv6(&(p_pkt_mnglr->pkt_mnglr_ipfilter), &(p_pkt_mnglr->pkt_mnglr_ipfrefv6));
250 	if (error) {
251 		ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfref);
252 		PKT_MNGLR_LOG(LOG_ERR, "Could not register packet mangler's IPv6 Filter");
253 		goto fail_locked;
254 	}
255 
256 	PKT_MNGLR_LOG(LOG_INFO, "Registered packet mangler's IP Filters");
257 	p_pkt_mnglr->pkt_mnglr_flags |= PKT_MNGLR_FLG_IPFILTER_ATTACHED;
258 	pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
259 
260 	if (error) {
261 fail_locked:
262 		pkt_mnglr_active_count--;
263 
264 		packet_manglers[sac->sc_unit - 1] = NULL;
265 		*unitinfo = NULL;
266 
267 		pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
268 
269 fail_free:
270 		zfree(packet_mangler_zone, p_pkt_mnglr);
271 	}
272 
273 fail:
274 	*unitinfo = p_pkt_mnglr;
275 
276 	PKT_MNGLR_LOG(LOG_INFO, "return %d pkt_mnglr_active_count %u kcunit %u",
277 	    error, pkt_mnglr_active_count, sac->sc_unit);
278 
279 	return error;
280 }
281 
282 static errno_t
pkt_mnglr_ctl_disconnect(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo)283 pkt_mnglr_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
284 {
285 #pragma unused(kctlref)
286 	errno_t error = 0;
287 	struct packet_mangler *p_pkt_mnglr;
288 
289 	PKT_MNGLR_LOG(LOG_INFO, "Disconnecting packet mangler kernel control");
290 
291 	if (unitinfo == NULL) {
292 		goto done;
293 	}
294 
295 	if (kcunit > MAX_PACKET_MANGLER) {
296 		PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d)",
297 		    kcunit, MAX_PACKET_MANGLER);
298 		error = EINVAL;
299 		goto done;
300 	}
301 
302 	p_pkt_mnglr = (struct packet_mangler *)unitinfo;
303 
304 	pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw);
305 	if (packet_manglers[kcunit - 1] != p_pkt_mnglr || p_pkt_mnglr->pkt_mnglr_kcunit != kcunit) {
306 		PKT_MNGLR_LOG(LOG_ERR, "bad unit info %u",
307 		    kcunit);
308 		pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
309 		goto done;
310 	}
311 
312 	/*
313 	 * Make filter inactive
314 	 */
315 	packet_manglers[kcunit - 1] = NULL;
316 	pkt_mnglr_active_count--;
317 	if (p_pkt_mnglr->pkt_mnglr_flags & PKT_MNGLR_FLG_IPFILTER_ATTACHED) {
318 		(void) ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfref);
319 		(void) ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfrefv6);
320 	}
321 	pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
322 	zfree(packet_mangler_zone, p_pkt_mnglr);
323 done:
324 	PKT_MNGLR_LOG(LOG_INFO, "return %d pkt_mnglr_active_count %u kcunit %u",
325 	    error, pkt_mnglr_active_count, kcunit);
326 
327 	return error;
328 }
329 
330 static errno_t
pkt_mnglr_ctl_getopt(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo,int opt,void * data,size_t * len)331 pkt_mnglr_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
332     int opt, void *data, size_t *len)
333 {
334 #pragma unused(kctlref, opt)
335 	errno_t error = 0;
336 	struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)unitinfo;
337 
338 	PKT_MNGLR_LOG(LOG_NOTICE, "");
339 
340 	pkt_mnglr_rw_lock_shared(&pkt_mnglr_lck_rw);
341 
342 	if (kcunit > MAX_PACKET_MANGLER) {
343 		PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d)",
344 		    kcunit, MAX_PACKET_MANGLER);
345 		error = EINVAL;
346 		goto done;
347 	}
348 	if (p_pkt_mnglr != (void *)packet_manglers[kcunit - 1]) {
349 		PKT_MNGLR_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
350 		    kcunit);
351 		error = EINVAL;
352 		goto done;
353 	}
354 	switch (opt) {
355 	case PKT_MNGLR_OPT_PROTO_ACT_MASK:
356 		if (*len < sizeof(uint32_t)) {
357 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK "
358 			    "len too small %lu", *len);
359 			error = EINVAL;
360 			goto done;
361 		}
362 
363 		if (data != NULL) {
364 			*(uint32_t *)data = p_pkt_mnglr->proto_action_mask;
365 		}
366 		break;
367 	case PKT_MNGLR_OPT_IP_ACT_MASK:
368 		if (*len < sizeof(uint32_t)) {
369 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK "
370 			    "len too small %lu", *len);
371 			error = EINVAL;
372 			goto done;
373 		}
374 
375 		if (data != NULL) {
376 			*(uint32_t *)data = p_pkt_mnglr->ip_action_mask;
377 		}
378 		break;
379 	case PKT_MNGLR_OPT_LOCAL_IP:
380 		if (*len < sizeof(struct sockaddr_storage)) {
381 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP "
382 			    "len too small %lu", *len);
383 			error = EINVAL;
384 			goto done;
385 		}
386 
387 		if (data != NULL) {
388 			*(struct sockaddr_storage *)data = p_pkt_mnglr->lsaddr;
389 		}
390 		break;
391 	case PKT_MNGLR_OPT_REMOTE_IP:
392 		if (*len < sizeof(struct sockaddr_storage)) {
393 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP "
394 			    "len too small %lu", *len);
395 			error = EINVAL;
396 			goto done;
397 		}
398 
399 		if (data != NULL) {
400 			*(struct sockaddr_storage *)data = p_pkt_mnglr->rsaddr;
401 		}
402 		break;
403 	case PKT_MNGLR_OPT_LOCAL_PORT:
404 		if (*len < sizeof(uint16_t)) {
405 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT "
406 			    "len too small %lu", *len);
407 			error = EINVAL;
408 			goto done;
409 		}
410 
411 		if (data != NULL) {
412 			*(uint16_t *)data = p_pkt_mnglr->lport;
413 		}
414 		break;
415 	case PKT_MNGLR_OPT_REMOTE_PORT:
416 		if (*len < sizeof(uint16_t)) {
417 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT "
418 			    "len too small %lu", *len);
419 			error = EINVAL;
420 			goto done;
421 		}
422 
423 		if (data != NULL) {
424 			*(uint16_t *)data = p_pkt_mnglr->rport;
425 		}
426 		break;
427 	case PKT_MNGLR_OPT_DIRECTION:
428 		if (*len < sizeof(uint32_t)) {
429 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION "
430 			    "len too small %lu", *len);
431 			error = EINVAL;
432 			goto done;
433 		}
434 		if (data != NULL) {
435 			*(uint32_t *)data = p_pkt_mnglr->dir;
436 		}
437 		break;
438 	case PKT_MNGLR_OPT_PROTOCOL:
439 		if (*len < sizeof(uint32_t)) {
440 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL "
441 			    "len too small %lu", *len);
442 			error = EINVAL;
443 			goto done;
444 		}
445 		if (data != NULL) {
446 			*(uint32_t *)data = p_pkt_mnglr->proto;
447 		}
448 		break;
449 	case PKT_MNGLR_OPT_ACTIVATE:
450 		if (*len < sizeof(uint8_t)) {
451 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE "
452 			    "len too small %lu", *len);
453 			error = EINVAL;
454 			goto done;
455 		}
456 
457 		if (data != NULL) {
458 			*(uint8_t *)data = p_pkt_mnglr->activate;
459 		}
460 		break;
461 	default:
462 		error = ENOPROTOOPT;
463 		break;
464 	}
465 done:
466 	pkt_mnglr_rw_unlock_shared(&pkt_mnglr_lck_rw);
467 
468 	return error;
469 }
470 
471 static errno_t
pkt_mnglr_ctl_setopt(kern_ctl_ref kctlref,u_int32_t kcunit,void * unitinfo,int opt,void * data,size_t len)472 pkt_mnglr_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
473     int opt, void *data, size_t len)
474 {
475 #pragma unused(kctlref, opt)
476 	errno_t error = 0;
477 	struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)unitinfo;
478 
479 	PKT_MNGLR_LOG(LOG_NOTICE, "");
480 
481 	pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw);
482 
483 	if (kcunit > MAX_PACKET_MANGLER) {
484 		PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d)",
485 		    kcunit, MAX_PACKET_MANGLER);
486 		error = EINVAL;
487 		goto done;
488 	}
489 	if (p_pkt_mnglr != (void *)packet_manglers[kcunit - 1]) {
490 		PKT_MNGLR_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
491 		    kcunit);
492 		error = EINVAL;
493 		goto done;
494 	}
495 	switch (opt) {
496 	case PKT_MNGLR_OPT_PROTO_ACT_MASK:
497 		if (len < sizeof(uint32_t)) {
498 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK "
499 			    "len too small %lu", len);
500 			error = EINVAL;
501 			goto done;
502 		}
503 		if (p_pkt_mnglr->proto_action_mask != 0) {
504 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK "
505 			    "already set %u",
506 			    p_pkt_mnglr->proto_action_mask);
507 			error = EINVAL;
508 			goto done;
509 		}
510 		p_pkt_mnglr->proto_action_mask = *(uint32_t *)data;
511 		PKT_MNGLR_LOG(LOG_INFO, "p_pkt_mnglr->proto_action_mask set to :%d", p_pkt_mnglr->proto_action_mask);
512 		break;
513 	case PKT_MNGLR_OPT_IP_ACT_MASK:
514 		if (len < sizeof(uint32_t)) {
515 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK "
516 			    "len too small %lu", len);
517 			error = EINVAL;
518 			goto done;
519 		}
520 		if (p_pkt_mnglr->ip_action_mask != 0) {
521 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK "
522 			    "already set %u",
523 			    p_pkt_mnglr->ip_action_mask);
524 			error = EINVAL;
525 			goto done;
526 		}
527 		p_pkt_mnglr->ip_action_mask = *(uint32_t *)data;
528 		break;
529 	case PKT_MNGLR_OPT_LOCAL_IP:
530 		if (len < sizeof(struct sockaddr_storage)) {
531 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP "
532 			    "len too small %lu", len);
533 			error = EINVAL;
534 			goto done;
535 		}
536 		if (p_pkt_mnglr->lsaddr.ss_family) {
537 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP "
538 			    "already set");
539 			error = EINVAL;
540 			goto done;
541 		}
542 		p_pkt_mnglr->lsaddr = *(struct sockaddr_storage *)data;
543 		break;
544 	case PKT_MNGLR_OPT_REMOTE_IP:
545 		if (len < sizeof(struct sockaddr_storage)) {
546 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP "
547 			    "len too small %lu", len);
548 			error = EINVAL;
549 			goto done;
550 		}
551 		if (p_pkt_mnglr->rsaddr.ss_family) {
552 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP "
553 			    "already set");
554 			error = EINVAL;
555 			goto done;
556 		}
557 
558 		p_pkt_mnglr->rsaddr = *(struct sockaddr_storage *)data;
559 		PKT_MNGLR_LOG(LOG_INFO,
560 		    "Remote IP registered for address family: %d",
561 		    p_pkt_mnglr->rsaddr.ss_family);
562 		break;
563 	case PKT_MNGLR_OPT_LOCAL_PORT:
564 		if (len < sizeof(uint16_t)) {
565 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT "
566 			    "len too small %lu", len);
567 			error = EINVAL;
568 			goto done;
569 		}
570 		if (p_pkt_mnglr->lport != 0) {
571 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT "
572 			    "already set %d",
573 			    p_pkt_mnglr->lport);
574 			error = EINVAL;
575 			goto done;
576 		}
577 		p_pkt_mnglr->lport = *(uint16_t *)data;
578 		break;
579 	case PKT_MNGLR_OPT_REMOTE_PORT:
580 		if (len < sizeof(uint16_t)) {
581 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT "
582 			    "len too small %lu", len);
583 			error = EINVAL;
584 			goto done;
585 		}
586 		if (p_pkt_mnglr->rport != 0) {
587 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT "
588 			    "already set %d",
589 			    p_pkt_mnglr->rport);
590 			error = EINVAL;
591 			goto done;
592 		}
593 		p_pkt_mnglr->rport = *(uint16_t *)data;
594 		break;
595 	case PKT_MNGLR_OPT_DIRECTION:
596 		if (len < sizeof(uint32_t)) {
597 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION "
598 			    "len too small %lu", len);
599 			error = EINVAL;
600 			goto done;
601 		}
602 		if (p_pkt_mnglr->dir != 0) {
603 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION "
604 			    "already set %u",
605 			    p_pkt_mnglr->dir);
606 			error = EINVAL;
607 			goto done;
608 		}
609 		p_pkt_mnglr->dir = *(uint32_t *)data;
610 		break;
611 	case PKT_MNGLR_OPT_PROTOCOL:
612 		if (len < sizeof(uint32_t)) {
613 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL "
614 			    "len too small %lu", len);
615 			error = EINVAL;
616 			goto done;
617 		}
618 		if (p_pkt_mnglr->proto != 0) {
619 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL "
620 			    "already set %u",
621 			    p_pkt_mnglr->proto);
622 			error = EINVAL;
623 			goto done;
624 		}
625 		p_pkt_mnglr->proto = *(uint32_t *)data;
626 		break;
627 	case PKT_MNGLR_OPT_ACTIVATE:
628 		if (len < sizeof(uint8_t)) {
629 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE "
630 			    "len too small %lu", len);
631 			error = EINVAL;
632 			goto done;
633 		}
634 		if (p_pkt_mnglr->activate != 0) {
635 			PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE "
636 			    "already set %u",
637 			    p_pkt_mnglr->activate);
638 			error = EINVAL;
639 			goto done;
640 		}
641 		p_pkt_mnglr->activate = *(uint8_t *)data;
642 		PKT_MNGLR_LOG(LOG_ERR, "p_pkt_mnglr->activate set to :%d",
643 		    p_pkt_mnglr->activate);
644 		break;
645 	default:
646 		error = ENOPROTOOPT;
647 		break;
648 	}
649 done:
650 	pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
651 
652 	return error;
653 }
654 
655 void
pkt_mnglr_init(void)656 pkt_mnglr_init(void)
657 {
658 	struct kern_ctl_reg kern_ctl;
659 	errno_t error = 0;
660 
661 	PKT_MNGLR_LOG(LOG_NOTICE, "");
662 
663 	/*
664 	 * Compile time verifications
665 	 */
666 	_CASSERT(PKT_MNGLR_MAX_FILTER_COUNT == MAX_PACKET_MANGLER);
667 
668 	/*
669 	 * Register kernel control
670 	 */
671 	bzero(&kern_ctl, sizeof(kern_ctl));
672 	strlcpy(kern_ctl.ctl_name, PACKET_MANGLER_CONTROL_NAME,
673 	    sizeof(kern_ctl.ctl_name));
674 	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
675 	kern_ctl.ctl_connect = pkt_mnglr_ctl_connect;
676 	kern_ctl.ctl_disconnect = pkt_mnglr_ctl_disconnect;
677 	kern_ctl.ctl_getopt = pkt_mnglr_ctl_getopt;
678 	kern_ctl.ctl_setopt = pkt_mnglr_ctl_setopt;
679 	error = ctl_register(&kern_ctl, &pkt_mnglr_kctlref);
680 	if (error != 0) {
681 		PKT_MNGLR_LOG(LOG_ERR, "ctl_register failed: %d", error);
682 	} else {
683 		PKT_MNGLR_LOG(LOG_INFO, "Registered packet mangler kernel control.");
684 	}
685 }
686 
687 static errno_t
pktmnglr_ipfilter_output(void * cookie,mbuf_t * data,ipf_pktopts_t options)688 pktmnglr_ipfilter_output(void *cookie, mbuf_t *data, ipf_pktopts_t options)
689 {
690 	struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)cookie;
691 	struct ip ip;
692 	struct tcphdr tcp;
693 	int optlen = 0;
694 	errno_t error = 0;
695 
696 #pragma unused(tcp, optlen, options)
697 	if (p_pkt_mnglr == NULL) {
698 		goto output_done;
699 	}
700 
701 	if (!p_pkt_mnglr->activate) {
702 		goto output_done;
703 	}
704 
705 	if (p_pkt_mnglr->dir == IN) {
706 		goto output_done;
707 	}
708 
709 	if (data == NULL) {
710 		PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL");
711 		goto output_done;
712 	}
713 
714 	/* Check for IP filter options */
715 	error = mbuf_copydata(*data, 0, sizeof(ip), &ip);
716 	if (error) {
717 		PKT_MNGLR_LOG(LOG_ERR, "Could not make local IP header copy");
718 		goto output_done;
719 	}
720 
721 	if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET6) && (ip.ip_v == 4)) {
722 		goto output_done;
723 	}
724 
725 	if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET) && (ip.ip_v == 6)) {
726 		goto output_done;
727 	}
728 
729 	if (p_pkt_mnglr->lsaddr.ss_family == AF_INET) {
730 		struct sockaddr_in laddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->lsaddr));
731 		if (ip.ip_src.s_addr != laddr.sin_addr.s_addr) {
732 			goto output_done;
733 		}
734 	}
735 
736 	if (p_pkt_mnglr->rsaddr.ss_family == AF_INET) {
737 		struct sockaddr_in raddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->rsaddr));
738 		if (ip.ip_dst.s_addr != raddr.sin_addr.s_addr) {
739 			goto output_done;
740 		}
741 	}
742 
743 	if (ip.ip_v != 4) {
744 		PKT_MNGLR_LOG(LOG_INFO,
745 		    "%s:%d Not handling IP version %d\n",
746 		    __func__, __LINE__, ip.ip_v);
747 		goto output_done;
748 	}
749 
750 output_done:
751 	/* Not handling output flow */
752 	return 0;
753 }
754 
755 #define TCP_MAX_OPTLEN  40
756 
757 static errno_t
pktmnglr_ipfilter_input(void * cookie,mbuf_t * data,int offset,u_int8_t protocol)758 pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u_int8_t protocol)
759 {
760 	struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)cookie;
761 	struct ip6_hdr ip6;
762 	struct ip ip;
763 	struct tcphdr tcp;
764 	size_t ip_pld_len;
765 	errno_t error = 0;
766 
767 	if (p_pkt_mnglr == NULL) {
768 		PKT_MNGLR_LOG(LOG_ERR, "p_pkt_mnglr is NULL");
769 		goto input_done;
770 	}
771 
772 	if (p_pkt_mnglr->activate == 0) {
773 		PKT_MNGLR_LOG(LOG_INFO, "p_pkt_mnglr not yet activated");
774 		goto input_done;
775 	}
776 
777 	if (p_pkt_mnglr->dir == OUT) {
778 		goto input_done;
779 	}
780 
781 	if (data == NULL) {
782 		PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL");
783 		goto input_done;
784 	}
785 
786 	/* Check for IP filter options */
787 	error = mbuf_copydata(*data, 0, sizeof(ip), &ip);
788 	if (error) {
789 		PKT_MNGLR_LOG(LOG_ERR, "Could not make local IP header copy");
790 		goto input_done;
791 	}
792 
793 	if (ip.ip_v == 6) {
794 		error = mbuf_copydata(*data, 0, sizeof(ip6), &ip6);
795 		if (error) {
796 			PKT_MNGLR_LOG(LOG_ERR, "Could not make local IPv6 header copy");
797 			goto input_done;
798 		}
799 	}
800 
801 	if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET6) && (ip.ip_v == 4)) {
802 		PKT_MNGLR_LOG(LOG_INFO, "Skipping filtering as address family of packet is IPv4 but local "
803 		    "address is set to IPv6");
804 		goto input_done;
805 	}
806 
807 	if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET) && (ip.ip_v == 6)) {
808 		PKT_MNGLR_LOG(LOG_INFO, "Skipping filtering as address family "
809 		    "of packet is IPv6 but local address is set to IPv4");
810 		goto input_done;
811 	}
812 
813 	if (p_pkt_mnglr->lsaddr.ss_family == AF_INET) {
814 		struct sockaddr_in laddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->lsaddr));
815 		if (ip.ip_dst.s_addr != laddr.sin_addr.s_addr) {
816 			goto input_done;
817 		}
818 	} else if (p_pkt_mnglr->lsaddr.ss_family == AF_INET6) {
819 		struct sockaddr_in6 laddr = *(struct sockaddr_in6 *)(&(p_pkt_mnglr->lsaddr));
820 		if (!IN6_ARE_ADDR_EQUAL(&ip6.ip6_dst, &laddr.sin6_addr)) {
821 			goto input_done;
822 		}
823 	}
824 
825 	if (p_pkt_mnglr->rsaddr.ss_family == AF_INET) {
826 		struct sockaddr_in raddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->rsaddr));
827 		if (ip.ip_src.s_addr != raddr.sin_addr.s_addr) {
828 			goto input_done;
829 		}
830 		PKT_MNGLR_LOG(LOG_INFO, "Remote IP: %x Source IP: %x in input path",
831 		    raddr.sin_addr.s_addr,
832 		    ip.ip_src.s_addr);
833 	} else if (p_pkt_mnglr->rsaddr.ss_family == AF_INET6) {
834 		struct sockaddr_in6 raddr = *(struct sockaddr_in6 *)(&(p_pkt_mnglr->rsaddr));
835 		if (!IN6_ARE_ADDR_EQUAL(&ip6.ip6_src, &raddr.sin6_addr)) {
836 			goto input_done;
837 		}
838 	}
839 
840 	if (ip.ip_v == 4) {
841 		ip_pld_len = ntohs(ip.ip_len) - (ip.ip_hl << 2);
842 	} else if (ip.ip_v == 6) {
843 		if (ip6.ip6_nxt != p_pkt_mnglr->proto) {
844 			/* Don't support IPv6 extension headers */
845 			goto input_done;
846 		}
847 		ip_pld_len = ntohs(ip6.ip6_plen);
848 	} else {
849 		goto input_done;
850 	}
851 
852 
853 	if (protocol != p_pkt_mnglr->proto) {
854 		PKT_MNGLR_LOG(LOG_INFO, "Skip: Protocol mismatch");
855 		goto input_done;
856 	}
857 
858 	switch (protocol) {
859 	case IPPROTO_TCP:
860 		if (ip_pld_len < sizeof(tcp)) {
861 			PKT_MNGLR_LOG(LOG_ERR, "IP total len not big enough for TCP: %zu", ip_pld_len);
862 			goto drop_it;
863 		}
864 
865 		error = mbuf_copydata(*data, (size_t)offset, sizeof(tcp), &tcp);
866 		if (error) {
867 			PKT_MNGLR_LOG(LOG_ERR, "Could not make local TCP header copy");
868 			goto input_done;
869 		}
870 
871 		if (p_pkt_mnglr->lport && (p_pkt_mnglr->lport != tcp.th_dport)) {
872 			PKT_MNGLR_LOG(LOG_INFO, "Local port and IP des port do not match");
873 			goto input_done;
874 		}
875 
876 		if (p_pkt_mnglr->rport && (p_pkt_mnglr->rport != tcp.th_sport)) {
877 			PKT_MNGLR_LOG(LOG_INFO, "Remote port and IP src port do not match");
878 			goto input_done;
879 		}
880 		break;
881 	case IPPROTO_UDP:
882 		goto input_done;
883 	case IPPROTO_ICMP:
884 		goto input_done;
885 	case IPPROTO_ICMPV6:
886 		goto input_done;
887 	default:
888 		goto input_done;
889 	}
890 
891 	/* XXX Do IP actions here */
892 	PKT_MNGLR_LOG(LOG_INFO, "Proceeding with packet mangler actions on the packet");
893 
894 	/* Protocol actions */
895 	switch (protocol) {
896 	case IPPROTO_TCP:
897 		if (p_pkt_mnglr->proto_action_mask) {
898 			unsigned char tcp_opt_buf[TCP_MAX_OPTLEN] = {0};
899 			size_t orig_tcp_optlen;
900 			size_t tcp_optlen = 0;
901 			size_t i = 0, off;
902 
903 			off = (tcp.th_off << 2);
904 
905 			if (off < sizeof(struct tcphdr) || off > ip_pld_len) {
906 				PKT_MNGLR_LOG(LOG_ERR, "TCP header offset is wrong: %zu", off);
907 				goto drop_it;
908 			}
909 
910 			tcp_optlen = off - sizeof(struct tcphdr);
911 
912 			PKT_MNGLR_LOG(LOG_INFO, "Packet from F5 is TCP\n");
913 			PKT_MNGLR_LOG(LOG_INFO, "Optlen: %zu\n", tcp_optlen);
914 			orig_tcp_optlen = tcp_optlen;
915 			if (orig_tcp_optlen) {
916 				error = mbuf_copydata(*data, (size_t)offset + sizeof(struct tcphdr), orig_tcp_optlen, tcp_opt_buf);
917 				if (error) {
918 					PKT_MNGLR_LOG(LOG_ERR, "Failed to copy tcp options: error %d offset %d optlen %zu", error, offset, orig_tcp_optlen);
919 					goto input_done;
920 				}
921 			}
922 
923 			while (tcp_optlen > 0) {
924 				if (tcp_opt_buf[i] == 0x1) {
925 					PKT_MNGLR_LOG(LOG_INFO, "Skipping NOP\n");
926 					tcp_optlen--;
927 					i++;
928 					continue;
929 				} else if ((tcp_opt_buf[i] != 0) && (tcp_opt_buf[i] != TCP_OPT_MULTIPATH_TCP)) {
930 					unsigned char optlen;
931 
932 					PKT_MNGLR_LOG(LOG_INFO, "Skipping option %x\n", tcp_opt_buf[i]);
933 
934 					if (tcp_optlen < 2) {
935 						PKT_MNGLR_LOG(LOG_ERR, "Received short TCP option");
936 						goto drop_it;
937 					}
938 
939 					/* Minimum TCP option size is 2 */
940 					optlen = tcp_opt_buf[i + 1];
941 					if (optlen < 2 || optlen > tcp_optlen) {
942 						PKT_MNGLR_LOG(LOG_ERR, "Received suspicious TCP option");
943 						goto drop_it;
944 					}
945 					tcp_optlen -= optlen;
946 					i += optlen;
947 					continue;
948 				} else if (tcp_opt_buf[i] == TCP_OPT_MULTIPATH_TCP) {
949 					size_t j = 0;
950 					unsigned char mptcpoptlen;
951 					uint8_t sbtver;
952 					uint8_t subtype;
953 
954 					if (tcp_optlen < 3) {
955 						PKT_MNGLR_LOG(LOG_ERR, "Received short MPTCP option");
956 						goto drop_it;
957 					}
958 
959 					/* Minimum MPTCP option size is 3 */
960 					mptcpoptlen = tcp_opt_buf[i + 1];
961 					if (mptcpoptlen < 3 || mptcpoptlen > tcp_optlen) {
962 						PKT_MNGLR_LOG(LOG_ERR, "Received suspicious MPTCP option");
963 						goto drop_it;
964 					}
965 
966 					sbtver = tcp_opt_buf[i + MPTCP_SBT_VER_OFFSET];
967 					subtype = sbtver >> 4;
968 
969 					PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP option %x\n", tcp_opt_buf[i]);
970 					PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP subtype %x\n", subtype);
971 					if (subtype == MPO_DSS) {
972 						PKT_MNGLR_LOG(LOG_INFO, "Got DSS option\n");
973 						PKT_MNGLR_LOG(LOG_INFO, "Protocol option mask: %d\n", p_pkt_mnglr->proto_action_mask);
974 						if (p_pkt_mnglr->proto_action_mask &
975 						    PKT_MNGLR_TCP_ACT_DSS_DROP) {
976 							goto drop_it;
977 						}
978 					}
979 
980 					PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP option %x\n", tcp_opt_buf[i]);
981 					for (; j < mptcpoptlen && j < tcp_optlen; j++) {
982 						if (p_pkt_mnglr->proto_action_mask &
983 						    PKT_MNGLR_TCP_ACT_NOP_MPTCP) {
984 							tcp_opt_buf[i + j] = 0x1;
985 						}
986 					}
987 					tcp_optlen -= mptcpoptlen;
988 					i += mptcpoptlen;
989 				} else {
990 					tcp_optlen--;
991 					i++;
992 				}
993 			}
994 
995 			if (orig_tcp_optlen) {
996 				error = mbuf_copyback(*data,
997 				    (size_t)offset + sizeof(struct tcphdr),
998 				    orig_tcp_optlen, tcp_opt_buf, MBUF_WAITOK);
999 
1000 				if (error) {
1001 					PKT_MNGLR_LOG(LOG_ERR,
1002 					    "Failed to copy tcp options back: error %d offset %d optlen %zu",
1003 					    error, offset, orig_tcp_optlen);
1004 					goto input_done;
1005 				}
1006 			}
1007 		}
1008 		break;
1009 	case IPPROTO_UDP:
1010 		/* Don't handle UDP */
1011 		break;
1012 	case IPPROTO_ICMP:
1013 		break;
1014 	case IPPROTO_ICMPV6:
1015 		break;
1016 	default:
1017 		break;
1018 	}
1019 	chksm_update(*data);
1020 input_done:
1021 	return 0;
1022 
1023 drop_it:
1024 	PKT_MNGLR_LOG(LOG_INFO, "Dropping packet\n");
1025 	mbuf_freem(*data);
1026 	return EJUSTRETURN;
1027 }
1028 
/*
 * IP filter detach callback. It performs no work; the cookie is ignored.
 */
static void
pktmnglr_ipfilter_detach(void *cookie)
{
	(void)cookie;
}
1035 
1036 /* XXX Still need to modify this to use mbuf_copy* macros */
1037 static void
chksm_update(mbuf_t data)1038 chksm_update(mbuf_t data)
1039 {
1040 	u_int16_t ip_sum;
1041 	u_int16_t tsum;
1042 	struct tcphdr *tcp;
1043 	errno_t err;
1044 
1045 	unsigned char *ptr = (unsigned char *)mbuf_data(data);
1046 	struct ip *ip = (struct ip *)(void *)ptr;
1047 	if (ip->ip_v != 4) {
1048 		return;
1049 	}
1050 
1051 	ip->ip_sum = 0;
1052 	err = mbuf_inet_cksum(data, 0, 0, ip->ip_hl << 2, &ip_sum); // ip sum
1053 	if (err == 0) {
1054 		ip->ip_sum = ip_sum;
1055 	}
1056 	switch (ip->ip_p) {
1057 	case IPPROTO_TCP:
1058 		tcp = (struct tcphdr *)(void *)(ptr + (ip->ip_hl << 2));
1059 		tcp->th_sum = 0;
1060 		err = mbuf_inet_cksum(data, IPPROTO_TCP, ip->ip_hl << 2,
1061 		        ntohs(ip->ip_len) - (ip->ip_hl << 2), &tsum);
1062 		if (err == 0) {
1063 			tcp->th_sum = tsum;
1064 		}
1065 		break;
1066 	case IPPROTO_UDP:
1067 		/* Don't handle UDP */
1068 		break;
1069 	case IPPROTO_ICMP:
1070 		break;
1071 	case IPPROTO_ICMPV6:
1072 		break;
1073 	default:
1074 		break;
1075 	}
1076 
1077 	mbuf_clear_csum_performed(data);
1078 	return;
1079 }
1080