xref: /xnu-10002.81.5/bsd/netinet/ip_encap.c (revision 5e3eaea39dcf651e66cb99ba7d70e32cc4a99587)
1 /*
2  * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*	$FreeBSD: src/sys/netinet/ip_encap.c,v 1.1.2.2 2001/07/03 11:01:46 ume Exp $	*/
29 /*	$KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $	*/
30 
31 /*
32  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
33  * All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. Neither the name of the project nor the names of its contributors
44  *    may be used to endorse or promote products derived from this software
45  *    without specific prior written permission.
46  *
47  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
48  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
51  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57  * SUCH DAMAGE.
58  */
59 /*
60  * My grandfather said that there's a devil inside tunnelling technology...
61  *
62  * We have surprisingly many protocols that want packets with IP protocol
63  * #4 or #41.  Here's a list of protocols that want protocol #41:
64  *	RFC1933 configured tunnel
65  *	RFC1933 automatic tunnel
66  *	RFC2401 IPsec tunnel
67  *	RFC2473 IPv6 generic packet tunnelling
68  *	RFC2529 6over4 tunnel
69  *	mobile-ip6 (uses RFC2473)
70  *	6to4 tunnel
71  * Here's a list of protocol that want protocol #4:
72  *	RFC1853 IPv4-in-IPv4 tunnelling
73  *	RFC2003 IPv4 encapsulation within IPv4
74  *	RFC2344 reverse tunnelling for mobile-ip4
75  *	RFC2401 IPsec tunnel
76  * Well, what can I say.  They impose different en/decapsulation mechanism
77  * from each other, so they need separate protocol handler.  The only one
78  * we can easily determine by protocol # is IPsec, which always has
79  * AH/ESP header right after outer IP header.
80  *
81  * So, clearly good old protosw does not work for protocol #4 and #41.
82  * The code will let you match protocol via src/dst address pair.
83  */
84 /* XXX is M_NETADDR correct? */
85 
86 #include <sys/param.h>
87 #include <sys/systm.h>
88 #include <sys/socket.h>
89 #include <sys/sockio.h>
90 #include <sys/mbuf.h>
91 #include <sys/mcache.h>
92 #include <sys/errno.h>
93 #include <sys/domain.h>
94 #include <sys/protosw.h>
95 #include <sys/queue.h>
96 
97 #include <net/if.h>
98 #include <net/route.h>
99 
100 #include <netinet/in.h>
101 #include <netinet/in_systm.h>
102 #include <netinet/ip.h>
103 #include <netinet/ip_var.h>
104 #include <netinet/ip_encap.h>
105 
106 #include <netinet/ip6.h>
107 #include <netinet6/ip6_var.h>
108 #include <netinet6/ip6protosw.h>
109 
110 #include <net/net_osdep.h>
111 
112 #ifndef __APPLE__
113 #include <sys/kernel.h>
114 #include <sys/malloc.h>
115 MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
116 #endif
117 
static void encap_add_locked(struct encaptab *);
static int mask_match(const struct encaptab *, const struct sockaddr *,
    const struct sockaddr *);
static void encap_fillarg(struct mbuf *, void *arg);

/* Global list of attached encapsulation handlers; guarded by encaptab_lock. */
LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab);

/*
 * Reader/writer lock for encaptab: input paths take it shared while
 * scanning, attach/detach take it exclusive while mutating the list.
 */
static LCK_GRP_DECLARE(encaptab_lock_grp, "encaptab lock");
static LCK_RW_DECLARE(encaptab_lock, &encaptab_lock_grp);
127 
128 #if INET
129 void
encap4_input(struct mbuf * m,int off)130 encap4_input(struct mbuf *m, int off)
131 {
132 	int proto;
133 	struct ip *ip;
134 	struct sockaddr_in s, d;
135 	const struct protosw *psw;
136 	struct encaptab *ep, *match;
137 	int prio, matchprio;
138 	void *match_arg = NULL;
139 
140 #ifndef __APPLE__
141 	va_start(ap, m);
142 	off = va_arg(ap, int);
143 	proto = va_arg(ap, int);
144 	va_end(ap);
145 #endif
146 
147 	/* Expect 32-bit aligned data pointer on strict-align platforms */
148 	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
149 
150 	ip = mtod(m, struct ip *);
151 #ifdef __APPLE__
152 	proto = ip->ip_p;
153 #endif
154 
155 	bzero(&s, sizeof(s));
156 	s.sin_family = AF_INET;
157 	s.sin_len = sizeof(struct sockaddr_in);
158 	s.sin_addr = ip->ip_src;
159 	bzero(&d, sizeof(d));
160 	d.sin_family = AF_INET;
161 	d.sin_len = sizeof(struct sockaddr_in);
162 	d.sin_addr = ip->ip_dst;
163 
164 	match = NULL;
165 	matchprio = 0;
166 
167 	lck_rw_lock_shared(&encaptab_lock);
168 	for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) {
169 		if (ep->af != AF_INET) {
170 			continue;
171 		}
172 		if (ep->proto >= 0 && ep->proto != proto) {
173 			continue;
174 		}
175 		if (ep->func) {
176 			prio = (*ep->func)(m, off, proto, ep->arg);
177 		} else {
178 			/*
179 			 * it's inbound traffic, we need to match in reverse
180 			 * order
181 			 */
182 			prio = mask_match(ep, (struct sockaddr *)&d,
183 			    (struct sockaddr *)&s);
184 		}
185 
186 		/*
187 		 * We prioritize the matches by using bit length of the
188 		 * matches.  mask_match() and user-supplied matching function
189 		 * should return the bit length of the matches (for example,
190 		 * if both src/dst are matched for IPv4, 64 should be returned).
191 		 * 0 or negative return value means "it did not match".
192 		 *
193 		 * The question is, since we have two "mask" portion, we
194 		 * cannot really define total order between entries.
195 		 * For example, which of these should be preferred?
196 		 * mask_match() returns 48 (32 + 16) for both of them.
197 		 *	src=3ffe::/16, dst=3ffe:501::/32
198 		 *	src=3ffe:501::/32, dst=3ffe::/16
199 		 *
200 		 * We need to loop through all the possible candidates
201 		 * to get the best match - the search takes O(n) for
202 		 * n attachments (i.e. interfaces).
203 		 */
204 		if (prio <= 0) {
205 			continue;
206 		}
207 		if (prio > matchprio) {
208 			matchprio = prio;
209 			match = ep;
210 			psw = (const struct protosw *)match->psw;
211 			match_arg = ep->arg;
212 		}
213 	}
214 	lck_rw_unlock_shared(&encaptab_lock);
215 
216 	if (match) {
217 		/* found a match, "match" has the best one */
218 		if (psw && psw->pr_input) {
219 			encap_fillarg(m, match_arg);
220 			(*psw->pr_input)(m, off);
221 		} else {
222 			m_freem(m);
223 		}
224 		return;
225 	}
226 
227 	/* last resort: inject to raw socket */
228 	rip_input(m, off);
229 }
230 #endif
231 
232 int
encap6_input(struct mbuf ** mp,int * offp,int proto)233 encap6_input(struct mbuf **mp, int *offp, int proto)
234 {
235 	struct mbuf *m = *mp;
236 	struct ip6_hdr *ip6;
237 	struct sockaddr_in6 s, d;
238 	const struct ip6protosw *psw;
239 	struct encaptab *ep, *match;
240 	int prio, matchprio;
241 	void *match_arg = NULL;
242 
243 	/* Expect 32-bit aligned data pointer on strict-align platforms */
244 	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
245 
246 	ip6 = mtod(m, struct ip6_hdr *);
247 	bzero(&s, sizeof(s));
248 	s.sin6_family = AF_INET6;
249 	s.sin6_len = sizeof(struct sockaddr_in6);
250 	s.sin6_addr = ip6->ip6_src;
251 	bzero(&d, sizeof(d));
252 	d.sin6_family = AF_INET6;
253 	d.sin6_len = sizeof(struct sockaddr_in6);
254 	d.sin6_addr = ip6->ip6_dst;
255 
256 	match = NULL;
257 	matchprio = 0;
258 
259 	lck_rw_lock_shared(&encaptab_lock);
260 	for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) {
261 		if (ep->af != AF_INET6) {
262 			continue;
263 		}
264 		if (ep->proto >= 0 && ep->proto != proto) {
265 			continue;
266 		}
267 		if (ep->func) {
268 			prio = (*ep->func)(m, *offp, proto, ep->arg);
269 		} else {
270 			/*
271 			 * it's inbound traffic, we need to match in reverse
272 			 * order
273 			 */
274 			prio = mask_match(ep, (struct sockaddr *)&d,
275 			    (struct sockaddr *)&s);
276 		}
277 
278 		/* see encap4_input() for issues here */
279 		if (prio <= 0) {
280 			continue;
281 		}
282 		if (prio > matchprio) {
283 			matchprio = prio;
284 			match = ep;
285 			psw = (const struct ip6protosw *)match->psw;
286 			match_arg = ep->arg;
287 		}
288 	}
289 	lck_rw_unlock_shared(&encaptab_lock);
290 
291 	if (match) {
292 		/* found a match */
293 		if (psw && psw->pr_input) {
294 			encap_fillarg(m, match_arg);
295 			return (*psw->pr_input)(mp, offp, proto);
296 		} else {
297 			m_freem(m);
298 			return IPPROTO_DONE;
299 		}
300 	}
301 
302 	/* last resort: inject to raw socket */
303 	return rip6_input(mp, offp, proto);
304 }
305 
/*
 * Insert a new entry at the head of the global encaptab list.
 * Caller must hold encaptab_lock exclusively (asserted here).
 */
static void
encap_add_locked(struct encaptab *ep)
{
	LCK_RW_ASSERT(&encaptab_lock, LCK_RW_ASSERT_EXCLUSIVE);
	LIST_INSERT_HEAD(&encaptab, ep, chain);
}
312 
313 /*
314  * sp (src ptr) is always my side, and dp (dst ptr) is always remote side.
315  * length of mask (sm and dm) is assumed to be same as sp/dp.
316  * Return value will be necessary as input (cookie) for encap_detach().
317  */
318 const struct encaptab *
encap_attach(int af,int proto,const struct sockaddr * sp,const struct sockaddr * sm,const struct sockaddr * dp,const struct sockaddr * dm,const struct protosw * psw,void * arg)319 encap_attach(int af, int proto, const struct sockaddr *sp,
320     const struct sockaddr *sm, const struct sockaddr *dp,
321     const struct sockaddr *dm, const struct protosw *psw, void *arg)
322 {
323 	struct encaptab *ep = NULL;
324 	struct encaptab *new_ep = NULL;
325 	int error;
326 
327 	/* sanity check on args */
328 	if (sp->sa_len > sizeof(new_ep->src) || dp->sa_len > sizeof(new_ep->dst)) {
329 		error = EINVAL;
330 		goto fail;
331 	}
332 	if (sp->sa_len != dp->sa_len) {
333 		error = EINVAL;
334 		goto fail;
335 	}
336 	if (af != sp->sa_family || af != dp->sa_family) {
337 		error = EINVAL;
338 		goto fail;
339 	}
340 
341 	new_ep = kalloc_type(struct encaptab, Z_WAITOK | Z_ZERO | Z_NOFAIL);
342 
343 	/* check if anyone have already attached with exactly same config */
344 	lck_rw_lock_exclusive(&encaptab_lock);
345 	for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) {
346 		if (ep->af != af) {
347 			continue;
348 		}
349 		if (ep->proto != proto) {
350 			continue;
351 		}
352 		if (ep->src.ss_len != sp->sa_len ||
353 		    bcmp(&ep->src, sp, sp->sa_len) != 0 ||
354 		    bcmp(&ep->srcmask, sm, sp->sa_len) != 0) {
355 			continue;
356 		}
357 		if (ep->dst.ss_len != dp->sa_len ||
358 		    bcmp(&ep->dst, dp, dp->sa_len) != 0 ||
359 		    bcmp(&ep->dstmask, dm, dp->sa_len) != 0) {
360 			continue;
361 		}
362 
363 		error = EEXIST;
364 		goto fail_locked;
365 	}
366 
367 	new_ep->af = af;
368 	new_ep->proto = proto;
369 	bcopy(sp, &new_ep->src, sp->sa_len);
370 	bcopy(sm, &new_ep->srcmask, sp->sa_len);
371 	bcopy(dp, &new_ep->dst, dp->sa_len);
372 	bcopy(dm, &new_ep->dstmask, dp->sa_len);
373 	new_ep->psw = psw;
374 	new_ep->arg = arg;
375 
376 	encap_add_locked(new_ep);
377 	lck_rw_unlock_exclusive(&encaptab_lock);
378 
379 	error = 0;
380 	return new_ep;
381 
382 fail_locked:
383 	lck_rw_unlock_exclusive(&encaptab_lock);
384 	if (new_ep != NULL) {
385 		kfree_type(struct encaptab, new_ep);
386 	}
387 fail:
388 	return NULL;
389 }
390 
391 const struct encaptab *
encap_attach_func(int af,int proto,int (* func)(const struct mbuf *,int,int,void *),const struct protosw * psw,void * arg)392 encap_attach_func( int af, int proto,
393     int (*func)(const struct mbuf *, int, int, void *),
394     const struct protosw *psw, void *arg)
395 {
396 	struct encaptab *ep;
397 	int error;
398 
399 	/* sanity check on args */
400 	if (!func) {
401 		error = EINVAL;
402 		goto fail;
403 	}
404 
405 	ep = kalloc_type(struct encaptab, Z_WAITOK | Z_ZERO | Z_NOFAIL); /* XXX */
406 
407 	ep->af = af;
408 	ep->proto = proto;
409 	ep->func = func;
410 	ep->psw = psw;
411 	ep->arg = arg;
412 
413 	lck_rw_lock_exclusive(&encaptab_lock);
414 	encap_add_locked(ep);
415 	lck_rw_unlock_exclusive(&encaptab_lock);
416 
417 	error = 0;
418 	return ep;
419 
420 fail:
421 	return NULL;
422 }
423 
424 int
encap_detach(const struct encaptab * cookie)425 encap_detach(const struct encaptab *cookie)
426 {
427 	const struct encaptab *ep = cookie;
428 	struct encaptab *p;
429 
430 	lck_rw_lock_exclusive(&encaptab_lock);
431 	for (p = LIST_FIRST(&encaptab); p; p = LIST_NEXT(p, chain)) {
432 		if (p == ep) {
433 			LIST_REMOVE(p, chain);
434 			lck_rw_unlock_exclusive(&encaptab_lock);
435 			kfree_type(struct encaptab, p);    /*XXX*/
436 			return 0;
437 		}
438 	}
439 	lck_rw_unlock_exclusive(&encaptab_lock);
440 
441 	return EINVAL;
442 }
443 
444 static int
mask_match(const struct encaptab * ep,const struct sockaddr * sp,const struct sockaddr * dp)445 mask_match(const struct encaptab *ep, const struct sockaddr *sp,
446     const struct sockaddr *dp)
447 {
448 	struct sockaddr_storage s;
449 	struct sockaddr_storage d;
450 	int i;
451 	const u_int8_t *p, *q;
452 	u_int8_t *r;
453 	int matchlen;
454 
455 	if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d)) {
456 		return 0;
457 	}
458 	if (sp->sa_family != ep->af || dp->sa_family != ep->af) {
459 		return 0;
460 	}
461 	if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) {
462 		return 0;
463 	}
464 
465 	matchlen = 0;
466 
467 	p = (const u_int8_t *)sp;
468 	q = (const u_int8_t *)&ep->srcmask;
469 	r = (u_int8_t *)&s;
470 	for (i = 0; i < sp->sa_len; i++) {
471 		r[i] = p[i] & q[i];
472 		/* XXX estimate */
473 		matchlen += (q[i] ? 8 : 0);
474 	}
475 
476 	p = (const u_int8_t *)dp;
477 	q = (const u_int8_t *)&ep->dstmask;
478 	r = (u_int8_t *)&d;
479 	for (i = 0; i < dp->sa_len; i++) {
480 		r[i] = p[i] & q[i];
481 		/* XXX rough estimate */
482 		matchlen += (q[i] ? 8 : 0);
483 	}
484 
485 	/* need to overwrite len/family portion as we don't compare them */
486 	s.ss_len = sp->sa_len;
487 	s.ss_family = sp->sa_family;
488 	d.ss_len = dp->sa_len;
489 	d.ss_family = dp->sa_family;
490 
491 	if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 &&
492 	    bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) {
493 		return matchlen;
494 	} else {
495 		return 0;
496 	}
497 }
498 
/*
 * Payload of the ENCAP m_tag: carries the matched entry's opaque handler
 * argument from encap{4,6}_input() to the protocol input routine, which
 * retrieves it via encap_getarg().
 *
 * The field was previously declared "void* *arg" (pointer-to-pointer),
 * but every producer/consumer stores and loads a plain void * — fix the
 * accidental extra level of indirection (same size and layout).
 */
struct encaptabtag {
	void                    *arg;
};
502 
503 static void
encap_fillarg(struct mbuf * m,void * arg)504 encap_fillarg(
505 	struct mbuf *m,
506 	void *arg)
507 {
508 	struct m_tag    *tag;
509 	struct encaptabtag *et;
510 
511 	tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_ENCAP,
512 	    sizeof(struct encaptabtag), M_WAITOK, m);
513 
514 	if (tag != NULL) {
515 		et = (struct encaptabtag*)(tag->m_tag_data);
516 		et->arg = arg;
517 		m_tag_prepend(m, tag);
518 	}
519 }
520 
521 void *
encap_getarg(struct mbuf * m)522 encap_getarg(struct mbuf *m)
523 {
524 	struct m_tag    *tag;
525 	struct encaptabtag *et;
526 	void *p = NULL;
527 
528 	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_ENCAP);
529 	if (tag) {
530 		et = (struct encaptabtag*)(tag->m_tag_data);
531 		p = et->arg;
532 		m_tag_delete(m, tag);
533 	}
534 
535 	return p;
536 }
537 
/*
 * Backing allocation for the ENCAP m_tag: the m_tag header immediately
 * followed by its encaptabtag payload, so the payload pointer passed to
 * M_TAG_INIT() lives inside the same allocation.  m_tag_kfree_encap()
 * relies on the m_tag being the first member to cast back to the
 * container.
 */
struct encaptab_tag_container {
	struct m_tag            encaptab_m_tag;
	struct encaptabtag      encaptab_tag;
};
542 
543 static struct m_tag *
m_tag_kalloc_encap(u_int32_t id,u_int16_t type,uint16_t len,int wait)544 m_tag_kalloc_encap(u_int32_t id, u_int16_t type, uint16_t len, int wait)
545 {
546 	struct encaptab_tag_container *tag_container;
547 	struct m_tag *tag = NULL;
548 
549 	assert3u(id, ==, KERNEL_MODULE_TAG_ID);
550 	assert3u(type, ==, KERNEL_TAG_TYPE_ENCAP);
551 	assert3u(len, ==, sizeof(struct encaptabtag));
552 
553 	if (len != sizeof(struct encaptabtag)) {
554 		return NULL;
555 	}
556 
557 	tag_container = kalloc_type(struct encaptab_tag_container, wait | M_ZERO);
558 	if (tag_container != NULL) {
559 		tag =  &tag_container->encaptab_m_tag;
560 
561 		assert3p(tag, ==, tag_container);
562 
563 		M_TAG_INIT(tag, id, type, len, &tag_container->encaptab_tag, NULL);
564 	}
565 
566 	return tag;
567 }
568 
569 static void
m_tag_kfree_encap(struct m_tag * tag)570 m_tag_kfree_encap(struct m_tag *tag)
571 {
572 	struct encaptab_tag_container *tag_container = (struct encaptab_tag_container *)tag;
573 
574 	assert3u(tag->m_tag_len, ==, sizeof(struct encaptabtag));
575 
576 	kfree_type(struct encaptab_tag_container, tag_container);
577 }
578 
579 void
encap_register_m_tag(void)580 encap_register_m_tag(void)
581 {
582 	int error;
583 
584 	error = m_register_internal_tag_type(KERNEL_TAG_TYPE_ENCAP, sizeof(struct encaptabtag),
585 	    m_tag_kalloc_encap, m_tag_kfree_encap);
586 
587 	assert3u(error, ==, 0);
588 }
589