xref: /xnu-8019.80.24/bsd/netinet/mp_pcb.c (revision a325d9c4a84054e40bbe985afedcb50ab80993ea)
1 /*
2  * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/mbuf.h>
33 #include <sys/mcache.h>
34 #include <sys/syslog.h>
35 #include <sys/socket.h>
36 #include <sys/socketvar.h>
37 #include <sys/protosw.h>
38 #include <sys/proc_internal.h>
39 
40 #include <mach/boolean.h>
41 #include <kern/zalloc.h>
42 #include <kern/locks.h>
43 
44 #include <netinet/mp_pcb.h>
45 #include <netinet/mptcp_var.h>
46 #include <netinet6/in6_pcb.h>
47 
48 static LCK_GRP_DECLARE(mp_lock_grp, "multipath");
49 static LCK_ATTR_DECLARE(mp_lock_attr, 0, 0);
50 static LCK_MTX_DECLARE_ATTR(mp_lock, &mp_lock_grp, &mp_lock_attr);
51 static LCK_MTX_DECLARE_ATTR(mp_timeout_lock, &mp_lock_grp, &mp_lock_attr);
52 
53 static TAILQ_HEAD(, mppcbinfo) mppi_head = TAILQ_HEAD_INITIALIZER(mppi_head);
54 
55 static boolean_t mp_timeout_run;        /* MP timer is scheduled to run */
56 static boolean_t mp_garbage_collecting;
57 static boolean_t mp_ticking;
58 static void mp_sched_timeout(void);
59 static void mp_timeout(void *);
60 
61 static void
mpp_lock_assert_held(struct mppcb * mp)62 mpp_lock_assert_held(struct mppcb *mp)
63 {
64 #if !MACH_ASSERT
65 #pragma unused(mp)
66 #endif
67 	LCK_MTX_ASSERT(&mp->mpp_lock, LCK_MTX_ASSERT_OWNED);
68 }
69 
70 static void
mp_timeout(void * arg)71 mp_timeout(void *arg)
72 {
73 #pragma unused(arg)
74 	struct mppcbinfo *mppi;
75 	boolean_t t, gc;
76 	uint32_t t_act = 0;
77 	uint32_t gc_act = 0;
78 
79 	/*
80 	 * Update coarse-grained networking timestamp (in sec.); the idea
81 	 * is to piggy-back on the timeout callout to update the counter
82 	 * returnable via net_uptime().
83 	 */
84 	net_update_uptime();
85 
86 	lck_mtx_lock_spin(&mp_timeout_lock);
87 	gc = mp_garbage_collecting;
88 	mp_garbage_collecting = FALSE;
89 
90 	t = mp_ticking;
91 	mp_ticking = FALSE;
92 
93 	if (gc || t) {
94 		lck_mtx_unlock(&mp_timeout_lock);
95 
96 		lck_mtx_lock(&mp_lock);
97 		TAILQ_FOREACH(mppi, &mppi_head, mppi_entry) {
98 			if ((gc && mppi->mppi_gc != NULL) ||
99 			    (t && mppi->mppi_timer != NULL)) {
100 				lck_mtx_lock(&mppi->mppi_lock);
101 				if (gc && mppi->mppi_gc != NULL) {
102 					gc_act += mppi->mppi_gc(mppi);
103 				}
104 				if (t && mppi->mppi_timer != NULL) {
105 					t_act += mppi->mppi_timer(mppi);
106 				}
107 				lck_mtx_unlock(&mppi->mppi_lock);
108 			}
109 		}
110 		lck_mtx_unlock(&mp_lock);
111 
112 		lck_mtx_lock_spin(&mp_timeout_lock);
113 	}
114 
115 	/* lock was dropped above, so check first before overriding */
116 	if (!mp_garbage_collecting) {
117 		mp_garbage_collecting = (gc_act != 0);
118 	}
119 	if (!mp_ticking) {
120 		mp_ticking = (t_act != 0);
121 	}
122 
123 	/* re-arm the timer if there's work to do */
124 	mp_timeout_run = FALSE;
125 	mp_sched_timeout();
126 	lck_mtx_unlock(&mp_timeout_lock);
127 }
128 
129 static void
mp_sched_timeout(void)130 mp_sched_timeout(void)
131 {
132 	LCK_MTX_ASSERT(&mp_timeout_lock, LCK_MTX_ASSERT_OWNED);
133 
134 	if (!mp_timeout_run && (mp_garbage_collecting || mp_ticking)) {
135 		lck_mtx_convert_spin(&mp_timeout_lock);
136 		mp_timeout_run = TRUE;
137 		timeout(mp_timeout, NULL, hz);
138 	}
139 }
140 
141 void
mp_gc_sched(void)142 mp_gc_sched(void)
143 {
144 	lck_mtx_lock_spin(&mp_timeout_lock);
145 	mp_garbage_collecting = TRUE;
146 	mp_sched_timeout();
147 	lck_mtx_unlock(&mp_timeout_lock);
148 }
149 
150 void
mptcp_timer_sched(void)151 mptcp_timer_sched(void)
152 {
153 	lck_mtx_lock_spin(&mp_timeout_lock);
154 	mp_ticking = TRUE;
155 	mp_sched_timeout();
156 	lck_mtx_unlock(&mp_timeout_lock);
157 }
158 
159 void
mp_pcbinfo_attach(struct mppcbinfo * mppi)160 mp_pcbinfo_attach(struct mppcbinfo *mppi)
161 {
162 	struct mppcbinfo *mppi0;
163 
164 	lck_mtx_lock(&mp_lock);
165 	TAILQ_FOREACH(mppi0, &mppi_head, mppi_entry) {
166 		if (mppi0 == mppi) {
167 			panic("%s: mppi %p already in the list",
168 			    __func__, mppi);
169 			/* NOTREACHED */
170 		}
171 	}
172 	TAILQ_INSERT_TAIL(&mppi_head, mppi, mppi_entry);
173 	lck_mtx_unlock(&mp_lock);
174 }
175 
176 int
mp_pcbinfo_detach(struct mppcbinfo * mppi)177 mp_pcbinfo_detach(struct mppcbinfo *mppi)
178 {
179 	struct mppcbinfo *mppi0;
180 	int error = 0;
181 
182 	lck_mtx_lock(&mp_lock);
183 	TAILQ_FOREACH(mppi0, &mppi_head, mppi_entry) {
184 		if (mppi0 == mppi) {
185 			break;
186 		}
187 	}
188 	if (mppi0 != NULL) {
189 		TAILQ_REMOVE(&mppi_head, mppi0, mppi_entry);
190 	} else {
191 		error = ENXIO;
192 	}
193 	lck_mtx_unlock(&mp_lock);
194 
195 	return error;
196 }
197 
198 int
mp_pcballoc(struct socket * so,struct mppcbinfo * mppi)199 mp_pcballoc(struct socket *so, struct mppcbinfo *mppi)
200 {
201 	struct mppcb *mpp = NULL;
202 	int error;
203 
204 	VERIFY(mpsotomppcb(so) == NULL);
205 
206 	mpp = zalloc_flags(mppi->mppi_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
207 
208 	lck_mtx_init(&mpp->mpp_lock, mppi->mppi_lock_grp, &mppi->mppi_lock_attr);
209 	mpp->mpp_pcbinfo = mppi;
210 	mpp->mpp_state = MPPCB_STATE_INUSE;
211 	mpp->mpp_socket = so;
212 	so->so_pcb = mpp;
213 
214 	error = mptcp_session_create(mpp);
215 	if (error) {
216 		lck_mtx_destroy(&mpp->mpp_lock, mppi->mppi_lock_grp);
217 		zfree(mppi->mppi_zone, mpp);
218 		return error;
219 	}
220 
221 	lck_mtx_lock(&mppi->mppi_lock);
222 	mpp->mpp_flags |= MPP_ATTACHED;
223 	TAILQ_INSERT_TAIL(&mppi->mppi_pcbs, mpp, mpp_entry);
224 	mppi->mppi_count++;
225 
226 	lck_mtx_unlock(&mppi->mppi_lock);
227 
228 	return 0;
229 }
230 
231 void
mp_pcbdetach(struct socket * mp_so)232 mp_pcbdetach(struct socket *mp_so)
233 {
234 	struct mppcb *mpp = mpsotomppcb(mp_so);
235 
236 	mpp->mpp_state = MPPCB_STATE_DEAD;
237 
238 	mp_gc_sched();
239 }
240 
241 void
mptcp_pcbdispose(struct mppcb * mpp)242 mptcp_pcbdispose(struct mppcb *mpp)
243 {
244 	struct mppcbinfo *mppi = mpp->mpp_pcbinfo;
245 	struct socket *mp_so = mpp->mpp_socket;
246 
247 	VERIFY(mppi != NULL);
248 
249 	LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);
250 	mpp_lock_assert_held(mpp);
251 
252 	VERIFY(mpp->mpp_state == MPPCB_STATE_DEAD);
253 	VERIFY(mpp->mpp_flags & MPP_ATTACHED);
254 
255 	mpp->mpp_flags &= ~MPP_ATTACHED;
256 	TAILQ_REMOVE(&mppi->mppi_pcbs, mpp, mpp_entry);
257 	VERIFY(mppi->mppi_count != 0);
258 	mppi->mppi_count--;
259 
260 	if (mppi->mppi_count == 0) {
261 		if (mptcp_cellicon_refcount) {
262 			os_log_error(mptcp_log_handle, "%s: No more MPTCP-flows, but cell icon counter is %u\n",
263 			    __func__, mptcp_cellicon_refcount);
264 			mptcp_clear_cellicon();
265 			mptcp_cellicon_refcount = 0;
266 		}
267 	}
268 
269 	VERIFY(mpp->mpp_inside == 0);
270 	mpp_unlock(mpp);
271 
272 #if NECP
273 	necp_mppcb_dispose(mpp);
274 #endif /* NECP */
275 
276 	sofreelastref(mp_so, 0);
277 	if (mp_so->so_rcv.sb_cc > 0 || mp_so->so_snd.sb_cc > 0) {
278 		/*
279 		 * selthreadclear() already called
280 		 * during sofreelastref() above.
281 		 */
282 		sbrelease(&mp_so->so_rcv);
283 		sbrelease(&mp_so->so_snd);
284 	}
285 
286 	lck_mtx_destroy(&mpp->mpp_lock, mppi->mppi_lock_grp);
287 
288 	VERIFY(mpp->mpp_socket != NULL);
289 	VERIFY(mpp->mpp_socket->so_usecount == 0);
290 	mpp->mpp_socket->so_pcb = NULL;
291 	mpp->mpp_socket = NULL;
292 
293 	zfree(mppi->mppi_zone, mpp);
294 }
295 
296 static int
mp_getaddr_v4(struct socket * mp_so,struct sockaddr ** nam,boolean_t peer)297 mp_getaddr_v4(struct socket *mp_so, struct sockaddr **nam, boolean_t peer)
298 {
299 	struct mptses *mpte = mpsotompte(mp_so);
300 	struct sockaddr_in *sin;
301 
302 	/*
303 	 * Do the malloc first in case it blocks.
304 	 */
305 	sin = (struct sockaddr_in *)alloc_sockaddr(sizeof(*sin),
306 	    Z_WAITOK | Z_NOFAIL);
307 
308 	sin->sin_family = AF_INET;
309 
310 	if (!peer) {
311 		sin->sin_port = mpte->__mpte_src_v4.sin_port;
312 		sin->sin_addr = mpte->__mpte_src_v4.sin_addr;
313 	} else {
314 		sin->sin_port = mpte->__mpte_dst_v4.sin_port;
315 		sin->sin_addr = mpte->__mpte_dst_v4.sin_addr;
316 	}
317 
318 	*nam = (struct sockaddr *)sin;
319 	return 0;
320 }
321 
322 static int
mp_getaddr_v6(struct socket * mp_so,struct sockaddr ** nam,boolean_t peer)323 mp_getaddr_v6(struct socket *mp_so, struct sockaddr **nam, boolean_t peer)
324 {
325 	struct mptses *mpte = mpsotompte(mp_so);
326 	struct in6_addr addr;
327 	in_port_t port;
328 	uint32_t ifscope;
329 
330 	if (!peer) {
331 		port = mpte->__mpte_src_v6.sin6_port;
332 		addr = mpte->__mpte_src_v6.sin6_addr;
333 		ifscope = mpte->__mpte_src_v6.sin6_scope_id;
334 	} else {
335 		port = mpte->__mpte_dst_v6.sin6_port;
336 		addr = mpte->__mpte_dst_v6.sin6_addr;
337 		ifscope = mpte->__mpte_dst_v6.sin6_scope_id;
338 	}
339 
340 	*nam = in6_sockaddr(port, &addr, ifscope);
341 	if (*nam == NULL) {
342 		return ENOBUFS;
343 	}
344 
345 	return 0;
346 }
347 
348 int
mp_getsockaddr(struct socket * mp_so,struct sockaddr ** nam)349 mp_getsockaddr(struct socket *mp_so, struct sockaddr **nam)
350 {
351 	struct mptses *mpte = mpsotompte(mp_so);
352 
353 	if (mpte->mpte_src.sa_family == AF_INET || mpte->mpte_src.sa_family == 0) {
354 		return mp_getaddr_v4(mp_so, nam, false);
355 	} else if (mpte->mpte_src.sa_family == AF_INET6) {
356 		return mp_getaddr_v6(mp_so, nam, false);
357 	} else {
358 		return EINVAL;
359 	}
360 }
361 
362 int
mp_getpeeraddr(struct socket * mp_so,struct sockaddr ** nam)363 mp_getpeeraddr(struct socket *mp_so, struct sockaddr **nam)
364 {
365 	struct mptses *mpte = mpsotompte(mp_so);
366 
367 	if (mpte->mpte_src.sa_family == AF_INET || mpte->mpte_src.sa_family == 0) {
368 		return mp_getaddr_v4(mp_so, nam, true);
369 	} else if (mpte->mpte_src.sa_family == AF_INET6) {
370 		return mp_getaddr_v6(mp_so, nam, true);
371 	} else {
372 		return EINVAL;
373 	}
374 }
375