xref: /xnu-8796.121.2/bsd/netinet/mptcp_usrreq.c (revision c54f35ca767986246321eb901baf8f5ff7923f6a)
1 /*
2  * Copyright (c) 2012-2020 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/protosw.h>
35 #include <sys/mcache.h>
36 #include <sys/syslog.h>
37 #include <sys/proc.h>
38 #include <sys/proc_internal.h>
39 #include <sys/resourcevar.h>
40 #include <sys/kauth.h>
41 #include <sys/priv.h>
42 
43 #include <net/if.h>
44 #include <netinet/in.h>
45 #include <netinet/in_var.h>
46 #include <netinet/tcp.h>
47 #include <netinet/tcp_fsm.h>
48 #include <netinet/tcp_seq.h>
49 #include <netinet/tcp_var.h>
50 #include <netinet/tcp_timer.h>
51 #include <netinet/mptcp.h>
52 #include <netinet/mptcp_var.h>
53 #include <netinet/mptcp_timer.h>
54 
55 #include <mach/sdt.h>
56 
57 static int mptcp_usr_attach(struct socket *, int, struct proc *);
58 static int mptcp_usr_detach(struct socket *);
59 static int mptcp_attach(struct socket *, struct proc *);
60 static int mptcp_usr_connectx(struct socket *, struct sockaddr *,
61     struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
62     sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
63 static int mptcp_getassocids(struct mptses *, uint32_t *, user_addr_t);
64 static int mptcp_getconnids(struct mptses *, sae_associd_t, uint32_t *,
65     user_addr_t);
66 static int mptcp_getconninfo(struct mptses *, sae_connid_t *, uint32_t *,
67     uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *,
68     uint32_t *, user_addr_t, uint32_t *);
69 static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *,
70     struct proc *);
71 static int mptcp_disconnect(struct mptses *);
72 static int mptcp_usr_disconnect(struct socket *);
73 static int mptcp_usr_disconnectx(struct socket *, sae_associd_t, sae_connid_t);
74 static struct mptses *mptcp_usrclosed(struct mptses *);
75 static int mptcp_usr_rcvd(struct socket *, int);
76 static int mptcp_usr_send(struct socket *, int, struct mbuf *,
77     struct sockaddr *, struct mbuf *, struct proc *);
78 static int mptcp_usr_shutdown(struct socket *);
79 static int mptcp_usr_sosend(struct socket *, struct sockaddr *, struct uio *,
80     struct mbuf *, struct mbuf *, int);
81 static int mptcp_usr_socheckopt(struct socket *, struct sockopt *);
82 static int mptcp_usr_preconnect(struct socket *so);
83 
84 struct pr_usrreqs mptcp_usrreqs = {
85 	.pru_attach =           mptcp_usr_attach,
86 	.pru_connectx =         mptcp_usr_connectx,
87 	.pru_control =          mptcp_usr_control,
88 	.pru_detach =           mptcp_usr_detach,
89 	.pru_disconnect =       mptcp_usr_disconnect,
90 	.pru_disconnectx =      mptcp_usr_disconnectx,
91 	.pru_peeraddr =         mp_getpeeraddr,
92 	.pru_rcvd =             mptcp_usr_rcvd,
93 	.pru_send =             mptcp_usr_send,
94 	.pru_shutdown =         mptcp_usr_shutdown,
95 	.pru_sockaddr =         mp_getsockaddr,
96 	.pru_sosend =           mptcp_usr_sosend,
97 	.pru_soreceive =        soreceive,
98 	.pru_socheckopt =       mptcp_usr_socheckopt,
99 	.pru_preconnect =       mptcp_usr_preconnect,
100 };
101 
102 
103 int mptcp_developer_mode = 0;
104 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, allow_aggregate, CTLFLAG_RW | CTLFLAG_LOCKED,
105     &mptcp_developer_mode, 0, "Allow the Multipath aggregation mode");
106 
107 int mptcp_no_first_party = 0;
108 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, no_first_party, CTLFLAG_RW | CTLFLAG_LOCKED,
109     &mptcp_no_first_party, 0, "Do not do first-party app exemptions");
110 
111 static unsigned long mptcp_expected_progress_headstart = 5000;
112 SYSCTL_ULONG(_net_inet_mptcp, OID_AUTO, expected_progress_headstart, CTLFLAG_RW | CTLFLAG_LOCKED,
113     &mptcp_expected_progress_headstart, "Headstart to give MPTCP before meeting the progress deadline");
114 
115 
116 /*
117  * Attaches an MPTCP control block to a socket.
118  */
119 static int
mptcp_usr_attach(struct socket * mp_so,int proto,struct proc * p)120 mptcp_usr_attach(struct socket *mp_so, int proto, struct proc *p)
121 {
122 #pragma unused(proto)
123 	int error;
124 
125 	VERIFY(mpsotomppcb(mp_so) == NULL);
126 
127 	error = mptcp_attach(mp_so, p);
128 	if (error) {
129 		goto out;
130 	}
131 
132 	if ((mp_so->so_options & SO_LINGER) && mp_so->so_linger == 0) {
133 		mp_so->so_linger = (short)(TCP_LINGERTIME * hz);
134 	}
135 out:
136 	return error;
137 }
138 
139 /*
140  * Detaches an MPTCP control block from a socket.
141  */
142 static int
mptcp_usr_detach(struct socket * mp_so)143 mptcp_usr_detach(struct socket *mp_so)
144 {
145 	struct mptses *mpte = mpsotompte(mp_so);
146 	struct mppcb *mpp = mpsotomppcb(mp_so);
147 
148 	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
149 		os_log_error(mptcp_log_handle, "%s - %lx: state: %d\n",
150 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
151 		    mpp ? mpp->mpp_state : -1);
152 		return EINVAL;
153 	}
154 
155 	/*
156 	 * We are done with this MPTCP socket (it has been closed);
157 	 * trigger all subflows to be disconnected, if not already,
158 	 * by initiating the PCB detach sequence (SOF_PCBCLEARING
159 	 * will be set.)
160 	 */
161 	mp_pcbdetach(mp_so);
162 
163 	mptcp_disconnect(mpte);
164 
165 	return 0;
166 }
167 
168 /*
169  * Attach MPTCP protocol to socket, allocating MP control block,
170  * MPTCP session, control block, buffer space, etc.
171  */
172 static int
mptcp_attach(struct socket * mp_so,struct proc * p)173 mptcp_attach(struct socket *mp_so, struct proc *p)
174 {
175 #pragma unused(p)
176 	struct mptses *mpte = NULL;
177 	struct mptcb *mp_tp = NULL;
178 	struct mppcb *mpp = NULL;
179 	int error = 0;
180 
181 	if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) {
182 		error = soreserve(mp_so, tcp_sendspace, tcp_recvspace);
183 		if (error != 0) {
184 			goto out;
185 		}
186 	}
187 
188 	if (mp_so->so_snd.sb_preconn_hiwat == 0) {
189 		soreserve_preconnect(mp_so, 2048);
190 	}
191 
192 	if ((mp_so->so_rcv.sb_flags & SB_USRSIZE) == 0) {
193 		mp_so->so_rcv.sb_flags |= SB_AUTOSIZE;
194 	}
195 	if ((mp_so->so_snd.sb_flags & SB_USRSIZE) == 0) {
196 		mp_so->so_snd.sb_flags |= SB_AUTOSIZE;
197 	}
198 
199 	/*
200 	 * MPTCP send-socket buffers cannot be compressed, due to the
201 	 * fact that each mbuf chained via m_next is a M_PKTHDR
202 	 * which carries some MPTCP metadata.
203 	 */
204 	mp_so->so_snd.sb_flags |= SB_NOCOMPRESS;
205 
206 	if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) {
207 		goto out;
208 	}
209 
210 	mpp = mpsotomppcb(mp_so);
211 	mpte = (struct mptses *)mpp->mpp_pcbe;
212 	mp_tp = mpte->mpte_mptcb;
213 
214 	VERIFY(mp_tp != NULL);
215 out:
216 	return error;
217 }
218 
219 static int
mptcp_entitlement_check(struct socket * mp_so,uint8_t svctype)220 mptcp_entitlement_check(struct socket *mp_so, uint8_t svctype)
221 {
222 	struct mptses *mpte = mpsotompte(mp_so);
223 
224 	if (mptcp_no_first_party) {
225 		return 0;
226 	}
227 
228 	/* First, check for mptcp_extended without delegation */
229 	if (soopt_cred_check(mp_so, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED, TRUE, FALSE) == 0) {
230 		/*
231 		 * This means the app has the extended entitlement. Thus,
232 		 * it's a first party app and can run without restrictions.
233 		 */
234 		mpte->mpte_flags |= MPTE_FIRSTPARTY;
235 		return 0;
236 	}
237 
238 	/* Now with delegation */
239 	if (mp_so->so_flags & SOF_DELEGATED &&
240 	    soopt_cred_check(mp_so, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED, TRUE, TRUE) == 0) {
241 		/*
242 		 * This means the app has the extended entitlement. Thus,
243 		 * it's a first party app and can run without restrictions.
244 		 */
245 		mpte->mpte_flags |= MPTE_FIRSTPARTY;
246 		return 0;
247 	}
248 
249 	if (svctype == MPTCP_SVCTYPE_AGGREGATE) {
250 		if (mptcp_developer_mode) {
251 			return 0;
252 		}
253 
254 		os_log_error(mptcp_log_handle, "%s - %lx: MPTCP prohibited on svc %u\n",
255 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_svctype);
256 		return -1;
257 	}
258 
259 	return 0;
260 }
261 
262 /*
263  * Common subroutine to open a MPTCP connection to one of the remote hosts
264  * specified by dst_sl.  This includes allocating and establishing a
265  * subflow TCP connection, either initially to establish MPTCP connection,
266  * or to join an existing one.  Returns a connection handle upon success.
267  */
268 static int
mptcp_connectx(struct mptses * mpte,struct sockaddr * src,struct sockaddr * dst,uint32_t ifscope,sae_connid_t * pcid)269 mptcp_connectx(struct mptses *mpte, struct sockaddr *src,
270     struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid)
271 {
272 	int error = 0;
273 
274 	VERIFY(dst != NULL);
275 	VERIFY(pcid != NULL);
276 
277 	error = mptcp_subflow_add(mpte, src, dst, ifscope, pcid);
278 
279 	return error;
280 }
281 
282 /*
283  * User-protocol pru_connectx callback.
284  */
285 static int
mptcp_usr_connectx(struct socket * mp_so,struct sockaddr * src,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uint32_t flags,void * arg,uint32_t arglen,struct uio * auio,user_ssize_t * bytes_written)286 mptcp_usr_connectx(struct socket *mp_so, struct sockaddr *src,
287     struct sockaddr *dst, struct proc *p, uint32_t ifscope,
288     sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
289     uint32_t arglen, struct uio *auio, user_ssize_t *bytes_written)
290 {
291 #pragma unused(p, aid, flags, arg, arglen)
292 	struct mppcb *mpp = mpsotomppcb(mp_so);
293 	struct mptses *mpte = NULL;
294 	struct mptcb *mp_tp = NULL;
295 	user_ssize_t    datalen;
296 	int error = 0;
297 
298 	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
299 		os_log_error(mptcp_log_handle, "%s - %lx: state %d\n",
300 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
301 		    mpp ? mpp->mpp_state : -1);
302 		error = EINVAL;
303 		goto out;
304 	}
305 	mpte = mptompte(mpp);
306 	mp_tp = mpte->mpte_mptcb;
307 
308 	if (mp_tp->mpt_flags &  MPTCPF_FALLBACK_TO_TCP) {
309 		os_log_error(mptcp_log_handle, "%s - %lx: fell back to TCP\n",
310 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
311 		error = EINVAL;
312 		goto out;
313 	}
314 
315 	if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
316 		error = EAFNOSUPPORT;
317 		goto out;
318 	}
319 
320 	if (dst->sa_family == AF_INET &&
321 	    dst->sa_len != sizeof(mpte->__mpte_dst_v4)) {
322 		os_log_error(mptcp_log_handle, "%s - %lx: IPv4 dst len %u\n",
323 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), dst->sa_len);
324 		error = EINVAL;
325 		goto out;
326 	}
327 
328 	if (dst->sa_family == AF_INET6 &&
329 	    dst->sa_len != sizeof(mpte->__mpte_dst_v6)) {
330 		os_log_error(mptcp_log_handle, "%s - %lx: IPv6 dst len %u\n",
331 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), dst->sa_len);
332 		error = EINVAL;
333 		goto out;
334 	}
335 
336 	if (!(mpte->mpte_flags & MPTE_SVCTYPE_CHECKED)) {
337 		if (mptcp_entitlement_check(mp_so, mpte->mpte_svctype) < 0) {
338 			error = EPERM;
339 			goto out;
340 		}
341 
342 		mpte->mpte_flags |= MPTE_SVCTYPE_CHECKED;
343 	}
344 
345 	if ((mp_so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0) {
346 		memcpy(&mpte->mpte_u_dst, dst, dst->sa_len);
347 
348 		if (dst->sa_family == AF_INET) {
349 			memcpy(&mpte->mpte_sub_dst_v4, dst, dst->sa_len);
350 		} else {
351 			memcpy(&mpte->mpte_sub_dst_v6, dst, dst->sa_len);
352 		}
353 	}
354 
355 	if (src) {
356 		if (src->sa_family != AF_INET && src->sa_family != AF_INET6) {
357 			error = EAFNOSUPPORT;
358 			goto out;
359 		}
360 
361 		if (src->sa_family == AF_INET &&
362 		    src->sa_len != sizeof(mpte->__mpte_src_v4)) {
363 			os_log_error(mptcp_log_handle, "%s - %lx: IPv4 src len %u\n",
364 			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), src->sa_len);
365 			error = EINVAL;
366 			goto out;
367 		}
368 
369 		if (src->sa_family == AF_INET6 &&
370 		    src->sa_len != sizeof(mpte->__mpte_src_v6)) {
371 			os_log_error(mptcp_log_handle, "%s - %lx: IPv6 src len %u\n",
372 			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), src->sa_len);
373 			error = EINVAL;
374 			goto out;
375 		}
376 
377 		if ((mp_so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0) {
378 			memcpy(&mpte->mpte_u_src, src, src->sa_len);
379 		}
380 	}
381 
382 	error = mptcp_connectx(mpte, src, dst, ifscope, pcid);
383 
384 	/* If there is data, copy it */
385 	if (auio != NULL) {
386 		datalen = uio_resid(auio);
387 		socket_unlock(mp_so, 0);
388 		error = mp_so->so_proto->pr_usrreqs->pru_sosend(mp_so, NULL,
389 		    (uio_t) auio, NULL, NULL, 0);
390 
391 		if (error == 0 || error == EWOULDBLOCK) {
392 			*bytes_written = datalen - uio_resid(auio);
393 		}
394 
395 		if (error == EWOULDBLOCK) {
396 			error = EINPROGRESS;
397 		}
398 
399 		socket_lock(mp_so, 0);
400 	}
401 
402 out:
403 	return error;
404 }
405 
406 /*
407  * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
408  */
409 static int
mptcp_getassocids(struct mptses * mpte,uint32_t * cnt,user_addr_t aidp)410 mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp)
411 {
412 	/* MPTCP has at most 1 association */
413 	*cnt = (mpte->mpte_associd != SAE_ASSOCID_ANY) ? 1 : 0;
414 
415 	/* just asking how many there are? */
416 	if (aidp == USER_ADDR_NULL) {
417 		return 0;
418 	}
419 
420 	return copyout(&mpte->mpte_associd, aidp,
421 	           sizeof(mpte->mpte_associd));
422 }
423 
424 /*
425  * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
426  */
427 static int
mptcp_getconnids(struct mptses * mpte,sae_associd_t aid,uint32_t * cnt,user_addr_t cidp)428 mptcp_getconnids(struct mptses *mpte, sae_associd_t aid, uint32_t *cnt,
429     user_addr_t cidp)
430 {
431 	struct mptsub *mpts;
432 	int error = 0;
433 
434 	if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
435 	    aid != mpte->mpte_associd) {
436 		return EINVAL;
437 	}
438 
439 	*cnt = mpte->mpte_numflows;
440 
441 	/* just asking how many there are? */
442 	if (cidp == USER_ADDR_NULL) {
443 		return 0;
444 	}
445 
446 	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
447 		if ((error = copyout(&mpts->mpts_connid, cidp,
448 		    sizeof(mpts->mpts_connid))) != 0) {
449 			break;
450 		}
451 
452 		cidp += sizeof(mpts->mpts_connid);
453 	}
454 
455 	return error;
456 }
457 
458 /*
459  * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
460  */
461 static int
mptcp_getconninfo(struct mptses * mpte,sae_connid_t * cid,uint32_t * flags,uint32_t * ifindex,int32_t * soerror,user_addr_t src,socklen_t * src_len,user_addr_t dst,socklen_t * dst_len,uint32_t * aux_type,user_addr_t aux_data,uint32_t * aux_len)462 mptcp_getconninfo(struct mptses *mpte, sae_connid_t *cid, uint32_t *flags,
463     uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
464     user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
465     user_addr_t aux_data, uint32_t *aux_len)
466 {
467 	*flags = 0;
468 	*aux_type = 0;
469 	*ifindex = 0;
470 	*soerror = 0;
471 	struct mptcb *mp_tp = mpte->mpte_mptcb;
472 
473 	/* MPTCP-level global stats */
474 	if (*cid == SAE_CONNID_ALL) {
475 		struct socket *mp_so = mptetoso(mpte);
476 		struct conninfo_multipathtcp mptcp_ci;
477 		int error = 0;
478 
479 		if (*aux_len != 0 && *aux_len != sizeof(mptcp_ci)) {
480 			return EINVAL;
481 		}
482 
483 		if (mp_so->so_state & SS_ISCONNECTING) {
484 			*flags |= CIF_CONNECTING;
485 		}
486 		if (mp_so->so_state & SS_ISCONNECTED) {
487 			*flags |= CIF_CONNECTED;
488 		}
489 		if (mp_so->so_state & SS_ISDISCONNECTING) {
490 			*flags |= CIF_DISCONNECTING;
491 		}
492 		if (mp_so->so_state & SS_ISDISCONNECTED) {
493 			*flags |= CIF_DISCONNECTED;
494 		}
495 		if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) {
496 			*flags |= CIF_MP_CAPABLE;
497 		}
498 		if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
499 			*flags |= CIF_MP_DEGRADED;
500 		}
501 		if (mp_tp->mpt_version == MPTCP_VERSION_1) {
502 			*flags |= CIF_MP_V1;
503 		}
504 
505 		*src_len = 0;
506 		*dst_len = 0;
507 
508 		*aux_type = CIAUX_MPTCP;
509 		*aux_len = sizeof(mptcp_ci);
510 
511 		if (aux_data != USER_ADDR_NULL) {
512 			const struct mptsub *mpts;
513 			int initial_info_set = 0;
514 			unsigned long i = 0;
515 
516 			bzero(&mptcp_ci, sizeof(mptcp_ci));
517 			mptcp_ci.mptcpci_subflow_count = mpte->mpte_numflows;
518 			mptcp_ci.mptcpci_switch_count = mpte->mpte_subflow_switches;
519 
520 			VERIFY(sizeof(mptcp_ci.mptcpci_itfstats) == sizeof(mpte->mpte_itfstats));
521 			memcpy(mptcp_ci.mptcpci_itfstats, mpte->mpte_itfstats, sizeof(mptcp_ci.mptcpci_itfstats));
522 
523 			TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
524 				if (i >= sizeof(mptcp_ci.mptcpci_subflow_connids) / sizeof(sae_connid_t)) {
525 					break;
526 				}
527 				mptcp_ci.mptcpci_subflow_connids[i] = mpts->mpts_connid;
528 
529 				if (mpts->mpts_flags & MPTSF_INITIAL_SUB) {
530 					const struct inpcb *inp;
531 
532 					inp = sotoinpcb(mpts->mpts_socket);
533 
534 					mptcp_ci.mptcpci_init_rxbytes = inp->inp_stat->rxbytes;
535 					mptcp_ci.mptcpci_init_txbytes = inp->inp_stat->txbytes;
536 					initial_info_set = 1;
537 				}
538 
539 				mptcpstats_update(mptcp_ci.mptcpci_itfstats, mpts);
540 
541 				i++;
542 			}
543 
544 			if (initial_info_set == 0) {
545 				mptcp_ci.mptcpci_init_rxbytes = mpte->mpte_init_rxbytes;
546 				mptcp_ci.mptcpci_init_txbytes = mpte->mpte_init_txbytes;
547 			}
548 
549 			if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
550 				mptcp_ci.mptcpci_flags |= MPTCPCI_FIRSTPARTY;
551 			}
552 
553 			error = copyout(&mptcp_ci, aux_data, sizeof(mptcp_ci));
554 			if (error != 0) {
555 				os_log_error(mptcp_log_handle, "%s - %lx: copyout failed: %d\n",
556 				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
557 				return error;
558 			}
559 		}
560 
561 		return 0;
562 	}
563 
564 	/* Any stats of any subflow */
565 	if (*cid == SAE_CONNID_ANY) {
566 		const struct mptsub *mpts;
567 		struct socket *so;
568 		const struct inpcb *inp;
569 		int error = 0;
570 
571 		mpts = TAILQ_FIRST(&mpte->mpte_subflows);
572 		if (mpts == NULL) {
573 			return ENXIO;
574 		}
575 
576 		so = mpts->mpts_socket;
577 		inp = sotoinpcb(so);
578 
579 		if (inp->inp_vflag & INP_IPV4) {
580 			error = in_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
581 			    soerror, src, src_len, dst, dst_len,
582 			    aux_type, aux_data, aux_len);
583 		} else {
584 			error = in6_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
585 			    soerror, src, src_len, dst, dst_len,
586 			    aux_type, aux_data, aux_len);
587 		}
588 
589 		if (error != 0) {
590 			os_log_error(mptcp_log_handle, "%s - %lx:error from in_getconninfo %d\n",
591 			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
592 			return error;
593 		}
594 
595 		if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
596 			*flags |= CIF_MP_CAPABLE;
597 		}
598 		if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
599 			*flags |= CIF_MP_DEGRADED;
600 		}
601 		if (mpts->mpts_flags & MPTSF_MP_READY) {
602 			*flags |= CIF_MP_READY;
603 		}
604 		if (mpts->mpts_flags & MPTSF_ACTIVE) {
605 			*flags |= CIF_MP_ACTIVE;
606 		}
607 		if (mp_tp->mpt_version == MPTCP_VERSION_1) {
608 			*flags |= CIF_MP_V1;
609 		}
610 
611 		return 0;
612 	} else {
613 		/* Per-interface stats */
614 		const struct mptsub *mpts, *orig_mpts = NULL;
615 		struct conninfo_tcp tcp_ci;
616 		const struct inpcb *inp;
617 		struct socket *so;
618 		int error = 0;
619 		int index;
620 
621 		/* cid is thus an ifindex - range-check first! */
622 		if (*cid > USHRT_MAX) {
623 			return EINVAL;
624 		}
625 
626 		bzero(&tcp_ci, sizeof(tcp_ci));
627 
628 		/* First, get a subflow to fill in the "regular" info. */
629 		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
630 			const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
631 
632 			if (ifp && ifp->if_index == *cid) {
633 				break;
634 			}
635 		}
636 
637 		if (mpts == NULL) {
638 			/* No subflow there - well, let's just get the basic itf-info */
639 			goto interface_info;
640 		}
641 
642 		so = mpts->mpts_socket;
643 		inp = sotoinpcb(so);
644 
645 		/* Give it USER_ADDR_NULL, because we are doing this on our own */
646 		if (inp->inp_vflag & INP_IPV4) {
647 			error = in_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
648 			    soerror, src, src_len, dst, dst_len,
649 			    aux_type, USER_ADDR_NULL, aux_len);
650 		} else {
651 			error = in6_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
652 			    soerror, src, src_len, dst, dst_len,
653 			    aux_type, USER_ADDR_NULL, aux_len);
654 		}
655 
656 		if (error != 0) {
657 			os_log_error(mptcp_log_handle, "%s - %lx:error from in_getconninfo %d\n",
658 			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
659 			return error;
660 		}
661 
662 		/* ToDo: Nobody is reading these flags on subflows. Why bother ? */
663 		if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
664 			*flags |= CIF_MP_CAPABLE;
665 		}
666 		if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
667 			*flags |= CIF_MP_DEGRADED;
668 		}
669 		if (mpts->mpts_flags & MPTSF_MP_READY) {
670 			*flags |= CIF_MP_READY;
671 		}
672 		if (mpts->mpts_flags & MPTSF_ACTIVE) {
673 			*flags |= CIF_MP_ACTIVE;
674 		}
675 		if (mp_tp->mpt_version == MPTCP_VERSION_1) {
676 			*flags |= CIF_MP_V1;
677 		}
678 
679 		/*
680 		 * Now, we gather the metrics (aka., tcp_info) and roll them in
681 		 * across all subflows of this interface to build an aggregated
682 		 * view.
683 		 *
684 		 * We take the TCP_INFO from the first subflow as the "master",
685 		 * feeding into those fields that we do not roll.
686 		 */
687 		if (aux_data != USER_ADDR_NULL) {
688 			tcp_getconninfo(so, &tcp_ci);
689 
690 			orig_mpts = mpts;
691 			TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
692 				const struct inpcb *mptsinp = sotoinpcb(mpts->mpts_socket);
693 				const struct ifnet *ifp;
694 
695 				ifp = mptsinp->inp_last_outifp;
696 
697 				if (ifp == NULL || ifp->if_index != *cid || mpts == orig_mpts) {
698 					continue;
699 				}
700 
701 				/* Roll the itf-stats into the tcp_info */
702 				tcp_ci.tcpci_tcp_info.tcpi_txbytes +=
703 				    mptsinp->inp_stat->txbytes;
704 				tcp_ci.tcpci_tcp_info.tcpi_rxbytes +=
705 				    mptsinp->inp_stat->rxbytes;
706 
707 				tcp_ci.tcpci_tcp_info.tcpi_wifi_txbytes +=
708 				    mptsinp->inp_wstat->txbytes;
709 				tcp_ci.tcpci_tcp_info.tcpi_wifi_rxbytes +=
710 				    mptsinp->inp_wstat->rxbytes;
711 
712 				tcp_ci.tcpci_tcp_info.tcpi_wired_txbytes +=
713 				    mptsinp->inp_Wstat->txbytes;
714 				tcp_ci.tcpci_tcp_info.tcpi_wired_rxbytes +=
715 				    mptsinp->inp_Wstat->rxbytes;
716 
717 				tcp_ci.tcpci_tcp_info.tcpi_cell_txbytes +=
718 				    mptsinp->inp_cstat->txbytes;
719 				tcp_ci.tcpci_tcp_info.tcpi_cell_rxbytes +=
720 				    mptsinp->inp_cstat->rxbytes;
721 			}
722 		}
723 
724 interface_info:
725 		*aux_type = CIAUX_TCP;
726 		if (*aux_len == 0) {
727 			*aux_len = sizeof(tcp_ci);
728 		} else if (aux_data != USER_ADDR_NULL) {
729 			boolean_t create;
730 
731 			/*
732 			 * Finally, old subflows might have been closed - we
733 			 * want this data as well, so grab it from the interface
734 			 * stats.
735 			 */
736 			create = orig_mpts != NULL;
737 
738 			/*
739 			 * When we found a subflow, we are willing to create a stats-index
740 			 * because we have some data to return. If there isn't a subflow,
741 			 * nor anything in the stats, return EINVAL. Because the
742 			 * ifindex belongs to something that doesn't exist.
743 			 */
744 			index = mptcpstats_get_index_by_ifindex(mpte->mpte_itfstats, (u_short)(*cid), false);
745 			if (index == -1) {
746 				os_log_error(mptcp_log_handle,
747 				    "%s - %lx: Asking for too many ifindex: %u subcount %u, mpts? %s\n",
748 				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
749 				    *cid, mpte->mpte_numflows,
750 				    orig_mpts ? "yes" : "no");
751 
752 				if (orig_mpts == NULL) {
753 					return EINVAL;
754 				}
755 			} else {
756 				struct mptcp_itf_stats *stats;
757 
758 				stats = &mpte->mpte_itfstats[index];
759 
760 				/* Roll the itf-stats into the tcp_info */
761 				tcp_ci.tcpci_tcp_info.tcpi_last_outif = *cid;
762 				tcp_ci.tcpci_tcp_info.tcpi_txbytes +=
763 				    stats->mpis_txbytes;
764 				tcp_ci.tcpci_tcp_info.tcpi_rxbytes +=
765 				    stats->mpis_rxbytes;
766 
767 				tcp_ci.tcpci_tcp_info.tcpi_wifi_txbytes +=
768 				    stats->mpis_wifi_txbytes;
769 				tcp_ci.tcpci_tcp_info.tcpi_wifi_rxbytes +=
770 				    stats->mpis_wifi_rxbytes;
771 
772 				tcp_ci.tcpci_tcp_info.tcpi_wired_txbytes +=
773 				    stats->mpis_wired_txbytes;
774 				tcp_ci.tcpci_tcp_info.tcpi_wired_rxbytes +=
775 				    stats->mpis_wired_rxbytes;
776 
777 				tcp_ci.tcpci_tcp_info.tcpi_cell_txbytes +=
778 				    stats->mpis_cell_txbytes;
779 				tcp_ci.tcpci_tcp_info.tcpi_cell_rxbytes +=
780 				    stats->mpis_cell_rxbytes;
781 			}
782 
783 			*aux_len = min(*aux_len, sizeof(tcp_ci));
784 			error = copyout(&tcp_ci, aux_data, *aux_len);
785 			if (error != 0) {
786 				return error;
787 			}
788 		}
789 	}
790 
791 	return 0;
792 }
793 
794 /*
795  * User-protocol pru_control callback.
796  */
797 static int
mptcp_usr_control(struct socket * mp_so,u_long cmd,caddr_t data,struct ifnet * ifp,struct proc * p)798 mptcp_usr_control(struct socket *mp_so, u_long cmd, caddr_t data,
799     struct ifnet *ifp, struct proc *p)
800 {
801 #pragma unused(ifp, p)
802 	struct mppcb *mpp = mpsotomppcb(mp_so);
803 	struct mptses *mpte;
804 	int error = 0;
805 
806 	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
807 		error = EINVAL;
808 		goto out;
809 	}
810 	mpte = mptompte(mpp);
811 
812 	switch (cmd) {
813 	case SIOCGASSOCIDS32: {         /* struct so_aidreq32 */
814 		struct so_aidreq32 aidr;
815 		bcopy(data, &aidr, sizeof(aidr));
816 		error = mptcp_getassocids(mpte, &aidr.sar_cnt,
817 		    aidr.sar_aidp);
818 		if (error == 0) {
819 			bcopy(&aidr, data, sizeof(aidr));
820 		}
821 		break;
822 	}
823 
824 	case SIOCGASSOCIDS64: {         /* struct so_aidreq64 */
825 		struct so_aidreq64 aidr;
826 		bcopy(data, &aidr, sizeof(aidr));
827 		error = mptcp_getassocids(mpte, &aidr.sar_cnt,
828 		    (user_addr_t)aidr.sar_aidp);
829 		if (error == 0) {
830 			bcopy(&aidr, data, sizeof(aidr));
831 		}
832 		break;
833 	}
834 
835 	case SIOCGCONNIDS32: {          /* struct so_cidreq32 */
836 		struct so_cidreq32 cidr;
837 		bcopy(data, &cidr, sizeof(cidr));
838 		error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
839 		    cidr.scr_cidp);
840 		if (error == 0) {
841 			bcopy(&cidr, data, sizeof(cidr));
842 		}
843 		break;
844 	}
845 
846 	case SIOCGCONNIDS64: {          /* struct so_cidreq64 */
847 		struct so_cidreq64 cidr;
848 		bcopy(data, &cidr, sizeof(cidr));
849 		error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
850 		    (user_addr_t)cidr.scr_cidp);
851 		if (error == 0) {
852 			bcopy(&cidr, data, sizeof(cidr));
853 		}
854 		break;
855 	}
856 
857 	case SIOCGCONNINFO32: {         /* struct so_cinforeq32 */
858 		struct so_cinforeq32 cifr;
859 		bcopy(data, &cifr, sizeof(cifr));
860 		error = mptcp_getconninfo(mpte, &cifr.scir_cid,
861 		    &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
862 		    cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst,
863 		    &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data,
864 		    &cifr.scir_aux_len);
865 		if (error == 0) {
866 			bcopy(&cifr, data, sizeof(cifr));
867 		}
868 		break;
869 	}
870 
871 	case SIOCGCONNINFO64: {         /* struct so_cinforeq64 */
872 		struct so_cinforeq64 cifr;
873 		bcopy(data, &cifr, sizeof(cifr));
874 		error = mptcp_getconninfo(mpte, &cifr.scir_cid,
875 		    &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
876 		    (user_addr_t)cifr.scir_src, &cifr.scir_src_len,
877 		    (user_addr_t)cifr.scir_dst, &cifr.scir_dst_len,
878 		    &cifr.scir_aux_type, (user_addr_t)cifr.scir_aux_data,
879 		    &cifr.scir_aux_len);
880 		if (error == 0) {
881 			bcopy(&cifr, data, sizeof(cifr));
882 		}
883 		break;
884 	}
885 
886 	default:
887 		error = EOPNOTSUPP;
888 		break;
889 	}
890 out:
891 	return error;
892 }
893 
894 static int
mptcp_disconnect(struct mptses * mpte)895 mptcp_disconnect(struct mptses *mpte)
896 {
897 	struct socket *mp_so;
898 	struct mptcb *mp_tp;
899 	int error = 0;
900 
901 	mp_so = mptetoso(mpte);
902 	mp_tp = mpte->mpte_mptcb;
903 
904 	DTRACE_MPTCP3(disconnectx, struct mptses *, mpte,
905 	    struct socket *, mp_so, struct mptcb *, mp_tp);
906 
907 	/* if we're not detached, go thru socket state checks */
908 	if (!(mp_so->so_flags & SOF_PCBCLEARING) && !(mp_so->so_flags & SOF_DEFUNCT)) {
909 		if (!(mp_so->so_state & (SS_ISCONNECTED |
910 		    SS_ISCONNECTING))) {
911 			error = ENOTCONN;
912 			goto out;
913 		}
914 		if (mp_so->so_state & SS_ISDISCONNECTING) {
915 			error = EALREADY;
916 			goto out;
917 		}
918 	}
919 
920 	mptcp_cancel_all_timers(mp_tp);
921 	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
922 		mptcp_close(mpte, mp_tp);
923 	} else if ((mp_so->so_options & SO_LINGER) &&
924 	    mp_so->so_linger == 0) {
925 		mptcp_drop(mpte, mp_tp, 0);
926 	} else {
927 		soisdisconnecting(mp_so);
928 		sbflush(&mp_so->so_rcv);
929 		if (mptcp_usrclosed(mpte) != NULL) {
930 			mptcp_output(mpte);
931 		}
932 	}
933 
934 	if (error == 0) {
935 		mptcp_subflow_workloop(mpte);
936 	}
937 
938 out:
939 	return error;
940 }
941 
/*
 * pru_disconnect handler: resolve the MPTCP session behind the socket and
 * hand it to mptcp_disconnect(), returning its result.
 */
static int
mptcp_usr_disconnect(struct socket *mp_so)
{
	struct mptses *session = mpsotompte(mp_so);

	return mptcp_disconnect(session);
}
950 
951 /*
952  * User-protocol pru_disconnectx callback.
953  */
954 static int
mptcp_usr_disconnectx(struct socket * mp_so,sae_associd_t aid,sae_connid_t cid)955 mptcp_usr_disconnectx(struct socket *mp_so, sae_associd_t aid, sae_connid_t cid)
956 {
957 	if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
958 		return EINVAL;
959 	}
960 
961 	if (cid != SAE_CONNID_ANY && cid != SAE_CONNID_ALL) {
962 		return EINVAL;
963 	}
964 
965 	return mptcp_usr_disconnect(mp_so);
966 }
967 
968 void
mptcp_finish_usrclosed(struct mptses * mpte)969 mptcp_finish_usrclosed(struct mptses *mpte)
970 {
971 	struct mptcb *mp_tp = mpte->mpte_mptcb;
972 	struct socket *mp_so = mptetoso(mpte);
973 
974 	if (mp_tp->mpt_state == MPTCPS_CLOSED || mp_tp->mpt_state == MPTCPS_TERMINATE) {
975 		mpte = mptcp_close(mpte, mp_tp);
976 	} else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
977 		soisdisconnected(mp_so);
978 	} else {
979 		struct mptsub *mpts;
980 
981 		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
982 			if ((mp_so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
983 			    (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
984 				mptcp_subflow_disconnect(mpte, mpts);
985 			} else {
986 				mptcp_subflow_shutdown(mpte, mpts);
987 			}
988 		}
989 	}
990 }
991 
992 /*
993  * User issued close, and wish to trail thru shutdown states.
994  */
995 static struct mptses *
mptcp_usrclosed(struct mptses * mpte)996 mptcp_usrclosed(struct mptses *mpte)
997 {
998 	struct mptcb *mp_tp = mpte->mpte_mptcb;
999 
1000 	mptcp_close_fsm(mp_tp, MPCE_CLOSE);
1001 
1002 	/* Not everything has been acknowledged - don't close the subflows! */
1003 	if (mp_tp->mpt_state != MPTCPS_TERMINATE &&
1004 	    mp_tp->mpt_sndnxt + 1 != mp_tp->mpt_sndmax) {
1005 		return mpte;
1006 	}
1007 
1008 	mptcp_finish_usrclosed(mpte);
1009 
1010 	return mpte;
1011 }
1012 
1013 /*
1014  * After a receive, possible send some update to peer.
1015  */
1016 static int
mptcp_usr_rcvd(struct socket * mp_so,int flags)1017 mptcp_usr_rcvd(struct socket *mp_so, int flags)
1018 {
1019 #pragma unused(flags)
1020 	struct mppcb *mpp = mpsotomppcb(mp_so);
1021 	struct mptses *mpte;
1022 	struct mptsub *mpts;
1023 	int error = 0;
1024 
1025 	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1026 		error = EINVAL;
1027 		goto out;
1028 	}
1029 
1030 	mpte = mptompte(mpp);
1031 
1032 	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1033 		struct socket *so = mpts->mpts_socket;
1034 
1035 		if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb != NULL) {
1036 			(*so->so_proto->pr_usrreqs->pru_rcvd)(so, 0);
1037 		}
1038 	}
1039 
1040 	error = mptcp_output(mpte);
1041 out:
1042 	return error;
1043 }
1044 
1045 /*
1046  * Do a send by putting data in the output queue.
1047  */
1048 static int
mptcp_usr_send(struct socket * mp_so,int prus_flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct proc * p)1049 mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m,
1050     struct sockaddr *nam, struct mbuf *control, struct proc *p)
1051 {
1052 #pragma unused(nam, p)
1053 	struct mppcb *mpp = mpsotomppcb(mp_so);
1054 	struct mptses *mpte;
1055 	int error = 0;
1056 
1057 	if (prus_flags & (PRUS_OOB | PRUS_EOF)) {
1058 		error = EOPNOTSUPP;
1059 		goto out;
1060 	}
1061 
1062 	if (nam != NULL) {
1063 		error = EOPNOTSUPP;
1064 		goto out;
1065 	}
1066 
1067 	if (control != NULL && control->m_len != 0) {
1068 		error = EOPNOTSUPP;
1069 		goto out;
1070 	}
1071 
1072 	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1073 		error = ECONNRESET;
1074 		goto out;
1075 	}
1076 	mpte = mptompte(mpp);
1077 	VERIFY(mpte != NULL);
1078 
1079 	if (!(mp_so->so_state & SS_ISCONNECTED) &&
1080 	    !(mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) {
1081 		error = ENOTCONN;
1082 		goto out;
1083 	}
1084 
1085 	mptcp_insert_dsn(mpp, m);
1086 	VERIFY(mp_so->so_snd.sb_flags & SB_NOCOMPRESS);
1087 	sbappendstream(&mp_so->so_snd, m);
1088 	m = NULL;
1089 
1090 	error = mptcp_output(mpte);
1091 	if (error != 0) {
1092 		goto out;
1093 	}
1094 
1095 	if (mp_so->so_state & SS_ISCONNECTING) {
1096 		if (mp_so->so_state & SS_NBIO) {
1097 			error = EWOULDBLOCK;
1098 		} else {
1099 			error = sbwait(&mp_so->so_snd);
1100 		}
1101 	}
1102 
1103 out:
1104 	if (error) {
1105 		if (m != NULL) {
1106 			m_freem(m);
1107 		}
1108 		if (control != NULL) {
1109 			m_freem(control);
1110 		}
1111 	}
1112 	return error;
1113 }
1114 
1115 /*
1116  * Mark the MPTCP connection as being incapable of further output.
1117  */
1118 static int
mptcp_usr_shutdown(struct socket * mp_so)1119 mptcp_usr_shutdown(struct socket *mp_so)
1120 {
1121 	struct mppcb *mpp = mpsotomppcb(mp_so);
1122 	struct mptses *mpte;
1123 	int error = 0;
1124 
1125 	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1126 		error = EINVAL;
1127 		goto out;
1128 	}
1129 	mpte = mptompte(mpp);
1130 	VERIFY(mpte != NULL);
1131 
1132 	socantsendmore(mp_so);
1133 
1134 	mpte = mptcp_usrclosed(mpte);
1135 	if (mpte != NULL) {
1136 		error = mptcp_output(mpte);
1137 	}
1138 out:
1139 	return error;
1140 }
1141 
1142 /*
1143  * Copy the contents of uio into a properly sized mbuf chain.
1144  */
1145 static int
mptcp_uiotombuf(struct uio * uio,int how,user_ssize_t space,struct mbuf ** top)1146 mptcp_uiotombuf(struct uio *uio, int how, user_ssize_t space, struct mbuf **top)
1147 {
1148 	struct mbuf *m, *mb, *nm = NULL, *mtail = NULL;
1149 	int progress, len, error;
1150 	user_ssize_t resid, tot;
1151 
1152 	VERIFY(top != NULL && *top == NULL);
1153 
1154 	/*
1155 	 * space can be zero or an arbitrary large value bound by
1156 	 * the total data supplied by the uio.
1157 	 */
1158 	resid = uio_resid(uio);
1159 	if (space > 0) {
1160 		tot = MIN(resid, space);
1161 	} else {
1162 		tot = resid;
1163 	}
1164 
1165 	if (tot < 0 || tot > INT_MAX) {
1166 		return EINVAL;
1167 	}
1168 
1169 	len = (int)tot;
1170 	if (len == 0) {
1171 		len = 1;
1172 	}
1173 
1174 	/* Loop and append maximum sized mbufs to the chain tail. */
1175 	while (len > 0) {
1176 		uint32_t m_needed = 1;
1177 
1178 		if (njcl > 0 && len > MBIGCLBYTES) {
1179 			mb = m_getpackets_internal(&m_needed, 1,
1180 			    how, 1, M16KCLBYTES);
1181 		} else if (len > MCLBYTES) {
1182 			mb = m_getpackets_internal(&m_needed, 1,
1183 			    how, 1, MBIGCLBYTES);
1184 		} else if (len >= (signed)MINCLSIZE) {
1185 			mb = m_getpackets_internal(&m_needed, 1,
1186 			    how, 1, MCLBYTES);
1187 		} else {
1188 			mb = m_gethdr(how, MT_DATA);
1189 		}
1190 
1191 		/* Fail the whole operation if one mbuf can't be allocated. */
1192 		if (mb == NULL) {
1193 			if (nm != NULL) {
1194 				m_freem(nm);
1195 			}
1196 			return ENOBUFS;
1197 		}
1198 
1199 		/* Book keeping. */
1200 		VERIFY(mb->m_flags & M_PKTHDR);
1201 		len -= ((mb->m_flags & M_EXT) ? mb->m_ext.ext_size : MHLEN);
1202 		if (mtail != NULL) {
1203 			mtail->m_next = mb;
1204 		} else {
1205 			nm = mb;
1206 		}
1207 		mtail = mb;
1208 	}
1209 
1210 	m = nm;
1211 
1212 	progress = 0;
1213 	/* Fill all mbufs with uio data and update header information. */
1214 	for (mb = m; mb != NULL; mb = mb->m_next) {
1215 		/* tot >= 0 && tot <= INT_MAX (see above) */
1216 		len = MIN((int)M_TRAILINGSPACE(mb), (int)(tot - progress));
1217 
1218 		error = uiomove(mtod(mb, char *), len, uio);
1219 		if (error != 0) {
1220 			m_freem(m);
1221 			return error;
1222 		}
1223 
1224 		/* each mbuf is M_PKTHDR chained via m_next */
1225 		mb->m_len = len;
1226 		mb->m_pkthdr.len = len;
1227 
1228 		progress += len;
1229 	}
1230 	VERIFY(progress == tot);
1231 	*top = m;
1232 	return 0;
1233 }
1234 
1235 /*
1236  * MPTCP socket protocol-user socket send routine, derived from sosend().
1237  */
1238 static int
mptcp_usr_sosend(struct socket * mp_so,struct sockaddr * addr,struct uio * uio,struct mbuf * top,struct mbuf * control,int flags)1239 mptcp_usr_sosend(struct socket *mp_so, struct sockaddr *addr, struct uio *uio,
1240     struct mbuf *top, struct mbuf *control, int flags)
1241 {
1242 #pragma unused(addr)
1243 	user_ssize_t resid, space;
1244 	int error, sendflags;
1245 	struct proc *p = current_proc();
1246 	int sblocked = 0;
1247 
1248 	/* UIO is required for now, due to per-mbuf M_PKTHDR constrains */
1249 	if (uio == NULL || top != NULL) {
1250 		error = EINVAL;
1251 		goto out;
1252 	}
1253 	resid = uio_resid(uio);
1254 
1255 	socket_lock(mp_so, 1);
1256 	so_update_last_owner_locked(mp_so, p);
1257 	so_update_policy(mp_so);
1258 
1259 	VERIFY(mp_so->so_type == SOCK_STREAM);
1260 	VERIFY(!(mp_so->so_flags & SOF_MP_SUBFLOW));
1261 
1262 	if (flags & (MSG_OOB | MSG_DONTROUTE)) {
1263 		error = EOPNOTSUPP;
1264 		socket_unlock(mp_so, 1);
1265 		goto out;
1266 	}
1267 
1268 	/*
1269 	 * In theory resid should be unsigned.  However, space must be
1270 	 * signed, as it might be less than 0 if we over-committed, and we
1271 	 * must use a signed comparison of space and resid.  On the other
1272 	 * hand, a negative resid causes us to loop sending 0-length
1273 	 * segments to the protocol.
1274 	 */
1275 	if (resid < 0 || resid > INT_MAX ||
1276 	    (flags & MSG_EOR) || control != NULL) {
1277 		error = EINVAL;
1278 		socket_unlock(mp_so, 1);
1279 		goto out;
1280 	}
1281 
1282 	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
1283 
1284 	do {
1285 		error = sosendcheck(mp_so, NULL, resid, 0, 0, flags,
1286 		    &sblocked);
1287 		if (error != 0) {
1288 			goto release;
1289 		}
1290 
1291 		space = sbspace(&mp_so->so_snd);
1292 		do {
1293 			socket_unlock(mp_so, 0);
1294 			/*
1295 			 * Copy the data from userland into an mbuf chain.
1296 			 */
1297 			error = mptcp_uiotombuf(uio, M_WAITOK, space, &top);
1298 			if (error != 0) {
1299 				socket_lock(mp_so, 0);
1300 				goto release;
1301 			}
1302 			VERIFY(top != NULL);
1303 			space -= resid - uio_resid(uio);
1304 			resid = uio_resid(uio);
1305 			socket_lock(mp_so, 0);
1306 
1307 			/*
1308 			 * Compute flags here, for pru_send and NKEs.
1309 			 */
1310 			sendflags = (resid > 0 && space > 0) ?
1311 			    PRUS_MORETOCOME : 0;
1312 
1313 			/*
1314 			 * Socket filter processing
1315 			 */
1316 			VERIFY(control == NULL);
1317 			error = sflt_data_out(mp_so, NULL, &top, &control, 0);
1318 			if (error != 0) {
1319 				if (error == EJUSTRETURN) {
1320 					error = 0;
1321 					top = NULL;
1322 					/* always free control if any */
1323 				}
1324 				goto release;
1325 			}
1326 			if (control != NULL) {
1327 				m_freem(control);
1328 				control = NULL;
1329 			}
1330 
1331 			/*
1332 			 * Pass data to protocol.
1333 			 */
1334 			error = (*mp_so->so_proto->pr_usrreqs->pru_send)
1335 			    (mp_so, sendflags, top, NULL, NULL, p);
1336 
1337 			top = NULL;
1338 			if (error != 0) {
1339 				goto release;
1340 			}
1341 		} while (resid != 0 && space > 0);
1342 	} while (resid != 0);
1343 
1344 release:
1345 	if (sblocked) {
1346 		sbunlock(&mp_so->so_snd, FALSE); /* will unlock socket */
1347 	} else {
1348 		socket_unlock(mp_so, 1);
1349 	}
1350 out:
1351 	if (top != NULL) {
1352 		m_freem(top);
1353 	}
1354 	if (control != NULL) {
1355 		m_freem(control);
1356 	}
1357 
1358 	soclearfastopen(mp_so);
1359 
1360 	return error;
1361 }
1362 
1363 /*
1364  * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1365  * This routine simply indicates to the caller whether or not to proceed
1366  * further with the given socket option.  This is invoked by sosetoptlock()
1367  * and sogetoptlock().
1368  */
1369 static int
mptcp_usr_socheckopt(struct socket * mp_so,struct sockopt * sopt)1370 mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt)
1371 {
1372 #pragma unused(mp_so)
1373 	int error = 0;
1374 
1375 	VERIFY(sopt->sopt_level == SOL_SOCKET);
1376 
1377 	/*
1378 	 * We could check for sopt_dir (set/get) here, but we'll just
1379 	 * let the caller deal with it as appropriate; therefore the
1380 	 * following is a superset of the socket options which we
1381 	 * allow for set/get.
1382 	 *
1383 	 * XXX: [email protected]
1384 	 *
1385 	 * Need to consider the following cases:
1386 	 *
1387 	 *   a.	Certain socket options don't have a clear definition
1388 	 *	on the expected behavior post connect(2).  At the time
1389 	 *	those options are issued on the MP socket, there may
1390 	 *	be existing subflow sockets that are already connected.
1391 	 */
1392 	switch (sopt->sopt_name) {
1393 	case SO_LINGER:                         /* MP */
1394 	case SO_LINGER_SEC:                     /* MP */
1395 	case SO_TYPE:                           /* MP */
1396 	case SO_NREAD:                          /* MP */
1397 	case SO_NWRITE:                         /* MP */
1398 	case SO_ERROR:                          /* MP */
1399 	case SO_SNDBUF:                         /* MP */
1400 	case SO_RCVBUF:                         /* MP */
1401 	case SO_SNDLOWAT:                       /* MP */
1402 	case SO_RCVLOWAT:                       /* MP */
1403 	case SO_SNDTIMEO:                       /* MP */
1404 	case SO_RCVTIMEO:                       /* MP */
1405 	case SO_NKE:                            /* MP */
1406 	case SO_NOSIGPIPE:                      /* MP */
1407 	case SO_NOADDRERR:                      /* MP */
1408 	case SO_LABEL:                          /* MP */
1409 	case SO_PEERLABEL:                      /* MP */
1410 	case SO_DEFUNCTIT:                      /* MP */
1411 	case SO_DEFUNCTOK:                      /* MP */
1412 	case SO_ISDEFUNCT:                      /* MP */
1413 	case SO_TRAFFIC_CLASS_DBG:              /* MP */
1414 	case SO_DELEGATED:                      /* MP */
1415 	case SO_DELEGATED_UUID:                 /* MP */
1416 #if NECP
1417 	case SO_NECP_ATTRIBUTES:
1418 	case SO_NECP_CLIENTUUID:
1419 #endif /* NECP */
1420 	case SO_MPKL_SEND_INFO:
1421 		/*
1422 		 * Tell the caller that these options are to be processed.
1423 		 */
1424 		break;
1425 
1426 	case SO_DEBUG:                          /* MP + subflow */
1427 	case SO_KEEPALIVE:                      /* MP + subflow */
1428 	case SO_USELOOPBACK:                    /* MP + subflow */
1429 	case SO_RANDOMPORT:                     /* MP + subflow */
1430 	case SO_TRAFFIC_CLASS:                  /* MP + subflow */
1431 	case SO_RECV_TRAFFIC_CLASS:             /* MP + subflow */
1432 	case SO_PRIVILEGED_TRAFFIC_CLASS:       /* MP + subflow */
1433 	case SO_RECV_ANYIF:                     /* MP + subflow */
1434 	case SO_RESTRICTIONS:                   /* MP + subflow */
1435 	case SO_FLUSH:                          /* MP + subflow */
1436 	case SO_NOWAKEFROMSLEEP:
1437 	case SO_NOAPNFALLBK:
1438 	case SO_MARK_CELLFALLBACK:
1439 	case SO_MARK_CELLFALLBACK_UUID:
1440 	case SO_MARK_KNOWN_TRACKER:
1441 	case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
1442 	case SO_MARK_APPROVED_APP_DOMAIN:
1443 	case SO_FALLBACK_MODE:
1444 		/*
1445 		 * Tell the caller that these options are to be processed;
1446 		 * these will also be recorded later by mptcp_setopt().
1447 		 *
1448 		 * NOTE: Only support integer option value for now.
1449 		 */
1450 		if (sopt->sopt_valsize != sizeof(int)) {
1451 			error = EINVAL;
1452 		}
1453 		break;
1454 
1455 	default:
1456 		/*
1457 		 * Tell the caller to stop immediately and return an error.
1458 		 */
1459 		error = ENOPROTOOPT;
1460 		break;
1461 	}
1462 
1463 	return error;
1464 }
1465 
1466 /*
1467  * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
1468  */
1469 static int
mptcp_setopt_apply(struct mptses * mpte,struct mptopt * mpo)1470 mptcp_setopt_apply(struct mptses *mpte, struct mptopt *mpo)
1471 {
1472 	struct socket *mp_so;
1473 	struct mptsub *mpts;
1474 	struct mptopt smpo;
1475 	int error = 0;
1476 
1477 	/* just bail now if this isn't applicable to subflow sockets */
1478 	if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
1479 		error = ENOPROTOOPT;
1480 		goto out;
1481 	}
1482 
1483 	/*
1484 	 * Skip those that are handled internally; these options
1485 	 * should not have been recorded and marked with the
1486 	 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1487 	 */
1488 	if (mpo->mpo_level == SOL_SOCKET &&
1489 	    (mpo->mpo_name == SO_NOSIGPIPE || mpo->mpo_name == SO_NOADDRERR)) {
1490 		error = ENOPROTOOPT;
1491 		goto out;
1492 	}
1493 
1494 	mp_so = mptetoso(mpte);
1495 
1496 	/*
1497 	 * Don't bother going further if there's no subflow; mark the option
1498 	 * with MPOF_INTERIM so that we know whether or not to remove this
1499 	 * option upon encountering an error while issuing it during subflow
1500 	 * socket creation.
1501 	 */
1502 	if (mpte->mpte_numflows == 0) {
1503 		VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows));
1504 		mpo->mpo_flags |= MPOF_INTERIM;
1505 		/* return success */
1506 		goto out;
1507 	}
1508 
1509 	bzero(&smpo, sizeof(smpo));
1510 	smpo.mpo_flags |= MPOF_SUBFLOW_OK;
1511 	smpo.mpo_level = mpo->mpo_level;
1512 	smpo.mpo_name = mpo->mpo_name;
1513 
1514 	/* grab exisiting values in case we need to rollback */
1515 	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1516 		struct socket *so;
1517 
1518 		mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL | MPTSF_SOPT_INPROG);
1519 		mpts->mpts_oldintval = 0;
1520 		smpo.mpo_intval = 0;
1521 		VERIFY(mpts->mpts_socket != NULL);
1522 		so = mpts->mpts_socket;
1523 		if (mptcp_subflow_sogetopt(mpte, so, &smpo) == 0) {
1524 			mpts->mpts_flags |= MPTSF_SOPT_OLDVAL;
1525 			mpts->mpts_oldintval = smpo.mpo_intval;
1526 		}
1527 	}
1528 
1529 	/* apply socket option */
1530 	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1531 		struct socket *so;
1532 
1533 		mpts->mpts_flags |= MPTSF_SOPT_INPROG;
1534 		VERIFY(mpts->mpts_socket != NULL);
1535 		so = mpts->mpts_socket;
1536 		error = mptcp_subflow_sosetopt(mpte, mpts, mpo);
1537 		if (error != 0) {
1538 			break;
1539 		}
1540 	}
1541 
1542 	/* cleanup, and rollback if needed */
1543 	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1544 		struct socket *so;
1545 
1546 		if (!(mpts->mpts_flags & MPTSF_SOPT_INPROG)) {
1547 			/* clear in case it's set */
1548 			mpts->mpts_flags &= ~MPTSF_SOPT_OLDVAL;
1549 			mpts->mpts_oldintval = 0;
1550 			continue;
1551 		}
1552 		if (!(mpts->mpts_flags & MPTSF_SOPT_OLDVAL)) {
1553 			mpts->mpts_flags &= ~MPTSF_SOPT_INPROG;
1554 			VERIFY(mpts->mpts_oldintval == 0);
1555 			continue;
1556 		}
1557 		/* error during sosetopt, so roll it back */
1558 		if (error != 0) {
1559 			VERIFY(mpts->mpts_socket != NULL);
1560 			so = mpts->mpts_socket;
1561 			smpo.mpo_intval = mpts->mpts_oldintval;
1562 			mptcp_subflow_sosetopt(mpte, mpts, &smpo);
1563 		}
1564 		mpts->mpts_oldintval = 0;
1565 		mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL | MPTSF_SOPT_INPROG);
1566 	}
1567 
1568 out:
1569 	return error;
1570 }
1571 
1572 /*
1573  * Handle SOPT_SET for socket options issued on MP socket.
1574  */
1575 static int
mptcp_setopt(struct mptses * mpte,struct sockopt * sopt)1576 mptcp_setopt(struct mptses *mpte, struct sockopt *sopt)
1577 {
1578 	int error = 0, optval = 0, level, optname, rec = 1;
1579 	struct mptopt smpo, *mpo = NULL;
1580 	struct socket *mp_so;
1581 
1582 	level = sopt->sopt_level;
1583 	optname = sopt->sopt_name;
1584 
1585 	mp_so = mptetoso(mpte);
1586 
1587 	VERIFY(!(mpsotomppcb(mp_so)->mpp_flags & MPP_INSIDE_SETGETOPT));
1588 	mpsotomppcb(mp_so)->mpp_flags |= MPP_INSIDE_SETGETOPT;
1589 
1590 	/*
1591 	 * Record socket options which are applicable to subflow sockets so
1592 	 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1593 	 * for the list of eligible socket-level options.
1594 	 */
1595 	if (level == SOL_SOCKET) {
1596 		switch (optname) {
1597 		case SO_DEBUG:
1598 		case SO_KEEPALIVE:
1599 		case SO_USELOOPBACK:
1600 		case SO_RANDOMPORT:
1601 		case SO_TRAFFIC_CLASS:
1602 		case SO_RECV_TRAFFIC_CLASS:
1603 		case SO_PRIVILEGED_TRAFFIC_CLASS:
1604 		case SO_RECV_ANYIF:
1605 		case SO_RESTRICTIONS:
1606 		case SO_NOWAKEFROMSLEEP:
1607 		case SO_NOAPNFALLBK:
1608 		case SO_MARK_CELLFALLBACK:
1609 		case SO_MARK_KNOWN_TRACKER:
1610 		case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
1611 		case SO_MARK_APPROVED_APP_DOMAIN:
1612 		case SO_FALLBACK_MODE:
1613 			/* record it */
1614 			break;
1615 		case SO_FLUSH:
1616 			/* don't record it */
1617 			rec = 0;
1618 			break;
1619 
1620 		/* Next ones, record at MPTCP-level */
1621 		case SO_DELEGATED:
1622 			error = sooptcopyin(sopt, &mpte->mpte_epid,
1623 			    sizeof(int), sizeof(int));
1624 			if (error != 0) {
1625 				goto err_out;
1626 			}
1627 
1628 			goto out;
1629 		case SO_DELEGATED_UUID:
1630 			error = sooptcopyin(sopt, &mpte->mpte_euuid,
1631 			    sizeof(uuid_t), sizeof(uuid_t));
1632 			if (error != 0) {
1633 				goto err_out;
1634 			}
1635 
1636 			goto out;
1637 #if NECP
1638 		case SO_NECP_CLIENTUUID:
1639 			if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
1640 				error = EINVAL;
1641 				goto err_out;
1642 			}
1643 
1644 			error = sooptcopyin(sopt, &mpsotomppcb(mp_so)->necp_client_uuid,
1645 			    sizeof(uuid_t), sizeof(uuid_t));
1646 			if (error != 0) {
1647 				goto err_out;
1648 			}
1649 
1650 			mpsotomppcb(mp_so)->necp_cb = mptcp_session_necp_cb;
1651 			error = necp_client_register_multipath_cb(mp_so->last_pid,
1652 			    mpsotomppcb(mp_so)->necp_client_uuid,
1653 			    mpsotomppcb(mp_so));
1654 			if (error) {
1655 				goto err_out;
1656 			}
1657 
1658 			if (uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
1659 				error = EINVAL;
1660 				goto err_out;
1661 			}
1662 
1663 			goto out;
1664 		case SO_NECP_ATTRIBUTES:
1665 			error = necp_set_socket_attributes(&mpsotomppcb(mp_so)->inp_necp_attributes, sopt);
1666 			if (error) {
1667 				goto err_out;
1668 			}
1669 
1670 			goto out;
1671 #endif /* NECP */
1672 		default:
1673 			/* nothing to do; just return */
1674 			goto out;
1675 		}
1676 	} else {
1677 		switch (optname) {
1678 		case TCP_NODELAY:
1679 		case TCP_RXT_FINDROP:
1680 		case TCP_KEEPALIVE:
1681 		case TCP_KEEPINTVL:
1682 		case TCP_KEEPCNT:
1683 		case TCP_CONNECTIONTIMEOUT:
1684 		case TCP_RXT_CONNDROPTIME:
1685 		case PERSIST_TIMEOUT:
1686 		case TCP_ADAPTIVE_READ_TIMEOUT:
1687 		case TCP_ADAPTIVE_WRITE_TIMEOUT:
1688 		case TCP_FASTOPEN_FORCE_ENABLE:
1689 			/* eligible; record it */
1690 			break;
1691 		case TCP_NOTSENT_LOWAT:
1692 			/* record at MPTCP level */
1693 			error = sooptcopyin(sopt, &optval, sizeof(optval),
1694 			    sizeof(optval));
1695 			if (error) {
1696 				goto err_out;
1697 			}
1698 			if (optval < 0) {
1699 				error = EINVAL;
1700 				goto err_out;
1701 			} else {
1702 				if (optval == 0) {
1703 					mp_so->so_flags &= ~SOF_NOTSENT_LOWAT;
1704 					error = mptcp_set_notsent_lowat(mpte, 0);
1705 				} else {
1706 					mp_so->so_flags |= SOF_NOTSENT_LOWAT;
1707 					error = mptcp_set_notsent_lowat(mpte,
1708 					    optval);
1709 				}
1710 
1711 				if (error) {
1712 					goto err_out;
1713 				}
1714 			}
1715 			goto out;
1716 		case MPTCP_SERVICE_TYPE:
1717 			/* record at MPTCP level */
1718 			error = sooptcopyin(sopt, &optval, sizeof(optval),
1719 			    sizeof(optval));
1720 			if (error) {
1721 				goto err_out;
1722 			}
1723 			if (optval < 0 || optval >= MPTCP_SVCTYPE_MAX) {
1724 				error = EINVAL;
1725 				goto err_out;
1726 			}
1727 
1728 			if (mptcp_entitlement_check(mp_so, (uint8_t)optval) < 0) {
1729 				error = EACCES;
1730 				goto err_out;
1731 			}
1732 
1733 			mpte->mpte_svctype = (uint8_t)optval;
1734 			mpte->mpte_flags |= MPTE_SVCTYPE_CHECKED;
1735 
1736 			goto out;
1737 		case MPTCP_ALTERNATE_PORT:
1738 			/* record at MPTCP level */
1739 			error = sooptcopyin(sopt, &optval, sizeof(optval),
1740 			    sizeof(optval));
1741 			if (error) {
1742 				goto err_out;
1743 			}
1744 
1745 			if (optval < 0 || optval > UINT16_MAX) {
1746 				error = EINVAL;
1747 				goto err_out;
1748 			}
1749 
1750 			mpte->mpte_alternate_port = (uint16_t)optval;
1751 
1752 			goto out;
1753 		case MPTCP_FORCE_ENABLE:
1754 			/* record at MPTCP level */
1755 			error = sooptcopyin(sopt, &optval, sizeof(optval),
1756 			    sizeof(optval));
1757 			if (error) {
1758 				goto err_out;
1759 			}
1760 
1761 			if (optval < 0 || optval > 1) {
1762 				error = EINVAL;
1763 				goto err_out;
1764 			}
1765 
1766 			if (optval) {
1767 				mpte->mpte_flags |= MPTE_FORCE_ENABLE;
1768 			} else {
1769 				mpte->mpte_flags &= ~MPTE_FORCE_ENABLE;
1770 			}
1771 
1772 			goto out;
1773 		case MPTCP_FORCE_VERSION:
1774 			error = sooptcopyin(sopt, &optval, sizeof(optval),
1775 			    sizeof(optval));
1776 			if (error) {
1777 				goto err_out;
1778 			}
1779 
1780 			if (optval != 0 && optval != 1) {
1781 				error = EINVAL;
1782 				goto err_out;
1783 			}
1784 
1785 			if (optval == 0) {
1786 				mpte->mpte_flags |= MPTE_FORCE_V0;
1787 				mpte->mpte_flags &= ~MPTE_FORCE_V1;
1788 			} else {
1789 				mpte->mpte_flags |= MPTE_FORCE_V1;
1790 				mpte->mpte_flags &= ~MPTE_FORCE_V0;
1791 			}
1792 
1793 			goto out;
1794 		case MPTCP_EXPECTED_PROGRESS_TARGET:
1795 		{
1796 			struct mptcb *mp_tp = mpte->mpte_mptcb;
1797 			uint64_t mach_time_target;
1798 			uint64_t nanoseconds;
1799 
1800 			if (mpte->mpte_svctype != MPTCP_SVCTYPE_TARGET_BASED) {
1801 				os_log(mptcp_log_handle, "%s - %lx: Can't set urgent activity when svctype is %u\n",
1802 				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_svctype);
1803 				error = EINVAL;
1804 				goto err_out;
1805 			}
1806 
1807 			error = sooptcopyin(sopt, &mach_time_target, sizeof(mach_time_target), sizeof(mach_time_target));
1808 			if (error) {
1809 				goto err_out;
1810 			}
1811 
1812 			if (!mptcp_ok_to_create_subflows(mp_tp)) {
1813 				os_log(mptcp_log_handle, "%s - %lx: Not ok to create subflows, state %u flags %#x\n",
1814 				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
1815 				error = EINVAL;
1816 				goto err_out;
1817 			}
1818 
1819 			if (mach_time_target) {
1820 				uint64_t time_now = 0;
1821 				uint64_t time_now_nanoseconds;
1822 
1823 				absolutetime_to_nanoseconds(mach_time_target, &nanoseconds);
1824 				nanoseconds = nanoseconds - (mptcp_expected_progress_headstart * NSEC_PER_MSEC);
1825 
1826 				time_now = mach_continuous_time();
1827 				absolutetime_to_nanoseconds(time_now, &time_now_nanoseconds);
1828 
1829 				nanoseconds_to_absolutetime(nanoseconds, &mach_time_target);
1830 				/* If the timer is already running and it would
1831 				 * fire in less than mptcp_expected_progress_headstart
1832 				 * seconds, then it's not worth canceling it.
1833 				 */
1834 				if (mpte->mpte_time_target &&
1835 				    mpte->mpte_time_target < time_now &&
1836 				    time_now_nanoseconds > nanoseconds - (mptcp_expected_progress_headstart * NSEC_PER_MSEC)) {
1837 					os_log(mptcp_log_handle, "%s - %lx: Not rescheduling timer %llu now %llu target %llu\n",
1838 					    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1839 					    mpte->mpte_time_target,
1840 					    time_now,
1841 					    mach_time_target);
1842 					goto out;
1843 				}
1844 			}
1845 
1846 			mpte->mpte_time_target = mach_time_target;
1847 			mptcp_set_urgency_timer(mpte);
1848 
1849 			goto out;
1850 		}
1851 		default:
1852 			/* not eligible */
1853 			error = ENOPROTOOPT;
1854 			goto err_out;
1855 		}
1856 	}
1857 
1858 	if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
1859 	    sizeof(optval))) != 0) {
1860 		goto err_out;
1861 	}
1862 
1863 	if (rec) {
1864 		/* search for an existing one; if not found, allocate */
1865 		if ((mpo = mptcp_sopt_find(mpte, sopt)) == NULL) {
1866 			mpo = mptcp_sopt_alloc(Z_WAITOK);
1867 		}
1868 
1869 		if (mpo == NULL) {
1870 			error = ENOBUFS;
1871 			goto err_out;
1872 		} else {
1873 			/* initialize or update, as needed */
1874 			mpo->mpo_intval = optval;
1875 			if (!(mpo->mpo_flags & MPOF_ATTACHED)) {
1876 				mpo->mpo_level = level;
1877 				mpo->mpo_name = optname;
1878 				mptcp_sopt_insert(mpte, mpo);
1879 			}
1880 			/* this can be issued on the subflow socket */
1881 			mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1882 		}
1883 	} else {
1884 		bzero(&smpo, sizeof(smpo));
1885 		mpo = &smpo;
1886 		mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1887 		mpo->mpo_level = level;
1888 		mpo->mpo_name = optname;
1889 		mpo->mpo_intval = optval;
1890 	}
1891 
1892 	/* issue this socket option on existing subflows */
1893 	error = mptcp_setopt_apply(mpte, mpo);
1894 	if (error != 0 && (mpo->mpo_flags & MPOF_ATTACHED)) {
1895 		VERIFY(mpo != &smpo);
1896 		mptcp_sopt_remove(mpte, mpo);
1897 		mptcp_sopt_free(mpo);
1898 	}
1899 	if (mpo == &smpo) {
1900 		mpo->mpo_flags &= ~MPOF_INTERIM;
1901 	}
1902 
1903 	if (error) {
1904 		goto err_out;
1905 	}
1906 
1907 out:
1908 
1909 	mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
1910 	return 0;
1911 
1912 err_out:
1913 	os_log_error(mptcp_log_handle, "%s - %lx: sopt %s (%d, %d) val %d can't be issued error %d\n",
1914 	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1915 	    mptcp_sopt2str(level, optname), level, optname, optval, error);
1916 	mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
1917 	return error;
1918 }
1919 
1920 static void
mptcp_fill_info_bytestats(struct tcp_info * ti,struct mptses * mpte)1921 mptcp_fill_info_bytestats(struct tcp_info *ti, struct mptses *mpte)
1922 {
1923 	struct mptsub *mpts;
1924 	int i;
1925 
1926 	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1927 		const struct inpcb *inp = sotoinpcb(mpts->mpts_socket);
1928 
1929 		if (inp == NULL) {
1930 			continue;
1931 		}
1932 
1933 		ti->tcpi_txbytes += inp->inp_stat->txbytes;
1934 		ti->tcpi_rxbytes += inp->inp_stat->rxbytes;
1935 		ti->tcpi_cell_txbytes += inp->inp_cstat->txbytes;
1936 		ti->tcpi_cell_rxbytes += inp->inp_cstat->rxbytes;
1937 		ti->tcpi_wifi_txbytes += inp->inp_wstat->txbytes;
1938 		ti->tcpi_wifi_rxbytes += inp->inp_wstat->rxbytes;
1939 		ti->tcpi_wired_txbytes += inp->inp_Wstat->txbytes;
1940 		ti->tcpi_wired_rxbytes += inp->inp_Wstat->rxbytes;
1941 	}
1942 
1943 	for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
1944 		struct mptcp_itf_stats *stats = &mpte->mpte_itfstats[i];
1945 
1946 		ti->tcpi_txbytes += stats->mpis_txbytes;
1947 		ti->tcpi_rxbytes += stats->mpis_rxbytes;
1948 
1949 		ti->tcpi_wifi_txbytes += stats->mpis_wifi_txbytes;
1950 		ti->tcpi_wifi_rxbytes += stats->mpis_wifi_rxbytes;
1951 
1952 		ti->tcpi_wired_txbytes += stats->mpis_wired_txbytes;
1953 		ti->tcpi_wired_rxbytes += stats->mpis_wired_rxbytes;
1954 
1955 		ti->tcpi_cell_txbytes += stats->mpis_cell_txbytes;
1956 		ti->tcpi_cell_rxbytes += stats->mpis_cell_rxbytes;
1957 	}
1958 }
1959 
1960 static void
mptcp_fill_info(struct mptses * mpte,struct tcp_info * ti)1961 mptcp_fill_info(struct mptses *mpte, struct tcp_info *ti)
1962 {
1963 	struct mptsub *actsub = mpte->mpte_active_sub;
1964 	struct mptcb *mp_tp = mpte->mpte_mptcb;
1965 	struct tcpcb *acttp = NULL;
1966 
1967 	if (actsub) {
1968 		acttp = sototcpcb(actsub->mpts_socket);
1969 	}
1970 
1971 	bzero(ti, sizeof(*ti));
1972 
1973 	ti->tcpi_state = (uint8_t)mp_tp->mpt_state;
1974 	/* tcpi_options */
1975 	/* tcpi_snd_wscale */
1976 	/* tcpi_rcv_wscale */
1977 	/* tcpi_flags */
1978 	if (acttp) {
1979 		ti->tcpi_rto = acttp->t_timer[TCPT_REXMT] ? acttp->t_rxtcur : 0;
1980 	}
1981 
1982 	/* tcpi_snd_mss */
1983 	/* tcpi_rcv_mss */
1984 	if (acttp) {
1985 		ti->tcpi_rttcur = acttp->t_rttcur;
1986 		ti->tcpi_srtt = acttp->t_srtt >> TCP_RTT_SHIFT;
1987 		ti->tcpi_rttvar = acttp->t_rttvar >> TCP_RTTVAR_SHIFT;
1988 		ti->tcpi_rttbest = acttp->t_rttbest >> TCP_RTT_SHIFT;
1989 		ti->tcpi_rcv_srtt = acttp->rcv_srtt >> TCP_RTT_SHIFT;
1990 	}
1991 	/* tcpi_snd_ssthresh */
1992 	/* tcpi_snd_cwnd */
1993 	/* tcpi_rcv_space */
1994 	ti->tcpi_snd_wnd = mp_tp->mpt_sndwnd;
1995 	ti->tcpi_snd_nxt = (uint32_t)mp_tp->mpt_sndnxt;
1996 	ti->tcpi_rcv_nxt = (uint32_t)mp_tp->mpt_rcvnxt;
1997 	if (acttp) {
1998 		ti->tcpi_last_outif = (acttp->t_inpcb->inp_last_outifp == NULL) ? 0 :
1999 		    acttp->t_inpcb->inp_last_outifp->if_index;
2000 	}
2001 
2002 	mptcp_fill_info_bytestats(ti, mpte);
2003 	/* tcpi_txpackets */
2004 
2005 	/* tcpi_txretransmitbytes */
2006 	/* tcpi_txunacked */
2007 	/* tcpi_rxpackets */
2008 
2009 	/* tcpi_rxduplicatebytes */
2010 	/* tcpi_rxoutoforderbytes */
2011 	/* tcpi_snd_bw */
2012 	/* tcpi_synrexmits */
2013 	/* tcpi_unused1 */
2014 	/* tcpi_unused2 */
2015 	/* tcpi_cell_rxpackets */
2016 
2017 	/* tcpi_cell_txpackets */
2018 
2019 	/* tcpi_wifi_rxpackets */
2020 
2021 	/* tcpi_wifi_txpackets */
2022 
2023 	/* tcpi_wired_rxpackets */
2024 	/* tcpi_wired_txpackets */
2025 	/* tcpi_connstatus */
2026 	/* TFO-stuff */
2027 	/* ECN stuff */
2028 	/* tcpi_ecn_recv_ce */
2029 	/* tcpi_ecn_recv_cwr */
2030 	if (acttp) {
2031 		ti->tcpi_rcvoopack = acttp->t_rcvoopack;
2032 	}
2033 	/* tcpi_pawsdrop */
2034 	/* tcpi_sack_recovery_episode */
2035 	/* tcpi_reordered_pkts */
2036 	/* tcpi_dsack_sent */
2037 	/* tcpi_dsack_recvd */
2038 	/* tcpi_flowhash */
2039 	if (acttp) {
2040 		ti->tcpi_txretransmitpackets = acttp->t_stat.rxmitpkts;
2041 	}
2042 }
2043 
2044 /*
2045  * Handle SOPT_GET for socket options issued on MP socket.
2046  */
2047 static int
mptcp_getopt(struct mptses * mpte,struct sockopt * sopt)2048 mptcp_getopt(struct mptses *mpte, struct sockopt *sopt)
2049 {
2050 	int error = 0, optval = 0;
2051 	struct socket *mp_so;
2052 
2053 	mp_so = mptetoso(mpte);
2054 
2055 	VERIFY(!(mpsotomppcb(mp_so)->mpp_flags & MPP_INSIDE_SETGETOPT));
2056 	mpsotomppcb(mp_so)->mpp_flags |= MPP_INSIDE_SETGETOPT;
2057 
2058 	/*
2059 	 * We only handle SOPT_GET for TCP level socket options; we should
2060 	 * not get here for socket level options since they are already
2061 	 * handled at the socket layer.
2062 	 */
2063 	if (sopt->sopt_level != IPPROTO_TCP) {
2064 		error = ENOPROTOOPT;
2065 		goto out;
2066 	}
2067 
2068 	switch (sopt->sopt_name) {
2069 	case PERSIST_TIMEOUT:
2070 		/* Only case for which we have a non-zero default */
2071 		optval = tcp_max_persist_timeout;
2072 		OS_FALLTHROUGH;
2073 	case TCP_NODELAY:
2074 	case TCP_RXT_FINDROP:
2075 	case TCP_KEEPALIVE:
2076 	case TCP_KEEPINTVL:
2077 	case TCP_KEEPCNT:
2078 	case TCP_CONNECTIONTIMEOUT:
2079 	case TCP_RXT_CONNDROPTIME:
2080 	case TCP_ADAPTIVE_READ_TIMEOUT:
2081 	case TCP_ADAPTIVE_WRITE_TIMEOUT:
2082 	case TCP_FASTOPEN_FORCE_ENABLE:
2083 	{
2084 		struct mptopt *mpo = mptcp_sopt_find(mpte, sopt);
2085 
2086 		if (mpo != NULL) {
2087 			optval = mpo->mpo_intval;
2088 		}
2089 		break;
2090 	}
2091 
2092 	/* The next ones are stored at the MPTCP-level */
2093 	case TCP_NOTSENT_LOWAT:
2094 		if (mptetoso(mpte)->so_flags & SOF_NOTSENT_LOWAT) {
2095 			optval = mptcp_get_notsent_lowat(mpte);
2096 		} else {
2097 			optval = 0;
2098 		}
2099 		break;
2100 	case TCP_INFO:
2101 	{
2102 		struct tcp_info ti;
2103 
2104 		mptcp_fill_info(mpte, &ti);
2105 		error = sooptcopyout(sopt, &ti, sizeof(struct tcp_info));
2106 
2107 		goto out;
2108 	}
2109 	case MPTCP_SERVICE_TYPE:
2110 		optval = mpte->mpte_svctype;
2111 		break;
2112 	case MPTCP_ALTERNATE_PORT:
2113 		optval = mpte->mpte_alternate_port;
2114 		break;
2115 	case MPTCP_FORCE_ENABLE:
2116 		optval = !!(mpte->mpte_flags & MPTE_FORCE_ENABLE);
2117 		break;
2118 	case MPTCP_FORCE_VERSION:
2119 		if (mpte->mpte_flags & MPTE_FORCE_V0) {
2120 			optval = 0;
2121 		} else if (mpte->mpte_flags & MPTE_FORCE_V1) {
2122 			optval = 1;
2123 		} else {
2124 			optval = -1;
2125 		}
2126 		break;
2127 	case MPTCP_EXPECTED_PROGRESS_TARGET:
2128 		error = sooptcopyout(sopt, &mpte->mpte_time_target, sizeof(mpte->mpte_time_target));
2129 
2130 		goto out;
2131 	default:
2132 		/* not eligible */
2133 		error = ENOPROTOOPT;
2134 		break;
2135 	}
2136 
2137 	if (error == 0) {
2138 		error = sooptcopyout(sopt, &optval, sizeof(int));
2139 	}
2140 
2141 out:
2142 	mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
2143 	return error;
2144 }
2145 
2146 /*
2147  * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
2148  * socket, at SOL_SOCKET and IPPROTO_TCP levels.  The former is restricted
2149  * to those that are allowed by mptcp_usr_socheckopt().
2150  */
2151 int
mptcp_ctloutput(struct socket * mp_so,struct sockopt * sopt)2152 mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt)
2153 {
2154 	struct mppcb *mpp = mpsotomppcb(mp_so);
2155 	struct mptses *mpte;
2156 	int error = 0;
2157 
2158 	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
2159 		error = EINVAL;
2160 		goto out;
2161 	}
2162 	mpte = mptompte(mpp);
2163 	socket_lock_assert_owned(mp_so);
2164 
2165 	/* we only handle socket and TCP-level socket options for MPTCP */
2166 	if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP) {
2167 		error = EINVAL;
2168 		goto out;
2169 	}
2170 
2171 	switch (sopt->sopt_dir) {
2172 	case SOPT_SET:
2173 		error = mptcp_setopt(mpte, sopt);
2174 		break;
2175 
2176 	case SOPT_GET:
2177 		error = mptcp_getopt(mpte, sopt);
2178 		break;
2179 	}
2180 out:
2181 	return error;
2182 }
2183 
2184 const char *
mptcp_sopt2str(int level,int optname)2185 mptcp_sopt2str(int level, int optname)
2186 {
2187 	switch (level) {
2188 	case SOL_SOCKET:
2189 		switch (optname) {
2190 		case SO_LINGER:
2191 			return "SO_LINGER";
2192 		case SO_LINGER_SEC:
2193 			return "SO_LINGER_SEC";
2194 		case SO_DEBUG:
2195 			return "SO_DEBUG";
2196 		case SO_KEEPALIVE:
2197 			return "SO_KEEPALIVE";
2198 		case SO_USELOOPBACK:
2199 			return "SO_USELOOPBACK";
2200 		case SO_TYPE:
2201 			return "SO_TYPE";
2202 		case SO_NREAD:
2203 			return "SO_NREAD";
2204 		case SO_NWRITE:
2205 			return "SO_NWRITE";
2206 		case SO_ERROR:
2207 			return "SO_ERROR";
2208 		case SO_SNDBUF:
2209 			return "SO_SNDBUF";
2210 		case SO_RCVBUF:
2211 			return "SO_RCVBUF";
2212 		case SO_SNDLOWAT:
2213 			return "SO_SNDLOWAT";
2214 		case SO_RCVLOWAT:
2215 			return "SO_RCVLOWAT";
2216 		case SO_SNDTIMEO:
2217 			return "SO_SNDTIMEO";
2218 		case SO_RCVTIMEO:
2219 			return "SO_RCVTIMEO";
2220 		case SO_NKE:
2221 			return "SO_NKE";
2222 		case SO_NOSIGPIPE:
2223 			return "SO_NOSIGPIPE";
2224 		case SO_NOADDRERR:
2225 			return "SO_NOADDRERR";
2226 		case SO_RESTRICTIONS:
2227 			return "SO_RESTRICTIONS";
2228 		case SO_LABEL:
2229 			return "SO_LABEL";
2230 		case SO_PEERLABEL:
2231 			return "SO_PEERLABEL";
2232 		case SO_RANDOMPORT:
2233 			return "SO_RANDOMPORT";
2234 		case SO_TRAFFIC_CLASS:
2235 			return "SO_TRAFFIC_CLASS";
2236 		case SO_RECV_TRAFFIC_CLASS:
2237 			return "SO_RECV_TRAFFIC_CLASS";
2238 		case SO_TRAFFIC_CLASS_DBG:
2239 			return "SO_TRAFFIC_CLASS_DBG";
2240 		case SO_PRIVILEGED_TRAFFIC_CLASS:
2241 			return "SO_PRIVILEGED_TRAFFIC_CLASS";
2242 		case SO_DEFUNCTIT:
2243 			return "SO_DEFUNCTIT";
2244 		case SO_DEFUNCTOK:
2245 			return "SO_DEFUNCTOK";
2246 		case SO_ISDEFUNCT:
2247 			return "SO_ISDEFUNCT";
2248 		case SO_OPPORTUNISTIC:
2249 			return "SO_OPPORTUNISTIC";
2250 		case SO_FLUSH:
2251 			return "SO_FLUSH";
2252 		case SO_RECV_ANYIF:
2253 			return "SO_RECV_ANYIF";
2254 		case SO_NOWAKEFROMSLEEP:
2255 			return "SO_NOWAKEFROMSLEEP";
2256 		case SO_NOAPNFALLBK:
2257 			return "SO_NOAPNFALLBK";
2258 		case SO_MARK_CELLFALLBACK:
2259 			return "SO_CELLFALLBACK";
2260 		case SO_FALLBACK_MODE:
2261 			return "SO_FALLBACK_MODE";
2262 		case SO_MARK_KNOWN_TRACKER:
2263 			return "SO_MARK_KNOWN_TRACKER";
2264 		case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
2265 			return "SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED";
2266 		case SO_MARK_APPROVED_APP_DOMAIN:
2267 			return "SO_MARK_APPROVED_APP_DOMAIN";
2268 		case SO_DELEGATED:
2269 			return "SO_DELEGATED";
2270 		case SO_DELEGATED_UUID:
2271 			return "SO_DELEGATED_UUID";
2272 #if NECP
2273 		case SO_NECP_ATTRIBUTES:
2274 			return "SO_NECP_ATTRIBUTES";
2275 		case SO_NECP_CLIENTUUID:
2276 			return "SO_NECP_CLIENTUUID";
2277 #endif /* NECP */
2278 		}
2279 
2280 		break;
2281 	case IPPROTO_TCP:
2282 		switch (optname) {
2283 		case TCP_NODELAY:
2284 			return "TCP_NODELAY";
2285 		case TCP_KEEPALIVE:
2286 			return "TCP_KEEPALIVE";
2287 		case TCP_KEEPINTVL:
2288 			return "TCP_KEEPINTVL";
2289 		case TCP_KEEPCNT:
2290 			return "TCP_KEEPCNT";
2291 		case TCP_CONNECTIONTIMEOUT:
2292 			return "TCP_CONNECTIONTIMEOUT";
2293 		case TCP_RXT_CONNDROPTIME:
2294 			return "TCP_RXT_CONNDROPTIME";
2295 		case PERSIST_TIMEOUT:
2296 			return "PERSIST_TIMEOUT";
2297 		case TCP_NOTSENT_LOWAT:
2298 			return "NOTSENT_LOWAT";
2299 		case TCP_ADAPTIVE_READ_TIMEOUT:
2300 			return "ADAPTIVE_READ_TIMEOUT";
2301 		case TCP_ADAPTIVE_WRITE_TIMEOUT:
2302 			return "ADAPTIVE_WRITE_TIMEOUT";
2303 		case TCP_FASTOPEN_FORCE_ENABLE:
2304 			return "TCP_FASTOPEN_FORCE_ENABLE";
2305 		case MPTCP_SERVICE_TYPE:
2306 			return "MPTCP_SERVICE_TYPE";
2307 		case MPTCP_ALTERNATE_PORT:
2308 			return "MPTCP_ALTERNATE_PORT";
2309 		case MPTCP_FORCE_ENABLE:
2310 			return "MPTCP_FORCE_ENABLE";
2311 		case MPTCP_FORCE_VERSION:
2312 			return "MPTCP_FORCE_VERSION";
2313 		case MPTCP_EXPECTED_PROGRESS_TARGET:
2314 			return "MPTCP_EXPECTED_PROGRESS_TARGET";
2315 		}
2316 
2317 		break;
2318 	}
2319 
2320 	return "unknown";
2321 }
2322 
2323 static int
mptcp_usr_preconnect(struct socket * mp_so)2324 mptcp_usr_preconnect(struct socket *mp_so)
2325 {
2326 	struct mptsub *mpts = NULL;
2327 	struct mppcb *mpp = mpsotomppcb(mp_so);
2328 	struct mptses *mpte;
2329 	struct socket *so;
2330 	struct tcpcb *tp = NULL;
2331 	int error;
2332 
2333 	mpte = mptompte(mpp);
2334 
2335 	mpts = mptcp_get_subflow(mpte, NULL);
2336 	if (mpts == NULL) {
2337 		os_log_error(mptcp_log_handle, "%s - %lx: invalid preconnect ",
2338 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
2339 		return EINVAL;
2340 	}
2341 	mpts->mpts_flags &= ~MPTSF_TFO_REQD;
2342 	so = mpts->mpts_socket;
2343 	tp = intotcpcb(sotoinpcb(so));
2344 	tp->t_mpflags &= ~TMPF_TFO_REQUEST;
2345 	error = tcp_output(sototcpcb(so));
2346 
2347 	soclearfastopen(mp_so);
2348 
2349 	return error;
2350 }
2351