1 /*
2 * Copyright (c) 2012-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/protosw.h>
35 #include <sys/mcache.h>
36 #include <sys/syslog.h>
37 #include <sys/proc.h>
38 #include <sys/proc_internal.h>
39 #include <sys/resourcevar.h>
40 #include <sys/kauth.h>
41 #include <sys/priv.h>
42
43 #include <net/if.h>
44 #include <netinet/in.h>
45 #include <netinet/in_var.h>
46 #include <netinet/tcp.h>
47 #include <netinet/tcp_fsm.h>
48 #include <netinet/tcp_seq.h>
49 #include <netinet/tcp_var.h>
50 #include <netinet/tcp_timer.h>
51 #include <netinet/mptcp_var.h>
52 #include <netinet/mptcp_timer.h>
53
54 #include <mach/sdt.h>
55
56 static int mptcp_usr_attach(struct socket *, int, struct proc *);
57 static int mptcp_usr_detach(struct socket *);
58 static int mptcp_attach(struct socket *, struct proc *);
59 static int mptcp_usr_connectx(struct socket *, struct sockaddr *,
60 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
61 sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
62 static int mptcp_getassocids(struct mptses *, uint32_t *, user_addr_t);
63 static int mptcp_getconnids(struct mptses *, sae_associd_t, uint32_t *,
64 user_addr_t);
65 static int mptcp_getconninfo(struct mptses *, sae_connid_t *, uint32_t *,
66 uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *,
67 uint32_t *, user_addr_t, uint32_t *);
68 static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *,
69 struct proc *);
70 static int mptcp_disconnect(struct mptses *);
71 static int mptcp_usr_disconnect(struct socket *);
72 static int mptcp_usr_disconnectx(struct socket *, sae_associd_t, sae_connid_t);
73 static struct mptses *mptcp_usrclosed(struct mptses *);
74 static int mptcp_usr_rcvd(struct socket *, int);
75 static int mptcp_usr_send(struct socket *, int, struct mbuf *,
76 struct sockaddr *, struct mbuf *, struct proc *);
77 static int mptcp_usr_shutdown(struct socket *);
78 static int mptcp_usr_sosend(struct socket *, struct sockaddr *, struct uio *,
79 struct mbuf *, struct mbuf *, int);
80 static int mptcp_usr_socheckopt(struct socket *, struct sockopt *);
81 static int mptcp_usr_preconnect(struct socket *so);
82
83 struct pr_usrreqs mptcp_usrreqs = {
84 .pru_attach = mptcp_usr_attach,
85 .pru_connectx = mptcp_usr_connectx,
86 .pru_control = mptcp_usr_control,
87 .pru_detach = mptcp_usr_detach,
88 .pru_disconnect = mptcp_usr_disconnect,
89 .pru_disconnectx = mptcp_usr_disconnectx,
90 .pru_peeraddr = mp_getpeeraddr,
91 .pru_rcvd = mptcp_usr_rcvd,
92 .pru_send = mptcp_usr_send,
93 .pru_shutdown = mptcp_usr_shutdown,
94 .pru_sockaddr = mp_getsockaddr,
95 .pru_sosend = mptcp_usr_sosend,
96 .pru_soreceive = soreceive,
97 .pru_socheckopt = mptcp_usr_socheckopt,
98 .pru_preconnect = mptcp_usr_preconnect,
99 };
100
101
102 #if (DEVELOPMENT || DEBUG)
103 static int mptcp_disable_entitlements = 0;
104 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, disable_entitlements, CTLFLAG_RW | CTLFLAG_LOCKED,
105 &mptcp_disable_entitlements, 0, "Disable Multipath TCP Entitlement Checking");
106 #endif
107
108 int mptcp_developer_mode = 0;
109 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, allow_aggregate, CTLFLAG_RW | CTLFLAG_LOCKED,
110 &mptcp_developer_mode, 0, "Allow the Multipath aggregation mode");
111
112 int mptcp_no_first_party = 0;
113 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, no_first_party, CTLFLAG_RW | CTLFLAG_LOCKED,
114 &mptcp_no_first_party, 0, "Do not do first-party app exemptions");
115
116 static unsigned long mptcp_expected_progress_headstart = 5000;
117 SYSCTL_ULONG(_net_inet_mptcp, OID_AUTO, expected_progress_headstart, CTLFLAG_RW | CTLFLAG_LOCKED,
118 &mptcp_expected_progress_headstart, "Headstart to give MPTCP before meeting the progress deadline");
119
120
121 /*
122 * Attaches an MPTCP control block to a socket.
123 */
124 static int
mptcp_usr_attach(struct socket * mp_so,int proto,struct proc * p)125 mptcp_usr_attach(struct socket *mp_so, int proto, struct proc *p)
126 {
127 #pragma unused(proto)
128 int error;
129
130 VERIFY(mpsotomppcb(mp_so) == NULL);
131
132 error = mptcp_attach(mp_so, p);
133 if (error) {
134 goto out;
135 }
136
137 if ((mp_so->so_options & SO_LINGER) && mp_so->so_linger == 0) {
138 mp_so->so_linger = (short)(TCP_LINGERTIME * hz);
139 }
140 out:
141 return error;
142 }
143
144 /*
145 * Detaches an MPTCP control block from a socket.
146 */
147 static int
mptcp_usr_detach(struct socket * mp_so)148 mptcp_usr_detach(struct socket *mp_so)
149 {
150 struct mptses *mpte = mpsotompte(mp_so);
151 struct mppcb *mpp = mpsotomppcb(mp_so);
152
153 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
154 os_log_error(mptcp_log_handle, "%s - %lx: state: %d\n",
155 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
156 mpp ? mpp->mpp_state : -1);
157 return EINVAL;
158 }
159
160 /*
161 * We are done with this MPTCP socket (it has been closed);
162 * trigger all subflows to be disconnected, if not already,
163 * by initiating the PCB detach sequence (SOF_PCBCLEARING
164 * will be set.)
165 */
166 mp_pcbdetach(mp_so);
167
168 mptcp_disconnect(mpte);
169
170 return 0;
171 }
172
173 /*
174 * Attach MPTCP protocol to socket, allocating MP control block,
175 * MPTCP session, control block, buffer space, etc.
176 */
177 static int
mptcp_attach(struct socket * mp_so,struct proc * p)178 mptcp_attach(struct socket *mp_so, struct proc *p)
179 {
180 #pragma unused(p)
181 struct mptses *mpte = NULL;
182 struct mptcb *mp_tp = NULL;
183 struct mppcb *mpp = NULL;
184 int error = 0;
185
186 if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) {
187 error = soreserve(mp_so, tcp_sendspace, tcp_recvspace);
188 if (error != 0) {
189 goto out;
190 }
191 }
192
193 if (mp_so->so_snd.sb_preconn_hiwat == 0) {
194 soreserve_preconnect(mp_so, 2048);
195 }
196
197 if ((mp_so->so_rcv.sb_flags & SB_USRSIZE) == 0) {
198 mp_so->so_rcv.sb_flags |= SB_AUTOSIZE;
199 }
200 if ((mp_so->so_snd.sb_flags & SB_USRSIZE) == 0) {
201 mp_so->so_snd.sb_flags |= SB_AUTOSIZE;
202 }
203
204 /*
205 * MPTCP send-socket buffers cannot be compressed, due to the
206 * fact that each mbuf chained via m_next is a M_PKTHDR
207 * which carries some MPTCP metadata.
208 */
209 mp_so->so_snd.sb_flags |= SB_NOCOMPRESS;
210
211 if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) {
212 goto out;
213 }
214
215 mpp = mpsotomppcb(mp_so);
216 mpte = (struct mptses *)mpp->mpp_pcbe;
217 mp_tp = mpte->mpte_mptcb;
218
219 VERIFY(mp_tp != NULL);
220 out:
221 return error;
222 }
223
224 static int
mptcp_entitlement_check(struct socket * mp_so,uint8_t svctype)225 mptcp_entitlement_check(struct socket *mp_so, uint8_t svctype)
226 {
227 struct mptses *mpte = mpsotompte(mp_so);
228
229 if (mptcp_no_first_party) {
230 return 0;
231 }
232
233 /* First, check for mptcp_extended without delegation */
234 if (soopt_cred_check(mp_so, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED, TRUE, FALSE) == 0) {
235 /*
236 * This means the app has the extended entitlement. Thus,
237 * it's a first party app and can run without restrictions.
238 */
239 mpte->mpte_flags |= MPTE_FIRSTPARTY;
240 return 0;
241 }
242
243 /* Now with delegation */
244 if (mp_so->so_flags & SOF_DELEGATED &&
245 soopt_cred_check(mp_so, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED, TRUE, TRUE) == 0) {
246 /*
247 * This means the app has the extended entitlement. Thus,
248 * it's a first party app and can run without restrictions.
249 */
250 mpte->mpte_flags |= MPTE_FIRSTPARTY;
251 return 0;
252 }
253
254 if (svctype == MPTCP_SVCTYPE_AGGREGATE) {
255 if (mptcp_developer_mode) {
256 return 0;
257 }
258
259 os_log_error(mptcp_log_handle, "%s - %lx: MPTCP prohibited on svc %u\n",
260 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_svctype);
261 return -1;
262 }
263
264 return 0;
265 }
266
267 /*
268 * Common subroutine to open a MPTCP connection to one of the remote hosts
269 * specified by dst_sl. This includes allocating and establishing a
270 * subflow TCP connection, either initially to establish MPTCP connection,
271 * or to join an existing one. Returns a connection handle upon success.
272 */
273 static int
mptcp_connectx(struct mptses * mpte,struct sockaddr * src,struct sockaddr * dst,uint32_t ifscope,sae_connid_t * pcid)274 mptcp_connectx(struct mptses *mpte, struct sockaddr *src,
275 struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid)
276 {
277 int error = 0;
278
279 VERIFY(dst != NULL);
280 VERIFY(pcid != NULL);
281
282 error = mptcp_subflow_add(mpte, src, dst, ifscope, pcid);
283
284 return error;
285 }
286
287 /*
288 * User-protocol pru_connectx callback.
289 */
290 static int
mptcp_usr_connectx(struct socket * mp_so,struct sockaddr * src,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uint32_t flags,void * arg,uint32_t arglen,struct uio * auio,user_ssize_t * bytes_written)291 mptcp_usr_connectx(struct socket *mp_so, struct sockaddr *src,
292 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
293 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
294 uint32_t arglen, struct uio *auio, user_ssize_t *bytes_written)
295 {
296 #pragma unused(p, aid, flags, arg, arglen)
297 struct mppcb *mpp = mpsotomppcb(mp_so);
298 struct mptses *mpte = NULL;
299 struct mptcb *mp_tp = NULL;
300 user_ssize_t datalen;
301 int error = 0;
302
303 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
304 os_log_error(mptcp_log_handle, "%s - %lx: state %d\n",
305 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
306 mpp ? mpp->mpp_state : -1);
307 error = EINVAL;
308 goto out;
309 }
310 mpte = mptompte(mpp);
311 mp_tp = mpte->mpte_mptcb;
312
313 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
314 os_log_error(mptcp_log_handle, "%s - %lx: fell back to TCP\n",
315 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
316 error = EINVAL;
317 goto out;
318 }
319
320 if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
321 error = EAFNOSUPPORT;
322 goto out;
323 }
324
325 if (dst->sa_family == AF_INET &&
326 dst->sa_len != sizeof(mpte->__mpte_dst_v4)) {
327 os_log_error(mptcp_log_handle, "%s - %lx: IPv4 dst len %u\n",
328 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), dst->sa_len);
329 error = EINVAL;
330 goto out;
331 }
332
333 if (dst->sa_family == AF_INET6 &&
334 dst->sa_len != sizeof(mpte->__mpte_dst_v6)) {
335 os_log_error(mptcp_log_handle, "%s - %lx: IPv6 dst len %u\n",
336 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), dst->sa_len);
337 error = EINVAL;
338 goto out;
339 }
340
341 if (!(mpte->mpte_flags & MPTE_SVCTYPE_CHECKED)) {
342 if (mptcp_entitlement_check(mp_so, mpte->mpte_svctype) < 0) {
343 error = EPERM;
344 goto out;
345 }
346
347 mpte->mpte_flags |= MPTE_SVCTYPE_CHECKED;
348 }
349
350 if ((mp_so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0) {
351 memcpy(&mpte->mpte_u_dst, dst, dst->sa_len);
352
353 if (dst->sa_family == AF_INET) {
354 memcpy(&mpte->mpte_sub_dst_v4, dst, dst->sa_len);
355 } else {
356 memcpy(&mpte->mpte_sub_dst_v6, dst, dst->sa_len);
357 }
358 }
359
360 if (src) {
361 if (src->sa_family != AF_INET && src->sa_family != AF_INET6) {
362 error = EAFNOSUPPORT;
363 goto out;
364 }
365
366 if (src->sa_family == AF_INET &&
367 src->sa_len != sizeof(mpte->__mpte_src_v4)) {
368 os_log_error(mptcp_log_handle, "%s - %lx: IPv4 src len %u\n",
369 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), src->sa_len);
370 error = EINVAL;
371 goto out;
372 }
373
374 if (src->sa_family == AF_INET6 &&
375 src->sa_len != sizeof(mpte->__mpte_src_v6)) {
376 os_log_error(mptcp_log_handle, "%s - %lx: IPv6 src len %u\n",
377 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), src->sa_len);
378 error = EINVAL;
379 goto out;
380 }
381
382 if ((mp_so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0) {
383 memcpy(&mpte->mpte_u_src, src, src->sa_len);
384 }
385 }
386
387 error = mptcp_connectx(mpte, src, dst, ifscope, pcid);
388
389 /* If there is data, copy it */
390 if (auio != NULL) {
391 datalen = uio_resid(auio);
392 socket_unlock(mp_so, 0);
393 error = mp_so->so_proto->pr_usrreqs->pru_sosend(mp_so, NULL,
394 (uio_t) auio, NULL, NULL, 0);
395
396 if (error == 0 || error == EWOULDBLOCK) {
397 *bytes_written = datalen - uio_resid(auio);
398 }
399
400 if (error == EWOULDBLOCK) {
401 error = EINPROGRESS;
402 }
403
404 socket_lock(mp_so, 0);
405 }
406
407 out:
408 return error;
409 }
410
411 /*
412 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
413 */
414 static int
mptcp_getassocids(struct mptses * mpte,uint32_t * cnt,user_addr_t aidp)415 mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp)
416 {
417 /* MPTCP has at most 1 association */
418 *cnt = (mpte->mpte_associd != SAE_ASSOCID_ANY) ? 1 : 0;
419
420 /* just asking how many there are? */
421 if (aidp == USER_ADDR_NULL) {
422 return 0;
423 }
424
425 return copyout(&mpte->mpte_associd, aidp,
426 sizeof(mpte->mpte_associd));
427 }
428
429 /*
430 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
431 */
432 static int
mptcp_getconnids(struct mptses * mpte,sae_associd_t aid,uint32_t * cnt,user_addr_t cidp)433 mptcp_getconnids(struct mptses *mpte, sae_associd_t aid, uint32_t *cnt,
434 user_addr_t cidp)
435 {
436 struct mptsub *mpts;
437 int error = 0;
438
439 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
440 aid != mpte->mpte_associd) {
441 return EINVAL;
442 }
443
444 *cnt = mpte->mpte_numflows;
445
446 /* just asking how many there are? */
447 if (cidp == USER_ADDR_NULL) {
448 return 0;
449 }
450
451 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
452 if ((error = copyout(&mpts->mpts_connid, cidp,
453 sizeof(mpts->mpts_connid))) != 0) {
454 break;
455 }
456
457 cidp += sizeof(mpts->mpts_connid);
458 }
459
460 return error;
461 }
462
463 /*
464 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
465 */
466 static int
mptcp_getconninfo(struct mptses * mpte,sae_connid_t * cid,uint32_t * flags,uint32_t * ifindex,int32_t * soerror,user_addr_t src,socklen_t * src_len,user_addr_t dst,socklen_t * dst_len,uint32_t * aux_type,user_addr_t aux_data,uint32_t * aux_len)467 mptcp_getconninfo(struct mptses *mpte, sae_connid_t *cid, uint32_t *flags,
468 uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
469 user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
470 user_addr_t aux_data, uint32_t *aux_len)
471 {
472 *flags = 0;
473 *aux_type = 0;
474 *ifindex = 0;
475 *soerror = 0;
476
477 /* MPTCP-level global stats */
478 if (*cid == SAE_CONNID_ALL) {
479 struct socket *mp_so = mptetoso(mpte);
480 struct mptcb *mp_tp = mpte->mpte_mptcb;
481 struct conninfo_multipathtcp mptcp_ci;
482 int error = 0;
483
484 if (*aux_len != 0 && *aux_len != sizeof(mptcp_ci)) {
485 return EINVAL;
486 }
487
488 if (mp_so->so_state & SS_ISCONNECTING) {
489 *flags |= CIF_CONNECTING;
490 }
491 if (mp_so->so_state & SS_ISCONNECTED) {
492 *flags |= CIF_CONNECTED;
493 }
494 if (mp_so->so_state & SS_ISDISCONNECTING) {
495 *flags |= CIF_DISCONNECTING;
496 }
497 if (mp_so->so_state & SS_ISDISCONNECTED) {
498 *flags |= CIF_DISCONNECTED;
499 }
500 if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) {
501 *flags |= CIF_MP_CAPABLE;
502 }
503 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
504 *flags |= CIF_MP_DEGRADED;
505 }
506
507 *src_len = 0;
508 *dst_len = 0;
509
510 *aux_type = CIAUX_MPTCP;
511 *aux_len = sizeof(mptcp_ci);
512
513 if (aux_data != USER_ADDR_NULL) {
514 const struct mptsub *mpts;
515 int initial_info_set = 0;
516 unsigned long i = 0;
517
518 bzero(&mptcp_ci, sizeof(mptcp_ci));
519 mptcp_ci.mptcpci_subflow_count = mpte->mpte_numflows;
520 mptcp_ci.mptcpci_switch_count = mpte->mpte_subflow_switches;
521
522 VERIFY(sizeof(mptcp_ci.mptcpci_itfstats) == sizeof(mpte->mpte_itfstats));
523 memcpy(mptcp_ci.mptcpci_itfstats, mpte->mpte_itfstats, sizeof(mptcp_ci.mptcpci_itfstats));
524
525 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
526 if (i >= sizeof(mptcp_ci.mptcpci_subflow_connids) / sizeof(sae_connid_t)) {
527 break;
528 }
529 mptcp_ci.mptcpci_subflow_connids[i] = mpts->mpts_connid;
530
531 if (mpts->mpts_flags & MPTSF_INITIAL_SUB) {
532 const struct inpcb *inp;
533
534 inp = sotoinpcb(mpts->mpts_socket);
535
536 mptcp_ci.mptcpci_init_rxbytes = inp->inp_stat->rxbytes;
537 mptcp_ci.mptcpci_init_txbytes = inp->inp_stat->txbytes;
538 initial_info_set = 1;
539 }
540
541 mptcpstats_update(mptcp_ci.mptcpci_itfstats, mpts);
542
543 i++;
544 }
545
546 if (initial_info_set == 0) {
547 mptcp_ci.mptcpci_init_rxbytes = mpte->mpte_init_rxbytes;
548 mptcp_ci.mptcpci_init_txbytes = mpte->mpte_init_txbytes;
549 }
550
551 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
552 mptcp_ci.mptcpci_flags |= MPTCPCI_FIRSTPARTY;
553 }
554
555 error = copyout(&mptcp_ci, aux_data, sizeof(mptcp_ci));
556 if (error != 0) {
557 os_log_error(mptcp_log_handle, "%s - %lx: copyout failed: %d\n",
558 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
559 return error;
560 }
561 }
562
563 return 0;
564 }
565
566 /* Any stats of any subflow */
567 if (*cid == SAE_CONNID_ANY) {
568 const struct mptsub *mpts;
569 struct socket *so;
570 const struct inpcb *inp;
571 int error = 0;
572
573 mpts = TAILQ_FIRST(&mpte->mpte_subflows);
574 if (mpts == NULL) {
575 return ENXIO;
576 }
577
578 so = mpts->mpts_socket;
579 inp = sotoinpcb(so);
580
581 if (inp->inp_vflag & INP_IPV4) {
582 error = in_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
583 soerror, src, src_len, dst, dst_len,
584 aux_type, aux_data, aux_len);
585 } else {
586 error = in6_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
587 soerror, src, src_len, dst, dst_len,
588 aux_type, aux_data, aux_len);
589 }
590
591 if (error != 0) {
592 os_log_error(mptcp_log_handle, "%s - %lx:error from in_getconninfo %d\n",
593 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
594 return error;
595 }
596
597 if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
598 *flags |= CIF_MP_CAPABLE;
599 }
600 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
601 *flags |= CIF_MP_DEGRADED;
602 }
603 if (mpts->mpts_flags & MPTSF_MP_READY) {
604 *flags |= CIF_MP_READY;
605 }
606 if (mpts->mpts_flags & MPTSF_ACTIVE) {
607 *flags |= CIF_MP_ACTIVE;
608 }
609
610 return 0;
611 } else {
612 /* Per-interface stats */
613 const struct mptsub *mpts, *orig_mpts = NULL;
614 struct conninfo_tcp tcp_ci;
615 const struct inpcb *inp;
616 struct socket *so;
617 int error = 0;
618 int index;
619
620 /* cid is thus an ifindex - range-check first! */
621 if (*cid > USHRT_MAX) {
622 return EINVAL;
623 }
624
625 bzero(&tcp_ci, sizeof(tcp_ci));
626
627 /* First, get a subflow to fill in the "regular" info. */
628 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
629 const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
630
631 if (ifp && ifp->if_index == *cid) {
632 break;
633 }
634 }
635
636 if (mpts == NULL) {
637 /* No subflow there - well, let's just get the basic itf-info */
638 goto interface_info;
639 }
640
641 so = mpts->mpts_socket;
642 inp = sotoinpcb(so);
643
644 /* Give it USER_ADDR_NULL, because we are doing this on our own */
645 if (inp->inp_vflag & INP_IPV4) {
646 error = in_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
647 soerror, src, src_len, dst, dst_len,
648 aux_type, USER_ADDR_NULL, aux_len);
649 } else {
650 error = in6_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
651 soerror, src, src_len, dst, dst_len,
652 aux_type, USER_ADDR_NULL, aux_len);
653 }
654
655 if (error != 0) {
656 os_log_error(mptcp_log_handle, "%s - %lx:error from in_getconninfo %d\n",
657 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
658 return error;
659 }
660
661 /* ToDo: Nobody is reading these flags on subflows. Why bother ? */
662 if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
663 *flags |= CIF_MP_CAPABLE;
664 }
665 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
666 *flags |= CIF_MP_DEGRADED;
667 }
668 if (mpts->mpts_flags & MPTSF_MP_READY) {
669 *flags |= CIF_MP_READY;
670 }
671 if (mpts->mpts_flags & MPTSF_ACTIVE) {
672 *flags |= CIF_MP_ACTIVE;
673 }
674
675 /*
676 * Now, we gather the metrics (aka., tcp_info) and roll them in
677 * across all subflows of this interface to build an aggregated
678 * view.
679 *
680 * We take the TCP_INFO from the first subflow as the "master",
681 * feeding into those fields that we do not roll.
682 */
683 if (aux_data != USER_ADDR_NULL) {
684 tcp_getconninfo(so, &tcp_ci);
685
686 orig_mpts = mpts;
687 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
688 const struct inpcb *mptsinp = sotoinpcb(mpts->mpts_socket);
689 const struct ifnet *ifp;
690
691 ifp = mptsinp->inp_last_outifp;
692
693 if (ifp == NULL || ifp->if_index != *cid || mpts == orig_mpts) {
694 continue;
695 }
696
697 /* Roll the itf-stats into the tcp_info */
698 tcp_ci.tcpci_tcp_info.tcpi_txbytes +=
699 mptsinp->inp_stat->txbytes;
700 tcp_ci.tcpci_tcp_info.tcpi_rxbytes +=
701 mptsinp->inp_stat->rxbytes;
702
703 tcp_ci.tcpci_tcp_info.tcpi_wifi_txbytes +=
704 mptsinp->inp_wstat->txbytes;
705 tcp_ci.tcpci_tcp_info.tcpi_wifi_rxbytes +=
706 mptsinp->inp_wstat->rxbytes;
707
708 tcp_ci.tcpci_tcp_info.tcpi_wired_txbytes +=
709 mptsinp->inp_Wstat->txbytes;
710 tcp_ci.tcpci_tcp_info.tcpi_wired_rxbytes +=
711 mptsinp->inp_Wstat->rxbytes;
712
713 tcp_ci.tcpci_tcp_info.tcpi_cell_txbytes +=
714 mptsinp->inp_cstat->txbytes;
715 tcp_ci.tcpci_tcp_info.tcpi_cell_rxbytes +=
716 mptsinp->inp_cstat->rxbytes;
717 }
718 }
719
720 interface_info:
721 *aux_type = CIAUX_TCP;
722 if (*aux_len == 0) {
723 *aux_len = sizeof(tcp_ci);
724 } else if (aux_data != USER_ADDR_NULL) {
725 boolean_t create;
726
727 /*
728 * Finally, old subflows might have been closed - we
729 * want this data as well, so grab it from the interface
730 * stats.
731 */
732 create = orig_mpts != NULL;
733
734 /*
735 * When we found a subflow, we are willing to create a stats-index
736 * because we have some data to return. If there isn't a subflow,
737 * nor anything in the stats, return EINVAL. Because the
738 * ifindex belongs to something that doesn't exist.
739 */
740 index = mptcpstats_get_index_by_ifindex(mpte->mpte_itfstats, (u_short)(*cid), false);
741 if (index == -1) {
742 os_log_error(mptcp_log_handle,
743 "%s - %lx: Asking for too many ifindex: %u subcount %u, mpts? %s\n",
744 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
745 *cid, mpte->mpte_numflows,
746 orig_mpts ? "yes" : "no");
747
748 if (orig_mpts == NULL) {
749 return EINVAL;
750 }
751 } else {
752 struct mptcp_itf_stats *stats;
753
754 stats = &mpte->mpte_itfstats[index];
755
756 /* Roll the itf-stats into the tcp_info */
757 tcp_ci.tcpci_tcp_info.tcpi_last_outif = *cid;
758 tcp_ci.tcpci_tcp_info.tcpi_txbytes +=
759 stats->mpis_txbytes;
760 tcp_ci.tcpci_tcp_info.tcpi_rxbytes +=
761 stats->mpis_rxbytes;
762
763 tcp_ci.tcpci_tcp_info.tcpi_wifi_txbytes +=
764 stats->mpis_wifi_txbytes;
765 tcp_ci.tcpci_tcp_info.tcpi_wifi_rxbytes +=
766 stats->mpis_wifi_rxbytes;
767
768 tcp_ci.tcpci_tcp_info.tcpi_wired_txbytes +=
769 stats->mpis_wired_txbytes;
770 tcp_ci.tcpci_tcp_info.tcpi_wired_rxbytes +=
771 stats->mpis_wired_rxbytes;
772
773 tcp_ci.tcpci_tcp_info.tcpi_cell_txbytes +=
774 stats->mpis_cell_txbytes;
775 tcp_ci.tcpci_tcp_info.tcpi_cell_rxbytes +=
776 stats->mpis_cell_rxbytes;
777 }
778
779 *aux_len = min(*aux_len, sizeof(tcp_ci));
780 error = copyout(&tcp_ci, aux_data, *aux_len);
781 if (error != 0) {
782 return error;
783 }
784 }
785 }
786
787 return 0;
788 }
789
790 /*
791 * User-protocol pru_control callback.
792 */
793 static int
mptcp_usr_control(struct socket * mp_so,u_long cmd,caddr_t data,struct ifnet * ifp,struct proc * p)794 mptcp_usr_control(struct socket *mp_so, u_long cmd, caddr_t data,
795 struct ifnet *ifp, struct proc *p)
796 {
797 #pragma unused(ifp, p)
798 struct mppcb *mpp = mpsotomppcb(mp_so);
799 struct mptses *mpte;
800 int error = 0;
801
802 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
803 error = EINVAL;
804 goto out;
805 }
806 mpte = mptompte(mpp);
807
808 switch (cmd) {
809 case SIOCGASSOCIDS32: { /* struct so_aidreq32 */
810 struct so_aidreq32 aidr;
811 bcopy(data, &aidr, sizeof(aidr));
812 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
813 aidr.sar_aidp);
814 if (error == 0) {
815 bcopy(&aidr, data, sizeof(aidr));
816 }
817 break;
818 }
819
820 case SIOCGASSOCIDS64: { /* struct so_aidreq64 */
821 struct so_aidreq64 aidr;
822 bcopy(data, &aidr, sizeof(aidr));
823 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
824 (user_addr_t)aidr.sar_aidp);
825 if (error == 0) {
826 bcopy(&aidr, data, sizeof(aidr));
827 }
828 break;
829 }
830
831 case SIOCGCONNIDS32: { /* struct so_cidreq32 */
832 struct so_cidreq32 cidr;
833 bcopy(data, &cidr, sizeof(cidr));
834 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
835 cidr.scr_cidp);
836 if (error == 0) {
837 bcopy(&cidr, data, sizeof(cidr));
838 }
839 break;
840 }
841
842 case SIOCGCONNIDS64: { /* struct so_cidreq64 */
843 struct so_cidreq64 cidr;
844 bcopy(data, &cidr, sizeof(cidr));
845 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
846 (user_addr_t)cidr.scr_cidp);
847 if (error == 0) {
848 bcopy(&cidr, data, sizeof(cidr));
849 }
850 break;
851 }
852
853 case SIOCGCONNINFO32: { /* struct so_cinforeq32 */
854 struct so_cinforeq32 cifr;
855 bcopy(data, &cifr, sizeof(cifr));
856 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
857 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
858 cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst,
859 &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data,
860 &cifr.scir_aux_len);
861 if (error == 0) {
862 bcopy(&cifr, data, sizeof(cifr));
863 }
864 break;
865 }
866
867 case SIOCGCONNINFO64: { /* struct so_cinforeq64 */
868 struct so_cinforeq64 cifr;
869 bcopy(data, &cifr, sizeof(cifr));
870 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
871 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
872 (user_addr_t)cifr.scir_src, &cifr.scir_src_len,
873 (user_addr_t)cifr.scir_dst, &cifr.scir_dst_len,
874 &cifr.scir_aux_type, (user_addr_t)cifr.scir_aux_data,
875 &cifr.scir_aux_len);
876 if (error == 0) {
877 bcopy(&cifr, data, sizeof(cifr));
878 }
879 break;
880 }
881
882 default:
883 error = EOPNOTSUPP;
884 break;
885 }
886 out:
887 return error;
888 }
889
890 static int
mptcp_disconnect(struct mptses * mpte)891 mptcp_disconnect(struct mptses *mpte)
892 {
893 struct socket *mp_so;
894 struct mptcb *mp_tp;
895 int error = 0;
896
897 mp_so = mptetoso(mpte);
898 mp_tp = mpte->mpte_mptcb;
899
900 DTRACE_MPTCP3(disconnectx, struct mptses *, mpte,
901 struct socket *, mp_so, struct mptcb *, mp_tp);
902
903 /* if we're not detached, go thru socket state checks */
904 if (!(mp_so->so_flags & SOF_PCBCLEARING) && !(mp_so->so_flags & SOF_DEFUNCT)) {
905 if (!(mp_so->so_state & (SS_ISCONNECTED |
906 SS_ISCONNECTING))) {
907 error = ENOTCONN;
908 goto out;
909 }
910 if (mp_so->so_state & SS_ISDISCONNECTING) {
911 error = EALREADY;
912 goto out;
913 }
914 }
915
916 mptcp_cancel_all_timers(mp_tp);
917 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
918 mptcp_close(mpte, mp_tp);
919 } else if ((mp_so->so_options & SO_LINGER) &&
920 mp_so->so_linger == 0) {
921 mptcp_drop(mpte, mp_tp, 0);
922 } else {
923 soisdisconnecting(mp_so);
924 sbflush(&mp_so->so_rcv);
925 if (mptcp_usrclosed(mpte) != NULL) {
926 mptcp_output(mpte);
927 }
928 }
929
930 if (error == 0) {
931 mptcp_subflow_workloop(mpte);
932 }
933
934 out:
935 return error;
936 }
937
/*
 * pru_disconnect callback: look up the MPTCP session of the socket and
 * hand it to mptcp_disconnect(), returning its result.
 */
static int
mptcp_usr_disconnect(struct socket *mp_so)
{
	struct mptses *mpte = mpsotompte(mp_so);

	return mptcp_disconnect(mpte);
}
946
947 /*
948 * User-protocol pru_disconnectx callback.
949 */
950 static int
mptcp_usr_disconnectx(struct socket * mp_so,sae_associd_t aid,sae_connid_t cid)951 mptcp_usr_disconnectx(struct socket *mp_so, sae_associd_t aid, sae_connid_t cid)
952 {
953 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
954 return EINVAL;
955 }
956
957 if (cid != SAE_CONNID_ANY && cid != SAE_CONNID_ALL) {
958 return EINVAL;
959 }
960
961 return mptcp_usr_disconnect(mp_so);
962 }
963
964 void
mptcp_finish_usrclosed(struct mptses * mpte)965 mptcp_finish_usrclosed(struct mptses *mpte)
966 {
967 struct mptcb *mp_tp = mpte->mpte_mptcb;
968 struct socket *mp_so = mptetoso(mpte);
969
970 if (mp_tp->mpt_state == MPTCPS_CLOSED || mp_tp->mpt_state == MPTCPS_TERMINATE) {
971 mpte = mptcp_close(mpte, mp_tp);
972 } else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
973 soisdisconnected(mp_so);
974 } else {
975 struct mptsub *mpts;
976
977 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
978 if ((mp_so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
979 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
980 mptcp_subflow_disconnect(mpte, mpts);
981 } else {
982 mptcp_subflow_shutdown(mpte, mpts);
983 }
984 }
985 }
986 }
987
988 /*
989 * User issued close, and wish to trail thru shutdown states.
990 */
991 static struct mptses *
mptcp_usrclosed(struct mptses * mpte)992 mptcp_usrclosed(struct mptses *mpte)
993 {
994 struct mptcb *mp_tp = mpte->mpte_mptcb;
995
996 mptcp_close_fsm(mp_tp, MPCE_CLOSE);
997
998 /* Not everything has been acknowledged - don't close the subflows! */
999 if (mp_tp->mpt_state != MPTCPS_TERMINATE &&
1000 mp_tp->mpt_sndnxt + 1 != mp_tp->mpt_sndmax) {
1001 return mpte;
1002 }
1003
1004 mptcp_finish_usrclosed(mpte);
1005
1006 return mpte;
1007 }
1008
1009 /*
1010 * After a receive, possible send some update to peer.
1011 */
1012 static int
mptcp_usr_rcvd(struct socket * mp_so,int flags)1013 mptcp_usr_rcvd(struct socket *mp_so, int flags)
1014 {
1015 #pragma unused(flags)
1016 struct mppcb *mpp = mpsotomppcb(mp_so);
1017 struct mptses *mpte;
1018 struct mptsub *mpts;
1019 int error = 0;
1020
1021 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1022 error = EINVAL;
1023 goto out;
1024 }
1025
1026 mpte = mptompte(mpp);
1027
1028 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1029 struct socket *so = mpts->mpts_socket;
1030
1031 if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb != NULL) {
1032 (*so->so_proto->pr_usrreqs->pru_rcvd)(so, 0);
1033 }
1034 }
1035
1036 error = mptcp_output(mpte);
1037 out:
1038 return error;
1039 }
1040
1041 /*
1042 * Do a send by putting data in the output queue.
1043 */
1044 static int
mptcp_usr_send(struct socket * mp_so,int prus_flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct proc * p)1045 mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m,
1046 struct sockaddr *nam, struct mbuf *control, struct proc *p)
1047 {
1048 #pragma unused(nam, p)
1049 struct mppcb *mpp = mpsotomppcb(mp_so);
1050 struct mptses *mpte;
1051 int error = 0;
1052
1053 if (prus_flags & (PRUS_OOB | PRUS_EOF)) {
1054 error = EOPNOTSUPP;
1055 goto out;
1056 }
1057
1058 if (nam != NULL) {
1059 error = EOPNOTSUPP;
1060 goto out;
1061 }
1062
1063 if (control != NULL && control->m_len != 0) {
1064 error = EOPNOTSUPP;
1065 goto out;
1066 }
1067
1068 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1069 error = ECONNRESET;
1070 goto out;
1071 }
1072 mpte = mptompte(mpp);
1073 VERIFY(mpte != NULL);
1074
1075 if (!(mp_so->so_state & SS_ISCONNECTED) &&
1076 !(mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) {
1077 error = ENOTCONN;
1078 goto out;
1079 }
1080
1081 mptcp_insert_dsn(mpp, m);
1082 VERIFY(mp_so->so_snd.sb_flags & SB_NOCOMPRESS);
1083 sbappendstream(&mp_so->so_snd, m);
1084 m = NULL;
1085
1086 error = mptcp_output(mpte);
1087 if (error != 0) {
1088 goto out;
1089 }
1090
1091 if (mp_so->so_state & SS_ISCONNECTING) {
1092 if (mp_so->so_state & SS_NBIO) {
1093 error = EWOULDBLOCK;
1094 } else {
1095 error = sbwait(&mp_so->so_snd);
1096 }
1097 }
1098
1099 out:
1100 if (error) {
1101 if (m != NULL) {
1102 m_freem(m);
1103 }
1104 if (control != NULL) {
1105 m_freem(control);
1106 }
1107 }
1108 return error;
1109 }
1110
1111 /*
1112 * Mark the MPTCP connection as being incapable of further output.
1113 */
1114 static int
mptcp_usr_shutdown(struct socket * mp_so)1115 mptcp_usr_shutdown(struct socket *mp_so)
1116 {
1117 struct mppcb *mpp = mpsotomppcb(mp_so);
1118 struct mptses *mpte;
1119 int error = 0;
1120
1121 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1122 error = EINVAL;
1123 goto out;
1124 }
1125 mpte = mptompte(mpp);
1126 VERIFY(mpte != NULL);
1127
1128 socantsendmore(mp_so);
1129
1130 mpte = mptcp_usrclosed(mpte);
1131 if (mpte != NULL) {
1132 error = mptcp_output(mpte);
1133 }
1134 out:
1135 return error;
1136 }
1137
1138 /*
1139 * Copy the contents of uio into a properly sized mbuf chain.
1140 */
1141 static int
mptcp_uiotombuf(struct uio * uio,int how,user_ssize_t space,struct mbuf ** top)1142 mptcp_uiotombuf(struct uio *uio, int how, user_ssize_t space, struct mbuf **top)
1143 {
1144 struct mbuf *m, *mb, *nm = NULL, *mtail = NULL;
1145 int progress, len, error;
1146 user_ssize_t resid, tot;
1147
1148 VERIFY(top != NULL && *top == NULL);
1149
1150 /*
1151 * space can be zero or an arbitrary large value bound by
1152 * the total data supplied by the uio.
1153 */
1154 resid = uio_resid(uio);
1155 if (space > 0) {
1156 tot = MIN(resid, space);
1157 } else {
1158 tot = resid;
1159 }
1160
1161 if (tot < 0 || tot > INT_MAX) {
1162 return EINVAL;
1163 }
1164
1165 len = (int)tot;
1166 if (len == 0) {
1167 len = 1;
1168 }
1169
1170 /* Loop and append maximum sized mbufs to the chain tail. */
1171 while (len > 0) {
1172 uint32_t m_needed = 1;
1173
1174 if (njcl > 0 && len > MBIGCLBYTES) {
1175 mb = m_getpackets_internal(&m_needed, 1,
1176 how, 1, M16KCLBYTES);
1177 } else if (len > MCLBYTES) {
1178 mb = m_getpackets_internal(&m_needed, 1,
1179 how, 1, MBIGCLBYTES);
1180 } else if (len >= (signed)MINCLSIZE) {
1181 mb = m_getpackets_internal(&m_needed, 1,
1182 how, 1, MCLBYTES);
1183 } else {
1184 mb = m_gethdr(how, MT_DATA);
1185 }
1186
1187 /* Fail the whole operation if one mbuf can't be allocated. */
1188 if (mb == NULL) {
1189 if (nm != NULL) {
1190 m_freem(nm);
1191 }
1192 return ENOBUFS;
1193 }
1194
1195 /* Book keeping. */
1196 VERIFY(mb->m_flags & M_PKTHDR);
1197 len -= ((mb->m_flags & M_EXT) ? mb->m_ext.ext_size : MHLEN);
1198 if (mtail != NULL) {
1199 mtail->m_next = mb;
1200 } else {
1201 nm = mb;
1202 }
1203 mtail = mb;
1204 }
1205
1206 m = nm;
1207
1208 progress = 0;
1209 /* Fill all mbufs with uio data and update header information. */
1210 for (mb = m; mb != NULL; mb = mb->m_next) {
1211 /* tot >= 0 && tot <= INT_MAX (see above) */
1212 len = MIN((int)M_TRAILINGSPACE(mb), (int)(tot - progress));
1213
1214 error = uiomove(mtod(mb, char *), len, uio);
1215 if (error != 0) {
1216 m_freem(m);
1217 return error;
1218 }
1219
1220 /* each mbuf is M_PKTHDR chained via m_next */
1221 mb->m_len = len;
1222 mb->m_pkthdr.len = len;
1223
1224 progress += len;
1225 }
1226 VERIFY(progress == tot);
1227 *top = m;
1228 return 0;
1229 }
1230
1231 /*
1232 * MPTCP socket protocol-user socket send routine, derived from sosend().
1233 */
1234 static int
mptcp_usr_sosend(struct socket * mp_so,struct sockaddr * addr,struct uio * uio,struct mbuf * top,struct mbuf * control,int flags)1235 mptcp_usr_sosend(struct socket *mp_so, struct sockaddr *addr, struct uio *uio,
1236 struct mbuf *top, struct mbuf *control, int flags)
1237 {
1238 #pragma unused(addr)
1239 user_ssize_t resid, space;
1240 int error, sendflags;
1241 struct proc *p = current_proc();
1242 int sblocked = 0;
1243
1244 /* UIO is required for now, due to per-mbuf M_PKTHDR constrains */
1245 if (uio == NULL || top != NULL) {
1246 error = EINVAL;
1247 goto out;
1248 }
1249 resid = uio_resid(uio);
1250
1251 socket_lock(mp_so, 1);
1252 so_update_last_owner_locked(mp_so, p);
1253 so_update_policy(mp_so);
1254
1255 VERIFY(mp_so->so_type == SOCK_STREAM);
1256 VERIFY(!(mp_so->so_flags & SOF_MP_SUBFLOW));
1257
1258 if (flags & (MSG_OOB | MSG_DONTROUTE)) {
1259 error = EOPNOTSUPP;
1260 socket_unlock(mp_so, 1);
1261 goto out;
1262 }
1263
1264 /*
1265 * In theory resid should be unsigned. However, space must be
1266 * signed, as it might be less than 0 if we over-committed, and we
1267 * must use a signed comparison of space and resid. On the other
1268 * hand, a negative resid causes us to loop sending 0-length
1269 * segments to the protocol.
1270 */
1271 if (resid < 0 || resid > INT_MAX ||
1272 (flags & MSG_EOR) || control != NULL) {
1273 error = EINVAL;
1274 socket_unlock(mp_so, 1);
1275 goto out;
1276 }
1277
1278 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
1279
1280 do {
1281 error = sosendcheck(mp_so, NULL, resid, 0, 0, flags,
1282 &sblocked);
1283 if (error != 0) {
1284 goto release;
1285 }
1286
1287 space = sbspace(&mp_so->so_snd);
1288 do {
1289 socket_unlock(mp_so, 0);
1290 /*
1291 * Copy the data from userland into an mbuf chain.
1292 */
1293 error = mptcp_uiotombuf(uio, M_WAITOK, space, &top);
1294 if (error != 0) {
1295 socket_lock(mp_so, 0);
1296 goto release;
1297 }
1298 VERIFY(top != NULL);
1299 space -= resid - uio_resid(uio);
1300 resid = uio_resid(uio);
1301 socket_lock(mp_so, 0);
1302
1303 /*
1304 * Compute flags here, for pru_send and NKEs.
1305 */
1306 sendflags = (resid > 0 && space > 0) ?
1307 PRUS_MORETOCOME : 0;
1308
1309 /*
1310 * Socket filter processing
1311 */
1312 VERIFY(control == NULL);
1313 error = sflt_data_out(mp_so, NULL, &top, &control, 0);
1314 if (error != 0) {
1315 if (error == EJUSTRETURN) {
1316 error = 0;
1317 top = NULL;
1318 /* always free control if any */
1319 }
1320 goto release;
1321 }
1322 if (control != NULL) {
1323 m_freem(control);
1324 control = NULL;
1325 }
1326
1327 /*
1328 * Pass data to protocol.
1329 */
1330 error = (*mp_so->so_proto->pr_usrreqs->pru_send)
1331 (mp_so, sendflags, top, NULL, NULL, p);
1332
1333 top = NULL;
1334 if (error != 0) {
1335 goto release;
1336 }
1337 } while (resid != 0 && space > 0);
1338 } while (resid != 0);
1339
1340 release:
1341 if (sblocked) {
1342 sbunlock(&mp_so->so_snd, FALSE); /* will unlock socket */
1343 } else {
1344 socket_unlock(mp_so, 1);
1345 }
1346 out:
1347 if (top != NULL) {
1348 m_freem(top);
1349 }
1350 if (control != NULL) {
1351 m_freem(control);
1352 }
1353
1354 soclearfastopen(mp_so);
1355
1356 return error;
1357 }
1358
1359 /*
1360 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1361 * This routine simply indicates to the caller whether or not to proceed
1362 * further with the given socket option. This is invoked by sosetoptlock()
1363 * and sogetoptlock().
1364 */
1365 static int
mptcp_usr_socheckopt(struct socket * mp_so,struct sockopt * sopt)1366 mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt)
1367 {
1368 #pragma unused(mp_so)
1369 int error = 0;
1370
1371 VERIFY(sopt->sopt_level == SOL_SOCKET);
1372
1373 /*
1374 * We could check for sopt_dir (set/get) here, but we'll just
1375 * let the caller deal with it as appropriate; therefore the
1376 * following is a superset of the socket options which we
1377 * allow for set/get.
1378 *
1379 * XXX: [email protected]
1380 *
1381 * Need to consider the following cases:
1382 *
1383 * a. Certain socket options don't have a clear definition
1384 * on the expected behavior post connect(2). At the time
1385 * those options are issued on the MP socket, there may
1386 * be existing subflow sockets that are already connected.
1387 */
1388 switch (sopt->sopt_name) {
1389 case SO_LINGER: /* MP */
1390 case SO_LINGER_SEC: /* MP */
1391 case SO_TYPE: /* MP */
1392 case SO_NREAD: /* MP */
1393 case SO_NWRITE: /* MP */
1394 case SO_ERROR: /* MP */
1395 case SO_SNDBUF: /* MP */
1396 case SO_RCVBUF: /* MP */
1397 case SO_SNDLOWAT: /* MP */
1398 case SO_RCVLOWAT: /* MP */
1399 case SO_SNDTIMEO: /* MP */
1400 case SO_RCVTIMEO: /* MP */
1401 case SO_NKE: /* MP */
1402 case SO_NOSIGPIPE: /* MP */
1403 case SO_NOADDRERR: /* MP */
1404 case SO_LABEL: /* MP */
1405 case SO_PEERLABEL: /* MP */
1406 case SO_DEFUNCTIT: /* MP */
1407 case SO_DEFUNCTOK: /* MP */
1408 case SO_ISDEFUNCT: /* MP */
1409 case SO_TRAFFIC_CLASS_DBG: /* MP */
1410 case SO_DELEGATED: /* MP */
1411 case SO_DELEGATED_UUID: /* MP */
1412 #if NECP
1413 case SO_NECP_ATTRIBUTES:
1414 case SO_NECP_CLIENTUUID:
1415 #endif /* NECP */
1416 case SO_MPKL_SEND_INFO:
1417 /*
1418 * Tell the caller that these options are to be processed.
1419 */
1420 break;
1421
1422 case SO_DEBUG: /* MP + subflow */
1423 case SO_KEEPALIVE: /* MP + subflow */
1424 case SO_USELOOPBACK: /* MP + subflow */
1425 case SO_RANDOMPORT: /* MP + subflow */
1426 case SO_TRAFFIC_CLASS: /* MP + subflow */
1427 case SO_RECV_TRAFFIC_CLASS: /* MP + subflow */
1428 case SO_PRIVILEGED_TRAFFIC_CLASS: /* MP + subflow */
1429 case SO_RECV_ANYIF: /* MP + subflow */
1430 case SO_RESTRICTIONS: /* MP + subflow */
1431 case SO_FLUSH: /* MP + subflow */
1432 case SO_NOWAKEFROMSLEEP:
1433 case SO_NOAPNFALLBK:
1434 case SO_MARK_CELLFALLBACK:
1435 case SO_MARK_KNOWN_TRACKER:
1436 case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
1437 case SO_MARK_APPROVED_APP_DOMAIN:
1438 case SO_FALLBACK_MODE:
1439 /*
1440 * Tell the caller that these options are to be processed;
1441 * these will also be recorded later by mptcp_setopt().
1442 *
1443 * NOTE: Only support integer option value for now.
1444 */
1445 if (sopt->sopt_valsize != sizeof(int)) {
1446 error = EINVAL;
1447 }
1448 break;
1449
1450 default:
1451 /*
1452 * Tell the caller to stop immediately and return an error.
1453 */
1454 error = ENOPROTOOPT;
1455 break;
1456 }
1457
1458 return error;
1459 }
1460
1461 /*
1462 * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
1463 */
1464 static int
mptcp_setopt_apply(struct mptses * mpte,struct mptopt * mpo)1465 mptcp_setopt_apply(struct mptses *mpte, struct mptopt *mpo)
1466 {
1467 struct socket *mp_so;
1468 struct mptsub *mpts;
1469 struct mptopt smpo;
1470 int error = 0;
1471
1472 /* just bail now if this isn't applicable to subflow sockets */
1473 if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
1474 error = ENOPROTOOPT;
1475 goto out;
1476 }
1477
1478 /*
1479 * Skip those that are handled internally; these options
1480 * should not have been recorded and marked with the
1481 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1482 */
1483 if (mpo->mpo_level == SOL_SOCKET &&
1484 (mpo->mpo_name == SO_NOSIGPIPE || mpo->mpo_name == SO_NOADDRERR)) {
1485 error = ENOPROTOOPT;
1486 goto out;
1487 }
1488
1489 mp_so = mptetoso(mpte);
1490
1491 /*
1492 * Don't bother going further if there's no subflow; mark the option
1493 * with MPOF_INTERIM so that we know whether or not to remove this
1494 * option upon encountering an error while issuing it during subflow
1495 * socket creation.
1496 */
1497 if (mpte->mpte_numflows == 0) {
1498 VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows));
1499 mpo->mpo_flags |= MPOF_INTERIM;
1500 /* return success */
1501 goto out;
1502 }
1503
1504 bzero(&smpo, sizeof(smpo));
1505 smpo.mpo_flags |= MPOF_SUBFLOW_OK;
1506 smpo.mpo_level = mpo->mpo_level;
1507 smpo.mpo_name = mpo->mpo_name;
1508
1509 /* grab exisiting values in case we need to rollback */
1510 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1511 struct socket *so;
1512
1513 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL | MPTSF_SOPT_INPROG);
1514 mpts->mpts_oldintval = 0;
1515 smpo.mpo_intval = 0;
1516 VERIFY(mpts->mpts_socket != NULL);
1517 so = mpts->mpts_socket;
1518 if (mptcp_subflow_sogetopt(mpte, so, &smpo) == 0) {
1519 mpts->mpts_flags |= MPTSF_SOPT_OLDVAL;
1520 mpts->mpts_oldintval = smpo.mpo_intval;
1521 }
1522 }
1523
1524 /* apply socket option */
1525 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1526 struct socket *so;
1527
1528 mpts->mpts_flags |= MPTSF_SOPT_INPROG;
1529 VERIFY(mpts->mpts_socket != NULL);
1530 so = mpts->mpts_socket;
1531 error = mptcp_subflow_sosetopt(mpte, mpts, mpo);
1532 if (error != 0) {
1533 break;
1534 }
1535 }
1536
1537 /* cleanup, and rollback if needed */
1538 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1539 struct socket *so;
1540
1541 if (!(mpts->mpts_flags & MPTSF_SOPT_INPROG)) {
1542 /* clear in case it's set */
1543 mpts->mpts_flags &= ~MPTSF_SOPT_OLDVAL;
1544 mpts->mpts_oldintval = 0;
1545 continue;
1546 }
1547 if (!(mpts->mpts_flags & MPTSF_SOPT_OLDVAL)) {
1548 mpts->mpts_flags &= ~MPTSF_SOPT_INPROG;
1549 VERIFY(mpts->mpts_oldintval == 0);
1550 continue;
1551 }
1552 /* error during sosetopt, so roll it back */
1553 if (error != 0) {
1554 VERIFY(mpts->mpts_socket != NULL);
1555 so = mpts->mpts_socket;
1556 smpo.mpo_intval = mpts->mpts_oldintval;
1557 mptcp_subflow_sosetopt(mpte, mpts, &smpo);
1558 }
1559 mpts->mpts_oldintval = 0;
1560 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL | MPTSF_SOPT_INPROG);
1561 }
1562
1563 out:
1564 return error;
1565 }
1566
1567 /*
1568 * Handle SOPT_SET for socket options issued on MP socket.
1569 */
1570 static int
mptcp_setopt(struct mptses * mpte,struct sockopt * sopt)1571 mptcp_setopt(struct mptses *mpte, struct sockopt *sopt)
1572 {
1573 int error = 0, optval = 0, level, optname, rec = 1;
1574 struct mptopt smpo, *mpo = NULL;
1575 struct socket *mp_so;
1576
1577 level = sopt->sopt_level;
1578 optname = sopt->sopt_name;
1579
1580 mp_so = mptetoso(mpte);
1581
1582 VERIFY(!(mpsotomppcb(mp_so)->mpp_flags & MPP_INSIDE_SETGETOPT));
1583 mpsotomppcb(mp_so)->mpp_flags |= MPP_INSIDE_SETGETOPT;
1584
1585 /*
1586 * Record socket options which are applicable to subflow sockets so
1587 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1588 * for the list of eligible socket-level options.
1589 */
1590 if (level == SOL_SOCKET) {
1591 switch (optname) {
1592 case SO_DEBUG:
1593 case SO_KEEPALIVE:
1594 case SO_USELOOPBACK:
1595 case SO_RANDOMPORT:
1596 case SO_TRAFFIC_CLASS:
1597 case SO_RECV_TRAFFIC_CLASS:
1598 case SO_PRIVILEGED_TRAFFIC_CLASS:
1599 case SO_RECV_ANYIF:
1600 case SO_RESTRICTIONS:
1601 case SO_NOWAKEFROMSLEEP:
1602 case SO_NOAPNFALLBK:
1603 case SO_MARK_CELLFALLBACK:
1604 case SO_MARK_KNOWN_TRACKER:
1605 case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
1606 case SO_MARK_APPROVED_APP_DOMAIN:
1607 case SO_FALLBACK_MODE:
1608 /* record it */
1609 break;
1610 case SO_FLUSH:
1611 /* don't record it */
1612 rec = 0;
1613 break;
1614
1615 /* Next ones, record at MPTCP-level */
1616 case SO_DELEGATED:
1617 error = sooptcopyin(sopt, &mpte->mpte_epid,
1618 sizeof(int), sizeof(int));
1619 if (error != 0) {
1620 goto err_out;
1621 }
1622
1623 goto out;
1624 case SO_DELEGATED_UUID:
1625 error = sooptcopyin(sopt, &mpte->mpte_euuid,
1626 sizeof(uuid_t), sizeof(uuid_t));
1627 if (error != 0) {
1628 goto err_out;
1629 }
1630
1631 goto out;
1632 #if NECP
1633 case SO_NECP_CLIENTUUID:
1634 if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
1635 error = EINVAL;
1636 goto err_out;
1637 }
1638
1639 error = sooptcopyin(sopt, &mpsotomppcb(mp_so)->necp_client_uuid,
1640 sizeof(uuid_t), sizeof(uuid_t));
1641 if (error != 0) {
1642 goto err_out;
1643 }
1644
1645 mpsotomppcb(mp_so)->necp_cb = mptcp_session_necp_cb;
1646 error = necp_client_register_multipath_cb(mp_so->last_pid,
1647 mpsotomppcb(mp_so)->necp_client_uuid,
1648 mpsotomppcb(mp_so));
1649 if (error) {
1650 goto err_out;
1651 }
1652
1653 if (uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
1654 error = EINVAL;
1655 goto err_out;
1656 }
1657
1658 goto out;
1659 case SO_NECP_ATTRIBUTES:
1660 error = necp_set_socket_attributes(&mpsotomppcb(mp_so)->inp_necp_attributes, sopt);
1661 if (error) {
1662 goto err_out;
1663 }
1664
1665 goto out;
1666 #endif /* NECP */
1667 default:
1668 /* nothing to do; just return */
1669 goto out;
1670 }
1671 } else {
1672 switch (optname) {
1673 case TCP_NODELAY:
1674 case TCP_RXT_FINDROP:
1675 case TCP_KEEPALIVE:
1676 case TCP_KEEPINTVL:
1677 case TCP_KEEPCNT:
1678 case TCP_CONNECTIONTIMEOUT:
1679 case TCP_RXT_CONNDROPTIME:
1680 case PERSIST_TIMEOUT:
1681 case TCP_ADAPTIVE_READ_TIMEOUT:
1682 case TCP_ADAPTIVE_WRITE_TIMEOUT:
1683 case TCP_FASTOPEN_FORCE_ENABLE:
1684 /* eligible; record it */
1685 break;
1686 case TCP_NOTSENT_LOWAT:
1687 /* record at MPTCP level */
1688 error = sooptcopyin(sopt, &optval, sizeof(optval),
1689 sizeof(optval));
1690 if (error) {
1691 goto err_out;
1692 }
1693 if (optval < 0) {
1694 error = EINVAL;
1695 goto err_out;
1696 } else {
1697 if (optval == 0) {
1698 mp_so->so_flags &= ~SOF_NOTSENT_LOWAT;
1699 error = mptcp_set_notsent_lowat(mpte, 0);
1700 } else {
1701 mp_so->so_flags |= SOF_NOTSENT_LOWAT;
1702 error = mptcp_set_notsent_lowat(mpte,
1703 optval);
1704 }
1705
1706 if (error) {
1707 goto err_out;
1708 }
1709 }
1710 goto out;
1711 case MPTCP_SERVICE_TYPE:
1712 /* record at MPTCP level */
1713 error = sooptcopyin(sopt, &optval, sizeof(optval),
1714 sizeof(optval));
1715 if (error) {
1716 goto err_out;
1717 }
1718 if (optval < 0 || optval >= MPTCP_SVCTYPE_MAX) {
1719 error = EINVAL;
1720 goto err_out;
1721 }
1722
1723 if (mptcp_entitlement_check(mp_so, (uint8_t)optval) < 0) {
1724 error = EACCES;
1725 goto err_out;
1726 }
1727
1728 mpte->mpte_svctype = (uint8_t)optval;
1729 mpte->mpte_flags |= MPTE_SVCTYPE_CHECKED;
1730
1731 goto out;
1732 case MPTCP_ALTERNATE_PORT:
1733 /* record at MPTCP level */
1734 error = sooptcopyin(sopt, &optval, sizeof(optval),
1735 sizeof(optval));
1736 if (error) {
1737 goto err_out;
1738 }
1739
1740 if (optval < 0 || optval > UINT16_MAX) {
1741 error = EINVAL;
1742 goto err_out;
1743 }
1744
1745 mpte->mpte_alternate_port = (uint16_t)optval;
1746
1747 goto out;
1748 case MPTCP_FORCE_ENABLE:
1749 /* record at MPTCP level */
1750 error = sooptcopyin(sopt, &optval, sizeof(optval),
1751 sizeof(optval));
1752 if (error) {
1753 goto err_out;
1754 }
1755
1756 if (optval < 0 || optval > 1) {
1757 error = EINVAL;
1758 goto err_out;
1759 }
1760
1761 if (optval) {
1762 mpte->mpte_flags |= MPTE_FORCE_ENABLE;
1763 } else {
1764 mpte->mpte_flags &= ~MPTE_FORCE_ENABLE;
1765 }
1766
1767 goto out;
1768 case MPTCP_FORCE_VERSION:
1769 error = sooptcopyin(sopt, &optval, sizeof(optval),
1770 sizeof(optval));
1771 if (error) {
1772 goto err_out;
1773 }
1774
1775 if (optval != 0 && optval != 1) {
1776 error = EINVAL;
1777 goto err_out;
1778 }
1779
1780 if (optval == 0) {
1781 mpte->mpte_flags |= MPTE_FORCE_V0;
1782 mpte->mpte_flags &= ~MPTE_FORCE_V1;
1783 } else {
1784 mpte->mpte_flags |= MPTE_FORCE_V1;
1785 mpte->mpte_flags &= ~MPTE_FORCE_V0;
1786 }
1787
1788 goto out;
1789 case MPTCP_EXPECTED_PROGRESS_TARGET:
1790 {
1791 struct mptcb *mp_tp = mpte->mpte_mptcb;
1792 uint64_t mach_time_target;
1793 uint64_t nanoseconds;
1794
1795 if (mpte->mpte_svctype != MPTCP_SVCTYPE_TARGET_BASED) {
1796 os_log(mptcp_log_handle, "%s - %lx: Can't set urgent activity when svctype is %u\n",
1797 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_svctype);
1798 error = EINVAL;
1799 goto err_out;
1800 }
1801
1802 error = sooptcopyin(sopt, &mach_time_target, sizeof(mach_time_target), sizeof(mach_time_target));
1803 if (error) {
1804 goto err_out;
1805 }
1806
1807 if (!mptcp_ok_to_create_subflows(mp_tp)) {
1808 os_log(mptcp_log_handle, "%s - %lx: Not ok to create subflows, state %u flags %#x\n",
1809 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
1810 error = EINVAL;
1811 goto err_out;
1812 }
1813
1814 if (mach_time_target) {
1815 uint64_t time_now = 0;
1816 uint64_t time_now_nanoseconds;
1817
1818 absolutetime_to_nanoseconds(mach_time_target, &nanoseconds);
1819 nanoseconds = nanoseconds - (mptcp_expected_progress_headstart * NSEC_PER_MSEC);
1820
1821 time_now = mach_continuous_time();
1822 absolutetime_to_nanoseconds(time_now, &time_now_nanoseconds);
1823
1824 nanoseconds_to_absolutetime(nanoseconds, &mach_time_target);
1825 /* If the timer is already running and it would
1826 * fire in less than mptcp_expected_progress_headstart
1827 * seconds, then it's not worth canceling it.
1828 */
1829 if (mpte->mpte_time_target &&
1830 mpte->mpte_time_target < time_now &&
1831 time_now_nanoseconds > nanoseconds - (mptcp_expected_progress_headstart * NSEC_PER_MSEC)) {
1832 os_log(mptcp_log_handle, "%s - %lx: Not rescheduling timer %llu now %llu target %llu\n",
1833 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1834 mpte->mpte_time_target,
1835 time_now,
1836 mach_time_target);
1837 goto out;
1838 }
1839 }
1840
1841 mpte->mpte_time_target = mach_time_target;
1842 mptcp_set_urgency_timer(mpte);
1843
1844 goto out;
1845 }
1846 default:
1847 /* not eligible */
1848 error = ENOPROTOOPT;
1849 goto err_out;
1850 }
1851 }
1852
1853 if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
1854 sizeof(optval))) != 0) {
1855 goto err_out;
1856 }
1857
1858 if (rec) {
1859 /* search for an existing one; if not found, allocate */
1860 if ((mpo = mptcp_sopt_find(mpte, sopt)) == NULL) {
1861 mpo = mptcp_sopt_alloc(Z_WAITOK);
1862 }
1863
1864 if (mpo == NULL) {
1865 error = ENOBUFS;
1866 goto err_out;
1867 } else {
1868 /* initialize or update, as needed */
1869 mpo->mpo_intval = optval;
1870 if (!(mpo->mpo_flags & MPOF_ATTACHED)) {
1871 mpo->mpo_level = level;
1872 mpo->mpo_name = optname;
1873 mptcp_sopt_insert(mpte, mpo);
1874 }
1875 /* this can be issued on the subflow socket */
1876 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1877 }
1878 } else {
1879 bzero(&smpo, sizeof(smpo));
1880 mpo = &smpo;
1881 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1882 mpo->mpo_level = level;
1883 mpo->mpo_name = optname;
1884 mpo->mpo_intval = optval;
1885 }
1886
1887 /* issue this socket option on existing subflows */
1888 error = mptcp_setopt_apply(mpte, mpo);
1889 if (error != 0 && (mpo->mpo_flags & MPOF_ATTACHED)) {
1890 VERIFY(mpo != &smpo);
1891 mptcp_sopt_remove(mpte, mpo);
1892 mptcp_sopt_free(mpo);
1893 }
1894 if (mpo == &smpo) {
1895 mpo->mpo_flags &= ~MPOF_INTERIM;
1896 }
1897
1898 if (error) {
1899 goto err_out;
1900 }
1901
1902 out:
1903
1904 mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
1905 return 0;
1906
1907 err_out:
1908 os_log_error(mptcp_log_handle, "%s - %lx: sopt %s (%d, %d) val %d can't be issued error %d\n",
1909 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1910 mptcp_sopt2str(level, optname), level, optname, optval, error);
1911 mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
1912 return error;
1913 }
1914
1915 static void
mptcp_fill_info_bytestats(struct tcp_info * ti,struct mptses * mpte)1916 mptcp_fill_info_bytestats(struct tcp_info *ti, struct mptses *mpte)
1917 {
1918 struct mptsub *mpts;
1919 int i;
1920
1921 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1922 const struct inpcb *inp = sotoinpcb(mpts->mpts_socket);
1923
1924 if (inp == NULL) {
1925 continue;
1926 }
1927
1928 ti->tcpi_txbytes += inp->inp_stat->txbytes;
1929 ti->tcpi_rxbytes += inp->inp_stat->rxbytes;
1930 ti->tcpi_cell_txbytes += inp->inp_cstat->txbytes;
1931 ti->tcpi_cell_rxbytes += inp->inp_cstat->rxbytes;
1932 ti->tcpi_wifi_txbytes += inp->inp_wstat->txbytes;
1933 ti->tcpi_wifi_rxbytes += inp->inp_wstat->rxbytes;
1934 ti->tcpi_wired_txbytes += inp->inp_Wstat->txbytes;
1935 ti->tcpi_wired_rxbytes += inp->inp_Wstat->rxbytes;
1936 }
1937
1938 for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
1939 struct mptcp_itf_stats *stats = &mpte->mpte_itfstats[i];
1940
1941 ti->tcpi_txbytes += stats->mpis_txbytes;
1942 ti->tcpi_rxbytes += stats->mpis_rxbytes;
1943
1944 ti->tcpi_wifi_txbytes += stats->mpis_wifi_txbytes;
1945 ti->tcpi_wifi_rxbytes += stats->mpis_wifi_rxbytes;
1946
1947 ti->tcpi_wired_txbytes += stats->mpis_wired_txbytes;
1948 ti->tcpi_wired_rxbytes += stats->mpis_wired_rxbytes;
1949
1950 ti->tcpi_cell_txbytes += stats->mpis_cell_txbytes;
1951 ti->tcpi_cell_rxbytes += stats->mpis_cell_rxbytes;
1952 }
1953 }
1954
1955 static void
mptcp_fill_info(struct mptses * mpte,struct tcp_info * ti)1956 mptcp_fill_info(struct mptses *mpte, struct tcp_info *ti)
1957 {
1958 struct mptsub *actsub = mpte->mpte_active_sub;
1959 struct mptcb *mp_tp = mpte->mpte_mptcb;
1960 struct tcpcb *acttp = NULL;
1961
1962 if (actsub) {
1963 acttp = sototcpcb(actsub->mpts_socket);
1964 }
1965
1966 bzero(ti, sizeof(*ti));
1967
1968 ti->tcpi_state = (uint8_t)mp_tp->mpt_state;
1969 /* tcpi_options */
1970 /* tcpi_snd_wscale */
1971 /* tcpi_rcv_wscale */
1972 /* tcpi_flags */
1973 if (acttp) {
1974 ti->tcpi_rto = acttp->t_timer[TCPT_REXMT] ? acttp->t_rxtcur : 0;
1975 }
1976
1977 /* tcpi_snd_mss */
1978 /* tcpi_rcv_mss */
1979 if (acttp) {
1980 ti->tcpi_rttcur = acttp->t_rttcur;
1981 ti->tcpi_srtt = acttp->t_srtt >> TCP_RTT_SHIFT;
1982 ti->tcpi_rttvar = acttp->t_rttvar >> TCP_RTTVAR_SHIFT;
1983 ti->tcpi_rttbest = acttp->t_rttbest >> TCP_RTT_SHIFT;
1984 ti->tcpi_rcv_srtt = acttp->rcv_srtt >> TCP_RTT_SHIFT;
1985 }
1986 /* tcpi_snd_ssthresh */
1987 /* tcpi_snd_cwnd */
1988 /* tcpi_rcv_space */
1989 ti->tcpi_snd_wnd = mp_tp->mpt_sndwnd;
1990 ti->tcpi_snd_nxt = (uint32_t)mp_tp->mpt_sndnxt;
1991 ti->tcpi_rcv_nxt = (uint32_t)mp_tp->mpt_rcvnxt;
1992 if (acttp) {
1993 ti->tcpi_last_outif = (acttp->t_inpcb->inp_last_outifp == NULL) ? 0 :
1994 acttp->t_inpcb->inp_last_outifp->if_index;
1995 }
1996
1997 mptcp_fill_info_bytestats(ti, mpte);
1998 /* tcpi_txpackets */
1999
2000 /* tcpi_txretransmitbytes */
2001 /* tcpi_txunacked */
2002 /* tcpi_rxpackets */
2003
2004 /* tcpi_rxduplicatebytes */
2005 /* tcpi_rxoutoforderbytes */
2006 /* tcpi_snd_bw */
2007 /* tcpi_synrexmits */
2008 /* tcpi_unused1 */
2009 /* tcpi_unused2 */
2010 /* tcpi_cell_rxpackets */
2011
2012 /* tcpi_cell_txpackets */
2013
2014 /* tcpi_wifi_rxpackets */
2015
2016 /* tcpi_wifi_txpackets */
2017
2018 /* tcpi_wired_rxpackets */
2019 /* tcpi_wired_txpackets */
2020 /* tcpi_connstatus */
2021 /* TFO-stuff */
2022 /* ECN stuff */
2023 /* tcpi_ecn_recv_ce */
2024 /* tcpi_ecn_recv_cwr */
2025 if (acttp) {
2026 ti->tcpi_rcvoopack = acttp->t_rcvoopack;
2027 }
2028 /* tcpi_pawsdrop */
2029 /* tcpi_sack_recovery_episode */
2030 /* tcpi_reordered_pkts */
2031 /* tcpi_dsack_sent */
2032 /* tcpi_dsack_recvd */
2033 /* tcpi_flowhash */
2034 if (acttp) {
2035 ti->tcpi_txretransmitpackets = acttp->t_stat.rxmitpkts;
2036 }
2037 }
2038
2039 /*
2040 * Handle SOPT_GET for socket options issued on MP socket.
2041 */
2042 static int
mptcp_getopt(struct mptses * mpte,struct sockopt * sopt)2043 mptcp_getopt(struct mptses *mpte, struct sockopt *sopt)
2044 {
2045 int error = 0, optval = 0;
2046 struct socket *mp_so;
2047
2048 mp_so = mptetoso(mpte);
2049
2050 VERIFY(!(mpsotomppcb(mp_so)->mpp_flags & MPP_INSIDE_SETGETOPT));
2051 mpsotomppcb(mp_so)->mpp_flags |= MPP_INSIDE_SETGETOPT;
2052
2053 /*
2054 * We only handle SOPT_GET for TCP level socket options; we should
2055 * not get here for socket level options since they are already
2056 * handled at the socket layer.
2057 */
2058 if (sopt->sopt_level != IPPROTO_TCP) {
2059 error = ENOPROTOOPT;
2060 goto out;
2061 }
2062
2063 switch (sopt->sopt_name) {
2064 case PERSIST_TIMEOUT:
2065 /* Only case for which we have a non-zero default */
2066 optval = tcp_max_persist_timeout;
2067 OS_FALLTHROUGH;
2068 case TCP_NODELAY:
2069 case TCP_RXT_FINDROP:
2070 case TCP_KEEPALIVE:
2071 case TCP_KEEPINTVL:
2072 case TCP_KEEPCNT:
2073 case TCP_CONNECTIONTIMEOUT:
2074 case TCP_RXT_CONNDROPTIME:
2075 case TCP_ADAPTIVE_READ_TIMEOUT:
2076 case TCP_ADAPTIVE_WRITE_TIMEOUT:
2077 case TCP_FASTOPEN_FORCE_ENABLE:
2078 {
2079 struct mptopt *mpo = mptcp_sopt_find(mpte, sopt);
2080
2081 if (mpo != NULL) {
2082 optval = mpo->mpo_intval;
2083 }
2084 break;
2085 }
2086
2087 /* The next ones are stored at the MPTCP-level */
2088 case TCP_NOTSENT_LOWAT:
2089 if (mptetoso(mpte)->so_flags & SOF_NOTSENT_LOWAT) {
2090 optval = mptcp_get_notsent_lowat(mpte);
2091 } else {
2092 optval = 0;
2093 }
2094 break;
2095 case TCP_INFO:
2096 {
2097 struct tcp_info ti;
2098
2099 mptcp_fill_info(mpte, &ti);
2100 error = sooptcopyout(sopt, &ti, sizeof(struct tcp_info));
2101
2102 goto out;
2103 }
2104 case MPTCP_SERVICE_TYPE:
2105 optval = mpte->mpte_svctype;
2106 break;
2107 case MPTCP_ALTERNATE_PORT:
2108 optval = mpte->mpte_alternate_port;
2109 break;
2110 case MPTCP_FORCE_ENABLE:
2111 optval = !!(mpte->mpte_flags & MPTE_FORCE_ENABLE);
2112 break;
2113 case MPTCP_FORCE_VERSION:
2114 if (mpte->mpte_flags & MPTE_FORCE_V0) {
2115 optval = 0;
2116 } else if (mpte->mpte_flags & MPTE_FORCE_V1) {
2117 optval = 1;
2118 } else {
2119 optval = -1;
2120 }
2121 break;
2122 case MPTCP_EXPECTED_PROGRESS_TARGET:
2123 error = sooptcopyout(sopt, &mpte->mpte_time_target, sizeof(mpte->mpte_time_target));
2124
2125 goto out;
2126 default:
2127 /* not eligible */
2128 error = ENOPROTOOPT;
2129 break;
2130 }
2131
2132 if (error == 0) {
2133 error = sooptcopyout(sopt, &optval, sizeof(int));
2134 }
2135
2136 out:
2137 mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
2138 return error;
2139 }
2140
2141 /*
2142 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
2143 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
2144 * to those that are allowed by mptcp_usr_socheckopt().
2145 */
2146 int
mptcp_ctloutput(struct socket * mp_so,struct sockopt * sopt)2147 mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt)
2148 {
2149 struct mppcb *mpp = mpsotomppcb(mp_so);
2150 struct mptses *mpte;
2151 int error = 0;
2152
2153 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
2154 error = EINVAL;
2155 goto out;
2156 }
2157 mpte = mptompte(mpp);
2158 socket_lock_assert_owned(mp_so);
2159
2160 /* we only handle socket and TCP-level socket options for MPTCP */
2161 if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP) {
2162 error = EINVAL;
2163 goto out;
2164 }
2165
2166 switch (sopt->sopt_dir) {
2167 case SOPT_SET:
2168 error = mptcp_setopt(mpte, sopt);
2169 break;
2170
2171 case SOPT_GET:
2172 error = mptcp_getopt(mpte, sopt);
2173 break;
2174 }
2175 out:
2176 return error;
2177 }
2178
2179 const char *
mptcp_sopt2str(int level,int optname)2180 mptcp_sopt2str(int level, int optname)
2181 {
2182 switch (level) {
2183 case SOL_SOCKET:
2184 switch (optname) {
2185 case SO_LINGER:
2186 return "SO_LINGER";
2187 case SO_LINGER_SEC:
2188 return "SO_LINGER_SEC";
2189 case SO_DEBUG:
2190 return "SO_DEBUG";
2191 case SO_KEEPALIVE:
2192 return "SO_KEEPALIVE";
2193 case SO_USELOOPBACK:
2194 return "SO_USELOOPBACK";
2195 case SO_TYPE:
2196 return "SO_TYPE";
2197 case SO_NREAD:
2198 return "SO_NREAD";
2199 case SO_NWRITE:
2200 return "SO_NWRITE";
2201 case SO_ERROR:
2202 return "SO_ERROR";
2203 case SO_SNDBUF:
2204 return "SO_SNDBUF";
2205 case SO_RCVBUF:
2206 return "SO_RCVBUF";
2207 case SO_SNDLOWAT:
2208 return "SO_SNDLOWAT";
2209 case SO_RCVLOWAT:
2210 return "SO_RCVLOWAT";
2211 case SO_SNDTIMEO:
2212 return "SO_SNDTIMEO";
2213 case SO_RCVTIMEO:
2214 return "SO_RCVTIMEO";
2215 case SO_NKE:
2216 return "SO_NKE";
2217 case SO_NOSIGPIPE:
2218 return "SO_NOSIGPIPE";
2219 case SO_NOADDRERR:
2220 return "SO_NOADDRERR";
2221 case SO_RESTRICTIONS:
2222 return "SO_RESTRICTIONS";
2223 case SO_LABEL:
2224 return "SO_LABEL";
2225 case SO_PEERLABEL:
2226 return "SO_PEERLABEL";
2227 case SO_RANDOMPORT:
2228 return "SO_RANDOMPORT";
2229 case SO_TRAFFIC_CLASS:
2230 return "SO_TRAFFIC_CLASS";
2231 case SO_RECV_TRAFFIC_CLASS:
2232 return "SO_RECV_TRAFFIC_CLASS";
2233 case SO_TRAFFIC_CLASS_DBG:
2234 return "SO_TRAFFIC_CLASS_DBG";
2235 case SO_PRIVILEGED_TRAFFIC_CLASS:
2236 return "SO_PRIVILEGED_TRAFFIC_CLASS";
2237 case SO_DEFUNCTIT:
2238 return "SO_DEFUNCTIT";
2239 case SO_DEFUNCTOK:
2240 return "SO_DEFUNCTOK";
2241 case SO_ISDEFUNCT:
2242 return "SO_ISDEFUNCT";
2243 case SO_OPPORTUNISTIC:
2244 return "SO_OPPORTUNISTIC";
2245 case SO_FLUSH:
2246 return "SO_FLUSH";
2247 case SO_RECV_ANYIF:
2248 return "SO_RECV_ANYIF";
2249 case SO_NOWAKEFROMSLEEP:
2250 return "SO_NOWAKEFROMSLEEP";
2251 case SO_NOAPNFALLBK:
2252 return "SO_NOAPNFALLBK";
2253 case SO_MARK_CELLFALLBACK:
2254 return "SO_CELLFALLBACK";
2255 case SO_FALLBACK_MODE:
2256 return "SO_FALLBACK_MODE";
2257 case SO_MARK_KNOWN_TRACKER:
2258 return "SO_MARK_KNOWN_TRACKER";
2259 case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
2260 return "SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED";
2261 case SO_MARK_APPROVED_APP_DOMAIN:
2262 return "SO_MARK_APPROVED_APP_DOMAIN";
2263 case SO_DELEGATED:
2264 return "SO_DELEGATED";
2265 case SO_DELEGATED_UUID:
2266 return "SO_DELEGATED_UUID";
2267 #if NECP
2268 case SO_NECP_ATTRIBUTES:
2269 return "SO_NECP_ATTRIBUTES";
2270 case SO_NECP_CLIENTUUID:
2271 return "SO_NECP_CLIENTUUID";
2272 #endif /* NECP */
2273 }
2274
2275 break;
2276 case IPPROTO_TCP:
2277 switch (optname) {
2278 case TCP_NODELAY:
2279 return "TCP_NODELAY";
2280 case TCP_KEEPALIVE:
2281 return "TCP_KEEPALIVE";
2282 case TCP_KEEPINTVL:
2283 return "TCP_KEEPINTVL";
2284 case TCP_KEEPCNT:
2285 return "TCP_KEEPCNT";
2286 case TCP_CONNECTIONTIMEOUT:
2287 return "TCP_CONNECTIONTIMEOUT";
2288 case TCP_RXT_CONNDROPTIME:
2289 return "TCP_RXT_CONNDROPTIME";
2290 case PERSIST_TIMEOUT:
2291 return "PERSIST_TIMEOUT";
2292 case TCP_NOTSENT_LOWAT:
2293 return "NOTSENT_LOWAT";
2294 case TCP_ADAPTIVE_READ_TIMEOUT:
2295 return "ADAPTIVE_READ_TIMEOUT";
2296 case TCP_ADAPTIVE_WRITE_TIMEOUT:
2297 return "ADAPTIVE_WRITE_TIMEOUT";
2298 case TCP_FASTOPEN_FORCE_ENABLE:
2299 return "TCP_FASTOPEN_FORCE_ENABLE";
2300 case MPTCP_SERVICE_TYPE:
2301 return "MPTCP_SERVICE_TYPE";
2302 case MPTCP_ALTERNATE_PORT:
2303 return "MPTCP_ALTERNATE_PORT";
2304 case MPTCP_FORCE_ENABLE:
2305 return "MPTCP_FORCE_ENABLE";
2306 case MPTCP_FORCE_VERSION:
2307 return "MPTCP_FORCE_VERSION";
2308 case MPTCP_EXPECTED_PROGRESS_TARGET:
2309 return "MPTCP_EXPECTED_PROGRESS_TARGET";
2310 }
2311
2312 break;
2313 }
2314
2315 return "unknown";
2316 }
2317
2318 static int
mptcp_usr_preconnect(struct socket * mp_so)2319 mptcp_usr_preconnect(struct socket *mp_so)
2320 {
2321 struct mptsub *mpts = NULL;
2322 struct mppcb *mpp = mpsotomppcb(mp_so);
2323 struct mptses *mpte;
2324 struct socket *so;
2325 struct tcpcb *tp = NULL;
2326 int error;
2327
2328 mpte = mptompte(mpp);
2329
2330 mpts = mptcp_get_subflow(mpte, NULL);
2331 if (mpts == NULL) {
2332 os_log_error(mptcp_log_handle, "%s - %lx: invalid preconnect ",
2333 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
2334 return EINVAL;
2335 }
2336 mpts->mpts_flags &= ~MPTSF_TFO_REQD;
2337 so = mpts->mpts_socket;
2338 tp = intotcpcb(sotoinpcb(so));
2339 tp->t_mpflags &= ~TMPF_TFO_REQUEST;
2340 error = tcp_output(sototcpcb(so));
2341
2342 soclearfastopen(mp_so);
2343
2344 return error;
2345 }
2346