1 /*
2 * Copyright (c) 2012-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/protosw.h>
35 #include <sys/mcache.h>
36 #include <sys/syslog.h>
37 #include <sys/proc.h>
38 #include <sys/proc_internal.h>
39 #include <sys/resourcevar.h>
40 #include <sys/kauth.h>
41 #include <sys/priv.h>
42
43 #include <net/if.h>
44 #include <netinet/in.h>
45 #include <netinet/in_var.h>
46 #include <netinet/tcp.h>
47 #include <netinet/tcp_fsm.h>
48 #include <netinet/tcp_seq.h>
49 #include <netinet/tcp_var.h>
50 #include <netinet/tcp_timer.h>
51 #include <netinet/mptcp.h>
52 #include <netinet/mptcp_var.h>
53 #include <netinet/mptcp_timer.h>
54
55 #include <mach/sdt.h>
56 #include <net/sockaddr_utils.h>
57
58 static int mptcp_usr_attach(struct socket *, int, struct proc *);
59 static int mptcp_usr_detach(struct socket *);
60 static int mptcp_attach(struct socket *, struct proc *);
61 static int mptcp_usr_connectx(struct socket *, struct sockaddr *,
62 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
63 sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
64 static int mptcp_getassocids(struct mptses *, uint32_t *, user_addr_t);
65 static int mptcp_getconnids(struct mptses *, sae_associd_t, uint32_t *,
66 user_addr_t);
67 static int mptcp_getconninfo(struct mptses *, sae_connid_t *, uint32_t *,
68 uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *,
69 uint32_t *, user_addr_t, uint32_t *);
70 static int mptcp_usr_control(struct socket *, u_long cmd,
71 caddr_t __sized_by(IOCPARM_LEN(cmd)), struct ifnet *,
72 struct proc *);
73 static int mptcp_disconnect(struct mptses *);
74 static int mptcp_usr_disconnect(struct socket *);
75 static int mptcp_usr_disconnectx(struct socket *, sae_associd_t, sae_connid_t);
76 static struct mptses *mptcp_usrclosed(struct mptses *);
77 static int mptcp_usr_rcvd(struct socket *, int);
78 static int mptcp_usr_send(struct socket *, int, struct mbuf *,
79 struct sockaddr *, struct mbuf *, struct proc *);
80 static int mptcp_usr_shutdown(struct socket *);
81 static int mptcp_usr_sosend(struct socket *, struct sockaddr *, struct uio *,
82 struct mbuf *, struct mbuf *, int);
83 static int mptcp_usr_socheckopt(struct socket *, struct sockopt *);
84 static int mptcp_usr_preconnect(struct socket *so);
85
86 struct pr_usrreqs mptcp_usrreqs = {
87 .pru_attach = mptcp_usr_attach,
88 .pru_connectx = mptcp_usr_connectx,
89 .pru_control = mptcp_usr_control,
90 .pru_detach = mptcp_usr_detach,
91 .pru_disconnect = mptcp_usr_disconnect,
92 .pru_disconnectx = mptcp_usr_disconnectx,
93 .pru_peeraddr = mp_getpeeraddr,
94 .pru_rcvd = mptcp_usr_rcvd,
95 .pru_send = mptcp_usr_send,
96 .pru_shutdown = mptcp_usr_shutdown,
97 .pru_sockaddr = mp_getsockaddr,
98 .pru_sosend = mptcp_usr_sosend,
99 .pru_soreceive = soreceive,
100 .pru_socheckopt = mptcp_usr_socheckopt,
101 .pru_preconnect = mptcp_usr_preconnect,
102 };
103
104
105 int mptcp_developer_mode = 0;
106 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, allow_aggregate, CTLFLAG_RW | CTLFLAG_LOCKED,
107 &mptcp_developer_mode, 0, "Allow the Multipath aggregation mode");
108
109 int mptcp_no_first_party = 0;
110 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, no_first_party, CTLFLAG_RW | CTLFLAG_LOCKED,
111 &mptcp_no_first_party, 0, "Do not do first-party app exemptions");
112
113 static unsigned long mptcp_expected_progress_headstart = 5000;
114 SYSCTL_ULONG(_net_inet_mptcp, OID_AUTO, expected_progress_headstart, CTLFLAG_RW | CTLFLAG_LOCKED,
115 &mptcp_expected_progress_headstart, "Headstart to give MPTCP before meeting the progress deadline");
116
117
118 /*
119 * Attaches an MPTCP control block to a socket.
120 */
121 static int
mptcp_usr_attach(struct socket * mp_so,int proto,struct proc * p)122 mptcp_usr_attach(struct socket *mp_so, int proto, struct proc *p)
123 {
124 #pragma unused(proto)
125 int error;
126
127 VERIFY(mpsotomppcb(mp_so) == NULL);
128
129 error = mptcp_attach(mp_so, p);
130 if (error) {
131 goto out;
132 }
133
134 if ((mp_so->so_options & SO_LINGER) && mp_so->so_linger == 0) {
135 mp_so->so_linger = (short)(TCP_LINGERTIME * hz);
136 }
137 out:
138 return error;
139 }
140
141 /*
142 * Detaches an MPTCP control block from a socket.
143 */
144 static int
mptcp_usr_detach(struct socket * mp_so)145 mptcp_usr_detach(struct socket *mp_so)
146 {
147 struct mptses *__single mpte = mpsotompte(mp_so);
148 struct mppcb *__single mpp = mpsotomppcb(mp_so);
149
150 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
151 os_log_error(mptcp_log_handle, "%s - %lx: state: %d\n",
152 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
153 mpp ? mpp->mpp_state : -1);
154 return EINVAL;
155 }
156
157 /*
158 * We are done with this MPTCP socket (it has been closed);
159 * trigger all subflows to be disconnected, if not already,
160 * by initiating the PCB detach sequence (SOF_PCBCLEARING
161 * will be set.)
162 */
163 mp_pcbdetach(mp_so);
164
165 mptcp_disconnect(mpte);
166
167 return 0;
168 }
169
170 /*
171 * Attach MPTCP protocol to socket, allocating MP control block,
172 * MPTCP session, control block, buffer space, etc.
173 */
174 static int
mptcp_attach(struct socket * mp_so,struct proc * p)175 mptcp_attach(struct socket *mp_so, struct proc *p)
176 {
177 #pragma unused(p)
178 struct mptses *__single mpte = NULL;
179 struct mptcb *__single mp_tp = NULL;
180 struct mppcb *__single mpp = NULL;
181 int error = 0;
182
183 if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) {
184 error = soreserve(mp_so, tcp_sendspace, tcp_recvspace);
185 if (error != 0) {
186 goto out;
187 }
188 }
189
190 if (mp_so->so_snd.sb_preconn_hiwat == 0) {
191 soreserve_preconnect(mp_so, 2048);
192 }
193
194 if ((mp_so->so_rcv.sb_flags & SB_USRSIZE) == 0) {
195 mp_so->so_rcv.sb_flags |= SB_AUTOSIZE;
196 }
197 if ((mp_so->so_snd.sb_flags & SB_USRSIZE) == 0) {
198 mp_so->so_snd.sb_flags |= SB_AUTOSIZE;
199 }
200
201 /*
202 * MPTCP send-socket buffers cannot be compressed, due to the
203 * fact that each mbuf chained via m_next is a M_PKTHDR
204 * which carries some MPTCP metadata.
205 */
206 mp_so->so_snd.sb_flags |= SB_NOCOMPRESS;
207
208 if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) {
209 goto out;
210 }
211
212 mpp = mpsotomppcb(mp_so);
213 mpte = (struct mptses *)mpp->mpp_pcbe;
214 mp_tp = mpte->mpte_mptcb;
215
216 VERIFY(mp_tp != NULL);
217 out:
218 return error;
219 }
220
221 static int
mptcp_entitlement_check(struct socket * mp_so,uint8_t svctype)222 mptcp_entitlement_check(struct socket *mp_so, uint8_t svctype)
223 {
224 struct mptses *mpte = mpsotompte(mp_so);
225
226 if (mptcp_no_first_party) {
227 return 0;
228 }
229
230 /* First, check for mptcp_extended without delegation */
231 if (soopt_cred_check(mp_so, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED, TRUE, FALSE) == 0) {
232 /*
233 * This means the app has the extended entitlement. Thus,
234 * it's a first party app and can run without restrictions.
235 */
236 mpte->mpte_flags |= MPTE_FIRSTPARTY;
237 return 0;
238 }
239
240 /* Now with delegation */
241 if (mp_so->so_flags & SOF_DELEGATED &&
242 soopt_cred_check(mp_so, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED, TRUE, TRUE) == 0) {
243 /*
244 * This means the app has the extended entitlement. Thus,
245 * it's a first party app and can run without restrictions.
246 */
247 mpte->mpte_flags |= MPTE_FIRSTPARTY;
248 return 0;
249 }
250
251 if (svctype == MPTCP_SVCTYPE_AGGREGATE) {
252 if (mptcp_developer_mode) {
253 return 0;
254 }
255
256 os_log_error(mptcp_log_handle, "%s - %lx: MPTCP prohibited on svc %u\n",
257 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_svctype);
258 return -1;
259 }
260
261 return 0;
262 }
263
264 /*
265 * Common subroutine to open a MPTCP connection to one of the remote hosts
266 * specified by dst_sl. This includes allocating and establishing a
267 * subflow TCP connection, either initially to establish MPTCP connection,
268 * or to join an existing one. Returns a connection handle upon success.
269 */
270 static int
mptcp_connectx(struct mptses * mpte,struct sockaddr * src,struct sockaddr * dst,uint32_t ifscope,sae_connid_t * pcid)271 mptcp_connectx(struct mptses *mpte, struct sockaddr *src,
272 struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid)
273 {
274 int error = 0;
275
276 VERIFY(dst != NULL);
277 VERIFY(pcid != NULL);
278
279 error = mptcp_subflow_add(mpte, src, dst, ifscope, pcid);
280
281 return error;
282 }
283
284 /*
285 * User-protocol pru_connectx callback.
286 */
287 static int
mptcp_usr_connectx(struct socket * mp_so,struct sockaddr * src,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uint32_t flags,void * arg,uint32_t arglen,struct uio * auio,user_ssize_t * bytes_written)288 mptcp_usr_connectx(struct socket *mp_so, struct sockaddr *src,
289 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
290 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
291 uint32_t arglen, struct uio *auio, user_ssize_t *bytes_written)
292 {
293 #pragma unused(p, aid, flags, arg, arglen)
294 struct mppcb *mpp = mpsotomppcb(mp_so);
295 struct mptses *mpte = NULL;
296 struct mptcb *mp_tp = NULL;
297 user_ssize_t datalen;
298 int error = 0;
299
300 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
301 os_log_error(mptcp_log_handle, "%s - %lx: state %d\n",
302 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
303 mpp ? mpp->mpp_state : -1);
304 error = EINVAL;
305 goto out;
306 }
307 mpte = mptompte(mpp);
308 mp_tp = mpte->mpte_mptcb;
309
310 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
311 os_log_error(mptcp_log_handle, "%s - %lx: fell back to TCP\n",
312 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
313 error = EINVAL;
314 goto out;
315 }
316
317 if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
318 error = EAFNOSUPPORT;
319 goto out;
320 }
321
322 if (dst->sa_family == AF_INET &&
323 dst->sa_len != sizeof(mpte->__mpte_dst_v4)) {
324 os_log_error(mptcp_log_handle, "%s - %lx: IPv4 dst len %u\n",
325 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), dst->sa_len);
326 error = EINVAL;
327 goto out;
328 }
329
330 if (dst->sa_family == AF_INET6 &&
331 dst->sa_len != sizeof(mpte->__mpte_dst_v6)) {
332 os_log_error(mptcp_log_handle, "%s - %lx: IPv6 dst len %u\n",
333 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), dst->sa_len);
334 error = EINVAL;
335 goto out;
336 }
337
338 if (!(mpte->mpte_flags & MPTE_SVCTYPE_CHECKED)) {
339 if (mptcp_entitlement_check(mp_so, mpte->mpte_svctype) < 0) {
340 error = EPERM;
341 goto out;
342 }
343
344 mpte->mpte_flags |= MPTE_SVCTYPE_CHECKED;
345 }
346
347 if ((mp_so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0) {
348 SOCKADDR_COPY(dst, &mpte->mpte_dst, dst->sa_len);
349
350 if (dst->sa_family == AF_INET) {
351 SOCKADDR_COPY(dst, &mpte->mpte_sub_dst_v4, dst->sa_len);
352 } else {
353 SOCKADDR_COPY(dst, &mpte->mpte_sub_dst_v6, dst->sa_len);
354 }
355 }
356
357 if (src) {
358 if (src->sa_family != AF_INET && src->sa_family != AF_INET6) {
359 error = EAFNOSUPPORT;
360 goto out;
361 }
362
363 if (src->sa_family == AF_INET &&
364 src->sa_len != sizeof(mpte->__mpte_src_v4)) {
365 os_log_error(mptcp_log_handle, "%s - %lx: IPv4 src len %u\n",
366 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), src->sa_len);
367 error = EINVAL;
368 goto out;
369 }
370
371 if (src->sa_family == AF_INET6 &&
372 src->sa_len != sizeof(mpte->__mpte_src_v6)) {
373 os_log_error(mptcp_log_handle, "%s - %lx: IPv6 src len %u\n",
374 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), src->sa_len);
375 error = EINVAL;
376 goto out;
377 }
378
379 if ((mp_so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0) {
380 SOCKADDR_COPY(src, &mpte->mpte_src, src->sa_len);
381 }
382 }
383
384 error = mptcp_connectx(mpte, src, dst, ifscope, pcid);
385
386 /* If there is data, copy it */
387 if (auio != NULL) {
388 datalen = uio_resid(auio);
389 socket_unlock(mp_so, 0);
390 error = mp_so->so_proto->pr_usrreqs->pru_sosend(mp_so, NULL,
391 (uio_t) auio, NULL, NULL, 0);
392
393 if (error == 0 || error == EWOULDBLOCK) {
394 *bytes_written = datalen - uio_resid(auio);
395 }
396
397 if (error == EWOULDBLOCK) {
398 error = EINPROGRESS;
399 }
400
401 socket_lock(mp_so, 0);
402 }
403
404 out:
405 return error;
406 }
407
408 /*
409 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
410 */
411 static int
mptcp_getassocids(struct mptses * mpte,uint32_t * cnt,user_addr_t aidp)412 mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp)
413 {
414 /* MPTCP has at most 1 association */
415 *cnt = (mpte->mpte_associd != SAE_ASSOCID_ANY) ? 1 : 0;
416
417 /* just asking how many there are? */
418 if (aidp == USER_ADDR_NULL) {
419 return 0;
420 }
421
422 return copyout(&mpte->mpte_associd, aidp,
423 sizeof(mpte->mpte_associd));
424 }
425
426 /*
427 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
428 */
429 static int
mptcp_getconnids(struct mptses * mpte,sae_associd_t aid,uint32_t * cnt,user_addr_t cidp)430 mptcp_getconnids(struct mptses *mpte, sae_associd_t aid, uint32_t *cnt,
431 user_addr_t cidp)
432 {
433 struct mptsub *mpts;
434 int error = 0;
435
436 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
437 aid != mpte->mpte_associd) {
438 return EINVAL;
439 }
440
441 *cnt = mpte->mpte_numflows;
442
443 /* just asking how many there are? */
444 if (cidp == USER_ADDR_NULL) {
445 return 0;
446 }
447
448 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
449 if ((error = copyout(&mpts->mpts_connid, cidp,
450 sizeof(mpts->mpts_connid))) != 0) {
451 break;
452 }
453
454 cidp += sizeof(mpts->mpts_connid);
455 }
456
457 return error;
458 }
459
460 /*
461 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
462 */
463 static int
mptcp_getconninfo(struct mptses * mpte,sae_connid_t * cid,uint32_t * flags,uint32_t * ifindex,int32_t * soerror,user_addr_t src,socklen_t * src_len,user_addr_t dst,socklen_t * dst_len,uint32_t * aux_type,user_addr_t aux_data,uint32_t * aux_len)464 mptcp_getconninfo(struct mptses *mpte, sae_connid_t *cid, uint32_t *flags,
465 uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
466 user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
467 user_addr_t aux_data, uint32_t *aux_len)
468 {
469 *flags = 0;
470 *aux_type = 0;
471 *ifindex = 0;
472 *soerror = 0;
473 struct mptcb *mp_tp = mpte->mpte_mptcb;
474
475 /* MPTCP-level global stats */
476 if (*cid == SAE_CONNID_ALL) {
477 struct socket *mp_so = mptetoso(mpte);
478 struct conninfo_multipathtcp mptcp_ci;
479 int error = 0;
480
481 if (*aux_len != 0 && *aux_len != sizeof(mptcp_ci)) {
482 return EINVAL;
483 }
484
485 if (mp_so->so_state & SS_ISCONNECTING) {
486 *flags |= CIF_CONNECTING;
487 }
488 if (mp_so->so_state & SS_ISCONNECTED) {
489 *flags |= CIF_CONNECTED;
490 }
491 if (mp_so->so_state & SS_ISDISCONNECTING) {
492 *flags |= CIF_DISCONNECTING;
493 }
494 if (mp_so->so_state & SS_ISDISCONNECTED) {
495 *flags |= CIF_DISCONNECTED;
496 }
497 if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) {
498 *flags |= CIF_MP_CAPABLE;
499 }
500 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
501 *flags |= CIF_MP_DEGRADED;
502 }
503 if (mp_tp->mpt_version == MPTCP_VERSION_1) {
504 *flags |= CIF_MP_V1;
505 }
506
507 *src_len = 0;
508 *dst_len = 0;
509
510 *aux_type = CIAUX_MPTCP;
511 *aux_len = sizeof(mptcp_ci);
512
513 if (aux_data != USER_ADDR_NULL) {
514 const struct mptsub *mpts;
515 int initial_info_set = 0;
516 unsigned long i = 0;
517
518 bzero(&mptcp_ci, sizeof(mptcp_ci));
519 mptcp_ci.mptcpci_subflow_count = mpte->mpte_numflows;
520 mptcp_ci.mptcpci_switch_count = mpte->mpte_subflow_switches;
521
522 VERIFY(sizeof(mptcp_ci.mptcpci_itfstats) == sizeof(mpte->mpte_itfstats));
523 memcpy(mptcp_ci.mptcpci_itfstats, mpte->mpte_itfstats, sizeof(mptcp_ci.mptcpci_itfstats));
524
525 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
526 if (i >= sizeof(mptcp_ci.mptcpci_subflow_connids) / sizeof(sae_connid_t)) {
527 break;
528 }
529 mptcp_ci.mptcpci_subflow_connids[i] = mpts->mpts_connid;
530
531 if (mpts->mpts_flags & MPTSF_INITIAL_SUB) {
532 const struct inpcb *inp;
533
534 inp = sotoinpcb(mpts->mpts_socket);
535
536 mptcp_ci.mptcpci_init_rxbytes = inp->inp_mstat.ms_total.ts_rxbytes;
537 mptcp_ci.mptcpci_init_txbytes = inp->inp_mstat.ms_total.ts_txbytes;
538 initial_info_set = 1;
539 }
540
541 mptcpstats_update(mptcp_ci.mptcpci_itfstats, MPTCP_ITFSTATS_SIZE, mpts);
542
543 i++;
544 }
545
546 if (initial_info_set == 0) {
547 mptcp_ci.mptcpci_init_rxbytes = mpte->mpte_init_rxbytes;
548 mptcp_ci.mptcpci_init_txbytes = mpte->mpte_init_txbytes;
549 }
550
551 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
552 mptcp_ci.mptcpci_flags |= MPTCPCI_FIRSTPARTY;
553 }
554
555 error = copyout(&mptcp_ci, aux_data, sizeof(mptcp_ci));
556 if (error != 0) {
557 os_log_error(mptcp_log_handle, "%s - %lx: copyout failed: %d\n",
558 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
559 return error;
560 }
561 }
562
563 return 0;
564 }
565
566 /* Any stats of any subflow */
567 if (*cid == SAE_CONNID_ANY) {
568 const struct mptsub *mpts;
569 struct socket *so;
570 const struct inpcb *inp;
571 int error = 0;
572
573 mpts = TAILQ_FIRST(&mpte->mpte_subflows);
574 if (mpts == NULL) {
575 return ENXIO;
576 }
577
578 so = mpts->mpts_socket;
579 inp = sotoinpcb(so);
580
581 if (inp->inp_vflag & INP_IPV4) {
582 error = in_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
583 soerror, src, src_len, dst, dst_len,
584 aux_type, aux_data, aux_len);
585 } else {
586 error = in6_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
587 soerror, src, src_len, dst, dst_len,
588 aux_type, aux_data, aux_len);
589 }
590
591 if (error != 0) {
592 os_log_error(mptcp_log_handle, "%s - %lx:error from in_getconninfo %d\n",
593 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
594 return error;
595 }
596
597 if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
598 *flags |= CIF_MP_CAPABLE;
599 }
600 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
601 *flags |= CIF_MP_DEGRADED;
602 }
603 if (mpts->mpts_flags & MPTSF_MP_READY) {
604 *flags |= CIF_MP_READY;
605 }
606 if (mpts->mpts_flags & MPTSF_ACTIVE) {
607 *flags |= CIF_MP_ACTIVE;
608 }
609 if (mp_tp->mpt_version == MPTCP_VERSION_1) {
610 *flags |= CIF_MP_V1;
611 }
612
613 return 0;
614 } else {
615 /* Per-interface stats */
616 const struct mptsub *mpts, *orig_mpts = NULL;
617 struct conninfo_tcp tcp_ci;
618 const struct inpcb *inp;
619 struct socket *so;
620 int error = 0;
621 int index;
622
623 /* cid is thus an ifindex - range-check first! */
624 if (*cid > USHRT_MAX) {
625 return EINVAL;
626 }
627
628 bzero(&tcp_ci, sizeof(tcp_ci));
629
630 /* First, get a subflow to fill in the "regular" info. */
631 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
632 const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
633
634 if (ifp && ifp->if_index == *cid) {
635 break;
636 }
637 }
638
639 if (mpts == NULL) {
640 /* No subflow there - well, let's just get the basic itf-info */
641 goto interface_info;
642 }
643
644 so = mpts->mpts_socket;
645 inp = sotoinpcb(so);
646
647 /* Give it USER_ADDR_NULL, because we are doing this on our own */
648 if (inp->inp_vflag & INP_IPV4) {
649 error = in_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
650 soerror, src, src_len, dst, dst_len,
651 aux_type, USER_ADDR_NULL, aux_len);
652 } else {
653 error = in6_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
654 soerror, src, src_len, dst, dst_len,
655 aux_type, USER_ADDR_NULL, aux_len);
656 }
657
658 if (error != 0) {
659 os_log_error(mptcp_log_handle, "%s - %lx:error from in_getconninfo %d\n",
660 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
661 return error;
662 }
663
664 /* ToDo: Nobody is reading these flags on subflows. Why bother ? */
665 if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
666 *flags |= CIF_MP_CAPABLE;
667 }
668 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
669 *flags |= CIF_MP_DEGRADED;
670 }
671 if (mpts->mpts_flags & MPTSF_MP_READY) {
672 *flags |= CIF_MP_READY;
673 }
674 if (mpts->mpts_flags & MPTSF_ACTIVE) {
675 *flags |= CIF_MP_ACTIVE;
676 }
677 if (mp_tp->mpt_version == MPTCP_VERSION_1) {
678 *flags |= CIF_MP_V1;
679 }
680
681 /*
682 * Now, we gather the metrics (aka., tcp_info) and roll them in
683 * across all subflows of this interface to build an aggregated
684 * view.
685 *
686 * We take the TCP_INFO from the first subflow as the "master",
687 * feeding into those fields that we do not roll.
688 */
689 if (aux_data != USER_ADDR_NULL) {
690 tcp_getconninfo(so, &tcp_ci);
691
692 orig_mpts = mpts;
693 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
694 const struct inpcb *mptsinp = sotoinpcb(mpts->mpts_socket);
695 const struct ifnet *ifp;
696
697 ifp = mptsinp->inp_last_outifp;
698
699 if (ifp == NULL || ifp->if_index != *cid || mpts == orig_mpts) {
700 continue;
701 }
702
703 /* Roll the itf-stats into the tcp_info */
704 tcp_ci.tcpci_tcp_info.tcpi_txbytes +=
705 mptsinp->inp_mstat.ms_total.ts_txbytes;
706 tcp_ci.tcpci_tcp_info.tcpi_rxbytes +=
707 mptsinp->inp_mstat.ms_total.ts_rxbytes;
708
709 tcp_ci.tcpci_tcp_info.tcpi_wifi_txbytes +=
710 mptsinp->inp_mstat.ms_wifi_infra.ts_txbytes +
711 mptsinp->inp_mstat.ms_wifi_non_infra.ts_txbytes;
712 tcp_ci.tcpci_tcp_info.tcpi_wifi_rxbytes +=
713 mptsinp->inp_mstat.ms_wifi_infra.ts_rxbytes +
714 mptsinp->inp_mstat.ms_wifi_non_infra.ts_rxbytes;
715
716 tcp_ci.tcpci_tcp_info.tcpi_wired_txbytes +=
717 mptsinp->inp_mstat.ms_wired.ts_txbytes;
718 tcp_ci.tcpci_tcp_info.tcpi_wired_rxbytes +=
719 mptsinp->inp_mstat.ms_wired.ts_rxbytes;
720
721 tcp_ci.tcpci_tcp_info.tcpi_cell_txbytes +=
722 mptsinp->inp_mstat.ms_cellular.ts_txbytes;
723 tcp_ci.tcpci_tcp_info.tcpi_cell_rxbytes +=
724 mptsinp->inp_mstat.ms_cellular.ts_rxbytes;
725 }
726 }
727
728 interface_info:
729 *aux_type = CIAUX_TCP;
730 if (*aux_len == 0) {
731 *aux_len = sizeof(tcp_ci);
732 } else if (aux_data != USER_ADDR_NULL) {
733 boolean_t create;
734
735 /*
736 * Finally, old subflows might have been closed - we
737 * want this data as well, so grab it from the interface
738 * stats.
739 */
740 create = orig_mpts != NULL;
741
742 /*
743 * When we found a subflow, we are willing to create a stats-index
744 * because we have some data to return. If there isn't a subflow,
745 * nor anything in the stats, return EINVAL. Because the
746 * ifindex belongs to something that doesn't exist.
747 */
748 index = mptcpstats_get_index_by_ifindex(mpte->mpte_itfstats, MPTCP_ITFSTATS_SIZE, (u_short)(*cid), false);
749 if (index == -1) {
750 os_log_error(mptcp_log_handle,
751 "%s - %lx: Asking for too many ifindex: %u subcount %u, mpts? %s\n",
752 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
753 *cid, mpte->mpte_numflows,
754 orig_mpts ? "yes" : "no");
755
756 if (orig_mpts == NULL) {
757 return EINVAL;
758 }
759 } else {
760 struct mptcp_itf_stats *stats;
761
762 stats = &mpte->mpte_itfstats[index];
763
764 /* Roll the itf-stats into the tcp_info */
765 tcp_ci.tcpci_tcp_info.tcpi_last_outif = *cid;
766 tcp_ci.tcpci_tcp_info.tcpi_txbytes +=
767 stats->mpis_txbytes;
768 tcp_ci.tcpci_tcp_info.tcpi_rxbytes +=
769 stats->mpis_rxbytes;
770
771 tcp_ci.tcpci_tcp_info.tcpi_wifi_txbytes +=
772 stats->mpis_wifi_txbytes;
773 tcp_ci.tcpci_tcp_info.tcpi_wifi_rxbytes +=
774 stats->mpis_wifi_rxbytes;
775
776 tcp_ci.tcpci_tcp_info.tcpi_wired_txbytes +=
777 stats->mpis_wired_txbytes;
778 tcp_ci.tcpci_tcp_info.tcpi_wired_rxbytes +=
779 stats->mpis_wired_rxbytes;
780
781 tcp_ci.tcpci_tcp_info.tcpi_cell_txbytes +=
782 stats->mpis_cell_txbytes;
783 tcp_ci.tcpci_tcp_info.tcpi_cell_rxbytes +=
784 stats->mpis_cell_rxbytes;
785 }
786
787 *aux_len = min(*aux_len, sizeof(tcp_ci));
788 error = copyout(&tcp_ci, aux_data, *aux_len);
789 if (error != 0) {
790 return error;
791 }
792 }
793 }
794
795 return 0;
796 }
797
798 /*
799 * User-protocol pru_control callback.
800 */
801 static int
mptcp_usr_control(struct socket * mp_so,u_long cmd,caddr_t __sized_by (IOCPARM_LEN (cmd))data,struct ifnet * ifp,struct proc * p)802 mptcp_usr_control(struct socket *mp_so, u_long cmd,
803 caddr_t __sized_by(IOCPARM_LEN(cmd)) data,
804 struct ifnet *ifp, struct proc *p)
805 {
806 #pragma unused(ifp, p)
807 struct mppcb *mpp = mpsotomppcb(mp_so);
808 struct mptses *mpte;
809 int error = 0;
810
811 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
812 error = EINVAL;
813 goto out;
814 }
815 mpte = mptompte(mpp);
816
817 switch (cmd) {
818 case SIOCGASSOCIDS32: { /* struct so_aidreq32 */
819 struct so_aidreq32 aidr;
820 bcopy(data, &aidr, sizeof(aidr));
821 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
822 aidr.sar_aidp);
823 if (error == 0) {
824 bcopy(&aidr, data, sizeof(aidr));
825 }
826 break;
827 }
828
829 case SIOCGASSOCIDS64: { /* struct so_aidreq64 */
830 struct so_aidreq64 aidr;
831 bcopy(data, &aidr, sizeof(aidr));
832 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
833 (user_addr_t)aidr.sar_aidp);
834 if (error == 0) {
835 bcopy(&aidr, data, sizeof(aidr));
836 }
837 break;
838 }
839
840 case SIOCGCONNIDS32: { /* struct so_cidreq32 */
841 struct so_cidreq32 cidr;
842 bcopy(data, &cidr, sizeof(cidr));
843 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
844 cidr.scr_cidp);
845 if (error == 0) {
846 bcopy(&cidr, data, sizeof(cidr));
847 }
848 break;
849 }
850
851 case SIOCGCONNIDS64: { /* struct so_cidreq64 */
852 struct so_cidreq64 cidr;
853 bcopy(data, &cidr, sizeof(cidr));
854 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
855 (user_addr_t)cidr.scr_cidp);
856 if (error == 0) {
857 bcopy(&cidr, data, sizeof(cidr));
858 }
859 break;
860 }
861
862 case SIOCGCONNINFO32: { /* struct so_cinforeq32 */
863 struct so_cinforeq32 cifr;
864 bcopy(data, &cifr, sizeof(cifr));
865 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
866 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
867 cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst,
868 &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data,
869 &cifr.scir_aux_len);
870 if (error == 0) {
871 bcopy(&cifr, data, sizeof(cifr));
872 }
873 break;
874 }
875
876 case SIOCGCONNINFO64: { /* struct so_cinforeq64 */
877 struct so_cinforeq64 cifr;
878 bcopy(data, &cifr, sizeof(cifr));
879 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
880 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
881 (user_addr_t)cifr.scir_src, &cifr.scir_src_len,
882 (user_addr_t)cifr.scir_dst, &cifr.scir_dst_len,
883 &cifr.scir_aux_type, (user_addr_t)cifr.scir_aux_data,
884 &cifr.scir_aux_len);
885 if (error == 0) {
886 bcopy(&cifr, data, sizeof(cifr));
887 }
888 break;
889 }
890
891 default:
892 error = EOPNOTSUPP;
893 break;
894 }
895 out:
896 return error;
897 }
898
899 static int
mptcp_disconnect(struct mptses * mpte)900 mptcp_disconnect(struct mptses *mpte)
901 {
902 struct socket *mp_so;
903 struct mptcb *mp_tp;
904 int error = 0;
905
906 mp_so = mptetoso(mpte);
907 mp_tp = mpte->mpte_mptcb;
908
909 /* if we're not detached, go thru socket state checks */
910 if (!(mp_so->so_flags & SOF_PCBCLEARING) && !(mp_so->so_flags & SOF_DEFUNCT)) {
911 if (!(mp_so->so_state & (SS_ISCONNECTED |
912 SS_ISCONNECTING))) {
913 error = ENOTCONN;
914 goto out;
915 }
916 if (mp_so->so_state & SS_ISDISCONNECTING) {
917 error = EALREADY;
918 goto out;
919 }
920 }
921
922 mptcp_cancel_all_timers(mp_tp);
923 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
924 mptcp_close(mpte, mp_tp);
925 } else if (((mp_so->so_options & SO_LINGER) &&
926 mp_so->so_linger == 0) ||
927 (mp_so->so_flags1 & SOF1_DEFUNCTINPROG)) {
928 mptcp_drop(mpte, mp_tp, 0);
929 } else {
930 soisdisconnecting(mp_so);
931 sbflush(&mp_so->so_rcv);
932 if (mptcp_usrclosed(mpte) != NULL) {
933 mptcp_output(mpte);
934 }
935 }
936
937 if (error == 0) {
938 mptcp_subflow_workloop(mpte);
939 }
940
941 out:
942 return error;
943 }
944
/*
 * pru_disconnect handler: look up the MPTCP session of the socket and
 * disconnect it.  Returns the result of mptcp_disconnect().
 */
static int
mptcp_usr_disconnect(struct socket *mp_so)
{
	struct mptses *mpte = mpsotompte(mp_so);

	return mptcp_disconnect(mpte);
}
953
954 /*
955 * User-protocol pru_disconnectx callback.
956 */
957 static int
mptcp_usr_disconnectx(struct socket * mp_so,sae_associd_t aid,sae_connid_t cid)958 mptcp_usr_disconnectx(struct socket *mp_so, sae_associd_t aid, sae_connid_t cid)
959 {
960 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
961 return EINVAL;
962 }
963
964 if (cid != SAE_CONNID_ANY && cid != SAE_CONNID_ALL) {
965 return EINVAL;
966 }
967
968 return mptcp_usr_disconnect(mp_so);
969 }
970
971 void
mptcp_finish_usrclosed(struct mptses * mpte)972 mptcp_finish_usrclosed(struct mptses *mpte)
973 {
974 struct mptcb *mp_tp = mpte->mpte_mptcb;
975 struct socket *mp_so = mptetoso(mpte);
976
977 if (mp_tp->mpt_state == MPTCPS_CLOSED || mp_tp->mpt_state == MPTCPS_TERMINATE) {
978 mpte = mptcp_close(mpte, mp_tp);
979 } else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
980 soisdisconnected(mp_so);
981 } else {
982 struct mptsub *mpts;
983
984 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
985 if ((mp_so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
986 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
987 mptcp_subflow_disconnect(mpte, mpts);
988 } else {
989 mptcp_subflow_shutdown(mpte, mpts);
990 }
991 }
992 }
993 }
994
995 /*
996 * User issued close, and wish to trail thru shutdown states.
997 */
998 static struct mptses *
mptcp_usrclosed(struct mptses * mpte)999 mptcp_usrclosed(struct mptses *mpte)
1000 {
1001 struct mptcb *mp_tp = mpte->mpte_mptcb;
1002
1003 mptcp_close_fsm(mp_tp, MPCE_CLOSE);
1004
1005 /* Not everything has been acknowledged - don't close the subflows! */
1006 if (mp_tp->mpt_state != MPTCPS_TERMINATE &&
1007 mp_tp->mpt_sndnxt + 1 != mp_tp->mpt_sndmax) {
1008 return mpte;
1009 }
1010
1011 mptcp_finish_usrclosed(mpte);
1012
1013 return mpte;
1014 }
1015
1016 /*
1017 * After a receive, possible send some update to peer.
1018 */
1019 static int
mptcp_usr_rcvd(struct socket * mp_so,int flags)1020 mptcp_usr_rcvd(struct socket *mp_so, int flags)
1021 {
1022 #pragma unused(flags)
1023 struct mppcb *mpp = mpsotomppcb(mp_so);
1024 struct mptses *mpte;
1025 struct mptsub *mpts;
1026 int error = 0;
1027
1028 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1029 error = EINVAL;
1030 goto out;
1031 }
1032
1033 mpte = mptompte(mpp);
1034
1035 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1036 struct socket *so = mpts->mpts_socket;
1037
1038 if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb != NULL) {
1039 (*so->so_proto->pr_usrreqs->pru_rcvd)(so, 0);
1040 }
1041 }
1042
1043 error = mptcp_output(mpte);
1044 out:
1045 return error;
1046 }
1047
1048 /*
1049 * Do a send by putting data in the output queue.
1050 */
1051 static int
mptcp_usr_send(struct socket * mp_so,int prus_flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct proc * p)1052 mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m,
1053 struct sockaddr *nam, struct mbuf *control, struct proc *p)
1054 {
1055 #pragma unused(nam, p)
1056 struct mppcb *mpp = mpsotomppcb(mp_so);
1057 struct mptses *mpte;
1058 int error = 0;
1059
1060 if (prus_flags & (PRUS_OOB | PRUS_EOF)) {
1061 error = EOPNOTSUPP;
1062 goto out;
1063 }
1064
1065 if (nam != NULL) {
1066 error = EOPNOTSUPP;
1067 goto out;
1068 }
1069
1070 if (control != NULL && control->m_len != 0) {
1071 error = EOPNOTSUPP;
1072 goto out;
1073 }
1074
1075 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1076 error = ECONNRESET;
1077 goto out;
1078 }
1079 mpte = mptompte(mpp);
1080 VERIFY(mpte != NULL);
1081
1082 if (!(mp_so->so_state & SS_ISCONNECTED) &&
1083 !(mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) {
1084 error = ENOTCONN;
1085 goto out;
1086 }
1087
1088 mptcp_insert_dsn(mpp, m);
1089 VERIFY(mp_so->so_snd.sb_flags & SB_NOCOMPRESS);
1090 sbappendstream(&mp_so->so_snd, m);
1091 m = NULL;
1092
1093 error = mptcp_output(mpte);
1094 if (error != 0) {
1095 goto out;
1096 }
1097
1098 if (mp_so->so_state & SS_ISCONNECTING) {
1099 if (mp_so->so_state & SS_NBIO) {
1100 error = EWOULDBLOCK;
1101 } else {
1102 error = sbwait(&mp_so->so_snd);
1103 }
1104 }
1105
1106 out:
1107 if (error) {
1108 if (m != NULL) {
1109 m_freem(m);
1110 }
1111 if (control != NULL) {
1112 m_freem(control);
1113 }
1114 }
1115 return error;
1116 }
1117
1118 /*
1119 * Mark the MPTCP connection as being incapable of further output.
1120 */
1121 static int
mptcp_usr_shutdown(struct socket * mp_so)1122 mptcp_usr_shutdown(struct socket *mp_so)
1123 {
1124 struct mppcb *mpp = mpsotomppcb(mp_so);
1125 struct mptses *mpte;
1126 int error = 0;
1127
1128 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1129 error = EINVAL;
1130 goto out;
1131 }
1132 mpte = mptompte(mpp);
1133 VERIFY(mpte != NULL);
1134
1135 socantsendmore(mp_so);
1136
1137 mpte = mptcp_usrclosed(mpte);
1138 if (mpte != NULL) {
1139 error = mptcp_output(mpte);
1140 }
1141 out:
1142 return error;
1143 }
1144
1145 /*
1146 * Copy the contents of uio into a properly sized mbuf chain.
1147 */
1148 static int
mptcp_uiotombuf(struct uio * uio,int how,user_ssize_t space,struct mbuf ** top)1149 mptcp_uiotombuf(struct uio *uio, int how, user_ssize_t space, struct mbuf **top)
1150 {
1151 struct mbuf *m, *mb, *nm = NULL, *mtail = NULL;
1152 int progress, len, error;
1153 user_ssize_t resid, tot;
1154
1155 VERIFY(top != NULL && *top == NULL);
1156
1157 /*
1158 * space can be zero or an arbitrary large value bound by
1159 * the total data supplied by the uio.
1160 */
1161 resid = uio_resid(uio);
1162 if (space > 0) {
1163 tot = MIN(resid, space);
1164 } else {
1165 tot = resid;
1166 }
1167
1168 if (tot < 0 || tot > INT_MAX) {
1169 return EINVAL;
1170 }
1171
1172 len = (int)tot;
1173 if (len == 0) {
1174 len = 1;
1175 }
1176
1177 /* Loop and append maximum sized mbufs to the chain tail. */
1178 while (len > 0) {
1179 uint32_t m_needed = 1;
1180
1181 if (len > MBIGCLBYTES) {
1182 mb = m_getpackets_internal(&m_needed, 1,
1183 how, 1, M16KCLBYTES);
1184 } else if (len > MCLBYTES) {
1185 mb = m_getpackets_internal(&m_needed, 1,
1186 how, 1, MBIGCLBYTES);
1187 } else if (len >= (signed)MINCLSIZE) {
1188 mb = m_getpackets_internal(&m_needed, 1,
1189 how, 1, MCLBYTES);
1190 } else {
1191 mb = m_gethdr(how, MT_DATA);
1192 }
1193
1194 /* Fail the whole operation if one mbuf can't be allocated. */
1195 if (mb == NULL) {
1196 if (nm != NULL) {
1197 m_freem(nm);
1198 }
1199 return ENOBUFS;
1200 }
1201
1202 /* Book keeping. */
1203 VERIFY(mb->m_flags & M_PKTHDR);
1204 len -= ((mb->m_flags & M_EXT) ? mb->m_ext.ext_size : MHLEN);
1205 if (mtail != NULL) {
1206 mtail->m_next = mb;
1207 } else {
1208 nm = mb;
1209 }
1210 mtail = mb;
1211 }
1212
1213 m = nm;
1214
1215 progress = 0;
1216 /* Fill all mbufs with uio data and update header information. */
1217 for (mb = m; mb != NULL; mb = mb->m_next) {
1218 /* tot >= 0 && tot <= INT_MAX (see above) */
1219 len = MIN((int)M_TRAILINGSPACE(mb), (int)(tot - progress));
1220
1221 error = uiomove(mtod(mb, char *), len, uio);
1222 if (error != 0) {
1223 m_freem(m);
1224 return error;
1225 }
1226
1227 /* each mbuf is M_PKTHDR chained via m_next */
1228 mb->m_len = len;
1229 mb->m_pkthdr.len = len;
1230
1231 progress += len;
1232 }
1233 VERIFY(progress == tot);
1234 *top = m;
1235 return 0;
1236 }
1237
1238 /*
1239 * MPTCP socket protocol-user socket send routine, derived from sosend().
1240 */
1241 static int
mptcp_usr_sosend(struct socket * mp_so,struct sockaddr * addr,struct uio * uio,struct mbuf * top,struct mbuf * control,int flags)1242 mptcp_usr_sosend(struct socket *mp_so, struct sockaddr *addr, struct uio *uio,
1243 struct mbuf *top, struct mbuf *control, int flags)
1244 {
1245 #pragma unused(addr)
1246 user_ssize_t resid, space;
1247 int error, sendflags;
1248 struct proc *p = current_proc();
1249 int sblocked = 0;
1250
1251 /* UIO is required for now, due to per-mbuf M_PKTHDR constrains */
1252 if (uio == NULL || top != NULL) {
1253 error = EINVAL;
1254 goto out;
1255 }
1256 resid = uio_resid(uio);
1257
1258 socket_lock(mp_so, 1);
1259 so_update_last_owner_locked(mp_so, p);
1260 so_update_policy(mp_so);
1261
1262 VERIFY(mp_so->so_type == SOCK_STREAM);
1263 VERIFY(!(mp_so->so_flags & SOF_MP_SUBFLOW));
1264
1265 if (flags & (MSG_OOB | MSG_DONTROUTE)) {
1266 error = EOPNOTSUPP;
1267 socket_unlock(mp_so, 1);
1268 goto out;
1269 }
1270
1271 /*
1272 * In theory resid should be unsigned. However, space must be
1273 * signed, as it might be less than 0 if we over-committed, and we
1274 * must use a signed comparison of space and resid. On the other
1275 * hand, a negative resid causes us to loop sending 0-length
1276 * segments to the protocol.
1277 */
1278 if (resid < 0 || resid > INT_MAX ||
1279 (flags & MSG_EOR) || control != NULL) {
1280 error = EINVAL;
1281 socket_unlock(mp_so, 1);
1282 goto out;
1283 }
1284
1285 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
1286
1287 do {
1288 error = sosendcheck(mp_so, NULL, resid, 0, 0, flags,
1289 &sblocked);
1290 if (error != 0) {
1291 goto release;
1292 }
1293
1294 space = sbspace(&mp_so->so_snd);
1295 do {
1296 socket_unlock(mp_so, 0);
1297 /*
1298 * Copy the data from userland into an mbuf chain.
1299 */
1300 error = mptcp_uiotombuf(uio, M_WAITOK, space, &top);
1301 if (error != 0) {
1302 socket_lock(mp_so, 0);
1303 goto release;
1304 }
1305 VERIFY(top != NULL);
1306 space -= resid - uio_resid(uio);
1307 resid = uio_resid(uio);
1308 socket_lock(mp_so, 0);
1309
1310 /*
1311 * Compute flags here, for pru_send and NKEs.
1312 */
1313 sendflags = (resid > 0 && space > 0) ?
1314 PRUS_MORETOCOME : 0;
1315
1316 /*
1317 * Socket filter processing
1318 */
1319 VERIFY(control == NULL);
1320 error = sflt_data_out(mp_so, NULL, &top, &control, 0);
1321 if (error != 0) {
1322 if (error == EJUSTRETURN) {
1323 error = 0;
1324 top = NULL;
1325 /* always free control if any */
1326 }
1327 goto release;
1328 }
1329 if (control != NULL) {
1330 m_freem(control);
1331 control = NULL;
1332 }
1333
1334 /*
1335 * Pass data to protocol.
1336 */
1337 error = (*mp_so->so_proto->pr_usrreqs->pru_send)
1338 (mp_so, sendflags, top, NULL, NULL, p);
1339 if (error == EJUSTRETURN) {
1340 error = 0;
1341 }
1342
1343 top = NULL;
1344 if (error != 0) {
1345 goto release;
1346 }
1347 } while (resid != 0 && space > 0);
1348 } while (resid != 0);
1349
1350 release:
1351 if (sblocked) {
1352 sbunlock(&mp_so->so_snd, FALSE); /* will unlock socket */
1353 } else {
1354 socket_unlock(mp_so, 1);
1355 }
1356 out:
1357 if (top != NULL) {
1358 m_freem(top);
1359 }
1360 if (control != NULL) {
1361 m_freem(control);
1362 }
1363
1364 soclearfastopen(mp_so);
1365
1366 return error;
1367 }
1368
1369 /*
1370 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1371 * This routine simply indicates to the caller whether or not to proceed
1372 * further with the given socket option. This is invoked by sosetoptlock()
1373 * and sogetoptlock().
1374 */
1375 static int
mptcp_usr_socheckopt(struct socket * mp_so,struct sockopt * sopt)1376 mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt)
1377 {
1378 #pragma unused(mp_so)
1379 int error = 0;
1380
1381 VERIFY(sopt->sopt_level == SOL_SOCKET);
1382
1383 /*
1384 * We could check for sopt_dir (set/get) here, but we'll just
1385 * let the caller deal with it as appropriate; therefore the
1386 * following is a superset of the socket options which we
1387 * allow for set/get.
1388 *
1389 * XXX: [email protected]
1390 *
1391 * Need to consider the following cases:
1392 *
1393 * a. Certain socket options don't have a clear definition
1394 * on the expected behavior post connect(2). At the time
1395 * those options are issued on the MP socket, there may
1396 * be existing subflow sockets that are already connected.
1397 */
1398 switch (sopt->sopt_name) {
1399 case SO_LINGER: /* MP */
1400 case SO_LINGER_SEC: /* MP */
1401 case SO_TYPE: /* MP */
1402 case SO_NREAD: /* MP */
1403 case SO_NWRITE: /* MP */
1404 case SO_ERROR: /* MP */
1405 case SO_SNDBUF: /* MP */
1406 case SO_RCVBUF: /* MP */
1407 case SO_SNDLOWAT: /* MP */
1408 case SO_RCVLOWAT: /* MP */
1409 case SO_SNDTIMEO: /* MP */
1410 case SO_RCVTIMEO: /* MP */
1411 case SO_NKE: /* MP */
1412 case SO_NOSIGPIPE: /* MP */
1413 case SO_NOADDRERR: /* MP */
1414 case SO_LABEL: /* MP */
1415 case SO_PEERLABEL: /* MP */
1416 case SO_DEFUNCTIT: /* MP */
1417 case SO_DEFUNCTOK: /* MP */
1418 case SO_ISDEFUNCT: /* MP */
1419 case SO_TRAFFIC_CLASS_DBG: /* MP */
1420 case SO_DELEGATED: /* MP */
1421 case SO_DELEGATED_UUID: /* MP */
1422 #if NECP
1423 case SO_NECP_ATTRIBUTES:
1424 case SO_NECP_CLIENTUUID:
1425 #endif /* NECP */
1426 case SO_MPKL_SEND_INFO:
1427 /*
1428 * Tell the caller that these options are to be processed.
1429 */
1430 break;
1431
1432 case SO_DEBUG: /* MP + subflow */
1433 case SO_KEEPALIVE: /* MP + subflow */
1434 case SO_USELOOPBACK: /* MP + subflow */
1435 case SO_RANDOMPORT: /* MP + subflow */
1436 case SO_TRAFFIC_CLASS: /* MP + subflow */
1437 case SO_RECV_TRAFFIC_CLASS: /* MP + subflow */
1438 case SO_PRIVILEGED_TRAFFIC_CLASS: /* MP + subflow */
1439 case SO_RECV_ANYIF: /* MP + subflow */
1440 case SO_RESTRICTIONS: /* MP + subflow */
1441 case SO_FLUSH: /* MP + subflow */
1442 case SO_NOWAKEFROMSLEEP:
1443 case SO_NOAPNFALLBK:
1444 case SO_MARK_CELLFALLBACK:
1445 case SO_MARK_CELLFALLBACK_UUID:
1446 case SO_MARK_KNOWN_TRACKER:
1447 case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
1448 case SO_MARK_APPROVED_APP_DOMAIN:
1449 case SO_FALLBACK_MODE:
1450 /*
1451 * Tell the caller that these options are to be processed;
1452 * these will also be recorded later by mptcp_setopt().
1453 *
1454 * NOTE: Only support integer option value for now.
1455 */
1456 if (sopt->sopt_valsize != sizeof(int)) {
1457 error = EINVAL;
1458 }
1459 break;
1460
1461 default:
1462 /*
1463 * Tell the caller to stop immediately and return an error.
1464 */
1465 error = ENOPROTOOPT;
1466 break;
1467 }
1468
1469 return error;
1470 }
1471
1472 /*
1473 * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
1474 */
1475 static int
mptcp_setopt_apply(struct mptses * mpte,struct mptopt * mpo)1476 mptcp_setopt_apply(struct mptses *mpte, struct mptopt *mpo)
1477 {
1478 struct socket *mp_so;
1479 struct mptsub *mpts;
1480 struct mptopt smpo;
1481 int error = 0;
1482
1483 /* just bail now if this isn't applicable to subflow sockets */
1484 if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
1485 error = ENOPROTOOPT;
1486 goto out;
1487 }
1488
1489 /*
1490 * Skip those that are handled internally; these options
1491 * should not have been recorded and marked with the
1492 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1493 */
1494 if (mpo->mpo_level == SOL_SOCKET &&
1495 (mpo->mpo_name == SO_NOSIGPIPE || mpo->mpo_name == SO_NOADDRERR)) {
1496 error = ENOPROTOOPT;
1497 goto out;
1498 }
1499
1500 mp_so = mptetoso(mpte);
1501
1502 /*
1503 * Don't bother going further if there's no subflow; mark the option
1504 * with MPOF_INTERIM so that we know whether or not to remove this
1505 * option upon encountering an error while issuing it during subflow
1506 * socket creation.
1507 */
1508 if (mpte->mpte_numflows == 0) {
1509 VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows));
1510 mpo->mpo_flags |= MPOF_INTERIM;
1511 /* return success */
1512 goto out;
1513 }
1514
1515 bzero(&smpo, sizeof(smpo));
1516 smpo.mpo_flags |= MPOF_SUBFLOW_OK;
1517 smpo.mpo_level = mpo->mpo_level;
1518 smpo.mpo_name = mpo->mpo_name;
1519
1520 /* grab exisiting values in case we need to rollback */
1521 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1522 struct socket *so;
1523
1524 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL | MPTSF_SOPT_INPROG);
1525 mpts->mpts_oldintval = 0;
1526 smpo.mpo_intval = 0;
1527 VERIFY(mpts->mpts_socket != NULL);
1528 so = mpts->mpts_socket;
1529 if (mptcp_subflow_sogetopt(mpte, so, &smpo) == 0) {
1530 mpts->mpts_flags |= MPTSF_SOPT_OLDVAL;
1531 mpts->mpts_oldintval = smpo.mpo_intval;
1532 }
1533 }
1534
1535 /* apply socket option */
1536 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1537 struct socket *so;
1538
1539 mpts->mpts_flags |= MPTSF_SOPT_INPROG;
1540 VERIFY(mpts->mpts_socket != NULL);
1541 so = mpts->mpts_socket;
1542 error = mptcp_subflow_sosetopt(mpte, mpts, mpo);
1543 if (error != 0) {
1544 break;
1545 }
1546 }
1547
1548 /* cleanup, and rollback if needed */
1549 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1550 struct socket *so;
1551
1552 if (!(mpts->mpts_flags & MPTSF_SOPT_INPROG)) {
1553 /* clear in case it's set */
1554 mpts->mpts_flags &= ~MPTSF_SOPT_OLDVAL;
1555 mpts->mpts_oldintval = 0;
1556 continue;
1557 }
1558 if (!(mpts->mpts_flags & MPTSF_SOPT_OLDVAL)) {
1559 mpts->mpts_flags &= ~MPTSF_SOPT_INPROG;
1560 VERIFY(mpts->mpts_oldintval == 0);
1561 continue;
1562 }
1563 /* error during sosetopt, so roll it back */
1564 if (error != 0) {
1565 VERIFY(mpts->mpts_socket != NULL);
1566 so = mpts->mpts_socket;
1567 smpo.mpo_intval = mpts->mpts_oldintval;
1568 mptcp_subflow_sosetopt(mpte, mpts, &smpo);
1569 }
1570 mpts->mpts_oldintval = 0;
1571 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL | MPTSF_SOPT_INPROG);
1572 }
1573
1574 out:
1575 return error;
1576 }
1577
/*
 * Handle SOPT_SET for socket options issued on MP socket.
 *
 * The options handled here fall into three groups:
 *   - options that are recorded on the MPTCP session and then issued on
 *     every existing subflow through mptcp_setopt_apply(); these "break"
 *     out of the switch statements below and reach the common code at the
 *     end of the function;
 *   - SO_FLUSH, which takes the same path but is not recorded (rec = 0);
 *   - options that are consumed at the MPTCP level only; these are fully
 *     handled inside the switch statements and leave through "out".
 *
 * MPP_INSIDE_SETGETOPT is set on the MP PCB for the duration of the call
 * and cleared again on both exit paths.
 *
 * Returns 0 on success.  On failure the error is logged together with the
 * option and returned to the caller.
 */
static int
mptcp_setopt(struct mptses *mpte, struct sockopt *sopt)
{
	int error = 0, optval = 0, level, optname, rec = 1;
	struct mptopt smpo, *mpo = NULL;
	struct socket *mp_so;

	level = sopt->sopt_level;
	optname = sopt->sopt_name;

	mp_so = mptetoso(mpte);

	/* this routine must not be re-entered for the same MP PCB */
	VERIFY(!(mpsotomppcb(mp_so)->mpp_flags & MPP_INSIDE_SETGETOPT));
	mpsotomppcb(mp_so)->mpp_flags |= MPP_INSIDE_SETGETOPT;

	/*
	 * Record socket options which are applicable to subflow sockets so
	 * that we can replay them for new ones; see mptcp_usr_socheckopt()
	 * for the list of eligible socket-level options.
	 */
	if (level == SOL_SOCKET) {
		switch (optname) {
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_USELOOPBACK:
		case SO_RANDOMPORT:
		case SO_TRAFFIC_CLASS:
		case SO_RECV_TRAFFIC_CLASS:
		case SO_PRIVILEGED_TRAFFIC_CLASS:
		case SO_RECV_ANYIF:
		case SO_RESTRICTIONS:
		case SO_NOWAKEFROMSLEEP:
		case SO_NOAPNFALLBK:
		case SO_MARK_CELLFALLBACK:
		case SO_MARK_KNOWN_TRACKER:
		case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
		case SO_MARK_APPROVED_APP_DOMAIN:
		case SO_FALLBACK_MODE:
			/* record it */
			break;
		case SO_FLUSH:
			/* don't record it */
			rec = 0;
			break;

		/* Next ones, record at MPTCP-level */
		case SO_DELEGATED:
			/* the int-sized value is copied straight into mpte_epid */
			error = sooptcopyin(sopt, &mpte->mpte_epid,
			    sizeof(int), sizeof(int));
			if (error != 0) {
				goto err_out;
			}

			goto out;
		case SO_DELEGATED_UUID:
			/* the uuid_t value is copied straight into mpte_euuid */
			error = sooptcopyin(sopt, &mpte->mpte_euuid,
			    sizeof(uuid_t), sizeof(uuid_t));
			if (error != 0) {
				goto err_out;
			}

			goto out;
#if NECP
		case SO_NECP_CLIENTUUID:
			/* the client UUID can only be set once */
			if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
				error = EINVAL;
				goto err_out;
			}

			error = sooptcopyin(sopt, &mpsotomppcb(mp_so)->necp_client_uuid,
			    sizeof(uuid_t), sizeof(uuid_t));
			if (error != 0) {
				goto err_out;
			}

			/*
			 * NOTE(review): when the registration below fails, the
			 * UUID copied in above is not cleared by this routine,
			 * so a later attempt would hit the "already set" check
			 * above - confirm whether that is intended.
			 */
			mpsotomppcb(mp_so)->necp_cb = mptcp_session_necp_cb;
			error = necp_client_register_multipath_cb(mp_so->last_pid,
			    mpsotomppcb(mp_so)->necp_client_uuid,
			    mpsotomppcb(mp_so));
			if (error) {
				goto err_out;
			}

			/*
			 * The UUID is checked again after registration;
			 * presumably the registration path may reset it -
			 * TODO confirm.
			 */
			if (uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
				error = EINVAL;
				goto err_out;
			}

			goto out;
		case SO_NECP_ATTRIBUTES:
			error = necp_set_socket_attributes(&mpsotomppcb(mp_so)->inp_necp_attributes, sopt);
			if (error) {
				goto err_out;
			}

			goto out;
#endif /* NECP */
		default:
			/* nothing to do; just return */
			goto out;
		}
	} else if (sopt->sopt_level == IPPROTO_IP) {
		switch (optname) {
		case IP_TOS:
			/* eligible; record it */
			break;
		default:
			/* not eligible */
			error = ENOPROTOOPT;
			goto err_out;
		}
	} else if (sopt->sopt_level == IPPROTO_IPV6) {
		switch (optname) {
		case IPV6_TCLASS:
			/* eligible; record it */
			break;
		default:
			/* not eligible */
			error = ENOPROTOOPT;
			goto err_out;
		}
	} else {
		/*
		 * Every remaining level ends up here; mptcp_ctloutput() only
		 * lets IPPROTO_TCP through in addition to the levels handled
		 * above.
		 */
		switch (optname) {
		case TCP_NODELAY:
		case TCP_RXT_FINDROP:
		case TCP_KEEPALIVE:
		case TCP_KEEPINTVL:
		case TCP_KEEPCNT:
		case TCP_CONNECTIONTIMEOUT:
		case TCP_RXT_CONNDROPTIME:
		case PERSIST_TIMEOUT:
		case TCP_ADAPTIVE_READ_TIMEOUT:
		case TCP_ADAPTIVE_WRITE_TIMEOUT:
		case TCP_FASTOPEN_FORCE_ENABLE:
			/* eligible; record it */
			break;
		case TCP_NOTSENT_LOWAT:
			/* record at MPTCP level */
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error) {
				goto err_out;
			}
			if (optval < 0) {
				error = EINVAL;
				goto err_out;
			} else {
				/*
				 * Zero turns the feature off on the MP socket,
				 * any positive value turns it on.
				 */
				if (optval == 0) {
					mp_so->so_flags &= ~SOF_NOTSENT_LOWAT;
					error = mptcp_set_notsent_lowat(mpte, 0);
				} else {
					mp_so->so_flags |= SOF_NOTSENT_LOWAT;
					error = mptcp_set_notsent_lowat(mpte,
					    optval);
				}

				if (error) {
					goto err_out;
				}
			}
			goto out;
		case MPTCP_SERVICE_TYPE:
			/* record at MPTCP level */
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error) {
				goto err_out;
			}
			if (optval < 0 || optval >= MPTCP_SVCTYPE_MAX) {
				error = EINVAL;
				goto err_out;
			}

			/* the requested service type is subject to an entitlement check */
			if (mptcp_entitlement_check(mp_so, (uint8_t)optval) < 0) {
				error = EACCES;
				goto err_out;
			}

			mpte->mpte_svctype = (uint8_t)optval;
			mpte->mpte_flags |= MPTE_SVCTYPE_CHECKED;

			goto out;
		case MPTCP_ALTERNATE_PORT:
			/* record at MPTCP level */
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error) {
				goto err_out;
			}

			/* has to fit into the 16-bit mpte_alternate_port */
			if (optval < 0 || optval > UINT16_MAX) {
				error = EINVAL;
				goto err_out;
			}

			mpte->mpte_alternate_port = (uint16_t)optval;

			goto out;
		case MPTCP_FORCE_ENABLE:
			/* record at MPTCP level */
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error) {
				goto err_out;
			}

			/* boolean: only 0 and 1 are accepted */
			if (optval < 0 || optval > 1) {
				error = EINVAL;
				goto err_out;
			}

			if (optval) {
				mpte->mpte_flags |= MPTE_FORCE_ENABLE;
			} else {
				mpte->mpte_flags &= ~MPTE_FORCE_ENABLE;
			}

			goto out;
		case MPTCP_FORCE_VERSION:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
			    sizeof(optval));
			if (error) {
				goto err_out;
			}

			if (optval != 0 && optval != 1) {
				error = EINVAL;
				goto err_out;
			}

			/* MPTE_FORCE_V0 and MPTE_FORCE_V1 are kept mutually exclusive */
			if (optval == 0) {
				mpte->mpte_flags |= MPTE_FORCE_V0;
				mpte->mpte_flags &= ~MPTE_FORCE_V1;
			} else {
				mpte->mpte_flags |= MPTE_FORCE_V1;
				mpte->mpte_flags &= ~MPTE_FORCE_V0;
			}

			goto out;
		case MPTCP_EXPECTED_PROGRESS_TARGET:
		{
			struct mptcb *mp_tp = mpte->mpte_mptcb;
			uint64_t mach_time_target;
			uint64_t nanoseconds;

			/* only meaningful for the target-based service type */
			if (mpte->mpte_svctype != MPTCP_SVCTYPE_TARGET_BASED) {
				os_log(mptcp_log_handle, "%s - %lx: Can't set urgent activity when svctype is %u\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_svctype);
				error = EINVAL;
				goto err_out;
			}

			/* the option value is a 64-bit time target */
			error = sooptcopyin(sopt, &mach_time_target, sizeof(mach_time_target), sizeof(mach_time_target));
			if (error) {
				goto err_out;
			}

			if (!mptcp_ok_to_create_subflows(mp_tp)) {
				os_log(mptcp_log_handle, "%s - %lx: Not ok to create subflows, state %u flags %#x\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
				error = EINVAL;
				goto err_out;
			}

			/* a target of zero skips the adjustment below */
			if (mach_time_target) {
				uint64_t time_now = 0;
				uint64_t time_now_nanoseconds;

				/*
				 * Move the target earlier by the headstart.
				 * NOTE(review): the headstart is scaled by
				 * NSEC_PER_MSEC, so it is presumably expressed
				 * in milliseconds; the subtraction is unsigned
				 * and is not guarded against a target smaller
				 * than the headstart - confirm.
				 */
				absolutetime_to_nanoseconds(mach_time_target, &nanoseconds);
				nanoseconds = nanoseconds - (mptcp_expected_progress_headstart * NSEC_PER_MSEC);

				time_now = mach_continuous_time();
				absolutetime_to_nanoseconds(time_now, &time_now_nanoseconds);

				nanoseconds_to_absolutetime(nanoseconds, &mach_time_target);
				/* If the timer is already running and it would
				 * fire in less than mptcp_expected_progress_headstart
				 * seconds, then it's not worth canceling it.
				 */
				if (mpte->mpte_time_target &&
				    mpte->mpte_time_target < time_now &&
				    time_now_nanoseconds > nanoseconds - (mptcp_expected_progress_headstart * NSEC_PER_MSEC)) {
					os_log(mptcp_log_handle, "%s - %lx: Not rescheduling timer %llu now %llu target %llu\n",
					    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
					    mpte->mpte_time_target,
					    time_now,
					    mach_time_target);
					goto out;
				}
			}

			mpte->mpte_time_target = mach_time_target;
			mptcp_set_urgency_timer(mpte);

			goto out;
		}
		default:
			/* not eligible */
			error = ENOPROTOOPT;
			goto err_out;
		}
	}

	/*
	 * Only the options that are issued on the subflows get here; all of
	 * them take an int-sized value.
	 */
	if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
	    sizeof(optval))) != 0) {
		goto err_out;
	}

	if (rec) {
		/* search for an existing one; if not found, allocate */
		if ((mpo = mptcp_sopt_find(mpte, sopt)) == NULL) {
			mpo = mptcp_sopt_alloc();
		}

		/* initialize or update, as needed */
		mpo->mpo_intval = optval;
		if (!(mpo->mpo_flags & MPOF_ATTACHED)) {
			mpo->mpo_level = level;
			mpo->mpo_name = optname;
			mptcp_sopt_insert(mpte, mpo);
		}
		/* this can be issued on the subflow socket */
		mpo->mpo_flags |= MPOF_SUBFLOW_OK;
	} else {
		/* not recorded: describe the option in a stack-local mptopt */
		bzero(&smpo, sizeof(smpo));
		mpo = &smpo;
		mpo->mpo_flags |= MPOF_SUBFLOW_OK;
		mpo->mpo_level = level;
		mpo->mpo_name = optname;
		mpo->mpo_intval = optval;
	}

	/* issue this socket option on existing subflows */
	error = mptcp_setopt_apply(mpte, mpo);
	if (error != 0 && (mpo->mpo_flags & MPOF_ATTACHED)) {
		/* the option could not be applied; forget the recorded copy */
		VERIFY(mpo != &smpo);
		mptcp_sopt_remove(mpte, mpo);
		mptcp_sopt_free(mpo);
	}
	/* mpo may have been freed above; it is only compared here, not dereferenced */
	if (mpo == &smpo) {
		mpo->mpo_flags &= ~MPOF_INTERIM;
	}

	if (error) {
		goto err_out;
	}

out:
	/* success exit: every path that reaches this label returns 0 */

	mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
	return 0;

err_out:
	/* failure exit: log the option that could not be set, then return the error */
	os_log_error(mptcp_log_handle, "%s - %lx: sopt %s (%d, %d) val %d can't be issued error %d\n",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
	    mptcp_sopt2str(level, optname), level, optname, optval, error);
	mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
	return error;
}
1940
1941 static void
mptcp_fill_info_bytestats(struct tcp_info * ti,struct mptses * mpte)1942 mptcp_fill_info_bytestats(struct tcp_info *ti, struct mptses *mpte)
1943 {
1944 struct mptsub *mpts;
1945 int i;
1946
1947 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1948 const struct inpcb *inp = sotoinpcb(mpts->mpts_socket);
1949
1950 if (inp == NULL) {
1951 continue;
1952 }
1953
1954 ti->tcpi_txbytes += inp->inp_mstat.ms_total.ts_txbytes;
1955 ti->tcpi_rxbytes += inp->inp_mstat.ms_total.ts_rxbytes;
1956 ti->tcpi_cell_txbytes += inp->inp_mstat.ms_cellular.ts_txbytes;
1957 ti->tcpi_cell_rxbytes += inp->inp_mstat.ms_cellular.ts_rxbytes;
1958 ti->tcpi_wifi_txbytes += inp->inp_mstat.ms_wifi_infra.ts_txbytes +
1959 inp->inp_mstat.ms_wifi_non_infra.ts_txbytes;
1960 ti->tcpi_wifi_rxbytes += inp->inp_mstat.ms_wifi_infra.ts_rxbytes +
1961 inp->inp_mstat.ms_wifi_non_infra.ts_rxbytes;
1962 ti->tcpi_wired_txbytes += inp->inp_mstat.ms_wired.ts_txbytes;
1963 ti->tcpi_wired_rxbytes += inp->inp_mstat.ms_wired.ts_rxbytes;
1964 }
1965
1966 for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
1967 struct mptcp_itf_stats *stats = &mpte->mpte_itfstats[i];
1968
1969 ti->tcpi_txbytes += stats->mpis_txbytes;
1970 ti->tcpi_rxbytes += stats->mpis_rxbytes;
1971
1972 ti->tcpi_wifi_txbytes += stats->mpis_wifi_txbytes;
1973 ti->tcpi_wifi_rxbytes += stats->mpis_wifi_rxbytes;
1974
1975 ti->tcpi_wired_txbytes += stats->mpis_wired_txbytes;
1976 ti->tcpi_wired_rxbytes += stats->mpis_wired_rxbytes;
1977
1978 ti->tcpi_cell_txbytes += stats->mpis_cell_txbytes;
1979 ti->tcpi_cell_rxbytes += stats->mpis_cell_rxbytes;
1980 }
1981 }
1982
1983 static void
mptcp_fill_info(struct mptses * mpte,struct tcp_info * ti)1984 mptcp_fill_info(struct mptses *mpte, struct tcp_info *ti)
1985 {
1986 struct mptsub *actsub = mpte->mpte_active_sub;
1987 struct mptcb *mp_tp = mpte->mpte_mptcb;
1988 struct tcpcb *acttp = NULL;
1989
1990 if (actsub) {
1991 acttp = sototcpcb(actsub->mpts_socket);
1992 }
1993
1994 bzero(ti, sizeof(*ti));
1995
1996 ti->tcpi_state = (uint8_t)mp_tp->mpt_state;
1997 /* tcpi_options */
1998 /* tcpi_snd_wscale */
1999 /* tcpi_rcv_wscale */
2000 /* tcpi_flags */
2001 if (acttp) {
2002 ti->tcpi_rto = acttp->t_timer[TCPT_REXMT] ? acttp->t_rxtcur : 0;
2003 }
2004
2005 /* tcpi_snd_mss */
2006 /* tcpi_rcv_mss */
2007 if (acttp) {
2008 ti->tcpi_rttcur = acttp->t_rttcur;
2009 ti->tcpi_srtt = acttp->t_srtt >> TCP_RTT_SHIFT;
2010 ti->tcpi_rttvar = acttp->t_rttvar >> TCP_RTTVAR_SHIFT;
2011 ti->tcpi_rttbest = acttp->t_rttbest >> TCP_RTT_SHIFT;
2012 ti->tcpi_rcv_srtt = acttp->rcv_srtt >> TCP_RTT_SHIFT;
2013 }
2014 /* tcpi_snd_ssthresh */
2015 /* tcpi_snd_cwnd */
2016 /* tcpi_rcv_space */
2017 ti->tcpi_snd_wnd = mp_tp->mpt_sndwnd;
2018 ti->tcpi_snd_nxt = (uint32_t)mp_tp->mpt_sndnxt;
2019 ti->tcpi_rcv_nxt = (uint32_t)mp_tp->mpt_rcvnxt;
2020 if (acttp) {
2021 ti->tcpi_last_outif = (acttp->t_inpcb->inp_last_outifp == NULL) ? 0 :
2022 acttp->t_inpcb->inp_last_outifp->if_index;
2023 }
2024
2025 mptcp_fill_info_bytestats(ti, mpte);
2026 /* tcpi_txpackets */
2027
2028 /* tcpi_txretransmitbytes */
2029 /* tcpi_txunacked */
2030 /* tcpi_rxpackets */
2031
2032 /* tcpi_rxduplicatebytes */
2033 /* tcpi_rxoutoforderbytes */
2034 /* tcpi_snd_bw */
2035 /* tcpi_synrexmits */
2036 /* tcpi_unused1 */
2037 /* tcpi_unused2 */
2038 /* tcpi_cell_rxpackets */
2039
2040 /* tcpi_cell_txpackets */
2041
2042 /* tcpi_wifi_rxpackets */
2043
2044 /* tcpi_wifi_txpackets */
2045
2046 /* tcpi_wired_rxpackets */
2047 /* tcpi_wired_txpackets */
2048 /* tcpi_connstatus */
2049 /* TFO-stuff */
2050 /* ECN stuff */
2051 /* tcpi_ecn_recv_ce */
2052 /* tcpi_ecn_recv_cwr */
2053 if (acttp) {
2054 ti->tcpi_rcvoopack = acttp->t_rcvoopack;
2055 }
2056 /* tcpi_pawsdrop */
2057 /* tcpi_sack_recovery_episode */
2058 /* tcpi_reordered_pkts */
2059 /* tcpi_dsack_sent */
2060 /* tcpi_dsack_recvd */
2061 /* tcpi_flowhash */
2062 if (acttp) {
2063 ti->tcpi_txretransmitpackets = acttp->t_stat.rxmitpkts;
2064 }
2065 }
2066
2067 /*
2068 * Handle SOPT_GET for socket options issued on MP socket.
2069 */
2070 static int
mptcp_getopt(struct mptses * mpte,struct sockopt * sopt)2071 mptcp_getopt(struct mptses *mpte, struct sockopt *sopt)
2072 {
2073 int error = 0, optval = 0;
2074 struct socket *__single mp_so;
2075
2076 mp_so = mptetoso(mpte);
2077
2078 VERIFY(!(mpsotomppcb(mp_so)->mpp_flags & MPP_INSIDE_SETGETOPT));
2079 mpsotomppcb(mp_so)->mpp_flags |= MPP_INSIDE_SETGETOPT;
2080
2081 /*
2082 * We only handle SOPT_GET for TCP level socket options; we should
2083 * not get here for socket level options since they are already
2084 * handled at the socket layer.
2085 */
2086 if (sopt->sopt_level != IPPROTO_TCP) {
2087 error = ENOPROTOOPT;
2088 goto out;
2089 }
2090
2091 switch (sopt->sopt_name) {
2092 case PERSIST_TIMEOUT:
2093 /* Only case for which we have a non-zero default */
2094 optval = tcp_max_persist_timeout;
2095 OS_FALLTHROUGH;
2096 case TCP_NODELAY:
2097 case TCP_RXT_FINDROP:
2098 case TCP_KEEPALIVE:
2099 case TCP_KEEPINTVL:
2100 case TCP_KEEPCNT:
2101 case TCP_CONNECTIONTIMEOUT:
2102 case TCP_RXT_CONNDROPTIME:
2103 case TCP_ADAPTIVE_READ_TIMEOUT:
2104 case TCP_ADAPTIVE_WRITE_TIMEOUT:
2105 case TCP_FASTOPEN_FORCE_ENABLE:
2106 {
2107 struct mptopt *__single mpo = mptcp_sopt_find(mpte, sopt);
2108
2109 if (mpo != NULL) {
2110 optval = mpo->mpo_intval;
2111 }
2112 break;
2113 }
2114
2115 /* The next ones are stored at the MPTCP-level */
2116 case TCP_NOTSENT_LOWAT:
2117 if (mptetoso(mpte)->so_flags & SOF_NOTSENT_LOWAT) {
2118 optval = mptcp_get_notsent_lowat(mpte);
2119 } else {
2120 optval = 0;
2121 }
2122 break;
2123 case TCP_INFO:
2124 {
2125 struct tcp_info ti;
2126
2127 mptcp_fill_info(mpte, &ti);
2128 error = sooptcopyout(sopt, &ti, sizeof(struct tcp_info));
2129
2130 goto out;
2131 }
2132 case MPTCP_SERVICE_TYPE:
2133 optval = mpte->mpte_svctype;
2134 break;
2135 case MPTCP_ALTERNATE_PORT:
2136 optval = mpte->mpte_alternate_port;
2137 break;
2138 case MPTCP_FORCE_ENABLE:
2139 optval = !!(mpte->mpte_flags & MPTE_FORCE_ENABLE);
2140 break;
2141 case MPTCP_FORCE_VERSION:
2142 if (mpte->mpte_flags & MPTE_FORCE_V0) {
2143 optval = 0;
2144 } else if (mpte->mpte_flags & MPTE_FORCE_V1) {
2145 optval = 1;
2146 } else {
2147 optval = -1;
2148 }
2149 break;
2150 case MPTCP_EXPECTED_PROGRESS_TARGET:
2151 error = sooptcopyout(sopt, &mpte->mpte_time_target, sizeof(mpte->mpte_time_target));
2152
2153 goto out;
2154 default:
2155 /* not eligible */
2156 error = ENOPROTOOPT;
2157 break;
2158 }
2159
2160 if (error == 0) {
2161 error = sooptcopyout(sopt, &optval, sizeof(int));
2162 }
2163
2164 out:
2165 mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
2166 return error;
2167 }
2168
2169 /*
2170 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
2171 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
2172 * to those that are allowed by mptcp_usr_socheckopt().
2173 */
2174 int
mptcp_ctloutput(struct socket * mp_so,struct sockopt * sopt)2175 mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt)
2176 {
2177 struct mppcb *__single mpp = mpsotomppcb(mp_so);
2178 struct mptses *__single mpte;
2179 int error = 0;
2180
2181 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
2182 error = EINVAL;
2183 goto out;
2184 }
2185 mpte = mptompte(mpp);
2186 socket_lock_assert_owned(mp_so);
2187
2188 /* we only handle socket and TCP-level socket options for MPTCP */
2189 if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP &&
2190 sopt->sopt_level != IPPROTO_IP && sopt->sopt_level != IPPROTO_IPV6) {
2191 error = EINVAL;
2192 goto out;
2193 }
2194
2195 switch (sopt->sopt_dir) {
2196 case SOPT_SET:
2197 error = mptcp_setopt(mpte, sopt);
2198 break;
2199
2200 case SOPT_GET:
2201 error = mptcp_getopt(mpte, sopt);
2202 break;
2203 }
2204 out:
2205 return error;
2206 }
2207
2208 const char *
mptcp_sopt2str(int level,int optname)2209 mptcp_sopt2str(int level, int optname)
2210 {
2211 switch (level) {
2212 case SOL_SOCKET:
2213 switch (optname) {
2214 case SO_LINGER:
2215 return "SO_LINGER";
2216 case SO_LINGER_SEC:
2217 return "SO_LINGER_SEC";
2218 case SO_DEBUG:
2219 return "SO_DEBUG";
2220 case SO_KEEPALIVE:
2221 return "SO_KEEPALIVE";
2222 case SO_USELOOPBACK:
2223 return "SO_USELOOPBACK";
2224 case SO_TYPE:
2225 return "SO_TYPE";
2226 case SO_NREAD:
2227 return "SO_NREAD";
2228 case SO_NWRITE:
2229 return "SO_NWRITE";
2230 case SO_ERROR:
2231 return "SO_ERROR";
2232 case SO_SNDBUF:
2233 return "SO_SNDBUF";
2234 case SO_RCVBUF:
2235 return "SO_RCVBUF";
2236 case SO_SNDLOWAT:
2237 return "SO_SNDLOWAT";
2238 case SO_RCVLOWAT:
2239 return "SO_RCVLOWAT";
2240 case SO_SNDTIMEO:
2241 return "SO_SNDTIMEO";
2242 case SO_RCVTIMEO:
2243 return "SO_RCVTIMEO";
2244 case SO_NKE:
2245 return "SO_NKE";
2246 case SO_NOSIGPIPE:
2247 return "SO_NOSIGPIPE";
2248 case SO_NOADDRERR:
2249 return "SO_NOADDRERR";
2250 case SO_RESTRICTIONS:
2251 return "SO_RESTRICTIONS";
2252 case SO_LABEL:
2253 return "SO_LABEL";
2254 case SO_PEERLABEL:
2255 return "SO_PEERLABEL";
2256 case SO_RANDOMPORT:
2257 return "SO_RANDOMPORT";
2258 case SO_TRAFFIC_CLASS:
2259 return "SO_TRAFFIC_CLASS";
2260 case SO_RECV_TRAFFIC_CLASS:
2261 return "SO_RECV_TRAFFIC_CLASS";
2262 case SO_TRAFFIC_CLASS_DBG:
2263 return "SO_TRAFFIC_CLASS_DBG";
2264 case SO_PRIVILEGED_TRAFFIC_CLASS:
2265 return "SO_PRIVILEGED_TRAFFIC_CLASS";
2266 case SO_DEFUNCTIT:
2267 return "SO_DEFUNCTIT";
2268 case SO_DEFUNCTOK:
2269 return "SO_DEFUNCTOK";
2270 case SO_ISDEFUNCT:
2271 return "SO_ISDEFUNCT";
2272 case SO_OPPORTUNISTIC:
2273 return "SO_OPPORTUNISTIC";
2274 case SO_FLUSH:
2275 return "SO_FLUSH";
2276 case SO_RECV_ANYIF:
2277 return "SO_RECV_ANYIF";
2278 case SO_NOWAKEFROMSLEEP:
2279 return "SO_NOWAKEFROMSLEEP";
2280 case SO_NOAPNFALLBK:
2281 return "SO_NOAPNFALLBK";
2282 case SO_MARK_CELLFALLBACK:
2283 return "SO_CELLFALLBACK";
2284 case SO_FALLBACK_MODE:
2285 return "SO_FALLBACK_MODE";
2286 case SO_MARK_KNOWN_TRACKER:
2287 return "SO_MARK_KNOWN_TRACKER";
2288 case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
2289 return "SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED";
2290 case SO_MARK_APPROVED_APP_DOMAIN:
2291 return "SO_MARK_APPROVED_APP_DOMAIN";
2292 case SO_DELEGATED:
2293 return "SO_DELEGATED";
2294 case SO_DELEGATED_UUID:
2295 return "SO_DELEGATED_UUID";
2296 #if NECP
2297 case SO_NECP_ATTRIBUTES:
2298 return "SO_NECP_ATTRIBUTES";
2299 case SO_NECP_CLIENTUUID:
2300 return "SO_NECP_CLIENTUUID";
2301 #endif /* NECP */
2302 }
2303
2304 break;
2305 case IPPROTO_IP:
2306 switch (optname) {
2307 case IP_TOS:
2308 return "IP_TOS";
2309 }
2310
2311 break;
2312 case IPPROTO_IPV6:
2313 switch (optname) {
2314 case IPV6_TCLASS:
2315 return "IPV6_TCLASS";
2316 }
2317
2318 break;
2319 case IPPROTO_TCP:
2320 switch (optname) {
2321 case TCP_NODELAY:
2322 return "TCP_NODELAY";
2323 case TCP_KEEPALIVE:
2324 return "TCP_KEEPALIVE";
2325 case TCP_KEEPINTVL:
2326 return "TCP_KEEPINTVL";
2327 case TCP_KEEPCNT:
2328 return "TCP_KEEPCNT";
2329 case TCP_CONNECTIONTIMEOUT:
2330 return "TCP_CONNECTIONTIMEOUT";
2331 case TCP_RXT_CONNDROPTIME:
2332 return "TCP_RXT_CONNDROPTIME";
2333 case PERSIST_TIMEOUT:
2334 return "PERSIST_TIMEOUT";
2335 case TCP_NOTSENT_LOWAT:
2336 return "NOTSENT_LOWAT";
2337 case TCP_ADAPTIVE_READ_TIMEOUT:
2338 return "ADAPTIVE_READ_TIMEOUT";
2339 case TCP_ADAPTIVE_WRITE_TIMEOUT:
2340 return "ADAPTIVE_WRITE_TIMEOUT";
2341 case TCP_FASTOPEN_FORCE_ENABLE:
2342 return "TCP_FASTOPEN_FORCE_ENABLE";
2343 case MPTCP_SERVICE_TYPE:
2344 return "MPTCP_SERVICE_TYPE";
2345 case MPTCP_ALTERNATE_PORT:
2346 return "MPTCP_ALTERNATE_PORT";
2347 case MPTCP_FORCE_ENABLE:
2348 return "MPTCP_FORCE_ENABLE";
2349 case MPTCP_FORCE_VERSION:
2350 return "MPTCP_FORCE_VERSION";
2351 case MPTCP_EXPECTED_PROGRESS_TARGET:
2352 return "MPTCP_EXPECTED_PROGRESS_TARGET";
2353 }
2354
2355 break;
2356 }
2357
2358 return "unknown";
2359 }
2360
2361 static int
mptcp_usr_preconnect(struct socket * mp_so)2362 mptcp_usr_preconnect(struct socket *mp_so)
2363 {
2364 struct mptsub *__single mpts = NULL;
2365 struct mppcb *__single mpp = mpsotomppcb(mp_so);
2366 struct mptses *__single mpte;
2367 struct socket *__single so;
2368 struct tcpcb *__single tp = NULL;
2369 int error;
2370
2371 mpte = mptompte(mpp);
2372
2373 mpts = mptcp_get_subflow(mpte, NULL);
2374 if (mpts == NULL) {
2375 os_log_error(mptcp_log_handle, "%s - %lx: invalid preconnect ",
2376 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
2377 return EINVAL;
2378 }
2379 mpts->mpts_flags &= ~MPTSF_TFO_REQD;
2380 so = mpts->mpts_socket;
2381 tp = intotcpcb(sotoinpcb(so));
2382 tp->t_mpflags &= ~TMPF_TFO_REQUEST;
2383 error = tcp_output(sototcpcb(so));
2384
2385 soclearfastopen(mp_so);
2386
2387 return error;
2388 }
2389