1 /*
2 * Copyright (c) 2012-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/protosw.h>
35 #include <sys/mcache.h>
36 #include <sys/syslog.h>
37 #include <sys/proc.h>
38 #include <sys/proc_internal.h>
39 #include <sys/resourcevar.h>
40 #include <sys/kauth.h>
41 #include <sys/priv.h>
42
43 #include <net/if.h>
44 #include <netinet/in.h>
45 #include <netinet/in_var.h>
46 #include <netinet/tcp.h>
47 #include <netinet/tcp_fsm.h>
48 #include <netinet/tcp_seq.h>
49 #include <netinet/tcp_var.h>
50 #include <netinet/tcp_timer.h>
51 #include <netinet/mptcp.h>
52 #include <netinet/mptcp_var.h>
53 #include <netinet/mptcp_timer.h>
54
55 #include <mach/sdt.h>
56
57 static int mptcp_usr_attach(struct socket *, int, struct proc *);
58 static int mptcp_usr_detach(struct socket *);
59 static int mptcp_attach(struct socket *, struct proc *);
60 static int mptcp_usr_connectx(struct socket *, struct sockaddr *,
61 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
62 sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
63 static int mptcp_getassocids(struct mptses *, uint32_t *, user_addr_t);
64 static int mptcp_getconnids(struct mptses *, sae_associd_t, uint32_t *,
65 user_addr_t);
66 static int mptcp_getconninfo(struct mptses *, sae_connid_t *, uint32_t *,
67 uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *,
68 uint32_t *, user_addr_t, uint32_t *);
69 static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *,
70 struct proc *);
71 static int mptcp_disconnect(struct mptses *);
72 static int mptcp_usr_disconnect(struct socket *);
73 static int mptcp_usr_disconnectx(struct socket *, sae_associd_t, sae_connid_t);
74 static struct mptses *mptcp_usrclosed(struct mptses *);
75 static int mptcp_usr_rcvd(struct socket *, int);
76 static int mptcp_usr_send(struct socket *, int, struct mbuf *,
77 struct sockaddr *, struct mbuf *, struct proc *);
78 static int mptcp_usr_shutdown(struct socket *);
79 static int mptcp_usr_sosend(struct socket *, struct sockaddr *, struct uio *,
80 struct mbuf *, struct mbuf *, int);
81 static int mptcp_usr_socheckopt(struct socket *, struct sockopt *);
82 static int mptcp_usr_preconnect(struct socket *so);
83
84 struct pr_usrreqs mptcp_usrreqs = {
85 .pru_attach = mptcp_usr_attach,
86 .pru_connectx = mptcp_usr_connectx,
87 .pru_control = mptcp_usr_control,
88 .pru_detach = mptcp_usr_detach,
89 .pru_disconnect = mptcp_usr_disconnect,
90 .pru_disconnectx = mptcp_usr_disconnectx,
91 .pru_peeraddr = mp_getpeeraddr,
92 .pru_rcvd = mptcp_usr_rcvd,
93 .pru_send = mptcp_usr_send,
94 .pru_shutdown = mptcp_usr_shutdown,
95 .pru_sockaddr = mp_getsockaddr,
96 .pru_sosend = mptcp_usr_sosend,
97 .pru_soreceive = soreceive,
98 .pru_socheckopt = mptcp_usr_socheckopt,
99 .pru_preconnect = mptcp_usr_preconnect,
100 };
101
102
103 int mptcp_developer_mode = 0;
104 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, allow_aggregate, CTLFLAG_RW | CTLFLAG_LOCKED,
105 &mptcp_developer_mode, 0, "Allow the Multipath aggregation mode");
106
107 int mptcp_no_first_party = 0;
108 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, no_first_party, CTLFLAG_RW | CTLFLAG_LOCKED,
109 &mptcp_no_first_party, 0, "Do not do first-party app exemptions");
110
111 static unsigned long mptcp_expected_progress_headstart = 5000;
112 SYSCTL_ULONG(_net_inet_mptcp, OID_AUTO, expected_progress_headstart, CTLFLAG_RW | CTLFLAG_LOCKED,
113 &mptcp_expected_progress_headstart, "Headstart to give MPTCP before meeting the progress deadline");
114
115
116 /*
117 * Attaches an MPTCP control block to a socket.
118 */
119 static int
mptcp_usr_attach(struct socket * mp_so,int proto,struct proc * p)120 mptcp_usr_attach(struct socket *mp_so, int proto, struct proc *p)
121 {
122 #pragma unused(proto)
123 int error;
124
125 VERIFY(mpsotomppcb(mp_so) == NULL);
126
127 error = mptcp_attach(mp_so, p);
128 if (error) {
129 goto out;
130 }
131
132 if ((mp_so->so_options & SO_LINGER) && mp_so->so_linger == 0) {
133 mp_so->so_linger = (short)(TCP_LINGERTIME * hz);
134 }
135 out:
136 return error;
137 }
138
139 /*
140 * Detaches an MPTCP control block from a socket.
141 */
142 static int
mptcp_usr_detach(struct socket * mp_so)143 mptcp_usr_detach(struct socket *mp_so)
144 {
145 struct mptses *mpte = mpsotompte(mp_so);
146 struct mppcb *mpp = mpsotomppcb(mp_so);
147
148 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
149 os_log_error(mptcp_log_handle, "%s - %lx: state: %d\n",
150 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
151 mpp ? mpp->mpp_state : -1);
152 return EINVAL;
153 }
154
155 /*
156 * We are done with this MPTCP socket (it has been closed);
157 * trigger all subflows to be disconnected, if not already,
158 * by initiating the PCB detach sequence (SOF_PCBCLEARING
159 * will be set.)
160 */
161 mp_pcbdetach(mp_so);
162
163 mptcp_disconnect(mpte);
164
165 return 0;
166 }
167
168 /*
169 * Attach MPTCP protocol to socket, allocating MP control block,
170 * MPTCP session, control block, buffer space, etc.
171 */
172 static int
mptcp_attach(struct socket * mp_so,struct proc * p)173 mptcp_attach(struct socket *mp_so, struct proc *p)
174 {
175 #pragma unused(p)
176 struct mptses *mpte = NULL;
177 struct mptcb *mp_tp = NULL;
178 struct mppcb *mpp = NULL;
179 int error = 0;
180
181 if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) {
182 error = soreserve(mp_so, tcp_sendspace, tcp_recvspace);
183 if (error != 0) {
184 goto out;
185 }
186 }
187
188 if (mp_so->so_snd.sb_preconn_hiwat == 0) {
189 soreserve_preconnect(mp_so, 2048);
190 }
191
192 if ((mp_so->so_rcv.sb_flags & SB_USRSIZE) == 0) {
193 mp_so->so_rcv.sb_flags |= SB_AUTOSIZE;
194 }
195 if ((mp_so->so_snd.sb_flags & SB_USRSIZE) == 0) {
196 mp_so->so_snd.sb_flags |= SB_AUTOSIZE;
197 }
198
199 /*
200 * MPTCP send-socket buffers cannot be compressed, due to the
201 * fact that each mbuf chained via m_next is a M_PKTHDR
202 * which carries some MPTCP metadata.
203 */
204 mp_so->so_snd.sb_flags |= SB_NOCOMPRESS;
205
206 if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) {
207 goto out;
208 }
209
210 mpp = mpsotomppcb(mp_so);
211 mpte = (struct mptses *)mpp->mpp_pcbe;
212 mp_tp = mpte->mpte_mptcb;
213
214 VERIFY(mp_tp != NULL);
215 out:
216 return error;
217 }
218
219 static int
mptcp_entitlement_check(struct socket * mp_so,uint8_t svctype)220 mptcp_entitlement_check(struct socket *mp_so, uint8_t svctype)
221 {
222 struct mptses *mpte = mpsotompte(mp_so);
223
224 if (mptcp_no_first_party) {
225 return 0;
226 }
227
228 /* First, check for mptcp_extended without delegation */
229 if (soopt_cred_check(mp_so, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED, TRUE, FALSE) == 0) {
230 /*
231 * This means the app has the extended entitlement. Thus,
232 * it's a first party app and can run without restrictions.
233 */
234 mpte->mpte_flags |= MPTE_FIRSTPARTY;
235 return 0;
236 }
237
238 /* Now with delegation */
239 if (mp_so->so_flags & SOF_DELEGATED &&
240 soopt_cred_check(mp_so, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED, TRUE, TRUE) == 0) {
241 /*
242 * This means the app has the extended entitlement. Thus,
243 * it's a first party app and can run without restrictions.
244 */
245 mpte->mpte_flags |= MPTE_FIRSTPARTY;
246 return 0;
247 }
248
249 if (svctype == MPTCP_SVCTYPE_AGGREGATE) {
250 if (mptcp_developer_mode) {
251 return 0;
252 }
253
254 os_log_error(mptcp_log_handle, "%s - %lx: MPTCP prohibited on svc %u\n",
255 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_svctype);
256 return -1;
257 }
258
259 return 0;
260 }
261
262 /*
263 * Common subroutine to open a MPTCP connection to one of the remote hosts
264 * specified by dst_sl. This includes allocating and establishing a
265 * subflow TCP connection, either initially to establish MPTCP connection,
266 * or to join an existing one. Returns a connection handle upon success.
267 */
268 static int
mptcp_connectx(struct mptses * mpte,struct sockaddr * src,struct sockaddr * dst,uint32_t ifscope,sae_connid_t * pcid)269 mptcp_connectx(struct mptses *mpte, struct sockaddr *src,
270 struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid)
271 {
272 int error = 0;
273
274 VERIFY(dst != NULL);
275 VERIFY(pcid != NULL);
276
277 error = mptcp_subflow_add(mpte, src, dst, ifscope, pcid);
278
279 return error;
280 }
281
282 /*
283 * User-protocol pru_connectx callback.
284 */
285 static int
mptcp_usr_connectx(struct socket * mp_so,struct sockaddr * src,struct sockaddr * dst,struct proc * p,uint32_t ifscope,sae_associd_t aid,sae_connid_t * pcid,uint32_t flags,void * arg,uint32_t arglen,struct uio * auio,user_ssize_t * bytes_written)286 mptcp_usr_connectx(struct socket *mp_so, struct sockaddr *src,
287 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
288 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
289 uint32_t arglen, struct uio *auio, user_ssize_t *bytes_written)
290 {
291 #pragma unused(p, aid, flags, arg, arglen)
292 struct mppcb *mpp = mpsotomppcb(mp_so);
293 struct mptses *mpte = NULL;
294 struct mptcb *mp_tp = NULL;
295 user_ssize_t datalen;
296 int error = 0;
297
298 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
299 os_log_error(mptcp_log_handle, "%s - %lx: state %d\n",
300 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
301 mpp ? mpp->mpp_state : -1);
302 error = EINVAL;
303 goto out;
304 }
305 mpte = mptompte(mpp);
306 mp_tp = mpte->mpte_mptcb;
307
308 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
309 os_log_error(mptcp_log_handle, "%s - %lx: fell back to TCP\n",
310 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
311 error = EINVAL;
312 goto out;
313 }
314
315 if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
316 error = EAFNOSUPPORT;
317 goto out;
318 }
319
320 if (dst->sa_family == AF_INET &&
321 dst->sa_len != sizeof(mpte->__mpte_dst_v4)) {
322 os_log_error(mptcp_log_handle, "%s - %lx: IPv4 dst len %u\n",
323 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), dst->sa_len);
324 error = EINVAL;
325 goto out;
326 }
327
328 if (dst->sa_family == AF_INET6 &&
329 dst->sa_len != sizeof(mpte->__mpte_dst_v6)) {
330 os_log_error(mptcp_log_handle, "%s - %lx: IPv6 dst len %u\n",
331 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), dst->sa_len);
332 error = EINVAL;
333 goto out;
334 }
335
336 if (!(mpte->mpte_flags & MPTE_SVCTYPE_CHECKED)) {
337 if (mptcp_entitlement_check(mp_so, mpte->mpte_svctype) < 0) {
338 error = EPERM;
339 goto out;
340 }
341
342 mpte->mpte_flags |= MPTE_SVCTYPE_CHECKED;
343 }
344
345 if ((mp_so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0) {
346 memcpy(&mpte->mpte_u_dst, dst, dst->sa_len);
347
348 if (dst->sa_family == AF_INET) {
349 memcpy(&mpte->mpte_sub_dst_v4, dst, dst->sa_len);
350 } else {
351 memcpy(&mpte->mpte_sub_dst_v6, dst, dst->sa_len);
352 }
353 }
354
355 if (src) {
356 if (src->sa_family != AF_INET && src->sa_family != AF_INET6) {
357 error = EAFNOSUPPORT;
358 goto out;
359 }
360
361 if (src->sa_family == AF_INET &&
362 src->sa_len != sizeof(mpte->__mpte_src_v4)) {
363 os_log_error(mptcp_log_handle, "%s - %lx: IPv4 src len %u\n",
364 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), src->sa_len);
365 error = EINVAL;
366 goto out;
367 }
368
369 if (src->sa_family == AF_INET6 &&
370 src->sa_len != sizeof(mpte->__mpte_src_v6)) {
371 os_log_error(mptcp_log_handle, "%s - %lx: IPv6 src len %u\n",
372 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), src->sa_len);
373 error = EINVAL;
374 goto out;
375 }
376
377 if ((mp_so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0) {
378 memcpy(&mpte->mpte_u_src, src, src->sa_len);
379 }
380 }
381
382 error = mptcp_connectx(mpte, src, dst, ifscope, pcid);
383
384 /* If there is data, copy it */
385 if (auio != NULL) {
386 datalen = uio_resid(auio);
387 socket_unlock(mp_so, 0);
388 error = mp_so->so_proto->pr_usrreqs->pru_sosend(mp_so, NULL,
389 (uio_t) auio, NULL, NULL, 0);
390
391 if (error == 0 || error == EWOULDBLOCK) {
392 *bytes_written = datalen - uio_resid(auio);
393 }
394
395 if (error == EWOULDBLOCK) {
396 error = EINPROGRESS;
397 }
398
399 socket_lock(mp_so, 0);
400 }
401
402 out:
403 return error;
404 }
405
406 /*
407 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
408 */
409 static int
mptcp_getassocids(struct mptses * mpte,uint32_t * cnt,user_addr_t aidp)410 mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp)
411 {
412 /* MPTCP has at most 1 association */
413 *cnt = (mpte->mpte_associd != SAE_ASSOCID_ANY) ? 1 : 0;
414
415 /* just asking how many there are? */
416 if (aidp == USER_ADDR_NULL) {
417 return 0;
418 }
419
420 return copyout(&mpte->mpte_associd, aidp,
421 sizeof(mpte->mpte_associd));
422 }
423
424 /*
425 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
426 */
427 static int
mptcp_getconnids(struct mptses * mpte,sae_associd_t aid,uint32_t * cnt,user_addr_t cidp)428 mptcp_getconnids(struct mptses *mpte, sae_associd_t aid, uint32_t *cnt,
429 user_addr_t cidp)
430 {
431 struct mptsub *mpts;
432 int error = 0;
433
434 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
435 aid != mpte->mpte_associd) {
436 return EINVAL;
437 }
438
439 *cnt = mpte->mpte_numflows;
440
441 /* just asking how many there are? */
442 if (cidp == USER_ADDR_NULL) {
443 return 0;
444 }
445
446 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
447 if ((error = copyout(&mpts->mpts_connid, cidp,
448 sizeof(mpts->mpts_connid))) != 0) {
449 break;
450 }
451
452 cidp += sizeof(mpts->mpts_connid);
453 }
454
455 return error;
456 }
457
458 /*
459 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
460 */
461 static int
mptcp_getconninfo(struct mptses * mpte,sae_connid_t * cid,uint32_t * flags,uint32_t * ifindex,int32_t * soerror,user_addr_t src,socklen_t * src_len,user_addr_t dst,socklen_t * dst_len,uint32_t * aux_type,user_addr_t aux_data,uint32_t * aux_len)462 mptcp_getconninfo(struct mptses *mpte, sae_connid_t *cid, uint32_t *flags,
463 uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
464 user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
465 user_addr_t aux_data, uint32_t *aux_len)
466 {
467 *flags = 0;
468 *aux_type = 0;
469 *ifindex = 0;
470 *soerror = 0;
471 struct mptcb *mp_tp = mpte->mpte_mptcb;
472
473 /* MPTCP-level global stats */
474 if (*cid == SAE_CONNID_ALL) {
475 struct socket *mp_so = mptetoso(mpte);
476 struct conninfo_multipathtcp mptcp_ci;
477 int error = 0;
478
479 if (*aux_len != 0 && *aux_len != sizeof(mptcp_ci)) {
480 return EINVAL;
481 }
482
483 if (mp_so->so_state & SS_ISCONNECTING) {
484 *flags |= CIF_CONNECTING;
485 }
486 if (mp_so->so_state & SS_ISCONNECTED) {
487 *flags |= CIF_CONNECTED;
488 }
489 if (mp_so->so_state & SS_ISDISCONNECTING) {
490 *flags |= CIF_DISCONNECTING;
491 }
492 if (mp_so->so_state & SS_ISDISCONNECTED) {
493 *flags |= CIF_DISCONNECTED;
494 }
495 if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) {
496 *flags |= CIF_MP_CAPABLE;
497 }
498 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
499 *flags |= CIF_MP_DEGRADED;
500 }
501 if (mp_tp->mpt_version == MPTCP_VERSION_1) {
502 *flags |= CIF_MP_V1;
503 }
504
505 *src_len = 0;
506 *dst_len = 0;
507
508 *aux_type = CIAUX_MPTCP;
509 *aux_len = sizeof(mptcp_ci);
510
511 if (aux_data != USER_ADDR_NULL) {
512 const struct mptsub *mpts;
513 int initial_info_set = 0;
514 unsigned long i = 0;
515
516 bzero(&mptcp_ci, sizeof(mptcp_ci));
517 mptcp_ci.mptcpci_subflow_count = mpte->mpte_numflows;
518 mptcp_ci.mptcpci_switch_count = mpte->mpte_subflow_switches;
519
520 VERIFY(sizeof(mptcp_ci.mptcpci_itfstats) == sizeof(mpte->mpte_itfstats));
521 memcpy(mptcp_ci.mptcpci_itfstats, mpte->mpte_itfstats, sizeof(mptcp_ci.mptcpci_itfstats));
522
523 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
524 if (i >= sizeof(mptcp_ci.mptcpci_subflow_connids) / sizeof(sae_connid_t)) {
525 break;
526 }
527 mptcp_ci.mptcpci_subflow_connids[i] = mpts->mpts_connid;
528
529 if (mpts->mpts_flags & MPTSF_INITIAL_SUB) {
530 const struct inpcb *inp;
531
532 inp = sotoinpcb(mpts->mpts_socket);
533
534 mptcp_ci.mptcpci_init_rxbytes = inp->inp_stat->rxbytes;
535 mptcp_ci.mptcpci_init_txbytes = inp->inp_stat->txbytes;
536 initial_info_set = 1;
537 }
538
539 mptcpstats_update(mptcp_ci.mptcpci_itfstats, mpts);
540
541 i++;
542 }
543
544 if (initial_info_set == 0) {
545 mptcp_ci.mptcpci_init_rxbytes = mpte->mpte_init_rxbytes;
546 mptcp_ci.mptcpci_init_txbytes = mpte->mpte_init_txbytes;
547 }
548
549 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
550 mptcp_ci.mptcpci_flags |= MPTCPCI_FIRSTPARTY;
551 }
552
553 error = copyout(&mptcp_ci, aux_data, sizeof(mptcp_ci));
554 if (error != 0) {
555 os_log_error(mptcp_log_handle, "%s - %lx: copyout failed: %d\n",
556 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
557 return error;
558 }
559 }
560
561 return 0;
562 }
563
564 /* Any stats of any subflow */
565 if (*cid == SAE_CONNID_ANY) {
566 const struct mptsub *mpts;
567 struct socket *so;
568 const struct inpcb *inp;
569 int error = 0;
570
571 mpts = TAILQ_FIRST(&mpte->mpte_subflows);
572 if (mpts == NULL) {
573 return ENXIO;
574 }
575
576 so = mpts->mpts_socket;
577 inp = sotoinpcb(so);
578
579 if (inp->inp_vflag & INP_IPV4) {
580 error = in_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
581 soerror, src, src_len, dst, dst_len,
582 aux_type, aux_data, aux_len);
583 } else {
584 error = in6_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
585 soerror, src, src_len, dst, dst_len,
586 aux_type, aux_data, aux_len);
587 }
588
589 if (error != 0) {
590 os_log_error(mptcp_log_handle, "%s - %lx:error from in_getconninfo %d\n",
591 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
592 return error;
593 }
594
595 if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
596 *flags |= CIF_MP_CAPABLE;
597 }
598 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
599 *flags |= CIF_MP_DEGRADED;
600 }
601 if (mpts->mpts_flags & MPTSF_MP_READY) {
602 *flags |= CIF_MP_READY;
603 }
604 if (mpts->mpts_flags & MPTSF_ACTIVE) {
605 *flags |= CIF_MP_ACTIVE;
606 }
607 if (mp_tp->mpt_version == MPTCP_VERSION_1) {
608 *flags |= CIF_MP_V1;
609 }
610
611 return 0;
612 } else {
613 /* Per-interface stats */
614 const struct mptsub *mpts, *orig_mpts = NULL;
615 struct conninfo_tcp tcp_ci;
616 const struct inpcb *inp;
617 struct socket *so;
618 int error = 0;
619 int index;
620
621 /* cid is thus an ifindex - range-check first! */
622 if (*cid > USHRT_MAX) {
623 return EINVAL;
624 }
625
626 bzero(&tcp_ci, sizeof(tcp_ci));
627
628 /* First, get a subflow to fill in the "regular" info. */
629 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
630 const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
631
632 if (ifp && ifp->if_index == *cid) {
633 break;
634 }
635 }
636
637 if (mpts == NULL) {
638 /* No subflow there - well, let's just get the basic itf-info */
639 goto interface_info;
640 }
641
642 so = mpts->mpts_socket;
643 inp = sotoinpcb(so);
644
645 /* Give it USER_ADDR_NULL, because we are doing this on our own */
646 if (inp->inp_vflag & INP_IPV4) {
647 error = in_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
648 soerror, src, src_len, dst, dst_len,
649 aux_type, USER_ADDR_NULL, aux_len);
650 } else {
651 error = in6_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
652 soerror, src, src_len, dst, dst_len,
653 aux_type, USER_ADDR_NULL, aux_len);
654 }
655
656 if (error != 0) {
657 os_log_error(mptcp_log_handle, "%s - %lx:error from in_getconninfo %d\n",
658 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
659 return error;
660 }
661
662 /* ToDo: Nobody is reading these flags on subflows. Why bother ? */
663 if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
664 *flags |= CIF_MP_CAPABLE;
665 }
666 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
667 *flags |= CIF_MP_DEGRADED;
668 }
669 if (mpts->mpts_flags & MPTSF_MP_READY) {
670 *flags |= CIF_MP_READY;
671 }
672 if (mpts->mpts_flags & MPTSF_ACTIVE) {
673 *flags |= CIF_MP_ACTIVE;
674 }
675 if (mp_tp->mpt_version == MPTCP_VERSION_1) {
676 *flags |= CIF_MP_V1;
677 }
678
679 /*
680 * Now, we gather the metrics (aka., tcp_info) and roll them in
681 * across all subflows of this interface to build an aggregated
682 * view.
683 *
684 * We take the TCP_INFO from the first subflow as the "master",
685 * feeding into those fields that we do not roll.
686 */
687 if (aux_data != USER_ADDR_NULL) {
688 tcp_getconninfo(so, &tcp_ci);
689
690 orig_mpts = mpts;
691 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
692 const struct inpcb *mptsinp = sotoinpcb(mpts->mpts_socket);
693 const struct ifnet *ifp;
694
695 ifp = mptsinp->inp_last_outifp;
696
697 if (ifp == NULL || ifp->if_index != *cid || mpts == orig_mpts) {
698 continue;
699 }
700
701 /* Roll the itf-stats into the tcp_info */
702 tcp_ci.tcpci_tcp_info.tcpi_txbytes +=
703 mptsinp->inp_stat->txbytes;
704 tcp_ci.tcpci_tcp_info.tcpi_rxbytes +=
705 mptsinp->inp_stat->rxbytes;
706
707 tcp_ci.tcpci_tcp_info.tcpi_wifi_txbytes +=
708 mptsinp->inp_wstat->txbytes;
709 tcp_ci.tcpci_tcp_info.tcpi_wifi_rxbytes +=
710 mptsinp->inp_wstat->rxbytes;
711
712 tcp_ci.tcpci_tcp_info.tcpi_wired_txbytes +=
713 mptsinp->inp_Wstat->txbytes;
714 tcp_ci.tcpci_tcp_info.tcpi_wired_rxbytes +=
715 mptsinp->inp_Wstat->rxbytes;
716
717 tcp_ci.tcpci_tcp_info.tcpi_cell_txbytes +=
718 mptsinp->inp_cstat->txbytes;
719 tcp_ci.tcpci_tcp_info.tcpi_cell_rxbytes +=
720 mptsinp->inp_cstat->rxbytes;
721 }
722 }
723
724 interface_info:
725 *aux_type = CIAUX_TCP;
726 if (*aux_len == 0) {
727 *aux_len = sizeof(tcp_ci);
728 } else if (aux_data != USER_ADDR_NULL) {
729 boolean_t create;
730
731 /*
732 * Finally, old subflows might have been closed - we
733 * want this data as well, so grab it from the interface
734 * stats.
735 */
736 create = orig_mpts != NULL;
737
738 /*
739 * When we found a subflow, we are willing to create a stats-index
740 * because we have some data to return. If there isn't a subflow,
741 * nor anything in the stats, return EINVAL. Because the
742 * ifindex belongs to something that doesn't exist.
743 */
744 index = mptcpstats_get_index_by_ifindex(mpte->mpte_itfstats, (u_short)(*cid), false);
745 if (index == -1) {
746 os_log_error(mptcp_log_handle,
747 "%s - %lx: Asking for too many ifindex: %u subcount %u, mpts? %s\n",
748 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
749 *cid, mpte->mpte_numflows,
750 orig_mpts ? "yes" : "no");
751
752 if (orig_mpts == NULL) {
753 return EINVAL;
754 }
755 } else {
756 struct mptcp_itf_stats *stats;
757
758 stats = &mpte->mpte_itfstats[index];
759
760 /* Roll the itf-stats into the tcp_info */
761 tcp_ci.tcpci_tcp_info.tcpi_last_outif = *cid;
762 tcp_ci.tcpci_tcp_info.tcpi_txbytes +=
763 stats->mpis_txbytes;
764 tcp_ci.tcpci_tcp_info.tcpi_rxbytes +=
765 stats->mpis_rxbytes;
766
767 tcp_ci.tcpci_tcp_info.tcpi_wifi_txbytes +=
768 stats->mpis_wifi_txbytes;
769 tcp_ci.tcpci_tcp_info.tcpi_wifi_rxbytes +=
770 stats->mpis_wifi_rxbytes;
771
772 tcp_ci.tcpci_tcp_info.tcpi_wired_txbytes +=
773 stats->mpis_wired_txbytes;
774 tcp_ci.tcpci_tcp_info.tcpi_wired_rxbytes +=
775 stats->mpis_wired_rxbytes;
776
777 tcp_ci.tcpci_tcp_info.tcpi_cell_txbytes +=
778 stats->mpis_cell_txbytes;
779 tcp_ci.tcpci_tcp_info.tcpi_cell_rxbytes +=
780 stats->mpis_cell_rxbytes;
781 }
782
783 *aux_len = min(*aux_len, sizeof(tcp_ci));
784 error = copyout(&tcp_ci, aux_data, *aux_len);
785 if (error != 0) {
786 return error;
787 }
788 }
789 }
790
791 return 0;
792 }
793
794 /*
795 * User-protocol pru_control callback.
796 */
797 static int
mptcp_usr_control(struct socket * mp_so,u_long cmd,caddr_t data,struct ifnet * ifp,struct proc * p)798 mptcp_usr_control(struct socket *mp_so, u_long cmd, caddr_t data,
799 struct ifnet *ifp, struct proc *p)
800 {
801 #pragma unused(ifp, p)
802 struct mppcb *mpp = mpsotomppcb(mp_so);
803 struct mptses *mpte;
804 int error = 0;
805
806 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
807 error = EINVAL;
808 goto out;
809 }
810 mpte = mptompte(mpp);
811
812 switch (cmd) {
813 case SIOCGASSOCIDS32: { /* struct so_aidreq32 */
814 struct so_aidreq32 aidr;
815 bcopy(data, &aidr, sizeof(aidr));
816 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
817 aidr.sar_aidp);
818 if (error == 0) {
819 bcopy(&aidr, data, sizeof(aidr));
820 }
821 break;
822 }
823
824 case SIOCGASSOCIDS64: { /* struct so_aidreq64 */
825 struct so_aidreq64 aidr;
826 bcopy(data, &aidr, sizeof(aidr));
827 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
828 (user_addr_t)aidr.sar_aidp);
829 if (error == 0) {
830 bcopy(&aidr, data, sizeof(aidr));
831 }
832 break;
833 }
834
835 case SIOCGCONNIDS32: { /* struct so_cidreq32 */
836 struct so_cidreq32 cidr;
837 bcopy(data, &cidr, sizeof(cidr));
838 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
839 cidr.scr_cidp);
840 if (error == 0) {
841 bcopy(&cidr, data, sizeof(cidr));
842 }
843 break;
844 }
845
846 case SIOCGCONNIDS64: { /* struct so_cidreq64 */
847 struct so_cidreq64 cidr;
848 bcopy(data, &cidr, sizeof(cidr));
849 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
850 (user_addr_t)cidr.scr_cidp);
851 if (error == 0) {
852 bcopy(&cidr, data, sizeof(cidr));
853 }
854 break;
855 }
856
857 case SIOCGCONNINFO32: { /* struct so_cinforeq32 */
858 struct so_cinforeq32 cifr;
859 bcopy(data, &cifr, sizeof(cifr));
860 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
861 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
862 cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst,
863 &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data,
864 &cifr.scir_aux_len);
865 if (error == 0) {
866 bcopy(&cifr, data, sizeof(cifr));
867 }
868 break;
869 }
870
871 case SIOCGCONNINFO64: { /* struct so_cinforeq64 */
872 struct so_cinforeq64 cifr;
873 bcopy(data, &cifr, sizeof(cifr));
874 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
875 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
876 (user_addr_t)cifr.scir_src, &cifr.scir_src_len,
877 (user_addr_t)cifr.scir_dst, &cifr.scir_dst_len,
878 &cifr.scir_aux_type, (user_addr_t)cifr.scir_aux_data,
879 &cifr.scir_aux_len);
880 if (error == 0) {
881 bcopy(&cifr, data, sizeof(cifr));
882 }
883 break;
884 }
885
886 default:
887 error = EOPNOTSUPP;
888 break;
889 }
890 out:
891 return error;
892 }
893
894 static int
mptcp_disconnect(struct mptses * mpte)895 mptcp_disconnect(struct mptses *mpte)
896 {
897 struct socket *mp_so;
898 struct mptcb *mp_tp;
899 int error = 0;
900
901 mp_so = mptetoso(mpte);
902 mp_tp = mpte->mpte_mptcb;
903
904 DTRACE_MPTCP3(disconnectx, struct mptses *, mpte,
905 struct socket *, mp_so, struct mptcb *, mp_tp);
906
907 /* if we're not detached, go thru socket state checks */
908 if (!(mp_so->so_flags & SOF_PCBCLEARING) && !(mp_so->so_flags & SOF_DEFUNCT)) {
909 if (!(mp_so->so_state & (SS_ISCONNECTED |
910 SS_ISCONNECTING))) {
911 error = ENOTCONN;
912 goto out;
913 }
914 if (mp_so->so_state & SS_ISDISCONNECTING) {
915 error = EALREADY;
916 goto out;
917 }
918 }
919
920 mptcp_cancel_all_timers(mp_tp);
921 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
922 mptcp_close(mpte, mp_tp);
923 } else if ((mp_so->so_options & SO_LINGER) &&
924 mp_so->so_linger == 0) {
925 mptcp_drop(mpte, mp_tp, 0);
926 } else {
927 soisdisconnecting(mp_so);
928 sbflush(&mp_so->so_rcv);
929 if (mptcp_usrclosed(mpte) != NULL) {
930 mptcp_output(mpte);
931 }
932 }
933
934 if (error == 0) {
935 mptcp_subflow_workloop(mpte);
936 }
937
938 out:
939 return error;
940 }
941
/*
 * pru_disconnect handler: resolves the MPTCP session of the socket and
 * hands it to mptcp_disconnect(), whose result is returned.
 */
static int
mptcp_usr_disconnect(struct socket *mp_so)
{
	struct mptses *mpte = mpsotompte(mp_so);

	return mptcp_disconnect(mpte);
}
950
951 /*
952 * User-protocol pru_disconnectx callback.
953 */
954 static int
mptcp_usr_disconnectx(struct socket * mp_so,sae_associd_t aid,sae_connid_t cid)955 mptcp_usr_disconnectx(struct socket *mp_so, sae_associd_t aid, sae_connid_t cid)
956 {
957 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
958 return EINVAL;
959 }
960
961 if (cid != SAE_CONNID_ANY && cid != SAE_CONNID_ALL) {
962 return EINVAL;
963 }
964
965 return mptcp_usr_disconnect(mp_so);
966 }
967
968 void
mptcp_finish_usrclosed(struct mptses * mpte)969 mptcp_finish_usrclosed(struct mptses *mpte)
970 {
971 struct mptcb *mp_tp = mpte->mpte_mptcb;
972 struct socket *mp_so = mptetoso(mpte);
973
974 if (mp_tp->mpt_state == MPTCPS_CLOSED || mp_tp->mpt_state == MPTCPS_TERMINATE) {
975 mpte = mptcp_close(mpte, mp_tp);
976 } else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
977 soisdisconnected(mp_so);
978 } else {
979 struct mptsub *mpts;
980
981 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
982 if ((mp_so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
983 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
984 mptcp_subflow_disconnect(mpte, mpts);
985 } else {
986 mptcp_subflow_shutdown(mpte, mpts);
987 }
988 }
989 }
990 }
991
992 /*
993 * User issued close, and wish to trail thru shutdown states.
994 */
995 static struct mptses *
mptcp_usrclosed(struct mptses * mpte)996 mptcp_usrclosed(struct mptses *mpte)
997 {
998 struct mptcb *mp_tp = mpte->mpte_mptcb;
999
1000 mptcp_close_fsm(mp_tp, MPCE_CLOSE);
1001
1002 /* Not everything has been acknowledged - don't close the subflows! */
1003 if (mp_tp->mpt_state != MPTCPS_TERMINATE &&
1004 mp_tp->mpt_sndnxt + 1 != mp_tp->mpt_sndmax) {
1005 return mpte;
1006 }
1007
1008 mptcp_finish_usrclosed(mpte);
1009
1010 return mpte;
1011 }
1012
1013 /*
1014 * After a receive, possible send some update to peer.
1015 */
1016 static int
mptcp_usr_rcvd(struct socket * mp_so,int flags)1017 mptcp_usr_rcvd(struct socket *mp_so, int flags)
1018 {
1019 #pragma unused(flags)
1020 struct mppcb *mpp = mpsotomppcb(mp_so);
1021 struct mptses *mpte;
1022 struct mptsub *mpts;
1023 int error = 0;
1024
1025 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1026 error = EINVAL;
1027 goto out;
1028 }
1029
1030 mpte = mptompte(mpp);
1031
1032 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1033 struct socket *so = mpts->mpts_socket;
1034
1035 if (so->so_proto->pr_flags & PR_WANTRCVD && so->so_pcb != NULL) {
1036 (*so->so_proto->pr_usrreqs->pru_rcvd)(so, 0);
1037 }
1038 }
1039
1040 error = mptcp_output(mpte);
1041 out:
1042 return error;
1043 }
1044
1045 /*
1046 * Do a send by putting data in the output queue.
1047 */
1048 static int
mptcp_usr_send(struct socket * mp_so,int prus_flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct proc * p)1049 mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m,
1050 struct sockaddr *nam, struct mbuf *control, struct proc *p)
1051 {
1052 #pragma unused(nam, p)
1053 struct mppcb *mpp = mpsotomppcb(mp_so);
1054 struct mptses *mpte;
1055 int error = 0;
1056
1057 if (prus_flags & (PRUS_OOB | PRUS_EOF)) {
1058 error = EOPNOTSUPP;
1059 goto out;
1060 }
1061
1062 if (nam != NULL) {
1063 error = EOPNOTSUPP;
1064 goto out;
1065 }
1066
1067 if (control != NULL && control->m_len != 0) {
1068 error = EOPNOTSUPP;
1069 goto out;
1070 }
1071
1072 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1073 error = ECONNRESET;
1074 goto out;
1075 }
1076 mpte = mptompte(mpp);
1077 VERIFY(mpte != NULL);
1078
1079 if (!(mp_so->so_state & SS_ISCONNECTED) &&
1080 !(mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) {
1081 error = ENOTCONN;
1082 goto out;
1083 }
1084
1085 mptcp_insert_dsn(mpp, m);
1086 VERIFY(mp_so->so_snd.sb_flags & SB_NOCOMPRESS);
1087 sbappendstream(&mp_so->so_snd, m);
1088 m = NULL;
1089
1090 error = mptcp_output(mpte);
1091 if (error != 0) {
1092 goto out;
1093 }
1094
1095 if (mp_so->so_state & SS_ISCONNECTING) {
1096 if (mp_so->so_state & SS_NBIO) {
1097 error = EWOULDBLOCK;
1098 } else {
1099 error = sbwait(&mp_so->so_snd);
1100 }
1101 }
1102
1103 out:
1104 if (error) {
1105 if (m != NULL) {
1106 m_freem(m);
1107 }
1108 if (control != NULL) {
1109 m_freem(control);
1110 }
1111 }
1112 return error;
1113 }
1114
1115 /*
1116 * Mark the MPTCP connection as being incapable of further output.
1117 */
1118 static int
mptcp_usr_shutdown(struct socket * mp_so)1119 mptcp_usr_shutdown(struct socket *mp_so)
1120 {
1121 struct mppcb *mpp = mpsotomppcb(mp_so);
1122 struct mptses *mpte;
1123 int error = 0;
1124
1125 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1126 error = EINVAL;
1127 goto out;
1128 }
1129 mpte = mptompte(mpp);
1130 VERIFY(mpte != NULL);
1131
1132 socantsendmore(mp_so);
1133
1134 mpte = mptcp_usrclosed(mpte);
1135 if (mpte != NULL) {
1136 error = mptcp_output(mpte);
1137 }
1138 out:
1139 return error;
1140 }
1141
1142 /*
1143 * Copy the contents of uio into a properly sized mbuf chain.
1144 */
1145 static int
mptcp_uiotombuf(struct uio * uio,int how,user_ssize_t space,struct mbuf ** top)1146 mptcp_uiotombuf(struct uio *uio, int how, user_ssize_t space, struct mbuf **top)
1147 {
1148 struct mbuf *m, *mb, *nm = NULL, *mtail = NULL;
1149 int progress, len, error;
1150 user_ssize_t resid, tot;
1151
1152 VERIFY(top != NULL && *top == NULL);
1153
1154 /*
1155 * space can be zero or an arbitrary large value bound by
1156 * the total data supplied by the uio.
1157 */
1158 resid = uio_resid(uio);
1159 if (space > 0) {
1160 tot = MIN(resid, space);
1161 } else {
1162 tot = resid;
1163 }
1164
1165 if (tot < 0 || tot > INT_MAX) {
1166 return EINVAL;
1167 }
1168
1169 len = (int)tot;
1170 if (len == 0) {
1171 len = 1;
1172 }
1173
1174 /* Loop and append maximum sized mbufs to the chain tail. */
1175 while (len > 0) {
1176 uint32_t m_needed = 1;
1177
1178 if (njcl > 0 && len > MBIGCLBYTES) {
1179 mb = m_getpackets_internal(&m_needed, 1,
1180 how, 1, M16KCLBYTES);
1181 } else if (len > MCLBYTES) {
1182 mb = m_getpackets_internal(&m_needed, 1,
1183 how, 1, MBIGCLBYTES);
1184 } else if (len >= (signed)MINCLSIZE) {
1185 mb = m_getpackets_internal(&m_needed, 1,
1186 how, 1, MCLBYTES);
1187 } else {
1188 mb = m_gethdr(how, MT_DATA);
1189 }
1190
1191 /* Fail the whole operation if one mbuf can't be allocated. */
1192 if (mb == NULL) {
1193 if (nm != NULL) {
1194 m_freem(nm);
1195 }
1196 return ENOBUFS;
1197 }
1198
1199 /* Book keeping. */
1200 VERIFY(mb->m_flags & M_PKTHDR);
1201 len -= ((mb->m_flags & M_EXT) ? mb->m_ext.ext_size : MHLEN);
1202 if (mtail != NULL) {
1203 mtail->m_next = mb;
1204 } else {
1205 nm = mb;
1206 }
1207 mtail = mb;
1208 }
1209
1210 m = nm;
1211
1212 progress = 0;
1213 /* Fill all mbufs with uio data and update header information. */
1214 for (mb = m; mb != NULL; mb = mb->m_next) {
1215 /* tot >= 0 && tot <= INT_MAX (see above) */
1216 len = MIN((int)M_TRAILINGSPACE(mb), (int)(tot - progress));
1217
1218 error = uiomove(mtod(mb, char *), len, uio);
1219 if (error != 0) {
1220 m_freem(m);
1221 return error;
1222 }
1223
1224 /* each mbuf is M_PKTHDR chained via m_next */
1225 mb->m_len = len;
1226 mb->m_pkthdr.len = len;
1227
1228 progress += len;
1229 }
1230 VERIFY(progress == tot);
1231 *top = m;
1232 return 0;
1233 }
1234
1235 /*
1236 * MPTCP socket protocol-user socket send routine, derived from sosend().
1237 */
1238 static int
mptcp_usr_sosend(struct socket * mp_so,struct sockaddr * addr,struct uio * uio,struct mbuf * top,struct mbuf * control,int flags)1239 mptcp_usr_sosend(struct socket *mp_so, struct sockaddr *addr, struct uio *uio,
1240 struct mbuf *top, struct mbuf *control, int flags)
1241 {
1242 #pragma unused(addr)
1243 user_ssize_t resid, space;
1244 int error, sendflags;
1245 struct proc *p = current_proc();
1246 int sblocked = 0;
1247
1248 /* UIO is required for now, due to per-mbuf M_PKTHDR constrains */
1249 if (uio == NULL || top != NULL) {
1250 error = EINVAL;
1251 goto out;
1252 }
1253 resid = uio_resid(uio);
1254
1255 socket_lock(mp_so, 1);
1256 so_update_last_owner_locked(mp_so, p);
1257 so_update_policy(mp_so);
1258
1259 VERIFY(mp_so->so_type == SOCK_STREAM);
1260 VERIFY(!(mp_so->so_flags & SOF_MP_SUBFLOW));
1261
1262 if (flags & (MSG_OOB | MSG_DONTROUTE)) {
1263 error = EOPNOTSUPP;
1264 socket_unlock(mp_so, 1);
1265 goto out;
1266 }
1267
1268 /*
1269 * In theory resid should be unsigned. However, space must be
1270 * signed, as it might be less than 0 if we over-committed, and we
1271 * must use a signed comparison of space and resid. On the other
1272 * hand, a negative resid causes us to loop sending 0-length
1273 * segments to the protocol.
1274 */
1275 if (resid < 0 || resid > INT_MAX ||
1276 (flags & MSG_EOR) || control != NULL) {
1277 error = EINVAL;
1278 socket_unlock(mp_so, 1);
1279 goto out;
1280 }
1281
1282 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
1283
1284 do {
1285 error = sosendcheck(mp_so, NULL, resid, 0, 0, flags,
1286 &sblocked);
1287 if (error != 0) {
1288 goto release;
1289 }
1290
1291 space = sbspace(&mp_so->so_snd);
1292 do {
1293 socket_unlock(mp_so, 0);
1294 /*
1295 * Copy the data from userland into an mbuf chain.
1296 */
1297 error = mptcp_uiotombuf(uio, M_WAITOK, space, &top);
1298 if (error != 0) {
1299 socket_lock(mp_so, 0);
1300 goto release;
1301 }
1302 VERIFY(top != NULL);
1303 space -= resid - uio_resid(uio);
1304 resid = uio_resid(uio);
1305 socket_lock(mp_so, 0);
1306
1307 /*
1308 * Compute flags here, for pru_send and NKEs.
1309 */
1310 sendflags = (resid > 0 && space > 0) ?
1311 PRUS_MORETOCOME : 0;
1312
1313 /*
1314 * Socket filter processing
1315 */
1316 VERIFY(control == NULL);
1317 error = sflt_data_out(mp_so, NULL, &top, &control, 0);
1318 if (error != 0) {
1319 if (error == EJUSTRETURN) {
1320 error = 0;
1321 top = NULL;
1322 /* always free control if any */
1323 }
1324 goto release;
1325 }
1326 if (control != NULL) {
1327 m_freem(control);
1328 control = NULL;
1329 }
1330
1331 /*
1332 * Pass data to protocol.
1333 */
1334 error = (*mp_so->so_proto->pr_usrreqs->pru_send)
1335 (mp_so, sendflags, top, NULL, NULL, p);
1336
1337 top = NULL;
1338 if (error != 0) {
1339 goto release;
1340 }
1341 } while (resid != 0 && space > 0);
1342 } while (resid != 0);
1343
1344 release:
1345 if (sblocked) {
1346 sbunlock(&mp_so->so_snd, FALSE); /* will unlock socket */
1347 } else {
1348 socket_unlock(mp_so, 1);
1349 }
1350 out:
1351 if (top != NULL) {
1352 m_freem(top);
1353 }
1354 if (control != NULL) {
1355 m_freem(control);
1356 }
1357
1358 soclearfastopen(mp_so);
1359
1360 return error;
1361 }
1362
1363 /*
1364 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1365 * This routine simply indicates to the caller whether or not to proceed
1366 * further with the given socket option. This is invoked by sosetoptlock()
1367 * and sogetoptlock().
1368 */
1369 static int
mptcp_usr_socheckopt(struct socket * mp_so,struct sockopt * sopt)1370 mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt)
1371 {
1372 #pragma unused(mp_so)
1373 int error = 0;
1374
1375 VERIFY(sopt->sopt_level == SOL_SOCKET);
1376
1377 /*
1378 * We could check for sopt_dir (set/get) here, but we'll just
1379 * let the caller deal with it as appropriate; therefore the
1380 * following is a superset of the socket options which we
1381 * allow for set/get.
1382 *
1383 * XXX: [email protected]
1384 *
1385 * Need to consider the following cases:
1386 *
1387 * a. Certain socket options don't have a clear definition
1388 * on the expected behavior post connect(2). At the time
1389 * those options are issued on the MP socket, there may
1390 * be existing subflow sockets that are already connected.
1391 */
1392 switch (sopt->sopt_name) {
1393 case SO_LINGER: /* MP */
1394 case SO_LINGER_SEC: /* MP */
1395 case SO_TYPE: /* MP */
1396 case SO_NREAD: /* MP */
1397 case SO_NWRITE: /* MP */
1398 case SO_ERROR: /* MP */
1399 case SO_SNDBUF: /* MP */
1400 case SO_RCVBUF: /* MP */
1401 case SO_SNDLOWAT: /* MP */
1402 case SO_RCVLOWAT: /* MP */
1403 case SO_SNDTIMEO: /* MP */
1404 case SO_RCVTIMEO: /* MP */
1405 case SO_NKE: /* MP */
1406 case SO_NOSIGPIPE: /* MP */
1407 case SO_NOADDRERR: /* MP */
1408 case SO_LABEL: /* MP */
1409 case SO_PEERLABEL: /* MP */
1410 case SO_DEFUNCTIT: /* MP */
1411 case SO_DEFUNCTOK: /* MP */
1412 case SO_ISDEFUNCT: /* MP */
1413 case SO_TRAFFIC_CLASS_DBG: /* MP */
1414 case SO_DELEGATED: /* MP */
1415 case SO_DELEGATED_UUID: /* MP */
1416 #if NECP
1417 case SO_NECP_ATTRIBUTES:
1418 case SO_NECP_CLIENTUUID:
1419 #endif /* NECP */
1420 case SO_MPKL_SEND_INFO:
1421 /*
1422 * Tell the caller that these options are to be processed.
1423 */
1424 break;
1425
1426 case SO_DEBUG: /* MP + subflow */
1427 case SO_KEEPALIVE: /* MP + subflow */
1428 case SO_USELOOPBACK: /* MP + subflow */
1429 case SO_RANDOMPORT: /* MP + subflow */
1430 case SO_TRAFFIC_CLASS: /* MP + subflow */
1431 case SO_RECV_TRAFFIC_CLASS: /* MP + subflow */
1432 case SO_PRIVILEGED_TRAFFIC_CLASS: /* MP + subflow */
1433 case SO_RECV_ANYIF: /* MP + subflow */
1434 case SO_RESTRICTIONS: /* MP + subflow */
1435 case SO_FLUSH: /* MP + subflow */
1436 case SO_NOWAKEFROMSLEEP:
1437 case SO_NOAPNFALLBK:
1438 case SO_MARK_CELLFALLBACK:
1439 case SO_MARK_CELLFALLBACK_UUID:
1440 case SO_MARK_KNOWN_TRACKER:
1441 case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
1442 case SO_MARK_APPROVED_APP_DOMAIN:
1443 case SO_FALLBACK_MODE:
1444 /*
1445 * Tell the caller that these options are to be processed;
1446 * these will also be recorded later by mptcp_setopt().
1447 *
1448 * NOTE: Only support integer option value for now.
1449 */
1450 if (sopt->sopt_valsize != sizeof(int)) {
1451 error = EINVAL;
1452 }
1453 break;
1454
1455 default:
1456 /*
1457 * Tell the caller to stop immediately and return an error.
1458 */
1459 error = ENOPROTOOPT;
1460 break;
1461 }
1462
1463 return error;
1464 }
1465
1466 /*
1467 * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
1468 */
1469 static int
mptcp_setopt_apply(struct mptses * mpte,struct mptopt * mpo)1470 mptcp_setopt_apply(struct mptses *mpte, struct mptopt *mpo)
1471 {
1472 struct socket *mp_so;
1473 struct mptsub *mpts;
1474 struct mptopt smpo;
1475 int error = 0;
1476
1477 /* just bail now if this isn't applicable to subflow sockets */
1478 if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
1479 error = ENOPROTOOPT;
1480 goto out;
1481 }
1482
1483 /*
1484 * Skip those that are handled internally; these options
1485 * should not have been recorded and marked with the
1486 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1487 */
1488 if (mpo->mpo_level == SOL_SOCKET &&
1489 (mpo->mpo_name == SO_NOSIGPIPE || mpo->mpo_name == SO_NOADDRERR)) {
1490 error = ENOPROTOOPT;
1491 goto out;
1492 }
1493
1494 mp_so = mptetoso(mpte);
1495
1496 /*
1497 * Don't bother going further if there's no subflow; mark the option
1498 * with MPOF_INTERIM so that we know whether or not to remove this
1499 * option upon encountering an error while issuing it during subflow
1500 * socket creation.
1501 */
1502 if (mpte->mpte_numflows == 0) {
1503 VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows));
1504 mpo->mpo_flags |= MPOF_INTERIM;
1505 /* return success */
1506 goto out;
1507 }
1508
1509 bzero(&smpo, sizeof(smpo));
1510 smpo.mpo_flags |= MPOF_SUBFLOW_OK;
1511 smpo.mpo_level = mpo->mpo_level;
1512 smpo.mpo_name = mpo->mpo_name;
1513
1514 /* grab exisiting values in case we need to rollback */
1515 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1516 struct socket *so;
1517
1518 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL | MPTSF_SOPT_INPROG);
1519 mpts->mpts_oldintval = 0;
1520 smpo.mpo_intval = 0;
1521 VERIFY(mpts->mpts_socket != NULL);
1522 so = mpts->mpts_socket;
1523 if (mptcp_subflow_sogetopt(mpte, so, &smpo) == 0) {
1524 mpts->mpts_flags |= MPTSF_SOPT_OLDVAL;
1525 mpts->mpts_oldintval = smpo.mpo_intval;
1526 }
1527 }
1528
1529 /* apply socket option */
1530 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1531 struct socket *so;
1532
1533 mpts->mpts_flags |= MPTSF_SOPT_INPROG;
1534 VERIFY(mpts->mpts_socket != NULL);
1535 so = mpts->mpts_socket;
1536 error = mptcp_subflow_sosetopt(mpte, mpts, mpo);
1537 if (error != 0) {
1538 break;
1539 }
1540 }
1541
1542 /* cleanup, and rollback if needed */
1543 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1544 struct socket *so;
1545
1546 if (!(mpts->mpts_flags & MPTSF_SOPT_INPROG)) {
1547 /* clear in case it's set */
1548 mpts->mpts_flags &= ~MPTSF_SOPT_OLDVAL;
1549 mpts->mpts_oldintval = 0;
1550 continue;
1551 }
1552 if (!(mpts->mpts_flags & MPTSF_SOPT_OLDVAL)) {
1553 mpts->mpts_flags &= ~MPTSF_SOPT_INPROG;
1554 VERIFY(mpts->mpts_oldintval == 0);
1555 continue;
1556 }
1557 /* error during sosetopt, so roll it back */
1558 if (error != 0) {
1559 VERIFY(mpts->mpts_socket != NULL);
1560 so = mpts->mpts_socket;
1561 smpo.mpo_intval = mpts->mpts_oldintval;
1562 mptcp_subflow_sosetopt(mpte, mpts, &smpo);
1563 }
1564 mpts->mpts_oldintval = 0;
1565 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL | MPTSF_SOPT_INPROG);
1566 }
1567
1568 out:
1569 return error;
1570 }
1571
1572 /*
1573 * Handle SOPT_SET for socket options issued on MP socket.
1574 */
1575 static int
mptcp_setopt(struct mptses * mpte,struct sockopt * sopt)1576 mptcp_setopt(struct mptses *mpte, struct sockopt *sopt)
1577 {
1578 int error = 0, optval = 0, level, optname, rec = 1;
1579 struct mptopt smpo, *mpo = NULL;
1580 struct socket *mp_so;
1581
1582 level = sopt->sopt_level;
1583 optname = sopt->sopt_name;
1584
1585 mp_so = mptetoso(mpte);
1586
1587 VERIFY(!(mpsotomppcb(mp_so)->mpp_flags & MPP_INSIDE_SETGETOPT));
1588 mpsotomppcb(mp_so)->mpp_flags |= MPP_INSIDE_SETGETOPT;
1589
1590 /*
1591 * Record socket options which are applicable to subflow sockets so
1592 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1593 * for the list of eligible socket-level options.
1594 */
1595 if (level == SOL_SOCKET) {
1596 switch (optname) {
1597 case SO_DEBUG:
1598 case SO_KEEPALIVE:
1599 case SO_USELOOPBACK:
1600 case SO_RANDOMPORT:
1601 case SO_TRAFFIC_CLASS:
1602 case SO_RECV_TRAFFIC_CLASS:
1603 case SO_PRIVILEGED_TRAFFIC_CLASS:
1604 case SO_RECV_ANYIF:
1605 case SO_RESTRICTIONS:
1606 case SO_NOWAKEFROMSLEEP:
1607 case SO_NOAPNFALLBK:
1608 case SO_MARK_CELLFALLBACK:
1609 case SO_MARK_KNOWN_TRACKER:
1610 case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
1611 case SO_MARK_APPROVED_APP_DOMAIN:
1612 case SO_FALLBACK_MODE:
1613 /* record it */
1614 break;
1615 case SO_FLUSH:
1616 /* don't record it */
1617 rec = 0;
1618 break;
1619
1620 /* Next ones, record at MPTCP-level */
1621 case SO_DELEGATED:
1622 error = sooptcopyin(sopt, &mpte->mpte_epid,
1623 sizeof(int), sizeof(int));
1624 if (error != 0) {
1625 goto err_out;
1626 }
1627
1628 goto out;
1629 case SO_DELEGATED_UUID:
1630 error = sooptcopyin(sopt, &mpte->mpte_euuid,
1631 sizeof(uuid_t), sizeof(uuid_t));
1632 if (error != 0) {
1633 goto err_out;
1634 }
1635
1636 goto out;
1637 #if NECP
1638 case SO_NECP_CLIENTUUID:
1639 if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
1640 error = EINVAL;
1641 goto err_out;
1642 }
1643
1644 error = sooptcopyin(sopt, &mpsotomppcb(mp_so)->necp_client_uuid,
1645 sizeof(uuid_t), sizeof(uuid_t));
1646 if (error != 0) {
1647 goto err_out;
1648 }
1649
1650 mpsotomppcb(mp_so)->necp_cb = mptcp_session_necp_cb;
1651 error = necp_client_register_multipath_cb(mp_so->last_pid,
1652 mpsotomppcb(mp_so)->necp_client_uuid,
1653 mpsotomppcb(mp_so));
1654 if (error) {
1655 goto err_out;
1656 }
1657
1658 if (uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
1659 error = EINVAL;
1660 goto err_out;
1661 }
1662
1663 goto out;
1664 case SO_NECP_ATTRIBUTES:
1665 error = necp_set_socket_attributes(&mpsotomppcb(mp_so)->inp_necp_attributes, sopt);
1666 if (error) {
1667 goto err_out;
1668 }
1669
1670 goto out;
1671 #endif /* NECP */
1672 default:
1673 /* nothing to do; just return */
1674 goto out;
1675 }
1676 } else {
1677 switch (optname) {
1678 case TCP_NODELAY:
1679 case TCP_RXT_FINDROP:
1680 case TCP_KEEPALIVE:
1681 case TCP_KEEPINTVL:
1682 case TCP_KEEPCNT:
1683 case TCP_CONNECTIONTIMEOUT:
1684 case TCP_RXT_CONNDROPTIME:
1685 case PERSIST_TIMEOUT:
1686 case TCP_ADAPTIVE_READ_TIMEOUT:
1687 case TCP_ADAPTIVE_WRITE_TIMEOUT:
1688 case TCP_FASTOPEN_FORCE_ENABLE:
1689 /* eligible; record it */
1690 break;
1691 case TCP_NOTSENT_LOWAT:
1692 /* record at MPTCP level */
1693 error = sooptcopyin(sopt, &optval, sizeof(optval),
1694 sizeof(optval));
1695 if (error) {
1696 goto err_out;
1697 }
1698 if (optval < 0) {
1699 error = EINVAL;
1700 goto err_out;
1701 } else {
1702 if (optval == 0) {
1703 mp_so->so_flags &= ~SOF_NOTSENT_LOWAT;
1704 error = mptcp_set_notsent_lowat(mpte, 0);
1705 } else {
1706 mp_so->so_flags |= SOF_NOTSENT_LOWAT;
1707 error = mptcp_set_notsent_lowat(mpte,
1708 optval);
1709 }
1710
1711 if (error) {
1712 goto err_out;
1713 }
1714 }
1715 goto out;
1716 case MPTCP_SERVICE_TYPE:
1717 /* record at MPTCP level */
1718 error = sooptcopyin(sopt, &optval, sizeof(optval),
1719 sizeof(optval));
1720 if (error) {
1721 goto err_out;
1722 }
1723 if (optval < 0 || optval >= MPTCP_SVCTYPE_MAX) {
1724 error = EINVAL;
1725 goto err_out;
1726 }
1727
1728 if (mptcp_entitlement_check(mp_so, (uint8_t)optval) < 0) {
1729 error = EACCES;
1730 goto err_out;
1731 }
1732
1733 mpte->mpte_svctype = (uint8_t)optval;
1734 mpte->mpte_flags |= MPTE_SVCTYPE_CHECKED;
1735
1736 goto out;
1737 case MPTCP_ALTERNATE_PORT:
1738 /* record at MPTCP level */
1739 error = sooptcopyin(sopt, &optval, sizeof(optval),
1740 sizeof(optval));
1741 if (error) {
1742 goto err_out;
1743 }
1744
1745 if (optval < 0 || optval > UINT16_MAX) {
1746 error = EINVAL;
1747 goto err_out;
1748 }
1749
1750 mpte->mpte_alternate_port = (uint16_t)optval;
1751
1752 goto out;
1753 case MPTCP_FORCE_ENABLE:
1754 /* record at MPTCP level */
1755 error = sooptcopyin(sopt, &optval, sizeof(optval),
1756 sizeof(optval));
1757 if (error) {
1758 goto err_out;
1759 }
1760
1761 if (optval < 0 || optval > 1) {
1762 error = EINVAL;
1763 goto err_out;
1764 }
1765
1766 if (optval) {
1767 mpte->mpte_flags |= MPTE_FORCE_ENABLE;
1768 } else {
1769 mpte->mpte_flags &= ~MPTE_FORCE_ENABLE;
1770 }
1771
1772 goto out;
1773 case MPTCP_FORCE_VERSION:
1774 error = sooptcopyin(sopt, &optval, sizeof(optval),
1775 sizeof(optval));
1776 if (error) {
1777 goto err_out;
1778 }
1779
1780 if (optval != 0 && optval != 1) {
1781 error = EINVAL;
1782 goto err_out;
1783 }
1784
1785 if (optval == 0) {
1786 mpte->mpte_flags |= MPTE_FORCE_V0;
1787 mpte->mpte_flags &= ~MPTE_FORCE_V1;
1788 } else {
1789 mpte->mpte_flags |= MPTE_FORCE_V1;
1790 mpte->mpte_flags &= ~MPTE_FORCE_V0;
1791 }
1792
1793 goto out;
1794 case MPTCP_EXPECTED_PROGRESS_TARGET:
1795 {
1796 struct mptcb *mp_tp = mpte->mpte_mptcb;
1797 uint64_t mach_time_target;
1798 uint64_t nanoseconds;
1799
1800 if (mpte->mpte_svctype != MPTCP_SVCTYPE_TARGET_BASED) {
1801 os_log(mptcp_log_handle, "%s - %lx: Can't set urgent activity when svctype is %u\n",
1802 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_svctype);
1803 error = EINVAL;
1804 goto err_out;
1805 }
1806
1807 error = sooptcopyin(sopt, &mach_time_target, sizeof(mach_time_target), sizeof(mach_time_target));
1808 if (error) {
1809 goto err_out;
1810 }
1811
1812 if (!mptcp_ok_to_create_subflows(mp_tp)) {
1813 os_log(mptcp_log_handle, "%s - %lx: Not ok to create subflows, state %u flags %#x\n",
1814 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
1815 error = EINVAL;
1816 goto err_out;
1817 }
1818
1819 if (mach_time_target) {
1820 uint64_t time_now = 0;
1821 uint64_t time_now_nanoseconds;
1822
1823 absolutetime_to_nanoseconds(mach_time_target, &nanoseconds);
1824 nanoseconds = nanoseconds - (mptcp_expected_progress_headstart * NSEC_PER_MSEC);
1825
1826 time_now = mach_continuous_time();
1827 absolutetime_to_nanoseconds(time_now, &time_now_nanoseconds);
1828
1829 nanoseconds_to_absolutetime(nanoseconds, &mach_time_target);
1830 /* If the timer is already running and it would
1831 * fire in less than mptcp_expected_progress_headstart
1832 * seconds, then it's not worth canceling it.
1833 */
1834 if (mpte->mpte_time_target &&
1835 mpte->mpte_time_target < time_now &&
1836 time_now_nanoseconds > nanoseconds - (mptcp_expected_progress_headstart * NSEC_PER_MSEC)) {
1837 os_log(mptcp_log_handle, "%s - %lx: Not rescheduling timer %llu now %llu target %llu\n",
1838 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1839 mpte->mpte_time_target,
1840 time_now,
1841 mach_time_target);
1842 goto out;
1843 }
1844 }
1845
1846 mpte->mpte_time_target = mach_time_target;
1847 mptcp_set_urgency_timer(mpte);
1848
1849 goto out;
1850 }
1851 default:
1852 /* not eligible */
1853 error = ENOPROTOOPT;
1854 goto err_out;
1855 }
1856 }
1857
1858 if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
1859 sizeof(optval))) != 0) {
1860 goto err_out;
1861 }
1862
1863 if (rec) {
1864 /* search for an existing one; if not found, allocate */
1865 if ((mpo = mptcp_sopt_find(mpte, sopt)) == NULL) {
1866 mpo = mptcp_sopt_alloc(Z_WAITOK);
1867 }
1868
1869 if (mpo == NULL) {
1870 error = ENOBUFS;
1871 goto err_out;
1872 } else {
1873 /* initialize or update, as needed */
1874 mpo->mpo_intval = optval;
1875 if (!(mpo->mpo_flags & MPOF_ATTACHED)) {
1876 mpo->mpo_level = level;
1877 mpo->mpo_name = optname;
1878 mptcp_sopt_insert(mpte, mpo);
1879 }
1880 /* this can be issued on the subflow socket */
1881 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1882 }
1883 } else {
1884 bzero(&smpo, sizeof(smpo));
1885 mpo = &smpo;
1886 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1887 mpo->mpo_level = level;
1888 mpo->mpo_name = optname;
1889 mpo->mpo_intval = optval;
1890 }
1891
1892 /* issue this socket option on existing subflows */
1893 error = mptcp_setopt_apply(mpte, mpo);
1894 if (error != 0 && (mpo->mpo_flags & MPOF_ATTACHED)) {
1895 VERIFY(mpo != &smpo);
1896 mptcp_sopt_remove(mpte, mpo);
1897 mptcp_sopt_free(mpo);
1898 }
1899 if (mpo == &smpo) {
1900 mpo->mpo_flags &= ~MPOF_INTERIM;
1901 }
1902
1903 if (error) {
1904 goto err_out;
1905 }
1906
1907 out:
1908
1909 mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
1910 return 0;
1911
1912 err_out:
1913 os_log_error(mptcp_log_handle, "%s - %lx: sopt %s (%d, %d) val %d can't be issued error %d\n",
1914 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1915 mptcp_sopt2str(level, optname), level, optname, optval, error);
1916 mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
1917 return error;
1918 }
1919
1920 static void
mptcp_fill_info_bytestats(struct tcp_info * ti,struct mptses * mpte)1921 mptcp_fill_info_bytestats(struct tcp_info *ti, struct mptses *mpte)
1922 {
1923 struct mptsub *mpts;
1924 int i;
1925
1926 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1927 const struct inpcb *inp = sotoinpcb(mpts->mpts_socket);
1928
1929 if (inp == NULL) {
1930 continue;
1931 }
1932
1933 ti->tcpi_txbytes += inp->inp_stat->txbytes;
1934 ti->tcpi_rxbytes += inp->inp_stat->rxbytes;
1935 ti->tcpi_cell_txbytes += inp->inp_cstat->txbytes;
1936 ti->tcpi_cell_rxbytes += inp->inp_cstat->rxbytes;
1937 ti->tcpi_wifi_txbytes += inp->inp_wstat->txbytes;
1938 ti->tcpi_wifi_rxbytes += inp->inp_wstat->rxbytes;
1939 ti->tcpi_wired_txbytes += inp->inp_Wstat->txbytes;
1940 ti->tcpi_wired_rxbytes += inp->inp_Wstat->rxbytes;
1941 }
1942
1943 for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
1944 struct mptcp_itf_stats *stats = &mpte->mpte_itfstats[i];
1945
1946 ti->tcpi_txbytes += stats->mpis_txbytes;
1947 ti->tcpi_rxbytes += stats->mpis_rxbytes;
1948
1949 ti->tcpi_wifi_txbytes += stats->mpis_wifi_txbytes;
1950 ti->tcpi_wifi_rxbytes += stats->mpis_wifi_rxbytes;
1951
1952 ti->tcpi_wired_txbytes += stats->mpis_wired_txbytes;
1953 ti->tcpi_wired_rxbytes += stats->mpis_wired_rxbytes;
1954
1955 ti->tcpi_cell_txbytes += stats->mpis_cell_txbytes;
1956 ti->tcpi_cell_rxbytes += stats->mpis_cell_rxbytes;
1957 }
1958 }
1959
1960 static void
mptcp_fill_info(struct mptses * mpte,struct tcp_info * ti)1961 mptcp_fill_info(struct mptses *mpte, struct tcp_info *ti)
1962 {
1963 struct mptsub *actsub = mpte->mpte_active_sub;
1964 struct mptcb *mp_tp = mpte->mpte_mptcb;
1965 struct tcpcb *acttp = NULL;
1966
1967 if (actsub) {
1968 acttp = sototcpcb(actsub->mpts_socket);
1969 }
1970
1971 bzero(ti, sizeof(*ti));
1972
1973 ti->tcpi_state = (uint8_t)mp_tp->mpt_state;
1974 /* tcpi_options */
1975 /* tcpi_snd_wscale */
1976 /* tcpi_rcv_wscale */
1977 /* tcpi_flags */
1978 if (acttp) {
1979 ti->tcpi_rto = acttp->t_timer[TCPT_REXMT] ? acttp->t_rxtcur : 0;
1980 }
1981
1982 /* tcpi_snd_mss */
1983 /* tcpi_rcv_mss */
1984 if (acttp) {
1985 ti->tcpi_rttcur = acttp->t_rttcur;
1986 ti->tcpi_srtt = acttp->t_srtt >> TCP_RTT_SHIFT;
1987 ti->tcpi_rttvar = acttp->t_rttvar >> TCP_RTTVAR_SHIFT;
1988 ti->tcpi_rttbest = acttp->t_rttbest >> TCP_RTT_SHIFT;
1989 ti->tcpi_rcv_srtt = acttp->rcv_srtt >> TCP_RTT_SHIFT;
1990 }
1991 /* tcpi_snd_ssthresh */
1992 /* tcpi_snd_cwnd */
1993 /* tcpi_rcv_space */
1994 ti->tcpi_snd_wnd = mp_tp->mpt_sndwnd;
1995 ti->tcpi_snd_nxt = (uint32_t)mp_tp->mpt_sndnxt;
1996 ti->tcpi_rcv_nxt = (uint32_t)mp_tp->mpt_rcvnxt;
1997 if (acttp) {
1998 ti->tcpi_last_outif = (acttp->t_inpcb->inp_last_outifp == NULL) ? 0 :
1999 acttp->t_inpcb->inp_last_outifp->if_index;
2000 }
2001
2002 mptcp_fill_info_bytestats(ti, mpte);
2003 /* tcpi_txpackets */
2004
2005 /* tcpi_txretransmitbytes */
2006 /* tcpi_txunacked */
2007 /* tcpi_rxpackets */
2008
2009 /* tcpi_rxduplicatebytes */
2010 /* tcpi_rxoutoforderbytes */
2011 /* tcpi_snd_bw */
2012 /* tcpi_synrexmits */
2013 /* tcpi_unused1 */
2014 /* tcpi_unused2 */
2015 /* tcpi_cell_rxpackets */
2016
2017 /* tcpi_cell_txpackets */
2018
2019 /* tcpi_wifi_rxpackets */
2020
2021 /* tcpi_wifi_txpackets */
2022
2023 /* tcpi_wired_rxpackets */
2024 /* tcpi_wired_txpackets */
2025 /* tcpi_connstatus */
2026 /* TFO-stuff */
2027 /* ECN stuff */
2028 /* tcpi_ecn_recv_ce */
2029 /* tcpi_ecn_recv_cwr */
2030 if (acttp) {
2031 ti->tcpi_rcvoopack = acttp->t_rcvoopack;
2032 }
2033 /* tcpi_pawsdrop */
2034 /* tcpi_sack_recovery_episode */
2035 /* tcpi_reordered_pkts */
2036 /* tcpi_dsack_sent */
2037 /* tcpi_dsack_recvd */
2038 /* tcpi_flowhash */
2039 if (acttp) {
2040 ti->tcpi_txretransmitpackets = acttp->t_stat.rxmitpkts;
2041 }
2042 }
2043
2044 /*
2045 * Handle SOPT_GET for socket options issued on MP socket.
2046 */
2047 static int
mptcp_getopt(struct mptses * mpte,struct sockopt * sopt)2048 mptcp_getopt(struct mptses *mpte, struct sockopt *sopt)
2049 {
2050 int error = 0, optval = 0;
2051 struct socket *mp_so;
2052
2053 mp_so = mptetoso(mpte);
2054
2055 VERIFY(!(mpsotomppcb(mp_so)->mpp_flags & MPP_INSIDE_SETGETOPT));
2056 mpsotomppcb(mp_so)->mpp_flags |= MPP_INSIDE_SETGETOPT;
2057
2058 /*
2059 * We only handle SOPT_GET for TCP level socket options; we should
2060 * not get here for socket level options since they are already
2061 * handled at the socket layer.
2062 */
2063 if (sopt->sopt_level != IPPROTO_TCP) {
2064 error = ENOPROTOOPT;
2065 goto out;
2066 }
2067
2068 switch (sopt->sopt_name) {
2069 case PERSIST_TIMEOUT:
2070 /* Only case for which we have a non-zero default */
2071 optval = tcp_max_persist_timeout;
2072 OS_FALLTHROUGH;
2073 case TCP_NODELAY:
2074 case TCP_RXT_FINDROP:
2075 case TCP_KEEPALIVE:
2076 case TCP_KEEPINTVL:
2077 case TCP_KEEPCNT:
2078 case TCP_CONNECTIONTIMEOUT:
2079 case TCP_RXT_CONNDROPTIME:
2080 case TCP_ADAPTIVE_READ_TIMEOUT:
2081 case TCP_ADAPTIVE_WRITE_TIMEOUT:
2082 case TCP_FASTOPEN_FORCE_ENABLE:
2083 {
2084 struct mptopt *mpo = mptcp_sopt_find(mpte, sopt);
2085
2086 if (mpo != NULL) {
2087 optval = mpo->mpo_intval;
2088 }
2089 break;
2090 }
2091
2092 /* The next ones are stored at the MPTCP-level */
2093 case TCP_NOTSENT_LOWAT:
2094 if (mptetoso(mpte)->so_flags & SOF_NOTSENT_LOWAT) {
2095 optval = mptcp_get_notsent_lowat(mpte);
2096 } else {
2097 optval = 0;
2098 }
2099 break;
2100 case TCP_INFO:
2101 {
2102 struct tcp_info ti;
2103
2104 mptcp_fill_info(mpte, &ti);
2105 error = sooptcopyout(sopt, &ti, sizeof(struct tcp_info));
2106
2107 goto out;
2108 }
2109 case MPTCP_SERVICE_TYPE:
2110 optval = mpte->mpte_svctype;
2111 break;
2112 case MPTCP_ALTERNATE_PORT:
2113 optval = mpte->mpte_alternate_port;
2114 break;
2115 case MPTCP_FORCE_ENABLE:
2116 optval = !!(mpte->mpte_flags & MPTE_FORCE_ENABLE);
2117 break;
2118 case MPTCP_FORCE_VERSION:
2119 if (mpte->mpte_flags & MPTE_FORCE_V0) {
2120 optval = 0;
2121 } else if (mpte->mpte_flags & MPTE_FORCE_V1) {
2122 optval = 1;
2123 } else {
2124 optval = -1;
2125 }
2126 break;
2127 case MPTCP_EXPECTED_PROGRESS_TARGET:
2128 error = sooptcopyout(sopt, &mpte->mpte_time_target, sizeof(mpte->mpte_time_target));
2129
2130 goto out;
2131 default:
2132 /* not eligible */
2133 error = ENOPROTOOPT;
2134 break;
2135 }
2136
2137 if (error == 0) {
2138 error = sooptcopyout(sopt, &optval, sizeof(int));
2139 }
2140
2141 out:
2142 mpsotomppcb(mp_so)->mpp_flags &= ~MPP_INSIDE_SETGETOPT;
2143 return error;
2144 }
2145
2146 /*
2147 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
2148 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
2149 * to those that are allowed by mptcp_usr_socheckopt().
2150 */
2151 int
mptcp_ctloutput(struct socket * mp_so,struct sockopt * sopt)2152 mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt)
2153 {
2154 struct mppcb *mpp = mpsotomppcb(mp_so);
2155 struct mptses *mpte;
2156 int error = 0;
2157
2158 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
2159 error = EINVAL;
2160 goto out;
2161 }
2162 mpte = mptompte(mpp);
2163 socket_lock_assert_owned(mp_so);
2164
2165 /* we only handle socket and TCP-level socket options for MPTCP */
2166 if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP) {
2167 error = EINVAL;
2168 goto out;
2169 }
2170
2171 switch (sopt->sopt_dir) {
2172 case SOPT_SET:
2173 error = mptcp_setopt(mpte, sopt);
2174 break;
2175
2176 case SOPT_GET:
2177 error = mptcp_getopt(mpte, sopt);
2178 break;
2179 }
2180 out:
2181 return error;
2182 }
2183
2184 const char *
mptcp_sopt2str(int level,int optname)2185 mptcp_sopt2str(int level, int optname)
2186 {
2187 switch (level) {
2188 case SOL_SOCKET:
2189 switch (optname) {
2190 case SO_LINGER:
2191 return "SO_LINGER";
2192 case SO_LINGER_SEC:
2193 return "SO_LINGER_SEC";
2194 case SO_DEBUG:
2195 return "SO_DEBUG";
2196 case SO_KEEPALIVE:
2197 return "SO_KEEPALIVE";
2198 case SO_USELOOPBACK:
2199 return "SO_USELOOPBACK";
2200 case SO_TYPE:
2201 return "SO_TYPE";
2202 case SO_NREAD:
2203 return "SO_NREAD";
2204 case SO_NWRITE:
2205 return "SO_NWRITE";
2206 case SO_ERROR:
2207 return "SO_ERROR";
2208 case SO_SNDBUF:
2209 return "SO_SNDBUF";
2210 case SO_RCVBUF:
2211 return "SO_RCVBUF";
2212 case SO_SNDLOWAT:
2213 return "SO_SNDLOWAT";
2214 case SO_RCVLOWAT:
2215 return "SO_RCVLOWAT";
2216 case SO_SNDTIMEO:
2217 return "SO_SNDTIMEO";
2218 case SO_RCVTIMEO:
2219 return "SO_RCVTIMEO";
2220 case SO_NKE:
2221 return "SO_NKE";
2222 case SO_NOSIGPIPE:
2223 return "SO_NOSIGPIPE";
2224 case SO_NOADDRERR:
2225 return "SO_NOADDRERR";
2226 case SO_RESTRICTIONS:
2227 return "SO_RESTRICTIONS";
2228 case SO_LABEL:
2229 return "SO_LABEL";
2230 case SO_PEERLABEL:
2231 return "SO_PEERLABEL";
2232 case SO_RANDOMPORT:
2233 return "SO_RANDOMPORT";
2234 case SO_TRAFFIC_CLASS:
2235 return "SO_TRAFFIC_CLASS";
2236 case SO_RECV_TRAFFIC_CLASS:
2237 return "SO_RECV_TRAFFIC_CLASS";
2238 case SO_TRAFFIC_CLASS_DBG:
2239 return "SO_TRAFFIC_CLASS_DBG";
2240 case SO_PRIVILEGED_TRAFFIC_CLASS:
2241 return "SO_PRIVILEGED_TRAFFIC_CLASS";
2242 case SO_DEFUNCTIT:
2243 return "SO_DEFUNCTIT";
2244 case SO_DEFUNCTOK:
2245 return "SO_DEFUNCTOK";
2246 case SO_ISDEFUNCT:
2247 return "SO_ISDEFUNCT";
2248 case SO_OPPORTUNISTIC:
2249 return "SO_OPPORTUNISTIC";
2250 case SO_FLUSH:
2251 return "SO_FLUSH";
2252 case SO_RECV_ANYIF:
2253 return "SO_RECV_ANYIF";
2254 case SO_NOWAKEFROMSLEEP:
2255 return "SO_NOWAKEFROMSLEEP";
2256 case SO_NOAPNFALLBK:
2257 return "SO_NOAPNFALLBK";
2258 case SO_MARK_CELLFALLBACK:
2259 return "SO_CELLFALLBACK";
2260 case SO_FALLBACK_MODE:
2261 return "SO_FALLBACK_MODE";
2262 case SO_MARK_KNOWN_TRACKER:
2263 return "SO_MARK_KNOWN_TRACKER";
2264 case SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED:
2265 return "SO_MARK_KNOWN_TRACKER_NON_APP_INITIATED";
2266 case SO_MARK_APPROVED_APP_DOMAIN:
2267 return "SO_MARK_APPROVED_APP_DOMAIN";
2268 case SO_DELEGATED:
2269 return "SO_DELEGATED";
2270 case SO_DELEGATED_UUID:
2271 return "SO_DELEGATED_UUID";
2272 #if NECP
2273 case SO_NECP_ATTRIBUTES:
2274 return "SO_NECP_ATTRIBUTES";
2275 case SO_NECP_CLIENTUUID:
2276 return "SO_NECP_CLIENTUUID";
2277 #endif /* NECP */
2278 }
2279
2280 break;
2281 case IPPROTO_TCP:
2282 switch (optname) {
2283 case TCP_NODELAY:
2284 return "TCP_NODELAY";
2285 case TCP_KEEPALIVE:
2286 return "TCP_KEEPALIVE";
2287 case TCP_KEEPINTVL:
2288 return "TCP_KEEPINTVL";
2289 case TCP_KEEPCNT:
2290 return "TCP_KEEPCNT";
2291 case TCP_CONNECTIONTIMEOUT:
2292 return "TCP_CONNECTIONTIMEOUT";
2293 case TCP_RXT_CONNDROPTIME:
2294 return "TCP_RXT_CONNDROPTIME";
2295 case PERSIST_TIMEOUT:
2296 return "PERSIST_TIMEOUT";
2297 case TCP_NOTSENT_LOWAT:
2298 return "NOTSENT_LOWAT";
2299 case TCP_ADAPTIVE_READ_TIMEOUT:
2300 return "ADAPTIVE_READ_TIMEOUT";
2301 case TCP_ADAPTIVE_WRITE_TIMEOUT:
2302 return "ADAPTIVE_WRITE_TIMEOUT";
2303 case TCP_FASTOPEN_FORCE_ENABLE:
2304 return "TCP_FASTOPEN_FORCE_ENABLE";
2305 case MPTCP_SERVICE_TYPE:
2306 return "MPTCP_SERVICE_TYPE";
2307 case MPTCP_ALTERNATE_PORT:
2308 return "MPTCP_ALTERNATE_PORT";
2309 case MPTCP_FORCE_ENABLE:
2310 return "MPTCP_FORCE_ENABLE";
2311 case MPTCP_FORCE_VERSION:
2312 return "MPTCP_FORCE_VERSION";
2313 case MPTCP_EXPECTED_PROGRESS_TARGET:
2314 return "MPTCP_EXPECTED_PROGRESS_TARGET";
2315 }
2316
2317 break;
2318 }
2319
2320 return "unknown";
2321 }
2322
2323 static int
mptcp_usr_preconnect(struct socket * mp_so)2324 mptcp_usr_preconnect(struct socket *mp_so)
2325 {
2326 struct mptsub *mpts = NULL;
2327 struct mppcb *mpp = mpsotomppcb(mp_so);
2328 struct mptses *mpte;
2329 struct socket *so;
2330 struct tcpcb *tp = NULL;
2331 int error;
2332
2333 mpte = mptompte(mpp);
2334
2335 mpts = mptcp_get_subflow(mpte, NULL);
2336 if (mpts == NULL) {
2337 os_log_error(mptcp_log_handle, "%s - %lx: invalid preconnect ",
2338 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
2339 return EINVAL;
2340 }
2341 mpts->mpts_flags &= ~MPTSF_TFO_REQD;
2342 so = mpts->mpts_socket;
2343 tp = intotcpcb(sotoinpcb(so));
2344 tp->t_mpflags &= ~TMPF_TFO_REQUEST;
2345 error = tcp_output(sototcpcb(so));
2346
2347 soclearfastopen(mp_so);
2348
2349 return error;
2350 }
2351