1 /*
2 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <netinet/in_systm.h>
31 #include <sys/socket.h>
32 #include <sys/socketvar.h>
33 #include <sys/syslog.h>
34 #include <net/route.h>
35 #include <netinet/in.h>
36 #include <net/if.h>
37
38 #include <netinet/ip.h>
39 #include <netinet/ip_var.h>
40 #include <netinet/in_var.h>
41 #include <netinet/tcp.h>
42 #include <netinet/tcp_cache.h>
43 #include <netinet/tcp_seq.h>
44 #include <netinet/tcpip.h>
45 #include <netinet/tcp_fsm.h>
46 #include <netinet/mptcp_var.h>
47 #include <netinet/mptcp.h>
48 #include <netinet/mptcp_opt.h>
49 #include <netinet/mptcp_seq.h>
50
51 #include <libkern/crypto/sha1.h>
52 #include <libkern/crypto/sha2.h>
53 #include <netinet/mptcp_timer.h>
54
55 #include <mach/sdt.h>
56
57 static int mptcp_validate_join_hmac(struct tcpcb *, u_char*, int);
58 static int mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp, int optlen);
59 static void mptcp_send_remaddr_opt(struct tcpcb *, struct mptcp_remaddr_opt *);
60 static int mptcp_echo_add_addr(struct tcpcb *, u_char *, unsigned int);
61
/*
 * MPTCP Options Output Processing
 */
65
66 static unsigned
mptcp_setup_first_subflow_syn_opts(struct socket * so,u_char * opt,unsigned optlen)67 mptcp_setup_first_subflow_syn_opts(struct socket *so, u_char *opt, unsigned optlen)
68 {
69 struct mptcp_mpcapable_opt_rsp mptcp_opt;
70 struct tcpcb *tp = sototcpcb(so);
71 struct mptcb *mp_tp = tptomptp(tp);
72 struct mptses *mpte = mp_tp->mpt_mpte;
73 int ret;
74
75 uint8_t mmco_len = mp_tp->mpt_version == MPTCP_VERSION_0 ?
76 sizeof(struct mptcp_mpcapable_opt_rsp) :
77 sizeof(struct mptcp_mpcapable_opt_common);
78
79 ret = tcp_heuristic_do_mptcp(tp);
80 if (ret > 0) {
81 os_log_info(mptcp_log_handle, "%s - %lx: Not doing MPTCP due to heuristics",
82 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte));
83 mp_tp->mpt_flags |= MPTCPF_FALLBACK_HEURISTIC;
84 return optlen;
85 }
86
87 /*
88 * Avoid retransmitting the MP_CAPABLE option.
89 */
90 if (ret == 0 &&
91 tp->t_rxtshift > mptcp_mpcap_retries &&
92 !(mpte->mpte_flags & MPTE_FORCE_ENABLE)) {
93 if (!(mp_tp->mpt_flags & (MPTCPF_FALLBACK_HEURISTIC | MPTCPF_HEURISTIC_TRAC))) {
94 mp_tp->mpt_flags |= MPTCPF_HEURISTIC_TRAC;
95 tcp_heuristic_mptcp_loss(tp);
96 tcp_cache_update_mptcp_version(tp, FALSE);
97 }
98 return optlen;
99 }
100
101 bzero(&mptcp_opt, sizeof(struct mptcp_mpcapable_opt_rsp));
102
103 mptcp_opt.mmc_common.mmco_kind = TCPOPT_MULTIPATH;
104 mptcp_opt.mmc_common.mmco_len = mmco_len;
105 mptcp_opt.mmc_common.mmco_subtype = MPO_CAPABLE;
106 mptcp_opt.mmc_common.mmco_version = mp_tp->mpt_version;
107 mptcp_opt.mmc_common.mmco_flags |= MPCAP_PROPOSAL_SBIT;
108 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
109 mptcp_opt.mmc_common.mmco_flags |= MPCAP_CHECKSUM_CBIT;
110 }
111 mptcp_opt.mmc_localkey = mp_tp->mpt_localkey;
112
113 memcpy(opt + optlen, &mptcp_opt, mmco_len);
114 optlen += mmco_len;
115
116 return optlen;
117 }
118
119 static unsigned
mptcp_setup_join_subflow_syn_opts(struct socket * so,u_char * opt,unsigned optlen)120 mptcp_setup_join_subflow_syn_opts(struct socket *so, u_char *opt, unsigned optlen)
121 {
122 struct mptcp_mpjoin_opt_req mpjoin_req;
123 struct inpcb *inp = sotoinpcb(so);
124 struct tcpcb *tp = NULL;
125 struct mptsub *mpts;
126
127 if (!inp) {
128 return optlen;
129 }
130
131 tp = intotcpcb(inp);
132 if (!tp) {
133 return optlen;
134 }
135
136 mpts = tp->t_mpsub;
137
138 bzero(&mpjoin_req, sizeof(mpjoin_req));
139 mpjoin_req.mmjo_kind = TCPOPT_MULTIPATH;
140 mpjoin_req.mmjo_len = sizeof(mpjoin_req);
141 mpjoin_req.mmjo_subtype_bkp = MPO_JOIN << 4;
142
143 if (tp->t_mpflags & TMPF_BACKUP_PATH) {
144 mpjoin_req.mmjo_subtype_bkp |= MPTCP_BACKUP;
145 } else if (inp->inp_boundifp && IFNET_IS_CELLULAR(inp->inp_boundifp) &&
146 mptcp_subflows_need_backup_flag(mpts->mpts_mpte)) {
147 mpjoin_req.mmjo_subtype_bkp |= MPTCP_BACKUP;
148 tp->t_mpflags |= TMPF_BACKUP_PATH;
149 } else {
150 mpts->mpts_flags |= MPTSF_PREFERRED;
151 }
152
153 mpjoin_req.mmjo_addr_id = tp->t_local_aid;
154 mpjoin_req.mmjo_peer_token = tptomptp(tp)->mpt_remotetoken;
155 if (mpjoin_req.mmjo_peer_token == 0) {
156 mptcplog((LOG_DEBUG, "%s: peer token 0", __func__),
157 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
158 }
159 mptcp_get_rands(tp->t_local_aid, tptomptp(tp),
160 &mpjoin_req.mmjo_rand, NULL);
161 memcpy(opt + optlen, &mpjoin_req, mpjoin_req.mmjo_len);
162 optlen += mpjoin_req.mmjo_len;
163
164 return optlen;
165 }
166
167 unsigned
mptcp_setup_join_ack_opts(struct tcpcb * tp,u_char * opt,unsigned optlen)168 mptcp_setup_join_ack_opts(struct tcpcb *tp, u_char *opt, unsigned optlen)
169 {
170 unsigned new_optlen;
171 struct mptcp_mpjoin_opt_rsp2 join_rsp2;
172
173 if ((MAX_TCPOPTLEN - optlen) < sizeof(struct mptcp_mpjoin_opt_rsp2)) {
174 printf("%s: no space left %d \n", __func__, optlen);
175 return optlen;
176 }
177
178 bzero(&join_rsp2, sizeof(struct mptcp_mpjoin_opt_rsp2));
179 join_rsp2.mmjo_kind = TCPOPT_MULTIPATH;
180 join_rsp2.mmjo_len = sizeof(struct mptcp_mpjoin_opt_rsp2);
181 join_rsp2.mmjo_subtype = MPO_JOIN;
182 mptcp_get_mpjoin_hmac(tp->t_local_aid, tptomptp(tp),
183 (u_char*)&join_rsp2.mmjo_mac, HMAC_TRUNCATED_ACK);
184 memcpy(opt + optlen, &join_rsp2, join_rsp2.mmjo_len);
185 new_optlen = optlen + join_rsp2.mmjo_len;
186 return new_optlen;
187 }
188
189 unsigned
mptcp_setup_syn_opts(struct socket * so,u_char * opt,unsigned optlen)190 mptcp_setup_syn_opts(struct socket *so, u_char *opt, unsigned optlen)
191 {
192 unsigned new_optlen;
193
194 if (!(so->so_flags & SOF_MP_SEC_SUBFLOW)) {
195 new_optlen = mptcp_setup_first_subflow_syn_opts(so, opt, optlen);
196 } else {
197 new_optlen = mptcp_setup_join_subflow_syn_opts(so, opt, optlen);
198 }
199
200 return new_optlen;
201 }
202
203 static int
mptcp_send_mpfail(struct tcpcb * tp,u_char * opt,unsigned int optlen)204 mptcp_send_mpfail(struct tcpcb *tp, u_char *opt, unsigned int optlen)
205 {
206 #pragma unused(tp, opt, optlen)
207
208 struct mptcb *mp_tp = NULL;
209 struct mptcp_mpfail_opt fail_opt;
210 uint64_t dsn;
211 uint8_t len = sizeof(struct mptcp_mpfail_opt);
212
213 mp_tp = tptomptp(tp);
214 if (mp_tp == NULL) {
215 tp->t_mpflags &= ~TMPF_SND_MPFAIL;
216 return optlen;
217 }
218
219 /* if option space low give up */
220 if ((MAX_TCPOPTLEN - optlen) < sizeof(struct mptcp_mpfail_opt)) {
221 tp->t_mpflags &= ~TMPF_SND_MPFAIL;
222 return optlen;
223 }
224
225 dsn = mp_tp->mpt_rcvnxt;
226
227 bzero(&fail_opt, sizeof(fail_opt));
228 fail_opt.mfail_kind = TCPOPT_MULTIPATH;
229 fail_opt.mfail_len = len;
230 fail_opt.mfail_subtype = MPO_FAIL;
231 fail_opt.mfail_dsn = mptcp_hton64(dsn);
232 memcpy(opt + optlen, &fail_opt, len);
233 optlen += len;
234 tp->t_mpflags &= ~TMPF_SND_MPFAIL;
235 mptcplog((LOG_DEBUG, "%s: %d \n", __func__,
236 tp->t_local_aid), (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG),
237 MPTCP_LOGLVL_LOG);
238 return optlen;
239 }
240
241 static int
mptcp_send_infinite_mapping(struct tcpcb * tp,u_char * opt,unsigned int optlen)242 mptcp_send_infinite_mapping(struct tcpcb *tp, u_char *opt, unsigned int optlen)
243 {
244 struct socket *so = tp->t_inpcb->inp_socket;
245 uint8_t len = sizeof(struct mptcp_dsn_opt);
246 struct mptcp_dsn_opt infin_opt;
247 struct mptcb *mp_tp = NULL;
248 uint8_t csum_len = 0;
249
250 if (!so) {
251 return optlen;
252 }
253
254 mp_tp = tptomptp(tp);
255 if (mp_tp == NULL) {
256 return optlen;
257 }
258
259 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
260 csum_len = 2;
261 }
262
263 /* try later */
264 if ((MAX_TCPOPTLEN - optlen) < (len + csum_len)) {
265 return optlen;
266 }
267
268 bzero(&infin_opt, sizeof(infin_opt));
269 infin_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
270 infin_opt.mdss_copt.mdss_len = len + csum_len;
271 infin_opt.mdss_copt.mdss_subtype = MPO_DSS;
272 infin_opt.mdss_copt.mdss_flags |= MDSS_M;
273 if (mp_tp->mpt_flags & MPTCPF_RECVD_MPFAIL) {
274 infin_opt.mdss_dsn = (u_int32_t)
275 MPTCP_DATASEQ_LOW32(mp_tp->mpt_dsn_at_csum_fail);
276 infin_opt.mdss_subflow_seqn = mp_tp->mpt_ssn_at_csum_fail;
277 } else {
278 /*
279 * If MPTCP fallback happens, but TFO succeeds, the data on the
280 * SYN does not belong to the MPTCP data sequence space.
281 */
282 if ((tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) &&
283 ((mp_tp->mpt_local_idsn + 1) == mp_tp->mpt_snduna)) {
284 infin_opt.mdss_subflow_seqn = 1;
285
286 mptcplog((LOG_DEBUG, "%s: idsn %llu snduna %llu \n",
287 __func__, mp_tp->mpt_local_idsn,
288 mp_tp->mpt_snduna),
289 (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG),
290 MPTCP_LOGLVL_LOG);
291 } else {
292 infin_opt.mdss_subflow_seqn = tp->snd_una - tp->t_mpsub->mpts_iss;
293 }
294 infin_opt.mdss_dsn = (u_int32_t)
295 MPTCP_DATASEQ_LOW32(mp_tp->mpt_snduna);
296 }
297
298 if ((infin_opt.mdss_dsn == 0) || (infin_opt.mdss_subflow_seqn == 0)) {
299 return optlen;
300 }
301 infin_opt.mdss_dsn = htonl(infin_opt.mdss_dsn);
302 infin_opt.mdss_subflow_seqn = htonl(infin_opt.mdss_subflow_seqn);
303 infin_opt.mdss_data_len = 0;
304
305 memcpy(opt + optlen, &infin_opt, len);
306 optlen += len;
307 if (csum_len != 0) {
308 /* The checksum field is set to 0 for infinite mapping */
309 uint16_t csum = 0;
310 memcpy(opt + optlen, &csum, csum_len);
311 optlen += csum_len;
312 }
313
314 mptcplog((LOG_DEBUG, "%s: dsn = %x, seq = %x len = %x\n", __func__,
315 ntohl(infin_opt.mdss_dsn),
316 ntohl(infin_opt.mdss_subflow_seqn),
317 ntohs(infin_opt.mdss_data_len)),
318 (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG),
319 MPTCP_LOGLVL_LOG);
320
321 tp->t_mpflags |= TMPF_INFIN_SENT;
322 tcpstat.tcps_estab_fallback++;
323 return optlen;
324 }
325
326
327 static int
mptcp_ok_to_fin(struct tcpcb * tp,u_int64_t dsn,u_int32_t datalen)328 mptcp_ok_to_fin(struct tcpcb *tp, u_int64_t dsn, u_int32_t datalen)
329 {
330 struct mptcb *mp_tp = tptomptp(tp);
331
332 dsn = (mp_tp->mpt_sndmax & MPTCP_DATASEQ_LOW32_MASK) | dsn;
333 if ((dsn + datalen) == mp_tp->mpt_sndmax) {
334 return 1;
335 }
336
337 return 0;
338 }
339
340 unsigned int
mptcp_setup_opts(struct tcpcb * tp,int32_t off,u_char * opt,unsigned int optlen,int flags,int len,boolean_t * p_mptcp_acknow,boolean_t * do_not_compress)341 mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt,
342 unsigned int optlen, int flags, int len,
343 boolean_t *p_mptcp_acknow, boolean_t *do_not_compress)
344 {
345 struct inpcb *inp = (struct inpcb *)tp->t_inpcb;
346 struct socket *so = inp->inp_socket;
347 struct mptcb *mp_tp = tptomptp(tp);
348 boolean_t do_csum = FALSE;
349 boolean_t send_64bit_dsn = FALSE;
350 boolean_t send_64bit_ack = FALSE;
351 u_int32_t old_mpt_flags = tp->t_mpflags & TMPF_MPTCP_SIGNALS;
352 boolean_t initial_data = FALSE;
353
354 if (mptcp_enable == 0 || mp_tp == NULL || tp->t_state == TCPS_CLOSED) {
355 /* do nothing */
356 goto ret_optlen;
357 }
358
359 socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));
360
361 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
362 do_csum = TRUE;
363 }
364
365 /* tcp_output handles the SYN path separately */
366 if (flags & TH_SYN) {
367 goto ret_optlen;
368 }
369
370 if ((MAX_TCPOPTLEN - optlen) <
371 sizeof(struct mptcp_mpcapable_opt_common)) {
372 mptcplog((LOG_ERR, "%s: no space left %d flags %x tp->t_mpflags %x len %d\n",
373 __func__, optlen, flags, tp->t_mpflags, len),
374 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
375 goto ret_optlen;
376 }
377
378 if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
379 if (tp->t_mpflags & TMPF_SND_MPFAIL) {
380 optlen = mptcp_send_mpfail(tp, opt, optlen);
381 } else if (!(tp->t_mpflags & TMPF_INFIN_SENT)) {
382 optlen = mptcp_send_infinite_mapping(tp, opt, optlen);
383 }
384
385 *do_not_compress = TRUE;
386
387 goto ret_optlen;
388 }
389
390 if (len > 0 && off == 0 && tp->t_mpflags & TMPF_SEND_DSN && tp->t_mpflags & TMPF_SND_KEYS) {
391 uint64_t dsn = 0;
392 uint32_t relseq = 0;
393 uint16_t data_len = 0, dss_csum = 0;
394 mptcp_output_getm_dsnmap64(so, off, &dsn, &relseq, &data_len, &dss_csum);
395 if (dsn == mp_tp->mpt_local_idsn + 1) {
396 initial_data = TRUE;
397 }
398 }
399
400 /* send MP_CAPABLE when it's the INITIAL ACK or data */
401 if (tp->t_mpflags & TMPF_SND_KEYS &&
402 (mp_tp->mpt_version == MPTCP_VERSION_0 || initial_data ||
403 (mp_tp->mpt_sndnxt == mp_tp->mpt_local_idsn + 1 && len == 0))) {
404 struct mptcp_mpcapable_opt_rsp2 mptcp_opt;
405 boolean_t send_data_level_details = tp->t_mpflags & TMPF_SEND_DSN ? TRUE : FALSE;
406
407 uint8_t mmco_len = sizeof(struct mptcp_mpcapable_opt_rsp1);
408 if (send_data_level_details) {
409 mmco_len += 2;
410 if (do_csum) {
411 mmco_len += 2;
412 }
413 }
414 if ((MAX_TCPOPTLEN - optlen) < mmco_len) {
415 os_log_error(mptcp_log_handle, "%s - %lx: not enough space in TCP option, "
416 "optlen: %u, mmco_len: %d\n", __func__,
417 (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
418 optlen, mmco_len);
419 goto ret_optlen;
420 }
421
422 bzero(&mptcp_opt, sizeof(struct mptcp_mpcapable_opt_rsp2));
423 mptcp_opt.mmc_rsp1.mmc_common.mmco_kind = TCPOPT_MULTIPATH;
424 mptcp_opt.mmc_rsp1.mmc_common.mmco_len = mmco_len;
425 mptcp_opt.mmc_rsp1.mmc_common.mmco_subtype = MPO_CAPABLE;
426 mptcp_opt.mmc_rsp1.mmc_common.mmco_version = mp_tp->mpt_version;
427 mptcp_opt.mmc_rsp1.mmc_common.mmco_flags |= MPCAP_PROPOSAL_SBIT;
428 if (do_csum) {
429 mptcp_opt.mmc_rsp1.mmc_common.mmco_flags |= MPCAP_CHECKSUM_CBIT;
430 }
431 mptcp_opt.mmc_rsp1.mmc_localkey = mp_tp->mpt_localkey;
432 mptcp_opt.mmc_rsp1.mmc_remotekey = mp_tp->mpt_remotekey;
433 if (send_data_level_details) {
434 mptcp_output_getm_data_level_details(so, off, &mptcp_opt.data_len, &mptcp_opt.csum);
435 mptcp_opt.data_len = htons(mptcp_opt.data_len);
436 }
437 memcpy(opt + optlen, &mptcp_opt, mmco_len);
438
439 if (mp_tp->mpt_version == MPTCP_VERSION_0) {
440 tp->t_mpflags &= ~TMPF_SND_KEYS;
441 }
442 optlen += mmco_len;
443
444 if (!tp->t_mpuna) {
445 tp->t_mpuna = tp->snd_una;
446 } else {
447 /* its a retransmission of the MP_CAPABLE ACK */
448 }
449
450 *do_not_compress = TRUE;
451
452 goto ret_optlen;
453 }
454
455 if (tp->t_mpflags & TMPF_SND_JACK) {
456 *do_not_compress = TRUE;
457 optlen = mptcp_setup_join_ack_opts(tp, opt, optlen);
458 if (!tp->t_mpuna) {
459 tp->t_mpuna = tp->snd_una;
460 }
461 /* Start a timer to retransmit the ACK */
462 tp->t_timer[TCPT_JACK_RXMT] =
463 OFFSET_FROM_START(tp, tcp_jack_rxmt);
464
465 tp->t_mpflags &= ~TMPF_SND_JACK;
466 goto ret_optlen;
467 }
468
469 if (!(tp->t_mpflags & (TMPF_MPTCP_TRUE | TMPF_PREESTABLISHED))) {
470 goto ret_optlen;
471 }
472 /*
473 * From here on, all options are sent only if MPTCP_TRUE
474 * or when data is sent early on as in Fast Join
475 */
476
477 if ((tp->t_mpflags & TMPF_MPTCP_TRUE) &&
478 (tp->t_mpflags & TMPF_SND_REM_ADDR)) {
479 int rem_opt_len = sizeof(struct mptcp_remaddr_opt);
480 if ((optlen + rem_opt_len) <= MAX_TCPOPTLEN) {
481 mptcp_send_remaddr_opt(tp,
482 (struct mptcp_remaddr_opt *)(opt + optlen));
483 optlen += rem_opt_len;
484 } else {
485 tp->t_mpflags &= ~TMPF_SND_REM_ADDR;
486 }
487
488 *do_not_compress = TRUE;
489 }
490
491 if (tp->t_mpflags & TMPF_MPTCP_ECHO_ADDR) {
492 optlen = mptcp_echo_add_addr(tp, opt, optlen);
493 }
494
495 if (tp->t_mpflags & TMPF_SND_MPPRIO) {
496 optlen = mptcp_snd_mpprio(tp, opt, optlen);
497
498 *do_not_compress = TRUE;
499 }
500
501 if (mp_tp->mpt_flags & MPTCPF_SND_64BITDSN) {
502 send_64bit_dsn = TRUE;
503 }
504 if (mp_tp->mpt_flags & MPTCPF_SND_64BITACK) {
505 send_64bit_ack = TRUE;
506 }
507
508 #define CHECK_OPTLEN { \
509 if ((MAX_TCPOPTLEN - optlen) < dssoptlen) { \
510 os_log_error(mptcp_log_handle, "%s: dssoptlen %d optlen %d \n", __func__, \
511 dssoptlen, optlen); \
512 goto ret_optlen; \
513 } \
514 }
515
516 #define DO_FIN(dsn_opt) { \
517 int sndfin = 0; \
518 sndfin = mptcp_ok_to_fin(tp, dsn_opt.mdss_dsn, len); \
519 if (sndfin) { \
520 dsn_opt.mdss_copt.mdss_flags |= MDSS_F; \
521 dsn_opt.mdss_data_len += 1; \
522 if (do_csum) \
523 dss_csum = in_addword(dss_csum, 1); \
524 } \
525 }
526
527 #define CHECK_DATALEN { \
528 /* MPTCP socket does not support IP options */ \
529 if ((len + optlen + dssoptlen) > tp->t_maxopd) { \
530 os_log_error(mptcp_log_handle, "%s: nosp %d len %d opt %d %d %d\n", \
531 __func__, len, dssoptlen, optlen, \
532 tp->t_maxseg, tp->t_maxopd); \
533 /* remove option length from payload len */ \
534 len = tp->t_maxopd - optlen - dssoptlen; \
535 } \
536 }
537
538 if ((tp->t_mpflags & TMPF_SEND_DSN) &&
539 (send_64bit_dsn)) {
540 /*
541 * If there was the need to send 64-bit Data ACK along
542 * with 64-bit DSN, then 26 or 28 bytes would be used.
543 * With timestamps and NOOP padding that will cause
544 * overflow. Hence, in the rare event that both 64-bit
545 * DSN and 64-bit ACK have to be sent, delay the send of
546 * 64-bit ACK until our 64-bit DSN is acked with a 64-bit ack.
547 * XXX If this delay causes issue, remove the 2-byte padding.
548 */
549 struct mptcp_dss64_ack32_opt dsn_ack_opt;
550 uint8_t dssoptlen = sizeof(dsn_ack_opt);
551 uint16_t dss_csum;
552
553 if (do_csum) {
554 dssoptlen += 2;
555 }
556
557 CHECK_OPTLEN;
558
559 bzero(&dsn_ack_opt, sizeof(dsn_ack_opt));
560 dsn_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
561 dsn_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
562 dsn_ack_opt.mdss_copt.mdss_len = dssoptlen;
563 dsn_ack_opt.mdss_copt.mdss_flags |=
564 MDSS_M | MDSS_m | MDSS_A;
565
566 CHECK_DATALEN;
567
568 mptcp_output_getm_dsnmap64(so, off,
569 &dsn_ack_opt.mdss_dsn,
570 &dsn_ack_opt.mdss_subflow_seqn,
571 &dsn_ack_opt.mdss_data_len,
572 &dss_csum);
573
574 if ((dsn_ack_opt.mdss_data_len == 0) ||
575 (dsn_ack_opt.mdss_dsn == 0)) {
576 goto ret_optlen;
577 }
578
579 if (tp->t_mpflags & TMPF_SEND_DFIN) {
580 DO_FIN(dsn_ack_opt);
581 }
582
583 dsn_ack_opt.mdss_ack =
584 htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
585
586 dsn_ack_opt.mdss_dsn = mptcp_hton64(dsn_ack_opt.mdss_dsn);
587 dsn_ack_opt.mdss_subflow_seqn = htonl(
588 dsn_ack_opt.mdss_subflow_seqn);
589 dsn_ack_opt.mdss_data_len = htons(
590 dsn_ack_opt.mdss_data_len);
591
592 memcpy(opt + optlen, &dsn_ack_opt, sizeof(dsn_ack_opt));
593 if (do_csum) {
594 *((uint16_t *)(void *)(opt + optlen + sizeof(dsn_ack_opt))) = dss_csum;
595 }
596
597 optlen += dssoptlen;
598
599 tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
600
601 *do_not_compress = TRUE;
602
603 goto ret_optlen;
604 }
605
606 if ((tp->t_mpflags & TMPF_SEND_DSN) &&
607 (!send_64bit_dsn) &&
608 !(tp->t_mpflags & TMPF_MPTCP_ACKNOW)) {
609 struct mptcp_dsn_opt dsn_opt;
610 uint8_t dssoptlen = sizeof(struct mptcp_dsn_opt);
611 uint16_t dss_csum;
612
613 if (do_csum) {
614 dssoptlen += 2;
615 }
616
617 CHECK_OPTLEN;
618
619 bzero(&dsn_opt, sizeof(dsn_opt));
620 dsn_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
621 dsn_opt.mdss_copt.mdss_subtype = MPO_DSS;
622 dsn_opt.mdss_copt.mdss_len = dssoptlen;
623 dsn_opt.mdss_copt.mdss_flags |= MDSS_M;
624
625 CHECK_DATALEN;
626
627 mptcp_output_getm_dsnmap32(so, off, &dsn_opt.mdss_dsn,
628 &dsn_opt.mdss_subflow_seqn,
629 &dsn_opt.mdss_data_len,
630 &dss_csum);
631
632 if ((dsn_opt.mdss_data_len == 0) ||
633 (dsn_opt.mdss_dsn == 0)) {
634 goto ret_optlen;
635 }
636
637 if (tp->t_mpflags & TMPF_SEND_DFIN) {
638 DO_FIN(dsn_opt);
639 }
640
641 dsn_opt.mdss_dsn = htonl(dsn_opt.mdss_dsn);
642 dsn_opt.mdss_subflow_seqn = htonl(dsn_opt.mdss_subflow_seqn);
643 dsn_opt.mdss_data_len = htons(dsn_opt.mdss_data_len);
644 memcpy(opt + optlen, &dsn_opt, sizeof(dsn_opt));
645 if (do_csum) {
646 *((uint16_t *)(void *)(opt + optlen + sizeof(dsn_opt))) = dss_csum;
647 }
648
649 optlen += dssoptlen;
650 tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
651
652 *do_not_compress = TRUE;
653
654 goto ret_optlen;
655 }
656
657 /* 32-bit Data ACK option */
658 if ((tp->t_mpflags & TMPF_MPTCP_ACKNOW) &&
659 (!send_64bit_ack) &&
660 !(tp->t_mpflags & TMPF_SEND_DSN) &&
661 !(tp->t_mpflags & TMPF_SEND_DFIN)) {
662 struct mptcp_data_ack_opt dack_opt;
663 uint8_t dssoptlen = 0;
664 do_ack32_only:
665 dssoptlen = sizeof(dack_opt);
666
667 CHECK_OPTLEN;
668
669 bzero(&dack_opt, dssoptlen);
670 dack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
671 dack_opt.mdss_copt.mdss_len = dssoptlen;
672 dack_opt.mdss_copt.mdss_subtype = MPO_DSS;
673 dack_opt.mdss_copt.mdss_flags |= MDSS_A;
674 dack_opt.mdss_ack =
675 htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
676 memcpy(opt + optlen, &dack_opt, dssoptlen);
677 optlen += dssoptlen;
678 VERIFY(optlen <= MAX_TCPOPTLEN);
679 tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
680 goto ret_optlen;
681 }
682
683 /* 64-bit Data ACK option */
684 if ((tp->t_mpflags & TMPF_MPTCP_ACKNOW) &&
685 (send_64bit_ack) &&
686 !(tp->t_mpflags & TMPF_SEND_DSN) &&
687 !(tp->t_mpflags & TMPF_SEND_DFIN)) {
688 struct mptcp_data_ack64_opt dack_opt;
689 uint8_t dssoptlen = 0;
690 do_ack64_only:
691 dssoptlen = sizeof(dack_opt);
692
693 CHECK_OPTLEN;
694
695 bzero(&dack_opt, dssoptlen);
696 dack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
697 dack_opt.mdss_copt.mdss_len = dssoptlen;
698 dack_opt.mdss_copt.mdss_subtype = MPO_DSS;
699 dack_opt.mdss_copt.mdss_flags |= (MDSS_A | MDSS_a);
700 dack_opt.mdss_ack = mptcp_hton64(mp_tp->mpt_rcvnxt);
701 /*
702 * The other end should retransmit 64-bit DSN until it
703 * receives a 64-bit ACK.
704 */
705 mp_tp->mpt_flags &= ~MPTCPF_SND_64BITACK;
706 memcpy(opt + optlen, &dack_opt, dssoptlen);
707 optlen += dssoptlen;
708 VERIFY(optlen <= MAX_TCPOPTLEN);
709 tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
710 goto ret_optlen;
711 }
712
713 /* 32-bit DSS+Data ACK option */
714 if ((tp->t_mpflags & TMPF_SEND_DSN) &&
715 (!send_64bit_dsn) &&
716 (!send_64bit_ack) &&
717 (tp->t_mpflags & TMPF_MPTCP_ACKNOW)) {
718 struct mptcp_dss_ack_opt dss_ack_opt;
719 uint8_t dssoptlen = sizeof(dss_ack_opt);
720 uint16_t dss_csum;
721
722 if (do_csum) {
723 dssoptlen += 2;
724 }
725
726 CHECK_OPTLEN;
727
728 bzero(&dss_ack_opt, sizeof(dss_ack_opt));
729 dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
730 dss_ack_opt.mdss_copt.mdss_len = dssoptlen;
731 dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
732 dss_ack_opt.mdss_copt.mdss_flags |= MDSS_A | MDSS_M;
733 dss_ack_opt.mdss_ack =
734 htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
735
736 CHECK_DATALEN;
737
738 mptcp_output_getm_dsnmap32(so, off, &dss_ack_opt.mdss_dsn,
739 &dss_ack_opt.mdss_subflow_seqn,
740 &dss_ack_opt.mdss_data_len,
741 &dss_csum);
742
743 if ((dss_ack_opt.mdss_data_len == 0) ||
744 (dss_ack_opt.mdss_dsn == 0)) {
745 goto do_ack32_only;
746 }
747
748 if (tp->t_mpflags & TMPF_SEND_DFIN) {
749 DO_FIN(dss_ack_opt);
750 }
751
752 dss_ack_opt.mdss_dsn = htonl(dss_ack_opt.mdss_dsn);
753 dss_ack_opt.mdss_subflow_seqn =
754 htonl(dss_ack_opt.mdss_subflow_seqn);
755 dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
756 memcpy(opt + optlen, &dss_ack_opt, sizeof(dss_ack_opt));
757 if (do_csum) {
758 *((uint16_t *)(void *)(opt + optlen + sizeof(dss_ack_opt))) = dss_csum;
759 }
760
761 optlen += dssoptlen;
762
763 if (optlen > MAX_TCPOPTLEN) {
764 panic("optlen too large");
765 }
766 tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
767 goto ret_optlen;
768 }
769
770 /* 32-bit DSS + 64-bit DACK option */
771 if ((tp->t_mpflags & TMPF_SEND_DSN) &&
772 (!send_64bit_dsn) &&
773 (send_64bit_ack) &&
774 (tp->t_mpflags & TMPF_MPTCP_ACKNOW)) {
775 struct mptcp_dss32_ack64_opt dss_ack_opt;
776 uint8_t dssoptlen = sizeof(dss_ack_opt);
777 uint16_t dss_csum;
778
779 if (do_csum) {
780 dssoptlen += 2;
781 }
782
783 CHECK_OPTLEN;
784
785 bzero(&dss_ack_opt, sizeof(dss_ack_opt));
786 dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
787 dss_ack_opt.mdss_copt.mdss_len = dssoptlen;
788 dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
789 dss_ack_opt.mdss_copt.mdss_flags |= MDSS_M | MDSS_A | MDSS_a;
790 dss_ack_opt.mdss_ack =
791 mptcp_hton64(mp_tp->mpt_rcvnxt);
792
793 CHECK_DATALEN;
794
795 mptcp_output_getm_dsnmap32(so, off, &dss_ack_opt.mdss_dsn,
796 &dss_ack_opt.mdss_subflow_seqn,
797 &dss_ack_opt.mdss_data_len,
798 &dss_csum);
799
800 if ((dss_ack_opt.mdss_data_len == 0) ||
801 (dss_ack_opt.mdss_dsn == 0)) {
802 goto do_ack64_only;
803 }
804
805 if (tp->t_mpflags & TMPF_SEND_DFIN) {
806 DO_FIN(dss_ack_opt);
807 }
808
809 dss_ack_opt.mdss_dsn = htonl(dss_ack_opt.mdss_dsn);
810 dss_ack_opt.mdss_subflow_seqn =
811 htonl(dss_ack_opt.mdss_subflow_seqn);
812 dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
813 memcpy(opt + optlen, &dss_ack_opt, sizeof(dss_ack_opt));
814 if (do_csum) {
815 *((uint16_t *)(void *)(opt + optlen + sizeof(dss_ack_opt))) = dss_csum;
816 }
817
818 optlen += dssoptlen;
819
820 if (optlen > MAX_TCPOPTLEN) {
821 panic("optlen too large");
822 }
823 tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
824
825 *do_not_compress = TRUE;
826
827 goto ret_optlen;
828 }
829
830 if (tp->t_mpflags & TMPF_SEND_DFIN) {
831 uint8_t dssoptlen = sizeof(struct mptcp_dss_ack_opt);
832 struct mptcp_dss_ack_opt dss_ack_opt;
833 uint16_t dss_csum;
834
835 if (do_csum) {
836 uint64_t dss_val = mptcp_hton64(mp_tp->mpt_sndmax - 1);
837 uint16_t dlen = htons(1);
838 uint32_t sseq = 0;
839 uint32_t sum;
840
841
842 dssoptlen += 2;
843
844 sum = in_pseudo64(dss_val, sseq, dlen);
845 ADDCARRY(sum);
846 dss_csum = ~sum & 0xffff;
847 }
848
849 CHECK_OPTLEN;
850
851 bzero(&dss_ack_opt, sizeof(dss_ack_opt));
852
853 /*
854 * Data FIN occupies one sequence space.
855 * Don't send it if it has been Acked.
856 */
857 if ((mp_tp->mpt_sndnxt + 1 != mp_tp->mpt_sndmax) ||
858 (mp_tp->mpt_snduna == mp_tp->mpt_sndmax)) {
859 goto ret_optlen;
860 }
861
862 dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
863 dss_ack_opt.mdss_copt.mdss_len = dssoptlen;
864 dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
865 dss_ack_opt.mdss_copt.mdss_flags |= MDSS_A | MDSS_M | MDSS_F;
866 dss_ack_opt.mdss_ack =
867 htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
868 dss_ack_opt.mdss_dsn =
869 htonl(MPTCP_DATASEQ_LOW32(mp_tp->mpt_sndmax - 1));
870 dss_ack_opt.mdss_subflow_seqn = 0;
871 dss_ack_opt.mdss_data_len = 1;
872 dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
873 memcpy(opt + optlen, &dss_ack_opt, sizeof(dss_ack_opt));
874 if (do_csum) {
875 *((uint16_t *)(void *)(opt + optlen + sizeof(dss_ack_opt))) = dss_csum;
876 }
877
878 optlen += dssoptlen;
879
880 *do_not_compress = TRUE;
881 }
882
883 ret_optlen:
884 if (TRUE == *p_mptcp_acknow) {
885 u_int32_t new_mpt_flags = tp->t_mpflags & TMPF_MPTCP_SIGNALS;
886
887 /*
888 * If none of the above mpflags were acted on by
889 * this routine, reset these flags and set p_mptcp_acknow
890 * to false.
891 *
892 * XXX The reset value of p_mptcp_acknow can be used
893 * to communicate tcp_output to NOT send a pure ack without any
894 * MPTCP options as it will be treated as a dup ack.
895 * Since the instances of mptcp_setup_opts not acting on
896 * these options are mostly corner cases and sending a dup
897 * ack here would only have an impact if the system
898 * has sent consecutive dup acks before this false one,
899 * we haven't modified the logic in tcp_output to avoid
900 * that.
901 */
902 if (old_mpt_flags == new_mpt_flags) {
903 tp->t_mpflags &= ~TMPF_MPTCP_SIGNALS;
904 *p_mptcp_acknow = FALSE;
905 mptcplog((LOG_DEBUG, "%s: no action \n", __func__),
906 MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
907 } else {
908 mptcplog((LOG_DEBUG, "%s: acknow set, old flags %x new flags %x \n",
909 __func__, old_mpt_flags, new_mpt_flags),
910 MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
911 }
912 }
913
914 return optlen;
915 }
916
/*
 * MPTCP Options Input Processing
 */
920
921 static int
mptcp_sanitize_option(struct tcpcb * tp,int mptcp_subtype)922 mptcp_sanitize_option(struct tcpcb *tp, int mptcp_subtype)
923 {
924 struct mptcb *mp_tp = tptomptp(tp);
925 int ret = 1;
926
927 switch (mptcp_subtype) {
928 case MPO_CAPABLE:
929 break;
930 case MPO_JOIN: /* fall through */
931 case MPO_DSS: /* fall through */
932 case MPO_FASTCLOSE: /* fall through */
933 case MPO_FAIL: /* fall through */
934 case MPO_REMOVE_ADDR: /* fall through */
935 case MPO_ADD_ADDR: /* fall through */
936 case MPO_PRIO: /* fall through */
937 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
938 ret = 0;
939 }
940 break;
941 default:
942 ret = 0;
943 os_log_error(mptcp_log_handle, "%s - %lx: type = %d \n", __func__,
944 (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte), mptcp_subtype);
945 break;
946 }
947 return ret;
948 }
949
950 static int
mptcp_valid_mpcapable_common_opt(u_char * cp)951 mptcp_valid_mpcapable_common_opt(u_char *cp)
952 {
953 struct mptcp_mpcapable_opt_common *rsp =
954 (struct mptcp_mpcapable_opt_common *)cp;
955
956 /* mmco_kind, mmco_len and mmco_subtype are validated before */
957
958 if (!(rsp->mmco_flags & MPCAP_PROPOSAL_SBIT)) {
959 return 0;
960 }
961
962 if (rsp->mmco_flags & (MPCAP_BBIT | MPCAP_DBIT |
963 MPCAP_EBIT | MPCAP_FBIT | MPCAP_GBIT)) {
964 return 0;
965 }
966
967 return 1;
968 }
969
970
971 static void
mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
    uint8_t optlen)
{
	/*
	 * Process an MP_CAPABLE option received on a SYN/ACK: record the
	 * peer's key, flags and version in the MPTCP control block and, once
	 * the remote parameters could be initialized, flag the subflow as
	 * MPTCP-capable.  Options that fail validation are ignored.
	 */
	const int synack = TH_SYN | TH_ACK;
	struct mptcb *mp_tp = tptomptp(tp);
	struct mptses *mpte = mp_tp->mpt_mpte;
	struct mptcp_mpcapable_opt_common *common =
	    (struct mptcp_mpcapable_opt_common *)cp;
	struct mptcp_mpcapable_opt_rsp *synack_opt =
	    (struct mptcp_mpcapable_opt_rsp *)cp;
	uint8_t peer_version;

	/* The option is only meaningful when both SYN and ACK are set */
	if ((th->th_flags & synack) != synack) {
		return;
	}

	/* Kind, length and flags shared by all MP_CAPABLE variants */
	if (mptcp_valid_mpcapable_common_opt(cp) != 1) {
		tcpstat.tcps_invalid_mpcap++;
		return;
	}

	/*
	 * Already established at the MPTCP level: this is a retransmitted
	 * SYN/ACK, which gets acknowledged with a plain ACK.
	 */
	if (mp_tp->mpt_state >= MPTCPS_ESTABLISHED) {
		return;
	}

	/* The SYN/ACK variant carries the peer's key and flags */
	if (optlen != sizeof(*synack_opt)) {
		os_log_error(mptcp_log_handle, "%s - %lx: SYN_ACK optlen = %u, sizeof mp opt = %lu \n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), optlen,
		    sizeof(struct mptcp_mpcapable_opt_rsp));
		tcpstat.tcps_invalid_mpcap++;
		return;
	}

	/*
	 * The peer may ask for DSS checksums even though the first SYN
	 * did not negotiate them.
	 */
	if (common->mmco_flags & MPCAP_CHECKSUM_CBIT) {
		mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
	}

	if (common->mmco_flags & MPCAP_UNICAST_IPBIT) {
		mpte->mpte_flags |= MPTE_UNICAST_IP;

		/* Addresses must be signalled explicitly - forget the current ones */
		bzero(&mpte->mpte_sub_dst_v4, sizeof(mpte->mpte_sub_dst_v4));
		bzero(&mpte->mpte_sub_dst_v6, sizeof(mpte->mpte_sub_dst_v6));
	}

	mp_tp->mpt_remotekey = synack_opt->mmc_localkey;

	/* For now simply downgrade to whatever version the peer announced */
	peer_version = synack_opt->mmc_common.mmco_version;
	mp_tp->mpt_peer_version = peer_version;
	if (peer_version < mp_tp->mpt_version) {
		os_log_error(mptcp_log_handle, "local version: %d > peer version %d", mp_tp->mpt_version, peer_version);
		mp_tp->mpt_version = peer_version;
		tcpstat.tcps_mp_verdowngrade++;
		return;
	}

	if (mptcp_init_remote_parms(mp_tp) != 0) {
		tcpstat.tcps_invalid_mpcap++;
		return;
	}

	tcp_heuristic_mptcp_success(tp);
	tcp_cache_update_mptcp_version(tp, TRUE);
	tp->t_mpflags |= (TMPF_SND_KEYS | TMPF_MPTCP_TRUE);
}
1041
1042
1043 static void
mptcp_do_mpjoin_opt(struct tcpcb * tp,u_char * cp,struct tcphdr * th,uint8_t optlen)1044 mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, uint8_t optlen)
1045 {
1046 #define MPTCP_JOPT_ERROR_PATH(tp) { \
1047 tcpstat.tcps_invalid_joins++; \
1048 if (tp->t_inpcb->inp_socket != NULL) { \
1049 soevent(tp->t_inpcb->inp_socket, \
1050 SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST); \
1051 } \
1052 }
1053 int error = 0;
1054 struct mptcp_mpjoin_opt_rsp *join_rsp =
1055 (struct mptcp_mpjoin_opt_rsp *)cp;
1056
1057 /* Only valid on SYN/ACK */
1058 if ((th->th_flags & (TH_SYN | TH_ACK)) != (TH_SYN | TH_ACK)) {
1059 return;
1060 }
1061
1062 if (optlen != sizeof(struct mptcp_mpjoin_opt_rsp)) {
1063 os_log_error(mptcp_log_handle, "%s - %lx: SYN_ACK: unexpected optlen = %u mp option = %lu\n",
1064 __func__, (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp)->mpt_mpte),
1065 optlen, sizeof(struct mptcp_mpjoin_opt_rsp));
1066 tp->t_mpflags &= ~TMPF_PREESTABLISHED;
1067 /* send RST and close */
1068 MPTCP_JOPT_ERROR_PATH(tp);
1069 return;
1070 }
1071
1072 mptcp_set_raddr_rand(tp->t_local_aid, tptomptp(tp),
1073 join_rsp->mmjo_addr_id, join_rsp->mmjo_rand);
1074 error = mptcp_validate_join_hmac(tp,
1075 (u_char*)&join_rsp->mmjo_mac, HMAC_TRUNCATED_SYNACK);
1076 if (error) {
1077 os_log_error(mptcp_log_handle, "%s - %lx: SYN_ACK error = %d \n",
1078 __func__, (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp)->mpt_mpte),
1079 error);
1080 tp->t_mpflags &= ~TMPF_PREESTABLISHED;
1081 /* send RST and close */
1082 MPTCP_JOPT_ERROR_PATH(tp);
1083 return;
1084 }
1085 tp->t_mpflags |= (TMPF_SENT_JOIN | TMPF_SND_JACK);
1086 }
1087
1088 static int
mptcp_validate_join_hmac(struct tcpcb * tp,u_char * hmac,int mac_len)1089 mptcp_validate_join_hmac(struct tcpcb *tp, u_char* hmac, int mac_len)
1090 {
1091 u_char digest[MAX(SHA1_RESULTLEN, SHA256_DIGEST_LENGTH)] = {0};
1092 struct mptcb *mp_tp = tptomptp(tp);
1093 u_int32_t rem_rand, loc_rand;
1094
1095 rem_rand = loc_rand = 0;
1096
1097 mptcp_get_rands(tp->t_local_aid, mp_tp, &loc_rand, &rem_rand);
1098 if ((rem_rand == 0) || (loc_rand == 0)) {
1099 return -1;
1100 }
1101
1102 if (mp_tp->mpt_version == MPTCP_VERSION_0) {
1103 mptcp_hmac_sha1(mp_tp->mpt_remotekey, mp_tp->mpt_localkey, rem_rand, loc_rand,
1104 digest);
1105 } else {
1106 uint32_t data[2];
1107 data[0] = rem_rand;
1108 data[1] = loc_rand;
1109 mptcp_hmac_sha256(mp_tp->mpt_remotekey, mp_tp->mpt_localkey, (u_char *)data, 8, digest);
1110 }
1111
1112 if (bcmp(digest, hmac, mac_len) == 0) {
1113 return 0; /* matches */
1114 } else {
1115 printf("%s: remote key %llx local key %llx remote rand %x "
1116 "local rand %x \n", __func__, mp_tp->mpt_remotekey, mp_tp->mpt_localkey,
1117 rem_rand, loc_rand);
1118 return -1;
1119 }
1120 }
1121
1122 /*
1123 * Update the mptcb send state variables, but the actual sbdrop occurs
1124 * in MPTCP layer
1125 */
1126 void
mptcp_data_ack_rcvd(struct mptcb * mp_tp,struct tcpcb * tp,u_int64_t full_dack)1127 mptcp_data_ack_rcvd(struct mptcb *mp_tp, struct tcpcb *tp, u_int64_t full_dack)
1128 {
1129 uint64_t acked = full_dack - mp_tp->mpt_snduna;
1130
1131 VERIFY(acked <= INT_MAX);
1132
1133 if (acked) {
1134 struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);
1135
1136 if (acked > mp_so->so_snd.sb_cc) {
1137 if (acked > mp_so->so_snd.sb_cc + 1 ||
1138 mp_tp->mpt_state < MPTCPS_FIN_WAIT_1) {
1139 os_log_error(mptcp_log_handle, "%s - %lx: acked %u, sb_cc %u full %u suna %u state %u\n",
1140 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
1141 (uint32_t)acked, mp_so->so_snd.sb_cc,
1142 (uint32_t)full_dack, (uint32_t)mp_tp->mpt_snduna,
1143 mp_tp->mpt_state);
1144 }
1145
1146 sbdrop(&mp_so->so_snd, (int)mp_so->so_snd.sb_cc);
1147 } else {
1148 sbdrop(&mp_so->so_snd, (int)acked);
1149 }
1150
1151 mp_tp->mpt_snduna += acked;
1152 /* In degraded mode, we may get some Data ACKs */
1153 if ((tp->t_mpflags & TMPF_TCP_FALLBACK) &&
1154 !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
1155 MPTCP_SEQ_GT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
1156 /* bring back sndnxt to retransmit MPTCP data */
1157 mp_tp->mpt_sndnxt = mp_tp->mpt_dsn_at_csum_fail;
1158 mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
1159 tp->t_inpcb->inp_socket->so_flags1 |=
1160 SOF1_POST_FALLBACK_SYNC;
1161 }
1162
1163 mptcp_clean_reinjectq(mp_tp->mpt_mpte);
1164
1165 sowwakeup(mp_so);
1166 }
1167 if (full_dack == mp_tp->mpt_sndmax &&
1168 mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
1169 mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_ACK);
1170 tp->t_mpflags &= ~TMPF_SEND_DFIN;
1171 }
1172
1173 if ((tp->t_mpflags & TMPF_SND_KEYS) &&
1174 MPTCP_SEQ_GT(mp_tp->mpt_snduna, mp_tp->mpt_local_idsn + 1)) {
1175 tp->t_mpflags &= ~TMPF_SND_KEYS;
1176 }
1177 }
1178
1179 void
mptcp_update_window_wakeup(struct tcpcb * tp)1180 mptcp_update_window_wakeup(struct tcpcb *tp)
1181 {
1182 struct mptcb *mp_tp = tptomptp(tp);
1183
1184 socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));
1185
1186 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
1187 mp_tp->mpt_sndwnd = tp->snd_wnd;
1188 mp_tp->mpt_sndwl1 = mp_tp->mpt_rcvnxt;
1189 mp_tp->mpt_sndwl2 = mp_tp->mpt_snduna;
1190 }
1191
1192 sowwakeup(tp->t_inpcb->inp_socket);
1193 }
1194
1195 static void
mptcp_update_window(struct mptcb * mp_tp,u_int64_t ack,u_int64_t seq,u_int32_t tiwin)1196 mptcp_update_window(struct mptcb *mp_tp, u_int64_t ack, u_int64_t seq, u_int32_t tiwin)
1197 {
1198 if (MPTCP_SEQ_LT(mp_tp->mpt_sndwl1, seq) ||
1199 (mp_tp->mpt_sndwl1 == seq &&
1200 (MPTCP_SEQ_LT(mp_tp->mpt_sndwl2, ack) ||
1201 (mp_tp->mpt_sndwl2 == ack && tiwin > mp_tp->mpt_sndwnd)))) {
1202 mp_tp->mpt_sndwnd = tiwin;
1203 mp_tp->mpt_sndwl1 = seq;
1204 mp_tp->mpt_sndwl2 = ack;
1205 }
1206 }
1207
1208 static void
mptcp_do_dss_opt_ack_meat(u_int64_t full_dack,u_int64_t full_dsn,struct tcpcb * tp,u_int32_t tiwin)1209 mptcp_do_dss_opt_ack_meat(u_int64_t full_dack, u_int64_t full_dsn,
1210 struct tcpcb *tp, u_int32_t tiwin)
1211 {
1212 struct mptcb *mp_tp = tptomptp(tp);
1213 int close_notify = 0;
1214
1215 tp->t_mpflags |= TMPF_RCVD_DACK;
1216
1217 if (MPTCP_SEQ_LEQ(full_dack, mp_tp->mpt_sndmax) &&
1218 MPTCP_SEQ_GEQ(full_dack, mp_tp->mpt_snduna)) {
1219 mptcp_data_ack_rcvd(mp_tp, tp, full_dack);
1220 if (mp_tp->mpt_state > MPTCPS_FIN_WAIT_2) {
1221 close_notify = 1;
1222 }
1223 if (mp_tp->mpt_flags & MPTCPF_RCVD_64BITACK) {
1224 mp_tp->mpt_flags &= ~MPTCPF_RCVD_64BITACK;
1225 mp_tp->mpt_flags &= ~MPTCPF_SND_64BITDSN;
1226 }
1227 mptcp_notify_mpready(tp->t_inpcb->inp_socket);
1228 if (close_notify) {
1229 mptcp_notify_close(tp->t_inpcb->inp_socket);
1230 }
1231 }
1232
1233 mptcp_update_window(mp_tp, full_dack, full_dsn, tiwin);
1234 }
1235
1236 static void
mptcp_do_dss_opt_meat(u_char * cp,struct tcpcb * tp,struct tcphdr * th)1237 mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp, struct tcphdr *th)
1238 {
1239 struct mptcp_dss_copt *dss_rsp = (struct mptcp_dss_copt *)cp;
1240 u_int64_t full_dack = 0;
1241 u_int32_t tiwin = th->th_win << tp->snd_scale;
1242 struct mptcb *mp_tp = tptomptp(tp);
1243 int csum_len = 0;
1244
1245 #define MPTCP_DSS_OPT_SZ_CHK(len, expected_len) { \
1246 if (len != expected_len) { \
1247 mptcplog((LOG_ERR, "%s: bad len = %d dss: %x \n", __func__, \
1248 len, dss_rsp->mdss_flags), \
1249 (MPTCP_SOCKET_DBG|MPTCP_RECEIVER_DBG), \
1250 MPTCP_LOGLVL_LOG); \
1251 return; \
1252 } \
1253 }
1254
1255 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
1256 csum_len = 2;
1257 }
1258
1259 dss_rsp->mdss_flags &= (MDSS_A | MDSS_a | MDSS_M | MDSS_m);
1260 switch (dss_rsp->mdss_flags) {
1261 case (MDSS_M):
1262 {
1263 /* 32-bit DSS, No Data ACK */
1264 struct mptcp_dsn_opt *dss_rsp1;
1265 dss_rsp1 = (struct mptcp_dsn_opt *)cp;
1266
1267 MPTCP_DSS_OPT_SZ_CHK(dss_rsp1->mdss_copt.mdss_len,
1268 sizeof(struct mptcp_dsn_opt) + csum_len);
1269 if (csum_len == 0) {
1270 mptcp_update_dss_rcv_state(dss_rsp1, tp, 0);
1271 } else {
1272 mptcp_update_dss_rcv_state(dss_rsp1, tp,
1273 *(uint16_t *)(void *)(cp +
1274 (dss_rsp1->mdss_copt.mdss_len - csum_len)));
1275 }
1276 break;
1277 }
1278 case (MDSS_A):
1279 {
1280 /* 32-bit Data ACK, no DSS */
1281 struct mptcp_data_ack_opt *dack_opt;
1282 dack_opt = (struct mptcp_data_ack_opt *)cp;
1283
1284 MPTCP_DSS_OPT_SZ_CHK(dack_opt->mdss_copt.mdss_len,
1285 sizeof(struct mptcp_data_ack_opt));
1286
1287 u_int32_t dack = dack_opt->mdss_ack;
1288 NTOHL(dack);
1289 MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);
1290 mptcp_do_dss_opt_ack_meat(full_dack, mp_tp->mpt_sndwl1, tp, tiwin);
1291 break;
1292 }
1293 case (MDSS_M | MDSS_A):
1294 {
1295 /* 32-bit Data ACK + 32-bit DSS */
1296 struct mptcp_dss_ack_opt *dss_ack_rsp;
1297 dss_ack_rsp = (struct mptcp_dss_ack_opt *)cp;
1298 u_int64_t full_dsn;
1299 uint16_t csum = 0;
1300
1301 MPTCP_DSS_OPT_SZ_CHK(dss_ack_rsp->mdss_copt.mdss_len,
1302 sizeof(struct mptcp_dss_ack_opt) + csum_len);
1303
1304 u_int32_t dack = dss_ack_rsp->mdss_ack;
1305 NTOHL(dack);
1306 MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);
1307
1308 NTOHL(dss_ack_rsp->mdss_dsn);
1309 NTOHL(dss_ack_rsp->mdss_subflow_seqn);
1310 NTOHS(dss_ack_rsp->mdss_data_len);
1311 MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_ack_rsp->mdss_dsn, full_dsn);
1312
1313 mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);
1314
1315 if (csum_len != 0) {
1316 csum = *(uint16_t *)(void *)(cp + (dss_ack_rsp->mdss_copt.mdss_len - csum_len));
1317 }
1318
1319 mptcp_update_rcv_state_meat(mp_tp, tp,
1320 full_dsn,
1321 dss_ack_rsp->mdss_subflow_seqn,
1322 dss_ack_rsp->mdss_data_len,
1323 csum);
1324 break;
1325 }
1326 case (MDSS_M | MDSS_m):
1327 {
1328 /* 64-bit DSS , No Data ACK */
1329 struct mptcp_dsn64_opt *dsn64;
1330 dsn64 = (struct mptcp_dsn64_opt *)cp;
1331 u_int64_t full_dsn;
1332 uint16_t csum = 0;
1333
1334 MPTCP_DSS_OPT_SZ_CHK(dsn64->mdss_copt.mdss_len,
1335 sizeof(struct mptcp_dsn64_opt) + csum_len);
1336
1337 mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
1338
1339 full_dsn = mptcp_ntoh64(dsn64->mdss_dsn);
1340 NTOHL(dsn64->mdss_subflow_seqn);
1341 NTOHS(dsn64->mdss_data_len);
1342
1343 if (csum_len != 0) {
1344 csum = *(uint16_t *)(void *)(cp + dsn64->mdss_copt.mdss_len - csum_len);
1345 }
1346
1347 mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1348 dsn64->mdss_subflow_seqn,
1349 dsn64->mdss_data_len,
1350 csum);
1351 break;
1352 }
1353 case (MDSS_A | MDSS_a):
1354 {
1355 /* 64-bit Data ACK, no DSS */
1356 struct mptcp_data_ack64_opt *dack64;
1357 dack64 = (struct mptcp_data_ack64_opt *)cp;
1358
1359 MPTCP_DSS_OPT_SZ_CHK(dack64->mdss_copt.mdss_len,
1360 sizeof(struct mptcp_data_ack64_opt));
1361
1362 mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
1363
1364 full_dack = mptcp_ntoh64(dack64->mdss_ack);
1365 mptcp_do_dss_opt_ack_meat(full_dack, mp_tp->mpt_sndwl1, tp, tiwin);
1366 break;
1367 }
1368 case (MDSS_M | MDSS_m | MDSS_A):
1369 {
1370 /* 64-bit DSS + 32-bit Data ACK */
1371 struct mptcp_dss64_ack32_opt *dss_ack_rsp;
1372 dss_ack_rsp = (struct mptcp_dss64_ack32_opt *)cp;
1373 u_int64_t full_dsn;
1374 uint16_t csum = 0;
1375
1376 MPTCP_DSS_OPT_SZ_CHK(dss_ack_rsp->mdss_copt.mdss_len,
1377 sizeof(struct mptcp_dss64_ack32_opt) + csum_len);
1378
1379 u_int32_t dack = dss_ack_rsp->mdss_ack;
1380 NTOHL(dack);
1381 mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
1382 MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);
1383
1384 full_dsn = mptcp_ntoh64(dss_ack_rsp->mdss_dsn);
1385 NTOHL(dss_ack_rsp->mdss_subflow_seqn);
1386 NTOHS(dss_ack_rsp->mdss_data_len);
1387
1388 mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);
1389
1390 if (csum_len != 0) {
1391 csum = *(uint16_t *)(void *)(cp + dss_ack_rsp->mdss_copt.mdss_len - csum_len);
1392 }
1393
1394 mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1395 dss_ack_rsp->mdss_subflow_seqn,
1396 dss_ack_rsp->mdss_data_len,
1397 csum);
1398
1399 break;
1400 }
1401 case (MDSS_M | MDSS_A | MDSS_a):
1402 {
1403 /* 32-bit DSS + 64-bit Data ACK */
1404 struct mptcp_dss32_ack64_opt *dss32_ack64_opt;
1405 dss32_ack64_opt = (struct mptcp_dss32_ack64_opt *)cp;
1406 u_int64_t full_dsn;
1407
1408 MPTCP_DSS_OPT_SZ_CHK(
1409 dss32_ack64_opt->mdss_copt.mdss_len,
1410 sizeof(struct mptcp_dss32_ack64_opt) + csum_len);
1411
1412 full_dack = mptcp_ntoh64(dss32_ack64_opt->mdss_ack);
1413 NTOHL(dss32_ack64_opt->mdss_dsn);
1414 mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
1415 MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt,
1416 dss32_ack64_opt->mdss_dsn, full_dsn);
1417 NTOHL(dss32_ack64_opt->mdss_subflow_seqn);
1418 NTOHS(dss32_ack64_opt->mdss_data_len);
1419
1420 mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);
1421 if (csum_len == 0) {
1422 mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1423 dss32_ack64_opt->mdss_subflow_seqn,
1424 dss32_ack64_opt->mdss_data_len, 0);
1425 } else {
1426 mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1427 dss32_ack64_opt->mdss_subflow_seqn,
1428 dss32_ack64_opt->mdss_data_len,
1429 *(uint16_t *)(void *)(cp +
1430 dss32_ack64_opt->mdss_copt.mdss_len -
1431 csum_len));
1432 }
1433 break;
1434 }
1435 case (MDSS_M | MDSS_m | MDSS_A | MDSS_a):
1436 {
1437 /* 64-bit DSS + 64-bit Data ACK */
1438 struct mptcp_dss64_ack64_opt *dss64_ack64;
1439 dss64_ack64 = (struct mptcp_dss64_ack64_opt *)cp;
1440 u_int64_t full_dsn;
1441
1442 MPTCP_DSS_OPT_SZ_CHK(dss64_ack64->mdss_copt.mdss_len,
1443 sizeof(struct mptcp_dss64_ack64_opt) + csum_len);
1444
1445 mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
1446 mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
1447 full_dsn = mptcp_ntoh64(dss64_ack64->mdss_dsn);
1448 full_dack = mptcp_ntoh64(dss64_ack64->mdss_dsn);
1449 mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);
1450 NTOHL(dss64_ack64->mdss_subflow_seqn);
1451 NTOHS(dss64_ack64->mdss_data_len);
1452 if (csum_len == 0) {
1453 mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1454 dss64_ack64->mdss_subflow_seqn,
1455 dss64_ack64->mdss_data_len, 0);
1456 } else {
1457 mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1458 dss64_ack64->mdss_subflow_seqn,
1459 dss64_ack64->mdss_data_len,
1460 *(uint16_t *)(void *)(cp +
1461 dss64_ack64->mdss_copt.mdss_len -
1462 csum_len));
1463 }
1464 break;
1465 }
1466 default:
1467 mptcplog((LOG_DEBUG, "%s: File bug, DSS flags = %x\n",
1468 __func__, dss_rsp->mdss_flags),
1469 (MPTCP_SOCKET_DBG | MPTCP_RECEIVER_DBG),
1470 MPTCP_LOGLVL_LOG);
1471 break;
1472 }
1473 }
1474
1475 static void
mptcp_do_dss_opt(struct tcpcb * tp,u_char * cp,struct tcphdr * th)1476 mptcp_do_dss_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
1477 {
1478 struct mptcp_dss_copt *dss_rsp = (struct mptcp_dss_copt *)cp;
1479 struct mptcb *mp_tp = tptomptp(tp);
1480
1481 if (!mp_tp) {
1482 return;
1483 }
1484
1485 if (dss_rsp->mdss_subtype == MPO_DSS) {
1486 if (dss_rsp->mdss_flags & MDSS_F) {
1487 tp->t_rcv_map.mpt_dfin = 1;
1488 } else {
1489 tp->t_rcv_map.mpt_dfin = 0;
1490 }
1491
1492 mptcp_do_dss_opt_meat(cp, tp, th);
1493 }
1494 }
1495
1496 static void
mptcp_do_fastclose_opt(struct tcpcb * tp,u_char * cp,struct tcphdr * th)1497 mptcp_do_fastclose_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
1498 {
1499 struct mptcb *mp_tp = NULL;
1500 struct mptcp_fastclose_opt *fc_opt = (struct mptcp_fastclose_opt *)cp;
1501
1502 if (th->th_flags != TH_ACK) {
1503 return;
1504 }
1505
1506 if (fc_opt->mfast_len != sizeof(struct mptcp_fastclose_opt)) {
1507 tcpstat.tcps_invalid_opt++;
1508 return;
1509 }
1510
1511 mp_tp = tptomptp(tp);
1512 if (!mp_tp) {
1513 return;
1514 }
1515
1516 if (fc_opt->mfast_key != mp_tp->mpt_localkey) {
1517 tcpstat.tcps_invalid_opt++;
1518 return;
1519 }
1520
1521 /*
1522 * fastclose could make us more vulnerable to attacks, hence
1523 * accept only those that are at the next expected sequence number.
1524 */
1525 if (th->th_seq != tp->rcv_nxt) {
1526 tcpstat.tcps_invalid_opt++;
1527 return;
1528 }
1529
1530 /* Reset this flow */
1531 tp->t_mpflags |= TMPF_FASTCLOSERCV;
1532
1533 if (tp->t_inpcb->inp_socket != NULL) {
1534 soevent(tp->t_inpcb->inp_socket,
1535 SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
1536 }
1537 }
1538
1539
1540 static void
mptcp_do_mpfail_opt(struct tcpcb * tp,u_char * cp,struct tcphdr * th)1541 mptcp_do_mpfail_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
1542 {
1543 struct mptcp_mpfail_opt *fail_opt = (struct mptcp_mpfail_opt *)cp;
1544 u_int32_t mdss_subflow_seqn = 0;
1545 struct mptcb *mp_tp;
1546 int error = 0;
1547
1548 /*
1549 * mpfail could make us more vulnerable to attacks. Hence accept
1550 * only those that are the next expected sequence number.
1551 */
1552 if (th->th_seq != tp->rcv_nxt) {
1553 tcpstat.tcps_invalid_opt++;
1554 return;
1555 }
1556
1557 /* A packet without RST, must atleast have the ACK bit set */
1558 if ((th->th_flags != TH_ACK) && (th->th_flags != TH_RST)) {
1559 return;
1560 }
1561
1562 if (fail_opt->mfail_len != sizeof(struct mptcp_mpfail_opt)) {
1563 return;
1564 }
1565
1566 mp_tp = tptomptp(tp);
1567
1568 mp_tp->mpt_flags |= MPTCPF_RECVD_MPFAIL;
1569 mp_tp->mpt_dsn_at_csum_fail = mptcp_hton64(fail_opt->mfail_dsn);
1570 error = mptcp_get_map_for_dsn(tp->t_inpcb->inp_socket,
1571 mp_tp->mpt_dsn_at_csum_fail, &mdss_subflow_seqn);
1572 if (error == 0) {
1573 mp_tp->mpt_ssn_at_csum_fail = mdss_subflow_seqn;
1574 }
1575
1576 mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
1577 }
1578
1579 static boolean_t
mptcp_validate_add_addr_hmac(struct tcpcb * tp,u_char * hmac,u_char * msg,uint16_t msg_len,uint16_t mac_len)1580 mptcp_validate_add_addr_hmac(struct tcpcb *tp, u_char *hmac,
1581 u_char *msg, uint16_t msg_len, uint16_t mac_len)
1582 {
1583 u_char digest[SHA256_DIGEST_LENGTH] = {0};
1584 struct mptcb *mp_tp = tptomptp(tp);
1585
1586 VERIFY(mac_len <= SHA256_DIGEST_LENGTH);
1587 mptcp_hmac_sha256(mp_tp->mpt_remotekey, mp_tp->mpt_localkey, msg, msg_len, digest);
1588
1589 if (bcmp(digest + SHA256_DIGEST_LENGTH - mac_len, hmac, mac_len) == 0) {
1590 return true; /* matches */
1591 } else {
1592 return false;
1593 }
1594 }
1595
1596 static void
mptcp_do_add_addr_opt_v1(struct tcpcb * tp,u_char * cp)1597 mptcp_do_add_addr_opt_v1(struct tcpcb *tp, u_char *cp)
1598 {
1599 struct mptcb *mp_tp = tptomptp(tp);
1600 struct mptses *mpte = mp_tp->mpt_mpte;
1601
1602 struct mptcp_add_addr_opt *addr_opt = (struct mptcp_add_addr_opt *)cp;
1603
1604 if (addr_opt->maddr_len != MPTCP_V1_ADD_ADDR_OPT_LEN_V4 &&
1605 addr_opt->maddr_len != MPTCP_V1_ADD_ADDR_OPT_LEN_V4 + 2 &&
1606 addr_opt->maddr_len != MPTCP_V1_ADD_ADDR_OPT_LEN_V6 &&
1607 addr_opt->maddr_len != MPTCP_V1_ADD_ADDR_OPT_LEN_V6 + 2) {
1608 os_log_info(mptcp_log_handle, "%s - %lx: Wrong ADD_ADDR length %u\n",
1609 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1610 addr_opt->maddr_len);
1611
1612 return;
1613 }
1614
1615 if ((addr_opt->maddr_flags & MPTCP_V1_ADD_ADDR_ECHO) != 0) {
1616 os_log_info(mptcp_log_handle, "%s - %lx: Received ADD_ADDR with echo bit\n",
1617 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
1618
1619 return;
1620 }
1621
1622 if (addr_opt->maddr_len < MPTCP_V1_ADD_ADDR_OPT_LEN_V6) {
1623 struct sockaddr_in *dst = &mpte->mpte_sub_dst_v4;
1624 struct in_addr *addr = &addr_opt->maddr_u.maddr_addrv4;
1625 in_addr_t haddr = ntohl(addr->s_addr);
1626
1627 if (IN_ZERONET(haddr) ||
1628 IN_LOOPBACK(haddr) ||
1629 IN_LINKLOCAL(haddr) ||
1630 IN_DS_LITE(haddr) ||
1631 IN_6TO4_RELAY_ANYCAST(haddr) ||
1632 IN_MULTICAST(haddr) ||
1633 INADDR_BROADCAST == haddr ||
1634 IN_PRIVATE(haddr) ||
1635 IN_SHARED_ADDRESS_SPACE(haddr)) {
1636 os_log_info(mptcp_log_handle, "%s - %lx: ADD_ADDR invalid addr: %x\n",
1637 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1638 addr->s_addr);
1639
1640 return;
1641 }
1642
1643 u_char *hmac = (void *)(cp + addr_opt->maddr_len - HMAC_TRUNCATED_ADD_ADDR);
1644 uint16_t msg_len = sizeof(struct mptcp_add_addr_hmac_msg_v4);
1645 struct mptcp_add_addr_hmac_msg_v4 msg = {0};
1646 msg.maddr_addrid = addr_opt->maddr_addrid;
1647 msg.maddr_addr = addr_opt->maddr_u.maddr_addrv4;
1648 if (addr_opt->maddr_len > MPTCP_V1_ADD_ADDR_OPT_LEN_V4) {
1649 msg.maddr_port = *(uint16_t *)(void *)(cp + addr_opt->maddr_len - HMAC_TRUNCATED_ADD_ADDR - 2);
1650 }
1651 if (!mptcp_validate_add_addr_hmac(tp, hmac, (u_char *)&msg, msg_len, HMAC_TRUNCATED_ADD_ADDR)) {
1652 os_log_info(mptcp_log_handle, "%s - %lx: ADD_ADDR addr: %x invalid HMAC\n",
1653 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1654 addr->s_addr);
1655 return;
1656 }
1657
1658 dst->sin_len = sizeof(*dst);
1659 dst->sin_family = AF_INET;
1660 if (addr_opt->maddr_len > MPTCP_V1_ADD_ADDR_OPT_LEN_V4) {
1661 dst->sin_port = *(uint16_t *)(void *)(cp + addr_opt->maddr_len - HMAC_TRUNCATED_ADD_ADDR - 2);
1662 } else {
1663 dst->sin_port = mpte->__mpte_dst_v4.sin_port;
1664 }
1665 dst->sin_addr.s_addr = addr->s_addr;
1666 mpte->sub_dst_addr_id_v4 = addr_opt->maddr_addrid;
1667 mpte->mpte_last_added_addr_is_v4 = TRUE;
1668 } else {
1669 struct sockaddr_in6 *dst = &mpte->mpte_sub_dst_v6;
1670 struct in6_addr *addr = &addr_opt->maddr_u.maddr_addrv6;
1671
1672 if (IN6_IS_ADDR_LINKLOCAL(addr) ||
1673 IN6_IS_ADDR_MULTICAST(addr) ||
1674 IN6_IS_ADDR_UNSPECIFIED(addr) ||
1675 IN6_IS_ADDR_LOOPBACK(addr) ||
1676 IN6_IS_ADDR_V4COMPAT(addr) ||
1677 IN6_IS_ADDR_V4MAPPED(addr)) {
1678 char dbuf[MAX_IPv6_STR_LEN];
1679
1680 inet_ntop(AF_INET6, addr, dbuf, sizeof(dbuf));
1681 os_log_info(mptcp_log_handle, "%s - %lx: ADD_ADDRv6 invalid addr: %s\n",
1682 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1683 dbuf);
1684
1685 return;
1686 }
1687
1688 u_char *hmac = (void *)(cp + addr_opt->maddr_len - HMAC_TRUNCATED_ADD_ADDR);
1689 uint16_t msg_len = sizeof(struct mptcp_add_addr_hmac_msg_v6);
1690 struct mptcp_add_addr_hmac_msg_v6 msg = {0};
1691 msg.maddr_addrid = addr_opt->maddr_addrid;
1692 msg.maddr_addr = addr_opt->maddr_u.maddr_addrv6;
1693 if (addr_opt->maddr_len > MPTCP_V1_ADD_ADDR_OPT_LEN_V6) {
1694 msg.maddr_port = *(uint16_t *)(void *)(cp + addr_opt->maddr_len - HMAC_TRUNCATED_ADD_ADDR - 2);
1695 }
1696 if (!mptcp_validate_add_addr_hmac(tp, hmac, (u_char *)&msg, msg_len, HMAC_TRUNCATED_ADD_ADDR)) {
1697 char dbuf[MAX_IPv6_STR_LEN];
1698
1699 inet_ntop(AF_INET6, addr, dbuf, sizeof(dbuf));
1700 os_log_info(mptcp_log_handle, "%s - %lx: ADD_ADDR addr: %s invalid HMAC\n",
1701 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1702 dbuf);
1703 return;
1704 }
1705
1706 dst->sin6_len = sizeof(*dst);
1707 dst->sin6_family = AF_INET6;
1708 if (addr_opt->maddr_len > MPTCP_V1_ADD_ADDR_OPT_LEN_V6) {
1709 dst->sin6_port = *(uint16_t *)(void *)(cp + addr_opt->maddr_len - HMAC_TRUNCATED_ADD_ADDR - 2);
1710 } else {
1711 dst->sin6_port = mpte->__mpte_dst_v6.sin6_port;
1712 }
1713 memcpy(&dst->sin6_addr, addr, sizeof(*addr));
1714 mpte->sub_dst_addr_id_v6 = addr_opt->maddr_addrid;
1715 mpte->mpte_last_added_addr_is_v4 = FALSE;
1716 }
1717
1718 os_log_info(mptcp_log_handle, "%s - %lx: Received ADD_ADDRv%u\n",
1719 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1720 addr_opt->maddr_flags);
1721
1722 tp->t_mpflags |= TMPF_MPTCP_ECHO_ADDR;
1723 mptcp_sched_create_subflows(mpte);
1724 }
1725
1726 static void
mptcp_do_add_addr_opt_v0(struct mptses * mpte,u_char * cp)1727 mptcp_do_add_addr_opt_v0(struct mptses *mpte, u_char *cp)
1728 {
1729 struct mptcp_add_addr_opt *addr_opt = (struct mptcp_add_addr_opt *)cp;
1730
1731 if (addr_opt->maddr_len != MPTCP_V0_ADD_ADDR_OPT_LEN_V4 &&
1732 addr_opt->maddr_len != MPTCP_V0_ADD_ADDR_OPT_LEN_V6) {
1733 os_log_info(mptcp_log_handle, "%s - %lx: Wrong ADD_ADDR length %u\n",
1734 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1735 addr_opt->maddr_len);
1736
1737 return;
1738 }
1739
1740 if (addr_opt->maddr_len == MPTCP_V0_ADD_ADDR_OPT_LEN_V4 &&
1741 addr_opt->maddr_flags != 4) {
1742 os_log_info(mptcp_log_handle, "%s - %lx: ADD_ADDR length for v4 but version is %u\n",
1743 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1744 addr_opt->maddr_flags);
1745
1746 return;
1747 }
1748
1749 if (addr_opt->maddr_len == MPTCP_V0_ADD_ADDR_OPT_LEN_V6 &&
1750 addr_opt->maddr_flags != 6) {
1751 os_log_info(mptcp_log_handle, "%s - %lx: ADD_ADDR length for v6 but version is %u\n",
1752 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1753 addr_opt->maddr_flags);
1754
1755 return;
1756 }
1757
1758 if (addr_opt->maddr_len == MPTCP_V0_ADD_ADDR_OPT_LEN_V4) {
1759 struct sockaddr_in *dst = &mpte->mpte_sub_dst_v4;
1760 struct in_addr *addr = &addr_opt->maddr_u.maddr_addrv4;
1761 in_addr_t haddr = ntohl(addr->s_addr);
1762
1763 if (IN_ZERONET(haddr) ||
1764 IN_LOOPBACK(haddr) ||
1765 IN_LINKLOCAL(haddr) ||
1766 IN_DS_LITE(haddr) ||
1767 IN_6TO4_RELAY_ANYCAST(haddr) ||
1768 IN_MULTICAST(haddr) ||
1769 INADDR_BROADCAST == haddr ||
1770 IN_PRIVATE(haddr) ||
1771 IN_SHARED_ADDRESS_SPACE(haddr)) {
1772 os_log_info(mptcp_log_handle, "%s - %lx: ADD_ADDR invalid addr: %x\n",
1773 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1774 addr->s_addr);
1775
1776 return;
1777 }
1778
1779 dst->sin_len = sizeof(*dst);
1780 dst->sin_family = AF_INET;
1781 dst->sin_port = mpte->__mpte_dst_v4.sin_port;
1782 dst->sin_addr.s_addr = addr->s_addr;
1783 mpte->mpte_last_added_addr_is_v4 = TRUE;
1784 } else {
1785 struct sockaddr_in6 *dst = &mpte->mpte_sub_dst_v6;
1786 struct in6_addr *addr = &addr_opt->maddr_u.maddr_addrv6;
1787
1788 if (IN6_IS_ADDR_LINKLOCAL(addr) ||
1789 IN6_IS_ADDR_MULTICAST(addr) ||
1790 IN6_IS_ADDR_UNSPECIFIED(addr) ||
1791 IN6_IS_ADDR_LOOPBACK(addr) ||
1792 IN6_IS_ADDR_V4COMPAT(addr) ||
1793 IN6_IS_ADDR_V4MAPPED(addr)) {
1794 char dbuf[MAX_IPv6_STR_LEN];
1795
1796 inet_ntop(AF_INET6, addr, dbuf, sizeof(dbuf));
1797 os_log_info(mptcp_log_handle, "%s - %lx: ADD_ADDRv6 invalid addr: %s\n",
1798 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1799 dbuf);
1800
1801 return;
1802 }
1803
1804 dst->sin6_len = sizeof(*dst);
1805 dst->sin6_family = AF_INET6;
1806 dst->sin6_port = mpte->__mpte_dst_v6.sin6_port;
1807 dst->sin6_addr = *addr;
1808 mpte->mpte_last_added_addr_is_v4 = FALSE;
1809 }
1810
1811 os_log_info(mptcp_log_handle, "%s - %lx: Received ADD_ADDRv%u\n",
1812 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1813 addr_opt->maddr_flags);
1814
1815 mptcp_sched_create_subflows(mpte);
1816 }
1817
1818 void
tcp_do_mptcp_options(struct tcpcb * tp,u_char * cp,struct tcphdr * th,struct tcpopt * to,uint8_t optlen)1819 tcp_do_mptcp_options(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
1820 struct tcpopt *to, uint8_t optlen)
1821 {
1822 int mptcp_subtype;
1823 struct mptcb *mp_tp = tptomptp(tp);
1824
1825 if (mp_tp == NULL) {
1826 return;
1827 }
1828
1829 socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));
1830
1831 /* All MPTCP options have atleast 4 bytes */
1832 if (optlen < 4) {
1833 return;
1834 }
1835
1836 mptcp_subtype = (cp[2] >> 4);
1837
1838 if (mptcp_sanitize_option(tp, mptcp_subtype) == 0) {
1839 return;
1840 }
1841
1842 switch (mptcp_subtype) {
1843 case MPO_CAPABLE:
1844 mptcp_do_mpcapable_opt(tp, cp, th, optlen);
1845 break;
1846 case MPO_JOIN:
1847 mptcp_do_mpjoin_opt(tp, cp, th, optlen);
1848 break;
1849 case MPO_DSS:
1850 mptcp_do_dss_opt(tp, cp, th);
1851 break;
1852 case MPO_FASTCLOSE:
1853 mptcp_do_fastclose_opt(tp, cp, th);
1854 break;
1855 case MPO_FAIL:
1856 mptcp_do_mpfail_opt(tp, cp, th);
1857 break;
1858 case MPO_ADD_ADDR:
1859 if (mp_tp->mpt_version == MPTCP_VERSION_0) {
1860 mptcp_do_add_addr_opt_v0(mp_tp->mpt_mpte, cp);
1861 } else {
1862 mptcp_do_add_addr_opt_v1(tp, cp);
1863 }
1864 break;
1865 case MPO_REMOVE_ADDR: /* fall through */
1866 case MPO_PRIO:
1867 to->to_flags |= TOF_MPTCP;
1868 break;
1869 default:
1870 break;
1871 }
1872 return;
1873 }
1874
1875 /* REMOVE_ADDR option is sent when a source address goes away */
1876 static void
mptcp_send_remaddr_opt(struct tcpcb * tp,struct mptcp_remaddr_opt * opt)1877 mptcp_send_remaddr_opt(struct tcpcb *tp, struct mptcp_remaddr_opt *opt)
1878 {
1879 mptcplog((LOG_DEBUG, "%s: local id %d remove id %d \n",
1880 __func__, tp->t_local_aid, tp->t_rem_aid),
1881 (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
1882
1883 bzero(opt, sizeof(*opt));
1884 opt->mr_kind = TCPOPT_MULTIPATH;
1885 opt->mr_len = sizeof(*opt);
1886 opt->mr_subtype = MPO_REMOVE_ADDR;
1887 opt->mr_addr_id = tp->t_rem_aid;
1888 tp->t_mpflags &= ~TMPF_SND_REM_ADDR;
1889 }
1890
1891 static int
mptcp_echo_add_addr(struct tcpcb * tp,u_char * cp,unsigned int optlen)1892 mptcp_echo_add_addr(struct tcpcb *tp, u_char *cp, unsigned int optlen)
1893 {
1894 struct mptcp_add_addr_opt mpaddr;
1895 struct mptcb *mp_tp = tptomptp(tp);
1896 struct mptses *mpte = mp_tp->mpt_mpte;
1897
1898 // MPTCP v0 doesn't require echoing add_addr
1899 if (mp_tp->mpt_version == MPTCP_VERSION_0) {
1900 return optlen;
1901 }
1902
1903 size_t mpaddr_size = mpte->mpte_last_added_addr_is_v4 ? MPTCP_V1_ADD_ADDR_ECHO_OPT_LEN_V4 : MPTCP_V1_ADD_ADDR_ECHO_OPT_LEN_V6;
1904 if ((MAX_TCPOPTLEN - optlen) < mpaddr_size) {
1905 return optlen;
1906 }
1907
1908 bzero(&mpaddr, sizeof(mpaddr));
1909 mpaddr.maddr_kind = TCPOPT_MULTIPATH;
1910 mpaddr.maddr_len = (uint8_t)mpaddr_size;
1911 mpaddr.maddr_subtype = MPO_ADD_ADDR;
1912 mpaddr.maddr_flags = MPTCP_V1_ADD_ADDR_ECHO;
1913 if (mpte->mpte_last_added_addr_is_v4) {
1914 mpaddr.maddr_u.maddr_addrv4.s_addr = mpte->mpte_sub_dst_v4.sin_addr.s_addr;
1915 mpaddr.maddr_addrid = mpte->sub_dst_addr_id_v4;
1916 } else {
1917 mpaddr.maddr_u.maddr_addrv6 = mpte->mpte_sub_dst_v6.sin6_addr;
1918 mpaddr.maddr_addrid = mpte->sub_dst_addr_id_v6;
1919 }
1920
1921 memcpy(cp + optlen, &mpaddr, mpaddr_size);
1922 optlen += mpaddr_size;
1923 tp->t_mpflags &= ~TMPF_MPTCP_ECHO_ADDR;
1924 return optlen;
1925 }
1926
1927 /* We send MP_PRIO option based on the values set by the SIOCSCONNORDER ioctl */
1928 static int
mptcp_snd_mpprio(struct tcpcb * tp,u_char * cp,int optlen)1929 mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp, int optlen)
1930 {
1931 struct mptcp_mpprio_addr_opt mpprio;
1932 struct mptcb *mp_tp = tptomptp(tp);
1933 size_t mpprio_size = sizeof(mpprio);
1934 // MP_PRIO of MPTCPv1 doesn't include AddrID
1935 if (mp_tp->mpt_version == MPTCP_VERSION_1) {
1936 mpprio_size -= sizeof(uint8_t);
1937 }
1938
1939 if (tp->t_state != TCPS_ESTABLISHED) {
1940 tp->t_mpflags &= ~TMPF_SND_MPPRIO;
1941 return optlen;
1942 }
1943
1944 if ((MAX_TCPOPTLEN - optlen) < (int)mpprio_size) {
1945 return optlen;
1946 }
1947
1948 bzero(&mpprio, sizeof(mpprio));
1949 mpprio.mpprio_kind = TCPOPT_MULTIPATH;
1950 mpprio.mpprio_len = (uint8_t)mpprio_size;
1951 mpprio.mpprio_subtype = MPO_PRIO;
1952 if (tp->t_mpflags & TMPF_BACKUP_PATH) {
1953 mpprio.mpprio_flags |= MPTCP_MPPRIO_BKP;
1954 }
1955 mpprio.mpprio_addrid = tp->t_local_aid;
1956 memcpy(cp + optlen, &mpprio, mpprio_size);
1957 optlen += mpprio_size;
1958 tp->t_mpflags &= ~TMPF_SND_MPPRIO;
1959 mptcplog((LOG_DEBUG, "%s: aid = %d \n", __func__,
1960 tp->t_local_aid),
1961 (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
1962 return optlen;
1963 }
1964