xref: /xnu-12377.41.6/bsd/netinet/mptcp_opt.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/sysctl.h>
31 #include <netinet/in_systm.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/syslog.h>
35 #include <net/route.h>
36 #include <netinet/in.h>
37 #include <net/if.h>
38 
39 #include <netinet/ip.h>
40 #include <netinet/ip_var.h>
41 #include <netinet/in_var.h>
42 #include <netinet/tcp.h>
43 #include <netinet/tcp_cache.h>
44 #include <netinet/tcp_seq.h>
45 #include <netinet/tcpip.h>
46 #include <netinet/tcp_fsm.h>
47 #include <netinet/mptcp_var.h>
48 #include <netinet/mptcp.h>
49 #include <netinet/mptcp_opt.h>
50 #include <netinet/mptcp_seq.h>
51 
52 #include <libkern/crypto/sha1.h>
53 #include <libkern/crypto/sha2.h>
54 #include <netinet/mptcp_timer.h>
55 
56 #include <mach/sdt.h>
57 
58 static int mptcp_validate_join_hmac(struct tcpcb *, u_char* __sized_by(maclen), int maclen);
59 static int mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp __ended_by(optend), u_char *optend, int optlen);
60 static void mptcp_send_remaddr_opt(struct tcpcb *tp, struct mptcp_remaddr_opt *opt);
61 static int mptcp_echo_add_addr(struct tcpcb *tp, u_char * __indexable cp, unsigned int optlen);
62 
63 /*
64  * MPTCP Options Output Processing
65  */
66 
67 static unsigned
mptcp_setup_first_subflow_syn_opts(struct socket * so,u_char * opt __ended_by (optend),u_char * optend __unused,unsigned optlen)68 mptcp_setup_first_subflow_syn_opts(struct socket *so, u_char *opt __ended_by(optend), u_char *optend __unused, unsigned optlen)
69 {
70 	struct mptcp_mpcapable_opt_rsp mptcp_opt;
71 	struct tcpcb *tp = sototcpcb(so);
72 	struct mptcb *mp_tp = tptomptp(tp);
73 	struct mptses *mpte = mp_tp->mpt_mpte;
74 	int ret;
75 
76 	uint8_t mmco_len = mp_tp->mpt_version == MPTCP_VERSION_0 ?
77 	    sizeof(struct mptcp_mpcapable_opt_rsp) :
78 	    sizeof(struct mptcp_mpcapable_opt_common);
79 
80 	ret = tcp_heuristic_do_mptcp(tp);
81 	if (ret > 0) {
82 		os_log(mptcp_log_handle, "%s - %lx: Not doing MPTCP due to heuristics",
83 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte));
84 		mp_tp->mpt_flags |= MPTCPF_FALLBACK_HEURISTIC;
85 		return optlen;
86 	}
87 
88 	/*
89 	 * Avoid retransmitting the MP_CAPABLE option.
90 	 */
91 	if (ret == 0 &&
92 	    tp->t_rxtshift > mptcp_mpcap_retries &&
93 	    !(mpte->mpte_flags & MPTE_FORCE_ENABLE)) {
94 		if (!(mp_tp->mpt_flags & (MPTCPF_FALLBACK_HEURISTIC | MPTCPF_HEURISTIC_TRAC))) {
95 			mp_tp->mpt_flags |= MPTCPF_HEURISTIC_TRAC;
96 			tcp_heuristic_mptcp_loss(tp);
97 		}
98 		return optlen;
99 	}
100 
101 	bzero(&mptcp_opt, sizeof(struct mptcp_mpcapable_opt_rsp));
102 
103 	mptcp_opt.mmc_common.mmco_kind = TCPOPT_MULTIPATH;
104 	mptcp_opt.mmc_common.mmco_len = mmco_len;
105 	mptcp_opt.mmc_common.mmco_subtype = MPO_CAPABLE;
106 	mptcp_opt.mmc_common.mmco_version = mp_tp->mpt_version;
107 	mptcp_opt.mmc_common.mmco_flags |= MPCAP_PROPOSAL_SBIT;
108 	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
109 		mptcp_opt.mmc_common.mmco_flags |= MPCAP_CHECKSUM_CBIT;
110 	}
111 	mptcp_opt.mmc_localkey = mp_tp->mpt_localkey;
112 
113 	memcpy(opt + optlen, &mptcp_opt, mmco_len);
114 	optlen += mmco_len;
115 
116 	return optlen;
117 }
118 
119 static unsigned
mptcp_setup_join_subflow_syn_opts(struct socket * so,u_char * opt __ended_by (optend),u_char * optend __unused,unsigned optlen)120 mptcp_setup_join_subflow_syn_opts(struct socket *so, u_char *opt __ended_by(optend), u_char *optend __unused, unsigned optlen)
121 {
122 	struct mptcp_mpjoin_opt_req mpjoin_req;
123 	struct inpcb *inp = sotoinpcb(so);
124 	struct tcpcb *tp = NULL;
125 	struct mptsub *mpts;
126 
127 	if (!inp) {
128 		return optlen;
129 	}
130 
131 	tp = intotcpcb(inp);
132 	if (!tp) {
133 		return optlen;
134 	}
135 
136 	mpts = tp->t_mpsub;
137 
138 	bzero(&mpjoin_req, sizeof(mpjoin_req));
139 	mpjoin_req.mmjo_kind = TCPOPT_MULTIPATH;
140 	mpjoin_req.mmjo_len = sizeof(mpjoin_req);
141 	mpjoin_req.mmjo_subtype_bkp = MPO_JOIN << 4;
142 
143 	if (tp->t_mpflags & TMPF_BACKUP_PATH) {
144 		mpjoin_req.mmjo_subtype_bkp |= MPTCP_BACKUP;
145 	} else if (inp->inp_boundifp && IFNET_IS_CELLULAR(inp->inp_boundifp) &&
146 	    mptcp_subflows_need_backup_flag(mpts->mpts_mpte)) {
147 		mpjoin_req.mmjo_subtype_bkp |= MPTCP_BACKUP;
148 		tp->t_mpflags |= TMPF_BACKUP_PATH;
149 	} else {
150 		mpts->mpts_flags |= MPTSF_PREFERRED;
151 	}
152 
153 	mpjoin_req.mmjo_addr_id = tp->t_local_aid;
154 	mpjoin_req.mmjo_peer_token = tptomptp(tp)->mpt_remotetoken;
155 	mptcp_get_rands(tp->t_local_aid, tptomptp(tp),
156 	    &mpjoin_req.mmjo_rand, NULL);
157 	memcpy(opt + optlen, &mpjoin_req, mpjoin_req.mmjo_len);
158 	optlen += mpjoin_req.mmjo_len;
159 
160 	return optlen;
161 }
162 
163 unsigned
mptcp_setup_join_ack_opts(struct tcpcb * tp,u_char * opt __ended_by (optend),u_char * optend __unused,unsigned optlen)164 mptcp_setup_join_ack_opts(struct tcpcb *tp, u_char *opt __ended_by(optend), u_char *optend __unused, unsigned optlen)
165 {
166 	unsigned new_optlen;
167 	struct mptcp_mpjoin_opt_rsp2 join_rsp2;
168 
169 	if ((MAX_TCPOPTLEN - optlen) < sizeof(struct mptcp_mpjoin_opt_rsp2)) {
170 		printf("%s: no space left %d \n", __func__, optlen);
171 		return optlen;
172 	}
173 
174 	bzero(&join_rsp2, sizeof(struct mptcp_mpjoin_opt_rsp2));
175 	join_rsp2.mmjo_kind = TCPOPT_MULTIPATH;
176 	join_rsp2.mmjo_len = sizeof(struct mptcp_mpjoin_opt_rsp2);
177 	join_rsp2.mmjo_subtype = MPO_JOIN;
178 	mptcp_get_mpjoin_hmac(tp->t_local_aid, tptomptp(tp),
179 	    (u_char*)&join_rsp2.mmjo_mac, HMAC_TRUNCATED_ACK);
180 	memcpy(opt + optlen, &join_rsp2, join_rsp2.mmjo_len);
181 	new_optlen = optlen + join_rsp2.mmjo_len;
182 	return new_optlen;
183 }
184 
185 unsigned
mptcp_setup_syn_opts(struct socket * so,u_char * opt __ended_by (optend),u_char * optend,unsigned optlen)186 mptcp_setup_syn_opts(struct socket *so, u_char *opt __ended_by(optend), u_char *optend, unsigned optlen)
187 {
188 	unsigned new_optlen;
189 
190 	if (!(so->so_flags & SOF_MP_SEC_SUBFLOW)) {
191 		new_optlen = mptcp_setup_first_subflow_syn_opts(so, opt, optend, optlen);
192 	} else {
193 		new_optlen = mptcp_setup_join_subflow_syn_opts(so, opt, optend, optlen);
194 	}
195 
196 	return new_optlen;
197 }
198 
199 static int
mptcp_send_mpfail(struct tcpcb * tp,u_char * opt __ended_by (optend),u_char * optend,unsigned int optlen)200 mptcp_send_mpfail(struct tcpcb *tp, u_char *opt __ended_by(optend), u_char *optend, unsigned int optlen)
201 {
202 #pragma unused(tp, opt, optend, optlen)
203 
204 	struct mptcb *mp_tp = NULL;
205 	struct mptcp_mpfail_opt fail_opt;
206 	uint64_t dsn;
207 	uint8_t len = sizeof(struct mptcp_mpfail_opt);
208 
209 	mp_tp = tptomptp(tp);
210 	if (mp_tp == NULL) {
211 		tp->t_mpflags &= ~TMPF_SND_MPFAIL;
212 		return optlen;
213 	}
214 
215 	/* if option space low give up */
216 	if ((MAX_TCPOPTLEN - optlen) < sizeof(struct mptcp_mpfail_opt)) {
217 		tp->t_mpflags &= ~TMPF_SND_MPFAIL;
218 		return optlen;
219 	}
220 
221 	dsn = mp_tp->mpt_rcvnxt;
222 
223 	bzero(&fail_opt, sizeof(fail_opt));
224 	fail_opt.mfail_kind = TCPOPT_MULTIPATH;
225 	fail_opt.mfail_len = len;
226 	fail_opt.mfail_subtype = MPO_FAIL;
227 	fail_opt.mfail_dsn = mptcp_hton64(dsn);
228 	memcpy(opt + optlen, &fail_opt, len);
229 	optlen += len;
230 	tp->t_mpflags &= ~TMPF_SND_MPFAIL;
231 	return optlen;
232 }
233 
234 static int
mptcp_send_infinite_mapping(struct tcpcb * tp,u_char * opt __ended_by (optend),u_char * optend __unused,unsigned int optlen)235 mptcp_send_infinite_mapping(struct tcpcb *tp, u_char *opt __ended_by(optend), u_char *optend __unused, unsigned int optlen)
236 {
237 	struct socket *so = tp->t_inpcb->inp_socket;
238 	uint8_t len = sizeof(struct mptcp_dsn_opt);
239 	struct mptcp_dsn_opt infin_opt;
240 	struct mptcb *mp_tp = NULL;
241 	uint8_t csum_len = 0;
242 
243 	if (!so) {
244 		return optlen;
245 	}
246 
247 	mp_tp = tptomptp(tp);
248 	if (mp_tp == NULL) {
249 		return optlen;
250 	}
251 
252 	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
253 		csum_len = 2;
254 	}
255 
256 	/* try later */
257 	if ((MAX_TCPOPTLEN - optlen) < (len + csum_len)) {
258 		return optlen;
259 	}
260 
261 	bzero(&infin_opt, sizeof(infin_opt));
262 	infin_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
263 	infin_opt.mdss_copt.mdss_len = len + csum_len;
264 	infin_opt.mdss_copt.mdss_subtype = MPO_DSS;
265 	infin_opt.mdss_copt.mdss_flags |= MDSS_M;
266 	if (mp_tp->mpt_flags & MPTCPF_RECVD_MPFAIL) {
267 		infin_opt.mdss_dsn = (u_int32_t)
268 		    MPTCP_DATASEQ_LOW32(mp_tp->mpt_dsn_at_csum_fail);
269 		infin_opt.mdss_subflow_seqn = mp_tp->mpt_ssn_at_csum_fail;
270 	} else {
271 		/*
272 		 * If MPTCP fallback happens, but TFO succeeds, the data on the
273 		 * SYN does not belong to the MPTCP data sequence space.
274 		 */
275 		if ((tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) &&
276 		    ((mp_tp->mpt_local_idsn + 1) == mp_tp->mpt_snduna)) {
277 			infin_opt.mdss_subflow_seqn = 1;
278 		} else {
279 			infin_opt.mdss_subflow_seqn = tp->snd_una - tp->t_mpsub->mpts_iss;
280 		}
281 		infin_opt.mdss_dsn = (u_int32_t)
282 		    MPTCP_DATASEQ_LOW32(mp_tp->mpt_snduna);
283 	}
284 
285 	if ((infin_opt.mdss_dsn == 0) || (infin_opt.mdss_subflow_seqn == 0)) {
286 		return optlen;
287 	}
288 	infin_opt.mdss_dsn = htonl(infin_opt.mdss_dsn);
289 	infin_opt.mdss_subflow_seqn = htonl(infin_opt.mdss_subflow_seqn);
290 	infin_opt.mdss_data_len = 0;
291 
292 	memcpy(opt + optlen, &infin_opt, len);
293 	optlen += len;
294 	if (csum_len != 0) {
295 		/* The checksum field is set to 0 for infinite mapping */
296 		uint16_t csum = 0;
297 		memcpy(opt + optlen, &csum, csum_len);
298 		optlen += csum_len;
299 	}
300 
301 	tp->t_mpflags |= TMPF_INFIN_SENT;
302 	tcpstat.tcps_estab_fallback++;
303 	return optlen;
304 }
305 
306 
307 static int
mptcp_ok_to_fin(struct tcpcb * tp,u_int64_t dsn,u_int32_t datalen)308 mptcp_ok_to_fin(struct tcpcb *tp, u_int64_t dsn, u_int32_t datalen)
309 {
310 	struct mptcb *mp_tp = tptomptp(tp);
311 
312 	dsn = (mp_tp->mpt_sndmax & MPTCP_DATASEQ_LOW32_MASK) | dsn;
313 	if ((dsn + datalen) == mp_tp->mpt_sndmax) {
314 		return 1;
315 	}
316 
317 	return 0;
318 }
319 
320 unsigned int
mptcp_setup_opts(struct tcpcb * tp,int32_t off,u_char * opt __ended_by (optend),u_char * optend,unsigned int optlen,int flags,int len,boolean_t * p_mptcp_acknow,boolean_t * do_not_compress)321 mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt __ended_by(optend), u_char *optend,
322     unsigned int optlen, int flags, int len,
323     boolean_t *p_mptcp_acknow, boolean_t *do_not_compress)
324 {
325 	struct inpcb *inp = (struct inpcb *)tp->t_inpcb;
326 	struct socket *so = inp->inp_socket;
327 	struct mptcb *mp_tp = tptomptp(tp);
328 	boolean_t do_csum = FALSE;
329 	boolean_t send_64bit_dsn = FALSE;
330 	boolean_t send_64bit_ack = FALSE;
331 	uint32_t old_mpt_flags = tp->t_mpflags & TMPF_MPTCP_SIGNALS;
332 	boolean_t initial_data = FALSE;
333 
334 	/* There is a case where offset can become negative. tcp_output()
335 	 * gracefully handles this. So, let's make MPTCP more robust as well.
336 	 */
337 	if (off < 0) {
338 		off = 0;
339 	}
340 
341 	if (mptcp_enable == 0 || mp_tp == NULL || tp->t_state == TCPS_CLOSED) {
342 		/* do nothing */
343 		goto ret_optlen;
344 	}
345 
346 	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));
347 
348 	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
349 		do_csum = TRUE;
350 	}
351 
352 	/* tcp_output handles the SYN path separately */
353 	if (flags & TH_SYN) {
354 		goto ret_optlen;
355 	}
356 
357 	if ((MAX_TCPOPTLEN - optlen) <
358 	    sizeof(struct mptcp_mpcapable_opt_common)) {
359 		os_log_error(mptcp_log_handle, "%s - %lx: no space left %d flags %x tp->t_mpflags %x len %d\n",
360 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
361 		    optlen, flags, tp->t_mpflags, len);
362 		goto ret_optlen;
363 	}
364 
365 	if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
366 		if (tp->t_mpflags & TMPF_SND_MPFAIL) {
367 			optlen = mptcp_send_mpfail(tp, opt, optend, optlen);
368 		} else if (!(tp->t_mpflags & TMPF_INFIN_SENT)) {
369 			optlen = mptcp_send_infinite_mapping(tp, opt, optend, optlen);
370 		}
371 
372 		*do_not_compress = TRUE;
373 
374 		goto ret_optlen;
375 	}
376 
377 	if (len > 0 && off == 0 && tp->t_mpflags & TMPF_SEND_DSN && tp->t_mpflags & TMPF_SND_KEYS) {
378 		uint64_t dsn = 0;
379 		uint32_t relseq = 0;
380 		uint16_t data_len = 0, dss_csum = 0;
381 		mptcp_output_getm_dsnmap64(so, off, &dsn, &relseq, &data_len, &dss_csum);
382 		if (dsn == mp_tp->mpt_local_idsn + 1) {
383 			initial_data = TRUE;
384 		}
385 	}
386 
387 	/* send MP_CAPABLE when it's the INITIAL ACK or data */
388 	if (tp->t_mpflags & TMPF_SND_KEYS &&
389 	    (mp_tp->mpt_version == MPTCP_VERSION_0 || initial_data ||
390 	    (mp_tp->mpt_sndnxt == mp_tp->mpt_local_idsn + 1 && len == 0))) {
391 		struct mptcp_mpcapable_opt_rsp2 mptcp_opt;
392 		boolean_t send_data_level_details = tp->t_mpflags & TMPF_SEND_DSN ? TRUE : FALSE;
393 
394 		uint8_t mmco_len = sizeof(struct mptcp_mpcapable_opt_rsp1);
395 		if (send_data_level_details) {
396 			mmco_len += 2;
397 			if (do_csum) {
398 				mmco_len += 2;
399 			}
400 		}
401 		if ((MAX_TCPOPTLEN - optlen) < mmco_len) {
402 			os_log_error(mptcp_log_handle, "%s - %lx: not enough space in TCP option, "
403 			    "optlen: %u, mmco_len: %d\n", __func__,
404 			    (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
405 			    optlen, mmco_len);
406 			goto ret_optlen;
407 		}
408 
409 		bzero(&mptcp_opt, sizeof(struct mptcp_mpcapable_opt_rsp2));
410 		mptcp_opt.mmc_rsp1.mmc_common.mmco_kind = TCPOPT_MULTIPATH;
411 		mptcp_opt.mmc_rsp1.mmc_common.mmco_len = mmco_len;
412 		mptcp_opt.mmc_rsp1.mmc_common.mmco_subtype = MPO_CAPABLE;
413 		mptcp_opt.mmc_rsp1.mmc_common.mmco_version = mp_tp->mpt_version;
414 		mptcp_opt.mmc_rsp1.mmc_common.mmco_flags |= MPCAP_PROPOSAL_SBIT;
415 		if (do_csum) {
416 			mptcp_opt.mmc_rsp1.mmc_common.mmco_flags |= MPCAP_CHECKSUM_CBIT;
417 		}
418 		mptcp_opt.mmc_rsp1.mmc_localkey = mp_tp->mpt_localkey;
419 		mptcp_opt.mmc_rsp1.mmc_remotekey = mp_tp->mpt_remotekey;
420 		if (send_data_level_details) {
421 			mptcp_output_getm_data_level_details(so, off, &mptcp_opt.data_len, &mptcp_opt.csum);
422 			mptcp_opt.data_len = htons(mptcp_opt.data_len);
423 		}
424 		memcpy(opt + optlen, &mptcp_opt, mmco_len);
425 
426 		if (mp_tp->mpt_version == MPTCP_VERSION_0) {
427 			tp->t_mpflags &= ~TMPF_SND_KEYS;
428 		}
429 		optlen += mmco_len;
430 
431 		if (!tp->t_mpuna) {
432 			tp->t_mpuna = tp->snd_una;
433 		} else {
434 			/* its a retransmission of the MP_CAPABLE ACK */
435 		}
436 
437 		*do_not_compress = TRUE;
438 
439 		goto ret_optlen;
440 	}
441 
442 	if (tp->t_mpflags & TMPF_SND_JACK) {
443 		*do_not_compress = TRUE;
444 		optlen = mptcp_setup_join_ack_opts(tp, opt, optend, optlen);
445 		if (!tp->t_mpuna) {
446 			tp->t_mpuna = tp->snd_una;
447 		}
448 		/* Start a timer to retransmit the ACK */
449 		tp->t_timer[TCPT_JACK_RXMT] =
450 		    tcp_offset_from_start(tp, tcp_jack_rxmt);
451 
452 		tp->t_mpflags &= ~TMPF_SND_JACK;
453 		goto ret_optlen;
454 	}
455 
456 	if (!(tp->t_mpflags & (TMPF_MPTCP_TRUE | TMPF_PREESTABLISHED))) {
457 		goto ret_optlen;
458 	}
459 	/*
460 	 * From here on, all options are sent only if MPTCP_TRUE
461 	 * or when data is sent early on as in Fast Join
462 	 */
463 
464 	if ((tp->t_mpflags & TMPF_MPTCP_TRUE) &&
465 	    (tp->t_mpflags & TMPF_SND_REM_ADDR)) {
466 		int rem_opt_len = sizeof(struct mptcp_remaddr_opt);
467 		if (optlen + rem_opt_len <= MAX_TCPOPTLEN) {
468 			mptcp_send_remaddr_opt(tp,
469 			    (struct mptcp_remaddr_opt *)(opt + optlen));
470 			optlen += rem_opt_len;
471 		} else {
472 			tp->t_mpflags &= ~TMPF_SND_REM_ADDR;
473 		}
474 
475 		*do_not_compress = TRUE;
476 	}
477 
478 	if (tp->t_mpflags & TMPF_MPTCP_ECHO_ADDR) {
479 		optlen = mptcp_echo_add_addr(tp, opt, optlen);
480 	}
481 
482 	if (tp->t_mpflags & TMPF_SND_MPPRIO) {
483 		optlen = mptcp_snd_mpprio(tp, opt, optend, optlen);
484 
485 		*do_not_compress = TRUE;
486 	}
487 
488 	if (mp_tp->mpt_flags & MPTCPF_SND_64BITDSN) {
489 		send_64bit_dsn = TRUE;
490 	}
491 	if (mp_tp->mpt_flags & MPTCPF_SND_64BITACK) {
492 		send_64bit_ack = TRUE;
493 	}
494 
495 #define CHECK_OPTLEN    {                                                                   \
496 	if (MAX_TCPOPTLEN - optlen < dssoptlen) {                                         \
497 	        os_log_error(mptcp_log_handle, "%s: dssoptlen %d optlen %d \n", __func__,   \
498 	            dssoptlen, optlen);                                                     \
499 	            goto ret_optlen;                                                        \
500 	}                                                                                   \
501 }
502 
503 #define DO_FIN(dsn_opt) {                                               \
504 	int sndfin = 0;                                                 \
505 	sndfin = mptcp_ok_to_fin(tp, dsn_opt.mdss_dsn, len);            \
506 	if (sndfin) {                                                   \
507 	        dsn_opt.mdss_copt.mdss_flags |= MDSS_F;                 \
508 	        dsn_opt.mdss_data_len += 1;                             \
509 	        if (do_csum)                                            \
510 	                dss_csum = in_addword(dss_csum, 1);             \
511 	}                                                               \
512 }
513 
514 #define CHECK_DATALEN {                                                             \
515 	/* MPTCP socket does not support IP options */                              \
516 	if ((len + optlen + dssoptlen) > tp->t_maxopd) {                            \
517 	        os_log_error(mptcp_log_handle, "%s: nosp %d len %d opt %d %d %d\n", \
518 	            __func__, len, dssoptlen, optlen,                               \
519 	            tp->t_maxseg, tp->t_maxopd);                                    \
520 	/* remove option length from payload len */                         \
521 	        len = tp->t_maxopd - optlen - dssoptlen;                            \
522 	}                                                                           \
523 }
524 
525 	if ((tp->t_mpflags & TMPF_SEND_DSN) &&
526 	    (send_64bit_dsn)) {
527 		/*
528 		 * If there was the need to send 64-bit Data ACK along
529 		 * with 64-bit DSN, then 26 or 28 bytes would be used.
530 		 * With timestamps and NOOP padding that will cause
531 		 * overflow. Hence, in the rare event that both 64-bit
532 		 * DSN and 64-bit ACK have to be sent, delay the send of
533 		 * 64-bit ACK until our 64-bit DSN is acked with a 64-bit ack.
534 		 * XXX If this delay causes issue, remove the 2-byte padding.
535 		 */
536 		struct mptcp_dss64_ack32_opt dsn_ack_opt;
537 		uint8_t dssoptlen = sizeof(dsn_ack_opt);
538 		uint16_t dss_csum;
539 
540 		if (do_csum) {
541 			dssoptlen += 2;
542 		}
543 
544 		CHECK_OPTLEN;
545 
546 		bzero(&dsn_ack_opt, sizeof(dsn_ack_opt));
547 		dsn_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
548 		dsn_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
549 		dsn_ack_opt.mdss_copt.mdss_len = dssoptlen;
550 		dsn_ack_opt.mdss_copt.mdss_flags |=
551 		    MDSS_M | MDSS_m | MDSS_A;
552 
553 		CHECK_DATALEN;
554 
555 		mptcp_output_getm_dsnmap64(so, off,
556 		    &dsn_ack_opt.mdss_dsn,
557 		    &dsn_ack_opt.mdss_subflow_seqn,
558 		    &dsn_ack_opt.mdss_data_len,
559 		    &dss_csum);
560 
561 		if ((dsn_ack_opt.mdss_data_len == 0) ||
562 		    (dsn_ack_opt.mdss_dsn == 0)) {
563 			goto ret_optlen;
564 		}
565 
566 		if (tp->t_mpflags & TMPF_SEND_DFIN) {
567 			DO_FIN(dsn_ack_opt);
568 		}
569 
570 		dsn_ack_opt.mdss_ack =
571 		    htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
572 
573 		dsn_ack_opt.mdss_dsn = mptcp_hton64(dsn_ack_opt.mdss_dsn);
574 		dsn_ack_opt.mdss_subflow_seqn = htonl(
575 			dsn_ack_opt.mdss_subflow_seqn);
576 		dsn_ack_opt.mdss_data_len = htons(
577 			dsn_ack_opt.mdss_data_len);
578 
579 		memcpy(opt + optlen, &dsn_ack_opt, sizeof(dsn_ack_opt));
580 		if (do_csum) {
581 			*((uint16_t *)(void *)(opt + optlen + sizeof(dsn_ack_opt))) = dss_csum;
582 		}
583 
584 		optlen += dssoptlen;
585 
586 		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
587 
588 		*do_not_compress = TRUE;
589 
590 		goto ret_optlen;
591 	}
592 
593 	if ((tp->t_mpflags & TMPF_SEND_DSN) &&
594 	    (!send_64bit_dsn) &&
595 	    !(tp->t_mpflags & TMPF_MPTCP_ACKNOW)) {
596 		struct mptcp_dsn_opt dsn_opt;
597 		uint8_t dssoptlen = sizeof(struct mptcp_dsn_opt);
598 		uint16_t dss_csum;
599 
600 		if (do_csum) {
601 			dssoptlen += 2;
602 		}
603 
604 		CHECK_OPTLEN;
605 
606 		bzero(&dsn_opt, sizeof(dsn_opt));
607 		dsn_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
608 		dsn_opt.mdss_copt.mdss_subtype = MPO_DSS;
609 		dsn_opt.mdss_copt.mdss_len = dssoptlen;
610 		dsn_opt.mdss_copt.mdss_flags |= MDSS_M;
611 
612 		CHECK_DATALEN;
613 
614 		mptcp_output_getm_dsnmap32(so, off, &dsn_opt.mdss_dsn,
615 		    &dsn_opt.mdss_subflow_seqn,
616 		    &dsn_opt.mdss_data_len,
617 		    &dss_csum);
618 
619 		if ((dsn_opt.mdss_data_len == 0) ||
620 		    (dsn_opt.mdss_dsn == 0)) {
621 			goto ret_optlen;
622 		}
623 
624 		if (tp->t_mpflags & TMPF_SEND_DFIN) {
625 			DO_FIN(dsn_opt);
626 		}
627 
628 		dsn_opt.mdss_dsn = htonl(dsn_opt.mdss_dsn);
629 		dsn_opt.mdss_subflow_seqn = htonl(dsn_opt.mdss_subflow_seqn);
630 		dsn_opt.mdss_data_len = htons(dsn_opt.mdss_data_len);
631 		memcpy(opt + optlen, &dsn_opt, sizeof(dsn_opt));
632 		if (do_csum) {
633 			*((uint16_t *)(void *)(opt + optlen + sizeof(dsn_opt))) = dss_csum;
634 		}
635 
636 		optlen += dssoptlen;
637 		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
638 
639 		*do_not_compress = TRUE;
640 
641 		goto ret_optlen;
642 	}
643 
644 	/* 32-bit Data ACK option */
645 	if ((tp->t_mpflags & TMPF_MPTCP_ACKNOW) &&
646 	    (!send_64bit_ack) &&
647 	    !(tp->t_mpflags & TMPF_SEND_DSN) &&
648 	    !(tp->t_mpflags & TMPF_SEND_DFIN)) {
649 		struct mptcp_data_ack_opt dack_opt;
650 		uint8_t dssoptlen = 0;
651 do_ack32_only:
652 		dssoptlen = sizeof(dack_opt);
653 
654 		CHECK_OPTLEN;
655 
656 		bzero(&dack_opt, dssoptlen);
657 		dack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
658 		dack_opt.mdss_copt.mdss_len = dssoptlen;
659 		dack_opt.mdss_copt.mdss_subtype = MPO_DSS;
660 		dack_opt.mdss_copt.mdss_flags |= MDSS_A;
661 		dack_opt.mdss_ack =
662 		    htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
663 		memcpy(opt + optlen, &dack_opt, dssoptlen);
664 		optlen += dssoptlen;
665 		VERIFY(optlen <= MAX_TCPOPTLEN);
666 		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
667 		goto ret_optlen;
668 	}
669 
670 	/* 64-bit Data ACK option */
671 	if ((tp->t_mpflags & TMPF_MPTCP_ACKNOW) &&
672 	    (send_64bit_ack) &&
673 	    !(tp->t_mpflags & TMPF_SEND_DSN) &&
674 	    !(tp->t_mpflags & TMPF_SEND_DFIN)) {
675 		struct mptcp_data_ack64_opt dack_opt;
676 		uint8_t dssoptlen = 0;
677 do_ack64_only:
678 		dssoptlen = sizeof(dack_opt);
679 
680 		CHECK_OPTLEN;
681 
682 		bzero(&dack_opt, dssoptlen);
683 		dack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
684 		dack_opt.mdss_copt.mdss_len = dssoptlen;
685 		dack_opt.mdss_copt.mdss_subtype = MPO_DSS;
686 		dack_opt.mdss_copt.mdss_flags |= (MDSS_A | MDSS_a);
687 		dack_opt.mdss_ack = mptcp_hton64(mp_tp->mpt_rcvnxt);
688 		/*
689 		 * The other end should retransmit 64-bit DSN until it
690 		 * receives a 64-bit ACK.
691 		 */
692 		mp_tp->mpt_flags &= ~MPTCPF_SND_64BITACK;
693 		memcpy(opt + optlen, &dack_opt, dssoptlen);
694 		optlen += dssoptlen;
695 		VERIFY(optlen <= MAX_TCPOPTLEN);
696 		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
697 		goto ret_optlen;
698 	}
699 
700 	/* 32-bit DSS+Data ACK option */
701 	if ((tp->t_mpflags & TMPF_SEND_DSN) &&
702 	    (!send_64bit_dsn) &&
703 	    (!send_64bit_ack) &&
704 	    (tp->t_mpflags & TMPF_MPTCP_ACKNOW)) {
705 		struct mptcp_dss_ack_opt dss_ack_opt;
706 		uint8_t dssoptlen = sizeof(dss_ack_opt);
707 		uint16_t dss_csum;
708 
709 		if (do_csum) {
710 			dssoptlen += 2;
711 		}
712 
713 		CHECK_OPTLEN;
714 
715 		bzero(&dss_ack_opt, sizeof(dss_ack_opt));
716 		dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
717 		dss_ack_opt.mdss_copt.mdss_len = dssoptlen;
718 		dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
719 		dss_ack_opt.mdss_copt.mdss_flags |= MDSS_A | MDSS_M;
720 		dss_ack_opt.mdss_ack =
721 		    htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
722 
723 		CHECK_DATALEN;
724 
725 		mptcp_output_getm_dsnmap32(so, off, &dss_ack_opt.mdss_dsn,
726 		    &dss_ack_opt.mdss_subflow_seqn,
727 		    &dss_ack_opt.mdss_data_len,
728 		    &dss_csum);
729 
730 		if ((dss_ack_opt.mdss_data_len == 0) ||
731 		    (dss_ack_opt.mdss_dsn == 0)) {
732 			goto do_ack32_only;
733 		}
734 
735 		if (tp->t_mpflags & TMPF_SEND_DFIN) {
736 			DO_FIN(dss_ack_opt);
737 		}
738 
739 		dss_ack_opt.mdss_dsn = htonl(dss_ack_opt.mdss_dsn);
740 		dss_ack_opt.mdss_subflow_seqn =
741 		    htonl(dss_ack_opt.mdss_subflow_seqn);
742 		dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
743 		memcpy(opt + optlen, &dss_ack_opt, sizeof(dss_ack_opt));
744 		if (do_csum) {
745 			*((uint16_t *)(void *)(opt + optlen + sizeof(dss_ack_opt))) = dss_csum;
746 		}
747 
748 		optlen += dssoptlen;
749 
750 		if (optlen > MAX_TCPOPTLEN) {
751 			panic("optlen too large");
752 		}
753 		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
754 		goto ret_optlen;
755 	}
756 
757 	/* 32-bit DSS + 64-bit DACK option */
758 	if ((tp->t_mpflags & TMPF_SEND_DSN) &&
759 	    (!send_64bit_dsn) &&
760 	    (send_64bit_ack) &&
761 	    (tp->t_mpflags & TMPF_MPTCP_ACKNOW)) {
762 		struct mptcp_dss32_ack64_opt dss_ack_opt;
763 		uint8_t dssoptlen = sizeof(dss_ack_opt);
764 		uint16_t dss_csum;
765 
766 		if (do_csum) {
767 			dssoptlen += 2;
768 		}
769 
770 		CHECK_OPTLEN;
771 
772 		bzero(&dss_ack_opt, sizeof(dss_ack_opt));
773 		dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
774 		dss_ack_opt.mdss_copt.mdss_len = dssoptlen;
775 		dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
776 		dss_ack_opt.mdss_copt.mdss_flags |= MDSS_M | MDSS_A | MDSS_a;
777 		dss_ack_opt.mdss_ack =
778 		    mptcp_hton64(mp_tp->mpt_rcvnxt);
779 
780 		CHECK_DATALEN;
781 
782 		mptcp_output_getm_dsnmap32(so, off, &dss_ack_opt.mdss_dsn,
783 		    &dss_ack_opt.mdss_subflow_seqn,
784 		    &dss_ack_opt.mdss_data_len,
785 		    &dss_csum);
786 
787 		if ((dss_ack_opt.mdss_data_len == 0) ||
788 		    (dss_ack_opt.mdss_dsn == 0)) {
789 			goto do_ack64_only;
790 		}
791 
792 		if (tp->t_mpflags & TMPF_SEND_DFIN) {
793 			DO_FIN(dss_ack_opt);
794 		}
795 
796 		dss_ack_opt.mdss_dsn = htonl(dss_ack_opt.mdss_dsn);
797 		dss_ack_opt.mdss_subflow_seqn =
798 		    htonl(dss_ack_opt.mdss_subflow_seqn);
799 		dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
800 		memcpy(opt + optlen, &dss_ack_opt, sizeof(dss_ack_opt));
801 		if (do_csum) {
802 			*((uint16_t *)(void *)(opt + optlen + sizeof(dss_ack_opt))) = dss_csum;
803 		}
804 
805 		optlen += dssoptlen;
806 
807 		if (optlen > MAX_TCPOPTLEN) {
808 			panic("optlen too large");
809 		}
810 		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
811 
812 		*do_not_compress = TRUE;
813 
814 		goto ret_optlen;
815 	}
816 
817 	if (tp->t_mpflags & TMPF_SEND_DFIN) {
818 		uint8_t dssoptlen = sizeof(struct mptcp_dss_ack_opt);
819 		struct mptcp_dss_ack_opt dss_ack_opt;
820 		uint16_t dss_csum;
821 
822 		if (do_csum) {
823 			uint64_t dss_val = mptcp_hton64(mp_tp->mpt_sndmax - 1);
824 			uint16_t dlen = htons(1);
825 			uint32_t sseq = 0;
826 			uint32_t sum;
827 
828 
829 			dssoptlen += 2;
830 
831 			sum = in_pseudo64(dss_val, sseq, dlen);
832 			ADDCARRY(sum);
833 			dss_csum = ~sum & 0xffff;
834 		}
835 
836 		CHECK_OPTLEN;
837 
838 		bzero(&dss_ack_opt, sizeof(dss_ack_opt));
839 
840 		/*
841 		 * Data FIN occupies one sequence space.
842 		 * Don't send it if it has been Acked.
843 		 */
844 		if ((mp_tp->mpt_sndnxt + 1 != mp_tp->mpt_sndmax) ||
845 		    (mp_tp->mpt_snduna == mp_tp->mpt_sndmax)) {
846 			goto ret_optlen;
847 		}
848 
849 		dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
850 		dss_ack_opt.mdss_copt.mdss_len = dssoptlen;
851 		dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
852 		dss_ack_opt.mdss_copt.mdss_flags |= MDSS_A | MDSS_M | MDSS_F;
853 		dss_ack_opt.mdss_ack =
854 		    htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
855 		dss_ack_opt.mdss_dsn =
856 		    htonl(MPTCP_DATASEQ_LOW32(mp_tp->mpt_sndmax - 1));
857 		dss_ack_opt.mdss_subflow_seqn = 0;
858 		dss_ack_opt.mdss_data_len = 1;
859 		dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
860 		memcpy(opt + optlen, &dss_ack_opt, sizeof(dss_ack_opt));
861 		if (do_csum) {
862 			*((uint16_t *)(void *)(opt + optlen + sizeof(dss_ack_opt))) = dss_csum;
863 		}
864 
865 		optlen += dssoptlen;
866 
867 		*do_not_compress = TRUE;
868 	}
869 
870 ret_optlen:
871 	if (TRUE == *p_mptcp_acknow) {
872 		uint32_t new_mpt_flags = tp->t_mpflags & TMPF_MPTCP_SIGNALS;
873 
874 		/*
875 		 * If none of the above mpflags were acted on by
876 		 * this routine, reset these flags and set p_mptcp_acknow
877 		 * to false.
878 		 *
879 		 * XXX The reset value of p_mptcp_acknow can be used
880 		 * to communicate tcp_output to NOT send a pure ack without any
881 		 * MPTCP options as it will be treated as a dup ack.
882 		 * Since the instances of mptcp_setup_opts not acting on
883 		 * these options are mostly corner cases and sending a dup
884 		 * ack here would only have an impact if the system
885 		 * has sent consecutive dup acks before this false one,
886 		 * we haven't modified the logic in tcp_output to avoid
887 		 * that.
888 		 */
889 		if (old_mpt_flags == new_mpt_flags) {
890 			tp->t_mpflags &= ~TMPF_MPTCP_SIGNALS;
891 			*p_mptcp_acknow = FALSE;
892 		}
893 	}
894 
895 	return optlen;
896 }
897 
898 /*
899  * MPTCP Options Input Processing
900  */
901 
902 static int
mptcp_sanitize_option(struct tcpcb * tp,int mptcp_subtype)903 mptcp_sanitize_option(struct tcpcb *tp, int mptcp_subtype)
904 {
905 	struct mptcb *mp_tp = tptomptp(tp);
906 	int ret = 1;
907 
908 	switch (mptcp_subtype) {
909 	case MPO_CAPABLE:
910 		break;
911 	case MPO_JOIN:                  /* fall through */
912 	case MPO_DSS:                   /* fall through */
913 	case MPO_FASTCLOSE:             /* fall through */
914 	case MPO_FAIL:                  /* fall through */
915 	case MPO_REMOVE_ADDR:           /* fall through */
916 	case MPO_ADD_ADDR:              /* fall through */
917 	case MPO_PRIO:                  /* fall through */
918 		if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
919 			ret = 0;
920 		}
921 		break;
922 	default:
923 		ret = 0;
924 		os_log_error(mptcp_log_handle, "%s - %lx: type = %d \n", __func__,
925 		    (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte), mptcp_subtype);
926 		break;
927 	}
928 	return ret;
929 }
930 
931 static void
mptcp_do_mpcapable_opt(struct tcpcb * tp,u_char * cp __ended_by (optend),u_char * optend __unused,struct tcphdr * th,uint8_t optlen)932 mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp __ended_by(optend), u_char *optend __unused, struct tcphdr *th,
933     uint8_t optlen)
934 {
935 	struct mptcp_mpcapable_opt_rsp *rsp;
936 	struct mptcb *mp_tp = tptomptp(tp);
937 	struct mptses *mpte = mp_tp->mpt_mpte;
938 
939 	/* Only valid on SYN/ACK */
940 	if ((th->th_flags & (TH_SYN | TH_ACK)) != (TH_SYN | TH_ACK)) {
941 		return;
942 	}
943 
944 	/* handle SYN/ACK retransmission by acknowledging with ACK */
945 	if (mp_tp->mpt_state >= MPTCPS_ESTABLISHED) {
946 		return;
947 	}
948 
949 	/* A SYN/ACK contains peer's key and flags */
950 	if (optlen != sizeof(struct mptcp_mpcapable_opt_rsp)) {
951 		/* complain */
952 		os_log_error(mptcp_log_handle, "%s - %lx: SYN_ACK optlen = %u, sizeof mp opt = %lu \n",
953 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), optlen,
954 		    sizeof(struct mptcp_mpcapable_opt_rsp));
955 		tcpstat.tcps_invalid_mpcap++;
956 		return;
957 	}
958 
959 	rsp = (struct mptcp_mpcapable_opt_rsp *)cp;
960 
961 	if (!(rsp->mmc_common.mmco_flags & MPCAP_PROPOSAL_SBIT) ||
962 	    rsp->mmc_common.mmco_flags & (MPCAP_BBIT | MPCAP_DBIT |
963 	    MPCAP_EBIT | MPCAP_FBIT | MPCAP_GBIT)) {
964 		tcpstat.tcps_invalid_mpcap++;
965 		return;
966 	}
967 
968 	/*
969 	 * If checksum flag is set, enable MPTCP checksum, even if
970 	 * it was not negotiated on the first SYN.
971 	 */
972 	if (rsp->mmc_common.mmco_flags & MPCAP_CHECKSUM_CBIT) {
973 		mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
974 	}
975 
976 	if (rsp->mmc_common.mmco_flags & MPCAP_UNICAST_IPBIT) {
977 		mpte->mpte_flags |= MPTE_UNICAST_IP;
978 
979 		/* We need an explicit signal for the addresses - zero the existing ones */
980 		memset(&mpte->mpte_sub_dst_v4, 0, sizeof(mpte->mpte_sub_dst_v4));
981 		memset(&mpte->mpte_sub_dst_v6, 0, sizeof(mpte->mpte_sub_dst_v6));
982 	}
983 
984 	mp_tp->mpt_remotekey = rsp->mmc_localkey;
985 	/* For now just downgrade to the peer's version */
986 	if (rsp->mmc_common.mmco_version < mp_tp->mpt_version) {
987 		os_log_error(mptcp_log_handle, "local version: %d > peer version %d", mp_tp->mpt_version, rsp->mmc_common.mmco_version);
988 		mp_tp->mpt_version = rsp->mmc_common.mmco_version;
989 		tcpstat.tcps_mp_verdowngrade++;
990 		return;
991 	}
992 	if (mptcp_init_remote_parms(mp_tp) != 0) {
993 		tcpstat.tcps_invalid_mpcap++;
994 		return;
995 	}
996 	tcp_heuristic_mptcp_success(tp);
997 	tcp_cache_update_mptcp_version(tp, TRUE);
998 	tp->t_mpflags |= (TMPF_SND_KEYS | TMPF_MPTCP_TRUE);
999 }
1000 
1001 
1002 static void
mptcp_do_mpjoin_opt(struct tcpcb * tp,u_char * cp __ended_by (optend),u_char * optend __unused,struct tcphdr * th,uint8_t optlen)1003 mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp __ended_by(optend), u_char *optend __unused, struct tcphdr *th, uint8_t optlen)
1004 {
1005 	struct mptcp_mpjoin_opt_rsp *join_rsp;
1006 	int error;
1007 
1008 	/* Only valid on SYN/ACK */
1009 	if ((th->th_flags & (TH_SYN | TH_ACK)) != (TH_SYN | TH_ACK)) {
1010 		return;
1011 	}
1012 
1013 	if (optlen != sizeof(struct mptcp_mpjoin_opt_rsp)) {
1014 		os_log_error(mptcp_log_handle, "%s - %lx: SYN_ACK: unexpected "
1015 		    "optlen = %u mp option = %lu\n", __func__,
1016 		    (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp)->mpt_mpte),
1017 		    optlen, sizeof(struct mptcp_mpjoin_opt_rsp));
1018 		tp->t_mpflags &= ~TMPF_PREESTABLISHED;
1019 		/* send RST and close */
1020 		goto join_error;
1021 	}
1022 
1023 	join_rsp = (struct mptcp_mpjoin_opt_rsp *)cp;
1024 
1025 	mptcp_set_raddr_rand(tp->t_local_aid, tptomptp(tp),
1026 	    join_rsp->mmjo_addr_id, join_rsp->mmjo_rand);
1027 	error = mptcp_validate_join_hmac(tp,
1028 	    (u_char*)&join_rsp->mmjo_mac, HMAC_TRUNCATED_SYNACK);
1029 	if (error) {
1030 		os_log_error(mptcp_log_handle, "%s - %lx: SYN_ACK error=%d \n",
1031 		    __func__,
1032 		    (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp)->mpt_mpte),
1033 		    error);
1034 		tp->t_mpflags &= ~TMPF_PREESTABLISHED;
1035 		/* send RST and close */
1036 		goto join_error;
1037 	}
1038 	tp->t_mpflags |= (TMPF_SENT_JOIN | TMPF_SND_JACK);
1039 
1040 	return;
1041 
1042 join_error:
1043 	tcpstat.tcps_invalid_joins++;
1044 	if (tp->t_inpcb->inp_socket != NULL) {
1045 		soevent(tp->t_inpcb->inp_socket,
1046 		    SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
1047 	}
1048 }
1049 
1050 static int
mptcp_validate_join_hmac(struct tcpcb * tp,u_char * hmac __sized_by (mac_len),int mac_len)1051 mptcp_validate_join_hmac(struct tcpcb *tp, u_char* hmac __sized_by(mac_len), int mac_len)
1052 {
1053 	u_char digest[MAX(SHA1_RESULTLEN, SHA256_DIGEST_LENGTH)] = {0};
1054 	struct mptcb *mp_tp = tptomptp(tp);
1055 	u_int32_t rem_rand, loc_rand;
1056 
1057 	rem_rand = loc_rand = 0;
1058 
1059 	mptcp_get_rands(tp->t_local_aid, mp_tp, &loc_rand, &rem_rand);
1060 	if ((rem_rand == 0) || (loc_rand == 0)) {
1061 		return -1;
1062 	}
1063 
1064 	if (mp_tp->mpt_version == MPTCP_VERSION_0) {
1065 		mptcp_hmac_sha1(mp_tp->mpt_remotekey, mp_tp->mpt_localkey, rem_rand, loc_rand,
1066 		    digest);
1067 	} else {
1068 		uint32_t data[2];
1069 		data[0] = rem_rand;
1070 		data[1] = loc_rand;
1071 		mptcp_hmac_sha256(mp_tp->mpt_remotekey, mp_tp->mpt_localkey, (u_char *)data, 8, digest);
1072 	}
1073 
1074 	if (bcmp(digest, hmac, mac_len) == 0) {
1075 		return 0; /* matches */
1076 	} else {
1077 		printf("%s: remote key %llx local key %llx remote rand %x "
1078 		    "local rand %x \n", __func__, mp_tp->mpt_remotekey, mp_tp->mpt_localkey,
1079 		    rem_rand, loc_rand);
1080 		return -1;
1081 	}
1082 }
1083 
1084 /*
1085  * Update the mptcb send state variables, but the actual sbdrop occurs
1086  * in MPTCP layer
1087  */
1088 void
mptcp_data_ack_rcvd(struct mptcb * mp_tp,struct tcpcb * tp,u_int64_t full_dack)1089 mptcp_data_ack_rcvd(struct mptcb *mp_tp, struct tcpcb *tp, u_int64_t full_dack)
1090 {
1091 	uint64_t acked = full_dack - mp_tp->mpt_snduna;
1092 
1093 	VERIFY(acked <= INT_MAX);
1094 
1095 	if (acked) {
1096 		struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);
1097 
1098 		if (acked > mp_so->so_snd.sb_cc) {
1099 			if (acked > mp_so->so_snd.sb_cc + 1 ||
1100 			    mp_tp->mpt_state < MPTCPS_FIN_WAIT_1) {
1101 				os_log_error(mptcp_log_handle, "%s - %lx: acked %u, sb_cc %u full %u suna %u state %u\n",
1102 				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
1103 				    (uint32_t)acked, mp_so->so_snd.sb_cc,
1104 				    (uint32_t)full_dack, (uint32_t)mp_tp->mpt_snduna,
1105 				    mp_tp->mpt_state);
1106 			}
1107 
1108 			sbdrop(&mp_so->so_snd, (int)mp_so->so_snd.sb_cc);
1109 		} else {
1110 			sbdrop(&mp_so->so_snd, (int)acked);
1111 		}
1112 
1113 		mp_tp->mpt_snduna += acked;
1114 		/* In degraded mode, we may get some Data ACKs */
1115 		if ((tp->t_mpflags & TMPF_TCP_FALLBACK) &&
1116 		    !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
1117 		    MPTCP_SEQ_GT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
1118 			/* bring back sndnxt to retransmit MPTCP data */
1119 			mp_tp->mpt_sndnxt = mp_tp->mpt_dsn_at_csum_fail;
1120 			mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
1121 			tp->t_inpcb->inp_socket->so_flags1 |=
1122 			    SOF1_POST_FALLBACK_SYNC;
1123 		}
1124 
1125 		mptcp_clean_reinjectq(mp_tp->mpt_mpte);
1126 
1127 		sowwakeup(mp_so);
1128 	}
1129 	if (full_dack == mp_tp->mpt_sndmax &&
1130 	    mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
1131 		mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_ACK);
1132 		tp->t_mpflags &= ~TMPF_SEND_DFIN;
1133 	}
1134 
1135 	if ((tp->t_mpflags & TMPF_SND_KEYS) &&
1136 	    MPTCP_SEQ_GT(mp_tp->mpt_snduna, mp_tp->mpt_local_idsn + 1)) {
1137 		tp->t_mpflags &= ~TMPF_SND_KEYS;
1138 	}
1139 }
1140 
1141 void
mptcp_update_window_wakeup(struct tcpcb * tp)1142 mptcp_update_window_wakeup(struct tcpcb *tp)
1143 {
1144 	struct mptcb *mp_tp = tptomptp(tp);
1145 
1146 	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));
1147 
1148 	if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
1149 		mp_tp->mpt_sndwnd = tp->snd_wnd;
1150 		mp_tp->mpt_sndwl1 = mp_tp->mpt_rcvnxt;
1151 		mp_tp->mpt_sndwl2 = mp_tp->mpt_snduna;
1152 	}
1153 
1154 	sowwakeup(tp->t_inpcb->inp_socket);
1155 }
1156 
1157 static void
mptcp_update_window(struct mptcb * mp_tp,u_int64_t ack,u_int64_t seq,u_int32_t tiwin)1158 mptcp_update_window(struct mptcb *mp_tp, u_int64_t ack, u_int64_t seq, u_int32_t tiwin)
1159 {
1160 	if (MPTCP_SEQ_LT(mp_tp->mpt_sndwl1, seq) ||
1161 	    (mp_tp->mpt_sndwl1 == seq &&
1162 	    (MPTCP_SEQ_LT(mp_tp->mpt_sndwl2, ack) ||
1163 	    (mp_tp->mpt_sndwl2 == ack && tiwin > mp_tp->mpt_sndwnd)))) {
1164 		mp_tp->mpt_sndwnd = tiwin;
1165 		mp_tp->mpt_sndwl1 = seq;
1166 		mp_tp->mpt_sndwl2 = ack;
1167 	}
1168 }
1169 
1170 static void
mptcp_do_dss_opt_ack_meat(u_int64_t full_dack,u_int64_t full_dsn,struct tcpcb * tp,u_int32_t tiwin)1171 mptcp_do_dss_opt_ack_meat(u_int64_t full_dack, u_int64_t full_dsn,
1172     struct tcpcb *tp, u_int32_t tiwin)
1173 {
1174 	struct mptcb *mp_tp = tptomptp(tp);
1175 	int close_notify = 0;
1176 
1177 	tp->t_mpflags |= TMPF_RCVD_DACK;
1178 
1179 	if (MPTCP_SEQ_LEQ(full_dack, mp_tp->mpt_sndmax) &&
1180 	    MPTCP_SEQ_GEQ(full_dack, mp_tp->mpt_snduna)) {
1181 		mptcp_data_ack_rcvd(mp_tp, tp, full_dack);
1182 		if (mp_tp->mpt_state > MPTCPS_FIN_WAIT_2) {
1183 			close_notify = 1;
1184 		}
1185 		if (mp_tp->mpt_flags & MPTCPF_RCVD_64BITACK) {
1186 			mp_tp->mpt_flags &= ~MPTCPF_RCVD_64BITACK;
1187 			mp_tp->mpt_flags &= ~MPTCPF_SND_64BITDSN;
1188 		}
1189 		mptcp_notify_mpready(tp->t_inpcb->inp_socket);
1190 		if (close_notify) {
1191 			mptcp_notify_close(tp->t_inpcb->inp_socket);
1192 		}
1193 	}
1194 
1195 	mptcp_update_window(mp_tp, full_dack, full_dsn, tiwin);
1196 }
1197 
1198 static void
mptcp_do_dss_opt_meat(u_char * cp __ended_by (optend),u_char * optend __unused,struct tcpcb * tp,struct tcphdr * th)1199 mptcp_do_dss_opt_meat(u_char *cp __ended_by(optend), u_char *optend __unused, struct tcpcb *tp, struct tcphdr *th)
1200 {
1201 	struct mptcp_dss_copt *dss_rsp;
1202 	uint64_t full_dack = 0;
1203 	uint32_t tiwin = th->th_win << tp->snd_scale;
1204 	struct mptcb *mp_tp = tptomptp(tp);
1205 	unsigned int csum_len = 0;
1206 
1207 	/* bounds-checks happens in the caller of the function */
1208 	dss_rsp = (struct mptcp_dss_copt *)cp;
1209 
1210 	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
1211 		csum_len = 2;
1212 	}
1213 
1214 	dss_rsp->mdss_flags &= (MDSS_A | MDSS_a | MDSS_M | MDSS_m);
1215 	switch (dss_rsp->mdss_flags) {
1216 	case (MDSS_M):
1217 	{
1218 		/* 32-bit DSS, No Data ACK */
1219 		struct mptcp_dsn_opt *dss_rsp1;
1220 
1221 		if (dss_rsp->mdss_len != sizeof(struct mptcp_dsn_opt) + csum_len) {
1222 			goto err_len;
1223 		}
1224 
1225 		dss_rsp1 = (struct mptcp_dsn_opt *)cp;
1226 
1227 		if (csum_len == 0) {
1228 			mptcp_update_dss_rcv_state(dss_rsp1, tp, 0);
1229 		} else {
1230 			mptcp_update_dss_rcv_state(dss_rsp1, tp,
1231 			    *(uint16_t *)(void *)(cp +
1232 			    (dss_rsp1->mdss_copt.mdss_len - csum_len)));
1233 		}
1234 		break;
1235 	}
1236 	case (MDSS_A):
1237 	{
1238 		/* 32-bit Data ACK, no DSS */
1239 		struct mptcp_data_ack_opt *dack_opt;
1240 		uint32_t dack;
1241 
1242 		if (dss_rsp->mdss_len != sizeof(struct mptcp_data_ack_opt)) {
1243 			goto err_len;
1244 		}
1245 
1246 		dack_opt = (struct mptcp_data_ack_opt *)cp;
1247 
1248 		dack = dack_opt->mdss_ack;
1249 		NTOHL(dack);
1250 		MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);
1251 		mptcp_do_dss_opt_ack_meat(full_dack, mp_tp->mpt_sndwl1, tp, tiwin);
1252 		break;
1253 	}
1254 	case (MDSS_M | MDSS_A):
1255 	{
1256 		/* 32-bit Data ACK + 32-bit DSS */
1257 		struct mptcp_dss_ack_opt *dss_ack_rsp;
1258 		uint64_t full_dsn;
1259 		uint16_t csum = 0;
1260 		uint32_t dack;
1261 
1262 		if (dss_rsp->mdss_len != sizeof(struct mptcp_dss_ack_opt) + csum_len) {
1263 			goto err_len;
1264 		}
1265 
1266 		dss_ack_rsp = (struct mptcp_dss_ack_opt *)cp;
1267 
1268 		dack = ntohl(dss_ack_rsp->mdss_ack);
1269 
1270 		MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);
1271 
1272 		NTOHL(dss_ack_rsp->mdss_dsn);
1273 		NTOHL(dss_ack_rsp->mdss_subflow_seqn);
1274 		NTOHS(dss_ack_rsp->mdss_data_len);
1275 		MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_ack_rsp->mdss_dsn, full_dsn);
1276 
1277 		mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);
1278 
1279 		if (csum_len != 0) {
1280 			csum = *(uint16_t *)(void *)(cp + (dss_ack_rsp->mdss_copt.mdss_len - csum_len));
1281 		}
1282 
1283 		mptcp_update_rcv_state_meat(mp_tp, tp,
1284 		    full_dsn,
1285 		    dss_ack_rsp->mdss_subflow_seqn,
1286 		    dss_ack_rsp->mdss_data_len,
1287 		    csum);
1288 		break;
1289 	}
1290 	case (MDSS_M | MDSS_m):
1291 	{
1292 		/* 64-bit DSS , No Data ACK */
1293 		struct mptcp_dsn64_opt *dsn64;
1294 		uint64_t full_dsn;
1295 		uint16_t csum = 0;
1296 
1297 		if (dss_rsp->mdss_len != sizeof(struct mptcp_dsn64_opt) + csum_len) {
1298 			goto err_len;
1299 		}
1300 
1301 		dsn64 = (struct mptcp_dsn64_opt *)cp;
1302 
1303 		mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
1304 
1305 		full_dsn = mptcp_ntoh64(dsn64->mdss_dsn);
1306 		NTOHL(dsn64->mdss_subflow_seqn);
1307 		NTOHS(dsn64->mdss_data_len);
1308 
1309 		if (csum_len != 0) {
1310 			csum = *(uint16_t *)(void *)(cp + dsn64->mdss_copt.mdss_len - csum_len);
1311 		}
1312 
1313 		mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1314 		    dsn64->mdss_subflow_seqn,
1315 		    dsn64->mdss_data_len,
1316 		    csum);
1317 		break;
1318 	}
1319 	case (MDSS_A | MDSS_a):
1320 	{
1321 		/* 64-bit Data ACK, no DSS */
1322 		struct mptcp_data_ack64_opt *dack64;
1323 
1324 		if (dss_rsp->mdss_len != sizeof(struct mptcp_data_ack64_opt)) {
1325 			goto err_len;
1326 		}
1327 
1328 		dack64 = (struct mptcp_data_ack64_opt *)cp;
1329 
1330 		mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
1331 
1332 		full_dack = mptcp_ntoh64(dack64->mdss_ack);
1333 		mptcp_do_dss_opt_ack_meat(full_dack, mp_tp->mpt_sndwl1, tp, tiwin);
1334 		break;
1335 	}
1336 	case (MDSS_M | MDSS_m | MDSS_A):
1337 	{
1338 		/* 64-bit DSS + 32-bit Data ACK */
1339 		struct mptcp_dss64_ack32_opt *dss_ack_rsp;
1340 		uint64_t full_dsn;
1341 		uint16_t csum = 0;
1342 
1343 		if (dss_rsp->mdss_len != sizeof(struct mptcp_dss64_ack32_opt) + csum_len) {
1344 			goto err_len;
1345 		}
1346 
1347 		dss_ack_rsp = (struct mptcp_dss64_ack32_opt *)cp;
1348 
1349 		uint32_t dack = dss_ack_rsp->mdss_ack;
1350 		NTOHL(dack);
1351 		mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
1352 		MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);
1353 
1354 		full_dsn = mptcp_ntoh64(dss_ack_rsp->mdss_dsn);
1355 		NTOHL(dss_ack_rsp->mdss_subflow_seqn);
1356 		NTOHS(dss_ack_rsp->mdss_data_len);
1357 
1358 		mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);
1359 
1360 		if (csum_len != 0) {
1361 			csum = *(uint16_t *)(void *)(cp + dss_ack_rsp->mdss_copt.mdss_len - csum_len);
1362 		}
1363 
1364 		mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1365 		    dss_ack_rsp->mdss_subflow_seqn,
1366 		    dss_ack_rsp->mdss_data_len,
1367 		    csum);
1368 
1369 		break;
1370 	}
1371 	case (MDSS_M | MDSS_A | MDSS_a):
1372 	{
1373 		/* 32-bit DSS + 64-bit Data ACK */
1374 		struct mptcp_dss32_ack64_opt *dss32_ack64_opt;
1375 		uint64_t full_dsn;
1376 
1377 		if (dss_rsp->mdss_len != sizeof(struct mptcp_dss32_ack64_opt) + csum_len) {
1378 			goto err_len;
1379 		}
1380 
1381 		dss32_ack64_opt = (struct mptcp_dss32_ack64_opt *)cp;
1382 
1383 		full_dack = mptcp_ntoh64(dss32_ack64_opt->mdss_ack);
1384 		NTOHL(dss32_ack64_opt->mdss_dsn);
1385 		mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
1386 		MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt,
1387 		    dss32_ack64_opt->mdss_dsn, full_dsn);
1388 		NTOHL(dss32_ack64_opt->mdss_subflow_seqn);
1389 		NTOHS(dss32_ack64_opt->mdss_data_len);
1390 
1391 		mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);
1392 		if (csum_len == 0) {
1393 			mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1394 			    dss32_ack64_opt->mdss_subflow_seqn,
1395 			    dss32_ack64_opt->mdss_data_len, 0);
1396 		} else {
1397 			mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1398 			    dss32_ack64_opt->mdss_subflow_seqn,
1399 			    dss32_ack64_opt->mdss_data_len,
1400 			    *(uint16_t *)(void *)(cp +
1401 			    dss32_ack64_opt->mdss_copt.mdss_len -
1402 			    csum_len));
1403 		}
1404 		break;
1405 	}
1406 	case (MDSS_M | MDSS_m | MDSS_A | MDSS_a):
1407 	{
1408 		/* 64-bit DSS + 64-bit Data ACK */
1409 		struct mptcp_dss64_ack64_opt *dss64_ack64;
1410 		uint64_t full_dsn;
1411 
1412 		if (dss_rsp->mdss_len != sizeof(struct mptcp_dss64_ack64_opt) + csum_len) {
1413 			goto err_len;
1414 		}
1415 
1416 		dss64_ack64 = (struct mptcp_dss64_ack64_opt *)cp;
1417 
1418 		mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
1419 		mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
1420 		full_dsn = mptcp_ntoh64(dss64_ack64->mdss_dsn);
1421 		full_dack = mptcp_ntoh64(dss64_ack64->mdss_dsn);
1422 		mptcp_do_dss_opt_ack_meat(full_dack, full_dsn, tp, tiwin);
1423 		NTOHL(dss64_ack64->mdss_subflow_seqn);
1424 		NTOHS(dss64_ack64->mdss_data_len);
1425 		if (csum_len == 0) {
1426 			mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1427 			    dss64_ack64->mdss_subflow_seqn,
1428 			    dss64_ack64->mdss_data_len, 0);
1429 		} else {
1430 			mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1431 			    dss64_ack64->mdss_subflow_seqn,
1432 			    dss64_ack64->mdss_data_len,
1433 			    *(uint16_t *)(void *)(cp +
1434 			    dss64_ack64->mdss_copt.mdss_len -
1435 			    csum_len));
1436 		}
1437 		break;
1438 	}
1439 	default:
1440 		break;
1441 	}
1442 
1443 	return;
1444 
1445 err_len:
1446 	os_log_error(mptcp_log_handle, "%s - %lx: bad len = %d dss: %x\n",
1447 	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
1448 	    dss_rsp->mdss_len, dss_rsp->mdss_flags);
1449 	return;
1450 }
1451 
1452 static void
mptcp_do_dss_opt(struct tcpcb * tp,u_char * cp __ended_by (optend),u_char * optend,struct tcphdr * th,uint8_t optlen)1453 mptcp_do_dss_opt(struct tcpcb *tp, u_char *cp __ended_by(optend), u_char *optend, struct tcphdr *th, uint8_t optlen)
1454 {
1455 	struct mptcp_dss_copt *dss_rsp;
1456 
1457 	if (!tptomptp(tp)) {
1458 		return;
1459 	}
1460 
1461 	if (optlen < sizeof(struct mptcp_dss_copt)) {
1462 		tcpstat.tcps_invalid_opt++;
1463 		return;
1464 	}
1465 	dss_rsp = (struct mptcp_dss_copt *)cp;
1466 
1467 	if (dss_rsp->mdss_subtype == MPO_DSS) {
1468 		if (dss_rsp->mdss_flags & MDSS_F) {
1469 			tp->t_mpsub->mpts_rcv_map.mpt_dfin = 1;
1470 		} else {
1471 			tp->t_mpsub->mpts_rcv_map.mpt_dfin = 0;
1472 		}
1473 
1474 		mptcp_do_dss_opt_meat(cp, optend, tp, th);
1475 	}
1476 }
1477 
1478 static void
mptcp_do_fastclose_opt(struct tcpcb * tp,u_char * cp __ended_by (optend),u_char * optend __unused,struct tcphdr * th,uint8_t optlen)1479 mptcp_do_fastclose_opt(struct tcpcb *tp, u_char *cp __ended_by(optend), u_char *optend __unused, struct tcphdr *th, uint8_t optlen)
1480 {
1481 	struct mptcp_fastclose_opt *fc_opt;
1482 	struct mptcb *mp_tp;
1483 
1484 	if (th->th_flags != TH_ACK) {
1485 		return;
1486 	}
1487 
1488 	if (optlen != sizeof(struct mptcp_fastclose_opt)) {
1489 		tcpstat.tcps_invalid_opt++;
1490 		return;
1491 	}
1492 
1493 	mp_tp = tptomptp(tp);
1494 	if (!mp_tp) {
1495 		return;
1496 	}
1497 
1498 	fc_opt = (struct mptcp_fastclose_opt *)cp;
1499 
1500 	if (fc_opt->mfast_key != mp_tp->mpt_localkey) {
1501 		tcpstat.tcps_invalid_opt++;
1502 		return;
1503 	}
1504 
1505 	/*
1506 	 * fastclose could make us more vulnerable to attacks, hence
1507 	 * accept only those that are at the next expected sequence number.
1508 	 */
1509 	if (th->th_seq != tp->rcv_nxt) {
1510 		tcpstat.tcps_invalid_opt++;
1511 		return;
1512 	}
1513 
1514 	/* Reset this flow */
1515 	tp->t_mpflags |= TMPF_FASTCLOSERCV;
1516 
1517 	if (tp->t_inpcb->inp_socket != NULL) {
1518 		soevent(tp->t_inpcb->inp_socket,
1519 		    SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
1520 	}
1521 }
1522 
1523 
1524 static void
mptcp_do_mpfail_opt(struct tcpcb * tp,u_char * cp __ended_by (optend),u_char * optend __unused,struct tcphdr * th,uint8_t optlen)1525 mptcp_do_mpfail_opt(struct tcpcb *tp, u_char *cp __ended_by(optend), u_char *optend __unused, struct tcphdr *th, uint8_t optlen)
1526 {
1527 	struct mptcp_mpfail_opt *fail_opt;
1528 	uint32_t mdss_subflow_seqn = 0;
1529 	struct mptcb *mp_tp;
1530 	int error = 0;
1531 
1532 	/*
1533 	 * mpfail could make us more vulnerable to attacks. Hence accept
1534 	 * only those that are the next expected sequence number.
1535 	 */
1536 	if (th->th_seq != tp->rcv_nxt) {
1537 		tcpstat.tcps_invalid_opt++;
1538 		return;
1539 	}
1540 
1541 	/* A packet without RST, must atleast have the ACK bit set */
1542 	if ((th->th_flags != TH_ACK) && (th->th_flags != TH_RST)) {
1543 		return;
1544 	}
1545 
1546 	if (optlen != sizeof(struct mptcp_mpfail_opt)) {
1547 		return;
1548 	}
1549 
1550 	fail_opt = (struct mptcp_mpfail_opt *)cp;
1551 
1552 	mp_tp = tptomptp(tp);
1553 
1554 	mp_tp->mpt_flags |= MPTCPF_RECVD_MPFAIL;
1555 	mp_tp->mpt_dsn_at_csum_fail = mptcp_hton64(fail_opt->mfail_dsn);
1556 	error = mptcp_get_map_for_dsn(tp->t_inpcb->inp_socket,
1557 	    mp_tp->mpt_dsn_at_csum_fail, &mdss_subflow_seqn);
1558 	if (error == 0) {
1559 		mp_tp->mpt_ssn_at_csum_fail = mdss_subflow_seqn;
1560 	}
1561 
1562 	mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
1563 }
1564 
1565 static boolean_t
mptcp_validate_add_addr_hmac(struct tcpcb * tp,u_char * hmac __sized_by (mac_len),u_char * msg __sized_by (msg_len),uint16_t msg_len,uint16_t mac_len)1566 mptcp_validate_add_addr_hmac(struct tcpcb *tp, u_char *hmac __sized_by(mac_len),
1567     u_char *msg __sized_by(msg_len), uint16_t msg_len, uint16_t mac_len)
1568 {
1569 	u_char digest[SHA256_DIGEST_LENGTH] = {0};
1570 	struct mptcb *mp_tp = tptomptp(tp);
1571 
1572 	VERIFY(mac_len <= SHA256_DIGEST_LENGTH);
1573 	mptcp_hmac_sha256(mp_tp->mpt_remotekey, mp_tp->mpt_localkey, msg, msg_len, digest);
1574 
1575 	if (bcmp(digest + SHA256_DIGEST_LENGTH - mac_len, hmac, mac_len) == 0) {
1576 		return true; /* matches */
1577 	} else {
1578 		return false;
1579 	}
1580 }
1581 
1582 static void
mptcp_do_add_addr_opt_v1(struct tcpcb * tp,u_char * cp __ended_by (optend),u_char * optend __unused,uint8_t optlen)1583 mptcp_do_add_addr_opt_v1(struct tcpcb *tp, u_char *cp __ended_by(optend), u_char *optend __unused, uint8_t optlen)
1584 {
1585 	struct mptcp_add_addr_opt *addr_opt;
1586 	struct mptcb *mp_tp = tptomptp(tp);
1587 	struct mptses *mpte = mp_tp->mpt_mpte;
1588 
1589 	if (optlen != MPTCP_V1_ADD_ADDR_OPT_LEN_V4 &&
1590 	    optlen != MPTCP_V1_ADD_ADDR_OPT_LEN_V4 + 2 &&
1591 	    optlen != MPTCP_V1_ADD_ADDR_OPT_LEN_V6 &&
1592 	    optlen != MPTCP_V1_ADD_ADDR_OPT_LEN_V6 + 2) {
1593 		os_log_error(mptcp_log_handle, "%s - %lx: Wrong ADD_ADDR length %u\n",
1594 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1595 		    optlen);
1596 
1597 		return;
1598 	}
1599 
1600 	addr_opt = (struct mptcp_add_addr_opt *)cp;
1601 
1602 	if ((addr_opt->maddr_flags & MPTCP_V1_ADD_ADDR_ECHO) != 0) {
1603 		os_log(mptcp_log_handle, "%s - %lx: Received ADD_ADDR with echo bit\n",
1604 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
1605 
1606 		return;
1607 	}
1608 
1609 	if (addr_opt->maddr_len < MPTCP_V1_ADD_ADDR_OPT_LEN_V6) {
1610 		struct sockaddr_in *dst = &mpte->mpte_sub_dst_v4;
1611 		struct in_addr *addr = (struct in_addr *)(void *)(cp + sizeof(*addr_opt));
1612 		in_addr_t haddr = ntohl(addr->s_addr);
1613 
1614 		if (IN_ZERONET(haddr) ||
1615 		    IN_LOOPBACK(haddr) ||
1616 		    IN_LINKLOCAL(haddr) ||
1617 		    IN_DS_LITE(haddr) ||
1618 		    IN_6TO4_RELAY_ANYCAST(haddr) ||
1619 		    IN_MULTICAST(haddr) ||
1620 		    INADDR_BROADCAST == haddr ||
1621 		    IN_PRIVATE(haddr) ||
1622 		    IN_SHARED_ADDRESS_SPACE(haddr)) {
1623 			os_log_error(mptcp_log_handle, "%s - %lx: ADD_ADDR invalid addr: %x\n",
1624 			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1625 			    addr->s_addr);
1626 
1627 			return;
1628 		}
1629 
1630 		u_char *hmac = (void *)(cp + addr_opt->maddr_len - HMAC_TRUNCATED_ADD_ADDR);
1631 		uint16_t msg_len = sizeof(struct mptcp_add_addr_hmac_msg_v4);
1632 		struct mptcp_add_addr_hmac_msg_v4 msg  = {0};
1633 		msg.maddr_addrid = addr_opt->maddr_addrid;
1634 		msg.maddr_addr = *addr;
1635 		if (addr_opt->maddr_len > MPTCP_V1_ADD_ADDR_OPT_LEN_V4) {
1636 			msg.maddr_port = *(uint16_t *)(void *)(cp + addr_opt->maddr_len - HMAC_TRUNCATED_ADD_ADDR - 2);
1637 		}
1638 		if (!mptcp_validate_add_addr_hmac(tp, hmac, (u_char *)&msg, msg_len, HMAC_TRUNCATED_ADD_ADDR)) {
1639 			os_log_error(mptcp_log_handle, "%s - %lx: ADD_ADDR addr: %x invalid HMAC\n",
1640 			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1641 			    addr->s_addr);
1642 			return;
1643 		}
1644 
1645 		dst->sin_len = sizeof(*dst);
1646 		dst->sin_family = AF_INET;
1647 		if (addr_opt->maddr_len > MPTCP_V1_ADD_ADDR_OPT_LEN_V4) {
1648 			dst->sin_port = *(uint16_t *)(void *)(cp + addr_opt->maddr_len - HMAC_TRUNCATED_ADD_ADDR - 2);
1649 		} else {
1650 			dst->sin_port = mpte->__mpte_dst_v4.sin_port;
1651 		}
1652 		dst->sin_addr.s_addr = addr->s_addr;
1653 		mpte->sub_dst_addr_id_v4 = addr_opt->maddr_addrid;
1654 		mpte->mpte_last_added_addr_is_v4 = TRUE;
1655 	} else {
1656 		struct sockaddr_in6 *dst = &mpte->mpte_sub_dst_v6;
1657 		struct in6_addr *addr = (struct in6_addr *)(void *)(cp + sizeof(*addr_opt));
1658 
1659 		if (IN6_IS_ADDR_LINKLOCAL(addr) ||
1660 		    IN6_IS_ADDR_MULTICAST(addr) ||
1661 		    IN6_IS_ADDR_UNSPECIFIED(addr) ||
1662 		    IN6_IS_ADDR_LOOPBACK(addr) ||
1663 		    IN6_IS_ADDR_V4COMPAT(addr) ||
1664 		    IN6_IS_ADDR_V4MAPPED(addr)) {
1665 			char dbuf[MAX_IPv6_STR_LEN];
1666 
1667 			inet_ntop(AF_INET6, addr, dbuf, sizeof(dbuf));
1668 			os_log_error(mptcp_log_handle, "%s - %lx: ADD_ADDRv6 invalid addr: %s\n",
1669 			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1670 			    dbuf);
1671 
1672 			return;
1673 		}
1674 
1675 		u_char *hmac = (void *)(cp + addr_opt->maddr_len - HMAC_TRUNCATED_ADD_ADDR);
1676 		uint16_t msg_len = sizeof(struct mptcp_add_addr_hmac_msg_v6);
1677 		struct mptcp_add_addr_hmac_msg_v6 msg  = {0};
1678 		msg.maddr_addrid = addr_opt->maddr_addrid;
1679 		msg.maddr_addr = *addr;
1680 		if (addr_opt->maddr_len > MPTCP_V1_ADD_ADDR_OPT_LEN_V6) {
1681 			msg.maddr_port = *(uint16_t *)(void *)(cp + addr_opt->maddr_len - HMAC_TRUNCATED_ADD_ADDR - 2);
1682 		}
1683 		if (!mptcp_validate_add_addr_hmac(tp, hmac, (u_char *)&msg, msg_len, HMAC_TRUNCATED_ADD_ADDR)) {
1684 			char dbuf[MAX_IPv6_STR_LEN];
1685 
1686 			inet_ntop(AF_INET6, addr, dbuf, sizeof(dbuf));
1687 			os_log_error(mptcp_log_handle, "%s - %lx: ADD_ADDR addr: %s invalid HMAC\n",
1688 			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1689 			    dbuf);
1690 			return;
1691 		}
1692 
1693 		dst->sin6_len = sizeof(*dst);
1694 		dst->sin6_family = AF_INET6;
1695 		if (addr_opt->maddr_len > MPTCP_V1_ADD_ADDR_OPT_LEN_V6) {
1696 			dst->sin6_port = *(uint16_t *)(void *)(cp + addr_opt->maddr_len - HMAC_TRUNCATED_ADD_ADDR - 2);
1697 		} else {
1698 			dst->sin6_port = mpte->__mpte_dst_v6.sin6_port;
1699 		}
1700 		memcpy(&dst->sin6_addr, addr, sizeof(*addr));
1701 		mpte->sub_dst_addr_id_v6 = addr_opt->maddr_addrid;
1702 		mpte->mpte_last_added_addr_is_v4 = FALSE;
1703 	}
1704 
1705 	os_log(mptcp_log_handle, "%s - %lx: Received ADD_ADDRv1\n",
1706 	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
1707 
1708 	/* Once an incoming ADD_ADDR for v1 is valid, it means that the peer
1709 	 * receiver our keys.
1710 	 */
1711 	tp->t_mpflags &= ~TMPF_SND_KEYS;
1712 	tp->t_mpflags |= TMPF_MPTCP_ECHO_ADDR;
1713 	tp->t_flags |= TF_ACKNOW;
1714 	mptcp_sched_create_subflows(mpte);
1715 }
1716 
1717 static void
mptcp_do_add_addr_opt_v0(struct mptses * mpte,u_char * cp __ended_by (optend),u_char * optend __unused,uint8_t optlen)1718 mptcp_do_add_addr_opt_v0(struct mptses *mpte, u_char *cp __ended_by(optend), u_char *optend __unused, uint8_t optlen)
1719 {
1720 	struct mptcp_add_addr_opt *addr_opt;
1721 
1722 	if (optlen != MPTCP_V0_ADD_ADDR_OPT_LEN_V4 &&
1723 	    optlen != MPTCP_V0_ADD_ADDR_OPT_LEN_V6) {
1724 		os_log_error(mptcp_log_handle, "%s - %lx: Wrong ADD_ADDR length %u\n",
1725 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1726 		    optlen);
1727 
1728 		return;
1729 	}
1730 
1731 	addr_opt = (struct mptcp_add_addr_opt *)cp;
1732 
1733 	if (addr_opt->maddr_len == MPTCP_V0_ADD_ADDR_OPT_LEN_V4 &&
1734 	    addr_opt->maddr_flags != MPTCP_V0_ADD_ADDR_IPV4) {
1735 		os_log_error(mptcp_log_handle, "%s - %lx: ADD_ADDR length for v4 but version is %u\n",
1736 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1737 		    addr_opt->maddr_flags);
1738 
1739 		return;
1740 	}
1741 
1742 	if (addr_opt->maddr_len == MPTCP_V0_ADD_ADDR_OPT_LEN_V6 &&
1743 	    addr_opt->maddr_flags != MPTCP_V0_ADD_ADDR_IPV6) {
1744 		os_log_error(mptcp_log_handle, "%s - %lx: ADD_ADDR length for v6 but version is %u\n",
1745 		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1746 		    addr_opt->maddr_flags);
1747 
1748 		return;
1749 	}
1750 
1751 	if (addr_opt->maddr_len == MPTCP_V0_ADD_ADDR_OPT_LEN_V4) {
1752 		struct sockaddr_in *dst = &mpte->mpte_sub_dst_v4;
1753 		struct in_addr *addr = (struct in_addr *)(void *)(cp + sizeof(*addr_opt));
1754 		in_addr_t haddr = ntohl(addr->s_addr);
1755 
1756 		if (IN_ZERONET(haddr) ||
1757 		    IN_LOOPBACK(haddr) ||
1758 		    IN_LINKLOCAL(haddr) ||
1759 		    IN_DS_LITE(haddr) ||
1760 		    IN_6TO4_RELAY_ANYCAST(haddr) ||
1761 		    IN_MULTICAST(haddr) ||
1762 		    INADDR_BROADCAST == haddr ||
1763 		    IN_PRIVATE(haddr) ||
1764 		    IN_SHARED_ADDRESS_SPACE(haddr)) {
1765 			os_log_error(mptcp_log_handle, "%s - %lx: ADD_ADDR invalid addr: %x\n",
1766 			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1767 			    addr->s_addr);
1768 
1769 			return;
1770 		}
1771 
1772 		dst->sin_len = sizeof(*dst);
1773 		dst->sin_family = AF_INET;
1774 		dst->sin_port = mpte->__mpte_dst_v4.sin_port;
1775 		dst->sin_addr.s_addr = addr->s_addr;
1776 		mpte->mpte_last_added_addr_is_v4 = TRUE;
1777 	} else {
1778 		struct sockaddr_in6 *dst = &mpte->mpte_sub_dst_v6;
1779 		struct in6_addr *addr = (struct in6_addr *)(void *)(cp + sizeof(*addr_opt));
1780 
1781 		if (IN6_IS_ADDR_LINKLOCAL(addr) ||
1782 		    IN6_IS_ADDR_MULTICAST(addr) ||
1783 		    IN6_IS_ADDR_UNSPECIFIED(addr) ||
1784 		    IN6_IS_ADDR_LOOPBACK(addr) ||
1785 		    IN6_IS_ADDR_V4COMPAT(addr) ||
1786 		    IN6_IS_ADDR_V4MAPPED(addr)) {
1787 			char dbuf[MAX_IPv6_STR_LEN];
1788 
1789 			inet_ntop(AF_INET6, addr, dbuf, sizeof(dbuf));
1790 			os_log_error(mptcp_log_handle, "%s - %lx: ADD_ADDRv6 invalid addr: %s\n",
1791 			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1792 			    dbuf);
1793 
1794 			return;
1795 		}
1796 
1797 		dst->sin6_len = sizeof(*dst);
1798 		dst->sin6_family = AF_INET6;
1799 		dst->sin6_port = mpte->__mpte_dst_v6.sin6_port;
1800 		dst->sin6_addr = *addr;
1801 		mpte->mpte_last_added_addr_is_v4 = FALSE;
1802 	}
1803 
1804 	os_log(mptcp_log_handle, "%s - %lx: Received ADD_ADDRv0\n",
1805 	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
1806 
1807 	mptcp_sched_create_subflows(mpte);
1808 }
1809 
1810 void
tcp_do_mptcp_options(struct tcpcb * tp,u_char * cp __ended_by (optend),u_char * optend,struct tcphdr * th,struct tcpopt * to,uint8_t optlen)1811 tcp_do_mptcp_options(struct tcpcb *tp, u_char *cp __ended_by(optend), u_char *optend, struct tcphdr *th,
1812     struct tcpopt *to, uint8_t optlen)
1813 {
1814 	int mptcp_subtype = 0;
1815 	struct mptcb *mp_tp = tptomptp(tp);
1816 
1817 	/* We expect the TCP stack to ensure this */
1818 	ASSERT(cp + optlen <= optend);
1819 
1820 	if (mp_tp == NULL) {
1821 		return;
1822 	}
1823 
1824 	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));
1825 
1826 	/* All MPTCP options have atleast 4 bytes */
1827 	if (optlen < 4) {
1828 		return;
1829 	}
1830 
1831 	mptcp_subtype = (cp[2] >> 4);
1832 
1833 	if (mptcp_sanitize_option(tp, mptcp_subtype) == 0) {
1834 		return;
1835 	}
1836 
1837 	switch (mptcp_subtype) {
1838 	case MPO_CAPABLE:
1839 		mptcp_do_mpcapable_opt(tp, cp, optend, th, optlen);
1840 		break;
1841 	case MPO_JOIN:
1842 		mptcp_do_mpjoin_opt(tp, cp, optend, th, optlen);
1843 		break;
1844 	case MPO_DSS:
1845 		mptcp_do_dss_opt(tp, cp, optend, th, optlen);
1846 		break;
1847 	case MPO_FASTCLOSE:
1848 		mptcp_do_fastclose_opt(tp, cp, optend, th, optlen);
1849 		break;
1850 	case MPO_FAIL:
1851 		mptcp_do_mpfail_opt(tp, cp, optend, th, optlen);
1852 		break;
1853 	case MPO_ADD_ADDR:
1854 		if (mp_tp->mpt_version == MPTCP_VERSION_0) {
1855 			mptcp_do_add_addr_opt_v0(mp_tp->mpt_mpte, cp, optend, optlen);
1856 		} else {
1857 			mptcp_do_add_addr_opt_v1(tp, cp, optend, optlen);
1858 		}
1859 		break;
1860 	case MPO_REMOVE_ADDR:           /* fall through */
1861 	case MPO_PRIO:
1862 		to->to_flags |= TOF_MPTCP;
1863 		break;
1864 	default:
1865 		break;
1866 	}
1867 	return;
1868 }
1869 
1870 /* REMOVE_ADDR option is sent when a source address goes away */
1871 static void
mptcp_send_remaddr_opt(struct tcpcb * tp,struct mptcp_remaddr_opt * opt)1872 mptcp_send_remaddr_opt(struct tcpcb *tp, struct mptcp_remaddr_opt *opt)
1873 {
1874 	bzero(opt, sizeof(*opt));
1875 	opt->mr_kind = TCPOPT_MULTIPATH;
1876 	opt->mr_len = sizeof(*opt);
1877 	opt->mr_subtype = MPO_REMOVE_ADDR;
1878 	opt->mr_addr_id = tp->t_rem_aid;
1879 	tp->t_mpflags &= ~TMPF_SND_REM_ADDR;
1880 }
1881 
1882 static int
mptcp_echo_add_addr(struct tcpcb * tp,u_char * __indexable cp,unsigned int optlen)1883 mptcp_echo_add_addr(struct tcpcb *tp, u_char * __indexable cp, unsigned int optlen)
1884 {
1885 	struct mptcp_add_addr_opt *mpaddr;
1886 	struct mptcb *mp_tp;
1887 	struct mptses *mpte;
1888 
1889 	mp_tp = tptomptp(tp);
1890 	mpte = mp_tp->mpt_mpte;
1891 
1892 	// MPTCP v0 doesn't require echoing add_addr
1893 	if (mp_tp->mpt_version == MPTCP_VERSION_0) {
1894 		return optlen;
1895 	}
1896 
1897 	size_t mpaddr_size = mpte->mpte_last_added_addr_is_v4 ? MPTCP_V1_ADD_ADDR_ECHO_OPT_LEN_V4 : MPTCP_V1_ADD_ADDR_ECHO_OPT_LEN_V6;
1898 	if ((MAX_TCPOPTLEN - optlen) < mpaddr_size) {
1899 		return optlen;
1900 	}
1901 
1902 	cp += optlen;
1903 	mpaddr = (struct mptcp_add_addr_opt *)cp;
1904 
1905 	mpaddr->maddr_kind = TCPOPT_MULTIPATH;
1906 	mpaddr->maddr_len = (uint8_t)mpaddr_size;
1907 	mpaddr->maddr_subtype = MPO_ADD_ADDR;
1908 	mpaddr->maddr_flags = MPTCP_V1_ADD_ADDR_ECHO;
1909 	if (mpte->mpte_last_added_addr_is_v4) {
1910 		struct in_addr *addr = (struct in_addr *)(void *)(cp + sizeof(struct mptcp_add_addr_opt));
1911 		addr->s_addr = mpte->mpte_sub_dst_v4.sin_addr.s_addr;
1912 		mpaddr->maddr_addrid = mpte->sub_dst_addr_id_v4;
1913 	} else {
1914 		struct in6_addr *addr = (struct in6_addr *)(void *)(cp + sizeof(struct mptcp_add_addr_opt));
1915 		*addr = mpte->mpte_sub_dst_v6.sin6_addr;
1916 		mpaddr->maddr_addrid = mpte->sub_dst_addr_id_v6;
1917 	}
1918 
1919 	optlen += mpaddr_size;
1920 	tp->t_mpflags &= ~TMPF_MPTCP_ECHO_ADDR;
1921 	return optlen;
1922 }
1923 
1924 /* We send MP_PRIO option based on the values set by the SIOCSCONNORDER ioctl */
1925 static int
mptcp_snd_mpprio(struct tcpcb * tp,u_char * cp __ended_by (optend),u_char * optend __unused,int optlen)1926 mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp __ended_by(optend), u_char *optend __unused, int optlen)
1927 {
1928 	struct mptcp_mpprio_addr_opt mpprio;
1929 	struct mptcb *mp_tp = tptomptp(tp);
1930 	size_t mpprio_size = sizeof(mpprio);
1931 	// MP_PRIO of MPTCPv1 doesn't include AddrID
1932 	if (mp_tp->mpt_version == MPTCP_VERSION_1) {
1933 		mpprio_size -= sizeof(uint8_t);
1934 	}
1935 
1936 	if (tp->t_state != TCPS_ESTABLISHED) {
1937 		tp->t_mpflags &= ~TMPF_SND_MPPRIO;
1938 		return optlen;
1939 	}
1940 
1941 	if ((MAX_TCPOPTLEN - optlen) < (int)mpprio_size) {
1942 		return optlen;
1943 	}
1944 
1945 	bzero(&mpprio, sizeof(mpprio));
1946 	mpprio.mpprio_kind = TCPOPT_MULTIPATH;
1947 	mpprio.mpprio_len = (uint8_t)mpprio_size;
1948 	mpprio.mpprio_subtype = MPO_PRIO;
1949 	if (tp->t_mpflags & TMPF_BACKUP_PATH) {
1950 		mpprio.mpprio_flags |= MPTCP_MPPRIO_BKP;
1951 	}
1952 	mpprio.mpprio_addrid = tp->t_local_aid;
1953 	memcpy(cp + optlen, &mpprio, mpprio_size);
1954 	optlen += mpprio_size;
1955 	tp->t_mpflags &= ~TMPF_SND_MPPRIO;
1956 	return optlen;
1957 }
1958