xref: /xnu-12377.1.9/bsd/netinet/tcp_syncookie.c (revision f6217f891ac0bb64f3d375211650a4c1ff8ca1ea)
1 /*
2  * Copyright (c) 2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*-
30  * SPDX-License-Identifier: BSD-2-Clause
31  *
32  * Copyright (c) 2001 McAfee, Inc.
33  * Copyright (c) 2006,2013 Andre Oppermann, Internet Business Solutions AG
34  * All rights reserved.
35  *
36  * This software was developed for the FreeBSD Project by Jonathan Lemon
37  * and McAfee Research, the Security Research Division of McAfee, Inc. under
38  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
39  * DARPA CHATS research program. [2001 McAfee, Inc.]
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  */
62 
63 #include "tcp_includes.h"
64 
65 #include <corecrypto/cchmac.h>
66 #include <corecrypto/ccsha2.h>
67 #include <net/if_var_private.h>
68 #include <netinet/in_tclass.h>
69 #include <netinet/ip.h>
70 #include <netinet/ip6.h>
71 #include <netinet/tcpip.h>
72 #include <netinet/tcp_syncookie.h>
73 #include <netinet6/nd6.h>
74 #include <net/siphash.h>
75 #include <os/ptrtools.h>
76 #include <sys/random.h>
77 
/* Defined outside this file. */
extern int path_mtu_discovery;
/* Non-zero makes syncookie_mac() use HMAC-SHA-256 instead of SipHash. */
int tcp_syncookie_hmac_sha256 = 0;

SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookie_hmac_sha256,
    CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_syncookie_hmac_sha256, 0,
    "0: disable, 1: Use HMAC with SHA-256 for generating SYN cookie");

/* Forward declarations of the file-local helpers defined further down. */
static bool
syncookie_respond(struct socket *so, struct tcpcb *tp, struct tcp_inp *tpi, uint16_t flags,
    struct sockaddr *local, struct sockaddr *remote);
static uint32_t syncookie_siphash(struct tcp_inp *tpi, uint8_t flags, uint8_t key[SYNCOOKIE_SECRET_SIZE]);
static uint32_t syncookie_hmac_sha256(struct tcp_inp *tpi, uint8_t flags, uint8_t key[CCSHA256_OUTPUT_SIZE]);
static uint32_t syncookie_mac(struct tcp_inp *tpi, uint8_t flags, uint8_t secbit);
static tcp_seq syncookie_generate(struct tcp_inp *tpi, bool has_ecn);
static bool syncookie_lookup(struct tcp_inp *tpi);
static void syncookie_reseed(void);

/*
 * Global secret state used by this file: the two keys (key[0], key[1]),
 * the odd/even selector and the time of the last update.
 */
static struct syncookie_secret tcp_syncookie_secret;
96 
97 /*
98  * This function gets called when we receive an ACK for a
99  * socket in the LISTEN state.  We create the connection
100  * and set its state based on information from SYN cookies
101  * and options/flags received in last ACK. The returned
102  * tcpcb is in the SYN-RECEIVED state.
103  *
104  * Return true on success and false on failure.
105  */
106 bool
tcp_syncookie_ack(struct tcp_inp * tpi,struct socket ** so2,int * dropsocket)107 tcp_syncookie_ack(struct tcp_inp *tpi, struct socket **so2, int* dropsocket)
108 {
109 #define TCP_LOG_HDR (isipv6 ? (void *)ip6 : (void *)ip)
110 
111 	ASSERT((tcp_get_flags(tpi->th) & (TH_RST | TH_ACK | TH_SYN)) == TH_ACK);
112 	/*
113 	 * We don't support syncache, so see if this ACK is
114 	 * a returning syncookie. To do this,  check that the
115 	 * syncookie is valid.
116 	 */
117 	bool ret = syncookie_lookup(tpi);
118 
119 	if (ret == false) {
120 		TCP_LOG(*tpi->tp, "Segment failed SYNCOOKIE authentication, "
121 		    "segment rejected (probably spoofed)");
122 		goto failed;
123 	}
124 
125 	ret = tcp_create_server_socket(tpi, so2, NULL, dropsocket);
126 
127 	if (ret == false) {
128 		goto failed;
129 	}
130 
131 	ret = tcp_setup_server_socket(tpi, *so2, true);
132 
133 	/* Set snd state for newly created tcpcb */
134 	(*tpi->tp)->snd_nxt = (*tpi->tp)->snd_max = tpi->th->th_ack;
135 
136 	if (ret == false) {
137 		/*
138 		 * We failed to setup the server socket, return failure
139 		 * so that tcp_input can cleanup the socket and the
140 		 * incoming segment
141 		 */
142 		goto failed;
143 	}
144 	*dropsocket = 0;         /* committed to socket */
145 
146 	if (__improbable(*so2 == NULL)) {
147 		tcpstat.tcps_sc_aborted++;
148 	} else {
149 		tcpstat.tcps_sc_completed++;
150 	}
151 
152 	return true;
153 
154 failed:
155 	return false;
156 }
157 
158 static uint8_t
syncookie_process_accecn_syn(struct tcpcb * tp,uint32_t ace_flags,uint8_t ip_ecn)159 syncookie_process_accecn_syn(struct tcpcb *tp, uint32_t ace_flags,
160     uint8_t ip_ecn)
161 {
162 	uint8_t setup_flags = 0;
163 	switch (ace_flags) {
164 	case (0 | 0 | 0):
165 		/* No ECN */
166 		break;
167 	case (0 | TH_CWR | TH_ECE):
168 		/* Legacy ECN-setup */
169 		setup_flags |= SC_ECN_SETUP;
170 		break;
171 	case (TH_ACE):
172 		/* Accurate ECN */
173 		if (tp->l4s_enabled) {
174 			switch (ip_ecn) {
175 			case IPTOS_ECN_NOTECT:
176 				setup_flags |= SC_ACE_SETUP_NOT_ECT;
177 				break;
178 			case IPTOS_ECN_ECT1:
179 				setup_flags |= SC_ACE_SETUP_ECT1;
180 				break;
181 			case IPTOS_ECN_ECT0:
182 				setup_flags |= SC_ACE_SETUP_ECT0;
183 				break;
184 			case IPTOS_ECN_CE:
185 				setup_flags |= SC_ACE_SETUP_CE;
186 				break;
187 			}
188 		} else {
189 			/*
190 			 * If AccECN is not enabled, ignore
191 			 * the TH_AE bit and do Legacy ECN-setup
192 			 */
193 			setup_flags |= SC_ECN_SETUP;
194 		}
195 	default:
196 		/* Forward Compatibility */
197 		/* Accurate ECN */
198 		if (tp->l4s_enabled) {
199 			switch (ip_ecn) {
200 			case IPTOS_ECN_NOTECT:
201 				setup_flags |= SC_ACE_SETUP_NOT_ECT;
202 				break;
203 			case IPTOS_ECN_ECT1:
204 				setup_flags |= SC_ACE_SETUP_ECT1;
205 				break;
206 			case IPTOS_ECN_ECT0:
207 				setup_flags |= SC_ACE_SETUP_ECT0;
208 				break;
209 			case IPTOS_ECN_CE:
210 				setup_flags |= SC_ACE_SETUP_CE;
211 				break;
212 			}
213 		}
214 		break;
215 	}
216 	return setup_flags;
217 }
218 
219 static uint16_t
syncookie_respond_accecn(uint8_t setup_flags,uint16_t thflags)220 syncookie_respond_accecn(uint8_t setup_flags, uint16_t thflags)
221 {
222 	switch (setup_flags) {
223 	case SC_ECN_SETUP:
224 		thflags |= TH_ECE;
225 		break;
226 	case SC_ACE_SETUP_NOT_ECT:
227 		thflags |= TH_CWR;
228 		break;
229 	case SC_ACE_SETUP_ECT1:
230 		thflags |= (TH_CWR | TH_ECE);
231 		break;
232 	case SC_ACE_SETUP_ECT0:
233 		thflags |= TH_AE;
234 		break;
235 	case SC_ACE_SETUP_CE:
236 		thflags |= (TH_AE | TH_CWR);
237 		break;
238 	}
239 
240 	return thflags;
241 }
242 
243 /*
244  * Given a LISTEN socket and an inbound SYN request, generate
245  * a SYN cookie, and send back a segment:
246  *	<SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
247  * to the source.
248  */
249 void
tcp_syncookie_syn(struct tcp_inp * tpi,struct sockaddr * local,struct sockaddr * remote)250 tcp_syncookie_syn(struct tcp_inp *tpi, struct sockaddr *local,
251     struct sockaddr *remote)
252 {
253 	struct socket *so = tpi->so;
254 	struct inpcb *inp;
255 	struct tcpcb *tp;
256 	uint8_t ip_tos, ip_ecn;
257 	uint8_t ace_setup_flags = 0;
258 
259 	/* make sure inp is locked for listen socket */
260 	socket_lock_assert_owned(so);
261 
262 	ASSERT((tcp_get_flags(tpi->th) & (TH_RST | TH_ACK | TH_SYN)) == TH_SYN);
263 
264 	ASSERT((so->so_options & SO_ACCEPTCONN) != 0);
265 
266 	/* Reseed the key if SYNCOOKIE_LIFETIME time has elapsed */
267 	if (tcp_now > tcp_syncookie_secret.last_updated +
268 	    SYNCOOKIE_LIFETIME * TCP_RETRANSHZ) {
269 		syncookie_reseed();
270 	}
271 	inp = sotoinpcb(so);
272 	tp = sototcpcb(so);
273 
274 	if (tpi->isipv6) {
275 		if ((inp->in6p_outputopts == NULL) ||
276 		    (inp->in6p_outputopts->ip6po_tclass == -1)) {
277 			ip_tos = 0;
278 		} else {
279 			ip_tos = (uint8_t)inp->in6p_outputopts->ip6po_tclass;
280 		}
281 	} else {
282 		ip_tos = inp->inp_ip_tos;
283 	}
284 
285 	ip_ecn = ip_tos & IPTOS_ECN_MASK;
286 
287 	/* Is ECN enabled? */
288 	bool is_ecn = tcp_ecn_enabled(tp->ecn_flags);
289 	/* ECN Handshake */
290 	if (is_ecn) {
291 		int ace_flags = ((tpi->th->th_x2 << 8) | tpi->th->th_flags) & TH_ACE;
292 		ace_setup_flags = syncookie_process_accecn_syn(tp, ace_flags, ip_ecn);
293 	}
294 	bool classic_ecn = !!(ace_setup_flags & SC_ECN_SETUP);
295 
296 	tpi->iss = syncookie_generate(tpi, classic_ecn);
297 
298 	uint16_t output_flags = TH_SYN | TH_ACK;
299 	output_flags = syncookie_respond_accecn(ace_setup_flags, output_flags);
300 	/*
301 	 * Do a standard 3-way handshake.
302 	 */
303 	if (syncookie_respond(so, tp, tpi, output_flags, local, remote)) {
304 		tcpstat.tcps_sndacks++;
305 		tcpstat.tcps_sndtotal++;
306 	} else {
307 		tcpstat.tcps_sc_dropped++;
308 	}
309 	if (tpi->m != NULL) {
310 		m_freem(tpi->m);
311 	}
312 }
313 
314 /*
315  * Send SYN|ACK to the peer in response to a peer's SYN segment
316  */
317 static bool
syncookie_respond(struct socket * so,struct tcpcb * tp,struct tcp_inp * tpi,uint16_t flags,struct sockaddr * local,struct sockaddr * remote)318 syncookie_respond(struct socket *so, struct tcpcb *tp, struct tcp_inp *tpi, uint16_t flags,
319     struct sockaddr *local, struct sockaddr *remote)
320 {
321 	struct tcptemp *__single t_template;
322 	struct mbuf *__single m;
323 	tcp_seq seq;
324 	uint16_t mss = 0;
325 	uint32_t win;
326 
327 	if (flags & TH_SYN) {
328 		seq = tpi->iss;
329 	} else {
330 		seq = tpi->iss + 1;
331 	}
332 
333 	t_template = tcp_maketemplate(tp, &m, local, remote);
334 	if (t_template != NULL) {
335 		/* Use the properties of listener socket for sending SYN-ACK with cookie */
336 		struct inpcb *inp = tp->t_inpcb;
337 
338 		uint16_t min_protoh = tpi->isipv6 ? sizeof(struct ip6_hdr) + sizeof(struct tcphdr)
339 		    : sizeof(struct tcpiphdr);
340 		if (tpi->isipv6) {
341 			mss = (uint16_t)IN6_LINKMTU(tpi->ifp);
342 		} else {
343 			mss = (uint16_t)tpi->ifp->if_mtu;
344 		}
345 		mss -= min_protoh;
346 
347 		win = ((so->so_rcv.sb_flags & SB_USRSIZE) != 0) ?
348 		    so->so_rcv.sb_hiwat : tcp_autorcvbuf_max;
349 		win = imin(win, TCP_MAXWIN);
350 		uint8_t rcv_scale = tcp_get_max_rwinscale(tp, so);
351 
352 		struct tcp_respond_args tra;
353 
354 		bzero(&tra, sizeof(tra));
355 		tra.nocell = INP_NO_CELLULAR(inp) ? 1 : 0;
356 		tra.noexpensive = INP_NO_EXPENSIVE(inp) ? 1 : 0;
357 		tra.noconstrained = INP_NO_CONSTRAINED(inp) ? 1 : 0;
358 		tra.awdl_unrestricted = INP_AWDL_UNRESTRICTED(inp) ? 1 : 0;
359 		tra.intcoproc_allowed = INP_INTCOPROC_ALLOWED(inp) ? 1 : 0;
360 		tra.management_allowed = INP_MANAGEMENT_ALLOWED(inp) ? 1 : 0;
361 		tra.keep_alive = 1;
362 		if (tp->t_inpcb->inp_flags & INP_BOUND_IF) {
363 			tra.ifscope = tp->t_inpcb->inp_boundifp->if_index;
364 		} else {
365 			tra.ifscope = IFSCOPE_NONE;
366 		}
367 		tcp_respond((struct tcpcb*) 0, t_template->tt_ipgen, sizeof(t_template->tt_ipgen),
368 		    &t_template->tt_t, (struct mbuf *)NULL,
369 		    tpi->th->th_seq + 1, seq, win, flags, tpi->to, mss, rcv_scale, tpi->ts_offset, &tra, true);
370 		(void) m_free(m);
371 
372 		tcpstat.tcps_sc_sendcookie++;
373 
374 		return true;
375 	} else {
376 		return false;
377 	}
378 }
379 
380 /*
381  * The purpose of syncookies is to handle spoofed SYN flooding DoS attacks
382  * that exceed the capacity of the listen queue by avoiding the storage of any
383  * of the SYNs we receive.  Syncookies defend against blind SYN flooding
384  * attacks where the attacker does not have access to our responses.
385  *
386  * Syncookies encode and include all necessary information about the
387  * connection setup within the SYN|ACK that we send back.  That way we
388  * can avoid keeping any local state until the ACK to our SYN|ACK returns
389  * (if ever).
390  *
391  * The only reliable information persisting the 3WHS is our initial sequence
392  * number ISS of 32 bits.  Syncookies embed a cryptographically sufficient
393  * strong hash (MAC) value and a few bits of TCP SYN options in the ISS
394  * of our SYN|ACK.  The MAC can be recomputed when the ACK to our SYN|ACK
395  * returns and signifies a legitimate connection if it matches the ACK.
396  *
397  * The available space of 32 bits to store the hash and to encode the SYN
398  * option information is very tight and we should have at least 24 bits for
399  * the MAC to keep the number of guesses by blind spoofing reasonably high.
400  *
401  * SYN option information we have to encode to fully restore a connection:
402  * MSS: is important to choose an optimal segment size to avoid IP level
403  *   fragmentation along the path.  The common MSS values can be encoded
404  *   in a 3-bit table.  Uncommon values are captured by the next lower value
405  *   in the table leading to a slight increase in packetization overhead.
406  * WSCALE: is necessary to allow large windows to be used for high delay-
407  *   bandwidth product links.  Not scaling the window when it was initially
408  *   negotiated is bad for performance as lack of scaling further decreases
409  *   the apparent available send window.  We only need to encode the WSCALE
410  *   we received from the remote end.  Our end can be recalculated at any
411  *   time.  The common WSCALE values can be encoded in a 3-bit table.
412  *   Uncommon values are captured by the next lower value in the table
413  *   making us under-estimate the available window size halving our
414  *   theoretically possible maximum throughput for that connection.
415  * SACK: Greatly assists in packet loss recovery and requires 1 bit.
416  * TIMESTAMP is not encoded because it is a permanent option
417  *   that is included in all segments on a connection.  We enable it when
418  *   the ACK has it.
419  * Accurate ECN is not encoded because the last ACK has enough state to
420  *   determine the state negotiated during SYN/ACK.
421  *
422  * Security of syncookies and attack vectors:
423  *
424  * The MAC is computed over (faddr||laddr||fport||lport||irs||flags)
425  * together with the global secret to make it unique per connection attempt.
426  * Thus any change of any of those parameters results in a different MAC output
427  * in an unpredictable way unless a collision is encountered.  24 bits of the
428  * MAC are embedded into the ISS.
429  *
430  * To prevent replay attacks two rotating global secrets are updated with a
431  * new random value every 15 seconds.  The life-time of a syncookie is thus
432  * 15-30 seconds.
433  *
434  * Vector 1: Attacking the secret.  This requires finding a weakness in the
435  * MAC itself or the way it is used here.  The attacker can do a chosen plain
436  * text attack by varying and testing all the parameters under his control.
437  * The strength depends on the size and randomness of the secret, and the
438  * cryptographic security of the MAC function.  Due to the constant updating
439  * of the secret the attacker has at most 29.999 seconds to find the secret
440  * and launch spoofed connections.  After that he has to start all over again.
441  *
442  * Vector 2: Collision attack on the MAC of a single ACK.  With a 24 bit MAC
443  * size an average of 4,823 attempts are required for a 50% chance of success
444  * to spoof a single syncookie (birthday collision paradox).  However the
445  * attacker is blind and doesn't know if one of his attempts succeeded unless
446  * he has a side channel to infer success from.  A single connection setup
447  * success average of 90% requires 8,790 packets, 99.99% requires 17,578 packets.
448  * This many attempts are required for each one blind spoofed connection.  For
449  * every additional spoofed connection he has to launch another N attempts.
450  * Thus for a sustained rate 100 spoofed connections per second approximately
451  * 1,800,000 packets per second would have to be sent.
452  *
453  * NB: The MAC function should be fast so that it doesn't become a CPU
454  * exhaustion attack vector itself.
455  *
456  * References:
457  *  RFC4987 TCP SYN Flooding Attacks and Common Mitigations
458  *  SYN cookies were first proposed by cryptographer Dan J. Bernstein in 1996
459  *   http://cr.yp.to/syncookies.html    (overview)
460  *   http://cr.yp.to/syncookies/archive (details)
461  *
462  *
463  * Schematic construction of a syncookie enabled Initial Sequence Number:
464  *  0        1         2         3
465  *  12345678901234567890123456789012
466  * |xxxxxxxxxxxxxxxxxxxxxxxxWWWMMMSP|
467  *
468  *  x 24 MAC (truncated)
469  *  W  3 Send Window Scale index
470  *  M  2 MSS index
471  *  E  1 Classic ECN permitted
472  *  S  1 SACK permitted
473  *  P  1 Odd/even secret
474  */
/*
 * Distribution and probability of certain MSS values.  Those in between are
 * rounded down to the next lower one.  One table per address family; the
 * index into the table is what gets stored in the cookie.  The tables are
 * only ever read, hence const.
 */
static const uint16_t tcp_sc_msstab_v4[] = { 536, 1300, 1460, 4036 };

static const uint16_t tcp_sc_msstab_v6[] = { 1220, 1420, 1440, 4016 };
482 
/*
 * Distribution and probability of certain WSCALE values.  We have to map the
 * (send) window scale (shift) option with a range of 0-14 from 4 bits into 3
 * bits based on prevalence of certain values.  A value without an exact
 * match is rounded down to the next lower entry, letting us under-estimate
 * the true available window.  At the moment this would happen only for the
 * very uncommon values 2, 5 and those above 9 (more than 32MB socket buffer
 * and window size).  The absence of the WSCALE option (no scaling in either
 * direction) is encoded with index zero.  The table is only ever read,
 * hence const.
 */
static const uint8_t tcp_sc_wstab[] = { 0, 1, 3, 4, 6, 7, 8, 9 };
494 
/* Number of elements of a true array (not valid for pointers). */
#define nitems(_x_) (sizeof(_x_) / sizeof((_x_)[0]))
496 
497 /*
498  * Compute the MAC for the SYN cookie.  SIPHASH-2-4 is chosen for its speed
499  * and good cryptographic properties.
500  */
501 static uint32_t
syncookie_siphash(struct tcp_inp * tpi,uint8_t flags,uint8_t key[SYNCOOKIE_SECRET_SIZE])502 syncookie_siphash(struct tcp_inp *tpi, uint8_t flags, uint8_t key[SYNCOOKIE_SECRET_SIZE])
503 {
504 	SIPHASH_CTX ctx;
505 	uint32_t siphash[2];
506 
507 	SipHash24_Init(&ctx);
508 	SipHash_SetKey(&ctx, key);
509 	if (tpi->isipv6) {
510 		SipHash_Update(&ctx, &tpi->ip6->ip6_src.s6_addr, sizeof(tpi->ip6->ip6_src.s6_addr));
511 		SipHash_Update(&ctx, &tpi->ip6->ip6_dst.s6_addr, sizeof(tpi->ip6->ip6_dst.s6_addr));
512 	} else {
513 		SipHash_Update(&ctx, &tpi->ip->ip_src.s_addr, sizeof(tpi->ip->ip_src.s_addr));
514 		SipHash_Update(&ctx, &tpi->ip->ip_dst.s_addr, sizeof(tpi->ip->ip_dst.s_addr));
515 	}
516 
517 	SipHash_Update(&ctx, &tpi->th->th_sport, sizeof(tpi->th->th_sport));
518 	SipHash_Update(&ctx, &tpi->th->th_dport, sizeof(tpi->th->th_dport));
519 	SipHash_Update(&ctx, &tpi->irs, sizeof(tpi->irs));
520 	SipHash_Update(&ctx, &flags, sizeof(flags));
521 	SipHash_Final((u_int8_t *)&siphash, &ctx);
522 
523 	tpi->ts_offset = siphash[1];
524 
525 	return siphash[0] ^ siphash[1];
526 }
527 
528 /*
529  * HMAC with SHA-256 is only used for comparison with Siphash
530  */
531 static uint32_t
syncookie_hmac_sha256(struct tcp_inp * tpi,uint8_t flags,uint8_t key[CCSHA256_OUTPUT_SIZE])532 syncookie_hmac_sha256(struct tcp_inp *tpi, uint8_t flags, uint8_t key[CCSHA256_OUTPUT_SIZE])
533 {
534 	/* SHA256 mac is 32 bytes */
535 	uint32_t mac[8] = {};
536 	const struct ccdigest_info *di = ccsha256_di();
537 
538 	cchmac_ctx_decl(di->state_size, di->block_size, ctx);
539 	cchmac_init(di, ctx, CCSHA256_OUTPUT_SIZE, key);
540 	if (tpi->isipv6) {
541 		cchmac_update(di, ctx, sizeof(tpi->ip6->ip6_src.s6_addr), &tpi->ip6->ip6_src.s6_addr);
542 		cchmac_update(di, ctx, sizeof(tpi->ip6->ip6_dst.s6_addr), &tpi->ip6->ip6_dst.s6_addr);
543 	} else {
544 		cchmac_update(di, ctx, sizeof(tpi->ip->ip_src.s_addr), &tpi->ip->ip_src.s_addr);
545 		cchmac_update(di, ctx, sizeof(tpi->ip->ip_dst.s_addr), &tpi->ip->ip_dst.s_addr);
546 	}
547 	cchmac_update(di, ctx, sizeof(tpi->th->th_sport), &tpi->th->th_sport);
548 	cchmac_update(di, ctx, sizeof(tpi->th->th_dport), &tpi->th->th_dport);
549 	cchmac_update(di, ctx, sizeof(tpi->irs), &tpi->irs);
550 	cchmac_update(di, ctx, sizeof(flags), &flags);
551 	cchmac_final(di, ctx, (uint8_t *)mac);
552 
553 	tpi->ts_offset = mac[1];
554 
555 	return mac[0] ^ mac[1] ^ mac[2] ^ mac[3] ^ mac[4] ^ mac[5] ^ mac[6] ^ mac[7];
556 }
557 
558 static uint32_t
syncookie_mac(struct tcp_inp * tpi,uint8_t flags,uint8_t secbit)559 syncookie_mac(struct tcp_inp *tpi, uint8_t flags, uint8_t secbit)
560 {
561 	if (tcp_syncookie_hmac_sha256) {
562 		/* key size is 32 bytes */
563 		return syncookie_hmac_sha256(tpi, flags, (uint8_t *) tcp_syncookie_secret.key);
564 	} else {
565 		/* key size is 16 bytes */
566 		return syncookie_siphash(tpi, flags, tcp_syncookie_secret.key[secbit]);
567 	}
568 }
569 
570 static tcp_seq
syncookie_generate(struct tcp_inp * tpi,bool has_ecn)571 syncookie_generate(struct tcp_inp *tpi, bool has_ecn)
572 {
573 	uint8_t i, secbit, peer_wscale = 0;
574 	uint32_t iss, hash;
575 	syncookie cookie;
576 	uint16_t peer_mss = 0;
577 
578 	cookie.cookie = 0;
579 
580 	struct tcpopt *to = tpi->to;
581 
582 	if (to->to_flags & TOF_MSS) {
583 		peer_mss = to->to_mss;  /* peer mss may be zero */
584 	}
585 	if (to->to_flags & TOF_SCALE) {
586 		peer_wscale = to->to_wscale;
587 	}
588 
589 	/* Map our computed MSS into the 2-bit index. */
590 	if (tpi->isipv6) {
591 		for (i = nitems(tcp_sc_msstab_v6) - 1;
592 		    tcp_sc_msstab_v6[i] > peer_mss && i > 0;
593 		    i--) {
594 			;
595 		}
596 	} else {
597 		for (i = nitems(tcp_sc_msstab_v4) - 1;
598 		    tcp_sc_msstab_v4[i] > peer_mss && i > 0;
599 		    i--) {
600 			;
601 		}
602 	}
603 	cookie.flags.mss_idx = i;
604 	/*
605 	 * Map the send window scale into the 3-bit index but only if
606 	 * the wscale option was received.
607 	 */
608 	if (peer_wscale > 0) {
609 		for (i = nitems(tcp_sc_wstab) - 1;
610 		    tcp_sc_wstab[i] > peer_wscale && i > 0;
611 		    i--) {
612 			;
613 		}
614 		cookie.flags.wscale_idx = i;
615 	}
616 	/* Can we do SACK? */
617 	if (to->to_flags & TOF_SACKPERM) {
618 		cookie.flags.sack_ok = 1;
619 	}
620 
621 	/* Should we do classic ECN? */
622 	if (has_ecn) {
623 		cookie.flags.ecn_ok = 1;
624 	}
625 
626 	/* Which of the two secrets to use. */
627 	secbit = tcp_syncookie_secret.oddeven & 0x1;
628 	cookie.flags.odd_even = secbit;
629 	tpi->irs = tpi->th->th_seq;
630 	hash = syncookie_mac(tpi, cookie.cookie, secbit);
631 	/*
632 	 * Put the flags into the hash and XOR them to get better ISS number
633 	 * variance.  This doesn't enhance the cryptographic strength and is
634 	 * done to prevent the 8 cookie bits from showing up directly on the
635 	 * wire.
636 	 */
637 	iss = hash & ~0xff;
638 	iss |= cookie.cookie ^ (hash >> 24);
639 
640 	tcpstat.tcps_sc_sendcookie++;
641 
642 	return iss;
643 }
644 
645 /*
646  * Validate received SYN cookie in th_ack. Returns true on success
647  * and a false on failure
648  */
649 static bool
syncookie_lookup(struct tcp_inp * tpi)650 syncookie_lookup(struct tcp_inp *tpi)
651 {
652 	syncookie cookie;
653 	uint32_t hash;
654 	tcp_seq ack;
655 	/*
656 	 * Pull information out of SYN-ACK/ACK and revert sequence number
657 	 * advances.
658 	 */
659 	ack = tpi->th->th_ack - 1;
660 	tpi->irs = tpi->th->th_seq - 1;
661 
662 	/*
663 	 * Unpack the flags containing enough information to restore the
664 	 * connection.
665 	 */
666 	cookie.cookie = (ack & 0xff) ^ (ack >> 24);
667 	hash = syncookie_mac(tpi, cookie.cookie, cookie.flags.odd_even);
668 
669 	/* The recomputed hash failed to match the ACK */
670 	if ((ack & ~0xff) != (hash & ~0xff)) {
671 		return false;
672 	}
673 	if (tpi->isipv6) {
674 		tpi->peer_mss = tcp_sc_msstab_v6[cookie.flags.mss_idx];
675 	} else {
676 		tpi->peer_mss = tcp_sc_msstab_v4[cookie.flags.mss_idx];
677 	}
678 
679 	/* Only use wscale if it was enabled in the orignal SYN. */
680 	if (cookie.flags.wscale_idx > 0) {
681 		tpi->peer_wscale = tcp_sc_wstab[cookie.flags.wscale_idx];
682 	}
683 	if (cookie.flags.sack_ok) {
684 		tpi->sackok = true;
685 	}
686 
687 	if (cookie.flags.ecn_ok) {
688 		tpi->ecnok = true;
689 	}
690 
691 	tcpstat.tcps_sc_recvcookie++;
692 	return true;
693 }
694 
695 /*
696  * We reseed when we receive a new connection request if
697  * last update was done SYNCOOKIE_LIFETIME ago
698  */
699 static void
syncookie_reseed(void)700 syncookie_reseed(void)
701 {
702 	struct syncookie_secret *secret = &tcp_syncookie_secret;
703 	uint8_t *secbits;
704 	int secbit;
705 
706 	/*
707 	 * Reseeding the secret doesn't have to be protected by a lock.
708 	 * It only must be ensured that the new random values are visible
709 	 * to all CPUs in a SMP environment.  The atomic with release
710 	 * semantics ensures that.
711 	 */
712 	secbit = (secret->oddeven & 0x1) ? 0 : 1;
713 	secbits = secret->key[secbit];
714 	read_frandom(secbits, SYNCOOKIE_SECRET_SIZE);
715 	os_atomic_add(&secret->oddeven, 1, relaxed);
716 
717 	tcp_syncookie_secret.last_updated = tcp_now;
718 }
719 
720 void
tcp_syncookie_init()721 tcp_syncookie_init()
722 {
723 	/* Init syncookie secret */
724 	read_frandom(tcp_syncookie_secret.key[0], SYNCOOKIE_SECRET_SIZE);
725 	read_frandom(tcp_syncookie_secret.key[1], SYNCOOKIE_SECRET_SIZE);
726 	tcp_syncookie_secret.last_updated = tcp_now;
727 }
728