xref: /xnu-10063.121.3/bsd/netinet/tcp_cache.c (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
/*
 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* TCP-cache to store and retrieve TCP-related information */

#include <net/flowhash.h>
#include <net/route.h>
#include <net/necp.h>
#include <netinet/in_pcb.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_var.h>
#include <netinet/tcp_cache.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <kern/locks.h>
#include <sys/queue.h>
#include <dev/random/randomdev.h>
#include <net/sockaddr_utils.h>

typedef union {
	struct in_addr addr;
	struct in6_addr addr6;
} in_4_6_addr;

struct tcp_heuristic_key {
	union {
		uint8_t thk_net_signature[IFNET_SIGNATURELEN];
		in_4_6_addr thk_ip;
	};
	sa_family_t     thk_family;
};

struct tcp_heuristic {
	SLIST_ENTRY(tcp_heuristic) list;

	uint32_t        th_last_access;

	struct tcp_heuristic_key        th_key;

	char            th_val_start[0]; /* Marker for memsetting to 0 */

	uint8_t         th_tfo_data_loss; /* The number of times a SYN+data has been lost */
	uint8_t         th_tfo_req_loss; /* The number of times a SYN+cookie-req has been lost */
	uint8_t         th_tfo_data_rst; /* The number of times a SYN+data has received a RST */
	uint8_t         th_tfo_req_rst; /* The number of times a SYN+cookie-req has received a RST */
	uint8_t         th_mptcp_loss; /* The number of times a SYN+MP_CAPABLE has been lost */
	uint8_t         th_mptcp_success; /* The number of times MPTCP-negotiation has been successful */
	uint8_t         th_ecn_loss; /* The number of times a SYN+ecn has been lost */
	uint8_t         th_ecn_aggressive; /* The number of times we did an aggressive fallback */
	uint8_t         th_ecn_droprst; /* The number of times ECN connections received a RST after first data pkt */
	uint8_t         th_ecn_droprxmt; /* The number of times ECN connection is dropped after multiple retransmits */
	uint8_t         th_ecn_synrst;  /* number of times RST was received in response to an ECN enabled SYN */
	uint32_t        th_tfo_enabled_time; /* The moment when we reenabled TFO after backing off */
	uint32_t        th_tfo_backoff_until; /* Time until when we should not try out TFO */
	uint32_t        th_tfo_backoff; /* Current backoff timer */
	uint32_t        th_mptcp_backoff; /* Time until when we should not try out MPTCP */
	uint32_t        th_ecn_backoff; /* Time until when we should not try out ECN */

	uint8_t         th_tfo_in_backoff:1, /* Are we avoiding TFO due to the backoff timer? */
	    th_mptcp_in_backoff:1,             /* Are we avoiding MPTCP due to the backoff timer? */
	    th_mptcp_heuristic_disabled:1;             /* Are heuristics disabled? */

	char            th_val_end[0]; /* Marker for memsetting to 0 */
};

struct tcp_heuristics_head {
	SLIST_HEAD(tcp_heur_bucket, tcp_heuristic) tcp_heuristics;

	/* Per-hashbucket lock to avoid lock-contention */
	lck_mtx_t       thh_mtx;
};

struct tcp_cache_key {
	sa_family_t     tck_family;

	struct tcp_heuristic_key tck_src;
	in_4_6_addr tck_dst;
};

#define MPTCP_VERSION_SUPPORTED 1
#define MPTCP_VERSION_UNSUPPORTED -1
#define MPTCP_VERSION_SUPPORTED_UNKNOWN 0
struct tcp_cache {
	SLIST_ENTRY(tcp_cache) list;

	uint32_t       tc_last_access;

	struct tcp_cache_key tc_key;

	uint8_t        tc_tfo_cookie[TFO_COOKIE_LEN_MAX];
	uint8_t        tc_tfo_cookie_len;

	uint8_t        tc_mptcp_version_confirmed:1;
	uint8_t        tc_mptcp_version; /* version to use right now */
	uint32_t       tc_mptcp_next_version_try; /* Time until we try the preferred version again */
};

struct tcp_cache_head {
	SLIST_HEAD(tcp_cache_bucket, tcp_cache) tcp_caches;

	/* Per-hashbucket lock to avoid lock-contention */
	lck_mtx_t       tch_mtx;
};

struct tcp_cache_key_src {
	struct ifnet *ifp;
	in_4_6_addr laddr;
	in_4_6_addr faddr;
	int af;
};

static uint32_t tcp_cache_hash_seed;

size_t tcp_cache_size;

/*
 * The maximum depth of a hash-bucket. This limits the tcp_cache to
 * TCP_CACHE_BUCKET_SIZE * tcp_cache_size entries and provides "natural"
 * garbage collection: the oldest entry in a full bucket gets recycled.
 */
#define TCP_CACHE_BUCKET_SIZE 5

static struct tcp_cache_head *tcp_cache;

static LCK_ATTR_DECLARE(tcp_cache_mtx_attr, 0, 0);
static LCK_GRP_DECLARE(tcp_cache_mtx_grp, "tcpcache");

static struct tcp_heuristics_head *tcp_heuristics;

static LCK_ATTR_DECLARE(tcp_heuristic_mtx_attr, 0, 0);
static LCK_GRP_DECLARE(tcp_heuristic_mtx_grp, "tcpheuristic");

static uint32_t tcp_backoff_maximum = 65536;

SYSCTL_UINT(_net_inet_tcp, OID_AUTO, backoff_maximum, CTLFLAG_RW | CTLFLAG_LOCKED,
    &tcp_backoff_maximum, 0, "Maximum time for which we won't try TFO");

static uint32_t tcp_ecn_timeout = 60;

SYSCTL_UINT(_net_inet_tcp, OID_AUTO, ecn_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
    &tcp_ecn_timeout, 60, "Initial minutes to wait before re-trying ECN");

static int disable_tcp_heuristics = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, disable_tcp_heuristics, CTLFLAG_RW | CTLFLAG_LOCKED,
    &disable_tcp_heuristics, 0, "Set to 1, to disable all TCP heuristics (TFO, ECN, MPTCP)");

static uint32_t mptcp_version_timeout = 24 * 60;

SYSCTL_UINT(_net_inet_tcp, OID_AUTO, mptcp_version_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_version_timeout, 24 * 60, "Initial minutes to wait before re-trying MPTCP's preferred version");


static uint32_t
tcp_min_to_hz(uint32_t minutes)
{
	if (minutes > 65536) {
		return (uint32_t)65536 * 60 * TCP_RETRANSHZ;
	}

	return minutes * 60 * TCP_RETRANSHZ;
}
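
/*
 * Editor's note (worked example, not in the original source): tcp_now
 * advances at TCP_RETRANSHZ ticks per second, so with the default
 * tcp_ecn_timeout of 60 minutes and TCP_RETRANSHZ assumed to be 1000:
 *
 *	tcp_min_to_hz(60) = 60 * 60 * 1000 = 3,600,000 ticks (one hour)
 *
 * The clamp at 65536 minutes bounds the result at
 * 65536 * 60 * 1000 = 3,932,160,000, just below UINT32_MAX, so a huge
 * sysctl-supplied minute value cannot overflow the uint32_t arithmetic.
 */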

/*
 * This number is coupled with tcp_ecn_timeout, because we want to prevent
 * integer overflow. Need to find an inexpensive way to prevent integer
 * overflow while still allowing a dynamic sysctl.
 */
#define TCP_CACHE_OVERFLOW_PROTECT      9
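
/*
 * Editor's note (worked example, not in the original source): the backoff
 * computations below all have the shape
 *
 *	tcp_min_to_hz(tcp_ecn_timeout) << (counter - threshold)
 *
 * with every counter capped at TCP_CACHE_OVERFLOW_PROTECT. At the default
 * tcp_ecn_timeout of 60 minutes (and TCP_RETRANSHZ assumed to be 1000),
 * the worst case is 3,600,000 << 9 = 1,843,200,000 ticks, safely below
 * UINT32_MAX. Raising the sysctl far beyond the default would weaken this
 * guarantee, which is why the two values are coupled.
 */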

/* Number of SYN-losses we accept */
#define TFO_MAX_COOKIE_LOSS     2
#define ECN_MAX_SYN_LOSS        2
#define MPTCP_MAX_SYN_LOSS      2
#define MPTCP_SUCCESS_TRIGGER   10
#define MPTCP_VERSION_MAX_FAIL  2
#define ECN_MAX_DROPRST         1
#define ECN_MAX_DROPRXMT        4
#define ECN_MAX_SYNRST          4

/* Flags for setting/unsetting loss-heuristics, limited to 4 bytes */
#define TCPCACHE_F_TFO_REQ      0x01
#define TCPCACHE_F_TFO_DATA     0x02
#define TCPCACHE_F_ECN          0x04
#define TCPCACHE_F_MPTCP        0x08
#define TCPCACHE_F_ECN_DROPRST  0x10
#define TCPCACHE_F_ECN_DROPRXMT 0x20
#define TCPCACHE_F_TFO_REQ_RST  0x40
#define TCPCACHE_F_TFO_DATA_RST 0x80
#define TCPCACHE_F_ECN_SYNRST   0x100

/*
 * Once an ECN counter has backed off to this level, reset it so that ECN
 * is always retried (see tcp_heuristic_do_ecn_common())
 */
#define ECN_RETRY_LIMIT 9

#define TCP_CACHE_INC_IFNET_STAT(_ifp_, _af_, _stat_) { \
	if ((_ifp_) != NULL) { \
	        if ((_af_) == AF_INET6) { \
	                (_ifp_)->if_ipv6_stat->_stat_++;\
	        } else { \
	                (_ifp_)->if_ipv4_stat->_stat_++;\
	        }\
	}\
}

/*
 * Round up to the next higher power of 2.  See "Bit Twiddling Hacks".
 *
 * Might be worth moving this to a library so that others
 * (e.g., scale_to_powerof2()) can use it as well instead of a while-loop.
 */
static uint32_t
tcp_cache_roundup2(uint32_t a)
{
	a--;
	a |= a >> 1;
	a |= a >> 2;
	a |= a >> 4;
	a |= a >> 8;
	a |= a >> 16;
	a++;

	return a;
}
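
/*
 * Editor's note (worked example, not in the original source), for a = 33:
 *
 *	a - 1            = 32 = 0b0100000
 *	after the ORs    =      0b0111111  (all bits below the MSB set)
 *	a + 1            = 64 = 0b1000000
 *
 * Powers of 2 map to themselves (the initial a-- is what prevents
 * doubling them), and tcp_cache_roundup2(0) wraps around and returns 0.
 */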

static void
tcp_cache_hash_src(struct tcp_cache_key_src *tcks, struct tcp_heuristic_key *key)
{
	struct ifnet *ifp = tcks->ifp;
	uint8_t len = sizeof(key->thk_net_signature);
	uint16_t flags;

	if (tcks->af == AF_INET6) {
		int ret;

		key->thk_family = AF_INET6;
		ret = ifnet_get_netsignature(ifp, AF_INET6, &len, &flags,
		    key->thk_net_signature);

		/*
		 * ifnet_get_netsignature only returns EINVAL if ifp is NULL
		 * (we made sure that in the other cases it does not). So,
		 * in this case we should take the connection's address.
		 */
		if (ret == ENOENT || ret == EINVAL) {
			memcpy(&key->thk_ip.addr6, &tcks->laddr.addr6, sizeof(struct in6_addr));
		}
	} else {
		int ret;

		key->thk_family = AF_INET;
		ret = ifnet_get_netsignature(ifp, AF_INET, &len, &flags,
		    key->thk_net_signature);

		/*
		 * ifnet_get_netsignature only returns EINVAL if ifp is NULL
		 * (we made sure that in the other cases it does not). So,
		 * in this case we should take the connection's address.
		 */
		if (ret == ENOENT || ret == EINVAL) {
			memcpy(&key->thk_ip.addr, &tcks->laddr.addr, sizeof(struct in_addr));
		}
	}
}

static uint16_t
tcp_cache_hash(struct tcp_cache_key_src *tcks, struct tcp_cache_key *key)
{
	uint32_t hash;

	bzero(key, sizeof(struct tcp_cache_key));

	tcp_cache_hash_src(tcks, &key->tck_src);

	if (tcks->af == AF_INET6) {
		key->tck_family = AF_INET6;
		memcpy(&key->tck_dst.addr6, &tcks->faddr.addr6,
		    sizeof(struct in6_addr));
	} else {
		key->tck_family = AF_INET;
		memcpy(&key->tck_dst.addr, &tcks->faddr.addr,
		    sizeof(struct in_addr));
	}

	hash = net_flowhash(key, sizeof(struct tcp_cache_key),
	    tcp_cache_hash_seed);

	return (uint16_t)(hash & (tcp_cache_size - 1));
}
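
/*
 * Editor's note: reducing the flow-hash with "hash & (tcp_cache_size - 1)"
 * is a uniform modulo only because tcp_cache_size is kept a power of 2 by
 * tcp_cache_init() below, e.g.:
 *
 *	tcp_cache_size = 1024  ->  hash & 0x3ff == hash % 1024
 *
 * With a non-power-of-2 size, the mask would have zero bits and some
 * buckets could never be selected.
 */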

static void
tcp_cache_unlock(struct tcp_cache_head *head)
{
	lck_mtx_unlock(&head->tch_mtx);
}

/*
 * Make sure that everything that happens after tcp_getcache_with_lock()
 * is short enough to justify that you hold the per-bucket lock!
 *
 * Otherwise, better build another lookup-function that does not hold the
 * lock and copies out the bits and bytes.
 *
 * That's why we provide the head as a "return"-pointer, so that the caller
 * can give it back to us for tcp_cache_unlock().
 */
static struct tcp_cache *
tcp_getcache_with_lock(struct tcp_cache_key_src *tcks,
    int create, struct tcp_cache_head **headarg)
{
	struct tcp_cache *tpcache = NULL;
	struct tcp_cache_head *head;
	struct tcp_cache_key key;
	uint16_t hash;
	int i = 0;

	hash = tcp_cache_hash(tcks, &key);
	head = &tcp_cache[hash];

	lck_mtx_lock(&head->tch_mtx);

	/*** First step: Look for the tcp_cache in our bucket ***/
	SLIST_FOREACH(tpcache, &head->tcp_caches, list) {
		if (memcmp(&tpcache->tc_key, &key, sizeof(key)) == 0) {
			break;
		}

		i++;
	}

	/*** Second step: If it's not there, create/recycle it ***/
	if ((tpcache == NULL) && create) {
		if (i >= TCP_CACHE_BUCKET_SIZE) {
			struct tcp_cache *oldest_cache = NULL;
			uint32_t max_age = 0;

			/* Look for the oldest tcp_cache in the bucket */
			SLIST_FOREACH(tpcache, &head->tcp_caches, list) {
				uint32_t age = tcp_now - tpcache->tc_last_access;
				if (age > max_age) {
					max_age = age;
					oldest_cache = tpcache;
				}
			}
			VERIFY(oldest_cache != NULL);

			tpcache = oldest_cache;

			/* We recycle, thus let's indicate that there is no cookie */
			tpcache->tc_tfo_cookie_len = 0;
		} else {
			/* Create a new cache and add it to the list */
			tpcache = kalloc_type(struct tcp_cache, Z_NOPAGEWAIT | Z_ZERO);
			if (tpcache == NULL) {
				os_log_error(OS_LOG_DEFAULT, "%s could not allocate cache", __func__);
				goto out_null;
			}

			tpcache->tc_mptcp_version = (uint8_t)mptcp_preferred_version;
			tpcache->tc_mptcp_next_version_try = tcp_now;

			SLIST_INSERT_HEAD(&head->tcp_caches, tpcache, list);
		}

		memcpy(&tpcache->tc_key, &key, sizeof(key));
	}

	if (tpcache == NULL) {
		goto out_null;
	}

	/* Update timestamp for garbage collection purposes */
	tpcache->tc_last_access = tcp_now;
	*headarg = head;

	return tpcache;

out_null:
	tcp_cache_unlock(head);
	return NULL;
}
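
/*
 * Editor's sketch of the resulting calling convention (cf. the comment
 * above tcp_getcache_with_lock()): the bucket lock is handed back held via
 * headarg, so every caller follows this short-critical-section pattern:
 *
 *	struct tcp_cache_head *head;
 *	struct tcp_cache *tpcache;
 *
 *	tpcache = tcp_getcache_with_lock(&tcks, 1, &head);
 *	if (tpcache == NULL) {
 *		return;		(the lock is already dropped on failure)
 *	}
 *	... read/update a few fields of *tpcache ...
 *	tcp_cache_unlock(head);
 */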

static void
tcp_cache_key_src_create(struct tcpcb *tp, struct tcp_cache_key_src *tcks)
{
	struct inpcb *inp = tp->t_inpcb;
	memset(tcks, 0, sizeof(*tcks));

	tcks->ifp = inp->inp_last_outifp;

	if (inp->inp_vflag & INP_IPV6) {
		memcpy(&tcks->laddr.addr6, &inp->in6p_laddr, sizeof(struct in6_addr));
		memcpy(&tcks->faddr.addr6, &inp->in6p_faddr, sizeof(struct in6_addr));
		tcks->af = AF_INET6;
	} else {
		memcpy(&tcks->laddr.addr, &inp->inp_laddr, sizeof(struct in_addr));
		memcpy(&tcks->faddr.addr, &inp->inp_faddr, sizeof(struct in_addr));
		tcks->af = AF_INET;
	}

	return;
}

static void
mptcp_version_cache_key_src_init(struct sockaddr *dst, struct tcp_cache_key_src *tcks)
{
	memset(tcks, 0, sizeof(*tcks));

	if (dst->sa_family == AF_INET) {
		memcpy(&tcks->faddr.addr, &SIN(dst)->sin_addr, sizeof(struct in_addr));
		tcks->af = AF_INET;
	} else {
		memcpy(&tcks->faddr.addr6, &SIN6(dst)->sin6_addr, sizeof(struct in6_addr));
		tcks->af = AF_INET6;
	}

	return;
}

static void
tcp_cache_set_cookie_common(struct tcp_cache_key_src *tcks, u_char *cookie, uint8_t len)
{
	struct tcp_cache_head *head;
	struct tcp_cache *tpcache;

	/* Call lookup/create function */
	tpcache = tcp_getcache_with_lock(tcks, 1, &head);
	if (tpcache == NULL) {
		return;
	}

	tpcache->tc_tfo_cookie_len = len > TFO_COOKIE_LEN_MAX ?
	    TFO_COOKIE_LEN_MAX : len;
	memcpy(tpcache->tc_tfo_cookie, cookie, tpcache->tc_tfo_cookie_len);

	tcp_cache_unlock(head);
}

void
tcp_cache_set_cookie(struct tcpcb *tp, u_char *cookie, uint8_t len)
{
	struct tcp_cache_key_src tcks;

	tcp_cache_key_src_create(tp, &tcks);
	tcp_cache_set_cookie_common(&tcks, cookie, len);
}

static int
tcp_cache_get_cookie_common(struct tcp_cache_key_src *tcks, u_char *cookie, uint8_t *len)
{
	struct tcp_cache_head *head;
	struct tcp_cache *tpcache;

	/* Call lookup/create function */
	tpcache = tcp_getcache_with_lock(tcks, 1, &head);
	if (tpcache == NULL) {
		return 0;
	}

	if (tpcache->tc_tfo_cookie_len == 0) {
		tcp_cache_unlock(head);
		return 0;
	}

	/*
	 * Not enough space - this should never happen as it has been checked
	 * in tcp_tfo_check. So, fail here!
	 */
	VERIFY(tpcache->tc_tfo_cookie_len <= *len);

	memcpy(cookie, tpcache->tc_tfo_cookie, tpcache->tc_tfo_cookie_len);
	*len = tpcache->tc_tfo_cookie_len;

	tcp_cache_unlock(head);

	return 1;
}

/*
 * Get the cookie related to 'tp', and copy it into 'cookie', provided that
 * len is big enough (len designates the available memory).
 * Upon return, 'len' is set to the cookie's length.
 *
 * Returns 0 if we should request a cookie.
 * Returns 1 if the cookie has been found and written.
 */
int
tcp_cache_get_cookie(struct tcpcb *tp, u_char *cookie, uint8_t *len)
{
	struct tcp_cache_key_src tcks;

	tcp_cache_key_src_create(tp, &tcks);
	return tcp_cache_get_cookie_common(&tcks, cookie, len);
}
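
/*
 * Editor's sketch of the in/out contract of 'len' (hypothetical caller,
 * not in the original source):
 *
 *	uint8_t cookie[TFO_COOKIE_LEN_MAX];
 *	uint8_t len = sizeof(cookie);	(available space on input)
 *
 *	if (tcp_cache_get_cookie(tp, cookie, &len)) {
 *		... cookie[0..len) now holds the cached TFO cookie ...
 *	} else {
 *		... nothing cached - request a cookie in the SYN ...
 *	}
 */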

static unsigned int
tcp_cache_get_cookie_len_common(struct tcp_cache_key_src *tcks)
{
	struct tcp_cache_head *head;
	struct tcp_cache *tpcache;
	unsigned int cookie_len;

	/* Call lookup/create function */
	tpcache = tcp_getcache_with_lock(tcks, 1, &head);
	if (tpcache == NULL) {
		return 0;
	}

	cookie_len = tpcache->tc_tfo_cookie_len;

	tcp_cache_unlock(head);

	return cookie_len;
}

unsigned int
tcp_cache_get_cookie_len(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;

	tcp_cache_key_src_create(tp, &tcks);
	return tcp_cache_get_cookie_len_common(&tcks);
}

/*
 * @return:
 *         0	MPTCP_VERSION_0
 *         1	MPTCP_VERSION_1
 */
uint8_t
tcp_cache_get_mptcp_version(struct sockaddr *dst)
{
	struct tcp_cache_key_src tcks;
	mptcp_version_cache_key_src_init(dst, &tcks);
	uint8_t version = (uint8_t) mptcp_preferred_version;

	struct tcp_cache_head *head;
	struct tcp_cache *tpcache;

	/* Call lookup/create function */
	tpcache = tcp_getcache_with_lock(&tcks, 1, &head);
	if (tpcache == NULL) {
		return version;
	}

	version = tpcache->tc_mptcp_version;

	/* Let's see if we should try the preferred version again */
	if (!tpcache->tc_mptcp_version_confirmed &&
	    version != mptcp_preferred_version &&
	    TSTMP_GEQ(tcp_now, tpcache->tc_mptcp_next_version_try)) {
		version = (uint8_t) mptcp_preferred_version;
	}

	tcp_cache_unlock(head);
	return version;
}

void
tcp_cache_update_mptcp_version(struct tcpcb *tp, boolean_t succeeded)
{
	uint8_t version = tptomptp(tp)->mpt_version;
	struct inpcb *inp = tp->t_inpcb;
	struct tcp_cache_key_src tcks;
	struct tcp_cache_head *head;
	struct tcp_cache *tpcache;

	if (inp->inp_vflag & INP_IPV6) {
		struct sockaddr_in6 dst = {
			.sin6_len = sizeof(struct sockaddr_in6),
			.sin6_family = AF_INET6,
			.sin6_addr = inp->in6p_faddr,
		};
		mptcp_version_cache_key_src_init(SA(&dst), &tcks);
	} else {
		struct sockaddr_in dst = {
			.sin_len = sizeof(struct sockaddr_in),
			.sin_family = AF_INET,
			.sin_addr = inp->inp_faddr,
		};
		mptcp_version_cache_key_src_init(SA(&dst), &tcks);
	}

	/* Call lookup/create function */
	tpcache = tcp_getcache_with_lock(&tcks, 1, &head);
	if (tpcache == NULL) {
		return;
	}

	/* Nothing to update once the version has been confirmed */
	if (tpcache->tc_mptcp_version_confirmed) {
		goto exit;
	}

	if (succeeded) {
		if (version == (uint8_t)mptcp_preferred_version) {
			/* Preferred version succeeded - make it sticky */
			tpcache->tc_mptcp_version_confirmed = true;
			tpcache->tc_mptcp_version = version;
		} else {
			/*
			 * If we are past the next version-try time, push it
			 * out so that we try the preferred version again
			 * after mptcp_version_timeout.
			 */
			if (TSTMP_GEQ(tcp_now, tpcache->tc_mptcp_next_version_try)) {
				tpcache->tc_mptcp_next_version_try = tcp_now + tcp_min_to_hz(mptcp_version_timeout);
			}
		}
	} else {
		if (version == (uint8_t)mptcp_preferred_version) {
			/* Preferred version failed - try the other version */
			tpcache->tc_mptcp_version = version == MPTCP_VERSION_0 ? MPTCP_VERSION_1 : MPTCP_VERSION_0;
		}
		/*
		 * After a failure, make sure we give the preferred version
		 * another shot after mptcp_version_timeout.
		 */
		if (TSTMP_GEQ(tcp_now, tpcache->tc_mptcp_next_version_try)) {
			tpcache->tc_mptcp_next_version_try = tcp_now + tcp_min_to_hz(mptcp_version_timeout);
		}
	}

exit:
	tcp_cache_unlock(head);
}
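
/*
 * Editor's walkthrough of the probing logic above, assuming the preferred
 * version is MPTCP_VERSION_1 and mptcp_version_timeout has its default of
 * 24 hours:
 *
 *	- v1 succeeds:	tc_mptcp_version_confirmed is set, v1 sticks.
 *	- v1 fails:	tc_mptcp_version flips to v0 and
 *			tc_mptcp_next_version_try moves to now + 24h.
 *	- v0 is then handed out until tcp_now passes
 *	  tc_mptcp_next_version_try, at which point
 *	  tcp_cache_get_mptcp_version() offers the preferred version for
 *	  another probe.
 */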

static uint16_t
tcp_heuristics_hash(struct tcp_cache_key_src *tcks, struct tcp_heuristic_key *key)
{
	uint32_t hash;

	bzero(key, sizeof(struct tcp_heuristic_key));

	tcp_cache_hash_src(tcks, key);

	hash = net_flowhash(key, sizeof(struct tcp_heuristic_key),
	    tcp_cache_hash_seed);

	return (uint16_t)(hash & (tcp_cache_size - 1));
}

static void
tcp_heuristic_unlock(struct tcp_heuristics_head *head)
{
	lck_mtx_unlock(&head->thh_mtx);
}

/*
 * Make sure that everything that happens after tcp_getheuristic_with_lock()
 * is short enough to justify that you hold the per-bucket lock!
 *
 * Otherwise, better build another lookup-function that does not hold the
 * lock and copies out the bits and bytes.
 *
 * That's why we provide the head as a "return"-pointer, so that the caller
 * can give it back to us for tcp_heuristic_unlock().
 *
 *
 * ToDo - way too much code-duplication. We should create an interface to
 * handle bucketized hashtables with recycling of the oldest element.
 */
static struct tcp_heuristic *
tcp_getheuristic_with_lock(struct tcp_cache_key_src *tcks,
    int create, struct tcp_heuristics_head **headarg)
{
	struct tcp_heuristic *tpheur = NULL;
	struct tcp_heuristics_head *head;
	struct tcp_heuristic_key key;
	uint16_t hash;
	int i = 0;

	hash = tcp_heuristics_hash(tcks, &key);
	head = &tcp_heuristics[hash];

	lck_mtx_lock(&head->thh_mtx);

	/*** First step: Look for the tcp_heur in our bucket ***/
	SLIST_FOREACH(tpheur, &head->tcp_heuristics, list) {
		if (memcmp(&tpheur->th_key, &key, sizeof(key)) == 0) {
			break;
		}

		i++;
	}

	/*** Second step: If it's not there, create/recycle it ***/
	if ((tpheur == NULL) && create) {
		if (i >= TCP_CACHE_BUCKET_SIZE) {
			struct tcp_heuristic *oldest_heur = NULL;
			uint32_t max_age = 0;

			/* Look for the oldest tcp_heur in the bucket */
			SLIST_FOREACH(tpheur, &head->tcp_heuristics, list) {
				uint32_t age = tcp_now - tpheur->th_last_access;
				if (age > max_age) {
					max_age = age;
					oldest_heur = tpheur;
				}
			}
			VERIFY(oldest_heur != NULL);

			tpheur = oldest_heur;

			/* We recycle - set everything to 0 */
			bzero(tpheur->th_val_start,
			    tpheur->th_val_end - tpheur->th_val_start);
		} else {
			/* Create a new heuristic and add it to the list */
			tpheur = kalloc_type(struct tcp_heuristic, Z_NOPAGEWAIT | Z_ZERO);
			if (tpheur == NULL) {
				os_log_error(OS_LOG_DEFAULT, "%s could not allocate heuristic", __func__);
				goto out_null;
			}

			SLIST_INSERT_HEAD(&head->tcp_heuristics, tpheur, list);
		}

		/*
		 * Initialize to tcp_now, so that these backoff deadlines are
		 * not greater than tcp_now in the near future - nothing
		 * starts out in backoff.
		 */
		tpheur->th_ecn_backoff = tcp_now;
		tpheur->th_tfo_backoff_until = tcp_now;
		tpheur->th_mptcp_backoff = tcp_now;
		tpheur->th_tfo_backoff = tcp_min_to_hz(tcp_ecn_timeout);

		memcpy(&tpheur->th_key, &key, sizeof(key));
	}

	if (tpheur == NULL) {
		goto out_null;
	}

	/* Update timestamp for garbage collection purposes */
	tpheur->th_last_access = tcp_now;
	*headarg = head;

	return tpheur;

out_null:
	tcp_heuristic_unlock(head);
	return NULL;
}

static void
tcp_heuristic_reset_counters(struct tcp_cache_key_src *tcks, uint8_t flags)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;

	/*
	 * Always create the heuristic here, because MPTCP needs to write
	 * its success into it.
	 */
	tpheur = tcp_getheuristic_with_lock(tcks, 1, &head);
	if (tpheur == NULL) {
		return;
	}

	if (flags & TCPCACHE_F_TFO_DATA) {
		if (tpheur->th_tfo_data_loss >= TFO_MAX_COOKIE_LOSS) {
			os_log(OS_LOG_DEFAULT, "%s: Resetting TFO-data loss to 0 from %u on heur %lx\n",
			    __func__, tpheur->th_tfo_data_loss, (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
		}
		tpheur->th_tfo_data_loss = 0;
	}

	if (flags & TCPCACHE_F_TFO_REQ) {
		if (tpheur->th_tfo_req_loss >= TFO_MAX_COOKIE_LOSS) {
			os_log(OS_LOG_DEFAULT, "%s: Resetting TFO-req loss to 0 from %u on heur %lx\n",
			    __func__, tpheur->th_tfo_req_loss, (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
		}
		tpheur->th_tfo_req_loss = 0;
	}

	if (flags & TCPCACHE_F_TFO_DATA_RST) {
		if (tpheur->th_tfo_data_rst >= TFO_MAX_COOKIE_LOSS) {
			os_log(OS_LOG_DEFAULT, "%s: Resetting TFO-data RST to 0 from %u on heur %lx\n",
			    __func__, tpheur->th_tfo_data_rst, (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
		}
		tpheur->th_tfo_data_rst = 0;
	}

	if (flags & TCPCACHE_F_TFO_REQ_RST) {
		if (tpheur->th_tfo_req_rst >= TFO_MAX_COOKIE_LOSS) {
			os_log(OS_LOG_DEFAULT, "%s: Resetting TFO-req RST to 0 from %u on heur %lx\n",
			    __func__, tpheur->th_tfo_req_rst, (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
		}
		tpheur->th_tfo_req_rst = 0;
	}

	if (flags & TCPCACHE_F_ECN) {
		if (tpheur->th_ecn_loss >= ECN_MAX_SYN_LOSS || tpheur->th_ecn_synrst >= ECN_MAX_SYNRST) {
			os_log(OS_LOG_DEFAULT, "%s: Resetting ECN-loss to 0 from %u and synrst from %u on heur %lx\n",
			    __func__, tpheur->th_ecn_loss, tpheur->th_ecn_synrst, (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
		}
		tpheur->th_ecn_loss = 0;
		tpheur->th_ecn_synrst = 0;
	}

	if (flags & TCPCACHE_F_MPTCP) {
		tpheur->th_mptcp_loss = 0;
		if (tpheur->th_mptcp_success < MPTCP_SUCCESS_TRIGGER) {
			tpheur->th_mptcp_success++;

			if (tpheur->th_mptcp_success == MPTCP_SUCCESS_TRIGGER) {
				os_log(mptcp_log_handle, "%s disabling heuristics for 12 hours", __func__);
				tpheur->th_mptcp_heuristic_disabled = 1;
				/* Disable heuristics for 12 hours */
				tpheur->th_mptcp_backoff = tcp_now + tcp_min_to_hz(tcp_ecn_timeout * 12);
			}
		}
	}

	tcp_heuristic_unlock(head);
}

void
tcp_heuristic_tfo_success(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;
	uint8_t flag = 0;

	tcp_cache_key_src_create(tp, &tcks);

	if (tp->t_tfo_stats & TFO_S_SYN_DATA_SENT) {
		flag = (TCPCACHE_F_TFO_DATA | TCPCACHE_F_TFO_REQ |
		    TCPCACHE_F_TFO_DATA_RST | TCPCACHE_F_TFO_REQ_RST);
	}
	if (tp->t_tfo_stats & TFO_S_COOKIE_REQ) {
		flag = (TCPCACHE_F_TFO_REQ | TCPCACHE_F_TFO_REQ_RST);
	}

	tcp_heuristic_reset_counters(&tcks, flag);
}

void
tcp_heuristic_mptcp_success(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;

	tcp_cache_key_src_create(tp, &tcks);
	tcp_heuristic_reset_counters(&tcks, TCPCACHE_F_MPTCP);
}

void
tcp_heuristic_ecn_success(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;

	tcp_cache_key_src_create(tp, &tcks);
	tcp_heuristic_reset_counters(&tcks, TCPCACHE_F_ECN);
}

static void
__tcp_heuristic_tfo_middlebox_common(struct tcp_heuristic *tpheur)
{
	if (tpheur->th_tfo_in_backoff) {
		return;
	}

	tpheur->th_tfo_in_backoff = 1;

	if (tpheur->th_tfo_enabled_time) {
		uint32_t old_backoff = tpheur->th_tfo_backoff;

		tpheur->th_tfo_backoff -= (tcp_now - tpheur->th_tfo_enabled_time);
		if (tpheur->th_tfo_backoff > old_backoff) {
			tpheur->th_tfo_backoff = tcp_min_to_hz(tcp_ecn_timeout);
		}
	}

	tpheur->th_tfo_backoff_until = tcp_now + tpheur->th_tfo_backoff;

	/* Then, increase the backoff time */
	tpheur->th_tfo_backoff *= 2;

	if (tpheur->th_tfo_backoff > tcp_min_to_hz(tcp_backoff_maximum)) {
		tpheur->th_tfo_backoff = tcp_min_to_hz(tcp_ecn_timeout);
	}

	os_log(OS_LOG_DEFAULT, "%s disable TFO until %u now %u on %lx\n", __func__,
	    tpheur->th_tfo_backoff_until, tcp_now, (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
}
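
/*
 * Editor's note on the resulting schedule: with the default tcp_ecn_timeout
 * of 60 minutes, th_tfo_backoff starts at one hour; each middlebox event
 * blacklists TFO until now + th_tfo_backoff and then doubles the backoff
 * (1h, 2h, 4h, ...). Once the doubled value would exceed
 * tcp_min_to_hz(tcp_backoff_maximum), it is reset to the initial hour
 * rather than capped. The th_tfo_enabled_time adjustment above credits the
 * time TFO was successfully re-enabled against the next backoff, falling
 * back to the base value on underflow.
 */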

static void
tcp_heuristic_tfo_middlebox_common(struct tcp_cache_key_src *tcks)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;

	tpheur = tcp_getheuristic_with_lock(tcks, 1, &head);
	if (tpheur == NULL) {
		return;
	}

	__tcp_heuristic_tfo_middlebox_common(tpheur);

	tcp_heuristic_unlock(head);
}

static void
tcp_heuristic_inc_counters(struct tcp_cache_key_src *tcks,
    uint32_t flags)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;

	tpheur = tcp_getheuristic_with_lock(tcks, 1, &head);
	if (tpheur == NULL) {
		return;
	}

	/* Limit to prevent integer-overflow during exponential backoff */
	if ((flags & TCPCACHE_F_TFO_DATA) && tpheur->th_tfo_data_loss < TCP_CACHE_OVERFLOW_PROTECT) {
		tpheur->th_tfo_data_loss++;

		if (tpheur->th_tfo_data_loss >= TFO_MAX_COOKIE_LOSS) {
			__tcp_heuristic_tfo_middlebox_common(tpheur);
		}
	}

	if ((flags & TCPCACHE_F_TFO_REQ) && tpheur->th_tfo_req_loss < TCP_CACHE_OVERFLOW_PROTECT) {
		tpheur->th_tfo_req_loss++;

		if (tpheur->th_tfo_req_loss >= TFO_MAX_COOKIE_LOSS) {
			__tcp_heuristic_tfo_middlebox_common(tpheur);
		}
	}

	if ((flags & TCPCACHE_F_TFO_DATA_RST) && tpheur->th_tfo_data_rst < TCP_CACHE_OVERFLOW_PROTECT) {
		tpheur->th_tfo_data_rst++;

		if (tpheur->th_tfo_data_rst >= TFO_MAX_COOKIE_LOSS) {
			__tcp_heuristic_tfo_middlebox_common(tpheur);
		}
	}

	if ((flags & TCPCACHE_F_TFO_REQ_RST) && tpheur->th_tfo_req_rst < TCP_CACHE_OVERFLOW_PROTECT) {
		tpheur->th_tfo_req_rst++;

		if (tpheur->th_tfo_req_rst >= TFO_MAX_COOKIE_LOSS) {
			__tcp_heuristic_tfo_middlebox_common(tpheur);
		}
	}

	if ((flags & TCPCACHE_F_ECN) &&
	    tpheur->th_ecn_loss < TCP_CACHE_OVERFLOW_PROTECT &&
	    TSTMP_LEQ(tpheur->th_ecn_backoff, tcp_now)) {
		tpheur->th_ecn_loss++;
		if (tpheur->th_ecn_loss >= ECN_MAX_SYN_LOSS) {
			tcpstat.tcps_ecn_fallback_synloss++;
			TCP_CACHE_INC_IFNET_STAT(tcks->ifp, tcks->af, ecn_fallback_synloss);
			tpheur->th_ecn_backoff = tcp_now +
			    (tcp_min_to_hz(tcp_ecn_timeout) <<
			    (tpheur->th_ecn_loss - ECN_MAX_SYN_LOSS));

			os_log(OS_LOG_DEFAULT, "%s disable ECN until %u now %u on %lx for SYN-loss\n",
			    __func__, tpheur->th_ecn_backoff, tcp_now,
			    (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
		}
	}

	if ((flags & TCPCACHE_F_MPTCP) &&
	    tpheur->th_mptcp_loss < TCP_CACHE_OVERFLOW_PROTECT &&
	    tpheur->th_mptcp_heuristic_disabled == 0) {
		tpheur->th_mptcp_loss++;
		if (tpheur->th_mptcp_loss >= MPTCP_MAX_SYN_LOSS) {
			/*
			 * Yes, we take tcp_ecn_timeout, to avoid adding yet
			 * another sysctl that is just used for testing.
			 */
			tpheur->th_mptcp_backoff = tcp_now +
			    (tcp_min_to_hz(tcp_ecn_timeout) <<
			    (tpheur->th_mptcp_loss - MPTCP_MAX_SYN_LOSS));
			tpheur->th_mptcp_in_backoff = 1;

			os_log(OS_LOG_DEFAULT, "%s disable MPTCP until %u now %u on %lx\n",
			    __func__, tpheur->th_mptcp_backoff, tcp_now,
			    (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
		}
	}

	if ((flags & TCPCACHE_F_ECN_DROPRST) &&
	    tpheur->th_ecn_droprst < TCP_CACHE_OVERFLOW_PROTECT &&
	    TSTMP_LEQ(tpheur->th_ecn_backoff, tcp_now)) {
		tpheur->th_ecn_droprst++;
		if (tpheur->th_ecn_droprst >= ECN_MAX_DROPRST) {
			tcpstat.tcps_ecn_fallback_droprst++;
			TCP_CACHE_INC_IFNET_STAT(tcks->ifp, tcks->af,
			    ecn_fallback_droprst);
			tpheur->th_ecn_backoff = tcp_now +
			    (tcp_min_to_hz(tcp_ecn_timeout) <<
			    (tpheur->th_ecn_droprst - ECN_MAX_DROPRST));

			os_log(OS_LOG_DEFAULT, "%s disable ECN until %u now %u on %lx for drop-RST\n",
			    __func__, tpheur->th_ecn_backoff, tcp_now,
			    (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
		}
	}

	if ((flags & TCPCACHE_F_ECN_DROPRXMT) &&
	    tpheur->th_ecn_droprxmt < TCP_CACHE_OVERFLOW_PROTECT &&
	    TSTMP_LEQ(tpheur->th_ecn_backoff, tcp_now)) {
		tpheur->th_ecn_droprxmt++;
		if (tpheur->th_ecn_droprxmt >= ECN_MAX_DROPRXMT) {
			tcpstat.tcps_ecn_fallback_droprxmt++;
			TCP_CACHE_INC_IFNET_STAT(tcks->ifp, tcks->af,
			    ecn_fallback_droprxmt);
			tpheur->th_ecn_backoff = tcp_now +
			    (tcp_min_to_hz(tcp_ecn_timeout) <<
			    (tpheur->th_ecn_droprxmt - ECN_MAX_DROPRXMT));

			os_log(OS_LOG_DEFAULT, "%s disable ECN until %u now %u on %lx for drop-Rxmit\n",
			    __func__, tpheur->th_ecn_backoff, tcp_now,
			    (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
		}
	}
	if ((flags & TCPCACHE_F_ECN_SYNRST) &&
	    tpheur->th_ecn_synrst < TCP_CACHE_OVERFLOW_PROTECT) {
		tpheur->th_ecn_synrst++;
		if (tpheur->th_ecn_synrst >= ECN_MAX_SYNRST) {
			tcpstat.tcps_ecn_fallback_synrst++;
			TCP_CACHE_INC_IFNET_STAT(tcks->ifp, tcks->af,
			    ecn_fallback_synrst);
			tpheur->th_ecn_backoff = tcp_now +
			    (tcp_min_to_hz(tcp_ecn_timeout) <<
			    (tpheur->th_ecn_synrst - ECN_MAX_SYNRST));

			os_log(OS_LOG_DEFAULT, "%s disable ECN until %u now %u on %lx for SYN-RST\n",
			    __func__, tpheur->th_ecn_backoff, tcp_now,
			    (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
		}
	}
	tcp_heuristic_unlock(head);
}
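
/*
 * Editor's worked example of the backoff pattern above, for the ECN
 * SYN-loss case with the default tcp_ecn_timeout of 60 minutes:
 *
 *	th_ecn_loss = 2 (ECN_MAX_SYN_LOSS)	backoff = 1h << 0 = 1h
 *	th_ecn_loss = 3				backoff = 1h << 1 = 2h
 *	th_ecn_loss = 4				backoff = 1h << 2 = 4h
 *	...
 *	th_ecn_loss = 9 (overflow cap)		backoff = 1h << 7 = 128h
 *
 * The TCP_CACHE_OVERFLOW_PROTECT cap bounds the shift, keeping the tick
 * arithmetic within uint32_t (see the note at its definition above).
 */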

void
tcp_heuristic_tfo_loss(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;
	uint32_t flag = 0;

	if (symptoms_is_wifi_lossy() &&
	    IFNET_IS_WIFI(tp->t_inpcb->inp_last_outifp)) {
		return;
	}

	tcp_cache_key_src_create(tp, &tcks);

	if (tp->t_tfo_stats & TFO_S_SYN_DATA_SENT) {
		flag = (TCPCACHE_F_TFO_DATA | TCPCACHE_F_TFO_REQ);
	}
	if (tp->t_tfo_stats & TFO_S_COOKIE_REQ) {
		flag = TCPCACHE_F_TFO_REQ;
	}

	tcp_heuristic_inc_counters(&tcks, flag);
}

void
tcp_heuristic_tfo_rst(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;
	uint32_t flag = 0;

	tcp_cache_key_src_create(tp, &tcks);

	if (tp->t_tfo_stats & TFO_S_SYN_DATA_SENT) {
		flag = (TCPCACHE_F_TFO_DATA_RST | TCPCACHE_F_TFO_REQ_RST);
	}
	if (tp->t_tfo_stats & TFO_S_COOKIE_REQ) {
		flag = TCPCACHE_F_TFO_REQ_RST;
	}

	tcp_heuristic_inc_counters(&tcks, flag);
}

void
tcp_heuristic_mptcp_loss(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;

	if (symptoms_is_wifi_lossy() &&
	    IFNET_IS_WIFI(tp->t_inpcb->inp_last_outifp)) {
		return;
	}

	tcp_cache_key_src_create(tp, &tcks);

	tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_MPTCP);
}

void
tcp_heuristic_ecn_loss(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;

	if (symptoms_is_wifi_lossy() &&
	    IFNET_IS_WIFI(tp->t_inpcb->inp_last_outifp)) {
		return;
	}

	tcp_cache_key_src_create(tp, &tcks);

	tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN);
}

void
tcp_heuristic_ecn_droprst(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;

	tcp_cache_key_src_create(tp, &tcks);

	tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN_DROPRST);
}

void
tcp_heuristic_ecn_droprxmt(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;

	tcp_cache_key_src_create(tp, &tcks);

	tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN_DROPRXMT);
}

void
tcp_heuristic_ecn_synrst(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;

	tcp_cache_key_src_create(tp, &tcks);

	tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN_SYNRST);
}

void
tcp_heuristic_tfo_middlebox(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;

	tp->t_tfo_flags |= TFO_F_HEURISTIC_DONE;

	tcp_cache_key_src_create(tp, &tcks);
	tcp_heuristic_tfo_middlebox_common(&tcks);
}

static void
tcp_heuristic_ecn_aggressive_common(struct tcp_cache_key_src *tcks)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;

	tpheur = tcp_getheuristic_with_lock(tcks, 1, &head);
	if (tpheur == NULL) {
		return;
	}

	if (TSTMP_GT(tpheur->th_ecn_backoff, tcp_now)) {
		/* We are already in aggressive mode */
		tcp_heuristic_unlock(head);
		return;
	}

	/*
	 * Must be done before the increment below - otherwise the first
	 * fallback would already start off with exponential backoff.
	 */
	tpheur->th_ecn_backoff = tcp_now +
	    (tcp_min_to_hz(tcp_ecn_timeout) << (tpheur->th_ecn_aggressive));

	/*
	 * Ugly way to prevent integer overflow: cap the counter so that the
	 * shift in the exponential backoff cannot overflow.
	 */
	if (tpheur->th_ecn_aggressive < TCP_CACHE_OVERFLOW_PROTECT) {
		tpheur->th_ecn_aggressive++;
	}

	tcp_heuristic_unlock(head);

	os_log(OS_LOG_DEFAULT, "%s disable ECN until %u now %u on %lx\n", __func__,
	    tpheur->th_ecn_backoff, tcp_now, (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
}

void
tcp_heuristic_ecn_aggressive(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;

	tcp_cache_key_src_create(tp, &tcks);
	tcp_heuristic_ecn_aggressive_common(&tcks);
}

static boolean_t
tcp_heuristic_do_tfo_common(struct tcp_cache_key_src *tcks)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;

	if (disable_tcp_heuristics) {
		return TRUE;
	}

	/* Get the tcp-heuristic. */
	tpheur = tcp_getheuristic_with_lock(tcks, 0, &head);
	if (tpheur == NULL) {
		return TRUE;
	}

	if (tpheur->th_tfo_in_backoff == 0) {
		goto tfo_ok;
	}

	if (TSTMP_GT(tcp_now, tpheur->th_tfo_backoff_until)) {
		tpheur->th_tfo_in_backoff = 0;
		tpheur->th_tfo_enabled_time = tcp_now;

		goto tfo_ok;
	}

	tcp_heuristic_unlock(head);
	return FALSE;

tfo_ok:
	tcp_heuristic_unlock(head);
	return TRUE;
}

boolean_t
tcp_heuristic_do_tfo(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;

	tcp_cache_key_src_create(tp, &tcks);
	if (tcp_heuristic_do_tfo_common(&tcks)) {
		return TRUE;
	}

	return FALSE;
}

/*
 * @return:
 *         0	Enable MPTCP (we are still discovering middleboxes)
 *         -1	Enable MPTCP (heuristics have been temporarily disabled)
 *         1	Disable MPTCP
 */
int
tcp_heuristic_do_mptcp(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;
	struct tcp_heuristics_head *head = NULL;
	struct tcp_heuristic *tpheur;
	int ret = 0;

	if (disable_tcp_heuristics ||
	    (tptomptp(tp)->mpt_mpte->mpte_flags & MPTE_FORCE_ENABLE)) {
		return 0;
	}

	tcp_cache_key_src_create(tp, &tcks);

	/* Get the tcp-heuristic. */
	tpheur = tcp_getheuristic_with_lock(&tcks, 0, &head);
	if (tpheur == NULL) {
		return 0;
	}

	if (tpheur->th_mptcp_in_backoff == 0 ||
	    tpheur->th_mptcp_heuristic_disabled == 1) {
		goto mptcp_ok;
	}

	if (TSTMP_GT(tpheur->th_mptcp_backoff, tcp_now)) {
		goto fallback;
	}

	tpheur->th_mptcp_in_backoff = 0;

mptcp_ok:
	if (tpheur->th_mptcp_heuristic_disabled) {
		ret = -1;

		if (TSTMP_GT(tcp_now, tpheur->th_mptcp_backoff)) {
			tpheur->th_mptcp_heuristic_disabled = 0;
			tpheur->th_mptcp_success = 0;
		}
	}

	tcp_heuristic_unlock(head);
	return ret;

fallback:
	if (head) {
		tcp_heuristic_unlock(head);
	}

	if (tptomptp(tp)->mpt_mpte->mpte_flags & MPTE_FIRSTPARTY) {
		tcpstat.tcps_mptcp_fp_heuristic_fallback++;
	} else {
		tcpstat.tcps_mptcp_heuristic_fallback++;
	}

	return 1;
}
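
/*
 * Editor's sketch of how a caller would consume the tri-state result above
 * (hypothetical caller, following the @return contract):
 *
 *	switch (tcp_heuristic_do_mptcp(tp)) {
 *	case 0:		(still discovering middleboxes)
 *	case -1:	(heuristics temporarily disabled)
 *		... send SYN+MP_CAPABLE ...
 *		break;
 *	case 1:
 *		... fall back to plain TCP ...
 *		break;
 *	}
 */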

static boolean_t
tcp_heuristic_do_ecn_common(struct tcp_cache_key_src *tcks)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;
	boolean_t ret = TRUE;

	if (disable_tcp_heuristics) {
		return TRUE;
	}

	/* Get the tcp-heuristic. */
	tpheur = tcp_getheuristic_with_lock(tcks, 0, &head);
	if (tpheur == NULL) {
		return ret;
	}

	if (TSTMP_GT(tpheur->th_ecn_backoff, tcp_now)) {
		ret = FALSE;
	} else {
		/* Reset the following counters to start re-evaluating */
		if (tpheur->th_ecn_droprst >= ECN_RETRY_LIMIT) {
			tpheur->th_ecn_droprst = 0;
		}
		if (tpheur->th_ecn_droprxmt >= ECN_RETRY_LIMIT) {
			tpheur->th_ecn_droprxmt = 0;
		}
		if (tpheur->th_ecn_synrst >= ECN_RETRY_LIMIT) {
			tpheur->th_ecn_synrst = 0;
		}

		/* Make sure the backoff timestamp keeps tracking tcp_now */
		tpheur->th_ecn_backoff = tcp_now;
	}

	tcp_heuristic_unlock(head);

	return ret;
}

boolean_t
tcp_heuristic_do_ecn(struct tcpcb *tp)
{
	struct tcp_cache_key_src tcks;

	tcp_cache_key_src_create(tp, &tcks);
	return tcp_heuristic_do_ecn_common(&tcks);
}

boolean_t
tcp_heuristic_do_ecn_with_address(struct ifnet *ifp,
    union sockaddr_in_4_6 *local_address)
{
	struct tcp_cache_key_src tcks;

	memset(&tcks, 0, sizeof(tcks));
	tcks.ifp = ifp;

	calculate_tcp_clock();

	if (local_address->sa.sa_family == AF_INET6) {
		memcpy(&tcks.laddr.addr6, &local_address->sin6.sin6_addr, sizeof(struct in6_addr));
		tcks.af = AF_INET6;
	} else if (local_address->sa.sa_family == AF_INET) {
		memcpy(&tcks.laddr.addr, &local_address->sin.sin_addr, sizeof(struct in_addr));
		tcks.af = AF_INET;
	}

	return tcp_heuristic_do_ecn_common(&tcks);
}

void
tcp_heuristics_ecn_update(struct necp_tcp_ecn_cache *necp_buffer,
    struct ifnet *ifp, union sockaddr_in_4_6 *local_address)
{
	struct tcp_cache_key_src tcks;

	memset(&tcks, 0, sizeof(tcks));
	tcks.ifp = ifp;

	calculate_tcp_clock();

	if (local_address->sa.sa_family == AF_INET6) {
		memcpy(&tcks.laddr.addr6, &local_address->sin6.sin6_addr, sizeof(struct in6_addr));
		tcks.af = AF_INET6;
	} else if (local_address->sa.sa_family == AF_INET) {
		memcpy(&tcks.laddr.addr, &local_address->sin.sin_addr, sizeof(struct in_addr));
		tcks.af = AF_INET;
	}

	if (necp_buffer->necp_tcp_ecn_heuristics_success) {
		tcp_heuristic_reset_counters(&tcks, TCPCACHE_F_ECN);
	} else if (necp_buffer->necp_tcp_ecn_heuristics_loss) {
		tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN);
	} else if (necp_buffer->necp_tcp_ecn_heuristics_drop_rst) {
		tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN_DROPRST);
	} else if (necp_buffer->necp_tcp_ecn_heuristics_drop_rxmt) {
		tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN_DROPRXMT);
	} else if (necp_buffer->necp_tcp_ecn_heuristics_syn_rst) {
		tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN_SYNRST);
	} else if (necp_buffer->necp_tcp_ecn_heuristics_aggressive) {
		tcp_heuristic_ecn_aggressive_common(&tcks);
	}

	return;
}

boolean_t
tcp_heuristic_do_tfo_with_address(struct ifnet *ifp,
    union sockaddr_in_4_6 *local_address, union sockaddr_in_4_6 *remote_address,
    uint8_t *cookie, uint8_t *cookie_len)
{
	struct tcp_cache_key_src tcks;

	memset(&tcks, 0, sizeof(tcks));
	tcks.ifp = ifp;

	calculate_tcp_clock();

	if (remote_address->sa.sa_family == AF_INET6) {
		memcpy(&tcks.laddr.addr6, &local_address->sin6.sin6_addr, sizeof(struct in6_addr));
		memcpy(&tcks.faddr.addr6, &remote_address->sin6.sin6_addr, sizeof(struct in6_addr));
		tcks.af = AF_INET6;
	} else if (remote_address->sa.sa_family == AF_INET) {
		memcpy(&tcks.laddr.addr, &local_address->sin.sin_addr, sizeof(struct in_addr));
		memcpy(&tcks.faddr.addr, &remote_address->sin.sin_addr, sizeof(struct in_addr));
		tcks.af = AF_INET;
	}

	if (tcp_heuristic_do_tfo_common(&tcks)) {
		if (!tcp_cache_get_cookie_common(&tcks, cookie, cookie_len)) {
			*cookie_len = 0;
		}
		return TRUE;
	}

	return FALSE;
}

void
tcp_heuristics_tfo_update(struct necp_tcp_tfo_cache *necp_buffer,
    struct ifnet *ifp, union sockaddr_in_4_6 *local_address,
    union sockaddr_in_4_6 *remote_address)
{
	struct tcp_cache_key_src tcks;

	memset(&tcks, 0, sizeof(tcks));
	tcks.ifp = ifp;

	calculate_tcp_clock();

	if (remote_address->sa.sa_family == AF_INET6) {
		memcpy(&tcks.laddr.addr6, &local_address->sin6.sin6_addr, sizeof(struct in6_addr));
		memcpy(&tcks.faddr.addr6, &remote_address->sin6.sin6_addr, sizeof(struct in6_addr));
		tcks.af = AF_INET6;
	} else if (remote_address->sa.sa_family == AF_INET) {
		memcpy(&tcks.laddr.addr, &local_address->sin.sin_addr, sizeof(struct in_addr));
		memcpy(&tcks.faddr.addr, &remote_address->sin.sin_addr, sizeof(struct in_addr));
		tcks.af = AF_INET;
	}

	if (necp_buffer->necp_tcp_tfo_heuristics_success) {
		tcp_heuristic_reset_counters(&tcks, TCPCACHE_F_TFO_REQ | TCPCACHE_F_TFO_DATA |
		    TCPCACHE_F_TFO_REQ_RST | TCPCACHE_F_TFO_DATA_RST);
	}

	if (necp_buffer->necp_tcp_tfo_heuristics_success_req) {
		tcp_heuristic_reset_counters(&tcks, TCPCACHE_F_TFO_REQ | TCPCACHE_F_TFO_REQ_RST);
	}

	if (necp_buffer->necp_tcp_tfo_heuristics_loss) {
		tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_TFO_REQ | TCPCACHE_F_TFO_DATA);
	}

	if (necp_buffer->necp_tcp_tfo_heuristics_loss_req) {
		tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_TFO_REQ);
	}

	if (necp_buffer->necp_tcp_tfo_heuristics_rst_data) {
		tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_TFO_REQ_RST | TCPCACHE_F_TFO_DATA_RST);
	}

	if (necp_buffer->necp_tcp_tfo_heuristics_rst_req) {
		tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_TFO_REQ_RST);
	}

	if (necp_buffer->necp_tcp_tfo_heuristics_middlebox) {
		tcp_heuristic_tfo_middlebox_common(&tcks);
	}

	if (necp_buffer->necp_tcp_tfo_cookie_len != 0) {
		tcp_cache_set_cookie_common(&tcks,
		    necp_buffer->necp_tcp_tfo_cookie, necp_buffer->necp_tcp_tfo_cookie_len);
	}

	return;
}

static void
sysctl_cleartfocache(void)
{
	int i;

	for (i = 0; i < tcp_cache_size; i++) {
		struct tcp_cache_head *head = &tcp_cache[i];
		struct tcp_cache *tpcache, *tmp;
		struct tcp_heuristics_head *hhead = &tcp_heuristics[i];
		struct tcp_heuristic *tpheur, *htmp;

		lck_mtx_lock(&head->tch_mtx);
		SLIST_FOREACH_SAFE(tpcache, &head->tcp_caches, list, tmp) {
			SLIST_REMOVE(&head->tcp_caches, tpcache, tcp_cache, list);
			kfree_type(struct tcp_cache, tpcache);
		}
		lck_mtx_unlock(&head->tch_mtx);

		lck_mtx_lock(&hhead->thh_mtx);
		SLIST_FOREACH_SAFE(tpheur, &hhead->tcp_heuristics, list, htmp) {
			SLIST_REMOVE(&hhead->tcp_heuristics, tpheur, tcp_heuristic, list);
			kfree_type(struct tcp_heuristic, tpheur);
		}
		lck_mtx_unlock(&hhead->thh_mtx);
	}
}

/* This sysctl is useful for testing purposes only */
static int tcpcleartfo = 0;

static int sysctl_cleartfo SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error = 0, val, oldval = tcpcleartfo;

	val = oldval;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr) {
		if (error) {
			os_log_error(OS_LOG_DEFAULT, "%s could not parse int: %d", __func__, error);
		}
		return error;
	}

	/*
	 * The actual value does not matter. Writing a value different from
	 * the current one is what triggers the clearing of the TFO cache.
	 */
	if (val != oldval) {
		sysctl_cleartfocache();
	}

	tcpcleartfo = val;

	return error;
}

SYSCTL_PROC(_net_inet_tcp, OID_AUTO, clear_tfocache, CTLTYPE_INT | CTLFLAG_RW |
    CTLFLAG_LOCKED, &tcpcleartfo, 0, &sysctl_cleartfo, "I",
    "Toggle to clear the TFO destination based heuristic cache");

void
tcp_cache_init(void)
{
	uint64_t sane_size_meg = sane_size / 1024 / 1024;

	/*
	 * On machines with <100MB of memory this will result in a (full) cache-size
	 * of 32 entries, thus 32 * 5 * 64bytes = 10KB. (about 0.01 %)
	 * On machines with > 4GB of memory, we have a cache-size of 1024 entries,
	 * thus about 327KB.
	 *
	 * Side-note: we convert to uint32_t. If sane_size is more than
	 * 16000 TB, we lose precision. But, who cares? :)
	 */
	tcp_cache_size = tcp_cache_roundup2((uint32_t)(sane_size_meg >> 2));
	if (tcp_cache_size < 32) {
		tcp_cache_size = 32;
	} else if (tcp_cache_size > 1024) {
		tcp_cache_size = 1024;
	}

	tcp_cache = zalloc_permanent(sizeof(struct tcp_cache_head) * tcp_cache_size,
	    ZALIGN(struct tcp_cache_head));

	tcp_heuristics = zalloc_permanent(sizeof(struct tcp_heuristics_head) * tcp_cache_size,
	    ZALIGN(struct tcp_heuristics_head));

	for (int i = 0; i < tcp_cache_size; i++) {
		lck_mtx_init(&tcp_cache[i].tch_mtx, &tcp_cache_mtx_grp,
		    &tcp_cache_mtx_attr);
		SLIST_INIT(&tcp_cache[i].tcp_caches);

		lck_mtx_init(&tcp_heuristics[i].thh_mtx, &tcp_heuristic_mtx_grp,
		    &tcp_heuristic_mtx_attr);
		SLIST_INIT(&tcp_heuristics[i].tcp_heuristics);
	}

	tcp_cache_hash_seed = RandomULong();
}
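
/*
 * Editor's worked example of the sizing above, assuming a machine with
 * 8 GB of memory:
 *
 *	sane_size_meg            = 8192
 *	sane_size_meg >> 2       = 2048
 *	tcp_cache_roundup2(2048) = 2048  ->  clamped to the 1024 maximum
 *
 * A 512 MB machine yields 128 buckets; anything at or below roughly 128 MB
 * hits the 32-bucket floor. With TCP_CACHE_BUCKET_SIZE == 5, the bucket
 * count bounds the number of cache and heuristic entries per table.
 */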
1626