xref: /xnu-12377.81.4/bsd/netinet/tcp_timer.h (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 /*
2  * Copyright (c) 2000-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)tcp_timer.h	8.1 (Berkeley) 6/10/93
61  * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.18 1999/12/29 04:41:03 peter Exp $
62  */
63 
64 #ifndef _NETINET_TCP_TIMER_H_
65 #define _NETINET_TCP_TIMER_H_
66 #include <sys/appleapiopts.h>
67 
68 #ifdef BSD_KERNEL_PRIVATE
69 #include <kern/thread_call.h>
70 #endif /* BSD_KERNEL_PRIVATE */
71 
/*
 * Timer count used by the external definition.  Keep this value
 * unchanged for binary compatibility.
 */
#define TCPT_NTIMERS_EXT        4
74 
75 /*
76  * Definitions of the TCP timers.
77  *
78  * The TCPT_PTO timer is used for probing for a tail loss in a send window.
79  * If this probe gets acknowledged using SACK, it will allow the connection
80  * to enter fast-recovery instead of hitting a retransmit timeout. A probe
81  * timeout will send the last unacknowledged segment to generate more acks
82  * with SACK information which can be used for fast-retransmitting the lost
83  * packets. This will fire in the order of 10ms.
84  *
85  * The TCPT_REXMT timer is used to force retransmissions.
86  * The TCP has the TCPT_REXMT timer set whenever segments
87  * have been sent for which ACKs are expected but not yet
88  * received.  If an ACK is received which advances tp->snd_una,
89  * then the retransmit timer is cleared (if there are no more
90  * outstanding segments) or reset to the base value (if there
91  * are more ACKs expected).  Whenever the retransmit timer goes off,
92  * we retransmit one unacknowledged segment, and do a backoff
93  * on the retransmit timer.
94  *
95  * The TCPT_DELACK timer is used for transmitting delayed acknowledgements
96  * if an acknowledgement was delayed in anticipation of a new segment.
97  *
98  * The TCPT_PERSIST timer is used to keep window size information
99  * flowing even if the window goes shut.  If all previous transmissions
100  * have been acknowledged (so that there are no retransmissions in progress),
101  * and the window is too small to bother sending anything, then we start
102  * the TCPT_PERSIST timer.  When it expires, if the window is nonzero,
103  * we go to transmit state.  Otherwise, at intervals send a single byte
104  * into the peer's window to force him to update our window information.
105  * We do this at most as often as TCPTV_PERSMIN time intervals,
106  * but no more frequently than the current estimate of round-trip
107  * packet time.  The TCPT_PERSIST timer is cleared whenever we receive
108  * a window update from the peer.
109  *
110  * The TCPT_KEEP timer is used to keep connections alive.  If a
111  * connection is idle (no segments received) for TCPTV_KEEP_INIT amount
112  * of time, but not yet established, then we drop the connection.
113  * Once the connection is established, if the connection is idle for
114  * TCPTV_KEEP_IDLE time (and keepalives have been enabled on the socket),
115  * we begin to probe the connection.  We force the peer to send us a
116  * segment by sending:
117  *	<SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK>
118  * This segment is (deliberately) outside the window, and should elicit
119  * an ack segment in response from the peer.  If, despite the TCPT_KEEP
120  * initiated segments we cannot elicit a response from a peer in
121  * TCP_CONN_MAXIDLE amount of time probing, then we drop the connection.
122  *
123  * The TCPT_2MSL timer is used for keeping the connection in Time-wait state
124  * before fully closing it so that the connection 4-tuple can be reused.
125  */
#ifdef BSD_KERNEL_PRIVATE

/*
 * Kernel-private timer indices.  The numeric order is significant:
 * the IS_TIMER_HZ_*() macros classify a timer by comparing its index
 * against TCPT_REXMT and TCPT_PERSIST.
 */
#define TCPT_PTO        0       /* probe timeout */
#define TCPT_DELAYFR    1       /* delay recovery if there is reordering */
#define TCPT_REORDER    2       /* reordering timer for RACK */
#define TCPT_REXMT      3       /* retransmit */
#define TCPT_DELACK     4       /* delayed ack */
#define TCPT_PERSIST    5       /* retransmit persistence */
#define TCPT_KEEP       6       /* keep alive */
#define TCPT_2MSL       7       /* 2*msl quiet time timer */
#if MPTCP
#define TCPT_JACK_RXMT  8       /* retransmit timer for join ack */
#define TCPT_CELLICON   9       /* timer to check for cell-activity */
#define TCPT_MAX        TCPT_CELLICON   /* highest timer index (9) */
#else /* !MPTCP */
#define TCPT_MAX        TCPT_2MSL       /* highest timer index (7) */
#endif /* MPTCP */

/* Both are one past the highest timer index. */
#define TCPT_NONE       (TCPT_MAX + 1)
#define TCPT_NTIMERS    (TCPT_MAX + 1)

/* Timer indices as seen by the external definition */
#define TCPT_REXMT_EXT          0
#define TCPT_PERSIST_EXT        1
#define TCPT_KEEP_EXT           2
#define TCPT_2MSL_EXT           3
#define TCPT_DELACK_EXT         4

#else /* !BSD_KERNEL_PRIVATE */

/* Timer indices visible outside of the kernel-private build */
#define TCPT_REXMT      0       /* retransmit */
#define TCPT_PERSIST    1       /* retransmit persistence */
#define TCPT_KEEP       2       /* keep alive */
#define TCPT_2MSL       3       /* 2*msl quiet time timer */
#define TCPT_DELACK     4       /* delayed ack timer */
#if MPTCP
#define TCPT_JACK_RXMT  5       /* retransmit timer for join ack */
#define TCPT_MAX        TCPT_JACK_RXMT  /* highest timer index (5) */
#else /* !MPTCP */
#define TCPT_MAX        TCPT_DELACK     /* highest timer index (4) */
#endif /* MPTCP */

/* Both are one past the highest timer index. */
#define TCPT_NONE       (TCPT_MAX + 1)
#define TCPT_NTIMERS    (TCPT_MAX + 1)

#endif /* BSD_KERNEL_PRIVATE */
170 
171 #ifdef BSD_KERNEL_PRIVATE
/*
 * Time constants.  Values built from TCP_RETRANSHZ are expressed in
 * retransmit-clock ticks; the remaining ones are plain counts/factors
 * as noted on each line.
 */
#define TCPTV_MSL       (15 * TCP_RETRANSHZ)    /* max seg lifetime */
#define TCPTV_SRTTBASE  0                       /* base roundtrip time; if 0, no idea yet */
#define TCPTV_RTOBASE   (1 * TCP_RETRANSHZ)     /* assumed RTO if no info */
#define TCPTV_SRTTDFLT  (1 * TCP_RETRANSHZ)     /* assumed RTT if no info */
#define TCPTV_PERSMIN   (5 * TCP_RETRANSHZ)     /* retransmit persistence */
#define TCPTV_PERSMAX   (60 * TCP_RETRANSHZ)    /* maximum persist interval */

extern int tcptv_persmin_val;

/* Keepalive defaults */
#define TCPTV_KEEP_INIT (75 * TCP_RETRANSHZ)            /* connect keep alive */
#define TCPTV_KEEP_IDLE (120 * 60 * TCP_RETRANSHZ)      /* time before probing */
#define TCPTV_KEEPINTVL (75 * TCP_RETRANSHZ)            /* default probe interval */
#define TCPTV_KEEPCNT   8                               /* max probes before drop */

/* Bounds for the retransmit timer */
#define TCPTV_REXMTMAX  (64 * TCP_RETRANSHZ)    /* max REXMT value */
#define TCPTV_REXMTMIN  (TCP_RETRANSHZ / 33)    /* min REXMT for non-local connections */

#define TCPTV_FINWAIT2  (60 * TCP_RETRANSHZ)    /* timeout to get out of FIN_WAIT_2 */

#define TCPTV_TWTRUNC   8       /* RTO factor to truncate TW */

#define TCP_LINGERTIME  120     /* linger at most 2 minutes */

#define TCP_MAXRXTSHIFT 12      /* maximum retransmits */
199 
200 #ifdef  TCPTIMERS
/*
 * Printable timer names.  The entries are listed in the same order as
 * the external timer indices (TCPT_REXMT_EXT through TCPT_DELACK_EXT);
 * note that this is not the order of the kernel-private TCPT_* indices.
 */
static char *tcptimers[] = {
	"REXMT",
	"PERSIST",
	"KEEP",
	"2MSL",
	"DELACK"
};
203 #endif /* TCPTIMERS */
204 
/*
 * Timer granularity classes, selected purely by timer index:
 *
 *  - Slow timers (index >= TCPT_PERSIST): persist, keep, 2msl and the
 *    MPTCP timers that follow them.  These can be coalesced at a
 *    higher granularity (500 ms).
 *  - Fast timers (TCPT_REXMT <= index < TCPT_PERSIST): rexmt and
 *    delayed ack, which run in the order of 100ms.
 *  - Quick timers (index < TCPT_REXMT): probe timeout, delayed
 *    recovery and the RACK reordering timer, which run in the order
 *    of 10ms.
 */
#define IS_TIMER_HZ_10MS(i)     ((i) < TCPT_REXMT)
#define IS_TIMER_HZ_100MS(i)    (TCPT_REXMT <= (i) && (i) < TCPT_PERSIST)
#define IS_TIMER_HZ_500MS(i)    (TCPT_PERSIST <= (i))
217 
struct tcptimerlist;

/*
 * One entry on the TCP timer list.  It records which timers are active
 * (te_mode), which of them has to run first (te_index) and the
 * deadline for that first timer (te_runtime).
 */
struct tcptimerentry {
	/* links for timer list */
	LIST_ENTRY(tcptimerentry) te_le;
	/* tcp clock when the timer was started */
	uint32_t te_timer_start;
	/* index of lowest timer that needs to run first */
	uint16_t te_index;
	/* bit-wise OR of timers that are active */
	uint16_t te_mode;
	/* deadline at which the first timer has to fire */
	uint32_t te_runtime;
};

/* List head type for a list of tcptimerentry structures */
LIST_HEAD(timerlisthead, tcptimerentry);
229 
230 struct tcptimerlist {
231 	struct timerlisthead lhead;     /* head of the list */
232 	lck_mtx_t mtx;          /* lock to protect the list */
233 	lck_grp_t *mtx_grp;     /* mutex group definition */
234 	thread_call_t call;     /* call entry */
235 	uint32_t runtime;       /* time at which this list is going to run */
236 	uint32_t schedtime;     /* time at which this list was scheduled */
237 	uint32_t started_at;     /* time at which this list started to run */
238 	uint32_t entries;       /* Number of entries on the list */
239 	uint32_t maxentries;    /* Max number of entries at any time */
240 	uint32_t processed_count;       /* Number of entries that have been processed */
241 
242 	/* Set desired mode when timer list running */
243 	boolean_t running;      /* Set when timer list is being processed */
244 	boolean_t scheduled;    /* set when the timer is scheduled */
245 #define TCP_TIMERLIST_10MS_MODE 0x1
246 #define TCP_TIMERLIST_100MS_MODE 0x2
247 #define TCP_TIMERLIST_500MS_MODE 0x4
248 	uint32_t mode;          /* Current mode of the timer */
249 	uint32_t pref_mode;     /* Preferred mode set by a connection */
250 	uint32_t pref_offset;   /* Preferred offset set by a connection */
251 	uint32_t idleruns;      /* Number of times the list has been idle in fast mode */
252 	struct tcptimerentry *next_te;  /* next timer entry pointer to process */
253 	u_int16_t probe_if_index; /* Interface index that needs to send probes */
254 };
255 
/*
 * Number of idle runs allowed for the TCP timer list while it is in
 * fast or quick mode.
 */
#define TCP_FASTMODE_IDLERUN_MAX 10
258 
259 /*
260  * Minimum retransmit timeout is set to 30ms. We add a slop of
261  * 200 ms to the retransmit value to account for processing
262  * variance and delayed ack. This extra 200ms will help to avoid
263  * spurious retransmits by taking into consideration the receivers
264  * that wait for delayed ack timer instead of generating an ack
265  * for every two packets.
266  *
267  * On a local link, the minimum retransmit timeout is 100ms and
268  * variance is set to 0. This will make the sender a little bit more
269  * aggressive on local link. When the connection is not established yet,
270  * there is no need to add an extra 200ms to retransmit timeout because
271  * the initial value is high (1s) and delayed ack is not a problem in
272  * that case.
273  */
#define TCPTV_REXMTSLOP ( TCP_RETRANSHZ/5 )     /* extra 200 ms slop */

/*
 * Macro to decide when the retransmit slop (described above) should be
 * added: it evaluates to non-zero once the connection state is
 * TCPS_ESTABLISHED or later.
 *
 * tp is a pointer to the connection's control block.  The parameter is
 * parenthesized so that any pointer-valued expression (for example
 * "base + i" or a conditional expression) can be passed safely.
 */
#define TCP_ADD_REXMTSLOP(tp) ((tp)->t_state >= TCPS_ESTABLISHED)
278 
/*
 * Store into tv the given value, plus tcp_rexmt_slop when addslop is
 * non-zero, and then force the result into the range [tvmin, tvmax].
 * The range checks are done on the operands converted to uint32_t.
 * tv must be an lvalue; tv, tvmin and tvmax may be evaluated more
 * than once.
 */
#define TCPT_RANGESET(tv, value, tvmin, tvmax, addslop) do { \
	(tv) = ((addslop) ? tcp_rexmt_slop : 0) + (value); \
	if ((uint32_t)(tv) < (uint32_t)(tvmin)) { \
	        (tv) = (tvmin); \
	} else if ((uint32_t)(tv) > (uint32_t)(tvmax)) { \
	        (tv) = (tvmax); \
	} \
} while (0)
286 
/*
 * Per-connection keepalive parameters.  Each macro yields the value set
 * on the connection when there is one and the corresponding global
 * default (converted to uint32_t) otherwise.
 */

/*
 * Idle time before probing: the per-connection value is only used when
 * it is non-zero and SO_KEEPALIVE is set on the socket.
 */
#define TCP_CONN_KEEPIDLE(tp) \
	((((tp)->t_keepidle != 0) && \
	(((tp)->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) != 0)) ? \
	        (tp)->t_keepidle : (uint32_t)tcp_keepidle)

/* Time allowed to establish the connection */
#define TCP_CONN_KEEPINIT(tp) \
	(((tp)->t_keepinit > 0) ? \
	        (tp)->t_keepinit : (uint32_t)tcp_keepinit)

/* Number of keepalive probes */
#define TCP_CONN_KEEPCNT(tp) \
	(((tp)->t_keepcnt > 0) ? \
	        (tp)->t_keepcnt : (uint32_t)tcp_keepcnt)

/* Interval between keepalive probes */
#define TCP_CONN_KEEPINTVL(tp) \
	(((tp)->t_keepintvl > 0) ? \
	        (tp)->t_keepintvl : (uint32_t)tcp_keepintvl)

/* Total probing time: probe count times probe interval */
#define TCP_CONN_MAXIDLE(tp) \
	(TCP_CONN_KEEPCNT(tp) * TCP_CONN_KEEPINTVL(tp))
299 
/*
 * Idle timeout for a connection: the current retransmit timeout
 * (t_rxtcur), plus tcp_rexmt_slop in the case where
 * TCP_ADD_REXMTSLOP() is false for the connection.
 *
 * tp is a pointer to the connection's control block.  Every use of the
 * parameter is parenthesized (including the one handed on to
 * TCP_ADD_REXMTSLOP) so that any pointer-valued expression can be
 * passed as the argument.
 */
#define TCP_IDLETIMEOUT(tp) \
	((TCP_ADD_REXMTSLOP((tp)) ? 0 : tcp_rexmt_slop) + (tp)->t_rxtcur)
302 
303 TAILQ_HEAD(tcptailq, tcpcb);
304 
305 extern int tcp_keepinit;        /* time to establish connection */
306 extern int tcp_keepidle;        /* time before keepalive probes begin */
307 extern int tcp_keepintvl;       /* time between keepalive probes */
308 extern int tcp_keepcnt;         /* number of keepalives */
309 extern int tcp_delack;          /* delayed ack timer */
310 extern int tcp_maxpersistidle;
311 extern int tcp_msl;
312 extern int tcp_ttl;             /* time to live for TCP segs */
313 extern int tcp_backoff[TCP_MAXRXTSHIFT + 1];
314 extern int tcp_rexmt_slop;
315 extern u_int32_t tcp_max_persist_timeout;       /* Maximum persistence for Zero Window Probes */
316 
317 #endif /* BSD_KERNEL_PRIVATE */
318 #endif /* !_NETINET_TCP_TIMER_H_ */
319