xref: /xnu-11417.140.69/bsd/netinet/tcp_timer.h (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 /*
2  * Copyright (c) 2000-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)tcp_timer.h	8.1 (Berkeley) 6/10/93
61  * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.18 1999/12/29 04:41:03 peter Exp $
62  */
63 
64 #ifndef _NETINET_TCP_TIMER_H_
65 #define _NETINET_TCP_TIMER_H_
66 #include <sys/appleapiopts.h>
67 
68 #ifdef BSD_KERNEL_PRIVATE
69 #include <kern/thread_call.h>
70 #endif /* BSD_KERNEL_PRIVATE */
71 
72 /* Timer count of the external definition; kept the same for binary compatibility */
73 #define TCPT_NTIMERS_EXT        4       /* NOTE(review): TCPT_DELACK_EXT is also defined as 4, i.e. not below this count - confirm intended */
74 
75 /*
76  * Definitions of the TCP timers.
77  *
78  * The TCPT_PTO timer is used for probing for a tail loss in a send window.
79  * If this probe gets acknowledged using SACK, it will allow the connection
80  * to enter fast-recovery instead of hitting a retransmit timeout. A probe
81  * timeout will send the last unacknowledged segment to generate more acks
82  * with SACK information which can be used for fast-retransmitting the lost
83  * packets. This will fire in the order of 10ms.
84  *
85  * The TCPT_REXMT timer is used to force retransmissions.
86  * The TCP has the TCPT_REXMT timer set whenever segments
87  * have been sent for which ACKs are expected but not yet
88  * received.  If an ACK is received which advances tp->snd_una,
89  * then the retransmit timer is cleared (if there are no more
90  * outstanding segments) or reset to the base value (if there
91  * are more ACKs expected).  Whenever the retransmit timer goes off,
92  * we retransmit one unacknowledged segment, and do a backoff
93  * on the retransmit timer.
94  *
95  * The TCPT_DELACK timer is used for transmitting delayed acknowledgements
96  * if an acknowledgement was delayed in anticipation of a new segment.
97  *
98  * The TCPT_PERSIST timer is used to keep window size information
99  * flowing even if the window goes shut.  If all previous transmissions
100  * have been acknowledged (so that there are no retransmissions in progress),
101  * and the window is too small to bother sending anything, then we start
102  * the TCPT_PERSIST timer.  When it expires, if the window is nonzero,
103  * we go to transmit state.  Otherwise, at intervals send a single byte
104  * into the peer's window to force him to update our window information.
105  * We do this at most as often as TCPTV_PERSMIN time intervals,
106  * but no more frequently than the current estimate of round-trip
107  * packet time.  The TCPT_PERSIST timer is cleared whenever we receive
108  * a window update from the peer.
109  *
110  * The TCPT_KEEP timer is used to keep connections alive.  If a
111  * connection is idle (no segments received) for TCPTV_KEEP_INIT amount
112  * of time, but not yet established, then we drop the connection.
113  * Once the connection is established, if the connection is idle for
114  * TCPTV_KEEP_IDLE time (and keepalives have been enabled on the socket),
115  * we begin to probe the connection.  We force the peer to send us a
116  * segment by sending:
117  *	<SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK>
118  * This segment is (deliberately) outside the window, and should elicit
119  * an ack segment in response from the peer.  If, despite the TCPT_KEEP
120  * initiated segments we cannot elicit a response from a peer in
121  * TCP_CONN_MAXIDLE amount of time probing, then we drop the connection.
122  *
123  * The TCPT_2MSL timer is used for keeping the connection in Time-wait state
124  * before fully closing it so that the connection 4-tuple can be reused.
125  */
126 #ifdef BSD_KERNEL_PRIVATE
127 /* Kernel-private timer indices. Order matters: the IS_TIMER_HZ_*() macros classify a timer by comparing its index with TCPT_REXMT and TCPT_PERSIST. */
128 #define TCPT_PTO        0       /* Probe timeout */
129 #define TCPT_DELAYFR    1       /* Delay recovery if there is reordering */
130 #define TCPT_REORDER    2       /* Reordering timer for RACK */
131 #define TCPT_REXMT      3       /* retransmit */
132 #define TCPT_DELACK     4       /* delayed ack */
133 #define TCPT_PERSIST    5       /* persist timer (window probes) */
134 #define TCPT_KEEP       6       /* keep alive */
135 #define TCPT_2MSL       7       /* 2*msl quiet time timer */
136 #if MPTCP
137 #define TCPT_JACK_RXMT  8       /* retransmit timer for join ack */
138 #define TCPT_CELLICON   9       /* Timer to check for cell-activity */
139 #define TCPT_MAX        9       /* highest timer index (MPTCP build) */
140 #else /* !MPTCP */
141 #define TCPT_MAX        7       /* highest timer index */
142 #endif /* MPTCP */
143 
144 #define TCPT_NONE       (TCPT_MAX + 1)  /* one past the last timer index; presumably the "no timer" marker - confirm */
145 #define TCPT_NTIMERS    (TCPT_MAX + 1)  /* number of timer indices, 0..TCPT_MAX */
146 
147 /* External definitions: same numbering as the !BSD_KERNEL_PRIVATE TCPT_* values below */
148 #define TCPT_REXMT_EXT          0
149 #define TCPT_PERSIST_EXT        1
150 #define TCPT_KEEP_EXT           2
151 #define TCPT_2MSL_EXT           3
152 #define TCPT_DELACK_EXT         4
153 
154 #else /* !BSD_KERNEL_PRIVATE */
155 #define TCPT_REXMT      0               /* retransmit */
156 #define TCPT_PERSIST    1               /* persist timer (window probes) */
157 #define TCPT_KEEP       2               /* keep alive */
158 #define TCPT_2MSL       3               /* 2*msl quiet time timer */
159 #define TCPT_DELACK     4               /* delayed ack timer */
160 #if MPTCP
161 #define TCPT_JACK_RXMT  5       /* retransmit timer for join ack */
162 #define TCPT_MAX        5       /* highest timer index (MPTCP build) */
163 #else /* !MPTCP */
164 #define TCPT_MAX        4       /* highest timer index */
165 #endif /* MPTCP */
166 #define TCPT_NONE       (TCPT_MAX + 1)  /* one past the last timer index */
167 #define TCPT_NTIMERS    (TCPT_MAX + 1)  /* number of timer indices, 0..TCPT_MAX */
168 
169 #endif /* BSD_KERNEL_PRIVATE */
170 
171 #ifdef BSD_KERNEL_PRIVATE
172 /*
173  * Time constants, expressed as multiples or fractions of TCP_RETRANSHZ.
174  */
175 #define TCPTV_MSL       ( 15*TCP_RETRANSHZ)     /* max seg lifetime */
176 #define TCPTV_SRTTBASE  0       /* base roundtrip time; if 0, no idea yet */
177 #define TCPTV_RTOBASE   (  1*TCP_RETRANSHZ)     /* assumed RTO if no info */
178 #define TCPTV_SRTTDFLT  (  1*TCP_RETRANSHZ)     /* assumed RTT if no info */
179 #define TCPTV_PERSMIN   (  5*TCP_RETRANSHZ)     /* minimum persist interval */
180 #define TCPTV_PERSMAX   ( 60*TCP_RETRANSHZ)     /* maximum persist interval */
181 
182 extern int tcptv_persmin_val;   /* NOTE(review): presumably the run-time counterpart of TCPTV_PERSMIN - confirm against its definition */
183 
184 #define TCPTV_KEEP_INIT ( 75*TCP_RETRANSHZ)     /* connect keep alive */
185 #define TCPTV_KEEP_IDLE (120*60*TCP_RETRANSHZ)  /* idle time before keepalive probing starts */
186 #define TCPTV_KEEPINTVL ( 75*TCP_RETRANSHZ)     /* default probe interval */
187 #define TCPTV_KEEPCNT   8                       /* max probes before drop */
188 
189 #define TCPTV_REXMTMAX  ( 64*TCP_RETRANSHZ )    /* max REXMT value */
190 #define TCPTV_REXMTMIN  ( TCP_RETRANSHZ/33 )    /* min REXMT for non-local connections (the 30ms minimum described further below) */
191 
192 #define TCPTV_FINWAIT2  ( 60*TCP_RETRANSHZ)     /* timeout to get out of FIN_WAIT_2 */
193 
194 /*
195  * Window for counting received bytes to see if ack-stretching
196  * can start (default 100 ms)
197  */
198 #define TCPTV_UNACKWIN  ( TCP_RETRANSHZ/10 )
199 
200 /* Receiver idle time; ack-stretching is avoided after the receiver has been idle this long */
201 #define TCPTV_MAXRCVIDLE (TCP_RETRANSHZ/5 )
202 
203 /*
204  * No ack stretching during slow-start, until we see some packets.
205  * By the time the receiver gets 512 packets, the sender's cwnd
206  * should open by a few hundred packets considering the
207  * slow-start progression.
208  */
209 #define TCP_RCV_SS_PKTCOUNT     512
210 
211 #define TCPTV_TWTRUNC   8               /* RTO factor to truncate TW */
212 
213 #define TCP_LINGERTIME  120             /* linger at most 2 minutes */
214 
215 #define TCP_MAXRXTSHIFT 12              /* maximum retransmits; tcp_backoff[] is declared with TCP_MAXRXTSHIFT + 1 entries */
216 
217 #ifdef  TCPTIMERS /* timer name table, emitted only when the including file defines TCPTIMERS */
218 static char *tcptimers[] = /* NOTE(review): order follows the TCPT_*_EXT indices (0..4), not the kernel-private TCPT_* indices of this BSD_KERNEL_PRIVATE section */
219 { "REXMT", "PERSIST", "KEEP", "2MSL", "DELACK"};
220 #endif /* TCPTIMERS */
221 
222 /*
223  * Persist, keep, 2msl and MPTCP's join-ack timer are slow timers which can
224  * be coalesced at a higher granularity (500 ms).
225  *
226  * Rexmt and delayed ack timers are considered fast timers which run
227  * in the order of 100ms.
228  *
229  * Probe timeout, delayed-recovery and RACK reordering timers are quick timers which will run in the order of 10ms.
230  */
231 #define IS_TIMER_HZ_500MS(i)    ((i) >= TCPT_PERSIST)   /* every index from TCPT_PERSIST upwards, which also includes TCPT_NONE */
232 #define IS_TIMER_HZ_100MS(i)    ((i) >= TCPT_REXMT && (i) < TCPT_PERSIST)       /* TCPT_REXMT and TCPT_DELACK */
233 #define IS_TIMER_HZ_10MS(i)     ((i) < TCPT_REXMT)      /* TCPT_PTO, TCPT_DELAYFR and TCPT_REORDER */
234 
235 struct tcptimerlist;    /* forward declaration; the structure is defined further down */
236 
237 struct tcptimerentry {  /* one element of a timer list; OFFSET_FROM_START() reaches it as (tp)->tentry */
238 	LIST_ENTRY(tcptimerentry) le;   /* links for timer list */
239 	uint32_t timer_start;   /* tcp clock when the timer was started */
240 	uint16_t index;         /* index of lowest timer that needs to run first */
241 	uint16_t mode;          /* Bit-wise OR of timers that are active */
242 	uint32_t runtime;       /* deadline at which the first timer has to fire */
243 };
244 
245 LIST_HEAD(timerlisthead, tcptimerentry);        /* declares struct timerlisthead, the type of tcptimerlist.lhead */
246 
247 struct tcptimerlist {   /* a list of tcptimerentry elements together with its scheduling state */
248 	struct timerlisthead lhead;     /* head of the list */
249 	lck_mtx_t mtx;          /* lock to protect the list */
250 	lck_grp_t *mtx_grp;     /* mutex group definition */
251 	thread_call_t call;     /* call entry */
252 	uint32_t runtime;       /* time at which this list is going to run */
253 	uint32_t schedtime;     /* time at which this list was scheduled */
254 	uint32_t entries;       /* Number of entries on the list */
255 	uint32_t maxentries;    /* Max number of entries at any time */
256 
257 	/* Run state; a desired mode is set while the timer list is running (presumably through pref_mode - confirm) */
258 	boolean_t running;      /* Set when timer list is being processed */
259 	boolean_t scheduled;    /* set when the timer is scheduled */
260 #define TCP_TIMERLIST_10MS_MODE 0x1     /* mode bits; the names mirror the IS_TIMER_HZ_* granularities */
261 #define TCP_TIMERLIST_100MS_MODE 0x2
262 #define TCP_TIMERLIST_500MS_MODE 0x4
263 	uint32_t mode;          /* Current mode of the timer */
264 	uint32_t pref_mode;     /* Preferred mode set by a connection */
265 	uint32_t pref_offset;   /* Preferred offset set by a connection */
266 	uint32_t idleruns;      /* Number of times the list has been idle in fast mode */
267 	struct tcptimerentry *next_te;  /* next timer entry pointer to process */
268 	u_int16_t probe_if_index; /* Interface index that needs to send probes */
269 };
270 
271 /* number of idle runs allowed for TCP timer list in fast or quick modes (presumably the limit for tcptimerlist.idleruns - confirm) */
272 #define TCP_FASTMODE_IDLERUN_MAX 10
273 
274 /*
275  * Minimum retransmit timeout is set to 30ms. We add a slop of
276  * 200 ms to the retransmit value to account for processing
277  * variance and delayed ack. This extra 200ms will help to avoid
278  * spurious retransmits by taking into consideration the receivers
279  * that wait for delayed ack timer instead of generating an ack
280  * for every two packets.
281  *
282  * On a local link, the minimum retransmit timeout is 100ms and
283  * variance is set to 0. This will make the sender a little bit more
284  * aggressive on local link. When the connection is not established yet,
285  * there is no need to add an extra 200ms to retransmit timeout because
286  * the initial value is high (1s) and delayed ack is not a problem in
287  * that case.
288  */
289 #define TCPTV_REXMTSLOP ( TCP_RETRANSHZ/5 )     /* extra 200 ms slop; NOTE(review): TCPT_RANGESET adds the variable tcp_rexmt_slop, presumably initialized from this value - confirm */
290 
291 /* Decides when the retransmit slop (described above) should be added: true once (tp)->t_state is TCPS_ESTABLISHED or greater. The argument is parenthesized so any pointer expression is accepted. */
292 #define TCP_ADD_REXMTSLOP(tp) ((tp)->t_state >= TCPS_ESTABLISHED)
293 
294 #define TCPT_RANGESET(tv, value, tvmin, tvmax, addslop) do { /* tv = value (plus tcp_rexmt_slop when addslop is non-zero), clamped to [tvmin, tvmax]; tv is evaluated more than once */ \
295 	(tv) = ((addslop) ? tcp_rexmt_slop : 0) + (value); /* the slop is added before clamping */ \
296 	if ((uint32_t)(tv) < (uint32_t)(tvmin)) /* both bounds are compared after casting to uint32_t */ \
297 	        (tv) = (tvmin); \
298 	else if ((uint32_t)(tv) > (uint32_t)(tvmax)) \
299 	        (tv) = (tvmax); \
300 } while(0)
301 
302 #define TCP_CONN_KEEPIDLE(tp) /* (tp)->t_keepidle when it is non-zero and SO_KEEPALIVE is set on the socket, otherwise the global tcp_keepidle */ \
303 	((tp)->t_keepidle && \
304 	((tp)->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) ? /* && binds tighter than ?:, so both tests together form the condition */ \
305 	        (tp)->t_keepidle : (uint32_t)tcp_keepidle)
306 #define TCP_CONN_KEEPINIT(tp) /* (tp)->t_keepinit when > 0, otherwise the global tcp_keepinit */ \
307 	(((tp)->t_keepinit > 0) ? (tp)->t_keepinit : (uint32_t)tcp_keepinit)
308 #define TCP_CONN_KEEPCNT(tp) /* (tp)->t_keepcnt when > 0, otherwise the global tcp_keepcnt */ \
309 	(((tp)->t_keepcnt > 0) ? (tp)->t_keepcnt : (uint32_t)tcp_keepcnt)
310 #define TCP_CONN_KEEPINTVL(tp) /* (tp)->t_keepintvl when > 0, otherwise the global tcp_keepintvl */ \
311 	(((tp)->t_keepintvl > 0) ? (tp)->t_keepintvl : (uint32_t)tcp_keepintvl)
312 #define TCP_CONN_MAXIDLE(tp) /* keepalive probe count multiplied by the probe interval */ \
313 	(TCP_CONN_KEEPCNT(tp) * TCP_CONN_KEEPINTVL(tp))
314 
315 #define TCP_IDLETIMEOUT(tp) /* (tp)->t_rxtcur, plus tcp_rexmt_slop only when TCP_ADD_REXMTSLOP(tp) is false; presumably t_rxtcur already contains the slop otherwise - confirm */ \
316 	(((TCP_ADD_REXMTSLOP(tp)) ? 0 : tcp_rexmt_slop) + (tp)->t_rxtcur)
317 
318 TAILQ_HEAD(tcptailq, tcpcb);    /* declares struct tcptailq, a tail-queue head whose elements are struct tcpcb */
319 
320 extern int tcp_keepinit;        /* time to establish connection; fallback used by TCP_CONN_KEEPINIT */
321 extern int tcp_keepidle;        /* time before keepalive probes begin; fallback used by TCP_CONN_KEEPIDLE */
322 extern int tcp_keepintvl;       /* time between keepalive probes; fallback used by TCP_CONN_KEEPINTVL */
323 extern int tcp_keepcnt;         /* number of keepalives; fallback used by TCP_CONN_KEEPCNT */
324 extern int tcp_delack;          /* delayed ack timer */
325 extern int tcp_maxpersistidle;  /* NOTE(review): undocumented here; presumably the maximum idle time in persist state - confirm */
326 extern int tcp_msl;             /* NOTE(review): presumably the maximum segment lifetime (cf. TCPTV_MSL) - confirm */
327 extern int tcp_ttl;             /* time to live for TCP segs */
328 extern int tcp_backoff[TCP_MAXRXTSHIFT + 1];    /* TCP_MAXRXTSHIFT + 1 entries; presumably indexed by the retransmit shift - confirm */
329 extern int tcp_rexmt_slop;      /* slop value added by TCPT_RANGESET and TCP_IDLETIMEOUT */
330 extern u_int32_t tcp_max_persist_timeout;       /* Maximum persistence for Zero Window Probes */
331 
332 #define OFFSET_FROM_START(tp, off) ((tcp_now + (off)) - (tp)->tentry.timer_start)       /* the time tcp_now + off, expressed relative to the timer_start of tp's timer entry */
333 
334 #endif /* BSD_KERNEL_PRIVATE */
335 #endif /* !_NETINET_TCP_TIMER_H_ */
336