xref: /xnu-8792.81.2/bsd/netinet/tcp_timer.h (revision 19c3b8c28c31cb8130e034cfb5df6bf9ba342d90)
1 /*
2  * Copyright (c) 2000-2014 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * Copyright (c) 1982, 1986, 1993
30  *	The Regents of the University of California.  All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. All advertising materials mentioning features or use of this software
41  *    must display the following acknowledgement:
42  *	This product includes software developed by the University of
43  *	California, Berkeley and its contributors.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)tcp_timer.h	8.1 (Berkeley) 6/10/93
61  * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.18 1999/12/29 04:41:03 peter Exp $
62  */
63 
64 #ifndef _NETINET_TCP_TIMER_H_
65 #define _NETINET_TCP_TIMER_H_
66 #include <sys/appleapiopts.h>
67 
68 #ifdef BSD_KERNEL_PRIVATE
69 #include <kern/thread_call.h>
70 #endif /* BSD_KERNEL_PRIVATE */
71 
/*
 * Timer count used by the external definition. It is kept at 4 for
 * binary compatibility and does not follow TCPT_NTIMERS.
 */
#define TCPT_NTIMERS_EXT        4
74 
75 /*
76  * Definitions of the TCP timers.
77  *
 * The TCPT_PTO timer is used for probing for a tail loss in a send window.
 * If this probe gets acknowledged using SACK, it will allow the connection
 * to enter fast-recovery instead of hitting a retransmit timeout. A probe
 * timeout will send the last unacknowledged segment to generate more acks
 * with SACK information which can be used for fast-retransmitting the lost
 * packets. This will fire on the order of 10ms.
84  *
85  * The TCPT_REXMT timer is used to force retransmissions.
86  * The TCP has the TCPT_REXMT timer set whenever segments
87  * have been sent for which ACKs are expected but not yet
88  * received.  If an ACK is received which advances tp->snd_una,
89  * then the retransmit timer is cleared (if there are no more
90  * outstanding segments) or reset to the base value (if there
91  * are more ACKs expected).  Whenever the retransmit timer goes off,
92  * we retransmit one unacknowledged segment, and do a backoff
93  * on the retransmit timer.
94  *
95  * The TCPT_DELACK timer is used for transmitting delayed acknowledgements
96  * if an acknowledgement was delayed in anticipation of a new segment.
97  *
 * The TCPT_PERSIST timer is used to keep window size information
 * flowing even if the window goes shut.  If all previous transmissions
 * have been acknowledged (so that there are no retransmissions in progress),
 * and the window is too small to bother sending anything, then we start
 * the TCPT_PERSIST timer.  When it expires, if the window is nonzero,
 * we go to transmit state.  Otherwise, at intervals send a single byte
 * into the peer's window to force it to update our window information.
 * We do this at most as often as TCPTV_PERSMIN time intervals,
 * but no more frequently than the current estimate of round-trip
 * packet time.  The TCPT_PERSIST timer is cleared whenever we receive
 * a window update from the peer.
109  *
 * The TCPT_KEEP timer is used to keep connections alive.  If a
 * connection is idle (no segments received) for TCPTV_KEEP_INIT amount
 * of time, but not yet established, then we drop the connection.
 * Once the connection is established, if the connection is idle for
 * TCPTV_KEEP_IDLE time (and keepalives have been enabled on the socket),
 * we begin to probe the connection.  We force the peer to send us a
 * segment by sending:
 *	<SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK>
 * This segment is (deliberately) outside the window, and should elicit
 * an ack segment in response from the peer.  If, despite the TCPT_KEEP
 * initiated segments, we cannot elicit a response from a peer in
 * TCP_CONN_MAXIDLE amount of time probing, then we drop the connection.
122  *
 * The TCPT_2MSL timer is used for keeping the connection in the TIME_WAIT
 * state before fully closing it so that the connection 4-tuple can be reused.
125  */
#ifdef BSD_KERNEL_PRIVATE

/*
 * Kernel-private timer indices.  The numeric order matters: the
 * IS_TIMER_HZ_* macros classify a timer by comparing its index with
 * TCPT_REXMT and TCPT_PERSIST.
 */
#define TCPT_PTO        0       /* Probe timeout */
#define TCPT_DELAYFR    1       /* Delay recovery if there is reordering */
#define TCPT_REXMT      2       /* retransmit */
#define TCPT_DELACK     3       /* delayed ack */
#define TCPT_PERSIST    4       /* retransmit persistence */
#define TCPT_KEEP       5       /* keep alive */
#define TCPT_2MSL       6       /* 2*msl quiet time timer */
#if MPTCP
#define TCPT_JACK_RXMT  7       /* retransmit timer for join ack */
#define TCPT_CELLICON   8       /* Timer to check for cell-activity */
#define TCPT_MAX        8       /* highest timer index */
#else /* !MPTCP */
#define TCPT_MAX        6       /* highest timer index */
#endif /* MPTCP */

/* Timer indices used by the external definitions */
#define TCPT_REXMT_EXT          0
#define TCPT_PERSIST_EXT        1
#define TCPT_KEEP_EXT           2
#define TCPT_2MSL_EXT           3
#define TCPT_DELACK_EXT         4

#else /* !BSD_KERNEL_PRIVATE */

/* Timer indices seen outside of the kernel-private view */
#define TCPT_REXMT      0       /* retransmit */
#define TCPT_PERSIST    1       /* retransmit persistence */
#define TCPT_KEEP       2       /* keep alive */
#define TCPT_2MSL       3       /* 2*msl quiet time timer */
#define TCPT_DELACK     4       /* delayed ack timer */
#if MPTCP
#define TCPT_JACK_RXMT  5       /* retransmit timer for join ack */
#define TCPT_MAX        5       /* highest timer index */
#else /* !MPTCP */
#define TCPT_MAX        4       /* highest timer index */
#endif /* MPTCP */

#endif /* BSD_KERNEL_PRIVATE */

/* Derived the same way in both views */
#define TCPT_NONE       (TCPT_MAX + 1)  /* one past the last valid index */
#define TCPT_NTIMERS    (TCPT_MAX + 1)  /* number of timer slots */
169 
170 #ifdef BSD_KERNEL_PRIVATE
/*
 * Time constants, expressed in TCP_RETRANSHZ ticks.  One second is
 * TCP_RETRANSHZ ticks (TCP_RETRANSHZ/10 is documented below as 100 ms).
 */
#define TCPTV_MSL       (15 * TCP_RETRANSHZ)    /* max seg lifetime */
#define TCPTV_SRTTBASE  0       /* base roundtrip time; if 0, no idea yet */
#define TCPTV_RTOBASE   (1 * TCP_RETRANSHZ)     /* assumed RTO if no info */
#define TCPTV_SRTTDFLT  (1 * TCP_RETRANSHZ)     /* assumed RTT if no info */
#define TCPTV_PERSMIN   (5 * TCP_RETRANSHZ)     /* retransmit persistence */
#define TCPTV_PERSMAX   (60 * TCP_RETRANSHZ)    /* maximum persist interval */

/* NOTE(review): presumably the run-time counterpart of TCPTV_PERSMIN -- confirm */
extern int tcptv_persmin_val;

#define TCPTV_KEEP_INIT (75 * TCP_RETRANSHZ)            /* connect keep alive */
#define TCPTV_KEEP_IDLE (120 * 60 * TCP_RETRANSHZ)      /* time before probing */
#define TCPTV_KEEPINTVL (75 * TCP_RETRANSHZ)            /* default probe interval */
#define TCPTV_KEEPCNT   8                               /* max probes before drop */

#define TCPTV_REXMTMAX  (64 * TCP_RETRANSHZ)    /* max REXMT value */
#define TCPTV_REXMTMIN  (TCP_RETRANSHZ / 33)    /* min REXMT for non-local connections */

#define TCPTV_FINWAIT2  (60 * TCP_RETRANSHZ)    /* timeout to get out of FIN_WAIT_2 */

/*
 * Window for counting received bytes to see if ack-stretching
 * can start (default 100 ms).
 */
#define TCPTV_UNACKWIN  (TCP_RETRANSHZ / 10)

/* Receiver idle time; ack-stretching is avoided after this much idle time */
#define TCPTV_MAXRCVIDLE (TCP_RETRANSHZ / 5)

/*
 * No ack stretching during slow-start, until we see some packets.
 * By the time the receiver gets 512 packets, the sender's cwnd
 * should have opened by a few hundred packets, considering the
 * slow-start progression.
 */
#define TCP_RCV_SS_PKTCOUNT     512

#define TCPTV_TWTRUNC   8               /* RTO factor to truncate TW */

#define TCP_LINGERTIME  120             /* linger at most 2 minutes */

#define TCP_MAXRXTSHIFT 12              /* maximum retransmits */
215 
#ifdef  TCPTIMERS
/*
 * Printable timer names, only emitted when TCPTIMERS is defined before
 * this header is included.  The order follows the external numbering
 * (TCPT_REXMT_EXT .. TCPT_DELACK_EXT).
 * NOTE(review): this order does not match the BSD_KERNEL_PRIVATE TCPT_*
 * indices (there TCPT_REXMT is 2) -- confirm which index users pass.
 */
static char *tcptimers[] = {
	"REXMT",        /* 0 */
	"PERSIST",      /* 1 */
	"KEEP",         /* 2 */
	"2MSL",         /* 3 */
	"DELACK",       /* 4 */
};
#endif /* TCPTIMERS */
220 
/*
 * Classification of a timer index into one of three granularities.
 *
 * Quick (order of 10 ms): every index below TCPT_REXMT, i.e. the probe
 * timeout and the delayed-recovery timer.
 *
 * Fast (order of 100 ms): TCPT_REXMT up to, but not including,
 * TCPT_PERSIST, i.e. the retransmit and delayed ack timers.
 *
 * Slow (can be coalesced at a 500 ms granularity): TCPT_PERSIST and
 * above, i.e. persist, keep, 2msl and, with MPTCP, the join-ack and
 * cell-activity timers.
 *
 * The argument may be evaluated more than once.
 */
#define IS_TIMER_HZ_10MS(i)     ((i) < TCPT_REXMT)
#define IS_TIMER_HZ_500MS(i)    ((i) >= TCPT_PERSIST)
#define IS_TIMER_HZ_100MS(i)    (!IS_TIMER_HZ_10MS(i) && !IS_TIMER_HZ_500MS(i))
233 
234 struct tcptimerlist;
235 
236 struct tcptimerentry {
237 	LIST_ENTRY(tcptimerentry) le;   /* links for timer list */
238 	uint32_t timer_start;   /* tcp clock when the timer was started */
239 	uint16_t index;         /* index of lowest timer that needs to run first */
240 	uint16_t mode;          /* Bit-wise OR of timers that are active */
241 	uint32_t runtime;       /* deadline at which the first timer has to fire */
242 };
243 
244 LIST_HEAD(timerlisthead, tcptimerentry);
245 
246 struct tcptimerlist {
247 	struct timerlisthead lhead;     /* head of the list */
248 	lck_mtx_t mtx;          /* lock to protect the list */
249 	lck_grp_t *mtx_grp;     /* mutex group definition */
250 	thread_call_t call;     /* call entry */
251 	uint32_t runtime;       /* time at which this list is going to run */
252 	uint32_t schedtime;     /* time at which this list was scheduled */
253 	uint32_t entries;       /* Number of entries on the list */
254 	uint32_t maxentries;    /* Max number of entries at any time */
255 
256 	/* Set desired mode when timer list running */
257 	boolean_t running;      /* Set when timer list is being processed */
258 	boolean_t scheduled;    /* set when the timer is scheduled */
259 #define TCP_TIMERLIST_10MS_MODE 0x1
260 #define TCP_TIMERLIST_100MS_MODE 0x2
261 #define TCP_TIMERLIST_500MS_MODE 0x4
262 	uint32_t mode;          /* Current mode of the timer */
263 	uint32_t pref_mode;     /* Preferred mode set by a connection */
264 	uint32_t pref_offset;   /* Preferred offset set by a connection */
265 	uint32_t idleruns;      /* Number of times the list has been idle in fast mode */
266 	struct tcptimerentry *next_te;  /* next timer entry pointer to process */
267 	u_int16_t probe_if_index; /* Interface index that needs to send probes */
268 };
269 
270 /* number of idle runs allowed for TCP timer list in fast or quick modes */
271 #define TCP_FASTMODE_IDLERUN_MAX 10
272 
/*
 * Minimum retransmit timeout is set to 30ms. We add a slop of
 * 200 ms to the retransmit value to account for processing
 * variance and delayed ack. This extra 200ms will help to avoid
 * spurious retransmits by taking into consideration the receivers
 * that wait for delayed ack timer instead of generating an ack
 * for every two packets.
 *
 * On a local link, the minimum retransmit timeout is 100ms and
 * variance is set to 0. This will make the sender a little bit more
 * aggressive on local link. When the connection is not established yet,
 * there is no need to add an extra 200ms to retransmit timeout because
 * the initial value is high (1s) and delayed ack is not a problem in
 * that case.
 *
 * Note that TCPT_RANGESET and TCP_IDLETIMEOUT add the tcp_rexmt_slop
 * variable, not this constant.
 */
#define TCPTV_REXMTSLOP (TCP_RETRANSHZ / 5)     /* extra 200 ms slop */

/*
 * Decide when the retransmit slop (described above) should be added:
 * evaluates to nonzero when the connection state is TCPS_ESTABLISHED or
 * any later state.
 *
 * tp: pointer expression to a control block with a t_state member.  The
 * parameter is parenthesized so that expressions such as `base + 1` or
 * `cond ? a : b` expand correctly.
 */
#define TCP_ADD_REXMTSLOP(tp) ((tp)->t_state >= TCPS_ESTABLISHED)
292 
/*
 * Store `value` (plus tcp_rexmt_slop when `addslop` is nonzero) in `tv`,
 * then clamp `tv` to the range [tvmin, tvmax].  The range comparisons are
 * done on the operands converted to uint32_t.
 *
 * `tv` must be an lvalue.  `tv`, `tvmin` and `tvmax` may each be
 * evaluated more than once, so they must be free of side effects.
 */
#define TCPT_RANGESET(tv, value, tvmin, tvmax, addslop) do { \
	(tv) = ((addslop) ? tcp_rexmt_slop : 0) + (value); \
	if ((uint32_t)(tv) < (uint32_t)(tvmin)) { \
	        (tv) = (tvmin);         /* too small: raise to the floor */ \
	} else if ((uint32_t)(tv) > (uint32_t)(tvmax)) { \
	        (tv) = (tvmax);         /* too large: cap at the ceiling */ \
	} \
} while (0)
300 
/*
 * Per-connection keep-alive parameters.  Each macro yields the value set
 * on the connection when there is one, and otherwise the corresponding
 * global (tcp_keepidle, tcp_keepinit, tcp_keepcnt, tcp_keepintvl) cast
 * to uint32_t.  `tp` may be evaluated more than once.
 */

/* The per-connection idle time only applies while SO_KEEPALIVE is set */
#define TCP_CONN_KEEPIDLE(tp) \
	(((tp)->t_keepidle && \
	((tp)->t_inpcb->inp_socket->so_options & SO_KEEPALIVE)) ? \
	        (tp)->t_keepidle : (uint32_t)tcp_keepidle)

/* Time allowed for establishing the connection */
#define TCP_CONN_KEEPINIT(tp) \
	(((tp)->t_keepinit > 0) ? (tp)->t_keepinit : (uint32_t)tcp_keepinit)

/* Number of keep-alive probes */
#define TCP_CONN_KEEPCNT(tp) \
	(((tp)->t_keepcnt > 0) ? (tp)->t_keepcnt : (uint32_t)tcp_keepcnt)

/* Time between keep-alive probes */
#define TCP_CONN_KEEPINTVL(tp) \
	(((tp)->t_keepintvl > 0) ? (tp)->t_keepintvl : (uint32_t)tcp_keepintvl)

/* Total probing time: probe count multiplied by the probe interval */
#define TCP_CONN_MAXIDLE(tp) \
	(TCP_CONN_KEEPCNT(tp) * TCP_CONN_KEEPINTVL(tp))
313 
/*
 * Idle timeout for a connection: the current retransmit timeout
 * (t_rxtcur), plus tcp_rexmt_slop when TCP_ADD_REXMTSLOP(tp) is false.
 * NOTE(review): the slop is presumably skipped in the other case because
 * t_rxtcur already includes it there -- confirm against the callers of
 * TCPT_RANGESET.
 *
 * tp: pointer expression to the control block; it may be evaluated more
 * than once.  Every use of the parameter is parenthesized (including the
 * one handed to TCP_ADD_REXMTSLOP) so that expressions such as `base + 1`
 * expand correctly.
 */
#define TCP_IDLETIMEOUT(tp) \
	((TCP_ADD_REXMTSLOP((tp)) ? 0 : tcp_rexmt_slop) + (tp)->t_rxtcur)
316 
/* Tail queue head type for lists of tcpcb */
TAILQ_HEAD(tcptailq, tcpcb);

/* Keep-alive settings; used as defaults by the TCP_CONN_KEEP* macros */
extern int tcp_keepinit;        /* time to establish connection */
extern int tcp_keepidle;        /* time before keepalive probes begin */
extern int tcp_keepintvl;       /* time between keepalive probes */
extern int tcp_keepcnt;         /* number of keepalives */

extern int tcp_delack;          /* delayed ack timer */
extern int tcp_maxpersistidle;
extern int tcp_msl;
extern int tcp_ttl;             /* time to live for TCP segs */
extern int tcp_backoff[];
extern int tcp_rexmt_slop;      /* slop added by TCPT_RANGESET and TCP_IDLETIMEOUT */
extern uint32_t tcp_max_persist_timeout;        /* Maximum persistence for Zero Window Probes */

/*
 * Convert `off`, an offset relative to the current tcp clock (tcp_now),
 * into an offset relative to the time recorded in the connection's timer
 * entry ((tp)->tentry.timer_start).
 */
#define OFFSET_FROM_START(tp, off) ((tcp_now + (off)) - (tp)->tentry.timer_start)
332 
333 #endif /* BSD_KERNEL_PRIVATE */
334 #endif /* !_NETINET_TCP_TIMER_H_ */
335