xref: /xnu-11215.81.4/bsd/netinet/tcp_cc.h (revision d4514f0bc1d3f944c22d92e68b646ac3fb40d452)
1 /*
2  * Copyright (c) 2010-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*-
29  * Copyright (c) 2008 Swinburne University of Technology, Melbourne, Australia
30  * All rights reserved.
31  *
32  * This software was developed at the Centre for Advanced Internet
33  * Architectures, Swinburne University, by Lawrence Stewart and James Healy,
34  * made possible in part by a grant from the Cisco University Research Program
35  * Fund at Community Foundation Silicon Valley.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  *
46  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
47  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
50  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56  * SUCH DAMAGE.
57  *
58  * $FreeBSD$
59  */
60 
61 #ifndef _NETINET_CC_H_
62 #define _NETINET_CC_H_
63 
64 #ifdef PRIVATE
65 
66 #include <netinet/in.h>
67 
/*
 * Data structure to collect and display congestion control debug
 * information.
 *
 * The two address members are character buffers of INET6_ADDRSTRLEN bytes.
 * ccd_cc_index takes one of the TCP_CC_ALGO_* index values and ccd_event
 * takes a value of enum tcp_cc_event.
 */
struct tcp_cc_debug_state {
	uint64_t ccd_tsns;
	char ccd_srcaddr[INET6_ADDRSTRLEN];
	uint16_t ccd_srcport;
	char ccd_destaddr[INET6_ADDRSTRLEN];
	uint16_t ccd_destport;
	uint32_t ccd_snd_cwnd;
	uint32_t ccd_snd_wnd;
	uint32_t ccd_snd_ssthresh;
	uint32_t ccd_pipeack;
	uint32_t ccd_rttcur;
	uint32_t ccd_rxtcur;
	uint32_t ccd_srtt;
	uint32_t ccd_event;
	uint32_t ccd_sndcc;
	uint32_t ccd_sndhiwat;
	uint32_t ccd_bytes_acked;
	uint8_t ccd_cc_index;
	/* Explicit padding up to the next 32-bit boundary. */
	uint8_t ccd_unused_1__;
	uint16_t ccd_unused_2__;
	/*
	 * Algorithm-specific state.
	 * NOTE(review): presumably the valid member is selected by
	 * ccd_cc_index -- confirm against the producer of this record.
	 */
	union {
		struct {
			uint32_t ccd_last_max;
			uint32_t ccd_tcp_win;
			uint32_t ccd_target_win;
			uint32_t ccd_avg_lastmax;
			uint32_t ccd_mean_deviation;
		} cubic_state;
		struct {
			uint32_t led_base_rtt;
		} ledbat_state;
	} u;
};
104 
/*
 * Values of ccd_cc_index.
 *
 * TCP_CC_ALGO_COUNT is one past the highest index and sizes
 * tcp_cc_algo_list[].
 */
#define TCP_CC_ALGO_NONE                0
#define TCP_CC_ALGO_NEWRENO_INDEX       1
#define TCP_CC_ALGO_BACKGROUND_INDEX    2 /* CC for background transport */
#define TCP_CC_ALGO_CUBIC_INDEX         3 /* default CC algorithm */
#define TCP_CC_ALGO_PRAGUE_INDEX        4 /* L4S CC algorithm */
#define TCP_CC_ALGO_COUNT               5 /* Count of CC algorithms */
114 
/*
 * Values of ccd_event.
 *
 * The list is an X-macro: define X(name, ...) before expanding
 * TCP_CC_EVENT_LIST and undefine it afterwards. MAX_TCP_CC_EVENTS is the
 * last entry, so in the enum below it equals the number of real events.
 */
#define TCP_CC_EVENT_LIST                       \
	X(TCP_CC_CWND_INIT)                     \
	X(TCP_CC_INSEQ_ACK_RCVD)                \
	X(TCP_CC_ACK_RCVD)                      \
	X(TCP_CC_ENTER_FASTRECOVERY)            \
	X(TCP_CC_IN_FASTRECOVERY)               \
	X(TCP_CC_EXIT_FASTRECOVERY)             \
	X(TCP_CC_PARTIAL_ACK)                   \
	X(TCP_CC_IDLE_TIMEOUT)                  \
	X(TCP_CC_REXMT_TIMEOUT)                 \
	X(TCP_CC_ECN_RCVD)                      \
	X(TCP_CC_BAD_REXMT_RECOVERY)            \
	X(TCP_CC_OUTPUT_ERROR)                  \
	X(TCP_CC_CHANGE_ALGO)                   \
	X(TCP_CC_FLOW_CONTROL)                  \
	X(TCP_CC_SUSPEND)                       \
	X(TCP_CC_LIMITED_TRANSMIT)              \
	X(TCP_CC_EARLY_RETRANSMIT)              \
	X(TCP_CC_TLP_RECOVERY)                  \
	X(TCP_CC_TLP_RECOVER_LASTPACKET)        \
	X(TCP_CC_DELAY_FASTRECOVERY)            \
	X(TCP_CC_TLP_IN_FASTRECOVERY)           \
	X(TCP_CC_DSACK_BAD_REXMT)               \
	X(TCP_CC_FIRST_REXMT)                   \
	X(MAX_TCP_CC_EVENTS)

/* One enumerator per list entry, numbered from 0 in list order. */
enum tcp_cc_event {
#define X(name, ...) name,
	TCP_CC_EVENT_LIST
#undef X
};
149 
/*
 * Kernel control ID
 */
#define TCP_CC_CONTROL_NAME     "com.apple.network.tcp_ccdebug"
154 
155 #endif /* PRIVATE */
156 
157 #ifdef KERNEL_PRIVATE
158 
159 #include <netinet/tcp.h>
160 #include <netinet/tcp_var.h>
161 #include <kern/zalloc.h>
162 
/*
 * Maximum characters in the name of a CC algorithm; sizes the name[]
 * member of struct tcp_cc_algo and struct tcp_rcv_cc_algo.
 */
#define TCP_CA_NAME_MAX 16
167 
/* Globals defined outside this header. */
extern int tcp_recv_bg;
extern uint32_t bg_ss_fltsz;
170 
171 /*
172  * Structure to hold definition various actions defined by a congestion
173  * control algorithm for TCP. This can be used to change the congestion
174  * control on a connection based on the user settings of priority of a
175  * connection.
176  */
177 struct tcp_cc_algo {
178 	char name[TCP_CA_NAME_MAX];
179 	_Atomic uint32_t num_sockets;
180 	uint32_t flags;
181 
182 	/* init the congestion algorithm for the specified control block */
183 	int (*init) (struct tcpcb *tp);
184 
185 	/*
186 	 * cleanup any state that is stored in the connection
187 	 * related to the algorithm
188 	 */
189 	int (*cleanup) (struct tcpcb *tp);
190 
191 	/* initialize cwnd at the start of a connection */
192 	void (*cwnd_init) (struct tcpcb *tp);
193 
194 	/*
195 	 * called on the receipt of in-sequence ack during congestion
196 	 * avoidance phase
197 	 */
198 	void (*congestion_avd) (struct tcpcb *tp, struct tcphdr *th);
199 
200 	/* called on the receipt of a valid ack */
201 	void (*ack_rcvd) (struct tcpcb *tp, struct tcphdr *th);
202 
203 	/* called before entering FR */
204 	void (*pre_fr) (struct tcpcb *tp);
205 
206 	/*  after exiting FR */
207 	void (*post_fr) (struct tcpcb *tp, struct tcphdr *th);
208 
209 	/* perform tasks when data transfer resumes after an idle period */
210 	void (*after_idle) (struct tcpcb *tp);
211 
212 	/* perform tasks when the connection's retransmit timer expires */
213 	void (*after_timeout) (struct tcpcb *tp);
214 
215 	/* Whether or not to delay the ack */
216 	int (*delay_ack)(struct tcpcb *tp, struct tcphdr *th);
217 
218 	/* called to process ECN markings, used by Prague only */
219 	void (*process_ecn) (struct tcpcb *tp, struct tcphdr *th, uint32_t new_bytes_marked, uint32_t packets_marked, uint32_t packets_acked);
220 
221 	/* called to set bytes acked in this ACK which are later update to exclude CE marked bytes */
222 	void (*set_bytes_acked) (struct tcpcb *tp, uint32_t acked);
223 
224 	/* Switch a connection to this CC algorithm after sending some packets */
225 	void (*switch_to)(struct tcpcb *tp);
226 } __attribute__((aligned(4)));
227 
228 extern struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT];
229 
/*
 * Entry of tcp_cc_algo_list[] selected by tp->tcp_cc_index. The argument is
 * parenthesized so that any pointer expression (e.g. "p + 1") is accepted.
 */
#define CC_ALGO(tp) (tcp_cc_algo_list[(tp)->tcp_cc_index])
/* Initial congestion window expressed as a number of segments. */
#define TCP_CC_CWND_INIT_PKTS 10
/* Initial congestion window expressed as a fixed byte count. */
#define TCP_CC_CWND_INIT_BYTES  4380
/*
 * The congestion window will have to be reset after a
 * non-validated period -- currently set to 3 minutes
 */
#define TCP_CC_CWND_NONVALIDATED_PERIOD (3 * 60 * TCP_RETRANSHZ)
238 
239 /* Less than BE congestion control algo for receive window */
240 struct tcp_rcv_cc_algo {
241 	char name[TCP_CA_NAME_MAX];
242 	_Atomic uint32_t num_sockets;
243 	uint32_t flags;
244 
245 	/* init the congestion algorithm for the specified control block */
246 	void (*init) (struct tcpcb *tp);
247 
248 	/*
249 	 * cleanup any state that is stored in the connection
250 	 * related to the algorithm
251 	 */
252 	void (*cleanup) (struct tcpcb *tp);
253 
254 	/* initialize rwnd at the start of a connection */
255 	void (*rwnd_init) (struct tcpcb *tp);
256 
257 	/* called on the receipt of valid data */
258 	void (*data_rcvd) (struct tcpcb *tp, struct tcphdr *th,
259 	    struct tcpopt *to, uint32_t segment_len);
260 
261 	uint32_t (*get_rlwin) (struct tcpcb *tp);
262 
263 	/* perform tasks when data transfer resumes after an idle period */
264 	void (*after_idle) (struct tcpcb *tp);
265 
266 	/* called when we switch from foreground to background */
267 	void (*switch_to) (struct tcpcb *tp);
268 } __attribute__((aligned(4)));
269 
/* Receive-side CC algorithm instance, defined outside this header. */
extern struct tcp_rcv_cc_algo tcp_cc_rledbat;

/* Congestion control helpers; all are defined outside this header. */
extern void tcp_cc_init(void);
extern void tcp_cc_resize_sndbuf(struct tcpcb *tp);
extern void tcp_bad_rexmt_fix_sndbuf(struct tcpcb *tp);
extern void tcp_cc_cwnd_init_or_reset(struct tcpcb *tp);
extern int tcp_cc_delay_ack(struct tcpcb *tp, struct tcphdr *th);
extern void tcp_cc_allocate_state(struct tcpcb *tp);
extern void tcp_cc_after_idle_stretchack(struct tcpcb *tp);
extern uint32_t tcp_cc_is_cwnd_nonvalidated(struct tcpcb *tp);
extern void tcp_cc_adjust_nonvalidated_cwnd(struct tcpcb *tp);
extern uint32_t tcp_get_max_pipeack(struct tcpcb *tp);
extern void tcp_clear_pipeack_state(struct tcpcb *tp);
283 
284 static inline uint32_t
tcp_initial_cwnd(struct tcpcb * tp)285 tcp_initial_cwnd(struct tcpcb *tp)
286 {
287 	if (tcp_cubic_minor_fixes) {
288 		return TCP_CC_CWND_INIT_PKTS * tp->t_maxseg;
289 	} else {
290 		return TCP_CC_CWND_INIT_BYTES;
291 	}
292 }
293 
294 #endif /* KERNEL_PRIVATE */
295 #endif /* _NETINET_CC_H_ */
296