xref: /xnu-12377.41.6/bsd/netinet/tcp_cc.h (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 2010-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*-
29  * Copyright (c) 2008 Swinburne University of Technology, Melbourne, Australia
30  * All rights reserved.
31  *
32  * This software was developed at the Centre for Advanced Internet
33  * Architectures, Swinburne University, by Lawrence Stewart and James Healy,
34  * made possible in part by a grant from the Cisco University Research Program
35  * Fund at Community Foundation Silicon Valley.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  *
46  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
47  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
50  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56  * SUCH DAMAGE.
57  *
58  * $FreeBSD$
59  */
60 
61 #ifndef _NETINET_CC_H_
62 #define _NETINET_CC_H_
63 
64 #ifdef PRIVATE
65 
66 #include <netinet/in.h>
67 
/*
 * Data structure to collect and display congestion control debug information.
 *
 * Every member has a fixed width so the record has the same layout for the
 * producer and the consumer. ccd_event holds one of the enum tcp_cc_event
 * values and ccd_cc_index holds one of the TCP_CC_ALGO_* indices.
 *
 * NOTE(review): the window/RTT/buffer counters appear to mirror like-named
 * fields of the TCP control block and socket buffer; their units are not
 * visible from this header -- confirm against the code that fills them in.
 */
struct tcp_cc_debug_state {
	uint64_t ccd_tsns;
	/* Address buffers are sized for the longest IPv6 presentation string. */
	char ccd_srcaddr[INET6_ADDRSTRLEN];
	uint16_t ccd_srcport;
	char ccd_destaddr[INET6_ADDRSTRLEN];
	uint16_t ccd_destport;
	uint32_t ccd_snd_cwnd;
	uint32_t ccd_snd_wnd;
	uint32_t ccd_snd_ssthresh;
	uint32_t ccd_pipeack;
	uint32_t ccd_rttcur;
	uint32_t ccd_rxtcur;
	uint32_t ccd_srtt;
	uint32_t ccd_event;             /* one of enum tcp_cc_event */
	uint32_t ccd_sndcc;
	uint32_t ccd_sndhiwat;
	uint32_t ccd_bytes_acked;
	uint8_t ccd_cc_index;           /* one of TCP_CC_ALGO_* */
	/* Explicit filler up to the next 32-bit boundary; not used. */
	uint8_t ccd_unused_1__;
	uint16_t ccd_unused_2__;
	/*
	 * Algorithm-specific state.
	 * NOTE(review): presumably the valid member is selected by
	 * ccd_cc_index -- confirm against the producer.
	 */
	union {
		struct {
			uint32_t ccd_last_max;
			uint32_t ccd_tcp_win;
			uint32_t ccd_target_win;
			uint32_t ccd_avg_lastmax;
			uint32_t ccd_mean_deviation;
		} cubic_state;
		struct {
			uint32_t led_base_rtt;
		} ledbat_state;
	} u;
};
104 
/*
 * Values of ccd_cc_index.
 *
 * TCP_CC_ALGO_COUNT is also the dimension of tcp_cc_algo_list[], so it must
 * stay one greater than the highest index defined here.
 */
#define TCP_CC_ALGO_NONE                0
#define TCP_CC_ALGO_NEWRENO_INDEX       1
#define TCP_CC_ALGO_BACKGROUND_INDEX    2 /* CC for background transport */
#define TCP_CC_ALGO_CUBIC_INDEX         3 /* default CC algorithm */
#define TCP_CC_ALGO_PRAGUE_INDEX        4 /* L4S CC algorithm */
#define TCP_CC_ALGO_COUNT               5 /* Count of CC algorithms */
114 
/*
 * Values of ccd_event.
 *
 * The list is an X-macro: a user defines X() to say what to generate for each
 * entry, expands TCP_CC_EVENT_LIST, and then undefines X again. Entries are
 * numbered in list order starting from zero, so new events must be added
 * immediately before MAX_TCP_CC_EVENTS; that sentinel has to stay last so
 * that it equals the number of real events.
 */
#define TCP_CC_EVENT_LIST                       \
	X(TCP_CC_CWND_INIT)                     \
	X(TCP_CC_INSEQ_ACK_RCVD)                \
	X(TCP_CC_ACK_RCVD)                      \
	X(TCP_CC_ENTER_FASTRECOVERY)            \
	X(TCP_CC_IN_FASTRECOVERY)               \
	X(TCP_CC_EXIT_FASTRECOVERY)             \
	X(TCP_CC_PARTIAL_ACK)                   \
	X(TCP_CC_IDLE_TIMEOUT)                  \
	X(TCP_CC_REXMT_TIMEOUT)                 \
	X(TCP_CC_ECN_RCVD)                      \
	X(TCP_CC_BAD_REXMT_RECOVERY)            \
	X(TCP_CC_OUTPUT_ERROR)                  \
	X(TCP_CC_CHANGE_ALGO)                   \
	X(TCP_CC_FLOW_CONTROL)                  \
	X(TCP_CC_SUSPEND)                       \
	X(TCP_CC_LIMITED_TRANSMIT)              \
	X(TCP_CC_EARLY_RETRANSMIT)              \
	X(TCP_CC_TLP_RECOVERY)                  \
	X(TCP_CC_TLP_RECOVER_LASTPACKET)        \
	X(TCP_CC_DELAY_FASTRECOVERY)            \
	X(TCP_CC_TLP_IN_FASTRECOVERY)           \
	X(TCP_CC_DSACK_BAD_REXMT)               \
	X(TCP_CC_FIRST_REXMT)                   \
	X(TCP_CC_FLOW_CONGESTION_NOTIFIED)      \
	X(MAX_TCP_CC_EVENTS)

/* Enumerators are generated from TCP_CC_EVENT_LIST, one per entry. */
enum tcp_cc_event {
#define X(name, ...) name,
	TCP_CC_EVENT_LIST
#undef X
};
150 
/*
 * Kernel control ID
 */
#define TCP_CC_CONTROL_NAME     "com.apple.network.tcp_ccdebug"
155 
156 #endif /* PRIVATE */
157 
158 #ifdef KERNEL_PRIVATE
159 
160 #include <netinet/tcp.h>
161 #include <netinet/tcp_var.h>
162 #include <kern/zalloc.h>
163 
/*
 * Maximum characters in the name of a CC algorithm.
 *
 * This is the size of the name[] buffer in struct tcp_cc_algo and
 * struct tcp_rcv_cc_algo.
 * NOTE(review): presumably the limit includes the terminating NUL -- confirm
 * against the code that fills in or prints the names.
 */
#define TCP_CA_NAME_MAX 16

/* Tunables defined outside this header. */
extern int tcp_recv_bg;
extern uint32_t bg_ss_fltsz;
171 
172 /*
173  * Structure to hold definition various actions defined by a congestion
174  * control algorithm for TCP. This can be used to change the congestion
175  * control on a connection based on the user settings of priority of a
176  * connection.
177  */
178 struct tcp_cc_algo {
179 	char name[TCP_CA_NAME_MAX];
180 	_Atomic uint32_t num_sockets;
181 	uint32_t flags;
182 
183 	/* init the congestion algorithm for the specified control block */
184 	int (*init) (struct tcpcb *tp);
185 
186 	/*
187 	 * cleanup any state that is stored in the connection
188 	 * related to the algorithm
189 	 */
190 	int (*cleanup) (struct tcpcb *tp);
191 
192 	/* initialize cwnd at the start of a connection */
193 	void (*cwnd_init) (struct tcpcb *tp);
194 
195 	/*
196 	 * called on the receipt of in-sequence ack during congestion
197 	 * avoidance phase
198 	 */
199 	void (*congestion_avd) (struct tcpcb *tp, struct tcphdr *th);
200 
201 	/* called on the receipt of a valid ack */
202 	void (*ack_rcvd) (struct tcpcb *tp, struct tcphdr *th);
203 
204 	/* called before entering FR */
205 	void (*pre_fr) (struct tcpcb *tp);
206 
207 	/*  after exiting FR */
208 	void (*post_fr) (struct tcpcb *tp, struct tcphdr *th);
209 
210 	/* perform tasks when data transfer resumes after an idle period */
211 	void (*after_idle) (struct tcpcb *tp);
212 
213 	/* perform tasks when the connection's retransmit timer expires */
214 	void (*after_timeout) (struct tcpcb *tp);
215 
216 	/* Whether or not to delay the ack */
217 	int (*delay_ack)(struct tcpcb *tp, struct tcphdr *th);
218 
219 	/* called to process ECN markings, used by Prague only */
220 	void (*process_ecn) (struct tcpcb *tp, struct tcphdr *th, uint32_t new_bytes_marked, uint32_t packets_marked, uint32_t packets_acked);
221 
222 	/* called to set bytes acked in this ACK which are later update to exclude CE marked bytes */
223 	void (*set_bytes_acked) (struct tcpcb *tp, uint32_t acked);
224 
225 	/* Switch a connection to this CC algorithm after sending some packets */
226 	void (*switch_to)(struct tcpcb *tp);
227 } __attribute__((aligned(4)));
228 
229 extern struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT];
230 
/*
 * Look up the congestion control algorithm of a connection.
 * The argument is parenthesized so that any pointer expression, e.g.
 * CC_ALGO(base + i), expands correctly.
 */
#define CC_ALGO(tp) (tcp_cc_algo_list[(tp)->tcp_cc_index])
#define TCP_CC_CWND_INIT_PKTS 10
#define TCP_CC_CWND_INIT_BYTES  4380
/*
 * The congestion window will have to be reset after a
 * non-validated period -- currently set to 3 minutes
 */
#define TCP_CC_CWND_NONVALIDATED_PERIOD (3 * 60 * TCP_RETRANSHZ)
239 
240 /* Less than BE congestion control algo for receive window */
241 struct tcp_rcv_cc_algo {
242 	char name[TCP_CA_NAME_MAX];
243 	_Atomic uint32_t num_sockets;
244 	uint32_t flags;
245 
246 	/* init the congestion algorithm for the specified control block */
247 	void (*init) (struct tcpcb *tp);
248 
249 	/*
250 	 * cleanup any state that is stored in the connection
251 	 * related to the algorithm
252 	 */
253 	void (*cleanup) (struct tcpcb *tp);
254 
255 	/* initialize rwnd at the start of a connection */
256 	void (*rwnd_init) (struct tcpcb *tp);
257 
258 	/* called on the receipt of valid data */
259 	void (*data_rcvd) (struct tcpcb *tp, struct tcphdr *th,
260 	    struct tcpopt *to, uint32_t segment_len);
261 
262 	uint32_t (*get_rlwin) (struct tcpcb *tp);
263 
264 	/* perform tasks when data transfer resumes after an idle period */
265 	void (*after_idle) (struct tcpcb *tp);
266 
267 	/* called when we switch from foreground to background */
268 	void (*switch_to) (struct tcpcb *tp);
269 } __attribute__((aligned(4)));
270 
/* Receive-side algorithm instance, defined outside this header. */
extern struct tcp_rcv_cc_algo tcp_cc_rledbat;

/* Congestion control helpers implemented outside this header. */
extern void tcp_cc_init(void);
extern void tcp_cc_resize_sndbuf(struct tcpcb *tp);
extern void tcp_bad_rexmt_fix_sndbuf(struct tcpcb *tp);
extern void tcp_cc_cwnd_init_or_reset(struct tcpcb *tp);
extern int tcp_cc_delay_ack(struct tcpcb *tp, struct tcphdr *th);
extern void tcp_cc_allocate_state(struct tcpcb *tp);
extern uint32_t tcp_cc_is_cwnd_nonvalidated(struct tcpcb *tp);
extern void tcp_cc_adjust_nonvalidated_cwnd(struct tcpcb *tp);
extern uint32_t tcp_get_max_pipeack(struct tcpcb *tp);
extern void tcp_clear_pipeack_state(struct tcpcb *tp);
283 
284 static inline uint32_t
tcp_initial_cwnd(struct tcpcb * tp)285 tcp_initial_cwnd(struct tcpcb *tp)
286 {
287 	return TCP_CC_CWND_INIT_PKTS * tp->t_maxseg;
288 }
289 
290 #endif /* KERNEL_PRIVATE */
291 #endif /* _NETINET_CC_H_ */
292