1 /*
2 * Copyright (c) 2010-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*-
29 * Copyright (c) 2008 Swinburne University of Technology, Melbourne, Australia
30 * All rights reserved.
31 *
32 * This software was developed at the Centre for Advanced Internet
33 * Architectures, Swinburne University, by Lawrence Stewart and James Healy,
34 * made possible in part by a grant from the Cisco University Research Program
35 * Fund at Community Foundation Silicon Valley.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 *
58 * $FreeBSD$
59 */
60
61 #ifndef _NETINET_CC_H_
62 #define _NETINET_CC_H_
63
64 #ifdef PRIVATE
65
66 #include <netinet/in.h>
67
/*
 * Record used to collect and display congestion control debug
 * information.
 */
struct tcp_cc_debug_state {
	uint64_t ccd_tsns;
	/* Address buffers are sized for the text form of an IPv6 address. */
	char ccd_srcaddr[INET6_ADDRSTRLEN];
	uint16_t ccd_srcport;
	char ccd_destaddr[INET6_ADDRSTRLEN];
	uint16_t ccd_destport;
	uint32_t ccd_snd_cwnd;
	uint32_t ccd_snd_wnd;
	uint32_t ccd_snd_ssthresh;
	uint32_t ccd_pipeack;
	uint32_t ccd_rttcur;
	uint32_t ccd_rxtcur;
	uint32_t ccd_srtt;
	uint32_t ccd_event;             /* one of the ccd_event values */
	uint32_t ccd_sndcc;
	uint32_t ccd_sndhiwat;
	uint32_t ccd_bytes_acked;
	uint8_t ccd_cc_index;           /* one of the ccd_cc_index values */
	/* Explicit filler following the one-byte algorithm index. */
	uint8_t ccd_unused_1__;
	uint16_t ccd_unused_2__;
	/*
	 * Algorithm-specific state.
	 * NOTE(review): the valid member is presumably selected by
	 * ccd_cc_index -- confirm against the code that fills this in.
	 */
	union {
		struct {
			uint32_t ccd_last_max;
			uint32_t ccd_tcp_win;
			uint32_t ccd_target_win;
			uint32_t ccd_avg_lastmax;
			uint32_t ccd_mean_deviation;
		} cubic_state;
		struct {
			uint32_t led_base_rtt;
		} ledbat_state;
	} u;
};
104
/*
 * Congestion control algorithm identifiers; these are the values
 * stored in ccd_cc_index.
 */
#define TCP_CC_ALGO_NONE                0
#define TCP_CC_ALGO_NEWRENO_INDEX       1
#define TCP_CC_ALGO_BACKGROUND_INDEX    2       /* used for background transport */
#define TCP_CC_ALGO_CUBIC_INDEX         3       /* the default algorithm */
#define TCP_CC_ALGO_PRAGUE_INDEX        4       /* the L4S algorithm */
#define TCP_CC_ALGO_COUNT               5       /* number of identifiers above */
114
/*
 * Congestion control events; these are the values stored in ccd_event.
 *
 * The list is written as an X-macro: define X() before expanding
 * TCP_CC_EVENT_LIST and undefine it afterwards. MAX_TCP_CC_EVENTS is the
 * final entry, so as an enumerator it equals the number of events that
 * precede it.
 */
#define TCP_CC_EVENT_LIST                       \
	X(TCP_CC_CWND_INIT)                     \
	X(TCP_CC_INSEQ_ACK_RCVD)                \
	X(TCP_CC_ACK_RCVD)                      \
	X(TCP_CC_ENTER_FASTRECOVERY)            \
	X(TCP_CC_IN_FASTRECOVERY)               \
	X(TCP_CC_EXIT_FASTRECOVERY)             \
	X(TCP_CC_PARTIAL_ACK)                   \
	X(TCP_CC_IDLE_TIMEOUT)                  \
	X(TCP_CC_REXMT_TIMEOUT)                 \
	X(TCP_CC_ECN_RCVD)                      \
	X(TCP_CC_BAD_REXMT_RECOVERY)            \
	X(TCP_CC_OUTPUT_ERROR)                  \
	X(TCP_CC_CHANGE_ALGO)                   \
	X(TCP_CC_FLOW_CONTROL)                  \
	X(TCP_CC_SUSPEND)                       \
	X(TCP_CC_LIMITED_TRANSMIT)              \
	X(TCP_CC_EARLY_RETRANSMIT)              \
	X(TCP_CC_TLP_RECOVERY)                  \
	X(TCP_CC_TLP_RECOVER_LASTPACKET)        \
	X(TCP_CC_DELAY_FASTRECOVERY)            \
	X(TCP_CC_TLP_IN_FASTRECOVERY)           \
	X(TCP_CC_DSACK_BAD_REXMT)               \
	X(TCP_CC_FIRST_REXMT)                   \
	X(MAX_TCP_CC_EVENTS)

/*
 * Each list entry becomes one enumerator, numbered from 0 in list order.
 * Only the first X() argument is used; any further arguments are dropped.
 */
enum tcp_cc_event {
#define X(event_name, ...) event_name,
	TCP_CC_EVENT_LIST
#undef X
};
149
/*
 * Identifier (name string) of the kernel control.
 */
#define TCP_CC_CONTROL_NAME     "com.apple.network.tcp_ccdebug"
154
155 #endif /* PRIVATE */
156
157 #ifdef KERNEL_PRIVATE
158
159 #include <netinet/tcp.h>
160 #include <netinet/tcp_var.h>
161 #include <kern/zalloc.h>
162
/*
 * Upper bound, in characters, on the name of a CC algorithm; used as the
 * size of the name buffers in the algorithm structures.
 */
#define TCP_CA_NAME_MAX         16

/* Globals declared here and defined outside this header. */
extern int tcp_recv_bg;
extern uint32_t bg_ss_fltsz;
170
171 /*
172 * Structure to hold definition various actions defined by a congestion
173 * control algorithm for TCP. This can be used to change the congestion
174 * control on a connection based on the user settings of priority of a
175 * connection.
176 */
177 struct tcp_cc_algo {
178 char name[TCP_CA_NAME_MAX];
179 _Atomic uint32_t num_sockets;
180 uint32_t flags;
181
182 /* init the congestion algorithm for the specified control block */
183 int (*init) (struct tcpcb *tp);
184
185 /*
186 * cleanup any state that is stored in the connection
187 * related to the algorithm
188 */
189 int (*cleanup) (struct tcpcb *tp);
190
191 /* initialize cwnd at the start of a connection */
192 void (*cwnd_init) (struct tcpcb *tp);
193
194 /*
195 * called on the receipt of in-sequence ack during congestion
196 * avoidance phase
197 */
198 void (*congestion_avd) (struct tcpcb *tp, struct tcphdr *th);
199
200 /* called on the receipt of a valid ack */
201 void (*ack_rcvd) (struct tcpcb *tp, struct tcphdr *th);
202
203 /* called before entering FR */
204 void (*pre_fr) (struct tcpcb *tp);
205
206 /* after exiting FR */
207 void (*post_fr) (struct tcpcb *tp, struct tcphdr *th);
208
209 /* perform tasks when data transfer resumes after an idle period */
210 void (*after_idle) (struct tcpcb *tp);
211
212 /* perform tasks when the connection's retransmit timer expires */
213 void (*after_timeout) (struct tcpcb *tp);
214
215 /* Whether or not to delay the ack */
216 int (*delay_ack)(struct tcpcb *tp, struct tcphdr *th);
217
218 /* called to process ECN markings, used by Prague only */
219 void (*process_ecn) (struct tcpcb *tp, struct tcphdr *th, uint32_t new_bytes_marked, uint32_t packets_marked, uint32_t packets_acked);
220
221 /* called to set bytes acked in this ACK which are later update to exclude CE marked bytes */
222 void (*set_bytes_acked) (struct tcpcb *tp, uint32_t acked);
223
224 /* Switch a connection to this CC algorithm after sending some packets */
225 void (*switch_to)(struct tcpcb *tp);
226 } __attribute__((aligned(4)));
227
228 extern struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT];
229
/*
 * Look up the CC algorithm of a connection: indexes tcp_cc_algo_list with
 * the tcp_cc_index member of the control block `tp` points to. The
 * argument is parenthesized so that any pointer expression (for example
 * `p + 1` or `&cb`) can be passed safely.
 */
#define CC_ALGO(tp) (tcp_cc_algo_list[(tp)->tcp_cc_index])
/* Initial congestion window, given as a packet count and as a byte count. */
#define TCP_CC_CWND_INIT_PKTS   10
#define TCP_CC_CWND_INIT_BYTES  4380

/*
 * Length of the non-validated period after which the congestion window
 * has to be reset. Currently 3 minutes (3 * 60, scaled by TCP_RETRANSHZ).
 */
#define TCP_CC_CWND_NONVALIDATED_PERIOD (3 * 60 * TCP_RETRANSHZ)
238
239 /* Less than BE congestion control algo for receive window */
240 struct tcp_rcv_cc_algo {
241 char name[TCP_CA_NAME_MAX];
242 _Atomic uint32_t num_sockets;
243 uint32_t flags;
244
245 /* init the congestion algorithm for the specified control block */
246 void (*init) (struct tcpcb *tp);
247
248 /*
249 * cleanup any state that is stored in the connection
250 * related to the algorithm
251 */
252 void (*cleanup) (struct tcpcb *tp);
253
254 /* initialize rwnd at the start of a connection */
255 void (*rwnd_init) (struct tcpcb *tp);
256
257 /* called on the receipt of valid data */
258 void (*data_rcvd) (struct tcpcb *tp, struct tcphdr *th,
259 struct tcpopt *to, uint32_t segment_len);
260
261 uint32_t (*get_rlwin) (struct tcpcb *tp);
262
263 /* perform tasks when data transfer resumes after an idle period */
264 void (*after_idle) (struct tcpcb *tp);
265
266 /* called when we switch from foreground to background */
267 void (*switch_to) (struct tcpcb *tp);
268 } __attribute__((aligned(4)));
269
/* Receive-window algorithm instance; defined outside this header. */
extern struct tcp_rcv_cc_algo tcp_cc_rledbat;

/* Congestion control routines declared here and defined elsewhere. */
extern void tcp_cc_init(void);
extern void tcp_cc_resize_sndbuf(struct tcpcb *tp);
extern void tcp_bad_rexmt_fix_sndbuf(struct tcpcb *tp);
extern void tcp_cc_cwnd_init_or_reset(struct tcpcb *tp);
extern int tcp_cc_delay_ack(struct tcpcb *tp, struct tcphdr *th);
extern void tcp_cc_allocate_state(struct tcpcb *tp);
extern void tcp_cc_after_idle_stretchack(struct tcpcb *tp);
extern uint32_t tcp_cc_is_cwnd_nonvalidated(struct tcpcb *tp);
extern void tcp_cc_adjust_nonvalidated_cwnd(struct tcpcb *tp);
extern uint32_t tcp_get_max_pipeack(struct tcpcb *tp);
extern void tcp_clear_pipeack_state(struct tcpcb *tp);
283
284 static inline uint32_t
tcp_initial_cwnd(struct tcpcb * tp)285 tcp_initial_cwnd(struct tcpcb *tp)
286 {
287 if (tcp_cubic_minor_fixes) {
288 return TCP_CC_CWND_INIT_PKTS * tp->t_maxseg;
289 } else {
290 return TCP_CC_CWND_INIT_BYTES;
291 }
292 }
293
294 #endif /* KERNEL_PRIVATE */
295 #endif /* _NETINET_CC_H_ */
296