1*5e3eaea3SApple OSS Distributions /*
2*5e3eaea3SApple OSS Distributions * Copyright (c) 2010-2021 Apple Inc. All rights reserved.
3*5e3eaea3SApple OSS Distributions *
4*5e3eaea3SApple OSS Distributions * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5*5e3eaea3SApple OSS Distributions *
6*5e3eaea3SApple OSS Distributions * This file contains Original Code and/or Modifications of Original Code
7*5e3eaea3SApple OSS Distributions * as defined in and that are subject to the Apple Public Source License
8*5e3eaea3SApple OSS Distributions * Version 2.0 (the 'License'). You may not use this file except in
9*5e3eaea3SApple OSS Distributions * compliance with the License. The rights granted to you under the License
10*5e3eaea3SApple OSS Distributions * may not be used to create, or enable the creation or redistribution of,
11*5e3eaea3SApple OSS Distributions * unlawful or unlicensed copies of an Apple operating system, or to
12*5e3eaea3SApple OSS Distributions * circumvent, violate, or enable the circumvention or violation of, any
13*5e3eaea3SApple OSS Distributions * terms of an Apple operating system software license agreement.
14*5e3eaea3SApple OSS Distributions *
15*5e3eaea3SApple OSS Distributions * Please obtain a copy of the License at
16*5e3eaea3SApple OSS Distributions * http://www.opensource.apple.com/apsl/ and read it before using this file.
17*5e3eaea3SApple OSS Distributions *
18*5e3eaea3SApple OSS Distributions * The Original Code and all software distributed under the License are
19*5e3eaea3SApple OSS Distributions * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20*5e3eaea3SApple OSS Distributions * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21*5e3eaea3SApple OSS Distributions * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22*5e3eaea3SApple OSS Distributions * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23*5e3eaea3SApple OSS Distributions * Please see the License for the specific language governing rights and
24*5e3eaea3SApple OSS Distributions * limitations under the License.
25*5e3eaea3SApple OSS Distributions *
26*5e3eaea3SApple OSS Distributions * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27*5e3eaea3SApple OSS Distributions */
28*5e3eaea3SApple OSS Distributions
29*5e3eaea3SApple OSS Distributions #include "tcp_includes.h"
30*5e3eaea3SApple OSS Distributions
31*5e3eaea3SApple OSS Distributions #include <sys/param.h>
32*5e3eaea3SApple OSS Distributions #include <sys/kernel.h>
33*5e3eaea3SApple OSS Distributions #include <sys/sysctl.h>
34*5e3eaea3SApple OSS Distributions
35*5e3eaea3SApple OSS Distributions #include <net/route.h>
36*5e3eaea3SApple OSS Distributions #include <netinet/in.h>
37*5e3eaea3SApple OSS Distributions #include <netinet/in_systm.h>
38*5e3eaea3SApple OSS Distributions #include <netinet/ip.h>
39*5e3eaea3SApple OSS Distributions #include <netinet/ip6.h>
40*5e3eaea3SApple OSS Distributions #include <netinet/ip_var.h>
41*5e3eaea3SApple OSS Distributions
/*
 * This file implements an alternate TCP congestion control algorithm
 * for background transport, developed by the LEDBAT working group at the
 * IETF and described in draft-ietf-ledbat-congestion-02.
 *
 * It also implements LEDBAT++ as described in
 * draft-irtf-iccrg-ledbat-plus-plus-01.
 */
49*5e3eaea3SApple OSS Distributions
/* Upper bound that ledbat_gain() applies to the slow-start gain denominator. */
#define GAIN_CONSTANT (16)
/*
 * Offset added to the current timestamp to push out the next LEDBAT++
 * slowdown after a multiplicative decrease.
 */
#define DEFER_SLOWDOWN_DURATION (30 * 1000) /* 30s */
52*5e3eaea3SApple OSS Distributions
/*
 * Forward declarations for the LEDBAT handlers. Most of them are installed
 * in the tcp_cc_ledbat descriptor; ledbat_pp_ack_rcvd is the file-local
 * LEDBAT++ ACK handler used by the ACK-processing entry points.
 */
int tcp_ledbat_init(struct tcpcb *tp);
int tcp_ledbat_cleanup(struct tcpcb *tp);
void tcp_ledbat_cwnd_init(struct tcpcb *tp);
void tcp_ledbat_congestion_avd(struct tcpcb *tp, struct tcphdr *th);
void tcp_ledbat_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
static void ledbat_pp_ack_rcvd(struct tcpcb *tp, uint32_t bytes_acked);
void tcp_ledbat_pre_fr(struct tcpcb *tp);
void tcp_ledbat_post_fr(struct tcpcb *tp, struct tcphdr *th);
void tcp_ledbat_after_idle(struct tcpcb *tp);
void tcp_ledbat_after_timeout(struct tcpcb *tp);
static int tcp_ledbat_delay_ack(struct tcpcb *tp, struct tcphdr *th);
void tcp_ledbat_switch_cc(struct tcpcb *tp);
65*5e3eaea3SApple OSS Distributions
/*
 * Congestion-control algorithm descriptor for LEDBAT. It registers the
 * algorithm under the name "ledbat" and binds each hook to the matching
 * tcp_ledbat_* handler declared above.
 */
struct tcp_cc_algo tcp_cc_ledbat = {
	.name = "ledbat",
	.init = tcp_ledbat_init,
	.cleanup = tcp_ledbat_cleanup,
	.cwnd_init = tcp_ledbat_cwnd_init,
	.congestion_avd = tcp_ledbat_congestion_avd,
	.ack_rcvd = tcp_ledbat_ack_rcvd,
	.pre_fr = tcp_ledbat_pre_fr,
	.post_fr = tcp_ledbat_post_fr,
	.after_idle = tcp_ledbat_after_idle,
	.after_timeout = tcp_ledbat_after_timeout,
	.delay_ack = tcp_ledbat_delay_ack,
	.switch_to = tcp_ledbat_switch_cc
};
80*5e3eaea3SApple OSS Distributions
81*5e3eaea3SApple OSS Distributions static void
update_cwnd(struct tcpcb * tp,uint32_t update,bool is_incr)82*5e3eaea3SApple OSS Distributions update_cwnd(struct tcpcb *tp, uint32_t update, bool is_incr)
83*5e3eaea3SApple OSS Distributions {
84*5e3eaea3SApple OSS Distributions uint32_t max_allowed_cwnd = 0, flight_size = 0;
85*5e3eaea3SApple OSS Distributions uint32_t base_rtt = get_base_rtt(tp);
86*5e3eaea3SApple OSS Distributions uint32_t curr_rtt = tcp_use_min_curr_rtt ? tp->curr_rtt_min :
87*5e3eaea3SApple OSS Distributions tp->t_rttcur;
88*5e3eaea3SApple OSS Distributions
89*5e3eaea3SApple OSS Distributions /* If we do not have a good RTT measurement yet, increment
90*5e3eaea3SApple OSS Distributions * congestion window by the default value.
91*5e3eaea3SApple OSS Distributions */
92*5e3eaea3SApple OSS Distributions if (base_rtt == 0 || curr_rtt == 0) {
93*5e3eaea3SApple OSS Distributions tp->snd_cwnd += update;
94*5e3eaea3SApple OSS Distributions goto check_max;
95*5e3eaea3SApple OSS Distributions }
96*5e3eaea3SApple OSS Distributions
97*5e3eaea3SApple OSS Distributions if (curr_rtt <= (base_rtt + target_qdelay)) {
98*5e3eaea3SApple OSS Distributions /*
99*5e3eaea3SApple OSS Distributions * Delay decreased or remained the same, we can increase
100*5e3eaea3SApple OSS Distributions * the congestion window according to RFC 3465.
101*5e3eaea3SApple OSS Distributions *
102*5e3eaea3SApple OSS Distributions * Move background slow-start threshold to current
103*5e3eaea3SApple OSS Distributions * congestion window so that the next time (after some idle
104*5e3eaea3SApple OSS Distributions * period), we can attempt to do slow-start till here if there
105*5e3eaea3SApple OSS Distributions * is no increase in rtt
106*5e3eaea3SApple OSS Distributions */
107*5e3eaea3SApple OSS Distributions if (tp->bg_ssthresh < tp->snd_cwnd) {
108*5e3eaea3SApple OSS Distributions tp->bg_ssthresh = tp->snd_cwnd;
109*5e3eaea3SApple OSS Distributions }
110*5e3eaea3SApple OSS Distributions tp->snd_cwnd += update;
111*5e3eaea3SApple OSS Distributions tp->snd_cwnd = tcp_round_to(tp->snd_cwnd, tp->t_maxseg);
112*5e3eaea3SApple OSS Distributions } else {
113*5e3eaea3SApple OSS Distributions if (tcp_ledbat_plus_plus) {
114*5e3eaea3SApple OSS Distributions VERIFY(is_incr == false);
115*5e3eaea3SApple OSS Distributions tp->snd_cwnd -= update;
116*5e3eaea3SApple OSS Distributions } else {
117*5e3eaea3SApple OSS Distributions /* In response to an increase in rtt, reduce the congestion
118*5e3eaea3SApple OSS Distributions * window by one-eighth. This will help to yield immediately
119*5e3eaea3SApple OSS Distributions * to a competing stream.
120*5e3eaea3SApple OSS Distributions */
121*5e3eaea3SApple OSS Distributions uint32_t redwin;
122*5e3eaea3SApple OSS Distributions
123*5e3eaea3SApple OSS Distributions redwin = tp->snd_cwnd >> 3;
124*5e3eaea3SApple OSS Distributions tp->snd_cwnd -= redwin;
125*5e3eaea3SApple OSS Distributions }
126*5e3eaea3SApple OSS Distributions
127*5e3eaea3SApple OSS Distributions if (tp->snd_cwnd < bg_ss_fltsz * tp->t_maxseg) {
128*5e3eaea3SApple OSS Distributions tp->snd_cwnd = bg_ss_fltsz * tp->t_maxseg;
129*5e3eaea3SApple OSS Distributions }
130*5e3eaea3SApple OSS Distributions
131*5e3eaea3SApple OSS Distributions tp->snd_cwnd = tcp_round_to(tp->snd_cwnd, tp->t_maxseg);
132*5e3eaea3SApple OSS Distributions /* Lower background slow-start threshold so that the connection
133*5e3eaea3SApple OSS Distributions * will go into congestion avoidance phase
134*5e3eaea3SApple OSS Distributions */
135*5e3eaea3SApple OSS Distributions if (tp->bg_ssthresh > tp->snd_cwnd) {
136*5e3eaea3SApple OSS Distributions tp->bg_ssthresh = tp->snd_cwnd;
137*5e3eaea3SApple OSS Distributions }
138*5e3eaea3SApple OSS Distributions }
139*5e3eaea3SApple OSS Distributions check_max:
140*5e3eaea3SApple OSS Distributions if (!tcp_ledbat_plus_plus) {
141*5e3eaea3SApple OSS Distributions /* Calculate the outstanding flight size and restrict the
142*5e3eaea3SApple OSS Distributions * congestion window to a factor of flight size.
143*5e3eaea3SApple OSS Distributions */
144*5e3eaea3SApple OSS Distributions flight_size = tp->snd_max - tp->snd_una;
145*5e3eaea3SApple OSS Distributions
146*5e3eaea3SApple OSS Distributions max_allowed_cwnd = (tcp_ledbat_allowed_increase * tp->t_maxseg)
147*5e3eaea3SApple OSS Distributions + (flight_size << tcp_ledbat_tether_shift);
148*5e3eaea3SApple OSS Distributions tp->snd_cwnd = min(tp->snd_cwnd, max_allowed_cwnd);
149*5e3eaea3SApple OSS Distributions } else {
150*5e3eaea3SApple OSS Distributions tp->snd_cwnd = min(tp->snd_cwnd, TCP_MAXWIN << tp->snd_scale);
151*5e3eaea3SApple OSS Distributions }
152*5e3eaea3SApple OSS Distributions }
153*5e3eaea3SApple OSS Distributions
154*5e3eaea3SApple OSS Distributions static inline void
tcp_ledbat_clear_state(struct tcpcb * tp)155*5e3eaea3SApple OSS Distributions tcp_ledbat_clear_state(struct tcpcb *tp)
156*5e3eaea3SApple OSS Distributions {
157*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_slowdown_events = 0;
158*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_slowdown_ts = 0;
159*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_slowdown_begin = 0;
160*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_md_bytes_acked = 0;
161*5e3eaea3SApple OSS Distributions }
162*5e3eaea3SApple OSS Distributions
163*5e3eaea3SApple OSS Distributions int
tcp_ledbat_init(struct tcpcb * tp)164*5e3eaea3SApple OSS Distributions tcp_ledbat_init(struct tcpcb *tp)
165*5e3eaea3SApple OSS Distributions {
166*5e3eaea3SApple OSS Distributions os_atomic_inc(&tcp_cc_ledbat.num_sockets, relaxed);
167*5e3eaea3SApple OSS Distributions tcp_ledbat_clear_state(tp);
168*5e3eaea3SApple OSS Distributions return 0;
169*5e3eaea3SApple OSS Distributions }
170*5e3eaea3SApple OSS Distributions
171*5e3eaea3SApple OSS Distributions int
tcp_ledbat_cleanup(struct tcpcb * tp)172*5e3eaea3SApple OSS Distributions tcp_ledbat_cleanup(struct tcpcb *tp)
173*5e3eaea3SApple OSS Distributions {
174*5e3eaea3SApple OSS Distributions #pragma unused(tp)
175*5e3eaea3SApple OSS Distributions os_atomic_dec(&tcp_cc_ledbat.num_sockets, relaxed);
176*5e3eaea3SApple OSS Distributions return 0;
177*5e3eaea3SApple OSS Distributions }
178*5e3eaea3SApple OSS Distributions
179*5e3eaea3SApple OSS Distributions /*
180*5e3eaea3SApple OSS Distributions * Initialize the congestion window for a connection
181*5e3eaea3SApple OSS Distributions */
182*5e3eaea3SApple OSS Distributions void
tcp_ledbat_cwnd_init(struct tcpcb * tp)183*5e3eaea3SApple OSS Distributions tcp_ledbat_cwnd_init(struct tcpcb *tp)
184*5e3eaea3SApple OSS Distributions {
185*5e3eaea3SApple OSS Distributions tp->snd_cwnd = tp->t_maxseg * bg_ss_fltsz;
186*5e3eaea3SApple OSS Distributions tp->bg_ssthresh = tp->snd_ssthresh;
187*5e3eaea3SApple OSS Distributions }
188*5e3eaea3SApple OSS Distributions
189*5e3eaea3SApple OSS Distributions /* Function to handle an in-sequence ack which is fast-path processing
190*5e3eaea3SApple OSS Distributions * of an in sequence ack in tcp_input function (called as header prediction).
191*5e3eaea3SApple OSS Distributions * This gets called only during congestion avoidance phase.
192*5e3eaea3SApple OSS Distributions */
193*5e3eaea3SApple OSS Distributions void
tcp_ledbat_congestion_avd(struct tcpcb * tp,struct tcphdr * th)194*5e3eaea3SApple OSS Distributions tcp_ledbat_congestion_avd(struct tcpcb *tp, struct tcphdr *th)
195*5e3eaea3SApple OSS Distributions {
196*5e3eaea3SApple OSS Distributions int acked = 0;
197*5e3eaea3SApple OSS Distributions uint32_t incr = 0;
198*5e3eaea3SApple OSS Distributions
199*5e3eaea3SApple OSS Distributions acked = BYTES_ACKED(th, tp);
200*5e3eaea3SApple OSS Distributions
201*5e3eaea3SApple OSS Distributions if (tcp_ledbat_plus_plus) {
202*5e3eaea3SApple OSS Distributions ledbat_pp_ack_rcvd(tp, acked);
203*5e3eaea3SApple OSS Distributions } else {
204*5e3eaea3SApple OSS Distributions tp->t_bytes_acked += acked;
205*5e3eaea3SApple OSS Distributions if (tp->t_bytes_acked > tp->snd_cwnd) {
206*5e3eaea3SApple OSS Distributions tp->t_bytes_acked -= tp->snd_cwnd;
207*5e3eaea3SApple OSS Distributions incr = tp->t_maxseg;
208*5e3eaea3SApple OSS Distributions }
209*5e3eaea3SApple OSS Distributions
210*5e3eaea3SApple OSS Distributions if (tp->snd_cwnd < tp->snd_wnd && incr > 0) {
211*5e3eaea3SApple OSS Distributions update_cwnd(tp, incr, true);
212*5e3eaea3SApple OSS Distributions }
213*5e3eaea3SApple OSS Distributions }
214*5e3eaea3SApple OSS Distributions }
215*5e3eaea3SApple OSS Distributions
216*5e3eaea3SApple OSS Distributions /*
217*5e3eaea3SApple OSS Distributions * Compute the denominator
218*5e3eaea3SApple OSS Distributions * MIN(16, ceil(2 * TARGET / base))
219*5e3eaea3SApple OSS Distributions */
220*5e3eaea3SApple OSS Distributions static uint32_t
ledbat_gain(uint32_t base_rtt)221*5e3eaea3SApple OSS Distributions ledbat_gain(uint32_t base_rtt)
222*5e3eaea3SApple OSS Distributions {
223*5e3eaea3SApple OSS Distributions return MIN(GAIN_CONSTANT, tcp_ceil(2 * target_qdelay /
224*5e3eaea3SApple OSS Distributions (double)base_rtt));
225*5e3eaea3SApple OSS Distributions }
226*5e3eaea3SApple OSS Distributions
227*5e3eaea3SApple OSS Distributions /*
228*5e3eaea3SApple OSS Distributions * Congestion avoidance for ledbat++
229*5e3eaea3SApple OSS Distributions */
230*5e3eaea3SApple OSS Distributions static void
ledbat_pp_congestion_avd(struct tcpcb * tp,uint32_t bytes_acked,uint32_t base_rtt,uint32_t curr_rtt,uint32_t now)231*5e3eaea3SApple OSS Distributions ledbat_pp_congestion_avd(struct tcpcb *tp, uint32_t bytes_acked,
232*5e3eaea3SApple OSS Distributions uint32_t base_rtt, uint32_t curr_rtt, uint32_t now)
233*5e3eaea3SApple OSS Distributions {
234*5e3eaea3SApple OSS Distributions uint32_t update = 0;
235*5e3eaea3SApple OSS Distributions /*
236*5e3eaea3SApple OSS Distributions * Set the next slowdown time i.e. 9 times the duration
237*5e3eaea3SApple OSS Distributions * of previous slowdown except the initial slowdown.
238*5e3eaea3SApple OSS Distributions */
239*5e3eaea3SApple OSS Distributions if (tp->t_ccstate->ledbat_slowdown_ts == 0) {
240*5e3eaea3SApple OSS Distributions uint32_t slowdown_duration = 0;
241*5e3eaea3SApple OSS Distributions if (tp->t_ccstate->ledbat_slowdown_events > 0) {
242*5e3eaea3SApple OSS Distributions slowdown_duration = now -
243*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_slowdown_begin;
244*5e3eaea3SApple OSS Distributions
245*5e3eaea3SApple OSS Distributions if (tp->bg_ssthresh > tp->snd_cwnd) {
246*5e3eaea3SApple OSS Distributions /*
247*5e3eaea3SApple OSS Distributions * Special case for slowdowns (other than initial)
248*5e3eaea3SApple OSS Distributions * where cwnd doesn't recover fully to previous
249*5e3eaea3SApple OSS Distributions * ssthresh
250*5e3eaea3SApple OSS Distributions */
251*5e3eaea3SApple OSS Distributions slowdown_duration *= 2;
252*5e3eaea3SApple OSS Distributions }
253*5e3eaea3SApple OSS Distributions }
254*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_slowdown_ts = now + (9 * slowdown_duration);
255*5e3eaea3SApple OSS Distributions if (slowdown_duration == 0) {
256*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_slowdown_ts += (2 * (tp->t_srtt >> TCP_RTT_SHIFT));
257*5e3eaea3SApple OSS Distributions }
258*5e3eaea3SApple OSS Distributions /* Reset the start */
259*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_slowdown_begin = 0;
260*5e3eaea3SApple OSS Distributions
261*5e3eaea3SApple OSS Distributions /* On exit slow start due to higher qdelay, cap the ssthresh */
262*5e3eaea3SApple OSS Distributions if (tp->bg_ssthresh > tp->snd_cwnd) {
263*5e3eaea3SApple OSS Distributions tp->bg_ssthresh = tp->snd_cwnd;
264*5e3eaea3SApple OSS Distributions }
265*5e3eaea3SApple OSS Distributions }
266*5e3eaea3SApple OSS Distributions
267*5e3eaea3SApple OSS Distributions if (curr_rtt <= base_rtt + target_qdelay) {
268*5e3eaea3SApple OSS Distributions /* Additive increase */
269*5e3eaea3SApple OSS Distributions tp->t_bytes_acked += bytes_acked;
270*5e3eaea3SApple OSS Distributions if (tp->t_bytes_acked >= tp->snd_cwnd) {
271*5e3eaea3SApple OSS Distributions update = tp->t_maxseg;
272*5e3eaea3SApple OSS Distributions tp->t_bytes_acked -= tp->snd_cwnd;
273*5e3eaea3SApple OSS Distributions update_cwnd(tp, update, true);
274*5e3eaea3SApple OSS Distributions }
275*5e3eaea3SApple OSS Distributions } else {
276*5e3eaea3SApple OSS Distributions /*
277*5e3eaea3SApple OSS Distributions * Multiplicative decrease
278*5e3eaea3SApple OSS Distributions * W -= min(W * (qdelay/target - 1), W/2) (per RTT)
279*5e3eaea3SApple OSS Distributions * To calculate per bytes acked, it becomes
280*5e3eaea3SApple OSS Distributions * W -= min((qdelay/target - 1), 1/2) * bytes_acked
281*5e3eaea3SApple OSS Distributions */
282*5e3eaea3SApple OSS Distributions uint32_t qdelay = curr_rtt > base_rtt ?
283*5e3eaea3SApple OSS Distributions (curr_rtt - base_rtt) : 0;
284*5e3eaea3SApple OSS Distributions
285*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_md_bytes_acked += bytes_acked;
286*5e3eaea3SApple OSS Distributions if (tp->t_ccstate->ledbat_md_bytes_acked >= tp->snd_cwnd) {
287*5e3eaea3SApple OSS Distributions update = (uint32_t)(MIN(((double)qdelay / target_qdelay - 1), 0.5) *
288*5e3eaea3SApple OSS Distributions (double)tp->snd_cwnd);
289*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_md_bytes_acked -= tp->snd_cwnd;
290*5e3eaea3SApple OSS Distributions update_cwnd(tp, update, false);
291*5e3eaea3SApple OSS Distributions
292*5e3eaea3SApple OSS Distributions if (tp->t_ccstate->ledbat_slowdown_ts != 0) {
293*5e3eaea3SApple OSS Distributions /* As the window has been reduced, defer the slowdown. */
294*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_slowdown_ts = now + DEFER_SLOWDOWN_DURATION;
295*5e3eaea3SApple OSS Distributions }
296*5e3eaea3SApple OSS Distributions }
297*5e3eaea3SApple OSS Distributions }
298*5e3eaea3SApple OSS Distributions }
299*5e3eaea3SApple OSS Distributions
300*5e3eaea3SApple OSS Distributions /*
301*5e3eaea3SApple OSS Distributions * Different handling for ack received for ledbat++
302*5e3eaea3SApple OSS Distributions */
303*5e3eaea3SApple OSS Distributions static void
ledbat_pp_ack_rcvd(struct tcpcb * tp,uint32_t bytes_acked)304*5e3eaea3SApple OSS Distributions ledbat_pp_ack_rcvd(struct tcpcb *tp, uint32_t bytes_acked)
305*5e3eaea3SApple OSS Distributions {
306*5e3eaea3SApple OSS Distributions uint32_t update = 0;
307*5e3eaea3SApple OSS Distributions const uint32_t base_rtt = get_base_rtt(tp);
308*5e3eaea3SApple OSS Distributions const uint32_t curr_rtt = tcp_use_min_curr_rtt ? tp->curr_rtt_min :
309*5e3eaea3SApple OSS Distributions tp->t_rttcur;
310*5e3eaea3SApple OSS Distributions const uint32_t ss_target = (uint32_t)(3 * target_qdelay / 4);
311*5e3eaea3SApple OSS Distributions struct tcp_globals *globals = tcp_get_globals(tp);
312*5e3eaea3SApple OSS Distributions
313*5e3eaea3SApple OSS Distributions /*
314*5e3eaea3SApple OSS Distributions * Slowdown period - first slowdown
315*5e3eaea3SApple OSS Distributions * is 2RTT after we exit initial slow start.
316*5e3eaea3SApple OSS Distributions * Subsequent slowdowns are after 9 times the
317*5e3eaea3SApple OSS Distributions * previous slow down durations.
318*5e3eaea3SApple OSS Distributions */
319*5e3eaea3SApple OSS Distributions if (tp->t_ccstate->ledbat_slowdown_ts != 0 &&
320*5e3eaea3SApple OSS Distributions tcp_globals_now(globals) >= tp->t_ccstate->ledbat_slowdown_ts) {
321*5e3eaea3SApple OSS Distributions if (tp->t_ccstate->ledbat_slowdown_begin == 0) {
322*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_slowdown_begin = tcp_globals_now(globals);
323*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_slowdown_events++;
324*5e3eaea3SApple OSS Distributions }
325*5e3eaea3SApple OSS Distributions if (tcp_globals_now(globals) < tp->t_ccstate->ledbat_slowdown_ts +
326*5e3eaea3SApple OSS Distributions (2 * (tp->t_srtt >> TCP_RTT_SHIFT))) {
327*5e3eaea3SApple OSS Distributions // Set cwnd to 2 packets and return
328*5e3eaea3SApple OSS Distributions if (tp->snd_cwnd > bg_ss_fltsz * tp->t_maxseg) {
329*5e3eaea3SApple OSS Distributions if (tp->bg_ssthresh < tp->snd_cwnd) {
330*5e3eaea3SApple OSS Distributions tp->bg_ssthresh = tp->snd_cwnd;
331*5e3eaea3SApple OSS Distributions }
332*5e3eaea3SApple OSS Distributions tp->snd_cwnd = bg_ss_fltsz * tp->t_maxseg;
333*5e3eaea3SApple OSS Distributions /* Reset total bytes acked */
334*5e3eaea3SApple OSS Distributions tp->t_bytes_acked = 0;
335*5e3eaea3SApple OSS Distributions }
336*5e3eaea3SApple OSS Distributions return;
337*5e3eaea3SApple OSS Distributions }
338*5e3eaea3SApple OSS Distributions }
339*5e3eaea3SApple OSS Distributions
340*5e3eaea3SApple OSS Distributions if (curr_rtt == 0 || base_rtt == 0) {
341*5e3eaea3SApple OSS Distributions update = MIN(bytes_acked, TCP_CC_CWND_INIT_PKTS *
342*5e3eaea3SApple OSS Distributions tp->t_maxseg);
343*5e3eaea3SApple OSS Distributions update_cwnd(tp, update, true);
344*5e3eaea3SApple OSS Distributions } else if (tp->snd_cwnd < tp->bg_ssthresh &&
345*5e3eaea3SApple OSS Distributions ((tp->t_ccstate->ledbat_slowdown_events > 0 &&
346*5e3eaea3SApple OSS Distributions curr_rtt <= (base_rtt + target_qdelay)) ||
347*5e3eaea3SApple OSS Distributions curr_rtt <= (base_rtt + ss_target))) {
348*5e3eaea3SApple OSS Distributions /*
349*5e3eaea3SApple OSS Distributions * Modified slow start with a dynamic GAIN
350*5e3eaea3SApple OSS Distributions * If the queuing delay is larger than 3/4 of the target
351*5e3eaea3SApple OSS Distributions * delay, exit slow start, iff, it is the initial slow start.
352*5e3eaea3SApple OSS Distributions * After the initial slow start, during CA, window growth
353*5e3eaea3SApple OSS Distributions * will be bound by ssthresh.
354*5e3eaea3SApple OSS Distributions */
355*5e3eaea3SApple OSS Distributions tp->t_bytes_acked += bytes_acked;
356*5e3eaea3SApple OSS Distributions uint32_t gain_factor = ledbat_gain(base_rtt);
357*5e3eaea3SApple OSS Distributions if (tp->t_bytes_acked >= tp->t_maxseg * gain_factor) {
358*5e3eaea3SApple OSS Distributions update = MIN(tp->t_bytes_acked / gain_factor,
359*5e3eaea3SApple OSS Distributions TCP_CC_CWND_INIT_PKTS * tp->t_maxseg);
360*5e3eaea3SApple OSS Distributions tp->t_bytes_acked = 0;
361*5e3eaea3SApple OSS Distributions update_cwnd(tp, update, true);
362*5e3eaea3SApple OSS Distributions }
363*5e3eaea3SApple OSS Distributions
364*5e3eaea3SApple OSS Distributions /* Reset the next slowdown timestamp */
365*5e3eaea3SApple OSS Distributions if (tp->t_ccstate->ledbat_slowdown_ts != 0) {
366*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_slowdown_ts = 0;
367*5e3eaea3SApple OSS Distributions }
368*5e3eaea3SApple OSS Distributions } else {
369*5e3eaea3SApple OSS Distributions /* Congestion avoidance */
370*5e3eaea3SApple OSS Distributions ledbat_pp_congestion_avd(tp, bytes_acked, base_rtt, curr_rtt, tcp_globals_now(globals));
371*5e3eaea3SApple OSS Distributions }
372*5e3eaea3SApple OSS Distributions }
373*5e3eaea3SApple OSS Distributions
374*5e3eaea3SApple OSS Distributions /* Function to process an ack.
375*5e3eaea3SApple OSS Distributions */
376*5e3eaea3SApple OSS Distributions void
tcp_ledbat_ack_rcvd(struct tcpcb * tp,struct tcphdr * th)377*5e3eaea3SApple OSS Distributions tcp_ledbat_ack_rcvd(struct tcpcb *tp, struct tcphdr *th)
378*5e3eaea3SApple OSS Distributions {
379*5e3eaea3SApple OSS Distributions /*
380*5e3eaea3SApple OSS Distributions * RFC 3465 - Appropriate Byte Counting.
381*5e3eaea3SApple OSS Distributions *
382*5e3eaea3SApple OSS Distributions * If the window is currently less than ssthresh,
383*5e3eaea3SApple OSS Distributions * open the window by the number of bytes ACKed by
384*5e3eaea3SApple OSS Distributions * the last ACK, however clamp the window increase
385*5e3eaea3SApple OSS Distributions * to an upper limit "L".
386*5e3eaea3SApple OSS Distributions *
387*5e3eaea3SApple OSS Distributions * In congestion avoidance phase, open the window by
388*5e3eaea3SApple OSS Distributions * one segment each time "bytes_acked" grows to be
389*5e3eaea3SApple OSS Distributions * greater than or equal to the congestion window.
390*5e3eaea3SApple OSS Distributions */
391*5e3eaea3SApple OSS Distributions
392*5e3eaea3SApple OSS Distributions uint32_t cw = tp->snd_cwnd;
393*5e3eaea3SApple OSS Distributions uint32_t incr = tp->t_maxseg;
394*5e3eaea3SApple OSS Distributions uint32_t acked = 0;
395*5e3eaea3SApple OSS Distributions
396*5e3eaea3SApple OSS Distributions acked = BYTES_ACKED(th, tp);
397*5e3eaea3SApple OSS Distributions
398*5e3eaea3SApple OSS Distributions if (tcp_ledbat_plus_plus) {
399*5e3eaea3SApple OSS Distributions ledbat_pp_ack_rcvd(tp, acked);
400*5e3eaea3SApple OSS Distributions return;
401*5e3eaea3SApple OSS Distributions }
402*5e3eaea3SApple OSS Distributions
403*5e3eaea3SApple OSS Distributions tp->t_bytes_acked += acked;
404*5e3eaea3SApple OSS Distributions
405*5e3eaea3SApple OSS Distributions if (cw >= tp->bg_ssthresh) {
406*5e3eaea3SApple OSS Distributions /* congestion-avoidance */
407*5e3eaea3SApple OSS Distributions if (tp->t_bytes_acked < cw) {
408*5e3eaea3SApple OSS Distributions /* No need to increase yet. */
409*5e3eaea3SApple OSS Distributions incr = 0;
410*5e3eaea3SApple OSS Distributions }
411*5e3eaea3SApple OSS Distributions } else {
412*5e3eaea3SApple OSS Distributions /*
413*5e3eaea3SApple OSS Distributions * If the user explicitly enables RFC3465
414*5e3eaea3SApple OSS Distributions * use 2*SMSS for the "L" param. Otherwise
415*5e3eaea3SApple OSS Distributions * use the more conservative 1*SMSS.
416*5e3eaea3SApple OSS Distributions *
417*5e3eaea3SApple OSS Distributions * (See RFC 3465 2.3 Choosing the Limit)
418*5e3eaea3SApple OSS Distributions */
419*5e3eaea3SApple OSS Distributions u_int abc_lim;
420*5e3eaea3SApple OSS Distributions
421*5e3eaea3SApple OSS Distributions abc_lim = (tp->snd_nxt == tp->snd_max) ? incr * 2 : incr;
422*5e3eaea3SApple OSS Distributions
423*5e3eaea3SApple OSS Distributions incr = ulmin(acked, abc_lim);
424*5e3eaea3SApple OSS Distributions }
425*5e3eaea3SApple OSS Distributions if (tp->t_bytes_acked >= cw) {
426*5e3eaea3SApple OSS Distributions tp->t_bytes_acked -= cw;
427*5e3eaea3SApple OSS Distributions }
428*5e3eaea3SApple OSS Distributions if (incr > 0) {
429*5e3eaea3SApple OSS Distributions update_cwnd(tp, incr, true);
430*5e3eaea3SApple OSS Distributions }
431*5e3eaea3SApple OSS Distributions }
432*5e3eaea3SApple OSS Distributions
433*5e3eaea3SApple OSS Distributions void
tcp_ledbat_pre_fr(struct tcpcb * tp)434*5e3eaea3SApple OSS Distributions tcp_ledbat_pre_fr(struct tcpcb *tp)
435*5e3eaea3SApple OSS Distributions {
436*5e3eaea3SApple OSS Distributions uint32_t win = min(tp->snd_wnd, tp->snd_cwnd);
437*5e3eaea3SApple OSS Distributions
438*5e3eaea3SApple OSS Distributions if (tp->t_flagsext & TF_CWND_NONVALIDATED) {
439*5e3eaea3SApple OSS Distributions tp->t_lossflightsize = tp->snd_max - tp->snd_una;
440*5e3eaea3SApple OSS Distributions win = max(tp->t_pipeack, tp->t_lossflightsize);
441*5e3eaea3SApple OSS Distributions } else {
442*5e3eaea3SApple OSS Distributions tp->t_lossflightsize = 0;
443*5e3eaea3SApple OSS Distributions }
444*5e3eaea3SApple OSS Distributions
445*5e3eaea3SApple OSS Distributions win = win / 2;
446*5e3eaea3SApple OSS Distributions win = tcp_round_to(win, tp->t_maxseg);
447*5e3eaea3SApple OSS Distributions if (win < 2 * tp->t_maxseg) {
448*5e3eaea3SApple OSS Distributions win = 2 * tp->t_maxseg;
449*5e3eaea3SApple OSS Distributions }
450*5e3eaea3SApple OSS Distributions tp->snd_ssthresh = win;
451*5e3eaea3SApple OSS Distributions if (tp->bg_ssthresh > tp->snd_ssthresh) {
452*5e3eaea3SApple OSS Distributions tp->bg_ssthresh = tp->snd_ssthresh;
453*5e3eaea3SApple OSS Distributions }
454*5e3eaea3SApple OSS Distributions
455*5e3eaea3SApple OSS Distributions tcp_cc_resize_sndbuf(tp);
456*5e3eaea3SApple OSS Distributions }
457*5e3eaea3SApple OSS Distributions
458*5e3eaea3SApple OSS Distributions void
tcp_ledbat_post_fr(struct tcpcb * tp,struct tcphdr * th)459*5e3eaea3SApple OSS Distributions tcp_ledbat_post_fr(struct tcpcb *tp, struct tcphdr *th)
460*5e3eaea3SApple OSS Distributions {
461*5e3eaea3SApple OSS Distributions int32_t ss;
462*5e3eaea3SApple OSS Distributions
463*5e3eaea3SApple OSS Distributions if (th) {
464*5e3eaea3SApple OSS Distributions ss = tp->snd_max - th->th_ack;
465*5e3eaea3SApple OSS Distributions } else {
466*5e3eaea3SApple OSS Distributions ss = tp->snd_max - tp->snd_una;
467*5e3eaea3SApple OSS Distributions }
468*5e3eaea3SApple OSS Distributions
469*5e3eaea3SApple OSS Distributions /*
470*5e3eaea3SApple OSS Distributions * Complete ack. Inflate the congestion window to
471*5e3eaea3SApple OSS Distributions * ssthresh and exit fast recovery.
472*5e3eaea3SApple OSS Distributions *
473*5e3eaea3SApple OSS Distributions * Window inflation should have left us with approx.
474*5e3eaea3SApple OSS Distributions * snd_ssthresh outstanding data. But in case we
475*5e3eaea3SApple OSS Distributions * would be inclined to send a burst, better to do
476*5e3eaea3SApple OSS Distributions * it via the slow start mechanism.
477*5e3eaea3SApple OSS Distributions *
478*5e3eaea3SApple OSS Distributions * If the flight size is zero, then make congestion
479*5e3eaea3SApple OSS Distributions * window to be worth at least 2 segments to avoid
480*5e3eaea3SApple OSS Distributions * delayed acknowledgement (draft-ietf-tcpm-rfc3782-bis-05).
481*5e3eaea3SApple OSS Distributions */
482*5e3eaea3SApple OSS Distributions if (ss < (int32_t)tp->snd_ssthresh) {
483*5e3eaea3SApple OSS Distributions tp->snd_cwnd = max(ss, tp->t_maxseg) + tp->t_maxseg;
484*5e3eaea3SApple OSS Distributions } else {
485*5e3eaea3SApple OSS Distributions tp->snd_cwnd = tp->snd_ssthresh;
486*5e3eaea3SApple OSS Distributions }
487*5e3eaea3SApple OSS Distributions tp->t_bytes_acked = 0;
488*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_md_bytes_acked = 0;
489*5e3eaea3SApple OSS Distributions }
490*5e3eaea3SApple OSS Distributions
491*5e3eaea3SApple OSS Distributions /*
492*5e3eaea3SApple OSS Distributions * Function to handle connections that have been idle for
493*5e3eaea3SApple OSS Distributions * some time. Slow start to get ack "clock" running again.
494*5e3eaea3SApple OSS Distributions * Clear base history after idle time.
495*5e3eaea3SApple OSS Distributions */
496*5e3eaea3SApple OSS Distributions void
tcp_ledbat_after_idle(struct tcpcb * tp)497*5e3eaea3SApple OSS Distributions tcp_ledbat_after_idle(struct tcpcb *tp)
498*5e3eaea3SApple OSS Distributions {
499*5e3eaea3SApple OSS Distributions tcp_ledbat_clear_state(tp);
500*5e3eaea3SApple OSS Distributions /* Reset the congestion window */
501*5e3eaea3SApple OSS Distributions tp->snd_cwnd = tp->t_maxseg * bg_ss_fltsz;
502*5e3eaea3SApple OSS Distributions tp->t_bytes_acked = 0;
503*5e3eaea3SApple OSS Distributions tp->t_ccstate->ledbat_md_bytes_acked = 0;
504*5e3eaea3SApple OSS Distributions }
505*5e3eaea3SApple OSS Distributions
506*5e3eaea3SApple OSS Distributions /* Function to change the congestion window when the retransmit
507*5e3eaea3SApple OSS Distributions * timer fires. The behavior is the same as that for best-effort
508*5e3eaea3SApple OSS Distributions * TCP, reduce congestion window to one segment and start probing
509*5e3eaea3SApple OSS Distributions * the link using "slow start". The slow start threshold is set
510*5e3eaea3SApple OSS Distributions * to half of the current window. Lower the background slow start
511*5e3eaea3SApple OSS Distributions * threshold also.
512*5e3eaea3SApple OSS Distributions */
513*5e3eaea3SApple OSS Distributions void
tcp_ledbat_after_timeout(struct tcpcb * tp)514*5e3eaea3SApple OSS Distributions tcp_ledbat_after_timeout(struct tcpcb *tp)
515*5e3eaea3SApple OSS Distributions {
516*5e3eaea3SApple OSS Distributions if (tp->t_state >= TCPS_ESTABLISHED) {
517*5e3eaea3SApple OSS Distributions tcp_ledbat_clear_state(tp);
518*5e3eaea3SApple OSS Distributions tcp_ledbat_pre_fr(tp);
519*5e3eaea3SApple OSS Distributions tp->snd_cwnd = tp->t_maxseg;
520*5e3eaea3SApple OSS Distributions }
521*5e3eaea3SApple OSS Distributions }
522*5e3eaea3SApple OSS Distributions
523*5e3eaea3SApple OSS Distributions /*
524*5e3eaea3SApple OSS Distributions * Indicate whether this ack should be delayed.
525*5e3eaea3SApple OSS Distributions * We can delay the ack if:
526*5e3eaea3SApple OSS Distributions * - our last ack wasn't a 0-sized window.
527*5e3eaea3SApple OSS Distributions * - the peer hasn't sent us a TH_PUSH data packet: if he did, take this
528*5e3eaea3SApple OSS Distributions * as a clue that we need to ACK without any delay. This helps higher
529*5e3eaea3SApple OSS Distributions * level protocols who won't send us more data even if the window is
530*5e3eaea3SApple OSS Distributions * open because their last "segment" hasn't been ACKed
531*5e3eaea3SApple OSS Distributions * Otherwise the receiver will ack every other full-sized segment or when the
532*5e3eaea3SApple OSS Distributions * delayed ack timer fires. This will help to generate better rtt estimates for
533*5e3eaea3SApple OSS Distributions * the other end if it is a ledbat sender.
534*5e3eaea3SApple OSS Distributions *
535*5e3eaea3SApple OSS Distributions */
536*5e3eaea3SApple OSS Distributions
537*5e3eaea3SApple OSS Distributions static int
tcp_ledbat_delay_ack(struct tcpcb * tp,struct tcphdr * th)538*5e3eaea3SApple OSS Distributions tcp_ledbat_delay_ack(struct tcpcb *tp, struct tcphdr *th)
539*5e3eaea3SApple OSS Distributions {
540*5e3eaea3SApple OSS Distributions if (tcp_ack_strategy == TCP_ACK_STRATEGY_MODERN) {
541*5e3eaea3SApple OSS Distributions return tcp_cc_delay_ack(tp, th);
542*5e3eaea3SApple OSS Distributions } else {
543*5e3eaea3SApple OSS Distributions if ((tp->t_flags & TF_RXWIN0SENT) == 0 &&
544*5e3eaea3SApple OSS Distributions (th->th_flags & TH_PUSH) == 0 && (tp->t_unacksegs == 1)) {
545*5e3eaea3SApple OSS Distributions return 1;
546*5e3eaea3SApple OSS Distributions }
547*5e3eaea3SApple OSS Distributions return 0;
548*5e3eaea3SApple OSS Distributions }
549*5e3eaea3SApple OSS Distributions }
550*5e3eaea3SApple OSS Distributions
551*5e3eaea3SApple OSS Distributions /* Change a connection to use ledbat. First, lower bg_ssthresh value
552*5e3eaea3SApple OSS Distributions * if it needs to be.
553*5e3eaea3SApple OSS Distributions */
554*5e3eaea3SApple OSS Distributions void
tcp_ledbat_switch_cc(struct tcpcb * tp)555*5e3eaea3SApple OSS Distributions tcp_ledbat_switch_cc(struct tcpcb *tp)
556*5e3eaea3SApple OSS Distributions {
557*5e3eaea3SApple OSS Distributions uint32_t cwnd;
558*5e3eaea3SApple OSS Distributions
559*5e3eaea3SApple OSS Distributions tcp_ledbat_clear_state(tp);
560*5e3eaea3SApple OSS Distributions
561*5e3eaea3SApple OSS Distributions if (tp->bg_ssthresh == 0 || tp->bg_ssthresh > tp->snd_ssthresh) {
562*5e3eaea3SApple OSS Distributions tp->bg_ssthresh = tp->snd_ssthresh;
563*5e3eaea3SApple OSS Distributions }
564*5e3eaea3SApple OSS Distributions
565*5e3eaea3SApple OSS Distributions cwnd = min(tp->snd_wnd, tp->snd_cwnd);
566*5e3eaea3SApple OSS Distributions
567*5e3eaea3SApple OSS Distributions if (tp->snd_cwnd > tp->bg_ssthresh) {
568*5e3eaea3SApple OSS Distributions cwnd = cwnd / tp->t_maxseg;
569*5e3eaea3SApple OSS Distributions } else {
570*5e3eaea3SApple OSS Distributions cwnd = cwnd / 2 / tp->t_maxseg;
571*5e3eaea3SApple OSS Distributions }
572*5e3eaea3SApple OSS Distributions
573*5e3eaea3SApple OSS Distributions if (cwnd < bg_ss_fltsz) {
574*5e3eaea3SApple OSS Distributions cwnd = bg_ss_fltsz;
575*5e3eaea3SApple OSS Distributions }
576*5e3eaea3SApple OSS Distributions
577*5e3eaea3SApple OSS Distributions tp->snd_cwnd = cwnd * tp->t_maxseg;
578*5e3eaea3SApple OSS Distributions tp->t_bytes_acked = 0;
579*5e3eaea3SApple OSS Distributions
580*5e3eaea3SApple OSS Distributions os_atomic_inc(&tcp_cc_ledbat.num_sockets, relaxed);
581*5e3eaea3SApple OSS Distributions }
582