xref: /xnu-10002.61.3/bsd/net/pktsched/pktsched_fq_codel.c (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1 /*
2  * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <kern/zalloc.h>
32 #include <net/ethernet.h>
33 #include <net/if_var.h>
34 #include <net/if.h>
35 #include <net/classq/classq.h>
36 #include <net/classq/classq_fq_codel.h>
37 #include <net/pktsched/pktsched_fq_codel.h>
38 #include <os/log.h>
39 #include <pexpert/pexpert.h>    /* for PE_parse_boot_argn */
40 #include <mach/thread_act.h>
41 #include <kern/thread.h>
42 #include <kern/sched_prim.h>
43 
44 #define FQ_CODEL_DEFAULT_QUANTUM 1500
45 
46 #define FQ_CODEL_QUANTUM_BK_SYS(_q)    (_q)
47 #define FQ_CODEL_QUANTUM_BK(_q)        (_q)
48 #define FQ_CODEL_QUANTUM_BE(_q)        (_q)
49 #define FQ_CODEL_QUANTUM_RD(_q)        (_q)
50 #define FQ_CODEL_QUANTUM_OAM(_q)       (_q)
51 #define FQ_CODEL_QUANTUM_AV(_q)        (_q * 2)
52 #define FQ_CODEL_QUANTUM_RV(_q)        (_q * 2)
53 #define FQ_CODEL_QUANTUM_VI(_q)        (_q * 2)
54 #define FQ_CODEL_QUANTUM_VO(_q)        ((_q * 2) / 5)
55 #define FQ_CODEL_QUANTUM_CTL(_q)       ((_q * 2) / 5)
56 
57 static KALLOC_TYPE_DEFINE(fq_if_zone, fq_if_t, NET_KT_DEFAULT);
58 static KALLOC_TYPE_DEFINE(fq_if_grp_zone, fq_if_group_t, NET_KT_DEFAULT);
59 
60 SYSCTL_NODE(_net_classq, OID_AUTO, fq_codel, CTLFLAG_RW | CTLFLAG_LOCKED,
61     0, "FQ-CODEL parameters");
62 
63 SYSCTL_INT(_net_classq_fq_codel, OID_AUTO, fq_enable_pacing, CTLFLAG_RW | CTLFLAG_LOCKED,
64     &ifclassq_enable_pacing, 0, "Enable pacing");
65 
66 static uint64_t fq_empty_purge_delay = FQ_EMPTY_PURGE_DELAY;
67 #if (DEVELOPMENT || DEBUG)
68 SYSCTL_QUAD(_net_classq_fq_codel, OID_AUTO, fq_empty_purge_delay, CTLFLAG_RW |
69     CTLFLAG_LOCKED, &fq_empty_purge_delay, "Empty flow queue purge delay (ns)");
#endif /* DEVELOPMENT || DEBUG */
71 
72 unsigned int ifclassq_enable_pacing = 1;
73 
74 typedef STAILQ_HEAD(, flowq) flowq_dqlist_t;
75 
76 static fq_if_t *fq_if_alloc(struct ifclassq *, classq_pkt_type_t);
77 static void fq_if_destroy(fq_if_t *fqs);
78 static void fq_if_classq_init(fq_if_group_t *fqg, uint32_t priority,
79     uint32_t quantum, uint32_t drr_max, uint32_t svc_class);
80 static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, uint32_t,
81     int64_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
82     uint32_t *, flowq_dqlist_t *, bool, uint64_t, bool*, uint64_t*);
83 void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
84 static void fq_if_purge(fq_if_t *);
85 static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
86 static void fq_if_purge_flow(fq_if_t *, fq_t *, uint32_t *, uint32_t *,
87     uint64_t);
88 static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl);
89 static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
90     fq_t *fq, uint64_t now);
91 static void fq_if_purge_empty_flow(fq_if_t *fqs, fq_t *fq);
92 static void fq_if_purge_empty_flow_list(fq_if_t *fqs, uint64_t now,
93     bool purge_all);
94 static inline void fq_if_reuse_empty_flow(fq_if_t *fqs, fq_t *fq, uint64_t now);
95 static int fq_if_dequeue_sc_classq_multi_separate(struct ifclassq *ifq,
96     mbuf_svc_class_t svc, u_int32_t maxpktcnt, u_int32_t maxbytecnt,
97     classq_pkt_t *first_packet, classq_pkt_t *last_packet, u_int32_t *retpktcnt,
98     u_int32_t *retbytecnt, uint8_t grp_idx);
99 static void fq_if_grp_stat_sc(fq_if_t *fqs, fq_if_group_t *grp,
100     cqrq_stat_sc_t *stat, uint64_t now);
101 static void fq_if_purge_grp(fq_if_t *fqs, fq_if_group_t *grp);
102 static inline boolean_t fq_if_is_grp_combined(fq_if_t *fqs, uint8_t grp_idx);
103 static void fq_if_destroy_grps(fq_if_t *fqs);
104 
105 uint32_t fq_codel_drr_max_values[FQ_IF_MAX_CLASSES] = {
106 	[FQ_IF_CTL_INDEX]       = 8,
107 	[FQ_IF_VO_INDEX]        = 8,
108 	[FQ_IF_VI_INDEX]        = 6,
109 	[FQ_IF_RV_INDEX]        = 6,
110 	[FQ_IF_AV_INDEX]        = 6,
111 	[FQ_IF_OAM_INDEX]       = 4,
112 	[FQ_IF_RD_INDEX]        = 4,
113 	[FQ_IF_BE_INDEX]        = 4,
114 	[FQ_IF_BK_INDEX]        = 2,
115 	[FQ_IF_BK_SYS_INDEX]    = 2,
116 };
117 
118 #define FQ_CODEL_DRR_MAX(_s)    fq_codel_drr_max_values[FQ_IF_##_s##_INDEX]
119 
120 static boolean_t fq_if_grps_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri,
121     fq_if_state state);
122 static void fq_if_grps_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri,
123     fq_if_state dst_state, fq_if_state src_state);
124 static void fq_if_grps_bitmap_clr(fq_grp_tailq_t *grp_list, int pri,
125     fq_if_state state);
126 static int fq_if_grps_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri,
127     fq_if_state state, fq_if_group_t **selected_grp);
128 static void fq_if_grps_bitmap_move(fq_grp_tailq_t *grp_list, int pri,
129     fq_if_state dst_state, fq_if_state src_state);
130 
131 static boolean_t fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri,
132     fq_if_state state);
133 static void fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri,
134     fq_if_state dst_state, fq_if_state src_state);
135 static void fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t *grp_list, int pri,
136     fq_if_state state);
137 static int fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri,
138     fq_if_state state, fq_if_group_t **selected_grp);
139 static void fq_if_grps_sc_bitmap_move(fq_grp_tailq_t *grp_list, int pri,
140     fq_if_state dst_state, fq_if_state src_state);
141 
142 bitmap_ops_t fq_if_grps_bitmap_ops =
143 {
144 	.ffs    = fq_if_grps_bitmap_ffs,
145 	.zeros  = fq_if_grps_bitmap_zeros,
146 	.cpy    = fq_if_grps_bitmap_cpy,
147 	.clr    = fq_if_grps_bitmap_clr,
148 	.move   = fq_if_grps_bitmap_move,
149 };
150 
151 bitmap_ops_t fq_if_grps_sc_bitmap_ops =
152 {
153 	.ffs    = fq_if_grps_sc_bitmap_ffs,
154 	.zeros  = fq_if_grps_sc_bitmap_zeros,
155 	.cpy    = fq_if_grps_sc_bitmap_cpy,
156 	.clr    = fq_if_grps_sc_bitmap_clr,
157 	.move   = fq_if_grps_sc_bitmap_move,
158 };
159 
void
pktsched_fq_init(void)
{
	/*
	 * One-time FQ-CoDel scheduler setup: consume boot-args that
	 * override the pacing toggle and the per-class DRR maximums.
	 */
	PE_parse_boot_argn("ifclassq_enable_pacing", &ifclassq_enable_pacing,
	    sizeof(ifclassq_enable_pacing));

	// format looks like ifcq_drr_max=8,8,6
	/* Enough room for FQ_IF_MAX_CLASSES comma-separated 2-digit values. */
	char buf[(FQ_IF_MAX_CLASSES) * 3];
	size_t i, len, pri_index = 0;
	uint32_t drr = 0;
	if (!PE_parse_boot_arg_str("ifcq_drr_max", buf, sizeof(buf))) {
		/* No override supplied: keep compiled-in DRR defaults. */
		return;
	}

	len = strlen(buf);
	/*
	 * Iterate one slot past the last character so the final value,
	 * terminated by '\0' instead of ',', is also committed.
	 */
	for (i = 0; i < len + 1 && pri_index < FQ_IF_MAX_CLASSES; i++) {
		if (buf[i] != ',' && buf[i] != '\0') {
			/* Malformed (non-numeric) boot-arg panics here. */
			VERIFY(buf[i] >= '0' && buf[i] <= '9');
			drr = drr * 10 + buf[i] - '0';
			continue;
		}
		/* Separator reached: commit accumulated value for this class. */
		fq_codel_drr_max_values[pri_index] = drr;
		pri_index += 1;
		drr = 0;
	}
}
186 
187 #define FQ_IF_FLOW_HASH_ID(_flowid_) \
188 	(((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)
189 
190 #define FQ_IF_CLASSQ_IDLE(_fcl_) \
191 	(STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
192 	STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))
193 
194 typedef void (* fq_if_append_pkt_t)(classq_pkt_t *, classq_pkt_t *);
195 typedef boolean_t (* fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *,
196     int64_t, uint32_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
197     uint32_t *, boolean_t *, uint64_t);
198 
199 static void
fq_if_append_mbuf(classq_pkt_t * pkt,classq_pkt_t * next_pkt)200 fq_if_append_mbuf(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
201 {
202 	pkt->cp_mbuf->m_nextpkt = next_pkt->cp_mbuf;
203 }
204 
205 static inline uint64_t
fq_codel_get_time(void)206 fq_codel_get_time(void)
207 {
208 	struct timespec ts;
209 	uint64_t now;
210 
211 	nanouptime(&ts);
212 	now = ((uint64_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec;
213 	return now;
214 }
215 
#if SKYWALK
/* Link `next_pkt' behind `pkt' on the kernel-packet nextpkt chain. */
static void
fq_if_append_pkt(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
{
	pkt->cp_kpkt->pkt_nextpkt = next_pkt->cp_kpkt;
}
#endif /* SKYWALK */
223 
#if SKYWALK
/*
 * Dequeue kernel packets (QP_PACKET) from flow `fq' until the flow's DRR
 * deficit is exhausted, a caller limit is hit, the queue drains, or pacing
 * (fq_tx_time_ready) says the next packet is not yet eligible to transmit.
 * Dequeued packets are appended to the head/tail chain; *pkt_cnt and
 * *byte_cnt are incremented, and *qempty reports whether the queue drained.
 * Returns TRUE iff the pkt/byte limit stopped the dequeue.
 */
static boolean_t
fq_getq_flow_kpkt(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    int64_t byte_limit, uint32_t pkt_limit, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t *byte_cnt, uint32_t *pkt_cnt,
    boolean_t *qempty, uint64_t now)
{
	uint32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	/*
	 * Assert to make sure pflags is part of PKT_F_COMMON_MASK;
	 * all common flags need to be declared in that mask.
	 * NOTE(review): no such assert actually follows — comment may be stale.
	 */
	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !KPKTQ_EMPTY(&fq->fq_kpktq) && fq_tx_time_ready(fqs, fq, now, NULL)) {
		_PKTSCHED_PKT_INIT(&pkt);
		fq_getq_flow(fqs, fq, &pkt, now);
		ASSERT(pkt.pktsched_ptype == QP_PACKET);

		/* Charge the packet length against the flow's DRR deficit. */
		plen = pktsched_get_pkt_len(&pkt);
		fq->fq_deficit -= plen;
		if (__improbable((fq->fq_flags & FQF_FRESH_FLOW) != 0)) {
			/* First dequeue from a fresh flow: tag the packet. */
			pkt.pktsched_pkt_kpkt->pkt_pflags |= PKT_F_NEW_FLOW;
			fq->fq_flags &= ~FQF_FRESH_FLOW;
		}

		/* Append the packet to the caller's chain. */
		if (head->cp_kpkt == NULL) {
			*head = pkt.pktsched_pkt;
		} else {
			ASSERT(tail->cp_kpkt != NULL);
			ASSERT(tail->cp_kpkt->pkt_nextpkt == NULL);
			tail->cp_kpkt->pkt_nextpkt = pkt.pktsched_pkt_kpkt;
		}
		*tail = pkt.pktsched_pkt;
		tail->cp_kpkt->pkt_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
			limit_reached = TRUE;
		}
	}
	KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq),
	    fq->fq_bytes, fq->fq_min_qdelay);

	*qempty = KPKTQ_EMPTY(&fq->fq_kpktq);
	return limit_reached;
}
#endif /* SKYWALK */
283 
/*
 * Dequeue mbufs (QP_MBUF) from flow `fq' — mbuf twin of fq_getq_flow_kpkt.
 * Stops when the flow's DRR deficit is exhausted, a caller limit is hit,
 * the queue drains, or pacing says the next packet is not yet eligible.
 * Dequeued packets are appended to the head/tail chain; *pkt_cnt and
 * *byte_cnt are incremented, and *qempty reports whether the queue drained.
 * Returns TRUE iff the pkt/byte limit stopped the dequeue.
 */
static boolean_t
fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    int64_t byte_limit, uint32_t pkt_limit, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t *byte_cnt, uint32_t *pkt_cnt,
    boolean_t *qempty, uint64_t now)
{
	u_int32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !MBUFQ_EMPTY(&fq->fq_mbufq) && fq_tx_time_ready(fqs, fq, now, NULL)) {
		_PKTSCHED_PKT_INIT(&pkt);
		fq_getq_flow(fqs, fq, &pkt, now);
		ASSERT(pkt.pktsched_ptype == QP_MBUF);

		/* Charge the packet length against the flow's DRR deficit. */
		plen = pktsched_get_pkt_len(&pkt);
		fq->fq_deficit -= plen;

		if (__improbable((fq->fq_flags & FQF_FRESH_FLOW) != 0)) {
			/* First dequeue from a fresh flow: tag the packet. */
			pkt.pktsched_pkt_mbuf->m_pkthdr.pkt_flags |= PKTF_NEW_FLOW;
			fq->fq_flags &= ~FQF_FRESH_FLOW;
		}

		/* Append the packet to the caller's chain. */
		if (head->cp_mbuf == NULL) {
			*head = pkt.pktsched_pkt;
		} else {
			ASSERT(tail->cp_mbuf != NULL);
			ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
			tail->cp_mbuf->m_nextpkt = pkt.pktsched_pkt_mbuf;
		}
		*tail = pkt.pktsched_pkt;
		tail->cp_mbuf->m_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
			limit_reached = TRUE;
		}
	}
	KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq),
	    fq->fq_bytes, fq->fq_min_qdelay);

	*qempty = MBUFQ_EMPTY(&fq->fq_mbufq);
	return limit_reached;
}
338 
339 fq_if_t *
fq_if_alloc(struct ifclassq * ifq,classq_pkt_type_t ptype)340 fq_if_alloc(struct ifclassq *ifq, classq_pkt_type_t ptype)
341 {
342 	fq_if_t *fqs;
343 
344 	fqs = zalloc_flags(fq_if_zone, Z_WAITOK | Z_ZERO);
345 	fqs->fqs_ifq = ifq;
346 	fqs->fqs_ptype = ptype;
347 
348 	/* Configure packet drop limit across all queues */
349 	fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(ifq);
350 	STAILQ_INIT(&fqs->fqs_fclist);
351 	TAILQ_INIT(&fqs->fqs_empty_list);
352 	TAILQ_INIT(&fqs->fqs_combined_grp_list);
353 
354 	return fqs;
355 }
356 
/*
 * Tear down a scheduler instance: drain all queued packets, release the
 * groups, then free the instance itself. `fqs' is invalid on return.
 */
void
fq_if_destroy(fq_if_t *fqs)
{
	/* Purge packets first; group destruction assumes empty queues. */
	fq_if_purge(fqs);
	fq_if_destroy_grps(fqs);

	fqs->fqs_ifq = NULL;
	zfree(fq_if_zone, fqs);
}
366 
367 static inline uint8_t
fq_if_service_to_priority(fq_if_t * fqs,mbuf_svc_class_t svc)368 fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc)
369 {
370 	uint8_t pri;
371 
372 	if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
373 		switch (svc) {
374 		case MBUF_SC_BK_SYS:
375 		case MBUF_SC_BK:
376 			pri = FQ_IF_BK_INDEX;
377 			break;
378 		case MBUF_SC_BE:
379 		case MBUF_SC_RD:
380 		case MBUF_SC_OAM:
381 			pri = FQ_IF_BE_INDEX;
382 			break;
383 		case MBUF_SC_AV:
384 		case MBUF_SC_RV:
385 		case MBUF_SC_VI:
386 		case MBUF_SC_SIG:
387 			pri = FQ_IF_VI_INDEX;
388 			break;
389 		case MBUF_SC_VO:
390 		case MBUF_SC_CTL:
391 			pri = FQ_IF_VO_INDEX;
392 			break;
393 		default:
394 			pri = FQ_IF_BE_INDEX; /* Use best effort by default */
395 			break;
396 		}
397 		return pri;
398 	}
399 
400 	/* scheduler is not managed by the driver */
401 	switch (svc) {
402 	case MBUF_SC_BK_SYS:
403 		pri = FQ_IF_BK_SYS_INDEX;
404 		break;
405 	case MBUF_SC_BK:
406 		pri = FQ_IF_BK_INDEX;
407 		break;
408 	case MBUF_SC_BE:
409 		pri = FQ_IF_BE_INDEX;
410 		break;
411 	case MBUF_SC_RD:
412 		pri = FQ_IF_RD_INDEX;
413 		break;
414 	case MBUF_SC_OAM:
415 		pri = FQ_IF_OAM_INDEX;
416 		break;
417 	case MBUF_SC_AV:
418 		pri = FQ_IF_AV_INDEX;
419 		break;
420 	case MBUF_SC_RV:
421 		pri = FQ_IF_RV_INDEX;
422 		break;
423 	case MBUF_SC_VI:
424 		pri = FQ_IF_VI_INDEX;
425 		break;
426 	case MBUF_SC_SIG:
427 		pri = FQ_IF_SIG_INDEX;
428 		break;
429 	case MBUF_SC_VO:
430 		pri = FQ_IF_VO_INDEX;
431 		break;
432 	case MBUF_SC_CTL:
433 		pri = FQ_IF_CTL_INDEX;
434 		break;
435 	default:
436 		pri = FQ_IF_BE_INDEX; /* Use best effort by default */
437 		break;
438 	}
439 	return pri;
440 }
441 
442 void
fq_if_classq_init(fq_if_group_t * fqg,uint32_t pri,uint32_t quantum,uint32_t drr_max,uint32_t svc_class)443 fq_if_classq_init(fq_if_group_t *fqg, uint32_t pri, uint32_t quantum,
444     uint32_t drr_max, uint32_t svc_class)
445 {
446 	fq_if_classq_t *fq_cl;
447 	VERIFY(pri < FQ_IF_MAX_CLASSES);
448 	fq_cl = &fqg->fqg_classq[pri];
449 
450 	VERIFY(fq_cl->fcl_quantum == 0);
451 	VERIFY(quantum != 0);
452 	fq_cl->fcl_quantum = quantum;
453 	fq_cl->fcl_pri = pri;
454 	fq_cl->fcl_drr_max = drr_max;
455 	fq_cl->fcl_service_class = svc_class;
456 	fq_cl->fcl_next_tx_time = 0;
457 	fq_cl->fcl_flags = 0;
458 	STAILQ_INIT(&fq_cl->fcl_new_flows);
459 	STAILQ_INIT(&fq_cl->fcl_old_flows);
460 }
461 
/*
 * Enqueue a packet chain (`cnt' packets, `bytes' total) onto the class
 * queue selected by the chain's service class and qset group index.
 * Sets *pdrop to TRUE iff the chain was freed instead of enqueued.
 * Returns 0 on success, EQFULL for flow-control advisories,
 * EQSUSPENDED when throttled/suspended, or ENOBUFS on drop.
 */
int
fq_if_enqueue_classq(struct ifclassq *ifq, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t *pdrop)
{
	uint8_t pri, grp_idx = 0;
	fq_if_t *fqs;
	fq_if_classq_t *fq_cl;
	fq_if_group_t *fq_group;
	int ret;
	mbuf_svc_class_t svc;
	pktsched_pkt_t pkt;

	pktsched_pkt_encap_chain(&pkt, head, tail, cnt, bytes);

	fqs = (fq_if_t *)ifq->ifcq_disc;
	svc = pktsched_get_pkt_svc(&pkt);
#if SKYWALK
	/* Skywalk packets may target a specific qset group. */
	if (head->cp_ptype == QP_PACKET) {
		grp_idx = head->cp_kpkt->pkt_qset_idx;
	}
#endif /* SKYWALK */
	pri = fq_if_service_to_priority(fqs, svc);
	VERIFY(pri < FQ_IF_MAX_CLASSES);

	IFCQ_LOCK_SPIN(ifq);
	fq_group = fq_if_find_grp(fqs, grp_idx);
	fq_cl = &fq_group->fqg_classq[pri];

	if (__improbable(svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1)) {
		IFCQ_UNLOCK(ifq);
		/* BK_SYS is currently throttled */
		os_atomic_inc(&fq_cl->fcl_stat.fcl_throttle_drops, relaxed);
		pktsched_free_pkt(&pkt);
		*pdrop = TRUE;
		ret = EQSUSPENDED;
		goto done;
	}

	ASSERT(pkt.pktsched_ptype == fqs->fqs_ptype);
	ret = fq_addq(fqs, fq_group, &pkt, fq_cl);
	if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
		if (((fq_group->fqg_bitmaps[FQ_IF_ER] | fq_group->fqg_bitmaps[FQ_IF_EB]) &
		    (1 << pri)) == 0) {
			/*
			 * this group is not in ER or EB groups,
			 * mark it as IB
			 */
			pktsched_bit_set(pri, &fq_group->fqg_bitmaps[FQ_IF_IB]);
		}
	}

	/* Translate fq_addq classq codes into errno-style results. */
	if (__improbable(ret != 0)) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
			*pdrop = FALSE;
		} else if (ret == CLASSQEQ_COMPRESSED) {
			ret = 0;
			*pdrop = FALSE;
		} else {
			/* Drop cases: unlock before freeing the chain. */
			IFCQ_UNLOCK(ifq);
			*pdrop = TRUE;
			pktsched_free_pkt(&pkt);
			switch (ret) {
			case CLASSQEQ_DROP:
				ret = ENOBUFS;
				goto done;
			case CLASSQEQ_DROP_FC:
				ret = EQFULL;
				goto done;
			case CLASSQEQ_DROP_SP:
				ret = EQSUSPENDED;
				goto done;
			default:
				VERIFY(0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			/* NOTREACHED */
			__builtin_unreachable();
		}
	} else {
		*pdrop = FALSE;
	}
	/* Packet(s) accepted: account them on the ifclassq and the group. */
	IFCQ_ADD_LEN(ifq, cnt);
	IFCQ_INC_BYTES(ifq, bytes);


	FQS_GRP_ADD_LEN(fqs, grp_idx, cnt);
	FQS_GRP_INC_BYTES(fqs, grp_idx, bytes);

	IFCQ_UNLOCK(ifq);
done:
#if DEBUG || DEVELOPMENT
	/* Suppress advisory feedback when flow-control advisory is off. */
	if (__improbable((ret == EQFULL) && (ifclassq_flow_control_adv == 0))) {
		ret = 0;
	}
#endif /* DEBUG || DEVELOPMENT */
	return ret;
}
562 
563 void
fq_if_dequeue_classq(struct ifclassq * ifq,classq_pkt_t * pkt,uint8_t grp_idx)564 fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_t *pkt, uint8_t grp_idx)
565 {
566 	(void) fq_if_dequeue_classq_multi(ifq, 1,
567 	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
568 }
569 
570 void
fq_if_dequeue_sc_classq(struct ifclassq * ifq,mbuf_svc_class_t svc,classq_pkt_t * pkt,uint8_t grp_idx)571 fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
572     classq_pkt_t *pkt, uint8_t grp_idx)
573 {
574 	(void) fq_if_dequeue_sc_classq_multi(ifq, svc, 1,
575 	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
576 }
577 
578 static inline void
fq_dqlist_add(flowq_dqlist_t * fq_dqlist_head,fq_t * fq)579 fq_dqlist_add(flowq_dqlist_t *fq_dqlist_head, fq_t *fq)
580 {
581 	ASSERT(fq->fq_dq_head.cp_mbuf == NULL);
582 	ASSERT(!fq->fq_in_dqlist);
583 	STAILQ_INSERT_TAIL(fq_dqlist_head, fq, fq_dqlink);
584 	fq->fq_in_dqlist = true;
585 }
586 
/*
 * Unstage flow `fq' from the per-dequeue list, splicing its buffered
 * per-flow packet chain (fq_dq_head..fq_dq_tail) onto the caller's
 * head/tail chain, then reset the flow's staging state.
 */
static inline void
fq_dqlist_remove(flowq_dqlist_t *fq_dqlist_head, fq_t *fq, classq_pkt_t *head,
    classq_pkt_t *tail, classq_pkt_type_t ptype)
{
	ASSERT(fq->fq_in_dqlist);
	/* Nothing buffered on this flow: just unlink it below. */
	if (fq->fq_dq_head.cp_mbuf == NULL) {
		goto done;
	}

	if (head->cp_mbuf == NULL) {
		/* Caller chain empty: the flow's chain becomes the chain. */
		*head = fq->fq_dq_head;
	} else {
		ASSERT(tail->cp_mbuf != NULL);

		/* Append the flow's chain after the caller's current tail. */
		switch (ptype) {
		case QP_MBUF:
			ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
			tail->cp_mbuf->m_nextpkt = fq->fq_dq_head.cp_mbuf;
			ASSERT(fq->fq_dq_tail.cp_mbuf->m_nextpkt == NULL);
			break;
#if SKYWALK
		case QP_PACKET:
			ASSERT(tail->cp_kpkt->pkt_nextpkt == NULL);
			tail->cp_kpkt->pkt_nextpkt = fq->fq_dq_head.cp_kpkt;
			ASSERT(fq->fq_dq_tail.cp_kpkt->pkt_nextpkt == NULL);
			break;
#endif /* SKYWALK */
		default:
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}
	*tail = fq->fq_dq_tail;
done:
	STAILQ_REMOVE(fq_dqlist_head, fq, flowq, fq_dqlink);
	CLASSQ_PKT_INIT(&fq->fq_dq_head);
	CLASSQ_PKT_INIT(&fq->fq_dq_tail);
	fq->fq_in_dqlist = false;
}
627 
628 static inline void
fq_dqlist_get_packet_list(flowq_dqlist_t * fq_dqlist_head,classq_pkt_t * head,classq_pkt_t * tail,classq_pkt_type_t ptype)629 fq_dqlist_get_packet_list(flowq_dqlist_t *fq_dqlist_head, classq_pkt_t *head,
630     classq_pkt_t *tail, classq_pkt_type_t ptype)
631 {
632 	fq_t *fq, *tfq;
633 
634 	STAILQ_FOREACH_SAFE(fq, fq_dqlist_head, fq_dqlink, tfq) {
635 		fq_dqlist_remove(fq_dqlist_head, fq, head, tail, ptype);
636 	}
637 }
638 
639 static int
fq_if_grps_bitmap_ffs(fq_grp_tailq_t * grp_list,int pri,fq_if_state state,fq_if_group_t ** selected_grp)640 fq_if_grps_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri, fq_if_state state,
641     fq_if_group_t **selected_grp)
642 {
643 	#pragma unused(pri)
644 
645 	fq_if_group_t *grp;
646 	uint32_t highest_pri = FQ_IF_MAX_CLASSES;
647 	int ret_pri = 0;
648 
649 	TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
650 		uint32_t cur_pri = pktsched_ffs(grp->fqg_bitmaps[state]);
651 		/* bitmap is empty in this case */
652 		if (cur_pri == 0) {
653 			continue;
654 		}
655 		if (cur_pri <= highest_pri) {
656 			highest_pri = cur_pri;
657 			ret_pri = cur_pri;
658 			*selected_grp = grp;
659 		}
660 	}
661 	return ret_pri;
662 }
663 
664 static boolean_t
fq_if_grps_bitmap_zeros(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)665 fq_if_grps_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
666 {
667     #pragma unused(pri)
668 
669 	fq_if_group_t *grp;
670 
671 	TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
672 		if (grp->fqg_bitmaps[state] != 0) {
673 			return FALSE;
674 		}
675 	}
676 	return TRUE;
677 }
678 
679 static void
fq_if_grps_bitmap_cpy(fq_grp_tailq_t * grp_list,int pri,fq_if_state dst_state,fq_if_state src_state)680 fq_if_grps_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
681     fq_if_state src_state)
682 {
683     #pragma unused(pri)
684 
685 	fq_if_group_t *grp;
686 	TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
687 		grp->fqg_bitmaps[dst_state] = grp->fqg_bitmaps[src_state];
688 	}
689 }
690 
691 static void
fq_if_grps_bitmap_clr(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)692 fq_if_grps_bitmap_clr(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
693 {
694     #pragma unused(pri)
695 
696 	fq_if_group_t *grp;
697 	TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
698 		grp->fqg_bitmaps[state] = 0;
699 	}
700 }
701 
702 static void
fq_if_grps_bitmap_move(fq_grp_tailq_t * grp_list,int pri,fq_if_state dst_state,fq_if_state src_state)703 fq_if_grps_bitmap_move(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
704     fq_if_state src_state)
705 {
706     #pragma unused(pri)
707 
708 	fq_if_group_t *grp;
709 	TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
710 		grp->fqg_bitmaps[dst_state] =
711 		    grp->fqg_bitmaps[dst_state] | grp->fqg_bitmaps[src_state];
712 		grp->fqg_bitmaps[src_state] = 0;
713 	}
714 }
715 
716 static int
fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t * grp_list,int pri,fq_if_state state,fq_if_group_t ** selected_grp)717 fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri, fq_if_state state,
718     fq_if_group_t **selected_grp)
719 {
720 	fq_if_group_t *grp;
721 	int ret_pri = 0;
722 
723 	TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
724 		if (pktsched_bit_tst(pri, &grp->fqg_bitmaps[state])) {
725 			/* +1 to match the semantics of pktsched_ffs */
726 			ret_pri = pri + 1;
727 			*selected_grp = grp;
728 			break;
729 		}
730 	}
731 
732 	return ret_pri;
733 }
734 
735 static boolean_t
fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)736 fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
737 {
738 	fq_if_group_t *grp;
739 
740 	TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
741 		if (pktsched_bit_tst(pri, &grp->fqg_bitmaps[state])) {
742 			return FALSE;
743 		}
744 	}
745 	return TRUE;
746 }
747 
748 static void
fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t * grp_list,int pri,fq_if_state dst_state,fq_if_state src_state)749 fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
750     fq_if_state src_state)
751 {
752 	fq_if_group_t *grp;
753 
754 	TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
755 		pktsched_bit_cpy(pri, &grp->fqg_bitmaps[dst_state],
756 		    &grp->fqg_bitmaps[src_state]);
757 	}
758 }
759 
760 static void
fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)761 fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
762 {
763 	fq_if_group_t *grp;
764 
765 	TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
766 		pktsched_bit_clr(pri, &grp->fqg_bitmaps[state]);
767 	}
768 }
769 
770 static void
fq_if_grps_sc_bitmap_move(fq_grp_tailq_t * grp_list,int pri,fq_if_state dst_state,fq_if_state src_state)771 fq_if_grps_sc_bitmap_move(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
772     fq_if_state src_state)
773 {
774 	fq_if_group_t *grp;
775 
776 	TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
777 		pktsched_bit_move(pri, &grp->fqg_bitmaps[dst_state],
778 		    &grp->fqg_bitmaps[src_state]);
779 		pktsched_bit_clr(pri, &grp->fqg_bitmaps[src_state]);
780 	}
781 }
782 
783 static void
fq_if_schedule_pacemaker(struct ifclassq * ifq,uint64_t next_tx_time)784 fq_if_schedule_pacemaker(struct ifclassq *ifq, uint64_t next_tx_time)
785 {
786 	if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
787 		return;
788 	}
789 	ASSERT(next_tx_time != FQ_INVALID_TX_TS);
790 
791 	struct ifnet *ifp = ifq->ifcq_ifp;
792 	ifnet_start_set_pacemaker_time(ifp, next_tx_time);
793 }
794 
795 static int
fq_if_dequeue_classq_multi_common(struct ifclassq * ifq,mbuf_svc_class_t svc,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt,uint8_t grp_idx)796 fq_if_dequeue_classq_multi_common(struct ifclassq *ifq, mbuf_svc_class_t svc,
797     u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
798     classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
799     uint8_t grp_idx)
800 {
801 	uint32_t total_pktcnt = 0, total_bytecnt = 0;
802 	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
803 	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
804 	classq_pkt_t tmp = CLASSQ_PKT_INITIALIZER(tmp);
805 	fq_if_append_pkt_t append_pkt;
806 	flowq_dqlist_t fq_dqlist_head;
807 	fq_if_classq_t *fq_cl;
808 	fq_grp_tailq_t *grp_list, tmp_grp_list;
809 	fq_if_group_t *fq_grp = NULL;
810 	fq_if_t *fqs;
811 	uint64_t now, next_tx_time = FQ_INVALID_TX_TS;
812 	int pri = 0, svc_pri = 0;
813 	bool all_paced = true;
814 
815 	IFCQ_LOCK_ASSERT_HELD(ifq);
816 
817 	fqs = (fq_if_t *)ifq->ifcq_disc;
818 	STAILQ_INIT(&fq_dqlist_head);
819 
820 	switch (fqs->fqs_ptype) {
821 	case QP_MBUF:
822 		append_pkt = fq_if_append_mbuf;
823 		break;
824 
825 #if SKYWALK
826 	case QP_PACKET:
827 		append_pkt = fq_if_append_pkt;
828 		break;
829 #endif /* SKYWALK */
830 
831 	default:
832 		VERIFY(0);
833 		/* NOTREACHED */
834 		__builtin_unreachable();
835 	}
836 
837 	now = fq_codel_get_time();
838 	if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
839 		svc_pri = fq_if_service_to_priority(fqs, svc);
840 	} else {
841 		VERIFY(svc == MBUF_SC_UNSPEC);
842 	}
843 
844 	if (fq_if_is_grp_combined(fqs, grp_idx)) {
845 		grp_list = &fqs->fqs_combined_grp_list;
846 		VERIFY(!TAILQ_EMPTY(grp_list));
847 	} else {
848 		grp_list = &tmp_grp_list;
849 		fq_grp = fq_if_find_grp(fqs, grp_idx);
850 		TAILQ_INIT(grp_list);
851 		TAILQ_INSERT_TAIL(grp_list, fq_grp, fqg_grp_link);
852 	}
853 
854 	for (;;) {
855 		uint32_t pktcnt = 0, bytecnt = 0;
856 		classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
857 		classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
858 		bool fq_cl_all_paced = false;
859 		uint64_t fq_cl_next_tx_time = FQ_INVALID_TX_TS;
860 
861 		if (fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_ER) &&
862 		    fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_EB)) {
863 			fqs->grp_bitmaps_cpy(grp_list, svc_pri, FQ_IF_EB, FQ_IF_IB);
864 			fqs->grp_bitmaps_clr(grp_list, svc_pri, FQ_IF_IB);
865 			if (fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_EB)) {
866 				if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
867 					/*
868 					 * Move fq_cl in IR back to ER, so that they will inspected with priority
869 					 * the next time the driver dequeues
870 					 */
871 					fqs->grp_bitmaps_cpy(grp_list, svc_pri, FQ_IF_ER, FQ_IF_IR);
872 					fqs->grp_bitmaps_clr(grp_list, svc_pri, FQ_IF_IR);
873 				}
874 				break;
875 			}
876 		}
877 		pri = fqs->grp_bitmaps_ffs(grp_list, svc_pri, FQ_IF_ER, &fq_grp);
878 		if (pri == 0) {
879 			/*
880 			 * There are no ER flows, move the highest
881 			 * priority one from EB if there are any in that
882 			 * category
883 			 */
884 			pri = fqs->grp_bitmaps_ffs(grp_list, svc_pri, FQ_IF_EB, &fq_grp);
885 			VERIFY(pri > 0);
886 			VERIFY(fq_grp != NULL);
887 			pktsched_bit_clr((pri - 1), &fq_grp->fqg_bitmaps[FQ_IF_EB]);
888 			pktsched_bit_set((pri - 1), &fq_grp->fqg_bitmaps[FQ_IF_ER]);
889 		}
890 		VERIFY(fq_grp != NULL);
891 		pri--; /* index starts at 0 */
892 		fq_cl = &fq_grp->fqg_classq[pri];
893 
894 		if (fq_cl->fcl_budget <= 0) {
895 			/* Update the budget */
896 			fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
897 			    fq_cl->fcl_stat.fcl_flows_cnt) *
898 			    fq_cl->fcl_quantum);
899 			if (fq_cl->fcl_budget <= 0) {
900 				goto state_change;
901 			}
902 		}
903 		fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
904 		    (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
905 		    &bytecnt, &fq_dqlist_head, true, now, &fq_cl_all_paced,
906 		    &fq_cl_next_tx_time);
907 		if (head.cp_mbuf != NULL) {
908 			ASSERT(STAILQ_EMPTY(&fq_dqlist_head));
909 			if (first.cp_mbuf == NULL) {
910 				first = head;
911 			} else {
912 				ASSERT(last.cp_mbuf != NULL);
913 				append_pkt(&last, &head);
914 			}
915 			last = tail;
916 			append_pkt(&last, &tmp);
917 		}
918 		if (fq_cl_all_paced && fq_cl_next_tx_time < next_tx_time) {
919 			fq_cl->fcl_stat.fcl_fcl_pacemaker_needed++;
920 			next_tx_time = fq_cl_next_tx_time;
921 		}
922 		fq_cl->fcl_budget -= bytecnt;
923 		total_pktcnt += pktcnt;
924 		total_bytecnt += bytecnt;
925 
926 		/*
927 		 * If the class has exceeded the budget but still has data
928 		 * to send, move it to IB
929 		 */
930 state_change:
931 		VERIFY(fq_grp != NULL);
932 		all_paced &= fq_cl_all_paced;
933 		if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
934 			if (fq_cl->fcl_budget <= 0) {
935 				pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
936 				pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
937 			} else if (fq_cl_all_paced) {
938 				if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
939 					/*
940 					 * If a fq_cl still has budget but only paced queues, park it
941 					 * to IR so that we will not keep loopping over it
942 					 */
943 					pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IR]);
944 					pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
945 				}
946 			}
947 		} else {
948 			pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
949 			VERIFY(((fq_grp->fqg_bitmaps[FQ_IF_ER] |
950 			    fq_grp->fqg_bitmaps[FQ_IF_EB] |
951 			    fq_grp->fqg_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
952 			fq_cl->fcl_budget = 0;
953 		}
954 		if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt) {
955 			if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
956 				/*
957 				 * Move fq_cl in IR back to ER, so that they will inspected with priority
958 				 * the next time the driver dequeues
959 				 */
960 				fqs->grp_bitmaps_move(grp_list, svc_pri, FQ_IF_ER, FQ_IF_IR);
961 			}
962 			break;
963 		}
964 	}
965 
966 	if (!fq_if_is_grp_combined(fqs, grp_idx)) {
967 		TAILQ_REMOVE(grp_list, fq_grp, fqg_grp_link);
968 		VERIFY(TAILQ_EMPTY(grp_list));
969 	}
970 
971 	fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last,
972 	    fqs->fqs_ptype);
973 
974 	if (__probable(first_packet != NULL)) {
975 		*first_packet = first;
976 	}
977 	if (last_packet != NULL) {
978 		*last_packet = last;
979 	}
980 	if (retpktcnt != NULL) {
981 		*retpktcnt = total_pktcnt;
982 	}
983 	if (retbytecnt != NULL) {
984 		*retbytecnt = total_bytecnt;
985 	}
986 	if (next_tx_time != FQ_INVALID_TX_TS) {
987 		ASSERT(next_tx_time > now);
988 		fq_if_schedule_pacemaker(ifq, next_tx_time);
989 	}
990 
991 	IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
992 	fq_if_purge_empty_flow_list(fqs, now, false);
993 	return 0;
994 }
995 
996 int
fq_if_dequeue_classq_multi(struct ifclassq * ifq,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt,uint8_t grp_idx)997 fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
998     u_int32_t maxbytecnt, classq_pkt_t *first_packet,
999     classq_pkt_t *last_packet, u_int32_t *retpktcnt,
1000     u_int32_t *retbytecnt, uint8_t grp_idx)
1001 {
1002 	return fq_if_dequeue_classq_multi_common(ifq, MBUF_SC_UNSPEC, maxpktcnt, maxbytecnt,
1003 	           first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
1004 }
1005 
1006 int
fq_if_dequeue_sc_classq_multi(struct ifclassq * ifq,mbuf_svc_class_t svc,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt,uint8_t grp_idx)1007 fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc,
1008     u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
1009     classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
1010     uint8_t grp_idx)
1011 {
1012 	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1013 
1014 	if (fq_if_is_grp_combined(fqs, grp_idx)) {
1015 		return fq_if_dequeue_classq_multi_common(ifq, svc, maxpktcnt, maxbytecnt,
1016 		           first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
1017 	} else {
1018 		/*
1019 		 * take a shortcut here since there is no need to schedule
1020 		 * one single service class.
1021 		 */
1022 		return fq_if_dequeue_sc_classq_multi_separate(ifq, svc, maxpktcnt, maxbytecnt,
1023 		           first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
1024 	}
1025 }
1026 
1027 static int
fq_if_dequeue_sc_classq_multi_separate(struct ifclassq * ifq,mbuf_svc_class_t svc,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt,uint8_t grp_idx)1028 fq_if_dequeue_sc_classq_multi_separate(struct ifclassq *ifq, mbuf_svc_class_t svc,
1029     u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
1030     classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
1031     uint8_t grp_idx)
1032 {
1033 	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1034 	uint8_t pri;
1035 	u_int32_t total_pktcnt = 0, total_bytecnt = 0;
1036 	fq_if_classq_t *fq_cl;
1037 	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
1038 	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
1039 	fq_if_append_pkt_t append_pkt;
1040 	flowq_dqlist_t fq_dqlist_head;
1041 	fq_if_group_t *fq_grp;
1042 	uint64_t now;
1043 
1044 	switch (fqs->fqs_ptype) {
1045 	case QP_MBUF:
1046 		append_pkt = fq_if_append_mbuf;
1047 		break;
1048 
1049 #if SKYWALK
1050 	case QP_PACKET:
1051 		append_pkt = fq_if_append_pkt;
1052 		break;
1053 #endif /* SKYWALK */
1054 
1055 	default:
1056 		VERIFY(0);
1057 		/* NOTREACHED */
1058 		__builtin_unreachable();
1059 	}
1060 
1061 	STAILQ_INIT(&fq_dqlist_head);
1062 	now = fq_codel_get_time();
1063 
1064 	pri = fq_if_service_to_priority(fqs, svc);
1065 	fq_grp = fq_if_find_grp(fqs, grp_idx);
1066 	fq_cl = &fq_grp->fqg_classq[pri];
1067 
1068 	/*
1069 	 * Now we have the queue for a particular service class. We need
1070 	 * to dequeue as many packets as needed, first from the new flows
1071 	 * and then from the old flows.
1072 	 */
1073 	while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt &&
1074 	    fq_cl->fcl_stat.fcl_pkt_cnt > 0) {
1075 		classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
1076 		classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
1077 		u_int32_t pktcnt = 0, bytecnt = 0;
1078 		bool all_paced = false;
1079 		uint64_t next_tx_time = FQ_INVALID_TX_TS;
1080 
1081 		fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
1082 		    (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
1083 		    &bytecnt, &fq_dqlist_head, false, now, &all_paced, &next_tx_time);
1084 		if (head.cp_mbuf != NULL) {
1085 			if (first.cp_mbuf == NULL) {
1086 				first = head;
1087 			} else {
1088 				ASSERT(last.cp_mbuf != NULL);
1089 				append_pkt(&last, &head);
1090 			}
1091 			last = tail;
1092 		}
1093 		total_pktcnt += pktcnt;
1094 		total_bytecnt += bytecnt;
1095 
1096 		if (next_tx_time != FQ_INVALID_TX_TS) {
1097 			ASSERT(next_tx_time > now);
1098 			fq_cl->fcl_stat.fcl_fcl_pacemaker_needed++;
1099 			fq_if_schedule_pacemaker(ifq, next_tx_time);
1100 			break;
1101 		}
1102 	}
1103 
1104 	/*
1105 	 * Mark classq as IB if it's not idle, so that we can
1106 	 * start without re-init the bitmaps when it's switched
1107 	 * to combined mode.
1108 	 */
1109 	if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
1110 		pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
1111 		pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
1112 		pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_EB]);
1113 	} else {
1114 		pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
1115 		VERIFY(((fq_grp->fqg_bitmaps[FQ_IF_ER] |
1116 		    fq_grp->fqg_bitmaps[FQ_IF_EB] |
1117 		    fq_grp->fqg_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
1118 	}
1119 
1120 	fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last, fqs->fqs_ptype);
1121 
1122 	if (__probable(first_packet != NULL)) {
1123 		*first_packet = first;
1124 	}
1125 	if (last_packet != NULL) {
1126 		*last_packet = last;
1127 	}
1128 	if (retpktcnt != NULL) {
1129 		*retpktcnt = total_pktcnt;
1130 	}
1131 	if (retbytecnt != NULL) {
1132 		*retbytecnt = total_bytecnt;
1133 	}
1134 
1135 	IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
1136 	fq_if_purge_empty_flow_list(fqs, now, false);
1137 	return 0;
1138 }
1139 
/*
 * Drop every packet queued on flow `fq', account the drops against the
 * interface, and walk the flow through its teardown states (new -> old
 * -> empty -> destroyed).  On return *pktsp/*bytesp (if non-NULL) hold
 * the number of packets/bytes purged.  NOTE: if the flow was already on
 * the empty list it is freed here; `fq' must not be used afterwards.
 */
static void
fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, uint32_t *pktsp,
    uint32_t *bytesp, uint64_t now)
{
	fq_if_classq_t *fq_cl;
	u_int32_t pkts, bytes;
	pktsched_pkt_t pkt;
	fq_if_group_t *grp;

	fq_cl = &FQ_CLASSQ(fq);
	grp = FQ_GROUP(fq);
	pkts = bytes = 0;
	_PKTSCHED_PKT_INIT(&pkt);
	/* Drain the flow one packet at a time until it reports empty */
	for (;;) {
		fq_getq_flow(fqs, fq, &pkt, now);
		if (pkt.pktsched_pkt_mbuf == NULL) {
			VERIFY(pkt.pktsched_ptype == QP_INVALID);
			break;
		}
		pkts++;
		bytes += pktsched_get_pkt_len(&pkt);
		pktsched_free_pkt(&pkt);
	}
	KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq), fq->fq_bytes, fq->fq_min_qdelay);

	IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);

	/* move through the flow queue states */
	VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW | FQF_EMPTY_FLOW)));
	if (fq->fq_flags & FQF_NEW_FLOW) {
		/* new -> old (flow is drained now) */
		fq_if_empty_new_flow(fq, fq_cl);
	}
	if (fq->fq_flags & FQF_OLD_FLOW) {
		/* old -> empty list */
		fq_if_empty_old_flow(fqs, fq_cl, fq, now);
	}
	if (fq->fq_flags & FQF_EMPTY_FLOW) {
		/* empty -> destroyed; fq memory is gone after this */
		fq_if_purge_empty_flow(fqs, fq);
		fq = NULL;
	}

	/* Class has no traffic left: clear it from every scheduler bitmap */
	if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
		int i;
		for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
			pktsched_bit_clr(fq_cl->fcl_pri, &grp->fqg_bitmaps[i]);
		}
	}

	if (pktsp != NULL) {
		*pktsp = pkts;
	}
	if (bytesp != NULL) {
		*bytesp = bytes;
	}
}
1195 
/*
 * Purge every flow (new and old lists) belonging to one class queue
 * and reset its DRR budget.  fq_if_purge_flow() unlinks each flow, so
 * the _SAFE iterators are required here.
 */
static void
fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_t *fq, *tfq;
	uint64_t now;

	now = fq_codel_get_time();
	/*
	 * Take each flow from new/old flow list and flush mbufs
	 * in that flow
	 */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		fq_if_purge_flow(fqs, fq, NULL, NULL, now);
	}
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		fq_if_purge_flow(fqs, fq, NULL, NULL, now);
	}
	VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
	VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));

	/* Re-init the (already empty) lists to a pristine state */
	STAILQ_INIT(&fq_cl->fcl_new_flows);
	STAILQ_INIT(&fq_cl->fcl_old_flows);
	fq_cl->fcl_budget = 0;
}
1220 
/*
 * Purge the entire scheduler: every allocated group, then all flows
 * still parked on the empty list.  Afterwards all flow lists and the
 * hash table must be empty, and the interface queue counters are reset.
 */
static void
fq_if_purge(fq_if_t *fqs)
{
	uint64_t now;
	fq_if_group_t *grp;
	int i;

	/* Purging frees memory; convert to an exclusive (sleepable) lock */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
		if (fqs->fqs_classq_groups[grp_idx] == NULL) {
			continue;
		}

		grp = fq_if_find_grp(fqs, grp_idx);
		fq_if_purge_grp(fqs, grp);
	}

	now = fq_codel_get_time();
	/* purge_all=true: drain the empty-flow list unconditionally */
	fq_if_purge_empty_flow_list(fqs, now, true);

	VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));
	VERIFY(TAILQ_EMPTY(&fqs->fqs_empty_list));

	fqs->fqs_large_flow = NULL;
	for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) {
		VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i]));
	}

	IFCQ_LEN(fqs->fqs_ifq) = 0;
	IFCQ_BYTES(fqs->fqs_ifq) = 0;
}
1252 
/*
 * Purge the single flow identified by (req->flow, req->sc) in every
 * allocated group, accumulating the purged packet/byte counts into the
 * request.  The hash lookup is done with create=false, so nothing is
 * allocated for flows that do not exist.
 */
static void
fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
{
	fq_t *fq;
	uint64_t now;
	fq_if_group_t *grp;

	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
	req->packets = req->bytes = 0;
	VERIFY(req->flow != 0);

	now = fq_codel_get_time();

	for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
		if (fqs->fqs_classq_groups[grp_idx] == NULL) {
			continue;
		}
		uint32_t bytes = 0, pkts = 0;

		grp = fq_if_find_grp(fqs, grp_idx);
		/*
		 * Packet and traffic type are needed only if we want
		 * to create a flow queue.
		 */
		fq = fq_if_hash_pkt(fqs, grp, req->flow, req->sc, 0, false, FQ_TFC_C);
		if (fq != NULL) {
			fq_if_purge_flow(fqs, fq, &pkts, &bytes, now);
			req->bytes += bytes;
			req->packets += pkts;
		}
	}
}
1285 
1286 static uint16_t
fq_if_calc_quantum(struct ifnet * ifp)1287 fq_if_calc_quantum(struct ifnet *ifp)
1288 {
1289 	uint16_t quantum;
1290 
1291 	switch (ifp->if_family) {
1292 	case IFNET_FAMILY_ETHERNET:
1293 		VERIFY((ifp->if_mtu + ETHER_HDR_LEN) <= UINT16_MAX);
1294 		quantum = (uint16_t)ifp->if_mtu + ETHER_HDR_LEN;
1295 		break;
1296 
1297 	case IFNET_FAMILY_CELLULAR:
1298 	case IFNET_FAMILY_IPSEC:
1299 	case IFNET_FAMILY_UTUN:
1300 		VERIFY(ifp->if_mtu <= UINT16_MAX);
1301 		quantum = (uint16_t)ifp->if_mtu;
1302 		break;
1303 
1304 	default:
1305 		quantum = FQ_CODEL_DEFAULT_QUANTUM;
1306 		break;
1307 	}
1308 
1309 	if ((ifp->if_hwassist & IFNET_TSOF) != 0) {
1310 		VERIFY(ifp->if_tso_v4_mtu <= UINT16_MAX);
1311 		VERIFY(ifp->if_tso_v6_mtu <= UINT16_MAX);
1312 		quantum = (uint16_t)MAX(ifp->if_tso_v4_mtu, ifp->if_tso_v6_mtu);
1313 		quantum = (quantum != 0) ? quantum : IF_MAXMTU;
1314 	}
1315 
1316 	quantum = MAX(FQ_CODEL_DEFAULT_QUANTUM, quantum);
1317 #if DEBUG || DEVELOPMENT
1318 	quantum = (fq_codel_quantum != 0) ? fq_codel_quantum : quantum;
1319 #endif /* DEBUG || DEVELOPMENT */
1320 	VERIFY(quantum != 0);
1321 	return quantum;
1322 }
1323 
/*
 * MTU-change handler: recompute the base quantum and refresh the
 * per-service-class quantum (scaled by the FQ_CODEL_QUANTUM_* macros)
 * for every class of every allocated group.  Driver-managed schedulers
 * expose only the 4 access categories; the default mode exposes all 10.
 */
static void
fq_if_mtu_update(fq_if_t *fqs)
{
#define _FQ_CLASSQ_UPDATE_QUANTUM(_grp, _s, _q)                     \
	(_grp)->fqg_classq[FQ_IF_ ## _s ## _INDEX].fcl_quantum =        \
	    FQ_CODEL_QUANTUM_ ## _s(_q)                                 \

	uint32_t quantum;
	fq_if_group_t *grp;

	quantum = fq_if_calc_quantum(fqs->fqs_ifq->ifcq_ifp);

	for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
		if (fqs->fqs_classq_groups[grp_idx] == NULL) {
			continue;
		}

		grp = fq_if_find_grp(fqs, grp_idx);

		if ((fqs->fqs_flags & FQS_DRIVER_MANAGED) != 0) {
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BK, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BE, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, VI, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, VO, quantum);
		} else {
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BK_SYS, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BK, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BE, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, RD, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, OAM, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, AV, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, RV, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, VI, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, VO, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, CTL, quantum);
		}
	}
#undef _FQ_CLASSQ_UPDATE_QUANTUM
}
1363 
1364 static void
fq_if_event(fq_if_t * fqs,cqev_t ev)1365 fq_if_event(fq_if_t *fqs, cqev_t ev)
1366 {
1367 	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
1368 
1369 	switch (ev) {
1370 	case CLASSQ_EV_LINK_UP:
1371 	case CLASSQ_EV_LINK_DOWN:
1372 		fq_if_purge(fqs);
1373 		break;
1374 	case CLASSQ_EV_LINK_MTU:
1375 		fq_if_mtu_update(fqs);
1376 		break;
1377 	default:
1378 		break;
1379 	}
1380 }
1381 
/*
 * Throttle on: drop everything queued on the class (throttled traffic
 * is not worth keeping) and mark the scheduler throttled.
 */
static void
fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_if_purge_classq(fqs, fq_cl);
	fqs->fqs_throttle = 1;
	fq_cl->fcl_stat.fcl_throttle_on++;
	KDBG(AQM_KTRACE_AON_THROTTLE | DBG_FUNC_START,
	    fqs->fqs_ifq->ifcq_ifp->if_index, 0, 0, 0);
}
1391 
/*
 * Throttle off: the class must already be idle (suspend purged it and
 * no traffic was admitted while throttled); just clear the flag.
 */
static void
fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
	fqs->fqs_throttle = 0;
	fq_cl->fcl_stat.fcl_throttle_off++;
	KDBG(AQM_KTRACE_AON_THROTTLE | DBG_FUNC_END,
	    fqs->fqs_ifq->ifcq_ifp->if_index, 0, 0, 0);
}
1401 
1402 
/*
 * Get or set the scheduler throttle level.  Only the BK_SYS class is
 * ever throttled; the requested level is applied to that class in
 * every allocated group.  Returns EALREADY if the level is unchanged.
 */
static int
fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
{
	struct ifclassq *ifq = fqs->fqs_ifq;
	uint8_t index;
	fq_if_group_t *grp;

#if !MACH_ASSERT
#pragma unused(ifq)
#endif
	IFCQ_LOCK_ASSERT_HELD(ifq);

	/* Query: report the current level without changing anything */
	if (!tr->set) {
		tr->level = fqs->fqs_throttle;
		return 0;
	}

	if (tr->level == fqs->fqs_throttle) {
		return EALREADY;
	}

	/* Throttling is allowed on BK_SYS class only */
	index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS);

	for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
		if (fqs->fqs_classq_groups[grp_idx] == NULL) {
			continue;
		}
		grp = fq_if_find_grp(fqs, grp_idx);
		switch (tr->level) {
		case IFNET_THROTTLE_OFF:
			fq_if_classq_resume(fqs, &grp->fqg_classq[index]);
			break;
		case IFNET_THROTTLE_OPPORTUNISTIC:
			fq_if_classq_suspend(fqs, &grp->fqg_classq[index]);
			break;
		default:
			break;
		}
	}
	return 0;
}
1445 
1446 static inline boolean_t
fq_if_is_fq_cl_paced(fq_if_classq_t * fq_cl,uint64_t now)1447 fq_if_is_fq_cl_paced(fq_if_classq_t *fq_cl, uint64_t now)
1448 {
1449 	if ((fq_cl->fcl_flags & FCL_PACED) != 0 && fq_cl->fcl_next_tx_time > now) {
1450 		return true;
1451 	}
1452 
1453 	fq_cl->fcl_flags &= ~FCL_PACED;
1454 	fq_cl->fcl_next_tx_time = 0;
1455 	return false;
1456 }
1457 
/*
 * Fill in the packet/byte counts for one service class of a group.
 * A fully-paced class reports zero so callers don't schedule dequeues
 * for traffic that cannot be transmitted yet.
 */
static void
fq_if_grp_stat_sc(fq_if_t *fqs, fq_if_group_t *grp, cqrq_stat_sc_t *stat, uint64_t now)
{
	uint8_t pri;
	fq_if_classq_t *fq_cl;

	ASSERT(stat != NULL);
	pri = fq_if_service_to_priority(fqs, stat->sc);

	fq_cl = &grp->fqg_classq[pri];
	stat->packets = (uint32_t)fq_cl->fcl_stat.fcl_pkt_cnt;
	stat->bytes = (uint32_t)fq_cl->fcl_stat.fcl_byte_cnt;

	/* With L4S pacing active, hide counts while the class is paced */
	if (ifclassq_enable_pacing && ifclassq_enable_l4s &&
	    fq_if_is_fq_cl_paced(fq_cl, now)) {
		stat->packets = 0;
		stat->bytes = 0;
	}
}
1477 
1478 static boolean_t
fq_if_is_grp_all_paced(fq_if_group_t * grp)1479 fq_if_is_grp_all_paced(fq_if_group_t *grp)
1480 {
1481 	fq_if_classq_t *fq_cl;
1482 	uint64_t now;
1483 
1484 	if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
1485 		return false;
1486 	}
1487 
1488 	now = fq_codel_get_time();
1489 	for (uint8_t fq_cl_idx = 0; fq_cl_idx < FQ_IF_MAX_CLASSES; fq_cl_idx++) {
1490 		fq_cl = &grp->fqg_classq[fq_cl_idx];
1491 		if (fq_cl == NULL || FQ_IF_CLASSQ_IDLE(fq_cl)) {
1492 			continue;
1493 		}
1494 		if (!fq_if_is_fq_cl_paced(fq_cl, now)) {
1495 			return false;
1496 		}
1497 	}
1498 
1499 	return true;
1500 }
1501 
1502 boolean_t
fq_if_is_all_paced(struct ifclassq * ifq)1503 fq_if_is_all_paced(struct ifclassq *ifq)
1504 {
1505 	fq_if_group_t *grp;
1506 	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1507 
1508 	IFCQ_LOCK_ASSERT_HELD(ifq);
1509 
1510 	if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
1511 		return false;
1512 	}
1513 
1514 	for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
1515 		grp = fqs->fqs_classq_groups[grp_idx];
1516 		if (grp == NULL || FQG_BYTES(grp) == 0) {
1517 			continue;
1518 		}
1519 
1520 		if (!fq_if_is_grp_all_paced(grp)) {
1521 			return false;
1522 		}
1523 	}
1524 
1525 	return true;
1526 }
1527 
/*
 * Report queued packet/byte counts for a (group, service class) pair.
 * IF_CLASSQ_ALL_GRPS aggregates over every group; MBUF_SC_UNSPEC
 * aggregates over every class.  Fully-paced groups/classes report
 * zero so that paced traffic does not trigger driver dequeues.
 */
void
fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
{
	cqrq_stat_sc_t grp_sc_stat;
	fq_if_group_t *grp;
	uint64_t now = fq_codel_get_time();

	if (stat == NULL) {
		return;
	}
	grp_sc_stat.sc = stat->sc;
	stat->packets = 0;
	stat->bytes = 0;

	/* Case 1: aggregate across all groups */
	if (stat->grp_idx == IF_CLASSQ_ALL_GRPS) {
		if (stat->sc == MBUF_SC_UNSPEC) {
			/* All groups, all classes: whole-queue totals */
			if (!fq_if_is_all_paced(fqs->fqs_ifq)) {
				stat->packets = IFCQ_LEN(fqs->fqs_ifq);
				stat->bytes = IFCQ_BYTES(fqs->fqs_ifq);
			}
		} else {
			/* All groups, one class: sum the per-group stats */
			for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
				grp = fqs->fqs_classq_groups[grp_idx];
				if (grp == NULL) {
					continue;
				}

				fq_if_grp_stat_sc(fqs, grp, &grp_sc_stat, now);
				stat->packets += grp_sc_stat.packets;
				stat->bytes += grp_sc_stat.bytes;
			}
		}
		return;
	}

	/* Case 2: a specific group index */
	if (stat->sc == MBUF_SC_UNSPEC) {
		if (fq_if_is_grp_combined(fqs, stat->grp_idx)) {
			/* Combined groups are scheduled together: sum them all */
			TAILQ_FOREACH(grp, &fqs->fqs_combined_grp_list, fqg_grp_link) {
				if (fq_if_is_grp_all_paced(grp)) {
					continue;
				}
				stat->packets += FQG_LEN(grp);
				stat->bytes += FQG_BYTES(grp);
			}
		} else {
			grp = fq_if_find_grp(fqs, stat->grp_idx);
			if (!fq_if_is_grp_all_paced(grp)) {
				stat->packets = FQG_LEN(grp);
				stat->bytes = FQG_BYTES(grp);
			}
		}
	} else {
		if (fq_if_is_grp_combined(fqs, stat->grp_idx)) {
			TAILQ_FOREACH(grp, &fqs->fqs_combined_grp_list, fqg_grp_link) {
				if (fq_if_is_grp_all_paced(grp)) {
					continue;
				}
				fq_if_grp_stat_sc(fqs, grp, &grp_sc_stat, now);
				stat->packets += grp_sc_stat.packets;
				stat->bytes += grp_sc_stat.bytes;
			}
		} else {
			grp = fq_if_find_grp(fqs, stat->grp_idx);
			fq_if_grp_stat_sc(fqs, grp, stat, now);
		}
	}
}
1595 
/*
 * Dispatch a classq control request (purge, event, throttle, stats) to
 * the matching handler.  Unrecognized request codes fall through and
 * return 0 — there is deliberately no default case.  Always returns 0;
 * `err' exists to satisfy the ifclassq request signature.
 */
int
fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
{
	int err = 0;
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	/*
	 * These are usually slow operations, convert the lock ahead of time
	 */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	switch (rq) {
	case CLASSQRQ_PURGE:
		fq_if_purge(fqs);
		break;
	case CLASSQRQ_PURGE_SC:
		fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg);
		break;
	case CLASSQRQ_EVENT:
		fq_if_event(fqs, (cqev_t)arg);
		break;
	case CLASSQRQ_THROTTLE:
		fq_if_throttle(fqs, (cqrq_throttle_t *)arg);
		break;
	case CLASSQRQ_STAT_SC:
		fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg);
		break;
	}
	return err;
}
1627 
/*
 * Allocate an fq_codel scheduler instance, attach it to `ifq' and
 * create the initial (default) group.  The driver-managed flag selects
 * the service-class-aware bitmap operations.  Returns 0 on success or
 * an errno; on failure the scheduler state is torn down.
 */
int
fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
    classq_pkt_type_t ptype)
{
	fq_if_t *fqs = NULL;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	fqs = fq_if_alloc(ifq, ptype);
	if (fqs == NULL) {
		return ENOMEM;
	}
	if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) {
		fqs->fqs_flags |= FQS_DRIVER_MANAGED;
		fqs->fqs_bm_ops = &fq_if_grps_sc_bitmap_ops;
	} else {
		fqs->fqs_bm_ops = &fq_if_grps_bitmap_ops;
	}

	err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: error from ifclassq_attach, "
		    "failed to attach fq_if: %d\n", __func__, err);
		fq_if_destroy(fqs);
		return err;
	}

	/*
	 * Always create one group. If qset 0 is added later,
	 * this group will be updated.
	 */
	err = fq_if_create_grp(ifq, 0, IF_CLASSQ_DEF);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: error from fq_if_create_grp, "
		    "failed to create a fq group: %d\n", __func__, err);
		/*
		 * NOTE(review): fqs was already attached above but is
		 * destroyed here without a detach — presumably
		 * fq_if_destroy/teardown handles that; verify against
		 * ifclassq_attach semantics.
		 */
		fq_if_destroy(fqs);
	}

	return err;
}
1671 
1672 fq_t *
fq_if_hash_pkt(fq_if_t * fqs,fq_if_group_t * fq_grp,u_int32_t flowid,mbuf_svc_class_t svc_class,u_int64_t now,bool create,fq_tfc_type_t tfc_type)1673 fq_if_hash_pkt(fq_if_t *fqs, fq_if_group_t *fq_grp, u_int32_t flowid,
1674     mbuf_svc_class_t svc_class, u_int64_t now, bool create,
1675     fq_tfc_type_t tfc_type)
1676 {
1677 	fq_t *fq = NULL;
1678 	flowq_list_t *fq_list;
1679 	fq_if_classq_t *fq_cl;
1680 	u_int8_t fqs_hash_id;
1681 	u_int8_t scidx;
1682 
1683 	scidx = fq_if_service_to_priority(fqs, svc_class);
1684 
1685 	fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);
1686 
1687 	fq_list = &fqs->fqs_flows[fqs_hash_id];
1688 
1689 	SLIST_FOREACH(fq, fq_list, fq_hashlink) {
1690 		if (fq->fq_flowhash == flowid &&
1691 		    fq->fq_sc_index == scidx &&
1692 		    fq->fq_tfc_type == tfc_type &&
1693 		    fq->fq_group == fq_grp) {
1694 			break;
1695 		}
1696 	}
1697 	if (fq == NULL && create) {
1698 		/* If the flow is not already on the list, allocate it */
1699 		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1700 		fq = fq_alloc(fqs->fqs_ptype);
1701 		if (fq != NULL) {
1702 			fq->fq_flowhash = flowid;
1703 			fq->fq_sc_index = scidx;
1704 			fq->fq_group = fq_grp;
1705 			fq->fq_tfc_type = tfc_type;
1706 			fq_cl = &FQ_CLASSQ(fq);
1707 			fq->fq_flags = (FQF_FLOWCTL_CAPABLE | FQF_FRESH_FLOW);
1708 			fq->fq_updatetime = now + FQ_UPDATE_INTERVAL(fq);
1709 			fq->fq_next_tx_time = FQ_INVALID_TX_TS;
1710 			SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
1711 			fq_cl->fcl_stat.fcl_flows_cnt++;
1712 		}
1713 		KDBG(AQM_KTRACE_STATS_FLOW_ALLOC,
1714 		    fqs->fqs_ifq->ifcq_ifp->if_index, fq->fq_flowhash,
1715 		    AQM_KTRACE_FQ_GRP_SC_IDX(fq), 0);
1716 	} else if ((fq != NULL) && (fq->fq_flags & FQF_EMPTY_FLOW)) {
1717 		fq_if_reuse_empty_flow(fqs, fq, now);
1718 	}
1719 
1720 	/*
1721 	 * If getq time is not set because this is the first packet or after
1722 	 * idle time, set it now so that we can detect a stall.
1723 	 */
1724 	if (fq != NULL && fq->fq_getqtime == 0) {
1725 		fq->fq_getqtime = now;
1726 	}
1727 
1728 	return fq;
1729 }
1730 
/*
 * Unlink `fq' from the hash table, release any outstanding flow
 * control on it, and free it.  Must not be called for flows on the
 * empty list (those go through fq_if_purge_empty_flow first).
 */
void
fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq)
{
	u_int8_t hash_id;

	ASSERT((fq->fq_flags & FQF_EMPTY_FLOW) == 0);
	hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
	SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq,
	    fq_hashlink);
	/* Freeing may block; take the exclusive lock first */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	if (__improbable(fq->fq_flags & FQF_FLOWCTL_ON)) {
		/* Wake up anyone flow-controlled on this dying flow */
		fq_if_flow_feedback(fqs, fq, fq_cl);
	}
	KDBG(AQM_KTRACE_STATS_FLOW_DESTROY,
	    fqs->fqs_ifq->ifcq_ifp->if_index, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq), 0);
	fq_destroy(fq, fqs->fqs_ptype);
}
1749 
1750 inline boolean_t
fq_if_at_drop_limit(fq_if_t * fqs)1751 fq_if_at_drop_limit(fq_if_t *fqs)
1752 {
1753 	return (IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
1754 	       TRUE : FALSE;
1755 }
1756 
1757 inline boolean_t
fq_if_almost_at_drop_limit(fq_if_t * fqs)1758 fq_if_almost_at_drop_limit(fq_if_t *fqs)
1759 {
1760 	/*
1761 	 * Whether we are above 90% of the queue limit. This is used to tell if we
1762 	 * can stop flow controlling the largest flow.
1763 	 */
1764 	return IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit * 9 / 10;
1765 }
1766 
/*
 * Revive a flow that was parked on the empty list: take it off the
 * list, clear its state flags, reset its timers and count it back into
 * its class's active-flow count (which feeds budget calculation).
 */
static inline void
fq_if_reuse_empty_flow(fq_if_t *fqs, fq_t *fq, uint64_t now)
{
	ASSERT(fq->fq_flags & FQF_EMPTY_FLOW);
	TAILQ_REMOVE(&fqs->fqs_empty_list, fq, fq_empty_link);
	STAILQ_NEXT(fq, fq_actlink) = NULL;
	fq->fq_flags &= ~FQF_FLOW_STATE_MASK;
	fq->fq_empty_purge_time = 0;
	fq->fq_getqtime = 0;
	fq->fq_updatetime = now + FQ_UPDATE_INTERVAL(fq);
	fqs->fqs_empty_list_cnt--;
	fq_if_classq_t *fq_cl = &FQ_CLASSQ(fq);
	fq_cl->fcl_stat.fcl_flows_cnt++;
}
1781 
/*
 * Park a drained flow on the empty list, to be freed later (after
 * fq_empty_purge_delay) unless it sees traffic again first.
 */
inline void
fq_if_move_to_empty_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    uint64_t now)
{
	/*
	 * NOTE(review): this asserts that some flag OUTSIDE the
	 * new/old/flowctl set is on (e.g. FQF_FLOWCTL_CAPABLE) — it does
	 * NOT assert that those three are clear, which may be what was
	 * intended; kept as-is to match upstream.
	 */
	ASSERT(fq->fq_flags & ~(FQF_NEW_FLOW | FQF_OLD_FLOW | FQF_FLOWCTL_ON));
	fq->fq_empty_purge_time = now + fq_empty_purge_delay;
	TAILQ_INSERT_TAIL(&fqs->fqs_empty_list, fq, fq_empty_link);
	fq->fq_flags |= FQF_EMPTY_FLOW;
	FQ_CLEAR_OVERWHELMING(fq);
	fqs->fqs_empty_list_cnt++;
	/*
	 * fcl_flows_cnt is used in budget determination for the class.
	 * empty flow shouldn't contribute to the budget.
	 */
	fq_cl->fcl_stat.fcl_flows_cnt--;
}
1798 
/*
 * Final teardown of a flow on the empty list: unlink it from the list,
 * then unhash and free it via fq_if_destroy_flow().  `fq' is invalid
 * after this returns.
 */
static void
fq_if_purge_empty_flow(fq_if_t *fqs, fq_t *fq)
{
	fq_if_classq_t *fq_cl;
	fq_cl = &FQ_CLASSQ(fq);

	ASSERT((fq->fq_flags & FQF_EMPTY_FLOW) != 0);
	TAILQ_REMOVE(&fqs->fqs_empty_list, fq, fq_empty_link);
	/* Clear first so fq_if_destroy_flow's !EMPTY assert holds */
	fq->fq_flags &= ~FQF_EMPTY_FLOW;
	fqs->fqs_empty_list_cnt--;
	/* Remove from the hash list and free the flow queue */
	fq_if_destroy_flow(fqs, fq_cl, fq);
}
1812 
/*
 * Free expired flows from the empty list.  Normally stops at the first
 * flow whose purge time is still in the future (the list is in arrival
 * order) or after FQ_EMPTY_PURGE_MAX flows, to bound the work done per
 * call; purge_all overrides both limits and drains the whole list.
 */
static void
fq_if_purge_empty_flow_list(fq_if_t *fqs, uint64_t now, bool purge_all)
{
	fq_t *fq, *tmp;
	int i = 0;

	if (fqs->fqs_empty_list_cnt == 0) {
		ASSERT(TAILQ_EMPTY(&fqs->fqs_empty_list));
		return;
	}

	TAILQ_FOREACH_SAFE(fq, &fqs->fqs_empty_list, fq_empty_link, tmp) {
		/* i only advances when purge_all is false (short-circuit) */
		if (!purge_all && ((now < fq->fq_empty_purge_time) ||
		    (i++ == FQ_EMPTY_PURGE_MAX))) {
			break;
		}
		fq_if_purge_empty_flow(fqs, fq);
	}

	if (__improbable(purge_all)) {
		VERIFY(fqs->fqs_empty_list_cnt == 0);
		VERIFY(TAILQ_EMPTY(&fqs->fqs_empty_list));
	}
}
1837 
/*
 * A flow on the old-flows list has drained: remove it from the list,
 * release any flow control held on it, and park it on the empty list
 * for delayed destruction.
 */
static void
fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    uint64_t now)
{
	/*
	 * Remove the flow queue from the old flows list.
	 */
	STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq, fq_actlink);
	fq->fq_flags &= ~FQF_OLD_FLOW;
	fq_cl->fcl_stat.fcl_oldflows_cnt--;
	VERIFY(fq->fq_bytes == 0);

	/* release any flow control */
	if (__improbable(fq->fq_flags & FQF_FLOWCTL_ON)) {
		fq_if_flow_feedback(fqs, fq, fq_cl);
	}

	/* move the flow queue to empty flows list */
	fq_if_move_to_empty_flow(fqs, fq_cl, fq, now);
}
1858 
/*
 * A flow on the new-flows list has drained its quantum: demote it to
 * the tail of the old-flows list (standard FQ-CoDel new->old
 * transition).
 */
static void
fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl)
{
	/* Move to the end of old queue list */
	STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
	    flowq, fq_actlink);
	fq->fq_flags &= ~FQF_NEW_FLOW;
	fq_cl->fcl_stat.fcl_newflows_cnt--;

	STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq, fq_actlink);
	fq->fq_flags |= FQF_OLD_FLOW;
	fq_cl->fcl_stat.fcl_oldflows_cnt++;
}
1872 
/*
 * Overflow policy: drop one packet from the head of the largest flow
 * (fqs_large_flow).  The packet's scheduler metadata is scrubbed
 * before it is freed, drop stats are charged to the interface queue,
 * and an emptied flow is demoted to the appropriate list.  No-op when
 * no large flow is being tracked.
 */
inline void
fq_if_drop_packet(fq_if_t *fqs, uint64_t now)
{
	fq_t *fq = fqs->fqs_large_flow;
	fq_if_classq_t *fq_cl;
	pktsched_pkt_t pkt;
	volatile uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;

	if (fq == NULL) {
		return;
	}
	/* queue can not be empty on the largest flow */
	VERIFY(!fq_empty(fq, fqs->fqs_ptype));

	fq_cl = &FQ_CLASSQ(fq);
	_PKTSCHED_PKT_INIT(&pkt);
	fq_getq_flow_internal(fqs, fq, &pkt);
	ASSERT(pkt.pktsched_ptype != QP_INVALID);

	pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL, NULL);

	/* Freeing may block; take the exclusive lock */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	*pkt_timestamp = 0;
	/* Scrub packet-type-specific scheduler state before freeing */
	switch (pkt.pktsched_ptype) {
	case QP_MBUF:
		*pkt_flags &= ~PKTF_PRIV_GUARDED;
		break;
#if SKYWALK
	case QP_PACKET:
		/* sanity check */
		ASSERT((*pkt_flags & ~PKT_F_COMMON_MASK) == 0);
		break;
#endif /* SKYWALK */
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* If that was the last packet, retire the flow from its list */
	if (fq_empty(fq, fqs->fqs_ptype)) {
		fqs->fqs_large_flow = NULL;
		if (fq->fq_flags & FQF_OLD_FLOW) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, now);
		} else {
			VERIFY(fq->fq_flags & FQF_NEW_FLOW);
			fq_if_empty_new_flow(fq, fq_cl);
		}
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt));

	pktsched_free_pkt(&pkt);
	fq_cl->fcl_stat.fcl_drop_overflow++;
}
1928 
1929 inline void
fq_if_is_flow_heavy(fq_if_t * fqs,fq_t * fq)1930 fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
1931 {
1932 	fq_t *prev_fq;
1933 
1934 	if (fqs->fqs_large_flow != NULL &&
1935 	    fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
1936 		fqs->fqs_large_flow = NULL;
1937 	}
1938 
1939 	if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
1940 		return;
1941 	}
1942 
1943 	prev_fq = fqs->fqs_large_flow;
1944 	if (prev_fq == NULL) {
1945 		if (!fq_empty(fq, fqs->fqs_ptype)) {
1946 			fqs->fqs_large_flow = fq;
1947 		}
1948 		return;
1949 	} else if (fq->fq_bytes > prev_fq->fq_bytes) {
1950 		fqs->fqs_large_flow = fq;
1951 	}
1952 }
1953 
/*
 * Put a flow under flow control by adding a flow-advisory entry for it
 * to the scheduler's flow control list.  Returns TRUE when the flow is
 * now (or already was) on the list, FALSE when the entry could not be
 * allocated.
 */
boolean_t
fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint8_t flowsrc,
    fq_t *fq, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce;

#if DEBUG || DEVELOPMENT
	if (__improbable(ifclassq_flow_control_adv == 0)) {
		os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
		return TRUE;
	}
#endif /* DEBUG || DEVELOPMENT */

	/* nothing to do if this flow is already being flow controlled */
	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
		    fce->fce_flowid == fq->fq_flowhash) {
			/* Already on flowcontrol list */
			return TRUE;
		}
	}
	/* convert the classq lock before the blocking allocation */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
	if (fce != NULL) {
		/* XXX Add number of bytes in the queue */
		STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
		fq_cl->fcl_stat.fcl_flow_control++;
		os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
		    "flow: 0x%x, iface: %s, B:%u\n", __func__,
		    fq_cl->fcl_stat.fcl_flow_control,
		    fq->fq_sc_index, fce->fce_flowsrc_type, fq->fq_flowhash,
		    if_name(fqs->fqs_ifq->ifcq_ifp), fq->fq_bytes);
		KDBG(AQM_KTRACE_STATS_FLOW_CTL | DBG_FUNC_START,
		    fq->fq_flowhash, AQM_KTRACE_FQ_GRP_SC_IDX(fq),
		    fq->fq_bytes, fq->fq_min_qdelay);
	}
	return (fce != NULL) ? TRUE : FALSE;
}
1991 
/*
 * Unlink a flow-advisory entry from the scheduler's flow control list
 * and hand it off to the flow advisory module via flowadv_add_entry().
 */
static void
fq_if_remove_fcentry(fq_if_t *fqs, struct flowadv_fcentry *fce)
{
	STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry, fce_link);
	/* detach the link field before handing the entry off */
	STAILQ_NEXT(fce, fce_link) = NULL;
	flowadv_add_entry(fce);
}
1999 
/*
 * Deliver flow-control feedback for a flow: look up its entry on the
 * flow control list, tag it with the feedback event type, and release
 * it to the flow advisory module.  Always clears FQF_FLOWCTL_ON on the
 * flow, whether or not an entry was found.
 */
void
fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce = NULL;

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	/* find this flow's entry on the flow control list, if any */
	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if (fce->fce_flowid == fq->fq_flowhash) {
			break;
		}
	}
	if (fce != NULL) {
		fq_cl->fcl_stat.fcl_flow_feedback++;
		fce->fce_event_type = FCE_EVENT_TYPE_FLOW_CONTROL_FEEDBACK;
		os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
		    "flow: 0x%x, iface: %s grp: %hhu, B:%u\n", __func__,
		    fq_cl->fcl_stat.fcl_flow_feedback, fq->fq_sc_index,
		    fce->fce_flowsrc_type, fce->fce_flowid,
		    if_name(fqs->fqs_ifq->ifcq_ifp), FQ_GROUP(fq)->fqg_index,
		    fq->fq_bytes);
		fq_if_remove_fcentry(fqs, fce);
		KDBG(AQM_KTRACE_STATS_FLOW_CTL | DBG_FUNC_END,
		    fq->fq_flowhash, AQM_KTRACE_FQ_GRP_SC_IDX(fq),
		    fq->fq_bytes, fq->fq_min_qdelay);
	}
	fq->fq_flags &= ~FQF_FLOWCTL_ON;
}
2027 
/*
 * Report Congestion Experienced (CE) marks for a flow by queueing a
 * congestion-notification entry with the flow advisory module.
 * ce_cnt is the CE count and pkt_cnt the packets seen since the last
 * report.  Returns TRUE on success, FALSE if the entry could not be
 * allocated.
 */
boolean_t
fq_if_report_ce(fq_if_t *fqs, pktsched_pkt_t *pkt, uint32_t ce_cnt,
    uint32_t pkt_cnt)
{
	struct flowadv_fcentry *fce;

#if DEBUG || DEVELOPMENT
	if (__improbable(ifclassq_flow_control_adv == 0)) {
		os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
		return TRUE;
	}
#endif /* DEBUG || DEVELOPMENT */

	/* convert the classq lock before the blocking allocation */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
	if (fce != NULL) {
		fce->fce_event_type = FCE_EVENT_TYPE_CONGESTION_EXPERIENCED;
		fce->fce_ce_cnt = ce_cnt;
		fce->fce_pkts_since_last_report = pkt_cnt;

		flowadv_add_entry(fce);
	}
	return (fce != NULL) ? TRUE : FALSE;
}
2052 
2053 
/*
 * Dequeue up to pktlimit packets / bytelimit bytes from one class queue.
 *
 * Services the class's new-flows list first and then its old-flows list,
 * following the FQ-CoDel DRR scheme of RFC 8290: a new flow that drains
 * or exhausts its deficit is re-credited a quantum and demoted to the
 * old-flows list; an old flow with an exhausted deficit is re-credited
 * and cycled to the tail of the old-flows list.
 *
 * Dequeued packets are chained at *top (tail reported via *bottom), or
 * routed to per-flow dequeue lists when fq_dqlist is non-NULL.  When
 * budget_restricted is set, bytelimit is capped by the class budget.
 * On return, *retpktcnt / *retbytecnt hold the totals, *fq_cl_paced is
 * set when every flow in the class was pacing-limited, and *next_tx_time
 * reports the earliest future transmit time observed (FQ_INVALID_TX_TS
 * if none).  All output pointers except top may be NULL.
 */
void
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, uint32_t pktlimit,
    int64_t bytelimit, classq_pkt_t *top, classq_pkt_t *bottom,
    uint32_t *retpktcnt, uint32_t *retbytecnt, flowq_dqlist_t *fq_dqlist,
    bool budget_restricted, uint64_t now, bool *fq_cl_paced,
    uint64_t *next_tx_time)
{
	fq_t *fq = NULL, *tfq = NULL;
	flowq_stailq_t temp_stailq;
	uint32_t pktcnt, bytecnt;
	boolean_t qempty, limit_reached = FALSE;
	bool all_paced = true;
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
	fq_getq_flow_t fq_getq_flow_fn;
	classq_pkt_t *head, *tail;
	uint64_t fq_cl_tx_time = FQ_INVALID_TX_TS;

	/* pick the dequeue routine matching this scheduler's packet type */
	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		fq_getq_flow_fn = fq_getq_flow_mbuf;
		break;

#if SKYWALK
	case QP_PACKET:
		fq_getq_flow_fn = fq_getq_flow_kpkt;
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * maximum byte limit should not be greater than the budget for
	 * this class
	 */
	if (bytelimit > fq_cl->fcl_budget && budget_restricted) {
		bytelimit = fq_cl->fcl_budget;
	}

	VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);
	pktcnt = bytecnt = 0;
	STAILQ_INIT(&temp_stailq);

	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		ASSERT((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_NEW_FLOW);
		uint64_t fq_tx_time;
		/* skip flows paced beyond 'now'; remember the earliest tx time */
		if (__improbable(!fq_tx_time_ready(fqs, fq, now, &fq_tx_time))) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
			continue;
		}
		all_paced = false;

		if (fq_dqlist != NULL) {
			if (!fq->fq_in_dqlist) {
				fq_dqlist_add(fq_dqlist, fq);
			}
			head = &fq->fq_dq_head;
			tail = &fq->fq_dq_tail;
		} else {
			ASSERT(!fq->fq_in_dqlist);
			head = top;
			tail = &last;
		}

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, now);

		/*
		 * From RFC 8290:
		 * if that queue has a negative number of credits (i.e., it has already
		 * dequeued at least a quantum of bytes), it is given an additional
		 * quantum of credits, the queue is put onto _the end of_ the list of
		 * old queues, and the routine selects the next queue and starts again.
		 */
		if (fq->fq_deficit <= 0 || qempty) {
			fq->fq_deficit += fq_cl->fcl_quantum;
			fq_if_empty_new_flow(fq, fq_cl);
		}
		/*
		 * TODO: should a credit also be granted when the flow is now
		 * paced, so that it is treated the same as an empty flow?
		 */

		/* re-check pacing after the dequeue; record the earliest tx time */
		if (!fq_tx_time_ready(fqs, fq, now, &fq_tx_time)) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
		}

		if (limit_reached) {
			goto done;
		}
	}

	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_OLD_FLOW);
		/* NOTE(review): 'destroy' is written but never read here */
		bool destroy = true;
		uint64_t fq_tx_time;

		if (__improbable(!fq_tx_time_ready(fqs, fq, now, &fq_tx_time))) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
			continue;
		}
		all_paced = false;

		if (fq_dqlist != NULL) {
			if (!fq->fq_in_dqlist) {
				fq_dqlist_add(fq_dqlist, fq);
			}
			head = &fq->fq_dq_head;
			tail = &fq->fq_dq_tail;
			destroy = false;
		} else {
			ASSERT(!fq->fq_in_dqlist);
			head = top;
			tail = &last;
		}

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, now);

		if (!fq_tx_time_ready(fqs, fq, now, &fq_tx_time)) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
		}

		if (qempty) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, now);
		} else if (fq->fq_deficit <= 0) {
			STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
			    flowq, fq_actlink);
			/*
			 * Move to the end of the old queues list. We do not
			 * need to update the flow count since this flow
			 * will be added to the tail again
			 */
			STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
			fq->fq_deficit += fq_cl->fcl_quantum;
		}
		if (limit_reached) {
			break;
		}
	}

done:
	/* if every flow was paced, record when this class can transmit next */
	if (all_paced) {
		fq_cl->fcl_flags |= FCL_PACED;
		fq_cl->fcl_next_tx_time = fq_cl_tx_time;
	}
	/* splice the recycled flows back onto the tail of the old-flows list */
	if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
		STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
	} else if (!STAILQ_EMPTY(&temp_stailq)) {
		fq_cl->fcl_old_flows = temp_stailq;
	}
	if (last.cp_mbuf != NULL) {
		VERIFY(top->cp_mbuf != NULL);
		if (bottom != NULL) {
			*bottom = last;
		}
	}
	if (retpktcnt != NULL) {
		*retpktcnt = pktcnt;
	}
	if (retbytecnt != NULL) {
		*retbytecnt = bytecnt;
	}
	if (fq_cl_paced != NULL) {
		*fq_cl_paced = all_paced;
	}
	if (next_tx_time != NULL) {
		*next_tx_time = fq_cl_tx_time;
	}
}
2238 
/*
 * Destroy the FQ-CoDel scheduler instance attached to ifq and detach
 * the discipline from the classq.  Caller must hold the ifclassq lock.
 */
void
fq_if_teardown_ifclassq(struct ifclassq *ifq)
{
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);
	fq_if_destroy(fqs);
	/* clear the discipline pointer before detaching the classq */
	ifq->ifcq_disc = NULL;
	ifclassq_detach(ifq);
}
2250 
2251 static void
fq_export_flowstats(fq_if_t * fqs,fq_t * fq,struct fq_codel_flowstats * flowstat)2252 fq_export_flowstats(fq_if_t *fqs, fq_t *fq,
2253     struct fq_codel_flowstats *flowstat)
2254 {
2255 	bzero(flowstat, sizeof(*flowstat));
2256 	flowstat->fqst_min_qdelay = (uint32_t)fq->fq_min_qdelay;
2257 	flowstat->fqst_bytes = fq->fq_bytes;
2258 	flowstat->fqst_flowhash = fq->fq_flowhash;
2259 	if (fq->fq_flags & FQF_NEW_FLOW) {
2260 		flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW;
2261 	}
2262 	if (fq->fq_flags & FQF_OLD_FLOW) {
2263 		flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW;
2264 	}
2265 	if (fq->fq_flags & FQF_DELAY_HIGH) {
2266 		flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH;
2267 	}
2268 	if (fq->fq_flags & FQF_FLOWCTL_ON) {
2269 		flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON;
2270 	}
2271 	if (fqs->fqs_large_flow == fq) {
2272 		flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW;
2273 	}
2274 }
2275 
2276 int
fq_if_getqstats_ifclassq(struct ifclassq * ifq,uint8_t gid,u_int32_t qid,struct if_ifclassq_stats * ifqs)2277 fq_if_getqstats_ifclassq(struct ifclassq *ifq, uint8_t gid, u_int32_t qid,
2278     struct if_ifclassq_stats *ifqs)
2279 {
2280 	struct fq_codel_classstats *fcls;
2281 	fq_if_classq_t *fq_cl;
2282 	fq_if_t *fqs;
2283 	fq_t *fq = NULL;
2284 	fq_if_group_t *grp;
2285 	u_int32_t i, flowstat_cnt;
2286 
2287 	if (qid >= FQ_IF_MAX_CLASSES || gid >= FQ_IF_MAX_GROUPS) {
2288 		return EINVAL;
2289 	}
2290 
2291 	fqs = (fq_if_t *)ifq->ifcq_disc;
2292 	if (fqs->fqs_classq_groups[gid] == NULL) {
2293 		return ENXIO;
2294 	}
2295 
2296 	fcls = &ifqs->ifqs_fq_codel_stats;
2297 
2298 	fq_cl = &FQS_CLASSQ(fqs, gid, qid);
2299 	grp = fq_if_find_grp(fqs, gid);
2300 
2301 	fcls->fcls_pri = fq_cl->fcl_pri;
2302 	fcls->fcls_service_class = fq_cl->fcl_service_class;
2303 	fcls->fcls_quantum = fq_cl->fcl_quantum;
2304 	fcls->fcls_drr_max = fq_cl->fcl_drr_max;
2305 	fcls->fcls_budget = fq_cl->fcl_budget;
2306 	fcls->fcls_l4s_target_qdelay = grp->fqg_target_qdelays[FQ_TFC_L4S];
2307 	fcls->fcls_target_qdelay = grp->fqg_target_qdelays[FQ_TFC_C];
2308 	fcls->fcls_update_interval = grp->fqg_update_intervals[FQ_TFC_C];
2309 	fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
2310 	fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
2311 	fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
2312 	fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
2313 	fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
2314 	fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
2315 	fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
2316 	fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
2317 	fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
2318 	fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
2319 	fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
2320 	fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
2321 	fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
2322 	fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
2323 	fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
2324 	fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
2325 	fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
2326 	fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
2327 	fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;
2328 	fcls->fcls_pkts_compressible = fq_cl->fcl_stat.fcl_pkts_compressible;
2329 	fcls->fcls_pkts_compressed = fq_cl->fcl_stat.fcl_pkts_compressed;
2330 	fcls->fcls_min_qdelay = fq_cl->fcl_stat.fcl_min_qdelay;
2331 	fcls->fcls_max_qdelay = fq_cl->fcl_stat.fcl_max_qdelay;
2332 	fcls->fcls_avg_qdelay = fq_cl->fcl_stat.fcl_avg_qdelay;
2333 	fcls->fcls_overwhelming = fq_cl->fcl_stat.fcl_overwhelming;
2334 	fcls->fcls_ce_marked = fq_cl->fcl_stat.fcl_ce_marked;
2335 	fcls->fcls_ce_reported = fq_cl->fcl_stat.fcl_ce_reported;
2336 	fcls->fcls_ce_mark_failures = fq_cl->fcl_stat.fcl_ce_mark_failures;
2337 	fcls->fcls_l4s_pkts = fq_cl->fcl_stat.fcl_l4s_pkts;
2338 	fcls->fcls_ignore_tx_time = fq_cl->fcl_stat.fcl_ignore_tx_time;
2339 	fcls->fcls_paced_pkts = fq_cl->fcl_stat.fcl_paced_pkts;
2340 	fcls->fcls_fcl_pacing_needed = fq_cl->fcl_stat.fcl_fcl_pacemaker_needed;
2341 
2342 	/* Gather per flow stats */
2343 	flowstat_cnt = min((fcls->fcls_newflows_cnt +
2344 	    fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS);
2345 	i = 0;
2346 	STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) {
2347 		if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt) {
2348 			break;
2349 		}
2350 
2351 		/* leave space for a few old flows */
2352 		if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt &&
2353 		    i >= (FQ_IF_MAX_FLOWSTATS >> 1)) {
2354 			break;
2355 		}
2356 		fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
2357 		i++;
2358 	}
2359 	STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) {
2360 		if (i >= flowstat_cnt) {
2361 			break;
2362 		}
2363 		fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
2364 		i++;
2365 	}
2366 	VERIFY(i <= flowstat_cnt);
2367 	fcls->fcls_flowstats_cnt = i;
2368 	return 0;
2369 }
2370 
/*
 * Create the classq group at grp_idx and initialize its per-service-
 * class queues, target queue delays and update intervals.  The default
 * group (index 0) may be "re-created" to update its combined/separated
 * state and delay parameters.  Returns EINVAL if a non-default group
 * already exists at grp_idx, ENOMEM on allocation failure.
 */
int
fq_if_create_grp(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
{
/* initialize one class queue of _grp for service class _s with quantum _q */
#define _FQ_CLASSQ_INIT(_grp, _s, _q)                      \
    fq_if_classq_init(_grp, FQ_IF_ ## _s ##_INDEX,         \
	FQ_CODEL_QUANTUM_ ## _s(_q), FQ_CODEL_DRR_MAX(_s),     \
	MBUF_SC_ ## _s );

	fq_if_group_t *grp;
	fq_if_t *fqs;
	uint32_t quantum, calc_flags = IF_CLASSQ_DEF;
	struct ifnet *ifp = ifcq->ifcq_ifp;

	VERIFY(grp_idx < FQ_IF_MAX_GROUPS);

	fqs = (fq_if_t *)ifcq->ifcq_disc;

	/* group 0 already exists: only refresh its flags/parameters */
	if (grp_idx == 0 && fqs->fqs_classq_groups[grp_idx] != NULL) {
		grp = fqs->fqs_classq_groups[grp_idx];
		goto update;
	}

	if (fqs->fqs_classq_groups[grp_idx] != NULL) {
		return EINVAL;
	}

	grp = zalloc_flags(fq_if_grp_zone, Z_WAITOK | Z_ZERO);
	if (grp == NULL) {
		return ENOMEM;
	}

	fqs->fqs_classq_groups[grp_idx] = grp;
	grp->fqg_index = grp_idx;

	quantum = fq_if_calc_quantum(ifp);
	if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
		/* driver-managed: only the four access-category classes */
		_FQ_CLASSQ_INIT(grp, BK, quantum);
		_FQ_CLASSQ_INIT(grp, BE, quantum);
		_FQ_CLASSQ_INIT(grp, VI, quantum);
		_FQ_CLASSQ_INIT(grp, VO, quantum);
	} else {
		/* SIG shares same INDEX with VI */
		_CASSERT(SCIDX_SIG == SCIDX_VI);
		_CASSERT(FQ_IF_SIG_INDEX == FQ_IF_VI_INDEX);

		_FQ_CLASSQ_INIT(grp, BK_SYS, quantum);
		_FQ_CLASSQ_INIT(grp, BK, quantum);
		_FQ_CLASSQ_INIT(grp, BE, quantum);
		_FQ_CLASSQ_INIT(grp, RD, quantum);
		_FQ_CLASSQ_INIT(grp, OAM, quantum);
		_FQ_CLASSQ_INIT(grp, AV, quantum);
		_FQ_CLASSQ_INIT(grp, RV, quantum);
		_FQ_CLASSQ_INIT(grp, VI, quantum);
		_FQ_CLASSQ_INIT(grp, VO, quantum);
		_FQ_CLASSQ_INIT(grp, CTL, quantum);
	}

update:
	/* the default group is scheduled combined; others separated */
	if (flags & IF_DEFAULT_GRP) {
		fq_if_set_grp_combined(ifcq, grp_idx);
		grp->fqg_flags |= FQ_IF_DEFAULT_GRP;
	} else {
		fq_if_set_grp_separated(ifcq, grp_idx);
		grp->fqg_flags &= ~FQ_IF_DEFAULT_GRP;
	}

	/* compute target delays and update intervals (classic and L4S) */
	calc_flags |= (flags & IF_CLASSQ_LOW_LATENCY);
	ifclassq_calc_target_qdelay(ifp, &grp->fqg_target_qdelays[FQ_TFC_C],
	    calc_flags);
	ifclassq_calc_target_qdelay(ifp, &grp->fqg_target_qdelays[FQ_TFC_L4S],
	    calc_flags | IF_CLASSQ_L4S);

	ifclassq_calc_update_interval(&grp->fqg_update_intervals[FQ_TFC_C],
	    calc_flags);
	ifclassq_calc_update_interval(&grp->fqg_update_intervals[FQ_TFC_L4S],
	    calc_flags | IF_CLASSQ_L4S);

	return 0;
#undef _FQ_CLASSQ_INIT
}
2451 
2452 fq_if_group_t *
fq_if_find_grp(fq_if_t * fqs,uint8_t grp_idx)2453 fq_if_find_grp(fq_if_t *fqs, uint8_t grp_idx)
2454 {
2455 	fq_if_group_t *grp;
2456 
2457 	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
2458 	VERIFY(grp_idx < FQ_IF_MAX_GROUPS);
2459 
2460 	grp = fqs->fqs_classq_groups[grp_idx];
2461 	VERIFY(grp != NULL);
2462 
2463 	return grp;
2464 }
2465 
2466 static void
fq_if_purge_grp(fq_if_t * fqs,fq_if_group_t * grp)2467 fq_if_purge_grp(fq_if_t *fqs, fq_if_group_t *grp)
2468 {
2469 	for (uint8_t i = 0; i < FQ_IF_MAX_CLASSES; i++) {
2470 		fq_if_purge_classq(fqs, &grp->fqg_classq[i]);
2471 	}
2472 
2473 	bzero(&grp->fqg_bitmaps, sizeof(grp->fqg_bitmaps));
2474 	grp->fqg_len = 0;
2475 	grp->fqg_bytes = 0;
2476 	fq_if_set_grp_separated(fqs->fqs_ifq, grp->fqg_index);
2477 }
2478 
2479 void
fq_if_destroy_grps(fq_if_t * fqs)2480 fq_if_destroy_grps(fq_if_t *fqs)
2481 {
2482 	fq_if_group_t *grp;
2483 
2484 	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
2485 
2486 	for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
2487 		if (fqs->fqs_classq_groups[grp_idx] == NULL) {
2488 			continue;
2489 		}
2490 
2491 		grp = fq_if_find_grp(fqs, grp_idx);
2492 		fq_if_purge_grp(fqs, grp);
2493 		zfree(fq_if_grp_zone, grp);
2494 		fqs->fqs_classq_groups[grp_idx] = NULL;
2495 	}
2496 }
2497 
2498 static inline boolean_t
fq_if_is_grp_combined(fq_if_t * fqs,uint8_t grp_idx)2499 fq_if_is_grp_combined(fq_if_t *fqs, uint8_t grp_idx)
2500 {
2501 	return pktsched_bit_tst(grp_idx, &fqs->fqs_combined_grp_bitmap);
2502 }
2503 
2504 void
fq_if_set_grp_combined(struct ifclassq * ifcq,uint8_t grp_idx)2505 fq_if_set_grp_combined(struct ifclassq *ifcq, uint8_t grp_idx)
2506 {
2507 	fq_if_t *fqs;
2508 	fq_if_group_t *grp;
2509 
2510 	IFCQ_LOCK_ASSERT_HELD(ifcq);
2511 
2512 	fqs = (fq_if_t *)ifcq->ifcq_disc;
2513 	grp = fq_if_find_grp(fqs, grp_idx);
2514 
2515 	if (fq_if_is_grp_combined(fqs, grp_idx)) {
2516 		return;
2517 	}
2518 
2519 	/*
2520 	 * We keep the current fq_deficit and fcl_budget when combining a group.
2521 	 * That might disrupt the AQM but only for a moment.
2522 	 */
2523 	pktsched_bit_set(grp_idx, &fqs->fqs_combined_grp_bitmap);
2524 	TAILQ_INSERT_TAIL(&fqs->fqs_combined_grp_list, grp, fqg_grp_link);
2525 }
2526 
2527 void
fq_if_set_grp_separated(struct ifclassq * ifcq,uint8_t grp_idx)2528 fq_if_set_grp_separated(struct ifclassq *ifcq, uint8_t grp_idx)
2529 {
2530 	fq_if_t *fqs;
2531 	fq_if_group_t *grp;
2532 
2533 	IFCQ_LOCK_ASSERT_HELD(ifcq);
2534 
2535 	fqs = (fq_if_t *)ifcq->ifcq_disc;
2536 	grp = fq_if_find_grp(fqs, grp_idx);
2537 
2538 	if (!fq_if_is_grp_combined(fqs, grp_idx)) {
2539 		return;
2540 	}
2541 
2542 	pktsched_bit_clr(grp_idx, &fqs->fqs_combined_grp_bitmap);
2543 	TAILQ_REMOVE(&fqs->fqs_combined_grp_list, grp, fqg_grp_link);
2544 }
2545