1 /*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <kern/zalloc.h>
32 #include <net/ethernet.h>
33 #include <net/if_var.h>
34 #include <net/if.h>
35 #include <net/classq/classq.h>
36 #include <net/classq/classq_fq_codel.h>
37 #include <net/pktsched/pktsched_fq_codel.h>
38 #include <os/log.h>
39 #include <pexpert/pexpert.h> /* for PE_parse_boot_argn */
40 #include <mach/thread_act.h>
41 #include <kern/thread.h>
42 #include <kern/sched_prim.h>
43
44 #define FQ_CODEL_DEFAULT_QUANTUM 1500
45
46 #define FQ_CODEL_QUANTUM_BK_SYS(_q) (_q)
47 #define FQ_CODEL_QUANTUM_BK(_q) (_q)
48 #define FQ_CODEL_QUANTUM_BE(_q) (_q)
49 #define FQ_CODEL_QUANTUM_RD(_q) (_q)
50 #define FQ_CODEL_QUANTUM_OAM(_q) (_q)
51 #define FQ_CODEL_QUANTUM_AV(_q) (_q * 2)
52 #define FQ_CODEL_QUANTUM_RV(_q) (_q * 2)
53 #define FQ_CODEL_QUANTUM_VI(_q) (_q * 2)
54 #define FQ_CODEL_QUANTUM_VO(_q) ((_q * 2) / 5)
55 #define FQ_CODEL_QUANTUM_CTL(_q) ((_q * 2) / 5)
56
57 static KALLOC_TYPE_DEFINE(fq_if_zone, fq_if_t, NET_KT_DEFAULT);
58 static KALLOC_TYPE_DEFINE(fq_if_grp_zone, fq_if_group_t, NET_KT_DEFAULT);
59
60 SYSCTL_NODE(_net_classq, OID_AUTO, fq_codel, CTLFLAG_RW | CTLFLAG_LOCKED,
61 0, "FQ-CODEL parameters");
62
63 SYSCTL_INT(_net_classq_fq_codel, OID_AUTO, fq_enable_pacing, CTLFLAG_RW | CTLFLAG_LOCKED,
64 &ifclassq_enable_pacing, 0, "Enable pacing");
65
66 static uint64_t fq_empty_purge_delay = FQ_EMPTY_PURGE_DELAY;
67 #if (DEVELOPMENT || DEBUG)
68 SYSCTL_QUAD(_net_classq_fq_codel, OID_AUTO, fq_empty_purge_delay, CTLFLAG_RW |
69 CTLFLAG_LOCKED, &fq_empty_purge_delay, "Empty flow queue purge delay (ns)");
#endif /* DEVELOPMENT || DEBUG */
71
72 unsigned int ifclassq_enable_pacing = 1;
73
74 typedef STAILQ_HEAD(, flowq) flowq_dqlist_t;
75
76 static fq_if_t *fq_if_alloc(struct ifclassq *, classq_pkt_type_t);
77 static void fq_if_destroy(fq_if_t *fqs);
78 static void fq_if_classq_init(fq_if_group_t *fqg, uint32_t priority,
79 uint32_t quantum, uint32_t drr_max, uint32_t svc_class);
80 static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, uint32_t,
81 int64_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
82 uint32_t *, flowq_dqlist_t *, bool, uint64_t, bool*, uint64_t*);
83 void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
84 static void fq_if_purge(fq_if_t *);
85 static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
86 static void fq_if_purge_flow(fq_if_t *, fq_t *, uint32_t *, uint32_t *,
87 uint64_t);
88 static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl);
89 static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
90 fq_t *fq, uint64_t now);
91 static void fq_if_purge_empty_flow(fq_if_t *fqs, fq_t *fq);
92 static void fq_if_purge_empty_flow_list(fq_if_t *fqs, uint64_t now,
93 bool purge_all);
94 static inline void fq_if_reuse_empty_flow(fq_if_t *fqs, fq_t *fq, uint64_t now);
95 static int fq_if_dequeue_sc_classq_multi_separate(struct ifclassq *ifq,
96 mbuf_svc_class_t svc, u_int32_t maxpktcnt, u_int32_t maxbytecnt,
97 classq_pkt_t *first_packet, classq_pkt_t *last_packet, u_int32_t *retpktcnt,
98 u_int32_t *retbytecnt, uint8_t grp_idx);
99 static void fq_if_grp_stat_sc(fq_if_t *fqs, fq_if_group_t *grp,
100 cqrq_stat_sc_t *stat, uint64_t now);
101 static void fq_if_purge_grp(fq_if_t *fqs, fq_if_group_t *grp);
102 static inline boolean_t fq_if_is_grp_combined(fq_if_t *fqs, uint8_t grp_idx);
103 static void fq_if_destroy_grps(fq_if_t *fqs);
104
105 uint32_t fq_codel_drr_max_values[FQ_IF_MAX_CLASSES] = {
106 [FQ_IF_CTL_INDEX] = 8,
107 [FQ_IF_VO_INDEX] = 8,
108 [FQ_IF_VI_INDEX] = 6,
109 [FQ_IF_RV_INDEX] = 6,
110 [FQ_IF_AV_INDEX] = 6,
111 [FQ_IF_OAM_INDEX] = 4,
112 [FQ_IF_RD_INDEX] = 4,
113 [FQ_IF_BE_INDEX] = 4,
114 [FQ_IF_BK_INDEX] = 2,
115 [FQ_IF_BK_SYS_INDEX] = 2,
116 };
117
118 #define FQ_CODEL_DRR_MAX(_s) fq_codel_drr_max_values[FQ_IF_##_s##_INDEX]
119
120 static boolean_t fq_if_grps_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri,
121 fq_if_state state);
122 static void fq_if_grps_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri,
123 fq_if_state dst_state, fq_if_state src_state);
124 static void fq_if_grps_bitmap_clr(fq_grp_tailq_t *grp_list, int pri,
125 fq_if_state state);
126 static int fq_if_grps_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri,
127 fq_if_state state, fq_if_group_t **selected_grp);
128 static void fq_if_grps_bitmap_move(fq_grp_tailq_t *grp_list, int pri,
129 fq_if_state dst_state, fq_if_state src_state);
130
131 static boolean_t fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri,
132 fq_if_state state);
133 static void fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri,
134 fq_if_state dst_state, fq_if_state src_state);
135 static void fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t *grp_list, int pri,
136 fq_if_state state);
137 static int fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri,
138 fq_if_state state, fq_if_group_t **selected_grp);
139 static void fq_if_grps_sc_bitmap_move(fq_grp_tailq_t *grp_list, int pri,
140 fq_if_state dst_state, fq_if_state src_state);
141
142 bitmap_ops_t fq_if_grps_bitmap_ops =
143 {
144 .ffs = fq_if_grps_bitmap_ffs,
145 .zeros = fq_if_grps_bitmap_zeros,
146 .cpy = fq_if_grps_bitmap_cpy,
147 .clr = fq_if_grps_bitmap_clr,
148 .move = fq_if_grps_bitmap_move,
149 };
150
151 bitmap_ops_t fq_if_grps_sc_bitmap_ops =
152 {
153 .ffs = fq_if_grps_sc_bitmap_ffs,
154 .zeros = fq_if_grps_sc_bitmap_zeros,
155 .cpy = fq_if_grps_sc_bitmap_cpy,
156 .clr = fq_if_grps_sc_bitmap_clr,
157 .move = fq_if_grps_sc_bitmap_move,
158 };
159
160 void
pktsched_fq_init(void)161 pktsched_fq_init(void)
162 {
163 PE_parse_boot_argn("ifclassq_enable_pacing", &ifclassq_enable_pacing,
164 sizeof(ifclassq_enable_pacing));
165
166 // format looks like ifcq_drr_max=8,8,6
167 char buf[(FQ_IF_MAX_CLASSES) * 3];
168 size_t i, len, pri_index = 0;
169 uint32_t drr = 0;
170 if (!PE_parse_boot_arg_str("ifcq_drr_max", buf, sizeof(buf))) {
171 return;
172 }
173
174 len = strlen(buf);
175 for (i = 0; i < len + 1 && pri_index < FQ_IF_MAX_CLASSES; i++) {
176 if (buf[i] != ',' && buf[i] != '\0') {
177 VERIFY(buf[i] >= '0' && buf[i] <= '9');
178 drr = drr * 10 + buf[i] - '0';
179 continue;
180 }
181 fq_codel_drr_max_values[pri_index] = drr;
182 pri_index += 1;
183 drr = 0;
184 }
185 }
186
187 #define FQ_IF_FLOW_HASH_ID(_flowid_) \
188 (((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)
189
190 #define FQ_IF_CLASSQ_IDLE(_fcl_) \
191 (STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
192 STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))
193
194 typedef void (* fq_if_append_pkt_t)(classq_pkt_t *, classq_pkt_t *);
195 typedef boolean_t (* fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *,
196 int64_t, uint32_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
197 uint32_t *, boolean_t *, uint64_t);
198
199 static void
fq_if_append_mbuf(classq_pkt_t * pkt,classq_pkt_t * next_pkt)200 fq_if_append_mbuf(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
201 {
202 pkt->cp_mbuf->m_nextpkt = next_pkt->cp_mbuf;
203 }
204
205 static inline uint64_t
fq_codel_get_time(void)206 fq_codel_get_time(void)
207 {
208 struct timespec ts;
209 uint64_t now;
210
211 nanouptime(&ts);
212 now = ((uint64_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec;
213 return now;
214 }
215
216 #if SKYWALK
/* Chain one skywalk kernel packet after another via pkt_nextpkt. */
static void
fq_if_append_pkt(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
{
	pkt->cp_kpkt->pkt_nextpkt = next_pkt->cp_kpkt;
}
222 #endif /* SKYWALK */
223
224 #if SKYWALK
/*
 * Dequeue skywalk kernel packets from one flow queue onto the caller's
 * head/tail chain.  The loop stops when the flow's DRR deficit is used
 * up, the caller's pkt/byte limits are reached, the queue drains, or
 * fq_tx_time_ready() reports the flow is not yet eligible to transmit.
 * Per-class dequeue stats and the caller's counters are updated for
 * every packet moved.
 *
 * Returns TRUE when a caller-supplied limit was hit; *qempty reports
 * whether the flow queue is now empty.
 */
static boolean_t
fq_getq_flow_kpkt(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    int64_t byte_limit, uint32_t pkt_limit, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t *byte_cnt, uint32_t *pkt_cnt,
    boolean_t *qempty, uint64_t now)
{
	uint32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	/*
	 * Assert to make sure pflags is part of PKT_F_COMMON_MASK;
	 * all common flags need to be declared in that mask.
	 * NOTE(review): no such assert appears in this function; the
	 * comment may be stale or refer to a compile-time check
	 * elsewhere — confirm against pktsched headers.
	 */
	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !KPKTQ_EMPTY(&fq->fq_kpktq) && fq_tx_time_ready(fqs, fq, now, NULL)) {
		_PKTSCHED_PKT_INIT(&pkt);
		fq_getq_flow(fqs, fq, &pkt, now);
		ASSERT(pkt.pktsched_ptype == QP_PACKET);

		plen = pktsched_get_pkt_len(&pkt);
		fq->fq_deficit -= plen;
		/* first packet out of a fresh flow carries the new-flow flag */
		if (__improbable((fq->fq_flags & FQF_FRESH_FLOW) != 0)) {
			pkt.pktsched_pkt_kpkt->pkt_pflags |= PKT_F_NEW_FLOW;
			fq->fq_flags &= ~FQF_FRESH_FLOW;
		}

		/* append the packet to the caller's chain */
		if (head->cp_kpkt == NULL) {
			*head = pkt.pktsched_pkt;
		} else {
			ASSERT(tail->cp_kpkt != NULL);
			ASSERT(tail->cp_kpkt->pkt_nextpkt == NULL);
			tail->cp_kpkt->pkt_nextpkt = pkt.pktsched_pkt_kpkt;
		}
		*tail = pkt.pktsched_pkt;
		tail->cp_kpkt->pkt_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
			limit_reached = TRUE;
		}
	}
	KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq),
	    fq->fq_bytes, fq->fq_min_qdelay);

	*qempty = KPKTQ_EMPTY(&fq->fq_kpktq);
	return limit_reached;
}
282 #endif /* SKYWALK */
283
/*
 * Mbuf counterpart of fq_getq_flow_kpkt(): dequeue mbufs from one flow
 * queue onto the caller's head/tail chain until the flow's DRR deficit,
 * the caller's pkt/byte limits, or the queue itself is exhausted, or
 * fq_tx_time_ready() reports the flow is not yet eligible to transmit.
 * Per-class dequeue stats and the caller's counters are updated for
 * every packet moved.
 *
 * Returns TRUE when a caller-supplied limit was hit; *qempty reports
 * whether the flow queue is now empty.
 */
static boolean_t
fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    int64_t byte_limit, uint32_t pkt_limit, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t *byte_cnt, uint32_t *pkt_cnt,
    boolean_t *qempty, uint64_t now)
{
	u_int32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !MBUFQ_EMPTY(&fq->fq_mbufq) && fq_tx_time_ready(fqs, fq, now, NULL)) {
		_PKTSCHED_PKT_INIT(&pkt);
		fq_getq_flow(fqs, fq, &pkt, now);
		ASSERT(pkt.pktsched_ptype == QP_MBUF);

		plen = pktsched_get_pkt_len(&pkt);
		fq->fq_deficit -= plen;

		/* first packet out of a fresh flow carries the new-flow flag */
		if (__improbable((fq->fq_flags & FQF_FRESH_FLOW) != 0)) {
			pkt.pktsched_pkt_mbuf->m_pkthdr.pkt_flags |= PKTF_NEW_FLOW;
			fq->fq_flags &= ~FQF_FRESH_FLOW;
		}

		/* append the packet to the caller's chain */
		if (head->cp_mbuf == NULL) {
			*head = pkt.pktsched_pkt;
		} else {
			ASSERT(tail->cp_mbuf != NULL);
			ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
			tail->cp_mbuf->m_nextpkt = pkt.pktsched_pkt_mbuf;
		}
		*tail = pkt.pktsched_pkt;
		tail->cp_mbuf->m_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
			limit_reached = TRUE;
		}
	}
	KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq),
	    fq->fq_bytes, fq->fq_min_qdelay);

	*qempty = MBUFQ_EMPTY(&fq->fq_mbufq);
	return limit_reached;
}
338
339 fq_if_t *
fq_if_alloc(struct ifclassq * ifq,classq_pkt_type_t ptype)340 fq_if_alloc(struct ifclassq *ifq, classq_pkt_type_t ptype)
341 {
342 fq_if_t *fqs;
343
344 fqs = zalloc_flags(fq_if_zone, Z_WAITOK | Z_ZERO);
345 fqs->fqs_ifq = ifq;
346 fqs->fqs_ptype = ptype;
347
348 /* Configure packet drop limit across all queues */
349 fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(ifq);
350 STAILQ_INIT(&fqs->fqs_fclist);
351 TAILQ_INIT(&fqs->fqs_empty_list);
352 TAILQ_INIT(&fqs->fqs_combined_grp_list);
353
354 return fqs;
355 }
356
/*
 * Tear down a scheduler instance: purge all queued state, release the
 * per-group structures, then free the fq_if_t itself.  fqs must not be
 * used after this returns.
 */
void
fq_if_destroy(fq_if_t *fqs)
{
	fq_if_purge(fqs);
	fq_if_destroy_grps(fqs);

	fqs->fqs_ifq = NULL;
	zfree(fq_if_zone, fqs);
}
366
367 static inline uint8_t
fq_if_service_to_priority(fq_if_t * fqs,mbuf_svc_class_t svc)368 fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc)
369 {
370 uint8_t pri;
371
372 if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
373 switch (svc) {
374 case MBUF_SC_BK_SYS:
375 case MBUF_SC_BK:
376 pri = FQ_IF_BK_INDEX;
377 break;
378 case MBUF_SC_BE:
379 case MBUF_SC_RD:
380 case MBUF_SC_OAM:
381 pri = FQ_IF_BE_INDEX;
382 break;
383 case MBUF_SC_AV:
384 case MBUF_SC_RV:
385 case MBUF_SC_VI:
386 case MBUF_SC_SIG:
387 pri = FQ_IF_VI_INDEX;
388 break;
389 case MBUF_SC_VO:
390 case MBUF_SC_CTL:
391 pri = FQ_IF_VO_INDEX;
392 break;
393 default:
394 pri = FQ_IF_BE_INDEX; /* Use best effort by default */
395 break;
396 }
397 return pri;
398 }
399
400 /* scheduler is not managed by the driver */
401 switch (svc) {
402 case MBUF_SC_BK_SYS:
403 pri = FQ_IF_BK_SYS_INDEX;
404 break;
405 case MBUF_SC_BK:
406 pri = FQ_IF_BK_INDEX;
407 break;
408 case MBUF_SC_BE:
409 pri = FQ_IF_BE_INDEX;
410 break;
411 case MBUF_SC_RD:
412 pri = FQ_IF_RD_INDEX;
413 break;
414 case MBUF_SC_OAM:
415 pri = FQ_IF_OAM_INDEX;
416 break;
417 case MBUF_SC_AV:
418 pri = FQ_IF_AV_INDEX;
419 break;
420 case MBUF_SC_RV:
421 pri = FQ_IF_RV_INDEX;
422 break;
423 case MBUF_SC_VI:
424 pri = FQ_IF_VI_INDEX;
425 break;
426 case MBUF_SC_SIG:
427 pri = FQ_IF_SIG_INDEX;
428 break;
429 case MBUF_SC_VO:
430 pri = FQ_IF_VO_INDEX;
431 break;
432 case MBUF_SC_CTL:
433 pri = FQ_IF_CTL_INDEX;
434 break;
435 default:
436 pri = FQ_IF_BE_INDEX; /* Use best effort by default */
437 break;
438 }
439 return pri;
440 }
441
442 void
fq_if_classq_init(fq_if_group_t * fqg,uint32_t pri,uint32_t quantum,uint32_t drr_max,uint32_t svc_class)443 fq_if_classq_init(fq_if_group_t *fqg, uint32_t pri, uint32_t quantum,
444 uint32_t drr_max, uint32_t svc_class)
445 {
446 fq_if_classq_t *fq_cl;
447 VERIFY(pri < FQ_IF_MAX_CLASSES);
448 fq_cl = &fqg->fqg_classq[pri];
449
450 VERIFY(fq_cl->fcl_quantum == 0);
451 VERIFY(quantum != 0);
452 fq_cl->fcl_quantum = quantum;
453 fq_cl->fcl_pri = pri;
454 fq_cl->fcl_drr_max = drr_max;
455 fq_cl->fcl_service_class = svc_class;
456 fq_cl->fcl_next_tx_time = 0;
457 fq_cl->fcl_flags = 0;
458 STAILQ_INIT(&fq_cl->fcl_new_flows);
459 STAILQ_INIT(&fq_cl->fcl_old_flows);
460 }
461
/*
 * Enqueue a packet chain (head..tail, cnt packets / bytes total) into
 * the scheduler.  The target class is derived from the packet's service
 * class; the target group from the packet's qset index (skywalk only,
 * otherwise group 0).  On success the ifclassq and per-group length and
 * byte counters are updated.
 *
 * Returns 0 on success, EQFULL/EQSUSPENDED as flow-control advisories,
 * or ENOBUFS on drop.  *pdrop tells the caller whether the chain was
 * freed here (TRUE) or queued (FALSE).
 */
int
fq_if_enqueue_classq(struct ifclassq *ifq, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t *pdrop)
{
	uint8_t pri, grp_idx = 0;
	fq_if_t *fqs;
	fq_if_classq_t *fq_cl;
	fq_if_group_t *fq_group;
	int ret;
	mbuf_svc_class_t svc;
	pktsched_pkt_t pkt;

	pktsched_pkt_encap_chain(&pkt, head, tail, cnt, bytes);

	fqs = (fq_if_t *)ifq->ifcq_disc;
	svc = pktsched_get_pkt_svc(&pkt);
#if SKYWALK
	/* skywalk packets select their group through the qset index */
	if (head->cp_ptype == QP_PACKET) {
		grp_idx = head->cp_kpkt->pkt_qset_idx;
	}
#endif /* SKYWALK */
	pri = fq_if_service_to_priority(fqs, svc);
	VERIFY(pri < FQ_IF_MAX_CLASSES);

	IFCQ_LOCK_SPIN(ifq);
	fq_group = fq_if_find_grp(fqs, grp_idx);
	fq_cl = &fq_group->fqg_classq[pri];

	if (__improbable(svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1)) {
		IFCQ_UNLOCK(ifq);
		/* BK_SYS is currently throttled */
		os_atomic_inc(&fq_cl->fcl_stat.fcl_throttle_drops, relaxed);
		pktsched_free_pkt(&pkt);
		*pdrop = TRUE;
		ret = EQSUSPENDED;
		goto done;
	}

	ASSERT(pkt.pktsched_ptype == fqs->fqs_ptype);
	ret = fq_addq(fqs, fq_group, &pkt, fq_cl);
	if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
		if (((fq_group->fqg_bitmaps[FQ_IF_ER] | fq_group->fqg_bitmaps[FQ_IF_EB]) &
		    (1 << pri)) == 0) {
			/*
			 * this group is not in ER or EB groups,
			 * mark it as IB
			 */
			pktsched_bit_set(pri, &fq_group->fqg_bitmaps[FQ_IF_IB]);
		}
	}

	if (__improbable(ret != 0)) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
			*pdrop = FALSE;
		} else if (ret == CLASSQEQ_COMPRESSED) {
			/* enqueued via compression; treat as plain success */
			ret = 0;
			*pdrop = FALSE;
		} else {
			/* drop cases: packet was not queued, free it here */
			IFCQ_UNLOCK(ifq);
			*pdrop = TRUE;
			pktsched_free_pkt(&pkt);
			switch (ret) {
			case CLASSQEQ_DROP:
				ret = ENOBUFS;
				goto done;
			case CLASSQEQ_DROP_FC:
				ret = EQFULL;
				goto done;
			case CLASSQEQ_DROP_SP:
				ret = EQSUSPENDED;
				goto done;
			default:
				VERIFY(0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			/* NOTREACHED */
			__builtin_unreachable();
		}
	} else {
		*pdrop = FALSE;
	}
	IFCQ_ADD_LEN(ifq, cnt);
	IFCQ_INC_BYTES(ifq, bytes);


	FQS_GRP_ADD_LEN(fqs, grp_idx, cnt);
	FQS_GRP_INC_BYTES(fqs, grp_idx, bytes);

	IFCQ_UNLOCK(ifq);
done:
#if DEBUG || DEVELOPMENT
	/* suppress the advisory when flow-control advisories are disabled */
	if (__improbable((ret == EQFULL) && (ifclassq_flow_control_adv == 0))) {
		ret = 0;
	}
#endif /* DEBUG || DEVELOPMENT */
	return ret;
}
562
/*
 * Dequeue a single packet for group grp_idx by delegating to the
 * multi-packet dequeue with a one-packet limit.
 */
void
fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_t *pkt, uint8_t grp_idx)
{
	(void) fq_if_dequeue_classq_multi(ifq, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
}
569
/*
 * Dequeue a single packet of service class svc for group grp_idx by
 * delegating to the service-class multi-packet dequeue with a
 * one-packet limit.
 */
void
fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
    classq_pkt_t *pkt, uint8_t grp_idx)
{
	(void) fq_if_dequeue_sc_classq_multi(ifq, svc, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
}
577
578 static inline void
fq_dqlist_add(flowq_dqlist_t * fq_dqlist_head,fq_t * fq)579 fq_dqlist_add(flowq_dqlist_t *fq_dqlist_head, fq_t *fq)
580 {
581 ASSERT(fq->fq_dq_head.cp_mbuf == NULL);
582 ASSERT(!fq->fq_in_dqlist);
583 STAILQ_INSERT_TAIL(fq_dqlist_head, fq, fq_dqlink);
584 fq->fq_in_dqlist = true;
585 }
586
/*
 * Unlink a staged flow from the dequeue list, splicing its private
 * packet chain (fq_dq_head..fq_dq_tail) onto the caller's head/tail
 * chain.  A flow with an empty private chain is simply unlinked.  The
 * flow's chain markers are reset either way.
 */
static inline void
fq_dqlist_remove(flowq_dqlist_t *fq_dqlist_head, fq_t *fq, classq_pkt_t *head,
    classq_pkt_t *tail, classq_pkt_type_t ptype)
{
	ASSERT(fq->fq_in_dqlist);
	/* nothing queued on this flow; just unlink it */
	if (fq->fq_dq_head.cp_mbuf == NULL) {
		goto done;
	}

	if (head->cp_mbuf == NULL) {
		*head = fq->fq_dq_head;
	} else {
		ASSERT(tail->cp_mbuf != NULL);

		/* link the flow's chain after the caller's current tail */
		switch (ptype) {
		case QP_MBUF:
			ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
			tail->cp_mbuf->m_nextpkt = fq->fq_dq_head.cp_mbuf;
			ASSERT(fq->fq_dq_tail.cp_mbuf->m_nextpkt == NULL);
			break;
#if SKYWALK
		case QP_PACKET:
			ASSERT(tail->cp_kpkt->pkt_nextpkt == NULL);
			tail->cp_kpkt->pkt_nextpkt = fq->fq_dq_head.cp_kpkt;
			ASSERT(fq->fq_dq_tail.cp_kpkt->pkt_nextpkt == NULL);
			break;
#endif /* SKYWALK */
		default:
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}
	*tail = fq->fq_dq_tail;
done:
	STAILQ_REMOVE(fq_dqlist_head, fq, flowq, fq_dqlink);
	CLASSQ_PKT_INIT(&fq->fq_dq_head);
	CLASSQ_PKT_INIT(&fq->fq_dq_tail);
	fq->fq_in_dqlist = false;
}
627
628 static inline void
fq_dqlist_get_packet_list(flowq_dqlist_t * fq_dqlist_head,classq_pkt_t * head,classq_pkt_t * tail,classq_pkt_type_t ptype)629 fq_dqlist_get_packet_list(flowq_dqlist_t *fq_dqlist_head, classq_pkt_t *head,
630 classq_pkt_t *tail, classq_pkt_type_t ptype)
631 {
632 fq_t *fq, *tfq;
633
634 STAILQ_FOREACH_SAFE(fq, fq_dqlist_head, fq_dqlink, tfq) {
635 fq_dqlist_remove(fq_dqlist_head, fq, head, tail, ptype);
636 }
637 }
638
639 static int
fq_if_grps_bitmap_ffs(fq_grp_tailq_t * grp_list,int pri,fq_if_state state,fq_if_group_t ** selected_grp)640 fq_if_grps_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri, fq_if_state state,
641 fq_if_group_t **selected_grp)
642 {
643 #pragma unused(pri)
644
645 fq_if_group_t *grp;
646 uint32_t highest_pri = FQ_IF_MAX_CLASSES;
647 int ret_pri = 0;
648
649 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
650 uint32_t cur_pri = pktsched_ffs(grp->fqg_bitmaps[state]);
651 /* bitmap is empty in this case */
652 if (cur_pri == 0) {
653 continue;
654 }
655 if (cur_pri <= highest_pri) {
656 highest_pri = cur_pri;
657 ret_pri = cur_pri;
658 *selected_grp = grp;
659 }
660 }
661 return ret_pri;
662 }
663
664 static boolean_t
fq_if_grps_bitmap_zeros(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)665 fq_if_grps_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
666 {
667 #pragma unused(pri)
668
669 fq_if_group_t *grp;
670
671 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
672 if (grp->fqg_bitmaps[state] != 0) {
673 return FALSE;
674 }
675 }
676 return TRUE;
677 }
678
679 static void
fq_if_grps_bitmap_cpy(fq_grp_tailq_t * grp_list,int pri,fq_if_state dst_state,fq_if_state src_state)680 fq_if_grps_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
681 fq_if_state src_state)
682 {
683 #pragma unused(pri)
684
685 fq_if_group_t *grp;
686 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
687 grp->fqg_bitmaps[dst_state] = grp->fqg_bitmaps[src_state];
688 }
689 }
690
691 static void
fq_if_grps_bitmap_clr(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)692 fq_if_grps_bitmap_clr(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
693 {
694 #pragma unused(pri)
695
696 fq_if_group_t *grp;
697 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
698 grp->fqg_bitmaps[state] = 0;
699 }
700 }
701
702 static void
fq_if_grps_bitmap_move(fq_grp_tailq_t * grp_list,int pri,fq_if_state dst_state,fq_if_state src_state)703 fq_if_grps_bitmap_move(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
704 fq_if_state src_state)
705 {
706 #pragma unused(pri)
707
708 fq_if_group_t *grp;
709 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
710 grp->fqg_bitmaps[dst_state] =
711 grp->fqg_bitmaps[dst_state] | grp->fqg_bitmaps[src_state];
712 grp->fqg_bitmaps[src_state] = 0;
713 }
714 }
715
716 static int
fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t * grp_list,int pri,fq_if_state state,fq_if_group_t ** selected_grp)717 fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri, fq_if_state state,
718 fq_if_group_t **selected_grp)
719 {
720 fq_if_group_t *grp;
721 int ret_pri = 0;
722
723 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
724 if (pktsched_bit_tst(pri, &grp->fqg_bitmaps[state])) {
725 /* +1 to match the semantics of pktsched_ffs */
726 ret_pri = pri + 1;
727 *selected_grp = grp;
728 break;
729 }
730 }
731
732 return ret_pri;
733 }
734
735 static boolean_t
fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)736 fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
737 {
738 fq_if_group_t *grp;
739
740 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
741 if (pktsched_bit_tst(pri, &grp->fqg_bitmaps[state])) {
742 return FALSE;
743 }
744 }
745 return TRUE;
746 }
747
748 static void
fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t * grp_list,int pri,fq_if_state dst_state,fq_if_state src_state)749 fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
750 fq_if_state src_state)
751 {
752 fq_if_group_t *grp;
753
754 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
755 pktsched_bit_cpy(pri, &grp->fqg_bitmaps[dst_state],
756 &grp->fqg_bitmaps[src_state]);
757 }
758 }
759
760 static void
fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)761 fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
762 {
763 fq_if_group_t *grp;
764
765 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
766 pktsched_bit_clr(pri, &grp->fqg_bitmaps[state]);
767 }
768 }
769
770 static void
fq_if_grps_sc_bitmap_move(fq_grp_tailq_t * grp_list,int pri,fq_if_state dst_state,fq_if_state src_state)771 fq_if_grps_sc_bitmap_move(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
772 fq_if_state src_state)
773 {
774 fq_if_group_t *grp;
775
776 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
777 pktsched_bit_move(pri, &grp->fqg_bitmaps[dst_state],
778 &grp->fqg_bitmaps[src_state]);
779 pktsched_bit_clr(pri, &grp->fqg_bitmaps[src_state]);
780 }
781 }
782
783 static void
fq_if_schedule_pacemaker(struct ifclassq * ifq,uint64_t next_tx_time)784 fq_if_schedule_pacemaker(struct ifclassq *ifq, uint64_t next_tx_time)
785 {
786 if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
787 return;
788 }
789 ASSERT(next_tx_time != FQ_INVALID_TX_TS);
790
791 struct ifnet *ifp = ifq->ifcq_ifp;
792 ifnet_start_set_pacemaker_time(ifp, next_tx_time);
793 }
794
/*
 * Core dequeue loop shared by the plain and service-class entry points.
 * Runs DRR over the priority classes of either a single group (grp_idx)
 * or the combined group list, driving each class through the
 * ER/EB/IB/IR bitmap states and a per-class byte budget, and — when
 * pacing and L4S are enabled — tracking the earliest per-class tx time
 * so a pacemaker can be scheduled for paced classes.  Collected packets
 * are returned as one chain through first_packet/last_packet with
 * counts in retpktcnt/retbytecnt.  Always returns 0.
 */
static int
fq_if_dequeue_classq_multi_common(struct ifclassq *ifq, mbuf_svc_class_t svc,
    u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
    classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
    uint8_t grp_idx)
{
	uint32_t total_pktcnt = 0, total_bytecnt = 0;
	/* ("fisrt") typo in the macro argument is historical; it appears unused */
	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
	classq_pkt_t tmp = CLASSQ_PKT_INITIALIZER(tmp);
	fq_if_append_pkt_t append_pkt;
	flowq_dqlist_t fq_dqlist_head;
	fq_if_classq_t *fq_cl;
	fq_grp_tailq_t *grp_list, tmp_grp_list;
	fq_if_group_t *fq_grp = NULL;
	fq_if_t *fqs;
	uint64_t now, next_tx_time = FQ_INVALID_TX_TS;
	int pri = 0, svc_pri = 0;
	bool all_paced = true;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	fqs = (fq_if_t *)ifq->ifcq_disc;
	STAILQ_INIT(&fq_dqlist_head);

	/* pick the chain-append helper matching the scheduler's packet type */
	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		append_pkt = fq_if_append_mbuf;
		break;

#if SKYWALK
	case QP_PACKET:
		append_pkt = fq_if_append_pkt;
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	now = fq_codel_get_time();
	if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
		svc_pri = fq_if_service_to_priority(fqs, svc);
	} else {
		VERIFY(svc == MBUF_SC_UNSPEC);
	}

	/*
	 * Work either on the shared combined-group list or on a temporary
	 * single-entry list holding just this group.
	 */
	if (fq_if_is_grp_combined(fqs, grp_idx)) {
		grp_list = &fqs->fqs_combined_grp_list;
		VERIFY(!TAILQ_EMPTY(grp_list));
	} else {
		grp_list = &tmp_grp_list;
		fq_grp = fq_if_find_grp(fqs, grp_idx);
		TAILQ_INIT(grp_list);
		TAILQ_INSERT_TAIL(grp_list, fq_grp, fqg_grp_link);
	}

	for (;;) {
		uint32_t pktcnt = 0, bytecnt = 0;
		classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
		classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
		bool fq_cl_all_paced = false;
		uint64_t fq_cl_next_tx_time = FQ_INVALID_TX_TS;

		/*
		 * Nothing eligible (ER) or blocked-eligible (EB): promote
		 * the ineligible-blocked (IB) classes to EB; if still
		 * nothing, we are done with this DRR round.
		 */
		if (fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_ER) &&
		    fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_EB)) {
			fqs->grp_bitmaps_cpy(grp_list, svc_pri, FQ_IF_EB, FQ_IF_IB);
			fqs->grp_bitmaps_clr(grp_list, svc_pri, FQ_IF_IB);
			if (fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_EB)) {
				if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
					/*
					 * Move fq_cl in IR back to ER, so that they will inspected with priority
					 * the next time the driver dequeues
					 */
					fqs->grp_bitmaps_cpy(grp_list, svc_pri, FQ_IF_ER, FQ_IF_IR);
					fqs->grp_bitmaps_clr(grp_list, svc_pri, FQ_IF_IR);
				}
				break;
			}
		}
		pri = fqs->grp_bitmaps_ffs(grp_list, svc_pri, FQ_IF_ER, &fq_grp);
		if (pri == 0) {
			/*
			 * There are no ER flows, move the highest
			 * priority one from EB if there are any in that
			 * category
			 */
			pri = fqs->grp_bitmaps_ffs(grp_list, svc_pri, FQ_IF_EB, &fq_grp);
			VERIFY(pri > 0);
			VERIFY(fq_grp != NULL);
			pktsched_bit_clr((pri - 1), &fq_grp->fqg_bitmaps[FQ_IF_EB]);
			pktsched_bit_set((pri - 1), &fq_grp->fqg_bitmaps[FQ_IF_ER]);
		}
		VERIFY(fq_grp != NULL);
		pri--; /* index starts at 0 */
		fq_cl = &fq_grp->fqg_classq[pri];

		if (fq_cl->fcl_budget <= 0) {
			/* Update the budget */
			fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
			    fq_cl->fcl_stat.fcl_flows_cnt) *
			    fq_cl->fcl_quantum);
			if (fq_cl->fcl_budget <= 0) {
				goto state_change;
			}
		}
		fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
		    (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
		    &bytecnt, &fq_dqlist_head, true, now, &fq_cl_all_paced,
		    &fq_cl_next_tx_time);
		/* splice this class's chain onto the running result chain */
		if (head.cp_mbuf != NULL) {
			ASSERT(STAILQ_EMPTY(&fq_dqlist_head));
			if (first.cp_mbuf == NULL) {
				first = head;
			} else {
				ASSERT(last.cp_mbuf != NULL);
				append_pkt(&last, &head);
			}
			last = tail;
			append_pkt(&last, &tmp);
		}
		/* remember the earliest tx time among fully-paced classes */
		if (fq_cl_all_paced && fq_cl_next_tx_time < next_tx_time) {
			fq_cl->fcl_stat.fcl_fcl_pacemaker_needed++;
			next_tx_time = fq_cl_next_tx_time;
		}
		fq_cl->fcl_budget -= bytecnt;
		total_pktcnt += pktcnt;
		total_bytecnt += bytecnt;

		/*
		 * If the class has exceeded the budget but still has data
		 * to send, move it to IB
		 */
state_change:
		VERIFY(fq_grp != NULL);
		all_paced &= fq_cl_all_paced;
		if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
			if (fq_cl->fcl_budget <= 0) {
				pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
				pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
			} else if (fq_cl_all_paced) {
				if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
					/*
					 * If a fq_cl still has budget but only paced queues, park it
					 * to IR so that we will not keep loopping over it
					 */
					pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IR]);
					pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
				}
			}
		} else {
			/* class fully drained: drop it from all run states */
			pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
			VERIFY(((fq_grp->fqg_bitmaps[FQ_IF_ER] |
			    fq_grp->fqg_bitmaps[FQ_IF_EB] |
			    fq_grp->fqg_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
			fq_cl->fcl_budget = 0;
		}
		if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt) {
			if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
				/*
				 * Move fq_cl in IR back to ER, so that they will inspected with priority
				 * the next time the driver dequeues
				 */
				fqs->grp_bitmaps_move(grp_list, svc_pri, FQ_IF_ER, FQ_IF_IR);
			}
			break;
		}
	}

	/* undo the temporary single-group list built above */
	if (!fq_if_is_grp_combined(fqs, grp_idx)) {
		TAILQ_REMOVE(grp_list, fq_grp, fqg_grp_link);
		VERIFY(TAILQ_EMPTY(grp_list));
	}

	fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last,
	    fqs->fqs_ptype);

	if (__probable(first_packet != NULL)) {
		*first_packet = first;
	}
	if (last_packet != NULL) {
		*last_packet = last;
	}
	if (retpktcnt != NULL) {
		*retpktcnt = total_pktcnt;
	}
	if (retbytecnt != NULL) {
		*retbytecnt = total_bytecnt;
	}
	if (next_tx_time != FQ_INVALID_TX_TS) {
		ASSERT(next_tx_time > now);
		fq_if_schedule_pacemaker(ifq, next_tx_time);
	}

	IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
	fq_if_purge_empty_flow_list(fqs, now, false);
	return 0;
}
995
996 int
fq_if_dequeue_classq_multi(struct ifclassq * ifq,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt,uint8_t grp_idx)997 fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
998 u_int32_t maxbytecnt, classq_pkt_t *first_packet,
999 classq_pkt_t *last_packet, u_int32_t *retpktcnt,
1000 u_int32_t *retbytecnt, uint8_t grp_idx)
1001 {
1002 return fq_if_dequeue_classq_multi_common(ifq, MBUF_SC_UNSPEC, maxpktcnt, maxbytecnt,
1003 first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
1004 }
1005
1006 int
fq_if_dequeue_sc_classq_multi(struct ifclassq * ifq,mbuf_svc_class_t svc,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt,uint8_t grp_idx)1007 fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc,
1008 u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
1009 classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
1010 uint8_t grp_idx)
1011 {
1012 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1013
1014 if (fq_if_is_grp_combined(fqs, grp_idx)) {
1015 return fq_if_dequeue_classq_multi_common(ifq, svc, maxpktcnt, maxbytecnt,
1016 first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
1017 } else {
1018 /*
1019 * take a shortcut here since there is no need to schedule
1020 * one single service class.
1021 */
1022 return fq_if_dequeue_sc_classq_multi_separate(ifq, svc, maxpktcnt, maxbytecnt,
1023 first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
1024 }
1025 }
1026
1027 static int
fq_if_dequeue_sc_classq_multi_separate(struct ifclassq * ifq,mbuf_svc_class_t svc,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt,uint8_t grp_idx)1028 fq_if_dequeue_sc_classq_multi_separate(struct ifclassq *ifq, mbuf_svc_class_t svc,
1029 u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
1030 classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
1031 uint8_t grp_idx)
1032 {
1033 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1034 uint8_t pri;
1035 u_int32_t total_pktcnt = 0, total_bytecnt = 0;
1036 fq_if_classq_t *fq_cl;
1037 classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
1038 classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
1039 fq_if_append_pkt_t append_pkt;
1040 flowq_dqlist_t fq_dqlist_head;
1041 fq_if_group_t *fq_grp;
1042 uint64_t now;
1043
1044 switch (fqs->fqs_ptype) {
1045 case QP_MBUF:
1046 append_pkt = fq_if_append_mbuf;
1047 break;
1048
1049 #if SKYWALK
1050 case QP_PACKET:
1051 append_pkt = fq_if_append_pkt;
1052 break;
1053 #endif /* SKYWALK */
1054
1055 default:
1056 VERIFY(0);
1057 /* NOTREACHED */
1058 __builtin_unreachable();
1059 }
1060
1061 STAILQ_INIT(&fq_dqlist_head);
1062 now = fq_codel_get_time();
1063
1064 pri = fq_if_service_to_priority(fqs, svc);
1065 fq_grp = fq_if_find_grp(fqs, grp_idx);
1066 fq_cl = &fq_grp->fqg_classq[pri];
1067
1068 /*
1069 * Now we have the queue for a particular service class. We need
1070 * to dequeue as many packets as needed, first from the new flows
1071 * and then from the old flows.
1072 */
1073 while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt &&
1074 fq_cl->fcl_stat.fcl_pkt_cnt > 0) {
1075 classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
1076 classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
1077 u_int32_t pktcnt = 0, bytecnt = 0;
1078 bool all_paced = false;
1079 uint64_t next_tx_time = FQ_INVALID_TX_TS;
1080
1081 fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
1082 (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
1083 &bytecnt, &fq_dqlist_head, false, now, &all_paced, &next_tx_time);
1084 if (head.cp_mbuf != NULL) {
1085 if (first.cp_mbuf == NULL) {
1086 first = head;
1087 } else {
1088 ASSERT(last.cp_mbuf != NULL);
1089 append_pkt(&last, &head);
1090 }
1091 last = tail;
1092 }
1093 total_pktcnt += pktcnt;
1094 total_bytecnt += bytecnt;
1095
1096 if (next_tx_time != FQ_INVALID_TX_TS) {
1097 ASSERT(next_tx_time > now);
1098 fq_cl->fcl_stat.fcl_fcl_pacemaker_needed++;
1099 fq_if_schedule_pacemaker(ifq, next_tx_time);
1100 break;
1101 }
1102 }
1103
1104 /*
1105 * Mark classq as IB if it's not idle, so that we can
1106 * start without re-init the bitmaps when it's switched
1107 * to combined mode.
1108 */
1109 if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
1110 pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
1111 pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
1112 pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_EB]);
1113 } else {
1114 pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
1115 VERIFY(((fq_grp->fqg_bitmaps[FQ_IF_ER] |
1116 fq_grp->fqg_bitmaps[FQ_IF_EB] |
1117 fq_grp->fqg_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
1118 }
1119
1120 fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last, fqs->fqs_ptype);
1121
1122 if (__probable(first_packet != NULL)) {
1123 *first_packet = first;
1124 }
1125 if (last_packet != NULL) {
1126 *last_packet = last;
1127 }
1128 if (retpktcnt != NULL) {
1129 *retpktcnt = total_pktcnt;
1130 }
1131 if (retbytecnt != NULL) {
1132 *retbytecnt = total_bytecnt;
1133 }
1134
1135 IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
1136 fq_if_purge_empty_flow_list(fqs, now, false);
1137 return 0;
1138 }
1139
/*
 * Drain and free every packet queued on flow `fq`, charge the result to
 * the interface drop counters, and walk the flow through its state
 * machine (new/old -> empty -> destroyed).  On return, *pktsp/*bytesp
 * (both optional) hold the purge counts.  If the flow was already on
 * the empty list it is destroyed here; the caller must not touch it
 * afterwards.
 */
static void
fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, uint32_t *pktsp,
    uint32_t *bytesp, uint64_t now)
{
	fq_if_classq_t *fq_cl;
	u_int32_t pkts, bytes;
	pktsched_pkt_t pkt;
	fq_if_group_t *grp;

	fq_cl = &FQ_CLASSQ(fq);
	grp = FQ_GROUP(fq);
	pkts = bytes = 0;
	_PKTSCHED_PKT_INIT(&pkt);
	/* Dequeue until the flow runs dry, freeing each packet. */
	for (;;) {
		fq_getq_flow(fqs, fq, &pkt, now);
		if (pkt.pktsched_pkt_mbuf == NULL) {
			VERIFY(pkt.pktsched_ptype == QP_INVALID);
			break;
		}
		pkts++;
		bytes += pktsched_get_pkt_len(&pkt);
		pktsched_free_pkt(&pkt);
	}
	KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq), fq->fq_bytes, fq->fq_min_qdelay);

	IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);

	/* move through the flow queue states */
	VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW | FQF_EMPTY_FLOW)));
	if (fq->fq_flags & FQF_NEW_FLOW) {
		fq_if_empty_new_flow(fq, fq_cl);
	}
	if (fq->fq_flags & FQF_OLD_FLOW) {
		fq_if_empty_old_flow(fqs, fq_cl, fq, now);
	}
	if (fq->fq_flags & FQF_EMPTY_FLOW) {
		fq_if_purge_empty_flow(fqs, fq);
		/* flow memory is gone; guard against use-after-free below */
		fq = NULL;
	}

	/* If the class went idle, clear its bits in every scheduler state. */
	if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
		int i;
		for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
			pktsched_bit_clr(fq_cl->fcl_pri, &grp->fqg_bitmaps[i]);
		}
	}

	if (pktsp != NULL) {
		*pktsp = pkts;
	}
	if (bytesp != NULL) {
		*bytesp = bytes;
	}
}
1195
1196 static void
fq_if_purge_classq(fq_if_t * fqs,fq_if_classq_t * fq_cl)1197 fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
1198 {
1199 fq_t *fq, *tfq;
1200 uint64_t now;
1201
1202 now = fq_codel_get_time();
1203 /*
1204 * Take each flow from new/old flow list and flush mbufs
1205 * in that flow
1206 */
1207 STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
1208 fq_if_purge_flow(fqs, fq, NULL, NULL, now);
1209 }
1210 STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
1211 fq_if_purge_flow(fqs, fq, NULL, NULL, now);
1212 }
1213 VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
1214 VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));
1215
1216 STAILQ_INIT(&fq_cl->fcl_new_flows);
1217 STAILQ_INIT(&fq_cl->fcl_old_flows);
1218 fq_cl->fcl_budget = 0;
1219 }
1220
1221 static void
fq_if_purge(fq_if_t * fqs)1222 fq_if_purge(fq_if_t *fqs)
1223 {
1224 uint64_t now;
1225 fq_if_group_t *grp;
1226 int i;
1227
1228 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1229 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
1230 if (fqs->fqs_classq_groups[grp_idx] == NULL) {
1231 continue;
1232 }
1233
1234 grp = fq_if_find_grp(fqs, grp_idx);
1235 fq_if_purge_grp(fqs, grp);
1236 }
1237
1238 now = fq_codel_get_time();
1239 fq_if_purge_empty_flow_list(fqs, now, true);
1240
1241 VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));
1242 VERIFY(TAILQ_EMPTY(&fqs->fqs_empty_list));
1243
1244 fqs->fqs_large_flow = NULL;
1245 for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) {
1246 VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i]));
1247 }
1248
1249 IFCQ_LEN(fqs->fqs_ifq) = 0;
1250 IFCQ_BYTES(fqs->fqs_ifq) = 0;
1251 }
1252
1253 static void
fq_if_purge_sc(fq_if_t * fqs,cqrq_purge_sc_t * req)1254 fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
1255 {
1256 fq_t *fq;
1257 uint64_t now;
1258 fq_if_group_t *grp;
1259
1260 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
1261 req->packets = req->bytes = 0;
1262 VERIFY(req->flow != 0);
1263
1264 now = fq_codel_get_time();
1265
1266 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
1267 if (fqs->fqs_classq_groups[grp_idx] == NULL) {
1268 continue;
1269 }
1270 uint32_t bytes = 0, pkts = 0;
1271
1272 grp = fq_if_find_grp(fqs, grp_idx);
1273 /*
1274 * Packet and traffic type are needed only if we want
1275 * to create a flow queue.
1276 */
1277 fq = fq_if_hash_pkt(fqs, grp, req->flow, req->sc, 0, false, FQ_TFC_C);
1278 if (fq != NULL) {
1279 fq_if_purge_flow(fqs, fq, &pkts, &bytes, now);
1280 req->bytes += bytes;
1281 req->packets += pkts;
1282 }
1283 }
1284 }
1285
1286 static uint16_t
fq_if_calc_quantum(struct ifnet * ifp)1287 fq_if_calc_quantum(struct ifnet *ifp)
1288 {
1289 uint16_t quantum;
1290
1291 switch (ifp->if_family) {
1292 case IFNET_FAMILY_ETHERNET:
1293 VERIFY((ifp->if_mtu + ETHER_HDR_LEN) <= UINT16_MAX);
1294 quantum = (uint16_t)ifp->if_mtu + ETHER_HDR_LEN;
1295 break;
1296
1297 case IFNET_FAMILY_CELLULAR:
1298 case IFNET_FAMILY_IPSEC:
1299 case IFNET_FAMILY_UTUN:
1300 VERIFY(ifp->if_mtu <= UINT16_MAX);
1301 quantum = (uint16_t)ifp->if_mtu;
1302 break;
1303
1304 default:
1305 quantum = FQ_CODEL_DEFAULT_QUANTUM;
1306 break;
1307 }
1308
1309 if ((ifp->if_hwassist & IFNET_TSOF) != 0) {
1310 VERIFY(ifp->if_tso_v4_mtu <= UINT16_MAX);
1311 VERIFY(ifp->if_tso_v6_mtu <= UINT16_MAX);
1312 quantum = (uint16_t)MAX(ifp->if_tso_v4_mtu, ifp->if_tso_v6_mtu);
1313 quantum = (quantum != 0) ? quantum : IF_MAXMTU;
1314 }
1315
1316 quantum = MAX(FQ_CODEL_DEFAULT_QUANTUM, quantum);
1317 #if DEBUG || DEVELOPMENT
1318 quantum = (fq_codel_quantum != 0) ? fq_codel_quantum : quantum;
1319 #endif /* DEBUG || DEVELOPMENT */
1320 VERIFY(quantum != 0);
1321 return quantum;
1322 }
1323
1324 static void
fq_if_mtu_update(fq_if_t * fqs)1325 fq_if_mtu_update(fq_if_t *fqs)
1326 {
1327 #define _FQ_CLASSQ_UPDATE_QUANTUM(_grp, _s, _q) \
1328 (_grp)->fqg_classq[FQ_IF_ ## _s ## _INDEX].fcl_quantum = \
1329 FQ_CODEL_QUANTUM_ ## _s(_q) \
1330
1331 uint32_t quantum;
1332 fq_if_group_t *grp;
1333
1334 quantum = fq_if_calc_quantum(fqs->fqs_ifq->ifcq_ifp);
1335
1336 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
1337 if (fqs->fqs_classq_groups[grp_idx] == NULL) {
1338 continue;
1339 }
1340
1341 grp = fq_if_find_grp(fqs, grp_idx);
1342
1343 if ((fqs->fqs_flags & FQS_DRIVER_MANAGED) != 0) {
1344 _FQ_CLASSQ_UPDATE_QUANTUM(grp, BK, quantum);
1345 _FQ_CLASSQ_UPDATE_QUANTUM(grp, BE, quantum);
1346 _FQ_CLASSQ_UPDATE_QUANTUM(grp, VI, quantum);
1347 _FQ_CLASSQ_UPDATE_QUANTUM(grp, VO, quantum);
1348 } else {
1349 _FQ_CLASSQ_UPDATE_QUANTUM(grp, BK_SYS, quantum);
1350 _FQ_CLASSQ_UPDATE_QUANTUM(grp, BK, quantum);
1351 _FQ_CLASSQ_UPDATE_QUANTUM(grp, BE, quantum);
1352 _FQ_CLASSQ_UPDATE_QUANTUM(grp, RD, quantum);
1353 _FQ_CLASSQ_UPDATE_QUANTUM(grp, OAM, quantum);
1354 _FQ_CLASSQ_UPDATE_QUANTUM(grp, AV, quantum);
1355 _FQ_CLASSQ_UPDATE_QUANTUM(grp, RV, quantum);
1356 _FQ_CLASSQ_UPDATE_QUANTUM(grp, VI, quantum);
1357 _FQ_CLASSQ_UPDATE_QUANTUM(grp, VO, quantum);
1358 _FQ_CLASSQ_UPDATE_QUANTUM(grp, CTL, quantum);
1359 }
1360 }
1361 #undef _FQ_CLASSQ_UPDATE_QUANTUM
1362 }
1363
1364 static void
fq_if_event(fq_if_t * fqs,cqev_t ev)1365 fq_if_event(fq_if_t *fqs, cqev_t ev)
1366 {
1367 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
1368
1369 switch (ev) {
1370 case CLASSQ_EV_LINK_UP:
1371 case CLASSQ_EV_LINK_DOWN:
1372 fq_if_purge(fqs);
1373 break;
1374 case CLASSQ_EV_LINK_MTU:
1375 fq_if_mtu_update(fqs);
1376 break;
1377 default:
1378 break;
1379 }
1380 }
1381
1382 static void
fq_if_classq_suspend(fq_if_t * fqs,fq_if_classq_t * fq_cl)1383 fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
1384 {
1385 fq_if_purge_classq(fqs, fq_cl);
1386 fqs->fqs_throttle = 1;
1387 fq_cl->fcl_stat.fcl_throttle_on++;
1388 KDBG(AQM_KTRACE_AON_THROTTLE | DBG_FUNC_START,
1389 fqs->fqs_ifq->ifcq_ifp->if_index, 0, 0, 0);
1390 }
1391
1392 static void
fq_if_classq_resume(fq_if_t * fqs,fq_if_classq_t * fq_cl)1393 fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
1394 {
1395 VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
1396 fqs->fqs_throttle = 0;
1397 fq_cl->fcl_stat.fcl_throttle_off++;
1398 KDBG(AQM_KTRACE_AON_THROTTLE | DBG_FUNC_END,
1399 fqs->fqs_ifq->ifcq_ifp->if_index, 0, 0, 0);
1400 }
1401
1402
1403 static int
fq_if_throttle(fq_if_t * fqs,cqrq_throttle_t * tr)1404 fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
1405 {
1406 struct ifclassq *ifq = fqs->fqs_ifq;
1407 uint8_t index;
1408 fq_if_group_t *grp;
1409
1410 #if !MACH_ASSERT
1411 #pragma unused(ifq)
1412 #endif
1413 IFCQ_LOCK_ASSERT_HELD(ifq);
1414
1415 if (!tr->set) {
1416 tr->level = fqs->fqs_throttle;
1417 return 0;
1418 }
1419
1420 if (tr->level == fqs->fqs_throttle) {
1421 return EALREADY;
1422 }
1423
1424 /* Throttling is allowed on BK_SYS class only */
1425 index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS);
1426
1427 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
1428 if (fqs->fqs_classq_groups[grp_idx] == NULL) {
1429 continue;
1430 }
1431 grp = fq_if_find_grp(fqs, grp_idx);
1432 switch (tr->level) {
1433 case IFNET_THROTTLE_OFF:
1434 fq_if_classq_resume(fqs, &grp->fqg_classq[index]);
1435 break;
1436 case IFNET_THROTTLE_OPPORTUNISTIC:
1437 fq_if_classq_suspend(fqs, &grp->fqg_classq[index]);
1438 break;
1439 default:
1440 break;
1441 }
1442 }
1443 return 0;
1444 }
1445
1446 static inline boolean_t
fq_if_is_fq_cl_paced(fq_if_classq_t * fq_cl,uint64_t now)1447 fq_if_is_fq_cl_paced(fq_if_classq_t *fq_cl, uint64_t now)
1448 {
1449 if ((fq_cl->fcl_flags & FCL_PACED) != 0 && fq_cl->fcl_next_tx_time > now) {
1450 return true;
1451 }
1452
1453 fq_cl->fcl_flags &= ~FCL_PACED;
1454 fq_cl->fcl_next_tx_time = 0;
1455 return false;
1456 }
1457
1458 static void
fq_if_grp_stat_sc(fq_if_t * fqs,fq_if_group_t * grp,cqrq_stat_sc_t * stat,uint64_t now)1459 fq_if_grp_stat_sc(fq_if_t *fqs, fq_if_group_t *grp, cqrq_stat_sc_t *stat, uint64_t now)
1460 {
1461 uint8_t pri;
1462 fq_if_classq_t *fq_cl;
1463
1464 ASSERT(stat != NULL);
1465 pri = fq_if_service_to_priority(fqs, stat->sc);
1466
1467 fq_cl = &grp->fqg_classq[pri];
1468 stat->packets = (uint32_t)fq_cl->fcl_stat.fcl_pkt_cnt;
1469 stat->bytes = (uint32_t)fq_cl->fcl_stat.fcl_byte_cnt;
1470
1471 if (ifclassq_enable_pacing && ifclassq_enable_l4s &&
1472 fq_if_is_fq_cl_paced(fq_cl, now)) {
1473 stat->packets = 0;
1474 stat->bytes = 0;
1475 }
1476 }
1477
1478 static boolean_t
fq_if_is_grp_all_paced(fq_if_group_t * grp)1479 fq_if_is_grp_all_paced(fq_if_group_t *grp)
1480 {
1481 fq_if_classq_t *fq_cl;
1482 uint64_t now;
1483
1484 if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
1485 return false;
1486 }
1487
1488 now = fq_codel_get_time();
1489 for (uint8_t fq_cl_idx = 0; fq_cl_idx < FQ_IF_MAX_CLASSES; fq_cl_idx++) {
1490 fq_cl = &grp->fqg_classq[fq_cl_idx];
1491 if (fq_cl == NULL || FQ_IF_CLASSQ_IDLE(fq_cl)) {
1492 continue;
1493 }
1494 if (!fq_if_is_fq_cl_paced(fq_cl, now)) {
1495 return false;
1496 }
1497 }
1498
1499 return true;
1500 }
1501
1502 boolean_t
fq_if_is_all_paced(struct ifclassq * ifq)1503 fq_if_is_all_paced(struct ifclassq *ifq)
1504 {
1505 fq_if_group_t *grp;
1506 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1507
1508 IFCQ_LOCK_ASSERT_HELD(ifq);
1509
1510 if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
1511 return false;
1512 }
1513
1514 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
1515 grp = fqs->fqs_classq_groups[grp_idx];
1516 if (grp == NULL || FQG_BYTES(grp) == 0) {
1517 continue;
1518 }
1519
1520 if (!fq_if_is_grp_all_paced(grp)) {
1521 return false;
1522 }
1523 }
1524
1525 return true;
1526 }
1527
/*
 * CLASSQRQ_STAT_SC handler: report queued packet/byte counts scoped by
 * service class and group.  stat->grp_idx == IF_CLASSQ_ALL_GRPS
 * aggregates over all groups; stat->sc == MBUF_SC_UNSPEC aggregates
 * over all service classes.  A fully-paced class/group/interface
 * reports as empty so drivers don't poll before the next-tx time.
 */
void
fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
{
	cqrq_stat_sc_t grp_sc_stat;
	fq_if_group_t *grp;
	uint64_t now = fq_codel_get_time();

	if (stat == NULL) {
		return;
	}
	grp_sc_stat.sc = stat->sc;
	stat->packets = 0;
	stat->bytes = 0;

	if (stat->grp_idx == IF_CLASSQ_ALL_GRPS) {
		if (stat->sc == MBUF_SC_UNSPEC) {
			/* whole interface: use the aggregate ifclassq counters */
			if (!fq_if_is_all_paced(fqs->fqs_ifq)) {
				stat->packets = IFCQ_LEN(fqs->fqs_ifq);
				stat->bytes = IFCQ_BYTES(fqs->fqs_ifq);
			}
		} else {
			/* one class, summed across every allocated group */
			for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
				grp = fqs->fqs_classq_groups[grp_idx];
				if (grp == NULL) {
					continue;
				}

				fq_if_grp_stat_sc(fqs, grp, &grp_sc_stat, now);
				stat->packets += grp_sc_stat.packets;
				stat->bytes += grp_sc_stat.bytes;
			}
		}
		return;
	}

	if (stat->sc == MBUF_SC_UNSPEC) {
		/* all classes of one group (or of the combined set) */
		if (fq_if_is_grp_combined(fqs, stat->grp_idx)) {
			TAILQ_FOREACH(grp, &fqs->fqs_combined_grp_list, fqg_grp_link) {
				if (fq_if_is_grp_all_paced(grp)) {
					/* fully paced group reports empty */
					continue;
				}
				stat->packets += FQG_LEN(grp);
				stat->bytes += FQG_BYTES(grp);
			}
		} else {
			grp = fq_if_find_grp(fqs, stat->grp_idx);
			if (!fq_if_is_grp_all_paced(grp)) {
				stat->packets = FQG_LEN(grp);
				stat->bytes = FQG_BYTES(grp);
			}
		}
	} else {
		/* one class of one group (or of the combined set) */
		if (fq_if_is_grp_combined(fqs, stat->grp_idx)) {
			TAILQ_FOREACH(grp, &fqs->fqs_combined_grp_list, fqg_grp_link) {
				if (fq_if_is_grp_all_paced(grp)) {
					continue;
				}
				fq_if_grp_stat_sc(fqs, grp, &grp_sc_stat, now);
				stat->packets += grp_sc_stat.packets;
				stat->bytes += grp_sc_stat.bytes;
			}
		} else {
			grp = fq_if_find_grp(fqs, stat->grp_idx);
			fq_if_grp_stat_sc(fqs, grp, stat, now);
		}
	}
}
1595
1596 int
fq_if_request_classq(struct ifclassq * ifq,cqrq_t rq,void * arg)1597 fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
1598 {
1599 int err = 0;
1600 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1601
1602 IFCQ_LOCK_ASSERT_HELD(ifq);
1603
1604 /*
1605 * These are usually slow operations, convert the lock ahead of time
1606 */
1607 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1608 switch (rq) {
1609 case CLASSQRQ_PURGE:
1610 fq_if_purge(fqs);
1611 break;
1612 case CLASSQRQ_PURGE_SC:
1613 fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg);
1614 break;
1615 case CLASSQRQ_EVENT:
1616 fq_if_event(fqs, (cqev_t)arg);
1617 break;
1618 case CLASSQRQ_THROTTLE:
1619 fq_if_throttle(fqs, (cqrq_throttle_t *)arg);
1620 break;
1621 case CLASSQRQ_STAT_SC:
1622 fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg);
1623 break;
1624 }
1625 return err;
1626 }
1627
/*
 * Attach an FQ-CoDel scheduler to `ifq`: allocate the fq_if state,
 * select the bitmap ops (driver-managed vs. normal), attach it as the
 * ifclassq discipline and create the initial group 0.  Returns 0 or an
 * errno on failure.
 */
int
fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
    classq_pkt_type_t ptype)
{
	fq_if_t *fqs = NULL;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	fqs = fq_if_alloc(ifq, ptype);
	if (fqs == NULL) {
		return ENOMEM;
	}
	/* driver-managed schedulers use the reduced service-class bitmaps */
	if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) {
		fqs->fqs_flags |= FQS_DRIVER_MANAGED;
		fqs->fqs_bm_ops = &fq_if_grps_sc_bitmap_ops;
	} else {
		fqs->fqs_bm_ops = &fq_if_grps_bitmap_ops;
	}

	err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: error from ifclassq_attach, "
		    "failed to attach fq_if: %d\n", __func__, err);
		fq_if_destroy(fqs);
		return err;
	}

	/*
	 * Always create one group. If qset 0 is added later,
	 * this group will be updated.
	 */
	err = fq_if_create_grp(ifq, 0, IF_CLASSQ_DEF);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: error from fq_if_create_grp, "
		    "failed to create a fq group: %d\n", __func__, err);
		/*
		 * NOTE(review): fqs was already attached above but no detach
		 * is performed on this error path — confirm fq_if_destroy()
		 * leaves ifq->ifcq_disc in a safe state here.
		 */
		fq_if_destroy(fqs);
	}

	return err;
}
1671
1672 fq_t *
fq_if_hash_pkt(fq_if_t * fqs,fq_if_group_t * fq_grp,u_int32_t flowid,mbuf_svc_class_t svc_class,u_int64_t now,bool create,fq_tfc_type_t tfc_type)1673 fq_if_hash_pkt(fq_if_t *fqs, fq_if_group_t *fq_grp, u_int32_t flowid,
1674 mbuf_svc_class_t svc_class, u_int64_t now, bool create,
1675 fq_tfc_type_t tfc_type)
1676 {
1677 fq_t *fq = NULL;
1678 flowq_list_t *fq_list;
1679 fq_if_classq_t *fq_cl;
1680 u_int8_t fqs_hash_id;
1681 u_int8_t scidx;
1682
1683 scidx = fq_if_service_to_priority(fqs, svc_class);
1684
1685 fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);
1686
1687 fq_list = &fqs->fqs_flows[fqs_hash_id];
1688
1689 SLIST_FOREACH(fq, fq_list, fq_hashlink) {
1690 if (fq->fq_flowhash == flowid &&
1691 fq->fq_sc_index == scidx &&
1692 fq->fq_tfc_type == tfc_type &&
1693 fq->fq_group == fq_grp) {
1694 break;
1695 }
1696 }
1697 if (fq == NULL && create) {
1698 /* If the flow is not already on the list, allocate it */
1699 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1700 fq = fq_alloc(fqs->fqs_ptype);
1701 if (fq != NULL) {
1702 fq->fq_flowhash = flowid;
1703 fq->fq_sc_index = scidx;
1704 fq->fq_group = fq_grp;
1705 fq->fq_tfc_type = tfc_type;
1706 fq_cl = &FQ_CLASSQ(fq);
1707 fq->fq_flags = (FQF_FLOWCTL_CAPABLE | FQF_FRESH_FLOW);
1708 fq->fq_updatetime = now + FQ_UPDATE_INTERVAL(fq);
1709 fq->fq_next_tx_time = FQ_INVALID_TX_TS;
1710 SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
1711 fq_cl->fcl_stat.fcl_flows_cnt++;
1712 }
1713 KDBG(AQM_KTRACE_STATS_FLOW_ALLOC,
1714 fqs->fqs_ifq->ifcq_ifp->if_index, fq->fq_flowhash,
1715 AQM_KTRACE_FQ_GRP_SC_IDX(fq), 0);
1716 } else if ((fq != NULL) && (fq->fq_flags & FQF_EMPTY_FLOW)) {
1717 fq_if_reuse_empty_flow(fqs, fq, now);
1718 }
1719
1720 /*
1721 * If getq time is not set because this is the first packet or after
1722 * idle time, set it now so that we can detect a stall.
1723 */
1724 if (fq != NULL && fq->fq_getqtime == 0) {
1725 fq->fq_getqtime = now;
1726 }
1727
1728 return fq;
1729 }
1730
1731 void
fq_if_destroy_flow(fq_if_t * fqs,fq_if_classq_t * fq_cl,fq_t * fq)1732 fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq)
1733 {
1734 u_int8_t hash_id;
1735
1736 ASSERT((fq->fq_flags & FQF_EMPTY_FLOW) == 0);
1737 hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
1738 SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq,
1739 fq_hashlink);
1740 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1741 if (__improbable(fq->fq_flags & FQF_FLOWCTL_ON)) {
1742 fq_if_flow_feedback(fqs, fq, fq_cl);
1743 }
1744 KDBG(AQM_KTRACE_STATS_FLOW_DESTROY,
1745 fqs->fqs_ifq->ifcq_ifp->if_index, fq->fq_flowhash,
1746 AQM_KTRACE_FQ_GRP_SC_IDX(fq), 0);
1747 fq_destroy(fq, fqs->fqs_ptype);
1748 }
1749
1750 inline boolean_t
fq_if_at_drop_limit(fq_if_t * fqs)1751 fq_if_at_drop_limit(fq_if_t *fqs)
1752 {
1753 return (IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
1754 TRUE : FALSE;
1755 }
1756
1757 inline boolean_t
fq_if_almost_at_drop_limit(fq_if_t * fqs)1758 fq_if_almost_at_drop_limit(fq_if_t *fqs)
1759 {
1760 /*
1761 * Whether we are above 90% of the queue limit. This is used to tell if we
1762 * can stop flow controlling the largest flow.
1763 */
1764 return IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit * 9 / 10;
1765 }
1766
1767 static inline void
fq_if_reuse_empty_flow(fq_if_t * fqs,fq_t * fq,uint64_t now)1768 fq_if_reuse_empty_flow(fq_if_t *fqs, fq_t *fq, uint64_t now)
1769 {
1770 ASSERT(fq->fq_flags & FQF_EMPTY_FLOW);
1771 TAILQ_REMOVE(&fqs->fqs_empty_list, fq, fq_empty_link);
1772 STAILQ_NEXT(fq, fq_actlink) = NULL;
1773 fq->fq_flags &= ~FQF_FLOW_STATE_MASK;
1774 fq->fq_empty_purge_time = 0;
1775 fq->fq_getqtime = 0;
1776 fq->fq_updatetime = now + FQ_UPDATE_INTERVAL(fq);
1777 fqs->fqs_empty_list_cnt--;
1778 fq_if_classq_t *fq_cl = &FQ_CLASSQ(fq);
1779 fq_cl->fcl_stat.fcl_flows_cnt++;
1780 }
1781
/*
 * Park a drained flow on the scheduler's empty-flows list; it will be
 * destroyed by fq_if_purge_empty_flow_list() once fq_empty_purge_delay
 * elapses, unless fq_if_reuse_empty_flow() revives it first.
 */
inline void
fq_if_move_to_empty_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    uint64_t now)
{
	/*
	 * NOTE(review): as written this asserts that some bit OUTSIDE the
	 * NEW/OLD/FLOWCTL mask is set (e.g. FQF_FLOWCTL_CAPABLE), not that
	 * those bits are clear.  Confirm the intent — a clear-check would
	 * read (fq->fq_flags & (...)) == 0.
	 */
	ASSERT(fq->fq_flags & ~(FQF_NEW_FLOW | FQF_OLD_FLOW | FQF_FLOWCTL_ON));
	fq->fq_empty_purge_time = now + fq_empty_purge_delay;
	TAILQ_INSERT_TAIL(&fqs->fqs_empty_list, fq, fq_empty_link);
	fq->fq_flags |= FQF_EMPTY_FLOW;
	FQ_CLEAR_OVERWHELMING(fq);
	fqs->fqs_empty_list_cnt++;
	/*
	 * fcl_flows_cnt is used in budget determination for the class.
	 * empty flow shouldn't contribute to the budget.
	 */
	fq_cl->fcl_stat.fcl_flows_cnt--;
}
1798
1799 static void
fq_if_purge_empty_flow(fq_if_t * fqs,fq_t * fq)1800 fq_if_purge_empty_flow(fq_if_t *fqs, fq_t *fq)
1801 {
1802 fq_if_classq_t *fq_cl;
1803 fq_cl = &FQ_CLASSQ(fq);
1804
1805 ASSERT((fq->fq_flags & FQF_EMPTY_FLOW) != 0);
1806 TAILQ_REMOVE(&fqs->fqs_empty_list, fq, fq_empty_link);
1807 fq->fq_flags &= ~FQF_EMPTY_FLOW;
1808 fqs->fqs_empty_list_cnt--;
1809 /* Remove from the hash list and free the flow queue */
1810 fq_if_destroy_flow(fqs, fq_cl, fq);
1811 }
1812
1813 static void
fq_if_purge_empty_flow_list(fq_if_t * fqs,uint64_t now,bool purge_all)1814 fq_if_purge_empty_flow_list(fq_if_t *fqs, uint64_t now, bool purge_all)
1815 {
1816 fq_t *fq, *tmp;
1817 int i = 0;
1818
1819 if (fqs->fqs_empty_list_cnt == 0) {
1820 ASSERT(TAILQ_EMPTY(&fqs->fqs_empty_list));
1821 return;
1822 }
1823
1824 TAILQ_FOREACH_SAFE(fq, &fqs->fqs_empty_list, fq_empty_link, tmp) {
1825 if (!purge_all && ((now < fq->fq_empty_purge_time) ||
1826 (i++ == FQ_EMPTY_PURGE_MAX))) {
1827 break;
1828 }
1829 fq_if_purge_empty_flow(fqs, fq);
1830 }
1831
1832 if (__improbable(purge_all)) {
1833 VERIFY(fqs->fqs_empty_list_cnt == 0);
1834 VERIFY(TAILQ_EMPTY(&fqs->fqs_empty_list));
1835 }
1836 }
1837
1838 static void
fq_if_empty_old_flow(fq_if_t * fqs,fq_if_classq_t * fq_cl,fq_t * fq,uint64_t now)1839 fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
1840 uint64_t now)
1841 {
1842 /*
1843 * Remove the flow queue from the old flows list.
1844 */
1845 STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq, fq_actlink);
1846 fq->fq_flags &= ~FQF_OLD_FLOW;
1847 fq_cl->fcl_stat.fcl_oldflows_cnt--;
1848 VERIFY(fq->fq_bytes == 0);
1849
1850 /* release any flow control */
1851 if (__improbable(fq->fq_flags & FQF_FLOWCTL_ON)) {
1852 fq_if_flow_feedback(fqs, fq, fq_cl);
1853 }
1854
1855 /* move the flow queue to empty flows list */
1856 fq_if_move_to_empty_flow(fqs, fq_cl, fq, now);
1857 }
1858
1859 static void
fq_if_empty_new_flow(fq_t * fq,fq_if_classq_t * fq_cl)1860 fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl)
1861 {
1862 /* Move to the end of old queue list */
1863 STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
1864 flowq, fq_actlink);
1865 fq->fq_flags &= ~FQF_NEW_FLOW;
1866 fq_cl->fcl_stat.fcl_newflows_cnt--;
1867
1868 STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq, fq_actlink);
1869 fq->fq_flags |= FQF_OLD_FLOW;
1870 fq_cl->fcl_stat.fcl_oldflows_cnt++;
1871 }
1872
/*
 * Drop one packet from the head of the tracked largest flow; used when
 * the scheduler is at its drop limit.  Updates drop statistics and the
 * flow's list state; no-op when no large flow is being tracked.
 */
inline void
fq_if_drop_packet(fq_if_t *fqs, uint64_t now)
{
	fq_t *fq = fqs->fqs_large_flow;
	fq_if_classq_t *fq_cl;
	pktsched_pkt_t pkt;
	volatile uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;

	if (fq == NULL) {
		return;
	}
	/* queue can not be empty on the largest flow */
	VERIFY(!fq_empty(fq, fqs->fqs_ptype));

	fq_cl = &FQ_CLASSQ(fq);
	_PKTSCHED_PKT_INIT(&pkt);
	fq_getq_flow_internal(fqs, fq, &pkt);
	ASSERT(pkt.pktsched_ptype != QP_INVALID);

	pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL, NULL);

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	/* scrub scheduler-private metadata before freeing the packet */
	*pkt_timestamp = 0;
	switch (pkt.pktsched_ptype) {
	case QP_MBUF:
		*pkt_flags &= ~PKTF_PRIV_GUARDED;
		break;
#if SKYWALK
	case QP_PACKET:
		/* sanity check */
		ASSERT((*pkt_flags & ~PKT_F_COMMON_MASK) == 0);
		break;
#endif /* SKYWALK */
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* If that was the flow's last packet, retire it from its list. */
	if (fq_empty(fq, fqs->fqs_ptype)) {
		fqs->fqs_large_flow = NULL;
		if (fq->fq_flags & FQF_OLD_FLOW) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, now);
		} else {
			VERIFY(fq->fq_flags & FQF_NEW_FLOW);
			fq_if_empty_new_flow(fq, fq_cl);
		}
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt));

	pktsched_free_pkt(&pkt);
	fq_cl->fcl_stat.fcl_drop_overflow++;
}
1928
1929 inline void
fq_if_is_flow_heavy(fq_if_t * fqs,fq_t * fq)1930 fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
1931 {
1932 fq_t *prev_fq;
1933
1934 if (fqs->fqs_large_flow != NULL &&
1935 fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
1936 fqs->fqs_large_flow = NULL;
1937 }
1938
1939 if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
1940 return;
1941 }
1942
1943 prev_fq = fqs->fqs_large_flow;
1944 if (prev_fq == NULL) {
1945 if (!fq_empty(fq, fqs->fqs_ptype)) {
1946 fqs->fqs_large_flow = fq;
1947 }
1948 return;
1949 } else if (fq->fq_bytes > prev_fq->fq_bytes) {
1950 fqs->fqs_large_flow = fq;
1951 }
1952 }
1953
1954 boolean_t
fq_if_add_fcentry(fq_if_t * fqs,pktsched_pkt_t * pkt,uint8_t flowsrc,fq_t * fq,fq_if_classq_t * fq_cl)1955 fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint8_t flowsrc,
1956 fq_t *fq, fq_if_classq_t *fq_cl)
1957 {
1958 struct flowadv_fcentry *fce;
1959
1960 #if DEBUG || DEVELOPMENT
1961 if (__improbable(ifclassq_flow_control_adv == 0)) {
1962 os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
1963 return TRUE;
1964 }
1965 #endif /* DEBUG || DEVELOPMENT */
1966
1967 STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
1968 if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
1969 fce->fce_flowid == fq->fq_flowhash) {
1970 /* Already on flowcontrol list */
1971 return TRUE;
1972 }
1973 }
1974 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1975 fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
1976 if (fce != NULL) {
1977 /* XXX Add number of bytes in the queue */
1978 STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
1979 fq_cl->fcl_stat.fcl_flow_control++;
1980 os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
1981 "flow: 0x%x, iface: %s, B:%u\n", __func__,
1982 fq_cl->fcl_stat.fcl_flow_control,
1983 fq->fq_sc_index, fce->fce_flowsrc_type, fq->fq_flowhash,
1984 if_name(fqs->fqs_ifq->ifcq_ifp), fq->fq_bytes);
1985 KDBG(AQM_KTRACE_STATS_FLOW_CTL | DBG_FUNC_START,
1986 fq->fq_flowhash, AQM_KTRACE_FQ_GRP_SC_IDX(fq),
1987 fq->fq_bytes, fq->fq_min_qdelay);
1988 }
1989 return (fce != NULL) ? TRUE : FALSE;
1990 }
1991
1992 static void
fq_if_remove_fcentry(fq_if_t * fqs,struct flowadv_fcentry * fce)1993 fq_if_remove_fcentry(fq_if_t *fqs, struct flowadv_fcentry *fce)
1994 {
1995 STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry, fce_link);
1996 STAILQ_NEXT(fce, fce_link) = NULL;
1997 flowadv_add_entry(fce);
1998 }
1999
/*
 * Release flow `fq' from flow control: if it has an advisory entry on the
 * flow-control list, mark the entry as feedback and hand it back to the
 * flow advisory layer.  FQF_FLOWCTL_ON is cleared unconditionally, even
 * when no matching entry was found.
 */
void
fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce = NULL;

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	/* Find this flow's entry by flowhash; fce is NULL if absent. */
	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if (fce->fce_flowid == fq->fq_flowhash) {
			break;
		}
	}
	if (fce != NULL) {
		fq_cl->fcl_stat.fcl_flow_feedback++;
		fce->fce_event_type = FCE_EVENT_TYPE_FLOW_CONTROL_FEEDBACK;
		os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
		    "flow: 0x%x, iface: %s grp: %hhu, B:%u\n", __func__,
		    fq_cl->fcl_stat.fcl_flow_feedback, fq->fq_sc_index,
		    fce->fce_flowsrc_type, fce->fce_flowid,
		    if_name(fqs->fqs_ifq->ifcq_ifp), FQ_GROUP(fq)->fqg_index,
		    fq->fq_bytes);
		fq_if_remove_fcentry(fqs, fce);
		KDBG(AQM_KTRACE_STATS_FLOW_CTL | DBG_FUNC_END,
		    fq->fq_flowhash, AQM_KTRACE_FQ_GRP_SC_IDX(fq),
		    fq->fq_bytes, fq->fq_min_qdelay);
	}
	fq->fq_flags &= ~FQF_FLOWCTL_ON;
}
2027
2028 boolean_t
fq_if_report_ce(fq_if_t * fqs,pktsched_pkt_t * pkt,uint32_t ce_cnt,uint32_t pkt_cnt)2029 fq_if_report_ce(fq_if_t *fqs, pktsched_pkt_t *pkt, uint32_t ce_cnt,
2030 uint32_t pkt_cnt)
2031 {
2032 struct flowadv_fcentry *fce;
2033
2034 #if DEBUG || DEVELOPMENT
2035 if (__improbable(ifclassq_flow_control_adv == 0)) {
2036 os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
2037 return TRUE;
2038 }
2039 #endif /* DEBUG || DEVELOPMENT */
2040
2041 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
2042 fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
2043 if (fce != NULL) {
2044 fce->fce_event_type = FCE_EVENT_TYPE_CONGESTION_EXPERIENCED;
2045 fce->fce_ce_cnt = ce_cnt;
2046 fce->fce_pkts_since_last_report = pkt_cnt;
2047
2048 flowadv_add_entry(fce);
2049 }
2050 return (fce != NULL) ? TRUE : FALSE;
2051 }
2052
2053
/*
 * Dequeue up to `pktlimit' packets / `bytelimit' bytes from class `fq_cl',
 * performing the DRR (deficit round robin) scan over the class's new-flows
 * list first and then its old-flows list, per RFC 8290.
 *
 * When `fq_dqlist' is non-NULL, packets are staged on each flow's private
 * fq_dq_head/fq_dq_tail chain and the flow is recorded on `fq_dqlist';
 * otherwise packets are chained directly through `top' (and `bottom', if
 * provided, receives the last packet).
 *
 * `budget_restricted' additionally caps `bytelimit' at the class budget.
 * `now' is the timestamp used for pacing: flows whose next transmit time
 * has not yet arrived are skipped, the earliest such time is returned via
 * `next_tx_time', and `fq_cl_paced' is set when every flow was pacing-
 * blocked.  `retpktcnt'/`retbytecnt' report totals when non-NULL.
 */
void
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, uint32_t pktlimit,
    int64_t bytelimit, classq_pkt_t *top, classq_pkt_t *bottom,
    uint32_t *retpktcnt, uint32_t *retbytecnt, flowq_dqlist_t *fq_dqlist,
    bool budget_restricted, uint64_t now, bool *fq_cl_paced,
    uint64_t *next_tx_time)
{
	fq_t *fq = NULL, *tfq = NULL;
	flowq_stailq_t temp_stailq;
	uint32_t pktcnt, bytecnt;
	boolean_t qempty, limit_reached = FALSE;
	bool all_paced = true;
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
	fq_getq_flow_t fq_getq_flow_fn;
	classq_pkt_t *head, *tail;
	uint64_t fq_cl_tx_time = FQ_INVALID_TX_TS;

	/* Pick the per-packet-type flow dequeue routine. */
	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		fq_getq_flow_fn = fq_getq_flow_mbuf;
		break;

#if SKYWALK
	case QP_PACKET:
		fq_getq_flow_fn = fq_getq_flow_kpkt;
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * maximum byte limit should not be greater than the budget for
	 * this class
	 */
	if (bytelimit > fq_cl->fcl_budget && budget_restricted) {
		bytelimit = fq_cl->fcl_budget;
	}

	VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);
	pktcnt = bytecnt = 0;
	STAILQ_INIT(&temp_stailq);

	/* Pass 1: new flows (FQF_NEW_FLOW). */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		ASSERT((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_NEW_FLOW);
		uint64_t fq_tx_time;
		/* Pacing: skip flows not yet eligible; track earliest time. */
		if (__improbable(!fq_tx_time_ready(fqs, fq, now, &fq_tx_time))) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
			continue;
		}
		all_paced = false;

		/* Stage on the flow's own chain, or directly on top/last. */
		if (fq_dqlist != NULL) {
			if (!fq->fq_in_dqlist) {
				fq_dqlist_add(fq_dqlist, fq);
			}
			head = &fq->fq_dq_head;
			tail = &fq->fq_dq_tail;
		} else {
			ASSERT(!fq->fq_in_dqlist);
			head = top;
			tail = &last;
		}

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, now);

		/*
		 * From RFC 8290:
		 * if that queue has a negative number of credits (i.e., it has already
		 * dequeued at least a quantum of bytes), it is given an additional
		 * quantum of credits, the queue is put onto _the end of_ the list of
		 * old queues, and the routine selects the next queue and starts again.
		 */
		if (fq->fq_deficit <= 0 || qempty) {
			fq->fq_deficit += fq_cl->fcl_quantum;
			fq_if_empty_new_flow(fq, fq_cl);
		}
		//TODO: add credit when it's now paced? so that the fq is treated the same as empty

		/* Re-check pacing after the dequeue; track earliest tx time. */
		if (!fq_tx_time_ready(fqs, fq, now, &fq_tx_time)) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
		}

		if (limit_reached) {
			goto done;
		}
	}

	/* Pass 2: old flows (FQF_OLD_FLOW). */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_OLD_FLOW);
		bool destroy = true;
		uint64_t fq_tx_time;

		/* Pacing: skip flows not yet eligible; track earliest time. */
		if (__improbable(!fq_tx_time_ready(fqs, fq, now, &fq_tx_time))) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
			continue;
		}
		all_paced = false;

		if (fq_dqlist != NULL) {
			if (!fq->fq_in_dqlist) {
				fq_dqlist_add(fq_dqlist, fq);
			}
			head = &fq->fq_dq_head;
			tail = &fq->fq_dq_tail;
			/* flow is referenced by the dqlist; don't destroy */
			destroy = false;
		} else {
			ASSERT(!fq->fq_in_dqlist);
			head = top;
			tail = &last;
		}

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, now);

		if (!fq_tx_time_ready(fqs, fq, now, &fq_tx_time)) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
		}

		if (qempty) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, now);
		} else if (fq->fq_deficit <= 0) {
			STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
			    flowq, fq_actlink);
			/*
			 * Move to the end of the old queues list. We do not
			 * need to update the flow count since this flow
			 * will be added to the tail again
			 */
			STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
			fq->fq_deficit += fq_cl->fcl_quantum;
		}
		if (limit_reached) {
			break;
		}
	}

done:
	/* Whole class paced: record earliest next transmit time on the class. */
	if (all_paced) {
		fq_cl->fcl_flags |= FCL_PACED;
		fq_cl->fcl_next_tx_time = fq_cl_tx_time;
	}
	/* Re-append flows whose deficit was replenished to the old list. */
	if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
		STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
	} else if (!STAILQ_EMPTY(&temp_stailq)) {
		fq_cl->fcl_old_flows = temp_stailq;
	}
	/* `last' is only used in the direct (non-dqlist) chaining mode. */
	if (last.cp_mbuf != NULL) {
		VERIFY(top->cp_mbuf != NULL);
		if (bottom != NULL) {
			*bottom = last;
		}
	}
	if (retpktcnt != NULL) {
		*retpktcnt = pktcnt;
	}
	if (retbytecnt != NULL) {
		*retbytecnt = bytecnt;
	}
	if (fq_cl_paced != NULL) {
		*fq_cl_paced = all_paced;
	}
	if (next_tx_time != NULL) {
		*next_tx_time = fq_cl_tx_time;
	}
}
2238
/*
 * Tear down the FQ-CoDel scheduler attached to `ifq': destroy the
 * scheduler state, clear the discipline pointer, and detach the
 * ifclassq.  Caller must hold the ifclassq lock.
 */
void
fq_if_teardown_ifclassq(struct ifclassq *ifq)
{
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);
	fq_if_destroy_fqs(fqs);
	ifq->ifcq_disc = NULL;
	ifclassq_detach(ifq);
}
2250
2251 static void
fq_export_flowstats(fq_if_t * fqs,fq_t * fq,struct fq_codel_flowstats * flowstat)2252 fq_export_flowstats(fq_if_t *fqs, fq_t *fq,
2253 struct fq_codel_flowstats *flowstat)
2254 {
2255 bzero(flowstat, sizeof(*flowstat));
2256 flowstat->fqst_min_qdelay = (uint32_t)fq->fq_min_qdelay;
2257 flowstat->fqst_bytes = fq->fq_bytes;
2258 flowstat->fqst_flowhash = fq->fq_flowhash;
2259 if (fq->fq_flags & FQF_NEW_FLOW) {
2260 flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW;
2261 }
2262 if (fq->fq_flags & FQF_OLD_FLOW) {
2263 flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW;
2264 }
2265 if (fq->fq_flags & FQF_DELAY_HIGH) {
2266 flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH;
2267 }
2268 if (fq->fq_flags & FQF_FLOWCTL_ON) {
2269 flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON;
2270 }
2271 if (fqs->fqs_large_flow == fq) {
2272 flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW;
2273 }
2274 }
2275
2276 int
fq_if_getqstats_ifclassq(struct ifclassq * ifq,uint8_t gid,u_int32_t qid,struct if_ifclassq_stats * ifqs)2277 fq_if_getqstats_ifclassq(struct ifclassq *ifq, uint8_t gid, u_int32_t qid,
2278 struct if_ifclassq_stats *ifqs)
2279 {
2280 struct fq_codel_classstats *fcls;
2281 fq_if_classq_t *fq_cl;
2282 fq_if_t *fqs;
2283 fq_t *fq = NULL;
2284 fq_if_group_t *grp;
2285 u_int32_t i, flowstat_cnt;
2286
2287 if (qid >= FQ_IF_MAX_CLASSES || gid >= FQ_IF_MAX_GROUPS) {
2288 return EINVAL;
2289 }
2290
2291 fqs = (fq_if_t *)ifq->ifcq_disc;
2292 if (fqs->fqs_classq_groups[gid] == NULL) {
2293 return ENXIO;
2294 }
2295
2296 fcls = &ifqs->ifqs_fq_codel_stats;
2297
2298 fq_cl = &FQS_CLASSQ(fqs, gid, qid);
2299 grp = fq_if_find_grp(fqs, gid);
2300
2301 fcls->fcls_pri = fq_cl->fcl_pri;
2302 fcls->fcls_service_class = fq_cl->fcl_service_class;
2303 fcls->fcls_quantum = fq_cl->fcl_quantum;
2304 fcls->fcls_drr_max = fq_cl->fcl_drr_max;
2305 fcls->fcls_budget = fq_cl->fcl_budget;
2306 fcls->fcls_l4s_target_qdelay = grp->fqg_target_qdelays[FQ_TFC_L4S];
2307 fcls->fcls_target_qdelay = grp->fqg_target_qdelays[FQ_TFC_C];
2308 fcls->fcls_update_interval = grp->fqg_update_intervals[FQ_TFC_C];
2309 fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
2310 fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
2311 fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
2312 fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
2313 fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
2314 fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
2315 fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
2316 fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
2317 fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
2318 fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
2319 fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
2320 fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
2321 fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
2322 fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
2323 fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
2324 fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
2325 fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
2326 fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
2327 fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;
2328 fcls->fcls_pkts_compressible = fq_cl->fcl_stat.fcl_pkts_compressible;
2329 fcls->fcls_pkts_compressed = fq_cl->fcl_stat.fcl_pkts_compressed;
2330 fcls->fcls_min_qdelay = fq_cl->fcl_stat.fcl_min_qdelay;
2331 fcls->fcls_max_qdelay = fq_cl->fcl_stat.fcl_max_qdelay;
2332 fcls->fcls_avg_qdelay = fq_cl->fcl_stat.fcl_avg_qdelay;
2333 fcls->fcls_overwhelming = fq_cl->fcl_stat.fcl_overwhelming;
2334 fcls->fcls_ce_marked = fq_cl->fcl_stat.fcl_ce_marked;
2335 fcls->fcls_ce_reported = fq_cl->fcl_stat.fcl_ce_reported;
2336 fcls->fcls_ce_mark_failures = fq_cl->fcl_stat.fcl_ce_mark_failures;
2337 fcls->fcls_l4s_pkts = fq_cl->fcl_stat.fcl_l4s_pkts;
2338 fcls->fcls_ignore_tx_time = fq_cl->fcl_stat.fcl_ignore_tx_time;
2339 fcls->fcls_paced_pkts = fq_cl->fcl_stat.fcl_paced_pkts;
2340 fcls->fcls_fcl_pacing_needed = fq_cl->fcl_stat.fcl_fcl_pacemaker_needed;
2341
2342 /* Gather per flow stats */
2343 flowstat_cnt = min((fcls->fcls_newflows_cnt +
2344 fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS);
2345 i = 0;
2346 STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) {
2347 if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt) {
2348 break;
2349 }
2350
2351 /* leave space for a few old flows */
2352 if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt &&
2353 i >= (FQ_IF_MAX_FLOWSTATS >> 1)) {
2354 break;
2355 }
2356 fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
2357 i++;
2358 }
2359 STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) {
2360 if (i >= flowstat_cnt) {
2361 break;
2362 }
2363 fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
2364 i++;
2365 }
2366 VERIFY(i <= flowstat_cnt);
2367 fcls->fcls_flowstats_cnt = i;
2368 return 0;
2369 }
2370
/*
 * Create (or, for an existing group, update) classq group `grp_idx' on the
 * ifclassq, initializing its per-service-class queues, target queue delays
 * and update intervals.  Returns EINVAL if a non-zero group index is
 * already populated, ENOMEM on allocation failure, 0 on success.
 * Group 0 is special-cased: if it already exists, only the update path
 * (combined/separated state and delay parameters) is re-run.
 */
int
fq_if_create_grp(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
{
/* Initialize one service-class queue of the group with its quantum/DRR. */
#define _FQ_CLASSQ_INIT(_grp, _s, _q) \
	fq_if_classq_init(_grp, FQ_IF_ ## _s ##_INDEX, \
	    FQ_CODEL_QUANTUM_ ## _s(_q), FQ_CODEL_DRR_MAX(_s), \
	    MBUF_SC_ ## _s );

	fq_if_group_t *grp;
	fq_if_t *fqs;
	uint32_t quantum, calc_flags = IF_CLASSQ_DEF;
	struct ifnet *ifp = ifcq->ifcq_ifp;

	VERIFY(grp_idx < FQ_IF_MAX_GROUPS);

	fqs = (fq_if_t *)ifcq->ifcq_disc;

	/* Group 0 may be re-created; just refresh its parameters. */
	if (grp_idx == 0 && fqs->fqs_classq_groups[grp_idx] != NULL) {
		grp = fqs->fqs_classq_groups[grp_idx];
		goto update;
	}

	if (fqs->fqs_classq_groups[grp_idx] != NULL) {
		return EINVAL;
	}

	grp = zalloc_flags(fq_if_grp_zone, Z_WAITOK | Z_ZERO);
	/* NOTE(review): Z_WAITOK zalloc normally cannot return NULL — confirm
	 * whether this ENOMEM branch is reachable. */
	if (grp == NULL) {
		return ENOMEM;
	}

	fqs->fqs_classq_groups[grp_idx] = grp;
	grp->fqg_index = grp_idx;

	quantum = fq_if_calc_quantum(ifp);
	if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
		/* Driver-managed mode uses only the four WMM access classes. */
		_FQ_CLASSQ_INIT(grp, BK, quantum);
		_FQ_CLASSQ_INIT(grp, BE, quantum);
		_FQ_CLASSQ_INIT(grp, VI, quantum);
		_FQ_CLASSQ_INIT(grp, VO, quantum);
	} else {
		/* SIG shares same INDEX with VI */
		_CASSERT(SCIDX_SIG == SCIDX_VI);
		_CASSERT(FQ_IF_SIG_INDEX == FQ_IF_VI_INDEX);

		_FQ_CLASSQ_INIT(grp, BK_SYS, quantum);
		_FQ_CLASSQ_INIT(grp, BK, quantum);
		_FQ_CLASSQ_INIT(grp, BE, quantum);
		_FQ_CLASSQ_INIT(grp, RD, quantum);
		_FQ_CLASSQ_INIT(grp, OAM, quantum);
		_FQ_CLASSQ_INIT(grp, AV, quantum);
		_FQ_CLASSQ_INIT(grp, RV, quantum);
		_FQ_CLASSQ_INIT(grp, VI, quantum);
		_FQ_CLASSQ_INIT(grp, VO, quantum);
		_FQ_CLASSQ_INIT(grp, CTL, quantum);
	}

update:
	/* Default groups are combined with the shared scheduler state. */
	if (flags & IF_DEFAULT_GRP) {
		fq_if_set_grp_combined(ifcq, grp_idx);
		grp->fqg_flags |= FQ_IF_DEFAULT_GRP;
	} else {
		fq_if_set_grp_separated(ifcq, grp_idx);
		grp->fqg_flags &= ~FQ_IF_DEFAULT_GRP;
	}

	/* Compute classic and L4S target delays / update intervals. */
	calc_flags |= (flags & IF_CLASSQ_LOW_LATENCY);
	ifclassq_calc_target_qdelay(ifp, &grp->fqg_target_qdelays[FQ_TFC_C],
	    calc_flags);
	ifclassq_calc_target_qdelay(ifp, &grp->fqg_target_qdelays[FQ_TFC_L4S],
	    calc_flags | IF_CLASSQ_L4S);

	ifclassq_calc_update_interval(&grp->fqg_update_intervals[FQ_TFC_C],
	    calc_flags);
	ifclassq_calc_update_interval(&grp->fqg_update_intervals[FQ_TFC_L4S],
	    calc_flags | IF_CLASSQ_L4S);

	return 0;
#undef _FQ_CLASSQ_INIT
}
2451
2452 fq_if_group_t *
fq_if_find_grp(fq_if_t * fqs,uint8_t grp_idx)2453 fq_if_find_grp(fq_if_t *fqs, uint8_t grp_idx)
2454 {
2455 fq_if_group_t *grp;
2456
2457 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
2458 VERIFY(grp_idx < FQ_IF_MAX_GROUPS);
2459
2460 grp = fqs->fqs_classq_groups[grp_idx];
2461 VERIFY(grp != NULL);
2462
2463 return grp;
2464 }
2465
2466 static void
fq_if_purge_grp(fq_if_t * fqs,fq_if_group_t * grp)2467 fq_if_purge_grp(fq_if_t *fqs, fq_if_group_t *grp)
2468 {
2469 for (uint8_t i = 0; i < FQ_IF_MAX_CLASSES; i++) {
2470 fq_if_purge_classq(fqs, &grp->fqg_classq[i]);
2471 }
2472
2473 bzero(&grp->fqg_bitmaps, sizeof(grp->fqg_bitmaps));
2474 grp->fqg_len = 0;
2475 grp->fqg_bytes = 0;
2476 fq_if_set_grp_separated(fqs->fqs_ifq, grp->fqg_index);
2477 }
2478
2479 void
fq_if_destroy_grps(fq_if_t * fqs)2480 fq_if_destroy_grps(fq_if_t *fqs)
2481 {
2482 fq_if_group_t *grp;
2483
2484 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
2485
2486 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
2487 if (fqs->fqs_classq_groups[grp_idx] == NULL) {
2488 continue;
2489 }
2490
2491 grp = fq_if_find_grp(fqs, grp_idx);
2492 fq_if_purge_grp(fqs, grp);
2493 zfree(fq_if_grp_zone, grp);
2494 fqs->fqs_classq_groups[grp_idx] = NULL;
2495 }
2496 }
2497
2498 static inline boolean_t
fq_if_is_grp_combined(fq_if_t * fqs,uint8_t grp_idx)2499 fq_if_is_grp_combined(fq_if_t *fqs, uint8_t grp_idx)
2500 {
2501 return pktsched_bit_tst(grp_idx, &fqs->fqs_combined_grp_bitmap);
2502 }
2503
2504 void
fq_if_set_grp_combined(struct ifclassq * ifcq,uint8_t grp_idx)2505 fq_if_set_grp_combined(struct ifclassq *ifcq, uint8_t grp_idx)
2506 {
2507 fq_if_t *fqs;
2508 fq_if_group_t *grp;
2509
2510 IFCQ_LOCK_ASSERT_HELD(ifcq);
2511
2512 fqs = (fq_if_t *)ifcq->ifcq_disc;
2513 grp = fq_if_find_grp(fqs, grp_idx);
2514
2515 if (fq_if_is_grp_combined(fqs, grp_idx)) {
2516 return;
2517 }
2518
2519 /*
2520 * We keep the current fq_deficit and fcl_budget when combining a group.
2521 * That might disrupt the AQM but only for a moment.
2522 */
2523 pktsched_bit_set(grp_idx, &fqs->fqs_combined_grp_bitmap);
2524 TAILQ_INSERT_TAIL(&fqs->fqs_combined_grp_list, grp, fqg_grp_link);
2525 }
2526
2527 void
fq_if_set_grp_separated(struct ifclassq * ifcq,uint8_t grp_idx)2528 fq_if_set_grp_separated(struct ifclassq *ifcq, uint8_t grp_idx)
2529 {
2530 fq_if_t *fqs;
2531 fq_if_group_t *grp;
2532
2533 IFCQ_LOCK_ASSERT_HELD(ifcq);
2534
2535 fqs = (fq_if_t *)ifcq->ifcq_disc;
2536 grp = fq_if_find_grp(fqs, grp_idx);
2537
2538 if (!fq_if_is_grp_combined(fqs, grp_idx)) {
2539 return;
2540 }
2541
2542 pktsched_bit_clr(grp_idx, &fqs->fqs_combined_grp_bitmap);
2543 TAILQ_REMOVE(&fqs->fqs_combined_grp_list, grp, fqg_grp_link);
2544 }
2545