1 /*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <kern/zalloc.h>
32 #include <net/ethernet.h>
33 #include <net/if_var.h>
34 #include <net/if.h>
35 #include <net/classq/classq.h>
36 #include <net/classq/classq_fq_codel.h>
37 #include <net/pktsched/pktsched_fq_codel.h>
38 #include <os/log.h>
39 #include <pexpert/pexpert.h> /* for PE_parse_boot_argn */
40
41 #define FQ_CODEL_DEFAULT_QUANTUM 1500
42
43 #define FQ_CODEL_QUANTUM_BK_SYS(_q) (_q)
44 #define FQ_CODEL_QUANTUM_BK(_q) (_q)
45 #define FQ_CODEL_QUANTUM_BE(_q) (_q)
46 #define FQ_CODEL_QUANTUM_RD(_q) (_q)
47 #define FQ_CODEL_QUANTUM_OAM(_q) (_q)
48 #define FQ_CODEL_QUANTUM_AV(_q) (_q * 2)
49 #define FQ_CODEL_QUANTUM_RV(_q) (_q * 2)
50 #define FQ_CODEL_QUANTUM_VI(_q) (_q * 2)
51 #define FQ_CODEL_QUANTUM_VO(_q) ((_q * 2) / 5)
52 #define FQ_CODEL_QUANTUM_CTL(_q) ((_q * 2) / 5)
53
54 static KALLOC_TYPE_DEFINE(fq_if_zone, fq_if_t, NET_KT_DEFAULT);
55 static KALLOC_TYPE_DEFINE(fq_if_grp_zone, fq_if_group_t, NET_KT_DEFAULT);
56
57 static uint64_t fq_empty_purge_delay = FQ_EMPTY_PURGE_DELAY;
58 #if (DEVELOPMENT || DEBUG)
59 SYSCTL_NODE(_net_classq, OID_AUTO, fq_codel, CTLFLAG_RW | CTLFLAG_LOCKED,
60 0, "FQ-CODEL parameters");
61
62 SYSCTL_QUAD(_net_classq_fq_codel, OID_AUTO, fq_empty_purge_delay, CTLFLAG_RW |
63 CTLFLAG_LOCKED, &fq_empty_purge_delay, "Empty flow queue purge delay (ns)");
#endif /* DEVELOPMENT || DEBUG */
65
66 typedef STAILQ_HEAD(, flowq) flowq_dqlist_t;
67
68 static fq_if_t *fq_if_alloc(struct ifclassq *, classq_pkt_type_t);
69 static void fq_if_destroy(fq_if_t *fqs);
70 static void fq_if_classq_init(fq_if_group_t *fqg, uint32_t priority,
71 uint32_t quantum, uint32_t drr_max, uint32_t svc_class);
72 static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, uint32_t,
73 int64_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
74 uint32_t *, flowq_dqlist_t *, bool, uint64_t now);
75 void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
76 static void fq_if_purge(fq_if_t *);
77 static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
78 static void fq_if_purge_flow(fq_if_t *, fq_t *, uint32_t *, uint32_t *,
79 uint64_t);
80 static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl);
81 static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
82 fq_t *fq, uint64_t now);
83 static void fq_if_purge_empty_flow(fq_if_t *fqs, fq_t *fq);
84 static void fq_if_purge_empty_flow_list(fq_if_t *fqs, uint64_t now,
85 bool purge_all);
86 static inline void fq_if_reuse_empty_flow(fq_if_t *fqs, fq_t *fq, uint64_t now);
87 static int fq_if_dequeue_sc_classq_multi_separate(struct ifclassq *ifq,
88 mbuf_svc_class_t svc, u_int32_t maxpktcnt, u_int32_t maxbytecnt,
89 classq_pkt_t *first_packet, classq_pkt_t *last_packet, u_int32_t *retpktcnt,
90 u_int32_t *retbytecnt, uint8_t grp_idx);
91 static void fq_if_grp_stat_sc(fq_if_t *fqs, fq_if_group_t *grp,
92 cqrq_stat_sc_t *stat);
93 static void fq_if_purge_grp(fq_if_t *fqs, fq_if_group_t *grp);
94 static inline boolean_t fq_if_is_grp_combined(fq_if_t *fqs, uint8_t grp_idx);
95 static void fq_if_destroy_grps(fq_if_t *fqs);
96
97 uint32_t fq_codel_drr_max_values[FQ_IF_MAX_CLASSES] = {
98 [FQ_IF_CTL_INDEX] = 8,
99 [FQ_IF_VO_INDEX] = 8,
100 [FQ_IF_VI_INDEX] = 6,
101 [FQ_IF_RV_INDEX] = 6,
102 [FQ_IF_AV_INDEX] = 6,
103 [FQ_IF_OAM_INDEX] = 4,
104 [FQ_IF_RD_INDEX] = 4,
105 [FQ_IF_BE_INDEX] = 4,
106 [FQ_IF_BK_INDEX] = 2,
107 [FQ_IF_BK_SYS_INDEX] = 2,
108 };
109
110 #define FQ_CODEL_DRR_MAX(_s) fq_codel_drr_max_values[FQ_IF_##_s##_INDEX]
111
112 static boolean_t fq_if_grps_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri,
113 fq_if_state state);
114 static void fq_if_grps_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri,
115 fq_if_state dst_state, fq_if_state src_state);
116 static void fq_if_grps_bitmap_clr(fq_grp_tailq_t *grp_list, int pri,
117 fq_if_state state);
118 static int fq_if_grps_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri,
119 fq_if_state state, fq_if_group_t **selected_grp);
120
121 static boolean_t fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri,
122 fq_if_state state);
123 static void fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri,
124 fq_if_state dst_state, fq_if_state src_state);
125 static void fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t *grp_list, int pri,
126 fq_if_state state);
127 static int fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri,
128 fq_if_state state, fq_if_group_t **selected_grp);
129
130 bitmap_ops_t fq_if_grps_bitmap_ops =
131 {
132 .ffs = fq_if_grps_bitmap_ffs,
133 .zeros = fq_if_grps_bitmap_zeros,
134 .cpy = fq_if_grps_bitmap_cpy,
135 .clr = fq_if_grps_bitmap_clr,
136 };
137
138 bitmap_ops_t fq_if_grps_sc_bitmap_ops =
139 {
140 .ffs = fq_if_grps_sc_bitmap_ffs,
141 .zeros = fq_if_grps_sc_bitmap_zeros,
142 .cpy = fq_if_grps_sc_bitmap_cpy,
143 .clr = fq_if_grps_sc_bitmap_clr,
144 };
145
146 void
pktsched_fq_init(void)147 pktsched_fq_init(void)
148 {
149 // format looks like ifcq_drr_max=8,8,6
150 char buf[(FQ_IF_MAX_CLASSES) * 3];
151 size_t i, len, pri_index = 0;
152 uint32_t drr = 0;
153 if (!PE_parse_boot_arg_str("ifcq_drr_max", buf, sizeof(buf))) {
154 return;
155 }
156
157 len = strlen(buf);
158 for (i = 0; i < len + 1 && pri_index < FQ_IF_MAX_CLASSES; i++) {
159 if (buf[i] != ',' && buf[i] != '\0') {
160 VERIFY(buf[i] >= '0' && buf[i] <= '9');
161 drr = drr * 10 + buf[i] - '0';
162 continue;
163 }
164 fq_codel_drr_max_values[pri_index] = drr;
165 pri_index += 1;
166 drr = 0;
167 }
168 }
169
170 #define FQ_IF_FLOW_HASH_ID(_flowid_) \
171 (((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)
172
173 #define FQ_IF_CLASSQ_IDLE(_fcl_) \
174 (STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
175 STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))
176
177 typedef void (* fq_if_append_pkt_t)(classq_pkt_t *, classq_pkt_t *);
178 typedef boolean_t (* fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *,
179 int64_t, uint32_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
180 uint32_t *, boolean_t *, uint32_t, uint64_t);
181
/*
 * Chain next_pkt after pkt by linking the underlying mbufs'
 * m_nextpkt pointers (mbuf flavor of fq_if_append_pkt_t).
 */
static void
fq_if_append_mbuf(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
{
	pkt->cp_mbuf->m_nextpkt = next_pkt->cp_mbuf;
}
187
188 static inline uint64_t
fq_codel_get_time(void)189 fq_codel_get_time(void)
190 {
191 struct timespec ts;
192 uint64_t now;
193
194 nanouptime(&ts);
195 now = ((uint64_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec;
196 return now;
197 }
198
199 #if SKYWALK
/*
 * Chain next_pkt after pkt by linking the underlying kernel packets'
 * pkt_nextpkt pointers (Skywalk flavor of fq_if_append_pkt_t).
 */
static void
fq_if_append_pkt(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
{
	pkt->cp_kpkt->pkt_nextpkt = next_pkt->cp_kpkt;
}
205 #endif /* SKYWALK */
206
207 #if SKYWALK
/*
 * Dequeue kernel packets from a single flow queue, charging the flow's
 * DRR deficit, until the deficit is exhausted, the caller's packet/byte
 * limits are reached, or the queue drains.  Dequeued packets are
 * appended to the caller's head/tail chain; class stats and the
 * caller's pkt_cnt/byte_cnt accumulators are updated.  pflags is OR'ed
 * into each packet's pkt_pflags.  On return *qempty reflects whether
 * the flow queue is empty; returns TRUE iff a caller limit stopped
 * the dequeue.
 */
static boolean_t
fq_getq_flow_kpkt(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    int64_t byte_limit, uint32_t pkt_limit, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t *byte_cnt, uint32_t *pkt_cnt,
    boolean_t *qempty, uint32_t pflags, uint64_t now)
{
	uint32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	/*
	 * Assert to make sure pflags is part of PKT_F_COMMON_MASK;
	 * all common flags need to be declared in that mask.
	 */
	ASSERT((pflags & ~PKT_F_COMMON_MASK) == 0);

	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !KPKTQ_EMPTY(&fq->fq_kpktq)) {
		_PKTSCHED_PKT_INIT(&pkt);
		fq_getq_flow(fqs, fq, &pkt, now);
		ASSERT(pkt.pktsched_ptype == QP_PACKET);

		plen = pktsched_get_pkt_len(&pkt);
		/* charge the DRR deficit before propagating the flags */
		fq->fq_deficit -= plen;
		pkt.pktsched_pkt_kpkt->pkt_pflags |= pflags;

		/* append to the caller's chain (or start it) */
		if (head->cp_kpkt == NULL) {
			*head = pkt.pktsched_pkt;
		} else {
			ASSERT(tail->cp_kpkt != NULL);
			ASSERT(tail->cp_kpkt->pkt_nextpkt == NULL);
			tail->cp_kpkt->pkt_nextpkt = pkt.pktsched_pkt_kpkt;
		}
		*tail = pkt.pktsched_pkt;
		tail->cp_kpkt->pkt_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
			limit_reached = TRUE;
		}
	}
	KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq),
	    fq->fq_bytes, fq->fq_min_qdelay);

	*qempty = KPKTQ_EMPTY(&fq->fq_kpktq);
	return limit_reached;
}
264 #endif /* SKYWALK */
265
/*
 * Dequeue mbufs from a single flow queue, charging the flow's DRR
 * deficit, until the deficit is exhausted, the caller's packet/byte
 * limits are reached, or the queue drains.  Mirrors fq_getq_flow_kpkt()
 * for the mbuf packet type: packets are appended to the caller's
 * head/tail chain, class stats and the pkt_cnt/byte_cnt accumulators
 * are updated, and pflags is OR'ed into each mbuf's pkt_flags.
 * On return *qempty reflects whether the flow queue is empty; returns
 * TRUE iff a caller limit stopped the dequeue.
 */
static boolean_t
fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    int64_t byte_limit, uint32_t pkt_limit, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t *byte_cnt, uint32_t *pkt_cnt,
    boolean_t *qempty, uint32_t pflags, uint64_t now)
{
	u_int32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !MBUFQ_EMPTY(&fq->fq_mbufq)) {
		_PKTSCHED_PKT_INIT(&pkt);
		fq_getq_flow(fqs, fq, &pkt, now);
		ASSERT(pkt.pktsched_ptype == QP_MBUF);

		plen = pktsched_get_pkt_len(&pkt);
		/* charge the DRR deficit before propagating the flags */
		fq->fq_deficit -= plen;
		pkt.pktsched_pkt_mbuf->m_pkthdr.pkt_flags |= pflags;

		/* append to the caller's chain (or start it) */
		if (head->cp_mbuf == NULL) {
			*head = pkt.pktsched_pkt;
		} else {
			ASSERT(tail->cp_mbuf != NULL);
			ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
			tail->cp_mbuf->m_nextpkt = pkt.pktsched_pkt_mbuf;
		}
		*tail = pkt.pktsched_pkt;
		tail->cp_mbuf->m_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
			limit_reached = TRUE;
		}
	}
	KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq),
	    fq->fq_bytes, fq->fq_min_qdelay);

	*qempty = MBUFQ_EMPTY(&fq->fq_mbufq);
	return limit_reached;
}
316
317 fq_if_t *
fq_if_alloc(struct ifclassq * ifq,classq_pkt_type_t ptype)318 fq_if_alloc(struct ifclassq *ifq, classq_pkt_type_t ptype)
319 {
320 fq_if_t *fqs;
321
322 fqs = zalloc_flags(fq_if_zone, Z_WAITOK | Z_ZERO);
323 fqs->fqs_ifq = ifq;
324 fqs->fqs_ptype = ptype;
325
326 /* Configure packet drop limit across all queues */
327 fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(ifq);
328 STAILQ_INIT(&fqs->fqs_fclist);
329 TAILQ_INIT(&fqs->fqs_empty_list);
330 TAILQ_INIT(&fqs->fqs_combined_grp_list);
331
332 return fqs;
333 }
334
/*
 * Tear down a scheduler instance: purge all queued packets and flows,
 * release all groups, then free the instance itself.
 */
void
fq_if_destroy(fq_if_t *fqs)
{
	fq_if_purge(fqs);
	fq_if_destroy_grps(fqs);

	fqs->fqs_ifq = NULL;
	zfree(fq_if_zone, fqs);
}
344
345 static inline uint8_t
fq_if_service_to_priority(fq_if_t * fqs,mbuf_svc_class_t svc)346 fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc)
347 {
348 uint8_t pri;
349
350 if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
351 switch (svc) {
352 case MBUF_SC_BK_SYS:
353 case MBUF_SC_BK:
354 pri = FQ_IF_BK_INDEX;
355 break;
356 case MBUF_SC_BE:
357 case MBUF_SC_RD:
358 case MBUF_SC_OAM:
359 pri = FQ_IF_BE_INDEX;
360 break;
361 case MBUF_SC_AV:
362 case MBUF_SC_RV:
363 case MBUF_SC_VI:
364 case MBUF_SC_SIG:
365 pri = FQ_IF_VI_INDEX;
366 break;
367 case MBUF_SC_VO:
368 case MBUF_SC_CTL:
369 pri = FQ_IF_VO_INDEX;
370 break;
371 default:
372 pri = FQ_IF_BE_INDEX; /* Use best effort by default */
373 break;
374 }
375 return pri;
376 }
377
378 /* scheduler is not managed by the driver */
379 switch (svc) {
380 case MBUF_SC_BK_SYS:
381 pri = FQ_IF_BK_SYS_INDEX;
382 break;
383 case MBUF_SC_BK:
384 pri = FQ_IF_BK_INDEX;
385 break;
386 case MBUF_SC_BE:
387 pri = FQ_IF_BE_INDEX;
388 break;
389 case MBUF_SC_RD:
390 pri = FQ_IF_RD_INDEX;
391 break;
392 case MBUF_SC_OAM:
393 pri = FQ_IF_OAM_INDEX;
394 break;
395 case MBUF_SC_AV:
396 pri = FQ_IF_AV_INDEX;
397 break;
398 case MBUF_SC_RV:
399 pri = FQ_IF_RV_INDEX;
400 break;
401 case MBUF_SC_VI:
402 pri = FQ_IF_VI_INDEX;
403 break;
404 case MBUF_SC_SIG:
405 pri = FQ_IF_SIG_INDEX;
406 break;
407 case MBUF_SC_VO:
408 pri = FQ_IF_VO_INDEX;
409 break;
410 case MBUF_SC_CTL:
411 pri = FQ_IF_CTL_INDEX;
412 break;
413 default:
414 pri = FQ_IF_BE_INDEX; /* Use best effort by default */
415 break;
416 }
417 return pri;
418 }
419
420 void
fq_if_classq_init(fq_if_group_t * fqg,uint32_t pri,uint32_t quantum,uint32_t drr_max,uint32_t svc_class)421 fq_if_classq_init(fq_if_group_t *fqg, uint32_t pri, uint32_t quantum,
422 uint32_t drr_max, uint32_t svc_class)
423 {
424 fq_if_classq_t *fq_cl;
425 VERIFY(pri < FQ_IF_MAX_CLASSES);
426 fq_cl = &fqg->fqg_classq[pri];
427
428 VERIFY(fq_cl->fcl_quantum == 0);
429 VERIFY(quantum != 0);
430 fq_cl->fcl_quantum = quantum;
431 fq_cl->fcl_pri = pri;
432 fq_cl->fcl_drr_max = drr_max;
433 fq_cl->fcl_service_class = svc_class;
434 STAILQ_INIT(&fq_cl->fcl_new_flows);
435 STAILQ_INIT(&fq_cl->fcl_old_flows);
436 }
437
/*
 * Enqueue a pre-built chain of cnt packets (bytes total) onto the
 * scheduler attached to ifq.  The service class of the chain selects
 * the class queue; on Skywalk packets the qset index selects the group.
 * Returns 0 on success, EQFULL/EQSUSPENDED as flow-control feedback, or
 * ENOBUFS on drop.  *pdrop is set TRUE when the chain was freed here
 * instead of being enqueued.
 */
int
fq_if_enqueue_classq(struct ifclassq *ifq, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t *pdrop)
{
	uint8_t pri, grp_idx = 0;
	fq_if_t *fqs;
	fq_if_classq_t *fq_cl;
	fq_if_group_t *fq_group;
	int ret;
	mbuf_svc_class_t svc;
	pktsched_pkt_t pkt;

	pktsched_pkt_encap_chain(&pkt, head, tail, cnt, bytes);

	fqs = (fq_if_t *)ifq->ifcq_disc;
	svc = pktsched_get_pkt_svc(&pkt);
#if SKYWALK
	if (head->cp_ptype == QP_PACKET) {
		/* native packets carry their group (qset) index */
		grp_idx = head->cp_kpkt->pkt_qset_idx;
	}
#endif /* SKYWALK */
	pri = fq_if_service_to_priority(fqs, svc);
	VERIFY(pri < FQ_IF_MAX_CLASSES);

	IFCQ_LOCK_SPIN(ifq);
	fq_group = fq_if_find_grp(fqs, grp_idx);
	fq_cl = &fq_group->fqg_classq[pri];

	if (__improbable(svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1)) {
		IFCQ_UNLOCK(ifq);
		/* BK_SYS is currently throttled */
		atomic_add_32(&fq_cl->fcl_stat.fcl_throttle_drops, 1);
		pktsched_free_pkt(&pkt);
		*pdrop = TRUE;
		ret = EQSUSPENDED;
		goto done;
	}

	ASSERT(pkt.pktsched_ptype == fqs->fqs_ptype);
	ret = fq_addq(fqs, fq_group, &pkt, fq_cl);
	if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
		if (((fq_group->fqg_bitmaps[FQ_IF_ER] | fq_group->fqg_bitmaps[FQ_IF_EB]) &
		    (1 << pri)) == 0) {
			/*
			 * this group is not in ER or EB groups,
			 * mark it as IB
			 */
			pktsched_bit_set(pri, &fq_group->fqg_bitmaps[FQ_IF_IB]);
		}
	}

	if (__improbable(ret != 0)) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
			*pdrop = FALSE;
		} else if (ret == CLASSQEQ_COMPRESSED) {
			/* packet was merged into an existing one; not a drop */
			ret = 0;
			*pdrop = FALSE;
		} else {
			/* drop cases: packet was not enqueued, free it here */
			IFCQ_UNLOCK(ifq);
			*pdrop = TRUE;
			pktsched_free_pkt(&pkt);
			switch (ret) {
			case CLASSQEQ_DROP:
				ret = ENOBUFS;
				goto done;
			case CLASSQEQ_DROP_FC:
				ret = EQFULL;
				goto done;
			case CLASSQEQ_DROP_SP:
				ret = EQSUSPENDED;
				goto done;
			default:
				VERIFY(0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			/* NOTREACHED */
			__builtin_unreachable();
		}
	} else {
		*pdrop = FALSE;
	}
	IFCQ_ADD_LEN(ifq, cnt);
	IFCQ_INC_BYTES(ifq, bytes);


	FQS_GRP_ADD_LEN(fqs, grp_idx, cnt);
	FQS_GRP_INC_BYTES(fqs, grp_idx, bytes);

	IFCQ_UNLOCK(ifq);
done:
#if DEBUG || DEVELOPMENT
	if (__improbable((ret == EQFULL) && (ifclassq_flow_control_adv == 0))) {
		/* flow-control advisory disabled: report plain success */
		ret = 0;
	}
#endif /* DEBUG || DEVELOPMENT */
	return ret;
}
538
/*
 * Dequeue a single packet (any service class) from the given group.
 * Thin wrapper around fq_if_dequeue_classq_multi() with a count of 1.
 */
void
fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_t *pkt, uint8_t grp_idx)
{
	(void) fq_if_dequeue_classq_multi(ifq, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
}
545
/*
 * Dequeue a single packet of a specific service class from the given
 * group.  Thin wrapper around fq_if_dequeue_sc_classq_multi() with a
 * count of 1.
 */
void
fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
    classq_pkt_t *pkt, uint8_t grp_idx)
{
	(void) fq_if_dequeue_sc_classq_multi(ifq, svc, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
}
553
/*
 * Append fq to the per-dequeue flow list.  The flow must not already
 * be on the list and must have an empty local packet chain.
 */
static inline void
fq_dqlist_add(flowq_dqlist_t *fq_dqlist_head, fq_t *fq)
{
	ASSERT(fq->fq_dq_head.cp_mbuf == NULL);
	ASSERT(!fq->fq_in_dqlist);
	STAILQ_INSERT_TAIL(fq_dqlist_head, fq, fq_dqlink);
	fq->fq_in_dqlist = true;
}
562
/*
 * Detach fq from the dequeue list and splice its locally collected
 * packet chain (fq_dq_head..fq_dq_tail) onto the caller's head/tail
 * chain; ptype selects the link field (mbuf vs kernel packet).  The
 * flow's local chain pointers are reset afterwards.  A flow with an
 * empty local chain is simply removed from the list.
 */
static inline void
fq_dqlist_remove(flowq_dqlist_t *fq_dqlist_head, fq_t *fq, classq_pkt_t *head,
    classq_pkt_t *tail, classq_pkt_type_t ptype)
{
	ASSERT(fq->fq_in_dqlist);
	if (fq->fq_dq_head.cp_mbuf == NULL) {
		goto done;
	}

	if (head->cp_mbuf == NULL) {
		/* caller's chain is empty; the flow's chain becomes it */
		*head = fq->fq_dq_head;
	} else {
		ASSERT(tail->cp_mbuf != NULL);

		switch (ptype) {
		case QP_MBUF:
			ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
			tail->cp_mbuf->m_nextpkt = fq->fq_dq_head.cp_mbuf;
			ASSERT(fq->fq_dq_tail.cp_mbuf->m_nextpkt == NULL);
			break;
#if SKYWALK
		case QP_PACKET:
			ASSERT(tail->cp_kpkt->pkt_nextpkt == NULL);
			tail->cp_kpkt->pkt_nextpkt = fq->fq_dq_head.cp_kpkt;
			ASSERT(fq->fq_dq_tail.cp_kpkt->pkt_nextpkt == NULL);
			break;
#endif /* SKYWALK */
		default:
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}
	*tail = fq->fq_dq_tail;
done:
	STAILQ_REMOVE(fq_dqlist_head, fq, flowq, fq_dqlink);
	CLASSQ_PKT_INIT(&fq->fq_dq_head);
	CLASSQ_PKT_INIT(&fq->fq_dq_tail);
	fq->fq_in_dqlist = false;
}
603
/*
 * Drain the per-dequeue flow list, splicing each flow's collected
 * packets onto the caller's head/tail chain in list order.
 */
static inline void
fq_dqlist_get_packet_list(flowq_dqlist_t *fq_dqlist_head, classq_pkt_t *head,
    classq_pkt_t *tail, classq_pkt_type_t ptype)
{
	fq_t *fq, *tfq;

	STAILQ_FOREACH_SAFE(fq, fq_dqlist_head, fq_dqlink, tfq) {
		fq_dqlist_remove(fq_dqlist_head, fq, head, tail, ptype);
	}
}
614
615 static int
fq_if_grps_bitmap_ffs(fq_grp_tailq_t * grp_list,int pri,fq_if_state state,fq_if_group_t ** selected_grp)616 fq_if_grps_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri, fq_if_state state,
617 fq_if_group_t **selected_grp)
618 {
619 #pragma unused(pri)
620
621 fq_if_group_t *grp;
622 uint32_t highest_pri = FQ_IF_MAX_CLASSES;
623 int ret_pri = 0;
624
625 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
626 uint32_t cur_pri = pktsched_ffs(grp->fqg_bitmaps[state]);
627 /* bitmap is empty in this case */
628 if (cur_pri == 0) {
629 continue;
630 }
631 if (cur_pri <= highest_pri) {
632 highest_pri = cur_pri;
633 ret_pri = cur_pri;
634 *selected_grp = grp;
635 }
636 }
637 return ret_pri;
638 }
639
640 static boolean_t
fq_if_grps_bitmap_zeros(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)641 fq_if_grps_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
642 {
643 #pragma unused(pri)
644
645 fq_if_group_t *grp;
646
647 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
648 if (grp->fqg_bitmaps[state] != 0) {
649 return FALSE;
650 }
651 }
652 return TRUE;
653 }
654
655 static void
fq_if_grps_bitmap_cpy(fq_grp_tailq_t * grp_list,int pri,fq_if_state dst_state,fq_if_state src_state)656 fq_if_grps_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
657 fq_if_state src_state)
658 {
659 #pragma unused(pri)
660
661 fq_if_group_t *grp;
662 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
663 grp->fqg_bitmaps[dst_state] = grp->fqg_bitmaps[src_state];
664 }
665 }
666
667 static void
fq_if_grps_bitmap_clr(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)668 fq_if_grps_bitmap_clr(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
669 {
670 #pragma unused(pri)
671
672 fq_if_group_t *grp;
673 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
674 grp->fqg_bitmaps[state] = 0;
675 }
676 }
677
678 static int
fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t * grp_list,int pri,fq_if_state state,fq_if_group_t ** selected_grp)679 fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri, fq_if_state state,
680 fq_if_group_t **selected_grp)
681 {
682 fq_if_group_t *grp;
683 int ret_pri = 0;
684
685 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
686 if (pktsched_bit_tst(pri, &grp->fqg_bitmaps[state])) {
687 /* +1 to match the semantics of pktsched_ffs */
688 ret_pri = pri + 1;
689 *selected_grp = grp;
690 break;
691 }
692 }
693
694 return ret_pri;
695 }
696
697 static boolean_t
fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)698 fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
699 {
700 fq_if_group_t *grp;
701
702 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
703 if (pktsched_bit_tst(pri, &grp->fqg_bitmaps[state])) {
704 return FALSE;
705 }
706 }
707 return TRUE;
708 }
709
710 static void
fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t * grp_list,int pri,fq_if_state dst_state,fq_if_state src_state)711 fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
712 fq_if_state src_state)
713 {
714 fq_if_group_t *grp;
715
716 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
717 pktsched_bit_cpy(pri, &grp->fqg_bitmaps[dst_state],
718 &grp->fqg_bitmaps[src_state]);
719 }
720 }
721
722 static void
fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)723 fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
724 {
725 fq_if_group_t *grp;
726
727 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
728 pktsched_bit_clr(pri, &grp->fqg_bitmaps[state]);
729 }
730 }
731
732 static int
fq_if_dequeue_classq_multi_common(struct ifclassq * ifq,mbuf_svc_class_t svc,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt,uint8_t grp_idx)733 fq_if_dequeue_classq_multi_common(struct ifclassq *ifq, mbuf_svc_class_t svc,
734 u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
735 classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
736 uint8_t grp_idx)
737 {
738 uint32_t total_pktcnt = 0, total_bytecnt = 0;
739 classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
740 classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
741 classq_pkt_t tmp = CLASSQ_PKT_INITIALIZER(tmp);
742 fq_if_append_pkt_t append_pkt;
743 flowq_dqlist_t fq_dqlist_head;
744 fq_if_classq_t *fq_cl;
745 fq_grp_tailq_t *grp_list, tmp_grp_list;
746 fq_if_group_t *fq_grp = NULL;
747 fq_if_t *fqs;
748 uint64_t now;
749 int pri = 0, svc_pri = 0;
750
751 IFCQ_LOCK_ASSERT_HELD(ifq);
752
753 fqs = (fq_if_t *)ifq->ifcq_disc;
754 STAILQ_INIT(&fq_dqlist_head);
755
756 switch (fqs->fqs_ptype) {
757 case QP_MBUF:
758 append_pkt = fq_if_append_mbuf;
759 break;
760
761 #if SKYWALK
762 case QP_PACKET:
763 append_pkt = fq_if_append_pkt;
764 break;
765 #endif /* SKYWALK */
766
767 default:
768 VERIFY(0);
769 /* NOTREACHED */
770 __builtin_unreachable();
771 }
772
773 now = fq_codel_get_time();
774 if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
775 svc_pri = fq_if_service_to_priority(fqs, svc);
776 } else {
777 VERIFY(svc == MBUF_SC_UNSPEC);
778 }
779
780 if (fq_if_is_grp_combined(fqs, grp_idx)) {
781 grp_list = &fqs->fqs_combined_grp_list;
782 VERIFY(!TAILQ_EMPTY(grp_list));
783 } else {
784 grp_list = &tmp_grp_list;
785 fq_grp = fq_if_find_grp(fqs, grp_idx);
786 TAILQ_INIT(grp_list);
787 TAILQ_INSERT_TAIL(grp_list, fq_grp, fqg_grp_link);
788 }
789
790 for (;;) {
791 uint32_t pktcnt = 0, bytecnt = 0;
792 classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
793 classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
794
795 if (fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_ER) &&
796 fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_EB)) {
797 fqs->grp_bitmaps_cpy(grp_list, svc_pri, FQ_IF_EB, FQ_IF_IB);
798 fqs->grp_bitmaps_clr(grp_list, svc_pri, FQ_IF_IB);
799 if (fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_EB)) {
800 break;
801 }
802 }
803 pri = fqs->grp_bitmaps_ffs(grp_list, svc_pri, FQ_IF_ER, &fq_grp);
804 if (pri == 0) {
805 /*
806 * There are no ER flows, move the highest
807 * priority one from EB if there are any in that
808 * category
809 */
810 pri = fqs->grp_bitmaps_ffs(grp_list, svc_pri, FQ_IF_EB, &fq_grp);
811 VERIFY(pri > 0);
812 VERIFY(fq_grp != NULL);
813 pktsched_bit_clr((pri - 1), &fq_grp->fqg_bitmaps[FQ_IF_EB]);
814 pktsched_bit_set((pri - 1), &fq_grp->fqg_bitmaps[FQ_IF_ER]);
815 }
816 VERIFY(fq_grp != NULL);
817 pri--; /* index starts at 0 */
818 fq_cl = &fq_grp->fqg_classq[pri];
819
820 if (fq_cl->fcl_budget <= 0) {
821 /* Update the budget */
822 fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
823 fq_cl->fcl_stat.fcl_flows_cnt) *
824 fq_cl->fcl_quantum);
825 if (fq_cl->fcl_budget <= 0) {
826 goto state_change;
827 }
828 }
829 fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
830 (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
831 &bytecnt, &fq_dqlist_head, true, now);
832 if (head.cp_mbuf != NULL) {
833 ASSERT(STAILQ_EMPTY(&fq_dqlist_head));
834 if (first.cp_mbuf == NULL) {
835 first = head;
836 } else {
837 ASSERT(last.cp_mbuf != NULL);
838 append_pkt(&last, &head);
839 }
840 last = tail;
841 append_pkt(&last, &tmp);
842 }
843 fq_cl->fcl_budget -= bytecnt;
844 total_pktcnt += pktcnt;
845 total_bytecnt += bytecnt;
846
847 /*
848 * If the class has exceeded the budget but still has data
849 * to send, move it to IB
850 */
851 state_change:
852 VERIFY(fq_grp != NULL);
853 if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
854 if (fq_cl->fcl_budget <= 0) {
855 pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
856 pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
857 }
858 } else {
859 pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
860 VERIFY(((fq_grp->fqg_bitmaps[FQ_IF_ER] |
861 fq_grp->fqg_bitmaps[FQ_IF_EB] |
862 fq_grp->fqg_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
863 fq_cl->fcl_budget = 0;
864 }
865 if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt) {
866 break;
867 }
868 }
869
870 if (!fq_if_is_grp_combined(fqs, grp_idx)) {
871 TAILQ_REMOVE(grp_list, fq_grp, fqg_grp_link);
872 VERIFY(TAILQ_EMPTY(grp_list));
873 }
874
875 fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last,
876 fqs->fqs_ptype);
877
878 if (__probable(first_packet != NULL)) {
879 *first_packet = first;
880 }
881 if (last_packet != NULL) {
882 *last_packet = last;
883 }
884 if (retpktcnt != NULL) {
885 *retpktcnt = total_pktcnt;
886 }
887 if (retbytecnt != NULL) {
888 *retbytecnt = total_bytecnt;
889 }
890
891 IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
892 fq_if_purge_empty_flow_list(fqs, now, false);
893 return 0;
894 }
895
/*
 * Dequeue up to maxpktcnt packets / maxbytecnt bytes across all service
 * classes of the given group; see fq_if_dequeue_classq_multi_common().
 */
int
fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
    u_int32_t maxbytecnt, classq_pkt_t *first_packet,
    classq_pkt_t *last_packet, u_int32_t *retpktcnt,
    u_int32_t *retbytecnt, uint8_t grp_idx)
{
	return fq_if_dequeue_classq_multi_common(ifq, MBUF_SC_UNSPEC, maxpktcnt, maxbytecnt,
	           first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
}
905
/*
 * Dequeue up to maxpktcnt packets / maxbytecnt bytes of one service
 * class.  Combined groups go through the full scheduler; a standalone
 * group takes the single-class fast path.
 */
int
fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc,
    u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
    classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
    uint8_t grp_idx)
{
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	if (fq_if_is_grp_combined(fqs, grp_idx)) {
		return fq_if_dequeue_classq_multi_common(ifq, svc, maxpktcnt, maxbytecnt,
		           first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
	} else {
		/*
		 * take a shortcut here since there is no need to schedule
		 * one single service class.
		 */
		return fq_if_dequeue_sc_classq_multi_separate(ifq, svc, maxpktcnt, maxbytecnt,
		           first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
	}
}
926
927 static int
fq_if_dequeue_sc_classq_multi_separate(struct ifclassq * ifq,mbuf_svc_class_t svc,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt,uint8_t grp_idx)928 fq_if_dequeue_sc_classq_multi_separate(struct ifclassq *ifq, mbuf_svc_class_t svc,
929 u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
930 classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
931 uint8_t grp_idx)
932 {
933 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
934 uint8_t pri;
935 u_int32_t total_pktcnt = 0, total_bytecnt = 0;
936 fq_if_classq_t *fq_cl;
937 classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
938 classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
939 fq_if_append_pkt_t append_pkt;
940 flowq_dqlist_t fq_dqlist_head;
941 fq_if_group_t *fq_grp;
942 uint64_t now;
943
944 switch (fqs->fqs_ptype) {
945 case QP_MBUF:
946 append_pkt = fq_if_append_mbuf;
947 break;
948
949 #if SKYWALK
950 case QP_PACKET:
951 append_pkt = fq_if_append_pkt;
952 break;
953 #endif /* SKYWALK */
954
955 default:
956 VERIFY(0);
957 /* NOTREACHED */
958 __builtin_unreachable();
959 }
960
961 STAILQ_INIT(&fq_dqlist_head);
962 now = fq_codel_get_time();
963
964 pri = fq_if_service_to_priority(fqs, svc);
965 fq_grp = fq_if_find_grp(fqs, grp_idx);
966 fq_cl = &fq_grp->fqg_classq[pri];
967
968 /*
969 * Now we have the queue for a particular service class. We need
970 * to dequeue as many packets as needed, first from the new flows
971 * and then from the old flows.
972 */
973 while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt &&
974 fq_cl->fcl_stat.fcl_pkt_cnt > 0) {
975 classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
976 classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
977 u_int32_t pktcnt = 0, bytecnt = 0;
978
979 fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
980 (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
981 &bytecnt, &fq_dqlist_head, false, now);
982 if (head.cp_mbuf != NULL) {
983 if (first.cp_mbuf == NULL) {
984 first = head;
985 } else {
986 ASSERT(last.cp_mbuf != NULL);
987 append_pkt(&last, &head);
988 }
989 last = tail;
990 }
991 total_pktcnt += pktcnt;
992 total_bytecnt += bytecnt;
993 }
994
995 /*
996 * Mark classq as IB if it's not idle, so that we can
997 * start without re-init the bitmaps when it's switched
998 * to combined mode.
999 */
1000 if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
1001 pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
1002 pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
1003 pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_EB]);
1004 } else {
1005 pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
1006 VERIFY(((fq_grp->fqg_bitmaps[FQ_IF_ER] |
1007 fq_grp->fqg_bitmaps[FQ_IF_EB] |
1008 fq_grp->fqg_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
1009 }
1010
1011 fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last, fqs->fqs_ptype);
1012
1013 if (__probable(first_packet != NULL)) {
1014 *first_packet = first;
1015 }
1016 if (last_packet != NULL) {
1017 *last_packet = last;
1018 }
1019 if (retpktcnt != NULL) {
1020 *retpktcnt = total_pktcnt;
1021 }
1022 if (retbytecnt != NULL) {
1023 *retbytecnt = total_bytecnt;
1024 }
1025
1026 IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
1027 fq_if_purge_empty_flow_list(fqs, now, false);
1028 return 0;
1029 }
1030
/*
 * Drop every packet queued on flow `fq' and walk the flow through its
 * state machine: off the new/old list, onto the empty list, and -- if it
 * was already an empty flow -- destroyed outright.
 *
 * fqs:    scheduler instance owning the flow.
 * fq:     flow to purge; may be freed before return (empty-flow case).
 * pktsp:  optional out-param, number of packets dropped.
 * bytesp: optional out-param, number of bytes dropped.
 * now:    current time, passed through to the flow-state helpers.
 */
static void
fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, uint32_t *pktsp,
    uint32_t *bytesp, uint64_t now)
{
	fq_if_classq_t *fq_cl;
	u_int32_t pkts, bytes;
	pktsched_pkt_t pkt;
	fq_if_group_t *grp;

	fq_cl = &FQ_CLASSQ(fq);
	grp = FQ_GROUP(fq);
	pkts = bytes = 0;
	_PKTSCHED_PKT_INIT(&pkt);
	/* drain the flow, freeing one packet per iteration */
	for (;;) {
		fq_getq_flow(fqs, fq, &pkt, now);
		if (pkt.pktsched_pkt_mbuf == NULL) {
			/* queue empty: dequeue must report an invalid type */
			VERIFY(pkt.pktsched_ptype == QP_INVALID);
			break;
		}
		pkts++;
		bytes += pktsched_get_pkt_len(&pkt);
		pktsched_free_pkt(&pkt);
	}
	KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq), fq->fq_bytes, fq->fq_min_qdelay);

	/* purged packets count as interface drops */
	IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);

	/* move through the flow queue states */
	VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW | FQF_EMPTY_FLOW)));
	if (fq->fq_flags & FQF_NEW_FLOW) {
		fq_if_empty_new_flow(fq, fq_cl);
	}
	if (fq->fq_flags & FQF_OLD_FLOW) {
		fq_if_empty_old_flow(fqs, fq_cl, fq, now);
	}
	if (fq->fq_flags & FQF_EMPTY_FLOW) {
		/* frees the flow: fq must not be dereferenced after this */
		fq_if_purge_empty_flow(fqs, fq);
		fq = NULL;
	}

	/* class is now idle: clear all of its scheduler-state bitmap bits */
	if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
		int i;
		for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
			pktsched_bit_clr(fq_cl->fcl_pri, &grp->fqg_bitmaps[i]);
		}
	}

	if (pktsp != NULL) {
		*pktsp = pkts;
	}
	if (bytesp != NULL) {
		*bytesp = bytes;
	}
}
1086
1087 static void
fq_if_purge_classq(fq_if_t * fqs,fq_if_classq_t * fq_cl)1088 fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
1089 {
1090 fq_t *fq, *tfq;
1091 uint64_t now;
1092
1093 now = fq_codel_get_time();
1094 /*
1095 * Take each flow from new/old flow list and flush mbufs
1096 * in that flow
1097 */
1098 STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
1099 fq_if_purge_flow(fqs, fq, NULL, NULL, now);
1100 }
1101 STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
1102 fq_if_purge_flow(fqs, fq, NULL, NULL, now);
1103 }
1104 VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
1105 VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));
1106
1107 STAILQ_INIT(&fq_cl->fcl_new_flows);
1108 STAILQ_INIT(&fq_cl->fcl_old_flows);
1109 fq_cl->fcl_budget = 0;
1110 }
1111
1112 static void
fq_if_purge(fq_if_t * fqs)1113 fq_if_purge(fq_if_t *fqs)
1114 {
1115 uint64_t now;
1116 fq_if_group_t *grp;
1117 int i;
1118
1119 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1120 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
1121 if (fqs->fqs_classq_groups[grp_idx] == NULL) {
1122 continue;
1123 }
1124
1125 grp = fq_if_find_grp(fqs, grp_idx);
1126 fq_if_purge_grp(fqs, grp);
1127 }
1128
1129 now = fq_codel_get_time();
1130 fq_if_purge_empty_flow_list(fqs, now, true);
1131
1132 VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));
1133 VERIFY(TAILQ_EMPTY(&fqs->fqs_empty_list));
1134
1135 fqs->fqs_large_flow = NULL;
1136 for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) {
1137 VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i]));
1138 }
1139
1140 IFCQ_LEN(fqs->fqs_ifq) = 0;
1141 IFCQ_BYTES(fqs->fqs_ifq) = 0;
1142 }
1143
1144 static void
fq_if_purge_sc(fq_if_t * fqs,cqrq_purge_sc_t * req)1145 fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
1146 {
1147 fq_t *fq;
1148 uint64_t now;
1149 fq_if_group_t *grp;
1150
1151 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
1152 req->packets = req->bytes = 0;
1153 VERIFY(req->flow != 0);
1154
1155 now = fq_codel_get_time();
1156
1157 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
1158 if (fqs->fqs_classq_groups[grp_idx] == NULL) {
1159 continue;
1160 }
1161 uint32_t bytes = 0, pkts = 0;
1162
1163 grp = fq_if_find_grp(fqs, grp_idx);
1164 /*
1165 * Packet and traffic type are needed only if we want
1166 * to create a flow queue.
1167 */
1168 fq = fq_if_hash_pkt(fqs, grp, req->flow, req->sc, 0, false, FQ_TFC_C);
1169 if (fq != NULL) {
1170 fq_if_purge_flow(fqs, fq, &pkts, &bytes, now);
1171 req->bytes += bytes;
1172 req->packets += pkts;
1173 }
1174 }
1175 }
1176
1177 static uint16_t
fq_if_calc_quantum(struct ifnet * ifp)1178 fq_if_calc_quantum(struct ifnet *ifp)
1179 {
1180 uint16_t quantum;
1181
1182 switch (ifp->if_family) {
1183 case IFNET_FAMILY_ETHERNET:
1184 VERIFY((ifp->if_mtu + ETHER_HDR_LEN) <= UINT16_MAX);
1185 quantum = (uint16_t)ifp->if_mtu + ETHER_HDR_LEN;
1186 break;
1187
1188 case IFNET_FAMILY_CELLULAR:
1189 case IFNET_FAMILY_IPSEC:
1190 case IFNET_FAMILY_UTUN:
1191 VERIFY(ifp->if_mtu <= UINT16_MAX);
1192 quantum = (uint16_t)ifp->if_mtu;
1193 break;
1194
1195 default:
1196 quantum = FQ_CODEL_DEFAULT_QUANTUM;
1197 break;
1198 }
1199
1200 if ((ifp->if_hwassist & IFNET_TSOF) != 0) {
1201 VERIFY(ifp->if_tso_v4_mtu <= UINT16_MAX);
1202 VERIFY(ifp->if_tso_v6_mtu <= UINT16_MAX);
1203 quantum = (uint16_t)MAX(ifp->if_tso_v4_mtu, ifp->if_tso_v6_mtu);
1204 quantum = (quantum != 0) ? quantum : IF_MAXMTU;
1205 }
1206
1207 quantum = MAX(FQ_CODEL_DEFAULT_QUANTUM, quantum);
1208 #if DEBUG || DEVELOPMENT
1209 quantum = (fq_codel_quantum != 0) ? fq_codel_quantum : quantum;
1210 #endif /* DEBUG || DEVELOPMENT */
1211 VERIFY(quantum != 0);
1212 return quantum;
1213 }
1214
/*
 * CLASSQ_EV_LINK_MTU handler: recompute the DRR quantum from the current
 * interface MTU and reprogram it into every class of every allocated
 * group.  Driver-managed schedulers expose only the BK/BE/VI/VO classes;
 * all others expose the full 10-class set.
 */
static void
fq_if_mtu_update(fq_if_t *fqs)
{
/*
 * Token-pasting helper: writes FQ_CODEL_QUANTUM_<class>(_q) into the
 * fcl_quantum field of class FQ_IF_<class>_INDEX within group _grp.
 */
#define _FQ_CLASSQ_UPDATE_QUANTUM(_grp, _s, _q)                    \
    (_grp)->fqg_classq[FQ_IF_ ## _s ## _INDEX].fcl_quantum =       \
    FQ_CODEL_QUANTUM_ ## _s(_q)                                    \

	uint32_t quantum;
	fq_if_group_t *grp;

	quantum = fq_if_calc_quantum(fqs->fqs_ifq->ifcq_ifp);

	for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
		if (fqs->fqs_classq_groups[grp_idx] == NULL) {
			continue;
		}

		grp = fq_if_find_grp(fqs, grp_idx);

		if ((fqs->fqs_flags & FQS_DRIVER_MANAGED) != 0) {
			/* driver-managed mode has only four classes */
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BK, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BE, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, VI, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, VO, quantum);
		} else {
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BK_SYS, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BK, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BE, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, RD, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, OAM, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, AV, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, RV, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, VI, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, VO, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, CTL, quantum);
		}
	}
#undef _FQ_CLASSQ_UPDATE_QUANTUM
}
1254
1255 static void
fq_if_event(fq_if_t * fqs,cqev_t ev)1256 fq_if_event(fq_if_t *fqs, cqev_t ev)
1257 {
1258 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
1259
1260 switch (ev) {
1261 case CLASSQ_EV_LINK_UP:
1262 case CLASSQ_EV_LINK_DOWN:
1263 fq_if_purge(fqs);
1264 break;
1265 case CLASSQ_EV_LINK_MTU:
1266 fq_if_mtu_update(fqs);
1267 break;
1268 default:
1269 break;
1270 }
1271 }
1272
/*
 * Throttle a class: drop everything it has queued, mark the scheduler as
 * throttled and emit the start-of-throttle ktrace event.  The purge must
 * happen first so the class is idle while suspended.
 */
static void
fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_if_purge_classq(fqs, fq_cl);
	fqs->fqs_throttle = 1;
	fq_cl->fcl_stat.fcl_throttle_on++;
	KDBG(AQM_KTRACE_AON_THROTTLE | DBG_FUNC_START,
	    fqs->fqs_ifq->ifcq_ifp->if_index, 0, 0, 0);
}
1282
/*
 * Un-throttle a class and emit the end-of-throttle ktrace event.  The
 * class must be idle: it was purged when suspended and could not accept
 * traffic while throttled.
 */
static void
fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
	fqs->fqs_throttle = 0;
	fq_cl->fcl_stat.fcl_throttle_off++;
	KDBG(AQM_KTRACE_AON_THROTTLE | DBG_FUNC_END,
	    fqs->fqs_ifq->ifcq_ifp->if_index, 0, 0, 0);
}
1292
1293
1294 static int
fq_if_throttle(fq_if_t * fqs,cqrq_throttle_t * tr)1295 fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
1296 {
1297 struct ifclassq *ifq = fqs->fqs_ifq;
1298 uint8_t index;
1299 fq_if_group_t *grp;
1300
1301 #if !MACH_ASSERT
1302 #pragma unused(ifq)
1303 #endif
1304 IFCQ_LOCK_ASSERT_HELD(ifq);
1305
1306 if (!tr->set) {
1307 tr->level = fqs->fqs_throttle;
1308 return 0;
1309 }
1310
1311 if (tr->level == fqs->fqs_throttle) {
1312 return EALREADY;
1313 }
1314
1315 /* Throttling is allowed on BK_SYS class only */
1316 index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS);
1317
1318 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
1319 if (fqs->fqs_classq_groups[grp_idx] == NULL) {
1320 continue;
1321 }
1322 grp = fq_if_find_grp(fqs, grp_idx);
1323 switch (tr->level) {
1324 case IFNET_THROTTLE_OFF:
1325 fq_if_classq_resume(fqs, &grp->fqg_classq[index]);
1326 break;
1327 case IFNET_THROTTLE_OPPORTUNISTIC:
1328 fq_if_classq_suspend(fqs, &grp->fqg_classq[index]);
1329 break;
1330 default:
1331 break;
1332 }
1333 }
1334 return 0;
1335 }
1336
1337 static void
fq_if_grp_stat_sc(fq_if_t * fqs,fq_if_group_t * grp,cqrq_stat_sc_t * stat)1338 fq_if_grp_stat_sc(fq_if_t *fqs, fq_if_group_t *grp, cqrq_stat_sc_t *stat)
1339 {
1340 uint8_t pri;
1341 fq_if_classq_t *fq_cl;
1342
1343 if (stat == NULL) {
1344 return;
1345 }
1346
1347 pri = fq_if_service_to_priority(fqs, stat->sc);
1348
1349 fq_cl = &grp->fqg_classq[pri];
1350 stat->packets = (uint32_t)fq_cl->fcl_stat.fcl_pkt_cnt;
1351 stat->bytes = (uint32_t)fq_cl->fcl_stat.fcl_byte_cnt;
1352 }
1353
/*
 * Report queue statistics for a service class / group combination.
 *
 * stat->grp_idx == IF_CLASSQ_ALL_GRPS aggregates across every group;
 * stat->sc == MBUF_SC_UNSPEC means "all classes" and uses the queue- or
 * group-level counters instead of per-class ones.  Combined groups are
 * summed over the whole combined-group list.
 *
 * NOTE(review): the accumulation paths use `+=' on stat->packets/bytes
 * without zeroing them first (contrast the single-group path, which
 * assigns) -- this assumes the caller passes a zero-initialized stat
 * structure; confirm against callers.
 */
void
fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
{
	cqrq_stat_sc_t grp_sc_stat;
	fq_if_group_t *grp;

	if (stat == NULL) {
		return;
	}
	grp_sc_stat.sc = stat->sc;

	if (stat->grp_idx == IF_CLASSQ_ALL_GRPS) {
		if (stat->sc == MBUF_SC_UNSPEC) {
			/* whole interface queue: use the aggregate counters */
			stat->packets = IFCQ_LEN(fqs->fqs_ifq);
			stat->bytes = IFCQ_BYTES(fqs->fqs_ifq);
		} else {
			/* one class, summed across every allocated group */
			for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
				grp = fqs->fqs_classq_groups[grp_idx];
				if (grp == NULL) {
					continue;
				}

				fq_if_grp_stat_sc(fqs, grp, &grp_sc_stat);
				stat->packets += grp_sc_stat.packets;
				stat->bytes += grp_sc_stat.bytes;
			}
		}
		return;
	}

	if (stat->sc == MBUF_SC_UNSPEC) {
		/* all classes of one group (or of the combined set) */
		if (fq_if_is_grp_combined(fqs, stat->grp_idx)) {
			TAILQ_FOREACH(grp, &fqs->fqs_combined_grp_list, fqg_grp_link) {
				stat->packets += FQG_LEN(grp);
				stat->bytes += FQG_BYTES(grp);
			}
		} else {
			grp = fq_if_find_grp(fqs, stat->grp_idx);
			stat->packets = FQG_LEN(grp);
			stat->bytes = FQG_BYTES(grp);
		}
	} else {
		/* one class of one group (or of the combined set) */
		if (fq_if_is_grp_combined(fqs, stat->grp_idx)) {
			TAILQ_FOREACH(grp, &fqs->fqs_combined_grp_list, fqg_grp_link) {
				fq_if_grp_stat_sc(fqs, grp, &grp_sc_stat);
				stat->packets += grp_sc_stat.packets;
				stat->bytes += grp_sc_stat.bytes;
			}
		} else {
			grp = fq_if_find_grp(fqs, stat->grp_idx);
			fq_if_grp_stat_sc(fqs, grp, stat);
		}
	}
}
1408
1409 int
fq_if_request_classq(struct ifclassq * ifq,cqrq_t rq,void * arg)1410 fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
1411 {
1412 int err = 0;
1413 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1414
1415 IFCQ_LOCK_ASSERT_HELD(ifq);
1416
1417 /*
1418 * These are usually slow operations, convert the lock ahead of time
1419 */
1420 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1421 switch (rq) {
1422 case CLASSQRQ_PURGE:
1423 fq_if_purge(fqs);
1424 break;
1425 case CLASSQRQ_PURGE_SC:
1426 fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg);
1427 break;
1428 case CLASSQRQ_EVENT:
1429 fq_if_event(fqs, (cqev_t)arg);
1430 break;
1431 case CLASSQRQ_THROTTLE:
1432 fq_if_throttle(fqs, (cqrq_throttle_t *)arg);
1433 break;
1434 case CLASSQRQ_STAT_SC:
1435 fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg);
1436 break;
1437 }
1438 return err;
1439 }
1440
/*
 * Attach an FQ-CoDel scheduler to `ifq'.  Allocates the fq_if state,
 * selects the bitmap ops (driver-managed service classes vs. the regular
 * set), attaches the discipline, and creates default group 0.
 *
 * Returns 0 on success or an errno; the fq_if state is freed on failure.
 *
 * NOTE(review): if fq_if_create_grp() fails, fqs is destroyed but the
 * discipline was already attached via ifclassq_attach(); there is no
 * matching ifclassq_detach()/ifcq_disc reset on this path (compare
 * fq_if_teardown_ifclassq()) -- confirm callers treat the classq as
 * unusable after an error return.
 */
int
fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
    classq_pkt_type_t ptype)
{
	fq_if_t *fqs = NULL;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	fqs = fq_if_alloc(ifq, ptype);
	if (fqs == NULL) {
		return ENOMEM;
	}
	/* pick the scheduler bitmap ops matching the classq flavor */
	if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) {
		fqs->fqs_flags |= FQS_DRIVER_MANAGED;
		fqs->fqs_bm_ops = &fq_if_grps_sc_bitmap_ops;
	} else {
		fqs->fqs_bm_ops = &fq_if_grps_bitmap_ops;
	}

	err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: error from ifclassq_attach, "
		    "failed to attach fq_if: %d\n", __func__, err);
		fq_if_destroy(fqs);
		return err;
	}

	/*
	 * Always create one group. If qset 0 is added later,
	 * this group will be updated.
	 */
	err = fq_if_create_grp(ifq, 0, IF_CLASSQ_DEF);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: error from fq_if_create_grp, "
		    "failed to create a fq group: %d\n", __func__, err);
		fq_if_destroy(fqs);
	}
	return err;
}
1483
1484 fq_t *
fq_if_hash_pkt(fq_if_t * fqs,fq_if_group_t * fq_grp,u_int32_t flowid,mbuf_svc_class_t svc_class,u_int64_t now,bool create,fq_tfc_type_t tfc_type)1485 fq_if_hash_pkt(fq_if_t *fqs, fq_if_group_t *fq_grp, u_int32_t flowid,
1486 mbuf_svc_class_t svc_class, u_int64_t now, bool create,
1487 fq_tfc_type_t tfc_type)
1488 {
1489 fq_t *fq = NULL;
1490 flowq_list_t *fq_list;
1491 fq_if_classq_t *fq_cl;
1492 u_int8_t fqs_hash_id;
1493 u_int8_t scidx;
1494
1495 scidx = fq_if_service_to_priority(fqs, svc_class);
1496
1497 fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);
1498
1499 fq_list = &fqs->fqs_flows[fqs_hash_id];
1500
1501 SLIST_FOREACH(fq, fq_list, fq_hashlink) {
1502 if (fq->fq_flowhash == flowid &&
1503 fq->fq_sc_index == scidx &&
1504 fq->fq_tfc_type == tfc_type &&
1505 fq->fq_group == fq_grp) {
1506 break;
1507 }
1508 }
1509 if (fq == NULL && create) {
1510 /* If the flow is not already on the list, allocate it */
1511 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1512 fq = fq_alloc(fqs->fqs_ptype);
1513 if (fq != NULL) {
1514 fq->fq_flowhash = flowid;
1515 fq->fq_sc_index = scidx;
1516 fq->fq_group = fq_grp;
1517 fq->fq_tfc_type = tfc_type;
1518 fq_cl = &FQ_CLASSQ(fq);
1519 fq->fq_flags = FQF_FLOWCTL_CAPABLE;
1520 fq->fq_updatetime = now + FQ_UPDATE_INTERVAL(fq);
1521 SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
1522 fq_cl->fcl_stat.fcl_flows_cnt++;
1523 }
1524 KDBG(AQM_KTRACE_STATS_FLOW_ALLOC,
1525 fqs->fqs_ifq->ifcq_ifp->if_index, fq->fq_flowhash,
1526 AQM_KTRACE_FQ_GRP_SC_IDX(fq), 0);
1527 } else if ((fq != NULL) && (fq->fq_flags & FQF_EMPTY_FLOW)) {
1528 fq_if_reuse_empty_flow(fqs, fq, now);
1529 }
1530
1531 /*
1532 * If getq time is not set because this is the first packet or after
1533 * idle time, set it now so that we can detect a stall.
1534 */
1535 if (fq != NULL && fq->fq_getqtime == 0) {
1536 fq->fq_getqtime = now;
1537 }
1538
1539 return fq;
1540 }
1541
1542 void
fq_if_destroy_flow(fq_if_t * fqs,fq_if_classq_t * fq_cl,fq_t * fq)1543 fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq)
1544 {
1545 u_int8_t hash_id;
1546
1547 ASSERT((fq->fq_flags & FQF_EMPTY_FLOW) == 0);
1548 hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
1549 SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq,
1550 fq_hashlink);
1551 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1552 if (__improbable(fq->fq_flags & FQF_FLOWCTL_ON)) {
1553 fq_if_flow_feedback(fqs, fq, fq_cl);
1554 }
1555 KDBG(AQM_KTRACE_STATS_FLOW_DESTROY,
1556 fqs->fqs_ifq->ifcq_ifp->if_index, fq->fq_flowhash,
1557 AQM_KTRACE_FQ_GRP_SC_IDX(fq), 0);
1558 fq_destroy(fq, fqs->fqs_ptype);
1559 }
1560
1561 inline boolean_t
fq_if_at_drop_limit(fq_if_t * fqs)1562 fq_if_at_drop_limit(fq_if_t *fqs)
1563 {
1564 return (IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
1565 TRUE : FALSE;
1566 }
1567
1568 inline boolean_t
fq_if_almost_at_drop_limit(fq_if_t * fqs)1569 fq_if_almost_at_drop_limit(fq_if_t *fqs)
1570 {
1571 /*
1572 * Whether we are above 90% of the queue limit. This is used to tell if we
1573 * can stop flow controlling the largest flow.
1574 */
1575 return IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit * 9 / 10;
1576 }
1577
1578 static inline void
fq_if_reuse_empty_flow(fq_if_t * fqs,fq_t * fq,uint64_t now)1579 fq_if_reuse_empty_flow(fq_if_t *fqs, fq_t *fq, uint64_t now)
1580 {
1581 ASSERT(fq->fq_flags & FQF_EMPTY_FLOW);
1582 TAILQ_REMOVE(&fqs->fqs_empty_list, fq, fq_empty_link);
1583 STAILQ_NEXT(fq, fq_actlink) = NULL;
1584 fq->fq_flags &= ~FQF_FLOW_STATE_MASK;
1585 fq->fq_empty_purge_time = 0;
1586 fq->fq_getqtime = 0;
1587 fq->fq_updatetime = now + FQ_UPDATE_INTERVAL(fq);
1588 fqs->fqs_empty_list_cnt--;
1589 fq_if_classq_t *fq_cl = &FQ_CLASSQ(fq);
1590 fq_cl->fcl_stat.fcl_flows_cnt++;
1591 }
1592
/*
 * Park a drained flow on the scheduler's empty list.  It stays there,
 * reusable, until fq_empty_purge_delay elapses and it is reclaimed by
 * fq_if_purge_empty_flow_list().
 */
inline void
fq_if_move_to_empty_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    uint64_t now)
{
	/*
	 * NOTE(review): this asserts that at least one flag bit OUTSIDE the
	 * new/old/flowctl mask is set (e.g. FQF_FLOWCTL_CAPABLE), not that
	 * those flags are clear; if the intent was "none of these set", the
	 * condition would be (fq_flags & mask) == 0 -- confirm intent.
	 */
	ASSERT(fq->fq_flags & ~(FQF_NEW_FLOW | FQF_OLD_FLOW | FQF_FLOWCTL_ON));
	fq->fq_empty_purge_time = now + fq_empty_purge_delay;
	TAILQ_INSERT_TAIL(&fqs->fqs_empty_list, fq, fq_empty_link);
	fq->fq_flags |= FQF_EMPTY_FLOW;
	FQ_CLEAR_OVERWHELMING(fq);
	fqs->fqs_empty_list_cnt++;
	/*
	 * fcl_flows_cnt is used in budget determination for the class.
	 * empty flow shouldn't contribute to the budget.
	 */
	fq_cl->fcl_stat.fcl_flows_cnt--;
}
1609
1610 static void
fq_if_purge_empty_flow(fq_if_t * fqs,fq_t * fq)1611 fq_if_purge_empty_flow(fq_if_t *fqs, fq_t *fq)
1612 {
1613 fq_if_classq_t *fq_cl;
1614 fq_cl = &FQ_CLASSQ(fq);
1615
1616 ASSERT((fq->fq_flags & FQF_EMPTY_FLOW) != 0);
1617 TAILQ_REMOVE(&fqs->fqs_empty_list, fq, fq_empty_link);
1618 fq->fq_flags &= ~FQF_EMPTY_FLOW;
1619 fqs->fqs_empty_list_cnt--;
1620 /* Remove from the hash list and free the flow queue */
1621 fq_if_destroy_flow(fqs, fq_cl, fq);
1622 }
1623
1624 static void
fq_if_purge_empty_flow_list(fq_if_t * fqs,uint64_t now,bool purge_all)1625 fq_if_purge_empty_flow_list(fq_if_t *fqs, uint64_t now, bool purge_all)
1626 {
1627 fq_t *fq, *tmp;
1628 int i = 0;
1629
1630 if (fqs->fqs_empty_list_cnt == 0) {
1631 ASSERT(TAILQ_EMPTY(&fqs->fqs_empty_list));
1632 return;
1633 }
1634
1635 TAILQ_FOREACH_SAFE(fq, &fqs->fqs_empty_list, fq_empty_link, tmp) {
1636 if (!purge_all && ((now < fq->fq_empty_purge_time) ||
1637 (i++ == FQ_EMPTY_PURGE_MAX))) {
1638 break;
1639 }
1640 fq_if_purge_empty_flow(fqs, fq);
1641 }
1642
1643 if (__improbable(purge_all)) {
1644 VERIFY(fqs->fqs_empty_list_cnt == 0);
1645 VERIFY(TAILQ_EMPTY(&fqs->fqs_empty_list));
1646 }
1647 }
1648
1649 static void
fq_if_empty_old_flow(fq_if_t * fqs,fq_if_classq_t * fq_cl,fq_t * fq,uint64_t now)1650 fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
1651 uint64_t now)
1652 {
1653 /*
1654 * Remove the flow queue from the old flows list.
1655 */
1656 STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq, fq_actlink);
1657 fq->fq_flags &= ~FQF_OLD_FLOW;
1658 fq_cl->fcl_stat.fcl_oldflows_cnt--;
1659 VERIFY(fq->fq_bytes == 0);
1660
1661 /* release any flow control */
1662 if (__improbable(fq->fq_flags & FQF_FLOWCTL_ON)) {
1663 fq_if_flow_feedback(fqs, fq, fq_cl);
1664 }
1665
1666 /* move the flow queue to empty flows list */
1667 fq_if_move_to_empty_flow(fqs, fq_cl, fq, now);
1668 }
1669
1670 static void
fq_if_empty_new_flow(fq_t * fq,fq_if_classq_t * fq_cl)1671 fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl)
1672 {
1673 /* Move to the end of old queue list */
1674 STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
1675 flowq, fq_actlink);
1676 fq->fq_flags &= ~FQF_NEW_FLOW;
1677 fq_cl->fcl_stat.fcl_newflows_cnt--;
1678
1679 STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq, fq_actlink);
1680 fq->fq_flags |= FQF_OLD_FLOW;
1681 fq_cl->fcl_stat.fcl_oldflows_cnt++;
1682 }
1683
/*
 * Drop one packet from the head of the currently-tracked largest flow
 * (fqs_large_flow) to relieve queue-limit pressure.  No-op when no large
 * flow is tracked.  If the drop empties the flow, the large-flow pointer
 * is cleared and the flow is demoted/parked via the usual state helpers.
 */
inline void
fq_if_drop_packet(fq_if_t *fqs, uint64_t now)
{
	fq_t *fq = fqs->fqs_large_flow;
	fq_if_classq_t *fq_cl;
	pktsched_pkt_t pkt;
	volatile uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;

	if (fq == NULL) {
		return;
	}
	/* queue can not be empty on the largest flow */
	VERIFY(!fq_empty(fq, fqs->fqs_ptype));

	fq_cl = &FQ_CLASSQ(fq);
	_PKTSCHED_PKT_INIT(&pkt);
	/* pull the head packet off the flow */
	fq_getq_flow_internal(fqs, fq, &pkt);
	ASSERT(pkt.pktsched_ptype != QP_INVALID);

	/* get pointers into the packet's flag/timestamp fields */
	pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL);

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	/* scrub scheduler-private state before the packet leaves AQM */
	*pkt_timestamp = 0;
	switch (pkt.pktsched_ptype) {
	case QP_MBUF:
		*pkt_flags &= ~PKTF_PRIV_GUARDED;
		break;
#if SKYWALK
	case QP_PACKET:
		/* sanity check */
		ASSERT((*pkt_flags & ~PKT_F_COMMON_MASK) == 0);
		break;
#endif /* SKYWALK */
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* if that was the last packet, the flow is no longer "large" */
	if (fq_empty(fq, fqs->fqs_ptype)) {
		fqs->fqs_large_flow = NULL;
		if (fq->fq_flags & FQF_OLD_FLOW) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, now);
		} else {
			VERIFY(fq->fq_flags & FQF_NEW_FLOW);
			fq_if_empty_new_flow(fq, fq_cl);
		}
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt));

	pktsched_free_pkt(&pkt);
	fq_cl->fcl_stat.fcl_drop_overflow++;
}
1739
1740 inline void
fq_if_is_flow_heavy(fq_if_t * fqs,fq_t * fq)1741 fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
1742 {
1743 fq_t *prev_fq;
1744
1745 if (fqs->fqs_large_flow != NULL &&
1746 fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
1747 fqs->fqs_large_flow = NULL;
1748 }
1749
1750 if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
1751 return;
1752 }
1753
1754 prev_fq = fqs->fqs_large_flow;
1755 if (prev_fq == NULL) {
1756 if (!fq_empty(fq, fqs->fqs_ptype)) {
1757 fqs->fqs_large_flow = fq;
1758 }
1759 return;
1760 } else if (fq->fq_bytes > prev_fq->fq_bytes) {
1761 fqs->fqs_large_flow = fq;
1762 }
1763 }
1764
1765 boolean_t
fq_if_add_fcentry(fq_if_t * fqs,pktsched_pkt_t * pkt,uint8_t flowsrc,fq_t * fq,fq_if_classq_t * fq_cl)1766 fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint8_t flowsrc,
1767 fq_t *fq, fq_if_classq_t *fq_cl)
1768 {
1769 struct flowadv_fcentry *fce;
1770
1771 #if DEBUG || DEVELOPMENT
1772 if (__improbable(ifclassq_flow_control_adv == 0)) {
1773 os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
1774 return TRUE;
1775 }
1776 #endif /* DEBUG || DEVELOPMENT */
1777
1778 ASSERT(fq->fq_tfc_type != FQ_TFC_L4S);
1779 STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
1780 if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
1781 fce->fce_flowid == fq->fq_flowhash) {
1782 /* Already on flowcontrol list */
1783 return TRUE;
1784 }
1785 }
1786 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1787 fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
1788 if (fce != NULL) {
1789 /* XXX Add number of bytes in the queue */
1790 STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
1791 fq_cl->fcl_stat.fcl_flow_control++;
1792 os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
1793 "flow: 0x%x, iface: %s\n", __func__,
1794 fq_cl->fcl_stat.fcl_flow_control,
1795 fq->fq_sc_index, fce->fce_flowsrc_type, fq->fq_flowhash,
1796 if_name(fqs->fqs_ifq->ifcq_ifp));
1797 KDBG(AQM_KTRACE_STATS_FLOW_CTL | DBG_FUNC_START,
1798 fq->fq_flowhash, AQM_KTRACE_FQ_GRP_SC_IDX(fq),
1799 fq->fq_bytes, fq->fq_min_qdelay);
1800 }
1801 return (fce != NULL) ? TRUE : FALSE;
1802 }
1803
/*
 * Unlink `fce' from the scheduler's flow-control list and hand it to the
 * flow-advisory layer (presumably releasing the flow-controlled sender --
 * see flowadv_add_entry).  The entry must be fully unlinked before it is
 * passed on, hence the explicit ordering.
 */
static void
fq_if_remove_fcentry(fq_if_t *fqs, struct flowadv_fcentry *fce)
{
	STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry, fce_link);
	STAILQ_NEXT(fce, fce_link) = NULL;
	flowadv_add_entry(fce);
}
1811
/*
 * Lift flow control for flow `fq': find its entry on the scheduler's
 * flow-control list, hand it back to the flow-advisory layer, and clear
 * FQF_FLOWCTL_ON.  L4S flows are never flow-controlled here, so they
 * return immediately.
 */
void
fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce = NULL;

	if (fq->fq_tfc_type == FQ_TFC_L4S) {
		return;
	}

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	/* locate this flow's entry on the flow-control list, if any */
	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if (fce->fce_flowid == fq->fq_flowhash) {
			break;
		}
	}
	if (fce != NULL) {
		fq_cl->fcl_stat.fcl_flow_feedback++;
		os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
		    "flow: 0x%x, iface: %s\n", __func__,
		    fq_cl->fcl_stat.fcl_flow_feedback, fq->fq_sc_index,
		    fce->fce_flowsrc_type, fce->fce_flowid,
		    if_name(fqs->fqs_ifq->ifcq_ifp));
		fq_if_remove_fcentry(fqs, fce);
		KDBG(AQM_KTRACE_STATS_FLOW_CTL | DBG_FUNC_END,
		    fq->fq_flowhash, AQM_KTRACE_FQ_GRP_SC_IDX(fq),
		    fq->fq_bytes, fq->fq_min_qdelay);
	}
	/* cleared even if no entry was found on the list */
	fq->fq_flags &= ~FQF_FLOWCTL_ON;
}
1841
/*
 * Core DRR dequeue for one class: pull up to `pktlimit' packets /
 * `bytelimit' bytes, servicing the new-flows list first and then the
 * old-flows list (RFC 8290).  When `budget_restricted' is set, the byte
 * limit is additionally capped by the class' DRR budget.
 *
 * Packets are either chained directly into top/bottom, or -- when
 * `fq_dqlist' is supplied -- staged per-flow on the dequeue list for the
 * caller to collect later.  retpktcnt/retbytecnt receive the totals.
 */
void
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, uint32_t pktlimit,
    int64_t bytelimit, classq_pkt_t *top, classq_pkt_t *bottom,
    uint32_t *retpktcnt, uint32_t *retbytecnt, flowq_dqlist_t *fq_dqlist,
    bool budget_restricted, uint64_t now)
{
	fq_t *fq = NULL, *tfq = NULL;
	flowq_stailq_t temp_stailq;
	uint32_t pktcnt, bytecnt;
	boolean_t qempty, limit_reached = FALSE;
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
	fq_getq_flow_t fq_getq_flow_fn;
	classq_pkt_t *head, *tail;

	/* select the per-flow dequeue routine for the scheduler's pkt type */
	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		fq_getq_flow_fn = fq_getq_flow_mbuf;
		break;

#if SKYWALK
	case QP_PACKET:
		fq_getq_flow_fn = fq_getq_flow_kpkt;
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * maximum byte limit should not be greater than the budget for
	 * this class
	 */
	if (bytelimit > fq_cl->fcl_budget && budget_restricted) {
		bytelimit = fq_cl->fcl_budget;
	}

	VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);
	pktcnt = bytecnt = 0;
	STAILQ_INIT(&temp_stailq);

	/* pass 1: new flows get priority service */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		ASSERT((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_NEW_FLOW);

		/* stage into the dqlist, or chain directly into top/last */
		if (fq_dqlist != NULL) {
			if (!fq->fq_in_dqlist) {
				fq_dqlist_add(fq_dqlist, fq);
			}
			head = &fq->fq_dq_head;
			tail = &fq->fq_dq_tail;
		} else {
			ASSERT(!fq->fq_in_dqlist);
			head = top;
			tail = &last;
		}

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, head, tail, &bytecnt, &pktcnt, &qempty,
		    PKTF_NEW_FLOW, now);

		/*
		 * From RFC 8290:
		 * if that queue has a negative number of credits (i.e., it has already
		 * dequeued at least a quantum of bytes), it is given an additional
		 * quantum of credits, the queue is put onto _the end of_ the list of
		 * old queues, and the routine selects the next queue and starts again.
		 */
		if (fq->fq_deficit <= 0 || qempty) {
			fq->fq_deficit += fq_cl->fcl_quantum;
			fq_if_empty_new_flow(fq, fq_cl);
		}

		if (limit_reached) {
			goto done;
		}
	}

	/* pass 2: round-robin over the old flows */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_OLD_FLOW);
		bool destroy = true;

		if (fq_dqlist != NULL) {
			if (!fq->fq_in_dqlist) {
				fq_dqlist_add(fq_dqlist, fq);
			}
			head = &fq->fq_dq_head;
			tail = &fq->fq_dq_tail;
			destroy = false;
		} else {
			ASSERT(!fq->fq_in_dqlist);
			head = top;
			tail = &last;
		}

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, 0, now);

		if (qempty) {
			/* drained: park the flow on the empty list */
			fq_if_empty_old_flow(fqs, fq_cl, fq, now);
		} else if (fq->fq_deficit <= 0) {
			STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
			    flowq, fq_actlink);
			/*
			 * Move to the end of the old queues list. We do not
			 * need to update the flow count since this flow
			 * will be added to the tail again
			 */
			STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
			fq->fq_deficit += fq_cl->fcl_quantum;
		}
		if (limit_reached) {
			break;
		}
	}

done:
	/* re-append flows whose quantum expired to the tail of old flows */
	if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
		STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
	} else if (!STAILQ_EMPTY(&temp_stailq)) {
		fq_cl->fcl_old_flows = temp_stailq;
	}
	if (last.cp_mbuf != NULL) {
		VERIFY(top->cp_mbuf != NULL);
		if (bottom != NULL) {
			*bottom = last;
		}
	}
	if (retpktcnt != NULL) {
		*retpktcnt = pktcnt;
	}
	if (retbytecnt != NULL) {
		*retbytecnt = bytecnt;
	}
}
1980
/*
 * Detach and destroy the FQ-CoDel discipline on `ifq'.  Order matters:
 * free the scheduler state, clear ifcq_disc so the freed state is no
 * longer reachable through the classq, then detach the discipline type.
 */
void
fq_if_teardown_ifclassq(struct ifclassq *ifq)
{
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);
	fq_if_destroy(fqs);
	ifq->ifcq_disc = NULL;
	ifclassq_detach(ifq);
}
1992
1993 static void
fq_export_flowstats(fq_if_t * fqs,fq_t * fq,struct fq_codel_flowstats * flowstat)1994 fq_export_flowstats(fq_if_t *fqs, fq_t *fq,
1995 struct fq_codel_flowstats *flowstat)
1996 {
1997 bzero(flowstat, sizeof(*flowstat));
1998 flowstat->fqst_min_qdelay = (uint32_t)fq->fq_min_qdelay;
1999 flowstat->fqst_bytes = fq->fq_bytes;
2000 flowstat->fqst_flowhash = fq->fq_flowhash;
2001 if (fq->fq_flags & FQF_NEW_FLOW) {
2002 flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW;
2003 }
2004 if (fq->fq_flags & FQF_OLD_FLOW) {
2005 flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW;
2006 }
2007 if (fq->fq_flags & FQF_DELAY_HIGH) {
2008 flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH;
2009 }
2010 if (fq->fq_flags & FQF_FLOWCTL_ON) {
2011 flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON;
2012 }
2013 if (fqs->fqs_large_flow == fq) {
2014 flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW;
2015 }
2016 }
2017
2018 int
fq_if_getqstats_ifclassq(struct ifclassq * ifq,uint8_t gid,u_int32_t qid,struct if_ifclassq_stats * ifqs)2019 fq_if_getqstats_ifclassq(struct ifclassq *ifq, uint8_t gid, u_int32_t qid,
2020 struct if_ifclassq_stats *ifqs)
2021 {
2022 struct fq_codel_classstats *fcls;
2023 fq_if_classq_t *fq_cl;
2024 fq_if_t *fqs;
2025 fq_t *fq = NULL;
2026 fq_if_group_t *grp;
2027 u_int32_t i, flowstat_cnt;
2028
2029 if (qid >= FQ_IF_MAX_CLASSES || gid >= FQ_IF_MAX_GROUPS) {
2030 return EINVAL;
2031 }
2032
2033 fqs = (fq_if_t *)ifq->ifcq_disc;
2034 if (fqs->fqs_classq_groups[gid] == NULL) {
2035 return ENXIO;
2036 }
2037
2038 fcls = &ifqs->ifqs_fq_codel_stats;
2039
2040 fq_cl = &FQS_CLASSQ(fqs, gid, qid);
2041 grp = fq_if_find_grp(fqs, gid);
2042
2043 fcls->fcls_pri = fq_cl->fcl_pri;
2044 fcls->fcls_service_class = fq_cl->fcl_service_class;
2045 fcls->fcls_quantum = fq_cl->fcl_quantum;
2046 fcls->fcls_drr_max = fq_cl->fcl_drr_max;
2047 fcls->fcls_budget = fq_cl->fcl_budget;
2048 fcls->fcls_l4s_target_qdelay = grp->fqg_target_qdelays[FQ_TFC_L4S];
2049 fcls->fcls_target_qdelay = grp->fqg_target_qdelays[FQ_TFC_C];
2050 fcls->fcls_update_interval = grp->fqg_update_intervals[FQ_TFC_C];
2051 fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
2052 fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
2053 fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
2054 fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
2055 fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
2056 fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
2057 fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
2058 fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
2059 fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
2060 fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
2061 fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
2062 fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
2063 fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
2064 fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
2065 fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
2066 fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
2067 fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
2068 fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
2069 fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;
2070 fcls->fcls_pkts_compressible = fq_cl->fcl_stat.fcl_pkts_compressible;
2071 fcls->fcls_pkts_compressed = fq_cl->fcl_stat.fcl_pkts_compressed;
2072 fcls->fcls_min_qdelay = fq_cl->fcl_stat.fcl_min_qdelay;
2073 fcls->fcls_max_qdelay = fq_cl->fcl_stat.fcl_max_qdelay;
2074 fcls->fcls_avg_qdelay = fq_cl->fcl_stat.fcl_avg_qdelay;
2075 fcls->fcls_overwhelming = fq_cl->fcl_stat.fcl_overwhelming;
2076 fcls->fcls_ce_marked = fq_cl->fcl_stat.fcl_ce_marked;
2077 fcls->fcls_ce_mark_failures = fq_cl->fcl_stat.fcl_ce_mark_failures;
2078 fcls->fcls_l4s_pkts = fq_cl->fcl_stat.fcl_l4s_pkts;
2079
2080 /* Gather per flow stats */
2081 flowstat_cnt = min((fcls->fcls_newflows_cnt +
2082 fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS);
2083 i = 0;
2084 STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) {
2085 if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt) {
2086 break;
2087 }
2088
2089 /* leave space for a few old flows */
2090 if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt &&
2091 i >= (FQ_IF_MAX_FLOWSTATS >> 1)) {
2092 break;
2093 }
2094 fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
2095 i++;
2096 }
2097 STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) {
2098 if (i >= flowstat_cnt) {
2099 break;
2100 }
2101 fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
2102 i++;
2103 }
2104 VERIFY(i <= flowstat_cnt);
2105 fcls->fcls_flowstats_cnt = i;
2106 return 0;
2107 }
2108
2109 int
fq_if_create_grp(struct ifclassq * ifcq,uint8_t grp_idx,uint8_t flags)2110 fq_if_create_grp(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
2111 {
2112 #define _FQ_CLASSQ_INIT(_grp, _s, _q) \
2113 fq_if_classq_init(_grp, FQ_IF_ ## _s ##_INDEX, \
2114 FQ_CODEL_QUANTUM_ ## _s(_q), FQ_CODEL_DRR_MAX(_s), \
2115 MBUF_SC_ ## _s );
2116
2117 fq_if_group_t *grp;
2118 fq_if_t *fqs;
2119 uint32_t quantum, calc_flags = IF_CLASSQ_DEF;
2120 struct ifnet *ifp = ifcq->ifcq_ifp;
2121
2122 VERIFY(grp_idx < FQ_IF_MAX_GROUPS);
2123
2124 fqs = (fq_if_t *)ifcq->ifcq_disc;
2125
2126 if (grp_idx == 0 && fqs->fqs_classq_groups[grp_idx] != NULL) {
2127 grp = fqs->fqs_classq_groups[grp_idx];
2128 goto update;
2129 }
2130
2131 if (fqs->fqs_classq_groups[grp_idx] != NULL) {
2132 return EINVAL;
2133 }
2134
2135 grp = zalloc_flags(fq_if_grp_zone, Z_WAITOK | Z_ZERO);
2136 if (grp == NULL) {
2137 return ENOMEM;
2138 }
2139
2140 fqs->fqs_classq_groups[grp_idx] = grp;
2141 grp->fqg_index = grp_idx;
2142
2143 quantum = fq_if_calc_quantum(ifp);
2144 if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
2145 _FQ_CLASSQ_INIT(grp, BK, quantum);
2146 _FQ_CLASSQ_INIT(grp, BE, quantum);
2147 _FQ_CLASSQ_INIT(grp, VI, quantum);
2148 _FQ_CLASSQ_INIT(grp, VO, quantum);
2149 } else {
2150 /* SIG shares same INDEX with VI */
2151 _CASSERT(SCIDX_SIG == SCIDX_VI);
2152 _CASSERT(FQ_IF_SIG_INDEX == FQ_IF_VI_INDEX);
2153
2154 _FQ_CLASSQ_INIT(grp, BK_SYS, quantum);
2155 _FQ_CLASSQ_INIT(grp, BK, quantum);
2156 _FQ_CLASSQ_INIT(grp, BE, quantum);
2157 _FQ_CLASSQ_INIT(grp, RD, quantum);
2158 _FQ_CLASSQ_INIT(grp, OAM, quantum);
2159 _FQ_CLASSQ_INIT(grp, AV, quantum);
2160 _FQ_CLASSQ_INIT(grp, RV, quantum);
2161 _FQ_CLASSQ_INIT(grp, VI, quantum);
2162 _FQ_CLASSQ_INIT(grp, VO, quantum);
2163 _FQ_CLASSQ_INIT(grp, CTL, quantum);
2164 }
2165
2166 update:
2167 if (flags & IF_DEFAULT_GRP) {
2168 fq_if_set_grp_combined(ifcq, grp_idx);
2169 grp->fqg_flags |= FQ_IF_DEFAULT_GRP;
2170 } else {
2171 fq_if_set_grp_separated(ifcq, grp_idx);
2172 grp->fqg_flags &= ~FQ_IF_DEFAULT_GRP;
2173 }
2174
2175 calc_flags |= (flags & IF_CLASSQ_LOW_LATENCY);
2176 ifclassq_calc_target_qdelay(ifp, &grp->fqg_target_qdelays[FQ_TFC_C],
2177 calc_flags);
2178 ifclassq_calc_target_qdelay(ifp, &grp->fqg_target_qdelays[FQ_TFC_L4S],
2179 calc_flags | IF_CLASSQ_L4S);
2180
2181 ifclassq_calc_update_interval(&grp->fqg_update_intervals[FQ_TFC_C],
2182 calc_flags);
2183 ifclassq_calc_update_interval(&grp->fqg_update_intervals[FQ_TFC_L4S],
2184 calc_flags | IF_CLASSQ_L4S);
2185
2186 return 0;
2187 #undef _FQ_CLASSQ_INIT
2188 }
2189
2190 fq_if_group_t *
fq_if_find_grp(fq_if_t * fqs,uint8_t grp_idx)2191 fq_if_find_grp(fq_if_t *fqs, uint8_t grp_idx)
2192 {
2193 fq_if_group_t *grp;
2194
2195 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
2196 VERIFY(grp_idx < FQ_IF_MAX_GROUPS);
2197
2198 grp = fqs->fqs_classq_groups[grp_idx];
2199 VERIFY(grp != NULL);
2200
2201 return grp;
2202 }
2203
2204 static void
fq_if_purge_grp(fq_if_t * fqs,fq_if_group_t * grp)2205 fq_if_purge_grp(fq_if_t *fqs, fq_if_group_t *grp)
2206 {
2207 for (uint8_t i = 0; i < FQ_IF_MAX_CLASSES; i++) {
2208 fq_if_purge_classq(fqs, &grp->fqg_classq[i]);
2209 }
2210
2211 bzero(&grp->fqg_bitmaps, sizeof(grp->fqg_bitmaps));
2212 grp->fqg_len = 0;
2213 grp->fqg_bytes = 0;
2214 fq_if_set_grp_separated(fqs->fqs_ifq, grp->fqg_index);
2215 }
2216
2217 void
fq_if_destroy_grps(fq_if_t * fqs)2218 fq_if_destroy_grps(fq_if_t *fqs)
2219 {
2220 fq_if_group_t *grp;
2221
2222 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
2223
2224 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
2225 if (fqs->fqs_classq_groups[grp_idx] == NULL) {
2226 continue;
2227 }
2228
2229 grp = fq_if_find_grp(fqs, grp_idx);
2230 fq_if_purge_grp(fqs, grp);
2231 zfree(fq_if_grp_zone, grp);
2232 fqs->fqs_classq_groups[grp_idx] = NULL;
2233 }
2234 }
2235
2236 static inline boolean_t
fq_if_is_grp_combined(fq_if_t * fqs,uint8_t grp_idx)2237 fq_if_is_grp_combined(fq_if_t *fqs, uint8_t grp_idx)
2238 {
2239 return pktsched_bit_tst(grp_idx, &fqs->fqs_combined_grp_bitmap);
2240 }
2241
2242 void
fq_if_set_grp_combined(struct ifclassq * ifcq,uint8_t grp_idx)2243 fq_if_set_grp_combined(struct ifclassq *ifcq, uint8_t grp_idx)
2244 {
2245 fq_if_t *fqs;
2246 fq_if_group_t *grp;
2247
2248 IFCQ_LOCK_ASSERT_HELD(ifcq);
2249
2250 fqs = (fq_if_t *)ifcq->ifcq_disc;
2251 grp = fq_if_find_grp(fqs, grp_idx);
2252
2253 if (fq_if_is_grp_combined(fqs, grp_idx)) {
2254 return;
2255 }
2256
2257 /*
2258 * We keep the current fq_deficit and fcl_budget when combining a group.
2259 * That might disrupt the AQM but only for a moment.
2260 */
2261 pktsched_bit_set(grp_idx, &fqs->fqs_combined_grp_bitmap);
2262 TAILQ_INSERT_TAIL(&fqs->fqs_combined_grp_list, grp, fqg_grp_link);
2263 }
2264
2265 void
fq_if_set_grp_separated(struct ifclassq * ifcq,uint8_t grp_idx)2266 fq_if_set_grp_separated(struct ifclassq *ifcq, uint8_t grp_idx)
2267 {
2268 fq_if_t *fqs;
2269 fq_if_group_t *grp;
2270
2271 IFCQ_LOCK_ASSERT_HELD(ifcq);
2272
2273 fqs = (fq_if_t *)ifcq->ifcq_disc;
2274 grp = fq_if_find_grp(fqs, grp_idx);
2275
2276 if (!fq_if_is_grp_combined(fqs, grp_idx)) {
2277 return;
2278 }
2279
2280 pktsched_bit_clr(grp_idx, &fqs->fqs_combined_grp_bitmap);
2281 TAILQ_REMOVE(&fqs->fqs_combined_grp_list, grp, fqg_grp_link);
2282 }
2283