1 /*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <kern/zalloc.h>
32 #include <net/ethernet.h>
33 #include <net/if_var.h>
34 #include <net/if.h>
35 #include <net/droptap.h>
36 #include <net/classq/classq.h>
37 #include <net/classq/classq_fq_codel.h>
38 #include <net/pktsched/pktsched_fq_codel.h>
39 #include <os/log.h>
40 #include <pexpert/pexpert.h> /* for PE_parse_boot_argn */
41 #include <mach/thread_act.h>
42 #include <kern/thread.h>
43 #include <kern/sched_prim.h>
44
45 #define FQ_CODEL_DEFAULT_QUANTUM 1500
46
47 #define FQ_CODEL_QUANTUM_BK_SYS(_q) (_q)
48 #define FQ_CODEL_QUANTUM_BK(_q) (_q)
49 #define FQ_CODEL_QUANTUM_BE(_q) (_q)
50 #define FQ_CODEL_QUANTUM_RD(_q) (_q)
51 #define FQ_CODEL_QUANTUM_OAM(_q) (_q)
52 #define FQ_CODEL_QUANTUM_AV(_q) (_q * 2)
53 #define FQ_CODEL_QUANTUM_RV(_q) (_q * 2)
54 #define FQ_CODEL_QUANTUM_VI(_q) (_q * 2)
55 #define FQ_CODEL_QUANTUM_VO(_q) ((_q * 2) / 5)
56 #define FQ_CODEL_QUANTUM_CTL(_q) ((_q * 2) / 5)
57
58 static KALLOC_TYPE_DEFINE(fq_if_zone, fq_if_t, NET_KT_DEFAULT);
59 static KALLOC_TYPE_DEFINE(fq_if_grp_zone, fq_if_group_t, NET_KT_DEFAULT);
60
61 SYSCTL_NODE(_net_classq, OID_AUTO, fq_codel, CTLFLAG_RW | CTLFLAG_LOCKED,
62 0, "FQ-CODEL parameters");
63
64 SYSCTL_INT(_net_classq_fq_codel, OID_AUTO, fq_enable_pacing, CTLFLAG_RW | CTLFLAG_LOCKED,
65 &ifclassq_enable_pacing, 0, "Enable pacing");
66
67 static uint64_t fq_empty_purge_delay = FQ_EMPTY_PURGE_DELAY;
68 #if (DEVELOPMENT || DEBUG)
69 SYSCTL_QUAD(_net_classq_fq_codel, OID_AUTO, fq_empty_purge_delay, CTLFLAG_RW |
70 CTLFLAG_LOCKED, &fq_empty_purge_delay, "Empty flow queue purge delay (ns)");
#endif /* DEVELOPMENT || DEBUG */
72
73 unsigned int ifclassq_enable_pacing = 1;
74
75 typedef STAILQ_HEAD(, flowq) flowq_dqlist_t;
76
77 static fq_if_t *fq_if_alloc(struct ifclassq *, classq_pkt_type_t);
78 static void fq_if_destroy(fq_if_t *fqs);
79 static void fq_if_classq_init(fq_if_group_t *fqg, uint32_t priority,
80 uint32_t quantum, uint32_t drr_max, uint32_t svc_class);
81 static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, uint32_t,
82 int64_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
83 uint32_t *, flowq_dqlist_t *, bool, uint64_t, bool*, uint64_t*);
84 void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
85 static void fq_if_purge(fq_if_t *);
86 static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
87 static void fq_if_purge_flow(fq_if_t *, fq_t *, uint32_t *, uint32_t *,
88 uint64_t);
89 static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl);
90 static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
91 fq_t *fq, uint64_t now);
92 static void fq_if_purge_empty_flow(fq_if_t *fqs, fq_t *fq);
93 static void fq_if_purge_empty_flow_list(fq_if_t *fqs, uint64_t now,
94 bool purge_all);
95 static inline void fq_if_reuse_empty_flow(fq_if_t *fqs, fq_t *fq, uint64_t now);
96 static int fq_if_dequeue_sc_classq_multi_separate(struct ifclassq *ifq,
97 mbuf_svc_class_t svc, u_int32_t maxpktcnt, u_int32_t maxbytecnt,
98 classq_pkt_t *first_packet, classq_pkt_t *last_packet, u_int32_t *retpktcnt,
99 u_int32_t *retbytecnt, uint8_t grp_idx);
100 static void fq_if_grp_stat_sc(fq_if_t *fqs, fq_if_group_t *grp,
101 cqrq_stat_sc_t *stat, uint64_t now);
102 static void fq_if_purge_grp(fq_if_t *fqs, fq_if_group_t *grp);
103 static inline boolean_t fq_if_is_grp_combined(fq_if_t *fqs, uint8_t grp_idx);
104 static void fq_if_destroy_grps(fq_if_t *fqs);
105
/*
 * Default DRR (deficit round robin) weights per service-class index;
 * higher-priority classes get a larger share.  Consumed as the drr_max
 * argument of fq_if_classq_init() and overridable at boot via the
 * "ifcq_drr_max" boot-arg (parsed in pktsched_fq_init()).
 */
uint32_t fq_codel_drr_max_values[FQ_IF_MAX_CLASSES] = {
	[FQ_IF_CTL_INDEX] = 8,
	[FQ_IF_VO_INDEX] = 8,
	[FQ_IF_VI_INDEX] = 6,
	[FQ_IF_RV_INDEX] = 6,
	[FQ_IF_AV_INDEX] = 6,
	[FQ_IF_OAM_INDEX] = 4,
	[FQ_IF_RD_INDEX] = 4,
	[FQ_IF_BE_INDEX] = 4,
	[FQ_IF_BK_INDEX] = 2,
	[FQ_IF_BK_SYS_INDEX] = 2,
};
118
119 #define FQ_CODEL_DRR_MAX(_s) fq_codel_drr_max_values[FQ_IF_##_s##_INDEX]
120
121 static boolean_t fq_if_grps_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri,
122 fq_if_state state);
123 static void fq_if_grps_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri,
124 fq_if_state dst_state, fq_if_state src_state);
125 static void fq_if_grps_bitmap_clr(fq_grp_tailq_t *grp_list, int pri,
126 fq_if_state state);
127 static int fq_if_grps_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri,
128 fq_if_state state, fq_if_group_t **selected_grp);
129 static void fq_if_grps_bitmap_move(fq_grp_tailq_t *grp_list, int pri,
130 fq_if_state dst_state, fq_if_state src_state);
131
132 static boolean_t fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri,
133 fq_if_state state);
134 static void fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri,
135 fq_if_state dst_state, fq_if_state src_state);
136 static void fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t *grp_list, int pri,
137 fq_if_state state);
138 static int fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri,
139 fq_if_state state, fq_if_group_t **selected_grp);
140 static void fq_if_grps_sc_bitmap_move(fq_grp_tailq_t *grp_list, int pri,
141 fq_if_state dst_state, fq_if_state src_state);
142
/*
 * Whole-bitmap group operations (fq_if_grps_bitmap_*): each op works on
 * the full per-group priority bitmap and ignores its 'pri' argument.
 */
bitmap_ops_t fq_if_grps_bitmap_ops =
{
	.ffs = fq_if_grps_bitmap_ffs,
	.zeros = fq_if_grps_bitmap_zeros,
	.cpy = fq_if_grps_bitmap_cpy,
	.clr = fq_if_grps_bitmap_clr,
	.move = fq_if_grps_bitmap_move,
};
151
/*
 * Single-bit variants (fq_if_grps_sc_bitmap_*): each op touches only the
 * bit for the given priority 'pri' in every group's bitmap.
 */
bitmap_ops_t fq_if_grps_sc_bitmap_ops =
{
	.ffs = fq_if_grps_sc_bitmap_ffs,
	.zeros = fq_if_grps_sc_bitmap_zeros,
	.cpy = fq_if_grps_sc_bitmap_cpy,
	.clr = fq_if_grps_sc_bitmap_clr,
	.move = fq_if_grps_sc_bitmap_move,
};
160
161 static uint32_t fq_if_hash_table_size;
162
163 extern int serverperfmode; // Temporary to resolve build dependency
164
/*
 * One-time scheduler initialization: consumes the "ifclassq_enable_pacing"
 * and "ifcq_drr_max" boot-args and sizes the per-interface flow hash table.
 */
void
pktsched_fq_init(void)
{
	PE_parse_boot_argn("ifclassq_enable_pacing", &ifclassq_enable_pacing,
	    sizeof(ifclassq_enable_pacing));

	/* Use a much larger flow hash table (64K buckets) in server perf mode. */
	if (serverperfmode) {
		fq_if_hash_table_size = (1 << 16);
	} else {
		fq_if_hash_table_size = (1 << 8);
	}

	// format looks like ifcq_drr_max=8,8,6
	char buf[(FQ_IF_MAX_CLASSES) * 3];
	size_t i, len, pri_index = 0;
	uint32_t drr = 0;
	if (!PE_parse_boot_arg_str("ifcq_drr_max", buf, sizeof(buf))) {
		return;
	}

	/*
	 * Parse the comma-separated list of decimal values.  The loop runs to
	 * len + 1 so the terminating NUL flushes the last accumulated value.
	 */
	len = strbuflen(buf, sizeof(buf));
	for (i = 0; i < len + 1 && pri_index < FQ_IF_MAX_CLASSES; i++) {
		if (buf[i] != ',' && buf[i] != '\0') {
			VERIFY(buf[i] >= '0' && buf[i] <= '9');
			drr = drr * 10 + buf[i] - '0';
			continue;
		}
		/* Delimiter reached: commit the value for this priority index. */
		fq_codel_drr_max_values[pri_index] = drr;
		pri_index += 1;
		drr = 0;
	}
}
197
198 static uint32_t
fq_if_flow_hash_id(uint32_t flowid)199 fq_if_flow_hash_id(uint32_t flowid)
200 {
201 return flowid & (fq_if_hash_table_size - 1);
202 }
203
204 #define FQ_IF_CLASSQ_IDLE(_fcl_) \
205 (STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
206 STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))
207
208 typedef void (* fq_if_append_pkt_t)(classq_pkt_t *, classq_pkt_t *);
209 typedef boolean_t (* fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *,
210 int64_t, uint32_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
211 uint32_t *, boolean_t *, uint64_t);
212
/* Chain next_pkt after pkt using the mbuf m_nextpkt linkage. */
static void
fq_if_append_mbuf(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
{
	pkt->cp_mbuf->m_nextpkt = next_pkt->cp_mbuf;
}
218
219 static inline uint64_t
fq_codel_get_time(void)220 fq_codel_get_time(void)
221 {
222 struct timespec ts;
223 uint64_t now;
224
225 nanouptime(&ts);
226 now = ((uint64_t)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec;
227 return now;
228 }
229
#if SKYWALK
/* Chain next_pkt after pkt using the kernel-packet pkt_nextpkt linkage. */
static void
fq_if_append_pkt(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
{
	pkt->cp_kpkt->pkt_nextpkt = next_pkt->cp_kpkt;
}
#endif /* SKYWALK */
237
#if SKYWALK
/*
 * Dequeue kernel packets (QP_PACKET) from flow 'fq' until its DRR deficit
 * is exhausted, the caller's pkt/byte limit is reached, the queue drains,
 * or the flow's pacing gate (fq_tx_time_ready()) reports its next tx time
 * has not yet arrived.  Dequeued packets are appended to the caller's
 * head/tail chain and *pkt_cnt / *byte_cnt are advanced.
 *
 * Returns TRUE when pkt_limit or byte_limit was hit; *qempty reports
 * whether the flow queue is empty afterwards.
 */
static boolean_t
fq_getq_flow_kpkt(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    int64_t byte_limit, uint32_t pkt_limit, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t *byte_cnt, uint32_t *pkt_cnt,
    boolean_t *qempty, uint64_t now)
{
	uint32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	/*
	 * Assert to make sure pflags is part of PKT_F_COMMON_MASK;
	 * all common flags need to be declared in that mask.
	 */
	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !KPKTQ_EMPTY(&fq->fq_kpktq) && fq_tx_time_ready(fqs, fq, now, NULL)) {
		_PKTSCHED_PKT_INIT(&pkt);
		fq_getq_flow(fqs, fq, &pkt, now);
		ASSERT(pkt.pktsched_ptype == QP_PACKET);

		/* Charge the packet length against the flow's DRR deficit. */
		plen = pktsched_get_pkt_len(&pkt);
		fq->fq_deficit -= plen;
		/* First dequeue from a fresh flow: propagate the new-flow flag. */
		if (__improbable((fq->fq_flags & FQF_FRESH_FLOW) != 0)) {
			pkt.pktsched_pkt_kpkt->pkt_pflags |= PKT_F_NEW_FLOW;
			fq->fq_flags &= ~FQF_FRESH_FLOW;
		}

		/* Link the packet at the tail of the caller's chain. */
		if (head->cp_kpkt == NULL) {
			*head = pkt.pktsched_pkt;
		} else {
			ASSERT(tail->cp_kpkt != NULL);
			ASSERT(tail->cp_kpkt->pkt_nextpkt == NULL);
			tail->cp_kpkt->pkt_nextpkt = pkt.pktsched_pkt_kpkt;
		}
		*tail = pkt.pktsched_pkt;
		tail->cp_kpkt->pkt_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
			limit_reached = TRUE;
		}
	}
	KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq),
	    fq->fq_bytes, fq->fq_min_qdelay);

	*qempty = KPKTQ_EMPTY(&fq->fq_kpktq);
	return limit_reached;
}
#endif /* SKYWALK */
297
/*
 * Mbuf counterpart of fq_getq_flow_kpkt(): dequeue mbufs (QP_MBUF) from
 * flow 'fq' until its DRR deficit is exhausted, the caller's pkt/byte
 * limit is reached, the queue drains, or the flow's pacing gate
 * (fq_tx_time_ready()) is not yet open.  Dequeued packets are appended to
 * the caller's head/tail chain and *pkt_cnt / *byte_cnt are advanced.
 *
 * Returns TRUE when pkt_limit or byte_limit was hit; *qempty reports
 * whether the flow queue is empty afterwards.
 */
static boolean_t
fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    int64_t byte_limit, uint32_t pkt_limit, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t *byte_cnt, uint32_t *pkt_cnt,
    boolean_t *qempty, uint64_t now)
{
	u_int32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !MBUFQ_EMPTY(&fq->fq_mbufq) && fq_tx_time_ready(fqs, fq, now, NULL)) {
		_PKTSCHED_PKT_INIT(&pkt);
		fq_getq_flow(fqs, fq, &pkt, now);
		ASSERT(pkt.pktsched_ptype == QP_MBUF);

		/* Charge the packet length against the flow's DRR deficit. */
		plen = pktsched_get_pkt_len(&pkt);
		fq->fq_deficit -= plen;

		/* First dequeue from a fresh flow: propagate the new-flow flag. */
		if (__improbable((fq->fq_flags & FQF_FRESH_FLOW) != 0)) {
			pkt.pktsched_pkt_mbuf->m_pkthdr.pkt_flags |= PKTF_NEW_FLOW;
			fq->fq_flags &= ~FQF_FRESH_FLOW;
		}

		/* Link the packet at the tail of the caller's chain. */
		if (head->cp_mbuf == NULL) {
			*head = pkt.pktsched_pkt;
		} else {
			ASSERT(tail->cp_mbuf != NULL);
			ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
			tail->cp_mbuf->m_nextpkt = pkt.pktsched_pkt_mbuf;
		}
		*tail = pkt.pktsched_pkt;
		tail->cp_mbuf->m_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
			limit_reached = TRUE;
		}
	}
	KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq),
	    fq->fq_bytes, fq->fq_min_qdelay);

	*qempty = MBUFQ_EMPTY(&fq->fq_mbufq);
	return limit_reached;
}
352
353 static void
fq_if_pacemaker_tcall(thread_call_param_t arg0,thread_call_param_t arg1)354 fq_if_pacemaker_tcall(thread_call_param_t arg0, thread_call_param_t arg1)
355 {
356 #pragma unused(arg1)
357 struct ifnet* ifp = (struct ifnet*)arg0;
358 ASSERT(ifp != NULL);
359
360 ifnet_start_ignore_delay(ifp);
361 }
362
/*
 * Allocate and initialize the FQ-CoDel scheduler state for interface
 * queue 'ifq'.  Allocates the flow hash table (sized by
 * pktsched_fq_init()) and the pacemaker thread call.  Returns NULL if an
 * allocation fails; on success the caller owns the returned fq_if_t and
 * releases it with fq_if_destroy().
 */
fq_if_t *
fq_if_alloc(struct ifclassq *ifq, classq_pkt_type_t ptype)
{
	flowq_list_t *fqs_flows;
	fq_if_t *fqs;

	ASSERT(ifq->ifcq_ifp != NULL);
	fqs = zalloc_flags(fq_if_zone, Z_WAITOK | Z_ZERO);
	if (fqs == NULL) {
		return NULL;
	}
	fqs_flows = kalloc_type(flowq_list_t, fq_if_hash_table_size, Z_WAITOK | Z_ZERO);
	if (fqs_flows == NULL) {
		/* Unwind the scheduler allocation on hash-table failure. */
		zfree(fq_if_zone, fqs);
		return NULL;
	}
	fqs->fqs_flows = fqs_flows;
	fqs->fqs_flows_count = fq_if_hash_table_size;
	fqs->fqs_ifq = ifq;
	fqs->fqs_ptype = ptype;

	/* Configure packet drop limit across all queues */
	fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(ifq);
	STAILQ_INIT(&fqs->fqs_fclist);
	TAILQ_INIT(&fqs->fqs_empty_list);
	TAILQ_INIT(&fqs->fqs_combined_grp_list);
	/*
	 * One-shot pacemaker thread call; fired via
	 * fq_if_schedule_pacemaker() when all eligible queues are paced.
	 */
	fqs->fqs_pacemaker_tcall = thread_call_allocate_with_options(fq_if_pacemaker_tcall,
	    (thread_call_param_t)(ifq->ifcq_ifp), THREAD_CALL_PRIORITY_KERNEL,
	    THREAD_CALL_OPTIONS_ONCE);
	ASSERT(fqs->fqs_pacemaker_tcall != NULL);

	return fqs;
}
396
/*
 * Tear down scheduler state created by fq_if_alloc(): cancel and free the
 * pacemaker thread call, purge all queued packets and groups, then release
 * the flow table and the fq_if_t itself.  Called with the IFCQ lock held
 * and without the interface start lock.
 */
void
fq_if_destroy(fq_if_t *fqs)
{
	struct ifnet *ifp = fqs->fqs_ifq->ifcq_ifp;
	thread_call_t __single tcall = fqs->fqs_pacemaker_tcall;

	VERIFY(ifp != NULL);
	ASSERT(tcall != NULL);
	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
	LCK_MTX_ASSERT(&ifp->if_start_lock, LCK_MTX_ASSERT_NOTOWNED);
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);

	/*
	 * Since we are holding the IFCQ lock here, another thread cannot enter AQM
	 * and schedule a pacemaker call. So we do not need a sleep wait loop here,
	 * cancel wait and free should succeed in one call.
	 */
	thread_call_cancel_wait(tcall);
	ASSERT(thread_call_free(tcall));

	fq_if_purge(fqs);
	fq_if_destroy_grps(fqs);

	fqs->fqs_ifq = NULL;

	kfree_type_counted_by(flowq_list_t, fqs->fqs_flows_count, fqs->fqs_flows);
	zfree(fq_if_zone, fqs);
}
425
426 static inline uint8_t
fq_if_service_to_priority(fq_if_t * fqs,mbuf_svc_class_t svc)427 fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc)
428 {
429 uint8_t pri;
430
431 if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
432 switch (svc) {
433 case MBUF_SC_BK_SYS:
434 case MBUF_SC_BK:
435 pri = FQ_IF_BK_INDEX;
436 break;
437 case MBUF_SC_BE:
438 case MBUF_SC_RD:
439 case MBUF_SC_OAM:
440 pri = FQ_IF_BE_INDEX;
441 break;
442 case MBUF_SC_AV:
443 case MBUF_SC_RV:
444 case MBUF_SC_VI:
445 case MBUF_SC_SIG:
446 pri = FQ_IF_VI_INDEX;
447 break;
448 case MBUF_SC_VO:
449 case MBUF_SC_CTL:
450 pri = FQ_IF_VO_INDEX;
451 break;
452 default:
453 pri = FQ_IF_BE_INDEX; /* Use best effort by default */
454 break;
455 }
456 return pri;
457 }
458
459 /* scheduler is not managed by the driver */
460 switch (svc) {
461 case MBUF_SC_BK_SYS:
462 pri = FQ_IF_BK_SYS_INDEX;
463 break;
464 case MBUF_SC_BK:
465 pri = FQ_IF_BK_INDEX;
466 break;
467 case MBUF_SC_BE:
468 pri = FQ_IF_BE_INDEX;
469 break;
470 case MBUF_SC_RD:
471 pri = FQ_IF_RD_INDEX;
472 break;
473 case MBUF_SC_OAM:
474 pri = FQ_IF_OAM_INDEX;
475 break;
476 case MBUF_SC_AV:
477 pri = FQ_IF_AV_INDEX;
478 break;
479 case MBUF_SC_RV:
480 pri = FQ_IF_RV_INDEX;
481 break;
482 case MBUF_SC_VI:
483 pri = FQ_IF_VI_INDEX;
484 break;
485 case MBUF_SC_SIG:
486 pri = FQ_IF_SIG_INDEX;
487 break;
488 case MBUF_SC_VO:
489 pri = FQ_IF_VO_INDEX;
490 break;
491 case MBUF_SC_CTL:
492 pri = FQ_IF_CTL_INDEX;
493 break;
494 default:
495 pri = FQ_IF_BE_INDEX; /* Use best effort by default */
496 break;
497 }
498 return pri;
499 }
500
/*
 * Initialize one service class queue ('pri') within group 'fqg': set its
 * DRR quantum and weight, service class identifier, and empty new/old
 * flow lists.  May only be called once per class (fcl_quantum must still
 * be zero) and the quantum must be non-zero.
 */
void
fq_if_classq_init(fq_if_group_t *fqg, uint32_t pri, uint32_t quantum,
    uint32_t drr_max, uint32_t svc_class)
{
	fq_if_classq_t *fq_cl;
	VERIFY(pri < FQ_IF_MAX_CLASSES);
	fq_cl = &fqg->fqg_classq[pri];

	VERIFY(fq_cl->fcl_quantum == 0);
	VERIFY(quantum != 0);
	fq_cl->fcl_quantum = quantum;
	fq_cl->fcl_pri = pri;
	fq_cl->fcl_drr_max = drr_max;
	fq_cl->fcl_service_class = svc_class;
	fq_cl->fcl_next_tx_time = 0;
	fq_cl->fcl_flags = 0;
	STAILQ_INIT(&fq_cl->fcl_new_flows);
	STAILQ_INIT(&fq_cl->fcl_old_flows);
}
520
/*
 * Enqueue a chain of 'cnt' packets ('bytes' total) onto the scheduler for
 * 'ifq'.  The target class queue is selected from the packet's service
 * class (and, for Skywalk packets, the queue-set/group index carried on
 * the packet).  Returns 0 on success or an errno: EQFULL / EQSUSPENDED as
 * flow-control advisories, ENOBUFS on drop.  *pdrop tells the caller
 * whether the chain was dropped (freed here) rather than enqueued.
 */
int
fq_if_enqueue_classq(struct ifclassq *ifq, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t *pdrop)
{
	uint8_t pri, grp_idx = 0;
	fq_if_t *fqs;
	fq_if_classq_t *fq_cl;
	fq_if_group_t *fq_group;
	int ret;
	mbuf_svc_class_t svc;
	pktsched_pkt_t pkt;

	pktsched_pkt_encap_chain(&pkt, head, tail, cnt, bytes);

	fqs = (fq_if_t *)ifq->ifcq_disc;
	svc = pktsched_get_pkt_svc(&pkt);
#if SKYWALK
	/* Skywalk packets carry their target queue-set (group) index. */
	if (head->cp_ptype == QP_PACKET) {
		grp_idx = head->cp_kpkt->pkt_qset_idx;
	}
#endif /* SKYWALK */
	pri = fq_if_service_to_priority(fqs, svc);
	VERIFY(pri < FQ_IF_MAX_CLASSES);

	IFCQ_LOCK_SPIN(ifq);
	fq_group = fq_if_find_grp(fqs, grp_idx);
	fq_cl = &fq_group->fqg_classq[pri];

	if (__improbable(svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1)) {
		IFCQ_UNLOCK(ifq);
		/* BK_SYS is currently throttled */
		/* stat updated atomically: the queue lock was just dropped */
		os_atomic_inc(&fq_cl->fcl_stat.fcl_throttle_drops, relaxed);
		pktsched_free_pkt(&pkt);
		*pdrop = TRUE;
		ret = EQSUSPENDED;
		goto done;
	}

	ASSERT(pkt.pktsched_ptype == fqs->fqs_ptype);
	ret = fq_addq(fqs, fq_group, &pkt, fq_cl);
	if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
		if (((fq_group->fqg_bitmaps[FQ_IF_ER] | fq_group->fqg_bitmaps[FQ_IF_EB]) &
		    (1 << pri)) == 0) {
			/*
			 * this group is not in ER or EB groups,
			 * mark it as IB
			 */
			pktsched_bit_set(pri, &fq_group->fqg_bitmaps[FQ_IF_IB]);
		}
	}

	if (__improbable(ret != 0)) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
			*pdrop = FALSE;
		} else if (ret == CLASSQEQ_COMPRESSED) {
			/* compressed enqueue counts as a plain success */
			ret = 0;
			*pdrop = FALSE;
		} else {
			IFCQ_UNLOCK(ifq);
			*pdrop = TRUE;
			pktsched_drop_pkt(&pkt, DROP_REASON_AQM_FULL, __func__,
			    __LINE__, 0);
			/* Map the classq drop code to an errno for the caller. */
			switch (ret) {
			case CLASSQEQ_DROP:
				ret = ENOBUFS;
				goto done;
			case CLASSQEQ_DROP_FC:
				ret = EQFULL;
				goto done;
			case CLASSQEQ_DROP_SP:
				ret = EQSUSPENDED;
				goto done;
			default:
				VERIFY(0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			/* NOTREACHED */
			__builtin_unreachable();
		}
	} else {
		*pdrop = FALSE;
	}
	/* Account the enqueued chain on both the ifclassq and the group. */
	IFCQ_ADD_LEN(ifq, cnt);
	IFCQ_INC_BYTES(ifq, bytes);


	FQS_GRP_ADD_LEN(fqs, grp_idx, cnt);
	FQS_GRP_INC_BYTES(fqs, grp_idx, bytes);

	IFCQ_UNLOCK(ifq);
done:
#if DEBUG || DEVELOPMENT
	/* Suppress the advisory when flow-control advisories are disabled. */
	if (__improbable((ret == EQFULL) && (ifclassq_flow_control_adv == 0))) {
		ret = 0;
	}
#endif /* DEBUG || DEVELOPMENT */
	return ret;
}
622
/* Dequeue a single packet from group 'grp_idx' (any service class). */
void
fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_t *pkt, uint8_t grp_idx)
{
	(void) fq_if_dequeue_classq_multi(ifq, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
}
629
/* Dequeue a single packet of service class 'svc' from group 'grp_idx'. */
void
fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
    classq_pkt_t *pkt, uint8_t grp_idx)
{
	(void) fq_if_dequeue_sc_classq_multi(ifq, svc, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
}
637
/*
 * Append flow 'fq' to the per-dequeue flow list.  The flow must not yet
 * carry a pending packet chain and must not already be on the list.
 */
static inline void
fq_dqlist_add(flowq_dqlist_t *fq_dqlist_head, fq_t *fq)
{
	ASSERT(fq->fq_dq_head.cp_mbuf == NULL);
	ASSERT(!fq->fq_in_dqlist);
	STAILQ_INSERT_TAIL(fq_dqlist_head, fq, fq_dqlink);
	fq->fq_in_dqlist = true;
}
646
/*
 * Unlink flow 'fq' from the dequeue list and splice its accumulated
 * packet chain (fq_dq_head/fq_dq_tail) onto the caller's head/tail chain,
 * using the linkage that matches 'ptype'.  The flow's chain markers are
 * reset afterwards.
 */
static inline void
fq_dqlist_remove(flowq_dqlist_t *fq_dqlist_head, fq_t *fq, classq_pkt_t *head,
    classq_pkt_t *tail, classq_pkt_type_t ptype)
{
	ASSERT(fq->fq_in_dqlist);
	/* Nothing accumulated on this flow: just unlink it. */
	if (fq->fq_dq_head.cp_mbuf == NULL) {
		goto done;
	}

	if (head->cp_mbuf == NULL) {
		*head = fq->fq_dq_head;
	} else {
		ASSERT(tail->cp_mbuf != NULL);

		/* Append the flow's chain after the caller's current tail. */
		switch (ptype) {
		case QP_MBUF:
			ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
			tail->cp_mbuf->m_nextpkt = fq->fq_dq_head.cp_mbuf;
			ASSERT(fq->fq_dq_tail.cp_mbuf->m_nextpkt == NULL);
			break;
#if SKYWALK
		case QP_PACKET:
			ASSERT(tail->cp_kpkt->pkt_nextpkt == NULL);
			tail->cp_kpkt->pkt_nextpkt = fq->fq_dq_head.cp_kpkt;
			ASSERT(fq->fq_dq_tail.cp_kpkt->pkt_nextpkt == NULL);
			break;
#endif /* SKYWALK */
		default:
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}
	*tail = fq->fq_dq_tail;
done:
	STAILQ_REMOVE(fq_dqlist_head, fq, flowq, fq_dqlink);
	CLASSQ_PKT_INIT(&fq->fq_dq_head);
	CLASSQ_PKT_INIT(&fq->fq_dq_tail);
	fq->fq_in_dqlist = false;
}
687
688 static inline void
fq_dqlist_get_packet_list(flowq_dqlist_t * fq_dqlist_head,classq_pkt_t * head,classq_pkt_t * tail,classq_pkt_type_t ptype)689 fq_dqlist_get_packet_list(flowq_dqlist_t *fq_dqlist_head, classq_pkt_t *head,
690 classq_pkt_t *tail, classq_pkt_type_t ptype)
691 {
692 fq_t *fq, *tfq;
693
694 STAILQ_FOREACH_SAFE(fq, fq_dqlist_head, fq_dqlink, tfq) {
695 fq_dqlist_remove(fq_dqlist_head, fq, head, tail, ptype);
696 }
697 }
698
/*
 * Find the highest-priority (lowest bit index) class with a bit set in
 * 'state' across all groups on 'grp_list'.  Returns the 1-based bit
 * position per pktsched_ffs() semantics (0 when every bitmap is empty)
 * and points *selected_grp at the chosen group.  Note the '<=' compare:
 * on a priority tie, the group latest in the list wins.
 */
static int
fq_if_grps_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri, fq_if_state state,
    fq_if_group_t **selected_grp)
{
#pragma unused(pri)

	fq_if_group_t *grp;
	uint32_t highest_pri = FQ_IF_MAX_CLASSES;
	int ret_pri = 0;

	TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
		uint32_t cur_pri = pktsched_ffs(grp->fqg_bitmaps[state]);
		/* bitmap is empty in this case */
		if (cur_pri == 0) {
			continue;
		}
		if (cur_pri <= highest_pri) {
			highest_pri = cur_pri;
			ret_pri = cur_pri;
			*selected_grp = grp;
		}
	}
	return ret_pri;
}
723
724 static boolean_t
fq_if_grps_bitmap_zeros(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)725 fq_if_grps_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
726 {
727 #pragma unused(pri)
728
729 fq_if_group_t *grp;
730
731 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
732 if (grp->fqg_bitmaps[state] != 0) {
733 return FALSE;
734 }
735 }
736 return TRUE;
737 }
738
739 static void
fq_if_grps_bitmap_cpy(fq_grp_tailq_t * grp_list,int pri,fq_if_state dst_state,fq_if_state src_state)740 fq_if_grps_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
741 fq_if_state src_state)
742 {
743 #pragma unused(pri)
744
745 fq_if_group_t *grp;
746 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
747 grp->fqg_bitmaps[dst_state] = grp->fqg_bitmaps[src_state];
748 }
749 }
750
751 static void
fq_if_grps_bitmap_clr(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)752 fq_if_grps_bitmap_clr(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
753 {
754 #pragma unused(pri)
755
756 fq_if_group_t *grp;
757 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
758 grp->fqg_bitmaps[state] = 0;
759 }
760 }
761
762 static void
fq_if_grps_bitmap_move(fq_grp_tailq_t * grp_list,int pri,fq_if_state dst_state,fq_if_state src_state)763 fq_if_grps_bitmap_move(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
764 fq_if_state src_state)
765 {
766 #pragma unused(pri)
767
768 fq_if_group_t *grp;
769 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
770 grp->fqg_bitmaps[dst_state] =
771 grp->fqg_bitmaps[dst_state] | grp->fqg_bitmaps[src_state];
772 grp->fqg_bitmaps[src_state] = 0;
773 }
774 }
775
776 static int
fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t * grp_list,int pri,fq_if_state state,fq_if_group_t ** selected_grp)777 fq_if_grps_sc_bitmap_ffs(fq_grp_tailq_t *grp_list, int pri, fq_if_state state,
778 fq_if_group_t **selected_grp)
779 {
780 fq_if_group_t *grp;
781 int ret_pri = 0;
782
783 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
784 if (pktsched_bit_tst(pri, &grp->fqg_bitmaps[state])) {
785 /* +1 to match the semantics of pktsched_ffs */
786 ret_pri = pri + 1;
787 *selected_grp = grp;
788 break;
789 }
790 }
791
792 return ret_pri;
793 }
794
795 static boolean_t
fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)796 fq_if_grps_sc_bitmap_zeros(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
797 {
798 fq_if_group_t *grp;
799
800 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
801 if (pktsched_bit_tst(pri, &grp->fqg_bitmaps[state])) {
802 return FALSE;
803 }
804 }
805 return TRUE;
806 }
807
808 static void
fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t * grp_list,int pri,fq_if_state dst_state,fq_if_state src_state)809 fq_if_grps_sc_bitmap_cpy(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
810 fq_if_state src_state)
811 {
812 fq_if_group_t *grp;
813
814 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
815 pktsched_bit_cpy(pri, &grp->fqg_bitmaps[dst_state],
816 &grp->fqg_bitmaps[src_state]);
817 }
818 }
819
820 static void
fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t * grp_list,int pri,fq_if_state state)821 fq_if_grps_sc_bitmap_clr(fq_grp_tailq_t *grp_list, int pri, fq_if_state state)
822 {
823 fq_if_group_t *grp;
824
825 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
826 pktsched_bit_clr(pri, &grp->fqg_bitmaps[state]);
827 }
828 }
829
830 static void
fq_if_grps_sc_bitmap_move(fq_grp_tailq_t * grp_list,int pri,fq_if_state dst_state,fq_if_state src_state)831 fq_if_grps_sc_bitmap_move(fq_grp_tailq_t *grp_list, int pri, fq_if_state dst_state,
832 fq_if_state src_state)
833 {
834 fq_if_group_t *grp;
835
836 TAILQ_FOREACH(grp, grp_list, fqg_grp_link) {
837 pktsched_bit_move(pri, &grp->fqg_bitmaps[dst_state],
838 &grp->fqg_bitmaps[src_state]);
839 pktsched_bit_clr(pri, &grp->fqg_bitmaps[src_state]);
840 }
841 }
842
843 /*
844 * Pacemaker is only scheduled when no packet can be dequeued from AQM
845 * due to pacing. Pacemaker will doorbell the driver when current >= next_tx_time.
846 * This only applies to L4S traffic at this moment.
847 */
848 static void
fq_if_schedule_pacemaker(fq_if_t * fqs,uint64_t now,uint64_t next_tx_time)849 fq_if_schedule_pacemaker(fq_if_t *fqs, uint64_t now, uint64_t next_tx_time)
850 {
851 uint64_t deadline = 0;
852 if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
853 return;
854 }
855 ASSERT(next_tx_time != FQ_INVALID_TX_TS);
856 ASSERT(fqs->fqs_pacemaker_tcall != NULL);
857 ASSERT(now < next_tx_time);
858
859 DTRACE_SKYWALK2(pacemaker__schedule, struct ifnet*, fqs->fqs_ifq->ifcq_ifp,
860 uint64_t, next_tx_time - now);
861 KDBG(AQM_KTRACE_TX_PACEMAKER, fqs->fqs_ifq->ifcq_ifp->if_index, now,
862 next_tx_time, next_tx_time - now);
863
864 clock_interval_to_deadline((uint32_t)(next_tx_time - now), 1, &deadline);
865 thread_call_enter_delayed(fqs->fqs_pacemaker_tcall, deadline);
866 }
867
868 static int
fq_if_dequeue_classq_multi_common(struct ifclassq * ifq,mbuf_svc_class_t svc,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt,uint8_t grp_idx)869 fq_if_dequeue_classq_multi_common(struct ifclassq *ifq, mbuf_svc_class_t svc,
870 u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
871 classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
872 uint8_t grp_idx)
873 {
874 uint32_t total_pktcnt = 0, total_bytecnt = 0;
875 classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
876 classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
877 classq_pkt_t tmp = CLASSQ_PKT_INITIALIZER(tmp);
878 fq_if_append_pkt_t append_pkt;
879 flowq_dqlist_t fq_dqlist_head;
880 fq_if_classq_t *fq_cl;
881 fq_grp_tailq_t *grp_list, tmp_grp_list;
882 fq_if_group_t *__single fq_grp = NULL;
883 fq_if_t *fqs;
884 uint64_t now, next_tx_time = FQ_INVALID_TX_TS;
885 int pri = 0, svc_pri = 0;
886 bool all_paced = true;
887
888 IFCQ_LOCK_ASSERT_HELD(ifq);
889
890 fqs = (fq_if_t *)ifq->ifcq_disc;
891 STAILQ_INIT(&fq_dqlist_head);
892
893 switch (fqs->fqs_ptype) {
894 case QP_MBUF:
895 append_pkt = fq_if_append_mbuf;
896 break;
897
898 #if SKYWALK
899 case QP_PACKET:
900 append_pkt = fq_if_append_pkt;
901 break;
902 #endif /* SKYWALK */
903
904 default:
905 VERIFY(0);
906 /* NOTREACHED */
907 __builtin_unreachable();
908 }
909
910 now = fq_codel_get_time();
911 if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
912 svc_pri = fq_if_service_to_priority(fqs, svc);
913 } else {
914 VERIFY(svc == MBUF_SC_UNSPEC);
915 }
916
917 if (fq_if_is_grp_combined(fqs, grp_idx)) {
918 grp_list = &fqs->fqs_combined_grp_list;
919 VERIFY(!TAILQ_EMPTY(grp_list));
920 } else {
921 grp_list = &tmp_grp_list;
922 fq_grp = fq_if_find_grp(fqs, grp_idx);
923 TAILQ_INIT(grp_list);
924 TAILQ_INSERT_TAIL(grp_list, fq_grp, fqg_grp_link);
925 }
926
927 for (;;) {
928 uint32_t pktcnt = 0, bytecnt = 0;
929 classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
930 classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
931 bool fq_cl_all_paced = false;
932 uint64_t fq_cl_next_tx_time = FQ_INVALID_TX_TS;
933
934 if (fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_ER) &&
935 fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_EB)) {
936 fqs->grp_bitmaps_cpy(grp_list, svc_pri, FQ_IF_EB, FQ_IF_IB);
937 fqs->grp_bitmaps_clr(grp_list, svc_pri, FQ_IF_IB);
938 if (fqs->grp_bitmaps_zeros(grp_list, svc_pri, FQ_IF_EB)) {
939 if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
940 /*
941 * Move fq_cl in IR back to ER, so that they will inspected with priority
942 * the next time the driver dequeues
943 */
944 fqs->grp_bitmaps_cpy(grp_list, svc_pri, FQ_IF_ER, FQ_IF_IR);
945 fqs->grp_bitmaps_clr(grp_list, svc_pri, FQ_IF_IR);
946 }
947 break;
948 }
949 }
950 pri = fqs->grp_bitmaps_ffs(grp_list, svc_pri, FQ_IF_ER, &fq_grp);
951 if (pri == 0) {
952 /*
953 * There are no ER flows, move the highest
954 * priority one from EB if there are any in that
955 * category
956 */
957 pri = fqs->grp_bitmaps_ffs(grp_list, svc_pri, FQ_IF_EB, &fq_grp);
958 VERIFY(pri > 0);
959 VERIFY(fq_grp != NULL);
960 pktsched_bit_clr((pri - 1), &fq_grp->fqg_bitmaps[FQ_IF_EB]);
961 pktsched_bit_set((pri - 1), &fq_grp->fqg_bitmaps[FQ_IF_ER]);
962 }
963 VERIFY(fq_grp != NULL);
964 pri--; /* index starts at 0 */
965 fq_cl = &fq_grp->fqg_classq[pri];
966
967 if (fq_cl->fcl_budget <= 0) {
968 /* Update the budget */
969 fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
970 fq_cl->fcl_stat.fcl_flows_cnt) *
971 fq_cl->fcl_quantum);
972 if (fq_cl->fcl_budget <= 0) {
973 goto state_change;
974 }
975 }
976 fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
977 (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
978 &bytecnt, &fq_dqlist_head, true, now, &fq_cl_all_paced,
979 &fq_cl_next_tx_time);
980 if (head.cp_mbuf != NULL) {
981 ASSERT(STAILQ_EMPTY(&fq_dqlist_head));
982 if (first.cp_mbuf == NULL) {
983 first = head;
984 } else {
985 ASSERT(last.cp_mbuf != NULL);
986 append_pkt(&last, &head);
987 }
988 last = tail;
989 append_pkt(&last, &tmp);
990 }
991 if (fq_cl_all_paced && fq_cl_next_tx_time < next_tx_time) {
992 fq_cl->fcl_stat.fcl_fcl_pacemaker_needed++;
993 next_tx_time = fq_cl_next_tx_time;
994 }
995 fq_cl->fcl_budget -= bytecnt;
996 total_pktcnt += pktcnt;
997 total_bytecnt += bytecnt;
998
999 /*
1000 * If the class has exceeded the budget but still has data
1001 * to send, move it to IB
1002 */
1003 state_change:
1004 VERIFY(fq_grp != NULL);
1005 all_paced &= fq_cl_all_paced;
1006 if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
1007 if (fq_cl->fcl_budget <= 0) {
1008 pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
1009 pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
1010 } else if (fq_cl_all_paced) {
1011 if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
1012 /*
1013 * If a fq_cl still has budget but only paced queues, park it
1014 * to IR so that we will not keep loopping over it
1015 */
1016 pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IR]);
1017 pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
1018 }
1019 }
1020 } else {
1021 pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
1022 VERIFY(((fq_grp->fqg_bitmaps[FQ_IF_ER] |
1023 fq_grp->fqg_bitmaps[FQ_IF_EB] |
1024 fq_grp->fqg_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
1025 fq_cl->fcl_budget = 0;
1026 }
1027 if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt) {
1028 if (ifclassq_enable_pacing && ifclassq_enable_l4s) {
1029 /*
1030 * Move fq_cl in IR back to ER, so that they will inspected with priority
1031 * the next time the driver dequeues
1032 */
1033 fqs->grp_bitmaps_move(grp_list, svc_pri, FQ_IF_ER, FQ_IF_IR);
1034 }
1035 break;
1036 }
1037 }
1038
1039 if (!fq_if_is_grp_combined(fqs, grp_idx)) {
1040 TAILQ_REMOVE(grp_list, fq_grp, fqg_grp_link);
1041 VERIFY(TAILQ_EMPTY(grp_list));
1042 }
1043
1044 fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last,
1045 fqs->fqs_ptype);
1046
1047 if (__probable(first_packet != NULL)) {
1048 *first_packet = first;
1049 }
1050 if (last_packet != NULL) {
1051 *last_packet = last;
1052 }
1053 if (retpktcnt != NULL) {
1054 *retpktcnt = total_pktcnt;
1055 }
1056 if (retbytecnt != NULL) {
1057 *retbytecnt = total_bytecnt;
1058 }
1059 if (next_tx_time != FQ_INVALID_TX_TS) {
1060 ASSERT(next_tx_time > now);
1061 fq_if_schedule_pacemaker(fqs, now, next_tx_time);
1062 }
1063
1064 IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
1065 fq_if_purge_empty_flow_list(fqs, now, false);
1066 return 0;
1067 }
1068
1069 int
fq_if_dequeue_classq_multi(struct ifclassq * ifq,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt,uint8_t grp_idx)1070 fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
1071 u_int32_t maxbytecnt, classq_pkt_t *first_packet,
1072 classq_pkt_t *last_packet, u_int32_t *retpktcnt,
1073 u_int32_t *retbytecnt, uint8_t grp_idx)
1074 {
1075 return fq_if_dequeue_classq_multi_common(ifq, MBUF_SC_UNSPEC, maxpktcnt, maxbytecnt,
1076 first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
1077 }
1078
1079 int
fq_if_dequeue_sc_classq_multi(struct ifclassq * ifq,mbuf_svc_class_t svc,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt,uint8_t grp_idx)1080 fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc,
1081 u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
1082 classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
1083 uint8_t grp_idx)
1084 {
1085 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1086
1087 if (fq_if_is_grp_combined(fqs, grp_idx)) {
1088 return fq_if_dequeue_classq_multi_common(ifq, svc, maxpktcnt, maxbytecnt,
1089 first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
1090 } else {
1091 /*
1092 * take a shortcut here since there is no need to schedule
1093 * one single service class.
1094 */
1095 return fq_if_dequeue_sc_classq_multi_separate(ifq, svc, maxpktcnt, maxbytecnt,
1096 first_packet, last_packet, retpktcnt, retbytecnt, grp_idx);
1097 }
1098 }
1099
1100 static int
fq_if_dequeue_sc_classq_multi_separate(struct ifclassq * ifq,mbuf_svc_class_t svc,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt,uint8_t grp_idx)1101 fq_if_dequeue_sc_classq_multi_separate(struct ifclassq *ifq, mbuf_svc_class_t svc,
1102 u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
1103 classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt,
1104 uint8_t grp_idx)
1105 {
1106 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1107 uint8_t pri;
1108 u_int32_t total_pktcnt = 0, total_bytecnt = 0;
1109 fq_if_classq_t *fq_cl;
1110 classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
1111 classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
1112 fq_if_append_pkt_t append_pkt;
1113 flowq_dqlist_t fq_dqlist_head;
1114 fq_if_group_t *fq_grp;
1115 uint64_t now;
1116
1117 switch (fqs->fqs_ptype) {
1118 case QP_MBUF:
1119 append_pkt = fq_if_append_mbuf;
1120 break;
1121
1122 #if SKYWALK
1123 case QP_PACKET:
1124 append_pkt = fq_if_append_pkt;
1125 break;
1126 #endif /* SKYWALK */
1127
1128 default:
1129 VERIFY(0);
1130 /* NOTREACHED */
1131 __builtin_unreachable();
1132 }
1133
1134 STAILQ_INIT(&fq_dqlist_head);
1135 now = fq_codel_get_time();
1136
1137 pri = fq_if_service_to_priority(fqs, svc);
1138 fq_grp = fq_if_find_grp(fqs, grp_idx);
1139 fq_cl = &fq_grp->fqg_classq[pri];
1140
1141 /*
1142 * Now we have the queue for a particular service class. We need
1143 * to dequeue as many packets as needed, first from the new flows
1144 * and then from the old flows.
1145 */
1146 while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt &&
1147 fq_cl->fcl_stat.fcl_pkt_cnt > 0) {
1148 classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
1149 classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
1150 u_int32_t pktcnt = 0, bytecnt = 0;
1151 bool all_paced = false;
1152 uint64_t next_tx_time = FQ_INVALID_TX_TS;
1153
1154 fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
1155 (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
1156 &bytecnt, &fq_dqlist_head, false, now, &all_paced, &next_tx_time);
1157 if (head.cp_mbuf != NULL) {
1158 if (first.cp_mbuf == NULL) {
1159 first = head;
1160 } else {
1161 ASSERT(last.cp_mbuf != NULL);
1162 append_pkt(&last, &head);
1163 }
1164 last = tail;
1165 }
1166 total_pktcnt += pktcnt;
1167 total_bytecnt += bytecnt;
1168
1169 if (next_tx_time != FQ_INVALID_TX_TS) {
1170 ASSERT(next_tx_time > now);
1171 fq_cl->fcl_stat.fcl_fcl_pacemaker_needed++;
1172 fq_if_schedule_pacemaker(fqs, now, next_tx_time);
1173 break;
1174 }
1175 }
1176
1177 /*
1178 * Mark classq as IB if it's not idle, so that we can
1179 * start without re-init the bitmaps when it's switched
1180 * to combined mode.
1181 */
1182 if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
1183 pktsched_bit_set(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
1184 pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_ER]);
1185 pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_EB]);
1186 } else {
1187 pktsched_bit_clr(pri, &fq_grp->fqg_bitmaps[FQ_IF_IB]);
1188 VERIFY(((fq_grp->fqg_bitmaps[FQ_IF_ER] |
1189 fq_grp->fqg_bitmaps[FQ_IF_EB] |
1190 fq_grp->fqg_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
1191 }
1192
1193 fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last, fqs->fqs_ptype);
1194
1195 if (__probable(first_packet != NULL)) {
1196 *first_packet = first;
1197 }
1198 if (last_packet != NULL) {
1199 *last_packet = last;
1200 }
1201 if (retpktcnt != NULL) {
1202 *retpktcnt = total_pktcnt;
1203 }
1204 if (retbytecnt != NULL) {
1205 *retbytecnt = total_bytecnt;
1206 }
1207
1208 IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
1209 fq_if_purge_empty_flow_list(fqs, now, false);
1210 return 0;
1211 }
1212
/*
 * Drop every packet queued on one flow, account the drops, and walk the
 * flow through its state transitions (new -> old -> empty -> destroyed).
 * On return *pktsp/*bytesp (if non-NULL) hold the number of packets and
 * bytes freed.  Note: fq may have been freed by the time this returns
 * (via fq_if_purge_empty_flow), so callers must not touch it afterwards.
 */
static void
fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, uint32_t *pktsp,
    uint32_t *bytesp, uint64_t now)
{
	fq_if_classq_t *fq_cl;
	u_int32_t pkts, bytes;
	pktsched_pkt_t pkt;
	fq_if_group_t *grp;

	fq_cl = &FQ_CLASSQ(fq);
	grp = FQ_GROUP(fq);
	pkts = bytes = 0;
	_PKTSCHED_PKT_INIT(&pkt);
	/* drain the flow completely, freeing each packet */
	for (;;) {
		fq_getq_flow(fqs, fq, &pkt, now);
		if (pkt.pktsched_pkt_mbuf == NULL) {
			VERIFY(pkt.pktsched_ptype == QP_INVALID);
			break;
		}
		pkts++;
		bytes += pktsched_get_pkt_len(&pkt);
		pktsched_free_pkt(&pkt);
	}
	KDBG(AQM_KTRACE_STATS_FLOW_DEQUEUE, fq->fq_flowhash,
	    AQM_KTRACE_FQ_GRP_SC_IDX(fq), fq->fq_bytes, fq->fq_min_qdelay);

	IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);

	/* move through the flow queue states */
	VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW | FQF_EMPTY_FLOW)));
	if (fq->fq_flags & FQF_NEW_FLOW) {
		/* new -> old (fq is now empty) */
		fq_if_empty_new_flow(fq, fq_cl);
	}
	if (fq->fq_flags & FQF_OLD_FLOW) {
		/* old -> empty list */
		fq_if_empty_old_flow(fqs, fq_cl, fq, now);
	}
	if (fq->fq_flags & FQF_EMPTY_FLOW) {
		/* empty -> destroyed; fq is freed, don't use it below */
		fq_if_purge_empty_flow(fqs, fq);
		fq = NULL;
	}

	/* an idle class must not be left scheduled in any DRR state */
	if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
		int i;
		for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
			pktsched_bit_clr(fq_cl->fcl_pri, &grp->fqg_bitmaps[i]);
		}
	}

	if (pktsp != NULL) {
		*pktsp = pkts;
	}
	if (bytesp != NULL) {
		*bytesp = bytes;
	}
}
1268
1269 static void
fq_if_purge_classq(fq_if_t * fqs,fq_if_classq_t * fq_cl)1270 fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
1271 {
1272 fq_t *fq, *tfq;
1273 uint64_t now;
1274
1275 now = fq_codel_get_time();
1276 /*
1277 * Take each flow from new/old flow list and flush mbufs
1278 * in that flow
1279 */
1280 STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
1281 fq_if_purge_flow(fqs, fq, NULL, NULL, now);
1282 }
1283 STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
1284 fq_if_purge_flow(fqs, fq, NULL, NULL, now);
1285 }
1286 VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
1287 VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));
1288
1289 STAILQ_INIT(&fq_cl->fcl_new_flows);
1290 STAILQ_INIT(&fq_cl->fcl_old_flows);
1291 fq_cl->fcl_budget = 0;
1292 }
1293
1294 static void
fq_if_purge(fq_if_t * fqs)1295 fq_if_purge(fq_if_t *fqs)
1296 {
1297 uint64_t now;
1298 fq_if_group_t *grp;
1299 int i;
1300
1301 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1302 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
1303 if (fqs->fqs_classq_groups[grp_idx] == NULL) {
1304 continue;
1305 }
1306
1307 grp = fq_if_find_grp(fqs, grp_idx);
1308 fq_if_purge_grp(fqs, grp);
1309 }
1310
1311 now = fq_codel_get_time();
1312 fq_if_purge_empty_flow_list(fqs, now, true);
1313
1314 VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));
1315 VERIFY(TAILQ_EMPTY(&fqs->fqs_empty_list));
1316
1317 fqs->fqs_large_flow = NULL;
1318 for (i = 0; i < fqs->fqs_flows_count; i++) {
1319 VERIFY(LIST_EMPTY(&fqs->fqs_flows[i]));
1320 }
1321
1322 IFCQ_LEN(fqs->fqs_ifq) = 0;
1323 IFCQ_BYTES(fqs->fqs_ifq) = 0;
1324 }
1325
1326 static void
fq_if_purge_sc(fq_if_t * fqs,cqrq_purge_sc_t * req)1327 fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
1328 {
1329 fq_t *fq;
1330 uint64_t now;
1331 fq_if_group_t *grp;
1332
1333 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
1334 req->packets = req->bytes = 0;
1335 VERIFY(req->flow != 0);
1336
1337 now = fq_codel_get_time();
1338
1339 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
1340 if (fqs->fqs_classq_groups[grp_idx] == NULL) {
1341 continue;
1342 }
1343 uint32_t bytes = 0, pkts = 0;
1344
1345 grp = fq_if_find_grp(fqs, grp_idx);
1346 /*
1347 * Packet and traffic type are needed only if we want
1348 * to create a flow queue.
1349 */
1350 fq = fq_if_hash_pkt(fqs, grp, req->flow, req->sc, 0, false, FQ_TFC_C);
1351 if (fq != NULL) {
1352 fq_if_purge_flow(fqs, fq, &pkts, &bytes, now);
1353 req->bytes += bytes;
1354 req->packets += pkts;
1355 }
1356 }
1357 }
1358
1359 static uint32_t
fq_if_calc_quantum(struct ifnet * ifp)1360 fq_if_calc_quantum(struct ifnet *ifp)
1361 {
1362 uint32_t quantum;
1363
1364 switch (ifp->if_family) {
1365 case IFNET_FAMILY_ETHERNET:
1366 VERIFY(ifp->if_mtu <= IF_MAXMTU);
1367 quantum = ifp->if_mtu + ETHER_HDR_LEN;
1368 break;
1369
1370 case IFNET_FAMILY_CELLULAR:
1371 case IFNET_FAMILY_IPSEC:
1372 case IFNET_FAMILY_UTUN:
1373 VERIFY(ifp->if_mtu <= UINT16_MAX);
1374 quantum = ifp->if_mtu;
1375 break;
1376
1377 default:
1378 quantum = FQ_CODEL_DEFAULT_QUANTUM;
1379 break;
1380 }
1381
1382 if ((ifp->if_hwassist & IFNET_TSOF) != 0) {
1383 VERIFY(ifp->if_tso_v4_mtu <= UINT16_MAX);
1384 VERIFY(ifp->if_tso_v6_mtu <= UINT16_MAX);
1385 quantum = MAX(ifp->if_tso_v4_mtu, ifp->if_tso_v6_mtu);
1386 quantum = (quantum != 0) ? quantum : IF_MAXMTU;
1387 }
1388
1389 quantum = MAX(FQ_CODEL_DEFAULT_QUANTUM, quantum);
1390 #if DEBUG || DEVELOPMENT
1391 quantum = (fq_codel_quantum != 0) ? fq_codel_quantum : quantum;
1392 #endif /* DEBUG || DEVELOPMENT */
1393 VERIFY(quantum != 0);
1394 return quantum;
1395 }
1396
/*
 * CLASSQ_EV_LINK_MTU handler: recompute the DRR quantum from the
 * interface's (possibly changed) MTU and push it to every class of
 * every allocated group.  Driver-managed schedulers expose only the
 * four access-category classes; the default scheduler has the full set.
 */
static void
fq_if_mtu_update(fq_if_t *fqs)
{
/* expands to: (_grp)->fqg_classq[<class _s index>].fcl_quantum = <quantum for _s> */
#define _FQ_CLASSQ_UPDATE_QUANTUM(_grp, _s, _q) \
    (_grp)->fqg_classq[FQ_IF_ ## _s ## _INDEX].fcl_quantum = \
    FQ_CODEL_QUANTUM_ ## _s(_q) \

	uint32_t quantum;
	fq_if_group_t *grp;

	quantum = fq_if_calc_quantum(fqs->fqs_ifq->ifcq_ifp);

	for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
		if (fqs->fqs_classq_groups[grp_idx] == NULL) {
			continue;
		}

		grp = fq_if_find_grp(fqs, grp_idx);

		if ((fqs->fqs_flags & FQS_DRIVER_MANAGED) != 0) {
			/* driver-managed: only the 4 WMM-style classes exist */
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BK, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BE, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, VI, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, VO, quantum);
		} else {
			/* default scheduler: all ten service classes */
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BK_SYS, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BK, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, BE, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, RD, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, OAM, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, AV, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, RV, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, VI, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, VO, quantum);
			_FQ_CLASSQ_UPDATE_QUANTUM(grp, CTL, quantum);
		}
	}
#undef _FQ_CLASSQ_UPDATE_QUANTUM
}
1436
1437 static void
fq_if_event(fq_if_t * fqs,cqev_t ev)1438 fq_if_event(fq_if_t *fqs, cqev_t ev)
1439 {
1440 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
1441
1442 switch (ev) {
1443 case CLASSQ_EV_LINK_UP:
1444 case CLASSQ_EV_LINK_DOWN:
1445 fq_if_purge(fqs);
1446 break;
1447 case CLASSQ_EV_LINK_MTU:
1448 fq_if_mtu_update(fqs);
1449 break;
1450 default:
1451 break;
1452 }
1453 }
1454
1455 static void
fq_if_classq_suspend(fq_if_t * fqs,fq_if_classq_t * fq_cl)1456 fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
1457 {
1458 fq_if_purge_classq(fqs, fq_cl);
1459 fqs->fqs_throttle = 1;
1460 fq_cl->fcl_stat.fcl_throttle_on++;
1461 KDBG(AQM_KTRACE_AON_THROTTLE | DBG_FUNC_START,
1462 fqs->fqs_ifq->ifcq_ifp->if_index, 0, 0, 0);
1463 }
1464
1465 static void
fq_if_classq_resume(fq_if_t * fqs,fq_if_classq_t * fq_cl)1466 fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
1467 {
1468 VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
1469 fqs->fqs_throttle = 0;
1470 fq_cl->fcl_stat.fcl_throttle_off++;
1471 KDBG(AQM_KTRACE_AON_THROTTLE | DBG_FUNC_END,
1472 fqs->fqs_ifq->ifcq_ifp->if_index, 0, 0, 0);
1473 }
1474
1475
1476 static int
fq_if_throttle(fq_if_t * fqs,cqrq_throttle_t * tr)1477 fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
1478 {
1479 struct ifclassq *ifq = fqs->fqs_ifq;
1480 uint8_t index;
1481 fq_if_group_t *grp;
1482
1483 #if !MACH_ASSERT
1484 #pragma unused(ifq)
1485 #endif
1486 IFCQ_LOCK_ASSERT_HELD(ifq);
1487
1488 if (!tr->set) {
1489 tr->level = fqs->fqs_throttle;
1490 return 0;
1491 }
1492
1493 if (tr->level == fqs->fqs_throttle) {
1494 return EALREADY;
1495 }
1496
1497 /* Throttling is allowed on BK_SYS class only */
1498 index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS);
1499
1500 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
1501 if (fqs->fqs_classq_groups[grp_idx] == NULL) {
1502 continue;
1503 }
1504 grp = fq_if_find_grp(fqs, grp_idx);
1505 switch (tr->level) {
1506 case IFNET_THROTTLE_OFF:
1507 fq_if_classq_resume(fqs, &grp->fqg_classq[index]);
1508 break;
1509 case IFNET_THROTTLE_OPPORTUNISTIC:
1510 fq_if_classq_suspend(fqs, &grp->fqg_classq[index]);
1511 break;
1512 default:
1513 break;
1514 }
1515 }
1516 return 0;
1517 }
1518
1519 static inline boolean_t
fq_if_is_fq_cl_paced(fq_if_classq_t * fq_cl,uint64_t now)1520 fq_if_is_fq_cl_paced(fq_if_classq_t *fq_cl, uint64_t now)
1521 {
1522 if ((fq_cl->fcl_flags & FCL_PACED) != 0 && fq_cl->fcl_next_tx_time > now) {
1523 return true;
1524 }
1525
1526 fq_cl->fcl_flags &= ~FCL_PACED;
1527 fq_cl->fcl_next_tx_time = 0;
1528 return false;
1529 }
1530
1531 static void
fq_if_grp_stat_sc(fq_if_t * fqs,fq_if_group_t * grp,cqrq_stat_sc_t * stat,uint64_t now)1532 fq_if_grp_stat_sc(fq_if_t *fqs, fq_if_group_t *grp, cqrq_stat_sc_t *stat, uint64_t now)
1533 {
1534 uint8_t pri;
1535 fq_if_classq_t *fq_cl;
1536
1537 ASSERT(stat != NULL);
1538 pri = fq_if_service_to_priority(fqs, stat->sc);
1539
1540 fq_cl = &grp->fqg_classq[pri];
1541 stat->packets = (uint32_t)fq_cl->fcl_stat.fcl_pkt_cnt;
1542 stat->bytes = (uint32_t)fq_cl->fcl_stat.fcl_byte_cnt;
1543
1544 if (ifclassq_enable_pacing && ifclassq_enable_l4s &&
1545 fq_if_is_fq_cl_paced(fq_cl, now)) {
1546 stat->packets = 0;
1547 stat->bytes = 0;
1548 }
1549 }
1550
1551 static boolean_t
fq_if_is_grp_all_paced(fq_if_group_t * grp)1552 fq_if_is_grp_all_paced(fq_if_group_t *grp)
1553 {
1554 fq_if_classq_t *fq_cl;
1555 uint64_t now;
1556
1557 if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
1558 return false;
1559 }
1560
1561 now = fq_codel_get_time();
1562 for (uint8_t fq_cl_idx = 0; fq_cl_idx < FQ_IF_MAX_CLASSES; fq_cl_idx++) {
1563 fq_cl = &grp->fqg_classq[fq_cl_idx];
1564 if (fq_cl == NULL || FQ_IF_CLASSQ_IDLE(fq_cl)) {
1565 continue;
1566 }
1567 if (!fq_if_is_fq_cl_paced(fq_cl, now)) {
1568 return false;
1569 }
1570 }
1571
1572 return true;
1573 }
1574
1575 boolean_t
fq_if_is_all_paced(struct ifclassq * ifq)1576 fq_if_is_all_paced(struct ifclassq *ifq)
1577 {
1578 fq_if_group_t *grp;
1579 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1580
1581 IFCQ_LOCK_ASSERT_HELD(ifq);
1582
1583 if (!ifclassq_enable_pacing || !ifclassq_enable_l4s) {
1584 return false;
1585 }
1586
1587 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
1588 grp = fqs->fqs_classq_groups[grp_idx];
1589 if (grp == NULL || FQG_BYTES(grp) == 0) {
1590 continue;
1591 }
1592
1593 if (!fq_if_is_grp_all_paced(grp)) {
1594 return false;
1595 }
1596 }
1597
1598 return true;
1599 }
1600
/*
 * CLASSQRQ_STAT_SC handler: report queued packet/byte counts, scoped by
 * stat->grp_idx (one group or IF_CLASSQ_ALL_GRPS) and stat->sc (one
 * service class or MBUF_SC_UNSPEC for all).  Groups/classes that are
 * fully paced report as empty so the driver does not poll them.
 */
void
fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
{
	cqrq_stat_sc_t grp_sc_stat;
	fq_if_group_t *grp;
	uint64_t now = fq_codel_get_time();

	if (stat == NULL) {
		return;
	}
	/* scratch request reused per group; only its sc field is read */
	grp_sc_stat.sc = stat->sc;
	stat->packets = 0;
	stat->bytes = 0;

	if (stat->grp_idx == IF_CLASSQ_ALL_GRPS) {
		if (stat->sc == MBUF_SC_UNSPEC) {
			/* whole interface: use the aggregate ifclassq counters */
			if (!fq_if_is_all_paced(fqs->fqs_ifq)) {
				stat->packets = IFCQ_LEN(fqs->fqs_ifq);
				stat->bytes = IFCQ_BYTES(fqs->fqs_ifq);
			}
		} else {
			/* one service class, summed across every group */
			for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
				grp = fqs->fqs_classq_groups[grp_idx];
				if (grp == NULL) {
					continue;
				}

				fq_if_grp_stat_sc(fqs, grp, &grp_sc_stat, now);
				stat->packets += grp_sc_stat.packets;
				stat->bytes += grp_sc_stat.bytes;
			}
		}
		return;
	}

	if (stat->sc == MBUF_SC_UNSPEC) {
		if (fq_if_is_grp_combined(fqs, stat->grp_idx)) {
			/* combined mode: sum every group on the combined list */
			TAILQ_FOREACH(grp, &fqs->fqs_combined_grp_list, fqg_grp_link) {
				if (fq_if_is_grp_all_paced(grp)) {
					continue;
				}
				stat->packets += FQG_LEN(grp);
				stat->bytes += FQG_BYTES(grp);
			}
		} else {
			/* standalone group: its own totals, unless fully paced */
			grp = fq_if_find_grp(fqs, stat->grp_idx);
			if (!fq_if_is_grp_all_paced(grp)) {
				stat->packets = FQG_LEN(grp);
				stat->bytes = FQG_BYTES(grp);
			}
		}
	} else {
		if (fq_if_is_grp_combined(fqs, stat->grp_idx)) {
			/* one class, summed over the combined groups */
			TAILQ_FOREACH(grp, &fqs->fqs_combined_grp_list, fqg_grp_link) {
				if (fq_if_is_grp_all_paced(grp)) {
					continue;
				}
				fq_if_grp_stat_sc(fqs, grp, &grp_sc_stat, now);
				stat->packets += grp_sc_stat.packets;
				stat->bytes += grp_sc_stat.bytes;
			}
		} else {
			/* single group, single class: fill stat directly */
			grp = fq_if_find_grp(fqs, stat->grp_idx);
			fq_if_grp_stat_sc(fqs, grp, stat, now);
		}
	}
}
1668
1669 int
fq_if_request_classq(struct ifclassq * ifq,cqrq_t rq,void * arg)1670 fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
1671 {
1672 int err = 0;
1673 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1674
1675 IFCQ_LOCK_ASSERT_HELD(ifq);
1676
1677 /*
1678 * These are usually slow operations, convert the lock ahead of time
1679 */
1680 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1681 switch (rq) {
1682 case CLASSQRQ_PURGE:
1683 fq_if_purge(fqs);
1684 break;
1685 case CLASSQRQ_PURGE_SC:
1686 fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg);
1687 break;
1688 case CLASSQRQ_EVENT:
1689 fq_if_event(fqs, *(cqev_t *)arg);
1690 break;
1691 case CLASSQRQ_THROTTLE:
1692 fq_if_throttle(fqs, (cqrq_throttle_t *)arg);
1693 break;
1694 case CLASSQRQ_STAT_SC:
1695 fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg);
1696 break;
1697 }
1698 return err;
1699 }
1700
/*
 * Instantiate an FQ-CoDel scheduler on an ifclassq: allocate the fq_if
 * state, select the bitmap ops for the scheduling mode, attach to the
 * ifclassq, and create the default group.  Returns 0 or an errno.
 */
int
fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
    classq_pkt_type_t ptype)
{
	fq_if_t *fqs = NULL;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);

	fqs = fq_if_alloc(ifq, ptype);
	if (fqs == NULL) {
		return ENOMEM;
	}
	/* driver-managed mode uses the per-service-class bitmap ops */
	if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) {
		fqs->fqs_flags |= FQS_DRIVER_MANAGED;
		fqs->fqs_bm_ops = &fq_if_grps_sc_bitmap_ops;
	} else {
		fqs->fqs_bm_ops = &fq_if_grps_bitmap_ops;
	}

	err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: error from ifclassq_attach, "
		    "failed to attach fq_if: %d\n", __func__, err);
		fq_if_destroy(fqs);
		return err;
	}

	/*
	 * Always create one group. If qset 0 is added later,
	 * this group will be updated.
	 */
	err = fq_if_create_grp(ifq, 0, IF_CLASSQ_DEF);
	if (err != 0) {
		os_log_error(OS_LOG_DEFAULT, "%s: error from fq_if_create_grp, "
		    "failed to create a fq group: %d\n", __func__, err);
		/*
		 * NOTE(review): fqs is destroyed here while still attached
		 * to ifq (no detach on this path) — presumably the caller
		 * tears down the ifclassq on error; confirm.
		 */
		fq_if_destroy(fqs);
	}

	return err;
}
1744
1745 fq_t *
fq_if_hash_pkt(fq_if_t * fqs,fq_if_group_t * fq_grp,uint32_t flowid,mbuf_svc_class_t svc_class,uint64_t now,bool create,fq_tfc_type_t tfc_type)1746 fq_if_hash_pkt(fq_if_t *fqs, fq_if_group_t *fq_grp, uint32_t flowid,
1747 mbuf_svc_class_t svc_class, uint64_t now, bool create,
1748 fq_tfc_type_t tfc_type)
1749 {
1750 fq_t *fq = NULL;
1751 flowq_list_t *fq_list;
1752 fq_if_classq_t *fq_cl;
1753 uint32_t fqs_hash_id;
1754 u_int8_t scidx;
1755
1756 scidx = fq_if_service_to_priority(fqs, svc_class);
1757
1758 fqs_hash_id = fq_if_flow_hash_id(flowid);
1759
1760 fq_list = &fqs->fqs_flows[fqs_hash_id];
1761
1762 LIST_FOREACH(fq, fq_list, fq_hashlink) {
1763 if (fq->fq_flowhash == flowid &&
1764 fq->fq_sc_index == scidx &&
1765 fq->fq_tfc_type == tfc_type &&
1766 fq->fq_group == fq_grp) {
1767 break;
1768 }
1769 }
1770 if (fq == NULL && create) {
1771 /* If the flow is not already on the list, allocate it */
1772 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1773 fq = fq_alloc(fqs->fqs_ptype);
1774 if (fq != NULL) {
1775 fq->fq_flowhash = flowid;
1776 fq->fq_sc_index = scidx;
1777 fq->fq_group = fq_grp;
1778 fq->fq_tfc_type = tfc_type;
1779 fq_cl = &FQ_CLASSQ(fq);
1780 fq->fq_flags = (FQF_FLOWCTL_CAPABLE | FQF_FRESH_FLOW);
1781 fq->fq_updatetime = now + FQ_UPDATE_INTERVAL(fq);
1782 fq->fq_next_tx_time = FQ_INVALID_TX_TS;
1783 LIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
1784 fq_cl->fcl_stat.fcl_flows_cnt++;
1785 }
1786 KDBG(AQM_KTRACE_STATS_FLOW_ALLOC,
1787 fqs->fqs_ifq->ifcq_ifp->if_index, fq->fq_flowhash,
1788 AQM_KTRACE_FQ_GRP_SC_IDX(fq), 0);
1789 } else if ((fq != NULL) && (fq->fq_flags & FQF_EMPTY_FLOW)) {
1790 fq_if_reuse_empty_flow(fqs, fq, now);
1791 }
1792
1793 /*
1794 * If getq time is not set because this is the first packet or after
1795 * idle time, set it now so that we can detect a stall.
1796 */
1797 if (fq != NULL && fq->fq_getqtime == 0) {
1798 fq->fq_getqtime = now;
1799 }
1800
1801 return fq;
1802 }
1803
1804 void
fq_if_destroy_flow(fq_if_t * fqs,fq_if_classq_t * fq_cl,fq_t * fq)1805 fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq)
1806 {
1807 ASSERT((fq->fq_flags & FQF_EMPTY_FLOW) == 0);
1808 LIST_REMOVE(fq, fq_hashlink);
1809 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1810 if (__improbable(fq->fq_flags & FQF_FLOWCTL_ON)) {
1811 fq_if_flow_feedback(fqs, fq, fq_cl);
1812 }
1813 KDBG(AQM_KTRACE_STATS_FLOW_DESTROY,
1814 fqs->fqs_ifq->ifcq_ifp->if_index, fq->fq_flowhash,
1815 AQM_KTRACE_FQ_GRP_SC_IDX(fq), 0);
1816 fq_destroy(fq, fqs->fqs_ptype);
1817 }
1818
1819 inline boolean_t
fq_if_at_drop_limit(fq_if_t * fqs)1820 fq_if_at_drop_limit(fq_if_t *fqs)
1821 {
1822 return (IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
1823 TRUE : FALSE;
1824 }
1825
1826 inline boolean_t
fq_if_almost_at_drop_limit(fq_if_t * fqs)1827 fq_if_almost_at_drop_limit(fq_if_t *fqs)
1828 {
1829 /*
1830 * Whether we are above 90% of the queue limit. This is used to tell if we
1831 * can stop flow controlling the largest flow.
1832 */
1833 return IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit * 9 / 10;
1834 }
1835
1836 static inline void
fq_if_reuse_empty_flow(fq_if_t * fqs,fq_t * fq,uint64_t now)1837 fq_if_reuse_empty_flow(fq_if_t *fqs, fq_t *fq, uint64_t now)
1838 {
1839 ASSERT(fq->fq_flags & FQF_EMPTY_FLOW);
1840 TAILQ_REMOVE(&fqs->fqs_empty_list, fq, fq_empty_link);
1841 STAILQ_NEXT(fq, fq_actlink) = NULL;
1842 fq->fq_flags &= ~FQF_FLOW_STATE_MASK;
1843 fq->fq_empty_purge_time = 0;
1844 fq->fq_getqtime = 0;
1845 fq->fq_updatetime = now + FQ_UPDATE_INTERVAL(fq);
1846 fqs->fqs_empty_list_cnt--;
1847 fq_if_classq_t *fq_cl = &FQ_CLASSQ(fq);
1848 fq_cl->fcl_stat.fcl_flows_cnt++;
1849 }
1850
/*
 * Park a drained flow on the scheduler's empty-flow list, where it
 * lingers for fq_empty_purge_delay before being destroyed, so a flow
 * that goes briefly idle can be revived cheaply.
 */
inline void
fq_if_move_to_empty_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    uint64_t now)
{
	/*
	 * NOTE(review): as written this asserts that *some* bit outside
	 * NEW/OLD/FLOWCTL_ON is set in fq_flags; if the intent was "none
	 * of those three flags is set", the condition would need to be
	 * !(fq->fq_flags & (...)).  Confirm against the flag lifecycle.
	 */
	ASSERT(fq->fq_flags & ~(FQF_NEW_FLOW | FQF_OLD_FLOW | FQF_FLOWCTL_ON));
	fq->fq_empty_purge_time = now + fq_empty_purge_delay;
	TAILQ_INSERT_TAIL(&fqs->fqs_empty_list, fq, fq_empty_link);
	fq->fq_flags |= FQF_EMPTY_FLOW;
	FQ_CLEAR_OVERWHELMING(fq);
	fqs->fqs_empty_list_cnt++;
	/*
	 * fcl_flows_cnt is used in budget determination for the class.
	 * empty flow shouldn't contribute to the budget.
	 */
	fq_cl->fcl_stat.fcl_flows_cnt--;
}
1867
1868 static void
fq_if_purge_empty_flow(fq_if_t * fqs,fq_t * fq)1869 fq_if_purge_empty_flow(fq_if_t *fqs, fq_t *fq)
1870 {
1871 fq_if_classq_t *fq_cl;
1872 fq_cl = &FQ_CLASSQ(fq);
1873
1874 ASSERT((fq->fq_flags & FQF_EMPTY_FLOW) != 0);
1875 TAILQ_REMOVE(&fqs->fqs_empty_list, fq, fq_empty_link);
1876 fq->fq_flags &= ~FQF_EMPTY_FLOW;
1877 fqs->fqs_empty_list_cnt--;
1878 /* Remove from the hash list and free the flow queue */
1879 fq_if_destroy_flow(fqs, fq_cl, fq);
1880 }
1881
1882 static void
fq_if_purge_empty_flow_list(fq_if_t * fqs,uint64_t now,bool purge_all)1883 fq_if_purge_empty_flow_list(fq_if_t *fqs, uint64_t now, bool purge_all)
1884 {
1885 fq_t *fq, *tmp;
1886 int i = 0;
1887
1888 if (fqs->fqs_empty_list_cnt == 0) {
1889 ASSERT(TAILQ_EMPTY(&fqs->fqs_empty_list));
1890 return;
1891 }
1892
1893 TAILQ_FOREACH_SAFE(fq, &fqs->fqs_empty_list, fq_empty_link, tmp) {
1894 if (!purge_all && ((now < fq->fq_empty_purge_time) ||
1895 (i++ == FQ_EMPTY_PURGE_MAX))) {
1896 break;
1897 }
1898 fq_if_purge_empty_flow(fqs, fq);
1899 }
1900
1901 if (__improbable(purge_all)) {
1902 VERIFY(fqs->fqs_empty_list_cnt == 0);
1903 VERIFY(TAILQ_EMPTY(&fqs->fqs_empty_list));
1904 }
1905 }
1906
1907 static void
fq_if_empty_old_flow(fq_if_t * fqs,fq_if_classq_t * fq_cl,fq_t * fq,uint64_t now)1908 fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
1909 uint64_t now)
1910 {
1911 /*
1912 * Remove the flow queue from the old flows list.
1913 */
1914 STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq, fq_actlink);
1915 fq->fq_flags &= ~FQF_OLD_FLOW;
1916 fq_cl->fcl_stat.fcl_oldflows_cnt--;
1917 VERIFY(fq->fq_bytes == 0);
1918
1919 /* release any flow control */
1920 if (__improbable(fq->fq_flags & FQF_FLOWCTL_ON)) {
1921 fq_if_flow_feedback(fqs, fq, fq_cl);
1922 }
1923
1924 /* move the flow queue to empty flows list */
1925 fq_if_move_to_empty_flow(fqs, fq_cl, fq, now);
1926 }
1927
1928 static void
fq_if_empty_new_flow(fq_t * fq,fq_if_classq_t * fq_cl)1929 fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl)
1930 {
1931 /* Move to the end of old queue list */
1932 STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
1933 flowq, fq_actlink);
1934 fq->fq_flags &= ~FQF_NEW_FLOW;
1935 fq_cl->fcl_stat.fcl_newflows_cnt--;
1936
1937 STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq, fq_actlink);
1938 fq->fq_flags |= FQF_OLD_FLOW;
1939 fq_cl->fcl_stat.fcl_oldflows_cnt++;
1940 }
1941
/*
 * Drop one packet from the head of the currently-largest flow (overflow
 * relief when the scheduler is at its drop limit).  No-op when no flow
 * is tracked as largest.
 */
inline void
fq_if_drop_packet(fq_if_t *fqs, uint64_t now)
{
	fq_t *fq = fqs->fqs_large_flow;
	fq_if_classq_t *fq_cl;
	pktsched_pkt_t pkt;
	volatile uint32_t *__single pkt_flags;
	uint64_t *__single pkt_timestamp;

	if (fq == NULL) {
		return;
	}
	/* queue can not be empty on the largest flow */
	VERIFY(!fq_empty(fq, fqs->fqs_ptype));

	fq_cl = &FQ_CLASSQ(fq);
	_PKTSCHED_PKT_INIT(&pkt);
	/* pull the head packet off the flow */
	fq_getq_flow_internal(fqs, fq, &pkt);
	ASSERT(pkt.pktsched_ptype != QP_INVALID);

	pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL, NULL);

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	/* scrub scheduler-private metadata before freeing the packet */
	*pkt_timestamp = 0;
	switch (pkt.pktsched_ptype) {
	case QP_MBUF:
		*pkt_flags &= ~PKTF_PRIV_GUARDED;
		break;
#if SKYWALK
	case QP_PACKET:
		/* sanity check */
		ASSERT((*pkt_flags & ~PKT_F_COMMON_MASK) == 0);
		break;
#endif /* SKYWALK */
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	if (fq_empty(fq, fqs->fqs_ptype)) {
		/* the drop emptied the flow: retire it from its active list */
		fqs->fqs_large_flow = NULL;
		if (fq->fq_flags & FQF_OLD_FLOW) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, now);
		} else {
			VERIFY(fq->fq_flags & FQF_NEW_FLOW);
			fq_if_empty_new_flow(fq, fq_cl);
		}
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt));

	pktsched_free_pkt(&pkt);
	fq_cl->fcl_stat.fcl_drop_overflow++;
}
1997
1998 inline void
fq_if_is_flow_heavy(fq_if_t * fqs,fq_t * fq)1999 fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
2000 {
2001 fq_t *prev_fq;
2002
2003 if (fqs->fqs_large_flow != NULL &&
2004 fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
2005 fqs->fqs_large_flow = NULL;
2006 }
2007
2008 if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
2009 return;
2010 }
2011
2012 prev_fq = fqs->fqs_large_flow;
2013 if (prev_fq == NULL) {
2014 if (!fq_empty(fq, fqs->fqs_ptype)) {
2015 fqs->fqs_large_flow = fq;
2016 }
2017 return;
2018 } else if (fq->fq_bytes > prev_fq->fq_bytes) {
2019 fqs->fqs_large_flow = fq;
2020 }
2021 }
2022
/*
 * Put flow `fq` under flow control: allocate a flow-advisory entry for the
 * packet's flow source and link it onto the scheduler's flow-control list.
 * Returns TRUE if the flow is (or already was) on the list, FALSE if the
 * entry could not be allocated.
 */
boolean_t
fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint8_t flowsrc,
    fq_t *fq, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce;

#if DEBUG || DEVELOPMENT
	/* Test knob: flow-control advisories disabled via boot-arg/sysctl. */
	if (__improbable(ifclassq_flow_control_adv == 0)) {
		os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
		return TRUE;
	}
#endif /* DEBUG || DEVELOPMENT */

	/* Avoid duplicates: the flow may already be under flow control. */
	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
		    fce->fce_flowid == fq->fq_flowhash) {
			/* Already on flowcontrol list */
			return TRUE;
		}
	}
	/* Allocation may block; convert the ifclassq lock first. */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
	if (fce != NULL) {
		/* XXX Add number of bytes in the queue */
		STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
		fq_cl->fcl_stat.fcl_flow_control++;
		os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
		    "flow: 0x%x, iface: %s, B:%u\n", __func__,
		    fq_cl->fcl_stat.fcl_flow_control,
		    fq->fq_sc_index, fce->fce_flowsrc_type, fq->fq_flowhash,
		    if_name(fqs->fqs_ifq->ifcq_ifp), fq->fq_bytes);
		KDBG(AQM_KTRACE_STATS_FLOW_CTL | DBG_FUNC_START,
		    fq->fq_flowhash, AQM_KTRACE_FQ_GRP_SC_IDX(fq),
		    fq->fq_bytes, fq->fq_min_qdelay);
	}

	/* Channel-sourced flows get their advisory state set immediately. */
	if (fce != NULL && fce->fce_flowsrc_type == FLOWSRC_CHANNEL) {
		kern_channel_flowadv_set(fce);
	}

	return (fce != NULL) ? TRUE : FALSE;
}
2065
/*
 * Unlink `fce` from the scheduler's flow-control list and hand it to the
 * flow-advisory subsystem, which notifies the flow and frees the entry.
 * Caller must ensure `fce` is on fqs_fclist.
 */
static void
fq_if_remove_fcentry(fq_if_t *fqs, struct flowadv_fcentry *fce)
{
	STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry, fce_link);
	/* Clear the stale link before ownership passes to flowadv. */
	STAILQ_NEXT(fce, fce_link) = NULL;
	flowadv_add_entry(fce);
}
2073
/*
 * Release flow control on flow `fq`: if a flow-advisory entry for its
 * flowhash is on the flow-control list, mark it as feedback, log it, and
 * hand it back to the flow-advisory subsystem.  Always clears
 * FQF_FLOWCTL_ON on the flow, whether or not an entry was found.
 */
void
fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
{
	struct flowadv_fcentry *fce = NULL;

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	/* Find this flow's entry on the flow-control list, if any. */
	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
		if (fce->fce_flowid == fq->fq_flowhash) {
			break;
		}
	}
	if (fce != NULL) {
		fq_cl->fcl_stat.fcl_flow_feedback++;
		fce->fce_event_type = FCE_EVENT_TYPE_FLOW_CONTROL_FEEDBACK;
		/* Log before removal: fce fields are read while still valid. */
		os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
		    "flow: 0x%x, iface: %s grp: %hhu, B:%u\n", __func__,
		    fq_cl->fcl_stat.fcl_flow_feedback, fq->fq_sc_index,
		    fce->fce_flowsrc_type, fce->fce_flowid,
		    if_name(fqs->fqs_ifq->ifcq_ifp), FQ_GROUP(fq)->fqg_index,
		    fq->fq_bytes);
		fq_if_remove_fcentry(fqs, fce);
		KDBG(AQM_KTRACE_STATS_FLOW_CTL | DBG_FUNC_END,
		    fq->fq_flowhash, AQM_KTRACE_FQ_GRP_SC_IDX(fq),
		    fq->fq_bytes, fq->fq_min_qdelay);
	}
	fq->fq_flags &= ~FQF_FLOWCTL_ON;
}
2101
2102 boolean_t
fq_if_report_ce(fq_if_t * fqs,pktsched_pkt_t * pkt,uint32_t ce_cnt,uint32_t pkt_cnt)2103 fq_if_report_ce(fq_if_t *fqs, pktsched_pkt_t *pkt, uint32_t ce_cnt,
2104 uint32_t pkt_cnt)
2105 {
2106 struct flowadv_fcentry *fce;
2107
2108 #if DEBUG || DEVELOPMENT
2109 if (__improbable(ifclassq_flow_control_adv == 0)) {
2110 os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
2111 return TRUE;
2112 }
2113 #endif /* DEBUG || DEVELOPMENT */
2114
2115 IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
2116 fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
2117 if (fce != NULL) {
2118 fce->fce_event_type = FCE_EVENT_TYPE_CONGESTION_EXPERIENCED;
2119 fce->fce_ce_cnt = ce_cnt;
2120 fce->fce_pkts_since_last_report = pkt_cnt;
2121
2122 flowadv_add_entry(fce);
2123 }
2124 return (fce != NULL) ? TRUE : FALSE;
2125 }
2126
2127
/*
 * Dequeue up to `pktlimit` packets / `bytelimit` bytes from service class
 * `fq_cl` using the FQ-CoDel DRR discipline (RFC 8290): new flows are
 * serviced before old flows; a flow that exhausts its deficit is refilled
 * with a quantum and moved to the tail of the old-flows list.
 *
 * Dequeued packets are chained either through `top`/`bottom` or, when
 * `fq_dqlist` is non-NULL, accumulated per-flow on that list.  On return,
 * *retpktcnt / *retbytecnt hold the totals, *fq_cl_paced reports whether
 * every flow in the class is pacing-limited, and *next_tx_time gets the
 * earliest tx time among paced flows (FQ_INVALID_TX_TS if none).
 */
void
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, uint32_t pktlimit,
    int64_t bytelimit, classq_pkt_t *top, classq_pkt_t *bottom,
    uint32_t *retpktcnt, uint32_t *retbytecnt, flowq_dqlist_t *fq_dqlist,
    bool budget_restricted, uint64_t now, bool *fq_cl_paced,
    uint64_t *next_tx_time)
{
	fq_t *fq = NULL, *tfq = NULL;
	flowq_stailq_t temp_stailq;
	uint32_t pktcnt, bytecnt;
	boolean_t qempty, limit_reached = FALSE;
	bool all_paced = true;
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
	fq_getq_flow_t fq_getq_flow_fn;
	classq_pkt_t *head, *tail;
	uint64_t fq_cl_tx_time = FQ_INVALID_TX_TS;

	/* Select the per-packet-type dequeue routine once, up front. */
	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		fq_getq_flow_fn = fq_getq_flow_mbuf;
		break;

#if SKYWALK
	case QP_PACKET:
		fq_getq_flow_fn = fq_getq_flow_kpkt;
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * maximum byte limit should not be greater than the budget for
	 * this class
	 */
	if (bytelimit > fq_cl->fcl_budget && budget_restricted) {
		bytelimit = fq_cl->fcl_budget;
	}

	VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);
	pktcnt = bytecnt = 0;
	STAILQ_INIT(&temp_stailq);

	/* Pass 1: service new flows first, per RFC 8290. */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		ASSERT((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_NEW_FLOW);
		uint64_t fq_tx_time;
		/* Skip flows whose pacing tx time has not arrived yet. */
		if (__improbable(!fq_tx_time_ready(fqs, fq, now, &fq_tx_time))) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			/* Track the earliest tx time across paced flows. */
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
			continue;
		}
		all_paced = false;

		/* Choose the packet chain: per-flow dqlist or caller's chain. */
		if (fq_dqlist != NULL) {
			if (!fq->fq_in_dqlist) {
				fq_dqlist_add(fq_dqlist, fq);
			}
			head = &fq->fq_dq_head;
			tail = &fq->fq_dq_tail;
		} else {
			ASSERT(!fq->fq_in_dqlist);
			head = top;
			tail = &last;
		}

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, now);

		/*
		 * From RFC 8290:
		 * if that queue has a negative number of credits (i.e., it has already
		 * dequeued at least a quantum of bytes), it is given an additional
		 * quantum of credits, the queue is put onto _the end of_ the list of
		 * old queues, and the routine selects the next queue and starts again.
		 */
		if (fq->fq_deficit <= 0 || qempty) {
			fq->fq_deficit += fq_cl->fcl_quantum;
			fq_if_empty_new_flow(fq, fq_cl);
		}
		/*
		 * TODO: add credit when the flow becomes paced, so that the fq
		 * is treated the same as an empty one.
		 */

		/* The flow may have become paced after this dequeue round. */
		if (!fq_tx_time_ready(fqs, fq, now, &fq_tx_time)) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
		}

		if (limit_reached) {
			goto done;
		}
	}

	/* Pass 2: service old flows with the remaining budget. */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_OLD_FLOW);
		bool destroy = true;
		uint64_t fq_tx_time;

		/* Skip flows whose pacing tx time has not arrived yet. */
		if (__improbable(!fq_tx_time_ready(fqs, fq, now, &fq_tx_time))) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
			continue;
		}
		all_paced = false;

		if (fq_dqlist != NULL) {
			if (!fq->fq_in_dqlist) {
				fq_dqlist_add(fq_dqlist, fq);
			}
			head = &fq->fq_dq_head;
			tail = &fq->fq_dq_tail;
			/* Flow teardown is deferred until dqlist processing. */
			destroy = false;
		} else {
			ASSERT(!fq->fq_in_dqlist);
			head = top;
			tail = &last;
		}

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, now);

		/* The flow may have become paced after this dequeue round. */
		if (!fq_tx_time_ready(fqs, fq, now, &fq_tx_time)) {
			ASSERT(fq_tx_time != FQ_INVALID_TX_TS);
			if (fq_tx_time < fq_cl_tx_time) {
				fq_cl_tx_time = fq_tx_time;
			}
		}

		if (qempty) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, now);
		} else if (fq->fq_deficit <= 0) {
			STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
			    flowq, fq_actlink);
			/*
			 * Move to the end of the old queues list. We do not
			 * need to update the flow count since this flow
			 * will be added to the tail again
			 */
			STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
			fq->fq_deficit += fq_cl->fcl_quantum;
		}
		if (limit_reached) {
			break;
		}
	}

done:
	/* Record pacing state for the whole class. */
	if (all_paced) {
		fq_cl->fcl_flags |= FCL_PACED;
		fq_cl->fcl_next_tx_time = fq_cl_tx_time;
	}
	/* Re-append flows that were rotated to the tail during pass 2. */
	if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
		STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
	} else if (!STAILQ_EMPTY(&temp_stailq)) {
		fq_cl->fcl_old_flows = temp_stailq;
	}
	if (last.cp_mbuf != NULL) {
		VERIFY(top->cp_mbuf != NULL);
		if (bottom != NULL) {
			*bottom = last;
		}
	}
	if (retpktcnt != NULL) {
		*retpktcnt = pktcnt;
	}
	if (retbytecnt != NULL) {
		*retbytecnt = bytecnt;
	}
	if (fq_cl_paced != NULL) {
		*fq_cl_paced = all_paced;
	}
	if (next_tx_time != NULL) {
		*next_tx_time = fq_cl_tx_time;
	}
}
2312
2313 void
fq_if_teardown_ifclassq(struct ifclassq * ifq)2314 fq_if_teardown_ifclassq(struct ifclassq *ifq)
2315 {
2316 fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
2317
2318 IFCQ_LOCK_ASSERT_HELD(ifq);
2319 VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);
2320 fq_if_destroy(fqs);
2321 ifq->ifcq_disc = NULL;
2322 ifclassq_detach(ifq);
2323 }
2324
2325 static void
fq_export_flowstats(fq_if_t * fqs,fq_t * fq,struct fq_codel_flowstats * flowstat)2326 fq_export_flowstats(fq_if_t *fqs, fq_t *fq,
2327 struct fq_codel_flowstats *flowstat)
2328 {
2329 bzero(flowstat, sizeof(*flowstat));
2330 flowstat->fqst_min_qdelay = (uint32_t)fq->fq_min_qdelay;
2331 flowstat->fqst_bytes = fq->fq_bytes;
2332 flowstat->fqst_flowhash = fq->fq_flowhash;
2333 if (fq->fq_flags & FQF_NEW_FLOW) {
2334 flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW;
2335 }
2336 if (fq->fq_flags & FQF_OLD_FLOW) {
2337 flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW;
2338 }
2339 if (fq->fq_flags & FQF_DELAY_HIGH) {
2340 flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH;
2341 }
2342 if (fq->fq_flags & FQF_FLOWCTL_ON) {
2343 flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON;
2344 }
2345 if (fqs->fqs_large_flow == fq) {
2346 flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW;
2347 }
2348 }
2349
2350 int
fq_if_getqstats_ifclassq(struct ifclassq * ifq,uint8_t gid,u_int32_t qid,struct if_ifclassq_stats * ifqs)2351 fq_if_getqstats_ifclassq(struct ifclassq *ifq, uint8_t gid, u_int32_t qid,
2352 struct if_ifclassq_stats *ifqs)
2353 {
2354 struct fq_codel_classstats *fcls;
2355 fq_if_classq_t *fq_cl;
2356 fq_if_t *fqs;
2357 fq_t *fq = NULL;
2358 fq_if_group_t *grp;
2359 u_int32_t i, flowstat_cnt;
2360
2361 if (qid >= FQ_IF_MAX_CLASSES || gid >= FQ_IF_MAX_GROUPS) {
2362 return EINVAL;
2363 }
2364
2365 fqs = (fq_if_t *)ifq->ifcq_disc;
2366 if (fqs->fqs_classq_groups[gid] == NULL) {
2367 return ENXIO;
2368 }
2369
2370 fcls = &ifqs->ifqs_fq_codel_stats;
2371
2372 fq_cl = &FQS_CLASSQ(fqs, gid, qid);
2373 grp = fq_if_find_grp(fqs, gid);
2374
2375 fcls->fcls_pri = fq_cl->fcl_pri;
2376 fcls->fcls_service_class = fq_cl->fcl_service_class;
2377 fcls->fcls_quantum = fq_cl->fcl_quantum;
2378 fcls->fcls_drr_max = fq_cl->fcl_drr_max;
2379 fcls->fcls_budget = fq_cl->fcl_budget;
2380 fcls->fcls_l4s_target_qdelay = grp->fqg_target_qdelays[FQ_TFC_L4S];
2381 fcls->fcls_target_qdelay = grp->fqg_target_qdelays[FQ_TFC_C];
2382 fcls->fcls_update_interval = grp->fqg_update_intervals[FQ_TFC_C];
2383 fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
2384 fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
2385 fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
2386 fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
2387 fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
2388 fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
2389 fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
2390 fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
2391 fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
2392 fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
2393 fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
2394 fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
2395 fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
2396 fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
2397 fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
2398 fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
2399 fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
2400 fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
2401 fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;
2402 fcls->fcls_pkts_compressible = fq_cl->fcl_stat.fcl_pkts_compressible;
2403 fcls->fcls_pkts_compressed = fq_cl->fcl_stat.fcl_pkts_compressed;
2404 fcls->fcls_min_qdelay = fq_cl->fcl_stat.fcl_min_qdelay;
2405 fcls->fcls_max_qdelay = fq_cl->fcl_stat.fcl_max_qdelay;
2406 fcls->fcls_avg_qdelay = fq_cl->fcl_stat.fcl_avg_qdelay;
2407 fcls->fcls_overwhelming = fq_cl->fcl_stat.fcl_overwhelming;
2408 fcls->fcls_ce_marked = fq_cl->fcl_stat.fcl_ce_marked;
2409 fcls->fcls_ce_reported = fq_cl->fcl_stat.fcl_ce_reported;
2410 fcls->fcls_ce_mark_failures = fq_cl->fcl_stat.fcl_ce_mark_failures;
2411 fcls->fcls_l4s_pkts = fq_cl->fcl_stat.fcl_l4s_pkts;
2412 fcls->fcls_ignore_tx_time = fq_cl->fcl_stat.fcl_ignore_tx_time;
2413 fcls->fcls_paced_pkts = fq_cl->fcl_stat.fcl_paced_pkts;
2414 fcls->fcls_fcl_pacing_needed = fq_cl->fcl_stat.fcl_fcl_pacemaker_needed;
2415
2416 /* Gather per flow stats */
2417 flowstat_cnt = min((fcls->fcls_newflows_cnt +
2418 fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS);
2419 i = 0;
2420 STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) {
2421 if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt) {
2422 break;
2423 }
2424
2425 /* leave space for a few old flows */
2426 if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt &&
2427 i >= (FQ_IF_MAX_FLOWSTATS >> 1)) {
2428 break;
2429 }
2430 fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
2431 i++;
2432 }
2433 STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) {
2434 if (i >= flowstat_cnt) {
2435 break;
2436 }
2437 fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
2438 i++;
2439 }
2440 VERIFY(i <= flowstat_cnt);
2441 fcls->fcls_flowstats_cnt = i;
2442 return 0;
2443 }
2444
/*
 * Create (or, for the default group index 0, update) classq group
 * `grp_idx` on the interface's FQ-CoDel scheduler.  Initializes the
 * per-service-class queues (driver-managed schedulers get the reduced
 * 4-class set), then applies combined/separated scheduling mode and the
 * target-delay / update-interval parameters derived from `flags`.
 *
 * Returns 0 on success, EINVAL if the group already exists (non-default
 * index), or ENOMEM on allocation failure.
 */
int
fq_if_create_grp(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
{
/* Expands to fq_if_classq_init() for one service class of this group. */
#define _FQ_CLASSQ_INIT(_grp, _s, _q) \
	fq_if_classq_init(_grp, FQ_IF_ ## _s ##_INDEX, \
	    FQ_CODEL_QUANTUM_ ## _s(_q), FQ_CODEL_DRR_MAX(_s), \
	    MBUF_SC_ ## _s );

	fq_if_group_t *grp;
	fq_if_t *fqs;
	uint32_t quantum, calc_flags = IF_CLASSQ_DEF;
	struct ifnet *ifp = ifcq->ifcq_ifp;

	VERIFY(grp_idx < FQ_IF_MAX_GROUPS);

	fqs = (fq_if_t *)ifcq->ifcq_disc;

	/* The default group (index 0) may be re-configured in place. */
	if (grp_idx == 0 && fqs->fqs_classq_groups[grp_idx] != NULL) {
		grp = fqs->fqs_classq_groups[grp_idx];
		goto update;
	}

	/* Any other group may only be created once. */
	if (fqs->fqs_classq_groups[grp_idx] != NULL) {
		return EINVAL;
	}

	grp = zalloc_flags(fq_if_grp_zone, Z_WAITOK | Z_ZERO);
	if (grp == NULL) {
		return ENOMEM;
	}

	fqs->fqs_classq_groups[grp_idx] = grp;
	grp->fqg_index = grp_idx;

	/* Quantum is derived from the interface MTU/characteristics. */
	quantum = fq_if_calc_quantum(ifp);
	if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
		/* Driver-managed scheduler uses the 4-class AC model. */
		_FQ_CLASSQ_INIT(grp, BK, quantum);
		_FQ_CLASSQ_INIT(grp, BE, quantum);
		_FQ_CLASSQ_INIT(grp, VI, quantum);
		_FQ_CLASSQ_INIT(grp, VO, quantum);
	} else {
		/* SIG shares same INDEX with VI */
		_CASSERT(SCIDX_SIG == SCIDX_VI);
		_CASSERT(FQ_IF_SIG_INDEX == FQ_IF_VI_INDEX);

		_FQ_CLASSQ_INIT(grp, BK_SYS, quantum);
		_FQ_CLASSQ_INIT(grp, BK, quantum);
		_FQ_CLASSQ_INIT(grp, BE, quantum);
		_FQ_CLASSQ_INIT(grp, RD, quantum);
		_FQ_CLASSQ_INIT(grp, OAM, quantum);
		_FQ_CLASSQ_INIT(grp, AV, quantum);
		_FQ_CLASSQ_INIT(grp, RV, quantum);
		_FQ_CLASSQ_INIT(grp, VI, quantum);
		_FQ_CLASSQ_INIT(grp, VO, quantum);
		_FQ_CLASSQ_INIT(grp, CTL, quantum);
	}

update:
	/* Default groups are scheduled combined; others run separated. */
	if (flags & IF_DEFAULT_GRP) {
		fq_if_set_grp_combined(ifcq, grp_idx);
		grp->fqg_flags |= FQ_IF_DEFAULT_GRP;
	} else {
		fq_if_set_grp_separated(ifcq, grp_idx);
		grp->fqg_flags &= ~FQ_IF_DEFAULT_GRP;
	}

	/* Compute classic (C) and L4S AQM parameters for this group. */
	calc_flags |= (flags & IF_CLASSQ_LOW_LATENCY);
	ifclassq_calc_target_qdelay(ifp, &grp->fqg_target_qdelays[FQ_TFC_C],
	    calc_flags);
	ifclassq_calc_target_qdelay(ifp, &grp->fqg_target_qdelays[FQ_TFC_L4S],
	    calc_flags | IF_CLASSQ_L4S);

	ifclassq_calc_update_interval(&grp->fqg_update_intervals[FQ_TFC_C],
	    calc_flags);
	ifclassq_calc_update_interval(&grp->fqg_update_intervals[FQ_TFC_L4S],
	    calc_flags | IF_CLASSQ_L4S);

	return 0;
#undef _FQ_CLASSQ_INIT
}
2525
2526 fq_if_group_t *
fq_if_find_grp(fq_if_t * fqs,uint8_t grp_idx)2527 fq_if_find_grp(fq_if_t *fqs, uint8_t grp_idx)
2528 {
2529 fq_if_group_t *grp;
2530
2531 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
2532 VERIFY(grp_idx < FQ_IF_MAX_GROUPS);
2533
2534 grp = fqs->fqs_classq_groups[grp_idx];
2535 VERIFY(grp != NULL);
2536
2537 return grp;
2538 }
2539
2540 static void
fq_if_purge_grp(fq_if_t * fqs,fq_if_group_t * grp)2541 fq_if_purge_grp(fq_if_t *fqs, fq_if_group_t *grp)
2542 {
2543 for (uint8_t i = 0; i < FQ_IF_MAX_CLASSES; i++) {
2544 fq_if_purge_classq(fqs, &grp->fqg_classq[i]);
2545 }
2546
2547 bzero(&grp->fqg_bitmaps, sizeof(grp->fqg_bitmaps));
2548 grp->fqg_len = 0;
2549 grp->fqg_bytes = 0;
2550 fq_if_set_grp_separated(fqs->fqs_ifq, grp->fqg_index);
2551 }
2552
2553 void
fq_if_destroy_grps(fq_if_t * fqs)2554 fq_if_destroy_grps(fq_if_t *fqs)
2555 {
2556 fq_if_group_t *__single grp;
2557
2558 IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
2559
2560 for (uint8_t grp_idx = 0; grp_idx < FQ_IF_MAX_GROUPS; grp_idx++) {
2561 if (fqs->fqs_classq_groups[grp_idx] == NULL) {
2562 continue;
2563 }
2564
2565 grp = fq_if_find_grp(fqs, grp_idx);
2566 fq_if_purge_grp(fqs, grp);
2567 zfree(fq_if_grp_zone, grp);
2568 fqs->fqs_classq_groups[grp_idx] = NULL;
2569 }
2570 }
2571
/*
 * Return whether group `grp_idx` is currently scheduled in combined mode,
 * i.e. its bit is set in the scheduler's combined-group bitmap.
 */
static inline boolean_t
fq_if_is_grp_combined(fq_if_t *fqs, uint8_t grp_idx)
{
	return pktsched_bit_tst(grp_idx, &fqs->fqs_combined_grp_bitmap);
}
2577
2578 void
fq_if_set_grp_combined(struct ifclassq * ifcq,uint8_t grp_idx)2579 fq_if_set_grp_combined(struct ifclassq *ifcq, uint8_t grp_idx)
2580 {
2581 fq_if_t *fqs;
2582 fq_if_group_t *grp;
2583
2584 IFCQ_LOCK_ASSERT_HELD(ifcq);
2585
2586 fqs = (fq_if_t *)ifcq->ifcq_disc;
2587 grp = fq_if_find_grp(fqs, grp_idx);
2588
2589 if (fq_if_is_grp_combined(fqs, grp_idx)) {
2590 return;
2591 }
2592
2593 /*
2594 * We keep the current fq_deficit and fcl_budget when combining a group.
2595 * That might disrupt the AQM but only for a moment.
2596 */
2597 pktsched_bit_set(grp_idx, &fqs->fqs_combined_grp_bitmap);
2598 TAILQ_INSERT_TAIL(&fqs->fqs_combined_grp_list, grp, fqg_grp_link);
2599 }
2600
2601 void
fq_if_set_grp_separated(struct ifclassq * ifcq,uint8_t grp_idx)2602 fq_if_set_grp_separated(struct ifclassq *ifcq, uint8_t grp_idx)
2603 {
2604 fq_if_t *fqs;
2605 fq_if_group_t *grp;
2606
2607 IFCQ_LOCK_ASSERT_HELD(ifcq);
2608
2609 fqs = (fq_if_t *)ifcq->ifcq_disc;
2610 grp = fq_if_find_grp(fqs, grp_idx);
2611
2612 if (!fq_if_is_grp_combined(fqs, grp_idx)) {
2613 return;
2614 }
2615
2616 pktsched_bit_clr(grp_idx, &fqs->fqs_combined_grp_bitmap);
2617 TAILQ_REMOVE(&fqs->fqs_combined_grp_list, grp, fqg_grp_link);
2618 }
2619