1 /*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/mbuf.h>
32 #include <sys/socket.h>
33 #include <sys/sockio.h>
34 #include <sys/systm.h>
35 #include <sys/sysctl.h>
36 #include <sys/syslog.h>
37 #include <sys/proc.h>
38 #include <sys/errno.h>
39 #include <sys/kernel.h>
40 #include <sys/kauth.h>
41 #include <sys/sdt.h>
42 #include <kern/zalloc.h>
43 #include <netinet/in.h>
44
45 #include <net/classq/classq.h>
46 #include <net/classq/if_classq.h>
47 #include <net/pktsched/pktsched.h>
48 #include <net/pktsched/pktsched_fq_codel.h>
49 #include <net/classq/classq_fq_codel.h>
50
51 #include <netinet/tcp_var.h>
52
/* Size in bytes of one flowq (fq_t); captured once in fq_codel_init() */
static uint32_t flowq_size;
/* mcache backing all flowq allocations, shared across interfaces */
static struct mcache *flowq_cache = NULL;

#define FQ_ZONE_MAX     (32 * 1024)     /* across all interfaces */

/* drop-type classification used by fq_addq() when deciding a packet's fate */
#define DTYPE_NODROP    0       /* no drop */
#define DTYPE_FORCED    1       /* a "forced" drop */
#define DTYPE_EARLY     2       /* an "unforced" (early) drop */
61
62 void
fq_codel_init(void)63 fq_codel_init(void)
64 {
65 if (flowq_cache != NULL) {
66 return;
67 }
68
69 flowq_size = sizeof(fq_t);
70 flowq_cache = mcache_create("fq.flowq", flowq_size, sizeof(uint64_t),
71 0, MCR_SLEEP);
72 if (flowq_cache == NULL) {
73 panic("%s: failed to allocate flowq_cache", __func__);
74 /* NOTREACHED */
75 __builtin_unreachable();
76 }
77 }
78
79 void
fq_codel_reap_caches(boolean_t purge)80 fq_codel_reap_caches(boolean_t purge)
81 {
82 mcache_reap_now(flowq_cache, purge);
83 }
84
85 fq_t *
fq_alloc(classq_pkt_type_t ptype)86 fq_alloc(classq_pkt_type_t ptype)
87 {
88 fq_t *fq = NULL;
89 fq = mcache_alloc(flowq_cache, MCR_SLEEP);
90 if (fq == NULL) {
91 log(LOG_ERR, "%s: unable to allocate from flowq_cache\n", __func__);
92 return NULL;
93 }
94
95 bzero(fq, flowq_size);
96 fq->fq_ptype = ptype;
97 if (ptype == QP_MBUF) {
98 MBUFQ_INIT(&fq->fq_mbufq);
99 }
100 #if SKYWALK
101 else {
102 VERIFY(ptype == QP_PACKET);
103 KPKTQ_INIT(&fq->fq_kpktq);
104 }
105 #endif /* SKYWALK */
106 CLASSQ_PKT_INIT(&fq->fq_dq_head);
107 CLASSQ_PKT_INIT(&fq->fq_dq_tail);
108 fq->fq_in_dqlist = false;
109 return fq;
110 }
111
112 void
fq_destroy(fq_t * fq)113 fq_destroy(fq_t *fq)
114 {
115 VERIFY(fq->fq_flags & FQF_DESTROYED);
116 VERIFY(fq_empty(fq));
117 VERIFY(!(fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)));
118 VERIFY(fq->fq_bytes == 0);
119 mcache_free(flowq_cache, fq);
120 }
121
122 static inline void
fq_detect_dequeue_stall(fq_if_t * fqs,fq_t * flowq,fq_if_classq_t * fq_cl,u_int64_t * now)123 fq_detect_dequeue_stall(fq_if_t *fqs, fq_t *flowq, fq_if_classq_t *fq_cl,
124 u_int64_t *now)
125 {
126 u_int64_t maxgetqtime;
127 if (FQ_IS_DELAYHIGH(flowq) || flowq->fq_getqtime == 0 ||
128 fq_empty(flowq) ||
129 flowq->fq_bytes < FQ_MIN_FC_THRESHOLD_BYTES) {
130 return;
131 }
132 maxgetqtime = flowq->fq_getqtime + fqs->fqs_update_interval;
133 if ((*now) > maxgetqtime) {
134 /*
135 * there was no dequeue in an update interval worth of
136 * time. It means that the queue is stalled.
137 */
138 FQ_SET_DELAY_HIGH(flowq);
139 fq_cl->fcl_stat.fcl_dequeue_stall++;
140 os_log_error(OS_LOG_DEFAULT, "%s: dequeue stall num: %d, "
141 "scidx: %d, flow: 0x%x, iface: %s", __func__,
142 fq_cl->fcl_stat.fcl_dequeue_stall, flowq->fq_sc_index,
143 flowq->fq_flowhash, if_name(fqs->fqs_ifq->ifcq_ifp));
144 }
145 }
146
/*
 * Drop one packet from the head of flow queue 'fq' and account for it
 * as an interface-level drop.  Head drops (rather than tail drops)
 * penalize the packets that have waited longest, which lets responsive
 * transports react sooner.  No-op if the queue is empty.
 */
void
fq_head_drop(fq_if_t *fqs, fq_t *fq)
{
	pktsched_pkt_t pkt;
	volatile uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;
	struct ifclassq *ifq = fqs->fqs_ifq;

	_PKTSCHED_PKT_INIT(&pkt);
	/* dequeues the head and updates flow/class byte+packet counters */
	fq_getq_flow_internal(fqs, fq, &pkt);
	if (pkt.pktsched_pkt_mbuf == NULL) {
		/* queue was empty — nothing to drop */
		return;
	}

	pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL);

	/* scrub scheduler-private state before the packet is freed */
	*pkt_timestamp = 0;
	switch (pkt.pktsched_ptype) {
	case QP_MBUF:
		/* clear the debug guard flag set at enqueue (see fq_addq) */
		*pkt_flags &= ~PKTF_PRIV_GUARDED;
		break;
#if SKYWALK
	case QP_PACKET:
		/* sanity check */
		ASSERT((*pkt_flags & ~PKT_F_COMMON_MASK) == 0);
		break;
#endif /* SKYWALK */
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	IFCQ_DROP_ADD(ifq, 1, pktsched_get_pkt_len(&pkt));
	/* freeing may block; convert the ifclassq lock before doing so */
	IFCQ_CONVERT_LOCK(ifq);
	pktsched_free_pkt(&pkt);
}
185
186
/*
 * Attempt ACK compression: if the incoming packet 'pkt' carries the same
 * compression generation count as the packet currently at the tail of
 * 'fq', the queued packet is superseded — it is removed, freed, and its
 * accounting undone, and the new packet inherits the old timestamp so
 * measured queue delay is not reset.  The caller then enqueues 'pkt' in
 * its place.
 *
 * Returns CLASSQEQ_COMPRESSED when a queued packet was replaced, 0 when
 * no compression applies (feature off, packet not compressible, empty
 * queue, or generation-count mismatch).
 */
static int
fq_compressor(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl,
    pktsched_pkt_t *pkt)
{
	classq_pkt_type_t ptype = fq->fq_ptype;
	uint32_t comp_gencnt = 0;
	uint64_t *pkt_timestamp;
	uint64_t old_timestamp = 0;
	uint32_t old_pktlen = 0;
	struct ifclassq *ifq = fqs->fqs_ifq;

	/* global sysctl gate for ACK compression */
	if (__improbable(!tcp_do_ack_compression)) {
		return 0;
	}

	pktsched_get_pkt_vars(pkt, NULL, &pkt_timestamp, NULL, NULL, NULL,
	    &comp_gencnt);

	/* gencnt 0 means the packet was not marked compressible */
	if (comp_gencnt == 0) {
		return 0;
	}

	fq_cl->fcl_stat.fcl_pkts_compressible++;

	/* nothing queued to compress against */
	if (fq_empty(fq)) {
		return 0;
	}

	if (ptype == QP_MBUF) {
		struct mbuf *m = MBUFQ_LAST(&fq->fq_mbufq);

		if (comp_gencnt != m->m_pkthdr.comp_gencnt) {
			return 0;
		}

		/* If we got until here, we should merge/replace the segment */
		MBUFQ_REMOVE(&fq->fq_mbufq, m);
		old_pktlen = m_pktlen(m);
		old_timestamp = m->m_pkthdr.pkt_timestamp;

		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		m_freem(m);
	}
#if SKYWALK
	else {
		struct __kern_packet *kpkt = KPKTQ_LAST(&fq->fq_kpktq);

		if (comp_gencnt != kpkt->pkt_comp_gencnt) {
			return 0;
		}

		/* If we got until here, we should merge/replace the segment */
		KPKTQ_REMOVE(&fq->fq_kpktq, kpkt);
		old_pktlen = kpkt->pkt_length;
		old_timestamp = kpkt->pkt_timestamp;

		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		pp_free_packet(*(struct kern_pbufpool **)(uintptr_t)&
		    (((struct __kern_quantum *)kpkt)->qum_pp),
		    (uint64_t)kpkt);
	}
#endif /* SKYWALK */

	/* undo the accounting that was done when the old packet enqueued */
	fq->fq_bytes -= old_pktlen;
	fq_cl->fcl_stat.fcl_byte_cnt -= old_pktlen;
	fq_cl->fcl_stat.fcl_pkt_cnt--;
	IFCQ_DEC_LEN(ifq);
	IFCQ_DEC_BYTES(ifq, old_pktlen);

	/* replacement inherits the superseded packet's enqueue timestamp */
	*pkt_timestamp = old_timestamp;

	return CLASSQEQ_COMPRESSED;
}
260
/*
 * Enqueue a packet — or a chain of pkt->pktsched_pcnt packets belonging
 * to the same flow — into the flow queue selected by its flow hash.
 *
 * Policy applied, in order:
 *   1) stall detection (fq_detect_dequeue_stall);
 *   2) if the flow is delay-high or overwhelming: flow-control advisory
 *      for flow-advisory-capable flows, otherwise head drops to make room;
 *   3) if the scheduler is at its drop limit: head-drop from the largest
 *      flow (possibly this one);
 *   4) single packets may be ACK-compressed against the queue tail.
 *
 * Returns a CLASSQEQ_* code; on CLASSQEQ_DROP/DROP_FC the caller is
 * responsible for freeing the chain.
 */
int
fq_addq(fq_if_t *fqs, pktsched_pkt_t *pkt, fq_if_classq_t *fq_cl)
{
	int droptype = DTYPE_NODROP, fc_adv = 0, ret = CLASSQEQ_SUCCESS;
	u_int64_t now;
	fq_t *fq = NULL;
	uint64_t *pkt_timestamp;
	volatile uint32_t *pkt_flags;
	uint32_t pkt_flowid, cnt;
	uint8_t pkt_proto, pkt_flowsrc;

	cnt = pkt->pktsched_pcnt;
	pktsched_get_pkt_vars(pkt, &pkt_flags, &pkt_timestamp, &pkt_flowid,
	    &pkt_flowsrc, &pkt_proto, NULL);

	/*
	 * XXX Not walking the chain to set this flag on every packet.
	 * This flag is only used for debugging. Nothing is affected if it's
	 * not set.
	 */
	switch (pkt->pktsched_ptype) {
	case QP_MBUF:
		/* See comments in <rdar://problem/14040693> */
		VERIFY(!(*pkt_flags & PKTF_PRIV_GUARDED));
		*pkt_flags |= PKTF_PRIV_GUARDED;
		break;
#if SKYWALK
	case QP_PACKET:
		/* sanity check */
		ASSERT((*pkt_flags & ~PKT_F_COMMON_MASK) == 0);
		break;
#endif /* SKYWALK */
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * Timestamps for every packet must be set prior to entering this path.
	 */
	now = *pkt_timestamp;
	ASSERT(now > 0);

	/* find the flowq for this packet */
	fq = fq_if_hash_pkt(fqs, pkt_flowid, pktsched_get_pkt_svc(pkt),
	    now, TRUE, pkt->pktsched_ptype);
	if (__improbable(fq == NULL)) {
		DTRACE_IP1(memfail__drop, fq_if_t *, fqs);
		/* drop the packet if we could not allocate a flow queue */
		fq_cl->fcl_stat.fcl_drop_memfailure += cnt;
		return CLASSQEQ_DROP;
	}
	VERIFY(fq->fq_ptype == pkt->pktsched_ptype);

	/* may mark the flow delay-high if dequeues have stalled */
	fq_detect_dequeue_stall(fqs, fq, fq_cl, &now);

	/* flow is congested: flow-control it if possible, else head-drop */
	if (__improbable(FQ_IS_DELAYHIGH(fq) || FQ_IS_OVERWHELMING(fq))) {
		if ((fq->fq_flags & FQF_FLOWCTL_CAPABLE) &&
		    (*pkt_flags & PKTF_FLOW_ADV)) {
			fc_adv = 1;
			/*
			 * If the flow is suspended or it is not
			 * TCP/QUIC, drop the chain.
			 */
			if ((pkt_proto != IPPROTO_TCP) &&
			    (pkt_proto != IPPROTO_QUIC)) {
				droptype = DTYPE_EARLY;
				fq_cl->fcl_stat.fcl_drop_early += cnt;
				IFCQ_DROP_ADD(fqs->fqs_ifq, cnt, pktsched_get_pkt_len(pkt));
			}
			DTRACE_IP6(flow__adv, fq_if_t *, fqs,
			    fq_if_classq_t *, fq_cl, fq_t *, fq,
			    int, droptype, pktsched_pkt_t *, pkt,
			    uint32_t, cnt);
		} else {
			/*
			 * Need to drop packets to make room for the new
			 * ones. Try to drop from the head of the queue
			 * instead of the latest packets.
			 */
			if (!fq_empty(fq)) {
				uint32_t i;

				/* drop as many head packets as we enqueue */
				for (i = 0; i < cnt; i++) {
					fq_head_drop(fqs, fq);
				}
				droptype = DTYPE_NODROP;
			} else {
				droptype = DTYPE_EARLY;
			}
			fq_cl->fcl_stat.fcl_drop_early += cnt;

			DTRACE_IP6(no__flow__adv, fq_if_t *, fqs,
			    fq_if_classq_t *, fq_cl, fq_t *, fq,
			    int, droptype, pktsched_pkt_t *, pkt,
			    uint32_t, cnt);
		}
	}

	/* Set the return code correctly */
	if (__improbable(fc_adv == 1 && droptype != DTYPE_FORCED)) {
		if (fq_if_add_fcentry(fqs, pkt, pkt_flowsrc, fq, fq_cl)) {
			fq->fq_flags |= FQF_FLOWCTL_ON;
			/* deliver flow control advisory error */
			if (droptype == DTYPE_NODROP) {
				ret = CLASSQEQ_SUCCESS_FC;
			} else {
				/* dropped due to flow control */
				ret = CLASSQEQ_DROP_FC;
			}
		} else {
			/*
			 * if we could not flow control the flow, it is
			 * better to drop
			 */
			droptype = DTYPE_FORCED;
			ret = CLASSQEQ_DROP_FC;
			fq_cl->fcl_stat.fcl_flow_control_fail++;
		}
		DTRACE_IP3(fc__ret, fq_if_t *, fqs, int, droptype, int, ret);
	}

	/*
	 * If the queue length hits the queue limit, drop a chain with the
	 * same number of packets from the front of the queue for a flow with
	 * maximum number of bytes. This will penalize heavy and unresponsive
	 * flows. It will also avoid a tail drop.
	 */
	if (__improbable(droptype == DTYPE_NODROP &&
	    fq_if_at_drop_limit(fqs))) {
		uint32_t i;

		if (fqs->fqs_large_flow == fq) {
			/*
			 * Drop from the head of the current fq. Since a
			 * new packet will be added to the tail, it is ok
			 * to leave fq in place.
			 */
			DTRACE_IP5(large__flow, fq_if_t *, fqs,
			    fq_if_classq_t *, fq_cl, fq_t *, fq,
			    pktsched_pkt_t *, pkt, uint32_t, cnt);

			for (i = 0; i < cnt; i++) {
				fq_head_drop(fqs, fq);
			}
			fq_cl->fcl_stat.fcl_drop_overflow += cnt;

			/*
			 * TCP and QUIC will react to the loss of those head dropped pkts
			 * and adjust send rate.
			 */
			if ((fq->fq_flags & FQF_FLOWCTL_CAPABLE) &&
			    (*pkt_flags & PKTF_FLOW_ADV) &&
			    (pkt_proto != IPPROTO_TCP) &&
			    (pkt_proto != IPPROTO_QUIC)) {
				if (fq_if_add_fcentry(fqs, pkt, pkt_flowsrc, fq, fq_cl)) {
					fq->fq_flags |= FQF_FLOWCTL_ON;
					FQ_SET_OVERWHELMING(fq);
					fq_cl->fcl_stat.fcl_overwhelming++;
					/* deliver flow control advisory error */
					ret = CLASSQEQ_SUCCESS_FC;
				}
			}
		} else {
			if (fqs->fqs_large_flow == NULL) {
				droptype = DTYPE_FORCED;
				fq_cl->fcl_stat.fcl_drop_overflow += cnt;
				ret = CLASSQEQ_DROP;

				DTRACE_IP5(no__large__flow, fq_if_t *, fqs,
				    fq_if_classq_t *, fq_cl, fq_t *, fq,
				    pktsched_pkt_t *, pkt, uint32_t, cnt);

				/*
				 * if this fq was freshly created and there
				 * is nothing to enqueue, free it
				 */
				if (fq_empty(fq) && !(fq->fq_flags &
				    (FQF_NEW_FLOW | FQF_OLD_FLOW))) {
					fq_if_destroy_flow(fqs, fq_cl, fq, true);
					fq = NULL;
				}
			} else {
				/* make room by dropping from the largest flow */
				DTRACE_IP5(different__large__flow,
				    fq_if_t *, fqs, fq_if_classq_t *, fq_cl,
				    fq_t *, fq, pktsched_pkt_t *, pkt,
				    uint32_t, cnt);

				for (i = 0; i < cnt; i++) {
					fq_if_drop_packet(fqs);
				}
			}
		}
	}

	if (__probable(droptype == DTYPE_NODROP)) {
		uint32_t chain_len = pktsched_get_pkt_len(pkt);

		/*
		 * We do not compress if we are enqueuing a chain.
		 * Traversing the chain to look for acks would defeat the
		 * purpose of batch enqueueing.
		 */
		if (cnt == 1) {
			ret = fq_compressor(fqs, fq, fq_cl, pkt);
			if (ret != CLASSQEQ_COMPRESSED) {
				ret = CLASSQEQ_SUCCESS;
			} else {
				fq_cl->fcl_stat.fcl_pkts_compressed++;
			}
		}
		DTRACE_IP5(fq_enqueue, fq_if_t *, fqs, fq_if_classq_t *, fq_cl,
		    fq_t *, fq, pktsched_pkt_t *, pkt, uint32_t, cnt);
		fq_enqueue(fq, pkt->pktsched_pkt, pkt->pktsched_tail, cnt);

		fq->fq_bytes += chain_len;
		fq_cl->fcl_stat.fcl_byte_cnt += chain_len;
		fq_cl->fcl_stat.fcl_pkt_cnt += cnt;

		/*
		 * check if this queue will qualify to be the next
		 * victim queue
		 */
		fq_if_is_flow_heavy(fqs, fq);
	} else {
		DTRACE_IP3(fq_drop, fq_if_t *, fqs, int, droptype, int, ret);
		return (ret != CLASSQEQ_SUCCESS) ? ret : CLASSQEQ_DROP;
	}

	/*
	 * If the queue is not currently active, add it to the end of new
	 * flows list for that service class.
	 */
	if ((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) == 0) {
		VERIFY(STAILQ_NEXT(fq, fq_actlink) == NULL);
		STAILQ_INSERT_TAIL(&fq_cl->fcl_new_flows, fq, fq_actlink);
		fq->fq_flags |= FQF_NEW_FLOW;

		fq_cl->fcl_stat.fcl_newflows_cnt++;

		/* a new flow starts with a full quantum of deficit */
		fq->fq_deficit = fq_cl->fcl_quantum;
	}
	return ret;
}
506
507 void
fq_getq_flow_internal(fq_if_t * fqs,fq_t * fq,pktsched_pkt_t * pkt)508 fq_getq_flow_internal(fq_if_t *fqs, fq_t *fq, pktsched_pkt_t *pkt)
509 {
510 classq_pkt_t p = CLASSQ_PKT_INITIALIZER(p);
511 uint32_t plen;
512 fq_if_classq_t *fq_cl;
513 struct ifclassq *ifq = fqs->fqs_ifq;
514
515 fq_dequeue(fq, &p);
516 if (p.cp_ptype == QP_INVALID) {
517 VERIFY(p.cp_mbuf == NULL);
518 return;
519 }
520
521 pktsched_pkt_encap(pkt, &p);
522 plen = pktsched_get_pkt_len(pkt);
523
524 VERIFY(fq->fq_bytes >= plen);
525 fq->fq_bytes -= plen;
526
527 fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
528 fq_cl->fcl_stat.fcl_byte_cnt -= plen;
529 fq_cl->fcl_stat.fcl_pkt_cnt--;
530 IFCQ_DEC_LEN(ifq);
531 IFCQ_DEC_BYTES(ifq, plen);
532
533 /* Reset getqtime so that we don't count idle times */
534 if (fq_empty(fq)) {
535 fq->fq_getqtime = 0;
536 }
537 }
538
/*
 * Dequeue one packet from 'fq' and run the CoDel-style delay machinery:
 * compute the packet's queueing delay from its enqueue timestamp, fold
 * it into per-flow and per-class min/max/avg statistics, update the
 * DELAY_HIGH / OVERWHELMING state once per update interval, and issue
 * flow-control feedback when the flow has recovered.  Leaves 'pkt' with
 * ptype QP_INVALID when the queue is empty.
 */
void
fq_getq_flow(fq_if_t *fqs, fq_t *fq, pktsched_pkt_t *pkt)
{
	fq_if_classq_t *fq_cl;
	u_int64_t now;
	int64_t qdelay = 0;
	struct timespec now_ts;
	volatile uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;

	fq_getq_flow_internal(fqs, fq, pkt);
	if (pkt->pktsched_ptype == QP_INVALID) {
		VERIFY(pkt->pktsched_pkt_mbuf == NULL);
		return;
	}

	pktsched_get_pkt_vars(pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL);

	nanouptime(&now_ts);
	now = (now_ts.tv_sec * NSEC_PER_SEC) + now_ts.tv_nsec;

	/* this will compute qdelay in nanoseconds */
	if (now > *pkt_timestamp) {
		qdelay = now - *pkt_timestamp;
	}
	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];

	/* track the smallest delay seen in the current update interval */
	if (fq->fq_min_qdelay == 0 ||
	    (qdelay > 0 && (u_int64_t)qdelay < fq->fq_min_qdelay)) {
		fq->fq_min_qdelay = qdelay;
	}

	/* Update min/max/avg qdelay for the respective class */
	if (fq_cl->fcl_stat.fcl_min_qdelay == 0 ||
	    (qdelay > 0 && (u_int64_t)qdelay < fq_cl->fcl_stat.fcl_min_qdelay)) {
		fq_cl->fcl_stat.fcl_min_qdelay = qdelay;
	}

	if (fq_cl->fcl_stat.fcl_max_qdelay == 0 ||
	    (qdelay > 0 && (u_int64_t)qdelay > fq_cl->fcl_stat.fcl_max_qdelay)) {
		fq_cl->fcl_stat.fcl_max_qdelay = qdelay;
	}

	uint64_t num_dequeues = fq_cl->fcl_stat.fcl_dequeue;

	/*
	 * Running average: avg' = (avg * n + qdelay) / (n + 1), with
	 * overflow guards on both the counter and the product.  On
	 * overflow the statistics restart from the current sample.
	 */
	if (num_dequeues == 0) {
		fq_cl->fcl_stat.fcl_avg_qdelay = qdelay;
	} else if (qdelay > 0) {
		uint64_t res = 0;
		if (os_add_overflow(num_dequeues, 1, &res)) {
			/* Reset the dequeue num and dequeue bytes */
			fq_cl->fcl_stat.fcl_dequeue = num_dequeues = 0;
			fq_cl->fcl_stat.fcl_dequeue_bytes = 0;
			fq_cl->fcl_stat.fcl_avg_qdelay = qdelay;
			os_log_info(OS_LOG_DEFAULT, "%s: dequeue num overflow, "
			    "flow: 0x%x, iface: %s", __func__, fq->fq_flowhash,
			    if_name(fqs->fqs_ifq->ifcq_ifp));
		} else {
			uint64_t product = 0;
			if (os_mul_overflow(fq_cl->fcl_stat.fcl_avg_qdelay,
			    num_dequeues, &product) || os_add_overflow(product, qdelay, &res)) {
				fq_cl->fcl_stat.fcl_avg_qdelay = qdelay;
			} else {
				fq_cl->fcl_stat.fcl_avg_qdelay = res /
				    (num_dequeues + 1);
			}
		}
	}

	/* once per update interval, re-evaluate the delay-high state */
	if (now >= fq->fq_updatetime) {
		if (fq->fq_min_qdelay > fqs->fqs_target_qdelay) {
			if (!FQ_IS_DELAYHIGH(fq)) {
				FQ_SET_DELAY_HIGH(fq);
				os_log_error(OS_LOG_DEFAULT,
				    "%s: high delay idx: %d, %llu, flow: 0x%x, "
				    "iface: %s", __func__, fq->fq_sc_index,
				    fq->fq_min_qdelay, fq->fq_flowhash,
				    if_name(fqs->fqs_ifq->ifcq_ifp));
			}
		} else {
			FQ_CLEAR_DELAY_HIGH(fq);
		}
		/* Reset measured queue delay and update time */
		fq->fq_updatetime = now + fqs->fqs_update_interval;
		fq->fq_min_qdelay = 0;
	}

	/* clear recovery states once the flow is no longer the bottleneck */
	if (fqs->fqs_large_flow != fq || !fq_if_almost_at_drop_limit(fqs)) {
		FQ_CLEAR_OVERWHELMING(fq);
	}
	if (!FQ_IS_DELAYHIGH(fq) || fq_empty(fq)) {
		FQ_CLEAR_DELAY_HIGH(fq);
	}

	/* resume a flow-controlled flow that has drained */
	if ((fq->fq_flags & FQF_FLOWCTL_ON) &&
	    !FQ_IS_DELAYHIGH(fq) && !FQ_IS_OVERWHELMING(fq)) {
		fq_if_flow_feedback(fqs, fq, fq_cl);
	}

	if (fq_empty(fq)) {
		/* Reset getqtime so that we don't count idle times */
		fq->fq_getqtime = 0;
	} else {
		fq->fq_getqtime = now;
	}
	fq_if_is_flow_heavy(fqs, fq);

	/* scrub scheduler-private state before handing the packet out */
	*pkt_timestamp = 0;
	switch (pkt->pktsched_ptype) {
	case QP_MBUF:
		*pkt_flags &= ~PKTF_PRIV_GUARDED;
		break;
#if SKYWALK
	case QP_PACKET:
		/* sanity check */
		ASSERT((*pkt_flags & ~PKT_F_COMMON_MASK) == 0);
		break;
#endif /* SKYWALK */
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
664