xref: /xnu-8020.101.4/bsd/net/classq/classq_fq_codel.c (revision e7776783b89a353188416a9a346c6cdb4928faad)
1 /*
2  * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/mbuf.h>
32 #include <sys/socket.h>
33 #include <sys/sockio.h>
34 #include <sys/systm.h>
35 #include <sys/sysctl.h>
36 #include <sys/syslog.h>
37 #include <sys/proc.h>
38 #include <sys/errno.h>
39 #include <sys/kernel.h>
40 #include <sys/kauth.h>
41 #include <sys/sdt.h>
42 #include <kern/zalloc.h>
43 #include <netinet/in.h>
44 
45 #include <net/classq/classq.h>
46 #include <net/classq/if_classq.h>
47 #include <net/pktsched/pktsched.h>
48 #include <net/pktsched/pktsched_fq_codel.h>
49 #include <net/classq/classq_fq_codel.h>
50 
51 #include <netinet/tcp_var.h>
52 
static uint32_t flowq_size;                     /* size of flowq */
static struct mcache *flowq_cache = NULL;       /* mcache for flowq */

#define FQ_ZONE_MAX     (32 * 1024)     /* across all interfaces */

/* drop-type classification used internally by fq_addq() */
#define DTYPE_NODROP    0       /* no drop */
#define DTYPE_FORCED    1       /* a "forced" drop */
#define DTYPE_EARLY     2       /* an "unforced" (early) drop */
61 
62 void
fq_codel_init(void)63 fq_codel_init(void)
64 {
65 	if (flowq_cache != NULL) {
66 		return;
67 	}
68 
69 	flowq_size = sizeof(fq_t);
70 	flowq_cache = mcache_create("fq.flowq", flowq_size, sizeof(uint64_t),
71 	    0, MCR_SLEEP);
72 	if (flowq_cache == NULL) {
73 		panic("%s: failed to allocate flowq_cache", __func__);
74 		/* NOTREACHED */
75 		__builtin_unreachable();
76 	}
77 }
78 
/*
 * Reclaim memory from the flowq mcache under memory pressure;
 * 'purge' is forwarded unchanged to mcache_reap_now().
 */
void
fq_codel_reap_caches(boolean_t purge)
{
	mcache_reap_now(flowq_cache, purge);
}
84 
85 fq_t *
fq_alloc(classq_pkt_type_t ptype)86 fq_alloc(classq_pkt_type_t ptype)
87 {
88 	fq_t *fq = NULL;
89 	fq = mcache_alloc(flowq_cache, MCR_SLEEP);
90 	if (fq == NULL) {
91 		log(LOG_ERR, "%s: unable to allocate from flowq_cache\n", __func__);
92 		return NULL;
93 	}
94 
95 	bzero(fq, flowq_size);
96 	fq->fq_ptype = ptype;
97 	if (ptype == QP_MBUF) {
98 		MBUFQ_INIT(&fq->fq_mbufq);
99 	}
100 #if SKYWALK
101 	else {
102 		VERIFY(ptype == QP_PACKET);
103 		KPKTQ_INIT(&fq->fq_kpktq);
104 	}
105 #endif /* SKYWALK */
106 	CLASSQ_PKT_INIT(&fq->fq_dq_head);
107 	CLASSQ_PKT_INIT(&fq->fq_dq_tail);
108 	fq->fq_in_dqlist = false;
109 	return fq;
110 }
111 
/*
 * Return a flow queue to the mcache.  The caller must have already
 * marked it destroyed, drained all packets and bytes from it, and
 * unlinked it from the new/old flow lists; the VERIFYs enforce
 * those invariants.
 */
void
fq_destroy(fq_t *fq)
{
	VERIFY(fq->fq_flags & FQF_DESTROYED);
	VERIFY(fq_empty(fq));
	VERIFY(!(fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)));
	VERIFY(fq->fq_bytes == 0);
	mcache_free(flowq_cache, fq);
}
121 
122 static inline void
fq_detect_dequeue_stall(fq_if_t * fqs,fq_t * flowq,fq_if_classq_t * fq_cl,u_int64_t * now)123 fq_detect_dequeue_stall(fq_if_t *fqs, fq_t *flowq, fq_if_classq_t *fq_cl,
124     u_int64_t *now)
125 {
126 	u_int64_t maxgetqtime;
127 	if (FQ_IS_DELAYHIGH(flowq) || flowq->fq_getqtime == 0 ||
128 	    fq_empty(flowq) ||
129 	    flowq->fq_bytes < FQ_MIN_FC_THRESHOLD_BYTES) {
130 		return;
131 	}
132 	maxgetqtime = flowq->fq_getqtime + fqs->fqs_update_interval;
133 	if ((*now) > maxgetqtime) {
134 		/*
135 		 * there was no dequeue in an update interval worth of
136 		 * time. It means that the queue is stalled.
137 		 */
138 		FQ_SET_DELAY_HIGH(flowq);
139 		fq_cl->fcl_stat.fcl_dequeue_stall++;
140 		os_log_error(OS_LOG_DEFAULT, "%s: dequeue stall num: %d, "
141 		    "scidx: %d, flow: 0x%x, iface: %s", __func__,
142 		    fq_cl->fcl_stat.fcl_dequeue_stall, flowq->fq_sc_index,
143 		    flowq->fq_flowhash, if_name(fqs->fqs_ifq->ifcq_ifp));
144 	}
145 }
146 
/*
 * Drop one packet from the head of the flow queue, charging the
 * drop to the interface queue statistics.  Used to penalize a heavy
 * flow instead of tail-dropping a newly arriving packet.  Silently
 * returns if the flow queue is empty.
 */
void
fq_head_drop(fq_if_t *fqs, fq_t *fq)
{
	pktsched_pkt_t pkt;
	volatile uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;
	struct ifclassq *ifq = fqs->fqs_ifq;

	_PKTSCHED_PKT_INIT(&pkt);
	fq_getq_flow_internal(fqs, fq, &pkt);
	if (pkt.pktsched_pkt_mbuf == NULL) {
		/* flow queue was empty; nothing to drop */
		return;
	}

	pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL);

	/* clear per-packet scheduler state before the packet is freed */
	*pkt_timestamp = 0;
	switch (pkt.pktsched_ptype) {
	case QP_MBUF:
		*pkt_flags &= ~PKTF_PRIV_GUARDED;
		break;
#if SKYWALK
	case QP_PACKET:
		/* sanity check */
		ASSERT((*pkt_flags & ~PKT_F_COMMON_MASK) == 0);
		break;
#endif /* SKYWALK */
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	IFCQ_DROP_ADD(ifq, 1, pktsched_get_pkt_len(&pkt));
	IFCQ_CONVERT_LOCK(ifq);
	pktsched_free_pkt(&pkt);
}
185 
186 
/*
 * Packet compression for the flow queue: if the incoming packet 'pkt'
 * carries the same non-zero compression generation count as the packet
 * currently at the tail of 'fq', the tail packet is superseded -- it is
 * removed and freed, its queue accounting is reverted, and its timestamp
 * is carried over to the new packet.  Returns CLASSQEQ_COMPRESSED when a
 * tail packet was removed, 0 otherwise (compression disabled, no gencnt,
 * empty queue, or gencnt mismatch).
 */
static int
fq_compressor(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl,
    pktsched_pkt_t *pkt)
{
	classq_pkt_type_t ptype = fq->fq_ptype;
	uint32_t comp_gencnt = 0;
	uint64_t *pkt_timestamp;
	uint64_t old_timestamp = 0;
	uint32_t old_pktlen = 0;
	struct ifclassq *ifq = fqs->fqs_ifq;

	/* globally gated by the tcp_do_ack_compression sysctl */
	if (__improbable(!tcp_do_ack_compression)) {
		return 0;
	}

	pktsched_get_pkt_vars(pkt, NULL, &pkt_timestamp, NULL, NULL, NULL,
	    &comp_gencnt);

	if (comp_gencnt == 0) {
		/* packet did not opt into compression */
		return 0;
	}

	fq_cl->fcl_stat.fcl_pkts_compressible++;

	if (fq_empty(fq)) {
		/* no tail packet to compare against */
		return 0;
	}

	if (ptype == QP_MBUF) {
		struct mbuf *m = MBUFQ_LAST(&fq->fq_mbufq);

		if (comp_gencnt != m->m_pkthdr.comp_gencnt) {
			return 0;
		}

		/* If we got until here, we should merge/replace the segment */
		MBUFQ_REMOVE(&fq->fq_mbufq, m);
		old_pktlen = m_pktlen(m);
		old_timestamp = m->m_pkthdr.pkt_timestamp;

		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		m_freem(m);
	}
#if SKYWALK
	else {
		struct __kern_packet *kpkt = KPKTQ_LAST(&fq->fq_kpktq);

		if (comp_gencnt != kpkt->pkt_comp_gencnt) {
			return 0;
		}

		/* If we got until here, we should merge/replace the segment */
		KPKTQ_REMOVE(&fq->fq_kpktq, kpkt);
		old_pktlen = kpkt->pkt_length;
		old_timestamp = kpkt->pkt_timestamp;

		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
		pp_free_packet(*(struct kern_pbufpool **)(uintptr_t)&
		    (((struct __kern_quantum *)kpkt)->qum_pp),
		    (uint64_t)kpkt);
	}
#endif /* SKYWALK */

	/* revert flow, class and interface accounting for the freed packet */
	fq->fq_bytes -= old_pktlen;
	fq_cl->fcl_stat.fcl_byte_cnt -= old_pktlen;
	fq_cl->fcl_stat.fcl_pkt_cnt--;
	IFCQ_DEC_LEN(ifq);
	IFCQ_DEC_BYTES(ifq, old_pktlen);

	/* inherit the superseded packet's enqueue timestamp */
	*pkt_timestamp = old_timestamp;

	return CLASSQEQ_COMPRESSED;
}
260 
/*
 * Enqueue a packet -- or a chain of 'pktsched_pcnt' packets -- onto the
 * flow queue selected by the packet's flow identifier.  Handles dequeue
 * stall detection, flow-control advisories, queue-limit head drops and
 * (for single packets) compression via fq_compressor().  Returns a
 * CLASSQEQ_* status code; any code other than CLASSQEQ_SUCCESS /
 * CLASSQEQ_SUCCESS_FC / CLASSQEQ_COMPRESSED means the chain was dropped.
 */
int
fq_addq(fq_if_t *fqs, pktsched_pkt_t *pkt, fq_if_classq_t *fq_cl)
{
	int droptype = DTYPE_NODROP, fc_adv = 0, ret = CLASSQEQ_SUCCESS;
	u_int64_t now;
	fq_t *fq = NULL;
	uint64_t *pkt_timestamp;
	volatile uint32_t *pkt_flags;
	uint32_t pkt_flowid, cnt;
	uint8_t pkt_proto, pkt_flowsrc;

	cnt = pkt->pktsched_pcnt;
	pktsched_get_pkt_vars(pkt, &pkt_flags, &pkt_timestamp, &pkt_flowid,
	    &pkt_flowsrc, &pkt_proto, NULL);

	/*
	 * XXX Not walking the chain to set this flag on every packet.
	 * This flag is only used for debugging. Nothing is affected if it's
	 * not set.
	 */
	switch (pkt->pktsched_ptype) {
	case QP_MBUF:
		/* See comments in <rdar://problem/14040693> */
		VERIFY(!(*pkt_flags & PKTF_PRIV_GUARDED));
		*pkt_flags |= PKTF_PRIV_GUARDED;
		break;
#if SKYWALK
	case QP_PACKET:
		/* sanity check */
		ASSERT((*pkt_flags & ~PKT_F_COMMON_MASK) == 0);
		break;
#endif /* SKYWALK */
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * Timestamps for every packet must be set prior to entering this path.
	 */
	now = *pkt_timestamp;
	ASSERT(now > 0);

	/* find the flowq for this packet */
	fq = fq_if_hash_pkt(fqs, pkt_flowid, pktsched_get_pkt_svc(pkt),
	    now, TRUE, pkt->pktsched_ptype);
	if (__improbable(fq == NULL)) {
		DTRACE_IP1(memfail__drop, fq_if_t *, fqs);
		/* drop the packet if we could not allocate a flow queue */
		fq_cl->fcl_stat.fcl_drop_memfailure += cnt;
		return CLASSQEQ_DROP;
	}
	VERIFY(fq->fq_ptype == pkt->pktsched_ptype);

	fq_detect_dequeue_stall(fqs, fq, fq_cl, &now);

	/*
	 * Flow is experiencing high delay or is overwhelming its share of
	 * the queue: either issue a flow-control advisory (flow-adv capable
	 * flows) or shed packets from the head of this flow.
	 */
	if (__improbable(FQ_IS_DELAYHIGH(fq) || FQ_IS_OVERWHELMING(fq))) {
		if ((fq->fq_flags & FQF_FLOWCTL_CAPABLE) &&
		    (*pkt_flags & PKTF_FLOW_ADV)) {
			fc_adv = 1;
			/*
			 * If the flow is suspended or it is not
			 * TCP/QUIC, drop the chain.
			 */
			if ((pkt_proto != IPPROTO_TCP) &&
			    (pkt_proto != IPPROTO_QUIC)) {
				droptype = DTYPE_EARLY;
				fq_cl->fcl_stat.fcl_drop_early += cnt;
				IFCQ_DROP_ADD(fqs->fqs_ifq, cnt, pktsched_get_pkt_len(pkt));
			}
			DTRACE_IP6(flow__adv, fq_if_t *, fqs,
			    fq_if_classq_t *, fq_cl, fq_t *, fq,
			    int, droptype, pktsched_pkt_t *, pkt,
			    uint32_t, cnt);
		} else {
			/*
			 * Need to drop packets to make room for the new
			 * ones. Try to drop from the head of the queue
			 * instead of the latest packets.
			 */
			if (!fq_empty(fq)) {
				uint32_t i;

				/* drop as many old packets as we are adding */
				for (i = 0; i < cnt; i++) {
					fq_head_drop(fqs, fq);
				}
				droptype = DTYPE_NODROP;
			} else {
				droptype = DTYPE_EARLY;
			}
			fq_cl->fcl_stat.fcl_drop_early += cnt;

			DTRACE_IP6(no__flow__adv, fq_if_t *, fqs,
			    fq_if_classq_t *, fq_cl, fq_t *, fq,
			    int, droptype, pktsched_pkt_t *, pkt,
			    uint32_t, cnt);
		}
	}

	/* Set the return code correctly */
	if (__improbable(fc_adv == 1 && droptype != DTYPE_FORCED)) {
		if (fq_if_add_fcentry(fqs, pkt, pkt_flowsrc, fq, fq_cl)) {
			fq->fq_flags |= FQF_FLOWCTL_ON;
			/* deliver flow control advisory error */
			if (droptype == DTYPE_NODROP) {
				ret = CLASSQEQ_SUCCESS_FC;
			} else {
				/* dropped due to flow control */
				ret = CLASSQEQ_DROP_FC;
			}
		} else {
			/*
			 * if we could not flow control the flow, it is
			 * better to drop
			 */
			droptype = DTYPE_FORCED;
			ret = CLASSQEQ_DROP_FC;
			fq_cl->fcl_stat.fcl_flow_control_fail++;
		}
		DTRACE_IP3(fc__ret, fq_if_t *, fqs, int, droptype, int, ret);
	}

	/*
	 * If the queue length hits the queue limit, drop a chain with the
	 * same number of packets from the front of the queue for a flow with
	 * maximum number of bytes. This will penalize heavy and unresponsive
	 * flows. It will also avoid a tail drop.
	 */
	if (__improbable(droptype == DTYPE_NODROP &&
	    fq_if_at_drop_limit(fqs))) {
		uint32_t i;

		if (fqs->fqs_large_flow == fq) {
			/*
			 * Drop from the head of the current fq. Since a
			 * new packet will be added to the tail, it is ok
			 * to leave fq in place.
			 */
			DTRACE_IP5(large__flow, fq_if_t *, fqs,
			    fq_if_classq_t *, fq_cl, fq_t *, fq,
			    pktsched_pkt_t *, pkt, uint32_t, cnt);

			for (i = 0; i < cnt; i++) {
				fq_head_drop(fqs, fq);
			}
			fq_cl->fcl_stat.fcl_drop_overflow += cnt;

			/*
			 * TCP and QUIC will react to the loss of those head dropped pkts
			 * and adjust send rate.
			 */
			if ((fq->fq_flags & FQF_FLOWCTL_CAPABLE) &&
			    (*pkt_flags & PKTF_FLOW_ADV) &&
			    (pkt_proto != IPPROTO_TCP) &&
			    (pkt_proto != IPPROTO_QUIC)) {
				if (fq_if_add_fcentry(fqs, pkt, pkt_flowsrc, fq, fq_cl)) {
					fq->fq_flags |= FQF_FLOWCTL_ON;
					FQ_SET_OVERWHELMING(fq);
					fq_cl->fcl_stat.fcl_overwhelming++;
					/* deliver flow control advisory error */
					ret = CLASSQEQ_SUCCESS_FC;
				}
			}
		} else {
			if (fqs->fqs_large_flow == NULL) {
				/* no victim flow known; force-drop this chain */
				droptype = DTYPE_FORCED;
				fq_cl->fcl_stat.fcl_drop_overflow += cnt;
				ret = CLASSQEQ_DROP;

				DTRACE_IP5(no__large__flow, fq_if_t *, fqs,
				    fq_if_classq_t *, fq_cl, fq_t *, fq,
				    pktsched_pkt_t *, pkt, uint32_t, cnt);

				/*
				 * if this fq was freshly created and there
				 * is nothing to enqueue, free it
				 */
				if (fq_empty(fq) && !(fq->fq_flags &
				    (FQF_NEW_FLOW | FQF_OLD_FLOW))) {
					fq_if_destroy_flow(fqs, fq_cl, fq, true);
					fq = NULL;
				}
			} else {
				/* drop from the largest flow instead of this one */
				DTRACE_IP5(different__large__flow,
				    fq_if_t *, fqs, fq_if_classq_t *, fq_cl,
				    fq_t *, fq, pktsched_pkt_t *, pkt,
				    uint32_t, cnt);

				for (i = 0; i < cnt; i++) {
					fq_if_drop_packet(fqs);
				}
			}
		}
	}

	if (__probable(droptype == DTYPE_NODROP)) {
		uint32_t chain_len = pktsched_get_pkt_len(pkt);

		/*
		 * We do not compress if we are enqueuing a chain.
		 * Traversing the chain to look for acks would defeat the
		 * purpose of batch enqueueing.
		 */
		if (cnt == 1) {
			ret = fq_compressor(fqs, fq, fq_cl, pkt);
			if (ret != CLASSQEQ_COMPRESSED) {
				ret = CLASSQEQ_SUCCESS;
			} else {
				fq_cl->fcl_stat.fcl_pkts_compressed++;
			}
		}
		DTRACE_IP5(fq_enqueue, fq_if_t *, fqs, fq_if_classq_t *, fq_cl,
		    fq_t *, fq, pktsched_pkt_t *, pkt, uint32_t, cnt);
		fq_enqueue(fq, pkt->pktsched_pkt, pkt->pktsched_tail, cnt);

		fq->fq_bytes += chain_len;
		fq_cl->fcl_stat.fcl_byte_cnt += chain_len;
		fq_cl->fcl_stat.fcl_pkt_cnt += cnt;

		/*
		 * check if this queue will qualify to be the next
		 * victim queue
		 */
		fq_if_is_flow_heavy(fqs, fq);
	} else {
		DTRACE_IP3(fq_drop, fq_if_t *, fqs, int, droptype, int, ret);
		return (ret != CLASSQEQ_SUCCESS) ? ret : CLASSQEQ_DROP;
	}

	/*
	 * If the queue is not currently active, add it to the end of new
	 * flows list for that service class.
	 */
	if ((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) == 0) {
		VERIFY(STAILQ_NEXT(fq, fq_actlink) == NULL);
		STAILQ_INSERT_TAIL(&fq_cl->fcl_new_flows, fq, fq_actlink);
		fq->fq_flags |= FQF_NEW_FLOW;

		fq_cl->fcl_stat.fcl_newflows_cnt++;

		fq->fq_deficit = fq_cl->fcl_quantum;
	}
	return ret;
}
506 
507 void
fq_getq_flow_internal(fq_if_t * fqs,fq_t * fq,pktsched_pkt_t * pkt)508 fq_getq_flow_internal(fq_if_t *fqs, fq_t *fq, pktsched_pkt_t *pkt)
509 {
510 	classq_pkt_t p = CLASSQ_PKT_INITIALIZER(p);
511 	uint32_t plen;
512 	fq_if_classq_t *fq_cl;
513 	struct ifclassq *ifq = fqs->fqs_ifq;
514 
515 	fq_dequeue(fq, &p);
516 	if (p.cp_ptype == QP_INVALID) {
517 		VERIFY(p.cp_mbuf == NULL);
518 		return;
519 	}
520 
521 	pktsched_pkt_encap(pkt, &p);
522 	plen = pktsched_get_pkt_len(pkt);
523 
524 	VERIFY(fq->fq_bytes >= plen);
525 	fq->fq_bytes -= plen;
526 
527 	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
528 	fq_cl->fcl_stat.fcl_byte_cnt -= plen;
529 	fq_cl->fcl_stat.fcl_pkt_cnt--;
530 	IFCQ_DEC_LEN(ifq);
531 	IFCQ_DEC_BYTES(ifq, plen);
532 
533 	/* Reset getqtime so that we don't count idle times */
534 	if (fq_empty(fq)) {
535 		fq->fq_getqtime = 0;
536 	}
537 }
538 
/*
 * Dequeue one packet from the flow and run the CoDel-style control
 * logic: measure the packet's queueing delay, update per-flow and
 * per-class min/max/average delay statistics, maintain the
 * delay-high / overwhelming state, and release flow control when the
 * flow has recovered.  'pkt' is left with ptype QP_INVALID when the
 * flow is empty.
 */
void
fq_getq_flow(fq_if_t *fqs, fq_t *fq, pktsched_pkt_t *pkt)
{
	fq_if_classq_t *fq_cl;
	u_int64_t now;
	int64_t qdelay = 0;
	struct timespec now_ts;
	volatile uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;

	fq_getq_flow_internal(fqs, fq, pkt);
	if (pkt->pktsched_ptype == QP_INVALID) {
		VERIFY(pkt->pktsched_pkt_mbuf == NULL);
		return;
	}

	pktsched_get_pkt_vars(pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL);

	nanouptime(&now_ts);
	now = (now_ts.tv_sec * NSEC_PER_SEC) + now_ts.tv_nsec;

	/* this will compute qdelay in nanoseconds */
	if (now > *pkt_timestamp) {
		qdelay = now - *pkt_timestamp;
	}
	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];

	/* track the smallest delay seen in the current update interval */
	if (fq->fq_min_qdelay == 0 ||
	    (qdelay > 0 && (u_int64_t)qdelay < fq->fq_min_qdelay)) {
		fq->fq_min_qdelay = qdelay;
	}

	/* Update min/max/avg qdelay for the respective class */
	if (fq_cl->fcl_stat.fcl_min_qdelay == 0 ||
	    (qdelay > 0 && (u_int64_t)qdelay < fq_cl->fcl_stat.fcl_min_qdelay)) {
		fq_cl->fcl_stat.fcl_min_qdelay = qdelay;
	}

	if (fq_cl->fcl_stat.fcl_max_qdelay == 0 ||
	    (qdelay > 0 && (u_int64_t)qdelay > fq_cl->fcl_stat.fcl_max_qdelay)) {
		fq_cl->fcl_stat.fcl_max_qdelay = qdelay;
	}

	uint64_t num_dequeues = fq_cl->fcl_stat.fcl_dequeue;

	if (num_dequeues == 0) {
		fq_cl->fcl_stat.fcl_avg_qdelay = qdelay;
	} else if (qdelay > 0) {
		/*
		 * Rolling average: avg = (avg * n + qdelay) / (n + 1),
		 * with explicit overflow guards that reset the counters
		 * rather than corrupt the statistic.
		 */
		uint64_t res = 0;
		if (os_add_overflow(num_dequeues, 1, &res)) {
			/* Reset the dequeue num and dequeue bytes */
			fq_cl->fcl_stat.fcl_dequeue = num_dequeues = 0;
			fq_cl->fcl_stat.fcl_dequeue_bytes = 0;
			fq_cl->fcl_stat.fcl_avg_qdelay = qdelay;
			os_log_info(OS_LOG_DEFAULT, "%s: dequeue num overflow, "
			    "flow: 0x%x, iface: %s", __func__, fq->fq_flowhash,
			    if_name(fqs->fqs_ifq->ifcq_ifp));
		} else {
			uint64_t product = 0;
			if (os_mul_overflow(fq_cl->fcl_stat.fcl_avg_qdelay,
			    num_dequeues, &product) || os_add_overflow(product, qdelay, &res)) {
				fq_cl->fcl_stat.fcl_avg_qdelay = qdelay;
			} else {
				fq_cl->fcl_stat.fcl_avg_qdelay = res /
				    (num_dequeues + 1);
			}
		}
	}

	/* end of update interval: evaluate delay-high state for the flow */
	if (now >= fq->fq_updatetime) {
		if (fq->fq_min_qdelay > fqs->fqs_target_qdelay) {
			if (!FQ_IS_DELAYHIGH(fq)) {
				FQ_SET_DELAY_HIGH(fq);
				os_log_error(OS_LOG_DEFAULT,
				    "%s: high delay idx: %d, %llu, flow: 0x%x, "
				    "iface: %s", __func__, fq->fq_sc_index,
				    fq->fq_min_qdelay, fq->fq_flowhash,
				    if_name(fqs->fqs_ifq->ifcq_ifp));
			}
		} else {
			FQ_CLEAR_DELAY_HIGH(fq);
		}
		/* Reset measured queue delay and update time */
		fq->fq_updatetime = now + fqs->fqs_update_interval;
		fq->fq_min_qdelay = 0;
	}

	/* clear pressure states that no longer apply */
	if (fqs->fqs_large_flow != fq || !fq_if_almost_at_drop_limit(fqs)) {
		FQ_CLEAR_OVERWHELMING(fq);
	}
	if (!FQ_IS_DELAYHIGH(fq) || fq_empty(fq)) {
		FQ_CLEAR_DELAY_HIGH(fq);
	}

	/* flow has recovered; lift the flow-control advisory */
	if ((fq->fq_flags & FQF_FLOWCTL_ON) &&
	    !FQ_IS_DELAYHIGH(fq) && !FQ_IS_OVERWHELMING(fq)) {
		fq_if_flow_feedback(fqs, fq, fq_cl);
	}

	if (fq_empty(fq)) {
		/* Reset getqtime so that we don't count idle times */
		fq->fq_getqtime = 0;
	} else {
		fq->fq_getqtime = now;
	}
	fq_if_is_flow_heavy(fqs, fq);

	/* clear per-packet scheduler state before handing the packet up */
	*pkt_timestamp = 0;
	switch (pkt->pktsched_ptype) {
	case QP_MBUF:
		*pkt_flags &= ~PKTF_PRIV_GUARDED;
		break;
#if SKYWALK
	case QP_PACKET:
		/* sanity check */
		ASSERT((*pkt_flags & ~PKT_F_COMMON_MASK) == 0);
		break;
#endif /* SKYWALK */
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
664