xref: /xnu-8020.140.41/bsd/net/pktsched/pktsched_fq_codel.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <kern/zalloc.h>
32 #include <net/ethernet.h>
33 #include <net/if_var.h>
34 #include <net/if.h>
35 #include <net/classq/classq.h>
36 #include <net/classq/classq_fq_codel.h>
37 #include <net/pktsched/pktsched_fq_codel.h>
38 #include <os/log.h>
39 
40 #define FQ_CODEL_DEFAULT_QUANTUM 1500
41 
42 #define FQ_CODEL_QUANTUM_BK_SYS(_q)    (_q)
43 #define FQ_CODEL_QUANTUM_BK(_q)        (_q)
44 #define FQ_CODEL_QUANTUM_BE(_q)        (_q)
45 #define FQ_CODEL_QUANTUM_RD(_q)        (_q)
46 #define FQ_CODEL_QUANTUM_OAM(_q)       (_q)
47 #define FQ_CODEL_QUANTUM_AV(_q)        (_q * 2)
48 #define FQ_CODEL_QUANTUM_RV(_q)        (_q * 2)
49 #define FQ_CODEL_QUANTUM_VI(_q)        (_q * 2)
50 #define FQ_CODEL_QUANTUM_VO(_q)        ((_q * 2) / 5)
51 #define FQ_CODEL_QUANTUM_CTL(_q)       ((_q * 2) / 5)
52 
53 #define FQ_CODEL_DRR_MAX_BK_SYS    2
54 #define FQ_CODEL_DRR_MAX_BK        2
55 #define FQ_CODEL_DRR_MAX_BE        4
56 #define FQ_CODEL_DRR_MAX_RD        4
57 #define FQ_CODEL_DRR_MAX_OAM       4
58 #define FQ_CODEL_DRR_MAX_AV        6
59 #define FQ_CODEL_DRR_MAX_RV        6
60 #define FQ_CODEL_DRR_MAX_VI        6
61 #define FQ_CODEL_DRR_MAX_VO        8
62 #define FQ_CODEL_DRR_MAX_CTL       8
63 
64 static ZONE_DEFINE_TYPE(fq_if_zone, "pktsched_fq_if", fq_if_t, ZC_ZFREE_CLEARMEM);
65 
66 typedef STAILQ_HEAD(, flowq) flowq_dqlist_t;
67 
68 static fq_if_t *fq_if_alloc(struct ifnet *, struct ifclassq *, classq_pkt_type_t);
69 static void fq_if_destroy(fq_if_t *fqs);
70 static void fq_if_classq_init(fq_if_t *fqs, uint32_t priority,
71     uint32_t quantum, uint32_t drr_max, uint32_t svc_class);
72 static void fq_if_dequeue(fq_if_t *, fq_if_classq_t *, uint32_t,
73     int64_t, classq_pkt_t *, classq_pkt_t *, uint32_t *,
74     uint32_t *, flowq_dqlist_t *, boolean_t drvmgmt);
75 void fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat);
76 static void fq_if_purge(fq_if_t *);
77 static void fq_if_purge_classq(fq_if_t *, fq_if_classq_t *);
78 static void fq_if_purge_flow(fq_if_t *, fq_t *, u_int32_t *, u_int32_t *);
79 static void fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl,
80     bool add_to_old);
81 static void fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl,
82     fq_t *fq, bool remove_hash, bool destroy);
83 
84 #define FQ_IF_FLOW_HASH_ID(_flowid_) \
85 	(((_flowid_) >> FQ_IF_HASH_TAG_SHIFT) & FQ_IF_HASH_TAG_MASK)
86 
87 #define FQ_IF_CLASSQ_IDLE(_fcl_) \
88 	(STAILQ_EMPTY(&(_fcl_)->fcl_new_flows) && \
89 	STAILQ_EMPTY(&(_fcl_)->fcl_old_flows))
90 
91 typedef void (* fq_if_append_pkt_t)(classq_pkt_t *, classq_pkt_t *);
92 typedef boolean_t (* fq_getq_flow_t)(fq_if_t *, fq_if_classq_t *, fq_t *,
93     int64_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
94     u_int32_t *, boolean_t *, u_int32_t);
95 
/*
 * Chain append callback for mbuf-based schedulers: link `next_pkt'
 * after `pkt' via the mbuf nextpkt pointer.
 */
static void
fq_if_append_mbuf(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
{
	pkt->cp_mbuf->m_nextpkt = next_pkt->cp_mbuf;
}
101 
#if SKYWALK
/*
 * Chain append callback for skywalk kernel-packet schedulers: link
 * `next_pkt' after `pkt' via the kpkt nextpkt pointer.
 */
static void
fq_if_append_pkt(classq_pkt_t *pkt, classq_pkt_t *next_pkt)
{
	pkt->cp_kpkt->pkt_nextpkt = next_pkt->cp_kpkt;
}
#endif /* SKYWALK */
109 
#if SKYWALK
/*
 * Dequeue skywalk kernel packets from flow `fq' while it has DRR deficit
 * remaining and neither the packet nor byte limit has been reached.
 * Dequeued packets are appended to the caller's *head/*tail chain, per-class
 * dequeue statistics and the caller's *pkt_cnt/*byte_cnt accumulators are
 * updated, and `pflags' (must be within PKT_F_COMMON_MASK) is OR'ed into
 * each packet.  On return, *qempty reflects whether the flow queue drained;
 * the return value is TRUE iff a limit was hit.
 */
static boolean_t
fq_getq_flow_kpkt(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    int64_t byte_limit, u_int32_t pkt_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *byte_cnt, u_int32_t *pkt_cnt,
    boolean_t *qempty, u_int32_t pflags)
{
	u_int32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	/*
	 * Assert to make sure pflags is part of PKT_F_COMMON_MASK;
	 * all common flags need to be declared in that mask.
	 */
	ASSERT((pflags & ~PKT_F_COMMON_MASK) == 0);

	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !KPKTQ_EMPTY(&fq->fq_kpktq)) {
		_PKTSCHED_PKT_INIT(&pkt);
		fq_getq_flow(fqs, fq, &pkt);
		ASSERT(pkt.pktsched_ptype == QP_PACKET);

		/* Charge the packet against the flow's DRR deficit */
		plen = pktsched_get_pkt_len(&pkt);
		fq->fq_deficit -= plen;
		pkt.pktsched_pkt_kpkt->pkt_pflags |= pflags;

		if (head->cp_kpkt == NULL) {
			*head = pkt.pktsched_pkt;
		} else {
			ASSERT(tail->cp_kpkt != NULL);
			ASSERT(tail->cp_kpkt->pkt_nextpkt == NULL);
			tail->cp_kpkt->pkt_nextpkt = pkt.pktsched_pkt_kpkt;
		}
		*tail = pkt.pktsched_pkt;
		tail->cp_kpkt->pkt_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
			limit_reached = TRUE;
		}
	}

	*qempty = KPKTQ_EMPTY(&fq->fq_kpktq);
	return limit_reached;
}
#endif /* SKYWALK */
165 
/*
 * Dequeue mbufs from flow `fq' while it has DRR deficit remaining and
 * neither the packet nor byte limit has been reached.  Mirrors
 * fq_getq_flow_kpkt() for the mbuf packet type: dequeued packets are
 * appended to the caller's *head/*tail chain, per-class statistics and the
 * *pkt_cnt/*byte_cnt accumulators are updated, and `pflags' is OR'ed into
 * each packet's header flags.  On return, *qempty reflects whether the flow
 * queue drained; the return value is TRUE iff a limit was hit.
 */
static boolean_t
fq_getq_flow_mbuf(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
    int64_t byte_limit, u_int32_t pkt_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *byte_cnt, u_int32_t *pkt_cnt,
    boolean_t *qempty, u_int32_t pflags)
{
	u_int32_t plen;
	pktsched_pkt_t pkt;
	boolean_t limit_reached = FALSE;
	struct ifclassq *ifq = fqs->fqs_ifq;
	struct ifnet *ifp = ifq->ifcq_ifp;

	while (fq->fq_deficit > 0 && limit_reached == FALSE &&
	    !MBUFQ_EMPTY(&fq->fq_mbufq)) {
		_PKTSCHED_PKT_INIT(&pkt);
		fq_getq_flow(fqs, fq, &pkt);
		ASSERT(pkt.pktsched_ptype == QP_MBUF);

		/* Charge the packet against the flow's DRR deficit */
		plen = pktsched_get_pkt_len(&pkt);
		fq->fq_deficit -= plen;
		pkt.pktsched_pkt_mbuf->m_pkthdr.pkt_flags |= pflags;

		if (head->cp_mbuf == NULL) {
			*head = pkt.pktsched_pkt;
		} else {
			ASSERT(tail->cp_mbuf != NULL);
			ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
			tail->cp_mbuf->m_nextpkt = pkt.pktsched_pkt_mbuf;
		}
		*tail = pkt.pktsched_pkt;
		tail->cp_mbuf->m_nextpkt = NULL;
		fq_cl->fcl_stat.fcl_dequeue++;
		fq_cl->fcl_stat.fcl_dequeue_bytes += plen;
		*pkt_cnt += 1;
		*byte_cnt += plen;

		ifclassq_set_packet_metadata(ifq, ifp, &pkt.pktsched_pkt);

		/* Check if the limit is reached */
		if (*pkt_cnt >= pkt_limit || *byte_cnt >= byte_limit) {
			limit_reached = TRUE;
		}
	}

	*qempty = MBUFQ_EMPTY(&fq->fq_mbufq);
	return limit_reached;
}
213 
/*
 * Allocate and initialize an fq-codel scheduler instance for interface
 * classq `ifq'.  Target queue delay, update interval and the aggregate
 * packet drop limit are derived from the interface.  Cannot fail:
 * allocation uses Z_WAITOK.
 */
fq_if_t *
fq_if_alloc(struct ifnet *ifp, struct ifclassq *ifq, classq_pkt_type_t ptype)
{
	fq_if_t *fqs;

	fqs = zalloc_flags(fq_if_zone, Z_WAITOK | Z_ZERO);
	fqs->fqs_ifq = ifq;
	fqs->fqs_ptype = ptype;

	/* Calculate target queue delay */
	ifclassq_calc_target_qdelay(ifp, &fqs->fqs_target_qdelay);

	/* Calculate update interval */
	ifclassq_calc_update_interval(&fqs->fqs_update_interval);

	/* Configure packet drop limit across all queues */
	fqs->fqs_pkt_droplimit = IFCQ_PKT_DROP_LIMIT(ifq);
	STAILQ_INIT(&fqs->fqs_fclist);
	return fqs;
}
234 
/*
 * Tear down a scheduler instance: purge all queued packets and flows,
 * then release the fq_if_t back to its zone.
 */
void
fq_if_destroy(fq_if_t *fqs)
{
	fq_if_purge(fqs);
	fqs->fqs_ifq = NULL;
	zfree(fq_if_zone, fqs);
}
242 
243 static inline uint8_t
fq_if_service_to_priority(fq_if_t * fqs,mbuf_svc_class_t svc)244 fq_if_service_to_priority(fq_if_t *fqs, mbuf_svc_class_t svc)
245 {
246 	uint8_t pri;
247 
248 	if (fqs->fqs_flags & FQS_DRIVER_MANAGED) {
249 		switch (svc) {
250 		case MBUF_SC_BK_SYS:
251 		case MBUF_SC_BK:
252 			pri = FQ_IF_BK_INDEX;
253 			break;
254 		case MBUF_SC_BE:
255 		case MBUF_SC_RD:
256 		case MBUF_SC_OAM:
257 			pri = FQ_IF_BE_INDEX;
258 			break;
259 		case MBUF_SC_AV:
260 		case MBUF_SC_RV:
261 		case MBUF_SC_VI:
262 		case MBUF_SC_SIG:
263 			pri = FQ_IF_VI_INDEX;
264 			break;
265 		case MBUF_SC_VO:
266 		case MBUF_SC_CTL:
267 			pri = FQ_IF_VO_INDEX;
268 			break;
269 		default:
270 			pri = FQ_IF_BE_INDEX; /* Use best effort by default */
271 			break;
272 		}
273 		return pri;
274 	}
275 
276 	/* scheduler is not managed by the driver */
277 	switch (svc) {
278 	case MBUF_SC_BK_SYS:
279 		pri = FQ_IF_BK_SYS_INDEX;
280 		break;
281 	case MBUF_SC_BK:
282 		pri = FQ_IF_BK_INDEX;
283 		break;
284 	case MBUF_SC_BE:
285 		pri = FQ_IF_BE_INDEX;
286 		break;
287 	case MBUF_SC_RD:
288 		pri = FQ_IF_RD_INDEX;
289 		break;
290 	case MBUF_SC_OAM:
291 		pri = FQ_IF_OAM_INDEX;
292 		break;
293 	case MBUF_SC_AV:
294 		pri = FQ_IF_AV_INDEX;
295 		break;
296 	case MBUF_SC_RV:
297 		pri = FQ_IF_RV_INDEX;
298 		break;
299 	case MBUF_SC_VI:
300 		pri = FQ_IF_VI_INDEX;
301 		break;
302 	case MBUF_SC_SIG:
303 		pri = FQ_IF_SIG_INDEX;
304 		break;
305 	case MBUF_SC_VO:
306 		pri = FQ_IF_VO_INDEX;
307 		break;
308 	case MBUF_SC_CTL:
309 		pri = FQ_IF_CTL_INDEX;
310 		break;
311 	default:
312 		pri = FQ_IF_BE_INDEX; /* Use best effort by default */
313 		break;
314 	}
315 	return pri;
316 }
317 
/*
 * Initialize the class queue at index `pri' with its DRR quantum,
 * maximum flow count used for budget replenishment, and service class.
 * Must be called exactly once per class (quantum starts at 0).
 */
static void
fq_if_classq_init(fq_if_t *fqs, uint32_t pri, uint32_t quantum,
    uint32_t drr_max, uint32_t svc_class)
{
	fq_if_classq_t *fq_cl;
	VERIFY(pri < FQ_IF_MAX_CLASSES);
	fq_cl = &fqs->fqs_classq[pri];

	/* a class may only be initialized once */
	VERIFY(fq_cl->fcl_quantum == 0);
	VERIFY(quantum != 0);
	fq_cl->fcl_quantum = quantum;
	fq_cl->fcl_pri = pri;
	fq_cl->fcl_drr_max = drr_max;
	fq_cl->fcl_service_class = svc_class;
	STAILQ_INIT(&fq_cl->fcl_new_flows);
	STAILQ_INIT(&fq_cl->fcl_old_flows);
}
335 
/*
 * Enqueue a chain of `cnt' packets (`bytes' total) onto the class queue
 * matching the chain's service class.  Returns 0 on success or an
 * errno-style code (EQFULL advisory, EQSUSPENDED when throttled,
 * ENOBUFS on drop); *pdrop tells the caller whether the chain was freed
 * here rather than enqueued.
 */
int
fq_if_enqueue_classq(struct ifclassq *ifq, classq_pkt_t *head,
    classq_pkt_t *tail, uint32_t cnt, uint32_t bytes, boolean_t *pdrop)
{
	uint8_t pri;
	fq_if_t *fqs;
	fq_if_classq_t *fq_cl;
	int ret;
	mbuf_svc_class_t svc;
	pktsched_pkt_t pkt;

	pktsched_pkt_encap_chain(&pkt, head, tail, cnt, bytes);

	fqs = (fq_if_t *)ifq->ifcq_disc;
	svc = pktsched_get_pkt_svc(&pkt);
	pri = fq_if_service_to_priority(fqs, svc);
	VERIFY(pri < FQ_IF_MAX_CLASSES);
	fq_cl = &fqs->fqs_classq[pri];

	if (__improbable(svc == MBUF_SC_BK_SYS && fqs->fqs_throttle == 1)) {
		/* BK_SYS is currently throttled */
		atomic_add_32(&fq_cl->fcl_stat.fcl_throttle_drops, 1);
		pktsched_free_pkt(&pkt);
		*pdrop = TRUE;
		ret = EQSUSPENDED;
		goto done;
	}

	IFCQ_LOCK_SPIN(ifq);
	ret = fq_addq(fqs, &pkt, fq_cl);
	if (!(fqs->fqs_flags & FQS_DRIVER_MANAGED) &&
	    !FQ_IF_CLASSQ_IDLE(fq_cl)) {
		if (((fqs->fqs_bitmaps[FQ_IF_ER] | fqs->fqs_bitmaps[FQ_IF_EB]) &
		    (1 << pri)) == 0) {
			/*
			 * this group is not in ER or EB groups,
			 * mark it as IB
			 */
			pktsched_bit_set(pri, &fqs->fqs_bitmaps[FQ_IF_IB]);
		}
	}

	if (__improbable(ret != 0)) {
		if (ret == CLASSQEQ_SUCCESS_FC) {
			/* packet enqueued, return advisory feedback */
			ret = EQFULL;
			*pdrop = FALSE;
		} else if (ret == CLASSQEQ_COMPRESSED) {
			/* merged into an existing entry; treated as success */
			ret = 0;
			*pdrop = FALSE;
		} else {
			/* drop path: translate classq code to errno */
			IFCQ_UNLOCK(ifq);
			*pdrop = TRUE;
			pktsched_free_pkt(&pkt);
			switch (ret) {
			case CLASSQEQ_DROP:
				ret = ENOBUFS;
				goto done;
			case CLASSQEQ_DROP_FC:
				ret = EQFULL;
				goto done;
			case CLASSQEQ_DROP_SP:
				ret = EQSUSPENDED;
				goto done;
			default:
				VERIFY(0);
				/* NOTREACHED */
				__builtin_unreachable();
			}
			/* NOTREACHED */
			__builtin_unreachable();
		}
	} else {
		*pdrop = FALSE;
	}
	IFCQ_ADD_LEN(ifq, cnt);
	IFCQ_INC_BYTES(ifq, bytes);
	IFCQ_UNLOCK(ifq);
done:
#if DEBUG || DEVELOPMENT
	if (__improbable((ret == EQFULL) && (ifclassq_flow_control_adv == 0))) {
		ret = 0;
	}
#endif /* DEBUG || DEVELOPMENT */
	return ret;
}
422 
/*
 * Dequeue a single packet from the scheduler; convenience wrapper around
 * fq_if_dequeue_classq_multi() with a one-packet limit.
 */
void
fq_if_dequeue_classq(struct ifclassq *ifq, classq_pkt_t *pkt)
{
	(void) fq_if_dequeue_classq_multi(ifq, 1,
	    CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL);
}
429 
/*
 * Dequeue a single packet from the class queue matching service class
 * `svc' (driver-managed dequeue path), crediting transmit statistics.
 */
void
fq_if_dequeue_sc_classq(struct ifclassq *ifq, mbuf_svc_class_t svc,
    classq_pkt_t *pkt)
{
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
	uint32_t total_pktcnt = 0, total_bytecnt = 0;
	fq_if_classq_t *fq_cl;
	uint8_t pri;

	pri = fq_if_service_to_priority(fqs, svc);
	fq_cl = &fqs->fqs_classq[pri];

	/* drvmgmt == TRUE: bypass the DRR budget machinery */
	fq_if_dequeue(fqs, fq_cl, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
	    pkt, NULL, &total_pktcnt, &total_bytecnt, NULL, TRUE);

	IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
}
447 
/*
 * Park flow `fq' on the caller's deferred-dequeue list so its per-flow
 * packet chain can be collected later; a flow may be on at most one list.
 */
static inline void
fq_dqlist_add(flowq_dqlist_t *fq_dqlist_head, fq_t *fq)
{
	ASSERT(fq->fq_dq_head.cp_mbuf == NULL);
	ASSERT(!fq->fq_in_dqlist);
	STAILQ_INSERT_TAIL(fq_dqlist_head, fq, fq_dqlink);
	fq->fq_in_dqlist = true;
}
456 
/*
 * Unlink flow `fq' from the deferred-dequeue list, splicing its staged
 * packet chain (fq_dq_head/fq_dq_tail) onto the caller's *head/*tail
 * chain.  A flow marked FQF_DESTROYED is freed here, once its staged
 * packets have been handed off.
 */
static inline void
fq_dqlist_remove(flowq_dqlist_t *fq_dqlist_head, fq_t *fq, classq_pkt_t *head,
    classq_pkt_t *tail)
{
	ASSERT(fq->fq_in_dqlist);
	/* nothing staged; just unlink the flow */
	if (fq->fq_dq_head.cp_mbuf == NULL) {
		goto done;
	}

	if (head->cp_mbuf == NULL) {
		*head = fq->fq_dq_head;
	} else {
		ASSERT(tail->cp_mbuf != NULL);

		/* splice using the linkage that matches the packet type */
		switch (fq->fq_ptype) {
		case QP_MBUF:
			ASSERT(tail->cp_mbuf->m_nextpkt == NULL);
			tail->cp_mbuf->m_nextpkt = fq->fq_dq_head.cp_mbuf;
			ASSERT(fq->fq_dq_tail.cp_mbuf->m_nextpkt == NULL);
			break;
#if SKYWALK
		case QP_PACKET:
			ASSERT(tail->cp_kpkt->pkt_nextpkt == NULL);
			tail->cp_kpkt->pkt_nextpkt = fq->fq_dq_head.cp_kpkt;
			ASSERT(fq->fq_dq_tail.cp_kpkt->pkt_nextpkt == NULL);
			break;
#endif /* SKYWALK */
		default:
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}
	*tail = fq->fq_dq_tail;
done:
	STAILQ_REMOVE(fq_dqlist_head, fq, flowq, fq_dqlink);
	CLASSQ_PKT_INIT(&fq->fq_dq_head);
	CLASSQ_PKT_INIT(&fq->fq_dq_tail);
	fq->fq_in_dqlist = false;
	/* destruction was deferred until the staged packets were handed off */
	if (fq->fq_flags & FQF_DESTROYED) {
		fq_destroy(fq);
	}
}
500 
501 static inline void
fq_dqlist_get_packet_list(flowq_dqlist_t * fq_dqlist_head,classq_pkt_t * head,classq_pkt_t * tail)502 fq_dqlist_get_packet_list(flowq_dqlist_t *fq_dqlist_head, classq_pkt_t *head,
503     classq_pkt_t *tail)
504 {
505 	fq_t *fq, *tfq;
506 
507 	STAILQ_FOREACH_SAFE(fq, fq_dqlist_head, fq_dqlink, tfq) {
508 		fq_dqlist_remove(fq_dqlist_head, fq, head, tail);
509 	}
510 }
511 
512 int
fq_if_dequeue_classq_multi(struct ifclassq * ifq,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt)513 fq_if_dequeue_classq_multi(struct ifclassq *ifq, u_int32_t maxpktcnt,
514     u_int32_t maxbytecnt, classq_pkt_t *first_packet,
515     classq_pkt_t *last_packet, u_int32_t *retpktcnt,
516     u_int32_t *retbytecnt)
517 {
518 	uint32_t total_pktcnt = 0, total_bytecnt = 0;
519 	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
520 	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
521 	classq_pkt_t tmp = CLASSQ_PKT_INITIALIZER(tmp);
522 	fq_if_append_pkt_t append_pkt;
523 	flowq_dqlist_t fq_dqlist_head;
524 	fq_if_classq_t *fq_cl;
525 	fq_if_t *fqs;
526 	int pri;
527 
528 	IFCQ_LOCK_ASSERT_HELD(ifq);
529 
530 	fqs = (fq_if_t *)ifq->ifcq_disc;
531 	STAILQ_INIT(&fq_dqlist_head);
532 
533 	switch (fqs->fqs_ptype) {
534 	case QP_MBUF:
535 		append_pkt = fq_if_append_mbuf;
536 		break;
537 
538 #if SKYWALK
539 	case QP_PACKET:
540 		append_pkt = fq_if_append_pkt;
541 		break;
542 #endif /* SKYWALK */
543 
544 	default:
545 		VERIFY(0);
546 		/* NOTREACHED */
547 		__builtin_unreachable();
548 	}
549 
550 	for (;;) {
551 		uint32_t pktcnt = 0, bytecnt = 0;
552 		classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
553 		classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
554 
555 		if (fqs->fqs_bitmaps[FQ_IF_ER] == 0 &&
556 		    fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
557 			fqs->fqs_bitmaps[FQ_IF_EB] = fqs->fqs_bitmaps[FQ_IF_IB];
558 			fqs->fqs_bitmaps[FQ_IF_IB] = 0;
559 			if (fqs->fqs_bitmaps[FQ_IF_EB] == 0) {
560 				break;
561 			}
562 		}
563 		pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_ER]);
564 		if (pri == 0) {
565 			/*
566 			 * There are no ER flows, move the highest
567 			 * priority one from EB if there are any in that
568 			 * category
569 			 */
570 			pri = pktsched_ffs(fqs->fqs_bitmaps[FQ_IF_EB]);
571 			VERIFY(pri > 0);
572 			pktsched_bit_clr((pri - 1),
573 			    &fqs->fqs_bitmaps[FQ_IF_EB]);
574 			pktsched_bit_set((pri - 1),
575 			    &fqs->fqs_bitmaps[FQ_IF_ER]);
576 		}
577 		pri--; /* index starts at 0 */
578 		fq_cl = &fqs->fqs_classq[pri];
579 
580 		if (fq_cl->fcl_budget <= 0) {
581 			/* Update the budget */
582 			fq_cl->fcl_budget += (min(fq_cl->fcl_drr_max,
583 			    fq_cl->fcl_stat.fcl_flows_cnt) *
584 			    fq_cl->fcl_quantum);
585 			if (fq_cl->fcl_budget <= 0) {
586 				goto state_change;
587 			}
588 		}
589 		fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
590 		    (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
591 		    &bytecnt, &fq_dqlist_head, FALSE);
592 		if (head.cp_mbuf != NULL) {
593 			ASSERT(STAILQ_EMPTY(&fq_dqlist_head));
594 			if (first.cp_mbuf == NULL) {
595 				first = head;
596 			} else {
597 				ASSERT(last.cp_mbuf != NULL);
598 				append_pkt(&last, &head);
599 			}
600 			last = tail;
601 			append_pkt(&last, &tmp);
602 		}
603 		fq_cl->fcl_budget -= bytecnt;
604 		total_pktcnt += pktcnt;
605 		total_bytecnt += bytecnt;
606 
607 		/*
608 		 * If the class has exceeded the budget but still has data
609 		 * to send, move it to IB
610 		 */
611 state_change:
612 		if (!FQ_IF_CLASSQ_IDLE(fq_cl)) {
613 			if (fq_cl->fcl_budget <= 0) {
614 				pktsched_bit_set(pri,
615 				    &fqs->fqs_bitmaps[FQ_IF_IB]);
616 				pktsched_bit_clr(pri,
617 				    &fqs->fqs_bitmaps[FQ_IF_ER]);
618 			}
619 		} else {
620 			pktsched_bit_clr(pri, &fqs->fqs_bitmaps[FQ_IF_ER]);
621 			VERIFY(((fqs->fqs_bitmaps[FQ_IF_ER] |
622 			    fqs->fqs_bitmaps[FQ_IF_EB] |
623 			    fqs->fqs_bitmaps[FQ_IF_IB]) & (1 << pri)) == 0);
624 			fq_cl->fcl_budget = 0;
625 		}
626 		if (total_pktcnt >= maxpktcnt || total_bytecnt >= maxbytecnt) {
627 			break;
628 		}
629 	}
630 
631 	fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last);
632 
633 	if (__probable(first_packet != NULL)) {
634 		*first_packet = first;
635 	}
636 	if (last_packet != NULL) {
637 		*last_packet = last;
638 	}
639 	if (retpktcnt != NULL) {
640 		*retpktcnt = total_pktcnt;
641 	}
642 	if (retbytecnt != NULL) {
643 		*retbytecnt = total_bytecnt;
644 	}
645 
646 	IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
647 	return 0;
648 }
649 
650 int
fq_if_dequeue_sc_classq_multi(struct ifclassq * ifq,mbuf_svc_class_t svc,u_int32_t maxpktcnt,u_int32_t maxbytecnt,classq_pkt_t * first_packet,classq_pkt_t * last_packet,u_int32_t * retpktcnt,u_int32_t * retbytecnt)651 fq_if_dequeue_sc_classq_multi(struct ifclassq *ifq, mbuf_svc_class_t svc,
652     u_int32_t maxpktcnt, u_int32_t maxbytecnt, classq_pkt_t *first_packet,
653     classq_pkt_t *last_packet, u_int32_t *retpktcnt, u_int32_t *retbytecnt)
654 {
655 	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
656 	uint8_t pri;
657 	u_int32_t total_pktcnt = 0, total_bytecnt = 0;
658 	fq_if_classq_t *fq_cl;
659 	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(fisrt);
660 	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
661 	fq_if_append_pkt_t append_pkt;
662 	flowq_dqlist_t fq_dqlist_head;
663 
664 	switch (fqs->fqs_ptype) {
665 	case QP_MBUF:
666 		append_pkt = fq_if_append_mbuf;
667 		break;
668 
669 #if SKYWALK
670 	case QP_PACKET:
671 		append_pkt = fq_if_append_pkt;
672 		break;
673 #endif /* SKYWALK */
674 
675 	default:
676 		VERIFY(0);
677 		/* NOTREACHED */
678 		__builtin_unreachable();
679 	}
680 
681 	STAILQ_INIT(&fq_dqlist_head);
682 	pri = fq_if_service_to_priority(fqs, svc);
683 	fq_cl = &fqs->fqs_classq[pri];
684 	/*
685 	 * Now we have the queue for a particular service class. We need
686 	 * to dequeue as many packets as needed, first from the new flows
687 	 * and then from the old flows.
688 	 */
689 	while (total_pktcnt < maxpktcnt && total_bytecnt < maxbytecnt &&
690 	    fq_cl->fcl_stat.fcl_pkt_cnt > 0) {
691 		classq_pkt_t head = CLASSQ_PKT_INITIALIZER(head);
692 		classq_pkt_t tail = CLASSQ_PKT_INITIALIZER(tail);
693 		u_int32_t pktcnt = 0, bytecnt = 0;
694 
695 		fq_if_dequeue(fqs, fq_cl, (maxpktcnt - total_pktcnt),
696 		    (maxbytecnt - total_bytecnt), &head, &tail, &pktcnt,
697 		    &bytecnt, &fq_dqlist_head, TRUE);
698 		if (head.cp_mbuf != NULL) {
699 			if (first.cp_mbuf == NULL) {
700 				first = head;
701 			} else {
702 				ASSERT(last.cp_mbuf != NULL);
703 				append_pkt(&last, &head);
704 			}
705 			last = tail;
706 		}
707 		total_pktcnt += pktcnt;
708 		total_bytecnt += bytecnt;
709 	}
710 
711 	fq_dqlist_get_packet_list(&fq_dqlist_head, &first, &last);
712 
713 	if (__probable(first_packet != NULL)) {
714 		*first_packet = first;
715 	}
716 	if (last_packet != NULL) {
717 		*last_packet = last;
718 	}
719 	if (retpktcnt != NULL) {
720 		*retpktcnt = total_pktcnt;
721 	}
722 	if (retbytecnt != NULL) {
723 		*retbytecnt = total_bytecnt;
724 	}
725 
726 	IFCQ_XMIT_ADD(ifq, total_pktcnt, total_bytecnt);
727 
728 	return 0;
729 }
730 
/*
 * Drain and free every packet queued on flow `fq', detach the flow from
 * its class's new/old flow list, and destroy it.  If the class queue is
 * left idle, its bit is cleared from every scheduler state bitmap.
 * The purged packet/byte counts are optionally returned via
 * pktsp/bytesp.
 */
static void
fq_if_purge_flow(fq_if_t *fqs, fq_t *fq, u_int32_t *pktsp,
    u_int32_t *bytesp)
{
	fq_if_classq_t *fq_cl;
	u_int32_t pkts, bytes;
	pktsched_pkt_t pkt;

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	pkts = bytes = 0;
	_PKTSCHED_PKT_INIT(&pkt);
	/* free all packets on the flow, counting them as drops */
	for (;;) {
		fq_getq_flow(fqs, fq, &pkt);
		if (pkt.pktsched_pkt_mbuf == NULL) {
			VERIFY(pkt.pktsched_ptype == QP_INVALID);
			break;
		}
		pkts++;
		bytes += pktsched_get_pkt_len(&pkt);
		pktsched_free_pkt(&pkt);
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, pkts, bytes);

	/* unlink the flow from whichever list it is on */
	if (fq->fq_flags & FQF_NEW_FLOW) {
		fq_if_empty_new_flow(fq, fq_cl, false);
	} else if (fq->fq_flags & FQF_OLD_FLOW) {
		fq_if_empty_old_flow(fqs, fq_cl, fq, false, true);
	}

	fq_if_destroy_flow(fqs, fq_cl, fq, true);

	if (FQ_IF_CLASSQ_IDLE(fq_cl)) {
		int i;
		/* idle class: clear its bit from ER/EB/IB groups */
		for (i = FQ_IF_ER; i < FQ_IF_MAX_STATE; i++) {
			pktsched_bit_clr(fq_cl->fcl_pri,
			    &fqs->fqs_bitmaps[i]);
		}
	}
	if (pktsp != NULL) {
		*pktsp = pkts;
	}
	if (bytesp != NULL) {
		*bytesp = bytes;
	}
}
776 
/*
 * Purge every flow (new and old lists) of class queue `fq_cl' and reset
 * its DRR budget.
 */
static void
fq_if_purge_classq(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_t *fq, *tfq;
	/*
	 * Take each flow from new/old flow list and flush mbufs
	 * in that flow
	 */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		fq_if_purge_flow(fqs, fq, NULL, NULL);
	}
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		fq_if_purge_flow(fqs, fq, NULL, NULL);
	}
	VERIFY(STAILQ_EMPTY(&fq_cl->fcl_new_flows));
	VERIFY(STAILQ_EMPTY(&fq_cl->fcl_old_flows));

	STAILQ_INIT(&fq_cl->fcl_new_flows);
	STAILQ_INIT(&fq_cl->fcl_old_flows);
	fq_cl->fcl_budget = 0;
}
798 
/*
 * Purge the entire scheduler: flush every class queue, verify all flow
 * hash buckets are empty, and reset the group bitmaps and classq
 * length/byte counters.
 */
static void
fq_if_purge(fq_if_t *fqs)
{
	int i;

	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	for (i = 0; i < FQ_IF_MAX_CLASSES; i++) {
		fq_if_purge_classq(fqs, &fqs->fqs_classq[i]);
	}

	VERIFY(STAILQ_EMPTY(&fqs->fqs_fclist));

	fqs->fqs_large_flow = NULL;
	for (i = 0; i < FQ_IF_HASH_TABLE_SIZE; i++) {
		VERIFY(SLIST_EMPTY(&fqs->fqs_flows[i]));
	}

	bzero(&fqs->fqs_bitmaps, sizeof(fqs->fqs_bitmaps));

	IFCQ_LEN(fqs->fqs_ifq) = 0;
	IFCQ_BYTES(fqs->fqs_ifq) = 0;
}
821 
/*
 * Purge the single flow identified by req->flow/req->sc, reporting the
 * number of packets and bytes freed back through `req'.  No-op if the
 * flow does not exist (lookup only; no flow is created).
 */
static void
fq_if_purge_sc(fq_if_t *fqs, cqrq_purge_sc_t *req)
{
	fq_t *fq;

	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
	req->packets = req->bytes = 0;
	VERIFY(req->flow != 0);

	/* packet type is needed only if we want to create a flow queue */
	fq = fq_if_hash_pkt(fqs, req->flow, req->sc, 0, FALSE, QP_INVALID);

	if (fq != NULL) {
		fq_if_purge_flow(fqs, fq, &req->packets, &req->bytes);
	}
}
838 
/*
 * Compute the base DRR quantum (in bytes) for an interface from its MTU
 * (plus Ethernet header for that family).  Interfaces with hardware TSO
 * get a quantum sized to the largest TSO payload instead.  The result is
 * clamped to at least FQ_CODEL_DEFAULT_QUANTUM and is never zero.
 */
static uint16_t
fq_if_calc_quantum(struct ifnet *ifp)
{
	uint16_t quantum;

	switch (ifp->if_family) {
	case IFNET_FAMILY_ETHERNET:
		VERIFY((ifp->if_mtu + ETHER_HDR_LEN) <= UINT16_MAX);
		quantum = (uint16_t)ifp->if_mtu + ETHER_HDR_LEN;
		break;

	case IFNET_FAMILY_CELLULAR:
	case IFNET_FAMILY_IPSEC:
	case IFNET_FAMILY_UTUN:
		VERIFY(ifp->if_mtu <= UINT16_MAX);
		quantum = (uint16_t)ifp->if_mtu;
		break;

	default:
		quantum = FQ_CODEL_DEFAULT_QUANTUM;
		break;
	}

	/*
	 * XXX: Skywalk native interface doesn't support HW TSO offload.
	 */
	if (((ifp->if_eflags & IFEF_SKYWALK_NATIVE) == 0) &&
	    ((ifp->if_hwassist & IFNET_TSOF) != 0)) {
		VERIFY(ifp->if_tso_v4_mtu <= UINT16_MAX);
		VERIFY(ifp->if_tso_v6_mtu <= UINT16_MAX);
		quantum = (uint16_t)MAX(ifp->if_tso_v4_mtu, ifp->if_tso_v6_mtu);
		/* fall back to the max MTU if neither TSO size is set */
		quantum = (quantum != 0) ? quantum : IF_MAXMTU;
	}

	quantum = MAX(FQ_CODEL_DEFAULT_QUANTUM, quantum);
#if DEBUG || DEVELOPMENT
	/* allow the sysctl override on internal builds */
	quantum = (fq_codel_quantum != 0) ? fq_codel_quantum : quantum;
#endif /* DEBUG || DEVELOPMENT */
	VERIFY(quantum != 0);
	return quantum;
}
880 
/*
 * Recompute every class queue's DRR quantum after an MTU change, scaling
 * the new base quantum per service class via the FQ_CODEL_QUANTUM_*
 * macros.  Driver-managed schedulers only expose the four BK/BE/VI/VO
 * classes.
 */
static void
fq_if_mtu_update(fq_if_t *fqs)
{
#define _FQ_CLASSQ_UPDATE_QUANTUM(_fqs, _s, _q)    \
	(_fqs)->fqs_classq[FQ_IF_ ## _s ## _INDEX].fcl_quantum = \
	FQ_CODEL_QUANTUM_ ## _s(_q)

	uint32_t quantum;

	quantum = fq_if_calc_quantum(fqs->fqs_ifq->ifcq_ifp);

	if ((fqs->fqs_flags & FQS_DRIVER_MANAGED) != 0) {
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, BK, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, BE, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, VI, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, VO, quantum);
	} else {
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, BK_SYS, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, BK, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, BE, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, RD, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, OAM, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, AV, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, RV, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, VI, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, VO, quantum);
		_FQ_CLASSQ_UPDATE_QUANTUM(fqs, CTL, quantum);
	}
#undef _FQ_CLASSQ_UPDATE_QUANTUM
}
911 
912 static void
fq_if_event(fq_if_t * fqs,cqev_t ev)913 fq_if_event(fq_if_t *fqs, cqev_t ev)
914 {
915 	IFCQ_LOCK_ASSERT_HELD(fqs->fqs_ifq);
916 
917 	switch (ev) {
918 	case CLASSQ_EV_LINK_UP:
919 	case CLASSQ_EV_LINK_DOWN:
920 		fq_if_purge(fqs);
921 		break;
922 	case CLASSQ_EV_LINK_MTU:
923 		fq_if_mtu_update(fqs);
924 		break;
925 	default:
926 		break;
927 	}
928 }
929 
/*
 * Suspend (throttle) a class queue: flush everything it holds and mark
 * the scheduler throttled.
 */
static void
fq_if_classq_suspend(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	fq_if_purge_classq(fqs, fq_cl);
	fqs->fqs_throttle = 1;
	fq_cl->fcl_stat.fcl_throttle_on++;
}
937 
/*
 * Resume a previously throttled class queue; it must be idle (it was
 * purged when suspended).
 */
static void
fq_if_classq_resume(fq_if_t *fqs, fq_if_classq_t *fq_cl)
{
	VERIFY(FQ_IF_CLASSQ_IDLE(fq_cl));
	fqs->fqs_throttle = 0;
	fq_cl->fcl_stat.fcl_throttle_off++;
}
945 
946 
947 static int
fq_if_throttle(fq_if_t * fqs,cqrq_throttle_t * tr)948 fq_if_throttle(fq_if_t *fqs, cqrq_throttle_t *tr)
949 {
950 	struct ifclassq *ifq = fqs->fqs_ifq;
951 	uint8_t index;
952 #if !MACH_ASSERT
953 #pragma unused(ifq)
954 #endif
955 	IFCQ_LOCK_ASSERT_HELD(ifq);
956 
957 	if (!tr->set) {
958 		tr->level = fqs->fqs_throttle;
959 		return 0;
960 	}
961 
962 	if (tr->level == fqs->fqs_throttle) {
963 		return EALREADY;
964 	}
965 
966 	/* Throttling is allowed on BK_SYS class only */
967 	index = fq_if_service_to_priority(fqs, MBUF_SC_BK_SYS);
968 	switch (tr->level) {
969 	case IFNET_THROTTLE_OFF:
970 		fq_if_classq_resume(fqs, &fqs->fqs_classq[index]);
971 		break;
972 	case IFNET_THROTTLE_OPPORTUNISTIC:
973 		fq_if_classq_suspend(fqs, &fqs->fqs_classq[index]);
974 		break;
975 	default:
976 		break;
977 	}
978 	return 0;
979 }
980 
981 void
fq_if_stat_sc(fq_if_t * fqs,cqrq_stat_sc_t * stat)982 fq_if_stat_sc(fq_if_t *fqs, cqrq_stat_sc_t *stat)
983 {
984 	uint8_t pri;
985 	fq_if_classq_t *fq_cl;
986 
987 	if (stat == NULL) {
988 		return;
989 	}
990 
991 	pri = fq_if_service_to_priority(fqs, stat->sc);
992 	fq_cl = &fqs->fqs_classq[pri];
993 	stat->packets = (uint32_t)fq_cl->fcl_stat.fcl_pkt_cnt;
994 	stat->bytes = (uint32_t)fq_cl->fcl_stat.fcl_byte_cnt;
995 }
996 
/*
 * Classq request dispatcher: routes purge, per-SC purge, event,
 * throttle and per-SC stat requests to their handlers.  Always returns
 * 0 (individual handler errors are not propagated).
 */
int
fq_if_request_classq(struct ifclassq *ifq, cqrq_t rq, void *arg)
{
	int err = 0;
	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	/*
	 * These are usually slow operations, convert the lock ahead of time
	 */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	switch (rq) {
	case CLASSQRQ_PURGE:
		fq_if_purge(fqs);
		break;
	case CLASSQRQ_PURGE_SC:
		fq_if_purge_sc(fqs, (cqrq_purge_sc_t *)arg);
		break;
	case CLASSQRQ_EVENT:
		fq_if_event(fqs, (cqev_t)arg);
		break;
	case CLASSQRQ_THROTTLE:
		fq_if_throttle(fqs, (cqrq_throttle_t *)arg);
		break;
	case CLASSQRQ_STAT_SC:
		fq_if_stat_sc(fqs, (cqrq_stat_sc_t *)arg);
		break;
	}
	return err;
}
1028 
1029 int
fq_if_setup_ifclassq(struct ifclassq * ifq,u_int32_t flags,classq_pkt_type_t ptype)1030 fq_if_setup_ifclassq(struct ifclassq *ifq, u_int32_t flags,
1031     classq_pkt_type_t ptype)
1032 {
1033 #pragma unused(flags)
1034 #define _FQ_CLASSQ_INIT(_fqs, _s, _q)                         \
1035 	fq_if_classq_init((_fqs), FQ_IF_ ## _s ## _INDEX,     \
1036 	FQ_CODEL_QUANTUM_ ## _s(_q), FQ_CODEL_DRR_MAX_ ## _s, \
1037 	MBUF_SC_ ## _s )
1038 
1039 	struct ifnet *ifp = ifq->ifcq_ifp;
1040 	fq_if_t *fqs = NULL;
1041 	uint32_t quantum;
1042 	int err = 0;
1043 
1044 	IFCQ_LOCK_ASSERT_HELD(ifq);
1045 	VERIFY(ifq->ifcq_disc == NULL);
1046 	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
1047 
1048 	fqs = fq_if_alloc(ifp, ifq, ptype);
1049 	if (fqs == NULL) {
1050 		return ENOMEM;
1051 	}
1052 
1053 	quantum = fq_if_calc_quantum(ifp);
1054 
1055 	if (flags & PKTSCHEDF_QALG_DRIVER_MANAGED) {
1056 		fqs->fqs_flags |= FQS_DRIVER_MANAGED;
1057 		_FQ_CLASSQ_INIT(fqs, BK, quantum);
1058 		_FQ_CLASSQ_INIT(fqs, BE, quantum);
1059 		_FQ_CLASSQ_INIT(fqs, VI, quantum);
1060 		_FQ_CLASSQ_INIT(fqs, VO, quantum);
1061 	} else {
1062 		/* SIG shares same INDEX with VI */
1063 		_CASSERT(SCIDX_SIG == SCIDX_VI);
1064 		_CASSERT(FQ_IF_SIG_INDEX == FQ_IF_VI_INDEX);
1065 
1066 		_FQ_CLASSQ_INIT(fqs, BK_SYS, quantum);
1067 		_FQ_CLASSQ_INIT(fqs, BK, quantum);
1068 		_FQ_CLASSQ_INIT(fqs, BE, quantum);
1069 		_FQ_CLASSQ_INIT(fqs, RD, quantum);
1070 		_FQ_CLASSQ_INIT(fqs, OAM, quantum);
1071 		_FQ_CLASSQ_INIT(fqs, AV, quantum);
1072 		_FQ_CLASSQ_INIT(fqs, RV, quantum);
1073 		_FQ_CLASSQ_INIT(fqs, VI, quantum);
1074 		_FQ_CLASSQ_INIT(fqs, VO, quantum);
1075 		_FQ_CLASSQ_INIT(fqs, CTL, quantum);
1076 	}
1077 
1078 	err = ifclassq_attach(ifq, PKTSCHEDT_FQ_CODEL, fqs);
1079 	if (err != 0) {
1080 		os_log_error(OS_LOG_DEFAULT, "%s: error from ifclassq_attach, "
1081 		    "failed to attach fq_if: %d\n", __func__, err);
1082 		fq_if_destroy(fqs);
1083 	}
1084 	return err;
1085 #undef _FQ_CLASSQ_INIT
1086 }
1087 
1088 fq_t *
fq_if_hash_pkt(fq_if_t * fqs,u_int32_t flowid,mbuf_svc_class_t svc_class,u_int64_t now,boolean_t create,classq_pkt_type_t ptype)1089 fq_if_hash_pkt(fq_if_t *fqs, u_int32_t flowid, mbuf_svc_class_t svc_class,
1090     u_int64_t now, boolean_t create, classq_pkt_type_t ptype)
1091 {
1092 	fq_t *fq = NULL;
1093 	flowq_list_t *fq_list;
1094 	fq_if_classq_t *fq_cl;
1095 	u_int8_t fqs_hash_id;
1096 	u_int8_t scidx;
1097 
1098 	scidx = fq_if_service_to_priority(fqs, svc_class);
1099 
1100 	fqs_hash_id = FQ_IF_FLOW_HASH_ID(flowid);
1101 
1102 	fq_list = &fqs->fqs_flows[fqs_hash_id];
1103 
1104 	SLIST_FOREACH(fq, fq_list, fq_hashlink) {
1105 		if (fq->fq_flowhash == flowid &&
1106 		    fq->fq_sc_index == scidx) {
1107 			break;
1108 		}
1109 	}
1110 	if (fq == NULL && create == TRUE) {
1111 #if SKYWALK
1112 		ASSERT((ptype == QP_MBUF) || (ptype == QP_PACKET));
1113 #else /* !SKYWALK */
1114 		ASSERT(ptype == QP_MBUF);
1115 #endif /* !SKYWALK */
1116 
1117 		/* If the flow is not already on the list, allocate it */
1118 		IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1119 		fq = fq_alloc(ptype);
1120 		if (fq != NULL) {
1121 			fq->fq_flowhash = flowid;
1122 			fq->fq_sc_index = scidx;
1123 			fq->fq_updatetime = now + fqs->fqs_update_interval;
1124 			fq_cl = &fqs->fqs_classq[scidx];
1125 			fq->fq_flags = FQF_FLOWCTL_CAPABLE;
1126 			SLIST_INSERT_HEAD(fq_list, fq, fq_hashlink);
1127 			fq_cl->fcl_stat.fcl_flows_cnt++;
1128 		}
1129 	}
1130 
1131 	/*
1132 	 * If getq time is not set because this is the first packet or after
1133 	 * idle time, set it now so that we can detect a stall.
1134 	 */
1135 	if (fq != NULL && fq->fq_getqtime == 0) {
1136 		fq->fq_getqtime = now;
1137 	}
1138 
1139 	return fq;
1140 }
1141 
1142 void
fq_if_destroy_flow(fq_if_t * fqs,fq_if_classq_t * fq_cl,fq_t * fq,bool destroy_now)1143 fq_if_destroy_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
1144     bool destroy_now)
1145 {
1146 	u_int8_t hash_id;
1147 	hash_id = FQ_IF_FLOW_HASH_ID(fq->fq_flowhash);
1148 	SLIST_REMOVE(&fqs->fqs_flows[hash_id], fq, flowq,
1149 	    fq_hashlink);
1150 	fq_cl->fcl_stat.fcl_flows_cnt--;
1151 	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1152 	if (__improbable(fq->fq_flags & FQF_FLOWCTL_ON)) {
1153 		fq_if_flow_feedback(fqs, fq, fq_cl);
1154 	}
1155 	fq->fq_flags |= FQF_DESTROYED;
1156 	if (destroy_now) {
1157 		fq_destroy(fq);
1158 	}
1159 }
1160 
1161 inline boolean_t
fq_if_at_drop_limit(fq_if_t * fqs)1162 fq_if_at_drop_limit(fq_if_t *fqs)
1163 {
1164 	return (IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit) ?
1165 	       TRUE : FALSE;
1166 }
1167 
1168 inline boolean_t
fq_if_almost_at_drop_limit(fq_if_t * fqs)1169 fq_if_almost_at_drop_limit(fq_if_t *fqs)
1170 {
1171 	/*
1172 	 * Whether we are above 90% of the queue limit. This is used to tell if we
1173 	 * can stop flow controlling the largest flow.
1174 	 */
1175 	return IFCQ_LEN(fqs->fqs_ifq) >= fqs->fqs_pkt_droplimit * 9 / 10;
1176 }
1177 
1178 static void
fq_if_empty_old_flow(fq_if_t * fqs,fq_if_classq_t * fq_cl,fq_t * fq,bool remove_hash,bool destroy)1179 fq_if_empty_old_flow(fq_if_t *fqs, fq_if_classq_t *fq_cl, fq_t *fq,
1180     bool remove_hash, bool destroy)
1181 {
1182 	/*
1183 	 * Remove the flow queue if it is empty
1184 	 * and delete it
1185 	 */
1186 	STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq, flowq,
1187 	    fq_actlink);
1188 	fq->fq_flags &= ~FQF_OLD_FLOW;
1189 	fq_cl->fcl_stat.fcl_oldflows_cnt--;
1190 	VERIFY(fq->fq_bytes == 0);
1191 
1192 	if (remove_hash) {
1193 		/* Remove from the hash list */
1194 		fq_if_destroy_flow(fqs, fq_cl, fq, destroy);
1195 	}
1196 }
1197 
1198 static void
fq_if_empty_new_flow(fq_t * fq,fq_if_classq_t * fq_cl,bool add_to_old)1199 fq_if_empty_new_flow(fq_t *fq, fq_if_classq_t *fq_cl, bool add_to_old)
1200 {
1201 	/* Move to the end of old queue list */
1202 	STAILQ_REMOVE(&fq_cl->fcl_new_flows, fq,
1203 	    flowq, fq_actlink);
1204 	fq->fq_flags &= ~FQF_NEW_FLOW;
1205 	fq_cl->fcl_stat.fcl_newflows_cnt--;
1206 
1207 	if (add_to_old) {
1208 		STAILQ_INSERT_TAIL(&fq_cl->fcl_old_flows, fq,
1209 		    fq_actlink);
1210 		fq->fq_flags |= FQF_OLD_FLOW;
1211 		fq_cl->fcl_stat.fcl_oldflows_cnt++;
1212 	}
1213 }
1214 
/*
 * Drop one packet from the flow currently tracked as the largest
 * (fqs_large_flow) to relieve queue-limit pressure.  No-op when no
 * large flow is tracked.  If the drop empties the flow, the flow is
 * retired from its new/old list (and freed when on the old list).
 */
inline void
fq_if_drop_packet(fq_if_t *fqs)
{
	fq_t *fq = fqs->fqs_large_flow;
	fq_if_classq_t *fq_cl;
	pktsched_pkt_t pkt;
	volatile uint32_t *pkt_flags;
	uint64_t *pkt_timestamp;

	if (fq == NULL) {
		return;
	}
	/* queue can not be empty on the largest flow */
	VERIFY(!fq_empty(fq));

	fq_cl = &fqs->fqs_classq[fq->fq_sc_index];
	_PKTSCHED_PKT_INIT(&pkt);
	/* Pull the head packet off the flow's queue */
	fq_getq_flow_internal(fqs, fq, &pkt);
	ASSERT(pkt.pktsched_ptype != QP_INVALID);

	pktsched_get_pkt_vars(&pkt, &pkt_flags, &pkt_timestamp, NULL, NULL,
	    NULL, NULL);

	/* Freeing the packet may block; take the lock exclusive first */
	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
	*pkt_timestamp = 0;
	switch (pkt.pktsched_ptype) {
	case QP_MBUF:
		*pkt_flags &= ~PKTF_PRIV_GUARDED;
		break;
#if SKYWALK
	case QP_PACKET:
		/* sanity check */
		ASSERT((*pkt_flags & ~PKT_F_COMMON_MASK) == 0);
		break;
#endif /* SKYWALK */
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* The drop may have emptied the flow; retire it if so */
	if (fq_empty(fq)) {
		fqs->fqs_large_flow = NULL;
		if (fq->fq_flags & FQF_OLD_FLOW) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, true, true);
		} else {
			VERIFY(fq->fq_flags & FQF_NEW_FLOW);
			fq_if_empty_new_flow(fq, fq_cl, true);
		}
	}
	IFCQ_DROP_ADD(fqs->fqs_ifq, 1, pktsched_get_pkt_len(&pkt));

	pktsched_free_pkt(&pkt);
	fq_cl->fcl_stat.fcl_drop_overflow++;
}
1270 
1271 inline void
fq_if_is_flow_heavy(fq_if_t * fqs,fq_t * fq)1272 fq_if_is_flow_heavy(fq_if_t *fqs, fq_t *fq)
1273 {
1274 	fq_t *prev_fq;
1275 
1276 	if (fqs->fqs_large_flow != NULL &&
1277 	    fqs->fqs_large_flow->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
1278 		fqs->fqs_large_flow = NULL;
1279 	}
1280 
1281 	if (fq == NULL || fq->fq_bytes < FQ_IF_LARGE_FLOW_BYTE_LIMIT) {
1282 		return;
1283 	}
1284 
1285 	prev_fq = fqs->fqs_large_flow;
1286 	if (prev_fq == NULL) {
1287 		if (!fq_empty(fq)) {
1288 			fqs->fqs_large_flow = fq;
1289 		}
1290 		return;
1291 	} else if (fq->fq_bytes > prev_fq->fq_bytes) {
1292 		fqs->fqs_large_flow = fq;
1293 	}
1294 }
1295 
1296 boolean_t
fq_if_add_fcentry(fq_if_t * fqs,pktsched_pkt_t * pkt,uint8_t flowsrc,fq_t * fq,fq_if_classq_t * fq_cl)1297 fq_if_add_fcentry(fq_if_t *fqs, pktsched_pkt_t *pkt, uint8_t flowsrc,
1298     fq_t *fq, fq_if_classq_t *fq_cl)
1299 {
1300 	struct flowadv_fcentry *fce;
1301 
1302 #if DEBUG || DEVELOPMENT
1303 	if (__improbable(ifclassq_flow_control_adv == 0)) {
1304 		os_log(OS_LOG_DEFAULT, "%s: skipped flow control", __func__);
1305 		return TRUE;
1306 	}
1307 #endif /* DEBUG || DEVELOPMENT */
1308 
1309 	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
1310 		if ((uint8_t)fce->fce_flowsrc_type == flowsrc &&
1311 		    fce->fce_flowid == fq->fq_flowhash) {
1312 			/* Already on flowcontrol list */
1313 			return TRUE;
1314 		}
1315 	}
1316 	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1317 	fce = pktsched_alloc_fcentry(pkt, fqs->fqs_ifq->ifcq_ifp, M_WAITOK);
1318 	if (fce != NULL) {
1319 		/* XXX Add number of bytes in the queue */
1320 		STAILQ_INSERT_TAIL(&fqs->fqs_fclist, fce, fce_link);
1321 		fq_cl->fcl_stat.fcl_flow_control++;
1322 		os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
1323 		    "flow: 0x%x, iface: %s\n", __func__,
1324 		    fq_cl->fcl_stat.fcl_flow_control,
1325 		    fq->fq_sc_index, fce->fce_flowsrc_type, fq->fq_flowhash,
1326 		    if_name(fqs->fqs_ifq->ifcq_ifp));
1327 	}
1328 	return (fce != NULL) ? TRUE : FALSE;
1329 }
1330 
1331 static void
fq_if_remove_fcentry(fq_if_t * fqs,struct flowadv_fcentry * fce)1332 fq_if_remove_fcentry(fq_if_t *fqs, struct flowadv_fcentry *fce)
1333 {
1334 	STAILQ_REMOVE(&fqs->fqs_fclist, fce, flowadv_fcentry, fce_link);
1335 	STAILQ_NEXT(fce, fce_link) = NULL;
1336 	flowadv_add_entry(fce);
1337 }
1338 
1339 void
fq_if_flow_feedback(fq_if_t * fqs,fq_t * fq,fq_if_classq_t * fq_cl)1340 fq_if_flow_feedback(fq_if_t *fqs, fq_t *fq, fq_if_classq_t *fq_cl)
1341 {
1342 	struct flowadv_fcentry *fce = NULL;
1343 
1344 	IFCQ_CONVERT_LOCK(fqs->fqs_ifq);
1345 	STAILQ_FOREACH(fce, &fqs->fqs_fclist, fce_link) {
1346 		if (fce->fce_flowid == fq->fq_flowhash) {
1347 			break;
1348 		}
1349 	}
1350 	if (fce != NULL) {
1351 		fq_cl->fcl_stat.fcl_flow_feedback++;
1352 		os_log(OS_LOG_DEFAULT, "%s: num: %d, scidx: %d, flowsrc: %d, "
1353 		    "flow: 0x%x, iface: %s\n", __func__,
1354 		    fq_cl->fcl_stat.fcl_flow_feedback, fq->fq_sc_index,
1355 		    fce->fce_flowsrc_type, fce->fce_flowid,
1356 		    if_name(fqs->fqs_ifq->ifcq_ifp));
1357 		fq_if_remove_fcentry(fqs, fce);
1358 	}
1359 	fq->fq_flags &= ~FQF_FLOWCTL_ON;
1360 }
1361 
/*
 * Dequeue up to `pktlimit' packets / `bytelimit' bytes from class
 * `fq_cl' using two DRR passes: new flows first, then old flows.
 * Dequeued packets are chained either onto top/bottom directly, or onto
 * per-flow chains recorded in `fq_dqlist' when it is non-NULL.  Totals
 * are returned through retpktcnt/retbytecnt when non-NULL.  Unless
 * `drvmgmt' is set, bytelimit is clamped to the class budget.
 */
void
fq_if_dequeue(fq_if_t *fqs, fq_if_classq_t *fq_cl, uint32_t pktlimit,
    int64_t bytelimit, classq_pkt_t *top, classq_pkt_t *bottom,
    uint32_t *retpktcnt, uint32_t *retbytecnt, flowq_dqlist_t *fq_dqlist,
    boolean_t drvmgmt)
{
	fq_t *fq = NULL, *tfq = NULL;
	flowq_stailq_t temp_stailq;
	uint32_t pktcnt, bytecnt;
	boolean_t qempty, limit_reached = FALSE;
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
	fq_getq_flow_t fq_getq_flow_fn;
	classq_pkt_t *head, *tail;

	/* Select the per-packet-type flow dequeue routine */
	switch (fqs->fqs_ptype) {
	case QP_MBUF:
		fq_getq_flow_fn = fq_getq_flow_mbuf;
		break;

#if SKYWALK
	case QP_PACKET:
		fq_getq_flow_fn = fq_getq_flow_kpkt;
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/*
	 * maximum byte limit should not be greater than the budget for
	 * this class
	 */
	if (bytelimit > fq_cl->fcl_budget && !drvmgmt) {
		bytelimit = fq_cl->fcl_budget;
	}

	VERIFY(pktlimit > 0 && bytelimit > 0 && top != NULL);
	pktcnt = bytecnt = 0;
	STAILQ_INIT(&temp_stailq);

	/* First DRR pass: serve the new flows */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_new_flows, fq_actlink, tfq) {
		ASSERT((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_NEW_FLOW);

		/*
		 * With a dequeue list, packets are chained per-flow and the
		 * flow is registered in the list; otherwise they go straight
		 * onto the caller's top/last chain.
		 */
		if (fq_dqlist != NULL) {
			if (!fq->fq_in_dqlist) {
				fq_dqlist_add(fq_dqlist, fq);
			}
			head = &fq->fq_dq_head;
			tail = &fq->fq_dq_tail;
		} else {
			ASSERT(!fq->fq_in_dqlist);
			head = top;
			tail = &last;
		}

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, head, tail, &bytecnt, &pktcnt, &qempty,
		    PKTF_NEW_FLOW);

		/* Deficit exhausted or drained: demote to the old list */
		if (fq->fq_deficit <= 0 || qempty) {
			fq_if_empty_new_flow(fq, fq_cl, true);
		}
		fq->fq_deficit += fq_cl->fcl_quantum;
		if (limit_reached) {
			goto done;
		}
	}

	/* Second DRR pass: serve the old flows */
	STAILQ_FOREACH_SAFE(fq, &fq_cl->fcl_old_flows, fq_actlink, tfq) {
		VERIFY((fq->fq_flags & (FQF_NEW_FLOW | FQF_OLD_FLOW)) ==
		    FQF_OLD_FLOW);
		bool destroy = true;

		if (fq_dqlist != NULL) {
			if (!fq->fq_in_dqlist) {
				fq_dqlist_add(fq_dqlist, fq);
			}
			head = &fq->fq_dq_head;
			tail = &fq->fq_dq_tail;
			/* still referenced via fq_dqlist; do not free here */
			destroy = false;
		} else {
			ASSERT(!fq->fq_in_dqlist);
			head = top;
			tail = &last;
		}

		limit_reached = fq_getq_flow_fn(fqs, fq_cl, fq, bytelimit,
		    pktlimit, head, tail, &bytecnt, &pktcnt, &qempty, 0);

		if (qempty) {
			fq_if_empty_old_flow(fqs, fq_cl, fq, true, destroy);
		} else if (fq->fq_deficit <= 0) {
			STAILQ_REMOVE(&fq_cl->fcl_old_flows, fq,
			    flowq, fq_actlink);
			/*
			 * Move to the end of the old queues list. We do not
			 * need to update the flow count since this flow
			 * will be added to the tail again
			 */
			STAILQ_INSERT_TAIL(&temp_stailq, fq, fq_actlink);
			fq->fq_deficit += fq_cl->fcl_quantum;
		}
		if (limit_reached) {
			break;
		}
	}

done:
	/* Splice the flows that ran out of deficit back onto the old list */
	if (!STAILQ_EMPTY(&fq_cl->fcl_old_flows)) {
		STAILQ_CONCAT(&fq_cl->fcl_old_flows, &temp_stailq);
	} else if (!STAILQ_EMPTY(&temp_stailq)) {
		fq_cl->fcl_old_flows = temp_stailq;
	}
	/* Report the chain tail and dequeue totals to the caller */
	if (last.cp_mbuf != NULL) {
		VERIFY(top->cp_mbuf != NULL);
		if (bottom != NULL) {
			*bottom = last;
		}
	}
	if (retpktcnt != NULL) {
		*retpktcnt = pktcnt;
	}
	if (retbytecnt != NULL) {
		*retbytecnt = bytecnt;
	}
}
1492 
1493 void
fq_if_teardown_ifclassq(struct ifclassq * ifq)1494 fq_if_teardown_ifclassq(struct ifclassq *ifq)
1495 {
1496 	fq_if_t *fqs = (fq_if_t *)ifq->ifcq_disc;
1497 
1498 	IFCQ_LOCK_ASSERT_HELD(ifq);
1499 	VERIFY(fqs != NULL && ifq->ifcq_type == PKTSCHEDT_FQ_CODEL);
1500 	fq_if_destroy(fqs);
1501 	ifq->ifcq_disc = NULL;
1502 	ifclassq_detach(ifq);
1503 }
1504 
1505 static void
fq_export_flowstats(fq_if_t * fqs,fq_t * fq,struct fq_codel_flowstats * flowstat)1506 fq_export_flowstats(fq_if_t *fqs, fq_t *fq,
1507     struct fq_codel_flowstats *flowstat)
1508 {
1509 	bzero(flowstat, sizeof(*flowstat));
1510 	flowstat->fqst_min_qdelay = (uint32_t)fq->fq_min_qdelay;
1511 	flowstat->fqst_bytes = fq->fq_bytes;
1512 	flowstat->fqst_flowhash = fq->fq_flowhash;
1513 	if (fq->fq_flags & FQF_NEW_FLOW) {
1514 		flowstat->fqst_flags |= FQ_FLOWSTATS_NEW_FLOW;
1515 	}
1516 	if (fq->fq_flags & FQF_OLD_FLOW) {
1517 		flowstat->fqst_flags |= FQ_FLOWSTATS_OLD_FLOW;
1518 	}
1519 	if (fq->fq_flags & FQF_DELAY_HIGH) {
1520 		flowstat->fqst_flags |= FQ_FLOWSTATS_DELAY_HIGH;
1521 	}
1522 	if (fq->fq_flags & FQF_FLOWCTL_ON) {
1523 		flowstat->fqst_flags |= FQ_FLOWSTATS_FLOWCTL_ON;
1524 	}
1525 	if (fqs->fqs_large_flow == fq) {
1526 		flowstat->fqst_flags |= FQ_FLOWSTATS_LARGE_FLOW;
1527 	}
1528 }
1529 
1530 int
fq_if_getqstats_ifclassq(struct ifclassq * ifq,u_int32_t qid,struct if_ifclassq_stats * ifqs)1531 fq_if_getqstats_ifclassq(struct ifclassq *ifq, u_int32_t qid,
1532     struct if_ifclassq_stats *ifqs)
1533 {
1534 	struct fq_codel_classstats *fcls;
1535 	fq_if_classq_t *fq_cl;
1536 	fq_if_t *fqs;
1537 	fq_t *fq = NULL;
1538 	u_int32_t i, flowstat_cnt;
1539 
1540 	if (qid >= FQ_IF_MAX_CLASSES) {
1541 		return EINVAL;
1542 	}
1543 
1544 	fqs = (fq_if_t *)ifq->ifcq_disc;
1545 	fcls = &ifqs->ifqs_fq_codel_stats;
1546 
1547 	fq_cl = &fqs->fqs_classq[qid];
1548 
1549 	fcls->fcls_pri = fq_cl->fcl_pri;
1550 	fcls->fcls_service_class = fq_cl->fcl_service_class;
1551 	fcls->fcls_quantum = fq_cl->fcl_quantum;
1552 	fcls->fcls_drr_max = fq_cl->fcl_drr_max;
1553 	fcls->fcls_budget = fq_cl->fcl_budget;
1554 	fcls->fcls_target_qdelay = fqs->fqs_target_qdelay;
1555 	fcls->fcls_update_interval = fqs->fqs_update_interval;
1556 	fcls->fcls_flow_control = fq_cl->fcl_stat.fcl_flow_control;
1557 	fcls->fcls_flow_feedback = fq_cl->fcl_stat.fcl_flow_feedback;
1558 	fcls->fcls_dequeue_stall = fq_cl->fcl_stat.fcl_dequeue_stall;
1559 	fcls->fcls_drop_overflow = fq_cl->fcl_stat.fcl_drop_overflow;
1560 	fcls->fcls_drop_early = fq_cl->fcl_stat.fcl_drop_early;
1561 	fcls->fcls_drop_memfailure = fq_cl->fcl_stat.fcl_drop_memfailure;
1562 	fcls->fcls_flows_cnt = fq_cl->fcl_stat.fcl_flows_cnt;
1563 	fcls->fcls_newflows_cnt = fq_cl->fcl_stat.fcl_newflows_cnt;
1564 	fcls->fcls_oldflows_cnt = fq_cl->fcl_stat.fcl_oldflows_cnt;
1565 	fcls->fcls_pkt_cnt = fq_cl->fcl_stat.fcl_pkt_cnt;
1566 	fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
1567 	fcls->fcls_flow_control_fail = fq_cl->fcl_stat.fcl_flow_control_fail;
1568 	fcls->fcls_dequeue = fq_cl->fcl_stat.fcl_dequeue;
1569 	fcls->fcls_dequeue_bytes = fq_cl->fcl_stat.fcl_dequeue_bytes;
1570 	fcls->fcls_byte_cnt = fq_cl->fcl_stat.fcl_byte_cnt;
1571 	fcls->fcls_throttle_on = fq_cl->fcl_stat.fcl_throttle_on;
1572 	fcls->fcls_throttle_off = fq_cl->fcl_stat.fcl_throttle_off;
1573 	fcls->fcls_throttle_drops = fq_cl->fcl_stat.fcl_throttle_drops;
1574 	fcls->fcls_dup_rexmts = fq_cl->fcl_stat.fcl_dup_rexmts;
1575 	fcls->fcls_pkts_compressible = fq_cl->fcl_stat.fcl_pkts_compressible;
1576 	fcls->fcls_pkts_compressed = fq_cl->fcl_stat.fcl_pkts_compressed;
1577 	fcls->fcls_min_qdelay = fq_cl->fcl_stat.fcl_min_qdelay;
1578 	fcls->fcls_max_qdelay = fq_cl->fcl_stat.fcl_max_qdelay;
1579 	fcls->fcls_avg_qdelay = fq_cl->fcl_stat.fcl_avg_qdelay;
1580 	fcls->fcls_overwhelming = fq_cl->fcl_stat.fcl_overwhelming;
1581 
1582 	/* Gather per flow stats */
1583 	flowstat_cnt = min((fcls->fcls_newflows_cnt +
1584 	    fcls->fcls_oldflows_cnt), FQ_IF_MAX_FLOWSTATS);
1585 	i = 0;
1586 	STAILQ_FOREACH(fq, &fq_cl->fcl_new_flows, fq_actlink) {
1587 		if (i >= fcls->fcls_newflows_cnt || i >= flowstat_cnt) {
1588 			break;
1589 		}
1590 
1591 		/* leave space for a few old flows */
1592 		if ((flowstat_cnt - i) < fcls->fcls_oldflows_cnt &&
1593 		    i >= (FQ_IF_MAX_FLOWSTATS >> 1)) {
1594 			break;
1595 		}
1596 		fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
1597 		i++;
1598 	}
1599 	STAILQ_FOREACH(fq, &fq_cl->fcl_old_flows, fq_actlink) {
1600 		if (i >= flowstat_cnt) {
1601 			break;
1602 		}
1603 		fq_export_flowstats(fqs, fq, &fcls->fcls_flowstats[i]);
1604 		i++;
1605 	}
1606 	VERIFY(i <= flowstat_cnt);
1607 	fcls->fcls_flowstats_cnt = i;
1608 	return 0;
1609 }
1610