xref: /xnu-8020.121.3/bsd/net/classq/classq_subr.c (revision fdd8201d7b966f0c3ea610489d29bd841d358941)
1 /*
2  * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/mbuf.h>
32 #include <sys/errno.h>
33 #include <sys/random.h>
34 #include <sys/kernel_types.h>
35 #include <sys/sysctl.h>
36 
37 #include <kern/zalloc.h>
38 
39 #include <net/if.h>
40 #include <net/net_osdep.h>
41 #include <net/classq/classq.h>
42 #include <pexpert/pexpert.h>
43 #include <net/classq/classq_sfb.h>
44 #include <net/classq/classq_fq_codel.h>
45 #include <net/pktsched/pktsched.h>
46 #include <net/pktsched/pktsched_fq_codel.h>
47 #include <net/flowadv.h>
48 
49 #include <libkern/libkern.h>
50 
51 #if SKYWALK
52 #include <skywalk/os_skywalk_private.h>
53 #endif /* SKYWALK */
54 
55 static errno_t ifclassq_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
56     u_int32_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
57     u_int32_t *, boolean_t);
58 static void ifclassq_tbr_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
59     boolean_t, classq_pkt_t *);
60 
61 static u_int64_t ifclassq_target_qdelay = 0;
62 SYSCTL_QUAD(_net_classq, OID_AUTO, target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
63     &ifclassq_target_qdelay, "target queue delay in nanoseconds");
64 
65 static u_int64_t ifclassq_update_interval = 0;
66 SYSCTL_QUAD(_net_classq, OID_AUTO, update_interval,
67     CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_update_interval,
68     "update interval in nanoseconds");
69 
70 #if DEBUG || DEVELOPMENT
71 uint32_t ifclassq_flow_control_adv = 1; /* flow control advisory */
72 SYSCTL_UINT(_net_classq, OID_AUTO, flow_control_adv,
73     CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_flow_control_adv, 1,
74     "enable/disable flow control advisory");
75 
76 uint16_t fq_codel_quantum = 0;
77 #endif /* DEBUG || DEVELOPMENT */
78 
79 static struct zone *ifcq_zone;          /* zone for ifclassq */
80 #define IFCQ_ZONE_NAME    "ifclassq"    /* zone name */
81 LCK_ATTR_DECLARE(ifcq_lock_attr, 0, 0);
82 static LCK_GRP_DECLARE(ifcq_lock_group, "ifclassq locks");
83 
void
classq_init(void)
{
	/*
	 * Compile-time sanity checks: the classq code assumes best-effort
	 * is class 0 and that its per-SC arrays match the mbuf SC count.
	 */
	_CASSERT(MBUF_TC_BE == 0);
	_CASSERT(MBUF_SC_BE == 0);
	_CASSERT(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);
#if DEBUG || DEVELOPMENT
	/* Boot-args may override the tunables on DEBUG/DEVELOPMENT kernels. */
	PE_parse_boot_argn("fq_codel_quantum", &fq_codel_quantum,
	    sizeof(fq_codel_quantum));
	PE_parse_boot_argn("ifclassq_target_qdelay", &ifclassq_target_qdelay,
	    sizeof(ifclassq_target_qdelay));
	PE_parse_boot_argn("ifclassq_update_interval",
	    &ifclassq_update_interval, sizeof(ifclassq_update_interval));
#endif /* DEBUG || DEVELOPMENT */
	/* Zone for struct ifclassq; ZC_ZFREE_CLEARMEM scrubs freed elements. */
	ifcq_zone = zone_create(IFCQ_ZONE_NAME, sizeof(struct ifclassq),
	    ZC_ZFREE_CLEARMEM);
	fq_codel_init();
}
102 
/*
 * Initialize an interface's classq and, for TXSTART interfaces, attach
 * the packet scheduler.  Returns 0 on success or an errno from
 * ifclassq_pktsched_setup().
 */
int
ifclassq_setup(struct ifclassq *ifq, struct ifnet *ifp, uint32_t sflags)
{
	int err = 0;

	IFCQ_LOCK(ifq);
	VERIFY(IFCQ_IS_EMPTY(ifq));
	ifq->ifcq_ifp = ifp;
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));

	/* A fresh (or fully torn-down) classq is expected here. */
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);

	if (ifp->if_eflags & IFEF_TXSTART) {
		u_int32_t maxlen = 0;

		/* A zero maxlen means "use the global default". */
		if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
			maxlen = if_sndq_maxlen;
		}
		IFCQ_SET_MAXLEN(ifq, maxlen);

		if (IFCQ_MAXLEN(ifq) != if_sndq_maxlen &&
		    IFCQ_TARGET_QDELAY(ifq) == 0) {
			/*
			 * Choose static queues because the interface has
			 * maximum queue size set
			 */
			sflags &= ~PKTSCHEDF_QALG_DELAYBASED;
		}
		ifq->ifcq_sflags = sflags;
		err = ifclassq_pktsched_setup(ifq);
		if (err == 0) {
			/* Scheduler attached; mark the classq usable. */
			ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
		}
	}
	IFCQ_UNLOCK(ifq);
	return err;
}
147 
/*
 * Tear down a classq: disable any token bucket regulator, detach the
 * scheduler, and reset all state.  Idempotent -- a second call on an
 * already-destroyed classq is a no-op.
 */
void
ifclassq_teardown(struct ifclassq *ifq)
{
	IFCQ_LOCK(ifq);
	if (IFCQ_IS_DESTROYED(ifq)) {
		/* Already destroyed; only the DESTROYED flag may remain. */
		ASSERT((ifq->ifcq_flags & ~IFCQF_DESTROYED) == 0);
		goto done;
	}
	if (IFCQ_IS_READY(ifq)) {
		if (IFCQ_TBR_IS_ENABLED(ifq)) {
			/* A zero-rate profile deletes the regulator. */
			struct tb_profile tb =
			{ .rate = 0, .percent = 0, .depth = 0 };
			(void) ifclassq_tbr_set(ifq, &tb, FALSE);
		}
		pktsched_teardown(ifq);
		ifq->ifcq_flags &= ~IFCQF_READY;
	}
	ifq->ifcq_sflags = 0;
	/* Scheduler teardown must have drained and detached everything. */
	VERIFY(IFCQ_IS_EMPTY(ifq));
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	IFCQ_MAXLEN(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));
	ifq->ifcq_flags |= IFCQF_DESTROYED;
done:
	IFCQ_UNLOCK(ifq);
}
181 
182 int
ifclassq_pktsched_setup(struct ifclassq * ifq)183 ifclassq_pktsched_setup(struct ifclassq *ifq)
184 {
185 	struct ifnet *ifp = ifq->ifcq_ifp;
186 	classq_pkt_type_t ptype = QP_MBUF;
187 	int err = 0;
188 
189 	IFCQ_LOCK_ASSERT_HELD(ifq);
190 	VERIFY(ifp->if_eflags & IFEF_TXSTART);
191 #if SKYWALK
192 	ptype = ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) != 0) ? QP_PACKET :
193 	    QP_MBUF;
194 #endif /* SKYWALK */
195 
196 	err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL, ifq->ifcq_sflags, ptype);
197 
198 	return err;
199 }
200 
201 void
ifclassq_set_maxlen(struct ifclassq * ifq,u_int32_t maxqlen)202 ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen)
203 {
204 	IFCQ_LOCK(ifq);
205 	if (maxqlen == 0) {
206 		maxqlen = if_sndq_maxlen;
207 	}
208 	IFCQ_SET_MAXLEN(ifq, maxqlen);
209 	IFCQ_UNLOCK(ifq);
210 }
211 
212 u_int32_t
ifclassq_get_maxlen(struct ifclassq * ifq)213 ifclassq_get_maxlen(struct ifclassq *ifq)
214 {
215 	return IFCQ_MAXLEN(ifq);
216 }
217 
/*
 * Report queue occupancy.  With sc == MBUF_SC_UNSPEC, *packets gets the
 * aggregate queue length; otherwise the scheduler is queried for the
 * per-service-class packet and byte counts.  Returns 0 or an errno from
 * the scheduler request.
 */
int
ifclassq_get_len(struct ifclassq *ifq, mbuf_svc_class_t sc, u_int32_t *packets,
    u_int32_t *bytes)
{
	int err = 0;

	IFCQ_LOCK(ifq);
	if (sc == MBUF_SC_UNSPEC) {
		VERIFY(packets != NULL);
		*packets = IFCQ_LEN(ifq);
	} else {
		cqrq_stat_sc_t req = { sc, 0, 0 };

		VERIFY(MBUF_VALID_SC(sc));
		VERIFY(packets != NULL && bytes != NULL);

		err = fq_if_request_classq(ifq, CLASSQRQ_STAT_SC, &req);
		/* NULL checks are redundant after the VERIFY above; kept
		 * as defensive belt-and-suspenders. */
		if (packets != NULL) {
			*packets = req.packets;
		}
		if (bytes != NULL) {
			*bytes = req.bytes;
		}
	}
	IFCQ_UNLOCK(ifq);

#if SKYWALK
	struct ifnet *ifp = ifq->ifcq_ifp;

	/* Let a netif agent adjust the reported lengths, if one is bound. */
	if (__improbable(ifp->if_na_ops != NULL &&
	    ifp->if_na_ops->ni_get_len != NULL)) {
		err = ifp->if_na_ops->ni_get_len(ifp->if_na, sc, packets,
		    bytes, err);
	}
#endif /* SKYWALK */

	return err;
}
256 
/*
 * Stamp a dequeued packet with unsent-data accounting, used for cellular
 * interfaces only (buffer status reporting to the baseband).
 */
inline void
ifclassq_set_packet_metadata(struct ifclassq *ifq, struct ifnet *ifp,
    classq_pkt_t *p)
{
	if (!IFNET_IS_CELLULAR(ifp)) {
		return;
	}

	switch (p->cp_ptype) {
	case QP_MBUF: {
		struct mbuf *m = p->cp_mbuf;
		m->m_pkthdr.pkt_flags |= PKTF_VALID_UNSENT_DATA;
		/* Snapshot of queued bytes at interface and socket level. */
		m->m_pkthdr.bufstatus_if = IFCQ_BYTES(ifq);
		m->m_pkthdr.bufstatus_sndbuf = (uint32_t)ifp->if_sndbyte_unsent;
		break;
	}

#if SKYWALK
	case QP_PACKET:
		/*
		 * Support for equivalent of mbuf_get_unsent_data_bytes()
		 * is not needed in the Skywalk architecture.
		 */
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
289 
/* Enqueue a chain of packets; thin wrapper over the FQ-CoDel scheduler. */
errno_t
ifclassq_enqueue(struct ifclassq *ifq, classq_pkt_t *head, classq_pkt_t *tail,
    u_int32_t cnt, u_int32_t bytes, boolean_t *pdrop)
{
	return fq_if_enqueue_classq(ifq, head, tail, cnt, bytes, pdrop);
}
296 
/* Dequeue up to pkt_limit/byte_limit packets from any service class. */
errno_t
ifclassq_dequeue(struct ifclassq *ifq, u_int32_t pkt_limit,
    u_int32_t byte_limit, classq_pkt_t *head, classq_pkt_t *tail,
    u_int32_t *cnt, u_int32_t *len)
{
	return ifclassq_dequeue_common(ifq, MBUF_SC_UNSPEC, pkt_limit,
	           byte_limit, head, tail, cnt, len, FALSE);
}
305 
/* Dequeue packets restricted to a single service class (driver-managed). */
errno_t
ifclassq_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len)
{
	return ifclassq_dequeue_common(ifq, sc, pkt_limit, byte_limit,
	           head, tail, cnt, len, TRUE);
}
314 
315 static errno_t
ifclassq_dequeue_common_default(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int32_t pkt_limit,u_int32_t byte_limit,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t * cnt,u_int32_t * len,boolean_t drvmgt)316 ifclassq_dequeue_common_default(struct ifclassq *ifq, mbuf_svc_class_t sc,
317     u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
318     classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt)
319 {
320 	struct ifnet *ifp = ifq->ifcq_ifp;
321 	u_int32_t i = 0, l = 0;
322 	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
323 	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
324 
325 	VERIFY(!drvmgt || MBUF_VALID_SC(sc));
326 
327 	if (IFCQ_TBR_IS_ENABLED(ifq)) {
328 		goto dequeue_loop;
329 	}
330 
331 	/*
332 	 * If the scheduler support dequeueing multiple packets at the
333 	 * same time, call that one instead.
334 	 */
335 	if (drvmgt) {
336 		int err;
337 
338 		IFCQ_LOCK_SPIN(ifq);
339 		err = fq_if_dequeue_sc_classq_multi(ifq, sc, pkt_limit,
340 		    byte_limit, head, tail, cnt, len);
341 		IFCQ_UNLOCK(ifq);
342 
343 		if (err == 0 && head->cp_mbuf == NULL) {
344 			err = EAGAIN;
345 		}
346 		return err;
347 	} else {
348 		int err;
349 
350 		IFCQ_LOCK_SPIN(ifq);
351 		err = fq_if_dequeue_classq_multi(ifq, pkt_limit, byte_limit,
352 		    head, tail, cnt, len);
353 		IFCQ_UNLOCK(ifq);
354 
355 		if (err == 0 && head->cp_mbuf == NULL) {
356 			err = EAGAIN;
357 		}
358 		return err;
359 	}
360 
361 dequeue_loop:
362 
363 	IFCQ_LOCK_SPIN(ifq);
364 
365 	while (i < pkt_limit && l < byte_limit) {
366 		if (drvmgt) {
367 			if (IFCQ_TBR_IS_ENABLED(ifq)) {
368 				IFCQ_TBR_DEQUEUE_SC(ifq, sc, head);
369 			} else {
370 				fq_if_dequeue_sc_classq(ifq, sc, head);
371 			}
372 		} else {
373 			if (IFCQ_TBR_IS_ENABLED(ifq)) {
374 				IFCQ_TBR_DEQUEUE(ifq, head);
375 			} else {
376 				fq_if_dequeue_classq(ifq, head);
377 			}
378 		}
379 
380 		if (head->cp_mbuf == NULL) {
381 			break;
382 		}
383 
384 		if (first.cp_mbuf == NULL) {
385 			first = *head;
386 		}
387 
388 		switch (head->cp_ptype) {
389 		case QP_MBUF:
390 			head->cp_mbuf->m_nextpkt = NULL;
391 			l += head->cp_mbuf->m_pkthdr.len;
392 			ifclassq_set_packet_metadata(ifq, ifp, head);
393 			if (last.cp_mbuf != NULL) {
394 				last.cp_mbuf->m_nextpkt = head->cp_mbuf;
395 			}
396 			break;
397 
398 #if SKYWALK
399 		case QP_PACKET:
400 			head->cp_kpkt->pkt_nextpkt = NULL;
401 			l += head->cp_kpkt->pkt_length;
402 			ifclassq_set_packet_metadata(ifq, ifp, head);
403 			if (last.cp_kpkt != NULL) {
404 				last.cp_kpkt->pkt_nextpkt = head->cp_kpkt;
405 			}
406 			break;
407 #endif /* SKYWALK */
408 
409 		default:
410 			VERIFY(0);
411 			/* NOTREACHED */
412 			__builtin_unreachable();
413 		}
414 
415 		last = *head;
416 		i++;
417 	}
418 
419 	IFCQ_UNLOCK(ifq);
420 
421 	if (tail != NULL) {
422 		*tail = last;
423 	}
424 	if (cnt != NULL) {
425 		*cnt = i;
426 	}
427 	if (len != NULL) {
428 		*len = l;
429 	}
430 
431 	*head = first;
432 	return (first.cp_mbuf != NULL) ? 0 : EAGAIN;
433 }
434 
/*
 * Dispatch a dequeue request, giving a bound netif agent (Skywalk) the
 * chance to post-process the result; otherwise use the default path.
 */
static errno_t
ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt)
{
#if SKYWALK
	struct ifnet *ifp = ifq->ifcq_ifp;

	if (__improbable(ifp->if_na_ops != NULL &&
	    ifp->if_na_ops->ni_dequeue != NULL)) {
		/*
		 * TODO:
		 * We should be changing the pkt/byte limit to the
		 * available space in the next filter. But this is not
		 * useful until we can flow control the whole chain of
		 * filters.
		 */
		errno_t err = ifclassq_dequeue_common_default(ifq, sc,
		    pkt_limit, byte_limit, head, tail, cnt, len, drvmgt);

		return ifp->if_na_ops->ni_dequeue(ifp->if_na, sc, pkt_limit,
		           byte_limit, head, tail, cnt, len, drvmgt, err);
	}
#endif /* SKYWALK */
	return ifclassq_dequeue_common_default(ifq, sc,
	           pkt_limit, byte_limit, head, tail, cnt, len, drvmgt);
}
462 
/* Forward a link event (bandwidth/latency/MTU/up/down) to the scheduler. */
void
ifclassq_update(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));
	fq_if_request_classq(ifq, CLASSQRQ_EVENT, (void *)ev);
}
470 
471 int
ifclassq_attach(struct ifclassq * ifq,u_int32_t type,void * discipline)472 ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline)
473 {
474 	IFCQ_LOCK_ASSERT_HELD(ifq);
475 	VERIFY(ifq->ifcq_disc == NULL);
476 	ifq->ifcq_type = type;
477 	ifq->ifcq_disc = discipline;
478 	return 0;
479 }
480 
/*
 * Mark the classq as having no scheduler.  The discipline pointer must
 * already have been cleared by the scheduler's own teardown.
 */
void
ifclassq_detach(struct ifclassq *ifq)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	ifq->ifcq_type = PKTSCHEDT_NONE;
}
488 
489 int
ifclassq_getqstats(struct ifclassq * ifq,u_int32_t qid,void * ubuf,u_int32_t * nbytes)490 ifclassq_getqstats(struct ifclassq *ifq, u_int32_t qid, void *ubuf,
491     u_int32_t *nbytes)
492 {
493 	struct if_ifclassq_stats *ifqs;
494 	int err;
495 
496 	if (*nbytes < sizeof(*ifqs)) {
497 		return EINVAL;
498 	}
499 
500 	ifqs = kalloc_type(struct if_ifclassq_stats,
501 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
502 
503 	IFCQ_LOCK(ifq);
504 	if (!IFCQ_IS_READY(ifq)) {
505 		IFCQ_UNLOCK(ifq);
506 		kfree_type(struct if_ifclassq_stats, ifqs);
507 		return ENXIO;
508 	}
509 
510 	ifqs->ifqs_len = IFCQ_LEN(ifq);
511 	ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq);
512 	*(&ifqs->ifqs_xmitcnt) = *(&ifq->ifcq_xmitcnt);
513 	*(&ifqs->ifqs_dropcnt) = *(&ifq->ifcq_dropcnt);
514 	ifqs->ifqs_scheduler = ifq->ifcq_type;
515 
516 	err = pktsched_getqstats(ifq, qid, ifqs);
517 	IFCQ_UNLOCK(ifq);
518 
519 	if (err == 0 && (err = copyout((caddr_t)ifqs,
520 	    (user_addr_t)(uintptr_t)ubuf, sizeof(*ifqs))) == 0) {
521 		*nbytes = sizeof(*ifqs);
522 	}
523 
524 	kfree_type(struct if_ifclassq_stats, ifqs);
525 
526 	return err;
527 }
528 
529 const char *
ifclassq_ev2str(cqev_t ev)530 ifclassq_ev2str(cqev_t ev)
531 {
532 	const char *c;
533 
534 	switch (ev) {
535 	case CLASSQ_EV_LINK_BANDWIDTH:
536 		c = "LINK_BANDWIDTH";
537 		break;
538 
539 	case CLASSQ_EV_LINK_LATENCY:
540 		c = "LINK_LATENCY";
541 		break;
542 
543 	case CLASSQ_EV_LINK_MTU:
544 		c = "LINK_MTU";
545 		break;
546 
547 	case CLASSQ_EV_LINK_UP:
548 		c = "LINK_UP";
549 		break;
550 
551 	case CLASSQ_EV_LINK_DOWN:
552 		c = "LINK_DOWN";
553 		break;
554 
555 	default:
556 		c = "UNKNOWN";
557 		break;
558 	}
559 
560 	return c;
561 }
562 
563 /*
564  * internal representation of token bucket parameters
565  *	rate:	byte_per_unittime << 32
566  *		(((bits_per_sec) / 8) << 32) / machclk_freq
567  *	depth:	byte << 32
568  *
569  */
570 #define TBR_SHIFT       32
571 #define TBR_SCALE(x)    ((int64_t)(x) << TBR_SHIFT)
572 #define TBR_UNSCALE(x)  ((x) >> TBR_SHIFT)
573 
/* TBR dequeue for any service class. */
void
ifclassq_tbr_dequeue(struct ifclassq *ifq, classq_pkt_t *pkt)
{
	ifclassq_tbr_dequeue_common(ifq, MBUF_SC_UNSPEC, FALSE, pkt);
}
579 
/* TBR dequeue restricted to a single service class (driver-managed). */
void
ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    classq_pkt_t *pkt)
{
	ifclassq_tbr_dequeue_common(ifq, sc, TRUE, pkt);
}
586 
/*
 * Token-bucket-regulated dequeue.  Refills the bucket based on elapsed
 * machclk time, refuses to dequeue while the token count is non-positive,
 * and charges the dequeued packet's length (scaled by TBR_SHIFT) against
 * the bucket.  On rate-limit denial *pkt is left empty.
 */
static void
ifclassq_tbr_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    boolean_t drvmgt, classq_pkt_t *pkt)
{
	struct tb_regulator *tbr;
	int64_t interval;
	u_int64_t now;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));

	*pkt = CLASSQ_PKT_INITIALIZER(*pkt);
	tbr = &ifq->ifcq_tbr;
	/* update token only when it is negative */
	if (tbr->tbr_token <= 0) {
		now = read_machclk();
		interval = now - tbr->tbr_last;
		if (interval >= tbr->tbr_filluptime) {
			/* Enough time elapsed to fill the bucket entirely. */
			tbr->tbr_token = tbr->tbr_depth;
		} else {
			/* Accrue tokens for the elapsed time, capped at depth. */
			tbr->tbr_token += interval * tbr->tbr_rate;
			if (tbr->tbr_token > tbr->tbr_depth) {
				tbr->tbr_token = tbr->tbr_depth;
			}
		}
		tbr->tbr_last = now;
	}
	/* if token is still negative, don't allow dequeue */
	if (tbr->tbr_token <= 0) {
		return;
	}

	/*
	 * ifclassq takes precedence over ALTQ queue;
	 * ifcq_drain count is adjusted by the caller.
	 */
	if (drvmgt) {
		fq_if_dequeue_sc_classq(ifq, sc, pkt);
	} else {
		fq_if_dequeue_classq(ifq, pkt);
	}

	if (pkt->cp_mbuf != NULL) {
		/* Charge the packet length against the bucket. */
		switch (pkt->cp_ptype) {
		case QP_MBUF:
			tbr->tbr_token -= TBR_SCALE(m_pktlen(pkt->cp_mbuf));
			break;

#if SKYWALK
		case QP_PACKET:
			tbr->tbr_token -=
			    TBR_SCALE(pkt->cp_kpkt->pkt_length);
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
		}
	}
}
650 
651 /*
652  * set a token bucket regulator.
653  * if the specified rate is zero, the token bucket regulator is deleted.
654  */
int
ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
    boolean_t update)
{
	struct tb_regulator *tbr;
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int64_t rate, old_rate;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));

	VERIFY(machclk_freq != 0);

	tbr = &ifq->ifcq_tbr;
	old_rate = tbr->tbr_rate_raw;

	rate = profile->rate;
	if (profile->percent > 0) {
		/*
		 * A percentage-based profile derives the rate from the
		 * interface's current effective output bandwidth.
		 */
		u_int64_t eff_rate;

		if (profile->percent > 100) {
			return EINVAL;
		}
		if ((eff_rate = ifp->if_output_bw.eff_bw) == 0) {
			return ENODEV;
		}
		rate = (eff_rate * profile->percent) / 100;
	}

	if (rate == 0) {
		/* Zero rate deletes the regulator (no-op if none active). */
		if (!IFCQ_TBR_IS_ENABLED(ifq)) {
			return 0;
		}

		if (pktsched_verbose) {
			printf("%s: TBR disabled\n", if_name(ifp));
		}

		/* disable this TBR */
		ifq->ifcq_flags &= ~IFCQF_TBR;
		bzero(tbr, sizeof(*tbr));
		ifnet_set_start_cycle(ifp, NULL);
		if (update) {
			ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
		}
		return 0;
	}

	if (pktsched_verbose) {
		printf("%s: TBR %s (rate %llu bps depth %u)\n", if_name(ifp),
		    (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" :
		    "enabled", rate, profile->depth);
	}

	/* set the new TBR */
	bzero(tbr, sizeof(*tbr));
	tbr->tbr_rate_raw = rate;
	tbr->tbr_percent = profile->percent;
	ifq->ifcq_flags |= IFCQF_TBR;

	/*
	 * Note that the TBR fill up time (hence the ifnet restart time)
	 * is directly related to the specified TBR depth.  The ideal
	 * depth value should be computed such that the interval time
	 * between each successive wakeup is adequately spaced apart,
	 * in order to reduce scheduling overheads.  A target interval
	 * of 10 ms seems to provide good performance balance.  This can be
	 * overridden by specifying the depth profile.  Values smaller than
	 * the ideal depth will reduce delay at the expense of CPU cycles.
	 */
	tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq;
	if (tbr->tbr_rate > 0) {
		u_int32_t mtu = ifp->if_mtu;
		int64_t ival, idepth = 0;
		int i;

		if (mtu < IF_MINMTU) {
			mtu = IF_MINMTU;
		}

		ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */

		/*
		 * Find the smallest whole-MTU multiple whose drain time
		 * exceeds the 10 ms target interval.
		 */
		for (i = 1;; i++) {
			idepth = TBR_SCALE(i * mtu);
			if ((idepth / tbr->tbr_rate) > ival) {
				break;
			}
		}
		VERIFY(idepth > 0);

		tbr->tbr_depth = TBR_SCALE(profile->depth);
		if (tbr->tbr_depth == 0) {
			tbr->tbr_filluptime = idepth / tbr->tbr_rate;
			/* a little fudge factor to get closer to rate */
			tbr->tbr_depth = idepth + (idepth >> 3);
		} else {
			tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
		}
	} else {
		/* Rate rounds down to zero per tick: effectively unlimited. */
		tbr->tbr_depth = TBR_SCALE(profile->depth);
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	}
	/* Start with a full bucket. */
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();

	if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) {
		/* Schedule periodic restarts matched to the fill-up time. */
		struct timespec ts =
		{ 0, (long)pktsched_abs_to_nsecs(tbr->tbr_filluptime) };
		if (pktsched_verbose) {
			printf("%s: TBR calculated tokens %lld "
			    "filluptime %llu ns\n", if_name(ifp),
			    TBR_UNSCALE(tbr->tbr_token),
			    pktsched_abs_to_nsecs(tbr->tbr_filluptime));
		}
		ifnet_set_start_cycle(ifp, &ts);
	} else {
		if (pktsched_verbose) {
			if (tbr->tbr_rate == 0) {
				printf("%s: TBR calculated tokens %lld "
				    "infinite filluptime\n", if_name(ifp),
				    TBR_UNSCALE(tbr->tbr_token));
			} else if (!(ifp->if_flags & IFF_UP)) {
				printf("%s: TBR suspended (link is down)\n",
				    if_name(ifp));
			}
		}
		ifnet_set_start_cycle(ifp, NULL);
	}
	/* Notify the scheduler only when the effective rate changed. */
	if (update && tbr->tbr_rate_raw != old_rate) {
		ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
	}

	return 0;
}
789 
790 void
ifclassq_calc_target_qdelay(struct ifnet * ifp,u_int64_t * if_target_qdelay)791 ifclassq_calc_target_qdelay(struct ifnet *ifp, u_int64_t *if_target_qdelay)
792 {
793 	u_int64_t qdelay = 0;
794 	qdelay = IFCQ_TARGET_QDELAY(ifp->if_snd);
795 
796 	if (ifclassq_target_qdelay != 0) {
797 		qdelay = ifclassq_target_qdelay;
798 	}
799 
800 	/*
801 	 * If we do not know the effective bandwidth, use the default
802 	 * target queue delay.
803 	 */
804 	if (qdelay == 0) {
805 		qdelay = IFQ_TARGET_DELAY;
806 	}
807 
808 	/*
809 	 * If a delay has been added to ifnet start callback for
810 	 * coalescing, we have to add that to the pre-set target delay
811 	 * because the packets can be in the queue longer.
812 	 */
813 	if ((ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
814 	    ifp->if_start_delay_timeout > 0) {
815 		qdelay += ifp->if_start_delay_timeout;
816 	}
817 
818 	*(if_target_qdelay) = qdelay;
819 }
820 
821 void
ifclassq_calc_update_interval(u_int64_t * update_interval)822 ifclassq_calc_update_interval(u_int64_t *update_interval)
823 {
824 	u_int64_t uint = 0;
825 
826 	/* If the system level override is set, use it */
827 	if (ifclassq_update_interval != 0) {
828 		uint = ifclassq_update_interval;
829 	}
830 
831 	/* Otherwise use the default value */
832 	if (uint == 0) {
833 		uint = IFQ_UPDATE_INTERVAL;
834 	}
835 
836 	*update_interval = uint;
837 }
838 
/* Reclaim (or, with purge, fully drain) the classq-related caches. */
void
ifclassq_reap_caches(boolean_t purge)
{
	fq_codel_reap_caches(purge);
	flowadv_reap_caches(purge);
}
845 
/* Allocate and initialize a zeroed ifclassq from the zone. */
struct ifclassq *
ifclassq_alloc(void)
{
	struct ifclassq *ifcq;

	ifcq = zalloc_flags(ifcq_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	/*
	 * os_ref_init starts the count at 1 and the extra retain brings it
	 * to 2 -- NOTE(review): presumably one reference for the caller and
	 * one held elsewhere; confirm against the ifclassq_release() callers.
	 */
	os_ref_init(&ifcq->ifcq_refcnt, NULL);
	os_ref_retain(&ifcq->ifcq_refcnt);
	lck_mtx_init(&ifcq->ifcq_lock, &ifcq_lock_group, &ifcq_lock_attr);
	return ifcq;
}
857 
/* Take an additional reference on the classq. */
void
ifclassq_retain(struct ifclassq *ifcq)
{
	os_ref_retain(&ifcq->ifcq_refcnt);
}
863 
/*
 * Drop a reference; on the last release, tear down the classq and free
 * it.  The caller's pointer is always cleared to prevent use-after-free.
 */
void
ifclassq_release(struct ifclassq **pifcq)
{
	struct ifclassq *ifcq = *pifcq;

	*pifcq = NULL;
	if (os_ref_release(&ifcq->ifcq_refcnt) == 0) {
		ifclassq_teardown(ifcq);
		zfree(ifcq_zone, ifcq);
	}
}
875