xref: /xnu-8792.41.9/bsd/net/classq/classq_subr.c (revision 5c2921b07a2480ab43ec66f5b9e41cb872bc554f)
1 /*
2  * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/mbuf.h>
32 #include <sys/errno.h>
33 #include <sys/random.h>
34 #include <sys/kernel_types.h>
35 #include <sys/sysctl.h>
36 
37 #include <kern/zalloc.h>
38 
39 #include <net/if.h>
40 #include <net/net_osdep.h>
41 #include <net/classq/classq.h>
42 #include <pexpert/pexpert.h>
43 #include <net/classq/classq_sfb.h>
44 #include <net/classq/classq_fq_codel.h>
45 #include <net/pktsched/pktsched.h>
46 #include <net/pktsched/pktsched_fq_codel.h>
47 #include <net/flowadv.h>
48 
49 #include <libkern/libkern.h>
50 
51 #if SKYWALK
52 #include <skywalk/os_skywalk_private.h>
53 #include <skywalk/nexus/netif/nx_netif.h>
54 #endif /* SKYWALK */
55 
/*
 * Internal helpers shared by the public (sc and non-sc) dequeue entry
 * points below.
 */
static errno_t ifclassq_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
    u_int32_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
    u_int32_t *, boolean_t, u_int8_t);
static void ifclassq_tbr_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
    boolean_t, classq_pkt_t *, u_int8_t);

/*
 * AQM tunables, settable via sysctl net.classq.* (and, on DEBUG /
 * DEVELOPMENT builds, via boot-args of the same names — see
 * classq_init()).  A value of 0 means "not configured"; the compiled-in
 * default is then used instead (see ifclassq_calc_target_qdelay() and
 * ifclassq_calc_update_interval()).
 */
static uint64_t ifclassq_def_c_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_def_c_target_qdelay, "def classic target queue delay in nanoseconds");

static uint64_t ifclassq_def_c_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_c_update_interval,
    "def classic update interval in nanoseconds");

static uint64_t ifclassq_def_l4s_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_def_l4s_target_qdelay, "def L4S target queue delay in nanoseconds");

static uint64_t ifclassq_def_l4s_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_l4s_update_interval,
    "def L4S update interval in nanoseconds");

static uint64_t ifclassq_ll_c_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_ll_c_target_qdelay, "low latency classic target queue delay in nanoseconds");

static uint64_t ifclassq_ll_c_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_c_update_interval,
    "low latency classic update interval in nanoseconds");

static uint64_t ifclassq_ll_l4s_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_ll_l4s_target_qdelay, "low latency L4S target queue delay in nanoseconds");

static uint64_t ifclassq_ll_l4s_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_l4s_update_interval,
    "low latency L4S update interval in nanoseconds");

#if DEBUG || DEVELOPMENT
uint32_t ifclassq_flow_control_adv = 1; /* flow control advisory */
SYSCTL_UINT(_net_classq, OID_AUTO, flow_control_adv,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_flow_control_adv, 1,
    "enable/disable flow control advisory");

/* fq_codel quantum override; 0 = scheduler default (boot-arg settable) */
uint16_t fq_codel_quantum = 0;
#endif /* DEBUG || DEVELOPMENT */

static struct zone *ifcq_zone;          /* zone for ifclassq */
#define IFCQ_ZONE_NAME    "ifclassq"    /* zone name */
LCK_ATTR_DECLARE(ifcq_lock_attr, 0, 0);
static LCK_GRP_DECLARE(ifcq_lock_group, "ifclassq locks");
111 
/*
 * One-time initialization of the classq layer: verifies compile-time
 * assumptions about service-class numbering, applies boot-arg overrides
 * for the AQM tunables (DEBUG/DEVELOPMENT builds only), creates the
 * ifclassq zone and initializes the FQ-CoDel scheduler state.
 */
void
classq_init(void)
{
	/* classq relies on best-effort being class/service-class 0 */
	_CASSERT(MBUF_TC_BE == 0);
	_CASSERT(MBUF_SC_BE == 0);
	_CASSERT(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);
#if DEBUG || DEVELOPMENT
	/* boot-args mirror the net.classq sysctl tunables declared above */
	PE_parse_boot_argn("fq_codel_quantum", &fq_codel_quantum,
	    sizeof(fq_codel_quantum));
	PE_parse_boot_argn("ifclassq_def_c_target_qdelay", &ifclassq_def_c_target_qdelay,
	    sizeof(ifclassq_def_c_target_qdelay));
	PE_parse_boot_argn("ifclassq_def_c_update_interval",
	    &ifclassq_def_c_update_interval, sizeof(ifclassq_def_c_update_interval));
	PE_parse_boot_argn("ifclassq_def_l4s_target_qdelay", &ifclassq_def_l4s_target_qdelay,
	    sizeof(ifclassq_def_l4s_target_qdelay));
	PE_parse_boot_argn("ifclassq_def_l4s_update_interval",
	    &ifclassq_def_l4s_update_interval, sizeof(ifclassq_def_l4s_update_interval));
	PE_parse_boot_argn("ifclassq_ll_c_target_qdelay", &ifclassq_ll_c_target_qdelay,
	    sizeof(ifclassq_ll_c_target_qdelay));
	PE_parse_boot_argn("ifclassq_ll_c_update_interval",
	    &ifclassq_ll_c_update_interval, sizeof(ifclassq_ll_c_update_interval));
	PE_parse_boot_argn("ifclassq_ll_l4s_target_qdelay", &ifclassq_ll_l4s_target_qdelay,
	    sizeof(ifclassq_ll_l4s_target_qdelay));
	PE_parse_boot_argn("ifclassq_ll_l4s_update_interval",
	    &ifclassq_ll_l4s_update_interval, sizeof(ifclassq_ll_l4s_update_interval));
#endif /* DEBUG || DEVELOPMENT */
	ifcq_zone = zone_create(IFCQ_ZONE_NAME, sizeof(struct ifclassq),
	    ZC_ZFREE_CLEARMEM);
	fq_codel_init();
}
142 
/*
 * Bind @ifq to @ifp and, for interfaces using the new output model
 * (IFEF_TXSTART), attach the packet scheduler.  @sflags are scheduler
 * flags; PKTSCHEDF_QALG_DELAYBASED is stripped when the driver pre-set
 * a custom maximum queue length without a target delay, so that
 * statically sized queues are used instead.
 * Returns 0 or an errno from ifclassq_pktsched_setup().
 */
int
ifclassq_setup(struct ifclassq *ifq, struct ifnet *ifp, uint32_t sflags)
{
	int err = 0;

	IFCQ_LOCK(ifq);
	VERIFY(IFCQ_IS_EMPTY(ifq));
	ifq->ifcq_ifp = ifp;
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));

	/* a freshly allocated (or fully torn down) ifclassq is pristine */
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);

	if (ifp->if_eflags & IFEF_TXSTART) {
		u_int32_t maxlen = 0;

		/* fall back to the global default queue length */
		if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
			maxlen = if_sndq_maxlen;
		}
		IFCQ_SET_MAXLEN(ifq, maxlen);

		if (IFCQ_MAXLEN(ifq) != if_sndq_maxlen &&
		    IFCQ_TARGET_QDELAY(ifq) == 0) {
			/*
			 * Choose static queues because the interface has
			 * maximum queue size set
			 */
			sflags &= ~PKTSCHEDF_QALG_DELAYBASED;
		}
		ifq->ifcq_sflags = sflags;
		err = ifclassq_pktsched_setup(ifq);
		if (err == 0) {
			/* queue is now usable by enqueue/dequeue paths */
			ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
		}
	}
	IFCQ_UNLOCK(ifq);
	return err;
}
187 
/*
 * Undo ifclassq_setup(): disable any token bucket regulator, tear down
 * the scheduler, and reset the queue to its pristine state.  Safe to
 * call on a queue that was already torn down; such calls are no-ops
 * once IFCQF_DESTROYED is set.
 */
void
ifclassq_teardown(struct ifclassq *ifq)
{
	IFCQ_LOCK(ifq);
	if (IFCQ_IS_DESTROYED(ifq)) {
		/* already torn down; only the DESTROYED flag may remain */
		ASSERT((ifq->ifcq_flags & ~IFCQF_DESTROYED) == 0);
		goto done;
	}
	if (IFCQ_IS_READY(ifq)) {
		if (IFCQ_TBR_IS_ENABLED(ifq)) {
			/* a zero-rate profile deletes the TBR */
			struct tb_profile tb =
			{ .rate = 0, .percent = 0, .depth = 0 };
			(void) ifclassq_tbr_set(ifq, &tb, FALSE);
		}
		pktsched_teardown(ifq);
		ifq->ifcq_flags &= ~IFCQF_READY;
	}
	ifq->ifcq_sflags = 0;
	/* scheduler teardown must have drained and detached everything */
	VERIFY(IFCQ_IS_EMPTY(ifq));
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	IFCQ_MAXLEN(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));
	ifq->ifcq_flags |= IFCQF_DESTROYED;
done:
	IFCQ_UNLOCK(ifq);
}
221 
222 int
ifclassq_pktsched_setup(struct ifclassq * ifq)223 ifclassq_pktsched_setup(struct ifclassq *ifq)
224 {
225 	struct ifnet *ifp = ifq->ifcq_ifp;
226 	classq_pkt_type_t ptype = QP_MBUF;
227 	int err = 0;
228 
229 	IFCQ_LOCK_ASSERT_HELD(ifq);
230 	VERIFY(ifp->if_eflags & IFEF_TXSTART);
231 #if SKYWALK
232 	ptype = ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) != 0) ? QP_PACKET :
233 	    QP_MBUF;
234 #endif /* SKYWALK */
235 
236 	err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL, ifq->ifcq_sflags, ptype);
237 
238 	return err;
239 }
240 
241 void
ifclassq_set_maxlen(struct ifclassq * ifq,u_int32_t maxqlen)242 ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen)
243 {
244 	IFCQ_LOCK(ifq);
245 	if (maxqlen == 0) {
246 		maxqlen = if_sndq_maxlen;
247 	}
248 	IFCQ_SET_MAXLEN(ifq, maxqlen);
249 	IFCQ_UNLOCK(ifq);
250 }
251 
/*
 * Return the configured maximum queue length of @ifq.
 * NOTE(review): IFCQ_MAXLEN() is read without the ifcq lock here;
 * presumably callers tolerate a momentarily stale value — confirm.
 */
u_int32_t
ifclassq_get_maxlen(struct ifclassq *ifq)
{
	return IFCQ_MAXLEN(ifq);
}
257 
258 int
ifclassq_get_len(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int8_t grp_idx,u_int32_t * packets,u_int32_t * bytes)259 ifclassq_get_len(struct ifclassq *ifq, mbuf_svc_class_t sc, u_int8_t grp_idx,
260     u_int32_t *packets, u_int32_t *bytes)
261 {
262 	int err = 0;
263 
264 	IFCQ_LOCK(ifq);
265 	if ((ifq->ifcq_flags & (IFCQF_READY | IFCQF_ENABLED)) !=
266 	    (IFCQF_READY | IFCQF_ENABLED)) {
267 		return ENXIO;
268 	}
269 	if (sc == MBUF_SC_UNSPEC && grp_idx == IF_CLASSQ_ALL_GRPS) {
270 		VERIFY(packets != NULL);
271 		*packets = IFCQ_LEN(ifq);
272 	} else {
273 		cqrq_stat_sc_t req = { sc, grp_idx, 0, 0 };
274 
275 		VERIFY(MBUF_VALID_SC(sc) || sc == MBUF_SC_UNSPEC);
276 
277 		err = fq_if_request_classq(ifq, CLASSQRQ_STAT_SC, &req);
278 		if (packets != NULL) {
279 			*packets = req.packets;
280 		}
281 		if (bytes != NULL) {
282 			*bytes = req.bytes;
283 		}
284 	}
285 	IFCQ_UNLOCK(ifq);
286 
287 #if SKYWALK
288 	struct ifnet *ifp = ifq->ifcq_ifp;
289 
290 	if (__improbable(ifp->if_na_ops != NULL &&
291 	    ifp->if_na_ops->ni_get_len != NULL)) {
292 		err = ifp->if_na_ops->ni_get_len(ifp->if_na, sc, packets,
293 		    bytes, err);
294 	}
295 #endif /* SKYWALK */
296 
297 	return err;
298 }
299 
/*
 * Annotate an outbound packet with "unsent bytes" bookkeeping used for
 * cellular buffer-status reporting; a no-op on non-cellular interfaces.
 */
inline void
ifclassq_set_packet_metadata(struct ifclassq *ifq, struct ifnet *ifp,
    classq_pkt_t *p)
{
	if (!IFNET_IS_CELLULAR(ifp)) {
		return;
	}

	switch (p->cp_ptype) {
	case QP_MBUF: {
		struct mbuf *m = p->cp_mbuf;
		m->m_pkthdr.pkt_flags |= PKTF_VALID_UNSENT_DATA;
		/* snapshot bytes still held in classq and in the sndbuf */
		m->m_pkthdr.bufstatus_if = IFCQ_BYTES(ifq);
		m->m_pkthdr.bufstatus_sndbuf = (uint32_t)ifp->if_sndbyte_unsent;
		break;
	}

#if SKYWALK
	case QP_PACKET:
		/*
		 * Support for equivalent of mbuf_get_unsent_data_bytes()
		 * is not needed in the Skywalk architecture.
		 */
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
332 
333 errno_t
ifclassq_enqueue(struct ifclassq * ifq,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t cnt,u_int32_t bytes,boolean_t * pdrop)334 ifclassq_enqueue(struct ifclassq *ifq, classq_pkt_t *head, classq_pkt_t *tail,
335     u_int32_t cnt, u_int32_t bytes, boolean_t *pdrop)
336 {
337 	return fq_if_enqueue_classq(ifq, head, tail, cnt, bytes, pdrop);
338 }
339 
340 errno_t
ifclassq_dequeue(struct ifclassq * ifq,u_int32_t pkt_limit,u_int32_t byte_limit,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t * cnt,u_int32_t * len,u_int8_t grp_idx)341 ifclassq_dequeue(struct ifclassq *ifq, u_int32_t pkt_limit,
342     u_int32_t byte_limit, classq_pkt_t *head, classq_pkt_t *tail,
343     u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
344 {
345 	return ifclassq_dequeue_common(ifq, MBUF_SC_UNSPEC, pkt_limit,
346 	           byte_limit, head, tail, cnt, len, FALSE, grp_idx);
347 }
348 
349 errno_t
ifclassq_dequeue_sc(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int32_t pkt_limit,u_int32_t byte_limit,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t * cnt,u_int32_t * len,u_int8_t grp_idx)350 ifclassq_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
351     u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
352     classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
353 {
354 	return ifclassq_dequeue_common(ifq, sc, pkt_limit, byte_limit,
355 	           head, tail, cnt, len, TRUE, grp_idx);
356 }
357 
/*
 * Dequeue up to @pkt_limit packets / @byte_limit bytes from @ifq into a
 * chain returned via @head/@tail, with the packet/byte totals stored in
 * @cnt/@len.  When @drvmgt is TRUE the dequeue is restricted to service
 * class @sc.  Without a token bucket regulator the scheduler's
 * multi-packet dequeue is used; with a TBR each packet is metered
 * individually in the dequeue_loop below.
 * Returns 0 when at least one packet was dequeued, EAGAIN otherwise.
 */
static errno_t
ifclassq_dequeue_common_default(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
    u_int8_t grp_idx)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int32_t i = 0, l = 0;	/* packets and bytes dequeued so far */
	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		goto dequeue_loop;
	}

	/*
	 * If the scheduler support dequeueing multiple packets at the
	 * same time, call that one instead.
	 */
	if (drvmgt) {
		int err;

		IFCQ_LOCK_SPIN(ifq);
		err = fq_if_dequeue_sc_classq_multi(ifq, sc, pkt_limit,
		    byte_limit, head, tail, cnt, len, grp_idx);
		IFCQ_UNLOCK(ifq);

		/* an empty result is reported as EAGAIN */
		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	} else {
		int err;

		IFCQ_LOCK_SPIN(ifq);
		err = fq_if_dequeue_classq_multi(ifq, pkt_limit, byte_limit,
		    head, tail, cnt, len, grp_idx);
		IFCQ_UNLOCK(ifq);

		/* an empty result is reported as EAGAIN */
		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	}

dequeue_loop:
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));
	IFCQ_LOCK_SPIN(ifq);

	/* TBR path: meter one packet at a time until a limit is hit */
	while (i < pkt_limit && l < byte_limit) {
		if (drvmgt) {
			IFCQ_TBR_DEQUEUE_SC(ifq, sc, head, grp_idx);
		} else {
			IFCQ_TBR_DEQUEUE(ifq, head, grp_idx);
		}

		/* queue empty or rate limit exhausted */
		if (head->cp_mbuf == NULL) {
			break;
		}

		if (first.cp_mbuf == NULL) {
			first = *head;
		}

		/* link the packet onto the chain and account its length */
		switch (head->cp_ptype) {
		case QP_MBUF:
			head->cp_mbuf->m_nextpkt = NULL;
			l += head->cp_mbuf->m_pkthdr.len;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_mbuf != NULL) {
				last.cp_mbuf->m_nextpkt = head->cp_mbuf;
			}
			break;

#if SKYWALK
		case QP_PACKET:
			head->cp_kpkt->pkt_nextpkt = NULL;
			l += head->cp_kpkt->pkt_length;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_kpkt != NULL) {
				last.cp_kpkt->pkt_nextpkt = head->cp_kpkt;
			}
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		last = *head;
		i++;
	}

	IFCQ_UNLOCK(ifq);

	if (tail != NULL) {
		*tail = last;
	}
	if (cnt != NULL) {
		*cnt = i;
	}
	if (len != NULL) {
		*len = l;
	}

	/* hand the head of the chain back to the caller */
	*head = first;
	return (first.cp_mbuf != NULL) ? 0 : EAGAIN;
}
470 
/*
 * Common dequeue path for both the unrestricted and service-class
 * restricted entry points.  Normally delegates straight to
 * ifclassq_dequeue_common_default(); when a Skywalk netif ni_dequeue
 * hook is installed, the dequeued chain (and the default path's error)
 * is additionally handed to that hook.
 */
static errno_t
ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
    u_int8_t grp_idx)
{
#if SKYWALK
	struct ifnet *ifp = ifq->ifcq_ifp;

	if (__improbable(ifp->if_na_ops != NULL &&
	    ifp->if_na_ops->ni_dequeue != NULL)) {
		/*
		 * TODO:
		 * We should be changing the pkt/byte limit to the
		 * available space in the next filter. But this is not
		 * useful until we can flow control the whole chain of
		 * filters.
		 */
		errno_t err = ifclassq_dequeue_common_default(ifq, sc,
		    pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);

		return ifp->if_na_ops->ni_dequeue(ifp->if_na, sc, pkt_limit,
		           byte_limit, head, tail, cnt, len, drvmgt, err);
	}
#endif /* SKYWALK */
	return ifclassq_dequeue_common_default(ifq, sc,
	           pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);
}
499 
/*
 * Propagate a classq event @ev (link bandwidth/latency/MTU/up/down) to
 * the scheduler so it can adapt.  Caller holds the ifcq lock and the
 * queue must be ready.
 */
void
ifclassq_update(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));
	fq_if_request_classq(ifq, CLASSQRQ_EVENT, (void *)ev);
}
507 
508 int
ifclassq_attach(struct ifclassq * ifq,u_int32_t type,void * discipline)509 ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline)
510 {
511 	IFCQ_LOCK_ASSERT_HELD(ifq);
512 	VERIFY(ifq->ifcq_disc == NULL);
513 	ifq->ifcq_type = type;
514 	ifq->ifcq_disc = discipline;
515 	return 0;
516 }
517 
/*
 * Reset the scheduler type on @ifq.  The discipline pointer must
 * already be NULL — presumably cleared by the scheduler teardown path
 * before this is called; verify against pktsched_teardown().
 */
void
ifclassq_detach(struct ifclassq *ifq)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	ifq->ifcq_type = PKTSCHEDT_NONE;
}
525 
526 int
ifclassq_getqstats(struct ifclassq * ifq,u_int8_t gid,u_int32_t qid,void * ubuf,u_int32_t * nbytes)527 ifclassq_getqstats(struct ifclassq *ifq, u_int8_t gid, u_int32_t qid, void *ubuf,
528     u_int32_t *nbytes)
529 {
530 	struct if_ifclassq_stats *ifqs;
531 	int err;
532 
533 	if (*nbytes < sizeof(*ifqs)) {
534 		return EINVAL;
535 	}
536 
537 	ifqs = kalloc_type(struct if_ifclassq_stats,
538 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
539 
540 	IFCQ_LOCK(ifq);
541 	if (!IFCQ_IS_READY(ifq)) {
542 		IFCQ_UNLOCK(ifq);
543 		kfree_type(struct if_ifclassq_stats, ifqs);
544 		return ENXIO;
545 	}
546 
547 	ifqs->ifqs_len = IFCQ_LEN(ifq);
548 	ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq);
549 	*(&ifqs->ifqs_xmitcnt) = *(&ifq->ifcq_xmitcnt);
550 	*(&ifqs->ifqs_dropcnt) = *(&ifq->ifcq_dropcnt);
551 	ifqs->ifqs_scheduler = ifq->ifcq_type;
552 
553 	err = pktsched_getqstats(ifq, gid, qid, ifqs);
554 	IFCQ_UNLOCK(ifq);
555 
556 	if (err == 0 && (err = copyout((caddr_t)ifqs,
557 	    (user_addr_t)(uintptr_t)ubuf, sizeof(*ifqs))) == 0) {
558 		*nbytes = sizeof(*ifqs);
559 	}
560 
561 	kfree_type(struct if_ifclassq_stats, ifqs);
562 
563 	return err;
564 }
565 
566 const char *
ifclassq_ev2str(cqev_t ev)567 ifclassq_ev2str(cqev_t ev)
568 {
569 	const char *c;
570 
571 	switch (ev) {
572 	case CLASSQ_EV_LINK_BANDWIDTH:
573 		c = "LINK_BANDWIDTH";
574 		break;
575 
576 	case CLASSQ_EV_LINK_LATENCY:
577 		c = "LINK_LATENCY";
578 		break;
579 
580 	case CLASSQ_EV_LINK_MTU:
581 		c = "LINK_MTU";
582 		break;
583 
584 	case CLASSQ_EV_LINK_UP:
585 		c = "LINK_UP";
586 		break;
587 
588 	case CLASSQ_EV_LINK_DOWN:
589 		c = "LINK_DOWN";
590 		break;
591 
592 	default:
593 		c = "UNKNOWN";
594 		break;
595 	}
596 
597 	return c;
598 }
599 
/*
 * Internal representation of token bucket parameters:
 *	rate:	bytes_per_unittime << 32
 *		= (((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	bytes << 32
 *
 * The 32-bit fixed-point scaling preserves sub-byte precision when the
 * per-tick byte rate is very small.
 */
#define TBR_SHIFT       32
#define TBR_SCALE(x)    ((int64_t)(x) << TBR_SHIFT)
#define TBR_UNSCALE(x)  ((x) >> TBR_SHIFT)
610 
/*
 * TBR-governed dequeue, any service class (used via IFCQ_TBR_DEQUEUE()).
 */
void
ifclassq_tbr_dequeue(struct ifclassq *ifq, classq_pkt_t *pkt, u_int8_t grp_idx)
{
	ifclassq_tbr_dequeue_common(ifq, MBUF_SC_UNSPEC, FALSE, pkt, grp_idx);
}
616 
/*
 * TBR-governed dequeue restricted to service class @sc (used via
 * IFCQ_TBR_DEQUEUE_SC()).
 */
void
ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    classq_pkt_t *pkt, u_int8_t grp_idx)
{
	ifclassq_tbr_dequeue_common(ifq, sc, TRUE, pkt, grp_idx);
}
623 
/*
 * Core token-bucket-regulated dequeue.  Refills the token counter from
 * elapsed machine-clock time and only dequeues from the scheduler when
 * the bucket holds a positive token count; the dequeued packet's length
 * (in TBR fixed point) is then charged against the bucket.  *pkt is
 * left empty when the rate limit blocks the dequeue or the scheduler
 * has nothing eligible.
 */
static void
ifclassq_tbr_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    boolean_t drvmgt, classq_pkt_t *pkt, u_int8_t grp_idx)
{
	struct tb_regulator *tbr;
	int64_t interval;
	u_int64_t now;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));

	*pkt = CLASSQ_PKT_INITIALIZER(*pkt);
	tbr = &ifq->ifcq_tbr;
	/* update token only when it is negative */
	if (tbr->tbr_token <= 0) {
		now = read_machclk();
		interval = now - tbr->tbr_last;
		if (interval >= tbr->tbr_filluptime) {
			/* enough time elapsed for the bucket to fill fully */
			tbr->tbr_token = tbr->tbr_depth;
		} else {
			tbr->tbr_token += interval * tbr->tbr_rate;
			if (tbr->tbr_token > tbr->tbr_depth) {
				tbr->tbr_token = tbr->tbr_depth;
			}
		}
		tbr->tbr_last = now;
	}
	/* if token is still negative, don't allow dequeue */
	if (tbr->tbr_token <= 0) {
		return;
	}

	/*
	 * ifclassq takes precedence over ALTQ queue;
	 * ifcq_drain count is adjusted by the caller.
	 */
	if (drvmgt) {
		fq_if_dequeue_sc_classq(ifq, sc, pkt, grp_idx);
	} else {
		fq_if_dequeue_classq(ifq, pkt, grp_idx);
	}

	if (pkt->cp_mbuf != NULL) {
		/* charge the dequeued packet's length to the bucket */
		switch (pkt->cp_ptype) {
		case QP_MBUF:
			tbr->tbr_token -= TBR_SCALE(m_pktlen(pkt->cp_mbuf));
			break;

#if SKYWALK
		case QP_PACKET:
			tbr->tbr_token -=
			    TBR_SCALE(pkt->cp_kpkt->pkt_length);
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
		}
	}
}
687 
/*
 * Install, reconfigure, or delete a token bucket regulator on @ifq.
 * A zero effective rate deletes the TBR.  When @profile->percent is
 * non-zero the rate is computed as that percentage of the interface's
 * effective output bandwidth.  When @update is TRUE a bandwidth event
 * is posted to the scheduler if the raw rate changed.
 * Returns 0, EINVAL (percent > 100), or ENODEV (unknown link bandwidth).
 */
int
ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
    boolean_t update)
{
	struct tb_regulator *tbr;
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int64_t rate, old_rate;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));

	VERIFY(machclk_freq != 0);

	tbr = &ifq->ifcq_tbr;
	old_rate = tbr->tbr_rate_raw;

	rate = profile->rate;
	if (profile->percent > 0) {
		u_int64_t eff_rate;

		if (profile->percent > 100) {
			return EINVAL;
		}
		if ((eff_rate = ifp->if_output_bw.eff_bw) == 0) {
			return ENODEV;
		}
		rate = (eff_rate * profile->percent) / 100;
	}

	if (rate == 0) {
		if (!IFCQ_TBR_IS_ENABLED(ifq)) {
			return 0;
		}

		if (pktsched_verbose) {
			printf("%s: TBR disabled\n", if_name(ifp));
		}

		/* disable this TBR */
		ifq->ifcq_flags &= ~IFCQF_TBR;
		bzero(tbr, sizeof(*tbr));
		ifnet_set_start_cycle(ifp, NULL);
		if (update) {
			ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
		}
		return 0;
	}

	if (pktsched_verbose) {
		printf("%s: TBR %s (rate %llu bps depth %u)\n", if_name(ifp),
		    (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" :
		    "enabled", rate, profile->depth);
	}

	/* set the new TBR */
	bzero(tbr, sizeof(*tbr));
	tbr->tbr_rate_raw = rate;
	tbr->tbr_percent = profile->percent;
	ifq->ifcq_flags |= IFCQF_TBR;

	/*
	 * Note that the TBR fill up time (hence the ifnet restart time)
	 * is directly related to the specified TBR depth.  The ideal
	 * depth value should be computed such that the interval time
	 * between each successive wakeup is adequately spaced apart,
	 * in order to reduce scheduling overheads.  A target interval
	 * of 10 ms seems to provide good performance balance.  This can be
	 * overridden by specifying the depth profile.  Values smaller than
	 * the ideal depth will reduce delay at the expense of CPU cycles.
	 */
	tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq;
	if (tbr->tbr_rate > 0) {
		u_int32_t mtu = ifp->if_mtu;
		int64_t ival, idepth = 0;
		int i;

		if (mtu < IF_MINMTU) {
			mtu = IF_MINMTU;
		}

		ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */

		/* smallest multiple of the MTU that covers the interval */
		for (i = 1;; i++) {
			idepth = TBR_SCALE(i * mtu);
			if ((idepth / tbr->tbr_rate) > ival) {
				break;
			}
		}
		VERIFY(idepth > 0);

		tbr->tbr_depth = TBR_SCALE(profile->depth);
		if (tbr->tbr_depth == 0) {
			tbr->tbr_filluptime = idepth / tbr->tbr_rate;
			/* a little fudge factor to get closer to rate */
			tbr->tbr_depth = idepth + (idepth >> 3);
		} else {
			tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
		}
	} else {
		/* effectively infinite bucket for extremely low rates */
		tbr->tbr_depth = TBR_SCALE(profile->depth);
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	}
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();

	if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) {
		/* arrange periodic restarts matched to the fill-up time */
		struct timespec ts =
		{ 0, (long)pktsched_abs_to_nsecs(tbr->tbr_filluptime) };
		if (pktsched_verbose) {
			printf("%s: TBR calculated tokens %lld "
			    "filluptime %llu ns\n", if_name(ifp),
			    TBR_UNSCALE(tbr->tbr_token),
			    pktsched_abs_to_nsecs(tbr->tbr_filluptime));
		}
		ifnet_set_start_cycle(ifp, &ts);
	} else {
		if (pktsched_verbose) {
			if (tbr->tbr_rate == 0) {
				printf("%s: TBR calculated tokens %lld "
				    "infinite filluptime\n", if_name(ifp),
				    TBR_UNSCALE(tbr->tbr_token));
			} else if (!(ifp->if_flags & IFF_UP)) {
				printf("%s: TBR suspended (link is down)\n",
				    if_name(ifp));
			}
		}
		ifnet_set_start_cycle(ifp, NULL);
	}
	if (update && tbr->tbr_rate_raw != old_rate) {
		ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
	}

	return 0;
}
826 
827 void
ifclassq_calc_target_qdelay(struct ifnet * ifp,uint64_t * if_target_qdelay,uint32_t flags)828 ifclassq_calc_target_qdelay(struct ifnet *ifp, uint64_t *if_target_qdelay,
829     uint32_t flags)
830 {
831 	uint64_t qdelay = 0, qdelay_configed = 0, qdely_default = 0;
832 	if (flags == IF_CLASSQ_DEF) {
833 		qdelay = IFCQ_TARGET_QDELAY(ifp->if_snd);
834 	}
835 
836 	switch (flags) {
837 	case IF_CLASSQ_DEF:
838 		qdelay_configed = ifclassq_def_c_target_qdelay;
839 		qdely_default = IFQ_DEF_C_TARGET_DELAY;
840 		break;
841 	case IF_CLASSQ_L4S:
842 		qdelay_configed = ifclassq_def_l4s_target_qdelay;
843 		qdely_default = IFQ_DEF_L4S_TARGET_DELAY;
844 		break;
845 	case IF_CLASSQ_LOW_LATENCY:
846 		qdelay_configed = ifclassq_ll_c_target_qdelay;
847 		qdely_default = IFQ_LL_C_TARGET_DELAY;
848 		break;
849 	case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S):
850 		qdelay_configed = ifclassq_ll_l4s_target_qdelay;
851 		qdely_default = IFQ_LL_L4S_TARGET_DELAY;
852 		break;
853 	default:
854 		VERIFY(0);
855 		/* NOTREACHED */
856 		__builtin_unreachable();
857 	}
858 
859 	if (qdelay_configed != 0) {
860 		qdelay = qdelay_configed;
861 	}
862 
863 	/*
864 	 * If we do not know the effective bandwidth, use the default
865 	 * target queue delay.
866 	 */
867 	if (qdelay == 0) {
868 		qdelay = qdely_default;
869 	}
870 
871 	/*
872 	 * If a delay has been added to ifnet start callback for
873 	 * coalescing, we have to add that to the pre-set target delay
874 	 * because the packets can be in the queue longer.
875 	 */
876 	if ((ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
877 	    ifp->if_start_delay_timeout > 0) {
878 		qdelay += ifp->if_start_delay_timeout;
879 	}
880 
881 	*(if_target_qdelay) = qdelay;
882 }
883 
884 void
ifclassq_calc_update_interval(uint64_t * update_interval,uint32_t flags)885 ifclassq_calc_update_interval(uint64_t *update_interval, uint32_t flags)
886 {
887 	uint64_t interval = 0, interval_configed = 0, interval_default = 0;
888 
889 	switch (flags) {
890 	case IF_CLASSQ_DEF:
891 		interval_configed = ifclassq_def_c_update_interval;
892 		interval_default = IFQ_DEF_C_UPDATE_INTERVAL;
893 		break;
894 	case IF_CLASSQ_L4S:
895 		interval_configed = ifclassq_def_l4s_update_interval;
896 		interval_default = IFQ_DEF_L4S_UPDATE_INTERVAL;
897 		break;
898 	case IF_CLASSQ_LOW_LATENCY:
899 		interval_configed = ifclassq_ll_c_update_interval;
900 		interval_default = IFQ_LL_C_UPDATE_INTERVAL;
901 		break;
902 	case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S):
903 		interval_configed = ifclassq_ll_l4s_update_interval;
904 		interval_default = IFQ_LL_L4S_UPDATE_INTERVAL;
905 		break;
906 	default:
907 		VERIFY(0);
908 		/* NOTREACHED */
909 		__builtin_unreachable();
910 	}
911 
912 	/* If the system level override is set, use it */
913 	if (interval_configed != 0) {
914 		interval = interval_configed;
915 	}
916 
917 	/* Otherwise use the default value */
918 	if (interval == 0) {
919 		interval = interval_default;
920 	}
921 
922 	*update_interval = interval;
923 }
924 
/*
 * Reclaim memory from the classq-related caches (fq_codel and flow
 * advisory); @purge forces a full purge rather than a trim.
 */
void
ifclassq_reap_caches(boolean_t purge)
{
	fq_codel_reap_caches(purge);
	flowadv_reap_caches(purge);
}
931 
/*
 * Allocate a zeroed ifclassq from the zone and initialize its refcount
 * and mutex.  NOTE(review): the refcount is initialized and then
 * immediately retained, so the caller starts with two references —
 * presumably one owned by the ifnet and one by the caller; confirm
 * against the attach path.
 */
struct ifclassq *
ifclassq_alloc(void)
{
	struct ifclassq *ifcq;

	ifcq = zalloc_flags(ifcq_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init(&ifcq->ifcq_refcnt, NULL);
	os_ref_retain(&ifcq->ifcq_refcnt);
	lck_mtx_init(&ifcq->ifcq_lock, &ifcq_lock_group, &ifcq_lock_attr);
	return ifcq;
}
943 
/*
 * Take an additional reference on @ifcq; paired with ifclassq_release().
 */
void
ifclassq_retain(struct ifclassq *ifcq)
{
	os_ref_retain(&ifcq->ifcq_refcnt);
}
949 
/*
 * Drop a reference on *pifcq and NULL out the caller's pointer; the
 * final release tears the queue down and frees it back to the zone.
 */
void
ifclassq_release(struct ifclassq **pifcq)
{
	struct ifclassq *ifcq = *pifcq;

	*pifcq = NULL;
	if (os_ref_release(&ifcq->ifcq_refcnt) == 0) {
		ifclassq_teardown(ifcq);
		zfree(ifcq_zone, ifcq);
	}
}
961 
962 int
ifclassq_setup_group(struct ifclassq * ifcq,uint8_t grp_idx,uint8_t flags)963 ifclassq_setup_group(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
964 {
965 	int err;
966 
967 	IFCQ_LOCK(ifcq);
968 	VERIFY(ifcq->ifcq_disc != NULL);
969 	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);
970 
971 	err = fq_if_create_grp(ifcq, grp_idx, flags);
972 	IFCQ_UNLOCK(ifcq);
973 
974 	return err;
975 }
976 
/*
 * Switch traffic group @grp_idx on @ifcq to combined mode (see
 * fq_if_set_grp_combined()).  Requires the FQ-CoDel scheduler.
 */
void
ifclassq_set_grp_combined(struct ifclassq *ifcq, uint8_t grp_idx)
{
	IFCQ_LOCK(ifcq);
	VERIFY(ifcq->ifcq_disc != NULL);
	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_set_grp_combined(ifcq, grp_idx);
	IFCQ_UNLOCK(ifcq);
}
987 
/*
 * Switch traffic group @grp_idx on @ifcq back to separated mode (see
 * fq_if_set_grp_separated()).  Requires the FQ-CoDel scheduler.
 */
void
ifclassq_set_grp_separated(struct ifclassq *ifcq, uint8_t grp_idx)
{
	IFCQ_LOCK(ifcq);
	VERIFY(ifcq->ifcq_disc != NULL);
	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_set_grp_separated(ifcq, grp_idx);
	IFCQ_UNLOCK(ifcq);
}
998