xref: /xnu-8792.61.2/bsd/net/classq/classq_subr.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/mbuf.h>
32 #include <sys/errno.h>
33 #include <sys/random.h>
34 #include <sys/kernel_types.h>
35 #include <sys/sysctl.h>
36 
37 #include <kern/zalloc.h>
38 
39 #include <net/if.h>
40 #include <net/net_osdep.h>
41 #include <net/classq/classq.h>
42 #include <pexpert/pexpert.h>
43 #include <net/classq/classq_sfb.h>
44 #include <net/classq/classq_fq_codel.h>
45 #include <net/pktsched/pktsched.h>
46 #include <net/pktsched/pktsched_fq_codel.h>
47 #include <net/flowadv.h>
48 
49 #include <libkern/libkern.h>
50 
51 #if SKYWALK
52 #include <skywalk/os_skywalk_private.h>
53 #include <skywalk/nexus/netif/nx_netif.h>
54 #endif /* SKYWALK */
55 
static errno_t ifclassq_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
    u_int32_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
    u_int32_t *, boolean_t, u_int8_t);
static void ifclassq_tbr_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
    boolean_t, classq_pkt_t *, u_int8_t);

/*
 * System-wide overrides for the AQM target queue delay and update
 * interval, one pair per traffic treatment (default vs. low-latency,
 * classic vs. L4S).  A value of 0 means "unset"; see
 * ifclassq_calc_target_qdelay() and ifclassq_calc_update_interval()
 * for how built-in defaults are applied when these are 0.
 */
static uint64_t ifclassq_def_c_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_def_c_target_qdelay, "def classic target queue delay in nanoseconds");

static uint64_t ifclassq_def_c_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_c_update_interval,
    "def classic update interval in nanoseconds");

static uint64_t ifclassq_def_l4s_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_def_l4s_target_qdelay, "def L4S target queue delay in nanoseconds");

static uint64_t ifclassq_def_l4s_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_l4s_update_interval,
    "def L4S update interval in nanoseconds");

static uint64_t ifclassq_ll_c_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_ll_c_target_qdelay, "low latency classic target queue delay in nanoseconds");

static uint64_t ifclassq_ll_c_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_c_update_interval,
    "low latency classic update interval in nanoseconds");

static uint64_t ifclassq_ll_l4s_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_ll_l4s_target_qdelay, "low latency L4S target queue delay in nanoseconds");

static uint64_t ifclassq_ll_l4s_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_l4s_update_interval,
    "low latency L4S update interval in nanoseconds");

/* global L4S switch; consulted elsewhere (non-static by design) */
uint32_t ifclassq_enable_l4s = 0;
SYSCTL_UINT(_net_classq, OID_AUTO, enable_l4s,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_enable_l4s, 0,
    "enable/disable L4S");

#if DEBUG || DEVELOPMENT
uint32_t ifclassq_flow_control_adv = 1; /* flow control advisory */
SYSCTL_UINT(_net_classq, OID_AUTO, flow_control_adv,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_flow_control_adv, 1,
    "enable/disable flow control advisory");

/* fq_codel quantum override, seeded from a boot-arg in classq_init() */
uint16_t fq_codel_quantum = 0;
#endif /* DEBUG || DEVELOPMENT */

static struct zone *ifcq_zone;          /* zone for ifclassq */
#define IFCQ_ZONE_NAME    "ifclassq"    /* zone name */
LCK_ATTR_DECLARE(ifcq_lock_attr, 0, 0);
static LCK_GRP_DECLARE(ifcq_lock_group, "ifclassq locks");
116 
/*
 * One-time initialization of the classq layer: checks compile-time
 * invariants, consumes optional tuning boot-args (DEBUG/DEVELOPMENT
 * builds only), creates the zone backing struct ifclassq allocations,
 * and initializes the fq_codel scheduler.
 */
void
classq_init(void)
{
	/* the scheduler code relies on best-effort being class index 0 */
	_CASSERT(MBUF_TC_BE == 0);
	_CASSERT(MBUF_SC_BE == 0);
	_CASSERT(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);
#if DEBUG || DEVELOPMENT
	/* allow the qdelay/update-interval tunables to be seeded at boot */
	PE_parse_boot_argn("fq_codel_quantum", &fq_codel_quantum,
	    sizeof(fq_codel_quantum));
	PE_parse_boot_argn("ifclassq_def_c_target_qdelay", &ifclassq_def_c_target_qdelay,
	    sizeof(ifclassq_def_c_target_qdelay));
	PE_parse_boot_argn("ifclassq_def_c_update_interval",
	    &ifclassq_def_c_update_interval, sizeof(ifclassq_def_c_update_interval));
	PE_parse_boot_argn("ifclassq_def_l4s_target_qdelay", &ifclassq_def_l4s_target_qdelay,
	    sizeof(ifclassq_def_l4s_target_qdelay));
	PE_parse_boot_argn("ifclassq_def_l4s_update_interval",
	    &ifclassq_def_l4s_update_interval, sizeof(ifclassq_def_l4s_update_interval));
	PE_parse_boot_argn("ifclassq_ll_c_target_qdelay", &ifclassq_ll_c_target_qdelay,
	    sizeof(ifclassq_ll_c_target_qdelay));
	PE_parse_boot_argn("ifclassq_ll_c_update_interval",
	    &ifclassq_ll_c_update_interval, sizeof(ifclassq_ll_c_update_interval));
	PE_parse_boot_argn("ifclassq_ll_l4s_target_qdelay", &ifclassq_ll_l4s_target_qdelay,
	    sizeof(ifclassq_ll_l4s_target_qdelay));
	PE_parse_boot_argn("ifclassq_ll_l4s_update_interval",
	    &ifclassq_ll_l4s_update_interval, sizeof(ifclassq_ll_l4s_update_interval));
#endif /* DEBUG || DEVELOPMENT */
	/* ZC_ZFREE_CLEARMEM: zero on free so reallocation starts pristine */
	ifcq_zone = zone_create(IFCQ_ZONE_NAME, sizeof(struct ifclassq),
	    ZC_ZFREE_CLEARMEM);
	fq_codel_init();
}
147 
/*
 * Bind an ifclassq to its interface and, for IFEF_TXSTART interfaces,
 * stand up the packet scheduler.  Expects a pristine (freshly allocated
 * or fully torn down) ifclassq; the VERIFYs below assert that state.
 *
 * Returns 0 on success, or the error from ifclassq_pktsched_setup().
 */
int
ifclassq_setup(struct ifclassq *ifq, struct ifnet *ifp, uint32_t sflags)
{
	int err = 0;

	IFCQ_LOCK(ifq);
	VERIFY(IFCQ_IS_EMPTY(ifq));
	ifq->ifcq_ifp = ifp;
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));

	/* must not have been set up before */
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);

	if (ifp->if_eflags & IFEF_TXSTART) {
		u_int32_t maxlen = 0;

		/* a zero maxlen means "use the system default" */
		if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
			maxlen = if_sndq_maxlen;
		}
		IFCQ_SET_MAXLEN(ifq, maxlen);

		if (IFCQ_MAXLEN(ifq) != if_sndq_maxlen &&
		    IFCQ_TARGET_QDELAY(ifq) == 0) {
			/*
			 * Choose static queues because the interface has
			 * maximum queue size set
			 */
			sflags &= ~PKTSCHEDF_QALG_DELAYBASED;
		}
		ifq->ifcq_sflags = sflags;
		err = ifclassq_pktsched_setup(ifq);
		if (err == 0) {
			/* mark usable only once the scheduler is attached */
			ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
		}
	}
	IFCQ_UNLOCK(ifq);
	return err;
}
192 
/*
 * Tear down an ifclassq: remove any token bucket regulator, detach the
 * scheduler, reset all counters/flags, and mark the queue destroyed.
 * Idempotent: calling on an already-destroyed queue is a no-op.
 * The queue must be empty by the time this runs.
 */
void
ifclassq_teardown(struct ifclassq *ifq)
{
	IFCQ_LOCK(ifq);
	if (IFCQ_IS_DESTROYED(ifq)) {
		/* nothing but the DESTROYED bit may remain set */
		ASSERT((ifq->ifcq_flags & ~IFCQF_DESTROYED) == 0);
		goto done;
	}
	if (IFCQ_IS_READY(ifq)) {
		if (IFCQ_TBR_IS_ENABLED(ifq)) {
			/* a zero-rate profile deletes the TBR */
			struct tb_profile tb =
			{ .rate = 0, .percent = 0, .depth = 0 };
			(void) ifclassq_tbr_set(ifq, &tb, FALSE);
		}
		pktsched_teardown(ifq);
		ifq->ifcq_flags &= ~IFCQF_READY;
	}
	ifq->ifcq_sflags = 0;
	/* pktsched_teardown must have left the queue fully quiesced */
	VERIFY(IFCQ_IS_EMPTY(ifq));
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	IFCQ_MAXLEN(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));
	ifq->ifcq_flags |= IFCQF_DESTROYED;
done:
	IFCQ_UNLOCK(ifq);
}
226 
227 int
ifclassq_pktsched_setup(struct ifclassq * ifq)228 ifclassq_pktsched_setup(struct ifclassq *ifq)
229 {
230 	struct ifnet *ifp = ifq->ifcq_ifp;
231 	classq_pkt_type_t ptype = QP_MBUF;
232 	int err = 0;
233 
234 	IFCQ_LOCK_ASSERT_HELD(ifq);
235 	VERIFY(ifp->if_eflags & IFEF_TXSTART);
236 #if SKYWALK
237 	ptype = ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) != 0) ? QP_PACKET :
238 	    QP_MBUF;
239 #endif /* SKYWALK */
240 
241 	err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL, ifq->ifcq_sflags, ptype);
242 
243 	return err;
244 }
245 
/*
 * Set the maximum queue length.  A value of 0 selects the system
 * default (if_sndq_maxlen, read under the classq lock).
 */
void
ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen)
{
	IFCQ_LOCK(ifq);
	if (maxqlen == 0) {
		maxqlen = if_sndq_maxlen;
	}
	IFCQ_SET_MAXLEN(ifq, maxqlen);
	IFCQ_UNLOCK(ifq);
}
256 
/*
 * Return the configured maximum queue length.  Lockless read; callers
 * tolerating a stale value need not hold the classq lock.
 */
u_int32_t
ifclassq_get_maxlen(struct ifclassq *ifq)
{
	return IFCQ_MAXLEN(ifq);
}
262 
263 int
ifclassq_get_len(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int8_t grp_idx,u_int32_t * packets,u_int32_t * bytes)264 ifclassq_get_len(struct ifclassq *ifq, mbuf_svc_class_t sc, u_int8_t grp_idx,
265     u_int32_t *packets, u_int32_t *bytes)
266 {
267 	int err = 0;
268 
269 	IFCQ_LOCK(ifq);
270 	if ((ifq->ifcq_flags & (IFCQF_READY | IFCQF_ENABLED)) !=
271 	    (IFCQF_READY | IFCQF_ENABLED)) {
272 		return ENXIO;
273 	}
274 	if (sc == MBUF_SC_UNSPEC && grp_idx == IF_CLASSQ_ALL_GRPS) {
275 		VERIFY(packets != NULL);
276 		*packets = IFCQ_LEN(ifq);
277 	} else {
278 		cqrq_stat_sc_t req = { sc, grp_idx, 0, 0 };
279 
280 		VERIFY(MBUF_VALID_SC(sc) || sc == MBUF_SC_UNSPEC);
281 
282 		err = fq_if_request_classq(ifq, CLASSQRQ_STAT_SC, &req);
283 		if (packets != NULL) {
284 			*packets = req.packets;
285 		}
286 		if (bytes != NULL) {
287 			*bytes = req.bytes;
288 		}
289 	}
290 	IFCQ_UNLOCK(ifq);
291 
292 #if SKYWALK
293 	struct ifnet *ifp = ifq->ifcq_ifp;
294 
295 	if (__improbable(ifp->if_na_ops != NULL &&
296 	    ifp->if_na_ops->ni_get_len != NULL)) {
297 		err = ifp->if_na_ops->ni_get_len(ifp->if_na, sc, packets,
298 		    bytes, err);
299 	}
300 #endif /* SKYWALK */
301 
302 	return err;
303 }
304 
/*
 * Stamp per-packet accounting metadata used on cellular interfaces:
 * for mbufs, record the bytes currently held in the classq and the
 * interface's unsent byte count (backs the "unsent data" reporting).
 * Skywalk packets carry no such metadata.  No-op on non-cellular links.
 */
inline void
ifclassq_set_packet_metadata(struct ifclassq *ifq, struct ifnet *ifp,
    classq_pkt_t *p)
{
	if (!IFNET_IS_CELLULAR(ifp)) {
		return;
	}

	switch (p->cp_ptype) {
	case QP_MBUF: {
		struct mbuf *m = p->cp_mbuf;
		m->m_pkthdr.pkt_flags |= PKTF_VALID_UNSENT_DATA;
		m->m_pkthdr.bufstatus_if = IFCQ_BYTES(ifq);
		m->m_pkthdr.bufstatus_sndbuf = (uint32_t)ifp->if_sndbyte_unsent;
		break;
	}

#if SKYWALK
	case QP_PACKET:
		/*
		 * Support for equivalent of mbuf_get_unsent_data_bytes()
		 * is not needed in the Skywalk architecture.
		 */
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
337 
/*
 * Enqueue a chain of packets (head..tail; cnt packets totalling bytes)
 * on the interface output queue.  fq_codel is the only scheduler, so
 * this dispatches directly; *pdrop reports whether the chain was dropped.
 */
errno_t
ifclassq_enqueue(struct ifclassq *ifq, classq_pkt_t *head, classq_pkt_t *tail,
    u_int32_t cnt, u_int32_t bytes, boolean_t *pdrop)
{
	return fq_if_enqueue_classq(ifq, head, tail, cnt, bytes, pdrop);
}
344 
/*
 * Dequeue up to pkt_limit packets / byte_limit bytes irrespective of
 * service class (drvmgt == FALSE path of the common routine).
 */
errno_t
ifclassq_dequeue(struct ifclassq *ifq, u_int32_t pkt_limit,
    u_int32_t byte_limit, classq_pkt_t *head, classq_pkt_t *tail,
    u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
{
	return ifclassq_dequeue_common(ifq, MBUF_SC_UNSPEC, pkt_limit,
	           byte_limit, head, tail, cnt, len, FALSE, grp_idx);
}
353 
/*
 * Driver-managed variant of ifclassq_dequeue(): dequeue only packets of
 * the given (valid) service class sc.
 */
errno_t
ifclassq_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
{
	return ifclassq_dequeue_common(ifq, sc, pkt_limit, byte_limit,
	           head, tail, cnt, len, TRUE, grp_idx);
}
362 
/*
 * Common dequeue path.  With no Token Bucket Regulator (TBR) enabled,
 * the multi-packet dequeue is delegated wholesale to the fq_codel
 * scheduler.  With a TBR enabled, packets are pulled one at a time
 * through the regulator and chained by hand, honoring both pkt_limit
 * and byte_limit.
 *
 * On success returns 0 with *head set to the first packet dequeued
 * (and optionally *tail/*cnt/*len filled in); returns EAGAIN when
 * nothing could be dequeued.
 */
static errno_t
ifclassq_dequeue_common_default(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
    u_int8_t grp_idx)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int32_t i = 0, l = 0;         /* packets / bytes dequeued so far */
	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);

	/* a service class is only meaningful on the driver-managed path */
	VERIFY(!drvmgt || MBUF_VALID_SC(sc));

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		goto dequeue_loop;
	}

	/*
	 * If the scheduler supports dequeueing multiple packets at the
	 * same time, call that one instead.
	 */
	if (drvmgt) {
		int err;

		IFCQ_LOCK_SPIN(ifq);
		err = fq_if_dequeue_sc_classq_multi(ifq, sc, pkt_limit,
		    byte_limit, head, tail, cnt, len, grp_idx);
		IFCQ_UNLOCK(ifq);

		/* an empty result is reported as EAGAIN, not success */
		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	} else {
		int err;

		IFCQ_LOCK_SPIN(ifq);
		err = fq_if_dequeue_classq_multi(ifq, pkt_limit, byte_limit,
		    head, tail, cnt, len, grp_idx);
		IFCQ_UNLOCK(ifq);

		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	}

dequeue_loop:
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));
	IFCQ_LOCK_SPIN(ifq);

	while (i < pkt_limit && l < byte_limit) {
		if (drvmgt) {
			IFCQ_TBR_DEQUEUE_SC(ifq, sc, head, grp_idx);
		} else {
			IFCQ_TBR_DEQUEUE(ifq, head, grp_idx);
		}

		/*
		 * cp_mbuf doubles as the "empty" test for both packet
		 * types here — presumably cp_mbuf/cp_kpkt share storage;
		 * confirm against classq_pkt_t's definition.
		 */
		if (head->cp_mbuf == NULL) {
			break;
		}

		if (first.cp_mbuf == NULL) {
			first = *head;
		}

		/* unlink from the queue and append to the result chain */
		switch (head->cp_ptype) {
		case QP_MBUF:
			head->cp_mbuf->m_nextpkt = NULL;
			l += head->cp_mbuf->m_pkthdr.len;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_mbuf != NULL) {
				last.cp_mbuf->m_nextpkt = head->cp_mbuf;
			}
			break;

#if SKYWALK
		case QP_PACKET:
			head->cp_kpkt->pkt_nextpkt = NULL;
			l += head->cp_kpkt->pkt_length;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_kpkt != NULL) {
				last.cp_kpkt->pkt_nextpkt = head->cp_kpkt;
			}
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		last = *head;
		i++;
	}

	IFCQ_UNLOCK(ifq);

	if (tail != NULL) {
		*tail = last;
	}
	if (cnt != NULL) {
		*cnt = i;
	}
	if (len != NULL) {
		*len = l;
	}

	*head = first;
	return (first.cp_mbuf != NULL) ? 0 : EAGAIN;
}
475 
/*
 * Dequeue entry point: normally forwards to the default implementation,
 * but on Skywalk interfaces with an ni_dequeue hook, the hook is given
 * the dequeued chain (and the default path's error) to post-process.
 */
static errno_t
ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
    u_int8_t grp_idx)
{
#if SKYWALK
	struct ifnet *ifp = ifq->ifcq_ifp;

	if (__improbable(ifp->if_na_ops != NULL &&
	    ifp->if_na_ops->ni_dequeue != NULL)) {
		/*
		 * TODO:
		 * We should be changing the pkt/byte limit to the
		 * available space in the next filter. But this is not
		 * useful until we can flow control the whole chain of
		 * filters.
		 */
		errno_t err = ifclassq_dequeue_common_default(ifq, sc,
		    pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);

		return ifp->if_na_ops->ni_dequeue(ifp->if_na, sc, pkt_limit,
		           byte_limit, head, tail, cnt, len, drvmgt, err);
	}
#endif /* SKYWALK */
	return ifclassq_dequeue_common_default(ifq, sc,
	           pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);
}
504 
/*
 * Propagate an interface event (link bandwidth/latency/MTU/up/down —
 * see cqev_t) to the scheduler.  Caller must hold the classq lock and
 * the queue must be ready.
 */
void
ifclassq_update(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));
	fq_if_request_classq(ifq, CLASSQRQ_EVENT, (void *)ev);
}
512 
/*
 * Record the scheduler type and its private discipline state on the
 * ifclassq.  Caller must hold the classq lock; no discipline may
 * already be attached.  Always returns 0.
 */
int
ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	ifq->ifcq_type = type;
	ifq->ifcq_disc = discipline;
	return 0;
}
522 
/*
 * Clear the scheduler type after the discipline state has already been
 * released (ifcq_disc must be NULL by now).  Caller holds the classq lock.
 */
void
ifclassq_detach(struct ifclassq *ifq)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	ifq->ifcq_type = PKTSCHEDT_NONE;
}
530 
531 int
ifclassq_getqstats(struct ifclassq * ifq,u_int8_t gid,u_int32_t qid,void * ubuf,u_int32_t * nbytes)532 ifclassq_getqstats(struct ifclassq *ifq, u_int8_t gid, u_int32_t qid, void *ubuf,
533     u_int32_t *nbytes)
534 {
535 	struct if_ifclassq_stats *ifqs;
536 	int err;
537 
538 	if (*nbytes < sizeof(*ifqs)) {
539 		return EINVAL;
540 	}
541 
542 	ifqs = kalloc_type(struct if_ifclassq_stats,
543 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
544 
545 	IFCQ_LOCK(ifq);
546 	if (!IFCQ_IS_READY(ifq)) {
547 		IFCQ_UNLOCK(ifq);
548 		kfree_type(struct if_ifclassq_stats, ifqs);
549 		return ENXIO;
550 	}
551 
552 	ifqs->ifqs_len = IFCQ_LEN(ifq);
553 	ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq);
554 	*(&ifqs->ifqs_xmitcnt) = *(&ifq->ifcq_xmitcnt);
555 	*(&ifqs->ifqs_dropcnt) = *(&ifq->ifcq_dropcnt);
556 	ifqs->ifqs_scheduler = ifq->ifcq_type;
557 
558 	err = pktsched_getqstats(ifq, gid, qid, ifqs);
559 	IFCQ_UNLOCK(ifq);
560 
561 	if (err == 0 && (err = copyout((caddr_t)ifqs,
562 	    (user_addr_t)(uintptr_t)ubuf, sizeof(*ifqs))) == 0) {
563 		*nbytes = sizeof(*ifqs);
564 	}
565 
566 	kfree_type(struct if_ifclassq_stats, ifqs);
567 
568 	return err;
569 }
570 
571 const char *
ifclassq_ev2str(cqev_t ev)572 ifclassq_ev2str(cqev_t ev)
573 {
574 	const char *c;
575 
576 	switch (ev) {
577 	case CLASSQ_EV_LINK_BANDWIDTH:
578 		c = "LINK_BANDWIDTH";
579 		break;
580 
581 	case CLASSQ_EV_LINK_LATENCY:
582 		c = "LINK_LATENCY";
583 		break;
584 
585 	case CLASSQ_EV_LINK_MTU:
586 		c = "LINK_MTU";
587 		break;
588 
589 	case CLASSQ_EV_LINK_UP:
590 		c = "LINK_UP";
591 		break;
592 
593 	case CLASSQ_EV_LINK_DOWN:
594 		c = "LINK_DOWN";
595 		break;
596 
597 	default:
598 		c = "UNKNOWN";
599 		break;
600 	}
601 
602 	return c;
603 }
604 
/*
 * internal representation of token bucket parameters
 *	rate:	byte_per_unittime << 32
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	byte << 32
 *
 * The 32-bit fixed-point scaling keeps precision for slow rates, where
 * bytes-per-machclk-tick would otherwise round to zero.
 */
#define TBR_SHIFT       32
#define TBR_SCALE(x)    ((int64_t)(x) << TBR_SHIFT)
#define TBR_UNSCALE(x)  ((x) >> TBR_SHIFT)
615 
/*
 * TBR dequeue irrespective of service class (non-driver-managed path).
 */
void
ifclassq_tbr_dequeue(struct ifclassq *ifq, classq_pkt_t *pkt, u_int8_t grp_idx)
{
	ifclassq_tbr_dequeue_common(ifq, MBUF_SC_UNSPEC, FALSE, pkt, grp_idx);
}
621 
/*
 * Driver-managed TBR dequeue for a specific (valid) service class.
 */
void
ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    classq_pkt_t *pkt, u_int8_t grp_idx)
{
	ifclassq_tbr_dequeue_common(ifq, sc, TRUE, pkt, grp_idx);
}
628 
/*
 * Dequeue a single packet through the token bucket regulator.  Tokens
 * are replenished lazily (only when the bucket has gone non-positive);
 * if still non-positive after the refill, no packet may be sent this
 * cycle and *pkt is left empty.  On a successful dequeue the packet
 * length (TBR_SCALE fixed point) is debited from the bucket.
 * Caller must hold the classq lock; the TBR must be enabled.
 */
static void
ifclassq_tbr_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    boolean_t drvmgt, classq_pkt_t *pkt, u_int8_t grp_idx)
{
	struct tb_regulator *tbr;
	int64_t interval;
	u_int64_t now;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));

	*pkt = CLASSQ_PKT_INITIALIZER(*pkt);
	tbr = &ifq->ifcq_tbr;
	/* update token only when it is negative */
	if (tbr->tbr_token <= 0) {
		now = read_machclk();
		interval = now - tbr->tbr_last;
		if (interval >= tbr->tbr_filluptime) {
			/* enough time has elapsed to refill completely */
			tbr->tbr_token = tbr->tbr_depth;
		} else {
			/* partial refill, capped at the bucket depth */
			tbr->tbr_token += interval * tbr->tbr_rate;
			if (tbr->tbr_token > tbr->tbr_depth) {
				tbr->tbr_token = tbr->tbr_depth;
			}
		}
		tbr->tbr_last = now;
	}
	/* if token is still negative, don't allow dequeue */
	if (tbr->tbr_token <= 0) {
		return;
	}

	/*
	 * ifclassq takes precedence over ALTQ queue;
	 * ifcq_drain count is adjusted by the caller.
	 * (NOTE(review): the ALTQ reference is historical — fq_codel is
	 * the only scheduler reachable from here.)
	 */
	if (drvmgt) {
		fq_if_dequeue_sc_classq(ifq, sc, pkt, grp_idx);
	} else {
		fq_if_dequeue_classq(ifq, pkt, grp_idx);
	}

	if (pkt->cp_mbuf != NULL) {
		/* charge the dequeued packet's length against the bucket */
		switch (pkt->cp_ptype) {
		case QP_MBUF:
			tbr->tbr_token -= TBR_SCALE(m_pktlen(pkt->cp_mbuf));
			break;

#if SKYWALK
		case QP_PACKET:
			tbr->tbr_token -=
			    TBR_SCALE(pkt->cp_kpkt->pkt_length);
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
		}
	}
}
692 
/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 *
 * Returns 0 on success; EINVAL for a percentage > 100, ENODEV when a
 * percentage is requested but the effective link bandwidth is unknown.
 * Caller must hold the classq lock; the queue must be ready.  When
 * 'update' is TRUE, a LINK_BANDWIDTH event is sent to the scheduler if
 * the raw rate actually changed.
 */
int
ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
    boolean_t update)
{
	struct tb_regulator *tbr;
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int64_t rate, old_rate;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));

	VERIFY(machclk_freq != 0);

	tbr = &ifq->ifcq_tbr;
	old_rate = tbr->tbr_rate_raw;

	rate = profile->rate;
	if (profile->percent > 0) {
		u_int64_t eff_rate;

		if (profile->percent > 100) {
			return EINVAL;
		}
		/* rate expressed as a percentage of effective link bw */
		if ((eff_rate = ifp->if_output_bw.eff_bw) == 0) {
			return ENODEV;
		}
		rate = (eff_rate * profile->percent) / 100;
	}

	if (rate == 0) {
		if (!IFCQ_TBR_IS_ENABLED(ifq)) {
			return 0;
		}

		if (pktsched_verbose) {
			printf("%s: TBR disabled\n", if_name(ifp));
		}

		/* disable this TBR */
		ifq->ifcq_flags &= ~IFCQF_TBR;
		bzero(tbr, sizeof(*tbr));
		ifnet_set_start_cycle(ifp, NULL);
		if (update) {
			ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
		}
		return 0;
	}

	if (pktsched_verbose) {
		printf("%s: TBR %s (rate %llu bps depth %u)\n", if_name(ifp),
		    (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" :
		    "enabled", rate, profile->depth);
	}

	/* set the new TBR */
	bzero(tbr, sizeof(*tbr));
	tbr->tbr_rate_raw = rate;
	tbr->tbr_percent = profile->percent;
	ifq->ifcq_flags |= IFCQF_TBR;

	/*
	 * Note that the TBR fill up time (hence the ifnet restart time)
	 * is directly related to the specified TBR depth.  The ideal
	 * depth value should be computed such that the interval time
	 * between each successive wakeup is adequately spaced apart,
	 * in order to reduce scheduling overheads.  A target interval
	 * of 10 ms seems to provide good performance balance.  This can be
	 * overridden by specifying the depth profile.  Values smaller than
	 * the ideal depth will reduce delay at the expense of CPU cycles.
	 */
	tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq;
	if (tbr->tbr_rate > 0) {
		u_int32_t mtu = ifp->if_mtu;
		int64_t ival, idepth = 0;
		int i;

		if (mtu < IF_MINMTU) {
			mtu = IF_MINMTU;
		}

		ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */

		/* smallest whole-MTU multiple whose drain time exceeds ival */
		for (i = 1;; i++) {
			idepth = TBR_SCALE(i * mtu);
			if ((idepth / tbr->tbr_rate) > ival) {
				break;
			}
		}
		VERIFY(idepth > 0);

		tbr->tbr_depth = TBR_SCALE(profile->depth);
		if (tbr->tbr_depth == 0) {
			tbr->tbr_filluptime = idepth / tbr->tbr_rate;
			/* a little fudge factor to get closer to rate */
			tbr->tbr_depth = idepth + (idepth >> 3);
		} else {
			tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
		}
	} else {
		/* rate too slow to represent: bucket effectively never refills */
		tbr->tbr_depth = TBR_SCALE(profile->depth);
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	}
	/* start with a full bucket */
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();

	if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) {
		struct timespec ts =
		{ 0, (long)pktsched_abs_to_nsecs(tbr->tbr_filluptime) };
		if (pktsched_verbose) {
			printf("%s: TBR calculated tokens %lld "
			    "filluptime %llu ns\n", if_name(ifp),
			    TBR_UNSCALE(tbr->tbr_token),
			    pktsched_abs_to_nsecs(tbr->tbr_filluptime));
		}
		ifnet_set_start_cycle(ifp, &ts);
	} else {
		if (pktsched_verbose) {
			if (tbr->tbr_rate == 0) {
				printf("%s: TBR calculated tokens %lld "
				    "infinite filluptime\n", if_name(ifp),
				    TBR_UNSCALE(tbr->tbr_token));
			} else if (!(ifp->if_flags & IFF_UP)) {
				printf("%s: TBR suspended (link is down)\n",
				    if_name(ifp));
			}
		}
		ifnet_set_start_cycle(ifp, NULL);
	}
	if (update && tbr->tbr_rate_raw != old_rate) {
		ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
	}

	return 0;
}
831 
832 void
ifclassq_calc_target_qdelay(struct ifnet * ifp,uint64_t * if_target_qdelay,uint32_t flags)833 ifclassq_calc_target_qdelay(struct ifnet *ifp, uint64_t *if_target_qdelay,
834     uint32_t flags)
835 {
836 	uint64_t qdelay = 0, qdelay_configed = 0, qdely_default = 0;
837 	if (flags == IF_CLASSQ_DEF) {
838 		qdelay = IFCQ_TARGET_QDELAY(ifp->if_snd);
839 	}
840 
841 	switch (flags) {
842 	case IF_CLASSQ_DEF:
843 		qdelay_configed = ifclassq_def_c_target_qdelay;
844 		qdely_default = IFQ_DEF_C_TARGET_DELAY;
845 		break;
846 	case IF_CLASSQ_L4S:
847 		qdelay_configed = ifclassq_def_l4s_target_qdelay;
848 		qdely_default = IFQ_DEF_L4S_TARGET_DELAY;
849 		break;
850 	case IF_CLASSQ_LOW_LATENCY:
851 		qdelay_configed = ifclassq_ll_c_target_qdelay;
852 		qdely_default = IFQ_LL_C_TARGET_DELAY;
853 		break;
854 	case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S):
855 		qdelay_configed = ifclassq_ll_l4s_target_qdelay;
856 		qdely_default = IFQ_LL_L4S_TARGET_DELAY;
857 		break;
858 	default:
859 		VERIFY(0);
860 		/* NOTREACHED */
861 		__builtin_unreachable();
862 	}
863 
864 	if (qdelay_configed != 0) {
865 		qdelay = qdelay_configed;
866 	}
867 
868 	/*
869 	 * If we do not know the effective bandwidth, use the default
870 	 * target queue delay.
871 	 */
872 	if (qdelay == 0) {
873 		qdelay = qdely_default;
874 	}
875 
876 	/*
877 	 * If a delay has been added to ifnet start callback for
878 	 * coalescing, we have to add that to the pre-set target delay
879 	 * because the packets can be in the queue longer.
880 	 */
881 	if ((ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
882 	    ifp->if_start_delay_timeout > 0) {
883 		qdelay += ifp->if_start_delay_timeout;
884 	}
885 
886 	*(if_target_qdelay) = qdelay;
887 }
888 
889 void
ifclassq_calc_update_interval(uint64_t * update_interval,uint32_t flags)890 ifclassq_calc_update_interval(uint64_t *update_interval, uint32_t flags)
891 {
892 	uint64_t interval = 0, interval_configed = 0, interval_default = 0;
893 
894 	switch (flags) {
895 	case IF_CLASSQ_DEF:
896 		interval_configed = ifclassq_def_c_update_interval;
897 		interval_default = IFQ_DEF_C_UPDATE_INTERVAL;
898 		break;
899 	case IF_CLASSQ_L4S:
900 		interval_configed = ifclassq_def_l4s_update_interval;
901 		interval_default = IFQ_DEF_L4S_UPDATE_INTERVAL;
902 		break;
903 	case IF_CLASSQ_LOW_LATENCY:
904 		interval_configed = ifclassq_ll_c_update_interval;
905 		interval_default = IFQ_LL_C_UPDATE_INTERVAL;
906 		break;
907 	case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S):
908 		interval_configed = ifclassq_ll_l4s_update_interval;
909 		interval_default = IFQ_LL_L4S_UPDATE_INTERVAL;
910 		break;
911 	default:
912 		VERIFY(0);
913 		/* NOTREACHED */
914 		__builtin_unreachable();
915 	}
916 
917 	/* If the system level override is set, use it */
918 	if (interval_configed != 0) {
919 		interval = interval_configed;
920 	}
921 
922 	/* Otherwise use the default value */
923 	if (interval == 0) {
924 		interval = interval_default;
925 	}
926 
927 	*update_interval = interval;
928 }
929 
/*
 * Reclaim memory from the fq_codel and flow-advisory caches; 'purge'
 * is passed through to both reapers.
 */
void
ifclassq_reap_caches(boolean_t purge)
{
	fq_codel_reap_caches(purge);
	flowadv_reap_caches(purge);
}
936 
/*
 * Allocate a zero-filled ifclassq and initialize its lock.
 * NOTE(review): os_ref_init starts the count at 1 and the extra
 * os_ref_retain brings it to 2 — presumably one reference for the
 * caller and one held elsewhere; confirm against the callers before
 * changing this.
 */
struct ifclassq *
ifclassq_alloc(void)
{
	struct ifclassq *ifcq;

	ifcq = zalloc_flags(ifcq_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init(&ifcq->ifcq_refcnt, NULL);
	os_ref_retain(&ifcq->ifcq_refcnt);
	lck_mtx_init(&ifcq->ifcq_lock, &ifcq_lock_group, &ifcq_lock_attr);
	return ifcq;
}
948 
/*
 * Take an additional reference on the ifclassq.
 */
void
ifclassq_retain(struct ifclassq *ifcq)
{
	os_ref_retain(&ifcq->ifcq_refcnt);
}
954 
/*
 * Drop a reference on *pifcq and NULL out the caller's pointer.  When
 * the last reference goes away, the queue is torn down and freed back
 * to the zone.
 */
void
ifclassq_release(struct ifclassq **pifcq)
{
	struct ifclassq *ifcq = *pifcq;

	/* clear the caller's pointer first to prevent reuse after release */
	*pifcq = NULL;
	if (os_ref_release(&ifcq->ifcq_refcnt) == 0) {
		ifclassq_teardown(ifcq);
		zfree(ifcq_zone, ifcq);
	}
}
966 
/*
 * Create an fq_codel queue group with index grp_idx and the given
 * group flags.  The scheduler must already be attached.
 *
 * Returns the result of fq_if_create_grp().
 */
int
ifclassq_setup_group(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
{
	int err;

	IFCQ_LOCK(ifcq);
	VERIFY(ifcq->ifcq_disc != NULL);
	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	err = fq_if_create_grp(ifcq, grp_idx, flags);
	IFCQ_UNLOCK(ifcq);

	return err;
}
981 
/*
 * Mark queue group grp_idx as combined (semantics defined by
 * fq_if_set_grp_combined in the fq_codel scheduler).
 */
void
ifclassq_set_grp_combined(struct ifclassq *ifcq, uint8_t grp_idx)
{
	IFCQ_LOCK(ifcq);
	VERIFY(ifcq->ifcq_disc != NULL);
	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_set_grp_combined(ifcq, grp_idx);
	IFCQ_UNLOCK(ifcq);
}
992 
/*
 * Mark queue group grp_idx as separated — the inverse of
 * ifclassq_set_grp_combined().
 */
void
ifclassq_set_grp_separated(struct ifclassq *ifcq, uint8_t grp_idx)
{
	IFCQ_LOCK(ifcq);
	VERIFY(ifcq->ifcq_disc != NULL);
	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_set_grp_separated(ifcq, grp_idx);
	IFCQ_UNLOCK(ifcq);
}
1003