xref: /xnu-11417.140.69/bsd/net/classq/classq_subr.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 /*
2  * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/mbuf.h>
32 #include <sys/errno.h>
33 #include <sys/random.h>
34 #include <sys/kernel_types.h>
35 #include <sys/sysctl.h>
36 
37 #include <kern/zalloc.h>
38 
39 #include <net/if.h>
40 #include <net/net_osdep.h>
41 #include <net/classq/classq.h>
42 #include <pexpert/pexpert.h>
43 #include <net/classq/classq_sfb.h>
44 #include <net/classq/classq_fq_codel.h>
45 #include <net/pktsched/pktsched.h>
46 #include <net/pktsched/pktsched_fq_codel.h>
47 #include <net/flowadv.h>
48 
49 #include <libkern/libkern.h>
50 
51 #if SKYWALK
52 #include <skywalk/os_skywalk_private.h>
53 #include <skywalk/nexus/netif/nx_netif.h>
54 #endif /* SKYWALK */
55 
56 static errno_t ifclassq_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
57     u_int32_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
58     u_int32_t *, boolean_t, u_int8_t);
59 static void ifclassq_tbr_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
60     boolean_t, classq_pkt_t *, u_int8_t);
61 
62 static uint64_t ifclassq_def_c_target_qdelay = 0;
63 SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
64     &ifclassq_def_c_target_qdelay, "def classic target queue delay in nanoseconds");
65 
66 static uint64_t ifclassq_def_c_update_interval = 0;
67 SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_update_interval,
68     CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_c_update_interval,
69     "def classic update interval in nanoseconds");
70 
71 static uint64_t ifclassq_def_l4s_target_qdelay = 0;
72 SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
73     &ifclassq_def_l4s_target_qdelay, "def L4S target queue delay in nanoseconds");
74 
75 static uint64_t ifclassq_def_l4s_update_interval = 0;
76 SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_update_interval,
77     CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_l4s_update_interval,
78     "def L4S update interval in nanoseconds");
79 
80 static uint64_t ifclassq_ll_c_target_qdelay = 0;
81 SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
82     &ifclassq_ll_c_target_qdelay, "low latency classic target queue delay in nanoseconds");
83 
84 static uint64_t ifclassq_ll_c_update_interval = 0;
85 SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_update_interval,
86     CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_c_update_interval,
87     "low latency classic update interval in nanoseconds");
88 
89 static uint64_t ifclassq_ll_l4s_target_qdelay = 0;
90 SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
91     &ifclassq_ll_l4s_target_qdelay, "low latency L4S target queue delay in nanoseconds");
92 
93 static uint64_t ifclassq_ll_l4s_update_interval = 0;
94 SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_update_interval,
95     CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_l4s_update_interval,
96     "low latency L4S update interval in nanoseconds");
97 
98 uint32_t ifclassq_enable_l4s = 1;
99 SYSCTL_UINT(_net_classq, OID_AUTO, enable_l4s,
100     CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_enable_l4s, 0,
101     "enable/disable L4S");
102 
103 #if DEBUG || DEVELOPMENT
104 uint32_t ifclassq_flow_control_adv = 1; /* flow control advisory */
105 SYSCTL_UINT(_net_classq, OID_AUTO, flow_control_adv,
106     CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_flow_control_adv, 1,
107     "enable/disable flow control advisory");
108 
109 uint32_t fq_codel_quantum = 0;
110 #endif /* DEBUG || DEVELOPMENT */
111 
112 static KALLOC_TYPE_DEFINE(ifcq_zone, struct ifclassq, NET_KT_DEFAULT);
113 LCK_ATTR_DECLARE(ifcq_lock_attr, 0, 0);
114 static LCK_GRP_DECLARE(ifcq_lock_group, "ifclassq locks");
115 
116 void
classq_init(void)117 classq_init(void)
118 {
119 	_CASSERT(MBUF_TC_BE == 0);
120 	_CASSERT(MBUF_SC_BE == 0);
121 	_CASSERT(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);
122 #if DEBUG || DEVELOPMENT
123 	PE_parse_boot_argn("fq_codel_quantum", &fq_codel_quantum,
124 	    sizeof(fq_codel_quantum));
125 	PE_parse_boot_argn("ifclassq_def_c_target_qdelay", &ifclassq_def_c_target_qdelay,
126 	    sizeof(ifclassq_def_c_target_qdelay));
127 	PE_parse_boot_argn("ifclassq_def_c_update_interval",
128 	    &ifclassq_def_c_update_interval, sizeof(ifclassq_def_c_update_interval));
129 	PE_parse_boot_argn("ifclassq_def_l4s_target_qdelay", &ifclassq_def_l4s_target_qdelay,
130 	    sizeof(ifclassq_def_l4s_target_qdelay));
131 	PE_parse_boot_argn("ifclassq_def_l4s_update_interval",
132 	    &ifclassq_def_l4s_update_interval, sizeof(ifclassq_def_l4s_update_interval));
133 	PE_parse_boot_argn("ifclassq_ll_c_target_qdelay", &ifclassq_ll_c_target_qdelay,
134 	    sizeof(ifclassq_ll_c_target_qdelay));
135 	PE_parse_boot_argn("ifclassq_ll_c_update_interval",
136 	    &ifclassq_ll_c_update_interval, sizeof(ifclassq_ll_c_update_interval));
137 	PE_parse_boot_argn("ifclassq_ll_l4s_target_qdelay", &ifclassq_ll_l4s_target_qdelay,
138 	    sizeof(ifclassq_ll_l4s_target_qdelay));
139 	PE_parse_boot_argn("ifclassq_ll_l4s_update_interval",
140 	    &ifclassq_ll_l4s_update_interval, sizeof(ifclassq_ll_l4s_update_interval));
141 #endif /* DEBUG || DEVELOPMENT */
142 	fq_codel_init();
143 }
144 
145 int
ifclassq_setup(struct ifclassq * ifq,struct ifnet * ifp,uint32_t sflags)146 ifclassq_setup(struct ifclassq *ifq, struct ifnet *ifp, uint32_t sflags)
147 {
148 	int err = 0;
149 
150 	IFCQ_LOCK(ifq);
151 	VERIFY(IFCQ_IS_EMPTY(ifq));
152 	ifq->ifcq_ifp = ifp;
153 	IFCQ_LEN(ifq) = 0;
154 	IFCQ_BYTES(ifq) = 0;
155 	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
156 	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));
157 
158 	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
159 	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
160 	VERIFY(ifq->ifcq_flags == 0);
161 	VERIFY(ifq->ifcq_sflags == 0);
162 	VERIFY(ifq->ifcq_disc == NULL);
163 
164 	if (ifp->if_eflags & IFEF_TXSTART) {
165 		u_int32_t maxlen = 0;
166 
167 		if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
168 			maxlen = if_sndq_maxlen;
169 		}
170 		IFCQ_SET_MAXLEN(ifq, maxlen);
171 
172 		if (IFCQ_MAXLEN(ifq) != if_sndq_maxlen &&
173 		    IFCQ_TARGET_QDELAY(ifq) == 0) {
174 			/*
175 			 * Choose static queues because the interface has
176 			 * maximum queue size set
177 			 */
178 			sflags &= ~PKTSCHEDF_QALG_DELAYBASED;
179 		}
180 		ifq->ifcq_sflags = sflags;
181 		err = ifclassq_pktsched_setup(ifq);
182 		if (err == 0) {
183 			ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
184 		}
185 	}
186 	IFCQ_UNLOCK(ifq);
187 	return err;
188 }
189 
190 void
ifclassq_teardown(struct ifclassq * ifq)191 ifclassq_teardown(struct ifclassq *ifq)
192 {
193 	IFCQ_LOCK(ifq);
194 	if (IFCQ_IS_DESTROYED(ifq)) {
195 		ASSERT((ifq->ifcq_flags & ~IFCQF_DESTROYED) == 0);
196 		goto done;
197 	}
198 	if (IFCQ_IS_READY(ifq)) {
199 		if (IFCQ_TBR_IS_ENABLED(ifq)) {
200 			struct tb_profile tb =
201 			{ .rate = 0, .percent = 0, .depth = 0 };
202 			(void) ifclassq_tbr_set(ifq, &tb, FALSE);
203 		}
204 		pktsched_teardown(ifq);
205 		ifq->ifcq_flags &= ~IFCQF_READY;
206 	}
207 	ifq->ifcq_sflags = 0;
208 	VERIFY(IFCQ_IS_EMPTY(ifq));
209 	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
210 	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
211 	VERIFY(ifq->ifcq_flags == 0);
212 	VERIFY(ifq->ifcq_sflags == 0);
213 	VERIFY(ifq->ifcq_disc == NULL);
214 	IFCQ_LEN(ifq) = 0;
215 	IFCQ_BYTES(ifq) = 0;
216 	IFCQ_MAXLEN(ifq) = 0;
217 	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
218 	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));
219 	ifq->ifcq_flags |= IFCQF_DESTROYED;
220 done:
221 	IFCQ_UNLOCK(ifq);
222 }
223 
224 int
ifclassq_pktsched_setup(struct ifclassq * ifq)225 ifclassq_pktsched_setup(struct ifclassq *ifq)
226 {
227 	struct ifnet *ifp = ifq->ifcq_ifp;
228 	classq_pkt_type_t ptype = QP_MBUF;
229 	int err = 0;
230 
231 	IFCQ_LOCK_ASSERT_HELD(ifq);
232 	VERIFY(ifp->if_eflags & IFEF_TXSTART);
233 #if SKYWALK
234 	ptype = ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) != 0) ? QP_PACKET :
235 	    QP_MBUF;
236 #endif /* SKYWALK */
237 
238 	err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL, ifq->ifcq_sflags, ptype);
239 
240 	return err;
241 }
242 
243 void
ifclassq_set_maxlen(struct ifclassq * ifq,u_int32_t maxqlen)244 ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen)
245 {
246 	IFCQ_LOCK(ifq);
247 	if (maxqlen == 0) {
248 		maxqlen = if_sndq_maxlen;
249 	}
250 	IFCQ_SET_MAXLEN(ifq, maxqlen);
251 	IFCQ_UNLOCK(ifq);
252 }
253 
254 u_int32_t
ifclassq_get_maxlen(struct ifclassq * ifq)255 ifclassq_get_maxlen(struct ifclassq *ifq)
256 {
257 	return IFCQ_MAXLEN(ifq);
258 }
259 
260 int
ifclassq_get_len(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int8_t grp_idx,u_int32_t * packets,u_int32_t * bytes)261 ifclassq_get_len(struct ifclassq *ifq, mbuf_svc_class_t sc, u_int8_t grp_idx,
262     u_int32_t *packets, u_int32_t *bytes)
263 {
264 	int err = 0;
265 
266 	IFCQ_LOCK(ifq);
267 	if ((ifq->ifcq_flags & (IFCQF_READY | IFCQF_ENABLED)) !=
268 	    (IFCQF_READY | IFCQF_ENABLED)) {
269 		return ENXIO;
270 	}
271 	if (sc == MBUF_SC_UNSPEC && grp_idx == IF_CLASSQ_ALL_GRPS) {
272 		VERIFY(packets != NULL);
273 		if (fq_if_is_all_paced(ifq)) {
274 			*packets = 0;
275 		} else {
276 			*packets = IFCQ_LEN(ifq);
277 		}
278 	} else {
279 		cqrq_stat_sc_t req = { sc, grp_idx, 0, 0 };
280 
281 		VERIFY(MBUF_VALID_SC(sc) || sc == MBUF_SC_UNSPEC);
282 
283 		err = fq_if_request_classq(ifq, CLASSQRQ_STAT_SC, &req);
284 		if (packets != NULL) {
285 			*packets = req.packets;
286 		}
287 		if (bytes != NULL) {
288 			*bytes = req.bytes;
289 		}
290 	}
291 	KDBG(AQM_KTRACE_STATS_GET_QLEN, ifq->ifcq_ifp->if_index,
292 	    packets ? *packets : 0, bytes ? *bytes : 0, fq_if_is_all_paced(ifq));
293 
294 	IFCQ_UNLOCK(ifq);
295 
296 #if SKYWALK
297 	struct ifnet *ifp = ifq->ifcq_ifp;
298 
299 	if (__improbable(ifp->if_na_ops != NULL &&
300 	    ifp->if_na_ops->ni_get_len != NULL)) {
301 		err = ifp->if_na_ops->ni_get_len(ifp->if_na, sc, packets,
302 		    bytes, err);
303 	}
304 #endif /* SKYWALK */
305 
306 	return err;
307 }
308 
309 inline void
ifclassq_set_packet_metadata(struct ifclassq * ifq,struct ifnet * ifp,classq_pkt_t * p)310 ifclassq_set_packet_metadata(struct ifclassq *ifq, struct ifnet *ifp,
311     classq_pkt_t *p)
312 {
313 	if (!IFNET_IS_CELLULAR(ifp)) {
314 		return;
315 	}
316 
317 	switch (p->cp_ptype) {
318 	case QP_MBUF: {
319 		struct mbuf *m = p->cp_mbuf;
320 		m->m_pkthdr.pkt_flags |= PKTF_VALID_UNSENT_DATA;
321 		m->m_pkthdr.bufstatus_if = IFCQ_BYTES(ifq);
322 		m->m_pkthdr.bufstatus_sndbuf = (uint32_t)ifp->if_sndbyte_unsent;
323 		break;
324 	}
325 
326 #if SKYWALK
327 	case QP_PACKET:
328 		/*
329 		 * Support for equivalent of mbuf_get_unsent_data_bytes()
330 		 * is not needed in the Skywalk architecture.
331 		 */
332 		break;
333 #endif /* SKYWALK */
334 
335 	default:
336 		VERIFY(0);
337 		/* NOTREACHED */
338 		__builtin_unreachable();
339 	}
340 }
341 
342 errno_t
ifclassq_enqueue(struct ifclassq * ifq,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t cnt,u_int32_t bytes,boolean_t * pdrop)343 ifclassq_enqueue(struct ifclassq *ifq, classq_pkt_t *head, classq_pkt_t *tail,
344     u_int32_t cnt, u_int32_t bytes, boolean_t *pdrop)
345 {
346 	return fq_if_enqueue_classq(ifq, head, tail, cnt, bytes, pdrop);
347 }
348 
349 errno_t
ifclassq_dequeue(struct ifclassq * ifq,u_int32_t pkt_limit,u_int32_t byte_limit,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t * cnt,u_int32_t * len,u_int8_t grp_idx)350 ifclassq_dequeue(struct ifclassq *ifq, u_int32_t pkt_limit,
351     u_int32_t byte_limit, classq_pkt_t *head, classq_pkt_t *tail,
352     u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
353 {
354 	return ifclassq_dequeue_common(ifq, MBUF_SC_UNSPEC, pkt_limit,
355 	           byte_limit, head, tail, cnt, len, FALSE, grp_idx);
356 }
357 
358 errno_t
ifclassq_dequeue_sc(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int32_t pkt_limit,u_int32_t byte_limit,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t * cnt,u_int32_t * len,u_int8_t grp_idx)359 ifclassq_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
360     u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
361     classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
362 {
363 	return ifclassq_dequeue_common(ifq, sc, pkt_limit, byte_limit,
364 	           head, tail, cnt, len, TRUE, grp_idx);
365 }
366 
367 static errno_t
ifclassq_dequeue_common_default(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int32_t pkt_limit,u_int32_t byte_limit,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t * cnt,u_int32_t * len,boolean_t drvmgt,u_int8_t grp_idx)368 ifclassq_dequeue_common_default(struct ifclassq *ifq, mbuf_svc_class_t sc,
369     u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
370     classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
371     u_int8_t grp_idx)
372 {
373 	struct ifnet *ifp = ifq->ifcq_ifp;
374 	u_int32_t i = 0, l = 0;
375 	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
376 	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
377 
378 	VERIFY(!drvmgt || MBUF_VALID_SC(sc));
379 
380 	if (IFCQ_TBR_IS_ENABLED(ifq)) {
381 		goto dequeue_loop;
382 	}
383 
384 	/*
385 	 * If the scheduler support dequeueing multiple packets at the
386 	 * same time, call that one instead.
387 	 */
388 	if (drvmgt) {
389 		int err;
390 
391 		IFCQ_LOCK_SPIN(ifq);
392 		err = fq_if_dequeue_sc_classq_multi(ifq, sc, pkt_limit,
393 		    byte_limit, head, tail, cnt, len, grp_idx);
394 		IFCQ_UNLOCK(ifq);
395 
396 		if (err == 0 && head->cp_mbuf == NULL) {
397 			err = EAGAIN;
398 		}
399 		return err;
400 	} else {
401 		int err;
402 
403 		IFCQ_LOCK_SPIN(ifq);
404 		err = fq_if_dequeue_classq_multi(ifq, pkt_limit, byte_limit,
405 		    head, tail, cnt, len, grp_idx);
406 		IFCQ_UNLOCK(ifq);
407 
408 		if (err == 0 && head->cp_mbuf == NULL) {
409 			err = EAGAIN;
410 		}
411 		return err;
412 	}
413 
414 dequeue_loop:
415 	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));
416 	IFCQ_LOCK_SPIN(ifq);
417 
418 	while (i < pkt_limit && l < byte_limit) {
419 		if (drvmgt) {
420 			IFCQ_TBR_DEQUEUE_SC(ifq, sc, head, grp_idx);
421 		} else {
422 			IFCQ_TBR_DEQUEUE(ifq, head, grp_idx);
423 		}
424 
425 		if (head->cp_mbuf == NULL) {
426 			break;
427 		}
428 
429 		if (first.cp_mbuf == NULL) {
430 			first = *head;
431 		}
432 
433 		switch (head->cp_ptype) {
434 		case QP_MBUF:
435 			head->cp_mbuf->m_nextpkt = NULL;
436 			l += head->cp_mbuf->m_pkthdr.len;
437 			ifclassq_set_packet_metadata(ifq, ifp, head);
438 			if (last.cp_mbuf != NULL) {
439 				last.cp_mbuf->m_nextpkt = head->cp_mbuf;
440 			}
441 			break;
442 
443 #if SKYWALK
444 		case QP_PACKET:
445 			head->cp_kpkt->pkt_nextpkt = NULL;
446 			l += head->cp_kpkt->pkt_length;
447 			ifclassq_set_packet_metadata(ifq, ifp, head);
448 			if (last.cp_kpkt != NULL) {
449 				last.cp_kpkt->pkt_nextpkt = head->cp_kpkt;
450 			}
451 			break;
452 #endif /* SKYWALK */
453 
454 		default:
455 			VERIFY(0);
456 			/* NOTREACHED */
457 			__builtin_unreachable();
458 		}
459 
460 		last = *head;
461 		i++;
462 	}
463 
464 	IFCQ_UNLOCK(ifq);
465 
466 	if (tail != NULL) {
467 		*tail = last;
468 	}
469 	if (cnt != NULL) {
470 		*cnt = i;
471 	}
472 	if (len != NULL) {
473 		*len = l;
474 	}
475 
476 	*head = first;
477 	return (first.cp_mbuf != NULL) ? 0 : EAGAIN;
478 }
479 
480 static errno_t
ifclassq_dequeue_common(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int32_t pkt_limit,u_int32_t byte_limit,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t * cnt,u_int32_t * len,boolean_t drvmgt,u_int8_t grp_idx)481 ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
482     u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
483     classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
484     u_int8_t grp_idx)
485 {
486 #if SKYWALK
487 	struct ifnet *ifp = ifq->ifcq_ifp;
488 
489 	if (__improbable(ifp->if_na_ops != NULL &&
490 	    ifp->if_na_ops->ni_dequeue != NULL)) {
491 		/*
492 		 * TODO:
493 		 * We should be changing the pkt/byte limit to the
494 		 * available space in the next filter. But this is not
495 		 * useful until we can flow control the whole chain of
496 		 * filters.
497 		 */
498 		errno_t err = ifclassq_dequeue_common_default(ifq, sc,
499 		    pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);
500 
501 		return ifp->if_na_ops->ni_dequeue(ifp->if_na, sc, pkt_limit,
502 		           byte_limit, head, tail, cnt, len, drvmgt, err);
503 	}
504 #endif /* SKYWALK */
505 	return ifclassq_dequeue_common_default(ifq, sc,
506 	           pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);
507 }
508 
509 void
ifclassq_update(struct ifclassq * ifq,cqev_t ev)510 ifclassq_update(struct ifclassq *ifq, cqev_t ev)
511 {
512 	void *ev_p = (void *)&ev;
513 
514 	IFCQ_LOCK_ASSERT_HELD(ifq);
515 	VERIFY(IFCQ_IS_READY(ifq));
516 
517 	fq_if_request_classq(ifq, CLASSQRQ_EVENT, ev_p);
518 }
519 
520 int
ifclassq_attach(struct ifclassq * ifq,u_int32_t type,void * discipline)521 ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline)
522 {
523 	IFCQ_LOCK_ASSERT_HELD(ifq);
524 	VERIFY(ifq->ifcq_disc == NULL);
525 	ifq->ifcq_type = type;
526 	ifq->ifcq_disc = discipline;
527 	return 0;
528 }
529 
530 void
ifclassq_detach(struct ifclassq * ifq)531 ifclassq_detach(struct ifclassq *ifq)
532 {
533 	IFCQ_LOCK_ASSERT_HELD(ifq);
534 	VERIFY(ifq->ifcq_disc == NULL);
535 	ifq->ifcq_type = PKTSCHEDT_NONE;
536 }
537 
538 int
ifclassq_getqstats(struct ifclassq * ifq,u_int8_t gid,u_int32_t qid,void * ubuf,u_int32_t * nbytes)539 ifclassq_getqstats(struct ifclassq *ifq, u_int8_t gid, u_int32_t qid, void *ubuf,
540     u_int32_t *nbytes)
541 {
542 	struct if_ifclassq_stats *ifqs;
543 	int err;
544 
545 	if (*nbytes < sizeof(*ifqs)) {
546 		return EINVAL;
547 	}
548 
549 	ifqs = kalloc_type(struct if_ifclassq_stats,
550 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
551 
552 	IFCQ_LOCK(ifq);
553 	if (!IFCQ_IS_READY(ifq)) {
554 		IFCQ_UNLOCK(ifq);
555 		kfree_type(struct if_ifclassq_stats, ifqs);
556 		return ENXIO;
557 	}
558 
559 	ifqs->ifqs_len = IFCQ_LEN(ifq);
560 	ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq);
561 	*(&ifqs->ifqs_xmitcnt) = *(&ifq->ifcq_xmitcnt);
562 	*(&ifqs->ifqs_dropcnt) = *(&ifq->ifcq_dropcnt);
563 	ifqs->ifqs_scheduler = ifq->ifcq_type;
564 	ifqs->ifqs_doorbells = ifq->ifcq_doorbells;
565 
566 	err = pktsched_getqstats(ifq, gid, qid, ifqs);
567 	IFCQ_UNLOCK(ifq);
568 
569 	if (err == 0 && (err = copyout(ifqs,
570 	    (user_addr_t)(uintptr_t)ubuf, sizeof(*ifqs))) == 0) {
571 		*nbytes = sizeof(*ifqs);
572 	}
573 
574 	kfree_type(struct if_ifclassq_stats, ifqs);
575 
576 	return err;
577 }
578 
579 const char *__null_terminated
ifclassq_ev2str(cqev_t ev)580 ifclassq_ev2str(cqev_t ev)
581 {
582 	const char *__null_terminated c = "";
583 
584 	switch (ev) {
585 	case CLASSQ_EV_LINK_BANDWIDTH:
586 		c = "LINK_BANDWIDTH";
587 		break;
588 
589 	case CLASSQ_EV_LINK_LATENCY:
590 		c = "LINK_LATENCY";
591 		break;
592 
593 	case CLASSQ_EV_LINK_MTU:
594 		c = "LINK_MTU";
595 		break;
596 
597 	case CLASSQ_EV_LINK_UP:
598 		c = "LINK_UP";
599 		break;
600 
601 	case CLASSQ_EV_LINK_DOWN:
602 		c = "LINK_DOWN";
603 		break;
604 
605 	default:
606 		c = "UNKNOWN";
607 		break;
608 	}
609 
610 	return c;
611 }
612 
/*
 * Token bucket parameters are kept as fixed-point values with a 32-bit
 * fraction (i.e. shifted left by TBR_SHIFT):
 *
 *	rate:	bytes per machine-clock tick, scaled;
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	bytes, scaled (bytes << 32)
 */
#define TBR_SHIFT       32
#define TBR_SCALE(x)    ((int64_t)(x) << TBR_SHIFT)
#define TBR_UNSCALE(x)  ((x) >> TBR_SHIFT)
623 
624 void
ifclassq_tbr_dequeue(struct ifclassq * ifq,classq_pkt_t * pkt,u_int8_t grp_idx)625 ifclassq_tbr_dequeue(struct ifclassq *ifq, classq_pkt_t *pkt, u_int8_t grp_idx)
626 {
627 	ifclassq_tbr_dequeue_common(ifq, MBUF_SC_UNSPEC, FALSE, pkt, grp_idx);
628 }
629 
630 void
ifclassq_tbr_dequeue_sc(struct ifclassq * ifq,mbuf_svc_class_t sc,classq_pkt_t * pkt,u_int8_t grp_idx)631 ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
632     classq_pkt_t *pkt, u_int8_t grp_idx)
633 {
634 	ifclassq_tbr_dequeue_common(ifq, sc, TRUE, pkt, grp_idx);
635 }
636 
637 static void
ifclassq_tbr_dequeue_common(struct ifclassq * ifq,mbuf_svc_class_t sc,boolean_t drvmgt,classq_pkt_t * pkt,u_int8_t grp_idx)638 ifclassq_tbr_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
639     boolean_t drvmgt, classq_pkt_t *pkt, u_int8_t grp_idx)
640 {
641 	struct tb_regulator *tbr;
642 	int64_t interval;
643 	u_int64_t now;
644 
645 	IFCQ_LOCK_ASSERT_HELD(ifq);
646 
647 	VERIFY(!drvmgt || MBUF_VALID_SC(sc));
648 	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));
649 
650 	*pkt = CLASSQ_PKT_INITIALIZER(*pkt);
651 	tbr = &ifq->ifcq_tbr;
652 	/* update token only when it is negative */
653 	if (tbr->tbr_token <= 0) {
654 		now = read_machclk();
655 		interval = now - tbr->tbr_last;
656 		if (interval >= tbr->tbr_filluptime) {
657 			tbr->tbr_token = tbr->tbr_depth;
658 		} else {
659 			tbr->tbr_token += interval * tbr->tbr_rate;
660 			if (tbr->tbr_token > tbr->tbr_depth) {
661 				tbr->tbr_token = tbr->tbr_depth;
662 			}
663 		}
664 		tbr->tbr_last = now;
665 	}
666 	/* if token is still negative, don't allow dequeue */
667 	if (tbr->tbr_token <= 0) {
668 		return;
669 	}
670 
671 	/*
672 	 * ifclassq takes precedence over ALTQ queue;
673 	 * ifcq_drain count is adjusted by the caller.
674 	 */
675 	if (drvmgt) {
676 		fq_if_dequeue_sc_classq(ifq, sc, pkt, grp_idx);
677 	} else {
678 		fq_if_dequeue_classq(ifq, pkt, grp_idx);
679 	}
680 
681 	if (pkt->cp_mbuf != NULL) {
682 		switch (pkt->cp_ptype) {
683 		case QP_MBUF:
684 			tbr->tbr_token -= TBR_SCALE(m_pktlen(pkt->cp_mbuf));
685 			break;
686 
687 #if SKYWALK
688 		case QP_PACKET:
689 			tbr->tbr_token -=
690 			    TBR_SCALE(pkt->cp_kpkt->pkt_length);
691 			break;
692 #endif /* SKYWALK */
693 
694 		default:
695 			VERIFY(0);
696 			/* NOTREACHED */
697 		}
698 	}
699 }
700 
701 /*
702  * set a token bucket regulator.
703  * if the specified rate is zero, the token bucket regulator is deleted.
704  */
705 int
ifclassq_tbr_set(struct ifclassq * ifq,struct tb_profile * profile,boolean_t update)706 ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
707     boolean_t update)
708 {
709 	struct tb_regulator *tbr;
710 	struct ifnet *ifp = ifq->ifcq_ifp;
711 	u_int64_t rate, old_rate;
712 
713 	IFCQ_LOCK_ASSERT_HELD(ifq);
714 	VERIFY(IFCQ_IS_READY(ifq));
715 
716 	VERIFY(machclk_freq != 0);
717 
718 	tbr = &ifq->ifcq_tbr;
719 	old_rate = tbr->tbr_rate_raw;
720 
721 	rate = profile->rate;
722 	if (profile->percent > 0) {
723 		u_int64_t eff_rate;
724 
725 		if (profile->percent > 100) {
726 			return EINVAL;
727 		}
728 		if ((eff_rate = ifp->if_output_bw.eff_bw) == 0) {
729 			return ENODEV;
730 		}
731 		rate = (eff_rate * profile->percent) / 100;
732 	}
733 
734 	if (rate == 0) {
735 		if (!IFCQ_TBR_IS_ENABLED(ifq)) {
736 			return 0;
737 		}
738 
739 		if (pktsched_verbose) {
740 			printf("%s: TBR disabled\n", if_name(ifp));
741 		}
742 
743 		/* disable this TBR */
744 		ifq->ifcq_flags &= ~IFCQF_TBR;
745 		bzero(tbr, sizeof(*tbr));
746 		ifnet_set_start_cycle(ifp, NULL);
747 		if (update) {
748 			ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
749 		}
750 		return 0;
751 	}
752 
753 	if (pktsched_verbose) {
754 		printf("%s: TBR %s (rate %llu bps depth %u)\n", if_name(ifp),
755 		    (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" :
756 		    "enabled", rate, profile->depth);
757 	}
758 
759 	/* set the new TBR */
760 	bzero(tbr, sizeof(*tbr));
761 	tbr->tbr_rate_raw = rate;
762 	tbr->tbr_percent = profile->percent;
763 	ifq->ifcq_flags |= IFCQF_TBR;
764 
765 	/*
766 	 * Note that the TBR fill up time (hence the ifnet restart time)
767 	 * is directly related to the specified TBR depth.  The ideal
768 	 * depth value should be computed such that the interval time
769 	 * between each successive wakeup is adequately spaced apart,
770 	 * in order to reduce scheduling overheads.  A target interval
771 	 * of 10 ms seems to provide good performance balance.  This can be
772 	 * overridden by specifying the depth profile.  Values smaller than
773 	 * the ideal depth will reduce delay at the expense of CPU cycles.
774 	 */
775 	tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq;
776 	if (tbr->tbr_rate > 0) {
777 		u_int32_t mtu = ifp->if_mtu;
778 		int64_t ival, idepth = 0;
779 		int i;
780 
781 		if (mtu < IF_MINMTU) {
782 			mtu = IF_MINMTU;
783 		}
784 
785 		ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */
786 
787 		for (i = 1;; i++) {
788 			idepth = TBR_SCALE(i * mtu);
789 			if ((idepth / tbr->tbr_rate) > ival) {
790 				break;
791 			}
792 		}
793 		VERIFY(idepth > 0);
794 
795 		tbr->tbr_depth = TBR_SCALE(profile->depth);
796 		if (tbr->tbr_depth == 0) {
797 			tbr->tbr_filluptime = idepth / tbr->tbr_rate;
798 			/* a little fudge factor to get closer to rate */
799 			tbr->tbr_depth = idepth + (idepth >> 3);
800 		} else {
801 			tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
802 		}
803 	} else {
804 		tbr->tbr_depth = TBR_SCALE(profile->depth);
805 		tbr->tbr_filluptime = 0xffffffffffffffffLL;
806 	}
807 	tbr->tbr_token = tbr->tbr_depth;
808 	tbr->tbr_last = read_machclk();
809 
810 	if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) {
811 		struct timespec ts =
812 		{ 0, (long)pktsched_abs_to_nsecs(tbr->tbr_filluptime) };
813 		if (pktsched_verbose) {
814 			printf("%s: TBR calculated tokens %lld "
815 			    "filluptime %llu ns\n", if_name(ifp),
816 			    TBR_UNSCALE(tbr->tbr_token),
817 			    pktsched_abs_to_nsecs(tbr->tbr_filluptime));
818 		}
819 		ifnet_set_start_cycle(ifp, &ts);
820 	} else {
821 		if (pktsched_verbose) {
822 			if (tbr->tbr_rate == 0) {
823 				printf("%s: TBR calculated tokens %lld "
824 				    "infinite filluptime\n", if_name(ifp),
825 				    TBR_UNSCALE(tbr->tbr_token));
826 			} else if (!(ifp->if_flags & IFF_UP)) {
827 				printf("%s: TBR suspended (link is down)\n",
828 				    if_name(ifp));
829 			}
830 		}
831 		ifnet_set_start_cycle(ifp, NULL);
832 	}
833 	if (update && tbr->tbr_rate_raw != old_rate) {
834 		ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
835 	}
836 
837 	return 0;
838 }
839 
840 void
ifclassq_calc_target_qdelay(struct ifnet * ifp,uint64_t * if_target_qdelay,uint32_t flags)841 ifclassq_calc_target_qdelay(struct ifnet *ifp, uint64_t *if_target_qdelay,
842     uint32_t flags)
843 {
844 	uint64_t qdelay = 0, qdelay_configed = 0, qdely_default = 0;
845 	if (flags == IF_CLASSQ_DEF) {
846 		qdelay = IFCQ_TARGET_QDELAY(ifp->if_snd);
847 	}
848 
849 	switch (flags) {
850 	case IF_CLASSQ_DEF:
851 		qdelay_configed = ifclassq_def_c_target_qdelay;
852 		qdely_default = IFQ_DEF_C_TARGET_DELAY;
853 		break;
854 	case IF_CLASSQ_L4S:
855 		qdelay_configed = ifclassq_def_l4s_target_qdelay;
856 		if (ifp->if_subfamily == IFNET_SUBFAMILY_WIFI ||
857 		    ifp->if_family == IFNET_FAMILY_CELLULAR) {
858 			qdely_default = IFQ_DEF_L4S_WIRELESS_TARGET_DELAY;
859 		} else {
860 			qdely_default = IFQ_DEF_L4S_TARGET_DELAY;
861 		}
862 		break;
863 	case IF_CLASSQ_LOW_LATENCY:
864 		qdelay_configed = ifclassq_ll_c_target_qdelay;
865 		qdely_default = IFQ_LL_C_TARGET_DELAY;
866 		break;
867 	case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S):
868 		qdelay_configed = ifclassq_ll_l4s_target_qdelay;
869 		if (ifp->if_subfamily == IFNET_SUBFAMILY_WIFI ||
870 		    ifp->if_family == IFNET_FAMILY_CELLULAR) {
871 			qdely_default = IFQ_LL_L4S_WIRELESS_TARGET_DELAY;
872 		} else {
873 			qdely_default = IFQ_LL_L4S_TARGET_DELAY;
874 		}
875 		break;
876 	default:
877 		VERIFY(0);
878 		/* NOTREACHED */
879 		__builtin_unreachable();
880 	}
881 
882 	if (qdelay_configed != 0) {
883 		qdelay = qdelay_configed;
884 	}
885 
886 	/*
887 	 * If we do not know the effective bandwidth, use the default
888 	 * target queue delay.
889 	 */
890 	if (qdelay == 0) {
891 		qdelay = qdely_default;
892 	}
893 
894 	/*
895 	 * If a delay has been added to ifnet start callback for
896 	 * coalescing, we have to add that to the pre-set target delay
897 	 * because the packets can be in the queue longer.
898 	 */
899 	if ((ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
900 	    ifp->if_start_delay_timeout > 0) {
901 		qdelay += ifp->if_start_delay_timeout;
902 	}
903 
904 	*(if_target_qdelay) = qdelay;
905 }
906 
907 void
ifclassq_calc_update_interval(uint64_t * update_interval,uint32_t flags)908 ifclassq_calc_update_interval(uint64_t *update_interval, uint32_t flags)
909 {
910 	uint64_t interval = 0, interval_configed = 0, interval_default = 0;
911 
912 	switch (flags) {
913 	case IF_CLASSQ_DEF:
914 		interval_configed = ifclassq_def_c_update_interval;
915 		interval_default = IFQ_DEF_C_UPDATE_INTERVAL;
916 		break;
917 	case IF_CLASSQ_L4S:
918 		interval_configed = ifclassq_def_l4s_update_interval;
919 		interval_default = IFQ_DEF_L4S_UPDATE_INTERVAL;
920 		break;
921 	case IF_CLASSQ_LOW_LATENCY:
922 		interval_configed = ifclassq_ll_c_update_interval;
923 		interval_default = IFQ_LL_C_UPDATE_INTERVAL;
924 		break;
925 	case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S):
926 		interval_configed = ifclassq_ll_l4s_update_interval;
927 		interval_default = IFQ_LL_L4S_UPDATE_INTERVAL;
928 		break;
929 	default:
930 		VERIFY(0);
931 		/* NOTREACHED */
932 		__builtin_unreachable();
933 	}
934 
935 	/* If the system level override is set, use it */
936 	if (interval_configed != 0) {
937 		interval = interval_configed;
938 	}
939 
940 	/* Otherwise use the default value */
941 	if (interval == 0) {
942 		interval = interval_default;
943 	}
944 
945 	*update_interval = interval;
946 }
947 
948 struct ifclassq *
ifclassq_alloc(void)949 ifclassq_alloc(void)
950 {
951 	struct ifclassq *ifcq;
952 
953 	ifcq = zalloc_flags(ifcq_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
954 	os_ref_init(&ifcq->ifcq_refcnt, NULL);
955 	os_ref_retain(&ifcq->ifcq_refcnt);
956 	lck_mtx_init(&ifcq->ifcq_lock, &ifcq_lock_group, &ifcq_lock_attr);
957 	return ifcq;
958 }
959 
960 void
ifclassq_retain(struct ifclassq * ifcq)961 ifclassq_retain(struct ifclassq *ifcq)
962 {
963 	os_ref_retain(&ifcq->ifcq_refcnt);
964 }
965 
966 void
ifclassq_release(struct ifclassq ** pifcq)967 ifclassq_release(struct ifclassq **pifcq)
968 {
969 	struct ifclassq *__single ifcq = *pifcq;
970 
971 	*pifcq = NULL;
972 	if (os_ref_release(&ifcq->ifcq_refcnt) == 0) {
973 		ifclassq_teardown(ifcq);
974 		zfree(ifcq_zone, ifcq);
975 	}
976 }
977 
978 int
ifclassq_setup_group(struct ifclassq * ifcq,uint8_t grp_idx,uint8_t flags)979 ifclassq_setup_group(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
980 {
981 	int err;
982 
983 	IFCQ_LOCK(ifcq);
984 	VERIFY(ifcq->ifcq_disc != NULL);
985 	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);
986 
987 	err = fq_if_create_grp(ifcq, grp_idx, flags);
988 	IFCQ_UNLOCK(ifcq);
989 
990 	return err;
991 }
992 
993 void
ifclassq_set_grp_combined(struct ifclassq * ifcq,uint8_t grp_idx)994 ifclassq_set_grp_combined(struct ifclassq *ifcq, uint8_t grp_idx)
995 {
996 	IFCQ_LOCK(ifcq);
997 	VERIFY(ifcq->ifcq_disc != NULL);
998 	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);
999 
1000 	fq_if_set_grp_combined(ifcq, grp_idx);
1001 	IFCQ_UNLOCK(ifcq);
1002 }
1003 
1004 void
ifclassq_set_grp_separated(struct ifclassq * ifcq,uint8_t grp_idx)1005 ifclassq_set_grp_separated(struct ifclassq *ifcq, uint8_t grp_idx)
1006 {
1007 	IFCQ_LOCK(ifcq);
1008 	VERIFY(ifcq->ifcq_disc != NULL);
1009 	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);
1010 
1011 	fq_if_set_grp_separated(ifcq, grp_idx);
1012 	IFCQ_UNLOCK(ifcq);
1013 }
1014