xref: /xnu-12377.41.6/bsd/net/classq/classq_subr.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/mbuf.h>
32 #include <sys/errno.h>
33 #include <sys/random.h>
34 #include <sys/kernel_types.h>
35 #include <sys/sysctl.h>
36 
37 #include <kern/zalloc.h>
38 
39 #include <net/if.h>
40 #include <net/net_osdep.h>
41 #include <net/classq/classq.h>
42 #include <pexpert/pexpert.h>
43 #include <net/classq/classq_sfb.h>
44 #include <net/classq/classq_fq_codel.h>
45 #include <net/pktsched/pktsched.h>
46 #include <net/pktsched/pktsched_fq_codel.h>
47 #include <net/pktsched/pktsched_ops.h>
48 #include <net/flowadv.h>
49 
50 #include <libkern/libkern.h>
51 
52 #if SKYWALK
53 #include <skywalk/os_skywalk_private.h>
54 #include <skywalk/core/skywalk_var.h>
55 #include <skywalk/nexus/netif/nx_netif.h>
56 #endif /* SKYWALK */
57 static int ifclassq_tbr_set_locked(struct ifclassq *ifq, struct tb_profile *profile,
58     boolean_t update);
59 static void ifclassq_tbr_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
60     boolean_t, classq_pkt_t *, u_int8_t);
61 
62 #if DEBUG || DEVELOPMENT
63 uint32_t ifclassq_flow_control_adv = 1; /* flow control advisory */
64 SYSCTL_UINT(_net_classq, OID_AUTO, flow_control_adv,
65     CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_flow_control_adv, 1,
66     "enable/disable flow control advisory");
67 
68 uint32_t ifclassq_congestion_feedback = 1;
69 SYSCTL_UINT(_net_classq, OID_AUTO, flow_congestion_feedback,
70     CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_congestion_feedback, 1,
71     "enable/disable congestion feedback (flow control v2)");
72 
73 SYSCTL_EXTENSIBLE_NODE(_net_classq, OID_AUTO, scheduler,
74     CTLFLAG_RW | CTLFLAG_LOCKED, 0, "classq scheduler");
75 
76 /* list value and description of each model */
77 #define X(name, value, description, ...) #description ":" #value " "
78 SYSCTL_STRING(_net_classq_scheduler, OID_AUTO, available_models, CTLFLAG_RD | CTLFLAG_LOCKED,
79     IFNET_SCHED_MODEL_LIST, 0, "");
80 #undef X
81 
82 static int ifclassq_configure_sysctl SYSCTL_HANDLER_ARGS;
83 #endif /* DEBUG || DEVELOPMENT */
84 
85 static KALLOC_TYPE_DEFINE(ifcq_zone, struct ifclassq, NET_KT_DEFAULT);
86 LCK_ATTR_DECLARE(ifcq_lock_attr, 0, 0);
87 static LCK_GRP_DECLARE(ifcq_lock_group, "ifclassq locks");
88 
void
classq_init(void)
{
	/*
	 * Compile-time invariants the classq layer relies on:
	 * best-effort must be index 0 in both the mbuf traffic-class
	 * and service-class namespaces, and the classq service-class
	 * table must cover every mbuf service class.
	 */
	static_assert(MBUF_TC_BE == 0);
	static_assert(MBUF_SC_BE == 0);
	static_assert(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);
}
96 
/*
 * Initialize the transmit classq for an interface and, when the driver
 * uses the ifnet start model (IFEF_TXSTART), attach a packet scheduler.
 * Returns 0 on success or an errno from the scheduler setup.
 */
int
ifclassq_setup(struct ifclassq *ifq, struct ifnet *ifp, uint32_t sflags)
{
	int err = 0;

	IFCQ_LOCK(ifq);
	/* a queue being set up must be empty and completely unconfigured */
	VERIFY(IFCQ_IS_EMPTY(ifq));
	ifq->ifcq_ifp = ifp;
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));

	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);

	/* only interfaces with a driver start routine get a scheduler */
	if (ifp->if_eflags & IFEF_TXSTART) {
		u_int32_t maxlen = 0;

		/* fall back to the global default send-queue depth */
		if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
			maxlen = if_sndq_maxlen;
		}
		IFCQ_SET_MAXLEN(ifq, maxlen);

		if (IFCQ_MAXLEN(ifq) != if_sndq_maxlen &&
		    IFCQ_TARGET_QDELAY(ifq) == 0) {
			/*
			 * Choose static queues because the interface has
			 * maximum queue size set
			 */
			sflags &= ~PKTSCHEDF_QALG_DELAYBASED;
		}
		ifq->ifcq_sflags = sflags;
		err = ifclassq_pktsched_setup(ifq);
		if (err == 0) {
			/* mark usable only after scheduler setup succeeded */
			ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
		}
	}

#if (DEBUG || DEVELOPMENT)
	/*
	 * Publish a per-interface "model" sysctl via the skoid machinery.
	 * The layout asserts guarantee ifcq_oid_t is a drop-in alias for
	 * struct skoid, so the cast below is safe.
	 */
	static_assert(sizeof(struct skoid) == sizeof(ifcq_oid_t));
	static_assert(offsetof(struct skoid, sko_oid_list) == offsetof(ifcq_oid_t, ifcq_oid_list));
	static_assert(offsetof(struct skoid, sko_oid) == offsetof(ifcq_oid_t, ifcq_oid));
	static_assert(offsetof(struct skoid, sko_name) == offsetof(ifcq_oid_t, ifcq_name));

	struct skoid *ifcq_skoid = (struct skoid *)&ifq->ifcq_oid;
	skoid_create(ifcq_skoid,
	    SKOID_SNODE(_net_classq_scheduler), if_name(ifp),
	    CTLFLAG_RW);
	skoid_add_handler(ifcq_skoid, "model", CTLFLAG_RW,
	    ifclassq_configure_sysctl, ifq, 0);
#endif /* (DEBUG || DEVELOPMENT) */

	IFCQ_UNLOCK(ifq);

	return err;
}
157 
/*
 * Switch the interface to a different output scheduling model.
 * The driver-managed property of the new model must match the
 * current one; on scheduler setup failure the old model is restored.
 */
int
ifclassq_change(struct ifclassq *ifq, uint32_t model)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	uint32_t omodel;
	errno_t err;

	/* reject invalid models and driver-managed <-> normal transitions */
	if (ifp == NULL || !IFNET_MODEL_IS_VALID(model) ||
	    (!!(model & IFNET_SCHED_DRIVER_MANGED_MODELS)) !=
	    (!!(ifp->if_output_sched_model & IFNET_SCHED_DRIVER_MANGED_MODELS))) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		/* no scheduler without the ifnet start model */
		return ENXIO;
	}

	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;

	/* roll back to the previous model if the new setup fails */
	if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
		ifp->if_output_sched_model = omodel;
	}
	IFCQ_UNLOCK(ifq);

	return err;
}
184 
/*
 * Tear down a classq: disable any token bucket regulator, detach the
 * scheduler, and reset all state.  Idempotent — a queue already marked
 * IFCQF_DESTROYED is left untouched.
 */
void
ifclassq_teardown(struct ifclassq *ifq)
{
	IFCQ_LOCK(ifq);
	if (IFCQ_IS_DESTROYED(ifq)) {
		/* nothing but the destroyed bit may remain set */
		ASSERT((ifq->ifcq_flags & ~IFCQF_DESTROYED) == 0);
		goto done;
	}
	if (IFCQ_IS_READY(ifq)) {
		if (IFCQ_TBR_IS_ENABLED(ifq)) {
			/* a zero-rate profile disables the TBR */
			struct tb_profile tb =
			{ .rate = 0, .percent = 0, .depth = 0 };
			(void) ifclassq_tbr_set_locked(ifq, &tb, FALSE);
		}
		pktsched_teardown(ifq);
		ifq->ifcq_flags &= ~IFCQF_READY;
	}
	ifq->ifcq_sflags = 0;
	/* scheduler teardown must leave the queue pristine */
	VERIFY(IFCQ_IS_EMPTY(ifq));
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	IFCQ_MAXLEN(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));
	ifq->ifcq_flags |= IFCQF_DESTROYED;

#if (DEBUG || DEVELOPMENT)
	/* remove the per-interface sysctl node created in ifclassq_setup() */
	struct skoid *ifcq_skoid = (struct skoid *)&ifq->ifcq_oid;
	skoid_destroy(ifcq_skoid);
#endif /* (DEBUG || DEVELOPMENT) */
done:
	IFCQ_UNLOCK(ifq);
}
223 
/*
 * Instantiate the packet scheduler that corresponds to the interface's
 * output scheduling model.  Called with the classq lock held.
 * Returns 0 on success, EINVAL for an unknown model, or an errno
 * propagated from pktsched_setup().
 */
int
ifclassq_pktsched_setup(struct ifclassq *ifq)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	classq_pkt_type_t ptype = QP_MBUF;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifp->if_eflags & IFEF_TXSTART);
#if SKYWALK
	/* native Skywalk interfaces queue kernel packets, not mbufs */
	ptype = ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) != 0) ? QP_PACKET :
	    QP_MBUF;
#endif /* SKYWALK */

	switch (ifp->if_output_sched_model) {
	case IFNET_SCHED_MODEL_DRIVER_MANAGED:
	case IFNET_SCHED_MODEL_NORMAL:
		/*
		 * Default models pick the scheduler implementation by
		 * interface family: wired Ethernet gets the new fq_codel,
		 * everything else (including Wi-Fi) the original one.
		 */
		if (ifp->if_family == IFNET_FAMILY_ETHERNET &&
		    (ifp->if_subfamily != IFNET_SUBFAMILY_WIFI)) {
			err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL_NEW, ifq->ifcq_sflags, ptype);
		} else {
			err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL, ifq->ifcq_sflags, ptype);
		}
		break;
	case IFNET_SCHED_MODEL_FQ_CODEL:
	case IFNET_SCHED_MODEL_FQ_CODEL_DM:
		err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL, ifq->ifcq_sflags, ptype);
		break;
	case IFNET_SCHED_MODEL_FQ_CODEL_NEW:
	case IFNET_SCHED_MODEL_FQ_CODEL_NEW_DM:
		err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL_NEW, ifq->ifcq_sflags, ptype);
		break;
	default:
		err = EINVAL;
	}

	return err;
}
262 
263 void
ifclassq_set_maxlen(struct ifclassq * ifq,u_int32_t maxqlen)264 ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen)
265 {
266 	IFCQ_LOCK(ifq);
267 	if (maxqlen == 0) {
268 		maxqlen = if_sndq_maxlen;
269 	}
270 	IFCQ_SET_MAXLEN(ifq, maxqlen);
271 	IFCQ_UNLOCK(ifq);
272 }
273 
/* Return the configured depth limit; read without taking the classq lock. */
u_int32_t
ifclassq_get_maxlen(struct ifclassq *ifq)
{
	return IFCQ_MAXLEN(ifq);
}
279 
280 int
ifclassq_get_len(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int8_t grp_idx,u_int32_t * packets,u_int32_t * bytes)281 ifclassq_get_len(struct ifclassq *ifq, mbuf_svc_class_t sc, u_int8_t grp_idx,
282     u_int32_t *packets, u_int32_t *bytes)
283 {
284 	int err = 0;
285 	boolean_t dequeue_paused = false;
286 
287 	IFCQ_LOCK(ifq);
288 	if ((ifq->ifcq_flags & (IFCQF_READY | IFCQF_ENABLED)) !=
289 	    (IFCQF_READY | IFCQF_ENABLED)) {
290 		return ENXIO;
291 	}
292 	if (sc == MBUF_SC_UNSPEC && grp_idx == IF_CLASSQ_ALL_GRPS) {
293 		VERIFY(packets != NULL);
294 		if ((dequeue_paused = ifq->ifcq_ops->ps_allow_dequeue(ifq))) {
295 			*packets = 0;
296 		} else {
297 			*packets = IFCQ_LEN(ifq);
298 		}
299 	} else {
300 		cqrq_stat_sc_t req = { sc, grp_idx, 0, 0 };
301 
302 		VERIFY(MBUF_VALID_SC(sc) || sc == MBUF_SC_UNSPEC);
303 
304 		err = ifclassq_request(ifq, CLASSQRQ_STAT_SC, &req, true);
305 		if (packets != NULL) {
306 			*packets = req.packets;
307 		}
308 		if (bytes != NULL) {
309 			*bytes = req.bytes;
310 		}
311 	}
312 	KDBG(AQM_KTRACE_STATS_GET_QLEN, ifq->ifcq_ifp->if_index,
313 	    packets ? *packets : 0, bytes ? *bytes : 0, dequeue_paused);
314 
315 	IFCQ_UNLOCK(ifq);
316 
317 #if SKYWALK
318 	struct ifnet *ifp = ifq->ifcq_ifp;
319 
320 	if (__improbable(ifp->if_na_ops != NULL &&
321 	    ifp->if_na_ops->ni_get_len != NULL)) {
322 		err = ifp->if_na_ops->ni_get_len(ifp->if_na, sc, packets,
323 		    bytes, err);
324 	}
325 #endif /* SKYWALK */
326 
327 	return err;
328 }
329 
/*
 * Stamp per-packet buffer-status metadata used on cellular interfaces
 * (snapshot of queued bytes at the classq and at the socket send buffer).
 * No-op for non-cellular interfaces and for Skywalk packets.
 */
inline void
ifclassq_set_packet_metadata(struct ifclassq *ifq, struct ifnet *ifp,
    classq_pkt_t *p)
{
	if (!IFNET_IS_CELLULAR(ifp)) {
		return;
	}

	switch (p->cp_ptype) {
	case QP_MBUF: {
		struct mbuf *m = p->cp_mbuf;
		m->m_pkthdr.pkt_flags |= PKTF_VALID_UNSENT_DATA;
		m->m_pkthdr.bufstatus_if = IFCQ_BYTES(ifq);
		m->m_pkthdr.bufstatus_sndbuf = (uint32_t)ifp->if_sndbyte_unsent;
		break;
	}

#if SKYWALK
	case QP_PACKET:
		/*
		 * Support for equivalent of mbuf_get_unsent_data_bytes()
		 * is not needed in the Skywalk architecture.
		 */
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
362 
/* Thin dispatch of a packet chain into the active scheduler's enqueue op. */
errno_t
ifclassq_enqueue(struct ifclassq *ifq, classq_pkt_t *head, classq_pkt_t *tail,
    u_int32_t cnt, u_int32_t bytes, boolean_t *pdrop)
{
	return ifq->ifcq_ops->ps_enq(ifq, head, tail, cnt, bytes, pdrop);
}
369 
/*
 * Common dequeue path.  When no token bucket regulator is active the
 * scheduler's batch dequeue op is called directly; otherwise packets
 * are pulled one at a time through the TBR so that token accounting
 * is applied per packet.  Returns 0 with a chain in *head, or EAGAIN
 * when nothing could be dequeued.
 */
static errno_t
ifclassq_dequeue_common_default(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
    u_int8_t grp_idx)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int32_t i = 0, l = 0;		/* packets and bytes accumulated */
	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);

	/* driver-managed dequeues must name a valid service class */
	VERIFY(!drvmgt || MBUF_VALID_SC(sc));

	IFCQ_LOCK_SPIN(ifq);
	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		goto dequeue_loop;
	}

	/*
	 * If the scheduler support dequeueing multiple packets at the
	 * same time, call that one instead.
	 */
	if (drvmgt) {
		int err;

		err = ifq->ifcq_ops->ps_deq_sc(ifq, sc, pkt_limit,
		    byte_limit, head, tail, cnt, len, grp_idx);
		IFCQ_UNLOCK(ifq);

		/* a successful call that produced no packet means "empty" */
		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	} else {
		int err;

		err = ifq->ifcq_ops->ps_deq(ifq, pkt_limit, byte_limit,
		    head, tail, cnt, len, grp_idx);
		IFCQ_UNLOCK(ifq);

		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	}

dequeue_loop:
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));

	/* pull packets one at a time through the TBR, linking a chain */
	while (i < pkt_limit && l < byte_limit) {
		if (drvmgt) {
			IFCQ_TBR_DEQUEUE_SC(ifq, sc, head, grp_idx);
		} else {
			IFCQ_TBR_DEQUEUE(ifq, head, grp_idx);
		}

		if (head->cp_mbuf == NULL) {
			break;
		}

		/* remember the head of the chain on the first packet */
		if (first.cp_mbuf == NULL) {
			first = *head;
		}

		switch (head->cp_ptype) {
		case QP_MBUF:
			head->cp_mbuf->m_nextpkt = NULL;
			l += head->cp_mbuf->m_pkthdr.len;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_mbuf != NULL) {
				last.cp_mbuf->m_nextpkt = head->cp_mbuf;
			}
			break;

#if SKYWALK
		case QP_PACKET:
			head->cp_kpkt->pkt_nextpkt = NULL;
			l += head->cp_kpkt->pkt_length;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_kpkt != NULL) {
				last.cp_kpkt->pkt_nextpkt = head->cp_kpkt;
			}
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		last = *head;
		i++;
	}

	IFCQ_UNLOCK(ifq);

	/* out-parameters are all optional */
	if (tail != NULL) {
		*tail = last;
	}
	if (cnt != NULL) {
		*cnt = i;
	}
	if (len != NULL) {
		*len = l;
	}

	*head = first;
	return (first.cp_mbuf != NULL) ? 0 : EAGAIN;
}
480 
/*
 * Public dequeue entry point.  An sc other than MBUF_SC_UNSPEC selects
 * the driver-managed (per service class) path.  If the Skywalk netif
 * adapter installs a dequeue hook, the default result is passed through
 * it so interface filters can intercept the chain.
 */
errno_t
ifclassq_dequeue(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
{
	boolean_t drvmgt = sc != MBUF_SC_UNSPEC;
	struct ifnet *ifp = ifq->ifcq_ifp;

	if (__improbable(ifp->if_na_ops != NULL &&
	    ifp->if_na_ops->ni_dequeue != NULL)) {
		/*
		 * TODO:
		 * We should be changing the pkt/byte limit to the
		 * available space in the next filter. But this is not
		 * useful until we can flow control the whole chain of
		 * filters.
		 */
		errno_t err = ifclassq_dequeue_common_default(ifq, sc,
		    pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);

		return ifp->if_na_ops->ni_dequeue(ifp->if_na, sc, pkt_limit,
		           byte_limit, head, tail, cnt, len, drvmgt, err);
	}
	return ifclassq_dequeue_common_default(ifq, sc,
	           pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);
}
507 
/*
 * Propagate a link event (bandwidth/latency/MTU/up/down) to the
 * scheduler, re-deriving the TBR parameters first since a
 * percentage-based TBR depends on the current effective bandwidth.
 * 'locked' indicates whether the caller already holds IFCQ_LOCK.
 */
void
ifclassq_update(struct ifclassq *ifq, cqev_t ev, bool locked)
{
	void *ev_p = (void *)&ev;

	if (!locked) {
		IFCQ_LOCK(ifq);
	}
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq))) {
		goto out;
	}

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		/* re-apply the current profile against the new link state */
		struct tb_profile tb = {
			.rate = ifq->ifcq_tbr.tbr_rate_raw,
			.percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
		};
		(void) ifclassq_tbr_set_locked(ifq, &tb, FALSE);
	}

	ifclassq_request(ifq, CLASSQRQ_EVENT, ev_p, true);

out:
	if (!locked) {
		IFCQ_UNLOCK(ifq);
	}
}
537 
538 int
ifclassq_attach(struct ifclassq * ifq,u_int32_t type,void * discipline)539 ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline)
540 {
541 	IFCQ_LOCK_ASSERT_HELD(ifq);
542 	VERIFY(ifq->ifcq_disc == NULL);
543 	ifq->ifcq_type = type;
544 	ifq->ifcq_disc = discipline;
545 	return 0;
546 }
547 
/*
 * Detach the scheduler from the classq and fall back to the no-op
 * scheduler ops table.
 * NOTE(review): the VERIFY implies the discipline pointer must already
 * have been cleared (presumably by the scheduler's own teardown) before
 * this is called — confirm against pktsched_teardown().
 */
void
ifclassq_detach(struct ifclassq *ifq)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	ifq->ifcq_type = PKTSCHEDT_NONE;
	ifq->ifcq_ops = pktsched_ops_find(PKTSCHEDT_NONE);
}
556 
557 int
ifclassq_getqstats(struct ifclassq * ifq,u_int8_t gid,u_int32_t qid,void * ubuf,u_int32_t * nbytes)558 ifclassq_getqstats(struct ifclassq *ifq, u_int8_t gid, u_int32_t qid, void *ubuf,
559     u_int32_t *nbytes)
560 {
561 	struct if_ifclassq_stats *ifqs;
562 	int err;
563 
564 	if (*nbytes < sizeof(*ifqs)) {
565 		return EINVAL;
566 	}
567 
568 	ifqs = kalloc_type(struct if_ifclassq_stats,
569 	    Z_WAITOK | Z_ZERO | Z_NOFAIL);
570 
571 	IFCQ_LOCK(ifq);
572 	if (!IFCQ_IS_READY(ifq)) {
573 		IFCQ_UNLOCK(ifq);
574 		kfree_type(struct if_ifclassq_stats, ifqs);
575 		return ENXIO;
576 	}
577 
578 	ifqs->ifqs_len = IFCQ_LEN(ifq);
579 	ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq);
580 	*(&ifqs->ifqs_xmitcnt) = *(&ifq->ifcq_xmitcnt);
581 	*(&ifqs->ifqs_dropcnt) = *(&ifq->ifcq_dropcnt);
582 	ifqs->ifqs_scheduler = ifq->ifcq_type;
583 
584 	err = pktsched_getqstats(ifq, gid, qid, ifqs);
585 	IFCQ_UNLOCK(ifq);
586 
587 	if (err == 0 && (err = copyout(ifqs,
588 	    (user_addr_t)(uintptr_t)ubuf, sizeof(*ifqs))) == 0) {
589 		*nbytes = sizeof(*ifqs);
590 	}
591 
592 	kfree_type(struct if_ifclassq_stats, ifqs);
593 
594 	return err;
595 }
596 
597 const char *__null_terminated
ifclassq_ev2str(cqev_t ev)598 ifclassq_ev2str(cqev_t ev)
599 {
600 	const char *__null_terminated c = "";
601 
602 	switch (ev) {
603 	case CLASSQ_EV_LINK_BANDWIDTH:
604 		c = "LINK_BANDWIDTH";
605 		break;
606 
607 	case CLASSQ_EV_LINK_LATENCY:
608 		c = "LINK_LATENCY";
609 		break;
610 
611 	case CLASSQ_EV_LINK_MTU:
612 		c = "LINK_MTU";
613 		break;
614 
615 	case CLASSQ_EV_LINK_UP:
616 		c = "LINK_UP";
617 		break;
618 
619 	case CLASSQ_EV_LINK_DOWN:
620 		c = "LINK_DOWN";
621 		break;
622 
623 	default:
624 		c = "UNKNOWN";
625 		break;
626 	}
627 
628 	return c;
629 }
630 
631 /*
632  * internal representation of token bucket parameters
633  *	rate:	byte_per_unittime << 32
634  *		(((bits_per_sec) / 8) << 32) / machclk_freq
635  *	depth:	byte << 32
636  *
637  */
638 #define TBR_SHIFT       32
639 #define TBR_SCALE(x)    ((int64_t)(x) << TBR_SHIFT)
640 #define TBR_UNSCALE(x)  ((x) >> TBR_SHIFT)
641 
/* TBR dequeue for the normal (non driver-managed) path. */
void
ifclassq_tbr_dequeue(struct ifclassq *ifq, classq_pkt_t *pkt, u_int8_t grp_idx)
{
	ifclassq_tbr_dequeue_common(ifq, MBUF_SC_UNSPEC, FALSE, pkt, grp_idx);
}
647 
/* TBR dequeue for the driver-managed (per service class) path. */
void
ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    classq_pkt_t *pkt, u_int8_t grp_idx)
{
	ifclassq_tbr_dequeue_common(ifq, sc, TRUE, pkt, grp_idx);
}
654 
/*
 * Dequeue a single packet through the token bucket regulator.
 * Tokens are refilled lazily based on elapsed machclk time; if the
 * bucket is still non-positive after refill, no packet is dequeued
 * (*pkt stays empty).  On success the packet's length is charged
 * against the bucket.  Called with the classq lock held.
 */
static void
ifclassq_tbr_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    boolean_t drvmgt, classq_pkt_t *pkt, u_int8_t grp_idx)
{
	struct tb_regulator *tbr;
	int64_t interval;
	u_int64_t now;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));

	*pkt = CLASSQ_PKT_INITIALIZER(*pkt);
	tbr = &ifq->ifcq_tbr;
	/* update token only when it is negative */
	if (tbr->tbr_token <= 0) {
		now = read_machclk();
		interval = now - tbr->tbr_last;
		if (interval >= tbr->tbr_filluptime) {
			/* enough time has passed to fill the bucket */
			tbr->tbr_token = tbr->tbr_depth;
		} else {
			/* partial refill, capped at the bucket depth */
			tbr->tbr_token += interval * tbr->tbr_rate;
			if (tbr->tbr_token > tbr->tbr_depth) {
				tbr->tbr_token = tbr->tbr_depth;
			}
		}
		tbr->tbr_last = now;
	}
	/* if token is still negative, don't allow dequeue */
	if (tbr->tbr_token <= 0) {
		return;
	}

	/*
	 * ifclassq takes precedence over ALTQ queue;
	 * ifcq_drain count is adjusted by the caller.
	 */
	if (drvmgt) {
		ifq->ifcq_ops->ps_deq_sc(ifq, sc, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
	} else {
		ifq->ifcq_ops->ps_deq(ifq, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
	}

	if (pkt->cp_mbuf != NULL) {
		/* charge the dequeued packet's length against the bucket */
		switch (pkt->cp_ptype) {
		case QP_MBUF:
			tbr->tbr_token -= TBR_SCALE(m_pktlen(pkt->cp_mbuf));
			break;

#if SKYWALK
		case QP_PACKET:
			tbr->tbr_token -=
			    TBR_SCALE(pkt->cp_kpkt->pkt_length);
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
		}
	}
}
718 
719 /*
720  * set a token bucket regulator.
721  * if the specified rate is zero, the token bucket regulator is deleted.
722  */
/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 *
 * A profile may specify the rate either absolutely (profile->rate, in
 * bits per second) or as a percentage of the interface's effective
 * output bandwidth.  When 'update' is set, a LINK_BANDWIDTH event is
 * pushed to the scheduler if the effective rate changed.
 * Called with the classq lock held on a READY queue.
 */
static int
ifclassq_tbr_set_locked(struct ifclassq *ifq, struct tb_profile *profile,
    boolean_t update)
{
	struct tb_regulator *tbr;
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int64_t rate, old_rate;
	uint8_t ev = CLASSQ_EV_LINK_BANDWIDTH;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));

	VERIFY(machclk_freq != 0);

	tbr = &ifq->ifcq_tbr;
	old_rate = tbr->tbr_rate_raw;

	rate = profile->rate;
	if (profile->percent > 0) {
		/* derive the rate from the interface's effective bandwidth */
		u_int64_t eff_rate;

		if (profile->percent > 100) {
			return EINVAL;
		}
		if ((eff_rate = ifp->if_output_bw.eff_bw) == 0) {
			return ENODEV;
		}
		rate = (eff_rate * profile->percent) / 100;
	}

	if (rate == 0) {
		/* zero rate means: remove any existing TBR */
		if (!IFCQ_TBR_IS_ENABLED(ifq)) {
			return 0;
		}

		if (pktsched_verbose) {
			printf("%s: TBR disabled\n", if_name(ifp));
		}

		/* disable this TBR */
		ifq->ifcq_flags &= ~IFCQF_TBR;
		bzero(tbr, sizeof(*tbr));
		ifnet_set_start_cycle(ifp, NULL);
		if (update) {
			ifclassq_request(ifq, CLASSQRQ_EVENT, (void*)&ev, true);
		}
		return 0;
	}

	if (pktsched_verbose) {
		printf("%s: TBR %s (rate %llu bps depth %u)\n", if_name(ifp),
		    (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" :
		    "enabled", rate, profile->depth);
	}

	/* set the new TBR */
	bzero(tbr, sizeof(*tbr));
	tbr->tbr_rate_raw = rate;
	tbr->tbr_percent = profile->percent;
	ifq->ifcq_flags |= IFCQF_TBR;

	/*
	 * Note that the TBR fill up time (hence the ifnet restart time)
	 * is directly related to the specified TBR depth.  The ideal
	 * depth value should be computed such that the interval time
	 * between each successive wakeup is adequately spaced apart,
	 * in order to reduce scheduling overheads.  A target interval
	 * of 10 ms seems to provide good performance balance.  This can be
	 * overridden by specifying the depth profile.  Values smaller than
	 * the ideal depth will reduce delay at the expense of CPU cycles.
	 */
	tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq;
	if (tbr->tbr_rate > 0) {
		u_int32_t mtu = ifp->if_mtu;
		int64_t ival, idepth = 0;
		int i;

		if (mtu < IF_MINMTU) {
			mtu = IF_MINMTU;
		}

		ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */

		/* find the smallest MTU multiple whose drain time > 10ms */
		for (i = 1;; i++) {
			idepth = TBR_SCALE(i * mtu);
			if ((idepth / tbr->tbr_rate) > ival) {
				break;
			}
		}
		VERIFY(idepth > 0);

		tbr->tbr_depth = TBR_SCALE(profile->depth);
		if (tbr->tbr_depth == 0) {
			tbr->tbr_filluptime = idepth / tbr->tbr_rate;
			/* a little fudge factor to get closer to rate */
			tbr->tbr_depth = idepth + (idepth >> 3);
		} else {
			tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
		}
	} else {
		/* rate too low to scale: effectively never refills */
		tbr->tbr_depth = TBR_SCALE(profile->depth);
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	}
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();

	if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) {
		/* schedule periodic ifnet restarts at the fill-up period */
		struct timespec ts =
		{ 0, (long)pktsched_abs_to_nsecs(tbr->tbr_filluptime) };
		if (pktsched_verbose) {
			printf("%s: TBR calculated tokens %lld "
			    "filluptime %llu ns\n", if_name(ifp),
			    TBR_UNSCALE(tbr->tbr_token),
			    pktsched_abs_to_nsecs(tbr->tbr_filluptime));
		}
		ifnet_set_start_cycle(ifp, &ts);
	} else {
		if (pktsched_verbose) {
			if (tbr->tbr_rate == 0) {
				printf("%s: TBR calculated tokens %lld "
				    "infinite filluptime\n", if_name(ifp),
				    TBR_UNSCALE(tbr->tbr_token));
			} else if (!(ifp->if_flags & IFF_UP)) {
				printf("%s: TBR suspended (link is down)\n",
				    if_name(ifp));
			}
		}
		ifnet_set_start_cycle(ifp, NULL);
	}
	if (update && tbr->tbr_rate_raw != old_rate) {
		ifclassq_request(ifq, CLASSQRQ_EVENT, (void*)&ev, true);
	}

	return 0;
}
858 
859 int
ifclassq_tbr_set(struct ifclassq * ifq,struct tb_profile * profile,boolean_t update)860 ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
861     boolean_t update)
862 {
863 	int error = 0;
864 
865 	IFCQ_LOCK(ifq);
866 	if (!IFCQ_IS_READY(ifq)) {
867 		error = ENXIO;
868 		goto out;
869 	}
870 
871 	error = ifclassq_tbr_set_locked(ifq, profile, update);
872 
873 out:
874 	IFCQ_UNLOCK(ifq);
875 	return error;
876 }
877 
/*
 * Allocate and initialize a reference-counted classq instance with the
 * no-op scheduler ops installed.  Cannot fail (Z_NOFAIL).
 */
struct ifclassq *
ifclassq_alloc(void)
{
	struct ifclassq *ifcq;

	ifcq = zalloc_flags(ifcq_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init(&ifcq->ifcq_refcnt, NULL);
	lck_mtx_init(&ifcq->ifcq_lock, &ifcq_lock_group, &ifcq_lock_attr);
	ifcq->ifcq_ops = pktsched_ops_find(PKTSCHEDT_NONE);
	VERIFY(ifcq->ifcq_ops != NULL);
	os_log(OS_LOG_DEFAULT, "ifclassq instance %p created", ifcq);
	return ifcq;
}
891 
/* Take an additional reference; paired with ifclassq_release(). */
void
ifclassq_retain(struct ifclassq *ifcq)
{
	os_ref_retain(&ifcq->ifcq_refcnt);
}
897 
/*
 * Drop a reference; the caller's pointer is cleared unconditionally.
 * The last release tears the classq down and frees it.
 */
void
ifclassq_release(struct ifclassq **pifcq)
{
	struct ifclassq *__single ifcq = *pifcq;

	*pifcq = NULL;
	if (os_ref_release(&ifcq->ifcq_refcnt) == 0) {
		ifclassq_teardown(ifcq);
		os_log(OS_LOG_DEFAULT, "ifclassq instance %p freed", ifcq);
		zfree(ifcq_zone, ifcq);
	}
}
910 
911 int
ifclassq_setup_group(struct ifclassq * ifcq,uint8_t grp_idx,uint8_t flags)912 ifclassq_setup_group(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
913 {
914 	int err;
915 
916 	IFCQ_LOCK(ifcq);
917 	VERIFY(ifcq->ifcq_disc != NULL);
918 	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL || ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL_NEW);
919 
920 	err = fq_if_create_grp(ifcq, grp_idx, flags);
921 	IFCQ_UNLOCK(ifcq);
922 
923 	return err;
924 }
925 
926 int
ifclassq_request(struct ifclassq * ifcq,enum cqrq rq,void * arg,bool locked)927 ifclassq_request(struct ifclassq * ifcq, enum cqrq rq, void *arg, bool locked)
928 {
929 	int err = 0;
930 
931 	if (!locked) {
932 		IFCQ_LOCK(ifcq);
933 	}
934 	IFCQ_LOCK_ASSERT_HELD(ifcq);
935 
936 	if (!IFCQ_IS_ENABLED(ifcq)) {
937 		err = ENXIO;
938 		goto out;
939 	}
940 
941 	err = ifcq->ifcq_ops->ps_req(ifcq, rq, arg);
942 
943 out:
944 	if (!locked) {
945 		IFCQ_UNLOCK(ifcq);
946 	}
947 	return err;
948 }
949 
950 void
ifclassq_tbr_get(struct ifclassq * ifcq,u_int32_t * sched_type,u_int64_t * tbr_bw,u_int64_t * tbr_pct)951 ifclassq_tbr_get(struct ifclassq *ifcq, u_int32_t *sched_type, u_int64_t *tbr_bw,
952     u_int64_t *tbr_pct)
953 {
954 	IFCQ_LOCK(ifcq);
955 
956 	*sched_type = ifcq->ifcq_type;
957 	if (IFCQ_TBR_IS_ENABLED(ifcq)) {
958 		*tbr_bw = ifcq->ifcq_tbr.tbr_rate_raw;
959 		*tbr_pct = ifcq->ifcq_tbr.tbr_percent;
960 	}
961 
962 	IFCQ_UNLOCK(ifcq);
963 }
964 
#if (DEBUG || DEVELOPMENT)
/*
 * Sysctl handler behind the per-interface "model" node registered in
 * ifclassq_setup().  Reading returns the current output scheduling
 * model; writing a different value switches the scheduler via
 * ifclassq_change().
 */
static int
ifclassq_configure_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	struct ifclassq *__single ifcq = arg1;
	struct ifnet *ifp = ifcq->ifcq_ifp;
	uint32_t new_model;
	int changed;
	int error;

	if (ifp == NULL || !IFCQ_IS_ENABLED(ifcq)) {
		return ENXIO;
	}

	error = sysctl_io_number(req, ifp->if_output_sched_model,
	    sizeof(ifp->if_output_sched_model), &new_model, &changed);
	/* only reconfigure on a successful write of a new value */
	if (error == 0 && changed != 0) {
		error = ifclassq_change(ifcq, new_model);
	}
	return error;
}
#endif /* (DEBUG || DEVELOPMENT) */
988