1 /*
2 * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/mbuf.h>
32 #include <sys/errno.h>
33 #include <sys/random.h>
34 #include <sys/kernel_types.h>
35 #include <sys/sysctl.h>
36
37 #include <kern/zalloc.h>
38
39 #include <net/if.h>
40 #include <net/net_osdep.h>
41 #include <net/classq/classq.h>
42 #include <pexpert/pexpert.h>
43 #include <net/classq/classq_sfb.h>
44 #include <net/classq/classq_fq_codel.h>
45 #include <net/pktsched/pktsched.h>
46 #include <net/pktsched/pktsched_fq_codel.h>
47 #include <net/pktsched/pktsched_ops.h>
48 #include <net/flowadv.h>
49
50 #include <libkern/libkern.h>
51
52 #if SKYWALK
53 #include <skywalk/os_skywalk_private.h>
54 #include <skywalk/core/skywalk_var.h>
55 #include <skywalk/nexus/netif/nx_netif.h>
56 #endif /* SKYWALK */
/* Forward declarations; definitions appear later in this file. */
static int ifclassq_tbr_set_locked(struct ifclassq *ifq, struct tb_profile *profile,
    boolean_t update);
static void ifclassq_tbr_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
    boolean_t, classq_pkt_t *, u_int8_t);
61
#if DEBUG || DEVELOPMENT
/* Debug/development-only tunables, published under net.classq. */
uint32_t ifclassq_flow_control_adv = 1; /* flow control advisory */
SYSCTL_UINT(_net_classq, OID_AUTO, flow_control_adv,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_flow_control_adv, 1,
    "enable/disable flow control advisory");

uint32_t ifclassq_congestion_feedback = 1;
SYSCTL_UINT(_net_classq, OID_AUTO, flow_congestion_feedback,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_congestion_feedback, 1,
    "enable/disable congestion feedback (flow control v2)");

/* parent node for the per-interface "model" handlers created in setup */
SYSCTL_EXTENSIBLE_NODE(_net_classq, OID_AUTO, scheduler,
    CTLFLAG_RW | CTLFLAG_LOCKED, 0, "classq scheduler");

/* list value and description of each model */
/*
 * X is the expansion hook used by IFNET_SCHED_MODEL_LIST (an X-macro
 * list declared elsewhere); each entry renders as "description:value ".
 */
#define X(name, value, description, ...) #description ":" #value " "
SYSCTL_STRING(_net_classq_scheduler, OID_AUTO, available_models, CTLFLAG_RD | CTLFLAG_LOCKED,
    IFNET_SCHED_MODEL_LIST, 0, "");
#undef X

/* handler that reads/changes an interface's scheduler model at runtime */
static int ifclassq_configure_sysctl SYSCTL_HANDLER_ARGS;
#endif /* DEBUG || DEVELOPMENT */
84
/* Typed allocation zone and lock group/attributes for struct ifclassq. */
static KALLOC_TYPE_DEFINE(ifcq_zone, struct ifclassq, NET_KT_DEFAULT);
LCK_ATTR_DECLARE(ifcq_lock_attr, 0, 0);
static LCK_GRP_DECLARE(ifcq_lock_group, "ifclassq locks");
88
/*
 * One-time classq initialization.  No runtime state is set up here;
 * this only pins down compile-time invariants the classq code relies on.
 */
void
classq_init(void)
{
	/* best-effort class must be encoded as 0 in both TC and SC spaces */
	static_assert(MBUF_TC_BE == 0);
	static_assert(MBUF_SC_BE == 0);
	/* ifclassq service-class table must cover every mbuf service class */
	static_assert(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);
}
96
/*
 * Bind an ifclassq to its interface and, for TXSTART interfaces, bring
 * up the packet scheduler.  Counters are reset and the queue is marked
 * READY|ENABLED on success.
 *
 * @param ifq     the (freshly allocated or torn-down) ifclassq.
 * @param ifp     owning interface.
 * @param sflags  scheduler flags (PKTSCHEDF_*); may be trimmed below.
 * @return 0 on success, or the error from ifclassq_pktsched_setup().
 */
int
ifclassq_setup(struct ifclassq *ifq, struct ifnet *ifp, uint32_t sflags)
{
	int err = 0;

	IFCQ_LOCK(ifq);
	VERIFY(IFCQ_IS_EMPTY(ifq));
	ifq->ifcq_ifp = ifp;
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));

	/* a fresh queue must have no TBR, scheduler, flags or discipline */
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);

	if (ifp->if_eflags & IFEF_TXSTART) {
		u_int32_t maxlen = 0;

		/* zero means "use the system default send queue length" */
		if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
			maxlen = if_sndq_maxlen;
		}
		IFCQ_SET_MAXLEN(ifq, maxlen);

		if (IFCQ_MAXLEN(ifq) != if_sndq_maxlen &&
		    IFCQ_TARGET_QDELAY(ifq) == 0) {
			/*
			 * Choose static queues because the interface has
			 * maximum queue size set
			 */
			sflags &= ~PKTSCHEDF_QALG_DELAYBASED;
		}
		ifq->ifcq_sflags = sflags;
		err = ifclassq_pktsched_setup(ifq);
		if (err == 0) {
			ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
		}
	}

#if (DEBUG || DEVELOPMENT)
	/*
	 * ifcq_oid_t mirrors struct skoid so the skoid API can operate on
	 * it directly; these asserts keep the two layouts in lock-step.
	 */
	static_assert(sizeof(struct skoid) == sizeof(ifcq_oid_t));
	static_assert(offsetof(struct skoid, sko_oid_list) == offsetof(ifcq_oid_t, ifcq_oid_list));
	static_assert(offsetof(struct skoid, sko_oid) == offsetof(ifcq_oid_t, ifcq_oid));
	static_assert(offsetof(struct skoid, sko_name) == offsetof(ifcq_oid_t, ifcq_name));

	/* publish net.classq.scheduler.<ifname>.model for this interface */
	struct skoid *ifcq_skoid = (struct skoid *)&ifq->ifcq_oid;
	skoid_create(ifcq_skoid,
	    SKOID_SNODE(_net_classq_scheduler), if_name(ifp),
	    CTLFLAG_RW);
	skoid_add_handler(ifcq_skoid, "model", CTLFLAG_RW,
	    ifclassq_configure_sysctl, ifq, 0);
#endif /* (DEBUG || DEVELOPMENT) */

	IFCQ_UNLOCK(ifq);

	return err;
}
157
/*
 * Switch the interface's output scheduler model at runtime and rebuild
 * the scheduler.  Switching between the driver-managed and the normal
 * family of models is not allowed (the !! comparison below normalizes
 * both sides to 0/1 membership in the driver-managed set).  On failure
 * the previous model is restored.
 *
 * @return 0 on success, EINVAL for an invalid/cross-family model,
 *         ENXIO if the interface does not use TXSTART.
 */
int
ifclassq_change(struct ifclassq *ifq, uint32_t model)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	uint32_t omodel;
	errno_t err;

	if (ifp == NULL || !IFNET_MODEL_IS_VALID(model) ||
	    (!!(model & IFNET_SCHED_DRIVER_MANGED_MODELS)) !=
	    (!!(ifp->if_output_sched_model & IFNET_SCHED_DRIVER_MANGED_MODELS))) {
		return EINVAL;
	} else if (!(ifp->if_eflags & IFEF_TXSTART)) {
		return ENXIO;
	}

	IFCQ_LOCK(ifq);
	omodel = ifp->if_output_sched_model;
	ifp->if_output_sched_model = model;

	/* roll back the model on scheduler setup failure */
	if ((err = ifclassq_pktsched_setup(ifq)) != 0) {
		ifp->if_output_sched_model = omodel;
	}
	IFCQ_UNLOCK(ifq);

	return err;
}
184
/*
 * Tear down an ifclassq: remove any TBR, detach the scheduler, clear
 * counters and mark the queue DESTROYED.  Idempotent — a second call on
 * an already-destroyed queue is a no-op.
 */
void
ifclassq_teardown(struct ifclassq *ifq)
{
	IFCQ_LOCK(ifq);
	if (IFCQ_IS_DESTROYED(ifq)) {
		/* already torn down; only the DESTROYED flag may remain */
		ASSERT((ifq->ifcq_flags & ~IFCQF_DESTROYED) == 0);
		goto done;
	}
	if (IFCQ_IS_READY(ifq)) {
		if (IFCQ_TBR_IS_ENABLED(ifq)) {
			/* a zero-rate profile deletes the TBR */
			struct tb_profile tb =
			{ .rate = 0, .percent = 0, .depth = 0 };
			(void) ifclassq_tbr_set_locked(ifq, &tb, FALSE);
		}
		pktsched_teardown(ifq);
		ifq->ifcq_flags &= ~IFCQF_READY;
	}
	ifq->ifcq_sflags = 0;
	/* scheduler teardown must have drained and detached everything */
	VERIFY(IFCQ_IS_EMPTY(ifq));
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	IFCQ_MAXLEN(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));
	ifq->ifcq_flags |= IFCQF_DESTROYED;

#if (DEBUG || DEVELOPMENT)
	/* remove the net.classq.scheduler.<ifname> sysctl subtree */
	struct skoid *ifcq_skoid = (struct skoid *)&ifq->ifcq_oid;
	skoid_destroy(ifcq_skoid);
#endif /* (DEBUG || DEVELOPMENT) */
done:
	IFCQ_UNLOCK(ifq);
}
223
224 int
ifclassq_pktsched_setup(struct ifclassq * ifq)225 ifclassq_pktsched_setup(struct ifclassq *ifq)
226 {
227 struct ifnet *ifp = ifq->ifcq_ifp;
228 classq_pkt_type_t ptype = QP_MBUF;
229 int err = 0;
230
231 IFCQ_LOCK_ASSERT_HELD(ifq);
232 VERIFY(ifp->if_eflags & IFEF_TXSTART);
233 #if SKYWALK
234 ptype = ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) != 0) ? QP_PACKET :
235 QP_MBUF;
236 #endif /* SKYWALK */
237
238 switch (ifp->if_output_sched_model) {
239 case IFNET_SCHED_MODEL_DRIVER_MANAGED:
240 case IFNET_SCHED_MODEL_NORMAL:
241 if (ifp->if_family == IFNET_FAMILY_ETHERNET &&
242 (ifp->if_subfamily != IFNET_SUBFAMILY_WIFI)) {
243 err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL_NEW, ifq->ifcq_sflags, ptype);
244 } else {
245 err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL, ifq->ifcq_sflags, ptype);
246 }
247 break;
248 case IFNET_SCHED_MODEL_FQ_CODEL:
249 case IFNET_SCHED_MODEL_FQ_CODEL_DM:
250 err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL, ifq->ifcq_sflags, ptype);
251 break;
252 case IFNET_SCHED_MODEL_FQ_CODEL_NEW:
253 case IFNET_SCHED_MODEL_FQ_CODEL_NEW_DM:
254 err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL_NEW, ifq->ifcq_sflags, ptype);
255 break;
256 default:
257 err = EINVAL;
258 }
259
260 return err;
261 }
262
263 void
ifclassq_set_maxlen(struct ifclassq * ifq,u_int32_t maxqlen)264 ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen)
265 {
266 IFCQ_LOCK(ifq);
267 if (maxqlen == 0) {
268 maxqlen = if_sndq_maxlen;
269 }
270 IFCQ_SET_MAXLEN(ifq, maxqlen);
271 IFCQ_UNLOCK(ifq);
272 }
273
274 u_int32_t
ifclassq_get_maxlen(struct ifclassq * ifq)275 ifclassq_get_maxlen(struct ifclassq *ifq)
276 {
277 return IFCQ_MAXLEN(ifq);
278 }
279
280 int
ifclassq_get_len(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int8_t grp_idx,u_int32_t * packets,u_int32_t * bytes)281 ifclassq_get_len(struct ifclassq *ifq, mbuf_svc_class_t sc, u_int8_t grp_idx,
282 u_int32_t *packets, u_int32_t *bytes)
283 {
284 int err = 0;
285 boolean_t dequeue_paused = false;
286
287 IFCQ_LOCK(ifq);
288 if ((ifq->ifcq_flags & (IFCQF_READY | IFCQF_ENABLED)) !=
289 (IFCQF_READY | IFCQF_ENABLED)) {
290 return ENXIO;
291 }
292 if (sc == MBUF_SC_UNSPEC && grp_idx == IF_CLASSQ_ALL_GRPS) {
293 VERIFY(packets != NULL);
294 if ((dequeue_paused = ifq->ifcq_ops->ps_allow_dequeue(ifq))) {
295 *packets = 0;
296 } else {
297 *packets = IFCQ_LEN(ifq);
298 }
299 } else {
300 cqrq_stat_sc_t req = { sc, grp_idx, 0, 0 };
301
302 VERIFY(MBUF_VALID_SC(sc) || sc == MBUF_SC_UNSPEC);
303
304 err = ifclassq_request(ifq, CLASSQRQ_STAT_SC, &req, true);
305 if (packets != NULL) {
306 *packets = req.packets;
307 }
308 if (bytes != NULL) {
309 *bytes = req.bytes;
310 }
311 }
312 KDBG(AQM_KTRACE_STATS_GET_QLEN, ifq->ifcq_ifp->if_index,
313 packets ? *packets : 0, bytes ? *bytes : 0, dequeue_paused);
314
315 IFCQ_UNLOCK(ifq);
316
317 #if SKYWALK
318 struct ifnet *ifp = ifq->ifcq_ifp;
319
320 if (__improbable(ifp->if_na_ops != NULL &&
321 ifp->if_na_ops->ni_get_len != NULL)) {
322 err = ifp->if_na_ops->ni_get_len(ifp->if_na, sc, packets,
323 bytes, err);
324 }
325 #endif /* SKYWALK */
326
327 return err;
328 }
329
/*
 * Annotate an outbound packet with unsent-byte accounting.  Only done
 * for cellular interfaces; other interface types return immediately.
 */
inline void
ifclassq_set_packet_metadata(struct ifclassq *ifq, struct ifnet *ifp,
    classq_pkt_t *p)
{
	if (!IFNET_IS_CELLULAR(ifp)) {
		return;
	}

	switch (p->cp_ptype) {
	case QP_MBUF: {
		struct mbuf *m = p->cp_mbuf;
		/* snapshot interface-queue and socket-buffer backlog */
		m->m_pkthdr.pkt_flags |= PKTF_VALID_UNSENT_DATA;
		m->m_pkthdr.bufstatus_if = IFCQ_BYTES(ifq);
		m->m_pkthdr.bufstatus_sndbuf = (uint32_t)ifp->if_sndbyte_unsent;
		break;
	}

#if SKYWALK
	case QP_PACKET:
		/*
		 * Support for equivalent of mbuf_get_unsent_data_bytes()
		 * is not needed in the Skywalk architecture.
		 */
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
362
/*
 * Enqueue a chain of cnt packets (bytes total) onto the ifclassq by
 * dispatching to the attached scheduler's enqueue op.  *pdrop is set by
 * the scheduler to indicate whether the chain was dropped.
 */
errno_t
ifclassq_enqueue(struct ifclassq *ifq, classq_pkt_t *head, classq_pkt_t *tail,
    u_int32_t cnt, u_int32_t bytes, boolean_t *pdrop)
{
	return ifq->ifcq_ops->ps_enq(ifq, head, tail, cnt, bytes, pdrop);
}
369
/*
 * Default dequeue path.  Without a TBR, the scheduler's batch dequeue
 * op is invoked directly.  With a TBR enabled, packets are pulled one
 * at a time through the regulator and hand-linked into a chain.
 *
 * @param drvmgt  TRUE for the driver-managed (per-service-class) path;
 *                sc must then be a valid service class.
 * @param head/tail/cnt/len  out: dequeued chain and its packet/byte
 *                counts (tail/cnt/len may be NULL on the TBR path).
 * @return 0 with at least one packet, EAGAIN if nothing was dequeued.
 */
static errno_t
ifclassq_dequeue_common_default(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
    u_int8_t grp_idx)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int32_t i = 0, l = 0;	/* i: packets dequeued, l: bytes dequeued */
	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));

	IFCQ_LOCK_SPIN(ifq);
	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		/* rate limiting active: one packet at a time */
		goto dequeue_loop;
	}

	/*
	 * If the scheduler support dequeueing multiple packets at the
	 * same time, call that one instead.
	 */
	if (drvmgt) {
		int err;

		err = ifq->ifcq_ops->ps_deq_sc(ifq, sc, pkt_limit,
		    byte_limit, head, tail, cnt, len, grp_idx);
		IFCQ_UNLOCK(ifq);

		/* an empty result with no error means "try again later" */
		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	} else {
		int err;

		err = ifq->ifcq_ops->ps_deq(ifq, pkt_limit, byte_limit,
		    head, tail, cnt, len, grp_idx);
		IFCQ_UNLOCK(ifq);

		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	}

dequeue_loop:
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));

	while (i < pkt_limit && l < byte_limit) {
		/* pull exactly one packet through the token bucket */
		if (drvmgt) {
			IFCQ_TBR_DEQUEUE_SC(ifq, sc, head, grp_idx);
		} else {
			IFCQ_TBR_DEQUEUE(ifq, head, grp_idx);
		}

		if (head->cp_mbuf == NULL) {
			break;
		}

		if (first.cp_mbuf == NULL) {
			first = *head;
		}

		/* terminate and link the packet onto the outgoing chain */
		switch (head->cp_ptype) {
		case QP_MBUF:
			head->cp_mbuf->m_nextpkt = NULL;
			l += head->cp_mbuf->m_pkthdr.len;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_mbuf != NULL) {
				last.cp_mbuf->m_nextpkt = head->cp_mbuf;
			}
			break;

#if SKYWALK
		case QP_PACKET:
			head->cp_kpkt->pkt_nextpkt = NULL;
			l += head->cp_kpkt->pkt_length;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_kpkt != NULL) {
				last.cp_kpkt->pkt_nextpkt = head->cp_kpkt;
			}
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		last = *head;
		i++;
	}

	IFCQ_UNLOCK(ifq);

	if (tail != NULL) {
		*tail = last;
	}
	if (cnt != NULL) {
		*cnt = i;
	}
	if (len != NULL) {
		*len = l;
	}

	*head = first;
	return (first.cp_mbuf != NULL) ? 0 : EAGAIN;
}
480
481 errno_t
ifclassq_dequeue(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int32_t pkt_limit,u_int32_t byte_limit,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t * cnt,u_int32_t * len,u_int8_t grp_idx)482 ifclassq_dequeue(struct ifclassq *ifq, mbuf_svc_class_t sc,
483 u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
484 classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
485 {
486 boolean_t drvmgt = sc != MBUF_SC_UNSPEC;
487 struct ifnet *ifp = ifq->ifcq_ifp;
488
489 if (__improbable(ifp->if_na_ops != NULL &&
490 ifp->if_na_ops->ni_dequeue != NULL)) {
491 /*
492 * TODO:
493 * We should be changing the pkt/byte limit to the
494 * available space in the next filter. But this is not
495 * useful until we can flow control the whole chain of
496 * filters.
497 */
498 errno_t err = ifclassq_dequeue_common_default(ifq, sc,
499 pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);
500
501 return ifp->if_na_ops->ni_dequeue(ifp->if_na, sc, pkt_limit,
502 byte_limit, head, tail, cnt, len, drvmgt, err);
503 }
504 return ifclassq_dequeue_common_default(ifq, sc,
505 pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);
506 }
507
/*
 * Propagate a link event (bandwidth/latency/MTU/up/down) to the
 * scheduler.  The TBR, if enabled, is re-programmed first since a
 * percent-based rate is derived from the (possibly changed) effective
 * link bandwidth.
 *
 * @param locked  true if the caller already holds the ifclassq lock.
 */
void
ifclassq_update(struct ifclassq *ifq, cqev_t ev, bool locked)
{
	void *ev_p = (void *)&ev;

	if (!locked) {
		IFCQ_LOCK(ifq);
	}
	IFCQ_LOCK_ASSERT_HELD(ifq);

	if (!(IFCQ_IS_READY(ifq))) {
		goto out;
	}

	/* re-derive TBR parameters from the current profile */
	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		struct tb_profile tb = {
			.rate = ifq->ifcq_tbr.tbr_rate_raw,
			.percent = ifq->ifcq_tbr.tbr_percent, .depth = 0
		};
		(void) ifclassq_tbr_set_locked(ifq, &tb, FALSE);
	}

	ifclassq_request(ifq, CLASSQRQ_EVENT, ev_p, true);

out:
	if (!locked) {
		IFCQ_UNLOCK(ifq);
	}
}
537
538 int
ifclassq_attach(struct ifclassq * ifq,u_int32_t type,void * discipline)539 ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline)
540 {
541 IFCQ_LOCK_ASSERT_HELD(ifq);
542 VERIFY(ifq->ifcq_disc == NULL);
543 ifq->ifcq_type = type;
544 ifq->ifcq_disc = discipline;
545 return 0;
546 }
547
/*
 * Detach the scheduler from the ifclassq.  The scheduler is expected to
 * have already released its discipline (hence the NULL VERIFY); this
 * just resets the type and falls back to the "none" scheduler ops.
 */
void
ifclassq_detach(struct ifclassq *ifq)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	ifq->ifcq_type = PKTSCHEDT_NONE;
	ifq->ifcq_ops = pktsched_ops_find(PKTSCHEDT_NONE);
}
556
557 int
ifclassq_getqstats(struct ifclassq * ifq,u_int8_t gid,u_int32_t qid,void * ubuf,u_int32_t * nbytes)558 ifclassq_getqstats(struct ifclassq *ifq, u_int8_t gid, u_int32_t qid, void *ubuf,
559 u_int32_t *nbytes)
560 {
561 struct if_ifclassq_stats *ifqs;
562 int err;
563
564 if (*nbytes < sizeof(*ifqs)) {
565 return EINVAL;
566 }
567
568 ifqs = kalloc_type(struct if_ifclassq_stats,
569 Z_WAITOK | Z_ZERO | Z_NOFAIL);
570
571 IFCQ_LOCK(ifq);
572 if (!IFCQ_IS_READY(ifq)) {
573 IFCQ_UNLOCK(ifq);
574 kfree_type(struct if_ifclassq_stats, ifqs);
575 return ENXIO;
576 }
577
578 ifqs->ifqs_len = IFCQ_LEN(ifq);
579 ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq);
580 *(&ifqs->ifqs_xmitcnt) = *(&ifq->ifcq_xmitcnt);
581 *(&ifqs->ifqs_dropcnt) = *(&ifq->ifcq_dropcnt);
582 ifqs->ifqs_scheduler = ifq->ifcq_type;
583
584 err = pktsched_getqstats(ifq, gid, qid, ifqs);
585 IFCQ_UNLOCK(ifq);
586
587 if (err == 0 && (err = copyout(ifqs,
588 (user_addr_t)(uintptr_t)ubuf, sizeof(*ifqs))) == 0) {
589 *nbytes = sizeof(*ifqs);
590 }
591
592 kfree_type(struct if_ifclassq_stats, ifqs);
593
594 return err;
595 }
596
597 const char *__null_terminated
ifclassq_ev2str(cqev_t ev)598 ifclassq_ev2str(cqev_t ev)
599 {
600 const char *__null_terminated c = "";
601
602 switch (ev) {
603 case CLASSQ_EV_LINK_BANDWIDTH:
604 c = "LINK_BANDWIDTH";
605 break;
606
607 case CLASSQ_EV_LINK_LATENCY:
608 c = "LINK_LATENCY";
609 break;
610
611 case CLASSQ_EV_LINK_MTU:
612 c = "LINK_MTU";
613 break;
614
615 case CLASSQ_EV_LINK_UP:
616 c = "LINK_UP";
617 break;
618
619 case CLASSQ_EV_LINK_DOWN:
620 c = "LINK_DOWN";
621 break;
622
623 default:
624 c = "UNKNOWN";
625 break;
626 }
627
628 return c;
629 }
630
/*
 * Internal representation of token bucket parameters, kept in 32.32
 * fixed point (scaled by 2^32):
 *   rate:  scaled bytes per machclk tick, i.e.
 *          (((bits_per_sec) / 8) << 32) / machclk_freq
 *   depth: scaled bytes, i.e. byte << 32
 */
#define TBR_SHIFT 32
#define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
#define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
641
/* TBR dequeue of a single packet, no service-class restriction. */
void
ifclassq_tbr_dequeue(struct ifclassq *ifq, classq_pkt_t *pkt, u_int8_t grp_idx)
{
	ifclassq_tbr_dequeue_common(ifq, MBUF_SC_UNSPEC, FALSE, pkt, grp_idx);
}
647
/* TBR dequeue of a single packet restricted to service class sc. */
void
ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    classq_pkt_t *pkt, u_int8_t grp_idx)
{
	ifclassq_tbr_dequeue_common(ifq, sc, TRUE, pkt, grp_idx);
}
654
/*
 * Token-bucket-regulated single-packet dequeue.  Refills the bucket
 * based on elapsed machclk time, refuses to dequeue while the token
 * count is non-positive, and charges the dequeued packet's length
 * against the bucket.  Called with the ifclassq lock held; *pkt is
 * left empty when nothing may be dequeued.
 */
static void
ifclassq_tbr_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    boolean_t drvmgt, classq_pkt_t *pkt, u_int8_t grp_idx)
{
	struct tb_regulator *tbr;
	int64_t interval;
	u_int64_t now;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));

	*pkt = CLASSQ_PKT_INITIALIZER(*pkt);
	tbr = &ifq->ifcq_tbr;
	/* update token only when it is negative */
	if (tbr->tbr_token <= 0) {
		now = read_machclk();
		interval = now - tbr->tbr_last;
		if (interval >= tbr->tbr_filluptime) {
			/* idle long enough for the bucket to fill entirely */
			tbr->tbr_token = tbr->tbr_depth;
		} else {
			/* accrue tokens for elapsed time, capped at depth */
			tbr->tbr_token += interval * tbr->tbr_rate;
			if (tbr->tbr_token > tbr->tbr_depth) {
				tbr->tbr_token = tbr->tbr_depth;
			}
		}
		tbr->tbr_last = now;
	}
	/* if token is still negative, don't allow dequeue */
	if (tbr->tbr_token <= 0) {
		return;
	}

	/*
	 * ifclassq takes precedence over ALTQ queue;
	 * ifcq_drain count is adjusted by the caller.
	 */
	if (drvmgt) {
		ifq->ifcq_ops->ps_deq_sc(ifq, sc, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
	} else {
		ifq->ifcq_ops->ps_deq(ifq, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, pkt, NULL, NULL, NULL, grp_idx);
	}

	if (pkt->cp_mbuf != NULL) {
		/* charge the dequeued packet's length against the bucket */
		switch (pkt->cp_ptype) {
		case QP_MBUF:
			tbr->tbr_token -= TBR_SCALE(m_pktlen(pkt->cp_mbuf));
			break;

#if SKYWALK
		case QP_PACKET:
			tbr->tbr_token -=
			    TBR_SCALE(pkt->cp_kpkt->pkt_length);
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
		}
	}
}
718
/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 *
 * Called with the ifclassq lock held; the queue must be READY.  When
 * update is TRUE, the scheduler is notified of the bandwidth change via
 * a CLASSQ_EV_LINK_BANDWIDTH event.
 */
static int
ifclassq_tbr_set_locked(struct ifclassq *ifq, struct tb_profile *profile,
    boolean_t update)
{
	struct tb_regulator *tbr;
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int64_t rate, old_rate;
	uint8_t ev = CLASSQ_EV_LINK_BANDWIDTH;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));

	VERIFY(machclk_freq != 0);

	tbr = &ifq->ifcq_tbr;
	old_rate = tbr->tbr_rate_raw;

	rate = profile->rate;
	if (profile->percent > 0) {
		u_int64_t eff_rate;

		if (profile->percent > 100) {
			return EINVAL;
		}
		/* percent-based rate needs a known effective link bandwidth */
		if ((eff_rate = ifp->if_output_bw.eff_bw) == 0) {
			return ENODEV;
		}
		rate = (eff_rate * profile->percent) / 100;
	}

	if (rate == 0) {
		if (!IFCQ_TBR_IS_ENABLED(ifq)) {
			return 0;
		}

		if (pktsched_verbose) {
			printf("%s: TBR disabled\n", if_name(ifp));
		}

		/* disable this TBR */
		ifq->ifcq_flags &= ~IFCQF_TBR;
		bzero(tbr, sizeof(*tbr));
		ifnet_set_start_cycle(ifp, NULL);
		if (update) {
			ifclassq_request(ifq, CLASSQRQ_EVENT, (void*)&ev, true);
		}
		return 0;
	}

	if (pktsched_verbose) {
		printf("%s: TBR %s (rate %llu bps depth %u)\n", if_name(ifp),
		    (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" :
		    "enabled", rate, profile->depth);
	}

	/* set the new TBR */
	bzero(tbr, sizeof(*tbr));
	tbr->tbr_rate_raw = rate;
	tbr->tbr_percent = profile->percent;
	ifq->ifcq_flags |= IFCQF_TBR;

	/*
	 * Note that the TBR fill up time (hence the ifnet restart time)
	 * is directly related to the specified TBR depth. The ideal
	 * depth value should be computed such that the interval time
	 * between each successive wakeup is adequately spaced apart,
	 * in order to reduce scheduling overheads. A target interval
	 * of 10 ms seems to provide good performance balance. This can be
	 * overridden by specifying the depth profile. Values smaller than
	 * the ideal depth will reduce delay at the expense of CPU cycles.
	 */
	tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq;
	if (tbr->tbr_rate > 0) {
		u_int32_t mtu = ifp->if_mtu;
		int64_t ival, idepth = 0;
		int i;

		if (mtu < IF_MINMTU) {
			mtu = IF_MINMTU;
		}

		ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */

		/* smallest MTU multiple whose fill time exceeds the target */
		for (i = 1;; i++) {
			idepth = TBR_SCALE(i * mtu);
			if ((idepth / tbr->tbr_rate) > ival) {
				break;
			}
		}
		VERIFY(idepth > 0);

		tbr->tbr_depth = TBR_SCALE(profile->depth);
		if (tbr->tbr_depth == 0) {
			tbr->tbr_filluptime = idepth / tbr->tbr_rate;
			/* a little fudge factor to get closer to rate */
			tbr->tbr_depth = idepth + (idepth >> 3);
		} else {
			tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
		}
	} else {
		/* rate rounds to zero tokens/tick; fill time is "never" */
		tbr->tbr_depth = TBR_SCALE(profile->depth);
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	}
	tbr->tbr_token = tbr->tbr_depth;	/* start with a full bucket */
	tbr->tbr_last = read_machclk();

	if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) {
		/* periodically restart the ifnet to drain refilled tokens */
		struct timespec ts =
		{ 0, (long)pktsched_abs_to_nsecs(tbr->tbr_filluptime) };
		if (pktsched_verbose) {
			printf("%s: TBR calculated tokens %lld "
			    "filluptime %llu ns\n", if_name(ifp),
			    TBR_UNSCALE(tbr->tbr_token),
			    pktsched_abs_to_nsecs(tbr->tbr_filluptime));
		}
		ifnet_set_start_cycle(ifp, &ts);
	} else {
		if (pktsched_verbose) {
			if (tbr->tbr_rate == 0) {
				printf("%s: TBR calculated tokens %lld "
				    "infinite filluptime\n", if_name(ifp),
				    TBR_UNSCALE(tbr->tbr_token));
			} else if (!(ifp->if_flags & IFF_UP)) {
				printf("%s: TBR suspended (link is down)\n",
				    if_name(ifp));
			}
		}
		ifnet_set_start_cycle(ifp, NULL);
	}
	/* notify the scheduler only if the effective rate really changed */
	if (update && tbr->tbr_rate_raw != old_rate) {
		ifclassq_request(ifq, CLASSQRQ_EVENT, (void*)&ev, true);
	}

	return 0;
}
858
859 int
ifclassq_tbr_set(struct ifclassq * ifq,struct tb_profile * profile,boolean_t update)860 ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
861 boolean_t update)
862 {
863 int error = 0;
864
865 IFCQ_LOCK(ifq);
866 if (!IFCQ_IS_READY(ifq)) {
867 error = ENXIO;
868 goto out;
869 }
870
871 error = ifclassq_tbr_set_locked(ifq, profile, update);
872
873 out:
874 IFCQ_UNLOCK(ifq);
875 return error;
876 }
877
/*
 * Allocate and initialize a zeroed, refcounted ifclassq.  The caller
 * holds the initial reference (released via ifclassq_release()).
 */
struct ifclassq *
ifclassq_alloc(void)
{
	struct ifclassq *ifcq;

	ifcq = zalloc_flags(ifcq_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init(&ifcq->ifcq_refcnt, NULL);
	lck_mtx_init(&ifcq->ifcq_lock, &ifcq_lock_group, &ifcq_lock_attr);
	/* start with the no-op scheduler until one is attached */
	ifcq->ifcq_ops = pktsched_ops_find(PKTSCHEDT_NONE);
	VERIFY(ifcq->ifcq_ops != NULL);
	os_log(OS_LOG_DEFAULT, "ifclassq instance %p created", ifcq);
	return ifcq;
}
891
/* Take an additional reference on the ifclassq. */
void
ifclassq_retain(struct ifclassq *ifcq)
{
	os_ref_retain(&ifcq->ifcq_refcnt);
}
897
/*
 * Drop a reference; the last release tears down and frees the instance.
 * The caller's pointer is cleared to guard against use-after-free.
 */
void
ifclassq_release(struct ifclassq **pifcq)
{
	struct ifclassq *__single ifcq = *pifcq;

	*pifcq = NULL;
	/* os_ref_release() returns the new count; 0 means last reference */
	if (os_ref_release(&ifcq->ifcq_refcnt) == 0) {
		ifclassq_teardown(ifcq);
		os_log(OS_LOG_DEFAULT, "ifclassq instance %p freed", ifcq);
		zfree(ifcq_zone, ifcq);
	}
}
910
911 int
ifclassq_setup_group(struct ifclassq * ifcq,uint8_t grp_idx,uint8_t flags)912 ifclassq_setup_group(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
913 {
914 int err;
915
916 IFCQ_LOCK(ifcq);
917 VERIFY(ifcq->ifcq_disc != NULL);
918 VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL || ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL_NEW);
919
920 err = fq_if_create_grp(ifcq, grp_idx, flags);
921 IFCQ_UNLOCK(ifcq);
922
923 return err;
924 }
925
926 int
ifclassq_request(struct ifclassq * ifcq,enum cqrq rq,void * arg,bool locked)927 ifclassq_request(struct ifclassq * ifcq, enum cqrq rq, void *arg, bool locked)
928 {
929 int err = 0;
930
931 if (!locked) {
932 IFCQ_LOCK(ifcq);
933 }
934 IFCQ_LOCK_ASSERT_HELD(ifcq);
935
936 if (!IFCQ_IS_ENABLED(ifcq)) {
937 err = ENXIO;
938 goto out;
939 }
940
941 err = ifcq->ifcq_ops->ps_req(ifcq, rq, arg);
942
943 out:
944 if (!locked) {
945 IFCQ_UNLOCK(ifcq);
946 }
947 return err;
948 }
949
950 void
ifclassq_tbr_get(struct ifclassq * ifcq,u_int32_t * sched_type,u_int64_t * tbr_bw,u_int64_t * tbr_pct)951 ifclassq_tbr_get(struct ifclassq *ifcq, u_int32_t *sched_type, u_int64_t *tbr_bw,
952 u_int64_t *tbr_pct)
953 {
954 IFCQ_LOCK(ifcq);
955
956 *sched_type = ifcq->ifcq_type;
957 if (IFCQ_TBR_IS_ENABLED(ifcq)) {
958 *tbr_bw = ifcq->ifcq_tbr.tbr_rate_raw;
959 *tbr_pct = ifcq->ifcq_tbr.tbr_percent;
960 }
961
962 IFCQ_UNLOCK(ifcq);
963 }
964
#if (DEBUG || DEVELOPMENT)
/*
 * Handler behind net.classq.scheduler.<ifname>.model (registered in
 * ifclassq_setup()): reads the interface's current output scheduler
 * model, or changes it when a new value is written.
 */
static int
ifclassq_configure_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	struct ifclassq *__single ifcq = arg1;
	struct ifnet *ifp = ifcq->ifcq_ifp;
	uint32_t new_model;
	int changed;
	int error;

	if (ifp == NULL || !IFCQ_IS_ENABLED(ifcq)) {
		return ENXIO;
	}

	/* read current value out / accept new value in */
	error = sysctl_io_number(req, ifp->if_output_sched_model,
	    sizeof(ifp->if_output_sched_model), &new_model, &changed);
	if (error == 0 && changed != 0) {
		error = ifclassq_change(ifcq, new_model);
	}
	return error;
}
#endif /* (DEBUG || DEVELOPMENT) */
988