1 /*
2 * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/mbuf.h>
32 #include <sys/errno.h>
33 #include <sys/random.h>
34 #include <sys/kernel_types.h>
35 #include <sys/sysctl.h>
36
37 #include <kern/zalloc.h>
38
39 #include <net/if.h>
40 #include <net/net_osdep.h>
41 #include <net/classq/classq.h>
42 #include <pexpert/pexpert.h>
43 #include <net/classq/classq_sfb.h>
44 #include <net/classq/classq_fq_codel.h>
45 #include <net/pktsched/pktsched.h>
46 #include <net/pktsched/pktsched_fq_codel.h>
47 #include <net/flowadv.h>
48
49 #include <libkern/libkern.h>
50
51 #if SKYWALK
52 #include <skywalk/os_skywalk_private.h>
53 #endif /* SKYWALK */
54
55 static errno_t ifclassq_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
56 u_int32_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
57 u_int32_t *, boolean_t);
58 static void ifclassq_tbr_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
59 boolean_t, classq_pkt_t *);
60
/*
 * System-wide override for the target queue delay; 0 means "not set",
 * in which case the per-interface or built-in default is used instead
 * (see ifclassq_calc_target_qdelay()).
 */
static u_int64_t ifclassq_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_target_qdelay, "target queue delay in nanoseconds");

/*
 * System-wide override for the scheduler update interval; 0 means
 * "not set" (see ifclassq_calc_update_interval()).
 */
static u_int64_t ifclassq_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_update_interval,
    "update interval in nanoseconds");

#if DEBUG || DEVELOPMENT
uint32_t ifclassq_flow_control_adv = 1; /* flow control advisory */
SYSCTL_UINT(_net_classq, OID_AUTO, flow_control_adv,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_flow_control_adv, 1,
    "enable/disable flow control advisory");

/* FQ-CoDel quantum; overridable via the "fq_codel_quantum" boot-arg */
uint16_t fq_codel_quantum = 0;
#endif /* DEBUG || DEVELOPMENT */

static struct zone *ifcq_zone;          /* zone for ifclassq */
#define IFCQ_ZONE_NAME    "ifclassq"    /* zone name */
LCK_ATTR_DECLARE(ifcq_lock_attr, 0, 0);
static LCK_GRP_DECLARE(ifcq_lock_group, "ifclassq locks");
83
/*
 * One-time classq subsystem initialization: verify compile-time
 * invariants, apply boot-arg overrides (internal builds only), create
 * the ifclassq allocation zone, and initialize FQ-CoDel.
 */
void
classq_init(void)
{
	/*
	 * Compile-time invariants: "best effort" must be index 0 in both
	 * the TC and SC namespaces, and the classq service-class table
	 * must cover every mbuf service class.
	 */
	_CASSERT(MBUF_TC_BE == 0);
	_CASSERT(MBUF_SC_BE == 0);
	_CASSERT(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);
#if DEBUG || DEVELOPMENT
	/* boot-arg overrides for tunables, honored on internal builds only */
	PE_parse_boot_argn("fq_codel_quantum", &fq_codel_quantum,
	    sizeof(fq_codel_quantum));
	PE_parse_boot_argn("ifclassq_target_qdelay", &ifclassq_target_qdelay,
	    sizeof(ifclassq_target_qdelay));
	PE_parse_boot_argn("ifclassq_update_interval",
	    &ifclassq_update_interval, sizeof(ifclassq_update_interval));
#endif /* DEBUG || DEVELOPMENT */
	ifcq_zone = zone_create(IFCQ_ZONE_NAME, sizeof(struct ifclassq),
	    ZC_ZFREE_CLEARMEM);
	fq_codel_init();
}
102
/*
 * Bind an ifclassq to its interface and, for drivers using the TXSTART
 * model, attach the packet scheduler.  The queue must be in a pristine
 * (freshly allocated or fully torn down) state on entry.  Returns 0 on
 * success or an errno from the scheduler setup.
 */
int
ifclassq_setup(struct ifclassq *ifq, struct ifnet *ifp, uint32_t sflags)
{
	int err = 0;

	IFCQ_LOCK(ifq);
	VERIFY(IFCQ_IS_EMPTY(ifq));
	ifq->ifcq_ifp = ifp;
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));

	/* the queue must carry no leftover scheduler or TBR state */
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);

	if (ifp->if_eflags & IFEF_TXSTART) {
		u_int32_t maxlen = 0;

		/* fall back to the global default send-queue length */
		if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
			maxlen = if_sndq_maxlen;
		}
		IFCQ_SET_MAXLEN(ifq, maxlen);

		if (IFCQ_MAXLEN(ifq) != if_sndq_maxlen &&
		    IFCQ_TARGET_QDELAY(ifq) == 0) {
			/*
			 * Choose static queues because the interface has
			 * maximum queue size set
			 */
			sflags &= ~PKTSCHEDF_QALG_DELAYBASED;
		}
		ifq->ifcq_sflags = sflags;
		err = ifclassq_pktsched_setup(ifq);
		if (err == 0) {
			/* scheduler attached: mark ready for use */
			ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
		}
	}
	IFCQ_UNLOCK(ifq);
	return err;
}
147
/*
 * Tear down an ifclassq's scheduler/TBR state and mark it destroyed.
 * Idempotent: a queue already flagged IFCQF_DESTROYED is left alone.
 * The queue must be empty once the scheduler has been detached.
 */
void
ifclassq_teardown(struct ifclassq *ifq)
{
	IFCQ_LOCK(ifq);
	if (IFCQ_IS_DESTROYED(ifq)) {
		/* already torn down; only the destroyed bit may remain set */
		ASSERT((ifq->ifcq_flags & ~IFCQF_DESTROYED) == 0);
		goto done;
	}
	if (IFCQ_IS_READY(ifq)) {
		if (IFCQ_TBR_IS_ENABLED(ifq)) {
			/* a zero-rate profile disables the token bucket */
			struct tb_profile tb =
			{ .rate = 0, .percent = 0, .depth = 0 };
			(void) ifclassq_tbr_set(ifq, &tb, FALSE);
		}
		pktsched_teardown(ifq);
		ifq->ifcq_flags &= ~IFCQF_READY;
	}
	ifq->ifcq_sflags = 0;
	/* scheduler teardown must have left the queue pristine */
	VERIFY(IFCQ_IS_EMPTY(ifq));
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	IFCQ_MAXLEN(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));
	ifq->ifcq_flags |= IFCQF_DESTROYED;
done:
	IFCQ_UNLOCK(ifq);
}
181
182 int
ifclassq_pktsched_setup(struct ifclassq * ifq)183 ifclassq_pktsched_setup(struct ifclassq *ifq)
184 {
185 struct ifnet *ifp = ifq->ifcq_ifp;
186 classq_pkt_type_t ptype = QP_MBUF;
187 int err = 0;
188
189 IFCQ_LOCK_ASSERT_HELD(ifq);
190 VERIFY(ifp->if_eflags & IFEF_TXSTART);
191 #if SKYWALK
192 ptype = ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) != 0) ? QP_PACKET :
193 QP_MBUF;
194 #endif /* SKYWALK */
195
196 err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL, ifq->ifcq_sflags, ptype);
197
198 return err;
199 }
200
201 void
ifclassq_set_maxlen(struct ifclassq * ifq,u_int32_t maxqlen)202 ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen)
203 {
204 IFCQ_LOCK(ifq);
205 if (maxqlen == 0) {
206 maxqlen = if_sndq_maxlen;
207 }
208 IFCQ_SET_MAXLEN(ifq, maxqlen);
209 IFCQ_UNLOCK(ifq);
210 }
211
212 u_int32_t
ifclassq_get_maxlen(struct ifclassq * ifq)213 ifclassq_get_maxlen(struct ifclassq *ifq)
214 {
215 return IFCQ_MAXLEN(ifq);
216 }
217
/*
 * Return queue occupancy.  With sc == MBUF_SC_UNSPEC only the total
 * packet count is reported; for a specific service class a stat request
 * is issued to the scheduler for both packet and byte counts.  When the
 * Skywalk netif layer interposes, it is given the chance to override
 * the reported lengths and error.  Returns 0 or an errno.
 */
int
ifclassq_get_len(struct ifclassq *ifq, mbuf_svc_class_t sc, u_int32_t *packets,
    u_int32_t *bytes)
{
	int err = 0;

	IFCQ_LOCK(ifq);
	if (sc == MBUF_SC_UNSPEC) {
		VERIFY(packets != NULL);
		*packets = IFCQ_LEN(ifq);
	} else {
		cqrq_stat_sc_t req = { sc, 0, 0 };

		VERIFY(MBUF_VALID_SC(sc));
		VERIFY(packets != NULL && bytes != NULL);

		err = fq_if_request_classq(ifq, CLASSQRQ_STAT_SC, &req);
		if (packets != NULL) {
			*packets = req.packets;
		}
		if (bytes != NULL) {
			*bytes = req.bytes;
		}
	}
	IFCQ_UNLOCK(ifq);

#if SKYWALK
	struct ifnet *ifp = ifq->ifcq_ifp;

	/* let a netif interposer adjust/override the reported lengths */
	if (__improbable(ifp->if_na_ops != NULL &&
	    ifp->if_na_ops->ni_get_len != NULL)) {
		err = ifp->if_na_ops->ni_get_len(ifp->if_na, sc, packets,
		    bytes, err);
	}
#endif /* SKYWALK */

	return err;
}
256
/*
 * Stamp a dequeued packet with unsent-data accounting used on cellular
 * interfaces (queue byte backlog and interface unsent-byte count).
 * Non-cellular interfaces are left untouched; Skywalk packets need no
 * equivalent marking.
 */
inline void
ifclassq_set_packet_metadata(struct ifclassq *ifq, struct ifnet *ifp,
    classq_pkt_t *p)
{
	if (!IFNET_IS_CELLULAR(ifp)) {
		return;
	}

	switch (p->cp_ptype) {
	case QP_MBUF: {
		struct mbuf *m = p->cp_mbuf;
		m->m_pkthdr.pkt_flags |= PKTF_VALID_UNSENT_DATA;
		/* snapshot of queue backlog and unsent bytes at dequeue time */
		m->m_pkthdr.bufstatus_if = IFCQ_BYTES(ifq);
		m->m_pkthdr.bufstatus_sndbuf = (uint32_t)ifp->if_sndbyte_unsent;
		break;
	}

#if SKYWALK
	case QP_PACKET:
		/*
		 * Support for equivalent of mbuf_get_unsent_data_bytes()
		 * is not needed in the Skywalk architecture.
		 */
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
289
/*
 * Enqueue a chain of packets (head..tail, cnt packets totaling `bytes')
 * onto the interface classq.  Delegates directly to the FQ-CoDel
 * scheduler; the drop disposition is reported through *pdrop
 * (set by the scheduler — see fq_if_enqueue_classq()).
 */
errno_t
ifclassq_enqueue(struct ifclassq *ifq, classq_pkt_t *head, classq_pkt_t *tail,
    u_int32_t cnt, u_int32_t bytes, boolean_t *pdrop)
{
	return fq_if_enqueue_classq(ifq, head, tail, cnt, bytes, pdrop);
}
296
/*
 * Dequeue up to pkt_limit packets / byte_limit bytes from any service
 * class.  Thin wrapper over ifclassq_dequeue_common() with drvmgt off.
 */
errno_t
ifclassq_dequeue(struct ifclassq *ifq, u_int32_t pkt_limit,
    u_int32_t byte_limit, classq_pkt_t *head, classq_pkt_t *tail,
    u_int32_t *cnt, u_int32_t *len)
{
	return ifclassq_dequeue_common(ifq, MBUF_SC_UNSPEC, pkt_limit,
	    byte_limit, head, tail, cnt, len, FALSE);
}
305
/*
 * Dequeue up to pkt_limit packets / byte_limit bytes restricted to the
 * given service class (driver-managed mode).  Wrapper over
 * ifclassq_dequeue_common() with drvmgt on.
 */
errno_t
ifclassq_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len)
{
	return ifclassq_dequeue_common(ifq, sc, pkt_limit, byte_limit,
	    head, tail, cnt, len, TRUE);
}
314
/*
 * Core dequeue path.  Without a token bucket regulator the scheduler's
 * batched dequeue is used under a single lock hold.  With a TBR active,
 * packets are pulled one at a time through the regulator and chained
 * together until either limit is reached or the rate budget runs out.
 * On return *head/*tail bracket the chain and *cnt/*len report its
 * size; returns EAGAIN when nothing could be dequeued.
 */
static errno_t
ifclassq_dequeue_common_default(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int32_t i = 0, l = 0;         /* packets / bytes dequeued so far */
	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));

	/* a TBR forces the one-packet-at-a-time rate-limited path */
	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		goto dequeue_loop;
	}

	/*
	 * If the scheduler supports dequeueing multiple packets at the
	 * same time, call that one instead.
	 */
	if (drvmgt) {
		int err;

		IFCQ_LOCK_SPIN(ifq);
		err = fq_if_dequeue_sc_classq_multi(ifq, sc, pkt_limit,
		    byte_limit, head, tail, cnt, len);
		IFCQ_UNLOCK(ifq);

		/* empty result with no error means "try again later" */
		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	} else {
		int err;

		IFCQ_LOCK_SPIN(ifq);
		err = fq_if_dequeue_classq_multi(ifq, pkt_limit, byte_limit,
		    head, tail, cnt, len);
		IFCQ_UNLOCK(ifq);

		/* empty result with no error means "try again later" */
		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	}

dequeue_loop:

	IFCQ_LOCK_SPIN(ifq);

	while (i < pkt_limit && l < byte_limit) {
		/* pull one packet, routed through the TBR when enabled */
		if (drvmgt) {
			if (IFCQ_TBR_IS_ENABLED(ifq)) {
				IFCQ_TBR_DEQUEUE_SC(ifq, sc, head);
			} else {
				fq_if_dequeue_sc_classq(ifq, sc, head);
			}
		} else {
			if (IFCQ_TBR_IS_ENABLED(ifq)) {
				IFCQ_TBR_DEQUEUE(ifq, head);
			} else {
				fq_if_dequeue_classq(ifq, head);
			}
		}

		/* queue empty or TBR out of tokens */
		if (head->cp_mbuf == NULL) {
			break;
		}

		if (first.cp_mbuf == NULL) {
			first = *head;
		}

		/* link the packet onto the chain and account its length */
		switch (head->cp_ptype) {
		case QP_MBUF:
			head->cp_mbuf->m_nextpkt = NULL;
			l += head->cp_mbuf->m_pkthdr.len;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_mbuf != NULL) {
				last.cp_mbuf->m_nextpkt = head->cp_mbuf;
			}
			break;

#if SKYWALK
		case QP_PACKET:
			head->cp_kpkt->pkt_nextpkt = NULL;
			l += head->cp_kpkt->pkt_length;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_kpkt != NULL) {
				last.cp_kpkt->pkt_nextpkt = head->cp_kpkt;
			}
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		last = *head;
		i++;
	}

	IFCQ_UNLOCK(ifq);

	if (tail != NULL) {
		*tail = last;
	}
	if (cnt != NULL) {
		*cnt = i;
	}
	if (len != NULL) {
		*len = l;
	}

	*head = first;
	return (first.cp_mbuf != NULL) ? 0 : EAGAIN;
}
434
/*
 * Dequeue dispatcher: runs the default dequeue path and, when the
 * Skywalk netif layer has installed a dequeue interposer, hands the
 * result to it for filtering/override before returning to the caller.
 */
static errno_t
ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt)
{
#if SKYWALK
	struct ifnet *ifp = ifq->ifcq_ifp;

	if (__improbable(ifp->if_na_ops != NULL &&
	    ifp->if_na_ops->ni_dequeue != NULL)) {
		/*
		 * TODO:
		 * We should be changing the pkt/byte limit to the
		 * available space in the next filter. But this is not
		 * useful until we can flow control the whole chain of
		 * filters.
		 */
		errno_t err = ifclassq_dequeue_common_default(ifq, sc,
		    pkt_limit, byte_limit, head, tail, cnt, len, drvmgt);

		return ifp->if_na_ops->ni_dequeue(ifp->if_na, sc, pkt_limit,
		           byte_limit, head, tail, cnt, len, drvmgt, err);
	}
#endif /* SKYWALK */
	return ifclassq_dequeue_common_default(ifq, sc,
	           pkt_limit, byte_limit, head, tail, cnt, len, drvmgt);
}
462
/*
 * Propagate an interface event (see cqev_t: link bandwidth, latency,
 * MTU, up/down) to the scheduler so it can re-tune its parameters.
 * Caller must hold the ifcq lock and the queue must be ready.
 */
void
ifclassq_update(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));
	fq_if_request_classq(ifq, CLASSQRQ_EVENT, (void *)ev);
}
470
/*
 * Record the scheduler type and its discipline state on the queue.
 * Caller holds the ifcq lock; no discipline may already be attached.
 * Always returns 0.
 */
int
ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	ifq->ifcq_type = type;
	ifq->ifcq_disc = discipline;
	return 0;
}
480
/*
 * Reset the scheduler type on the queue.  The discipline pointer must
 * already have been cleared by the scheduler's own teardown before
 * this is called.  Caller holds the ifcq lock.
 */
void
ifclassq_detach(struct ifclassq *ifq)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	ifq->ifcq_type = PKTSCHEDT_NONE;
}
488
/*
 * Copy statistics for queue `qid' out to the user buffer `ubuf'.
 * *nbytes carries the caller's buffer size in, and the number of bytes
 * written out on success.  Returns EINVAL if the buffer is too small,
 * ENXIO if the queue is not ready, or an error from
 * pktsched_getqstats()/copyout().
 */
int
ifclassq_getqstats(struct ifclassq *ifq, u_int32_t qid, void *ubuf,
    u_int32_t *nbytes)
{
	struct if_ifclassq_stats *ifqs;
	int err;

	if (*nbytes < sizeof(*ifqs)) {
		return EINVAL;
	}

	/* kernel-side staging buffer; never copyout from live structures */
	ifqs = kalloc_type(struct if_ifclassq_stats,
	    Z_WAITOK | Z_ZERO | Z_NOFAIL);

	IFCQ_LOCK(ifq);
	if (!IFCQ_IS_READY(ifq)) {
		IFCQ_UNLOCK(ifq);
		kfree_type(struct if_ifclassq_stats, ifqs);
		return ENXIO;
	}

	/* snapshot generic queue counters under the lock */
	ifqs->ifqs_len = IFCQ_LEN(ifq);
	ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq);
	*(&ifqs->ifqs_xmitcnt) = *(&ifq->ifcq_xmitcnt);
	*(&ifqs->ifqs_dropcnt) = *(&ifq->ifcq_dropcnt);
	ifqs->ifqs_scheduler = ifq->ifcq_type;

	/* scheduler-specific per-queue stats */
	err = pktsched_getqstats(ifq, qid, ifqs);
	IFCQ_UNLOCK(ifq);

	if (err == 0 && (err = copyout((caddr_t)ifqs,
	    (user_addr_t)(uintptr_t)ubuf, sizeof(*ifqs))) == 0) {
		*nbytes = sizeof(*ifqs);
	}

	kfree_type(struct if_ifclassq_stats, ifqs);

	return err;
}
528
529 const char *
ifclassq_ev2str(cqev_t ev)530 ifclassq_ev2str(cqev_t ev)
531 {
532 const char *c;
533
534 switch (ev) {
535 case CLASSQ_EV_LINK_BANDWIDTH:
536 c = "LINK_BANDWIDTH";
537 break;
538
539 case CLASSQ_EV_LINK_LATENCY:
540 c = "LINK_LATENCY";
541 break;
542
543 case CLASSQ_EV_LINK_MTU:
544 c = "LINK_MTU";
545 break;
546
547 case CLASSQ_EV_LINK_UP:
548 c = "LINK_UP";
549 break;
550
551 case CLASSQ_EV_LINK_DOWN:
552 c = "LINK_DOWN";
553 break;
554
555 default:
556 c = "UNKNOWN";
557 break;
558 }
559
560 return c;
561 }
562
563 /*
564 * internal representation of token bucket parameters
565 * rate: byte_per_unittime << 32
566 * (((bits_per_sec) / 8) << 32) / machclk_freq
567 * depth: byte << 32
568 *
569 */
570 #define TBR_SHIFT 32
571 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
572 #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
573
/*
 * Dequeue one packet through the token bucket regulator, any service
 * class.  *pkt is left empty when rate-limited or the queue is empty.
 */
void
ifclassq_tbr_dequeue(struct ifclassq *ifq, classq_pkt_t *pkt)
{
	ifclassq_tbr_dequeue_common(ifq, MBUF_SC_UNSPEC, FALSE, pkt);
}
579
/*
 * Dequeue one packet through the token bucket regulator, restricted to
 * the given service class (driver-managed mode).
 */
void
ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    classq_pkt_t *pkt)
{
	ifclassq_tbr_dequeue_common(ifq, sc, TRUE, pkt);
}
586
/*
 * Token-bucket-regulated dequeue of a single packet.  Refills the
 * bucket from the elapsed machclk time, bails out (leaving *pkt empty)
 * while the token count is non-positive, and otherwise charges the
 * dequeued packet's length against the bucket.  Caller holds the ifcq
 * lock; the TBR must be enabled.
 */
static void
ifclassq_tbr_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    boolean_t drvmgt, classq_pkt_t *pkt)
{
	struct tb_regulator *tbr;
	int64_t interval;
	u_int64_t now;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));

	*pkt = CLASSQ_PKT_INITIALIZER(*pkt);
	tbr = &ifq->ifcq_tbr;
	/* update token only when it is negative */
	if (tbr->tbr_token <= 0) {
		now = read_machclk();
		interval = now - tbr->tbr_last;
		if (interval >= tbr->tbr_filluptime) {
			/* idle long enough for the bucket to fill completely */
			tbr->tbr_token = tbr->tbr_depth;
		} else {
			/* partial refill proportional to elapsed time, capped */
			tbr->tbr_token += interval * tbr->tbr_rate;
			if (tbr->tbr_token > tbr->tbr_depth) {
				tbr->tbr_token = tbr->tbr_depth;
			}
		}
		tbr->tbr_last = now;
	}
	/* if token is still negative, don't allow dequeue */
	if (tbr->tbr_token <= 0) {
		return;
	}

	/*
	 * ifclassq takes precedence over ALTQ queue;
	 * ifcq_drain count is adjusted by the caller.
	 */
	if (drvmgt) {
		fq_if_dequeue_sc_classq(ifq, sc, pkt);
	} else {
		fq_if_dequeue_classq(ifq, pkt);
	}

	if (pkt->cp_mbuf != NULL) {
		/* charge the (TBR-scaled) packet length to the bucket */
		switch (pkt->cp_ptype) {
		case QP_MBUF:
			tbr->tbr_token -= TBR_SCALE(m_pktlen(pkt->cp_mbuf));
			break;

#if SKYWALK
		case QP_PACKET:
			tbr->tbr_token -=
			    TBR_SCALE(pkt->cp_kpkt->pkt_length);
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
		}
	}
}
650
651 /*
652 * set a token bucket regulator.
653 * if the specified rate is zero, the token bucket regulator is deleted.
654 */
655 int
ifclassq_tbr_set(struct ifclassq * ifq,struct tb_profile * profile,boolean_t update)656 ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
657 boolean_t update)
658 {
659 struct tb_regulator *tbr;
660 struct ifnet *ifp = ifq->ifcq_ifp;
661 u_int64_t rate, old_rate;
662
663 IFCQ_LOCK_ASSERT_HELD(ifq);
664 VERIFY(IFCQ_IS_READY(ifq));
665
666 VERIFY(machclk_freq != 0);
667
668 tbr = &ifq->ifcq_tbr;
669 old_rate = tbr->tbr_rate_raw;
670
671 rate = profile->rate;
672 if (profile->percent > 0) {
673 u_int64_t eff_rate;
674
675 if (profile->percent > 100) {
676 return EINVAL;
677 }
678 if ((eff_rate = ifp->if_output_bw.eff_bw) == 0) {
679 return ENODEV;
680 }
681 rate = (eff_rate * profile->percent) / 100;
682 }
683
684 if (rate == 0) {
685 if (!IFCQ_TBR_IS_ENABLED(ifq)) {
686 return 0;
687 }
688
689 if (pktsched_verbose) {
690 printf("%s: TBR disabled\n", if_name(ifp));
691 }
692
693 /* disable this TBR */
694 ifq->ifcq_flags &= ~IFCQF_TBR;
695 bzero(tbr, sizeof(*tbr));
696 ifnet_set_start_cycle(ifp, NULL);
697 if (update) {
698 ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
699 }
700 return 0;
701 }
702
703 if (pktsched_verbose) {
704 printf("%s: TBR %s (rate %llu bps depth %u)\n", if_name(ifp),
705 (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" :
706 "enabled", rate, profile->depth);
707 }
708
709 /* set the new TBR */
710 bzero(tbr, sizeof(*tbr));
711 tbr->tbr_rate_raw = rate;
712 tbr->tbr_percent = profile->percent;
713 ifq->ifcq_flags |= IFCQF_TBR;
714
715 /*
716 * Note that the TBR fill up time (hence the ifnet restart time)
717 * is directly related to the specified TBR depth. The ideal
718 * depth value should be computed such that the interval time
719 * between each successive wakeup is adequately spaced apart,
720 * in order to reduce scheduling overheads. A target interval
721 * of 10 ms seems to provide good performance balance. This can be
722 * overridden by specifying the depth profile. Values smaller than
723 * the ideal depth will reduce delay at the expense of CPU cycles.
724 */
725 tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq;
726 if (tbr->tbr_rate > 0) {
727 u_int32_t mtu = ifp->if_mtu;
728 int64_t ival, idepth = 0;
729 int i;
730
731 if (mtu < IF_MINMTU) {
732 mtu = IF_MINMTU;
733 }
734
735 ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */
736
737 for (i = 1;; i++) {
738 idepth = TBR_SCALE(i * mtu);
739 if ((idepth / tbr->tbr_rate) > ival) {
740 break;
741 }
742 }
743 VERIFY(idepth > 0);
744
745 tbr->tbr_depth = TBR_SCALE(profile->depth);
746 if (tbr->tbr_depth == 0) {
747 tbr->tbr_filluptime = idepth / tbr->tbr_rate;
748 /* a little fudge factor to get closer to rate */
749 tbr->tbr_depth = idepth + (idepth >> 3);
750 } else {
751 tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
752 }
753 } else {
754 tbr->tbr_depth = TBR_SCALE(profile->depth);
755 tbr->tbr_filluptime = 0xffffffffffffffffLL;
756 }
757 tbr->tbr_token = tbr->tbr_depth;
758 tbr->tbr_last = read_machclk();
759
760 if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) {
761 struct timespec ts =
762 { 0, (long)pktsched_abs_to_nsecs(tbr->tbr_filluptime) };
763 if (pktsched_verbose) {
764 printf("%s: TBR calculated tokens %lld "
765 "filluptime %llu ns\n", if_name(ifp),
766 TBR_UNSCALE(tbr->tbr_token),
767 pktsched_abs_to_nsecs(tbr->tbr_filluptime));
768 }
769 ifnet_set_start_cycle(ifp, &ts);
770 } else {
771 if (pktsched_verbose) {
772 if (tbr->tbr_rate == 0) {
773 printf("%s: TBR calculated tokens %lld "
774 "infinite filluptime\n", if_name(ifp),
775 TBR_UNSCALE(tbr->tbr_token));
776 } else if (!(ifp->if_flags & IFF_UP)) {
777 printf("%s: TBR suspended (link is down)\n",
778 if_name(ifp));
779 }
780 }
781 ifnet_set_start_cycle(ifp, NULL);
782 }
783 if (update && tbr->tbr_rate_raw != old_rate) {
784 ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
785 }
786
787 return 0;
788 }
789
790 void
ifclassq_calc_target_qdelay(struct ifnet * ifp,u_int64_t * if_target_qdelay)791 ifclassq_calc_target_qdelay(struct ifnet *ifp, u_int64_t *if_target_qdelay)
792 {
793 u_int64_t qdelay = 0;
794 qdelay = IFCQ_TARGET_QDELAY(ifp->if_snd);
795
796 if (ifclassq_target_qdelay != 0) {
797 qdelay = ifclassq_target_qdelay;
798 }
799
800 /*
801 * If we do not know the effective bandwidth, use the default
802 * target queue delay.
803 */
804 if (qdelay == 0) {
805 qdelay = IFQ_TARGET_DELAY;
806 }
807
808 /*
809 * If a delay has been added to ifnet start callback for
810 * coalescing, we have to add that to the pre-set target delay
811 * because the packets can be in the queue longer.
812 */
813 if ((ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
814 ifp->if_start_delay_timeout > 0) {
815 qdelay += ifp->if_start_delay_timeout;
816 }
817
818 *(if_target_qdelay) = qdelay;
819 }
820
821 void
ifclassq_calc_update_interval(u_int64_t * update_interval)822 ifclassq_calc_update_interval(u_int64_t *update_interval)
823 {
824 u_int64_t uint = 0;
825
826 /* If the system level override is set, use it */
827 if (ifclassq_update_interval != 0) {
828 uint = ifclassq_update_interval;
829 }
830
831 /* Otherwise use the default value */
832 if (uint == 0) {
833 uint = IFQ_UPDATE_INTERVAL;
834 }
835
836 *update_interval = uint;
837 }
838
/*
 * Reap (or, when `purge' is set, fully drain) the FQ-CoDel and flow
 * advisory object caches to return memory to the system.
 */
void
ifclassq_reap_caches(boolean_t purge)
{
	fq_codel_reap_caches(purge);
	flowadv_reap_caches(purge);
}
845
/*
 * Allocate a zeroed ifclassq from its zone and initialize its lock.
 * The refcount is initialized and then retained once more, so the
 * object starts with two references.  NOTE(review): presumably one
 * reference is for the owning ifnet and one for the caller — confirm
 * against ifclassq_release() usage.
 */
struct ifclassq *
ifclassq_alloc(void)
{
	struct ifclassq *ifcq;

	ifcq = zalloc_flags(ifcq_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init(&ifcq->ifcq_refcnt, NULL);
	os_ref_retain(&ifcq->ifcq_refcnt);
	lck_mtx_init(&ifcq->ifcq_lock, &ifcq_lock_group, &ifcq_lock_attr);
	return ifcq;
}
857
/*
 * Take an additional reference on the ifclassq.
 */
void
ifclassq_retain(struct ifclassq *ifcq)
{
	os_ref_retain(&ifcq->ifcq_refcnt);
}
863
/*
 * Drop a reference on *pifcq and NULL out the caller's pointer.  When
 * the last reference is released, the queue is torn down and returned
 * to its zone.
 */
void
ifclassq_release(struct ifclassq **pifcq)
{
	struct ifclassq *ifcq = *pifcq;

	*pifcq = NULL;
	if (os_ref_release(&ifcq->ifcq_refcnt) == 0) {
		ifclassq_teardown(ifcq);
		zfree(ifcq_zone, ifcq);
	}
}
875