/*
 * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
#include <sys/random.h>
#include <sys/kernel_types.h>
#include <sys/sysctl.h>

#include <kern/zalloc.h>

#include <net/if.h>
#include <net/net_osdep.h>
#include <net/classq/classq.h>
#include <pexpert/pexpert.h>
#include <net/classq/classq_sfb.h>
#include <net/classq/classq_fq_codel.h>
#include <net/pktsched/pktsched.h>
#include <net/pktsched/pktsched_fq_codel.h>
#include <net/flowadv.h>

#include <libkern/libkern.h>

#if SKYWALK
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#endif /* SKYWALK */

static errno_t ifclassq_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
    u_int32_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
    u_int32_t *, boolean_t, u_int8_t);
static void ifclassq_tbr_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
    boolean_t, classq_pkt_t *, u_int8_t);

static uint64_t ifclassq_def_c_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_def_c_target_qdelay, "def classic target queue delay in nanoseconds");

static uint64_t ifclassq_def_c_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_c_update_interval,
    "def classic update interval in nanoseconds");

static uint64_t ifclassq_def_l4s_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_def_l4s_target_qdelay, "def L4S target queue delay in nanoseconds");

static uint64_t ifclassq_def_l4s_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_l4s_update_interval,
    "def L4S update interval in nanoseconds");

static uint64_t ifclassq_ll_c_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_ll_c_target_qdelay, "low latency classic target queue delay in nanoseconds");

static uint64_t ifclassq_ll_c_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_c_update_interval,
    "low latency classic update interval in nanoseconds");

static uint64_t ifclassq_ll_l4s_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_ll_l4s_target_qdelay, "low latency L4S target queue delay in nanoseconds");

static uint64_t ifclassq_ll_l4s_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_l4s_update_interval,
    "low latency L4S update interval in nanoseconds");

uint32_t ifclassq_enable_l4s = 0;
SYSCTL_UINT(_net_classq, OID_AUTO, enable_l4s,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_enable_l4s, 0,
    "enable/disable L4S");

#if DEBUG || DEVELOPMENT
uint32_t ifclassq_flow_control_adv = 1; /* flow control advisory */
SYSCTL_UINT(_net_classq, OID_AUTO, flow_control_adv,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_flow_control_adv, 1,
    "enable/disable flow control advisory");

uint16_t fq_codel_quantum = 0;
#endif /* DEBUG || DEVELOPMENT */

static KALLOC_TYPE_DEFINE(ifcq_zone, struct ifclassq, NET_KT_DEFAULT);
LCK_ATTR_DECLARE(ifcq_lock_attr, 0, 0);
static LCK_GRP_DECLARE(ifcq_lock_group, "ifclassq locks");

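/*
 * One-time classq initialization.  On DEBUG/DEVELOPMENT kernels the
 * target-delay and update-interval defaults can additionally be
 * overridden with boot-args of the same names as the variables below;
 * release kernels only honor the sysctls declared above.
 */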
void
classq_init(void)
{
	_CASSERT(MBUF_TC_BE == 0);
	_CASSERT(MBUF_SC_BE == 0);
	_CASSERT(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);
#if DEBUG || DEVELOPMENT
	PE_parse_boot_argn("fq_codel_quantum", &fq_codel_quantum,
	    sizeof(fq_codel_quantum));
	PE_parse_boot_argn("ifclassq_def_c_target_qdelay", &ifclassq_def_c_target_qdelay,
	    sizeof(ifclassq_def_c_target_qdelay));
	PE_parse_boot_argn("ifclassq_def_c_update_interval",
	    &ifclassq_def_c_update_interval, sizeof(ifclassq_def_c_update_interval));
	PE_parse_boot_argn("ifclassq_def_l4s_target_qdelay", &ifclassq_def_l4s_target_qdelay,
	    sizeof(ifclassq_def_l4s_target_qdelay));
	PE_parse_boot_argn("ifclassq_def_l4s_update_interval",
	    &ifclassq_def_l4s_update_interval, sizeof(ifclassq_def_l4s_update_interval));
	PE_parse_boot_argn("ifclassq_ll_c_target_qdelay", &ifclassq_ll_c_target_qdelay,
	    sizeof(ifclassq_ll_c_target_qdelay));
	PE_parse_boot_argn("ifclassq_ll_c_update_interval",
	    &ifclassq_ll_c_update_interval, sizeof(ifclassq_ll_c_update_interval));
	PE_parse_boot_argn("ifclassq_ll_l4s_target_qdelay", &ifclassq_ll_l4s_target_qdelay,
	    sizeof(ifclassq_ll_l4s_target_qdelay));
	PE_parse_boot_argn("ifclassq_ll_l4s_update_interval",
	    &ifclassq_ll_l4s_update_interval, sizeof(ifclassq_ll_l4s_update_interval));
#endif /* DEBUG || DEVELOPMENT */
	fq_codel_init();
}

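/*
 * Bind a send queue to its interface and, for interfaces that use the
 * new output model (IFEF_TXSTART), attach the packet scheduler.  The
 * queue is expected to be pristine on entry: empty, with no TBR, no
 * scheduler and no flags set.
 */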
int
ifclassq_setup(struct ifclassq *ifq, struct ifnet *ifp, uint32_t sflags)
{
	int err = 0;

	IFCQ_LOCK(ifq);
	VERIFY(IFCQ_IS_EMPTY(ifq));
	ifq->ifcq_ifp = ifp;
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));

	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);

	if (ifp->if_eflags & IFEF_TXSTART) {
		u_int32_t maxlen = 0;

		if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
			maxlen = if_sndq_maxlen;
		}
		IFCQ_SET_MAXLEN(ifq, maxlen);

		if (IFCQ_MAXLEN(ifq) != if_sndq_maxlen &&
		    IFCQ_TARGET_QDELAY(ifq) == 0) {
			/*
			 * Choose static queues because the interface has
			 * a maximum queue size set.
			 */
			sflags &= ~PKTSCHEDF_QALG_DELAYBASED;
		}
		ifq->ifcq_sflags = sflags;
		err = ifclassq_pktsched_setup(ifq);
		if (err == 0) {
			ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
		}
	}
	IFCQ_UNLOCK(ifq);
	return err;
}

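/*
 * Detach the scheduler and reset the queue to its pristine state.
 * Safe to call on an already-destroyed queue; any active TBR is
 * removed first so the scheduler can be torn down cleanly.
 */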
void
ifclassq_teardown(struct ifclassq *ifq)
{
	IFCQ_LOCK(ifq);
	if (IFCQ_IS_DESTROYED(ifq)) {
		ASSERT((ifq->ifcq_flags & ~IFCQF_DESTROYED) == 0);
		goto done;
	}
	if (IFCQ_IS_READY(ifq)) {
		if (IFCQ_TBR_IS_ENABLED(ifq)) {
			struct tb_profile tb =
			    { .rate = 0, .percent = 0, .depth = 0 };
			(void) ifclassq_tbr_set(ifq, &tb, FALSE);
		}
		pktsched_teardown(ifq);
		ifq->ifcq_flags &= ~IFCQF_READY;
	}
	ifq->ifcq_sflags = 0;
	VERIFY(IFCQ_IS_EMPTY(ifq));
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	IFCQ_MAXLEN(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));
	ifq->ifcq_flags |= IFCQF_DESTROYED;
done:
	IFCQ_UNLOCK(ifq);
}

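/*
 * Attach the packet scheduler to the queue.  This path always selects
 * FQ-CoDel; the packet type simply follows the interface mode (native
 * Skywalk interfaces carry kernel packets, everything else mbufs).
 */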
int
ifclassq_pktsched_setup(struct ifclassq *ifq)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	classq_pkt_type_t ptype = QP_MBUF;
	int err = 0;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifp->if_eflags & IFEF_TXSTART);
#if SKYWALK
	ptype = ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) != 0) ? QP_PACKET :
	    QP_MBUF;
#endif /* SKYWALK */

	err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL, ifq->ifcq_sflags, ptype);

	return err;
}

void
ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen)
{
	IFCQ_LOCK(ifq);
	if (maxqlen == 0) {
		maxqlen = if_sndq_maxlen;
	}
	IFCQ_SET_MAXLEN(ifq, maxqlen);
	IFCQ_UNLOCK(ifq);
}

u_int32_t
ifclassq_get_maxlen(struct ifclassq *ifq)
{
	return IFCQ_MAXLEN(ifq);
}

int
ifclassq_get_len(struct ifclassq *ifq, mbuf_svc_class_t sc, u_int8_t grp_idx,
    u_int32_t *packets, u_int32_t *bytes)
{
	int err = 0;

	IFCQ_LOCK(ifq);
	if ((ifq->ifcq_flags & (IFCQF_READY | IFCQF_ENABLED)) !=
	    (IFCQF_READY | IFCQF_ENABLED)) {
		/* drop the lock before bailing out */
		IFCQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (sc == MBUF_SC_UNSPEC && grp_idx == IF_CLASSQ_ALL_GRPS) {
		VERIFY(packets != NULL);
		*packets = IFCQ_LEN(ifq);
	} else {
		cqrq_stat_sc_t req = { sc, grp_idx, 0, 0 };

		VERIFY(MBUF_VALID_SC(sc) || sc == MBUF_SC_UNSPEC);

		err = fq_if_request_classq(ifq, CLASSQRQ_STAT_SC, &req);
		if (packets != NULL) {
			*packets = req.packets;
		}
		if (bytes != NULL) {
			*bytes = req.bytes;
		}
	}
	IFCQ_UNLOCK(ifq);

#if SKYWALK
	struct ifnet *ifp = ifq->ifcq_ifp;

	if (__improbable(ifp->if_na_ops != NULL &&
	    ifp->if_na_ops->ni_get_len != NULL)) {
		err = ifp->if_na_ops->ni_get_len(ifp->if_na, sc, packets,
		    bytes, err);
	}
#endif /* SKYWALK */

	return err;
}

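/*
 * Stamp cellular packets with the current interface queue depth and
 * unsent socket bytes so that mbuf_get_unsent_data_bytes() can report
 * how much data is still buffered ahead of the packet.  Non-cellular
 * interfaces skip this.
 */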
inline void
ifclassq_set_packet_metadata(struct ifclassq *ifq, struct ifnet *ifp,
    classq_pkt_t *p)
{
	if (!IFNET_IS_CELLULAR(ifp)) {
		return;
	}

	switch (p->cp_ptype) {
	case QP_MBUF: {
		struct mbuf *m = p->cp_mbuf;
		m->m_pkthdr.pkt_flags |= PKTF_VALID_UNSENT_DATA;
		m->m_pkthdr.bufstatus_if = IFCQ_BYTES(ifq);
		m->m_pkthdr.bufstatus_sndbuf = (uint32_t)ifp->if_sndbyte_unsent;
		break;
	}

#if SKYWALK
	case QP_PACKET:
		/*
		 * Support for equivalent of mbuf_get_unsent_data_bytes()
		 * is not needed in the Skywalk architecture.
		 */
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}

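/*
 * Enqueue/dequeue entry points.  Enqueue goes straight to FQ-CoDel;
 * dequeue normally uses the scheduler's multi-packet dequeue, and
 * falls back to the packet-at-a-time loop below only when a token
 * bucket regulator is pacing the queue.
 */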
errno_t
ifclassq_enqueue(struct ifclassq *ifq, classq_pkt_t *head, classq_pkt_t *tail,
    u_int32_t cnt, u_int32_t bytes, boolean_t *pdrop)
{
	return fq_if_enqueue_classq(ifq, head, tail, cnt, bytes, pdrop);
}

errno_t
ifclassq_dequeue(struct ifclassq *ifq, u_int32_t pkt_limit,
    u_int32_t byte_limit, classq_pkt_t *head, classq_pkt_t *tail,
    u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
{
	return ifclassq_dequeue_common(ifq, MBUF_SC_UNSPEC, pkt_limit,
	    byte_limit, head, tail, cnt, len, FALSE, grp_idx);
}

errno_t
ifclassq_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
{
	return ifclassq_dequeue_common(ifq, sc, pkt_limit, byte_limit,
	    head, tail, cnt, len, TRUE, grp_idx);
}

static errno_t
ifclassq_dequeue_common_default(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
    u_int8_t grp_idx)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int32_t i = 0, l = 0;
	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));

	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		goto dequeue_loop;
	}

	/*
	 * If the scheduler supports dequeueing multiple packets at the
	 * same time, call that one instead.
	 */
	if (drvmgt) {
		int err;

		IFCQ_LOCK_SPIN(ifq);
		err = fq_if_dequeue_sc_classq_multi(ifq, sc, pkt_limit,
		    byte_limit, head, tail, cnt, len, grp_idx);
		IFCQ_UNLOCK(ifq);

		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	} else {
		int err;

		IFCQ_LOCK_SPIN(ifq);
		err = fq_if_dequeue_classq_multi(ifq, pkt_limit, byte_limit,
		    head, tail, cnt, len, grp_idx);
		IFCQ_UNLOCK(ifq);

		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	}

dequeue_loop:
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));
	IFCQ_LOCK_SPIN(ifq);

	while (i < pkt_limit && l < byte_limit) {
		if (drvmgt) {
			IFCQ_TBR_DEQUEUE_SC(ifq, sc, head, grp_idx);
		} else {
			IFCQ_TBR_DEQUEUE(ifq, head, grp_idx);
		}

		if (head->cp_mbuf == NULL) {
			break;
		}

		if (first.cp_mbuf == NULL) {
			first = *head;
		}

		switch (head->cp_ptype) {
		case QP_MBUF:
			head->cp_mbuf->m_nextpkt = NULL;
			l += head->cp_mbuf->m_pkthdr.len;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_mbuf != NULL) {
				last.cp_mbuf->m_nextpkt = head->cp_mbuf;
			}
			break;

#if SKYWALK
		case QP_PACKET:
			head->cp_kpkt->pkt_nextpkt = NULL;
			l += head->cp_kpkt->pkt_length;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_kpkt != NULL) {
				last.cp_kpkt->pkt_nextpkt = head->cp_kpkt;
			}
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		last = *head;
		i++;
	}

	IFCQ_UNLOCK(ifq);

	if (tail != NULL) {
		*tail = last;
	}
	if (cnt != NULL) {
		*cnt = i;
	}
	if (len != NULL) {
		*len = l;
	}

	*head = first;
	return (first.cp_mbuf != NULL) ? 0 : EAGAIN;
}

static errno_t
ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
    u_int8_t grp_idx)
{
#if SKYWALK
	struct ifnet *ifp = ifq->ifcq_ifp;

	if (__improbable(ifp->if_na_ops != NULL &&
	    ifp->if_na_ops->ni_dequeue != NULL)) {
		/*
		 * TODO:
		 * We should be changing the pkt/byte limit to the
		 * available space in the next filter. But this is not
		 * useful until we can flow control the whole chain of
		 * filters.
		 */
		errno_t err = ifclassq_dequeue_common_default(ifq, sc,
		    pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);

		return ifp->if_na_ops->ni_dequeue(ifp->if_na, sc, pkt_limit,
		    byte_limit, head, tail, cnt, len, drvmgt, err);
	}
#endif /* SKYWALK */
	return ifclassq_dequeue_common_default(ifq, sc,
	    pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);
}

void
ifclassq_update(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));
	fq_if_request_classq(ifq, CLASSQRQ_EVENT, (void *)ev);
}

int
ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	ifq->ifcq_type = type;
	ifq->ifcq_disc = discipline;
	return 0;
}

void
ifclassq_detach(struct ifclassq *ifq)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	ifq->ifcq_type = PKTSCHEDT_NONE;
}

int
ifclassq_getqstats(struct ifclassq *ifq, u_int8_t gid, u_int32_t qid, void *ubuf,
    u_int32_t *nbytes)
{
	struct if_ifclassq_stats *ifqs;
	int err;

	if (*nbytes < sizeof(*ifqs)) {
		return EINVAL;
	}

	ifqs = kalloc_type(struct if_ifclassq_stats,
	    Z_WAITOK | Z_ZERO | Z_NOFAIL);

	IFCQ_LOCK(ifq);
	if (!IFCQ_IS_READY(ifq)) {
		IFCQ_UNLOCK(ifq);
		kfree_type(struct if_ifclassq_stats, ifqs);
		return ENXIO;
	}

	ifqs->ifqs_len = IFCQ_LEN(ifq);
	ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq);
	ifqs->ifqs_xmitcnt = ifq->ifcq_xmitcnt;
	ifqs->ifqs_dropcnt = ifq->ifcq_dropcnt;
	ifqs->ifqs_scheduler = ifq->ifcq_type;

	err = pktsched_getqstats(ifq, gid, qid, ifqs);
	IFCQ_UNLOCK(ifq);

	if (err == 0 && (err = copyout((caddr_t)ifqs,
	    (user_addr_t)(uintptr_t)ubuf, sizeof(*ifqs))) == 0) {
		*nbytes = sizeof(*ifqs);
	}

	kfree_type(struct if_ifclassq_stats, ifqs);

	return err;
}

const char *
ifclassq_ev2str(cqev_t ev)
{
	const char *c;

	switch (ev) {
	case CLASSQ_EV_LINK_BANDWIDTH:
		c = "LINK_BANDWIDTH";
		break;

	case CLASSQ_EV_LINK_LATENCY:
		c = "LINK_LATENCY";
		break;

	case CLASSQ_EV_LINK_MTU:
		c = "LINK_MTU";
		break;

	case CLASSQ_EV_LINK_UP:
		c = "LINK_UP";
		break;

	case CLASSQ_EV_LINK_DOWN:
		c = "LINK_DOWN";
		break;

	default:
		c = "UNKNOWN";
		break;
	}

	return c;
}

/*
 * Internal representation of token bucket parameters:
 *	rate:	bytes per unit time, scaled by 2^32, i.e.
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	bytes << 32
 */
#define TBR_SHIFT	32
#define TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
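
/*
 * Illustrative example (hypothetical numbers, not taken from any
 * particular hardware): with a 24 MHz machclk, a 1 Gbps profile gives
 * rate / 8 = 125,000,000 bytes/sec, so
 *	tbr_rate = TBR_SCALE(125000000) / 24000000 ~= 5.2 bytes/tick
 * carried at << 32 fixed point, which keeps the token arithmetic in
 * integers while preserving sub-byte per-tick resolution.
 */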

void
ifclassq_tbr_dequeue(struct ifclassq *ifq, classq_pkt_t *pkt, u_int8_t grp_idx)
{
	ifclassq_tbr_dequeue_common(ifq, MBUF_SC_UNSPEC, FALSE, pkt, grp_idx);
}

void
ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    classq_pkt_t *pkt, u_int8_t grp_idx)
{
	ifclassq_tbr_dequeue_common(ifq, sc, TRUE, pkt, grp_idx);
}

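/*
 * Token bucket regulated dequeue: tokens are replenished lazily from
 * the elapsed machclk time, capped at the bucket depth, and a packet
 * is released only while the token count is positive.  The dequeued
 * packet's length is then charged against the bucket.
 */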
static void
ifclassq_tbr_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    boolean_t drvmgt, classq_pkt_t *pkt, u_int8_t grp_idx)
{
	struct tb_regulator *tbr;
	int64_t interval;
	u_int64_t now;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));

	*pkt = CLASSQ_PKT_INITIALIZER(*pkt);
	tbr = &ifq->ifcq_tbr;
	/* update token only when it is negative */
	if (tbr->tbr_token <= 0) {
		now = read_machclk();
		interval = now - tbr->tbr_last;
		if (interval >= tbr->tbr_filluptime) {
			tbr->tbr_token = tbr->tbr_depth;
		} else {
			tbr->tbr_token += interval * tbr->tbr_rate;
			if (tbr->tbr_token > tbr->tbr_depth) {
				tbr->tbr_token = tbr->tbr_depth;
			}
		}
		tbr->tbr_last = now;
	}
	/* if token is still negative, don't allow dequeue */
	if (tbr->tbr_token <= 0) {
		return;
	}

	/*
	 * ifclassq takes precedence over ALTQ queue;
	 * ifcq_drain count is adjusted by the caller.
	 */
	if (drvmgt) {
		fq_if_dequeue_sc_classq(ifq, sc, pkt, grp_idx);
	} else {
		fq_if_dequeue_classq(ifq, pkt, grp_idx);
	}

	if (pkt->cp_mbuf != NULL) {
		switch (pkt->cp_ptype) {
		case QP_MBUF:
			tbr->tbr_token -= TBR_SCALE(m_pktlen(pkt->cp_mbuf));
			break;

#if SKYWALK
		case QP_PACKET:
			tbr->tbr_token -=
			    TBR_SCALE(pkt->cp_kpkt->pkt_length);
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
		}
	}
}

/*
 * Set a token bucket regulator.
 * If the specified rate is zero, the token bucket regulator is deleted.
 */
int
ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
    boolean_t update)
{
	struct tb_regulator *tbr;
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int64_t rate, old_rate;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));

	VERIFY(machclk_freq != 0);

	tbr = &ifq->ifcq_tbr;
	old_rate = tbr->tbr_rate_raw;

	rate = profile->rate;
	if (profile->percent > 0) {
		u_int64_t eff_rate;

		if (profile->percent > 100) {
			return EINVAL;
		}
		if ((eff_rate = ifp->if_output_bw.eff_bw) == 0) {
			return ENODEV;
		}
		rate = (eff_rate * profile->percent) / 100;
	}

	if (rate == 0) {
		if (!IFCQ_TBR_IS_ENABLED(ifq)) {
			return 0;
		}

		if (pktsched_verbose) {
			printf("%s: TBR disabled\n", if_name(ifp));
		}

		/* disable this TBR */
		ifq->ifcq_flags &= ~IFCQF_TBR;
		bzero(tbr, sizeof(*tbr));
		ifnet_set_start_cycle(ifp, NULL);
		if (update) {
			ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
		}
		return 0;
	}

	if (pktsched_verbose) {
		printf("%s: TBR %s (rate %llu bps depth %u)\n", if_name(ifp),
		    (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" :
		    "enabled", rate, profile->depth);
	}

	/* set the new TBR */
	bzero(tbr, sizeof(*tbr));
	tbr->tbr_rate_raw = rate;
	tbr->tbr_percent = profile->percent;
	ifq->ifcq_flags |= IFCQF_TBR;

	/*
	 * Note that the TBR fill up time (hence the ifnet restart time)
	 * is directly related to the specified TBR depth. The ideal
	 * depth value should be computed such that the interval time
	 * between each successive wakeup is adequately spaced apart,
	 * in order to reduce scheduling overheads. A target interval
	 * of 10 ms seems to provide good performance balance. This can be
	 * overridden by specifying the depth profile. Values smaller than
	 * the ideal depth will reduce delay at the expense of CPU cycles.
	 */
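	/*
	 * Worked example (illustrative numbers only): at 100 Mbps the
	 * link moves ~125,000 bytes in the 10 ms target interval; with
	 * a 1500-byte MTU the loop below settles on i = 84, i.e. an
	 * ideal depth of 126,000 bytes, which the fudge factor below
	 * inflates to ~141,750 bytes when the profile specifies no
	 * explicit depth.
	 */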
	tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq;
	if (tbr->tbr_rate > 0) {
		u_int32_t mtu = ifp->if_mtu;
		int64_t ival, idepth = 0;
		int i;

		if (mtu < IF_MINMTU) {
			mtu = IF_MINMTU;
		}

		ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */

		for (i = 1;; i++) {
			idepth = TBR_SCALE(i * mtu);
			if ((idepth / tbr->tbr_rate) > ival) {
				break;
			}
		}
		VERIFY(idepth > 0);

		tbr->tbr_depth = TBR_SCALE(profile->depth);
		if (tbr->tbr_depth == 0) {
			tbr->tbr_filluptime = idepth / tbr->tbr_rate;
			/* a little fudge factor to get closer to rate */
			tbr->tbr_depth = idepth + (idepth >> 3);
		} else {
			tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
		}
	} else {
		tbr->tbr_depth = TBR_SCALE(profile->depth);
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	}
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();

	if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) {
		struct timespec ts =
		    { 0, (long)pktsched_abs_to_nsecs(tbr->tbr_filluptime) };
		if (pktsched_verbose) {
			printf("%s: TBR calculated tokens %lld "
			    "filluptime %llu ns\n", if_name(ifp),
			    TBR_UNSCALE(tbr->tbr_token),
			    pktsched_abs_to_nsecs(tbr->tbr_filluptime));
		}
		ifnet_set_start_cycle(ifp, &ts);
	} else {
		if (pktsched_verbose) {
			if (tbr->tbr_rate == 0) {
				printf("%s: TBR calculated tokens %lld "
				    "infinite filluptime\n", if_name(ifp),
				    TBR_UNSCALE(tbr->tbr_token));
			} else if (!(ifp->if_flags & IFF_UP)) {
				printf("%s: TBR suspended (link is down)\n",
				    if_name(ifp));
			}
		}
		ifnet_set_start_cycle(ifp, NULL);
	}
	if (update && tbr->tbr_rate_raw != old_rate) {
		ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
	}

	return 0;
}

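/*
 * Resolve the target queue delay for a queue of the given type.
 * Precedence: a value configured on the interface itself (default
 * queues only), overridden by the system-wide sysctl/boot-arg value
 * if set, falling back to the compile-time default; any ifnet start
 * coalescing delay is added on top since it lengthens the time
 * packets sit in the queue.
 */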
void
ifclassq_calc_target_qdelay(struct ifnet *ifp, uint64_t *if_target_qdelay,
    uint32_t flags)
{
	uint64_t qdelay = 0, qdelay_configured = 0, qdelay_default = 0;

	if (flags == IF_CLASSQ_DEF) {
		qdelay = IFCQ_TARGET_QDELAY(ifp->if_snd);
	}

	switch (flags) {
	case IF_CLASSQ_DEF:
		qdelay_configured = ifclassq_def_c_target_qdelay;
		qdelay_default = IFQ_DEF_C_TARGET_DELAY;
		break;
	case IF_CLASSQ_L4S:
		qdelay_configured = ifclassq_def_l4s_target_qdelay;
		qdelay_default = IFQ_DEF_L4S_TARGET_DELAY;
		break;
	case IF_CLASSQ_LOW_LATENCY:
		qdelay_configured = ifclassq_ll_c_target_qdelay;
		qdelay_default = IFQ_LL_C_TARGET_DELAY;
		break;
	case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S):
		qdelay_configured = ifclassq_ll_l4s_target_qdelay;
		qdelay_default = IFQ_LL_L4S_TARGET_DELAY;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	if (qdelay_configured != 0) {
		qdelay = qdelay_configured;
	}

	/*
	 * If we do not know the effective bandwidth, use the default
	 * target queue delay.
	 */
	if (qdelay == 0) {
		qdelay = qdelay_default;
	}

	/*
	 * If a delay has been added to ifnet start callback for
	 * coalescing, we have to add that to the pre-set target delay
	 * because the packets can be in the queue longer.
	 */
	if ((ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
	    ifp->if_start_delay_timeout > 0) {
		qdelay += ifp->if_start_delay_timeout;
	}

	*(if_target_qdelay) = qdelay;
}

void
ifclassq_calc_update_interval(uint64_t *update_interval, uint32_t flags)
{
	uint64_t interval = 0, interval_configured = 0, interval_default = 0;

	switch (flags) {
	case IF_CLASSQ_DEF:
		interval_configured = ifclassq_def_c_update_interval;
		interval_default = IFQ_DEF_C_UPDATE_INTERVAL;
		break;
	case IF_CLASSQ_L4S:
		interval_configured = ifclassq_def_l4s_update_interval;
		interval_default = IFQ_DEF_L4S_UPDATE_INTERVAL;
		break;
	case IF_CLASSQ_LOW_LATENCY:
		interval_configured = ifclassq_ll_c_update_interval;
		interval_default = IFQ_LL_C_UPDATE_INTERVAL;
		break;
	case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S):
		interval_configured = ifclassq_ll_l4s_update_interval;
		interval_default = IFQ_LL_L4S_UPDATE_INTERVAL;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* If the system level override is set, use it */
	if (interval_configured != 0) {
		interval = interval_configured;
	}

	/* Otherwise use the default value */
	if (interval == 0) {
		interval = interval_default;
	}

	*update_interval = interval;
}

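/*
 * Reference-counted allocation.  ifclassq_alloc() hands back a queue
 * holding two references (os_ref_init() plus an explicit retain);
 * ifclassq_release() clears the caller's pointer and, on the last
 * reference, tears the queue down and frees it.
 */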
struct ifclassq *
ifclassq_alloc(void)
{
	struct ifclassq *ifcq;

	ifcq = zalloc_flags(ifcq_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init(&ifcq->ifcq_refcnt, NULL);
	os_ref_retain(&ifcq->ifcq_refcnt);
	lck_mtx_init(&ifcq->ifcq_lock, &ifcq_lock_group, &ifcq_lock_attr);
	return ifcq;
}

void
ifclassq_retain(struct ifclassq *ifcq)
{
	os_ref_retain(&ifcq->ifcq_refcnt);
}

void
ifclassq_release(struct ifclassq **pifcq)
{
	struct ifclassq *ifcq = *pifcq;

	*pifcq = NULL;
	if (os_ref_release(&ifcq->ifcq_refcnt) == 0) {
		ifclassq_teardown(ifcq);
		zfree(ifcq_zone, ifcq);
	}
}

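/*
 * FQ-CoDel queue-group management: create a group at the given index,
 * or switch an existing group between combined and separated service.
 * All of these require an attached FQ-CoDel scheduler.
 */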
int
ifclassq_setup_group(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
{
	int err;

	IFCQ_LOCK(ifcq);
	VERIFY(ifcq->ifcq_disc != NULL);
	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	err = fq_if_create_grp(ifcq, grp_idx, flags);
	IFCQ_UNLOCK(ifcq);

	return err;
}

void
ifclassq_set_grp_combined(struct ifclassq *ifcq, uint8_t grp_idx)
{
	IFCQ_LOCK(ifcq);
	VERIFY(ifcq->ifcq_disc != NULL);
	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_set_grp_combined(ifcq, grp_idx);
	IFCQ_UNLOCK(ifcq);
}

void
ifclassq_set_grp_separated(struct ifclassq *ifcq, uint8_t grp_idx)
{
	IFCQ_LOCK(ifcq);
	VERIFY(ifcq->ifcq_disc != NULL);
	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_set_grp_separated(ifcq, grp_idx);
	IFCQ_UNLOCK(ifcq);
}