1 /*
2 * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/mbuf.h>
32 #include <sys/errno.h>
33 #include <sys/random.h>
34 #include <sys/kernel_types.h>
35 #include <sys/sysctl.h>
36
37 #include <kern/zalloc.h>
38
39 #include <net/if.h>
40 #include <net/net_osdep.h>
41 #include <net/classq/classq.h>
42 #include <pexpert/pexpert.h>
43 #include <net/classq/classq_sfb.h>
44 #include <net/classq/classq_fq_codel.h>
45 #include <net/pktsched/pktsched.h>
46 #include <net/pktsched/pktsched_fq_codel.h>
47 #include <net/flowadv.h>
48
49 #include <libkern/libkern.h>
50
51 #if SKYWALK
52 #include <skywalk/os_skywalk_private.h>
53 #include <skywalk/nexus/netif/nx_netif.h>
54 #endif /* SKYWALK */
55
56 static errno_t ifclassq_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
57 u_int32_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
58 u_int32_t *, boolean_t, u_int8_t);
59 static void ifclassq_tbr_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
60 boolean_t, classq_pkt_t *, u_int8_t);
61
62 static uint64_t ifclassq_def_c_target_qdelay = 0;
63 SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
64 &ifclassq_def_c_target_qdelay, "def classic target queue delay in nanoseconds");
65
66 static uint64_t ifclassq_def_c_update_interval = 0;
67 SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_update_interval,
68 CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_c_update_interval,
69 "def classic update interval in nanoseconds");
70
71 static uint64_t ifclassq_def_l4s_target_qdelay = 0;
72 SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
73 &ifclassq_def_l4s_target_qdelay, "def L4S target queue delay in nanoseconds");
74
75 static uint64_t ifclassq_def_l4s_update_interval = 0;
76 SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_update_interval,
77 CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_l4s_update_interval,
78 "def L4S update interval in nanoseconds");
79
80 static uint64_t ifclassq_ll_c_target_qdelay = 0;
81 SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
82 &ifclassq_ll_c_target_qdelay, "low latency classic target queue delay in nanoseconds");
83
84 static uint64_t ifclassq_ll_c_update_interval = 0;
85 SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_update_interval,
86 CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_c_update_interval,
87 "low latency classic update interval in nanoseconds");
88
89 static uint64_t ifclassq_ll_l4s_target_qdelay = 0;
90 SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
91 &ifclassq_ll_l4s_target_qdelay, "low latency L4S target queue delay in nanoseconds");
92
93 static uint64_t ifclassq_ll_l4s_update_interval = 0;
94 SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_update_interval,
95 CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_l4s_update_interval,
96 "low latency L4S update interval in nanoseconds");
97
98 uint32_t ifclassq_enable_l4s = 0;
99 SYSCTL_UINT(_net_classq, OID_AUTO, enable_l4s,
100 CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_enable_l4s, 0,
101 "enable/disable L4S");
102
103 #if DEBUG || DEVELOPMENT
104 uint32_t ifclassq_flow_control_adv = 1; /* flow control advisory */
105 SYSCTL_UINT(_net_classq, OID_AUTO, flow_control_adv,
106 CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_flow_control_adv, 1,
107 "enable/disable flow control advisory");
108
109 uint16_t fq_codel_quantum = 0;
110 #endif /* DEBUG || DEVELOPMENT */
111
112 static struct zone *ifcq_zone; /* zone for ifclassq */
113 #define IFCQ_ZONE_NAME "ifclassq" /* zone name */
114 LCK_ATTR_DECLARE(ifcq_lock_attr, 0, 0);
115 static LCK_GRP_DECLARE(ifcq_lock_group, "ifclassq locks");
116
/*
 * One-time initialization of the ifclassq subsystem: checks compile-time
 * invariants, consumes DEBUG/DEVELOPMENT boot-args that override the AQM
 * tunables, creates the ifclassq zone, and initializes FQ-CoDel.
 */
void
classq_init(void)
{
	/* service-class encodings the classq code depends on */
	_CASSERT(MBUF_TC_BE == 0);
	_CASSERT(MBUF_SC_BE == 0);
	_CASSERT(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);
#if DEBUG || DEVELOPMENT
	/* boot-arg overrides are honored only on DEBUG/DEVELOPMENT kernels */
	PE_parse_boot_argn("fq_codel_quantum", &fq_codel_quantum,
	    sizeof(fq_codel_quantum));
	PE_parse_boot_argn("ifclassq_def_c_target_qdelay", &ifclassq_def_c_target_qdelay,
	    sizeof(ifclassq_def_c_target_qdelay));
	PE_parse_boot_argn("ifclassq_def_c_update_interval",
	    &ifclassq_def_c_update_interval, sizeof(ifclassq_def_c_update_interval));
	PE_parse_boot_argn("ifclassq_def_l4s_target_qdelay", &ifclassq_def_l4s_target_qdelay,
	    sizeof(ifclassq_def_l4s_target_qdelay));
	PE_parse_boot_argn("ifclassq_def_l4s_update_interval",
	    &ifclassq_def_l4s_update_interval, sizeof(ifclassq_def_l4s_update_interval));
	PE_parse_boot_argn("ifclassq_ll_c_target_qdelay", &ifclassq_ll_c_target_qdelay,
	    sizeof(ifclassq_ll_c_target_qdelay));
	PE_parse_boot_argn("ifclassq_ll_c_update_interval",
	    &ifclassq_ll_c_update_interval, sizeof(ifclassq_ll_c_update_interval));
	PE_parse_boot_argn("ifclassq_ll_l4s_target_qdelay", &ifclassq_ll_l4s_target_qdelay,
	    sizeof(ifclassq_ll_l4s_target_qdelay));
	PE_parse_boot_argn("ifclassq_ll_l4s_update_interval",
	    &ifclassq_ll_l4s_update_interval, sizeof(ifclassq_ll_l4s_update_interval));
#endif /* DEBUG || DEVELOPMENT */
	/* zero-on-free zone backing all struct ifclassq allocations */
	ifcq_zone = zone_create(IFCQ_ZONE_NAME, sizeof(struct ifclassq),
	    ZC_ZFREE_CLEARMEM);
	fq_codel_init();
}
147
/*
 * Bind an ifclassq to its interface and, for IFEF_TXSTART drivers,
 * configure and enable the packet scheduler.
 *
 * Returns 0 on success, or the error from ifclassq_pktsched_setup().
 */
int
ifclassq_setup(struct ifclassq *ifq, struct ifnet *ifp, uint32_t sflags)
{
	int err = 0;

	IFCQ_LOCK(ifq);
	VERIFY(IFCQ_IS_EMPTY(ifq));
	ifq->ifcq_ifp = ifp;
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));

	/* the queue must still be in its pristine, never-configured state */
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);

	if (ifp->if_eflags & IFEF_TXSTART) {
		u_int32_t maxlen = 0;

		/* fall back to the system default send-queue length */
		if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
			maxlen = if_sndq_maxlen;
		}
		IFCQ_SET_MAXLEN(ifq, maxlen);

		if (IFCQ_MAXLEN(ifq) != if_sndq_maxlen &&
		    IFCQ_TARGET_QDELAY(ifq) == 0) {
			/*
			 * Choose static queues because the interface has
			 * maximum queue size set
			 */
			sflags &= ~PKTSCHEDF_QALG_DELAYBASED;
		}
		ifq->ifcq_sflags = sflags;
		err = ifclassq_pktsched_setup(ifq);
		if (err == 0) {
			/* mark the queue usable only after scheduler setup */
			ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
		}
	}
	IFCQ_UNLOCK(ifq);
	return err;
}
192
/*
 * Tear down an ifclassq: disable any token bucket regulator, tear down
 * the scheduler, and reset the queue to its pristine state.  Idempotent:
 * once IFCQF_DESTROYED is set, subsequent calls are no-ops.
 */
void
ifclassq_teardown(struct ifclassq *ifq)
{
	IFCQ_LOCK(ifq);
	if (IFCQ_IS_DESTROYED(ifq)) {
		/* already destroyed; no other flag bits may remain set */
		ASSERT((ifq->ifcq_flags & ~IFCQF_DESTROYED) == 0);
		goto done;
	}
	if (IFCQ_IS_READY(ifq)) {
		if (IFCQ_TBR_IS_ENABLED(ifq)) {
			/* a zero-rate profile deletes the TBR */
			struct tb_profile tb =
			{ .rate = 0, .percent = 0, .depth = 0 };
			(void) ifclassq_tbr_set(ifq, &tb, FALSE);
		}
		pktsched_teardown(ifq);
		ifq->ifcq_flags &= ~IFCQF_READY;
	}
	ifq->ifcq_sflags = 0;
	/* teardown must leave nothing queued and nothing configured */
	VERIFY(IFCQ_IS_EMPTY(ifq));
	VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
	VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
	VERIFY(ifq->ifcq_flags == 0);
	VERIFY(ifq->ifcq_sflags == 0);
	VERIFY(ifq->ifcq_disc == NULL);
	IFCQ_LEN(ifq) = 0;
	IFCQ_BYTES(ifq) = 0;
	IFCQ_MAXLEN(ifq) = 0;
	bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
	bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));
	ifq->ifcq_flags |= IFCQF_DESTROYED;
done:
	IFCQ_UNLOCK(ifq);
}
226
227 int
ifclassq_pktsched_setup(struct ifclassq * ifq)228 ifclassq_pktsched_setup(struct ifclassq *ifq)
229 {
230 struct ifnet *ifp = ifq->ifcq_ifp;
231 classq_pkt_type_t ptype = QP_MBUF;
232 int err = 0;
233
234 IFCQ_LOCK_ASSERT_HELD(ifq);
235 VERIFY(ifp->if_eflags & IFEF_TXSTART);
236 #if SKYWALK
237 ptype = ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) != 0) ? QP_PACKET :
238 QP_MBUF;
239 #endif /* SKYWALK */
240
241 err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL, ifq->ifcq_sflags, ptype);
242
243 return err;
244 }
245
246 void
ifclassq_set_maxlen(struct ifclassq * ifq,u_int32_t maxqlen)247 ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen)
248 {
249 IFCQ_LOCK(ifq);
250 if (maxqlen == 0) {
251 maxqlen = if_sndq_maxlen;
252 }
253 IFCQ_SET_MAXLEN(ifq, maxqlen);
254 IFCQ_UNLOCK(ifq);
255 }
256
/*
 * Return the queue's configured maximum length (unlocked snapshot).
 */
u_int32_t
ifclassq_get_maxlen(struct ifclassq *ifq)
{
	return IFCQ_MAXLEN(ifq);
}
262
263 int
ifclassq_get_len(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int8_t grp_idx,u_int32_t * packets,u_int32_t * bytes)264 ifclassq_get_len(struct ifclassq *ifq, mbuf_svc_class_t sc, u_int8_t grp_idx,
265 u_int32_t *packets, u_int32_t *bytes)
266 {
267 int err = 0;
268
269 IFCQ_LOCK(ifq);
270 if ((ifq->ifcq_flags & (IFCQF_READY | IFCQF_ENABLED)) !=
271 (IFCQF_READY | IFCQF_ENABLED)) {
272 return ENXIO;
273 }
274 if (sc == MBUF_SC_UNSPEC && grp_idx == IF_CLASSQ_ALL_GRPS) {
275 VERIFY(packets != NULL);
276 *packets = IFCQ_LEN(ifq);
277 } else {
278 cqrq_stat_sc_t req = { sc, grp_idx, 0, 0 };
279
280 VERIFY(MBUF_VALID_SC(sc) || sc == MBUF_SC_UNSPEC);
281
282 err = fq_if_request_classq(ifq, CLASSQRQ_STAT_SC, &req);
283 if (packets != NULL) {
284 *packets = req.packets;
285 }
286 if (bytes != NULL) {
287 *bytes = req.bytes;
288 }
289 }
290 IFCQ_UNLOCK(ifq);
291
292 #if SKYWALK
293 struct ifnet *ifp = ifq->ifcq_ifp;
294
295 if (__improbable(ifp->if_na_ops != NULL &&
296 ifp->if_na_ops->ni_get_len != NULL)) {
297 err = ifp->if_na_ops->ni_get_len(ifp->if_na, sc, packets,
298 bytes, err);
299 }
300 #endif /* SKYWALK */
301
302 return err;
303 }
304
/*
 * Tag a dequeued packet with unsent-byte accounting; this is done only
 * for cellular interfaces (no-op otherwise).
 */
inline void
ifclassq_set_packet_metadata(struct ifclassq *ifq, struct ifnet *ifp,
    classq_pkt_t *p)
{
	if (!IFNET_IS_CELLULAR(ifp)) {
		return;
	}

	switch (p->cp_ptype) {
	case QP_MBUF: {
		struct mbuf *m = p->cp_mbuf;
		m->m_pkthdr.pkt_flags |= PKTF_VALID_UNSENT_DATA;
		/* snapshot of bytes still queued in this ifclassq */
		m->m_pkthdr.bufstatus_if = IFCQ_BYTES(ifq);
		m->m_pkthdr.bufstatus_sndbuf = (uint32_t)ifp->if_sndbyte_unsent;
		break;
	}

#if SKYWALK
	case QP_PACKET:
		/*
		 * Support for equivalent of mbuf_get_unsent_data_bytes()
		 * is not needed in the Skywalk architecture.
		 */
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
337
/*
 * Enqueue a chain of cnt packets (bytes total) onto ifq; thin wrapper
 * over the FQ-CoDel enqueue.  *pdrop is set by the scheduler.
 */
errno_t
ifclassq_enqueue(struct ifclassq *ifq, classq_pkt_t *head, classq_pkt_t *tail,
    u_int32_t cnt, u_int32_t bytes, boolean_t *pdrop)
{
	return fq_if_enqueue_classq(ifq, head, tail, cnt, bytes, pdrop);
}
344
/*
 * Dequeue up to pkt_limit packets / byte_limit bytes from any service
 * class (drvmgt == FALSE path of ifclassq_dequeue_common).
 */
errno_t
ifclassq_dequeue(struct ifclassq *ifq, u_int32_t pkt_limit,
    u_int32_t byte_limit, classq_pkt_t *head, classq_pkt_t *tail,
    u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
{
	return ifclassq_dequeue_common(ifq, MBUF_SC_UNSPEC, pkt_limit,
	           byte_limit, head, tail, cnt, len, FALSE, grp_idx);
}
353
/*
 * Dequeue restricted to a specific service class sc (driver-managed,
 * drvmgt == TRUE path of ifclassq_dequeue_common).
 */
errno_t
ifclassq_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
{
	return ifclassq_dequeue_common(ifq, sc, pkt_limit, byte_limit,
	           head, tail, cnt, len, TRUE, grp_idx);
}
362
/*
 * Core dequeue: pull up to pkt_limit packets / byte_limit bytes from the
 * scheduler and chain them into *head..*tail.  When a token bucket
 * regulator is enabled, packets are pulled one at a time through the TBR
 * loop below; otherwise the scheduler's multi-packet dequeue is used.
 *
 * Returns 0 with at least one packet in *head, else EAGAIN.  cnt/len,
 * when non-NULL, receive the packet and byte counts of the chain.
 */
static errno_t
ifclassq_dequeue_common_default(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
    u_int8_t grp_idx)
{
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int32_t i = 0, l = 0;
	classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
	classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);

	/* driver-managed dequeues must name a valid service class */
	VERIFY(!drvmgt || MBUF_VALID_SC(sc));

	/* TBR rate-limiting requires the packet-at-a-time loop */
	if (IFCQ_TBR_IS_ENABLED(ifq)) {
		goto dequeue_loop;
	}

	/*
	 * If the scheduler support dequeueing multiple packets at the
	 * same time, call that one instead.
	 */
	if (drvmgt) {
		int err;

		IFCQ_LOCK_SPIN(ifq);
		err = fq_if_dequeue_sc_classq_multi(ifq, sc, pkt_limit,
		    byte_limit, head, tail, cnt, len, grp_idx);
		IFCQ_UNLOCK(ifq);

		/* success with an empty chain still means "nothing to send" */
		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	} else {
		int err;

		IFCQ_LOCK_SPIN(ifq);
		err = fq_if_dequeue_classq_multi(ifq, pkt_limit, byte_limit,
		    head, tail, cnt, len, grp_idx);
		IFCQ_UNLOCK(ifq);

		if (err == 0 && head->cp_mbuf == NULL) {
			err = EAGAIN;
		}
		return err;
	}

dequeue_loop:
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));
	IFCQ_LOCK_SPIN(ifq);

	/* pull one packet per TBR check until a limit or the queue empties */
	while (i < pkt_limit && l < byte_limit) {
		if (drvmgt) {
			IFCQ_TBR_DEQUEUE_SC(ifq, sc, head, grp_idx);
		} else {
			IFCQ_TBR_DEQUEUE(ifq, head, grp_idx);
		}

		if (head->cp_mbuf == NULL) {
			break;
		}

		/* remember the start of the chain on the first packet */
		if (first.cp_mbuf == NULL) {
			first = *head;
		}

		switch (head->cp_ptype) {
		case QP_MBUF:
			head->cp_mbuf->m_nextpkt = NULL;
			l += head->cp_mbuf->m_pkthdr.len;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			/* link the new packet onto the tail of the chain */
			if (last.cp_mbuf != NULL) {
				last.cp_mbuf->m_nextpkt = head->cp_mbuf;
			}
			break;

#if SKYWALK
		case QP_PACKET:
			head->cp_kpkt->pkt_nextpkt = NULL;
			l += head->cp_kpkt->pkt_length;
			ifclassq_set_packet_metadata(ifq, ifp, head);
			if (last.cp_kpkt != NULL) {
				last.cp_kpkt->pkt_nextpkt = head->cp_kpkt;
			}
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}

		last = *head;
		i++;
	}

	IFCQ_UNLOCK(ifq);

	if (tail != NULL) {
		*tail = last;
	}
	if (cnt != NULL) {
		*cnt = i;
	}
	if (len != NULL) {
		*len = l;
	}

	*head = first;
	return (first.cp_mbuf != NULL) ? 0 : EAGAIN;
}
475
/*
 * Dequeue dispatcher: normally delegates straight to the default path,
 * but on Skywalk interfaces with a netif ni_dequeue hook, the hook gets
 * a chance to post-process (filter) the dequeued chain.
 */
static errno_t
ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
    classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
    u_int8_t grp_idx)
{
#if SKYWALK
	struct ifnet *ifp = ifq->ifcq_ifp;

	if (__improbable(ifp->if_na_ops != NULL &&
	    ifp->if_na_ops->ni_dequeue != NULL)) {
		/*
		 * TODO:
		 * We should be changing the pkt/byte limit to the
		 * available space in the next filter. But this is not
		 * useful until we can flow control the whole chain of
		 * filters.
		 */
		errno_t err = ifclassq_dequeue_common_default(ifq, sc,
		    pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);

		/* hand the chain (and our error) to the netif hook */
		return ifp->if_na_ops->ni_dequeue(ifp->if_na, sc, pkt_limit,
		           byte_limit, head, tail, cnt, len, drvmgt, err);
	}
#endif /* SKYWALK */
	return ifclassq_dequeue_common_default(ifq, sc,
	           pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);
}
504
/*
 * Propagate a link event (bandwidth/latency/MTU/up/down) to the
 * scheduler.  Caller must hold the ifclassq lock and the queue must
 * be ready.
 */
void
ifclassq_update(struct ifclassq *ifq, cqev_t ev)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));
	fq_if_request_classq(ifq, CLASSQRQ_EVENT, (void *)ev);
}
512
/*
 * Attach a scheduler discipline instance to the queue.  The queue must
 * not already have one; caller holds the ifclassq lock.  Always
 * succeeds (returns 0).
 */
int
ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	ifq->ifcq_type = type;
	ifq->ifcq_disc = discipline;
	return 0;
}
522
/*
 * Mark the queue as having no scheduler.  The discipline pointer must
 * already have been cleared by the scheduler teardown; caller holds
 * the ifclassq lock.
 */
void
ifclassq_detach(struct ifclassq *ifq)
{
	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(ifq->ifcq_disc == NULL);
	ifq->ifcq_type = PKTSCHEDT_NONE;
}
530
531 int
ifclassq_getqstats(struct ifclassq * ifq,u_int8_t gid,u_int32_t qid,void * ubuf,u_int32_t * nbytes)532 ifclassq_getqstats(struct ifclassq *ifq, u_int8_t gid, u_int32_t qid, void *ubuf,
533 u_int32_t *nbytes)
534 {
535 struct if_ifclassq_stats *ifqs;
536 int err;
537
538 if (*nbytes < sizeof(*ifqs)) {
539 return EINVAL;
540 }
541
542 ifqs = kalloc_type(struct if_ifclassq_stats,
543 Z_WAITOK | Z_ZERO | Z_NOFAIL);
544
545 IFCQ_LOCK(ifq);
546 if (!IFCQ_IS_READY(ifq)) {
547 IFCQ_UNLOCK(ifq);
548 kfree_type(struct if_ifclassq_stats, ifqs);
549 return ENXIO;
550 }
551
552 ifqs->ifqs_len = IFCQ_LEN(ifq);
553 ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq);
554 *(&ifqs->ifqs_xmitcnt) = *(&ifq->ifcq_xmitcnt);
555 *(&ifqs->ifqs_dropcnt) = *(&ifq->ifcq_dropcnt);
556 ifqs->ifqs_scheduler = ifq->ifcq_type;
557
558 err = pktsched_getqstats(ifq, gid, qid, ifqs);
559 IFCQ_UNLOCK(ifq);
560
561 if (err == 0 && (err = copyout((caddr_t)ifqs,
562 (user_addr_t)(uintptr_t)ubuf, sizeof(*ifqs))) == 0) {
563 *nbytes = sizeof(*ifqs);
564 }
565
566 kfree_type(struct if_ifclassq_stats, ifqs);
567
568 return err;
569 }
570
571 const char *
ifclassq_ev2str(cqev_t ev)572 ifclassq_ev2str(cqev_t ev)
573 {
574 const char *c;
575
576 switch (ev) {
577 case CLASSQ_EV_LINK_BANDWIDTH:
578 c = "LINK_BANDWIDTH";
579 break;
580
581 case CLASSQ_EV_LINK_LATENCY:
582 c = "LINK_LATENCY";
583 break;
584
585 case CLASSQ_EV_LINK_MTU:
586 c = "LINK_MTU";
587 break;
588
589 case CLASSQ_EV_LINK_UP:
590 c = "LINK_UP";
591 break;
592
593 case CLASSQ_EV_LINK_DOWN:
594 c = "LINK_DOWN";
595 break;
596
597 default:
598 c = "UNKNOWN";
599 break;
600 }
601
602 return c;
603 }
604
/*
 * Internal representation of token bucket parameters (32-bit fixed
 * point, to keep precision in the per-tick token arithmetic):
 *	rate:  bytes_per_unittime << 32
 *	       (((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth: bytes << 32
 */
#define TBR_SHIFT	32
#define TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
615
/*
 * TBR dequeue without a service-class restriction.
 */
void
ifclassq_tbr_dequeue(struct ifclassq *ifq, classq_pkt_t *pkt, u_int8_t grp_idx)
{
	ifclassq_tbr_dequeue_common(ifq, MBUF_SC_UNSPEC, FALSE, pkt, grp_idx);
}
621
/*
 * TBR dequeue restricted to service class sc (driver-managed).
 */
void
ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
    classq_pkt_t *pkt, u_int8_t grp_idx)
{
	ifclassq_tbr_dequeue_common(ifq, sc, TRUE, pkt, grp_idx);
}
628
/*
 * Token-bucket-regulated dequeue of a single packet.  Refills the token
 * counter from elapsed machine clock time, refuses to dequeue while the
 * token count is non-positive, and charges the dequeued packet's length
 * (TBR_SCALE'd) against the bucket.  On return *pkt holds the packet,
 * or is empty if the bucket is exhausted or the queue is empty.
 * Caller holds the ifclassq lock; the TBR must be enabled.
 */
static void
ifclassq_tbr_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
    boolean_t drvmgt, classq_pkt_t *pkt, u_int8_t grp_idx)
{
	struct tb_regulator *tbr;
	int64_t interval;
	u_int64_t now;

	IFCQ_LOCK_ASSERT_HELD(ifq);

	VERIFY(!drvmgt || MBUF_VALID_SC(sc));
	VERIFY(IFCQ_TBR_IS_ENABLED(ifq));

	*pkt = CLASSQ_PKT_INITIALIZER(*pkt);
	tbr = &ifq->ifcq_tbr;
	/* update token only when it is negative */
	if (tbr->tbr_token <= 0) {
		now = read_machclk();
		interval = now - tbr->tbr_last;
		if (interval >= tbr->tbr_filluptime) {
			/* long idle: bucket refills completely */
			tbr->tbr_token = tbr->tbr_depth;
		} else {
			/* partial refill proportional to elapsed time */
			tbr->tbr_token += interval * tbr->tbr_rate;
			if (tbr->tbr_token > tbr->tbr_depth) {
				tbr->tbr_token = tbr->tbr_depth;
			}
		}
		tbr->tbr_last = now;
	}
	/* if token is still negative, don't allow dequeue */
	if (tbr->tbr_token <= 0) {
		return;
	}

	/*
	 * ifclassq takes precedence over ALTQ queue;
	 * ifcq_drain count is adjusted by the caller.
	 */
	if (drvmgt) {
		fq_if_dequeue_sc_classq(ifq, sc, pkt, grp_idx);
	} else {
		fq_if_dequeue_classq(ifq, pkt, grp_idx);
	}

	/* charge the packet length (fixed-point scaled) to the bucket */
	if (pkt->cp_mbuf != NULL) {
		switch (pkt->cp_ptype) {
		case QP_MBUF:
			tbr->tbr_token -= TBR_SCALE(m_pktlen(pkt->cp_mbuf));
			break;

#if SKYWALK
		case QP_PACKET:
			tbr->tbr_token -=
			    TBR_SCALE(pkt->cp_kpkt->pkt_length);
			break;
#endif /* SKYWALK */

		default:
			VERIFY(0);
			/* NOTREACHED */
		}
	}
}
692
/*
 * Install, reconfigure, or delete the token bucket regulator (TBR).
 * A zero rate (with zero percent) deletes the regulator.  The rate may
 * alternatively be given as a percentage of the interface's effective
 * output bandwidth.  When `update' is TRUE and the raw rate changed,
 * the scheduler is notified via CLASSQ_EV_LINK_BANDWIDTH.
 * Caller holds the ifclassq lock; the queue must be ready.
 *
 * Returns 0 on success, EINVAL for percent > 100, or ENODEV when a
 * percentage is requested but the effective bandwidth is unknown.
 */
int
ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
    boolean_t update)
{
	struct tb_regulator *tbr;
	struct ifnet *ifp = ifq->ifcq_ifp;
	u_int64_t rate, old_rate;

	IFCQ_LOCK_ASSERT_HELD(ifq);
	VERIFY(IFCQ_IS_READY(ifq));

	VERIFY(machclk_freq != 0);

	tbr = &ifq->ifcq_tbr;
	old_rate = tbr->tbr_rate_raw;

	rate = profile->rate;
	if (profile->percent > 0) {
		u_int64_t eff_rate;

		/* rate expressed as a percentage of effective link bw */
		if (profile->percent > 100) {
			return EINVAL;
		}
		if ((eff_rate = ifp->if_output_bw.eff_bw) == 0) {
			return ENODEV;
		}
		rate = (eff_rate * profile->percent) / 100;
	}

	if (rate == 0) {
		/* zero rate deletes the TBR; no-op if not enabled */
		if (!IFCQ_TBR_IS_ENABLED(ifq)) {
			return 0;
		}

		if (pktsched_verbose) {
			printf("%s: TBR disabled\n", if_name(ifp));
		}

		/* disable this TBR */
		ifq->ifcq_flags &= ~IFCQF_TBR;
		bzero(tbr, sizeof(*tbr));
		ifnet_set_start_cycle(ifp, NULL);
		if (update) {
			ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
		}
		return 0;
	}

	if (pktsched_verbose) {
		printf("%s: TBR %s (rate %llu bps depth %u)\n", if_name(ifp),
		    (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" :
		    "enabled", rate, profile->depth);
	}

	/* set the new TBR */
	bzero(tbr, sizeof(*tbr));
	tbr->tbr_rate_raw = rate;
	tbr->tbr_percent = profile->percent;
	ifq->ifcq_flags |= IFCQF_TBR;

	/*
	 * Note that the TBR fill up time (hence the ifnet restart time)
	 * is directly related to the specified TBR depth. The ideal
	 * depth value should be computed such that the interval time
	 * between each successive wakeup is adequately spaced apart,
	 * in order to reduce scheduling overheads. A target interval
	 * of 10 ms seems to provide good performance balance. This can be
	 * overridden by specifying the depth profile. Values smaller than
	 * the ideal depth will reduce delay at the expense of CPU cycles.
	 */
	tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq;
	if (tbr->tbr_rate > 0) {
		u_int32_t mtu = ifp->if_mtu;
		int64_t ival, idepth = 0;
		int i;

		if (mtu < IF_MINMTU) {
			mtu = IF_MINMTU;
		}

		ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */

		/* smallest multiple-of-MTU depth spanning the ~10ms target */
		for (i = 1;; i++) {
			idepth = TBR_SCALE(i * mtu);
			if ((idepth / tbr->tbr_rate) > ival) {
				break;
			}
		}
		VERIFY(idepth > 0);

		tbr->tbr_depth = TBR_SCALE(profile->depth);
		if (tbr->tbr_depth == 0) {
			tbr->tbr_filluptime = idepth / tbr->tbr_rate;
			/* a little fudge factor to get closer to rate */
			tbr->tbr_depth = idepth + (idepth >> 3);
		} else {
			tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
		}
	} else {
		/* scaled rate rounded down to zero: bucket never refills */
		tbr->tbr_depth = TBR_SCALE(profile->depth);
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	}
	/* start with a full bucket */
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();

	if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) {
		/* schedule periodic ifnet restarts at the fill-up interval */
		struct timespec ts =
		{ 0, (long)pktsched_abs_to_nsecs(tbr->tbr_filluptime) };
		if (pktsched_verbose) {
			printf("%s: TBR calculated tokens %lld "
			    "filluptime %llu ns\n", if_name(ifp),
			    TBR_UNSCALE(tbr->tbr_token),
			    pktsched_abs_to_nsecs(tbr->tbr_filluptime));
		}
		ifnet_set_start_cycle(ifp, &ts);
	} else {
		if (pktsched_verbose) {
			if (tbr->tbr_rate == 0) {
				printf("%s: TBR calculated tokens %lld "
				    "infinite filluptime\n", if_name(ifp),
				    TBR_UNSCALE(tbr->tbr_token));
			} else if (!(ifp->if_flags & IFF_UP)) {
				printf("%s: TBR suspended (link is down)\n",
				    if_name(ifp));
			}
		}
		ifnet_set_start_cycle(ifp, NULL);
	}
	if (update && tbr->tbr_rate_raw != old_rate) {
		ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
	}

	return 0;
}
831
832 void
ifclassq_calc_target_qdelay(struct ifnet * ifp,uint64_t * if_target_qdelay,uint32_t flags)833 ifclassq_calc_target_qdelay(struct ifnet *ifp, uint64_t *if_target_qdelay,
834 uint32_t flags)
835 {
836 uint64_t qdelay = 0, qdelay_configed = 0, qdely_default = 0;
837 if (flags == IF_CLASSQ_DEF) {
838 qdelay = IFCQ_TARGET_QDELAY(ifp->if_snd);
839 }
840
841 switch (flags) {
842 case IF_CLASSQ_DEF:
843 qdelay_configed = ifclassq_def_c_target_qdelay;
844 qdely_default = IFQ_DEF_C_TARGET_DELAY;
845 break;
846 case IF_CLASSQ_L4S:
847 qdelay_configed = ifclassq_def_l4s_target_qdelay;
848 qdely_default = IFQ_DEF_L4S_TARGET_DELAY;
849 break;
850 case IF_CLASSQ_LOW_LATENCY:
851 qdelay_configed = ifclassq_ll_c_target_qdelay;
852 qdely_default = IFQ_LL_C_TARGET_DELAY;
853 break;
854 case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S):
855 qdelay_configed = ifclassq_ll_l4s_target_qdelay;
856 qdely_default = IFQ_LL_L4S_TARGET_DELAY;
857 break;
858 default:
859 VERIFY(0);
860 /* NOTREACHED */
861 __builtin_unreachable();
862 }
863
864 if (qdelay_configed != 0) {
865 qdelay = qdelay_configed;
866 }
867
868 /*
869 * If we do not know the effective bandwidth, use the default
870 * target queue delay.
871 */
872 if (qdelay == 0) {
873 qdelay = qdely_default;
874 }
875
876 /*
877 * If a delay has been added to ifnet start callback for
878 * coalescing, we have to add that to the pre-set target delay
879 * because the packets can be in the queue longer.
880 */
881 if ((ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
882 ifp->if_start_delay_timeout > 0) {
883 qdelay += ifp->if_start_delay_timeout;
884 }
885
886 *(if_target_qdelay) = qdelay;
887 }
888
889 void
ifclassq_calc_update_interval(uint64_t * update_interval,uint32_t flags)890 ifclassq_calc_update_interval(uint64_t *update_interval, uint32_t flags)
891 {
892 uint64_t interval = 0, interval_configed = 0, interval_default = 0;
893
894 switch (flags) {
895 case IF_CLASSQ_DEF:
896 interval_configed = ifclassq_def_c_update_interval;
897 interval_default = IFQ_DEF_C_UPDATE_INTERVAL;
898 break;
899 case IF_CLASSQ_L4S:
900 interval_configed = ifclassq_def_l4s_update_interval;
901 interval_default = IFQ_DEF_L4S_UPDATE_INTERVAL;
902 break;
903 case IF_CLASSQ_LOW_LATENCY:
904 interval_configed = ifclassq_ll_c_update_interval;
905 interval_default = IFQ_LL_C_UPDATE_INTERVAL;
906 break;
907 case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S):
908 interval_configed = ifclassq_ll_l4s_update_interval;
909 interval_default = IFQ_LL_L4S_UPDATE_INTERVAL;
910 break;
911 default:
912 VERIFY(0);
913 /* NOTREACHED */
914 __builtin_unreachable();
915 }
916
917 /* If the system level override is set, use it */
918 if (interval_configed != 0) {
919 interval = interval_configed;
920 }
921
922 /* Otherwise use the default value */
923 if (interval == 0) {
924 interval = interval_default;
925 }
926
927 *update_interval = interval;
928 }
929
/*
 * Reap (or, with purge set, fully drain) the FQ-CoDel and flow-advisory
 * object caches under memory pressure.
 */
void
ifclassq_reap_caches(boolean_t purge)
{
	fq_codel_reap_caches(purge);
	flowadv_reap_caches(purge);
}
936
/*
 * Allocate and zero-fill a new ifclassq, initializing its lock and
 * reference count.  The os_ref_init + os_ref_retain pair leaves the
 * object with two references; presumably one for the caller and one
 * dropped later via ifclassq_release — confirm against callers.
 * Cannot fail (Z_NOFAIL).
 */
struct ifclassq *
ifclassq_alloc(void)
{
	struct ifclassq *ifcq;

	ifcq = zalloc_flags(ifcq_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	os_ref_init(&ifcq->ifcq_refcnt, NULL);
	os_ref_retain(&ifcq->ifcq_refcnt);
	lck_mtx_init(&ifcq->ifcq_lock, &ifcq_lock_group, &ifcq_lock_attr);
	return ifcq;
}
948
/*
 * Take an additional reference on ifcq.
 */
void
ifclassq_retain(struct ifclassq *ifcq)
{
	os_ref_retain(&ifcq->ifcq_refcnt);
}
954
/*
 * Drop a reference on *pifcq and NULL out the caller's pointer; when
 * the last reference goes away, tear down the queue and free it back
 * to the zone.
 */
void
ifclassq_release(struct ifclassq **pifcq)
{
	struct ifclassq *ifcq = *pifcq;

	/* clear the caller's pointer first to prevent reuse after free */
	*pifcq = NULL;
	if (os_ref_release(&ifcq->ifcq_refcnt) == 0) {
		ifclassq_teardown(ifcq);
		zfree(ifcq_zone, ifcq);
	}
}
966
/*
 * Create an FQ-CoDel traffic group grp_idx on the queue.  The queue
 * must already have the FQ-CoDel scheduler attached.  Returns the
 * fq_if_create_grp() result.
 */
int
ifclassq_setup_group(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
{
	int err;

	IFCQ_LOCK(ifcq);
	VERIFY(ifcq->ifcq_disc != NULL);
	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	err = fq_if_create_grp(ifcq, grp_idx, flags);
	IFCQ_UNLOCK(ifcq);

	return err;
}
981
/*
 * Mark FQ-CoDel group grp_idx as combined; requires the FQ-CoDel
 * scheduler to be attached.
 */
void
ifclassq_set_grp_combined(struct ifclassq *ifcq, uint8_t grp_idx)
{
	IFCQ_LOCK(ifcq);
	VERIFY(ifcq->ifcq_disc != NULL);
	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_set_grp_combined(ifcq, grp_idx);
	IFCQ_UNLOCK(ifcq);
}
992
/*
 * Mark FQ-CoDel group grp_idx as separated; requires the FQ-CoDel
 * scheduler to be attached.
 */
void
ifclassq_set_grp_separated(struct ifclassq *ifcq, uint8_t grp_idx)
{
	IFCQ_LOCK(ifcq);
	VERIFY(ifcq->ifcq_disc != NULL);
	VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);

	fq_if_set_grp_separated(ifcq, grp_idx);
	IFCQ_UNLOCK(ifcq);
}
1003