1 /*
2 * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/cdefs.h>
30 #include <sys/param.h>
31 #include <sys/mbuf.h>
32 #include <sys/errno.h>
33 #include <sys/random.h>
34 #include <sys/kernel_types.h>
35 #include <sys/sysctl.h>
36
37 #include <kern/zalloc.h>
38
39 #include <net/if.h>
40 #include <net/net_osdep.h>
41 #include <net/classq/classq.h>
42 #include <pexpert/pexpert.h>
43 #include <net/classq/classq_sfb.h>
44 #include <net/classq/classq_fq_codel.h>
45 #include <net/pktsched/pktsched.h>
46 #include <net/pktsched/pktsched_fq_codel.h>
47 #include <net/flowadv.h>
48
49 #include <libkern/libkern.h>
50
51 #if SKYWALK
52 #include <skywalk/os_skywalk_private.h>
53 #include <skywalk/nexus/netif/nx_netif.h>
54 #endif /* SKYWALK */
55
/*
 * Forward declarations for the common dequeue paths shared by the
 * plain and service-class-restricted dequeue entry points below.
 */
static errno_t ifclassq_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
    u_int32_t, u_int32_t, classq_pkt_t *, classq_pkt_t *, u_int32_t *,
    u_int32_t *, boolean_t, u_int8_t);
static void ifclassq_tbr_dequeue_common(struct ifclassq *, mbuf_svc_class_t,
    boolean_t, classq_pkt_t *, u_int8_t);

/*
 * Sysctl overrides for the AQM target queue delay and update interval,
 * one pair per (default / low-latency) x (classic / L4S) combination.
 * A value of 0 means "unset"; the effective value is then chosen by
 * ifclassq_calc_target_qdelay() / ifclassq_calc_update_interval().
 */
static uint64_t ifclassq_def_c_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_def_c_target_qdelay, "def classic target queue delay in nanoseconds");

static uint64_t ifclassq_def_c_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_c_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_c_update_interval,
    "def classic update interval in nanoseconds");

static uint64_t ifclassq_def_l4s_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_def_l4s_target_qdelay, "def L4S target queue delay in nanoseconds");

static uint64_t ifclassq_def_l4s_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, def_l4s_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_def_l4s_update_interval,
    "def L4S update interval in nanoseconds");

static uint64_t ifclassq_ll_c_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_ll_c_target_qdelay, "low latency classic target queue delay in nanoseconds");

static uint64_t ifclassq_ll_c_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_c_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_c_update_interval,
    "low latency classic update interval in nanoseconds");

static uint64_t ifclassq_ll_l4s_target_qdelay = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ifclassq_ll_l4s_target_qdelay, "low latency L4S target queue delay in nanoseconds");

static uint64_t ifclassq_ll_l4s_update_interval = 0;
SYSCTL_QUAD(_net_classq, OID_AUTO, ll_l4s_update_interval,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_ll_l4s_update_interval,
    "low latency L4S update interval in nanoseconds");

#if DEBUG || DEVELOPMENT
uint32_t ifclassq_flow_control_adv = 1; /* flow control advisory */
SYSCTL_UINT(_net_classq, OID_AUTO, flow_control_adv,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifclassq_flow_control_adv, 1,
    "enable/disable flow control advisory");

/* FQ-CoDel quantum override; consumed by the scheduler, settable via boot-arg */
uint16_t fq_codel_quantum = 0;
#endif /* DEBUG || DEVELOPMENT */

static struct zone *ifcq_zone;          /* zone for ifclassq */
#define IFCQ_ZONE_NAME  "ifclassq"      /* zone name */
/* Lock attributes/group shared by all per-queue mutexes (see ifclassq_alloc) */
LCK_ATTR_DECLARE(ifcq_lock_attr, 0, 0);
static LCK_GRP_DECLARE(ifcq_lock_group, "ifclassq locks");
111
112 void
classq_init(void)113 classq_init(void)
114 {
115 _CASSERT(MBUF_TC_BE == 0);
116 _CASSERT(MBUF_SC_BE == 0);
117 _CASSERT(IFCQ_SC_MAX == MBUF_SC_MAX_CLASSES);
118 #if DEBUG || DEVELOPMENT
119 PE_parse_boot_argn("fq_codel_quantum", &fq_codel_quantum,
120 sizeof(fq_codel_quantum));
121 PE_parse_boot_argn("ifclassq_def_c_target_qdelay", &ifclassq_def_c_target_qdelay,
122 sizeof(ifclassq_def_c_target_qdelay));
123 PE_parse_boot_argn("ifclassq_def_c_update_interval",
124 &ifclassq_def_c_update_interval, sizeof(ifclassq_def_c_update_interval));
125 PE_parse_boot_argn("ifclassq_def_l4s_target_qdelay", &ifclassq_def_l4s_target_qdelay,
126 sizeof(ifclassq_def_l4s_target_qdelay));
127 PE_parse_boot_argn("ifclassq_def_l4s_update_interval",
128 &ifclassq_def_l4s_update_interval, sizeof(ifclassq_def_l4s_update_interval));
129 PE_parse_boot_argn("ifclassq_ll_c_target_qdelay", &ifclassq_ll_c_target_qdelay,
130 sizeof(ifclassq_ll_c_target_qdelay));
131 PE_parse_boot_argn("ifclassq_ll_c_update_interval",
132 &ifclassq_ll_c_update_interval, sizeof(ifclassq_ll_c_update_interval));
133 PE_parse_boot_argn("ifclassq_ll_l4s_target_qdelay", &ifclassq_ll_l4s_target_qdelay,
134 sizeof(ifclassq_ll_l4s_target_qdelay));
135 PE_parse_boot_argn("ifclassq_ll_l4s_update_interval",
136 &ifclassq_ll_l4s_update_interval, sizeof(ifclassq_ll_l4s_update_interval));
137 #endif /* DEBUG || DEVELOPMENT */
138 ifcq_zone = zone_create(IFCQ_ZONE_NAME, sizeof(struct ifclassq),
139 ZC_ZFREE_CLEARMEM);
140 fq_codel_init();
141 }
142
143 int
ifclassq_setup(struct ifclassq * ifq,struct ifnet * ifp,uint32_t sflags)144 ifclassq_setup(struct ifclassq *ifq, struct ifnet *ifp, uint32_t sflags)
145 {
146 int err = 0;
147
148 IFCQ_LOCK(ifq);
149 VERIFY(IFCQ_IS_EMPTY(ifq));
150 ifq->ifcq_ifp = ifp;
151 IFCQ_LEN(ifq) = 0;
152 IFCQ_BYTES(ifq) = 0;
153 bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
154 bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));
155
156 VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
157 VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
158 VERIFY(ifq->ifcq_flags == 0);
159 VERIFY(ifq->ifcq_sflags == 0);
160 VERIFY(ifq->ifcq_disc == NULL);
161
162 if (ifp->if_eflags & IFEF_TXSTART) {
163 u_int32_t maxlen = 0;
164
165 if ((maxlen = IFCQ_MAXLEN(ifq)) == 0) {
166 maxlen = if_sndq_maxlen;
167 }
168 IFCQ_SET_MAXLEN(ifq, maxlen);
169
170 if (IFCQ_MAXLEN(ifq) != if_sndq_maxlen &&
171 IFCQ_TARGET_QDELAY(ifq) == 0) {
172 /*
173 * Choose static queues because the interface has
174 * maximum queue size set
175 */
176 sflags &= ~PKTSCHEDF_QALG_DELAYBASED;
177 }
178 ifq->ifcq_sflags = sflags;
179 err = ifclassq_pktsched_setup(ifq);
180 if (err == 0) {
181 ifq->ifcq_flags = (IFCQF_READY | IFCQF_ENABLED);
182 }
183 }
184 IFCQ_UNLOCK(ifq);
185 return err;
186 }
187
188 void
ifclassq_teardown(struct ifclassq * ifq)189 ifclassq_teardown(struct ifclassq *ifq)
190 {
191 IFCQ_LOCK(ifq);
192 if (IFCQ_IS_DESTROYED(ifq)) {
193 ASSERT((ifq->ifcq_flags & ~IFCQF_DESTROYED) == 0);
194 goto done;
195 }
196 if (IFCQ_IS_READY(ifq)) {
197 if (IFCQ_TBR_IS_ENABLED(ifq)) {
198 struct tb_profile tb =
199 { .rate = 0, .percent = 0, .depth = 0 };
200 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
201 }
202 pktsched_teardown(ifq);
203 ifq->ifcq_flags &= ~IFCQF_READY;
204 }
205 ifq->ifcq_sflags = 0;
206 VERIFY(IFCQ_IS_EMPTY(ifq));
207 VERIFY(!IFCQ_TBR_IS_ENABLED(ifq));
208 VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
209 VERIFY(ifq->ifcq_flags == 0);
210 VERIFY(ifq->ifcq_sflags == 0);
211 VERIFY(ifq->ifcq_disc == NULL);
212 IFCQ_LEN(ifq) = 0;
213 IFCQ_BYTES(ifq) = 0;
214 IFCQ_MAXLEN(ifq) = 0;
215 bzero(&ifq->ifcq_xmitcnt, sizeof(ifq->ifcq_xmitcnt));
216 bzero(&ifq->ifcq_dropcnt, sizeof(ifq->ifcq_dropcnt));
217 ifq->ifcq_flags |= IFCQF_DESTROYED;
218 done:
219 IFCQ_UNLOCK(ifq);
220 }
221
222 int
ifclassq_pktsched_setup(struct ifclassq * ifq)223 ifclassq_pktsched_setup(struct ifclassq *ifq)
224 {
225 struct ifnet *ifp = ifq->ifcq_ifp;
226 classq_pkt_type_t ptype = QP_MBUF;
227 int err = 0;
228
229 IFCQ_LOCK_ASSERT_HELD(ifq);
230 VERIFY(ifp->if_eflags & IFEF_TXSTART);
231 #if SKYWALK
232 ptype = ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) != 0) ? QP_PACKET :
233 QP_MBUF;
234 #endif /* SKYWALK */
235
236 err = pktsched_setup(ifq, PKTSCHEDT_FQ_CODEL, ifq->ifcq_sflags, ptype);
237
238 return err;
239 }
240
241 void
ifclassq_set_maxlen(struct ifclassq * ifq,u_int32_t maxqlen)242 ifclassq_set_maxlen(struct ifclassq *ifq, u_int32_t maxqlen)
243 {
244 IFCQ_LOCK(ifq);
245 if (maxqlen == 0) {
246 maxqlen = if_sndq_maxlen;
247 }
248 IFCQ_SET_MAXLEN(ifq, maxqlen);
249 IFCQ_UNLOCK(ifq);
250 }
251
252 u_int32_t
ifclassq_get_maxlen(struct ifclassq * ifq)253 ifclassq_get_maxlen(struct ifclassq *ifq)
254 {
255 return IFCQ_MAXLEN(ifq);
256 }
257
258 int
ifclassq_get_len(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int8_t grp_idx,u_int32_t * packets,u_int32_t * bytes)259 ifclassq_get_len(struct ifclassq *ifq, mbuf_svc_class_t sc, u_int8_t grp_idx,
260 u_int32_t *packets, u_int32_t *bytes)
261 {
262 int err = 0;
263
264 IFCQ_LOCK(ifq);
265 if ((ifq->ifcq_flags & (IFCQF_READY | IFCQF_ENABLED)) !=
266 (IFCQF_READY | IFCQF_ENABLED)) {
267 return ENXIO;
268 }
269 if (sc == MBUF_SC_UNSPEC && grp_idx == IF_CLASSQ_ALL_GRPS) {
270 VERIFY(packets != NULL);
271 *packets = IFCQ_LEN(ifq);
272 } else {
273 cqrq_stat_sc_t req = { sc, grp_idx, 0, 0 };
274
275 VERIFY(MBUF_VALID_SC(sc) || sc == MBUF_SC_UNSPEC);
276
277 err = fq_if_request_classq(ifq, CLASSQRQ_STAT_SC, &req);
278 if (packets != NULL) {
279 *packets = req.packets;
280 }
281 if (bytes != NULL) {
282 *bytes = req.bytes;
283 }
284 }
285 IFCQ_UNLOCK(ifq);
286
287 #if SKYWALK
288 struct ifnet *ifp = ifq->ifcq_ifp;
289
290 if (__improbable(ifp->if_na_ops != NULL &&
291 ifp->if_na_ops->ni_get_len != NULL)) {
292 err = ifp->if_na_ops->ni_get_len(ifp->if_na, sc, packets,
293 bytes, err);
294 }
295 #endif /* SKYWALK */
296
297 return err;
298 }
299
300 inline void
ifclassq_set_packet_metadata(struct ifclassq * ifq,struct ifnet * ifp,classq_pkt_t * p)301 ifclassq_set_packet_metadata(struct ifclassq *ifq, struct ifnet *ifp,
302 classq_pkt_t *p)
303 {
304 if (!IFNET_IS_CELLULAR(ifp)) {
305 return;
306 }
307
308 switch (p->cp_ptype) {
309 case QP_MBUF: {
310 struct mbuf *m = p->cp_mbuf;
311 m->m_pkthdr.pkt_flags |= PKTF_VALID_UNSENT_DATA;
312 m->m_pkthdr.bufstatus_if = IFCQ_BYTES(ifq);
313 m->m_pkthdr.bufstatus_sndbuf = (uint32_t)ifp->if_sndbyte_unsent;
314 break;
315 }
316
317 #if SKYWALK
318 case QP_PACKET:
319 /*
320 * Support for equivalent of mbuf_get_unsent_data_bytes()
321 * is not needed in the Skywalk architecture.
322 */
323 break;
324 #endif /* SKYWALK */
325
326 default:
327 VERIFY(0);
328 /* NOTREACHED */
329 __builtin_unreachable();
330 }
331 }
332
333 errno_t
ifclassq_enqueue(struct ifclassq * ifq,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t cnt,u_int32_t bytes,boolean_t * pdrop)334 ifclassq_enqueue(struct ifclassq *ifq, classq_pkt_t *head, classq_pkt_t *tail,
335 u_int32_t cnt, u_int32_t bytes, boolean_t *pdrop)
336 {
337 return fq_if_enqueue_classq(ifq, head, tail, cnt, bytes, pdrop);
338 }
339
340 errno_t
ifclassq_dequeue(struct ifclassq * ifq,u_int32_t pkt_limit,u_int32_t byte_limit,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t * cnt,u_int32_t * len,u_int8_t grp_idx)341 ifclassq_dequeue(struct ifclassq *ifq, u_int32_t pkt_limit,
342 u_int32_t byte_limit, classq_pkt_t *head, classq_pkt_t *tail,
343 u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
344 {
345 return ifclassq_dequeue_common(ifq, MBUF_SC_UNSPEC, pkt_limit,
346 byte_limit, head, tail, cnt, len, FALSE, grp_idx);
347 }
348
349 errno_t
ifclassq_dequeue_sc(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int32_t pkt_limit,u_int32_t byte_limit,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t * cnt,u_int32_t * len,u_int8_t grp_idx)350 ifclassq_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
351 u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
352 classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, u_int8_t grp_idx)
353 {
354 return ifclassq_dequeue_common(ifq, sc, pkt_limit, byte_limit,
355 head, tail, cnt, len, TRUE, grp_idx);
356 }
357
358 static errno_t
ifclassq_dequeue_common_default(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int32_t pkt_limit,u_int32_t byte_limit,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t * cnt,u_int32_t * len,boolean_t drvmgt,u_int8_t grp_idx)359 ifclassq_dequeue_common_default(struct ifclassq *ifq, mbuf_svc_class_t sc,
360 u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
361 classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
362 u_int8_t grp_idx)
363 {
364 struct ifnet *ifp = ifq->ifcq_ifp;
365 u_int32_t i = 0, l = 0;
366 classq_pkt_t first = CLASSQ_PKT_INITIALIZER(first);
367 classq_pkt_t last = CLASSQ_PKT_INITIALIZER(last);
368
369 VERIFY(!drvmgt || MBUF_VALID_SC(sc));
370
371 if (IFCQ_TBR_IS_ENABLED(ifq)) {
372 goto dequeue_loop;
373 }
374
375 /*
376 * If the scheduler support dequeueing multiple packets at the
377 * same time, call that one instead.
378 */
379 if (drvmgt) {
380 int err;
381
382 IFCQ_LOCK_SPIN(ifq);
383 err = fq_if_dequeue_sc_classq_multi(ifq, sc, pkt_limit,
384 byte_limit, head, tail, cnt, len, grp_idx);
385 IFCQ_UNLOCK(ifq);
386
387 if (err == 0 && head->cp_mbuf == NULL) {
388 err = EAGAIN;
389 }
390 return err;
391 } else {
392 int err;
393
394 IFCQ_LOCK_SPIN(ifq);
395 err = fq_if_dequeue_classq_multi(ifq, pkt_limit, byte_limit,
396 head, tail, cnt, len, grp_idx);
397 IFCQ_UNLOCK(ifq);
398
399 if (err == 0 && head->cp_mbuf == NULL) {
400 err = EAGAIN;
401 }
402 return err;
403 }
404
405 dequeue_loop:
406 VERIFY(IFCQ_TBR_IS_ENABLED(ifq));
407 IFCQ_LOCK_SPIN(ifq);
408
409 while (i < pkt_limit && l < byte_limit) {
410 if (drvmgt) {
411 IFCQ_TBR_DEQUEUE_SC(ifq, sc, head, grp_idx);
412 } else {
413 IFCQ_TBR_DEQUEUE(ifq, head, grp_idx);
414 }
415
416 if (head->cp_mbuf == NULL) {
417 break;
418 }
419
420 if (first.cp_mbuf == NULL) {
421 first = *head;
422 }
423
424 switch (head->cp_ptype) {
425 case QP_MBUF:
426 head->cp_mbuf->m_nextpkt = NULL;
427 l += head->cp_mbuf->m_pkthdr.len;
428 ifclassq_set_packet_metadata(ifq, ifp, head);
429 if (last.cp_mbuf != NULL) {
430 last.cp_mbuf->m_nextpkt = head->cp_mbuf;
431 }
432 break;
433
434 #if SKYWALK
435 case QP_PACKET:
436 head->cp_kpkt->pkt_nextpkt = NULL;
437 l += head->cp_kpkt->pkt_length;
438 ifclassq_set_packet_metadata(ifq, ifp, head);
439 if (last.cp_kpkt != NULL) {
440 last.cp_kpkt->pkt_nextpkt = head->cp_kpkt;
441 }
442 break;
443 #endif /* SKYWALK */
444
445 default:
446 VERIFY(0);
447 /* NOTREACHED */
448 __builtin_unreachable();
449 }
450
451 last = *head;
452 i++;
453 }
454
455 IFCQ_UNLOCK(ifq);
456
457 if (tail != NULL) {
458 *tail = last;
459 }
460 if (cnt != NULL) {
461 *cnt = i;
462 }
463 if (len != NULL) {
464 *len = l;
465 }
466
467 *head = first;
468 return (first.cp_mbuf != NULL) ? 0 : EAGAIN;
469 }
470
471 static errno_t
ifclassq_dequeue_common(struct ifclassq * ifq,mbuf_svc_class_t sc,u_int32_t pkt_limit,u_int32_t byte_limit,classq_pkt_t * head,classq_pkt_t * tail,u_int32_t * cnt,u_int32_t * len,boolean_t drvmgt,u_int8_t grp_idx)472 ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
473 u_int32_t pkt_limit, u_int32_t byte_limit, classq_pkt_t *head,
474 classq_pkt_t *tail, u_int32_t *cnt, u_int32_t *len, boolean_t drvmgt,
475 u_int8_t grp_idx)
476 {
477 #if SKYWALK
478 struct ifnet *ifp = ifq->ifcq_ifp;
479
480 if (__improbable(ifp->if_na_ops != NULL &&
481 ifp->if_na_ops->ni_dequeue != NULL)) {
482 /*
483 * TODO:
484 * We should be changing the pkt/byte limit to the
485 * available space in the next filter. But this is not
486 * useful until we can flow control the whole chain of
487 * filters.
488 */
489 errno_t err = ifclassq_dequeue_common_default(ifq, sc,
490 pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);
491
492 return ifp->if_na_ops->ni_dequeue(ifp->if_na, sc, pkt_limit,
493 byte_limit, head, tail, cnt, len, drvmgt, err);
494 }
495 #endif /* SKYWALK */
496 return ifclassq_dequeue_common_default(ifq, sc,
497 pkt_limit, byte_limit, head, tail, cnt, len, drvmgt, grp_idx);
498 }
499
500 void
ifclassq_update(struct ifclassq * ifq,cqev_t ev)501 ifclassq_update(struct ifclassq *ifq, cqev_t ev)
502 {
503 IFCQ_LOCK_ASSERT_HELD(ifq);
504 VERIFY(IFCQ_IS_READY(ifq));
505 fq_if_request_classq(ifq, CLASSQRQ_EVENT, (void *)ev);
506 }
507
508 int
ifclassq_attach(struct ifclassq * ifq,u_int32_t type,void * discipline)509 ifclassq_attach(struct ifclassq *ifq, u_int32_t type, void *discipline)
510 {
511 IFCQ_LOCK_ASSERT_HELD(ifq);
512 VERIFY(ifq->ifcq_disc == NULL);
513 ifq->ifcq_type = type;
514 ifq->ifcq_disc = discipline;
515 return 0;
516 }
517
518 void
ifclassq_detach(struct ifclassq * ifq)519 ifclassq_detach(struct ifclassq *ifq)
520 {
521 IFCQ_LOCK_ASSERT_HELD(ifq);
522 VERIFY(ifq->ifcq_disc == NULL);
523 ifq->ifcq_type = PKTSCHEDT_NONE;
524 }
525
526 int
ifclassq_getqstats(struct ifclassq * ifq,u_int8_t gid,u_int32_t qid,void * ubuf,u_int32_t * nbytes)527 ifclassq_getqstats(struct ifclassq *ifq, u_int8_t gid, u_int32_t qid, void *ubuf,
528 u_int32_t *nbytes)
529 {
530 struct if_ifclassq_stats *ifqs;
531 int err;
532
533 if (*nbytes < sizeof(*ifqs)) {
534 return EINVAL;
535 }
536
537 ifqs = kalloc_type(struct if_ifclassq_stats,
538 Z_WAITOK | Z_ZERO | Z_NOFAIL);
539
540 IFCQ_LOCK(ifq);
541 if (!IFCQ_IS_READY(ifq)) {
542 IFCQ_UNLOCK(ifq);
543 kfree_type(struct if_ifclassq_stats, ifqs);
544 return ENXIO;
545 }
546
547 ifqs->ifqs_len = IFCQ_LEN(ifq);
548 ifqs->ifqs_maxlen = IFCQ_MAXLEN(ifq);
549 *(&ifqs->ifqs_xmitcnt) = *(&ifq->ifcq_xmitcnt);
550 *(&ifqs->ifqs_dropcnt) = *(&ifq->ifcq_dropcnt);
551 ifqs->ifqs_scheduler = ifq->ifcq_type;
552
553 err = pktsched_getqstats(ifq, gid, qid, ifqs);
554 IFCQ_UNLOCK(ifq);
555
556 if (err == 0 && (err = copyout((caddr_t)ifqs,
557 (user_addr_t)(uintptr_t)ubuf, sizeof(*ifqs))) == 0) {
558 *nbytes = sizeof(*ifqs);
559 }
560
561 kfree_type(struct if_ifclassq_stats, ifqs);
562
563 return err;
564 }
565
566 const char *
ifclassq_ev2str(cqev_t ev)567 ifclassq_ev2str(cqev_t ev)
568 {
569 const char *c;
570
571 switch (ev) {
572 case CLASSQ_EV_LINK_BANDWIDTH:
573 c = "LINK_BANDWIDTH";
574 break;
575
576 case CLASSQ_EV_LINK_LATENCY:
577 c = "LINK_LATENCY";
578 break;
579
580 case CLASSQ_EV_LINK_MTU:
581 c = "LINK_MTU";
582 break;
583
584 case CLASSQ_EV_LINK_UP:
585 c = "LINK_UP";
586 break;
587
588 case CLASSQ_EV_LINK_DOWN:
589 c = "LINK_DOWN";
590 break;
591
592 default:
593 c = "UNKNOWN";
594 break;
595 }
596
597 return c;
598 }
599
600 /*
601 * internal representation of token bucket parameters
602 * rate: byte_per_unittime << 32
603 * (((bits_per_sec) / 8) << 32) / machclk_freq
604 * depth: byte << 32
605 *
606 */
607 #define TBR_SHIFT 32
608 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
609 #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
610
611 void
ifclassq_tbr_dequeue(struct ifclassq * ifq,classq_pkt_t * pkt,u_int8_t grp_idx)612 ifclassq_tbr_dequeue(struct ifclassq *ifq, classq_pkt_t *pkt, u_int8_t grp_idx)
613 {
614 ifclassq_tbr_dequeue_common(ifq, MBUF_SC_UNSPEC, FALSE, pkt, grp_idx);
615 }
616
617 void
ifclassq_tbr_dequeue_sc(struct ifclassq * ifq,mbuf_svc_class_t sc,classq_pkt_t * pkt,u_int8_t grp_idx)618 ifclassq_tbr_dequeue_sc(struct ifclassq *ifq, mbuf_svc_class_t sc,
619 classq_pkt_t *pkt, u_int8_t grp_idx)
620 {
621 ifclassq_tbr_dequeue_common(ifq, sc, TRUE, pkt, grp_idx);
622 }
623
624 static void
ifclassq_tbr_dequeue_common(struct ifclassq * ifq,mbuf_svc_class_t sc,boolean_t drvmgt,classq_pkt_t * pkt,u_int8_t grp_idx)625 ifclassq_tbr_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
626 boolean_t drvmgt, classq_pkt_t *pkt, u_int8_t grp_idx)
627 {
628 struct tb_regulator *tbr;
629 int64_t interval;
630 u_int64_t now;
631
632 IFCQ_LOCK_ASSERT_HELD(ifq);
633
634 VERIFY(!drvmgt || MBUF_VALID_SC(sc));
635 VERIFY(IFCQ_TBR_IS_ENABLED(ifq));
636
637 *pkt = CLASSQ_PKT_INITIALIZER(*pkt);
638 tbr = &ifq->ifcq_tbr;
639 /* update token only when it is negative */
640 if (tbr->tbr_token <= 0) {
641 now = read_machclk();
642 interval = now - tbr->tbr_last;
643 if (interval >= tbr->tbr_filluptime) {
644 tbr->tbr_token = tbr->tbr_depth;
645 } else {
646 tbr->tbr_token += interval * tbr->tbr_rate;
647 if (tbr->tbr_token > tbr->tbr_depth) {
648 tbr->tbr_token = tbr->tbr_depth;
649 }
650 }
651 tbr->tbr_last = now;
652 }
653 /* if token is still negative, don't allow dequeue */
654 if (tbr->tbr_token <= 0) {
655 return;
656 }
657
658 /*
659 * ifclassq takes precedence over ALTQ queue;
660 * ifcq_drain count is adjusted by the caller.
661 */
662 if (drvmgt) {
663 fq_if_dequeue_sc_classq(ifq, sc, pkt, grp_idx);
664 } else {
665 fq_if_dequeue_classq(ifq, pkt, grp_idx);
666 }
667
668 if (pkt->cp_mbuf != NULL) {
669 switch (pkt->cp_ptype) {
670 case QP_MBUF:
671 tbr->tbr_token -= TBR_SCALE(m_pktlen(pkt->cp_mbuf));
672 break;
673
674 #if SKYWALK
675 case QP_PACKET:
676 tbr->tbr_token -=
677 TBR_SCALE(pkt->cp_kpkt->pkt_length);
678 break;
679 #endif /* SKYWALK */
680
681 default:
682 VERIFY(0);
683 /* NOTREACHED */
684 }
685 }
686 }
687
688 /*
689 * set a token bucket regulator.
690 * if the specified rate is zero, the token bucket regulator is deleted.
691 */
692 int
ifclassq_tbr_set(struct ifclassq * ifq,struct tb_profile * profile,boolean_t update)693 ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
694 boolean_t update)
695 {
696 struct tb_regulator *tbr;
697 struct ifnet *ifp = ifq->ifcq_ifp;
698 u_int64_t rate, old_rate;
699
700 IFCQ_LOCK_ASSERT_HELD(ifq);
701 VERIFY(IFCQ_IS_READY(ifq));
702
703 VERIFY(machclk_freq != 0);
704
705 tbr = &ifq->ifcq_tbr;
706 old_rate = tbr->tbr_rate_raw;
707
708 rate = profile->rate;
709 if (profile->percent > 0) {
710 u_int64_t eff_rate;
711
712 if (profile->percent > 100) {
713 return EINVAL;
714 }
715 if ((eff_rate = ifp->if_output_bw.eff_bw) == 0) {
716 return ENODEV;
717 }
718 rate = (eff_rate * profile->percent) / 100;
719 }
720
721 if (rate == 0) {
722 if (!IFCQ_TBR_IS_ENABLED(ifq)) {
723 return 0;
724 }
725
726 if (pktsched_verbose) {
727 printf("%s: TBR disabled\n", if_name(ifp));
728 }
729
730 /* disable this TBR */
731 ifq->ifcq_flags &= ~IFCQF_TBR;
732 bzero(tbr, sizeof(*tbr));
733 ifnet_set_start_cycle(ifp, NULL);
734 if (update) {
735 ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
736 }
737 return 0;
738 }
739
740 if (pktsched_verbose) {
741 printf("%s: TBR %s (rate %llu bps depth %u)\n", if_name(ifp),
742 (ifq->ifcq_flags & IFCQF_TBR) ? "reconfigured" :
743 "enabled", rate, profile->depth);
744 }
745
746 /* set the new TBR */
747 bzero(tbr, sizeof(*tbr));
748 tbr->tbr_rate_raw = rate;
749 tbr->tbr_percent = profile->percent;
750 ifq->ifcq_flags |= IFCQF_TBR;
751
752 /*
753 * Note that the TBR fill up time (hence the ifnet restart time)
754 * is directly related to the specified TBR depth. The ideal
755 * depth value should be computed such that the interval time
756 * between each successive wakeup is adequately spaced apart,
757 * in order to reduce scheduling overheads. A target interval
758 * of 10 ms seems to provide good performance balance. This can be
759 * overridden by specifying the depth profile. Values smaller than
760 * the ideal depth will reduce delay at the expense of CPU cycles.
761 */
762 tbr->tbr_rate = TBR_SCALE(rate / 8) / machclk_freq;
763 if (tbr->tbr_rate > 0) {
764 u_int32_t mtu = ifp->if_mtu;
765 int64_t ival, idepth = 0;
766 int i;
767
768 if (mtu < IF_MINMTU) {
769 mtu = IF_MINMTU;
770 }
771
772 ival = pktsched_nsecs_to_abstime(10 * NSEC_PER_MSEC); /* 10ms */
773
774 for (i = 1;; i++) {
775 idepth = TBR_SCALE(i * mtu);
776 if ((idepth / tbr->tbr_rate) > ival) {
777 break;
778 }
779 }
780 VERIFY(idepth > 0);
781
782 tbr->tbr_depth = TBR_SCALE(profile->depth);
783 if (tbr->tbr_depth == 0) {
784 tbr->tbr_filluptime = idepth / tbr->tbr_rate;
785 /* a little fudge factor to get closer to rate */
786 tbr->tbr_depth = idepth + (idepth >> 3);
787 } else {
788 tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
789 }
790 } else {
791 tbr->tbr_depth = TBR_SCALE(profile->depth);
792 tbr->tbr_filluptime = 0xffffffffffffffffLL;
793 }
794 tbr->tbr_token = tbr->tbr_depth;
795 tbr->tbr_last = read_machclk();
796
797 if (tbr->tbr_rate > 0 && (ifp->if_flags & IFF_UP)) {
798 struct timespec ts =
799 { 0, (long)pktsched_abs_to_nsecs(tbr->tbr_filluptime) };
800 if (pktsched_verbose) {
801 printf("%s: TBR calculated tokens %lld "
802 "filluptime %llu ns\n", if_name(ifp),
803 TBR_UNSCALE(tbr->tbr_token),
804 pktsched_abs_to_nsecs(tbr->tbr_filluptime));
805 }
806 ifnet_set_start_cycle(ifp, &ts);
807 } else {
808 if (pktsched_verbose) {
809 if (tbr->tbr_rate == 0) {
810 printf("%s: TBR calculated tokens %lld "
811 "infinite filluptime\n", if_name(ifp),
812 TBR_UNSCALE(tbr->tbr_token));
813 } else if (!(ifp->if_flags & IFF_UP)) {
814 printf("%s: TBR suspended (link is down)\n",
815 if_name(ifp));
816 }
817 }
818 ifnet_set_start_cycle(ifp, NULL);
819 }
820 if (update && tbr->tbr_rate_raw != old_rate) {
821 ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
822 }
823
824 return 0;
825 }
826
827 void
ifclassq_calc_target_qdelay(struct ifnet * ifp,uint64_t * if_target_qdelay,uint32_t flags)828 ifclassq_calc_target_qdelay(struct ifnet *ifp, uint64_t *if_target_qdelay,
829 uint32_t flags)
830 {
831 uint64_t qdelay = 0, qdelay_configed = 0, qdely_default = 0;
832 if (flags == IF_CLASSQ_DEF) {
833 qdelay = IFCQ_TARGET_QDELAY(ifp->if_snd);
834 }
835
836 switch (flags) {
837 case IF_CLASSQ_DEF:
838 qdelay_configed = ifclassq_def_c_target_qdelay;
839 qdely_default = IFQ_DEF_C_TARGET_DELAY;
840 break;
841 case IF_CLASSQ_L4S:
842 qdelay_configed = ifclassq_def_l4s_target_qdelay;
843 qdely_default = IFQ_DEF_L4S_TARGET_DELAY;
844 break;
845 case IF_CLASSQ_LOW_LATENCY:
846 qdelay_configed = ifclassq_ll_c_target_qdelay;
847 qdely_default = IFQ_LL_C_TARGET_DELAY;
848 break;
849 case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S):
850 qdelay_configed = ifclassq_ll_l4s_target_qdelay;
851 qdely_default = IFQ_LL_L4S_TARGET_DELAY;
852 break;
853 default:
854 VERIFY(0);
855 /* NOTREACHED */
856 __builtin_unreachable();
857 }
858
859 if (qdelay_configed != 0) {
860 qdelay = qdelay_configed;
861 }
862
863 /*
864 * If we do not know the effective bandwidth, use the default
865 * target queue delay.
866 */
867 if (qdelay == 0) {
868 qdelay = qdely_default;
869 }
870
871 /*
872 * If a delay has been added to ifnet start callback for
873 * coalescing, we have to add that to the pre-set target delay
874 * because the packets can be in the queue longer.
875 */
876 if ((ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
877 ifp->if_start_delay_timeout > 0) {
878 qdelay += ifp->if_start_delay_timeout;
879 }
880
881 *(if_target_qdelay) = qdelay;
882 }
883
884 void
ifclassq_calc_update_interval(uint64_t * update_interval,uint32_t flags)885 ifclassq_calc_update_interval(uint64_t *update_interval, uint32_t flags)
886 {
887 uint64_t interval = 0, interval_configed = 0, interval_default = 0;
888
889 switch (flags) {
890 case IF_CLASSQ_DEF:
891 interval_configed = ifclassq_def_c_update_interval;
892 interval_default = IFQ_DEF_C_UPDATE_INTERVAL;
893 break;
894 case IF_CLASSQ_L4S:
895 interval_configed = ifclassq_def_l4s_update_interval;
896 interval_default = IFQ_DEF_L4S_UPDATE_INTERVAL;
897 break;
898 case IF_CLASSQ_LOW_LATENCY:
899 interval_configed = ifclassq_ll_c_update_interval;
900 interval_default = IFQ_LL_C_UPDATE_INTERVAL;
901 break;
902 case (IF_CLASSQ_LOW_LATENCY | IF_CLASSQ_L4S):
903 interval_configed = ifclassq_ll_l4s_update_interval;
904 interval_default = IFQ_LL_L4S_UPDATE_INTERVAL;
905 break;
906 default:
907 VERIFY(0);
908 /* NOTREACHED */
909 __builtin_unreachable();
910 }
911
912 /* If the system level override is set, use it */
913 if (interval_configed != 0) {
914 interval = interval_configed;
915 }
916
917 /* Otherwise use the default value */
918 if (interval == 0) {
919 interval = interval_default;
920 }
921
922 *update_interval = interval;
923 }
924
925 void
ifclassq_reap_caches(boolean_t purge)926 ifclassq_reap_caches(boolean_t purge)
927 {
928 fq_codel_reap_caches(purge);
929 flowadv_reap_caches(purge);
930 }
931
932 struct ifclassq *
ifclassq_alloc(void)933 ifclassq_alloc(void)
934 {
935 struct ifclassq *ifcq;
936
937 ifcq = zalloc_flags(ifcq_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL);
938 os_ref_init(&ifcq->ifcq_refcnt, NULL);
939 os_ref_retain(&ifcq->ifcq_refcnt);
940 lck_mtx_init(&ifcq->ifcq_lock, &ifcq_lock_group, &ifcq_lock_attr);
941 return ifcq;
942 }
943
944 void
ifclassq_retain(struct ifclassq * ifcq)945 ifclassq_retain(struct ifclassq *ifcq)
946 {
947 os_ref_retain(&ifcq->ifcq_refcnt);
948 }
949
950 void
ifclassq_release(struct ifclassq ** pifcq)951 ifclassq_release(struct ifclassq **pifcq)
952 {
953 struct ifclassq *ifcq = *pifcq;
954
955 *pifcq = NULL;
956 if (os_ref_release(&ifcq->ifcq_refcnt) == 0) {
957 ifclassq_teardown(ifcq);
958 zfree(ifcq_zone, ifcq);
959 }
960 }
961
962 int
ifclassq_setup_group(struct ifclassq * ifcq,uint8_t grp_idx,uint8_t flags)963 ifclassq_setup_group(struct ifclassq *ifcq, uint8_t grp_idx, uint8_t flags)
964 {
965 int err;
966
967 IFCQ_LOCK(ifcq);
968 VERIFY(ifcq->ifcq_disc != NULL);
969 VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);
970
971 err = fq_if_create_grp(ifcq, grp_idx, flags);
972 IFCQ_UNLOCK(ifcq);
973
974 return err;
975 }
976
977 void
ifclassq_set_grp_combined(struct ifclassq * ifcq,uint8_t grp_idx)978 ifclassq_set_grp_combined(struct ifclassq *ifcq, uint8_t grp_idx)
979 {
980 IFCQ_LOCK(ifcq);
981 VERIFY(ifcq->ifcq_disc != NULL);
982 VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);
983
984 fq_if_set_grp_combined(ifcq, grp_idx);
985 IFCQ_UNLOCK(ifcq);
986 }
987
988 void
ifclassq_set_grp_separated(struct ifclassq * ifcq,uint8_t grp_idx)989 ifclassq_set_grp_separated(struct ifclassq *ifcq, uint8_t grp_idx)
990 {
991 IFCQ_LOCK(ifcq);
992 VERIFY(ifcq->ifcq_disc != NULL);
993 VERIFY(ifcq->ifcq_type == PKTSCHEDT_FQ_CODEL);
994
995 fq_if_set_grp_separated(ifcq, grp_idx);
996 IFCQ_UNLOCK(ifcq);
997 }
998