1 /*
2 * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/cdefs.h>
30
31 #include <sys/param.h>
32 #include <sys/malloc.h>
33 #include <sys/mbuf.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/errno.h>
37 #include <sys/mcache.h>
38 #include <sys/sysctl.h>
39
40 #include <dev/random/randomdev.h>
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/if_dl.h>
44 #include <net/if_types.h>
45 #include <net/net_osdep.h>
46 #include <net/pktsched/pktsched.h>
47 #include <net/pktsched/pktsched_fq_codel.h>
48 #include <net/pktsched/pktsched_netem.h>
49
50 #define _IP_VHL
51 #include <netinet/ip.h>
52 #include <netinet/ip6.h>
53
54 #include <pexpert/pexpert.h>
55
56 #if SKYWALK
57 #include <skywalk/os_skywalk_private.h>
58 #endif /* SKYWALK */
59
60 u_int32_t machclk_freq = 0;
61 u_int64_t machclk_per_sec = 0;
62 u_int32_t pktsched_verbose = 0; /* more noise if greater than 1 */
63
64 static void init_machclk(void);
65
66 SYSCTL_NODE(_net, OID_AUTO, pktsched, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "pktsched");
67
68 SYSCTL_UINT(_net_pktsched, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
69 &pktsched_verbose, 0, "Packet scheduler verbosity level");
70
71 void
pktsched_init(void)72 pktsched_init(void)
73 {
74 init_machclk();
75 if (machclk_freq == 0) {
76 panic("%s: no CPU clock available!", __func__);
77 /* NOTREACHED */
78 }
79 pktsched_fq_init();
80 }
81
82 static void
init_machclk(void)83 init_machclk(void)
84 {
85 /*
86 * Initialize machclk_freq using the timerbase frequency
87 * value from device specific info.
88 */
89 machclk_freq = (uint32_t)gPEClockFrequencyInfo.timebase_frequency_hz;
90
91 clock_interval_to_absolutetime_interval(1, NSEC_PER_SEC,
92 &machclk_per_sec);
93 }
94
/*
 * Convert an absolute-time value to nanoseconds by way of
 * absolutetime_to_nanoseconds() and return the result.
 */
u_int64_t
pktsched_abs_to_nsecs(u_int64_t abstime)
{
	u_int64_t converted_ns;

	absolutetime_to_nanoseconds(abstime, &converted_ns);

	return converted_ns;
}
103
/*
 * Convert a nanosecond count to absolute-time units by way of
 * nanoseconds_to_absolutetime() and return the result.
 */
u_int64_t
pktsched_nsecs_to_abstime(u_int64_t nsecs)
{
	u_int64_t converted_abs;

	nanoseconds_to_absolutetime(nsecs, &converted_abs);

	return converted_abs;
}
112
113 int
pktsched_setup(struct ifclassq * ifq,u_int32_t scheduler,u_int32_t sflags,classq_pkt_type_t ptype)114 pktsched_setup(struct ifclassq *ifq, u_int32_t scheduler, u_int32_t sflags,
115 classq_pkt_type_t ptype)
116 {
117 int error = 0;
118 u_int32_t rflags;
119
120 IFCQ_LOCK_ASSERT_HELD(ifq);
121
122 VERIFY(machclk_freq != 0);
123
124 /* Nothing to do unless the scheduler type changes */
125 if (ifq->ifcq_type == scheduler) {
126 return 0;
127 }
128
129 /*
130 * Remember the flags that need to be restored upon success, as
131 * they may be cleared when we tear down existing scheduler.
132 */
133 rflags = (ifq->ifcq_flags & IFCQF_ENABLED);
134
135 if (ifq->ifcq_type != PKTSCHEDT_NONE) {
136 pktsched_teardown(ifq);
137
138 /* Teardown should have succeeded */
139 VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
140 VERIFY(ifq->ifcq_disc == NULL);
141 }
142
143 error = fq_if_setup_ifclassq(ifq, sflags, ptype);
144 if (error == 0) {
145 ifq->ifcq_flags |= rflags;
146 }
147
148 return error;
149 }
150
151 void
pktsched_teardown(struct ifclassq * ifq)152 pktsched_teardown(struct ifclassq *ifq)
153 {
154 IFCQ_LOCK_ASSERT_HELD(ifq);
155 if_qflush(ifq->ifcq_ifp, ifq, true);
156 VERIFY(IFCQ_IS_EMPTY(ifq));
157 ifq->ifcq_flags &= ~IFCQF_ENABLED;
158 if (ifq->ifcq_type == PKTSCHEDT_FQ_CODEL) {
159 /* Could be PKTSCHEDT_NONE */
160 fq_if_teardown_ifclassq(ifq);
161 }
162 return;
163 }
164
165 int
pktsched_getqstats(struct ifclassq * ifq,u_int32_t gid,u_int32_t qid,struct if_ifclassq_stats * ifqs)166 pktsched_getqstats(struct ifclassq *ifq, u_int32_t gid, u_int32_t qid,
167 struct if_ifclassq_stats *ifqs)
168 {
169 int error = 0;
170
171 IFCQ_LOCK_ASSERT_HELD(ifq);
172
173 if (ifq->ifcq_type == PKTSCHEDT_FQ_CODEL) {
174 /* Could be PKTSCHEDT_NONE */
175 error = fq_if_getqstats_ifclassq(ifq, (uint8_t)gid, qid, ifqs);
176 }
177
178 return error;
179 }
180
181 void
pktsched_pkt_encap(pktsched_pkt_t * pkt,classq_pkt_t * cpkt)182 pktsched_pkt_encap(pktsched_pkt_t *pkt, classq_pkt_t *cpkt)
183 {
184 pkt->pktsched_pkt = *cpkt;
185 pkt->pktsched_tail = *cpkt;
186 pkt->pktsched_pcnt = 1;
187
188 switch (cpkt->cp_ptype) {
189 case QP_MBUF:
190 pkt->pktsched_plen =
191 (uint32_t)m_pktlen(pkt->pktsched_pkt_mbuf);
192 break;
193
194 #if SKYWALK
195 case QP_PACKET:
196 pkt->pktsched_plen = pkt->pktsched_pkt_kpkt->pkt_length;
197 break;
198 #endif /* SKYWALK */
199
200 default:
201 VERIFY(0);
202 /* NOTREACHED */
203 __builtin_unreachable();
204 }
205 }
206
207 void
pktsched_pkt_encap_chain(pktsched_pkt_t * pkt,classq_pkt_t * cpkt,classq_pkt_t * tail,uint32_t cnt,uint32_t bytes)208 pktsched_pkt_encap_chain(pktsched_pkt_t *pkt, classq_pkt_t *cpkt,
209 classq_pkt_t *tail, uint32_t cnt, uint32_t bytes)
210 {
211 pkt->pktsched_pkt = *cpkt;
212 pkt->pktsched_tail = *tail;
213 pkt->pktsched_pcnt = cnt;
214 pkt->pktsched_plen = bytes;
215
216 switch (cpkt->cp_ptype) {
217 case QP_MBUF:
218 break;
219
220 #if SKYWALK
221 case QP_PACKET:
222 break;
223 #endif /* SKYWALK */
224
225 default:
226 VERIFY(0);
227 /* NOTREACHED */
228 __builtin_unreachable();
229 }
230 }
231
232 int
pktsched_clone_pkt(pktsched_pkt_t * pkt1,pktsched_pkt_t * pkt2)233 pktsched_clone_pkt(pktsched_pkt_t *pkt1, pktsched_pkt_t *pkt2)
234 {
235 struct mbuf *m1, *m2;
236 #if SKYWALK
237 struct __kern_packet *p1;
238 kern_packet_t ph2;
239 int err;
240 #endif /* SKYWALK */
241
242 ASSERT(pkt1 != NULL);
243 ASSERT(pkt1->pktsched_pkt_mbuf != NULL);
244 ASSERT(pkt1->pktsched_pcnt == 1);
245
246 /* allow in place clone, but make sure pkt2->pktsched_pkt won't leak */
247 ASSERT((pkt1 == pkt2 && pkt1->pktsched_pkt_mbuf ==
248 pkt2->pktsched_pkt_mbuf) || (pkt1 != pkt2 &&
249 pkt2->pktsched_pkt_mbuf == NULL));
250
251 switch (pkt1->pktsched_ptype) {
252 case QP_MBUF:
253 m1 = (struct mbuf *)pkt1->pktsched_pkt_mbuf;
254 m2 = m_dup(m1, M_NOWAIT);
255 if (__improbable(m2 == NULL)) {
256 return ENOBUFS;
257 }
258 pkt2->pktsched_pkt_mbuf = m2;
259 break;
260
261 #if SKYWALK
262 case QP_PACKET:
263 p1 = (struct __kern_packet *)pkt1->pktsched_pkt_kpkt;
264 err = kern_packet_clone_nosleep(SK_PTR_ENCODE(p1,
265 METADATA_TYPE(p1), METADATA_SUBTYPE(p1)), &ph2,
266 KPKT_COPY_HEAVY);
267 if (__improbable(err != 0)) {
268 return err;
269 }
270 ASSERT(ph2 != 0);
271 VERIFY(kern_packet_finalize(ph2) == 0);
272 pkt2->pktsched_pkt_kpkt = SK_PTR_ADDR_KPKT(ph2);
273 break;
274 #endif /* SKYWALK */
275
276 default:
277 VERIFY(0);
278 /* NOTREACHED */
279 __builtin_unreachable();
280 }
281
282 pkt2->pktsched_plen = pkt1->pktsched_plen;
283 pkt2->pktsched_ptype = pkt1->pktsched_ptype;
284 pkt2->pktsched_tail = pkt2->pktsched_pkt;
285 pkt2->pktsched_pcnt = 1;
286 return 0;
287 }
288
289 void
pktsched_corrupt_packet(pktsched_pkt_t * pkt)290 pktsched_corrupt_packet(pktsched_pkt_t *pkt)
291 {
292 struct mbuf *m = NULL;
293 uint8_t *data = NULL;
294 uint32_t data_len = 0;
295 uint32_t rand32, rand_off, rand_bit;
296 #if SKYWALK
297 struct __kern_packet *p = NULL;
298 #endif /* SKYWALK */
299
300 switch (pkt->pktsched_ptype) {
301 case QP_MBUF:
302 m = pkt->pktsched_pkt_mbuf;
303 data = mtod(m, uint8_t *);
304 data_len = m->m_pkthdr.len;
305 break;
306 #if SKYWALK
307 case QP_PACKET:
308 p = pkt->pktsched_pkt_kpkt;
309 if (p->pkt_pflags & PKT_F_MBUF_DATA) {
310 m = p->pkt_mbuf;
311 data = mtod(m, uint8_t *);
312 data_len = m->m_pkthdr.len;
313 } else {
314 MD_BUFLET_ADDR_DLEN(p, data, data_len);
315 }
316 break;
317 #endif /* SKYWALK */
318
319 default:
320 /* NOTREACHED */
321 VERIFY(0);
322 __builtin_unreachable();
323 }
324
325 read_frandom(&rand32, sizeof(rand32));
326 rand_bit = rand32 & 0x8;
327 rand_off = (rand32 >> 3) % data_len;
328 data[rand_off] ^= 1 << rand_bit;
329 }
330
331 void
pktsched_free_pkt(pktsched_pkt_t * pkt)332 pktsched_free_pkt(pktsched_pkt_t *pkt)
333 {
334 uint32_t cnt = pkt->pktsched_pcnt;
335 ASSERT(cnt != 0);
336
337 switch (pkt->pktsched_ptype) {
338 case QP_MBUF: {
339 struct mbuf *m;
340
341 m = pkt->pktsched_pkt_mbuf;
342 if (cnt == 1) {
343 VERIFY(m->m_nextpkt == NULL);
344 } else {
345 VERIFY(m->m_nextpkt != NULL);
346 }
347 m_freem_list(m);
348 break;
349 }
350 #if SKYWALK
351 case QP_PACKET: {
352 struct __kern_packet *kpkt;
353 int pcnt = 0;
354
355 kpkt = pkt->pktsched_pkt_kpkt;
356 if (cnt == 1) {
357 VERIFY(kpkt->pkt_nextpkt == NULL);
358 } else {
359 VERIFY(kpkt->pkt_nextpkt != NULL);
360 }
361 pp_free_packet_chain(kpkt, &pcnt);
362 VERIFY(cnt == (uint32_t)pcnt);
363 break;
364 }
365 #endif /* SKYWALK */
366
367 default:
368 VERIFY(0);
369 /* NOTREACHED */
370 __builtin_unreachable();
371 }
372 pkt->pktsched_pkt = CLASSQ_PKT_INITIALIZER(pkt->pktsched_pkt);
373 pkt->pktsched_tail = CLASSQ_PKT_INITIALIZER(pkt->pktsched_tail);
374 pkt->pktsched_plen = 0;
375 pkt->pktsched_pcnt = 0;
376 }
377
378 mbuf_svc_class_t
pktsched_get_pkt_svc(pktsched_pkt_t * pkt)379 pktsched_get_pkt_svc(pktsched_pkt_t *pkt)
380 {
381 mbuf_svc_class_t svc = MBUF_SC_UNSPEC;
382
383 switch (pkt->pktsched_ptype) {
384 case QP_MBUF:
385 svc = m_get_service_class(pkt->pktsched_pkt_mbuf);
386 break;
387
388 #if SKYWALK
389 case QP_PACKET:
390 svc = pkt->pktsched_pkt_kpkt->pkt_svc_class;
391 break;
392 #endif /* SKYWALK */
393
394 default:
395 VERIFY(0);
396 /* NOTREACHED */
397 __builtin_unreachable();
398 }
399
400 return svc;
401 }
402
403 void
pktsched_get_pkt_vars(pktsched_pkt_t * pkt,volatile uint32_t ** flags,uint64_t ** timestamp,uint32_t * flowid,uint8_t * flowsrc,uint8_t * proto,uint32_t * comp_gencnt)404 pktsched_get_pkt_vars(pktsched_pkt_t *pkt, volatile uint32_t **flags,
405 uint64_t **timestamp, uint32_t *flowid, uint8_t *flowsrc, uint8_t *proto,
406 uint32_t *comp_gencnt)
407 {
408 switch (pkt->pktsched_ptype) {
409 case QP_MBUF: {
410 struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);
411
412 if (flags != NULL) {
413 *flags = &pkth->pkt_flags;
414 }
415 if (timestamp != NULL) {
416 *timestamp = &pkth->pkt_timestamp;
417 }
418 if (flowid != NULL) {
419 *flowid = pkth->pkt_flowid;
420 }
421 if (flowsrc != NULL) {
422 *flowsrc = pkth->pkt_flowsrc;
423 }
424 if (proto != NULL) {
425 *proto = pkth->pkt_proto;
426 }
427 if (comp_gencnt != NULL) {
428 *comp_gencnt = pkth->comp_gencnt;
429 }
430
431 break;
432 }
433
434 #if SKYWALK
435 case QP_PACKET: {
436 struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
437
438 if (flags != NULL) {
439 /* use lower-32 bit for common flags */
440 *flags = &kp->pkt_pflags32;
441 }
442 if (timestamp != NULL) {
443 *timestamp = &kp->pkt_timestamp;
444 }
445 if (flowid != NULL) {
446 *flowid = kp->pkt_flow_token;
447 }
448 if (flowsrc != NULL) {
449 *flowsrc = (uint8_t)kp->pkt_flowsrc_type;
450 }
451 if (proto != NULL) {
452 *proto = kp->pkt_transport_protocol;
453 }
454 if (comp_gencnt != NULL) {
455 *comp_gencnt = kp->pkt_comp_gencnt;
456 }
457
458 break;
459 }
460 #endif /* SKYWALK */
461
462 default:
463 VERIFY(0);
464 /* NOTREACHED */
465 __builtin_unreachable();
466 }
467 }
468
469 struct flowadv_fcentry *
pktsched_alloc_fcentry(pktsched_pkt_t * pkt,struct ifnet * ifp,int how)470 pktsched_alloc_fcentry(pktsched_pkt_t *pkt, struct ifnet *ifp, int how)
471 {
472 #pragma unused(ifp)
473 struct flowadv_fcentry *fce = NULL;
474
475 switch (pkt->pktsched_ptype) {
476 case QP_MBUF: {
477 struct mbuf *m = pkt->pktsched_pkt_mbuf;
478
479 fce = flowadv_alloc_entry(how);
480 if (fce == NULL) {
481 break;
482 }
483
484 _CASSERT(sizeof(m->m_pkthdr.pkt_flowid) ==
485 sizeof(fce->fce_flowid));
486
487 fce->fce_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
488 fce->fce_flowid = m->m_pkthdr.pkt_flowid;
489 #if SKYWALK
490 _CASSERT(sizeof(m->m_pkthdr.pkt_mpriv_srcid) ==
491 sizeof(fce->fce_flowsrc_token));
492 _CASSERT(sizeof(m->m_pkthdr.pkt_mpriv_fidx) ==
493 sizeof(fce->fce_flowsrc_fidx));
494
495 if (fce->fce_flowsrc_type == FLOWSRC_CHANNEL) {
496 fce->fce_flowsrc_fidx = m->m_pkthdr.pkt_mpriv_fidx;
497 fce->fce_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
498 fce->fce_ifp = ifp;
499 }
500 #endif /* SKYWALK */
501 break;
502 }
503
504 #if SKYWALK
505 case QP_PACKET: {
506 struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
507
508 fce = flowadv_alloc_entry(how);
509 if (fce == NULL) {
510 break;
511 }
512
513 _CASSERT(sizeof(fce->fce_flowid) ==
514 sizeof(kp->pkt_flow_token));
515 _CASSERT(sizeof(fce->fce_flowsrc_fidx) ==
516 sizeof(kp->pkt_flowsrc_fidx));
517 _CASSERT(sizeof(fce->fce_flowsrc_token) ==
518 sizeof(kp->pkt_flowsrc_token));
519
520 ASSERT(kp->pkt_pflags & PKT_F_FLOW_ADV);
521 fce->fce_flowsrc_type = kp->pkt_flowsrc_type;
522 fce->fce_flowid = kp->pkt_flow_token;
523 fce->fce_flowsrc_fidx = kp->pkt_flowsrc_fidx;
524 fce->fce_flowsrc_token = kp->pkt_flowsrc_token;
525 fce->fce_ifp = ifp;
526 break;
527 }
528 #endif /* SKYWALK */
529
530 default:
531 VERIFY(0);
532 /* NOTREACHED */
533 __builtin_unreachable();
534 }
535
536 return fce;
537 }
538
539 uint32_t *
pktsched_get_pkt_sfb_vars(pktsched_pkt_t * pkt,uint32_t ** sfb_flags)540 pktsched_get_pkt_sfb_vars(pktsched_pkt_t *pkt, uint32_t **sfb_flags)
541 {
542 uint32_t *hashp = NULL;
543
544 switch (pkt->pktsched_ptype) {
545 case QP_MBUF: {
546 struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);
547
548 _CASSERT(sizeof(pkth->pkt_mpriv_hash) == sizeof(uint32_t));
549 _CASSERT(sizeof(pkth->pkt_mpriv_flags) == sizeof(uint32_t));
550 *sfb_flags = &pkth->pkt_mpriv_flags;
551 hashp = &pkth->pkt_mpriv_hash;
552 break;
553 }
554
555 #if SKYWALK
556 case QP_PACKET: {
557 struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
558
559 _CASSERT(sizeof(kp->pkt_classq_hash) == sizeof(uint32_t));
560 _CASSERT(sizeof(kp->pkt_classq_flags) == sizeof(uint32_t));
561 *sfb_flags = &kp->pkt_classq_flags;
562 hashp = &kp->pkt_classq_hash;
563 break;
564 }
565 #endif /* SKYWALK */
566
567 default:
568 VERIFY(0);
569 /* NOTREACHED */
570 __builtin_unreachable();
571 }
572
573 return hashp;
574 }
575