1 /*
2 * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/cdefs.h>
30
31 #include <sys/param.h>
32 #include <sys/malloc.h>
33 #include <sys/mbuf.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/errno.h>
37 #include <sys/mcache.h>
38 #include <sys/sysctl.h>
39
40 #include <dev/random/randomdev.h>
41 #include <net/droptap.h>
42 #include <net/if.h>
43 #include <net/if_var.h>
44 #include <net/if_dl.h>
45 #include <net/if_types.h>
46 #include <net/net_osdep.h>
47 #include <net/droptap.h>
48 #include <net/pktsched/pktsched.h>
49 #include <net/pktsched/pktsched_fq_codel.h>
50 #include <net/pktsched/pktsched_netem.h>
51
52 #define _IP_VHL
53 #include <netinet/ip.h>
54 #include <netinet/ip6.h>
55
56 #include <pexpert/pexpert.h>
57
58 #if SKYWALK
59 #include <skywalk/os_skywalk_private.h>
60 #endif /* SKYWALK */
61
/* Machine clock frequency in Hz; set once by init_machclk() at boot. */
u_int32_t machclk_freq = 0;
/* Number of absolute-time units per second; set by init_machclk(). */
u_int64_t machclk_per_sec = 0;
u_int32_t pktsched_verbose = 0; /* more noise if greater than 1 */

static void init_machclk(void);

/* sysctl net.pktsched: parent node for packet scheduler knobs. */
SYSCTL_NODE(_net, OID_AUTO, pktsched, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "pktsched");

/* sysctl net.pktsched.verbose: runtime control of pktsched_verbose. */
SYSCTL_UINT(_net_pktsched, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
    &pktsched_verbose, 0, "Packet scheduler verbosity level");
72
73 void
pktsched_init(void)74 pktsched_init(void)
75 {
76 init_machclk();
77 if (machclk_freq == 0) {
78 panic("%s: no CPU clock available!", __func__);
79 /* NOTREACHED */
80 }
81 pktsched_fq_init();
82 }
83
84 static void
init_machclk(void)85 init_machclk(void)
86 {
87 /*
88 * Initialize machclk_freq using the timerbase frequency
89 * value from device specific info.
90 */
91 machclk_freq = (uint32_t)gPEClockFrequencyInfo.timebase_frequency_hz;
92
93 clock_interval_to_absolutetime_interval(1, NSEC_PER_SEC,
94 &machclk_per_sec);
95 }
96
97 u_int64_t
pktsched_abs_to_nsecs(u_int64_t abstime)98 pktsched_abs_to_nsecs(u_int64_t abstime)
99 {
100 u_int64_t nsecs;
101
102 absolutetime_to_nanoseconds(abstime, &nsecs);
103 return nsecs;
104 }
105
106 u_int64_t
pktsched_nsecs_to_abstime(u_int64_t nsecs)107 pktsched_nsecs_to_abstime(u_int64_t nsecs)
108 {
109 u_int64_t abstime;
110
111 nanoseconds_to_absolutetime(nsecs, &abstime);
112 return abstime;
113 }
114
115 int
pktsched_setup(struct ifclassq * ifq,u_int32_t scheduler,u_int32_t sflags,classq_pkt_type_t ptype)116 pktsched_setup(struct ifclassq *ifq, u_int32_t scheduler, u_int32_t sflags,
117 classq_pkt_type_t ptype)
118 {
119 int error = 0;
120 u_int32_t rflags;
121
122 IFCQ_LOCK_ASSERT_HELD(ifq);
123
124 VERIFY(machclk_freq != 0);
125
126 /* Nothing to do unless the scheduler type changes */
127 if (ifq->ifcq_type == scheduler) {
128 return 0;
129 }
130
131 /*
132 * Remember the flags that need to be restored upon success, as
133 * they may be cleared when we tear down existing scheduler.
134 */
135 rflags = (ifq->ifcq_flags & IFCQF_ENABLED);
136
137 if (ifq->ifcq_type != PKTSCHEDT_NONE) {
138 pktsched_teardown(ifq);
139
140 /* Teardown should have succeeded */
141 VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
142 VERIFY(ifq->ifcq_disc == NULL);
143 }
144
145 error = fq_if_setup_ifclassq(ifq, sflags, ptype);
146 if (error == 0) {
147 ifq->ifcq_flags |= rflags;
148 }
149
150 return error;
151 }
152
153 void
pktsched_teardown(struct ifclassq * ifq)154 pktsched_teardown(struct ifclassq *ifq)
155 {
156 IFCQ_LOCK_ASSERT_HELD(ifq);
157 if_qflush(ifq->ifcq_ifp, ifq, true);
158 VERIFY(IFCQ_IS_EMPTY(ifq));
159 ifq->ifcq_flags &= ~IFCQF_ENABLED;
160 if (ifq->ifcq_type == PKTSCHEDT_FQ_CODEL) {
161 /* Could be PKTSCHEDT_NONE */
162 fq_if_teardown_ifclassq(ifq);
163 }
164 return;
165 }
166
167 int
pktsched_getqstats(struct ifclassq * ifq,u_int32_t gid,u_int32_t qid,struct if_ifclassq_stats * ifqs)168 pktsched_getqstats(struct ifclassq *ifq, u_int32_t gid, u_int32_t qid,
169 struct if_ifclassq_stats *ifqs)
170 {
171 int error = 0;
172
173 IFCQ_LOCK_ASSERT_HELD(ifq);
174
175 if (ifq->ifcq_type == PKTSCHEDT_FQ_CODEL) {
176 /* Could be PKTSCHEDT_NONE */
177 error = fq_if_getqstats_ifclassq(ifq, (uint8_t)gid, qid, ifqs);
178 }
179
180 return error;
181 }
182
183 void
pktsched_pkt_encap(pktsched_pkt_t * pkt,classq_pkt_t * cpkt)184 pktsched_pkt_encap(pktsched_pkt_t *pkt, classq_pkt_t *cpkt)
185 {
186 pkt->pktsched_pkt = *cpkt;
187 pkt->pktsched_tail = *cpkt;
188 pkt->pktsched_pcnt = 1;
189
190 switch (cpkt->cp_ptype) {
191 case QP_MBUF:
192 pkt->pktsched_plen =
193 (uint32_t)m_pktlen(pkt->pktsched_pkt_mbuf);
194 break;
195
196 #if SKYWALK
197 case QP_PACKET:
198 pkt->pktsched_plen = pkt->pktsched_pkt_kpkt->pkt_length;
199 break;
200 #endif /* SKYWALK */
201
202 default:
203 VERIFY(0);
204 /* NOTREACHED */
205 __builtin_unreachable();
206 }
207 }
208
209 void
pktsched_pkt_encap_chain(pktsched_pkt_t * pkt,classq_pkt_t * cpkt,classq_pkt_t * tail,uint32_t cnt,uint32_t bytes)210 pktsched_pkt_encap_chain(pktsched_pkt_t *pkt, classq_pkt_t *cpkt,
211 classq_pkt_t *tail, uint32_t cnt, uint32_t bytes)
212 {
213 pkt->pktsched_pkt = *cpkt;
214 pkt->pktsched_tail = *tail;
215 pkt->pktsched_pcnt = cnt;
216 pkt->pktsched_plen = bytes;
217
218 switch (cpkt->cp_ptype) {
219 case QP_MBUF:
220 break;
221
222 #if SKYWALK
223 case QP_PACKET:
224 break;
225 #endif /* SKYWALK */
226
227 default:
228 VERIFY(0);
229 /* NOTREACHED */
230 __builtin_unreachable();
231 }
232 }
233
/*
 * Duplicate a single-packet pktsched descriptor: pkt2 receives a deep
 * copy of the packet held by pkt1.  In-place cloning (pkt1 == pkt2) is
 * allowed; otherwise pkt2 must not already hold a packet, so nothing
 * leaks when it is overwritten.  Returns 0, or an errno on failure.
 */
int
pktsched_clone_pkt(pktsched_pkt_t *pkt1, pktsched_pkt_t *pkt2)
{
	struct mbuf *m1, *m2;
#if SKYWALK
	struct __kern_packet *p1;
	kern_packet_t ph2;
	int err;
#endif /* SKYWALK */

	ASSERT(pkt1 != NULL);
	/* union access: a non-NULL check is valid for either packet type */
	ASSERT(pkt1->pktsched_pkt_mbuf != NULL);
	/* only single packets (no chains) can be cloned */
	ASSERT(pkt1->pktsched_pcnt == 1);

	/* allow in place clone, but make sure pkt2->pktsched_pkt won't leak */
	ASSERT((pkt1 == pkt2 && pkt1->pktsched_pkt_mbuf ==
	    pkt2->pktsched_pkt_mbuf) || (pkt1 != pkt2 &&
	    pkt2->pktsched_pkt_mbuf == NULL));

	switch (pkt1->pktsched_ptype) {
	case QP_MBUF:
		m1 = (struct mbuf *)pkt1->pktsched_pkt_mbuf;
		m2 = m_dup(m1, M_NOWAIT);
		if (__improbable(m2 == NULL)) {
			return ENOBUFS;
		}
		pkt2->pktsched_pkt_mbuf = m2;
		break;

#if SKYWALK
	case QP_PACKET:
		p1 = (struct __kern_packet *)pkt1->pktsched_pkt_kpkt;
		/* deep copy, including the buffer contents */
		err = kern_packet_clone_nosleep(SK_PTR_ENCODE(p1,
		    METADATA_TYPE(p1), METADATA_SUBTYPE(p1)), &ph2,
		    KPKT_COPY_HEAVY);
		if (__improbable(err != 0)) {
			return err;
		}
		ASSERT(ph2 != 0);
		VERIFY(kern_packet_finalize(ph2) == 0);
		pkt2->pktsched_pkt_kpkt = SK_PTR_ADDR_KPKT(ph2);
		break;
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* copy the scalar metadata; the clone is a single packet as well */
	pkt2->pktsched_plen = pkt1->pktsched_plen;
	pkt2->pktsched_ptype = pkt1->pktsched_ptype;
	pkt2->pktsched_tail = pkt2->pktsched_pkt;
	pkt2->pktsched_pcnt = 1;
	return 0;
}
290
/*
 * Flip one random bit in the packet data; used to simulate corruption
 * (e.g. by the netem scheduler).  A single random word selects both the
 * byte offset and the bit position within that byte.
 */
void
pktsched_corrupt_packet(pktsched_pkt_t *pkt)
{
	struct mbuf *m = NULL;
	uint8_t *data = NULL;
	uint32_t data_len = 0;
	uint32_t rand32, rand_off, rand_bit;
#if SKYWALK
	struct __kern_packet *p = NULL;
#endif /* SKYWALK */

	switch (pkt->pktsched_ptype) {
	case QP_MBUF:
		m = pkt->pktsched_pkt_mbuf;
		data = mtod(m, uint8_t *);
		data_len = m->m_pkthdr.len;
		break;
#if SKYWALK
	case QP_PACKET:
		p = pkt->pktsched_pkt_kpkt;
		if (p->pkt_pflags & PKT_F_MBUF_DATA) {
			/* packet still carries its data in an attached mbuf */
			m = p->pkt_mbuf;
			data = mtod(m, uint8_t *);
			data_len = m->m_pkthdr.len;
		} else {
			MD_BUFLET_ADDR_DLEN(p, data, data_len);
		}
		break;
#endif /* SKYWALK */

	default:
		/* NOTREACHED */
		VERIFY(0);
		__builtin_unreachable();
	}

	/*
	 * NOTE(review): assumes data_len != 0 -- the modulo below would
	 * otherwise divide by zero.  Confirm callers never hand in an
	 * empty packet.
	 */
	read_frandom(&rand32, sizeof(rand32));
	rand_bit = rand32 & 0x7;
	rand_off = (rand32 >> 3) % data_len;
	data[rand_off] ^= (uint8_t)(1 << rand_bit);
}
332
/*
 * Free the packet (or packet chain) held by the descriptor, then scrub
 * the descriptor fields so stale handles cannot be reused.
 */
void
pktsched_free_pkt(pktsched_pkt_t *pkt)
{
	uint32_t cnt = pkt->pktsched_pcnt;
	ASSERT(cnt != 0);

	switch (pkt->pktsched_ptype) {
	case QP_MBUF: {
		struct mbuf *m;

		m = pkt->pktsched_pkt_mbuf;
		/* sanity: chain linkage must agree with the recorded count */
		if (cnt == 1) {
			VERIFY(m->m_nextpkt == NULL);
		} else {
			VERIFY(m->m_nextpkt != NULL);
		}
		m_freem_list(m);
		break;
	}
#if SKYWALK
	case QP_PACKET: {
		struct __kern_packet *kpkt;
		int pcnt = 0;

		kpkt = pkt->pktsched_pkt_kpkt;
		/* sanity: chain linkage must agree with the recorded count */
		if (cnt == 1) {
			VERIFY(kpkt->pkt_nextpkt == NULL);
		} else {
			VERIFY(kpkt->pkt_nextpkt != NULL);
		}
		pp_free_packet_chain(kpkt, &pcnt);
		/* the pool reports how many it freed; must match our count */
		VERIFY(cnt == (uint32_t)pcnt);
		break;
	}
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* scrub the descriptor to catch accidental reuse */
	pkt->pktsched_pkt = CLASSQ_PKT_INITIALIZER(pkt->pktsched_pkt);
	pkt->pktsched_tail = CLASSQ_PKT_INITIALIZER(pkt->pktsched_tail);
	pkt->pktsched_plen = 0;
	pkt->pktsched_pcnt = 0;
}
379
380 void
pktsched_drop_pkt(pktsched_pkt_t * pkt,struct ifnet * ifp,drop_reason_t reason,const char * funcname,uint16_t linenum,uint16_t flags)381 pktsched_drop_pkt(pktsched_pkt_t *pkt, struct ifnet *ifp, drop_reason_t reason, const char *funcname,
382 uint16_t linenum, uint16_t flags)
383 {
384 if (__probable(droptap_total_tap_count == 0)) {
385 pktsched_free_pkt(pkt);
386 return;
387 }
388
389 uint32_t cnt = pkt->pktsched_pcnt;
390 ASSERT(cnt != 0);
391
392 switch (pkt->pktsched_ptype) {
393 case QP_MBUF: {
394 struct mbuf *m;
395
396 m = pkt->pktsched_pkt_mbuf;
397 if (cnt == 1) {
398 VERIFY(m->m_nextpkt == NULL);
399 } else {
400 VERIFY(m->m_nextpkt != NULL);
401 }
402 m_drop_list(m, ifp, flags | DROPTAP_FLAG_DIR_OUT, reason, funcname, linenum);
403 break;
404 }
405 #if SKYWALK
406 case QP_PACKET: {
407 struct __kern_packet *kpkt;
408
409 kpkt = pkt->pktsched_pkt_kpkt;
410 if (cnt == 1) {
411 VERIFY(kpkt->pkt_nextpkt == NULL);
412 } else {
413 VERIFY(kpkt->pkt_nextpkt != NULL);
414 }
415 droptap_output_packet(SK_PKT2PH(kpkt), reason, funcname, linenum,
416 flags, ifp, kpkt->pkt_qum.qum_pid, NULL, -1, NULL, 0, 0);
417 break;
418 }
419 #endif /* SKYWALK */
420
421 default:
422 VERIFY(0);
423 /* NOTREACHED */
424 __builtin_unreachable();
425 }
426
427 pktsched_free_pkt(pkt);
428 }
429
430 mbuf_svc_class_t
pktsched_get_pkt_svc(pktsched_pkt_t * pkt)431 pktsched_get_pkt_svc(pktsched_pkt_t *pkt)
432 {
433 mbuf_svc_class_t svc = MBUF_SC_UNSPEC;
434
435 switch (pkt->pktsched_ptype) {
436 case QP_MBUF:
437 svc = m_get_service_class(pkt->pktsched_pkt_mbuf);
438 break;
439
440 #if SKYWALK
441 case QP_PACKET:
442 svc = pkt->pktsched_pkt_kpkt->pkt_svc_class;
443 break;
444 #endif /* SKYWALK */
445
446 default:
447 VERIFY(0);
448 /* NOTREACHED */
449 __builtin_unreachable();
450 }
451
452 return svc;
453 }
454
/*
 * Extract commonly-used AQM metadata from a packet, hiding the layout
 * differences between mbufs and Skywalk packets.  Every out-parameter
 * may be NULL if the caller does not need that field.  "flags" and
 * "timestamp" return pointers into the packet's own metadata so they
 * can be updated in place; the remaining fields are copied out.
 */
void
pktsched_get_pkt_vars(pktsched_pkt_t *pkt, volatile uint32_t **flags,
    uint64_t **timestamp, uint32_t *flowid, uint8_t *flowsrc, uint8_t *proto,
    uint32_t *comp_gencnt, uint64_t *pkt_tx_time)
{
	switch (pkt->pktsched_ptype) {
	case QP_MBUF: {
		struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);

		if (flags != NULL) {
			*flags = &pkth->pkt_flags;
		}
		if (timestamp != NULL) {
			*timestamp = &pkth->pkt_timestamp;
		}
		if (flowid != NULL) {
			*flowid = pkth->pkt_flowid;
		}
		if (flowsrc != NULL) {
			*flowsrc = pkth->pkt_flowsrc;
		}
		if (proto != NULL) {
			/*
			 * rdar://100524205 - We want to use the pkt_ext_flags
			 * to denote QUIC packets, but AQM is already written in
			 * such a way where IPPROTO_QUIC is used to denote QUIC
			 * packets.
			 */
			if (pkth->pkt_ext_flags & PKTF_EXT_QUIC) {
				*proto = IPPROTO_QUIC;
			} else {
				*proto = pkth->pkt_proto;
			}
		}
		if (comp_gencnt != NULL) {
			*comp_gencnt = pkth->comp_gencnt;
		}
		if (pkt_tx_time != NULL) {
			struct m_tag *tag;
			/*
			 * The AQM m_tag, when attached (rare), carries the
			 * packet's requested tx time; default to 0 otherwise.
			 */
			tag = m_tag_locate(pkt->pktsched_pkt_mbuf, KERNEL_MODULE_TAG_ID,
			    KERNEL_TAG_TYPE_AQM);
			if (__improbable(tag != NULL)) {
				*pkt_tx_time = *(uint64_t *)tag->m_tag_data;
			} else {
				*pkt_tx_time = 0;
			}
		}

		break;
	}

#if SKYWALK
	case QP_PACKET: {
		struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;

		if (flags != NULL) {
			/* use lower-32 bit for common flags */
			*flags = &kp->pkt_pflags32;
		}
		if (timestamp != NULL) {
			*timestamp = &kp->pkt_timestamp;
		}
		if (flowid != NULL) {
			*flowid = kp->pkt_flow_token;
		}
		if (flowsrc != NULL) {
			*flowsrc = (uint8_t)kp->pkt_flowsrc_type;
		}
		if (proto != NULL) {
			*proto = kp->pkt_transport_protocol;
		}
		if (comp_gencnt != NULL) {
			*comp_gencnt = kp->pkt_comp_gencnt;
		}
		if (pkt_tx_time != NULL) {
			/* Skywalk keeps the tx time in the packet itself */
			*pkt_tx_time = __packet_get_tx_timestamp(SK_PKT2PH(kp));
		}

		break;
	}
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}
543
/*
 * Allocate a flow-advisory entry and populate it from the packet's flow
 * identifiers (source type, flow id/token and -- for channel flows --
 * the flow-source index and token).  "how" is the wait flag passed to
 * flowadv_alloc_entry().  Returns NULL if the allocation fails.
 */
struct flowadv_fcentry *
pktsched_alloc_fcentry(pktsched_pkt_t *pkt, struct ifnet *ifp, int how)
{
/* ifp is only consumed in the SKYWALK-conditional paths below */
#pragma unused(ifp)
	struct flowadv_fcentry *fce = NULL;

	switch (pkt->pktsched_ptype) {
	case QP_MBUF: {
		struct mbuf *m = pkt->pktsched_pkt_mbuf;

		fce = flowadv_alloc_entry(how);
		if (fce == NULL) {
			break;
		}

		/* compile-time layout check: fields must be copy-compatible */
		_CASSERT(sizeof(m->m_pkthdr.pkt_flowid) ==
		    sizeof(fce->fce_flowid));

		fce->fce_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		fce->fce_flowid = m->m_pkthdr.pkt_flowid;
#if SKYWALK
		_CASSERT(sizeof(m->m_pkthdr.pkt_mpriv_srcid) ==
		    sizeof(fce->fce_flowsrc_token));
		_CASSERT(sizeof(m->m_pkthdr.pkt_mpriv_fidx) ==
		    sizeof(fce->fce_flowsrc_fidx));

		/* channel flows additionally carry Skywalk source identifiers */
		if (fce->fce_flowsrc_type == FLOWSRC_CHANNEL) {
			fce->fce_flowsrc_fidx = m->m_pkthdr.pkt_mpriv_fidx;
			fce->fce_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
			fce->fce_ifp = ifp;
		}
#endif /* SKYWALK */
		break;
	}

#if SKYWALK
	case QP_PACKET: {
		struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;

		fce = flowadv_alloc_entry(how);
		if (fce == NULL) {
			break;
		}

		/* compile-time layout check: fields must be copy-compatible */
		_CASSERT(sizeof(fce->fce_flowid) ==
		    sizeof(kp->pkt_flow_token));
		_CASSERT(sizeof(fce->fce_flowsrc_fidx) ==
		    sizeof(kp->pkt_flowsrc_fidx));
		_CASSERT(sizeof(fce->fce_flowsrc_token) ==
		    sizeof(kp->pkt_flowsrc_token));

		/* only flow-advisory packets should reach this point */
		ASSERT(kp->pkt_pflags & PKT_F_FLOW_ADV);
		fce->fce_flowsrc_type = kp->pkt_flowsrc_type;
		fce->fce_flowid = kp->pkt_flow_token;
		fce->fce_flowsrc_fidx = kp->pkt_flowsrc_fidx;
		fce->fce_flowsrc_token = kp->pkt_flowsrc_token;
		fce->fce_ifp = ifp;
		break;
	}
#endif /* SKYWALK */

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	return fce;
}
613
614 uint32_t *
pktsched_get_pkt_sfb_vars(pktsched_pkt_t * pkt,uint32_t ** sfb_flags)615 pktsched_get_pkt_sfb_vars(pktsched_pkt_t *pkt, uint32_t **sfb_flags)
616 {
617 uint32_t *hashp = NULL;
618
619 switch (pkt->pktsched_ptype) {
620 case QP_MBUF: {
621 struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);
622
623 _CASSERT(sizeof(pkth->pkt_mpriv_hash) == sizeof(uint32_t));
624 _CASSERT(sizeof(pkth->pkt_mpriv_flags) == sizeof(uint32_t));
625 *sfb_flags = &pkth->pkt_mpriv_flags;
626 hashp = &pkth->pkt_mpriv_hash;
627 break;
628 }
629
630 #if SKYWALK
631 case QP_PACKET: {
632 struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
633
634 _CASSERT(sizeof(kp->pkt_classq_hash) == sizeof(uint32_t));
635 _CASSERT(sizeof(kp->pkt_classq_flags) == sizeof(uint32_t));
636 *sfb_flags = &kp->pkt_classq_flags;
637 hashp = &kp->pkt_classq_hash;
638 break;
639 }
640 #endif /* SKYWALK */
641
642 default:
643 VERIFY(0);
644 /* NOTREACHED */
645 __builtin_unreachable();
646 }
647
648 return hashp;
649 }
650
/*
 * Set the ECN Congestion Experienced (CE) codepoint in the IP header of
 * an mbuf packet (RFC 3168).  Returns 0 when the packet was CE-marked
 * (or already was), EINVAL when it is not ECN-capable or the header
 * cannot be located safely, and EPROTONOSUPPORT for non-IP traffic.
 */
static int
pktsched_mbuf_mark_ecn(struct mbuf* m)
{
	struct mbuf *m0;
	void *__single hdr;
	int af;
	uint8_t ipv;

	hdr = m->m_pkthdr.pkt_hdr;
	/* verify that hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		if (((caddr_t)hdr >= m_mtod_current(m0)) &&
		    ((caddr_t)hdr < m_mtod_current(m0) + m0->m_len)) {
			break;
		}
	}
	if (m0 == NULL) {
		return EINVAL;
	}
	/*
	 * The version nibble is the high nibble of the first header byte
	 * for both IPv4 and IPv6.
	 */
	ipv = IP_VHL_V(*(uint8_t *)hdr);
	if (ipv == 4) {
		af = AF_INET;
	} else if (ipv == 6) {
		af = AF_INET6;
	} else {
		af = AF_UNSPEC;
	}

	switch (af) {
	case AF_INET: {
		struct ip *__single ip = (struct ip *)(void *)hdr;
		uint8_t otos;
		int sum;

		/* make sure the full IPv4 header lies inside the buffer */
		if (((uintptr_t)ip + sizeof(*ip)) >
		    ((uintptr_t)mbuf_datastart(m0) + mbuf_maxlen(m0))) {
			return EINVAL; /* out of bounds */
		}
		if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) {
			return EINVAL; /* not-ECT */
		}
		if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
			return 0; /* already marked */
		}
		/*
		 * ecn-capable but not marked,
		 * mark CE and update checksum
		 */
		otos = ip->ip_tos;
		ip->ip_tos |= IPTOS_ECN_CE;
		/*
		 * update checksum (from RFC1624) only if hw
		 * checksum is not supported.
		 * HC' = ~(~HC + ~m + m')
		 */
		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_IP) == 0) {
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16); /* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
		}
		return 0;
	}
	case AF_INET6: {
		struct ip6_hdr *__single ip6 = (struct ip6_hdr *)(void *)hdr;
		u_int32_t flowlabel;

		/* make sure the full IPv6 header lies inside the buffer */
		if (((uintptr_t)ip6 + sizeof(*ip6)) >
		    ((uintptr_t)mbuf_datastart(m0) + mbuf_maxlen(m0))) {
			return EINVAL; /* out of bounds */
		}
		/* the ECN bits sit at bits 20-21 of the ip6_flow word */
		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
		    (IPTOS_ECN_NOTECT << 20)) {
			return EINVAL; /* not-ECT */
		}
		if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
		    (IPTOS_ECN_CE << 20)) {
			return 0; /* already marked */
		}
		/*
		 * ecn-capable but not marked, mark CE
		 */
		flowlabel |= (IPTOS_ECN_CE << 20);
		ip6->ip6_flow = htonl(flowlabel);
		return 0;
	}
	default:
		return EPROTONOSUPPORT;
	}
}
743
/*
 * Set the ECN Congestion Experienced (CE) codepoint on a Skywalk packet
 * (RFC 3168).  For flow-classified packets the cached L3 header pointer
 * and IP version are used; otherwise the version is sniffed from the
 * buflet at the L3 offset.  Returns 0 when CE-marked (or already was),
 * EINVAL when not ECN-capable, EPROTONOSUPPORT for non-IP packets.
 */
static int
pktsched_kpkt_mark_ecn(struct __kern_packet *kpkt)
{
	uint8_t ipv = 0, *l3_hdr;

	if ((kpkt->pkt_qum_qflags & QUM_F_FLOW_CLASSIFIED) != 0) {
		uint32_t l3_len = 0;
		/*
		 * NOTE(review): assumes pkt_flow_ip_ver already uses the
		 * IPVERSION/IPV6_VERSION encoding expected by the switch
		 * below -- confirm against the flow classifier.
		 */
		ipv = kpkt->pkt_flow_ip_ver;
		l3_len = kpkt->pkt_length - kpkt->pkt_l2_len;
		l3_hdr = __unsafe_forge_bidi_indexable(uint8_t *, kpkt->pkt_flow_ip_hdr, l3_len);
	} else {
		uint8_t *pkt_buf;
		uint32_t bdlen, bdlim, bdoff;
		MD_BUFLET_ADDR_ABS_DLEN(kpkt, pkt_buf, bdlen, bdlim, bdoff);

		/* takes care of both IPv4 and IPv6 */
		l3_hdr = pkt_buf + kpkt->pkt_headroom + kpkt->pkt_l2_len;
		/* normalize the sniffed version nibble to the ip_v encodings */
		ipv = IP_VHL_V(*(uint8_t *)l3_hdr);
		if (ipv == 4) {
			ipv = IPVERSION;
		} else if (ipv == 6) {
			ipv = IPV6_VERSION;
		} else {
			ipv = 0; /* neither IPv4 nor IPv6 */
		}
	}

	switch (ipv) {
	case IPVERSION: {
		uint8_t otos;
		int sum;

		struct ip *ip = (struct ip *)(void *)l3_hdr;
		if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) {
			return EINVAL; /* not-ECT */
		}
		if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
			return 0; /* already marked */
		}
		/*
		 * ecn-capable but not marked,
		 * mark CE and update checksum
		 */
		otos = ip->ip_tos;
		ip->ip_tos |= IPTOS_ECN_CE;

		/*
		 * Incremental checksum update per RFC 1624:
		 * HC' = ~(~HC + ~m + m').  Unlike the mbuf path, the
		 * checksum is always updated here.
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += (~otos & 0xffff) + ip->ip_tos;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16); /* add carry */
		ip->ip_sum = htons(~sum & 0xffff);

		return 0;
	}
	case IPV6_VERSION: {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)(void *)l3_hdr;
		u_int32_t flowlabel;
		/* the ECN bits sit at bits 20-21 of the ip6_flow word */
		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
		    (IPTOS_ECN_NOTECT << 20)) {
			return EINVAL; /* not-ECT */
		}
		if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
		    (IPTOS_ECN_CE << 20)) {
			return 0; /* already marked */
		}
		/*
		 * ecn-capable but not marked, mark CE
		 */
		flowlabel |= (IPTOS_ECN_CE << 20);
		ip6->ip6_flow = htonl(flowlabel);

		return 0;
	}
	default:
		return EPROTONOSUPPORT;
	}
}
822
823 int
pktsched_mark_ecn(pktsched_pkt_t * pkt)824 pktsched_mark_ecn(pktsched_pkt_t *pkt)
825 {
826 switch (pkt->pktsched_ptype) {
827 case QP_MBUF:
828 return pktsched_mbuf_mark_ecn(pkt->pktsched_pkt_mbuf);
829 case QP_PACKET:
830 return pktsched_kpkt_mark_ecn(pkt->pktsched_pkt_kpkt);
831 default:
832 VERIFY(0);
833 /* NOTREACHED */
834 __builtin_unreachable();
835 }
836 }
837
838 boolean_t
pktsched_is_pkt_l4s(pktsched_pkt_t * pkt)839 pktsched_is_pkt_l4s(pktsched_pkt_t *pkt)
840 {
841 switch (pkt->pktsched_ptype) {
842 case QP_MBUF: {
843 struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);
844 return (pkth->pkt_ext_flags & PKTF_EXT_L4S) != 0;
845 }
846 case QP_PACKET: {
847 struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
848 return (kp->pkt_pflags & PKT_F_L4S) != 0;
849 }
850
851 default:
852 VERIFY(0);
853 /* NOTREACHED */
854 __builtin_unreachable();
855 }
856 return FALSE;
857 }
858
/*
 * Container pairing an m_tag header with its 64-bit AQM payload so both
 * are allocated and freed as a single unit.
 */
struct aqm_tag_container {
	struct m_tag aqm_m_tag;	/* must be first; cast back in m_tag_kfree_aqm() */
	uint64_t aqm_tag;	/* 64-bit payload referenced by the tag */
};
863
864 static struct m_tag *
m_tag_kalloc_aqm(u_int32_t id,u_int16_t type,uint16_t len,int wait)865 m_tag_kalloc_aqm(u_int32_t id, u_int16_t type, uint16_t len, int wait)
866 {
867 struct aqm_tag_container *tag_container;
868 struct m_tag *tag = NULL;
869
870 assert3u(id, ==, KERNEL_MODULE_TAG_ID);
871 assert3u(type, ==, KERNEL_TAG_TYPE_AQM);
872 assert3u(len, ==, sizeof(uint64_t));
873
874 if (len != sizeof(uint64_t)) {
875 return NULL;
876 }
877
878 tag_container = kalloc_type(struct aqm_tag_container, wait | M_ZERO);
879 if (tag_container != NULL) {
880 tag = &tag_container->aqm_m_tag;
881
882 assert3p(tag, ==, tag_container);
883
884 M_TAG_INIT(tag, id, type, len, &tag_container->aqm_tag, NULL);
885 }
886
887 return tag;
888 }
889
890 static void
m_tag_kfree_aqm(struct m_tag * tag)891 m_tag_kfree_aqm(struct m_tag *tag)
892 {
893 struct aqm_tag_container *__single tag_container = (struct aqm_tag_container *)tag;
894
895 assert3u(tag->m_tag_len, ==, sizeof(uint64_t));
896
897 kfree_type(struct aqm_tag_container, tag_container);
898 }
899
900 void
pktsched_register_m_tag(void)901 pktsched_register_m_tag(void)
902 {
903 int error;
904
905 error = m_register_internal_tag_type(KERNEL_TAG_TYPE_AQM, sizeof(uint64_t),
906 m_tag_kalloc_aqm, m_tag_kfree_aqm);
907
908 assert3u(error, ==, 0);
909 }
910