1 /*
2 * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/cdefs.h>
30
31 #include <sys/param.h>
32 #include <sys/malloc.h>
33 #include <sys/mbuf.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/errno.h>
37 #include <sys/mcache.h>
38 #include <sys/sysctl.h>
39
40 #include <dev/random/randomdev.h>
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/if_dl.h>
44 #include <net/if_types.h>
45 #include <net/net_osdep.h>
46 #include <net/pktsched/pktsched.h>
47 #include <net/pktsched/pktsched_fq_codel.h>
48 #include <net/pktsched/pktsched_netem.h>
49
50 #define _IP_VHL
51 #include <netinet/ip.h>
52 #include <netinet/ip6.h>
53
54 #include <pexpert/pexpert.h>
55
56 #if SKYWALK
57 #include <skywalk/os_skywalk_private.h>
58 #endif /* SKYWALK */
59
60 u_int32_t machclk_freq = 0;
61 u_int64_t machclk_per_sec = 0;
62 u_int32_t pktsched_verbose = 0; /* more noise if greater than 1 */
63
64 static void init_machclk(void);
65
66 SYSCTL_NODE(_net, OID_AUTO, pktsched, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "pktsched");
67
68 SYSCTL_UINT(_net_pktsched, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
69 &pktsched_verbose, 0, "Packet scheduler verbosity level");
70
71 void
pktsched_init(void)72 pktsched_init(void)
73 {
74 init_machclk();
75 if (machclk_freq == 0) {
76 panic("%s: no CPU clock available!", __func__);
77 /* NOTREACHED */
78 }
79 pktsched_fq_init();
80 }
81
82 static void
init_machclk(void)83 init_machclk(void)
84 {
85 /*
86 * Initialize machclk_freq using the timerbase frequency
87 * value from device specific info.
88 */
89 machclk_freq = (uint32_t)gPEClockFrequencyInfo.timebase_frequency_hz;
90
91 clock_interval_to_absolutetime_interval(1, NSEC_PER_SEC,
92 &machclk_per_sec);
93 }
94
95 u_int64_t
pktsched_abs_to_nsecs(u_int64_t abstime)96 pktsched_abs_to_nsecs(u_int64_t abstime)
97 {
98 u_int64_t nsecs;
99
100 absolutetime_to_nanoseconds(abstime, &nsecs);
101 return nsecs;
102 }
103
104 u_int64_t
pktsched_nsecs_to_abstime(u_int64_t nsecs)105 pktsched_nsecs_to_abstime(u_int64_t nsecs)
106 {
107 u_int64_t abstime;
108
109 nanoseconds_to_absolutetime(nsecs, &abstime);
110 return abstime;
111 }
112
113 int
pktsched_setup(struct ifclassq * ifq,u_int32_t scheduler,u_int32_t sflags,classq_pkt_type_t ptype)114 pktsched_setup(struct ifclassq *ifq, u_int32_t scheduler, u_int32_t sflags,
115 classq_pkt_type_t ptype)
116 {
117 int error = 0;
118 u_int32_t rflags;
119
120 IFCQ_LOCK_ASSERT_HELD(ifq);
121
122 VERIFY(machclk_freq != 0);
123
124 /* Nothing to do unless the scheduler type changes */
125 if (ifq->ifcq_type == scheduler) {
126 return 0;
127 }
128
129 /*
130 * Remember the flags that need to be restored upon success, as
131 * they may be cleared when we tear down existing scheduler.
132 */
133 rflags = (ifq->ifcq_flags & IFCQF_ENABLED);
134
135 if (ifq->ifcq_type != PKTSCHEDT_NONE) {
136 pktsched_teardown(ifq);
137
138 /* Teardown should have succeeded */
139 VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
140 VERIFY(ifq->ifcq_disc == NULL);
141 }
142
143 error = fq_if_setup_ifclassq(ifq, sflags, ptype);
144 if (error == 0) {
145 ifq->ifcq_flags |= rflags;
146 }
147
148 return error;
149 }
150
151 void
pktsched_teardown(struct ifclassq * ifq)152 pktsched_teardown(struct ifclassq *ifq)
153 {
154 IFCQ_LOCK_ASSERT_HELD(ifq);
155 if_qflush(ifq->ifcq_ifp, ifq, true);
156 VERIFY(IFCQ_IS_EMPTY(ifq));
157 ifq->ifcq_flags &= ~IFCQF_ENABLED;
158 if (ifq->ifcq_type == PKTSCHEDT_FQ_CODEL) {
159 /* Could be PKTSCHEDT_NONE */
160 fq_if_teardown_ifclassq(ifq);
161 }
162 return;
163 }
164
165 int
pktsched_getqstats(struct ifclassq * ifq,u_int32_t gid,u_int32_t qid,struct if_ifclassq_stats * ifqs)166 pktsched_getqstats(struct ifclassq *ifq, u_int32_t gid, u_int32_t qid,
167 struct if_ifclassq_stats *ifqs)
168 {
169 int error = 0;
170
171 IFCQ_LOCK_ASSERT_HELD(ifq);
172
173 if (ifq->ifcq_type == PKTSCHEDT_FQ_CODEL) {
174 /* Could be PKTSCHEDT_NONE */
175 error = fq_if_getqstats_ifclassq(ifq, (uint8_t)gid, qid, ifqs);
176 }
177
178 return error;
179 }
180
181 void
pktsched_pkt_encap(pktsched_pkt_t * pkt,classq_pkt_t * cpkt)182 pktsched_pkt_encap(pktsched_pkt_t *pkt, classq_pkt_t *cpkt)
183 {
184 pkt->pktsched_pkt = *cpkt;
185 pkt->pktsched_tail = *cpkt;
186 pkt->pktsched_pcnt = 1;
187
188 switch (cpkt->cp_ptype) {
189 case QP_MBUF:
190 pkt->pktsched_plen =
191 (uint32_t)m_pktlen(pkt->pktsched_pkt_mbuf);
192 break;
193
194 #if SKYWALK
195 case QP_PACKET:
196 pkt->pktsched_plen = pkt->pktsched_pkt_kpkt->pkt_length;
197 break;
198 #endif /* SKYWALK */
199
200 default:
201 VERIFY(0);
202 /* NOTREACHED */
203 __builtin_unreachable();
204 }
205 }
206
207 void
pktsched_pkt_encap_chain(pktsched_pkt_t * pkt,classq_pkt_t * cpkt,classq_pkt_t * tail,uint32_t cnt,uint32_t bytes)208 pktsched_pkt_encap_chain(pktsched_pkt_t *pkt, classq_pkt_t *cpkt,
209 classq_pkt_t *tail, uint32_t cnt, uint32_t bytes)
210 {
211 pkt->pktsched_pkt = *cpkt;
212 pkt->pktsched_tail = *tail;
213 pkt->pktsched_pcnt = cnt;
214 pkt->pktsched_plen = bytes;
215
216 switch (cpkt->cp_ptype) {
217 case QP_MBUF:
218 break;
219
220 #if SKYWALK
221 case QP_PACKET:
222 break;
223 #endif /* SKYWALK */
224
225 default:
226 VERIFY(0);
227 /* NOTREACHED */
228 __builtin_unreachable();
229 }
230 }
231
232 int
pktsched_clone_pkt(pktsched_pkt_t * pkt1,pktsched_pkt_t * pkt2)233 pktsched_clone_pkt(pktsched_pkt_t *pkt1, pktsched_pkt_t *pkt2)
234 {
235 struct mbuf *m1, *m2;
236 #if SKYWALK
237 struct __kern_packet *p1;
238 kern_packet_t ph2;
239 int err;
240 #endif /* SKYWALK */
241
242 ASSERT(pkt1 != NULL);
243 ASSERT(pkt1->pktsched_pkt_mbuf != NULL);
244 ASSERT(pkt1->pktsched_pcnt == 1);
245
246 /* allow in place clone, but make sure pkt2->pktsched_pkt won't leak */
247 ASSERT((pkt1 == pkt2 && pkt1->pktsched_pkt_mbuf ==
248 pkt2->pktsched_pkt_mbuf) || (pkt1 != pkt2 &&
249 pkt2->pktsched_pkt_mbuf == NULL));
250
251 switch (pkt1->pktsched_ptype) {
252 case QP_MBUF:
253 m1 = (struct mbuf *)pkt1->pktsched_pkt_mbuf;
254 m2 = m_dup(m1, M_NOWAIT);
255 if (__improbable(m2 == NULL)) {
256 return ENOBUFS;
257 }
258 pkt2->pktsched_pkt_mbuf = m2;
259 break;
260
261 #if SKYWALK
262 case QP_PACKET:
263 p1 = (struct __kern_packet *)pkt1->pktsched_pkt_kpkt;
264 err = kern_packet_clone_nosleep(SK_PTR_ENCODE(p1,
265 METADATA_TYPE(p1), METADATA_SUBTYPE(p1)), &ph2,
266 KPKT_COPY_HEAVY);
267 if (__improbable(err != 0)) {
268 return err;
269 }
270 ASSERT(ph2 != 0);
271 VERIFY(kern_packet_finalize(ph2) == 0);
272 pkt2->pktsched_pkt_kpkt = SK_PTR_ADDR_KPKT(ph2);
273 break;
274 #endif /* SKYWALK */
275
276 default:
277 VERIFY(0);
278 /* NOTREACHED */
279 __builtin_unreachable();
280 }
281
282 pkt2->pktsched_plen = pkt1->pktsched_plen;
283 pkt2->pktsched_ptype = pkt1->pktsched_ptype;
284 pkt2->pktsched_tail = pkt2->pktsched_pkt;
285 pkt2->pktsched_pcnt = 1;
286 return 0;
287 }
288
289 void
pktsched_corrupt_packet(pktsched_pkt_t * pkt)290 pktsched_corrupt_packet(pktsched_pkt_t *pkt)
291 {
292 struct mbuf *m = NULL;
293 uint8_t *data = NULL;
294 uint32_t data_len = 0;
295 uint32_t rand32, rand_off, rand_bit;
296 #if SKYWALK
297 struct __kern_packet *p = NULL;
298 #endif /* SKYWALK */
299
300 switch (pkt->pktsched_ptype) {
301 case QP_MBUF:
302 m = pkt->pktsched_pkt_mbuf;
303 data = mtod(m, uint8_t *);
304 data_len = m->m_pkthdr.len;
305 break;
306 #if SKYWALK
307 case QP_PACKET:
308 p = pkt->pktsched_pkt_kpkt;
309 if (p->pkt_pflags & PKT_F_MBUF_DATA) {
310 m = p->pkt_mbuf;
311 data = mtod(m, uint8_t *);
312 data_len = m->m_pkthdr.len;
313 } else {
314 MD_BUFLET_ADDR_DLEN(p, data, data_len);
315 }
316 break;
317 #endif /* SKYWALK */
318
319 default:
320 /* NOTREACHED */
321 VERIFY(0);
322 __builtin_unreachable();
323 }
324
325 read_frandom(&rand32, sizeof(rand32));
326 rand_bit = rand32 & 0x8;
327 rand_off = (rand32 >> 3) % data_len;
328 data[rand_off] ^= 1 << rand_bit;
329 }
330
331 void
pktsched_free_pkt(pktsched_pkt_t * pkt)332 pktsched_free_pkt(pktsched_pkt_t *pkt)
333 {
334 uint32_t cnt = pkt->pktsched_pcnt;
335 ASSERT(cnt != 0);
336
337 switch (pkt->pktsched_ptype) {
338 case QP_MBUF: {
339 struct mbuf *m;
340
341 m = pkt->pktsched_pkt_mbuf;
342 if (cnt == 1) {
343 VERIFY(m->m_nextpkt == NULL);
344 } else {
345 VERIFY(m->m_nextpkt != NULL);
346 }
347 m_freem_list(m);
348 break;
349 }
350 #if SKYWALK
351 case QP_PACKET: {
352 struct __kern_packet *kpkt;
353 int pcnt = 0;
354
355 kpkt = pkt->pktsched_pkt_kpkt;
356 if (cnt == 1) {
357 VERIFY(kpkt->pkt_nextpkt == NULL);
358 } else {
359 VERIFY(kpkt->pkt_nextpkt != NULL);
360 }
361 pp_free_packet_chain(kpkt, &pcnt);
362 VERIFY(cnt == (uint32_t)pcnt);
363 break;
364 }
365 #endif /* SKYWALK */
366
367 default:
368 VERIFY(0);
369 /* NOTREACHED */
370 __builtin_unreachable();
371 }
372 pkt->pktsched_pkt = CLASSQ_PKT_INITIALIZER(pkt->pktsched_pkt);
373 pkt->pktsched_tail = CLASSQ_PKT_INITIALIZER(pkt->pktsched_tail);
374 pkt->pktsched_plen = 0;
375 pkt->pktsched_pcnt = 0;
376 }
377
378 mbuf_svc_class_t
pktsched_get_pkt_svc(pktsched_pkt_t * pkt)379 pktsched_get_pkt_svc(pktsched_pkt_t *pkt)
380 {
381 mbuf_svc_class_t svc = MBUF_SC_UNSPEC;
382
383 switch (pkt->pktsched_ptype) {
384 case QP_MBUF:
385 svc = m_get_service_class(pkt->pktsched_pkt_mbuf);
386 break;
387
388 #if SKYWALK
389 case QP_PACKET:
390 svc = pkt->pktsched_pkt_kpkt->pkt_svc_class;
391 break;
392 #endif /* SKYWALK */
393
394 default:
395 VERIFY(0);
396 /* NOTREACHED */
397 __builtin_unreachable();
398 }
399
400 return svc;
401 }
402
403 void
pktsched_get_pkt_vars(pktsched_pkt_t * pkt,volatile uint32_t ** flags,uint64_t ** timestamp,uint32_t * flowid,uint8_t * flowsrc,uint8_t * proto,uint32_t * comp_gencnt)404 pktsched_get_pkt_vars(pktsched_pkt_t *pkt, volatile uint32_t **flags,
405 uint64_t **timestamp, uint32_t *flowid, uint8_t *flowsrc, uint8_t *proto,
406 uint32_t *comp_gencnt)
407 {
408 switch (pkt->pktsched_ptype) {
409 case QP_MBUF: {
410 struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);
411
412 if (flags != NULL) {
413 *flags = &pkth->pkt_flags;
414 }
415 if (timestamp != NULL) {
416 *timestamp = &pkth->pkt_timestamp;
417 }
418 if (flowid != NULL) {
419 *flowid = pkth->pkt_flowid;
420 }
421 if (flowsrc != NULL) {
422 *flowsrc = pkth->pkt_flowsrc;
423 }
424 if (proto != NULL) {
425 /*
426 * rdar://100524205 - We want to use the pkt_ext_flags
427 * to denote QUIC packets, but AQM is already written in
428 * such a way where IPPROTO_QUIC is used to denote QUIC
429 * packets.
430 */
431 if (pkth->pkt_ext_flags & PKTF_EXT_QUIC) {
432 *proto = IPPROTO_QUIC;
433 } else {
434 *proto = pkth->pkt_proto;
435 }
436 }
437 if (comp_gencnt != NULL) {
438 *comp_gencnt = pkth->comp_gencnt;
439 }
440
441 break;
442 }
443
444 #if SKYWALK
445 case QP_PACKET: {
446 struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
447
448 if (flags != NULL) {
449 /* use lower-32 bit for common flags */
450 *flags = &kp->pkt_pflags32;
451 }
452 if (timestamp != NULL) {
453 *timestamp = &kp->pkt_timestamp;
454 }
455 if (flowid != NULL) {
456 *flowid = kp->pkt_flow_token;
457 }
458 if (flowsrc != NULL) {
459 *flowsrc = (uint8_t)kp->pkt_flowsrc_type;
460 }
461 if (proto != NULL) {
462 *proto = kp->pkt_transport_protocol;
463 }
464 if (comp_gencnt != NULL) {
465 *comp_gencnt = kp->pkt_comp_gencnt;
466 }
467
468 break;
469 }
470 #endif /* SKYWALK */
471
472 default:
473 VERIFY(0);
474 /* NOTREACHED */
475 __builtin_unreachable();
476 }
477 }
478
479 struct flowadv_fcentry *
pktsched_alloc_fcentry(pktsched_pkt_t * pkt,struct ifnet * ifp,int how)480 pktsched_alloc_fcentry(pktsched_pkt_t *pkt, struct ifnet *ifp, int how)
481 {
482 #pragma unused(ifp)
483 struct flowadv_fcentry *fce = NULL;
484
485 switch (pkt->pktsched_ptype) {
486 case QP_MBUF: {
487 struct mbuf *m = pkt->pktsched_pkt_mbuf;
488
489 fce = flowadv_alloc_entry(how);
490 if (fce == NULL) {
491 break;
492 }
493
494 _CASSERT(sizeof(m->m_pkthdr.pkt_flowid) ==
495 sizeof(fce->fce_flowid));
496
497 fce->fce_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
498 fce->fce_flowid = m->m_pkthdr.pkt_flowid;
499 #if SKYWALK
500 _CASSERT(sizeof(m->m_pkthdr.pkt_mpriv_srcid) ==
501 sizeof(fce->fce_flowsrc_token));
502 _CASSERT(sizeof(m->m_pkthdr.pkt_mpriv_fidx) ==
503 sizeof(fce->fce_flowsrc_fidx));
504
505 if (fce->fce_flowsrc_type == FLOWSRC_CHANNEL) {
506 fce->fce_flowsrc_fidx = m->m_pkthdr.pkt_mpriv_fidx;
507 fce->fce_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
508 fce->fce_ifp = ifp;
509 }
510 #endif /* SKYWALK */
511 break;
512 }
513
514 #if SKYWALK
515 case QP_PACKET: {
516 struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
517
518 fce = flowadv_alloc_entry(how);
519 if (fce == NULL) {
520 break;
521 }
522
523 _CASSERT(sizeof(fce->fce_flowid) ==
524 sizeof(kp->pkt_flow_token));
525 _CASSERT(sizeof(fce->fce_flowsrc_fidx) ==
526 sizeof(kp->pkt_flowsrc_fidx));
527 _CASSERT(sizeof(fce->fce_flowsrc_token) ==
528 sizeof(kp->pkt_flowsrc_token));
529
530 ASSERT(kp->pkt_pflags & PKT_F_FLOW_ADV);
531 fce->fce_flowsrc_type = kp->pkt_flowsrc_type;
532 fce->fce_flowid = kp->pkt_flow_token;
533 fce->fce_flowsrc_fidx = kp->pkt_flowsrc_fidx;
534 fce->fce_flowsrc_token = kp->pkt_flowsrc_token;
535 fce->fce_ifp = ifp;
536 break;
537 }
538 #endif /* SKYWALK */
539
540 default:
541 VERIFY(0);
542 /* NOTREACHED */
543 __builtin_unreachable();
544 }
545
546 return fce;
547 }
548
549 uint32_t *
pktsched_get_pkt_sfb_vars(pktsched_pkt_t * pkt,uint32_t ** sfb_flags)550 pktsched_get_pkt_sfb_vars(pktsched_pkt_t *pkt, uint32_t **sfb_flags)
551 {
552 uint32_t *hashp = NULL;
553
554 switch (pkt->pktsched_ptype) {
555 case QP_MBUF: {
556 struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);
557
558 _CASSERT(sizeof(pkth->pkt_mpriv_hash) == sizeof(uint32_t));
559 _CASSERT(sizeof(pkth->pkt_mpriv_flags) == sizeof(uint32_t));
560 *sfb_flags = &pkth->pkt_mpriv_flags;
561 hashp = &pkth->pkt_mpriv_hash;
562 break;
563 }
564
565 #if SKYWALK
566 case QP_PACKET: {
567 struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
568
569 _CASSERT(sizeof(kp->pkt_classq_hash) == sizeof(uint32_t));
570 _CASSERT(sizeof(kp->pkt_classq_flags) == sizeof(uint32_t));
571 *sfb_flags = &kp->pkt_classq_flags;
572 hashp = &kp->pkt_classq_hash;
573 break;
574 }
575 #endif /* SKYWALK */
576
577 default:
578 VERIFY(0);
579 /* NOTREACHED */
580 __builtin_unreachable();
581 }
582
583 return hashp;
584 }
585
586 static int
pktsched_mbuf_mark_ecn(struct mbuf * m)587 pktsched_mbuf_mark_ecn(struct mbuf* m)
588 {
589 struct mbuf *m0;
590 void *hdr;
591 int af;
592 uint8_t ipv;
593
594 hdr = m->m_pkthdr.pkt_hdr;
595 /* verify that hdr is within the mbuf data */
596 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
597 if (((caddr_t)hdr >= m0->m_data) &&
598 ((caddr_t)hdr < m0->m_data + m0->m_len)) {
599 break;
600 }
601 }
602 if (m0 == NULL) {
603 return EINVAL;
604 }
605 ipv = IP_VHL_V(*(uint8_t *)hdr);
606 if (ipv == 4) {
607 af = AF_INET;
608 } else if (ipv == 6) {
609 af = AF_INET6;
610 } else {
611 af = AF_UNSPEC;
612 }
613
614 switch (af) {
615 case AF_INET: {
616 struct ip *ip = hdr;
617 uint8_t otos;
618 int sum;
619
620 if (((uintptr_t)ip + sizeof(*ip)) >
621 ((uintptr_t)mbuf_datastart(m0) + mbuf_maxlen(m0))) {
622 return EINVAL; /* out of bounds */
623 }
624 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) {
625 return EINVAL; /* not-ECT */
626 }
627 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
628 return 0; /* already marked */
629 }
630 /*
631 * ecn-capable but not marked,
632 * mark CE and update checksum
633 */
634 otos = ip->ip_tos;
635 ip->ip_tos |= IPTOS_ECN_CE;
636 /*
637 * update checksum (from RFC1624) only if hw
638 * checksum is not supported.
639 * HC' = ~(~HC + ~m + m')
640 */
641 if ((m->m_pkthdr.csum_flags & CSUM_DELAY_IP) == 0) {
642 sum = ~ntohs(ip->ip_sum) & 0xffff;
643 sum += (~otos & 0xffff) + ip->ip_tos;
644 sum = (sum >> 16) + (sum & 0xffff);
645 sum += (sum >> 16); /* add carry */
646 ip->ip_sum = htons(~sum & 0xffff);
647 }
648 return 0;
649 }
650 case AF_INET6: {
651 struct ip6_hdr *ip6 = hdr;
652 u_int32_t flowlabel;
653
654 if (((uintptr_t)ip6 + sizeof(*ip6)) >
655 ((uintptr_t)mbuf_datastart(m0) + mbuf_maxlen(m0))) {
656 return EINVAL; /* out of bounds */
657 }
658 flowlabel = ntohl(ip6->ip6_flow);
659 if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
660 (IPTOS_ECN_NOTECT << 20)) {
661 return EINVAL; /* not-ECT */
662 }
663 if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
664 (IPTOS_ECN_CE << 20)) {
665 return 0; /* already marked */
666 }
667 /*
668 * ecn-capable but not marked, mark CE
669 */
670 flowlabel |= (IPTOS_ECN_CE << 20);
671 ip6->ip6_flow = htonl(flowlabel);
672 return 0;
673 }
674 default:
675 return EPROTONOSUPPORT;
676 }
677 }
678
679 static int
pktsched_kpkt_mark_ecn(struct __kern_packet * kpkt)680 pktsched_kpkt_mark_ecn(struct __kern_packet *kpkt)
681 {
682 uint8_t ipv = 0, *l3_hdr;
683
684 if (__improbable((kpkt->pkt_qum_qflags & QUM_F_FLOW_CLASSIFIED) != 0)) {
685 ipv = kpkt->pkt_flow_ip_ver;
686 l3_hdr = (uint8_t *)kpkt->pkt_flow_ip_hdr;
687 } else {
688 uint8_t *pkt_buf;
689 uint16_t bdlen, bdlim, bdoff;
690 MD_BUFLET_ADDR_ABS_DLEN(kpkt, pkt_buf, bdlen, bdlim, bdoff);
691
692 /* takes care of both IPv4 and IPv6 */
693 l3_hdr = pkt_buf + kpkt->pkt_headroom + kpkt->pkt_l2_len;
694 ipv = IP_VHL_V(*(uint8_t *)l3_hdr);
695 if (ipv == 4) {
696 ipv = IPVERSION;
697 } else if (ipv == 6) {
698 ipv = IPV6_VERSION;
699 } else {
700 ipv = 0;
701 }
702 }
703
704 switch (ipv) {
705 case IPVERSION: {
706 uint8_t otos;
707 int sum;
708
709 struct ip *ip = (struct ip *)(void *)l3_hdr;
710 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) {
711 return EINVAL; /* not-ECT */
712 }
713 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
714 return 0; /* already marked */
715 }
716 /*
717 * ecn-capable but not marked,
718 * mark CE and update checksum
719 */
720 otos = ip->ip_tos;
721 ip->ip_tos |= IPTOS_ECN_CE;
722
723 sum = ~ntohs(ip->ip_sum) & 0xffff;
724 sum += (~otos & 0xffff) + ip->ip_tos;
725 sum = (sum >> 16) + (sum & 0xffff);
726 sum += (sum >> 16); /* add carry */
727 ip->ip_sum = htons(~sum & 0xffff);
728
729 return 0;
730 }
731 case IPV6_VERSION: {
732 struct ip6_hdr *ip6 = (struct ip6_hdr *)l3_hdr;
733 u_int32_t flowlabel;
734 flowlabel = ntohl(ip6->ip6_flow);
735 if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
736 (IPTOS_ECN_NOTECT << 20)) {
737 return EINVAL; /* not-ECT */
738 }
739 if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
740 (IPTOS_ECN_CE << 20)) {
741 return 0; /* already marked */
742 }
743 /*
744 * ecn-capable but not marked, mark CE
745 */
746 flowlabel |= (IPTOS_ECN_CE << 20);
747 ip6->ip6_flow = htonl(flowlabel);
748
749 return 0;
750 }
751 default:
752 return EPROTONOSUPPORT;
753 }
754 }
755
756 int
pktsched_mark_ecn(pktsched_pkt_t * pkt)757 pktsched_mark_ecn(pktsched_pkt_t *pkt)
758 {
759 switch (pkt->pktsched_ptype) {
760 case QP_MBUF:
761 return pktsched_mbuf_mark_ecn(pkt->pktsched_pkt_mbuf);
762 case QP_PACKET:
763 return pktsched_kpkt_mark_ecn(pkt->pktsched_pkt_kpkt);
764 default:
765 VERIFY(0);
766 /* NOTREACHED */
767 __builtin_unreachable();
768 }
769 }
770
771 boolean_t
pktsched_is_pkt_l4s(pktsched_pkt_t * pkt)772 pktsched_is_pkt_l4s(pktsched_pkt_t *pkt)
773 {
774 switch (pkt->pktsched_ptype) {
775 case QP_MBUF: {
776 struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);
777 return (pkth->pkt_ext_flags & PKTF_EXT_L4S) != 0;
778 }
779 case QP_PACKET: {
780 struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
781 return (kp->pkt_pflags & PKT_F_L4S) != 0;
782 }
783
784 default:
785 VERIFY(0);
786 /* NOTREACHED */
787 __builtin_unreachable();
788 }
789 return FALSE;
790 }
791