xref: /xnu-8792.41.9/bsd/net/pktsched/pktsched.c (revision 5c2921b07a2480ab43ec66f5b9e41cb872bc554f)
1 /*
2  * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/cdefs.h>
30 
31 #include <sys/param.h>
32 #include <sys/malloc.h>
33 #include <sys/mbuf.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/errno.h>
37 #include <sys/mcache.h>
38 #include <sys/sysctl.h>
39 
40 #include <dev/random/randomdev.h>
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/if_dl.h>
44 #include <net/if_types.h>
45 #include <net/net_osdep.h>
46 #include <net/pktsched/pktsched.h>
47 #include <net/pktsched/pktsched_fq_codel.h>
48 #include <net/pktsched/pktsched_netem.h>
49 
50 #define _IP_VHL
51 #include <netinet/ip.h>
52 #include <netinet/ip6.h>
53 
54 #include <pexpert/pexpert.h>
55 
56 #if SKYWALK
57 #include <skywalk/os_skywalk_private.h>
58 #endif /* SKYWALK */
59 
60 u_int32_t machclk_freq = 0;
61 u_int64_t machclk_per_sec = 0;
62 u_int32_t pktsched_verbose = 0; /* more noise if greater than 1 */
63 
64 static void init_machclk(void);
65 
66 SYSCTL_NODE(_net, OID_AUTO, pktsched, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "pktsched");
67 
68 SYSCTL_UINT(_net_pktsched, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
69     &pktsched_verbose, 0, "Packet scheduler verbosity level");
70 
71 void
pktsched_init(void)72 pktsched_init(void)
73 {
74 	init_machclk();
75 	if (machclk_freq == 0) {
76 		panic("%s: no CPU clock available!", __func__);
77 		/* NOTREACHED */
78 	}
79 	pktsched_fq_init();
80 }
81 
82 static void
init_machclk(void)83 init_machclk(void)
84 {
85 	/*
86 	 * Initialize machclk_freq using the timerbase frequency
87 	 * value from device specific info.
88 	 */
89 	machclk_freq = (uint32_t)gPEClockFrequencyInfo.timebase_frequency_hz;
90 
91 	clock_interval_to_absolutetime_interval(1, NSEC_PER_SEC,
92 	    &machclk_per_sec);
93 }
94 
/*
 * Convert a value in absolute-time units to nanoseconds.
 */
u_int64_t
pktsched_abs_to_nsecs(u_int64_t abstime)
{
	u_int64_t ns = 0;

	absolutetime_to_nanoseconds(abstime, &ns);

	return ns;
}
103 
/*
 * Convert a value in nanoseconds to absolute-time units.
 */
u_int64_t
pktsched_nsecs_to_abstime(u_int64_t nsecs)
{
	u_int64_t abs_units = 0;

	nanoseconds_to_absolutetime(nsecs, &abs_units);

	return abs_units;
}
112 
113 int
pktsched_setup(struct ifclassq * ifq,u_int32_t scheduler,u_int32_t sflags,classq_pkt_type_t ptype)114 pktsched_setup(struct ifclassq *ifq, u_int32_t scheduler, u_int32_t sflags,
115     classq_pkt_type_t ptype)
116 {
117 	int error = 0;
118 	u_int32_t rflags;
119 
120 	IFCQ_LOCK_ASSERT_HELD(ifq);
121 
122 	VERIFY(machclk_freq != 0);
123 
124 	/* Nothing to do unless the scheduler type changes */
125 	if (ifq->ifcq_type == scheduler) {
126 		return 0;
127 	}
128 
129 	/*
130 	 * Remember the flags that need to be restored upon success, as
131 	 * they may be cleared when we tear down existing scheduler.
132 	 */
133 	rflags = (ifq->ifcq_flags & IFCQF_ENABLED);
134 
135 	if (ifq->ifcq_type != PKTSCHEDT_NONE) {
136 		pktsched_teardown(ifq);
137 
138 		/* Teardown should have succeeded */
139 		VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
140 		VERIFY(ifq->ifcq_disc == NULL);
141 	}
142 
143 	error = fq_if_setup_ifclassq(ifq, sflags, ptype);
144 	if (error == 0) {
145 		ifq->ifcq_flags |= rflags;
146 	}
147 
148 	return error;
149 }
150 
151 void
pktsched_teardown(struct ifclassq * ifq)152 pktsched_teardown(struct ifclassq *ifq)
153 {
154 	IFCQ_LOCK_ASSERT_HELD(ifq);
155 	if_qflush(ifq->ifcq_ifp, ifq, true);
156 	VERIFY(IFCQ_IS_EMPTY(ifq));
157 	ifq->ifcq_flags &= ~IFCQF_ENABLED;
158 	if (ifq->ifcq_type == PKTSCHEDT_FQ_CODEL) {
159 		/* Could be PKTSCHEDT_NONE */
160 		fq_if_teardown_ifclassq(ifq);
161 	}
162 	return;
163 }
164 
165 int
pktsched_getqstats(struct ifclassq * ifq,u_int32_t gid,u_int32_t qid,struct if_ifclassq_stats * ifqs)166 pktsched_getqstats(struct ifclassq *ifq, u_int32_t gid, u_int32_t qid,
167     struct if_ifclassq_stats *ifqs)
168 {
169 	int error = 0;
170 
171 	IFCQ_LOCK_ASSERT_HELD(ifq);
172 
173 	if (ifq->ifcq_type == PKTSCHEDT_FQ_CODEL) {
174 		/* Could be PKTSCHEDT_NONE */
175 		error = fq_if_getqstats_ifclassq(ifq, (uint8_t)gid, qid, ifqs);
176 	}
177 
178 	return error;
179 }
180 
181 void
pktsched_pkt_encap(pktsched_pkt_t * pkt,classq_pkt_t * cpkt)182 pktsched_pkt_encap(pktsched_pkt_t *pkt, classq_pkt_t *cpkt)
183 {
184 	pkt->pktsched_pkt = *cpkt;
185 	pkt->pktsched_tail = *cpkt;
186 	pkt->pktsched_pcnt = 1;
187 
188 	switch (cpkt->cp_ptype) {
189 	case QP_MBUF:
190 		pkt->pktsched_plen =
191 		    (uint32_t)m_pktlen(pkt->pktsched_pkt_mbuf);
192 		break;
193 
194 #if SKYWALK
195 	case QP_PACKET:
196 		pkt->pktsched_plen = pkt->pktsched_pkt_kpkt->pkt_length;
197 		break;
198 #endif /* SKYWALK */
199 
200 	default:
201 		VERIFY(0);
202 		/* NOTREACHED */
203 		__builtin_unreachable();
204 	}
205 }
206 
207 void
pktsched_pkt_encap_chain(pktsched_pkt_t * pkt,classq_pkt_t * cpkt,classq_pkt_t * tail,uint32_t cnt,uint32_t bytes)208 pktsched_pkt_encap_chain(pktsched_pkt_t *pkt, classq_pkt_t *cpkt,
209     classq_pkt_t *tail, uint32_t cnt, uint32_t bytes)
210 {
211 	pkt->pktsched_pkt = *cpkt;
212 	pkt->pktsched_tail = *tail;
213 	pkt->pktsched_pcnt = cnt;
214 	pkt->pktsched_plen = bytes;
215 
216 	switch (cpkt->cp_ptype) {
217 	case QP_MBUF:
218 		break;
219 
220 #if SKYWALK
221 	case QP_PACKET:
222 		break;
223 #endif /* SKYWALK */
224 
225 	default:
226 		VERIFY(0);
227 		/* NOTREACHED */
228 		__builtin_unreachable();
229 	}
230 }
231 
232 int
pktsched_clone_pkt(pktsched_pkt_t * pkt1,pktsched_pkt_t * pkt2)233 pktsched_clone_pkt(pktsched_pkt_t *pkt1, pktsched_pkt_t *pkt2)
234 {
235 	struct mbuf *m1, *m2;
236 #if SKYWALK
237 	struct __kern_packet *p1;
238 	kern_packet_t ph2;
239 	int err;
240 #endif /* SKYWALK */
241 
242 	ASSERT(pkt1 != NULL);
243 	ASSERT(pkt1->pktsched_pkt_mbuf != NULL);
244 	ASSERT(pkt1->pktsched_pcnt == 1);
245 
246 	/* allow in place clone, but make sure pkt2->pktsched_pkt won't leak */
247 	ASSERT((pkt1 == pkt2 && pkt1->pktsched_pkt_mbuf ==
248 	    pkt2->pktsched_pkt_mbuf) || (pkt1 != pkt2 &&
249 	    pkt2->pktsched_pkt_mbuf == NULL));
250 
251 	switch (pkt1->pktsched_ptype) {
252 	case QP_MBUF:
253 		m1 = (struct mbuf *)pkt1->pktsched_pkt_mbuf;
254 		m2 = m_dup(m1, M_NOWAIT);
255 		if (__improbable(m2 == NULL)) {
256 			return ENOBUFS;
257 		}
258 		pkt2->pktsched_pkt_mbuf = m2;
259 		break;
260 
261 #if SKYWALK
262 	case QP_PACKET:
263 		p1 = (struct __kern_packet *)pkt1->pktsched_pkt_kpkt;
264 		err = kern_packet_clone_nosleep(SK_PTR_ENCODE(p1,
265 		    METADATA_TYPE(p1), METADATA_SUBTYPE(p1)), &ph2,
266 		    KPKT_COPY_HEAVY);
267 		if (__improbable(err != 0)) {
268 			return err;
269 		}
270 		ASSERT(ph2 != 0);
271 		VERIFY(kern_packet_finalize(ph2) == 0);
272 		pkt2->pktsched_pkt_kpkt = SK_PTR_ADDR_KPKT(ph2);
273 		break;
274 #endif /* SKYWALK */
275 
276 	default:
277 		VERIFY(0);
278 		/* NOTREACHED */
279 		__builtin_unreachable();
280 	}
281 
282 	pkt2->pktsched_plen = pkt1->pktsched_plen;
283 	pkt2->pktsched_ptype = pkt1->pktsched_ptype;
284 	pkt2->pktsched_tail = pkt2->pktsched_pkt;
285 	pkt2->pktsched_pcnt = 1;
286 	return 0;
287 }
288 
289 void
pktsched_corrupt_packet(pktsched_pkt_t * pkt)290 pktsched_corrupt_packet(pktsched_pkt_t *pkt)
291 {
292 	struct mbuf *m = NULL;
293 	uint8_t *data = NULL;
294 	uint32_t data_len = 0;
295 	uint32_t rand32, rand_off, rand_bit;
296 #if SKYWALK
297 	struct __kern_packet *p = NULL;
298 #endif /* SKYWALK */
299 
300 	switch (pkt->pktsched_ptype) {
301 	case QP_MBUF:
302 		m = pkt->pktsched_pkt_mbuf;
303 		data = mtod(m, uint8_t *);
304 		data_len = m->m_pkthdr.len;
305 		break;
306 #if SKYWALK
307 	case QP_PACKET:
308 		p = pkt->pktsched_pkt_kpkt;
309 		if (p->pkt_pflags & PKT_F_MBUF_DATA) {
310 			m = p->pkt_mbuf;
311 			data = mtod(m, uint8_t *);
312 			data_len = m->m_pkthdr.len;
313 		} else {
314 			MD_BUFLET_ADDR_DLEN(p, data, data_len);
315 		}
316 		break;
317 #endif /* SKYWALK */
318 
319 	default:
320 		/* NOTREACHED */
321 		VERIFY(0);
322 		__builtin_unreachable();
323 	}
324 
325 	read_frandom(&rand32, sizeof(rand32));
326 	rand_bit = rand32 & 0x8;
327 	rand_off = (rand32 >> 3) % data_len;
328 	data[rand_off] ^= 1 << rand_bit;
329 }
330 
331 void
pktsched_free_pkt(pktsched_pkt_t * pkt)332 pktsched_free_pkt(pktsched_pkt_t *pkt)
333 {
334 	uint32_t cnt = pkt->pktsched_pcnt;
335 	ASSERT(cnt != 0);
336 
337 	switch (pkt->pktsched_ptype) {
338 	case QP_MBUF: {
339 		struct mbuf *m;
340 
341 		m = pkt->pktsched_pkt_mbuf;
342 		if (cnt == 1) {
343 			VERIFY(m->m_nextpkt == NULL);
344 		} else {
345 			VERIFY(m->m_nextpkt != NULL);
346 		}
347 		m_freem_list(m);
348 		break;
349 	}
350 #if SKYWALK
351 	case QP_PACKET: {
352 		struct __kern_packet *kpkt;
353 		int pcnt = 0;
354 
355 		kpkt = pkt->pktsched_pkt_kpkt;
356 		if (cnt == 1) {
357 			VERIFY(kpkt->pkt_nextpkt == NULL);
358 		} else {
359 			VERIFY(kpkt->pkt_nextpkt != NULL);
360 		}
361 		pp_free_packet_chain(kpkt, &pcnt);
362 		VERIFY(cnt == (uint32_t)pcnt);
363 		break;
364 	}
365 #endif /* SKYWALK */
366 
367 	default:
368 		VERIFY(0);
369 		/* NOTREACHED */
370 		__builtin_unreachable();
371 	}
372 	pkt->pktsched_pkt = CLASSQ_PKT_INITIALIZER(pkt->pktsched_pkt);
373 	pkt->pktsched_tail = CLASSQ_PKT_INITIALIZER(pkt->pktsched_tail);
374 	pkt->pktsched_plen = 0;
375 	pkt->pktsched_pcnt = 0;
376 }
377 
378 mbuf_svc_class_t
pktsched_get_pkt_svc(pktsched_pkt_t * pkt)379 pktsched_get_pkt_svc(pktsched_pkt_t *pkt)
380 {
381 	mbuf_svc_class_t svc = MBUF_SC_UNSPEC;
382 
383 	switch (pkt->pktsched_ptype) {
384 	case QP_MBUF:
385 		svc = m_get_service_class(pkt->pktsched_pkt_mbuf);
386 		break;
387 
388 #if SKYWALK
389 	case QP_PACKET:
390 		svc = pkt->pktsched_pkt_kpkt->pkt_svc_class;
391 		break;
392 #endif /* SKYWALK */
393 
394 	default:
395 		VERIFY(0);
396 		/* NOTREACHED */
397 		__builtin_unreachable();
398 	}
399 
400 	return svc;
401 }
402 
403 void
pktsched_get_pkt_vars(pktsched_pkt_t * pkt,volatile uint32_t ** flags,uint64_t ** timestamp,uint32_t * flowid,uint8_t * flowsrc,uint8_t * proto,uint32_t * comp_gencnt)404 pktsched_get_pkt_vars(pktsched_pkt_t *pkt, volatile uint32_t **flags,
405     uint64_t **timestamp, uint32_t *flowid, uint8_t *flowsrc, uint8_t *proto,
406     uint32_t *comp_gencnt)
407 {
408 	switch (pkt->pktsched_ptype) {
409 	case QP_MBUF: {
410 		struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);
411 
412 		if (flags != NULL) {
413 			*flags = &pkth->pkt_flags;
414 		}
415 		if (timestamp != NULL) {
416 			*timestamp = &pkth->pkt_timestamp;
417 		}
418 		if (flowid != NULL) {
419 			*flowid = pkth->pkt_flowid;
420 		}
421 		if (flowsrc != NULL) {
422 			*flowsrc = pkth->pkt_flowsrc;
423 		}
424 		if (proto != NULL) {
425 			*proto = pkth->pkt_proto;
426 		}
427 		if (comp_gencnt != NULL) {
428 			*comp_gencnt = pkth->comp_gencnt;
429 		}
430 
431 		break;
432 	}
433 
434 #if SKYWALK
435 	case QP_PACKET: {
436 		struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
437 
438 		if (flags != NULL) {
439 			/* use lower-32 bit for common flags */
440 			*flags = &kp->pkt_pflags32;
441 		}
442 		if (timestamp != NULL) {
443 			*timestamp = &kp->pkt_timestamp;
444 		}
445 		if (flowid != NULL) {
446 			*flowid = kp->pkt_flow_token;
447 		}
448 		if (flowsrc != NULL) {
449 			*flowsrc = (uint8_t)kp->pkt_flowsrc_type;
450 		}
451 		if (proto != NULL) {
452 			*proto = kp->pkt_transport_protocol;
453 		}
454 		if (comp_gencnt != NULL) {
455 			*comp_gencnt = kp->pkt_comp_gencnt;
456 		}
457 
458 		break;
459 	}
460 #endif /* SKYWALK */
461 
462 	default:
463 		VERIFY(0);
464 		/* NOTREACHED */
465 		__builtin_unreachable();
466 	}
467 }
468 
469 struct flowadv_fcentry *
pktsched_alloc_fcentry(pktsched_pkt_t * pkt,struct ifnet * ifp,int how)470 pktsched_alloc_fcentry(pktsched_pkt_t *pkt, struct ifnet *ifp, int how)
471 {
472 #pragma unused(ifp)
473 	struct flowadv_fcentry *fce = NULL;
474 
475 	switch (pkt->pktsched_ptype) {
476 	case QP_MBUF: {
477 		struct mbuf *m = pkt->pktsched_pkt_mbuf;
478 
479 		fce = flowadv_alloc_entry(how);
480 		if (fce == NULL) {
481 			break;
482 		}
483 
484 		_CASSERT(sizeof(m->m_pkthdr.pkt_flowid) ==
485 		    sizeof(fce->fce_flowid));
486 
487 		fce->fce_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
488 		fce->fce_flowid = m->m_pkthdr.pkt_flowid;
489 #if SKYWALK
490 		_CASSERT(sizeof(m->m_pkthdr.pkt_mpriv_srcid) ==
491 		    sizeof(fce->fce_flowsrc_token));
492 		_CASSERT(sizeof(m->m_pkthdr.pkt_mpriv_fidx) ==
493 		    sizeof(fce->fce_flowsrc_fidx));
494 
495 		if (fce->fce_flowsrc_type == FLOWSRC_CHANNEL) {
496 			fce->fce_flowsrc_fidx = m->m_pkthdr.pkt_mpriv_fidx;
497 			fce->fce_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
498 			fce->fce_ifp = ifp;
499 		}
500 #endif /* SKYWALK */
501 		break;
502 	}
503 
504 #if SKYWALK
505 	case QP_PACKET: {
506 		struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
507 
508 		fce = flowadv_alloc_entry(how);
509 		if (fce == NULL) {
510 			break;
511 		}
512 
513 		_CASSERT(sizeof(fce->fce_flowid) ==
514 		    sizeof(kp->pkt_flow_token));
515 		_CASSERT(sizeof(fce->fce_flowsrc_fidx) ==
516 		    sizeof(kp->pkt_flowsrc_fidx));
517 		_CASSERT(sizeof(fce->fce_flowsrc_token) ==
518 		    sizeof(kp->pkt_flowsrc_token));
519 
520 		ASSERT(kp->pkt_pflags & PKT_F_FLOW_ADV);
521 		fce->fce_flowsrc_type = kp->pkt_flowsrc_type;
522 		fce->fce_flowid = kp->pkt_flow_token;
523 		fce->fce_flowsrc_fidx = kp->pkt_flowsrc_fidx;
524 		fce->fce_flowsrc_token = kp->pkt_flowsrc_token;
525 		fce->fce_ifp = ifp;
526 		break;
527 	}
528 #endif /* SKYWALK */
529 
530 	default:
531 		VERIFY(0);
532 		/* NOTREACHED */
533 		__builtin_unreachable();
534 	}
535 
536 	return fce;
537 }
538 
539 uint32_t *
pktsched_get_pkt_sfb_vars(pktsched_pkt_t * pkt,uint32_t ** sfb_flags)540 pktsched_get_pkt_sfb_vars(pktsched_pkt_t *pkt, uint32_t **sfb_flags)
541 {
542 	uint32_t *hashp = NULL;
543 
544 	switch (pkt->pktsched_ptype) {
545 	case QP_MBUF: {
546 		struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);
547 
548 		_CASSERT(sizeof(pkth->pkt_mpriv_hash) == sizeof(uint32_t));
549 		_CASSERT(sizeof(pkth->pkt_mpriv_flags) == sizeof(uint32_t));
550 		*sfb_flags = &pkth->pkt_mpriv_flags;
551 		hashp = &pkth->pkt_mpriv_hash;
552 		break;
553 	}
554 
555 #if SKYWALK
556 	case QP_PACKET: {
557 		struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
558 
559 		_CASSERT(sizeof(kp->pkt_classq_hash) == sizeof(uint32_t));
560 		_CASSERT(sizeof(kp->pkt_classq_flags) == sizeof(uint32_t));
561 		*sfb_flags = &kp->pkt_classq_flags;
562 		hashp = &kp->pkt_classq_hash;
563 		break;
564 	}
565 #endif /* SKYWALK */
566 
567 	default:
568 		VERIFY(0);
569 		/* NOTREACHED */
570 		__builtin_unreachable();
571 	}
572 
573 	return hashp;
574 }
575