xref: /xnu-8020.101.4/bsd/net/pktsched/pktsched.c (revision e7776783b89a353188416a9a346c6cdb4928faad)
1 /*
2  * Copyright (c) 2011-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <sys/cdefs.h>
30 
31 #include <sys/param.h>
32 #include <sys/malloc.h>
33 #include <sys/mbuf.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/errno.h>
37 #include <sys/mcache.h>
38 #include <sys/sysctl.h>
39 
40 #include <dev/random/randomdev.h>
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/if_dl.h>
44 #include <net/if_types.h>
45 #include <net/net_osdep.h>
46 #include <net/pktsched/pktsched.h>
47 #include <net/pktsched/pktsched_fq_codel.h>
48 #include <net/pktsched/pktsched_netem.h>
49 
50 #include <pexpert/pexpert.h>
51 
52 #if SKYWALK
53 #include <skywalk/os_skywalk_private.h>
54 #endif /* SKYWALK */
55 
/*
 * Clock-related globals, both filled in by init_machclk():
 *   machclk_freq    - low 32 bits of
 *                     gPEClockFrequencyInfo.timebase_frequency_hz; a value
 *                     of 0 makes pktsched_init() panic.
 *   machclk_per_sec - result of converting an interval of 1 * NSEC_PER_SEC
 *                     with clock_interval_to_absolutetime_interval().
 * pktsched_verbose is exported read/write through the sysctl below.
 */
56 u_int32_t machclk_freq = 0;
57 u_int64_t machclk_per_sec = 0;
58 u_int32_t pktsched_verbose = 0; /* more noise if greater than 1 */
59 
/* Defined further down; called only from pktsched_init(). */
60 static void init_machclk(void);
61 
/* Parent sysctl node "pktsched" under _net. */
62 SYSCTL_NODE(_net, OID_AUTO, pktsched, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "pktsched");
63 
/* "verbose" knob under the pktsched node, backed by pktsched_verbose. */
64 SYSCTL_UINT(_net_pktsched, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
65     &pktsched_verbose, 0, "Packet scheduler verbosity level");
66 
67 void
pktsched_init(void)68 pktsched_init(void)
69 {
70 	init_machclk();
71 	if (machclk_freq == 0) {
72 		panic("%s: no CPU clock available!", __func__);
73 		/* NOTREACHED */
74 	}
75 }
76 
77 static void
init_machclk(void)78 init_machclk(void)
79 {
80 	/*
81 	 * Initialize machclk_freq using the timerbase frequency
82 	 * value from device specific info.
83 	 */
84 	machclk_freq = (uint32_t)gPEClockFrequencyInfo.timebase_frequency_hz;
85 
86 	clock_interval_to_absolutetime_interval(1, NSEC_PER_SEC,
87 	    &machclk_per_sec);
88 }
89 
/*
 * Convert `abstime` to nanoseconds via absolutetime_to_nanoseconds() and
 * return the converted value.
 */
u_int64_t
pktsched_abs_to_nsecs(u_int64_t abstime)
{
	u_int64_t converted;

	absolutetime_to_nanoseconds(abstime, &converted);

	return converted;
}
98 
/*
 * Convert `nsecs` nanoseconds via nanoseconds_to_absolutetime() and return
 * the converted value.
 */
u_int64_t
pktsched_nsecs_to_abstime(u_int64_t nsecs)
{
	u_int64_t converted;

	nanoseconds_to_absolutetime(nsecs, &converted);

	return converted;
}
107 
108 int
pktsched_setup(struct ifclassq * ifq,u_int32_t scheduler,u_int32_t sflags,classq_pkt_type_t ptype)109 pktsched_setup(struct ifclassq *ifq, u_int32_t scheduler, u_int32_t sflags,
110     classq_pkt_type_t ptype)
111 {
112 	int error = 0;
113 	u_int32_t rflags;
114 
115 	IFCQ_LOCK_ASSERT_HELD(ifq);
116 
117 	VERIFY(machclk_freq != 0);
118 
119 	/* Nothing to do unless the scheduler type changes */
120 	if (ifq->ifcq_type == scheduler) {
121 		return 0;
122 	}
123 
124 	/*
125 	 * Remember the flags that need to be restored upon success, as
126 	 * they may be cleared when we tear down existing scheduler.
127 	 */
128 	rflags = (ifq->ifcq_flags & IFCQF_ENABLED);
129 
130 	if (ifq->ifcq_type != PKTSCHEDT_NONE) {
131 		pktsched_teardown(ifq);
132 
133 		/* Teardown should have succeeded */
134 		VERIFY(ifq->ifcq_type == PKTSCHEDT_NONE);
135 		VERIFY(ifq->ifcq_disc == NULL);
136 	}
137 
138 	error = fq_if_setup_ifclassq(ifq, sflags, ptype);
139 	if (error == 0) {
140 		ifq->ifcq_flags |= rflags;
141 	}
142 
143 	return error;
144 }
145 
146 void
pktsched_teardown(struct ifclassq * ifq)147 pktsched_teardown(struct ifclassq *ifq)
148 {
149 	IFCQ_LOCK_ASSERT_HELD(ifq);
150 	if_qflush(ifq->ifcq_ifp, ifq, true);
151 	VERIFY(IFCQ_IS_EMPTY(ifq));
152 	ifq->ifcq_flags &= ~IFCQF_ENABLED;
153 	if (ifq->ifcq_type == PKTSCHEDT_FQ_CODEL) {
154 		/* Could be PKTSCHEDT_NONE */
155 		fq_if_teardown_ifclassq(ifq);
156 	}
157 	return;
158 }
159 
160 int
pktsched_getqstats(struct ifclassq * ifq,u_int32_t qid,struct if_ifclassq_stats * ifqs)161 pktsched_getqstats(struct ifclassq *ifq, u_int32_t qid,
162     struct if_ifclassq_stats *ifqs)
163 {
164 	int error = 0;
165 
166 	IFCQ_LOCK_ASSERT_HELD(ifq);
167 
168 	if (ifq->ifcq_type == PKTSCHEDT_FQ_CODEL) {
169 		/* Could be PKTSCHEDT_NONE */
170 		error = fq_if_getqstats_ifclassq(ifq, qid, ifqs);
171 	}
172 
173 	return error;
174 }
175 
176 void
pktsched_pkt_encap(pktsched_pkt_t * pkt,classq_pkt_t * cpkt)177 pktsched_pkt_encap(pktsched_pkt_t *pkt, classq_pkt_t *cpkt)
178 {
179 	pkt->pktsched_pkt = *cpkt;
180 	pkt->pktsched_tail = *cpkt;
181 	pkt->pktsched_pcnt = 1;
182 
183 	switch (cpkt->cp_ptype) {
184 	case QP_MBUF:
185 		pkt->pktsched_plen =
186 		    (uint32_t)m_pktlen(pkt->pktsched_pkt_mbuf);
187 		break;
188 
189 #if SKYWALK
190 	case QP_PACKET:
191 		pkt->pktsched_plen = pkt->pktsched_pkt_kpkt->pkt_length;
192 		break;
193 #endif /* SKYWALK */
194 
195 	default:
196 		VERIFY(0);
197 		/* NOTREACHED */
198 		__builtin_unreachable();
199 	}
200 }
201 
202 void
pktsched_pkt_encap_chain(pktsched_pkt_t * pkt,classq_pkt_t * cpkt,classq_pkt_t * tail,uint32_t cnt,uint32_t bytes)203 pktsched_pkt_encap_chain(pktsched_pkt_t *pkt, classq_pkt_t *cpkt,
204     classq_pkt_t *tail, uint32_t cnt, uint32_t bytes)
205 {
206 	pkt->pktsched_pkt = *cpkt;
207 	pkt->pktsched_tail = *tail;
208 	pkt->pktsched_pcnt = cnt;
209 	pkt->pktsched_plen = bytes;
210 
211 	switch (cpkt->cp_ptype) {
212 	case QP_MBUF:
213 		break;
214 
215 #if SKYWALK
216 	case QP_PACKET:
217 		break;
218 #endif /* SKYWALK */
219 
220 	default:
221 		VERIFY(0);
222 		/* NOTREACHED */
223 		__builtin_unreachable();
224 	}
225 }
226 
227 int
pktsched_clone_pkt(pktsched_pkt_t * pkt1,pktsched_pkt_t * pkt2)228 pktsched_clone_pkt(pktsched_pkt_t *pkt1, pktsched_pkt_t *pkt2)
229 {
230 	struct mbuf *m1, *m2;
231 #if SKYWALK
232 	struct __kern_packet *p1;
233 	kern_packet_t ph2;
234 	int err;
235 #endif /* SKYWALK */
236 
237 	ASSERT(pkt1 != NULL);
238 	ASSERT(pkt1->pktsched_pkt_mbuf != NULL);
239 	ASSERT(pkt1->pktsched_pcnt == 1);
240 
241 	/* allow in place clone, but make sure pkt2->pktsched_pkt won't leak */
242 	ASSERT((pkt1 == pkt2 && pkt1->pktsched_pkt_mbuf ==
243 	    pkt2->pktsched_pkt_mbuf) || (pkt1 != pkt2 &&
244 	    pkt2->pktsched_pkt_mbuf == NULL));
245 
246 	switch (pkt1->pktsched_ptype) {
247 	case QP_MBUF:
248 		m1 = (struct mbuf *)pkt1->pktsched_pkt_mbuf;
249 		m2 = m_dup(m1, M_NOWAIT);
250 		if (__improbable(m2 == NULL)) {
251 			return ENOBUFS;
252 		}
253 		pkt2->pktsched_pkt_mbuf = m2;
254 		break;
255 
256 #if SKYWALK
257 	case QP_PACKET:
258 		p1 = (struct __kern_packet *)pkt1->pktsched_pkt_kpkt;
259 		err = kern_packet_clone_nosleep(SK_PTR_ENCODE(p1,
260 		    METADATA_TYPE(p1), METADATA_SUBTYPE(p1)), &ph2,
261 		    KPKT_COPY_HEAVY);
262 		if (__improbable(err != 0)) {
263 			return err;
264 		}
265 		ASSERT(ph2 != 0);
266 		VERIFY(kern_packet_finalize(ph2) == 0);
267 		pkt2->pktsched_pkt_kpkt = SK_PTR_ADDR_KPKT(ph2);
268 		break;
269 #endif /* SKYWALK */
270 
271 	default:
272 		VERIFY(0);
273 		/* NOTREACHED */
274 		__builtin_unreachable();
275 	}
276 
277 	pkt2->pktsched_plen = pkt1->pktsched_plen;
278 	pkt2->pktsched_ptype = pkt1->pktsched_ptype;
279 	pkt2->pktsched_tail = pkt2->pktsched_pkt;
280 	pkt2->pktsched_pcnt = 1;
281 	return 0;
282 }
283 
284 void
pktsched_corrupt_packet(pktsched_pkt_t * pkt)285 pktsched_corrupt_packet(pktsched_pkt_t *pkt)
286 {
287 	struct mbuf *m = NULL;
288 	uint8_t *data = NULL;
289 	uint32_t data_len = 0;
290 	uint32_t rand32, rand_off, rand_bit;
291 #if SKYWALK
292 	struct __kern_packet *p = NULL;
293 #endif /* SKYWALK */
294 
295 	switch (pkt->pktsched_ptype) {
296 	case QP_MBUF:
297 		m = pkt->pktsched_pkt_mbuf;
298 		data = mtod(m, uint8_t *);
299 		data_len = m->m_pkthdr.len;
300 		break;
301 #if SKYWALK
302 	case QP_PACKET:
303 		p = pkt->pktsched_pkt_kpkt;
304 		if (p->pkt_pflags & PKT_F_MBUF_DATA) {
305 			m = p->pkt_mbuf;
306 			data = mtod(m, uint8_t *);
307 			data_len = m->m_pkthdr.len;
308 		} else {
309 			MD_BUFLET_ADDR_DLEN(p, data, data_len);
310 		}
311 		break;
312 #endif /* SKYWALK */
313 
314 	default:
315 		/* NOTREACHED */
316 		VERIFY(0);
317 		__builtin_unreachable();
318 	}
319 
320 	read_frandom(&rand32, sizeof(rand32));
321 	rand_bit = rand32 & 0x8;
322 	rand_off = (rand32 >> 3) % data_len;
323 	data[rand_off] ^= 1 << rand_bit;
324 }
325 
326 void
pktsched_free_pkt(pktsched_pkt_t * pkt)327 pktsched_free_pkt(pktsched_pkt_t *pkt)
328 {
329 	uint32_t cnt = pkt->pktsched_pcnt;
330 	ASSERT(cnt != 0);
331 
332 	switch (pkt->pktsched_ptype) {
333 	case QP_MBUF: {
334 		struct mbuf *m;
335 
336 		m = pkt->pktsched_pkt_mbuf;
337 		if (cnt == 1) {
338 			VERIFY(m->m_nextpkt == NULL);
339 		} else {
340 			VERIFY(m->m_nextpkt != NULL);
341 		}
342 		m_freem_list(m);
343 		break;
344 	}
345 #if SKYWALK
346 	case QP_PACKET: {
347 		struct __kern_packet *kpkt;
348 		int pcnt = 0;
349 
350 		kpkt = pkt->pktsched_pkt_kpkt;
351 		if (cnt == 1) {
352 			VERIFY(kpkt->pkt_nextpkt == NULL);
353 		} else {
354 			VERIFY(kpkt->pkt_nextpkt != NULL);
355 		}
356 		pp_free_packet_chain(kpkt, &pcnt);
357 		VERIFY(cnt == (uint32_t)pcnt);
358 		break;
359 	}
360 #endif /* SKYWALK */
361 
362 	default:
363 		VERIFY(0);
364 		/* NOTREACHED */
365 		__builtin_unreachable();
366 	}
367 	pkt->pktsched_pkt = CLASSQ_PKT_INITIALIZER(pkt->pktsched_pkt);
368 	pkt->pktsched_tail = CLASSQ_PKT_INITIALIZER(pkt->pktsched_tail);
369 	pkt->pktsched_plen = 0;
370 	pkt->pktsched_pcnt = 0;
371 }
372 
373 mbuf_svc_class_t
pktsched_get_pkt_svc(pktsched_pkt_t * pkt)374 pktsched_get_pkt_svc(pktsched_pkt_t *pkt)
375 {
376 	mbuf_svc_class_t svc = MBUF_SC_UNSPEC;
377 
378 	switch (pkt->pktsched_ptype) {
379 	case QP_MBUF:
380 		svc = m_get_service_class(pkt->pktsched_pkt_mbuf);
381 		break;
382 
383 #if SKYWALK
384 	case QP_PACKET:
385 		svc = pkt->pktsched_pkt_kpkt->pkt_svc_class;
386 		break;
387 #endif /* SKYWALK */
388 
389 	default:
390 		VERIFY(0);
391 		/* NOTREACHED */
392 		__builtin_unreachable();
393 	}
394 
395 	return svc;
396 }
397 
398 void
pktsched_get_pkt_vars(pktsched_pkt_t * pkt,volatile uint32_t ** flags,uint64_t ** timestamp,uint32_t * flowid,uint8_t * flowsrc,uint8_t * proto,uint32_t * comp_gencnt)399 pktsched_get_pkt_vars(pktsched_pkt_t *pkt, volatile uint32_t **flags,
400     uint64_t **timestamp, uint32_t *flowid, uint8_t *flowsrc, uint8_t *proto,
401     uint32_t *comp_gencnt)
402 {
403 	switch (pkt->pktsched_ptype) {
404 	case QP_MBUF: {
405 		struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);
406 
407 		if (flags != NULL) {
408 			*flags = &pkth->pkt_flags;
409 		}
410 		if (timestamp != NULL) {
411 			*timestamp = &pkth->pkt_timestamp;
412 		}
413 		if (flowid != NULL) {
414 			*flowid = pkth->pkt_flowid;
415 		}
416 		if (flowsrc != NULL) {
417 			*flowsrc = pkth->pkt_flowsrc;
418 		}
419 		if (proto != NULL) {
420 			*proto = pkth->pkt_proto;
421 		}
422 		if (comp_gencnt != NULL) {
423 			*comp_gencnt = pkth->comp_gencnt;
424 		}
425 
426 		break;
427 	}
428 
429 #if SKYWALK
430 	case QP_PACKET: {
431 		struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
432 
433 		if (flags != NULL) {
434 			/* use lower-32 bit for common flags */
435 			*flags = &kp->pkt_pflags32;
436 		}
437 		if (timestamp != NULL) {
438 			*timestamp = &kp->pkt_timestamp;
439 		}
440 		if (flowid != NULL) {
441 			*flowid = kp->pkt_flow_token;
442 		}
443 		if (flowsrc != NULL) {
444 			*flowsrc = (uint8_t)kp->pkt_flowsrc_type;
445 		}
446 		if (proto != NULL) {
447 			*proto = kp->pkt_transport_protocol;
448 		}
449 		if (comp_gencnt != NULL) {
450 			*comp_gencnt = kp->pkt_comp_gencnt;
451 		}
452 
453 		break;
454 	}
455 #endif /* SKYWALK */
456 
457 	default:
458 		VERIFY(0);
459 		/* NOTREACHED */
460 		__builtin_unreachable();
461 	}
462 }
463 
464 struct flowadv_fcentry *
pktsched_alloc_fcentry(pktsched_pkt_t * pkt,struct ifnet * ifp,int how)465 pktsched_alloc_fcentry(pktsched_pkt_t *pkt, struct ifnet *ifp, int how)
466 {
467 #pragma unused(ifp)
468 	struct flowadv_fcentry *fce = NULL;
469 
470 	switch (pkt->pktsched_ptype) {
471 	case QP_MBUF: {
472 		struct mbuf *m = pkt->pktsched_pkt_mbuf;
473 
474 		fce = flowadv_alloc_entry(how);
475 		if (fce == NULL) {
476 			break;
477 		}
478 
479 		_CASSERT(sizeof(m->m_pkthdr.pkt_flowid) ==
480 		    sizeof(fce->fce_flowid));
481 
482 		fce->fce_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
483 		fce->fce_flowid = m->m_pkthdr.pkt_flowid;
484 #if SKYWALK
485 		_CASSERT(sizeof(m->m_pkthdr.pkt_mpriv_srcid) ==
486 		    sizeof(fce->fce_flowsrc_token));
487 		_CASSERT(sizeof(m->m_pkthdr.pkt_mpriv_fidx) ==
488 		    sizeof(fce->fce_flowsrc_fidx));
489 
490 		if (fce->fce_flowsrc_type == FLOWSRC_CHANNEL) {
491 			fce->fce_flowsrc_fidx = m->m_pkthdr.pkt_mpriv_fidx;
492 			fce->fce_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
493 			fce->fce_ifp = ifp;
494 		}
495 #endif /* SKYWALK */
496 		break;
497 	}
498 
499 #if SKYWALK
500 	case QP_PACKET: {
501 		struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
502 
503 		fce = flowadv_alloc_entry(how);
504 		if (fce == NULL) {
505 			break;
506 		}
507 
508 		_CASSERT(sizeof(fce->fce_flowid) ==
509 		    sizeof(kp->pkt_flow_token));
510 		_CASSERT(sizeof(fce->fce_flowsrc_fidx) ==
511 		    sizeof(kp->pkt_flowsrc_fidx));
512 		_CASSERT(sizeof(fce->fce_flowsrc_token) ==
513 		    sizeof(kp->pkt_flowsrc_token));
514 
515 		ASSERT(kp->pkt_pflags & PKT_F_FLOW_ADV);
516 		fce->fce_flowsrc_type = kp->pkt_flowsrc_type;
517 		fce->fce_flowid = kp->pkt_flow_token;
518 		fce->fce_flowsrc_fidx = kp->pkt_flowsrc_fidx;
519 		fce->fce_flowsrc_token = kp->pkt_flowsrc_token;
520 		fce->fce_ifp = ifp;
521 		break;
522 	}
523 #endif /* SKYWALK */
524 
525 	default:
526 		VERIFY(0);
527 		/* NOTREACHED */
528 		__builtin_unreachable();
529 	}
530 
531 	return fce;
532 }
533 
534 uint32_t *
pktsched_get_pkt_sfb_vars(pktsched_pkt_t * pkt,uint32_t ** sfb_flags)535 pktsched_get_pkt_sfb_vars(pktsched_pkt_t *pkt, uint32_t **sfb_flags)
536 {
537 	uint32_t *hashp = NULL;
538 
539 	switch (pkt->pktsched_ptype) {
540 	case QP_MBUF: {
541 		struct pkthdr *pkth = &(pkt->pktsched_pkt_mbuf->m_pkthdr);
542 
543 		_CASSERT(sizeof(pkth->pkt_mpriv_hash) == sizeof(uint32_t));
544 		_CASSERT(sizeof(pkth->pkt_mpriv_flags) == sizeof(uint32_t));
545 		*sfb_flags = &pkth->pkt_mpriv_flags;
546 		hashp = &pkth->pkt_mpriv_hash;
547 		break;
548 	}
549 
550 #if SKYWALK
551 	case QP_PACKET: {
552 		struct __kern_packet *kp = pkt->pktsched_pkt_kpkt;
553 
554 		_CASSERT(sizeof(kp->pkt_classq_hash) == sizeof(uint32_t));
555 		_CASSERT(sizeof(kp->pkt_classq_flags) == sizeof(uint32_t));
556 		*sfb_flags = &kp->pkt_classq_flags;
557 		hashp = &kp->pkt_classq_hash;
558 		break;
559 	}
560 #endif /* SKYWALK */
561 
562 	default:
563 		VERIFY(0);
564 		/* NOTREACHED */
565 		__builtin_unreachable();
566 	}
567 
568 	return hashp;
569 }
570