xref: /xnu-11417.140.69/tests/skywalk/skywalk_test_common.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 /*
2  * Copyright (c) 2016-2024 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <assert.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <pthread.h>
34 #include <unistd.h>
35 #include <errno.h>
36 #include <stdbool.h>
37 #include <ifaddrs.h>
38 #include <netdb.h>
39 
40 #include <sys/event.h>
41 #include <sys/ioctl.h>
42 #include <sys/types.h>
43 #include <sys/time.h>
44 #include <sys/sysctl.h>
45 
46 #include <mach/mach_time.h>
47 #include <net/if.h>
48 #include <net/if_fake_var.h>
49 #include <net/if_redirect.h>
50 #include <net/if_dl.h>
51 #include <net/if_types.h>
52 #include <net/route.h>
53 #include <netinet/if_ether.h>
54 #include <net/if_arp.h>
55 #include <netinet6/in6_var.h>
56 #include <netinet6/nd6.h>
57 #include <arpa/inet.h>
58 #include <net/pktsched/pktsched.h>
59 #include <net/classq/if_classq.h>
60 #include <os/log.h>
61 
62 #include <err.h>
63 #include <TargetConditionals.h>
64 
65 #include <darwintest.h>
66 
67 #include <skywalk/os_skywalk_private.h>
68 #include <skywalk/os_channel_event.h>
69 
70 #include "skywalk_test_driver.h"
71 #include "skywalk_test_utils.h"
72 #include "skywalk_test_common.h"
73 
74 //#define SKT_COMMON_DEBUG 1
75 
/*
 * Forward declaration of a file-local helper; its definition is not part of
 * this section of the file.
 */
static int
sktc_ifnet_add_addr_with_socket(int s, char *ifname, struct in_addr *addr,
    struct in_addr *mask, struct in_addr *broadaddr);

/* ANSI SGR escape sequences: bold, bold + bright color, and attribute reset. */
const char *BOLD =              "\033[1m";
const char *BOLD_RED =          "\033[91;1m";
const char *BOLD_GREEN =        "\033[92;1m";
const char *BOLD_YELLOW =       "\033[93;1m";
const char *BOLD_BLUE =         "\033[94;1m";
const char *BOLD_MAGENTA =      "\033[95;1m";
const char *BOLD_CYAN =         "\033[96;1m";
const char *BOLD_WHITE =        "\033[97;1m";
const char *NORMAL =            "\033[0m";

/* NOTE(review): presumably a verbosity switch; it is not read in this section. */
int sktc_verbose = 1;
/*
 * Shared test nexus state.  sktc_setup_nexus() creates the controller and
 * fills in both UUIDs; sktc_cleanup_nexus() destroys the controller and
 * clears them again.
 */
nexus_controller_t sktc_nexus_controller;
uuid_t sktc_provider_uuid;
uuid_t sktc_instance_uuid;

/* Text form of sktc_instance_uuid. */
uuid_string_t sktc_instance_uuid_string;

/* Provider attribute values read back from the nexus by sktc_setup_nexus(). */
uint64_t sktc_nexus_ntxrings;
uint64_t sktc_nexus_nrxrings;
uint64_t sktc_nexus_ntxslots;
uint64_t sktc_nexus_nrxslots;
uint64_t sktc_nexus_slotsize;
uint64_t sktc_nexus_metasize;
uint64_t sktc_nexus_anonymous;
uint64_t sktc_nexus_max_frags;
uint64_t sktc_rejectonclose;

/* Forward declarations; the definitions are not part of this section. */
static void sktc_feth_set_flags(uint32_t flags);
static void sktc_feth_restore_flags(void);

/* NOTE(review): not referenced in this section of the file; purpose to be confirmed. */
static int expire_time, flags;

/*
 * SA_SIZE(sa): sa_len of a sockaddr rounded up to a multiple of
 * sizeof(uint32_t).  A NULL pointer or a zero sa_len yields sizeof(uint32_t).
 */
#ifndef SA_SIZE
#define SA_SIZE(sa)                                             \
    (  (!(sa) || ((struct sockaddr *)(sa))->sa_len == 0) ?      \
	sizeof(uint32_t)            :                               \
	1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(uint32_t) - 1) ) )
#endif
/* NOTE(review): not used in this section; by name, likely a cap for queue_stats.avgn -- confirm. */
#define AVGN_MAX        8
119 
/*
 * Per-queue statistics bookkeeping record.
 *
 * NOTE(review): the code that updates these fields is outside this section of
 * the file; the field notes below are inferred from the names and should be
 * confirmed against the users of this struct.
 */
struct queue_stats {
	int                      avgn;          /* presumably number of samples in the averages */
	double                   avg_bytes;     /* presumably running average of bytes */
	double                   avg_packets;   /* presumably running average of packets */
	u_int64_t                prev_bytes;    /* presumably byte counter at previous sample */
	u_int64_t                prev_packets;  /* presumably packet counter at previous sample */
	unsigned int             handle;        /* presumably queue/class handle */
};
128 
/* Forward declaration; the definition is not part of this section. */
static void print_fq_codel_stats(int slot, struct fq_codel_classstats *,
    struct queue_stats *);

/* NOTE(review): not referenced in this section of the file. */
int qflag;
/* One statistics record per slot, IFCQ_SC_MAX slots in total. */
struct queue_stats qstats[IFCQ_SC_MAX];

/* Forward declaration; the definition is not part of this section. */
static void arp_stats(uint32_t off, char *name, int af);

/*
 * Protocol table with a single "arp" entry that refers to arp_stats().
 * struct protox is declared elsewhere; see its declaration for the meaning
 * of the remaining (NULL / 0) members.
 */
struct protox protox[] = {
	{ NULL, arp_stats, NULL, "arp", 0 }
};
140 
141 
/*
 * Create the shared test nexus and cache its effective attributes.
 *
 * Creates a nexus controller, builds the provider and instance described by
 * sktc_attr through sktc_build_nexus(), and then reads the provider
 * attributes back into the sktc_nexus_* / sktc_rejectonclose globals.  Every
 * value read back is asserted to differ from the -1 placeholder and, when
 * the caller supplied something other than -1 in sktc_attr, to equal it.
 *
 * Precondition (asserted): no nexus is currently set up, i.e. the controller
 * is NULL and both UUIDs are null.  sktc_cleanup_nexus() restores that state.
 */
void
sktc_setup_nexus(struct sktc_nexus_attr *sktc_attr)
{
	nexus_attr_t attr;
	int error;

	assert(sktc_nexus_controller == NULL);
	assert(uuid_is_null(sktc_provider_uuid));
	assert(uuid_is_null(sktc_instance_uuid));

	sktc_nexus_controller = os_nexus_controller_create();
	assert(sktc_nexus_controller);

	sktc_build_nexus(sktc_nexus_controller, sktc_attr, &sktc_provider_uuid,
	    &sktc_instance_uuid);

	/* Keep a printable copy of the instance UUID alongside the binary one */
	uuid_unparse_upper(sktc_instance_uuid, sktc_instance_uuid_string);

	attr = os_nexus_attr_create();
	assert(attr);

	/* Clear the parameters to make sure they are being read */
	os_nexus_attr_set(attr, NEXUS_ATTR_ANONYMOUS, -1);
	os_nexus_attr_set(attr, NEXUS_ATTR_TX_RINGS, -1);
	os_nexus_attr_set(attr, NEXUS_ATTR_RX_RINGS, -1);
	os_nexus_attr_set(attr, NEXUS_ATTR_TX_SLOTS, -1);
	os_nexus_attr_set(attr, NEXUS_ATTR_RX_SLOTS, -1);
	os_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, -1);
	os_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS, -1);
	os_nexus_attr_set(attr, NEXUS_ATTR_REJECT_ON_CLOSE, -1);

	/* The following are not settable */
	error = os_nexus_attr_set(attr, NEXUS_ATTR_SLOT_META_SIZE, -1);
	SKTC_ASSERT_ERR(error == ENOTSUP);
	error = os_nexus_attr_set(attr, NEXUS_ATTR_IFINDEX, -1);
	SKTC_ASSERT_ERR(error == ENOTSUP);

	/* Overwrite the placeholders with the provider's real attributes */
	error = os_nexus_controller_read_provider_attr(sktc_nexus_controller,
	    sktc_provider_uuid, attr);
	SKTC_ASSERT_ERR(!error);

	/*
	 * For each attribute below: fetch it into its global, check that the
	 * placeholder was replaced, and check it against the request unless
	 * the request left it at -1.
	 */
	error = os_nexus_attr_get(attr, NEXUS_ATTR_ANONYMOUS,
	    &sktc_nexus_anonymous);
	SKTC_ASSERT_ERR(!error);
	assert(sktc_nexus_anonymous != -1);
	assert(sktc_attr->anonymous == -1 ||
	    sktc_attr->anonymous == sktc_nexus_anonymous);

	error = os_nexus_attr_get(attr, NEXUS_ATTR_TX_RINGS,
	    &sktc_nexus_ntxrings);
	SKTC_ASSERT_ERR(!error);
	assert(sktc_nexus_ntxrings != -1);
	assert(sktc_attr->ntxrings == -1 ||
	    sktc_attr->ntxrings == sktc_nexus_ntxrings);

	error = os_nexus_attr_get(attr, NEXUS_ATTR_RX_RINGS,
	    &sktc_nexus_nrxrings);
	SKTC_ASSERT_ERR(!error);
	assert(sktc_nexus_nrxrings != -1);
	assert(sktc_attr->nrxrings == -1 ||
	    sktc_attr->nrxrings == sktc_nexus_nrxrings);

	error = os_nexus_attr_get(attr, NEXUS_ATTR_TX_SLOTS,
	    &sktc_nexus_ntxslots);
	SKTC_ASSERT_ERR(!error);
	assert(sktc_nexus_ntxslots != -1);
	assert(sktc_attr->ntxslots == -1 ||
	    sktc_attr->ntxslots == sktc_nexus_ntxslots);

	error = os_nexus_attr_get(attr, NEXUS_ATTR_RX_SLOTS,
	    &sktc_nexus_nrxslots);
	SKTC_ASSERT_ERR(!error);
	assert(sktc_nexus_nrxslots != -1);
	assert(sktc_attr->nrxslots == -1 ||
	    sktc_attr->nrxslots == sktc_nexus_nrxslots);

	error = os_nexus_attr_get(attr, NEXUS_ATTR_SLOT_BUF_SIZE,
	    &sktc_nexus_slotsize);
	SKTC_ASSERT_ERR(!error);
	assert(sktc_nexus_slotsize != -1);
	assert(sktc_attr->slotsize == -1 ||
	    sktc_attr->slotsize == sktc_nexus_slotsize);

	/* Not settable above, but it can still be read back */
	error = os_nexus_attr_get(attr, NEXUS_ATTR_SLOT_META_SIZE,
	    &sktc_nexus_metasize);
	SKTC_ASSERT_ERR(!error);
	assert(sktc_nexus_metasize != -1);
	assert(sktc_attr->metasize == -1 ||
	    sktc_attr->metasize == sktc_nexus_metasize);

	error = os_nexus_attr_get(attr, NEXUS_ATTR_MAX_FRAGS,
	    &sktc_nexus_max_frags);
	SKTC_ASSERT_ERR(!error);
	assert(sktc_nexus_max_frags != -1);
	assert(sktc_attr->maxfrags == -1 ||
	    sktc_attr->maxfrags == sktc_nexus_max_frags);

	error = os_nexus_attr_get(attr, NEXUS_ATTR_REJECT_ON_CLOSE,
	    &sktc_rejectonclose);
	SKTC_ASSERT_ERR(!error);
	assert(sktc_rejectonclose != -1);
	assert(sktc_attr->rejectonclose == -1 ||
	    sktc_attr->rejectonclose == sktc_rejectonclose);

	os_nexus_attr_destroy(attr);
}
248 
249 void
sktc_cleanup_nexus(void)250 sktc_cleanup_nexus(void)
251 {
252 	int error;
253 
254 	assert(sktc_nexus_controller);
255 	assert(!uuid_is_null(sktc_provider_uuid));
256 	assert(!uuid_is_null(sktc_instance_uuid));
257 
258 	error = os_nexus_controller_free_provider_instance(sktc_nexus_controller,
259 	    sktc_instance_uuid);
260 	SKTC_ASSERT_ERR(!error);
261 	uuid_clear(sktc_instance_uuid);
262 	memset(sktc_instance_uuid_string, 0, sizeof(sktc_instance_uuid_string));
263 
264 	error = os_nexus_controller_deregister_provider(sktc_nexus_controller,
265 	    sktc_provider_uuid);
266 	SKTC_ASSERT_ERR(!error);
267 	uuid_clear(sktc_provider_uuid);
268 
269 	os_nexus_controller_destroy(sktc_nexus_controller);
270 	sktc_nexus_controller = NULL;
271 }
272 
273 
274 
275 int
sktc_bind_nexus_key(nexus_port_t port,const void * key,size_t keylen)276 sktc_bind_nexus_key(nexus_port_t port, const void *key, size_t keylen)
277 {
278 	return os_nexus_controller_bind_provider_instance(sktc_nexus_controller,
279 	           sktc_instance_uuid, port, -1, NULL, key, keylen, NEXUS_BIND_KEY);
280 }
281 
282 int
sktc_unbind_nexus_key(nexus_port_t port)283 sktc_unbind_nexus_key(nexus_port_t port)
284 {
285 	return os_nexus_controller_unbind_provider_instance(sktc_nexus_controller,
286 	           sktc_instance_uuid, port);
287 }
288 
/* Channel shared by all worker threads; opened in sktc_setup_channel_worker(). */
channel_t sktc_channel;

/* Set to 1 by sktc_cleanup_channel_worker() to ask the workers to exit. */
static volatile int sktc_channel_worker_die;
/* Number of rings, and therefore of entries in each array below. */
static size_t sktc_nrings;
/* Echo workers, or source workers when not in echo mode. */
static pthread_t *sktc_threads;
/* Sink workers; only allocated when not in echo mode. */
static pthread_t *sktc_threads2;
/* Per-ring kqueues watching the channel fd for EVFILT_READ / EVFILT_WRITE. */
static int *sktc_rxkqs;
static int *sktc_txkqs;
297 
/*
 * Worker thread: echo every received slot back out on the matching tx ring.
 *
 * arg is the address of this worker's entry in sktc_threads[] (that is what
 * sktc_setup_channel_worker() passes); the ring index is derived from it.
 * The worker first checks that the channel attributes agree with the values
 * cached by sktc_setup_nexus(), then loops until sktc_channel_worker_die is
 * set or the EVFILT_USER shutdown event is delivered.  Always returns NULL.
 */
static void *
sktc_channel_worker_echo(void *arg)
{
	/* Position of this worker's pthread_t within sktc_threads[] */
	int index = (pthread_t *)arg - sktc_threads;
	ring_id_t ringid;
	channel_ring_t rxring, txring;
	struct kevent kev;
	int error;
	uint64_t ntxrings, nrxrings, ntxslots, nrxslots,
	    slotsize, metasize;

	channel_attr_t attr = os_channel_attr_create();
	assert(attr);

	error = os_channel_read_attr(sktc_channel, attr);
	SKTC_ASSERT_ERR(!error);

	/*
	 * Each local is preset to -1 so that a getter which leaves it untouched
	 * is caught by the "!= -1" assertion.
	 */
	ntxrings = -1;
	error = os_channel_attr_get(attr, CHANNEL_ATTR_TX_RINGS, &ntxrings);
	SKTC_ASSERT_ERR(!error);
	assert(ntxrings != -1);
	assert(ntxrings == sktc_nexus_ntxrings);

	nrxrings = -1;
	error = os_channel_attr_get(attr, CHANNEL_ATTR_RX_RINGS, &nrxrings);
	SKTC_ASSERT_ERR(!error);
	assert(nrxrings != -1);
	assert(nrxrings == sktc_nexus_nrxrings);

	ntxslots = -1;
	error = os_channel_attr_get(attr, CHANNEL_ATTR_TX_SLOTS, &ntxslots);
	SKTC_ASSERT_ERR(!error);
	assert(ntxslots != -1);
	assert(ntxslots == sktc_nexus_ntxslots);

	nrxslots = -1;
	error = os_channel_attr_get(attr, CHANNEL_ATTR_RX_SLOTS, &nrxslots);
	SKTC_ASSERT_ERR(!error);
	assert(nrxslots != -1);
	assert(nrxslots == sktc_nexus_nrxslots);

	slotsize = -1;
	error = os_channel_attr_get(attr, CHANNEL_ATTR_SLOT_BUF_SIZE, &slotsize);
	SKTC_ASSERT_ERR(!error);
	assert(slotsize != -1);
	assert(slotsize == sktc_nexus_slotsize);

	metasize = -1;
	error = os_channel_attr_get(attr, CHANNEL_ATTR_SLOT_META_SIZE, &metasize);
	SKTC_ASSERT_ERR(!error);
	assert(metasize != -1);
	assert(metasize == sktc_nexus_metasize);

	os_channel_attr_destroy(attr);

	/* This worker owns the index-th rx ring and the index-th tx ring */
	ringid = os_channel_ring_id(sktc_channel, CHANNEL_FIRST_RX_RING);
	ringid += index;
	rxring = os_channel_rx_ring(sktc_channel, ringid);
	assert(rxring);

	ringid = os_channel_ring_id(sktc_channel, CHANNEL_FIRST_TX_RING);
	ringid += index;
	txring = os_channel_tx_ring(sktc_channel, ringid);
	assert(txring);

	while (!sktc_channel_worker_die) {
		slot_prop_t rxprop, txprop;
		channel_slot_t rxprev, rxslot, txprev, txslot;
		uint32_t rxavail, txavail;

		rxavail = os_channel_available_slot_count(rxring);

		/* Wait for incoming data */
		if (!rxavail) {
			error = kevent(sktc_rxkqs[index], NULL, 0, &kev, 1, NULL);
			SKTC_ASSERT_ERR(error != -1);
			SKTC_ASSERT_ERR(error == 1);

			/* Shutdown request from sktc_cleanup_channel_worker() */
			if (kev.filter == EVFILT_USER) {
				assert(kev.ident == (uintptr_t)&sktc_channel_worker_die);
				assert(sktc_channel_worker_die);
				break;
			}

			assert(kev.filter == EVFILT_READ);
			assert(kev.ident == os_channel_get_fd(sktc_channel));
			assert(kev.udata == NULL);

			/* A wakeup without slots is only tolerated when skywalk_in_driver is set */
			rxavail = os_channel_available_slot_count(rxring);
			if (!rxavail && skywalk_in_driver) {
				T_LOG("%s: no rx slots available\n", __func__);
				continue;
			} else {
				assert(rxavail);
			}
		}

		txavail = os_channel_available_slot_count(txring);

		/* Wait for outgoing space */
		if (!txavail) {
			error = kevent(sktc_txkqs[index], NULL, 0, &kev, 1, NULL);
			SKTC_ASSERT_ERR(error != -1);
			SKTC_ASSERT_ERR(error == 1);

			/* Shutdown request from sktc_cleanup_channel_worker() */
			if (kev.filter == EVFILT_USER) {
				assert(kev.ident == (uintptr_t)&sktc_channel_worker_die);
				assert(sktc_channel_worker_die);
				break;
			}

			assert(kev.filter == EVFILT_WRITE);
			assert(kev.ident == os_channel_get_fd(sktc_channel));
			assert(kev.udata == NULL);

			/* A wakeup without slots is only tolerated when skywalk_in_driver is set */
			txavail = os_channel_available_slot_count(txring);
			if (!txavail && skywalk_in_driver) {
				T_LOG("%s: no tx slots available\n", __func__);
				continue;
			} else {
				assert(txavail);
			}
		}

		rxprev = NULL;
		rxslot = os_channel_get_next_slot(rxring, NULL, &rxprop);
		assert(rxslot);
		txprev = NULL;
		txslot = os_channel_get_next_slot(txring, NULL, &txprop);
		assert(txslot);

		/*
		 * Copy rx payloads into tx slots pairwise until either ring has
		 * no further slot; rxprev/txprev remember the last pair handled.
		 */
		do {
			assert(txprop.sp_len == slotsize);
			assert(txprop.sp_len >= rxprop.sp_len);
			memcpy((void *)txprop.sp_buf_ptr, (void *)rxprop.sp_buf_ptr, rxprop.sp_len);
			txprop.sp_len = rxprop.sp_len;
			/* XXX: can't do this since it'll corrupt __user_quantum */
			//memcpy((void *)txprop.sp_mdata_ptr, (void *)rxprop.sp_mdata_ptr, metasize);
			os_channel_set_slot_properties(txring, txslot, &txprop);

			rxprev = rxslot;
			rxslot = os_channel_get_next_slot(rxring, rxslot, &rxprop);
			txprev = txslot;
			txslot = os_channel_get_next_slot(txring, txslot, &txprop);
		} while (rxslot && txslot);

		/* Advance the tx ring and sync TX before the rx slots are advanced past */
		assert(txprev);
		error = os_channel_advance_slot(txring, txprev);
		SKTC_ASSERT_ERR(!error);
		error = os_channel_sync(sktc_channel, CHANNEL_SYNC_TX);
		if (error && skywalk_in_driver) {
			/* With skywalk_in_driver set a failed sync is logged, not fatal */
			SKT_LOG("%s: sync fail error %d errno %d: %s\n", __func__, error, errno, strerror(errno));
		} else {
			SKTC_ASSERT_ERR(!error);
		}

		assert(rxprev);
		error = os_channel_advance_slot(rxring, rxprev);
		SKTC_ASSERT_ERR(!error);
	}

	T_LOG("exiting %s", __func__);

	return NULL;
}
463 
464 static void *
sktc_channel_worker_sink(void * arg)465 sktc_channel_worker_sink(void *arg)
466 {
467 	int index = (pthread_t *)arg - sktc_threads;
468 	ring_id_t ringid;
469 	channel_ring_t rxring;
470 	struct kevent kev;
471 	int error;
472 
473 	/* Only validate the attributes if we are the nexus creator */
474 	if (sktc_nexus_controller) {
475 		uint64_t nrxrings, nrxslots, slotsize;
476 
477 		channel_attr_t attr = os_channel_attr_create();
478 		assert(attr);
479 
480 		error = os_channel_read_attr(sktc_channel, attr);
481 		SKTC_ASSERT_ERR(!error);
482 
483 		nrxrings = -1;
484 		error = os_channel_attr_get(attr, CHANNEL_ATTR_RX_RINGS, &nrxrings);
485 		SKTC_ASSERT_ERR(!error);
486 		assert(nrxrings != -1);
487 		assert(nrxrings == sktc_nexus_nrxrings);
488 
489 		nrxslots = -1;
490 		error = os_channel_attr_get(attr, CHANNEL_ATTR_RX_SLOTS, &nrxslots);
491 		SKTC_ASSERT_ERR(!error);
492 		assert(nrxslots != -1);
493 		assert(nrxslots == sktc_nexus_nrxslots);
494 
495 		slotsize = -1;
496 		error = os_channel_attr_get(attr, CHANNEL_ATTR_SLOT_BUF_SIZE, &slotsize);
497 		SKTC_ASSERT_ERR(!error);
498 		assert(slotsize != -1);
499 		assert(slotsize == sktc_nexus_slotsize);
500 
501 		os_channel_attr_destroy(attr);
502 	}
503 
504 	ringid = os_channel_ring_id(sktc_channel, CHANNEL_FIRST_RX_RING);
505 	ringid += index;
506 	rxring = os_channel_rx_ring(sktc_channel, ringid);
507 	assert(rxring);
508 
509 	while (!sktc_channel_worker_die) {
510 		uint32_t rxavail;
511 
512 		rxavail = os_channel_available_slot_count(rxring);
513 
514 		/* Wait for incoming data */
515 		if (!rxavail) {
516 			error = kevent(sktc_rxkqs[index], NULL, 0, &kev, 1, NULL);
517 			SKTC_ASSERT_ERR(error != -1);
518 			SKTC_ASSERT_ERR(error == 1);
519 
520 			if (kev.filter == EVFILT_USER) {
521 				assert(kev.ident == (uintptr_t)&sktc_channel_worker_die);
522 				assert(sktc_channel_worker_die);
523 				break;
524 			}
525 
526 			assert(kev.filter == EVFILT_READ);
527 			assert(kev.ident == os_channel_get_fd(sktc_channel));
528 			assert(kev.udata == NULL);
529 
530 			rxavail = os_channel_available_slot_count(rxring);
531 			if (!rxavail && skywalk_in_driver) {
532 				T_LOG("%s: no rx slots available\n", __func__);
533 				continue;
534 			} else {
535 				assert(rxavail);
536 			}
537 		}
538 
539 		sktc_chew_random(sktc_channel, rxring, CHANNEL_SYNC_RX, false, rxavail);
540 	}
541 
542 	return NULL;
543 }
544 
545 static void *
sktc_channel_worker_source(void * arg)546 sktc_channel_worker_source(void *arg)
547 {
548 	int index = (pthread_t *)arg - sktc_threads;
549 	ring_id_t ringid;
550 	channel_ring_t txring;
551 	struct kevent kev;
552 	int error;
553 
554 	/* Only validate the attributes if we are the nexus creator */
555 	if (sktc_nexus_controller) {
556 		uint64_t ntxrings, ntxslots, slotsize;
557 
558 		channel_attr_t attr = os_channel_attr_create();
559 		assert(attr);
560 
561 		error = os_channel_read_attr(sktc_channel, attr);
562 		SKTC_ASSERT_ERR(!error);
563 
564 		ntxrings = -1;
565 		error = os_channel_attr_get(attr, CHANNEL_ATTR_TX_RINGS, &ntxrings);
566 		SKTC_ASSERT_ERR(!error);
567 		assert(ntxrings != -1);
568 		assert(ntxrings == sktc_nexus_ntxrings);
569 
570 		ntxslots = -1;
571 		error = os_channel_attr_get(attr, CHANNEL_ATTR_TX_SLOTS, &ntxslots);
572 		SKTC_ASSERT_ERR(!error);
573 		assert(ntxslots != -1);
574 		assert(ntxslots == sktc_nexus_ntxslots);
575 
576 		slotsize = -1;
577 		error = os_channel_attr_get(attr, CHANNEL_ATTR_SLOT_BUF_SIZE, &slotsize);
578 		SKTC_ASSERT_ERR(!error);
579 		assert(slotsize != -1);
580 		assert(slotsize == sktc_nexus_slotsize);
581 
582 		os_channel_attr_destroy(attr);
583 	}
584 
585 	ringid = os_channel_ring_id(sktc_channel, CHANNEL_FIRST_TX_RING);
586 	ringid += index;
587 	txring = os_channel_tx_ring(sktc_channel, ringid);
588 	assert(txring);
589 
590 	while (!sktc_channel_worker_die) {
591 		uint32_t txavail;
592 
593 		txavail = os_channel_available_slot_count(txring);
594 
595 		/* Wait for outgoing space */
596 		if (!txavail) {
597 			error = kevent(sktc_txkqs[index], NULL, 0, &kev, 1, NULL);
598 			SKTC_ASSERT_ERR(error != -1);
599 			SKTC_ASSERT_ERR(error == 1);
600 
601 			if (kev.filter == EVFILT_USER) {
602 				assert(kev.ident == (uintptr_t)&sktc_channel_worker_die);
603 				assert(sktc_channel_worker_die);
604 				break;
605 			}
606 
607 			assert(kev.filter == EVFILT_WRITE);
608 			assert(kev.ident == os_channel_get_fd(sktc_channel));
609 			assert(kev.udata == NULL);
610 
611 			txavail = os_channel_available_slot_count(txring);
612 			if (!txavail && skywalk_in_driver) {
613 				T_LOG("%s: no tx slots available\n", __func__);
614 				continue;
615 			} else {
616 				assert(txavail);
617 			}
618 		}
619 
620 		sktc_chew_random(sktc_channel, txring, CHANNEL_SYNC_TX, true, txavail);
621 	}
622 
623 	return NULL;
624 }
625 
626 void
sktc_setup_channel_worker(uuid_t instance_uuid,nexus_port_t channel_port,ring_id_t ringid,char * key,size_t keylen,bool echo,bool defunct_ok)627 sktc_setup_channel_worker(uuid_t instance_uuid, nexus_port_t channel_port,
628     ring_id_t ringid, char *key, size_t keylen, bool echo, bool defunct_ok)
629 {
630 	channel_attr_t attr = NULL;
631 
632 	if (key) {
633 		attr = os_channel_attr_create();
634 		assert(attr);
635 		os_channel_attr_set_key(attr, key, keylen);
636 	}
637 
638 	sktc_channel = sktu_channel_create_extended(instance_uuid, channel_port,
639 	    CHANNEL_DIR_TX_RX, ringid, attr,
640 	    -1, -1, -1, -1, -1, -1, -1, defunct_ok ? 1 : -1, -1, -1);
641 	assert(sktc_channel);
642 
643 	if (attr) {
644 		os_channel_attr_destroy(attr);
645 	}
646 
647 	ring_id_t fringid = os_channel_ring_id(sktc_channel, CHANNEL_FIRST_TX_RING);
648 	ring_id_t lringid = os_channel_ring_id(sktc_channel, CHANNEL_LAST_TX_RING);
649 
650 	assert(!sktc_nrings);
651 	sktc_nrings = lringid - fringid + 1;
652 
653 	assert(!sktc_threads);
654 	sktc_threads = malloc(sktc_nrings * sizeof(sktc_threads[0]));
655 	assert(sktc_threads);
656 
657 	assert(!sktc_threads2);
658 	if (!echo) {
659 		sktc_threads2 = malloc(sktc_nrings * sizeof(sktc_threads[0]));
660 		assert(sktc_threads2);
661 	}
662 
663 	// Double check the rx rings are the same
664 	assert(fringid == os_channel_ring_id(sktc_channel, CHANNEL_FIRST_RX_RING));
665 	assert(lringid == os_channel_ring_id(sktc_channel, CHANNEL_LAST_RX_RING));
666 
667 	sktc_rxkqs = malloc(sktc_nrings * sizeof(sktc_rxkqs[0]));
668 	assert(sktc_rxkqs);
669 	sktc_txkqs = malloc(sktc_nrings * sizeof(sktc_txkqs[0]));
670 	assert(sktc_txkqs);
671 
672 	for (size_t i = 0; i < sktc_nrings; i++) {
673 		struct kevent kev;
674 		int error;
675 		int channelfd = os_channel_get_fd(sktc_channel);
676 		assert(channelfd != -1);
677 
678 		sktc_rxkqs[i] = kqueue();
679 		assert(sktc_rxkqs[i] != -1);
680 		EV_SET(&kev, channelfd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
681 		error = kevent(sktc_rxkqs[i], &kev, 1, NULL, 0, NULL);
682 		SKTC_ASSERT_ERR(!error);
683 
684 		EV_SET(&kev, (uintptr_t)&sktc_channel_worker_die,
685 		    EVFILT_USER, EV_ADD | EV_ENABLE, 0, 0, NULL);
686 		error = kevent(sktc_rxkqs[i], &kev, 1, NULL, 0, NULL);
687 		SKTC_ASSERT_ERR(!error);
688 
689 		sktc_txkqs[i] = kqueue();
690 		assert(sktc_txkqs[i] != -1);
691 		EV_SET(&kev, channelfd, EVFILT_WRITE, EV_ADD | EV_ENABLE, 0, 0, NULL);
692 		error = kevent(sktc_txkqs[i], &kev, 1, NULL, 0, NULL);
693 		SKTC_ASSERT_ERR(!error);
694 
695 		EV_SET(&kev, (uintptr_t)&sktc_channel_worker_die,
696 		    EVFILT_USER, EV_ADD | EV_ENABLE, 0, 0, NULL);
697 		error = kevent(sktc_txkqs[i], &kev, 1, NULL, 0, NULL);
698 		SKTC_ASSERT_ERR(!error);
699 
700 		if (echo) {
701 			error = pthread_create(&sktc_threads[i], NULL, sktc_channel_worker_echo, &sktc_threads[i]);
702 			SKTC_ASSERT_ERR(!error);
703 		} else {
704 			error = pthread_create(&sktc_threads[i], NULL, sktc_channel_worker_source, &sktc_threads[i]);
705 			SKTC_ASSERT_ERR(!error);
706 			error = pthread_create(&sktc_threads2[i], NULL, sktc_channel_worker_sink, &sktc_threads[i]);
707 			SKTC_ASSERT_ERR(!error);
708 		}
709 	}
710 }
711 
712 void
sktc_cleanup_channel_worker(void)713 sktc_cleanup_channel_worker(void)
714 {
715 	sktc_channel_worker_die = 1;
716 
717 	for (size_t i = 0; i < sktc_nrings; i++) {
718 		struct kevent kev;
719 		int error;
720 
721 		EV_SET(&kev, (uintptr_t)&sktc_channel_worker_die,
722 		    EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
723 		error = kevent(sktc_rxkqs[i], &kev, 1, NULL, 0, NULL);
724 		SKTC_ASSERT_ERR(!error);
725 		error = kevent(sktc_txkqs[i], &kev, 1, NULL, 0, NULL);
726 		SKTC_ASSERT_ERR(!error);
727 
728 		error = pthread_join(sktc_threads[i], NULL);
729 		SKTC_ASSERT_ERR(!error);
730 		if (sktc_threads2) {
731 			error = pthread_join(sktc_threads2[i], NULL);
732 			SKTC_ASSERT_ERR(!error);
733 		}
734 
735 		error = close(sktc_rxkqs[i]);
736 		SKTC_ASSERT_ERR(!error);
737 		error = close(sktc_txkqs[i]);
738 		SKTC_ASSERT_ERR(!error);
739 	}
740 
741 	sktc_channel_worker_die = 0;
742 
743 	free(sktc_rxkqs);
744 	sktc_rxkqs = NULL;
745 	free(sktc_txkqs);
746 	sktc_txkqs = NULL;
747 
748 	sktc_nrings = 0;
749 	free(sktc_threads);
750 	sktc_threads = NULL;
751 
752 	if (sktc_threads2) {
753 		free(sktc_threads2);
754 		sktc_threads2 = NULL;
755 	}
756 
757 	os_channel_destroy(sktc_channel);
758 	sktc_channel = NULL;
759 }
760 
761 
762 void
sktc_generic_upipe_nexus_init(void)763 sktc_generic_upipe_nexus_init(void)
764 {
765 	struct sktc_nexus_attr attr = SKTC_NEXUS_ATTR_INIT();
766 
767 	strncpy((char *)attr.name, "skywalk_test_generic_upipe",
768 	    sizeof(nexus_name_t) - 1);
769 	attr.type = NEXUS_TYPE_USER_PIPE;
770 	attr.anonymous = 1;
771 
772 	sktc_setup_nexus(&attr);
773 }
774 
775 void
sktc_generic_upipe_echo_init(void)776 sktc_generic_upipe_echo_init(void)
777 {
778 	sktc_generic_upipe_nexus_init();
779 	sktc_setup_channel_worker(sktc_instance_uuid, 1, CHANNEL_RING_ID_ANY,
780 	    NULL, 0, true, true);
781 }
782 
783 void
sktc_generic_upipe_null_init(void)784 sktc_generic_upipe_null_init(void)
785 {
786 	sktc_generic_upipe_nexus_init();
787 	sktc_setup_channel_worker(sktc_instance_uuid, 1, CHANNEL_RING_ID_ANY,
788 	    NULL, 0, false, true);
789 }
790 
/*
 * Tear down a generic user-pipe fixture: stop the channel workers first,
 * then remove the nexus they were attached to.
 */
void
sktc_generic_upipe_fini(void)
{
	/* Workers and channel go first ... */
	sktc_cleanup_channel_worker();

	/* ... then the nexus itself */
	sktc_cleanup_nexus();
}
797 
798 
799 static int sktc_kpipe_loopback_was_enabled;
800 
801 void
sktc_generic_kpipe_init(void)802 sktc_generic_kpipe_init(void)
803 {
804 	int enabled = 1;
805 	size_t len = sizeof(sktc_kpipe_loopback_was_enabled);
806 
807 	assert(uuid_is_null(sktc_instance_uuid));
808 	sysctlbyname("kern.skywalk.kpipe.loopback.enabled",
809 	    &sktc_kpipe_loopback_was_enabled, &len, &enabled, sizeof(enabled));
810 
811 	len = sizeof(sktc_instance_uuid_string);
812 	sysctlbyname("kern.skywalk.kpipe.loopback.nx_uuid",
813 	    sktc_instance_uuid_string, &len, NULL, 0);
814 
815 	if (uuid_parse(sktc_instance_uuid_string, sktc_instance_uuid) ||
816 	    uuid_is_null(sktc_instance_uuid)) {
817 		T_LOG("loopback kpipe failed to start\n");
818 	}
819 }
820 
821 void
sktc_generic_kpipe_fini(void)822 sktc_generic_kpipe_fini(void)
823 {
824 	uuid_clear(sktc_instance_uuid);
825 	memset(sktc_instance_uuid_string, 0, sizeof(sktc_instance_uuid_string));
826 	sysctlbyname("kern.skywalk.kpipe.loopback.enabled",
827 	    NULL, 0, &sktc_kpipe_loopback_was_enabled, sizeof(sktc_kpipe_loopback_was_enabled));
828 }
829 
/* Previous value of kern.skywalk.mem.test, restored by the fini routine. */
static int sktc_memory_test_was_enabled;

/*
 * Set kern.skywalk.mem.test to 1, saving the previous value.  The sysctl
 * result is not checked.
 */
void
sktc_generic_memory_init(void)
{
	int enabled = 1;
	size_t oldlen = sizeof(sktc_memory_test_was_enabled);

	sysctlbyname("kern.skywalk.mem.test",
	    &sktc_memory_test_was_enabled, &oldlen,
	    &enabled, sizeof(enabled));
}
841 
842 void
sktc_generic_memory_fini(void)843 sktc_generic_memory_fini(void)
844 {
845 	sysctlbyname("kern.skywalk.mem.test", NULL, 0,
846 	    &sktc_memory_test_was_enabled,
847 	    sizeof(sktc_memory_test_was_enabled));
848 }
849 
850 channel_slot_t
send_bytes(channel_ring_t txring,uint32_t count)851 send_bytes(channel_ring_t txring, uint32_t count)
852 {
853 	int error;
854 
855 	assert(txring);
856 
857 	slot_prop_t prop;
858 	channel_slot_t slot = os_channel_get_next_slot(txring, NULL, &prop);
859 	assert(slot);
860 
861 	assert(prop.sp_buf_ptr);
862 	assert(prop.sp_len == txring->chrd_ring->ring_def_buf_size);
863 
864 	memset((void *)prop.sp_buf_ptr, 0x55, count);
865 	prop.sp_len = count;
866 	os_channel_set_slot_properties(txring, slot, &prop);
867 
868 	error = os_channel_advance_slot(txring, slot);
869 	SKTC_ASSERT_ERR(!error);
870 
871 	return slot;
872 }
873 
874 int
chew_slots(channel_ring_t rxring,uint32_t max)875 chew_slots(channel_ring_t rxring, uint32_t max)
876 {
877 	channel_slot_t pslot, slot;
878 	struct slot_prop prop;
879 	int count = 0;
880 	int error;
881 
882 	assert(rxring);
883 
884 	slot = os_channel_get_next_slot(rxring, NULL, &prop);
885 	assert(slot);
886 
887 	do {
888 		pslot = slot;
889 		slot = os_channel_get_next_slot(rxring, slot, &prop);
890 		count++;
891 		if (max > 0 && count >= max) {
892 			break;
893 		}
894 	} while (slot != NULL);
895 
896 	error = os_channel_advance_slot(rxring, pslot);
897 	SKTC_ASSERT_ERR(!error);
898 
899 	return count;
900 }
901 
902 void
set_watermark(channel_t channel,bool is_tx,channel_threshold_unit_t unit,uint32_t value)903 set_watermark(channel_t channel, bool is_tx,
904     channel_threshold_unit_t unit, uint32_t value)
905 {
906 	channel_attr_t ch_attr = os_channel_attr_create();
907 	os_channel_read_attr(channel, ch_attr);
908 	if (is_tx) {
909 		os_channel_attr_set(ch_attr, CHANNEL_ATTR_TX_LOWAT_UNIT, unit);
910 		os_channel_attr_set(ch_attr, CHANNEL_ATTR_TX_LOWAT_VALUE, value);
911 	} else {
912 		os_channel_attr_set(ch_attr, CHANNEL_ATTR_RX_LOWAT_UNIT, unit);
913 		os_channel_attr_set(ch_attr, CHANNEL_ATTR_RX_LOWAT_VALUE, value);
914 	}
915 	os_channel_write_attr(channel, ch_attr);
916 	os_channel_attr_destroy(ch_attr);
917 }
918 
919 int
wait_on_fd(int kq,int16_t filter,channel_t channel,uint32_t lowat,enum timeout_behavior b)920 wait_on_fd(int kq, int16_t filter,
921     channel_t channel, uint32_t lowat, enum timeout_behavior b)
922 {
923 	const char *filt_name = (filter == EVFILT_READ) ? "READ" : "WRITE";
924 	struct kevent kev, evlist;
925 	int triggered_events;
926 	struct timespec time_100ms = { .tv_sec = 0, .tv_nsec = 100000000 };
927 	struct timespec *timeout;
928 
929 	bzero(&kev, sizeof(kev));
930 	bzero(&evlist, sizeof(evlist));
931 
932 	EV_SET(
933 		&kev, 0, filter, EV_ADD | EV_ENABLE | EV_ONESHOT,
934 		(lowat > 0) ? NOTE_LOWAT : 0, lowat,
935 		NULL
936 		);
937 
938 	if (b != TIMEOUT_DISABLE) {
939 		timeout = &time_100ms;
940 	} else {
941 		timeout = NULL;
942 	}
943 
944 	T_LOG("Sleeping with kevent (%s)...", filt_name);
945 	fflush(stdout);
946 
947 	kev.ident = os_channel_get_fd(channel);
948 	triggered_events = kevent(kq, &kev, 1, &evlist, 1, timeout);
949 	T_LOG("...kevent (%s) woke up with return=%i, data=%li\n",
950 	    filt_name, triggered_events, evlist.data);
951 
952 	if (evlist.flags & EV_ERROR) {
953 		T_LOG("kevent (%s) encountered error %li\n", filt_name, evlist.data);
954 		assert(0);
955 	}
956 
957 	if (b == TIMEOUT_EXPECT) {
958 		assert(triggered_events == 0);
959 	} else if (b == TIMEOUT_FAIL) {
960 		assert(triggered_events == 1);
961 	}
962 
963 	return evlist.data;
964 }
965 
966 void
test_stage_init(struct stage_ctx * stage,uint32_t start)967 test_stage_init(struct stage_ctx *stage, uint32_t start)
968 {
969 	int error;
970 	stage->test_stage = start;
971 	error = pthread_mutex_init(&stage->change_mtx, NULL);
972 	SKTC_ASSERT_ERR(error == 0);
973 	error = pthread_cond_init(&stage->change_cond, NULL);
974 	SKTC_ASSERT_ERR(error == 0);
975 }
976 
977 void
test_stage_wait(struct stage_ctx * stage,uint32_t target)978 test_stage_wait(struct stage_ctx *stage, uint32_t target)
979 {
980 	pthread_mutex_lock(&stage->change_mtx);
981 
982 	while (stage->test_stage != target) {
983 		pthread_cond_wait(&stage->change_cond, &stage->change_mtx);
984 	}
985 
986 	pthread_mutex_unlock(&stage->change_mtx);
987 }
988 
989 void
test_stage_change(struct stage_ctx * stage,uint32_t new)990 test_stage_change(struct stage_ctx *stage, uint32_t new)
991 {
992 	pthread_mutex_lock(&stage->change_mtx);
993 
994 	T_LOG("Test stage changed from %u to %u\n", stage->test_stage, new);
995 	stage->test_stage = new;
996 	pthread_cond_signal(&stage->change_cond);
997 
998 	pthread_mutex_unlock(&stage->change_mtx);
999 }
1000 
1001 void
test_stage_destroy(struct stage_ctx * stage)1002 test_stage_destroy(struct stage_ctx *stage)
1003 {
1004 	int error;
1005 	error = pthread_mutex_destroy(&stage->change_mtx);
1006 	SKTC_ASSERT_ERR(error == 0);
1007 	error = pthread_cond_destroy(&stage->change_cond);
1008 	SKTC_ASSERT_ERR(error == 0);
1009 }
1010 
/*
 * Open an AF_INET datagram socket, typically used as a handle for
 * interface ioctls.
 *
 * Returns the descriptor, or a negative value on failure.  Callers read
 * errno after a failure, so it is preserved across the logging call, which
 * could otherwise overwrite it.
 */
static int
inet_dgram_socket(void)
{
	int     s;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s < 0) {
		int     saved_errno = errno;

		SKT_LOG("socket: %s\n", strerror(saved_errno));
		errno = saved_errno;
	}
	return s;
}
1022 
/*
 * Open an AF_INET6 datagram socket, typically used as a handle for IPv6
 * interface ioctls.
 *
 * Returns the descriptor, or a negative value on failure.  Callers read
 * errno after a failure, so it is preserved across the logging call, which
 * could otherwise overwrite it.
 */
static int
inet6_dgram_socket(void)
{
	int     s;

	s = socket(AF_INET6, SOCK_DGRAM, 0);
	if (s < 0) {
		int     saved_errno = errno;

		SKT_LOG("socket: %s\n", strerror(saved_errno));
		errno = saved_errno;
	}
	return s;
}
1034 
1035 bool
sktc_get_netif_nexus(const char * ifname,uuid_t netif)1036 sktc_get_netif_nexus(const char *ifname, uuid_t netif)
1037 {
1038 	bool                    found = FALSE;
1039 	struct if_nexusreq      ifnr;
1040 	int                     s;
1041 
1042 	s = inet_dgram_socket();
1043 	if (s >= 0) {
1044 		bzero((char *)&ifnr, sizeof(ifnr));
1045 		strlcpy(ifnr.ifnr_name, ifname, sizeof(ifnr.ifnr_name));
1046 		if (ioctl(s, SIOCGIFNEXUS, &ifnr) >= 0) {
1047 			uuid_copy(netif, ifnr.ifnr_netif);
1048 			found = TRUE;
1049 		}
1050 		close(s);
1051 	}
1052 	return found;
1053 }
1054 
1055 bool
sktc_get_flowswitch_nexus(const char * ifname,uuid_t netif)1056 sktc_get_flowswitch_nexus(const char *ifname, uuid_t netif)
1057 {
1058 	bool                    found = FALSE;
1059 	struct if_nexusreq      ifnr;
1060 	int                     s;
1061 
1062 	s = inet_dgram_socket();
1063 	if (s >= 0) {
1064 		bzero((char *)&ifnr, sizeof(ifnr));
1065 		strlcpy(ifnr.ifnr_name, ifname, sizeof(ifnr.ifnr_name));
1066 		if (ioctl(s, SIOCGIFNEXUS, &ifnr) >= 0) {
1067 			uuid_copy(netif, ifnr.ifnr_flowswitch);
1068 			found = TRUE;
1069 		}
1070 		close(s);
1071 	}
1072 	return found;
1073 }
1074 
1075 int
sktc_get_mac_addr(const char * ifname,uint8_t * addr)1076 sktc_get_mac_addr(const char *ifname, uint8_t *addr)
1077 {
1078 	int s, err = 0;
1079 	struct ifreq ifr;
1080 
1081 	s = inet_dgram_socket();
1082 	if (s < 0) {
1083 		return errno;
1084 	}
1085 	bzero(&ifr, sizeof(ifr));
1086 	(void) strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1087 	ifr.ifr_addr.sa_family = AF_LINK;
1088 	ifr.ifr_addr.sa_len = ETHER_ADDR_LEN;
1089 	err = ioctl(s, SIOCGIFLLADDR, &ifr);
1090 	if (err < 0) {
1091 		(void) close(s);
1092 		return errno;
1093 	}
1094 	bcopy(ifr.ifr_addr.sa_data, addr, ETHER_ADDR_LEN);
1095 	(void) close(s);
1096 	return 0;
1097 }
1098 
1099 static int
sktc_ifnet_destroy(int s,const char * ifname)1100 sktc_ifnet_destroy(int s, const char * ifname)
1101 {
1102 	int             error = 0;
1103 	struct ifreq    ifr;
1104 
1105 	bzero(&ifr, sizeof(ifr));
1106 	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1107 	if (ioctl(s, SIOCIFDESTROY, &ifr) < 0) {
1108 		error = errno;
1109 		SKT_LOG("SIOCSIFDESTROY %s: %s\n", ifr.ifr_name,
1110 		    strerror(errno));
1111 	}
1112 	return error;
1113 }
1114 
1115 static int
sktc_ifnet_set_flags(int s,const char * ifname,uint16_t flags_set,uint16_t flags_clear)1116 sktc_ifnet_set_flags(int s, const char * ifname,
1117     uint16_t flags_set, uint16_t flags_clear)
1118 {
1119 	uint16_t        flags_after;
1120 	uint16_t        flags_before;
1121 	struct ifreq    ifr;
1122 	int             ret;
1123 
1124 	bzero(&ifr, sizeof(ifr));
1125 	strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1126 	ret = ioctl(s, SIOCGIFFLAGS, (caddr_t)&ifr);
1127 	if (ret != 0) {
1128 		SKT_LOG("SIOCGIFFLAGS %s: %s\n",
1129 		    ifr.ifr_name, strerror(errno));
1130 		return ret;
1131 	}
1132 	flags_before = ifr.ifr_flags;
1133 	ifr.ifr_flags |= flags_set;
1134 	ifr.ifr_flags &= ~(flags_clear);
1135 	flags_after = ifr.ifr_flags;
1136 	if (flags_before == flags_after) {
1137 		/* nothing to do */
1138 		ret = 0;
1139 	} else {
1140 		/* issue the ioctl */
1141 		ret = ioctl(s, SIOCSIFFLAGS, &ifr);
1142 		if (ret != 0) {
1143 			SKT_LOG("SIOCSIFFLAGS %s 0x%x: %s\n",
1144 			    ifr.ifr_name, (uint16_t)ifr.ifr_flags,
1145 			    strerror(errno));
1146 		} else {
1147 			T_LOG(
1148 				"setflags(%s set 0x%x clear 0x%x) "
1149 				"0x%x => 0x%x\n",
1150 				ifr.ifr_name, flags_set, flags_clear,
1151 				flags_before, flags_after);
1152 		}
1153 	}
1154 	return ret;
1155 }
1156 
1157 /* On some platforms with DEBUG kernel, we need to wait a while */
1158 #define SIFCREATE_RETRY 100
1159 
1160 static int
sktc_ifnet_create(int s,const char * ifname)1161 sktc_ifnet_create(int s, const char * ifname)
1162 {
1163 	int             error = 0;
1164 	struct ifreq    ifr;
1165 
1166 	bzero(&ifr, sizeof(ifr));
1167 	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1168 
1169 	for (int i = 0; i < SIFCREATE_RETRY; i++) {
1170 		if (ioctl(s, SIOCIFCREATE, &ifr) < 0) {
1171 			error = errno;
1172 			T_LOG("SIOCSIFCREATE %s: %s\n", ifname,
1173 			    strerror(error));
1174 			if (error == EBUSY) {
1175 				/* interface is tearing down, try again */
1176 				usleep(100000);
1177 			} else if (error == EEXIST) {
1178 				/* interface exists, try destroying it */
1179 				(void)sktc_ifnet_destroy(s, ifname);
1180 			} else {
1181 				/* unexpected failure */
1182 				break;
1183 			}
1184 		} else {
1185 			error = 0;
1186 			break;
1187 		}
1188 	}
1189 	if (error == 0) {
1190 		error = sktc_ifnet_set_flags(s, ifname, IFF_UP, 0);
1191 	}
1192 	return error;
1193 }
1194 
1195 static int
sktc_ifnet_create_with_type(int s,const char * ifname,uint8_t type)1196 sktc_ifnet_create_with_type(int s, const char * ifname, uint8_t type)
1197 {
1198 	int             error = 0;
1199 	struct if_redirect_create_params ircp = {};
1200 	struct ifreq ifr = {};
1201 
1202 	bzero(&ircp, sizeof(ircp));
1203 	bzero(&ifr, sizeof(ifr));
1204 	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1205 
1206 	ircp.ircp_type = RD_CREATE_PARAMS_TYPE_NOATTACH;
1207 	ircp.ircp_len = sizeof(ircp);
1208 
1209 	switch (type) {
1210 	case RD_IF_TYPE_ETHERNET:
1211 		ircp.ircp_ftype = IFRTYPE_FAMILY_ETHERNET;
1212 		break;
1213 	case RD_IF_TYPE_CELLULAR:
1214 		ircp.ircp_ftype = IFRTYPE_FAMILY_CELLULAR;
1215 		break;
1216 	default:
1217 		ircp.ircp_ftype = IFRTYPE_FAMILY_ETHERNET;
1218 	}
1219 
1220 	ifr.ifr_data = (caddr_t)&ircp;
1221 
1222 	for (int i = 0; i < SIFCREATE_RETRY; i++) {
1223 		if (ioctl(s, SIOCIFCREATE2, &ifr) < 0) {
1224 			error = errno;
1225 			T_LOG("SIOCSIFCREATE2 %s: %s\n", ifname,
1226 			    strerror(error));
1227 			if (error == EBUSY) {
1228 				/* interface is tearing down, try again */
1229 				usleep(100000);
1230 			} else if (error == EEXIST) {
1231 				/* interface exists, try destroying it */
1232 				(void)sktc_ifnet_destroy(s, ifname);
1233 			} else {
1234 				/* unexpected failure */
1235 				break;
1236 			}
1237 		} else {
1238 			error = 0;
1239 			break;
1240 		}
1241 	}
1242 	if (error == 0) {
1243 		error = sktc_ifnet_set_flags(s, ifname, IFF_UP, 0);
1244 	}
1245 	return error;
1246 }
1247 
1248 int
sktc_ifnet_feth_create(int unit)1249 sktc_ifnet_feth_create(int unit)
1250 {
1251 	int             error = 0;
1252 	int             s;
1253 
1254 	s = inet_dgram_socket();
1255 	if (s < 0) {
1256 		error = errno;
1257 	} else {
1258 		char    ifname[IFNAMSIZ];
1259 
1260 		sktc_feth_name_for_unit(ifname, sizeof(ifname), unit);
1261 		error = sktc_ifnet_create(s, ifname);
1262 		close(s);
1263 	}
1264 	SKTC_ASSERT_ERR(error == 0);
1265 	return error;
1266 }
1267 
1268 int
sktc_ifnet_feth_destroy(int unit)1269 sktc_ifnet_feth_destroy(int unit)
1270 {
1271 	int             error = 0;
1272 	int             s;
1273 
1274 	s = inet_dgram_socket();
1275 	if (s < 0) {
1276 		error = errno;
1277 	} else {
1278 		char    ifname[IFNAMSIZ];
1279 
1280 		sktc_feth_name_for_unit(ifname, sizeof(ifname), unit);
1281 		error = sktc_ifnet_destroy(s, ifname);
1282 		close(s);
1283 	}
1284 	return error;
1285 }
1286 
1287 void
sktc_ifnet_feth0_create(void)1288 sktc_ifnet_feth0_create(void)
1289 {
1290 	sktc_feth_set_flags(FETH_FLAGS_TXSTART);
1291 	sktc_ifnet_feth_create(0);
1292 	sktc_feth_restore_flags();
1293 }
1294 
/* Destroy feth unit 0; the result of the destroy call is not reported. */
void
sktc_ifnet_feth0_destroy(void)
{
	const int unit = 0;

	(void) sktc_ifnet_feth_destroy(unit);
}
1300 
1301 void
sktc_ifnet_feth1_create(void)1302 sktc_ifnet_feth1_create(void)
1303 {
1304 	sktc_feth_set_flags(FETH_FLAGS_TXSTART);
1305 	sktc_ifnet_feth_create(1);
1306 	sktc_feth_restore_flags();
1307 }
1308 
/* Destroy feth unit 1; the result of the destroy call is not reported. */
void
sktc_ifnet_feth1_destroy(void)
{
	const int unit = 1;

	(void) sktc_ifnet_feth_destroy(unit);
}
1314 
1315 void
sktc_ifnet_feth0_1_create(void)1316 sktc_ifnet_feth0_1_create(void)
1317 {
1318 	sktc_feth_set_flags(FETH_FLAGS_TXSTART);
1319 	sktc_ifnet_feth_create(0);
1320 	sktc_ifnet_feth_create(1);
1321 	sktc_feth_restore_flags();
1322 }
1323 
/* Destroy feth unit 0 and then feth unit 1; results are not reported. */
void
sktc_ifnet_feth0_1_destroy(void)
{
	int unit;

	for (unit = 0; unit <= 1; unit++) {
		(void) sktc_ifnet_feth_destroy(unit);
	}
}
1330 
1331 static int
fake_set_dequeue_stall(const char * feth,boolean_t enable)1332 fake_set_dequeue_stall(const char *feth, boolean_t enable)
1333 {
1334 	struct ifdrv                    ifd;
1335 	struct if_fake_request          iffr;
1336 	int                             ret = 0;
1337 	int                             s;
1338 
1339 	s = inet_dgram_socket();
1340 	if (s < 0) {
1341 		return errno;
1342 	}
1343 
1344 	bzero((char *)&ifd, sizeof(ifd));
1345 	bzero((char *)&iffr, sizeof(iffr));
1346 	strlcpy(ifd.ifd_name, feth, sizeof(ifd.ifd_name));
1347 	ifd.ifd_cmd = IF_FAKE_S_CMD_SET_DEQUEUE_STALL;
1348 	ifd.ifd_len = sizeof(iffr);
1349 	ifd.ifd_data = &iffr;
1350 	iffr.iffr_dequeue_stall = enable ? 1 : 0;
1351 
1352 	if (ioctl(s, SIOCSDRVSPEC, &ifd) < 0) {
1353 		SKT_LOG("SIOCDRVSPEC set dequeue stall %s %d failed, "
1354 		    "%s\n", feth, iffr.iffr_dequeue_stall, strerror(errno));
1355 		ret = errno;
1356 	}
1357 	close(s);
1358 	return ret;
1359 }
1360 
1361 int
sktc_ifnet_feth0_set_dequeue_stall(boolean_t enable)1362 sktc_ifnet_feth0_set_dequeue_stall(boolean_t enable)
1363 {
1364 	return fake_set_dequeue_stall(FETH0_NAME, enable);
1365 }
1366 
1367 int
sktc_ifnet_feth1_set_dequeue_stall(boolean_t enable)1368 sktc_ifnet_feth1_set_dequeue_stall(boolean_t enable)
1369 {
1370 	return fake_set_dequeue_stall(FETH1_NAME, enable);
1371 }
1372 
1373 static void
fake_set_peer(int s,const char * feth,const char * feth_peer)1374 fake_set_peer(int s, const char * feth, const char * feth_peer)
1375 {
1376 	struct ifdrv                    ifd;
1377 	struct if_fake_request          iffr;
1378 
1379 	bzero((char *)&ifd, sizeof(ifd));
1380 	bzero((char *)&iffr, sizeof(iffr));
1381 	strlcpy(ifd.ifd_name, feth, sizeof(ifd.ifd_name));
1382 	ifd.ifd_cmd = IF_FAKE_S_CMD_SET_PEER;
1383 	ifd.ifd_len = sizeof(iffr);
1384 	ifd.ifd_data = &iffr;
1385 	if (feth_peer != NULL) {
1386 		strlcpy(iffr.iffr_peer_name, feth_peer,
1387 		    sizeof(iffr.iffr_peer_name));
1388 	}
1389 	if (ioctl(s, SIOCSDRVSPEC, &ifd) < 0) {
1390 		SKT_LOG("SIOCDRVSPEC set peer %s %s failed, %s\n",
1391 		    feth, (feth_peer != NULL) ? feth_peer : "<none>",
1392 		    strerror(errno));
1393 	}
1394 	return;
1395 }
1396 
/*
 * Previous sysctl values, captured by sktc_feth_set_flags() as it
 * overwrites each setting and written back by sktc_feth_restore_flags().
 */
static int fake_txstart_value;          /* net.link.fake.txstart */
static int fake_bsd_mode_value;         /* net.link.fake.bsd_mode */
static int fake_llink_cnt_value;        /* net.link.fake.llink_cnt */
static int fake_wmm_mode_value;         /* net.link.fake.wmm_mode */
static int fake_multi_buflet_value;     /* net.link.fake.multibuflet */
static int fake_pktpool_mode_value;     /* net.link.fake.pktpool_mode */
static int fake_tx_headroom_value;      /* net.link.fake.tx_headroom */
static int fake_nxattach_value;         /* net.link.fake.nxattach */
static int fake_user_access_value;      /* net.link.fake.user_access */
/* kern.skywalk.netif.sk_ll_prefix; stays empty unless it was overridden */
static char fake_sk_ll_prefix[IFNAMSIZ - 1] = {'\0'};
static int fake_fcs_value;              /* net.link.fake.fcs */
static int fake_trailer_length_value;   /* net.link.fake.trailer_length */
1409 
1410 static void
sktc_feth_set_flags(uint32_t flags)1411 sktc_feth_set_flags(uint32_t flags)
1412 {
1413 	int     bsd_mode;
1414 	int     llink_cnt;
1415 	int     wmm_mode;
1416 	int     pp_mode;
1417 	int     multi_buflet;
1418 	int     error;
1419 	size_t  len = sizeof(fake_txstart_value);
1420 	int     nxattach;
1421 	int     txstart;
1422 	uint32_t tx_headroom;
1423 	int     user_access;
1424 	int     fcs;
1425 	int     trailer_length;
1426 	char    ifname[IFNAMSIZ - 1] = "feth";
1427 
1428 	/* make sure bsd mode is set correctly */
1429 	bsd_mode = ((flags & FETH_FLAGS_NATIVE) == 0);
1430 	error = sysctlbyname("net.link.fake.bsd_mode",
1431 	    &fake_bsd_mode_value, &len,
1432 	    &bsd_mode, sizeof(bsd_mode));
1433 	if (error != 0) {
1434 		SKT_LOG(
1435 			"sysctlbyname(net.link.fake.bsd_mode) failed, %s\n",
1436 			strerror(errno));
1437 	}
1438 	SKTC_ASSERT_ERR(error == 0);
1439 
1440 	/* set the number of logical links */
1441 	if ((flags & FETH_FLAGS_LLINK) != 0) {
1442 		llink_cnt = 1;
1443 	} else if ((flags & FETH_FLAGS_MULTI_LLINK) != 0) {
1444 		llink_cnt = 4;
1445 	} else {
1446 		llink_cnt = 0;
1447 	}
1448 	error = sysctlbyname("net.link.fake.llink_cnt",
1449 	    &fake_llink_cnt_value, &len,
1450 	    &llink_cnt, sizeof(llink_cnt));
1451 	if (error != 0) {
1452 		SKT_LOG(
1453 			"sysctlbyname(net.link.fake.llink_cnt) failed, %s\n",
1454 			strerror(errno));
1455 	}
1456 	SKTC_ASSERT_ERR(error == 0);
1457 
1458 	/* make sure feth TXSTART is set correctly */
1459 	txstart = ((flags & FETH_FLAGS_TXSTART) != 0) ? 1 : 0;
1460 	error = sysctlbyname("net.link.fake.txstart",
1461 	    &fake_txstart_value, &len,
1462 	    &txstart, sizeof(txstart));
1463 	if (error != 0) {
1464 		SKT_LOG(
1465 			"sysctlbyname(net.link.fake.txstart) failed, %s\n",
1466 			strerror(errno));
1467 	}
1468 	SKTC_ASSERT_ERR(error == 0);
1469 
1470 	/* make sure wmm mode is set correctly */
1471 	wmm_mode = ((flags & FETH_FLAGS_WMM) != 0) ? 1 : 0;
1472 	error = sysctlbyname("net.link.fake.wmm_mode",
1473 	    &fake_wmm_mode_value, &len,
1474 	    &wmm_mode, sizeof(wmm_mode));
1475 	if (error != 0) {
1476 		SKT_LOG(
1477 			"sysctlbyname(net.link.fake.wmm_mode) failed, %s\n",
1478 			strerror(errno));
1479 	}
1480 	SKTC_ASSERT_ERR(error == 0);
1481 
1482 	/* enable multi-buflet mode if requested */
1483 	multi_buflet = ((flags & FETH_FLAGS_MULTI_BUFLET) != 0) ? 1 : 0;
1484 	error = sysctlbyname("net.link.fake.multibuflet",
1485 	    &fake_multi_buflet_value, &len,
1486 	    &multi_buflet, sizeof(multi_buflet));
1487 	if (error != 0) {
1488 		SKT_LOG(
1489 			"sysctlbyname(net.link.fake.multibuflet) failed, %s\n",
1490 			strerror(errno));
1491 	}
1492 	SKTC_ASSERT_ERR(error == 0);
1493 
1494 	/* enable user-acess mode if requested */
1495 	user_access = ((flags & FETH_FLAGS_USER_ACCESS) != 0) ? 1 : 0;
1496 	error = sysctlbyname("net.link.fake.user_access",
1497 	    &fake_user_access_value, &len,
1498 	    &user_access, sizeof(user_access));
1499 	if (error != 0) {
1500 		SKT_LOG(
1501 			"sysctlbyname(net.link.fake.user_access) failed, %s\n",
1502 			strerror(errno));
1503 	}
1504 	SKTC_ASSERT_ERR(error == 0);
1505 
1506 	/* change netif IPv6 ULA ifname prefix if requested */
1507 	if ((flags & FETH_FLAGS_LOW_LATENCY) != 0) {
1508 		len = sizeof(fake_sk_ll_prefix);
1509 		error =
1510 		    sysctlbyname("kern.skywalk.netif.sk_ll_prefix",
1511 		    fake_sk_ll_prefix, &len, ifname, sizeof(ifname));
1512 		if (error != 0) {
1513 			SKT_LOG("sysctlbyname failed "
1514 			    "for (sk_ll_prefix) %s\n",
1515 			    strerror(errno));
1516 		}
1517 	}
1518 	SKTC_ASSERT_ERR(error == 0);
1519 
1520 	/* enable copy mode if requested */
1521 	assert((flags & (FETH_FLAGS_NONSHAREDPOOL |
1522 	    FETH_FLAGS_NONSHAREDSPLITPOOLS)) != (FETH_FLAGS_NONSHAREDPOOL |
1523 	    FETH_FLAGS_NONSHAREDSPLITPOOLS));
1524 	pp_mode = ((flags & FETH_FLAGS_NONSHAREDPOOL) != 0) ? 1 : 0;
1525 	pp_mode = ((flags & FETH_FLAGS_NONSHAREDSPLITPOOLS) != 0) ? 2 : pp_mode;
1526 	len = sizeof(fake_pktpool_mode_value);
1527 	error = sysctlbyname("net.link.fake.pktpool_mode",
1528 	    &fake_pktpool_mode_value, &len, &pp_mode, sizeof(pp_mode));
1529 	if (error != 0) {
1530 		SKT_LOG(
1531 			"sysctlbyname(net.link.fake.pktpool_mode) failed, %s\n",
1532 			strerror(errno));
1533 	}
1534 	if ((flags & FETH_FLAGS_TX_HEADROOM) != 0) {
1535 #define FETH_HEADROOM_MUL_8_MAX         4
1536 		srand(time(NULL));
1537 		tx_headroom = (rand() % FETH_HEADROOM_MUL_8_MAX) * 8;
1538 		T_LOG("Using TX headroom %u\n", tx_headroom);
1539 	} else {
1540 		tx_headroom = 0;
1541 	}
1542 	SKTC_ASSERT_ERR(error == 0);
1543 	len = sizeof(fake_tx_headroom_value);
1544 	error = sysctlbyname("net.link.fake.tx_headroom",
1545 	    &fake_tx_headroom_value, &len,
1546 	    &tx_headroom, sizeof(tx_headroom));
1547 	SKTC_ASSERT_ERR(error == 0);
1548 
1549 	nxattach = ((flags & FETH_FLAGS_NXATTACH) != 0) ? 1 : 0;
1550 	len = sizeof(fake_nxattach_value);
1551 	error = sysctlbyname("net.link.fake.nxattach",
1552 	    &fake_nxattach_value, &len,
1553 	    &nxattach, sizeof(nxattach));
1554 	SKTC_ASSERT_ERR(error == 0);
1555 
1556 	fcs = ((flags & FETH_FLAGS_FCS) != 0) ? 1 : 0;
1557 	len = sizeof(fake_fcs_value);
1558 	error = sysctlbyname("net.link.fake.fcs",
1559 	    &fake_fcs_value, &len,
1560 	    &fcs, sizeof(fcs));
1561 	SKTC_ASSERT_ERR(error == 0);
1562 
1563 	trailer_length = ((flags & FETH_FLAGS_TRAILER) != 0) ? 2 : 0;
1564 	len = sizeof(fake_trailer_length_value);
1565 	error = sysctlbyname("net.link.fake.trailer_length",
1566 	    &fake_trailer_length_value, &len,
1567 	    &trailer_length, sizeof(trailer_length));
1568 	SKTC_ASSERT_ERR(error == 0);
1569 }
1570 
1571 static void
sktc_feth_restore_flags(void)1572 sktc_feth_restore_flags(void)
1573 {
1574 	int error;
1575 
1576 	error = sysctlbyname("net.link.fake.txstart",
1577 	    NULL, 0,
1578 	    &fake_txstart_value, sizeof(fake_txstart_value));
1579 	if (error != 0) {
1580 		SKT_LOG(
1581 			"sysctlbyname(net.link.fake.txstart) failed, %s\n",
1582 			strerror(errno));
1583 	}
1584 	error = sysctlbyname("net.link.fake.bsd_mode",
1585 	    NULL, 0,
1586 	    &fake_bsd_mode_value, sizeof(fake_bsd_mode_value));
1587 	if (error != 0) {
1588 		SKT_LOG(
1589 			"sysctlbyname(net.link.fake.bsd_mode) failed, %s\n",
1590 			strerror(errno));
1591 	}
1592 	error = sysctlbyname("net.link.fake.llink_cnt",
1593 	    NULL, 0,
1594 	    &fake_llink_cnt_value, sizeof(fake_llink_cnt_value));
1595 	if (error != 0) {
1596 		SKT_LOG(
1597 			"sysctlbyname(net.link.fake.llink) failed, %s\n",
1598 			strerror(errno));
1599 	}
1600 	error = sysctlbyname("net.link.fake.wmm_mode",
1601 	    NULL, 0,
1602 	    &fake_wmm_mode_value, sizeof(fake_wmm_mode_value));
1603 	if (error != 0) {
1604 		SKT_LOG(
1605 			"sysctlbyname(net.link.fake.wmm_mode) failed, %s\n",
1606 			strerror(errno));
1607 	}
1608 	error = sysctlbyname("net.link.fake.multibuflet", NULL, 0,
1609 	    &fake_multi_buflet_value, sizeof(fake_multi_buflet_value));
1610 	if (error != 0) {
1611 		SKT_LOG(
1612 			"sysctlbyname(net.link.fake.multibuflet) failed, %s\n",
1613 			strerror(errno));
1614 	}
1615 	error = sysctlbyname("net.link.fake.pktpool_mode", NULL, 0,
1616 	    &fake_pktpool_mode_value, sizeof(fake_pktpool_mode_value));
1617 	if (error != 0) {
1618 		SKT_LOG(
1619 			"sysctlbyname(net.link.fake.pktpool_mode) failed, %s\n",
1620 			strerror(errno));
1621 	}
1622 	error = sysctlbyname("net.link.fake.tx_headroom",
1623 	    NULL, 0,
1624 	    &fake_tx_headroom_value, sizeof(fake_tx_headroom_value));
1625 	if (error != 0) {
1626 		SKT_LOG(
1627 			"sysctlbyname(net.link.fake.tx_headroom) failed, %s\n",
1628 			strerror(errno));
1629 	}
1630 	error = sysctlbyname("net.link.fake.user_access", NULL, 0,
1631 	    &fake_user_access_value, sizeof(fake_user_access_value));
1632 	if (error != 0) {
1633 		SKT_LOG(
1634 			"sysctlbyname(net.link.fake.user_access) failed, %s\n",
1635 			strerror(errno));
1636 	}
1637 	if (strlen(fake_sk_ll_prefix) != 0) {
1638 		error =
1639 		    sysctlbyname("kern.skywalk.netif.sk_ll_prefix",
1640 		    NULL, 0, &fake_sk_ll_prefix,
1641 		    sizeof(fake_sk_ll_prefix));
1642 		if (error != 0) {
1643 			SKT_LOG(
1644 				"sysctlbyname(sk_ll_prefix) failed, %s\n",
1645 				strerror(errno));
1646 		}
1647 	}
1648 	error = sysctlbyname("net.link.fake.nxattach",
1649 	    NULL, 0,
1650 	    &fake_nxattach_value, sizeof(fake_nxattach_value));
1651 	SKTC_ASSERT_ERR(error == 0);
1652 	error = sysctlbyname("net.link.fake.fcs",
1653 	    NULL, 0,
1654 	    &fake_fcs_value, sizeof(fake_fcs_value));
1655 	SKTC_ASSERT_ERR(error == 0);
1656 	error = sysctlbyname("net.link.fake.trailer_length",
1657 	    NULL, 0,
1658 	    &fake_trailer_length_value, sizeof(fake_trailer_length_value));
1659 	SKTC_ASSERT_ERR(error == 0);
1660 }
1661 
1662 void
sktc_ifnet_feth_pair_create(uint32_t flags)1663 sktc_ifnet_feth_pair_create(uint32_t flags)
1664 {
1665 	int             error;
1666 	struct in_addr  feth0_addr;
1667 	struct in_addr  feth1_addr;
1668 	struct in_addr  mask;
1669 	int             s;
1670 
1671 	s = inet_dgram_socket();
1672 	if (s < 0) {
1673 		return;
1674 	}
1675 
1676 	/* create feth0, feth1 using flags */
1677 	sktc_feth_set_flags(flags);
1678 	error = sktc_ifnet_create(s, FETH0_NAME);
1679 	if (error == 0) {
1680 		error = sktc_ifnet_create(s, FETH1_NAME);
1681 	}
1682 	sktc_feth_restore_flags();
1683 	if (error != 0) {
1684 		return;
1685 	}
1686 
1687 	/* set them as peers */
1688 	fake_set_peer(s, FETH0_NAME, FETH1_NAME);
1689 
1690 	/* subnet mask 255.255.255.0 */
1691 	mask.s_addr = htonl(IN_CLASSC_NET);
1692 
1693 	/* assign feth0 IP */
1694 	feth0_addr = sktc_feth0_in_addr();
1695 	error = sktc_ifnet_add_addr_with_socket(s, FETH0_NAME, &feth0_addr,
1696 	    &mask, NULL);
1697 	if (error != 0) {
1698 		return;
1699 	}
1700 
1701 	/* assign feth1 IP */
1702 	feth1_addr = sktc_feth1_in_addr();
1703 	error = sktc_ifnet_add_addr_with_socket(s, FETH1_NAME, &feth1_addr,
1704 	    &mask, NULL);
1705 	if (error != 0) {
1706 		return;
1707 	}
1708 
1709 	/* add feth0 scoped default route */
1710 	error = sktc_ifnet_add_scoped_default_route(FETH0_NAME, feth0_addr);
1711 	if (error != 0) {
1712 		T_LOG("Failed to add default route for feth0, %s\n",
1713 		    strerror(error));
1714 		return;
1715 	}
1716 
1717 	/* add feth1 scoped default route */
1718 	error = sktc_ifnet_add_scoped_default_route(FETH1_NAME, feth1_addr);
1719 	if (error != 0) {
1720 		T_LOG("Failed to add default route for feth1, %s\n",
1721 		    strerror(error));
1722 		return;
1723 	}
1724 }
1725 
/*
 * Undo sktc_ifnet_feth_pair_create(): write the saved feth sysctl values
 * back, then destroy feth units 0 and 1.
 */
void
sktc_ifnet_feth_pair_destroy(void)
{
	/* sysctls first, interfaces second */
	sktc_feth_restore_flags();

	sktc_ifnet_feth0_1_destroy();
}
1732 
1733 static void
redirect_set_delegate(int s,const char * redirect,const char * delegate)1734 redirect_set_delegate(int s, const char *redirect, const char *delegate)
1735 {
1736 	struct ifdrv                    ifd;
1737 	struct if_redirect_request      iffr;
1738 
1739 	bzero((char *)&ifd, sizeof(ifd));
1740 	bzero((char *)&iffr, sizeof(iffr));
1741 
1742 	strlcpy(ifd.ifd_name, redirect, sizeof(ifd.ifd_name));
1743 	ifd.ifd_cmd = RD_S_CMD_SET_DELEGATE;
1744 	ifd.ifd_len = sizeof(iffr);
1745 	ifd.ifd_data = &iffr;
1746 	if (delegate != NULL) {
1747 		strlcpy(iffr.ifrr_delegate_name, delegate,
1748 		    sizeof(iffr.ifrr_delegate_name));
1749 	}
1750 	if (ioctl(s, SIOCSDRVSPEC, &ifd) < 0) {
1751 		SKT_LOG("SIOCDRVSPEC set delegate %s %s failed, %s\n",
1752 		    redirect, (delegate != NULL) ? delegate : "<none>",
1753 		    strerror(errno));
1754 	}
1755 	return;
1756 }
1757 
1758 void
sktc_ifnet_rd_create(void)1759 sktc_ifnet_rd_create(void)
1760 {
1761 	int             error;
1762 	struct in_addr  rd_addr;
1763 	struct in_addr  mask;
1764 	int             s;
1765 
1766 	s = inet_dgram_socket();
1767 	if (s < 0) {
1768 		return;
1769 	}
1770 
1771 	/* create rd0 using flags */
1772 	error = sktc_ifnet_create_with_type(s, RD0_NAME, RD_IF_TYPE_ETHERNET);
1773 	if (error != 0) {
1774 		return;
1775 	}
1776 
1777 	/* subnet mask 255.255.255.0 */
1778 	mask.s_addr = htonl(IN_CLASSC_NET);
1779 
1780 	/* assign rd0 IP */
1781 	rd_addr = sktc_rd0_in_addr();
1782 	error = sktc_ifnet_add_addr_with_socket(s, RD0_NAME, &rd_addr,
1783 	    &mask, NULL);
1784 	if (error != 0) {
1785 		return;
1786 	}
1787 
1788 	/* add rd0 scoped default route */
1789 	error = sktc_ifnet_add_scoped_default_route(RD0_NAME, rd_addr);
1790 	if (error != 0) {
1791 		T_LOG("Failed to add default route for rd0, %s\n",
1792 		    strerror(error));
1793 		return;
1794 	}
1795 
1796 	/* set feth0 as delegate for rd0 */
1797 	redirect_set_delegate(s, RD0_NAME, FETH0_NAME);
1798 }
1799 
1800 void
sktc_ifnet_rd_destroy(void)1801 sktc_ifnet_rd_destroy(void)
1802 {
1803 	int             error = 0;
1804 	int             s;
1805 
1806 	s = inet_dgram_socket();
1807 	if (s < 0) {
1808 		error = errno;
1809 	} else {
1810 		error = sktc_ifnet_destroy(s, RD0_NAME);
1811 		close(s);
1812 	}
1813 	return;
1814 }
1815 
1816 static short
sktc_ifnet_get_flags(int s,char * ifname)1817 sktc_ifnet_get_flags(int s, char * ifname)
1818 {
1819 	struct ifreq    ifr;
1820 	u_int           flags;
1821 
1822 	flags = 0;
1823 	bzero(&ifr, sizeof(ifr));
1824 	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1825 	if (ioctl(s, SIOCGIFFLAGS, (caddr_t)&ifr) < 0) {
1826 		SKT_LOG("SIOCIFFLAGS(%s) failed, %s\n", ifname,
1827 		    strerror(errno));
1828 	} else {
1829 		flags = ifr.ifr_flags;
1830 	}
1831 	return flags;
1832 }
1833 
1834 #define SIOCAIFADDR_RETRY 100
1835 static int
sktc_ifnet_add_addr_with_socket(int s,char * ifname,struct in_addr * addr,struct in_addr * mask,struct in_addr * broadaddr)1836 sktc_ifnet_add_addr_with_socket(int s, char *ifname, struct in_addr *addr,
1837     struct in_addr *mask, struct in_addr *broadaddr)
1838 {
1839 	struct sockaddr_in *sin;
1840 	struct ifaliasreq ifra;
1841 	int err = 0, tries = 0;
1842 
1843 	bzero(&ifra, sizeof(ifra));
1844 	(void) strncpy(ifra.ifra_name, ifname, sizeof(ifra.ifra_name));
1845 
1846 	if (addr != NULL) {
1847 		sin = (struct sockaddr_in *)&ifra.ifra_addr;
1848 		sin->sin_len = sizeof(*sin);
1849 		sin->sin_family = AF_INET;
1850 		sin->sin_addr = *addr;
1851 	}
1852 
1853 	if (mask != NULL) {
1854 		sin = (struct sockaddr_in *)&ifra.ifra_mask;
1855 		sin->sin_len = sizeof(*sin);
1856 		sin->sin_family = AF_INET;
1857 		sin->sin_addr = *mask;
1858 	}
1859 
1860 	if (broadaddr != NULL || (addr != NULL &&
1861 	    (sktc_ifnet_get_flags(s, ifname) & IFF_POINTOPOINT) != 0)) {
1862 		sin = (struct sockaddr_in *)&ifra.ifra_broadaddr;
1863 		sin->sin_len = sizeof(*sin);
1864 		sin->sin_family = AF_INET;
1865 		sin->sin_addr = (broadaddr != NULL) ? *broadaddr : *addr;
1866 	}
1867 
1868 retry:
1869 	if ((err = ioctl(s, SIOCAIFADDR, &ifra)) == -1) {
1870 		err = errno;
1871 		SKT_LOG("SIOCAIFADDR: %s\n", strerror(errno));
1872 		if (++tries < SIOCAIFADDR_RETRY) {
1873 			usleep(100000);
1874 			goto retry;
1875 		}
1876 	}
1877 	assert(err || sktu_check_interface_ipv4_address(ifname, addr->s_addr));
1878 	return err;
1879 }
1880 
/*
 * Add an IPv4 address to `ifname` using a temporary socket.
 *
 * Thin wrapper over sktc_ifnet_add_addr_with_socket().  Returns errno if
 * the socket cannot be opened, otherwise the wrapped call's result.
 */
int
sktc_ifnet_add_addr(char *ifname, struct in_addr *addr, struct in_addr *mask,
    struct in_addr *broadaddr)
{
	int     err;
	int     s = inet_dgram_socket();

	if (s >= 0) {
		err = sktc_ifnet_add_addr_with_socket(s, ifname, addr, mask,
		    broadaddr);
		(void) close(s);
	} else {
		err = errno;
	}
	return err;
}
1896 
/*
 * Convert an IPv6 prefix length into a netmask.
 *
 * The first `len` bits of `*mask` are set and the rest cleared.  `len` is
 * clamped to the valid range: values <= 0 produce an all-zero mask and
 * values above the address width (128 bits) produce an all-ones mask.
 * Without the clamp an oversized length writes past the end of the mask
 * and a negative length shifts by a negative count.
 */
static void
in6_len2mask(struct in6_addr * mask, int len)
{
	const int       max_bits = (int)(sizeof(mask->s6_addr) * 8);
	int             full_bytes;
	int             rem_bits;

	memset(mask, 0, sizeof(*mask));
	if (len <= 0) {
		return;
	}
	if (len > max_bits) {
		len = max_bits;
	}

	full_bytes = len / 8;
	rem_bits = len % 8;

	memset(mask->s6_addr, 0xff, (size_t)full_bytes);
	if (rem_bits != 0) {
		/* top rem_bits bits of the next byte */
		mask->s6_addr[full_bytes] = (0xff00 >> rem_bits) & 0xff;
	}
}
1909 
1910 static int
sktc_ifnet_add_addr6_with_socket(int s,char * ifname,struct in6_addr * addr,struct in6_addr * dstaddr,int prefix_len,int flags)1911 sktc_ifnet_add_addr6_with_socket(int s, char *ifname, struct in6_addr *addr,
1912     struct in6_addr *dstaddr, int prefix_len, int flags)
1913 {
1914 	struct sockaddr_in6 *sin;
1915 	struct in6_aliasreq ifra;
1916 	int err = 0, tries = 0;
1917 
1918 	bzero(&ifra, sizeof(ifra));
1919 	(void) strncpy(ifra.ifra_name, ifname, sizeof(ifra.ifra_name));
1920 
1921 	if (addr != NULL) {
1922 		sin = (struct sockaddr_in6 *)(&ifra.ifra_addr);
1923 		sin->sin6_family = AF_INET6;
1924 		sin->sin6_len = sizeof(*sin);
1925 		sin->sin6_addr = *addr;
1926 	}
1927 
1928 	if (dstaddr != NULL) {
1929 		sin = (struct sockaddr_in6 *)(&ifra.ifra_dstaddr);
1930 		sin->sin6_family = AF_INET6;
1931 		sin->sin6_len = sizeof(*sin);
1932 		sin->sin6_addr = *dstaddr;
1933 	}
1934 
1935 	if (prefix_len != 0) {
1936 		struct in6_addr prefixmask;
1937 		in6_len2mask(&prefixmask, prefix_len);
1938 
1939 		sin = (struct sockaddr_in6 *)&ifra.ifra_prefixmask;
1940 		sin->sin6_family = AF_INET6;
1941 		sin->sin6_len = sizeof(*sin);
1942 		sin->sin6_addr = prefixmask;
1943 	}
1944 
1945 	ifra.ifra_flags = flags;
1946 	ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
1947 	ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
1948 
1949 retry:
1950 	if ((err = ioctl(s, SIOCAIFADDR_IN6, &ifra)) == -1) {
1951 		err = errno;
1952 		SKT_LOG("SIOCAIFADDR_IN6: %s\n", strerror(errno));
1953 		if (++tries < SIOCAIFADDR_RETRY) {
1954 			usleep(100000);
1955 			goto retry;
1956 		}
1957 	}
1958 	return err;
1959 }
1960 
/*
 * Add an IPv6 address to `ifname` using a temporary AF_INET6 socket.
 *
 * Thin wrapper over sktc_ifnet_add_addr6_with_socket().  Returns errno if
 * the socket cannot be opened, otherwise the wrapped call's result.
 */
int
sktc_ifnet_add_addr6(char *ifname, struct in6_addr *addr,
    struct in6_addr *dstaddr, int prefix_len, int flags)
{
	int     err;
	int     s = inet6_dgram_socket();

	if (s >= 0) {
		err = sktc_ifnet_add_addr6_with_socket(s, ifname, addr,
		    dstaddr, prefix_len, flags);
		(void) close(s);
	} else {
		err = errno;
	}
	return err;
}
1977 
1978 int
sktc_ifnet_del_addr(char * ifname,struct in_addr * addr)1979 sktc_ifnet_del_addr(char *ifname, struct in_addr *addr)
1980 {
1981 	struct sockaddr_in *sin;
1982 	struct ifreq ifr;
1983 	int s, err = 0;
1984 
1985 	s = inet_dgram_socket();
1986 	if (s < 0) {
1987 		return errno;
1988 	}
1989 
1990 	bzero(&ifr, sizeof(ifr));
1991 	(void) strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1992 
1993 	if (addr != NULL) {
1994 		sin = (struct sockaddr_in *)&ifr.ifr_addr;
1995 		sin->sin_len = sizeof(*sin);
1996 		sin->sin_family = AF_INET;
1997 		sin->sin_addr = *addr;
1998 	}
1999 
2000 	if (ioctl(s, SIOCDIFADDR, &ifr) == -1) {
2001 		err = errno;
2002 		SKT_LOG("SIOCDIFADDR: %s\n", strerror(errno));
2003 	}
2004 	(void) close(s);
2005 	return err;
2006 }
2007 
2008 int
sktc_ifnet_del_addr6(char * ifname,struct in6_addr * addr)2009 sktc_ifnet_del_addr6(char *ifname, struct in6_addr *addr)
2010 {
2011 	struct sockaddr_in6 *sin6;
2012 	struct ifreq ifr;
2013 	int s, err = 0;
2014 
2015 	s = inet_dgram_socket();
2016 	if (s < 0) {
2017 		return errno;
2018 	}
2019 
2020 	bzero(&ifr, sizeof(ifr));
2021 	(void) strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
2022 
2023 	if (addr != NULL) {
2024 		sin6 = (struct sockaddr_in6 *)&ifr.ifr_addr;
2025 		sin6->sin6_len = sizeof(*sin6);
2026 		sin6->sin6_family = AF_INET6;
2027 		sin6->sin6_addr = *addr;
2028 	}
2029 
2030 	if (ioctl(s, SIOCDIFADDR_IN6, &ifr) == -1) {
2031 		err = errno;
2032 		SKT_LOG("SIOCDIFADDR: %s\n", strerror(errno));
2033 	}
2034 	(void) close(s);
2035 	return err;
2036 }
2037 
2038 /*
2039  * Stolen/modified from IPMonitor/ip_plugin.c
2040  */
2041 /*
2042  * Define: ROUTE_MSG_ADDRS_SPACE
2043  * Purpose:
2044  *   Since sizeof(sockaddr_dl) > sizeof(sockaddr_in), we need space for
2045  *   3 sockaddr_in's and 2 sockaddr_dl's, but pad it just in case
2046  *   someone changes the code and doesn't think to modify this.
2047  */
2048 #define ROUTE_MSG_ADDRS_SPACE   (3 * sizeof(struct sockaddr_in) \
2049 	                         + 2 * sizeof(struct sockaddr_dl) \
2050 	                         + 128)
2051 typedef struct {
2052 	struct rt_msghdr    hdr;
2053 	char                addrs[ROUTE_MSG_ADDRS_SPACE];
2054 } route_msg;
2055 
/* Interface index; 0 is treated as "no interface" by the route code. */
typedef unsigned int    IFIndex;

/* Bit flags that select how an IPv4Route is installed. */
typedef enum {
	kRouteFlagsIsScoped         = 1 << 0,   /* bind the route to ifindex */
	kRouteFlagsHasGateway       = 1 << 1,   /* `gateway` is an IP address */
	kRouteFlagsIsHost           = 1 << 2,   /* host route, no netmask */
} RouteFlags;

/* Description of one IPv4 route handed to IPv4RouteApply(). */
typedef struct {
	IFIndex         ifindex;        /* outgoing interface */
	RouteFlags      flags;          /* kRouteFlags* bits */
	struct in_addr  dest;           /* destination address */
	struct in_addr  mask;           /* netmask (non-host routes) */
	struct in_addr  gateway;        /* gateway (kRouteFlagsHasGateway) */
	struct in_addr  ifa;            /* interface address, 0 if unused */
} IPv4Route;

typedef IPv4Route *     IPv4RouteRef;
2072 
2073 /*
2074  * Function: IPv4RouteApply
2075  * Purpose:
2076  *   Add or remove the specified route to/from the kernel routing table.
2077  */
2078 static int
IPv4RouteApply(IPv4RouteRef route,int cmd,int sockfd)2079 IPv4RouteApply(IPv4RouteRef route, int cmd, int sockfd)
2080 {
2081 	size_t          len;
2082 	int             ret = 0;
2083 	route_msg       rtmsg;
2084 	union {
2085 		struct sockaddr_in *    in_p;
2086 		struct sockaddr_dl *    dl_p;
2087 		void *                  ptr;
2088 	} rtaddr;
2089 	static int      rtm_seq;
2090 	static int      rtm_seq_inited;
2091 
2092 	if (rtm_seq_inited == 0) {
2093 		rtm_seq_inited = 1;
2094 		rtm_seq = arc4random();
2095 		T_LOG("pid %d: rtm start seq %d\n", getpid(), rtm_seq);
2096 	}
2097 
2098 	if (route->ifindex == 0) {
2099 		T_LOG(
2100 			"no interface specified, ignoring %s\n",
2101 			inet_ntoa(route->dest));
2102 		return ENXIO;
2103 	}
2104 	if (sockfd == -1) {
2105 		return EBADF;
2106 	}
2107 	memset(&rtmsg, 0, sizeof(rtmsg));
2108 	rtmsg.hdr.rtm_type = cmd;
2109 	rtmsg.hdr.rtm_version = RTM_VERSION;
2110 	rtmsg.hdr.rtm_seq = rtm_seq++;
2111 	rtmsg.hdr.rtm_addrs = RTA_DST | RTA_GATEWAY | RTA_IFP;
2112 	if (route->ifa.s_addr != 0) {
2113 		rtmsg.hdr.rtm_addrs |= RTA_IFA;
2114 	}
2115 	rtmsg.hdr.rtm_flags = RTF_UP | RTF_STATIC;
2116 	if ((route->flags & kRouteFlagsIsHost) != 0) {
2117 		rtmsg.hdr.rtm_flags |= RTF_HOST;
2118 	} else {
2119 		rtmsg.hdr.rtm_addrs |= RTA_NETMASK;
2120 		if ((route->flags & kRouteFlagsHasGateway) == 0) {
2121 			rtmsg.hdr.rtm_flags |= RTF_CLONING;
2122 		}
2123 	}
2124 	if ((route->flags & kRouteFlagsHasGateway) != 0) {
2125 		rtmsg.hdr.rtm_flags |= RTF_GATEWAY;
2126 	}
2127 	if ((route->flags & kRouteFlagsIsScoped) != 0) {
2128 		rtmsg.hdr.rtm_index = route->ifindex;
2129 		rtmsg.hdr.rtm_flags |= RTF_IFSCOPE;
2130 	}
2131 
2132 	rtaddr.ptr = rtmsg.addrs;
2133 
2134 	/* dest */
2135 	rtaddr.in_p->sin_len = sizeof(*rtaddr.in_p);
2136 	rtaddr.in_p->sin_family = AF_INET;
2137 	rtaddr.in_p->sin_addr = route->dest;
2138 	rtaddr.ptr += sizeof(*rtaddr.in_p);
2139 
2140 	/* gateway */
2141 	if ((rtmsg.hdr.rtm_flags & RTF_GATEWAY) != 0) {
2142 		/* gateway is an IP address */
2143 		rtaddr.in_p->sin_len = sizeof(*rtaddr.in_p);
2144 		rtaddr.in_p->sin_family = AF_INET;
2145 		rtaddr.in_p->sin_addr = route->gateway;
2146 		rtaddr.ptr += sizeof(*rtaddr.in_p);
2147 	} else {
2148 		/* gateway is the interface itself */
2149 		rtaddr.dl_p->sdl_len = sizeof(*rtaddr.dl_p);
2150 		rtaddr.dl_p->sdl_family = AF_LINK;
2151 		rtaddr.dl_p->sdl_index = route->ifindex;
2152 		rtaddr.ptr += sizeof(*rtaddr.dl_p);
2153 	}
2154 
2155 	/* mask */
2156 	if ((rtmsg.hdr.rtm_addrs & RTA_NETMASK) != 0) {
2157 		rtaddr.in_p->sin_len = sizeof(*rtaddr.in_p);
2158 		rtaddr.in_p->sin_family = AF_INET;
2159 		rtaddr.in_p->sin_addr = route->mask;
2160 		rtaddr.ptr += sizeof(*rtaddr.in_p);
2161 	}
2162 
2163 	/* interface */
2164 	if ((rtmsg.hdr.rtm_addrs & RTA_IFP) != 0) {
2165 		rtaddr.dl_p->sdl_len = sizeof(*rtaddr.dl_p);
2166 		rtaddr.dl_p->sdl_family = AF_LINK;
2167 		rtaddr.dl_p->sdl_index = route->ifindex;
2168 		rtaddr.ptr += sizeof(*rtaddr.dl_p);
2169 	}
2170 	/* interface address */
2171 	if ((rtmsg.hdr.rtm_addrs & RTA_IFA) != 0) {
2172 		rtaddr.in_p->sin_len = sizeof(*rtaddr.in_p);
2173 		rtaddr.in_p->sin_family = AF_INET;
2174 		rtaddr.in_p->sin_addr = route->ifa;
2175 		rtaddr.ptr += sizeof(*rtaddr.in_p);
2176 	}
2177 
2178 	/* apply the route */
2179 	len = (int)(sizeof(rtmsg.hdr) + (rtaddr.ptr - (void *)rtmsg.addrs));
2180 	rtmsg.hdr.rtm_msglen = len;
2181 	if (write(sockfd, &rtmsg, len) == -1) {
2182 		ret = errno;
2183 	}
2184 	return ret;
2185 }
2186 
/*
 * Open a raw PF_ROUTE socket.  Returns the descriptor, or -1 after
 * reporting the failure with perror().
 */
static int
open_routing_socket(void)
{
	int sockfd = socket(PF_ROUTE, SOCK_RAW, PF_ROUTE);

	if (sockfd == -1) {
		perror("socket");
	}
	return sockfd;
}
2197 
2198 
2199 int
sktc_ifnet_add_scoped_default_route(char * ifname,struct in_addr ifa)2200 sktc_ifnet_add_scoped_default_route(char * ifname, struct in_addr ifa)
2201 {
2202 	int             error;
2203 	IPv4Route       route;
2204 	int             sockfd;
2205 
2206 	bzero(&route, sizeof(route));
2207 	route.flags |= kRouteFlagsIsScoped;
2208 	route.ifa = ifa;
2209 	route.ifindex = if_nametoindex(ifname);
2210 	if (route.ifindex == 0) {
2211 		return ENOENT;
2212 	}
2213 	sockfd = open_routing_socket();
2214 	error = IPv4RouteApply(&route, RTM_ADD, sockfd);
2215 	if (sockfd >= 0) {
2216 		close(sockfd);
2217 	}
2218 	return error;
2219 }
2220 
2221 /* interval in nanoseconds */
2222 int
sktc_set_classq_update_interval(uint64_t ns,sktc_classq_type_t type)2223 sktc_set_classq_update_interval(uint64_t ns, sktc_classq_type_t type)
2224 {
2225 	int     error;
2226 	char    *sysctl_name;
2227 
2228 	switch (type) {
2229 	case SKTC_CLASSQ_DEF_C:
2230 		sysctl_name = "net.classq.def_c_update_interval";
2231 		break;
2232 	case SKTC_CLASSQ_DEF_L4S:
2233 		sysctl_name = "net.classq.def_l4s_update_interval";
2234 		break;
2235 	case SKTC_CLASSQ_LL_C:
2236 		sysctl_name = "net.classq.ll_c_update_interval";
2237 		break;
2238 	case SKTC_CLASSQ_LL_L4S:
2239 		sysctl_name = "net.classq.ll_l4s_update_interval";
2240 		break;
2241 
2242 	default:
2243 		assert(0);
2244 		__builtin_unreachable();
2245 		break;
2246 	}
2247 
2248 	error = sysctlbyname(sysctl_name,
2249 	    NULL, NULL, &ns, sizeof(ns));
2250 
2251 	if (error != 0) {
2252 		SKT_LOG(
2253 			"sysctlbyname(%s) failed, %s\n", sysctl_name,
2254 			strerror(errno));
2255 	}
2256 	return error;
2257 }
2258 
2259 /* interval in nanoseconds */
2260 int
sktc_set_classq_update_intervals(uint64_t ns)2261 sktc_set_classq_update_intervals(uint64_t ns)
2262 {
2263 	int     error;
2264 
2265 	error = sktc_set_classq_update_interval(ns, SKTC_CLASSQ_DEF_C);
2266 	assert(error == 0);
2267 	error = sktc_set_classq_update_interval(ns, SKTC_CLASSQ_DEF_L4S);
2268 	assert(error == 0);
2269 	error = sktc_set_classq_update_interval(ns, SKTC_CLASSQ_LL_C);
2270 	assert(error == 0);
2271 	error = sktc_set_classq_update_interval(ns, SKTC_CLASSQ_LL_L4S);
2272 	assert(error == 0);
2273 
2274 	return 0;
2275 }
2276 
2277 int
sktc_reset_classq_update_interval(sktc_classq_type_t type)2278 sktc_reset_classq_update_interval(sktc_classq_type_t type)
2279 {
2280 	return sktc_set_classq_update_interval(0, type);
2281 }
2282 
/*
 * Write 0 to the update-interval sysctls of every classq type and
 * return the result of the bulk set call.
 */
int
sktc_reset_classq_update_intervals(void)
{
	const uint64_t  reset_ns = 0;

	return sktc_set_classq_update_intervals(reset_ns);
}
2288 
2289 /* interval in nanoseconds */
2290 int
sktc_set_classq_target_qdelay(uint64_t ns,sktc_classq_type_t type)2291 sktc_set_classq_target_qdelay(uint64_t ns, sktc_classq_type_t type)
2292 {
2293 	int     error;
2294 	char    *sysctl_name;
2295 
2296 	switch (type) {
2297 	case SKTC_CLASSQ_DEF_C:
2298 		sysctl_name = "net.classq.def_c_target_qdelay";
2299 		break;
2300 	case SKTC_CLASSQ_DEF_L4S:
2301 		sysctl_name = "net.classq.def_l4s_target_qdelay";
2302 		break;
2303 	case SKTC_CLASSQ_LL_C:
2304 		sysctl_name = "net.classq.ll_c_target_qdelay";
2305 		break;
2306 	case SKTC_CLASSQ_LL_L4S:
2307 		sysctl_name = "net.classq.ll_l4s_target_qdelay";
2308 		break;
2309 
2310 	default:
2311 		assert(0);
2312 		__builtin_unreachable();
2313 		break;
2314 	}
2315 
2316 	error = sysctlbyname(sysctl_name, NULL, NULL,
2317 	    &ns, sizeof(ns));
2318 
2319 	if (error != 0) {
2320 		SKT_LOG(
2321 			"sysctlbyname(%s) failed, %s\n", sysctl_name,
2322 			strerror(errno));
2323 	}
2324 	return error;
2325 }
2326 
2327 /* interval in nanoseconds */
2328 int
sktc_set_classq_target_qdelays(uint64_t ns)2329 sktc_set_classq_target_qdelays(uint64_t ns)
2330 {
2331 	int     error;
2332 
2333 	error = sktc_set_classq_target_qdelay(ns, SKTC_CLASSQ_DEF_C);
2334 	assert(error == 0);
2335 	error = sktc_set_classq_target_qdelay(ns, SKTC_CLASSQ_DEF_L4S);
2336 	assert(error == 0);
2337 	error = sktc_set_classq_target_qdelay(ns, SKTC_CLASSQ_LL_C);
2338 	assert(error == 0);
2339 	error = sktc_set_classq_target_qdelay(ns, SKTC_CLASSQ_LL_L4S);
2340 	assert(error == 0);
2341 
2342 	return 0;
2343 }
2344 
2345 int
sktc_reset_classq_target_qdelay(sktc_classq_type_t type)2346 sktc_reset_classq_target_qdelay(sktc_classq_type_t type)
2347 {
2348 	return sktc_set_classq_target_qdelay(0, type);
2349 }
2350 
/*
 * Write 0 to the target-qdelay sysctls of every classq type and return
 * the result of the bulk set call.
 */
int
sktc_reset_classq_target_qdelays(void)
{
	const uint64_t  reset_ns = 0;

	return sktc_set_classq_target_qdelays(reset_ns);
}
2356 
/* Value of net.inet.tcp.msl read back by the last sktc_set_tcp_msl(). */
static int sktc_tcp_msl;

/*
 * Set net.inet.tcp.msl to `ms` (milliseconds).  The same sysctl call
 * stores the previous value in sktc_tcp_msl so that
 * sktc_restore_tcp_msl() can write it back.  A failure is logged and
 * then handed to SKTC_ASSERT_ERR.
 */
void
sktc_set_tcp_msl(int ms)
{
	size_t  saved_len = sizeof(sktc_tcp_msl);
	int     error;

	error = sysctlbyname("net.inet.tcp.msl", &sktc_tcp_msl, &saved_len,
	    &ms, sizeof(ms));
	if (error != 0) {
		SKT_LOG(
			"sysctlbyname(net.inet.tcp.msl) failed, %s\n",
			strerror(errno));
	}
	SKTC_ASSERT_ERR(!error);
}
2375 
2376 void
sktc_restore_tcp_msl(void)2377 sktc_restore_tcp_msl(void)
2378 {
2379 	int             error;
2380 
2381 	error = sysctlbyname("net.inet.tcp.msl",
2382 	    NULL, 0, &sktc_tcp_msl, sizeof(sktc_tcp_msl));
2383 
2384 	if (error != 0) {
2385 		SKT_LOG(
2386 			"sysctlbyname(net.inet.tcp.msl) failed, %s\n",
2387 			strerror(errno));
2388 	}
2389 	SKTC_ASSERT_ERR(!error);
2390 }
2391 
/* Saved kern.skywalk.flowswitch.ip_reass value; -1 means nothing saved. */
static int old_ip_reass_sysctl_value = -1;

/*
 * Set kern.skywalk.flowswitch.ip_reass to `new_value`.  When
 * `old_value` is non-NULL the previous setting is stored there by the
 * same sysctl call.  A failure is logged and then handed to
 * SKTC_ASSERT_ERR.
 */
static void
sktc_toggle_ip_reass(int new_value, int *old_value)
{
	const char *ip_reass_sysctl = "kern.skywalk.flowswitch.ip_reass";
	size_t old_len = sizeof(*old_value);
	size_t *old_lenp = NULL;
	int error;

	/* only pass a length when the caller wants the old value */
	if (old_value != NULL) {
		old_lenp = &old_len;
	}

	error = sysctlbyname(ip_reass_sysctl, old_value, old_lenp,
	    &new_value, sizeof(new_value));
	if (error) {
		SKT_LOG("sysctlbyname(%s) failed,%s\n", ip_reass_sysctl,
		    strerror(errno));
	}
	SKTC_ASSERT_ERR(!error);
}
2409 
2410 void
sktc_enable_ip_reass()2411 sktc_enable_ip_reass()
2412 {
2413 	sktc_toggle_ip_reass(1, &old_ip_reass_sysctl_value);
2414 }
2415 
2416 void
sktc_restore_ip_reass()2417 sktc_restore_ip_reass()
2418 {
2419 	assert(old_ip_reass_sysctl_value != -1);
2420 	sktc_toggle_ip_reass(old_ip_reass_sysctl_value, NULL);
2421 }
2422 
/*
 * Read kern.skywalk.flowswitch.ip_reass and report whether it is
 * non-zero.  The sysctl takes the values 0 (force off), 1 (force on)
 * and 2 (no force), so both 1 and 2 yield true.
 *
 * `enabled` starts at 0, matching sktc_is_netagent_enabled(), so the
 * return value is never read from an uninitialized variable should the
 * failure path fall through.
 */
bool
sktc_is_ip_reass_enabled(void)
{
	int enabled = 0;
	size_t len = sizeof(enabled);
	const char *ip_reass_sysctl = "kern.skywalk.flowswitch.ip_reass";
	int error;

	/* read-only sysctl access: no new value is supplied */
	error = sysctlbyname(ip_reass_sysctl, &enabled, &len, NULL, 0);
	if (error) {
		SKT_LOG("sysctlbyname(%s) failed,%s\n", ip_reass_sysctl,
		    strerror(errno));
	}
	SKTC_ASSERT_ERR(!error);

	return enabled != 0; // 0 force off, 1 force on, 2 no force.
}
2440 
/*
 * Read net.link.generic.system.enable_netagent and return true only
 * when its value is exactly 1.  A failed read is logged and then handed
 * to SKTC_ASSERT_ERR.
 */
bool
sktc_is_netagent_enabled(void)
{
	const char *enable_netagent_sysctl = "net.link.generic.system.enable_netagent";
	int value = 0;
	size_t value_len = sizeof(value);
	int error;

	/* read-only sysctl access: no new value is supplied */
	error = sysctlbyname(enable_netagent_sysctl, &value, &value_len,
	    NULL, 0);
	if (error) {
		SKT_LOG("sysctlbyname(%s) failed,%s\n", enable_netagent_sysctl,
		    strerror(errno));
	}
	SKTC_ASSERT_ERR(!error);

	if (value == 1) {
		return true;
	}
	return false;
}
2457 
2458 uint64_t
sktc_get_channel_attr(const channel_t chd,channel_attr_type_t type)2459 sktc_get_channel_attr(const channel_t chd, channel_attr_type_t type)
2460 {
2461 	channel_attr_t attr;
2462 	uint64_t attrval = -1;
2463 	int error;
2464 
2465 	attr = os_channel_attr_create();
2466 	error = os_channel_read_attr(chd, attr);
2467 	SKTC_ASSERT_ERR(!error);
2468 	error = os_channel_attr_get(attr, type, &attrval);
2469 	SKTC_ASSERT_ERR(!error);
2470 	assert(attrval != -1);
2471 	return attrval;
2472 }
2473 
/*
 * Previous kern.skywalk.flowswitch.rx_agg_tcp value; (uint32_t)-1 until
 * sktc_config_fsw_rx_agg_tcp() has stored one.
 */
static uint32_t sktc_fsw_rx_agg_tcp = (uint32_t)-1;

/*
 * Set kern.skywalk.flowswitch.rx_agg_tcp to `agg`, storing the previous
 * value in sktc_fsw_rx_agg_tcp through the same sysctl call.  A failure
 * is logged and then handed to SKTC_ASSERT_ERR.
 */
void
sktc_config_fsw_rx_agg_tcp(uint32_t agg)
{
	size_t saved_len = sizeof(sktc_fsw_rx_agg_tcp);
	int error;

	error = sysctlbyname("kern.skywalk.flowswitch.rx_agg_tcp",
	    &sktc_fsw_rx_agg_tcp, &saved_len, &agg, sizeof(agg));
	if (error != 0) {
		SKT_LOG("sysctlbyname(kern.skywalk.flowswitch.rx_agg_tcp) failed, %s\n",
		    strerror(errno));
	}
	SKTC_ASSERT_ERR(!error);
}
2490 
2491 void
sktc_restore_fsw_rx_agg_tcp(void)2492 sktc_restore_fsw_rx_agg_tcp(void)
2493 {
2494 	int error;
2495 
2496 	if (sktc_fsw_rx_agg_tcp == (uint32_t)-1) {
2497 		return;
2498 	}
2499 	error = sysctlbyname("kern.skywalk.flowswitch.rx_agg_tcp",
2500 	    NULL, 0, &sktc_fsw_rx_agg_tcp, sizeof(sktc_fsw_rx_agg_tcp));
2501 
2502 	if (error != 0) {
2503 		SKT_LOG("sysctlbyname(kern.skywalk.flowswitch."
2504 		    "rx_agg_tcp) failed, %s\n", strerror(errno));
2505 	}
2506 	SKTC_ASSERT_ERR(!error);
2507 }
2508 
/*
 * Previous kern.skywalk.chan_buf_alloc value; (uint32_t)-1 until
 * sktc_enable_channel_buflet_alloc() has stored one.
 */
static uint32_t sktc_channel_buflet_alloc = (uint32_t)-1;

/*
 * Set kern.skywalk.chan_buf_alloc to 1, storing the previous value in
 * sktc_channel_buflet_alloc through the same sysctl call.  A failure is
 * logged and then handed to SKTC_ASSERT_ERR.
 */
void
sktc_enable_channel_buflet_alloc(void)
{
	uint32_t on = 1;
	size_t saved_len = sizeof(sktc_channel_buflet_alloc);
	int error;

	error = sysctlbyname("kern.skywalk.chan_buf_alloc",
	    &sktc_channel_buflet_alloc, &saved_len, &on, sizeof(on));
	if (error != 0) {
		SKT_LOG("sysctlbyname(kern.skywalk.chan_buf_alloc failed, %s\n",
		    strerror(errno));
	}
	SKTC_ASSERT_ERR(!error);
}
2526 
2527 void
sktc_restore_channel_buflet_alloc(void)2528 sktc_restore_channel_buflet_alloc(void)
2529 {
2530 	int error;
2531 
2532 	if (sktc_channel_buflet_alloc == (uint32_t)-1) {
2533 		return;
2534 	}
2535 	error = sysctlbyname("kern.skywalk.chan_buf_alloc",
2536 	    NULL, 0, &sktc_channel_buflet_alloc,
2537 	    sizeof(sktc_channel_buflet_alloc));
2538 
2539 	if (error != 0) {
2540 		SKT_LOG("sysctlbyname(kern.skywalk.chan_buf_alloc "
2541 		    "failed, %s\n", strerror(errno));
2542 	}
2543 	SKTC_ASSERT_ERR(!error);
2544 }
2545 
2546 void
skt_process_if_adv(nexus_port_t port,channel_t chan)2547 skt_process_if_adv(nexus_port_t port, channel_t chan)
2548 {
2549 	int error;
2550 	struct ifnet_interface_advisory ifadv;
2551 
2552 	error = os_channel_get_interface_advisory(chan, &ifadv);
2553 	if (error == EAGAIN) {
2554 		T_LOG("retrying interface advisory get\n");
2555 		error = os_channel_get_interface_advisory(chan, &ifadv);
2556 	}
2557 #if SKT_COMMON_DEBUG
2558 	if (error == 0) {
2559 		T_LOG("Interface Advisory on port %u:\n", port);
2560 		T_LOG("\t version: %u\n", ifadv.header.version);
2561 		T_LOG("\t direction: %u\n", ifadv.header.direction);
2562 		T_LOG("\t rate trend: %d\n",
2563 		    ifadv.capacity.rate_trend_suggestion);
2564 		T_LOG("\t timestamp: 0x%llx\n",
2565 		    ifadv.capacity.timestamp);
2566 		T_LOG("\t max_bandwidth: 0x%llx\n",
2567 		    ifadv.capacity.max_bandwidth);
2568 		T_LOG("\t total_byte_count: 0x%llx\n",
2569 		    ifadv.capacity.total_byte_count);
2570 		T_LOG("\t average_throughput: 0x%llx\n",
2571 		    ifadv.capacity.average_throughput);
2572 		T_LOG("\t flushable_queue_size: %u\n",
2573 		    ifadv.capacity.flushable_queue_size);
2574 		T_LOG("\t non_flushable_queue_size: %u\n",
2575 		    ifadv.capacity.non_flushable_queue_size);
2576 		T_LOG("\t average_delay: %u\n",
2577 		    ifadv.capacity.average_delay);
2578 	}
2579 #endif /* SKT_COMMON_DEBUG */
2580 	SKTC_ASSERT_ERR(error == 0);
2581 }
2582 
2583 static void
skt_process_chan_event_common(channel_t chan,uint8_t payload_type,uint32_t stream_id,os_channel_event_type_t * captured_event_type,uint8_t * captured_event_data,size_t * captured_event_dlen,size_t max_event_dlen)2584 skt_process_chan_event_common(channel_t chan, uint8_t payload_type,
2585     uint32_t stream_id, os_channel_event_type_t *captured_event_type,
2586     uint8_t *captured_event_data, size_t *captured_event_dlen, size_t max_event_dlen)
2587 {
2588 	int error;
2589 	uint32_t nevents;
2590 	os_channel_event_t ev;
2591 	os_channel_event_handle_t eh;
2592 	os_channel_event_type_t etype;
2593 	packet_id_t *packet_id;
2594 	struct os_channel_event_data ed;
2595 
2596 	error = os_channel_get_next_event_handle(chan, &eh, &etype,
2597 	    &nevents);
2598 	SKTC_ASSERT_ERR(error == 0);
2599 	error = os_channel_event_get_next_event(eh, 0, &ev);
2600 	SKTC_ASSERT_ERR(error == 0);
2601 	error = os_channel_event_get_event_data(ev, &ed);
2602 	SKTC_ASSERT_ERR(error == 0);
2603 	assert(sizeof(*packet_id) <= ed.event_data_length);
2604 	packet_id = (packet_id_t*)ed.event_data;
2605 
2606  #if SKT_COMMON_DEBUG
2607 	T_LOG("chan event: packet: [%03hhu-%03hhu-%05hu-%05u-%05u-(%05u)] [%03hhu-%03hhu-xxxxx-xxxxx-%05u-(xxxxx)] event [%1u %03hu] [%1u %03hu]\n",
2608 	    packet_id->pktid_version,
2609 	    packet_id->pktid_payload_type,
2610 	    packet_id->pktid_sequence_number,
2611 	    packet_id->pktid_timestamp,
2612 	    packet_id->pktid_stream_identifier,
2613 	    packet_id->_reserved,
2614 
2615 	    (uint8_t)OS_PACKET_PKTID_VERSION_CURRENT,
2616 	    payload_type,
2617 	    stream_id,
2618 
2619 	    ed.event_type,
2620 	    ed.event_data_length,
2621 	    etype,
2622 	    (uint16_t)max_event_dlen
2623 	    );
2624 #endif /* SKT_COMMON_DEBUG */
2625 	assert(nevents == 1);
2626 	assert(etype == ed.event_type);
2627 	assert(packet_id->pktid_payload_type == payload_type);
2628 	assert(packet_id->pktid_stream_identifier == stream_id);
2629 	assert(packet_id->pktid_version == OS_PACKET_PKTID_VERSION_CURRENT);
2630 	assert(!ed.event_more);
2631 	assert(ed.event_data_length <= max_event_dlen);
2632 
2633 	memcpy(captured_event_data, ed.event_data, ed.event_data_length);
2634 	*captured_event_dlen = ed.event_data_length;
2635 	*captured_event_type = etype;
2636 
2637 	error = os_channel_event_free(chan, eh);
2638 	SKTC_ASSERT_ERR(error == 0);
2639 }
2640 
2641 void
skt_process_channel_event(channel_t chan,uint8_t payload_type,uint32_t stream_id,transmit_status_event_handler_t transmit_status_handler,transmit_expired_event_handler_t transmit_expired_handler,wildcard_event_handler_t wildcard_handler)2642 skt_process_channel_event(channel_t chan, uint8_t payload_type, uint32_t stream_id,
2643     transmit_status_event_handler_t transmit_status_handler,
2644     transmit_expired_event_handler_t transmit_expired_handler,
2645     wildcard_event_handler_t wildcard_handler)
2646 {
2647 	os_channel_event_type_t event_type;
2648 	uint8_t event_data[CHANNEL_EVENT_MAX_PAYLOAD_LEN];
2649 	size_t event_dlen;
2650 	skt_process_chan_event_common(chan, payload_type, stream_id,
2651 	    &event_type, event_data, &event_dlen, sizeof(event_data));
2652 
2653 #if SKT_COMMON_DEBUG
2654 	T_LOG("expiry_event=%p len=%lu [%lu]\n",
2655 	    event_data, event_dlen, sizeof(event_data));
2656 #endif /* SKT_COMMON_DEBUG */
2657 
2658 	switch (event_type) {
2659 	case CHANNEL_EVENT_PACKET_TRANSMIT_STATUS:
2660 		assert(event_dlen == sizeof(os_channel_event_packet_transmit_status_t));
2661 		assert(transmit_status_handler != NULL || wildcard_handler != NULL);
2662 		if (transmit_status_handler != NULL) {
2663 			transmit_status_handler(
2664 				(os_channel_event_packet_transmit_status_t*)event_data);
2665 		} else {
2666 			assert(wildcard_handler != NULL);
2667 			wildcard_handler(event_type, event_data, event_dlen);
2668 		}
2669 		break;
2670 	case CHANNEL_EVENT_PACKET_TRANSMIT_EXPIRED:
2671 		assert(event_dlen == sizeof(os_channel_event_packet_transmit_expired_t));
2672 		assert(transmit_expired_handler != NULL || wildcard_handler != NULL);
2673 		if (transmit_expired_handler != NULL) {
2674 			transmit_expired_handler(
2675 				(os_channel_event_packet_transmit_expired_t*)event_data);
2676 		} else {
2677 			assert(wildcard_handler != NULL);
2678 			wildcard_handler(event_type, event_data, event_dlen);
2679 		}
2680 		break;
2681 	default:
2682 		assert(wildcard_handler != NULL);
2683 		wildcard_handler(event_type, event_data, event_dlen);
2684 		break;
2685 	}
2686 }
2687 
2688 static struct sockaddr_inarp *
getaddr(struct in_addr host)2689 getaddr(struct in_addr host)
2690 {
2691 	static struct sockaddr_inarp reply;
2692 
2693 	bzero(&reply, sizeof(reply));
2694 	reply.sin_len = sizeof(reply);
2695 	reply.sin_family = AF_INET;
2696 	reply.sin_addr = host;
2697 	return &reply;
2698 }
2699 
2700 /*
2701  * Returns true if the type is a valid one for ARP.
2702  */
2703 static int
valid_type(int type)2704 valid_type(int type)
2705 {
2706 	switch (type) {
2707 	case IFT_ETHER:
2708 	case IFT_FDDI:
2709 	case IFT_ISO88023:
2710 	case IFT_ISO88024:
2711 	case IFT_L2VLAN:
2712 #ifdef IFT_BRIDGE
2713 	case IFT_BRIDGE:
2714 #endif
2715 		return 1;
2716 	default:
2717 		return 0;
2718 	}
2719 }
2720 
2721 static struct rt_msghdr *
rtmsg(int cmd,struct sockaddr_inarp * dst,struct sockaddr_dl * sdl)2722 rtmsg(int cmd, struct sockaddr_inarp *dst, struct sockaddr_dl *sdl)
2723 {
2724 	static int seq;
2725 	int rlen;
2726 	int l;
2727 	struct sockaddr_in so_mask, *so_mask_ptr = &so_mask;
2728 	static int s = -1;
2729 	static pid_t pid;
2730 
2731 	static struct {
2732 		struct  rt_msghdr m_rtm;
2733 		char    m_space[512];
2734 	} m_rtmsg;
2735 
2736 	struct rt_msghdr *rtm = &m_rtmsg.m_rtm;
2737 	char *cp = m_rtmsg.m_space;
2738 
2739 	if (s < 0) {    /* first time: open socket, get pid */
2740 		s = socket(PF_ROUTE, SOCK_RAW, 0);
2741 		if (s < 0) {
2742 			err(1, "socket() failed\n");
2743 		}
2744 		pid = getpid();
2745 	}
2746 	bzero(&so_mask, sizeof(so_mask));
2747 	so_mask.sin_len = 8;
2748 	so_mask.sin_addr.s_addr = 0xffffffff;
2749 
2750 	errno = 0;
2751 	/*
2752 	 * XXX RTM_DELETE relies on a previous RTM_GET to fill the buffer
2753 	 * appropriately (except for the mask set just above).
2754 	 */
2755 	if (cmd == RTM_DELETE) {
2756 		goto doit;
2757 	}
2758 	bzero((char *)&m_rtmsg, sizeof(m_rtmsg));
2759 	rtm->rtm_flags = flags;
2760 	rtm->rtm_version = RTM_VERSION;
2761 
2762 	switch (cmd) {
2763 	default:
2764 		errx(1, "internal wrong cmd");
2765 	case RTM_ADD:
2766 		rtm->rtm_addrs |= RTA_GATEWAY;
2767 		rtm->rtm_rmx.rmx_expire = expire_time;
2768 		rtm->rtm_inits = RTV_EXPIRE;
2769 		rtm->rtm_flags |= (RTF_HOST | RTF_STATIC);
2770 		dst->sin_other = 0;
2771 	/* FALLTHROUGH */
2772 	case RTM_GET:
2773 		rtm->rtm_addrs |= RTA_DST;
2774 	}
2775 
2776 #define NEXTADDR(w, s) \
2777 	if ((s) != NULL && rtm->rtm_addrs & (w)) { \
2778 	        bcopy((s), cp, sizeof(*(s))); cp += SA_SIZE(s);}
2779 
2780 	NEXTADDR(RTA_DST, dst);
2781 	NEXTADDR(RTA_GATEWAY, sdl);
2782 	NEXTADDR(RTA_NETMASK, so_mask_ptr);
2783 
2784 	rtm->rtm_msglen = cp - (char *)&m_rtmsg;
2785 doit:
2786 	l = rtm->rtm_msglen;
2787 	rtm->rtm_seq = ++seq;
2788 	rtm->rtm_type = cmd;
2789 	if ((rlen = write(s, (char *)&m_rtmsg, l)) < 0) {
2790 		if (errno != ESRCH || cmd != RTM_DELETE) {
2791 			warn("writing to routing socket");
2792 			return NULL;
2793 		}
2794 	}
2795 	do {
2796 		l = read(s, (char *)&m_rtmsg, sizeof(m_rtmsg));
2797 	} while (l > 0 && (rtm->rtm_seq != seq || rtm->rtm_pid != pid));
2798 	if (l < 0) {
2799 		warn("read from routing socket");
2800 	}
2801 	return rtm;
2802 }
2803 
2804 int
skt_add_arp_entry(struct in_addr host,struct ether_addr * eaddr)2805 skt_add_arp_entry(struct in_addr host, struct ether_addr *eaddr)
2806 {
2807 	struct sockaddr_inarp *addr;
2808 	struct sockaddr_inarp *dst;     /* what are we looking for */
2809 	struct sockaddr_dl *sdl;
2810 	struct rt_msghdr *rtm;
2811 	struct ether_addr *ea;
2812 	struct sockaddr_dl sdl_m;
2813 
2814 	bzero(&sdl_m, sizeof(sdl_m));
2815 	sdl_m.sdl_len = sizeof(sdl_m);
2816 	sdl_m.sdl_family = AF_LINK;
2817 
2818 	dst = getaddr(host);
2819 	if (dst == NULL) {
2820 		return 1;
2821 	}
2822 	flags = expire_time = 0;
2823 
2824 	ea = (struct ether_addr *)LLADDR(&sdl_m);
2825 	*ea = *eaddr;
2826 	sdl_m.sdl_alen = ETHER_ADDR_LEN;
2827 
2828 	for (;;) {      /* try at most twice */
2829 		rtm = rtmsg(RTM_GET, dst, &sdl_m);
2830 		if (rtm == NULL) {
2831 			warn("%s", inet_ntoa(host));
2832 			return 1;
2833 		}
2834 		addr = (struct sockaddr_inarp *)(rtm + 1);
2835 		sdl = (struct sockaddr_dl *)(SA_SIZE(addr) + (char *)addr);
2836 		if (addr->sin_addr.s_addr != dst->sin_addr.s_addr) {
2837 			break;
2838 		}
2839 		if (sdl->sdl_family == AF_LINK &&
2840 		    (rtm->rtm_flags & RTF_LLINFO) &&
2841 		    !(rtm->rtm_flags & RTF_GATEWAY) &&
2842 		    valid_type(sdl->sdl_type)) {
2843 			break;
2844 		}
2845 	}
2846 
2847 	if (sdl->sdl_family != AF_LINK) {
2848 		T_LOG("cannot intuit interface index and type for %s\n",
2849 		    inet_ntoa(host));
2850 		return 1;
2851 	}
2852 	sdl_m.sdl_type = sdl->sdl_type;
2853 	sdl_m.sdl_index = sdl->sdl_index;
2854 	return rtmsg(RTM_ADD, dst, &sdl_m) == NULL;
2855 }
2856 
2857 static void
update_avg(struct if_ifclassq_stats * ifcqs,struct queue_stats * qs)2858 update_avg(struct if_ifclassq_stats *ifcqs, struct queue_stats *qs)
2859 {
2860 	u_int64_t                b, p;
2861 	int                      n;
2862 
2863 	n = qs->avgn;
2864 
2865 	switch (ifcqs->ifqs_scheduler) {
2866 	case PKTSCHEDT_FQ_CODEL:
2867 		b = ifcqs->ifqs_fq_codel_stats.fcls_dequeue_bytes;
2868 		p = ifcqs->ifqs_fq_codel_stats.fcls_dequeue;
2869 		break;
2870 	default:
2871 		b = 0;
2872 		p = 0;
2873 		break;
2874 	}
2875 
2876 	if (n == 0) {
2877 		qs->prev_bytes = b;
2878 		qs->prev_packets = p;
2879 		qs->avgn++;
2880 		return;
2881 	}
2882 
2883 	if (b >= qs->prev_bytes) {
2884 		qs->avg_bytes = ((qs->avg_bytes * (n - 1)) +
2885 		    (b - qs->prev_bytes)) / n;
2886 	}
2887 
2888 	if (p >= qs->prev_packets) {
2889 		qs->avg_packets = ((qs->avg_packets * (n - 1)) +
2890 		    (p - qs->prev_packets)) / n;
2891 	}
2892 
2893 	qs->prev_bytes = b;
2894 	qs->prev_packets = p;
2895 	if (n < AVGN_MAX) {
2896 		qs->avgn++;
2897 	}
2898 }
2899 
2900 
/*
 * Format a duration given in nanoseconds as "<value> <unit>", choosing
 * the largest unit (sec, msec, usec, nsec) that keeps the value >= 1.
 *
 * The result points into a small ring of static buffers, so up to four
 * results may be in use at once -- for example as several arguments of
 * a single logging call.  With a single buffer, all such arguments
 * would alias the text of whichever call ran last.  The function is
 * not reentrant.
 */
static char *
nsec_to_str(unsigned long long nsec)
{
	enum { NSEC_STR_NBUFS = 4, NSEC_STR_BUFLEN = 32 };
	static char bufs[NSEC_STR_NBUFS][NSEC_STR_BUFLEN];
	static unsigned int next_buf;
	/* nanoseconds contained in each larger unit */
	static const unsigned long long ns_per_sec = 1000000000ULL;
	static const unsigned long long ns_per_msec = 1000000ULL;
	static const unsigned long long ns_per_usec = 1000ULL;
	char *buf;
	const char *u;
	long double n = nsec, t;

	if (nsec >= ns_per_sec) {
		t = n / ns_per_sec;
		u = "sec ";
	} else if (nsec >= ns_per_msec) {
		t = n / ns_per_msec;
		u = "msec";
	} else if (nsec >= ns_per_usec) {
		t = n / ns_per_usec;
		u = "usec";
	} else {
		t = n;
		u = "nsec";
	}

	/* take the next slot so earlier results stay intact for a while */
	buf = bufs[next_buf];
	next_buf = (next_buf + 1) % NSEC_STR_NBUFS;

	snprintf(buf, NSEC_STR_BUFLEN, "%-4.2Lf %4s", t, u);
	return buf;
}
2925 
2926 static char *
sched2str(unsigned int s)2927 sched2str(unsigned int s)
2928 {
2929 	char *c;
2930 
2931 	switch (s) {
2932 	case PKTSCHEDT_NONE:
2933 		c = "NONE";
2934 		break;
2935 	case PKTSCHEDT_FQ_CODEL:
2936 		c = "FQ_CODEL";
2937 		break;
2938 	default:
2939 		c = "UNKNOWN";
2940 		break;
2941 	}
2942 
2943 	return c;
2944 }
2945 
/*
 * Map a priority value in the range 0..9 to its short name; any other
 * value yields "?".
 */
static char *
pri2str(unsigned int i)
{
	/* indexed by priority value */
	static char *const names[] = {
		"CTL",          /* 0 */
		"VO",           /* 1 */
		"VI",           /* 2 */
		"RV",           /* 3 */
		"AV",           /* 4 */
		"OAM",          /* 5 */
		"RD",           /* 6 */
		"BE",           /* 7 */
		"BK",           /* 8 */
		"BK_SYS",       /* 9 */
	};

	if (i >= sizeof(names) / sizeof(names[0])) {
		return "?";
	}
	return names[i];
}
2987 
2988 void
skt_aqstatpr(const char * interface)2989 skt_aqstatpr(const char *interface)
2990 {
2991 	unsigned int ifindex;
2992 	struct if_qstatsreq ifqr;
2993 	struct if_ifclassq_stats *ifcqs;
2994 	u_int32_t scheduler;
2995 	int s, n;
2996 
2997 	qflag = 2;  /* The 2 comes from the # of q's in netstat -qq */
2998 
2999 	ifindex = if_nametoindex(interface);
3000 	if (ifindex == 0) {
3001 		T_LOG("Invalid interface name\n");
3002 		return;
3003 	}
3004 
3005 	ifcqs = malloc(sizeof(*ifcqs));
3006 	if (ifcqs == NULL) {
3007 		T_LOG("Unable to allocate memory\n");
3008 		return;
3009 	}
3010 
3011 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
3012 		perror("Warning: socket(AF_INET)");
3013 		free(ifcqs);
3014 		return;
3015 	}
3016 
3017 	bzero(&ifqr, sizeof(ifqr));
3018 	strlcpy(ifqr.ifqr_name, interface, sizeof(ifqr.ifqr_name));
3019 	ifqr.ifqr_buf = ifcqs;
3020 	ifqr.ifqr_len = sizeof(*ifcqs);
3021 
3022 	ifqr.ifqr_slot = 0;
3023 	if (ioctl(s, SIOCGIFQUEUESTATS, (char *)&ifqr) < 0) {
3024 		if (errno == ENXIO) {
3025 			os_log(OS_LOG_DEFAULT, "Queue statistics are not available on %s\n",
3026 			    interface);
3027 		} else {
3028 			perror("Warning: ioctl(SIOCGIFQUEUESTATS)");
3029 		}
3030 		goto done;
3031 	}
3032 	scheduler = ifcqs->ifqs_scheduler;
3033 
3034 	os_log(OS_LOG_DEFAULT, "%s:"
3035 	    "     [ sched: %18s  qlength:  %3u/%3u ]\n",
3036 	    interface, sched2str(ifcqs->ifqs_scheduler),
3037 	    ifcqs->ifqs_len, ifcqs->ifqs_maxlen);
3038 	os_log(OS_LOG_DEFAULT, "     [ dequeued pkts: %10llu  bytes: %10llu "
3039 	    " dropped pkts: %6llu bytes: %6llu ]\n",
3040 	    ifcqs->ifqs_xmitcnt.packets, ifcqs->ifqs_xmitcnt.bytes,
3041 	    ifcqs->ifqs_dropcnt.packets, ifcqs->ifqs_dropcnt.bytes);
3042 
3043 	for (n = 0; n < IFCQ_SC_MAX && scheduler != PKTSCHEDT_NONE; n++) {
3044 		ifqr.ifqr_slot = n;
3045 		if (ioctl(s, SIOCGIFQUEUESTATS, (char *)&ifqr) < 0) {
3046 			perror("Warning: ioctl(SIOCGIFQUEUESTATS)");
3047 			goto done;
3048 		}
3049 
3050 		update_avg(ifcqs, &qstats[n]);
3051 
3052 		switch (scheduler) {
3053 		case PKTSCHEDT_FQ_CODEL:
3054 			print_fq_codel_stats(n,
3055 			    &ifcqs->ifqs_fq_codel_stats,
3056 			    &qstats[n]);
3057 			break;
3058 		case PKTSCHEDT_NONE:
3059 		default:
3060 			break;
3061 		}
3062 	}
3063 
3064 done:
3065 	free(ifcqs);
3066 	close(s);
3067 }
3068 
3069 static void
print_fq_codel_stats(int pri,struct fq_codel_classstats * fqst,struct queue_stats * qs)3070 print_fq_codel_stats(int pri, struct fq_codel_classstats *fqst,
3071     struct queue_stats *qs)
3072 {
3073 	int i = 0;
3074 
3075 	if (fqst->fcls_service_class == 0 && fqst->fcls_pri == 0) {
3076 		return;
3077 	}
3078 	os_log(OS_LOG_DEFAULT, "=====================================================\n");
3079 	os_log(OS_LOG_DEFAULT, "     [ pri: %s (%u)\tsrv_cl: 0x%x\tquantum: %u\tdrr_max: %u ]\n",
3080 	    pri2str(fqst->fcls_pri), fqst->fcls_pri,
3081 	    fqst->fcls_service_class, fqst->fcls_quantum,
3082 	    fqst->fcls_drr_max);
3083 	os_log(OS_LOG_DEFAULT, "     [ queued pkts: %llu\tbytes: %llu ]\n",
3084 	    fqst->fcls_pkt_cnt, fqst->fcls_byte_cnt);
3085 	os_log(OS_LOG_DEFAULT, "     [ dequeued pkts: %llu\tbytes: %llu ]\n",
3086 	    fqst->fcls_dequeue, fqst->fcls_dequeue_bytes);
3087 	os_log(OS_LOG_DEFAULT, "     [ budget: %lld\ttarget qdelay: %10s\tupdate interval:%10s ]\n",
3088 	    fqst->fcls_budget, nsec_to_str(fqst->fcls_target_qdelay), nsec_to_str(fqst->fcls_update_interval));
3089 	os_log(OS_LOG_DEFAULT, "     [ flow control: %u\tfeedback: %u\tstalls: %u\tfailed: %u \toverwhelming: %u ]\n",
3090 	    fqst->fcls_flow_control, fqst->fcls_flow_feedback,
3091 	    fqst->fcls_dequeue_stall, fqst->fcls_flow_control_fail, fqst->fcls_overwhelming);
3092 	os_log(OS_LOG_DEFAULT, "     [ drop overflow: %llu\tearly: %llu\tmemfail: %u\tduprexmt:%u ]\n",
3093 	    fqst->fcls_drop_overflow, fqst->fcls_drop_early,
3094 	    fqst->fcls_drop_memfailure, fqst->fcls_dup_rexmts);
3095 	os_log(OS_LOG_DEFAULT, "     [ l4s target qdelay: %10s ]\n", nsec_to_str(fqst->fcls_l4s_target_qdelay));
3096 	os_log(OS_LOG_DEFAULT, "     [ ce marked:%llu\tce mark failures:%llu\tce reported:%llu\tL4S pkts:%llu   ]\n",
3097 	    fqst->fcls_ce_marked, fqst->fcls_ce_mark_failures, fqst->fcls_ce_reported, fqst->fcls_l4s_pkts);
3098 	os_log(OS_LOG_DEFAULT, "     [ flows total: %u\tnew: %u\told: %u ]\n",
3099 	    fqst->fcls_flows_cnt,
3100 	    fqst->fcls_newflows_cnt, fqst->fcls_oldflows_cnt);
3101 	os_log(OS_LOG_DEFAULT, "     [ throttle on: %u\toff: %u\tdrop: %u ]\n",
3102 	    fqst->fcls_throttle_on, fqst->fcls_throttle_off,
3103 	    fqst->fcls_throttle_drops);
3104 	os_log(OS_LOG_DEFAULT, "     [ compressible pkts: %u compressed pkts: %u]\n",
3105 	    fqst->fcls_pkts_compressible, fqst->fcls_pkts_compressed);
3106 
3107 	if (qflag < 2) {
3108 		return;
3109 	}
3110 
3111 	if (fqst->fcls_flowstats_cnt > 0) {
3112 		os_log(OS_LOG_DEFAULT, "Flowhash\tBytes\tMin qdelay\tFlags\t\n");
3113 		for (i = 0; i < fqst->fcls_flowstats_cnt; i++) {
3114 			os_log(OS_LOG_DEFAULT, "%u\t%u\t%14s\t",
3115 			    fqst->fcls_flowstats[i].fqst_flowhash,
3116 			    fqst->fcls_flowstats[i].fqst_bytes,
3117 			    nsec_to_str(fqst->fcls_flowstats[i].fqst_min_qdelay));
3118 			if (fqst->fcls_flowstats[i].fqst_flags &
3119 			    FQ_FLOWSTATS_OLD_FLOW) {
3120 				os_log(OS_LOG_DEFAULT, "O");
3121 			}
3122 			if (fqst->fcls_flowstats[i].fqst_flags &
3123 			    FQ_FLOWSTATS_NEW_FLOW) {
3124 				os_log(OS_LOG_DEFAULT, "N");
3125 			}
3126 			if (fqst->fcls_flowstats[i].fqst_flags &
3127 			    FQ_FLOWSTATS_LARGE_FLOW) {
3128 				os_log(OS_LOG_DEFAULT, "L");
3129 			}
3130 			if (fqst->fcls_flowstats[i].fqst_flags &
3131 			    FQ_FLOWSTATS_DELAY_HIGH) {
3132 				os_log(OS_LOG_DEFAULT, "D");
3133 			}
3134 			if (fqst->fcls_flowstats[i].fqst_flags &
3135 			    FQ_FLOWSTATS_FLOWCTL_ON) {
3136 				os_log(OS_LOG_DEFAULT, "F");
3137 			}
3138 			os_log(OS_LOG_DEFAULT, "\n");
3139 		}
3140 	}
3141 }
3142 
3143 
3144 
/*
 * Return the suffix that pluralizes a regular noun for a count of n:
 * "" when n is exactly 1, "s" otherwise.
 *
 * Zero takes the plural ("0 packets").  Negative values, which is what a
 * large unsigned counter becomes when converted to int, are treated as
 * plural as well.  The returned string is a literal and must not be
 * modified or freed.
 */
char *
plural(int n)
{
	return n != 1 ? "s" : "";
}
3150 
/*
 * Return the suffix that pluralizes a noun taking "es" for a count of n:
 * "" when n is exactly 1, "es" otherwise.
 *
 * Zero takes the plural.  Negative values, which is what a large unsigned
 * counter becomes when converted to int, are treated as plural as well.
 * The returned string is a literal and must not be modified or freed.
 */
char *
plurales(int n)
{
	return n != 1 ? "es" : "";
}
3156 
/*
 * Return the ending for a noun whose singular ends in "y" and whose
 * plural ends in "ies": "y" when n is exactly 1, "ies" otherwise.
 *
 * Zero takes the plural ("0 replies").  Negative values, which is what a
 * large unsigned counter becomes when converted to int, are treated as
 * plural as well.  The returned string is a literal and must not be
 * modified or freed.
 */
char *
pluralies(int n)
{
	return n != 1 ? "ies" : "y";
}
3162 
3163 /*
3164  * Dump ARP statistics structure.
3165  */
3166 static void
arp_stats(uint32_t off,char * name,int af)3167 arp_stats(uint32_t off, char *name, int af)
3168 {
3169 	static struct arpstat parpstat;
3170 	struct arpstat arpstat;
3171 	size_t len = sizeof(arpstat);
3172 
3173 	if (sysctlbyname("net.link.ether.inet.stats", &arpstat,
3174 	    &len, 0, 0) < 0) {
3175 		warn("sysctl: net.link.ether.inet.stats");
3176 		return;
3177 	}
3178 
3179 	os_log(OS_LOG_DEFAULT, "%s:\n", name);
3180 
3181 #define ARPDIFF(f) (arpstat.f - parpstat.f)
3182 #define p(f, m) \
3183 os_log(OS_LOG_DEFAULT, m, ARPDIFF(f), plural(ARPDIFF(f)))
3184 #define p2(f, m) \
3185 os_log(OS_LOG_DEFAULT, m, ARPDIFF(f), pluralies(ARPDIFF(f)))
3186 #define p3(f, m) \
3187 os_log(OS_LOG_DEFAULT, m, ARPDIFF(f), plural(ARPDIFF(f)), pluralies(ARPDIFF(f)))
3188 
3189 	p(txrequests, "\t%u broadast ARP request%s sent\n");
3190 	p(txurequests, "\t%u unicast ARP request%s sent\n");
3191 	p2(txreplies, "\t%u ARP repl%s sent\n");
3192 	p(txannounces, "\t%u ARP announcement%s sent\n");
3193 	p(rxrequests, "\t%u ARP request%s received\n");
3194 	p2(rxreplies, "\t%u ARP repl%s received\n");
3195 	p(received, "\t%u total ARP packet%s received\n");
3196 	p(txconflicts, "\t%u ARP conflict probe%s sent\n");
3197 	p(invalidreqs, "\t%u invalid ARP resolve request%s\n");
3198 	p(reqnobufs, "\t%u total packet%s dropped due to lack of memory\n");
3199 	p3(held, "\t%u total packet%s held awaiting ARP repl%s\n");
3200 	p(dropped, "\t%u total packet%s dropped due to no ARP entry\n");
3201 	p(purged, "\t%u total packet%s dropped during ARP entry removal\n");
3202 	p2(timeouts, "\t%u ARP entr%s timed out\n");
3203 	p(dupips, "\t%u Duplicate IP%s seen\n");
3204 
3205 #undef ARPDIFF
3206 #undef p
3207 #undef p2
3208 }
3209 
3210 /*
3211  * Print out protocol statistics or control blocks (per sflag).
3212  * If the interface was not specifically requested, and the symbol
3213  * is not in the namelist, ignore this one.
3214  */
3215 void
skt_printproto(register struct protox * tp,char * name)3216 skt_printproto(register struct protox *tp, char *name)
3217 {
3218 	void (*pr)(uint32_t, char *, int);
3219 	uint32_t off;
3220 	int af = AF_UNSPEC;
3221 
3222 	pr = tp->pr_stats;
3223 	if (!pr) {
3224 		return;
3225 	}
3226 	off = tp->pr_protocol;
3227 	if (pr != NULL) {
3228 		(*pr)(off, name, af);
3229 	} else {
3230 		T_LOG("### no stats for %s\n", name);
3231 	}
3232 }
3233